Skip to content

Commit

Permalink
Merge pull request #181 from llm-tools/sqlite
Browse files Browse the repository at this point in the history
Move BaseEmbedding to abstract class
  • Loading branch information
adhityan authored Dec 3, 2024
2 parents 1efd12e + f1dc9cb commit 0ee88b5
Show file tree
Hide file tree
Showing 47 changed files with 409 additions and 367 deletions.
2 changes: 1 addition & 1 deletion core/embedjs-interfaces/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@llm-tools/embedjs-interfaces",
"version": "0.1.22",
"version": "0.1.23",
"description": "Interfaces for extending the embedjs ecosystem",
"dependencies": {
"@langchain/core": "^0.3.19",
Expand Down
11 changes: 7 additions & 4 deletions core/embedjs-interfaces/src/interfaces/base-embeddings.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
export interface BaseEmbeddings {
embedDocuments(texts: string[]): Promise<number[][]>;
embedQuery(text: string): Promise<number[]>;
getDimensions(): Promise<number>;
/**
 * Base class that every embedding model integration extends.
 *
 * Concrete subclasses must implement the three abstract methods. `init` is an
 * optional asynchronous setup hook whose default implementation does nothing.
 */
export abstract class BaseEmbeddings {
    /**
     * Embeds a batch of texts.
     *
     * @param texts - The texts to embed.
     * @returns A promise resolving to an array of numeric vectors
     * (presumably one vector per input text - confirm against implementations).
     */
    public abstract embedDocuments(texts: string[]): Promise<number[][]>;

    /**
     * Embeds a single query string.
     *
     * @param text - The query text to embed.
     * @returns A promise resolving to a numeric vector.
     */
    public abstract embedQuery(text: string): Promise<number[]>;

    /**
     * @returns A promise resolving to a number; presumably the length of the
     * vectors produced by this model - confirm against implementations.
     */
    public abstract getDimensions(): Promise<number>;

    /**
     * Asynchronous setup hook. Resolves immediately in the base class;
     * subclasses may override it to perform setup work.
     */
    public async init(): Promise<void> {
        // Intentionally a no-op: subclasses override this only when they need setup.
    }
}
4 changes: 2 additions & 2 deletions core/embedjs-utils/package.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"name": "@llm-tools/embedjs-utils",
"version": "0.1.22",
"version": "0.1.23",
"description": "Useful util functions when extending the embedjs ecosystem",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.22"
"@llm-tools/embedjs-interfaces": "0.1.23"
},
"type": "module",
"main": "./src/index.js",
Expand Down
6 changes: 3 additions & 3 deletions core/embedjs/package.json
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
{
"type": "module",
"name": "@llm-tools/embedjs",
"version": "0.1.22",
"version": "0.1.23",
"description": "A NodeJS RAG framework to easily work with LLMs and custom datasets",
"dependencies": {
"@langchain/textsplitters": "^0.1.0",
"@llm-tools/embedjs-interfaces": "0.1.22",
"@llm-tools/embedjs-utils": "0.1.22",
"@llm-tools/embedjs-interfaces": "0.1.23",
"@llm-tools/embedjs-utils": "0.1.23",
"debug": "^4.3.7",
"langchain": "^0.3.6",
"md5": "^2.3.0",
Expand Down
4 changes: 2 additions & 2 deletions core/embedjs/src/core/rag-application.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,6 @@ export class RAGApplication {

this.searchResultCount = llmBuilder.getSearchResultCount();
this.embeddingRelevanceCutOff = llmBuilder.getEmbeddingRelevanceCutOff();

RAGEmbedding.init(llmBuilder.getEmbeddingModel());
}

/**
Expand All @@ -57,6 +55,8 @@ export class RAGApplication {
* LLM based on the configuration provided
*/
public async init(llmBuilder: RAGApplicationBuilder) {
await RAGEmbedding.init(llmBuilder.getEmbeddingModel());

this.model = await this.getModel(llmBuilder.getModel());
if (!this.model) this.debug('No base model set; query function unavailable!');
else BaseModel.setDefaultTemperature(llmBuilder.getTemperature());
Expand Down
3 changes: 2 additions & 1 deletion core/embedjs/src/core/rag-embedding.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ import { Document } from 'langchain/document';
export class RAGEmbedding {
private static singleton: RAGEmbedding;

public static init(embeddingModel: BaseEmbeddings) {
/**
 * Initializes the embedding model and creates the shared RAGEmbedding
 * instance. Does nothing when the singleton has already been created.
 *
 * @param embeddingModel - The embedding model to initialize and wrap.
 */
public static async init(embeddingModel: BaseEmbeddings) {
    // Already set up: keep the existing singleton untouched.
    if (this.singleton) return;

    // Let the model finish its own setup before it is wrapped.
    await embeddingModel.init();
    this.singleton = new RAGEmbedding(embeddingModel);
}
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-astra/package.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"name": "@llm-tools/embedjs-astradb",
"version": "0.1.22",
"version": "0.1.23",
"description": "Add AstraDB support to embedjs",
"dependencies": {
"@datastax/astra-db-ts": "^1.5.0",
"@llm-tools/embedjs-interfaces": "0.1.22",
"@llm-tools/embedjs-interfaces": "0.1.23",
"debug": "^4.3.7"
},
"type": "module",
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-cosmos/package.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"name": "@llm-tools/embedjs-cosmos",
"version": "0.1.22",
"version": "0.1.23",
"description": "Add CosmosDB support to embedjs",
"dependencies": {
"@azure/cosmos": "^4.2.0",
"@llm-tools/embedjs-interfaces": "0.1.22",
"@llm-tools/embedjs-interfaces": "0.1.23",
"debug": "^4.3.7"
},
"type": "module",
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-hnswlib/package.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"name": "@llm-tools/embedjs-hnswlib",
"version": "0.1.22",
"version": "0.1.23",
"description": "Add HNSWLib support to embedjs",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.22",
"@llm-tools/embedjs-interfaces": "0.1.23",
"debug": "^4.3.7",
"hnswlib-node": "^3.0.0"
},
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-lancedb/package.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"name": "@llm-tools/embedjs-lancedb",
"version": "0.1.22",
"version": "0.1.23",
"description": "Add LanceDb support to embedjs",
"dependencies": {
"@lancedb/lancedb": "^0.13.0",
"@llm-tools/embedjs-interfaces": "0.1.22",
"@llm-tools/embedjs-interfaces": "0.1.23",
"compute-cosine-similarity": "^1.1.0",
"debug": "^4.3.7"
},
Expand Down
6 changes: 3 additions & 3 deletions databases/embedjs-libsql/package.json
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
{
"name": "@llm-tools/embedjs-libsql",
"version": "0.1.22",
"version": "0.1.23",
"description": "Add LibSQL support to embedjs",
"dependencies": {
"@libsql/client": "^0.14.0",
"@llm-tools/embedjs-interfaces": "0.1.22",
"@llm-tools/embedjs-utils": "0.1.22",
"@llm-tools/embedjs-interfaces": "0.1.23",
"@llm-tools/embedjs-utils": "0.1.23",
"debug": "^4.3.7"
},
"type": "module",
Expand Down
13 changes: 11 additions & 2 deletions databases/embedjs-libsql/src/libsql-store.ts
Original file line number Diff line number Diff line change
Expand Up @@ -116,13 +116,17 @@ export class LibSqlStore implements BaseStore {
}

/**
 * Stores a custom value for a loader under `key`, replacing any existing
 * entry with the same key.
 *
 * @param loaderId - Identifier of the loader that owns the value.
 * @param key - Key under which the value is stored.
 * @param value - Value to store; it is serialized with `JSON.stringify`.
 */
async loaderCustomSet<T extends Record<string, unknown>>(loaderId: string, key: string, value: T): Promise<void> {
    this.debug(`LibSQL custom set '${key}' with values`, value);
    // Remove any previous row for this key before inserting the new one.
    await this.loaderCustomDelete(key);

    this.debug(`LibSQL custom set '${key}' insert started`);
    const results = await this.client.execute({
        sql: `INSERT INTO ${this.loadersCustomDataTableName} (key, loaderId, value)
            VALUES (?, ?, ?)`,
        args: [key, loaderId, JSON.stringify(value)],
    });

    // An INSERT without RETURNING yields no rows, so report the affected-row count instead.
    this.debug(`LibSQL custom set for key '${key}' resulted in`, results.rowsAffected);
}

async loaderCustomGet<T extends Record<string, unknown>>(key: string): Promise<T> {
Expand All @@ -144,10 +148,15 @@ export class LibSqlStore implements BaseStore {
}

/**
 * Deletes the custom loader value stored under `key`.
 *
 * @param key - Key of the custom value to remove.
 */
async loaderCustomDelete(key: string): Promise<void> {
    this.debug(`LibSQL custom delete '${key}'`);
    // Bind the key as a parameter instead of interpolating it into the SQL text:
    // a key containing a quote would otherwise break the statement or inject SQL.
    const results = await this.client.execute({
        sql: `DELETE FROM ${this.loadersCustomDataTableName} WHERE key = ?;`,
        args: [key],
    });
    this.debug(`LibSQL custom delete for key '${key}' resulted in`, results.rowsAffected);
}

/**
 * Deletes the loader row with the given id from the loaders table and every
 * custom-data row that references that loader id.
 *
 * @param loaderId - Identifier of the loader whose rows are removed.
 */
async deleteLoaderMetadataAndCustomValues(loaderId: string): Promise<void> {
    this.debug(`LibSQL deleteLoaderMetadataAndCustomValues for loader '${loaderId}'`);
    // The loader id is bound as a parameter rather than spliced into the SQL text,
    // so ids containing quotes cannot break the statement or inject SQL.
    await this.client.execute({
        sql: `DELETE FROM ${this.loadersTableName} WHERE id = ?;`,
        args: [loaderId],
    });
    await this.client.execute({
        sql: `DELETE FROM ${this.loadersCustomDataTableName} WHERE loaderId = ?;`,
        args: [loaderId],
    });
}
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-lmdb/package.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"name": "@llm-tools/embedjs-lmdb",
"version": "0.1.22",
"version": "0.1.23",
"description": "Add LMDB support to embedjs",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.22",
"@llm-tools/embedjs-interfaces": "0.1.23",
"debug": "^4.3.7",
"lmdb": "^3.1.6"
},
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-mongodb/package.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"name": "@llm-tools/embedjs-mongodb",
"version": "0.1.22",
"version": "0.1.23",
"description": "Add MongoDB support to embedjs",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.22",
"@llm-tools/embedjs-interfaces": "0.1.23",
"debug": "^4.3.7",
"mongodb": "^6.11.0"
},
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-pinecone/package.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"name": "@llm-tools/embedjs-pinecone",
"version": "0.1.22",
"version": "0.1.23",
"description": "Add Pinecone support to embedjs",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.22",
"@llm-tools/embedjs-interfaces": "0.1.23",
"@pinecone-database/pinecone": "^4.0.0",
"debug": "^4.3.7"
},
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-qdrant/package.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"name": "@llm-tools/embedjs-qdrant",
"version": "0.1.22",
"version": "0.1.23",
"description": "Add Qdrant support to embedjs",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.22",
"@llm-tools/embedjs-interfaces": "0.1.23",
"@qdrant/js-client-rest": "^1.12.0",
"debug": "^4.3.7",
"uuid": "^11.0.3"
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-redis/package.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"name": "@llm-tools/embedjs-redis",
"version": "0.1.22",
"version": "0.1.23",
"description": "Add Redis support to embedjs",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.22",
"@llm-tools/embedjs-interfaces": "0.1.23",
"ioredis": "^5.4.1"
},
"type": "module",
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-weaviate/package.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"name": "@llm-tools/embedjs-weaviate",
"version": "0.1.22",
"version": "0.1.23",
"description": "Add Weaviate support to embedjs",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.22",
"@llm-tools/embedjs-interfaces": "0.1.23",
"compute-cosine-similarity": "^1.1.0",
"debug": "^4.3.7",
"weaviate-ts-client": "^2.2.0"
Expand Down
7 changes: 5 additions & 2 deletions examples/confluence/src/main.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
import 'dotenv/config';
import path from 'node:path';
import { RAGApplicationBuilder } from '@llm-tools/embedjs';
import { LibSqlDb, LibSqlStore } from '@llm-tools/embedjs-libsql';
import { OpenAi, OpenAiEmbeddings } from '@llm-tools/embedjs-openai';
import { ConfluenceLoader } from '@llm-tools/embedjs-loader-confluence';
import { HNSWDb } from '@llm-tools/embedjs-hnswlib';

const databasePath = path.resolve('./examples/confluence/data.db');
const llmApplication = await new RAGApplicationBuilder()
.setStore(new LibSqlStore({ path: databasePath }))
.setVectorDatabase(new LibSqlDb({ path: databasePath }))
.setModel(new OpenAi({ modelName: 'gpt-4o' }))
.setEmbeddingModel(new OpenAiEmbeddings())
.setVectorDatabase(new HNSWDb())
.build();

await llmApplication.addLoader(new ConfluenceLoader({ spaceNames: ['DEMO'] }));
Expand Down
6 changes: 3 additions & 3 deletions loaders/embedjs-loader-confluence/package.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"name": "@llm-tools/embedjs-loader-confluence",
"version": "0.1.22",
"version": "0.1.23",
"description": "Confluence loader for embedjs",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.22",
"@llm-tools/embedjs-loader-web": "0.1.22",
"@llm-tools/embedjs-interfaces": "0.1.23",
"@llm-tools/embedjs-loader-web": "0.1.23",
"confluence.js": "^1.7.4",
"debug": "^4.3.7",
"md5": "^2.3.0"
Expand Down
33 changes: 21 additions & 12 deletions loaders/embedjs-loader-confluence/src/confluence-loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,13 @@ export class ConfluenceLoader extends BaseLoader<{ type: 'ConfluenceLoader' }, {

private async *processSpace(spaceKey: string) {
this.debug('Processing space', spaceKey);

try {
const spaceContent = await this.confluence.space.getContentForSpace({ spaceKey });
this.debug(`Confluence space '${spaceKey}' has '${spaceContent['page'].results.length}' root pages`);

for (const { id } of spaceContent['page'].results) {
for await (const result of this.processPage(id)) {
for (const { id, title } of spaceContent['page'].results) {
for await (const result of this.processPage(id, title)) {
yield result;
}
}
Expand All @@ -78,8 +79,10 @@ export class ConfluenceLoader extends BaseLoader<{ type: 'ConfluenceLoader' }, {
}
}

private async *processPage(pageId: string) {
private async *processPage(pageId: string, title: string) {
this.debug('Processing page', title);
let confluenceVersion = 0;

try {
const spaceProperties = await this.confluence.content.getContentById({
id: pageId,
Expand All @@ -89,29 +92,29 @@ export class ConfluenceLoader extends BaseLoader<{ type: 'ConfluenceLoader' }, {
if (!spaceProperties.version.number) throw new Error('Version number not found in space properties...');
confluenceVersion = spaceProperties.version.number;
} catch (e) {
this.debug('Could not get page properties. Page will be SKIPPED!', pageId, e.response);
this.debug('Could not get page properties. Page will be SKIPPED!', title, e.response);
return;
}

let doProcess = false;
if (!(await this.checkInCache(pageId))) {
this.debug(`Processing '${pageId}' for the FIRST time...`);
this.debug(`Processing '${title}' for the FIRST time...`);
doProcess = true;
} else {
const cacheVersion = (await this.getFromCache(pageId)).version;
if (cacheVersion !== confluenceVersion) {
this.debug(
`For page '${pageId}' - version in cache is ${cacheVersion} and confluence version is ${confluenceVersion}. This page will be PROCESSED.`,
`For page '${title}' - version in cache is ${cacheVersion} and confluence version is ${confluenceVersion}. This page will be PROCESSED.`,
);
doProcess = true;
} else
this.debug(
`For page '${pageId}' - version in cache and confluence are the same ${confluenceVersion}. This page will be SKIPPED.`,
`For page '${title}' - version in cache and confluence are the same ${confluenceVersion}. This page will be SKIPPED.`,
);
}

if (!doProcess) {
this.debug(`Skipping page '${pageId}'`);
this.debug(`Skipping page '${title}'`);
return;
}

Expand All @@ -126,21 +129,27 @@ export class ConfluenceLoader extends BaseLoader<{ type: 'ConfluenceLoader' }, {
return;
}

this.debug(`Processing content for page '${title}'...`);
for await (const result of this.getContentChunks(content.body.view.value, content._links.webui)) {
yield result;
}

await this.saveToCache(pageId, { version: confluenceVersion });

if (content.children) {
for (const { id } of content.children.page.results) {
for await (const result of this.processPage(id)) {
yield result;
for (const { id, title } of content.children.page.results) {
try {
for await (const result of this.processPage(id, title)) {
yield result;
}
} catch (e) {
this.debug(`Error! Could not process page child '${title}'`, pageId, e);
return;
}
}
}
} catch (e) {
this.debug('Error! Could not process page content or children', pageId, e);
this.debug('Error! Could not process page content', pageId, e);
return;
}
}
Expand Down
6 changes: 3 additions & 3 deletions loaders/embedjs-loader-csv/package.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"name": "@llm-tools/embedjs-loader-csv",
"version": "0.1.22",
"version": "0.1.23",
"description": "CSV loader for embedjs",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.22",
"@llm-tools/embedjs-utils": "0.1.22",
"@llm-tools/embedjs-interfaces": "0.1.23",
"@llm-tools/embedjs-utils": "0.1.23",
"csv-parse": "^5.6.0",
"debug": "^4.3.7",
"md5": "^2.3.0"
Expand Down
Loading

0 comments on commit 0ee88b5

Please sign in to comment.