-
Notifications
You must be signed in to change notification settings - Fork 41
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #117 from llm-tools/monorepo
Addition of new examples, re-add localPath and URL loaders, rename astra to astradb
- Loading branch information
Showing
18 changed files
with
506 additions
and
256 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,8 @@ | ||
import { RAGApplication } from './core/rag-application.js'; | ||
import { RAGApplicationBuilder } from './core/rag-application-builder.js'; | ||
import { LocalPathLoader } from './loaders/local-path-loader.js'; | ||
import { TextLoader } from './loaders/text-loader.js'; | ||
import { JsonLoader } from './loaders/json-loader.js'; | ||
import { UrlLoader } from './loaders/url-loader.js'; | ||
|
||
export { RAGApplication, RAGApplicationBuilder, TextLoader, JsonLoader }; | ||
export { RAGApplication, RAGApplicationBuilder, TextLoader, JsonLoader, LocalPathLoader, UrlLoader }; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
import { getMimeType } from 'stream-mime-type'; | ||
import createDebugMessages from 'debug'; | ||
import path from 'node:path'; | ||
import fs from 'node:fs'; | ||
import md5 from 'md5'; | ||
|
||
import { createLoaderFromMimeType } from '../util/mime.js'; | ||
import { BaseLoader, UnfilteredLoaderChunk } from '@llm-tools/embedjs-interfaces'; | ||
|
||
/**
 * Loader that walks a local file or directory and hands every file it finds
 * to a loader chosen from the file's detected MIME type.
 */
export class LocalPathLoader extends BaseLoader<{ type: 'LocalPathLoader' }> {
    private readonly debug = createDebugMessages('embedjs:loader:LocalPathLoader');
    private readonly path: string;

    /**
     * @param path - File or directory to load; its md5 hash forms part of the loader id.
     */
    constructor({ path }: { path: string }) {
        super(`LocalPathLoader_${md5(path)}`, { path });
        this.path = path;
    }

    /**
     * Yields every chunk produced under the configured path, adding this loader's
     * `type` and the root path it was created with (`originalPath`) to the metadata.
     */
    override async *getUnfilteredChunks() {
        for await (const result of await this.recursivelyAddPath(this.path)) {
            yield {
                ...result,
                metadata: {
                    ...result.metadata,
                    type: 'LocalPathLoader' as const,
                    originalPath: this.path,
                },
            };
        }
    }

    /**
     * Sniffs the MIME type of a single file from its content stream.
     * The read stream is always destroyed, even when detection throws.
     */
    private async detectMimeType(filePath: string) {
        const stream = fs.createReadStream(filePath);
        try {
            const { mime } = await getMimeType(stream);
            return mime;
        } finally {
            stream.destroy();
        }
    }

    /**
     * Depth-first traversal of `currentPath`.
     * Directories are expanded entry by entry; files are delegated to the loader
     * returned by `createLoaderFromMimeType` and re-emitted with `source` set to the file path.
     */
    private async *recursivelyAddPath(currentPath: string): AsyncGenerator<UnfilteredLoaderChunk, void, void> {
        // lstat is used, so a symbolic link is classified by the link itself, not its target.
        const isDir = fs.lstatSync(currentPath).isDirectory();
        this.debug(`Processing path '${currentPath}'. It is a ${isDir ? 'Directory!' : 'file...'}`);

        if (isDir) {
            const files = fs.readdirSync(currentPath);
            this.debug(`Dir '${currentPath}' has ${files.length} entries inside`, files);

            for (const file of files) {
                yield* this.recursivelyAddPath(path.resolve(currentPath, file));
            }
            return;
        }

        const mime = await this.detectMimeType(currentPath);
        // Log the file actually being inspected, not the root path the loader was created with.
        this.debug(`File '${currentPath}' has mime type '${mime}'`);

        const loader = await createLoaderFromMimeType(currentPath, mime);
        for await (const result of await loader.getUnfilteredChunks()) {
            // Only the page content is kept; the delegate loader's metadata is replaced by the file path.
            yield {
                pageContent: result.pageContent,
                metadata: {
                    source: currentPath,
                },
            };
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
import { getMimeType } from 'stream-mime-type'; | ||
import createDebugMessages from 'debug'; | ||
import axios from 'axios'; | ||
import md5 from 'md5'; | ||
|
||
import { BaseLoader } from '@llm-tools/embedjs-interfaces'; | ||
import { truncateCenterString } from '@llm-tools/embedjs-utils'; | ||
import { createLoaderFromMimeType } from '../util/mime.js'; | ||
|
||
/**
 * Loader that downloads a URL, detects the MIME type of the response and
 * delegates the actual parsing to the loader matching that type.
 */
export class UrlLoader extends BaseLoader<{ type: 'UrlLoader' }> {
    private readonly debug = createDebugMessages('embedjs:loader:UrlLoader');
    private readonly url: string;

    /**
     * @param url - Address to load; its md5 hash forms part of the loader id.
     *   The value handed to the base class is passed through `truncateCenterString(url, 50)`
     *   (presumably shortened for display - confirm against the util).
     */
    constructor({ url }: { url: string }) {
        super(`UrlLoader_${md5(url)}`, { url: truncateCenterString(url, 50) });
        this.url = url;
    }

    /**
     * Requests the URL as a stream and sniffs its MIME type.
     * The response stream is always destroyed, even when detection throws.
     */
    private async detectMimeType() {
        const stream = (await axios.get(this.url, { responseType: 'stream' })).data;
        try {
            const { mime } = await getMimeType(stream);
            return mime;
        } finally {
            stream.destroy();
        }
    }

    /**
     * Yields the chunks of the delegate loader, keeping only `pageContent` and
     * tagging each chunk with this loader's `type` and the full URL as `source`.
     */
    override async *getUnfilteredChunks() {
        this.debug('Loader is a valid URL!');
        const mime = await this.detectMimeType();
        this.debug(`Loader type detected as '${mime}'`);

        const loader = await createLoaderFromMimeType(this.url, mime);
        for await (const result of await loader.getUnfilteredChunks()) {
            yield {
                pageContent: result.pageContent,
                metadata: {
                    type: 'UrlLoader' as const,
                    source: this.url,
                },
            };
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
import mime from 'mime'; | ||
import createDebugMessages from 'debug'; | ||
import { BaseLoader } from '@llm-tools/embedjs-interfaces'; | ||
import { TextLoader } from '../loaders/text-loader.js'; | ||
|
||
/**
 * Builds the rejection handler used when an optional loader package cannot be imported.
 * The returned callback always throws, so the surrounding `import(...).catch(...)` keeps the module type.
 */
function throwMissingLoaderPackage(packageName: string, contentKind: string): () => never {
    return () => {
        throw new Error(`Package \`${packageName}\` needs to be installed to load ${contentKind}`);
    };
}

/** Lazily imports the CSV loader package and creates a loader for the given path or URL. */
async function createCsvLoaderFor(filePathOrUrl: string): Promise<BaseLoader> {
    const { CsvLoader } = await import('@llm-tools/embedjs-loader-csv').catch(
        throwMissingLoaderPackage('@llm-tools/embedjs-loader-csv', 'csv files'),
    );
    return new CsvLoader({ filePathOrUrl });
}

/**
 * Picks and instantiates the loader that handles `mimeType`.
 * Loader packages are imported on demand so that only the ones actually needed must be installed.
 *
 * @param loader - File path or URL of the resource; it is forwarded to the chosen loader.
 * @param mimeType - MIME type detected for the resource.
 * @returns The loader instance for the resource.
 * @throws Error when the package providing the required loader cannot be imported.
 * @throws SyntaxError when the MIME type is not handled, or when XML content is not a sitemap.
 */
export async function createLoaderFromMimeType(loader: string, mimeType: string): Promise<BaseLoader> {
    switch (mimeType) {
        case 'application/msword':
        case 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': {
            const { DocxLoader } = await import('@llm-tools/embedjs-loader-msoffice').catch(
                throwMissingLoaderPackage('@llm-tools/embedjs-loader-msoffice', 'docx files'),
            );
            return new DocxLoader({ filePathOrUrl: loader });
        }
        case 'application/vnd.ms-excel':
        case 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': {
            const { ExcelLoader } = await import('@llm-tools/embedjs-loader-msoffice').catch(
                throwMissingLoaderPackage('@llm-tools/embedjs-loader-msoffice', 'excel files'),
            );
            return new ExcelLoader({ filePathOrUrl: loader });
        }
        case 'application/pdf': {
            const { PdfLoader } = await import('@llm-tools/embedjs-loader-pdf').catch(
                throwMissingLoaderPackage('@llm-tools/embedjs-loader-pdf', 'PDF files'),
            );
            return new PdfLoader({ filePathOrUrl: loader });
        }
        case 'application/vnd.openxmlformats-officedocument.presentationml.presentation': {
            const { PptLoader } = await import('@llm-tools/embedjs-loader-msoffice').catch(
                throwMissingLoaderPackage('@llm-tools/embedjs-loader-msoffice', 'pptx files'),
            );
            return new PptLoader({ filePathOrUrl: loader });
        }
        case 'text/plain': {
            // Content sniffing only reports plain text; refine the type from the file extension.
            const fineType = mime.getType(loader);
            createDebugMessages('embedjs:createLoaderFromMimeType')(`Fine type for '${loader}' is '${fineType}'`);
            if (fineType === 'text/csv') {
                return createCsvLoaderFor(loader);
            }
            // NOTE(review): `loader` (the path/URL string) is passed as `text` here. If TextLoader
            // expects the document content, this embeds the location instead of the file - confirm.
            return new TextLoader({ text: loader });
        }
        case 'application/csv':
            return createCsvLoaderFor(loader);
        case 'text/html': {
            const { WebLoader } = await import('@llm-tools/embedjs-loader-web').catch(
                throwMissingLoaderPackage('@llm-tools/embedjs-loader-web', 'web documents'),
            );
            return new WebLoader({ urlOrContent: loader });
        }
        case 'text/xml': {
            const { SitemapLoader } = await import('@llm-tools/embedjs-loader-sitemap').catch(
                throwMissingLoaderPackage('@llm-tools/embedjs-loader-sitemap', 'sitemaps'),
            );
            // Sitemaps are the only XML documents handled; anything else is rejected below.
            if (await SitemapLoader.test(loader)) {
                return new SitemapLoader({ url: loader });
            }
            throw new SyntaxError(`No processor found for generic xml`);
        }
        default:
            throw new SyntaxError(`Unknown mime type '${mimeType}'`);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
import baseConfig from '../../eslint.config.js'; | ||
|
||
export default [...baseConfig]; |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
{ | ||
"name": "@llm-tools/embedjs-examples-pinecone", | ||
"version": "0.1.1", | ||
"type": "module", | ||
"dependencies": { | ||
"dotenv": "^16.4.5" | ||
}, | ||
"scripts": { | ||
"start": "nx run examples-pinecone:serve" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
{ | ||
"name": "examples-pinecone", | ||
"$schema": "../../node_modules/nx/schemas/project-schema.json", | ||
"sourceRoot": "examples/pinecone/src", | ||
"projectType": "application", | ||
"tags": [], | ||
"targets": { | ||
"build": { | ||
"executor": "@nx/esbuild:esbuild", | ||
"outputs": ["{options.outputPath}"], | ||
"defaultConfiguration": "development", | ||
"options": { | ||
"platform": "node", | ||
"outputPath": "dist/examples/pinecone", | ||
"format": ["esm"], | ||
"bundle": true, | ||
"main": "examples/pinecone/src/main.ts", | ||
"tsConfig": "examples/pinecone/tsconfig.app.json", | ||
"generatePackageJson": false, | ||
"esbuildOptions": { | ||
"sourcemap": true, | ||
"outExtension": { | ||
".js": ".js" | ||
} | ||
} | ||
}, | ||
"configurations": { | ||
"development": {}, | ||
"production": { | ||
"esbuildOptions": { | ||
"sourcemap": false, | ||
"outExtension": { | ||
".js": ".js" | ||
} | ||
} | ||
} | ||
} | ||
}, | ||
"serve": { | ||
"executor": "@nx/js:node", | ||
"defaultConfiguration": "development", | ||
"dependsOn": ["build"], | ||
"options": { | ||
"buildTarget": "examples-pinecone:build", | ||
"runBuildTargetDependencies": true | ||
}, | ||
"configurations": { | ||
"development": { | ||
"buildTarget": "examples-pinecone:build:development" | ||
}, | ||
"production": { | ||
"buildTarget": "examples-pinecone:build:production" | ||
} | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
import { RAGApplicationBuilder } from '@llm-tools/embedjs';
import { PineconeDb } from '@llm-tools/embedjs-pinecone';
// Import the loader through its package entry point (like the two imports above)
// instead of reaching into the workspace source tree.
import { WebLoader } from '@llm-tools/embedjs-loader-web';

// Build a RAG application backed by a pod-based Pinecone index.
const llmApplication = await new RAGApplicationBuilder()
    .setVectorDb(
        new PineconeDb({
            projectName: 'test',
            namespace: 'dev',
            indexSpec: {
                pod: {
                    podType: 'p1.x1',
                    environment: 'us-east1-gcp',
                },
            },
        }),
    )
    .build();

// Load a single web page into the application before querying it.
await llmApplication.addLoader(new WebLoader({ urlOrContent: 'https://en.wikipedia.org/wiki/Tesla,_Inc.' }));

console.log(await llmApplication.query('Who founded Tesla?'));
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
{ | ||
"extends": "./tsconfig.json", | ||
"compilerOptions": { | ||
"outDir": "../../dist/out-tsc", | ||
"types": ["node"] | ||
}, | ||
"exclude": ["jest.config.ts", "src/**/*.spec.ts", "src/**/*.test.ts"], | ||
"include": ["src/**/*.ts"] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
{ | ||
"extends": "../../tsconfig.base.json", | ||
"files": [], | ||
"include": [], | ||
"references": [ | ||
{ | ||
"path": "./tsconfig.app.json" | ||
} | ||
], | ||
"compilerOptions": { | ||
"esModuleInterop": true, | ||
"target": "ES2022", | ||
"lib": ["ES2022", "ES2022.Object"], | ||
"module": "NodeNext", | ||
"moduleResolution": "nodenext" | ||
} | ||
} |
Oops, something went wrong.