Skip to content

Commit

Permalink
Merge pull request #184 from llm-tools/confluence_updates
Browse files — browse the repository at this point in the history
Confluence updates
  • Loading branch information
adhityan authored Dec 19, 2024
2 parents 84d6be8 + 5b7b3ba commit 519c1c2
Show file tree
Hide file tree
Showing 35 changed files with 315 additions and 299 deletions.
4 changes: 2 additions & 2 deletions core/embedjs-interfaces/package.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"name": "@llm-tools/embedjs-interfaces",
"version": "0.1.24",
"version": "0.1.25",
"description": "Interfaces for extending the embedjs ecosystem",
"dependencies": {
"@langchain/core": "^0.3.23",
"@langchain/core": "^0.3.25",
"debug": "^4.4.0",
"md5": "^2.3.0",
"uuid": "^11.0.3"
Expand Down
4 changes: 2 additions & 2 deletions core/embedjs-utils/package.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"name": "@llm-tools/embedjs-utils",
"version": "0.1.24",
"version": "0.1.25",
"description": "Useful util functions when extending the embedjs ecosystem",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.24"
"@llm-tools/embedjs-interfaces": "0.1.25"
},
"type": "module",
"main": "./src/index.js",
Expand Down
8 changes: 4 additions & 4 deletions core/embedjs/package.json
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
{
"type": "module",
"name": "@llm-tools/embedjs",
"version": "0.1.24",
"version": "0.1.25",
"description": "A NodeJS RAG framework to easily work with LLMs and custom datasets",
"dependencies": {
"@langchain/textsplitters": "^0.1.0",
"@llm-tools/embedjs-interfaces": "0.1.24",
"@llm-tools/embedjs-utils": "0.1.24",
"@llm-tools/embedjs-interfaces": "0.1.25",
"@llm-tools/embedjs-utils": "0.1.25",
"debug": "^4.4.0",
"langchain": "^0.3.7",
"md5": "^2.3.0",
"mime": "^4.0.4",
"mime": "^4.0.6",
"stream-mime-type": "^2.0.0"
},
"devDependencies": {
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-astra/package.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"name": "@llm-tools/embedjs-astradb",
"version": "0.1.24",
"version": "0.1.25",
"description": "Add AstraDB support to embedjs",
"dependencies": {
"@datastax/astra-db-ts": "^1.5.0",
"@llm-tools/embedjs-interfaces": "0.1.24",
"@llm-tools/embedjs-interfaces": "0.1.25",
"debug": "^4.4.0"
},
"type": "module",
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-cosmos/package.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"name": "@llm-tools/embedjs-cosmos",
"version": "0.1.24",
"version": "0.1.25",
"description": "Add CosmosDB support to embedjs",
"dependencies": {
"@azure/cosmos": "^4.2.0",
"@llm-tools/embedjs-interfaces": "0.1.24",
"@llm-tools/embedjs-interfaces": "0.1.25",
"debug": "^4.4.0"
},
"type": "module",
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-hnswlib/package.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"name": "@llm-tools/embedjs-hnswlib",
"version": "0.1.24",
"version": "0.1.25",
"description": "Add HNSWLib support to embedjs",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.24",
"@llm-tools/embedjs-interfaces": "0.1.25",
"debug": "^4.4.0",
"hnswlib-node": "^3.0.0"
},
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-lancedb/package.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"name": "@llm-tools/embedjs-lancedb",
"version": "0.1.24",
"version": "0.1.25",
"description": "Add LanceDb support to embedjs",
"dependencies": {
"@lancedb/lancedb": "^0.14.0",
"@llm-tools/embedjs-interfaces": "0.1.24",
"@llm-tools/embedjs-interfaces": "0.1.25",
"compute-cosine-similarity": "^1.1.0",
"debug": "^4.4.0"
},
Expand Down
6 changes: 3 additions & 3 deletions databases/embedjs-libsql/package.json
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
{
"name": "@llm-tools/embedjs-libsql",
"version": "0.1.24",
"version": "0.1.25",
"description": "Add LibSQL support to embedjs",
"dependencies": {
"@libsql/client": "^0.14.0",
"@llm-tools/embedjs-interfaces": "0.1.24",
"@llm-tools/embedjs-utils": "0.1.24",
"@llm-tools/embedjs-interfaces": "0.1.25",
"@llm-tools/embedjs-utils": "0.1.25",
"debug": "^4.4.0"
},
"type": "module",
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-lmdb/package.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"name": "@llm-tools/embedjs-lmdb",
"version": "0.1.24",
"version": "0.1.25",
"description": "Add LMDB support to embedjs",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.24",
"@llm-tools/embedjs-interfaces": "0.1.25",
"debug": "^4.4.0",
"lmdb": "^3.2.0"
},
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-mongodb/package.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"name": "@llm-tools/embedjs-mongodb",
"version": "0.1.24",
"version": "0.1.25",
"description": "Add MongoDB support to embedjs",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.24",
"@llm-tools/embedjs-interfaces": "0.1.25",
"debug": "^4.4.0",
"mongodb": "^6.12.0"
},
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-pinecone/package.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"name": "@llm-tools/embedjs-pinecone",
"version": "0.1.24",
"version": "0.1.25",
"description": "Add Pinecone support to embedjs",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.24",
"@llm-tools/embedjs-interfaces": "0.1.25",
"@pinecone-database/pinecone": "^4.0.0",
"debug": "^4.4.0"
},
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-qdrant/package.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"name": "@llm-tools/embedjs-qdrant",
"version": "0.1.24",
"version": "0.1.25",
"description": "Add Qdrant support to embedjs",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.24",
"@llm-tools/embedjs-interfaces": "0.1.25",
"@qdrant/js-client-rest": "^1.12.0",
"debug": "^4.4.0",
"uuid": "^11.0.3"
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-redis/package.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"name": "@llm-tools/embedjs-redis",
"version": "0.1.24",
"version": "0.1.25",
"description": "Add Redis support to embedjs",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.24",
"@llm-tools/embedjs-interfaces": "0.1.25",
"ioredis": "^5.4.1"
},
"type": "module",
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-weaviate/package.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"name": "@llm-tools/embedjs-weaviate",
"version": "0.1.24",
"version": "0.1.25",
"description": "Add Weaviate support to embedjs",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.24",
"@llm-tools/embedjs-interfaces": "0.1.25",
"compute-cosine-similarity": "^1.1.0",
"debug": "^4.4.0",
"weaviate-ts-client": "^2.2.0"
Expand Down
2 changes: 1 addition & 1 deletion examples/confluence/src/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@ const llmApplication = await new RAGApplicationBuilder()
.setEmbeddingModel(new OpenAiEmbeddings())
.build();

await llmApplication.addLoader(new ConfluenceLoader({ spaceNames: ['DEMO'] }));
await llmApplication.addLoader(new ConfluenceLoader({ spaceName: 'DEMO' }));
console.log(await llmApplication.query('Who founded Tesla?'));
6 changes: 3 additions & 3 deletions loaders/embedjs-loader-confluence/package.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"name": "@llm-tools/embedjs-loader-confluence",
"version": "0.1.24",
"version": "0.1.25",
"description": "Confluence loader for embedjs",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.24",
"@llm-tools/embedjs-loader-web": "0.1.24",
"@llm-tools/embedjs-interfaces": "0.1.25",
"@llm-tools/embedjs-loader-web": "0.1.25",
"confluence.js": "^1.7.4",
"debug": "^4.4.0",
"md5": "^2.3.0"
Expand Down
60 changes: 36 additions & 24 deletions loaders/embedjs-loader-confluence/src/confluence-loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,32 +10,34 @@ export class ConfluenceLoader extends BaseLoader<{ type: 'ConfluenceLoader' }, {

private readonly confluence: ConfluenceClient;
private readonly confluenceBaseUrl: string;
private readonly spaceNames: string[];
private readonly spaceName: string;

private readonly lastUpdatedFilter?: Date;

constructor({
spaceNames,
spaceName,
confluenceBaseUrl,
confluenceUsername,
confluenceToken,
chunkSize,
chunkOverlap,
options,
}: {
spaceNames: [string, ...string[]];
spaceName: string;
confluenceBaseUrl?: string;
confluenceUsername?: string;
confluenceToken?: string;
chunkSize?: number;
chunkOverlap?: number;
options?: {
lastUpdatedFilter: Date;
};
}) {
super(
`ConfluenceLoader_${md5(spaceNames.sort().join(','))}`,
{ spaceNames },
chunkSize ?? 2000,
chunkOverlap ?? 200,
);

this.spaceNames = spaceNames;
super(`ConfluenceLoader_${md5(spaceName)}`, { spaceName }, chunkSize ?? 2000, chunkOverlap ?? 200);

this.spaceName = spaceName;
this.confluenceBaseUrl = confluenceBaseUrl ?? process.env.CONFLUENCE_BASE_URL;
this.lastUpdatedFilter = options?.lastUpdatedFilter ?? null;

this.confluence = new ConfluenceClient({
host: this.confluenceBaseUrl,
Expand All @@ -49,16 +51,13 @@ export class ConfluenceLoader extends BaseLoader<{ type: 'ConfluenceLoader' }, {
}

override async *getUnfilteredChunks() {
for (const spaceKey of this.spaceNames) {
let count = 0;

for await (const result of this.processSpace(spaceKey)) {
yield result;
count++;
}

this.debug(`Space '${spaceKey}' had ${count} new pages`);
let count = 0;
for await (const result of this.processSpace(this.spaceName)) {
yield result;
count++;
}

this.debug(`Space '${this.spaceName}' had ${count} new pages`);
}

private async *processSpace(spaceKey: string) {
Expand All @@ -84,13 +83,26 @@ export class ConfluenceLoader extends BaseLoader<{ type: 'ConfluenceLoader' }, {
let confluenceVersion = 0;

try {
const spaceProperties = await this.confluence.content.getContentById({
const pageProperties = await this.confluence.content.getContentById({
id: pageId,
expand: ['version'],
expand: ['version', 'history'],
});

if (!spaceProperties.version.number) throw new Error('Version number not found in space properties...');
confluenceVersion = spaceProperties.version.number;
if (this.lastUpdatedFilter) {
const pageLastEditDate = new Date(pageProperties.history.lastUpdated.when);

if (pageLastEditDate > this.lastUpdatedFilter) {
this.debug(`Page '${title}' has last edit date ${pageLastEditDate}. Continuing...`);
} else {
this.debug(
`Page '${title}' has last edit date ${pageLastEditDate}, which is less than filter date. Skipping...`,
);
return;
}
}

if (!pageProperties.version.number) throw new Error('Version number not found in page properties...');
confluenceVersion = pageProperties.version.number;
} catch (e) {
this.debug('Could not get page properties. Page will be SKIPPED!', title, e.response);
return;
Expand Down
6 changes: 3 additions & 3 deletions loaders/embedjs-loader-csv/package.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"name": "@llm-tools/embedjs-loader-csv",
"version": "0.1.24",
"version": "0.1.25",
"description": "CSV loader for embedjs",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.24",
"@llm-tools/embedjs-utils": "0.1.24",
"@llm-tools/embedjs-interfaces": "0.1.25",
"@llm-tools/embedjs-utils": "0.1.25",
"csv-parse": "^5.6.0",
"debug": "^4.4.0",
"md5": "^2.3.0"
Expand Down
6 changes: 3 additions & 3 deletions loaders/embedjs-loader-markdown/package.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"name": "@llm-tools/embedjs-loader-markdown",
"version": "0.1.24",
"version": "0.1.25",
"description": "XML loader for embedjs",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.24",
"@llm-tools/embedjs-loader-web": "0.1.24",
"@llm-tools/embedjs-interfaces": "0.1.25",
"@llm-tools/embedjs-loader-web": "0.1.25",
"debug": "^4.4.0",
"md5": "^2.3.0",
"micromark": "^4.0.1",
Expand Down
6 changes: 3 additions & 3 deletions loaders/embedjs-loader-msoffice/package.json
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
{
"name": "@llm-tools/embedjs-loader-msoffice",
"version": "0.1.24",
"version": "0.1.25",
"description": "Word, PPT and Excel loader for embedjs",
"dependencies": {
"@langchain/textsplitters": "^0.1.0",
"@llm-tools/embedjs-interfaces": "0.1.24",
"@llm-tools/embedjs-utils": "0.1.24",
"@llm-tools/embedjs-interfaces": "0.1.25",
"@llm-tools/embedjs-utils": "0.1.25",
"md5": "^2.3.0",
"office-text-extractor": "^3.0.3"
},
Expand Down
6 changes: 3 additions & 3 deletions loaders/embedjs-loader-pdf/package.json
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
{
"name": "@llm-tools/embedjs-loader-pdf",
"version": "0.1.24",
"version": "0.1.25",
"description": "PDF loader for embedjs",
"dependencies": {
"@langchain/textsplitters": "^0.1.0",
"@llm-tools/embedjs-interfaces": "0.1.24",
"@llm-tools/embedjs-utils": "0.1.24",
"@llm-tools/embedjs-interfaces": "0.1.25",
"@llm-tools/embedjs-utils": "0.1.25",
"md5": "^2.3.0",
"office-text-extractor": "^3.0.3"
},
Expand Down
6 changes: 3 additions & 3 deletions loaders/embedjs-loader-sitemap/package.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"name": "@llm-tools/embedjs-loader-sitemap",
"version": "0.1.24",
"version": "0.1.25",
"description": "Sitemap recursive loader for embedjs",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.24",
"@llm-tools/embedjs-loader-web": "0.1.24",
"@llm-tools/embedjs-interfaces": "0.1.25",
"@llm-tools/embedjs-loader-web": "0.1.25",
"debug": "^4.4.0",
"md5": "^2.3.0",
"sitemapper": "^3.2.18"
Expand Down
6 changes: 3 additions & 3 deletions loaders/embedjs-loader-web/package.json
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
{
"name": "@llm-tools/embedjs-loader-web",
"version": "0.1.24",
"version": "0.1.25",
"description": "Web page loader for embedjs",
"dependencies": {
"@langchain/textsplitters": "^0.1.0",
"@llm-tools/embedjs-interfaces": "0.1.24",
"@llm-tools/embedjs-utils": "0.1.24",
"@llm-tools/embedjs-interfaces": "0.1.25",
"@llm-tools/embedjs-utils": "0.1.25",
"debug": "^4.4.0",
"html-to-text": "^9.0.5",
"md5": "^2.3.0"
Expand Down
Loading

0 comments on commit 519c1c2

Please sign in to comment.