Skip to content

Commit

Permalink
fix: rm llama processor
Browse files Browse the repository at this point in the history
  • Loading branch information
CahidArda committed Oct 3, 2024
1 parent 5b804cf commit 59975f7
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 58 deletions.
10 changes: 4 additions & 6 deletions src/database.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,10 @@ import type { UnstructuredLoaderOptions } from "@langchain/community/document_lo
export type FilePath = string;
export type URL = string;

/**
 * Processor configuration, discriminated by the `name` literal.
 *
 * - `"unstructured"`: `options` is typed as `UnstructuredLoaderOptions`.
 * - `"llama-parse"`: `options` is `unknown`, so it must be narrowed before use.
 */
export type ProcessorType =
| {
name: "unstructured";
options: UnstructuredLoaderOptions;
}
| { name: "llama-parse"; options: unknown };
/**
 * Processor configuration. `"unstructured"` is the only accepted `name`;
 * `options` uses the `UnstructuredLoaderOptions` type imported from
 * `@langchain/community`.
 */
export type ProcessorType = {
name: "unstructured";
options: UnstructuredLoaderOptions;
};

export type DatasWithFileSource =
| {
Expand Down
97 changes: 45 additions & 52 deletions src/file-loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -75,58 +75,51 @@ export class FileDataLoader {
// Without this check typescript complains about types because of unions
if (!hasProcessor(this.config)) throw new Error("Only processors are allowed");

switch (this.config.processor.name) {
case "unstructured": {
const client = new UnstructuredClient({
serverURL: "https://api.unstructuredapp.io",
security: {
apiKeyAuth: this.config.processor.options.apiKey,
},
});

//@ts-expect-error TS can't pick up the correct type due to complex union
const fileData = await Bun.file(this.config.fileSource).text();
const response = await client.general.partition({
//@ts-expect-error Will be fixed soon
partitionParameters: {
files: {
content: fileData,
//@ts-expect-error TS can't pick up the correct type due to complex union
fileName: this.config.fileSource,
},
...this.config.processor.options,
},
});
const elements = response.elements?.filter(
(element) => typeof element.text === "string"
) as Element[];

return {
// eslint-disable-next-line @typescript-eslint/require-await
load: async (): Promise<Document[]> => {
const documents: Document[] = [];
for (const element of elements) {
const { metadata, text } = element;
if (typeof text === "string" && text !== "") {
documents.push(
new Document({
pageContent: text,
metadata: {
...metadata,
category: element.type,
},
})
);
}
}
return documents;
},
};
}
case "llama-parse": {
throw new Error("llama-parse has been deprecated in @upstash/rag-chat 2.0.0.");
}
}
const client = new UnstructuredClient({
serverURL: "https://api.unstructuredapp.io",
security: {
apiKeyAuth: this.config.processor.options.apiKey,
},
});

//@ts-expect-error TS can't pick up the correct type due to complex union
const fileData = await Bun.file(this.config.fileSource).text();
const response = await client.general.partition({
//@ts-expect-error Will be fixed soon
partitionParameters: {
files: {
content: fileData,
//@ts-expect-error TS can't pick up the correct type due to complex union
fileName: this.config.fileSource,
},
...this.config.processor.options,
},
});
const elements = response.elements?.filter(
(element) => typeof element.text === "string"
) as Element[];

return {
// eslint-disable-next-line @typescript-eslint/require-await
load: async (): Promise<Document[]> => {
const documents: Document[] = [];
for (const element of elements) {
const { metadata, text } = element;
if (typeof text === "string" && text !== "") {
documents.push(
new Document({
pageContent: text,
metadata: {
...metadata,
category: element.type,
},
})
);
}
}
return documents;
},
};
}

private isURL(source: FilePath | Blob): source is URL {
Expand Down

0 comments on commit 59975f7

Please sign in to comment.