-
Notifications
You must be signed in to change notification settings - Fork 40
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #143 from llm-tools/loaders
Dynamic loader fix
- Loading branch information
Showing 76 changed files with 1,253 additions and 1,347 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,4 +26,4 @@ jobs: | |
- name: Install dependencies | ||
run: npm ci | ||
- name: Test build | ||
run: npm run build:esm | ||
run: npm run build:ci |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,37 +1,45 @@ | ||
import { getMimeType } from 'stream-mime-type'; | ||
import createDebugMessages from 'debug'; | ||
import axios from 'axios'; | ||
import md5 from 'md5'; | ||
|
||
import { contentTypeToMimeType, truncateCenterString } from '@llm-tools/embedjs-utils'; | ||
import { BaseLoader } from '@llm-tools/embedjs-interfaces'; | ||
import { truncateCenterString } from '@llm-tools/embedjs-utils'; | ||
import { createLoaderFromMimeType } from '../util/mime.js'; | ||
|
||
export class UrlLoader extends BaseLoader<{ type: 'UrlLoader' }> { | ||
private readonly debug = createDebugMessages('embedjs:loader:UrlLoader'); | ||
private readonly url: string; | ||
private readonly url: URL; | ||
|
||
constructor({ url }: { url: string }) { | ||
super(`UrlLoader_${md5(url)}`, { url: truncateCenterString(url, 50) }); | ||
this.url = url; | ||
this.url = new URL(url); | ||
this.debug(`UrlLoader verified '${url}' is a valid URL!`); | ||
} | ||
|
||
override async *getUnfilteredChunks() { | ||
this.debug('Loader is a valid URL!'); | ||
const stream = (await axios.get(this.url, { responseType: 'stream' })).data; | ||
const { mime } = await getMimeType(stream); | ||
this.debug(`Loader type detected as '${mime}'`); | ||
stream.destroy(); | ||
const response = await fetch(this.url, { headers: { 'Accept-Encoding': '' } }); | ||
const stream = response.body as unknown as NodeJS.ReadableStream; | ||
let { mime } = await getMimeType(stream, { strict: true }); | ||
this.debug(`Loader stream detected type '${mime}'`); | ||
|
||
const loader = await createLoaderFromMimeType(this.url, mime); | ||
for await (const result of await loader.getUnfilteredChunks()) { | ||
yield { | ||
pageContent: result.pageContent, | ||
metadata: { | ||
type: <const>'UrlLoader', | ||
source: this.url, | ||
}, | ||
}; | ||
if (!mime) { | ||
mime = contentTypeToMimeType(response.headers.get('content-type')); | ||
this.debug(`Using type '${mime}' from content-type header`); | ||
} | ||
|
||
try { | ||
const loader = await createLoaderFromMimeType(this.url.href, mime); | ||
for await (const result of await loader.getUnfilteredChunks()) { | ||
yield { | ||
pageContent: result.pageContent, | ||
metadata: { | ||
type: <const>'UrlLoader', | ||
source: this.url.href, | ||
}, | ||
}; | ||
} | ||
} catch (err) { | ||
this.debug(`Error creating loader for mime type '${mime}'`, err); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.