diff --git a/README.md b/README.md index 43bfe4c7..180e0b9c 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,8 @@ type Config = { url: string; /** Pattern to match against for links on a page to subsequently crawl */ match: string; + /** Optional selector to limit which links on a page are grabbed */ + matchSelector?: string; /** Selector to grab the inner text from */ selector: string; /** Don't crawl more than this many pages */ diff --git a/src/config.ts b/src/config.ts index 7e5f5fbf..0db1e317 100644 --- a/src/config.ts +++ b/src/config.ts @@ -18,7 +18,12 @@ export const configSchema = z.object({ * @default "" */ match: z.string().or(z.array(z.string())), - + /** * Selector to grab links from * @example "li > a.block" * @default "" */ + matchSelector: z.string().optional(), /** * Selector to grab the inner text from * @example ".docs-builder-container" diff --git a/src/core.ts b/src/core.ts index 8e03bbe5..08f7a3d8 100644 --- a/src/core.ts +++ b/src/core.ts @@ -100,6 +100,7 @@ export async function crawl(config: Config) { await enqueueLinks({ globs: typeof config.match === "string" ? [config.match] : config.match, + selector: config.matchSelector, }); }, // Comment this option to scrape the full website.