Allow `cookie` in the crawler config to be a single cookie or an array of
cookies, and set the cookies in a pre-navigation hook (before the page is
requested) instead of in the request handler (after the page has loaded).

diff --git a/src/config.ts b/src/config.ts
index 7e5f5fbf..e5230b29 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -37,10 +37,18 @@ export const configSchema = z.object({
   outputFileName: z.string(),
   /** Optional cookie to be set. E.g. for Cookie Consent */
   cookie: z
-    .object({
-      name: z.string(),
-      value: z.string(),
-    })
+    .union([
+      z.object({
+        name: z.string(),
+        value: z.string(),
+      }),
+      z.array(
+        z.object({
+          name: z.string(),
+          value: z.string(),
+        }),
+      ),
+    ])
     .optional(),
   /** Optional function to run for each page found */
   onVisitPage: z
diff --git a/src/core.ts b/src/core.ts
index 8e03bbe5..93179b0d 100644
--- a/src/core.ts
+++ b/src/core.ts
@@ -55,16 +55,6 @@ export async function crawl(config: Config) {
     const crawler = new PlaywrightCrawler({
       // Use the requestHandler to process each of the crawled pages.
       async requestHandler({ request, page, enqueueLinks, log, pushData }) {
-        if (config.cookie) {
-          // Set the cookie for the specific URL
-          const cookie = {
-            name: config.cookie.name,
-            value: config.cookie.value,
-            url: request.loadedUrl,
-          };
-          await page.context().addCookies([cookie]);
-        }
-
         const title = await page.title();
         pageCounter++;
         log.info(
@@ -108,12 +98,24 @@ export async function crawl(config: Config) {
       // headless: false,
       preNavigationHooks: [
-        // Abort requests for certain resource types
-        async ({ page, log }) => {
+        // Set cookies and abort requests for certain resource types
+        async ({ request, page, log }) => {
+          // Add cookies before navigating. The page has not loaded yet, so
+          // request.loadedUrl is still undefined here; use request.url instead.
+          if (config.cookie) {
+            const cookies = (
+              Array.isArray(config.cookie) ? config.cookie : [config.cookie]
+            ).map((cookie) => ({
+              name: cookie.name,
+              value: cookie.value,
+              url: request.url,
+            }));
+            await page.context().addCookies(cookies);
+          }
           // If there are no resource exclusions, return
           const RESOURCE_EXCLUSTIONS = config.resourceExclusions ?? [];
           if (RESOURCE_EXCLUSTIONS.length === 0) {
             return;
           }
           await page.route(`**\/*.{${RESOURCE_EXCLUSTIONS.join()}}`, (route) =>
             route.abort("aborted"),
           );