diff --git a/src/config.ts b/src/config.ts
index e62340aa..e5230b29 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -39,14 +39,16 @@ export const configSchema = z.object({
   cookie: z
     .union([
       z.object({
-      name: z.string(),
-      value: z.string(),
+        name: z.string(),
+        value: z.string(),
       }),
-      z.array(z.object({
-        name: z.string(),
-        value: z.string(),
-      })),
-  ])
+      z.array(
+        z.object({
+          name: z.string(),
+          value: z.string(),
+        }),
+      ),
+    ])
     .optional(),
   /** Optional function to run for each page found */
   onVisitPage: z
diff --git a/src/core.ts b/src/core.ts
index 1f2e8f90..93179b0d 100644
--- a/src/core.ts
+++ b/src/core.ts
@@ -55,18 +55,6 @@ export async function crawl(config: Config) {
     const crawler = new PlaywrightCrawler({
       // Use the requestHandler to process each of the crawled pages.
       async requestHandler({ request, page, enqueueLinks, log, pushData }) {
-        if (config.cookie) {
-          const cookies = (Array.isArray(config.cookie) ? config.cookie : [config.cookie])
-          .map((cookie)=>{
-            return {
-              name:cookie.name,
-              value:cookie.value,
-              url:request.loadedUrl
-            }
-          });
-          await page.context().addCookies(cookies);
-        }
-
         const title = await page.title();
         pageCounter++;
         log.info(
@@ -110,12 +98,26 @@ export async function crawl(config: Config) {
       // headless: false,
       preNavigationHooks: [
         // Abort requests for certain resource types
-        async ({ page, log }) => {
+        async ({ request, page, log }) => {
+          // Add cookies before the early return below so they are always applied.
+          // The page has not navigated yet, so use request.url, not request.loadedUrl.
+          if (config.cookie) {
+            const cookies = (
+              Array.isArray(config.cookie) ? config.cookie : [config.cookie]
+            ).map((cookie) => {
+              return {
+                name: cookie.name,
+                value: cookie.value,
+                url: request.url,
+              };
+            });
+            await page.context().addCookies(cookies);
+          }
           // If there are no resource exclusions, return
           const RESOURCE_EXCLUSTIONS = config.resourceExclusions ?? [];
           if (RESOURCE_EXCLUSTIONS.length === 0) {
             return;
           }
           await page.route(`**\/*.{${RESOURCE_EXCLUSTIONS.join()}}`, (route) =>
             route.abort("aborted"),
           );