From 247b87bbfea1f2a5e8200f76eeeb32abd1f4d7f6 Mon Sep 17 00:00:00 2001 From: gongzhenxing Date: Wed, 20 Dec 2023 18:51:54 +0800 Subject: [PATCH 1/2] Support for multiple cookies --- src/config.ts | 14 ++++++++++---- src/core.ts | 16 +++++++++------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/src/config.ts b/src/config.ts index 7e5f5fbf..e62340aa 100644 --- a/src/config.ts +++ b/src/config.ts @@ -37,10 +37,16 @@ export const configSchema = z.object({ outputFileName: z.string(), /** Optional cookie to be set. E.g. for Cookie Consent */ cookie: z - .object({ - name: z.string(), - value: z.string(), - }) + .union([ + z.object({ + name: z.string(), + value: z.string(), + }), + z.array(z.object({ + name: z.string(), + value: z.string(), + })), + ]) .optional(), /** Optional function to run for each page found */ onVisitPage: z diff --git a/src/core.ts b/src/core.ts index 8e03bbe5..1f2e8f90 100644 --- a/src/core.ts +++ b/src/core.ts @@ -56,13 +56,15 @@ export async function crawl(config: Config) { // Use the requestHandler to process each of the crawled pages. async requestHandler({ request, page, enqueueLinks, log, pushData }) { if (config.cookie) { - // Set the cookie for the specific URL - const cookie = { - name: config.cookie.name, - value: config.cookie.value, - url: request.loadedUrl, - }; - await page.context().addCookies([cookie]); + const cookies = (Array.isArray(config.cookie) ? config.cookie : [config.cookie]) + .map((cookie)=>{ + return { + name:cookie.name, + value:cookie.value, + url:request.loadedUrl + } + }); + await page.context().addCookies(cookies); } const title = await page.title(); From 581cc7706c2c65485d0aa36853d9c0b97e4d48c0 Mon Sep 17 00:00:00 2001 From: gongzhenxing Date: Mon, 25 Dec 2023 09:46:43 +0800 Subject: [PATCH 2/2] Set cookies in advance and modify code style --- src/config.ts | 16 +++++++++------- src/core.ts | 26 +++++++++++++------------- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/src/config.ts b/src/config.ts index e62340aa..e5230b29 100644 --- a/src/config.ts +++ b/src/config.ts @@ -39,14 +39,16 @@ export const configSchema = z.object({ cookie: z .union([ z.object({ - name: z.string(), - value: z.string(), + name: z.string(), + value: z.string(), }), - z.array(z.object({ - name: z.string(), - value: z.string(), - })), - ]) + z.array( + z.object({ + name: z.string(), + value: z.string(), + }), + ), + ]) .optional(), /** Optional function to run for each page found */ onVisitPage: z diff --git a/src/core.ts b/src/core.ts index 1f2e8f90..93179b0d 100644 --- a/src/core.ts +++ b/src/core.ts @@ -55,18 +55,6 @@ export async function crawl(config: Config) { const crawler = new PlaywrightCrawler({ // Use the requestHandler to process each of the crawled pages. async requestHandler({ request, page, enqueueLinks, log, pushData }) { - if (config.cookie) { - const cookies = (Array.isArray(config.cookie) ? config.cookie : [config.cookie]) - .map((cookie)=>{ - return { - name:cookie.name, - value:cookie.value, - url:request.loadedUrl - } - }); - await page.context().addCookies(cookies); - } - const title = await page.title(); pageCounter++; log.info( @@ -110,12 +98,24 @@ export async function crawl(config: Config) { // headless: false, preNavigationHooks: [ // Abort requests for certain resource types - async ({ page, log }) => { + async ({ request, page, log }) => { // If there are no resource exclusions, return const RESOURCE_EXCLUSTIONS = config.resourceExclusions ?? []; if (RESOURCE_EXCLUSTIONS.length === 0) { return; } + if (config.cookie) { + const cookies = ( + Array.isArray(config.cookie) ? config.cookie : [config.cookie] + ).map((cookie) => { + return { + name: cookie.name, + value: cookie.value, + url: request.loadedUrl, + }; + }); + await page.context().addCookies(cookies); + } await page.route(`**\/*.{${RESOURCE_EXCLUSTIONS.join()}}`, (route) => route.abort("aborted"), );