From 581cc7706c2c65485d0aa36853d9c0b97e4d48c0 Mon Sep 17 00:00:00 2001
From: gongzhenxing
Date: Mon, 25 Dec 2023 09:46:43 +0800
Subject: [PATCH] Set cookies in advance and modify code style

Move cookie injection out of requestHandler and into the
preNavigationHooks hook so the cookies are already on the browser
context when the page is first requested, and reformat the cookie
schema in src/config.ts.

The cookies are added before the resource-exclusion early return so
they are applied even when no exclusions are configured, and they are
scoped with request.url because request.loadedUrl is only populated
once the page has been loaded.

---
 src/config.ts | 16 +++++++++-------
 src/core.ts   | 30 +++++++++++++++++-------------
 2 files changed, 26 insertions(+), 20 deletions(-)

diff --git a/src/config.ts b/src/config.ts
index e62340aa..e5230b29 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -39,14 +39,16 @@ export const configSchema = z.object({
   cookie: z
     .union([
       z.object({
-          name: z.string(),
-          value: z.string(),
+        name: z.string(),
+        value: z.string(),
       }),
-      z.array(z.object({
-          name: z.string(),
-          value: z.string(),
-      })),
-  ])
+      z.array(
+        z.object({
+          name: z.string(),
+          value: z.string(),
+        }),
+      ),
+    ])
     .optional(),
   /** Optional function to run for each page found */
   onVisitPage: z
diff --git a/src/core.ts b/src/core.ts
index 1f2e8f90..93179b0d 100644
--- a/src/core.ts
+++ b/src/core.ts
@@ -55,18 +55,6 @@ export async function crawl(config: Config) {
     const crawler = new PlaywrightCrawler({
       // Use the requestHandler to process each of the crawled pages.
       async requestHandler({ request, page, enqueueLinks, log, pushData }) {
-        if (config.cookie) {
-          const cookies = (Array.isArray(config.cookie) ? config.cookie : [config.cookie])
-            .map((cookie)=>{
-              return {
-                name:cookie.name,
-                value:cookie.value,
-                url:request.loadedUrl
-              }
-            });
-          await page.context().addCookies(cookies);
-        }
-
         const title = await page.title();
         pageCounter++;
         log.info(
@@ -110,12 +98,28 @@ export async function crawl(config: Config) {
       // headless: false,
       preNavigationHooks: [
         // Abort requests for certain resource types
-        async ({ page, log }) => {
+        async ({ request, page, log }) => {
+          // Add the configured cookies before navigating. This runs ahead of
+          // the early return below so cookies are set even when there are no
+          // resource exclusions. The page has not been loaded yet, so the
+          // cookies are scoped to request.url rather than request.loadedUrl.
+          if (config.cookie) {
+            const cookies = (
+              Array.isArray(config.cookie) ? config.cookie : [config.cookie]
+            ).map((cookie) => {
+              return {
+                name: cookie.name,
+                value: cookie.value,
+                url: request.url,
+              };
+            });
+            await page.context().addCookies(cookies);
+          }
           // If there are no resource exclusions, return
           const RESOURCE_EXCLUSTIONS = config.resourceExclusions ?? [];
           if (RESOURCE_EXCLUSTIONS.length === 0) {
             return;
           }
           await page.route(`**\/*.{${RESOURCE_EXCLUSTIONS.join()}}`, (route) =>
             route.abort("aborted"),
           );