From 2233cbc3869cae0dba9159f9cb79cb4f830e268c Mon Sep 17 00:00:00 2001
From: Umar-Azam <92691687+Umar-Azam@users.noreply.github.com>
Date: Wed, 22 Nov 2023 02:02:19 +0000
Subject: [PATCH] modified config.ts to fix containerized execution

---
 containerapp/data/config.ts | 29 +++--------------------------
 1 file changed, 3 insertions(+), 26 deletions(-)

diff --git a/containerapp/data/config.ts b/containerapp/data/config.ts
index a914c4eb..d7d44255 100644
--- a/containerapp/data/config.ts
+++ b/containerapp/data/config.ts
@@ -1,31 +1,8 @@
-import { Page } from "playwright";
+import { Config } from "./src/config";
 
-type Config = {
-  /** URL to start the crawl */
-  url: string;
-  /** Pattern to match against for links on a page to subsequently crawl */
-  match: string;
-  /** Selector to grab the inner text from */
-  selector: string;
-  /** Don't crawl more than this many pages */
-  maxPagesToCrawl: number;
-  /** File name for the finished data */
-  outputFileName: string;
-  /** Optional cookie to be set. E.g. for Cookie Consent */
-  cookie?: { name: string; value: string };
-  /** Optional function to run for each page found */
-  onVisitPage?: (options: {
-    page: Page;
-    pushData: (data: any) => Promise<void>;
-  }) => Promise<void>;
-  /** Optional timeout for waiting for a selector to appear */
-  waitForSelectorTimeout?: number;
-};
-
-export const config: Config = {
+export const defaultConfig: Config = {
   url: "https://www.builder.io/c/docs/developers",
   match: "https://www.builder.io/c/docs/**",
-  selector: `.docs-builder-container`,
   maxPagesToCrawl: 50,
   outputFileName: "../data/output.json",
-};
+};
\ No newline at end of file