From 19246f6289172ea356bbcc45d40797bbf1838425 Mon Sep 17 00:00:00 2001 From: rafaelmmiller <150964962+rafaelsideguide@users.noreply.github.com> Date: Wed, 18 Dec 2024 18:36:04 -0300 Subject: [PATCH] feat-SDK/added crawl id to ws ---
Review notes (supplementary text after the "---" separator; not part of the
commit message):

* Bumps @mendable/firecrawl-js 1.9.4 -> 1.9.5 and the Python SDK __version__
  1.6.4 -> 1.6.5.
* JS CrawlWatcher: stores the constructor's `id` argument in a new public `id`
  field and adds `id: this.id` to the detail of the status events touched here
  (the dispatch just before the "error" branch, the "error" branch, and the
  "WebSocket error" dispatch). "document" event details change from the bare
  document (`doc` / `msg.data`) to `{ ...document, id }`.
* Python _handle_message: adds 'id': self.id to the 'done' and 'error' details.
  'document' details change from the bare document to
  {'data': <document>, 'id': self.id}.
* NOTE(review): the two SDKs now emit differently shaped "document" payloads
  (JS merges `id` into the document's fields, Python nests the document under
  'data'). A Python handler that indexed the detail as the document itself
  would need to read detail['data'] after this change - confirm that is
  intended for a patch-level version bump.
* NOTE(review): no hunk here assigns `self.id` on the Python side; presumably
  the class already sets it - verify against firecrawl.py.
* NOTE(review): in JS, `id` is spread last, so it would replace a field named
  `id` on the document if one exists - confirm documents carry no such field.

apps/js-sdk/firecrawl/package.json | 2 +- apps/js-sdk/firecrawl/src/index.ts | 15 +++++++++++++-- apps/python-sdk/firecrawl/__init__.py | 2 +- apps/python-sdk/firecrawl/firecrawl.py | 8 ++++---- 4 files changed, 19 insertions(+), 8 deletions(-) diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json index 74dfcb02a..1c7f082fc 100644 --- a/apps/js-sdk/firecrawl/package.json +++ b/apps/js-sdk/firecrawl/package.json @@ -1,6 +1,6 @@ { "name": "@mendable/firecrawl-js", - "version": "1.9.4", + "version": "1.9.5", "description": "JavaScript SDK for Firecrawl API", "main": "dist/index.js", "types": "dist/index.d.ts", diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index 020a22936..440630978 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -934,9 +934,11 @@ export class CrawlWatcher extends TypedEventTarget { private ws: WebSocket; public data: FirecrawlDocument[]; public status: CrawlStatusResponse["status"]; + public id: string; constructor(id: string, app: FirecrawlApp) { super(); + this.id = id; this.ws = new WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey); this.status = "scraping"; this.data = []; @@ -967,6 +969,7 @@ export class CrawlWatcher extends TypedEventTarget { detail: { status: this.status, data: this.data, + id: this.id, }, })); } else if (msg.type === "error") { @@ -976,6 +979,7 @@ export class CrawlWatcher extends TypedEventTarget { status: this.status, data: this.data, error: msg.error, + id: this.id, }, })); } else if (msg.type === "catchup") { @@ -983,12 +987,18 @@ export class CrawlWatcher extends TypedEventTarget { this.data.push(...(msg.data.data ?? 
[])); for (const doc of this.data) { this.dispatchTypedEvent("document", new CustomEvent("document", { - detail: doc, + detail: { + ...doc, + id: this.id, + }, })); } } else if (msg.type === "document") { this.dispatchTypedEvent("document", new CustomEvent("document", { - detail: msg.data, + detail: { + ...msg.data, + id: this.id, + }, })); } } @@ -1015,6 +1025,7 @@ export class CrawlWatcher extends TypedEventTarget { status: this.status, data: this.data, error: "WebSocket error", + id: this.id, }, })); }).bind(this); diff --git a/apps/python-sdk/firecrawl/__init__.py b/apps/python-sdk/firecrawl/__init__.py index 31d680956..8c5d1b44f 100644 --- a/apps/python-sdk/firecrawl/__init__.py +++ b/apps/python-sdk/firecrawl/__init__.py @@ -13,7 +13,7 @@ from .firecrawl import FirecrawlApp # noqa -__version__ = "1.6.4" +__version__ = "1.6.5" # Define the logger for the Firecrawl project logger: logging.Logger = logging.getLogger("firecrawl") diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index 45ed27d88..7ac2d2dca 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -704,15 +704,15 @@ def dispatch_event(self, event_type: str, detail: Dict[str, Any]): async def _handle_message(self, msg: Dict[str, Any]): if msg['type'] == 'done': self.status = 'completed' - self.dispatch_event('done', {'status': self.status, 'data': self.data}) + self.dispatch_event('done', {'status': self.status, 'data': self.data, 'id': self.id}) elif msg['type'] == 'error': self.status = 'failed' - self.dispatch_event('error', {'status': self.status, 'data': self.data, 'error': msg['error']}) + self.dispatch_event('error', {'status': self.status, 'data': self.data, 'error': msg['error'], 'id': self.id}) elif msg['type'] == 'catchup': self.status = msg['data']['status'] self.data.extend(msg['data'].get('data', [])) for doc in self.data: - self.dispatch_event('document', doc) + self.dispatch_event('document', 
{'data': doc, 'id': self.id}) elif msg['type'] == 'document': self.data.append(msg['data']) - self.dispatch_event('document', msg['data']) + self.dispatch_event('document', {'data': msg['data'], 'id': self.id})