From 3f460af6c5b52bd5bd458807e9b81119db2c6123 Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Tue, 7 May 2024 15:29:27 -0300 Subject: [PATCH 1/3] Added idempotency key to crawl route --- .../src/__tests__/e2e_withAuth/index.test.ts | 25 +++++++++++++++++ apps/api/src/controllers/crawl.ts | 10 +++++++ apps/api/src/services/idempotency/create.ts | 22 +++++++++++++++ apps/api/src/services/idempotency/validate.ts | 27 +++++++++++++++++++ 4 files changed, 84 insertions(+) create mode 100644 apps/api/src/services/idempotency/create.ts create mode 100644 apps/api/src/services/idempotency/validate.ts diff --git a/apps/api/src/__tests__/e2e_withAuth/index.test.ts b/apps/api/src/__tests__/e2e_withAuth/index.test.ts index 5e3777b39..da49a3a1a 100644 --- a/apps/api/src/__tests__/e2e_withAuth/index.test.ts +++ b/apps/api/src/__tests__/e2e_withAuth/index.test.ts @@ -1,6 +1,7 @@ import request from "supertest"; import { app } from "../../index"; import dotenv from "dotenv"; +import { v4 as uuidv4 } from "uuid"; dotenv.config(); @@ -145,6 +146,30 @@ describe("E2E Tests for API Routes", () => { /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/ ); }); + it('should prevent duplicate requests using the same idempotency key', async () => { + const uniqueIdempotencyKey = uuidv4(); + + // First request with the idempotency key + const firstResponse = await request(TEST_URL) + .post('/v0/crawl') + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .set("x-idempotency-key", uniqueIdempotencyKey) + .send({ url: 'https://mendable.ai' }); + + expect(firstResponse.statusCode).toBe(200); + + // Second request with the same idempotency key + const secondResponse = await request(TEST_URL) + .post('/v0/crawl') + .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`) + .set("Content-Type", "application/json") + .set("x-idempotency-key", uniqueIdempotencyKey) + .send({ url: 'https://mendable.ai' }); + + expect(secondResponse.statusCode).toBe(409); + expect(secondResponse.body.error).toBe('Idempotency key already used'); + }); // Additional tests for insufficient credits? }); diff --git a/apps/api/src/controllers/crawl.ts b/apps/api/src/controllers/crawl.ts index e53faedaf..8d573545d 100644 --- a/apps/api/src/controllers/crawl.ts +++ b/apps/api/src/controllers/crawl.ts @@ -7,6 +7,8 @@ import { RateLimiterMode } from "../../src/types"; import { addWebScraperJob } from "../../src/services/queue-jobs"; import { isUrlBlocked } from "../../src/scraper/WebScraper/utils/blocklist"; import { logCrawl } from "../../src/services/logging/crawl_log"; +import { validateIdempotencyKey } from "../../src/services/idempotency/validate"; +import { createIdempotencyKey } from "../../src/services/idempotency/create"; export async function crawlController(req: Request, res: Response) { try { @@ -19,6 +21,14 @@ export async function crawlController(req: Request, res: Response) { return res.status(status).json({ error }); } + if (req.headers["x-idempotency-key"]) { + const isIdempotencyValid = await validateIdempotencyKey(req); + if (!isIdempotencyValid) { + return res.status(409).json({ error: "Idempotency key already used" }); + } + createIdempotencyKey(req); + } + const { success: creditsCheckSuccess, message: creditsCheckMessage } = await checkTeamCredits(team_id, 1); if (!creditsCheckSuccess) { diff --git a/apps/api/src/services/idempotency/create.ts b/apps/api/src/services/idempotency/create.ts new file mode 100644 index 000000000..ec3e18e75 --- /dev/null +++ b/apps/api/src/services/idempotency/create.ts @@ -0,0 +1,22 @@ +import { Request } from "express"; +import { supabase_service } from "../supabase"; + +export async function createIdempotencyKey( + req: Request, +): Promise { + const idempotencyKey = req.headers['x-idempotency-key'] as string; + if (!idempotencyKey) { + throw new Error("No idempotency key provided in the request headers."); + } + + const { data, error } = await supabase_service + .from("idempotency_keys") + .insert({ key: idempotencyKey }); + + if (error) { + console.error("Failed to create idempotency key:", error); + throw error; + } + + return idempotencyKey; +} diff --git a/apps/api/src/services/idempotency/validate.ts b/apps/api/src/services/idempotency/validate.ts new file mode 100644 index 000000000..ef4373964 --- /dev/null +++ b/apps/api/src/services/idempotency/validate.ts @@ -0,0 +1,27 @@ +import { Request } from "express"; +import { supabase_service } from "../supabase"; + +export async function validateIdempotencyKey( + req: Request, +): Promise { + const idempotencyKey = req.headers['x-idempotency-key']; + if (!idempotencyKey) { + // // not returning for missing idempotency key for now + return true; + } + + const { data, error } = await supabase_service + .from("idempotency_keys") + .select("key") + .eq("key", idempotencyKey); + + if (error) { + console.error(error); + } + + if (!data || data.length === 0) { + return true; + } + + return false; +} From 184e4678f1ba08bb26863b2351703560c56592e0 Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Thu, 23 May 2024 11:47:04 -0300 Subject: [PATCH 2/3] bugfix on idempotency key check --- apps/api/src/controllers/crawl.ts | 7 ++++++- apps/api/src/services/idempotency/validate.ts | 5 +++++ apps/js-sdk/example.js | 4 +++- apps/js-sdk/firecrawl/build/index.js | 16 ++++++---------- apps/js-sdk/firecrawl/src/index.ts | 10 ++++++---- apps/js-sdk/firecrawl/types/index.d.ts | 5 +++-- apps/js-sdk/package-lock.json | 15 ++++++++++++++- apps/js-sdk/package.json | 3 ++- 8 files changed, 45 insertions(+), 20 deletions(-) diff --git a/apps/api/src/controllers/crawl.ts b/apps/api/src/controllers/crawl.ts index 8d573545d..5345b4f1d 100644 --- a/apps/api/src/controllers/crawl.ts +++ b/apps/api/src/controllers/crawl.ts @@ -26,7 +26,12 @@ export async function crawlController(req: Request, res: Response) { if (!isIdempotencyValid) { return res.status(409).json({ error: "Idempotency key already used" }); } - createIdempotencyKey(req); + try { + createIdempotencyKey(req); + } catch (error) { + console.error(error); + return res.status(500).json({ error: error.message }); + } } const { success: creditsCheckSuccess, message: creditsCheckMessage } = diff --git a/apps/api/src/services/idempotency/validate.ts b/apps/api/src/services/idempotency/validate.ts index ef4373964..ad6f2c450 100644 --- a/apps/api/src/services/idempotency/validate.ts +++ b/apps/api/src/services/idempotency/validate.ts @@ -1,5 +1,6 @@ import { Request } from "express"; import { supabase_service } from "../supabase"; +import { validate as isUuid } from 'uuid'; export async function validateIdempotencyKey( req: Request, @@ -9,6 +10,10 @@ export async function validateIdempotencyKey( // // not returning for missing idempotency key for now return true; } + if (!isUuid(idempotencyKey)) { + console.error("Invalid idempotency key provided in the request headers."); + return false; + } const { data, error } = await supabase_service .from("idempotency_keys") diff --git a/apps/js-sdk/example.js b/apps/js-sdk/example.js index 7077b4c6f..e61457acc 100644 --- a/apps/js-sdk/example.js +++ b/apps/js-sdk/example.js @@ -1,8 +1,10 @@ +import { v4 as uuidv4 } from 'uuid'; import FirecrawlApp from '@mendable/firecrawl-js'; const app = new FirecrawlApp({apiKey: "YOUR_API_KEY"}); -const crawlResult = await app.crawlUrl('mendable.ai', {crawlerOptions: {excludes: ['blog/*'], limit: 5}}, false); +const idempotencyKey = uuidv4(); // optional +const crawlResult = await app.crawlUrl('mendable.ai', {crawlerOptions: {excludes: ['blog/*'], limit: 5}}, false, 2, idempotencyKey); console.log(crawlResult) const jobId = await crawlResult['jobId']; diff --git a/apps/js-sdk/firecrawl/build/index.js b/apps/js-sdk/firecrawl/build/index.js index 9d8237b47..b93c277cf 100644 --- a/apps/js-sdk/firecrawl/build/index.js +++ b/apps/js-sdk/firecrawl/build/index.js @@ -8,8 +8,6 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge }); }; import axios from 'axios'; -import dotenv from 'dotenv'; -dotenv.config(); /** * Main class for interacting with the Firecrawl API. */ @@ -19,7 +17,7 @@ export default class FirecrawlApp { * @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance. */ constructor({ apiKey = null }) { - this.apiKey = apiKey || process.env.FIRECRAWL_API_KEY || ''; + this.apiKey = apiKey || ''; if (!this.apiKey) { throw new Error('No API key provided'); } @@ -104,11 +102,12 @@ export default class FirecrawlApp { * @param {Params | null} params - Additional parameters for the crawl request. * @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete. * @param {number} timeout - Timeout in seconds for job status checks. + * @param {string} idempotencyKey - Optional idempotency key for the request. * @returns {Promise} The response from the crawl operation. */ crawlUrl(url_1) { - return __awaiter(this, arguments, void 0, function* (url, params = null, waitUntilDone = true, timeout = 2) { - const headers = this.prepareHeaders(); + return __awaiter(this, arguments, void 0, function* (url, params = null, waitUntilDone = true, timeout = 2, idempotencyKey) { + const headers = this.prepareHeaders(idempotencyKey); let jsonData = { url }; if (params) { jsonData = Object.assign(Object.assign({}, jsonData), params); @@ -162,11 +161,8 @@ export default class FirecrawlApp { * Prepares the headers for an API request. * @returns {AxiosRequestHeaders} The prepared headers. */ - prepareHeaders() { - return { - 'Content-Type': 'application/json', - 'Authorization': `Bearer ${this.apiKey}`, - }; + prepareHeaders(idempotencyKey) { + return Object.assign({ 'Content-Type': 'application/json', 'Authorization': `Bearer ${this.apiKey}` }, (idempotencyKey ? { 'x-idempotency-key': idempotencyKey } : {})); } /** * Sends a POST request to the specified URL. diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index aea15f83d..67ff67c9a 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -141,10 +141,11 @@ export default class FirecrawlApp { * @param {Params | null} params - Additional parameters for the crawl request. * @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete. * @param {number} timeout - Timeout in seconds for job status checks. + * @param {string} idempotencyKey - Optional idempotency key for the request. * @returns {Promise} The response from the crawl operation. */ - async crawlUrl(url: string, params: Params | null = null, waitUntilDone: boolean = true, timeout: number = 2): Promise { - const headers = this.prepareHeaders(); + async crawlUrl(url: string, params: Params | null = null, waitUntilDone: boolean = true, timeout: number = 2, idempotencyKey?: string): Promise { + const headers = this.prepareHeaders(idempotencyKey); let jsonData: Params = { url }; if (params) { jsonData = { ...jsonData, ...params }; @@ -192,11 +193,12 @@ export default class FirecrawlApp { * Prepares the headers for an API request. * @returns {AxiosRequestHeaders} The prepared headers. */ - prepareHeaders(): AxiosRequestHeaders { + prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders { return { 'Content-Type': 'application/json', 'Authorization': `Bearer ${this.apiKey}`, - } as AxiosRequestHeaders; + ...(idempotencyKey ? { 'x-idempotency-key': idempotencyKey } : {}), + } as AxiosRequestHeaders & { 'x-idempotency-key'?: string }; } /** diff --git a/apps/js-sdk/firecrawl/types/index.d.ts b/apps/js-sdk/firecrawl/types/index.d.ts index 7f79d6443..9828f2267 100644 --- a/apps/js-sdk/firecrawl/types/index.d.ts +++ b/apps/js-sdk/firecrawl/types/index.d.ts @@ -76,9 +76,10 @@ export default class FirecrawlApp { * @param {Params | null} params - Additional parameters for the crawl request. * @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete. * @param {number} timeout - Timeout in seconds for job status checks. + * @param {string} idempotencyKey - Optional idempotency key for the request. * @returns {Promise} The response from the crawl operation. */ - crawlUrl(url: string, params?: Params | null, waitUntilDone?: boolean, timeout?: number): Promise; + crawlUrl(url: string, params?: Params | null, waitUntilDone?: boolean, timeout?: number, idempotencyKey?: string): Promise; /** * Checks the status of a crawl job using the Firecrawl API. * @param {string} jobId - The job ID of the crawl operation. @@ -89,7 +90,7 @@ export default class FirecrawlApp { * Prepares the headers for an API request. * @returns {AxiosRequestHeaders} The prepared headers. */ - prepareHeaders(): AxiosRequestHeaders; + prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders; /** * Sends a POST request to the specified URL. * @param {string} url - The URL to send the request to. diff --git a/apps/js-sdk/package-lock.json b/apps/js-sdk/package-lock.json index 363f30137..aea332261 100644 --- a/apps/js-sdk/package-lock.json +++ b/apps/js-sdk/package-lock.json @@ -10,7 +10,8 @@ "license": "ISC", "dependencies": { "@mendable/firecrawl-js": "^0.0.15", - "axios": "^1.6.8" + "axios": "^1.6.8", + "uuid": "^9.0.1" } }, "node_modules/@mendable/firecrawl-js": { @@ -122,6 +123,18 @@ "version": "1.1.0", "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==" + }, + "node_modules/uuid": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", + "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "bin": { + "uuid": "dist/bin/uuid" + } } } } diff --git a/apps/js-sdk/package.json b/apps/js-sdk/package.json index 563e1e39c..c0ac6f1f1 100644 --- a/apps/js-sdk/package.json +++ b/apps/js-sdk/package.json @@ -12,6 +12,7 @@ "license": "ISC", "dependencies": { "@mendable/firecrawl-js": "^0.0.15", - "axios": "^1.6.8" + "axios": "^1.6.8", + "uuid": "^9.0.1" } } From c201ea1986c82b4b9f81a0f3e339fe3d6f18ee47 Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Thu, 23 May 2024 12:52:59 -0300 Subject: [PATCH 3/3] added idempotency key to python sdk --- apps/js-sdk/firecrawl/build/index.js | 2 +- apps/js-sdk/firecrawl/package.json | 2 +- apps/python-sdk/example.py | 6 ++++-- apps/python-sdk/firecrawl/firecrawl.py | 14 ++++++++++---- 4 files changed, 16 insertions(+), 8 deletions(-) diff --git a/apps/js-sdk/firecrawl/build/index.js b/apps/js-sdk/firecrawl/build/index.js index 2a258cfae..76edfe59c 100644 --- a/apps/js-sdk/firecrawl/build/index.js +++ b/apps/js-sdk/firecrawl/build/index.js @@ -19,7 +19,7 @@ export default class FirecrawlApp { * @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance. */ constructor({ apiKey = null }) { - this.apiKey = apiKey || ''; + this.apiKey = apiKey || ""; if (!this.apiKey) { throw new Error("No API key provided"); } diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json index 3bacdf43b..e43f6ea63 100644 --- a/apps/js-sdk/firecrawl/package.json +++ b/apps/js-sdk/firecrawl/package.json @@ -1,6 +1,6 @@ { "name": "@mendable/firecrawl-js", - "version": "0.0.21", + "version": "0.0.22", "description": "JavaScript SDK for Firecrawl API", "main": "build/index.js", "types": "types/index.d.ts", diff --git a/apps/python-sdk/example.py b/apps/python-sdk/example.py index d83be6dea..d80fa7959 100644 --- a/apps/python-sdk/example.py +++ b/apps/python-sdk/example.py @@ -1,4 +1,5 @@ -from firecrawl import FirecrawlApp +import uuid +from firecrawl.firecrawl import FirecrawlApp app = FirecrawlApp(api_key="fc-YOUR_API_KEY") @@ -7,7 +8,8 @@ print(scrape_result['markdown']) # Crawl a website: -crawl_result = app.crawl_url('mendable.ai', {'crawlerOptions': {'excludes': ['blog/*']}}) +idempotency_key = str(uuid.uuid4()) # optional idempotency key +crawl_result = app.crawl_url('mendable.ai', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, idempotency_key) print(crawl_result) # LLM Extraction: diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index 98cb8ed68..1f59ec711 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -81,8 +81,8 @@ def search(self, query, params=None): else: raise Exception(f'Failed to search. Status code: {response.status_code}') - def crawl_url(self, url, params=None, wait_until_done=True, timeout=2): - headers = self._prepare_headers() + def crawl_url(self, url, params=None, wait_until_done=True, timeout=2, idempotency_key=None): + headers = self._prepare_headers(idempotency_key) json_data = {'url': url} if params: json_data.update(params) @@ -104,10 +104,16 @@ def check_crawl_status(self, job_id): else: self._handle_error(response, 'check crawl status') - def _prepare_headers(self): + def _prepare_headers(self, idempotency_key=None): + if idempotency_key: + return { + 'Content-Type': 'application/json', + 'Authorization': f'Bearer {self.api_key}', + 'x-idempotency-key': idempotency_key + } return { 'Content-Type': 'application/json', - 'Authorization': f'Bearer {self.api_key}' + 'Authorization': f'Bearer {self.api_key}', } def _post_request(self, url, data, headers, retries=3, backoff_factor=0.5):