diff --git a/apps/api/src/__tests__/e2e_withAuth/index.test.ts b/apps/api/src/__tests__/e2e_withAuth/index.test.ts
index 39825c4a0..e03953176 100644
--- a/apps/api/src/__tests__/e2e_withAuth/index.test.ts
+++ b/apps/api/src/__tests__/e2e_withAuth/index.test.ts
@@ -1,6 +1,7 @@
 import request from "supertest";
 import { app } from "../../index";
 import dotenv from "dotenv";
+import { v4 as uuidv4 } from "uuid";
 
 dotenv.config();
 
@@ -175,6 +176,30 @@ describe("E2E Tests for API Routes", () => {
         /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
       );
     });
+    it('should prevent duplicate requests using the same idempotency key', async () => {
+      const uniqueIdempotencyKey = uuidv4();
+
+      // First request with the idempotency key
+      const firstResponse = await request(TEST_URL)
+        .post('/v0/crawl')
+        .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+        .set("Content-Type", "application/json")
+        .set("x-idempotency-key", uniqueIdempotencyKey)
+        .send({ url: 'https://mendable.ai' });
+
+      expect(firstResponse.statusCode).toBe(200);
+
+      // Second request with the same idempotency key
+      const secondResponse = await request(TEST_URL)
+        .post('/v0/crawl')
+        .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
+        .set("Content-Type", "application/json")
+        .set("x-idempotency-key", uniqueIdempotencyKey)
+        .send({ url: 'https://mendable.ai' });
+
+      expect(secondResponse.statusCode).toBe(409);
+      expect(secondResponse.body.error).toBe('Idempotency key already used');
+    });
 
     it("should return a successful response with a valid API key and valid includes option", async () => {
       const crawlResponse = await request(TEST_URL)
diff --git a/apps/api/src/controllers/crawl.ts b/apps/api/src/controllers/crawl.ts
index e53faedaf..5345b4f1d 100644
--- a/apps/api/src/controllers/crawl.ts
+++ b/apps/api/src/controllers/crawl.ts
@@ -7,6 +7,8 @@ import { RateLimiterMode } from "../../src/types";
 import { addWebScraperJob } from "../../src/services/queue-jobs";
 import { isUrlBlocked } from "../../src/scraper/WebScraper/utils/blocklist";
 import { logCrawl } from "../../src/services/logging/crawl_log";
+import { validateIdempotencyKey } from "../../src/services/idempotency/validate";
+import { createIdempotencyKey } from "../../src/services/idempotency/create";
 
 export async function crawlController(req: Request, res: Response) {
   try {
@@ -19,6 +21,22 @@ export async function crawlController(req: Request, res: Response) {
       return res.status(status).json({ error });
     }
 
+    // Optional idempotency guard: reject a key that fails validation (malformed
+    // or already recorded), otherwise record it before continuing.
+    if (req.headers["x-idempotency-key"]) {
+      const isIdempotencyValid = await validateIdempotencyKey(req);
+      if (!isIdempotencyValid) {
+        return res.status(409).json({ error: "Idempotency key already used" });
+      }
+      try {
+        // Must be awaited: un-awaited, an insert failure is an unhandled rejection and never reaches this catch.
+        await createIdempotencyKey(req);
+      } catch (error) {
+        console.error(error);
+        return res.status(500).json({ error: error.message });
+      }
+    }
+
     const { success: creditsCheckSuccess, message: creditsCheckMessage } =
       await checkTeamCredits(team_id, 1);
     if (!creditsCheckSuccess) {
diff --git a/apps/api/src/services/idempotency/create.ts b/apps/api/src/services/idempotency/create.ts
new file mode 100644
index 000000000..ec3e18e75
--- /dev/null
+++ b/apps/api/src/services/idempotency/create.ts
@@ -0,0 +1,29 @@
+import { Request } from "express";
+import { supabase_service } from "../supabase";
+
+/**
+ * Stores the request's `x-idempotency-key` header value in the `idempotency_keys` table.
+ *
+ * @param req - Express request whose headers carry the key.
+ * @returns The key that was inserted.
+ * @throws Error if the header is missing; rethrows the Supabase error if the insert fails.
+ */
+export async function createIdempotencyKey(
+  req: Request,
+): Promise<string> {
+  const idempotencyKey = req.headers['x-idempotency-key'] as string;
+  if (!idempotencyKey) {
+    throw new Error("No idempotency key provided in the request headers.");
+  }
+
+  const { error } = await supabase_service
+    .from("idempotency_keys")
+    .insert({ key: idempotencyKey });
+
+  if (error) {
+    console.error("Failed to create idempotency key:", error);
+    throw error;
+  }
+
+  return idempotencyKey;
+}
diff --git a/apps/api/src/services/idempotency/validate.ts b/apps/api/src/services/idempotency/validate.ts
new file mode 100644
index
000000000..ad6f2c450
--- /dev/null
+++ b/apps/api/src/services/idempotency/validate.ts
@@ -0,0 +1,43 @@
+import { Request } from "express";
+import { supabase_service } from "../supabase";
+import { validate as isUuid } from 'uuid';
+
+/**
+ * Checks whether the request's `x-idempotency-key` header may be used.
+ *
+ * @param req - Express request whose headers may carry the key.
+ * @returns `true` when no key is sent or no row with that key is found in
+ *   `idempotency_keys`; `false` when the key is not a UUID string or a row
+ *   with that key already exists.
+ */
+export async function validateIdempotencyKey(
+  req: Request,
+): Promise<boolean> {
+  const idempotencyKey = req.headers['x-idempotency-key'];
+  if (!idempotencyKey) {
+    // not returning for missing idempotency key for now
+    return true;
+  }
+  // Header values are typed `string | string[]`; only a single UUID string passes.
+  if (typeof idempotencyKey !== "string" || !isUuid(idempotencyKey)) {
+    console.error("Invalid idempotency key provided in the request headers.");
+    return false;
+  }
+
+  const { data, error } = await supabase_service
+    .from("idempotency_keys")
+    .select("key")
+    .eq("key", idempotencyKey);
+
+  if (error) {
+    // NOTE(review): a lookup failure is only logged; if no rows came back the
+    // key is then accepted below (fail-open) - confirm this is intended.
+    console.error(error);
+  }
+
+  if (!data || data.length === 0) {
+    return true;
+  }
+
+  return false;
+}
diff --git a/apps/js-sdk/example.js b/apps/js-sdk/example.js
index 5f8119253..7f198598b 100644
--- a/apps/js-sdk/example.js
+++ b/apps/js-sdk/example.js
@@ -1,3 +1,4 @@
+import { v4 as uuidv4 } from 'uuid';
 import FirecrawlApp from '@mendable/firecrawl-js';
 import { z } from "zod";
 
@@ -8,7 +9,8 @@ const scrapeResult = await app.scrapeUrl('firecrawl.dev');
 console.log(scrapeResult.data.content)
 
 // Crawl a website:
-const crawlResult = await app.crawlUrl('mendable.ai', {crawlerOptions: {excludes: ['blog/*'], limit: 5}}, false);
+const idempotencyKey = uuidv4(); // optional
+const crawlResult = await app.crawlUrl('mendable.ai', {crawlerOptions: {excludes: ['blog/*'], limit: 5}}, false, 2, idempotencyKey);
 console.log(crawlResult)
 
 const jobId = await crawlResult['jobId'];
diff --git a/apps/js-sdk/firecrawl/build/index.js b/apps/js-sdk/firecrawl/build/index.js
index b850d5cc0..76edfe59c 100644
--- a/apps/js-sdk/firecrawl/build/index.js
+++ b/apps/js-sdk/firecrawl/build/index.js
@@ -110,11 +110,12 @@ export default class FirecrawlApp {
      * @param {Params | null} params - Additional parameters for the crawl request.
      * @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
      * @param {number} timeout - Timeout in seconds for job status checks.
+     * @param {string} idempotencyKey - Optional idempotency key for the request.
      * @returns {Promise} The response from the crawl operation.
      */
     crawlUrl(url_1) {
-        return __awaiter(this, arguments, void 0, function* (url, params = null, waitUntilDone = true, timeout = 2) {
-            const headers = this.prepareHeaders();
+        return __awaiter(this, arguments, void 0, function* (url, params = null, waitUntilDone = true, timeout = 2, idempotencyKey) {
+            const headers = this.prepareHeaders(idempotencyKey);
             let jsonData = { url };
             if (params) {
                 jsonData = Object.assign(Object.assign({}, jsonData), params);
@@ -172,11 +173,9 @@ export default class FirecrawlApp {
      * Prepares the headers for an API request.
+     * @param {string} idempotencyKey - Optional key sent as the `x-idempotency-key` header.
      * @returns {AxiosRequestHeaders} The prepared headers.
      */
-    prepareHeaders() {
-        return {
-            "Content-Type": "application/json",
-            Authorization: `Bearer ${this.apiKey}`,
-        };
+    prepareHeaders(idempotencyKey) {
+        return Object.assign({ 'Content-Type': 'application/json', 'Authorization': `Bearer ${this.apiKey}` }, (idempotencyKey ? { 'x-idempotency-key': idempotencyKey } : {}));
     }
     /**
      * Sends a POST request to the specified URL.
diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json
index 3bacdf43b..e43f6ea63 100644
--- a/apps/js-sdk/firecrawl/package.json
+++ b/apps/js-sdk/firecrawl/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@mendable/firecrawl-js",
-  "version": "0.0.21",
+  "version": "0.0.22",
   "description": "JavaScript SDK for Firecrawl API",
   "main": "build/index.js",
   "types": "types/index.d.ts",
diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts
index 7654f1bc6..0bdcf7ceb 100644
--- a/apps/js-sdk/firecrawl/src/index.ts
+++ b/apps/js-sdk/firecrawl/src/index.ts
@@ -173,15 +173,17 @@ export default class FirecrawlApp {
    * @param {Params | null} params - Additional parameters for the crawl request.
    * @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
    * @param {number} timeout - Timeout in seconds for job status checks.
+   * @param {string} idempotencyKey - Optional idempotency key for the request.
    * @returns {Promise} The response from the crawl operation.
    */
   async crawlUrl(
     url: string,
     params: Params | null = null,
     waitUntilDone: boolean = true,
-    timeout: number = 2
+    timeout: number = 2,
+    idempotencyKey?: string
   ): Promise {
-    const headers = this.prepareHeaders();
+    const headers = this.prepareHeaders(idempotencyKey);
     let jsonData: Params = { url };
     if (params) {
       jsonData = { ...jsonData, ...params };
@@ -240,11 +242,13 @@ export default class FirecrawlApp {
    * Prepares the headers for an API request.
+   * @param {string} idempotencyKey - Optional key sent as the `x-idempotency-key` header.
    * @returns {AxiosRequestHeaders} The prepared headers.
    */
-  prepareHeaders(): AxiosRequestHeaders {
+  prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders {
     return {
-      "Content-Type": "application/json",
-      Authorization: `Bearer ${this.apiKey}`,
-    } as AxiosRequestHeaders;
+      'Content-Type': 'application/json',
+      'Authorization': `Bearer ${this.apiKey}`,
+      ...(idempotencyKey ? { 'x-idempotency-key': idempotencyKey } : {}),
+    } as AxiosRequestHeaders & { 'x-idempotency-key'?: string };
   }
 
   /**
diff --git a/apps/js-sdk/firecrawl/types/index.d.ts b/apps/js-sdk/firecrawl/types/index.d.ts
index 40d95c4ae..cc186e88a 100644
--- a/apps/js-sdk/firecrawl/types/index.d.ts
+++ b/apps/js-sdk/firecrawl/types/index.d.ts
@@ -82,9 +82,10 @@ export default class FirecrawlApp {
      * @param {Params | null} params - Additional parameters for the crawl request.
      * @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
      * @param {number} timeout - Timeout in seconds for job status checks.
+     * @param {string} idempotencyKey - Optional idempotency key for the request.
      * @returns {Promise} The response from the crawl operation.
      */
-    crawlUrl(url: string, params?: Params | null, waitUntilDone?: boolean, timeout?: number): Promise;
+    crawlUrl(url: string, params?: Params | null, waitUntilDone?: boolean, timeout?: number, idempotencyKey?: string): Promise;
     /**
      * Checks the status of a crawl job using the Firecrawl API.
      * @param {string} jobId - The job ID of the crawl operation.
@@ -95,7 +96,8 @@ export default class FirecrawlApp {
      * Prepares the headers for an API request.
+     * @param {string} idempotencyKey - Optional key sent as the `x-idempotency-key` header.
      * @returns {AxiosRequestHeaders} The prepared headers.
      */
-    prepareHeaders(): AxiosRequestHeaders;
+    prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders;
     /**
      * Sends a POST request to the specified URL.
      * @param {string} url - The URL to send the request to.
diff --git a/apps/js-sdk/package-lock.json b/apps/js-sdk/package-lock.json index 4d2631946..516765347 100644 --- a/apps/js-sdk/package-lock.json +++ b/apps/js-sdk/package-lock.json @@ -11,6 +11,7 @@ "dependencies": { "@mendable/firecrawl-js": "^0.0.19", "axios": "^1.6.8", + "uuid": "^9.0.1", "ts-node": "^10.9.2", "typescript": "^5.4.5", "zod": "^3.23.8" @@ -771,6 +772,18 @@ "peerDependencies": { "zod": "^3.23.3" } + }, + "node_modules/uuid": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", + "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "bin": { + "uuid": "dist/bin/uuid" + } } } } diff --git a/apps/js-sdk/package.json b/apps/js-sdk/package.json index 0e93fe3c2..df9f99e5f 100644 --- a/apps/js-sdk/package.json +++ b/apps/js-sdk/package.json @@ -11,8 +11,9 @@ "author": "", "license": "ISC", "dependencies": { - "@mendable/firecrawl-js": "^0.0.19", "axios": "^1.6.8", + "uuid": "^9.0.1", + "@mendable/firecrawl-js": "^0.0.19", "ts-node": "^10.9.2", "typescript": "^5.4.5", "zod": "^3.23.8" diff --git a/apps/python-sdk/example.py b/apps/python-sdk/example.py index d83be6dea..d80fa7959 100644 --- a/apps/python-sdk/example.py +++ b/apps/python-sdk/example.py @@ -1,4 +1,5 @@ -from firecrawl import FirecrawlApp +import uuid +from firecrawl.firecrawl import FirecrawlApp app = FirecrawlApp(api_key="fc-YOUR_API_KEY") @@ -7,7 +8,8 @@ print(scrape_result['markdown']) # Crawl a website: -crawl_result = app.crawl_url('mendable.ai', {'crawlerOptions': {'excludes': ['blog/*']}}) +idempotency_key = str(uuid.uuid4()) # optional idempotency key +crawl_result = app.crawl_url('mendable.ai', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, idempotency_key) print(crawl_result) # LLM Extraction: diff --git a/apps/python-sdk/firecrawl/firecrawl.py 
b/apps/python-sdk/firecrawl/firecrawl.py
index f28a057fd..6c0bc4150 100644
--- a/apps/python-sdk/firecrawl/firecrawl.py
+++ b/apps/python-sdk/firecrawl/firecrawl.py
@@ -127,7 +127,7 @@ def search(self, query, params=None):
         else:
             raise Exception(f'Failed to search. Status code: {response.status_code}')
 
-    def crawl_url(self, url, params=None, wait_until_done=True, timeout=2):
+    def crawl_url(self, url, params=None, wait_until_done=True, timeout=2, idempotency_key=None):
         """
         Initiate a crawl job for the specified URL using the Firecrawl API.
 
@@ -136,6 +136,7 @@ def crawl_url(self, url, params=None, wait_until_done=True, timeout=2):
             params (Optional[Dict[str, Any]]): Additional parameters for the crawl request.
             wait_until_done (bool): Whether to wait until the crawl job is completed.
             timeout (int): Timeout between status checks when waiting for job completion.
+            idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests.
 
         Returns:
             Any: The crawl job ID or the crawl results if waiting until completion.
@@ -143,7 +144,7 @@ def crawl_url(self, url, params=None, wait_until_done=True, timeout=2):
         Raises:
             Exception: If the crawl job initiation or monitoring fails.
         """
-        headers = self._prepare_headers()
+        headers = self._prepare_headers(idempotency_key)
         json_data = {'url': url}
         if params:
             json_data.update(params)
@@ -177,16 +178,24 @@ def check_crawl_status(self, job_id):
             else:
                 self._handle_error(response, 'check crawl status')
 
-    def _prepare_headers(self):
+    def _prepare_headers(self, idempotency_key=None):
         """
         Prepare the headers for API requests.
 
+        Args:
+            idempotency_key (Optional[str]): A unique key to ensure idempotency of requests.
+
         Returns:
-            Dict[str, str]: The headers including content type and authorization.
+            Dict[str, str]: The headers including content type, authorization, and optionally idempotency key.
         """
-        return {
+        # Base headers sent with every request.
+        headers = {
             'Content-Type': 'application/json',
-            'Authorization': f'Bearer {self.api_key}'
+            'Authorization': f'Bearer {self.api_key}',
         }
+        # Attach the idempotency header only when a non-empty key was supplied.
+        if idempotency_key:
+            headers['x-idempotency-key'] = idempotency_key
+        return headers
 
     def _post_request(self, url, data, headers, retries=3, backoff_factor=0.5):