Skip to content

Commit

Permalink
Merge pull request mendableai#132 from mendableai/feat/idempotency-key
Browse files Browse the repository at this point in the history
[Feat] Added idempotency key to crawl route
  • Loading branch information
rafaelsideguide authored May 24, 2024
2 parents 605ba4c + d39860c commit 4ce2859
Show file tree
Hide file tree
Showing 13 changed files with 150 additions and 25 deletions.
25 changes: 25 additions & 0 deletions apps/api/src/__tests__/e2e_withAuth/index.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import request from "supertest";
import { app } from "../../index";
import dotenv from "dotenv";
import { v4 as uuidv4 } from "uuid";

dotenv.config();

Expand Down Expand Up @@ -175,6 +176,30 @@ describe("E2E Tests for API Routes", () => {
/^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
);
});
it('should prevent duplicate requests using the same idempotency key', async () => {
  // A single key is reused for both submissions so the second one counts as a replay.
  const sharedKey = uuidv4();

  // Sends one authenticated crawl request that carries the shared idempotency key.
  const submitCrawl = () =>
    request(TEST_URL)
      .post('/v0/crawl')
      .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
      .set("Content-Type", "application/json")
      .set("x-idempotency-key", sharedKey)
      .send({ url: 'https://mendable.ai' });

  // The first use of the key is expected to be accepted.
  const initial = await submitCrawl();
  expect(initial.statusCode).toBe(200);

  // Replaying the same key is expected to be rejected with a conflict.
  const replay = await submitCrawl();
  expect(replay.statusCode).toBe(409);
  expect(replay.body.error).toBe('Idempotency key already used');
});

it("should return a successful response with a valid API key and valid includes option", async () => {
const crawlResponse = await request(TEST_URL)
Expand Down
15 changes: 15 additions & 0 deletions apps/api/src/controllers/crawl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ import { RateLimiterMode } from "../../src/types";
import { addWebScraperJob } from "../../src/services/queue-jobs";
import { isUrlBlocked } from "../../src/scraper/WebScraper/utils/blocklist";
import { logCrawl } from "../../src/services/logging/crawl_log";
import { validateIdempotencyKey } from "../../src/services/idempotency/validate";
import { createIdempotencyKey } from "../../src/services/idempotency/create";

export async function crawlController(req: Request, res: Response) {
try {
Expand All @@ -19,6 +21,19 @@ export async function crawlController(req: Request, res: Response) {
return res.status(status).json({ error });
}

// Idempotency guard: only enforced when the client sends an x-idempotency-key header.
if (req.headers["x-idempotency-key"]) {
  // NOTE(review): validateIdempotencyKey also resolves to false for a malformed
  // (non-UUID) key, so that case is reported as "already used" too.
  const isIdempotencyValid = await validateIdempotencyKey(req);
  if (!isIdempotencyValid) {
    return res.status(409).json({ error: "Idempotency key already used" });
  }
  try {
    // createIdempotencyKey is async, so it must be awaited: without `await` a
    // failed insert rejects outside this try/catch (unhandled rejection) and the
    // crawl continues even though the key was never recorded.
    await createIdempotencyKey(req);
  } catch (error) {
    console.error(error);
    // The thrown value may be a plain object with a `message` field rather than
    // an Error instance, so read `message` defensively.
    const rawMessage = (error as { message?: unknown } | null | undefined)?.message;
    const message = typeof rawMessage === "string" ? rawMessage : String(error);
    return res.status(500).json({ error: message });
  }
}

const { success: creditsCheckSuccess, message: creditsCheckMessage } =
await checkTeamCredits(team_id, 1);
if (!creditsCheckSuccess) {
Expand Down
22 changes: 22 additions & 0 deletions apps/api/src/services/idempotency/create.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import { Request } from "express";
import { supabase_service } from "../supabase";

/**
 * Records the request's `x-idempotency-key` header in the `idempotency_keys` table.
 *
 * @param req - Incoming Express request; the key is read from its headers.
 * @returns The idempotency key that was inserted.
 * @throws Error when the header is missing or is not a single non-empty string;
 *   rethrows the error object returned by the insert when it fails.
 */
export async function createIdempotencyKey(
  req: Request,
): Promise<string> {
  const idempotencyKey = req.headers['x-idempotency-key'];
  // Narrow at runtime instead of asserting `as string`: the header type also
  // allows string[], which must not be written to the table as a key.
  if (typeof idempotencyKey !== "string" || idempotencyKey === "") {
    throw new Error("No idempotency key provided in the request headers.");
  }

  // Only the error is needed; the insert's returned data is not used.
  const { error } = await supabase_service
    .from("idempotency_keys")
    .insert({ key: idempotencyKey });

  if (error) {
    console.error("Failed to create idempotency key:", error);
    throw error;
  }

  return idempotencyKey;
}
32 changes: 32 additions & 0 deletions apps/api/src/services/idempotency/validate.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import { Request } from "express";
import { supabase_service } from "../supabase";
import { validate as isUuid } from 'uuid';

/**
 * Checks whether the request's `x-idempotency-key` header may be used for a new request.
 *
 * @param req - Incoming Express request; the key is read from its headers.
 * @returns `true` when no key was sent or when the lookup yields no matching row
 *   in `idempotency_keys`; `false` when the key is not a UUID string or a
 *   matching row exists.
 */
export async function validateIdempotencyKey(
  req: Request,
): Promise<boolean> {
  const idempotencyKey = req.headers['x-idempotency-key'];
  if (!idempotencyKey) {
    // The header is optional for now: a request without a key is never rejected here.
    return true;
  }
  // Check the runtime type first so that only a plain string reaches isUuid;
  // the header type also allows string[].
  if (typeof idempotencyKey !== "string" || !isUuid(idempotencyKey)) {
    console.error("Invalid idempotency key provided in the request headers.");
    return false;
  }

  const { data, error } = await supabase_service
    .from("idempotency_keys")
    .select("key")
    .eq("key", idempotencyKey);

  if (error) {
    // NOTE(review): a failed lookup is only logged, not rethrown. If `data` is
    // empty or null in that case the key is accepted (fail-open) — confirm this
    // is the intended behavior.
    console.error("Failed to look up idempotency key:", error);
  }

  // No matching row means the key has not been recorded yet.
  return !data || data.length === 0;
}
4 changes: 3 additions & 1 deletion apps/js-sdk/example.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { v4 as uuidv4 } from 'uuid';
import FirecrawlApp from '@mendable/firecrawl-js';
import { z } from "zod";

Expand All @@ -8,7 +9,8 @@ const scrapeResult = await app.scrapeUrl('firecrawl.dev');
console.log(scrapeResult.data.content)

// Crawl a website:
const crawlResult = await app.crawlUrl('mendable.ai', {crawlerOptions: {excludes: ['blog/*'], limit: 5}}, false);
const idempotencyKey = uuidv4(); // optional
const crawlResult = await app.crawlUrl('mendable.ai', {crawlerOptions: {excludes: ['blog/*'], limit: 5}}, false, 2, idempotencyKey);
console.log(crawlResult)

const jobId = await crawlResult['jobId'];
Expand Down
12 changes: 5 additions & 7 deletions apps/js-sdk/firecrawl/build/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -110,11 +110,12 @@ export default class FirecrawlApp {
* @param {Params | null} params - Additional parameters for the crawl request.
* @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
* @param {number} timeout - Timeout in seconds for job status checks.
* @param {string} idempotencyKey - Optional idempotency key for the request.
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
*/
crawlUrl(url_1) {
return __awaiter(this, arguments, void 0, function* (url, params = null, waitUntilDone = true, timeout = 2) {
const headers = this.prepareHeaders();
return __awaiter(this, arguments, void 0, function* (url, params = null, waitUntilDone = true, timeout = 2, idempotencyKey) {
const headers = this.prepareHeaders(idempotencyKey);
let jsonData = { url };
if (params) {
jsonData = Object.assign(Object.assign({}, jsonData), params);
Expand Down Expand Up @@ -172,11 +173,8 @@ export default class FirecrawlApp {
* Prepares the headers for an API request.
* @returns {AxiosRequestHeaders} The prepared headers.
*/
prepareHeaders() {
return {
"Content-Type": "application/json",
Authorization: `Bearer ${this.apiKey}`,
};
prepareHeaders(idempotencyKey) {
return Object.assign({ 'Content-Type': 'application/json', 'Authorization': `Bearer ${this.apiKey}` }, (idempotencyKey ? { 'x-idempotency-key': idempotencyKey } : {}));
}
/**
* Sends a POST request to the specified URL.
Expand Down
2 changes: 1 addition & 1 deletion apps/js-sdk/firecrawl/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@mendable/firecrawl-js",
"version": "0.0.21",
"version": "0.0.22",
"description": "JavaScript SDK for Firecrawl API",
"main": "build/index.js",
"types": "types/index.d.ts",
Expand Down
15 changes: 9 additions & 6 deletions apps/js-sdk/firecrawl/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -173,15 +173,17 @@ export default class FirecrawlApp {
* @param {Params | null} params - Additional parameters for the crawl request.
* @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
* @param {number} timeout - Timeout in seconds for job status checks.
* @param {string} idempotencyKey - Optional idempotency key for the request.
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
*/
async crawlUrl(
url: string,
params: Params | null = null,
waitUntilDone: boolean = true,
timeout: number = 2
timeout: number = 2,
idempotencyKey?: string
): Promise<CrawlResponse | any> {
const headers = this.prepareHeaders();
const headers = this.prepareHeaders(idempotencyKey);
let jsonData: Params = { url };
if (params) {
jsonData = { ...jsonData, ...params };
Expand Down Expand Up @@ -240,11 +242,12 @@ export default class FirecrawlApp {
* Prepares the headers for an API request.
* @returns {AxiosRequestHeaders} The prepared headers.
*/
prepareHeaders(): AxiosRequestHeaders {
prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders {
return {
"Content-Type": "application/json",
Authorization: `Bearer ${this.apiKey}`,
} as AxiosRequestHeaders;
'Content-Type': 'application/json',
'Authorization': `Bearer ${this.apiKey}`,
...(idempotencyKey ? { 'x-idempotency-key': idempotencyKey } : {}),
} as AxiosRequestHeaders & { 'x-idempotency-key'?: string };
}

/**
Expand Down
5 changes: 3 additions & 2 deletions apps/js-sdk/firecrawl/types/index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,10 @@ export default class FirecrawlApp {
* @param {Params | null} params - Additional parameters for the crawl request.
* @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
* @param {number} timeout - Timeout in seconds for job status checks.
* @param {string} idempotencyKey - Optional idempotency key for the request.
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
*/
crawlUrl(url: string, params?: Params | null, waitUntilDone?: boolean, timeout?: number): Promise<CrawlResponse | any>;
crawlUrl(url: string, params?: Params | null, waitUntilDone?: boolean, timeout?: number, idempotencyKey?: string): Promise<CrawlResponse | any>;
/**
* Checks the status of a crawl job using the Firecrawl API.
* @param {string} jobId - The job ID of the crawl operation.
Expand All @@ -95,7 +96,7 @@ export default class FirecrawlApp {
* Prepares the headers for an API request.
* @returns {AxiosRequestHeaders} The prepared headers.
*/
prepareHeaders(): AxiosRequestHeaders;
prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders;
/**
* Sends a POST request to the specified URL.
* @param {string} url - The URL to send the request to.
Expand Down
13 changes: 13 additions & 0 deletions apps/js-sdk/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion apps/js-sdk/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@
"author": "",
"license": "ISC",
"dependencies": {
"@mendable/firecrawl-js": "^0.0.19",
"axios": "^1.6.8",
"uuid": "^9.0.1",
"@mendable/firecrawl-js": "^0.0.19",
"ts-node": "^10.9.2",
"typescript": "^5.4.5",
"zod": "^3.23.8"
Expand Down
6 changes: 4 additions & 2 deletions apps/python-sdk/example.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from firecrawl import FirecrawlApp
import uuid
from firecrawl.firecrawl import FirecrawlApp

app = FirecrawlApp(api_key="fc-YOUR_API_KEY")

Expand All @@ -7,7 +8,8 @@
print(scrape_result['markdown'])

# Crawl a website:
crawl_result = app.crawl_url('mendable.ai', {'crawlerOptions': {'excludes': ['blog/*']}})
idempotency_key = str(uuid.uuid4()) # optional idempotency key
crawl_result = app.crawl_url('mendable.ai', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, idempotency_key)
print(crawl_result)

# LLM Extraction:
Expand Down
21 changes: 16 additions & 5 deletions apps/python-sdk/firecrawl/firecrawl.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def search(self, query, params=None):
else:
raise Exception(f'Failed to search. Status code: {response.status_code}')

def crawl_url(self, url, params=None, wait_until_done=True, timeout=2):
def crawl_url(self, url, params=None, wait_until_done=True, timeout=2, idempotency_key=None):
"""
Initiate a crawl job for the specified URL using the Firecrawl API.
Expand All @@ -136,14 +136,15 @@ def crawl_url(self, url, params=None, wait_until_done=True, timeout=2):
params (Optional[Dict[str, Any]]): Additional parameters for the crawl request.
wait_until_done (bool): Whether to wait until the crawl job is completed.
timeout (int): Timeout between status checks when waiting for job completion.
idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests.
Returns:
Any: The crawl job ID or the crawl results if waiting until completion.
Raises:
Exception: If the crawl job initiation or monitoring fails.
"""
headers = self._prepare_headers()
headers = self._prepare_headers(idempotency_key)
json_data = {'url': url}
if params:
json_data.update(params)
Expand Down Expand Up @@ -177,16 +178,26 @@ def check_crawl_status(self, job_id):
else:
self._handle_error(response, 'check crawl status')

def _prepare_headers(self):
def _prepare_headers(self, idempotency_key=None):
"""
Prepare the headers for API requests.
Args:
idempotency_key (Optional[str]): A unique key to ensure idempotency of requests.
Returns:
Dict[str, str]: The headers including content type and authorization.
Dict[str, str]: The headers including content type, authorization, and optionally idempotency key.
"""
if idempotency_key:
return {
'Content-Type': 'application/json',
'Authorization': f'Bearer {self.api_key}',
'x-idempotency-key': idempotency_key
}

return {
'Content-Type': 'application/json',
'Authorization': f'Bearer {self.api_key}'
'Authorization': f'Bearer {self.api_key}',
}

def _post_request(self, url, data, headers, retries=3, backoff_factor=0.5):
Expand Down

0 comments on commit 4ce2859

Please sign in to comment.