diff --git a/.github/workflows/fly.yml b/.github/workflows/fly.yml index 09d81af15..deddc9f21 100644 --- a/.github/workflows/fly.yml +++ b/.github/workflows/fly.yml @@ -94,6 +94,25 @@ jobs: run: | npm run test working-directory: ./apps/test-suite + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install Python dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + working-directory: ./apps/python-sdk + - name: Run E2E tests for Python SDK + run: | + pytest firecrawl/__tests__/e2e_withAuth/test.py + working-directory: ./apps/python-sdk + - name: Install dependencies for JavaScript SDK + run: pnpm install + working-directory: ./apps/js-sdk/firecrawl + - name: Run E2E tests for JavaScript SDK + run: npm run test + working-directory: ./apps/js-sdk/firecrawl deploy: name: Deploy app diff --git a/.github/workflows/js-sdk.yml b/.github/workflows/js-sdk.yml new file mode 100644 index 000000000..3c914cc80 --- /dev/null +++ b/.github/workflows/js-sdk.yml @@ -0,0 +1,60 @@ +name: Run JavaScript SDK E2E Tests + +on: + pull_request: + branches: + - main +env: + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }} + FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }} + HOST: ${{ secrets.HOST }} + LLAMAPARSE_API_KEY: ${{ secrets.LLAMAPARSE_API_KEY }} + LOGTAIL_KEY: ${{ secrets.LOGTAIL_KEY }} + POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }} + POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }} + NUM_WORKERS_PER_QUEUE: ${{ secrets.NUM_WORKERS_PER_QUEUE }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + PLAYWRIGHT_MICROSERVICE_URL: ${{ secrets.PLAYWRIGHT_MICROSERVICE_URL }} + PORT: ${{ secrets.PORT }} + REDIS_URL: ${{ secrets.REDIS_URL }} + SCRAPING_BEE_API_KEY: ${{ secrets.SCRAPING_BEE_API_KEY }} + SUPABASE_ANON_TOKEN: ${{ secrets.SUPABASE_ANON_TOKEN }} + SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }} + SUPABASE_URL: ${{ secrets.SUPABASE_URL }} + TEST_API_KEY: ${{ secrets.TEST_API_KEY }} + HYPERDX_API_KEY: ${{ secrets.HYPERDX_API_KEY }} + HDX_NODE_BETA_MODE: 1 + +jobs: + build: + runs-on: ubuntu-latest + services: + redis: + image: redis + ports: + - 6379:6379 + + steps: + - uses: actions/checkout@v3 + - name: Set up Node.js + uses: actions/setup-node@v3 + with: + node-version: "20" + - name: Install pnpm + run: npm install -g pnpm + - name: Install dependencies for API + run: pnpm install + working-directory: ./apps/api + - name: Start the application + run: npm start & + working-directory: ./apps/api + - name: Start workers + run: npm run workers & + working-directory: ./apps/api + - name: Install dependencies for JavaScript SDK + run: pnpm install + working-directory: ./apps/js-sdk/firecrawl + - name: Run E2E tests for JavaScript SDK + run: npm run test + working-directory: ./apps/js-sdk/firecrawl \ No newline at end of file diff --git a/.github/workflows/python-sdk.yml b/.github/workflows/python-sdk.yml new file mode 100644 index 000000000..1308cdef5 --- /dev/null +++ b/.github/workflows/python-sdk.yml @@ -0,0 +1,72 @@ +name: Run Python SDK E2E Tests + +on: + pull_request: + branches: + - main +env: + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }} + FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }} + HOST: ${{ secrets.HOST }} + LLAMAPARSE_API_KEY: ${{ secrets.LLAMAPARSE_API_KEY }} + LOGTAIL_KEY: ${{ secrets.LOGTAIL_KEY }} + POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }} + POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }} + NUM_WORKERS_PER_QUEUE: ${{ secrets.NUM_WORKERS_PER_QUEUE }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + PLAYWRIGHT_MICROSERVICE_URL: ${{ secrets.PLAYWRIGHT_MICROSERVICE_URL }} + PORT: ${{ secrets.PORT }} + REDIS_URL: ${{ secrets.REDIS_URL }} + SCRAPING_BEE_API_KEY: ${{ secrets.SCRAPING_BEE_API_KEY }} + SUPABASE_ANON_TOKEN: ${{ secrets.SUPABASE_ANON_TOKEN }} + SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }} + SUPABASE_URL: ${{ secrets.SUPABASE_URL }} + TEST_API_KEY: ${{ secrets.TEST_API_KEY }} + HYPERDX_API_KEY: ${{ secrets.HYPERDX_API_KEY }} + HDX_NODE_BETA_MODE: 1 + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.10"] + services: + redis: + image: redis + ports: + - 6379:6379 + + steps: + - uses: actions/checkout@v3 + - name: Set up Node.js + uses: actions/setup-node@v3 + with: + node-version: "20" + - name: Install pnpm + run: npm install -g pnpm + - name: Install dependencies for API + run: pnpm install + working-directory: ./apps/api + - name: Start the application + run: npm start & + working-directory: ./apps/api + id: start_app + - name: Start workers + run: npm run workers & + working-directory: ./apps/api + id: start_workers + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install Python dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + working-directory: ./apps/python-sdk + - name: Run E2E tests for Python SDK + run: | + pytest firecrawl/__tests__/e2e_withAuth/test.py + working-directory: ./apps/python-sdk diff --git a/apps/js-sdk/firecrawl/.env.example b/apps/js-sdk/firecrawl/.env.example new file mode 100644 index 000000000..6b1780bb8 --- /dev/null +++ b/apps/js-sdk/firecrawl/.env.example @@ -0,0 +1,3 @@ +API_URL=http://localhost:3002 +TEST_API_KEY=fc-YOUR_API_KEY + diff --git a/apps/js-sdk/firecrawl/package-lock.json b/apps/js-sdk/firecrawl/package-lock.json index 6b085be8e..b1cebde83 100644 --- a/apps/js-sdk/firecrawl/package-lock.json +++ b/apps/js-sdk/firecrawl/package-lock.json @@ -1,22 +1,27 @@ { "name": "@mendable/firecrawl-js", - "version": "0.0.17-beta.8", + "version": "0.0.22", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@mendable/firecrawl-js", - "version": "0.0.17-beta.8", + "version": "0.0.22", "license": "MIT", "dependencies": { "axios": "^1.6.8", + "dotenv": "^16.4.5", + "uuid": "^9.0.1", "zod": "^3.23.8", "zod-to-json-schema": "^3.23.0" }, "devDependencies": { "@jest/globals": "^29.7.0", "@types/axios": "^0.14.0", - "@types/node": "^20.12.7", + "@types/dotenv": "^8.2.0", + "@types/jest": "^29.5.12", + "@types/node": "^20.12.12", + "@types/uuid": "^9.0.8", "jest": "^29.7.0", "ts-jest": "^29.1.2", "typescript": "^5.4.5" @@ -1013,6 +1018,16 @@ "@babel/types": "^7.20.7" } }, + "node_modules/@types/dotenv": { + "version": "8.2.0", + "resolved": "https://registry.npmjs.org/@types/dotenv/-/dotenv-8.2.0.tgz", + "integrity": "sha512-ylSC9GhfRH7m1EUXBXofhgx4lUWmFeQDINW5oLuS+gxWdfUeW4zJdeVTYVkexEW+e2VUvlZR2kGnGGipAWR7kw==", + "deprecated": "This is a stub types definition. dotenv provides its own type definitions, so you do not need this installed.", + "dev": true, + "dependencies": { + "dotenv": "*" + } + }, "node_modules/@types/graceful-fs": { "version": "4.1.9", "resolved": "https://registry.npmjs.org/@types/graceful-fs/-/graceful-fs-4.1.9.tgz", @@ -1046,10 +1061,20 @@ "@types/istanbul-lib-report": "*" } }, + "node_modules/@types/jest": { + "version": "29.5.12", + "resolved": "https://registry.npmjs.org/@types/jest/-/jest-29.5.12.tgz", + "integrity": "sha512-eDC8bTvT/QhYdxJAulQikueigY5AsdBRH2yDKW3yveW7svY3+DzN84/2NUgkw10RTiJbWqZrTtoGVdYlvFJdLw==", + "dev": true, + "dependencies": { + "expect": "^29.0.0", + "pretty-format": "^29.0.0" + } + }, "node_modules/@types/node": { - "version": "20.12.7", - "resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.7.tgz", - "integrity": "sha512-wq0cICSkRLVaf3UGLMGItu/PtdY7oaXaI/RVU+xliKVOtRna3PRY57ZDfztpDL0n11vfymMUnXv8QwYCO7L1wg==", + "version": "20.12.12", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.12.tgz", + "integrity": "sha512-eWLDGF/FOSPtAvEqeRAQ4C8LSA7M1I7i0ky1I8U7kD1J5ITyW3AsRhQrKVoWf5pFKZ2kILsEGJhsI9r93PYnOw==", "dev": true, "dependencies": { "undici-types": "~5.26.4" @@ -1061,6 +1086,12 @@ "integrity": "sha512-9aEbYZ3TbYMznPdcdr3SmIrLXwC/AKZXQeCf9Pgao5CKb8CyHuEX5jzWPTkvregvhRJHcpRO6BFoGW9ycaOkYw==", "dev": true }, + "node_modules/@types/uuid": { + "version": "9.0.8", + "resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz", + "integrity": "sha512-jg+97EGIcY9AGHJJRaaPVgetKDsrTgbRjQ5Msgjh/DQKEFl0DtyRr/VCOyD1T2R1MNeWPK/u7JoGhlDZnKBAfA==", + "dev": true + }, "node_modules/@types/yargs": { "version": "17.0.32", "resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.32.tgz", @@ -1602,6 +1633,17 @@ "node": "^14.15.0 || ^16.10.0 || >=18.0.0" } }, + "node_modules/dotenv": { + "version": "16.4.5", + "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.5.tgz", + "integrity": "sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://dotenvx.com" + } + }, "node_modules/electron-to-chromium": { "version": "1.4.748", "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.4.748.tgz", @@ -3641,6 +3683,18 @@ "browserslist": ">= 4.21.0" } }, + "node_modules/uuid": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", + "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "bin": { + "uuid": "dist/bin/uuid" + } + }, "node_modules/v8-to-istanbul": { "version": "9.2.0", "resolved": "https://registry.npmjs.org/v8-to-istanbul/-/v8-to-istanbul-9.2.0.tgz", diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json index e43f6ea63..a1c42a0cc 100644 --- a/apps/js-sdk/firecrawl/package.json +++ b/apps/js-sdk/firecrawl/package.json @@ -9,7 +9,7 @@ "build": "tsc", "publish": "npm run build && npm publish --access public", "publish-beta": "npm run build && npm publish --access public --tag beta", - "test": "jest src/**/*.test.ts" + "test": "jest src/__tests__/**/*.test.ts" }, "repository": { "type": "git", @@ -19,6 +19,8 @@ "license": "MIT", "dependencies": { "axios": "^1.6.8", + "dotenv": "^16.4.5", + "uuid": "^9.0.1", "zod": "^3.23.8", "zod-to-json-schema": "^3.23.0" }, @@ -29,7 +31,10 @@ "devDependencies": { "@jest/globals": "^29.7.0", "@types/axios": "^0.14.0", - "@types/node": "^20.12.7", + "@types/dotenv": "^8.2.0", + "@types/jest": "^29.5.12", + "@types/node": "^20.12.12", + "@types/uuid": "^9.0.8", "jest": "^29.7.0", "ts-jest": "^29.1.2", "typescript": "^5.4.5" diff --git a/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts b/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts new file mode 100644 index 000000000..c9db6a91e --- /dev/null +++ b/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts @@ -0,0 +1,146 @@ +import FirecrawlApp from '../../index'; +import { v4 as uuidv4 } from 'uuid'; +import dotenv from 'dotenv'; + +dotenv.config(); + +const TEST_API_KEY = process.env.TEST_API_KEY; +const API_URL = process.env.API_URL; + +describe('FirecrawlApp E2E Tests', () => { + test('should throw error for no API key', () => { + expect(() => { + new FirecrawlApp({ apiKey: null, apiUrl: API_URL }); + }).toThrow("No API key provided"); + }); + + test('should throw error for invalid API key on scrape', async () => { + const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL }); + await expect(invalidApp.scrapeUrl('https://firecrawl.dev')).rejects.toThrow("Request failed with status code 401"); + }); + + test('should throw error for blocklisted URL on scrape', async () => { + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); + const blocklistedUrl = "https://facebook.com/fake-test"; + await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403"); + }); + + test('should return successful response with valid preview token', async () => { + const app = new FirecrawlApp({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL }); + const response = await app.scrapeUrl('https://firecrawl.dev'); + expect(response).not.toBeNull(); + expect(response.data.content).toContain("🔥 Firecrawl"); + }, 10000); // 10 seconds timeout + + test('should return successful response for valid scrape', async () => { + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); + const response = await app.scrapeUrl('https://firecrawl.dev'); + expect(response).not.toBeNull(); + expect(response.data.content).toContain("🔥 Firecrawl"); + expect(response.data).toHaveProperty('markdown'); + expect(response.data).toHaveProperty('metadata'); + expect(response.data).not.toHaveProperty('html'); + }, 10000); // 10 seconds timeout + + test('should return successful response with valid API key and include HTML', async () => { + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); + const response = await app.scrapeUrl('https://firecrawl.dev', { pageOptions: { includeHtml: true } }); + expect(response).not.toBeNull(); + expect(response.data.content).toContain("🔥 Firecrawl"); + expect(response.data.markdown).toContain("🔥 Firecrawl"); + expect(response.data.html).toContain(" { + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); + const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf'); + expect(response).not.toBeNull(); + expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy'); + }, 30000); // 30 seconds timeout + + test('should return successful response for valid scrape with PDF file without explicit extension', async () => { + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); + const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001'); + expect(response).not.toBeNull(); + expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy'); + }, 30000); // 30 seconds timeout + + test('should throw error for invalid API key on crawl', async () => { + const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL }); + await expect(invalidApp.crawlUrl('https://firecrawl.dev')).rejects.toThrow("Request failed with status code 401"); + }); + + test('should throw error for blocklisted URL on crawl', async () => { + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); + const blocklistedUrl = "https://twitter.com/fake-test"; + await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403"); + }); + + test('should return successful response for crawl and wait for completion', async () => { + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); + const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 30); + expect(response).not.toBeNull(); + expect(response[0].content).toContain("🔥 Firecrawl"); + }, 60000); // 60 seconds timeout + + test('should handle idempotency key for crawl', async () => { + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); + const uniqueIdempotencyKey = uuidv4(); + const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, false, 2, uniqueIdempotencyKey); + expect(response).not.toBeNull(); + expect(response.jobId).toBeDefined(); + + await expect(app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409"); + }); + + test('should check crawl status', async () => { + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); + const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, false); + expect(response).not.toBeNull(); + expect(response.jobId).toBeDefined(); + + await new Promise(resolve => setTimeout(resolve, 10000)); // wait for 10 seconds + const statusResponse = await app.checkCrawlStatus(response.jobId); + expect(statusResponse).not.toBeNull(); + expect(statusResponse.status).toBe('completed'); + expect(statusResponse.data.length).toBeGreaterThan(0); + }, 30000); // 30 seconds timeout + + test('should return successful response for search', async () => { + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); + const response = await app.search("test query"); + expect(response).not.toBeNull(); + expect(response.data[0].content).toBeDefined(); + expect(response.data.length).toBeGreaterThan(2); + }, 30000); // 30 seconds timeout + + test('should throw error for invalid API key on search', async () => { + const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL }); + await expect(invalidApp.search("test query")).rejects.toThrow("Request failed with status code 401"); + }); + + test('should perform LLM extraction', async () => { + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); + const response = await app.scrapeUrl("https://mendable.ai", { + extractorOptions: { + mode: 'llm-extraction', + extractionPrompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source", + extractionSchema: { + type: 'object', + properties: { + company_mission: { type: 'string' }, + supports_sso: { type: 'boolean' }, + is_open_source: { type: 'boolean' } + }, + required: ['company_mission', 'supports_sso', 'is_open_source'] + } + } + }); + expect(response).not.toBeNull(); + expect(response.data.llm_extraction).toBeDefined(); + const llmExtraction = response.data.llm_extraction; + expect(llmExtraction.company_mission).toBeDefined(); + expect(typeof llmExtraction.supports_sso).toBe('boolean'); + expect(typeof llmExtraction.is_open_source).toBe('boolean'); + }, 30000); // 30 seconds timeout +}); diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index 0bdcf7ceb..2a07f60d0 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -6,6 +6,7 @@ import { zodToJsonSchema } from "zod-to-json-schema"; */ export interface FirecrawlAppConfig { apiKey?: string | null; + apiUrl?: string | null; } /** @@ -63,6 +64,7 @@ export interface JobStatusResponse { */ export default class FirecrawlApp { private apiKey: string; + private apiUrl: string = "https://api.firecrawl.dev"; /** * Initializes a new instance of the FirecrawlApp class. @@ -107,7 +109,7 @@ export default class FirecrawlApp { } try { const response: AxiosResponse = await axios.post( - "https://api.firecrawl.dev/v0/scrape", + this.apiUrl + "/v0/scrape", jsonData, { headers }, ); @@ -147,7 +149,7 @@ export default class FirecrawlApp { } try { const response: AxiosResponse = await axios.post( - "https://api.firecrawl.dev/v0/search", + this.apiUrl + "/v0/search", jsonData, { headers } ); @@ -190,7 +192,7 @@ export default class FirecrawlApp { } try { const response: AxiosResponse = await this.postRequest( - "https://api.firecrawl.dev/v0/crawl", + this.apiUrl + "/v0/crawl", jsonData, headers ); @@ -220,7 +222,7 @@ export default class FirecrawlApp { const headers: AxiosRequestHeaders = this.prepareHeaders(); try { const response: AxiosResponse = await this.getRequest( - `https://api.firecrawl.dev/v0/crawl/status/${jobId}`, + this.apiUrl + `/v0/crawl/status/${jobId}`, headers ); if (response.status === 200) { @@ -292,7 +294,7 @@ export default class FirecrawlApp { ): Promise { while (true) { const statusResponse: AxiosResponse = await this.getRequest( - `https://api.firecrawl.dev/v0/crawl/status/${jobId}`, + this.apiUrl + `/v0/crawl/status/${jobId}`, headers ); if (statusResponse.status === 200) { diff --git a/apps/js-sdk/package-lock.json b/apps/js-sdk/package-lock.json index 516765347..c59a371c4 100644 --- a/apps/js-sdk/package-lock.json +++ b/apps/js-sdk/package-lock.json @@ -11,9 +11,10 @@ "dependencies": { "@mendable/firecrawl-js": "^0.0.19", "axios": "^1.6.8", - "uuid": "^9.0.1", + "dotenv": "^16.4.5", "ts-node": "^10.9.2", "typescript": "^5.4.5", + "uuid": "^9.0.1", "zod": "^3.23.8" }, "devDependencies": { @@ -531,6 +532,17 @@ "node": ">=0.3.1" } }, + "node_modules/dotenv": { + "version": "16.4.5", + "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.5.tgz", + "integrity": "sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://dotenvx.com" + } + }, "node_modules/esbuild": { "version": "0.20.2", "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.20.2.tgz", @@ -744,6 +756,18 @@ "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", "peer": true }, + "node_modules/uuid": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", + "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "bin": { + "uuid": "dist/bin/uuid" + } + }, "node_modules/v8-compile-cache-lib": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz", @@ -772,18 +796,6 @@ "peerDependencies": { "zod": "^3.23.3" } - }, - "node_modules/uuid": { - "version": "9.0.1", - "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", - "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", - "funding": [ - "https://github.com/sponsors/broofa", - "https://github.com/sponsors/ctavan" - ], - "bin": { - "uuid": "dist/bin/uuid" - } } } } diff --git a/apps/js-sdk/package.json b/apps/js-sdk/package.json index df9f99e5f..0e93fe3c2 100644 --- a/apps/js-sdk/package.json +++ b/apps/js-sdk/package.json @@ -11,9 +11,8 @@ "author": "", "license": "ISC", "dependencies": { - "axios": "^1.6.8", - "uuid": "^9.0.1", "@mendable/firecrawl-js": "^0.0.19", + "axios": "^1.6.8", "ts-node": "^10.9.2", "typescript": "^5.4.5", "zod": "^3.23.8" diff --git a/apps/python-sdk/README.md b/apps/python-sdk/README.md index 38ca843bc..ae0997387 100644 --- a/apps/python-sdk/README.md +++ b/apps/python-sdk/README.md @@ -117,6 +117,25 @@ status = app.check_crawl_status(job_id) The SDK handles errors returned by the Firecrawl API and raises appropriate exceptions. If an error occurs during a request, an exception will be raised with a descriptive error message. +## Running the Tests with Pytest + +To ensure the functionality of the Firecrawl Python SDK, we have included end-to-end tests using `pytest`. These tests cover various aspects of the SDK, including URL scraping, web searching, and website crawling. + +### Running the Tests + +To run the tests, execute the following commands: + +Install pytest: +```bash +pip install pytest +``` + +Run: +```bash +pytest firecrawl/__tests__/e2e_withAuth/test.py +``` + + ## Contributing Contributions to the Firecrawl Python SDK are welcome! If you find any issues or have suggestions for improvements, please open an issue or submit a pull request on the GitHub repository. diff --git a/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/.env.example b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/.env.example new file mode 100644 index 000000000..904887bf0 --- /dev/null +++ b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/.env.example @@ -0,0 +1,3 @@ +API_URL=http://localhost:3002 +ABSOLUTE_FIRECRAWL_PATH=/Users/user/firecrawl/apps/python-sdk/firecrawl/firecrawl.py +TEST_API_KEY=fc-YOUR_API_KEY \ No newline at end of file diff --git a/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/__init__.py b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py new file mode 100644 index 000000000..86ce1f9f5 --- /dev/null +++ b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py @@ -0,0 +1,168 @@ +import importlib.util +import pytest +import time +import os +from uuid import uuid4 +from dotenv import load_dotenv + +load_dotenv() + +API_URL = "http://127.0.0.1:3002"; +ABSOLUTE_FIRECRAWL_PATH = "firecrawl/firecrawl.py" +TEST_API_KEY = os.getenv('TEST_API_KEY') + +print(f"ABSOLUTE_FIRECRAWL_PATH: {ABSOLUTE_FIRECRAWL_PATH}") + +spec = importlib.util.spec_from_file_location("FirecrawlApp", ABSOLUTE_FIRECRAWL_PATH) +firecrawl = importlib.util.module_from_spec(spec) +spec.loader.exec_module(firecrawl) +FirecrawlApp = firecrawl.FirecrawlApp + +def test_no_api_key(): + with pytest.raises(Exception) as excinfo: + invalid_app = FirecrawlApp(api_url=API_URL) + assert "No API key provided" in str(excinfo.value) + +def test_scrape_url_invalid_api_key(): + invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key") + with pytest.raises(Exception) as excinfo: + invalid_app.scrape_url('https://firecrawl.dev') + assert "Failed to scrape URL. Status code: 401" in str(excinfo.value) + +def test_blocklisted_url(): + blocklisted_url = "https://facebook.com/fake-test" + app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) + with pytest.raises(Exception) as excinfo: + app.scrape_url(blocklisted_url) + assert "Failed to scrape URL. Status code: 403" in str(excinfo.value) + +def test_successful_response_with_valid_preview_token(): + app = FirecrawlApp(api_url=API_URL, api_key="this_is_just_a_preview_token") + response = app.scrape_url('https://firecrawl.dev') + assert response is not None + assert 'content' in response + assert "🔥 Firecrawl" in response['content'] + +def test_scrape_url_e2e(): + app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) + response = app.scrape_url('https://firecrawl.dev') + assert response is not None + assert 'content' in response + assert 'markdown' in response + assert 'metadata' in response + assert 'html' not in response + assert "🔥 Firecrawl" in response['content'] + +def test_successful_response_with_valid_api_key_and_include_html(): + app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) + response = app.scrape_url('https://firecrawl.dev', {'pageOptions': {'includeHtml': True}}) + assert response is not None + assert 'content' in response + assert 'markdown' in response + assert 'html' in response + assert 'metadata' in response + assert "🔥 Firecrawl" in response['content'] + assert "🔥 Firecrawl" in response['markdown'] + assert " 0 + assert 'content' in response[0] + assert "🔥 Firecrawl" in response[0]['content'] + +def test_crawl_url_with_idempotency_key_e2e(): + app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) + uniqueIdempotencyKey = str(uuid4()) + response = app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey) + assert response is not None + assert len(response) > 0 + assert 'content' in response[0] + assert "🔥 Firecrawl" in response[0]['content'] + + with pytest.raises(Exception) as excinfo: + app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey) + assert "Failed to start crawl job. Status code: 409. Error: Idempotency key already used" in str(excinfo.value) + +def test_check_crawl_status_e2e(): + app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) + response = app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, False) + assert response is not None + assert 'jobId' in response + + time.sleep(30) # wait for 30 seconds + status_response = app.check_crawl_status(response['jobId']) + assert status_response is not None + assert 'status' in status_response + assert status_response['status'] == 'completed' + assert 'data' in status_response + assert len(status_response['data']) > 0 + +def test_search_e2e(): + app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) + response = app.search("test query") + assert response is not None + assert 'content' in response[0] + assert len(response) > 2 + +def test_search_invalid_api_key(): + invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key") + with pytest.raises(Exception) as excinfo: + invalid_app.search("test query") + assert "Failed to search. Status code: 401" in str(excinfo.value) + +def test_llm_extraction(): + app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) + response = app.scrape_url("https://mendable.ai", { + 'extractorOptions': { + 'mode': 'llm-extraction', + 'extractionPrompt': "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source", + 'extractionSchema': { + 'type': 'object', + 'properties': { + 'company_mission': {'type': 'string'}, + 'supports_sso': {'type': 'boolean'}, + 'is_open_source': {'type': 'boolean'} + }, + 'required': ['company_mission', 'supports_sso', 'is_open_source'] + } + } + }) + assert response is not None + assert 'llm_extraction' in response + llm_extraction = response['llm_extraction'] + assert 'company_mission' in llm_extraction + assert isinstance(llm_extraction['supports_sso'], bool) + assert isinstance(llm_extraction['is_open_source'], bool) \ No newline at end of file diff --git a/apps/python-sdk/requirements.txt b/apps/python-sdk/requirements.txt new file mode 100644 index 000000000..1bed58814 --- /dev/null +++ b/apps/python-sdk/requirements.txt @@ -0,0 +1,3 @@ +requests +pytest +python-dotenv \ No newline at end of file diff --git a/apps/python-sdk/setup.py b/apps/python-sdk/setup.py index 63b5c9fce..beee059de 100644 --- a/apps/python-sdk/setup.py +++ b/apps/python-sdk/setup.py @@ -16,7 +16,9 @@ long_description_content_type="text/markdown", packages=find_packages(), install_requires=[ - "requests", + 'requests', + 'pytest', + 'python-dotenv', ], python_requires='>=3.8', classifiers=[