diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml new file mode 100644 index 000000000..9c3783bb5 --- /dev/null +++ b/.github/workflows/python-tests.yml @@ -0,0 +1,31 @@ +name: Run Python SDK E2E Tests + +on: + pull_request: + branches: + - main +env: + TEST_API_KEY: ${{ secrets.TEST_API_KEY }} +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.10"] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + # A bare `python setup.py` exits with "no commands supplied"; install the SDK and its install_requires instead + pip install . + working-directory: ./apps/python-sdk + - name: Test with pytest + run: | + # Run from the repository root: test.py loads the SDK from ./apps/python-sdk/firecrawl/firecrawl.py relative to the working directory + pytest apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py diff --git a/apps/js-sdk/firecrawl/.env.example b/apps/js-sdk/firecrawl/.env.example new file mode 100644 index 000000000..6b1780bb8 --- /dev/null +++ b/apps/js-sdk/firecrawl/.env.example @@ -0,0 +1,3 @@ +API_URL=http://localhost:3002 +TEST_API_KEY=fc-YOUR_API_KEY + diff --git a/apps/js-sdk/firecrawl/package-lock.json b/apps/js-sdk/firecrawl/package-lock.json index 6b085be8e..fec326b4a 100644 --- a/apps/js-sdk/firecrawl/package-lock.json +++ b/apps/js-sdk/firecrawl/package-lock.json @@ -1,22 +1,25 @@ { "name": "@mendable/firecrawl-js", - "version": "0.0.17-beta.8", + "version": "0.0.22", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@mendable/firecrawl-js", - "version": "0.0.17-beta.8", + "version": "0.0.22", "license": "MIT", "dependencies": { "axios": "^1.6.8", + "uuid": "^9.0.1", "zod": "^3.23.8", "zod-to-json-schema": "^3.23.0" }, "devDependencies": { "@jest/globals": "^29.7.0", "@types/axios": "^0.14.0", - "@types/node": "^20.12.7", + "@types/jest": "^29.5.12", + "@types/node": "^20.12.12", + "@types/uuid": "^9.0.8", "jest": "^29.7.0", "ts-jest": "^29.1.2", "typescript":
"^5.4.5" @@ -1046,10 +1049,20 @@ "@types/istanbul-lib-report": "*" } }, + "node_modules/@types/jest": { + "version": "29.5.12", + "resolved": "https://registry.npmjs.org/@types/jest/-/jest-29.5.12.tgz", + "integrity": "sha512-eDC8bTvT/QhYdxJAulQikueigY5AsdBRH2yDKW3yveW7svY3+DzN84/2NUgkw10RTiJbWqZrTtoGVdYlvFJdLw==", + "dev": true, + "dependencies": { + "expect": "^29.0.0", + "pretty-format": "^29.0.0" + } + }, "node_modules/@types/node": { - "version": "20.12.7", - "resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.7.tgz", - "integrity": "sha512-wq0cICSkRLVaf3UGLMGItu/PtdY7oaXaI/RVU+xliKVOtRna3PRY57ZDfztpDL0n11vfymMUnXv8QwYCO7L1wg==", + "version": "20.12.12", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.12.tgz", + "integrity": "sha512-eWLDGF/FOSPtAvEqeRAQ4C8LSA7M1I7i0ky1I8U7kD1J5ITyW3AsRhQrKVoWf5pFKZ2kILsEGJhsI9r93PYnOw==", "dev": true, "dependencies": { "undici-types": "~5.26.4" @@ -1061,6 +1074,12 @@ "integrity": "sha512-9aEbYZ3TbYMznPdcdr3SmIrLXwC/AKZXQeCf9Pgao5CKb8CyHuEX5jzWPTkvregvhRJHcpRO6BFoGW9ycaOkYw==", "dev": true }, + "node_modules/@types/uuid": { + "version": "9.0.8", + "resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz", + "integrity": "sha512-jg+97EGIcY9AGHJJRaaPVgetKDsrTgbRjQ5Msgjh/DQKEFl0DtyRr/VCOyD1T2R1MNeWPK/u7JoGhlDZnKBAfA==", + "dev": true + }, "node_modules/@types/yargs": { "version": "17.0.32", "resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.32.tgz", @@ -3641,6 +3660,18 @@ "browserslist": ">= 4.21.0" } }, + "node_modules/uuid": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", + "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "bin": { + "uuid": "dist/bin/uuid" + } + }, "node_modules/v8-to-istanbul": { "version": "9.2.0", "resolved": 
"https://registry.npmjs.org/v8-to-istanbul/-/v8-to-istanbul-9.2.0.tgz", diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json index e43f6ea63..a9fdaaf49 100644 --- a/apps/js-sdk/firecrawl/package.json +++ b/apps/js-sdk/firecrawl/package.json @@ -9,7 +9,7 @@ "build": "tsc", "publish": "npm run build && npm publish --access public", "publish-beta": "npm run build && npm publish --access public --tag beta", - "test": "jest src/**/*.test.ts" + "test": "jest src/__tests__/**/*.test.ts" }, "repository": { "type": "git", @@ -19,6 +19,7 @@ "license": "MIT", "dependencies": { "axios": "^1.6.8", + "uuid": "^9.0.1", "zod": "^3.23.8", "zod-to-json-schema": "^3.23.0" }, @@ -29,7 +30,9 @@ "devDependencies": { "@jest/globals": "^29.7.0", "@types/axios": "^0.14.0", - "@types/node": "^20.12.7", + "@types/jest": "^29.5.12", + "@types/node": "^20.12.12", + "@types/uuid": "^9.0.8", "jest": "^29.7.0", "ts-jest": "^29.1.2", "typescript": "^5.4.5" diff --git a/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts b/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts new file mode 100644 index 000000000..13f53472c --- /dev/null +++ b/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts @@ -0,0 +1,147 @@ +import FirecrawlApp from '../../index'; +import { v4 as uuidv4 } from 'uuid'; +import dotenv from 'dotenv'; + +dotenv.config(); + +const TEST_API_KEY = process.env.TEST_API_KEY; +const API_URL = process.env.API_URL; + +describe('FirecrawlApp E2E Tests', () => { + test('should throw error for no API key', () => { + expect(() => { + new FirecrawlApp({ apiKey: null, apiUrl: API_URL }); + }).toThrow("No API key provided"); + }); + + test('should throw error for invalid API key on scrape', async () => { + const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL }); + await expect(invalidApp.scrapeUrl('https://firecrawl.dev')).rejects.toThrow("Request failed with status code 401"); + }); + + test('should throw 
error for blocklisted URL on scrape', async () => { + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); + const blocklistedUrl = "https://facebook.com/fake-test"; + await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403"); + }); + + test('should return successful response with valid preview token', async () => { + const app = new FirecrawlApp({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL }); + const response = await app.scrapeUrl('https://firecrawl.dev'); + expect(response).not.toBeNull(); + expect(response.data.content).toContain("🔥 Firecrawl"); + }, 10000); // 10 seconds timeout + + test('should return successful response for valid scrape', async () => { + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); + const response = await app.scrapeUrl('https://firecrawl.dev'); + expect(response).not.toBeNull(); + expect(response.data.content).toContain("🔥 Firecrawl"); + expect(response.data).toHaveProperty('markdown'); + expect(response.data).toHaveProperty('metadata'); + expect(response.data).not.toHaveProperty('html'); // html is only expected when includeHtml is requested (next test) + }, 10000); // 10 seconds timeout + + test('should return successful response with valid API key and include HTML', async () => { + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); + const response = await app.scrapeUrl('https://firecrawl.dev', { pageOptions: { includeHtml: true } }); + expect(response).not.toBeNull(); + expect(response.data.content).toContain("🔥 Firecrawl"); + expect(response.data.markdown).toContain("🔥 Firecrawl"); + expect(response.data.html).toContain("<h1"); // with includeHtml the raw HTML must be present + }, 10000); // 10 seconds timeout + + test('should return successful response for valid scrape with PDF file', async () => { + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); + const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf'); + expect(response).not.toBeNull(); + expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy'); + }, 30000); // 30 seconds timeout + +
test('should return successful response for valid scrape with PDF file without explicit extension', async () => { + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); + const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001'); // PDF URL with no ".pdf" suffix + await new Promise(resolve => setTimeout(resolve, 6000)); // wait for 6 seconds -- NOTE(review): scrapeUrl has already been awaited above, so this pause runs after the response arrived; confirm it is needed + expect(response).not.toBeNull(); + expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy'); + }, 30000); // 30 seconds timeout + + test('should throw error for invalid API key on crawl', async () => { + const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL }); + await expect(invalidApp.crawlUrl('https://firecrawl.dev')).rejects.toThrow("Request failed with status code 401"); + }); + + test('should throw error for blocklisted URL on crawl', async () => { + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); + const blocklistedUrl = "https://twitter.com/fake-test"; // the test expects the API to answer 403 for this URL + await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403"); + }); + + test('should return successful response for crawl and wait for completion', async () => { + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); + const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true); // third argument true: presumably "wait until the crawl job finishes" -- confirm against crawlUrl + expect(response).not.toBeNull(); + expect(response[0].content).toContain("🔥 Firecrawl"); // the awaited result is indexed as a list of pages here + }, 60000); // 60 seconds timeout + + test('should handle idempotency key for crawl', async () => { + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); + const uniqueIdempotencyKey = uuidv4(); // fresh key per run; it is reused for the second request below + const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey); // NOTE(review): the 4th argument (2) is presumably a polling interval -- confirm against crawlUrl's signature + expect(response).not.toBeNull(); + expect(response[0].content).toContain("🔥 Firecrawl"); + + await
expect(app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409"); // a second request with the same idempotency key must be rejected with 409 + }, 30000); // 30 seconds timeout -- NOTE(review): this test also waits for a full crawl, which the preceding crawl test budgets 60 seconds for; confirm 30 is enough + + test('should check crawl status', async () => { + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); + const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, false); // third argument false: the test only reads jobId from the result + expect(response).not.toBeNull(); + expect(response.jobId).toBeDefined(); + + await new Promise(resolve => setTimeout(resolve, 10000)); // wait for 10 seconds -- NOTE(review): fixed delay; the assertions below require the job to be 'completed' by then + const statusResponse = await app.checkCrawlStatus(response.jobId); + expect(statusResponse).not.toBeNull(); + expect(statusResponse.status).toBe('completed'); + expect(statusResponse.data.length).toBeGreaterThan(0); + }, 30000); // 30 seconds timeout + + test('should return successful response for search', async () => { + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); + const response = await app.search("test query"); + expect(response).not.toBeNull(); + expect(response.data[0].content).toBeDefined(); + expect(response.data.length).toBeGreaterThan(2); // i.e. at least three search results + }, 30000); // 30 seconds timeout + + test('should throw error for invalid API key on search', async () => { + const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL }); + await expect(invalidApp.search("test query")).rejects.toThrow("Request failed with status code 401"); + }); + + test('should perform LLM extraction', async () => { + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); + const response = await app.scrapeUrl("https://mendable.ai", { + extractorOptions: { + mode: 'llm-extraction', + extractionPrompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source", + extractionSchema: { // fields the test later reads from response.data.llm_extraction + type: 'object', + properties: { + company_mission: { type:
'string' }, + supports_sso: { type: 'boolean' }, + is_open_source: { type: 'boolean' } + }, + required: ['company_mission', 'supports_sso', 'is_open_source'] + } + } + }); + expect(response).not.toBeNull(); + expect(response.data.llm_extraction).toBeDefined(); + const llmExtraction = response.data.llm_extraction; + expect(llmExtraction.company_mission).toBeDefined(); + expect(typeof llmExtraction.supports_sso).toBe('boolean'); + expect(typeof llmExtraction.is_open_source).toBe('boolean'); + }, 30000); // 30 seconds timeout +}); diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index 0bdcf7ceb..2a07f60d0 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -6,6 +6,7 @@ import { zodToJsonSchema } from "zod-to-json-schema"; */ export interface FirecrawlAppConfig { apiKey?: string | null; + apiUrl?: string | null; } /** @@ -63,6 +64,7 @@ export interface JobStatusResponse { */ export default class FirecrawlApp { private apiKey: string; + private apiUrl: string = "https://api.firecrawl.dev"; /** * Initializes a new instance of the FirecrawlApp class. 
@@ -107,7 +109,7 @@ export default class FirecrawlApp { } try { const response: AxiosResponse = await axios.post( - "https://api.firecrawl.dev/v0/scrape", + this.apiUrl + "/v0/scrape", jsonData, { headers }, ); @@ -147,7 +149,7 @@ export default class FirecrawlApp { } try { const response: AxiosResponse = await axios.post( - "https://api.firecrawl.dev/v0/search", + this.apiUrl + "/v0/search", jsonData, { headers } ); @@ -190,7 +192,7 @@ export default class FirecrawlApp { } try { const response: AxiosResponse = await this.postRequest( - "https://api.firecrawl.dev/v0/crawl", + this.apiUrl + "/v0/crawl", jsonData, headers ); @@ -220,7 +222,7 @@ export default class FirecrawlApp { const headers: AxiosRequestHeaders = this.prepareHeaders(); try { const response: AxiosResponse = await this.getRequest( - `https://api.firecrawl.dev/v0/crawl/status/${jobId}`, + this.apiUrl + `/v0/crawl/status/${jobId}`, headers ); if (response.status === 200) { @@ -292,7 +294,7 @@ export default class FirecrawlApp { ): Promise { while (true) { const statusResponse: AxiosResponse = await this.getRequest( - `https://api.firecrawl.dev/v0/crawl/status/${jobId}`, + this.apiUrl + `/v0/crawl/status/${jobId}`, headers ); if (statusResponse.status === 200) { diff --git a/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/.env.example b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/.env.example new file mode 100644 index 000000000..904887bf0 --- /dev/null +++ b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/.env.example @@ -0,0 +1,3 @@ +API_URL=http://localhost:3002 +ABSOLUTE_FIRECRAWL_PATH=/Users/user/firecrawl/apps/python-sdk/firecrawl/firecrawl.py +TEST_API_KEY=fc-YOUR_API_KEY \ No newline at end of file diff --git a/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py index 11b66e981..1a3b64143 100644 --- a/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py +++ b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py @@ 
-1,78 +1,150 @@ +import importlib.util import pytest -from firecrawl import FirecrawlApp +import time +import os +from uuid import uuid4 +from dotenv import load_dotenv -TEST_API_KEY = "fc-YOUR_API_KEY" -TEST_URL = "https://firecrawl.dev" +load_dotenv() + +API_URL = os.getenv('API_URL', 'http://127.0.0.1:3002')  # overridable via the environment/.env (see .env.example); default is the local API +ABSOLUTE_FIRECRAWL_PATH = os.getenv('ABSOLUTE_FIRECRAWL_PATH', './apps/python-sdk/firecrawl/firecrawl.py')  # default is relative to the working directory, i.e. assumes the repository root +TEST_API_KEY = os.getenv('TEST_API_KEY') + +print(f"ABSOLUTE_FIRECRAWL_PATH: {ABSOLUTE_FIRECRAWL_PATH}") + +spec = importlib.util.spec_from_file_location("FirecrawlApp", ABSOLUTE_FIRECRAWL_PATH)  # load the SDK straight from the source file rather than an installed package +firecrawl = importlib.util.module_from_spec(spec) +spec.loader.exec_module(firecrawl) +FirecrawlApp = firecrawl.FirecrawlApp + +def test_no_api_key(): + with pytest.raises(Exception) as excinfo: + invalid_app = FirecrawlApp(api_url=API_URL) + assert "No API key provided" in str(excinfo.value) + +def test_scrape_url_invalid_api_key(): + invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key") + with pytest.raises(Exception) as excinfo: + invalid_app.scrape_url('https://firecrawl.dev') + assert "Failed to scrape URL. Status code: 401" in str(excinfo.value) + +def test_blocklisted_url(): + blocklisted_url = "https://facebook.com/fake-test" + app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) + with pytest.raises(Exception) as excinfo: + app.scrape_url(blocklisted_url) + assert "Failed to scrape URL.
Status code: 403" in str(excinfo.value) + +def test_successful_response_with_valid_preview_token(): + app = FirecrawlApp(api_url=API_URL, api_key="this_is_just_a_preview_token") + response = app.scrape_url('https://firecrawl.dev') + assert response is not None + assert 'content' in response + assert "🔥 Firecrawl" in response['content'] def test_scrape_url_e2e(): - app = FirecrawlApp(api_key=TEST_API_KEY) - response = app.scrape_url(TEST_URL) - print(response) + app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) + response = app.scrape_url('https://firecrawl.dev') assert response is not None assert 'content' in response + assert 'markdown' in response + assert 'metadata' in response + assert 'html' not in response assert "🔥 Firecrawl" in response['content'] -def test_scrape_url_invalid_api_key(): - invalid_app = FirecrawlApp(api_key="invalid_api_key") - with pytest.raises(Exception) as excinfo: - invalid_app.scrape_url(TEST_URL) - assert "Failed to scrape URL. Status code: 401" in str(excinfo.value) +def test_successful_response_with_valid_api_key_and_include_html(): + app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) + response = app.scrape_url('https://firecrawl.dev', {'pageOptions': {'includeHtml': True}}) + assert response is not None + assert 'content' in response + assert 'markdown' in response + assert 'html' in response + assert 'metadata' in response + assert "🔥 Firecrawl" in response['content'] + assert "🔥 Firecrawl" in response['markdown'] + assert " 0 - assert 'content' in response[0] - assert "🔥 Firecrawl" in response[0]['content'] + assert 'content' in response + assert 'metadata' in response + assert 'We present spectrophotometric observations of the Broad Line Radio Galaxy' in response['content'] + +def test_successful_response_for_valid_scrape_with_pdf_file_without_explicit_extension(): + app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) + response = app.scrape_url('https://arxiv.org/pdf/astro-ph/9301001') + time.sleep(6) # 
wait for 6 seconds + assert response is not None + assert 'content' in response + assert 'metadata' in response + assert 'We present spectrophotometric observations of the Broad Line Radio Galaxy' in response['content'] def test_crawl_url_invalid_api_key(): - invalid_app = FirecrawlApp(api_key="invalid_api_key") + invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key") with pytest.raises(Exception) as excinfo: - invalid_app.crawl_url(TEST_URL) + invalid_app.crawl_url('https://firecrawl.dev') assert "Unexpected error occurred while trying to start crawl job. Status code: 401" in str(excinfo.value) -def test_search_e2e(): - app = FirecrawlApp(api_key=TEST_API_KEY) - response = app.search("test query") +def test_should_return_error_for_blocklisted_url(): + app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) + blocklisted_url = "https://twitter.com/fake-test" + with pytest.raises(Exception) as excinfo: + app.crawl_url(blocklisted_url) + assert "Unexpected error occurred while trying to start crawl job. Status code: 403" in str(excinfo.value) + +def test_crawl_url_wait_for_completion_e2e(): + app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) + response = app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True) assert response is not None + assert len(response) > 0 assert 'content' in response[0] - assert len(response) > 2 - -def test_search_invalid_api_key(): - invalid_app = FirecrawlApp(api_key="invalid_api_key") - with pytest.raises(Exception) as excinfo: - invalid_app.search("test query") - assert "Failed to search. 
Status code: 401" in str(excinfo.value) + assert "🔥 Firecrawl" in response[0]['content'] -def test_crawl_with_fast_mode(): - app = FirecrawlApp(api_key=TEST_API_KEY) - response = app.crawl_url(TEST_URL, {'crawlerOptions': {'mode': 'fast'}}, True) +def test_crawl_url_with_idempotency_key_e2e(): + app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) + uniqueIdempotencyKey = str(uuid4()) + response = app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey) assert response is not None assert len(response) > 0 assert 'content' in response[0] + assert "🔥 Firecrawl" in response[0]['content'] + + with pytest.raises(Exception) as excinfo: + app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey) + assert "Failed to start crawl job. Status code: 409. Error: Idempotency key already used" in str(excinfo.value) -def test_crawl_with_html_inclusion(): - app = FirecrawlApp(api_key=TEST_API_KEY) - response = app.crawl_url(TEST_URL, {'pageOptions': {'includeHtml': True}}, False) +def test_check_crawl_status_e2e(): + app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) + response = app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, False) assert response is not None assert 'jobId' in response + + time.sleep(30) # wait for 30 seconds + status_response = app.check_crawl_status(response['jobId']) + assert status_response is not None + assert 'status' in status_response + assert status_response['status'] == 'completed' + assert 'data' in status_response + assert len(status_response['data']) > 0 -def test_crawl_with_pdf_extraction(): - app = FirecrawlApp(api_key=TEST_API_KEY) - response = app.crawl_url("https://arxiv.org/pdf/astro-ph/9301001", - {'crawlerOptions': {'limit': 10, 'excludes': ['list/*', 'login', 'abs/*', 'static/*', 'about/*', 'archive/*']}}, False) +def test_search_e2e(): + app = FirecrawlApp(api_url=API_URL, 
api_key=TEST_API_KEY) + response = app.search("test query") assert response is not None - assert 'jobId' in response + assert 'content' in response[0] + assert len(response) > 2 -def test_timeout_during_scraping(): - app = FirecrawlApp(api_key=TEST_API_KEY) +def test_search_invalid_api_key(): + invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key") with pytest.raises(Exception) as excinfo: - app.scrape_url(TEST_URL, {'timeout': 1000}) - assert 'Failed to scrape URL. Status code: 408' in str(excinfo.value) + invalid_app.search("test query") + assert "Failed to search. Status code: 401" in str(excinfo.value) def test_llm_extraction(): - app = FirecrawlApp(api_key=TEST_API_KEY) + app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) response = app.scrape_url("https://mendable.ai", { 'extractorOptions': { 'mode': 'llm-extraction', diff --git a/apps/python-sdk/setup.py b/apps/python-sdk/setup.py index 6674a8927..726cafa34 100644 --- a/apps/python-sdk/setup.py +++ b/apps/python-sdk/setup.py @@ -11,5 +11,6 @@ install_requires=[ 'requests', 'pytest', + 'python-dotenv', ], )