From 397769c7e3579ca2709f127642def7a040249c58 Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Fri, 24 May 2024 17:56:27 -0300 Subject: [PATCH 01/14] added python sdk e2e tests with pytest some of them are still missing though --- apps/python-sdk/README.md | 19 ++++ .../__tests__/e2e_withAuth/__init__.py | 0 .../firecrawl/__tests__/e2e_withAuth/test.py | 96 +++++++++++++++++++ apps/python-sdk/setup.py | 1 + 4 files changed, 116 insertions(+) create mode 100644 apps/python-sdk/firecrawl/__tests__/e2e_withAuth/__init__.py create mode 100644 apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py diff --git a/apps/python-sdk/README.md b/apps/python-sdk/README.md index 38ca843bc..ae0997387 100644 --- a/apps/python-sdk/README.md +++ b/apps/python-sdk/README.md @@ -117,6 +117,25 @@ status = app.check_crawl_status(job_id) The SDK handles errors returned by the Firecrawl API and raises appropriate exceptions. If an error occurs during a request, an exception will be raised with a descriptive error message. +## Running the Tests with Pytest + +To ensure the functionality of the Firecrawl Python SDK, we have included end-to-end tests using `pytest`. These tests cover various aspects of the SDK, including URL scraping, web searching, and website crawling. + +### Running the Tests + +To run the tests, execute the following commands: + +Install pytest: +```bash +pip install pytest +``` + +Run: +```bash +pytest firecrawl/__tests__/e2e_withAuth/test.py +``` + + ## Contributing Contributions to the Firecrawl Python SDK are welcome! If you find any issues or have suggestions for improvements, please open an issue or submit a pull request on the GitHub repository. diff --git a/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/__init__.py b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py new file mode 100644 index 000000000..11b66e981 --- /dev/null +++ b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py @@ -0,0 +1,96 @@ +import pytest +from firecrawl import FirecrawlApp + +TEST_API_KEY = "fc-YOUR_API_KEY" +TEST_URL = "https://firecrawl.dev" + +def test_scrape_url_e2e(): + app = FirecrawlApp(api_key=TEST_API_KEY) + response = app.scrape_url(TEST_URL) + print(response) + assert response is not None + assert 'content' in response + assert "🔥 Firecrawl" in response['content'] + +def test_scrape_url_invalid_api_key(): + invalid_app = FirecrawlApp(api_key="invalid_api_key") + with pytest.raises(Exception) as excinfo: + invalid_app.scrape_url(TEST_URL) + assert "Failed to scrape URL. Status code: 401" in str(excinfo.value) + +def test_crawl_url_e2e(): + app = FirecrawlApp(api_key=TEST_API_KEY) + response = app.crawl_url(TEST_URL, {'crawlerOptions': {'excludes': ['blog/*']}}, True) + assert response is not None + assert len(response) > 0 + assert 'content' in response[0] + assert "🔥 Firecrawl" in response[0]['content'] + +def test_crawl_url_invalid_api_key(): + invalid_app = FirecrawlApp(api_key="invalid_api_key") + with pytest.raises(Exception) as excinfo: + invalid_app.crawl_url(TEST_URL) + assert "Unexpected error occurred while trying to start crawl job. Status code: 401" in str(excinfo.value) + +def test_search_e2e(): + app = FirecrawlApp(api_key=TEST_API_KEY) + response = app.search("test query") + assert response is not None + assert 'content' in response[0] + assert len(response) > 2 + +def test_search_invalid_api_key(): + invalid_app = FirecrawlApp(api_key="invalid_api_key") + with pytest.raises(Exception) as excinfo: + invalid_app.search("test query") + assert "Failed to search. Status code: 401" in str(excinfo.value) + +def test_crawl_with_fast_mode(): + app = FirecrawlApp(api_key=TEST_API_KEY) + response = app.crawl_url(TEST_URL, {'crawlerOptions': {'mode': 'fast'}}, True) + assert response is not None + assert len(response) > 0 + assert 'content' in response[0] + +def test_crawl_with_html_inclusion(): + app = FirecrawlApp(api_key=TEST_API_KEY) + response = app.crawl_url(TEST_URL, {'pageOptions': {'includeHtml': True}}, False) + assert response is not None + assert 'jobId' in response + +def test_crawl_with_pdf_extraction(): + app = FirecrawlApp(api_key=TEST_API_KEY) + response = app.crawl_url("https://arxiv.org/pdf/astro-ph/9301001", + {'crawlerOptions': {'limit': 10, 'excludes': ['list/*', 'login', 'abs/*', 'static/*', 'about/*', 'archive/*']}}, False) + assert response is not None + assert 'jobId' in response + +def test_timeout_during_scraping(): + app = FirecrawlApp(api_key=TEST_API_KEY) + with pytest.raises(Exception) as excinfo: + app.scrape_url(TEST_URL, {'timeout': 1000}) + assert 'Failed to scrape URL. Status code: 408' in str(excinfo.value) + +def test_llm_extraction(): + app = FirecrawlApp(api_key=TEST_API_KEY) + response = app.scrape_url("https://mendable.ai", { + 'extractorOptions': { + 'mode': 'llm-extraction', + 'extractionPrompt': "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source", + 'extractionSchema': { + 'type': 'object', + 'properties': { + 'company_mission': {'type': 'string'}, + 'supports_sso': {'type': 'boolean'}, + 'is_open_source': {'type': 'boolean'} + }, + 'required': ['company_mission', 'supports_sso', 'is_open_source'] + } + } + }) + assert response is not None + assert 'llm_extraction' in response + llm_extraction = response['llm_extraction'] + assert 'company_mission' in llm_extraction + assert isinstance(llm_extraction['supports_sso'], bool) + assert isinstance(llm_extraction['is_open_source'], bool) \ No newline at end of file diff --git a/apps/python-sdk/setup.py b/apps/python-sdk/setup.py index 7df520eb7..6674a8927 100644 --- a/apps/python-sdk/setup.py +++ b/apps/python-sdk/setup.py @@ -10,5 +10,6 @@ packages=find_packages(), install_requires=[ 'requests', + 'pytest', ], ) From 63772ea7110c49fa91b64c62bae6e7fff9240bcd Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Mon, 27 May 2024 14:14:00 -0300 Subject: [PATCH 02/14] added github action workflow --- .github/workflows/python-tests.yml | 31 ++++ apps/js-sdk/firecrawl/.env.example | 3 + apps/js-sdk/firecrawl/package-lock.json | 43 ++++- apps/js-sdk/firecrawl/package.json | 7 +- .../src/__tests__/e2e_withAuth/index.test.ts | 147 ++++++++++++++++ apps/js-sdk/firecrawl/src/index.ts | 12 +- .../__tests__/e2e_withAuth/.env.example | 3 + .../firecrawl/__tests__/e2e_withAuth/test.py | 162 +++++++++++++----- apps/python-sdk/setup.py | 1 + 9 files changed, 351 insertions(+), 58 deletions(-) create mode 100644 .github/workflows/python-tests.yml create mode 100644 apps/js-sdk/firecrawl/.env.example create mode 100644 apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts create mode 100644 apps/python-sdk/firecrawl/__tests__/e2e_withAuth/.env.example diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml new file mode 100644 index 000000000..9c3783bb5 --- /dev/null +++ b/.github/workflows/python-tests.yml @@ -0,0 +1,31 @@ +name: Run Python SDK E2E Tests + +on: + pull_request: + branches: + - main +env: + TEST_API_KEY: ${{ secrets.TEST_API_KEY }} +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.10"] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python setup.py + working-directory: ./apps/python-sdk + - name: Test with pytest + run: | + cd apps/python-sdk + pytest firecrawl/__tests__/e2e_withAuth/test.py + working-directory: ./apps/python-sdk diff --git a/apps/js-sdk/firecrawl/.env.example b/apps/js-sdk/firecrawl/.env.example new file mode 100644 index 000000000..6b1780bb8 --- /dev/null +++ b/apps/js-sdk/firecrawl/.env.example @@ -0,0 +1,3 @@ +API_URL=http://localhost:3002 +TEST_API_KEY=fc-YOUR_API_KEY + diff --git a/apps/js-sdk/firecrawl/package-lock.json b/apps/js-sdk/firecrawl/package-lock.json index 6b085be8e..fec326b4a 100644 --- a/apps/js-sdk/firecrawl/package-lock.json +++ b/apps/js-sdk/firecrawl/package-lock.json @@ -1,22 +1,25 @@ { "name": "@mendable/firecrawl-js", - "version": "0.0.17-beta.8", + "version": "0.0.22", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@mendable/firecrawl-js", - "version": "0.0.17-beta.8", + "version": "0.0.22", "license": "MIT", "dependencies": { "axios": "^1.6.8", + "uuid": "^9.0.1", "zod": "^3.23.8", "zod-to-json-schema": "^3.23.0" }, "devDependencies": { "@jest/globals": "^29.7.0", "@types/axios": "^0.14.0", - "@types/node": "^20.12.7", + "@types/jest": "^29.5.12", + "@types/node": "^20.12.12", + "@types/uuid": "^9.0.8", "jest": "^29.7.0", "ts-jest": "^29.1.2", "typescript": "^5.4.5" @@ -1046,10 +1049,20 @@ "@types/istanbul-lib-report": "*" } }, + "node_modules/@types/jest": { + "version": "29.5.12", + "resolved": "https://registry.npmjs.org/@types/jest/-/jest-29.5.12.tgz", + "integrity": "sha512-eDC8bTvT/QhYdxJAulQikueigY5AsdBRH2yDKW3yveW7svY3+DzN84/2NUgkw10RTiJbWqZrTtoGVdYlvFJdLw==", + "dev": true, + "dependencies": { + "expect": "^29.0.0", + "pretty-format": "^29.0.0" + } + }, "node_modules/@types/node": { - "version": "20.12.7", - "resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.7.tgz", - "integrity": "sha512-wq0cICSkRLVaf3UGLMGItu/PtdY7oaXaI/RVU+xliKVOtRna3PRY57ZDfztpDL0n11vfymMUnXv8QwYCO7L1wg==", + "version": "20.12.12", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.12.tgz", + "integrity": "sha512-eWLDGF/FOSPtAvEqeRAQ4C8LSA7M1I7i0ky1I8U7kD1J5ITyW3AsRhQrKVoWf5pFKZ2kILsEGJhsI9r93PYnOw==", "dev": true, "dependencies": { "undici-types": "~5.26.4" @@ -1061,6 +1074,12 @@ "integrity": "sha512-9aEbYZ3TbYMznPdcdr3SmIrLXwC/AKZXQeCf9Pgao5CKb8CyHuEX5jzWPTkvregvhRJHcpRO6BFoGW9ycaOkYw==", "dev": true }, + "node_modules/@types/uuid": { + "version": "9.0.8", + "resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz", + "integrity": "sha512-jg+97EGIcY9AGHJJRaaPVgetKDsrTgbRjQ5Msgjh/DQKEFl0DtyRr/VCOyD1T2R1MNeWPK/u7JoGhlDZnKBAfA==", + "dev": true + }, "node_modules/@types/yargs": { "version": "17.0.32", "resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.32.tgz", @@ -3641,6 +3660,18 @@ "browserslist": ">= 4.21.0" } }, + "node_modules/uuid": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", + "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "bin": { + "uuid": "dist/bin/uuid" + } + }, "node_modules/v8-to-istanbul": { "version": "9.2.0", "resolved": "https://registry.npmjs.org/v8-to-istanbul/-/v8-to-istanbul-9.2.0.tgz", diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json index e43f6ea63..a9fdaaf49 100644 --- a/apps/js-sdk/firecrawl/package.json +++ b/apps/js-sdk/firecrawl/package.json @@ -9,7 +9,7 @@ "build": "tsc", "publish": "npm run build && npm publish --access public", "publish-beta": "npm run build && npm publish --access public --tag beta", - "test": "jest src/**/*.test.ts" + "test": "jest src/__tests__/**/*.test.ts" }, "repository": { "type": "git", @@ -19,6 +19,7 @@ "license": "MIT", "dependencies": { "axios": "^1.6.8", + "uuid": "^9.0.1", "zod": "^3.23.8", "zod-to-json-schema": "^3.23.0" }, @@ -29,7 +30,9 @@ "devDependencies": { "@jest/globals": "^29.7.0", "@types/axios": "^0.14.0", - "@types/node": "^20.12.7", + "@types/jest": "^29.5.12", + "@types/node": "^20.12.12", + "@types/uuid": "^9.0.8", "jest": "^29.7.0", "ts-jest": "^29.1.2", "typescript": "^5.4.5" diff --git a/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts b/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts new file mode 100644 index 000000000..13f53472c --- /dev/null +++ b/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts @@ -0,0 +1,147 @@ +import FirecrawlApp from '../../index'; +import { v4 as uuidv4 } from 'uuid'; +import dotenv from 'dotenv'; + +dotenv.config(); + +const TEST_API_KEY = process.env.TEST_API_KEY; +const API_URL = process.env.API_URL; + +describe('FirecrawlApp E2E Tests', () => { + test('should throw error for no API key', () => { + expect(() => { + new FirecrawlApp({ apiKey: null, apiUrl: API_URL }); + }).toThrow("No API key provided"); + }); + + test('should throw error for invalid API key on scrape', async () => { + const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL }); + await expect(invalidApp.scrapeUrl('https://firecrawl.dev')).rejects.toThrow("Request failed with status code 401"); + }); + + test('should throw error for blocklisted URL on scrape', async () => { + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); + const blocklistedUrl = "https://facebook.com/fake-test"; + await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403"); + }); + + test('should return successful response with valid preview token', async () => { + const app = new FirecrawlApp({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL }); + const response = await app.scrapeUrl('https://firecrawl.dev'); + expect(response).not.toBeNull(); + expect(response.data.content).toContain("🔥 Firecrawl"); + }, 10000); // 10 seconds timeout + + test('should return successful response for valid scrape', async () => { + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); + const response = await app.scrapeUrl('https://firecrawl.dev'); + expect(response).not.toBeNull(); + expect(response.data.content).toContain("🔥 Firecrawl"); + expect(response.data).toHaveProperty('markdown'); + expect(response.data).toHaveProperty('metadata'); + expect(response.data).not.toHaveProperty('html'); + }, 10000); // 10 seconds timeout + + test('should return successful response with valid API key and include HTML', async () => { + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); + const response = await app.scrapeUrl('https://firecrawl.dev', { pageOptions: { includeHtml: true } }); + expect(response).not.toBeNull(); + expect(response.data.content).toContain("🔥 Firecrawl"); + expect(response.data.markdown).toContain("🔥 Firecrawl"); + expect(response.data.html).toContain(" { + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); + const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf'); + expect(response).not.toBeNull(); + expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy'); + }, 30000); // 30 seconds timeout + + test('should return successful response for valid scrape with PDF file without explicit extension', async () => { + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); + const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001'); + await new Promise(resolve => setTimeout(resolve, 6000)); // wait for 6 seconds + expect(response).not.toBeNull(); + expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy'); + }, 30000); // 30 seconds timeout + + test('should throw error for invalid API key on crawl', async () => { + const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL }); + await expect(invalidApp.crawlUrl('https://firecrawl.dev')).rejects.toThrow("Request failed with status code 401"); + }); + + test('should throw error for blocklisted URL on crawl', async () => { + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); + const blocklistedUrl = "https://twitter.com/fake-test"; + await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403"); + }); + + test('should return successful response for crawl and wait for completion', async () => { + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); + const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true); + expect(response).not.toBeNull(); + expect(response[0].content).toContain("🔥 Firecrawl"); + }, 60000); // 60 seconds timeout + + test('should handle idempotency key for crawl', async () => { + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); + const uniqueIdempotencyKey = uuidv4(); + const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey); + expect(response).not.toBeNull(); + expect(response[0].content).toContain("🔥 Firecrawl"); + + await expect(app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409"); + }, 30000); // 30 seconds timeout + + test('should check crawl status', async () => { + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); + const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, false); + expect(response).not.toBeNull(); + expect(response.jobId).toBeDefined(); + + await new Promise(resolve => setTimeout(resolve, 10000)); // wait for 10 seconds + const statusResponse = await app.checkCrawlStatus(response.jobId); + expect(statusResponse).not.toBeNull(); + expect(statusResponse.status).toBe('completed'); + expect(statusResponse.data.length).toBeGreaterThan(0); + }, 30000); // 30 seconds timeout + + test('should return successful response for search', async () => { + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); + const response = await app.search("test query"); + expect(response).not.toBeNull(); + expect(response.data[0].content).toBeDefined(); + expect(response.data.length).toBeGreaterThan(2); + }, 30000); // 30 seconds timeout + + test('should throw error for invalid API key on search', async () => { + const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL }); + await expect(invalidApp.search("test query")).rejects.toThrow("Request failed with status code 401"); + }); + + test('should perform LLM extraction', async () => { + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); + const response = await app.scrapeUrl("https://mendable.ai", { + extractorOptions: { + mode: 'llm-extraction', + extractionPrompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source", + extractionSchema: { + type: 'object', + properties: { + company_mission: { type: 'string' }, + supports_sso: { type: 'boolean' }, + is_open_source: { type: 'boolean' } + }, + required: ['company_mission', 'supports_sso', 'is_open_source'] + } + } + }); + expect(response).not.toBeNull(); + expect(response.data.llm_extraction).toBeDefined(); + const llmExtraction = response.data.llm_extraction; + expect(llmExtraction.company_mission).toBeDefined(); + expect(typeof llmExtraction.supports_sso).toBe('boolean'); + expect(typeof llmExtraction.is_open_source).toBe('boolean'); + }, 30000); // 30 seconds timeout +}); diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index 0bdcf7ceb..2a07f60d0 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -6,6 +6,7 @@ import { zodToJsonSchema } from "zod-to-json-schema"; */ export interface FirecrawlAppConfig { apiKey?: string | null; + apiUrl?: string | null; } /** @@ -63,6 +64,7 @@ export interface JobStatusResponse { */ export default class FirecrawlApp { private apiKey: string; + private apiUrl: string = "https://api.firecrawl.dev"; /** * Initializes a new instance of the FirecrawlApp class. @@ -107,7 +109,7 @@ export default class FirecrawlApp { } try { const response: AxiosResponse = await axios.post( - "https://api.firecrawl.dev/v0/scrape", + this.apiUrl + "/v0/scrape", jsonData, { headers }, ); @@ -147,7 +149,7 @@ export default class FirecrawlApp { } try { const response: AxiosResponse = await axios.post( - "https://api.firecrawl.dev/v0/search", + this.apiUrl + "/v0/search", jsonData, { headers } ); @@ -190,7 +192,7 @@ export default class FirecrawlApp { } try { const response: AxiosResponse = await this.postRequest( - "https://api.firecrawl.dev/v0/crawl", + this.apiUrl + "/v0/crawl", jsonData, headers ); @@ -220,7 +222,7 @@ export default class FirecrawlApp { const headers: AxiosRequestHeaders = this.prepareHeaders(); try { const response: AxiosResponse = await this.getRequest( - `https://api.firecrawl.dev/v0/crawl/status/${jobId}`, + this.apiUrl + `/v0/crawl/status/${jobId}`, headers ); if (response.status === 200) { @@ -292,7 +294,7 @@ export default class FirecrawlApp { ): Promise { while (true) { const statusResponse: AxiosResponse = await this.getRequest( - `https://api.firecrawl.dev/v0/crawl/status/${jobId}`, + this.apiUrl + `/v0/crawl/status/${jobId}`, headers ); if (statusResponse.status === 200) { diff --git a/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/.env.example b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/.env.example new file mode 100644 index 000000000..904887bf0 --- /dev/null +++ b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/.env.example @@ -0,0 +1,3 @@ +API_URL=http://localhost:3002 +ABSOLUTE_FIRECRAWL_PATH=/Users/user/firecrawl/apps/python-sdk/firecrawl/firecrawl.py +TEST_API_KEY=fc-YOUR_API_KEY \ No newline at end of file diff --git a/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py index 11b66e981..1a3b64143 100644 --- a/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py +++ b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py @@ -1,78 +1,150 @@ +import importlib.util import pytest -from firecrawl import FirecrawlApp +import time +import os +from uuid import uuid4 +from dotenv import load_dotenv -TEST_API_KEY = "fc-YOUR_API_KEY" -TEST_URL = "https://firecrawl.dev" +load_dotenv() + +API_URL = "http://127.0.0.1:3002"; +ABSOLUTE_FIRECRAWL_PATH = "./apps/python-sdk/firecrawl/firecrawl.py" +TEST_API_KEY = os.getenv('TEST_API_KEY') + +print(f"ABSOLUTE_FIRECRAWL_PATH: {ABSOLUTE_FIRECRAWL_PATH}") + +spec = importlib.util.spec_from_file_location("FirecrawlApp", ABSOLUTE_FIRECRAWL_PATH) +firecrawl = importlib.util.module_from_spec(spec) +spec.loader.exec_module(firecrawl) +FirecrawlApp = firecrawl.FirecrawlApp + +def test_no_api_key(): + with pytest.raises(Exception) as excinfo: + invalid_app = FirecrawlApp(api_url=API_URL) + assert "No API key provided" in str(excinfo.value) + +def test_scrape_url_invalid_api_key(): + invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key") + with pytest.raises(Exception) as excinfo: + invalid_app.scrape_url('https://firecrawl.dev') + assert "Failed to scrape URL. Status code: 401" in str(excinfo.value) + +def test_blocklisted_url(): + blocklisted_url = "https://facebook.com/fake-test" + app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) + with pytest.raises(Exception) as excinfo: + app.scrape_url(blocklisted_url) + assert "Failed to scrape URL. Status code: 403" in str(excinfo.value) + +def test_successful_response_with_valid_preview_token(): + app = FirecrawlApp(api_url=API_URL, api_key="this_is_just_a_preview_token") + response = app.scrape_url('https://firecrawl.dev') + assert response is not None + assert 'content' in response + assert "🔥 Firecrawl" in response['content'] def test_scrape_url_e2e(): - app = FirecrawlApp(api_key=TEST_API_KEY) - response = app.scrape_url(TEST_URL) - print(response) + app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) + response = app.scrape_url('https://firecrawl.dev') assert response is not None assert 'content' in response + assert 'markdown' in response + assert 'metadata' in response + assert 'html' not in response assert "🔥 Firecrawl" in response['content'] -def test_scrape_url_invalid_api_key(): - invalid_app = FirecrawlApp(api_key="invalid_api_key") - with pytest.raises(Exception) as excinfo: - invalid_app.scrape_url(TEST_URL) - assert "Failed to scrape URL. Status code: 401" in str(excinfo.value) +def test_successful_response_with_valid_api_key_and_include_html(): + app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) + response = app.scrape_url('https://firecrawl.dev', {'pageOptions': {'includeHtml': True}}) + assert response is not None + assert 'content' in response + assert 'markdown' in response + assert 'html' in response + assert 'metadata' in response + assert "🔥 Firecrawl" in response['content'] + assert "🔥 Firecrawl" in response['markdown'] + assert " 0 - assert 'content' in response[0] - assert "🔥 Firecrawl" in response[0]['content'] + assert 'content' in response + assert 'metadata' in response + assert 'We present spectrophotometric observations of the Broad Line Radio Galaxy' in response['content'] + +def test_successful_response_for_valid_scrape_with_pdf_file_without_explicit_extension(): + app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) + response = app.scrape_url('https://arxiv.org/pdf/astro-ph/9301001') + time.sleep(6) # wait for 6 seconds + assert response is not None + assert 'content' in response + assert 'metadata' in response + assert 'We present spectrophotometric observations of the Broad Line Radio Galaxy' in response['content'] def test_crawl_url_invalid_api_key(): - invalid_app = FirecrawlApp(api_key="invalid_api_key") + invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key") with pytest.raises(Exception) as excinfo: - invalid_app.crawl_url(TEST_URL) + invalid_app.crawl_url('https://firecrawl.dev') assert "Unexpected error occurred while trying to start crawl job. Status code: 401" in str(excinfo.value) -def test_search_e2e(): - app = FirecrawlApp(api_key=TEST_API_KEY) - response = app.search("test query") +def test_should_return_error_for_blocklisted_url(): + app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) + blocklisted_url = "https://twitter.com/fake-test" + with pytest.raises(Exception) as excinfo: + app.crawl_url(blocklisted_url) + assert "Unexpected error occurred while trying to start crawl job. Status code: 403" in str(excinfo.value) + +def test_crawl_url_wait_for_completion_e2e(): + app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) + response = app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True) assert response is not None + assert len(response) > 0 assert 'content' in response[0] - assert len(response) > 2 - -def test_search_invalid_api_key(): - invalid_app = FirecrawlApp(api_key="invalid_api_key") - with pytest.raises(Exception) as excinfo: - invalid_app.search("test query") - assert "Failed to search. Status code: 401" in str(excinfo.value) + assert "🔥 Firecrawl" in response[0]['content'] -def test_crawl_with_fast_mode(): - app = FirecrawlApp(api_key=TEST_API_KEY) - response = app.crawl_url(TEST_URL, {'crawlerOptions': {'mode': 'fast'}}, True) +def test_crawl_url_with_idempotency_key_e2e(): + app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) + uniqueIdempotencyKey = str(uuid4()) + response = app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey) assert response is not None assert len(response) > 0 assert 'content' in response[0] + assert "🔥 Firecrawl" in response[0]['content'] + + with pytest.raises(Exception) as excinfo: + app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey) + assert "Failed to start crawl job. Status code: 409. Error: Idempotency key already used" in str(excinfo.value) -def test_crawl_with_html_inclusion(): - app = FirecrawlApp(api_key=TEST_API_KEY) - response = app.crawl_url(TEST_URL, {'pageOptions': {'includeHtml': True}}, False) +def test_check_crawl_status_e2e(): + app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) + response = app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, False) assert response is not None assert 'jobId' in response + + time.sleep(30) # wait for 30 seconds + status_response = app.check_crawl_status(response['jobId']) + assert status_response is not None + assert 'status' in status_response + assert status_response['status'] == 'completed' + assert 'data' in status_response + assert len(status_response['data']) > 0 -def test_crawl_with_pdf_extraction(): - app = FirecrawlApp(api_key=TEST_API_KEY) - response = app.crawl_url("https://arxiv.org/pdf/astro-ph/9301001", - {'crawlerOptions': {'limit': 10, 'excludes': ['list/*', 'login', 'abs/*', 'static/*', 'about/*', 'archive/*']}}, False) +def test_search_e2e(): + app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) + response = app.search("test query") assert response is not None - assert 'jobId' in response + assert 'content' in response[0] + assert len(response) > 2 -def test_timeout_during_scraping(): - app = FirecrawlApp(api_key=TEST_API_KEY) +def test_search_invalid_api_key(): + invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key") with pytest.raises(Exception) as excinfo: - app.scrape_url(TEST_URL, {'timeout': 1000}) - assert 'Failed to scrape URL. Status code: 408' in str(excinfo.value) + invalid_app.search("test query") + assert "Failed to search. Status code: 401" in str(excinfo.value) def test_llm_extraction(): - app = FirecrawlApp(api_key=TEST_API_KEY) + app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY) response = app.scrape_url("https://mendable.ai", { 'extractorOptions': { 'mode': 'llm-extraction', diff --git a/apps/python-sdk/setup.py b/apps/python-sdk/setup.py index 6674a8927..726cafa34 100644 --- a/apps/python-sdk/setup.py +++ b/apps/python-sdk/setup.py @@ -11,5 +11,6 @@ install_requires=[ 'requests', 'pytest', + 'python-dotenv', ], ) From 19decd1062c08e0b6e42c6d9152a376cfd645457 Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Mon, 27 May 2024 14:21:33 -0300 Subject: [PATCH 03/14] fixing workflow --- .github/workflows/python-tests.yml | 2 +- apps/python-sdk/requirements.txt | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 apps/python-sdk/requirements.txt diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml index 9c3783bb5..327fcb257 100644 --- a/.github/workflows/python-tests.yml +++ b/.github/workflows/python-tests.yml @@ -22,7 +22,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - python setup.py + pip install -r requirements.txt working-directory: ./apps/python-sdk - name: Test with pytest run: | diff --git a/apps/python-sdk/requirements.txt b/apps/python-sdk/requirements.txt new file mode 100644 index 000000000..1bed58814 --- /dev/null +++ b/apps/python-sdk/requirements.txt @@ -0,0 +1,3 @@ +requests +pytest +python-dotenv \ No newline at end of file From c410dbe5bdec5b172118b049cb4aa055d7835ab5 Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Mon, 27 May 2024 14:24:30 -0300 Subject: [PATCH 04/14] Update python-tests.yml --- .github/workflows/python-tests.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml index 327fcb257..9bd9ddb83 100644 --- a/.github/workflows/python-tests.yml +++ b/.github/workflows/python-tests.yml @@ -26,6 +26,5 @@ jobs: working-directory: ./apps/python-sdk - name: Test with pytest run: | - cd apps/python-sdk pytest firecrawl/__tests__/e2e_withAuth/test.py working-directory: ./apps/python-sdk From a9b68d95d88518b75a6551bede75ce7191c82eca Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Mon, 27 May 2024 14:28:44 -0300 Subject: [PATCH 05/14] Update test.py --- apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py index 1a3b64143..86ce1f9f5 100644 --- a/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py +++ b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py @@ -8,7 +8,7 @@ load_dotenv() API_URL = "http://127.0.0.1:3002"; -ABSOLUTE_FIRECRAWL_PATH = "./apps/python-sdk/firecrawl/firecrawl.py" +ABSOLUTE_FIRECRAWL_PATH = "firecrawl/firecrawl.py" TEST_API_KEY = os.getenv('TEST_API_KEY') print(f"ABSOLUTE_FIRECRAWL_PATH: {ABSOLUTE_FIRECRAWL_PATH}") From f32c16258a5aacf7cb15825df11509810fa63927 Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Mon, 27 May 2024 14:32:34 -0300 Subject: [PATCH 06/14] missing node setup --- .github/workflows/python-tests.yml | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml index 9bd9ddb83..4e154b8c2 100644 --- a/.github/workflows/python-tests.yml +++ b/.github/workflows/python-tests.yml @@ -13,8 +13,25 @@ jobs: matrix: python-version: ["3.10"] - steps: + steps: - uses: actions/checkout@v3 + - name: Set up Node.js + uses: actions/setup-node@v3 + with: + node-version: "20" + - name: Install pnpm + run: npm install -g pnpm + - name: Install dependencies for API + run: pnpm install + working-directory: ./apps/api + - name: Start the application + run: npm start & + working-directory: ./apps/api + id: start_app + - name: Start workers + run: npm run workers & + working-directory: ./apps/api + id: start_workers - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: From 952ccd8755dc334755a90e7a680b76386d86b192 Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Mon, 27 May 2024 14:34:39 -0300 Subject: [PATCH 07/14] envs --- .github/workflows/python-tests.yml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml index 4e154b8c2..1a7c1df58 100644 --- a/.github/workflows/python-tests.yml +++ b/.github/workflows/python-tests.yml @@ -5,7 +5,27 @@ on: branches: - main env: + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }} + FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }} + HOST: ${{ secrets.HOST }} + LLAMAPARSE_API_KEY: ${{ secrets.LLAMAPARSE_API_KEY }} + LOGTAIL_KEY: ${{ secrets.LOGTAIL_KEY }} + POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }} + POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }} + NUM_WORKERS_PER_QUEUE: ${{ secrets.NUM_WORKERS_PER_QUEUE }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + PLAYWRIGHT_MICROSERVICE_URL: ${{ secrets.PLAYWRIGHT_MICROSERVICE_URL }} + PORT: ${{ secrets.PORT }} + REDIS_URL: ${{ secrets.REDIS_URL }} + SCRAPING_BEE_API_KEY: ${{ secrets.SCRAPING_BEE_API_KEY }} + SUPABASE_ANON_TOKEN: ${{ secrets.SUPABASE_ANON_TOKEN }} + SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }} + SUPABASE_URL: ${{ secrets.SUPABASE_URL }} TEST_API_KEY: ${{ secrets.TEST_API_KEY }} + HYPERDX_API_KEY: ${{ secrets.HYPERDX_API_KEY }} + HDX_NODE_BETA_MODE: 1 + jobs: build: runs-on: ubuntu-latest From d0c4b24a0ef29fa2a090e91e2c4a40c355367a5d Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Mon, 27 May 2024 14:36:32 -0300 Subject: [PATCH 08/14] missing redis --- .github/workflows/python-tests.yml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml index 1a7c1df58..9b2145dcc 100644 --- a/.github/workflows/python-tests.yml +++ b/.github/workflows/python-tests.yml @@ -25,16 +25,21 @@ env: TEST_API_KEY: ${{ secrets.TEST_API_KEY }} HYPERDX_API_KEY: ${{ secrets.HYPERDX_API_KEY }} HDX_NODE_BETA_MODE: 1 - + jobs: build: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.10"] + python-version: ["3.10"] + services: + redis: + image: redis + ports: + - 6379:6379 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v3 - name: Set up Node.js uses: actions/setup-node@v3 with: From 127d2db1dd1d8aeec259d2d3e7cc51c4124d975e Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Mon, 27 May 2024 15:54:09 -0300 Subject: [PATCH 09/14] added js/ts sdk tests --- .github/workflows/fly.yml | 16 ++++++ .github/workflows/js-sdk.yml | 57 +++++++++++++++++++ .../{python-tests.yml => python-sdk.yml} | 4 +- .../src/__tests__/e2e_withAuth/index.test.ts | 1 - 4 files changed, 75 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/js-sdk.yml rename .github/workflows/{python-tests.yml => python-sdk.yml} (96%) diff --git a/.github/workflows/fly.yml b/.github/workflows/fly.yml index 09d81af15..6285831da 100644 --- a/.github/workflows/fly.yml +++ b/.github/workflows/fly.yml @@ -94,6 +94,22 @@ jobs: run: | npm run test working-directory: ./apps/test-suite + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install Python dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + working-directory: ./apps/python-sdk + - name: Run E2E tests for Python SDK + run: | + pytest firecrawl/__tests__/e2e_withAuth/test.py + working-directory: ./apps/python-sdk + - name: Run E2E tests for JavaScript SDK + run: npm run tests + working-directory: ./apps/js-sdk/firecrawl deploy: name: Deploy app diff --git a/.github/workflows/js-sdk.yml b/.github/workflows/js-sdk.yml new file mode 100644 index 000000000..86f0ba151 --- /dev/null +++ b/.github/workflows/js-sdk.yml @@ -0,0 +1,57 @@ +name: Run JavaScript SDK E2E Tests + +on: + pull_request: + branches: + - main +env: + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }} + FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }} + HOST: ${{ secrets.HOST }} + LLAMAPARSE_API_KEY: ${{ secrets.LLAMAPARSE_API_KEY }} + LOGTAIL_KEY: ${{ secrets.LOGTAIL_KEY }} + POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }} + POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }} + NUM_WORKERS_PER_QUEUE: ${{ secrets.NUM_WORKERS_PER_QUEUE }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + PLAYWRIGHT_MICROSERVICE_URL: ${{ secrets.PLAYWRIGHT_MICROSERVICE_URL }} + PORT: ${{ secrets.PORT }} + REDIS_URL: ${{ secrets.REDIS_URL }} + SCRAPING_BEE_API_KEY: ${{ secrets.SCRAPING_BEE_API_KEY }} + SUPABASE_ANON_TOKEN: ${{ secrets.SUPABASE_ANON_TOKEN }} + SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }} + SUPABASE_URL: ${{ secrets.SUPABASE_URL }} + TEST_API_KEY: ${{ secrets.TEST_API_KEY }} + HYPERDX_API_KEY: ${{ secrets.HYPERDX_API_KEY }} + HDX_NODE_BETA_MODE: 1 + +jobs: + build: + runs-on: ubuntu-latest + services: + redis: + image: redis + ports: + - 6379:6379 + + steps: + - uses: actions/checkout@v3 + - name: Set up Node.js + uses: actions/setup-node@v3 + with: + node-version: "20" + - name: Install pnpm + run: npm install -g pnpm + - name: Install dependencies for API + run: pnpm install + working-directory: ./apps/api + - name: Start the application + run: npm start & + working-directory: ./apps/api + - name: Start workers + run: npm run workers & + working-directory: ./apps/api + - name: Run E2E tests for JavaScript SDK + run: npm run tests + working-directory: ./apps/js-sdk/firecrawl \ No newline at end of file diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-sdk.yml similarity index 96% rename from .github/workflows/python-tests.yml rename to .github/workflows/python-sdk.yml index 9b2145dcc..1308cdef5 100644 --- a/.github/workflows/python-tests.yml +++ b/.github/workflows/python-sdk.yml @@ -61,12 +61,12 @@ jobs: uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - - name: Install dependencies + - name: Install Python dependencies run: | python -m pip install --upgrade pip pip install -r requirements.txt working-directory: ./apps/python-sdk - - name: Test with pytest + - name: Run E2E tests for Python SDK run: | pytest firecrawl/__tests__/e2e_withAuth/test.py working-directory: ./apps/python-sdk diff --git a/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts b/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts index 13f53472c..f06538bef 100644 --- a/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts +++ b/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts @@ -61,7 +61,6 @@ describe('FirecrawlApp E2E Tests', () => { test('should return successful response for valid scrape with PDF file without explicit extension', async () => { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001'); - await new Promise(resolve => setTimeout(resolve, 6000)); // wait for 6 seconds expect(response).not.toBeNull(); expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy'); }, 30000); // 30 seconds timeout From e87d39e6ecafd587517df963578b480504198eac Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Mon, 27 May 2024 15:55:48 -0300 Subject: [PATCH 10/14] typo --- .github/workflows/fly.yml | 2 +- .github/workflows/js-sdk.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/fly.yml b/.github/workflows/fly.yml index 6285831da..1976d8f1e 100644 --- a/.github/workflows/fly.yml +++ b/.github/workflows/fly.yml @@ -108,7 +108,7 @@ jobs: pytest firecrawl/__tests__/e2e_withAuth/test.py working-directory: ./apps/python-sdk - name: Run E2E tests for JavaScript SDK - run: npm run tests + run: npm run test working-directory: ./apps/js-sdk/firecrawl deploy: diff --git a/.github/workflows/js-sdk.yml b/.github/workflows/js-sdk.yml index 86f0ba151..82aa42c9b 100644 --- a/.github/workflows/js-sdk.yml +++ b/.github/workflows/js-sdk.yml @@ -53,5 +53,5 @@ jobs: run: npm run workers & working-directory: ./apps/api - name: Run E2E tests for JavaScript SDK - run: npm run tests + run: npm run test working-directory: ./apps/js-sdk/firecrawl \ No newline at end of file From 6b58da1c96040963611ee0a2cf8b30fde5d9732d Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Mon, 27 May 2024 16:01:48 -0300 Subject: [PATCH 11/14] jest --- .github/workflows/fly.yml | 3 +++ .github/workflows/js-sdk.yml | 3 +++ 2 files changed, 6 insertions(+) diff --git a/.github/workflows/fly.yml b/.github/workflows/fly.yml index 1976d8f1e..deddc9f21 100644 --- a/.github/workflows/fly.yml +++ b/.github/workflows/fly.yml @@ -107,6 +107,9 @@ jobs: run: | pytest firecrawl/__tests__/e2e_withAuth/test.py working-directory: ./apps/python-sdk + - name: Install dependencies for JavaScript SDK + run: pnpm install + working-directory: ./apps/js-sdk/firecrawl - name: Run E2E tests for JavaScript SDK run: npm run test working-directory: ./apps/js-sdk/firecrawl diff --git a/.github/workflows/js-sdk.yml b/.github/workflows/js-sdk.yml index 82aa42c9b..3c914cc80 100644 --- a/.github/workflows/js-sdk.yml +++ b/.github/workflows/js-sdk.yml @@ -52,6 +52,9 @@ jobs: - name: Start workers run: npm run workers & working-directory: ./apps/api + - name: Install dependencies for JavaScript SDK + run: pnpm install + working-directory: ./apps/js-sdk/firecrawl - name: Run E2E tests for JavaScript SDK run: npm run test working-directory: ./apps/js-sdk/firecrawl \ No newline at end of file From 41c4ef6a82919baba396549479c354d0175d886f Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Mon, 27 May 2024 16:23:57 -0300 Subject: [PATCH 12/14] dotenv was missing --- apps/js-sdk/firecrawl/package-lock.json | 23 +++++++++++++++ apps/js-sdk/firecrawl/package.json | 2 ++ apps/js-sdk/package-lock.json | 38 ++++++++++++++++--------- apps/js-sdk/package.json | 3 +- 4 files changed, 51 insertions(+), 15 deletions(-) diff --git a/apps/js-sdk/firecrawl/package-lock.json b/apps/js-sdk/firecrawl/package-lock.json index fec326b4a..b1cebde83 100644 --- a/apps/js-sdk/firecrawl/package-lock.json +++ b/apps/js-sdk/firecrawl/package-lock.json @@ -10,6 +10,7 @@ "license": "MIT", "dependencies": { "axios": "^1.6.8", + "dotenv": "^16.4.5", "uuid": "^9.0.1", "zod": "^3.23.8", "zod-to-json-schema": "^3.23.0" @@ -17,6 +18,7 @@ "devDependencies": { "@jest/globals": "^29.7.0", "@types/axios": "^0.14.0", + "@types/dotenv": "^8.2.0", "@types/jest": "^29.5.12", "@types/node": "^20.12.12", "@types/uuid": "^9.0.8", @@ -1016,6 +1018,16 @@ "@babel/types": "^7.20.7" } }, + "node_modules/@types/dotenv": { + "version": "8.2.0", + "resolved": "https://registry.npmjs.org/@types/dotenv/-/dotenv-8.2.0.tgz", + "integrity": "sha512-ylSC9GhfRH7m1EUXBXofhgx4lUWmFeQDINW5oLuS+gxWdfUeW4zJdeVTYVkexEW+e2VUvlZR2kGnGGipAWR7kw==", + "deprecated": "This is a stub types definition. dotenv provides its own type definitions, so you do not need this installed.", + "dev": true, + "dependencies": { + "dotenv": "*" + } + }, "node_modules/@types/graceful-fs": { "version": "4.1.9", "resolved": "https://registry.npmjs.org/@types/graceful-fs/-/graceful-fs-4.1.9.tgz", @@ -1621,6 +1633,17 @@ "node": "^14.15.0 || ^16.10.0 || >=18.0.0" } }, + "node_modules/dotenv": { + "version": "16.4.5", + "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.5.tgz", + "integrity": "sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://dotenvx.com" + } + }, "node_modules/electron-to-chromium": { "version": "1.4.748", "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.4.748.tgz", diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json index a9fdaaf49..a1c42a0cc 100644 --- a/apps/js-sdk/firecrawl/package.json +++ b/apps/js-sdk/firecrawl/package.json @@ -19,6 +19,7 @@ "license": "MIT", "dependencies": { "axios": "^1.6.8", + "dotenv": "^16.4.5", "uuid": "^9.0.1", "zod": "^3.23.8", "zod-to-json-schema": "^3.23.0" @@ -30,6 +31,7 @@ "devDependencies": { "@jest/globals": "^29.7.0", "@types/axios": "^0.14.0", + "@types/dotenv": "^8.2.0", "@types/jest": "^29.5.12", "@types/node": "^20.12.12", "@types/uuid": "^9.0.8", diff --git a/apps/js-sdk/package-lock.json b/apps/js-sdk/package-lock.json index 516765347..c59a371c4 100644 --- a/apps/js-sdk/package-lock.json +++ b/apps/js-sdk/package-lock.json @@ -11,9 +11,10 @@ "dependencies": { "@mendable/firecrawl-js": "^0.0.19", "axios": "^1.6.8", - "uuid": "^9.0.1", + "dotenv": "^16.4.5", "ts-node": "^10.9.2", "typescript": "^5.4.5", + "uuid": "^9.0.1", "zod": "^3.23.8" }, "devDependencies": { @@ -531,6 +532,17 @@ "node": ">=0.3.1" } }, + "node_modules/dotenv": { + "version": "16.4.5", + "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.5.tgz", + "integrity": "sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://dotenvx.com" + } + }, "node_modules/esbuild": { "version": "0.20.2", "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.20.2.tgz", @@ -744,6 +756,18 @@ "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", "peer": true }, + "node_modules/uuid": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", + "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "bin": { + "uuid": "dist/bin/uuid" + } + }, "node_modules/v8-compile-cache-lib": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz", @@ -772,18 +796,6 @@ "peerDependencies": { "zod": "^3.23.3" } - }, - "node_modules/uuid": { - "version": "9.0.1", - "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", - "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", - "funding": [ - "https://github.com/sponsors/broofa", - "https://github.com/sponsors/ctavan" - ], - "bin": { - "uuid": "dist/bin/uuid" - } } } } diff --git a/apps/js-sdk/package.json b/apps/js-sdk/package.json index df9f99e5f..0e93fe3c2 100644 --- a/apps/js-sdk/package.json +++ b/apps/js-sdk/package.json @@ -11,9 +11,8 @@ "author": "", "license": "ISC", "dependencies": { - "axios": "^1.6.8", - "uuid": "^9.0.1", "@mendable/firecrawl-js": "^0.0.19", + "axios": "^1.6.8", "ts-node": "^10.9.2", "typescript": "^5.4.5", "zod": "^3.23.8" From d5c83803cd9adb61209934d6c6e707ad1e757ab6 Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Mon, 27 May 2024 16:35:01 -0300 Subject: [PATCH 13/14] fixing idempotency test --- .../firecrawl/src/__tests__/e2e_withAuth/index.test.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts b/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts index f06538bef..8f2e137ed 100644 --- a/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts +++ b/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts @@ -86,12 +86,12 @@ describe('FirecrawlApp E2E Tests', () => { test('should handle idempotency key for crawl', async () => { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const uniqueIdempotencyKey = uuidv4(); - const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey); + const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, false, 2, uniqueIdempotencyKey); expect(response).not.toBeNull(); - expect(response[0].content).toContain("🔥 Firecrawl"); + expect(response.jobId).toBeDefined(); await expect(app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409"); - }, 30000); // 30 seconds timeout + }); test('should check crawl status', async () => { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); From 71187b03a207a4a5bdfd69c0db4ca0cbeeba5592 Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Mon, 27 May 2024 16:48:08 -0300 Subject: [PATCH 14/14] added timeout --- apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts b/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts index 8f2e137ed..c9db6a91e 100644 --- a/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts +++ b/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts @@ -78,7 +78,7 @@ describe('FirecrawlApp E2E Tests', () => { test('should return successful response for crawl and wait for completion', async () => { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); - const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true); + const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 30); expect(response).not.toBeNull(); expect(response[0].content).toContain("🔥 Firecrawl"); }, 60000); // 60 seconds timeout