From 397769c7e3579ca2709f127642def7a040249c58 Mon Sep 17 00:00:00 2001
From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com>
Date: Fri, 24 May 2024 17:56:27 -0300
Subject: [PATCH 01/14] added python sdk e2e tests with pytest
some of them are still missing though
---
apps/python-sdk/README.md | 19 ++++
.../__tests__/e2e_withAuth/__init__.py | 0
.../firecrawl/__tests__/e2e_withAuth/test.py | 96 +++++++++++++++++++
apps/python-sdk/setup.py | 1 +
4 files changed, 116 insertions(+)
create mode 100644 apps/python-sdk/firecrawl/__tests__/e2e_withAuth/__init__.py
create mode 100644 apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py
diff --git a/apps/python-sdk/README.md b/apps/python-sdk/README.md
index 38ca843bc..ae0997387 100644
--- a/apps/python-sdk/README.md
+++ b/apps/python-sdk/README.md
@@ -117,6 +117,25 @@ status = app.check_crawl_status(job_id)
The SDK handles errors returned by the Firecrawl API and raises appropriate exceptions. If an error occurs during a request, an exception will be raised with a descriptive error message.
+## Running the Tests with Pytest
+
+To ensure the functionality of the Firecrawl Python SDK, we have included end-to-end tests using `pytest`. These tests cover various aspects of the SDK, including URL scraping, web searching, and website crawling.
+
+### Running the Tests
+
+To run the tests, execute the following commands:
+
+Install pytest:
+```bash
+pip install pytest
+```
+
+Run:
+```bash
+pytest firecrawl/__tests__/e2e_withAuth/test.py
+```
+
+
## Contributing
Contributions to the Firecrawl Python SDK are welcome! If you find any issues or have suggestions for improvements, please open an issue or submit a pull request on the GitHub repository.
diff --git a/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/__init__.py b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py
new file mode 100644
index 000000000..11b66e981
--- /dev/null
+++ b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py
@@ -0,0 +1,96 @@
+import pytest
+from firecrawl import FirecrawlApp
+
+TEST_API_KEY = "fc-YOUR_API_KEY"
+TEST_URL = "https://firecrawl.dev"
+
+def test_scrape_url_e2e():
+ app = FirecrawlApp(api_key=TEST_API_KEY)
+ response = app.scrape_url(TEST_URL)
+ print(response)
+ assert response is not None
+ assert 'content' in response
+ assert "🔥 Firecrawl" in response['content']
+
+def test_scrape_url_invalid_api_key():
+ invalid_app = FirecrawlApp(api_key="invalid_api_key")
+ with pytest.raises(Exception) as excinfo:
+ invalid_app.scrape_url(TEST_URL)
+ assert "Failed to scrape URL. Status code: 401" in str(excinfo.value)
+
+def test_crawl_url_e2e():
+ app = FirecrawlApp(api_key=TEST_API_KEY)
+ response = app.crawl_url(TEST_URL, {'crawlerOptions': {'excludes': ['blog/*']}}, True)
+ assert response is not None
+ assert len(response) > 0
+ assert 'content' in response[0]
+ assert "🔥 Firecrawl" in response[0]['content']
+
+def test_crawl_url_invalid_api_key():
+ invalid_app = FirecrawlApp(api_key="invalid_api_key")
+ with pytest.raises(Exception) as excinfo:
+ invalid_app.crawl_url(TEST_URL)
+ assert "Unexpected error occurred while trying to start crawl job. Status code: 401" in str(excinfo.value)
+
+def test_search_e2e():
+ app = FirecrawlApp(api_key=TEST_API_KEY)
+ response = app.search("test query")
+ assert response is not None
+ assert 'content' in response[0]
+ assert len(response) > 2
+
+def test_search_invalid_api_key():
+ invalid_app = FirecrawlApp(api_key="invalid_api_key")
+ with pytest.raises(Exception) as excinfo:
+ invalid_app.search("test query")
+ assert "Failed to search. Status code: 401" in str(excinfo.value)
+
+def test_crawl_with_fast_mode():
+ app = FirecrawlApp(api_key=TEST_API_KEY)
+ response = app.crawl_url(TEST_URL, {'crawlerOptions': {'mode': 'fast'}}, True)
+ assert response is not None
+ assert len(response) > 0
+ assert 'content' in response[0]
+
+def test_crawl_with_html_inclusion():
+ app = FirecrawlApp(api_key=TEST_API_KEY)
+ response = app.crawl_url(TEST_URL, {'pageOptions': {'includeHtml': True}}, False)
+ assert response is not None
+ assert 'jobId' in response
+
+def test_crawl_with_pdf_extraction():
+ app = FirecrawlApp(api_key=TEST_API_KEY)
+ response = app.crawl_url("https://arxiv.org/pdf/astro-ph/9301001",
+ {'crawlerOptions': {'limit': 10, 'excludes': ['list/*', 'login', 'abs/*', 'static/*', 'about/*', 'archive/*']}}, False)
+ assert response is not None
+ assert 'jobId' in response
+
+def test_timeout_during_scraping():
+ app = FirecrawlApp(api_key=TEST_API_KEY)
+ with pytest.raises(Exception) as excinfo:
+ app.scrape_url(TEST_URL, {'timeout': 1000})
+ assert 'Failed to scrape URL. Status code: 408' in str(excinfo.value)
+
+def test_llm_extraction():
+ app = FirecrawlApp(api_key=TEST_API_KEY)
+ response = app.scrape_url("https://mendable.ai", {
+ 'extractorOptions': {
+ 'mode': 'llm-extraction',
+ 'extractionPrompt': "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
+ 'extractionSchema': {
+ 'type': 'object',
+ 'properties': {
+ 'company_mission': {'type': 'string'},
+ 'supports_sso': {'type': 'boolean'},
+ 'is_open_source': {'type': 'boolean'}
+ },
+ 'required': ['company_mission', 'supports_sso', 'is_open_source']
+ }
+ }
+ })
+ assert response is not None
+ assert 'llm_extraction' in response
+ llm_extraction = response['llm_extraction']
+ assert 'company_mission' in llm_extraction
+ assert isinstance(llm_extraction['supports_sso'], bool)
+ assert isinstance(llm_extraction['is_open_source'], bool)
\ No newline at end of file
diff --git a/apps/python-sdk/setup.py b/apps/python-sdk/setup.py
index 7df520eb7..6674a8927 100644
--- a/apps/python-sdk/setup.py
+++ b/apps/python-sdk/setup.py
@@ -10,5 +10,6 @@
packages=find_packages(),
install_requires=[
'requests',
+ 'pytest',
],
)
From 63772ea7110c49fa91b64c62bae6e7fff9240bcd Mon Sep 17 00:00:00 2001
From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com>
Date: Mon, 27 May 2024 14:14:00 -0300
Subject: [PATCH 02/14] added github action workflow
---
.github/workflows/python-tests.yml | 31 ++++
apps/js-sdk/firecrawl/.env.example | 3 +
apps/js-sdk/firecrawl/package-lock.json | 43 ++++-
apps/js-sdk/firecrawl/package.json | 7 +-
.../src/__tests__/e2e_withAuth/index.test.ts | 147 ++++++++++++++++
apps/js-sdk/firecrawl/src/index.ts | 12 +-
.../__tests__/e2e_withAuth/.env.example | 3 +
.../firecrawl/__tests__/e2e_withAuth/test.py | 162 +++++++++++++-----
apps/python-sdk/setup.py | 1 +
9 files changed, 351 insertions(+), 58 deletions(-)
create mode 100644 .github/workflows/python-tests.yml
create mode 100644 apps/js-sdk/firecrawl/.env.example
create mode 100644 apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts
create mode 100644 apps/python-sdk/firecrawl/__tests__/e2e_withAuth/.env.example
diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml
new file mode 100644
index 000000000..9c3783bb5
--- /dev/null
+++ b/.github/workflows/python-tests.yml
@@ -0,0 +1,31 @@
+name: Run Python SDK E2E Tests
+
+on:
+ pull_request:
+ branches:
+ - main
+env:
+ TEST_API_KEY: ${{ secrets.TEST_API_KEY }}
+jobs:
+ build:
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ python-version: ["3.10"]
+
+ steps:
+ - uses: actions/checkout@v3
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v4
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ python setup.py
+ working-directory: ./apps/python-sdk
+ - name: Test with pytest
+ run: |
+ cd apps/python-sdk
+ pytest firecrawl/__tests__/e2e_withAuth/test.py
+ working-directory: ./apps/python-sdk
diff --git a/apps/js-sdk/firecrawl/.env.example b/apps/js-sdk/firecrawl/.env.example
new file mode 100644
index 000000000..6b1780bb8
--- /dev/null
+++ b/apps/js-sdk/firecrawl/.env.example
@@ -0,0 +1,3 @@
+API_URL=http://localhost:3002
+TEST_API_KEY=fc-YOUR_API_KEY
+
diff --git a/apps/js-sdk/firecrawl/package-lock.json b/apps/js-sdk/firecrawl/package-lock.json
index 6b085be8e..fec326b4a 100644
--- a/apps/js-sdk/firecrawl/package-lock.json
+++ b/apps/js-sdk/firecrawl/package-lock.json
@@ -1,22 +1,25 @@
{
"name": "@mendable/firecrawl-js",
- "version": "0.0.17-beta.8",
+ "version": "0.0.22",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@mendable/firecrawl-js",
- "version": "0.0.17-beta.8",
+ "version": "0.0.22",
"license": "MIT",
"dependencies": {
"axios": "^1.6.8",
+ "uuid": "^9.0.1",
"zod": "^3.23.8",
"zod-to-json-schema": "^3.23.0"
},
"devDependencies": {
"@jest/globals": "^29.7.0",
"@types/axios": "^0.14.0",
- "@types/node": "^20.12.7",
+ "@types/jest": "^29.5.12",
+ "@types/node": "^20.12.12",
+ "@types/uuid": "^9.0.8",
"jest": "^29.7.0",
"ts-jest": "^29.1.2",
"typescript": "^5.4.5"
@@ -1046,10 +1049,20 @@
"@types/istanbul-lib-report": "*"
}
},
+ "node_modules/@types/jest": {
+ "version": "29.5.12",
+ "resolved": "https://registry.npmjs.org/@types/jest/-/jest-29.5.12.tgz",
+ "integrity": "sha512-eDC8bTvT/QhYdxJAulQikueigY5AsdBRH2yDKW3yveW7svY3+DzN84/2NUgkw10RTiJbWqZrTtoGVdYlvFJdLw==",
+ "dev": true,
+ "dependencies": {
+ "expect": "^29.0.0",
+ "pretty-format": "^29.0.0"
+ }
+ },
"node_modules/@types/node": {
- "version": "20.12.7",
- "resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.7.tgz",
- "integrity": "sha512-wq0cICSkRLVaf3UGLMGItu/PtdY7oaXaI/RVU+xliKVOtRna3PRY57ZDfztpDL0n11vfymMUnXv8QwYCO7L1wg==",
+ "version": "20.12.12",
+ "resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.12.tgz",
+ "integrity": "sha512-eWLDGF/FOSPtAvEqeRAQ4C8LSA7M1I7i0ky1I8U7kD1J5ITyW3AsRhQrKVoWf5pFKZ2kILsEGJhsI9r93PYnOw==",
"dev": true,
"dependencies": {
"undici-types": "~5.26.4"
@@ -1061,6 +1074,12 @@
"integrity": "sha512-9aEbYZ3TbYMznPdcdr3SmIrLXwC/AKZXQeCf9Pgao5CKb8CyHuEX5jzWPTkvregvhRJHcpRO6BFoGW9ycaOkYw==",
"dev": true
},
+ "node_modules/@types/uuid": {
+ "version": "9.0.8",
+ "resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz",
+ "integrity": "sha512-jg+97EGIcY9AGHJJRaaPVgetKDsrTgbRjQ5Msgjh/DQKEFl0DtyRr/VCOyD1T2R1MNeWPK/u7JoGhlDZnKBAfA==",
+ "dev": true
+ },
"node_modules/@types/yargs": {
"version": "17.0.32",
"resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.32.tgz",
@@ -3641,6 +3660,18 @@
"browserslist": ">= 4.21.0"
}
},
+ "node_modules/uuid": {
+ "version": "9.0.1",
+ "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz",
+ "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==",
+ "funding": [
+ "https://github.com/sponsors/broofa",
+ "https://github.com/sponsors/ctavan"
+ ],
+ "bin": {
+ "uuid": "dist/bin/uuid"
+ }
+ },
"node_modules/v8-to-istanbul": {
"version": "9.2.0",
"resolved": "https://registry.npmjs.org/v8-to-istanbul/-/v8-to-istanbul-9.2.0.tgz",
diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json
index e43f6ea63..a9fdaaf49 100644
--- a/apps/js-sdk/firecrawl/package.json
+++ b/apps/js-sdk/firecrawl/package.json
@@ -9,7 +9,7 @@
"build": "tsc",
"publish": "npm run build && npm publish --access public",
"publish-beta": "npm run build && npm publish --access public --tag beta",
- "test": "jest src/**/*.test.ts"
+ "test": "jest src/__tests__/**/*.test.ts"
},
"repository": {
"type": "git",
@@ -19,6 +19,7 @@
"license": "MIT",
"dependencies": {
"axios": "^1.6.8",
+ "uuid": "^9.0.1",
"zod": "^3.23.8",
"zod-to-json-schema": "^3.23.0"
},
@@ -29,7 +30,9 @@
"devDependencies": {
"@jest/globals": "^29.7.0",
"@types/axios": "^0.14.0",
- "@types/node": "^20.12.7",
+ "@types/jest": "^29.5.12",
+ "@types/node": "^20.12.12",
+ "@types/uuid": "^9.0.8",
"jest": "^29.7.0",
"ts-jest": "^29.1.2",
"typescript": "^5.4.5"
diff --git a/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts b/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts
new file mode 100644
index 000000000..13f53472c
--- /dev/null
+++ b/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts
@@ -0,0 +1,147 @@
+import FirecrawlApp from '../../index';
+import { v4 as uuidv4 } from 'uuid';
+import dotenv from 'dotenv';
+
+dotenv.config();
+
+const TEST_API_KEY = process.env.TEST_API_KEY;
+const API_URL = process.env.API_URL;
+
+describe('FirecrawlApp E2E Tests', () => {
+ test('should throw error for no API key', () => {
+ expect(() => {
+ new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
+ }).toThrow("No API key provided");
+ });
+
+ test('should throw error for invalid API key on scrape', async () => {
+ const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
+ await expect(invalidApp.scrapeUrl('https://firecrawl.dev')).rejects.toThrow("Request failed with status code 401");
+ });
+
+ test('should throw error for blocklisted URL on scrape', async () => {
+ const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
+ const blocklistedUrl = "https://facebook.com/fake-test";
+ await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
+ });
+
+ test('should return successful response with valid preview token', async () => {
+ const app = new FirecrawlApp({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL });
+ const response = await app.scrapeUrl('https://firecrawl.dev');
+ expect(response).not.toBeNull();
+ expect(response.data.content).toContain("🔥 Firecrawl");
+ }, 10000); // 10 seconds timeout
+
+ test('should return successful response for valid scrape', async () => {
+ const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
+ const response = await app.scrapeUrl('https://firecrawl.dev');
+ expect(response).not.toBeNull();
+ expect(response.data.content).toContain("🔥 Firecrawl");
+ expect(response.data).toHaveProperty('markdown');
+ expect(response.data).toHaveProperty('metadata');
+ expect(response.data).not.toHaveProperty('html');
+ }, 10000); // 10 seconds timeout
+
+ test('should return successful response with valid API key and include HTML', async () => {
+ const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
+ const response = await app.scrapeUrl('https://firecrawl.dev', { pageOptions: { includeHtml: true } });
+ expect(response).not.toBeNull();
+ expect(response.data.content).toContain("🔥 Firecrawl");
+ expect(response.data.markdown).toContain("🔥 Firecrawl");
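+ expect(response.data.html).toContain("<h1");
+ }, 10000); // 10 seconds timeout
+
+ test('should return successful response for valid scrape with PDF file', async () => {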
+ const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
+ const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf');
+ expect(response).not.toBeNull();
+ expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
+ }, 30000); // 30 seconds timeout
+
+ test('should return successful response for valid scrape with PDF file without explicit extension', async () => {
+ const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
+ const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001');
+ await new Promise(resolve => setTimeout(resolve, 6000)); // wait for 6 seconds
+ expect(response).not.toBeNull();
+ expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
+ }, 30000); // 30 seconds timeout
+
+ test('should throw error for invalid API key on crawl', async () => {
+ const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
+ await expect(invalidApp.crawlUrl('https://firecrawl.dev')).rejects.toThrow("Request failed with status code 401");
+ });
+
+ test('should throw error for blocklisted URL on crawl', async () => {
+ const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
+ const blocklistedUrl = "https://twitter.com/fake-test";
+ await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
+ });
+
+ test('should return successful response for crawl and wait for completion', async () => {
+ const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
+ const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true);
+ expect(response).not.toBeNull();
+ expect(response[0].content).toContain("🔥 Firecrawl");
+ }, 60000); // 60 seconds timeout
+
+ test('should handle idempotency key for crawl', async () => {
+ const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
+ const uniqueIdempotencyKey = uuidv4();
+ const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey);
+ expect(response).not.toBeNull();
+ expect(response[0].content).toContain("🔥 Firecrawl");
+
+ await expect(app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
+ }, 30000); // 30 seconds timeout
+
+ test('should check crawl status', async () => {
+ const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
+ const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, false);
+ expect(response).not.toBeNull();
+ expect(response.jobId).toBeDefined();
+
+ await new Promise(resolve => setTimeout(resolve, 10000)); // wait for 10 seconds
+ const statusResponse = await app.checkCrawlStatus(response.jobId);
+ expect(statusResponse).not.toBeNull();
+ expect(statusResponse.status).toBe('completed');
+ expect(statusResponse.data.length).toBeGreaterThan(0);
+ }, 30000); // 30 seconds timeout
+
+ test('should return successful response for search', async () => {
+ const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
+ const response = await app.search("test query");
+ expect(response).not.toBeNull();
+ expect(response.data[0].content).toBeDefined();
+ expect(response.data.length).toBeGreaterThan(2);
+ }, 30000); // 30 seconds timeout
+
+ test('should throw error for invalid API key on search', async () => {
+ const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
+ await expect(invalidApp.search("test query")).rejects.toThrow("Request failed with status code 401");
+ });
+
+ test('should perform LLM extraction', async () => {
+ const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
+ const response = await app.scrapeUrl("https://mendable.ai", {
+ extractorOptions: {
+ mode: 'llm-extraction',
+ extractionPrompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
+ extractionSchema: {
+ type: 'object',
+ properties: {
+ company_mission: { type: 'string' },
+ supports_sso: { type: 'boolean' },
+ is_open_source: { type: 'boolean' }
+ },
+ required: ['company_mission', 'supports_sso', 'is_open_source']
+ }
+ }
+ });
+ expect(response).not.toBeNull();
+ expect(response.data.llm_extraction).toBeDefined();
+ const llmExtraction = response.data.llm_extraction;
+ expect(llmExtraction.company_mission).toBeDefined();
+ expect(typeof llmExtraction.supports_sso).toBe('boolean');
+ expect(typeof llmExtraction.is_open_source).toBe('boolean');
+ }, 30000); // 30 seconds timeout
+});
diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts
index 0bdcf7ceb..2a07f60d0 100644
--- a/apps/js-sdk/firecrawl/src/index.ts
+++ b/apps/js-sdk/firecrawl/src/index.ts
@@ -6,6 +6,7 @@ import { zodToJsonSchema } from "zod-to-json-schema";
*/
export interface FirecrawlAppConfig {
apiKey?: string | null;
+ apiUrl?: string | null;
}
/**
@@ -63,6 +64,7 @@ export interface JobStatusResponse {
*/
export default class FirecrawlApp {
private apiKey: string;
+ private apiUrl: string = "https://api.firecrawl.dev";
/**
* Initializes a new instance of the FirecrawlApp class.
@@ -107,7 +109,7 @@ export default class FirecrawlApp {
}
try {
const response: AxiosResponse = await axios.post(
- "https://api.firecrawl.dev/v0/scrape",
+ this.apiUrl + "/v0/scrape",
jsonData,
{ headers },
);
@@ -147,7 +149,7 @@ export default class FirecrawlApp {
}
try {
const response: AxiosResponse = await axios.post(
- "https://api.firecrawl.dev/v0/search",
+ this.apiUrl + "/v0/search",
jsonData,
{ headers }
);
@@ -190,7 +192,7 @@ export default class FirecrawlApp {
}
try {
const response: AxiosResponse = await this.postRequest(
- "https://api.firecrawl.dev/v0/crawl",
+ this.apiUrl + "/v0/crawl",
jsonData,
headers
);
@@ -220,7 +222,7 @@ export default class FirecrawlApp {
const headers: AxiosRequestHeaders = this.prepareHeaders();
try {
const response: AxiosResponse = await this.getRequest(
- `https://api.firecrawl.dev/v0/crawl/status/${jobId}`,
+ this.apiUrl + `/v0/crawl/status/${jobId}`,
headers
);
if (response.status === 200) {
@@ -292,7 +294,7 @@ export default class FirecrawlApp {
): Promise {
while (true) {
const statusResponse: AxiosResponse = await this.getRequest(
- `https://api.firecrawl.dev/v0/crawl/status/${jobId}`,
+ this.apiUrl + `/v0/crawl/status/${jobId}`,
headers
);
if (statusResponse.status === 200) {
diff --git a/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/.env.example b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/.env.example
new file mode 100644
index 000000000..904887bf0
--- /dev/null
+++ b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/.env.example
@@ -0,0 +1,3 @@
+API_URL=http://localhost:3002
+ABSOLUTE_FIRECRAWL_PATH=/Users/user/firecrawl/apps/python-sdk/firecrawl/firecrawl.py
+TEST_API_KEY=fc-YOUR_API_KEY
\ No newline at end of file
diff --git a/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py
index 11b66e981..1a3b64143 100644
--- a/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py
+++ b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py
@@ -1,78 +1,150 @@
+import importlib.util
import pytest
-from firecrawl import FirecrawlApp
+import time
+import os
+from uuid import uuid4
+from dotenv import load_dotenv
-TEST_API_KEY = "fc-YOUR_API_KEY"
-TEST_URL = "https://firecrawl.dev"
+load_dotenv()
+
+API_URL = "http://127.0.0.1:3002";
+ABSOLUTE_FIRECRAWL_PATH = "./apps/python-sdk/firecrawl/firecrawl.py"
+TEST_API_KEY = os.getenv('TEST_API_KEY')
+
+print(f"ABSOLUTE_FIRECRAWL_PATH: {ABSOLUTE_FIRECRAWL_PATH}")
+
+spec = importlib.util.spec_from_file_location("FirecrawlApp", ABSOLUTE_FIRECRAWL_PATH)
+firecrawl = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(firecrawl)
+FirecrawlApp = firecrawl.FirecrawlApp
+
+def test_no_api_key():
+ with pytest.raises(Exception) as excinfo:
+ invalid_app = FirecrawlApp(api_url=API_URL)
+ assert "No API key provided" in str(excinfo.value)
+
+def test_scrape_url_invalid_api_key():
+ invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key")
+ with pytest.raises(Exception) as excinfo:
+ invalid_app.scrape_url('https://firecrawl.dev')
+ assert "Failed to scrape URL. Status code: 401" in str(excinfo.value)
+
+def test_blocklisted_url():
+ blocklisted_url = "https://facebook.com/fake-test"
+ app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
+ with pytest.raises(Exception) as excinfo:
+ app.scrape_url(blocklisted_url)
+ assert "Failed to scrape URL. Status code: 403" in str(excinfo.value)
+
+def test_successful_response_with_valid_preview_token():
+ app = FirecrawlApp(api_url=API_URL, api_key="this_is_just_a_preview_token")
+ response = app.scrape_url('https://firecrawl.dev')
+ assert response is not None
+ assert 'content' in response
+ assert "🔥 Firecrawl" in response['content']
def test_scrape_url_e2e():
- app = FirecrawlApp(api_key=TEST_API_KEY)
- response = app.scrape_url(TEST_URL)
- print(response)
+ app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
+ response = app.scrape_url('https://firecrawl.dev')
assert response is not None
assert 'content' in response
+ assert 'markdown' in response
+ assert 'metadata' in response
+ assert 'html' not in response
assert "🔥 Firecrawl" in response['content']
-def test_scrape_url_invalid_api_key():
- invalid_app = FirecrawlApp(api_key="invalid_api_key")
- with pytest.raises(Exception) as excinfo:
- invalid_app.scrape_url(TEST_URL)
- assert "Failed to scrape URL. Status code: 401" in str(excinfo.value)
+def test_successful_response_with_valid_api_key_and_include_html():
+ app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
+ response = app.scrape_url('https://firecrawl.dev', {'pageOptions': {'includeHtml': True}})
+ assert response is not None
+ assert 'content' in response
+ assert 'markdown' in response
+ assert 'html' in response
+ assert 'metadata' in response
+ assert "🔥 Firecrawl" in response['content']
+ assert "🔥 Firecrawl" in response['markdown']
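+ assert "<h1" in response['html']
-def test_crawl_url_e2e():
- app = FirecrawlApp(api_key=TEST_API_KEY)
- response = app.crawl_url(TEST_URL, {'crawlerOptions': {'excludes': ['blog/*']}}, True)
+def test_successful_response_for_valid_scrape_with_pdf_file():
+ app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
+ response = app.scrape_url('https://arxiv.org/pdf/astro-ph/9301001.pdf')
assert response is not None
- assert len(response) > 0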
- assert 'content' in response[0]
- assert "🔥 Firecrawl" in response[0]['content']
+ assert 'content' in response
+ assert 'metadata' in response
+ assert 'We present spectrophotometric observations of the Broad Line Radio Galaxy' in response['content']
+
+def test_successful_response_for_valid_scrape_with_pdf_file_without_explicit_extension():
+ app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
+ response = app.scrape_url('https://arxiv.org/pdf/astro-ph/9301001')
+ time.sleep(6) # wait for 6 seconds
+ assert response is not None
+ assert 'content' in response
+ assert 'metadata' in response
+ assert 'We present spectrophotometric observations of the Broad Line Radio Galaxy' in response['content']
def test_crawl_url_invalid_api_key():
- invalid_app = FirecrawlApp(api_key="invalid_api_key")
+ invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key")
with pytest.raises(Exception) as excinfo:
- invalid_app.crawl_url(TEST_URL)
+ invalid_app.crawl_url('https://firecrawl.dev')
assert "Unexpected error occurred while trying to start crawl job. Status code: 401" in str(excinfo.value)
-def test_search_e2e():
- app = FirecrawlApp(api_key=TEST_API_KEY)
- response = app.search("test query")
+def test_should_return_error_for_blocklisted_url():
+ app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
+ blocklisted_url = "https://twitter.com/fake-test"
+ with pytest.raises(Exception) as excinfo:
+ app.crawl_url(blocklisted_url)
+ assert "Unexpected error occurred while trying to start crawl job. Status code: 403" in str(excinfo.value)
+
+def test_crawl_url_wait_for_completion_e2e():
+ app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
+ response = app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True)
assert response is not None
+ assert len(response) > 0
assert 'content' in response[0]
- assert len(response) > 2
-
-def test_search_invalid_api_key():
- invalid_app = FirecrawlApp(api_key="invalid_api_key")
- with pytest.raises(Exception) as excinfo:
- invalid_app.search("test query")
- assert "Failed to search. Status code: 401" in str(excinfo.value)
+ assert "🔥 Firecrawl" in response[0]['content']
-def test_crawl_with_fast_mode():
- app = FirecrawlApp(api_key=TEST_API_KEY)
- response = app.crawl_url(TEST_URL, {'crawlerOptions': {'mode': 'fast'}}, True)
+def test_crawl_url_with_idempotency_key_e2e():
+ app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
+ uniqueIdempotencyKey = str(uuid4())
+ response = app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey)
assert response is not None
assert len(response) > 0
assert 'content' in response[0]
+ assert "🔥 Firecrawl" in response[0]['content']
+
+ with pytest.raises(Exception) as excinfo:
+ app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey)
+ assert "Failed to start crawl job. Status code: 409. Error: Idempotency key already used" in str(excinfo.value)
-def test_crawl_with_html_inclusion():
- app = FirecrawlApp(api_key=TEST_API_KEY)
- response = app.crawl_url(TEST_URL, {'pageOptions': {'includeHtml': True}}, False)
+def test_check_crawl_status_e2e():
+ app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
+ response = app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, False)
assert response is not None
assert 'jobId' in response
+
+ time.sleep(30) # wait for 30 seconds
+ status_response = app.check_crawl_status(response['jobId'])
+ assert status_response is not None
+ assert 'status' in status_response
+ assert status_response['status'] == 'completed'
+ assert 'data' in status_response
+ assert len(status_response['data']) > 0
-def test_crawl_with_pdf_extraction():
- app = FirecrawlApp(api_key=TEST_API_KEY)
- response = app.crawl_url("https://arxiv.org/pdf/astro-ph/9301001",
- {'crawlerOptions': {'limit': 10, 'excludes': ['list/*', 'login', 'abs/*', 'static/*', 'about/*', 'archive/*']}}, False)
+def test_search_e2e():
+ app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
+ response = app.search("test query")
assert response is not None
- assert 'jobId' in response
+ assert 'content' in response[0]
+ assert len(response) > 2
-def test_timeout_during_scraping():
- app = FirecrawlApp(api_key=TEST_API_KEY)
+def test_search_invalid_api_key():
+ invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key")
with pytest.raises(Exception) as excinfo:
- app.scrape_url(TEST_URL, {'timeout': 1000})
- assert 'Failed to scrape URL. Status code: 408' in str(excinfo.value)
+ invalid_app.search("test query")
+ assert "Failed to search. Status code: 401" in str(excinfo.value)
def test_llm_extraction():
- app = FirecrawlApp(api_key=TEST_API_KEY)
+ app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
response = app.scrape_url("https://mendable.ai", {
'extractorOptions': {
'mode': 'llm-extraction',
diff --git a/apps/python-sdk/setup.py b/apps/python-sdk/setup.py
index 6674a8927..726cafa34 100644
--- a/apps/python-sdk/setup.py
+++ b/apps/python-sdk/setup.py
@@ -11,5 +11,6 @@
install_requires=[
'requests',
'pytest',
+ 'python-dotenv',
],
)
From 19decd1062c08e0b6e42c6d9152a376cfd645457 Mon Sep 17 00:00:00 2001
From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com>
Date: Mon, 27 May 2024 14:21:33 -0300
Subject: [PATCH 03/14] fixing workflow
---
.github/workflows/python-tests.yml | 2 +-
apps/python-sdk/requirements.txt | 3 +++
2 files changed, 4 insertions(+), 1 deletion(-)
create mode 100644 apps/python-sdk/requirements.txt
diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml
index 9c3783bb5..327fcb257 100644
--- a/.github/workflows/python-tests.yml
+++ b/.github/workflows/python-tests.yml
@@ -22,7 +22,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
- python setup.py
+ pip install -r requirements.txt
working-directory: ./apps/python-sdk
- name: Test with pytest
run: |
diff --git a/apps/python-sdk/requirements.txt b/apps/python-sdk/requirements.txt
new file mode 100644
index 000000000..1bed58814
--- /dev/null
+++ b/apps/python-sdk/requirements.txt
@@ -0,0 +1,3 @@
+requests
+pytest
+python-dotenv
\ No newline at end of file
From c410dbe5bdec5b172118b049cb4aa055d7835ab5 Mon Sep 17 00:00:00 2001
From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com>
Date: Mon, 27 May 2024 14:24:30 -0300
Subject: [PATCH 04/14] Update python-tests.yml
---
.github/workflows/python-tests.yml | 1 -
1 file changed, 1 deletion(-)
diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml
index 327fcb257..9bd9ddb83 100644
--- a/.github/workflows/python-tests.yml
+++ b/.github/workflows/python-tests.yml
@@ -26,6 +26,5 @@ jobs:
working-directory: ./apps/python-sdk
- name: Test with pytest
run: |
- cd apps/python-sdk
pytest firecrawl/__tests__/e2e_withAuth/test.py
working-directory: ./apps/python-sdk
From a9b68d95d88518b75a6551bede75ce7191c82eca Mon Sep 17 00:00:00 2001
From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com>
Date: Mon, 27 May 2024 14:28:44 -0300
Subject: [PATCH 05/14] Update test.py
---
apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py
index 1a3b64143..86ce1f9f5 100644
--- a/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py
+++ b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py
@@ -8,7 +8,7 @@
load_dotenv()
API_URL = "http://127.0.0.1:3002";
-ABSOLUTE_FIRECRAWL_PATH = "./apps/python-sdk/firecrawl/firecrawl.py"
+ABSOLUTE_FIRECRAWL_PATH = "firecrawl/firecrawl.py"
TEST_API_KEY = os.getenv('TEST_API_KEY')
print(f"ABSOLUTE_FIRECRAWL_PATH: {ABSOLUTE_FIRECRAWL_PATH}")
From f32c16258a5aacf7cb15825df11509810fa63927 Mon Sep 17 00:00:00 2001
From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com>
Date: Mon, 27 May 2024 14:32:34 -0300
Subject: [PATCH 06/14] add missing Node.js setup and API/worker startup to python-tests workflow
---
.github/workflows/python-tests.yml | 19 ++++++++++++++++++-
1 file changed, 18 insertions(+), 1 deletion(-)
diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml
index 9bd9ddb83..4e154b8c2 100644
--- a/.github/workflows/python-tests.yml
+++ b/.github/workflows/python-tests.yml
@@ -13,8 +13,25 @@ jobs:
matrix:
python-version: ["3.10"]
- steps:
+ steps:
- uses: actions/checkout@v3
+ - name: Set up Node.js
+ uses: actions/setup-node@v3
+ with:
+ node-version: "20"
+ - name: Install pnpm
+ run: npm install -g pnpm
+ - name: Install dependencies for API
+ run: pnpm install
+ working-directory: ./apps/api
+ - name: Start the application
+ run: npm start &
+ working-directory: ./apps/api
+ id: start_app
+ - name: Start workers
+ run: npm run workers &
+ working-directory: ./apps/api
+ id: start_workers
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
From 952ccd8755dc334755a90e7a680b76386d86b192 Mon Sep 17 00:00:00 2001
From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com>
Date: Mon, 27 May 2024 14:34:39 -0300
Subject: [PATCH 07/14] add API environment variables to python-tests workflow
---
.github/workflows/python-tests.yml | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml
index 4e154b8c2..1a7c1df58 100644
--- a/.github/workflows/python-tests.yml
+++ b/.github/workflows/python-tests.yml
@@ -5,7 +5,27 @@ on:
branches:
- main
env:
+ ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+ BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }}
+ FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
+ HOST: ${{ secrets.HOST }}
+ LLAMAPARSE_API_KEY: ${{ secrets.LLAMAPARSE_API_KEY }}
+ LOGTAIL_KEY: ${{ secrets.LOGTAIL_KEY }}
+ POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
+ POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
+ NUM_WORKERS_PER_QUEUE: ${{ secrets.NUM_WORKERS_PER_QUEUE }}
+ OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+ PLAYWRIGHT_MICROSERVICE_URL: ${{ secrets.PLAYWRIGHT_MICROSERVICE_URL }}
+ PORT: ${{ secrets.PORT }}
+ REDIS_URL: ${{ secrets.REDIS_URL }}
+ SCRAPING_BEE_API_KEY: ${{ secrets.SCRAPING_BEE_API_KEY }}
+ SUPABASE_ANON_TOKEN: ${{ secrets.SUPABASE_ANON_TOKEN }}
+ SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }}
+ SUPABASE_URL: ${{ secrets.SUPABASE_URL }}
TEST_API_KEY: ${{ secrets.TEST_API_KEY }}
+ HYPERDX_API_KEY: ${{ secrets.HYPERDX_API_KEY }}
+ HDX_NODE_BETA_MODE: 1
+
jobs:
build:
runs-on: ubuntu-latest
From d0c4b24a0ef29fa2a090e91e2c4a40c355367a5d Mon Sep 17 00:00:00 2001
From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com>
Date: Mon, 27 May 2024 14:36:32 -0300
Subject: [PATCH 08/14] add missing Redis service to python-tests workflow
---
.github/workflows/python-tests.yml | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml
index 1a7c1df58..9b2145dcc 100644
--- a/.github/workflows/python-tests.yml
+++ b/.github/workflows/python-tests.yml
@@ -25,16 +25,21 @@ env:
TEST_API_KEY: ${{ secrets.TEST_API_KEY }}
HYPERDX_API_KEY: ${{ secrets.HYPERDX_API_KEY }}
HDX_NODE_BETA_MODE: 1
-
+
jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
- python-version: ["3.10"]
+ python-version: ["3.10"]
+ services:
+ redis:
+ image: redis
+ ports:
+ - 6379:6379
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v3
- name: Set up Node.js
uses: actions/setup-node@v3
with:
From 127d2db1dd1d8aeec259d2d3e7cc51c4124d975e Mon Sep 17 00:00:00 2001
From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com>
Date: Mon, 27 May 2024 15:54:09 -0300
Subject: [PATCH 09/14] added js/ts sdk tests
---
.github/workflows/fly.yml | 16 ++++++
.github/workflows/js-sdk.yml | 57 +++++++++++++++++++
.../{python-tests.yml => python-sdk.yml} | 4 +-
.../src/__tests__/e2e_withAuth/index.test.ts | 1 -
4 files changed, 75 insertions(+), 3 deletions(-)
create mode 100644 .github/workflows/js-sdk.yml
rename .github/workflows/{python-tests.yml => python-sdk.yml} (96%)
diff --git a/.github/workflows/fly.yml b/.github/workflows/fly.yml
index 09d81af15..6285831da 100644
--- a/.github/workflows/fly.yml
+++ b/.github/workflows/fly.yml
@@ -94,6 +94,22 @@ jobs:
run: |
npm run test
working-directory: ./apps/test-suite
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v4
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Install Python dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install -r requirements.txt
+ working-directory: ./apps/python-sdk
+ - name: Run E2E tests for Python SDK
+ run: |
+ pytest firecrawl/__tests__/e2e_withAuth/test.py
+ working-directory: ./apps/python-sdk
+ - name: Run E2E tests for JavaScript SDK
+ run: npm run tests
+ working-directory: ./apps/js-sdk/firecrawl
deploy:
name: Deploy app
diff --git a/.github/workflows/js-sdk.yml b/.github/workflows/js-sdk.yml
new file mode 100644
index 000000000..86f0ba151
--- /dev/null
+++ b/.github/workflows/js-sdk.yml
@@ -0,0 +1,57 @@
+name: Run JavaScript SDK E2E Tests
+
+on:
+ pull_request:
+ branches:
+ - main
+env:
+ ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+ BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }}
+ FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
+ HOST: ${{ secrets.HOST }}
+ LLAMAPARSE_API_KEY: ${{ secrets.LLAMAPARSE_API_KEY }}
+ LOGTAIL_KEY: ${{ secrets.LOGTAIL_KEY }}
+ POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
+ POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
+ NUM_WORKERS_PER_QUEUE: ${{ secrets.NUM_WORKERS_PER_QUEUE }}
+ OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+ PLAYWRIGHT_MICROSERVICE_URL: ${{ secrets.PLAYWRIGHT_MICROSERVICE_URL }}
+ PORT: ${{ secrets.PORT }}
+ REDIS_URL: ${{ secrets.REDIS_URL }}
+ SCRAPING_BEE_API_KEY: ${{ secrets.SCRAPING_BEE_API_KEY }}
+ SUPABASE_ANON_TOKEN: ${{ secrets.SUPABASE_ANON_TOKEN }}
+ SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }}
+ SUPABASE_URL: ${{ secrets.SUPABASE_URL }}
+ TEST_API_KEY: ${{ secrets.TEST_API_KEY }}
+ HYPERDX_API_KEY: ${{ secrets.HYPERDX_API_KEY }}
+ HDX_NODE_BETA_MODE: 1
+
+jobs:
+ build:
+ runs-on: ubuntu-latest
+ services:
+ redis:
+ image: redis
+ ports:
+ - 6379:6379
+
+ steps:
+ - uses: actions/checkout@v3
+ - name: Set up Node.js
+ uses: actions/setup-node@v3
+ with:
+ node-version: "20"
+ - name: Install pnpm
+ run: npm install -g pnpm
+ - name: Install dependencies for API
+ run: pnpm install
+ working-directory: ./apps/api
+ - name: Start the application
+ run: npm start &
+ working-directory: ./apps/api
+ - name: Start workers
+ run: npm run workers &
+ working-directory: ./apps/api
+ - name: Run E2E tests for JavaScript SDK
+ run: npm run tests
+ working-directory: ./apps/js-sdk/firecrawl
\ No newline at end of file
diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-sdk.yml
similarity index 96%
rename from .github/workflows/python-tests.yml
rename to .github/workflows/python-sdk.yml
index 9b2145dcc..1308cdef5 100644
--- a/.github/workflows/python-tests.yml
+++ b/.github/workflows/python-sdk.yml
@@ -61,12 +61,12 @@ jobs:
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- - name: Install dependencies
+ - name: Install Python dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
working-directory: ./apps/python-sdk
- - name: Test with pytest
+ - name: Run E2E tests for Python SDK
run: |
pytest firecrawl/__tests__/e2e_withAuth/test.py
working-directory: ./apps/python-sdk
diff --git a/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts b/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts
index 13f53472c..f06538bef 100644
--- a/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts
+++ b/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts
@@ -61,7 +61,6 @@ describe('FirecrawlApp E2E Tests', () => {
test('should return successful response for valid scrape with PDF file without explicit extension', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001');
- await new Promise(resolve => setTimeout(resolve, 6000)); // wait for 6 seconds
expect(response).not.toBeNull();
expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
}, 30000); // 30 seconds timeout
From e87d39e6ecafd587517df963578b480504198eac Mon Sep 17 00:00:00 2001
From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com>
Date: Mon, 27 May 2024 15:55:48 -0300
Subject: [PATCH 10/14] fix typo in npm script name (tests -> test)
---
.github/workflows/fly.yml | 2 +-
.github/workflows/js-sdk.yml | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/fly.yml b/.github/workflows/fly.yml
index 6285831da..1976d8f1e 100644
--- a/.github/workflows/fly.yml
+++ b/.github/workflows/fly.yml
@@ -108,7 +108,7 @@ jobs:
pytest firecrawl/__tests__/e2e_withAuth/test.py
working-directory: ./apps/python-sdk
- name: Run E2E tests for JavaScript SDK
- run: npm run tests
+ run: npm run test
working-directory: ./apps/js-sdk/firecrawl
deploy:
diff --git a/.github/workflows/js-sdk.yml b/.github/workflows/js-sdk.yml
index 86f0ba151..82aa42c9b 100644
--- a/.github/workflows/js-sdk.yml
+++ b/.github/workflows/js-sdk.yml
@@ -53,5 +53,5 @@ jobs:
run: npm run workers &
working-directory: ./apps/api
- name: Run E2E tests for JavaScript SDK
- run: npm run tests
+ run: npm run test
working-directory: ./apps/js-sdk/firecrawl
\ No newline at end of file
From 6b58da1c96040963611ee0a2cf8b30fde5d9732d Mon Sep 17 00:00:00 2001
From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com>
Date: Mon, 27 May 2024 16:01:48 -0300
Subject: [PATCH 11/14] install JavaScript SDK dependencies (jest) before running E2E tests
---
.github/workflows/fly.yml | 3 +++
.github/workflows/js-sdk.yml | 3 +++
2 files changed, 6 insertions(+)
diff --git a/.github/workflows/fly.yml b/.github/workflows/fly.yml
index 1976d8f1e..deddc9f21 100644
--- a/.github/workflows/fly.yml
+++ b/.github/workflows/fly.yml
@@ -107,6 +107,9 @@ jobs:
run: |
pytest firecrawl/__tests__/e2e_withAuth/test.py
working-directory: ./apps/python-sdk
+ - name: Install dependencies for JavaScript SDK
+ run: pnpm install
+ working-directory: ./apps/js-sdk/firecrawl
- name: Run E2E tests for JavaScript SDK
run: npm run test
working-directory: ./apps/js-sdk/firecrawl
diff --git a/.github/workflows/js-sdk.yml b/.github/workflows/js-sdk.yml
index 82aa42c9b..3c914cc80 100644
--- a/.github/workflows/js-sdk.yml
+++ b/.github/workflows/js-sdk.yml
@@ -52,6 +52,9 @@ jobs:
- name: Start workers
run: npm run workers &
working-directory: ./apps/api
+ - name: Install dependencies for JavaScript SDK
+ run: pnpm install
+ working-directory: ./apps/js-sdk/firecrawl
- name: Run E2E tests for JavaScript SDK
run: npm run test
working-directory: ./apps/js-sdk/firecrawl
\ No newline at end of file
From 41c4ef6a82919baba396549479c354d0175d886f Mon Sep 17 00:00:00 2001
From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com>
Date: Mon, 27 May 2024 16:23:57 -0300
Subject: [PATCH 12/14] dotenv was missing
---
apps/js-sdk/firecrawl/package-lock.json | 23 +++++++++++++++
apps/js-sdk/firecrawl/package.json | 2 ++
apps/js-sdk/package-lock.json | 38 ++++++++++++++++---------
apps/js-sdk/package.json | 3 +-
4 files changed, 51 insertions(+), 15 deletions(-)
diff --git a/apps/js-sdk/firecrawl/package-lock.json b/apps/js-sdk/firecrawl/package-lock.json
index fec326b4a..b1cebde83 100644
--- a/apps/js-sdk/firecrawl/package-lock.json
+++ b/apps/js-sdk/firecrawl/package-lock.json
@@ -10,6 +10,7 @@
"license": "MIT",
"dependencies": {
"axios": "^1.6.8",
+ "dotenv": "^16.4.5",
"uuid": "^9.0.1",
"zod": "^3.23.8",
"zod-to-json-schema": "^3.23.0"
@@ -17,6 +18,7 @@
"devDependencies": {
"@jest/globals": "^29.7.0",
"@types/axios": "^0.14.0",
+ "@types/dotenv": "^8.2.0",
"@types/jest": "^29.5.12",
"@types/node": "^20.12.12",
"@types/uuid": "^9.0.8",
@@ -1016,6 +1018,16 @@
"@babel/types": "^7.20.7"
}
},
+ "node_modules/@types/dotenv": {
+ "version": "8.2.0",
+ "resolved": "https://registry.npmjs.org/@types/dotenv/-/dotenv-8.2.0.tgz",
+ "integrity": "sha512-ylSC9GhfRH7m1EUXBXofhgx4lUWmFeQDINW5oLuS+gxWdfUeW4zJdeVTYVkexEW+e2VUvlZR2kGnGGipAWR7kw==",
+ "deprecated": "This is a stub types definition. dotenv provides its own type definitions, so you do not need this installed.",
+ "dev": true,
+ "dependencies": {
+ "dotenv": "*"
+ }
+ },
"node_modules/@types/graceful-fs": {
"version": "4.1.9",
"resolved": "https://registry.npmjs.org/@types/graceful-fs/-/graceful-fs-4.1.9.tgz",
@@ -1621,6 +1633,17 @@
"node": "^14.15.0 || ^16.10.0 || >=18.0.0"
}
},
+ "node_modules/dotenv": {
+ "version": "16.4.5",
+ "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.5.tgz",
+ "integrity": "sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==",
+ "engines": {
+ "node": ">=12"
+ },
+ "funding": {
+ "url": "https://dotenvx.com"
+ }
+ },
"node_modules/electron-to-chromium": {
"version": "1.4.748",
"resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.4.748.tgz",
diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json
index a9fdaaf49..a1c42a0cc 100644
--- a/apps/js-sdk/firecrawl/package.json
+++ b/apps/js-sdk/firecrawl/package.json
@@ -19,6 +19,7 @@
"license": "MIT",
"dependencies": {
"axios": "^1.6.8",
+ "dotenv": "^16.4.5",
"uuid": "^9.0.1",
"zod": "^3.23.8",
"zod-to-json-schema": "^3.23.0"
@@ -30,6 +31,7 @@
"devDependencies": {
"@jest/globals": "^29.7.0",
"@types/axios": "^0.14.0",
+ "@types/dotenv": "^8.2.0",
"@types/jest": "^29.5.12",
"@types/node": "^20.12.12",
"@types/uuid": "^9.0.8",
diff --git a/apps/js-sdk/package-lock.json b/apps/js-sdk/package-lock.json
index 516765347..c59a371c4 100644
--- a/apps/js-sdk/package-lock.json
+++ b/apps/js-sdk/package-lock.json
@@ -11,9 +11,10 @@
"dependencies": {
"@mendable/firecrawl-js": "^0.0.19",
"axios": "^1.6.8",
- "uuid": "^9.0.1",
+ "dotenv": "^16.4.5",
"ts-node": "^10.9.2",
"typescript": "^5.4.5",
+ "uuid": "^9.0.1",
"zod": "^3.23.8"
},
"devDependencies": {
@@ -531,6 +532,17 @@
"node": ">=0.3.1"
}
},
+ "node_modules/dotenv": {
+ "version": "16.4.5",
+ "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.5.tgz",
+ "integrity": "sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==",
+ "engines": {
+ "node": ">=12"
+ },
+ "funding": {
+ "url": "https://dotenvx.com"
+ }
+ },
"node_modules/esbuild": {
"version": "0.20.2",
"resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.20.2.tgz",
@@ -744,6 +756,18 @@
"integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
"peer": true
},
+ "node_modules/uuid": {
+ "version": "9.0.1",
+ "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz",
+ "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==",
+ "funding": [
+ "https://github.com/sponsors/broofa",
+ "https://github.com/sponsors/ctavan"
+ ],
+ "bin": {
+ "uuid": "dist/bin/uuid"
+ }
+ },
"node_modules/v8-compile-cache-lib": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz",
@@ -772,18 +796,6 @@
"peerDependencies": {
"zod": "^3.23.3"
}
- },
- "node_modules/uuid": {
- "version": "9.0.1",
- "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz",
- "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==",
- "funding": [
- "https://github.com/sponsors/broofa",
- "https://github.com/sponsors/ctavan"
- ],
- "bin": {
- "uuid": "dist/bin/uuid"
- }
}
}
}
diff --git a/apps/js-sdk/package.json b/apps/js-sdk/package.json
index df9f99e5f..0e93fe3c2 100644
--- a/apps/js-sdk/package.json
+++ b/apps/js-sdk/package.json
@@ -11,9 +11,8 @@
"author": "",
"license": "ISC",
"dependencies": {
- "axios": "^1.6.8",
- "uuid": "^9.0.1",
"@mendable/firecrawl-js": "^0.0.19",
+ "axios": "^1.6.8",
"ts-node": "^10.9.2",
"typescript": "^5.4.5",
"zod": "^3.23.8"
From d5c83803cd9adb61209934d6c6e707ad1e757ab6 Mon Sep 17 00:00:00 2001
From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com>
Date: Mon, 27 May 2024 16:35:01 -0300
Subject: [PATCH 13/14] fixing idempotency test
---
.../firecrawl/src/__tests__/e2e_withAuth/index.test.ts | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts b/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts
index f06538bef..8f2e137ed 100644
--- a/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts
+++ b/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts
@@ -86,12 +86,12 @@ describe('FirecrawlApp E2E Tests', () => {
test('should handle idempotency key for crawl', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const uniqueIdempotencyKey = uuidv4();
- const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey);
+ const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, false, 2, uniqueIdempotencyKey);
expect(response).not.toBeNull();
- expect(response[0].content).toContain("🔥 Firecrawl");
+ expect(response.jobId).toBeDefined();
await expect(app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
- }, 30000); // 30 seconds timeout
+ });
test('should check crawl status', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
From 71187b03a207a4a5bdfd69c0db4ca0cbeeba5592 Mon Sep 17 00:00:00 2001
From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com>
Date: Mon, 27 May 2024 16:48:08 -0300
Subject: [PATCH 14/14] added timeout
---
apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts b/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts
index 8f2e137ed..c9db6a91e 100644
--- a/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts
+++ b/apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts
@@ -78,7 +78,7 @@ describe('FirecrawlApp E2E Tests', () => {
test('should return successful response for crawl and wait for completion', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
- const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true);
+ const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 30);
expect(response).not.toBeNull();
expect(response[0].content).toContain("🔥 Firecrawl");
}, 60000); // 60 seconds timeout