forked from mendableai/firecrawl
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
397769c
commit 63772ea
Showing
9 changed files
with
351 additions
and
58 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# Runs the Python SDK end-to-end tests for pull requests that target `main`.
name: Run Python SDK E2E Tests

on:
  pull_request:
    branches:
      - main

env:
  # API key the E2E tests authenticate with, read from the repository secrets.
  # NOTE(review): the example env files also define API_URL, which is not set
  # here — confirm whether the Python tests need it in CI.
  TEST_API_KEY: ${{ secrets.TEST_API_KEY }}

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        # `python setup.py` without a command only prints a usage error, so the
        # SDK is installed through pip instead. pytest is installed explicitly
        # because the next step invokes it.
        # NOTE(review): add any extra test-only packages the suite imports
        # (e.g. a dotenv loader) if they are not declared by the package itself.
        run: |
          python -m pip install --upgrade pip
          python -m pip install . pytest
        working-directory: ./apps/python-sdk
      - name: Test with pytest
        # `working-directory` already places the shell in apps/python-sdk; an
        # additional `cd apps/python-sdk` would point at a non-existent nested path.
        run: |
          pytest firecrawl/__tests__/e2e_withAuth/test.py
        working-directory: ./apps/python-sdk
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Example environment values for the E2E tests.
# NOTE(review): presumably copied to `.env` and filled in locally — confirm.
# Base URL of the API instance the tests talk to.
API_URL=http://localhost:3002 | ||
# API key used by the authenticated tests; replace the placeholder with a real key.
TEST_API_KEY=fc-YOUR_API_KEY | ||
|
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
147 changes: 147 additions & 0 deletions
147
apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,147 @@ | ||
import FirecrawlApp from '../../index'; | ||
import { v4 as uuidv4 } from 'uuid'; | ||
import dotenv from 'dotenv'; | ||
|
||
dotenv.config(); | ||
|
||
const TEST_API_KEY = process.env.TEST_API_KEY; | ||
const API_URL = process.env.API_URL; | ||
|
||
/**
 * End-to-end tests for the FirecrawlApp SDK client.
 *
 * These tests issue real requests to the API at `API_URL` and authenticate with
 * `TEST_API_KEY` (both read from the environment at the top of this file), so
 * they need a reachable API instance and a valid key to pass.
 */
describe('FirecrawlApp E2E Tests', () => {
  /**
   * Builds a client pointed at the API under test.
   *
   * @param apiKey - Key to authenticate with; defaults to the key from the environment.
   * @returns A new FirecrawlApp instance.
   */
  const createApp = (apiKey: string | undefined = TEST_API_KEY) =>
    new FirecrawlApp({ apiKey, apiUrl: API_URL });

  /** Resolves after `ms` milliseconds; used only to space out status polls. */
  const sleep = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));

  test('should throw error for no API key', () => {
    expect(() => {
      new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
    }).toThrow("No API key provided");
  });

  test('should throw error for invalid API key on scrape', async () => {
    const invalidApp = createApp("invalid_api_key");
    await expect(invalidApp.scrapeUrl('https://firecrawl.dev')).rejects.toThrow("Request failed with status code 401");
  });

  test('should throw error for blocklisted URL on scrape', async () => {
    const app = createApp();
    const blocklistedUrl = "https://facebook.com/fake-test";
    await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
  });

  test('should return successful response with valid preview token', async () => {
    const app = createApp("this_is_just_a_preview_token");
    const response = await app.scrapeUrl('https://firecrawl.dev');
    expect(response).not.toBeNull();
    expect(response.data.content).toContain("🔥 Firecrawl");
  }, 10000); // 10 seconds timeout

  test('should return successful response for valid scrape', async () => {
    const app = createApp();
    const response = await app.scrapeUrl('https://firecrawl.dev');
    expect(response).not.toBeNull();
    expect(response.data.content).toContain("🔥 Firecrawl");
    expect(response.data).toHaveProperty('markdown');
    expect(response.data).toHaveProperty('metadata');
    // HTML is only expected when explicitly requested (see the includeHtml test).
    expect(response.data).not.toHaveProperty('html');
  }, 10000); // 10 seconds timeout

  test('should return successful response with valid API key and include HTML', async () => {
    const app = createApp();
    const response = await app.scrapeUrl('https://firecrawl.dev', { pageOptions: { includeHtml: true } });
    expect(response).not.toBeNull();
    expect(response.data.content).toContain("🔥 Firecrawl");
    expect(response.data.markdown).toContain("🔥 Firecrawl");
    expect(response.data.html).toContain("<h1");
  }, 10000); // 10 seconds timeout

  test('should return successful response for valid scrape with PDF file', async () => {
    const app = createApp();
    const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf');
    expect(response).not.toBeNull();
    expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
  }, 30000); // 30 seconds timeout

  test('should return successful response for valid scrape with PDF file without explicit extension', async () => {
    const app = createApp();
    // The scrape call is awaited, so the response is already complete here;
    // no additional delay is needed before asserting on it.
    const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001');
    expect(response).not.toBeNull();
    expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
  }, 30000); // 30 seconds timeout

  test('should throw error for invalid API key on crawl', async () => {
    const invalidApp = createApp("invalid_api_key");
    await expect(invalidApp.crawlUrl('https://firecrawl.dev')).rejects.toThrow("Request failed with status code 401");
  });

  test('should throw error for blocklisted URL on crawl', async () => {
    const app = createApp();
    const blocklistedUrl = "https://twitter.com/fake-test";
    await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
  });

  test('should return successful response for crawl and wait for completion', async () => {
    const app = createApp();
    const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true);
    expect(response).not.toBeNull();
    expect(response[0].content).toContain("🔥 Firecrawl");
  }, 60000); // 60 seconds timeout

  test('should handle idempotency key for crawl', async () => {
    const app = createApp();
    const uniqueIdempotencyKey = uuidv4();
    const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey);
    expect(response).not.toBeNull();
    expect(response[0].content).toContain("🔥 Firecrawl");

    // Re-using the same idempotency key must be rejected with a conflict.
    await expect(app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
  }, 60000); // 60 seconds timeout: same crawl-and-wait as the test above, plus a second request

  test('should check crawl status', async () => {
    const app = createApp();
    const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, false);
    expect(response).not.toBeNull();
    expect(response.jobId).toBeDefined();

    // Poll until the job completes instead of sleeping a fixed interval: a fixed
    // wait fails whenever the crawl is slower than expected and wastes time when
    // it is faster. The deadline stays below the Jest timeout for this test.
    const deadline = Date.now() + 20000;
    let statusResponse = await app.checkCrawlStatus(response.jobId);
    while (statusResponse.status !== 'completed' && Date.now() < deadline) {
      await sleep(2000);
      statusResponse = await app.checkCrawlStatus(response.jobId);
    }

    expect(statusResponse).not.toBeNull();
    expect(statusResponse.status).toBe('completed');
    expect(statusResponse.data.length).toBeGreaterThan(0);
  }, 30000); // 30 seconds timeout

  test('should return successful response for search', async () => {
    const app = createApp();
    const response = await app.search("test query");
    expect(response).not.toBeNull();
    expect(response.data[0].content).toBeDefined();
    expect(response.data.length).toBeGreaterThan(2);
  }, 30000); // 30 seconds timeout

  test('should throw error for invalid API key on search', async () => {
    const invalidApp = createApp("invalid_api_key");
    await expect(invalidApp.search("test query")).rejects.toThrow("Request failed with status code 401");
  });

  test('should perform LLM extraction', async () => {
    const app = createApp();
    const response = await app.scrapeUrl("https://mendable.ai", {
      extractorOptions: {
        mode: 'llm-extraction',
        extractionPrompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
        extractionSchema: {
          type: 'object',
          properties: {
            company_mission: { type: 'string' },
            supports_sso: { type: 'boolean' },
            is_open_source: { type: 'boolean' }
          },
          required: ['company_mission', 'supports_sso', 'is_open_source']
        }
      }
    });
    expect(response).not.toBeNull();
    expect(response.data.llm_extraction).toBeDefined();
    // The extracted object should follow the schema requested above.
    const llmExtraction = response.data.llm_extraction;
    expect(llmExtraction.company_mission).toBeDefined();
    expect(typeof llmExtraction.supports_sso).toBe('boolean');
    expect(typeof llmExtraction.is_open_source).toBe('boolean');
  }, 30000); // 30 seconds timeout
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
3 changes: 3 additions & 0 deletions
3
apps/python-sdk/firecrawl/__tests__/e2e_withAuth/.env.example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Example environment values for the Python SDK E2E tests.
# Base URL of the API instance the tests talk to.
API_URL=http://localhost:3002 | ||
# NOTE(review): presumably the absolute path to the local firecrawl.py that the
# tests load; replace with the path on your machine — verify against the test file.
ABSOLUTE_FIRECRAWL_PATH=/Users/user/firecrawl/apps/python-sdk/firecrawl/firecrawl.py | ||
# API key used by the authenticated tests; replace the placeholder with a real key.
TEST_API_KEY=fc-YOUR_API_KEY | ||
Oops, something went wrong.