Skip to content

Commit

Permalink
added github action workflow
Browse files Browse the repository at this point in the history
  • Loading branch information
rafaelsideguide committed May 27, 2024
1 parent 397769c commit 63772ea
Show file tree
Hide file tree
Showing 9 changed files with 351 additions and 58 deletions.
31 changes: 31 additions & 0 deletions .github/workflows/python-tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
name: Run Python SDK E2E Tests

on:
pull_request:
branches:
- main
env:
TEST_API_KEY: ${{ secrets.TEST_API_KEY }}
jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.10"]

steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python setup.py
working-directory: ./apps/python-sdk
- name: Test with pytest
run: |
cd apps/python-sdk
pytest firecrawl/__tests__/e2e_withAuth/test.py
working-directory: ./apps/python-sdk
3 changes: 3 additions & 0 deletions apps/js-sdk/firecrawl/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
API_URL=http://localhost:3002
TEST_API_KEY=fc-YOUR_API_KEY

43 changes: 37 additions & 6 deletions apps/js-sdk/firecrawl/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 5 additions & 2 deletions apps/js-sdk/firecrawl/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"build": "tsc",
"publish": "npm run build && npm publish --access public",
"publish-beta": "npm run build && npm publish --access public --tag beta",
"test": "jest src/**/*.test.ts"
"test": "jest src/__tests__/**/*.test.ts"
},
"repository": {
"type": "git",
Expand All @@ -19,6 +19,7 @@
"license": "MIT",
"dependencies": {
"axios": "^1.6.8",
"uuid": "^9.0.1",
"zod": "^3.23.8",
"zod-to-json-schema": "^3.23.0"
},
Expand All @@ -29,7 +30,9 @@
"devDependencies": {
"@jest/globals": "^29.7.0",
"@types/axios": "^0.14.0",
"@types/node": "^20.12.7",
"@types/jest": "^29.5.12",
"@types/node": "^20.12.12",
"@types/uuid": "^9.0.8",
"jest": "^29.7.0",
"ts-jest": "^29.1.2",
"typescript": "^5.4.5"
Expand Down
147 changes: 147 additions & 0 deletions apps/js-sdk/firecrawl/src/__tests__/e2e_withAuth/index.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
import FirecrawlApp from '../../index';
import { v4 as uuidv4 } from 'uuid';
import dotenv from 'dotenv';

dotenv.config();

const TEST_API_KEY = process.env.TEST_API_KEY;
const API_URL = process.env.API_URL;

describe('FirecrawlApp E2E Tests', () => {
test('should throw error for no API key', () => {
expect(() => {
new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
}).toThrow("No API key provided");
});

test('should throw error for invalid API key on scrape', async () => {
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
await expect(invalidApp.scrapeUrl('https://firecrawl.dev')).rejects.toThrow("Request failed with status code 401");
});

test('should throw error for blocklisted URL on scrape', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const blocklistedUrl = "https://facebook.com/fake-test";
await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
});

test('should return successful response with valid preview token', async () => {
const app = new FirecrawlApp({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL });
const response = await app.scrapeUrl('https://firecrawl.dev');
expect(response).not.toBeNull();
expect(response.data.content).toContain("🔥 Firecrawl");
}, 10000); // 10 seconds timeout

test('should return successful response for valid scrape', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.scrapeUrl('https://firecrawl.dev');
expect(response).not.toBeNull();
expect(response.data.content).toContain("🔥 Firecrawl");
expect(response.data).toHaveProperty('markdown');
expect(response.data).toHaveProperty('metadata');
expect(response.data).not.toHaveProperty('html');
}, 10000); // 10 seconds timeout

test('should return successful response with valid API key and include HTML', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.scrapeUrl('https://firecrawl.dev', { pageOptions: { includeHtml: true } });
expect(response).not.toBeNull();
expect(response.data.content).toContain("🔥 Firecrawl");
expect(response.data.markdown).toContain("🔥 Firecrawl");
expect(response.data.html).toContain("<h1");
}, 10000); // 10 seconds timeout

test('should return successful response for valid scrape with PDF file', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf');
expect(response).not.toBeNull();
expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
}, 30000); // 30 seconds timeout

test('should return successful response for valid scrape with PDF file without explicit extension', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001');
await new Promise(resolve => setTimeout(resolve, 6000)); // wait for 6 seconds
expect(response).not.toBeNull();
expect(response.data.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
}, 30000); // 30 seconds timeout

test('should throw error for invalid API key on crawl', async () => {
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
await expect(invalidApp.crawlUrl('https://firecrawl.dev')).rejects.toThrow("Request failed with status code 401");
});

test('should throw error for blocklisted URL on crawl', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const blocklistedUrl = "https://twitter.com/fake-test";
await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
});

test('should return successful response for crawl and wait for completion', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true);
expect(response).not.toBeNull();
expect(response[0].content).toContain("🔥 Firecrawl");
}, 60000); // 60 seconds timeout

test('should handle idempotency key for crawl', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const uniqueIdempotencyKey = uuidv4();
const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey);
expect(response).not.toBeNull();
expect(response[0].content).toContain("🔥 Firecrawl");

await expect(app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
}, 30000); // 30 seconds timeout

test('should check crawl status', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.crawlUrl('https://firecrawl.dev', { crawlerOptions: { excludes: ['blog/*'] } }, false);
expect(response).not.toBeNull();
expect(response.jobId).toBeDefined();

await new Promise(resolve => setTimeout(resolve, 10000)); // wait for 10 seconds
const statusResponse = await app.checkCrawlStatus(response.jobId);
expect(statusResponse).not.toBeNull();
expect(statusResponse.status).toBe('completed');
expect(statusResponse.data.length).toBeGreaterThan(0);
}, 30000); // 30 seconds timeout

test('should return successful response for search', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.search("test query");
expect(response).not.toBeNull();
expect(response.data[0].content).toBeDefined();
expect(response.data.length).toBeGreaterThan(2);
}, 30000); // 30 seconds timeout

test('should throw error for invalid API key on search', async () => {
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
await expect(invalidApp.search("test query")).rejects.toThrow("Request failed with status code 401");
});

test('should perform LLM extraction', async () => {
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
const response = await app.scrapeUrl("https://mendable.ai", {
extractorOptions: {
mode: 'llm-extraction',
extractionPrompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
extractionSchema: {
type: 'object',
properties: {
company_mission: { type: 'string' },
supports_sso: { type: 'boolean' },
is_open_source: { type: 'boolean' }
},
required: ['company_mission', 'supports_sso', 'is_open_source']
}
}
});
expect(response).not.toBeNull();
expect(response.data.llm_extraction).toBeDefined();
const llmExtraction = response.data.llm_extraction;
expect(llmExtraction.company_mission).toBeDefined();
expect(typeof llmExtraction.supports_sso).toBe('boolean');
expect(typeof llmExtraction.is_open_source).toBe('boolean');
}, 30000); // 30 seconds timeout
});
12 changes: 7 additions & 5 deletions apps/js-sdk/firecrawl/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import { zodToJsonSchema } from "zod-to-json-schema";
*/
export interface FirecrawlAppConfig {
apiKey?: string | null;
apiUrl?: string | null;
}

/**
Expand Down Expand Up @@ -63,6 +64,7 @@ export interface JobStatusResponse {
*/
export default class FirecrawlApp {
private apiKey: string;
private apiUrl: string = "https://api.firecrawl.dev";

/**
* Initializes a new instance of the FirecrawlApp class.
Expand Down Expand Up @@ -107,7 +109,7 @@ export default class FirecrawlApp {
}
try {
const response: AxiosResponse = await axios.post(
"https://api.firecrawl.dev/v0/scrape",
this.apiUrl + "/v0/scrape",
jsonData,
{ headers },
);
Expand Down Expand Up @@ -147,7 +149,7 @@ export default class FirecrawlApp {
}
try {
const response: AxiosResponse = await axios.post(
"https://api.firecrawl.dev/v0/search",
this.apiUrl + "/v0/search",
jsonData,
{ headers }
);
Expand Down Expand Up @@ -190,7 +192,7 @@ export default class FirecrawlApp {
}
try {
const response: AxiosResponse = await this.postRequest(
"https://api.firecrawl.dev/v0/crawl",
this.apiUrl + "/v0/crawl",
jsonData,
headers
);
Expand Down Expand Up @@ -220,7 +222,7 @@ export default class FirecrawlApp {
const headers: AxiosRequestHeaders = this.prepareHeaders();
try {
const response: AxiosResponse = await this.getRequest(
`https://api.firecrawl.dev/v0/crawl/status/${jobId}`,
this.apiUrl + `/v0/crawl/status/${jobId}`,
headers
);
if (response.status === 200) {
Expand Down Expand Up @@ -292,7 +294,7 @@ export default class FirecrawlApp {
): Promise<any> {
while (true) {
const statusResponse: AxiosResponse = await this.getRequest(
`https://api.firecrawl.dev/v0/crawl/status/${jobId}`,
this.apiUrl + `/v0/crawl/status/${jobId}`,
headers
);
if (statusResponse.status === 200) {
Expand Down
3 changes: 3 additions & 0 deletions apps/python-sdk/firecrawl/__tests__/e2e_withAuth/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
API_URL=http://localhost:3002
ABSOLUTE_FIRECRAWL_PATH=/Users/user/firecrawl/apps/python-sdk/firecrawl/firecrawl.py
TEST_API_KEY=fc-YOUR_API_KEY
Loading

0 comments on commit 63772ea

Please sign in to comment.