Added Python SDK e2e tests with pytest

Some of the tests are still missing, though.
Axmoney · May 24, 2024 · 397769c · 397769c
1 parent 4ce2859
commit 397769c
Show file tree

Hide file tree

Showing 4 changed files with 116 additions and 0 deletions.
diff --git a/apps/python-sdk/README.md b/apps/python-sdk/README.md
@@ -117,6 +117,25 @@ status = app.check_crawl_status(job_id)
 
 The SDK handles errors returned by the Firecrawl API and raises appropriate exceptions. If an error occurs during a request, an exception will be raised with a descriptive error message.
 
+## Running the Tests with Pytest
+
+To ensure the functionality of the Firecrawl Python SDK, we have included end-to-end tests using `pytest`. These tests cover various aspects of the SDK, including URL scraping, web searching, and website crawling.
+
+### Running the Tests
+
+To run the tests, execute the following commands from the `apps/python-sdk` directory:
+
+Install pytest:
+```bash
+pip install pytest
+```
+
+Run:
+```bash
+pytest firecrawl/__tests__/e2e_withAuth/test.py
+```
+
+
 ## Contributing
 
 Contributions to the Firecrawl Python SDK are welcome! If you find any issues or have suggestions for improvements, please open an issue or submit a pull request on the GitHub repository.

diff --git a/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/__init__.py b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/__init__.py
diff --git a/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py
@@ -0,0 +1,96 @@
+import os
+
+import pytest
+from firecrawl import FirecrawlApp
+
+# API key used by the authenticated e2e tests. It is read from the
+# TEST_API_KEY environment variable so a real key never has to be written
+# into this file; the original placeholder is kept as the fallback value.
+TEST_API_KEY = os.environ.get("TEST_API_KEY", "fc-YOUR_API_KEY")
+# Site that the scrape and crawl tests run against.
+TEST_URL = "https://firecrawl.dev"
+
+def test_scrape_url_e2e():
+    """Scrape TEST_URL with the test key and check the returned content."""
+    app = FirecrawlApp(api_key=TEST_API_KEY)
+    response = app.scrape_url(TEST_URL)
+    # No debug print here: pytest reports the compared values when an
+    # assertion fails.
+    assert response is not None
+    assert 'content' in response
+    assert "🔥 Firecrawl" in response['content']
+
+def test_scrape_url_invalid_api_key():
+    """Scraping with an invalid API key must raise with a 401 message."""
+    expected_message = "Failed to scrape URL. Status code: 401"
+    client = FirecrawlApp(api_key="invalid_api_key")
+    with pytest.raises(Exception) as raised:
+        client.scrape_url(TEST_URL)
+    assert expected_message in str(raised.value)
+
+def test_crawl_url_e2e():
+    """Crawl TEST_URL with a 'blog/*' exclude and check the first result."""
+    client = FirecrawlApp(api_key=TEST_API_KEY)
+    crawl_params = {'crawlerOptions': {'excludes': ['blog/*']}}
+    pages = client.crawl_url(TEST_URL, crawl_params, True)
+    assert pages is not None
+    assert len(pages) > 0
+    first_page = pages[0]
+    assert 'content' in first_page
+    assert "🔥 Firecrawl" in first_page['content']
+
+def test_crawl_url_invalid_api_key():
+    """Starting a crawl with an invalid API key must raise with a 401 message."""
+    expected_message = "Unexpected error occurred while trying to start crawl job. Status code: 401"
+    client = FirecrawlApp(api_key="invalid_api_key")
+    with pytest.raises(Exception) as raised:
+        client.crawl_url(TEST_URL)
+    assert expected_message in str(raised.value)
+
+def test_search_e2e():
+    """Search with the test key and check that several results come back."""
+    app = FirecrawlApp(api_key=TEST_API_KEY)
+    response = app.search("test query")
+    assert response is not None
+    # Check the size before indexing so an empty result fails this assertion
+    # instead of raising IndexError on response[0].
+    assert len(response) > 2
+    assert 'content' in response[0]
+
+def test_search_invalid_api_key():
+    """Searching with an invalid API key must raise with a 401 message."""
+    expected_message = "Failed to search. Status code: 401"
+    client = FirecrawlApp(api_key="invalid_api_key")
+    with pytest.raises(Exception) as raised:
+        client.search("test query")
+    assert expected_message in str(raised.value)
+
+def test_crawl_with_fast_mode():
+    """Crawl TEST_URL with crawlerOptions mode 'fast' and check the first result."""
+    client = FirecrawlApp(api_key=TEST_API_KEY)
+    crawl_params = {'crawlerOptions': {'mode': 'fast'}}
+    pages = client.crawl_url(TEST_URL, crawl_params, True)
+    assert pages is not None
+    assert len(pages) > 0
+    first_page = pages[0]
+    assert 'content' in first_page
+
+def test_crawl_with_html_inclusion():
+    """Start a crawl with pageOptions includeHtml set and check for a jobId."""
+    client = FirecrawlApp(api_key=TEST_API_KEY)
+    crawl_params = {'pageOptions': {'includeHtml': True}}
+    job = client.crawl_url(TEST_URL, crawl_params, False)
+    assert job is not None
+    assert 'jobId' in job
+
+def test_crawl_with_pdf_extraction():
+    """Start a crawl of an arXiv PDF URL and check that a jobId is returned."""
+    client = FirecrawlApp(api_key=TEST_API_KEY)
+    pdf_url = "https://arxiv.org/pdf/astro-ph/9301001"
+    excluded_paths = ['list/*', 'login', 'abs/*', 'static/*', 'about/*', 'archive/*']
+    crawl_params = {'crawlerOptions': {'limit': 10, 'excludes': excluded_paths}}
+    job = client.crawl_url(pdf_url, crawl_params, False)
+    assert job is not None
+    assert 'jobId' in job
+
+def test_timeout_during_scraping():
+    """Scraping with {'timeout': 1000} is expected to raise with a 408 message."""
+    expected_message = 'Failed to scrape URL. Status code: 408'
+    client = FirecrawlApp(api_key=TEST_API_KEY)
+    with pytest.raises(Exception) as raised:
+        client.scrape_url(TEST_URL, {'timeout': 1000})
+    assert expected_message in str(raised.value)
+
+def test_llm_extraction():
+    """Scrape mendable.ai in 'llm-extraction' mode and check the extracted fields."""
+    client = FirecrawlApp(api_key=TEST_API_KEY)
+    # Schema passed as extractionSchema; all three fields are required.
+    schema = {
+        'type': 'object',
+        'properties': {
+            'company_mission': {'type': 'string'},
+            'supports_sso': {'type': 'boolean'},
+            'is_open_source': {'type': 'boolean'},
+        },
+        'required': ['company_mission', 'supports_sso', 'is_open_source'],
+    }
+    extractor_options = {
+        'mode': 'llm-extraction',
+        'extractionPrompt': "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
+        'extractionSchema': schema,
+    }
+    result = client.scrape_url("https://mendable.ai", {'extractorOptions': extractor_options})
+    assert result is not None
+    assert 'llm_extraction' in result
+    extracted = result['llm_extraction']
+    assert 'company_mission' in extracted
+    assert isinstance(extracted['supports_sso'], bool)
+    assert isinstance(extracted['is_open_source'], bool)
diff --git a/apps/python-sdk/setup.py b/apps/python-sdk/setup.py
@@ -10,5 +10,6 @@
     packages=find_packages(),
     install_requires=[
         'requests',
+        'pytest',
     ],
 )