From b0d2a99dcb4ab6733f85a7bd8916ebdb236e4bd6 Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Wed, 10 Jan 2024 20:50:16 +0100 Subject: [PATCH] Update documentation of Scrapy Actor template (#261) --- .../python-beautifulsoup/requirements.txt | 2 +- templates/python-empty/requirements.txt | 2 +- templates/python-playwright/requirements.txt | 2 +- templates/python-scrapy/requirements.txt | 2 +- templates/python-scrapy/src/main.py | 23 ++++--------------- templates/python-selenium/requirements.txt | 2 +- templates/python-start/requirements.txt | 2 +- wrappers/python-scrapy/requirements_apify.txt | 2 +- .../{projectFolder}/main.template.py | 23 ++++--------------- 9 files changed, 15 insertions(+), 45 deletions(-) diff --git a/templates/python-beautifulsoup/requirements.txt b/templates/python-beautifulsoup/requirements.txt index 231186e8..259f1c5d 100644 --- a/templates/python-beautifulsoup/requirements.txt +++ b/templates/python-beautifulsoup/requirements.txt @@ -1,7 +1,7 @@ # Feel free to add your Python dependencies below. For formatting guidelines, see: # https://pip.pypa.io/en/latest/reference/requirements-file-format/ -apify ~= 1.5.0 +apify ~= 1.5.1 beautifulsoup4 ~= 4.12.2 httpx ~= 0.25.2 types-beautifulsoup4 ~= 4.12.0.7 diff --git a/templates/python-empty/requirements.txt b/templates/python-empty/requirements.txt index b8c54bdf..4ae49fa0 100644 --- a/templates/python-empty/requirements.txt +++ b/templates/python-empty/requirements.txt @@ -1,4 +1,4 @@ # Feel free to add your Python dependencies below. For formatting guidelines, see: # https://pip.pypa.io/en/latest/reference/requirements-file-format/ -apify ~= 1.5.0 +apify ~= 1.5.1 diff --git a/templates/python-playwright/requirements.txt b/templates/python-playwright/requirements.txt index 903e6698..0957eb1b 100644 --- a/templates/python-playwright/requirements.txt +++ b/templates/python-playwright/requirements.txt @@ -1,5 +1,5 @@ # Feel free to add your Python dependencies below. 
For formatting guidelines, see: # https://pip.pypa.io/en/latest/reference/requirements-file-format/ -apify ~= 1.5.0 +apify ~= 1.5.1 playwright ~= 1.39.0 diff --git a/templates/python-scrapy/requirements.txt b/templates/python-scrapy/requirements.txt index f4e7ac1a..4ac36154 100644 --- a/templates/python-scrapy/requirements.txt +++ b/templates/python-scrapy/requirements.txt @@ -1,6 +1,6 @@ # Feel free to add your Python dependencies below. For formatting guidelines, see: # https://pip.pypa.io/en/latest/reference/requirements-file-format/ -apify[scrapy] ~= 1.5.0 +apify[scrapy] ~= 1.5.1 nest-asyncio ~= 1.5.8 scrapy ~= 2.11.0 diff --git a/templates/python-scrapy/src/main.py b/templates/python-scrapy/src/main.py index ecad7530..fe9df68d 100644 --- a/templates/python-scrapy/src/main.py +++ b/templates/python-scrapy/src/main.py @@ -12,26 +12,11 @@ modifications. For instance, removing Apify-Scrapy components from the settings will break the integration between Scrapy and Apify. -Known limitations to be aware of: ---------------------------------- - -1. Asynchronous spiders and Twisted & AsyncIO integration - - Asynchronous spiders (and possibly other components) may encounter challenges due to the Twisted & AsyncIO - integration. If you need to execute a coroutine within the Spider, it's recommended to use Apify's custom - nested event loop. See the code example below or find inspiration from Apify's Scrapy components, such as - [ApifyScheduler](https://github.com/apify/apify-sdk-python/blob/v1.3.0/src/apify/scrapy/scheduler.py#L109). - - ``` - from apify.scrapy.utils import nested_event_loop - - nested_event_loop.run_until_complete(my_coroutine()) - ``` - -2. Single spider limitation +Documentation: +-------------- - The current implementation supports the execution of only one Spider per project. 
- Issue: https://github.com/apify/actor-templates/issues/202 +For an in-depth description of the Apify-Scrapy integration process, our Scrapy components, known limitations and +other details, please refer to the following documentation page: https://docs.apify.com/cli/docs/integrating-scrapy. """ from __future__ import annotations diff --git a/templates/python-selenium/requirements.txt b/templates/python-selenium/requirements.txt index e76d0986..6e9dd37d 100644 --- a/templates/python-selenium/requirements.txt +++ b/templates/python-selenium/requirements.txt @@ -1,5 +1,5 @@ # Feel free to add your Python dependencies below. For formatting guidelines, see: # https://pip.pypa.io/en/latest/reference/requirements-file-format/ -apify ~= 1.5.0 +apify ~= 1.5.1 selenium ~= 4.14.0 diff --git a/templates/python-start/requirements.txt b/templates/python-start/requirements.txt index 231186e8..259f1c5d 100644 --- a/templates/python-start/requirements.txt +++ b/templates/python-start/requirements.txt @@ -1,7 +1,7 @@ # Feel free to add your Python dependencies below. For formatting guidelines, see: # https://pip.pypa.io/en/latest/reference/requirements-file-format/ -apify ~= 1.5.0 +apify ~= 1.5.1 beautifulsoup4 ~= 4.12.2 httpx ~= 0.25.2 types-beautifulsoup4 ~= 4.12.0.7 diff --git a/wrappers/python-scrapy/requirements_apify.txt b/wrappers/python-scrapy/requirements_apify.txt index 7c7de1d7..1662a338 100644 --- a/wrappers/python-scrapy/requirements_apify.txt +++ b/wrappers/python-scrapy/requirements_apify.txt @@ -1,6 +1,6 @@ # Add your dependencies here. 
# See https://pip.pypa.io/en/latest/reference/requirements-file-format/ # for how to format them -apify[scrapy] ~= 1.5.0 +apify[scrapy] ~= 1.5.1 nest-asyncio ~= 1.5.8 scrapy ~= 2.11.0 diff --git a/wrappers/python-scrapy/{projectFolder}/main.template.py b/wrappers/python-scrapy/{projectFolder}/main.template.py index 8f113b1c..4f4cba61 100644 --- a/wrappers/python-scrapy/{projectFolder}/main.template.py +++ b/wrappers/python-scrapy/{projectFolder}/main.template.py @@ -12,26 +12,11 @@ modifications. For instance, removing Apify-Scrapy components from the settings will break the integration between Scrapy and Apify. -Known limitations to be aware of: ---------------------------------- - -1. Asynchronous spiders and Twisted & AsyncIO integration - - Asynchronous spiders (and possibly other components) may encounter challenges due to the Twisted & AsyncIO - integration. If you need to execute a coroutine within the Spider, it's recommended to use Apify's custom - nested event loop. See the code example below or find inspiration from Apify's Scrapy components, such as - [ApifyScheduler](https://github.com/apify/apify-sdk-python/blob/v1.3.0/src/apify/scrapy/scheduler.py#L109). - - ``` - from apify.scrapy.utils import nested_event_loop - - nested_event_loop.run_until_complete(my_coroutine()) - ``` - -2. Single spider limitation +Documentation: +-------------- - The current implementation supports the execution of only one Spider per project. - Issue: https://github.com/apify/actor-templates/issues/202 +For an in-depth description of the Apify-Scrapy integration process, our Scrapy components, known limitations and +other details, please refer to the following documentation page: https://docs.apify.com/cli/docs/integrating-scrapy. """ from __future__ import annotations