Format Python Actor templates (#213)

I used the auto-formatters (`autopep8`, `isort`) that we use in our other
Python libraries (`SDK`, `Client`) to format the Python Actor templates.

I also used a linter (`flake8`, with the plugins we use in our other libraries) to
fix some violations, such as "trailing comma" and "bare except".
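
For context, here is a minimal, hypothetical sketch of the two kinds of flake8 violations mentioned above and how they were fixed; the function and values are made up for illustration and are not taken from the templates.

```python
# Hypothetical example only - not code from the templates.

def parse_number(value: str) -> int | None:
    try:
        return int(value)
    except Exception:  # was a bare `except:`, which would also swallow KeyboardInterrupt and SystemExit
        return None


results = [
    parse_number('42'),
    parse_number('not a number'),  # trailing comma after the last element of a multi-line literal
]
print(results)  # [42, None]
```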
vdusek authored Oct 4, 2023
1 parent 46aa262 commit 83a16c1
Showing 9 changed files with 35 additions and 28 deletions.
14 changes: 8 additions & 6 deletions templates/python-beautifulsoup/src/main.py
@@ -1,14 +1,16 @@
from urllib.parse import urljoin

import requests
-from apify import Actor
from bs4 import BeautifulSoup

+from apify import Actor


async def main():
async with Actor:
# Read the Actor input
actor_input = await Actor.get_input() or {}
-start_urls = actor_input.get('start_urls', [{ 'url': 'https://apify.com' }])
+start_urls = actor_input.get('start_urls', [{'url': 'https://apify.com'}])
max_depth = actor_input.get('max_depth', 1)

if not start_urls:
@@ -20,7 +22,7 @@ async def main():
for start_url in start_urls:
url = start_url.get('url')
Actor.log.info(f'Enqueuing {url} ...')
-await default_queue.add_request({ 'url': url, 'userData': { 'depth': 0 }})
+await default_queue.add_request({'url': url, 'userData': {'depth': 0}})

# Process the requests in the queue one by one
while request := await default_queue.fetch_next_request():
@@ -43,13 +45,13 @@ async def main():
Actor.log.info(f'Enqueuing {link_url} ...')
await default_queue.add_request({
'url': link_url,
-'userData': {'depth': depth + 1 },
+'userData': {'depth': depth + 1},
})

# Push the title of the page into the default dataset
title = soup.title.string if soup.title else None
-await Actor.push_data({ 'url': url, 'title': title })
-except:
+await Actor.push_data({'url': url, 'title': title})
+except Exception:
Actor.log.exception(f'Cannot extract data from {url}.')
finally:
# Mark the request as handled so it's not processed again
6 changes: 4 additions & 2 deletions templates/python-empty/src/main.py
@@ -1,8 +1,10 @@
-# Apify SDK - toolkit for building Apify Actors (Read more at https://docs.apify.com/sdk/python)
-from apify import Actor
# Beautiful Soup - library for pulling data out of HTML and XML files (Read more at https://www.crummy.com/software/BeautifulSoup/bs4/doc/)
# from bs4 import BeautifulSoup

+# Apify SDK - toolkit for building Apify Actors (Read more at https://docs.apify.com/sdk/python)
+from apify import Actor


async def main():
async with Actor:
print('Hello from the Actor!')
13 changes: 7 additions & 6 deletions templates/python-playwright/src/main.py
@@ -1,8 +1,9 @@
from urllib.parse import urljoin

-from apify import Actor
from playwright.async_api import async_playwright

+from apify import Actor

# To run this Actor locally, you need to have the Playwright browsers installed.
# Run `playwright install --with-deps` in the Actor's virtual environment to install them.
# When running on the Apify platform, they are already included in the Actor's Docker image.
@@ -12,7 +13,7 @@ async def main():
async with Actor:
# Read the Actor input
actor_input = await Actor.get_input() or {}
-start_urls = actor_input.get('start_urls', [{ 'url': 'https://apify.com' }])
+start_urls = actor_input.get('start_urls', [{'url': 'https://apify.com'}])
max_depth = actor_input.get('max_depth', 1)

if not start_urls:
@@ -24,7 +25,7 @@ async def main():
for start_url in start_urls:
url = start_url.get('url')
Actor.log.info(f'Enqueuing {url} ...')
-await default_queue.add_request({ 'url': url, 'userData': { 'depth': 0 }})
+await default_queue.add_request({'url': url, 'userData': {'depth': 0}})

# Launch Playwright and open a new browser context
Actor.log.info('Launching Playwright...')
@@ -53,13 +54,13 @@ async def main():
Actor.log.info(f'Enqueuing {link_url} ...')
await default_queue.add_request({
'url': link_url,
-'userData': {'depth': depth + 1 },
+'userData': {'depth': depth + 1},
})

# Push the title of the page into the default dataset
title = await page.title()
-await Actor.push_data({ 'url': url, 'title': title })
-except:
+await Actor.push_data({'url': url, 'title': title})
+except Exception:
Actor.log.exception(f'Cannot extract data from {url}.')
finally:
await page.close()
2 changes: 1 addition & 1 deletion templates/python-scrapy/src/apify/main.py
@@ -1,6 +1,6 @@
from scrapy.crawler import CrawlerProcess
-from scrapy.utils.project import get_project_settings
from scrapy.settings import Settings
+from scrapy.utils.project import get_project_settings

from apify import Actor

4 changes: 2 additions & 2 deletions templates/python-scrapy/src/apify/middlewares.py
@@ -2,9 +2,9 @@

from scrapy import Spider
from scrapy.downloadermiddlewares.retry import RetryMiddleware
+from scrapy.exceptions import IgnoreRequest
from scrapy.http import Request, Response
from scrapy.utils.response import response_status_message
-from scrapy.exceptions import IgnoreRequest

from apify.storages import RequestQueue

@@ -74,7 +74,7 @@ async def _handle_retry_logic(
self,
request: Request,
response: Response,
-spider: Spider
+spider: Spider,
) -> Request | Response:
apify_request = to_apify_request(request)

1 change: 0 additions & 1 deletion templates/python-scrapy/src/apify/pipelines.py
@@ -1,5 +1,4 @@
from itemadapter import ItemAdapter

from scrapy import Item, Spider

from apify import Actor
2 changes: 1 addition & 1 deletion templates/python-scrapy/src/apify/scheduler.py
@@ -22,7 +22,7 @@ def __init__(self) -> None:
raise ValueError(
f'{ApifyScheduler.__qualname__} requires the asyncio Twisted reactor. '
'Make sure you have it configured in the TWISTED_REACTOR setting. See the asyncio '
-'documentation of Scrapy for more information.'
+'documentation of Scrapy for more information.',
)
self._rq: RequestQueue | None = None
self.spider: Spider | None = None
13 changes: 7 additions & 6 deletions templates/python-selenium/src/main.py
@@ -1,10 +1,11 @@
from urllib.parse import urljoin

-from apify import Actor
from selenium import webdriver
from selenium.webdriver.chrome.options import Options as ChromeOptions
from selenium.webdriver.common.by import By

+from apify import Actor

# To run this Actor locally, you need to have the Selenium Chromedriver installed.
# https://www.selenium.dev/documentation/webdriver/getting_started/install_drivers/
# When running on the Apify platform, it is already included in the Actor's Docker image.
@@ -14,7 +15,7 @@ async def main():
async with Actor:
# Read the Actor input
actor_input = await Actor.get_input() or {}
-start_urls = actor_input.get('start_urls', [{ 'url': 'https://apify.com' }])
+start_urls = actor_input.get('start_urls', [{'url': 'https://apify.com'}])
max_depth = actor_input.get('max_depth', 1)

if not start_urls:
@@ -26,7 +27,7 @@ async def main():
for start_url in start_urls:
url = start_url.get('url')
Actor.log.info(f'Enqueuing {url} ...')
-await default_queue.add_request({ 'url': url, 'userData': { 'depth': 0 }})
+await default_queue.add_request({'url': url, 'userData': {'depth': 0}})

# Launch a new Selenium Chrome WebDriver
Actor.log.info('Launching Chrome WebDriver...')
@@ -60,13 +61,13 @@ async def main():
Actor.log.info(f'Enqueuing {link_url} ...')
await default_queue.add_request({
'url': link_url,
-'userData': {'depth': depth + 1 },
+'userData': {'depth': depth + 1},
})

# Push the title of the page into the default dataset
title = driver.title
-await Actor.push_data({ 'url': url, 'title': title })
-except:
+await Actor.push_data({'url': url, 'title': title})
+except Exception:
Actor.log.exception(f'Cannot extract data from {url}.')
finally:
await default_queue.mark_request_as_handled(request)
8 changes: 5 additions & 3 deletions templates/python-start/src/main.py
@@ -1,10 +1,12 @@
-# Apify SDK - toolkit for building Apify Actors (Read more at https://docs.apify.com/sdk/python).
-from apify import Actor
# Requests - library for making HTTP requests in Python (Read more at https://requests.readthedocs.io)
import requests
# Beautiful Soup - library for pulling data out of HTML and XML files (Read more at https://www.crummy.com/software/BeautifulSoup/bs4/doc)
from bs4 import BeautifulSoup

+# Apify SDK - toolkit for building Apify Actors (Read more at https://docs.apify.com/sdk/python).
+from apify import Actor


async def main():
async with Actor:
# Structure of input is defined in input_schema.json
@@ -20,7 +22,7 @@ async def main():
# Extract all headings from the page (tag name and text).
headings = []
for heading in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']):
-heading_object = { 'level': heading.name, 'text': heading.text }
+heading_object = {'level': heading.name, 'text': heading.text}
print('Extracted heading', heading_object)
headings.append(heading_object)

