Skip to content

Commit

Permalink
Migrate from Flake8 & Autopep8 to Ruff (#18)
Browse files Browse the repository at this point in the history
  • Loading branch information
vdusek authored Nov 16, 2023
1 parent 1799738 commit 3835188
Show file tree
Hide file tree
Showing 8 changed files with 459 additions and 767 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ __pycache__
.dmypy.json
dmypy.json
.pytest_cache
.ruff_cache

.scrapy
*.log
Expand Down
12 changes: 7 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
.PHONY: clean install-dev lint type-check check-code format

DIRS_WITH_CODE = src

clean:
rm -rf .venv .mypy_cache .pytest_cache __pycache__
rm -rf .venv .mypy_cache .pytest_cache .ruff_cache __pycache__

install-dev:
python3.11 -m pip install --upgrade pip
Expand All @@ -10,13 +12,13 @@ install-dev:
poetry run pre-commit install

lint:
poetry run flake8
poetry run ruff check $(DIRS_WITH_CODE)

type-check:
poetry run mypy
poetry run mypy $(DIRS_WITH_CODE)

check-code: lint type-check

format:
poetry run isort src
poetry run autopep8 --in-place --recursive src
poetry run ruff check --fix $(DIRS_WITH_CODE)
poetry run ruff format $(DIRS_WITH_CODE)
1,063 changes: 365 additions & 698 deletions poetry.lock

Large diffs are not rendered by default.

108 changes: 60 additions & 48 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,65 +7,24 @@ readme = "README.md"
version = "1.0.0"

[tool.poetry.dependencies]
apify = "^1.1.2"
apify = "^1.2.0"
beautifulsoup4 = "^4.12.2"
html5lib = "^1.1"
httpx = "^0.24.1"
httpx = "^0.25.1"
lxml = "^4.9.3"
python = "^3.11"
types-beautifulsoup4 = "^4.12.0.5"
types-beautifulsoup4 = "^4.12.0.7"

[tool.poetry.group.dev.dependencies]
autopep8 = "^2.0.2"
flake8 = "^6.1.0"
flake8-bugbear = "^23.7.10"
flake8-commas = "^2.1.0"
flake8-comprehensions = "^3.14.0"
flake8-datetimez = "^20.10.0"
flake8-docstrings = "^1.7.0"
flake8-encodings = "^0.5.0.post1"
flake8-isort = "^6.0.0"
flake8-noqa = "^1.3.2"
flake8-pyproject = "^1.2.3"
flake8-pytest-style = "^1.7.2"
flake8-quotes = "^3.3.2"
flake8-unused-arguments = "^0.0.13"
isort = "^5.12.0"
mypy = "^1.4.1"
pep8-naming = "^0.13.3"
pre-commit = "^3.3.3"
mypy = "^1.7.0"
pre-commit = "^3.5.0"
pytest = "^7.4.3"
ruff = "^0.1.5"

[build-system]
build-backend = "poetry.core.masonry.api"
requires = ["poetry-core"]

[tool.autopep8]
max_line_length = 120

[tool.flake8]
docstring-convention = "all" # Google docstring convention + D204 & D401
filename = ["./src/*.py", "./tests/*.py"]
ignore = [
"D100",
"D104",
"D203",
"D213",
"D215",
"D401",
"D406",
"D407",
"D408",
"D409",
"D413",
"U101",
]
max_line_length = 120
per-file-ignores = ["tests/*: D"]
pytest-fixture-no-parentheses = true
pytest-mark-no-parentheses = true
unused-arguments-ignore-overload-functions = true
unused-arguments-ignore-stub-functions = true

[tool.isort]
include_trailing_comma = true
known_first_party = ["apify", "apify_client", "apify_shared"]
Expand Down Expand Up @@ -100,3 +59,56 @@ python_files = "test_*.py"
testpaths = "tests"
timeout = 1200
include_trailing_comma = true

[tool.ruff]
line-length = 120
select = ["ALL"]
ignore = [
"ANN401", # Dynamically typed expressions (typing.Any) are disallowed in {name}
"BLE001", # Do not catch blind exception
"COM812", # This rule may cause conflicts when used with the formatter
"D100", # Missing docstring in public module
"D104", # Missing docstring in public package
"EM", # flake8-errmsg
"ISC001", # This rule may cause conflicts when used with the formatter
"FIX", # flake8-fixme
"PGH003", # Use specific rule codes when ignoring type issues
"PLR0913", # Too many arguments in function definition
"PTH123", # `open()` should be replaced by `Path.open()`
"S102", # Use of `exec` detected
"S105", # Possible hardcoded password assigned to
"TID252", # Relative imports from parent modules are banned
"TRY003", # Avoid specifying long messages outside the exception class
]

[tool.ruff.format]
quote-style = "single"
indent-style = "space"

[tool.ruff.lint.per-file-ignores]
"**/__init__.py" = [
"F401", # Unused imports
]
"**/{scripts}/*" = [
"D", # Everything from the pydocstyle
"INP001", # File {filename} is part of an implicit namespace package, add an __init__.py
"PLR2004", # Magic value used in comparison, consider replacing {value} with a constant variable
"T20", # flake8-print
]
"**/{tests}/*" = [
"D", # Everything from the pydocstyle
"INP001", # File {filename} is part of an implicit namespace package, add an __init__.py
"PLR2004", # Magic value used in comparison, consider replacing {value} with a constant variable
"T20", # flake8-print
"S101", # Use of assert detected
]

[tool.ruff.lint.flake8-quotes]
docstring-quotes = "double"
inline-quotes = "single"

[tool.ruff.lint.isort]
known-first-party = ["apify", "apify_client", "apify_shared"]

[tool.ruff.lint.pydocstyle]
convention = "google"
1 change: 1 addition & 0 deletions src/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
import asyncio
16 changes: 10 additions & 6 deletions src/dataclasses.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
from dataclasses import dataclass
from typing import Any
from __future__ import annotations

from bs4 import BeautifulSoup
from httpx import Response
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any

from apify import Actor
from apify.storages import RequestQueue

if TYPE_CHECKING:
from bs4 import BeautifulSoup
from httpx import Response

from apify.storages import RequestQueue


@dataclass(frozen=True)
Expand All @@ -24,7 +28,7 @@ class ActorInputData:
page_function: str

@classmethod
async def from_input(cls) -> 'ActorInputData':
async def from_input(cls: type[ActorInputData]) -> ActorInputData:
"""Instantiate the class from Actor input."""
actor_input = await Actor.get_input() or {}

Expand Down
4 changes: 2 additions & 2 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ async def main() -> None:
Actor.log.info(f'Scraping {url} ...')

try:
# Todo: Think about using the same client for the whole request queue. It was discussed here -
# https://github.com/apify/actor-beautifulsoup-scraper/pull/1#pullrequestreview-1518377074.
# The usage of the same HTTPX client for the whole request queue was discussed here
# https://github.com/apify/actor-beautifulsoup-scraper/pull/1#pullrequestreview-1518377074
async with AsyncClient(proxies=proxies) as client:
response = await client.get(url, timeout=aid.request_timeout)

Expand Down
21 changes: 13 additions & 8 deletions src/utils.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,25 @@
from __future__ import annotations

import re
from inspect import iscoroutinefunction
from typing import Callable, cast
from typing import TYPE_CHECKING, Callable, cast
from urllib.parse import urljoin

from bs4 import BeautifulSoup

from apify import Actor
from apify.storages import RequestQueue

from .dataclasses import Context

if TYPE_CHECKING:
from bs4 import BeautifulSoup

from apify.storages import RequestQueue


USER_DEFINED_FUNCTION_NAME = 'page_function'


async def get_proxies_from_conf(proxy_configuration: dict | None) -> dict | None:
"""Retrieves the proxies dictionary based on the provided proxy configuration.
"""Retrieve the proxies dictionary based on the provided proxy configuration.
Args:
proxy_configuration: The proxy configuration dictionary. If None, no proxies will be used.
Expand Down Expand Up @@ -44,7 +49,7 @@ async def update_request_queue(
link_selector: str,
link_patterns: list[str],
) -> None:
"""Updates the request queue with new links found in the response.
"""Update the request queue with new links found in the response.
This function parses the HTML content of the response using BeautifulSoup and extracts links based
on the provided CSS selector. It then checks each link against the specified regex patterns to determine
Expand Down Expand Up @@ -94,7 +99,7 @@ async def update_request_queue(


async def extract_user_function(page_function: str) -> Callable:
"""Extracts the user-defined function using exec and returns it as a Callable.
"""Extract the user-defined function using exec and return it as a Callable.
This function uses `exec` internally to execute the `page_function` code in a separate scope. The `page_function`
should be a valid Python code snippet defining a function named `USER_DEFINED_FUNCTION_NAME`.
Expand All @@ -121,7 +126,7 @@ async def extract_user_function(page_function: str) -> Callable:


async def execute_user_function(context: Context, user_defined_function: Callable) -> None:
"""Executes the user-defined function with the provided context and pushes data to the Actor.
"""Execute the user-defined function with the provided context and push data to the Actor.
This function checks if the provided user-defined function is a coroutine. If it is, the function is awaited.
If it is not, it is executed directly.
Expand Down

0 comments on commit 3835188

Please sign in to comment.