Skip to content

Commit

Permalink
Update versions (Py & Apify) and minor refactor (#20)
Browse files Browse the repository at this point in the history
  • Loading branch information
vdusek authored Apr 4, 2024
1 parent 9ee2834 commit 5d2efb5
Show file tree
Hide file tree
Showing 8 changed files with 640 additions and 377 deletions.
24 changes: 12 additions & 12 deletions .actor/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM apify/actor-python:3.11
FROM apify/actor-python:3.12

RUN rm -rf /usr/src/app/*
WORKDIR /usr/src/app
Expand All @@ -8,17 +8,17 @@ COPY poetry.toml ./
COPY poetry.lock ./

RUN echo "Python version:" \
&& python --version \
&& echo "Pip version:" \
&& pip --version \
&& echo "Installing Poetry:" \
&& pip install --no-cache-dir poetry~=1.7.1 \
&& echo "Installing dependencies:" \
&& poetry config virtualenvs.create false \
&& poetry install --only main --no-interaction --no-ansi \
&& rm -rf /tmp/.poetry-cache \
&& echo "All installed Python packages:" \
&& pip freeze
&& python --version \
&& echo "Pip version:" \
&& pip --version \
&& echo "Installing Poetry:" \
&& pip install --no-cache-dir poetry~=1.8 \
&& echo "Installing dependencies:" \
&& poetry config virtualenvs.create false \
&& poetry install --only main --no-interaction --no-ansi \
&& rm -rf /tmp/.poetry-cache \
&& echo "All installed Python packages:" \
&& pip freeze

COPY . ./

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/lint_and_type_checks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.11"]
python-version: ["3.12"]

steps:
- name: Checkout repository
Expand Down
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

## Environment

To ensure smooth local development, we highly recommend installing Python 3.11
To ensure smooth local development, we highly recommend installing Python 3.12
since the app has been written and tested for compatibility with this version.

We use [Poetry](https://python-poetry.org/) for project management.
Expand Down
5 changes: 2 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,8 @@ clean:
rm -rf .venv .mypy_cache .pytest_cache .ruff_cache __pycache__

install-dev:
python3.11 -m pip install --upgrade pip
python3.11 -m pip install --no-cache-dir poetry~=1.7.1
poetry install --no-interaction --no-ansi
python3 -m pip install --upgrade pip poetry
poetry install
poetry run pre-commit install

lint:
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def page_function(context: Context) -> Any:

### Context

The code runs in Python 3.11 and the `page_function` accepts a single argument `context` of type [Context](https://github.com/apify/-beautifulsoup-scraper/blob/master/src/dataclasses.py). It is a dataclass with the following fields:
The code runs in Python 3.12 and the `page_function` accepts a single argument `context` of type [Context](https://github.com/apify/-beautifulsoup-scraper/blob/master/src/dataclasses.py). It is a dataclass with the following fields:
- `soup` of type `BeautifulSoup` with the parsed HTTP payload,
- `request` of type `dict` with the HTTP request data,
- `request_queue` of type `apify.storages.RequestQueue` ([RequestQueue](https://docs.apify.com/sdk/python/reference/class/RequestQueue)) for the interaction with the HTTP request queue,
Expand Down
865 changes: 562 additions & 303 deletions poetry.lock

Large diffs are not rendered by default.

116 changes: 61 additions & 55 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,80 +5,59 @@ description = ""
name = "beautifulsoup-scraper"
readme = "README.md"
version = "1.0.0"
package-mode = false

[tool.poetry.dependencies]
python = "^3.11"
apify = "^1.3.0"
beautifulsoup4 = "^4.12.2"
python = "^3.12"
apify = "^1.7.0"
beautifulsoup4 = "^4.12.3"
html5lib = "^1.1"
httpx = "^0.25.2"
lxml = "^4.9.3"
types-beautifulsoup4 = "^4.12.0.7"
httpx = "^0.27.0"
lxml = "^5.2.1"
types-beautifulsoup4 = "^4.12.0.20240229"

[tool.poetry.group.dev.dependencies]
mypy = "^1.7.1"
pre-commit = "^3.5.0"
pytest = "^7.4.3"
ruff = "^0.1.6"

[build-system]
build-backend = "poetry.core.masonry.api"
requires = ["poetry-core"]

[tool.isort]
include_trailing_comma = true
known_first_party = ["apify", "apify_client", "apify_shared"]
line_length = 120
multi_line_output = 3
sections = ["FUTURE", "STDLIB", "THIRDPARTY", "FIRSTPARTY", "LOCALFOLDER"]
use_parentheses = true

[tool.mypy]
check_untyped_defs = true
color_output = true
disallow_incomplete_defs = true
disallow_untyped_calls = true
disallow_untyped_decorators = true
disallow_untyped_defs = true
error_summary = true
files = ["src"] # Todo: add "tests"
ignore_missing_imports = true
no_implicit_optional = true
pretty = true
python_version = 3.11
show_absolute_path = false
show_error_codes = true
warn_redundant_casts = true
warn_return_any = true
warn_unreachable = true
warn_unused_ignores = true

[tool.pytest]
asyncio_mode = "auto"
python_files = "test_*.py"
testpaths = "tests"
timeout = 1200
include_trailing_comma = true
ipython = "^8.23.0"
mypy = "^1.9.0"
pre-commit = "^3.7.0"
ruff = "^0.3.5"

[tool.ruff]
line-length = 120

[tool.ruff.lint]
select = ["ALL"]
ignore = [
"ANN101", # Missing type annotation for `{name}` in method
"ANN102", # Missing type annotation for `{name}` in classmethod
"ANN401", # Dynamically typed expressions (typing.Any) are disallowed in {filename}
"BLE001", # Do not catch blind exception
"C901", # `{name}` is too complex
"COM812", # This rule may cause conflicts when used with the formatter
"D100", # Missing docstring in public module
"D104", # Missing docstring in public package
"D107", # Missing docstring in `__init__`
"EM", # flake8-errmsg
"G004", # Logging statement uses f-string
"ISC001", # This rule may cause conflicts when used with the formatter
"FIX", # flake8-fixme
"PGH003", # Use specific rule codes when ignoring type issues
"PLR0911", # Too many return statements
"PLR0913", # Too many arguments in function definition
"PTH123", # `open()` should be replaced by `Path.open()`
"PLR0915", # Too many statements
"PTH", # flake8-use-pathlib
"PYI034", # `__aenter__` methods in classes like `{name}` usually return `self` at runtime
"PYI036", # The second argument in `__aexit__` should be annotated with `object` or `BaseException | None`
"S102", # Use of `exec` detected
"S105", # Possible hardcoded password assigned to
"TID252", # Relative imports from parent modules are bannedRuff
"S106", # Possible hardcoded password assigned to argument: "{name}"
"S301", # `pickle` and modules that wrap it can be unsafe when used to deserialize untrusted data, possible security issue
"S303", # Use of insecure MD2, MD4, MD5, or SHA1 hash function
"S311", # Standard pseudo-random generators are not suitable for cryptographic purposes
"TD002", # Missing author in TODO; try: `# TODO(<author_name>): ...` or `# TODO @<author_name>: ...
"TRY003", # Avoid specifying long messages outside the exception class
#
"D",
]

[tool.ruff.format]
Expand All @@ -99,16 +78,43 @@ indent-style = "space"
"D", # Everything from the pydocstyle
"INP001", # File {filename} is part of an implicit namespace package, add an __init__.py
"PLR2004", # Magic value used in comparison, consider replacing {value} with a constant variable
"T20", # flake8-print
"S101", # Use of assert detected
"SLF001", # Private member accessed: `{name}`
"T20", # flake8-print
"TRY301", # Abstract `raise` to an inner function
]

[tool.ruff.lint.flake8-quotes]
docstring-quotes = "double"
inline-quotes = "single"

[tool.ruff.lint.isort]
known-first-party = ["apify", "apify_client", "apify_shared"]

[tool.ruff.lint.pydocstyle]
convention = "google"

[tool.ruff.lint.isort]
known-first-party = ["apify"]

[tool.pytest.ini_options]
addopts = "-ra"
asyncio_mode = "auto"
timeout = 1200

[tool.mypy]
python_version = "3.12"
files = ["scripts", "src", "tests"]
check_untyped_defs = true
disallow_incomplete_defs = true
disallow_untyped_calls = true
disallow_untyped_decorators = true
disallow_untyped_defs = true
no_implicit_optional = true
warn_redundant_casts = true
warn_return_any = true
warn_unreachable = true
warn_unused_ignores = true

[tool.mypy-scrapy]
ignore_missing_imports = true

[tool.mypy-sortedcollections]
ignore_missing_imports = true
1 change: 0 additions & 1 deletion src/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +0,0 @@
import asyncio

0 comments on commit 5d2efb5

Please sign in to comment.