From 6401bf2ba5fd3b670fc26c873b36bf09d94c472e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Mon, 15 Jul 2024 13:37:20 +0200 Subject: [PATCH] Add conversion methods to Request (#5) --- .flake8 | 5 ++ .github/workflows/test.yml | 4 + docs/conf.py | 2 + docs/usage.rst | 42 ++++++---- form2request/_base.py | 51 ++++++++++++ tests/test_conversion.py | 156 +++++++++++++++++++++++++++++++++++++ tox.ini | 36 +++++++-- 7 files changed, 273 insertions(+), 23 deletions(-) create mode 100644 tests/test_conversion.py diff --git a/.flake8 b/.flake8 index f06e676..ecfd97d 100644 --- a/.flake8 +++ b/.flake8 @@ -1,6 +1,10 @@ [flake8] extend-select = TC, TC1 ignore = + # D205: 1 blank line required between summary line and description + # D400: First line should end with a period + # We need longer summary lines, especially since we use Sphinx syntax. + D205, D400 max-line-length = 88 per-file-ignores = # F401: Imported but unused @@ -8,4 +12,5 @@ per-file-ignores = # D100-D104: Missing docstring docs/conf.py:D100 tests/__init__.py:D104 + tests/test_conversion.py:D100,D103 tests/test_main.py:D100,D103 \ No newline at end of file diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 70870f8..08358e2 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -13,11 +13,15 @@ jobs: include: - python-version: '3.8' toxenv: min + - python-version: '3.8' + toxenv: min-extra - python-version: '3.8' - python-version: '3.9' - python-version: '3.10' - python-version: '3.11' - python-version: '3.12' + - python-version: '3.12' + toxenv: extra steps: - uses: actions/checkout@v4 diff --git a/docs/conf.py b/docs/conf.py index 8f3ddf4..8cc5f26 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -13,7 +13,9 @@ intersphinx_mapping = { "lxml": ("https://lxml.de/apidoc/", None), "parsel": ("https://parsel.readthedocs.io/en/stable", None), + "poet": ("https://web-poet.readthedocs.io/en/latest/", None), "python": ("https://docs.python.org/3", 
None), + "requests": ("https://requests.readthedocs.io/en/latest/", None), "scrapy": ("https://docs.scrapy.org/en/latest", None), } diff --git a/docs/usage.rst b/docs/usage.rst index 08abe3a..21b0344 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -15,20 +15,20 @@ You can use :func:`~form2request.form2request` to generate form submission request data: >>> from form2request import form2request ->>> req = form2request(form) ->>> req +>>> request_data = form2request(form) +>>> request_data Request(url='https://example.com?foo=bar', method='GET', headers=[], body=b'') :func:`~form2request.form2request` does not make requests, but you can use its -output to build requests with any HTTP client software, e.g. with the requests_ -library: - -.. _requests: https://requests.readthedocs.io/en/latest/ +output to build requests with any HTTP client software. It also provides +:ref:`conversion methods for common use cases `, e.g. for the +:doc:`requests ` library: .. _requests-example: >>> import requests ->>> requests.request(req.method, req.url, headers=req.headers, data=req.body) # doctest: +SKIP +>>> request = request_data.to_requests() +>>> requests.Session().send(request) # doctest: +SKIP :func:`~form2request.form2request` supports :ref:`user-defined form data @@ -205,18 +205,28 @@ Using request data The output of :func:`~form2request.form2request`, :class:`~form2request.Request`, is a simple request data container: ->>> req = form2request(form) ->>> req +>>> request_data = form2request(form) +>>> request_data Request(url='https://example.com?foo=bar', method='GET', headers=[], body=b'') While :func:`~form2request.form2request` does not make requests, you can use its output request data to build an actual request with any HTTP client -software, like the requests_ library (see an example :ref:`above -`) or the :doc:`Scrapy ` web scraping -framework: +software. 
+ +:class:`~form2request.Request` also provides conversion methods for common use +cases: + +- :class:`~form2request.Request.to_scrapy`, for :doc:`Scrapy 1.1.0+ + `: + + >>> request_data.to_scrapy(callback=self.parse) # doctest: +SKIP + + +- :class:`~form2request.Request.to_requests`, for :doc:`requests 1.0.0+ + ` (see an example :ref:`above `). -.. _Scrapy: https://docs.scrapy.org/en/latest/ +- :class:`~form2request.Request.to_poet`, for :doc:`web-poet 0.2.0+ + `: ->>> from scrapy import Request ->>> Request(req.url, method=req.method, headers=req.headers, body=req.body) - + >>> request_data.to_poet() + HttpRequest(url=RequestUrl('https://example.com?foo=bar'), method='GET', headers=<HttpRequestHeaders()>, body=b'') diff --git a/form2request/_base.py b/form2request/_base.py index ed685b2..0ed3f89 100644 --- a/form2request/_base.py +++ b/form2request/_base.py @@ -178,6 +178,57 @@ class Request: headers: list[tuple[str, str]] body: bytes + def to_poet(self, **kwargs): + """Convert the request to :class:`web_poet.HttpRequest + `. + + All *kwargs* are passed to :class:`web_poet.HttpRequest + ` as is. + """ + import web_poet + + return web_poet.HttpRequest( + url=self.url, + method=self.method, + headers=self.headers, + body=self.body, + **kwargs, + ) + + def to_requests(self, **kwargs): + """Convert the request to :class:`requests.PreparedRequest`. + + All *kwargs* are passed to :class:`requests.Request` as is. + """ + import requests + + request = requests.Request( + self.method, + self.url, + headers=dict(self.headers), + data=self.body, + **kwargs, + ) + return request.prepare() + + def to_scrapy(self, callback, **kwargs): + """Convert the request to :class:`scrapy.Request + `. + + All *kwargs* are passed to :class:`scrapy.Request + ` as is. 
+ """ + import scrapy # type: ignore[import-untyped] + + return scrapy.Request( + self.url, + callback=callback, + method=self.method, + headers=self.headers, + body=self.body, + **kwargs, + ) + def form2request( form: FormElement | Selector | SelectorList, diff --git a/tests/test_conversion.py b/tests/test_conversion.py new file mode 100644 index 0000000..8a33091 --- /dev/null +++ b/tests/test_conversion.py @@ -0,0 +1,156 @@ +import pytest + +from form2request import Request + +web_poet = pytest.importorskip("web_poet") +scrapy = pytest.importorskip("scrapy") +requests = pytest.importorskip("requests") + + +def fake_scrapy_callback(self, response): + pass + + +@pytest.mark.parametrize( + ("request_data", "method", "kwargs", "expected"), + ( + # GET + *( + ( + Request( + url="https://example.com?foo=bar", + method="GET", + headers=[], + body=b"", + ), + method, + kwargs, + expected, + ) + for method, kwargs, expected in ( + ( + "poet", + {}, + web_poet.HttpRequest( + url=web_poet.RequestUrl("https://example.com?foo=bar"), + method="GET", + headers=web_poet.HttpRequestHeaders(), + body=web_poet.HttpRequestBody(b""), + ), + ), + ( + "requests", + {}, + requests.Request("GET", "https://example.com?foo=bar").prepare(), + ), + ( + "scrapy", + {"callback": fake_scrapy_callback}, + scrapy.Request( + "https://example.com?foo=bar", callback=fake_scrapy_callback + ), + ), + ) + ), + # POST + *( + ( + Request( + url="https://example.com", + method="POST", + headers=[("Content-Type", "application/x-www-form-urlencoded")], + body=b"foo=bar", + ), + method, + kwargs, + expected, + ) + for method, kwargs, expected in ( + ( + "poet", + {}, + web_poet.HttpRequest( + url=web_poet.RequestUrl("https://example.com"), + method="POST", + headers=web_poet.HttpRequestHeaders( + {"Content-Type": "application/x-www-form-urlencoded"} + ), + body=web_poet.HttpRequestBody(b"foo=bar"), + ), + ), + ( + "requests", + {}, + requests.Request( + "POST", + "https://example.com", + 
headers={"Content-Type": "application/x-www-form-urlencoded"}, + data=b"foo=bar", + ).prepare(), + ), + ( + "scrapy", + {"callback": fake_scrapy_callback}, + scrapy.Request( + "https://example.com", + method="POST", + headers={"Content-Type": "application/x-www-form-urlencoded"}, + body=b"foo=bar", + callback=fake_scrapy_callback, + ), + ), + ) + ), + # kwargs + ( + Request( + url="https://example.com", + method="POST", + headers=[("Content-Type", "application/x-www-form-urlencoded")], + body=b"foo=bar", + ), + "requests", + {"params": {"foo": "bar"}}, + requests.Request( + "POST", + "https://example.com?foo=bar", + headers={"Content-Type": "application/x-www-form-urlencoded"}, + data=b"foo=bar", + ).prepare(), + ), + ( + Request( + url="https://example.com", + method="POST", + headers=[("Content-Type", "application/x-www-form-urlencoded")], + body=b"foo=bar", + ), + "scrapy", + {"callback": fake_scrapy_callback, "meta": {"foo": "bar"}}, + scrapy.Request( + "https://example.com", + method="POST", + headers={"Content-Type": "application/x-www-form-urlencoded"}, + body=b"foo=bar", + callback=fake_scrapy_callback, + meta={"foo": "bar"}, + ), + ), + ), +) +def test_conversion(request_data, method, kwargs, expected): + actual = getattr(request_data, f"to_{method}")(**kwargs) + if method == "poet": + for field in ("method", "headers", "body"): + assert getattr(actual, field) == getattr(expected, field) + # RequestUrl(…) != RequestUrl(…) + assert str(actual.url) == str(expected.url) + elif method == "requests": + # Request(…).prepare() != Request(…).prepare() + for field in ("url", "method", "headers", "body"): + assert getattr(actual, field) == getattr(expected, field) + else: + assert method == "scrapy" + # Request(…) != Request(…) + for field in ("url", "method", "headers", "body", "callback", "meta"): + assert getattr(actual, field) == getattr(expected, field) diff --git a/tox.ini b/tox.ini index 1553b72..a3b7043 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ 
[tox] -envlist = pre-commit,mypy,docs,doctest,twinecheck,min,py38,py39,py310,py311,py312 +envlist = pre-commit,mypy,docs,doctest,twinecheck,min,min-extra,py38,py39,py310,py311,py312,extra [testenv] deps = @@ -12,14 +12,35 @@ commands = --cov=form2request \ {posargs:tests} -[testenv:min] -basepython = python3.8 +[min] deps = {[testenv]deps} lxml==4.4.1 parsel==1.8.1 + +[testenv:min] +basepython = python3.8 +deps = + {[min]deps} w3lib==1.19.0 +[testenv:extra] +deps = + {[testenv]deps} + requests + scrapy + web-poet + +[testenv:min-extra] +basepython = {[testenv:min]basepython} +deps = + {[min]deps} + # web-poet >= 0.2.0 requires w3lib >= 1.22.0 + w3lib==1.22.0 + requests==1.0.0 + scrapy==1.1.0 + web-poet==0.2.0 + [testenv:pre-commit] deps = pre-commit @@ -29,8 +50,11 @@ commands = pre-commit run --all-files --show-diff-on-failure basepython = python3.12 deps = mypy==1.10.0 - pytest lxml-stubs + pytest + scrapy + types-requests + web-poet commands = mypy form2request tests @@ -46,10 +70,8 @@ commands = [testenv:doctest] deps = - {[testenv]deps} + {[testenv:extra]deps} parsel - requests - scrapy commands = pytest \ --doctest-glob="*.rst" --doctest-modules \