Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Typing improvements. #226

Merged
merged 1 commit into from
Jun 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 1 addition & 13 deletions .flake8
Original file line number Diff line number Diff line change
@@ -1,14 +1,2 @@
[flake8]
ignore =
# Refers to the max-line length. Let's suppress the error and simply
# let black take care of how it wants to format the lines.
E501,

# Refers to "line break before/after binary operator".
# Similar to above, let black take care of the formatting.
W503,
W504,

# black disagrees with flake8, and inserts whitespace
# E203: whitespace before ':'
E203,
ignore = E203, E501, E701, E704, W503, W504
1 change: 0 additions & 1 deletion mypy.ini
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
[mypy]
exclude = .*flycheck_.*
show_error_codes = True
check_untyped_defs = True

[mypy-w3lib.*]
Expand Down
4 changes: 2 additions & 2 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ commands =
[testenv:typing]
basepython = python3
deps =
# mypy would error if pytest (or its sub) not found
# mypy would error if pytest (or its stub) not found
pytest
mypy==1.0.0
mypy==1.10.0
commands =
mypy --strict {posargs: w3lib tests}

Expand Down
24 changes: 13 additions & 11 deletions w3lib/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import re
from html.entities import name2codepoint
from typing import AnyStr, Iterable, Match, Optional, Pattern, Tuple, Union
from typing import Iterable, Match, Optional, Pattern, Tuple, Union
from urllib.parse import urljoin

from w3lib._types import StrOrBytes
Expand Down Expand Up @@ -34,7 +34,7 @@


def replace_entities(
text: AnyStr,
text: StrOrBytes,
keep: Iterable[str] = (),
remove_illegal: bool = True,
encoding: str = "utf-8",
Expand Down Expand Up @@ -99,11 +99,13 @@ def convert_entity(m: Match[str]) -> str:
return _ent_re.sub(convert_entity, to_unicode(text, encoding))


def has_entities(text: AnyStr, encoding: Optional[str] = None) -> bool:
def has_entities(text: StrOrBytes, encoding: Optional[str] = None) -> bool:
return bool(_ent_re.search(to_unicode(text, encoding)))


def replace_tags(text: AnyStr, token: str = "", encoding: Optional[str] = None) -> str:
def replace_tags(
text: StrOrBytes, token: str = "", encoding: Optional[str] = None
) -> str:
"""Replace all markup tags found in the given `text` by the given token.
By default `token` is an empty string so it just removes all tags.

Expand All @@ -129,7 +131,7 @@ def replace_tags(text: AnyStr, token: str = "", encoding: Optional[str] = None)
_REMOVECOMMENTS_RE = re.compile("<!--.*?(?:-->|$)", re.DOTALL)


def remove_comments(text: AnyStr, encoding: Optional[str] = None) -> str:
def remove_comments(text: StrOrBytes, encoding: Optional[str] = None) -> str:
"""Remove HTML Comments.

>>> import w3lib.html
Expand All @@ -144,7 +146,7 @@ def remove_comments(text: AnyStr, encoding: Optional[str] = None) -> str:


def remove_tags(
text: AnyStr,
text: StrOrBytes,
which_ones: Iterable[str] = (),
keep: Iterable[str] = (),
encoding: Optional[str] = None,
Expand Down Expand Up @@ -216,7 +218,7 @@ def remove_tag(m: Match[str]) -> str:


def remove_tags_with_content(
text: AnyStr, which_ones: Iterable[str] = (), encoding: Optional[str] = None
text: StrOrBytes, which_ones: Iterable[str] = (), encoding: Optional[str] = None
) -> str:
"""Remove tags and their content.

Expand All @@ -240,7 +242,7 @@ def remove_tags_with_content(


def replace_escape_chars(
text: AnyStr,
text: StrOrBytes,
which_ones: Iterable[str] = ("\n", "\t", "\r"),
replace_by: StrOrBytes = "",
encoding: Optional[str] = None,
Expand All @@ -262,7 +264,7 @@ def replace_escape_chars(


def unquote_markup(
text: AnyStr,
text: StrOrBytes,
keep: Iterable[str] = (),
remove_illegal: bool = True,
encoding: Optional[str] = None,
Expand Down Expand Up @@ -304,7 +306,7 @@ def _get_fragments(


def get_base_url(
text: AnyStr, baseurl: StrOrBytes = "", encoding: str = "utf-8"
text: StrOrBytes, baseurl: StrOrBytes = "", encoding: str = "utf-8"
) -> str:
"""Return the base url if declared in the given HTML `text`,
relative to the given base url.
Expand All @@ -324,7 +326,7 @@ def get_base_url(


def get_meta_refresh(
text: AnyStr,
text: StrOrBytes,
baseurl: str = "",
encoding: str = "utf-8",
ignore_tags: Iterable[str] = ("script", "noscript"),
Expand Down
32 changes: 29 additions & 3 deletions w3lib/http.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,30 @@
from base64 import b64encode
from typing import Any, AnyStr, List, Mapping, MutableMapping, Optional, Sequence, Union

from typing import (
Any,
List,
Mapping,
MutableMapping,
Optional,
Sequence,
Union,
overload,
)

from w3lib._types import StrOrBytes
from w3lib.util import to_bytes, to_unicode

HeadersDictInput = Mapping[bytes, Union[Any, Sequence[bytes]]]
HeadersDictOutput = MutableMapping[bytes, List[bytes]]


@overload
def headers_raw_to_dict(headers_raw: bytes) -> HeadersDictOutput: ...


@overload
def headers_raw_to_dict(headers_raw: None) -> None: ...


def headers_raw_to_dict(headers_raw: Optional[bytes]) -> Optional[HeadersDictOutput]:
r"""
Convert raw headers (single multi-line bytestring)
Expand Down Expand Up @@ -52,6 +70,14 @@ def headers_raw_to_dict(headers_raw: Optional[bytes]) -> Optional[HeadersDictOut
return result_dict


@overload
def headers_dict_to_raw(headers_dict: HeadersDictInput) -> bytes: ...


@overload
def headers_dict_to_raw(headers_dict: None) -> None: ...


def headers_dict_to_raw(headers_dict: Optional[HeadersDictInput]) -> Optional[bytes]:
r"""
Returns a raw HTTP headers representation of headers
Expand Down Expand Up @@ -85,7 +111,7 @@ def headers_dict_to_raw(headers_dict: Optional[HeadersDictInput]) -> Optional[by


def basic_auth_header(
username: AnyStr, password: AnyStr, encoding: str = "ISO-8859-1"
username: StrOrBytes, password: StrOrBytes, encoding: str = "ISO-8859-1"
) -> bytes:
"""
Return an `Authorization` header field value for `HTTP Basic Access Authentication (RFC 2617)`_
Expand Down
19 changes: 19 additions & 0 deletions w3lib/url.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
Tuple,
Union,
cast,
overload,
)
from urllib.parse import _coerce_args # type: ignore
from urllib.parse import (
Expand Down Expand Up @@ -221,6 +222,24 @@ def is_url(text: str) -> bool:
return text.partition("://")[0] in ("file", "http", "https")


@overload
def url_query_parameter(
url: StrOrBytes,
parameter: str,
default: None = None,
keep_blank_values: Union[bool, int] = 0,
) -> Optional[str]: ...


@overload
def url_query_parameter(
url: StrOrBytes,
parameter: str,
default: str,
keep_blank_values: Union[bool, int] = 0,
) -> str: ...


def url_query_parameter(
url: StrOrBytes,
parameter: str,
Expand Down
Loading