Skip to content

Commit

Permalink
Merge pull request #230 from Gallaecio/credential-case
Browse files Browse the repository at this point in the history
canonicalize_url: do not apply lowercase to userinfo
  • Loading branch information
wRAR authored Jun 12, 2024
2 parents d7c3307 + a4b444c commit cd54253
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 3 deletions.
6 changes: 6 additions & 0 deletions tests/test_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -1384,6 +1384,12 @@ def test_domains_are_case_insensitive(self):
canonicalize_url("http://www.EXAMPLE.com/"), "http://www.example.com/"
)

def test_userinfo_is_case_sensitive(self):
    """Check that canonicalize_url keeps the case of the userinfo part.

    The input URL has mixed-case credentials before the ``@`` and an
    upper-case host after it. Only the host is expected to come back in
    lower case; the credentials must be returned unchanged.
    """
    # NOTE(review): the credentials in these literals were reconstructed
    # after an e-mail-obfuscation placeholder had replaced them — confirm
    # the exact strings against the upstream test suite.
    self.assertEqual(
        canonicalize_url("sftp://UsEr:PaSsWoRd@www.EXAMPLE.com/"),
        "sftp://UsEr:PaSsWoRd@www.example.com/",
    )

def test_canonicalize_idns(self):
self.assertEqual(
canonicalize_url("http://www.bücher.de?q=bücher"),
Expand Down
9 changes: 6 additions & 3 deletions w3lib/url.py
Original file line number Diff line number Diff line change
Expand Up @@ -654,10 +654,13 @@ def canonicalize_url(

fragment = "" if not keep_fragments else fragment

# Apply lowercase to the domain, but not to the userinfo.
netloc_parts = netloc.split("@")
netloc_parts[-1] = netloc_parts[-1].lower().rstrip(":")
netloc = "@".join(netloc_parts)

# every part should be safe already
return urlunparse(
(scheme, netloc.lower().rstrip(":"), path, params, query, fragment)
)
return urlunparse((scheme, netloc, path, params, query, fragment))


def _unquotepath(path: str) -> bytes:
Expand Down

0 comments on commit cd54253

Please sign in to comment.