From 3b56716a680fe447ae8d8e3b0cd4c3446d30efcf Mon Sep 17 00:00:00 2001
From: Eddie Cohen <31940792+edcohen08@users.noreply.github.com>
Date: Wed, 26 Apr 2023 21:20:15 -0400
Subject: [PATCH] Hotfix/validate url strips query params (#3370)

* reconstruct url in sanitize

* tests for url validation

---------

Co-authored-by: BillSchumacher <34168009+BillSchumacher@users.noreply.github.com>
---
 autogpt/url_utils/validators.py   |  8 +++++++-
 tests/unit/test_url_validation.py | 60 +++++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+), 1 deletion(-)
 create mode 100644 tests/unit/test_url_validation.py

diff --git a/autogpt/url_utils/validators.py b/autogpt/url_utils/validators.py
index 8754667c31b6..c85a00bab6fa 100644
--- a/autogpt/url_utils/validators.py
+++ b/autogpt/url_utils/validators.py
@@ -61,7 +61,13 @@ def sanitize_url(url: str) -> str:
     Returns:
         str: The sanitized URL
     """
-    return urljoin(url, urlparse(url).path)
+    parsed_url = urlparse(url)
+    # urlparse() strips the ";" that introduces path parameters, so it has to
+    # be re-inserted; otherwise "/path;type=a" would be rebuilt as "/pathtype=a".
+    params = f";{parsed_url.params}" if parsed_url.params else ""
+    # A bare trailing "?" (empty query) is dropped again by urljoin().
+    reconstructed_url = f"{parsed_url.path}{params}?{parsed_url.query}"
+    return urljoin(url, reconstructed_url)
 
 
 def check_local_file_access(url: str) -> bool:
diff --git a/tests/unit/test_url_validation.py b/tests/unit/test_url_validation.py
new file mode 100644
index 000000000000..2bbd3f9e20b0
--- /dev/null
+++ b/tests/unit/test_url_validation.py
@@ -0,0 +1,60 @@
+import pytest
+from pytest import raises
+
+from autogpt.url_utils.validators import validate_url
+
+
+@validate_url
+def dummy_method(url):
+    return url
+
+
+successful_test_data = (
+    ("https://google.com/search?query=abc"),
+    ("https://google.com/search?query=abc&p=123"),
+    ("https://google.com/search;type=a?query=abc"),
+    ("http://google.com/"),
+    ("http://a.lot.of.domain.net/param1/param2"),
+)
+
+
+@pytest.mark.parametrize("url", successful_test_data)
+def test_url_validation_succeeds(url):
+    assert dummy_method(url) == url
+
+
+bad_protocol_data = (
+    ("htt://example.com"),
+    ("httppp://example.com"),
+    (" https://example.com"),
+)
+
+
+@pytest.mark.parametrize("url", bad_protocol_data)
+def test_url_validation_fails_bad_protocol(url):
+    with raises(ValueError, match="Invalid URL format"):
+        dummy_method(url)
+
+
+missing_loc = (("http://?query=q"),)
+
+
+@pytest.mark.parametrize("url", missing_loc)
+def test_url_validation_fails_missing_loc(url):
+    with raises(ValueError, match="Missing Scheme or Network location"):
+        dummy_method(url)
+
+
+local_file = (
+    ("http://localhost"),
+    ("https://localhost/"),
+    ("http://2130706433"),
+    ("https://2130706433"),
+    ("http://127.0.0.1/"),
+)
+
+
+@pytest.mark.parametrize("url", local_file)
+def test_url_validation_fails_local_path(url):
+    with raises(ValueError, match="Access to local files is restricted"):
+        dummy_method(url)