From b82fc5eb24cfb68321ff89c0c50e8fae48040dc7 Mon Sep 17 00:00:00 2001 From: Ryukouss Date: Fri, 16 Feb 2024 19:51:03 +0100 Subject: [PATCH] Fix the issue #559 Fixing the errors output --- tests/attack/test_mod_wapp.py | 208 +++++++++++++++++++++++++----- tests/cli/test_options.py | 10 ++ wapitiCore/attack/mod_wapp.py | 36 ++++-- wapitiCore/controller/wapiti.py | 15 ++- wapitiCore/main/wapiti.py | 43 ++++-- wapitiCore/parsers/commandline.py | 1 - 6 files changed, 258 insertions(+), 55 deletions(-) diff --git a/tests/attack/test_mod_wapp.py b/tests/attack/test_mod_wapp.py index 65d9d2619..c67d20653 100644 --- a/tests/attack/test_mod_wapp.py +++ b/tests/attack/test_mod_wapp.py @@ -1,8 +1,9 @@ import os from asyncio import Event -from unittest.mock import AsyncMock, patch, MagicMock +from unittest.mock import AsyncMock, patch import httpx +from httpx import RequestError import pytest import respx @@ -592,53 +593,198 @@ async def test_merge_with_and_without_redirection(): @pytest.mark.asyncio @respx.mock -async def test_exception_json(): - json_string = { - "1C-Bitrix": { - "cats": [1, 6], - "cookies": { - "BITRIX_SM_GUEST_ID": "" - }, - "description": "1C-Bitrix is a system of web project management...", - "headers": { - "Set-Cookie": "BITRIX_", - "X-Powered-CMS": "Bitrix Site Manager" +async def test_raise_on_invalid_json(): + """Tests that a ValueError is raised when calling _dump_url_content_to_file with invalid or empty Json.""" + + respx.get("http://perdu.com/src/categories.json").mock( + return_value=httpx.Response( + 200, + content="Test") + ) + + persister = AsyncMock() + crawler_configuration = CrawlerConfiguration(Request("http://perdu.com/")) + async with AsyncCrawler.with_configuration(crawler_configuration) as crawler: + options = {"timeout": 10, "level": 2, "wapp_url": "http://perdu.com"} + module = ModuleWapp(crawler, persister, options, Event(), crawler_configuration) + + with pytest.raises(ValueError) as exc_info: + await module._dump_url_content_to_file("http://perdu.com/src/categories.json", "test.json") + + assert exc_info.value.args[0] == "Invalid or empty JSON response for http://perdu.com/src/categories.json" + + +@pytest.mark.asyncio +@respx.mock +async def test_raise_on_not_valid_db_url(): + """Tests that a ValueError is raised when the URL doesn't contain a Wapp DB.""" + cat_url = "http://perdu.com/src/categories.json" + group_url = "http://perdu.com/src/groups.json" + tech_url = "http://perdu.com/src/technologies/" + + respx.get(url__regex=r"http://perdu.com/.*").mock( + return_value=httpx.Response( + 404, + content="Not Found") + ) + persister = AsyncMock() + crawler_configuration = CrawlerConfiguration(Request("http://perdu.com/")) + async with AsyncCrawler.with_configuration(crawler_configuration) as crawler: + options = {"timeout": 10, "level": 2, "wapp_url": "http://perdu.com/"} + + module = ModuleWapp(crawler, persister, options, Event(), crawler_configuration) + + with pytest.raises(ValueError) as exc_info: + await module._load_wapp_database(cat_url, tech_url, group_url) + + assert exc_info.value.args[0] == "http://perdu.com/src/technologies/ is not a valid URL for a wapp database" + + +@pytest.mark.asyncio +@respx.mock +async def test_raise_on_value_error(): + """Tests that a ValueError is raised when calling the _load_wapp_database function when the json is not valid.""" + + example_json_content = { + "2B Advice": { + "cats": [67], + "description": "2B Advice provides a plug-in to manage GDPR cookie consent.", + "icon": "2badvice.png", + "js": { + "BBCookieControler": "" + }, + "saas": True, + "scriptSrc": "2badvice-cdn\\.azureedge\\.net", + "website": "https://www.2b-advice.com/en/data-privacy-software/cookie-consent-plugin/" }, - "icon": "1C-Bitrix.svg", - "website": "https://www.1c-bitrix.ru" - } - } + "30namaPlayer": { + "cats": [14], + "description": "30namaPlayer is a modified version of Video.", + "dom": "section[class*='player30nama']", + "icon": "30namaPlayer.png", + "website": "https://30nama.com/" + }} + + cat_url = "http://perdu.com/src/categories.json" + group_url = "http://perdu.com/src/groups.json" + tech_url = "http://perdu.com/src/technologies/" respx.get(url__regex=r"http://perdu.com/src/technologies/.*").mock( return_value=httpx.Response( 200, - text=str(json_string) - ) + content=str(example_json_content)) ) - - respx.get("http://perdu.com/src/groups.json").mock( + respx.get(url__regex=r"http://perdu.com/.*").mock( return_value=httpx.Response( 200, - content="Test") + content="No Json") ) + persister = AsyncMock() + crawler_configuration = CrawlerConfiguration(Request("http://perdu.com/")) + async with AsyncCrawler.with_configuration(crawler_configuration) as crawler: + options = {"timeout": 10, "level": 2, "wapp_url": "http://perdu.com/"} - respx.get("http://perdu.com/src/categories.json").mock( + module = ModuleWapp(crawler, persister, options, Event(), crawler_configuration) + + with pytest.raises(ValueError) as exc_info: + await module._load_wapp_database(cat_url, tech_url, group_url) + + assert exc_info.value.args[0] == "Invalid or empty JSON response for http://perdu.com/src/categories.json" + + +@pytest.mark.asyncio +@respx.mock +async def test_raise_on_request_error(): + """Tests that a RequestError is raised when calling the _load_wapp_database function with wrong URL.""" + + cat_url = "http://perdu.com/src/categories.json" + group_url = "http://perdu.com/src/groups.json" + tech_url = "http://perdu.com/src/technologies/" + + respx.get(url__regex=r"http://perdu.com/.*").mock(side_effect=RequestError("RequestError occurred: [Errno -2] Name or service not known")) + persister = AsyncMock() + crawler_configuration = CrawlerConfiguration(Request("http://perdu.com/")) + async with AsyncCrawler.with_configuration(crawler_configuration) as crawler: + options = {"timeout": 10, "level": 2, "wapp_url": "http://perdu.com/"} + + module = ModuleWapp(crawler, persister, options, Event(), crawler_configuration) + + with pytest.raises(RequestError) as exc_info: + await module._load_wapp_database(cat_url, tech_url, group_url) + + assert exc_info.value.args[0] == "RequestError occurred: [Errno -2] Name or service not known" + + +@pytest.mark.asyncio +@respx.mock +async def test_raise_on_request_error_for_dump_url(): + """Tests that a RequestError is raised when calling the _dump_url_content_to_file function with wrong URL.""" + + url = "http://perdu.com/" + group_url = "http://perdu.com/src/groups.json" + tech_url = "http://perdu.com/src/technologies/" + + respx.get(url__regex=r"http://perdu.com/.*").mock(side_effect=RequestError("RequestError occurred: [Errno -2] Name or service not known")) + persister = AsyncMock() + crawler_configuration = CrawlerConfiguration(Request("http://perduu.com/")) + async with AsyncCrawler.with_configuration(crawler_configuration) as crawler: + options = {"timeout": 10, "level": 2, "wapp_url": "http://perdu.com/"} + + module = ModuleWapp(crawler, persister, options, Event(), crawler_configuration) + + with pytest.raises(RequestError) as exc_info: + await module._dump_url_content_to_file(url, "cat.json") + + assert exc_info.value.args[0] == "RequestError occurred: [Errno -2] Name or service not known" + + +@pytest.mark.asyncio +@respx.mock +async def test_raise_on_request_error_for_update(): + """Tests that a RequestError is raised when calling the update function with wrong URL.""" + + url = "http://perdu.com/" + group_url = "http://perdu.com/src/groups.json" + tech_url = "http://perdu.com/src/technologies/" + + respx.get(url__regex=r"http://perdu.com/.*").mock(side_effect=RequestError("RequestError occurred: [Errno -2] Name or service not known")) + persister = AsyncMock() + crawler_configuration = CrawlerConfiguration(Request("http://perduu.com/")) + async with AsyncCrawler.with_configuration(crawler_configuration) as crawler: + options = {"timeout": 10, "level": 2, "wapp_url": "http://perdu.com/"} + + module = ModuleWapp(crawler, persister, options, Event(), crawler_configuration) + + with pytest.raises(RequestError) as exc_info: + await module.update() + + assert exc_info.value.args[0] == "RequestError occurred: [Errno -2] Name or service not known" + + +@pytest.mark.asyncio +@respx.mock +async def test_raise_on_value_error_for_update(): + """Tests that a ValueError is raised when calling the update function with URL doesn't contain a wapp DB.""" + + respx.get(url__regex=r"http://perdu.com/src/technologies/.*").mock( return_value=httpx.Response( 200, - content="Test''''") + content=str("{}")) + ) + respx.get(url__regex=r"http://perdu.com/.*").mock( + return_value=httpx.Response( + 200, + content="No Json") ) - request = Request("http://perdu.com/") persister = AsyncMock() - crawler_configuration = CrawlerConfiguration(Request("http://perdu.com/")) + crawler_configuration = CrawlerConfiguration(Request("http://perduu.com/")) async with AsyncCrawler.with_configuration(crawler_configuration) as crawler: options = {"timeout": 10, "level": 2, "wapp_url": "http://perdu.com/"} module = ModuleWapp(crawler, persister, options, Event(), crawler_configuration) - with patch("builtins.open", MagicMock(side_effect=IOError)) as open_mock: - try: - await module.attack(request) - pytest.fail("Should raise an exception ..") - except (IOError, ValueError): - open_mock.assert_called_with(open_mock.mock_calls[0][1][0], 'r', encoding='utf-8') + with pytest.raises(ValueError) as exc_info: + await module.update() + + assert exc_info.value.args[0] == "Invalid or empty JSON response for http://perdu.com/src/categories.json" diff --git a/tests/cli/test_options.py b/tests/cli/test_options.py index 4878324e3..acb3e45d2 100644 --- a/tests/cli/test_options.py +++ b/tests/cli/test_options.py @@ -175,6 +175,16 @@ async def test_update_with_modules(mock_update): mock_update.assert_called_once_with("wapp,nikto") +@pytest.mark.asyncio +@mock.patch("wapitiCore.main.wapiti.is_valid_url") +async def test_update_with_not_valid_url(mock_valid_url): + testargs = ["wapiti", "--update", "-m", "wapp", "--wapp-url", "htp:/perdu"] + with mock.patch.object(sys, 'argv', testargs): + with pytest.raises(SystemExit) as ve: + await wapiti_main() + mock_valid_url.assert_called_once_with("htp:/perdu") + + @pytest.mark.asyncio @mock.patch("wapitiCore.main.wapiti.Wapiti.update") async def test_update_without_modules(mock_update): diff --git a/wapitiCore/attack/mod_wapp.py b/wapitiCore/attack/mod_wapp.py index 4388815aa..f86c2e516 100644 --- a/wapitiCore/attack/mod_wapp.py +++ b/wapitiCore/attack/mod_wapp.py @@ -29,7 +29,9 @@ from arsenic.errors import JavascriptError, UnknownError, ArsenicError from wapitiCore.main.log import logging, log_blue +from wapitiCore.main.wapiti import is_valid_url from wapitiCore.attack.attack import Attack +from wapitiCore.controller.wapiti import InvalidOptionValue from wapitiCore.net.response import Response from wapitiCore.wappalyzer.wappalyzer import Wappalyzer, ApplicationData, ApplicationDataException from wapitiCore.definitions.fingerprint import NAME as TECHNO_DETECTED, WSTG_CODE as TECHNO_DETECTED_WSTG_CODE @@ -110,15 +112,24 @@ async def update(self): wapp_categories_url = f"{self.BASE_URL}src/categories.json" wapp_technologies_base_url = f"{self.BASE_URL}src/technologies/" wapp_groups_url = f"{self.BASE_URL}src/groups.json" - + if not is_valid_url(self.BASE_URL): + raise InvalidOptionValue( + "--wapp-url", self.BASE_URL + ) try: await self._load_wapp_database( wapp_categories_url, wapp_technologies_base_url, wapp_groups_url ) + except RequestError as e: + logging.error(f"RequestError occurred: {e}") + raise except IOError: logging.error("Error downloading wapp database.") + except ValueError as e: + logging.error(f"Value error: {e}") + raise async def must_attack(self, request: Request, response: Optional[Response] = None): if self.finished: @@ -136,7 +147,10 @@ async def attack(self, request: Request, response: Optional[Response] = None): groups_file_path = os.path.join(self.user_config_dir, self.WAPP_GROUPS) technologies_file_path = os.path.join(self.user_config_dir, self.WAPP_TECHNOLOGIES) - await self._verify_wapp_database(categories_file_path, technologies_file_path, groups_file_path) + try: + await self._verify_wapp_database(categories_file_path, technologies_file_path, groups_file_path) + except ValueError: + return try: application_data = ApplicationData(categories_file_path, groups_file_path, technologies_file_path) @@ -226,6 +240,8 @@ async def _dump_url_content_to_file(self, url: str, file_path: str): response = await self.crawler.async_send(request) except RequestError: self.network_errors += 1 + raise + if response.status != 200: logging.error(f"Error: Non-200 status code for {url}") return if not _is_valid_json(response): @@ -247,9 +263,10 @@ async def _load_wapp_database(self, categories_url: str, technologies_base_url: response: Response = await self.crawler.async_send(request) except RequestError: self.network_errors += 1 - logging.error(f"Error: Non-200 status code for {technology_file_name}. Skipping.") - return - # Merging all technologies in one object + raise + if response.status != 200: + raise ValueError(f"{technologies_base_url} is not a valid URL for a wapp database") + # Merging all technologies in one object for technology_name in response.json: technologies[technology_name] = response.json[technology_name] try: @@ -257,9 +274,12 @@ async def _load_wapp_database(self, categories_url: str, technologies_base_url: await asyncio.gather( self._dump_url_content_to_file(categories_url, categories_file_path), self._dump_url_content_to_file(groups_url, groups_file_path)) - except ValueError: - logging.error(f"Invalid or empty JSON response for {categories_url} or {groups_url}") - return + except RequestError as req_error: + logging.error(f"Caught a RequestError: {req_error}") + raise + except ValueError as value_error: + logging.error(f"Caught a ValueError: {value_error}") + raise # Saving technologies with open(technologies_file_path, 'w', encoding='utf-8') as file: json.dump(technologies, file) diff --git a/wapitiCore/controller/wapiti.py b/wapitiCore/controller/wapiti.py index 77b16701c..921584003 100644 --- a/wapitiCore/controller/wapiti.py +++ b/wapitiCore/controller/wapiti.py @@ -335,7 +335,19 @@ async def update(self, requested_modules: str = "all"): ) if hasattr(class_instance, "update"): logging.info(f"Updating module {mod_name}") - await class_instance.update() + try: + await class_instance.update() + logging.success("Update done.") + except RequestError: + logging.error("Request Error :") + raise + except InvalidOptionValue: + logging.error("Invalid Option Error :") + raise + except ValueError: + logging.error("Value Error :") + raise + except ImportError: continue except Exception: # pylint: disable=broad-except @@ -343,7 +355,6 @@ async def update(self, requested_modules: str = "all"): logging.error(f"[!] Module {mod_name} seems broken and will be skipped") continue - logging.success("Update done.") async def load_scan_state(self): async for request in self.persister.get_to_browse(): diff --git a/wapitiCore/main/wapiti.py b/wapitiCore/main/wapiti.py index 65c8ad5e1..5697b921d 100755 --- a/wapitiCore/main/wapiti.py +++ b/wapitiCore/main/wapiti.py @@ -59,11 +59,15 @@ def fix_url_path(url: str): def is_valid_url(url: str): """Verify if the url provided has the right format""" try: - urlparse(url) + parts = urlparse(url) except ValueError: - logging.error(f"ValueError, {url} is not a valid URL") + logging.error('ValueError') return False - return True + else: + if parts.scheme in ("http", "https") and parts.netloc: + return True + logging.error(f"Error: {url} is not a valid URL") + return False def is_valid_endpoint(url_type, url: str): @@ -91,9 +95,8 @@ def is_mod_cms_set(args): def is_mod_wapp_or_update_set(args): - if (args.modules and "wapp" in args.modules) or "update" in args: + if (args.modules and "wapp" in args.modules) or args.update: return True - logging.error("Error: Invalid option --wapp-url, module wapp or option --update is required when using this option") return False @@ -169,11 +172,21 @@ async def wapiti_main(): if args.update: await wap.init_persister() logging.log("GREEN", "[*] Updating modules") - attack_options = {"level": args.level, "timeout": args.timeout, "wapp_url": args.wapp_url} + if args.wapp_url: + attack_options = {"level": args.level, "timeout": args.timeout, "wapp_url": fix_url_path(args.wapp_url)} + else: + attack_options = {"level": args.level, "timeout": args.timeout,\ + "wapp_url": "https://raw.githubusercontent.com/wapiti-scanner/wappalyzer/main/"} wap.set_attack_options(attack_options) - await wap.update(args.modules) - sys.exit() - + try: + await wap.update(args.modules) + sys.exit() + except InvalidOptionValue: + logging.error("Invalid Option error :") + raise + except ValueError as e: + logging.error(f"Value error: {e}") + raise try: for start_url in args.starting_urls: if start_url.startswith(("http://", "https://")): @@ -321,13 +334,17 @@ async def wapiti_main(): if args.modules and "cms" in args.modules and not args.cms: attack_options["cms"] = "drupal,joomla,prestashop,spip,wp" - if "wapp_url" in args: + if args.wapp_url: + if not is_mod_wapp_or_update_set(args): + raise InvalidOptionValue("--wapp-url", "module wapp or --update option is required when --wapp-url is " + "used") url_value = fix_url_path(args.wapp_url) if is_valid_url(url_value): - if not is_mod_wapp_or_update_set(args): - raise InvalidOptionValue("--wapp-url", "module wapp or --update option \ - is required when --wapp-url is used") attack_options["wapp_url"] = url_value + else: + raise InvalidOptionValue( + "--wapp-url", url_value + ) if args.skipped_parameters: attack_options["skipped_parameters"] = set(args.skipped_parameters) diff --git a/wapitiCore/parsers/commandline.py b/wapitiCore/parsers/commandline.py index a4fa11eab..0cdc2392f 100644 --- a/wapitiCore/parsers/commandline.py +++ b/wapitiCore/parsers/commandline.py @@ -492,7 +492,6 @@ def parse_args(): parser.add_argument( "--wapp-url", help="Provide a custom URL for updating Wappalyzer Database", - default="https://raw.githubusercontent.com/wapiti-scanner/wappalyzer/main/", metavar="WAPP_URL" )