From b82fc5eb24cfb68321ff89c0c50e8fae48040dc7 Mon Sep 17 00:00:00 2001
From: Ryukouss <benghechoua.oussama@gmail.com>
Date: Fri, 16 Feb 2024 19:51:03 +0100
Subject: [PATCH] Fix issue #559: validate --wapp-url and report wapp update errors

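Validate the --wapp-url option (an http/https scheme and a host are now
required) and re-raise request and JSON errors hit while updating the
Wappalyzer database instead of only logging them.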
---
 tests/attack/test_mod_wapp.py     | 208 +++++++++++++++++++++++++-----
 tests/cli/test_options.py         |  10 ++
 wapitiCore/attack/mod_wapp.py     |  36 ++++--
 wapitiCore/controller/wapiti.py   |  15 ++-
 wapitiCore/main/wapiti.py         |  43 ++++--
 wapitiCore/parsers/commandline.py |   1 -
 6 files changed, 258 insertions(+), 55 deletions(-)

diff --git a/tests/attack/test_mod_wapp.py b/tests/attack/test_mod_wapp.py
index 65d9d2619..c67d20653 100644
--- a/tests/attack/test_mod_wapp.py
+++ b/tests/attack/test_mod_wapp.py
@@ -1,8 +1,9 @@
 import os
 from asyncio import Event
-from unittest.mock import AsyncMock, patch, MagicMock
+from unittest.mock import AsyncMock, patch
 
 import httpx
+from httpx import RequestError
 import pytest
 import respx
 
@@ -592,53 +593,198 @@ async def test_merge_with_and_without_redirection():
 
 @pytest.mark.asyncio
 @respx.mock
-async def test_exception_json():
-    json_string = {
-    "1C-Bitrix": {
-        "cats": [1, 6],
-        "cookies": {
-            "BITRIX_SM_GUEST_ID": ""
-        },
-        "description": "1C-Bitrix is a system of web project management...",
-        "headers": {
-            "Set-Cookie": "BITRIX_",
-            "X-Powered-CMS": "Bitrix Site Manager"
+async def test_raise_on_invalid_json():
+    """Tests that a ValueError is raised when calling _dump_url_content_to_file with invalid or empty JSON."""
+
+    respx.get("http://perdu.com/src/categories.json").mock(
+        return_value=httpx.Response(
+            200,
+            content="Test")
+    )
+
+    persister = AsyncMock()
+    crawler_configuration = CrawlerConfiguration(Request("http://perdu.com/"))
+    async with AsyncCrawler.with_configuration(crawler_configuration) as crawler:
+        options = {"timeout": 10, "level": 2, "wapp_url": "http://perdu.com"}
+        module = ModuleWapp(crawler, persister, options, Event(), crawler_configuration)
+
+        with pytest.raises(ValueError) as exc_info:
+            await module._dump_url_content_to_file("http://perdu.com/src/categories.json", "test.json")
+
+        assert exc_info.value.args[0] == "Invalid or empty JSON response for http://perdu.com/src/categories.json"
+
+
+@pytest.mark.asyncio
+@respx.mock
+async def test_raise_on_not_valid_db_url():
+    """Tests that a ValueError is raised when the URL doesn't contain a Wapp DB."""
+    cat_url = "http://perdu.com/src/categories.json"
+    group_url = "http://perdu.com/src/groups.json"
+    tech_url = "http://perdu.com/src/technologies/"
+
+    respx.get(url__regex=r"http://perdu.com/.*").mock(
+        return_value=httpx.Response(
+            404,
+            content="Not Found")
+    )
+    persister = AsyncMock()
+    crawler_configuration = CrawlerConfiguration(Request("http://perdu.com/"))
+    async with AsyncCrawler.with_configuration(crawler_configuration) as crawler:
+        options = {"timeout": 10, "level": 2, "wapp_url": "http://perdu.com/"}
+
+        module = ModuleWapp(crawler, persister, options, Event(), crawler_configuration)
+
+        with pytest.raises(ValueError) as exc_info:
+            await module._load_wapp_database(cat_url, tech_url, group_url)
+
+        assert exc_info.value.args[0] == "http://perdu.com/src/technologies/ is not a valid URL for a wapp database"
+
+
+@pytest.mark.asyncio
+@respx.mock
+async def test_raise_on_value_error():
+    """Tests that a ValueError is raised when calling the _load_wapp_database function when the json is not valid."""
+
+    example_json_content = {
+        "2B Advice": {
+            "cats": [67],
+            "description": "2B Advice provides a plug-in to manage GDPR cookie consent.",
+            "icon": "2badvice.png",
+            "js": {
+                "BBCookieControler": ""
+            },
+            "saas": True,
+            "scriptSrc": "2badvice-cdn\\.azureedge\\.net",
+            "website": "https://www.2b-advice.com/en/data-privacy-software/cookie-consent-plugin/"
         },
-        "icon": "1C-Bitrix.svg",
-        "website": "https://www.1c-bitrix.ru"
-        }
-    }
+        "30namaPlayer": {
+            "cats": [14],
+            "description": "30namaPlayer is a modified version of Video.",
+            "dom": "section[class*='player30nama']",
+            "icon": "30namaPlayer.png",
+            "website": "https://30nama.com/"
+        }}
+
+    cat_url = "http://perdu.com/src/categories.json"
+    group_url = "http://perdu.com/src/groups.json"
+    tech_url = "http://perdu.com/src/technologies/"
 
     respx.get(url__regex=r"http://perdu.com/src/technologies/.*").mock(
         return_value=httpx.Response(
             200,
-            text=str(json_string)
-        )
+            content=str(example_json_content))
     )
-
-    respx.get("http://perdu.com/src/groups.json").mock(
+    respx.get(url__regex=r"http://perdu.com/.*").mock(
         return_value=httpx.Response(
             200,
-            content="Test")
+            content="No Json")
     )
+    persister = AsyncMock()
+    crawler_configuration = CrawlerConfiguration(Request("http://perdu.com/"))
+    async with AsyncCrawler.with_configuration(crawler_configuration) as crawler:
+        options = {"timeout": 10, "level": 2, "wapp_url": "http://perdu.com/"}
 
-    respx.get("http://perdu.com/src/categories.json").mock(
+        module = ModuleWapp(crawler, persister, options, Event(), crawler_configuration)
+
+        with pytest.raises(ValueError) as exc_info:
+            await module._load_wapp_database(cat_url, tech_url, group_url)
+
+        assert exc_info.value.args[0] == "Invalid or empty JSON response for http://perdu.com/src/categories.json"
+
+
+@pytest.mark.asyncio
+@respx.mock
+async def test_raise_on_request_error():
+    """Tests that a RequestError is raised when calling the _load_wapp_database function with a wrong URL."""
+
+    cat_url = "http://perdu.com/src/categories.json"
+    group_url = "http://perdu.com/src/groups.json"
+    tech_url = "http://perdu.com/src/technologies/"
+
+    respx.get(url__regex=r"http://perdu.com/.*").mock(side_effect=RequestError("RequestError occurred: [Errno -2] Name or service not known"))
+    persister = AsyncMock()
+    crawler_configuration = CrawlerConfiguration(Request("http://perdu.com/"))
+    async with AsyncCrawler.with_configuration(crawler_configuration) as crawler:
+        options = {"timeout": 10, "level": 2, "wapp_url": "http://perdu.com/"}
+
+        module = ModuleWapp(crawler, persister, options, Event(), crawler_configuration)
+
+        with pytest.raises(RequestError) as exc_info:
+            await module._load_wapp_database(cat_url, tech_url, group_url)
+
+        assert exc_info.value.args[0] == "RequestError occurred: [Errno -2] Name or service not known"
+
+
+@pytest.mark.asyncio
+@respx.mock
+async def test_raise_on_request_error_for_dump_url():
+    """Tests that a RequestError is raised when calling the _dump_url_content_to_file function with a wrong URL."""
+
+    url = "http://perdu.com/"
+    group_url = "http://perdu.com/src/groups.json"
+    tech_url = "http://perdu.com/src/technologies/"
+
+    respx.get(url__regex=r"http://perdu.com/.*").mock(side_effect=RequestError("RequestError occurred: [Errno -2] Name or service not known"))
+    persister = AsyncMock()
+    crawler_configuration = CrawlerConfiguration(Request("http://perduu.com/"))
+    async with AsyncCrawler.with_configuration(crawler_configuration) as crawler:
+        options = {"timeout": 10, "level": 2, "wapp_url": "http://perdu.com/"}
+
+        module = ModuleWapp(crawler, persister, options, Event(), crawler_configuration)
+
+        with pytest.raises(RequestError) as exc_info:
+            await module._dump_url_content_to_file(url, "cat.json")
+
+        assert exc_info.value.args[0] == "RequestError occurred: [Errno -2] Name or service not known"
+
+
+@pytest.mark.asyncio
+@respx.mock
+async def test_raise_on_request_error_for_update():
+    """Tests that a RequestError is raised when calling the update function with a wrong URL."""
+
+    url = "http://perdu.com/"
+    group_url = "http://perdu.com/src/groups.json"
+    tech_url = "http://perdu.com/src/technologies/"
+
+    respx.get(url__regex=r"http://perdu.com/.*").mock(side_effect=RequestError("RequestError occurred: [Errno -2] Name or service not known"))
+    persister = AsyncMock()
+    crawler_configuration = CrawlerConfiguration(Request("http://perduu.com/"))
+    async with AsyncCrawler.with_configuration(crawler_configuration) as crawler:
+        options = {"timeout": 10, "level": 2, "wapp_url": "http://perdu.com/"}
+
+        module = ModuleWapp(crawler, persister, options, Event(), crawler_configuration)
+
+        with pytest.raises(RequestError) as exc_info:
+            await module.update()
+
+        assert exc_info.value.args[0] == "RequestError occurred: [Errno -2] Name or service not known"
+
+
+@pytest.mark.asyncio
+@respx.mock
+async def test_raise_on_value_error_for_update():
+    """Tests that a ValueError is raised when calling the update function with a URL that doesn't contain a wapp DB."""
+
+    respx.get(url__regex=r"http://perdu.com/src/technologies/.*").mock(
         return_value=httpx.Response(
             200,
-            content="Test''''")
+            content=str("{}"))
+    )
+    respx.get(url__regex=r"http://perdu.com/.*").mock(
+        return_value=httpx.Response(
+            200,
+            content="No Json")
     )
 
-    request = Request("http://perdu.com/")
     persister = AsyncMock()
-    crawler_configuration = CrawlerConfiguration(Request("http://perdu.com/"))
+    crawler_configuration = CrawlerConfiguration(Request("http://perduu.com/"))
     async with AsyncCrawler.with_configuration(crawler_configuration) as crawler:
         options = {"timeout": 10, "level": 2, "wapp_url": "http://perdu.com/"}
 
         module = ModuleWapp(crawler, persister, options, Event(), crawler_configuration)
 
-        with patch("builtins.open", MagicMock(side_effect=IOError)) as open_mock:
-            try:
-                await module.attack(request)
-                pytest.fail("Should raise an exception ..")
-            except (IOError, ValueError):
-                open_mock.assert_called_with(open_mock.mock_calls[0][1][0], 'r', encoding='utf-8')
+        with pytest.raises(ValueError) as exc_info:
+            await module.update()
+
+        assert exc_info.value.args[0] == "Invalid or empty JSON response for http://perdu.com/src/categories.json"
diff --git a/tests/cli/test_options.py b/tests/cli/test_options.py
index 4878324e3..acb3e45d2 100644
--- a/tests/cli/test_options.py
+++ b/tests/cli/test_options.py
@@ -175,6 +175,16 @@ async def test_update_with_modules(mock_update):
             mock_update.assert_called_once_with("wapp,nikto")
 
 
+@pytest.mark.asyncio
+@mock.patch("wapitiCore.main.wapiti.is_valid_url")
+async def test_update_with_not_valid_url(mock_valid_url):
+    testargs = ["wapiti", "--update", "-m", "wapp", "--wapp-url", "htp:/perdu"]
+    with mock.patch.object(sys, 'argv', testargs):
+        with pytest.raises(SystemExit) as ve:
+            await wapiti_main()
+            mock_valid_url.assert_called_once_with("htp:/perdu")
+
+
 @pytest.mark.asyncio
 @mock.patch("wapitiCore.main.wapiti.Wapiti.update")
 async def test_update_without_modules(mock_update):
diff --git a/wapitiCore/attack/mod_wapp.py b/wapitiCore/attack/mod_wapp.py
index 4388815aa..f86c2e516 100644
--- a/wapitiCore/attack/mod_wapp.py
+++ b/wapitiCore/attack/mod_wapp.py
@@ -29,7 +29,9 @@
 from arsenic.errors import JavascriptError, UnknownError, ArsenicError
 
 from wapitiCore.main.log import logging, log_blue
+from wapitiCore.main.wapiti import is_valid_url
 from wapitiCore.attack.attack import Attack
+from wapitiCore.controller.wapiti import InvalidOptionValue
 from wapitiCore.net.response import Response
 from wapitiCore.wappalyzer.wappalyzer import Wappalyzer, ApplicationData, ApplicationDataException
 from wapitiCore.definitions.fingerprint import NAME as TECHNO_DETECTED, WSTG_CODE as TECHNO_DETECTED_WSTG_CODE
@@ -110,15 +112,24 @@ async def update(self):
         wapp_categories_url = f"{self.BASE_URL}src/categories.json"
         wapp_technologies_base_url = f"{self.BASE_URL}src/technologies/"
         wapp_groups_url = f"{self.BASE_URL}src/groups.json"
-
+        if not is_valid_url(self.BASE_URL):
+            raise InvalidOptionValue(
+                "--wapp-url", self.BASE_URL
+            )
         try:
             await self._load_wapp_database(
                 wapp_categories_url,
                 wapp_technologies_base_url,
                 wapp_groups_url
             )
+        except RequestError as e:
+            logging.error(f"RequestError occurred: {e}")
+            raise
         except IOError:
             logging.error("Error downloading wapp database.")
+        except ValueError as e:
+            logging.error(f"Value error: {e}")
+            raise
 
     async def must_attack(self, request: Request, response: Optional[Response] = None):
         if self.finished:
@@ -136,7 +147,10 @@ async def attack(self, request: Request, response: Optional[Response] = None):
         groups_file_path = os.path.join(self.user_config_dir, self.WAPP_GROUPS)
         technologies_file_path = os.path.join(self.user_config_dir, self.WAPP_TECHNOLOGIES)
 
-        await self._verify_wapp_database(categories_file_path, technologies_file_path, groups_file_path)
+        try:
+            await self._verify_wapp_database(categories_file_path, technologies_file_path, groups_file_path)
+        except ValueError:
+            return
 
         try:
             application_data = ApplicationData(categories_file_path, groups_file_path, technologies_file_path)
@@ -226,6 +240,8 @@ async def _dump_url_content_to_file(self, url: str, file_path: str):
             response = await self.crawler.async_send(request)
         except RequestError:
             self.network_errors += 1
+            raise
+        if response.status != 200:
             logging.error(f"Error: Non-200 status code for {url}")
             return
         if not _is_valid_json(response):
@@ -247,9 +263,10 @@ async def _load_wapp_database(self, categories_url: str, technologies_base_url:
                 response: Response = await self.crawler.async_send(request)
             except RequestError:
                 self.network_errors += 1
-                logging.error(f"Error: Non-200 status code for {technology_file_name}. Skipping.")
-                return
-                # Merging all technologies in one object
+                raise
+            if response.status != 200:
+                raise ValueError(f"{technologies_base_url} is not a valid URL for a wapp database")
+            # Merging all technologies in one object
             for technology_name in response.json:
                 technologies[technology_name] = response.json[technology_name]
             try:
@@ -257,9 +274,12 @@ async def _load_wapp_database(self, categories_url: str, technologies_base_url:
                 await asyncio.gather(
                     self._dump_url_content_to_file(categories_url, categories_file_path),
                     self._dump_url_content_to_file(groups_url, groups_file_path))
-            except ValueError:
-                logging.error(f"Invalid or empty JSON response for {categories_url} or {groups_url}")
-                return
+            except RequestError as req_error:
+                logging.error(f"Caught a RequestError: {req_error}")
+                raise
+            except ValueError as value_error:
+                logging.error(f"Caught a ValueError: {value_error}")
+                raise
             # Saving technologies
             with open(technologies_file_path, 'w', encoding='utf-8') as file:
                 json.dump(technologies, file)
diff --git a/wapitiCore/controller/wapiti.py b/wapitiCore/controller/wapiti.py
index 77b16701c..921584003 100644
--- a/wapitiCore/controller/wapiti.py
+++ b/wapitiCore/controller/wapiti.py
@@ -335,7 +335,19 @@ async def update(self, requested_modules: str = "all"):
                     )
                     if hasattr(class_instance, "update"):
                         logging.info(f"Updating module {mod_name}")
-                        await class_instance.update()
+                        try:
+                            await class_instance.update()
+                            logging.success("Update done.")
+                        except RequestError:
+                            logging.error("Request Error :")
+                            raise
+                        except InvalidOptionValue:
+                            logging.error("Invalid Option Error :")
+                            raise
+                        except ValueError:
+                            logging.error("Value Error :")
+                            raise
+
                 except ImportError:
                     continue
                 except Exception:  # pylint: disable=broad-except
@@ -343,7 +355,6 @@ async def update(self, requested_modules: str = "all"):
                     logging.error(f"[!] Module {mod_name} seems broken and will be skipped")
                     continue
 
-        logging.success("Update done.")
 
     async def load_scan_state(self):
         async for request in self.persister.get_to_browse():
diff --git a/wapitiCore/main/wapiti.py b/wapitiCore/main/wapiti.py
index 65c8ad5e1..5697b921d 100755
--- a/wapitiCore/main/wapiti.py
+++ b/wapitiCore/main/wapiti.py
@@ -59,11 +59,15 @@ def fix_url_path(url: str):
 def is_valid_url(url: str):
     """Verify if the url provided has the right format"""
     try:
-        urlparse(url)
+        parts = urlparse(url)
     except ValueError:
         logging.error(f"ValueError, {url} is not a valid URL")
         return False
-    return True
+    else:
+        if parts.scheme in ("http", "https") and parts.netloc:
+            return True
+    logging.error(f"Error: {url} is not a valid URL")
+    return False
 
 
 def is_valid_endpoint(url_type, url: str):
@@ -91,9 +95,8 @@ def is_mod_cms_set(args):
 
 
 def is_mod_wapp_or_update_set(args):
-    if (args.modules and "wapp" in args.modules) or "update" in args:
+    if (args.modules and "wapp" in args.modules) or args.update:
         return True
-    logging.error("Error: Invalid option --wapp-url, module wapp or option --update is required when using this option")
     return False
 
 
@@ -169,11 +172,21 @@ async def wapiti_main():
     if args.update:
         await wap.init_persister()
         logging.log("GREEN", "[*] Updating modules")
-        attack_options = {"level": args.level, "timeout": args.timeout, "wapp_url": args.wapp_url}
+        if args.wapp_url:
+            attack_options = {"level": args.level, "timeout": args.timeout, "wapp_url": fix_url_path(args.wapp_url)}
+        else:
+            attack_options = {"level": args.level, "timeout": args.timeout,\
+                              "wapp_url": "https://raw.githubusercontent.com/wapiti-scanner/wappalyzer/main/"}
         wap.set_attack_options(attack_options)
-        await wap.update(args.modules)
-        sys.exit()
-
+        try:
+            await wap.update(args.modules)
+            sys.exit()
+        except InvalidOptionValue:
+            logging.error("Invalid Option error :")
+            raise
+        except ValueError as e:
+            logging.error(f"Value error: {e}")
+            raise
     try:
         for start_url in args.starting_urls:
             if start_url.startswith(("http://", "https://")):
@@ -321,13 +334,17 @@ async def wapiti_main():
         if args.modules and "cms" in args.modules and not args.cms:
             attack_options["cms"] = "drupal,joomla,prestashop,spip,wp"
 
-        if "wapp_url" in args:
+        if args.wapp_url:
+            if not is_mod_wapp_or_update_set(args):
+                raise InvalidOptionValue("--wapp-url", "module wapp or --update option is required when --wapp-url is "
+                                                       "used")
             url_value = fix_url_path(args.wapp_url)
             if is_valid_url(url_value):
-                if not is_mod_wapp_or_update_set(args):
-                    raise InvalidOptionValue("--wapp-url", "module wapp or --update option \
-                    is required when --wapp-url is used")
                 attack_options["wapp_url"] = url_value
+            else:
+                raise InvalidOptionValue(
+                    "--wapp-url", url_value
+                )
 
         if args.skipped_parameters:
             attack_options["skipped_parameters"] = set(args.skipped_parameters)
diff --git a/wapitiCore/parsers/commandline.py b/wapitiCore/parsers/commandline.py
index a4fa11eab..0cdc2392f 100644
--- a/wapitiCore/parsers/commandline.py
+++ b/wapitiCore/parsers/commandline.py
@@ -492,7 +492,6 @@ def parse_args():
     parser.add_argument(
         "--wapp-url",
         help="Provide a custom URL for updating Wappalyzer Database",
-        default="https://raw.githubusercontent.com/wapiti-scanner/wappalyzer/main/",
         metavar="WAPP_URL"
     )