Skip to content

Commit

Permalink
Add a "wapp-dir" option
Browse files Browse the repository at this point in the history
Add a "wapp-dir" option to update the wapp database from local files
  • Loading branch information
OussamaBeng committed Mar 6, 2024
1 parent 8efae4c commit bff30b4
Show file tree
Hide file tree
Showing 6 changed files with 302 additions and 20 deletions.
131 changes: 130 additions & 1 deletion tests/attack/test_mod_wapp.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import os
from asyncio import Event
from unittest.mock import AsyncMock, patch
from unittest.mock import AsyncMock, patch, mock_open

import httpx
from httpx import RequestError
Expand Down Expand Up @@ -786,3 +786,132 @@ async def test_raise_on_value_error_for_update():
await module.update()

assert exc_info.value.args[0] == "Invalid or empty JSON response for http://perdu.com/src/categories.json"


@pytest.mark.asyncio
@respx.mock
async def test_raise_on_not_valid_directory_for_update():
    """Tests that a ValueError is raised when calling update() with a directory that is not a valid wapp directory.

    "/" is passed as the "wapp_dir" option; update() is expected to fail with
    the "Update failed" message that names that directory.
    """
    wapp_dir = "/"

    # Any HTTP request sent to perdu.com during the test is answered with a 404
    respx.get(url__regex=r"http://perdu.com/.*").mock(
        return_value=httpx.Response(
            404,
            content="Not Found")
    )
    persister = AsyncMock()
    crawler_configuration = CrawlerConfiguration(Request("http://perdu.com/"))
    async with AsyncCrawler.with_configuration(crawler_configuration) as crawler:
        # Use the variable everywhere so the option and the expected message cannot drift apart
        options = {"timeout": 10, "level": 2, "wapp_dir": wapp_dir}

        module = ModuleWapp(crawler, persister, options, Event(), crawler_configuration)

        with pytest.raises(ValueError) as exc_info:
            await module.update()

        assert exc_info.value.args[0] == f"Update failed : Something went wrong with files in {wapp_dir}"

def read_directory_structure(directory_path):
    """Read the three sample wapp files located under *directory_path*.

    Returns a dict whose keys 'categories', 'groups' and 'a' map to what
    read_json_file() returns for categories.json, groups.json and
    technologies/a.json respectively.
    """
    # Insertion order matters: files are read in the same order as the keys below
    relative_locations = {
        'categories': ('categories.json',),
        'groups': ('groups.json',),
        'a': ('technologies', 'a.json'),
    }
    return {
        label: read_json_file(os.path.join(directory_path, *parts))
        for label, parts in relative_locations.items()
    }

def read_json_file(file_path):
    """Return the whole content of *file_path* as text decoded as UTF-8 (the JSON is not parsed)."""
    with open(file_path, 'r', encoding='utf-8') as handle:
        content = handle.read()
    return content

@pytest.mark.asyncio
@respx.mock
async def test_raise_on_not_valid_json_for_update():
    """Tests that a ValueError is raised when calling update() on a mocked local wapp directory.

    The directory listing is mocked and every opened file yields '{"key": "value"}'.
    """
    respx.get(url__regex=r"http://perdu.com/.*").mock(
        return_value=httpx.Response(404, content="Not Found")
    )

    wapp_dir = "wapp/"
    # Only paths ending with ".json" are reported as existing files
    isfile_patch = patch('os.path.isfile', side_effect=lambda x: x.endswith('.json'))
    # Simulated content of the directory
    listdir_patch = patch('os.listdir', return_value=['categories.json', 'groups.json', 'technologies'])
    # Content served for every file opened during the test
    open_patch = patch('builtins.open', new_callable=mock_open, read_data='{"key": "value"}')

    with isfile_patch, listdir_patch, open_patch:
        persister = AsyncMock()
        crawler_configuration = CrawlerConfiguration(Request("http://perdu.com/"))
        async with AsyncCrawler.with_configuration(crawler_configuration) as crawler:
            options = {"timeout": 10, "level": 2, "wapp_dir": wapp_dir}

            module = ModuleWapp(crawler, persister, options, Event(), crawler_configuration)

            with pytest.raises(ValueError) as exc_info:
                await module.update()

            assert exc_info.value.args[0] == "Update failed : Something went wrong with files in wapp/"


@pytest.mark.asyncio
@respx.mock
async def test_raise_on_not_valid_json_file_for_update():
    """Tests that a ValueError is raised when calling update() with files whose content is not valid JSON.

    The directory listing is mocked and every opened file yields a malformed JSON document.
    """
    respx.get(url__regex=r"http://perdu.com/.*").mock(
        return_value=httpx.Response(404, content="Not Found")
    )

    wapp_dir = "wapp/"
    # Only paths ending with ".json" are reported as existing files
    isfile_patch = patch('os.path.isfile', side_effect=lambda x: x.endswith('.json'))
    # Simulated content of the directory
    listdir_patch = patch('os.listdir', return_value=['categories.json', 'groups.json', 'technologies'])
    # Malformed JSON served for every file opened during the test
    open_patch = patch('builtins.open', new_callable=mock_open, read_data='{"{key "value"}')

    with isfile_patch, listdir_patch, open_patch:
        persister = AsyncMock()
        crawler_configuration = CrawlerConfiguration(Request("http://perdu.com/"))
        async with AsyncCrawler.with_configuration(crawler_configuration) as crawler:
            options = {"timeout": 10, "level": 2, "wapp_dir": wapp_dir}

            module = ModuleWapp(crawler, persister, options, Event(), crawler_configuration)

            with pytest.raises(ValueError) as exc_info:
                await module.update()

            assert exc_info.value.args[0] == "Update failed : Something went wrong with files in wapp/"

@pytest.mark.asyncio
@respx.mock
async def test_raise_on_file_does_not_exist_for_update():
    """Tests that a ValueError is raised when calling update() while the expected wapp files are missing.

    The mocked directory listing only contains 'cat.json', 'gr.json' and 'technologie'.
    """
    respx.get(url__regex=r"http://perdu.com/.*").mock(
        return_value=httpx.Response(404, content="Not Found")
    )

    wapp_dir = "wapp/"
    # Only paths ending with ".json" are reported as existing files
    isfile_patch = patch('os.path.isfile', side_effect=lambda x: x.endswith('.json'))
    # Simulated directory content: none of the expected names is present
    listdir_patch = patch('os.listdir', return_value=['cat.json', 'gr.json', 'technologie'])
    # Content served for every file opened during the test
    open_patch = patch('builtins.open', new_callable=mock_open, read_data='{"{key "value"}')

    with isfile_patch, listdir_patch, open_patch:
        persister = AsyncMock()
        crawler_configuration = CrawlerConfiguration(Request("http://perdu.com/"))
        async with AsyncCrawler.with_configuration(crawler_configuration) as crawler:
            options = {"timeout": 10, "level": 2, "wapp_dir": wapp_dir}

            module = ModuleWapp(crawler, persister, options, Event(), crawler_configuration)

            with pytest.raises(ValueError) as exc_info:
                await module.update()

            assert exc_info.value.args[0] == "Update failed : Something went wrong with files in wapp/"
45 changes: 45 additions & 0 deletions tests/cli/test_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,16 @@ async def test_update_with_wapp_url(mock_update):
await wapiti_main()
mock_update.assert_called_once_with(None)

@pytest.mark.asyncio
@mock.patch("wapitiCore.main.wapiti.Wapiti.update")
async def test_update_with_wapp_dir(mock_update):
"""Check that Wapiti.update is called once with None when --update is combined with -m wapp and --wapp-dir."""
testargs = ["wapiti", "--update", "-m", "wapp", "--wapp-dir", "/"]
with mock.patch.object(sys, 'argv', testargs):
# wapiti_main() is expected to terminate with SystemExit for this command line
with pytest.raises(SystemExit):
await wapiti_main()
mock_update.assert_called_once_with(None)


@pytest.mark.asyncio
async def test_update_with_proxy():
Expand Down Expand Up @@ -330,6 +340,41 @@ async def test_mod_wapp_is_not_set(mock_is_mod_wapp_or_update_set, _, __):
await wapiti_main()
mock_is_mod_wapp_or_update_set.assert_called_once()

@pytest.mark.asyncio
@mock.patch("wapitiCore.main.wapiti.Wapiti.browse")
@mock.patch("wapitiCore.main.wapiti.Wapiti.attack")
@mock.patch("wapitiCore.main.wapiti.is_mod_wapp_or_update_set", return_value=(False, {}, []))
async def test_mod_wapp_is_set_with_wapp_dir(mock_is_mod_wapp_or_update_set, _, __):
    """Ensure is_mod_wapp_or_update_set is called exactly once when --wapp-dir is used with the wapp module."""
    command_line = ["wapiti"]
    command_line += ["--url", "http://testphp.vulnweb.com/"]
    command_line += ["-m", "wapp"]
    command_line += ["--wapp-dir", "/"]

    with mock.patch.object(sys, "argv", command_line):
        await wapiti_main()
        mock_is_mod_wapp_or_update_set.assert_called_once()


@pytest.mark.asyncio
@mock.patch("wapitiCore.main.wapiti.Wapiti.browse")
@mock.patch("wapitiCore.main.wapiti.Wapiti.attack")
@mock.patch("wapitiCore.main.wapiti.is_mod_wapp_or_update_set", return_value=(False, {}, []))
async def test_mod_wapp_is_not_set_with_wapp_dir(mock_is_mod_wapp_or_update_set, _, __):
    """Ensure is_mod_wapp_or_update_set is called exactly once when --wapp-dir is used with another module (xss)."""
    command_line = ["wapiti"]
    command_line += ["--url", "http://testphp.vulnweb.com/"]
    command_line += ["-m", "xss"]
    command_line += ["--wapp-dir", "/"]

    with mock.patch.object(sys, "argv", command_line):
        await wapiti_main()
        mock_is_mod_wapp_or_update_set.assert_called_once()


@pytest.mark.asyncio
@mock.patch("wapitiCore.main.wapiti.Wapiti.browse")
Expand Down
4 changes: 4 additions & 0 deletions wapitiCore/attack/attack.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,10 @@ def cms(self):
def wapp_url(self):
return self.options.get("wapp_url", "https://raw.githubusercontent.com/wapiti-scanner/wappalyzer/main/")

@property
def wapp_dir(self):
    """Value of the "wapp_dir" attack option, or None when the option is not set."""
    configured_directory = self.options.get("wapp_dir", None)
    return configured_directory

@property
def proto_endpoint(self):
parts = urlparse(self.external_endpoint)
Expand Down
122 changes: 103 additions & 19 deletions wapitiCore/attack/mod_wapp.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@
import json
import os
import asyncio
import shutil
import string
from typing import Dict, Tuple, Optional
from typing import Dict, Tuple, Optional, List
import re
from urllib.parse import urlparse

Expand Down Expand Up @@ -69,6 +70,44 @@ def _is_valid_json(response):
return False


def is_json_file(filepath):
    """Check that *filepath* can be opened and parsed as JSON.

    Returns:
        True when the file was parsed successfully.

    Raises:
        ValueError: the file content is not valid JSON.
        OSError: the file does not exist.
    """
    try:
        with open(filepath, 'r', encoding="utf-8") as json_stream:
            json.load(json_stream)
    except FileNotFoundError as missing_file:
        raise OSError(f"{filepath} does not exist !") from missing_file
    except json.JSONDecodeError as bad_json:
        raise ValueError(f"{filepath} is not a valid JSON.") from bad_json
    else:
        return True


def merge_json_files(directory_path, output_file):
    """Merge every ``.json`` file of a directory into a single JSON file.

    Each source file must contain a JSON object; the objects are merged into one
    dictionary which is written to *output_file*. Files are processed in sorted
    name order, so when several files define the same key the value from the
    last file name wins.

    Args:
        directory_path: directory holding the JSON files to merge.
        output_file: path of the JSON file to write.

    Raises:
        OSError: the directory holds no ``.json`` entry (errors raised by
            os.listdir() or open() also propagate).
        ValueError: a file is not valid JSON or its top-level value is not an object.
    """
    # os.listdir() returns names in arbitrary order: sort them so the merge is deterministic
    files = sorted(f for f in os.listdir(directory_path) if f.endswith('.json'))

    # Check if there are any JSON files in the directory
    if not files:
        raise OSError(f"{directory_path} does not contain any file !")

    merged_data = {}

    for file_name in files:
        file_path = os.path.join(directory_path, file_name)

        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                file_data = json.load(file)
        except json.JSONDecodeError as json_error:
            raise ValueError(f"{file_name} is not a valid JSON.") from json_error

        # dict.update() would raise TypeError (or silently accept a list of pairs)
        # for a non-object document: reject it with the error type callers handle.
        if not isinstance(file_data, dict):
            raise ValueError(f"{file_name} does not contain a JSON object.")
        merged_data.update(file_data)

    # Write the merged JSON data to the output file
    with open(output_file, 'w', encoding='utf-8') as output:
        json.dump(merged_data, output, ensure_ascii=False, indent=2)


def get_tests(data: dict):
tests = {}

Expand All @@ -94,6 +133,7 @@ class ModuleWapp(Attack):
name = "wapp"

BASE_URL = Attack.wapp_url
WAPP_DIR = Attack.wapp_dir
WAPP_CATEGORIES = "categories.json"
WAPP_GROUPS = "groups.json"
WAPP_TECHNOLOGIES = "technologies.json"
Expand All @@ -106,30 +146,74 @@ def __init__(self, crawler, persister, attack_options, stop_event, crawler_confi
if not os.path.isdir(self.user_config_dir):
os.makedirs(self.user_config_dir)

async def copy_files_to_conf(self, files_to_copy: List[str]):
    """
    Copy each wapp DB file listed in files_to_copy into self.user_config_dir.

    Entries that are not existing files are logged and skipped. A shutil.Error
    raised by a copy is logged and the next file is processed.
    """
    for candidate in files_to_copy:
        if os.path.isfile(candidate):
            # The copy keeps the source file name inside the config directory
            target = os.path.join(self.user_config_dir, os.path.basename(candidate))
            try:
                shutil.copy(candidate, target)
                logging.info(f"Copied {candidate} to {target}")
            except shutil.Error as err:
                logging.error(f"Error copying {candidate}: {err}")
        else:
            logging.error(f"Warning: File {candidate} does not exist, skipping.")

async def update(self):
    """Update the Wappalyzer database, either from a local directory or from the web.

    When WAPP_DIR is set, the JSON files of its "technologies/" sub-directory are
    merged into the technologies file, the categories, groups and technologies
    files are checked to be valid JSON, then copied to the configuration
    directory. Otherwise, when BASE_URL is set, the database is loaded from that
    URL through _load_wapp_database().

    Raises:
        ValueError: the local files could not be merged, validated or copied, or
            _load_wapp_database() raised a ValueError.
        InvalidOptionValue: BASE_URL is not a valid URL.
        RequestError: re-raised after being logged.
    """
    if self.WAPP_DIR:
        failure_message = f"Update failed : Something went wrong with files in {self.WAPP_DIR}"
        categories_file_path = os.path.join(self.WAPP_DIR, self.WAPP_CATEGORIES)
        groups_file_path = os.path.join(self.WAPP_DIR, self.WAPP_GROUPS)
        technologies_directory_path = os.path.join(self.WAPP_DIR, "technologies/")
        technologies_file_path = os.path.join(self.WAPP_DIR, self.WAPP_TECHNOLOGIES)
        files_list = [categories_file_path, groups_file_path, technologies_file_path]

        try:
            # The merged technologies file must exist before the three files are validated
            merge_json_files(technologies_directory_path, technologies_file_path)
            # is_json_file() either returns True or raises, so no result needs to be tested
            for file_path in files_list:
                is_json_file(file_path)
        except (ValueError, OSError) as error:
            logging.error(error)
            raise ValueError(failure_message) from error

        try:
            await self.copy_files_to_conf(files_list)
        except ValueError:
            # NOTE(review): a ValueError raised while copying ends the update silently;
            # kept as-is, confirm whether it should be reported as a failure instead.
            return
        except OSError as error:
            logging.error(error)
            raise ValueError(failure_message) from error

    elif self.BASE_URL:
        if not is_valid_url(self.BASE_URL):
            raise InvalidOptionValue(
                "--wapp-url", self.BASE_URL
            )

        wapp_categories_url = f"{self.BASE_URL}src/categories.json"
        wapp_technologies_base_url = f"{self.BASE_URL}src/technologies/"
        wapp_groups_url = f"{self.BASE_URL}src/groups.json"
        try:
            await self._load_wapp_database(
                wapp_categories_url,
                wapp_technologies_base_url,
                wapp_groups_url
            )
        except RequestError as e:
            logging.error(f"RequestError occurred: {e}")
            raise
        except IOError:
            # Logged only: an IOError does not abort the caller
            logging.error("Error downloading wapp database.")
        except ValueError as e:
            logging.error(f"Value error: {e}")
            raise

async def must_attack(self, request: Request, response: Optional[Response] = None):
if self.finished:
Expand Down
14 changes: 14 additions & 0 deletions wapitiCore/main/wapiti.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,8 @@ async def wapiti_main():
logging.log("GREEN", "[*] Updating modules")
if args.wapp_url:
attack_options = {"level": args.level, "timeout": args.timeout, "wapp_url": fix_url_path(args.wapp_url)}
elif args.wapp_dir:
attack_options = {"level": args.level, "timeout": args.timeout, "wapp_dir": args.wapp_dir}
else:
attack_options = {"level": args.level, "timeout": args.timeout,\
"wapp_url": "https://raw.githubusercontent.com/wapiti-scanner/wappalyzer/main/"}
Expand Down Expand Up @@ -346,6 +348,18 @@ async def wapiti_main():
"--wapp-url", url_value
)

if args.wapp_dir:
if not is_mod_wapp_or_update_set(args):
raise InvalidOptionValue("--wapp-url", "module wapp or --update option is required when --wapp-url is "
"used")
dir_value = args.wapp_dir
if os.path.isdir(dir_value):
attack_options["wapp_dir"] = dir_value
else:
raise InvalidOptionValue(
"--wapp-dir", dir_value
)

if args.skipped_parameters:
attack_options["skipped_parameters"] = set(args.skipped_parameters)

Expand Down
Loading

0 comments on commit bff30b4

Please sign in to comment.