Skip to content

Commit

Permalink
Merge pull request #1345 from valentijnscholten/patch-1
Browse files Browse the repository at this point in the history
Allow commenting out of urls in url file
  • Loading branch information
shelld3v authored Dec 30, 2023
2 parents 97b0c88 + 15c859c commit b2ccd6d
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 44 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,4 @@ jobs:
run: |
flake8 .
- name: Codespell
run: codespell
run: codespell -S CONTRIBUTORS.md
3 changes: 1 addition & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

## [Unreleased]
- Support non-default network interface

- Remove unused dependencies (urllib3, cryptography, cffi, idna, chardet)

## [0.4.3] - October 2nd, 2022
Expand Down Expand Up @@ -123,7 +122,7 @@
- Exclude status switch
- Pause/next directory feature
- Changed help structure
- Expaded default dictionary
- Expanded default dictionary

## 0.2.2 - July 2, 2014
- Fixed some bugs
Expand Down
1 change: 1 addition & 0 deletions CONTRIBUTORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@
- [FantasqueX](https://www.github.com/FantasqueX)
- [Ovi3](https://github.com/Ovi3)
- [u21h2](https://www.github.com/u21h2)
- [Valentijn Scholten](https://www.github.com/valentijnscholten)

Special thanks to all the people who are named here!

Expand Down
100 changes: 64 additions & 36 deletions lib/core/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from lib.parse.cmdline import parse_arguments
from lib.parse.config import ConfigParser
from lib.parse.headers import HeadersParser
from lib.utils.common import iprange, read_stdin, uniq
from lib.utils.common import iprange, read_stdin, strip_and_uniquify
from lib.utils.file import File, FileUtils


Expand All @@ -52,7 +52,12 @@ def parse_options():
exit(1)

if not opt.raw_file:
opt.urls = uniq(opt.urls)
opt.urls = strip_and_uniquify(
filter(
lambda url: not url.startswith("#"),
opt.urls,
)
)

if not opt.extensions and not opt.remove_extensions:
print("WARNING: No extension was specified!")
Expand Down Expand Up @@ -113,37 +118,46 @@ def parse_options():
opt.exclude_status_codes = _parse_status_codes(opt.exclude_status_codes)
opt.recursion_status_codes = _parse_status_codes(opt.recursion_status_codes)
opt.skip_on_status = _parse_status_codes(opt.skip_on_status)
opt.prefixes = uniq([prefix.strip() for prefix in opt.prefixes.split(",") if prefix], tuple)
opt.suffixes = uniq([suffix.strip() for suffix in opt.suffixes.split(",") if suffix], tuple)
opt.subdirs = [
subdir.lstrip(" /") + ("" if not subdir or subdir.endswith("/") else "/")
for subdir in opt.subdirs.split(",")
]
opt.exclude_subdirs = [
subdir.lstrip(" /") + ("" if not subdir or subdir.endswith("/") else "/")
for subdir in opt.exclude_subdirs.split(",")
]
opt.prefixes = tuple(strip_and_uniquify(opt.prefixes.split(",")))
opt.suffixes = tuple(strip_and_uniquify(opt.suffixes.split(",")))
opt.subdirs = strip_and_uniquify(
[
subdir.lstrip("/") + ("" if not subdir or subdir.endswith("/") else "/")
for subdir in opt.subdirs.split(",")
]
)
opt.exclude_subdirs = strip_and_uniquify(
[
subdir.lstrip("/") + ("" if not subdir or subdir.endswith("/") else "/")
for subdir in opt.exclude_subdirs.split(",")
]
)
opt.exclude_sizes = {size.strip().upper() for size in opt.exclude_sizes.split(",")}

if opt.remove_extensions:
opt.extensions = ("",)
elif opt.extensions == "*":
opt.extensions = COMMON_EXTENSIONS
elif opt.extensions == "CHANGELOG.md":
print("A weird extension was provided: 'CHANGELOG.md'. Please do not use * as the "
"extension or enclose it in double quotes")
print(
"A weird extension was provided: 'CHANGELOG.md'. Please do not use * as the "
"extension or enclose it in double quotes"
)
exit(0)
else:
opt.extensions = uniq(
[extension.lstrip(" .") for extension in opt.extensions.split(",")],
tuple,
opt.extensions = tuple(
strip_and_uniquify(
[extension.lstrip(".") for extension in opt.extensions.split(",")]
)
)

opt.exclude_extensions = uniq(
[
exclude_extension.lstrip(" .")
for exclude_extension in opt.exclude_extensions.split(",")
], tuple
opt.exclude_extensions = tuple(
strip_and_uniquify(
[
exclude_extension.lstrip(".")
for exclude_extension in opt.exclude_extensions.split(",")
]
)
)

if opt.auth and not opt.auth_type:
Expand All @@ -153,18 +167,24 @@ def parse_options():
print("No authentication credential found")
exit(1)
elif opt.auth and opt.auth_type not in AUTHENTICATION_TYPES:
print(f"'{opt.auth_type}' is not in available authentication "
f"types: {', '.join(AUTHENTICATION_TYPES)}")
print(
f"'{opt.auth_type}' is not in available authentication "
f"types: {', '.join(AUTHENTICATION_TYPES)}"
)
exit(1)

if set(opt.extensions).intersection(opt.exclude_extensions):
print("Exclude extension list can not contain any extension "
"that has already in the extension list")
print(
"Exclude extension list can not contain any extension "
"that has already in the extension list"
)
exit(1)

if opt.output_format not in OUTPUT_FORMATS:
print("Select one of the following output formats: "
f"{', '.join(OUTPUT_FORMATS)}")
print(
"Select one of the following output formats: "
f"{', '.join(OUTPUT_FORMATS)}"
)
exit(1)

return vars(opt)
Expand Down Expand Up @@ -212,17 +232,19 @@ def parse_config(opt):
config.read(opt.config)

# General
opt.thread_count = opt.thread_count or config.safe_getint(
"general", "threads", 25
)
opt.thread_count = opt.thread_count or config.safe_getint("general", "threads", 25)
opt.include_status_codes = opt.include_status_codes or config.safe_get(
"general", "include-status"
)
opt.exclude_status_codes = opt.exclude_status_codes or config.safe_get(
"general", "exclude-status"
)
opt.exclude_sizes = opt.exclude_sizes or config.safe_get("general", "exclude-sizes", "")
opt.exclude_texts = opt.exclude_texts or config.safe_getlist("general", "exclude-texts")
opt.exclude_sizes = opt.exclude_sizes or config.safe_get(
"general", "exclude-sizes", ""
)
opt.exclude_texts = opt.exclude_texts or config.safe_getlist(
"general", "exclude-texts"
)
opt.exclude_regex = opt.exclude_regex or config.safe_get("general", "exclude-regex")
opt.exclude_redirect = opt.exclude_redirect or config.safe_get(
"general", "exclude-redirect"
Expand Down Expand Up @@ -282,7 +304,9 @@ def parse_config(opt):
)

# Request
opt.http_method = opt.http_method or config.safe_get("request", "http-method", "get")
opt.http_method = opt.http_method or config.safe_get(
"request", "http-method", "get"
)
opt.headers = opt.headers or config.safe_getlist("request", "headers")
opt.headers_file = opt.headers_file or config.safe_get("request", "headers-file")
opt.follow_redirects = opt.follow_redirects or config.safe_getboolean(
Expand All @@ -297,15 +321,19 @@ def parse_config(opt):
# Connection
opt.delay = opt.delay or config.safe_getfloat("connection", "delay")
opt.timeout = opt.timeout or config.safe_getfloat("connection", "timeout", 7.5)
opt.max_retries = opt.max_retries or config.safe_getint("connection", "max-retries", 1)
opt.max_retries = opt.max_retries or config.safe_getint(
"connection", "max-retries", 1
)
opt.max_rate = opt.max_rate or config.safe_getint("connection", "max-rate")
opt.proxies = opt.proxies or config.safe_getlist("connection", "proxies")
opt.proxies_file = opt.proxies_file or config.safe_get("connection", "proxies-file")
opt.scheme = opt.scheme or config.safe_get(
"connection", "scheme", None, ("http", "https")
)
opt.replay_proxy = opt.replay_proxy or config.safe_get("connection", "replay-proxy")
opt.network_interface = opt.network_interface or config.safe_get("connection", "network-interface")
opt.network_interface = opt.network_interface or config.safe_get(
"connection", "network-interface"
)

# Advanced
opt.crawl = opt.crawl or config.safe_getboolean("advanced", "crawl")
Expand Down
14 changes: 12 additions & 2 deletions lib/utils/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import os
import sys

from functools import reduce
from ipaddress import IPv4Network, IPv6Network
from urllib.parse import quote, urljoin

Expand All @@ -42,8 +43,17 @@ def safequote(string_):
return quote(string_, safe=URL_SAFE_CHARS)


def uniq(array, type_=list):
    """Return the truthy items of *array* without duplicates, in first-seen order.

    The result is built by passing the surviving items to ``type_``
    (``list`` by default).
    """
    # dict keys are unique and keep insertion order.
    first_seen = dict.fromkeys(array)
    return type_(item for item in first_seen if item)
def _strip_and_uniquify_callback(array, item):
item = item.strip()
if not item or item in array:
return array

return array + [item]


def strip_and_uniquify(array, type_=list):
    """Strip every value, drop empties and duplicates, keep first-seen order.

    Args:
        array: Iterable of strings to clean up.
        type_: Callable that builds the returned container from the list of
            cleaned values; defaults to ``list``.

    Returns:
        ``type_`` applied to the list of unique, non-empty, stripped values.
    """
    stripped = (item.strip() for item in array)
    # dict keys are unique and insertion-ordered, so duplicates are removed in
    # one pass instead of rescanning and copying a list for every item.
    unique = dict.fromkeys(item for item in stripped if item)
    return type_(list(unique))


def lstrip_once(string, pattern):
Expand Down
6 changes: 3 additions & 3 deletions tests/utils/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@

from unittest import TestCase

from lib.utils.common import merge_path, uniq, get_valid_filename
from lib.utils.common import merge_path, strip_and_uniquify, get_valid_filename


class TestCommonUtils(TestCase):
def test_uniq(self):
    """uniq() drops repeated items and keeps the first-seen order."""
    actual = uniq(["foo", "bar", "foo"])
    expected = ["foo", "bar"]
    self.assertEqual(actual, expected, "The result is not unique or in wrong order")
def test_strip_and_uniquify(self):
    """strip_and_uniquify() strips values, removes duplicates and keeps order."""
    actual = strip_and_uniquify(["foo", "bar", " bar ", "foo"])
    expected = ["foo", "bar"]
    self.assertEqual(
        actual,
        expected,
        "The results are not stripped or contain duplicates or in wrong order",
    )

def test_get_valid_filename(self):
self.assertEqual(get_valid_filename("http://example.com:80/foobar"), "http___example.com_80_foobar", "Invalid filename for Windows")
Expand Down

0 comments on commit b2ccd6d

Please sign in to comment.