-
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
19 changed files
with
1,825 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# Entry point for ``python -m proxycrawler`` — delegates to the CLI runner.
from proxycrawler.cli import run

if __name__ == "__main__":
    run()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,196 @@ | ||
import os | ||
import sys | ||
import typer | ||
|
||
from rich import print | ||
from rich.console import Console | ||
|
||
from proxycrawler import helpers | ||
from proxycrawler import constants | ||
from proxycrawler.messages import ( | ||
info, | ||
errors | ||
) | ||
from proxycrawler.src.proxycrawler import ProxyCrawler | ||
from proxycrawler.src.database.database_handler import DatabaseHandler | ||
|
||
# Init cli — the Typer application object that all commands below register on
cli = typer.Typer()
|
||
@cli.command()
def version():
    """ proxycrawler's version """
    version_message = f"[bold white]Version [bold cyan]{constants.VERSION}[bold white]"
    print(version_message)
|
||
@cli.command()
def scrap(
    enable_save_on_run: bool = typer.Option(True, "--enable-save-on-run", help="Save valid proxies while proxycrawler is still running (can be useful in case of a bad internet connection)"),
    group_by_protocol: bool = typer.Option(False, "--group-by-protocol", help="Save proxies into separate files based on the supported protocols [http, https, socks4, socks5]"),
    output_file_path: str = typer.Option(None, "--output-file-path", help="Custom output file path to save results (.txt)")
):
    """ Start scraping proxies.

    Fetches proxies from the configured services, validates them, and
    saves the valid ones (to the database and, optionally, to
    ``output_file_path``).
    """
    console = Console()

    # Configuring console
    console._log_render.omit_repeated_times = False  # Repeat the timestamp even if the logs were logged at the same time

    # Check output file path: only reject it when a parent directory was
    # given and that directory does not exist. The original check split on
    # "/" manually, which wrongly rejected bare file names like "out.txt"
    # (os.path.dirname returns "" for those, meaning the current directory).
    if output_file_path is not None:
        output_dir = os.path.dirname(output_file_path)
        if output_dir and not os.path.exists(output_dir):
            console.log(
                errors.UNVALID_OUTPUT_FILE_PATH(
                    output_file_path=output_file_path
                )
            )
            sys.exit(1)

    # Init database handler
    database_handler = DatabaseHandler()

    # Init ProxyCrawler
    proxy_crawler = ProxyCrawler(
        database_handler=database_handler,
        console=console,
    )

    # Fetching proxies and validating them
    proxy_crawler.crawl_proxies(
        enable_save_on_run=enable_save_on_run,
        group_by_protocol=group_by_protocol,
        output_file_path=output_file_path
    )
|
||
@cli.command()
def export_db(
    proxies_count: int = typer.Option(None, "--proxies-count", help="Number of proxies to export (exports all by default)"),
    validate_proxies: bool = typer.Option(True, "--validate", help="Validate proxies"),
    group_by_protocol: bool = typer.Option(False, "--group-by-protocol", help="Save proxies into separate files based on the supported protocols [http, https, socks4, socks5]"),
    output_file_path: str = typer.Option(None, "--output-file-path", help="Custom output file path to save results (.txt)")
):
    """ Export proxies from the database.

    Optionally re-validates the stored proxies before writing them out.
    """
    console = Console()

    # Configuring console
    console._log_render.omit_repeated_times = False  # Repeat the timestamp even if the logs were logged at the same time

    # Check output file path: only reject it when a parent directory was
    # given and that directory does not exist. The original check split on
    # "/" manually, which wrongly rejected bare file names like "out.txt"
    # (os.path.dirname returns "" for those, meaning the current directory).
    if output_file_path is not None:
        output_dir = os.path.dirname(output_file_path)
        if output_dir and not os.path.exists(output_dir):
            console.log(
                errors.UNVALID_OUTPUT_FILE_PATH(
                    output_file_path=output_file_path
                )
            )
            sys.exit(1)

    # Init database handler
    database_handler = DatabaseHandler()

    # Init proxycrawler
    proxy_crawler = ProxyCrawler(
        database_handler=database_handler,
        console=console,
    )

    console.log(
        info.FETCHING_AND_VALIDATING_PROXIES_FROM_DATABASE
    )

    proxy_crawler.export_database_proxies(
        proxies_count=proxies_count,
        group_by_protocol=group_by_protocol,
        validate_proxies=validate_proxies,
        output_file_path=output_file_path
    )
|
||
@cli.command()
def validate(
    proxy_file_path: str = typer.Option(None, "--proxy-file", help="path to the proxy file"),
    protocol: str = typer.Option(None, "--protocol", help="Set a specific protocol to test the proxies on"),
    test_all_protocols: bool = typer.Option(False, "--test-all-protocols", help="Test all the protocols on a proxy"),
    group_by_protocol: bool = typer.Option(False, "--group-by-protocol", help="Save proxies into separate files based on the supported protocols [http, https, socks4, socks5]"),
    output_file_path: str = typer.Option(None, "--output-file-path", help="Custom output file path to save results (.txt)")
):
    """ Validate a proxies list file.

    Reads one proxy per line (``<protocol>://ip:port``) from a .txt file,
    checks their format, then validates each one.
    """
    console = Console()

    # Configuring console
    console._log_render.omit_repeated_times = False  # Repeat the timestamp even if the logs were logged at the same time

    # Init database handler
    database_handler = DatabaseHandler()

    # Init proxycrawler
    proxy_crawler = ProxyCrawler(
        database_handler=database_handler,
        console=console,
    )

    # Check output file path: only reject it when a parent directory was
    # given and that directory does not exist. The original check split on
    # "/" manually, which wrongly rejected bare file names like "out.txt"
    # (os.path.dirname returns "" for those, meaning the current directory).
    if output_file_path is not None:
        output_dir = os.path.dirname(output_file_path)
        if output_dir and not os.path.exists(output_dir):
            console.log(
                errors.UNVALID_OUTPUT_FILE_PATH(
                    output_file_path=output_file_path
                )
            )
            sys.exit(1)

    # Check if the proxies file exists. Also guard against --proxy-file not
    # being given at all: os.path.exists(None) raises a TypeError.
    if proxy_file_path is None or not os.path.exists(proxy_file_path):
        console.log(errors.PROXY_FILE_DOESNT_EXIST)
        sys.exit(1)

    # Check the file's extension
    if not proxy_file_path.endswith(".txt"):
        console.log(errors.FILE_EXTENSION_NOT_SUPPORTED)
        sys.exit(1)

    # Read the proxies (closing the file handle), skipping blank lines so a
    # trailing newline in the file doesn't abort the whole run.
    with open(proxy_file_path, "r") as proxy_file:
        proxies = [proxy.strip() for proxy in proxy_file if proxy.strip()]

    # Check the format of the proxies.
    # NOTE: `check_proxy_fromat` is the (misspelled) public method name on
    # ProxyCrawler; it must stay as-is here.
    bad_format_proxies = [
        proxy for proxy in proxies
        if not proxy_crawler.check_proxy_fromat(proxy=proxy)
    ]

    if len(bad_format_proxies) != 0:
        console.log(errors.UNVALID_PROXY_FORMAT)
        sys.exit(1)

    # Check the protocol
    protocols = [
        "http",
        "https",
        "socks4",
        "socks5"
    ]
    if protocol is not None and protocol not in protocols:
        console.log(
            errors.UNVALID_PROXY_PROTOCOL(
                protocol=protocol,
                # The original call omitted this required argument, which
                # raised a TypeError instead of printing the error message.
                protocols=protocols
            )
        )
        sys.exit(1)

    # Validate the list of proxies
    console.log(
        info.VALIDATING_PROXIES_FROM_FILE(
            proxies_count=len(proxies),
            proxy_file_path=proxy_file_path
        )
    )

    proxy_crawler.validate_proxies(
        proxies=proxies,
        protocol=protocol,
        test_all_protocols=test_all_protocols,
        group_by_protocol=group_by_protocol,
        proxy_file_path=proxy_file_path,
        output_file_path=output_file_path
    )
|
||
def run():
    """ Runs proxycrawler """
    helpers.banner()  # show the ASCII-art banner before handing control to Typer
    cli()

if __name__ == "__main__":
    run()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
import os

# Package main info
PACKAGE = "proxycrawler"
VERSION = "0.1.0"
AUTHOR = "ramsy0dev"
GITHUB = "https://github.com/ramsy0dev/proxycrawler"

# Banner shown on startup (rich markup, rendered by helpers.banner).
# NOTE(review): the ASCII-art spacing was reconstructed from a
# whitespace-mangled source — confirm it still renders correctly.
BANNER = f"""[bold white]
                                                          __
    ____  _________  _  ____  ________________ __      __/ /__  _____
   / __ \/ ___/ __ \| |/_/ / / / ___/ ___/ __ `/ | /| / / / _ \/ ___/
  / /_/ / / / /_/ />  </ /_/ / /__/ / / /_/ /| |/ |/ / /  __/ /
 / .___/_/  \____/_/|_|\__, /\___/_/  \__,_/ |__/|__/_/\___/_/  Version [bold cyan]{VERSION}[bold white]
/_/                   /____/
        Made by [bold green]`ramsy0dev`[bold white]
"""

# Home path — the user's home directory; config and DB live under
# ~/.config/proxycrawler
HOME = os.path.expanduser("~")

# Database URL — SQLAlchemy-style URL for a SQLite file via pysqlite
DATABASE_URL = f"sqlite+pysqlite:///{HOME}/.config/proxycrawler/database.db"

# Debug proxycrawler — enables extra debug log messages when True
DEBUG = False
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
import uuid | ||
import string | ||
import random | ||
import hashlib | ||
import datetime | ||
|
||
from rich import print | ||
|
||
from proxycrawler import constants | ||
|
||
def banner() -> None:
    """ proxycrawler's banner """
    banner_text = constants.BANNER
    print(banner_text)
|
||
# def log_json(json_data: str, console) -> None: | ||
# """ Logs out the json data in a beautified way """ | ||
# splited_json_data = json_data.split("\n") | ||
|
||
# for log_line in splited_json_data: | ||
# console.log(log_line) | ||
|
||
def date() -> datetime.datetime:
    """Return the current local date and time.

    The previous annotation (``-> datetime``) named the *module*, not the
    class; ``datetime.datetime`` is the actual return type of ``now()``.
    """
    return datetime.datetime.now()
|
||
def generate_uid(data: str) -> str:
    """Generate a randomly-salted UID string from the given data.

    A single random ASCII letter is appended to ``data`` as a salt
    (the original ``''.join(random.choices(...))`` with no ``k`` produced
    exactly one letter too, via a needless list comprehension), the salted
    seed is hashed, and a version-5 UUID is derived from the digest.

    Args:
        data (str): seed data the UID is derived from.

    Returns:
        str: the generated UUID as a string.
    """
    salted_data = f"{data}{random.choice(string.ascii_letters)}"

    # MD5 is only used to normalise the salted seed into a fixed-length
    # name for uuid5 — not for anything security-sensitive.
    hashed_data_salt = hashlib.md5(salted_data.encode()).hexdigest()
    generated_uuid = uuid.uuid5(uuid.NAMESPACE_DNS, hashed_data_salt)

    return str(generated_uuid)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
""" | ||
Debug messages used through out proxycrawler | ||
to help in debugging | ||
""" | ||
|
||
EXCEPTION_RAISED_WHEN_VALIDATING_PROXY = lambda proxy, error: f"[bold blue][DEBUG] [bold white]Exception raised when validating proxy:[bold green]{proxy}[bold white]. Error: {error}" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
""" | ||
Errors messages used through out proxycrawler | ||
to log out to the end-user | ||
""" | ||
|
||
FILE_EXTENSION_NOT_SUPPORTED = f"[bold red][ERROR] [bold white]The provided proxy file's extension is not supported. Please make sure it's a plain text file (.txt) and try again" | ||
|
||
PROXY_FILE_DOESNT_EXIST = f"[bold red][ERROR] [bold white]The provided proxy file path doesn't seem to exists. Please verify it and try again" | ||
|
||
UNVALID_OUTPUT_FILE_PATH = lambda output_file_path: f"[bold red][ERROR] [bold white]Unvalid output file path [bold red]'{output_file_path}'[bold white]. Please change it and try again (or you can leave it empty)" | ||
|
||
FAILD_TO_REQUEST_GEONODE_API = lambda error: f"[bold red][ERROR] [bold white]Faild to request [bold green]geonode[bold white]'s API. Error: {error}" | ||
FAILD_TO_REQUEST_FREE_PROXY_LIST = lambda error: f"[bold red][ERROR] [bold white]Faild to request [bold green]free-proxy-list.net[bold white]. Error: {error}" | ||
|
||
UNVALID_COUNTRY_CODE = lambda country_code, supported_country_code: f"[bold red][ ! ] [bold white]Unvalid country code [bold red]'{country_code}'[bold white]. Supported country code: \n{supported_country_code}" | ||
|
||
UNVALID_PROXY_FORMAT = f"[bold red][ERROR] [bold white]Unvalid proxies format. Format should be [bold green]<protocol>://ip:port[bold white]. Please fix it and try again" | ||
|
||
UNVALID_PROXY_PROTOCOL = lambda protocol, protocols: f"[bold red][ERROR] [bold white]Unvalid proxy protocol [bold red]'{protocol}'. the supported protocols are [bold green]{protocols}[bold white] (you may keep --protocol null to test it on all protocols)" | ||
|
||
NO_PROXIES_WHERE_GATHERED = lambda proxies: f"[bold red][ERROR] [bold white]No proxies where gathered. proxies:[bold red]{proxies}[bold white]" | ||
|
||
NO_PROXIES_WHERE_FOUND_IN_THE_DATABASE = "[bold red][ERROR] [bold white]No proxies where found in the database" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
""" | ||
Info messages used through out proxycrawler | ||
to log out to the end-user | ||
""" | ||
|
||
USING_SERVICE = lambda service_name, service_url: f"[bold green][INFO] [bold white]Using service [bold green]'{service_name}'[bold white] with url:[bold red]'{service_url}'[bold white]" | ||
|
||
REQUESTING_GEONODE_API = lambda api_url, payload: f"[bold green][INFO] [bold white]Requesting [bold green]Geonode[bold white]'s API at api_url:[bold green]'{api_url}'[bold white] with payload: {payload}" | ||
|
||
REQUESTING_FREE_PROXY_LIST = lambda url: f"[bold green][INFO] [bold white]Scrapping [bold green]free-proxy-list[bold white] at url:[bold green]'{url}'[bold white]" | ||
|
||
FOUND_A_VALID_PROXY = lambda proxy: f"[bold green][INFO] [bold white]Found a valid proxy: [bold green]{proxy.proxy}[bold white]" | ||
|
||
PROXIES_SAVED_IN_PATHS = lambda output_file_paths: "[bold green][INFO] [bold white]Proxies saved in the following files:{}".format("".join([f"\n\t[bold green]->[bold white] {path}" for path in output_file_paths])) | ||
|
||
FETCHING_AND_VALIDATING_PROXIES_FROM_DATABASE = f"[bold green][INFO] [bold white]Fetching and validating proxies from the database" | ||
|
||
FETCHED_PROXIES_FROM_THE_DATABASE = lambda count: f"[bold green][INFO] [bold white]Fetched [bold green]'{count}'[bold white] proxies from the database. Validating them ..." | ||
|
||
VALIDATING_PROXIES_FROM_FILE = lambda proxies_count, proxy_file_path: f"[bold green][INFO] [bold white]Found [bold green]'{proxies_count}'[bold white] proxies from [bold green]'{proxy_file_path}'[bold white]. Validating them..." |
Oops, something went wrong.