# jssniffer 1.0 2023
# Author : Alessio M
# https://github.com/CptAlessio/jssniffer
import os
import requests
import shutil
from bs4 import BeautifulSoup
from urllib.parse import urlparse, urljoin
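
# third-party dependencies (install with: pip install requests beautifulsoup4)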

# toggle printing of the full list of JavaScript files found during the crawl
outputfoundfiles = True


# function to get all JavaScript files on a page
def get_js_files(url):
    try:
        response = requests.get(url)
        soup = BeautifulSoup(response.content, "html.parser")
        js_files = set()
        # find all script tags
        for script in soup.find_all("script"):
            src = script.get("src")
            if src and src.endswith(".js"):
                js_files.add(src)
        return js_files
    except Exception as e:
        print(f"Error getting JS files for {url}: {str(e)}")
        return set()
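
# example (not part of the original script): get_js_files can also be called
# on its own to list the external scripts referenced by a single page, e.g.:
#     for js in get_js_files("https://example.com"):
#         print(js)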


# function to crawl a website
def crawl_website(websiteurl):
    visited = set()
    js_files = set()
    queue = [websiteurl]
    while queue:
        # get next page to visit from queue
        websiteurl = queue.pop(0)
        # check if page has already been visited
        if websiteurl in visited:
            continue
        try:
            domain = urlparse(websiteurl).netloc
            page_js_files = get_js_files(websiteurl)
            js_files.update(page_js_files)
            visited.add(websiteurl)
            print(".", end="", flush=True)
            # find all links on the page
            response = requests.get(websiteurl)
            soup = BeautifulSoup(response.content, "html.parser")
            links = soup.find_all("a")
            # add links to queue
            for link in links:
                href = link.get("href")
                if href:
                    # resolve relative URLs
                    href = urljoin(websiteurl, href)
                    # check if link is on the same domain and hasn't been visited
                    if urlparse(href).netloc == domain and href not in visited:
                        queue.append(href)
        except Exception as e:
            print(f"\nError crawling website {websiteurl}: {str(e)}")

    # print the list of JavaScript files found
    print(f"\n\nFound {len(js_files)} unique JavaScript files on {domain}:")
    if outputfoundfiles:
        for js_file in js_files:
            print(f" - {js_file}")

    # create a directory for the JavaScript files
    js_dir = os.path.join(os.getcwd(), "js_files")
    if not os.path.exists(js_dir):
        os.mkdir(js_dir)

    # download and search for keywords in each JavaScript file
    keywords = ["token"]
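    # the keyword list can be extended with other strings of interest,
    # e.g. (illustrative values, not part of the original script):
    #     keywords = ["token", "apikey", "secret"]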
    # create list of files with keywords
    fileswithkeyword = []
    for js_file in js_files:
        file_url = urljoin(websiteurl, js_file)
        file_name = os.path.basename(js_file)
        file_path = os.path.join(js_dir, file_name)
        if not os.path.exists(file_path):
            response = requests.get(file_url)
            with open(file_path, "wb") as f:
                f.write(response.content)

    # scan each downloaded file for the keywords
    for file_name in os.listdir(js_dir):
        file_path = os.path.join(js_dir, file_name)
        with open(file_path, "rb") as f:
            raw = f.read()
        try:
            contents = raw.decode()
            for keyword in keywords:
                if keyword.lower() in contents.lower():
                    fileswithkeyword.append(file_name)
        except UnicodeDecodeError:
            # remove undecodable files only after the handle is closed so the
            # deletion also works on Windows
            os.remove(file_path)
            print(f"Error decoding {file_path}, skipping and removing file...")
            continue

    # remove duplicates
    unique_list = list(set(fileswithkeyword))
    # print unique files
    for file in unique_list:
        print(f"File {file} contains one or more keywords")

    # ask before deleting the downloaded JavaScript files
    if os.path.exists(js_dir):
        confirm = input(f"Are you sure you want to delete {js_dir}? (y/n): ")
        if confirm.lower() == "y":
            shutil.rmtree(js_dir)
            print(f"Deleted {js_dir}")
        else:
            print(f"{js_dir} was not deleted.")


# run the script
if __name__ == "__main__":
    # clear console
    os.system('cls' if os.name == 'nt' else 'clear')
    url = input("Enter a URL to crawl: ")
    crawl_website(url)