-
Notifications
You must be signed in to change notification settings - Fork 0
/
url-extractor.py
44 lines (37 loc) · 1.23 KB
/
url-extractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# Author Ryan Arya Pramudya
# My GitHub https://github.com/ryanaryap
import re
# Read the list of URLs from a text file
def read_urls_from_file(file_path):
    """Return the lines of the text file at ``file_path`` as a list.

    Every line is returned with its leading and trailing whitespace
    (including the line terminator) removed. Lines that are empty after
    stripping are kept as empty strings, so the result has one entry per
    line of the file.
    """
    with open(file_path, 'r') as handle:
        # Iterating the handle yields the same lines readlines() would.
        return [line.strip() for line in handle]
# Extract the leading part of the URL (protocol plus domain)
def extract_domain(url):
    """Return the scheme and host prefix of ``url``, or ``None``.

    The URL must begin with ``http://`` or ``https://`` (in any letter
    case) followed by at least one host character. Host characters are
    word characters, dots and hyphens, so anything after the host
    (port, path, query string, fragment) is dropped.

    Args:
        url: The URL string to inspect.

    Returns:
        The matched ``scheme://host`` text exactly as it appears in
        ``url``, or ``None`` when ``url`` does not start with an
        http(s) scheme and a host.
    """
    # URL schemes are case-insensitive (RFC 3986, section 3.1), so
    # "HTTP://..." must be accepted just like "http://...".
    match = re.match(r"(https?://)([\w.-]+)", url, re.IGNORECASE)
    if match is None:
        return None
    # Return the text as written in the input; no case normalisation.
    return match.group(1) + match.group(2)
# Ask the user for the path of the text file to process.
file_path = input("Masukkan path file teks: ")

# Load every line of that file as a candidate URL.
urls = read_urls_from_file(file_path)

# For each line keep the protocol + domain when it can be extracted;
# otherwise record the original line prefixed with an "invalid URL" note.
output_lines = []
for url in urls:
    domain = extract_domain(url)
    output_lines.append(domain if domain else "URL tidak valid: " + url)

# Write the results to result.txt, one entry per line and with no
# trailing newline after the last entry.
output_file_path = "result.txt"
with open(output_file_path, 'w') as output_file:
    output_file.write('\n'.join(output_lines))

print("Output telah disimpan dalam file", output_file_path)