-
Notifications
You must be signed in to change notification settings - Fork 0
/
plagiarism.py
69 lines (46 loc) · 1.47 KB
/
plagiarism.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
from urllib.parse import quote_plus

import requests
from bs4 import BeautifulSoup
from Levenshtein import distance
from selenium import webdriver
from selenium.webdriver.common.by import By
# --- Step 1: read the user's input and harvest candidate links from Google ---
#
# Names produced here and used further down the script:
#   text      -- the passage to check
#   intensity -- maximum number of result pages to inspect
#   sums      -- every "https" link found on the search results page

# Prompt before launching the browser so that an invalid intensity value
# (int() raising ValueError) cannot leave a Chrome process behind.
# quote_plus turns spaces into "+" like the old str.replace did, and also
# escapes characters such as "&", "#" or "+" that would otherwise corrupt
# the query string.
term = quote_plus(input("Search Term: "))
text = input("Text: ")
intensity = int(input("Intensity (1-): "))
# Each intensity level adds five pages on top of a base of five.
intensity = intensity * 5 + 5
url = 'https://www.google.dz/search?q={}'.format(term)

options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--disable-gpu')
driver = webdriver.Chrome(options=options)

sums = []
try:
    driver.get(url)
    for link in driver.find_elements(By.CSS_SELECTOR, 'a'):
        alink = link.get_attribute('href')
        # Anchors without an href yield None; test for that explicitly
        # rather than catching the resulting AttributeError.
        if alink and alink.startswith("https"):
            sums.append(alink)
finally:
    # Always shut the browser down, even when loading the page fails.
    driver.quit()
# --- Step 2: compare the text against the content of each collected page ---
#
# Reads `sums`, `intensity` and `text` from the code above.  For every page,
# a window of len(text) characters is slid over the page text and the
# Levenshtein distance to `text` is computed for each position.

# A window counts as a match when at most 20% of the characters differ.
# The threshold depends only on `text`, so compute it once.
mind = round(0.2 * len(text))
window = len(text)

for count, i in enumerate(sums):
    # Inspect at most `intensity` pages.
    if count == intensity:
        break

    try:
        # Without a timeout a single unresponsive host stalls the scan forever.
        response = requests.get(i, timeout=10)
    except requests.RequestException as exc:
        # One unreachable page should not abort the whole scan.
        print("Skipping {} ({})".format(i, exc))
        continue

    soup = BeautifulSoup(response.content, 'html.parser')
    web_text = soup.text

    # Start from the worst case; pages shorter than the text produce an
    # empty range below and therefore keep this value.
    min_distance = window
    for j in range(len(web_text) - window + 1):
        distance_i = distance(text, web_text[j:j + window])
        if distance_i < min_distance:
            min_distance = distance_i
            # Good enough already: further windows cannot change the verdict.
            if min_distance <= mind:
                break

    if min_distance <= mind:
        print("Most likely plagiarized!\nMake sure to check the link and see for yourself.\n")
        print(i)
        break
    else:
        print()