-
Notifications
You must be signed in to change notification settings - Fork 0
/
stopWords.py
27 lines (22 loc) · 856 Bytes
/
stopWords.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
from Stemmer import Stemmer
# Module-level stemmer for English, constructed once when this module is imported.
stemmer = Stemmer('english')
class StopWords:
    """Load stop words from stopWords.txt to a set and test tokens against it.

    Attributes:
        stopWordsList: every stop word read so far, in file order
            (duplicates are kept).
        stopWordsSet: the same words as a set, used for membership tests.
    """

    def __init__(self):
        self.stopWordsList = []
        self.stopWordsSet = set()

    def readStopWords(self, path="./stopWords.txt"):
        """Read comma-separated stop words from a text file.

        Each line may hold several words separated by commas. Surrounding
        whitespace, including the line terminator, is stripped from every
        word and empty entries are skipped. Words are stored exactly as
        written in the file; they are not stemmed.

        Args:
            path: location of the stop-word file. Defaults to
                "./stopWords.txt", relative to the current working directory.

        Raises:
            IOError: if the file cannot be opened or read.
        """
        with open(path) as input_file:
            for input_line_raw in input_file:
                # Split on the bare comma and strip each piece so that the
                # last word of a line does not keep its trailing newline.
                words = [word.strip() for word in input_line_raw.split(',')]
                self.stopWordsList.extend(word for word in words if word)
        # Rebuild the lookup set once, after the whole file has been read.
        self.stopWordsSet = set(self.stopWordsList)

    def isStopWord(self, token):
        """Return True if *token* should be discarded.

        A token is rejected when it is a known stop word, or when its length
        is shorter than 3 or longer than 20 characters.

        Args:
            token: the string to test.

        Returns:
            bool: True if the token is a stop word or has a rejected length,
            False otherwise.
        """
        return (
            token in self.stopWordsSet
            or len(token) < 3
            or len(token) > 20
        )