-
Notifications
You must be signed in to change notification settings - Fork 1
/
LBC.py
90 lines (76 loc) · 2.7 KB
/
LBC.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
from utils import tokenize
class LBC:
    """Lexicon-based sentiment classifier backed by a LIWC dictionary file.

    Every token of a text is looked up in a sentiment lexicon (+1 for
    positive-emotion words, -1 for negative-emotion words, 0 otherwise).
    The raw polarity is then adjusted according to negation, booster and
    downtoner words that occur earlier in the same text, and the adjusted
    values are summed into a single score.
    """

    # Words that strengthen the polarity of a later sentiment word.
    booster_words = [
        'muito',
        'super',
    ]
    # Words that weaken the polarity of a later sentiment word.
    downtoner_words = [
        'pouco',
        'apenas'
    ]
    # Words that negate a later sentiment word.
    negative_words = [
        'não',
        'nem'
    ]
    # Multiplier applied when boosting; its reciprocal is used to downtone.
    BOOST_POLARITY = 3

    # LIWC category ids for positive and negative emotion words.
    _POSEMO_TAG = '126'
    _NEGEMO_TAG = '127'
    # Number of leading lines skipped before word entries are parsed
    # (presumably the category-definition header of the .dic file -- confirm
    # against the dictionary actually shipped with the project).
    _LIWC_SKIPPED_LINES = 66

    def __init__(self, sentiment_lexicon_file):
        """Load the sentiment lexicon from *sentiment_lexicon_file*.

        The file is parsed with :meth:`liwc_reader`.
        """
        self.sentiment_lexicon = self.read_lexicon(
            self.liwc_reader, sentiment_lexicon_file)

    def read_lexicon(self, reader, lexicon_file):
        """Return ``reader(lexicon_file)``.

        *reader* is any callable that takes a file path and returns a
        mapping from word to polarity.
        """
        return reader(lexicon_file)

    def liwc_reader(self, lexicon_file):
        """Parse a tab-separated LIWC dictionary into ``{word: polarity}``.

        Each entry line is ``word<TAB>tag<TAB>tag...``. A word maps to ``1``
        when it carries the positive-emotion tag and to ``-1`` when it
        carries the negative-emotion tag (positive wins if both are
        present). Wildcard entries (ending in ``*``) and the booster,
        downtoner and negation words are left out of the lexicon.
        """
        modifiers = (set(self.booster_words)
                     | set(self.downtoner_words)
                     | set(self.negative_words))
        sentiment_lex = {}
        # The context manager guarantees the file handle is closed.
        with open(lexicon_file, 'r') as handle:
            for line_no, entry in enumerate(handle):
                if line_no < self._LIWC_SKIPPED_LINES:
                    continue
                # Drop the line terminator first; otherwise the last tag on
                # the line would read e.g. '126\n' and never match.
                fields = entry.rstrip('\r\n').split('\t')
                word = fields[0]
                if not word or word.endswith('*') or word in modifiers:
                    continue
                # Only the columns after the word are category tags.
                tags = fields[1:]
                if self._POSEMO_TAG in tags:
                    sentiment_lex[word] = 1
                elif self._NEGEMO_TAG in tags:
                    sentiment_lex[word] = -1
        return sentiment_lex

    def get_polarity(self, word):
        """Return the lexicon polarity of *word*, or ``0`` if it is unknown."""
        return self.sentiment_lexicon.get(word, 0)

    def context_polarity(self, tokens, sent_word_idx):
        """Return the polarity of ``tokens[sent_word_idx]`` adjusted by context.

        The context is every token before ``sent_word_idx`` (not only the
        adjacent ones). With ``p`` the lexicon polarity of the word and
        ``B`` equal to ``BOOST_POLARITY``:

        * negation + downtoner -> ``B * p``
        * negation + booster   -> ``p / B``
        * negation only        -> ``-p``
        * booster              -> ``B * p``
        * downtoner            -> ``p / B``
        * otherwise            -> ``p``
        """
        word_polarity = self.get_polarity(tokens[sent_word_idx])
        preceding = tokens[:sent_word_idx]

        negation = any(tok in self.negative_words for tok in preceding)
        booster = any(tok in self.booster_words for tok in preceding)
        downtoner = any(tok in self.downtoner_words for tok in preceding)

        if negation:
            # A negated modifier flips its effect: "not a little" boosts,
            # "not very" downtones.
            if downtoner:
                return self.BOOST_POLARITY * word_polarity
            if booster:
                return 1 / self.BOOST_POLARITY * word_polarity
            return -1 * word_polarity
        elif booster:
            return self.BOOST_POLARITY * word_polarity
        elif downtoner:
            return 1 / self.BOOST_POLARITY * word_polarity
        return word_polarity

    def classify(self, text):
        """Return the summed context-adjusted polarity of every token in *text*.

        *text* is split with the module-level ``tokenize`` helper.
        """
        tokens = tokenize(text)
        return sum(self.context_polarity(tokens, idx)
                   for idx in range(len(tokens)))
if __name__ == '__main__':
    # Small demo: score one Portuguese sentence with the LIWC lexicon and
    # print the sentence and its score separated by a tab.
    sample = 'não é legal o filme'
    lbc = LBC('./data/LIWC2007_Portugues_win.dic.txt')
    score = lbc.classify(sample)
    print('\t'.join([sample, str(score)]))