-
Notifications
You must be signed in to change notification settings - Fork 0
/
sentiment.py
92 lines (72 loc) · 2.87 KB
/
sentiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
"""
This script performs sentiment analysis on Mandarin tweets;
it uses TextBlob to translate them to English and gauge the sentiment.
Author: Farhaan Bukhsh <[email protected]>
"""
import argparse
import csv
import re
import textblob
from textblob import TextBlob
# Matches any single character that marks a post as Japanese/CJK text.
# NOTE: the ideograph range is shared with Chinese, so Chinese text matches too.
_JAPANESE_CHAR_PATTERN = re.compile(
    r"["
    r"\u3000-\u303F"  # CJK symbols and punctuation
    r"\u3040-\u309F"  # Hiragana
    r"\u30A0-\u30FF"  # Katakana
    r"\uFF00-\uFFEF"  # Halfwidth and fullwidth forms
    r"\u4E00-\u9FAF"  # CJK unified ideographs
    r"\u2605-\u2606"  # Black/white star
    r"\u2190-\u2195"  # Arrows
    r"\u203B"         # Reference mark
    r"]"
)


def check_post_is_japanese(content):
    """Return True if *content* contains at least one Japanese/CJK character.

    The previous pattern was wrapped in JavaScript-style ``/.../g``
    delimiters. Python's ``re`` treats those as literal characters, so the
    first and last alternatives only matched next to a literal ``/`` or
    ``/g``. The delimiters are dropped and the alternatives are merged into
    one character class.

    Args:
        content: The post text to inspect.

    Returns:
        bool: True when any character falls in the ranges above, else False.
    """
    return _JAPANESE_CHAR_PATTERN.search(content) is not None
def predict_sentiments(text):
    """Classify *text* as Positive, Negative or Neutral.

    Text flagged by ``check_post_is_japanese`` is translated to English with
    TextBlob first; if TextBlob reports it was not translated, the original
    text is scored as-is. Any other text is scored directly and "English" is
    printed.

    Returns:
        tuple: ``(label, polarity)`` where label is "Positive" for polarity
        above 0.4, "Negative" for polarity below -0.63, otherwise "Neutral".
    """
    if not check_post_is_japanese(text):
        analysed = TextBlob(text)
        print("English")
    else:
        try:
            analysed = TextBlob(text).translate(to="en")
        except textblob.exceptions.NotTranslated:
            # Fall back to scoring the untranslated text.
            analysed = TextBlob(text)

    score = analysed.sentiment.polarity
    if score > 0.4:
        label = "Positive"
    elif score < -0.63:
        label = "Negative"
    else:
        label = "Neutral"
    return label, score
def read_posts(input_file):
    """Read a CSV file and split it into its header row and data rows.

    Args:
        input_file: Path of the CSV file to read.

    Returns:
        tuple: ``(header, posts)`` where ``header`` is the first row as a
        list of strings and ``posts`` is a list of the remaining rows.
        An empty file yields ``([], [])``.
    """
    # newline='' lets the csv module handle line endings itself, so line
    # breaks embedded in quoted fields are preserved exactly.
    # NOTE(review): the file is decoded with the platform default encoding.
    with open(input_file, 'r', newline='') as datafile:
        reader = csv.reader(datafile)
        # Default to an empty header instead of leaking StopIteration.
        header = next(reader, [])
        posts = list(reader)
    return header, posts
def calculate_sentiment_score(posts):
    """Append a sentiment label and polarity to every row of *posts* in place.

    The text to score is read from column index 7 of each row. The row
    number is printed as progress output before the row is scored.
    """
    for position, row in enumerate(posts):
        text = row[7]
        print(f"Row number:{position}\n")
        label, score = predict_sentiments(text)
        row.extend((label, score))
def write_to_file(file_name, header, rows):
    """Write *header* followed by *rows* to *file_name* as CSV.

    Args:
        file_name: Path of the file to create or overwrite.
        header: Sequence of column names, written as the first row.
        rows: Iterable of row sequences written after the header.
    """
    # newline='' stops the text layer translating the writer's own "\r\n"
    # terminator, which otherwise becomes "\r\r\n" on some platforms.
    with open(file_name, 'w', newline='') as outfile:
        writer = csv.writer(
            outfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        writer.writerow(header)
        writer.writerows(rows)
def write_sentiment_output(file_name, header, posts):
    """Write *posts* to *file_name* with two extra header columns.

    The columns 'sentiment' and 'polarity score' are added after the
    existing header; the caller's header list is not modified.
    """
    extended_header = header + ['sentiment', 'polarity score']
    write_to_file(file_name, extended_header, posts)
def overwrite_posts_with_sentiment(file_name, header, posts):
    """Write a copy of the input data with column 8 set to the sentiment label.

    Each row is expected to end with the two values appended by
    ``calculate_sentiment_score`` (sentiment label, polarity). Both are
    removed and the label is stored at column index 8. Rows are modified in
    place.

    Output goes to ``<file_name without .csv>_sentiment_type.csv``.

    Args:
        file_name: Path of the original input file, used to derive the
            output path.
        header: Header row to write unchanged.
        posts: Rows carrying the trailing sentiment label and polarity.
    """
    for post in posts:
        sentiment = post[-2]
        # Drop the trailing sentiment label and polarity score.
        del post[-2:]
        post[8] = sentiment

    # Strip only a trailing ".csv". str.replace would remove every ".csv"
    # in the path, including one inside a directory name.
    suffix = ".csv"
    base_name = file_name[:-len(suffix)] if file_name.endswith(suffix) else file_name
    write_to_file(base_name + "_sentiment_type.csv", header, posts)
def csv_read_write(input_file, output_file):
    """Run the whole pipeline: read, score, then write both output files.

    The scored rows are first written to *output_file* with the extra
    sentiment columns. A second file derived from *input_file* is then
    written by ``overwrite_posts_with_sentiment``. The order matters because
    that second step modifies the rows in place.
    """
    columns, rows = read_posts(input_file)
    calculate_sentiment_score(rows)
    write_sentiment_output(output_file, columns, rows)
    overwrite_posts_with_sentiment(input_file, columns, rows)
if __name__ == "__main__":
    # Both paths are mandatory. Without required=True a missing option
    # reaches open() as None and fails with an unhelpful TypeError.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--inputFile', required=True,
        help='Input csv file which has tweets')
    parser.add_argument(
        '--outputFile', required=True,
        help='Output file csv that has sentiments and polarity')
    args = parser.parse_args()
    csv_read_write(input_file=args.inputFile, output_file=args.outputFile)