LSTM_model_loader.py
import os
import pickle
from collections import Counter

import numpy as np
from keras.preprocessing import sequence

import getEmbeddings2

# Size of the vocabulary kept when encoding the news text
top_words = 5000
if __name__ == '__main__':
    # Regenerate the shuffled train/test splits if any of them are missing
    if not os.path.isfile('./xtr_shuffled.npy') or \
            not os.path.isfile('./xte_shuffled.npy') or \
            not os.path.isfile('./ytr_shuffled.npy') or \
            not os.path.isfile('./yte_shuffled.npy'):
        getEmbeddings2.clean_data()

    # allow_pickle=True is needed on NumPy >= 1.16.3 to load object arrays of strings
    xtr = np.load('./xtr_shuffled.npy', allow_pickle=True)
    xte = np.load('./xte_shuffled.npy', allow_pickle=True)
    y_train = np.load('./ytr_shuffled.npy', allow_pickle=True)
    y_test = np.load('./yte_shuffled.npy', allow_pickle=True)

    # Tokenize the training articles and count word frequencies
    cnt = Counter()
    x_train = []
    for x in xtr:
        x_train.append(x.split())
        for word in x_train[-1]:
            cnt[word] += 1

    # Keep the most common words and assign each one an integer id
    most_common = cnt.most_common(top_words + 1)
    word_bank = {}
    id_num = 1
    for word, freq in most_common:
        word_bank[word] = id_num
        id_num += 1

    # Encode the training sentences; drop words outside the vocabulary
    for news in x_train:
        i = 0
        while i < len(news):
            if news[i] in word_bank:
                news[i] = word_bank[news[i]]
                i += 1
            else:
                del news[i]

    y_train = list(y_train)
    y_test = list(y_test)

    # Delete articles with 10 or fewer encoded words
    i = 0
    while i < len(x_train):
        if len(x_train[i]) > 10:
            i += 1
        else:
            del x_train[i]
            del y_train[i]

    # Tokenize the test articles
    x_test = []
    for x in xte:
        x_test.append(x.split())

    # Encode the test sentences with the same vocabulary
    for news in x_test:
        i = 0
        while i < len(news):
            if news[i] in word_bank:
                news[i] = word_bank[news[i]]
                i += 1
            else:
                del news[i]

    # Truncate and pad input sequences to a fixed length
    max_review_length = 500
    X_train = sequence.pad_sequences(x_train, maxlen=max_review_length)
    X_test = sequence.pad_sequences(x_test, maxlen=max_review_length)

    # Convert labels to numpy arrays
    y_train = np.array(y_train)
    y_test = np.array(y_test)

    # Load the pre-trained LSTM model and evaluate it on the test set
    print("Loading pre-trained LSTM model from lstm_model.pickle")
    with open("lstm_model.pickle", "rb") as f:
        model = pickle.load(f)
    scores = model.evaluate(X_test, y_test, verbose=0)
    print("Accuracy = %.2f%%" % (scores[1] * 100))

    # Write per-article predictions and probabilities to a results file
    # (predict_classes/predict_proba are available on older Keras Sequential models)
    y_pred = model.predict_classes(X_test)
    y_prob = model.predict_proba(X_test)
    with open("LSTM_results.txt", "w") as text_file:
        for idx, (read, truth, pred, prob) in enumerate(
                zip(xte, y_test, y_pred.flatten(), y_prob.flatten())):
            text_file.write("idx: {}\n{}\ntruth: {}\npred: {} {}\n\n".format(
                idx, read, truth, pred, prob))