-
Notifications
You must be signed in to change notification settings - Fork 9
/
game.py
executable file
·150 lines (124 loc) · 6.43 KB
/
game.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import numpy as np
import json
from train_test import dl_model
import matplotlib.pyplot as plt
#class responsible for actual gameplay
class Game:
    """Hangman-like word game in which a trained model guesses the letters.

    The model proposes one character at a time. Feedback comes either from a
    human (``play``) or from a known target word (``test_performance``).
    """

    def __init__(self, char_to_id_pth):
        """Load the character-to-id mapping and create the prediction model.

        Args:
            char_to_id_pth: Path to the JSON file with the character-to-id
                mapping dumped by the dataloader.
        """
        #load character to id mapping dumped by dataloader
        with open(char_to_id_pth, 'r') as f:
            self.char_to_id = json.load(f)
        #initialise model ('test_one' is passed straight to dl_model; see train_test for its meaning)
        self.dl_model = dl_model('test_one')

    def _next_guess(self, predicted_string, misses):
        """Return the best character not tried yet, or None if none is left.

        Args:
            predicted_string: Current target as a list of characters, with
                '*' marking unknown positions.
            misses: Characters already guessed that are not in the word.
        """
        #predictions are expected to arrive sorted by probability, best first
        best_chars = self.dl_model.predict(predicted_string, misses, self.char_to_id)
        for pred in best_chars:
            #skip anything already ruled out or already revealed in the target
            if pred not in misses and pred not in predicted_string:
                return pred
        #every candidate is used up; callers must stop instead of reusing an old guess
        return None

    @staticmethod
    def _parse_positions(inp, predicted_string):
        """Turn feedback such as "3 4" into zero-based indices.

        Args:
            inp: Stripped user input holding 1-based positions separated by
                whitespace.
            predicted_string: Current target, used for bounds and to make sure
                only unknown ('*') positions are filled.

        Returns:
            A list of zero-based indices, or None if the input is not a
            non-empty list of integers that each point at an unknown position.
        """
        try:
            positions = [int(tok) for tok in inp.split()]
        except ValueError:
            return None
        if not positions:
            return None
        indices = [pos - 1 for pos in positions]
        for idx in indices:
            #reject 0/negative positions explicitly: a negative index would silently wrap around
            if idx < 0 or idx >= len(predicted_string):
                return None
            #an already revealed letter must not be overwritten
            if predicted_string[idx] != '*':
                return None
        return indices

    def play(self):
        """Play one interactive round against a human on stdin/stdout.

        Returns:
            The list of characters the model guessed that are not in the word.
        """
        #store incorrectly predicted characters
        misses = []
        print("Instructions: For every predicted letter,")
        print("1. Type 'x' if letter does not exists in your word.")
        print("2. If it does exist, input the places at which it exists.")
        print("For e.g., if the word is hello and model's prediction is 'l', type 3 4 in the prompt. If prediction is 'a', type x.\n")
        print("Think of a word and input the number of characters:")
        #keep asking until we get a usable length; a length of 0 could never be finished
        while True:
            try:
                num_chars = int(input())
            except ValueError:
                num_chars = 0
            if num_chars > 0:
                break
            print("Please enter a positive whole number.")
        #initialise target string with blanks
        predicted_string = ['*'] * num_chars
        while True:
            best_char = self._next_guess(predicted_string, misses)
            if best_char is None:
                #the model has nothing new to offer, so the round cannot continue
                print("No characters left to guess. Giving up.")
                print("Total misses: ", len(misses))
                return misses
            #predict and ask user for feedback
            print("Prediction: " + best_char + "\nWhat do ya think?")
            while True:
                inp = input().strip()
                if inp == 'x':  #denotes character not present
                    indices = None
                    break
                #validate everything first so a bad entry never half-updates the target
                indices = self._parse_positions(inp, predicted_string)
                if indices is not None:
                    break
                print("Invalid format. Please refer to instructions.")
            if indices is None:  #indicates miss
                print("Miss")
                misses.append(best_char)
                continue
            #update target string
            for idx in indices:
                predicted_string[idx] = best_char
            if '*' in predicted_string:  #unknown characters remain, game is not over yet
                print("Guess correct! New target: " + ''.join(predicted_string))
            else:  #game is over. Report number of misses and return
                print("Game over. Predicted word: " + ''.join(predicted_string))
                print("Total misses: ", len(misses))
                return misses

    def test_performance(self, dataset_pth='dataset/20k.txt', num_trials=100, min_word_len=3, plot=True):
        """Measure the average number of misses per word length.

        Args:
            dataset_pth: Text file with one word per line.
            num_trials: Number of words sampled (with replacement) from the file.
            min_word_len: Words shorter than this are ignored.
            plot: If True, show a bar chart of average misses per word length.

        Returns:
            A list of ``(word_length, average_misses)`` tuples sorted by word
            length. Words the model could not finish are left out.

        Raises:
            ValueError: If no word in the file has at least ``min_word_len``
                characters.
        """
        with open(dataset_pth, 'r') as f:
            words = [line.strip() for line in f]
        #filter on the stripped word so a missing final newline or trailing whitespace cannot skew the length
        words = [w for w in words if len(w) >= min_word_len]
        if not words:
            raise ValueError("No words of length >= %d found in %s" % (min_word_len, dataset_pth))
        #randomly choose words from corpus
        to_test = np.random.choice(words, num_trials)
        print("Testing performance on the following words:", to_test)
        #stores information about average misses for various lengths of target words
        len_misses_dict = {}
        for word in to_test:
            misses = []
            predicted_string = ['*'] * len(word)
            #keep predicting until no unknown characters remain
            while '*' in predicted_string:
                best_char = self._next_guess(predicted_string, misses)
                if best_char is None:
                    #e.g. the word contains a character the model never proposes
                    print("Skipping (model ran out of candidate characters):", word)
                    break
                #positions at which the guessed character occurs in the word
                indices = [i for i, c in enumerate(word) if c == best_char]
                if indices:
                    #update target string
                    for i in indices:
                        predicted_string[i] = best_char
                else:  #indicates miss
                    misses.append(best_char)
            else:
                #loop ended without break: word fully predicted, so record it
                stats = len_misses_dict.setdefault(len(word), {'misses': 0, 'num': 0})
                stats['misses'] += len(misses)
                stats['num'] += 1
        len_misses_list = sorted(
            ((length, stats['misses'] / stats['num']) for length, stats in len_misses_dict.items()),
            key=lambda x: x[0],
        )
        print("Average number of misses:", len_misses_list)
        #plot performance (nothing to draw if no word was completed)
        if plot and len_misses_list:
            plt.bar([x[0] for x in len_misses_list], [x[1] for x in len_misses_list])
            plt.xlabel('Length of word')
            plt.ylabel('Average misses (lesser the better)')
            plt.title("Comparing performance as a function of word length")
            #+1 so the longest word length also gets a tick
            plt.xticks(list(range(min_word_len, len_misses_list[-1][0] + 1)))
            plt.show()
        return len_misses_list
#build the game from the dumped character-to-id mapping, then benchmark the model on randomly sampled corpus words
#(for an interactive round against the model, call a.play() instead)
a = Game(char_to_id_pth='pickle/char_to_id.json')
a.test_performance()