-
Notifications
You must be signed in to change notification settings - Fork 3
/
evaluation.py
106 lines (75 loc) · 3.26 KB
/
evaluation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import re
import random
from random import shuffle
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch import optim
import torch.nn.functional as F
import sys
import os
import time
import math
import pickle
from role_assignment_functions import *
# Functions for evaluating seq2seq models and TPDNs
use_cuda = torch.cuda.is_available()
# Given an encoder, a decoder, and an input, return the guessed output
# and the encoding of the input
def evaluate(encoder1, decoder1, example, input_to_output):
    """Encode a single example, decode it, and return the greedy prediction.

    Args:
        encoder1: Callable invoked as ``encoder1([example])``, i.e. on a
            one-element batch containing ``example``.
        decoder1: Callable invoked as
            ``decoder1(encoding, len(example), [parse_digits(example)])``;
            its result is iterated to obtain one score tensor per output step.
        example: The input sequence; must support ``len()``.
        input_to_output: Unused here. Kept so the signature stays compatible
            with existing callers (``score`` passes it through).

    Returns:
        A ``(guessed_seq, encoding)`` tuple: the list of top-scoring indices
        (one Python int per decoder output) and the encoder's output for
        ``example``.
    """
    # NOTE(review): parse_digits is not defined in this file; presumably it
    # comes from the star import of role_assignment_functions -- confirm.
    encoding = encoder1([example])
    predictions = decoder1(encoding, len(example), [parse_digits(example)])

    guessed_seq = []
    for prediction in predictions:
        # topk(1) yields (values, indices); only the index of the best-scoring
        # entry is needed. .item() requires that index tensor to hold exactly
        # one element.
        _, top_index = prediction.data.topk(1)
        guessed_seq.append(top_index.item())

    return guessed_seq, encoding
# Given an encoder, decoder, evaluation set, and function for generating
# the correct outputs, return the number of correct predictions
# and the total number of predictions
def score(encoder1, decoder1, evaluation_set, input_to_output):
    """Count how many examples the encoder/decoder pair reproduces exactly.

    Args:
        encoder1: Encoder forwarded unchanged to ``evaluate``.
        decoder1: Decoder forwarded unchanged to ``evaluate``.
        evaluation_set: Iterable of batches, each batch an iterable of
            examples.
        input_to_output: Callable mapping an example to its correct output
            sequence.

    Returns:
        A ``(total_correct, total)`` tuple: the number of examples whose
        guessed sequence equals the correct output, and the number of
        examples evaluated.
    """
    n_right = 0
    n_seen = 0

    # Walk every example of every batch in order.
    all_examples = (item for batch in evaluation_set for item in batch)
    for example in all_examples:
        target = input_to_output(example)
        result = evaluate(encoder1, decoder1, example, input_to_output)
        # evaluate returns (guessed_seq, encoding); only the guess is scored.
        # Both sides are converted to tuples so list/tuple outputs compare
        # element-wise.
        predicted = result[0]
        if tuple(predicted) == tuple(target):
            n_right += 1
        n_seen += 1

    return n_right, n_seen
# This function takes a tensor product encoder and a standard decoder, as well as a sequence
# of digits as inputs. It then uses the tensor product encoder to encode the sequence and uses
# the standard decoder to decode it, and returns the result.
def evaluate2(encoder, decoder, example):
    """Encode an example with a two-input encoder and greedily decode it.

    Args:
        encoder: Callable taking two ``LongTensor`` arguments, each with a
            leading batch dimension of size 1.
        decoder: Callable invoked as
            ``decoder(encoding, len(example[0]), [parse_digits(example[0])])``;
            its result is iterated to obtain one score tensor per output step.
        example: Indexable pair. ``example[0]`` and ``example[1]`` are integer
            sequences that are turned into the encoder's two inputs.
            NOTE(review): ``score2`` maps ``example[0]`` through
            ``index_to_filler``, so it holds filler indices; ``example[1]`` is
            presumably the matching role indices -- confirm.

    Returns:
        List of Python ints: the top-scoring index of each decoder output.
    """
    # Build each input once and add the batch dimension; the device move is
    # the only thing that depends on CUDA availability.
    first_input = torch.LongTensor(example[0]).unsqueeze(0)
    second_input = torch.LongTensor(example[1]).unsqueeze(0)
    if use_cuda:
        first_input = first_input.cuda()
        second_input = second_input.cuda()

    # Only integer indices leave this function, so no autograd graph is
    # needed for the forward pass.
    with torch.no_grad():
        encoder_hidden = encoder(first_input, second_input)
        predictions = decoder(
            encoder_hidden, len(example[0]), [parse_digits(example[0])]
        )

        guessed_seq = []
        for prediction in predictions:
            # topk(1) yields (values, indices); keep the best index only.
            _, top_index = prediction.data.topk(1)
            guessed_seq.append(top_index.item())

    return guessed_seq
def score2(encoder, decoder, input_to_output, test_set, index_to_filler):
    """Count how many test sequences are decoded correctly.

    Measures how well ``encoder`` produces encodings that ``decoder`` can
    decode back into the correct output.

    Args:
        encoder: Encoder forwarded unchanged to ``evaluate2``.
        decoder: Decoder forwarded unchanged to ``evaluate2``.
        input_to_output: Callable mapping a list of fillers to the correct
            output sequence; its elements are compared against the string
            form of each predicted index.
        test_set: Iterable of batches; element ``[0]`` of each batch item is
            the example handed to ``evaluate2``.
        index_to_filler: Mapping from the indices in ``example[0]`` to
            fillers.

    Returns:
        An ``(accurate, total)`` tuple: the number of correctly decoded
        sequences and the number of sequences tested.
    """
    n_correct = 0
    n_tested = 0

    for batch in test_set:
        for entry in batch:
            # Only the first element of each batch entry is evaluated.
            sample = entry[0]
            predicted = evaluate2(encoder, decoder, sample)

            # Translate the input indices back to fillers, then compute the
            # output the decoder should have produced.
            fillers = [index_to_filler[idx] for idx in sample[0]]
            expected = tuple(input_to_output(fillers))

            # Predictions are ints; they are compared in string form.
            guessed = tuple([str(idx) for idx in predicted])

            if expected == guessed:
                n_correct += 1
            n_tested += 1

    # How many sequences were properly decoded, out of all test sequences.
    return n_correct, n_tested