-
Notifications
You must be signed in to change notification settings - Fork 5
/
launch_test.py
executable file
·126 lines (106 loc) · 4.52 KB
/
launch_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#!/usr/bin/python
import pickle
import string
import numpy
import getopt
import sys
import random
import time
import re
import pprint
import codecs
import datetime, os
import scipy.io
import nltk
import numpy
import optparse
# bash script to terminate all sub-processes
# kill $(ps aux | grep 'python infag' | awk '{print $2}')
def parse_args():
    """Parse command-line options for the test launcher.

    Returns an optparse options object carrying the input/model
    directories, the adapted non-terminal symbol, and the sampling
    and parallelism settings (defaults shown in each help string).
    """
    option_parser = optparse.OptionParser()

    # parameter set 1: defaults mirror the bracketed values in the help text
    option_parser.set_defaults(
        input_directory=None,
        model_directory=None,
        non_terminal_symbol="Word",
        number_of_samples=10,
        number_of_processes=0,
    )

    # parameter set 1
    option_parser.add_option("--input_directory", type="string",
                             dest="input_directory",
                             help="input directory [None]")
    option_parser.add_option("--model_directory", type="string",
                             dest="model_directory",
                             help="model directory [None]")
    option_parser.add_option("--non_terminal_symbol", type="string",
                             dest="non_terminal_symbol",
                             help="non-terminal symbol [Word]")
    option_parser.add_option("--number_of_samples", type="int",
                             dest="number_of_samples",
                             help="number of samples [10]")
    option_parser.add_option("--number_of_processes", type="int",
                             dest="number_of_processes",
                             help="number of processes [0]")

    options, _ = option_parser.parse_args()
    return options
def _load_documents(file_path):
    """Return one whitespace-stripped document per line of *file_path*."""
    # `with` guarantees the file handle is closed even if reading raises;
    # the original opened/closed manually and leaked on exception.
    with open(file_path, 'r') as input_stream:
        return [line.strip() for line in input_stream]


def main():
    """Entry point: load pickled adaptor-grammar models and run inference.

    Reads ``train.dat`` and ``truth.dat`` from the input directory, then
    unpickles every ``model-*`` file in the model directory and calls its
    ``inference`` method on the training documents.

    Raises:
        ValueError: if a required option is missing or out of range.
    """
    options = parse_args()

    # parameter set 1 -- validate explicitly rather than with `assert`,
    # which is silently stripped when Python runs with -O.
    if options.input_directory is None:
        raise ValueError("--input_directory is required")
    input_directory = options.input_directory
    if options.model_directory is None:
        raise ValueError("--model_directory is required")
    model_directory = options.model_directory
    if options.non_terminal_symbol is None:
        raise ValueError("--non_terminal_symbol is required")
    non_terminal_symbol = options.non_terminal_symbol
    if options.number_of_samples <= 0:
        raise ValueError("--number_of_samples must be positive")
    number_of_samples = options.number_of_samples
    if options.number_of_processes < 0:
        raise ValueError("--number_of_processes must be non-negative")
    number_of_processes = options.number_of_processes

    print("========== ========== ========== ========== ==========")
    # parameter set 1
    print("input_directory=" + input_directory)
    print("model_directory=" + model_directory)
    print("non_terminal_symbol=" + non_terminal_symbol)
    print("number_of_samples=" + str(number_of_samples))
    print("number_of_processes=" + str(number_of_processes))
    print("========== ========== ========== ========== ==========")

    # Documents: one document per line in each .dat file.
    train_docs = _load_documents(os.path.join(input_directory, 'train.dat'))
    print("successfully load %d training documents..." % (len(train_docs)))

    refer_docs = _load_documents(os.path.join(input_directory, 'truth.dat'))
    print("successfully load %d testing documents..." % (len(refer_docs)))

    # Loop-invariant: the non-terminal is the same for every model file,
    # so build it once instead of once per iteration.
    non_terminal = nltk.grammar.Nonterminal(non_terminal_symbol)

    for model_file in os.listdir(model_directory):
        if not model_file.startswith("model-"):
            continue
        model_file_path = os.path.join(model_directory, model_file)

        # SECURITY NOTE(review): pickle.load executes arbitrary code on
        # unpickling -- only run this against trusted model directories.
        try:
            with open(model_file_path, 'rb') as cpickle_file:
                infinite_adaptor_grammar = pickle.load(cpickle_file)
            print("successfully load model from %s" % (model_file_path))
        except ValueError:
            print("warning: unsuccessfully load model from %s due to value error..." % (model_file_path))
            continue
        except EOFError:
            print("warning: unsuccessfully load model from %s due to EOF error..." % (model_file_path))
            continue

        inference_parameter = (refer_docs, non_terminal, model_directory, model_file)
        infinite_adaptor_grammar.inference(train_docs, inference_parameter, number_of_samples, number_of_processes)
# Script entry guard: run main() only when executed directly, not on import.
if __name__ == '__main__':
    main()