# Source: forked from klb3713/cw_word_embedding -- main.py (69 lines, 2.27 KB).
# (GitHub page-scrape artifacts -- navigation text and the line-number gutter --
#  condensed into this provenance note so the file parses as Python.)
# -*- coding: utf-8 -*-
__author__ = 'klb3713'
import os
import sys
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
import logging
import config
import samples
import state
import model_gpu
# import cProfile, pstats, StringIO
logger = logging.getLogger(__name__)
def train(debug=False):
    """Train the C&W word-embedding model for ``config.EPOCH`` epochs.

    Creates a fresh run directory, streams training mini-batches from
    ``samples.TrainingMiniBatchStream``, and after every epoch: saves the
    embeddings in both binary and text word2vec formats, logs the epoch's
    loss/error statistics, and resets the model's per-epoch accumulators.

    Args:
        debug: when True, log a progress message roughly every 100k trained
            samples (the interval is rounded to a multiple of
            ``config.MINIBATCH_SIZE`` so the modulo test can actually hit).
    """
    # NOTE(review): 'creat_run_dir' typo is part of the state module's API;
    # renaming it here would break the import contract -- left as-is.
    run_dir = state.creat_run_dir()
    logging.basicConfig(
        format='%(asctime)s : %(threadName)s : %(levelname)s : %(message)s',
        level=logging.INFO)

    logger.info("INITIALIZING...")
    cw_model = model_gpu.Model()

    # Running count of samples trained, accumulated across all epochs.
    cnt = 0
    # Hoisted loop invariant: report every ~100k samples, rounded down to a
    # multiple of the mini-batch size (cnt only ever lands on such multiples).
    report_every = int(100000. / config.MINIBATCH_SIZE) * config.MINIBATCH_SIZE

    logger.info("INITIALIZING TRAINING STATE")
    logger.info("STARTING TRAINING...")
    for epoch in range(1, config.EPOCH + 1):
        logger.info("STARTING EPOCH #%d", epoch)
        # A fresh stream is needed each epoch: one pass exhausts it.
        for batch in samples.TrainingMiniBatchStream():
            cnt += len(batch)
            cw_model.train(batch)
            if debug and cnt % report_every == 0:
                logger.info("FINISH TRAINED %d SAMPLES of epoch #%d.", cnt, epoch)

        # Save embeddings after every epoch, in both word2vec formats.
        cw_model.save_word2vec_format(
            os.path.join(run_dir, config.VECTOR_FILE + '_epoch%d.bin' % epoch),
            binary=True)
        cw_model.save_word2vec_format(
            os.path.join(run_dir, config.VECTOR_FILE + '_epoch%d.txt' % epoch),
            binary=False)

        logger.info("After #%d epoch updates, train loss: %f", epoch, cw_model.train_loss)
        logger.info("After #%d epoch updates, train error: %d", epoch, cw_model.train_err)
        logger.info("After #%d epoch updates, train loss nonzero: %d",
                    epoch, cw_model.train_lossnonzero)

        cw_model.reset()  # clear per-epoch loss/error accumulators
        logger.info("FINISH TRAIN EPOCH #%d", epoch)
# Script entry point: run training with per-~100k-sample progress logging enabled.
if __name__ == "__main__":
    train(debug=True)