config.py
# -*- coding: utf-8 -*-
__author__ = 'klb3713'
# Not actually used directly, just for convenience
DATA_DIR = "/home/klb3713/workspace/cw_word_embedding/data/"
## 32-bit for the GPU
##from theano import config
##floatX = config.floatX
#floatX = 'float32'
TRAIN_FILE = DATA_DIR + "corpus"
# SAMPLE_FILE = DATA_DIR + "samples"
SAMPLE_FILE = DATA_DIR + "text8_samples"
# Should we induce an embedding for OOV words?
INCLUDE_UNKNOWN_WORD = True
UNKNOWN_WORD = "*UNKNOWN*"
SYMBOL_WORD = "*SYMBOL*"
PADDING_WORD = "*PADDING*"
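# --- Illustrative sketch, not part of the original config: one plausible
# way the special tokens above get applied when indexing a corpus.
# `word2index` is a hypothetical vocabulary mapping built elsewhere.
def lookup_token(word, word2index):
    """Return the vocabulary key for a raw token, handling OOV words."""
    if word in word2index:
        return word
    # All out-of-vocabulary words share the *UNKNOWN* embedding when
    # INCLUDE_UNKNOWN_WORD is True; otherwise they are simply skipped.
    return UNKNOWN_WORD if INCLUDE_UNKNOWN_WORD else None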
# Number of training passes over the corpus
EPOCH = 20
VOCABULARY_FILE = DATA_DIR + "text8_voc.txt"
# VOCABULARY_FILE = DATA_DIR + "vocabulary.pkl"
SAVE_VOCABULARY = DATA_DIR + "vocabulary.txt"
WORD_COUNT = 5
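# --- Illustrative sketch, assuming WORD_COUNT is a minimum-frequency
# cutoff for keeping a word in the vocabulary (the original file does not
# document it). `word_counts` is a hypothetical word -> frequency dict.
def prune_vocabulary(word_counts):
    return set(w for w, c in word_counts.items() if c >= WORD_COUNT)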
# Output file for the learned word vectors
VECTOR_FILE = "vector"
# Number of examples per minibatch
MINIBATCH_SIZE = 100
# Randomly initialize embeddings uniformly in the range [-this value, +this value]
INITIAL_EMBEDDING_RANGE = 0.01
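# --- Illustrative sketch of the initialization the comment above
# describes (numpy assumed; the actual model code may differ).
# EMBEDDING_SIZE is defined further down; it is resolved at call time.
def init_embeddings(vocab_size):
    import numpy as np
    return np.random.uniform(-INITIAL_EMBEDDING_RANGE,
                             INITIAL_EMBEDDING_RANGE,
                             size=(vocab_size, EMBEDDING_SIZE))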
# L1 penalty applied to C&W embeddings
CW_EMBEDDING_L1_PENALTY = 0.
NORMALIZE_EMBEDDINGS = False
#UPDATES_PER_NORMALIZE_EMBEDDINGS = 1000
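# --- Illustrative sketch, assuming NORMALIZE_EMBEDDINGS means rescaling
# each embedding row to unit L2 norm (an assumption; the flag is off here).
def normalize_rows(embeddings):
    import numpy as np
    norms = np.sqrt((embeddings ** 2).sum(axis=1, keepdims=True))
    return embeddings / np.maximum(norms, 1e-8)  # guard against zero rows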
# Number of validation examples
VALIDATION_EXAMPLES = 1000
# What percent of noise examples should we use for computing the logrank
# during validation?
# This is a speed optimization.
PERCENT_OF_NOISE_EXAMPLES_FOR_VALIDATION_LOGRANK = 0.01
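# --- Illustrative sketch of the subsampling described above; not the
# project's actual validation code. `noise_examples` is a hypothetical
# list built by the training script.
def sample_noise_for_validation(noise_examples):
    import random
    if not noise_examples:
        return []
    k = max(1, int(len(noise_examples)
                   * PERCENT_OF_NOISE_EXAMPLES_FOR_VALIDATION_LOGRANK))
    return random.sample(noise_examples, k)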
# NGRAMS = {(1, 5000): join(DATA_DIR, "1grams-wikitext-5000.json.gz"),
#           (1, 10000): join(DATA_DIR, "1grams-wikitext-10000.json.gz"),
#           (1, 20000): join(DATA_DIR, "1grams-wikitext-20000.json.gz")}  # join = os.path.join
# Number of instances of each ngram to add, for smoothing.
TRAINING_NOISE_SMOOTHING_ADDITION = 0
# Each embedded word representation has this width
EMBEDDING_SIZE = 100
# Predict with a window of five words at a time
WINDOW_SIZE = 5
HIDDEN_SIZE = 100
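# --- How the three sizes above fit together in a C&W-style scorer
# (an architecture outline only, not the project's actual model code):
#   input  = WINDOW_SIZE * EMBEDDING_SIZE concatenated values (5 * 100 = 500)
#   hidden = HIDDEN_SIZE units passed through ACTIVATION_FUNCTION
#   output = a single scalar score per window
def window_input_width():
    """Width of the concatenated input layer for one window."""
    return WINDOW_SIZE * EMBEDDING_SIZE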
# Scaling value to control the range of weight initialization
#SCALE_INITIAL_WEIGHTS_BY = math.sqrt(3)  # requires "import math" if enabled
SCALE_INITIAL_WEIGHTS_BY = 1
# Which activation function to use?
#ACTIVATION_FUNCTION="sigmoid"
#ACTIVATION_FUNCTION="tanh"
ACTIVATION_FUNCTION = "softsign"
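# --- Reference sketch: the standard definition of softsign, for
# comparison with tanh/sigmoid (a textbook formula, not project code).
def _softsign(x):
    return x / (1.0 + abs(x))  # smooth and bounded in (-1, 1), like tanh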
# LEARNING_RATE = 0.000000011
LEARNING_RATE = 0.001
# The learning rate for the embeddings
# EMBEDDING_LEARNING_RATE = 0.00000000034
EMBEDDING_LEARNING_RATE = 0.01
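# --- Illustrative sketch, assuming the network weights and the embedding
# table are updated with their separate step sizes; `weights`, `grad_w`,
# `embeddings`, `grad_e` are hypothetical numpy arrays.
def sgd_step(weights, grad_w, embeddings, grad_e):
    weights -= LEARNING_RATE * grad_w
    embeddings -= EMBEDDING_LEARNING_RATE * grad_e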
## Number of (higher-order) quadratic filters for James's neuron
#NUMBER_OF_QUADRATIC_FILTERS=0
## We use this scaling factor for initial weights of quadratic filters,
## instead of SCALE_INITIAL_WEIGHTS_BY
## @note: Try values between 10 and 0.01
#SCALE_QUADRATIC_INITIAL_WEIGHTS_BY = 1
# Validate after this many examples
VALIDATE_EVERY = 10000000
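# --- Illustrative sketch of the intended use of VALIDATE_EVERY
# (an assumption about the training loop, not original code).
def should_validate(examples_seen):
    return examples_seen > 0 and examples_seen % VALIDATE_EVERY == 0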