Skip to content

Commit

Permalink
Fix initialization for position_dependent_vector_size < vector_size
Browse files Browse the repository at this point in the history
  • Loading branch information
Witiko committed Nov 30, 2020
1 parent fa9dfcf commit 8f9110a
Showing 1 changed file with 49 additions and 14 deletions.
63 changes: 49 additions & 14 deletions gensim/models/fasttext.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,8 +484,8 @@ def __init__(self, sentences=None, corpus_file=None, sg=0, hs=0, vector_size=100
bucket = 0

self.wv = FastTextKeyedVectors(
vector_size, position_dependent_weights, position_dependent_vector_size, min_n, max_n,
bucket)
vector_size, self.position_dependent_weights, position_dependent_vector_size,
min_n, max_n, bucket)
self.wv.bucket = bucket

super(FastText, self).__init__(
Expand Down Expand Up @@ -1366,10 +1366,6 @@ def init_ngrams_weights(self, seed, window):

rand_obj = np.random.default_rng(seed=seed) # use new instance of numpy's recommended generator/algorithm

vocab_shape = (len(self), self.vector_size)
ngrams_shape = (self.bucket, self.vector_size)
positions_shape = (2 * window, self.position_dependent_vector_size)
#
# We could have initialized vectors_ngrams at construction time, but we
# do it here for two reasons:
#
Expand All @@ -1379,13 +1375,38 @@ def init_ngrams_weights(self, seed, window):
# time because the vocab is not initialized at that stage.
#
if self.position_dependent_weights:
vocab_shape = (len(self), self.position_dependent_vector_size)
ngrams_shape = (self.bucket, self.position_dependent_vector_size)
positions_shape = (2 * window, self.position_dependent_vector_size)
hi = sqrt(sqrt(3.0) / self.vector_size)
lo = -hi
self.vectors_positions = rand_obj.uniform(lo, hi, positions_shape).astype(REAL)
self.vectors_vocab = rand_obj.uniform(lo, hi, vocab_shape).astype(REAL)
self.vectors_ngrams = rand_obj.uniform(lo, hi, ngrams_shape).astype(REAL)
if self.vector_size > self.position_dependent_vector_size:
vocab_shape = (len(self), self.vector_size - self.position_dependent_vector_size)
ngrams_shape = (self.bucket, self.vector_size - self.position_dependent_vector_size)
lo, hi = -1.0 / self.vector_size, 1.0 / self.vector_size
self.vectors_vocab = np.concatenate(
(
self.vectors_vocab,
rand_obj.uniform(lo, hi, vocab_shape).astype(REAL),
),
axis=-1,
)
self.vectors_ngrams = np.concatenate(
(
self.vectors_ngrams,
rand_obj.uniform(lo, hi, ngrams_shape).astype(REAL),
),
axis=-1,
)
else:
vocab_shape = (len(self), self.vector_size)
ngrams_shape = (self.bucket, self.vector_size)
lo, hi = -1.0 / self.vector_size, 1.0 / self.vector_size
self.vectors_vocab = rand_obj.uniform(lo, hi, vocab_shape).astype(REAL)
self.vectors_ngrams = rand_obj.uniform(lo, hi, ngrams_shape).astype(REAL)
self.vectors_vocab = rand_obj.uniform(lo, hi, vocab_shape).astype(REAL)
self.vectors_ngrams = rand_obj.uniform(lo, hi, ngrams_shape).astype(REAL)

def update_ngrams_weights(self, seed, old_vocab_len):
"""Update the vocabulary weights for training continuation.
Expand All @@ -1408,8 +1429,13 @@ def update_ngrams_weights(self, seed, old_vocab_len):
rand_obj.seed(seed)

new_vocab = len(self) - old_vocab_len
self.vectors_vocab = _pad_random(self.vectors_vocab, new_vocab, rand_obj,
squared=self.position_dependent_weights)
self.vectors_vocab = _pad_random(
self.vectors_vocab,
new_vocab,
rand_obj,
position_dependent_weights=self.position_dependent_weights,
position_dependent_vector_size=self.position_dependent_vector_size,
)

def init_post_load(self, fb_vectors):
"""Perform initialization after loading a native Facebook model.
Expand Down Expand Up @@ -1476,16 +1502,25 @@ def recalc_char_ngram_buckets(self):
)


def _pad_random(m, new_rows, rand, squared=False):
def _pad_random(m, new_rows, rand, position_dependent_weights=False, position_dependent_vector_size=0):
"""Pad a matrix with additional rows filled with random values."""
_, columns = m.shape
shape = (new_rows, columns)
if squared:
if position_dependent_weights:
shape = (new_rows, position_dependent_vector_size)
high = sqrt(sqrt(3.0) / columns)
low = -high
suffix = rand.uniform(low, high, shape).astype(REAL)
if columns > position_dependent_vector_size:
shape = (new_rows, columns - position_dependent_vector_size)
low, high = -1.0 / columns, 1.0 / columns
suffix = np.concatenate(
(suffix, rand.uniform(low, high, shape).astype(REAL)),
axis=-1,
)
else:
shape = (new_rows, columns)
low, high = -1.0 / columns, 1.0 / columns
suffix = rand.uniform(low, high, shape).astype(REAL)
suffix = rand.uniform(low, high, shape).astype(REAL)
return vstack([m, suffix])


Expand Down

0 comments on commit 8f9110a

Please sign in to comment.