-
Notifications
You must be signed in to change notification settings - Fork 1
/
utils.py
63 lines (51 loc) · 1.93 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import torch as t
import numpy as np
class AliasMultinomial(object):
    """Fast sampling from a multinomial distribution (alias method).

    The constructor builds two tables of length K:

    * ``q`` -- for each slot, the probability of keeping the slot's own index;
    * ``J`` -- for each slot, the "alias" index returned otherwise.

    Drawing a sample then only needs one uniform slot choice and one
    Bernoulli trial, independent of K.

    https://hips.seas.harvard.edu/blog/2013/03/03/the-alias-method-efficient-sampling-with-many-discrete-outcomes/
    Code taken from: https://github.com/TropComplique/lda2vec-pytorch/blob/master/utils/alias_multinomial.py
    """

    def __init__(self, probs):
        """Build the alias tables.

        Args:
            probs: a float tensor with shape [K]. It represents the
                probabilities of the K different outcomes and is expected
                to sum to one.
        """
        K = len(probs)

        self.q = t.zeros(K)
        self.J = t.LongTensor([0] * K)

        # Sort the outcomes into those whose scaled probability K * p is
        # smaller than 1 (under-full slots) and those where it is at least
        # 1 (over-full slots).
        smaller = []
        larger = []
        for kk, prob in enumerate(probs):
            self.q[kk] = K * prob
            if self.q[kk] < 1.0:
                smaller.append(kk)
            else:
                larger.append(kk)

        # Loop through and create little binary mixtures that appropriately
        # allocate the larger outcomes over the overall uniform mixture:
        # each under-full slot is topped up by one over-full outcome.
        while smaller and larger:
            small = smaller.pop()
            large = larger.pop()

            self.J[small] = large
            # `large` donated (1 - q[small]) of its mass to fill slot `small`.
            self.q[large] = (self.q[large] - 1.0) + self.q[small]

            if self.q[large] < 1.0:
                smaller.append(large)
            else:
                larger.append(large)

        # Whatever is left over was never paired. For a normalised input its
        # threshold differs from 1 only by rounding error, so pin it to 1;
        # otherwise a leftover "small" slot would leak mass to its default
        # alias (index 0).
        for kk in smaller + larger:
            self.q[kk] = 1.0

        # In-place clamps: the out-of-place `clamp` would discard its result.
        self.q.clamp_(0.0, 1.0)
        self.J.clamp_(0, K - 1)

    def draw(self, N):
        """Draw N samples from the distribution.

        Args:
            N: number of samples to draw.

        Returns:
            A 1-D numpy integer array of length N holding outcome indices
            in ``[0, K)``.
        """
        K = self.J.size(0)

        # Pick a slot uniformly at random (NumPy RNG, so np.random.seed
        # controls this part of the sampling).
        r = t.from_numpy(np.random.randint(0, K, size=N)).long()

        # Clamp defensively so `bernoulli` always receives valid
        # probabilities, even if `q` was modified after construction.
        q = self.q.index_select(0, r).clamp(0.0, 1.0)
        j = self.J.index_select(0, r)

        # With probability q keep the slot's own index, otherwise its alias.
        b = t.bernoulli(q)
        return t.where(b.bool(), r, j).numpy()