-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfunctions_ex8.py
63 lines (53 loc) · 1.93 KB
/
functions_ex8.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
"""Functions for Exercise 8 (vector quantization is not implemented yet)."""
def initialize_clusters(data, k, seed=None):
    """Pick k rows of the data at random as the initial cluster centers.

    Args:
        data: numpy array with shape (num_sample, num_feature).
        k: predefined number of clusters for the k-means algorithm.
        seed: optional integer seed. When given, a private ``RandomState``
            seeded with it is used, so the draw is reproducible and the
            global NumPy random state is left untouched. When ``None``
            (the default) the global NumPy random state is used.

    Returns:
        A numpy array with shape (k, num_feature) holding the chosen rows.
    """
    # Imported locally so the function works even when the module itself
    # does not import numpy at the top level.
    import numpy as np

    num_samples = data.shape[0]
    rng = np.random if seed is None else np.random.RandomState(seed)
    # Draw distinct rows whenever that is possible: duplicated centers lead
    # to empty clusters in the first k-means step. Only when more centers
    # than samples are requested do we fall back to drawing with replacement.
    indices = rng.choice(num_samples, k, replace=k > num_samples)
    return data[indices]
def build_distance_matrix(data, mu):
    """Compute the distance from every data point to every cluster center.

    Uses ``scipy.spatial.distance.cdist`` with its default (Euclidean)
    metric.

    Args:
        data: array-like with shape (num_samples, num_features).
        mu: array-like of cluster centers with shape (k, num_features).

    Returns:
        A numpy array with shape (num_samples, k): row i holds the distances
        from data point i to each of the k centers.
    """
    # Import the function directly instead of relying on a global ``sp``
    # whose ``spatial.distance`` submodule may not have been loaded.
    from scipy.spatial.distance import cdist

    return cdist(data, mu)
def update_kmeans_parameters(data, mu_old):
    """Run one k-means step: assign points to centers, then update centers.

    Args:
        data: numpy array with shape (num_samples, num_features).
        mu_old: current cluster centers with shape (k, num_features).

    Returns:
        losses: scalar, the sum over all points of the squared Euclidean
            distance between the point and the *updated* center of the
            cluster it was assigned to.
        assignments: one-hot matrix z with shape (num_samples, k);
            z[i, j] is 1.0 when point i is assigned to cluster j, else 0.0.
        mu: updated mean vectors with shape (k, num_features). A cluster
            that received no point keeps its old center instead of turning
            into NaN through a division by zero.
    """
    # Imported locally so the function works even when the module itself
    # does not import numpy at the top level.
    import numpy as np

    num_samples = data.shape[0]
    num_clusters = mu_old.shape[0]

    # Assignment step: each point goes to its nearest current center.
    dist = build_distance_matrix(data, mu_old)
    nearest = np.argmin(dist, axis=1)
    z = np.zeros((num_samples, num_clusters))
    z[np.arange(num_samples), nearest] = 1.

    # Update step: mean of the points assigned to each cluster.
    counts = z.sum(axis=0)
    sums = z.T.dot(data)
    occupied = counts > 0
    mu = np.zeros((num_clusters, data.shape[1]))
    mu[occupied] = sums[occupied] / counts[occupied][:, np.newaxis]
    # Empty clusters have no mean; keep their previous position.
    mu[~occupied] = mu_old[~occupied]

    # Loss: z.dot(mu) selects, for every point, the center it belongs to.
    diff = data - z.dot(mu)
    losses = np.sum(np.square(diff))
    return losses, z, mu
#Image Compression Functions
def preprocess_image(original_image):
    """Flatten an image into a 2-D array with one row per pixel.

    Args:
        original_image: array-like with shape (rows, cols) or
            (rows, cols, channels).

    Returns:
        A numpy array with shape (rows * cols, channels), pixels in
        row-major order. A two-dimensional (single-channel) image yields
        shape (rows * cols, 1). The dtype of the input is preserved.

    Raises:
        ValueError: if the input has fewer than two dimensions.
    """
    # Imported locally so the function works even when the module itself
    # does not import numpy at the top level.
    import numpy as np

    image = np.asarray(original_image)
    if image.ndim < 2:
        raise ValueError(
            "expected an image with at least 2 dimensions, got %d"
            % image.ndim
        )
    # The pixel grid size comes from the array's shape, not from its first
    # two rows of pixel values.
    rows, cols = image.shape[:2]
    # Product of the trailing axes; equals 1 for a 2-D image. Computed
    # explicitly (rather than with -1) so empty images reshape cleanly too.
    channels = int(np.prod(image.shape[2:]))
    return np.reshape(image, (rows * cols, channels))
# TODO: the kmean_compression function still needs to be implemented here.