-
Notifications
You must be signed in to change notification settings - Fork 1
/
unsupervisedLearning.m
119 lines (103 loc) · 4.94 KB
/
unsupervisedLearning.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
%% ------------------------------------------------------------------------
%% GAIT RECOGNITION BASED ON IMU DATA AND ML ALGORITHM
% Albi Matteo, Cardone Andrea, Oselin Pierfrancesco
%
% Required packages:
% Parallel Computing Toolbox
% Neural Network Toolbox
% Signal Toolbox
% Statistics Toolbox
% -------------------------------------------------------------------------
%% ------------------------------------------------------------------------
%% GOAL OF THE FUNCTION
% The goal of this script is to group the data into clusters according to
% their similarity.
% -------------------------------------------------------------------------
%% WORKSPACE SETUP
% Start from a clean state: drop workspace variables, close every figure
% and clear the command window. The "include" folder is added to the path
% (presumably where dataPreprocessingUnsupervised lives - TODO confirm).
clear ;
close all;
clc
addpath("include");
%% DATA IMPORTING
% Every recording is read into its own table; "VariableNamingRule" is set
% to "preserve" so the CSV column headers are kept exactly as written.
try
    % Ankle ("caviglia") walking recordings, persons A-E, slower speed
    file01 = readtable('data/record_walk_7-12-21_caviglia/personaA4kmh.csv', "VariableNamingRule","preserve");
    file02 = readtable('data/record_walk_7-12-21_caviglia/personaB4kmh.csv', "VariableNamingRule","preserve");
    file03 = readtable('data/record_walk_7-12-21_caviglia/personaC4kmh.csv', "VariableNamingRule","preserve");
    file04 = readtable('data/record_walk_7-12-21_caviglia/personaD4kmh.csv', "VariableNamingRule","preserve");
    file05 = readtable('data/record_walk_7-12-21_caviglia/personaE4kmh.csv', "VariableNamingRule","preserve");
    % Same persons, faster speed
    file06 = readtable('data/record_walk_7-12-21_caviglia/personaA6kmh.csv', "VariableNamingRule","preserve");
    file07 = readtable('data/record_walk_7-12-21_caviglia/personaB6kmh.csv', "VariableNamingRule","preserve");
    file08 = readtable('data/record_walk_7-12-21_caviglia/personaC5_8kmh.csv', "VariableNamingRule","preserve");
    file09 = readtable('data/record_walk_7-12-21_caviglia/personaD6kmh.csv', "VariableNamingRule","preserve");
    file10 = readtable('data/record_walk_7-12-21_caviglia/personaE6kmh.csv', "VariableNamingRule","preserve");
    % Adding the cut lab data
    file11 = readtable('data/record_lab_15-12-21/IMU1_1.csv', "VariableNamingRule","preserve");
    file12 = readtable('data/record_lab_15-12-21/IMU1_2.csv', "VariableNamingRule","preserve");
    file13 = readtable('data/record_lab_15-12-21/IMU2_1.csv', "VariableNamingRule","preserve");
    file14 = readtable('data/record_lab_15-12-21/IMU3_1.csv', "VariableNamingRule","preserve");
    file15 = readtable('data/record_lab_15-12-21_afternoon/IMU4_1.csv', "VariableNamingRule","preserve");
    disp("Data successfully imported");
catch ME
    if strcmp(ME.identifier, 'MATLAB:textio:textio:FileNotFound')
        % A missing file is reported with a friendly message and the script
        % stops here.
        disp("ERROR: some data cannot be found");
        return;
    end
    % Any other failure (malformed CSV, permission problem, ...) must not be
    % swallowed: continuing would only crash later on an undefined fileXX
    % variable and hide the real cause.
    rethrow(ME);
end
% Recordings used to fit the clusters: every imported table except the two
% "personaE" files.
train = { ...
    file01, file02, file03, file04, ...
    file06, file07, file08, file09, ...
    file11, file12, file13, file14, file15 ...
    };
% Held-out recordings: the two "personaE" files.
test = {file05, file10};
% One accuracy slot per run of the k-means loop below.
acc_train = zeros(1, 2);
acc_test  = zeros(1, 2);
%% START OF K-MEANS METHOD
% The loop runs the whole pipeline twice: the first pass clusters the data
% without feature extraction, the second pass with feature extraction
% enabled.
numClusters = 4;   % the legend below is written for exactly 4 clusters
for iRun = 1:2
    if iRun == 1
        % [NO FEATURES] Preparing data to train and test the clustering
        processed_train = dataPreprocessingUnsupervised(train);
        processed_test  = dataPreprocessingUnsupervised(test);
        disp("Starting K_Means Method on raw data");
    else
        % [FEATURES] Preparing data to train and test the clustering.
        % NOTE(review): the meaning of the value 150 is defined by
        % dataPreprocessingUnsupervised - confirm there.
        processed_train = dataPreprocessingUnsupervised(train, 'features', 150);
        processed_test  = dataPreprocessingUnsupervised(test,  'features', 150);
        disp("Starting K_Means Method on feature data");
    end

    % Setting data properly for unsupervised learning: the last column is
    % used as the reference label, every other column as an input.
    Xtrain = processed_train(:, 1:end-1);
    Ytrain = processed_train(:, end);
    Xtest  = processed_test(:, 1:end-1);
    Ytest  = processed_test(:, end);

    % Unsupervised Learning: k-Means, best result out of 60 replicates
    [idx, C] = kmeans(Xtrain, ...
        numClusters, ...
        "Replicates", 60 ...
        );

    % k-means numbers its clusters arbitrarily, so a cluster index cannot be
    % compared with a label directly. A cluster-by-label contingency table
    % is built from the training data and each cluster is mapped to the
    % label that occurs most often inside it.
    [labelValues, ~, labelPos] = unique(Ytrain);
    assigned = ~isnan(idx);   % kmeans returns NaN for rows containing NaN
    votes = accumarray([idx(assigned), labelPos(assigned)], 1, ...
        [numClusters, numel(labelValues)]);
    [~, winner] = max(votes, [], 2);
    clusterLabel = labelValues(winner);

    % Train accuracy is computed on the mapped labels; rows that were not
    % assigned to any cluster count as misclassified.
    predTrain = nan(size(idx));
    predTrain(assigned) = clusterLabel(idx(assigned));
    acc_train(iRun) = sum(predTrain == Ytrain) ./ numel(idx);

    % Prediction for the unsupervised learning: each test sample is
    % assigned to the nearest centroid (Euclidean distance).
    [~, idx_test] = pdist2(C, Xtest, 'euclidean', 'Smallest', 1);

    % Test accuracy is computed with the same cluster-to-label mapping.
    % Both sides are forced to columns so the comparison is element-wise.
    predTest = clusterLabel(idx_test(:));
    acc_test(iRun) = sum(predTest == Ytest(:)) ./ numel(idx_test);

    % Scatter plot of the first two input columns, coloured by cluster.
    % NOTE(review): the axis labels assume columns 1 and 2 are X/Y
    % acceleration; confirm this still holds in feature mode.
    figure(iRun);
    gscatter(Xtrain(:,1), Xtrain(:,2), idx, 'rbcg')
    hold on
    plot(C(:,1), C(:,2), 'kx')
    gscatter(Xtest(:,1), Xtest(:,2), idx_test, 'bgmr', 'oooo')
    legend('Cluster 1','Cluster 2','Cluster 3','Cluster 4','Cluster Centroid', ...
        'Data classified to Cluster 1','Data classified to Cluster 2', ...
        'Data classified to Cluster 3','Data classified to Cluster 4')
    title("k-Means clustering")
    xlabel("Acc(X)")
    ylabel("Acc(y)")
    hold off
end
%% Displaying the results
% One tag per run, in the same order the accuracies were stored. A variable
% named "mode" is deliberately avoided: it would shadow the built-in mode()
% function and stay in the workspace after the script ends.
runTags = ["[NOFEATUR] ", "[FEATURES] "];
for k = 1:numel(runTags)
    disp(runTags(k) + "Unsupervised [kMeans] train accuracy: " + num2str(acc_train(k)));
    disp(runTags(k) + "Unsupervised [kMeans] test accuracy: " + num2str(acc_test(k)));
end