-
Notifications
You must be signed in to change notification settings - Fork 2
/
DCC.py
96 lines (70 loc) · 3.52 KB
/
DCC.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
class DCC:
def __init__(self, dataset):
dataset.minmax_arc_lengths = []
working_df = dataset.dataframe.copy()
scaler = MinMaxScaler((0, 1))
working_df[dataset.attribute_names] = scaler.fit_transform(working_df[dataset.attribute_names])
dataset.normalized_dataframe = working_df
working_frame = working_df.copy()
# strip last column
working_frame = working_frame.iloc[:, 0:-1]
scaler = MinMaxScaler((0, 1))
attributes_scaled = scaler.fit_transform(working_df[dataset.attribute_names])
# Prepare the data for LDA
X = attributes_scaled
y = working_df['class'].values
# Fit LDA model first plot of DCC only per data loaded
if not dataset.fitted:
lda = LinearDiscriminantAnalysis()
lda.fit(X, y)
lda_coefs = np.abs(lda.coef_).mean(axis=0)
dataset.fitted = True
dataset.coefs = lda_coefs
# sort the attributes by the coefficients
sorted_indices = np.argsort(lda_coefs)
dataset.attribute_names = list(np.array(dataset.attribute_names)[sorted_indices])
dataset.attribute_order = sorted_indices
dataset.coefs = dataset.coefs[sorted_indices]
coefArr = dataset.coefs / 100
for index, col in enumerate(working_frame.columns):
columnCoef = coefArr[index]
working_df[col] = columnCoef * working_df[col]
section_array = np.linspace(start=0, stop=1, num=dataset.vertex_count)
for class_index, class_name in enumerate(dataset.class_names):
base_radius = (dataset.attribute_count / (2 * np.pi))
# Adjust the radius based on class index
if class_index < 2:
# First two classes share the first axis
radius_factor = 1
else:
# Subsequent classes each get their own axis, scaling geometrically
scale_factor = 2.1 # Adjust this factor to control the rate of radius increase
radius_factor = scale_factor * (class_index - 1)
radius = base_radius * radius_factor
df_name = working_df[working_df['class'] == class_name]
df_name = df_name.drop(columns='class', axis=1)
x_coord = np.tile(section_array, reps=len(df_name.index))
y_coord = df_name.to_numpy().ravel()
MinE = dataset.attribute_count
MaxE = 0
arc_length = 0
arc_rule = 0
attribute_length = dataset.attribute_count * dataset.count_per_class[class_index]
for i in range(attribute_length):
if arc_rule >= dataset.attribute_count:
arc_length = 0
arc_rule = 0
arc_length += y_coord[i]
center_angle = arc_length * 360 / (2 * np.pi * radius) * radius_factor
center_angle = np.pi * center_angle / 180
x_coord[i] = radius * np.sin(center_angle)
y_coord[i] = radius * np.cos(center_angle)
arc_rule += 1
dataset.minmax_arc_lengths.append(MinE)
dataset.minmax_arc_lengths.append(MaxE)
pos_array = np.column_stack((x_coord, y_coord))
dataset.positions.append(pos_array)
dataset.axis_count = dataset.attribute_count