# slimmable_ops.py
from torch import nn
def make_divisible(v, divisible_factor=8, min_value=1):
"""
forked from slim:
https://github.com/tensorflow/models/blob/\
0344c5503ee55e24f0de7f37336a6e08f10976fd/\
research/slim/nets/mobilenet/mobilenet.py#L62-L69
"""
if min_value is None:
min_value = divisible_factor
new_v = max(min_value, int(v + divisible_factor / 2) // divisible_factor * divisible_factor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * v:
new_v += divisible_factor
return new_v
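# A few illustrative values (a sketch, not part of the original file):
#   make_divisible(30)  -> 32   # rounds to the nearest multiple of 8
#   make_divisible(16)  -> 16   # already divisible, unchanged
#   make_divisible(10)  -> 16   # 8 would undershoot 10 by more than 10%, so bump up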
class SlimmableConv2d(nn.Conv2d):
"""
SuperNet Convolution Module.
automatically adapt to any number of input channels.
capable of conducting convolutions according to designate width.
Args:
us (bool): 是否可以裁剪
Default: True
divisible_factor (int): 使out_channel为几的整数倍
Default: 8
divisor (bool): 裁剪时channel切割份数
Default: 8
linked (int or string or None): 用于裁剪宽度时skip connection的channel对齐的标志
Default: None
"""
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
bias=True,
slimmable=True,
divisible_factor=8,):
super(SlimmableConv2d, self).__init__(
in_channels,
out_channels,
kernel_size,
stride=stride,
padding=padding,
dilation=dilation,
groups=groups,
bias=bias)
self.depthwise = groups == in_channels
self.out_channels = out_channels
self.out_channels_max = out_channels
self.slimmable = slimmable
self.divisible_factor = divisible_factor
self.width_mult = 1
def forward(self, input):
input_size = input.size()
in_channels = input_size[1]
if self.slimmable:
out_channels = make_divisible(self.out_channels * self.width_mult, self.divisible_factor)
# depthwise
self.groups = in_channels if self.depthwise else 1
out_channels = in_channels if self.depthwise else out_channels
else:
out_channels = self.out_channels
        weight = self.weight[:out_channels, :in_channels, :, :]
if self.bias is not None:
bias = self.bias[:out_channels]
else:
bias = self.bias
y = nn.functional.conv2d(input, weight, bias, self.stride,
self.padding, self.dilation, self.groups)
return y
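# Usage sketch (assumption: width_mult is set externally by the training
# loop; no scheduler ships with this file; requires `import torch`):
#   conv = SlimmableConv2d(64, 128, 3, padding=1)
#   conv.width_mult = 0.5                    # slim the layer to half width
#   y = conv(torch.randn(2, 64, 32, 32))     # y: (2, 64, 32, 32), since
#                                            # make_divisible(128 * 0.5) == 64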
class SlimmableBatchNorm2d(nn.BatchNorm2d):
"""
SuperNet BatchNorm2d Module.
automatically adapt to any number of input channels.
capable of conducting batch norm according to designate width.
record different mean and varience using submodule batch norm layer.
learn a shared-weight to adapt to different width.
Args:
width_mult_list (list[float]): 可接受宽度的列表
Default: [1, 0.25]
divisible_factor (int): 使out_channel为几的整数倍
Default: 8
"""
def __init__(self, num_features, **kwargs):
super().__init__(num_features, affine=True, track_running_stats=True)
self.num_features_max = num_features
self.divisible_factor = kwargs.get('divisible_factor', 8)
        # two dedicated running-stat trackers: bn[0] for bn_setting == 'max',
        # bn[1] for 'min'; both allocated at full width and sliced in forward()
self.bn = nn.ModuleList([
nn.BatchNorm2d(i, affine=False) for i in [
make_divisible(self.num_features_max, self.divisible_factor)
for _ in range(2)
]
])
self.bn_setting = None
self.ignore_model_profiling = True
def forward(self, input):
input_size = input.size()
weight = self.weight
bias = self.bias
c = input_size[1]
self.num_features = c
if self.bn_setting == 'max':
y = nn.functional.batch_norm(input, self.bn[0].running_mean[:c],
self.bn[0].running_var[:c],
weight[:c], bias[:c], self.training,
self.momentum, self.eps)
elif self.bn_setting == 'min':
y = nn.functional.batch_norm(input, self.bn[1].running_mean[:c],
self.bn[1].running_var[:c],
weight[:c], bias[:c], self.training,
self.momentum, self.eps)
else:
            # fall back to the shared running stats; these are only updated in
            # this branch during training, so evaluating here can be unreliable
            # assert self.training
y = nn.functional.batch_norm(input, self.running_mean[:c],
self.running_var[:c], weight[:c],
bias[:c], self.training,
self.momentum, self.eps)
return y
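# Usage sketch (assumption: the caller toggles bn_setting per forward pass,
# matching the width currently being trained):
#   bn = SlimmableBatchNorm2d(64)
#   bn.bn_setting = 'max'    # track stats in bn[0] for the widest width
#   y = bn(x)                # x: (N, C, H, W) with C <= 64
#   bn.bn_setting = None     # fall back to the shared running stats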
class SlimmableGroupNorm2d(nn.GroupNorm):
def forward(self, input):
input_size = input.size()
c = input_size[1]
weight = self.weight[:c]
bias = self.bias[:c]
return nn.functional.group_norm(
input, self.num_groups, weight, bias, self.eps)
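# Note (an inference from the slicing above, not stated in the file): the
# active channel count c must remain divisible by num_groups, since num_groups
# stays fixed while weight and bias are sliced down to c channels.
#   gn = SlimmableGroupNorm2d(8, 64)         # 8 groups over up to 64 channels
#   y = gn(torch.randn(2, 32, 16, 16))       # ok: 32 % 8 == 0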
class SlimmableLinear(nn.Linear):
def __init__(self,
in_features,
out_features,
bias=True,
slimmable=True,
divisible_factor=16,):
super().__init__(in_features, out_features, bias=bias)
self.in_features_max = in_features
self.out_features_max = out_features
self.out_features = out_features
self.width_mult = 1
self.divisible_factor = divisible_factor
self.slimmable = slimmable
def forward(self, x):
input_size = x.size()
in_features = input_size[1]
if self.slimmable:
out_features = make_divisible(self.out_features * self.width_mult, self.divisible_factor)
else:
out_features = self.out_features
weight = self.weight[:out_features, :in_features]
if self.bias is not None:
bias = self.bias[:out_features]
else:
bias = self.bias
return nn.functional.linear(x, weight, bias)
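# Usage sketch (mirrors the Conv2d case; width_mult is set externally):
#   fc = SlimmableLinear(512, 256)
#   fc.width_mult = 0.25                     # make_divisible(256 * 0.25, 16) == 64
#   y = fc(torch.randn(2, 512))              # y: (2, 64)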
class SlimmableConv1d(nn.Conv1d):
def __init__(self,
in_channels,
out_channels,
kernel_size,
slimmable=True,
divisible_factor=8,):
super(SlimmableConv1d, self).__init__(
in_channels,
out_channels,
kernel_size)
self.out_channels = out_channels
self.out_channels_max = out_channels
self.slimmable = slimmable
self.divisible_factor = divisible_factor
self.width_mult = 1
def forward(self, input):
input_size = input.size()
in_channels = input_size[1]
if self.slimmable:
out_channels = make_divisible(self.out_channels * self.width_mult, self.divisible_factor)
else:
out_channels = self.out_channels
weight = self.weight[:out_channels, :in_channels, :]
if self.bias is not None:
bias = self.bias[:out_channels]
else:
bias = self.bias
y = nn.functional.conv1d(input, weight, bias, self.stride,
self.padding, self.dilation, self.groups)
return y
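if __name__ == "__main__":
    # Minimal smoke test (a sketch; assumes only the modules defined above
    # and a working torch install).
    import torch

    conv = SlimmableConv2d(16, 32, 3, padding=1)
    bn = SlimmableBatchNorm2d(32)
    x = torch.randn(2, 16, 8, 8)

    conv.width_mult = 1.0
    print(bn(conv(x)).shape)    # torch.Size([2, 32, 8, 8])

    conv.width_mult = 0.5       # slims to make_divisible(32 * 0.5) == 16 channels
    print(bn(conv(x)).shape)    # torch.Size([2, 16, 8, 8])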