-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathyolov2_tiny.py
executable file
·128 lines (89 loc) · 4.71 KB
/
yolov2_tiny.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
# from loss import build_target, yolo_loss
class Yolov2(nn.Module):
    """Tiny YOLOv2-style detection network.

    Eight 3x3 conv + batch-norm + LeakyReLU stages feed a 1x1 conv head.
    The first five stages are each followed by a 2x2 / stride-2 max-pool, so
    the output grid is the input resolution divided by 32 (rounded down).
    The head emits ``num_anchors * (5 + num_classes)`` channels per grid cell.
    """

    # Class-level defaults. ``num_classes`` is shadowed per instance when a
    # non-empty ``classes`` sequence is passed to the constructor.
    num_classes = 20
    num_anchors = 5

    def __init__(self, classes=None, weights_file=False):
        """Build the layers.

        Args:
            classes: optional sequence of class names. When non-empty, its
                length replaces the default ``num_classes``.
            weights_file: accepted for interface compatibility; this
                constructor does not read it.
        """
        super(Yolov2, self).__init__()
        if classes:
            self.num_classes = len(classes)

        # Parameter-free layers shared by every stage.
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.lrelu = nn.LeakyReLU(0.1, inplace=True)

        # Backbone. Convolutions carry no bias because each one is followed
        # by a BatchNorm layer, which has its own learnable shift.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)

        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(32)

        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn3 = nn.BatchNorm2d(64)

        self.conv4 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn4 = nn.BatchNorm2d(128)

        self.conv5 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn5 = nn.BatchNorm2d(256)

        self.conv6 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn6 = nn.BatchNorm2d(512)

        self.conv7 = nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn7 = nn.BatchNorm2d(1024)

        self.conv8 = nn.Conv2d(in_channels=1024, out_channels=1024, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn8 = nn.BatchNorm2d(1024)

        # Detection head: one (5 + num_classes) vector per anchor per cell.
        # Kept inside nn.Sequential so parameter names stay ``conv9.0.*``.
        self.conv9 = nn.Sequential(nn.Conv2d(1024, (5 + self.num_classes) * self.num_anchors, kernel_size=1))

    def forward(self, x, gt_boxes=None, gt_classes=None, num_boxes=None, training=False):
        """Run the network and return predictions or, when ``training``, losses.

        Args:
            x: image batch with 3 channels, laid out as (B, 3, H, W).
            gt_boxes, gt_classes, num_boxes: ground-truth data handed
                unchanged to ``build_target``; only read when ``training``
                is true.
            training: when true, return the loss terms instead of the
                predictions. This flag is independent of
                ``nn.Module.training`` (set via ``train()`` / ``eval()``).

        Returns:
            When ``training`` is false, the tuple
            ``(delta_pred, conf_pred, class_pred)`` with shapes
            ``(B, N, 4)``, ``(B, N, 1)`` and ``(B, N, num_classes)``, where
            ``N = h * w * num_anchors`` and ``h, w`` is the output grid size.
            When ``training`` is true, the tuple
            ``(box_loss, iou_loss, class_loss)`` returned by ``yolo_loss``.

        Raises:
            NameError: if ``training`` is true but ``build_target`` /
                ``yolo_loss`` are not defined in this module.
        """
        make_targets = compute_loss = None
        if training:
            # The loss helpers are resolved as module globals. Check for them
            # before the forward pass so a missing import fails immediately
            # with an actionable message instead of after all the compute.
            try:
                make_targets, compute_loss = build_target, yolo_loss
            except NameError:
                raise NameError(
                    "Yolov2.forward(training=True) needs build_target and "
                    "yolo_loss to be defined in this module "
                    "(e.g. `from loss import build_target, yolo_loss`)."
                )

        # Stages 1-5 halve the resolution each; stages 6-8 keep it.
        x = self.maxpool(self.lrelu(self.bn1(self.conv1(x))))
        x = self.maxpool(self.lrelu(self.bn2(self.conv2(x))))
        x = self.maxpool(self.lrelu(self.bn3(self.conv3(x))))
        x = self.maxpool(self.lrelu(self.bn4(self.conv4(x))))
        x = self.maxpool(self.lrelu(self.bn5(self.conv5(x))))
        x = self.lrelu(self.bn6(self.conv6(x)))
        x = self.lrelu(self.bn7(self.conv7(x)))
        x = self.lrelu(self.bn8(self.conv8(x)))

        # out: (B, num_anchors * (5 + num_classes), h, w)
        out = self.conv9(x)
        bsize, _, h, w = out.size()

        # Move channels last, then split them per anchor:
        # (B, h * w * num_anchors, 5 + num_classes). Each row holds
        # (t_x, t_y, t_h, t_w, t_c) followed by the per-class scores.
        out = out.permute(0, 2, 3, 1).contiguous().view(bsize, h * w * self.num_anchors, 5 + self.num_classes)

        # Activations: sigmoid for t_x, t_y and t_c; exp for t_h, t_w.
        xy_pred = torch.sigmoid(out[:, :, 0:2])
        hw_pred = torch.exp(out[:, :, 2:4])
        conf_pred = torch.sigmoid(out[:, :, 4:5])
        class_score = out[:, :, 5:]
        delta_pred = torch.cat([xy_pred, hw_pred], dim=-1)

        if training:
            # The loss receives the raw (pre-softmax) class scores.
            output_variable = (delta_pred, conf_pred, class_score)
            # Targets are built from graph-detached copies of the outputs.
            output_data = [v.data for v in output_variable]
            gt_data = (gt_boxes, gt_classes, num_boxes)
            target_data = make_targets(output_data, gt_data, h, w)
            target_variable = [Variable(v) for v in target_data]
            box_loss, iou_loss, class_loss = compute_loss(output_variable, target_variable)
            return box_loss, iou_loss, class_loss

        # Class probabilities are only needed on the prediction path.
        class_pred = F.softmax(class_score, dim=-1)
        return delta_pred, conf_pred, class_pred
if __name__ == '__main__':
    # Smoke test: push one random 416x416 RGB batch through an untrained
    # model and print the size of each prediction tensor.
    net = Yolov2()
    print(net)

    # randn yields float64; the model's weights are float32, hence .float().
    random_image = np.random.randn(1, 3, 416, 416)
    net_input = Variable(torch.from_numpy(random_image)).float()

    delta_pred, conf_pred, class_pred = net(net_input)
    for caption, pred in (
        ('delta_pred size:', delta_pred),
        ('conf_pred size:', conf_pred),
        ('class_pred size:', class_pred),
    ):
        print(caption, pred.size())