# YOLOv3 🚀 by Ultralytics, GPL-3.0 license
# edited by hyeniii
"""
Run inference on images, videos, directories, streams, etc.
Usage:
$ python path/to/detect.py --weights yolov3.pt --source 0 # webcam
img.jpg # image
vid.mp4 # video
path/ # directory
path/*.jpg # glob
'https://youtu.be/Zgi9g1ksQHc' # YouTube
'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream
"""
import os
import sys
import time
from io import BytesIO
from pathlib import Path

import cv2
import numpy as np
import onnxruntime
from PIL import Image
FILE = Path(__file__).resolve()  # absolute path to this file (detect.py)
ROOT = FILE.parents[0]  # repository root = directory containing detect.py
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))  # add ROOT to PATH so utils/ can be imported
ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # make ROOT relative to cwd
# from models.common import DetectMultiBackend
from utils.datasets import IMG_FORMATS, VID_FORMATS, LoadImages
from utils.general import (LOGGER, check_img_size, check_requirements, colorstr, check_suffix,
                           non_max_suppression, scale_coords, xyxy2xywh)
from utils.plots import Annotator, save_one_box, colors
# from utils.torch_utils import select_device
from utils.s3 import get_img
class DetectMultiBackend():
    # MultiBackend class for python inference on various backends
    def __init__(self, weights='yolov3.pt', device=None, dnn=True):
        # Usage:
        #   ONNX Runtime: *.onnx
        #   OpenCV DNN:   *.onnx with dnn=True
        super().__init__()
        w = str(weights[0] if isinstance(weights, list) else weights)
        suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx']
        check_suffix(w, suffixes)  # check weights have an acceptable suffix
        pt, onnx = (suffix == x for x in suffixes)  # backend booleans
        stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
        if onnx:  # ONNX Runtime
            LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
            check_requirements(('onnx', 'onnxruntime-gpu' if onnxruntime.get_device() == 'GPU' else 'onnxruntime'))
            # 2022.03.02 edited by nhwh
            names = ['pork_belly', 'ramen', 'bibimbap', 'champon', 'cold_noodle', 'cutlassfish', 'egg_custard',
                     'egg_soup', 'jajangmyeon', 'kimchi_stew', 'multigrain_rice', 'oxtail_soup', 'pickled_spinach',
                     'pizza', 'pork_feet', 'quail_egg_stew', 'seasoned_chicken', 'seaweed_soup', 'soy_bean_paste_soup',
                     'stewed_bean', 'stewed_lotus_stew', 'stir_fried_anchovy', 'stir_fried_pork', 'salad',
                     'ice_americano', 'Bottled_Beer', 'Canned_Beer', 'Draft_Beer', 'Fried_Chicken', 'Tteokbokki',
                     'Cabbage_Kimchi', 'Radish_Kimchi']
            # 2022.03.02 edited by nhwh
            session = onnxruntime.InferenceSession(w, providers=['CUDAExecutionProvider', 'TensorrtExecutionProvider', 'CPUExecutionProvider'])
        self.__dict__.update(locals())  # assign all local variables to self
    def forward(self, im, augment=False, visualize=False, val=False):
        # MultiBackend inference
        b, ch, h, w = im.shape  # batch, channel, height, width
        if self.onnx:  # ONNX Runtime
            y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0]
            # y = torch.tensor(y)
        return (y, []) if val else y
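

# A minimal usage sketch of DetectMultiBackend on its own (a hedged example:
# it assumes a local 'best.onnx' export with a 1x3x640x640 float32 input,
# which is not guaranteed by this file):
#
#   model = DetectMultiBackend('best.onnx')
#   dummy = np.zeros((1, 3, 640, 640), dtype=np.float32)  # placeholder input
#   raw = model.forward(dummy)  # raw predictions, before NMS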
# @torch.no_grad()
def run(image,  # raw image bytes (e.g. an uploaded JPEG)
        weights=ROOT / 'best.onnx',  # model path(s) (.onnx for this fork)
        imgsz=[640, 640],  # inference size (pixels)
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IOU threshold
        max_det=1000,  # maximum detections per image
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=False,  # show results
        save_crop=False,  # save cropped prediction boxes
        nosave=False,  # do not save images/videos
        classes=None,  # filter by class: e.g. classes=0 or classes=[0, 2, 3]
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        visualize=False,  # visualize features
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        half=False,  # use FP16 half-precision inference
        dnn=False,  # use OpenCV DNN for ONNX inference
        save_txt=False,  # save results to *.txt
        save_conf=False,  # save confidences in save_txt labels
        update=False,  # update all models
        exist_ok=False,  # existing project/name ok, do not increment
        ):
    img = Image.open(BytesIO(image))  # decode incoming bytes with PIL
    source = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)  # PIL RGB -> OpenCV BGR ndarray
    save_img = not nosave

    # Load model
    device = 'cuda:0' if onnxruntime.get_device() == 'GPU' else 'cpu'
    model = DetectMultiBackend(weights, device=device, dnn=dnn)
    stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx
    imgsz = check_img_size(imgsz, s=stride)  # verify image size is a multiple of stride

    # Dataloader (this fork's LoadImages takes an ndarray and returns a single
    # (letterboxed image, original image, log string) tuple rather than iterating)
    im, im0s, s = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
    bs = 1  # batch_size
    dt, seen = [0.0, 0.0, 0.0], 0
    t1 = time.time()
    im = im.astype(np.float16) if half else np.float32(im)  # uint8 to fp16/32
    im /= 255  # 0 - 255 to 0.0 - 1.0
    if len(im.shape) == 3:
        im = im[None]  # expand for batch dim
    t2 = time.time()
    dt[0] += t2 - t1

    # Inference
    pred = model.forward(im, augment=augment, visualize=visualize)
    t3 = time.time()
    dt[1] += t3 - t2

    # NMS
    pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
    dt[2] += time.time() - t3
    # Process predictions
    c = -1  # last drawn class index; stays -1 if nothing is detected
    for i, det in enumerate(pred):  # per image
        seen += 1
        im0 = im0s.copy()
        s += '%gx%g ' % im.shape[2:]  # print string
        gn = np.array(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
        annotator = Annotator(im0, line_width=line_thickness, example=str(names))
        if len(det):
            # Rescale boxes from img_size to im0 size
            det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()

            # Print results
            for c in np.unique(det[:, -1]):
                n = (det[:, -1] == c).sum()  # detections per class
                s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

            # Write results
            for *xyxy, conf, cls in reversed(det):
                if save_img or save_crop or view_img:  # add bbox to image
                    c = int(cls)  # integer class
                    label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
                    annotator.box_label(xyxy, label, color=colors(c, True))

        # Print time (inference-only)
        LOGGER.info(f'{s}Done. ({t3 - t2:.3f}s)')

        # Encode the annotated image back to JPEG bytes
        im0 = annotator.result()
        _, im0_byte = cv2.imencode('.jpg', im0)

    # Print results
    t = tuple(x / seen * 1E3 for x in dt)  # speeds per image
    LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
    return im0_byte, c
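

# A sketch of calling run() directly with tuned thresholds (the threshold
# values and 'img.jpg' path are illustrative, not from the original):
#
#   with open('img.jpg', 'rb') as f:
#       im0_byte, c = run(f.read(), conf_thres=0.4, iou_thres=0.5)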
def main(img_b):
    check_requirements(exclude=('tensorboard', 'thop'))
    return run(image=img_b)  # propagate (annotated JPEG bytes, class index) to the caller
# if __name__ == '__main__':
# main()
# def from_s3(self, bucket, key):
# file_byte_string = self.s3.get_object(Bucket=bucket, Key=key)['Body'].read()
# return Image.open(BytesIO(file_byte_string))
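
# Example entry point (a minimal sketch of driving this module as a script;
# 'test.jpg' and 'result.jpg' are hypothetical paths, and im0_byte is the
# numpy buffer produced by cv2.imencode above):
#
# if __name__ == '__main__':
#     with open('test.jpg', 'rb') as f:
#         annotated, cls_idx = main(f.read())
#     with open('result.jpg', 'wb') as f:
#         f.write(annotated.tobytes())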