Skip to content
This repository has been archived by the owner on Dec 18, 2024. It is now read-only.

Commit

Permalink
Merge pull request #5 from occ-ai/roy.resolve_crash_on_dml
Browse files Browse the repository at this point in the history
Refactor code formatting in .clang-format, obs-config-utils.cpp, obs-…
  • Loading branch information
royshil authored Apr 11, 2024
2 parents 33b061f + fb29d97 commit 3bd282a
Show file tree
Hide file tree
Showing 12 changed files with 347 additions and 494 deletions.
2 changes: 1 addition & 1 deletion .clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ BreakBeforeBraces: Custom
BreakBeforeTernaryOperators: true
BreakConstructorInitializers: BeforeColon
BreakStringLiterals: false # apparently unpredictable
ColumnLimit: 80
ColumnLimit: 100
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 8
Expand Down
522 changes: 224 additions & 298 deletions src/detect-filter.cpp

Large diffs are not rendered by default.

65 changes: 26 additions & 39 deletions src/edgeyolo/coco_names.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,45 +31,32 @@ static const std::vector<std::string> COCO_CLASSES = {
"vase", "scissors", "teddy bear",
"hair drier", "toothbrush"};
const float color_list[80][3] = {
{0.000f, 0.447f, 0.741f}, {0.850f, 0.325f, 0.098f},
{0.929f, 0.694f, 0.125f}, {0.494f, 0.184f, 0.556f},
{0.466f, 0.674f, 0.188f}, {0.301f, 0.745f, 0.933f},
{0.635f, 0.078f, 0.184f}, {0.300f, 0.300f, 0.300f},
{0.600f, 0.600f, 0.600f}, {1.000f, 0.000f, 0.000f},
{1.000f, 0.500f, 0.000f}, {0.749f, 0.749f, 0.000f},
{0.000f, 1.000f, 0.000f}, {0.000f, 0.000f, 1.000f},
{0.667f, 0.000f, 1.000f}, {0.333f, 0.333f, 0.000f},
{0.333f, 0.667f, 0.000f}, {0.333f, 1.000f, 0.000f},
{0.667f, 0.333f, 0.000f}, {0.667f, 0.667f, 0.000f},
{0.667f, 1.000f, 0.000f}, {1.000f, 0.333f, 0.000f},
{1.000f, 0.667f, 0.000f}, {1.000f, 1.000f, 0.000f},
{0.000f, 0.333f, 0.500f}, {0.000f, 0.667f, 0.500f},
{0.000f, 1.000f, 0.500f}, {0.333f, 0.000f, 0.500f},
{0.333f, 0.333f, 0.500f}, {0.333f, 0.667f, 0.500f},
{0.333f, 1.000f, 0.500f}, {0.667f, 0.000f, 0.500f},
{0.667f, 0.333f, 0.500f}, {0.667f, 0.667f, 0.500f},
{0.667f, 1.000f, 0.500f}, {1.000f, 0.000f, 0.500f},
{1.000f, 0.333f, 0.500f}, {1.000f, 0.667f, 0.500f},
{1.000f, 1.000f, 0.500f}, {0.000f, 0.333f, 1.000f},
{0.000f, 0.667f, 1.000f}, {0.000f, 1.000f, 1.000f},
{0.333f, 0.000f, 1.000f}, {0.333f, 0.333f, 1.000f},
{0.333f, 0.667f, 1.000f}, {0.333f, 1.000f, 1.000f},
{0.667f, 0.000f, 1.000f}, {0.667f, 0.333f, 1.000f},
{0.667f, 0.667f, 1.000f}, {0.667f, 1.000f, 1.000f},
{1.000f, 0.000f, 1.000f}, {1.000f, 0.333f, 1.000f},
{1.000f, 0.667f, 1.000f}, {0.333f, 0.000f, 0.000f},
{0.500f, 0.000f, 0.000f}, {0.667f, 0.000f, 0.000f},
{0.833f, 0.000f, 0.000f}, {1.000f, 0.000f, 0.000f},
{0.000f, 0.167f, 0.000f}, {0.000f, 0.333f, 0.000f},
{0.000f, 0.500f, 0.000f}, {0.000f, 0.667f, 0.000f},
{0.000f, 0.833f, 0.000f}, {0.000f, 1.000f, 0.000f},
{0.000f, 0.000f, 0.167f}, {0.000f, 0.000f, 0.333f},
{0.000f, 0.000f, 0.500f}, {0.000f, 0.000f, 0.667f},
{0.000f, 0.000f, 0.833f}, {0.000f, 0.000f, 1.000f},
{0.000f, 0.000f, 0.000f}, {0.143f, 0.143f, 0.143f},
{0.286f, 0.286f, 0.286f}, {0.429f, 0.429f, 0.429f},
{0.571f, 0.571f, 0.571f}, {0.714f, 0.714f, 0.714f},
{0.857f, 0.857f, 0.857f}, {0.000f, 0.447f, 0.741f},
{0.000f, 0.447f, 0.741f}, {0.850f, 0.325f, 0.098f}, {0.929f, 0.694f, 0.125f},
{0.494f, 0.184f, 0.556f}, {0.466f, 0.674f, 0.188f}, {0.301f, 0.745f, 0.933f},
{0.635f, 0.078f, 0.184f}, {0.300f, 0.300f, 0.300f}, {0.600f, 0.600f, 0.600f},
{1.000f, 0.000f, 0.000f}, {1.000f, 0.500f, 0.000f}, {0.749f, 0.749f, 0.000f},
{0.000f, 1.000f, 0.000f}, {0.000f, 0.000f, 1.000f}, {0.667f, 0.000f, 1.000f},
{0.333f, 0.333f, 0.000f}, {0.333f, 0.667f, 0.000f}, {0.333f, 1.000f, 0.000f},
{0.667f, 0.333f, 0.000f}, {0.667f, 0.667f, 0.000f}, {0.667f, 1.000f, 0.000f},
{1.000f, 0.333f, 0.000f}, {1.000f, 0.667f, 0.000f}, {1.000f, 1.000f, 0.000f},
{0.000f, 0.333f, 0.500f}, {0.000f, 0.667f, 0.500f}, {0.000f, 1.000f, 0.500f},
{0.333f, 0.000f, 0.500f}, {0.333f, 0.333f, 0.500f}, {0.333f, 0.667f, 0.500f},
{0.333f, 1.000f, 0.500f}, {0.667f, 0.000f, 0.500f}, {0.667f, 0.333f, 0.500f},
{0.667f, 0.667f, 0.500f}, {0.667f, 1.000f, 0.500f}, {1.000f, 0.000f, 0.500f},
{1.000f, 0.333f, 0.500f}, {1.000f, 0.667f, 0.500f}, {1.000f, 1.000f, 0.500f},
{0.000f, 0.333f, 1.000f}, {0.000f, 0.667f, 1.000f}, {0.000f, 1.000f, 1.000f},
{0.333f, 0.000f, 1.000f}, {0.333f, 0.333f, 1.000f}, {0.333f, 0.667f, 1.000f},
{0.333f, 1.000f, 1.000f}, {0.667f, 0.000f, 1.000f}, {0.667f, 0.333f, 1.000f},
{0.667f, 0.667f, 1.000f}, {0.667f, 1.000f, 1.000f}, {1.000f, 0.000f, 1.000f},
{1.000f, 0.333f, 1.000f}, {1.000f, 0.667f, 1.000f}, {0.333f, 0.000f, 0.000f},
{0.500f, 0.000f, 0.000f}, {0.667f, 0.000f, 0.000f}, {0.833f, 0.000f, 0.000f},
{1.000f, 0.000f, 0.000f}, {0.000f, 0.167f, 0.000f}, {0.000f, 0.333f, 0.000f},
{0.000f, 0.500f, 0.000f}, {0.000f, 0.667f, 0.000f}, {0.000f, 0.833f, 0.000f},
{0.000f, 1.000f, 0.000f}, {0.000f, 0.000f, 0.167f}, {0.000f, 0.000f, 0.333f},
{0.000f, 0.000f, 0.500f}, {0.000f, 0.000f, 0.667f}, {0.000f, 0.000f, 0.833f},
{0.000f, 0.000f, 1.000f}, {0.000f, 0.000f, 0.000f}, {0.143f, 0.143f, 0.143f},
{0.286f, 0.286f, 0.286f}, {0.429f, 0.429f, 0.429f}, {0.571f, 0.571f, 0.571f},
{0.714f, 0.714f, 0.714f}, {0.857f, 0.857f, 0.857f}, {0.000f, 0.447f, 0.741f},
{0.314f, 0.717f, 0.741f}, {0.50f, 0.5f, 0.0f}};
} // namespace edgeyolo_cpp
#endif
60 changes: 21 additions & 39 deletions src/edgeyolo/core.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,19 +33,15 @@ struct GridAndStride {
class AbcEdgeYOLO {
public:
AbcEdgeYOLO() {}
AbcEdgeYOLO(float nms_th = 0.45f, float conf_th = 0.3f,
int num_classes = 80)
AbcEdgeYOLO(float nms_th = 0.45f, float conf_th = 0.3f, int num_classes = 80)
: nms_thresh_(nms_th),
bbox_conf_thresh_(conf_th),
num_classes_(num_classes)
{
}
virtual std::vector<Object> inference(const cv::Mat &frame) = 0;

void setBBoxConfThresh(float thresh)
{
this->bbox_conf_thresh_ = thresh;
}
void setBBoxConfThresh(float thresh) { this->bbox_conf_thresh_ = thresh; }

protected:
int input_w_;
Expand All @@ -66,8 +62,7 @@ class AbcEdgeYOLO {
int unpad_h = (int)(r * (float)img.rows);
cv::Mat re(unpad_h, unpad_w, CV_8UC3);
cv::resize(img, re, re.size());
cv::Mat out(input_h_, input_w_, CV_8UC3,
cv::Scalar(114, 114, 114));
cv::Mat out(input_h_, input_w_, CV_8UC3, cv::Scalar(114, 114, 114));
re.copyTo(out(cv::Rect(0, 0, re.cols, re.rows)));
return out;
}
Expand All @@ -81,10 +76,8 @@ class AbcEdgeYOLO {
for (size_t c = 0; c < channels; ++c) {
for (size_t h = 0; h < img_h; ++h) {
for (size_t w = 0; w < img_w; ++w) {
blob_data[(int)(c * img_w * img_h +
h * img_w + w)] =
(float)img.ptr<cv::Vec3b>(
(int)h)[(int)w][(int)c];
blob_data[(int)(c * img_w * img_h + h * img_w + w)] =
(float)img.ptr<cv::Vec3b>((int)h)[(int)w][(int)c];
}
}
}
Expand All @@ -98,16 +91,13 @@ class AbcEdgeYOLO {
size_t img_w = img.cols;
for (size_t i = 0; i < img_h * img_w; ++i) {
for (size_t c = 0; c < channels; ++c) {
blob_data[i * channels + c] =
(float)img.data[i * channels + c];
blob_data[i * channels + c] = (float)img.data[i * channels + c];
}
}
}

void generate_edgeyolo_proposals(const int num_array,
const float *feat_ptr,
const float prob_threshold,
std::vector<Object> &objects)
void generate_edgeyolo_proposals(const int num_array, const float *feat_ptr,
const float prob_threshold, std::vector<Object> &objects)
{

for (int idx = 0; idx < num_array; ++idx) {
Expand All @@ -116,10 +106,8 @@ class AbcEdgeYOLO {
float box_objectness = feat_ptr[basic_pos + 4];
int class_id = 0;
float max_class_score = 0.0;
for (int class_idx = 0; class_idx < num_classes_;
++class_idx) {
float box_cls_score =
feat_ptr[basic_pos + 5 + class_idx];
for (int class_idx = 0; class_idx < num_classes_; ++class_idx) {
float box_cls_score = feat_ptr[basic_pos + 5 + class_idx];
float box_prob = box_objectness * box_cls_score;
if (box_prob > max_class_score) {
class_id = class_idx;
Expand Down Expand Up @@ -152,8 +140,7 @@ class AbcEdgeYOLO {
return inter.area();
}

void qsort_descent_inplace(std::vector<Object> &faceobjects, int left,
int right)
void qsort_descent_inplace(std::vector<Object> &faceobjects, int left, int right)
{
int i = left;
int j = right;
Expand Down Expand Up @@ -187,8 +174,7 @@ class AbcEdgeYOLO {
qsort_descent_inplace(objects, 0, (int)(objects.size() - 1));
}

void nms_sorted_bboxes(const std::vector<Object> &faceobjects,
std::vector<int> &picked,
void nms_sorted_bboxes(const std::vector<Object> &faceobjects, std::vector<int> &picked,
const float nms_threshold)
{
picked.clear();
Expand All @@ -210,8 +196,7 @@ class AbcEdgeYOLO {

// intersection over union
float inter_area = intersection_area(a, b);
float union_area = areas[i] + areas[picked[j]] -
inter_area;
float union_area = areas[i] + areas[picked[j]] - inter_area;
// float IoU = inter_area / union_area
if (inter_area / union_area > nms_threshold)
keep = 0;
Expand All @@ -222,15 +207,13 @@ class AbcEdgeYOLO {
}
}

void decode_outputs(const float *prob, const int num_array,
std::vector<Object> &objects,
const float bbox_conf_thresh, const float scale,
const int img_w, const int img_h)
void decode_outputs(const float *prob, const int num_array, std::vector<Object> &objects,
const float bbox_conf_thresh, const float scale, const int img_w,
const int img_h)
{

std::vector<Object> proposals;
generate_edgeyolo_proposals(num_array, prob, bbox_conf_thresh,
proposals);
generate_edgeyolo_proposals(num_array, prob, bbox_conf_thresh, proposals);

qsort_descent_inplace(proposals);

Expand All @@ -244,12 +227,11 @@ class AbcEdgeYOLO {
// adjust offset to original unpadded
float x0 = (proposals[picked[i]].rect.x) / scale;
float y0 = (proposals[picked[i]].rect.y) / scale;
float x1 = (proposals[picked[i]].rect.x +
proposals[picked[i]].rect.width) /
scale;
float y1 = (proposals[picked[i]].rect.y +
proposals[picked[i]].rect.height) /
float x1 = (proposals[picked[i]].rect.x + proposals[picked[i]].rect.width) /
scale;
float y1 =
(proposals[picked[i]].rect.y + proposals[picked[i]].rect.height) /
scale;

// clip
x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f);
Expand Down
86 changes: 34 additions & 52 deletions src/edgeyolo/edgeyolo_onnxruntime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@

namespace edgeyolo_cpp {

EdgeYOLOONNXRuntime::EdgeYOLOONNXRuntime(
file_name_t path_to_model, int intra_op_num_threads,
int inter_op_num_threads, const std::string &use_gpu_, int device_id,
bool use_parallel, float nms_th, float conf_th, int num_classes)
EdgeYOLOONNXRuntime::EdgeYOLOONNXRuntime(file_name_t path_to_model, int intra_op_num_threads,
int inter_op_num_threads, const std::string &use_gpu_,
int device_id, bool use_parallel, float nms_th,
float conf_th, int num_classes)
: AbcEdgeYOLO(nms_th, conf_th, num_classes),
intra_op_num_threads_(intra_op_num_threads),
inter_op_num_threads_(inter_op_num_threads),
Expand All @@ -20,41 +20,32 @@ EdgeYOLOONNXRuntime::EdgeYOLOONNXRuntime(
try {
Ort::SessionOptions session_options;

session_options.SetGraphOptimizationLevel(
GraphOptimizationLevel::ORT_ENABLE_ALL);
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
if (this->use_parallel_) {
session_options.SetExecutionMode(
ExecutionMode::ORT_PARALLEL);
session_options.SetInterOpNumThreads(
this->inter_op_num_threads_);
session_options.SetExecutionMode(ExecutionMode::ORT_PARALLEL);
session_options.SetInterOpNumThreads(this->inter_op_num_threads_);
} else {
session_options.SetExecutionMode(
ExecutionMode::ORT_SEQUENTIAL);
session_options.SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL);
}
session_options.SetIntraOpNumThreads(
this->intra_op_num_threads_);
session_options.SetIntraOpNumThreads(this->intra_op_num_threads_);

#ifdef _WIN32
if (this->use_gpu == "cuda") {
OrtCUDAProviderOptions cuda_option;
cuda_option.device_id = this->device_id_;
session_options.AppendExecutionProvider_CUDA(
cuda_option);
session_options.AppendExecutionProvider_CUDA(cuda_option);
}
if (this->use_gpu == "dml") {
auto &api = Ort::GetApi();
OrtDmlApi *dmlApi = nullptr;
Ort::ThrowOnError(api.GetExecutionProviderApi(
"DML", ORT_API_VERSION,
(const void **)&dmlApi));
Ort::ThrowOnError(
dmlApi->SessionOptionsAppendExecutionProvider_DML(
session_options, 0));
Ort::ThrowOnError(api.GetExecutionProviderApi("DML", ORT_API_VERSION,
(const void **)&dmlApi));
Ort::ThrowOnError(dmlApi->SessionOptionsAppendExecutionProvider_DML(
session_options, 0));
}
#endif

this->session_ = Ort::Session(this->env_, path_to_model.c_str(),
session_options);
this->session_ = Ort::Session(this->env_, path_to_model.c_str(), session_options);
} catch (std::exception &e) {
std::cerr << e.what() << std::endl;
throw e;
Expand All @@ -63,31 +54,25 @@ EdgeYOLOONNXRuntime::EdgeYOLOONNXRuntime(
Ort::AllocatorWithDefaultOptions ort_alloc;

// Allocate input memory buffer
this->input_name_ = std::string(
this->session_.GetInputNameAllocated(0, ort_alloc).get());
this->input_name_ = std::string(this->session_.GetInputNameAllocated(0, ort_alloc).get());
auto input_info = this->session_.GetInputTypeInfo(0);
auto input_shape_info = input_info.GetTensorTypeAndShapeInfo();
std::vector<int64_t> input_shape = input_shape_info.GetShape();
ONNXTensorElementDataType input_tensor_type =
input_shape_info.GetElementType();
ONNXTensorElementDataType input_tensor_type = input_shape_info.GetElementType();
this->input_h_ = (int)(input_shape[2]);
this->input_w_ = (int)(input_shape[3]);

size_t input_byte_count =
sizeof(float) * input_shape_info.GetElementCount();
std::unique_ptr<uint8_t[]> input_buffer =
std::make_unique<uint8_t[]>(input_byte_count);
auto input_memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator,
OrtMemTypeDefault);
size_t input_byte_count = sizeof(float) * input_shape_info.GetElementCount();
std::unique_ptr<uint8_t[]> input_buffer = std::make_unique<uint8_t[]>(input_byte_count);
auto input_memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

this->input_tensor_ = Ort::Value::CreateTensor(
input_memory_info, input_buffer.get(), input_byte_count,
input_shape.data(), input_shape.size(), input_tensor_type);
this->input_tensor_ = Ort::Value::CreateTensor(input_memory_info, input_buffer.get(),
input_byte_count, input_shape.data(),
input_shape.size(), input_tensor_type);
this->input_buffer_.emplace_back(std::move(input_buffer));

// Allocate output memory buffer
this->output_name_ = std::string(
this->session_.GetOutputNameAllocated(0, ort_alloc).get());
this->output_name_ = std::string(this->session_.GetOutputNameAllocated(0, ort_alloc).get());

auto output_info = this->session_.GetOutputTypeInfo(0);
auto output_shape_info = output_info.GetTensorTypeAndShapeInfo();
Expand All @@ -100,16 +85,13 @@ EdgeYOLOONNXRuntime::EdgeYOLOONNXRuntime(
}
this->num_array_ /= (5 + this->num_classes_);

size_t output_byte_count =
sizeof(float) * output_shape_info.GetElementCount();
std::unique_ptr<uint8_t[]> output_buffer =
std::make_unique<uint8_t[]>(output_byte_count);
auto output_memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator,
OrtMemTypeDefault);
size_t output_byte_count = sizeof(float) * output_shape_info.GetElementCount();
std::unique_ptr<uint8_t[]> output_buffer = std::make_unique<uint8_t[]>(output_byte_count);
auto output_memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

this->output_tensor_ = Ort::Value::CreateTensor(
output_memory_info, output_buffer.get(), output_byte_count,
output_shape.data(), output_shape.size(), output_tensor_type);
this->output_tensor_ = Ort::Value::CreateTensor(output_memory_info, output_buffer.get(),
output_byte_count, output_shape.data(),
output_shape.size(), output_tensor_type);
this->output_buffer_.emplace_back(std::move(output_buffer));
}

Expand All @@ -126,17 +108,17 @@ std::vector<Object> EdgeYOLOONNXRuntime::inference(const cv::Mat &frame)

// Inference
Ort::RunOptions run_options;
this->session_.Run(run_options, input_names_, &this->input_tensor_, 1,
output_names_, &this->output_tensor_, 1);
this->session_.Run(run_options, input_names_, &this->input_tensor_, 1, output_names_,
&this->output_tensor_, 1);

float *net_pred = (float *)this->output_buffer_[0].get();

// post process
float scale = std::fminf((float)input_w_ / (float)frame.cols,
(float)input_h_ / (float)frame.rows);
std::vector<Object> objects;
decode_outputs(net_pred, this->num_array_, objects,
this->bbox_conf_thresh_, scale, frame.cols, frame.rows);
decode_outputs(net_pred, this->num_array_, objects, this->bbox_conf_thresh_, scale,
frame.cols, frame.rows);
return objects;
}

Expand Down
5 changes: 2 additions & 3 deletions src/edgeyolo/edgeyolo_onnxruntime.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,8 @@ namespace edgeyolo_cpp {
class EdgeYOLOONNXRuntime : public AbcEdgeYOLO {
public:
EdgeYOLOONNXRuntime(file_name_t path_to_model, int intra_op_num_threads,
int inter_op_num_threads = 1,
const std::string &use_gpu_ = "", int device_id = 0,
bool use_parallel = false, float nms_th = 0.45f,
int inter_op_num_threads = 1, const std::string &use_gpu_ = "",
int device_id = 0, bool use_parallel = false, float nms_th = 0.45f,
float conf_th = 0.3f, int num_classes = 80);
std::vector<Object> inference(const cv::Mat &frame) override;

Expand Down
Loading

0 comments on commit 3bd282a

Please sign in to comment.