Merge pull request #5 from occ-ai/roy.resolve_crash_on_dml

Refactor code formatting in .clang-format, obs-config-utils.cpp, obs-…
locaal-ai · Apr 11, 2024 · 3bd282a
2 parents 33b061f + fb29d97
commit 3bd282a
Show file tree

Hide file tree

Showing 12 changed files with 347 additions and 494 deletions.
diff --git a/.clang-format b/.clang-format
@@ -44,7 +44,7 @@ BreakBeforeBraces: Custom
 BreakBeforeTernaryOperators: true
 BreakConstructorInitializers: BeforeColon
 BreakStringLiterals: false  # apparently unpredictable
-ColumnLimit: 80
+ColumnLimit: 100
 CompactNamespaces: false
 ConstructorInitializerAllOnOneLineOrOnePerLine: true
 ConstructorInitializerIndentWidth: 8

diff --git a/src/detect-filter.cpp b/src/detect-filter.cpp
diff --git a/src/edgeyolo/coco_names.hpp b/src/edgeyolo/coco_names.hpp
@@ -31,45 +31,32 @@ static const std::vector<std::string> COCO_CLASSES = {
 	"vase",          "scissors",     "teddy bear",
 	"hair drier",    "toothbrush"};
 const float color_list[80][3] = {
-	{0.000f, 0.447f, 0.741f}, {0.850f, 0.325f, 0.098f},
-	{0.929f, 0.694f, 0.125f}, {0.494f, 0.184f, 0.556f},
-	{0.466f, 0.674f, 0.188f}, {0.301f, 0.745f, 0.933f},
-	{0.635f, 0.078f, 0.184f}, {0.300f, 0.300f, 0.300f},
-	{0.600f, 0.600f, 0.600f}, {1.000f, 0.000f, 0.000f},
-	{1.000f, 0.500f, 0.000f}, {0.749f, 0.749f, 0.000f},
-	{0.000f, 1.000f, 0.000f}, {0.000f, 0.000f, 1.000f},
-	{0.667f, 0.000f, 1.000f}, {0.333f, 0.333f, 0.000f},
-	{0.333f, 0.667f, 0.000f}, {0.333f, 1.000f, 0.000f},
-	{0.667f, 0.333f, 0.000f}, {0.667f, 0.667f, 0.000f},
-	{0.667f, 1.000f, 0.000f}, {1.000f, 0.333f, 0.000f},
-	{1.000f, 0.667f, 0.000f}, {1.000f, 1.000f, 0.000f},
-	{0.000f, 0.333f, 0.500f}, {0.000f, 0.667f, 0.500f},
-	{0.000f, 1.000f, 0.500f}, {0.333f, 0.000f, 0.500f},
-	{0.333f, 0.333f, 0.500f}, {0.333f, 0.667f, 0.500f},
-	{0.333f, 1.000f, 0.500f}, {0.667f, 0.000f, 0.500f},
-	{0.667f, 0.333f, 0.500f}, {0.667f, 0.667f, 0.500f},
-	{0.667f, 1.000f, 0.500f}, {1.000f, 0.000f, 0.500f},
-	{1.000f, 0.333f, 0.500f}, {1.000f, 0.667f, 0.500f},
-	{1.000f, 1.000f, 0.500f}, {0.000f, 0.333f, 1.000f},
-	{0.000f, 0.667f, 1.000f}, {0.000f, 1.000f, 1.000f},
-	{0.333f, 0.000f, 1.000f}, {0.333f, 0.333f, 1.000f},
-	{0.333f, 0.667f, 1.000f}, {0.333f, 1.000f, 1.000f},
-	{0.667f, 0.000f, 1.000f}, {0.667f, 0.333f, 1.000f},
-	{0.667f, 0.667f, 1.000f}, {0.667f, 1.000f, 1.000f},
-	{1.000f, 0.000f, 1.000f}, {1.000f, 0.333f, 1.000f},
-	{1.000f, 0.667f, 1.000f}, {0.333f, 0.000f, 0.000f},
-	{0.500f, 0.000f, 0.000f}, {0.667f, 0.000f, 0.000f},
-	{0.833f, 0.000f, 0.000f}, {1.000f, 0.000f, 0.000f},
-	{0.000f, 0.167f, 0.000f}, {0.000f, 0.333f, 0.000f},
-	{0.000f, 0.500f, 0.000f}, {0.000f, 0.667f, 0.000f},
-	{0.000f, 0.833f, 0.000f}, {0.000f, 1.000f, 0.000f},
-	{0.000f, 0.000f, 0.167f}, {0.000f, 0.000f, 0.333f},
-	{0.000f, 0.000f, 0.500f}, {0.000f, 0.000f, 0.667f},
-	{0.000f, 0.000f, 0.833f}, {0.000f, 0.000f, 1.000f},
-	{0.000f, 0.000f, 0.000f}, {0.143f, 0.143f, 0.143f},
-	{0.286f, 0.286f, 0.286f}, {0.429f, 0.429f, 0.429f},
-	{0.571f, 0.571f, 0.571f}, {0.714f, 0.714f, 0.714f},
-	{0.857f, 0.857f, 0.857f}, {0.000f, 0.447f, 0.741f},
+	{0.000f, 0.447f, 0.741f}, {0.850f, 0.325f, 0.098f}, {0.929f, 0.694f, 0.125f},
+	{0.494f, 0.184f, 0.556f}, {0.466f, 0.674f, 0.188f}, {0.301f, 0.745f, 0.933f},
+	{0.635f, 0.078f, 0.184f}, {0.300f, 0.300f, 0.300f}, {0.600f, 0.600f, 0.600f},
+	{1.000f, 0.000f, 0.000f}, {1.000f, 0.500f, 0.000f}, {0.749f, 0.749f, 0.000f},
+	{0.000f, 1.000f, 0.000f}, {0.000f, 0.000f, 1.000f}, {0.667f, 0.000f, 1.000f},
+	{0.333f, 0.333f, 0.000f}, {0.333f, 0.667f, 0.000f}, {0.333f, 1.000f, 0.000f},
+	{0.667f, 0.333f, 0.000f}, {0.667f, 0.667f, 0.000f}, {0.667f, 1.000f, 0.000f},
+	{1.000f, 0.333f, 0.000f}, {1.000f, 0.667f, 0.000f}, {1.000f, 1.000f, 0.000f},
+	{0.000f, 0.333f, 0.500f}, {0.000f, 0.667f, 0.500f}, {0.000f, 1.000f, 0.500f},
+	{0.333f, 0.000f, 0.500f}, {0.333f, 0.333f, 0.500f}, {0.333f, 0.667f, 0.500f},
+	{0.333f, 1.000f, 0.500f}, {0.667f, 0.000f, 0.500f}, {0.667f, 0.333f, 0.500f},
+	{0.667f, 0.667f, 0.500f}, {0.667f, 1.000f, 0.500f}, {1.000f, 0.000f, 0.500f},
+	{1.000f, 0.333f, 0.500f}, {1.000f, 0.667f, 0.500f}, {1.000f, 1.000f, 0.500f},
+	{0.000f, 0.333f, 1.000f}, {0.000f, 0.667f, 1.000f}, {0.000f, 1.000f, 1.000f},
+	{0.333f, 0.000f, 1.000f}, {0.333f, 0.333f, 1.000f}, {0.333f, 0.667f, 1.000f},
+	{0.333f, 1.000f, 1.000f}, {0.667f, 0.000f, 1.000f}, {0.667f, 0.333f, 1.000f},
+	{0.667f, 0.667f, 1.000f}, {0.667f, 1.000f, 1.000f}, {1.000f, 0.000f, 1.000f},
+	{1.000f, 0.333f, 1.000f}, {1.000f, 0.667f, 1.000f}, {0.333f, 0.000f, 0.000f},
+	{0.500f, 0.000f, 0.000f}, {0.667f, 0.000f, 0.000f}, {0.833f, 0.000f, 0.000f},
+	{1.000f, 0.000f, 0.000f}, {0.000f, 0.167f, 0.000f}, {0.000f, 0.333f, 0.000f},
+	{0.000f, 0.500f, 0.000f}, {0.000f, 0.667f, 0.000f}, {0.000f, 0.833f, 0.000f},
+	{0.000f, 1.000f, 0.000f}, {0.000f, 0.000f, 0.167f}, {0.000f, 0.000f, 0.333f},
+	{0.000f, 0.000f, 0.500f}, {0.000f, 0.000f, 0.667f}, {0.000f, 0.000f, 0.833f},
+	{0.000f, 0.000f, 1.000f}, {0.000f, 0.000f, 0.000f}, {0.143f, 0.143f, 0.143f},
+	{0.286f, 0.286f, 0.286f}, {0.429f, 0.429f, 0.429f}, {0.571f, 0.571f, 0.571f},
+	{0.714f, 0.714f, 0.714f}, {0.857f, 0.857f, 0.857f}, {0.000f, 0.447f, 0.741f},
 	{0.314f, 0.717f, 0.741f}, {0.50f, 0.5f, 0.0f}};
 } // namespace edgeyolo_cpp
 #endif
diff --git a/src/edgeyolo/core.hpp b/src/edgeyolo/core.hpp
@@ -33,19 +33,15 @@ struct GridAndStride {
 class AbcEdgeYOLO {
 public:
 	AbcEdgeYOLO() {}
-	AbcEdgeYOLO(float nms_th = 0.45f, float conf_th = 0.3f,
-		    int num_classes = 80)
+	AbcEdgeYOLO(float nms_th = 0.45f, float conf_th = 0.3f, int num_classes = 80)
 		: nms_thresh_(nms_th),
 		  bbox_conf_thresh_(conf_th),
 		  num_classes_(num_classes)
 	{
 	}
 	virtual std::vector<Object> inference(const cv::Mat &frame) = 0;
 
-	void setBBoxConfThresh(float thresh)
-	{
-		this->bbox_conf_thresh_ = thresh;
-	}
+	void setBBoxConfThresh(float thresh) { this->bbox_conf_thresh_ = thresh; }
 
 protected:
 	int input_w_;
@@ -66,8 +62,7 @@ class AbcEdgeYOLO {
 		int unpad_h = (int)(r * (float)img.rows);
 		cv::Mat re(unpad_h, unpad_w, CV_8UC3);
 		cv::resize(img, re, re.size());
-		cv::Mat out(input_h_, input_w_, CV_8UC3,
-			    cv::Scalar(114, 114, 114));
+		cv::Mat out(input_h_, input_w_, CV_8UC3, cv::Scalar(114, 114, 114));
 		re.copyTo(out(cv::Rect(0, 0, re.cols, re.rows)));
 		return out;
 	}
@@ -81,10 +76,8 @@ class AbcEdgeYOLO {
 		for (size_t c = 0; c < channels; ++c) {
 			for (size_t h = 0; h < img_h; ++h) {
 				for (size_t w = 0; w < img_w; ++w) {
-					blob_data[(int)(c * img_w * img_h +
-							h * img_w + w)] =
-						(float)img.ptr<cv::Vec3b>(
-							(int)h)[(int)w][(int)c];
+					blob_data[(int)(c * img_w * img_h + h * img_w + w)] =
+						(float)img.ptr<cv::Vec3b>((int)h)[(int)w][(int)c];
 				}
 			}
 		}
@@ -98,16 +91,13 @@ class AbcEdgeYOLO {
 		size_t img_w = img.cols;
 		for (size_t i = 0; i < img_h * img_w; ++i) {
 			for (size_t c = 0; c < channels; ++c) {
-				blob_data[i * channels + c] =
-					(float)img.data[i * channels + c];
+				blob_data[i * channels + c] = (float)img.data[i * channels + c];
 			}
 		}
 	}
 
-	void generate_edgeyolo_proposals(const int num_array,
-					 const float *feat_ptr,
-					 const float prob_threshold,
-					 std::vector<Object> &objects)
+	void generate_edgeyolo_proposals(const int num_array, const float *feat_ptr,
+					 const float prob_threshold, std::vector<Object> &objects)
 	{
 
 		for (int idx = 0; idx < num_array; ++idx) {
@@ -116,10 +106,8 @@ class AbcEdgeYOLO {
 			float box_objectness = feat_ptr[basic_pos + 4];
 			int class_id = 0;
 			float max_class_score = 0.0;
-			for (int class_idx = 0; class_idx < num_classes_;
-			     ++class_idx) {
-				float box_cls_score =
-					feat_ptr[basic_pos + 5 + class_idx];
+			for (int class_idx = 0; class_idx < num_classes_; ++class_idx) {
+				float box_cls_score = feat_ptr[basic_pos + 5 + class_idx];
 				float box_prob = box_objectness * box_cls_score;
 				if (box_prob > max_class_score) {
 					class_id = class_idx;
@@ -152,8 +140,7 @@ class AbcEdgeYOLO {
 		return inter.area();
 	}
 
-	void qsort_descent_inplace(std::vector<Object> &faceobjects, int left,
-				   int right)
+	void qsort_descent_inplace(std::vector<Object> &faceobjects, int left, int right)
 	{
 		int i = left;
 		int j = right;
@@ -187,8 +174,7 @@ class AbcEdgeYOLO {
 		qsort_descent_inplace(objects, 0, (int)(objects.size() - 1));
 	}
 
-	void nms_sorted_bboxes(const std::vector<Object> &faceobjects,
-			       std::vector<int> &picked,
+	void nms_sorted_bboxes(const std::vector<Object> &faceobjects, std::vector<int> &picked,
 			       const float nms_threshold)
 	{
 		picked.clear();
@@ -210,8 +196,7 @@ class AbcEdgeYOLO {
 
 				// intersection over union
 				float inter_area = intersection_area(a, b);
-				float union_area = areas[i] + areas[picked[j]] -
-						   inter_area;
+				float union_area = areas[i] + areas[picked[j]] - inter_area;
 				// float IoU = inter_area / union_area
 				if (inter_area / union_area > nms_threshold)
 					keep = 0;
@@ -222,15 +207,13 @@ class AbcEdgeYOLO {
 		}
 	}
 
-	void decode_outputs(const float *prob, const int num_array,
-			    std::vector<Object> &objects,
-			    const float bbox_conf_thresh, const float scale,
-			    const int img_w, const int img_h)
+	void decode_outputs(const float *prob, const int num_array, std::vector<Object> &objects,
+			    const float bbox_conf_thresh, const float scale, const int img_w,
+			    const int img_h)
 	{
 
 		std::vector<Object> proposals;
-		generate_edgeyolo_proposals(num_array, prob, bbox_conf_thresh,
-					    proposals);
+		generate_edgeyolo_proposals(num_array, prob, bbox_conf_thresh, proposals);
 
 		qsort_descent_inplace(proposals);
 
@@ -244,12 +227,11 @@ class AbcEdgeYOLO {
 			// adjust offset to original unpadded
 			float x0 = (proposals[picked[i]].rect.x) / scale;
 			float y0 = (proposals[picked[i]].rect.y) / scale;
-			float x1 = (proposals[picked[i]].rect.x +
-				    proposals[picked[i]].rect.width) /
-				   scale;
-			float y1 = (proposals[picked[i]].rect.y +
-				    proposals[picked[i]].rect.height) /
+			float x1 = (proposals[picked[i]].rect.x + proposals[picked[i]].rect.width) /
 				   scale;
+			float y1 =
+				(proposals[picked[i]].rect.y + proposals[picked[i]].rect.height) /
+				scale;
 
 			// clip
 			x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f);

diff --git a/src/edgeyolo/edgeyolo_onnxruntime.cpp b/src/edgeyolo/edgeyolo_onnxruntime.cpp
@@ -6,10 +6,10 @@
 
 namespace edgeyolo_cpp {
 
-EdgeYOLOONNXRuntime::EdgeYOLOONNXRuntime(
-	file_name_t path_to_model, int intra_op_num_threads,
-	int inter_op_num_threads, const std::string &use_gpu_, int device_id,
-	bool use_parallel, float nms_th, float conf_th, int num_classes)
+EdgeYOLOONNXRuntime::EdgeYOLOONNXRuntime(file_name_t path_to_model, int intra_op_num_threads,
+					 int inter_op_num_threads, const std::string &use_gpu_,
+					 int device_id, bool use_parallel, float nms_th,
+					 float conf_th, int num_classes)
 	: AbcEdgeYOLO(nms_th, conf_th, num_classes),
 	  intra_op_num_threads_(intra_op_num_threads),
 	  inter_op_num_threads_(inter_op_num_threads),
@@ -20,41 +20,32 @@ EdgeYOLOONNXRuntime::EdgeYOLOONNXRuntime(
 	try {
 		Ort::SessionOptions session_options;
 
-		session_options.SetGraphOptimizationLevel(
-			GraphOptimizationLevel::ORT_ENABLE_ALL);
+		session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
 		if (this->use_parallel_) {
-			session_options.SetExecutionMode(
-				ExecutionMode::ORT_PARALLEL);
-			session_options.SetInterOpNumThreads(
-				this->inter_op_num_threads_);
+			session_options.SetExecutionMode(ExecutionMode::ORT_PARALLEL);
+			session_options.SetInterOpNumThreads(this->inter_op_num_threads_);
 		} else {
-			session_options.SetExecutionMode(
-				ExecutionMode::ORT_SEQUENTIAL);
+			session_options.SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL);
 		}
-		session_options.SetIntraOpNumThreads(
-			this->intra_op_num_threads_);
+		session_options.SetIntraOpNumThreads(this->intra_op_num_threads_);
 
 #ifdef _WIN32
 		if (this->use_gpu == "cuda") {
 			OrtCUDAProviderOptions cuda_option;
 			cuda_option.device_id = this->device_id_;
-			session_options.AppendExecutionProvider_CUDA(
-				cuda_option);
+			session_options.AppendExecutionProvider_CUDA(cuda_option);
 		}
 		if (this->use_gpu == "dml") {
 			auto &api = Ort::GetApi();
 			OrtDmlApi *dmlApi = nullptr;
-			Ort::ThrowOnError(api.GetExecutionProviderApi(
-				"DML", ORT_API_VERSION,
-				(const void **)&dmlApi));
-			Ort::ThrowOnError(
-				dmlApi->SessionOptionsAppendExecutionProvider_DML(
-					session_options, 0));
+			Ort::ThrowOnError(api.GetExecutionProviderApi("DML", ORT_API_VERSION,
+								      (const void **)&dmlApi));
+			Ort::ThrowOnError(dmlApi->SessionOptionsAppendExecutionProvider_DML(
+				session_options, 0));
 		}
 #endif
 
-		this->session_ = Ort::Session(this->env_, path_to_model.c_str(),
-					      session_options);
+		this->session_ = Ort::Session(this->env_, path_to_model.c_str(), session_options);
 	} catch (std::exception &e) {
 		std::cerr << e.what() << std::endl;
 		throw e;
@@ -63,31 +54,25 @@ EdgeYOLOONNXRuntime::EdgeYOLOONNXRuntime(
 	Ort::AllocatorWithDefaultOptions ort_alloc;
 
 	// Allocate input memory buffer
-	this->input_name_ = std::string(
-		this->session_.GetInputNameAllocated(0, ort_alloc).get());
+	this->input_name_ = std::string(this->session_.GetInputNameAllocated(0, ort_alloc).get());
 	auto input_info = this->session_.GetInputTypeInfo(0);
 	auto input_shape_info = input_info.GetTensorTypeAndShapeInfo();
 	std::vector<int64_t> input_shape = input_shape_info.GetShape();
-	ONNXTensorElementDataType input_tensor_type =
-		input_shape_info.GetElementType();
+	ONNXTensorElementDataType input_tensor_type = input_shape_info.GetElementType();
 	this->input_h_ = (int)(input_shape[2]);
 	this->input_w_ = (int)(input_shape[3]);
 
-	size_t input_byte_count =
-		sizeof(float) * input_shape_info.GetElementCount();
-	std::unique_ptr<uint8_t[]> input_buffer =
-		std::make_unique<uint8_t[]>(input_byte_count);
-	auto input_memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator,
-							    OrtMemTypeDefault);
+	size_t input_byte_count = sizeof(float) * input_shape_info.GetElementCount();
+	std::unique_ptr<uint8_t[]> input_buffer = std::make_unique<uint8_t[]>(input_byte_count);
+	auto input_memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);
 
-	this->input_tensor_ = Ort::Value::CreateTensor(
-		input_memory_info, input_buffer.get(), input_byte_count,
-		input_shape.data(), input_shape.size(), input_tensor_type);
+	this->input_tensor_ = Ort::Value::CreateTensor(input_memory_info, input_buffer.get(),
+						       input_byte_count, input_shape.data(),
+						       input_shape.size(), input_tensor_type);
 	this->input_buffer_.emplace_back(std::move(input_buffer));
 
 	// Allocate output memory buffer
-	this->output_name_ = std::string(
-		this->session_.GetOutputNameAllocated(0, ort_alloc).get());
+	this->output_name_ = std::string(this->session_.GetOutputNameAllocated(0, ort_alloc).get());
 
 	auto output_info = this->session_.GetOutputTypeInfo(0);
 	auto output_shape_info = output_info.GetTensorTypeAndShapeInfo();
@@ -100,16 +85,13 @@ EdgeYOLOONNXRuntime::EdgeYOLOONNXRuntime(
 	}
 	this->num_array_ /= (5 + this->num_classes_);
 
-	size_t output_byte_count =
-		sizeof(float) * output_shape_info.GetElementCount();
-	std::unique_ptr<uint8_t[]> output_buffer =
-		std::make_unique<uint8_t[]>(output_byte_count);
-	auto output_memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator,
-							     OrtMemTypeDefault);
+	size_t output_byte_count = sizeof(float) * output_shape_info.GetElementCount();
+	std::unique_ptr<uint8_t[]> output_buffer = std::make_unique<uint8_t[]>(output_byte_count);
+	auto output_memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);
 
-	this->output_tensor_ = Ort::Value::CreateTensor(
-		output_memory_info, output_buffer.get(), output_byte_count,
-		output_shape.data(), output_shape.size(), output_tensor_type);
+	this->output_tensor_ = Ort::Value::CreateTensor(output_memory_info, output_buffer.get(),
+							output_byte_count, output_shape.data(),
+							output_shape.size(), output_tensor_type);
 	this->output_buffer_.emplace_back(std::move(output_buffer));
 }
 
@@ -126,17 +108,17 @@ std::vector<Object> EdgeYOLOONNXRuntime::inference(const cv::Mat &frame)
 
 	// Inference
 	Ort::RunOptions run_options;
-	this->session_.Run(run_options, input_names_, &this->input_tensor_, 1,
-			   output_names_, &this->output_tensor_, 1);
+	this->session_.Run(run_options, input_names_, &this->input_tensor_, 1, output_names_,
+			   &this->output_tensor_, 1);
 
 	float *net_pred = (float *)this->output_buffer_[0].get();
 
 	// post process
 	float scale = std::fminf((float)input_w_ / (float)frame.cols,
 				 (float)input_h_ / (float)frame.rows);
 	std::vector<Object> objects;
-	decode_outputs(net_pred, this->num_array_, objects,
-		       this->bbox_conf_thresh_, scale, frame.cols, frame.rows);
+	decode_outputs(net_pred, this->num_array_, objects, this->bbox_conf_thresh_, scale,
+		       frame.cols, frame.rows);
 	return objects;
 }
 

diff --git a/src/edgeyolo/edgeyolo_onnxruntime.hpp b/src/edgeyolo/edgeyolo_onnxruntime.hpp
@@ -17,9 +17,8 @@ namespace edgeyolo_cpp {
 class EdgeYOLOONNXRuntime : public AbcEdgeYOLO {
 public:
 	EdgeYOLOONNXRuntime(file_name_t path_to_model, int intra_op_num_threads,
-			    int inter_op_num_threads = 1,
-			    const std::string &use_gpu_ = "", int device_id = 0,
-			    bool use_parallel = false, float nms_th = 0.45f,
+			    int inter_op_num_threads = 1, const std::string &use_gpu_ = "",
+			    int device_id = 0, bool use_parallel = false, float nms_th = 0.45f,
 			    float conf_th = 0.3f, int num_classes = 80);
 	std::vector<Object> inference(const cv::Mat &frame) override;