Skip to content

Commit

Permalink
set default FP16 and update GELAN
Browse files Browse the repository at this point in the history
  • Loading branch information
WuxinrongY committed Apr 23, 2024
1 parent d033a63 commit fdd1fc2
Show file tree
Hide file tree
Showing 7 changed files with 387 additions and 11 deletions.
23 changes: 17 additions & 6 deletions yolov9/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,21 @@ The Pytorch implementation is [WongKinYiu/yolov9](https://github.com/WongKinYiu/

## Progress
- [x] YOLOv9-c:
- [x] FP32
- [x] FP16
- [x] INT8
- [x] FP32
- [x] FP16
- [x] INT8
- [x] YOLOv9-e:
- [x] FP32
- [x] FP16
- [x] INT8
- [x] FP32
- [x] FP16
- [x] INT8
- [x] GELAN-c:
- [x] FP32
- [x] FP16
- [x] INT8
- [x] GELAN-e:
- [x] FP32
- [x] FP16
- [x] INT8

## Requirements

Expand All @@ -32,7 +40,10 @@ The speed test is done on a desktop with R7-5700G CPU and RTX 4060Ti GPU. The in
| tensorrt | YOLOv9-c | 13.5ms | 4.6ms | 3.0ms |
| tensorrt | YOLOv9-e | 8.3ms | 3.2ms | 2.15ms |

**GELAN will be updated later.**

YOLOv9-e is faster than YOLOv9-c in TensorRT because YOLOv9-e requires fewer layers at inference time.

```
YOLOv9-c:
[[31, 34, 37, 16, 19, 22], 1, DualDDetect, [nc]] # [A3, A4, A5, P3, P4, P5]
Expand Down
11 changes: 8 additions & 3 deletions yolov9/demo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,12 @@ void serialize_engine(unsigned int max_batchsize, std::string& wts_name, std::st
serialized_engine = build_engine_yolov9_e(max_batchsize, builder, config, DataType::kFLOAT, wts_name);
} else if (sub_type == "c") {
serialized_engine = build_engine_yolov9_c(max_batchsize, builder, config, DataType::kFLOAT, wts_name);
} else {
} else if (sub_type == "ge") {
serialized_engine = build_engine_gelan_e(max_batchsize, builder, config, DataType::kFLOAT, wts_name);
} else if (sub_type == "gc") {
serialized_engine = build_engine_gelan_c(max_batchsize, builder, config, DataType::kFLOAT, wts_name);
}
else {
return;
}
assert(serialized_engine != nullptr);
Expand Down Expand Up @@ -113,15 +118,15 @@ int main(int argc, char** argv) {

std::string wts_name = "";
std::string engine_name = "";
std::string img_dir;
std::string img_dir = "";
std::string sub_type = "";
// speed test or inference
// const int speed_test_iter = 1000;
const int speed_test_iter = 1;

if (!parse_args(argc, argv, wts_name, engine_name, img_dir, sub_type)) {
std::cerr << "Arguments not right!" << std::endl;
std::cerr << "./yolov9 -s [.wts] [.engine] [c/e] // serialize model to plan file" << std::endl;
std::cerr << "./yolov9 -s [.wts] [.engine] [c/e/gc/ge] // serialize model to plan file" << std::endl;
std::cerr << "./yolov9 -d [.engine] ../samples // deserialize plan file and run inference" << std::endl;
return -1;
}
Expand Down
3 changes: 3 additions & 0 deletions yolov9/include/block.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,6 @@ nvinfer1::IShuffleLayer* DFL(nvinfer1::INetworkDefinition* network, std::map<std
// Adds a layer group to `network` on top of `input` and returns the resulting layer.
// Weights are looked up in `weightMap`; `lname` is presumably the weight-name prefix.
// NOTE(review): the name suggests convolution + batch norm with no activation, and
// ch/k/s/p/g are presumably output channels, kernel size, stride, padding and groups —
// confirm against the definition in block.cpp.
nvinfer1::ILayer* convBnNoAct(nvinfer1::INetworkDefinition* network,
std::map<std::string, nvinfer1::Weights>& weightMap, nvinfer1::ITensor& input, int ch,
int k, int s, int p, std::string lname, int g);
std::vector<IConcatenationLayer*> DDetect(INetworkDefinition* network, std::map<std::string, Weights>& weightMap,
std::vector<ILayer*> dets, int cls, std::vector<int> ch,
std::string lname);
2 changes: 1 addition & 1 deletion yolov9/include/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

// For INT8, you need prepare the calibration dataset, please refer to
// https://github.com/wang-xinyu/tensorrtx/tree/master/yolov5#int8-quantization
#define USE_INT8 // set USE_INT8 or USE_FP16 or USE_FP32
#define USE_FP16 // set USE_INT8 or USE_FP16 or USE_FP32

#ifdef USE_INT8
const static char* gCalibTablePath = "./calib";
Expand Down
4 changes: 3 additions & 1 deletion yolov9/include/model.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,6 @@
#include <NvInfer.h>
#include <string>
// Engine builders, one per model variant. Each takes the maximum batch size, a TensorRT
// builder and builder config, the data type to build with, and the path/name of the .wts
// weight file, and returns the result as nvinfer1::IHostMemory* (demo.cpp stores it in
// `serialized_engine` and asserts that it is not nullptr).
// demo.cpp selects the variant from the sub-type argument: "e", "c", "ge" or "gc".

// YOLOv9-e (sub-type "e").
nvinfer1::IHostMemory* build_engine_yolov9_e(unsigned int maxBatchSize, nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt, std::string& wts_name);
// YOLOv9-c (sub-type "c").
nvinfer1::IHostMemory* build_engine_yolov9_c(unsigned int maxBatchSize, nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt, std::string& wts_name);
nvinfer1::IHostMemory* build_engine_yolov9_c(unsigned int maxBatchSize, nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt, std::string& wts_name);
// GELAN-e (sub-type "ge").
nvinfer1::IHostMemory* build_engine_gelan_e(unsigned int maxBatchSize, nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt, std::string& wts_name);
// GELAN-c (sub-type "gc").
nvinfer1::IHostMemory* build_engine_gelan_c(unsigned int maxBatchSize, nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt, std::string& wts_name);
33 changes: 33 additions & 0 deletions yolov9/src/block.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -423,3 +423,36 @@ std::vector<IConcatenationLayer*> DualDDetect(INetworkDefinition* network, std::
}
return ret;
}

/**
 * Builds the DDetect detection head on top of the given feature layers.
 *
 * For every layer in `dets` a box-regression branch (weights "<lname>.cv2.<i>") and a
 * classification branch (weights "<lname>.cv3.<i>") are added. The classification output is
 * reshaped to {cls, d[1] * d[2]}, the box output is passed through DFL (weights
 * "<lname>.dfl"), and both are concatenated.
 *
 * @param network   network definition the new layers are added to
 * @param weightMap weights, looked up by the names described above
 * @param dets      feature layers feeding the head; output 0 of each layer is used
 * @param cls       number of classes (output channels of the classification branch)
 * @param ch        channel counts of the inputs; only ch[0] is read, so it must not be empty
 * @param lname     weight-name prefix of the head
 * @return one concatenation layer (DFL box output followed by class scores) per entry of `dets`
 */
std::vector<IConcatenationLayer*> DDetect(INetworkDefinition* network, std::map<std::string, Weights>& weightMap,
                                          std::vector<ILayer*> dets, int cls, std::vector<int> ch,
                                          std::string lname) {
    const int reg_max = 16;
    // Hidden widths of the two branches, derived from the first input's channel count.
    const int c2 = std::max(ch[0] / 4, reg_max * 4);
    const int c3 = std::max(ch[0], std::min(cls * 2, 128));

    std::vector<ILayer*> bboxlayers;
    std::vector<ILayer*> clslayers;
    bboxlayers.reserve(dets.size());
    clslayers.reserve(dets.size());

    for (size_t i = 0; i < dets.size(); ++i) {
        const std::string idx = std::to_string(i);
        nvinfer1::ITensor& feature = *dets[i]->getOutput(0);

        // Conv(x, c2, 3), Conv(c2, c2, 3, g=4), nn.Conv2d(c2, 4 * self.reg_max, 1, groups=4)
        bboxlayers.push_back(DetectBbox_Conv(network, weightMap, feature, c2, reg_max, lname + ".cv2." + idx));

        // Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, self.nc, 1)
        auto cls_layer = DetectCls_Conv(network, weightMap, feature, c3, cls, lname + ".cv3." + idx);
        const auto dim = cls_layer->getOutput(0)->getDimensions();
        nvinfer1::IShuffleLayer* shuffle = network->addShuffle(*cls_layer->getOutput(0));
        // Reshape with `cls`, the channel count the class branch was built with, rather than
        // the global kNumClass, so the reshape volume always matches the tensor.
        // NOTE(review): d[1] * d[2] assumes a (C, H, W) layout without a batch dimension — confirm.
        shuffle->setReshapeDimensions(nvinfer1::Dims2{cls, dim.d[1] * dim.d[2]});
        clslayers.push_back(shuffle);
    }

    std::vector<IConcatenationLayer*> ret;
    ret.reserve(dets.size());
    for (size_t i = 0; i < dets.size(); ++i) {
        // softmax 16*4, w, h => 16, 4, w, h
        auto loc = DFL(network, weightMap, *bboxlayers[i]->getOutput(0), reg_max, 1, 1, 0, lname + ".dfl");
        nvinfer1::ITensor* inputTensor[] = {loc->getOutput(0), clslayers[i]->getOutput(0)};
        ret.push_back(network->addConcatenation(inputTensor, 2));
    }
    return ret;
}
Loading

0 comments on commit fdd1fc2

Please sign in to comment.