forked from wang-xinyu/tensorrtx
-
Notifications
You must be signed in to change notification settings - Fork 0
/
InferenceEngine.h
executable file
·76 lines (59 loc) · 2.27 KB
/
InferenceEngine.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
/**************************************************************************
* Handle memory pre-alloc
* both on host(pinned memory, allow CUDA DMA) & device
*************************************************************************/
#pragma once
#include <thread>
#include <chrono>
#include <memory>
#include <functional>
#include <opencv2/opencv.hpp>
#include "utils.h"
#include "holder.h"
#include "logging.h"
#include "NvInfer.h"
#include "cuda_runtime_api.h"
// NOTE(review): `static` at namespace scope in a header gives every
// translation unit that includes this file its OWN Logger instance
// (separate objects, internal linkage). If a single shared logger is
// intended, define it in one .cpp with an `extern` declaration here,
// or use a C++17 `inline` variable — confirm the project's C++ standard.
static Logger gLogger;
namespace trt {
// Bundle of everything needed to construct an InferenceEngine:
// the serialized TensorRT engine blob, tensor binding names, I/O
// geometry, and the CUDA device to run on.
struct EngineConfig {
const char* input_name;   // name of the engine's input binding tensor
const char* output_name;  // name of the engine's output binding tensor
std::shared_ptr<char> trtModelStream; // serialized TensorRT engine bytes to deserialize
int max_batch_size; /* max batch size used when the engine was created */
int input_h;        // network input height
int input_w;        // network input width
int output_size;    // output tensor size -- presumably element count per batch (scaled by sizeof(float) internally); TODO confirm against the .cpp
int stream_size;    // presumably the byte length of trtModelStream -- verify against caller
int device_id;      // CUDA device ordinal (passed to cudaSetDevice) -- confirm
};
// Owns a deserialized TensorRT engine plus pre-allocated host and
// device buffers and a CUDA stream (see file header: pinned host
// memory to allow CUDA DMA), so repeated doInference() calls avoid
// per-call allocation. Movable, but not copyable or move-assignable.
class InferenceEngine {
public:
// Builds runtime/engine/context and allocates buffers from the config.
InferenceEngine(const EngineConfig &enginecfg);
InferenceEngine(InferenceEngine &&other) noexcept;
~InferenceEngine();
// Non-copyable / non-assignable: instance owns CUDA buffers and
// TensorRT objects whose ownership must stay unique.
InferenceEngine(const InferenceEngine &) = delete;
InferenceEngine& operator=(const InferenceEngine &) = delete;
InferenceEngine& operator=(InferenceEngine && other) = delete;
// Runs one inference pass for `inference_batch_size` samples.
// `preprocessing` is invoked with the host input buffer so the caller
// can fill it before the engine executes. Returns success/failure.
bool doInference(const int inference_batch_size, std::function<void(float*)> preprocessing);
// Host-side output buffer; contents are those of the last doInference()
// call. Non-owning pointer — do not free; lifetime tied to this object.
float* getOutput() { return _prob; }
// NOTE(review): returns the *calling* thread's id each time, not a
// thread id stored by the engine — confirm this is the intended contract.
std::thread::id getThreadID() { return std::this_thread::get_id(); }
private:
EngineConfig _engineCfg;
float* _data{nullptr};  // host input buffer (pinned, per file header)
float* _prob{nullptr};  // host output buffer returned by getOutput()
// Pointers to input and output device buffers to pass to engine.
// Engine requires exactly IEngine::getNbBindings() number of buffers.
void* _buffers[2];
// In order to bind the buffers, we need to know the names of the input and output tensors.
// Note that indices are guaranteed to be less than IEngine::getNbBindings()
int _inputIndex;
int _outputIndex;
int _inputSize;
int _outputSize;
// Element size used when sizing float buffers.
static constexpr std::size_t _depth{sizeof(float)};
// RAII holders for TensorRT objects (destroyed in reverse declaration
// order: context, then engine, then runtime — the order TensorRT requires).
TensorRTHolder<nvinfer1::IRuntime> _runtime{nullptr};
TensorRTHolder<nvinfer1::ICudaEngine> _engine{nullptr};
TensorRTHolder<nvinfer1::IExecutionContext> _context{nullptr};
// Shared CUDA stream; shared_ptr presumably lets a moved-from engine
// and its successor reference the same stream -- confirm in the .cpp.
std::shared_ptr<cudaStream_t> _streamptr;
};
}