Disabled conversion to float of model's input #25555

Merged · 4 commits · May 16, 2024
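In substance, this PR stops the DNN input DataLayer from force-converting every model input to CV_32F: non-float inputs (CV_8S, CV_8U, CV_32S, CV_64S) now keep their element type end to end, mean/scale preprocessing is accepted only for float32 inputs, and the CUDA backend gains the 8-bit kernel instantiations and wrappers this requires. A minimal usage sketch of the resulting behavior (hypothetical model file name; assumes an ONNX model that declares an 8-bit input):

```cpp
#include <opencv2/dnn.hpp>
#include <vector>

int main()
{
    // Hypothetical model; assumes its input tensor is declared as uint8.
    cv::dnn::Net net = cv::dnn::readNetFromONNX("model_with_uint8_input.onnx");

    // A 1x3x224x224 uint8 blob. With this PR it reaches the network as CV_8U
    // instead of being converted to CV_32F by the input DataLayer.
    cv::Mat blob(std::vector<int>{1, 3, 224, 224}, CV_8U, cv::Scalar(0));
    net.setInput(blob);  // scalefactor/mean must stay at their defaults here

    cv::Mat out = net.forward();
    return 0;
}
```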
4 changes: 4 additions & 0 deletions modules/dnn/src/cuda/concat.cu
@@ -152,6 +152,8 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
     template void concat<__half>(const Stream&, TensorSpan<__half>, std::size_t, TensorView<__half>, std::size_t);
 #endif
     template void concat<float>(const Stream&, TensorSpan<float>, std::size_t, TensorView<float>, std::size_t);
+    template void concat<int8_t>(const Stream&, TensorSpan<int8_t>, std::size_t, TensorView<int8_t>, std::size_t);
+    template void concat<uint8_t>(const Stream&, TensorSpan<uint8_t>, std::size_t, TensorView<uint8_t>, std::size_t);
     template void concat<int32_t>(const Stream&, TensorSpan<int32_t>, std::size_t, TensorView<int32_t>, std::size_t);
     template void concat<int64_t>(const Stream&, TensorSpan<int64_t>, std::size_t, TensorView<int64_t>, std::size_t);

@@ -277,6 +279,8 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
     template void concat_with_offsets(const Stream&, TensorSpan<__half>, TensorView<__half>, std::vector<std::size_t>);
 #endif
     template void concat_with_offsets(const Stream&, TensorSpan<float>, TensorView<float>, std::vector<std::size_t>);
+    template void concat_with_offsets(const Stream&, TensorSpan<int8_t>, TensorView<int8_t>, std::vector<std::size_t>);
+    template void concat_with_offsets(const Stream&, TensorSpan<uint8_t>, TensorView<uint8_t>, std::vector<std::size_t>);
     template void concat_with_offsets(const Stream&, TensorSpan<int32_t>, TensorView<int32_t>, std::vector<std::size_t>);
     template void concat_with_offsets(const Stream&, TensorSpan<int64_t>, TensorView<int64_t>, std::vector<std::size_t>);
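A note on the pattern repeated in this and the following .cu files: these are explicit template instantiations. The kernel templates are compiled by nvcc inside the .cu translation unit, so every element type the C++ host code may request has to be instantiated there; otherwise callers in .cpp files fail at link time. A minimal illustration of the mechanism (hypothetical names, not OpenCV source):

```cpp
// kernels.cu -- compiled by nvcc
#include <cstddef>
#include <cstdint>

template <class T>
__global__ void scale_kernel(T* data, std::size_t n, T factor)
{
    std::size_t i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        data[i] *= factor;
}

template <class T>
void scale(T* data, std::size_t n, T factor)
{
    scale_kernel<T><<<static_cast<unsigned>((n + 255) / 256), 256>>>(data, n, factor);
}

// Without these lines, host code in a .cpp file calling scale<int8_t>(...)
// would get an "undefined reference" error at link time.
template void scale<float>(float*, std::size_t, float);
template void scale<std::int8_t>(std::int8_t*, std::size_t, std::int8_t);
template void scale<std::uint8_t>(std::uint8_t*, std::size_t, std::uint8_t);
```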
20 changes: 20 additions & 0 deletions modules/dnn/src/cuda/eltwise_ops.cu
@@ -371,6 +371,26 @@ void eltwise_fmod_2(const Stream& stream, TensorSpan<T> output, TensorView<T> x, TensorView<T> y)
     template void eltwise_max_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);
     template void eltwise_min_2(const Stream& stream, TensorSpan<float> output, TensorView<float> x, TensorView<float> y);

+    template void eltwise_mod_2(const Stream& stream, TensorSpan<int8_t> output, TensorView<int8_t> x, TensorView<int8_t> y);
+    template void eltwise_fmod_2(const Stream& stream, TensorSpan<int8_t> output, TensorView<int8_t> x, TensorView<int8_t> y);
+    template void eltwise_sub_2(const Stream& stream, TensorSpan<int8_t> output, TensorView<int8_t> x, TensorView<int8_t> y);
+    template void eltwise_div_2(const Stream& stream, TensorSpan<int8_t> output, TensorView<int8_t> x, TensorView<int8_t> y);
+    template void eltwise_prod_2(const Stream& stream, TensorSpan<int8_t> output, TensorView<int8_t> x, TensorView<int8_t> y);
+    template void eltwise_sum_coeff_2(const Stream&, TensorSpan<int8_t>, int8_t, TensorView<int8_t>, int8_t, TensorView<int8_t>);
+    template void eltwise_sum_2(const Stream& stream, TensorSpan<int8_t> output, TensorView<int8_t> x, TensorView<int8_t> y);
+    template void eltwise_max_2(const Stream& stream, TensorSpan<int8_t> output, TensorView<int8_t> x, TensorView<int8_t> y);
+    template void eltwise_min_2(const Stream& stream, TensorSpan<int8_t> output, TensorView<int8_t> x, TensorView<int8_t> y);
+
+    template void eltwise_mod_2(const Stream& stream, TensorSpan<uint8_t> output, TensorView<uint8_t> x, TensorView<uint8_t> y);
+    template void eltwise_fmod_2(const Stream& stream, TensorSpan<uint8_t> output, TensorView<uint8_t> x, TensorView<uint8_t> y);
+    template void eltwise_sub_2(const Stream& stream, TensorSpan<uint8_t> output, TensorView<uint8_t> x, TensorView<uint8_t> y);
+    template void eltwise_div_2(const Stream& stream, TensorSpan<uint8_t> output, TensorView<uint8_t> x, TensorView<uint8_t> y);
+    template void eltwise_prod_2(const Stream& stream, TensorSpan<uint8_t> output, TensorView<uint8_t> x, TensorView<uint8_t> y);
+    template void eltwise_sum_coeff_2(const Stream&, TensorSpan<uint8_t>, uint8_t, TensorView<uint8_t>, uint8_t, TensorView<uint8_t>);
+    template void eltwise_sum_2(const Stream& stream, TensorSpan<uint8_t> output, TensorView<uint8_t> x, TensorView<uint8_t> y);
+    template void eltwise_max_2(const Stream& stream, TensorSpan<uint8_t> output, TensorView<uint8_t> x, TensorView<uint8_t> y);
+    template void eltwise_min_2(const Stream& stream, TensorSpan<uint8_t> output, TensorView<uint8_t> x, TensorView<uint8_t> y);
+
     template void eltwise_mod_2(const Stream& stream, TensorSpan<int32_t> output, TensorView<int32_t> x, TensorView<int32_t> y);
     template void eltwise_fmod_2(const Stream& stream, TensorSpan<int32_t> output, TensorView<int32_t> x, TensorView<int32_t> y);
     template void eltwise_sub_2(const Stream& stream, TensorSpan<int32_t> output, TensorView<int32_t> x, TensorView<int32_t> y);
4 changes: 4 additions & 0 deletions modules/dnn/src/cuda/fill_copy.cu
@@ -67,6 +67,8 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
     template void fill(const Stream&, Span<__half>, __half);
 #endif
     template void fill(const Stream&, Span<float>, float);
+    template void fill(const Stream&, Span<int8_t>, int8_t);
+    template void fill(const Stream&, Span<uint8_t>, uint8_t);
     template void fill(const Stream&, Span<int>, int);
     template void fill(const Stream&, Span<int64_t>, int64_t);

@@ -95,6 +97,8 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
     template void copy(const Stream&, Span<__half>, View<__half>);
 #endif
     template void copy(const Stream&, Span<float>, View<float>);
+    template void copy(const Stream&, Span<int8_t>, View<int8_t>);
+    template void copy(const Stream&, Span<uint8_t>, View<uint8_t>);
     template void copy(const Stream&, Span<int32_t>, View<int32_t>);
     template void copy(const Stream&, Span<int64_t>, View<int64_t>);
14 changes: 14 additions & 0 deletions modules/dnn/src/cuda/limits.hpp
@@ -31,6 +31,20 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace device {
     __device__ static float lowest() { return -FLT_MAX; }
 };

+template <>
+struct numeric_limits<signed char> {
+    __device__ static signed char min() { return 1; }
+    __device__ static signed char max() { return SCHAR_MAX; }
+    __device__ static signed char lowest() { return SCHAR_MIN; }
+};
+
+template <>
+struct numeric_limits<unsigned char> {
+    __device__ static unsigned char min() { return 1; }
+    __device__ static unsigned char max() { return UCHAR_MAX; }
+    __device__ static unsigned char lowest() { return 0; }
+};
+
 template <>
 struct numeric_limits<int32_t> {
     __device__ static int32_t min() { return 1; }
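These device-side numeric_limits specializations follow the file's existing convention, visible in the float and int32_t specializations around them: min() is the smallest positive value, max() the largest representable one, and lowest() the most negative, mirroring std::numeric_limits. Kernels typically seed max-style reductions with lowest(). A hedged illustration (simplified, not OpenCV source):

```cpp
// Illustrative CUDA kernel showing the intended use of lowest():
// seed a running maximum so that any real element replaces it.
template <class T>
__global__ void block_max(const T* in, int n, T* out)
{
    T best = numeric_limits<T>::lowest();  // SCHAR_MIN for int8, 0 for uint8
    for (int i = threadIdx.x; i < n; i += blockDim.x)
        best = in[i] > best ? in[i] : best;
    // ... a block-wide reduction of `best` would follow here
    if (threadIdx.x == 0)
        *out = best;
}
```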
40 changes: 40 additions & 0 deletions modules/dnn/src/cuda/max_unpooling.cu
@@ -257,6 +257,26 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
         const std::vector<std::size_t>&, const std::vector<std::size_t>&,
         const std::vector<std::size_t>&);

+    template void max_pooling_with_indices(const Stream&,
+        TensorSpan<int8_t>, TensorSpan<int32_t>, TensorView<int8_t>,
+        const std::vector<std::size_t>&, const std::vector<std::size_t>&,
+        const std::vector<std::size_t>&);
+
+    template void max_pooling_with_indices(const Stream&,
+        TensorSpan<int8_t>, TensorSpan<int64_t>, TensorView<int8_t>,
+        const std::vector<std::size_t>&, const std::vector<std::size_t>&,
+        const std::vector<std::size_t>&);
+
+    template void max_pooling_with_indices(const Stream&,
+        TensorSpan<uint8_t>, TensorSpan<int32_t>, TensorView<uint8_t>,
+        const std::vector<std::size_t>&, const std::vector<std::size_t>&,
+        const std::vector<std::size_t>&);
+
+    template void max_pooling_with_indices(const Stream&,
+        TensorSpan<uint8_t>, TensorSpan<int64_t>, TensorView<uint8_t>,
+        const std::vector<std::size_t>&, const std::vector<std::size_t>&,
+        const std::vector<std::size_t>&);
+
     template void max_pooling_with_indices(const Stream&,
         TensorSpan<int32_t>, TensorSpan<int32_t>, TensorView<int32_t>,
         const std::vector<std::size_t>&, const std::vector<std::size_t>&,
@@ -365,6 +385,26 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
         const std::vector<std::size_t>&, const std::vector<std::size_t>&,
         const std::vector<std::size_t>&);

+    template void max_unpooling(const Stream&,
+        TensorSpan<int8_t>, TensorView<int8_t>, TensorView<int32_t>,
+        const std::vector<std::size_t>&, const std::vector<std::size_t>&,
+        const std::vector<std::size_t>&);
+
+    template void max_unpooling(const Stream&,
+        TensorSpan<int8_t>, TensorView<int8_t>, TensorView<int64_t>,
+        const std::vector<std::size_t>&, const std::vector<std::size_t>&,
+        const std::vector<std::size_t>&);
+
+    template void max_unpooling(const Stream&,
+        TensorSpan<uint8_t>, TensorView<uint8_t>, TensorView<int32_t>,
+        const std::vector<std::size_t>&, const std::vector<std::size_t>&,
+        const std::vector<std::size_t>&);
+
+    template void max_unpooling(const Stream&,
+        TensorSpan<uint8_t>, TensorView<uint8_t>, TensorView<int64_t>,
+        const std::vector<std::size_t>&, const std::vector<std::size_t>&,
+        const std::vector<std::size_t>&);
+
     template void max_unpooling(const Stream&,
         TensorSpan<int32_t>, TensorView<int32_t>, TensorView<int32_t>,
         const std::vector<std::size_t>&, const std::vector<std::size_t>&,
2 changes: 2 additions & 0 deletions modules/dnn/src/cuda/padding.cu
@@ -197,6 +197,8 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
     template void copy_with_reflection101(const Stream&, TensorSpan<__half>, TensorView<__half>, std::vector<std::pair<std::size_t, std::size_t>> ranges);
 #endif
     template void copy_with_reflection101(const Stream&, TensorSpan<float>, TensorView<float>, std::vector<std::pair<std::size_t, std::size_t>> ranges);
+    template void copy_with_reflection101(const Stream&, TensorSpan<int8_t>, TensorView<int8_t>, std::vector<std::pair<std::size_t, std::size_t>> ranges);
+    template void copy_with_reflection101(const Stream&, TensorSpan<uint8_t>, TensorView<uint8_t>, std::vector<std::pair<std::size_t, std::size_t>> ranges);
     template void copy_with_reflection101(const Stream&, TensorSpan<int32_t>, TensorView<int32_t>, std::vector<std::pair<std::size_t, std::size_t>> ranges);
     template void copy_with_reflection101(const Stream&, TensorSpan<int64_t>, TensorView<int64_t>, std::vector<std::pair<std::size_t, std::size_t>> ranges);
4 changes: 4 additions & 0 deletions modules/dnn/src/cuda/permute.cu
@@ -107,6 +107,8 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {

     template void transpose(const Stream&, Span<__half>, View<__half>, std::size_t, std::size_t);
     template void transpose(const Stream&, Span<float>, View<float>, std::size_t, std::size_t);
+    template void transpose(const Stream&, Span<int8_t>, View<int8_t>, std::size_t, std::size_t);
+    template void transpose(const Stream&, Span<uint8_t>, View<uint8_t>, std::size_t, std::size_t);
     template void transpose(const Stream&, Span<int32_t>, View<int32_t>, std::size_t, std::size_t);
     template void transpose(const Stream&, Span<int64_t>, View<int64_t>, std::size_t, std::size_t);

@@ -286,6 +288,8 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
     template void permute(const Stream&, TensorSpan<__half>, TensorView<__half>, std::vector<std::size_t>);
 #endif
     template void permute(const Stream&, TensorSpan<float>, TensorView<float>, std::vector<std::size_t>);
+    template void permute(const Stream&, TensorSpan<int8_t>, TensorView<int8_t>, std::vector<std::size_t>);
+    template void permute(const Stream&, TensorSpan<uint8_t>, TensorView<uint8_t>, std::vector<std::size_t>);
     template void permute(const Stream&, TensorSpan<int32_t>, TensorView<int32_t>, std::vector<std::size_t>);
     template void permute(const Stream&, TensorSpan<int64_t>, TensorView<int64_t>, std::vector<std::size_t>);
2 changes: 2 additions & 0 deletions modules/dnn/src/cuda/slice.cu
@@ -199,6 +199,8 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
     template void slice(const Stream&, TensorSpan<__half>, TensorView<__half>, std::vector<std::size_t>);
 #endif
     template void slice(const Stream&, TensorSpan<float>, TensorView<float>, std::vector<std::size_t>);
+    template void slice(const Stream&, TensorSpan<int8_t>, TensorView<int8_t>, std::vector<std::size_t>);
+    template void slice(const Stream&, TensorSpan<uint8_t>, TensorView<uint8_t>, std::vector<std::size_t>);
     template void slice(const Stream&, TensorSpan<int32_t>, TensorView<int32_t>, std::vector<std::size_t>);
     template void slice(const Stream&, TensorSpan<int64_t>, TensorView<int64_t>, std::vector<std::size_t>);
10 changes: 6 additions & 4 deletions modules/dnn/src/layer_internals.hpp
@@ -154,9 +154,10 @@ struct DataLayer : public Layer
         for (int i = 0; i < inputsData.size(); ++i)
         {
             bool isFP16 = outputs[i].depth() == CV_16F;
-            if (inputsData[i].type() == CV_32S || inputsData[i].type() == CV_64S) {
+            if (inputsData[i].type() != CV_32F)
+            {
                 CV_CheckTypeEQ(outputs[i].type(), inputsData[i].type(), "");
-                CV_Assert(means[i] == Scalar() && scaleFactors[i] == 1.0);
+                CV_CheckTrue(means[i] == Scalar() && scaleFactors[i] == 1.0, "Input mean and scale are supported only for float32 input");
                 inputsData[i].copyTo(outputs[i]);
                 continue;
             }
@@ -221,9 +222,10 @@ struct DataLayer : public Layer
         for (int i = 0; i < inputsData.size(); ++i)
         {
             bool isFP16 = outputs[i].depth() == CV_16F;
-            if (inputsData[i].type() == CV_32S || inputsData[i].type() == CV_64S) {
+            if (inputsData[i].type() != CV_32F)
+            {
                 CV_CheckTypeEQ(outputs[i].type(), inputsData[i].type(), "");
-                CV_Assert(means[i] == Scalar() && scaleFactors[i] == 1.0);
+                CV_CheckTrue(means[i] == Scalar() && scaleFactors[i] == 1.0, "Input mean and scale are supported only for float32 input");
                 inputsData[i].copyTo(outputs[i]);
                 continue;
             }
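The practical consequence of the DataLayer change: a non-CV_32F input is copied through unchanged, and non-default mean/scale preprocessing on such an input now fails with an explicit check instead of being silently applied after a float conversion. A sketch (assumes `net` is a loaded cv::dnn::Net):

```cpp
cv::Mat blob(std::vector<int>{1, 3, 8, 8}, CV_8S, cv::Scalar(0));
net.setInput(blob);  // OK: the CV_8S type is preserved through the input layer

// This would now trigger the CV_CheckTrue above with the message
// "Input mean and scale are supported only for float32 input":
// net.setInput(blob, "", 1.0 / 255.0, cv::Scalar(127));
```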
4 changes: 1 addition & 3 deletions modules/dnn/src/layers/nary_eltwise_layers.cpp
@@ -359,9 +359,7 @@ class NaryEltwiseLayerImpl CV_FINAL : public NaryEltwiseLayer
         for (auto input : inputs)
         {
             CV_CheckTypeEQ(inputs[0], input, "All inputs should have equal types");
-            if (preferableTarget == DNN_TARGET_CUDA_FP16 || preferableTarget == DNN_TARGET_CUDA)
-                CV_CheckType(input, input == CV_32F || input == CV_32S || input == CV_64S, "Unsupported type");
-            else if (preferableTarget == DNN_TARGET_OPENCL_FP16)
+            if (preferableTarget == DNN_TARGET_OPENCL_FP16)
                 CV_CheckType(input, input == CV_16F || input == CV_8S || input == CV_8U || input == CV_32S || input == CV_64S, "");
             else
                 CV_CheckType(input, input == CV_32F || input == CV_8S || input == CV_8U || input == CV_32S || input == CV_64S, "");
6 changes: 5 additions & 1 deletion modules/dnn/src/legacy_backend.cpp
@@ -90,7 +90,7 @@ Ptr<BackendWrapper> wrapMat(int backendId, int targetId, cv::Mat& m)
     CV_Assert(haveCUDA());

 #ifdef HAVE_CUDA
-    CV_CheckType(m.depth(), m.depth() == CV_32F || m.depth() == CV_32S || m.depth() == CV_64S, "Unsupported type for CUDA");
+    CV_CheckType(m.depth(), m.depth() == CV_32F || m.depth() == CV_8S || m.depth() == CV_8U || m.depth() == CV_32S || m.depth() == CV_64S, "Unsupported type for CUDA");
     CV_Assert(IS_DNN_CUDA_TARGET(targetId));
     switch (m.depth())
     {
@@ -99,6 +99,10 @@ Ptr<BackendWrapper> wrapMat(int backendId, int targetId, cv::Mat& m)
             return CUDABackendWrapperFP16::create(m);
         else
             return CUDABackendWrapperFP32::create(m);
+    case CV_8S:
+        return CUDABackendWrapperINT8::create(m);
+    case CV_8U:
+        return CUDABackendWrapperUINT8::create(m);
     case CV_32S:
         return CUDABackendWrapperINT32::create(m);
     case CV_64S:
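CUDABackendWrapperINT8 and CUDABackendWrapperUINT8 are presumably defined alongside the existing FP16/FP32/INT32/INT64 aliases as instantiations of the generic CUDA wrapper, roughly as follows (an assumed sketch; the defining diff is not shown on this page):

```cpp
// Assumed aliases, by analogy with CUDABackendWrapperFP32 etc.:
using CUDABackendWrapperINT8  = GenericCUDABackendWrapper<int8_t,  CV_8S>;
using CUDABackendWrapperUINT8 = GenericCUDABackendWrapper<uint8_t, CV_8U>;
```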
5 changes: 1 addition & 4 deletions modules/dnn/src/net_impl.cpp
@@ -552,7 +552,7 @@ void Net::Impl::allocateLayers(const std::vector<LayerPin>& blobsToKeep_)
         Mat& inp = layers[0].outputBlobs[i];
         CV_Assert(inp.total());
         int type = inp.type();
-        if (type != CV_32S && type != CV_64S)
+        if (type == CV_32F)
         {
             type = CV_32F;
             if (preferableBackend == DNN_BACKEND_OPENCV &&
@@ -562,9 +562,6 @@ void Net::Impl::allocateLayers(const std::vector<LayerPin>& blobsToKeep_)
                 if (layers[0].dtype == CV_32F)
                     layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16F);
             }
-            if (netWasQuantized && inp.type() == CV_8S) {
-                type = CV_8S;
-            }
         }
         inputShapes.push_back(shape(inp));
         inputTypes.push_back(type);
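Combined with the DataLayer change, the new condition means an input blob's type is preserved verbatim unless it is CV_32F, which may still be narrowed to CV_16F for FP16 targets; the removed netWasQuantized carve-out is subsumed because CV_8S now passes through like any other non-float type. A condensed sketch of the resulting selection (simplified, not verbatim source):

```cpp
// Simplified distillation of the input-type logic after this change.
int pickInputType(int inputType, bool fp16Target, int firstLayerDtype)
{
    if (inputType == CV_32F && fp16Target && firstLayerDtype == CV_32F)
        return CV_16F;  // float inputs may still be narrowed for FP16 targets
    return inputType;   // CV_8S/CV_8U/CV_32S/CV_64S pass through unchanged
}
```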
6 changes: 5 additions & 1 deletion modules/dnn/src/net_impl_backend.cpp
@@ -62,7 +62,7 @@ Ptr<BackendWrapper> Net::Impl::wrap(Mat& host)
     {
         CV_Assert(haveCUDA());
 #ifdef HAVE_CUDA
-        CV_CheckType(host.depth(), host.depth() == CV_32F || host.depth() == CV_32S || host.depth() == CV_64S, "Unsupported type for CUDA");
+        CV_CheckType(host.depth(), host.depth() == CV_32F || host.depth() == CV_8S || host.depth() == CV_8U || host.depth() == CV_32S || host.depth() == CV_64S, "Unsupported type for CUDA");
         CV_Assert(IS_DNN_CUDA_TARGET(preferableTarget));
         switch (host.depth())
         {
@@ -71,6 +71,10 @@ Ptr<BackendWrapper> Net::Impl::wrap(Mat& host)
                 return CUDABackendWrapperFP16::create(baseBuffer, shape);
             else
                 return CUDABackendWrapperFP32::create(baseBuffer, shape);
+        case CV_8S:
+            return CUDABackendWrapperINT8::create(baseBuffer, shape);
+        case CV_8U:
+            return CUDABackendWrapperUINT8::create(baseBuffer, shape);
         case CV_32S:
             return CUDABackendWrapperINT32::create(baseBuffer, shape);
         case CV_64S:
30 changes: 23 additions & 7 deletions modules/dnn/src/onnx/onnx_graph_simplifier.cpp
@@ -1704,7 +1704,7 @@ void simplifySubgraphs(opencv_onnx::GraphProto& net)
     simplifySubgraphs(Ptr<ImportGraphWrapper>(new ONNXGraphWrapper(net)), subgraphs);
 }

-Mat getMatFromTensor(const opencv_onnx::TensorProto& tensor_proto)
+Mat getMatFromTensor(const opencv_onnx::TensorProto& tensor_proto, bool uint8ToInt8)
 {
     if (tensor_proto.raw_data().empty() && tensor_proto.float_data().empty() &&
         tensor_proto.double_data().empty() && tensor_proto.int64_data().empty() &&
@@ -1834,22 +1834,38 @@ Mat getMatFromTensor(const opencv_onnx::TensorProto& tensor_proto, bool uint8ToInt8)
             Mat(sizes, CV_64SC1, (void*)src).copyTo(blob);
         }
     }
-    else if (datatype == opencv_onnx::TensorProto_DataType_INT8 ||
-             datatype == opencv_onnx::TensorProto_DataType_UINT8)
+    else if (datatype == opencv_onnx::TensorProto_DataType_INT8)
     {
+        if (!tensor_proto.int32_data().empty())
+        {
+            const ::google::protobuf::RepeatedField<int32_t> field = tensor_proto.int32_data();
+            Mat(sizes, CV_32SC1, (void*)field.data()).convertTo(blob, CV_8S);
+        }
+        else
+        {
+            char* val = const_cast<char*>(tensor_proto.raw_data().c_str());
+            Mat(sizes, CV_8S, val).copyTo(blob);
+        }
+    }
+    else if (datatype == opencv_onnx::TensorProto_DataType_UINT8)
+    {
         // TODO : Add support for uint8 weights and acitvations. For now, converting uint8 tensors to int8.
-        int offset = datatype == opencv_onnx::TensorProto_DataType_INT8 ? 0 : -128;
-        int depth = datatype == opencv_onnx::TensorProto_DataType_INT8 ? CV_8S : CV_8U;

         if (!tensor_proto.int32_data().empty())
         {
             const ::google::protobuf::RepeatedField<int32_t> field = tensor_proto.int32_data();
-            Mat(sizes, CV_32SC1, (void*)field.data()).convertTo(blob, CV_8S, 1.0, offset);
+            if (uint8ToInt8)
+                Mat(sizes, CV_32SC1, (void*)field.data()).convertTo(blob, CV_8S, 1, -128); // handle as ONNX quantized weight
+            else
+                Mat(sizes, CV_32SC1, (void*)field.data()).convertTo(blob, CV_8U);
         }
         else
         {
             char* val = const_cast<char*>(tensor_proto.raw_data().c_str());
-            Mat(sizes, depth, val).convertTo(blob, CV_8S, 1.0, offset);
+            if (uint8ToInt8)
+                Mat(sizes, CV_8U, val).convertTo(blob, CV_8S, 1, -128); // handle as ONNX quantized weight
+            else
+                Mat(sizes, CV_8U, val).copyTo(blob);
         }
     }
     else
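When uint8ToInt8 is true, getMatFromTensor keeps the historical behavior for quantized ONNX weights: unsigned bytes are shifted by -128 into the signed range, so 0 maps to -128, 128 to 0, and 255 to 127. A standalone sketch of that mapping:

```cpp
#include <opencv2/core.hpp>
#include <iostream>

int main()
{
    cv::Mat u8 = (cv::Mat_<uchar>(1, 3) << 0, 128, 255);
    cv::Mat s8;
    u8.convertTo(s8, CV_8S, 1, -128);  // the same shift applied for uint8ToInt8
    std::cout << s8 << std::endl;      // prints [-128, 0, 127]
    return 0;
}
```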