opencv/modules/dnn/src/layer.cpp
alexlyulkov 1d1faaabef
Merge pull request #24411 from alexlyulkov:al/dnn-type-inference
Added int32, int64 support and type inference to dnn #24411

**Added type inference to dnn, similar to the existing shape inference, along with int32 and int64 support.**

- Added a getTypes method for layers that computes the layer's output and internal types from its input types (similar to getMemoryShapes). By default, output and internal types equal the type of input[0]; a sketch of a custom override follows this list.
- Added a type inference pipeline similar to the shape inference pipeline. The LayersShapes struct (used by the shape inference pipeline) now contains both shapes and types.
- All layer output blobs are now allocated using the types computed by the type inference.
- Inputs and constants with int32 and int64 types are no longer automatically converted to float32.
- Added int32 and int64 support for all layers that use indexing and for all layers required by the tests.
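
For illustration, a minimal sketch (a hypothetical layer, not part of this PR) of overriding the new getTypes hook: an ArgMax-style layer that accepts any supported input type but always reports int64 index outputs. MatType and CV_64S are assumed available on this branch.

```cpp
#include <vector>
#include <opencv2/dnn.hpp>

class ArgMaxIndicesLayer CV_FINAL : public cv::dnn::Layer
{
public:
    void getTypes(const std::vector<cv::dnn::MatType>& inputs,
                  const int requiredOutputs,
                  const int requiredInternals,
                  std::vector<cv::dnn::MatType>& outputs,
                  std::vector<cv::dnn::MatType>& internals) const CV_OVERRIDE
    {
        CV_Assert(!inputs.empty());
        outputs.assign(requiredOutputs, cv::dnn::MatType(CV_64S)); // indices are int64
        internals.assign(requiredInternals, inputs[0]);            // keep input type
    }
    // getMemoryShapes() and forward() are omitted for brevity.
};
```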

Added int32 and int64 support for CUDA:
- Added host<->device data transfer for int32 and int64
- Added int32 and int64 support for several layers (the CUDA C++ templates needed only slight modifications)

All accuracy tests pass on CPU, OCL, OCL_FP16, CUDA, and CUDA_FP16, except for the RAFT model.

**CURRENT PROBLEMS**:
- The ONNX parser always converts int64 constants and layer attributes to int32, so some models with int64 constants don't work (e.g. RAFT). The solution is to disable the int64->int32 conversion and fix attribute reading in many ONNX layer parsers (https://github.com/opencv/opencv/issues/25102)
- I didn't add type inference and int support to Vulkan, so that backend doesn't work at all right now.
- Some layers don't support int yet, so some models outside the test suite may not work.

**CURRENT WORKAROUNDS**:
- CPU arg_layer indices are computed in int32 and then converted to int64, as sketched below (the master branch has the same workaround with an int32->float conversion)
- CPU and OCL pooling_layer indices are computed in float and then converted to int64
- CPU gather_layer indices are computed in int32, so int64 indices are converted to int32 (the master branch has the same workaround with a float->int32 conversion)
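
A minimal sketch of that widening step, assuming this branch's CV_64S Mat support (the helper name is hypothetical, for illustration only):

```cpp
#include <opencv2/core.hpp>

// Hypothetical helper: the kernel fills an int32 index buffer, which is then
// widened element-wise into the int64 blob that the type inference expects.
static void widenIndicesToInt64(const cv::Mat& idx32, cv::Mat& idx64)
{
    CV_CheckTypeEQ(idx32.type(), CV_32S, "kernel produces int32 indices");
    idx32.convertTo(idx64, CV_64S);  // element-wise widening copy
}
```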

**DISABLED TESTS**:
- RAFT model

**REMOVED TESTS**:
- Greater_input_dtype_int64 (it violates ONNX rules: the whole test just compares a float tensor with an int constant)

**TODO IN NEXT PULL REQUESTS**:
- Add int64 support for ONNX parser
- Add int support for more layers
- Add int support for OCL (currently int layers just run on CPU)
- Add int tests
- Add int support for other backends
2024-03-01 17:07:38 +03:00

281 lines
8.4 KiB
C++

// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "precomp.hpp"
namespace cv {
namespace dnn {
CV__DNN_INLINE_NS_BEGIN
Layer::Layer() { preferableTarget = DNN_TARGET_CPU; }
Layer::Layer(const LayerParams& params)
: blobs(params.blobs)
, name(params.name)
, type(params.type)
{
preferableTarget = DNN_TARGET_CPU;
}
void Layer::setParamsFrom(const LayerParams& params)
{
blobs = params.blobs;
name = params.name;
type = params.type;
}
int Layer::inputNameToIndex(String)
{
return -1;
}
int Layer::outputNameToIndex(const String&)
{
return 0;
}
bool Layer::supportBackend(int backendId)
{
return backendId == DNN_BACKEND_OPENCV;
}
Ptr<BackendNode> Layer::initCUDA(
void*,
const std::vector<Ptr<BackendWrapper>>&,
const std::vector<Ptr<BackendWrapper>>&)
{
CV_Error(Error::StsNotImplemented, "CUDA pipeline of " + type + " layers is not defined.");
return Ptr<BackendNode>();
}
Ptr<BackendNode> Layer::initVkCom(const std::vector<Ptr<BackendWrapper> > &inputs,
std::vector<Ptr<BackendWrapper> > &outputs)
{
CV_Error(Error::StsNotImplemented, "VkCom pipeline of " + type + " layers is not defined.");
return Ptr<BackendNode>();
}
Ptr<BackendNode> Layer::initNgraph(const std::vector<Ptr<BackendWrapper>>& inputs, const std::vector<Ptr<BackendNode>>& nodes)
{
CV_Error(Error::StsNotImplemented, "Inference Engine pipeline of " + type + " layers is not defined.");
return Ptr<BackendNode>();
}
Ptr<BackendNode> Layer::initWebnn(const std::vector<Ptr<BackendWrapper>>& inputs, const std::vector<Ptr<BackendNode>>& nodes)
{
CV_Error(Error::StsNotImplemented, "WebNN pipeline of " + type + " layers is not defined.");
return Ptr<BackendNode>();
}
Ptr<BackendNode> Layer::initTimVX(void* timVxInfo,
const std::vector<Ptr<BackendWrapper> > & inputsWrapper,
const std::vector<Ptr<BackendWrapper> > & outputsWrapper,
bool isLast)
{
CV_Error(Error::StsNotImplemented, "TimVX pipeline of " + type +
" layers is not defined.");
return Ptr<BackendNode>();
}
Ptr<BackendNode> Layer::initCann(const std::vector<Ptr<BackendWrapper> > &inputs,
const std::vector<Ptr<BackendWrapper> > &outputs,
const std::vector<Ptr<BackendNode> >& nodes)
{
CV_Error(Error::StsNotImplemented, "CANN pipeline of " + type + " layers is not defined.");
return Ptr<BackendNode>();
}
bool Layer::setActivation(const Ptr<ActivationLayer>&) { return false; }
bool Layer::tryFuse(Ptr<Layer>&) { return false; }
void Layer::getScaleShift(Mat& scale, Mat& shift) const
{
scale = Mat();
shift = Mat();
}
void Layer::getScaleZeropoint(float& scale, int& zeropoint) const
{
scale = 1.f;
zeropoint = 0;
}
void Layer::unsetAttached()
{
setActivation(Ptr<ActivationLayer>());
}
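// Helper: build a vector of non-owning pointers into v for the legacy Mat* overloads below.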
template <typename T>
static void vecToPVec(const std::vector<T>& v, std::vector<T*>& pv)
{
pv.resize(v.size());
for (size_t i = 0; i < v.size(); i++)
pv[i] = const_cast<T*>(&v[i]);
}
void Layer::finalize(const std::vector<Mat>& inputs, std::vector<Mat>& outputs)
{
CV_TRACE_FUNCTION();
this->finalize((InputArrayOfArrays)inputs, (OutputArrayOfArrays)outputs);
}
void Layer::finalize(const std::vector<Mat*>& input, std::vector<Mat>& output)
{
CV_UNUSED(input);
CV_UNUSED(output);
}
void Layer::finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr)
{
CV_TRACE_FUNCTION();
std::vector<Mat> inputs, outputs;
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
std::vector<Mat*> inputsp;
vecToPVec(inputs, inputsp);
this->finalize(inputsp, outputs);
}
std::vector<Mat> Layer::finalize(const std::vector<Mat>& inputs)
{
CV_TRACE_FUNCTION();
std::vector<Mat> outputs;
this->finalize(inputs, outputs);
return outputs;
}
void Layer::forward(std::vector<Mat*>& input, std::vector<Mat>& output, std::vector<Mat>& internals)
{
// We kept this method for compatibility. DNN calls it now only to support users' implementations.
}
void Layer::forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
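// OpenCL FP16 path: convert FP16 blobs to FP32, run the generic forward(), then convert the results back to FP16.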
if (preferableTarget == DNN_TARGET_OPENCL_FP16 && inputs_arr.depth() == CV_16F)
{
std::vector<UMat> inputs;
std::vector<UMat> outputs;
std::vector<UMat> internals;
std::vector<UMat> orig_inputs;
std::vector<UMat> orig_outputs;
std::vector<UMat> orig_internals;
inputs_arr.getUMatVector(orig_inputs);
outputs_arr.getUMatVector(orig_outputs);
internals_arr.getUMatVector(orig_internals);
inputs.resize(orig_inputs.size());
for (size_t i = 0; i < orig_inputs.size(); i++)
if (orig_inputs[i].depth() == CV_16F)
orig_inputs[i].convertTo(inputs[i], CV_32F);
else
inputs[i] = orig_inputs[i];
outputs.resize(orig_outputs.size());
for (size_t i = 0; i < orig_outputs.size(); i++)
if (orig_outputs[i].depth() == CV_16F)
outputs[i].create(shape(orig_outputs[i]), CV_32F);
else
outputs[i] = orig_outputs[i];
internals.resize(orig_internals.size());
for (size_t i = 0; i < orig_internals.size(); i++)
if (orig_internals[i].depth() == CV_16F)
internals[i].create(shape(orig_internals[i]), CV_32F);
else
internals[i] = orig_internals[i];
forward(inputs, outputs, internals);
for (size_t i = 0; i < outputs.size(); i++)
if (orig_outputs[i].depth() == CV_16F)
outputs[i].convertTo(orig_outputs[i], CV_16F);
else
outputs[i] = orig_outputs[i];
// sync results back
outputs_arr.assign(orig_outputs);
internals_arr.assign(orig_internals);
return;
}
std::vector<Mat> inpvec;
std::vector<Mat> outputs;
std::vector<Mat> internals;
inputs_arr.getMatVector(inpvec);
outputs_arr.getMatVector(outputs);
internals_arr.getMatVector(internals);
std::vector<Mat*> inputs(inpvec.size());
for (size_t i = 0; i < inpvec.size(); i++)
inputs[i] = &inpvec[i];
this->forward(inputs, outputs, internals);
// sync results back
outputs_arr.assign(outputs);
internals_arr.assign(internals);
}
void Layer::run(const std::vector<Mat>& inputs, std::vector<Mat>& outputs, std::vector<Mat>& internals)
{
CV_TRACE_FUNCTION();
this->finalize(inputs, outputs);
this->forward(inputs, outputs, internals);
}
Layer::~Layer() {}
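// Default shape inference: every requested output takes the shape of inputs[0].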
bool Layer::getMemoryShapes(const std::vector<MatShape>& inputs,
const int requiredOutputs,
std::vector<MatShape>& outputs,
std::vector<MatShape>& internals) const
{
CV_Assert(inputs.size());
outputs.assign(std::max(requiredOutputs, (int)inputs.size()), inputs[0]);
return false;
}
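// Default type inference: check that each input type is supported by the preferable target,
// then propagate the type of inputs[0] to all outputs and internals.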
void Layer::getTypes(const std::vector<MatType>& inputs,
                     const int requiredOutputs,
                     const int requiredInternals,
                     std::vector<MatType>& outputs,
                     std::vector<MatType>& internals) const
{
CV_Assert(inputs.size());
for (auto input : inputs)
if (preferableTarget == DNN_TARGET_CUDA_FP16 || preferableTarget == DNN_TARGET_CUDA)
CV_CheckTypeEQ(input, CV_32F, "");
else if (preferableTarget == DNN_TARGET_OPENCL_FP16)
CV_CheckType(input, input == CV_16F || input == CV_8S, "");
else
CV_CheckType(input, input == CV_32F || input == CV_8S, "");
outputs.assign(requiredOutputs, inputs[0]);
internals.assign(requiredInternals, inputs[0]);
}
bool Layer::updateMemoryShapes(const std::vector<MatShape>& inputs)
{
return true;
}
CV__DNN_INLINE_NS_END
}} // namespace cv::dnn