mirror of
https://github.com/opencv/opencv.git
synced 2025-06-07 17:44:04 +08:00

Added int32, int64 support and type inference to dnn #24411 **Added type inference to dnn similar to the shape inference, added int32 and int64 support.** - Added getTypes method for layers that calculates layer output types and internal types from input types (similar to getMemoryShapes). By default output and internal types = input[0] type - Added type inference pipeline similar to shape inference pipeline. LayersShapes struct (that is used in shape inference pipeline) now contains both shapes and types - All layers output blobs are now allocated using the calculated types from the type inference. - Inputs and constants with int32 and int64 types are not automatically converted into float32 now. - Added int32 and int64 support for all the layers with indexing and for all the layers required in tests. Added int32 and int64 support for CUDA: - Added host<->device data moving for int32 and int64 - Added int32 and int64 support for several layers (just slightly modified CUDA C++ templates) Passed all the accuracy tests on CPU, OCL, OCL_FP16, CUDA, CUDA_FP16. (except RAFT model) **CURRENT PROBLEMS**: - ONNX parser always converts int64 constants and layer attributes to int32, so some models with int64 constants don't work (e.g. RAFT). The solution is to disable int64->int32 conversion and fix attributes reading in a lot of ONNX layers parsers (https://github.com/opencv/opencv/issues/25102) - I didn't add type inference and int support to Vulkan, so it doesn't work at all now. - Some layers don't support int yet, so some unknown models may not work. 
**CURRENT WORKAROUNDS**: - CPU arg_layer indices are implemented in int32 followed by an int32->int64 conversion (the master branch has the same workaround with int32->float conversion) - CPU and OCL pooling_layer indices are implemented in float followed by a float->int64 conversion - CPU gather_layer indices are implemented in int32, so int64 indices are converted to int32 (the master branch has the same workaround with float->int32 conversion) **DISABLED TESTS**: - RAFT model **REMOVED TESTS**: - Greater_input_dtype_int64 (because it doesn't fit ONNX rules, the whole test is just comparing float tensor with int constant) **TODO IN NEXT PULL REQUESTS**: - Add int64 support for ONNX parser - Add int support for more layers - Add int support for OCL (currently int layers just run on CPU) - Add int tests - Add int support for other backends
281 lines
8.4 KiB
C++
281 lines
8.4 KiB
C++
// This file is part of OpenCV project.
|
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
|
// of this distribution and at http://opencv.org/license.html.
|
|
|
|
#include "precomp.hpp"
|
|
|
|
namespace cv {
|
|
namespace dnn {
|
|
CV__DNN_INLINE_NS_BEGIN
|
|
|
|
|
|
// Default constructor: the layer starts with DNN_TARGET_CPU as its preferable target.
Layer::Layer()
{
    preferableTarget = DNN_TARGET_CPU;
}
|
|
|
|
// Builds a layer from LayerParams: copies the learned blobs, the name and the
// type string, and selects DNN_TARGET_CPU as the preferable target.
Layer::Layer(const LayerParams& params)
    : blobs(params.blobs),
      name(params.name),
      type(params.type)
{
    preferableTarget = DNN_TARGET_CPU;
}
|
|
|
|
void Layer::setParamsFrom(const LayerParams& params)
|
|
{
|
|
blobs = params.blobs;
|
|
name = params.name;
|
|
type = params.type;
|
|
}
|
|
|
|
// Base implementation: ignores the requested input name and always reports -1.
int Layer::inputNameToIndex(String /*inputName*/)
{
    return -1;
}
|
|
|
|
// Base implementation: ignores the requested output name and always reports index 0.
int Layer::outputNameToIndex(const String& /*outputName*/)
{
    return 0;
}
|
|
|
|
bool Layer::supportBackend(int backendId)
|
|
{
|
|
return backendId == DNN_BACKEND_OPENCV;
|
|
}
|
|
|
|
// Base implementation of the CUDA node factory: reports
// Error::StsNotImplemented through CV_Error, naming this layer's type.
Ptr<BackendNode> Layer::initCUDA(void* /*context*/,
                                 const std::vector<Ptr<BackendWrapper>>& /*inputs*/,
                                 const std::vector<Ptr<BackendWrapper>>& /*outputs*/)
{
    CV_Error(Error::StsNotImplemented, "CUDA pipeline of " + type + " layers is not defined.");

    // Kept so that every control path of the function returns a value.
    return Ptr<BackendNode>();
}
|
|
|
|
// Base implementation of the VkCom node factory: reports
// Error::StsNotImplemented through CV_Error, naming this layer's type.
Ptr<BackendNode> Layer::initVkCom(const std::vector<Ptr<BackendWrapper> >& /*inputs*/,
                                  std::vector<Ptr<BackendWrapper> >& /*outputs*/)
{
    CV_Error(Error::StsNotImplemented, "VkCom pipeline of " + type + " layers is not defined.");

    // Kept so that every control path of the function returns a value.
    return Ptr<BackendNode>();
}
|
|
|
|
// Base implementation of the nGraph node factory: reports
// Error::StsNotImplemented through CV_Error, naming this layer's type.
Ptr<BackendNode> Layer::initNgraph(const std::vector<Ptr<BackendWrapper>>& /*inputs*/,
                                   const std::vector<Ptr<BackendNode>>& /*nodes*/)
{
    CV_Error(Error::StsNotImplemented, "Inference Engine pipeline of " + type + " layers is not defined.");

    // Kept so that every control path of the function returns a value.
    return Ptr<BackendNode>();
}
|
|
|
|
// Base implementation of the WebNN node factory: reports
// Error::StsNotImplemented through CV_Error, naming this layer's type.
Ptr<BackendNode> Layer::initWebnn(const std::vector<Ptr<BackendWrapper>>& /*inputs*/,
                                  const std::vector<Ptr<BackendNode>>& /*nodes*/)
{
    CV_Error(Error::StsNotImplemented, "WebNN pipeline of " + type + " layers is not defined.");

    // Kept so that every control path of the function returns a value.
    return Ptr<BackendNode>();
}
|
|
|
|
// Base implementation of the TimVX node factory: reports
// Error::StsNotImplemented through CV_Error, naming this layer's type.
Ptr<BackendNode> Layer::initTimVX(void* /*timVxInfo*/,
                                  const std::vector<Ptr<BackendWrapper> >& /*inputsWrapper*/,
                                  const std::vector<Ptr<BackendWrapper> >& /*outputsWrapper*/,
                                  bool /*isLast*/)
{
    CV_Error(Error::StsNotImplemented, "TimVX pipeline of " + type + " layers is not defined.");

    // Kept so that every control path of the function returns a value.
    return Ptr<BackendNode>();
}
|
|
|
|
// Base implementation of the CANN node factory: reports
// Error::StsNotImplemented through CV_Error, naming this layer's type.
Ptr<BackendNode> Layer::initCann(const std::vector<Ptr<BackendWrapper> >& /*inputs*/,
                                 const std::vector<Ptr<BackendWrapper> >& /*outputs*/,
                                 const std::vector<Ptr<BackendNode> >& /*nodes*/)
{
    CV_Error(Error::StsNotImplemented, "CANN pipeline of " + type + " layers is not defined.");

    // Kept so that every control path of the function returns a value.
    return Ptr<BackendNode>();
}
|
|
|
|
// Base implementation: the activation layer is ignored and false is returned.
bool Layer::setActivation(const Ptr<ActivationLayer>& /*layer*/)
{
    return false;
}
|
|
// Base implementation: no fusion is attempted and false is returned.
bool Layer::tryFuse(Ptr<Layer>& /*top*/)
{
    return false;
}
|
|
// Base implementation: both output parameters are reset to
// default-constructed (empty) Mat objects.
void Layer::getScaleShift(Mat& scale, Mat& shift) const
{
    const Mat empty;

    scale = empty;
    shift = empty;
}
|
|
|
|
void Layer::getScaleZeropoint(float& scale, int& zeropoint) const
|
|
{
|
|
scale = 1.f;
|
|
zeropoint = 0;
|
|
}
|
|
|
|
void Layer::unsetAttached()
|
|
{
|
|
setActivation(Ptr<ActivationLayer>());
|
|
}
|
|
|
|
// Fills `pv` with one pointer per element of `v`, in the same order.
// Any previous content of `pv` is discarded. The pointers are non-const even
// though `v` is taken by const reference, and they refer to the storage owned by `v`.
template <typename T>
static void vecToPVec(const std::vector<T>& v, std::vector<T*>& pv)
{
    pv.clear();
    pv.reserve(v.size());
    for (const T& element : v)
        pv.push_back(const_cast<T*>(&element));
}
|
|
|
|
void Layer::finalize(const std::vector<Mat>& inputs, std::vector<Mat>& outputs)
|
|
{
|
|
CV_TRACE_FUNCTION();
|
|
this->finalize((InputArrayOfArrays)inputs, (OutputArrayOfArrays)outputs);
|
|
}
|
|
|
|
// Pointer-based overload. The base implementation does nothing with either
// argument; both are only marked as intentionally unused.
void Layer::finalize(const std::vector<Mat*>& input, std::vector<Mat>& output)
{
    CV_UNUSED(output);
    CV_UNUSED(input);
}
|
|
|
|
// Array-proxy overload: extracts Mat vectors from both proxies, builds a vector
// of pointers to the input Mats and delegates to the pointer-based finalize().
// The local output vector is not written back to `outputs_arr` here.
void Layer::finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr)
{
    CV_TRACE_FUNCTION();

    std::vector<Mat> inputMats;
    inputs_arr.getMatVector(inputMats);

    std::vector<Mat> outputMats;
    outputs_arr.getMatVector(outputMats);

    std::vector<Mat*> inputPtrs;
    vecToPVec(inputMats, inputPtrs);

    this->finalize(inputPtrs, outputMats);
}
|
|
|
|
std::vector<Mat> Layer::finalize(const std::vector<Mat>& inputs)
|
|
{
|
|
CV_TRACE_FUNCTION();
|
|
|
|
std::vector<Mat> outputs;
|
|
this->finalize(inputs, outputs);
|
|
return outputs;
|
|
}
|
|
|
|
void Layer::forward(std::vector<Mat*>& input, std::vector<Mat>& output, std::vector<Mat>& internals)
|
|
{
|
|
// We kept this method for compatibility. DNN calls it now only to support users' implementations.
|
|
}
|
|
|
|
// Array-proxy forward pass. The base implementation records trace information
// and hands all three arrays to Layer::forward_fallback().
void Layer::forward(InputArrayOfArrays inputs_arr,
                    OutputArrayOfArrays outputs_arr,
                    OutputArrayOfArrays internals_arr)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(name, "name", name.c_str());

    Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
|
|
|
|
// Fallback forward pass.
//
// Two paths are taken depending on the target and the input depth:
//  * preferableTarget == DNN_TARGET_OPENCL_FP16 and inputs_arr.depth() == CV_16F:
//    the arrays are fetched as UMat vectors, every CV_16F input is converted to
//    CV_32F, every CV_16F output/internal gets a freshly created CV_32F buffer of
//    the same shape, forward() is called on these working vectors, and CV_16F
//    outputs are converted back into the original buffers. Blobs of any other
//    depth are passed through unchanged.
//  * otherwise: the arrays are fetched as Mat vectors and the pointer-based
//    forward(std::vector<Mat*>&, ...) overload is called.
//
// In both paths the resulting outputs and internals are assigned back to
// `outputs_arr` / `internals_arr` before returning.
void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(name, "name", name.c_str());

    if (preferableTarget == DNN_TARGET_OPENCL_FP16 && inputs_arr.depth() == CV_16F)
    {
        // Working (possibly widened) blobs handed to forward().
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;
        std::vector<UMat> internals;

        // Blobs exactly as supplied by the caller.
        std::vector<UMat> orig_inputs;
        std::vector<UMat> orig_outputs;
        std::vector<UMat> orig_internals;

        inputs_arr.getUMatVector(orig_inputs);
        outputs_arr.getUMatVector(orig_outputs);
        internals_arr.getUMatVector(orig_internals);

        inputs.resize(orig_inputs.size());
        for (size_t i = 0; i < orig_inputs.size(); i++)
        {
            if (orig_inputs[i].depth() == CV_16F)
                orig_inputs[i].convertTo(inputs[i], CV_32F);
            else
                inputs[i] = orig_inputs[i];
        }

        outputs.resize(orig_outputs.size());
        for (size_t i = 0; i < orig_outputs.size(); i++)
        {
            if (orig_outputs[i].depth() == CV_16F)
                outputs[i].create(shape(orig_outputs[i]), CV_32F);
            else
                outputs[i] = orig_outputs[i];
        }

        internals.resize(orig_internals.size());
        for (size_t i = 0; i < orig_internals.size(); i++)
        {
            if (orig_internals[i].depth() == CV_16F)
                internals[i].create(shape(orig_internals[i]), CV_32F);
            else
                internals[i] = orig_internals[i];
        }

        forward(inputs, outputs, internals);

        for (size_t i = 0; i < outputs.size(); i++)
        {
            if (orig_outputs[i].depth() == CV_16F)
                outputs[i].convertTo(orig_outputs[i], CV_16F);
            else
                // Propagate the result towards the caller-visible vector. The
                // assignment used to go the other way round, which dropped the
                // result whenever forward() rebound outputs[i] to a new buffer.
                orig_outputs[i] = outputs[i];
        }

        // sync results back
        outputs_arr.assign(orig_outputs);
        internals_arr.assign(orig_internals);
        return;
    }

    std::vector<Mat> inpvec;
    std::vector<Mat> outputs;
    std::vector<Mat> internals;

    inputs_arr.getMatVector(inpvec);
    outputs_arr.getMatVector(outputs);
    internals_arr.getMatVector(internals);

    // The pointer-based forward() overload expects pointers to the input Mats.
    std::vector<Mat*> inputs(inpvec.size());
    for (size_t i = 0; i < inpvec.size(); i++)
        inputs[i] = &inpvec[i];

    this->forward(inputs, outputs, internals);

    // sync results back
    outputs_arr.assign(outputs);
    internals_arr.assign(internals);
}
|
|
|
|
void Layer::run(const std::vector<Mat>& inputs, std::vector<Mat>& outputs, std::vector<Mat>& internals)
|
|
{
|
|
CV_TRACE_FUNCTION();
|
|
|
|
this->finalize(inputs, outputs);
|
|
this->forward(inputs, outputs, internals);
|
|
}
|
|
|
|
// Destructor: performs no work beyond destroying the members.
Layer::~Layer() = default;
|
|
|
|
bool Layer::getMemoryShapes(const std::vector<MatShape>& inputs,
|
|
const int requiredOutputs,
|
|
std::vector<MatShape>& outputs,
|
|
std::vector<MatShape>& internals) const
|
|
{
|
|
CV_Assert(inputs.size());
|
|
outputs.assign(std::max(requiredOutputs, (int)inputs.size()), inputs[0]);
|
|
return false;
|
|
}
|
|
|
|
void Layer::getTypes(const std::vector<MatType>&inputs,
|
|
const int requiredOutputs,
|
|
const int requiredInternals,
|
|
std::vector<MatType>&outputs,
|
|
std::vector<MatType>&internals) const
|
|
{
|
|
CV_Assert(inputs.size());
|
|
for (auto input : inputs)
|
|
if (preferableTarget == DNN_TARGET_CUDA_FP16 || preferableTarget == DNN_TARGET_CUDA)
|
|
CV_CheckTypeEQ(input, CV_32F, "");
|
|
else if (preferableTarget == DNN_TARGET_OPENCL_FP16)
|
|
CV_CheckType(input, input == CV_16F || input == CV_8S, "");
|
|
else
|
|
CV_CheckType(input, input == CV_32F || input == CV_8S, "");
|
|
|
|
outputs.assign(requiredOutputs, inputs[0]);
|
|
internals.assign(requiredInternals, inputs[0]);
|
|
}
|
|
|
|
// Base implementation: the input shapes are ignored and true is returned.
bool Layer::updateMemoryShapes(const std::vector<MatShape>& /*inputs*/)
{
    return true;
}
|
|
|
|
CV__DNN_INLINE_NS_END
|
|
}} // namespace cv::dnn
|