mirror of
https://github.com/opencv/opencv.git
synced 2024-12-16 10:29:11 +08:00
efc9837df1
DNN: avoid CV_16S usage for FP16 #24892 **Merge after**: #24918 TODO: - [x] measure performance changes - [x] optimize convertTo for OpenCL: #24918 12700K iGPU: |Name of Test|0|1|1 vs 0 (x-factor)| |---|:-:|:-:|:-:| |AlexNet::DNNTestNetwork::OCV/OCL_FP16|7.441|7.480|0.99| |CRNN::DNNTestNetwork::OCV/OCL_FP16|10.776|10.736|1.00| |DenseNet_121::DNNTestNetwork::OCV/OCL_FP16|52.762|52.833|1.00| |EAST_text_detection::DNNTestNetwork::OCV/OCL_FP16|60.694|60.721|1.00| |EfficientNet::DNNTestNetwork::OCV/OCL_FP16|33.373|33.173|1.01| |FastNeuralStyle_eccv16::DNNTestNetwork::OCV/OCL_FP16|81.840|81.724|1.00| |GoogLeNet::DNNTestNetwork::OCV/OCL_FP16|20.965|20.927|1.00| |Inception_5h::DNNTestNetwork::OCV/OCL_FP16|22.204|22.173|1.00| |Inception_v2_SSD_TensorFlow::DNNTestNetwork::OCV/OCL_FP16|47.115|47.460|0.99| |MPHand::DNNTestNetwork::OCV/OCL_FP16|6.760|6.670|1.01| |MPPalm::DNNTestNetwork::OCV/OCL_FP16|10.188|10.171|1.00| |MPPose::DNNTestNetwork::OCV/OCL_FP16|12.510|12.561|1.00| |MobileNet_SSD_Caffe::DNNTestNetwork::OCV/OCL_FP16|17.290|17.072|1.01| |MobileNet_SSD_v1_TensorFlow::DNNTestNetwork::OCV/OCL_FP16|19.473|19.306|1.01| |MobileNet_SSD_v2_TensorFlow::DNNTestNetwork::OCV/OCL_FP16|22.874|23.404|0.98| |OpenFace::DNNTestNetwork::OCV/OCL_FP16|9.568|9.517|1.01| |OpenPose_pose_mpi_faster_4_stages::DNNTestNetwork::OCV/OCL_FP16|539.899|539.845|1.00| |PPHumanSeg::DNNTestNetwork::OCV/OCL_FP16|18.015|18.769|0.96| |PPOCRv3::DNNTestNetwork::OCV/OCL_FP16|63.122|63.540|0.99| |ResNet_50::DNNTestNetwork::OCV/OCL_FP16|34.947|34.925|1.00| |SFace::DNNTestNetwork::OCV/OCL_FP16|10.249|10.206|1.00| |SSD::DNNTestNetwork::OCV/OCL_FP16|213.068|213.108|1.00| |SqueezeNet_v1_1::DNNTestNetwork::OCV/OCL_FP16|4.867|4.878|1.00| |VIT_B_32::DNNTestNetwork::OCV/OCL_FP16|200.563|190.788|1.05| |VitTrack::DNNTestNetwork::OCV/OCL_FP16|7.528|7.173|1.05| |YOLOX::DNNTestNetwork::OCV/OCL_FP16|132.858|132.701|1.00| |YOLOv3::DNNTestNetwork::OCV/OCL_FP16|209.559|208.809|1.00| 
|YOLOv4::DNNTestNetwork::OCV/OCL_FP16|221.357|220.924|1.00| |YOLOv4_tiny::DNNTestNetwork::OCV/OCL_FP16|24.446|24.382|1.00| |YOLOv5::DNNTestNetwork::OCV/OCL_FP16|43.922|44.080|1.00| |YOLOv8::DNNTestNetwork::OCV/OCL_FP16|64.159|63.842|1.00| |YuNet::DNNTestNetwork::OCV/OCL_FP16|10.177|10.231|0.99| |opencv_face_detector::DNNTestNetwork::OCV/OCL_FP16|15.121|15.445|0.98| Co-authored-by: Alexander Alekhin <alexander.a.alekhin@gmail.com>
267 lines
7.6 KiB
C++
267 lines
7.6 KiB
C++
// This file is part of OpenCV project.
|
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
|
// of this distribution and at http://opencv.org/license.html.
|
|
|
|
#include "precomp.hpp"
|
|
|
|
namespace cv {
|
|
namespace dnn {
|
|
CV__DNN_INLINE_NS_BEGIN
|
|
|
|
|
|
// Default constructor: a freshly created layer targets the plain CPU backend.
Layer::Layer()
{
    preferableTarget = DNN_TARGET_CPU;
}
|
|
|
|
// Constructs a layer from parsed parameters, copying its learned blobs,
// instance name and type string; the backend target defaults to CPU.
Layer::Layer(const LayerParams& params)
    : blobs(params.blobs), name(params.name), type(params.type)
{
    preferableTarget = DNN_TARGET_CPU;
}
|
|
|
|
void Layer::setParamsFrom(const LayerParams& params)
|
|
{
|
|
blobs = params.blobs;
|
|
name = params.name;
|
|
type = params.type;
|
|
}
|
|
|
|
// Maps an input blob name to an input index. The base implementation knows
// no named inputs and always reports "not found" (-1).
int Layer::inputNameToIndex(String)
{
    return -1;
}
|
|
|
|
// Maps an output blob name to an output index. The base implementation
// ignores the name and always reports the first output (0).
int Layer::outputNameToIndex(const String&)
{
    return 0;
}
|
|
|
|
bool Layer::supportBackend(int backendId)
|
|
{
|
|
return backendId == DNN_BACKEND_OPENCV;
|
|
}
|
|
|
|
// CUDA backend initialization hook; layers with CUDA support override it.
// The base implementation always raises StsNotImplemented.
Ptr<BackendNode> Layer::initCUDA(
        void*,
        const std::vector<Ptr<BackendWrapper>>&,
        const std::vector<Ptr<BackendWrapper>>&)
{
    CV_Error(Error::StsNotImplemented, "CUDA pipeline of " + type + " layers is not defined.");
    return {};  // not reached: CV_Error raises an exception
}
|
|
|
|
// Vulkan (VkCom) backend initialization hook; overridden by supporting layers.
// The base implementation always raises StsNotImplemented.
Ptr<BackendNode> Layer::initVkCom(const std::vector<Ptr<BackendWrapper> >& inputs,
                                  std::vector<Ptr<BackendWrapper> >& outputs)
{
    CV_Error(Error::StsNotImplemented, "VkCom pipeline of " + type + " layers is not defined.");
    return {};  // not reached: CV_Error raises an exception
}
|
|
|
|
// Halide backend initialization hook; overridden by supporting layers.
// The base implementation always raises StsNotImplemented.
Ptr<BackendNode> Layer::initHalide(const std::vector<Ptr<BackendWrapper>>&)
{
    CV_Error(Error::StsNotImplemented, "Halide pipeline of " + type + " layers is not defined.");
    return {};  // not reached: CV_Error raises an exception
}
|
|
|
|
// OpenVINO/nGraph (Inference Engine) backend initialization hook.
// The base implementation always raises StsNotImplemented.
Ptr<BackendNode> Layer::initNgraph(const std::vector<Ptr<BackendWrapper>>& inputs, const std::vector<Ptr<BackendNode>>& nodes)
{
    CV_Error(Error::StsNotImplemented, "Inference Engine pipeline of " + type + " layers is not defined.");
    return {};  // not reached: CV_Error raises an exception
}
|
|
|
|
// WebNN backend initialization hook; overridden by supporting layers.
// The base implementation always raises StsNotImplemented.
Ptr<BackendNode> Layer::initWebnn(const std::vector<Ptr<BackendWrapper>>& inputs, const std::vector<Ptr<BackendNode>>& nodes)
{
    CV_Error(Error::StsNotImplemented, "WebNN pipeline of " + type + " layers is not defined.");
    return {};  // not reached: CV_Error raises an exception
}
|
|
|
|
// TimVX (NPU) backend initialization hook; overridden by supporting layers.
// The base implementation always raises StsNotImplemented.
Ptr<BackendNode> Layer::initTimVX(void* timVxInfo,
                                  const std::vector<Ptr<BackendWrapper> >& inputsWrapper,
                                  const std::vector<Ptr<BackendWrapper> >& outputsWrapper,
                                  bool isLast)
{
    CV_Error(Error::StsNotImplemented, "TimVX pipeline of " + type + " layers is not defined.");
    return {};  // not reached: CV_Error raises an exception
}
|
|
|
|
// CANN (Ascend) backend initialization hook; overridden by supporting layers.
// The base implementation always raises StsNotImplemented.
Ptr<BackendNode> Layer::initCann(const std::vector<Ptr<BackendWrapper> >& inputs,
                                 const std::vector<Ptr<BackendWrapper> >& outputs,
                                 const std::vector<Ptr<BackendNode> >& nodes)
{
    CV_Error(Error::StsNotImplemented, "CANN pipeline of " + type + " layers is not defined.");
    return {};  // not reached: CV_Error raises an exception
}
|
|
|
|
// Offers this layer the chance to attach to (fuse with) the preceding
// backend node. The base implementation declines by returning an empty node.
Ptr<BackendNode> Layer::tryAttach(const Ptr<BackendNode>& node)
{
    return {};
}
|
|
|
|
// Tries to fuse an activation into this layer; base layers cannot, so
// the request is always rejected.
bool Layer::setActivation(const Ptr<ActivationLayer>&)
{
    return false;
}
|
|
// Tries to fuse a subsequent layer into this one; base layers cannot, so
// the request is always rejected.
bool Layer::tryFuse(Ptr<Layer>&)
{
    return false;
}
|
|
// Reports this layer's scale/shift factorization, if any. The base
// implementation exposes none: both outputs are reset to empty matrices.
void Layer::getScaleShift(Mat& scale, Mat& shift) const
{
    scale = Mat();
    shift = Mat();
}
|
|
|
|
void Layer::getScaleZeropoint(float& scale, int& zeropoint) const
|
|
{
|
|
scale = 1.f;
|
|
zeropoint = 0;
|
|
}
|
|
|
|
void Layer::unsetAttached()
|
|
{
|
|
setActivation(Ptr<ActivationLayer>());
|
|
}
|
|
|
|
// Builds a vector of mutable element pointers aliasing the elements of v.
// The pointers stay valid only while v is neither resized nor destroyed.
template <typename T>
static void vecToPVec(const std::vector<T>& v, std::vector<T*>& pv)
{
    pv.clear();
    pv.reserve(v.size());
    for (const T& elem : v)
        pv.push_back(const_cast<T*>(&elem));
}
|
|
|
|
void Layer::finalize(const std::vector<Mat>& inputs, std::vector<Mat>& outputs)
|
|
{
|
|
CV_TRACE_FUNCTION();
|
|
this->finalize((InputArrayOfArrays)inputs, (OutputArrayOfArrays)outputs);
|
|
}
|
|
|
|
// Default finalization: the base layer has nothing to precompute, so both
// parameters are intentionally unused.
void Layer::finalize(const std::vector<Mat*>& input, std::vector<Mat>& output)
{
    CV_UNUSED(input);
    CV_UNUSED(output);
}
|
|
|
|
// Materializes the arrays as Mat vectors and delegates to the legacy
// pointer-based finalize() overload.
void Layer::finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr)
{
    CV_TRACE_FUNCTION();
    std::vector<Mat> inputMats, outputMats;
    inputs_arr.getMatVector(inputMats);
    outputs_arr.getMatVector(outputMats);

    // The legacy overload takes raw pointers into the input vector.
    std::vector<Mat*> inputPtrs;
    vecToPVec(inputMats, inputPtrs);
    this->finalize(inputPtrs, outputMats);
}
|
|
|
|
std::vector<Mat> Layer::finalize(const std::vector<Mat>& inputs)
|
|
{
|
|
CV_TRACE_FUNCTION();
|
|
|
|
std::vector<Mat> outputs;
|
|
this->finalize(inputs, outputs);
|
|
return outputs;
|
|
}
|
|
|
|
// Deprecated forward entry point retained for compatibility with user code
// that overrides this signature; the base implementation intentionally
// does nothing.
void Layer::forward(std::vector<Mat*>& input, std::vector<Mat>& output, std::vector<Mat>& internals)
{
    // We kept this method for compatibility. DNN calls it now only to support users' implementations.
}
|
|
|
|
// Primary forward entry point. The base class simply routes to the generic
// fallback, which handles FP16 conversion and the legacy overloads.
void Layer::forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(name, "name", name.c_str());

    Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
|
|
|
|
// Generic reference forward implementation used when a layer does not provide
// its own forward(). Two paths:
//  - OpenCL FP16 target with CV_16F inputs: inputs are promoted to CV_32F
//    UMats, processed by the FP32 forward() overload, and the results are
//    converted back to CV_16F before being written to the caller's buffers.
//  - Otherwise: the Mat-based legacy forward(vector<Mat*>, ...) overload is
//    invoked and its outputs/internals are synced back to the caller.
void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(name, "name", name.c_str());

    if (preferableTarget == DNN_TARGET_OPENCL_FP16 && inputs_arr.depth() == CV_16F)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;
        std::vector<UMat> internals;

        std::vector<UMat> orig_inputs;
        std::vector<UMat> orig_outputs;
        std::vector<UMat> orig_internals;

        inputs_arr.getUMatVector(orig_inputs);
        outputs_arr.getUMatVector(orig_outputs);
        internals_arr.getUMatVector(orig_internals);

        // Promote FP16 inputs to FP32 for the reference computation.
        inputs.resize(orig_inputs.size());
        for (size_t i = 0; i < orig_inputs.size(); i++)
            orig_inputs[i].convertTo(inputs[i], CV_32F);

        // Allocate FP32 buffers matching the original output/internal shapes.
        outputs.resize(orig_outputs.size());
        for (size_t i = 0; i < orig_outputs.size(); i++)
            outputs[i].create(shape(orig_outputs[i]), CV_32F);

        internals.resize(orig_internals.size());
        for (size_t i = 0; i < orig_internals.size(); i++)
            internals[i].create(shape(orig_internals[i]), CV_32F);

        forward(inputs, outputs, internals);

        // Demote FP32 results back into the caller's FP16 buffers.
        for (size_t i = 0; i < outputs.size(); i++)
            outputs[i].convertTo(orig_outputs[i], CV_16F);

        // sync results back
        outputs_arr.assign(orig_outputs);
        internals_arr.assign(orig_internals);
        return;
    }
    std::vector<Mat> inpvec;
    std::vector<Mat> outputs;
    std::vector<Mat> internals;

    inputs_arr.getMatVector(inpvec);
    outputs_arr.getMatVector(outputs);
    internals_arr.getMatVector(internals);

    // Fix: use size_t for the index to avoid a signed/unsigned comparison
    // against inpvec.size(), consistent with the loops above.
    std::vector<Mat*> inputs(inpvec.size());
    for (size_t i = 0; i < inpvec.size(); i++)
        inputs[i] = &inpvec[i];

    this->forward(inputs, outputs, internals);

    // sync results back
    outputs_arr.assign(outputs);
    internals_arr.assign(internals);
}
|
|
|
|
void Layer::run(const std::vector<Mat>& inputs, std::vector<Mat>& outputs, std::vector<Mat>& internals)
|
|
{
|
|
CV_TRACE_FUNCTION();
|
|
|
|
this->finalize(inputs, outputs);
|
|
this->forward(inputs, outputs, internals);
|
|
}
|
|
|
|
bool Layer::tryQuantize(const std::vector<std::vector<float>>& scales,
|
|
const std::vector<std::vector<int>>& zeropoints, LayerParams& params)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
// Out-of-line destructor; the base layer owns no resources to release.
Layer::~Layer()
{
}
|
|
|
|
bool Layer::getMemoryShapes(const std::vector<MatShape>& inputs,
|
|
const int requiredOutputs,
|
|
std::vector<MatShape>& outputs,
|
|
std::vector<MatShape>& internals) const
|
|
{
|
|
CV_Assert(inputs.size());
|
|
outputs.assign(std::max(requiredOutputs, (int)inputs.size()), inputs[0]);
|
|
return false;
|
|
}
|
|
|
|
// Hook for layers with dynamically updated shapes; the base layer has
// nothing to refresh and reports success.
bool Layer::updateMemoryShapes(const std::vector<MatShape>& inputs)
{
    return true;
}
|
|
|
|
CV__DNN_INLINE_NS_END
|
|
}} // namespace cv::dnn
|