Mirror of https://github.com/opencv/opencv.git (synced 2024-11-25 03:30:34 +08:00)

Merge pull request #21662 from alalek:dnn_split
Commit 685797f403

File diff suppressed because it is too large
@@ -19,7 +19,44 @@ void initializeLayerFactory();
extern bool DNN_DIAGNOSTICS_RUN;
extern bool DNN_SKIP_REAL_IMPORT;

namespace detail {

//
// dnn_params.cpp
//

/// Network dump level
size_t getParam_DNN_NETWORK_DUMP();

/// This parameter is useful when running under Valgrind memory error detection
bool getParam_DNN_DISABLE_MEMORY_OPTIMIZATIONS();

#ifdef HAVE_OPENCL
bool getParam_DNN_OPENCL_ALLOW_ALL_DEVICES();
#endif

int getParam_DNN_BACKEND_DEFAULT();

// Additional checks (slows down execution!)
bool getParam_DNN_CHECK_NAN_INF();
bool getParam_DNN_CHECK_NAN_INF_DUMP();
bool getParam_DNN_CHECK_NAN_INF_RAISE_ERROR();


inline namespace detail {

typedef std::vector<MatShape> ShapesVec;

struct LayerShapes
{
    ShapesVec in, out, internal;
    // There is no guarantee that a layer which supports in-place computation
    // will actually be computed in-place (input.data_ptr == output.data_ptr).
    // If the layer reports that it can work in-place and the layers after it
    // no longer use the input blob, we set output = input.
    bool supportInPlace;
    LayerShapes() { supportInPlace = false; }
};


#define CALL_MEMBER_FN(object, ptrToMemFn) ((object).*(ptrToMemFn))

class NotImplemented : public Layer

@@ -82,8 +119,6 @@ struct NetImplBase
} // namespace detail


typedef std::vector<MatShape> ShapesVec;

static inline std::string toString(const ShapesVec& shapes, const std::string& name = std::string())
{
    std::ostringstream ss;
modules/dnn/src/dnn_params.cpp (Normal file, 67 lines)
@@ -0,0 +1,67 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#include "precomp.hpp"

#include "dnn_common.hpp"
#include <opencv2/core/utils/configuration.private.hpp>

namespace cv {
namespace dnn {
CV__DNN_INLINE_NS_BEGIN


size_t getParam_DNN_NETWORK_DUMP()
{
    static size_t DNN_NETWORK_DUMP = utils::getConfigurationParameterSizeT("OPENCV_DNN_NETWORK_DUMP", 0);
    return DNN_NETWORK_DUMP;
}

// this option is useful when running under Valgrind memory error detection
bool getParam_DNN_DISABLE_MEMORY_OPTIMIZATIONS()
{
    static bool DNN_DISABLE_MEMORY_OPTIMIZATIONS = utils::getConfigurationParameterBool("OPENCV_DNN_DISABLE_MEMORY_OPTIMIZATIONS", false);
    return DNN_DISABLE_MEMORY_OPTIMIZATIONS;
}

#ifdef HAVE_OPENCL
bool getParam_DNN_OPENCL_ALLOW_ALL_DEVICES()
{
    static bool DNN_OPENCL_ALLOW_ALL_DEVICES = utils::getConfigurationParameterBool("OPENCV_DNN_OPENCL_ALLOW_ALL_DEVICES", false);
    return DNN_OPENCL_ALLOW_ALL_DEVICES;
}
#endif

int getParam_DNN_BACKEND_DEFAULT()
{
    static int PARAM_DNN_BACKEND_DEFAULT = (int)utils::getConfigurationParameterSizeT("OPENCV_DNN_BACKEND_DEFAULT",
#ifdef HAVE_INF_ENGINE
            (size_t)DNN_BACKEND_INFERENCE_ENGINE
#else
            (size_t)DNN_BACKEND_OPENCV
#endif
    );
    return PARAM_DNN_BACKEND_DEFAULT;
}

// Additional checks (slows down execution!)
bool getParam_DNN_CHECK_NAN_INF()
{
    static bool DNN_CHECK_NAN_INF = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF", false);
    return DNN_CHECK_NAN_INF;
}
bool getParam_DNN_CHECK_NAN_INF_DUMP()
{
    static bool DNN_CHECK_NAN_INF_DUMP = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_DUMP", false);
    return DNN_CHECK_NAN_INF_DUMP;
}
bool getParam_DNN_CHECK_NAN_INF_RAISE_ERROR()
{
    static bool DNN_CHECK_NAN_INF_RAISE_ERROR = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_RAISE_ERROR", false);
    return DNN_CHECK_NAN_INF_RAISE_ERROR;
}


CV__DNN_INLINE_NS_END
}} // namespace cv::dnn
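Each getParam_* function above reads its OPENCV_DNN_* environment variable once and caches the result in a function-local static, so later queries are cheap and stable for the process lifetime. A minimal standalone sketch of the same pattern, using std::getenv instead of OpenCV's private configuration utilities (the helper names here are illustrative, not part of OpenCV):

#include <cstdlib>
#include <string>

// Hypothetical stand-in for utils::getConfigurationParameterBool().
static bool readBoolEnv(const char* name, bool defaultValue)
{
    const char* v = std::getenv(name);
    if (!v)
        return defaultValue;
    const std::string s(v);
    return !(s == "0" || s == "false" || s == "FALSE" || s == "off" || s == "OFF");
}

bool getParam_DNN_CHECK_NAN_INF_sketch()
{
    // Evaluated once on first call; subsequent calls return the cached value.
    static bool cached = readBoolEnv("OPENCV_DNN_CHECK_NAN_INF", false);
    return cached;
}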
modules/dnn/src/dnn_read.cpp (Normal file, 93 lines)
@@ -0,0 +1,93 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#include "precomp.hpp"


namespace cv {
namespace dnn {
CV__DNN_INLINE_NS_BEGIN


Net readNet(const String& _model, const String& _config, const String& _framework)
{
    String framework = toLowerCase(_framework);
    String model = _model;
    String config = _config;
    const std::string modelExt = model.substr(model.rfind('.') + 1);
    const std::string configExt = config.substr(config.rfind('.') + 1);
    if (framework == "caffe" || modelExt == "caffemodel" || configExt == "caffemodel" || modelExt == "prototxt" || configExt == "prototxt")
    {
        if (modelExt == "prototxt" || configExt == "caffemodel")
            std::swap(model, config);
        return readNetFromCaffe(config, model);
    }
    if (framework == "tensorflow" || modelExt == "pb" || configExt == "pb" || modelExt == "pbtxt" || configExt == "pbtxt")
    {
        if (modelExt == "pbtxt" || configExt == "pb")
            std::swap(model, config);
        return readNetFromTensorflow(model, config);
    }
    if (framework == "torch" || modelExt == "t7" || modelExt == "net" || configExt == "t7" || configExt == "net")
    {
        return readNetFromTorch(model.empty() ? config : model);
    }
    if (framework == "darknet" || modelExt == "weights" || configExt == "weights" || modelExt == "cfg" || configExt == "cfg")
    {
        if (modelExt == "cfg" || configExt == "weights")
            std::swap(model, config);
        return readNetFromDarknet(config, model);
    }
    if (framework == "dldt" || modelExt == "bin" || configExt == "bin" || modelExt == "xml" || configExt == "xml")
    {
        if (modelExt == "xml" || configExt == "bin")
            std::swap(model, config);
        return readNetFromModelOptimizer(config, model);
    }
    if (framework == "onnx" || modelExt == "onnx")
    {
        return readNetFromONNX(model);
    }
    CV_Error(Error::StsError, "Cannot determine an origin framework of files: " + model + (config.empty() ? "" : ", " + config));
}

Net readNet(const String& _framework, const std::vector<uchar>& bufferModel,
        const std::vector<uchar>& bufferConfig)
{
    String framework = toLowerCase(_framework);
    if (framework == "caffe")
        return readNetFromCaffe(bufferConfig, bufferModel);
    else if (framework == "tensorflow")
        return readNetFromTensorflow(bufferModel, bufferConfig);
    else if (framework == "darknet")
        return readNetFromDarknet(bufferConfig, bufferModel);
    else if (framework == "torch")
        CV_Error(Error::StsNotImplemented, "Reading Torch models from buffers");
    else if (framework == "dldt")
        return readNetFromModelOptimizer(bufferConfig, bufferModel);
    CV_Error(Error::StsError, "Cannot determine an origin framework with a name " + framework);
}

Net readNetFromModelOptimizer(const String& xml, const String& bin)
{
    return Net::readFromModelOptimizer(xml, bin);
}

Net readNetFromModelOptimizer(const std::vector<uchar>& bufferCfg, const std::vector<uchar>& bufferModel)
{
    return Net::readFromModelOptimizer(bufferCfg, bufferModel);
}

Net readNetFromModelOptimizer(
        const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize,
        const uchar* bufferWeightsPtr, size_t bufferWeightsSize)
{
    return Net::readFromModelOptimizer(
            bufferModelConfigPtr, bufferModelConfigSize,
            bufferWeightsPtr, bufferWeightsSize);
}


CV__DNN_INLINE_NS_END
}} // namespace cv::dnn
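From the caller's side, the extension-based dispatch above means model and config can be passed in either order; readNet() swaps them as needed. A small usage sketch with placeholder file names:

#include <opencv2/dnn.hpp>

int main()
{
    // All three calls load the same (hypothetical) Caffe model.
    cv::dnn::Net a = cv::dnn::readNet("net.caffemodel", "deploy.prototxt");
    cv::dnn::Net b = cv::dnn::readNet("deploy.prototxt", "net.caffemodel");
    cv::dnn::Net c = cv::dnn::readNetFromCaffe("deploy.prototxt", "net.caffemodel");
    return 0;
}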
modules/dnn/src/dnn_utils.cpp (Normal file, 158 lines)
@@ -0,0 +1,158 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#include "precomp.hpp"

#include <opencv2/imgproc.hpp>


namespace cv {
namespace dnn {
CV__DNN_INLINE_NS_BEGIN


Mat blobFromImage(InputArray image, double scalefactor, const Size& size,
        const Scalar& mean, bool swapRB, bool crop, int ddepth)
{
    CV_TRACE_FUNCTION();
    Mat blob;
    blobFromImage(image, blob, scalefactor, size, mean, swapRB, crop, ddepth);
    return blob;
}

void blobFromImage(InputArray image, OutputArray blob, double scalefactor,
        const Size& size, const Scalar& mean, bool swapRB, bool crop, int ddepth)
{
    CV_TRACE_FUNCTION();
    std::vector<Mat> images(1, image.getMat());
    blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop, ddepth);
}

Mat blobFromImages(InputArrayOfArrays images, double scalefactor, Size size,
        const Scalar& mean, bool swapRB, bool crop, int ddepth)
{
    CV_TRACE_FUNCTION();
    Mat blob;
    blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop, ddepth);
    return blob;
}

void blobFromImages(InputArrayOfArrays images_, OutputArray blob_, double scalefactor,
        Size size, const Scalar& mean_, bool swapRB, bool crop, int ddepth)
{
    CV_TRACE_FUNCTION();
    CV_CheckType(ddepth, ddepth == CV_32F || ddepth == CV_8U, "Blob depth should be CV_32F or CV_8U");
    if (ddepth == CV_8U)
    {
        CV_CheckEQ(scalefactor, 1.0, "Scaling is not supported for CV_8U blob depth");
        CV_Assert(mean_ == Scalar() && "Mean subtraction is not supported for CV_8U blob depth");
    }

    std::vector<Mat> images;
    images_.getMatVector(images);
    CV_Assert(!images.empty());
    for (size_t i = 0; i < images.size(); i++)
    {
        Size imgSize = images[i].size();
        if (size == Size())
            size = imgSize;
        if (size != imgSize)
        {
            if (crop)
            {
                float resizeFactor = std::max(size.width / (float)imgSize.width,
                        size.height / (float)imgSize.height);
                resize(images[i], images[i], Size(), resizeFactor, resizeFactor, INTER_LINEAR);
                Rect crop(Point(0.5 * (images[i].cols - size.width),
                        0.5 * (images[i].rows - size.height)),
                        size);
                images[i] = images[i](crop);
            }
            else
                resize(images[i], images[i], size, 0, 0, INTER_LINEAR);
        }
        if (images[i].depth() == CV_8U && ddepth == CV_32F)
            images[i].convertTo(images[i], CV_32F);
        Scalar mean = mean_;
        if (swapRB)
            std::swap(mean[0], mean[2]);

        images[i] -= mean;
        images[i] *= scalefactor;
    }

    size_t nimages = images.size();
    Mat image0 = images[0];
    int nch = image0.channels();
    CV_Assert(image0.dims == 2);
    if (nch == 3 || nch == 4)
    {
        int sz[] = { (int)nimages, nch, image0.rows, image0.cols };
        blob_.create(4, sz, ddepth);
        Mat blob = blob_.getMat();
        Mat ch[4];

        for (size_t i = 0; i < nimages; i++)
        {
            const Mat& image = images[i];
            CV_Assert(image.depth() == blob_.depth());
            nch = image.channels();
            CV_Assert(image.dims == 2 && (nch == 3 || nch == 4));
            CV_Assert(image.size() == image0.size());

            for (int j = 0; j < nch; j++)
                ch[j] = Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, j));
            if (swapRB)
                std::swap(ch[0], ch[2]);
            split(image, ch);
        }
    }
    else
    {
        CV_Assert(nch == 1);
        int sz[] = { (int)nimages, 1, image0.rows, image0.cols };
        blob_.create(4, sz, ddepth);
        Mat blob = blob_.getMat();

        for (size_t i = 0; i < nimages; i++)
        {
            const Mat& image = images[i];
            CV_Assert(image.depth() == blob_.depth());
            nch = image.channels();
            CV_Assert(image.dims == 2 && (nch == 1));
            CV_Assert(image.size() == image0.size());

            image.copyTo(Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, 0)));
        }
    }
}

void imagesFromBlob(const cv::Mat& blob_, OutputArrayOfArrays images_)
{
    CV_TRACE_FUNCTION();

    // A blob is a 4-dimensional matrix in floating-point precision
    // blob_[0] = batchSize = number of images
    // blob_[1] = number of channels
    // blob_[2] = height
    // blob_[3] = width
    CV_Assert(blob_.depth() == CV_32F);
    CV_Assert(blob_.dims == 4);

    images_.create(cv::Size(1, blob_.size[0]), blob_.depth());

    std::vector<Mat> vectorOfChannels(blob_.size[1]);
    for (int n = 0; n < blob_.size[0]; ++n)
    {
        for (int c = 0; c < blob_.size[1]; ++c)
        {
            vectorOfChannels[c] = getPlane(blob_, n, c);
        }
        cv::merge(vectorOfChannels, images_.getMatRef(n));
    }
}


CV__DNN_INLINE_NS_END
}} // namespace cv::dnn
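Caller-side view of the preprocessing above, using the public blobFromImage() API (file name and mean values are illustrative):

#include <opencv2/dnn.hpp>
#include <opencv2/imgcodecs.hpp>

int main()
{
    cv::Mat img = cv::imread("input.jpg");  // placeholder path
    if (img.empty())
        return 1;
    // Resize to cover the target size, center-crop to 224x224, swap B and R,
    // subtract the (illustrative) mean, then scale.
    cv::Mat blob = cv::dnn::blobFromImage(img, 1.0 / 255.0, cv::Size(224, 224),
                                          cv::Scalar(104, 117, 123),
                                          /*swapRB=*/true, /*crop=*/true);
    // blob has dims 1 x 3 x 224 x 224 (N x C x H x W), depth CV_32F.
    return 0;
}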
modules/dnn/src/layer.cpp (Normal file, 247 lines)
@@ -0,0 +1,247 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#include "precomp.hpp"

namespace cv {
namespace dnn {
CV__DNN_INLINE_NS_BEGIN


Layer::Layer() { preferableTarget = DNN_TARGET_CPU; }

Layer::Layer(const LayerParams& params)
    : blobs(params.blobs)
    , name(params.name)
    , type(params.type)
{
    preferableTarget = DNN_TARGET_CPU;
}

void Layer::setParamsFrom(const LayerParams& params)
{
    blobs = params.blobs;
    name = params.name;
    type = params.type;
}

int Layer::inputNameToIndex(String)
{
    return -1;
}

int Layer::outputNameToIndex(const String&)
{
    return 0;
}

bool Layer::supportBackend(int backendId)
{
    return backendId == DNN_BACKEND_OPENCV;
}

Ptr<BackendNode> Layer::initCUDA(
        void*,
        const std::vector<Ptr<BackendWrapper>>&,
        const std::vector<Ptr<BackendWrapper>>&)
{
    CV_Error(Error::StsNotImplemented, "CUDA pipeline of " + type + " layers is not defined.");
    return Ptr<BackendNode>();
}

Ptr<BackendNode> Layer::initVkCom(const std::vector<Ptr<BackendWrapper>>&)
{
    CV_Error(Error::StsNotImplemented, "VkCom pipeline of " + type + " layers is not defined.");
    return Ptr<BackendNode>();
}

Ptr<BackendNode> Layer::initHalide(const std::vector<Ptr<BackendWrapper>>&)
{
    CV_Error(Error::StsNotImplemented, "Halide pipeline of " + type + " layers is not defined.");
    return Ptr<BackendNode>();
}

Ptr<BackendNode> Layer::initNgraph(const std::vector<Ptr<BackendWrapper>>& inputs, const std::vector<Ptr<BackendNode>>& nodes)
{
    CV_Error(Error::StsNotImplemented, "Inference Engine pipeline of " + type + " layers is not defined.");
    return Ptr<BackendNode>();
}

Ptr<BackendNode> Layer::initWebnn(const std::vector<Ptr<BackendWrapper>>& inputs, const std::vector<Ptr<BackendNode>>& nodes)
{
    CV_Error(Error::StsNotImplemented, "WebNN pipeline of " + type + " layers is not defined.");
    return Ptr<BackendNode>();
}

Ptr<BackendNode> Layer::tryAttach(const Ptr<BackendNode>& node)
{
    return Ptr<BackendNode>();
}

bool Layer::setActivation(const Ptr<ActivationLayer>&) { return false; }
bool Layer::tryFuse(Ptr<Layer>&) { return false; }
void Layer::getScaleShift(Mat& scale, Mat& shift) const
{
    scale = Mat();
    shift = Mat();
}

void Layer::getScaleZeropoint(float& scale, int& zeropoint) const
{
    scale = 1.f;
    zeropoint = 0;
}

void Layer::unsetAttached()
{
    setActivation(Ptr<ActivationLayer>());
}

template <typename T>
static void vecToPVec(const std::vector<T>& v, std::vector<T*>& pv)
{
    pv.resize(v.size());
    for (size_t i = 0; i < v.size(); i++)
        pv[i] = const_cast<T*>(&v[i]);
}

void Layer::finalize(const std::vector<Mat>& inputs, std::vector<Mat>& outputs)
{
    CV_TRACE_FUNCTION();
    this->finalize((InputArrayOfArrays)inputs, (OutputArrayOfArrays)outputs);
}

void Layer::finalize(const std::vector<Mat*>& input, std::vector<Mat>& output)
{
    CV_UNUSED(input);
    CV_UNUSED(output);
}

void Layer::finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr)
{
    CV_TRACE_FUNCTION();
    std::vector<Mat> inputs, outputs;
    inputs_arr.getMatVector(inputs);
    outputs_arr.getMatVector(outputs);

    std::vector<Mat*> inputsp;
    vecToPVec(inputs, inputsp);
    this->finalize(inputsp, outputs);
}

std::vector<Mat> Layer::finalize(const std::vector<Mat>& inputs)
{
    CV_TRACE_FUNCTION();

    std::vector<Mat> outputs;
    this->finalize(inputs, outputs);
    return outputs;
}

void Layer::forward(std::vector<Mat*>& input, std::vector<Mat>& output, std::vector<Mat>& internals)
{
    // We kept this method for compatibility. DNN calls it now only to support users' implementations.
}

void Layer::forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(name, "name", name.c_str());

    Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}

void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(name, "name", name.c_str());

    if (preferableTarget == DNN_TARGET_OPENCL_FP16 && inputs_arr.depth() == CV_16S)
    {
        std::vector<UMat> inputs;
        std::vector<UMat> outputs;
        std::vector<UMat> internals;

        std::vector<UMat> orig_inputs;
        std::vector<UMat> orig_outputs;
        std::vector<UMat> orig_internals;

        inputs_arr.getUMatVector(orig_inputs);
        outputs_arr.getUMatVector(orig_outputs);
        internals_arr.getUMatVector(orig_internals);

        inputs.resize(orig_inputs.size());
        for (size_t i = 0; i < orig_inputs.size(); i++)
            convertFp16(orig_inputs[i], inputs[i]);

        outputs.resize(orig_outputs.size());
        for (size_t i = 0; i < orig_outputs.size(); i++)
            outputs[i].create(shape(orig_outputs[i]), CV_32F);

        internals.resize(orig_internals.size());
        for (size_t i = 0; i < orig_internals.size(); i++)
            internals[i].create(shape(orig_internals[i]), CV_32F);

        forward(inputs, outputs, internals);

        for (size_t i = 0; i < outputs.size(); i++)
            convertFp16(outputs[i], orig_outputs[i]);

        // sync results back
        outputs_arr.assign(orig_outputs);
        internals_arr.assign(orig_internals);
        return;
    }
    std::vector<Mat> inpvec;
    std::vector<Mat> outputs;
    std::vector<Mat> internals;

    inputs_arr.getMatVector(inpvec);
    outputs_arr.getMatVector(outputs);
    internals_arr.getMatVector(internals);

    std::vector<Mat*> inputs(inpvec.size());
    for (int i = 0; i < inpvec.size(); i++)
        inputs[i] = &inpvec[i];

    this->forward(inputs, outputs, internals);

    // sync results back
    outputs_arr.assign(outputs);
    internals_arr.assign(internals);
}

void Layer::run(const std::vector<Mat>& inputs, std::vector<Mat>& outputs, std::vector<Mat>& internals)
{
    CV_TRACE_FUNCTION();

    this->finalize(inputs, outputs);
    this->forward(inputs, outputs, internals);
}

bool Layer::tryQuantize(const std::vector<std::vector<float>>& scales,
        const std::vector<std::vector<int>>& zeropoints, LayerParams& params)
{
    return false;
}

Layer::~Layer() {}

bool Layer::getMemoryShapes(const std::vector<MatShape>& inputs,
        const int requiredOutputs,
        std::vector<MatShape>& outputs,
        std::vector<MatShape>& internals) const
{
    CV_Assert(inputs.size());
    outputs.assign(std::max(requiredOutputs, (int)inputs.size()), inputs[0]);
    return false;
}

bool Layer::updateMemoryShapes(const std::vector<MatShape>& inputs)
{
    return true;
}

CV__DNN_INLINE_NS_END
}} // namespace cv::dnn
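The virtuals above define the extension surface for custom layers: most have safe defaults, so a subclass typically overrides only shape inference and the forward pass. A minimal hypothetical example (the class and its behavior are illustrative, not part of OpenCV):

#include <opencv2/dnn.hpp>

// Hypothetical example layer: multiplies its single input by 2.
class DoubleLayer CV_FINAL : public cv::dnn::Layer
{
public:
    DoubleLayer(const cv::dnn::LayerParams& params) : Layer(params) {}

    static cv::Ptr<cv::dnn::Layer> create(cv::dnn::LayerParams& params)
    {
        return cv::Ptr<cv::dnn::Layer>(new DoubleLayer(params));
    }

    bool getMemoryShapes(const std::vector<cv::dnn::MatShape>& inputs,
                         int /*requiredOutputs*/,
                         std::vector<cv::dnn::MatShape>& outputs,
                         std::vector<cv::dnn::MatShape>& /*internals*/) const CV_OVERRIDE
    {
        outputs = inputs;  // output shape mirrors input shape
        return false;      // outputs do not alias inputs
    }

    void forward(cv::InputArrayOfArrays inputs_arr, cv::OutputArrayOfArrays outputs_arr,
                 cv::OutputArrayOfArrays /*internals_arr*/) CV_OVERRIDE
    {
        std::vector<cv::Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);
        inputs[0].convertTo(outputs[0], outputs[0].type(), 2.0);  // y = 2 * x
    }
};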
modules/dnn/src/layer_factory.cpp (Normal file, 111 lines)
@@ -0,0 +1,111 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#include "precomp.hpp"

#include <opencv2/imgproc.hpp>

#include <opencv2/dnn/layer_reg.private.hpp>  // getLayerFactoryImpl


namespace cv {
namespace dnn {
CV__DNN_INLINE_NS_BEGIN

Mutex& getLayerFactoryMutex()
{
    static Mutex* volatile instance = NULL;
    if (instance == NULL)
    {
        cv::AutoLock lock(getInitializationMutex());
        if (instance == NULL)
            instance = new Mutex();
    }
    return *instance;
}

static LayerFactory_Impl& getLayerFactoryImpl_()
{
    static LayerFactory_Impl impl;
    return impl;
}

LayerFactory_Impl& getLayerFactoryImpl()
{
    static LayerFactory_Impl* volatile instance = NULL;
    if (instance == NULL)
    {
        cv::AutoLock lock(getLayerFactoryMutex());
        if (instance == NULL)
        {
            instance = &getLayerFactoryImpl_();
            initializeLayerFactory();
        }
    }
    return *instance;
}

void LayerFactory::registerLayer(const String& type, Constructor constructor)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(type, "type", type.c_str());

    cv::AutoLock lock(getLayerFactoryMutex());
    LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type);

    if (it != getLayerFactoryImpl().end())
    {
        if (it->second.back() == constructor)
            CV_Error(cv::Error::StsBadArg, "Layer \"" + type + "\" already was registered");
        it->second.push_back(constructor);
    }
    getLayerFactoryImpl().insert(std::make_pair(type, std::vector<Constructor>(1, constructor)));
}

void LayerFactory::unregisterLayer(const String& type)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(type, "type", type.c_str());

    cv::AutoLock lock(getLayerFactoryMutex());

    LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type);
    if (it != getLayerFactoryImpl().end())
    {
        if (it->second.size() > 1)
            it->second.pop_back();
        else
            getLayerFactoryImpl().erase(it);
    }
}

bool LayerFactory::isLayerRegistered(const std::string& type)
{
    cv::AutoLock lock(getLayerFactoryMutex());
    auto& registeredLayers = getLayerFactoryImpl();
    return registeredLayers.find(type) != registeredLayers.end();
}

Ptr<Layer> LayerFactory::createLayerInstance(const String& type, LayerParams& params)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(type, "type", type.c_str());

    cv::AutoLock lock(getLayerFactoryMutex());
    LayerFactory_Impl::const_iterator it = getLayerFactoryImpl().find(type);

    if (it != getLayerFactoryImpl().end())
    {
        CV_Assert(!it->second.empty());
        return it->second.back()(params);
    }
    else
    {
        return Ptr<Layer>();  // NULL
    }
}


CV__DNN_INLINE_NS_END
}} // namespace cv::dnn
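Caller-side view of the factory above, assuming the hypothetical DoubleLayer sketch from the previous example: registrations stack per type name, and createLayerInstance() uses the most recent constructor.

// Hypothetical helper showing the register/create/unregister lifecycle.
void registerAndUseDoubleLayer()
{
    cv::dnn::LayerFactory::registerLayer("Double", DoubleLayer::create);
    CV_Assert(cv::dnn::LayerFactory::isLayerRegistered("Double"));

    cv::dnn::LayerParams params;
    params.type = "Double";
    params.name = "double1";
    cv::Ptr<cv::dnn::Layer> layer =
            cv::dnn::LayerFactory::createLayerInstance("Double", params);
    CV_Assert(layer);

    cv::dnn::LayerFactory::unregisterLayer("Double");  // pops this registration
}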
modules/dnn/src/layer_internals.hpp (Normal file, 335 lines)
@@ -0,0 +1,335 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#ifndef __OPENCV_DNN_SRC_LAYER_INTERNALS_HPP__
#define __OPENCV_DNN_SRC_LAYER_INTERNALS_HPP__

namespace cv { namespace dnn {
CV__DNN_INLINE_NS_BEGIN
inline namespace detail {

struct LayerPin
{
    int lid;
    int oid;

    LayerPin(int layerId = -1, int outputId = -1)
        : lid(layerId)
        , oid(outputId)
    {}

    bool valid() const
    {
        return (lid >= 0 && oid >= 0);
    }

    bool equal(const LayerPin& r) const
    {
        return (lid == r.lid && oid == r.oid);
    }

    bool operator<(const LayerPin& r) const
    {
        return lid < r.lid || (lid == r.lid && oid < r.oid);
    }

    bool operator==(const LayerPin& r) const
    {
        return lid == r.lid && oid == r.oid;
    }
};

struct LayerData
{
    LayerData()
        : id(-1)
        , dtype(CV_32F)
        , skip(false)
        , flag(0)
    {}
    LayerData(int _id, const String& _name, const String& _type, const int& _dtype, LayerParams& _params)
        : id(_id)
        , name(_name)
        , type(_type)
        , dtype(_dtype)
        , params(_params)
        , skip(false)
        , flag(0)
    {
        CV_TRACE_FUNCTION();

        // add logging info
        params.name = name;
        params.type = type;
    }

    int id;
    String name;
    String type;
    int dtype;  // Datatype of output blobs.
    LayerParams params;

    std::vector<LayerPin> inputBlobsId;
    std::set<int> inputLayersId;
    std::set<int> requiredOutputs;
    std::vector<LayerPin> consumers;
    std::vector<Ptr<BackendWrapper>> outputBlobsWrappers;
    std::vector<Ptr<BackendWrapper>> inputBlobsWrappers;
    std::vector<Ptr<BackendWrapper>> internalBlobsWrappers;

#ifdef HAVE_CUDA
    /* output ids which must be transferred to the host in the background
     * after the completion of the forward pass of the layer
     */
    std::vector<int> cudaD2HBackgroundTransfers;
#endif

    Ptr<Layer> layerInstance;
    std::vector<Mat> outputBlobs;
    std::vector<Mat*> inputBlobs;
    std::vector<Mat> internals;
    // Computation nodes of implemented backends (except DEFAULT).
    std::map<int, Ptr<BackendNode>> backendNodes;
    // Flag to skip this layer's computation for a specific backend.
    bool skip;

    int flag;

    Ptr<Layer> getLayerInstance()
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(type, "type", type.c_str());

        if (layerInstance)
            return layerInstance;

        layerInstance = LayerFactory::createLayerInstance(type, params);
        if (!layerInstance)
        {
            CV_Error(Error::StsError, "Can't create layer \"" + name + "\" of type \"" + type + "\"");
        }

        return layerInstance;
    }
};


// fake layer containing network input blobs
struct DataLayer : public Layer
{
    DataLayer()
        : Layer()
    {
        skip = false;
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        return backendId == DNN_BACKEND_OPENCV;
    }

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        // FIXIT: add wrapper without exception suppression
        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                forward_ocl(inputs_arr, outputs_arr, internals_arr))

        bool isFP16 = outputs_arr.depth() == CV_16S;

        std::vector<Mat> outputs, internals;
        outputs_arr.getMatVector(outputs);
        internals_arr.getMatVector(internals);

        for (int i = 0; i < inputsData.size(); ++i)
        {
            double scale = scaleFactors[i];
            Scalar& mean = means[i];

            CV_Assert(mean == Scalar() || inputsData[i].size[1] <= 4);
            if (isFP16)
                CV_CheckTypeEQ(outputs[i].type(), CV_16SC1, "");
            else
                CV_CheckTypeEQ(outputs[i].type(), CV_32FC1, "");

            bool singleMean = true;
            for (int j = 1; j < std::min(4, inputsData[i].size[1]) && singleMean; ++j)
            {
                singleMean = mean[j] == mean[j - 1];
            }

            if (singleMean)
            {
                if (isFP16)
                {
                    Mat input_f32;
                    inputsData[i].convertTo(input_f32, CV_32F, scale, -mean[0] * scale);
                    convertFp16(input_f32, outputs[i]);
                }
                else
                {
                    inputsData[i].convertTo(outputs[i], CV_32F, scale, -mean[0] * scale);
                }
            }
            else
            {
                for (int n = 0; n < inputsData[i].size[0]; ++n)
                {
                    for (int c = 0; c < inputsData[i].size[1]; ++c)
                    {
                        Mat inp = getPlane(inputsData[i], n, c);
                        Mat out = getPlane(outputs[i], n, c);
                        if (isFP16)
                        {
                            Mat input_f32;
                            inp.convertTo(input_f32, CV_32F, scale, -mean[c] * scale);
                            convertFp16(input_f32, out);
                        }
                        else
                        {
                            inp.convertTo(out, CV_32F, scale, -mean[c] * scale);
                        }
                    }
                }
            }
        }
    }

#ifdef HAVE_OPENCL
    bool forward_ocl(InputArrayOfArrays, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
    {
        bool isFP16 = outputs_.depth() == CV_16S;

        std::vector<UMat> outputs;
        outputs_.getUMatVector(outputs);

        for (int i = 0; i < inputsData.size(); ++i)
        {
            Mat inputData = inputsData[i];

            double scale = scaleFactors[i];
            Scalar& mean = means[i];

            CV_Assert(mean == Scalar() || inputData.size[1] <= 4);
            if (isFP16)
                CV_CheckTypeEQ(outputs[i].type(), CV_16SC1, "");
            else
                CV_CheckTypeEQ(outputs[i].type(), CV_32FC1, "");

            bool singleMean = true;
            for (int j = 1; j < std::min(4, inputData.size[1]) && singleMean; ++j)
            {
                singleMean = mean[j] == mean[j - 1];
            }

            if (singleMean)
            {
                if (isFP16)
                {
                    UMat input_i;
                    inputData.convertTo(input_i, CV_32F, scale, -mean[0] * scale);
                    convertFp16(input_i, outputs[i]);
                }
                else
                {
                    inputData.convertTo(outputs[i], CV_32F, scale, -mean[0] * scale);
                }
            }
            else
            {
                for (int n = 0; n < inputData.size[0]; ++n)
                {
                    for (int c = 0; c < inputData.size[1]; ++c)
                    {
                        Mat inp = getPlane(inputData, n, c);

                        std::vector<cv::Range> plane(4, Range::all());
                        plane[0] = Range(n, n + 1);
                        plane[1] = Range(c, c + 1);
                        UMat out = outputs[i](plane).reshape(1, inp.dims, inp.size);

                        if (isFP16)
                        {
                            UMat input_i;
                            inp.convertTo(input_i, CV_32F, scale, -mean[c] * scale);
                            convertFp16(input_i, out);
                        }
                        else
                        {
                            inp.convertTo(out, CV_32F, scale, -mean[c] * scale);
                        }
                    }
                }
            }
        }
        return true;
    }
#endif

    int outputNameToIndex(const String& tgtName) CV_OVERRIDE
    {
        int idx = (int)(std::find(outNames.begin(), outNames.end(), tgtName) - outNames.begin());
        return (idx < (int)outNames.size()) ? idx : -1;
    }

    void setNames(const std::vector<String>& names)
    {
        outNames.assign(names.begin(), names.end());
        shapes.clear();
        shapes.resize(outNames.size());
    }

    void setInputShape(const String& tgtName, const MatShape& shape)
    {
        std::vector<String>::const_iterator it = std::find(outNames.begin(), outNames.end(), tgtName);
        CV_Check(tgtName, it != outNames.end(), "Unknown input");
        int idx = (int)(it - outNames.begin());

        CV_Assert(idx < (int)shapes.size());
        CV_Check(tgtName, shapes[idx].empty(), "Input shape redefinition is not allowed");
        shapes[idx] = shape;
    }

    bool getMemoryShapes(const std::vector<MatShape>& inputs,
            const int requiredOutputs,
            std::vector<MatShape>& outputs,
            std::vector<MatShape>& internals) const CV_OVERRIDE
    {
        CV_Assert(inputs.size() == requiredOutputs);
        outputs.assign(inputs.begin(), inputs.end());
        return false;
    }

    virtual void finalize(InputArrayOfArrays, OutputArrayOfArrays outputs_arr) CV_OVERRIDE
    {
        std::vector<Mat> outputs;
        outputs_arr.getMatVector(outputs);

        CV_Assert_N(outputs.size() == scaleFactors.size(), outputs.size() == means.size(),
                inputsData.size() == outputs.size());
        skip = true;
        for (int i = 0; skip && i < inputsData.size(); ++i)
        {
            if (inputsData[i].data != outputs[i].data || scaleFactors[i] != 1.0 || means[i] != Scalar())
                skip = false;
        }
    }


    std::vector<String> outNames;
    std::vector<MatShape> shapes;
    // Preprocessing parameters for each network input.
    std::vector<double> scaleFactors;
    std::vector<Scalar> means;
    std::vector<Mat> inputsData;
    bool skip;
};  // DataLayer


}  // namespace detail
CV__DNN_INLINE_NS_END
}} // namespace cv::dnn
#endif  // __OPENCV_DNN_SRC_LAYER_INTERNALS_HPP__
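DataLayer's preprocessing above relies on one fused convertTo() pass: convertTo(dst, type, alpha, beta) computes dst = alpha*src + beta, so passing alpha = scale and beta = -mean*scale yields out = scale*(in - mean). A standalone check of that identity:

#include <opencv2/core.hpp>

int main()
{
    cv::Mat in = (cv::Mat_<uchar>(1, 3) << 10, 20, 30);
    const double scale = 0.5, mean = 10.0;
    cv::Mat out;
    in.convertTo(out, CV_32F, scale, -mean * scale);  // out = scale * (in - mean)
    // out is now [0, 5, 10].
    return 0;
}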
@@ -8,7 +8,7 @@
 namespace cv { namespace dnn {
 CV__DNN_INLINE_NS_BEGIN
 
-namespace detail {
+inline namespace detail {
 
 class NotImplementedImpl CV_FINAL : public NotImplemented
 {
modules/dnn/src/legacy_backend.cpp (Normal file, 122 lines)
@@ -0,0 +1,122 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#include "precomp.hpp"

#include "legacy_backend.hpp"

#include "op_halide.hpp"
#include "op_inf_engine.hpp"
#include "ie_ngraph.hpp"
#include "op_vkcom.hpp"
#include "op_cuda.hpp"
#include "op_webnn.hpp"

namespace cv {
namespace dnn {
CV__DNN_INLINE_NS_BEGIN


BackendNode::BackendNode(int backendId)
    : backendId(backendId)
{}

BackendNode::~BackendNode() {}

BackendWrapper::BackendWrapper(int backendId, int targetId)
    : backendId(backendId)
    , targetId(targetId)
{}

BackendWrapper::BackendWrapper(int targetId, const cv::Mat& m)
{
    CV_Error(Error::StsNotImplemented,
            "Constructor of backend wrapper must be implemented");
}

BackendWrapper::BackendWrapper(const Ptr<BackendWrapper>& base, const MatShape& shape)
{
    CV_Error(Error::StsNotImplemented,
            "Constructor of backend wrapper must be implemented");
}

BackendWrapper::~BackendWrapper() {}



inline namespace detail {


Ptr<BackendWrapper> wrapMat(int backendId, int targetId, cv::Mat& m)
{
    if (backendId == DNN_BACKEND_OPENCV)
    {
        if (targetId == DNN_TARGET_CPU)
            return Ptr<BackendWrapper>();
#ifdef HAVE_OPENCL
        else if (IS_DNN_OPENCL_TARGET(targetId))
            return OpenCLBackendWrapper::create(m);
#endif
        else
            CV_Error(Error::StsNotImplemented, "Unknown/unsupported target identifier");
    }
    else if (backendId == DNN_BACKEND_HALIDE)
    {
        CV_Assert(haveHalide());
#ifdef HAVE_HALIDE
        return Ptr<BackendWrapper>(new HalideBackendWrapper(targetId, m));
#endif  // HAVE_HALIDE
    }
    else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
    {
        CV_ERROR_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019;
    }
    else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
    {
#ifdef HAVE_DNN_NGRAPH
        return Ptr<BackendWrapper>(new NgraphBackendWrapper(targetId, m));
#else
        CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of OpenVINO / Inference Engine + nGraph");
#endif
    }
    else if (backendId == DNN_BACKEND_WEBNN)
    {
#ifdef HAVE_WEBNN
        return Ptr<BackendWrapper>(new WebnnBackendWrapper(targetId, m));
#else
        CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of WebNN");
#endif
    }
    else if (backendId == DNN_BACKEND_VKCOM)
    {
        CV_Assert(haveVulkan());
#ifdef HAVE_VULKAN
        return Ptr<BackendWrapper>(new VkComBackendWrapper(m));
#endif  // HAVE_VULKAN
    }
    else if (backendId == DNN_BACKEND_CUDA)
    {
        CV_Assert(haveCUDA());

#ifdef HAVE_CUDA
        switch (targetId)
        {
        case DNN_TARGET_CUDA:
            return CUDABackendWrapperFP32::create(m);
        case DNN_TARGET_CUDA_FP16:
            return CUDABackendWrapperFP16::create(m);
        default:
            CV_Assert(IS_DNN_CUDA_TARGET(targetId));
        }
#endif
    }
    else
        CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
    return Ptr<BackendWrapper>();  // TODO Error?
}  // wrapMat()


}  // namespace detail
CV__DNN_INLINE_NS_END
}} // namespace cv::dnn
modules/dnn/src/legacy_backend.hpp (Normal file, 339 lines)
@@ -0,0 +1,339 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#ifndef __OPENCV_DNN_SRC_LEGACY_BACKEND_HPP__
#define __OPENCV_DNN_SRC_LEGACY_BACKEND_HPP__

#include "layer_internals.hpp"  // LayerPin LayerData DataLayer

namespace cv { namespace dnn {
CV__DNN_INLINE_NS_BEGIN
inline namespace detail {


#ifdef HAVE_OPENCL
class OpenCLBackendWrapper : public BackendWrapper
{
public:
    OpenCLBackendWrapper(Mat& m)
        : BackendWrapper(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL)
    {
        m.copyTo(umat);
        host = &m;
        hostDirty = false;
    }

    OpenCLBackendWrapper(const Ptr<BackendWrapper>& baseBuffer, Mat& m)
        : BackendWrapper(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL)
    {
        Ptr<OpenCLBackendWrapper> base = baseBuffer.dynamicCast<OpenCLBackendWrapper>();
        CV_Assert(!base.empty());

        host = &m;

        int shape[] = { 1, (int)base->umat.total() };
        umat = base->umat.reshape(1, 2, &shape[0])
                       .colRange(0, host->total())
                       .reshape(1, host->dims, &host->size[0]);
        hostDirty = false;
    }

    static Ptr<BackendWrapper> create(Mat& m)
    {
        return Ptr<BackendWrapper>(new OpenCLBackendWrapper(m));
    }

    static Ptr<BackendWrapper> create(const Ptr<BackendWrapper>& baseBuffer, Mat& m)
    {
        return Ptr<BackendWrapper>(new OpenCLBackendWrapper(baseBuffer, m));
    }

    static std::vector<UMat> getUMatVector(const std::vector<Ptr<BackendWrapper>>& wrappers)
    {
        const int numWrappers = wrappers.size();
        std::vector<UMat> mats(wrappers.size());
        for (int i = 0; i < numWrappers; ++i)
        {
            Ptr<OpenCLBackendWrapper> umatWrapper = wrappers[i].dynamicCast<OpenCLBackendWrapper>();
            CV_Assert(!umatWrapper.empty());
            umatWrapper->copyToDevice();
            mats[i] = umatWrapper->umat;
        }
        return mats;
    }

    // Replaces all umats in the wrappers with the given ones.
    static void update(const std::vector<Ptr<BackendWrapper>>& wrappers,
            const std::vector<UMat>& umats)
    {
        CV_Assert(wrappers.size() == umats.size());
        for (int i = 0, n = umats.size(); i < n; ++i)
        {
            Ptr<OpenCLBackendWrapper> umatWrapper = wrappers[i].dynamicCast<OpenCLBackendWrapper>();
            CV_Assert(!umatWrapper.empty());
            umatWrapper->umat = umats[i];
        }
    }

    ~OpenCLBackendWrapper() {}

    // Copies data from device to host memory.
    virtual void copyToHost() CV_OVERRIDE
    {
        umat.copyTo(*host);
    }

    virtual void setHostDirty() CV_OVERRIDE
    {
        hostDirty = true;
    }

    void copyToDevice()
    {
        if (hostDirty)
        {
            host->copyTo(umat);
            hostDirty = false;
        }
    }

private:
    UMat umat;
    Mat* host;
    bool hostDirty;
};  // OpenCLBackendWrapper
#endif  // HAVE_OPENCL


struct BlobManager
{
public:
    // Increase the reference counter of a layer output.
    void addReference(const LayerPin& lp)
    {
        std::map<LayerPin, int>::iterator it = refCounter.find(lp);
        if (it == refCounter.end())
            refCounter[lp] = 1;
        else
            it->second += 1;
    }

    void addReferences(const std::vector<LayerPin>& pins)
    {
        for (int i = 0; i < pins.size(); i++)
        {
            addReference(pins[i]);
        }
    }

    // Returns the number of references to the allocated memory used by a
    // specific layer blob.
    int numReferences(const LayerPin& lp)
    {
        std::map<LayerPin, LayerPin>::const_iterator mapIt = reuseMap.find(lp);
        CV_Assert(mapIt != reuseMap.end());
        LayerPin memHost = mapIt->second;

        std::map<LayerPin, int>::const_iterator refIt = refCounter.find(memHost);
        CV_Assert(refIt != refCounter.end());
        return refIt->second;
    }

    // Reuse data allocated in <host> inside the <user> blob.
    void reuse(const LayerPin& host, const LayerPin& user)
    {
        CV_Assert(reuseMap.find(user) == reuseMap.end());
        CV_Assert(reuseMap.find(host) != reuseMap.end());
        LayerPin memHost = reuseMap[host];
        reuseMap[user] = memHost;
        if (refCounter.find(memHost) != refCounter.end())
        {
            std::map<LayerPin, int>::iterator userRefIt = refCounter.find(user);
            if (userRefIt != refCounter.end())
            {
                refCounter[memHost] += userRefIt->second;
                refCounter.erase(userRefIt);
            }
            else
                refCounter[memHost] += 1;
        }
    }

    // Decrease the reference counter of the memory allocated for a specific blob.
    void releaseReference(const LayerPin& lp)
    {
        std::map<LayerPin, LayerPin>::const_iterator mapIt = reuseMap.find(lp);
        CV_Assert(mapIt != reuseMap.end());

        std::map<LayerPin, int>::iterator refIt = refCounter.find(mapIt->second);
        CV_Assert(refIt != refCounter.end());
        CV_Assert(refIt->second > 0);
        refIt->second -= 1;
    }

    void releaseReferences(const std::vector<LayerPin>& pins)
    {
        for (int i = 0; i < pins.size(); i++)
        {
            releaseReference(pins[i]);
        }
    }

    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, const int& dtype)
    {
        if (!getParam_DNN_DISABLE_MEMORY_OPTIMIZATIONS())
        {
            Mat bestBlob;
            LayerPin bestBlobPin;

            std::map<LayerPin, Mat>::const_iterator hostIt;
            std::map<LayerPin, int>::const_iterator refIt;

            const int targetTotal = total(shape);
            int bestBlobTotal = INT_MAX;

            for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt)
            {
                refIt = refCounter.find(hostIt->first);
                // Use only blobs that had references before; otherwise the
                // blob might still be in use as an output.
                if (refIt != refCounter.end() && refIt->second == 0)
                {
                    const Mat& unusedBlob = hostIt->second;
                    if (unusedBlob.total() >= targetTotal && unusedBlob.total() < bestBlobTotal && unusedBlob.type() == dtype)
                    {
                        bestBlobPin = hostIt->first;
                        bestBlob = unusedBlob;
                        bestBlobTotal = unusedBlob.total();
                    }
                }
            }
            if (!bestBlob.empty())
            {
                reuse(bestBlobPin, lp);
                dst = bestBlob.reshape(1, 1).colRange(0, targetTotal).reshape(1, shape);
                return;
            }
        }

        {
            // If dst has already been allocated with total(shape) elements,
            // it will not be recreated and the dst.data pointer stays the same.
            dst.create(shape, dtype);
            addHost(lp, dst);
        }
    }

    void allocateBlobsForLayer(LayerData& ld, const LayerShapes& layerShapes,
            std::vector<LayerPin>& pinsForInternalBlobs)
    {
        CV_TRACE_FUNCTION();

        pinsForInternalBlobs.clear();

        std::vector<Mat> &outputBlobs = ld.outputBlobs,
                         &internalBlobs = ld.internals;

        const ShapesVec &outShapes = layerShapes.out,
                internalShapes = layerShapes.internal;

        outputBlobs.resize(std::max((size_t)1, outShapes.size()));  // a layer produces at least one output blob
        internalBlobs.resize(internalShapes.size());

        CV_Assert(ld.requiredOutputs.size() <= outShapes.size());

        // Check whether the layer can work in-place.
        bool inPlace = false;
        if (layerShapes.supportInPlace)
        {
            if (ld.inputBlobs.size() == 1)
            {
                // Get the number of references to the input memory.
                int numRef = numReferences(ld.inputBlobsId[0]);
                // In-place is allowed if the current layer is the one and only consumer of this blob.
                inPlace = numRef == 1;
            }
        }

        ShapesVec shapes(outShapes);
        shapes.insert(shapes.end(), internalShapes.begin(), internalShapes.end());
        std::vector<Mat*> blobs;
        for (int i = 0; i < outputBlobs.size(); i++)
        {
            blobs.push_back(&outputBlobs[i]);
        }

        for (int i = 0; i < internalBlobs.size(); i++)
        {
            blobs.push_back(&internalBlobs[i]);
            if (total(internalShapes[i]))
            {
                pinsForInternalBlobs.push_back(LayerPin(ld.id, ld.outputBlobs.size() + i));
            }
        }

        addReferences(pinsForInternalBlobs);

        std::map<int, std::vector<int>> idxSizes;
        for (int i = 0; i < shapes.size(); i++)
        {
            idxSizes[total(shapes[i])].push_back(i);
        }

        std::map<int, std::vector<int>>::reverse_iterator it;
        for (it = idxSizes.rbegin(); it != idxSizes.rend(); it++)
        {
            for (int j = 0; j < it->second.size(); j++)
            {
                int index = it->second[j];
                if (total(shapes[index]))
                {
                    LayerPin blobPin(ld.id, index);
                    if (index < outShapes.size() && inPlace)
                    {
                        CV_Assert(ld.inputBlobs[0]->total() == total(shapes[index]));
                        ld.outputBlobs[index] = ld.inputBlobs[0]->reshape(1, shapes[index]);
                        reuse(ld.inputBlobsId[0], blobPin);
                    }
                    else
                        reuseOrCreate(shapes[index], blobPin, *blobs[index], ld.dtype);
                }
            }
        }
    }

    // Clear the internal state. Called before every reallocation.
    void reset()
    {
        CV_TRACE_FUNCTION();

        refCounter.clear();
        reuseMap.clear();
        memHosts.clear();
    }

private:
    // Register allocated memory.
    void addHost(const LayerPin& lp, const Mat& mat)
    {
        CV_Assert(memHosts.find(lp) == memHosts.end());
        reuseMap[lp] = lp;
        memHosts[lp] = mat;
    }

    std::map<LayerPin, int> refCounter;
    // Maps a pin to the origin blob (for which the memory was allocated first).
    // For origin blobs, key == value.
    std::map<LayerPin, LayerPin> reuseMap;
    std::map<LayerPin, Mat> memHosts;
};  // BlobManager


Ptr<BackendWrapper> wrapMat(int backendId, int targetId, cv::Mat& m);


}  // namespace detail
CV__DNN_INLINE_NS_END
}} // namespace cv::dnn
#endif  // __OPENCV_DNN_SRC_LEGACY_BACKEND_HPP__
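The core trick in reuseOrCreate() above is that a spare allocation with at least total(shape) elements can be re-viewed under the requested shape without any copy, via two reshapes and a colRange. A standalone sketch of that mechanism:

#include <opencv2/core.hpp>

int main()
{
    cv::Mat big(1, 100, CV_32F);            // previously allocated "unused" blob
    std::vector<int> shape = { 2, 3, 4 };   // 24 elements requested
    cv::Mat view = big.reshape(1, 1)        // flatten to a single row
                      .colRange(0, 24)      // take exactly total(shape) elements
                      .reshape(1, shape);   // re-view as 2x3x4
    CV_Assert(view.data == big.data);       // same memory, no copy
    return 0;
}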
@@ -1567,4 +1567,4 @@ int TextDetectionModel_DB::getMaxCandidates() const
}


}} // namespace
modules/dnn/src/net.cpp (Normal file, 414 lines)
@@ -0,0 +1,414 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "precomp.hpp"
|
||||
|
||||
#include "net_impl.hpp"
|
||||
|
||||
namespace cv {
|
||||
namespace dnn {
|
||||
CV__DNN_INLINE_NS_BEGIN
|
||||
|
||||
Net::Net()
|
||||
: impl(makePtr<Net::Impl>())
|
||||
{
|
||||
}
|
||||
|
||||
Net::~Net()
|
||||
{
|
||||
}
|
||||
|
||||
int Net::addLayer(const String& name, const String& type, const int& dtype, LayerParams& params)
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_Assert(impl);
|
||||
return impl->addLayer(name, type, dtype, params);
|
||||
}
|
||||
|
||||
int Net::addLayer(const String& name, const String& type, LayerParams& params)
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
return addLayer(name, type, CV_32F, params);
|
||||
}
|
||||
|
||||
int Net::addLayerToPrev(const String& name, const String& type, const int& dtype, LayerParams& params)
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_Assert(impl);
|
||||
return impl->addLayerToPrev(name, type, dtype, params);
|
||||
}
|
||||
|
||||
int Net::addLayerToPrev(const String& name, const String& type, LayerParams& params)
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
return addLayerToPrev(name, type, CV_32F, params);
|
||||
}
|
||||
|
||||
void Net::connect(int outLayerId, int outNum, int inpLayerId, int inpNum)
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_Assert(impl);
|
||||
impl->connect(outLayerId, outNum, inpLayerId, inpNum);
|
||||
}
|
||||
|
||||
void Net::connect(String _outPin, String _inPin)
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
|
||||
CV_Assert(impl);
|
||||
LayerPin outPin = impl->getPinByAlias(_outPin);
|
||||
LayerPin inpPin = impl->getPinByAlias(_inPin);
|
||||
|
||||
CV_Assert(outPin.valid() && inpPin.valid());
|
||||
|
||||
impl->connect(outPin.lid, outPin.oid, inpPin.lid, inpPin.oid);
|
||||
}
|
||||
|
||||
int Net::registerOutput(const std::string& outputName, int layerId, int outputPort)
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_Assert(impl);
|
||||
return impl->registerOutput(outputName, layerId, outputPort);
|
||||
}
|
||||
|
||||
Mat Net::forward(const String& outputName)
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_Assert(impl);
|
||||
CV_Assert(!empty());
|
||||
return impl->forward(outputName);
|
||||
}
|
||||
|
||||
AsyncArray Net::forwardAsync(const String& outputName)
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_Assert(impl);
|
||||
CV_Assert(!empty());
|
||||
return impl->forwardAsync(outputName);
|
||||
}
|
||||
|
||||
void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_Assert(impl);
|
||||
CV_Assert(!empty());
|
||||
return impl->forward(outputBlobs, outputName);
|
||||
}
|
||||
|
||||
void Net::forward(OutputArrayOfArrays outputBlobs,
|
||||
const std::vector<String>& outBlobNames)
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_Assert(impl);
|
||||
CV_Assert(!empty());
|
||||
return impl->forward(outputBlobs, outBlobNames);
|
||||
}
|
||||
|
||||
void Net::forward(std::vector<std::vector<Mat>>& outputBlobs,
|
||||
const std::vector<String>& outBlobNames)
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_Assert(impl);
|
||||
CV_Assert(!empty());
|
||||
return impl->forward(outputBlobs, outBlobNames);
|
||||
}
|
||||
|
||||
// FIXIT drop from inference API
|
||||
Net Net::quantize(InputArrayOfArrays calibData, int inputsDtype, int outputsDtype)
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_Assert(impl);
|
||||
CV_Assert(!empty());
|
||||
return impl->quantize(calibData, inputsDtype, outputsDtype);
|
||||
}
|
||||
|
||||
// FIXIT drop from inference API
|
||||
void Net::getInputDetails(std::vector<float>& scales, std::vector<int>& zeropoints) const
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_Assert(impl);
|
||||
CV_Assert(!empty());
|
||||
return impl->getInputDetails(scales, zeropoints);
|
||||
}
|
||||
|
||||
// FIXIT drop from inference API
|
||||
void Net::getOutputDetails(std::vector<float>& scales, std::vector<int>& zeropoints) const
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_Assert(impl);
|
||||
CV_Assert(!empty());
|
||||
return impl->getOutputDetails(scales, zeropoints);
|
||||
}
|
||||
|
||||
void Net::setPreferableBackend(int backendId)
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG(backendId);
|
||||
CV_Assert(impl);
|
||||
return impl->setPreferableBackend(backendId);
|
||||
}
|
||||
|
||||
void Net::setPreferableTarget(int targetId)
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_TRACE_ARG(targetId);
|
||||
CV_Assert(impl);
|
||||
return impl->setPreferableTarget(targetId);
|
||||
}
|
||||
|
||||
void Net::setInputsNames(const std::vector<String>& inputBlobNames)
{
    CV_TRACE_FUNCTION();
    CV_Assert(impl);
    return impl->setInputsNames(inputBlobNames);
}

void Net::setInputShape(const String& inputName, const MatShape& shape)
{
    CV_TRACE_FUNCTION();
    CV_Assert(impl);
    return impl->setInputShape(inputName, shape);
}

void Net::setInput(InputArray blob, const String& name, double scalefactor, const Scalar& mean)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(name, "name", name.c_str());
    CV_Assert(impl);
    return impl->setInput(blob, name, scalefactor, mean);
}

Mat Net::getParam(int layer, int numParam) const
{
    CV_Assert(impl);
    return impl->getParam(layer, numParam);
}

void Net::setParam(int layer, int numParam, const Mat& blob)
{
    CV_Assert(impl);
    return impl->setParam(layer, numParam, blob);
}

int Net::getLayerId(const String& layer) const
{
    CV_Assert(impl);
    return impl->getLayerId(layer);
}

String Net::dump()
{
    CV_TRACE_FUNCTION();
    CV_Assert(impl);
    CV_Assert(!empty());
    return impl->dump(true);
}

void Net::dumpToFile(const String& path)
{
    CV_TRACE_FUNCTION();
    CV_Assert(impl);
    CV_Assert(!empty());
    std::ofstream file(path.c_str());
    file << dump();
    file.close();
}

Ptr<Layer> Net::getLayer(int layerId) const
{
    CV_Assert(impl);
    return impl->getLayer(layerId);
}
Ptr<Layer> Net::getLayer(const LayerId& layerId) const
{
    CV_Assert(impl);
    return impl->getLayer(layerId);
}

std::vector<Ptr<Layer>> Net::getLayerInputs(int layerId) const
{
    CV_Assert(impl);
    return impl->getLayerInputs(layerId);
}

std::vector<String> Net::getLayerNames() const
{
    CV_Assert(impl);
    return impl->getLayerNames();
}

bool Net::empty() const
{
    CV_Assert(impl);
    return impl->empty();
}

// FIXIT drop "unconnected" API
std::vector<int> Net::getUnconnectedOutLayers() const
{
    CV_TRACE_FUNCTION();
    CV_Assert(impl);
    return impl->getUnconnectedOutLayers();
}

// FIXIT drop "unconnected" API
std::vector<String> Net::getUnconnectedOutLayersNames() const
{
    CV_TRACE_FUNCTION();
    CV_Assert(impl);
    return impl->getUnconnectedOutLayersNames();
}

void Net::getLayersShapes(const ShapesVec& netInputShapes,
                          std::vector<int>& layersIds,
                          std::vector<ShapesVec>& inLayersShapes,
                          std::vector<ShapesVec>& outLayersShapes) const
{
    CV_Assert(impl);
    return impl->getLayersShapes(netInputShapes, layersIds, inLayersShapes, outLayersShapes);
}

void Net::getLayersShapes(const MatShape& netInputShape,
                          std::vector<int>& layerIds,
                          std::vector<ShapesVec>& inLayersShapes,
                          std::vector<ShapesVec>& outLayersShapes) const
{
    getLayersShapes(ShapesVec(1, netInputShape),
                    layerIds, inLayersShapes, outLayersShapes);
}

void Net::getLayerShapes(const MatShape& netInputShape,
                         const int layerId,
                         ShapesVec& inLayerShapes,
                         ShapesVec& outLayerShapes) const
{
    getLayerShapes(ShapesVec(1, netInputShape),
                   layerId, inLayerShapes, outLayerShapes);
}

void Net::getLayerShapes(const ShapesVec& netInputShapes,
                         const int layerId,
                         ShapesVec& inLayerShapes,
                         ShapesVec& outLayerShapes) const
{
    CV_Assert(impl);
    LayerShapes shapes;
    impl->getLayerShapes(netInputShapes, layerId, shapes);
    inLayerShapes = shapes.in;
    outLayerShapes = shapes.out;
}

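// Usage sketch for the shape-inference helpers above, assuming a hypothetical
// NCHW input of 1x3x224x224:
//
//     MatShape inShape = shape(1, 3, 224, 224);
//     std::vector<int> ids;
//     std::vector<ShapesVec> inShapes, outShapes;
//     net.getLayersShapes(inShape, ids, inShapes, outShapes);
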
int64 Net::getFLOPS(const std::vector<MatShape>& netInputShapes) const
{
    CV_TRACE_FUNCTION();
    CV_Assert(impl);
    return impl->getFLOPS(netInputShapes);
}

int64 Net::getFLOPS(const MatShape& netInputShape) const
{
    return getFLOPS(std::vector<MatShape>(1, netInputShape));
}

int64 Net::getFLOPS(const int layerId,
                    const std::vector<MatShape>& netInputShapes) const
{
    CV_TRACE_FUNCTION();
    CV_Assert(impl);
    return impl->getFLOPS(layerId, netInputShapes);
}

int64 Net::getFLOPS(const int layerId,
                    const MatShape& netInputShape) const
{
    return getFLOPS(layerId, std::vector<MatShape>(1, netInputShape));
}

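// Usage sketch: estimating the cost of a whole network for a hypothetical
// 1x3x224x224 input:
//
//     int64 flops = net.getFLOPS(shape(1, 3, 224, 224));
//     printf("%.2f GFLOPs\n", flops * 1e-9);
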
void Net::getLayerTypes(std::vector<String>& layersTypes) const
{
    CV_TRACE_FUNCTION();
    CV_Assert(impl);
    return impl->getLayerTypes(layersTypes);
}

int Net::getLayersCount(const String& layerType) const
{
    CV_TRACE_FUNCTION();
    CV_Assert(impl);
    return impl->getLayersCount(layerType);
}

void Net::getMemoryConsumption(const int layerId,
                               const std::vector<MatShape>& netInputShapes,
                               size_t& weights, size_t& blobs) const
{
    CV_TRACE_FUNCTION();
    CV_Assert(impl);
    return impl->getMemoryConsumption(layerId, netInputShapes, weights, blobs);
}

void Net::getMemoryConsumption(const std::vector<MatShape>& netInputShapes,
                               size_t& weights, size_t& blobs) const
{
    CV_TRACE_FUNCTION();
    CV_Assert(impl);
    return impl->getMemoryConsumption(netInputShapes, weights, blobs);
}

void Net::getMemoryConsumption(const int layerId,
                               const MatShape& netInputShape,
                               size_t& weights, size_t& blobs) const
{
    getMemoryConsumption(layerId, std::vector<MatShape>(1, netInputShape),
                         weights, blobs);
}

void Net::getMemoryConsumption(const MatShape& netInputShape,
                               size_t& weights, size_t& blobs) const
{
    getMemoryConsumption(std::vector<MatShape>(1, netInputShape),
                         weights, blobs);
}

void Net::getMemoryConsumption(const std::vector<MatShape>& netInputShapes,
                               std::vector<int>& layerIds, std::vector<size_t>& weights,
                               std::vector<size_t>& blobs) const
{
    CV_TRACE_FUNCTION();
    CV_Assert(impl);
    return impl->getMemoryConsumption(netInputShapes, layerIds, weights, blobs);
}

void Net::getMemoryConsumption(const MatShape& netInputShape, std::vector<int>& layerIds,
                               std::vector<size_t>& weights, std::vector<size_t>& blobs) const
{
    getMemoryConsumption(std::vector<MatShape>(1, netInputShape), layerIds,
                         weights, blobs);
}

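// Usage sketch: querying the expected memory footprint (weights vs. intermediate
// blobs, in bytes) for a hypothetical input shape:
//
//     size_t weights = 0, blobs = 0;
//     net.getMemoryConsumption(shape(1, 3, 224, 224), weights, blobs);
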
// FIXIT return old value or add get method
void Net::enableFusion(bool fusion)
{
    CV_TRACE_FUNCTION();
    CV_Assert(impl);
    return impl->enableFusion(fusion);
}

void Net::setHalideScheduler(const String& scheduler)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(scheduler, "scheduler", scheduler.c_str());
    CV_Assert(impl);
    return impl->setHalideScheduler(scheduler);
}

int64 Net::getPerfProfile(std::vector<double>& timings)
{
    CV_TRACE_FUNCTION();
    CV_Assert(impl);
    return impl->getPerfProfile(timings);
}

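// Usage sketch: per-layer timings after a forward() call; getPerfProfile()
// returns the total inference time in ticks:
//
//     std::vector<double> timings;
//     double ms = net.getPerfProfile(timings) * 1000.0 / getTickFrequency();
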
CV__DNN_INLINE_NS_END
}} // namespace cv::dnn

2087
modules/dnn/src/net_impl.cpp
Normal file
File diff suppressed because it is too large

261
modules/dnn/src/net_impl.hpp
Normal file
@ -0,0 +1,261 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#ifndef __OPENCV_DNN_SRC_NET_IMPL_HPP__
#define __OPENCV_DNN_SRC_NET_IMPL_HPP__

#include "op_halide.hpp"
#include "op_inf_engine.hpp"
#include "ie_ngraph.hpp"
#include "op_vkcom.hpp"
#include "op_cuda.hpp"
#include "op_webnn.hpp"

#include <opencv2/dnn/shape_utils.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/dnn/layer_reg.private.hpp>

#include <opencv2/core/utils/fp_control_utils.hpp>

#include <opencv2/core/utils/logger.hpp>

#include "layer_internals.hpp"  // LayerPin LayerData DataLayer

#include "legacy_backend.hpp"  // wrapMat BlobManager OpenCLBackendWrapper

namespace cv {
namespace dnn {
CV__DNN_INLINE_NS_BEGIN

using std::make_pair;
using std::string;

// NB: Implementation is divided across multiple .cpp files
struct Net::Impl : public detail::NetImplBase
{
    typedef std::map<int, LayerShapes> LayersShapesMap;
    typedef std::map<int, LayerData> MapIdToLayerData;

    Impl();

    Ptr<DataLayer> netInputLayer;
    std::vector<LayerPin> blobsToKeep;
    MapIdToLayerData layers;
    std::map<String, int> layerNameToId;
    std::map<std::string, int> outputNameToId;  // use registerOutput() to populate outputs
    BlobManager blobManager;
    int preferableBackend;
    int preferableTarget;
    String halideConfigFile;
    bool skipInfEngineInit;
    bool hasDynamicShapes;
    // Map host data to a backend-specific wrapper.
    std::map<void*, Ptr<BackendWrapper>> backendWrappers;

    int lastLayerId;

    bool netWasAllocated;
    bool netWasQuantized;
    bool fusion;
    bool isAsync;
    std::vector<int64> layersTimings;


    bool empty() const;
    void setPreferableBackend(int backendId);
    void setPreferableTarget(int targetId);

    // FIXIT use inheritance
    Ptr<BackendWrapper> wrap(Mat& host);


    void clear();

    void setUpNet(const std::vector<LayerPin>& blobsToKeep_ = std::vector<LayerPin>());


    Ptr<Layer> getLayer(int layerId) const;
    Ptr<Layer> getLayer(const LayerId& layerId) const;

    int getLayerId(const String& layerName) const;

    int getLayerId(int id) const;

    int getLayerId(DictValue& layerDesc) const;

    String getLayerName(int id) const;

    LayerData& getLayerData(int id) const;

    LayerData& getLayerData(const String& layerName) const;

    LayerData& getLayerData(const DictValue& layerDesc) const;

    static void addLayerInput(LayerData& ld, int inNum, LayerPin from);

    int resolvePinOutputName(LayerData& ld, const String& outName) const;

    LayerPin getPinByAlias(const String& layerName) const;

    std::vector<LayerPin> getLayerOutPins(const String& layerName) const;

    // FIXIT remove dtype
    int addLayer(const String& name, const String& type, const int& dtype, LayerParams& params);

    int addLayerToPrev(const String& name, const String& type, const int& dtype, LayerParams& params);


    void connect(int outLayerId, int outNum, int inLayerId, int inNum);

    int registerOutput(const std::string& outputName, int layerId, int outputPort);

    // FIXIT drop "unconnected" API
    std::vector<int> getUnconnectedOutLayers() const;
    std::vector<String> getUnconnectedOutLayersNames() /*const*/;


    void setInputsNames(const std::vector<String>& inputBlobNames);
    void setInputShape(const String& inputName, const MatShape& shape);
    void setInput(InputArray blob, const String& name, double scalefactor, const Scalar& mean);
    Mat getParam(int layer, int numParam) const;
    void setParam(int layer, int numParam, const Mat& blob);
    std::vector<Ptr<Layer>> getLayerInputs(int layerId) const;
    std::vector<String> getLayerNames() const;


    // TODO drop?
    void getLayerTypes(std::vector<String>& layersTypes) const;
    int getLayersCount(const String& layerType) const;


    // FIXIT use inheritance
    void initBackend(const std::vector<LayerPin>& blobsToKeep_);

    void setHalideScheduler(const String& scheduler);
#ifdef HAVE_HALIDE
    void compileHalide();
    void initHalideBackend();
#endif

#ifdef HAVE_DNN_NGRAPH
    void addNgraphOutputs(LayerData& ld);
    void initNgraphBackend(const std::vector<LayerPin>& blobsToKeep_);
#endif

#ifdef HAVE_WEBNN
    void addWebnnOutputs(LayerData& ld);
    void initWebnnBackend(const std::vector<LayerPin>& blobsToKeep_);
#endif

#ifdef HAVE_VULKAN
    void initVkComBackend();
#endif

#ifdef HAVE_CUDA
    struct CudaInfo_t
    {
        CudaInfo_t(cuda4dnn::csl::CSLContext ctxt, cuda4dnn::csl::Stream d2h_stream_)
            : context(std::move(ctxt))
            , d2h_stream(std::move(d2h_stream_))
        {}
        cuda4dnn::csl::CSLContext context;
        cuda4dnn::csl::Stream d2h_stream;
        cuda4dnn::csl::Workspace workspace;
    };

    std::unique_ptr<CudaInfo_t> cudaInfo;

    void initCUDABackend(const std::vector<LayerPin>& blobsToKeep_);
#endif

    void allocateLayer(int lid, const LayersShapesMap& layersShapes);

    // TODO add getter
    void enableFusion(bool fusion_);

    void fuseLayers(const std::vector<LayerPin>& blobsToKeep_);

    void allocateLayers(const std::vector<LayerPin>& blobsToKeep_);

    void forwardLayer(LayerData& ld);

    void forwardToLayer(LayerData& ld, bool clearFlags = true);

    Mat forward(const String& outputName);
    AsyncArray forwardAsync(const String& outputName);
    void forward(OutputArrayOfArrays outputBlobs, const String& outputName);
    void forward(OutputArrayOfArrays outputBlobs,
                 const std::vector<String>& outBlobNames);
    void forward(std::vector<std::vector<Mat>>& outputBlobs,
                 const std::vector<String>& outBlobNames);


    void getLayerShapesRecursively(int id, LayersShapesMap& inOutShapes);

    void getLayersShapes(
            const ShapesVec& netInputShapes,
            std::vector<int>& layersIds,
            std::vector<ShapesVec>& inLayersShapes,
            std::vector<ShapesVec>& outLayersShapes) /*const*/;

    void getLayersShapes(const ShapesVec& netInputShapes,
                         LayersShapesMap& inOutShapes);

    void getLayerShapes(const ShapesVec& netInputShapes,
                        const int layerId,
                        LayerShapes& shapes);

    void updateLayersShapes();

    int64 getFLOPS(const std::vector<MatShape>& netInputShapes) /*const*/;
    int64 getFLOPS(
            const int layerId,
            const std::vector<MatShape>& netInputShapes) /*const*/;

    void getMemoryConsumption(
            const int layerId,
            const std::vector<MatShape>& netInputShapes,
            size_t& weights, size_t& blobs) /*const*/;
    void getMemoryConsumption(
            const std::vector<MatShape>& netInputShapes,
            size_t& weights, size_t& blobs) /*const*/;
    void getMemoryConsumption(
            const std::vector<MatShape>& netInputShapes,
            std::vector<int>& layerIds, std::vector<size_t>& weights,
            std::vector<size_t>& blobs) /*const*/;
    int64 getPerfProfile(std::vector<double>& timings) const;

    // TODO drop
    LayerPin getLatestLayerPin(const std::vector<LayerPin>& pins) const;

    Mat getBlob(const LayerPin& pin) const;

    Mat getBlob(String outputName) const;

#ifdef CV_CXX11
    AsyncArray getBlobAsync(const LayerPin& pin);

    AsyncArray getBlobAsync(String outputName);
#endif  // CV_CXX11

#ifdef HAVE_INF_ENGINE
    static
    Net createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet);
#endif

    string dump(bool forceAllocation = false) const;

    void dumpNetworkToFile() const;

    // FIXIT drop from inference API
    Net quantize(InputArrayOfArrays calibData, int inputsDtype, int outputsDtype) /*const*/;
    void getInputDetails(std::vector<float>& scales, std::vector<int>& zeropoints) /*const*/;
    void getOutputDetails(std::vector<float>& scales, std::vector<int>& zeropoints) /*const*/;

};  // Net::Impl


CV__DNN_INLINE_NS_END
}} // namespace cv::dnn
#endif  // __OPENCV_DNN_SRC_NET_IMPL_HPP__
200
modules/dnn/src/net_impl_backend.cpp
Normal file
@ -0,0 +1,200 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#include "precomp.hpp"

#include "net_impl.hpp"
#include "legacy_backend.hpp"

namespace cv {
namespace dnn {
CV__DNN_INLINE_NS_BEGIN


Ptr<BackendWrapper> Net::Impl::wrap(Mat& host)
{
    if (preferableBackend == DNN_BACKEND_OPENCV && preferableTarget == DNN_TARGET_CPU)
        return Ptr<BackendWrapper>();

    MatShape shape(host.dims);
    for (int i = 0; i < host.dims; ++i)
        shape[i] = host.size[i];

    void* data = host.data;
    if (backendWrappers.find(data) != backendWrappers.end())
    {
        Ptr<BackendWrapper> baseBuffer = backendWrappers[data];
        if (preferableBackend == DNN_BACKEND_OPENCV)
        {
#ifdef HAVE_OPENCL
            CV_Assert(IS_DNN_OPENCL_TARGET(preferableTarget));
            return OpenCLBackendWrapper::create(baseBuffer, host);
#else
            CV_Error(Error::StsInternal, "");
#endif
        }
        else if (preferableBackend == DNN_BACKEND_HALIDE)
        {
            CV_Assert(haveHalide());
#ifdef HAVE_HALIDE
            return Ptr<BackendWrapper>(new HalideBackendWrapper(baseBuffer, shape));
#endif
        }
        else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019)
        {
            CV_ERROR_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019;
        }
        else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
        {
            return wrapMat(preferableBackend, preferableTarget, host);
        }
        else if (preferableBackend == DNN_BACKEND_WEBNN)
        {
#ifdef HAVE_WEBNN
            return wrapMat(preferableBackend, preferableTarget, host);
#endif
        }
        else if (preferableBackend == DNN_BACKEND_VKCOM)
        {
#ifdef HAVE_VULKAN
            return Ptr<BackendWrapper>(new VkComBackendWrapper(baseBuffer, host));
#endif
        }
        else if (preferableBackend == DNN_BACKEND_CUDA)
        {
            CV_Assert(haveCUDA());
#ifdef HAVE_CUDA
            switch (preferableTarget)
            {
            case DNN_TARGET_CUDA:
                return CUDABackendWrapperFP32::create(baseBuffer, shape);
            case DNN_TARGET_CUDA_FP16:
                return CUDABackendWrapperFP16::create(baseBuffer, shape);
            default:
                CV_Assert(IS_DNN_CUDA_TARGET(preferableTarget));
            }
#endif
        }
        else
            CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
    }

    Ptr<BackendWrapper> wrapper = wrapMat(preferableBackend, preferableTarget, host);
    backendWrappers[data] = wrapper;
    return wrapper;
}


void Net::Impl::initBackend(const std::vector<LayerPin>& blobsToKeep_)
{
    CV_TRACE_FUNCTION();
    if (preferableBackend == DNN_BACKEND_OPENCV)
    {
        CV_Assert(preferableTarget == DNN_TARGET_CPU || IS_DNN_OPENCL_TARGET(preferableTarget));
    }
    else if (preferableBackend == DNN_BACKEND_HALIDE)
    {
#ifdef HAVE_HALIDE
        initHalideBackend();
#else
        CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of Halide");
#endif
    }
    else if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
    {
#ifdef HAVE_DNN_NGRAPH
        initNgraphBackend(blobsToKeep_);
#else
        CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of OpenVINO");
#endif
    }
    else if (preferableBackend == DNN_BACKEND_WEBNN)
    {
#ifdef HAVE_WEBNN
        initWebnnBackend(blobsToKeep_);
#else
        CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of WebNN");
#endif
    }
    else if (preferableBackend == DNN_BACKEND_VKCOM)
    {
#ifdef HAVE_VULKAN
        initVkComBackend();
#else
        CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of Vulkan");
#endif
    }
    else if (preferableBackend == DNN_BACKEND_CUDA)
    {
#ifdef HAVE_CUDA
        initCUDABackend(blobsToKeep_);
#else
        CV_Error(Error::StsNotImplemented, "This OpenCV version is built without support of CUDA/CUDNN");
#endif
    }
    else
    {
        CV_Error(Error::StsNotImplemented, cv::format("Unknown backend identifier: %d", preferableBackend));
    }
}


void Net::Impl::setPreferableBackend(int backendId)
{
    if (backendId == DNN_BACKEND_DEFAULT)
        backendId = (Backend)getParam_DNN_BACKEND_DEFAULT();

    if (netWasQuantized && backendId != DNN_BACKEND_OPENCV)
    {
        CV_LOG_WARNING(NULL, "DNN: Only default backend supports quantized networks");
        backendId = DNN_BACKEND_OPENCV;
    }

#ifdef HAVE_INF_ENGINE
    if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
        backendId = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
#endif

    if (preferableBackend != backendId)
    {
        preferableBackend = backendId;
        clear();
    }
}

void Net::Impl::setPreferableTarget(int targetId)
{
    if (netWasQuantized && targetId != DNN_TARGET_CPU &&
        targetId != DNN_TARGET_OPENCL && targetId != DNN_TARGET_OPENCL_FP16)
    {
        CV_LOG_WARNING(NULL, "DNN: Only CPU and OpenCL/OpenCL FP16 target is supported by quantized networks");
        targetId = DNN_TARGET_CPU;
    }

    if (preferableTarget != targetId)
    {
        preferableTarget = targetId;
        if (IS_DNN_OPENCL_TARGET(targetId))
        {
#ifndef HAVE_OPENCL
#ifdef HAVE_INF_ENGINE
            if (preferableBackend == DNN_BACKEND_OPENCV)
#else
            if (preferableBackend == DNN_BACKEND_DEFAULT ||
                preferableBackend == DNN_BACKEND_OPENCV)
#endif  // HAVE_INF_ENGINE
                preferableTarget = DNN_TARGET_CPU;
#else
            bool fp16 = ocl::Device::getDefault().isExtensionSupported("cl_khr_fp16");
            if (!fp16 && targetId == DNN_TARGET_OPENCL_FP16)
                preferableTarget = DNN_TARGET_OPENCL;
#endif
        }
        clear();
    }
}

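// A usage sketch of the fallback behavior implemented by the two setters above
// for quantized networks (the Net object is hypothetical):
//
//     qnet.setPreferableBackend(DNN_BACKEND_CUDA);  // warns, forced back to DNN_BACKEND_OPENCV
//     qnet.setPreferableTarget(DNN_TARGET_MYRIAD);  // warns, forced back to DNN_TARGET_CPU
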
CV__DNN_INLINE_NS_END
}} // namespace cv::dnn
607
modules/dnn/src/net_impl_fuse.cpp
Normal file
@ -0,0 +1,607 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#include "precomp.hpp"

#include "net_impl.hpp"

#ifdef HAVE_CUDA
#include "cuda4dnn/primitives/eltwise.hpp"  // required by fuseLayers
#endif

namespace cv {
namespace dnn {
CV__DNN_INLINE_NS_BEGIN


void Net::Impl::enableFusion(bool fusion_)
{
    if (fusion != fusion_)
    {
        fusion = fusion_;
        clear();
    }
}

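// For intuition on optimization #1 in fuseLayers() below, consider folding a
// batch-norm layer into the preceding convolution (a sketch, not code from this
// file): given y = gamma * (conv(x) - mean) / sqrt(var + eps) + beta, rescaling
// the convolution weights and bias as
//     w' = w * gamma / sqrt(var + eps)
//     b' = (b - mean) * gamma / sqrt(var + eps) + beta
// yields an equivalent single convolution, saving one full pass over the
// activation data.
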
#if 0
#define printf_(args) printf args
#else
#define printf_(args)
#endif


void Net::Impl::fuseLayers(const std::vector<LayerPin>& blobsToKeep_)
{
    CV_TRACE_FUNCTION();

    if(!fusion || (preferableBackend != DNN_BACKEND_OPENCV &&
                   preferableBackend != DNN_BACKEND_CUDA &&
                   preferableBackend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH))
        return;

#if 0  // FIXIT mode without fusion is broken due to unsupported layers and handling of "custom" nodes
    if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
        return;
#endif

    // scan through all the layers. If there is a convolution layer followed by an activation layer,
    // we try to embed this activation into the convolution and disable separate execution of the activation

    // FIXIT replace by layersToKeep to avoid hacks like "LayerPin(lid, 0)"
    std::set<LayerPin> pinsToKeep(blobsToKeep_.begin(),
                                  blobsToKeep_.end());
    for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); it++)
    {
        int lid = it->first;
        LayerData& ld = layers[lid];
        if (ld.skip)
        {
            printf_(("skipped %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str()));
            continue;
        }
        printf_(("analyzing %s: %s\n", ld.layerInstance->name.c_str(), ld.layerInstance->type.c_str()));

        // the optimization #1. try to fuse batch norm, scaling and/or activation layers
        // with the current layer if they follow it. Normally, they are fused with the convolution layer,
        // but some of them (like activation) may be fused with fully-connected, elemwise (+) and
        // some other layers.
        Ptr<Layer>& currLayer = ld.layerInstance;
        if (ld.consumers.size() == 1 && pinsToKeep.count(LayerPin(lid, 0)) == 0)
        {
            LayerData* nextData = &layers[ld.consumers[0].lid];
            LayerPin lpNext(ld.consumers[0].lid, 0);
            while (nextData)
            {
#ifdef HAVE_INF_ENGINE
                if (preferableBackend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && pinsToKeep.count(lpNext) != 0)
                {
                    CV_LOG_DEBUG(NULL, "DNN/IE: skip fusing with 'output' node: " << nextData->name << "@" << nextData->type);
                    break;
                }
#endif
                /* we use the `tryFuse` member of the convolution layer to fuse eltwise later
                 * it's not intended to be fused here; hence, we stop when we encounter eltwise
                 */
                if (preferableBackend == DNN_BACKEND_CUDA && ld.type == "Convolution" && nextData->type == "Eltwise")
                    break;
                Ptr<Layer> nextLayer = nextData->layerInstance;
                if (currLayer->tryFuse(nextLayer))
                {
                    printf_(("\tfused with %s\n", nextLayer->name.c_str()));
                    nextData->skip = true;
                    ld.outputBlobs = layers[lpNext.lid].outputBlobs;
                    ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
                    if (nextData->consumers.size() == 1)
                    {
                        int nextLayerId = nextData->consumers[0].lid;
                        nextData = &layers[nextLayerId];
                        lpNext = LayerPin(nextLayerId, 0);
                    }
                    else
                    {
                        nextData = 0;
                        break;
                    }
                }
                else
                    break;
            }

            if (preferableBackend != DNN_BACKEND_OPENCV && preferableBackend != DNN_BACKEND_CUDA)
                continue;  // Go to the next layer.

            // TODO: the OpenCL target should support more fusion styles.
            if ( preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget) &&
                 (!cv::ocl::useOpenCL() || (ld.layerInstance->type != "Convolution" &&
                 ld.layerInstance->type != "MVN" && ld.layerInstance->type != "Pooling" &&
                 ld.layerInstance->type != "Concat")) )
                continue;

            if (preferableBackend == DNN_BACKEND_CUDA && IS_DNN_CUDA_TARGET(preferableTarget)
                && ld.layerInstance->type != "Convolution"
                && ld.layerInstance->type != "Concat")
                continue;

            while (nextData)
            {
                // For now, the OpenCL target supports fusion with activations of ReLU/ChannelsPReLU/Power/TanH
                if (IS_DNN_OPENCL_TARGET(preferableTarget) &&
                    nextData->type != "ReLU" &&
                    nextData->type != "ChannelsPReLU" &&
                    nextData->type != "ReLU6" &&
                    nextData->type != "TanH" &&
                    nextData->type != "Power")
                    break;

                Ptr<ActivationLayer> nextActivLayer = nextData->layerInstance.dynamicCast<ActivationLayer>();
                if (nextActivLayer.empty())
                    break;

                if (currLayer->setActivation(nextActivLayer))
                {
                    printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
                    nextData->skip = true;
                    ld.outputBlobs = layers[lpNext.lid].outputBlobs;
                    ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
                    if (nextData->consumers.size() == 1)
                    {
                        int nextLayerId = nextData->consumers[0].lid;
                        nextData = &layers[nextLayerId];
                        lpNext = LayerPin(nextLayerId, 0);
                    }
                    else
                    {
                        nextData = 0;
                        break;
                    }
                }
                else
                    break;
            }

            // OpenCL: fuse convolution layer followed by eltwise + relu
            // CUDA: fuse convolution layer followed by eltwise (and optional activation)
            while (nextData &&
                   (IS_DNN_OPENCL_TARGET(preferableTarget) || IS_DNN_CUDA_TARGET(preferableTarget)) &&
                   ld.layerInstance->type == "Convolution"
            )  // semantic of 'if'
            {
                Ptr<EltwiseLayer> nextEltwiseLayer = nextData->layerInstance.dynamicCast<EltwiseLayer>();
                if (nextEltwiseLayer.empty())
                    break;

#ifdef HAVE_CUDA
                // CUDA backend supports fusion with eltwise sum (without variable channels)
                if (IS_DNN_CUDA_TARGET(preferableTarget) && !nextEltwiseLayer.empty())
                {
                    // we create a temporary backend node for the eltwise layer to obtain the eltwise configuration
                    cuda4dnn::csl::CSLContext context;  // assume that initCUDA and EltwiseOp do not use the context during init
                    const auto node = nextData->layerInstance->initCUDA(&context, nextData->inputBlobsWrappers, nextData->outputBlobsWrappers);
                    auto eltwiseNode = node.dynamicCast<cuda4dnn::EltwiseOpBase>();

                    // broadcasting not supported in fused ops
                    auto required_shape = shape(nextData->outputBlobs[0]);
                    for (int i = 0; i < nextData->inputBlobs.size(); i++)
                    {
                        if (shape(*nextData->inputBlobs[i]) != required_shape)
                        {
                            eltwiseNode.reset();
                            break;
                        }
                    }

                    // CUDA backend uses EltwiseOp when all operands have the same number of channels; otherwise, ShortcutOp is used.
                    // Hence, a successful cast to EltwiseOp implies that the number of channels is the same in all operand tensors.
                    if (eltwiseNode.empty() || eltwiseNode->op != cuda4dnn::EltwiseOpType::SUM || !eltwiseNode->coeffs.empty())
                        break;
                }
#endif

                if (IS_DNN_OPENCL_TARGET(preferableTarget) && pinsToKeep.count(lpNext) != 0)
                    break;
                if (nextData->inputBlobsId.size() != 2)
                    break;

                if (IS_DNN_OPENCL_TARGET(preferableTarget))
                {
                    if (!nextData->params.has("operation") || toLowerCase(nextData->params.get<String>("operation")) == "sum")
                    {
                        if (nextData->params.has("coeff"))
                        {
                            DictValue paramCoeff = nextData->params.get("coeff");
                            int n = paramCoeff.size();
                            bool isCoeffOneOne = (n == 2);
                            for (int i = 0; isCoeffOneOne && i < n; i++)
                            {
                                float c = paramCoeff.get<float>(i);
                                isCoeffOneOne &= (c == 1.0f);
                            }
                            if (!isCoeffOneOne)
                            {
                                CV_LOG_DEBUG(NULL, "DNN/OpenCL: fusion of 'Sum' without coeffs (or {1.0, 1.0}) is supported only");
                                break;
                            }
                        }
                    }
                    else
                    {
                        CV_LOG_DEBUG(NULL, "DNN/OpenCL: fusion with eltwise operation is not supported: " << nextData->params.get<String>("operation"));
                        break;
                    }
                }

                {
                    LayerData *eltwiseData = nextData;

                    // Eltwise layer has two inputs. We need to determine which
                    // is a base convolution layer and which could be used as its bias.
                    LayerData* biasLayerData = 0;
                    for (int i = 0; i < 2; ++i)
                    {
                        LayerData *downLayerData = &layers[eltwiseData->inputBlobsId[i].lid];
                        CV_Assert(downLayerData);
                        while (downLayerData->skip)
                        {
                            if (downLayerData->inputBlobsId.size() == 1)
                                downLayerData = &layers[downLayerData->inputBlobsId[0].lid];
                            else
                            {
                                downLayerData = 0;
                                break;
                            }
                        }
                        if (downLayerData && ld.id == downLayerData->id)
                        {
                            biasLayerData = &layers[eltwiseData->inputBlobsId[1 - i].lid];
                            break;
                        }
                    }
                    CV_Assert(biasLayerData);
                    {
                        // fuse eltwise + activation layer
                        // bias must already be computed to fuse => bias layer must appear before convolution
                        if (biasLayerData->id < ld.id)
                        {
                            /* we can fuse the activation if:
                             * => the activation layer that follows is the only consumer of the eltwise output
                             * => the activation layer does not process multiple inputs
                             * => we are not required to keep the output of eltwise
                             */
                            Ptr<ActivationLayer> nextFusabeleActivLayer;
                            if (eltwiseData->consumers.size() == 1 && pinsToKeep.count(lpNext) == 0)
                            {
                                nextData = &layers[eltwiseData->consumers[0].lid];
                                lpNext = LayerPin(eltwiseData->consumers[0].lid, 0);
                                CV_Assert(nextData);
                                if (nextData->outputBlobs.size() == 1)
                                    nextFusabeleActivLayer = nextData->layerInstance.dynamicCast<ActivationLayer>();
                            }
                            else
                            {
                                // the OCL backend cannot fuse in this case but the CUDA backend can continue with just eltwise
                                nextData = 0;
                            }

                            // the requirements of the OCV OpenCL backend and the CUDA backend are different
                            // we need to check them separately; hence, the fuse variables
                            bool fuse_eltwise = false, fuse_activation = false;

                            Ptr<PowerLayer> activ_power;
                            if (IS_DNN_OPENCL_TARGET(preferableTarget) && !nextFusabeleActivLayer.empty() &&
                                nextData &&
                                (!nextData->type.compare("ReLU") ||
                                 !nextData->type.compare("ChannelsPReLU") ||
                                 (!nextData->type.compare("Power") && (activ_power = nextFusabeleActivLayer.dynamicCast<PowerLayer>()) && activ_power->scale == 1.0f)
                                ) &&
                                currLayer->setActivation(nextFusabeleActivLayer))
                            {
                                fuse_eltwise = true;
                                fuse_activation = true;
                            }

                            if (IS_DNN_CUDA_TARGET(preferableTarget))
                            {
                                /* supported fusion options:
                                 * => convolution + eltwise
                                 * => activation(convolution) + eltwise
                                 *    > convolution + activation would have been fused already; we have to fuse eltwise
                                 * => activation(convolution + eltwise)
                                 *    > fuse eltwise and then activation
                                 */
                                auto layer = nextEltwiseLayer.staticCast<Layer>();
                                if (currLayer->tryFuse(layer))
                                {
                                    fuse_eltwise = true;  /* eltwise was successfully fused */
                                    if (!nextFusabeleActivLayer.empty() && nextData)
                                    {
                                        if ((!nextData->type.compare("ReLU") ||
                                             !nextData->type.compare("ReLU6") ||
                                             !nextData->type.compare("Power") ||
                                             !nextData->type.compare("TanH") ||
                                             !nextData->type.compare("Sigmoid") ||
                                             !nextData->type.compare("Swish") ||
                                             !nextData->type.compare("Mish")) &&
                                            currLayer->setActivation(nextFusabeleActivLayer))
                                        {
                                            // activation was fused
                                            fuse_activation = true;
                                        }
                                    }
                                }
                            }

                            CV_Assert(!fuse_activation || fuse_eltwise);  /* cannot fuse activation without eltwise */
                            if(fuse_eltwise && fuse_activation)
                            {
                                CV_Assert(nextData);
                                CV_Assert_N(biasLayerData->outputBlobsWrappers.size() == 1, ld.inputBlobsWrappers.size() == 1);
                                ld.inputBlobsWrappers.push_back(biasLayerData->outputBlobsWrappers[0]);
                                printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str()));
                                printf_(("\tfused with %s\n", nextFusabeleActivLayer->name.c_str()));
                                eltwiseData->skip = true;
                                nextData->skip = true;
                                // This optimization is for cases like
                                // some_layer     conv
                                //     |            |
                                //     +-- eltwise --+
                                //            |
                                //          activ
                                // This way all the element-wise computations
                                // (i.e. some_layer+conv or some_layer*conv)
                                // would be done at [conv] layer. So we need to
                                // replace [conv]'s output blob with [eltwise]'s one,
                                // considering that [activ] is an in-place layer.
                                // Also we need to move all the consumers' references.
                                // To prevent memory collisions (i.e. when the input of
                                // [conv] and the output of [eltwise] is the same blob)
                                // we allocate a new blob.
                                CV_Assert_N(ld.outputBlobs.size() == 1, ld.outputBlobsWrappers.size() == 1);
                                ld.outputBlobs[0] = ld.outputBlobs[0].clone();
                                ld.outputBlobsWrappers[0] = wrap(ld.outputBlobs[0]);

                                eltwiseData->outputBlobs = ld.outputBlobs;
                                nextData->outputBlobs = ld.outputBlobs;
                                eltwiseData->outputBlobsWrappers = ld.outputBlobsWrappers;
                                nextData->outputBlobsWrappers = ld.outputBlobsWrappers;

                                // Move references of [activ] layer consumers to the newly allocated blob.
                                for (int i = 0; i < nextData->consumers.size(); ++i)
                                {
                                    LayerData& consumer = layers[nextData->consumers[i].lid];
                                    for (int j = 0; j < consumer.inputBlobsId.size(); ++j)
                                    {
                                        if (consumer.inputBlobsId[j].lid == lpNext.lid)
                                        {
                                            consumer.inputBlobs[j] = &ld.outputBlobs[0];
                                            consumer.inputBlobsWrappers[j] = ld.outputBlobsWrappers[0];
                                            break;
                                        }
                                    }
                                }
                            }
                            else if (fuse_eltwise)  // conv + eltwise (note: conv could have fused activations before eltwise)
                            {
                                CV_Assert(IS_DNN_CUDA_TARGET(preferableTarget));
                                CV_Assert_N(biasLayerData->outputBlobsWrappers.size() == 1, ld.inputBlobsWrappers.size() == 1);
                                ld.inputBlobsWrappers.push_back(biasLayerData->outputBlobsWrappers[0]);
                                printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str()));
                                eltwiseData->skip = true;
                                // This optimization is for cases like
                                // some_layer     conv (maybe fused with activ)
                                //     |            |
                                //     +-- eltwise --+
                                //
                                // This way all the element-wise computations
                                // (i.e. some_layer+conv or some_layer*conv)
                                // would be done at [conv] layer. So we need to
                                // replace [conv]'s output blob with [eltwise]'s one.
                                // Also we need to move all the consumers' references.
                                // To prevent memory collisions (i.e. when the input of
                                // [conv] and the output of [eltwise] is the same blob)
                                // we allocate a new blob.
                                CV_Assert_N(ld.outputBlobs.size() == 1, ld.outputBlobsWrappers.size() == 1);
                                ld.outputBlobs[0] = ld.outputBlobs[0].clone();
                                ld.outputBlobsWrappers[0] = wrap(ld.outputBlobs[0]);

                                eltwiseData->outputBlobs = ld.outputBlobs;
                                eltwiseData->outputBlobsWrappers = ld.outputBlobsWrappers;

                                // Move references of [eltwise] layer consumers to the newly allocated blob.
                                for (int i = 0; i < eltwiseData->consumers.size(); ++i)
                                {
                                    LayerData& consumer = layers[eltwiseData->consumers[i].lid];
                                    for (int j = 0; j < consumer.inputBlobsId.size(); ++j)
                                    {
                                        if (consumer.inputBlobsId[j].lid == eltwiseData->id)
                                        {
                                            consumer.inputBlobs[j] = &ld.outputBlobs[0];
                                            consumer.inputBlobsWrappers[j] = ld.outputBlobsWrappers[0];
                                            break;
                                        }
                                    }
                                }
                            }
                        }
                    }
                }

                break;
            }
        }

        if (preferableBackend != DNN_BACKEND_OPENCV && preferableBackend != DNN_BACKEND_CUDA)
            continue;  // Go to the next layer.

        // the optimization #2. if there is a concat layer that concatenates channels
        // from the inputs together (i.e. axis == 1), then we make the inputs of
        // the concat layer write directly to the concatenation output buffer
        // (and so we eliminate the concatenation layer, because the channels
        // are concatenated implicitly).
        Ptr<ConcatLayer> concatLayer = ld.layerInstance.dynamicCast<ConcatLayer>();
        if( !concatLayer.empty() && !concatLayer->padding && ld.outputBlobs.size() == 1 )
        {
            Mat& output = ld.outputBlobs[0];
            UMat umat_output;
#ifdef HAVE_OPENCL
            if (!ld.outputBlobsWrappers.empty() &&
                (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget)))
            {
                size_t i, ninputs = ld.inputBlobsId.size();
                bool conv_layer = true;
                for( i = 0; i < ninputs; i++ )
                {
                    LayerPin pin = ld.inputBlobsId[i];
                    LayerData* inp_i_data = &layers[pin.lid];
                    while(inp_i_data->skip &&
                          inp_i_data->inputBlobsId.size() == 1 &&
                          inp_i_data->consumers.size() == 1)
                    {
                        pin = inp_i_data->inputBlobsId[0];
                        inp_i_data = &layers[pin.lid];
                    }
                    conv_layer = conv_layer && (inp_i_data->getLayerInstance()->type == "Convolution");
                }
                if (!conv_layer)
                    continue;
                std::vector<UMat> umat_outputBlobs;
                umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
                umat_output = umat_outputBlobs[0];
            }
#endif

            // TODO: in general, this optimization can always be done, but
            // many layers currently check that the input/output blobs are
            // continuous arrays. Unfortunately, this is not true when
            // the concatenation optimization is applied with batch_size > 1.
            // So, for now, we only apply this optimization in the most popular
            // case batch_size == 1.
            int axis = normalize_axis(concatLayer->axis, output.dims);
            if( output.total(0, axis) == 1 )
            {
                size_t i, ninputs = ld.inputBlobsId.size();
                std::vector<LayerPin> realinputs(ninputs);
                for( i = 0; i < ninputs; i++ )
                {
                    LayerPin pin = ld.inputBlobsId[i];
                    LayerData* inp_i_data = &layers[pin.lid];
                    while(inp_i_data->skip &&
                          inp_i_data->inputBlobsId.size() == 1 &&
                          inp_i_data->consumers.size() == 1)
                    {
                        pin = inp_i_data->inputBlobsId[0];
                        inp_i_data = &layers[pin.lid];
                    }
                    printf_(("\treal input for %s is %s\n",
                             layers[ld.inputBlobsId[i].lid].getLayerInstance()->name.c_str(),
                             inp_i_data->getLayerInstance()->name.c_str()));

                    if(inp_i_data->skip || inp_i_data->consumers.size() != 1)
                        break;
#ifdef HAVE_CUDA
                    if (preferableBackend == DNN_BACKEND_CUDA &&
                        (inp_i_data->layerInstance->supportBackend(DNN_BACKEND_CUDA) == false ||
                         (inp_i_data->layerInstance->type != "Convolution" &&
                          inp_i_data->layerInstance->type != "Pooling" &&
                          inp_i_data->layerInstance->type != "Resize" &&
                          inp_i_data->layerInstance->type != "Flatten" &&
                          inp_i_data->layerInstance->type != "Permute" &&
                          inp_i_data->layerInstance->type != "Reorg" &&
                          inp_i_data->layerInstance->type != "Eltwise" &&
                          inp_i_data->layerInstance.dynamicCast<ActivationLayer>().empty())))
                    {
                        break;
                    }
#endif
                    realinputs[i] = pin;
                }

                if( i >= ninputs )
                {
                    // Allocate new memory to prevent collisions during memory
                    // reusing (see https://github.com/opencv/opencv/pull/10456).
                    output = output.clone();
#ifdef HAVE_OPENCL
                    if (preferableBackend == DNN_BACKEND_OPENCV &&
                        IS_DNN_OPENCL_TARGET(preferableTarget))
                    {
                        std::vector<UMat> umats(1);
                        umat_output = umat_output.clone();
                        umats[0] = umat_output;
                        OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umats);
                    }
#endif

#ifdef HAVE_CUDA
                    if (preferableBackend == DNN_BACKEND_CUDA)
                        ld.outputBlobsWrappers[0] = wrap(output);
#endif
                    std::vector<Range> chrange(output.dims, Range::all());
                    int ofs = 0;
                    for( i = 0; i < ninputs; i++ )
                    {
                        LayerPin pin = realinputs[i];
                        LayerData* inp_i_data = &layers[pin.lid];
                        int channels_i = ld.inputBlobs[i]->size[axis];
                        chrange[axis] = Range(ofs, ofs + channels_i);
                        printf_(("\toutput %s(%d) to channels (%d, %d)\n", inp_i_data->layerInstance->name.c_str(),
                                 pin.oid, ofs, ofs + channels_i));
                        ofs += channels_i;
                        Mat output_slice = output(chrange);
                        Mat& curr_output = inp_i_data->outputBlobs[pin.oid];
                        CV_Assert(output_slice.isContinuous() && output_slice.size == curr_output.size);
                        Mat* oldPtr = &curr_output;
                        curr_output = output_slice;
#ifdef HAVE_OPENCL
                        if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
                        {
                            std::vector<UMat> umats(inp_i_data->outputBlobsWrappers.size());
                            umats[pin.oid] = umat_output(chrange);
                            OpenCLBackendWrapper::update(inp_i_data->outputBlobsWrappers, umats);
                        }
#endif
#ifdef HAVE_CUDA
                        if (preferableBackend == DNN_BACKEND_CUDA)
                        {
                            auto cuda_wrapper = wrap(output).dynamicCast<CUDABackendWrapper>();
                            auto offset = chrange[axis].start * output_slice.total(axis + 1, output.dims);
                            auto new_shape = shape(output_slice);
                            cuda_wrapper->update(new_shape, offset);
                            inp_i_data->outputBlobsWrappers[pin.oid] = cuda_wrapper.staticCast<BackendWrapper>();
                        }
#endif
                        // Layers that refer to the old input Mat will refer to the
                        // new data but the same Mat object.
                        CV_Assert_N(curr_output.data == output_slice.data, oldPtr == &curr_output);
                    }

#ifdef HAVE_CUDA
                    if (preferableBackend == DNN_BACKEND_CUDA)
                    {
                        for (int i = 0; i < ld.consumers.size(); i++)
                        {
                            LayerData& consumer = layers[ld.consumers[i].lid];
                            for (int j = 0; j < consumer.inputBlobsId.size(); j++)
                            {
                                if (consumer.inputBlobsId[j].lid == ld.id)
                                {
                                    CV_Assert(consumer.inputBlobs[j]->data == ld.outputBlobs[0].data);
                                    consumer.inputBlobsWrappers[j] = ld.outputBlobsWrappers[0];
                                    break;
                                }
                            }
                        }
                    }
#endif
                    ld.skip = true;
                    printf_(("\toptimized out Concat layer %s\n", concatLayer->name.c_str()));
                }
            }
        }
    }
}


CV__DNN_INLINE_NS_END
}} // namespace cv::dnn
568
modules/dnn/src/net_openvino.cpp
Normal file
@ -0,0 +1,568 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "precomp.hpp"
|
||||
|
||||
#include <opencv2/core/utils/fp_control_utils.hpp>
|
||||
|
||||
#include <opencv2/core/utils/configuration.private.hpp>
|
||||
#include <opencv2/core/utils/logger.hpp>
|
||||
|
||||
#include "net_impl.hpp"
|
||||
|
||||
namespace cv {
|
||||
namespace dnn {
|
||||
CV__DNN_INLINE_NS_BEGIN
|
||||
|
||||
#ifdef HAVE_INF_ENGINE
|
||||
|
||||
|
||||
/** mark input pins as outputs from other subnetworks
|
||||
* FIXIT must be done by DNN engine not ngraph.
|
||||
*/
|
||||
void Net::Impl::addNgraphOutputs(LayerData& ld)
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
|
||||
CV_LOG_DEBUG(NULL, "DNN/IE: layer of new subnet: " << ld.name << "@" << ld.type);
|
||||
|
||||
Ptr<InfEngineNgraphNet> layerNet;
|
||||
auto it = ld.backendNodes.find(preferableBackend);
|
||||
if (it != ld.backendNodes.end())
|
||||
{
|
||||
Ptr<BackendNode> node = it->second;
|
||||
if (!node.empty())
|
||||
{
|
||||
Ptr<InfEngineNgraphNode> ieNode = node.dynamicCast<InfEngineNgraphNode>();
|
||||
CV_Assert(!ieNode.empty());
|
||||
CV_Assert(!ieNode->net.empty());
|
||||
layerNet = ieNode->net;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < ld.inputBlobsId.size(); ++i)
|
||||
{
|
||||
LayerData& inpLd = layers[ld.inputBlobsId[i].lid];
|
||||
Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
|
||||
if (!inpNode.empty())
|
||||
{
|
||||
Ptr<InfEngineNgraphNode> ieInpNode = inpNode.dynamicCast<InfEngineNgraphNode>();
|
||||
CV_Assert(!ieInpNode.empty());
|
||||
CV_Assert(!ieInpNode->net.empty());
|
||||
if (layerNet != ieInpNode->net)
|
||||
{
|
||||
CV_LOG_DEBUG(NULL, "DNN/IE: pin output between subnets: " << ieInpNode->node->get_friendly_name());
|
||||
ieInpNode->net->addOutput(ieInpNode);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Net::Impl::initNgraphBackend(const std::vector<LayerPin>& blobsToKeep_)
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
CV_CheckEQ(preferableBackend, DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, "");
|
||||
|
||||
Ptr<InfEngineNgraphNet> net;
|
||||
|
||||
for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); ++it)
|
||||
{
|
||||
const LayerData& ld = it->second;
|
||||
if (ld.id == 0)
|
||||
{
|
||||
CV_Assert((netInputLayer->outNames.empty() && ld.outputBlobsWrappers.size() == 1) ||
|
||||
(netInputLayer->outNames.size() == ld.outputBlobsWrappers.size()));
|
||||
for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
|
||||
{
|
||||
InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]);
|
||||
std::string outputName = netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i];
|
||||
outputName = ld.outputBlobsWrappers.size() > 1 ? (outputName + "." + std::to_string(i)) : outputName;
|
||||
dataPtr->setName(outputName);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
|
||||
{
|
||||
InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]);
|
||||
std::string outputName = ld.outputBlobsWrappers.size() > 1 ? (ld.name + "." + std::to_string(i)) : ld.name;
|
||||
dataPtr->setName(outputName);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (skipInfEngineInit)
|
||||
{
|
||||
Ptr<BackendNode> node = layers[lastLayerId].backendNodes[preferableBackend];
|
||||
CV_Assert(!node.empty());
|
||||
|
||||
Ptr<InfEngineNgraphNode> ieNode = node.dynamicCast<InfEngineNgraphNode>();
|
||||
CV_Assert(!ieNode.empty());
|
||||
|
||||
CV_Assert(ieNode->net);
|
||||
InfEngineNgraphNet& ienet = *ieNode->net;
|
||||
ienet.reset();
|
||||
|
||||
for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it)
|
||||
{
|
||||
LayerData& ld = it->second;
|
||||
if (ld.id == 0)
|
||||
{
|
||||
for (int i = 0; i < ld.inputBlobsWrappers.size(); ++i)
|
||||
{
|
||||
InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.inputBlobsWrappers[i]);
|
||||
dataPtr->setName(netInputLayer->outNames[i]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
|
||||
{
|
||||
auto it = ienet.outputsDesc.find(ld.name);
|
||||
if (it != ienet.outputsDesc.end())
|
||||
{
|
||||
const InferenceEngine::TensorDesc& descriptor = it->second;
|
||||
InferenceEngine::DataPtr dataPtr = ngraphDataOutputNode(ld.outputBlobsWrappers[i], descriptor, ld.name);
|
||||
dataPtr->setName(ld.name);
|
||||
}
|
||||
else
|
||||
{
|
||||
InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]);
|
||||
dataPtr->setName(ld.name);
|
||||
}
|
||||
}
|
||||
}
|
||||
ienet.addBlobs(ld.inputBlobsWrappers);
|
||||
ienet.addBlobs(ld.outputBlobsWrappers);
|
||||
ld.skip = true;
|
||||
}
|
||||
layers[lastLayerId].skip = false;
|
||||
ienet.init((Target)preferableTarget);
|
||||
return;
|
||||
}
|
||||
|
||||
bool supportsCPUFallback = !isArmComputePlugin() && (preferableTarget == DNN_TARGET_CPU ||
|
||||
openvino::checkTarget(DNN_TARGET_CPU));
|
||||
|
||||
// Build Inference Engine networks from sets of layers that support this
|
||||
// backend. Split a whole model on several Inference Engine networks if
|
||||
// some of layers are not implemented.
|
||||
for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it)
|
||||
{
|
||||
LayerData& ld = it->second;
|
||||
|
||||
CV_LOG_DEBUG(NULL, "DNN/IE: processing layer " << ld.name << "@" << ld.type << " (" << ld.id << ") ...");
|
||||
|
||||
if (ld.id == 0 && ld.skip)
|
||||
{
|
||||
CV_LOG_DEBUG(NULL, "DNN/IE: SKIP!");
|
||||
continue;
|
||||
}
|
||||
|
||||
bool fused = ld.skip;
|
||||
Ptr<Layer> layer = ld.layerInstance;
|
||||
if (!fused && !layer->supportBackend(preferableBackend))
|
||||
{
|
||||
CV_LOG_DEBUG(NULL, "DNN/IE: NOT supported!");
|
||||
bool customizable = ld.id != 0 && supportsCPUFallback;
|
||||
|
||||
// TODO: there is a bug in Myriad plugin with custom layers shape infer.
|
||||
if (preferableTarget == DNN_TARGET_MYRIAD || preferableTarget == DNN_TARGET_HDDL)
|
||||
{
|
||||
for (int i = 0; customizable && i < ld.inputBlobs.size(); ++i)
|
||||
{
|
||||
customizable = ld.inputBlobs[i]->size[0] == 1;
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: fix these workarounds
|
||||
if (preferableTarget == DNN_TARGET_MYRIAD ||
|
||||
preferableTarget == DNN_TARGET_HDDL ||
|
||||
preferableTarget == DNN_TARGET_OPENCL ||
|
||||
preferableTarget == DNN_TARGET_OPENCL_FP16)
|
||||
customizable &= ld.type != "Concat";
|
||||
|
||||
if (preferableTarget == DNN_TARGET_OPENCL ||
|
||||
preferableTarget == DNN_TARGET_OPENCL_FP16)
|
||||
customizable &= ld.type != "Power";
|
||||
|
||||
if (preferableTarget == DNN_TARGET_OPENCL)
|
||||
customizable &= ld.type != "Eltwise";
|
||||
|
||||
if (!customizable)
|
||||
{
|
||||
CV_LOG_DEBUG(NULL, "DNN/IE: NOT customizable!");
|
||||
addNgraphOutputs(ld);
|
||||
net = Ptr<InfEngineNgraphNet>();
|
||||
layer->preferableTarget = DNN_TARGET_CPU;
|
||||
|
||||
for (int i = 0; i < ld.inputBlobsId.size(); ++i)
|
||||
{
|
||||
LayerData& inpLd = layers[ld.inputBlobsId[i].lid];
|
||||
Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
|
||||
if (!inpNode.empty())
|
||||
{
|
||||
Ptr<InfEngineNgraphNode> ieNode = inpNode.dynamicCast<InfEngineNgraphNode>();
|
||||
CV_Assert(!ieNode.empty());
|
||||
ieNode->net->addOutput(ieNode);
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
ld.skip = true; // Initially skip all Inference Engine supported layers.
|
||||
|
||||
// Create a new network if one of inputs from different Inference Engine graph.
|
||||
std::vector<Ptr<BackendNode>> inputNodes;
|
||||
for (int i = 0; i < ld.inputBlobsId.size(); ++i)
|
||||
{
|
||||
// Layer_Test_ROIPooling.Accuracy has 2 inputs inpLD = 0, 0 -> has 4 inputNodes (input, rois, input, rois)
|
||||
if (inputNodes.size() == ld.inputBlobsId.size())
|
||||
{
|
||||
break;
|
||||
}
|
||||
LayerData& inpLd = layers[ld.inputBlobsId[i].lid];
|
||||
Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
|
||||
if (!inpNode.empty())
|
||||
{
|
||||
Ptr<InfEngineNgraphNode> ieInpNode = inpNode.dynamicCast<InfEngineNgraphNode>();
|
||||
CV_Assert(!ieInpNode.empty());
|
||||
CV_Assert(!ieInpNode->net.empty());
|
||||
if (ieInpNode->net == net && !fused)
|
||||
{
|
||||
inputNodes.push_back(inpNode);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (net.empty())
|
||||
{
|
||||
net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(*this));
|
||||
}
|
||||
|
||||
if (!fused)
|
||||
{
|
||||
std::vector<std::string> inputNames;
|
||||
std::vector<cv::Mat> inputs;
|
||||
|
||||
auto curr_pos = inpLd.consumers.begin();
|
||||
auto compare = [&ld](const LayerPin& lp) { return lp.lid == ld.id; };
|
||||
auto cons = curr_pos;
|
||||
while ((cons = std::find_if(curr_pos, inpLd.consumers.end(), compare)) !=
|
||||
inpLd.consumers.end()) {
|
||||
int cons_inp = cons->oid;
|
||||
Ptr<NgraphBackendWrapper> inpWrapper = inpLd.outputBlobsWrappers[cons_inp].
|
||||
dynamicCast<NgraphBackendWrapper>();
|
||||
CV_Assert(!inpWrapper.empty());
|
||||
auto iter = std::find(inputNames.begin(), inputNames.end(),
|
||||
inpWrapper->dataPtr->getName());
|
||||
if (iter == inputNames.end())
|
||||
{
|
||||
inputNames.push_back(inpWrapper->dataPtr->getName());
|
||||
inputs.push_back(inpLd.outputBlobs[cons_inp]);
|
||||
}
|
||||
curr_pos = cons + 1;
|
||||
}
|
||||
|
||||
auto inps = net->setInputs(inputs, inputNames);
|
||||
for (auto& inp : inps)
|
||||
{
|
||||
inputNodes.emplace_back(Ptr<BackendNode>(new InfEngineNgraphNode(inp)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ptr<BackendNode> node;
|
||||
if (!net.empty())
|
||||
{
|
||||
if (fused)
|
||||
{
                bool inPlace = ld.inputBlobsId.size() == 1 && ld.outputBlobs.size() == 1 &&
                               ld.inputBlobs[0]->data == ld.outputBlobs[0].data;
                CV_Assert(inPlace);
                node = layers[ld.inputBlobsId[0].lid].backendNodes[preferableBackend];
                ld.inputBlobsWrappers = layers[ld.inputBlobsId[0].lid].inputBlobsWrappers;
            }
        }
        else
        {
            net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(*this));
        }

        if (!fused)
        {
            CV_Assert(ld.inputBlobsId.size() == inputNodes.size());
            for (int i = 0; i < ld.inputBlobsId.size(); ++i)
            {
                int lid = ld.inputBlobsId[i].lid;
                int oid = ld.inputBlobsId[i].oid;
                if (oid == 0 || lid == 0)
                    continue;

                auto ieInpNode = inputNodes[i].dynamicCast<InfEngineNgraphNode>();
                const auto& ngraph_input_node = ieInpNode->node;
                CV_LOG_DEBUG(NULL, "DNN/IE: bind output port " << lid << ":" << oid << " (" << ngraph_input_node->get_friendly_name() << ":" << ngraph_input_node->get_type_info().name << ")");

                // Handle parameters from other subnets. Output port is not used in this case
                if ((ngraph::op::is_parameter(ngraph_input_node) || ngraph::op::is_constant(ngraph_input_node)) &&
                        ngraph_input_node->get_output_size() == 1)
                {
                    inputNodes[i] = Ptr<BackendNode>(new InfEngineNgraphNode(ngraph_input_node));
                    continue;
                }
                CV_CheckLT((size_t)oid, ngraph_input_node->get_output_size(), "");
#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
                // FIXIT refactor ".initNgraph()" API to use Output<Node>
                // WA: use Concat to emulate Identity operation with requested output port
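                // (a Concat with a single input is a no-op, so this simply re-exposes
                // output port `oid` as a standalone single-output node)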
                auto oid_node = std::make_shared<ngraph::op::Concat>(ngraph::OutputVector { ngraph_input_node->output(oid) }, 0);
                inputNodes[i] = Ptr<BackendNode>(new InfEngineNgraphNode(oid_node));
#elif INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_3)
                inputNodes[i] = Ptr<BackendNode>(new InfEngineNgraphNode(ieInpNode->node->get_output_as_single_output_node(oid)));
#else
                inputNodes[i] = Ptr<BackendNode>(new InfEngineNgraphNode(ieInpNode->node->get_output_as_single_output_node(oid, false)));
#endif
            }

            if (layer->supportBackend(preferableBackend))
            {
                CV_LOG_DEBUG(NULL, "DNN/IE: wrap layer " << ld.name << "@" << ld.type << " - outputs: " << ld.outputBlobsWrappers.size());
                node = layer->initNgraph(ld.inputBlobsWrappers, inputNodes);
#if 0 // FIXIT doesn't work with multiple outputs (set name is applied to the same node)
                for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
                {
                    InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]);
                    node.dynamicCast<InfEngineNgraphNode>()->setName(dataPtr->getName());
                }
#else
                node.dynamicCast<InfEngineNgraphNode>()->setName(layer->name);
#endif
            }
            else
            {
                CV_LOG_DEBUG(NULL, "DNN/IE: layer is not supported: " << ld.name << "@" << ld.type);
                node = Ptr<BackendNode>(new InfEngineNgraphNode(inputNodes,
                        ld.layerInstance, ld.inputBlobs, ld.outputBlobs, ld.internals));
            }
        }
        else if (node.empty())
        {
            CV_LOG_DEBUG(NULL, "DNN/IE: node.empty() bypass...");
            continue;
        }

        ld.backendNodes[preferableBackend] = node;

        Ptr<InfEngineNgraphNode> ieNode = node.dynamicCast<InfEngineNgraphNode>();
        CV_Assert(!ieNode.empty());
        ieNode->net = net;

        for (const auto& pin : blobsToKeep_)
        {
            if (pin.lid == ld.id)
            {
                ieNode->net->addOutput(ieNode);
                break;
            }
        }
        ieNode->net->setNodePtr(&ieNode->node);

        net->addBlobs(ld.inputBlobsWrappers);
        net->addBlobs(ld.outputBlobsWrappers);
        addNgraphOutputs(ld);
    }

    // Initialize all networks.
    for (MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it)
    {
        LayerData& ld = it->second;
        auto iter = ld.backendNodes.find(preferableBackend);
        if (iter == ld.backendNodes.end())
            continue;

        Ptr<BackendNode>& node = iter->second;
        if (node.empty())
            continue;

        Ptr<InfEngineNgraphNode> ieNode = node.dynamicCast<InfEngineNgraphNode>();
        if (ieNode.empty())
            continue;

        CV_Assert(!ieNode->net.empty());

        if (!ieNode->net->isInitialized())
        {
            ieNode->net->addOutput(ieNode);
            ieNode->net->createNet((Target)preferableTarget);
            ld.skip = false;
        }
    }
}

//} // Net::Impl

/*static*/
Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet)
{
    CV_TRACE_FUNCTION();

    CV_TRACE_REGION("register_inputs");

    std::vector<String> inputsNames;
    std::vector<MatShape> inp_shapes;
    for (auto& it : ieNet.getInputsInfo())
    {
        inputsNames.push_back(it.first);
        std::vector<size_t> dims = it.second->getTensorDesc().getDims();
        inp_shapes.push_back(std::vector<int>(dims.begin(), dims.end()));
    }

    Net cvNet;
    cvNet.setInputsNames(inputsNames);

    // set empty input to determine input shapes
    for (int inp_id = 0; inp_id < inputsNames.size(); ++inp_id)
    {
        cvNet.setInputShape(inputsNames[inp_id], inp_shapes[inp_id]);
    }

    CV_TRACE_REGION_NEXT("backendNode");

    Ptr<BackendNode> backendNode;
    {
        auto fake_node = std::make_shared<ngraph::op::Parameter>(ngraph::element::f32, ngraph::Shape {});
        Ptr<InfEngineNgraphNode> backendNodeNGraph(new InfEngineNgraphNode(fake_node));
        backendNodeNGraph->net = Ptr<InfEngineNgraphNet>(new InfEngineNgraphNet(*(cvNet.impl), ieNet));
        backendNode = backendNodeNGraph;
    }

    CV_TRACE_REGION_NEXT("register_outputs");

    auto ngraphFunction = ieNet.getFunction();
    CV_Assert(ngraphFunction);
    std::vector<std::shared_ptr<ngraph::Node>> ngraphOperations = ngraphFunction->get_ops();

    for (auto& it : ieNet.getOutputsInfo())
    {
        CV_TRACE_REGION("output");
        const auto& outputName = it.first;

        LayerParams lp;
        int lid = cvNet.addLayer(it.first, "", lp);

        LayerData& ld = cvNet.impl->layers[lid];

        {
            Ptr<Layer> cvLayer(new NgraphBackendLayer(ieNet));
            cvLayer->name = outputName;
            cvLayer->type = "_unknown_";

            auto process_layer = [&](const std::string& name) -> bool
            {
                CV_TRACE_REGION("ngraph_function");
                for (const auto& op : ngraphOperations)
                {
                    CV_Assert(op);
                    if (op->get_friendly_name() == name)
                    {
                        const std::string typeName = op->get_type_info().name;
                        cvLayer->type = typeName;
                        return true;
                    }
                }
                return false;
            };

            bool found = process_layer(outputName);
            if (!found)
            {
                auto pos = outputName.rfind('.'); // cut port number: ".0"
                if (pos != std::string::npos)
                {
                    std::string layerName = outputName.substr(0, pos);
                    found = process_layer(layerName);
                }
            }
            if (!found)
                CV_LOG_WARNING(NULL, "DNN/IE: Can't determine output layer type: '" << outputName << "'");

            ld.layerInstance = cvLayer;
            ld.backendNodes[DNN_BACKEND_INFERENCE_ENGINE_NGRAPH] = backendNode;
        }

        for (int i = 0; i < inputsNames.size(); ++i)
            cvNet.connect(0, i, lid, i);
    }

    CV_TRACE_REGION_NEXT("finalize");

    cvNet.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);

    cvNet.impl->skipInfEngineInit = true;
    return cvNet;
}
#endif // HAVE_INF_ENGINE

Net Net::readFromModelOptimizer(const String& xml, const String& bin)
{
    CV_TRACE_FUNCTION();
#ifndef HAVE_INF_ENGINE
    CV_UNUSED(xml); CV_UNUSED(bin);
    CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer.");
#else

    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;

    InferenceEngine::Core& ie = getCore("");
    InferenceEngine::CNNNetwork ieNet = ie.ReadNetwork(xml, bin);

    return Impl::createNetworkFromModelOptimizer(ieNet);
#endif // HAVE_INF_ENGINE
}

Net Net::readFromModelOptimizer(const std::vector<uchar>& bufferModelConfig, const std::vector<uchar>& bufferWeights)
{
    CV_TRACE_FUNCTION();
    CV_Assert(!bufferModelConfig.empty());
    CV_Assert(!bufferWeights.empty());
    return readFromModelOptimizer(bufferModelConfig.data(), bufferModelConfig.size(),
                                  bufferWeights.data(), bufferWeights.size());
}

Net Net::readFromModelOptimizer(
        const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize,
        const uchar* bufferWeightsPtr, size_t bufferWeightsSize
)
{
    CV_TRACE_FUNCTION();
#ifndef HAVE_INF_ENGINE
    CV_UNUSED(bufferModelConfigPtr); CV_UNUSED(bufferWeightsPtr);
    CV_UNUSED(bufferModelConfigSize); CV_UNUSED(bufferWeightsSize);
    CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer.");
#else

    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;

    InferenceEngine::Core& ie = getCore("");

    std::string model; model.assign((char*)bufferModelConfigPtr, bufferModelConfigSize);

    InferenceEngine::CNNNetwork ieNet;
    try
    {
        InferenceEngine::TensorDesc tensorDesc(InferenceEngine::Precision::U8, { bufferWeightsSize }, InferenceEngine::Layout::C);
        InferenceEngine::Blob::CPtr weights_blob = InferenceEngine::make_shared_blob<uint8_t>(tensorDesc, (uint8_t*)bufferWeightsPtr, bufferWeightsSize);

        ieNet = ie.ReadNetwork(model, weights_blob);
    }
    catch (const std::exception& e)
    {
        CV_Error(Error::StsError, std::string("DNN: IE failed to load model: ") + e.what());
    }

    return Impl::createNetworkFromModelOptimizer(ieNet);
#endif // HAVE_INF_ENGINE
}

CV__DNN_INLINE_NS_END
}} // namespace cv::dnn
296
modules/dnn/src/net_quantization.cpp
Normal file
@ -0,0 +1,296 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#include "precomp.hpp"

#include "net_impl.hpp"

namespace cv {
namespace dnn {
CV__DNN_INLINE_NS_BEGIN


// FIXIT drop from inference API
static
void getQuantizationParams(const Mat& src, std::vector<float>& scales, std::vector<int>& zeropoints)
{
    const int qmin = -128; // INT8_MIN
    const int qmax = 127;  // INT8_MAX

    double rmin, rmax, sc, zp;
    cv::minMaxIdx(src, &rmin, &rmax);

    // 0 must be present in the range [rmin, rmax]
    rmin = std::min(rmin, 0.0);
    rmax = std::max(rmax, 0.0);

    sc = (rmax == rmin) ? 1.0 : (rmax - rmin)/(qmax - qmin);
    zp = qmin - (rmin/sc);

    scales.push_back((float)sc);
    zeropoints.push_back((int)std::round(zp));
}
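
// The pair computed above defines the usual affine int8 mapping:
//   real_value ~= sc * (quantized_value - zp),  with  zp = qmin - rmin/sc.
// For example, activations spanning [rmin, rmax] = [-1.0, 3.0] give
// sc = 4/255 ~= 0.0157 and zp ~= -64, so real 0.0 maps exactly to q = -64
// (the range is widened above so that 0 is always representable).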

// FIXIT drop from inference API
Net Net::Impl::quantize(InputArrayOfArrays calibData, int inputsDtype, int outputsDtype)
{
    // Net can be quantized only once.
    if (netWasQuantized)
        CV_Error(Error::StsBadArg, "Cannot quantize a quantized net");

    CV_CheckType(inputsDtype, inputsDtype == CV_32F || inputsDtype == CV_8S, "Input depth should be CV_32F or CV_8S");
    CV_CheckType(outputsDtype, outputsDtype == CV_32F || outputsDtype == CV_8S, "Output depth should be CV_32F or CV_8S");

    bool originalFusion = fusion;
    int prefBackend = preferableBackend;
    int prefTarget = preferableTarget;

    // Disable fusion and use the CPU backend to quantize the net
    setPreferableBackend(DNN_BACKEND_OPENCV);
    setPreferableTarget(DNN_TARGET_CPU);
    enableFusion(false);

    if (calibData.isMat())
    {
        setInput(calibData.getMat(), /*name=*/"", /*scalefactor=*/1.0, /*mean=*/Scalar());
    }
    else if (calibData.isMatVector())
    {
        std::vector<Mat> calibDataVec;
        calibData.getMatVector(calibDataVec);

        std::vector<String> inpNames = netInputLayer->outNames;
        CV_CheckEQ(calibDataVec.size(), inpNames.size(), "Calibration data size should be equal to number of inputs");
        for (int i = 0; i < calibDataVec.size(); i++)
            setInput(calibDataVec[i], inpNames[i], /*scalefactor=*/1.0, /*mean=*/Scalar());
    }

    std::vector<String> outNames = getUnconnectedOutLayersNames();
    std::vector<LayerPin> pins;
    for (int i = 0; i < outNames.size(); i++)
        pins.push_back(getPinByAlias(outNames[i]));
    setUpNet(pins);

    // Compute scales and zeropoints for all the layers
    std::vector<std::vector<float> > scales;
    std::vector<std::vector<int> > zeropoints;
    for (Impl::MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++)
    {
        LayerData& ld = it->second;
        if (!ld.skip)
        {
            Ptr<Layer> layer = ld.layerInstance;
            std::vector<Mat> inps(ld.inputBlobs.size());
            for (int i = 0; i < ld.inputBlobs.size(); ++i)
                inps[i] = *ld.inputBlobs[i];
            layer->forward(inps, ld.outputBlobs, ld.internals);
        }

        std::vector<float> sc;
        std::vector<int> zp;
        if (ld.type == "TanH")
        {
            sc.push_back(1.f/128);
            zp.push_back(0);
        }
        else if (ld.type == "Sigmoid" || ld.type == "Softmax" || ld.type == "SoftMax")
        {
            if (ld.params.get<bool>("log_softmax", false))
            {
                sc.push_back(16.f/256);
                zp.push_back(127);
            }
            else
            {
                sc.push_back(1.f/256);
                zp.push_back(-128);
            }
        }
        else if (ld.type == "Split" || ld.type == "Slice" || ld.type == "Crop")
        {
            std::vector<float> inp_sc; std::vector<int> inp_zp;
            getQuantizationParams(*ld.inputBlobs[0], inp_sc, inp_zp);
            sc.assign(ld.outputBlobs.size(), inp_sc[0]);
            zp.assign(ld.outputBlobs.size(), inp_zp[0]);
        }
        else
        {
            for (int i = 0; i < ld.outputBlobs.size(); i++)
                getQuantizationParams(ld.outputBlobs[i], sc, zp);
        }
        scales.push_back(sc);
        zeropoints.push_back(zp);
    }

    // For some layers, the input and output scales/zeropoints must be equal so that rescaling of inputs
    // is not needed during quantized inference. We start from the last layer and modify the layer's input scales/zeropoints.
    // TODO: Need a different approach. The current solution fails when 2 such layers have the same input layer.
    for (Impl::MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it)
    {
        LayerData& ld = it->second;
        // Layers with multiple outputs. Number of outputs is equal to number of inputs.
        if (ld.type == "Blank" || ld.type == "Dropout" || ld.type == "Identity" || ld.type == "Silence" ||
            ld.type == "Flatten" || ld.type == "Padding" || ld.type == "Permute" || ld.type == "Reshape" ||
            ld.type == "ReLU6" || ld.type == "Reorg" || ld.type == "ShuffleChannel" || ld.type == "Resize" ||
            (ld.type == "ReLU" && !ld.params.get<float>("negative_slope", 0.f)) /* ReLU with negative slope 0 */)
        {
            for (int i = 0; i < ld.outputBlobs.size(); i++)
            {
                LayerPin &pin = ld.inputBlobsId[i];
                scales[pin.lid][pin.oid] = scales[ld.id][i];
                zeropoints[pin.lid][pin.oid] = zeropoints[ld.id][i];
            }
        }
        // Layers with multiple inputs and single output.
        else if ((ld.type == "Pooling" && toLowerCase(ld.params.get<String>("pool", "max")) == "max") /* Max Pooling */ ||
                 (ld.type == "Eltwise" && toLowerCase(ld.params.get<String>("operation", "sum")) == "max") /* Elementwise max */ ||
                 ld.type == "Concat")
        {
            for (int i = 0; i < ld.inputBlobsId.size(); i++)
            {
                LayerPin &pin = ld.inputBlobsId[i];
                scales[pin.lid][pin.oid] = scales[ld.id][0];
                zeropoints[pin.lid][pin.oid] = zeropoints[ld.id][0];
            }
        }
    }
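
    // Example of why this backward pass is needed: Concat merely copies its inputs
    // into one output buffer, so every input must already be encoded with the
    // output's (scale, zeropoint); otherwise the backend would have to requantize
    // each input at inference time.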

    // Create a new Net and add quantized layers to it.
    Net dstNet_;
    Net::Impl& dstNet = *(dstNet_.impl);
    dstNet.netWasQuantized = true;
    dstNet.setInputsNames(netInputLayer->outNames);
    dstNet.setPreferableBackend(prefBackend);
    dstNet.setPreferableTarget(prefTarget);
    dstNet.enableFusion(originalFusion);

    for (Impl::MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++)
    {
        LayerData ld = it->second;
        if (ld.id == 0)
        {
            LayerData &quantInpLd = dstNet.layers[0];
            quantInpLd.dtype = inputsDtype;
            quantInpLd.params.set("scales", DictValue::arrayReal(scales[0].data(), scales[0].size()));
            quantInpLd.params.set("zeropoints", DictValue::arrayInt(zeropoints[0].data(), zeropoints[0].size()));
            continue;
        }

        std::vector<LayerPin> inpPins = ld.inputBlobsId;
        // Fill input and output scales/zeropoints for the layer
        std::vector<std::vector<float> > inp_out_sc(2);
        std::vector<std::vector<int> > inp_out_zp(2);
        for (int i = 0; i < inpPins.size(); i++)
        {
            LayerPin &pin = inpPins[i];
            inp_out_sc[0].push_back(scales[pin.lid][pin.oid]);
            inp_out_zp[0].push_back(zeropoints[pin.lid][pin.oid]);
        }
        inp_out_sc[1] = scales[ld.id];
        inp_out_zp[1] = zeropoints[ld.id];

        // Quantize layer
        Ptr<Layer> layer = ld.layerInstance;
        if (layer->tryQuantize(inp_out_sc, inp_out_zp, ld.params))
        {
            ld.type += "Int8";
            ld.dtype = CV_8S;
        }
        ld.params.set("scales", DictValue::arrayReal(inp_out_sc[1].data(), inp_out_sc[1].size()));
        ld.params.set("zeropoints", DictValue::arrayInt(inp_out_zp[1].data(), inp_out_zp[1].size()));

        // Check and add quantize/dequantize node before layer
        for (int i = 0; i < inpPins.size(); i++)
        {
            LayerPin &pin = inpPins[i];
            LayerData &inpLd = dstNet.getLayerData(getLayerName(pin.lid));
            pin.lid = inpLd.id;
            if (inpLd.dtype != ld.dtype)
            {
                String layerName = (inpLd.dtype == CV_32F && ld.dtype == CV_8S) ? cv::format("quantize/%s/%d", inpLd.name.c_str(), pin.oid)
                                                                                : cv::format("dequantize/%s/%d", inpLd.name.c_str(), pin.oid);
                // Check if quantize/dequantize node for the input layer already exists
                if (dstNet.getLayerId(layerName) >= 0)
                {
                    pin.lid = dstNet.getLayerId(layerName);
                    pin.oid = 0;
                }
                else
                {
                    LayerParams lp;
                    lp.set("scales", inp_out_sc[0][i]);
                    lp.set("zeropoints", inp_out_zp[0][i]);
                    lp.name = layerName;
                    lp.type = (inpLd.dtype == CV_32F && ld.dtype == CV_8S) ? "Quantize" : "Dequantize";
                    int newLid = dstNet.addLayer(lp.name, lp.type, ld.dtype, lp);
                    dstNet.connect(pin.lid, pin.oid, newLid, 0);
                    pin.lid = newLid; pin.oid = 0;
                }
            }
        }

        // Add quantized layer to Net and connect to its inputs.
        int newLid = dstNet.addLayer(ld.name, ld.type, ld.dtype, ld.params);
        for (int i = 0; i < inpPins.size(); i++)
            dstNet.connect(inpPins[i].lid, inpPins[i].oid, newLid, i);

        // If the layer is an output layer, add a quantize/dequantize node after it based on the output's data type.
        if (ld.requiredOutputs.size() == 0 && ld.dtype != outputsDtype)
        {
            LayerParams lp;
            lp.set("scales", inp_out_sc[1][0]);
            lp.set("zeropoints", inp_out_zp[1][0]);
            lp.name = ((ld.dtype == CV_32F && outputsDtype == CV_8S) ? "quantize/" : "dequantize/") + ld.name;
            lp.type = (ld.dtype == CV_32F && outputsDtype == CV_8S) ? "Quantize" : "Dequantize";
            dstNet.addLayerToPrev(lp.name, lp.type, outputsDtype, lp);
        }
    }
    // Restore the FP32 net's backend, target and fusion
    setPreferableBackend(prefBackend);
    setPreferableTarget(prefTarget);
    enableFusion(originalFusion);
    return dstNet_;
}
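
// Usage sketch for the public wrapper that forwards here (hedged; `calibBlobs` is a
// hypothetical calibration batch, the rest follows the cv::dnn::Net::quantize signature):
//   cv::dnn::Net qnet = net.quantize(calibBlobs, /*inputsDtype=*/CV_8S, /*outputsDtype=*/CV_8S);
//   // with inputsDtype == CV_8S the quantized net expects already-quantized CV_8S inputs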

// FIXIT drop from inference API
void Net::Impl::getInputDetails(std::vector<float>& scales, std::vector<int>& zeropoints) /*const*/
{
    if (!netWasQuantized)
        CV_Error(Error::StsBadFunc, "Net isn't quantized");

    LayerParams &lp = layers[0].params;
    DictValue sc = lp.get("scales");
    DictValue zp = lp.get("zeropoints");

    for (int i = 0; i < sc.size(); i++)
    {
        scales.push_back(sc.get<float>(i));
        zeropoints.push_back(zp.get<int>(i));
    }
}

// FIXIT drop from inference API
void Net::Impl::getOutputDetails(std::vector<float>& scales, std::vector<int>& zeropoints) /*const*/
{
    if (!netWasQuantized)
        CV_Error(Error::StsBadFunc, "Net isn't quantized");

    std::vector<int> outLayerIds = getUnconnectedOutLayers();
    for (auto &lid : outLayerIds)
    {
        LayerParams &lp = layers[lid].params;
        DictValue sc = lp.get("scales");
        DictValue zp = lp.get("zeropoints");

        for (int i = 0; i < sc.size(); i++)
        {
            scales.push_back(sc.get<float>(i));
            zeropoints.push_back(zp.get<int>(i));
        }
    }
}
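
// Usage sketch (assuming the public Net::getInputDetails/getOutputDetails wrappers
// forward to these Impl methods): read back the I/O quantization parameters with
//   std::vector<float> sc; std::vector<int> zp;
//   qnet.getInputDetails(sc, zp);  // one (scale, zeropoint) pair per network input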


CV__DNN_INLINE_NS_END
}} // namespace cv::dnn
106
modules/dnn/src/op_cuda.cpp
Normal file
@ -0,0 +1,106 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#include "precomp.hpp"

#ifdef HAVE_CUDA
#include "op_cuda.hpp"
#include "cuda4dnn/init.hpp"
#include "net_impl.hpp"

namespace cv { namespace dnn {
CV__DNN_INLINE_NS_BEGIN


void Net::Impl::initCUDABackend(const std::vector<LayerPin>& blobsToKeep_)
{
    CV_Assert(preferableBackend == DNN_BACKEND_CUDA);

    if (!cudaInfo) /* we need to check only once */
        cuda4dnn::checkVersions();

    if (cuda4dnn::getDeviceCount() <= 0)
        CV_Error(Error::StsError, "No CUDA capable device found.");

    if (cuda4dnn::getDevice() < 0)
        CV_Error(Error::StsError, "No CUDA capable device selected.");

    if (!cuda4dnn::isDeviceCompatible())
        CV_Error(Error::GpuNotSupported, "OpenCV was not built to work with the selected device. Please check CUDA_ARCH_PTX or CUDA_ARCH_BIN in your build configuration.");

    if (preferableTarget == DNN_TARGET_CUDA_FP16 && !cuda4dnn::doesDeviceSupportFP16())
    {
        CV_LOG_WARNING(NULL, "The selected CUDA device does not support FP16 target; switching to FP32 target.");
        preferableTarget = DNN_TARGET_CUDA;
    }

    if (!cudaInfo)
    {
        cuda4dnn::csl::CSLContext context;
        context.stream = cuda4dnn::csl::Stream(true);
        context.cublas_handle = cuda4dnn::csl::cublas::Handle(context.stream);
        context.cudnn_handle = cuda4dnn::csl::cudnn::Handle(context.stream);

        auto d2h_stream = cuda4dnn::csl::Stream(true); // stream for background D2H data transfers
        cudaInfo = std::unique_ptr<CudaInfo_t>(new CudaInfo_t(std::move(context), std::move(d2h_stream)));
    }

    cudaInfo->workspace = cuda4dnn::csl::Workspace(); // release workspace memory if any

    for (auto& layer : layers)
    {
        auto& ld = layer.second;
        if (ld.id == 0)
        {
            for (auto& wrapper : ld.inputBlobsWrappers)
            {
                auto cudaWrapper = wrapper.dynamicCast<CUDABackendWrapper>();
                cudaWrapper->setStream(cudaInfo->context.stream, cudaInfo->d2h_stream);
            }
        }

        for (auto& wrapper : ld.outputBlobsWrappers)
        {
            auto cudaWrapper = wrapper.dynamicCast<CUDABackendWrapper>();
            cudaWrapper->setStream(cudaInfo->context.stream, cudaInfo->d2h_stream);
        }
    }

    for (auto& layer : layers)
    {
        auto& ld = layer.second;
        auto& layerInstance = ld.layerInstance;

        if (!layerInstance->supportBackend(DNN_BACKEND_CUDA))
        {
            std::ostringstream os;
            os << "CUDA backend will fallback to the CPU implementation for the layer \"" << ld.name
               << "\" of type " << ld.type << '\n';
            CV_LOG_INFO(NULL, os.str().c_str());
            continue;
        }

        /* we make a copy so that `initCUDA` doesn't modify `cudaInfo->context` */
        auto context = cudaInfo->context;
        auto node = layerInstance->initCUDA(&context, ld.inputBlobsWrappers, ld.outputBlobsWrappers);
        ld.backendNodes[DNN_BACKEND_CUDA] = node;

        auto cudaNode = node.dynamicCast<CUDABackendNode>();
        cudaInfo->workspace.require(cudaNode->get_workspace_memory_in_bytes());
    }
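
    // Outputs listed in blobsToKeep_ are copied back to the host on the dedicated
    // d2h_stream created above, so device-to-host transfers can overlap with
    // ongoing kernel execution.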
    if (blobsToKeep_.size() > 1)
    {
        for (const auto& pin : blobsToKeep_)
        {
            LayerData& ld = layers[pin.lid];
            ld.cudaD2HBackgroundTransfers.push_back(pin.oid);
        }
    }
}


CV__DNN_INLINE_NS_END
}} // namespace cv::dnn
#endif // HAVE_CUDA
@ -8,15 +8,135 @@
#include "precomp.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#include "op_halide.hpp"
#include "net_impl.hpp"

#ifdef HAVE_HALIDE
#include "halide_scheduler.hpp"

#include <HalideRuntimeOpenCL.h>
#endif // HAVE_HALIDE

namespace cv {
namespace dnn {
CV__DNN_INLINE_NS_BEGIN


void Net::Impl::setHalideScheduler(const String& scheduler)
{
    halideConfigFile = scheduler;
}


#ifdef HAVE_HALIDE


void Net::Impl::compileHalide()
{
    CV_TRACE_FUNCTION();

    CV_Assert(preferableBackend == DNN_BACKEND_HALIDE);

    HalideScheduler scheduler(halideConfigFile);
    std::vector< std::reference_wrapper<LayerData> > compileList; compileList.reserve(64);
    for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it)
    {
        LayerData& ld = it->second;
        Ptr<Layer> layer = ld.layerInstance;
        if (layer->supportBackend(DNN_BACKEND_HALIDE) && !ld.skip)
        {
            CV_Assert(!ld.backendNodes[DNN_BACKEND_HALIDE].empty());
            bool scheduled = scheduler.process(ld.backendNodes[DNN_BACKEND_HALIDE]);
            if (!scheduled)
            {
                // Use automatic scheduling provided by layer.
                layer->applyHalideScheduler(ld.backendNodes[DNN_BACKEND_HALIDE],
                                            ld.inputBlobs, ld.outputBlobs,
                                            preferableTarget);
            }
            compileList.emplace_back(ld);
        }
    }
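
    // Compile the scheduled pipelines in parallel: each worker thread claims the
    // next entry of compileList via an atomic counter (a minimal lock-free
    // work-sharing loop).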
    std::atomic<int> progress(0);
    auto fn = ([&] () -> void
    {
        for (;;)
        {
            int id = progress.fetch_add(1);
            if ((size_t)id >= compileList.size())
                return;
            const LayerData& ld = compileList[id].get();
            Ptr<BackendNode> node = ld.backendNodes.find(DNN_BACKEND_HALIDE)->second;
            dnn::compileHalide(ld.outputBlobs, node, preferableTarget);
        }
    });
    size_t num_threads = std::min(compileList.size(), (size_t)std::thread::hardware_concurrency());
    num_threads = std::max((size_t)1u, std::min((size_t)8u, num_threads));
    std::vector<std::thread> threads(num_threads - 1);
    for (auto& t: threads) t = std::thread(fn);
    fn(); // process own tasks
    for (auto& t: threads) t.join();
}


void Net::Impl::initHalideBackend()
{
    CV_TRACE_FUNCTION();
    CV_Assert_N(preferableBackend == DNN_BACKEND_HALIDE, haveHalide());

    // Iterator to current layer.
    MapIdToLayerData::iterator it = layers.begin();
    // Iterator to base layer for fusion. For example, in the case of conv+bn+relu
    // it will be the conv layer.
    MapIdToLayerData::iterator baseIt = layers.begin();
    for (; it != layers.end(); it++)
    {
        LayerData &ldTop = it->second;
        Ptr<Layer> layerTop = ldTop.layerInstance;
        if (!layerTop->supportBackend(preferableBackend))
        {
            // Move the base iterator to a layer that doesn't support the preferable
            // backend to prevent fusion across layers of different backends.
            baseIt = it;
            continue;
        }
        // Try to perform layer fusion.
        LayerData &ldBot = baseIt->second;
        Ptr<Layer> layerBot = ldBot.layerInstance;
        // 1. Check that bottom and top layers are from the same backend.
        if (it != layers.begin() && layerBot->supportBackend(preferableBackend))
        {
            // 2. Check that the current layer works in-place.
            bool inPlace = ldTop.inputBlobs.size() == 1 &&
                           ldBot.outputBlobs.size() == 1 &&
                           ldTop.inputBlobs[0]->data ==
                           ldBot.outputBlobs[0].data;
            if (inPlace)
            {
                // 3. Try to attach node.
                CV_Assert(!ldBot.backendNodes[preferableBackend].empty());
                Ptr<BackendNode> fusedNode =
                    layerTop->tryAttach(ldBot.backendNodes[preferableBackend]);
                if (!fusedNode.empty())
                {
                    ldTop.skip = true;
                    ldBot.backendNodes[preferableBackend] = fusedNode;
                    ldBot.outputBlobsWrappers = ldTop.outputBlobsWrappers;
                    continue;
                }
            }
        }
        // No layer fusion.
        ldTop.skip = false;
        ldTop.backendNodes[DNN_BACKEND_HALIDE] =
            layerTop->initHalide(ldTop.inputBlobsWrappers);
        baseIt = it;
    }
}


#endif // HAVE_HALIDE
CV__DNN_INLINE_NS_END


#ifdef HAVE_HALIDE
static MatShape getBufferShape(const MatShape& shape)
@ -226,5 +346,83 @@ bool haveHalide()
#endif // HAVE_HALIDE
}

} // namespace dnn
} // namespace cv

CV__DNN_INLINE_NS_BEGIN


void Layer::applyHalideScheduler(Ptr<BackendNode>& node, const std::vector<Mat*> &inputs,
                                 const std::vector<Mat> &outputs, int targetId) const
{
#ifndef HAVE_HALIDE
    CV_Error(Error::StsNotImplemented, "");
#else
    CV_TRACE_FUNCTION();

    Halide::Var x("x"), y("y"), c("c"), n("n"), co("co"), ci("ci"),
                xo("xo"), xi("xi"), yo("yo"), yi("yi"), tile("tile");
    Halide::Func& top = node.dynamicCast<HalideBackendNode>()->funcs.back();

    int outW, outH, outC, outN;
    getCanonicalSize(outputs[0].size, &outW, &outH, &outC, &outN);

    if (targetId == DNN_TARGET_CPU)
    {
        if (outW == 1 && outH == 1)
        {
            if (outC + outN == 1)
                return;

            if (outC > 8)
                top.split(c, co, ci, 8)
                   .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile)
                   .parallel(tile)
                   .vectorize(ci, 8);
            else
                top.fuse(x, y, tile).fuse(c, tile, tile).fuse(n, tile, tile)
                   .parallel(tile);
        }
        else
        {
            if (outH > 2)
            {
                top.reorder(x, c, y)
                   .split(y, yo, yi, 2)
                   .fuse(yo, n, tile)
                   .parallel(tile)
                   .unroll(yi)
                   .vectorize(x, outW >= 16 ? 16 : outW);
            }
        }
    }
    else if (targetId == DNN_TARGET_OPENCL)
    {
        if (outW == 1 && outH == 1)
        {
            int c_split = outC > 8 ? (outC > 16 ? 8 : 4) : outC;
            top.split(c, co, ci, c_split)
               .fuse(x, y, tile).fuse(co, tile, tile).fuse(n, tile, tile)
               .gpu_blocks(tile)
               .gpu_threads(ci);
        }
        else
        {
            int x_split = outW > 8 ? (outW >= 32 ? 16 : 8) : outW;
            int y_split = outH > 8 ? (outH >= 32 ? 16 : 8) : outH;
            // Supported vectorization widths: 2, 3, 4, 8, 16
            int c_split = outC > 8 ? (outC > 16 ? 8 : 4) : std::min(4, outC);
            top.split(x, xo, xi, x_split).split(y, yo, yi, y_split)
               .split(c, co, ci, c_split)
               .gpu_blocks(xo, yo, co)
               .gpu_threads(xi, yi)
               .reorder(xi, yi, ci, xo, yo, co)
               .vectorize(ci);
        }
    }
    else
        CV_Error(Error::StsNotImplemented, "Unknown target identifier");
#endif // HAVE_HALIDE
}


CV__DNN_INLINE_NS_END
}} // namespace
@ -254,6 +254,31 @@ cv::String getInferenceEngineCPUType()
    return cpu_type;
}


namespace openvino {

bool checkTarget(Target target)
{
    // Lightweight detection
    const std::vector<std::string> devices = getCore("").GetAvailableDevices();
    for (std::vector<std::string>::const_iterator i = devices.begin(); i != devices.end(); ++i)
    {
        if (std::string::npos != i->find("MYRIAD") && target == DNN_TARGET_MYRIAD)
            return true;
        if (std::string::npos != i->find("HDDL") && target == DNN_TARGET_HDDL)
            return true;
        else if (std::string::npos != i->find("FPGA") && target == DNN_TARGET_FPGA)
            return true;
        else if (std::string::npos != i->find("CPU") && target == DNN_TARGET_CPU)
            return true;
        else if (std::string::npos != i->find("GPU") && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
            return true;
    }
    return false;
}

} // namespace openvino

#else // HAVE_INF_ENGINE

cv::String getInferenceEngineBackendType()
@ -73,6 +73,13 @@ void infEngineBlobsToMats(const std::vector<InferenceEngine::Blob::Ptr>& blobs,

CV__DNN_INLINE_NS_BEGIN

namespace openvino {

// TODO: use std::string as parameter
bool checkTarget(Target target);

} // namespace openvino

bool isMyriadX();

bool isArmComputePlugin();
@ -8,12 +8,51 @@
#include "precomp.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#include "op_vkcom.hpp"
#include "net_impl.hpp"

namespace cv
{
namespace dnn
{
#ifdef HAVE_VULKAN

CV__DNN_INLINE_NS_BEGIN


void Net::Impl::initVkComBackend()
{
    CV_TRACE_FUNCTION();
    CV_Assert(preferableBackend == DNN_BACKEND_VKCOM);

    for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); it++)
    {
        LayerData &ld = it->second;
        Ptr<Layer> layer = ld.layerInstance;
        if (!layer->supportBackend(preferableBackend))
        {
            continue;
        }

        ld.skip = false;

        try
        {
            ld.backendNodes[DNN_BACKEND_VKCOM] =
                layer->initVkCom(ld.inputBlobsWrappers);
        }
        catch (const cv::Exception& e)
        {
            CV_LOG_ERROR(NULL, "initVkCom failed, fallback to CPU implementation. " << e.what());
            ld.backendNodes[DNN_BACKEND_VKCOM] = Ptr<BackendNode>();
        }
    }
}

CV__DNN_INLINE_NS_END


///////////////////////////////////////////////////////////////////////////////

void copyToTensor(vkcom::Tensor &dst, const Mat &src)
{
    CV_Assert(src.isContinuous() && src.type() == CV_32F);
@ -2,6 +2,7 @@
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#include "precomp.hpp"
#include <fstream>
#include "op_webnn.hpp"

@ -13,10 +14,281 @@

#include <opencv2/dnn/shape_utils.hpp>

#include "net_impl.hpp"

namespace cv { namespace dnn {

#ifdef HAVE_WEBNN

CV__DNN_INLINE_NS_BEGIN

void Net::Impl::addWebnnOutputs(LayerData &ld)
{
    CV_TRACE_FUNCTION();

    Ptr<WebnnNet> layerNet;
    auto it = ld.backendNodes.find(preferableBackend);
    if (it != ld.backendNodes.end())
    {
        Ptr<BackendNode> node = it->second;
        if (!node.empty())
        {
            Ptr<WebnnBackendNode> webnnNode = node.dynamicCast<WebnnBackendNode>();
            CV_Assert(!webnnNode.empty()); CV_Assert(!webnnNode->net.empty());
            layerNet = webnnNode->net;
        }
    }

    for (int i = 0; i < ld.inputBlobsId.size(); ++i)
    {
        LayerData &inpLd = layers[ld.inputBlobsId[i].lid];
        Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
        if (!inpNode.empty())
        {
            Ptr<WebnnBackendNode> webnnInpNode = inpNode.dynamicCast<WebnnBackendNode>();
            CV_Assert(!webnnInpNode.empty()); CV_Assert(!webnnInpNode->net.empty());
            if (layerNet != webnnInpNode->net)
            {
                webnnInpNode->net->addOutput(webnnInpNode->name);
                webnnInpNode->net->setUnconnectedNodes(webnnInpNode);
            }
        }
    }
}


void Net::Impl::initWebnnBackend(const std::vector<LayerPin>& blobsToKeep_)
{
    CV_TRACE_FUNCTION();
    CV_Assert_N(preferableBackend == DNN_BACKEND_WEBNN, haveWebnn());

    Ptr<WebnnNet> net;

    for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it)
    {
        LayerData &ld = it->second;
        if (ld.id == 0)
        {
            CV_Assert((netInputLayer->outNames.empty() && ld.outputBlobsWrappers.size() == 1) ||
                      (netInputLayer->outNames.size() == ld.outputBlobsWrappers.size()));
            for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
            {
                Ptr<WebnnBackendWrapper> wrapper = ld.outputBlobsWrappers[i].dynamicCast<WebnnBackendWrapper>();
                std::string outputName = netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i];
                outputName = ld.outputBlobsWrappers.size() > 1 ? (outputName + "." + std::to_string(i)) : outputName;
                wrapper->name = outputName;
            }
        }
        else
        {
            for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
            {
                Ptr<WebnnBackendWrapper> wrapper = ld.outputBlobsWrappers[i].dynamicCast<WebnnBackendWrapper>();
                std::string outputName = ld.outputBlobsWrappers.size() > 1 ? (ld.name + "." + std::to_string(i)) : ld.name;
                wrapper->name = outputName;
            }
        }
    }
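
    // Naming convention established above: a single-output layer keeps its layer name,
    // while output i of a multi-output layer "conv" becomes "conv.0", "conv.1", ...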
    // Build WebNN networks from sets of layers that support this backend.
    // Split the whole model into several WebNN networks if some layers
    // are not implemented.
    for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it)
    {
        LayerData &ld = it->second;

        if (ld.id == 0 && ld.skip)
            continue;

        bool fused = ld.skip;
        Ptr<Layer> layer = ld.layerInstance;
        if (!fused && !layer->supportBackend(preferableBackend))
        {
            // For test use: when WebNN is not used, the test case will fail
            // with the following code.
            CV_LOG_WARNING(NULL, "Layer " + ld.type + " name " + ld.name + " is unsupported by WebNN backend.");

            addWebnnOutputs(ld);
            net = Ptr<WebnnNet>();
            layer->preferableTarget = DNN_TARGET_CPU;

            for (int i = 0; i < ld.inputBlobsId.size(); ++i)
            {
                LayerData &inpLd = layers[ld.inputBlobsId[i].lid];
                Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
                if (!inpNode.empty()) {
                    Ptr<WebnnBackendNode> webnnNode = inpNode.dynamicCast<WebnnBackendNode>();
                    CV_Assert(!webnnNode.empty());
                    webnnNode->net->setUnconnectedNodes(webnnNode);
                }
            }
            continue;
        }
        ld.skip = true; // Initially skip all WebNN supported layers.

        // Create a new network if one of the inputs comes from a different WebNN graph.
        std::vector<Ptr<BackendNode>> inputNodes;
        for (int i = 0; i < ld.inputBlobsId.size(); ++i)
        {
            // Layer_Test_ROIPooling.Accuracy has 2 inputs inpLD = 0, 0 -> has 4 inputNodes (input, rois, input, rois)
            if (inputNodes.size() == ld.inputBlobsId.size()) {
                break;
            }
            LayerData &inpLd = layers[ld.inputBlobsId[i].lid];
            Ptr<BackendNode> inpNode = inpLd.backendNodes[preferableBackend];
            if (!inpNode.empty())
            {
                Ptr<WebnnBackendNode> webnnInpNode = inpNode.dynamicCast<WebnnBackendNode>();
                CV_Assert(!webnnInpNode.empty()); CV_Assert(!webnnInpNode->net.empty());
                if (webnnInpNode->net == net && !fused) {
                    inputNodes.push_back(inpNode);
                    continue;
                }
            }

            if (net.empty()) {
                net = Ptr<WebnnNet>(new WebnnNet());
            }

            if (!fused) {
                std::vector<std::string> inputNames;
                std::vector<cv::Mat> inputs;

                auto curr_pos = inpLd.consumers.begin();
                auto compare = [&ld] (const LayerPin& lp) { return lp.lid == ld.id; };
                auto cons = curr_pos;
                while ((cons = std::find_if(curr_pos, inpLd.consumers.end(), compare)) !=
                        inpLd.consumers.end()) {
                    int cons_inp = cons->oid;
                    Ptr<WebnnBackendWrapper> inpWrapper = inpLd.outputBlobsWrappers[cons_inp].
                        dynamicCast<WebnnBackendWrapper>();
                    CV_Assert(!inpWrapper.empty());
                    auto iter = std::find(inputNames.begin(), inputNames.end(),
                                          inpWrapper->name);
                    if (iter == inputNames.end()) {
                        inputNames.push_back(inpWrapper->name);
                        inputs.push_back(inpLd.outputBlobs[cons_inp]);
                    }
                    curr_pos = cons + 1;
                }

                auto inps = net->setInputs(inputs, inputNames);
                for (auto& inp : inps) {
                    WebnnBackendNode* node = new WebnnBackendNode(inp);
                    node->net = net;
                    inputNodes.emplace_back(Ptr<BackendNode>(node));
                }
            }
        }

        Ptr<BackendNode> node;
        if (!net.empty())
        {
            if (fused)
            {
                bool inPlace = ld.inputBlobsId.size() == 1 && ld.outputBlobs.size() == 1 &&
                               ld.inputBlobs[0]->data == ld.outputBlobs[0].data;
                CV_Assert(inPlace);
                node = layers[ld.inputBlobsId[0].lid].backendNodes[preferableBackend];
                ld.inputBlobsWrappers = layers[ld.inputBlobsId[0].lid].inputBlobsWrappers;
            }
        }
        else {
            net = Ptr<WebnnNet>(new WebnnNet());
        }

        if (!fused)
        {
            CV_Assert(ld.inputBlobsId.size() == inputNodes.size());
            for (int i = 0; i < ld.inputBlobsId.size(); ++i)
            {
                int lid = ld.inputBlobsId[i].lid;
                int oid = ld.inputBlobsId[i].oid;
                if (oid == 0 || lid == 0)
                    continue;

                auto webnnInpNode = inputNodes[i].dynamicCast<WebnnBackendNode>();
                inputNodes[i] = Ptr<BackendNode>(new WebnnBackendNode(webnnInpNode->operand));
            }

            if (layer->supportBackend(preferableBackend))
            {
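                // "Const" layers have no real inputs, so append a placeholder node
                // wrapping an empty ml::Operand; its `net` field gives initWebnn()
                // a handle to the WebNN graph currently under construction.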
                if (ld.type == "Const") {
                    ml::Operand fake_operand;
                    Ptr<WebnnBackendNode> fake_input_node = Ptr<WebnnBackendNode>(new WebnnBackendNode(fake_operand));
                    fake_input_node->net = net;
                    inputNodes.push_back(fake_input_node);
                }
                node = layer->initWebnn(ld.inputBlobsWrappers, inputNodes);
                for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
                {
                    Ptr<WebnnBackendWrapper> wrapper = ld.outputBlobsWrappers[i].dynamicCast<WebnnBackendWrapper>();
                    node.dynamicCast<WebnnBackendNode>()->name = wrapper->name;
                }
            }
            else
            {
                continue;
            }
        }
        else if (node.empty())
            continue;

        ld.backendNodes[preferableBackend] = node;

        Ptr<WebnnBackendNode> webnnNode = node.dynamicCast<WebnnBackendNode>();
        CV_Assert(!webnnNode.empty());
        webnnNode->net = net;

        if (ld.consumers.empty()) {
            // TF EAST_text_detection
            webnnNode->net->setUnconnectedNodes(webnnNode);
        }
        for (const auto& pin : blobsToKeep_)
        {
            if (pin.lid == ld.id)
            {
                webnnNode->net->addOutput(webnnNode->name);
                break;
            }
        }
        net->addBlobs(ld.inputBlobsWrappers);
        net->addBlobs(ld.outputBlobsWrappers);
        addWebnnOutputs(ld);
    }

    // Initialize all networks.
    for (MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it)
    {
        LayerData &ld = it->second;
        auto iter = ld.backendNodes.find(preferableBackend);
        if (iter == ld.backendNodes.end())
            continue;

        Ptr<BackendNode>& node = iter->second;
        if (node.empty())
            continue;

        Ptr<WebnnBackendNode> webnnNode = node.dynamicCast<WebnnBackendNode>();
        if (webnnNode.empty())
            continue;

        CV_Assert(!webnnNode->net.empty());

        if (!webnnNode->net->isInitialized())
        {
            webnnNode->net->setUnconnectedNodes(webnnNode);
            webnnNode->net->createNet((Target)preferableTarget);
            ld.skip = false;
        }
    }
}


CV__DNN_INLINE_NS_END


namespace webnn {
ml::Operand BuildConstant(const ml::GraphBuilder& builder,
                          const std::vector<int32_t>& dimensions,
@ -66,6 +66,15 @@
#undef HAVE_CUDA
#endif

#include <numeric>
#include <memory>
#include <algorithm>
#include <fstream>
#include <sstream>
#include <vector>
#include <set>
#include <iterator>

#include <opencv2/core/ocl.hpp>
#include <opencv2/core/opencl/ocl_defs.hpp>

144
modules/dnn/src/registry.cpp
Normal file
@ -0,0 +1,144 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#include "precomp.hpp"

#include "op_halide.hpp"
#include "op_inf_engine.hpp"
#include "ie_ngraph.hpp"
#include "op_vkcom.hpp"
#include "op_cuda.hpp"
#include "op_webnn.hpp"

#include "halide_scheduler.hpp"


namespace cv {
namespace dnn {
CV__DNN_INLINE_NS_BEGIN


class BackendRegistry
{
public:
    typedef std::vector< std::pair<Backend, Target> > BackendsList;
    const BackendsList & getBackends() const { return backends; }
    static BackendRegistry & getRegistry()
    {
        static BackendRegistry impl;
        return impl;
    }

private:
    BackendRegistry()
    {
#ifdef HAVE_HALIDE
        backends.push_back(std::make_pair(DNN_BACKEND_HALIDE, DNN_TARGET_CPU));
#ifdef HAVE_OPENCL
        if (cv::ocl::useOpenCL())
            backends.push_back(std::make_pair(DNN_BACKEND_HALIDE, DNN_TARGET_OPENCL));
#endif
#endif // HAVE_HALIDE

#ifdef HAVE_INF_ENGINE
        if (openvino::checkTarget(DNN_TARGET_CPU))
        {
#ifdef HAVE_DNN_NGRAPH
            backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_CPU));
#endif
        }
        if (openvino::checkTarget(DNN_TARGET_MYRIAD))
        {
#ifdef HAVE_DNN_NGRAPH
            backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_MYRIAD));
#endif
        }
        if (openvino::checkTarget(DNN_TARGET_HDDL))
        {
#ifdef HAVE_DNN_NGRAPH
            backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_HDDL));
#endif
        }
#ifdef HAVE_OPENCL
        if (cv::ocl::useOpenCL() && ocl::Device::getDefault().isIntel())
        {
            if (openvino::checkTarget(DNN_TARGET_OPENCL))
            {
#ifdef HAVE_DNN_NGRAPH
                backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_OPENCL));
#endif
            }
            if (openvino::checkTarget(DNN_TARGET_OPENCL_FP16))
            {
#ifdef HAVE_DNN_NGRAPH
                backends.push_back(std::make_pair(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_OPENCL_FP16));
#endif
            }
        }
#endif
#endif // HAVE_INF_ENGINE

#ifdef HAVE_WEBNN
        if (haveWebnn())
        {
            backends.push_back(std::make_pair(DNN_BACKEND_WEBNN, DNN_TARGET_CPU));
        }
#endif // HAVE_WEBNN

#ifdef HAVE_OPENCL
        if (cv::ocl::useOpenCL())
        {
            backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL));
            backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_OPENCL_FP16));
        }
#endif

        backends.push_back(std::make_pair(DNN_BACKEND_OPENCV, DNN_TARGET_CPU));

#ifdef HAVE_VULKAN
        if (haveVulkan())
            backends.push_back(std::make_pair(DNN_BACKEND_VKCOM, DNN_TARGET_VULKAN));
#endif

#ifdef HAVE_CUDA
        if (haveCUDA())
        {
            backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA));
            backends.push_back(std::make_pair(DNN_BACKEND_CUDA, DNN_TARGET_CUDA_FP16));
        }
#endif
    }

    BackendsList backends;
};


std::vector<std::pair<Backend, Target>> getAvailableBackends()
{
    return BackendRegistry::getRegistry().getBackends();
}

std::vector<Target> getAvailableTargets(Backend be)
{
    if (be == DNN_BACKEND_DEFAULT)
        be = (Backend)getParam_DNN_BACKEND_DEFAULT();
#ifdef HAVE_INF_ENGINE
    if (be == DNN_BACKEND_INFERENCE_ENGINE)
        be = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
#endif

    std::vector<Target> result;
    const BackendRegistry::BackendsList all_backends = getAvailableBackends();
    for (BackendRegistry::BackendsList::const_iterator i = all_backends.begin(); i != all_backends.end(); ++i)
    {
        if (i->first == be)
            result.push_back(i->second);
    }
    return result;
}
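
// Usage sketch: enumerate what the current build can actually run, e.g.
//   for (cv::dnn::Target t : cv::dnn::getAvailableTargets(cv::dnn::DNN_BACKEND_OPENCV))
//   {
//       // configure a Net with setPreferableBackend/setPreferableTarget
//   }
// or inspect all registered (backend, target) pairs via cv::dnn::getAvailableBackends().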


CV__DNN_INLINE_NS_END
}} // namespace cv::dnn