Mirror of https://github.com/opencv/opencv.git (synced 2025-06-07 09:25:45 +08:00)
Merge pull request #23987 from dkurt:openvino_int8_backend
OpenVINO backend for INT8 models #23987

### Pull Request Readiness Checklist

TODO:
- [x] DetectionOutput layer (https://github.com/opencv/opencv/pull/24069)
- [x] Fewer FP32 fallbacks (i.e. Sigmoid, eltwise sum)
- [x] Accuracy and performance tests (https://github.com/opencv/opencv/pull/24039)
- [x] Single-layer tests (convolution)
- [x] ~~Fixes for OpenVINO 2022.1 (https://pullrequest.opencv.org/buildbot/builders/precommit_custom_linux/builds/100334)~~

Performance results for the object detection model `coco_efficientdet_lite0_v1_1.0_quant_2021_09_06.tflite`:

| backend | performance (median time) |
|---|---|
| OpenCV | 77.42 ms |
| OpenVINO 2023.0 | 10.90 ms |

CPU: `11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz`

Serialized model per-layer stats (note that Convolution should use `*_I8` primitives if quantized correctly): https://gist.github.com/dkurt/7772bbf1907035441bb5454f19f0feef

---

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There are accuracy tests, performance tests and test data in the opencv_extra repository, if applicable. The patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
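For readers who want to try the new backend, here is a minimal usage sketch (file paths are illustrative; the calls are the existing `cv::dnn` API):

```cpp
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/imgcodecs.hpp>

int main() {
    // Load the quantized TFLite model (path is illustrative).
    cv::dnn::Net net = cv::dnn::readNet("coco_efficientdet_lite0_v1_1.0_quant_2021_09_06.tflite");

    // Route inference through OpenVINO; quantized networks need OpenVINO >= 2023.0.
    net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
    net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);

    cv::Mat img = cv::imread("dog416.png");
    cv::resize(img, img, cv::Size(320, 320));
    net.setInput(cv::dnn::blobFromImage(img));
    cv::Mat out = net.forward();
    return 0;
}
```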
This commit is contained in: parent b8d4ac589d, commit c7ec0d599a
@@ -302,6 +302,18 @@ PERF_TEST_P_(DNNTestNetwork, EfficientDet)
     processNet("dnn/efficientdet-d0.pb", "dnn/efficientdet-d0.pbtxt", "", inp);
 }
 
+PERF_TEST_P_(DNNTestNetwork, EfficientDet_int8)
+{
+    if (target != DNN_TARGET_CPU || (backend != DNN_BACKEND_OPENCV &&
+        backend != DNN_BACKEND_TIMVX && backend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)) {
+        throw SkipTestException("");
+    }
+    Mat inp = imread(findDataFile("dnn/dog416.png"));
+    resize(inp, inp, Size(320, 320));
+    processNet("", "dnn/tflite/coco_efficientdet_lite0_v1_1.0_quant_2021_09_06.tflite", "", inp);
+}
+
 INSTANTIATE_TEST_CASE_P(/*nothing*/, DNNTestNetwork, dnnBackendsAndTargets());
 
 } // namespace
@@ -590,7 +590,7 @@ void InfEngineNgraphNet::init(Target targetId)
             allBlobs[name] = ov::Tensor(src.get_element_type(), outShape, src.data());
         }
 
-        ppp.output(i++).tensor().set_element_type(ov::element::f32);  // Should be always FP32
+        ppp.output(i++).tensor().set_element_type(src.get_element_type());
     }
 
     ppp.build();
@@ -840,6 +840,8 @@ ov::Tensor wrapToNgraphBlob(const Mat& m) {
         return ov::Tensor(ov::element::f32, shape, m.data);
     else if (m.type() == CV_8U)
         return ov::Tensor(ov::element::u8, shape, m.data);
+    else if (m.type() == CV_8SC1)
+        return ov::Tensor(ov::element::i8, shape, m.data);
     else if (m.type() == CV_32SC1)
         return ov::Tensor(ov::element::i32, shape, m.data);
     else
@@ -1234,6 +1236,32 @@ void InfEngineNgraphNet::forward(const std::vector<Ptr<BackendWrapper> >& outBlo
 #endif  // OpenVINO >= 2022.1
 }
 
+ngraph::Output<ngraph::Node> ngraphQuantize(ngraph::Output<ngraph::Node> input, float output_sc, float output_zp) {
+    float outLow = -128, outHigh = 127;
+    float inpLow = output_sc * (outLow - output_zp);
+    float inpHigh = output_sc * (outHigh - output_zp);
+    return std::make_shared<ngraph::op::FakeQuantize>(input,
+        std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &inpLow),
+        std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &inpHigh),
+        std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &outLow),
+        std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &outHigh),
+        256  // levels
+    );
+}
+
+ngraph::Output<ngraph::Node> ngraphDequantize(ngraph::Output<ngraph::Node> input, float input_sc, float input_zp) {
+    float inpLow = -128, inpHigh = 127;
+    float outLow = input_sc * (inpLow - input_zp);
+    float outHigh = input_sc * (inpHigh - input_zp);
+    return std::make_shared<ngraph::op::FakeQuantize>(input,
+        std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &inpLow),
+        std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &inpHigh),
+        std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &outLow),
+        std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &outHigh),
+        256  // levels
+    );
+}
+
 #endif
 
 }}
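A note on the two helpers above: both emit a single `FakeQuantize` node, which is how OpenVINO annotates the affine INT8 mapping `real = scale * (q - zero_point)` while keeping the tensor in FP32; OpenVINO's low-precision transformations can later fold these annotations into true `*_I8` primitives. A worked example with illustrative values:

```
scale = 0.1, zero_point = 5, 256 levels
inpLow  = 0.1 * (-128 - 5) = -13.3   // real value that quantizes to -128
inpHigh = 0.1 * ( 127 - 5) =  12.2   // real value that quantizes to  127
ngraphQuantize:   clamps/rounds FP32 values in [-13.3, 12.2] onto the integer grid [-128, 127]
ngraphDequantize: maps the grid [-128, 127] back onto real values in [-13.3, 12.2]
```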
@@ -148,6 +148,9 @@ private:
     InferenceEngine::CNNNetwork t_net;
 };
 
+ngraph::Output<ngraph::Node> ngraphQuantize(ngraph::Output<ngraph::Node> input, float output_sc, float output_zp);
+ngraph::Output<ngraph::Node> ngraphDequantize(ngraph::Output<ngraph::Node> input, float input_sc, float input_zp);
+
 #endif  // HAVE_DNN_NGRAPH
 
 }}  // namespace cv::dnn
@@ -5,6 +5,7 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 #include "../op_timvx.hpp"
+#include "../ie_ngraph.hpp"
 
 #include <opencv2/dnn/shape_utils.hpp>
 
@@ -110,7 +111,8 @@ public:
             return true;
         }
 
-        return backendId == DNN_BACKEND_OPENCV;
+        return backendId == DNN_BACKEND_OPENCV ||
+               backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
     }
 
     bool setActivation(const Ptr<ActivationLayer>& layer) CV_OVERRIDE
@@ -238,6 +240,27 @@ public:
         return Ptr<BackendNode>();
     }
 
+#ifdef HAVE_DNN_NGRAPH
+    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
+        auto input = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
+
+        input = ngraphDequantize(input, input_sc, input_zp);
+
+        std::vector<size_t> shape(input.get_shape().size(), 1);
+        shape[1] = origin_weights.total();
+
+        ngraph::Output<ngraph::Node> res;
+        auto ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, shape, origin_weights.data);
+        auto ieBias = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, shape, origin_bias.data);
+        res = std::make_shared<ngraph::op::v1::Multiply>(input, ieWeights);
+        res = std::make_shared<ngraph::op::v1::Add>(res, ieBias);
+
+        res = ngraphQuantize(res, output_sc, output_zp);
+        return new InfEngineNgraphNode(res);
+    }
+#endif  // HAVE_DNN_NGRAPH
+
     void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
     {
         CV_TRACE_FUNCTION();
@@ -10,6 +10,7 @@
 #include "opencv2/core/hal/hal.hpp"
 #include "opencv2/core/hal/intrin.hpp"
 #include "../op_timvx.hpp"
+#include "../ie_ngraph.hpp"
 #include <iostream>
 #include <numeric>
 
@@ -195,7 +196,8 @@ public:
     }
 #endif
     // Only default backend and Conv1D/Conv2D/Conv3D are supported
-    return backendId == DNN_BACKEND_OPENCV && ksize >= 1 && ksize <= 3;
+    return (backendId == DNN_BACKEND_OPENCV && ksize >= 1 && ksize <= 3) ||
+           backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
 }
 
 bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -561,6 +563,126 @@ public:
         return Ptr<BackendNode>();
     }
 
+#ifdef HAVE_DNN_NGRAPH
+    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs,
+                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
+        CV_Assert(!blobs.empty());
+        CV_Assert_N(inputs.size() >= 1, nodes.size() >= 1);
+        CV_CheckTypeEQ(weightsMat.type(), CV_8S, "");
+        auto ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
+        std::vector<size_t> dims = ieInpNode.get_shape();
+        CV_Check(dims.size(), dims.size() >= 3 && dims.size() <= 5, "");
+        CV_Assert(ieInpNode.get_element_type() == ngraph::element::f32);
+        ngraph::Output<ngraph::Node> ieWeights;
+        if (nodes.size() > 1)
+            ieWeights = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
+        const int inpCn = dims[1];
+        const int inpGroupCn = nodes.size() > 1 ? ieWeights.get_shape()[1] : blobs[0].size[1];
+        const int group = inpCn / inpGroupCn;
+
+        std::vector<size_t> kernel_shape;
+        if (group != 1)
+        {
+            kernel_shape.push_back(group);
+        }
+        kernel_shape.push_back(numOutput / group);
+        kernel_shape.push_back(inpCn / group);
+        std::copy(kernel_size.begin(), kernel_size.end(), back_inserter(kernel_shape));
+
+        if (nodes.size() == 1)
+        {
+            ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::i8, kernel_shape, blobs[0].data);
+        }
+        else
+        {
+            auto shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
+                ngraph::Shape{kernel_shape.size()}, std::vector<int64_t>(kernel_shape.begin(), kernel_shape.end()));
+            ieWeights = std::make_shared<ngraph::op::v1::Reshape>(ieWeights, shape, true);
+        }
+
+        ngraph::op::PadType pad_type = ngraph::op::PadType::EXPLICIT;
+        if (!padMode.empty())
+            pad_type = padMode == "VALID" ? ngraph::op::PadType::VALID : ngraph::op::PadType::SAME_UPPER;
+
+        ieInpNode = ngraphDequantize(ieInpNode, input_sc, input_zp);
+
+        const float low = -128, high = 127;
+        std::vector<float> inpLows(numOutput, low);
+        std::vector<float> inpHighs(numOutput, high);
+        std::vector<float> outLows(numOutput);
+        std::vector<float> outHighs(numOutput);
+        std::vector<size_t> quantShape(kernel_shape.size(), 1);
+        if (group != 1)
+        {
+            quantShape[0] = group;
+            quantShape[1] = numOutput / group;
+        }
+        else
+        {
+            quantShape[0] = numOutput;
+        }
+
+        for (int i = 0; i < numOutput; ++i) {
+            outLows[i] = low * outputMultiplier[i] * output_sc / input_sc;
+            outHighs[i] = high * outputMultiplier[i] * output_sc / input_sc;
+        }
+        ieWeights = std::make_shared<ngraph::op::Convert>(ieWeights, ngraph::element::f32);
+        ieWeights = std::make_shared<ngraph::op::FakeQuantize>(ieWeights,
+            std::make_shared<ngraph::op::Constant>(ngraph::element::f32, quantShape, inpLows.data()),
+            std::make_shared<ngraph::op::Constant>(ngraph::element::f32, quantShape, inpHighs.data()),
+            std::make_shared<ngraph::op::Constant>(ngraph::element::f32, quantShape, outLows.data()),
+            std::make_shared<ngraph::op::Constant>(ngraph::element::f32, quantShape, outHighs.data()),
+            256  // levels
+        );
+
+        ngraph::Output<ngraph::Node> conv_node;
+        if (group != 1) {
+            conv_node = std::make_shared<ngraph::op::v1::GroupConvolution>(
+                ieInpNode, ieWeights,
+                ngraph::Strides(strides),
+                ngraph::CoordinateDiff(std::vector<std::ptrdiff_t>(pads_begin.begin(), pads_begin.end())),
+                ngraph::CoordinateDiff(std::vector<std::ptrdiff_t>(pads_end.begin(), pads_end.end())),
+                ngraph::Strides(dilations),
+                pad_type);
+        } else {
+            conv_node = std::make_shared<ngraph::op::v1::Convolution>(
+                ieInpNode, ieWeights,
+                ngraph::Strides(strides),
+                ngraph::CoordinateDiff(std::vector<std::ptrdiff_t>(pads_begin.begin(), pads_begin.end())),
+                ngraph::CoordinateDiff(std::vector<std::ptrdiff_t>(pads_end.begin(), pads_end.end())),
+                ngraph::Strides(dilations),
+                pad_type);
+        }
+
+        std::vector<size_t> shape(conv_node.get_shape().size(), 1);
+        shape[1] = conv_node.get_shape()[1];
+        if (biasvec.size() || nodes.size() == 3)
+        {
+            std::shared_ptr<ngraph::Node> bias;
+            if (nodes.size() == 3)
+            {
+                auto bias_shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
+                    ngraph::Shape{shape.size()}, std::vector<int64_t>(shape.begin(), shape.end()));
+                bias = std::make_shared<ngraph::op::v1::Reshape>(nodes[2].dynamicCast<InfEngineNgraphNode>()->node, bias_shape, true);
+            }
+            else
+            {
+                std::vector<float> ovBias(numOutput);
+                for (int i = 0; i < numOutput; ++i) {
+                    ovBias[i] = (biasvec[i] + input_zp * cv::sum(blobs[0].row(i))[0]) * outputMultiplier[i] * output_sc;
+                }
+                bias = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape(shape), ovBias.data());
+            }
+            conv_node = std::make_shared<ngraph::op::v1::Add>(conv_node, bias, ngraph::op::AutoBroadcastType::NUMPY);
+        }
+
+        conv_node = ngraphQuantize(conv_node, output_sc, output_zp);
+
+        return new InfEngineNgraphNode(conv_node);
+    }
+#endif  // HAVE_DNN_NGRAPH
+
     class ParallelConv : public cv::ParallelLoopBody
     {
     public:
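A sketch of where the `ovBias` expression in the convolution hunk above comes from (symbols follow the code: `q` is the int8 input, `W_i` row `i` of the int8 weights, `b_i` the int32 bias):

```
dequantized input:  x = input_sc * (q - input_zp)
float conv of x with weights rescaled by outputMultiplier[i] * output_sc / input_sc
(the weight FakeQuantize range):
    conv_i = outputMultiplier[i] * output_sc * (sum(W_i * q) - input_zp * sum(W_i))
folded bias:
    ovBias[i] = (b_i + input_zp * sum(W_i)) * outputMultiplier[i] * output_sc
sum:
    conv_i + ovBias[i] = outputMultiplier[i] * output_sc * (sum(W_i * q) + b_i)
```

which is exactly the dequantized value of the INT8 accumulator, so the FP32 graph reproduces the reference INT8 arithmetic up to rounding.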
@@ -5,6 +5,7 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 #include "../op_timvx.hpp"
+#include "../ie_ngraph.hpp"
 
 #include <opencv2/dnn/shape_utils.hpp>
 #include <iostream>
@@ -56,7 +57,7 @@ public:
         return tvActType != tvActNotSupported;
     }
 #endif
-    return backendId == DNN_BACKEND_OPENCV;
+    return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
 }
 
 bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -244,6 +245,42 @@ public:
         return Ptr<BackendNode>();
     }
 
+#ifdef HAVE_DNN_NGRAPH
+    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs,
+                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
+        auto input = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
+
+        input = ngraphDequantize(input, input_sc, input_zp);
+
+        ngraph::Output<ngraph::Node> res;
+        if (type == "ReLU6Int8") {
+            res = std::make_shared<ngraph::op::Clamp>(input, 0.0f, 6.0f);
+        } else if (type == "ReLUInt8") {
+            if (slope) {
+                auto param = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &slope);
+                res = std::make_shared<ngraph::op::PRelu>(input, param);
+            } else {
+                res = std::make_shared<ngraph::op::Relu>(input);
+            }
+        } else if (type == "ELUInt8") {
+            res = std::make_shared<ngraph::op::Elu>(input, 1.0f);
+        } else if (type == "MishInt8") {
+            res = std::make_shared<ngraph::op::v4::Mish>(input);
+        } else if (type == "AbsValInt8") {
+            res = std::make_shared<ngraph::op::Abs>(input);
+        } else if (type == "SigmoidInt8") {
+            res = std::make_shared<ngraph::op::Sigmoid>(input);
+        } else {
+            CV_Error(Error::StsNotImplemented, type + " activation with OpenVINO");
+        }
+
+        res = ngraphQuantize(res, output_sc, output_zp);
+
+        return new InfEngineNgraphNode(res);
+    }
+#endif  // HAVE_DNN_NGRAPH
+
     void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
     {
         CV_TRACE_FUNCTION();
@@ -5,6 +5,7 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 #include "../op_timvx.hpp"
+#include "../ie_ngraph.hpp"
 #include <opencv2/dnn/shape_utils.hpp>
 
 namespace cv
@@ -138,7 +139,7 @@ public:
     // For TimVX Backend, only ELTWISE_CHANNNELS_SAME was supported.
     if (backendId == DNN_BACKEND_TIMVX && haveTimVX())
         return channelsModeInput == ELTWISE_CHANNNELS_SAME;
-    return backendId == DNN_BACKEND_OPENCV;
+    return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
 }
 
 bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -369,6 +370,38 @@ public:
         return Ptr<BackendNode>();
     }
 
+#ifdef HAVE_DNN_NGRAPH
+    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs,
+                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
+        CV_Assert(nodes.size() >= 2);
+        std::vector<ngraph::Output<ngraph::Node>> ieInpNodes(nodes.size());
+        for (size_t i = 0; i < nodes.size(); i++)
+        {
+            ieInpNodes[i] = nodes[i].dynamicCast<InfEngineNgraphNode>()->node;
+
+            float input_sc = !coeffs.empty() ? coeffs[i] : 1.0f;
+            float input_zp = op == PROD ? zeropoints[i] : 0.0f;
+            ieInpNodes[i] = ngraphDequantize(ieInpNodes[i], input_sc, input_zp);
+        }
+
+        auto res = ieInpNodes[0];
+        for (size_t i = 1; i < ieInpNodes.size(); i++)
+        {
+            switch (op) {
+                case SUM: res = std::make_shared<ngraph::op::v1::Add>(res, ieInpNodes[i]); break;
+                case PROD: res = std::make_shared<ngraph::op::v1::Multiply>(res, ieInpNodes[i]); break;
+                case MAX: res = std::make_shared<ngraph::op::v1::Maximum>(res, ieInpNodes[i]); break;
+                default: CV_Error(Error::StsNotImplemented, "Unsupported eltwise operation");
+            }
+        }
+
+        res = ngraphQuantize(res, 1.0f, offset);
+
+        return new InfEngineNgraphNode(res);
+    }
+#endif  // HAVE_DNN_NGRAPH
+
     class EltwiseInvoker : public ParallelLoopBody
     {
         EltwiseLayerInt8Impl& self;
@@ -5,6 +5,7 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 #include "../op_timvx.hpp"
+#include "../ie_ngraph.hpp"
 
 #include <opencv2/dnn/shape_utils.hpp>
 
@@ -86,7 +87,8 @@ public:
         return false;
     }
 
-    return backendId == DNN_BACKEND_OPENCV;
+    return backendId == DNN_BACKEND_OPENCV ||
+           backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
 }
 
 virtual bool setActivation(const Ptr<ActivationLayer>& layer) CV_OVERRIDE
@@ -395,6 +397,77 @@ public:
 
     }
 
+#ifdef HAVE_DNN_NGRAPH
+    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs,
+                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
+        CV_CheckTypeEQ(blobs[0].type(), CV_8S, "");  // weights
+        CV_CheckTypeEQ(blobs[1].type(), CV_32S, "");  // bias
+        CV_CheckTypeEQ(outputMultiplier.type(), CV_32F, "");
+
+        ngraph::Output<ngraph::Node> input = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
+        ngraph::Output<ngraph::Node> ieWeights, ieBias, matmul;
+        bool transA = false, transB = true;
+        size_t numOutput = blobs[0].size[0];
+
+        if (nodes.size() == 2)
+        {
+            CV_Error(Error::StsNotImplemented, "");
+            // auto inp2 = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
+            // matmul = std::make_shared<ngraph::op::MatMul>(ieInpNode, inp2, transA, transB);
+        }
+        else
+        {
+            std::vector<int> shape(1 + normalize_axis(axis, input.get_shape().size()), 0);
+            shape[shape.size() - 1] = -1;
+            input = std::make_shared<ngraph::op::v1::Reshape>(
+                input,
+                std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{shape.size()}, shape.data()),
+                true
+            );
+
+            input = ngraphDequantize(input, input_sc, input_zp);
+
+            const float low = -128, high = 127;
+            std::vector<float> inpLows(numOutput, low);
+            std::vector<float> inpHighs(numOutput, high);
+            std::vector<float> outLows(numOutput);
+            std::vector<float> outHighs(numOutput);
+            for (int i = 0; i < numOutput; ++i) {
+                outLows[i] = low * outputMultiplier.ptr<float>()[i] * output_sc / input_sc;
+                outHighs[i] = high * outputMultiplier.ptr<float>()[i] * output_sc / input_sc;
+            }
+
+            std::vector<size_t> weight_shape{(size_t)blobs[0].size[0], (size_t)blobs[0].size[1]};
+            ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::i8, weight_shape, blobs[0].data);
+            ieWeights = std::make_shared<ngraph::op::Convert>(ieWeights, ngraph::element::f32);
+            ieWeights = std::make_shared<ngraph::op::FakeQuantize>(ieWeights,
+                std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{numOutput, 1}, inpLows.data()),
+                std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{numOutput, 1}, inpHighs.data()),
+                std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{numOutput, 1}, outLows.data()),
+                std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{numOutput, 1}, outHighs.data()),
+                256  // levels
+            );
+            matmul = std::make_shared<ngraph::op::MatMul>(input, ieWeights, transA, transB);
+        }
+
+        if (blobs.size() > 1) {
+            int32_t* bias = blobs[1].ptr<int32_t>();
+            std::vector<float> ovBias(blobs[1].total());
+            for (int i = 0; i < ovBias.size(); ++i) {
+                ovBias[i] = (bias[i] + input_zp * cv::sum(blobs[0].row(i))[0]) * outputMultiplier.ptr<float>()[i] * output_sc;
+            }
+            auto bias_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32,
+                                                                    ngraph::Shape{blobs[1].total()}, ovBias.data());
+            matmul = std::make_shared<ngraph::op::v1::Add>(matmul, bias_node);
+        }
+
+        matmul = ngraphQuantize(matmul, output_sc, output_zp);
+
+        return new InfEngineNgraphNode(matmul);
+    }
+#endif  // HAVE_DNN_NGRAPH
+
     Mat weightsMat, biasMat, outputMultiplier, activationLUT;
     Ptr<ActivationLayerInt8> activ;
 };
@@ -5,6 +5,7 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 #include "../op_timvx.hpp"
+#include "../ie_ngraph.hpp"
 #include "opencv2/core/hal/intrin.hpp"
 
 #include <float.h>
@@ -124,6 +125,10 @@ public:
             return type == MAX || type == AVE;
         return false;
     }
+    else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+    {
+        return true;
+    }
 
     return false;
 }
@@ -271,6 +276,49 @@ public:
         return Ptr<BackendNode>();
     }
 
+#ifdef HAVE_DNN_NGRAPH
+    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs,
+                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
+        auto input = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
+
+        input = ngraphDequantize(input, input_sc, input_zp);
+
+        ngraph::op::PadType pad_type = ngraph::op::PadType::EXPLICIT;
+        if (!padMode.empty())
+            pad_type = padMode == "VALID" ? ngraph::op::PadType::VALID : ngraph::op::PadType::SAME_UPPER;
+
+        auto rounding_type = ceilMode ? ngraph::op::RoundingType::CEIL : ngraph::op::RoundingType::FLOOR;
+        ngraph::Output<ngraph::Node> pool;
+        if (type == MAX) {
+            pool = std::make_shared<ngraph::op::v1::MaxPool>(input, ngraph::Strides(strides),
+                ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size),
+                rounding_type, pad_type);
+        } else if (type == AVE) {
+            pool = std::make_shared<ngraph::op::v1::AvgPool>(input, ngraph::Strides(strides),
+                ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size),
+                !avePoolPaddedArea, rounding_type, pad_type);
+        } else if (type == SUM) {
+            ngraph::Shape inpShape = input.get_shape();
+            CV_Assert(inpShape.size() == 2 + kernel_size.size());
+            std::vector<int64_t> axes;
+            for (size_t i = 0; i < kernel_size.size(); i++)
+            {
+                if (inpShape[2 + i] == kernel_size[i])
+                    axes.push_back(2 + i);
+            }
+            auto reduction_axes = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{axes.size()}, axes);
+            pool = std::make_shared<ngraph::op::v1::ReduceSum>(input, reduction_axes, true);
+        } else {
+            CV_Error(Error::StsNotImplemented, format("INT8 Pooling type: %d", type));
+        }
+
+        pool = ngraphQuantize(pool, output_sc, output_zp);
+
+        return new InfEngineNgraphNode(pool);
+    }
+#endif  // HAVE_DNN_NGRAPH
+
     void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
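One detail worth spelling out in the pooling hunk above: the `SUM` branch only reduces an axis when the kernel spans that axis's whole spatial extent, so the `ReduceSum` is equivalent to the pooling window in the global-sum-pooling case. Illustrative shapes:

```
input:  1 x 32 x 7 x 7 (NCHW), kernel_size = {7, 7}
inpShape[2 + i] == kernel_size[i] for i = 0, 1  ->  axes = {2, 3}
ReduceSum(input, axes = {2, 3}, keep_dims = true)  ->  1 x 32 x 1 x 1
```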
@@ -5,6 +5,7 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 #include "../op_timvx.hpp"
+#include "../ie_ngraph.hpp"
 
 namespace cv
 {
@@ -98,7 +99,8 @@ public:
 
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
-        return backendId == DNN_BACKEND_OPENCV;
+        return backendId == DNN_BACKEND_OPENCV ||
+               backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
     }
 
     bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -171,6 +173,16 @@ public:
         else
             inputs[0].convertTo(outputs[0], CV_8S, 1.f/scales[0], zeropoints[0]);
     }
 
+#ifdef HAVE_DNN_NGRAPH
+    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
+                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
+        const auto input = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
+        auto quantized = ngraphQuantize(input, scales[0], zeropoints[0]);
+        return Ptr<BackendNode>(new InfEngineNgraphNode(quantized));
+    }
+#endif  // HAVE_DNN_NGRAPH
 };
 
 // Dequantize INT8 Inputs to FP32/FP16
@@ -214,7 +226,7 @@ public:
 
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
-        return backendId == DNN_BACKEND_OPENCV;
+        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
     }
 
     bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -285,6 +297,16 @@ public:
         else
             inputs[0].convertTo(outputs[0], CV_32F, scales[0], -(scales[0]*zeropoints[0]));
     }
 
+#ifdef HAVE_DNN_NGRAPH
+    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
+                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
+        const auto input = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
+        auto quantized = ngraphDequantize(input, scales[0], zeropoints[0]);
+        return new InfEngineNgraphNode(quantized);
+    }
+#endif  // HAVE_DNN_NGRAPH
 };
 
 // Rescale/Requantize INT8 Inputs from (scale1, zeropoint1) to (scale2, zeropoint2)
@@ -6,6 +6,7 @@
 #include "layers_common.hpp"
 #include <opencv2/imgproc.hpp>
 #include <opencv2/dnn/shape_utils.hpp>
+#include "../ie_ngraph.hpp"
 
 namespace cv
 {
@@ -72,7 +73,8 @@ public:
 
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
-        return backendId == DNN_BACKEND_OPENCV;
+        return backendId == DNN_BACKEND_OPENCV ||
+               backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
    }
 
    bool setActivation(const Ptr<ActivationLayer>& layer) CV_OVERRIDE
@@ -186,6 +188,59 @@ public:
         return flops;
     }
 
+#ifdef HAVE_DNN_NGRAPH
+    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
+        std::vector<ngraph::Output<ngraph::Node>> ieInpNodes(nodes.size());
+        for (int i = 0; i < nodes.size(); ++i) {
+            ieInpNodes[i] = nodes[i].dynamicCast<InfEngineNgraphNode>()->node;
+        }
+
+        ieInpNodes[0] = ngraphDequantize(ieInpNodes[0], inp_sc[0], inp_zp[0]);
+
+        CV_Assert(!blobs.empty() || ieInpNodes.size() == 1 + (int)hasWeights + (int)hasBias);
+
+        ngraph::Output<ngraph::Node> weights, bias;
+        if (blobs.empty()) {
+            if (hasWeights)
+                weights = ieInpNodes[1];
+            if (hasBias)
+                bias = ieInpNodes[1 + (int)hasWeights];
+        } else {
+            std::vector<size_t> shape = ieInpNodes[0].get_shape();
+            int cAxis = normalize_axis(axis, shape.size());
+
+            size_t numWeights = blobs[0].total();
+            for (int i = 0; i < cAxis; ++i) {
+                shape[i] = 1;
+            }
+            for (int i = cAxis; i < shape.size(); ++i) {
+                if (numWeights == 1) {
+                    shape[i] = 1;
+                }
+                numWeights = std::max(numWeights / shape[i], (size_t)1);
+            }
+
+            if (hasWeights)
+                weights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, shape, blobs[0].data);
+            if (hasBias)
+                bias = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, shape, blobs[(int)hasWeights].data);
+        }
+
+        ngraph::Output<ngraph::Node> res = ieInpNodes[0];
+        if (hasWeights) {
+            res = std::make_shared<ngraph::op::v1::Multiply>(res, weights);
+        }
+        if (hasBias) {
+            res = std::make_shared<ngraph::op::v1::Add>(res, bias);
+        }
+
+        res = ngraphQuantize(res, output_sc, output_zp);
+
+        return new InfEngineNgraphNode(res);
+    }
+#endif  // HAVE_DNN_NGRAPH
+
 private:
     bool hasWeights;
     std::vector<float> inp_sc;
@@ -5,6 +5,7 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 #include "../op_timvx.hpp"
+#include "../ie_ngraph.hpp"
 
 #include <algorithm>
 #include <stdlib.h>
@@ -90,7 +91,8 @@ public:
 virtual bool supportBackend(int backendId) CV_OVERRIDE
 {
     return backendId == DNN_BACKEND_OPENCV ||
-           (backendId == DNN_BACKEND_TIMVX && haveTimVX());
+           (backendId == DNN_BACKEND_TIMVX && haveTimVX()) ||
+           backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
 }
 
 virtual bool tryFuse(Ptr<Layer>& top) CV_OVERRIDE
@@ -194,6 +196,26 @@ public:
         return Ptr<BackendNode>();
     }
 
+#ifdef HAVE_DNN_NGRAPH
+    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs,
+                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
+        auto input = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
+
+        input = ngraphDequantize(input, input_sc, input_zp);
+
+        ngraph::Output<ngraph::Node> res;
+        if (logSoftMax) {
+            res = std::make_shared<ngraph::op::v5::LogSoftmax>(input, axis);
+        } else {
+            res = std::make_shared<ngraph::op::v1::Softmax>(input, axis);
+        }
+
+        res = ngraphQuantize(res, output_sc, output_zp);
+        return new InfEngineNgraphNode(res);
+    }
+#endif  // HAVE_DNN_NGRAPH
+
 template <bool with_log>
 class SoftmaxInt8Invoker : public ParallelLoopBody {
 public:
@@ -123,9 +123,21 @@ public:
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                         const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
     {
-        auto node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32,
+        ngraph::element::Type dType;
+        if (blobs[0].depth() == CV_32F) {
+            dType = ngraph::element::f32;
+        } else if (blobs[0].depth() == CV_8S) {
+            dType = ngraph::element::i8;
+        } else {
+            CV_Error(Error::StsNotImplemented, format("Unexpected Const data depth: %d", blobs[0].depth()));
+        }
+        std::shared_ptr<ngraph::Node> node =
+            std::make_shared<ngraph::op::Constant>(dType,
                                                    getShape<size_t>(blobs[0]),
                                                    blobs[0].data);
+        if (node->get_element_type() != ngraph::element::f32) {
+            node = std::make_shared<ngraph::op::Convert>(node, ngraph::element::f32);
+        }
         return Ptr<BackendNode>(new InfEngineNgraphNode(node));
     }
 #endif  // HAVE_DNN_NGRAPH
@@ -1076,13 +1076,7 @@ struct MishFunctor : public BaseDefaultFunctor<MishFunctor>
 #ifdef HAVE_DNN_NGRAPH
     std::shared_ptr<ngraph::Node> initNgraphAPI(const ngraph::Output<ngraph::Node>& node)
     {
-        float one = 1.0f;
-        auto constant = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &one);
-        auto exp_node = std::make_shared<ngraph::op::v0::Exp>(node);
-        auto sum = std::make_shared<ngraph::op::v1::Add>(constant, exp_node, ngraph::op::AutoBroadcastType::NUMPY);
-        auto log_node = std::make_shared<ngraph::op::v0::Log>(sum);
-        auto tanh_node = std::make_shared<ngraph::op::Tanh>(log_node);
-        return std::make_shared<ngraph::op::v1::Multiply>(node, tanh_node);
+        return std::make_shared<ngraph::op::v4::Mish>(node);
     }
 #endif  // HAVE_DNN_NGRAPH
 
@@ -1309,10 +1303,7 @@ struct AbsValFunctor : public BaseDefaultFunctor<AbsValFunctor>
 #ifdef HAVE_DNN_NGRAPH
     std::shared_ptr<ngraph::Node> initNgraphAPI(const ngraph::Output<ngraph::Node>& node)
     {
-        float coeff = -0.999999f;
-        // float coeff = preferableTarget == DNN_TARGET_MYRIAD ? -0.999f : -0.999999f;
-        auto slope = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &coeff);
-        return std::make_shared<ngraph::op::PRelu>(node, slope);
+        return std::make_shared<ngraph::op::Abs>(node);
     }
 #endif  // HAVE_DNN_NGRAPH
 
@@ -386,11 +386,11 @@ public:
     {
         auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
         int axis = normalize_axis(axisRaw, ieInpNode.get_shape().size());
-        auto softmax = std::make_shared<ngraph::op::v1::Softmax>(ieInpNode, axis);
-        if (logSoftMax)
-            return Ptr<BackendNode>(new InfEngineNgraphNode(std::make_shared<ngraph::op::v0::Log>(softmax)));
-
-        return Ptr<BackendNode>(new InfEngineNgraphNode(softmax));
+        if (logSoftMax) {
+            return new InfEngineNgraphNode(std::make_shared<ngraph::op::v5::LogSoftmax>(ieInpNode, axis));
+        } else {
+            return new InfEngineNgraphNode(std::make_shared<ngraph::op::v1::Softmax>(ieInpNode, axis));
+        }
     }
 #endif  // HAVE_DNN_NGRAPH
 
@@ -170,11 +170,19 @@ void Net::Impl::setPreferableBackend(Net& net, int backendId)
     if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
         backendId = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;  // = getInferenceEngineBackendTypeParam();
 
-    if (netWasQuantized && backendId != DNN_BACKEND_OPENCV && backendId != DNN_BACKEND_TIMVX)
+    if (netWasQuantized && backendId != DNN_BACKEND_OPENCV && backendId != DNN_BACKEND_TIMVX &&
+        backendId != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
     {
-        CV_LOG_WARNING(NULL, "DNN: Only default and TIMVX backends support quantized networks");
+        CV_LOG_WARNING(NULL, "DNN: Only default, TIMVX and OpenVINO backends support quantized networks");
         backendId = DNN_BACKEND_OPENCV;
     }
+#ifdef HAVE_DNN_NGRAPH
+    if (netWasQuantized && backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2023_0))
+    {
+        CV_LOG_WARNING(NULL, "DNN: OpenVINO 2023.0 and higher is required to support quantized networks");
+        backendId = DNN_BACKEND_OPENCV;
+    }
+#endif
 
     if (preferableBackend != backendId)
     {
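To make the new code path in `setPreferableBackend` concrete, a minimal sketch of quantizing a network and handing it to OpenVINO (the helper name is hypothetical; `quantize()` and the backend constants are the existing API):

```cpp
#include <opencv2/dnn.hpp>

// Hypothetical helper: post-training INT8 quantization + OpenVINO backend selection.
cv::dnn::Net quantizeForOpenVINO(cv::dnn::Net& net, const std::vector<cv::Mat>& calibData)
{
    // Existing cv::dnn API: calibrate on sample blobs, produce an INT8 network.
    cv::dnn::Net qnet = net.quantize(calibData, CV_8S, CV_8S);
    // Accepted for quantized nets as of this patch; with OpenVINO < 2023.0
    // (or any other unsupported backend) a warning is logged and the net
    // falls back to DNN_BACKEND_OPENCV.
    qnet.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
    qnet.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
    return qnet;
}
```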
@@ -48,7 +48,6 @@ public:
     CV_Assert(basePtr_);
     Net::Impl& base = *basePtr_;
     CV_Assert(!base.netWasAllocated);
-    CV_Assert(!base.netWasQuantized);
     netInputLayer = base.netInputLayer;
     blobsToKeep = base.blobsToKeep;
     layers = base.layers;
@@ -27,6 +27,7 @@
 #define INF_ENGINE_RELEASE_2021_3 2021030000
 #define INF_ENGINE_RELEASE_2021_4 2021040000
 #define INF_ENGINE_RELEASE_2022_1 2022010000
+#define INF_ENGINE_RELEASE_2023_0 2023000000
 
 #ifndef INF_ENGINE_RELEASE
 #warning("IE version have not been provided via command-line. Using 2021.4 by default")
@@ -14,6 +14,9 @@ testing::internal::ParamGenerator< tuple<Backend, Target> > dnnBackendsAndTarget
     targets.push_back(make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU));
 #ifdef HAVE_TIMVX
     targets.push_back(make_tuple(DNN_BACKEND_TIMVX, DNN_TARGET_NPU));
 #endif
+#ifdef HAVE_INF_ENGINE
+    targets.push_back(make_tuple(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_CPU));
+#endif
     return testing::ValuesIn(targets);
 }
@@ -66,8 +69,6 @@ public:
         outPath = _tf("onnx/data/output_" + basename);
     }
     ASSERT_FALSE(net.empty());
-    net.setPreferableBackend(backend);
-    net.setPreferableTarget(target);
 
     for (int i = 0; i < numInps; i++)
         inps[i] = blobFromNPY(inpPath + ((numInps > 1) ? cv::format("_%d.npy", i) : ".npy"));
@@ -78,6 +79,8 @@ public:
     qnet = net.quantize(inps, CV_8S, CV_8S, perChannel);
     qnet.getInputDetails(inputScale, inputZp);
     qnet.getOutputDetails(outputScale, outputZp);
+    qnet.setPreferableBackend(backend);
+    qnet.setPreferableTarget(target);
 
     // Quantize inputs to int8
     // int8_value = float_value/scale + zero-point
@@ -94,7 +97,7 @@ public:
     for (int i = 0; i < numOuts; i++)
     {
         outs_int8[i].convertTo(outs_dequantized[i], CV_32F, outputScale[i], -(outputScale[i] * outputZp[i]));
-        normAssert(refs[i], outs_dequantized[i], "", l1, lInf);
+        normAssert(refs[i], outs_dequantized[i], basename.c_str(), l1, lInf);
     }
 }
};
@@ -197,10 +200,13 @@ TEST_P(Test_Int8_layers, Padding)
 
 TEST_P(Test_Int8_layers, AvePooling)
 {
-    testLayer("layer_pooling_ave", "Caffe", 0.0021, 0.0075);
+    // Some tests failed with OpenVINO due to wrong padded area calculation
+    if (backend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        testLayer("layer_pooling_ave", "Caffe", 0.0021, 0.0075);
     testLayer("ave_pool_same", "TensorFlow", 0.00153, 0.0041);
     testLayer("average_pooling_1d", "ONNX", 0.002, 0.0048);
-    testLayer("average_pooling", "ONNX", 0.0014, 0.0032);
+    if (backend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        testLayer("average_pooling", "ONNX", 0.0014, 0.0032);
     testLayer("average_pooling_dynamic_axes", "ONNX", 0.0014, 0.006);
 
     if (target != DNN_TARGET_CPU)
@@ -216,8 +222,6 @@ TEST_P(Test_Int8_layers, MaxPooling)
         throw SkipTestException("Only CPU is supported");
     testLayer("pool_conv_3d", "ONNX", 0.0033, 0.0124);
 
-    /* All the below tests have MaxPooling as last layer, so computeMaxIdx is set to true
-       which is not supported by int8 maxpooling
     testLayer("layer_pooling_max", "Caffe", 0.0021, 0.004);
     testLayer("max_pool_even", "TensorFlow", 0.0048, 0.0139);
     testLayer("max_pool_odd_valid", "TensorFlow", 0.0043, 0.012);
@@ -227,7 +231,7 @@
     testLayer("two_maxpooling_1d", "ONNX", 0.0037, 0.0052);
     testLayer("maxpooling", "ONNX", 0.0034, 0.0065);
     testLayer("two_maxpooling", "ONNX", 0.0025, 0.0052);
-    testLayer("max_pool3d", "ONNX", 0.0028, 0.0069);*/
+    testLayer("max_pool3d", "ONNX", 0.0028, 0.0069);
 }
 
 TEST_P(Test_Int8_layers, Reduce)
|
||||
TEST_P(Test_Int8_layers, Concat)
|
||||
{
|
||||
testLayer("layer_concat_shared_input", "Caffe", 0.0076, 0.029, 1, 1, true, false);
|
||||
testLayer("concat_axis_1", "TensorFlow", 0.0056, 0.017);
|
||||
if (backend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
|
||||
// Crashes with segfault
|
||||
testLayer("concat_axis_1", "TensorFlow", 0.0056, 0.017);
|
||||
}
|
||||
testLayer("keras_pad_concat", "TensorFlow", 0.0032, 0.0089);
|
||||
testLayer("concat_3d", "TensorFlow", 0.005, 0.014);
|
||||
testLayer("concatenation", "ONNX", 0.0032, 0.009);
|
||||
@ -400,10 +407,13 @@ TEST_P(Test_Int8_layers, Reshape)
|
||||
testLayer("reshape_nchw", "TensorFlow", 0.0089, 0.029);
|
||||
|
||||
testLayer("reshape_conv", "TensorFlow", 0.035, 0.054);
|
||||
testLayer("reshape_reduce", "TensorFlow", 0.0042, 0.0078);
|
||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
|
||||
testLayer("reshape_reduce", "TensorFlow", 0.0053, 0.011);
|
||||
else
|
||||
testLayer("reshape_reduce", "TensorFlow", 0.0042, 0.0078);
|
||||
testLayer("reshape_as_shape", "TensorFlow", 0.0014, 0.0028);
|
||||
testLayer("reshape_no_reorder", "TensorFlow", 0.0014, 0.0028);
|
||||
testLayer("shift_reshape_no_reorder", "TensorFlow", 0.0063, 0.014);
|
||||
testLayer("shift_reshape_no_reorder", "TensorFlow", 0.0063, backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH ? 0.016 : 0.014);
|
||||
testLayer("dynamic_reshape", "ONNX", 0.0047, 0.0079);
|
||||
testLayer("dynamic_reshape_opset_11", "ONNX", 0.0048, 0.0081);
|
||||
testLayer("flatten_by_prod", "ONNX", 0.0048, 0.0081);
|
||||
@ -491,10 +501,10 @@ TEST_P(Test_Int8_layers, Eltwise)
|
||||
|
||||
testLayer("conv_2_inps", "Caffe", 0.0086, 0.0232, 2, 1, true, false);
|
||||
testLayer("eltwise_sub", "TensorFlow", 0.015, 0.047);
|
||||
testLayer("eltwise_add_vec", "TensorFlow", 0.037, 0.21); // tflite 0.0095, 0.0365
|
||||
testLayer("eltwise_add_vec", "TensorFlow", 0.037, backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH ? 0.24 : 0.21); // tflite 0.0095, 0.0365
|
||||
testLayer("eltwise_mul_vec", "TensorFlow", 0.173, 1.14); // tflite 0.0028, 0.017
|
||||
testLayer("channel_broadcast", "TensorFlow", 0.0025, 0.0063);
|
||||
testLayer("split_equals", "TensorFlow", 0.02, 0.065);
|
||||
testLayer("split_equals", "TensorFlow", backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH ? 0.021 : 0.02, 0.065);
|
||||
testLayer("mul", "ONNX", 0.0039, 0.014);
|
||||
testLayer("split_max", "ONNX", 0.004, 0.012);
|
||||
}
|
||||
@@ -551,10 +561,10 @@ public:
     Mat blob = readTensorFromONNX(findDataFile("dnn/onnx/data/input_" + basename + ".pb"));
     Mat ref = readTensorFromONNX(findDataFile("dnn/onnx/data/output_" + basename + ".pb"));
     Net baseNet = readNetFromONNX(onnxmodel);
-    baseNet.setPreferableBackend(backend);
-    baseNet.setPreferableTarget(target);
 
     Net qnet = baseNet.quantize(blob, CV_32F, CV_32F, perChannel);
+    qnet.setPreferableBackend(backend);
+    qnet.setPreferableTarget(target);
     qnet.setInput(blob);
     Mat out = qnet.forward();
 
@@ -699,9 +709,6 @@ TEST_P(Test_Int8_nets, AlexNet)
 #else
     applyTestTag(target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_512MB : CV_TEST_TAG_MEMORY_1GB);
 #endif
-    if (backend != DNN_BACKEND_OPENCV)
-        throw SkipTestException("Only OpenCV backend is supported");
-
     if (target == DNN_TARGET_OPENCL_FP16 && !ocl::Device::getDefault().isIntel())
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
     if (target == DNN_TARGET_OPENCL && !ocl::Device::getDefault().isIntel())
@@ -742,8 +749,6 @@ TEST_P(Test_Int8_nets, GoogLeNet)
 TEST_P(Test_Int8_nets, ResNet50)
 {
     applyTestTag(target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_512MB : CV_TEST_TAG_MEMORY_1GB);
-    if (backend != DNN_BACKEND_OPENCV)
-        throw SkipTestException("Only OpenCV backend is supported");
 
     if (target == DNN_TARGET_OPENCL_FP16 && !ocl::Device::getDefault().isIntel())
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
@@ -774,6 +779,8 @@ TEST_P(Test_Int8_nets, DenseNet121)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
     if (target == DNN_TARGET_OPENCL && !ocl::Device::getDefault().isIntel())
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
 
     Net net = readNetFromCaffe(findDataFile("dnn/DenseNet_121.prototxt", false),
                                findDataFile("dnn/DenseNet_121.caffemodel", false));
@@ -955,6 +962,8 @@ TEST_P(Test_Int8_nets, opencv_face_detector)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
     if (target == DNN_TARGET_OPENCL && !ocl::Device::getDefault().isIntel())
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
 
     Net net = readNetFromCaffe(findDataFile("dnn/opencv_face_detector.prototxt"),
                                findDataFile("dnn/opencv_face_detector.caffemodel", false));
@@ -1021,7 +1030,8 @@ TEST_P(Test_Int8_nets, FasterRCNN_resnet50)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
     if (target == DNN_TARGET_OPENCL && !ocl::Device::getDefault().isIntel())
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
-
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
     if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
 
@@ -1048,7 +1058,8 @@ TEST_P(Test_Int8_nets, FasterRCNN_inceptionv2)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
     if (target == DNN_TARGET_OPENCL && !ocl::Device::getDefault().isIntel())
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
-
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
     if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
 
@@ -1079,6 +1090,8 @@ TEST_P(Test_Int8_nets, FasterRCNN_vgg16)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
     if (target == DNN_TARGET_OPENCL && !ocl::Device::getDefault().isIntel())
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
 
     Net net = readNetFromCaffe(findDataFile("dnn/faster_rcnn_vgg16.prototxt"),
                                findDataFile("dnn/VGG16_faster_rcnn_final.caffemodel", false));
@@ -1106,6 +1119,8 @@ TEST_P(Test_Int8_nets, FasterRCNN_zf)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
     if (target == DNN_TARGET_OPENCL && !ocl::Device::getDefault().isIntel())
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
 
     Net net = readNetFromCaffe(findDataFile("dnn/faster_rcnn_zf.prototxt"),
                                findDataFile("dnn/ZF_faster_rcnn_final.caffemodel", false));
@@ -1138,6 +1153,9 @@ TEST_P(Test_Int8_nets, RFCN)
                                     0, 12, 0.94786, 132.093, 223.903, 338.077, 566.16);
 
     float confThreshold = 0.8, scoreDiff = 0.15, iouDiff = 0.11;
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
+        iouDiff = 0.12;
+    }
     testFaster(net, ref, confThreshold, scoreDiff, iouDiff);
 }
 
@@ -1317,6 +1335,8 @@ TEST_P(Test_Int8_nets, YOLOv4_tiny)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
     if (target == DNN_TARGET_OPENCL && !ocl::Device::getDefault().isIntel())
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
 
     const float confThreshold = 0.6;
 
@@ -2052,12 +2052,16 @@ TEST_P(Test_ONNX_layers, Quantized_Unsqueeze)
 TEST_P(Test_ONNX_layers, Quantized_Resize)
 {
     testONNXModels("quantized_resize_nearest");
-    testONNXModels("quantized_resize_bilinear", npy, 2e-4, 0.003);
-    testONNXModels("quantized_resize_bilinear_align", npy, 3e-4, 0.003);
+    double l1 = backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH ? 0.0013 : 2e-4;
+    testONNXModels("quantized_resize_bilinear", npy, l1, 0.003);
+    l1 = backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH ? 0.0013 : 3e-4;
+    testONNXModels("quantized_resize_bilinear_align", npy, l1, 0.003);
 }
 
 TEST_P(Test_ONNX_layers, Quantized_Concat)
 {
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
     testONNXModels("quantized_concat");
     testONNXModels("quantized_concat_const_blob");
 }
@@ -2074,6 +2078,8 @@ TEST_P(Test_ONNX_layers, OutputRegistration)
 
 TEST_P(Test_ONNX_layers, QLinearSoftmax)
 {
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
     testONNXModels("qlinearsoftmax_v11", npy, 0.002, 0.002);  // 2D coerced
     testONNXModels("qlinearsoftmax_v13", npy, 0.002, 0.002);
 }
@@ -204,6 +204,10 @@ TEST_P(Test_TFLite, max_unpooling)
 }
 
 TEST_P(Test_TFLite, EfficientDet_int8) {
+    if (target != DNN_TARGET_CPU || (backend != DNN_BACKEND_OPENCV &&
+        backend != DNN_BACKEND_TIMVX && backend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)) {
+        throw SkipTestException("Only OpenCV, TimVX and OpenVINO targets support INT8 on CPU");
+    }
     Net net = readNet(findDataFile("dnn/tflite/coco_efficientdet_lite0_v1_1.0_quant_2021_09_06.tflite", false));
     net.setPreferableBackend(backend);
     net.setPreferableTarget(target);