Mirror of https://github.com/opencv/opencv.git (synced 2025-06-07 09:25:45 +08:00)
Merge pull request #23987 from dkurt:openvino_int8_backend
OpenVINO backend for INT8 models #23987

### Pull Request Readiness Checklist

TODO:
- [x] DetectionOutput layer (https://github.com/opencv/opencv/pull/24069)
- [x] Fewer FP32 fallbacks (i.e. Sigmoid, eltwise sum)
- [x] Accuracy and performance tests (https://github.com/opencv/opencv/pull/24039)
- [x] Single-layer tests (convolution)
- [x] ~~Fixes for OpenVINO 2022.1 (https://pullrequest.opencv.org/buildbot/builders/precommit_custom_linux/builds/100334)~~

Performance results for the object detection model `coco_efficientdet_lite0_v1_1.0_quant_2021_09_06.tflite`:

| backend | performance (median time) |
|---|---|
| OpenCV | 77.42 ms |
| OpenVINO 2023.0 | 10.90 ms |

CPU: `11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz`

Serialized model per-layer stats (note that Convolution should use `*_I8` primitives if quantized correctly): https://gist.github.com/dkurt/7772bbf1907035441bb5454f19f0feef

---

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There are accuracy tests, performance tests and test data in the opencv_extra repository, if applicable. The patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
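For readers who want to try the new backend, here is a minimal usage sketch (file paths are illustrative; the calls are the existing `cv::dnn` API):

```cpp
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/imgcodecs.hpp>

int main() {
    // Load the quantized TFLite model (path is illustrative).
    cv::dnn::Net net = cv::dnn::readNet("coco_efficientdet_lite0_v1_1.0_quant_2021_09_06.tflite");

    // Route inference through OpenVINO; quantized networks need OpenVINO >= 2023.0.
    net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
    net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);

    cv::Mat img = cv::imread("dog416.png");
    cv::resize(img, img, cv::Size(320, 320));
    net.setInput(cv::dnn::blobFromImage(img));
    cv::Mat out = net.forward();
    return 0;
}
```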
This commit is contained in: parent b8d4ac589d, commit c7ec0d599a
@@ -302,6 +302,18 @@ PERF_TEST_P_(DNNTestNetwork, EfficientDet)
     processNet("dnn/efficientdet-d0.pb", "dnn/efficientdet-d0.pbtxt", "", inp);
 }
 
+PERF_TEST_P_(DNNTestNetwork, EfficientDet_int8)
+{
+    if (target != DNN_TARGET_CPU || (backend != DNN_BACKEND_OPENCV &&
+        backend != DNN_BACKEND_TIMVX && backend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)) {
+        throw SkipTestException("");
+    }
+    Mat inp = imread(findDataFile("dnn/dog416.png"));
+    resize(inp, inp, Size(320, 320));
+    processNet("", "dnn/tflite/coco_efficientdet_lite0_v1_1.0_quant_2021_09_06.tflite", "", inp);
+}
+
 INSTANTIATE_TEST_CASE_P(/*nothing*/, DNNTestNetwork, dnnBackendsAndTargets());
 
 } // namespace
@@ -590,7 +590,7 @@ void InfEngineNgraphNet::init(Target targetId)
             allBlobs[name] = ov::Tensor(src.get_element_type(), outShape, src.data());
         }
 
-        ppp.output(i++).tensor().set_element_type(ov::element::f32);  // Should be always FP32
+        ppp.output(i++).tensor().set_element_type(src.get_element_type());
     }
 
     ppp.build();
@@ -840,6 +840,8 @@ ov::Tensor wrapToNgraphBlob(const Mat& m) {
         return ov::Tensor(ov::element::f32, shape, m.data);
     else if (m.type() == CV_8U)
         return ov::Tensor(ov::element::u8, shape, m.data);
+    else if (m.type() == CV_8SC1)
+        return ov::Tensor(ov::element::i8, shape, m.data);
     else if (m.type() == CV_32SC1)
         return ov::Tensor(ov::element::i32, shape, m.data);
     else
@@ -1234,6 +1236,32 @@ void InfEngineNgraphNet::forward(const std::vector<Ptr<BackendWrapper> >& outBlo
 #endif  // OpenVINO >= 2022.1
 }
 
+ngraph::Output<ngraph::Node> ngraphQuantize(ngraph::Output<ngraph::Node> input, float output_sc, float output_zp) {
+    float outLow = -128, outHigh = 127;
+    float inpLow = output_sc * (outLow - output_zp);
+    float inpHigh = output_sc * (outHigh - output_zp);
+    return std::make_shared<ngraph::op::FakeQuantize>(input,
+        std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &inpLow),
+        std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &inpHigh),
+        std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &outLow),
+        std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &outHigh),
+        256  // levels
+    );
+}
+
+ngraph::Output<ngraph::Node> ngraphDequantize(ngraph::Output<ngraph::Node> input, float input_sc, float input_zp) {
+    float inpLow = -128, inpHigh = 127;
+    float outLow = input_sc * (inpLow - input_zp);
+    float outHigh = input_sc * (inpHigh - input_zp);
+    return std::make_shared<ngraph::op::FakeQuantize>(input,
+        std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &inpLow),
+        std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &inpHigh),
+        std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &outLow),
+        std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &outHigh),
+        256  // levels
+    );
+}
+
 #endif
 
 }}
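A note on the two helpers above: both emit a single `FakeQuantize` node, which is how OpenVINO annotates the affine INT8 mapping `real = scale * (q - zero_point)` while keeping the tensor in FP32; OpenVINO's low-precision transformations can later fold these annotations into true `*_I8` primitives. A worked example with illustrative values:

```
scale = 0.1, zero_point = 5, 256 levels
inpLow  = 0.1 * (-128 - 5) = -13.3   // real value that quantizes to -128
inpHigh = 0.1 * ( 127 - 5) =  12.2   // real value that quantizes to  127
ngraphQuantize:   clamps/rounds FP32 values in [-13.3, 12.2] onto the integer grid [-128, 127]
ngraphDequantize: maps the grid [-128, 127] back onto real values in [-13.3, 12.2]
```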
@@ -148,6 +148,9 @@ private:
     InferenceEngine::CNNNetwork t_net;
 };
 
+ngraph::Output<ngraph::Node> ngraphQuantize(ngraph::Output<ngraph::Node> input, float output_sc, float output_zp);
+ngraph::Output<ngraph::Node> ngraphDequantize(ngraph::Output<ngraph::Node> input, float input_sc, float input_zp);
+
 #endif  // HAVE_DNN_NGRAPH
 
 }}  // namespace cv::dnn
@@ -5,6 +5,7 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 #include "../op_timvx.hpp"
+#include "../ie_ngraph.hpp"
 
 #include <opencv2/dnn/shape_utils.hpp>
 
@@ -110,7 +111,8 @@ public:
             return true;
         }
 
-        return backendId == DNN_BACKEND_OPENCV;
+        return backendId == DNN_BACKEND_OPENCV ||
+               backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
     }
 
     bool setActivation(const Ptr<ActivationLayer>& layer) CV_OVERRIDE
@@ -238,6 +240,27 @@ public:
         return Ptr<BackendNode>();
     }
 
+#ifdef HAVE_DNN_NGRAPH
+    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
+        auto input = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
+
+        input = ngraphDequantize(input, input_sc, input_zp);
+
+        std::vector<size_t> shape(input.get_shape().size(), 1);
+        shape[1] = origin_weights.total();
+
+        ngraph::Output<ngraph::Node> res;
+        auto ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, shape, origin_weights.data);
+        auto ieBias = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, shape, origin_bias.data);
+        res = std::make_shared<ngraph::op::v1::Multiply>(input, ieWeights);
+        res = std::make_shared<ngraph::op::v1::Add>(res, ieBias);
+
+        res = ngraphQuantize(res, output_sc, output_zp);
+        return new InfEngineNgraphNode(res);
+    }
+#endif  // HAVE_DNN_NGRAPH
+
     void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
     {
         CV_TRACE_FUNCTION();
@@ -10,6 +10,7 @@
 #include "opencv2/core/hal/hal.hpp"
 #include "opencv2/core/hal/intrin.hpp"
 #include "../op_timvx.hpp"
+#include "../ie_ngraph.hpp"
 #include <iostream>
 #include <numeric>
 
@@ -195,7 +196,8 @@ public:
     }
 #endif
     // Only default backend and Conv1D/Conv2D/Conv3D are supported
-    return backendId == DNN_BACKEND_OPENCV && ksize >= 1 && ksize <= 3;
+    return (backendId == DNN_BACKEND_OPENCV && ksize >= 1 && ksize <= 3) ||
+           backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
 }
 
 bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -561,6 +563,126 @@ public:
         return Ptr<BackendNode>();
     }
 
+#ifdef HAVE_DNN_NGRAPH
+    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs,
+                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
+        CV_Assert(!blobs.empty());
+        CV_Assert_N(inputs.size() >= 1, nodes.size() >= 1);
+        CV_CheckTypeEQ(weightsMat.type(), CV_8S, "");
+        auto ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
+        std::vector<size_t> dims = ieInpNode.get_shape();
+        CV_Check(dims.size(), dims.size() >= 3 && dims.size() <= 5, "");
+        CV_Assert(ieInpNode.get_element_type() == ngraph::element::f32);
+        ngraph::Output<ngraph::Node> ieWeights;
+        if (nodes.size() > 1)
+            ieWeights = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
+        const int inpCn = dims[1];
+        const int inpGroupCn = nodes.size() > 1 ? ieWeights.get_shape()[1] : blobs[0].size[1];
+        const int group = inpCn / inpGroupCn;
+
+        std::vector<size_t> kernel_shape;
+        if (group != 1)
+        {
+            kernel_shape.push_back(group);
+        }
+        kernel_shape.push_back(numOutput / group);
+        kernel_shape.push_back(inpCn / group);
+        std::copy(kernel_size.begin(), kernel_size.end(), back_inserter(kernel_shape));
+
+        if (nodes.size() == 1)
+        {
+            ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::i8, kernel_shape, blobs[0].data);
+        }
+        else
+        {
+            auto shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
+                ngraph::Shape{kernel_shape.size()}, std::vector<int64_t>(kernel_shape.begin(), kernel_shape.end()));
+            ieWeights = std::make_shared<ngraph::op::v1::Reshape>(ieWeights, shape, true);
+        }
+
+        ngraph::op::PadType pad_type = ngraph::op::PadType::EXPLICIT;
+        if (!padMode.empty())
+            pad_type = padMode == "VALID" ? ngraph::op::PadType::VALID : ngraph::op::PadType::SAME_UPPER;
+
+        ieInpNode = ngraphDequantize(ieInpNode, input_sc, input_zp);
+
+        const float low = -128, high = 127;
+        std::vector<float> inpLows(numOutput, low);
+        std::vector<float> inpHighs(numOutput, high);
+        std::vector<float> outLows(numOutput);
+        std::vector<float> outHighs(numOutput);
+        std::vector<size_t> quantShape(kernel_shape.size(), 1);
+        if (group != 1)
+        {
+            quantShape[0] = group;
+            quantShape[1] = numOutput / group;
+        }
+        else
+        {
+            quantShape[0] = numOutput;
+        }
+
+        for (int i = 0; i < numOutput; ++i) {
+            outLows[i] = low * outputMultiplier[i] * output_sc / input_sc;
+            outHighs[i] = high * outputMultiplier[i] * output_sc / input_sc;
+        }
+        ieWeights = std::make_shared<ngraph::op::Convert>(ieWeights, ngraph::element::f32);
+        ieWeights = std::make_shared<ngraph::op::FakeQuantize>(ieWeights,
+            std::make_shared<ngraph::op::Constant>(ngraph::element::f32, quantShape, inpLows.data()),
+            std::make_shared<ngraph::op::Constant>(ngraph::element::f32, quantShape, inpHighs.data()),
+            std::make_shared<ngraph::op::Constant>(ngraph::element::f32, quantShape, outLows.data()),
+            std::make_shared<ngraph::op::Constant>(ngraph::element::f32, quantShape, outHighs.data()),
+            256  // levels
+        );
+
+        ngraph::Output<ngraph::Node> conv_node;
+        if (group != 1) {
+            conv_node = std::make_shared<ngraph::op::v1::GroupConvolution>(
+                ieInpNode, ieWeights,
+                ngraph::Strides(strides),
+                ngraph::CoordinateDiff(std::vector<std::ptrdiff_t>(pads_begin.begin(), pads_begin.end())),
+                ngraph::CoordinateDiff(std::vector<std::ptrdiff_t>(pads_end.begin(), pads_end.end())),
+                ngraph::Strides(dilations),
+                pad_type);
+        } else {
+            conv_node = std::make_shared<ngraph::op::v1::Convolution>(
+                ieInpNode, ieWeights,
+                ngraph::Strides(strides),
+                ngraph::CoordinateDiff(std::vector<std::ptrdiff_t>(pads_begin.begin(), pads_begin.end())),
+                ngraph::CoordinateDiff(std::vector<std::ptrdiff_t>(pads_end.begin(), pads_end.end())),
+                ngraph::Strides(dilations),
+                pad_type);
+        }
+
+        std::vector<size_t> shape(conv_node.get_shape().size(), 1);
+        shape[1] = conv_node.get_shape()[1];
+        if (biasvec.size() || nodes.size() == 3)
+        {
+            std::shared_ptr<ngraph::Node> bias;
+            if (nodes.size() == 3)
+            {
+                auto bias_shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
+                    ngraph::Shape{shape.size()}, std::vector<int64_t>(shape.begin(), shape.end()));
+                bias = std::make_shared<ngraph::op::v1::Reshape>(nodes[2].dynamicCast<InfEngineNgraphNode>()->node, bias_shape, true);
+            }
+            else
+            {
+                std::vector<float> ovBias(numOutput);
+                for (int i = 0; i < numOutput; ++i) {
+                    ovBias[i] = (biasvec[i] + input_zp * cv::sum(blobs[0].row(i))[0]) * outputMultiplier[i] * output_sc;
+                }
+                bias = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape(shape), ovBias.data());
+            }
+            conv_node = std::make_shared<ngraph::op::v1::Add>(conv_node, bias, ngraph::op::AutoBroadcastType::NUMPY);
+        }
+
+        conv_node = ngraphQuantize(conv_node, output_sc, output_zp);
+
+        return new InfEngineNgraphNode(conv_node);
+    }
+#endif  // HAVE_DNN_NGRAPH
+
     class ParallelConv : public cv::ParallelLoopBody
     {
     public:
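A sketch of where the `ovBias` expression in the convolution hunk above comes from (symbols follow the code: `q` is the int8 input, `W_i` row `i` of the int8 weights, `b_i` the int32 bias):

```
dequantized input:  x = input_sc * (q - input_zp)
float conv of x with weights rescaled by outputMultiplier[i] * output_sc / input_sc
(the weight FakeQuantize range):
    conv_i = outputMultiplier[i] * output_sc * (sum(W_i * q) - input_zp * sum(W_i))
folded bias:
    ovBias[i] = (b_i + input_zp * sum(W_i)) * outputMultiplier[i] * output_sc
sum:
    conv_i + ovBias[i] = outputMultiplier[i] * output_sc * (sum(W_i * q) + b_i)
```

which is exactly the dequantized value of the INT8 accumulator, so the FP32 graph reproduces the reference INT8 arithmetic up to rounding.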
@@ -5,6 +5,7 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 #include "../op_timvx.hpp"
+#include "../ie_ngraph.hpp"
 
 #include <opencv2/dnn/shape_utils.hpp>
 #include <iostream>
@@ -56,7 +57,7 @@ public:
         return tvActType != tvActNotSupported;
     }
 #endif
-    return backendId == DNN_BACKEND_OPENCV;
+    return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
 }
 
 bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -244,6 +245,42 @@ public:
         return Ptr<BackendNode>();
     }
 
+#ifdef HAVE_DNN_NGRAPH
+    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs,
+                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
+        auto input = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
+
+        input = ngraphDequantize(input, input_sc, input_zp);
+
+        ngraph::Output<ngraph::Node> res;
+        if (type == "ReLU6Int8") {
+            res = std::make_shared<ngraph::op::Clamp>(input, 0.0f, 6.0f);
+        } else if (type == "ReLUInt8") {
+            if (slope) {
+                auto param = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &slope);
+                res = std::make_shared<ngraph::op::PRelu>(input, param);
+            } else {
+                res = std::make_shared<ngraph::op::Relu>(input);
+            }
+        } else if (type == "ELUInt8") {
+            res = std::make_shared<ngraph::op::Elu>(input, 1.0f);
+        } else if (type == "MishInt8") {
+            res = std::make_shared<ngraph::op::v4::Mish>(input);
+        } else if (type == "AbsValInt8") {
+            res = std::make_shared<ngraph::op::Abs>(input);
+        } else if (type == "SigmoidInt8") {
+            res = std::make_shared<ngraph::op::Sigmoid>(input);
+        } else {
+            CV_Error(Error::StsNotImplemented, type + " activation with OpenVINO");
+        }
+
+        res = ngraphQuantize(res, output_sc, output_zp);
+
+        return new InfEngineNgraphNode(res);
+    }
+#endif  // HAVE_DNN_NGRAPH
+
     void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
     {
         CV_TRACE_FUNCTION();
@@ -5,6 +5,7 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 #include "../op_timvx.hpp"
+#include "../ie_ngraph.hpp"
 #include <opencv2/dnn/shape_utils.hpp>
 
 namespace cv
@@ -138,7 +139,7 @@ public:
     // For TimVX Backend, only ELTWISE_CHANNNELS_SAME was supported.
     if (backendId == DNN_BACKEND_TIMVX && haveTimVX())
         return channelsModeInput == ELTWISE_CHANNNELS_SAME;
-    return backendId == DNN_BACKEND_OPENCV;
+    return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
 }
 
 bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -369,6 +370,38 @@ public:
         return Ptr<BackendNode>();
     }
 
+#ifdef HAVE_DNN_NGRAPH
+    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs,
+                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
+        CV_Assert(nodes.size() >= 2);
+        std::vector<ngraph::Output<ngraph::Node>> ieInpNodes(nodes.size());
+        for (size_t i = 0; i < nodes.size(); i++)
+        {
+            ieInpNodes[i] = nodes[i].dynamicCast<InfEngineNgraphNode>()->node;
+
+            float input_sc = !coeffs.empty() ? coeffs[i] : 1.0f;
+            float input_zp = op == PROD ? zeropoints[i] : 0.0f;
+            ieInpNodes[i] = ngraphDequantize(ieInpNodes[i], input_sc, input_zp);
+        }
+
+        auto res = ieInpNodes[0];
+        for (size_t i = 1; i < ieInpNodes.size(); i++)
+        {
+            switch (op) {
+                case SUM: res = std::make_shared<ngraph::op::v1::Add>(res, ieInpNodes[i]); break;
+                case PROD: res = std::make_shared<ngraph::op::v1::Multiply>(res, ieInpNodes[i]); break;
+                case MAX: res = std::make_shared<ngraph::op::v1::Maximum>(res, ieInpNodes[i]); break;
+                default: CV_Error(Error::StsNotImplemented, "Unsupported eltwise operation");
+            }
+        }
+
+        res = ngraphQuantize(res, 1.0f, offset);
+
+        return new InfEngineNgraphNode(res);
+    }
+#endif  // HAVE_DNN_NGRAPH
+
     class EltwiseInvoker : public ParallelLoopBody
     {
         EltwiseLayerInt8Impl& self;
@@ -5,6 +5,7 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 #include "../op_timvx.hpp"
+#include "../ie_ngraph.hpp"
 
 #include <opencv2/dnn/shape_utils.hpp>
 
@@ -86,7 +87,8 @@ public:
         return false;
     }
 
-    return backendId == DNN_BACKEND_OPENCV;
+    return backendId == DNN_BACKEND_OPENCV ||
+           backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
 }
 
 virtual bool setActivation(const Ptr<ActivationLayer>& layer) CV_OVERRIDE
@@ -395,6 +397,77 @@ public:
 
     }
 
+#ifdef HAVE_DNN_NGRAPH
+    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs,
+                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
+        CV_CheckTypeEQ(blobs[0].type(), CV_8S, "");  // weights
+        CV_CheckTypeEQ(blobs[1].type(), CV_32S, "");  // bias
+        CV_CheckTypeEQ(outputMultiplier.type(), CV_32F, "");
+
+        ngraph::Output<ngraph::Node> input = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
+        ngraph::Output<ngraph::Node> ieWeights, ieBias, matmul;
+        bool transA = false, transB = true;
+        size_t numOutput = blobs[0].size[0];
+
+        if (nodes.size() == 2)
+        {
+            CV_Error(Error::StsNotImplemented, "");
+            // auto inp2 = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
+            // matmul = std::make_shared<ngraph::op::MatMul>(ieInpNode, inp2, transA, transB);
+        }
+        else
+        {
+            std::vector<int> shape(1 + normalize_axis(axis, input.get_shape().size()), 0);
+            shape[shape.size() - 1] = -1;
+            input = std::make_shared<ngraph::op::v1::Reshape>(
+                input,
+                std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{shape.size()}, shape.data()),
+                true
+            );
+
+            input = ngraphDequantize(input, input_sc, input_zp);
+
+            const float low = -128, high = 127;
+            std::vector<float> inpLows(numOutput, low);
+            std::vector<float> inpHighs(numOutput, high);
+            std::vector<float> outLows(numOutput);
+            std::vector<float> outHighs(numOutput);
+            for (int i = 0; i < numOutput; ++i) {
+                outLows[i] = low * outputMultiplier.ptr<float>()[i] * output_sc / input_sc;
+                outHighs[i] = high * outputMultiplier.ptr<float>()[i] * output_sc / input_sc;
+            }
+
+            std::vector<size_t> weight_shape{(size_t)blobs[0].size[0], (size_t)blobs[0].size[1]};
+            ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::i8, weight_shape, blobs[0].data);
+            ieWeights = std::make_shared<ngraph::op::Convert>(ieWeights, ngraph::element::f32);
+            ieWeights = std::make_shared<ngraph::op::FakeQuantize>(ieWeights,
+                std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{numOutput, 1}, inpLows.data()),
+                std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{numOutput, 1}, inpHighs.data()),
+                std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{numOutput, 1}, outLows.data()),
+                std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{numOutput, 1}, outHighs.data()),
+                256  // levels
+            );
+            matmul = std::make_shared<ngraph::op::MatMul>(input, ieWeights, transA, transB);
+        }
+
+        if (blobs.size() > 1) {
+            int32_t* bias = blobs[1].ptr<int32_t>();
+            std::vector<float> ovBias(blobs[1].total());
+            for (int i = 0; i < ovBias.size(); ++i) {
+                ovBias[i] = (bias[i] + input_zp * cv::sum(blobs[0].row(i))[0]) * outputMultiplier.ptr<float>()[i] * output_sc;
+            }
+            auto bias_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32,
+                                                                    ngraph::Shape{blobs[1].total()}, ovBias.data());
+            matmul = std::make_shared<ngraph::op::v1::Add>(matmul, bias_node);
+        }
+
+        matmul = ngraphQuantize(matmul, output_sc, output_zp);
+
+        return new InfEngineNgraphNode(matmul);
+    }
+#endif  // HAVE_DNN_NGRAPH
+
     Mat weightsMat, biasMat, outputMultiplier, activationLUT;
     Ptr<ActivationLayerInt8> activ;
 };
@@ -5,6 +5,7 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 #include "../op_timvx.hpp"
+#include "../ie_ngraph.hpp"
 #include "opencv2/core/hal/intrin.hpp"
 
 #include <float.h>
@@ -124,6 +125,10 @@ public:
             return type == MAX || type == AVE;
         return false;
     }
+    else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+    {
+        return true;
+    }
 
     return false;
 }
@@ -271,6 +276,49 @@ public:
         return Ptr<BackendNode>();
     }
 
+#ifdef HAVE_DNN_NGRAPH
+    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs,
+                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
+        auto input = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
+
+        input = ngraphDequantize(input, input_sc, input_zp);
+
+        ngraph::op::PadType pad_type = ngraph::op::PadType::EXPLICIT;
+        if (!padMode.empty())
+            pad_type = padMode == "VALID" ? ngraph::op::PadType::VALID : ngraph::op::PadType::SAME_UPPER;
+
+        auto rounding_type = ceilMode ? ngraph::op::RoundingType::CEIL : ngraph::op::RoundingType::FLOOR;
+        ngraph::Output<ngraph::Node> pool;
+        if (type == MAX) {
+            pool = std::make_shared<ngraph::op::v1::MaxPool>(input, ngraph::Strides(strides),
+                ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size),
+                rounding_type, pad_type);
+        } else if (type == AVE) {
+            pool = std::make_shared<ngraph::op::v1::AvgPool>(input, ngraph::Strides(strides),
+                ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size),
+                !avePoolPaddedArea, rounding_type, pad_type);
+        } else if (type == SUM) {
+            ngraph::Shape inpShape = input.get_shape();
+            CV_Assert(inpShape.size() == 2 + kernel_size.size());
+            std::vector<int64_t> axes;
+            for (size_t i = 0; i < kernel_size.size(); i++)
+            {
+                if (inpShape[2 + i] == kernel_size[i])
+                    axes.push_back(2 + i);
+            }
+            auto reduction_axes = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{axes.size()}, axes);
+            pool = std::make_shared<ngraph::op::v1::ReduceSum>(input, reduction_axes, true);
+        } else {
+            CV_Error(Error::StsNotImplemented, format("INT8 Pooling type: %d", type));
+        }
+
+        pool = ngraphQuantize(pool, output_sc, output_zp);
+
+        return new InfEngineNgraphNode(pool);
+    }
+#endif  // HAVE_DNN_NGRAPH
+
     void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
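One detail worth spelling out in the pooling hunk above: the `SUM` branch only reduces an axis when the kernel spans that axis's whole spatial extent, so the `ReduceSum` is equivalent to the pooling window in the global-sum-pooling case. Illustrative shapes:

```
input:  1 x 32 x 7 x 7 (NCHW), kernel_size = {7, 7}
inpShape[2 + i] == kernel_size[i] for i = 0, 1  ->  axes = {2, 3}
ReduceSum(input, axes = {2, 3}, keep_dims = true)  ->  1 x 32 x 1 x 1
```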
@@ -5,6 +5,7 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 #include "../op_timvx.hpp"
+#include "../ie_ngraph.hpp"
 
 namespace cv
 {
@@ -98,7 +99,8 @@ public:
 
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
-        return backendId == DNN_BACKEND_OPENCV;
+        return backendId == DNN_BACKEND_OPENCV ||
+               backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
     }
 
     bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -171,6 +173,16 @@ public:
         else
             inputs[0].convertTo(outputs[0], CV_8S, 1.f/scales[0], zeropoints[0]);
     }
 
+#ifdef HAVE_DNN_NGRAPH
+    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
+                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
+        const auto input = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
+        auto quantized = ngraphQuantize(input, scales[0], zeropoints[0]);
+        return Ptr<BackendNode>(new InfEngineNgraphNode(quantized));
+    }
+#endif  // HAVE_DNN_NGRAPH
 };
 
 // Dequantize INT8 Inputs to FP32/FP16
@@ -214,7 +226,7 @@ public:
 
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
-        return backendId == DNN_BACKEND_OPENCV;
+        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
     }
 
     bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -285,6 +297,16 @@ public:
         else
             inputs[0].convertTo(outputs[0], CV_32F, scales[0], -(scales[0]*zeropoints[0]));
     }
 
+#ifdef HAVE_DNN_NGRAPH
+    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
+                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
+        const auto input = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
+        auto quantized = ngraphDequantize(input, scales[0], zeropoints[0]);
+        return new InfEngineNgraphNode(quantized);
+    }
+#endif  // HAVE_DNN_NGRAPH
 };
 
 // Rescale/Requantize INT8 Inputs from (scale1, zeropoint1) to (scale2, zeropoint2)
@@ -6,6 +6,7 @@
 #include "layers_common.hpp"
 #include <opencv2/imgproc.hpp>
 #include <opencv2/dnn/shape_utils.hpp>
+#include "../ie_ngraph.hpp"
 
 namespace cv
 {
@@ -72,7 +73,8 @@ public:
 
     virtual bool supportBackend(int backendId) CV_OVERRIDE
     {
-        return backendId == DNN_BACKEND_OPENCV;
+        return backendId == DNN_BACKEND_OPENCV ||
+               backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
    }
 
    bool setActivation(const Ptr<ActivationLayer>& layer) CV_OVERRIDE
@@ -186,6 +188,59 @@ public:
         return flops;
     }
 
+#ifdef HAVE_DNN_NGRAPH
+    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
+        std::vector<ngraph::Output<ngraph::Node>> ieInpNodes(nodes.size());
+        for (int i = 0; i < nodes.size(); ++i) {
+            ieInpNodes[i] = nodes[i].dynamicCast<InfEngineNgraphNode>()->node;
+        }
+
+        ieInpNodes[0] = ngraphDequantize(ieInpNodes[0], inp_sc[0], inp_zp[0]);
+
+        CV_Assert(!blobs.empty() || ieInpNodes.size() == 1 + (int)hasWeights + (int)hasBias);
+
+        ngraph::Output<ngraph::Node> weights, bias;
+        if (blobs.empty()) {
+            if (hasWeights)
+                weights = ieInpNodes[1];
+            if (hasBias)
+                bias = ieInpNodes[1 + (int)hasWeights];
+        } else {
+            std::vector<size_t> shape = ieInpNodes[0].get_shape();
+            int cAxis = normalize_axis(axis, shape.size());
+
+            size_t numWeights = blobs[0].total();
+            for (int i = 0; i < cAxis; ++i) {
+                shape[i] = 1;
+            }
+            for (int i = cAxis; i < shape.size(); ++i) {
+                if (numWeights == 1) {
+                    shape[i] = 1;
+                }
+                numWeights = std::max(numWeights / shape[i], (size_t)1);
+            }
+
+            if (hasWeights)
+                weights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, shape, blobs[0].data);
+            if (hasBias)
+                bias = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, shape, blobs[(int)hasWeights].data);
+        }
+
+        ngraph::Output<ngraph::Node> res = ieInpNodes[0];
+        if (hasWeights) {
+            res = std::make_shared<ngraph::op::v1::Multiply>(res, weights);
+        }
+        if (hasBias) {
+            res = std::make_shared<ngraph::op::v1::Add>(res, bias);
+        }
+
+        res = ngraphQuantize(res, output_sc, output_zp);
+
+        return new InfEngineNgraphNode(res);
+    }
+#endif  // HAVE_DNN_NGRAPH
+
 private:
     bool hasWeights;
     std::vector<float> inp_sc;
@@ -5,6 +5,7 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 #include "../op_timvx.hpp"
+#include "../ie_ngraph.hpp"
 
 #include <algorithm>
 #include <stdlib.h>
@@ -90,7 +91,8 @@ public:
 virtual bool supportBackend(int backendId) CV_OVERRIDE
 {
     return backendId == DNN_BACKEND_OPENCV ||
-           (backendId == DNN_BACKEND_TIMVX && haveTimVX());
+           (backendId == DNN_BACKEND_TIMVX && haveTimVX()) ||
+           backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
 }
 
 virtual bool tryFuse(Ptr<Layer>& top) CV_OVERRIDE
@@ -194,6 +196,26 @@ public:
         return Ptr<BackendNode>();
     }
 
+#ifdef HAVE_DNN_NGRAPH
+    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs,
+                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
+    {
+        auto input = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
+
+        input = ngraphDequantize(input, input_sc, input_zp);
+
+        ngraph::Output<ngraph::Node> res;
+        if (logSoftMax) {
+            res = std::make_shared<ngraph::op::v5::LogSoftmax>(input, axis);
+        } else {
+            res = std::make_shared<ngraph::op::v1::Softmax>(input, axis);
+        }
+
+        res = ngraphQuantize(res, output_sc, output_zp);
+        return new InfEngineNgraphNode(res);
+    }
+#endif  // HAVE_DNN_NGRAPH
+
 template <bool with_log>
 class SoftmaxInt8Invoker : public ParallelLoopBody {
 public:
@@ -123,9 +123,21 @@ public:
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                         const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
     {
-        auto node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32,
+        ngraph::element::Type dType;
+        if (blobs[0].depth() == CV_32F) {
+            dType = ngraph::element::f32;
+        } else if (blobs[0].depth() == CV_8S) {
+            dType = ngraph::element::i8;
+        } else {
+            CV_Error(Error::StsNotImplemented, format("Unexpected Const data depth: %d", blobs[0].depth()));
+        }
+        std::shared_ptr<ngraph::Node> node =
+            std::make_shared<ngraph::op::Constant>(dType,
                                                    getShape<size_t>(blobs[0]),
                                                    blobs[0].data);
+        if (node->get_element_type() != ngraph::element::f32) {
+            node = std::make_shared<ngraph::op::Convert>(node, ngraph::element::f32);
+        }
         return Ptr<BackendNode>(new InfEngineNgraphNode(node));
     }
 #endif  // HAVE_DNN_NGRAPH
@@ -1076,13 +1076,7 @@ struct MishFunctor : public BaseDefaultFunctor<MishFunctor>
 #ifdef HAVE_DNN_NGRAPH
     std::shared_ptr<ngraph::Node> initNgraphAPI(const ngraph::Output<ngraph::Node>& node)
     {
-        float one = 1.0f;
-        auto constant = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &one);
-        auto exp_node = std::make_shared<ngraph::op::v0::Exp>(node);
-        auto sum = std::make_shared<ngraph::op::v1::Add>(constant, exp_node, ngraph::op::AutoBroadcastType::NUMPY);
-        auto log_node = std::make_shared<ngraph::op::v0::Log>(sum);
-        auto tanh_node = std::make_shared<ngraph::op::Tanh>(log_node);
-        return std::make_shared<ngraph::op::v1::Multiply>(node, tanh_node);
+        return std::make_shared<ngraph::op::v4::Mish>(node);
     }
 #endif  // HAVE_DNN_NGRAPH
 
@@ -1309,10 +1303,7 @@ struct AbsValFunctor : public BaseDefaultFunctor<AbsValFunctor>
 #ifdef HAVE_DNN_NGRAPH
     std::shared_ptr<ngraph::Node> initNgraphAPI(const ngraph::Output<ngraph::Node>& node)
     {
-        float coeff = -0.999999f;
-        // float coeff = preferableTarget == DNN_TARGET_MYRIAD ? -0.999f : -0.999999f;
-        auto slope = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &coeff);
-        return std::make_shared<ngraph::op::PRelu>(node, slope);
+        return std::make_shared<ngraph::op::Abs>(node);
     }
 #endif  // HAVE_DNN_NGRAPH
 
@@ -386,11 +386,11 @@ public:
     {
         auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
         int axis = normalize_axis(axisRaw, ieInpNode.get_shape().size());
-        auto softmax = std::make_shared<ngraph::op::v1::Softmax>(ieInpNode, axis);
-        if (logSoftMax)
-            return Ptr<BackendNode>(new InfEngineNgraphNode(std::make_shared<ngraph::op::v0::Log>(softmax)));
-
-        return Ptr<BackendNode>(new InfEngineNgraphNode(softmax));
+        if (logSoftMax) {
+            return new InfEngineNgraphNode(std::make_shared<ngraph::op::v5::LogSoftmax>(ieInpNode, axis));
+        } else {
+            return new InfEngineNgraphNode(std::make_shared<ngraph::op::v1::Softmax>(ieInpNode, axis));
+        }
     }
 #endif  // HAVE_DNN_NGRAPH
 
@@ -170,11 +170,19 @@ void Net::Impl::setPreferableBackend(Net& net, int backendId)
     if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
         backendId = DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;  // = getInferenceEngineBackendTypeParam();
 
-    if (netWasQuantized && backendId != DNN_BACKEND_OPENCV && backendId != DNN_BACKEND_TIMVX)
+    if (netWasQuantized && backendId != DNN_BACKEND_OPENCV && backendId != DNN_BACKEND_TIMVX &&
+        backendId != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
     {
-        CV_LOG_WARNING(NULL, "DNN: Only default and TIMVX backends support quantized networks");
+        CV_LOG_WARNING(NULL, "DNN: Only default, TIMVX and OpenVINO backends support quantized networks");
         backendId = DNN_BACKEND_OPENCV;
     }
+#ifdef HAVE_DNN_NGRAPH
+    if (netWasQuantized && backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2023_0))
+    {
+        CV_LOG_WARNING(NULL, "DNN: OpenVINO 2023.0 and higher is required to support quantized networks");
+        backendId = DNN_BACKEND_OPENCV;
+    }
+#endif
 
     if (preferableBackend != backendId)
     {
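To make the new code path in `setPreferableBackend` concrete, a minimal sketch of quantizing a network and handing it to OpenVINO (the helper name is hypothetical; `quantize()` and the backend constants are the existing API):

```cpp
#include <opencv2/dnn.hpp>

// Hypothetical helper: post-training INT8 quantization + OpenVINO backend selection.
cv::dnn::Net quantizeForOpenVINO(cv::dnn::Net& net, const std::vector<cv::Mat>& calibData)
{
    // Existing cv::dnn API: calibrate on sample blobs, produce an INT8 network.
    cv::dnn::Net qnet = net.quantize(calibData, CV_8S, CV_8S);
    // Accepted for quantized nets as of this patch; with OpenVINO < 2023.0
    // (or any other unsupported backend) a warning is logged and the net
    // falls back to DNN_BACKEND_OPENCV.
    qnet.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE_NGRAPH);
    qnet.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
    return qnet;
}
```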
@@ -48,7 +48,6 @@ public:
     CV_Assert(basePtr_);
     Net::Impl& base = *basePtr_;
     CV_Assert(!base.netWasAllocated);
-    CV_Assert(!base.netWasQuantized);
     netInputLayer = base.netInputLayer;
     blobsToKeep = base.blobsToKeep;
     layers = base.layers;
@@ -27,6 +27,7 @@
 #define INF_ENGINE_RELEASE_2021_3 2021030000
 #define INF_ENGINE_RELEASE_2021_4 2021040000
 #define INF_ENGINE_RELEASE_2022_1 2022010000
+#define INF_ENGINE_RELEASE_2023_0 2023000000
 
 #ifndef INF_ENGINE_RELEASE
 #warning("IE version have not been provided via command-line. Using 2021.4 by default")
@@ -14,6 +14,9 @@ testing::internal::ParamGenerator< tuple<Backend, Target> > dnnBackendsAndTarget
     targets.push_back(make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU));
 #ifdef HAVE_TIMVX
     targets.push_back(make_tuple(DNN_BACKEND_TIMVX, DNN_TARGET_NPU));
 #endif
+#ifdef HAVE_INF_ENGINE
+    targets.push_back(make_tuple(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, DNN_TARGET_CPU));
+#endif
     return testing::ValuesIn(targets);
 }
@@ -66,8 +69,6 @@ public:
         outPath = _tf("onnx/data/output_" + basename);
     }
     ASSERT_FALSE(net.empty());
-    net.setPreferableBackend(backend);
-    net.setPreferableTarget(target);
 
     for (int i = 0; i < numInps; i++)
         inps[i] = blobFromNPY(inpPath + ((numInps > 1) ? cv::format("_%d.npy", i) : ".npy"));
@@ -78,6 +79,8 @@ public:
     qnet = net.quantize(inps, CV_8S, CV_8S, perChannel);
     qnet.getInputDetails(inputScale, inputZp);
     qnet.getOutputDetails(outputScale, outputZp);
+    qnet.setPreferableBackend(backend);
+    qnet.setPreferableTarget(target);
 
     // Quantize inputs to int8
     // int8_value = float_value/scale + zero-point
@@ -94,7 +97,7 @@ public:
     for (int i = 0; i < numOuts; i++)
     {
         outs_int8[i].convertTo(outs_dequantized[i], CV_32F, outputScale[i], -(outputScale[i] * outputZp[i]));
-        normAssert(refs[i], outs_dequantized[i], "", l1, lInf);
+        normAssert(refs[i], outs_dequantized[i], basename.c_str(), l1, lInf);
     }
 }
};
@@ -197,10 +200,13 @@ TEST_P(Test_Int8_layers, Padding)
 
 TEST_P(Test_Int8_layers, AvePooling)
 {
-    testLayer("layer_pooling_ave", "Caffe", 0.0021, 0.0075);
+    // Some tests failed with OpenVINO due to wrong padded area calculation
+    if (backend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        testLayer("layer_pooling_ave", "Caffe", 0.0021, 0.0075);
     testLayer("ave_pool_same", "TensorFlow", 0.00153, 0.0041);
     testLayer("average_pooling_1d", "ONNX", 0.002, 0.0048);
-    testLayer("average_pooling", "ONNX", 0.0014, 0.0032);
+    if (backend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        testLayer("average_pooling", "ONNX", 0.0014, 0.0032);
     testLayer("average_pooling_dynamic_axes", "ONNX", 0.0014, 0.006);
 
     if (target != DNN_TARGET_CPU)
@@ -216,8 +222,6 @@ TEST_P(Test_Int8_layers, MaxPooling)
         throw SkipTestException("Only CPU is supported");
     testLayer("pool_conv_3d", "ONNX", 0.0033, 0.0124);
 
-    /* All the below tests have MaxPooling as last layer, so computeMaxIdx is set to true
-       which is not supported by int8 maxpooling
     testLayer("layer_pooling_max", "Caffe", 0.0021, 0.004);
     testLayer("max_pool_even", "TensorFlow", 0.0048, 0.0139);
     testLayer("max_pool_odd_valid", "TensorFlow", 0.0043, 0.012);
@@ -227,7 +231,7 @@
     testLayer("two_maxpooling_1d", "ONNX", 0.0037, 0.0052);
     testLayer("maxpooling", "ONNX", 0.0034, 0.0065);
     testLayer("two_maxpooling", "ONNX", 0.0025, 0.0052);
-    testLayer("max_pool3d", "ONNX", 0.0028, 0.0069);*/
+    testLayer("max_pool3d", "ONNX", 0.0028, 0.0069);
 }
 
 TEST_P(Test_Int8_layers, Reduce)
|
||||
TEST_P(Test_Int8_layers, Concat)
|
||||
{
|
||||
testLayer("layer_concat_shared_input", "Caffe", 0.0076, 0.029, 1, 1, true, false);
|
||||
testLayer("concat_axis_1", "TensorFlow", 0.0056, 0.017);
|
||||
if (backend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
|
||||
// Crashes with segfault
|
||||
testLayer("concat_axis_1", "TensorFlow", 0.0056, 0.017);
|
||||
}
|
||||
testLayer("keras_pad_concat", "TensorFlow", 0.0032, 0.0089);
|
||||
testLayer("concat_3d", "TensorFlow", 0.005, 0.014);
|
||||
testLayer("concatenation", "ONNX", 0.0032, 0.009);
|
||||
@ -400,10 +407,13 @@ TEST_P(Test_Int8_layers, Reshape)
|
||||
testLayer("reshape_nchw", "TensorFlow", 0.0089, 0.029);
|
||||
|
||||
testLayer("reshape_conv", "TensorFlow", 0.035, 0.054);
|
||||
testLayer("reshape_reduce", "TensorFlow", 0.0042, 0.0078);
|
||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
|
||||
testLayer("reshape_reduce", "TensorFlow", 0.0053, 0.011);
|
||||
else
|
||||
testLayer("reshape_reduce", "TensorFlow", 0.0042, 0.0078);
|
||||
testLayer("reshape_as_shape", "TensorFlow", 0.0014, 0.0028);
|
||||
testLayer("reshape_no_reorder", "TensorFlow", 0.0014, 0.0028);
|
||||
testLayer("shift_reshape_no_reorder", "TensorFlow", 0.0063, 0.014);
|
||||
testLayer("shift_reshape_no_reorder", "TensorFlow", 0.0063, backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH ? 0.016 : 0.014);
|
||||
testLayer("dynamic_reshape", "ONNX", 0.0047, 0.0079);
|
||||
testLayer("dynamic_reshape_opset_11", "ONNX", 0.0048, 0.0081);
|
||||
testLayer("flatten_by_prod", "ONNX", 0.0048, 0.0081);
|
||||
@ -491,10 +501,10 @@ TEST_P(Test_Int8_layers, Eltwise)
|
||||
|
||||
testLayer("conv_2_inps", "Caffe", 0.0086, 0.0232, 2, 1, true, false);
|
||||
testLayer("eltwise_sub", "TensorFlow", 0.015, 0.047);
|
||||
testLayer("eltwise_add_vec", "TensorFlow", 0.037, 0.21); // tflite 0.0095, 0.0365
|
||||
testLayer("eltwise_add_vec", "TensorFlow", 0.037, backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH ? 0.24 : 0.21); // tflite 0.0095, 0.0365
|
||||
testLayer("eltwise_mul_vec", "TensorFlow", 0.173, 1.14); // tflite 0.0028, 0.017
|
||||
testLayer("channel_broadcast", "TensorFlow", 0.0025, 0.0063);
|
||||
testLayer("split_equals", "TensorFlow", 0.02, 0.065);
|
||||
testLayer("split_equals", "TensorFlow", backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH ? 0.021 : 0.02, 0.065);
|
||||
testLayer("mul", "ONNX", 0.0039, 0.014);
|
||||
testLayer("split_max", "ONNX", 0.004, 0.012);
|
||||
}
|
||||
@@ -551,10 +561,10 @@ public:
     Mat blob = readTensorFromONNX(findDataFile("dnn/onnx/data/input_" + basename + ".pb"));
     Mat ref = readTensorFromONNX(findDataFile("dnn/onnx/data/output_" + basename + ".pb"));
     Net baseNet = readNetFromONNX(onnxmodel);
-    baseNet.setPreferableBackend(backend);
-    baseNet.setPreferableTarget(target);
 
     Net qnet = baseNet.quantize(blob, CV_32F, CV_32F, perChannel);
+    qnet.setPreferableBackend(backend);
+    qnet.setPreferableTarget(target);
     qnet.setInput(blob);
     Mat out = qnet.forward();
 
@@ -699,9 +709,6 @@ TEST_P(Test_Int8_nets, AlexNet)
 #else
     applyTestTag(target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_512MB : CV_TEST_TAG_MEMORY_1GB);
 #endif
-    if (backend != DNN_BACKEND_OPENCV)
-        throw SkipTestException("Only OpenCV backend is supported");
-
     if (target == DNN_TARGET_OPENCL_FP16 && !ocl::Device::getDefault().isIntel())
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
     if (target == DNN_TARGET_OPENCL && !ocl::Device::getDefault().isIntel())
@@ -742,8 +749,6 @@ TEST_P(Test_Int8_nets, GoogLeNet)
 TEST_P(Test_Int8_nets, ResNet50)
 {
     applyTestTag(target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_512MB : CV_TEST_TAG_MEMORY_1GB);
-    if (backend != DNN_BACKEND_OPENCV)
-        throw SkipTestException("Only OpenCV backend is supported");
 
     if (target == DNN_TARGET_OPENCL_FP16 && !ocl::Device::getDefault().isIntel())
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
@@ -774,6 +779,8 @@ TEST_P(Test_Int8_nets, DenseNet121)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
     if (target == DNN_TARGET_OPENCL && !ocl::Device::getDefault().isIntel())
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
 
     Net net = readNetFromCaffe(findDataFile("dnn/DenseNet_121.prototxt", false),
                                findDataFile("dnn/DenseNet_121.caffemodel", false));
@@ -955,6 +962,8 @@ TEST_P(Test_Int8_nets, opencv_face_detector)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
     if (target == DNN_TARGET_OPENCL && !ocl::Device::getDefault().isIntel())
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
 
     Net net = readNetFromCaffe(findDataFile("dnn/opencv_face_detector.prototxt"),
                                findDataFile("dnn/opencv_face_detector.caffemodel", false));
@@ -1021,7 +1030,8 @@ TEST_P(Test_Int8_nets, FasterRCNN_resnet50)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
     if (target == DNN_TARGET_OPENCL && !ocl::Device::getDefault().isIntel())
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
-
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
     if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
 
@@ -1048,7 +1058,8 @@ TEST_P(Test_Int8_nets, FasterRCNN_inceptionv2)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
     if (target == DNN_TARGET_OPENCL && !ocl::Device::getDefault().isIntel())
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
-
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
     if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
 
@@ -1079,6 +1090,8 @@ TEST_P(Test_Int8_nets, FasterRCNN_vgg16)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
     if (target == DNN_TARGET_OPENCL && !ocl::Device::getDefault().isIntel())
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
 
     Net net = readNetFromCaffe(findDataFile("dnn/faster_rcnn_vgg16.prototxt"),
                                findDataFile("dnn/VGG16_faster_rcnn_final.caffemodel", false));
@@ -1106,6 +1119,8 @@ TEST_P(Test_Int8_nets, FasterRCNN_zf)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
     if (target == DNN_TARGET_OPENCL && !ocl::Device::getDefault().isIntel())
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
 
     Net net = readNetFromCaffe(findDataFile("dnn/faster_rcnn_zf.prototxt"),
                                findDataFile("dnn/ZF_faster_rcnn_final.caffemodel", false));
@@ -1138,6 +1153,9 @@ TEST_P(Test_Int8_nets, RFCN)
                                     0, 12, 0.94786, 132.093, 223.903, 338.077, 566.16);
 
     float confThreshold = 0.8, scoreDiff = 0.15, iouDiff = 0.11;
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
+        iouDiff = 0.12;
+    }
     testFaster(net, ref, confThreshold, scoreDiff, iouDiff);
 }
 
@@ -1317,6 +1335,8 @@ TEST_P(Test_Int8_nets, YOLOv4_tiny)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
     if (target == DNN_TARGET_OPENCL && !ocl::Device::getDefault().isIntel())
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL);
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
 
     const float confThreshold = 0.6;
 
@@ -2052,12 +2052,16 @@ TEST_P(Test_ONNX_layers, Quantized_Unsqueeze)
 TEST_P(Test_ONNX_layers, Quantized_Resize)
 {
     testONNXModels("quantized_resize_nearest");
-    testONNXModels("quantized_resize_bilinear", npy, 2e-4, 0.003);
-    testONNXModels("quantized_resize_bilinear_align", npy, 3e-4, 0.003);
+    double l1 = backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH ? 0.0013 : 2e-4;
+    testONNXModels("quantized_resize_bilinear", npy, l1, 0.003);
+    l1 = backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH ? 0.0013 : 3e-4;
+    testONNXModels("quantized_resize_bilinear_align", npy, l1, 0.003);
 }
 
 TEST_P(Test_ONNX_layers, Quantized_Concat)
 {
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
     testONNXModels("quantized_concat");
     testONNXModels("quantized_concat_const_blob");
 }
@@ -2074,6 +2078,8 @@ TEST_P(Test_ONNX_layers, OutputRegistration)
 
 TEST_P(Test_ONNX_layers, QLinearSoftmax)
 {
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
     testONNXModels("qlinearsoftmax_v11", npy, 0.002, 0.002);  // 2D coerced
     testONNXModels("qlinearsoftmax_v13", npy, 0.002, 0.002);
 }
@@ -204,6 +204,10 @@ TEST_P(Test_TFLite, max_unpooling)
 }
 
 TEST_P(Test_TFLite, EfficientDet_int8) {
+    if (target != DNN_TARGET_CPU || (backend != DNN_BACKEND_OPENCV &&
+        backend != DNN_BACKEND_TIMVX && backend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)) {
+        throw SkipTestException("Only OpenCV, TimVX and OpenVINO targets support INT8 on CPU");
+    }
     Net net = readNet(findDataFile("dnn/tflite/coco_efficientdet_lite0_v1_1.0_quant_2021_09_06.tflite", false));
     net.setPreferableBackend(backend);
     net.setPreferableTarget(target);