Merge branch 4.x

2025-06-07 17:44:04 +08:00 · 2023-08-08 17:31:57 +03:00 · 2023-08-08 17:31:57 +03:00 · a6748df587
commit a6748df587
parent b47704eabc 7d59db4ec4
37 changed files with 575 additions and 210 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -74,6 +74,10 @@ if(POLICY CMP0077)
  cmake_policy(SET CMP0077 NEW)  # CMake 3.13+: option() honors normal variables.
 endif()
 if(POLICY CMP0146)
  cmake_policy(SET CMP0146 OLD)  # CMake 3.27+: use CMake FindCUDA if available.
 endif()
 #
 # Configure OpenCV CMake hooks
 #
--- a/modules/dnn/perf/perf_layer.cpp
+++ b/modules/dnn/perf/perf_layer.cpp
@ -643,4 +643,69 @@ INSTANTIATE_TEST_CASE_P(/**/, Layer_ScatterND, testing::Values(std::make_tuple(D
 INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNorm, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
 INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNormExpanded, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU)));
 typedef TestBaseWithParam<tuple<Vec4i, int, bool, tuple<Backend, Target> > > Layer_FullyConnected;
 // Performance test: times Net::forward() for a network made of a single
 // "InnerProduct" layer with randomly initialised input and weights.
 // Test parameters (see the Layer_FullyConnected typedef):
 //   <0> Vec4i  - input shape, <1> int - output dimension,
 //   <2> bool   - MatMul-style weights, <3> (backend, target) pair.
 PERF_TEST_P_(Layer_FullyConnected, fc)
 {
    // Copy the leading non-zero entries of the Vec4i into the input shape;
    // the first zero entry ends the shape, so shorter shapes fit in a Vec4i.
    std::vector<int> inpShape;
    inpShape.reserve(4);
    for (int i = 0; i < 4; ++i) {
        int dim = get<0>(GetParam())[i];
        if (dim == 0)
            break;
        inpShape.push_back(dim);
    }
    Mat input(inpShape, CV_32F);
    randn(input, 0, 1);
    // The layer operates on the last input dimension.
    int axis = input.dims - 1;
    int outDims = get<1>(GetParam());
    bool isMatMul = get<2>(GetParam());
    int backendId = get<0>(get<3>(GetParam()));
    int targetId = get<1>(get<3>(GetParam()));
    // MatMul mode: weights take the input shape with the second-to-last
    // dimension replaced by outDims. Otherwise: 2-D weights of
    // {outDims, product of the input dimensions from `axis` to the end}.
    std::vector<int> weightShape;
    if (isMatMul) {
        weightShape = inpShape;
        weightShape[weightShape.size() - 2] = outDims;
    } else {
        weightShape = {outDims, (int)input.total(axis, input.dims)};
    }
    Mat weights(weightShape, CV_32F);
    randn(weights, 0, 1);
    LayerParams lp;
    lp.set("axis", input.dims - 1);
    // "is_matmul" is derived from the weight rank rather than read from the
    // isMatMul flag directly.
    lp.set("is_matmul", weights.dims > 2);
    lp.set("bias_term", false);
    lp.set("transB", true);
    // num_output is the product of all weight dimensions except the last one.
    lp.set("num_output", (int)weights.total(0, weights.dims - 1));
    lp.blobs.resize(1, weights);
    Net net;
    net.addLayerToPrev("matmul", "InnerProduct", lp);
    net.setInput(input);
    net.setPreferableBackend(backendId);
    net.setPreferableTarget(targetId);
    // warmup
    Mat output = net.forward();
    // Timed section: only the forward pass is measured.
    TEST_CYCLE()
    {
        net.forward();
    }
    SANITY_CHECK_NOTHING();
 }
 INSTANTIATE_TEST_CASE_P(/**/, Layer_FullyConnected, Combine(
    Values(                // input size
        Vec4i(5, 512, 384),
        Vec4i(5, 16, 512, 128)
    ),
    Values(256, 512, 1024),  // output dimension
    testing::Bool(),         // is_matmul
    dnnBackendsAndTargets()
 ));
 } // namespace
--- a/modules/dnn/src/cuda/activations.cu
+++ b/modules/dnn/src/cuda/activations.cu
@ -248,6 +248,11 @@ void selu(const Stream& stream, Span<T> output, View<T> input, T alpha, T gamma)
    generic_op<T, SeluFunctor<T>>(stream, output, input, {alpha, gamma});
 }
 // GELU activation entry point: dispatches GeluFunctor<T> through generic_op,
 // reading from `input` and writing to `output` on `stream`.
 // GeluFunctor takes no parameters, so none are forwarded.
 template <class T>
 void gelu(const Stream& stream, Span<T> output, View<T> input) {
    generic_op<T, GeluFunctor<T>>(stream, output, input);
 }
 template <class T>
 void sign(const Stream& stream, Span<T> output, View<T> input) {
    generic_op<T, SignFunctor<T>>(stream, output, input);
@ -324,6 +329,7 @@ template void tan<__half>(const Stream&, Span<__half>, View<__half>);
 template void celu<__half>(const Stream&, Span<__half>, View<__half>, __half);
 template void hardsigmoid<__half>(const Stream&, Span<__half>, View<__half>, __half, __half);
 template void selu<__half>(const Stream&, Span<__half>, View<__half>, __half, __half);
 template void gelu<__half>(const Stream&, Span<__half>, View<__half>);
 template void thresholdedrelu<__half>(const Stream&, Span<__half>, View<__half>, __half);
 template void power<__half>(const Stream&, Span<__half>, View<__half>, __half, __half, __half);
 template void exp<__half>(const Stream&, Span<__half>, View<__half>, __half, __half);
@ -366,6 +372,7 @@ template void tan<float>(const Stream&, Span<float>, View<float>);
 template void celu<float>(const Stream&, Span<float>, View<float>, float);
 template void hardsigmoid<float>(const Stream&, Span<float>, View<float>, float, float);
 template void selu<float>(const Stream&, Span<float>, View<float>, float, float);
 template void gelu<float>(const Stream&, Span<float>, View<float>);
 template void thresholdedrelu<float>(const Stream&, Span<float>, View<float>, float);
 template void power<float>(const Stream&, Span<float>, View<float>, float, float, float);
 template void exp<float>(const Stream&, Span<float>, View<float>, float, float);
--- a/modules/dnn/src/cuda/functors.hpp
+++ b/modules/dnn/src/cuda/functors.hpp
@ -588,6 +588,21 @@ struct SeluFunctor {
    T alpha, gamma;
 };
 // Device functor for the GELU activation.
 // operator() evaluates 0.5 * x * (1 + erf(x * M_SQRT1_2)) in type T.
 template <class T>
 struct GeluFunctor {
    // GELU has no tunable parameters; Params is an empty placeholder type.
    struct Params {
        CUDA4DNN_HOST_DEVICE Params() { }
    };
    CUDA4DNN_DEVICE GeluFunctor() { }
    // `params` carries no data and is ignored.
    CUDA4DNN_DEVICE GeluFunctor(const Params& params) { }
    CUDA4DNN_DEVICE T operator()(T value) {
        using csl::device::erf;
        // Constants are cast to T so the whole expression is computed in T.
        return static_cast<T>(0.5f) * value * (static_cast<T>(1.f) + erf(value * static_cast<T>(M_SQRT1_2)));
    }
 };
 template <class T>
 struct ThresholdedReluFunctor {
    struct Params {
--- a/modules/dnn/src/cuda4dnn/kernels/activations.hpp
+++ b/modules/dnn/src/cuda4dnn/kernels/activations.hpp
@ -114,6 +114,9 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
    template <class T>
    void selu(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, T alpha, T gamma);
    template <class T>
    void gelu(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);
    template <class T>
    void thresholdedrelu(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, T alpha);
--- a/modules/dnn/src/cuda4dnn/primitives/activation.hpp
+++ b/modules/dnn/src/cuda4dnn/primitives/activation.hpp
@ -537,6 +537,20 @@ namespace cv { namespace dnn { namespace cuda4dnn {
        const T alpha, gamma;
    };
    // CUDA backend operation for GELU: calculate() forwards to
    // kernels::gelu<T> using the stream captured at construction.
    template <class T>
    class GeluOp final : public BaseOp<GeluOp, T> {
    public:
        // Takes the stream by value and moves it into the member.
        GeluOp(csl::Stream stream_) : stream(std::move(stream_)) { }
        // Runs the GELU kernel, reading `input` and writing `output`.
        void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
        {
            kernels::gelu<T>(stream, output, input);
        }
    private:
        // Stream passed to kernels::gelu on every calculate() call.
        csl::Stream stream;
    };
    template <class T>
    class ThresholdedReluOp final : public BaseOp<ThresholdedReluOp, T> {
    public:
--- a/modules/dnn/src/cuda4dnn/primitives/normalize_bbox.hpp
+++ b/modules/dnn/src/cuda4dnn/primitives/normalize_bbox.hpp
@ -111,7 +111,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
             * or there might be several weights
             * or we don't have to scale
             */
-            if (weight != 1.0)
+            if (weight != static_cast<T>(1.0f))
            {
                kernels::scale1_with_bias1<T>(stream, output, input, weight, 1.0);
            }
--- a/modules/dnn/src/cuda4dnn/primitives/region.hpp
+++ b/modules/dnn/src/cuda4dnn/primitives/region.hpp
@ -121,7 +121,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
                new_coords
            );
-            if (nms_iou_threshold > 0) {
+            if (nms_iou_threshold > static_cast<T>(0.0f)) {
                auto output_mat = output_wrapper->getMutableHostMat();
                CV_Assert(output_mat.type() == CV_32F);
                for (int i = 0; i < input.get_axis_size(0); i++) {
--- a/modules/dnn/src/ie_ngraph.cpp
+++ b/modules/dnn/src/ie_ngraph.cpp
@ -446,66 +446,6 @@ void InfEngineNgraphNet::addOutput(const Ptr<InfEngineNgraphNode>& node)
    requestedOutputs.insert({name, node.get()});
 }
 // Registers `ptr` in all_nodes under the friendly name of the node it points to.
 // The map stores the address of the caller's shared_ptr, not a copy of it.
 // emplace() leaves an already-registered entry with the same name untouched.
 void InfEngineNgraphNet::setNodePtr(std::shared_ptr<ngraph::Node>* ptr) {
    all_nodes.emplace((*ptr)->get_friendly_name(), ptr);
 }
 // Drops registered node pointers for the last entry of `components`.
 // For every node in components.back() that is not a parameter, output or
 // constant, the matching all_nodes entry (looked up by friendly name) has its
 // pointed-to shared_ptr reset and is then erased from the map.
 // NOTE(review): components.back() is called unconditionally; this assumes
 // `components` is non-empty - confirm against callers.
 void InfEngineNgraphNet::release()
 {
     // FIXIT release should not be conditional, release ALL
     for (auto& node : components.back()) {
 // The node-kind predicates are free functions for releases newer than
 // 2020.4 and member functions otherwise; both branches open the same `if`.
 #if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
         if (!(ngraph::op::is_parameter(node) || ngraph::op::is_output(node) || ngraph::op::is_constant(node)) ) {
 #else
         if (!(node->is_parameter() || node->is_output() || node->is_constant()) ) {
 #endif
             auto it = all_nodes.find(node->get_friendly_name());
             if (it != all_nodes.end()) {
                 // Reset the shared_ptr the map entry points at, then forget it.
                 it->second->reset();
                 all_nodes.erase(it);
             }
         }
     }
 }
 // Recursive depth-first traversal used to collect a group of linked nodes.
 //   node - node to visit; marked as visited in `used` by friendly name.
 //   comp - receives every node visited by this traversal, in visit order.
 //   used - visited flags keyed by friendly name, shared across traversals.
 void InfEngineNgraphNet::dfs(std::shared_ptr<ngraph::Node>& node,
                             std::vector<std::shared_ptr<ngraph::Node>>& comp,
                             std::unordered_map<std::string, bool>& used) {
    used[node->get_friendly_name()] = true;
    comp.push_back(node);
    // Despite its name, `inputs` holds all neighbours: it starts with the
    // node's users and is extended with the producer of each input value,
    // so the traversal follows edges in both directions.
    auto inputs = node->get_users();
    for (size_t i = 0; i < node->get_input_size(); ++i) {
        inputs.push_back(node->input_value(i).get_node()->shared_from_this());
    }
    for (auto& to : inputs) {
        if (!used[to->get_friendly_name()]) {
            dfs(to, comp, used);
        }
    }
 }
 // Returns the number of node groups stored in `components`, computing them
 // on first use. If `components` is already populated its size is returned
 // as-is. Otherwise every op from ngraph_function->get_ordered_ops() is
 // visited, and each not-yet-visited op seeds a dfs() whose collected nodes
 // become one new entry of `components`.
 int InfEngineNgraphNet::getNumComponents()
 {
    if (!components.empty()) {
        return components.size();
    }
    std::unordered_map<std::string, bool> used;
    auto inputs = ngraph_function->get_ordered_ops();
    // Pre-register every op as "not visited" (keyed by friendly name).
    for (auto& node : inputs) {
        used.emplace(node->get_friendly_name(), false);
    }
    for (auto& node : inputs) {
        if (!used[node->get_friendly_name()]) {
            std::vector<std::shared_ptr<ngraph::Node>> current_comp;
            dfs(node, current_comp, used);
            components.push_back(current_comp);
        }
    }
    return components.size();
 }
 void InfEngineNgraphNet::createNet(Target targetId) {
    if (!hasNetOwner)
    {
@ -524,46 +464,7 @@ void InfEngineNgraphNet::createNet(Target targetId) {
        }
        CV_Assert_N(!inputs_vec.empty(), !outs.empty());
        ngraph_function = std::make_shared<ngraph::Function>(outs, inputs_vec);
-
+        init(targetId);
        int num_comp = getNumComponents();
        CV_LOG_DEBUG(NULL, "DNN/IE: number of subgraphs: " << num_comp);
        if (num_comp > 1) {
            for (int i = num_comp - 1; i >= 0; --i) {
                ngraph::ResultVector outputs;
                ngraph::ParameterVector inps;
                for (auto& node : components.back()) {
 #if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
                    if (ngraph::op::is_parameter(node)) {
 #else
                    if (node->is_parameter()) {
 #endif
                        CV_LOG_DEBUG(NULL, "DNN/IE: subgraph[" << i << "]: +input[" << inps.size() << "] = '" << node->get_friendly_name() << "'");
                        auto parameter = std::dynamic_pointer_cast<ngraph::op::Parameter>(node);
                        inps.push_back(parameter);
                    }
 #if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
                    else if (ngraph::op::is_output(node)) {
 #else
                    else if (node->is_output()) {
 #endif
                        CV_LOG_DEBUG(NULL, "DNN/IE: subgraph[" << i << "]: +output[" << outputs.size() << "] = '" << node->get_friendly_name() << "'");
                        auto result = std::dynamic_pointer_cast<ngraph::op::Result>(node);
                        outputs.push_back(result);
                    }
                }
                CV_LOG_DEBUG(NULL, "DNN/IE: subgraph[" << i << ": nodes=" << components.back().size() << " inputs=" << inps.size() << " outputs=" << outputs.size());
                isInit = false;
                CV_Assert_N(!inps.empty(), !outputs.empty());
                ngraph_function = std::make_shared<ngraph::Function>(outputs, inps);
                release();
                components.pop_back();
                init(targetId);
            }
        } else {
            release();
            components.clear();
            init(targetId);
        }
    }
 }
--- a/modules/dnn/src/ie_ngraph.hpp
+++ b/modules/dnn/src/ie_ngraph.hpp
@ -50,22 +50,14 @@ public:
    void addBlobs(const std::vector<cv::Ptr<BackendWrapper> >& ptrs);
    void createNet(Target targetId);
    void setNodePtr(std::shared_ptr<ngraph::Node>* ptr);
    void reset();
 //private:
    detail::NetImplBase& netImpl_;
    void release();
    int getNumComponents();
    void dfs(std::shared_ptr<ngraph::Node>& node, std::vector<std::shared_ptr<ngraph::Node>>& comp,
             std::unordered_map<std::string, bool>& used);
    ngraph::ParameterVector inputs_vec;
    std::shared_ptr<ngraph::Function> ngraph_function;
    std::vector<std::vector<std::shared_ptr<ngraph::Node>>> components;
    std::unordered_map<std::string, std::shared_ptr<ngraph::Node>* > all_nodes;
    InferenceEngine::ExecutableNetwork netExec;
 #if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2022_1)
--- a/modules/dnn/src/layers/detection_output_layer.cpp
+++ b/modules/dnn/src/layers/detection_output_layer.cpp
@ -221,7 +221,7 @@ public:
    {
        return backendId == DNN_BACKEND_OPENCV ||
               (backendId == DNN_BACKEND_CUDA && !_groupByClasses) ||
-               (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && !_locPredTransposed && _bboxesNormalized);
+               backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH;
    }
    bool getMemoryShapes(const std::vector<MatShape> &inputs,
@ -1006,9 +1006,30 @@ public:
    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs, const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
    {
        CV_Assert(nodes.size() == 3);
-        auto& box_logits  = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
+        auto box_logits  = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
-        auto& class_preds = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
+        auto class_preds = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
-        auto& proposals   = nodes[2].dynamicCast<InfEngineNgraphNode>()->node;
+        auto proposals   = nodes[2].dynamicCast<InfEngineNgraphNode>()->node;
        if (_locPredTransposed) {
            // Convert box predictions from yxYX to xyXY
            box_logits = std::make_shared<ngraph::op::v1::Reshape>(box_logits,
                std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{3}, std::vector<int32_t>{0, -1, 2}),
                true
            );
            int axis = 2;
            box_logits = std::make_shared<ngraph::op::v1::Reverse>(box_logits,
                std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{1}, &axis),
                ngraph::op::v1::Reverse::Mode::INDEX
            );
        }
        auto shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{2}, std::vector<int32_t>{0, -1});
        box_logits = std::make_shared<ngraph::op::v1::Reshape>(box_logits, shape, true);
        class_preds = std::make_shared<ngraph::op::v1::Reshape>(class_preds, shape, true);
        proposals = std::make_shared<ngraph::op::v1::Reshape>(proposals,
            std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{3}, std::vector<int32_t>{0, _varianceEncodedInTarget ? 1 : 2, -1}),
            true
        );
        ngraph::op::DetectionOutputAttrs attrs;
        attrs.num_classes                = _numClasses;
--- a/modules/dnn/src/layers/elementwise_layers.cpp
+++ b/modules/dnn/src/layers/elementwise_layers.cpp
@ -821,7 +821,7 @@ struct GeluFunctor : public BaseDefaultFunctor<GeluFunctor>
    bool supportBackend(int backendId, int)
    {
-        return backendId == DNN_BACKEND_OPENCV;
+        return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA;
    }
    inline float calculate(float x) const
@ -829,6 +829,13 @@ struct GeluFunctor : public BaseDefaultFunctor<GeluFunctor>
        return 0.5f * x * (1.0f + erf(x * M_SQRT1_2));
    }
 #ifdef HAVE_CUDA
    // Creates the CUDA backend node for this functor by instantiating
    // cuda4dnn::GeluOp through make_cuda_node with the given target and stream.
    Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
    {
        return make_cuda_node<cuda4dnn::GeluOp>(target, stream);
    }
 #endif
    int64 getFLOPSPerElement() const { return 100; }
 };
--- a/modules/dnn/src/layers/fully_connected_layer.cpp
+++ b/modules/dnn/src/layers/fully_connected_layer.cpp
@ -180,15 +180,12 @@ public:
    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
        bool tranAorB = transA || transB;
 #ifdef HAVE_INF_ENGINE
        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
            return axis == 1 && !tranAorB;
 #endif
        return backendId == DNN_BACKEND_OPENCV ||
               backendId == DNN_BACKEND_CUDA ||
               (backendId == DNN_BACKEND_HALIDE && haveHalide() && axis == 1 && !tranAorB) ||
               (backendId == DNN_BACKEND_WEBNN && axis == 1 && !tranAorB) ||
               backendId == DNN_BACKEND_CANN ||
               backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH ||
               (backendId == DNN_BACKEND_VKCOM && haveVulkan() && !tranAorB);
    }
@ -630,8 +627,10 @@ public:
            if(input_wrapper->getRank() == inp2Dim)
                return make_cuda_node<cuda4dnn::MatMulOp>(preferableTarget, std::move(context->stream), std::move(context->cublas_handle), oriMat, biasMat_, transA, transB);
-            else
+            else {
                CV_LOG_INFO(NULL, "DNN/CUDA: no implementation for MatMul with rank " << input_wrapper->getRank());
                return Ptr<BackendNode>();
            }
        }
        auto flatten_start_axis = normalize_axis(axis, input_wrapper->getRank());
@ -800,17 +799,26 @@ public:
        if (nodes.size() == 2)
        {
            auto& inp2 = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
-            matmul = std::make_shared<ngraph::op::MatMul>(ieInpNode, inp2, false, false);
+            matmul = std::make_shared<ngraph::op::MatMul>(ieInpNode, inp2, transA, transB);
        }
        else
        {
-            std::vector<int64_t> data = {(int64_t)ieInpNode->get_shape()[0], (int64_t)blobs[0].size[1]};
+            std::vector<int> shape(1 + normalize_axis(axis, ieInpNode->get_shape().size()), 0);
-            auto new_shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, data.data());
+            shape[shape.size() - 1] = -1;
-            auto inp = std::make_shared<ngraph::op::v1::Reshape>(ieInpNode, new_shape, true);
+            auto inp = std::make_shared<ngraph::op::v1::Reshape>(
                ieInpNode,
                std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{shape.size()}, shape.data()),
                true
            );
-            std::vector<size_t> weight_shape{(size_t)blobs[0].size[0], (size_t)blobs[0].size[1]};
+            std::vector<size_t> weight_shape;
            if (isMatMul) {
                weight_shape = getShape<size_t>(oriMat);
            } else {
                weight_shape = {(size_t)blobs[0].size[0], (size_t)blobs[0].size[1]};
            }
            auto ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, weight_shape, blobs[0].data);
-            matmul = std::make_shared<ngraph::op::MatMul>(inp, ieWeights, false, true);
+            matmul = std::make_shared<ngraph::op::MatMul>(inp, ieWeights, transA, transB);
        }
        if (bias) {
--- a/modules/dnn/src/layers/max_unpooling_layer.cpp
+++ b/modules/dnn/src/layers/max_unpooling_layer.cpp
@ -13,6 +13,7 @@ Implementation of Batch Normalization layer.
 #include "layers_common.hpp"
 #include "../op_cuda.hpp"
 #include "../op_halide.hpp"
 #include "../ie_ngraph.hpp"
 #include <opencv2/dnn/shape_utils.hpp>
 #include <opencv2/core/utils/logger.hpp>
@ -41,6 +42,7 @@ public:
    {
        return backendId == DNN_BACKEND_OPENCV ||
               backendId == DNN_BACKEND_CUDA ||
               backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH ||
               (backendId == DNN_BACKEND_HALIDE && haveHalide() && !poolPad.width && !poolPad.height);
    }
@ -181,6 +183,50 @@ public:
 #endif  // HAVE_HALIDE
        return Ptr<BackendNode>();
    }
 #ifdef HAVE_DNN_NGRAPH
    // Builds the nGraph subgraph implementing max-unpooling.
    //   inputs - backend wrappers of the layer inputs (not used here).
    //   nodes  - backend nodes of the layer inputs: nodes[0] provides the
    //            pooled features, nodes[1] the indices to scatter them to.
    // Returns a backend node wrapping the final Reshape.
    //
    // The unpooling is expressed as a scatter into a zero-filled flat buffer:
    // features and indices are flattened to 1-D, the features are written at
    // the given indices with ScatterElementsUpdate along axis 0, and the
    // result is reshaped to the output shape reported by getMemoryShapes().
    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inputs,
                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
    {
        // Both the features and the indices node are indexed below; fail with
        // a clear error instead of reading past the end of `nodes`.
        CV_Assert(nodes.size() >= 2);
        auto features = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
        auto indices = nodes[1].dynamicCast<InfEngineNgraphNode>()->node;
        // Derive the output shape from the shapes of the incoming nGraph nodes.
        std::vector<MatShape> inpShapes(nodes.size());
        std::vector<MatShape> outShapes, internals;
        for (size_t i = 0; i < nodes.size(); ++i) {
            std::vector<size_t> shape = nodes[i].dynamicCast<InfEngineNgraphNode>()->node->get_shape();
            inpShapes[i] = std::vector<int>(shape.begin(), shape.end());
        }
        getMemoryShapes(inpShapes, 1, outShapes, internals);
        // Flat zero-filled constant with as many elements as the output.
        Mat zeros = Mat::zeros(1, total(outShapes[0]), CV_32F);
        auto zeroInp = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{zeros.total()}, zeros.data);
        // Target shape {-1}: flatten both tensors to one dimension.
        int newShape = -1;
        features = std::make_shared<ngraph::op::v1::Reshape>(
            features,
            std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{1}, &newShape),
            true
        );
        indices = std::make_shared<ngraph::op::v1::Reshape>(
            indices,
            std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{1}, &newShape),
            true
        );
        // Indices that are not already i32/i64 are converted to i64.
        if (indices->get_element_type() != ngraph::element::i32 && indices->get_element_type() != ngraph::element::i64) {
            indices = std::make_shared<ngraph::op::Convert>(indices, ngraph::element::i64);
        }
        int axis = 0;
        std::shared_ptr<ngraph::Node> unpool = std::make_shared<ngraph::op::ScatterElementsUpdate>(zeroInp, indices, features,
            std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{1}, &axis));
        // Restore the N-D output shape.
        auto shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i32, ngraph::Shape{outShapes[0].size()}, outShapes[0].data());
        unpool = std::make_shared<ngraph::op::v1::Reshape>(unpool, shape, true);
        return Ptr<BackendNode>(new InfEngineNgraphNode(unpool));
    }
 #endif  // HAVE_DNN_NGRAPH
 };
 Ptr<MaxUnpoolLayer> MaxUnpoolLayer::create(const LayerParams& params)
--- a/modules/dnn/src/layers/pooling_layer.cpp
+++ b/modules/dnn/src/layers/pooling_layer.cpp
@ -209,7 +209,7 @@ public:
 #ifdef HAVE_INF_ENGINE
        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
        {
-            return !computeMaxIdx && type != STOCHASTIC && kernel_size.size() > 1 && (kernel_size.size() != 3 || !isArmComputePlugin());
+            return type != STOCHASTIC && kernel_size.size() > 1 && (kernel_size.size() != 3 || !isArmComputePlugin());
        }
 #endif
        if (backendId == DNN_BACKEND_OPENCV)
@ -613,9 +613,17 @@ public:
            return Ptr<BackendNode>(new InfEngineNgraphNode(reduce_sum));
        }
        else if (type == MAX) {
-            auto max_pool = std::make_shared<ngraph::op::v1::MaxPool>(ieInpNode, ngraph::Strides(strides),
+            std::shared_ptr<ngraph::Node> max_pool;
-                            ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size),
+            if (computeMaxIdx) {
-                            rounding_type, pad_type);
+                std::vector<size_t> dilations(kernel_size.size(), 1);
                max_pool = std::make_shared<ngraph::op::v8::MaxPool>(ieInpNode, ngraph::Strides(strides), ngraph::Strides(dilations),
                                ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size),
                                rounding_type, pad_type);
            } else {
                max_pool = std::make_shared<ngraph::op::v1::MaxPool>(ieInpNode, ngraph::Strides(strides),
                                ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size),
                                rounding_type, pad_type);
            }
            return Ptr<BackendNode>(new InfEngineNgraphNode(max_pool));
        }
        else if (type == ROI) {
--- a/modules/dnn/src/layers/reduce_layer.cpp
+++ b/modules/dnn/src/layers/reduce_layer.cpp
@ -425,7 +425,7 @@ public:
            dtype* p_dst = dst.ptr<dtype>();
            size_t main_index = start / last_unreduced_dim;
-            size_t loop = start / last_unreduced_dim;
+            size_t loop = start % last_unreduced_dim;
            size_t origin = unprojected_steps[main_index] + loop * last_unreduced_step;
            for (int i = start; i < end; ++i) {
                Op accumulator(n_reduce, p_src[origin + projected_steps[0]]);
--- a/modules/dnn/src/layers/resize_layer.cpp
+++ b/modules/dnn/src/layers/resize_layer.cpp
@ -410,7 +410,10 @@ public:
        }
        attrs.shape_calculation_mode = ngraph::op::v4::Interpolate::ShapeCalcMode::SIZES;
-        if (alignCorners) {
+        CV_Assert(!halfPixelCenters || !alignCorners);
        if (halfPixelCenters) {
            attrs.coordinate_transformation_mode = ngraph::op::v4::Interpolate::CoordinateTransformMode::HALF_PIXEL;
        } else if (alignCorners) {
            attrs.coordinate_transformation_mode = ngraph::op::v4::Interpolate::CoordinateTransformMode::ALIGN_CORNERS;
        }
@ -427,7 +430,10 @@ public:
        }
        attrs.shape_calculation_mode = ngraph::op::v4::Interpolate::ShapeCalcMode::sizes;
-        if (alignCorners) {
+        CV_Assert(!halfPixelCenters || !alignCorners);
        if (halfPixelCenters) {
            attrs.coordinate_transformation_mode = ngraph::op::v4::Interpolate::CoordinateTransformMode::half_pixel;
        } else if (alignCorners) {
            attrs.coordinate_transformation_mode = ngraph::op::v4::Interpolate::CoordinateTransformMode::align_corners;
        }
--- a/modules/dnn/src/net_openvino.cpp
+++ b/modules/dnn/src/net_openvino.cpp
@ -476,13 +476,14 @@ void NetImplOpenVINO::initBackend(const std::vector<LayerPin>& blobsToKeep_)
            {
                int lid = ld.inputBlobsId[i].lid;
                int oid = ld.inputBlobsId[i].oid;
                if (oid == 0 || lid == 0)
                    continue;
                auto ieInpNode = inputNodes[i].dynamicCast<InfEngineNgraphNode>();
                const auto& ngraph_input_node = ieInpNode->node;
                CV_LOG_DEBUG(NULL, "DNN/IE: bind output port " << lid << ":" << oid << " (" << ngraph_input_node->get_friendly_name() << ":" << ngraph_input_node->get_type_info().name << ")");
                if ((oid == 0 && ngraph_input_node->get_output_size() == 1) || lid == 0)
                    continue;
                // Handle parameters from other subnets. Output port is not used in this case
 #if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4)
                if ((ngraph::op::is_parameter(ngraph_input_node) || ngraph::op::is_constant(ngraph_input_node)) &&
@ -549,7 +550,6 @@ void NetImplOpenVINO::initBackend(const std::vector<LayerPin>& blobsToKeep_)
                break;
            }
        }
        ieNode->net->setNodePtr(&ieNode->node);
        net->addBlobs(ld.inputBlobsWrappers);
        net->addBlobs(ld.outputBlobsWrappers);
--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@ -1385,13 +1385,19 @@ void ONNXImporter::parseSplit(LayerParams& layerParams, const opencv_onnx::NodeP
        CV_Assert(constBlobs.find(node_proto.input(1)) != constBlobs.end());
        Mat splitsBlob = getBlob(node_proto, 1);
        int splitSize = splitsBlob.total();
-
+        if (splitSize == 1)
        std::vector<int> slicePoints(splitSize - 1, splitsBlob.at<int>(0));
        for (int i = 1; i < splitSize - 1; ++i)
        {
-            slicePoints[i] = slicePoints[i - 1] + splitsBlob.at<int>(i);
+            layerParams.set("num_split", 1);
        }
        else
        {
            std::vector<int> slicePoints(splitSize - 1, splitsBlob.at<int>(0));
            for (int i = 1; i < splitSize - 1; ++i)
            {
                slicePoints[i] = slicePoints[i - 1] + splitsBlob.at<int>(i);
            }
            layerParams.set("slice_point", DictValue::arrayInt(&slicePoints[0], slicePoints.size()));
        }
        layerParams.set("slice_point", DictValue::arrayInt(&slicePoints[0], slicePoints.size()));
    }
    else
    {
@ -1965,9 +1971,11 @@ void ONNXImporter::parseGemm(LayerParams& layerParams, const opencv_onnx::NodePr
    }
    int transB = layerParams.get<int>("transB", 0);
    int secondInpDims;
    if (constBlobs.find(node_proto.input(1)) != constBlobs.end())
    {
        Mat weights = getBlob(node_proto, 1);
        secondInpDims = weights.dims;
        if (transA == 0) // optimized branch, for now, we can only optimize the Gemm when transA = 0.
        {
@ -1993,7 +2001,10 @@ void ONNXImporter::parseGemm(LayerParams& layerParams, const opencv_onnx::NodePr
        }
    }
    else
    {
        layerParams.set("transB", transB == 1);
        secondInpDims = outShapes[node_proto.input(1)].size();
    }
    if (node_proto.input_size() == 3)
    {
@ -2002,7 +2013,7 @@ void ONNXImporter::parseGemm(LayerParams& layerParams, const opencv_onnx::NodePr
    }
    layerParams.set("bias_term", node_proto.input_size() == 3);
-    layerParams.set("is_matmul", true);
+    layerParams.set("is_matmul", secondInpDims > 2);
    addLayer(layerParams, node_proto);
 }
@ -2045,7 +2056,7 @@ void ONNXImporter::parseMatMul(LayerParams& layerParams, const opencv_onnx::Node
        layerParams.blobs.push_back(transBlob);
        int numOutput = layerParams.blobs[0].total(0, secondInpDims - 1);
        layerParams.set("num_output", numOutput);
-        layerParams.set("is_matmul", true);
+        layerParams.set("is_matmul", secondInpDims > 2);
    } else
        secondInpDims = outShapes[node_proto.input(1)].size();
--- a/modules/dnn/test/test_caffe_importer.cpp
+++ b/modules/dnn/test/test_caffe_importer.cpp
@ -731,21 +731,23 @@ TEST_P(Test_Caffe_nets, FasterRCNN_vgg16)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
 #endif
-    double scoreDiff = 0.0;
+    double scoreDiff = 0.0, iouDiff = 0.0;
-#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000)
+#if defined(INF_ENGINE_RELEASE)
    // Check 'backward_compatible_check || in_out_elements_equal' failed at core/src/op/reshape.cpp:427:
    // While validating node 'v1::Reshape bbox_pred_reshape (bbox_pred[0]:f32{1,84}, Constant_265242[0]:i64{4}) -> (f32{?,?,?,?})' with friendly_name 'bbox_pred_reshape':
    // Requested output shape {1,6300,4,1} is incompatible with input shape {1, 84}
    if (target == DNN_TARGET_MYRIAD)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
-    if (target == DNN_TARGET_OPENCL_FP16)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
-        scoreDiff = 0.02;
+        iouDiff = 0.02;
        if (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16) {
            scoreDiff = 0.04;
            iouDiff = 0.06;
        }
    }
 #endif
    static Mat ref = (Mat_<float>(3, 7) << 0, 2, 0.949398, 99.2454, 210.141, 601.205, 462.849,
                                           0, 7, 0.997022, 481.841, 92.3218, 722.685, 175.953,
                                           0, 12, 0.993028, 133.221, 189.377, 350.994, 563.166);
-    testFaster("faster_rcnn_vgg16.prototxt", "VGG16_faster_rcnn_final.caffemodel", ref, scoreDiff);
+    testFaster("faster_rcnn_vgg16.prototxt", "VGG16_faster_rcnn_final.caffemodel", ref, scoreDiff, iouDiff);
 }
 TEST_P(Test_Caffe_nets, FasterRCNN_zf)
@ -766,9 +768,6 @@ TEST_P(Test_Caffe_nets, FasterRCNN_zf)
        );
 #endif
    if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
         backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_OPENCL_FP16)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16);
    if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ||
         backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_MYRIAD)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD);
@ -779,7 +778,14 @@ TEST_P(Test_Caffe_nets, FasterRCNN_zf)
    static Mat ref = (Mat_<float>(3, 7) << 0, 2, 0.90121, 120.407, 115.83, 570.586, 528.395,
                                           0, 7, 0.988779, 469.849, 75.1756, 718.64, 186.762,
                                           0, 12, 0.967198, 138.588, 206.843, 329.766, 553.176);
-    testFaster("faster_rcnn_zf.prototxt", "ZF_faster_rcnn_final.caffemodel", ref);
+
    double scoreDiff = 0.0, iouDiff = 0.0;
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
        scoreDiff = 0.02;
        iouDiff = 0.13;
    }
    testFaster("faster_rcnn_zf.prototxt", "ZF_faster_rcnn_final.caffemodel", ref, scoreDiff, iouDiff);
 }
 TEST_P(Test_Caffe_nets, RFCN)
@ -802,8 +808,8 @@ TEST_P(Test_Caffe_nets, RFCN)
        iouDiff = 0.12;
    }
-#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000)
+#if defined(INF_ENGINE_RELEASE)
-    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
    {
        scoreDiff = 0.1f;
        iouDiff = 0.2f;
--- a/modules/dnn/test/test_darknet_importer.cpp
+++ b/modules/dnn/test/test_darknet_importer.cpp
@ -102,11 +102,14 @@ TEST(Test_Darknet, read_yolo_voc_stream)
 class Test_Darknet_layers : public DNNTestLayer
 {
 public:
-    void testDarknetLayer(const std::string& name, bool hasWeights = false, bool testBatchProcessing = true)
+    void testDarknetLayer(const std::string& name, bool hasWeights = false, bool testBatchProcessing = true,
                          double l1 = 0.0, double lInf = 0.0)
    {
        SCOPED_TRACE(name);
        Mat inp = blobFromNPY(findDataFile("dnn/darknet/" + name + "_in.npy"));
        Mat ref = blobFromNPY(findDataFile("dnn/darknet/" + name + "_out.npy"));
        l1 = l1 ? l1 : default_l1;
        lInf = lInf ? lInf : default_lInf;
        std::string cfg = findDataFile("dnn/darknet/" + name + ".cfg");
        std::string model = "";
@ -120,7 +123,7 @@ public:
        net.setPreferableTarget(target);
        net.setInput(inp);
        Mat out = net.forward();
-        normAssert(out, ref, "", default_l1, default_lInf);
+        normAssert(out, ref, "", l1, lInf);
        if (inp.size[0] == 1 && testBatchProcessing)  // test handling of batch size
        {
@ -166,8 +169,8 @@ public:
            }*/
            ASSERT_EQ(out2.dims, ref2.dims) << ref.dims;
-            normAssert(out2(ranges0), ref2, "", default_l1, default_lInf);
+            normAssert(out2(ranges0), ref2, "", l1, lInf);
-            normAssert(out2(ranges1), ref2, "", default_l1, default_lInf);
+            normAssert(out2(ranges1), ref2, "", l1, lInf);
        }
    }
 };
@ -1046,7 +1049,7 @@ TEST_P(Test_Darknet_layers, region)
       applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
 #endif
-#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000)
+#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2022010000)
    // accuracy on CPU, OpenCL
    // Expected: (normL1) <= (l1), actual: 0.000358148 vs 1e-05
    //   |ref| = 1.207319974899292
@ -1116,7 +1119,12 @@ TEST_P(Test_Darknet_layers, connected)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
    if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_CPU_FP16)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_CPU_FP16);
-    testDarknetLayer("connected", true);
+    double l1 = 0.0;
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)
    {
        l1 = 3e-5;
    }
    testDarknetLayer("connected", true, true, l1);
 }
 TEST_P(Test_Darknet_layers, relu)
--- a/modules/dnn/test/test_halide_layers.cpp
+++ b/modules/dnn/test/test_halide_layers.cpp
@ -361,22 +361,9 @@ TEST_P(MaxPooling, Accuracy)
    Backend backendId = get<0>(get<5>(GetParam()));
    Target targetId = get<1>(get<5>(GetParam()));
-#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2018050000)
+    // https://github.com/openvinotoolkit/openvino/issues/18731
-    if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && targetId == DNN_TARGET_MYRIAD
+    if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && stride != Size(1, 1))
-            && inSize == Size(7, 6) && kernel == Size(3, 2)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
            && (stride == Size(1, 1) || stride == Size(2, 2))
            && (pad == Size(0, 1) || pad == Size(1, 1))
    )
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
 #endif
 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2018050000)
    if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && targetId == DNN_TARGET_MYRIAD
            && (kernel == Size(2, 2) || kernel == Size(3, 2))
            && stride == Size(1, 1) && (pad == Size(0, 0) || pad == Size(0, 1))
    )
        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
 #endif
 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000)
    if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && targetId == DNN_TARGET_MYRIAD
@ -467,6 +454,11 @@ TEST_P(FullyConnected, Accuracy)
    {
        l1 = 0.01;
    }
    if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && targetId == DNN_TARGET_OPENCL)
    {
        l1 = 5e-3;
        lInf = 7e-3;
    }
 #endif
    if (targetId == DNN_TARGET_CUDA_FP16)
        l1 = 0.015;
--- a/modules/dnn/test/test_ie_models.cpp
+++ b/modules/dnn/test/test_ie_models.cpp
@ -465,8 +465,8 @@ TEST_P(DNNTestHighLevelAPI, predict)
    const std::string modelPath = getOpenVINOModel(modelName, isFP16);
    ASSERT_FALSE(modelPath.empty()) << modelName;
-    std::string xmlPath = findDataFile(modelPath + ".xml");
+    std::string xmlPath = findDataFile(modelPath + ".xml", false);
-    std::string binPath = findDataFile(modelPath + ".bin");
+    std::string binPath = findDataFile(modelPath + ".bin", false);
    Model model(xmlPath, binPath);
    Mat frame = imread(findDataFile("dnn/googlenet_1.png"));
--- a/modules/dnn/test/test_layers.cpp
+++ b/modules/dnn/test/test_layers.cpp
@ -215,7 +215,13 @@ TEST_P(Test_Caffe_layers, InnerProduct)
    if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_CPU_FP16)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_CPU_FP16);
-    testLayerUsingCaffeModels("layer_inner_product", true);
+    double l1 = 0.0, lInf = 0.0;
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
    {
        l1 = 5e-3;
        lInf = 2e-2;
    }
    testLayerUsingCaffeModels("layer_inner_product", true, true, l1, lInf);
 }
 TEST_P(Test_Caffe_layers, Pooling_max)
--- a/modules/dnn/test/test_model.cpp
+++ b/modules/dnn/test/test_model.cpp
@ -447,14 +447,17 @@ TEST_P(Test_Model, DetectionOutput)
    {
        if (backend == DNN_BACKEND_OPENCV)
            scoreDiff = 4e-3;
 #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2022010000)
        else if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
            scoreDiff = 4e-2;
 #endif
        else
            scoreDiff = 2e-2;
        iouDiff = 1.8e-1;
    }
 #if defined(INF_ENGINE_RELEASE)
        if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
        {
            scoreDiff = 0.05;
            iouDiff = 0.08;
        }
 #endif
    testDetectModel(weights_file, config_file, img_path, refClassIds, refConfidences, refBoxes,
                    scoreDiff, iouDiff, confThreshold, nmsThreshold, size, mean);
--- a/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp
+++ b/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp
@ -579,9 +579,7 @@ CASE(test_dropout_default_mask_ratio)
 CASE(test_dropout_default_old)
    // no filter
 CASE(test_dropout_default_ratio)
-#if SKIP_SET_1
+    // no filter
    SKIP;
 #endif
 CASE(test_dropout_random_old)
    // no filter
 CASE(test_dynamicquantizelinear)
--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@ -52,7 +52,7 @@ public:
    }
    void testONNXModels(const String& basename, const Extension ext = npy,
-                        const double l1 = 0, const float lInf = 0, const bool useSoftmax = false,
+                        double l1 = 0, double lInf = 0, const bool useSoftmax = false,
                        bool checkNoFallbacks = true, int numInps = 1)
    {
        String onnxmodel = _tf("models/" + basename + ".onnx", required);
@ -102,7 +102,12 @@ public:
            netSoftmax.setInput(ref);
            ref = netSoftmax.forward();
        }
-        normAssert(ref, out, "", l1 ? l1 : default_l1, lInf ? lInf : default_lInf);
+        if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)
        {
            l1 = std::max(l1, 1.4e-3);
            lInf = std::max(lInf, 8e-3);
        }
        normAssert(ref, out, basename.c_str(), l1 ? l1 : default_l1, lInf ? lInf : default_lInf);
        if (checkNoFallbacks)
            expectNoFallbacksFromIE(net);
    }
--- a/modules/dnn/test/test_tf_importer.cpp
+++ b/modules/dnn/test/test_tf_importer.cpp
@ -1816,6 +1816,11 @@ TEST_P(Test_TensorFlow_nets, Mask_RCNN)
    double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CPU_FP16) ? 0.2 : 2e-5;
    double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CPU_FP16) ? 0.018 : default_lInf;
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
    {
        scoreDiff = std::max(scoreDiff, 0.06);
        iouDiff = std::max(iouDiff, 0.01);
    }
    normAssertDetections(refDetections, outDetections, "", /*threshold for zero confidence*/1e-5, scoreDiff, iouDiff);
    // Output size of masks is NxCxHxW where
--- a/modules/dnn/test/test_tflite_importer.cpp
+++ b/modules/dnn/test/test_tflite_importer.cpp
@ -20,6 +20,14 @@ namespace opencv_test { namespace {
 using namespace cv;
 using namespace cv::dnn;
 // Parameterized fixture for the TFLite importer tests.
 // In every helper below, an l1 / lInf argument of 0 is a placeholder: the
 // testModel(Net&, ...) implementation swaps it for default_l1 / default_lInf.
 class Test_TFLite : public DNNTestLayer {
 public:
    // Runs `net` on `input` with the fixture's preferable backend and target,
    // after checking the input shapes with testInputShapes().
    void testModel(Net& net, const std::string& modelName, const Mat& input, double l1 = 0, double lInf = 0);
    // Reads "dnn/tflite/<modelName>.tflite" and forwards to the overload above.
    void testModel(const std::string& modelName, const Mat& input, double l1 = 0, double lInf = 0);
    // Builds the input blob from cv/shared/lena.png (scaled by 1/255, resized to
    // inpSize, R and B swapped) and forwards to the Mat overload.
    void testModel(const std::string& modelName, const Size& inpSize, double l1 = 0, double lInf = 0);
    // Single-layer check: loads "dnn/tflite/<modelName>_inp.npy" as input and
    // "dnn/tflite/<modelName>.tflite" as the network.
    void testLayer(const std::string& modelName, double l1 = 0, double lInf = 0);
 };
 void testInputShapes(const Net& net, const std::vector<Mat>& inps) {
    std::vector<MatShape> inLayerShapes;
    std::vector<MatShape> outLayerShapes;
@ -31,8 +39,14 @@ void testInputShapes(const Net& net, const std::vector<Mat>& inps) {
    }
 }
-void testModel(Net& net, const std::string& modelName, const Mat& input, double l1 = 1e-5, double lInf = 1e-4)
+void Test_TFLite::testModel(Net& net, const std::string& modelName, const Mat& input, double l1, double lInf)
 {
    l1 = l1 ? l1 : default_l1;
    lInf = lInf ? lInf : default_lInf;
    net.setPreferableBackend(backend);
    net.setPreferableTarget(target);
    testInputShapes(net, {input});
    net.setInput(input);
@ -48,20 +62,20 @@ void testModel(Net& net, const std::string& modelName, const Mat& input, double
    }
 }
-void testModel(const std::string& modelName, const Mat& input, double l1 = 1e-5, double lInf = 1e-4)
+void Test_TFLite::testModel(const std::string& modelName, const Mat& input, double l1, double lInf)
 {
    Net net = readNet(findDataFile("dnn/tflite/" + modelName + ".tflite", false));
    testModel(net, modelName, input, l1, lInf);
 }
-void testModel(const std::string& modelName, const Size& inpSize, double l1 = 1e-5, double lInf = 1e-4)
+void Test_TFLite::testModel(const std::string& modelName, const Size& inpSize, double l1, double lInf)
 {
    Mat input = imread(findDataFile("cv/shared/lena.png"));
    input = blobFromImage(input, 1.0 / 255, inpSize, 0, true);
    testModel(modelName, input, l1, lInf);
 }
-void testLayer(const std::string& modelName, double l1 = 1e-5, double lInf = 1e-4)
+void Test_TFLite::testLayer(const std::string& modelName, double l1, double lInf)
 {
    Mat inp = blobFromNPY(findDataFile("dnn/tflite/" + modelName + "_inp.npy"));
    Net net = readNet(findDataFile("dnn/tflite/" + modelName + ".tflite"));
@ -69,29 +83,66 @@ void testLayer(const std::string& modelName, double l1 = 1e-5, double lInf = 1e-
 }
 // https://google.github.io/mediapipe/solutions/face_mesh
-TEST(Test_TFLite, face_landmark)
+TEST_P(Test_TFLite, face_landmark)
 {
-    testModel("face_landmark", Size(192, 192), 2e-5, 2e-4);
+    if (backend == DNN_BACKEND_CUDA && target == DNN_TARGET_CUDA_FP16)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA_FP16);
    double l1 = 2e-5, lInf = 2e-4;
    if (target == DNN_TARGET_CPU_FP16 || target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD ||
        (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL))
    {
        l1 = 0.15;
        lInf = 0.82;
    }
    testModel("face_landmark", Size(192, 192), l1, lInf);
 }
 // https://google.github.io/mediapipe/solutions/face_detection
-TEST(Test_TFLite, face_detection_short_range)
+TEST_P(Test_TFLite, face_detection_short_range)
 {
-    testModel("face_detection_short_range", Size(128, 128));
+    double l1 = 0, lInf = 2e-4;
    if (target == DNN_TARGET_CPU_FP16 || target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD ||
        (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL))
    {
        l1 = 0.04;
        lInf = 0.8;
    }
    testModel("face_detection_short_range", Size(128, 128), l1, lInf);
 }
 // https://google.github.io/mediapipe/solutions/selfie_segmentation
-TEST(Test_TFLite, selfie_segmentation)
+TEST_P(Test_TFLite, selfie_segmentation)
 {
-    testModel("selfie_segmentation", Size(256, 256));
+    double l1 = 0, lInf = 0;
    if (target == DNN_TARGET_CPU_FP16 || target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD ||
        (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL))
    {
        l1 = 0.01;
        lInf = 0.48;
    }
    testModel("selfie_segmentation", Size(256, 256), l1, lInf);
 }
-TEST(Test_TFLite, max_unpooling)
+TEST_P(Test_TFLite, max_unpooling)
 {
    if (backend == DNN_BACKEND_CUDA)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA);
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target != DNN_TARGET_CPU) {
        if (target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
        if (target == DNN_TARGET_OPENCL)      applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
        if (target == DNN_TARGET_MYRIAD)      applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
    }
    if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
        applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
    // Because Max Unpooling is a numerically unstable operation, and a small difference between
    // frameworks might lead to a positional difference of the maximal elements in the tensor,
    // this test checks the behavior of the Max Unpooling layer only.
    Net net = readNet(findDataFile("dnn/tflite/hair_segmentation.tflite", false));
    net.setPreferableBackend(backend);
    net.setPreferableTarget(target);
    Mat input = imread(findDataFile("cv/shared/lena.png"));
    cvtColor(input, input, COLOR_BGR2RGBA);
@ -101,7 +152,15 @@ TEST(Test_TFLite, max_unpooling)
    net.setInput(input);
    std::vector<std::vector<Mat> > outs;
-    net.forward(outs, {"p_re_lu_1", "max_pooling_with_argmax2d", "conv2d_86", "max_unpooling2d_2"});
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
        // TODO: seems like a bug with retrieving intermediate tensors
        net.forward(outs, {"conv2d_transpose_4", "p_re_lu_1", "max_pooling_with_argmax2d", "conv2d_86", "max_unpooling2d_2"});
        outs.erase(outs.begin());
    }
    else {
        net.forward(outs, {"p_re_lu_1", "max_pooling_with_argmax2d", "conv2d_86", "max_unpooling2d_2"});
    }
    ASSERT_EQ(outs.size(), 4);
    ASSERT_EQ(outs[0].size(), 1);
    ASSERT_EQ(outs[1].size(), 2);
@ -117,6 +176,8 @@ TEST(Test_TFLite, max_unpooling)
    ASSERT_EQ(poolOut.size, poolIds.size);
    ASSERT_EQ(poolOut.size, unpoolInp.size);
    ASSERT_EQ(countNonZero(poolInp), poolInp.total());
    for (int c = 0; c < 32; ++c) {
        float *poolInpData = poolInp.ptr<float>(0, c);
        float *poolOutData = poolOut.ptr<float>(0, c);
@ -135,15 +196,19 @@ TEST(Test_TFLite, max_unpooling)
                    }
                }
                EXPECT_EQ(poolInpData[maxIdx], poolOutData[y * 64 + x]) << errMsg;
-                EXPECT_EQ(poolIdsData[y * 64 + x], (float)maxIdx) << errMsg;
+                if (backend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
                    EXPECT_EQ(poolIdsData[y * 64 + x], (float)maxIdx) << errMsg;
                }
                EXPECT_EQ(unpoolOutData[maxIdx], unpoolInpData[y * 64 + x]) << errMsg;
            }
        }
    }
 }
-TEST(Test_TFLite, EfficientDet_int8) {
+TEST_P(Test_TFLite, EfficientDet_int8) {
    Net net = readNet(findDataFile("dnn/tflite/coco_efficientdet_lite0_v1_1.0_quant_2021_09_06.tflite", false));
    net.setPreferableBackend(backend);
    net.setPreferableTarget(target);
    Mat img = imread(findDataFile("dnn/dog416.png"));
    Mat blob = blobFromImage(img, 1.0, Size(320, 320));
@ -158,10 +223,18 @@ TEST(Test_TFLite, EfficientDet_int8) {
    normAssertDetections(ref, out, "", 0.5, 0.05, 0.1);
 }
-TEST(Test_TFLite, replicate_by_pack) {
+TEST_P(Test_TFLite, replicate_by_pack) {
-    testLayer("replicate_by_pack");
+    double l1 = 0, lInf = 0;
    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)
    {
        l1 = 4e-4;
        lInf = 2e-3;
    }
    testLayer("replicate_by_pack", l1, lInf);
 }
 INSTANTIATE_TEST_CASE_P(/**/, Test_TFLite, dnnBackendsAndTargets());
 }}  // namespace
 #endif  // OPENCV_TEST_DNN_TFLITE
--- a/modules/gapi/include/opencv2/gapi/infer/bindings_onnx.hpp
+++ b/modules/gapi/include/opencv2/gapi/infer/bindings_onnx.hpp
@ -39,6 +39,12 @@ public:
    GAPI_WRAP
    PyParams& cfgAddExecutionProvider(ep::DirectML ep);
    GAPI_WRAP
    PyParams& cfgAddExecutionProvider(ep::CUDA ep);
    GAPI_WRAP
    PyParams& cfgAddExecutionProvider(ep::TensorRT ep);
    GAPI_WRAP
    PyParams& cfgDisableMemPattern();
--- a/modules/gapi/include/opencv2/gapi/infer/onnx.hpp
+++ b/modules/gapi/include/opencv2/gapi/infer/onnx.hpp
@ -32,6 +32,56 @@ namespace onnx {
 */
 namespace ep {
 /**
 * @brief Options for the ONNX Runtime CUDA Execution Provider.
 * The structure carries the id of the device the provider should run on.
 * Please follow https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#cuda-execution-provider
 */
 struct GAPI_EXPORTS_W_SIMPLE CUDA {
    // NB: Used from python.
    /// @private -- Exclude this constructor from OpenCV documentation
    GAPI_WRAP
    CUDA() = default;
    /** @brief Class constructor.
    Constructs CUDA parameters for the given device.
    @param dev_id Target device id to use.
    */
    GAPI_WRAP
    explicit CUDA(const int dev_id)
        : device_id(dev_id) {
    }
    // Default member initializer: without it the defaulted constructor above
    // would leave device_id indeterminate on default-initialization.
    int device_id = 0;
 };
 /**
 * @brief Options for the ONNX Runtime TensorRT Execution Provider.
 * The structure carries the id of the device the provider should run on.
 * Please follow https://onnxruntime.ai/docs/execution-providers/TensorRT-ExecutionProvider.html#tensorrt-execution-provider
 */
 struct GAPI_EXPORTS_W_SIMPLE TensorRT {
    // NB: Used from python.
    /// @private -- Exclude this constructor from OpenCV documentation
    GAPI_WRAP
    TensorRT() = default;
    /** @brief Class constructor.
    Constructs TensorRT parameters for the given device.
    @param dev_id Target device id to use.
    */
    GAPI_WRAP
    explicit TensorRT(const int dev_id)
        : device_id(dev_id) {
    }
    // Default member initializer: without it the defaulted constructor above
    // would leave device_id indeterminate on default-initialization.
    int device_id = 0;
 };
 /**
 * @brief This structure provides functions
 * that fill inference options for ONNX OpenVINO Execution Provider.
@ -143,7 +193,11 @@ public:
    DeviceDesc ddesc;
 };
-using EP = cv::util::variant<cv::util::monostate, OpenVINO, DirectML>;
+using EP = cv::util::variant< cv::util::monostate
                            , OpenVINO
                            , DirectML
                            , CUDA
                            , TensorRT>;
 } // namespace ep
@ -431,6 +485,34 @@ public:
        return *this;
    }
    /** @brief Registers the CUDA execution provider for the runtime.
    Appends the given ONNX Runtime CUDA Execution Provider options to the
    execution providers stored in the network description.
    @param ep CUDA Execution Provider options; moved into the description.
    @see cv::gapi::onnx::ep::CUDA.
    @return the reference on modified object.
    */
    Params<Net>& cfgAddExecutionProvider(ep::CUDA&& ep) {
        auto& providers = desc.execution_providers;
        providers.emplace_back(std::move(ep));
        return *this;
    }
    /** @brief Registers the TensorRT execution provider for the runtime.
    Appends the given ONNX Runtime TensorRT Execution Provider options to the
    execution providers stored in the network description.
    @param ep TensorRT Execution Provider options; moved into the description.
    @see cv::gapi::onnx::ep::TensorRT.
    @return the reference on modified object.
    */
    Params<Net>& cfgAddExecutionProvider(ep::TensorRT&& ep) {
        auto& providers = desc.execution_providers;
        providers.emplace_back(std::move(ep));
        return *this;
    }
    /** @brief Disables the memory pattern optimization.
    @return the reference on modified object.
@ -491,6 +573,16 @@ public:
        desc.execution_providers.emplace_back(std::move(ep));
    }
    /** Moves the CUDA Execution Provider options into the description.
    @see onnx::Params::cfgAddExecutionProvider. */
    void cfgAddExecutionProvider(ep::CUDA&& ep) {
        auto& providers = desc.execution_providers;
        providers.emplace_back(std::move(ep));
    }
    /** Moves the TensorRT Execution Provider options into the description.
    @see onnx::Params::cfgAddExecutionProvider. */
    void cfgAddExecutionProvider(ep::TensorRT&& ep) {
        auto& providers = desc.execution_providers;
        providers.emplace_back(std::move(ep));
    }
    /** @see onnx::Params::cfgDisableMemPattern. */
    void cfgDisableMemPattern() {
        desc.disable_mem_pattern = true;
--- a/modules/gapi/misc/python/pyopencv_gapi.hpp
+++ b/modules/gapi/misc/python/pyopencv_gapi.hpp
@ -31,6 +31,8 @@ using map_string_and_vector_float   = std::map<std::string, std::vector<float>>;
 using map_int_and_double            = std::map<int, double>;
 using ep_OpenVINO                   = cv::gapi::onnx::ep::OpenVINO;
 using ep_DirectML                   = cv::gapi::onnx::ep::DirectML;
 using ep_CUDA                       = cv::gapi::onnx::ep::CUDA;
 using ep_TensorRT                   = cv::gapi::onnx::ep::TensorRT;
 // NB: Python wrapper generate T_U for T<U>
 // This behavior is only observed for inputs
--- a/modules/gapi/src/backends/onnx/bindings_onnx.cpp
+++ b/modules/gapi/src/backends/onnx/bindings_onnx.cpp
@ -33,6 +33,18 @@ cv::gapi::onnx::PyParams::cfgAddExecutionProvider(cv::gapi::onnx::ep::DirectML e
    return *this;
 }
 // Python-facing overload: hands the CUDA provider options over to the wrapped
 // parameters object and returns this wrapper so calls can be chained.
 cv::gapi::onnx::PyParams&
 cv::gapi::onnx::PyParams::cfgAddExecutionProvider(cv::gapi::onnx::ep::CUDA ep) {
    auto& impl = *m_priv;
    impl.cfgAddExecutionProvider(std::move(ep));
    return *this;
 }
 // Python-facing overload: hands the TensorRT provider options over to the
 // wrapped parameters object and returns this wrapper so calls can be chained.
 cv::gapi::onnx::PyParams&
 cv::gapi::onnx::PyParams::cfgAddExecutionProvider(cv::gapi::onnx::ep::TensorRT ep) {
    auto& impl = *m_priv;
    impl.cfgAddExecutionProvider(std::move(ep));
    return *this;
 }
 cv::gapi::onnx::PyParams&
 cv::gapi::onnx::PyParams::cfgDisableMemPattern() {
    m_priv->cfgDisableMemPattern();
--- a/modules/gapi/src/backends/onnx/gonnxbackend.cpp
+++ b/modules/gapi/src/backends/onnx/gonnxbackend.cpp
@ -145,9 +145,39 @@ public:
    void run();
 };
 // Registers the ONNX Runtime CUDA Execution Provider on the given session options.
 // Only the device id is copied from cuda_ep; all other provider options stay
 // value-initialized. A std::exception raised while appending the provider is
 // reported through cv::util::throw_error as a std::runtime_error.
 static void addCUDAExecutionProvider(Ort::SessionOptions *session_options,
                                     const cv::gapi::onnx::ep::CUDA &cuda_ep) {
     OrtCUDAProviderOptions cuda_options{};
     cuda_options.device_id = cuda_ep.device_id;
     try {
         session_options->AppendExecutionProvider_CUDA(cuda_options);
     } catch (const std::exception &err) {
         // Prefix the original reason so the failing provider is identifiable.
         const std::string message = std::string("ONNX Backend: Failed to enable CUDA")
                                   + " Execution Provider: " + err.what();
         cv::util::throw_error(std::runtime_error(message));
     }
 }
 // Registers the ONNX Runtime TensorRT Execution Provider on the given session options.
 // Only the device id is copied from trt_ep; all other provider options stay
 // value-initialized. A std::exception raised while appending the provider is
 // reported through cv::util::throw_error as a std::runtime_error.
 static void addTensorRTExecutionProvider(Ort::SessionOptions *session_options,
                                         const cv::gapi::onnx::ep::TensorRT &trt_ep) {
     OrtTensorRTProviderOptions trt_options{};
     trt_options.device_id = trt_ep.device_id;
     try {
         session_options->AppendExecutionProvider_TensorRT(trt_options);
     } catch (const std::exception &err) {
         // Prefix the original reason so the failing provider is identifiable.
         const std::string message = std::string("ONNX Backend: Failed to enable TensorRT")
                                   + " Execution Provider: " + err.what();
         cv::util::throw_error(std::runtime_error(message));
     }
 }
 static void addOpenVINOExecutionProvider(Ort::SessionOptions *session_options,
                                         const cv::gapi::onnx::ep::OpenVINO &ov_ep) {
-     OrtOpenVINOProviderOptions options;
+     OrtOpenVINOProviderOptions options{};
     options.device_type = ov_ep.device_type.c_str();
     options.cache_dir = ov_ep.cache_dir.c_str();
     options.num_of_threads = ov_ep.num_of_threads;
@ -181,6 +211,18 @@ static void addExecutionProvider(Ort::SessionOptions          *session_options,
            addDMLExecutionProvider(session_options, dml_ep);
            break;
        }
        case ep::EP::index_of<ep::CUDA>(): {
            GAPI_LOG_INFO(NULL, "CUDA Execution Provider is added.");
            const auto &cuda_ep = cv::util::get<ep::CUDA>(execution_provider);
            addCUDAExecutionProvider(session_options, cuda_ep);
            break;
        }
        case ep::EP::index_of<ep::TensorRT>(): {
            GAPI_LOG_INFO(NULL, "TensorRT Execution Provider is added.");
            const auto &trt_ep = cv::util::get<ep::TensorRT>(execution_provider);
            addTensorRTExecutionProvider(session_options, trt_ep);
            break;
        }
        default:
            GAPI_LOG_INFO(NULL, "CPU Execution Provider is added.");
            break;
--- a/modules/videoio/src/cap_v4l.cpp
+++ b/modules/videoio/src/cap_v4l.cpp
@ -260,6 +260,10 @@ typedef uint32_t __u32;
 #define V4L2_CID_IRIS_ABSOLUTE (V4L2_CID_CAMERA_CLASS_BASE+17)
 #endif
 // Fallback for headers that do not provide v4l2_fourcc_be: the regular
 // v4l2_fourcc() code with bit 31 set.
 // NOTE(review): presumably mirrors the kernel's big-endian fourcc helper - confirm against videodev2.h.
 #ifndef v4l2_fourcc_be
 #define v4l2_fourcc_be(a, b, c, d) (v4l2_fourcc(a, b, c, d) | (1U << 31))
 #endif
 #ifndef V4L2_PIX_FMT_Y10
 #define V4L2_PIX_FMT_Y10 v4l2_fourcc('Y', '1', '0', ' ')
 #endif
--- a/modules/videoio/test/test_precomp.hpp
+++ b/modules/videoio/test/test_precomp.hpp
@ -65,7 +65,7 @@ inline std::string fourccToStringSafe(int fourcc)
 {
    std::string res = fourccToString(fourcc);
    // TODO: return hex values for invalid characters
-    std::transform(res.begin(), res.end(), res.begin(), [](uint8_t c) { return (c >= '0' && c <= 'z') ? c : (c == ' ' ? '_' : 'x'); });
+    std::transform(res.begin(), res.end(), res.begin(), [](char c) -> char { return (c >= '0' && c <= 'z') ? c : (c == ' ' ? '_' : 'x'); });
    return res;
 }
--- a/modules/videoio/test/test_v4l2.cpp
+++ b/modules/videoio/test/test_v4l2.cpp
@ -22,6 +22,9 @@
 #include <linux/videodev2.h>
 // workarounds for older versions
 // Defined only when <linux/videodev2.h> did not provide it: the regular
 // v4l2_fourcc() code with bit 31 set.
 // NOTE(review): presumably mirrors the kernel's big-endian fourcc helper - confirm against videodev2.h.
 #ifndef v4l2_fourcc_be
 #define v4l2_fourcc_be(a, b, c, d) (v4l2_fourcc(a, b, c, d) | (1U << 31))
 #endif
 #ifndef V4L2_PIX_FMT_Y10
 #define V4L2_PIX_FMT_Y10 v4l2_fourcc('Y', '1', '0', ' ')
 #endif