From 15815fb54d162bb682834f106ca0d23221ca2cfc Mon Sep 17 00:00:00 2001
From: Vincent Rabaud
Date: Thu, 20 Jul 2023 17:54:42 +0200
Subject: [PATCH 01/12] Fix stereoRectify image boundaries.

This should have been fixed with https://github.com/opencv/opencv/issues/23304
---
 modules/calib3d/src/calibration.cpp | 8 ++--
 .../calib3d/test/test_cameracalibration.cpp | 48 +++++++++++++++++++
 2 files changed, 52 insertions(+), 4 deletions(-)

diff --git a/modules/calib3d/src/calibration.cpp b/modules/calib3d/src/calibration.cpp
index 9e8c252cee..002394b917 100644
--- a/modules/calib3d/src/calibration.cpp
+++ b/modules/calib3d/src/calibration.cpp
@@ -2699,11 +2699,11 @@ void cvStereoRectify( const CvMat* _cameraMatrix1, const CvMat* _cameraMatrix2,
 if( alpha >= 0 )
 {
 double s0 = std::max(std::max(std::max((double)cx1/(cx1_0 - inner1.x), (double)cy1/(cy1_0 - inner1.y)),
- (double)(newImgSize.width - cx1)/(inner1.x + inner1.width - cx1_0)),
- (double)(newImgSize.height - cy1)/(inner1.y + inner1.height - cy1_0));
+ (double)(newImgSize.width - 1 - cx1)/(inner1.x + inner1.width - cx1_0)),
+ (double)(newImgSize.height - 1 - cy1)/(inner1.y + inner1.height - cy1_0));
 s0 = std::max(std::max(std::max(std::max((double)cx2/(cx2_0 - inner2.x), (double)cy2/(cy2_0 - inner2.y)),
- (double)(newImgSize.width - cx2)/(inner2.x + inner2.width - cx2_0)),
- (double)(newImgSize.height - cy2)/(inner2.y + inner2.height - cy2_0)),
+ (double)(newImgSize.width - 1 - cx2)/(inner2.x + inner2.width - cx2_0)),
+ (double)(newImgSize.height - 1 - cy2)/(inner2.y + inner2.height - cy2_0)),
 s0);
 double s1 = std::min(std::min(std::min((double)cx1/(cx1_0 - outer1.x), (double)cy1/(cy1_0 - outer1.y)),

diff --git a/modules/calib3d/test/test_cameracalibration.cpp b/modules/calib3d/test/test_cameracalibration.cpp
index d2ab906fee..f5e80bbd28 100644
--- a/modules/calib3d/test/test_cameracalibration.cpp
+++ b/modules/calib3d/test/test_cameracalibration.cpp
@@ -2150,6 +2150,54 @@ TEST(Calib3d_StereoCalibrate, regression_11131)
 EXPECT_GE(roi2.area(), 400*300) << roi2;
 }

+TEST(Calib3d_StereoCalibrate, regression_23305)
+{
+ const Matx33d M1(
+ 850, 0, 640,
+ 0, 850, 640,
+ 0, 0, 1
+ );
+
+ const Matx34d P1_gold(
+ 850, 0, 640, 0,
+ 0, 850, 640, 0,
+ 0, 0, 1, 0
+ );
+
+ const Matx33d M2(
+ 850, 0, 640,
+ 0, 850, 640,
+ 0, 0, 1
+ );
+
+ const Matx34d P2_gold(
+ 850, 0, 640, -2*850, // corresponds to T(-2., 0., 0.)
+ 0, 850, 640, 0,
+ 0, 0, 1, 0
+ );
+
+ const Matx<double, 5, 1> D1(0, 0, 0, 0, 0);
+ const Matx<double, 5, 1> D2(0, 0, 0, 0, 0);
+
+ const Matx33d R(
+ 1., 0., 0.,
+ 0., 1., 0.,
+ 0., 0., 1.
+ );
+ const Matx31d T(-2., 0., 0.);
+
+ const Size imageSize(1280, 1280);
+
+ Mat R1, R2, P1, P2, Q;
+ Rect roi1, roi2;
+ stereoRectify(M1, D1, M2, D2, imageSize, R, T,
+ R1, R2, P1, P2, Q,
+ CALIB_ZERO_DISPARITY, 0, imageSize, &roi1, &roi2);
+
+ EXPECT_EQ(cv::norm(P1, P1_gold), 0.);
+ EXPECT_EQ(cv::norm(P2, P2_gold), 0.);
+}
+
 TEST(Calib3d_Triangulate, accuracy)
 {
 // the testcase from http://code.opencv.org/issues/4334

From 08d457edcf2003da2955098a46898d6af98bf86a Mon Sep 17 00:00:00 2001
From: cudawarped <12133430+cudawarped@users.noreply.github.com>
Date: Tue, 1 Aug 2023 09:15:31 +0300
Subject: [PATCH 02/12] cuda: suppress CMP0146 Warning

---
 CMakeLists.txt | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index ceeb8b8d7d..4dbcab578b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -74,6 +74,10 @@ if(POLICY CMP0077)
 cmake_policy(SET CMP0077 NEW) # CMake 3.13+: option() honors normal variables.
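# Background on the CMP0146 hunk that follows: CMP0146 is the CMake 3.27+
# policy that removes the legacy FindCUDA module. Setting it to OLD, as the
# patch below does, keeps FindCUDA available, which OpenCV's CUDA detection
# still relies on.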
endif()
+if(POLICY CMP0146)
+ cmake_policy(SET CMP0146 OLD) # CMake 3.27+: use CMake FindCUDA if available.
+endif()
+
 #
 # Configure OpenCV CMake hooks
 #

From ab8cb6f8a9034da2a289b84685c6d959266029be Mon Sep 17 00:00:00 2001
From: cudawarped <12133430+cudawarped@users.noreply.github.com>
Date: Tue, 1 Aug 2023 13:02:42 +0300
Subject: [PATCH 03/12] cuda: fix for compatibility with CUDA Toolkit >= 12.2.0

---
 modules/dnn/src/cuda4dnn/primitives/normalize_bbox.hpp | 2 +-
 modules/dnn/src/cuda4dnn/primitives/region.hpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/dnn/src/cuda4dnn/primitives/normalize_bbox.hpp b/modules/dnn/src/cuda4dnn/primitives/normalize_bbox.hpp
index f067dddaa7..91ff33f817 100644
--- a/modules/dnn/src/cuda4dnn/primitives/normalize_bbox.hpp
+++ b/modules/dnn/src/cuda4dnn/primitives/normalize_bbox.hpp
@@ -111,7 +111,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
 * or there might be several weights
 * or we don't have to scale
 */
- if (weight != 1.0)
+ if (weight != static_cast<T>(1.0f))
 {
 kernels::scale1_with_bias1(stream, output, input, weight, 1.0);
 }

diff --git a/modules/dnn/src/cuda4dnn/primitives/region.hpp b/modules/dnn/src/cuda4dnn/primitives/region.hpp
index d22d44214e..3af05155fe 100644
--- a/modules/dnn/src/cuda4dnn/primitives/region.hpp
+++ b/modules/dnn/src/cuda4dnn/primitives/region.hpp
@@ -121,7 +121,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
 new_coords
 );
- if (nms_iou_threshold > 0) {
+ if (nms_iou_threshold > static_cast<T>(0.0f)) {
 auto output_mat = output_wrapper->getMutableHostMat();
 CV_Assert(output_mat.type() == CV_32F);
 for (int i = 0; i < input.get_axis_size(0); i++) {

From e0e537d94ef1bd77ce919d65b3aaf47276cc4ca5 Mon Sep 17 00:00:00 2001
From: Maksim Shabunin
Date: Tue, 1 Aug 2023 14:09:22 +0300
Subject: [PATCH 04/12] videoio: fixed MSVC warning in test

---
 modules/videoio/test/test_precomp.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/videoio/test/test_precomp.hpp b/modules/videoio/test/test_precomp.hpp
index b4f340897e..fe2a65b1be 100644
--- a/modules/videoio/test/test_precomp.hpp
+++ b/modules/videoio/test/test_precomp.hpp
@@ -61,7 +61,7 @@ inline std::string fourccToStringSafe(int fourcc)
 {
 std::string res = fourccToString(fourcc);
 // TODO: return hex values for invalid characters
- std::transform(res.begin(), res.end(), res.begin(), [](uint8_t c) { return (c >= '0' && c <= 'z') ? c : (c == ' ' ? '_' : 'x'); });
+ std::transform(res.begin(), res.end(), res.begin(), [](char c) -> char { return (c >= '0' && c <= 'z') ? c : (c == ' ' ?
'_' : 'x'); });
 return res;
 }

From 9fc83ac5442688cceb7d1b6ffe2be1bd2a077daf Mon Sep 17 00:00:00 2001
From: Maksim Shabunin
Date: Tue, 1 Aug 2023 14:11:14 +0300
Subject: [PATCH 05/12] videoio: fix V4L compilation for older kernels

---
 modules/videoio/src/cap_v4l.cpp | 4 ++++
 modules/videoio/test/test_v4l2.cpp | 3 +++
 2 files changed, 7 insertions(+)

diff --git a/modules/videoio/src/cap_v4l.cpp b/modules/videoio/src/cap_v4l.cpp
index e3c53d7cdd..905c79e42f 100644
--- a/modules/videoio/src/cap_v4l.cpp
+++ b/modules/videoio/src/cap_v4l.cpp
@@ -260,6 +260,10 @@ typedef uint32_t __u32;
 #define V4L2_CID_IRIS_ABSOLUTE (V4L2_CID_CAMERA_CLASS_BASE+17)
 #endif

+#ifndef v4l2_fourcc_be
+#define v4l2_fourcc_be(a, b, c, d) (v4l2_fourcc(a, b, c, d) | (1U << 31))
+#endif
+
 #ifndef V4L2_PIX_FMT_Y10
 #define V4L2_PIX_FMT_Y10 v4l2_fourcc('Y', '1', '0', ' ')
 #endif

diff --git a/modules/videoio/test/test_v4l2.cpp b/modules/videoio/test/test_v4l2.cpp
index 5d56ac097c..1c4917bfca 100644
--- a/modules/videoio/test/test_v4l2.cpp
+++ b/modules/videoio/test/test_v4l2.cpp
@@ -22,6 +22,9 @@
 #include // workarounds for older versions
+#ifndef v4l2_fourcc_be
+#define v4l2_fourcc_be(a, b, c, d) (v4l2_fourcc(a, b, c, d) | (1U << 31))
+#endif
 #ifndef V4L2_PIX_FMT_Y10
 #define V4L2_PIX_FMT_Y10 v4l2_fourcc('Y', '1', '0', ' ')
 #endif

From f46f7eff0c57d5c278dd73ccfe7645d63dc8cdd1 Mon Sep 17 00:00:00 2001
From: Anatoliy Talamanov
Date: Wed, 2 Aug 2023 12:13:07 +0100
Subject: [PATCH 06/12] Merge pull request #24059 from TolyaTalamanov:at/add-onnx-cuda-execution-provider

G-API: Support CUDA & TensorRT Execution Providers for ONNXRT Backend #24059

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [ ] I agree to contribute to the project under Apache 2 License.
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
 Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake --- .../opencv2/gapi/infer/bindings_onnx.hpp | 6 ++ .../gapi/include/opencv2/gapi/infer/onnx.hpp | 94 ++++++++++++++++++- modules/gapi/misc/python/pyopencv_gapi.hpp | 2 + .../gapi/src/backends/onnx/bindings_onnx.cpp | 12 +++ .../gapi/src/backends/onnx/gonnxbackend.cpp | 44 ++++++++- 5 files changed, 156 insertions(+), 2 deletions(-) diff --git a/modules/gapi/include/opencv2/gapi/infer/bindings_onnx.hpp b/modules/gapi/include/opencv2/gapi/infer/bindings_onnx.hpp index 4ba829df09..c418c0d496 100644 --- a/modules/gapi/include/opencv2/gapi/infer/bindings_onnx.hpp +++ b/modules/gapi/include/opencv2/gapi/infer/bindings_onnx.hpp @@ -39,6 +39,12 @@ public: GAPI_WRAP PyParams& cfgAddExecutionProvider(ep::DirectML ep); + GAPI_WRAP + PyParams& cfgAddExecutionProvider(ep::CUDA ep); + + GAPI_WRAP + PyParams& cfgAddExecutionProvider(ep::TensorRT ep); + GAPI_WRAP PyParams& cfgDisableMemPattern(); diff --git a/modules/gapi/include/opencv2/gapi/infer/onnx.hpp b/modules/gapi/include/opencv2/gapi/infer/onnx.hpp index 64b855acd7..ff5febcf90 100644 --- a/modules/gapi/include/opencv2/gapi/infer/onnx.hpp +++ b/modules/gapi/include/opencv2/gapi/infer/onnx.hpp @@ -32,6 +32,56 @@ namespace onnx { */ namespace ep { +/** + * @brief This structure provides functions + * that fill inference options for CUDA Execution Provider. + * Please follow https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#cuda-execution-provider + */ +struct GAPI_EXPORTS_W_SIMPLE CUDA { + // NB: Used from python. + /// @private -- Exclude this constructor from OpenCV documentation + GAPI_WRAP + CUDA() = default; + + /** @brief Class constructor. + + Constructs CUDA parameters based on device type information. + + @param dev_id Target device id to use. + */ + GAPI_WRAP + explicit CUDA(const int dev_id) + : device_id(dev_id) { + } + + int device_id; +}; + +/** + * @brief This structure provides functions + * that fill inference options for TensorRT Execution Provider. + * Please follow https://onnxruntime.ai/docs/execution-providers/TensorRT-ExecutionProvider.html#tensorrt-execution-provider + */ +struct GAPI_EXPORTS_W_SIMPLE TensorRT { + // NB: Used from python. + /// @private -- Exclude this constructor from OpenCV documentation + GAPI_WRAP + TensorRT() = default; + + /** @brief Class constructor. + + Constructs TensorRT parameters based on device type information. + + @param dev_id Target device id to use. + */ + GAPI_WRAP + explicit TensorRT(const int dev_id) + : device_id(dev_id) { + } + + int device_id; +}; + /** * @brief This structure provides functions * that fill inference options for ONNX OpenVINO Execution Provider. @@ -143,7 +193,11 @@ public: DeviceDesc ddesc; }; -using EP = cv::util::variant; +using EP = cv::util::variant< cv::util::monostate + , OpenVINO + , DirectML + , CUDA + , TensorRT>; } // namespace ep @@ -431,6 +485,34 @@ public: return *this; } + /** @brief Adds execution provider for runtime. + + The function is used to add ONNX Runtime CUDA Execution Provider options. + + @param ep CUDA Execution Provider options. + @see cv::gapi::onnx::ep::CUDA. + + @return the reference on modified object. + */ + Params& cfgAddExecutionProvider(ep::CUDA&& ep) { + desc.execution_providers.emplace_back(std::move(ep)); + return *this; + } + + /** @brief Adds execution provider for runtime. + + The function is used to add ONNX Runtime TensorRT Execution Provider options. + + @param ep TensorRT Execution Provider options. 
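+
+ Example (a minimal usage sketch; `params` is assumed to be an already
+ constructed cv::gapi::onnx::Params object, the name is illustrative):
+ @code
+ params.cfgAddExecutionProvider(cv::gapi::onnx::ep::TensorRT(0));
+ @endcode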
+ @see cv::gapi::onnx::ep::TensorRT. + + @return the reference on modified object. + */ + Params& cfgAddExecutionProvider(ep::TensorRT&& ep) { + desc.execution_providers.emplace_back(std::move(ep)); + return *this; + } + /** @brief Disables the memory pattern optimization. @return the reference on modified object. @@ -491,6 +573,16 @@ public: desc.execution_providers.emplace_back(std::move(ep)); } + /** @see onnx::Params::cfgAddExecutionProvider. */ + void cfgAddExecutionProvider(ep::CUDA&& ep) { + desc.execution_providers.emplace_back(std::move(ep)); + } + + /** @see onnx::Params::cfgAddExecutionProvider. */ + void cfgAddExecutionProvider(ep::TensorRT&& ep) { + desc.execution_providers.emplace_back(std::move(ep)); + } + /** @see onnx::Params::cfgDisableMemPattern. */ void cfgDisableMemPattern() { desc.disable_mem_pattern = true; diff --git a/modules/gapi/misc/python/pyopencv_gapi.hpp b/modules/gapi/misc/python/pyopencv_gapi.hpp index 60d5f85479..3269a7d470 100644 --- a/modules/gapi/misc/python/pyopencv_gapi.hpp +++ b/modules/gapi/misc/python/pyopencv_gapi.hpp @@ -31,6 +31,8 @@ using map_string_and_vector_float = std::map>; using map_int_and_double = std::map; using ep_OpenVINO = cv::gapi::onnx::ep::OpenVINO; using ep_DirectML = cv::gapi::onnx::ep::DirectML; +using ep_CUDA = cv::gapi::onnx::ep::CUDA; +using ep_TensorRT = cv::gapi::onnx::ep::TensorRT; // NB: Python wrapper generate T_U for T // This behavior is only observed for inputs diff --git a/modules/gapi/src/backends/onnx/bindings_onnx.cpp b/modules/gapi/src/backends/onnx/bindings_onnx.cpp index 6051c6bb4d..b41ec7b1b1 100644 --- a/modules/gapi/src/backends/onnx/bindings_onnx.cpp +++ b/modules/gapi/src/backends/onnx/bindings_onnx.cpp @@ -33,6 +33,18 @@ cv::gapi::onnx::PyParams::cfgAddExecutionProvider(cv::gapi::onnx::ep::DirectML e return *this; } +cv::gapi::onnx::PyParams& +cv::gapi::onnx::PyParams::cfgAddExecutionProvider(cv::gapi::onnx::ep::CUDA ep) { + m_priv->cfgAddExecutionProvider(std::move(ep)); + return *this; +} + +cv::gapi::onnx::PyParams& +cv::gapi::onnx::PyParams::cfgAddExecutionProvider(cv::gapi::onnx::ep::TensorRT ep) { + m_priv->cfgAddExecutionProvider(std::move(ep)); + return *this; +} + cv::gapi::onnx::PyParams& cv::gapi::onnx::PyParams::cfgDisableMemPattern() { m_priv->cfgDisableMemPattern(); diff --git a/modules/gapi/src/backends/onnx/gonnxbackend.cpp b/modules/gapi/src/backends/onnx/gonnxbackend.cpp index b90d4d6974..c552b8b0e6 100644 --- a/modules/gapi/src/backends/onnx/gonnxbackend.cpp +++ b/modules/gapi/src/backends/onnx/gonnxbackend.cpp @@ -145,9 +145,39 @@ public: void run(); }; +static void addCUDAExecutionProvider(Ort::SessionOptions *session_options, + const cv::gapi::onnx::ep::CUDA &cuda_ep) { + OrtCUDAProviderOptions options{}; + options.device_id = cuda_ep.device_id; + + try { + session_options->AppendExecutionProvider_CUDA(options); + } catch (const std::exception &e) { + std::stringstream ss; + ss << "ONNX Backend: Failed to enable CUDA" + << " Execution Provider: " << e.what(); + cv::util::throw_error(std::runtime_error(ss.str())); + } +} + +static void addTensorRTExecutionProvider(Ort::SessionOptions *session_options, + const cv::gapi::onnx::ep::TensorRT &trt_ep) { + OrtTensorRTProviderOptions options{}; + options.device_id = trt_ep.device_id; + + try { + session_options->AppendExecutionProvider_TensorRT(options); + } catch (const std::exception &e) { + std::stringstream ss; + ss << "ONNX Backend: Failed to enable TensorRT" + << " Execution Provider: " << e.what(); + 
cv::util::throw_error(std::runtime_error(ss.str())); + } +} + static void addOpenVINOExecutionProvider(Ort::SessionOptions *session_options, const cv::gapi::onnx::ep::OpenVINO &ov_ep) { - OrtOpenVINOProviderOptions options; + OrtOpenVINOProviderOptions options{}; options.device_type = ov_ep.device_type.c_str(); options.cache_dir = ov_ep.cache_dir.c_str(); options.num_of_threads = ov_ep.num_of_threads; @@ -181,6 +211,18 @@ static void addExecutionProvider(Ort::SessionOptions *session_options, addDMLExecutionProvider(session_options, dml_ep); break; } + case ep::EP::index_of(): { + GAPI_LOG_INFO(NULL, "CUDA Execution Provider is added."); + const auto &cuda_ep = cv::util::get(execution_provider); + addCUDAExecutionProvider(session_options, cuda_ep); + break; + } + case ep::EP::index_of(): { + GAPI_LOG_INFO(NULL, "TensorRT Execution Provider is added."); + const auto &trt_ep = cv::util::get(execution_provider); + addTensorRTExecutionProvider(session_options, trt_ep); + break; + } default: GAPI_LOG_INFO(NULL, "CPU Execution Provider is added."); break; From 195aad8e6aadea02ba4c1daf4df301f34aeeb4a3 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Wed, 2 Aug 2023 14:28:47 +0300 Subject: [PATCH 07/12] Merge pull request #24069 from dkurt:openvino_detection_layer DetectionOutput layer on OpenVINO without limitations #24069 ### Pull Request Readiness Checklist required for https://github.com/opencv/opencv/pull/23987 See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. 
- [x] The feature is well documented and sample code can be built with the project CMake --- .../dnn/src/layers/detection_output_layer.cpp | 29 ++++++++++++++++--- modules/dnn/test/test_caffe_importer.cpp | 29 ++++++++++++++----- modules/dnn/test/test_model.cpp | 11 ++++--- modules/dnn/test/test_tf_importer.cpp | 5 ++++ 4 files changed, 58 insertions(+), 16 deletions(-) diff --git a/modules/dnn/src/layers/detection_output_layer.cpp b/modules/dnn/src/layers/detection_output_layer.cpp index 61d4f44432..26f483a770 100644 --- a/modules/dnn/src/layers/detection_output_layer.cpp +++ b/modules/dnn/src/layers/detection_output_layer.cpp @@ -221,7 +221,7 @@ public: { return backendId == DNN_BACKEND_OPENCV || (backendId == DNN_BACKEND_CUDA && !_groupByClasses) || - (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && !_locPredTransposed && _bboxesNormalized); + backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH; } bool getMemoryShapes(const std::vector &inputs, @@ -1006,9 +1006,30 @@ public: virtual Ptr initNgraph(const std::vector >& inputs, const std::vector >& nodes) CV_OVERRIDE { CV_Assert(nodes.size() == 3); - auto& box_logits = nodes[0].dynamicCast()->node; - auto& class_preds = nodes[1].dynamicCast()->node; - auto& proposals = nodes[2].dynamicCast()->node; + auto box_logits = nodes[0].dynamicCast()->node; + auto class_preds = nodes[1].dynamicCast()->node; + auto proposals = nodes[2].dynamicCast()->node; + + if (_locPredTransposed) { + // Convert box predictions from yxYX to xyXY + box_logits = std::make_shared(box_logits, + std::make_shared(ngraph::element::i32, ngraph::Shape{3}, std::vector{0, -1, 2}), + true + ); + int axis = 2; + box_logits = std::make_shared(box_logits, + std::make_shared(ngraph::element::i32, ngraph::Shape{1}, &axis), + ngraph::op::v1::Reverse::Mode::INDEX + ); + } + + auto shape = std::make_shared(ngraph::element::i32, ngraph::Shape{2}, std::vector{0, -1}); + box_logits = std::make_shared(box_logits, shape, true); + class_preds = std::make_shared(class_preds, shape, true); + proposals = std::make_shared(proposals, + std::make_shared(ngraph::element::i32, ngraph::Shape{3}, std::vector{0, _varianceEncodedInTarget ? 
1 : 2, -1}), + true + ); ngraph::op::DetectionOutputAttrs attrs; attrs.num_classes = _numClasses; diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp index 809b959a21..003592a68a 100644 --- a/modules/dnn/test/test_caffe_importer.cpp +++ b/modules/dnn/test/test_caffe_importer.cpp @@ -731,7 +731,7 @@ TEST_P(Test_Caffe_nets, FasterRCNN_vgg16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif - double scoreDiff = 0.0; + double scoreDiff = 0.0, iouDiff = 0.0; #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) // Check 'backward_compatible_check || in_out_elements_equal' failed at core/src/op/reshape.cpp:427: // While validating node 'v1::Reshape bbox_pred_reshape (bbox_pred[0]:f32{1,84}, Constant_265242[0]:i64{4}) -> (f32{?,?,?,?})' with friendly_name 'bbox_pred_reshape': @@ -741,11 +741,20 @@ TEST_P(Test_Caffe_nets, FasterRCNN_vgg16) if (target == DNN_TARGET_OPENCL_FP16) scoreDiff = 0.02; #endif +#if defined(INF_ENGINE_RELEASE) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { + iouDiff = 0.02; + if (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16) { + scoreDiff = 0.04; + iouDiff = 0.06; + } + } +#endif static Mat ref = (Mat_(3, 7) << 0, 2, 0.949398, 99.2454, 210.141, 601.205, 462.849, 0, 7, 0.997022, 481.841, 92.3218, 722.685, 175.953, 0, 12, 0.993028, 133.221, 189.377, 350.994, 563.166); - testFaster("faster_rcnn_vgg16.prototxt", "VGG16_faster_rcnn_final.caffemodel", ref, scoreDiff); + testFaster("faster_rcnn_vgg16.prototxt", "VGG16_faster_rcnn_final.caffemodel", ref, scoreDiff, iouDiff); } TEST_P(Test_Caffe_nets, FasterRCNN_zf) @@ -766,9 +775,6 @@ TEST_P(Test_Caffe_nets, FasterRCNN_zf) ); #endif - if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || - backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_OPENCL_FP16) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16); if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD); @@ -779,7 +785,14 @@ TEST_P(Test_Caffe_nets, FasterRCNN_zf) static Mat ref = (Mat_(3, 7) << 0, 2, 0.90121, 120.407, 115.83, 570.586, 528.395, 0, 7, 0.988779, 469.849, 75.1756, 718.64, 186.762, 0, 12, 0.967198, 138.588, 206.843, 329.766, 553.176); - testFaster("faster_rcnn_zf.prototxt", "ZF_faster_rcnn_final.caffemodel", ref); + + double scoreDiff = 0.0, iouDiff = 0.0; + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { + scoreDiff = 0.02; + iouDiff = 0.13; + } + + testFaster("faster_rcnn_zf.prototxt", "ZF_faster_rcnn_final.caffemodel", ref, scoreDiff, iouDiff); } TEST_P(Test_Caffe_nets, RFCN) @@ -802,8 +815,8 @@ TEST_P(Test_Caffe_nets, RFCN) iouDiff = 0.12; } -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) +#if defined(INF_ENGINE_RELEASE) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { scoreDiff = 0.1f; iouDiff = 0.2f; diff --git a/modules/dnn/test/test_model.cpp b/modules/dnn/test/test_model.cpp index bd03551ab8..a19923bf28 100644 --- a/modules/dnn/test/test_model.cpp +++ b/modules/dnn/test/test_model.cpp @@ -447,14 +447,17 @@ TEST_P(Test_Model, DetectionOutput) { if (backend == DNN_BACKEND_OPENCV) scoreDiff = 4e-3; -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2022010000) - else if (backend == 
DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - scoreDiff = 4e-2; -#endif else scoreDiff = 2e-2; iouDiff = 1.8e-1; } +#if defined(INF_ENGINE_RELEASE) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + scoreDiff = 0.05; + iouDiff = 0.08; + } +#endif testDetectModel(weights_file, config_file, img_path, refClassIds, refConfidences, refBoxes, scoreDiff, iouDiff, confThreshold, nmsThreshold, size, mean); diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index e2dfbc706e..42d98baced 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -1816,6 +1816,11 @@ TEST_P(Test_TensorFlow_nets, Mask_RCNN) double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CPU_FP16) ? 0.2 : 2e-5; double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CPU_FP16) ? 0.018 : default_lInf; + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + { + scoreDiff = std::max(scoreDiff, 0.02); + iouDiff = std::max(iouDiff, 0.009); + } normAssertDetections(refDetections, outDetections, "", /*threshold for zero confidence*/1e-5, scoreDiff, iouDiff); // Output size of masks is NxCxHxW where From 0245c0cd10387f2de7c7bd076585a285d977e53c Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Wed, 2 Aug 2023 14:39:11 +0300 Subject: [PATCH 08/12] Merge pull request #24072 from dkurt:openvino_cpu_tests Remove legacy nGraph logic #24072 ### Pull Request Readiness Checklist TODO: - [x] Test with OpenVINO 2021.4 (tested locally) See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. 
- [x] The feature is well documented and sample code can be built with the project CMake --- modules/dnn/src/ie_ngraph.cpp | 101 +----------------- modules/dnn/src/ie_ngraph.hpp | 8 -- modules/dnn/src/net_openvino.cpp | 1 - modules/dnn/test/test_darknet_importer.cpp | 2 +- modules/dnn/test/test_ie_models.cpp | 4 +- ...conformance_layer_filter__openvino.inl.hpp | 4 +- 6 files changed, 5 insertions(+), 115 deletions(-) diff --git a/modules/dnn/src/ie_ngraph.cpp b/modules/dnn/src/ie_ngraph.cpp index a49976de74..140d4b0d2f 100644 --- a/modules/dnn/src/ie_ngraph.cpp +++ b/modules/dnn/src/ie_ngraph.cpp @@ -446,66 +446,6 @@ void InfEngineNgraphNet::addOutput(const Ptr& node) requestedOutputs.insert({name, node.get()}); } -void InfEngineNgraphNet::setNodePtr(std::shared_ptr* ptr) { - all_nodes.emplace((*ptr)->get_friendly_name(), ptr); -} - - void InfEngineNgraphNet::release() - { - // FIXIT release should not be conditional, release ALL - for (auto& node : components.back()) { -#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4) - if (!(ngraph::op::is_parameter(node) || ngraph::op::is_output(node) || ngraph::op::is_constant(node)) ) { -#else - if (!(node->is_parameter() || node->is_output() || node->is_constant()) ) { -#endif - auto it = all_nodes.find(node->get_friendly_name()); - if (it != all_nodes.end()) { - it->second->reset(); - all_nodes.erase(it); - } - } - } - } - -void InfEngineNgraphNet::dfs(std::shared_ptr& node, - std::vector>& comp, - std::unordered_map& used) { - used[node->get_friendly_name()] = true; - comp.push_back(node); - auto inputs = node->get_users(); - for (size_t i = 0; i < node->get_input_size(); ++i) { - inputs.push_back(node->input_value(i).get_node()->shared_from_this()); - } - - for (auto& to : inputs) { - if (!used[to->get_friendly_name()]) { - dfs(to, comp, used); - } - } -} - -int InfEngineNgraphNet::getNumComponents() -{ - if (!components.empty()) { - return components.size(); - } - std::unordered_map used; - auto inputs = ngraph_function->get_ordered_ops(); - for (auto& node : inputs) { - used.emplace(node->get_friendly_name(), false); - } - - for (auto& node : inputs) { - if (!used[node->get_friendly_name()]) { - std::vector> current_comp; - dfs(node, current_comp, used); - components.push_back(current_comp); - } - } - return components.size(); -} - void InfEngineNgraphNet::createNet(Target targetId) { if (!hasNetOwner) { @@ -524,46 +464,7 @@ void InfEngineNgraphNet::createNet(Target targetId) { } CV_Assert_N(!inputs_vec.empty(), !outs.empty()); ngraph_function = std::make_shared(outs, inputs_vec); - - int num_comp = getNumComponents(); - CV_LOG_DEBUG(NULL, "DNN/IE: number of subgraphs: " << num_comp); - if (num_comp > 1) { - for (int i = num_comp - 1; i >= 0; --i) { - ngraph::ResultVector outputs; - ngraph::ParameterVector inps; - for (auto& node : components.back()) { -#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4) - if (ngraph::op::is_parameter(node)) { -#else - if (node->is_parameter()) { -#endif - CV_LOG_DEBUG(NULL, "DNN/IE: subgraph[" << i << "]: +input[" << inps.size() << "] = '" << node->get_friendly_name() << "'"); - auto parameter = std::dynamic_pointer_cast(node); - inps.push_back(parameter); - } -#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4) - else if (ngraph::op::is_output(node)) { -#else - else if (node->is_output()) { -#endif - CV_LOG_DEBUG(NULL, "DNN/IE: subgraph[" << i << "]: +output[" << outputs.size() << "] = '" << node->get_friendly_name() << "'"); - auto result = std::dynamic_pointer_cast(node); - 
outputs.push_back(result); - } - } - CV_LOG_DEBUG(NULL, "DNN/IE: subgraph[" << i << ": nodes=" << components.back().size() << " inputs=" << inps.size() << " outputs=" << outputs.size()); - isInit = false; - CV_Assert_N(!inps.empty(), !outputs.empty()); - ngraph_function = std::make_shared(outputs, inps); - release(); - components.pop_back(); - init(targetId); - } - } else { - release(); - components.clear(); - init(targetId); - } + init(targetId); } } diff --git a/modules/dnn/src/ie_ngraph.hpp b/modules/dnn/src/ie_ngraph.hpp index 09afc7f117..7bb0ac09df 100644 --- a/modules/dnn/src/ie_ngraph.hpp +++ b/modules/dnn/src/ie_ngraph.hpp @@ -50,22 +50,14 @@ public: void addBlobs(const std::vector >& ptrs); void createNet(Target targetId); - void setNodePtr(std::shared_ptr* ptr); void reset(); //private: detail::NetImplBase& netImpl_; - void release(); - int getNumComponents(); - void dfs(std::shared_ptr& node, std::vector>& comp, - std::unordered_map& used); - ngraph::ParameterVector inputs_vec; std::shared_ptr ngraph_function; - std::vector>> components; - std::unordered_map* > all_nodes; InferenceEngine::ExecutableNetwork netExec; #if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2022_1) diff --git a/modules/dnn/src/net_openvino.cpp b/modules/dnn/src/net_openvino.cpp index 5704cb9b64..e147faa9f3 100644 --- a/modules/dnn/src/net_openvino.cpp +++ b/modules/dnn/src/net_openvino.cpp @@ -549,7 +549,6 @@ void NetImplOpenVINO::initBackend(const std::vector& blobsToKeep_) break; } } - ieNode->net->setNodePtr(&ieNode->node); net->addBlobs(ld.inputBlobsWrappers); net->addBlobs(ld.outputBlobsWrappers); diff --git a/modules/dnn/test/test_darknet_importer.cpp b/modules/dnn/test/test_darknet_importer.cpp index 2d61426769..fc4f02e1ad 100644 --- a/modules/dnn/test/test_darknet_importer.cpp +++ b/modules/dnn/test/test_darknet_importer.cpp @@ -1046,7 +1046,7 @@ TEST_P(Test_Darknet_layers, region) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2022010000) // accuracy on CPU, OpenCL // Expected: (normL1) <= (l1), actual: 0.000358148 vs 1e-05 // |ref| = 1.207319974899292 diff --git a/modules/dnn/test/test_ie_models.cpp b/modules/dnn/test/test_ie_models.cpp index 135caa9064..c6667c7ad2 100644 --- a/modules/dnn/test/test_ie_models.cpp +++ b/modules/dnn/test/test_ie_models.cpp @@ -465,8 +465,8 @@ TEST_P(DNNTestHighLevelAPI, predict) const std::string modelPath = getOpenVINOModel(modelName, isFP16); ASSERT_FALSE(modelPath.empty()) << modelName; - std::string xmlPath = findDataFile(modelPath + ".xml"); - std::string binPath = findDataFile(modelPath + ".bin"); + std::string xmlPath = findDataFile(modelPath + ".xml", false); + std::string binPath = findDataFile(modelPath + ".bin", false); Model model(xmlPath, binPath); Mat frame = imread(findDataFile("dnn/googlenet_1.png")); diff --git a/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp b/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp index e6a35dfab9..339746f5f2 100644 --- a/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp +++ b/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp @@ -579,9 +579,7 @@ CASE(test_dropout_default_mask_ratio) CASE(test_dropout_default_old) // no filter CASE(test_dropout_default_ratio) -#if SKIP_SET_1 - SKIP; -#endif + // no filter CASE(test_dropout_random_old) // no filter 
CASE(test_dynamicquantizelinear) From 96f23e3da1d304a6582514542843b5a3ef740a1a Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Thu, 3 Aug 2023 09:13:42 +0300 Subject: [PATCH 09/12] Merge pull request #24080 from dkurt:dnn_cuda_layers Resolve uncovered CUDA dnn layer #24080 ### Pull Request Readiness Checklist * Gelu activation layer on CUDA * Try to relax GEMM from ONNX resolves https://github.com/opencv/opencv/issues/24064 See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake --- modules/dnn/perf/perf_layer.cpp | 65 +++++++++++++++++++ modules/dnn/src/cuda/activations.cu | 7 ++ modules/dnn/src/cuda/functors.hpp | 15 +++++ .../dnn/src/cuda4dnn/kernels/activations.hpp | 3 + .../src/cuda4dnn/primitives/activation.hpp | 14 ++++ modules/dnn/src/layers/elementwise_layers.cpp | 9 ++- .../dnn/src/layers/fully_connected_layer.cpp | 4 +- modules/dnn/src/onnx/onnx_importer.cpp | 9 ++- modules/dnn/test/test_onnx_importer.cpp | 2 +- 9 files changed, 123 insertions(+), 5 deletions(-) diff --git a/modules/dnn/perf/perf_layer.cpp b/modules/dnn/perf/perf_layer.cpp index d5a9bb34af..261bc5c3ca 100644 --- a/modules/dnn/perf/perf_layer.cpp +++ b/modules/dnn/perf/perf_layer.cpp @@ -643,4 +643,69 @@ INSTANTIATE_TEST_CASE_P(/**/, Layer_ScatterND, testing::Values(std::make_tuple(D INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNorm, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU))); INSTANTIATE_TEST_CASE_P(/**/, Layer_LayerNormExpanded, testing::Values(std::make_tuple(DNN_BACKEND_OPENCV, DNN_TARGET_CPU))); + +typedef TestBaseWithParam > > Layer_FullyConnected; +PERF_TEST_P_(Layer_FullyConnected, fc) +{ + std::vector inpShape; + inpShape.reserve(4); + for (int i = 0; i < 4; ++i) { + int dim = get<0>(GetParam())[i]; + if (dim == 0) + break; + inpShape.push_back(dim); + } + Mat input(inpShape, CV_32F); + randn(input, 0, 1); + + int axis = input.dims - 1; + int outDims = get<1>(GetParam()); + bool isMatMul = get<2>(GetParam()); + int backendId = get<0>(get<3>(GetParam())); + int targetId = get<1>(get<3>(GetParam())); + + std::vector weightShape; + if (isMatMul) { + weightShape = inpShape; + weightShape[weightShape.size() - 2] = outDims; + } else { + weightShape = {outDims, (int)input.total(axis, input.dims)}; + } + Mat weights(weightShape, CV_32F); + randn(weights, 0, 1); + + LayerParams lp; + lp.set("axis", input.dims - 1); + lp.set("is_matmul", weights.dims > 2); + lp.set("bias_term", false); + lp.set("transB", true); + lp.set("num_output", (int)weights.total(0, weights.dims - 1)); + lp.blobs.resize(1, weights); + + Net net; + net.addLayerToPrev("matmul", "InnerProduct", lp); + + net.setInput(input); + net.setPreferableBackend(backendId); + net.setPreferableTarget(targetId); + + // warmup + Mat output = net.forward(); + + TEST_CYCLE() + { + net.forward(); + } + SANITY_CHECK_NOTHING(); +} +INSTANTIATE_TEST_CASE_P(/**/, Layer_FullyConnected, Combine( + Values( // input size + Vec4i(5, 512, 384), + 
Vec4i(5, 16, 512, 128)
+ ),
+ Values(256, 512, 1024), // output dimension
+ testing::Bool(), // is_matmul
+ dnnBackendsAndTargets()
+));
+
 } // namespace

diff --git a/modules/dnn/src/cuda/activations.cu b/modules/dnn/src/cuda/activations.cu
index e12457a164..e983c95a91 100644
--- a/modules/dnn/src/cuda/activations.cu
+++ b/modules/dnn/src/cuda/activations.cu
@@ -248,6 +248,11 @@ void selu(const Stream& stream, Span<T> output, View<T> input, T alpha, T gamma)
 generic_op<SeluFunctor<T>>(stream, output, input, {alpha, gamma});
 }

+template <class T>
+void gelu(const Stream& stream, Span<T> output, View<T> input) {
+ generic_op<GeluFunctor<T>>(stream, output, input);
+}
+
 template <class T>
 void sign(const Stream& stream, Span<T> output, View<T> input) {
 generic_op<SignFunctor<T>>(stream, output, input);
@@ -324,6 +329,7 @@ template void tan<__half>(const Stream&, Span<__half>, View<__half>);
 template void celu<__half>(const Stream&, Span<__half>, View<__half>, __half);
 template void hardsigmoid<__half>(const Stream&, Span<__half>, View<__half>, __half, __half);
 template void selu<__half>(const Stream&, Span<__half>, View<__half>, __half, __half);
+template void gelu<__half>(const Stream&, Span<__half>, View<__half>);
 template void thresholdedrelu<__half>(const Stream&, Span<__half>, View<__half>, __half);
 template void power<__half>(const Stream&, Span<__half>, View<__half>, __half, __half, __half);
 template void exp<__half>(const Stream&, Span<__half>, View<__half>, __half, __half);
@@ -366,6 +372,7 @@ template void tan<float>(const Stream&, Span<float>, View<float>);
 template void celu<float>(const Stream&, Span<float>, View<float>, float);
 template void hardsigmoid<float>(const Stream&, Span<float>, View<float>, float, float);
 template void selu<float>(const Stream&, Span<float>, View<float>, float, float);
+template void gelu<float>(const Stream&, Span<float>, View<float>);
 template void thresholdedrelu<float>(const Stream&, Span<float>, View<float>, float);
 template void power<float>(const Stream&, Span<float>, View<float>, float, float, float);
 template void exp<float>(const Stream&, Span<float>, View<float>, float, float);

diff --git a/modules/dnn/src/cuda/functors.hpp b/modules/dnn/src/cuda/functors.hpp
index 83a949f8e7..3e487cd98a 100644
--- a/modules/dnn/src/cuda/functors.hpp
+++ b/modules/dnn/src/cuda/functors.hpp
@@ -588,6 +588,21 @@ struct SeluFunctor {
 T alpha, gamma;
 };

+template <class T>
+struct GeluFunctor {
+ struct Params {
+ CUDA4DNN_HOST_DEVICE Params() { }
+ };
+
+ CUDA4DNN_DEVICE GeluFunctor() { }
+ CUDA4DNN_DEVICE GeluFunctor(const Params& params) { }
+
+ CUDA4DNN_DEVICE T operator()(T value) {
+ using csl::device::erf;
+ return static_cast<T>(0.5f) * value * (static_cast<T>(1.f) + erf(value * static_cast<T>(M_SQRT1_2)));
+ }
+};
+
 template <class T>
 struct ThresholdedReluFunctor {
 struct Params {

diff --git a/modules/dnn/src/cuda4dnn/kernels/activations.hpp b/modules/dnn/src/cuda4dnn/kernels/activations.hpp
index 6958b93d5e..fad549a083 100644
--- a/modules/dnn/src/cuda4dnn/kernels/activations.hpp
+++ b/modules/dnn/src/cuda4dnn/kernels/activations.hpp
@@ -114,6 +114,9 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
 template <class T>
 void selu(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, T alpha, T gamma);

+ template <class T>
+ void gelu(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);
+
 template <class T>
 void thresholdedrelu(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, T alpha);

diff --git a/modules/dnn/src/cuda4dnn/primitives/activation.hpp b/modules/dnn/src/cuda4dnn/primitives/activation.hpp
index 564202e8c0..c10f9014a5 100644
--- a/modules/dnn/src/cuda4dnn/primitives/activation.hpp
+++ b/modules/dnn/src/cuda4dnn/primitives/activation.hpp
@@ -537,6 +537,20 @@ namespace cv { namespace dnn {
namespace cuda4dnn {
 const T alpha, gamma;
 };

+ template <class T>
+ class GeluOp final : public BaseOp<GeluOp, T> {
+ public:
+ GeluOp(csl::Stream stream_) : stream(std::move(stream_)) { }
+
+ void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
+ {
+ kernels::gelu<T>(stream, output, input);
+ }
+
+ private:
+ csl::Stream stream;
+ };
+
 template <class T>
 class ThresholdedReluOp final : public BaseOp<ThresholdedReluOp, T> {
 public:

diff --git a/modules/dnn/src/layers/elementwise_layers.cpp b/modules/dnn/src/layers/elementwise_layers.cpp
index 2a34b9400b..3bcd53f95c 100644
--- a/modules/dnn/src/layers/elementwise_layers.cpp
+++ b/modules/dnn/src/layers/elementwise_layers.cpp
@@ -821,7 +821,7 @@ struct GeluFunctor : public BaseDefaultFunctor<GeluFunctor>
 bool supportBackend(int backendId, int)
 {
- return backendId == DNN_BACKEND_OPENCV;
+ return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA;
 }

 inline float calculate(float x) const
@@ -829,6 +829,13 @@ struct GeluFunctor : public BaseDefaultFunctor<GeluFunctor>
 return 0.5f * x * (1.0f + erf(x * M_SQRT1_2));
 }

+#ifdef HAVE_CUDA
+ Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
+ {
+ return make_cuda_node<cuda4dnn::GeluOp>(target, stream);
+ }
+#endif
+
 int64 getFLOPSPerElement() const { return 100; }
 };

diff --git a/modules/dnn/src/layers/fully_connected_layer.cpp b/modules/dnn/src/layers/fully_connected_layer.cpp
index e0fdac1039..1348b162d2 100644
--- a/modules/dnn/src/layers/fully_connected_layer.cpp
+++ b/modules/dnn/src/layers/fully_connected_layer.cpp
@@ -630,8 +630,10 @@ public:
 if(input_wrapper->getRank() == inp2Dim)
 return make_cuda_node<cuda4dnn::MatMulOp>(preferableTarget, std::move(context->stream), std::move(context->cublas_handle), oriMat, biasMat_, transA, transB);
- else
+ else {
+ CV_LOG_INFO(NULL, "DNN/CUDA: no implementation for MatMul with rank " << input_wrapper->getRank());
 return Ptr<BackendNode>();
+ }
 }

 auto flatten_start_axis = normalize_axis(axis, input_wrapper->getRank());

diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp
index 5cd22057ad..68769712ac 100644
--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@@ -1965,9 +1965,11 @@ void ONNXImporter::parseGemm(LayerParams& layerParams, const opencv_onnx::NodePr
 }

 int transB = layerParams.get<int>("transB", 0);
+ int secondInpDims;
 if (constBlobs.find(node_proto.input(1)) != constBlobs.end())
 {
 Mat weights = getBlob(node_proto, 1);
+ secondInpDims = weights.dims;
 if (transA == 0) // optimized branch, for now, we can only optimize the Gemm when transA = 0.
{ @@ -1993,7 +1995,10 @@ void ONNXImporter::parseGemm(LayerParams& layerParams, const opencv_onnx::NodePr } } else + { layerParams.set("transB", transB == 1); + secondInpDims = outShapes[node_proto.input(1)].size(); + } if (node_proto.input_size() == 3) { @@ -2002,7 +2007,7 @@ void ONNXImporter::parseGemm(LayerParams& layerParams, const opencv_onnx::NodePr } layerParams.set("bias_term", node_proto.input_size() == 3); - layerParams.set("is_matmul", true); + layerParams.set("is_matmul", secondInpDims > 2); addLayer(layerParams, node_proto); } @@ -2045,7 +2050,7 @@ void ONNXImporter::parseMatMul(LayerParams& layerParams, const opencv_onnx::Node layerParams.blobs.push_back(transBlob); int numOutput = layerParams.blobs[0].total(0, secondInpDims - 1); layerParams.set("num_output", numOutput); - layerParams.set("is_matmul", true); + layerParams.set("is_matmul", secondInpDims > 2); } else secondInpDims = outShapes[node_proto.input(1)].size(); diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index 49908e7ff1..6f5aabdcd1 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -102,7 +102,7 @@ public: netSoftmax.setInput(ref); ref = netSoftmax.forward(); } - normAssert(ref, out, "", l1 ? l1 : default_l1, lInf ? lInf : default_lInf); + normAssert(ref, out, basename.c_str(), l1 ? l1 : default_l1, lInf ? lInf : default_lInf); if (checkNoFallbacks) expectNoFallbacksFromIE(net); } From 4b8aeb1129f2ad70a2a5e4e7b59dcbad6b2e0129 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Fri, 4 Aug 2023 11:28:51 +0300 Subject: [PATCH 10/12] Merge pull request #24039 from dkurt:tflite_test_backends TFLite models on different backends (tests and improvements) #24039 ### Pull Request Readiness Checklist * MaxUnpooling with OpenVINO * Fully connected with transposed inputs/weights with OpenVINO * Enable backends tests for TFLite (related to https://github.com/opencv/opencv/issues/23992#issuecomment-1640691722) * Increase existing tests thresholds See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. 
- [x] The feature is well documented and sample code can be built with the project CMake --- .../dnn/src/layers/fully_connected_layer.cpp | 26 +++-- .../dnn/src/layers/max_unpooling_layer.cpp | 46 ++++++++ modules/dnn/src/layers/pooling_layer.cpp | 16 ++- modules/dnn/src/layers/resize_layer.cpp | 10 +- modules/dnn/src/net_openvino.cpp | 5 +- modules/dnn/test/test_caffe_importer.cpp | 9 +- modules/dnn/test/test_darknet_importer.cpp | 18 ++- modules/dnn/test/test_halide_layers.cpp | 24 ++-- modules/dnn/test/test_layers.cpp | 8 +- modules/dnn/test/test_onnx_importer.cpp | 7 +- modules/dnn/test/test_tf_importer.cpp | 4 +- modules/dnn/test/test_tflite_importer.cpp | 105 +++++++++++++++--- 12 files changed, 211 insertions(+), 67 deletions(-) diff --git a/modules/dnn/src/layers/fully_connected_layer.cpp b/modules/dnn/src/layers/fully_connected_layer.cpp index 1348b162d2..9cdb31023c 100644 --- a/modules/dnn/src/layers/fully_connected_layer.cpp +++ b/modules/dnn/src/layers/fully_connected_layer.cpp @@ -180,15 +180,12 @@ public: virtual bool supportBackend(int backendId) CV_OVERRIDE { bool tranAorB = transA || transB; -#ifdef HAVE_INF_ENGINE - if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) - return axis == 1 && !tranAorB; -#endif return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || (backendId == DNN_BACKEND_HALIDE && haveHalide() && axis == 1 && !tranAorB) || (backendId == DNN_BACKEND_WEBNN && axis == 1 && !tranAorB) || backendId == DNN_BACKEND_CANN || + backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH || (backendId == DNN_BACKEND_VKCOM && haveVulkan() && !tranAorB); } @@ -802,17 +799,26 @@ public: if (nodes.size() == 2) { auto& inp2 = nodes[1].dynamicCast()->node; - matmul = std::make_shared(ieInpNode, inp2, false, false); + matmul = std::make_shared(ieInpNode, inp2, transA, transB); } else { - std::vector data = {(int64_t)ieInpNode->get_shape()[0], (int64_t)blobs[0].size[1]}; - auto new_shape = std::make_shared(ngraph::element::i64, ngraph::Shape{2}, data.data()); - auto inp = std::make_shared(ieInpNode, new_shape, true); + std::vector shape(1 + normalize_axis(axis, ieInpNode->get_shape().size()), 0); + shape[shape.size() - 1] = -1; + auto inp = std::make_shared( + ieInpNode, + std::make_shared(ngraph::element::i32, ngraph::Shape{shape.size()}, shape.data()), + true + ); - std::vector weight_shape{(size_t)blobs[0].size[0], (size_t)blobs[0].size[1]}; + std::vector weight_shape; + if (isMatMul) { + weight_shape = getShape(oriMat); + } else { + weight_shape = {(size_t)blobs[0].size[0], (size_t)blobs[0].size[1]}; + } auto ieWeights = std::make_shared(ngraph::element::f32, weight_shape, blobs[0].data); - matmul = std::make_shared(inp, ieWeights, false, true); + matmul = std::make_shared(inp, ieWeights, transA, transB); } if (bias) { diff --git a/modules/dnn/src/layers/max_unpooling_layer.cpp b/modules/dnn/src/layers/max_unpooling_layer.cpp index fd47c4c919..6a599408e1 100644 --- a/modules/dnn/src/layers/max_unpooling_layer.cpp +++ b/modules/dnn/src/layers/max_unpooling_layer.cpp @@ -13,6 +13,7 @@ Implementation of Batch Normalization layer. 
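// Overview of the initNgraph() implementation added below (inferred from the
// hunk itself): both inputs are flattened to 1-D, `features` is scattered
// into a zero-filled tensor at the positions given by `indices` (a
// ScatterUpdate-style op along axis 0), and the result is reshaped to the
// output shape computed by getMemoryShapes().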
#include "layers_common.hpp" #include "../op_cuda.hpp" #include "../op_halide.hpp" +#include "../ie_ngraph.hpp" #include #include @@ -41,6 +42,7 @@ public: { return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA || + backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH || (backendId == DNN_BACKEND_HALIDE && haveHalide() && !poolPad.width && !poolPad.height); } @@ -181,6 +183,50 @@ public: #endif // HAVE_HALIDE return Ptr(); } + +#ifdef HAVE_DNN_NGRAPH + virtual Ptr initNgraph(const std::vector >& inputs, + const std::vector >& nodes) CV_OVERRIDE + { + auto features = nodes[0].dynamicCast()->node; + auto indices = nodes[1].dynamicCast()->node; + + std::vector inpShapes(nodes.size()); + std::vector outShapes, internals; + for (int i = 0; i < nodes.size(); ++i) { + std::vector shape = nodes[i].dynamicCast()->node->get_shape(); + inpShapes[i] = std::vector(shape.begin(), shape.end()); + } + getMemoryShapes(inpShapes, 1, outShapes, internals); + + Mat zeros = Mat::zeros(1, total(outShapes[0]), CV_32F); + auto zeroInp = std::make_shared(ngraph::element::f32, ngraph::Shape{zeros.total()}, zeros.data); + + int newShape = -1; + features = std::make_shared( + features, + std::make_shared(ngraph::element::i32, ngraph::Shape{1}, &newShape), + true + ); + indices = std::make_shared( + indices, + std::make_shared(ngraph::element::i32, ngraph::Shape{1}, &newShape), + true + ); + if (indices->get_element_type() != ngraph::element::i32 && indices->get_element_type() != ngraph::element::i64) { + indices = std::make_shared(indices, ngraph::element::i64); + } + + int axis = 0; + std::shared_ptr unpool = std::make_shared(zeroInp, indices, features, + std::make_shared(ngraph::element::i32, ngraph::Shape{1}, &axis)); + + auto shape = std::make_shared(ngraph::element::i32, ngraph::Shape{outShapes[0].size()}, outShapes[0].data()); + unpool = std::make_shared(unpool, shape, true); + + return Ptr(new InfEngineNgraphNode(unpool)); + } +#endif // HAVE_DNN_NGRAPH }; Ptr MaxUnpoolLayer::create(const LayerParams& params) diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index c58405507e..5caaa36ba0 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -209,7 +209,7 @@ public: #ifdef HAVE_INF_ENGINE if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { - return !computeMaxIdx && type != STOCHASTIC && kernel_size.size() > 1 && (kernel_size.size() != 3 || !isArmComputePlugin()); + return type != STOCHASTIC && kernel_size.size() > 1 && (kernel_size.size() != 3 || !isArmComputePlugin()); } #endif if (backendId == DNN_BACKEND_OPENCV) @@ -613,9 +613,17 @@ public: return Ptr(new InfEngineNgraphNode(reduce_sum)); } else if (type == MAX) { - auto max_pool = std::make_shared(ieInpNode, ngraph::Strides(strides), - ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size), - rounding_type, pad_type); + std::shared_ptr max_pool; + if (computeMaxIdx) { + std::vector dilations(kernel_size.size(), 1); + max_pool = std::make_shared(ieInpNode, ngraph::Strides(strides), ngraph::Strides(dilations), + ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size), + rounding_type, pad_type); + } else { + max_pool = std::make_shared(ieInpNode, ngraph::Strides(strides), + ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size), + rounding_type, pad_type); + } return Ptr(new InfEngineNgraphNode(max_pool)); } else if (type == ROI) { diff --git 
a/modules/dnn/src/layers/resize_layer.cpp b/modules/dnn/src/layers/resize_layer.cpp index 02ac29de8d..607adb8aa1 100644 --- a/modules/dnn/src/layers/resize_layer.cpp +++ b/modules/dnn/src/layers/resize_layer.cpp @@ -410,7 +410,10 @@ public: } attrs.shape_calculation_mode = ngraph::op::v4::Interpolate::ShapeCalcMode::SIZES; - if (alignCorners) { + CV_Assert(!halfPixelCenters || !alignCorners); + if (halfPixelCenters) { + attrs.coordinate_transformation_mode = ngraph::op::v4::Interpolate::CoordinateTransformMode::HALF_PIXEL; + } else if (alignCorners) { attrs.coordinate_transformation_mode = ngraph::op::v4::Interpolate::CoordinateTransformMode::ALIGN_CORNERS; } @@ -427,7 +430,10 @@ public: } attrs.shape_calculation_mode = ngraph::op::v4::Interpolate::ShapeCalcMode::sizes; - if (alignCorners) { + CV_Assert(!halfPixelCenters || !alignCorners); + if (halfPixelCenters) { + attrs.coordinate_transformation_mode = ngraph::op::v4::Interpolate::CoordinateTransformMode::half_pixel; + } else if (alignCorners) { attrs.coordinate_transformation_mode = ngraph::op::v4::Interpolate::CoordinateTransformMode::align_corners; } diff --git a/modules/dnn/src/net_openvino.cpp b/modules/dnn/src/net_openvino.cpp index e147faa9f3..e974ce34a3 100644 --- a/modules/dnn/src/net_openvino.cpp +++ b/modules/dnn/src/net_openvino.cpp @@ -476,13 +476,14 @@ void NetImplOpenVINO::initBackend(const std::vector& blobsToKeep_) { int lid = ld.inputBlobsId[i].lid; int oid = ld.inputBlobsId[i].oid; - if (oid == 0 || lid == 0) - continue; auto ieInpNode = inputNodes[i].dynamicCast(); const auto& ngraph_input_node = ieInpNode->node; CV_LOG_DEBUG(NULL, "DNN/IE: bind output port " << lid << ":" << oid << " (" << ngraph_input_node->get_friendly_name() << ":" << ngraph_input_node->get_type_info().name << ")"); + if ((oid == 0 && ngraph_input_node->get_output_size() == 1) || lid == 0) + continue; + // Handle parameters from other subnets. 
Output port is not used in this case #if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4) if ((ngraph::op::is_parameter(ngraph_input_node) || ngraph::op::is_constant(ngraph_input_node)) && diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp index 003592a68a..708e353aac 100644 --- a/modules/dnn/test/test_caffe_importer.cpp +++ b/modules/dnn/test/test_caffe_importer.cpp @@ -732,16 +732,9 @@ TEST_P(Test_Caffe_nets, FasterRCNN_vgg16) #endif double scoreDiff = 0.0, iouDiff = 0.0; -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2022010000) - // Check 'backward_compatible_check || in_out_elements_equal' failed at core/src/op/reshape.cpp:427: - // While validating node 'v1::Reshape bbox_pred_reshape (bbox_pred[0]:f32{1,84}, Constant_265242[0]:i64{4}) -> (f32{?,?,?,?})' with friendly_name 'bbox_pred_reshape': - // Requested output shape {1,6300,4,1} is incompatible with input shape {1, 84} +#if defined(INF_ENGINE_RELEASE) if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); - if (target == DNN_TARGET_OPENCL_FP16) - scoreDiff = 0.02; -#endif -#if defined(INF_ENGINE_RELEASE) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { iouDiff = 0.02; if (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16) { diff --git a/modules/dnn/test/test_darknet_importer.cpp b/modules/dnn/test/test_darknet_importer.cpp index fc4f02e1ad..2c734b4492 100644 --- a/modules/dnn/test/test_darknet_importer.cpp +++ b/modules/dnn/test/test_darknet_importer.cpp @@ -102,11 +102,14 @@ TEST(Test_Darknet, read_yolo_voc_stream) class Test_Darknet_layers : public DNNTestLayer { public: - void testDarknetLayer(const std::string& name, bool hasWeights = false, bool testBatchProcessing = true) + void testDarknetLayer(const std::string& name, bool hasWeights = false, bool testBatchProcessing = true, + double l1 = 0.0, double lInf = 0.0) { SCOPED_TRACE(name); Mat inp = blobFromNPY(findDataFile("dnn/darknet/" + name + "_in.npy")); Mat ref = blobFromNPY(findDataFile("dnn/darknet/" + name + "_out.npy")); + l1 = l1 ? l1 : default_l1; + lInf = lInf ? 
lInf : default_lInf; std::string cfg = findDataFile("dnn/darknet/" + name + ".cfg"); std::string model = ""; @@ -120,7 +123,7 @@ public: net.setPreferableTarget(target); net.setInput(inp); Mat out = net.forward(); - normAssert(out, ref, "", default_l1, default_lInf); + normAssert(out, ref, "", l1, lInf); if (inp.size[0] == 1 && testBatchProcessing) // test handling of batch size { @@ -166,8 +169,8 @@ public: }*/ ASSERT_EQ(out2.dims, ref2.dims) << ref.dims; - normAssert(out2(ranges0), ref2, "", default_l1, default_lInf); - normAssert(out2(ranges1), ref2, "", default_l1, default_lInf); + normAssert(out2(ranges0), ref2, "", l1, lInf); + normAssert(out2(ranges1), ref2, "", l1, lInf); } } }; @@ -1116,7 +1119,12 @@ TEST_P(Test_Darknet_layers, connected) applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16); if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_CPU_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_CPU_FP16); - testDarknetLayer("connected", true); + double l1 = 0.0; + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) + { + l1 = 3e-5; + } + testDarknetLayer("connected", true, true, l1); } TEST_P(Test_Darknet_layers, relu) diff --git a/modules/dnn/test/test_halide_layers.cpp b/modules/dnn/test/test_halide_layers.cpp index 6a7958ecee..d8a16d3efa 100644 --- a/modules/dnn/test/test_halide_layers.cpp +++ b/modules/dnn/test/test_halide_layers.cpp @@ -361,22 +361,9 @@ TEST_P(MaxPooling, Accuracy) Backend backendId = get<0>(get<5>(GetParam())); Target targetId = get<1>(get<5>(GetParam())); -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2018050000) - if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && targetId == DNN_TARGET_MYRIAD - && inSize == Size(7, 6) && kernel == Size(3, 2) - && (stride == Size(1, 1) || stride == Size(2, 2)) - && (pad == Size(0, 1) || pad == Size(1, 1)) - ) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION); -#endif - -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2018050000) - if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && targetId == DNN_TARGET_MYRIAD - && (kernel == Size(2, 2) || kernel == Size(3, 2)) - && stride == Size(1, 1) && (pad == Size(0, 0) || pad == Size(0, 1)) - ) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION); -#endif + // https://github.com/openvinotoolkit/openvino/issues/18731 + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && stride != Size(1, 1)) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000) if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && targetId == DNN_TARGET_MYRIAD @@ -467,6 +454,11 @@ TEST_P(FullyConnected, Accuracy) { l1 = 0.01; } + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && targetId == DNN_TARGET_OPENCL) + { + l1 = 5e-3; + lInf = 7e-3; + } #endif if (targetId == DNN_TARGET_CUDA_FP16) l1 = 0.015; diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp index 763d94b99c..c820283a94 100644 --- a/modules/dnn/test/test_layers.cpp +++ b/modules/dnn/test/test_layers.cpp @@ -215,7 +215,13 @@ TEST_P(Test_Caffe_layers, InnerProduct) if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_CPU_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_CPU_FP16); - testLayerUsingCaffeModels("layer_inner_product", true); + double l1 = 0.0, lInf = 0.0; + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + { + l1 = 5e-3; + lInf = 2e-2; + } 
diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp
index 6f5aabdcd1..d695b1c202 100644
--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@@ -52,7 +52,7 @@ public:
     }

     void testONNXModels(const String& basename, const Extension ext = npy,
-                        const double l1 = 0, const float lInf = 0, const bool useSoftmax = false,
+                        double l1 = 0, double lInf = 0, const bool useSoftmax = false,
                         bool checkNoFallbacks = true, int numInps = 1)
     {
         String onnxmodel = _tf("models/" + basename + ".onnx", required);
@@ -102,6 +102,11 @@ public:
             netSoftmax.setInput(ref);
             ref = netSoftmax.forward();
         }
+        if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)
+        {
+            l1 = std::max(l1, 1.4e-3);
+            lInf = std::max(lInf, 8e-3);
+        }
         normAssert(ref, out, basename.c_str(), l1 ? l1 : default_l1, lInf ? lInf : default_lInf);
         if (checkNoFallbacks)
             expectNoFallbacksFromIE(net);
diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp
index 42d98baced..274fa8cee0 100644
--- a/modules/dnn/test/test_tf_importer.cpp
+++ b/modules/dnn/test/test_tf_importer.cpp
@@ -1818,8 +1818,8 @@ TEST_P(Test_TensorFlow_nets, Mask_RCNN)
     double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CPU_FP16) ? 0.018 : default_lInf;
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
     {
-        scoreDiff = std::max(scoreDiff, 0.02);
-        iouDiff = std::max(iouDiff, 0.009);
+        scoreDiff = std::max(scoreDiff, 0.06);
+        iouDiff = std::max(iouDiff, 0.01);
     }

     normAssertDetections(refDetections, outDetections, "", /*threshold for zero confidence*/1e-5, scoreDiff, iouDiff);
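The next diff converts the TFLite tests from plain TEST to the parameterized TEST_P form, so each test body runs once per (backend, target) pair supplied by dnnBackendsAndTargets(). A minimal sketch of the pattern, assuming the DNNTestLayer fixture used throughout these tests; the fixture name Test_Sketch and the model path are hypothetical:

    // Sketch: a GTest fixture test parameterized over DNN backends/targets.
    class Test_Sketch : public DNNTestLayer {};  // exposes `backend` and `target`

    TEST_P(Test_Sketch, runs_on_each_backend)
    {
        Net net = readNet(findDataFile("dnn/tflite/some_model.tflite", false));  // hypothetical model
        net.setPreferableBackend(backend);  // values come from GetParam() via the fixture
        net.setPreferableTarget(target);
        // ... set input and assert on outputs ...
    }

    // Without this instantiation, the TEST_P bodies above never execute.
    INSTANTIATE_TEST_CASE_P(/**/, Test_Sketch, dnnBackendsAndTargets());

This is also why the helper functions below gain a Test_TFLite:: prefix and explicit setPreferableBackend/setPreferableTarget calls: as free functions they had no access to the fixture's parameters.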
diff --git a/modules/dnn/test/test_tflite_importer.cpp b/modules/dnn/test/test_tflite_importer.cpp
index c5bee0c086..19b3f3a94a 100644
--- a/modules/dnn/test/test_tflite_importer.cpp
+++ b/modules/dnn/test/test_tflite_importer.cpp
@@ -20,6 +20,14 @@ namespace opencv_test { namespace {
 using namespace cv;
 using namespace cv::dnn;

+class Test_TFLite : public DNNTestLayer {
+public:
+    void testModel(Net& net, const std::string& modelName, const Mat& input, double l1 = 0, double lInf = 0);
+    void testModel(const std::string& modelName, const Mat& input, double l1 = 0, double lInf = 0);
+    void testModel(const std::string& modelName, const Size& inpSize, double l1 = 0, double lInf = 0);
+    void testLayer(const std::string& modelName, double l1 = 0, double lInf = 0);
+};
+
 void testInputShapes(const Net& net, const std::vector<Mat>& inps) {
     std::vector<MatShape> inLayerShapes;
     std::vector<MatShape> outLayerShapes;
@@ -31,8 +39,14 @@ void testInputShapes(const Net& net, const std::vector<Mat>& inps) {
     }
 }

-void testModel(Net& net, const std::string& modelName, const Mat& input, double l1 = 1e-5, double lInf = 1e-4)
+void Test_TFLite::testModel(Net& net, const std::string& modelName, const Mat& input, double l1, double lInf)
 {
+    l1 = l1 ? l1 : default_l1;
+    lInf = lInf ? lInf : default_lInf;
+
+    net.setPreferableBackend(backend);
+    net.setPreferableTarget(target);
+
     testInputShapes(net, {input});
     net.setInput(input);

@@ -48,20 +62,20 @@ void testModel(Net& net, const std::string& modelName, const Mat& input, double
     }
 }

-void testModel(const std::string& modelName, const Mat& input, double l1 = 1e-5, double lInf = 1e-4)
+void Test_TFLite::testModel(const std::string& modelName, const Mat& input, double l1, double lInf)
 {
     Net net = readNet(findDataFile("dnn/tflite/" + modelName + ".tflite", false));
     testModel(net, modelName, input, l1, lInf);
 }

-void testModel(const std::string& modelName, const Size& inpSize, double l1 = 1e-5, double lInf = 1e-4)
+void Test_TFLite::testModel(const std::string& modelName, const Size& inpSize, double l1, double lInf)
 {
     Mat input = imread(findDataFile("cv/shared/lena.png"));
     input = blobFromImage(input, 1.0 / 255, inpSize, 0, true);
     testModel(modelName, input, l1, lInf);
 }

-void testLayer(const std::string& modelName, double l1 = 1e-5, double lInf = 1e-
+void Test_TFLite::testLayer(const std::string& modelName, double l1, double lInf)
 {
     Mat inp = blobFromNPY(findDataFile("dnn/tflite/" + modelName + "_inp.npy"));
     Net net = readNet(findDataFile("dnn/tflite/" + modelName + ".tflite"));
@@ -69,29 +83,66 @@ void testLayer(const std::string& modelName, double l1 = 1e-5, double lInf = 1e-
 }

 // https://google.github.io/mediapipe/solutions/face_mesh
-TEST(Test_TFLite, face_landmark)
+TEST_P(Test_TFLite, face_landmark)
 {
-    testModel("face_landmark", Size(192, 192), 2e-5, 2e-4);
+    if (backend == DNN_BACKEND_CUDA && target == DNN_TARGET_CUDA_FP16)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA_FP16);
+    double l1 = 2e-5, lInf = 2e-4;
+    if (target == DNN_TARGET_CPU_FP16 || target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD ||
+        (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL))
+    {
+        l1 = 0.15;
+        lInf = 0.82;
+    }
+    testModel("face_landmark", Size(192, 192), l1, lInf);
 }

 // https://google.github.io/mediapipe/solutions/face_detection
-TEST(Test_TFLite, face_detection_short_range)
+TEST_P(Test_TFLite, face_detection_short_range)
 {
-    testModel("face_detection_short_range", Size(128, 128));
+    double l1 = 0, lInf = 2e-4;
+    if (target == DNN_TARGET_CPU_FP16 || target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD ||
+        (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL))
+    {
+        l1 = 0.04;
+        lInf = 0.8;
+    }
+    testModel("face_detection_short_range", Size(128, 128), l1, lInf);
 }

 // https://google.github.io/mediapipe/solutions/selfie_segmentation
-TEST(Test_TFLite, selfie_segmentation)
+TEST_P(Test_TFLite, selfie_segmentation)
 {
-    testModel("selfie_segmentation", Size(256, 256));
+    double l1 = 0, lInf = 0;
+    if (target == DNN_TARGET_CPU_FP16 || target == DNN_TARGET_CUDA_FP16 || target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD ||
+        (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL))
+    {
+        l1 = 0.01;
+        lInf = 0.48;
+    }
+    testModel("selfie_segmentation", Size(256, 256), l1, lInf);
 }

-TEST(Test_TFLite, max_unpooling)
+TEST_P(Test_TFLite, max_unpooling)
 {
+    if (backend == DNN_BACKEND_CUDA)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA);
+
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target != DNN_TARGET_CPU) {
+        if (target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+        if (target == DNN_TARGET_OPENCL)      applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+        if (target == DNN_TARGET_MYRIAD)      applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+    }
+
+    if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
+
     // Because Max Unpooling is a numerically unstable operation, and a small difference between frameworks
     // might lead to a positional difference of the maximal elements in the tensor, this test checks
     // the behavior of the Max Unpooling layer only.
     Net net = readNet(findDataFile("dnn/tflite/hair_segmentation.tflite", false));
+    net.setPreferableBackend(backend);
+    net.setPreferableTarget(target);

     Mat input = imread(findDataFile("cv/shared/lena.png"));
     cvtColor(input, input, COLOR_BGR2RGBA);
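The instability the comment above describes is inherent to pooling with recorded argmax indices: two near-equal values in one pooling window can make different frameworks (or FP16 rounding) record different indices, and unpooling then writes the value to a different position. A minimal standalone sketch of the pool/unpool pair, using plain vectors rather than OpenCV Mats:

    // Sketch: 1-D max pooling with recorded indices, then unpooling.
    #include <cstddef>
    #include <vector>

    static void poolWithArgmax(const std::vector<float>& src, size_t window,
                               std::vector<float>& maxima, std::vector<size_t>& indices)
    {
        for (size_t start = 0; start + window <= src.size(); start += window)
        {
            size_t best = start;
            for (size_t i = start + 1; i < start + window; ++i)
                if (src[i] > src[best])  // '>' keeps the first of equal values; '>=' would keep the last
                    best = i;
            maxima.push_back(src[best]);
            indices.push_back(best);  // a tie or rounding difference can flip this index
        }
    }

    static std::vector<float> unpool(const std::vector<float>& maxima,
                                     const std::vector<size_t>& indices, size_t dstSize)
    {
        std::vector<float> dst(dstSize, 0.f);
        for (size_t i = 0; i < maxima.size(); ++i)
            dst[indices[i]] = maxima[i];  // output position depends entirely on the recorded index
        return dst;
    }

That is why the assertions in the next hunk recompute the argmax from the pooling input and check element-wise consistency against it, instead of comparing whole tensors to fixed reference data.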
@@ -101,7 +152,15 @@ TEST(Test_TFLite, max_unpooling)
     net.setInput(input);

     std::vector<std::vector<Mat> > outs;
-    net.forward(outs, {"p_re_lu_1", "max_pooling_with_argmax2d", "conv2d_86", "max_unpooling2d_2"});
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
+        // TODO: seems like a bug with retrieving intermediate tensors
+        net.forward(outs, {"conv2d_transpose_4", "p_re_lu_1", "max_pooling_with_argmax2d", "conv2d_86", "max_unpooling2d_2"});
+        outs.erase(outs.begin());
+    }
+    else {
+        net.forward(outs, {"p_re_lu_1", "max_pooling_with_argmax2d", "conv2d_86", "max_unpooling2d_2"});
+    }
+
     ASSERT_EQ(outs.size(), 4);
     ASSERT_EQ(outs[0].size(), 1);
     ASSERT_EQ(outs[1].size(), 2);
@@ -117,6 +176,8 @@ TEST(Test_TFLite, max_unpooling)
     ASSERT_EQ(poolOut.size, poolIds.size);
     ASSERT_EQ(poolOut.size, unpoolInp.size);

+    ASSERT_EQ(countNonZero(poolInp), poolInp.total());
+
     for (int c = 0; c < 32; ++c) {
         float *poolInpData = poolInp.ptr<float>(0, c);
         float *poolOutData = poolOut.ptr<float>(0, c);
@@ -135,15 +196,19 @@ TEST(Test_TFLite, max_unpooling)
             }
         }
         EXPECT_EQ(poolInpData[maxIdx], poolOutData[y * 64 + x]) << errMsg;
-        EXPECT_EQ(poolIdsData[y * 64 + x], (float)maxIdx) << errMsg;
+        if (backend != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) {
+            EXPECT_EQ(poolIdsData[y * 64 + x], (float)maxIdx) << errMsg;
+        }
         EXPECT_EQ(unpoolOutData[maxIdx], unpoolInpData[y * 64 + x]) << errMsg;
         }
     }
     }
 }

-TEST(Test_TFLite, EfficientDet_int8) {
+TEST_P(Test_TFLite, EfficientDet_int8) {
     Net net = readNet(findDataFile("dnn/tflite/coco_efficientdet_lite0_v1_1.0_quant_2021_09_06.tflite", false));
+    net.setPreferableBackend(backend);
+    net.setPreferableTarget(target);

     Mat img = imread(findDataFile("dnn/dog416.png"));
     Mat blob = blobFromImage(img, 1.0, Size(320, 320));
@@ -158,10 +223,18 @@ TEST(Test_TFLite, EfficientDet_int8) {
     normAssertDetections(ref, out, "", 0.5, 0.05, 0.1);
 }

-TEST(Test_TFLite, replicate_by_pack) {
-    testLayer("replicate_by_pack");
+TEST_P(Test_TFLite, replicate_by_pack) {
+    double l1 = 0, lInf = 0;
+    if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)
+    {
+        l1 = 4e-4;
+        lInf = 2e-3;
+    }
+    testLayer("replicate_by_pack", l1, lInf);
 }

+INSTANTIATE_TEST_CASE_P(/**/, Test_TFLite, dnnBackendsAndTargets());
+
 }} // namespace

 #endif // OPENCV_TEST_DNN_TFLITE

From 2ff16d4c45b122bef659eee4b0ee2fa970b3bfae Mon Sep 17 00:00:00 2001
From: Laurent Berger <3591626+LaurentBerger@users.noreply.github.com>
Date: Fri, 4 Aug 2023 11:18:49 +0200
Subject: [PATCH 11/12] Merge pull request #24101 from LaurentBerger:I24076

Invalid memory access fix for ONNX split layer parser #24076 #24101

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work https://github.com/opencv/opencv/issues/24076
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
---
 modules/dnn/src/onnx/onnx_importer.cpp | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp
index 68769712ac..24e8b3f913 100644
--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@@ -1385,13 +1385,19 @@ void ONNXImporter::parseSplit(LayerParams& layerParams, const opencv_onnx::NodeP
         CV_Assert(constBlobs.find(node_proto.input(1)) != constBlobs.end());
         Mat splitsBlob = getBlob(node_proto, 1);
         int splitSize = splitsBlob.total();
-
-        std::vector<int> slicePoints(splitSize - 1, splitsBlob.at<int>(0));
-        for (int i = 1; i < splitSize - 1; ++i)
+        if (splitSize == 1)
         {
-            slicePoints[i] = slicePoints[i - 1] + splitsBlob.at<int>(i);
+            layerParams.set("num_split", 1);
+        }
+        else
+        {
+            std::vector<int> slicePoints(splitSize - 1, splitsBlob.at<int>(0));
+            for (int i = 1; i < splitSize - 1; ++i)
+            {
+                slicePoints[i] = slicePoints[i - 1] + splitsBlob.at<int>(i);
+            }
+            layerParams.set("slice_point", DictValue::arrayInt(&slicePoints[0], slicePoints.size()));
         }
-        layerParams.set("slice_point", DictValue::arrayInt(&slicePoints[0], slicePoints.size()));
     }
     else
     {
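The invalid memory access fixed by this patch is in the splitSize == 1 path: slicePoints was constructed with splitSize - 1 == 0 elements, and taking &slicePoints[0] on an empty std::vector is undefined behavior. A minimal standalone sketch of the failure mode, independent of the importer code:

    // Sketch: the undefined behavior behind issue 24076.
    #include <vector>

    int main()
    {
        int splitSize = 1;                                // a Split node with one output
        std::vector<int> slicePoints(splitSize - 1, 42);  // zero elements
        // The old code effectively did this unconditionally:
        // int* p = &slicePoints[0];                      // UB on an empty vector
        int* p = slicePoints.empty() ? nullptr : &slicePoints[0];  // guarded access
        (void)p;
        return 0;
    }

The patched parser avoids building the slice_point array entirely for a single split and just sets num_split to 1.
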
From 87b7ce4415d3800f0f63801d9fad6ad71ae5f0b4 Mon Sep 17 00:00:00 2001
From: unknown
Date: Fri, 4 Aug 2023 21:57:22 +0200
Subject: [PATCH 12/12] Solved issue 24044

---
 modules/dnn/src/layers/reduce_layer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/dnn/src/layers/reduce_layer.cpp b/modules/dnn/src/layers/reduce_layer.cpp
index b983a791c5..77d8898df4 100644
--- a/modules/dnn/src/layers/reduce_layer.cpp
+++ b/modules/dnn/src/layers/reduce_layer.cpp
@@ -425,7 +425,7 @@ public:
         dtype* p_dst = dst.ptr<dtype>();

         size_t main_index = start / last_unreduced_dim;
-        size_t loop = start / last_unreduced_dim;
+        size_t loop = start % last_unreduced_dim;
         size_t origin = unprojected_steps[main_index] + loop * last_unreduced_step;
         for (int i = start; i < end; ++i) {
             Op accumulator(n_reduce, p_src[origin + projected_steps[0]]);
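The one-character change above matters because this loop body runs over a sub-range [start, end) handed out by the parallel-for framework: main_index is the unreduced slice the range starts in, and loop must be the offset within that slice, i.e. the remainder, not the quotient computed a second time. A minimal sketch of the div/mod bookkeeping, assuming a flat buffer of n_slices * slice_len elements:

    // Sketch: locating a flat range start inside a 2-D iteration space.
    #include <cstddef>

    static void locate(size_t start, size_t slice_len, size_t& slice, size_t& offset)
    {
        slice  = start / slice_len;  // which slice the range begins in
        offset = start % slice_len;  // position inside that slice (the fixed line)
        // With '/' used twice, as before the fix, offset equaled slice, so every
        // range except the one starting at element 0 began at the wrong offset.
    }

For example, with slice_len == 10 and start == 23, a worker must resume at slice 2, offset 3; the pre-fix code computed offset 2.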