From 57da72d4449b4f0ea5950e82733375e60316d7c5 Mon Sep 17 00:00:00 2001 From: Sean McBride Date: Fri, 2 Jun 2023 10:57:28 -0400 Subject: [PATCH 01/57] Fixed invalid cast and unaligned memory access Although acceptible to Intel CPUs, it's still undefined behaviour according to the C++ standard. It can be replaced with memcpy, which makes the code simpler, and it generates the same assembly code with gcc and clang with -O2 (verified with godbolt). Also expanded the test to include other little endian CPUs by testing for __LITTLE_ENDIAN__. --- modules/core/src/persistence.cpp | 33 +++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/modules/core/src/persistence.cpp b/modules/core/src/persistence.cpp index 0d64bab094..6a71c1ff03 100644 --- a/modules/core/src/persistence.cpp +++ b/modules/core/src/persistence.cpp @@ -295,16 +295,20 @@ int decodeSimpleFormat( const char* dt ) } -#if defined __i386__ || defined(_M_IX86) || defined __x86_64__ || defined(_M_X64) -#define CV_UNALIGNED_LITTLE_ENDIAN_MEM_ACCESS 1 +#if defined __i386__ || defined(_M_IX86) || defined __x86_64__ || defined(_M_X64) || \ + (defined (__LITTLE_ENDIAN__) && __LITTLE_ENDIAN__) +#define CV_LITTLE_ENDIAN_MEM_ACCESS 1 #else -#define CV_UNALIGNED_LITTLE_ENDIAN_MEM_ACCESS 0 +#define CV_LITTLE_ENDIAN_MEM_ACCESS 0 #endif static inline int readInt(const uchar* p) { -#if CV_UNALIGNED_LITTLE_ENDIAN_MEM_ACCESS - return *(const int*)p; + // On little endian CPUs, both branches produce the same result. On big endian, only the else branch does. +#if CV_LITTLE_ENDIAN_MEM_ACCESS + int val; + memcpy(&val, p, sizeof(val)); + return val; #else int val = (int)(p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24)); return val; @@ -313,8 +317,11 @@ static inline int readInt(const uchar* p) static inline double readReal(const uchar* p) { -#if CV_UNALIGNED_LITTLE_ENDIAN_MEM_ACCESS - return *(const double*)p; + // On little endian CPUs, both branches produce the same result. 
On big endian, only the else branch does. +#if CV_LITTLE_ENDIAN_MEM_ACCESS + double val; + memcpy(&val, p, sizeof(val)); + return val; #else unsigned val0 = (unsigned)(p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24)); unsigned val1 = (unsigned)(p[4] | (p[5] << 8) | (p[6] << 16) | (p[7] << 24)); @@ -326,9 +333,9 @@ static inline double readReal(const uchar* p) static inline void writeInt(uchar* p, int ival) { -#if CV_UNALIGNED_LITTLE_ENDIAN_MEM_ACCESS - int* ip = (int*)p; - *ip = ival; + // On little endian CPUs, both branches produce the same result. On big endian, only the else branch does. +#if CV_LITTLE_ENDIAN_MEM_ACCESS + memcpy(p, &ival, sizeof(ival)); #else p[0] = (uchar)ival; p[1] = (uchar)(ival >> 8); @@ -339,9 +346,9 @@ static inline void writeInt(uchar* p, int ival) static inline void writeReal(uchar* p, double fval) { -#if CV_UNALIGNED_LITTLE_ENDIAN_MEM_ACCESS - double* fp = (double*)p; - *fp = fval; + // On little endian CPUs, both branches produce the same result. On big endian, only the else branch does. +#if CV_LITTLE_ENDIAN_MEM_ACCESS + memcpy(p, &fval, sizeof(fval)); #else Cv64suf v; v.f = fval; From d25d44156b67a290a769553b5889cd6636983cf5 Mon Sep 17 00:00:00 2001 From: Wang Kai Date: Sun, 2 Jul 2023 15:33:52 +0800 Subject: [PATCH 02/57] removing unreachable codes in `gbackend` --- modules/gapi/src/api/gbackend.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/modules/gapi/src/api/gbackend.cpp b/modules/gapi/src/api/gbackend.cpp index efbe17a305..46c8dc1640 100644 --- a/modules/gapi/src/api/gbackend.cpp +++ b/modules/gapi/src/api/gbackend.cpp @@ -36,7 +36,6 @@ cv::gapi::GBackend::Priv::compile(const ade::Graph&, { // ...and this method is here for the same reason! 
GAPI_Error("InternalError"); - return {}; } std::unique_ptr @@ -224,7 +223,6 @@ void bindOutArg(Mag& mag, const RcDesc &rc, const GRunArgP &arg, HandleRMat hand default: util::throw_error(std::logic_error("Unsupported GShape type")); - break; } } @@ -256,7 +254,6 @@ void resetInternalData(Mag& mag, const Data &d) default: util::throw_error(std::logic_error("Unsupported GShape type")); - break; } } @@ -284,7 +281,6 @@ cv::GRunArg getArg(const Mag& mag, const RcDesc &ref) mag.meta().at(ref.id)); default: util::throw_error(std::logic_error("Unsupported GShape type")); - break; } } @@ -327,7 +323,6 @@ cv::GRunArgP getObjPtr(Mag& mag, const RcDesc &rc, bool is_umat) default: util::throw_error(std::logic_error("Unsupported GShape type")); - break; } } @@ -359,7 +354,6 @@ void writeBack(const Mag& mag, const RcDesc &rc, GRunArgP &g_arg) default: util::throw_error(std::logic_error("Unsupported GShape type")); - break; } } From 68968eda8dffb4ceedc46088c7740f04f585acfd Mon Sep 17 00:00:00 2001 From: Kumataro Date: Tue, 1 Aug 2023 18:56:20 +0900 Subject: [PATCH 03/57] videoio: doc: add odd width or height limitation for FFMPEG --- modules/videoio/include/opencv2/videoio.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modules/videoio/include/opencv2/videoio.hpp b/modules/videoio/include/opencv2/videoio.hpp index dbed243b56..eb2e803b3c 100644 --- a/modules/videoio/include/opencv2/videoio.hpp +++ b/modules/videoio/include/opencv2/videoio.hpp @@ -1036,6 +1036,9 @@ public: - Most codecs are lossy. If you want lossless video file you need to use a lossless codecs (eg. FFMPEG FFV1, Huffman HFYU, Lagarith LAGS, etc...) - If FFMPEG is enabled, using `codec=0; fps=0;` you can create an uncompressed (raw) video file. + - If FFMPEG is used, we allow frames of odd width or height, but in this case we truncate + the rightmost column/the bottom row. Probably, this should be handled more elegantly, + but some internal functions inside FFMPEG swscale require even width/height. 
*/ CV_WRAP VideoWriter(const String& filename, int fourcc, double fps, Size frameSize, bool isColor = true); From bea0c1b660ea94d707b5805fc7adaa764fcfdfd2 Mon Sep 17 00:00:00 2001 From: cudawarped <12133430+cudawarped@users.noreply.github.com> Date: Tue, 1 Aug 2023 15:09:37 +0300 Subject: [PATCH 04/57] cuda: Fix GpuMat::copyTo and GpuMat::converTo python bindings --- modules/core/include/opencv2/core/cuda.hpp | 46 ++++++++++++--- modules/python/test/test_cuda.py | 68 ++++++++++++++++++++++ 2 files changed, 105 insertions(+), 9 deletions(-) diff --git a/modules/core/include/opencv2/core/cuda.hpp b/modules/core/include/opencv2/core/cuda.hpp index 5dca06df98..9d210ed7b5 100644 --- a/modules/core/include/opencv2/core/cuda.hpp +++ b/modules/core/include/opencv2/core/cuda.hpp @@ -198,16 +198,32 @@ public: CV_WRAP GpuMat clone() const; //! copies the GpuMat content to device memory (Blocking call) - CV_WRAP void copyTo(OutputArray dst) const; + void copyTo(OutputArray dst) const; + //! bindings overload which copies the GpuMat content to device memory (Blocking call) + CV_WRAP void copyTo(CV_OUT GpuMat& dst) const { + copyTo(static_cast(dst)); + } //! copies the GpuMat content to device memory (Non-Blocking call) - CV_WRAP void copyTo(OutputArray dst, Stream& stream) const; + void copyTo(OutputArray dst, Stream& stream) const; + //! bindings overload which copies the GpuMat content to device memory (Non-Blocking call) + CV_WRAP void copyTo(CV_OUT GpuMat& dst, Stream& stream) const { + copyTo(static_cast(dst), stream); + } //! copies those GpuMat elements to "m" that are marked with non-zero mask elements (Blocking call) - CV_WRAP void copyTo(OutputArray dst, InputArray mask) const; + void copyTo(OutputArray dst, InputArray mask) const; + //! 
bindings overload which copies those GpuMat elements to "m" that are marked with non-zero mask elements (Blocking call) + CV_WRAP void copyTo(CV_OUT GpuMat& dst, GpuMat& mask) const { + copyTo(static_cast(dst), static_cast(mask)); + } //! copies those GpuMat elements to "m" that are marked with non-zero mask elements (Non-Blocking call) - CV_WRAP void copyTo(OutputArray dst, InputArray mask, Stream& stream) const; + void copyTo(OutputArray dst, InputArray mask, Stream& stream) const; + //! bindings overload which copies those GpuMat elements to "m" that are marked with non-zero mask elements (Non-Blocking call) + CV_WRAP void copyTo(CV_OUT GpuMat& dst, GpuMat& mask, Stream& stream) const { + copyTo(static_cast(dst), static_cast(mask), stream); + } //! sets some of the GpuMat elements to s (Blocking call) CV_WRAP GpuMat& setTo(Scalar s); @@ -222,19 +238,31 @@ public: CV_WRAP GpuMat& setTo(Scalar s, InputArray mask, Stream& stream); //! converts GpuMat to another datatype (Blocking call) - CV_WRAP void convertTo(OutputArray dst, int rtype) const; + void convertTo(OutputArray dst, int rtype) const; //! converts GpuMat to another datatype (Non-Blocking call) - CV_WRAP void convertTo(OutputArray dst, int rtype, Stream& stream) const; + void convertTo(OutputArray dst, int rtype, Stream& stream) const; + //! bindings overload which converts GpuMat to another datatype (Non-Blocking call) + CV_WRAP void convertTo(CV_OUT GpuMat& dst, int rtype, Stream& stream) const { + convertTo(static_cast(dst), rtype, stream); + } //! converts GpuMat to another datatype with scaling (Blocking call) - CV_WRAP void convertTo(OutputArray dst, int rtype, double alpha, double beta = 0.0) const; + void convertTo(OutputArray dst, int rtype, double alpha, double beta = 0.0) const; + //! 
bindings overload which converts GpuMat to another datatype with scaling(Blocking call) + CV_WRAP void convertTo(CV_OUT GpuMat& dst, int rtype, double alpha = 1.0, double beta = 0.0) const { + convertTo(static_cast(dst), rtype, alpha, beta); + } //! converts GpuMat to another datatype with scaling (Non-Blocking call) - CV_WRAP void convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const; + void convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const; //! converts GpuMat to another datatype with scaling (Non-Blocking call) - CV_WRAP void convertTo(OutputArray dst, int rtype, double alpha, double beta, Stream& stream) const; + void convertTo(OutputArray dst, int rtype, double alpha, double beta, Stream& stream) const; + //! bindings overload which converts GpuMat to another datatype with scaling (Non-Blocking call) + CV_WRAP void convertTo(CV_OUT GpuMat& dst, int rtype, double alpha, double beta, Stream& stream) const { + convertTo(static_cast(dst), rtype, alpha, beta, stream); + } CV_WRAP void assignTo(GpuMat& m, int type = -1) const; diff --git a/modules/python/test/test_cuda.py b/modules/python/test/test_cuda.py index 851a23e880..c886342832 100644 --- a/modules/python/test/test_cuda.py +++ b/modules/python/test/test_cuda.py @@ -70,6 +70,74 @@ class cuda_test(NewOpenCVTests): self.assertTrue(cuMat.step == 0) self.assertTrue(cuMat.size() == (0, 0)) + def test_cuda_convertTo(self): + # setup + npMat_8UC4 = (np.random.random((128, 128, 4)) * 255).astype(np.uint8) + npMat_32FC4 = npMat_8UC4.astype(np.single) + new_type = cv.CV_32FC4 + + # sync + # in/out + cuMat_8UC4 = cv.cuda_GpuMat(npMat_8UC4) + cuMat_32FC4 = cv.cuda_GpuMat(cuMat_8UC4.size(), new_type) + cuMat_32FC4_out = cuMat_8UC4.convertTo(new_type, cuMat_32FC4) + self.assertTrue(cuMat_32FC4.cudaPtr() == cuMat_32FC4_out.cudaPtr()) + npMat_32FC4_out = cuMat_32FC4.download() + self.assertTrue(np.array_equal(npMat_32FC4, npMat_32FC4_out)) + # out + cuMat_32FC4_out = 
cuMat_8UC4.convertTo(new_type) + npMat_32FC4_out = cuMat_32FC4.download() + self.assertTrue(np.array_equal(npMat_32FC4, npMat_32FC4_out)) + + # async + stream = cv.cuda.Stream() + cuMat_32FC4 = cv.cuda_GpuMat(cuMat_8UC4.size(), new_type) + cuMat_32FC4_out = cuMat_8UC4.convertTo(new_type, cuMat_32FC4) + # in/out + cuMat_32FC4_out = cuMat_8UC4.convertTo(new_type, 1, 0, stream, cuMat_32FC4) + self.assertTrue(cuMat_32FC4.cudaPtr() == cuMat_32FC4_out.cudaPtr()) + npMat_32FC4_out = cuMat_32FC4.download(stream) + stream.waitForCompletion() + self.assertTrue(np.array_equal(npMat_32FC4, npMat_32FC4_out)) + # out + cuMat_32FC4_out = cuMat_8UC4.convertTo(new_type, 1, 0, stream) + npMat_32FC4_out = cuMat_32FC4.download(stream) + stream.waitForCompletion() + self.assertTrue(np.array_equal(npMat_32FC4, npMat_32FC4_out)) + + def test_cuda_copyTo(self): + # setup + npMat_8UC4 = (np.random.random((128, 128, 4)) * 255).astype(np.uint8) + + # sync + # in/out + cuMat_8UC4 = cv.cuda_GpuMat(npMat_8UC4) + cuMat_8UC4_dst = cv.cuda_GpuMat(cuMat_8UC4.size(), cuMat_8UC4.type()) + cuMat_8UC4_out = cuMat_8UC4.copyTo(cuMat_8UC4_dst) + self.assertTrue(cuMat_8UC4_out.cudaPtr() == cuMat_8UC4_dst.cudaPtr()) + npMat_8UC4_out = cuMat_8UC4_out.download() + self.assertTrue(np.array_equal(npMat_8UC4, npMat_8UC4_out)) + # out + cuMat_8UC4_out = cuMat_8UC4.copyTo() + npMat_8UC4_out = cuMat_8UC4_out.download() + self.assertTrue(np.array_equal(npMat_8UC4, npMat_8UC4_out)) + + # async + stream = cv.cuda.Stream() + # in/out + cuMat_8UC4 = cv.cuda_GpuMat(npMat_8UC4) + cuMat_8UC4_dst = cv.cuda_GpuMat(cuMat_8UC4.size(), cuMat_8UC4.type()) + cuMat_8UC4_out = cuMat_8UC4.copyTo(cuMat_8UC4_dst, stream) + self.assertTrue(cuMat_8UC4_out.cudaPtr() == cuMat_8UC4_out.cudaPtr()) + npMat_8UC4_out = cuMat_8UC4_dst.download(stream) + stream.waitForCompletion() + self.assertTrue(np.array_equal(npMat_8UC4, npMat_8UC4_out)) + # out + cuMat_8UC4_out = cuMat_8UC4.copyTo(stream) + npMat_8UC4_out = cuMat_8UC4_out.download(stream) + 
stream.waitForCompletion() + self.assertTrue(np.array_equal(npMat_8UC4, npMat_8UC4_out)) + def test_cuda_denoising(self): self.assertEqual(True, hasattr(cv.cuda, 'fastNlMeansDenoising')) self.assertEqual(True, hasattr(cv.cuda, 'fastNlMeansDenoisingColored')) From e1d0f07c9099a01487bd345109f9b768d7f257bc Mon Sep 17 00:00:00 2001 From: Mihir Patil Date: Wed, 2 Aug 2023 00:01:37 -0400 Subject: [PATCH 05/57] highgui(cocoa): fix fullscreen behavior --- modules/highgui/src/window_cocoa.mm | 39 +++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/modules/highgui/src/window_cocoa.mm b/modules/highgui/src/window_cocoa.mm index 86f38d0ae8..5800ab3f94 100644 --- a/modules/highgui/src/window_cocoa.mm +++ b/modules/highgui/src/window_cocoa.mm @@ -195,6 +195,9 @@ CV_IMPL void cvDestroyWindow( const char* name) //cout << "cvDestroyWindow" << endl; CVWindow *window = cvGetWindow(name); if(window) { + if ([window styleMask] & NSFullScreenWindowMask) { + [window toggleFullScreen:nil]; + } [window close]; [windows removeObjectForKey:[NSString stringWithFormat:@"%s", name]]; } @@ -701,7 +704,11 @@ double cvGetModeWindow_COCOA( const char* name ) void cvSetModeWindow_COCOA( const char* name, double prop_value ) { CVWindow *window = nil; + +#if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_7 NSDictionary *fullscreenOptions = nil; +#endif + NSAutoreleasePool* localpool = nil; CV_FUNCNAME( "cvSetModeWindow_COCOA" ); @@ -724,7 +731,35 @@ void cvSetModeWindow_COCOA( const char* name, double prop_value ) } localpool = [[NSAutoreleasePool alloc] init]; + + // std::cout << "setting mode" << std::endl; +#if MAC_OS_X_VERSION_MAX_ALLOWED > MAC_OS_X_VERSION_10_6 + if ( ([window styleMask] & NSFullScreenWindowMask) && prop_value==CV_WINDOW_NORMAL ) + { + // std::cout << "exiting fullscreen" << std::endl; + [window toggleFullScreen:nil]; + window.status=CV_WINDOW_NORMAL; + } + else if( !([window styleMask] & NSFullScreenWindowMask) && 
prop_value==CV_WINDOW_FULLSCREEN ) + { + // std::cout << "entering fullscreen" << std::endl; + [window setCollectionBehavior:NSWindowCollectionBehaviorFullScreenPrimary]; + + NSScreen* screen = [window screen]; + + NSRect frame = [screen frame]; + [window setFrame:frame display:YES]; + + [window setContentSize:frame.size]; + + [window toggleFullScreen:nil]; + + [window setFrameTopLeftPoint: frame.origin]; + + window.status=CV_WINDOW_FULLSCREEN; + } +#else fullscreenOptions = [NSDictionary dictionaryWithObject:[NSNumber numberWithBool:YES] forKey:NSFullScreenModeSetting]; if ( [[window contentView] isInFullScreenMode] && prop_value==CV_WINDOW_NORMAL ) { @@ -736,7 +771,7 @@ void cvSetModeWindow_COCOA( const char* name, double prop_value ) [[window contentView] enterFullScreenMode:[NSScreen mainScreen] withOptions:fullscreenOptions]; window.status=CV_WINDOW_FULLSCREEN; } - +#endif [localpool drain]; __END__; @@ -810,7 +845,7 @@ void cvSetPropTopmost_COCOA( const char* name, const bool topmost ) CV_ERROR( CV_StsNullPtr, "NULL window" ); } - if ([[window contentView] isInFullScreenMode]) + if (([window styleMask] & NSFullScreenWindowMask)) { EXIT; } From e4ad7e3778d0be868568085f0a1fc6b0d84d9cfe Mon Sep 17 00:00:00 2001 From: cudawarped <12133430+cudawarped@users.noreply.github.com> Date: Wed, 19 Jul 2023 07:59:05 +0300 Subject: [PATCH 06/57] VideoCapture: remove decoder initialization when CAP_PROP_FORMAT== -1 (rawMode == true) --- modules/videoio/include/opencv2/videoio.hpp | 2 +- modules/videoio/src/cap_ffmpeg_impl.hpp | 105 +++++++++++++------- modules/videoio/test/test_ffmpeg.cpp | 30 ++++++ 3 files changed, 102 insertions(+), 35 deletions(-) diff --git a/modules/videoio/include/opencv2/videoio.hpp b/modules/videoio/include/opencv2/videoio.hpp index dbed243b56..f40afbb4a2 100644 --- a/modules/videoio/include/opencv2/videoio.hpp +++ b/modules/videoio/include/opencv2/videoio.hpp @@ -140,7 +140,7 @@ enum VideoCaptureAPIs { */ enum VideoCaptureProperties { 
CAP_PROP_POS_MSEC =0, //!< Current position of the video file in milliseconds. - CAP_PROP_POS_FRAMES =1, //!< 0-based index of the frame to be decoded/captured next. + CAP_PROP_POS_FRAMES =1, //!< 0-based index of the frame to be decoded/captured next. When the index i is set in RAW mode (CAP_PROP_FORMAT == -1) this will seek to the key frame k, where k <= i. CAP_PROP_POS_AVI_RATIO =2, //!< Relative position of the video file: 0=start of the film, 1=end of the film. CAP_PROP_FRAME_WIDTH =3, //!< Width of the frames in the video stream. CAP_PROP_FRAME_HEIGHT =4, //!< Height of the frames in the video stream. diff --git a/modules/videoio/src/cap_ffmpeg_impl.hpp b/modules/videoio/src/cap_ffmpeg_impl.hpp index 982bc5c87d..e4431b323e 100644 --- a/modules/videoio/src/cap_ffmpeg_impl.hpp +++ b/modules/videoio/src/cap_ffmpeg_impl.hpp @@ -580,6 +580,7 @@ struct CvCapture_FFMPEG bool processRawPacket(); bool rawMode; bool rawModeInitialized; + bool rawSeek; bool convertRGB; AVPacket packet_filtered; #if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(58, 20, 100) @@ -633,6 +634,7 @@ void CvCapture_FFMPEG::init() rawMode = false; rawModeInitialized = false; + rawSeek = false; convertRGB = true; memset(&packet_filtered, 0, sizeof(packet_filtered)); av_init_packet(&packet_filtered); @@ -1051,33 +1053,35 @@ bool CvCapture_FFMPEG::open(const char* _filename, const VideoCaptureParameters& return false; } } - if (params.has(CAP_PROP_HW_ACCELERATION)) - { - va_type = params.get(CAP_PROP_HW_ACCELERATION); + if(!rawMode) { + if (params.has(CAP_PROP_HW_ACCELERATION)) + { + va_type = params.get(CAP_PROP_HW_ACCELERATION); #if !USE_AV_HW_CODECS - if (va_type != VIDEO_ACCELERATION_NONE && va_type != VIDEO_ACCELERATION_ANY) - { - CV_LOG_ERROR(NULL, "VIDEOIO/FFMPEG: FFmpeg backend is build without acceleration support. Can't handle CAP_PROP_HW_ACCELERATION parameter. 
Bailout"); - return false; - } + if (va_type != VIDEO_ACCELERATION_NONE && va_type != VIDEO_ACCELERATION_ANY) + { + CV_LOG_ERROR(NULL, "VIDEOIO/FFMPEG: FFmpeg backend is build without acceleration support. Can't handle CAP_PROP_HW_ACCELERATION parameter. Bailout"); + return false; + } #endif - } - if (params.has(CAP_PROP_HW_DEVICE)) - { - hw_device = params.get(CAP_PROP_HW_DEVICE); - if (va_type == VIDEO_ACCELERATION_NONE && hw_device != -1) - { - CV_LOG_ERROR(NULL, "VIDEOIO/FFMPEG: Invalid usage of CAP_PROP_HW_DEVICE without requested H/W acceleration. Bailout"); - return false; } - if (va_type == VIDEO_ACCELERATION_ANY && hw_device != -1) + if (params.has(CAP_PROP_HW_DEVICE)) { - CV_LOG_ERROR(NULL, "VIDEOIO/FFMPEG: Invalid usage of CAP_PROP_HW_DEVICE with 'ANY' H/W acceleration. Bailout"); - return false; + hw_device = params.get(CAP_PROP_HW_DEVICE); + if (va_type == VIDEO_ACCELERATION_NONE && hw_device != -1) + { + CV_LOG_ERROR(NULL, "VIDEOIO/FFMPEG: Invalid usage of CAP_PROP_HW_DEVICE without requested H/W acceleration. Bailout"); + return false; + } + if (va_type == VIDEO_ACCELERATION_ANY && hw_device != -1) + { + CV_LOG_ERROR(NULL, "VIDEOIO/FFMPEG: Invalid usage of CAP_PROP_HW_DEVICE with 'ANY' H/W acceleration. 
Bailout"); + return false; + } + } + if (params.has(CAP_PROP_HW_ACCELERATION_USE_OPENCL)) { + use_opencl = params.get(CAP_PROP_HW_ACCELERATION_USE_OPENCL); } - } - if (params.has(CAP_PROP_HW_ACCELERATION_USE_OPENCL)) { - use_opencl = params.get(CAP_PROP_HW_ACCELERATION_USE_OPENCL); } #if USE_AV_INTERRUPT_CALLBACK if (params.has(CAP_PROP_OPEN_TIMEOUT_MSEC)) @@ -1153,6 +1157,23 @@ bool CvCapture_FFMPEG::open(const char* _filename, const VideoCaptureParameters& CV_LOG_WARNING(NULL, "Unable to read codec parameters from stream (" << _opencv_ffmpeg_get_error_string(err) << ")"); goto exit_func; } + + if (rawMode) { + video_stream = av_find_best_stream(ic, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0); + if (video_stream < 0) { + close(); + return false; + } + video_st = ic->streams[video_stream]; +#ifndef CV_FFMPEG_CODECPAR + frame.height = video_st->codec->height; + frame.width = video_st->codec->width; +#else + frame.height = video_st->codecpar->height; + frame.width = video_st->codecpar->width; +#endif + return true; + } for(i = 0; i < ic->nb_streams; i++) { #ifndef CV_FFMPEG_CODECPAR @@ -1440,6 +1461,10 @@ bool CvCapture_FFMPEG::processRawPacket() bool CvCapture_FFMPEG::grabFrame() { + if (rawSeek) { + rawSeek = false; + return true; + } bool valid = false; static const size_t max_read_attempts = cv::utils::getConfigurationParameterSizeT("OPENCV_FFMPEG_READ_ATTEMPTS", 4096); @@ -1447,7 +1472,7 @@ bool CvCapture_FFMPEG::grabFrame() size_t cur_read_attempts = 0; size_t cur_decode_attempts = 0; - if( !ic || !video_st || !context ) return false; + if( !ic || !video_st || (!rawMode && !context) ) return false; if( ic->streams[video_stream]->nb_frames > 0 && frame_number > ic->streams[video_stream]->nb_frames ) @@ -1464,7 +1489,7 @@ bool CvCapture_FFMPEG::grabFrame() #if USE_AV_SEND_FRAME_API // check if we can receive frame from previously decoded packet - valid = avcodec_receive_frame(context, picture) >= 0; + valid = rawMode ? 
false : avcodec_receive_frame(context, picture) >= 0; #endif // get the next frame @@ -1548,12 +1573,16 @@ bool CvCapture_FFMPEG::grabFrame() } if (valid) { - if( picture_pts == AV_NOPTS_VALUE_ ) - picture_pts = picture->CV_FFMPEG_PTS_FIELD != AV_NOPTS_VALUE_ && picture->CV_FFMPEG_PTS_FIELD != 0 ? picture->CV_FFMPEG_PTS_FIELD : picture->pkt_dts; - frame_number++; + if (picture_pts == AV_NOPTS_VALUE_) { + if (!rawMode) + picture_pts = picture->CV_FFMPEG_PTS_FIELD != AV_NOPTS_VALUE_ && picture->CV_FFMPEG_PTS_FIELD != 0 ? picture->CV_FFMPEG_PTS_FIELD : picture->pkt_dts; + else + picture_pts = packet.pts != AV_NOPTS_VALUE_ && packet.pts != 0 ? packet.pts : packet.dts; + frame_number++; + } } - if (!rawMode && valid && first_frame_number < 0) + if (valid && first_frame_number < 0) first_frame_number = dts_to_frame_number(picture_pts); #if USE_AV_INTERRUPT_CALLBACK @@ -1567,7 +1596,7 @@ bool CvCapture_FFMPEG::grabFrame() bool CvCapture_FFMPEG::retrieveFrame(int flag, unsigned char** data, int* step, int* width, int* height, int* cn, int* depth) { - if (!video_st || !context) + if (!video_st || (!rawMode && !context)) return false; if (rawMode || flag == extraDataIdx) @@ -1735,7 +1764,7 @@ static inline double getCodecIdFourcc(const AVCodecID codec_id) double CvCapture_FFMPEG::getProperty( int property_id ) const { - if( !video_st || !context ) return 0; + if( !video_st || (!rawMode && !context) ) return 0; switch( property_id ) { @@ -1814,7 +1843,8 @@ double CvCapture_FFMPEG::getProperty( int property_id ) const //ic->start_time_realtime is in microseconds return ((double)ic->start_time_realtime); case CAP_PROP_N_THREADS: - return static_cast(context->thread_count); + if (!rawMode) + return static_cast(context->thread_count); default: break; } @@ -1910,9 +1940,11 @@ void CvCapture_FFMPEG::get_rotation_angle() void CvCapture_FFMPEG::seek(int64_t _frame_number) { - CV_Assert(context); + if (!rawMode) { + CV_Assert(context); + } _frame_number = std::min(_frame_number, 
get_total_frames()); - int delta = 16; + int delta = !rawMode ? 16 : 0; // if we have not grabbed a single frame before first seek, let's read the first frame // and get some valuable information during the process @@ -1927,7 +1959,8 @@ void CvCapture_FFMPEG::seek(int64_t _frame_number) double time_base = r2d(ic->streams[video_stream]->time_base); time_stamp += (int64_t)(sec / time_base + 0.5); if (get_total_frames() > 1) av_seek_frame(ic, video_stream, time_stamp, AVSEEK_FLAG_BACKWARD); - avcodec_flush_buffers(context); + if(!rawMode) + avcodec_flush_buffers(context); if( _frame_number > 0 ) { grabFrame(); @@ -1935,6 +1968,10 @@ void CvCapture_FFMPEG::seek(int64_t _frame_number) if( _frame_number > 1 ) { frame_number = dts_to_frame_number(picture_pts) - first_frame_number; + if (rawMode) { + rawSeek = true; + break; + } //printf("_frame_number = %d, frame_number = %d, delta = %d\n", // (int)_frame_number, (int)frame_number, delta); diff --git a/modules/videoio/test/test_ffmpeg.cpp b/modules/videoio/test/test_ffmpeg.cpp index 35d425d5c1..0496b8c369 100644 --- a/modules/videoio/test/test_ffmpeg.cpp +++ b/modules/videoio/test/test_ffmpeg.cpp @@ -476,6 +476,16 @@ static void ffmpeg_check_read_raw(VideoCapture& cap) EXPECT_EQ(CV_8UC1, data.type()) << "CV_8UC1 != " << typeToString(data.type()); EXPECT_TRUE(data.rows == 1 || data.cols == 1) << data.size; EXPECT_EQ((size_t)37118, data.total()); + +#ifndef WIN32 + // 12 is the nearset key frame to frame 18 + EXPECT_TRUE(cap.set(CAP_PROP_POS_FRAMES, 18.)); + EXPECT_EQ(cap.get(CAP_PROP_POS_FRAMES), 12.); + cap >> data; + EXPECT_EQ(CV_8UC1, data.type()) << "CV_8UC1 != " << typeToString(data.type()); + EXPECT_TRUE(data.rows == 1 || data.cols == 1) << data.size; + EXPECT_EQ((size_t)8726, data.total()); +#endif } TEST(videoio_ffmpeg, ffmpeg_check_extra_data) @@ -506,6 +516,16 @@ TEST(videoio_ffmpeg, open_with_property) CAP_PROP_FORMAT, -1 // demux only })); + // confirm properties are returned without initializing AVCodecContext 
+ EXPECT_EQ(cap.get(CAP_PROP_FORMAT), -1); + EXPECT_EQ(static_cast(cap.get(CAP_PROP_FOURCC)), fourccFromString("FMP4")); +#ifndef WIN32 + EXPECT_EQ(cap.get(CAP_PROP_N_THREADS), 0.0); +#endif + EXPECT_EQ(cap.get(CAP_PROP_FRAME_HEIGHT), 384.0); + EXPECT_EQ(cap.get(CAP_PROP_FRAME_WIDTH), 672.0); + EXPECT_EQ(cap.get(CAP_PROP_FRAME_COUNT), 125); + EXPECT_EQ(cap.get(CAP_PROP_FPS), 24.0); ffmpeg_check_read_raw(cap); } @@ -519,6 +539,16 @@ TEST(videoio_ffmpeg, create_with_property) CAP_PROP_FORMAT, -1 // demux only }); + // confirm properties are returned without initializing AVCodecContext + EXPECT_TRUE(cap.get(CAP_PROP_FORMAT) == -1); + EXPECT_EQ(static_cast(cap.get(CAP_PROP_FOURCC)), fourccFromString("FMP4")); +#ifndef WIN32 + EXPECT_EQ(cap.get(CAP_PROP_N_THREADS), 0.0); +#endif + EXPECT_EQ(cap.get(CAP_PROP_FRAME_HEIGHT), 384.0); + EXPECT_EQ(cap.get(CAP_PROP_FRAME_WIDTH), 672.0); + EXPECT_EQ(cap.get(CAP_PROP_FRAME_COUNT), 125); + EXPECT_EQ(cap.get(CAP_PROP_FPS), 24.0); ffmpeg_check_read_raw(cap); } From afb406f1dea030fbb39654584778a5dd5c363464 Mon Sep 17 00:00:00 2001 From: Mihir Patil Date: Sun, 6 Aug 2023 20:10:05 -0400 Subject: [PATCH 07/57] style: remove trailing whitespace --- modules/highgui/src/window_cocoa.mm | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/highgui/src/window_cocoa.mm b/modules/highgui/src/window_cocoa.mm index 5800ab3f94..5e34b502db 100644 --- a/modules/highgui/src/window_cocoa.mm +++ b/modules/highgui/src/window_cocoa.mm @@ -197,7 +197,7 @@ CV_IMPL void cvDestroyWindow( const char* name) if(window) { if ([window styleMask] & NSFullScreenWindowMask) { [window toggleFullScreen:nil]; - } + } [window close]; [windows removeObjectForKey:[NSString stringWithFormat:@"%s", name]]; } @@ -731,7 +731,7 @@ void cvSetModeWindow_COCOA( const char* name, double prop_value ) } localpool = [[NSAutoreleasePool alloc] init]; - + // std::cout << "setting mode" << std::endl; #if MAC_OS_X_VERSION_MAX_ALLOWED > MAC_OS_X_VERSION_10_6 
if ( ([window styleMask] & NSFullScreenWindowMask) && prop_value==CV_WINDOW_NORMAL ) @@ -746,11 +746,11 @@ void cvSetModeWindow_COCOA( const char* name, double prop_value ) // std::cout << "entering fullscreen" << std::endl; [window setCollectionBehavior:NSWindowCollectionBehaviorFullScreenPrimary]; - NSScreen* screen = [window screen]; + NSScreen* screen = [window screen]; NSRect frame = [screen frame]; [window setFrame:frame display:YES]; - + [window setContentSize:frame.size]; [window toggleFullScreen:nil]; From ba70ec99b3c119d549fabd955e276dae73c4a9b0 Mon Sep 17 00:00:00 2001 From: Yuantao Feng Date: Wed, 9 Aug 2023 02:26:02 -0400 Subject: [PATCH 08/57] Merge pull request #24122 from fengyuentau:remove_tengine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dnn: cleanup of tengine backend #24122 🚀 Cleanup for OpenCV 5.0. Tengine backend is added for convolution layer speedup on ARM CPUs, but it is not maintained and the convolution layer on our default backend has reached similar performance to that of Tengine. Tengine backend related PRs: - https://github.com/opencv/opencv/pull/16724 - https://github.com/opencv/opencv/pull/18323 ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. 
- [x] The feature is well documented and sample code can be built with the project CMake --- 3rdparty/libtengine/tengine.cmake | 80 ---- CMakeLists.txt | 10 - cmake/OpenCVFindTengine.cmake | 78 ---- cmake/mirrors/custom.cmake | 7 +- cmake/mirrors/gitcode.cmake | 5 +- .../config_reference.markdown | 1 - modules/dnn/CMakeLists.txt | 10 - modules/dnn/src/layers/convolution_layer.cpp | 85 ---- .../include/tengine_graph_convolution.hpp | 53 --- .../src/tengine_graph_convolution.cpp | 370 ------------------ 10 files changed, 3 insertions(+), 696 deletions(-) delete mode 100644 3rdparty/libtengine/tengine.cmake delete mode 100644 cmake/OpenCVFindTengine.cmake delete mode 100644 modules/dnn/src/tengine4dnn/include/tengine_graph_convolution.hpp delete mode 100644 modules/dnn/src/tengine4dnn/src/tengine_graph_convolution.cpp diff --git a/3rdparty/libtengine/tengine.cmake b/3rdparty/libtengine/tengine.cmake deleted file mode 100644 index ee8f0cb86f..0000000000 --- a/3rdparty/libtengine/tengine.cmake +++ /dev/null @@ -1,80 +0,0 @@ -# COPYRIGHT -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# License); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# -# Copyright (c) 2020, OPEN AI LAB -# Author: qtang@openailab.com or https://github.com/BUG1989 -# qli@openailab.com -# sqfu@openailab.com - -SET(TENGINE_COMMIT_VERSION "e89cf8870de2ff0a80cfe626c0b52b2a16fb302e") -SET(OCV_TENGINE_DIR "${OpenCV_BINARY_DIR}/3rdparty/libtengine") -SET(OCV_TENGINE_SOURCE_PATH "${OCV_TENGINE_DIR}/Tengine-${TENGINE_COMMIT_VERSION}") - -IF(EXISTS "${OCV_TENGINE_SOURCE_PATH}") - MESSAGE(STATUS "Tengine is exist already at: ${OCV_TENGINE_SOURCE_PATH}") - - SET(Tengine_FOUND ON) - SET(BUILD_TENGINE ON) -ELSE() - SET(OCV_TENGINE_FILENAME "${TENGINE_COMMIT_VERSION}.zip")#name - SET(OCV_TENGINE_URL "https://github.com/OAID/Tengine/archive/") #url - SET(tengine_md5sum 23f61ebb1dd419f1207d8876496289c5) #md5sum - - ocv_download(FILENAME ${OCV_TENGINE_FILENAME} - HASH ${tengine_md5sum} - URL - "${OPENCV_TENGINE_URL}" - "$ENV{OPENCV_TENGINE_URL}" - "${OCV_TENGINE_URL}" - DESTINATION_DIR "${OCV_TENGINE_DIR}" - ID TENGINE - STATUS res - UNPACK RELATIVE_URL) - - if (NOT res) - MESSAGE(STATUS "TENGINE DOWNLOAD FAILED. Turning Tengine_FOUND off.") - SET(Tengine_FOUND OFF) - else () - MESSAGE(STATUS "TENGINE DOWNLOAD success . ") - - SET(Tengine_FOUND ON) - SET(BUILD_TENGINE ON) - endif() -ENDIF() - -if(BUILD_TENGINE) - SET(HAVE_TENGINE 1) - - if(NOT ANDROID) - # linux system - if(CMAKE_SYSTEM_PROCESSOR STREQUAL arm) - SET(TENGINE_TOOLCHAIN_FLAG "-march=armv7-a") - elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL aarch64) ## AARCH64 - SET(TENGINE_TOOLCHAIN_FLAG "-march=armv8-a") - endif() - endif() - - SET(BUILT_IN_OPENCV ON) ## set for tengine compile discern . 
- SET(Tengine_INCLUDE_DIR "${OCV_TENGINE_SOURCE_PATH}/include" CACHE INTERNAL "") - if(EXISTS "${OCV_TENGINE_SOURCE_PATH}/CMakeLists.txt") - add_subdirectory("${OCV_TENGINE_SOURCE_PATH}" "${OCV_TENGINE_DIR}/build") - else() - message(WARNING "TENGINE: Missing 'CMakeLists.txt' in source code package: ${OCV_TENGINE_SOURCE_PATH}") - endif() - SET(Tengine_LIB "tengine" CACHE INTERNAL "") -endif() diff --git a/CMakeLists.txt b/CMakeLists.txt index 4dbcab578b..d14b7af439 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -462,9 +462,6 @@ OCV_OPTION(WITH_ANDROID_MEDIANDK "Use Android Media NDK for Video I/O (Android)" OCV_OPTION(WITH_ANDROID_NATIVE_CAMERA "Use Android NDK for Camera I/O (Android)" (ANDROID_NATIVE_API_LEVEL GREATER 23) VISIBLE_IF ANDROID VERIFY HAVE_ANDROID_NATIVE_CAMERA) -OCV_OPTION(WITH_TENGINE "Include Arm Inference Tengine support" OFF - VISIBLE_IF (ARM OR AARCH64) AND (UNIX OR ANDROID) AND NOT IOS - VERIFY HAVE_TENGINE) OCV_OPTION(WITH_ONNX "Include Microsoft ONNX Runtime support" OFF VISIBLE_IF TRUE VERIFY HAVE_ONNX) @@ -761,9 +758,6 @@ include(cmake/OpenCVFindLibsPerf.cmake) include(cmake/OpenCVFindLAPACK.cmake) include(cmake/OpenCVFindProtobuf.cmake) include(cmake/OpenCVDetectFlatbuffers.cmake) -if(WITH_TENGINE) - include(cmake/OpenCVFindTengine.cmake) -endif() if(WITH_TIMVX) include(cmake/OpenCVFindTIMVX.cmake) endif() @@ -1612,10 +1606,6 @@ if(WITH_VA OR HAVE_VA) status(" VA:" HAVE_VA THEN "YES" ELSE NO) endif() -if(WITH_TENGINE OR HAVE_TENGINE) - status(" Tengine:" HAVE_TENGINE THEN "YES (${TENGINE_LIBRARIES})" ELSE NO) -endif() - if(WITH_LAPACK OR HAVE_LAPACK) status(" Lapack:" HAVE_LAPACK THEN "YES (${LAPACK_LIBRARIES})" ELSE NO) endif() diff --git a/cmake/OpenCVFindTengine.cmake b/cmake/OpenCVFindTengine.cmake deleted file mode 100644 index 2d33f5c993..0000000000 --- a/cmake/OpenCVFindTengine.cmake +++ /dev/null @@ -1,78 +0,0 @@ -# COPYRIGHT -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license 
agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# License); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -# Copyright (c) 2020, OPEN AI LAB -# Author: qtang@openailab.com or https://github.com/BUG1989 -# - -# ---------------------------------------------------------------------------- -# Path for Tengine binaries -# ---------------------------------------------------------------------------- -set(OPENCV_LIBTENGINE_ROOT_DIR "" CACHE PATH "Path to TENGINE binaries installation") - -IF(OPENCV_LIBTENGINE_ROOT_DIR AND NOT BUILD_TENGINE) - - MESSAGE(STATUS "TENGINE:-- Use binaries at ${OPENCV_LIBTENGINE_ROOT_DIR}") - - SET(Tengine_FOUND ON) - set(BUILD_TENGINE OFF) - - SET(Tengine_INCLUDE_DIR "${OPENCV_LIBTENGINE_ROOT_DIR}/include" CACHE PATH "TENGINE include dir") - SET(Tengine_LIB "${OPENCV_LIBTENGINE_ROOT_DIR}/lib/libtengine.a" CACHE PATH "TENGINE library dir") - -ELSE() - IF(ANDROID) - IF(OPENCV_TENGINE_FORCE_ANDROID) - # nothing, use Android - ELSEIF(OPENCV_TENGINE_SKIP_ANDROID) - set(Tengine_FOUND OFF) - set(HAVE_TENGINE FALSE) - return() - ELSEIF(NOT DEFINED ANDROID_NDK_REVISION) - MESSAGE(STATUS "Android NDK version Tengine not support: ANDROID_NDK_REVISION is not defined") - set(Tengine_FOUND OFF) - set(HAVE_TENGINE FALSE) - return() - ELSEIF(ANDROID_NDK_REVISION VERSION_LESS 14) - MESSAGE(STATUS "Android NDK version Tengine not support: 
ANDROID_NDK_REVISION=${ANDROID_NDK_REVISION}") - set(Tengine_FOUND OFF) - set(HAVE_TENGINE FALSE) - return() - ENDIF() - ENDIF() - MESSAGE(STATUS "TENGINE:-- Build Tengine from source code. ") - include("${OpenCV_SOURCE_DIR}/3rdparty/libtengine/tengine.cmake") -ENDIF() - -IF(NOT Tengine_LIB) - SET(Tengine_FOUND OFF) - MESSAGE(STATUS "#### Could not find Tengine lib. Turning Tengine_FOUND off") -ENDIF() - -IF (Tengine_FOUND) - MESSAGE(STATUS "Found Tengine include: ${Tengine_INCLUDE_DIR}") - MESSAGE(STATUS "Found Tengine libraries: ${Tengine_LIB}") - set(HAVE_TENGINE 1) - set(TENGINE_LIBRARIES ${Tengine_LIB}) - set(TENGINE_INCLUDE_DIRS ${Tengine_INCLUDE_DIR}) -ENDIF (Tengine_FOUND) - -MARK_AS_ADVANCED( - Tengine_INCLUDE_DIR - Tengine_LIB -) diff --git a/cmake/mirrors/custom.cmake b/cmake/mirrors/custom.cmake index 3cdf700e19..8c421471f3 100644 --- a/cmake/mirrors/custom.cmake +++ b/cmake/mirrors/custom.cmake @@ -1,15 +1,12 @@ # Gitlab-style mirror # CMake scripts look for opencv/opencv_3rdparty, -# OAID/Tengine, 01org/tbb(oneAPI/oneTBB), opencv/ade +# 01org/tbb(oneAPI/oneTBB), opencv/ade # from OPENCV_DOWNLOAD_MIRROR ocv_update(OPENCV_DOWNLOAD_MIRROR_URL "") ###### # Download via commit id ###### -# Tengine -ocv_update(TENGINE_PKG_MD5_CUSTOM "") -ocv_update(TENGINE_PKG_MD5_ORIGINAL 23f61ebb1dd419f1207d8876496289c5) # same as tengine_md5sum for TENGINE commit of e89cf8870de2ff0a80cfe626c0b52b2a16fb302e # NVIDIA_OPTICAL_FLOW ocv_update(NVIDIA_OPTICAL_FLOW_PKG_MD5_GITCODE "") ocv_update(NVIDIA_OPTICAL_FLOW_PKG_MD5_ORIGINAL a73cd48b18dcc0cc8933b30796074191) @@ -77,7 +74,7 @@ else() ocv_download_url_custom_usercontent(opencv) elseif(DL_ID STREQUAL "wechat_qrcode") ocv_download_url_gitcode_usercontent(WeChatCV) - elseif((DL_ID STREQUAL "TENGINE") OR (DL_ID STREQUAL "NVIDIA_OPTICAL_FLOW") OR (DL_ID STREQUAL "TIM-VX")) + elseif((DL_ID STREQUAL "NVIDIA_OPTICAL_FLOW") OR (DL_ID STREQUAL "TIM-VX")) ocv_download_url_custom_archive_commit_id() elseif(DL_ID STREQUAL "TBB") 
ocv_download_url_custom_archive_release() diff --git a/cmake/mirrors/gitcode.cmake b/cmake/mirrors/gitcode.cmake index c9d41e7458..e208a87245 100644 --- a/cmake/mirrors/gitcode.cmake +++ b/cmake/mirrors/gitcode.cmake @@ -1,9 +1,6 @@ ###### # Download via commit id ###### -# Tengine -ocv_update(TENGINE_PKG_MD5_GITCODE 1b5908632b557275cd6e85b0c03f9690) -ocv_update(TENGINE_PKG_MD5_ORIGINAL 23f61ebb1dd419f1207d8876496289c5) # same as tengine_md5sum for TENGINE commit of e89cf8870de2ff0a80cfe626c0b52b2a16fb302e # NVIDIA_OPTICAL_FLOW ocv_update(NVIDIA_OPTICAL_FLOW_PKG_MD5_GITCODE 8d5b7eeb24d6ca9c6bcfdff4196d5b47) ocv_update(NVIDIA_OPTICAL_FLOW_PKG_MD5_ORIGINAL a73cd48b18dcc0cc8933b30796074191) @@ -74,7 +71,7 @@ if((DL_ID STREQUAL "FFMPEG") OR (DL_ID STREQUAL "IPPICV") OR (DL_ID STREQUAL "da ocv_download_url_gitcode_usercontent(opencv) elseif(DL_ID STREQUAL "wechat_qrcode") ocv_download_url_gitcode_usercontent(mirrors/WeChatCV) -elseif((DL_ID STREQUAL "TENGINE") OR (DL_ID STREQUAL "NVIDIA_OPTICAL_FLOW") OR (DL_ID STREQUAL "TIM-VX")) +elseif((DL_ID STREQUAL "NVIDIA_OPTICAL_FLOW") OR (DL_ID STREQUAL "TIM-VX")) ocv_download_url_gitcode_archive_commit_id() elseif(DL_ID STREQUAL "TBB") ocv_download_url_gitcode_archive_release(OPENCV_TBB_SUBDIR) diff --git a/doc/tutorials/introduction/config_reference/config_reference.markdown b/doc/tutorials/introduction/config_reference/config_reference.markdown index 3ed87e5bdf..2528baf41d 100644 --- a/doc/tutorials/introduction/config_reference/config_reference.markdown +++ b/doc/tutorials/introduction/config_reference/config_reference.markdown @@ -484,7 +484,6 @@ OpenCV have own DNN inference module which have own build-in engine, but can als | `OPENCV_DNN_CUDA` | _OFF_ | Enable CUDA backend. [CUDA](https://en.wikipedia.org/wiki/CUDA), CUBLAS and [CUDNN](https://developer.nvidia.com/cudnn) must be installed. 
| | `WITH_HALIDE` | _OFF_ | Use experimental [Halide](https://en.wikipedia.org/wiki/Halide_(programming_language)) backend which can generate optimized code for dnn-layers at runtime. Halide must be installed. | | `WITH_VULKAN` | _OFF_ | Enable experimental [Vulkan](https://en.wikipedia.org/wiki/Vulkan_(API)) backend. Does not require additional dependencies, but can use external Vulkan headers (`VULKAN_INCLUDE_DIRS`). | -| `WITH_TENGINE` | _OFF_ | Enable experimental [Tengine](https://github.com/OAID/Tengine) backend for ARM CPUs. Tengine library must be installed. | # Installation layout {#tutorial_config_reference_install} diff --git a/modules/dnn/CMakeLists.txt b/modules/dnn/CMakeLists.txt index 804b78ead2..60cc77ca8b 100644 --- a/modules/dnn/CMakeLists.txt +++ b/modules/dnn/CMakeLists.txt @@ -58,11 +58,6 @@ endif() ocv_cmake_hook_append(INIT_MODULE_SOURCES_opencv_dnn "${CMAKE_CURRENT_LIST_DIR}/cmake/hooks/INIT_MODULE_SOURCES_opencv_dnn.cmake") -if(HAVE_TENGINE) - ocv_target_compile_definitions(${the_module} PRIVATE "HAVE_TENGINE=1") -endif() - - if(MSVC) add_definitions( -D_CRT_SECURE_NO_WARNINGS=1 ) ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4244 /wd4267 /wd4018 /wd4355 /wd4800 /wd4251 /wd4996 /wd4146 @@ -172,11 +167,6 @@ else() set(sources_options ${sources_options} EXCLUDE_CUDA) endif() -if(HAVE_TENGINE) - list(APPEND include_dirs ${TENGINE_INCLUDE_DIRS}) - list(APPEND libs -Wl,--whole-archive ${TENGINE_LIBRARIES} -Wl,--no-whole-archive) -endif() - if(HAVE_TIMVX) list(APPEND include_dirs ${TIMVX_INCLUDE_DIR}) list(APPEND libs -Wl,--whole-archive ${TIMVX_LIBRARY} -Wl,--no-whole-archive) diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index 2787d64880..0ed2bb7feb 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -62,9 +62,6 @@ #include "opencl_kernels_dnn.hpp" using namespace cv::dnn::ocl4dnn; #endif -#ifdef HAVE_TENGINE -#include 
"../tengine4dnn/include/tengine_graph_convolution.hpp" -#endif #ifdef HAVE_CUDA #include "../cuda4dnn/primitives/convolution.hpp" @@ -267,10 +264,6 @@ public: float power; #endif -#ifdef HAVE_TENGINE - teng_graph_t tengine_graph; -#endif - #ifdef HAVE_CUDA cuda4dnn::ConvolutionConfiguration::FusionMode cudaFusionMode; cuda4dnn::ConvolutionConfiguration::ActivationType cudaActType; @@ -289,20 +282,8 @@ public: #ifdef HAVE_CUDA cudaFusionMode = cuda4dnn::ConvolutionConfiguration::FusionMode::NONE; cudaActType = cuda4dnn::ConvolutionConfiguration::ActivationType::IDENTITY; -#endif -#ifdef HAVE_TENGINE - tengine_graph=NULL; #endif } -#ifdef HAVE_TENGINE - ~ConvolutionLayerImpl() - { - if(NULL != tengine_graph ) - { - tengine_release(tengine_graph); - } - } -#endif MatShape computeColRowShape(const MatShape &inpShape, const MatShape &outShape) const CV_OVERRIDE { @@ -466,13 +447,6 @@ public: for(int i = 0; i < numOutput; i++ ) biasvec[i] = biasMat.at(i); } -#ifdef HAVE_TENGINE - if(NULL != tengine_graph ) - { - tengine_release(tengine_graph); - tengine_graph = NULL ; - } -#endif #ifdef HAVE_OPENCL convolutionOp.release(); #endif @@ -1305,65 +1279,6 @@ public: } } -#ifdef HAVE_TENGINE - bool tengine_ret = false; - - std::vector teng_in, teng_out; - inputs_arr.getMatVector(teng_in); - outputs_arr.getMatVector(teng_out); - - int inch = teng_in[0].size[1]; // inch - int in_h = teng_in[0].size[2]; // in_h - int in_w = teng_in[0].size[3]; // in_w - - int out_b = teng_out[0].size[0]; // out batch size - int outch = teng_out[0].size[1]; // outch - int out_h = teng_out[0].size[2]; // out_h - int out_w = teng_out[0].size[3]; // out_w - - float *input_ = teng_in[0].ptr(); - float *output_ = teng_out[0].ptr(); - float *kernel_ = weightsMat.ptr(); - float *teg_bias = &biasvec[0]; - - int nstripes = std::max(getNumThreads(), 1); - - /* tengine_init will run when first time. 
*/ - if(NULL == tengine_graph) - { - // pads_begin: 0 - pad_top, 1 - pad_left - // pads_end: 0 - pad_bottom, 1 - pad_right - // pad_h0: pad_top, pad_h1: pad_bottom - // pad_w0: pad_left, pad_w1: pad_right - tengine_graph = tengine_init(name.c_str(), input_, inch, ngroups, in_h, in_w, - output_, out_b, outch, out_h, out_w, - kernel_, kernel_size.size(), kernel.height, kernel.width, - teg_bias, stride.height, stride.width, - pads_begin[0], pads_end[0], pads_begin[1], pads_end[1], dilation.height, dilation.width, - weightsMat.step1(), padMode, tengine_graph, nstripes); - // printf("Init(%s): input=%p(%d %d %d %d ),output=%p(%d %d %d %d ),kernel=%p(%ld %d %d ), bias=%p ," - // "stride(%d %d), pad(%d %d %d %d), dilation(%d %d) ,weightsMat=%ld, padMode=%s ,tengine_graph = %p \n", - // name.c_str(),input_, inch, ngroups, in_h, in_w, - // output_, out_b, outch, out_h, out_w, - // kernel_, kernel_size.size(), kernel.height, kernel.width, - // teg_bias, stride.height, stride.width, - // pads_begin[0], pads_end[0], pads_begin[1], pads_end[1], dilation.height, dilation.width, - // weightsMat.step1(), padMode.c_str() ,tengine_graph); - } - if(NULL != tengine_graph) - { - tengine_ret = tengine_forward(tengine_graph); - } - /* activation */ - if((true == tengine_ret) && activ ) - { - int out_cstep = out_h * out_w; // out_cstep - - ActivationLayer* activ_ = activ.get(); - activ_->forwardSlice(output_, output_, out_cstep, out_cstep, 0, outch); - } - if(false == tengine_ret) -#endif { int nstripes = std::max(getNumThreads(), 1); int conv_dim = CONV_2D; diff --git a/modules/dnn/src/tengine4dnn/include/tengine_graph_convolution.hpp b/modules/dnn/src/tengine4dnn/include/tengine_graph_convolution.hpp deleted file mode 100644 index 8ec99c9685..0000000000 --- a/modules/dnn/src/tengine4dnn/include/tengine_graph_convolution.hpp +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * License); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/* - * Copyright (c) 2020, OPEN AI LAB - * Author: qtang@openailab.com - */ - -#ifndef TENGINE_GRAPH_CONVOLUTION_HPP -#define TENGINE_GRAPH_CONVOLUTION_HPP - -#define FLOAT_TO_REALSIZE (4) -#ifdef HAVE_TENGINE - -#include "tengine_c_api.h" - -namespace cv -{ -namespace dnn -{ -// pad_h0: pad_top -// pad_h1: pad_bottom -// pad_w0: pad_left -// pad_w1: pad_right -teng_graph_t tengine_init(const char* name , float* input_, int inch, int group, int in_h, int in_w, - float *output_, int out_b, int outch, int out_h, int out_w, - float *kernel_,int kernel_s , int kernel_h, int kernel_w, - float *teg_bias, int stride_h, int stride_w, - int pad_h0, int pad_h1, int pad_w0, int pad_w1, int dilation_h, int dilation_w, - size_t wstep, const std::string padMode , teng_graph_t& graph, int nstripes) ; - -bool tengine_forward(teng_graph_t& graph) ; -bool tengine_release(teng_graph_t& graph) ; -} -} -#endif -#endif /* TENGINE_GRAPH_CONVOLUTION_HPP */ \ No newline at end of file diff --git a/modules/dnn/src/tengine4dnn/src/tengine_graph_convolution.cpp b/modules/dnn/src/tengine4dnn/src/tengine_graph_convolution.cpp deleted file mode 100644 index d35937006c..0000000000 --- a/modules/dnn/src/tengine4dnn/src/tengine_graph_convolution.cpp +++ /dev/null @@ -1,370 +0,0 @@ -/* - * Licensed to the 
Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * License); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/* - * Copyright (c) 2020, OPEN AI LAB - * Author: qtang@openailab.com - */ - -#include "../../precomp.hpp" -#include -#include - -#include -#include - -#include "../include/tengine_graph_convolution.hpp" - -#ifdef HAVE_TENGINE - -#include "tengine_c_api.h" - - -namespace cv -{ -namespace dnn -{ -static int create_input_node(teng_graph_t graph, const char* node_name, int inch, int in_h, int in_w) -{ - node_t node = teng_create_graph_node(graph, node_name, "InputOp"); - tensor_t tensor = teng_create_graph_tensor(graph, node_name, TENGINE_DT_FP32); - teng_set_node_output_tensor(node, 0, tensor, TENSOR_TYPE_INPUT); - - int dims[4] = {1, inch, in_h, in_w}; - teng_set_tensor_shape(tensor, dims, 4); - - teng_release_graph_tensor(tensor); - teng_release_graph_node(node); - - return 0; -} - -static int create_conv_node(teng_graph_t graph, const char* node_name, const char* input_name, int in_h, int in_w, int out_h, int out_w, - int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h0, int pad_h1, int pad_w0, int pad_w1, int inch, int outch, int group, - int dilation_h, int dilation_w, int activation, std::string padMode) -{ - node_t conv_node = teng_create_graph_node(graph, 
node_name, "Convolution"); - tensor_t input_tensor = teng_get_graph_tensor(graph, input_name); - - if (input_tensor == NULL) - { - CV_LOG_WARNING(NULL,"Tengine: input_tensor is NULL." ); - return -1; - } - - teng_set_node_input_tensor(conv_node, 0, input_tensor); - teng_release_graph_tensor(input_tensor); - - /* output */ - tensor_t output_tensor = teng_create_graph_tensor(graph, node_name, TENGINE_DT_FP32); - - teng_set_node_output_tensor(conv_node, 0, output_tensor, TENSOR_TYPE_VAR); - teng_release_graph_tensor(output_tensor); - - /* weight */ - std::string weight_name(node_name); - weight_name += "/weight"; - - node_t w_node = teng_create_graph_node(graph, weight_name.c_str(), "Const"); - tensor_t w_tensor = teng_create_graph_tensor(graph, weight_name.c_str(), TENGINE_DT_FP32); - teng_set_node_output_tensor(w_node, 0, w_tensor, TENSOR_TYPE_CONST); - teng_set_node_input_tensor(conv_node, 1, w_tensor); - int w_dims[] = {outch, inch / group, kernel_h, kernel_w}; - - teng_set_tensor_shape(w_tensor, w_dims, 4); - - teng_release_graph_node(w_node); - teng_release_graph_tensor(w_tensor); - - /* bias */ - std::string bias_name(node_name); - bias_name += "/bias"; - - node_t b_node = teng_create_graph_node(graph, bias_name.c_str(), "Const"); - tensor_t b_tensor = teng_create_graph_tensor(graph, bias_name.c_str(), TENGINE_DT_FP32); - teng_set_node_output_tensor(b_node, 0, b_tensor, TENSOR_TYPE_CONST); - int b_dims[] = {outch}; - - teng_set_tensor_shape(b_tensor, b_dims, 1); - - teng_set_node_input_tensor(conv_node, 2, b_tensor); - teng_release_graph_node(b_node); - teng_release_graph_tensor(b_tensor); - - if (!padMode.empty()) - { - if (padMode == "SAME") - { - int out_h_temp = (in_h-kernel_h + 2*pad_h0)/stride_h + 1; - int out_w_temp = (in_w-kernel_w + 2*pad_w0)/stride_w + 1; - - if (out_h_temp < out_h) - pad_h1 += 1; - if (out_w_temp < out_w) - pad_w1 += 1; - } - } - - /* attr */ - teng_set_node_attr_int(conv_node, "kernel_h", &kernel_h); - 
teng_set_node_attr_int(conv_node, "kernel_w", &kernel_w); - teng_set_node_attr_int(conv_node, "stride_h", &stride_h); - teng_set_node_attr_int(conv_node, "stride_w", &stride_w); - teng_set_node_attr_int(conv_node, "pad_h0", &pad_h0); - teng_set_node_attr_int(conv_node, "pad_w0", &pad_w0); - teng_set_node_attr_int(conv_node, "pad_h1", &pad_h1); - teng_set_node_attr_int(conv_node, "pad_w1", &pad_w1); - teng_set_node_attr_int(conv_node, "output_channel", &outch); - teng_set_node_attr_int(conv_node, "input_channel", &inch); - teng_set_node_attr_int(conv_node, "group", &group); - teng_set_node_attr_int(conv_node, "dilation_h", &dilation_h); - teng_set_node_attr_int(conv_node, "dilation_w", &dilation_w); - // set_node_attr_int(conv_node, "activation", &activation); - - teng_release_graph_node(conv_node); - - return 0; -} - -static teng_graph_t create_conv_graph(const char* layer_name, float* input_data, int inch, int group, int in_h, int in_w, - float* output_data, int outch, int out_h, int out_w, - int kernel_h, int kernel_w, - int stride_h,int stride_w, - int pad_h0, int pad_h1, int pad_w0, int pad_w1, int dilation_h, int dilation_w, int activation, - float* teg_weight, float* teg_bias, std::string padMode, int nstripes) -{ - node_t conv_node = NULL; - - tensor_t input_tensor = NULL; - tensor_t output_tensor = NULL; - tensor_t weight_tensor = NULL; - tensor_t bias_tensor = NULL; - - /* create graph for convolution */ - int in_size = in_h * in_w * inch; - int out_size = out_h * out_w * outch; - int weight_size = outch * (inch / group) * kernel_w * kernel_h; - int bias_size = outch; - - int buf_size = 0; - int input_num = 0; - - /* create graph */ - teng_graph_t graph = teng_create_graph(NULL, NULL, NULL); - bool ok = true; - - if(graph == NULL) - { - CV_LOG_WARNING(NULL,"Tengine: create_graph failed." 
); - ok = false; - } - - const char* input_name = "data"; - const char* conv_name = layer_name; - - if (ok && create_input_node(graph, input_name, inch, in_h, in_w) < 0) - { - CV_LOG_WARNING(NULL,"Tengine: create_input_node failed." ); - ok = false; - } - - if (ok && create_conv_node(graph, conv_name, input_name, in_h, in_w, out_h, out_w, kernel_h, kernel_w, - stride_h, stride_w, pad_h0, pad_h1, pad_w0, pad_w1, inch, outch, group, dilation_h, dilation_w, activation, padMode) < 0) - { - CV_LOG_WARNING(NULL,"Tengine: create conv node failed." ); - ok = false; - } - - /* set input/output node */ - const char* inputs_name[] = {input_name}; - const char* outputs_name[] = {conv_name}; - - if (ok && teng_set_graph_input_node(graph, inputs_name, sizeof(inputs_name) / sizeof(char*)) < 0) - { - CV_LOG_WARNING(NULL,"Tengine: set inputs failed." ); - ok = false; - } - - if (ok && teng_set_graph_output_node(graph, outputs_name, sizeof(outputs_name) / sizeof(char*)) < 0) - { - CV_LOG_WARNING(NULL,"Tengine: set outputs failed." 
); - ok = false; - } - - /* set input data */ - if (ok) - { - input_tensor = teng_get_graph_input_tensor(graph, 0, 0); - buf_size = teng_get_tensor_buffer_size(input_tensor); - if (buf_size != in_size * FLOAT_TO_REALSIZE) - { - CV_LOG_WARNING(NULL,"Tengine: Input data size check failed."); - ok = false; - } - } - - if (ok) - { - teng_set_tensor_buffer(input_tensor, (float *)input_data, buf_size); - teng_release_graph_tensor(input_tensor); - - /* create convolution node */ - /* set weight node */ - conv_node = teng_get_graph_node(graph, conv_name); - weight_tensor = teng_get_node_input_tensor(conv_node, 1); - buf_size = teng_get_tensor_buffer_size(weight_tensor); - - if (buf_size != weight_size * FLOAT_TO_REALSIZE) - { - CV_LOG_WARNING(NULL,"Tengine: Input weight size check failed."); - ok = false; - } - } - - if (ok) - { - teng_set_tensor_buffer(weight_tensor, teg_weight, buf_size); - - /* set bias node */ - input_num = teng_get_node_input_number(conv_node); - if (input_num > 2) - { - bias_tensor = teng_get_node_input_tensor(conv_node, 2); - buf_size = teng_get_tensor_buffer_size(bias_tensor); - if (buf_size != bias_size * FLOAT_TO_REALSIZE) - { - CV_LOG_WARNING(NULL,"Tengine: Input bias size check failed."); - ok = false; - } - else teng_set_tensor_buffer(bias_tensor, teg_bias, buf_size); - } - } - - /* prerun */ - if (ok && teng_prerun_graph_multithread(graph, TENGINE_CLUSTER_BIG, nstripes) < 0) - { - CV_LOG_WARNING(NULL, "Tengine: prerun_graph failed."); - ok = false; - } - - if (ok) - { - /* set output data */ - output_tensor = teng_get_node_output_tensor(conv_node, 0); - int ret = teng_set_tensor_buffer(output_tensor, output_data, out_size * FLOAT_TO_REALSIZE); - if(ret) - { - CV_LOG_WARNING(NULL,"Tengine: Set output tensor buffer failed." 
); - ok = false; - } - } - - if (false == ok) - { - teng_destroy_graph(graph) ; - return NULL ; - } - return graph; -} -static bool tengine_init_flag = false; -teng_graph_t tengine_init(const char* layer_name, float* input_, int inch, int group, int in_h, int in_w, - float *output_, int out_b, int outch, int out_h, int out_w, - float *kernel_, int kernel_s ,int kernel_h, int kernel_w, - float *teg_bias, int stride_h, int stride_w, - int pad_h0, int pad_h1, int pad_w0, int pad_w1, int dilation_h, int dilation_w, - size_t wstep, const std::string padMode, teng_graph_t &graph, int nstripes) -{ - std::vector teg_weight_vec; - float *teg_weight = NULL; - int kernel_inwh = (inch / group) * kernel_w * kernel_h; - // Do not using the activation fuse mode, just convolution only. - int activation = -1; - - if (!(kernel_s == 2 && kernel_h == kernel_w - && dilation_h == dilation_w && stride_h == stride_w - && out_b == 1 && pad_h0 < 10 && pad_h1 < 10 && pad_w0 < 10 && pad_w1 < 10)) // just for Conv2D - { - // printf("return : just for Conv2D\n"); - return NULL; - } - - { - /* printf("Tengine(%s): input (1 x %d x %d x %d),output (%d x %d x %d x %d), kernel (%d x %d), stride (%d x %d), dilation (%d x %d), pad (%d x %d).\n", - layer_name, inch, in_h, in_w, - out_b, outch, out_h, out_w, - kernel_w, kernel_h, - stride_w, stride_h, - dilation_w, dilation_h, - pad_h0, pad_h1, pad_w0, pad_w1); - */ - // weight - if (kernel_inwh != wstep) - { - teg_weight_vec.resize(kernel_inwh * outch); - teg_weight = &teg_weight_vec[0]; - for (int i=0; i Date: Wed, 9 Aug 2023 18:43:49 +0800 Subject: [PATCH 09/57] Fixed bug when MSMF webcamera doesn't start when build with VIDEOIO_PLUGIN_ALL --- modules/videoio/src/cap_msmf.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/modules/videoio/src/cap_msmf.cpp b/modules/videoio/src/cap_msmf.cpp index 78eefc34a3..a55f919ed1 100644 --- a/modules/videoio/src/cap_msmf.cpp +++ b/modules/videoio/src/cap_msmf.cpp @@ -2719,8 +2719,6 @@ CvResult CV_API_CALL 
cv_capture_open_with_params( if (!handle) return CV_ERROR_FAIL; *handle = NULL; - if (!filename) - return CV_ERROR_FAIL; CaptureT* cap = 0; try { From f834736307c8328340aea48908484052170c9224 Mon Sep 17 00:00:00 2001 From: chaebkimm Date: Wed, 9 Aug 2023 19:46:25 +0900 Subject: [PATCH 10/57] Merge pull request #24116 from chaebkimm/update-samples-python-tst_scene_render Fix python sample code (tst_scene_render) #24116 Fix bug of python sample code (samples/python/tst_scene_render.py) when backGr or fgr is None (#24114) 1) pass shape tuple to np.zeros arguments instead of integers 2) change np.int to int ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [o] I agree to contribute to the project under Apache 2 License. - [o] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [o] The PR is proposed to the proper branch - [o] There is a reference to the original bug report and related work - [o] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. 
- [o] The feature is well documented and sample code can be built with the project CMake --- samples/python/tst_scene_render.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/python/tst_scene_render.py b/samples/python/tst_scene_render.py index 9d09ea7b9e..c3eb69ef9c 100644 --- a/samples/python/tst_scene_render.py +++ b/samples/python/tst_scene_render.py @@ -25,7 +25,7 @@ class TestSceneRender(): if bgImg is not None: self.sceneBg = bgImg.copy() else: - self.sceneBg = np.zeros(defaultSize, defaultSize, np.uint8) + self.sceneBg = np.zeros((defaultSize, defaultSize,3), np.uint8) self.w = self.sceneBg.shape[0] self.h = self.sceneBg.shape[1] @@ -85,7 +85,7 @@ class TestSceneRender(): img[self.currentCenter[0]:self.currentCenter[0]+self.foreground.shape[0], self.currentCenter[1]:self.currentCenter[1]+self.foreground.shape[1]] = self.foreground else: - self.currentRect = self.initialRect + np.int( 30*cos(self.time*self.speed) + 50*sin(self.time*self.speed)) + self.currentRect = self.initialRect + int( 30*cos(self.time*self.speed) + 50*sin(self.time*self.speed)) if self.deformation: self.currentRect[1:3] += int(self.h/20*cos(self.time)) cv.fillConvexPoly(img, self.currentRect, (0, 0, 255)) From 53dfd9536a569b824cb083a1d6f5f9f3df0b05be Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Thu, 10 Aug 2023 11:39:29 +0300 Subject: [PATCH 11/57] videoio: fix camera opening with GStreamer plugin --- modules/videoio/src/cap_gstreamer.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/modules/videoio/src/cap_gstreamer.cpp b/modules/videoio/src/cap_gstreamer.cpp index fc031d2b5f..305d527ce9 100644 --- a/modules/videoio/src/cap_gstreamer.cpp +++ b/modules/videoio/src/cap_gstreamer.cpp @@ -2825,8 +2825,6 @@ CvResult CV_API_CALL cv_capture_open_with_params( if (!handle) return CV_ERROR_FAIL; *handle = NULL; - if (!filename) - return CV_ERROR_FAIL; GStreamerCapture *cap = 0; try { From 82de5b3a67a961a52e6f5a6e58d9e8ec8264b7eb Mon Sep 17 00:00:00 2001 
From: Samuel Thibault Date: Thu, 10 Aug 2023 22:43:46 +0200 Subject: [PATCH 12/57] Fix GNU/Hurd build It has the usual Unix filesystem operations. --- .../core/include/opencv2/core/utils/filesystem.private.hpp | 3 ++- modules/core/src/utils/filesystem.cpp | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/modules/core/include/opencv2/core/utils/filesystem.private.hpp b/modules/core/include/opencv2/core/utils/filesystem.private.hpp index c32be15c61..70df64f0d4 100644 --- a/modules/core/include/opencv2/core/utils/filesystem.private.hpp +++ b/modules/core/include/opencv2/core/utils/filesystem.private.hpp @@ -12,7 +12,8 @@ # elif defined WINRT || defined _WIN32_WCE /* not supported */ # elif defined __ANDROID__ || defined __linux__ || defined _WIN32 || \ - defined __FreeBSD__ || defined __bsdi__ || defined __HAIKU__ + defined __FreeBSD__ || defined __bsdi__ || defined __HAIKU__ || \ + defined __GNU__ # define OPENCV_HAVE_FILESYSTEM_SUPPORT 1 # elif defined(__APPLE__) # include diff --git a/modules/core/src/utils/filesystem.cpp b/modules/core/src/utils/filesystem.cpp index 415323490d..24f69ccb59 100644 --- a/modules/core/src/utils/filesystem.cpp +++ b/modules/core/src/utils/filesystem.cpp @@ -34,7 +34,7 @@ #include #include #include -#elif defined __linux__ || defined __APPLE__ || defined __HAIKU__ || defined __FreeBSD__ +#elif defined __linux__ || defined __APPLE__ || defined __HAIKU__ || defined __FreeBSD__ || defined __GNU__ #include #include #include @@ -343,7 +343,7 @@ private: Impl& operator=(const Impl&); // disabled }; -#elif defined __linux__ || defined __APPLE__ || defined __HAIKU__ || defined __FreeBSD__ +#elif defined __linux__ || defined __APPLE__ || defined __HAIKU__ || defined __FreeBSD__ || defined __GNU__ struct FileLock::Impl { @@ -457,7 +457,7 @@ cv::String getCacheDirectory(const char* sub_directory_name, const char* configu default_cache_path = "/tmp/"; CV_LOG_WARNING(NULL, "Using world accessible cache directory. 
This may be not secure: " << default_cache_path); } -#elif defined __linux__ || defined __HAIKU__ || defined __FreeBSD__ +#elif defined __linux__ || defined __HAIKU__ || defined __FreeBSD__ || defined __GNU__ // https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html if (default_cache_path.empty()) { From 0dd7769bb12b518ff2d54ee6aace74db01688116 Mon Sep 17 00:00:00 2001 From: HAN Liutong Date: Fri, 11 Aug 2023 13:33:33 +0800 Subject: [PATCH 13/57] Merge pull request #23980 from hanliutong:rewrite-core Rewrite Universal Intrinsic code by using new API: Core module. #23980 The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API. The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885. Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are: 1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited - ./modules/core/src/stat.simd.hpp - ./modules/core/src/matrix_transform.cpp - ./modules/core/src/matmul.simd.hpp 2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly. - ./modules/core/src/mathfuncs_core.simd.hpp ```cpp struct v_atan_f32 { explicit v_atan_f32(const float& scale) { ... } v_float32 compute(const v_float32& y, const v_float32& x) { ... } ... v_float32 val90; // sizeless type can not used in a class v_float32 val180; v_float32 val360; v_float32 s; }; ``` 3. 
The API interface does not support/does not match - ./modules/core/src/norm.cpp Use `v_popcount`, ~~waiting for #23966~~ Fixed - ./modules/core/src/has_non_zero.simd.hpp Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed ```cpp /** @brief Bitwise OR Only for integer types. */ template CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); template CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); ``` ```cpp #if CV_SIMD typedef v_float32 v_type; const v_type v_zero = vx_setzero_f32(); constexpr const int unrollCount = 8; int step = v_type::nlanes * unrollCount; int len0 = len & -step; const float* srcSimdEnd = src+len0; int countSIMD = static_cast((srcSimdEnd-src)/step); while(!res && countSIMD--) { v_type v0 = vx_load(src); src += v_type::nlanes; v_type v1 = vx_load(src); src += v_type::nlanes; .... src += v_type::nlanes; v0 |= v1; //Illegal ? .... //res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ res = !v_check_all(((v0 | v4) == v_zero)); } v_cleanup(); #endif ``` ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [ ] I agree to contribute to the project under Apache 2 License. - [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [ ] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. 
- [ ] The feature is well documented and sample code can be built with the project CMake --- .../opencv2/core/hal/intrin_rvv_scalable.hpp | 3 + modules/core/src/arithm.cpp | 52 +++---- modules/core/src/arithm.simd.hpp | 119 +++++++-------- modules/core/src/convert.hpp | 54 +++---- modules/core/src/convert.simd.hpp | 16 +- modules/core/src/convert_scale.simd.hpp | 30 ++-- modules/core/src/copy.cpp | 18 +-- modules/core/src/count_non_zero.simd.hpp | 72 +++++---- modules/core/src/lapack.cpp | 88 +++-------- modules/core/src/mathfuncs.cpp | 138 +++++++++--------- modules/core/src/matmul.simd.hpp | 128 ++++++++-------- modules/core/src/matrix_transform.cpp | 10 +- modules/core/src/merge.simd.hpp | 20 +-- modules/core/src/norm.cpp | 66 ++++----- modules/core/src/split.simd.hpp | 20 +-- modules/core/src/stat.simd.hpp | 26 +--- modules/core/src/sum.simd.hpp | 124 ++++++++-------- 17 files changed, 466 insertions(+), 518 deletions(-) diff --git a/modules/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp b/modules/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp index dab82489f8..6c28b44f5b 100644 --- a/modules/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp @@ -924,6 +924,9 @@ inline scalartype v_reduce_sum(const _Tpvec& a) \ return (scalartype)v_get0(res); \ } OPENCV_HAL_IMPL_RVV_REDUCE_SUM_FP(v_float32, v_float32, vfloat32m1_t, float, f32, VTraits::vlanes()) +#if CV_SIMD_SCALABLE_64F +OPENCV_HAL_IMPL_RVV_REDUCE_SUM_FP(v_float64, v_float64, vfloat64m1_t, float, f64, VTraits::vlanes()) +#endif #define OPENCV_HAL_IMPL_RVV_REDUCE(_Tpvec, func, scalartype, suffix, vl, red) \ inline scalartype v_reduce_##func(const _Tpvec& a) \ diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp index 5709ec12e4..c5e561e26e 100644 --- a/modules/core/src/arithm.cpp +++ b/modules/core/src/arithm.cpp @@ -1332,7 +1332,7 @@ struct InRange_SIMD } }; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) 
template <> struct InRange_SIMD @@ -1341,7 +1341,7 @@ struct InRange_SIMD uchar * dst, int len) const { int x = 0; - const int width = v_uint8::nlanes; + const int width = VTraits::vlanes(); for (; x <= len - width; x += width) { @@ -1349,7 +1349,7 @@ struct InRange_SIMD v_uint8 low = vx_load(src2 + x); v_uint8 high = vx_load(src3 + x); - v_store(dst + x, (values >= low) & (high >= values)); + v_store(dst + x, v_and(v_ge(values, low), v_ge(high, values))); } vx_cleanup(); return x; @@ -1363,7 +1363,7 @@ struct InRange_SIMD uchar * dst, int len) const { int x = 0; - const int width = v_int8::nlanes; + const int width = VTraits::vlanes(); for (; x <= len - width; x += width) { @@ -1371,7 +1371,7 @@ struct InRange_SIMD v_int8 low = vx_load(src2 + x); v_int8 high = vx_load(src3 + x); - v_store((schar*)(dst + x), (values >= low) & (high >= values)); + v_store((schar*)(dst + x), v_and(v_ge(values, low), v_ge(high, values))); } vx_cleanup(); return x; @@ -1385,7 +1385,7 @@ struct InRange_SIMD uchar * dst, int len) const { int x = 0; - const int width = v_uint16::nlanes * 2; + const int width = VTraits::vlanes() * 2; for (; x <= len - width; x += width) { @@ -1393,11 +1393,11 @@ struct InRange_SIMD v_uint16 low1 = vx_load(src2 + x); v_uint16 high1 = vx_load(src3 + x); - v_uint16 values2 = vx_load(src1 + x + v_uint16::nlanes); - v_uint16 low2 = vx_load(src2 + x + v_uint16::nlanes); - v_uint16 high2 = vx_load(src3 + x + v_uint16::nlanes); + v_uint16 values2 = vx_load(src1 + x + VTraits::vlanes()); + v_uint16 low2 = vx_load(src2 + x + VTraits::vlanes()); + v_uint16 high2 = vx_load(src3 + x + VTraits::vlanes()); - v_store(dst + x, v_pack((values1 >= low1) & (high1 >= values1), (values2 >= low2) & (high2 >= values2))); + v_store(dst + x, v_pack(v_and(v_ge(values1, low1), v_ge(high1, values1)), v_and(v_ge(values2, low2), v_ge(high2, values2)))); } vx_cleanup(); return x; @@ -1411,7 +1411,7 @@ struct InRange_SIMD uchar * dst, int len) const { int x = 0; - const int width = 
(int)v_int16::nlanes * 2; + const int width = (int)VTraits::vlanes() * 2; for (; x <= len - width; x += width) { @@ -1419,11 +1419,11 @@ struct InRange_SIMD v_int16 low1 = vx_load(src2 + x); v_int16 high1 = vx_load(src3 + x); - v_int16 values2 = vx_load(src1 + x + v_int16::nlanes); - v_int16 low2 = vx_load(src2 + x + v_int16::nlanes); - v_int16 high2 = vx_load(src3 + x + v_int16::nlanes); + v_int16 values2 = vx_load(src1 + x + VTraits::vlanes()); + v_int16 low2 = vx_load(src2 + x + VTraits::vlanes()); + v_int16 high2 = vx_load(src3 + x + VTraits::vlanes()); - v_store((schar*)(dst + x), v_pack((values1 >= low1) & (high1 >= values1), (values2 >= low2) & (high2 >= values2))); + v_store((schar*)(dst + x), v_pack(v_and(v_ge(values1, low1), v_ge(high1, values1)), v_and(v_ge(values2, low2), v_ge(high2, values2)))); } vx_cleanup(); return x; @@ -1437,7 +1437,7 @@ struct InRange_SIMD uchar * dst, int len) const { int x = 0; - const int width = (int)v_int32::nlanes * 2; + const int width = (int)VTraits::vlanes() * 2; for (; x <= len - width; x += width) { @@ -1445,11 +1445,11 @@ struct InRange_SIMD v_int32 low1 = vx_load(src2 + x); v_int32 high1 = vx_load(src3 + x); - v_int32 values2 = vx_load(src1 + x + v_int32::nlanes); - v_int32 low2 = vx_load(src2 + x + v_int32::nlanes); - v_int32 high2 = vx_load(src3 + x + v_int32::nlanes); + v_int32 values2 = vx_load(src1 + x + VTraits::vlanes()); + v_int32 low2 = vx_load(src2 + x + VTraits::vlanes()); + v_int32 high2 = vx_load(src3 + x + VTraits::vlanes()); - v_pack_store(dst + x, v_reinterpret_as_u16(v_pack((values1 >= low1) & (high1 >= values1), (values2 >= low2) & (high2 >= values2)))); + v_pack_store(dst + x, v_reinterpret_as_u16(v_pack(v_and(v_ge(values1, low1), v_ge(high1, values1)), v_and(v_ge(values2, low2), v_ge(high2, values2))))); } vx_cleanup(); return x; @@ -1463,7 +1463,7 @@ struct InRange_SIMD uchar * dst, int len) const { int x = 0; - const int width = (int)v_float32::nlanes * 2; + const int width = 
(int)VTraits::vlanes() * 2; for (; x <= len - width; x += width) { @@ -1471,12 +1471,12 @@ struct InRange_SIMD v_float32 low1 = vx_load(src2 + x); v_float32 high1 = vx_load(src3 + x); - v_float32 values2 = vx_load(src1 + x + v_float32::nlanes); - v_float32 low2 = vx_load(src2 + x + v_float32::nlanes); - v_float32 high2 = vx_load(src3 + x + v_float32::nlanes); + v_float32 values2 = vx_load(src1 + x + VTraits::vlanes()); + v_float32 low2 = vx_load(src2 + x + VTraits::vlanes()); + v_float32 high2 = vx_load(src3 + x + VTraits::vlanes()); - v_pack_store(dst + x, v_pack(v_reinterpret_as_u32(values1 >= low1) & v_reinterpret_as_u32(high1 >= values1), - v_reinterpret_as_u32(values2 >= low2) & v_reinterpret_as_u32(high2 >= values2))); + v_pack_store(dst + x, v_pack(v_and(v_reinterpret_as_u32(v_ge(values1, low1)), v_reinterpret_as_u32(v_ge(high1, values1))), + v_and(v_reinterpret_as_u32(v_ge(values2, low2)), v_reinterpret_as_u32(v_ge(high2, values2))))); } vx_cleanup(); return x; diff --git a/modules/core/src/arithm.simd.hpp b/modules/core/src/arithm.simd.hpp index 06ebfb7678..1c97e91fbe 100644 --- a/modules/core/src/arithm.simd.hpp +++ b/modules/core/src/arithm.simd.hpp @@ -219,7 +219,7 @@ template struct op_add { static inline Tvec r(const Tvec& a, const Tvec& b) - { return a + b; } + { return v_add(a, b); } static inline T1 r(T1 a, T1 b) { return c_add(a, b); } }; @@ -229,7 +229,7 @@ template struct op_sub { static inline Tvec r(const Tvec& a, const Tvec& b) - { return a - b; } + { return v_sub(a, b); } static inline T1 r(T1 a, T1 b) { return c_sub(a, b); } }; @@ -266,7 +266,7 @@ struct op_absdiff template<> struct op_absdiff { -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE static inline v_int8 r(const v_int8& a, const v_int8& b) { return v_absdiffs(a, b); } #endif @@ -276,7 +276,7 @@ struct op_absdiff template<> struct op_absdiff { -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE static inline v_int16 r(const v_int16& a, const v_int16& b) { return v_absdiffs(a, b); } #endif 
@@ -286,7 +286,7 @@ struct op_absdiff template<> struct op_absdiff { -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE static inline v_int32 r(const v_int32& a, const v_int32& b) { return v_reinterpret_as_s32(v_absdiff(a, b)); } #endif @@ -299,7 +299,7 @@ template struct op_or { static inline Tvec r(const Tvec& a, const Tvec& b) - { return a | b; } + { return v_or(a, b); } static inline T1 r(T1 a, T1 b) { return a | b; } }; @@ -307,7 +307,7 @@ template struct op_xor { static inline Tvec r(const Tvec& a, const Tvec& b) - { return a ^ b; } + { return v_xor(a, b); } static inline T1 r(T1 a, T1 b) { return a ^ b; } }; @@ -315,7 +315,7 @@ template struct op_and { static inline Tvec r(const Tvec& a, const Tvec& b) - { return a & b; } + { return v_and(a, b); } static inline T1 r(T1 a, T1 b) { return a & b; } }; @@ -324,14 +324,14 @@ struct op_not { // ignored b from loader level static inline Tvec r(const Tvec& a) - { return ~a; } + { return v_not(a); } static inline T1 r(T1 a, T1) { return ~a; } }; //////////////////////////// Loaders ///////////////////////////////// -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE template< template class OP, typename T1, typename Tvec> struct bin_loader @@ -396,13 +396,13 @@ template class OP, typename T1, typename Tv static void bin_loop(const T1* src1, size_t step1, const T1* src2, size_t step2, T1* dst, size_t step, int width, int height) { typedef OP op; -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE typedef bin_loader ldr; - enum {wide_step = Tvec::nlanes}; + const int wide_step = VTraits::vlanes(); #if !CV_NEON && CV_SIMD_WIDTH == 16 - enum {wide_step_l = wide_step * 2}; + const int wide_step_l = wide_step * 2; #else - enum {wide_step_l = wide_step}; + const int wide_step_l = wide_step; #endif #endif // CV_SIMD @@ -414,7 +414,7 @@ static void bin_loop(const T1* src1, size_t step1, const T1* src2, size_t step2, { int x = 0; - #if CV_SIMD + #if CV_SIMD || CV_SIMD_SCALABLE #if !CV_NEON && !CV_MSA if (is_aligned(src1, src2, dst)) { @@ 
-587,7 +587,7 @@ template struct op_cmplt { static inline Tvec r(const Tvec& a, const Tvec& b) - { return a < b; } + { return v_lt(a, b); } static inline uchar r(T1 a, T1 b) { return (uchar)-(int)(a < b); } }; @@ -596,7 +596,7 @@ template struct op_cmple { static inline Tvec r(const Tvec& a, const Tvec& b) - { return a <= b; } + { return v_le(a, b); } static inline uchar r(T1 a, T1 b) { return (uchar)-(int)(a <= b); } }; @@ -605,7 +605,7 @@ template struct op_cmpeq { static inline Tvec r(const Tvec& a, const Tvec& b) - { return a == b; } + { return v_eq(a, b); } static inline uchar r(T1 a, T1 b) { return (uchar)-(int)(a == b); } }; @@ -614,14 +614,14 @@ template struct op_cmpne { static inline Tvec r(const Tvec& a, const Tvec& b) - { return a != b; } + { return v_ne(a, b); } static inline uchar r(T1 a, T1 b) { return (uchar)-(int)(a != b); } }; //////////////////////////// Loaders ///////////////////////////////// -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE // todo: add support for RW alignment & stream template class OP, typename T1, typename Tvec> struct cmp_loader_n @@ -646,10 +646,10 @@ template class OP, typename T1, typename Tv struct cmp_loader_n { typedef OP op; - enum {step = Tvec::nlanes}; static inline void l(const T1* src1, const T1* src2, uchar* dst) { + const int step = VTraits::vlanes(); Tvec c0 = op::r(vx_load(src1), vx_load(src2)); Tvec c1 = op::r(vx_load(src1 + step), vx_load(src2 + step)); v_store(dst, v_pack_b(v_reinterpret_as_u16(c0), v_reinterpret_as_u16(c1))); @@ -660,10 +660,10 @@ template class OP, typename T1, typename Tv struct cmp_loader_n { typedef OP op; - enum {step = Tvec::nlanes}; static inline void l(const T1* src1, const T1* src2, uchar* dst) { + const int step = VTraits::vlanes(); v_uint32 c0 = v_reinterpret_as_u32(op::r(vx_load(src1), vx_load(src2))); v_uint32 c1 = v_reinterpret_as_u32(op::r(vx_load(src1 + step), vx_load(src2 + step))); v_uint32 c2 = v_reinterpret_as_u32(op::r(vx_load(src1 + step * 2), vx_load(src2 + step * 
2))); @@ -676,10 +676,10 @@ template class OP, typename T1, typename Tv struct cmp_loader_n { typedef OP op; - enum {step = Tvec::nlanes}; static inline void l(const T1* src1, const T1* src2, uchar* dst) { + const int step = VTraits::vlanes(); v_uint64 c0 = v_reinterpret_as_u64(op::r(vx_load(src1), vx_load(src2))); v_uint64 c1 = v_reinterpret_as_u64(op::r(vx_load(src1 + step), vx_load(src2 + step))); v_uint64 c2 = v_reinterpret_as_u64(op::r(vx_load(src1 + step * 2), vx_load(src2 + step * 2))); @@ -701,9 +701,9 @@ template class OP, typename T1, typename Tv static void cmp_loop(const T1* src1, size_t step1, const T1* src2, size_t step2, uchar* dst, size_t step, int width, int height) { typedef OP op; -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE typedef cmp_loader_n ldr; - enum {wide_step = Tvec::nlanes * sizeof(T1)}; + const int wide_step = VTraits::vlanes() * sizeof(T1); #endif // CV_SIMD step1 /= sizeof(T1); @@ -713,7 +713,7 @@ static void cmp_loop(const T1* src1, size_t step1, const T1* src2, size_t step2, { int x = 0; - #if CV_SIMD + #if CV_SIMD || CV_SIMD_SCALABLE for (; x <= width - wide_step; x += wide_step) { ldr::l(src1 + x, src2 + x, dst + x); @@ -880,7 +880,7 @@ DEFINE_SIMD_ALL(cmp) //////////////////////////// Loaders /////////////////////////////// -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE // todo: add support for RW alignment & stream template class OP, typename T1, typename T2, typename Tvec> struct scalar_loader_n @@ -1013,10 +1013,10 @@ template class OP, typename T2 struct scalar_loader_n { typedef OP op; - enum {step = v_int32::nlanes}; static inline void l(const int* src1, const int* src2, const T2* scalar, int* dst) { + const int step = VTraits::vlanes(); v_int32 v_src1 = vx_load(src1); v_int32 v_src2 = vx_load(src2); v_int32 v_src1s = vx_load(src1 + step); @@ -1043,6 +1043,7 @@ struct scalar_loader_n static inline void l(const int* src1, const T2* scalar, int* dst) { + const int step = VTraits::vlanes(); v_int32 v_src1 = vx_load(src1); 
v_int32 v_src1s = vx_load(src1 + step); @@ -1068,10 +1069,9 @@ template class OP, typename T2 struct scalar_loader_n { typedef OP op; - enum {step = v_float32::nlanes}; - static inline void l(const float* src1, const float* src2, const T2* scalar, float* dst) { + const int step = VTraits::vlanes(); v_float32 v_src1 = vx_load(src1); v_float32 v_src2 = vx_load(src2); v_float32 v_src1s = vx_load(src1 + step); @@ -1086,6 +1086,7 @@ struct scalar_loader_n static inline void l(const float* src1, const T2* scalar, float* dst) { + const int step = VTraits::vlanes(); v_float32 v_src1 = vx_load(src1); v_float32 v_src1s = vx_load(src1 + step); @@ -1262,10 +1263,10 @@ static void scalar_loop(const T1* src1, size_t step1, const T1* src2, size_t ste T1* dst, size_t step, int width, int height, const T2* scalar) { typedef OP op; -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE typedef scalar_loader_n ldr; - const int wide_step = sizeof(T1) > sizeof(ushort) ? Tvec::nlanes * 2 : - sizeof(T1) == sizeof(uchar) ? Tvec::nlanes / 2 : Tvec::nlanes; + const int wide_step = sizeof(T1) > sizeof(ushort) ? VTraits::vlanes() * 2 : + sizeof(T1) == sizeof(uchar) ? VTraits::vlanes() / 2 : VTraits::vlanes(); #endif // CV_SIMD step1 /= sizeof(T1); @@ -1276,7 +1277,7 @@ static void scalar_loop(const T1* src1, size_t step1, const T1* src2, size_t ste { int x = 0; - #if CV_SIMD + #if CV_SIMD || CV_SIMD_SCALABLE for (; x <= width - wide_step; x += wide_step) { ldr::l(src1 + x, src2 + x, scalar, dst + x); @@ -1308,10 +1309,10 @@ template class OP, typename T1 static void scalar_loop(const T1* src1, size_t step1, T1* dst, size_t step, int width, int height, const T2* scalar) { typedef OP op; -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE typedef scalar_loader_n ldr; - const int wide_step = sizeof(T1) > sizeof(ushort) ? Tvec::nlanes * 2 : - sizeof(T1) == sizeof(uchar) ? Tvec::nlanes / 2 : Tvec::nlanes; + const int wide_step = sizeof(T1) > sizeof(ushort) ? 
VTraits::vlanes() * 2 : + sizeof(T1) == sizeof(uchar) ? VTraits::vlanes() / 2 : VTraits::vlanes(); #endif // CV_SIMD step1 /= sizeof(T1); @@ -1321,7 +1322,7 @@ static void scalar_loop(const T1* src1, size_t step1, T1* dst, size_t step, int { int x = 0; - #if CV_SIMD + #if CV_SIMD || CV_SIMD_SCALABLE for (; x <= width - wide_step; x += wide_step) { ldr::l(src1 + x, scalar, dst + x); @@ -1428,7 +1429,7 @@ template struct op_mul { static inline Tvec r(const Tvec& a, const Tvec& b) - { return a * b; } + { return v_mul(a, b); } static inline T1 r(T1 a, T1 b) { return saturate_cast(a * b); } }; @@ -1436,11 +1437,11 @@ struct op_mul template struct op_mul_scale { -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE static inline v_float32 r(const v_float32& a, const v_float32& b, const T2* scalar) { const v_float32 v_scalar = vx_setall_f32(*scalar); - return v_scalar * a * b; + return v_mul(v_scalar , a , b); } #endif static inline T1 r(T1 a, T1 b, const T2* scalar) @@ -1456,7 +1457,7 @@ struct op_mul_scale static inline v_float64 r(const v_float64& a, const v_float64& b, const double* scalar) { const v_float64 v_scalar = vx_setall_f64(*scalar); - return v_scalar * a * b; + return v_mul(v_mul(v_scalar, a), b); } #endif static inline double r(double a, double b, const double* scalar) @@ -1569,7 +1570,7 @@ template struct op_div_f { static inline Tvec r(const Tvec& a, const Tvec& b) - { return a / b; } + { return v_div(a, b); } static inline T1 r(T1 a, T1 b) { return a / b; } }; @@ -1577,16 +1578,16 @@ struct op_div_f template struct op_div_scale { -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE static inline v_float32 r(const v_float32& a, const v_float32& b, const T2* scalar) { const v_float32 v_scalar = vx_setall_f32(*scalar); - return a * v_scalar / b; + return v_div(v_mul(a, v_scalar), b); } static inline Tvec pre(const Tvec& denom, const Tvec& res) { - const Tvec v_zero = vx_setall(0); - return v_select(denom == v_zero, v_zero, res); + const Tvec v_zero = 
vx_setall::lane_type>(0); + return v_select(v_eq(denom, v_zero), v_zero, res); } #endif static inline T1 r(T1 a, T1 denom, const T2* scalar) @@ -1599,11 +1600,11 @@ struct op_div_scale template<> struct op_div_scale { -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE static inline v_float32 r(const v_float32& a, const v_float32& b, const float* scalar) { const v_float32 v_scalar = vx_setall_f32(*scalar); - return a * v_scalar / b; + return v_div(v_mul(a, v_scalar), b); } #endif static inline float r(float a, float denom, const float* scalar) @@ -1617,7 +1618,7 @@ struct op_div_scale static inline v_float64 r(const v_float64& a, const v_float64& b, const double* scalar) { const v_float64 v_scalar = vx_setall_f64(*scalar); - return a * v_scalar / b; + return v_div(v_mul(a, v_scalar), b); } #endif static inline double r(double a, double denom, const double* scalar) @@ -1685,7 +1686,7 @@ DEFINE_SIMD_ALL(div, div_loop) template struct op_add_scale { -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE static inline v_float32 r(const v_float32& a, const v_float32& b, const T2* scalar) { const v_float32 v_alpha = vx_setall_f32(*scalar); @@ -1718,7 +1719,7 @@ struct op_add_scale template struct op_add_weighted { -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE static inline v_float32 r(const v_float32& a, const v_float32& b, const T2* scalars) { const v_float32 v_alpha = vx_setall_f32(scalars[0]); @@ -1835,16 +1836,16 @@ DEFINE_SIMD_F64(addWeighted, add_weighted_loop_d) template struct op_recip { -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE static inline v_float32 r(const v_float32& a, const T2* scalar) { const v_float32 v_scalar = vx_setall_f32(*scalar); - return v_scalar / a; + return v_div(v_scalar, a); } static inline Tvec pre(const Tvec& denom, const Tvec& res) { - const Tvec v_zero = vx_setall(0); - return v_select(denom == v_zero, v_zero, res); + const Tvec v_zero = vx_setall::lane_type>(0); + return v_select(v_eq(denom, v_zero), v_zero, res); } #endif static inline T1 r(T1 
denom, const T2* scalar) @@ -1857,11 +1858,11 @@ struct op_recip template<> struct op_recip { -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE static inline v_float32 r(const v_float32& a, const float* scalar) { const v_float32 v_scalar = vx_setall_f32(*scalar); - return v_scalar / a; + return v_div(v_scalar, a); } #endif static inline float r(float denom, const float* scalar) @@ -1875,7 +1876,7 @@ struct op_recip static inline v_float64 r(const v_float64& a, const double* scalar) { const v_float64 v_scalar = vx_setall_f64(*scalar); - return v_scalar / a; + return v_div(v_scalar, a); } #endif static inline double r(double denom, const double* scalar) diff --git a/modules/core/src/convert.hpp b/modules/core/src/convert.hpp index 4b9ddbb413..c689276218 100644 --- a/modules/core/src/convert.hpp +++ b/modules/core/src/convert.hpp @@ -11,7 +11,7 @@ namespace cv { -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) static inline void vx_load_as(const uchar* ptr, v_float32& a) { a = v_cvt_f32(v_reinterpret_as_s32(vx_load_expand_q(ptr))); } @@ -62,7 +62,7 @@ static inline void vx_load_pair_as(const schar* ptr, v_uint16& a, v_uint16& b) } static inline void vx_load_pair_as(const ushort* ptr, v_uint16& a, v_uint16& b) -{ a = vx_load(ptr); b = vx_load(ptr + v_uint16::nlanes); } +{ a = vx_load(ptr); b = vx_load(ptr + VTraits::vlanes()); } static inline void vx_load_pair_as(const uchar* ptr, v_int16& a, v_int16& b) { @@ -76,7 +76,7 @@ static inline void vx_load_pair_as(const schar* ptr, v_int16& a, v_int16& b) { v_expand(vx_load(ptr), a, b); } static inline void vx_load_pair_as(const short* ptr, v_int16& a, v_int16& b) -{ a = vx_load(ptr); b = vx_load(ptr + v_uint16::nlanes); } +{ a = vx_load(ptr); b = vx_load(ptr + VTraits::vlanes()); } static inline void vx_load_pair_as(const uchar* ptr, v_int32& a, v_int32& b) { @@ -105,7 +105,7 @@ static inline void vx_load_pair_as(const short* ptr, v_int32& a, v_int32& b) static inline void vx_load_pair_as(const int* ptr, v_int32& a, v_int32& b) 
{ a = vx_load(ptr); - b = vx_load(ptr + v_int32::nlanes); + b = vx_load(ptr + VTraits::vlanes()); } static inline void vx_load_pair_as(const uchar* ptr, v_float32& a, v_float32& b) @@ -142,18 +142,18 @@ static inline void vx_load_pair_as(const short* ptr, v_float32& a, v_float32& b) static inline void vx_load_pair_as(const int* ptr, v_float32& a, v_float32& b) { - v_int32 ia = vx_load(ptr), ib = vx_load(ptr + v_int32::nlanes); + v_int32 ia = vx_load(ptr), ib = vx_load(ptr + VTraits::vlanes()); a = v_cvt_f32(ia); b = v_cvt_f32(ib); } static inline void vx_load_pair_as(const float* ptr, v_float32& a, v_float32& b) -{ a = vx_load(ptr); b = vx_load(ptr + v_float32::nlanes); } +{ a = vx_load(ptr); b = vx_load(ptr + VTraits::vlanes()); } static inline void vx_load_pair_as(const float16_t* ptr, v_float32& a, v_float32& b) { a = vx_load_expand(ptr); - b = vx_load_expand(ptr + v_float32::nlanes); + b = vx_load_expand(ptr + VTraits::vlanes()); } static inline void v_store_pair_as(uchar* ptr, const v_uint16& a, const v_uint16& b) @@ -169,7 +169,7 @@ static inline void v_store_pair_as(schar* ptr, const v_uint16& a, const v_uint16 } static inline void v_store_pair_as(ushort* ptr, const v_uint16& a, const v_uint16& b) -{ v_store(ptr, a); v_store(ptr + v_uint16::nlanes, b); } +{ v_store(ptr, a); v_store(ptr + VTraits::vlanes(), b); } static inline void v_store_pair_as(uchar* ptr, const v_int16& a, const v_int16& b) { v_store(ptr, v_pack_u(a, b)); } @@ -178,7 +178,7 @@ static inline void v_store_pair_as(schar* ptr, const v_int16& a, const v_int16& { v_store(ptr, v_pack(a, b)); } static inline void v_store_pair_as(short* ptr, const v_int16& a, const v_int16& b) -{ v_store(ptr, a); v_store(ptr + v_int16::nlanes, b); } +{ v_store(ptr, a); v_store(ptr + VTraits::vlanes(), b); } static inline void v_store_pair_as(uchar* ptr, const v_int32& a, const v_int32& b) { v_pack_u_store(ptr, v_pack(a, b)); } @@ -195,7 +195,7 @@ static inline void v_store_pair_as(short* ptr, const v_int32& a, 
const v_int32& static inline void v_store_pair_as(int* ptr, const v_int32& a, const v_int32& b) { v_store(ptr, a); - v_store(ptr + v_int32::nlanes, b); + v_store(ptr + VTraits::vlanes(), b); } static inline void v_store_pair_as(uchar* ptr, const v_float32& a, const v_float32& b) @@ -214,24 +214,24 @@ static inline void v_store_pair_as(int* ptr, const v_float32& a, const v_float32 { v_int32 ia = v_round(a), ib = v_round(b); v_store(ptr, ia); - v_store(ptr + v_int32::nlanes, ib); + v_store(ptr + VTraits::vlanes(), ib); } static inline void v_store_pair_as(float* ptr, const v_float32& a, const v_float32& b) -{ v_store(ptr, a); v_store(ptr + v_float32::nlanes, b); } +{ v_store(ptr, a); v_store(ptr + VTraits::vlanes(), b); } -#if CV_SIMD_64F +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) static inline void vx_load_as(const double* ptr, v_float32& a) { - v_float64 v0 = vx_load(ptr), v1 = vx_load(ptr + v_float64::nlanes); + v_float64 v0 = vx_load(ptr), v1 = vx_load(ptr + VTraits::vlanes()); a = v_cvt_f32(v0, v1); } static inline void vx_load_pair_as(const double* ptr, v_int32& a, v_int32& b) { - v_float64 v0 = vx_load(ptr), v1 = vx_load(ptr + v_float64::nlanes); - v_float64 v2 = vx_load(ptr + v_float64::nlanes*2), v3 = vx_load(ptr + v_float64::nlanes*3); + v_float64 v0 = vx_load(ptr), v1 = vx_load(ptr + VTraits::vlanes()); + v_float64 v2 = vx_load(ptr + VTraits::vlanes()*2), v3 = vx_load(ptr + VTraits::vlanes()*3); v_int32 iv0 = v_round(v0), iv1 = v_round(v1); v_int32 iv2 = v_round(v2), iv3 = v_round(v3); a = v_combine_low(iv0, iv1); @@ -240,8 +240,8 @@ static inline void vx_load_pair_as(const double* ptr, v_int32& a, v_int32& b) static inline void vx_load_pair_as(const double* ptr, v_float32& a, v_float32& b) { - v_float64 v0 = vx_load(ptr), v1 = vx_load(ptr + v_float64::nlanes); - v_float64 v2 = vx_load(ptr + v_float64::nlanes*2), v3 = vx_load(ptr + v_float64::nlanes*3); + v_float64 v0 = vx_load(ptr), v1 = vx_load(ptr + VTraits::vlanes()); + v_float64 v2 = vx_load(ptr + 
VTraits::vlanes()*2), v3 = vx_load(ptr + VTraits::vlanes()*3); a = v_cvt_f32(v0, v1); b = v_cvt_f32(v2, v3); } @@ -291,7 +291,7 @@ static inline void vx_load_pair_as(const float* ptr, v_float64& a, v_float64& b) static inline void vx_load_pair_as(const double* ptr, v_float64& a, v_float64& b) { a = vx_load(ptr); - b = vx_load(ptr + v_float64::nlanes); + b = vx_load(ptr + VTraits::vlanes()); } static inline void vx_load_pair_as(const float16_t* ptr, v_float64& a, v_float64& b) @@ -305,7 +305,7 @@ static inline void v_store_as(double* ptr, const v_float32& a) { v_float64 fa0 = v_cvt_f64(a), fa1 = v_cvt_f64_high(a); v_store(ptr, fa0); - v_store(ptr + v_float64::nlanes, fa1); + v_store(ptr + VTraits::vlanes(), fa1); } static inline void v_store_pair_as(double* ptr, const v_int32& a, const v_int32& b) @@ -314,9 +314,9 @@ static inline void v_store_pair_as(double* ptr, const v_int32& a, const v_int32& v_float64 fb0 = v_cvt_f64(b), fb1 = v_cvt_f64_high(b); v_store(ptr, fa0); - v_store(ptr + v_float64::nlanes, fa1); - v_store(ptr + v_float64::nlanes*2, fb0); - v_store(ptr + v_float64::nlanes*3, fb1); + v_store(ptr + VTraits::vlanes(), fa1); + v_store(ptr + VTraits::vlanes()*2, fb0); + v_store(ptr + VTraits::vlanes()*3, fb1); } static inline void v_store_pair_as(double* ptr, const v_float32& a, const v_float32& b) @@ -325,15 +325,15 @@ static inline void v_store_pair_as(double* ptr, const v_float32& a, const v_floa v_float64 fb0 = v_cvt_f64(b), fb1 = v_cvt_f64_high(b); v_store(ptr, fa0); - v_store(ptr + v_float64::nlanes, fa1); - v_store(ptr + v_float64::nlanes*2, fb0); - v_store(ptr + v_float64::nlanes*3, fb1); + v_store(ptr + VTraits::vlanes(), fa1); + v_store(ptr + VTraits::vlanes()*2, fb0); + v_store(ptr + VTraits::vlanes()*3, fb1); } static inline void v_store_pair_as(double* ptr, const v_float64& a, const v_float64& b) { v_store(ptr, a); - v_store(ptr + v_float64::nlanes, b); + v_store(ptr + VTraits::vlanes(), b); } static inline void v_store_pair_as(int* ptr, const 
v_float64& a, const v_float64& b) diff --git a/modules/core/src/convert.simd.hpp b/modules/core/src/convert.simd.hpp index 5154041b6d..c126450a13 100644 --- a/modules/core/src/convert.simd.hpp +++ b/modules/core/src/convert.simd.hpp @@ -39,8 +39,8 @@ void cvt16f32f( const float16_t* src, float* dst, int len ) { CV_INSTRUMENT_REGION(); int j = 0; -#if CV_SIMD - const int VECSZ = v_float32::nlanes; +#if (CV_SIMD || CV_SIMD_SCALABLE) + const int VECSZ = VTraits::vlanes(); for( ; j < len; j += VECSZ ) { if( j > len - VECSZ ) @@ -60,8 +60,8 @@ void cvt32f16f( const float* src, float16_t* dst, int len ) { CV_INSTRUMENT_REGION(); int j = 0; -#if CV_SIMD - const int VECSZ = v_float32::nlanes; +#if (CV_SIMD || CV_SIMD_SCALABLE) + const int VECSZ = VTraits::vlanes(); for( ; j < len; j += VECSZ ) { if( j > len - VECSZ ) @@ -108,8 +108,8 @@ cvt_( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, Size size ) for( int i = 0; i < size.height; i++, src += sstep, dst += dstep ) { int j = 0; -#if CV_SIMD - const int VECSZ = _Twvec::nlanes*2; +#if (CV_SIMD || CV_SIMD_SCALABLE) + const int VECSZ = VTraits<_Twvec>::vlanes()*2; for( ; j < size.width; j += VECSZ ) { if( j > size.width - VECSZ ) @@ -139,8 +139,8 @@ cvt1_( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, Size size ) for( int i = 0; i < size.height; i++, src += sstep, dst += dstep ) { int j = 0; -#if CV_SIMD - const int VECSZ = _Twvec::nlanes; +#if (CV_SIMD || CV_SIMD_SCALABLE) + const int VECSZ = VTraits<_Twvec>::vlanes(); for( ; j < size.width; j += VECSZ ) { if( j > size.width - VECSZ ) diff --git a/modules/core/src/convert_scale.simd.hpp b/modules/core/src/convert_scale.simd.hpp index 2c6d55462b..c79a33f1b1 100644 --- a/modules/core/src/convert_scale.simd.hpp +++ b/modules/core/src/convert_scale.simd.hpp @@ -22,9 +22,9 @@ template inline void cvtabs_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, Size size, float a, float b ) { -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) v_float32 va = 
vx_setall_f32(a), vb = vx_setall_f32(b); - const int VECSZ = v_float32::nlanes*2; + const int VECSZ = VTraits::vlanes()*2; #endif sstep /= sizeof(src[0]); dstep /= sizeof(dst[0]); @@ -32,7 +32,7 @@ cvtabs_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, for( int i = 0; i < size.height; i++, src += sstep, dst += dstep ) { int j = 0; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) for( ; j < size.width; j += VECSZ ) { if( j > size.width - VECSZ ) @@ -58,9 +58,9 @@ template inline void cvtabs1_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, Size size, float a, float b ) { -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) v_float32 va = vx_setall_f32(a), vb = vx_setall_f32(b); - const int VECSZ = v_float32::nlanes*2; + const int VECSZ = VTraits::vlanes()*2; #endif sstep /= sizeof(src[0]); dstep /= sizeof(dst[0]); @@ -68,7 +68,7 @@ cvtabs1_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, for( int i = 0; i < size.height; i++, src += sstep, dst += dstep ) { int j = 0; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) for( ; j < size.width; j += VECSZ ) { if( j > size.width - VECSZ ) @@ -92,9 +92,9 @@ template inline void cvt_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, Size size, float a, float b ) { -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) v_float32 va = vx_setall_f32(a), vb = vx_setall_f32(b); - const int VECSZ = v_float32::nlanes*2; + const int VECSZ = VTraits::vlanes()*2; #endif sstep /= sizeof(src[0]); dstep /= sizeof(dst[0]); @@ -102,7 +102,7 @@ cvt_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, for( int i = 0; i < size.height; i++, src += sstep, dst += dstep ) { int j = 0; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) for( ; j < size.width; j += VECSZ ) { if( j > size.width - VECSZ ) @@ -128,9 +128,9 @@ template inline void cvt1_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, Size size, float a, float b ) { -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) v_float32 va = vx_setall_f32(a), vb = 
vx_setall_f32(b); - const int VECSZ = v_float32::nlanes; + const int VECSZ = VTraits::vlanes(); #endif sstep /= sizeof(src[0]); dstep /= sizeof(dst[0]); @@ -138,7 +138,7 @@ cvt1_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, for( int i = 0; i < size.height; i++, src += sstep, dst += dstep ) { int j = 0; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) for( ; j < size.width; j += VECSZ ) { if( j > size.width - VECSZ ) @@ -163,9 +163,9 @@ template inline void cvt_64f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, Size size, double a, double b ) { -#if CV_SIMD_64F +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) v_float64 va = vx_setall_f64(a), vb = vx_setall_f64(b); - const int VECSZ = v_float64::nlanes*2; + const int VECSZ = VTraits::vlanes()*2; #endif sstep /= sizeof(src[0]); dstep /= sizeof(dst[0]); @@ -173,7 +173,7 @@ cvt_64f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, for( int i = 0; i < size.height; i++, src += sstep, dst += dstep ) { int j = 0; -#if CV_SIMD_64F +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) for( ; j < size.width; j += VECSZ ) { if( j > size.width - VECSZ ) diff --git a/modules/core/src/copy.cpp b/modules/core/src/copy.cpp index 89948fb878..aab4fbd3f0 100644 --- a/modules/core/src/copy.cpp +++ b/modules/core/src/copy.cpp @@ -156,15 +156,15 @@ copyMask_(const uchar* _src, size_t sstep, const uchar* mask, size_t mste const uchar* src = (const uchar*)_src; uchar* dst = (uchar*)_dst; int x = 0; - #if CV_SIMD + #if (CV_SIMD || CV_SIMD_SCALABLE) { v_uint8 v_zero = vx_setzero_u8(); - for( ; x <= size.width - v_uint8::nlanes; x += v_uint8::nlanes ) + for( ; x <= size.width - VTraits::vlanes(); x += VTraits::vlanes() ) { v_uint8 v_src = vx_load(src + x), v_dst = vx_load(dst + x), - v_nmask = vx_load(mask + x) == v_zero; + v_nmask = v_eq(vx_load(mask + x), v_zero); v_dst = v_select(v_nmask, v_dst, v_src); v_store(dst + x, v_dst); @@ -188,23 +188,23 @@ copyMask_(const uchar* _src, size_t sstep, const uchar* mask, size_t mst const 
ushort* src = (const ushort*)_src; ushort* dst = (ushort*)_dst; int x = 0; - #if CV_SIMD + #if (CV_SIMD || CV_SIMD_SCALABLE) { v_uint8 v_zero = vx_setzero_u8(); - for( ; x <= size.width - v_uint8::nlanes; x += v_uint8::nlanes ) + for( ; x <= size.width - VTraits::vlanes(); x += VTraits::vlanes() ) { - v_uint16 v_src1 = vx_load(src + x), v_src2 = vx_load(src + x + v_uint16::nlanes), - v_dst1 = vx_load(dst + x), v_dst2 = vx_load(dst + x + v_uint16::nlanes); + v_uint16 v_src1 = vx_load(src + x), v_src2 = vx_load(src + x + VTraits::vlanes()), + v_dst1 = vx_load(dst + x), v_dst2 = vx_load(dst + x + VTraits::vlanes()); v_uint8 v_nmask1, v_nmask2; - v_uint8 v_nmask = vx_load(mask + x) == v_zero; + v_uint8 v_nmask = v_eq(vx_load(mask + x), v_zero); v_zip(v_nmask, v_nmask, v_nmask1, v_nmask2); v_dst1 = v_select(v_reinterpret_as_u16(v_nmask1), v_dst1, v_src1); v_dst2 = v_select(v_reinterpret_as_u16(v_nmask2), v_dst2, v_src2); v_store(dst + x, v_dst1); - v_store(dst + x + v_uint16::nlanes, v_dst2); + v_store(dst + x + VTraits::vlanes(), v_dst2); } } vx_cleanup(); diff --git a/modules/core/src/count_non_zero.simd.hpp b/modules/core/src/count_non_zero.simd.hpp index 6994564127..ce7c75aa54 100644 --- a/modules/core/src/count_non_zero.simd.hpp +++ b/modules/core/src/count_non_zero.simd.hpp @@ -32,8 +32,8 @@ static int countNonZero_(const T* src, int len ) static int countNonZero8u( const uchar* src, int len ) { int i=0, nz = 0; -#if CV_SIMD - int len0 = len & -v_uint8::nlanes; +#if (CV_SIMD || CV_SIMD_SCALABLE) + int len0 = len & -VTraits::vlanes(); v_uint8 v_zero = vx_setzero_u8(); v_uint8 v_one = vx_setall_u8(1); @@ -42,20 +42,20 @@ static int countNonZero8u( const uchar* src, int len ) { v_uint16 v_sum16 = vx_setzero_u16(); int j = i; - while (j < std::min(len0, i + 65280 * v_uint16::nlanes)) + while (j < std::min(len0, i + 65280 * VTraits::vlanes())) { v_uint8 v_sum8 = vx_setzero_u8(); int k = j; - for (; k < std::min(len0, j + 255 * v_uint8::nlanes); k += v_uint8::nlanes) - 
v_sum8 += v_one & (vx_load(src + k) == v_zero); + for (; k < std::min(len0, j + 255 * VTraits::vlanes()); k += VTraits::vlanes()) + v_sum8 = v_add(v_sum8, v_and(v_one, v_eq(vx_load(src + k), v_zero))); v_uint16 part1, part2; v_expand(v_sum8, part1, part2); - v_sum16 += part1 + part2; + v_sum16 = v_add(v_sum16, v_add(part1, part2)); j = k; } v_uint32 part1, part2; v_expand(v_sum16, part1, part2); - v_sum32 += part1 + part2; + v_sum32 = v_add(v_sum32, v_add(part1, part2)); i = j; } nz = i - v_reduce_sum(v_sum32); @@ -69,8 +69,8 @@ static int countNonZero8u( const uchar* src, int len ) static int countNonZero16u( const ushort* src, int len ) { int i = 0, nz = 0; -#if CV_SIMD - int len0 = len & -v_int8::nlanes; +#if (CV_SIMD || CV_SIMD_SCALABLE) + int len0 = len & -VTraits::vlanes(); v_uint16 v_zero = vx_setzero_u16(); v_int8 v_one = vx_setall_s8(1); @@ -79,20 +79,20 @@ static int countNonZero16u( const ushort* src, int len ) { v_int16 v_sum16 = vx_setzero_s16(); int j = i; - while (j < std::min(len0, i + 32766 * v_int16::nlanes)) + while (j < std::min(len0, i + 32766 * VTraits::vlanes())) { v_int8 v_sum8 = vx_setzero_s8(); int k = j; - for (; k < std::min(len0, j + 127 * v_int8::nlanes); k += v_int8::nlanes) - v_sum8 += v_one & v_pack(v_reinterpret_as_s16(vx_load(src + k) == v_zero), v_reinterpret_as_s16(vx_load(src + k + v_uint16::nlanes) == v_zero)); + for (; k < std::min(len0, j + 127 * VTraits::vlanes()); k += VTraits::vlanes()) + v_sum8 = v_add(v_sum8, v_and(v_one, v_pack(v_reinterpret_as_s16(v_eq(vx_load(src + k), v_zero)), v_reinterpret_as_s16(v_eq(vx_load(src + k + VTraits::vlanes()), v_zero))))); v_int16 part1, part2; v_expand(v_sum8, part1, part2); - v_sum16 += part1 + part2; + v_sum16 = v_add(v_sum16, v_add(part1, part2)); j = k; } v_int32 part1, part2; v_expand(v_sum16, part1, part2); - v_sum32 += part1 + part2; + v_sum32 = v_add(v_sum32, v_add(part1, part2)); i = j; } nz = i - v_reduce_sum(v_sum32); @@ -104,8 +104,8 @@ static int countNonZero16u( const 
ushort* src, int len ) static int countNonZero32s( const int* src, int len ) { int i = 0, nz = 0; -#if CV_SIMD - int len0 = len & -v_int8::nlanes; +#if (CV_SIMD || CV_SIMD_SCALABLE) + int len0 = len & -VTraits::vlanes(); v_int32 v_zero = vx_setzero_s32(); v_int8 v_one = vx_setall_s8(1); @@ -114,23 +114,20 @@ static int countNonZero32s( const int* src, int len ) { v_int16 v_sum16 = vx_setzero_s16(); int j = i; - while (j < std::min(len0, i + 32766 * v_int16::nlanes)) + while (j < std::min(len0, i + 32766 * VTraits::vlanes())) { v_int8 v_sum8 = vx_setzero_s8(); int k = j; - for (; k < std::min(len0, j + 127 * v_int8::nlanes); k += v_int8::nlanes) - v_sum8 += v_one & v_pack( - v_pack(vx_load(src + k ) == v_zero, vx_load(src + k + v_int32::nlanes) == v_zero), - v_pack(vx_load(src + k + 2*v_int32::nlanes) == v_zero, vx_load(src + k + 3*v_int32::nlanes) == v_zero) - ); + for (; k < std::min(len0, j + 127 * VTraits::vlanes()); k += VTraits::vlanes()) + v_sum8 = v_add(v_sum8, v_and(v_one, v_pack(v_pack(v_eq(vx_load(src + k), v_zero), v_eq(vx_load(src + k + VTraits::vlanes()), v_zero)), v_pack(v_eq(vx_load(src + k + 2 * VTraits::vlanes()), v_zero), v_eq(vx_load(src + k + 3 * VTraits::vlanes()), v_zero))))); v_int16 part1, part2; v_expand(v_sum8, part1, part2); - v_sum16 += part1 + part2; + v_sum16 = v_add(v_sum16, v_add(part1, part2)); j = k; } v_int32 part1, part2; v_expand(v_sum16, part1, part2); - v_sum32 += part1 + part2; + v_sum32 = v_add(v_sum32, v_add(part1, part2)); i = j; } nz = i - v_reduce_sum(v_sum32); @@ -142,8 +139,8 @@ static int countNonZero32s( const int* src, int len ) static int countNonZero32f( const float* src, int len ) { int i = 0, nz = 0; -#if CV_SIMD - int len0 = len & -v_int8::nlanes; +#if (CV_SIMD || CV_SIMD_SCALABLE) + int len0 = len & -VTraits::vlanes(); v_float32 v_zero = vx_setzero_f32(); v_int8 v_one = vx_setall_s8(1); @@ -152,23 +149,20 @@ static int countNonZero32f( const float* src, int len ) { v_int16 v_sum16 = vx_setzero_s16(); int j = 
i; - while (j < std::min(len0, i + 32766 * v_int16::nlanes)) + while (j < std::min(len0, i + 32766 * VTraits::vlanes())) { v_int8 v_sum8 = vx_setzero_s8(); int k = j; - for (; k < std::min(len0, j + 127 * v_int8::nlanes); k += v_int8::nlanes) - v_sum8 += v_one & v_pack( - v_pack(v_reinterpret_as_s32(vx_load(src + k ) == v_zero), v_reinterpret_as_s32(vx_load(src + k + v_float32::nlanes) == v_zero)), - v_pack(v_reinterpret_as_s32(vx_load(src + k + 2*v_float32::nlanes) == v_zero), v_reinterpret_as_s32(vx_load(src + k + 3*v_float32::nlanes) == v_zero)) - ); + for (; k < std::min(len0, j + 127 * VTraits::vlanes()); k += VTraits::vlanes()) + v_sum8 = v_add(v_sum8, v_and(v_one, v_pack(v_pack(v_reinterpret_as_s32(v_eq(vx_load(src + k), v_zero)), v_reinterpret_as_s32(v_eq(vx_load(src + k + VTraits::vlanes()), v_zero))), v_pack(v_reinterpret_as_s32(v_eq(vx_load(src + k + 2 * VTraits::vlanes()), v_zero)), v_reinterpret_as_s32(v_eq(vx_load(src + k + 3 * VTraits::vlanes()), v_zero)))))); v_int16 part1, part2; v_expand(v_sum8, part1, part2); - v_sum16 += part1 + part2; + v_sum16 = v_add(v_sum16, v_add(part1, part2)); j = k; } v_int32 part1, part2; v_expand(v_sum16, part1, part2); - v_sum32 += part1 + part2; + v_sum32 = v_add(v_sum32, v_add(part1, part2)); i = j; } nz = i - v_reduce_sum(v_sum32); @@ -180,21 +174,21 @@ static int countNonZero32f( const float* src, int len ) static int countNonZero64f( const double* src, int len ) { int nz = 0, i = 0; -#if CV_SIMD_64F +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) v_int64 sum1 = vx_setzero_s64(); v_int64 sum2 = vx_setzero_s64(); v_float64 zero = vx_setzero_f64(); - int step = v_float64::nlanes * 2; + int step = VTraits::vlanes() * 2; int len0 = len & -step; for(i = 0; i < len0; i += step ) { - sum1 += v_reinterpret_as_s64(vx_load(&src[i]) == zero); - sum2 += v_reinterpret_as_s64(vx_load(&src[i + step / 2]) == zero); + sum1 = v_add(sum1, v_reinterpret_as_s64(v_eq(vx_load(&src[i]), zero))); + sum2 = v_add(sum2, 
v_reinterpret_as_s64(v_eq(vx_load(&src[i + step / 2]), zero))); } // N.B the value is incremented by -1 (0xF...F) for each value - nz = i + (int)v_reduce_sum(sum1 + sum2); + nz = i + (int)v_reduce_sum(v_add(sum1, sum2)); v_cleanup(); #endif return nz + countNonZero_(src + i, len - i); diff --git a/modules/core/src/lapack.cpp b/modules/core/src/lapack.cpp index a644fe15a7..43c6d07d58 100644 --- a/modules/core/src/lapack.cpp +++ b/modules/core/src/lapack.cpp @@ -274,22 +274,21 @@ template struct VBLAS { int dot(const T*, const T*, int, T*) const { return 0; } int givens(T*, T*, int, T, T) const { return 0; } - int givensx(T*, T*, int, T, T, T*, T*) const { return 0; } }; -#if CV_SIMD +#if CV_SIMD // TODO: enable for CV_SIMD_SCALABLE_64F template<> inline int VBLAS::dot(const float* a, const float* b, int n, float* result) const { - if( n < 2*v_float32::nlanes ) + if( n < 2*VTraits::vlanes() ) return 0; int k = 0; v_float32 s0 = vx_setzero_f32(); - for( ; k <= n - v_float32::nlanes; k += v_float32::nlanes ) + for( ; k <= n - VTraits::vlanes(); k += VTraits::vlanes() ) { v_float32 a0 = vx_load(a + k); v_float32 b0 = vx_load(b + k); - s0 += a0 * b0; + s0 = v_add(s0, v_mul(a0, b0)); } *result = v_reduce_sum(s0); vx_cleanup(); @@ -299,16 +298,16 @@ template<> inline int VBLAS::dot(const float* a, const float* b, int n, f template<> inline int VBLAS::givens(float* a, float* b, int n, float c, float s) const { - if( n < v_float32::nlanes) + if( n < VTraits::vlanes()) return 0; int k = 0; v_float32 c4 = vx_setall_f32(c), s4 = vx_setall_f32(s); - for( ; k <= n - v_float32::nlanes; k += v_float32::nlanes ) + for( ; k <= n - VTraits::vlanes(); k += VTraits::vlanes() ) { v_float32 a0 = vx_load(a + k); v_float32 b0 = vx_load(b + k); - v_float32 t0 = (a0 * c4) + (b0 * s4); - v_float32 t1 = (b0 * c4) - (a0 * s4); + v_float32 t0 = v_add(v_mul(a0, c4), v_mul(b0, s4)); + v_float32 t1 = v_sub(v_mul(b0, c4), v_mul(a0, s4)); v_store(a + k, t0); v_store(b + k, t1); } @@ -317,44 +316,19 @@ 
template<> inline int VBLAS::givens(float* a, float* b, int n, float c, f } -template<> inline int VBLAS::givensx(float* a, float* b, int n, float c, float s, - float* anorm, float* bnorm) const -{ - if( n < v_float32::nlanes) - return 0; - int k = 0; - v_float32 c4 = vx_setall_f32(c), s4 = vx_setall_f32(s); - v_float32 sa = vx_setzero_f32(), sb = vx_setzero_f32(); - for( ; k <= n - v_float32::nlanes; k += v_float32::nlanes ) - { - v_float32 a0 = vx_load(a + k); - v_float32 b0 = vx_load(b + k); - v_float32 t0 = (a0 * c4) + (b0 * s4); - v_float32 t1 = (b0 * c4) - (a0 * s4); - v_store(a + k, t0); - v_store(b + k, t1); - sa += t0 + t0; - sb += t1 + t1; - } - *anorm = v_reduce_sum(sa); - *bnorm = v_reduce_sum(sb); - vx_cleanup(); - return k; -} - -#if CV_SIMD_64F +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) template<> inline int VBLAS::dot(const double* a, const double* b, int n, double* result) const { - if( n < 2*v_float64::nlanes ) + if( n < 2*VTraits::vlanes() ) return 0; int k = 0; v_float64 s0 = vx_setzero_f64(); - for( ; k <= n - v_float64::nlanes; k += v_float64::nlanes ) + for( ; k <= n - VTraits::vlanes(); k += VTraits::vlanes() ) { v_float64 a0 = vx_load(a + k); v_float64 b0 = vx_load(b + k); - s0 += a0 * b0; + s0 = v_add(s0, v_mul(a0, b0)); } double sbuf[2]; v_store(sbuf, s0); @@ -368,12 +342,12 @@ template<> inline int VBLAS::givens(double* a, double* b, int n, double { int k = 0; v_float64 c2 = vx_setall_f64(c), s2 = vx_setall_f64(s); - for( ; k <= n - v_float64::nlanes; k += v_float64::nlanes ) + for( ; k <= n - VTraits::vlanes(); k += VTraits::vlanes() ) { v_float64 a0 = vx_load(a + k); v_float64 b0 = vx_load(b + k); - v_float64 t0 = (a0 * c2) + (b0 * s2); - v_float64 t1 = (b0 * c2) - (a0 * s2); + v_float64 t0 = v_add(v_mul(a0, c2), v_mul(b0, s2)); + v_float64 t1 = v_sub(v_mul(b0, c2), v_mul(a0, s2)); v_store(a + k, t0); v_store(b + k, t1); } @@ -382,30 +356,6 @@ template<> inline int VBLAS::givens(double* a, double* b, int n, double } -template<> inline 
int VBLAS::givensx(double* a, double* b, int n, double c, double s, - double* anorm, double* bnorm) const -{ - int k = 0; - v_float64 c2 = vx_setall_f64(c), s2 = vx_setall_f64(s); - v_float64 sa = vx_setzero_f64(), sb = vx_setzero_f64(); - for( ; k <= n - v_float64::nlanes; k += v_float64::nlanes ) - { - v_float64 a0 = vx_load(a + k); - v_float64 b0 = vx_load(b + k); - v_float64 t0 = (a0 * c2) + (b0 * s2); - v_float64 t1 = (b0 * c2) - (a0 * s2); - v_store(a + k, t0); - v_store(b + k, t1); - sa += t0 * t0; - sb += t1 * t1; - } - double abuf[2], bbuf[2]; - v_store(abuf, sa); - v_store(bbuf, sb); - *anorm = abuf[0] + abuf[1]; - *bnorm = bbuf[0] + bbuf[1]; - return k; -} #endif //CV_SIMD_64F #endif //CV_SIMD @@ -916,7 +866,7 @@ double invert( InputArray _src, OutputArray _dst, int method ) #if CV_SIMD128 const float d_32f = (float)d; const v_float32x4 d_vec(d_32f, -d_32f, -d_32f, d_32f); - v_float32x4 s0 = v_load_halves((const float*)srcdata, (const float*)(srcdata + srcstep)) * d_vec;//0123//3120 + v_float32x4 s0 = v_mul(v_load_halves((const float *)srcdata, (const float *)(srcdata + srcstep)), d_vec);//0123//3120 s0 = v_extract<3>(s0, v_combine_low(v_rotate_right<1>(s0), s0)); v_store_low((float*)dstdata, s0); v_store_high((float*)(dstdata + dststep), s0); @@ -942,10 +892,10 @@ double invert( InputArray _src, OutputArray _dst, int method ) d = 1./d; #if CV_SIMD128_64F v_float64x2 det = v_setall_f64(d); - v_float64x2 s0 = v_load((const double*)srcdata) * det; - v_float64x2 s1 = v_load((const double*)(srcdata+srcstep)) * det; + v_float64x2 s0 = v_mul(v_load((const double *)srcdata), det); + v_float64x2 s1 = v_mul(v_load((const double *)(srcdata + srcstep)), det); v_float64x2 sm = v_extract<1>(s1, s0);//30 - v_float64x2 ss = v_setall(0) - v_extract<1>(s0, s1);//12 + v_float64x2 ss = v_sub(v_setall(0), v_extract<1>(s0, s1));//12 v_store((double*)dstdata, v_combine_low(sm, ss));//31 v_store((double*)(dstdata + dststep), v_combine_high(ss, sm));//20 #else diff --git 
a/modules/core/src/mathfuncs.cpp b/modules/core/src/mathfuncs.cpp index 056be63a71..9e3a1dbad2 100644 --- a/modules/core/src/mathfuncs.cpp +++ b/modules/core/src/mathfuncs.cpp @@ -614,13 +614,13 @@ void polarToCart( InputArray src1, InputArray src2, { k = 0; -#if CV_SIMD - int cWidth = v_float32::nlanes; +#if (CV_SIMD || CV_SIMD_SCALABLE) + int cWidth = VTraits::vlanes(); for( ; k <= len - cWidth; k += cWidth ) { v_float32 v_m = vx_load(mag + k); - v_store(x + k, vx_load(x + k) * v_m); - v_store(y + k, vx_load(y + k) * v_m); + v_store(x + k, v_mul(vx_load(x + k), v_m)); + v_store(y + k, v_mul(vx_load(y + k), v_m)); } vx_cleanup(); #endif @@ -741,7 +741,7 @@ struct iPow_SIMD } }; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) template <> struct iPow_SIMD @@ -751,7 +751,7 @@ struct iPow_SIMD int i = 0; v_uint32 v_1 = vx_setall_u32(1u); - for ( ; i <= len - v_uint16::nlanes; i += v_uint16::nlanes) + for ( ; i <= len - VTraits::vlanes(); i += VTraits::vlanes()) { v_uint32 v_a1 = v_1, v_a2 = v_1; v_uint16 v = vx_load_expand(src + i); @@ -763,16 +763,16 @@ struct iPow_SIMD { if (p & 1) { - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); } - v_b1 *= v_b1; - v_b2 *= v_b2; + v_b1 = v_mul(v_b1, v_b1); + v_b2 = v_mul(v_b2, v_b2); p >>= 1; } - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); v = v_pack(v_a1, v_a2); v_pack_store(dst + i, v); @@ -791,7 +791,7 @@ struct iPow_SIMD int i = 0; v_int32 v_1 = vx_setall_s32(1); - for ( ; i <= len - v_int16::nlanes; i += v_int16::nlanes) + for ( ; i <= len - VTraits::vlanes(); i += VTraits::vlanes()) { v_int32 v_a1 = v_1, v_a2 = v_1; v_int16 v = vx_load_expand(src + i); @@ -803,16 +803,16 @@ struct iPow_SIMD { if (p & 1) { - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); } - v_b1 *= v_b1; - v_b2 *= v_b2; + v_b1 = v_mul(v_b1, v_b1); + v_b2 = v_mul(v_b2, v_b2); p >>= 1; } - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, 
v_b1); + v_a2 = v_mul(v_a2, v_b2); v = v_pack(v_a1, v_a2); v_pack_store(dst + i, v); @@ -831,7 +831,7 @@ struct iPow_SIMD int i = 0; v_uint32 v_1 = vx_setall_u32(1u); - for ( ; i <= len - v_uint16::nlanes; i += v_uint16::nlanes) + for ( ; i <= len - VTraits::vlanes(); i += VTraits::vlanes()) { v_uint32 v_a1 = v_1, v_a2 = v_1; v_uint16 v = vx_load(src + i); @@ -843,16 +843,16 @@ struct iPow_SIMD { if (p & 1) { - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); } - v_b1 *= v_b1; - v_b2 *= v_b2; + v_b1 = v_mul(v_b1, v_b1); + v_b2 = v_mul(v_b2, v_b2); p >>= 1; } - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); v = v_pack(v_a1, v_a2); v_store(dst + i, v); @@ -871,7 +871,7 @@ struct iPow_SIMD int i = 0; v_int32 v_1 = vx_setall_s32(1); - for ( ; i <= len - v_int16::nlanes; i += v_int16::nlanes) + for ( ; i <= len - VTraits::vlanes(); i += VTraits::vlanes()) { v_int32 v_a1 = v_1, v_a2 = v_1; v_int16 v = vx_load(src + i); @@ -883,16 +883,16 @@ struct iPow_SIMD { if (p & 1) { - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); } - v_b1 *= v_b1; - v_b2 *= v_b2; + v_b1 = v_mul(v_b1, v_b1); + v_b2 = v_mul(v_b2, v_b2); p >>= 1; } - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); v = v_pack(v_a1, v_a2); v_store(dst + i, v); @@ -911,29 +911,29 @@ struct iPow_SIMD int i = 0; v_int32 v_1 = vx_setall_s32(1); - for ( ; i <= len - v_int32::nlanes*2; i += v_int32::nlanes*2) + for ( ; i <= len - VTraits::vlanes()*2; i += VTraits::vlanes()*2) { v_int32 v_a1 = v_1, v_a2 = v_1; - v_int32 v_b1 = vx_load(src + i), v_b2 = vx_load(src + i + v_int32::nlanes); + v_int32 v_b1 = vx_load(src + i), v_b2 = vx_load(src + i + VTraits::vlanes()); int p = power; while( p > 1 ) { if (p & 1) { - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); } - v_b1 *= v_b1; - v_b2 *= v_b2; + v_b1 = v_mul(v_b1, v_b1); + v_b2 = 
v_mul(v_b2, v_b2); p >>= 1; } - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); v_store(dst + i, v_a1); - v_store(dst + i + v_int32::nlanes, v_a2); + v_store(dst + i + VTraits::vlanes(), v_a2); } vx_cleanup(); @@ -949,34 +949,34 @@ struct iPow_SIMD int i = 0; v_float32 v_1 = vx_setall_f32(1.f); - for ( ; i <= len - v_float32::nlanes*2; i += v_float32::nlanes*2) + for ( ; i <= len - VTraits::vlanes()*2; i += VTraits::vlanes()*2) { v_float32 v_a1 = v_1, v_a2 = v_1; - v_float32 v_b1 = vx_load(src + i), v_b2 = vx_load(src + i + v_float32::nlanes); + v_float32 v_b1 = vx_load(src + i), v_b2 = vx_load(src + i + VTraits::vlanes()); int p = std::abs(power); if( power < 0 ) { - v_b1 = v_1 / v_b1; - v_b2 = v_1 / v_b2; + v_b1 = v_div(v_1, v_b1); + v_b2 = v_div(v_1, v_b2); } while( p > 1 ) { if (p & 1) { - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); } - v_b1 *= v_b1; - v_b2 *= v_b2; + v_b1 = v_mul(v_b1, v_b1); + v_b2 = v_mul(v_b2, v_b2); p >>= 1; } - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); v_store(dst + i, v_a1); - v_store(dst + i + v_float32::nlanes, v_a2); + v_store(dst + i + VTraits::vlanes(), v_a2); } vx_cleanup(); @@ -984,7 +984,7 @@ struct iPow_SIMD } }; -#if CV_SIMD_64F +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) template <> struct iPow_SIMD { @@ -993,34 +993,34 @@ struct iPow_SIMD int i = 0; v_float64 v_1 = vx_setall_f64(1.); - for ( ; i <= len - v_float64::nlanes*2; i += v_float64::nlanes*2) + for ( ; i <= len - VTraits::vlanes()*2; i += VTraits::vlanes()*2) { v_float64 v_a1 = v_1, v_a2 = v_1; - v_float64 v_b1 = vx_load(src + i), v_b2 = vx_load(src + i + v_float64::nlanes); + v_float64 v_b1 = vx_load(src + i), v_b2 = vx_load(src + i + VTraits::vlanes()); int p = std::abs(power); if( power < 0 ) { - v_b1 = v_1 / v_b1; - v_b2 = v_1 / v_b2; + v_b1 = v_div(v_1, v_b1); + v_b2 = v_div(v_1, v_b2); } while( p > 1 ) { if (p & 1) { - v_a1 *= v_b1; - v_a2 
*= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); } - v_b1 *= v_b1; - v_b2 *= v_b2; + v_b1 = v_mul(v_b1, v_b1); + v_b2 = v_mul(v_b2, v_b2); p >>= 1; } - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); v_store(dst + i, v_a1); - v_store(dst + i + v_float64::nlanes, v_a2); + v_store(dst + i + VTraits::vlanes(), v_a2); } vx_cleanup(); @@ -1614,7 +1614,7 @@ void patchNaNs( InputOutputArray _a, double _val ) Cv32suf val; val.f = (float)_val; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) v_int32 v_mask1 = vx_setall_s32(0x7fffffff), v_mask2 = vx_setall_s32(0x7f800000); v_int32 v_val = vx_setall_s32(val.i); #endif @@ -1624,12 +1624,12 @@ void patchNaNs( InputOutputArray _a, double _val ) int* tptr = ptrs[0]; size_t j = 0; -#if CV_SIMD - size_t cWidth = (size_t)v_int32::nlanes; +#if (CV_SIMD || CV_SIMD_SCALABLE) + size_t cWidth = (size_t)VTraits::vlanes(); for ( ; j + cWidth <= len; j += cWidth) { v_int32 v_src = vx_load(tptr + j); - v_int32 v_cmp_mask = v_mask2 < (v_src & v_mask1); + v_int32 v_cmp_mask = v_lt(v_mask2, v_and(v_src, v_mask1)); v_int32 v_dst = v_select(v_cmp_mask, v_val, v_src); v_store(tptr + j, v_dst); } diff --git a/modules/core/src/matmul.simd.hpp b/modules/core/src/matmul.simd.hpp index 5a7f36d12b..62aacc0d63 100644 --- a/modules/core/src/matmul.simd.hpp +++ b/modules/core/src/matmul.simd.hpp @@ -1454,7 +1454,7 @@ transform_( const T* src, T* dst, const WT* m, int len, int scn, int dcn ) static void transform_8u( const uchar* src, uchar* dst, const float* m, int len, int scn, int dcn ) { -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) const int BITS = 10, SCALE = 1 << BITS; const float MAX_M = (float)(1 << (15 - BITS)); @@ -1485,7 +1485,7 @@ transform_8u( const uchar* src, uchar* dst, const float* m, int len, int scn, in v_int32 m10 = vx_setall_s32(m32[4]); v_int32 m11 = vx_setall_s32(m32[5]); int x = 0; - for (; x <= (len - v_uint8::nlanes) * nChannels; x += v_uint8::nlanes * nChannels) + for (; x <= 
(len - VTraits::vlanes()) * nChannels; x += VTraits::vlanes() * nChannels) { v_uint8 b, g, r; v_load_deinterleave(src + x, b, g, r); @@ -1499,20 +1499,20 @@ transform_8u( const uchar* src, uchar* dst, const float* m, int len, int scn, in v_int32 p1, p3; v_expand(bgl, p0, p2); v_expand(v_reinterpret_as_s16(rl), p1, p3); - dbl = v_rshr_pack(v_dotprod(v_reinterpret_as_s16(p0), m01) + p1 * m2 + m3, - v_dotprod(v_reinterpret_as_s16(p2), m01) + p3 * m2 + m3); - dgl = v_rshr_pack(v_dotprod(v_reinterpret_as_s16(p0), m45) + p1 * m6 + m7, - v_dotprod(v_reinterpret_as_s16(p2), m45) + p3 * m6 + m7); - drl = v_rshr_pack(v_dotprod(v_reinterpret_as_s16(p0), m89) + p1 * m10 + m11, - v_dotprod(v_reinterpret_as_s16(p2), m89) + p3 * m10 + m11); + dbl = v_rshr_pack(v_add(v_add(v_dotprod(v_reinterpret_as_s16(p0), m01), v_mul(p1, m2)), m3), + v_add(v_add(v_dotprod(v_reinterpret_as_s16(p2), m01), v_mul(p3, m2)), m3)); + dgl = v_rshr_pack(v_add(v_add(v_dotprod(v_reinterpret_as_s16(p0), m45), v_mul(p1, m6)), m7), + v_add(v_add(v_dotprod(v_reinterpret_as_s16(p2), m45), v_mul(p3, m6)), m7)); + drl = v_rshr_pack(v_add(v_add(v_dotprod(v_reinterpret_as_s16(p0), m89), v_mul(p1, m10)), m11), + v_add(v_add(v_dotprod(v_reinterpret_as_s16(p2), m89), v_mul(p3, m10)), m11)); v_expand(bgh, p0, p2); v_expand(v_reinterpret_as_s16(rh), p1, p3); - dbh = v_rshr_pack(v_dotprod(v_reinterpret_as_s16(p0), m01) + p1 * m2 + m3, - v_dotprod(v_reinterpret_as_s16(p2), m01) + p3 * m2 + m3); - dgh = v_rshr_pack(v_dotprod(v_reinterpret_as_s16(p0), m45) + p1 * m6 + m7, - v_dotprod(v_reinterpret_as_s16(p2), m45) + p3 * m6 + m7); - drh = v_rshr_pack(v_dotprod(v_reinterpret_as_s16(p0), m89) + p1 * m10 + m11, - v_dotprod(v_reinterpret_as_s16(p2), m89) + p3 * m10 + m11); + dbh = v_rshr_pack(v_add(v_add(v_dotprod(v_reinterpret_as_s16(p0), m01), v_mul(p1, m2)), m3), + v_add(v_add(v_dotprod(v_reinterpret_as_s16(p2), m01), v_mul(p3, m2)), m3)); + dgh = v_rshr_pack(v_add(v_add(v_dotprod(v_reinterpret_as_s16(p0), m45), v_mul(p1, 
m6)), m7), + v_add(v_add(v_dotprod(v_reinterpret_as_s16(p2), m45), v_mul(p3, m6)), m7)); + drh = v_rshr_pack(v_add(v_add(v_dotprod(v_reinterpret_as_s16(p0), m89), v_mul(p1, m10)), m11), + v_add(v_add(v_dotprod(v_reinterpret_as_s16(p2), m89), v_mul(p3, m10)), m11)); v_store_interleave(dst + x, v_pack_u(dbl, dbh), v_pack_u(dgl, dgh), v_pack_u(drl, drh)); } m32[1] = saturate_cast((m[3] + 0.5f)*SCALE); @@ -1537,7 +1537,7 @@ transform_8u( const uchar* src, uchar* dst, const float* m, int len, int scn, in static void transform_16u( const ushort* src, ushort* dst, const float* m, int len, int scn, int dcn ) { -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) if( scn == 3 && dcn == 3 ) { int x = 0; @@ -1555,7 +1555,7 @@ transform_16u( const ushort* src, ushort* dst, const float* m, int len, int scn, v_float32 m10 = vx_setall_f32(m[10]); v_float32 m11 = vx_setall_f32(m[11] - 32768.f); v_int16 delta = vx_setall_s16(-32768); - for (; x <= (len - v_uint16::nlanes)*3; x += v_uint16::nlanes*3) + for (; x <= (len - VTraits::vlanes())*3; x += VTraits::vlanes()*3) { v_uint16 b, g, r; v_load_deinterleave(src + x, b, g, r); @@ -1574,6 +1574,7 @@ transform_16u( const ushort* src, ushort* dst, const float* m, int len, int scn, v_store_interleave(dst + x, v_reinterpret_as_u16(db), v_reinterpret_as_u16(dg), v_reinterpret_as_u16(dr)); } #endif +#if CV_SIMD128 v_float32x4 _m0l(m[0], m[4], m[ 8], 0.f); v_float32x4 _m1l(m[1], m[5], m[ 9], 0.f); v_float32x4 _m2l(m[2], m[6], m[10], 0.f); @@ -1587,6 +1588,7 @@ transform_16u( const ushort* src, ushort* dst, const float* m, int len, int scn, v_store(dst + x, v_rotate_right<1>(v_reinterpret_as_u16(v_add_wrap(v_pack( v_round(v_matmuladd(v_cvt_f32(v_reinterpret_as_s32(v_load_expand(src + x ))), _m0h, _m1h, _m2h, _m3h)), v_round(v_matmuladd(v_cvt_f32(v_reinterpret_as_s32(v_load_expand(src + x + 3))), _m0l, _m1l, _m2l, _m3l))), _delta)))); +#endif //CV_SIMD128 for( ; x < len * 3; x += 3 ) { float v0 = src[x], v1 = src[x + 1], v2 = src[x + 2]; @@ 
-1606,25 +1608,25 @@ transform_16u( const ushort* src, ushort* dst, const float* m, int len, int scn, static void transform_32f( const float* src, float* dst, const float* m, int len, int scn, int dcn ) { -#if CV_SIMD && !defined(__aarch64__) && !defined(_M_ARM64) +#if (CV_SIMD || CV_SIMD_SCALABLE) && !defined(__aarch64__) && !defined(_M_ARM64) int x = 0; if( scn == 3 && dcn == 3 ) { - int idx[v_float32::nlanes/2]; - for( int i = 0; i < v_float32::nlanes/4; i++ ) + int idx[VTraits::max_nlanes/2]; + for( int i = 0; i < VTraits::vlanes()/4; i++ ) { idx[i] = 3*i; - idx[i + v_float32::nlanes/4] = 0; + idx[i + VTraits::vlanes()/4] = 0; } float _m[] = { m[0], m[4], m[ 8], 0.f, m[1], m[5], m[ 9], 0.f, m[2], m[6], m[10], 0.f, m[3], m[7], m[11], 0.f }; - v_float32 m0 = vx_lut_quads(_m , idx + v_float32::nlanes/4); - v_float32 m1 = vx_lut_quads(_m + 4, idx + v_float32::nlanes/4); - v_float32 m2 = vx_lut_quads(_m + 8, idx + v_float32::nlanes/4); - v_float32 m3 = vx_lut_quads(_m + 12, idx + v_float32::nlanes/4); - for( ; x <= len*3 - v_float32::nlanes; x += 3*v_float32::nlanes/4 ) + v_float32 m0 = vx_lut_quads(_m , idx + VTraits::vlanes()/4); + v_float32 m1 = vx_lut_quads(_m + 4, idx + VTraits::vlanes()/4); + v_float32 m2 = vx_lut_quads(_m + 8, idx + VTraits::vlanes()/4); + v_float32 m3 = vx_lut_quads(_m + 12, idx + VTraits::vlanes()/4); + for( ; x <= len*3 - VTraits::vlanes(); x += 3*VTraits::vlanes()/4 ) v_store(dst + x, v_pack_triplets(v_matmuladd(vx_lut_quads(src + x, idx), m0, m1, m2, m3))); for( ; x < len*3; x += 3 ) { @@ -1641,8 +1643,8 @@ transform_32f( const float* src, float* dst, const float* m, int len, int scn, i if( scn == 4 && dcn == 4 ) { #if CV_SIMD_WIDTH > 16 - int idx[v_float32::nlanes/4]; - for( int i = 0; i < v_float32::nlanes/4; i++ ) + int idx[VTraits::max_nlanes/4]; + for( int i = 0; i < VTraits::vlanes()/4; i++ ) idx[i] = 0; float _m[] = { m[4], m[9], m[14], m[19] }; v_float32 m0 = vx_lut_quads(m , idx); @@ -1650,12 +1652,13 @@ transform_32f( const 
float* src, float* dst, const float* m, int len, int scn, i v_float32 m2 = vx_lut_quads(m+10, idx); v_float32 m3 = vx_lut_quads(m+15, idx); v_float32 m4 = vx_lut_quads(_m, idx); - for( ; x <= len*4 - v_float32::nlanes; x += v_float32::nlanes ) + for( ; x <= len*4 - VTraits::vlanes(); x += VTraits::vlanes() ) { v_float32 v_src = vx_load(src + x); - v_store(dst + x, v_reduce_sum4(v_src * m0, v_src * m1, v_src * m2, v_src * m3) + m4); + v_store(dst + x, v_add(v_reduce_sum4(v_mul(v_src, m0), v_mul(v_src, m1), v_mul(v_src, m2), v_mul(v_src, m3)), m4)); } #endif +#if CV_SIMD128 v_float32x4 _m0 = v_load(m ); v_float32x4 _m1 = v_load(m + 5); v_float32x4 _m2 = v_load(m + 10); @@ -1666,6 +1669,17 @@ transform_32f( const float* src, float* dst, const float* m, int len, int scn, i v_float32x4 v_src = v_load(src + x); v_store(dst + x, v_reduce_sum4(v_src * _m0, v_src * _m1, v_src * _m2, v_src * _m3) + _m4); } +#else // CV_SIMD_WIDTH >= 16 && !CV_SIMD128 + for( ; x < len*4; x += 4 ) + { + float v0 = src[x], v1 = src[x+1], v2 = src[x+2], v3 = src[x+3]; + float t0 = saturate_cast(m[0]*v0 + m[1]*v1 + m[ 2]*v2 + m[ 3]*v3 + m[ 4]); + float t1 = saturate_cast(m[5]*v0 + m[6]*v1 + m[ 7]*v2 + m[ 8]*v3 + m[ 9]); + float t2 = saturate_cast(m[10]*v0 + m[11]*v1 + m[12]*v2 + m[13]*v3 + m[14]); + float t3 = saturate_cast(m[15]*v0 + m[16]*v1 + m[17]*v2 + m[18]*v3 + m[19]); + dst[x] = t0; dst[x+1] = t1; dst[x+2] = t2; dst[x+3] = t3; + } +#endif vx_cleanup(); return; } @@ -1936,9 +1950,9 @@ static void scaleAdd_32f(const float* src1, const float* src2, float* dst, { float alpha = *_alpha; int i = 0; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) v_float32 v_alpha = vx_setall_f32(alpha); - const int cWidth = v_float32::nlanes; + const int cWidth = VTraits::vlanes(); for (; i <= len - cWidth; i += cWidth) v_store(dst + i, v_muladd(vx_load(src1 + i), v_alpha, vx_load(src2 + i))); vx_cleanup(); @@ -1953,9 +1967,9 @@ static void scaleAdd_64f(const double* src1, const double* src2, double* dst, { 
double alpha = *_alpha; int i = 0; -#if CV_SIMD_64F +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) v_float64 a2 = vx_setall_f64(alpha); - const int cWidth = v_float64::nlanes; + const int cWidth = VTraits::vlanes(); for (; i <= len - cWidth; i += cWidth) v_store(dst + i, v_muladd(vx_load(src1 + i), a2, vx_load(src2 + i))); vx_cleanup(); @@ -2078,7 +2092,7 @@ MulTransposedR(const Mat& srcmat, const Mat& dstmat, const Mat& deltamat, double deltastep = deltastep ? 4 : 0; } -#if CV_SIMD_64F +#if CV_SIMD128_64F v_float64x2 v_scale = v_setall_f64(scale); #endif @@ -2090,7 +2104,7 @@ MulTransposedR(const Mat& srcmat, const Mat& dstmat, const Mat& deltamat, double for( j = i; j <= size.width - 4; j += 4 ) { -#if CV_SIMD_64F +#if CV_SIMD128_64F if (DataType::depth == CV_64F && DataType
::depth == CV_64F) { v_float64x2 s0 = v_setzero_f64(), s1 = v_setzero_f64(); @@ -2150,7 +2164,7 @@ MulTransposedR(const Mat& srcmat, const Mat& dstmat, const Mat& deltamat, double for( j = i; j <= size.width - 4; j += 4 ) { -#if CV_SIMD_64F +#if CV_SIMD128_64F if (DataType::depth == CV_64F && DataType
::depth == CV_64F) { v_float64x2 s0 = v_setzero_f64(), s1 = v_setzero_f64(); @@ -2227,7 +2241,7 @@ MulTransposedL(const Mat& srcmat, const Mat& dstmat, const Mat& deltamat, double double s = 0; const sT *tsrc1 = src + i*srcstep; const sT *tsrc2 = src + j*srcstep; -#if CV_SIMD_64F +#if CV_SIMD128_64F if (DataType::depth == CV_64F && DataType
::depth == CV_64F) { const double *v_tsrc1 = (double *)(tsrc1); @@ -2280,7 +2294,7 @@ MulTransposedL(const Mat& srcmat, const Mat& dstmat, const Mat& deltamat, double delta_buf[2] = delta_buf[3] = tdelta2[0]; tdelta2 = delta_buf; } -#if CV_SIMD_64F +#if CV_SIMD128_64F if (DataType::depth == CV_64F && DataType
::depth == CV_64F) { const double *v_tsrc2 = (double *)(tsrc2); @@ -2393,14 +2407,14 @@ double dotProd_8u(const uchar* src1, const uchar* src2, int len) double r = 0; int i = 0; -#if CV_SIMD - int len0 = len & -v_uint16::nlanes, blockSize0 = (1 << 15), blockSize; +#if (CV_SIMD || CV_SIMD_SCALABLE) + int len0 = len & -VTraits::vlanes(), blockSize0 = (1 << 15), blockSize; while (i < len0) { blockSize = std::min(len0 - i, blockSize0); v_uint32 v_sum = vx_setzero_u32(); - const int cWidth = v_uint16::nlanes; + const int cWidth = VTraits::vlanes(); int j = 0; for (; j <= blockSize - cWidth * 2; j += cWidth * 2) @@ -2414,7 +2428,7 @@ double dotProd_8u(const uchar* src1, const uchar* src2, int len) { v_int16 v_src10 = v_reinterpret_as_s16(vx_load_expand(src1 + j)); v_int16 v_src20 = v_reinterpret_as_s16(vx_load_expand(src2 + j)); - v_sum += v_reinterpret_as_u32(v_dotprod_fast(v_src10, v_src20)); + v_sum = v_add(v_sum, v_reinterpret_as_u32(v_dotprod_fast(v_src10, v_src20))); } r += (double)v_reduce_sum(v_sum); @@ -2433,14 +2447,14 @@ double dotProd_8s(const schar* src1, const schar* src2, int len) double r = 0.0; int i = 0; -#if CV_SIMD - int len0 = len & -v_int16::nlanes, blockSize0 = (1 << 14), blockSize; +#if (CV_SIMD || CV_SIMD_SCALABLE) + int len0 = len & -VTraits::vlanes(), blockSize0 = (1 << 14), blockSize; while (i < len0) { blockSize = std::min(len0 - i, blockSize0); v_int32 v_sum = vx_setzero_s32(); - const int cWidth = v_int16::nlanes; + const int cWidth = VTraits::vlanes(); int j = 0; for (; j <= blockSize - cWidth * 2; j += cWidth * 2) @@ -2473,14 +2487,14 @@ double dotProd_16u(const ushort* src1, const ushort* src2, int len) double r = 0.0; int i = 0; -#if CV_SIMD - int len0 = len & -v_uint16::nlanes, blockSize0 = (1 << 24), blockSize; +#if (CV_SIMD || CV_SIMD_SCALABLE) + int len0 = len & -VTraits::vlanes(), blockSize0 = (1 << 24), blockSize; while (i < len0) { blockSize = std::min(len0 - i, blockSize0); v_uint64 v_sum = vx_setzero_u64(); - const int cWidth = 
v_uint16::nlanes; + const int cWidth = VTraits::vlanes(); int j = 0; for (; j <= blockSize - cWidth; j += cWidth) @@ -2505,14 +2519,14 @@ double dotProd_16s(const short* src1, const short* src2, int len) double r = 0.0; int i = 0; -#if CV_SIMD - int len0 = len & -v_int16::nlanes, blockSize0 = (1 << 24), blockSize; +#if (CV_SIMD || CV_SIMD_SCALABLE) + int len0 = len & -VTraits::vlanes(), blockSize0 = (1 << 24), blockSize; while (i < len0) { blockSize = std::min(len0 - i, blockSize0); v_int64 v_sum = vx_setzero_s64(); - const int cWidth = v_int16::nlanes; + const int cWidth = VTraits::vlanes(); int j = 0; for (; j <= blockSize - cWidth; j += cWidth) @@ -2534,10 +2548,10 @@ double dotProd_16s(const short* src1, const short* src2, int len) double dotProd_32s(const int* src1, const int* src2, int len) { -#if CV_SIMD_64F +#if CV_SIMD_64F // TODO: enable for CV_SIMD_SCALABLE_64F double r = .0; int i = 0; - const int step = v_int32::nlanes; + const int step = VTraits::vlanes(); v_float64 v_sum0 = vx_setzero_f64(); #if CV_SIMD_WIDTH == 16 const int wstep = step * 2; @@ -2572,8 +2586,8 @@ double dotProd_32f(const float* src1, const float* src2, int len) double r = 0.0; int i = 0; -#if CV_SIMD - int len0 = len & -v_float32::nlanes, blockSize0 = (1 << 13), blockSize; +#if (CV_SIMD || CV_SIMD_SCALABLE) + int len0 = len & -VTraits::vlanes(), blockSize0 = (1 << 13), blockSize; while (i < len0) { @@ -2581,7 +2595,7 @@ double dotProd_32f(const float* src1, const float* src2, int len) v_float32 v_sum = vx_setzero_f32(); int j = 0; - int cWidth = v_float32::nlanes; + int cWidth = VTraits::vlanes(); #if CV_ENABLE_UNROLLED v_float32 v_sum1 = vx_setzero_f32(); @@ -2600,7 +2614,7 @@ double dotProd_32f(const float* src1, const float* src2, int len) vx_load(src2 + j + (cWidth * 3)), v_sum3); } - v_sum += v_sum1 + v_sum2 + v_sum3; + v_sum = v_add(v_sum, v_add(v_add(v_sum1, v_sum2), v_sum3)); #endif for (; j <= blockSize - cWidth; j += cWidth) diff --git 
a/modules/core/src/matrix_transform.cpp b/modules/core/src/matrix_transform.cpp index 7f1043fbbe..744ee69b0d 100644 --- a/modules/core/src/matrix_transform.cpp +++ b/modules/core/src/matrix_transform.cpp @@ -440,7 +440,7 @@ template CV_ALWAYS_INLINE void flipHoriz_double( const static void flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz ) { -#if CV_SIMD +#if CV_SIMD128 #if CV_STRONG_ALIGNMENT size_t alignmentMark = ((size_t)src)|((size_t)dst)|sstep|dstep; #endif @@ -563,7 +563,7 @@ flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, } #endif else -#endif // CV_SIMD +#endif // CV_SIMD128 { int i, j, limit = (int)(((size.width + 1)/2)*esz); AutoBuffer _tab(size.width*esz); @@ -596,12 +596,12 @@ flipVert( const uchar* src0, size_t sstep, uchar* dst0, size_t dstep, Size size, dst0 += dstep, dst1 -= dstep ) { int i = 0; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) #if CV_STRONG_ALIGNMENT if (isAligned(src0, src1, dst0, dst1)) #endif { - for (; i <= size.width - CV_SIMD_WIDTH; i += CV_SIMD_WIDTH) + for (; i <= size.width - VTraits::vlanes(); i += VTraits::vlanes()) { v_int32 t0 = v_reinterpret_as_s32(vx_load(src0 + i)); v_int32 t1 = v_reinterpret_as_s32(vx_load(src1 + i)); @@ -612,7 +612,7 @@ flipVert( const uchar* src0, size_t sstep, uchar* dst0, size_t dstep, Size size, #if CV_STRONG_ALIGNMENT else { - for (; i <= size.width - CV_SIMD_WIDTH; i += CV_SIMD_WIDTH) + for (; i <= size.width - VTraits::vlanes(); i += VTraits::vlanes()) { v_uint8 t0 = vx_load(src0 + i); v_uint8 t1 = vx_load(src1 + i); diff --git a/modules/core/src/merge.simd.hpp b/modules/core/src/merge.simd.hpp index ad08dd8879..d67a117c7b 100644 --- a/modules/core/src/merge.simd.hpp +++ b/modules/core/src/merge.simd.hpp @@ -15,7 +15,7 @@ void merge64s(const int64** src, int64* dst, int len, int cn); #ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) /* The trick with 
STORE_UNALIGNED/STORE_ALIGNED_NOCACHE is the following: on IA there are instructions movntps and such to which @@ -38,7 +38,7 @@ void merge64s(const int64** src, int64* dst, int len, int cn); template static void vecmerge_( const T** src, T* dst, int len, int cn ) { - const int VECSZ = VecT::nlanes; + const int VECSZ = VTraits::vlanes(); int i, i0 = 0; const T* src0 = src[0]; const T* src1 = src[1]; @@ -173,8 +173,8 @@ merge_( const T** src, T* dst, int len, int cn ) void merge8u(const uchar** src, uchar* dst, int len, int cn ) { CV_INSTRUMENT_REGION(); -#if CV_SIMD - if( len >= v_uint8::nlanes && 2 <= cn && cn <= 4 ) +#if (CV_SIMD || CV_SIMD_SCALABLE) + if( len >= VTraits::vlanes() && 2 <= cn && cn <= 4 ) vecmerge_(src, dst, len, cn); else #endif @@ -184,8 +184,8 @@ void merge8u(const uchar** src, uchar* dst, int len, int cn ) void merge16u(const ushort** src, ushort* dst, int len, int cn ) { CV_INSTRUMENT_REGION(); -#if CV_SIMD - if( len >= v_uint16::nlanes && 2 <= cn && cn <= 4 ) +#if (CV_SIMD || CV_SIMD_SCALABLE) + if( len >= VTraits::vlanes() && 2 <= cn && cn <= 4 ) vecmerge_(src, dst, len, cn); else #endif @@ -195,8 +195,8 @@ void merge16u(const ushort** src, ushort* dst, int len, int cn ) void merge32s(const int** src, int* dst, int len, int cn ) { CV_INSTRUMENT_REGION(); -#if CV_SIMD - if( len >= v_int32::nlanes && 2 <= cn && cn <= 4 ) +#if (CV_SIMD || CV_SIMD_SCALABLE) + if( len >= VTraits::vlanes() && 2 <= cn && cn <= 4 ) vecmerge_(src, dst, len, cn); else #endif @@ -206,8 +206,8 @@ void merge32s(const int** src, int* dst, int len, int cn ) void merge64s(const int64** src, int64* dst, int len, int cn ) { CV_INSTRUMENT_REGION(); -#if CV_SIMD - if( len >= v_int64::nlanes && 2 <= cn && cn <= 4 ) +#if (CV_SIMD || CV_SIMD_SCALABLE) + if( len >= VTraits::vlanes() && 2 <= cn && cn <= 4 ) vecmerge_(src, dst, len, cn); else #endif diff --git a/modules/core/src/norm.cpp b/modules/core/src/norm.cpp index 69da85f291..be68efddf0 100644 --- a/modules/core/src/norm.cpp 
+++ b/modules/core/src/norm.cpp @@ -63,25 +63,25 @@ int normHamming(const uchar* a, int n, int cellSize) return -1; int i = 0; int result = 0; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) v_uint64 t = vx_setzero_u64(); if ( cellSize == 2) { v_uint16 mask = v_reinterpret_as_u16(vx_setall_u8(0x55)); - for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes) + for(; i <= n - VTraits::vlanes(); i += VTraits::vlanes()) { v_uint16 a0 = v_reinterpret_as_u16(vx_load(a + i)); - t += v_popcount(v_reinterpret_as_u64((a0 | (a0 >> 1)) & mask)); + t = v_add(t, v_popcount(v_reinterpret_as_u64(v_and(v_or(a0, v_shr<1>(a0)), mask)))); } } else // cellSize == 4 { v_uint16 mask = v_reinterpret_as_u16(vx_setall_u8(0x11)); - for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes) + for(; i <= n - VTraits::vlanes(); i += VTraits::vlanes()) { v_uint16 a0 = v_reinterpret_as_u16(vx_load(a + i)); - v_uint16 a1 = a0 | (a0 >> 2); - t += v_popcount(v_reinterpret_as_u64((a1 | (a1 >> 1)) & mask)); + v_uint16 a1 = v_or(a0, v_shr<2>(a0)); + t = v_add(t, v_popcount(v_reinterpret_as_u64(v_and(v_or(a1, v_shr<1>(a1)), mask)))); } } @@ -109,25 +109,25 @@ int normHamming(const uchar* a, const uchar* b, int n, int cellSize) return -1; int i = 0; int result = 0; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) v_uint64 t = vx_setzero_u64(); if ( cellSize == 2) { v_uint16 mask = v_reinterpret_as_u16(vx_setall_u8(0x55)); - for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes) + for(; i <= n - VTraits::vlanes(); i += VTraits::vlanes()) { - v_uint16 ab0 = v_reinterpret_as_u16(vx_load(a + i) ^ vx_load(b + i)); - t += v_popcount(v_reinterpret_as_u64((ab0 | (ab0 >> 1)) & mask)); + v_uint16 ab0 = v_reinterpret_as_u16(v_xor(vx_load(a + i), vx_load(b + i))); + t = v_add(t, v_popcount(v_reinterpret_as_u64(v_and(v_or(ab0, v_shr<1>(ab0)), mask)))); } } else // cellSize == 4 { v_uint16 mask = v_reinterpret_as_u16(vx_setall_u8(0x11)); - for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes) + for(; i <= n - 
VTraits::vlanes(); i += VTraits::vlanes()) { - v_uint16 ab0 = v_reinterpret_as_u16(vx_load(a + i) ^ vx_load(b + i)); - v_uint16 ab1 = ab0 | (ab0 >> 2); - t += v_popcount(v_reinterpret_as_u64((ab1 | (ab1 >> 1)) & mask)); + v_uint16 ab0 = v_reinterpret_as_u16(v_xor(vx_load(a + i), vx_load(b + i))); + v_uint16 ab1 = v_or(ab0, v_shr<2>(ab0)); + t = v_add(t, v_popcount(v_reinterpret_as_u64(v_and(v_or(ab1, v_shr<1>(ab1)), mask)))); } } result += (int)v_reduce_sum(t); @@ -145,21 +145,21 @@ int normHamming(const uchar* a, const uchar* b, int n, int cellSize) float normL2Sqr_(const float* a, const float* b, int n) { int j = 0; float d = 0.f; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) v_float32 v_d0 = vx_setzero_f32(), v_d1 = vx_setzero_f32(); v_float32 v_d2 = vx_setzero_f32(), v_d3 = vx_setzero_f32(); - for (; j <= n - 4 * v_float32::nlanes; j += 4 * v_float32::nlanes) + for (; j <= n - 4 * VTraits::vlanes(); j += 4 * VTraits::vlanes()) { - v_float32 t0 = vx_load(a + j) - vx_load(b + j); - v_float32 t1 = vx_load(a + j + v_float32::nlanes) - vx_load(b + j + v_float32::nlanes); + v_float32 t0 = v_sub(vx_load(a + j), vx_load(b + j)); + v_float32 t1 = v_sub(vx_load(a + j + VTraits::vlanes()), vx_load(b + j + VTraits::vlanes())); v_d0 = v_muladd(t0, t0, v_d0); - v_float32 t2 = vx_load(a + j + 2 * v_float32::nlanes) - vx_load(b + j + 2 * v_float32::nlanes); + v_float32 t2 = v_sub(vx_load(a + j + 2 * VTraits::vlanes()), vx_load(b + j + 2 * VTraits::vlanes())); v_d1 = v_muladd(t1, t1, v_d1); - v_float32 t3 = vx_load(a + j + 3 * v_float32::nlanes) - vx_load(b + j + 3 * v_float32::nlanes); + v_float32 t3 = v_sub(vx_load(a + j + 3 * VTraits::vlanes()), vx_load(b + j + 3 * VTraits::vlanes())); v_d2 = v_muladd(t2, t2, v_d2); v_d3 = v_muladd(t3, t3, v_d3); } - d = v_reduce_sum(v_d0 + v_d1 + v_d2 + v_d3); + d = v_reduce_sum(v_add(v_add(v_add(v_d0, v_d1), v_d2), v_d3)); #endif for( ; j < n; j++ ) { @@ -173,17 +173,17 @@ float normL2Sqr_(const float* a, const float* b, int n) float 
normL1_(const float* a, const float* b, int n) { int j = 0; float d = 0.f; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) v_float32 v_d0 = vx_setzero_f32(), v_d1 = vx_setzero_f32(); v_float32 v_d2 = vx_setzero_f32(), v_d3 = vx_setzero_f32(); - for (; j <= n - 4 * v_float32::nlanes; j += 4 * v_float32::nlanes) + for (; j <= n - 4 * VTraits::vlanes(); j += 4 * VTraits::vlanes()) { - v_d0 += v_absdiff(vx_load(a + j), vx_load(b + j)); - v_d1 += v_absdiff(vx_load(a + j + v_float32::nlanes), vx_load(b + j + v_float32::nlanes)); - v_d2 += v_absdiff(vx_load(a + j + 2 * v_float32::nlanes), vx_load(b + j + 2 * v_float32::nlanes)); - v_d3 += v_absdiff(vx_load(a + j + 3 * v_float32::nlanes), vx_load(b + j + 3 * v_float32::nlanes)); + v_d0 = v_add(v_d0, v_absdiff(vx_load(a + j), vx_load(b + j))); + v_d1 = v_add(v_d1, v_absdiff(vx_load(a + j + VTraits::vlanes()), vx_load(b + j + VTraits::vlanes()))); + v_d2 = v_add(v_d2, v_absdiff(vx_load(a + j + 2 * VTraits::vlanes()), vx_load(b + j + 2 * VTraits::vlanes()))); + v_d3 = v_add(v_d3, v_absdiff(vx_load(a + j + 3 * VTraits::vlanes()), vx_load(b + j + 3 * VTraits::vlanes()))); } - d = v_reduce_sum(v_d0 + v_d1 + v_d2 + v_d3); + d = v_reduce_sum(v_add(v_add(v_add(v_d0, v_d1), v_d2), v_d3)); #endif for( ; j < n; j++ ) d += std::abs(a[j] - b[j]); @@ -193,12 +193,12 @@ float normL1_(const float* a, const float* b, int n) int normL1_(const uchar* a, const uchar* b, int n) { int j = 0, d = 0; -#if CV_SIMD - for (; j <= n - 4 * v_uint8::nlanes; j += 4 * v_uint8::nlanes) +#if (CV_SIMD || CV_SIMD_SCALABLE) + for (; j <= n - 4 * VTraits::vlanes(); j += 4 * VTraits::vlanes()) d += v_reduce_sad(vx_load(a + j), vx_load(b + j)) + - v_reduce_sad(vx_load(a + j + v_uint8::nlanes), vx_load(b + j + v_uint8::nlanes)) + - v_reduce_sad(vx_load(a + j + 2 * v_uint8::nlanes), vx_load(b + j + 2 * v_uint8::nlanes)) + - v_reduce_sad(vx_load(a + j + 3 * v_uint8::nlanes), vx_load(b + j + 3 * v_uint8::nlanes)); + v_reduce_sad(vx_load(a + j + VTraits::vlanes()), 
vx_load(b + j + VTraits::vlanes())) + + v_reduce_sad(vx_load(a + j + 2 * VTraits::vlanes()), vx_load(b + j + 2 * VTraits::vlanes())) + + v_reduce_sad(vx_load(a + j + 3 * VTraits::vlanes()), vx_load(b + j + 3 * VTraits::vlanes())); #endif for( ; j < n; j++ ) d += std::abs(a[j] - b[j]); diff --git a/modules/core/src/split.simd.hpp b/modules/core/src/split.simd.hpp index 25e90c0063..88414161b8 100644 --- a/modules/core/src/split.simd.hpp +++ b/modules/core/src/split.simd.hpp @@ -15,12 +15,12 @@ void split64s(const int64* src, int64** dst, int len, int cn); #ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) // see the comments for vecmerge_ in merge.cpp template static void vecsplit_( const T* src, T** dst, int len, int cn ) { - const int VECSZ = VecT::nlanes; + const int VECSZ = VTraits::vlanes(); int i, i0 = 0; T* dst0 = dst[0]; T* dst1 = dst[1]; @@ -177,8 +177,8 @@ split_( const T* src, T** dst, int len, int cn ) void split8u(const uchar* src, uchar** dst, int len, int cn ) { CV_INSTRUMENT_REGION(); -#if CV_SIMD - if( len >= v_uint8::nlanes && 2 <= cn && cn <= 4 ) +#if (CV_SIMD || CV_SIMD_SCALABLE) + if( len >= VTraits::vlanes() && 2 <= cn && cn <= 4 ) vecsplit_(src, dst, len, cn); else #endif @@ -188,8 +188,8 @@ void split8u(const uchar* src, uchar** dst, int len, int cn ) void split16u(const ushort* src, ushort** dst, int len, int cn ) { CV_INSTRUMENT_REGION(); -#if CV_SIMD - if( len >= v_uint16::nlanes && 2 <= cn && cn <= 4 ) +#if (CV_SIMD || CV_SIMD_SCALABLE) + if( len >= VTraits::vlanes() && 2 <= cn && cn <= 4 ) vecsplit_(src, dst, len, cn); else #endif @@ -199,8 +199,8 @@ void split16u(const ushort* src, ushort** dst, int len, int cn ) void split32s(const int* src, int** dst, int len, int cn ) { CV_INSTRUMENT_REGION(); -#if CV_SIMD - if( len >= v_uint32::nlanes && 2 <= cn && cn <= 4 ) +#if (CV_SIMD || CV_SIMD_SCALABLE) + if( len >= VTraits::vlanes() && 2 <= cn && cn <= 4 ) vecsplit_(src, dst, len, cn); else #endif @@ 
-210,8 +210,8 @@ void split32s(const int* src, int** dst, int len, int cn ) void split64s(const int64* src, int64** dst, int len, int cn ) { CV_INSTRUMENT_REGION(); -#if CV_SIMD - if( len >= v_int64::nlanes && 2 <= cn && cn <= 4 ) +#if (CV_SIMD || CV_SIMD_SCALABLE) + if( len >= VTraits::vlanes() && 2 <= cn && cn <= 4 ) vecsplit_(src, dst, len, cn); else #endif diff --git a/modules/core/src/stat.simd.hpp b/modules/core/src/stat.simd.hpp index 0592f84794..a5fb05476d 100644 --- a/modules/core/src/stat.simd.hpp +++ b/modules/core/src/stat.simd.hpp @@ -33,11 +33,11 @@ int normHamming(const uchar* a, int n) int i = 0; int result = 0; -#if CV_SIMD && CV_SIMD_WIDTH > 16 +#if (CV_SIMD || CV_SIMD_SCALABLE) { v_uint64 t = vx_setzero_u64(); - for (; i <= n - v_uint8::nlanes; i += v_uint8::nlanes) - t += v_popcount(v_reinterpret_as_u64(vx_load(a + i))); + for (; i <= n - VTraits::vlanes(); i += VTraits::vlanes()) + t = v_add(t, v_popcount(v_reinterpret_as_u64(vx_load(a + i)))); result = (int)v_reduce_sum(t); vx_cleanup(); } @@ -56,13 +56,6 @@ int normHamming(const uchar* a, int n) result += CV_POPCNT_U32(*(uint*)(a + i)); } } -#elif CV_SIMD - { - v_uint64x2 t = v_setzero_u64(); - for(; i <= n - v_uint8x16::nlanes; i += v_uint8x16::nlanes) - t += v_popcount(v_reinterpret_as_u64(v_load(a + i))); - result += (int)v_reduce_sum(t); - } #endif #if CV_ENABLE_UNROLLED for(; i <= n - 4; i += 4) @@ -85,11 +78,11 @@ int normHamming(const uchar* a, const uchar* b, int n) int i = 0; int result = 0; -#if CV_SIMD && CV_SIMD_WIDTH > 16 +#if (CV_SIMD || CV_SIMD_SCALABLE) { v_uint64 t = vx_setzero_u64(); - for (; i <= n - v_uint8::nlanes; i += v_uint8::nlanes) - t += v_popcount(v_reinterpret_as_u64(vx_load(a + i) ^ vx_load(b + i))); + for (; i <= n - VTraits::vlanes(); i += VTraits::vlanes()) + t = v_add(t, v_popcount(v_reinterpret_as_u64(v_xor(vx_load(a + i), vx_load(b + i))))); result += (int)v_reduce_sum(t); } #endif @@ -107,13 +100,6 @@ int normHamming(const uchar* a, const uchar* b, int n) 
result += CV_POPCNT_U32(*(uint*)(a + i) ^ *(uint*)(b + i)); } } -#elif CV_SIMD - { - v_uint64x2 t = v_setzero_u64(); - for(; i <= n - v_uint8x16::nlanes; i += v_uint8x16::nlanes) - t += v_popcount(v_reinterpret_as_u64(v_load(a + i) ^ v_load(b + i))); - result += (int)v_reduce_sum(t); - } #endif #if CV_ENABLE_UNROLLED for(; i <= n - 4; i += 4) diff --git a/modules/core/src/sum.simd.hpp b/modules/core/src/sum.simd.hpp index 2232013b24..e20cd39b70 100644 --- a/modules/core/src/sum.simd.hpp +++ b/modules/core/src/sum.simd.hpp @@ -22,7 +22,7 @@ struct Sum_SIMD } }; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) template <> struct Sum_SIMD @@ -36,41 +36,41 @@ struct Sum_SIMD int x = 0; v_uint32 v_sum = vx_setzero_u32(); - int len0 = len & -v_uint8::nlanes; + int len0 = len & -VTraits::vlanes(); while (x < len0) { - const int len_tmp = min(x + 256*v_uint16::nlanes, len0); + const int len_tmp = min(x + 256*VTraits::vlanes(), len0); v_uint16 v_sum16 = vx_setzero_u16(); - for (; x < len_tmp; x += v_uint8::nlanes) + for (; x < len_tmp; x += VTraits::vlanes()) { v_uint16 v_src0, v_src1; v_expand(vx_load(src0 + x), v_src0, v_src1); - v_sum16 += v_src0 + v_src1; + v_sum16 = v_add(v_sum16, v_add(v_src0, v_src1)); } v_uint32 v_half0, v_half1; v_expand(v_sum16, v_half0, v_half1); - v_sum += v_half0 + v_half1; + v_sum = v_add(v_sum, v_add(v_half0, v_half1)); } - if (x <= len - v_uint16::nlanes) + if (x <= len - VTraits::vlanes()) { v_uint32 v_half0, v_half1; v_expand(vx_load_expand(src0 + x), v_half0, v_half1); - v_sum += v_half0 + v_half1; - x += v_uint16::nlanes; + v_sum = v_add(v_sum, v_add(v_half0, v_half1)); + x += VTraits::vlanes(); } - if (x <= len - v_uint32::nlanes) + if (x <= len - VTraits::vlanes()) { - v_sum += vx_load_expand_q(src0 + x); - x += v_uint32::nlanes; + v_sum = v_add(v_sum, vx_load_expand_q(src0 + x)); + x += VTraits::vlanes(); } if (cn == 1) *dst += v_reduce_sum(v_sum); else { - uint32_t CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[v_uint32::nlanes]; + uint32_t 
CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[VTraits::max_nlanes]; v_store_aligned(ar, v_sum); - for (int i = 0; i < v_uint32::nlanes; ++i) + for (int i = 0; i < VTraits::vlanes(); ++i) dst[i % cn] += ar[i]; } v_cleanup(); @@ -91,41 +91,41 @@ struct Sum_SIMD int x = 0; v_int32 v_sum = vx_setzero_s32(); - int len0 = len & -v_int8::nlanes; + int len0 = len & -VTraits::vlanes(); while (x < len0) { - const int len_tmp = min(x + 256*v_int16::nlanes, len0); + const int len_tmp = min(x + 256*VTraits::vlanes(), len0); v_int16 v_sum16 = vx_setzero_s16(); - for (; x < len_tmp; x += v_int8::nlanes) + for (; x < len_tmp; x += VTraits::vlanes()) { v_int16 v_src0, v_src1; v_expand(vx_load(src0 + x), v_src0, v_src1); - v_sum16 += v_src0 + v_src1; + v_sum16 = v_add(v_sum16, v_add(v_src0, v_src1)); } v_int32 v_half0, v_half1; v_expand(v_sum16, v_half0, v_half1); - v_sum += v_half0 + v_half1; + v_sum = v_add(v_sum, v_add(v_half0, v_half1)); } - if (x <= len - v_int16::nlanes) + if (x <= len - VTraits::vlanes()) { v_int32 v_half0, v_half1; v_expand(vx_load_expand(src0 + x), v_half0, v_half1); - v_sum += v_half0 + v_half1; - x += v_int16::nlanes; + v_sum = v_add(v_sum, v_add(v_half0, v_half1)); + x += VTraits::vlanes(); } - if (x <= len - v_int32::nlanes) + if (x <= len - VTraits::vlanes()) { - v_sum += vx_load_expand_q(src0 + x); - x += v_int32::nlanes; + v_sum = v_add(v_sum, vx_load_expand_q(src0 + x)); + x += VTraits::vlanes(); } if (cn == 1) *dst += v_reduce_sum(v_sum); else { - int32_t CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[v_int32::nlanes]; + int32_t CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[VTraits::max_nlanes]; v_store_aligned(ar, v_sum); - for (int i = 0; i < v_int32::nlanes; ++i) + for (int i = 0; i < VTraits::vlanes(); ++i) dst[i % cn] += ar[i]; } v_cleanup(); @@ -146,25 +146,25 @@ struct Sum_SIMD int x = 0; v_uint32 v_sum = vx_setzero_u32(); - for (; x <= len - v_uint16::nlanes; x += v_uint16::nlanes) + for (; x <= len - VTraits::vlanes(); x += VTraits::vlanes()) { v_uint32 v_src0, v_src1; 
v_expand(vx_load(src0 + x), v_src0, v_src1); - v_sum += v_src0 + v_src1; + v_sum = v_add(v_sum, v_add(v_src0, v_src1)); } - if (x <= len - v_uint32::nlanes) + if (x <= len - VTraits::vlanes()) { - v_sum += vx_load_expand(src0 + x); - x += v_uint32::nlanes; + v_sum = v_add(v_sum, vx_load_expand(src0 + x)); + x += VTraits::vlanes(); } if (cn == 1) *dst += v_reduce_sum(v_sum); else { - uint32_t CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[v_uint32::nlanes]; + uint32_t CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[VTraits::max_nlanes]; v_store_aligned(ar, v_sum); - for (int i = 0; i < v_uint32::nlanes; ++i) + for (int i = 0; i < VTraits::vlanes(); ++i) dst[i % cn] += ar[i]; } v_cleanup(); @@ -185,25 +185,25 @@ struct Sum_SIMD int x = 0; v_int32 v_sum = vx_setzero_s32(); - for (; x <= len - v_int16::nlanes; x += v_int16::nlanes) + for (; x <= len - VTraits::vlanes(); x += VTraits::vlanes()) { v_int32 v_src0, v_src1; v_expand(vx_load(src0 + x), v_src0, v_src1); - v_sum += v_src0 + v_src1; + v_sum = v_add(v_sum, v_add(v_src0, v_src1)); } - if (x <= len - v_int32::nlanes) + if (x <= len - VTraits::vlanes()) { - v_sum += vx_load_expand(src0 + x); - x += v_int32::nlanes; + v_sum = v_add(v_sum, vx_load_expand(src0 + x)); + x += VTraits::vlanes(); } if (cn == 1) *dst += v_reduce_sum(v_sum); else { - int32_t CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[v_int32::nlanes]; + int32_t CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[VTraits::max_nlanes]; v_store_aligned(ar, v_sum); - for (int i = 0; i < v_int32::nlanes; ++i) + for (int i = 0; i < VTraits::vlanes(); ++i) dst[i % cn] += ar[i]; } v_cleanup(); @@ -212,7 +212,7 @@ struct Sum_SIMD } }; -#if CV_SIMD_64F +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) template <> struct Sum_SIMD { @@ -226,24 +226,24 @@ struct Sum_SIMD v_float64 v_sum0 = vx_setzero_f64(); v_float64 v_sum1 = vx_setzero_f64(); - for (; x <= len - 2 * v_int32::nlanes; x += 2 * v_int32::nlanes) + for (; x <= len - 2 * VTraits::vlanes(); x += 2 * VTraits::vlanes()) { v_int32 v_src0 = vx_load(src0 + x); - v_int32 
v_src1 = vx_load(src0 + x + v_int32::nlanes); - v_sum0 += v_cvt_f64(v_src0) + v_cvt_f64(v_src1); - v_sum1 += v_cvt_f64_high(v_src0) + v_cvt_f64_high(v_src1); + v_int32 v_src1 = vx_load(src0 + x + VTraits::vlanes()); + v_sum0 = v_add(v_sum0, v_add(v_cvt_f64(v_src0), v_cvt_f64(v_src1))); + v_sum1 = v_add(v_sum1, v_add(v_cvt_f64_high(v_src0), v_cvt_f64_high(v_src1))); } #if CV_SIMD256 || CV_SIMD512 - double CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[v_float64::nlanes]; - v_store_aligned(ar, v_sum0 + v_sum1); - for (int i = 0; i < v_float64::nlanes; ++i) + double CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[VTraits::max_nlanes]; + v_store_aligned(ar, v_add(v_sum0, v_sum1)); + for (int i = 0; i < VTraits::vlanes(); ++i) dst[i % cn] += ar[i]; #else - double CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[2 * v_float64::nlanes]; + double CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[2 * VTraits::max_nlanes]; v_store_aligned(ar, v_sum0); - v_store_aligned(ar + v_float64::nlanes, v_sum1); - for (int i = 0; i < 2 * v_float64::nlanes; ++i) + v_store_aligned(ar + VTraits::vlanes(), v_sum1); + for (int i = 0; i < 2 * VTraits::vlanes(); ++i) dst[i % cn] += ar[i]; #endif v_cleanup(); @@ -265,24 +265,24 @@ struct Sum_SIMD v_float64 v_sum0 = vx_setzero_f64(); v_float64 v_sum1 = vx_setzero_f64(); - for (; x <= len - 2 * v_float32::nlanes; x += 2 * v_float32::nlanes) + for (; x <= len - 2 * VTraits::vlanes(); x += 2 * VTraits::vlanes()) { v_float32 v_src0 = vx_load(src0 + x); - v_float32 v_src1 = vx_load(src0 + x + v_float32::nlanes); - v_sum0 += v_cvt_f64(v_src0) + v_cvt_f64(v_src1); - v_sum1 += v_cvt_f64_high(v_src0) + v_cvt_f64_high(v_src1); + v_float32 v_src1 = vx_load(src0 + x + VTraits::vlanes()); + v_sum0 = v_add(v_sum0, v_add(v_cvt_f64(v_src0), v_cvt_f64(v_src1))); + v_sum1 = v_add(v_sum1, v_add(v_cvt_f64_high(v_src0), v_cvt_f64_high(v_src1))); } #if CV_SIMD256 || CV_SIMD512 - double CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[v_float64::nlanes]; - v_store_aligned(ar, v_sum0 + v_sum1); - for (int i = 0; i < v_float64::nlanes; ++i) 
+ double CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[VTraits::max_nlanes]; + v_store_aligned(ar, v_add(v_sum0, v_sum1)); + for (int i = 0; i < VTraits::vlanes(); ++i) dst[i % cn] += ar[i]; #else - double CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[2 * v_float64::nlanes]; + double CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[2 * VTraits::max_nlanes]; v_store_aligned(ar, v_sum0); - v_store_aligned(ar + v_float64::nlanes, v_sum1); - for (int i = 0; i < 2 * v_float64::nlanes; ++i) + v_store_aligned(ar + VTraits::vlanes(), v_sum1); + for (int i = 0; i < 2 * VTraits::vlanes(); ++i) dst[i % cn] += ar[i]; #endif v_cleanup(); From a300e7e9454f80e2b08ac8c27e6bc27c479af9b2 Mon Sep 17 00:00:00 2001 From: DeePingXian Date: Sun, 13 Aug 2023 16:40:38 +0800 Subject: [PATCH 14/57] Adding support for Streamlabs Desktop Virtual Webcam Streamlabs Desktop has the same issue in https://github.com/opencv/opencv/issues/19746. This fixes it using https://github.com/opencv/opencv/pull/23460 method. --- modules/videoio/src/cap_dshow.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/videoio/src/cap_dshow.cpp b/modules/videoio/src/cap_dshow.cpp index d6b2b95545..21af06a147 100644 --- a/modules/videoio/src/cap_dshow.cpp +++ b/modules/videoio/src/cap_dshow.cpp @@ -2771,7 +2771,7 @@ int videoInput::start(int deviceID, videoDevice *VD){ if(customSize){ DebugPrintOut("SETUP: Default Format is set to %ix%i\n", currentWidth, currentHeight); - if (strcmp("OBS Virtual Camera", VD->nDeviceName) == 0) + if (strcmp("OBS Virtual Camera", VD->nDeviceName) == 0 || strcmp("Streamlabs Desktop Virtual Webcam", VD->nDeviceName) == 0) { // OBS Virtual Camera always returns S_OK on SetFormat(), even if it doesn't support // the actual format. So we have to choose a format that it supports manually, e.g. NV12. 
From a301d1c298250ecb9ca3d376cdc812729797908b Mon Sep 17 00:00:00 2001 From: Ginkgo Date: Mon, 14 Aug 2023 20:58:35 +0800 Subject: [PATCH 15/57] fix ipp_warpAffine return value error --- modules/imgproc/src/imgwarp.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp index e5d9b0defb..bbeb8223f1 100644 --- a/modules/imgproc/src/imgwarp.cpp +++ b/modules/imgproc/src/imgwarp.cpp @@ -2679,8 +2679,13 @@ static bool ipp_warpAffine( InputArray _src, OutputArray _dst, int interpolation } return true; +#else + CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(interpolation); + CV_UNUSED(borderType); CV_UNUSED(_M); CV_UNUSED(flags); + return false; #endif } + #endif namespace hal { From fb34f36c695c177d34b9303bd175925995cc3daf Mon Sep 17 00:00:00 2001 From: Mihir Patil Date: Mon, 14 Aug 2023 19:11:14 -0400 Subject: [PATCH 16/57] style: remove extraneous std::cout --- modules/highgui/src/window_cocoa.mm | 3 --- 1 file changed, 3 deletions(-) diff --git a/modules/highgui/src/window_cocoa.mm b/modules/highgui/src/window_cocoa.mm index 5e34b502db..7e364220fa 100644 --- a/modules/highgui/src/window_cocoa.mm +++ b/modules/highgui/src/window_cocoa.mm @@ -732,18 +732,15 @@ void cvSetModeWindow_COCOA( const char* name, double prop_value ) localpool = [[NSAutoreleasePool alloc] init]; - // std::cout << "setting mode" << std::endl; #if MAC_OS_X_VERSION_MAX_ALLOWED > MAC_OS_X_VERSION_10_6 if ( ([window styleMask] & NSFullScreenWindowMask) && prop_value==CV_WINDOW_NORMAL ) { - // std::cout << "exiting fullscreen" << std::endl; [window toggleFullScreen:nil]; window.status=CV_WINDOW_NORMAL; } else if( !([window styleMask] & NSFullScreenWindowMask) && prop_value==CV_WINDOW_FULLSCREEN ) { - // std::cout << "entering fullscreen" << std::endl; [window setCollectionBehavior:NSWindowCollectionBehaviorFullScreenPrimary]; NSScreen* screen = [window screen]; From ad7ecf1dbaa0235f70a2a6d9affbbbb4f452df16 Mon Sep 17 00:00:00 2001 
From: Dmitry Kurtaev Date: Tue, 15 Aug 2023 11:32:44 +0300 Subject: [PATCH 17/57] Mark OpenVINO models for G-API tests optional --- .../gapi/misc/python/test/test_gapi_infer.py | 24 ++-- .../misc/python/test/test_gapi_infer_ov.py | 16 +-- .../gapi/test/infer/gapi_infer_ie_test.cpp | 108 +++++++++--------- .../gapi/test/infer/gapi_infer_ov_tests.cpp | 4 +- 4 files changed, 76 insertions(+), 76 deletions(-) diff --git a/modules/gapi/misc/python/test/test_gapi_infer.py b/modules/gapi/misc/python/test/test_gapi_infer.py index 8ecc957e41..d075651e87 100644 --- a/modules/gapi/misc/python/test/test_gapi_infer.py +++ b/modules/gapi/misc/python/test/test_gapi_infer.py @@ -38,8 +38,8 @@ try: return root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' - model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) + weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) device_id = 'CPU' img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) @@ -73,8 +73,8 @@ try: return root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' - model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) + weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) device_id = 'CPU' img_path = self.find_file('cv/face/david2.jpg', 
[os.environ.get('OPENCV_TEST_DATA_PATH')]) @@ -112,8 +112,8 @@ try: return root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' - model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) + weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) device_id = 'CPU' rois = [(10, 15, 62, 62), (23, 50, 62, 62), (14, 100, 62, 62), (80, 50, 62, 62)] @@ -161,8 +161,8 @@ try: return root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' - model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) + weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) device_id = 'CPU' rois = [(10, 15, 62, 62), (23, 50, 62, 62), (14, 100, 62, 62), (80, 50, 62, 62)] @@ -211,8 +211,8 @@ try: return root_path = '/omz_intel_models/intel/person-detection-retail-0013/FP32/person-detection-retail-0013' - model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) + weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) img_path = self.find_file('gpu/lbpcascade/er.png', 
[os.environ.get('OPENCV_TEST_DATA_PATH')]) device_id = 'CPU' img = cv.resize(cv.imread(img_path), (544, 320)) @@ -270,8 +270,8 @@ try: return root_path = '/omz_intel_models/intel/person-detection-retail-0013/FP32/person-detection-retail-0013' - model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) + weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) img_path = self.find_file('gpu/lbpcascade/er.png', [os.environ.get('OPENCV_TEST_DATA_PATH')]) device_id = 'CPU' img = cv.resize(cv.imread(img_path), (544, 320)) diff --git a/modules/gapi/misc/python/test/test_gapi_infer_ov.py b/modules/gapi/misc/python/test/test_gapi_infer_ov.py index b4022b6e2d..f48ec96369 100644 --- a/modules/gapi/misc/python/test/test_gapi_infer_ov.py +++ b/modules/gapi/misc/python/test/test_gapi_infer_ov.py @@ -86,8 +86,8 @@ try: skip_if_openvino_not_available() root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' - model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - bin_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) + bin_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) device_id = 'CPU' img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) @@ -119,8 +119,8 @@ try: skip_if_openvino_not_available() root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' - model_path = 
self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - bin_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) + bin_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) device_id = 'CPU' img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) @@ -148,8 +148,8 @@ try: skip_if_openvino_not_available() root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' - model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - bin_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) + bin_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) device_id = 'CPU' img_path1 = self.find_file('cv/face/david1.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) @@ -190,8 +190,8 @@ try: skip_if_openvino_not_available() root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' - model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - bin_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) + bin_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) device_id = 'CPU' img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) diff --git a/modules/gapi/test/infer/gapi_infer_ie_test.cpp 
b/modules/gapi/test/infer/gapi_infer_ie_test.cpp index 58e37040e8..92de39abfa 100644 --- a/modules/gapi/test/infer/gapi_infer_ie_test.cpp +++ b/modules/gapi/test/infer/gapi_infer_ie_test.cpp @@ -187,8 +187,8 @@ std::string compileAgeGenderBlob(const std::string& device) { cv::gapi::ie::detail::ParamDesc params; const std::string model_name = "age-gender-recognition-retail-0013"; const std::string output = model_name + ".blob"; - params.model_path = findDataFile(SUBDIR + model_name + ".xml"); - params.weights_path = findDataFile(SUBDIR + model_name + ".bin"); + params.model_path = findDataFile(SUBDIR + model_name + ".xml", false); + params.weights_path = findDataFile(SUBDIR + model_name + ".bin", false); params.device_id = device; compileBlob(params, output, IE::Precision::U8); return output; @@ -205,8 +205,8 @@ TEST(TestAgeGenderIE, InferBasicTensor) initDLDTDataPath(); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; // Load IE network, initialize input data using that. 
@@ -256,8 +256,8 @@ TEST(TestAgeGenderIE, InferBasicImage) initDLDTDataPath(); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; // FIXME: Ideally it should be an image from disk @@ -334,8 +334,8 @@ struct InferWithReshape: public ::testing::Test { reshape_dims = {1, 3, 70, 70}; initDLDTDataPath(); - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; @@ -432,8 +432,8 @@ struct ROIList: public ::testing::Test { void SetUp() { initDLDTDataPath(); - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; // FIXME: it must be cv::imread(findDataFile("../dnn/grace_hopper_227.png", false)); @@ -505,8 +505,8 @@ struct ROIListNV12: public ::testing::Test { void SetUp() { initDLDTDataPath(); - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + 
"age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; cv::Size sz{320, 240}; @@ -585,8 +585,8 @@ struct SingleROI: public ::testing::Test { void SetUp() { initDLDTDataPath(); - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; // FIXME: it must be cv::imread(findDataFile("../dnn/grace_hopper_227.png", false)); @@ -644,8 +644,8 @@ struct SingleROINV12: public ::testing::Test { void SetUp() { initDLDTDataPath(); - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; cv::Size sz{320, 240}; @@ -809,8 +809,8 @@ TEST(TestAgeGenderIE, GenericInfer) initDLDTDataPath(); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; cv::Mat in_mat(cv::Size(320, 240), CV_8UC3); @@ -859,8 +859,8 @@ TEST(TestAgeGenderIE, InvalidConfigGeneric) { initDLDTDataPath(); - std::string model_path = findDataFile(SUBDIR + 
"age-gender-recognition-retail-0013.xml"); - std::string weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + std::string model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + std::string weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); std::string device_id = "CPU"; // Configure & run G-API @@ -885,8 +885,8 @@ TEST(TestAgeGenderIE, CPUConfigGeneric) { initDLDTDataPath(); - std::string model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - std::string weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + std::string model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + std::string weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); std::string device_id = "CPU"; // Configure & run G-API @@ -912,8 +912,8 @@ TEST(TestAgeGenderIE, InvalidConfig) { initDLDTDataPath(); - std::string model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - std::string weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + std::string model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + std::string weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); std::string device_id = "CPU"; using AGInfo = std::tuple; @@ -937,8 +937,8 @@ TEST(TestAgeGenderIE, CPUConfig) { initDLDTDataPath(); - std::string model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - std::string weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + std::string model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + std::string weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); std::string device_id = "CPU"; using AGInfo = std::tuple; @@ -1017,8 +1017,8 @@ 
TEST(TestAgeGenderIE, MediaInputNV12) initDLDTDataPath(); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; cv::Size sz{320, 240}; @@ -1082,8 +1082,8 @@ TEST(TestAgeGenderIE, MediaInputBGR) initDLDTDataPath(); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; cv::Size sz{320, 240}; @@ -1134,8 +1134,8 @@ TEST(InferROI, MediaInputBGR) initDLDTDataPath(); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; cv::Size sz{320, 240}; @@ -1196,8 +1196,8 @@ TEST(InferROI, MediaInputNV12) initDLDTDataPath(); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = 
findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; cv::Size sz{320, 240}; @@ -1587,8 +1587,8 @@ TEST(Infer, TestStreamingInfer) std::string filepath = findDataFile("cv/video/768x576.avi"); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; // Load IE network, initialize input data using that. @@ -1654,8 +1654,8 @@ TEST(InferROI, TestStreamingInfer) std::string filepath = findDataFile("cv/video/768x576.avi"); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; // Load IE network, initialize input data using that. @@ -1732,8 +1732,8 @@ TEST(InferList, TestStreamingInfer) std::string filepath = findDataFile("cv/video/768x576.avi"); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; // Load IE network, initialize input data using that. 
@@ -1821,8 +1821,8 @@ TEST(Infer2, TestStreamingInfer) std::string filepath = findDataFile("cv/video/768x576.avi"); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; // Load IE network, initialize input data using that. @@ -1911,8 +1911,8 @@ TEST(InferEmptyList, TestStreamingInfer) std::string filepath = findDataFile("cv/video/768x576.avi"); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; // Load IE network, initialize input data using that. @@ -1965,8 +1965,8 @@ TEST(Infer2EmptyList, TestStreamingInfer) std::string filepath = findDataFile("cv/video/768x576.avi"); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; // Load IE network, initialize input data using that. 
@@ -2294,8 +2294,8 @@ struct LimitedSourceInfer: public ::testing::Test { GStreamingCompiled compileStreaming(int nireq) { cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; auto pp = cv::gapi::ie::Params { @@ -2348,8 +2348,8 @@ TEST(TestAgeGenderIE, InferWithBatch) constexpr int batch_size = 4; cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; cv::Mat in_mat({batch_size, 3, 62, 62}, CV_8U); @@ -3091,8 +3091,8 @@ struct AgeGenderInferTest: public ::testing::Test { void SetUp() { initDLDTDataPath(); - m_params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - m_params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + m_params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + m_params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); m_params.device_id = "CPU"; m_plugin = cv::gimpl::ie::wrap::getPlugin(m_params); @@ -3191,8 +3191,8 @@ TEST(TestAgeGenderIE, InferTensorWithPreproc) { initDLDTDataPath(); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + 
"age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; // Load IE network, initialize input data using that. diff --git a/modules/gapi/test/infer/gapi_infer_ov_tests.cpp b/modules/gapi/test/infer/gapi_infer_ov_tests.cpp index 09b54c1a46..abce82b329 100644 --- a/modules/gapi/test/infer/gapi_infer_ov_tests.cpp +++ b/modules/gapi/test/infer/gapi_infer_ov_tests.cpp @@ -255,8 +255,8 @@ private: struct BaseAgeGenderOV: public ::testing::Test { BaseAgeGenderOV() { initDLDTDataPath(); - xml_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - bin_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + xml_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + bin_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); device = "CPU"; blob_path = "age-gender-recognition-retail-0013.blob"; } From 8e52c0155bc797b93604813f96f2e9fa24593f3f Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Tue, 15 Aug 2023 20:49:21 +0300 Subject: [PATCH 18/57] gapi: update ADE library to 0.1.2b --- modules/gapi/cmake/DownloadADE.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/gapi/cmake/DownloadADE.cmake b/modules/gapi/cmake/DownloadADE.cmake index e22c4f1a32..26407f4fef 100644 --- a/modules/gapi/cmake/DownloadADE.cmake +++ b/modules/gapi/cmake/DownloadADE.cmake @@ -1,7 +1,7 @@ set(ade_src_dir "${OpenCV_BINARY_DIR}/3rdparty/ade") -set(ade_filename "v0.1.2a.zip") -set(ade_subdir "ade-0.1.2a") -set(ade_md5 "fa4b3e25167319cb0fa9432ef8281945") +set(ade_filename "v0.1.2b.zip") +set(ade_subdir "ade-0.1.2b") +set(ade_md5 "4f93a0844dfc463c617d83b09011819a") ocv_download(FILENAME ${ade_filename} HASH ${ade_md5} URL From 16681d1080928d31645d94ede6ce524fa8d6f177 Mon Sep 17 
00:00:00 2001 From: MuZihao Date: Wed, 16 Aug 2023 09:34:59 +0800 Subject: [PATCH 19/57] fix the issue in layer fused --- modules/dnn/src/net_impl_fuse.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/dnn/src/net_impl_fuse.cpp b/modules/dnn/src/net_impl_fuse.cpp index 4570d2b360..dfa542bd41 100644 --- a/modules/dnn/src/net_impl_fuse.cpp +++ b/modules/dnn/src/net_impl_fuse.cpp @@ -210,7 +210,7 @@ void Net::Impl::fuseLayers(const std::vector& blobsToKeep_) if (!nextData->params.has("operation") || toLowerCase(nextData->params.get("operation")) != "add") { CV_LOG_DEBUG(NULL, "DNN/CPU: fusion with NaryEltwise or Eltwise Layer operation is not supported: " - << nextData->params.get("operation")); + << toLowerCase(nextData->params.get("operation", "sum"))); break; } From 8ad5eb521a6e64f71963efcceaf995b0930ea357 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Wed, 16 Aug 2023 15:46:11 +0300 Subject: [PATCH 20/57] Merge pull request #24120 from dkurt:actualize_dnn_links OCL_FP16 MatMul with large batch * Workaround FP16 MatMul with large batch * Fix OCL reinitialization * Higher thresholds for INT8 quantization * Try fix gemm_buffer_NT for half (columns) * Fix GEMM by rows * Add batch dimension to InnerProduct layer test * Fix Test_ONNX_conformance.Layer_Test/test_basic_conv_with_padding * Batch 16 * Replace all vload4 * Version suffix for MobileNetSSD_deploy Caffe model --- modules/dnn/misc/python/test/test_dnn.py | 6 ++-- modules/dnn/perf/perf_caffe.cpp | 4 +-- modules/dnn/perf/perf_net.cpp | 2 +- modules/dnn/src/layers/convolution_layer.cpp | 4 +-- modules/dnn/src/opencl/gemm_buffer.cl | 38 ++++++++++---------- modules/dnn/test/test_backends.cpp | 4 +-- modules/dnn/test/test_caffe_importer.cpp | 4 +-- modules/dnn/test/test_halide_layers.cpp | 18 +++++----- modules/dnn/test/test_int8_layers.cpp | 6 ++-- modules/dnn/test/test_model.cpp | 10 +++--- 10 files changed, 49 insertions(+), 47 deletions(-) diff --git 
a/modules/dnn/misc/python/test/test_dnn.py b/modules/dnn/misc/python/test/test_dnn.py index 5c91aae56f..a06c02ad2d 100644 --- a/modules/dnn/misc/python/test/test_dnn.py +++ b/modules/dnn/misc/python/test/test_dnn.py @@ -191,10 +191,10 @@ class dnn_test(NewOpenCVTests): def test_model(self): img_path = self.find_dnn_file("dnn/street.png") - weights = self.find_dnn_file("dnn/MobileNetSSD_deploy.caffemodel", required=False) - config = self.find_dnn_file("dnn/MobileNetSSD_deploy.prototxt", required=False) + weights = self.find_dnn_file("dnn/MobileNetSSD_deploy_19e3ec3.caffemodel", required=False) + config = self.find_dnn_file("dnn/MobileNetSSD_deploy_19e3ec3.prototxt", required=False) if weights is None or config is None: - raise unittest.SkipTest("Missing DNN test files (dnn/MobileNetSSD_deploy.{prototxt/caffemodel}). Verify OPENCV_DNN_TEST_DATA_PATH configuration parameter.") + raise unittest.SkipTest("Missing DNN test files (dnn/MobileNetSSD_deploy_19e3ec3.{prototxt/caffemodel}). Verify OPENCV_DNN_TEST_DATA_PATH configuration parameter.") frame = cv.imread(img_path) model = cv.dnn_DetectionModel(weights, config) diff --git a/modules/dnn/perf/perf_caffe.cpp b/modules/dnn/perf/perf_caffe.cpp index 370f06dba2..f1ba26afcc 100644 --- a/modules/dnn/perf/perf_caffe.cpp +++ b/modules/dnn/perf/perf_caffe.cpp @@ -101,8 +101,8 @@ PERF_TEST(SqueezeNet_v1_1_caffe, CaffePerfTest) PERF_TEST(MobileNet_SSD, CaffePerfTest) { - caffe::Net* net = initNet("dnn/MobileNetSSD_deploy.prototxt", - "dnn/MobileNetSSD_deploy.caffemodel"); + caffe::Net* net = initNet("dnn/MobileNetSSD_deploy_19e3ec3.prototxt", + "dnn/MobileNetSSD_deploy_19e3ec3.caffemodel"); TEST_CYCLE() net->Forward(); SANITY_CHECK_NOTHING(); } diff --git a/modules/dnn/perf/perf_net.cpp b/modules/dnn/perf/perf_net.cpp index cfbb45b173..7f852e8f7b 100644 --- a/modules/dnn/perf/perf_net.cpp +++ b/modules/dnn/perf/perf_net.cpp @@ -141,7 +141,7 @@ PERF_TEST_P_(DNNTestNetwork, MobileNet_SSD_Caffe) { if (backend == 
DNN_BACKEND_HALIDE) throw SkipTestException(""); - processNet("dnn/MobileNetSSD_deploy.caffemodel", "dnn/MobileNetSSD_deploy.prototxt", "", + processNet("dnn/MobileNetSSD_deploy_19e3ec3.caffemodel", "dnn/MobileNetSSD_deploy_19e3ec3.prototxt", "", Mat(cv::Size(300, 300), CV_32FC3)); } diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index 0ed2bb7feb..0488dc462d 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -1069,7 +1069,7 @@ public: config.pads = pads; config.stride = stride; config.dilation = dilation; - if (inputs[0].dims != 4 && inputs[0].dims != umat_blobs[0].dims) + if (inputs[0].dims != 4 && inputs[0].dims != (blobs.empty() ? umat_blobs[0].dims : blobs[0].dims)) { static bool bypassCheck = utils::getConfigurationParameterBool("OPENCV_OCL4DNN_CONVOLUTION_IGNORE_INPUT_DIMS_4_CHECK", false); if (!bypassCheck) @@ -1081,7 +1081,7 @@ public: return false; } } - config.group = inputs[0].size[1] / umat_blobs[0].size[1]; + config.group = inputs[0].size[1] / (blobs.empty() ? 
umat_blobs[0].size[1] : blobs[0].size[1]); if (config.group < 1) // config.group == 0 causes div by zero in ocl4dnn code { CV_LOG_WARNING(NULL, "DNN/OpenCL: Unsupported config.group=" << config.group diff --git a/modules/dnn/src/opencl/gemm_buffer.cl b/modules/dnn/src/opencl/gemm_buffer.cl index b345983aee..70028b0eec 100644 --- a/modules/dnn/src/opencl/gemm_buffer.cl +++ b/modules/dnn/src/opencl/gemm_buffer.cl @@ -453,14 +453,14 @@ __kernel void TEMPLATE(gemm_buffer_NT, Dtype)( int w; for(int b_tile = 0; b_tile < K; b_tile += SLM_BLOCK) { barrier(CLK_LOCAL_MEM_FENCE); - vstore4(vload4(0, (__global float *)(src1_read0 + mad24(0, K, local_index))), 0, (__local float *)(slm_brow + mad24(0, SLM_BLOCK, local_index))); - vstore4(vload4(0, (__global float *)(src1_read0 + mad24(1, K, local_index))), 0, (__local float *)(slm_brow + mad24(1, SLM_BLOCK, local_index))); - vstore4(vload4(0, (__global float *)(src1_read0 + mad24(2, K, local_index))), 0, (__local float *)(slm_brow + mad24(2, SLM_BLOCK, local_index))); - vstore4(vload4(0, (__global float *)(src1_read0 + mad24(3, K, local_index))), 0, (__local float *)(slm_brow + mad24(3, SLM_BLOCK, local_index))); - vstore4(vload4(0, (__global float *)(src1_read0 + mad24(4, K, local_index))), 0, (__local float *)(slm_brow + mad24(4, SLM_BLOCK, local_index))); - vstore4(vload4(0, (__global float *)(src1_read0 + mad24(5, K, local_index))), 0, (__local float *)(slm_brow + mad24(5, SLM_BLOCK, local_index))); - vstore4(vload4(0, (__global float *)(src1_read0 + mad24(6, K, local_index))), 0, (__local float *)(slm_brow + mad24(6, SLM_BLOCK, local_index))); - vstore4(vload4(0, (__global float *)(src1_read0 + mad24(7, K, local_index))), 0, (__local float *)(slm_brow + mad24(7, SLM_BLOCK, local_index))); + vstore8(vload8(0, src1_read0 + mad24(0, K, local_index)), 0, slm_brow + mad24(0, SLM_BLOCK, local_index)); + vstore8(vload8(0, src1_read0 + mad24(1, K, local_index)), 0, slm_brow + mad24(1, SLM_BLOCK, local_index)); + vstore8(vload8(0, 
src1_read0 + mad24(2, K, local_index)), 0, slm_brow + mad24(2, SLM_BLOCK, local_index)); + vstore8(vload8(0, src1_read0 + mad24(3, K, local_index)), 0, slm_brow + mad24(3, SLM_BLOCK, local_index)); + vstore8(vload8(0, src1_read0 + mad24(4, K, local_index)), 0, slm_brow + mad24(4, SLM_BLOCK, local_index)); + vstore8(vload8(0, src1_read0 + mad24(5, K, local_index)), 0, slm_brow + mad24(5, SLM_BLOCK, local_index)); + vstore8(vload8(0, src1_read0 + mad24(6, K, local_index)), 0, slm_brow + mad24(6, SLM_BLOCK, local_index)); + vstore8(vload8(0, src1_read0 + mad24(7, K, local_index)), 0, slm_brow + mad24(7, SLM_BLOCK, local_index)); barrier(CLK_LOCAL_MEM_FENCE); slm_brow0 = slm_brow + local_x * (TILE_K / 8); @@ -469,17 +469,17 @@ __kernel void TEMPLATE(gemm_buffer_NT, Dtype)( while( w + TILE_K <= end_w ) { Dtype8 arow; - brow0 = as_half8(vload4(0, (__local float *)(slm_brow0 + 0 * SLM_BLOCK))); - brow1 = as_half8(vload4(0, (__local float *)(slm_brow0 + 1 * SLM_BLOCK))); - brow2 = as_half8(vload4(0, (__local float *)(slm_brow0 + 2 * SLM_BLOCK))); - brow3 = as_half8(vload4(0, (__local float *)(slm_brow0 + 3 * SLM_BLOCK))); - brow4 = as_half8(vload4(0, (__local float *)(slm_brow0 + 4 * SLM_BLOCK))); - brow5 = as_half8(vload4(0, (__local float *)(slm_brow0 + 5 * SLM_BLOCK))); - brow6 = as_half8(vload4(0, (__local float *)(slm_brow0 + 6 * SLM_BLOCK))); - brow7 = as_half8(vload4(0, (__local float *)(slm_brow0 + 7 * SLM_BLOCK))); + brow0 = vload8(0, slm_brow0 + 0 * SLM_BLOCK); + brow1 = vload8(0, slm_brow0 + 1 * SLM_BLOCK); + brow2 = vload8(0, slm_brow0 + 2 * SLM_BLOCK); + brow3 = vload8(0, slm_brow0 + 3 * SLM_BLOCK); + brow4 = vload8(0, slm_brow0 + 4 * SLM_BLOCK); + brow5 = vload8(0, slm_brow0 + 5 * SLM_BLOCK); + brow6 = vload8(0, slm_brow0 + 6 * SLM_BLOCK); + brow7 = vload8(0, slm_brow0 + 7 * SLM_BLOCK); #define MM_DOT_PRODUCT( _row, _dot ) \ - arow = as_half8(vload4(0, (__global float *)(src0_read + _row * K))); \ + arow = vload8(0, src0_read + _row * K); \ _dot = mad( 
(Dtype8)(arow.s0), (Dtype8)(brow0.s0, brow1.s0, brow2.s0, brow3.s0, brow4.s0, brow5.s0, brow6.s0, brow7.s0), _dot ); \ _dot = mad( (Dtype8)(arow.s1), (Dtype8)(brow0.s1, brow1.s1, brow2.s1, brow3.s1, brow4.s1, brow5.s1, brow6.s1, brow7.s1), _dot ); \ _dot = mad( (Dtype8)(arow.s2), (Dtype8)(brow0.s2, brow1.s2, brow2.s2, brow3.s2, brow4.s2, brow5.s2, brow6.s2, brow7.s2), _dot ); \ @@ -510,7 +510,7 @@ __kernel void TEMPLATE(gemm_buffer_NT, Dtype)( Dtype8 arow; #define READ_BROW(_brow, _row) \ - _brow = as_half8(vload4(0, (__local float *)(slm_brow0 + _row * SLM_BLOCK))); \ + _brow = vload8(0, slm_brow0 + _row * SLM_BLOCK); \ _brow.s0 = (mad24(local_x, 8, w) < K) ? _brow.s0 : 0.0f; \ _brow.s1 = (mad24(local_x, 8, w + 1) < K) ? _brow.s1 : 0.0f; \ _brow.s2 = (mad24(local_x, 8, w + 2) < K) ? _brow.s2 : 0.0f; \ @@ -532,7 +532,7 @@ __kernel void TEMPLATE(gemm_buffer_NT, Dtype)( #undef READ_BROW #define MM_DOT_PRODUCT( _row, _dot ) \ - arow = as_half8(vload4(0, (__global float *)(src0_read + _row * K))); \ + arow = vload8(0, src0_read + _row * K); \ arow.s0 = (mad24(local_x, 8, w) < K) ? arow.s0 : 0.0f; \ arow.s1 = (mad24(local_x, 8, w + 1) < K) ? arow.s1 : 0.0f; \ arow.s2 = (mad24(local_x, 8, w + 2) < K) ? arow.s2 : 0.0f; \ diff --git a/modules/dnn/test/test_backends.cpp b/modules/dnn/test/test_backends.cpp index da666ace01..9570355b4f 100644 --- a/modules/dnn/test/test_backends.cpp +++ b/modules/dnn/test/test_backends.cpp @@ -194,7 +194,7 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_Caffe) float scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CPU_FP16) ? 1.5e-2 : 0.0; float iouDiff = (target == DNN_TARGET_MYRIAD) ? 0.063 : 0.0; float detectionConfThresh = (target == DNN_TARGET_MYRIAD) ? 
0.262 : FLT_MIN; - processNet("dnn/MobileNetSSD_deploy.caffemodel", "dnn/MobileNetSSD_deploy.prototxt", + processNet("dnn/MobileNetSSD_deploy_19e3ec3.caffemodel", "dnn/MobileNetSSD_deploy_19e3ec3.prototxt", inp, "detection_out", "", scoreDiff, iouDiff, detectionConfThresh); expectNoFallbacksFromIE(net); } @@ -237,7 +237,7 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_Caffe_Different_Width_Height) scoreDiff = 0.03; iouDiff = 0.08; } - processNet("dnn/MobileNetSSD_deploy.caffemodel", "dnn/MobileNetSSD_deploy.prototxt", + processNet("dnn/MobileNetSSD_deploy_19e3ec3.caffemodel", "dnn/MobileNetSSD_deploy_19e3ec3.prototxt", inp, "detection_out", "", scoreDiff, iouDiff); expectNoFallbacksFromIE(net); } diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp index 708e353aac..3f5458a873 100644 --- a/modules/dnn/test/test_caffe_importer.cpp +++ b/modules/dnn/test/test_caffe_importer.cpp @@ -290,8 +290,8 @@ TEST(Reproducibility_SSD, Accuracy) typedef testing::TestWithParam > Reproducibility_MobileNet_SSD; TEST_P(Reproducibility_MobileNet_SSD, Accuracy) { - const string proto = findDataFile("dnn/MobileNetSSD_deploy.prototxt", false); - const string model = findDataFile("dnn/MobileNetSSD_deploy.caffemodel", false); + const string proto = findDataFile("dnn/MobileNetSSD_deploy_19e3ec3.prototxt", false); + const string model = findDataFile("dnn/MobileNetSSD_deploy_19e3ec3.caffemodel", false); Net net = readNetFromCaffe(proto, model); int backendId = get<0>(GetParam()); int targetId = get<1>(GetParam()); diff --git a/modules/dnn/test/test_halide_layers.cpp b/modules/dnn/test/test_halide_layers.cpp index d8a16d3efa..3629f720fb 100644 --- a/modules/dnn/test/test_halide_layers.cpp +++ b/modules/dnn/test/test_halide_layers.cpp @@ -407,15 +407,16 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, MaxPooling, Combine( //////////////////////////////////////////////////////////////////////////////// // Fully-connected 
//////////////////////////////////////////////////////////////////////////////// -typedef TestWithParam > > FullyConnected; +typedef TestWithParam > > FullyConnected; TEST_P(FullyConnected, Accuracy) { - int inChannels = get<0>(GetParam()); - Size inSize = get<1>(GetParam()); - int outChannels = get<2>(GetParam()); - bool hasBias = get<3>(GetParam()); - Backend backendId = get<0>(get<4>(GetParam())); - Target targetId = get<1>(get<4>(GetParam())); + int batch = get<0>(GetParam()); + int inChannels = get<1>(GetParam()); + Size inSize = get<2>(GetParam()); + int outChannels = get<3>(GetParam()); + bool hasBias = get<4>(GetParam()); + Backend backendId = get<0>(get<5>(GetParam())); + Target targetId = get<1>(get<5>(GetParam())); #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) if ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && (targetId == DNN_TARGET_OPENCL_FP16 || @@ -439,7 +440,7 @@ TEST_P(FullyConnected, Accuracy) lp.type = "InnerProduct"; lp.name = "testLayer"; - int sz[] = {1, inChannels, inSize.height, inSize.width}; + int sz[] = {batch, inChannels, inSize.height, inSize.width}; Mat input(4, &sz[0], CV_32F); double l1 = 0.0; @@ -467,6 +468,7 @@ TEST_P(FullyConnected, Accuracy) } INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, FullyConnected, Combine( +/*batch*/ Values(1, 2, 4, 8, 16), /*in channels*/ Values(3, 4), /*in size*/ Values(Size(5, 4), Size(4, 5), Size(1, 1)), /*out channels*/ Values(3, 4), diff --git a/modules/dnn/test/test_int8_layers.cpp b/modules/dnn/test/test_int8_layers.cpp index 8b3cd01f29..caba112516 100644 --- a/modules/dnn/test/test_int8_layers.cpp +++ b/modules/dnn/test/test_int8_layers.cpp @@ -878,14 +878,14 @@ TEST_P(Test_Int8_nets, MobileNet_SSD) if (target == DNN_TARGET_OPENCL && !ocl::Device::getDefault().isIntel()) applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL); - Net net = readNetFromCaffe(findDataFile("dnn/MobileNetSSD_deploy.prototxt", false), - 
findDataFile("dnn/MobileNetSSD_deploy.caffemodel", false)); + Net net = readNetFromCaffe(findDataFile("dnn/MobileNetSSD_deploy_19e3ec3.prototxt", false), + findDataFile("dnn/MobileNetSSD_deploy_19e3ec3.caffemodel", false)); Mat inp = imread(_tf("street.png")); Mat blob = blobFromImage(inp, 1.0 / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false); Mat ref = blobFromNPY(_tf("mobilenet_ssd_caffe_out.npy")); - float confThreshold = FLT_MIN, scoreDiff = 0.059, iouDiff = 0.11; + float confThreshold = FLT_MIN, scoreDiff = 0.084, iouDiff = 0.43; testDetectionNet(net, blob, ref, confThreshold, scoreDiff, iouDiff); } diff --git a/modules/dnn/test/test_model.cpp b/modules/dnn/test/test_model.cpp index a19923bf28..59b51c4bc0 100644 --- a/modules/dnn/test/test_model.cpp +++ b/modules/dnn/test/test_model.cpp @@ -490,8 +490,8 @@ TEST_P(Test_Model, DetectionMobilenetSSD) refBoxes.emplace_back(left, top, width, height); } - std::string weights_file = _tf("MobileNetSSD_deploy.caffemodel", false); - std::string config_file = _tf("MobileNetSSD_deploy.prototxt"); + std::string weights_file = _tf("MobileNetSSD_deploy_19e3ec3.caffemodel", false); + std::string config_file = _tf("MobileNetSSD_deploy_19e3ec3.prototxt"); Scalar mean = Scalar(127.5, 127.5, 127.5); double scale = 1.0 / 127.5; @@ -511,7 +511,7 @@ TEST_P(Test_Model, DetectionMobilenetSSD) } else if (target == DNN_TARGET_CUDA_FP16) { - scoreDiff = 0.0021; + scoreDiff = 0.0028; iouDiff = 1e-2; } float confThreshold = FLT_MIN; @@ -595,8 +595,8 @@ TEST_P(Test_Model, Detection_normalized) std::vector refConfidences = {0.999222f}; std::vector refBoxes = {Rect2d(0, 4, 227, 222)}; - std::string weights_file = _tf("MobileNetSSD_deploy.caffemodel", false); - std::string config_file = _tf("MobileNetSSD_deploy.prototxt"); + std::string weights_file = _tf("MobileNetSSD_deploy_19e3ec3.caffemodel", false); + std::string config_file = _tf("MobileNetSSD_deploy_19e3ec3.prototxt"); Scalar mean = Scalar(127.5, 127.5, 127.5); double scale = 
1.0 / 127.5; From d792ebc5d2feb19f697260dc7ac923f27b173139 Mon Sep 17 00:00:00 2001 From: Sean McBride Date: Fri, 26 May 2023 07:57:31 -0400 Subject: [PATCH 21/57] Fixed buffer overrun; removed the last two uses of sprintf Fixed an off-by-1 buffer resize, the space for the null termination was forgotten. Prefer snprintf, which can never overflow (if given the right size). In one case I cheated and used strcpy, because I cannot figure out the buffer size at that point in the code. --- modules/core/src/persistence_xml.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/core/src/persistence_xml.cpp b/modules/core/src/persistence_xml.cpp index caba4f5bf0..6141fade2d 100644 --- a/modules/core/src/persistence_xml.cpp +++ b/modules/core/src/persistence_xml.cpp @@ -308,8 +308,8 @@ public: if( !multiline ) { - ptr = fs->resizeWriteBuffer( ptr, len + 9 ); - sprintf( ptr, "", comment ); + ptr = fs->resizeWriteBuffer( ptr, len + 5+4+1 ); + snprintf( ptr, len + 5+4+1, "", comment ); len = (int)strlen(ptr); } else @@ -344,7 +344,7 @@ public: fs->setBufferPtr(ptr); ptr = fs->flush(); } - sprintf( ptr, "-->" ); + strcpy( ptr, "-->" ); fs->setBufferPtr(ptr + 3); fs->flush(); } From f5a14532c22d45dfec4b3d91dc7caf46b5c940b0 Mon Sep 17 00:00:00 2001 From: autoantwort <41973254+autoantwort@users.noreply.github.com> Date: Thu, 17 Aug 2023 11:34:19 +0200 Subject: [PATCH 22/57] Merge pull request #24167 from autoantwort:missing-include * add missing include * Apply CR --- modules/dnn/src/op_halide.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/dnn/src/op_halide.cpp b/modules/dnn/src/op_halide.cpp index 653de36146..db1a72278e 100644 --- a/modules/dnn/src/op_halide.cpp +++ b/modules/dnn/src/op_halide.cpp @@ -14,6 +14,7 @@ #include "halide_scheduler.hpp" #include +#include #endif // HAVE_HALIDE namespace cv { From 70a58d7198dc57c44b876e2f8c7ca56890ad55fc Mon Sep 17 00:00:00 2001 From: CSBVision Date: Thu, 17 Aug 2023 12:02:29 +0200 Subject: 
[PATCH 23/57] Use STRING instead of PATH to fix #24141 --- cmake/OpenCVDetectPython.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/OpenCVDetectPython.cmake b/cmake/OpenCVDetectPython.cmake index c93eb9f9a7..88a4341856 100644 --- a/cmake/OpenCVDetectPython.cmake +++ b/cmake/OpenCVDetectPython.cmake @@ -258,7 +258,7 @@ if(NOT ${found}) set(${include_path} "${_include_path}" CACHE INTERNAL "") set(${include_dir} "${_include_dir}" CACHE PATH "Python include dir") set(${include_dir2} "${_include_dir2}" CACHE PATH "Python include dir 2") - set(${packages_path} "${_packages_path}" CACHE PATH "Where to install the python packages.") + set(${packages_path} "${_packages_path}" CACHE STRING "Where to install the python packages.") set(${numpy_include_dirs} ${_numpy_include_dirs} CACHE PATH "Path to numpy headers") set(${numpy_version} "${_numpy_version}" CACHE INTERNAL "") endif() From d88ad46978b8a08b1796685d7fb4b99ef72ebc42 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Fri, 18 Aug 2023 15:10:14 +0300 Subject: [PATCH 24/57] Remove explitit transB attribute from MatMul perf test --- modules/dnn/perf/perf_layer.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/dnn/perf/perf_layer.cpp b/modules/dnn/perf/perf_layer.cpp index 261bc5c3ca..3020dbea66 100644 --- a/modules/dnn/perf/perf_layer.cpp +++ b/modules/dnn/perf/perf_layer.cpp @@ -678,7 +678,6 @@ PERF_TEST_P_(Layer_FullyConnected, fc) lp.set("axis", input.dims - 1); lp.set("is_matmul", weights.dims > 2); lp.set("bias_term", false); - lp.set("transB", true); lp.set("num_output", (int)weights.total(0, weights.dims - 1)); lp.blobs.resize(1, weights); From a0debc3a9a4871d08bb8d07bad68df246c7207ab Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Wed, 23 Aug 2023 10:31:14 +0300 Subject: [PATCH 25/57] Enable OpenVINO max pooling with indices since 2022.1 --- modules/dnn/src/layers/pooling_layer.cpp | 7 ++++++- modules/dnn/test/test_tflite_importer.cpp | 5 +++++ 2 files changed, 11 
insertions(+), 1 deletion(-) diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index 5caaa36ba0..1337657127 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -209,7 +209,8 @@ public: #ifdef HAVE_INF_ENGINE if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { - return type != STOCHASTIC && kernel_size.size() > 1 && (kernel_size.size() != 3 || !isArmComputePlugin()); + return type != STOCHASTIC && kernel_size.size() > 1 && (kernel_size.size() != 3 || !isArmComputePlugin()) && + (!computeMaxIdx || INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2022_1)); } #endif if (backendId == DNN_BACKEND_OPENCV) @@ -615,10 +616,14 @@ public: else if (type == MAX) { std::shared_ptr max_pool; if (computeMaxIdx) { +#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2022_1) std::vector dilations(kernel_size.size(), 1); max_pool = std::make_shared(ieInpNode, ngraph::Strides(strides), ngraph::Strides(dilations), ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size), rounding_type, pad_type); +#else + CV_Error(Error::StsNotImplemented, "OpenVINO MaxPool with indices"); +#endif } else { max_pool = std::make_shared(ieInpNode, ngraph::Strides(strides), ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size), diff --git a/modules/dnn/test/test_tflite_importer.cpp b/modules/dnn/test/test_tflite_importer.cpp index 19b3f3a94a..beb586f126 100644 --- a/modules/dnn/test/test_tflite_importer.cpp +++ b/modules/dnn/test/test_tflite_importer.cpp @@ -128,6 +128,11 @@ TEST_P(Test_TFLite, max_unpooling) if (backend == DNN_BACKEND_CUDA) applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2022010000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target != 
DNN_TARGET_CPU) { if (target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); if (target == DNN_TARGET_OPENCL) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); From 81cc89a3ce1e92b24ee46bf95728efe5e6d0f29f Mon Sep 17 00:00:00 2001 From: Kumataro Date: Thu, 24 Aug 2023 04:53:11 +0900 Subject: [PATCH 26/57] Merge pull request #24179 from Kumataro:fix24145 * core:add OPENCV_IPP_MEAN/MINMAX/SUM option to enable IPP optimizations * fix: to use guard HAVE_IPP and ocv_append_source_file_compile_definitions() macro. * support OPENCV_IPP_ENABLE_ALL * add document for OPENCV_IPP_ENABLE_ALL * fix OPENCV_IPP_ENABLE_ALL comment --- .../config_reference.markdown | 11 ++++++++++ modules/core/CMakeLists.txt | 20 +++++++++++++++++++ modules/core/src/mean.dispatch.cpp | 4 ++++ modules/core/src/minmax.cpp | 2 ++ modules/core/src/sum.dispatch.cpp | 2 ++ modules/imgproc/CMakeLists.txt | 10 ++++++---- 6 files changed, 45 insertions(+), 4 deletions(-) diff --git a/doc/tutorials/introduction/config_reference/config_reference.markdown b/doc/tutorials/introduction/config_reference/config_reference.markdown index 2528baf41d..4fd256dd93 100644 --- a/doc/tutorials/introduction/config_reference/config_reference.markdown +++ b/doc/tutorials/introduction/config_reference/config_reference.markdown @@ -224,6 +224,16 @@ Following options can be used to produce special builds with instrumentation or @see [Link time optimization](https://gcc.gnu.org/wiki/LinkTimeOptimization) @see [ThinLTO](https://clang.llvm.org/docs/ThinLTO.html) +## Enable IPP optimization + +The following options can be used to enable IPP optimizations for each function, but they increase the size of the OpenCV library. All options are disabled by default. 
+ +| Option | Functions | + roughly size | +| -------| --------- | -------------- | +| `OPENCV_IPP_GAUSSIAN_BLUR` | GaussianBlur() | +8Mb | +| `OPENCV_IPP_MEAN` | mean() / meanStdDev() | +0.2Mb | +| `OPENCV_IPP_MINMAX` | minMaxLoc() / minMaxIdx() | +0.2Mb | +| `OPENCV_IPP_SUM` | sum() | +0.1Mb | # Functional features and dependencies {#tutorial_config_reference_func} @@ -565,6 +575,7 @@ Following options can be used to change installation layout for common scenarios | ------ | ------- | ----------- | | `OPENCV_ENABLE_NONFREE` | _OFF_ | Some algorithms included in the library are known to be protected by patents and are disabled by default. | | `OPENCV_FORCE_3RDPARTY_BUILD`| _OFF_ | Enable all `BUILD_` options at once. | +| `OPENCV_IPP_ENABLE_ALL`| _OFF_ | Enable all `OPENCV_IPP_` options at once. | | `ENABLE_CCACHE` | _ON_ (on Unix-like platforms) | Enable [ccache](https://en.wikipedia.org/wiki/Ccache) auto-detection. This tool wraps compiler calls and caches results, can significantly improve re-compilation time. | | `ENABLE_PRECOMPILED_HEADERS` | _ON_ (for MSVC) | Enable precompiled headers support. Improves build time. | | `BUILD_DOCS` | _OFF_ | Enable documentation build (_doxygen_, _doxygen_cpp_, _doxygen_python_, _doxygen_javadoc_ targets). [Doxygen](http://www.doxygen.org/index.html) must be installed for C++ documentation build. Python and [BeautifulSoup4](https://en.wikipedia.org/wiki/Beautiful_Soup_(HTML_parser)) must be installed for Python documentation build. Javadoc and Ant must be installed for Java documentation build (part of Java SDK). 
| diff --git a/modules/core/CMakeLists.txt b/modules/core/CMakeLists.txt index 1b3f574275..ba5b61ef5f 100644 --- a/modules/core/CMakeLists.txt +++ b/modules/core/CMakeLists.txt @@ -60,6 +60,26 @@ if(CV_TRACE AND HAVE_ITT) add_definitions(-DOPENCV_WITH_ITT=1) endif() +# https://github.com/opencv/opencv/issues/24145 +if(HAVE_IPP) + OCV_OPTION(OPENCV_IPP_ENABLE_ALL "Enable all OPENCV_IPP_ options at once" OFF) + OCV_OPTION(OPENCV_IPP_MEAN "Enable IPP optimizations for mean (+200Kb in binary size)" OPENCV_IPP_ENABLE_ALL) + OCV_OPTION(OPENCV_IPP_MINMAX "Enable IPP optimizations for minMaxLoc/minMaxIdx (+200Kb in binary size)" OPENCV_IPP_ENABLE_ALL) + OCV_OPTION(OPENCV_IPP_SUM "Enable IPP optimizations for sum (+100Kb in binary size)" OPENCV_IPP_ENABLE_ALL) + + if(OPENCV_IPP_MEAN) + ocv_append_source_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/mean.dispatch.cpp "OPENCV_IPP_MEAN=1") + endif() + + if(OPENCV_IPP_MINMAX) + ocv_append_source_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/minmax.cpp "OPENCV_IPP_MINMAX=1") + endif() + + if(OPENCV_IPP_SUM) + ocv_append_source_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/sum.dispatch.cpp "OPENCV_IPP_SUM=1") + endif() +endif() + file(GLOB lib_cuda_hdrs "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/cuda/*.hpp" "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/cuda/*.h") diff --git a/modules/core/src/mean.dispatch.cpp b/modules/core/src/mean.dispatch.cpp index 6a5275ab43..0f94e5421a 100644 --- a/modules/core/src/mean.dispatch.cpp +++ b/modules/core/src/mean.dispatch.cpp @@ -8,20 +8,24 @@ #include "opencv2/core/openvx/ovx_defs.hpp" #include "stat.hpp" +#ifndef OPENCV_IPP_MEAN #undef HAVE_IPP #undef CV_IPP_RUN_FAST #define CV_IPP_RUN_FAST(f, ...) #undef CV_IPP_RUN #define CV_IPP_RUN(c, f, ...) 
+#endif // OPENCV_IPP_MEAN #include "mean.simd.hpp" #include "mean.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content +#ifndef OPENCV_IPP_MEAN #undef HAVE_IPP #undef CV_IPP_RUN_FAST #define CV_IPP_RUN_FAST(f, ...) #undef CV_IPP_RUN #define CV_IPP_RUN(c, f, ...) +#endif // OPENCV_IPP_MEAN namespace cv { diff --git a/modules/core/src/minmax.cpp b/modules/core/src/minmax.cpp index 092c5e9234..bf2471a076 100644 --- a/modules/core/src/minmax.cpp +++ b/modules/core/src/minmax.cpp @@ -11,11 +11,13 @@ #include +#ifndef OPENCV_IPP_MINMAX #undef HAVE_IPP #undef CV_IPP_RUN_FAST #define CV_IPP_RUN_FAST(f, ...) #undef CV_IPP_RUN #define CV_IPP_RUN(c, f, ...) +#endif // OPENCV_IPP_MINMAX #define IPP_DISABLE_MINMAXIDX_MANY_ROWS 1 // see Core_MinMaxIdx.rows_overflow test diff --git a/modules/core/src/sum.dispatch.cpp b/modules/core/src/sum.dispatch.cpp index a1f7d73868..fade948336 100644 --- a/modules/core/src/sum.dispatch.cpp +++ b/modules/core/src/sum.dispatch.cpp @@ -10,11 +10,13 @@ #include "sum.simd.hpp" #include "sum.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content +#ifndef OPENCV_IPP_SUM #undef HAVE_IPP #undef CV_IPP_RUN_FAST #define CV_IPP_RUN_FAST(f, ...) #undef CV_IPP_RUN #define CV_IPP_RUN(c, f, ...) 
+#endif // OPENCV_IPP_SUM namespace cv { diff --git a/modules/imgproc/CMakeLists.txt b/modules/imgproc/CMakeLists.txt index 8ee300c320..10aed6bedd 100644 --- a/modules/imgproc/CMakeLists.txt +++ b/modules/imgproc/CMakeLists.txt @@ -12,8 +12,10 @@ ocv_add_dispatched_file(smooth SSE2 SSE4_1 AVX2) ocv_add_dispatched_file(sumpixels SSE2 AVX2 AVX512_SKX) ocv_define_module(imgproc opencv_core WRAP java objc python js) -ocv_check_environment_variables(OPENCV_IPP_GAUSSIAN_BLUR) -option(OPENCV_IPP_GAUSSIAN_BLUR "Enable IPP optimizations for GaussianBlur (+8Mb in binary size)" OFF) -if(OPENCV_IPP_GAUSSIAN_BLUR) - ocv_append_source_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/smooth.dispatch.cpp "ENABLE_IPP_GAUSSIAN_BLUR=1") +if(HAVE_IPP) + # OPENCV_IPP_ENABLE_ALL is defined in modules/core/CMakeList.txt + OCV_OPTION(OPENCV_IPP_GAUSSIAN_BLUR "Enable IPP optimizations for GaussianBlur (+8Mb in binary size)" OPENCV_IPP_ENABLE_ALL) + if(OPENCV_IPP_GAUSSIAN_BLUR) + ocv_append_source_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/smooth.dispatch.cpp "ENABLE_IPP_GAUSSIAN_BLUR=1") + endif() endif() From 588ddf1b181aa7243144b27d65fc7690fb89e344 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Fri, 25 Aug 2023 14:53:34 +0300 Subject: [PATCH 27/57] Merge pull request #24186 from dkurt:ts_fixture_constructor_skip Skip test on SkipTestException at fixture's constructor * Skip test on SkipTestException at fixture's constructor * Add warning supression * Skip Python tests if no test file found * Skip instances of test fixture with exception at SetUpTestCase * Skip test with exception at SetUp method * Try remove warning disable * Add CV_NORETURN * Remove FAIL assertion * Use findDataFile to throw Skip exception * Throw exception conditionally --- modules/core/test/test_misc.cpp | 36 ++++++++++++++++++++ modules/python/test/tests_common.py | 2 ++ modules/ts/include/opencv2/ts/ts_ext.hpp | 42 +++++++++++++++++++++--- modules/ts/src/ts_tags.cpp | 13 +++++--- 4 files 
changed, 85 insertions(+), 8 deletions(-) diff --git a/modules/core/test/test_misc.cpp b/modules/core/test/test_misc.cpp index 8ed0afe771..cb89dcf573 100644 --- a/modules/core/test/test_misc.cpp +++ b/modules/core/test/test_misc.cpp @@ -917,5 +917,41 @@ REGISTER_TYPED_TEST_CASE_P(Rect_Test, Overflows); typedef ::testing::Types RectTypes; INSTANTIATE_TYPED_TEST_CASE_P(Negative_Test, Rect_Test, RectTypes); +// Expected that SkipTestException thrown in the constructor should skip test but not fail +struct TestFixtureSkip: public ::testing::Test { + TestFixtureSkip(bool throwEx = true) { + if (throwEx) { + throw SkipTestException("Skip test at constructor"); + } + } +}; + +TEST_F(TestFixtureSkip, NoBodyRun) { + FAIL() << "Unreachable code called"; +} + +// Check no test body started in case of skip exception at static SetUpTestCase +struct TestSetUpTestCaseSkip: public ::testing::Test { + static void SetUpTestCase() { + throw SkipTestException("Skip test at SetUpTestCase"); + } +}; + +TEST_F(TestSetUpTestCaseSkip, NoBodyRun) { + FAIL() << "Unreachable code called"; +} +TEST_F(TestSetUpTestCaseSkip, NoBodyRun2) { + FAIL() << "Unreachable code called"; +} + +struct TestSetUpSkip: public ::testing::Test { + virtual void SetUp() { + throw SkipTestException("Skip test at SetUp"); + } +}; + +TEST_F(TestSetUpSkip, NoBodyRun) { + FAIL() << "Unreachable code called"; +} }} // namespace diff --git a/modules/python/test/tests_common.py b/modules/python/test/tests_common.py index ec49f46d0d..d673dd7b78 100644 --- a/modules/python/test/tests_common.py +++ b/modules/python/test/tests_common.py @@ -36,6 +36,8 @@ class NewOpenCVTests(unittest.TestCase): return candidate if required: self.fail('File ' + filename + ' not found') + else: + self.skipTest('File ' + filename + ' not found') return None diff --git a/modules/ts/include/opencv2/ts/ts_ext.hpp b/modules/ts/include/opencv2/ts/ts_ext.hpp index efa4860510..4603dba4f7 100644 --- a/modules/ts/include/opencv2/ts/ts_ext.hpp +++ 
b/modules/ts/include/opencv2/ts/ts_ext.hpp @@ -47,6 +47,18 @@ bool checkBigDataTests(); } \ } \ +#define CV__TEST_SETUP_IMPL(parent_class) \ + { \ + try { \ + parent_class::SetUp(); \ + } catch (const cvtest::details::SkipTestExceptionBase& e) { \ + printf("[ SKIP ] %s\n", e.what()); \ + } \ + } + +struct DummyTest : public ::testing::Test { + virtual void TestBody() CV_OVERRIDE {} +}; #undef TEST #define TEST_(test_case_name, test_name, parent_class, bodyMethodName, BODY_ATTR, BODY_IMPL) \ @@ -60,6 +72,17 @@ bool checkBigDataTests(); GTEST_DISALLOW_COPY_AND_ASSIGN_(\ GTEST_TEST_CLASS_NAME_(test_case_name, test_name));\ };\ + class test_case_name##test_name##_factory : public ::testing::internal::TestFactoryBase { \ + public:\ + virtual ::testing::Test* CreateTest() { \ + try { \ + return new GTEST_TEST_CLASS_NAME_(test_case_name, test_name); \ + } catch (const cvtest::details::SkipTestExceptionBase& e) { \ + printf("[ SKIP ] %s\n", e.what()); \ + return new DummyTest(); \ + } \ + } \ + };\ \ ::testing::TestInfo* const GTEST_TEST_CLASS_NAME_(test_case_name, test_name)\ ::test_info_ =\ @@ -69,8 +92,7 @@ bool checkBigDataTests(); (::testing::internal::GetTestTypeId()), \ parent_class::SetUpTestCase, \ parent_class::TearDownTestCase, \ - new ::testing::internal::TestFactoryImpl<\ - GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>);\ + new test_case_name##test_name##_factory);\ void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody() BODY_IMPL( #test_case_name "_" #test_name ) \ void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::bodyMethodName() @@ -109,10 +131,22 @@ bool checkBigDataTests(); private:\ virtual void TestBody() CV_OVERRIDE;\ virtual void Body(); \ + virtual void SetUp() CV_OVERRIDE; \ static ::testing::TestInfo* const test_info_ GTEST_ATTRIBUTE_UNUSED_;\ GTEST_DISALLOW_COPY_AND_ASSIGN_(\ GTEST_TEST_CLASS_NAME_(test_fixture, test_name));\ };\ + class test_fixture##test_name##_factory : public ::testing::internal::TestFactoryBase { \ + 
public:\ + virtual ::testing::Test* CreateTest() { \ + try { \ + return new GTEST_TEST_CLASS_NAME_(test_fixture, test_name); \ + } catch (const cvtest::details::SkipTestExceptionBase& e) { \ + printf("[ SKIP ] %s\n", e.what()); \ + return new DummyTest(); \ + } \ + } \ + };\ \ ::testing::TestInfo* const GTEST_TEST_CLASS_NAME_(test_fixture, test_name)\ ::test_info_ =\ @@ -122,9 +156,9 @@ bool checkBigDataTests(); (::testing::internal::GetTypeId()), \ test_fixture::SetUpTestCase, \ test_fixture::TearDownTestCase, \ - new ::testing::internal::TestFactoryImpl<\ - GTEST_TEST_CLASS_NAME_(test_fixture, test_name)>);\ + new test_fixture##test_name##_factory);\ void GTEST_TEST_CLASS_NAME_(test_fixture, test_name)::TestBody() CV__TEST_BODY_IMPL( #test_fixture "_" #test_name ) \ + void GTEST_TEST_CLASS_NAME_(test_fixture, test_name)::SetUp() CV__TEST_SETUP_IMPL(test_fixture) \ void GTEST_TEST_CLASS_NAME_(test_fixture, test_name)::Body() // Don't use directly diff --git a/modules/ts/src/ts_tags.cpp b/modules/ts/src/ts_tags.cpp index 8bed1b739f..21653e17ee 100644 --- a/modules/ts/src/ts_tags.cpp +++ b/modules/ts/src/ts_tags.cpp @@ -11,7 +11,7 @@ namespace cvtest { static bool printTestTag = false; static std::vector currentDirectTestTags, currentImpliedTestTags; -static std::vector skipped_tests; +static std::vector skipped_tests; static std::map& getTestTagsSkipCounts() { @@ -26,7 +26,7 @@ static std::map& getTestTagsSkipExtraCounts() void testTagIncreaseSkipCount(const std::string& tag, bool isMain, bool appendSkipTests) { if (appendSkipTests) - skipped_tests.push_back(::testing::UnitTest::GetInstance()->current_test_info()); + skipped_tests.push_back(::testing::UnitTest::GetInstance()->current_test_case()); std::map& counts = isMain ? 
getTestTagsSkipCounts() : getTestTagsSkipExtraCounts(); std::map::iterator i = counts.find(tag); if (i == counts.end()) @@ -280,6 +280,11 @@ static bool isTestTagSkipped(const std::string& testTag, CV_OUT std::string& ski void checkTestTags() { + if (std::find(skipped_tests.begin(), skipped_tests.end(), + ::testing::UnitTest::GetInstance()->current_test_case()) != skipped_tests.end()) { + throw details::SkipTestExceptionBase(false); + } + std::string skipTag; const std::vector& testTags = currentDirectTestTags; { @@ -307,7 +312,7 @@ void checkTestTags() } if (found != tags.size()) { - skipped_tests.push_back(::testing::UnitTest::GetInstance()->current_test_info()); + skipped_tests.push_back(::testing::UnitTest::GetInstance()->current_test_case()); throw details::SkipTestExceptionBase("Test tags don't pass required tags list (--test_tag parameter)", true); } } @@ -341,7 +346,7 @@ void checkTestTags() if (!skip_message.empty()) { - skipped_tests.push_back(::testing::UnitTest::GetInstance()->current_test_info()); + skipped_tests.push_back(::testing::UnitTest::GetInstance()->current_test_case()); throw details::SkipTestExceptionBase(skip_message, true); } } From 4b1a4bdb49a6eb45d868eaf2de3fc2e8d34f3ece Mon Sep 17 00:00:00 2001 From: Kumataro Date: Sun, 27 Aug 2023 19:49:37 +0900 Subject: [PATCH 28/57] imgcodecs: fix libtiff homepage --- 3rdparty/readme.txt | 4 +++- modules/imgcodecs/src/grfmt_tiff.cpp | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/3rdparty/readme.txt b/3rdparty/readme.txt index c3068521e3..0e6ce1e05e 100644 --- a/3rdparty/readme.txt +++ b/3rdparty/readme.txt @@ -39,7 +39,9 @@ libspng Portable Network Graphics library. libtiff Tag Image File Format (TIFF) Software Copyright (c) 1988-1997 Sam Leffler Copyright (c) 1991-1997 Silicon Graphics, Inc. 
- See libtiff home page http://www.libtiff.org/ + See libtiff home page #1 http://www.simplesystems.org/libtiff/ + #2 https://libtiff.gitlab.io/libtiff/ + #3 http://libtiff.maptools.org/ for details and links to the source code WITH_TIFF CMake option must be ON to add libtiff & zlib support to imgcodecs. diff --git a/modules/imgcodecs/src/grfmt_tiff.cpp b/modules/imgcodecs/src/grfmt_tiff.cpp index ed21f3f14c..4febee36db 100644 --- a/modules/imgcodecs/src/grfmt_tiff.cpp +++ b/modules/imgcodecs/src/grfmt_tiff.cpp @@ -245,7 +245,7 @@ bool TiffDecoder::readHeader() if (!tif) { // TIFFOpen() mode flags are different to fopen(). A 'b' in mode "rb" has no effect when reading. - // http://www.remotesensing.org/libtiff/man/TIFFOpen.3tiff.html + // http://www.simplesystems.org/libtiff/functions/TIFFOpen.html if ( !m_buf.empty() ) { m_buf_pos = 0; @@ -1118,7 +1118,7 @@ public: TIFF* open () { // do NOT put "wb" as the mode, because the b means "big endian" mode, not "binary" mode. - // http://www.remotesensing.org/libtiff/man/TIFFOpen.3tiff.html + // http://www.simplesystems.org/libtiff/functions/TIFFOpen.html return TIFFClientOpen( "", "w", reinterpret_cast(this), &TiffEncoderBufHelper::read, &TiffEncoderBufHelper::write, &TiffEncoderBufHelper::seek, &TiffEncoderBufHelper::close, &TiffEncoderBufHelper::size, @@ -1200,7 +1200,7 @@ static bool readParam(const std::vector& params, int key, int& value) bool TiffEncoder::writeLibTiff( const std::vector& img_vec, const std::vector& params) { // do NOT put "wb" as the mode, because the b means "big endian" mode, not "binary" mode. 
- // http://www.remotesensing.org/libtiff/man/TIFFOpen.3tiff.html + // http://www.simplesystems.org/libtiff/functions/TIFFOpen.html TIFF* tif = NULL; TiffEncoderBufHelper buf_helper(m_buf); From 8a1b998b2ba3993919deafd137a33f55fce2c962 Mon Sep 17 00:00:00 2001 From: Vincent Rabaud Date: Sun, 27 Aug 2023 19:46:24 +0200 Subject: [PATCH 29/57] Merge pull request #24194 from vrabaud:compilation_fix * Fix compilation when forcing later C++. * Remove random_shuffle. * Remove random_shuffle. --- modules/objdetect/test/test_qrcode_encode.cpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/modules/objdetect/test/test_qrcode_encode.cpp b/modules/objdetect/test/test_qrcode_encode.cpp index 14900c3078..1005793269 100644 --- a/modules/objdetect/test/test_qrcode_encode.cpp +++ b/modules/objdetect/test/test_qrcode_encode.cpp @@ -264,7 +264,8 @@ TEST(Objdetect_QRCode_Encode_Decode, regression) int true_capacity = establishCapacity(mode, version, cur_capacity); std::string input_info = symbol_set; - std::random_shuffle(input_info.begin(),input_info.end()); + std::mt19937 rand_gen {1}; + std::shuffle(input_info.begin(), input_info.end(), rand_gen); int count = 0; if((int)input_info.length() > true_capacity) { @@ -390,15 +391,8 @@ TEST(Objdetect_QRCode_Encode_Decode_Structured_Append, DISABLED_regression) std::string symbol_set = config["symbols_set"]; std::string input_info = symbol_set; -#if defined CV_CXX11 - // std::random_shuffle is deprecated since C++11 and removed in C++17. - // Use manually constructed RNG with a fixed seed and std::shuffle instead. 
std::mt19937 rand_gen {1}; std::shuffle(input_info.begin(), input_info.end(), rand_gen); -#else - SeededRandFunctor<1> rand_gen; - std::random_shuffle(input_info.begin(), input_info.end(), rand_gen); -#endif for (int j = min_stuctures_num; j < max_stuctures_num; j++) { QRCodeEncoder::Params params; From c20febdbb04064ba23ea8fa83bde1b065932a94c Mon Sep 17 00:00:00 2001 From: Sam James Date: Tue, 29 Aug 2023 02:56:55 +0100 Subject: [PATCH 30/57] Fix compilation on arm64 with FP16 when disabled If building with -mcpu=native or any other setting which implies the current CPU has FP16 but with intrinsics disabled, we mistakenly try to use it even though convolution.hpp conditionally defines it correctly based on whether we should *use it*. convolution.cpp on the other hand was mismatched and trying to use it if the CPU supported it, even if not enabled in the build system. Make the guards match. Bug: https://bugs.gentoo.org/913031 Signed-off-by: Sam James --- modules/dnn/src/layers/cpu_kernels/convolution.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/dnn/src/layers/cpu_kernels/convolution.hpp b/modules/dnn/src/layers/cpu_kernels/convolution.hpp index 22ef9a8575..5effdc2d0c 100644 --- a/modules/dnn/src/layers/cpu_kernels/convolution.hpp +++ b/modules/dnn/src/layers/cpu_kernels/convolution.hpp @@ -14,7 +14,7 @@ #define CONV_NR_FP32 28 // The FP16 can only be supported by ARM64 and with FP16 FMA supported. -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC // check FP16 FMA. +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && CV_FP16 // check FP16 FMA. 
#define CONV_ARM_FP16 1 #endif From a308dfca9856574d37abe7628b965e29861fb105 Mon Sep 17 00:00:00 2001 From: Yuantao Feng Date: Wed, 30 Aug 2023 14:53:59 +0800 Subject: [PATCH 31/57] core: add broadcast (#23965) * add broadcast_to with tests * change name * fix test * fix implicit type conversion * replace type of shape with InputArray * add perf test * add perf tests which takes care of axis * v2 from ficus expand * rename to broadcast * use randu in place of declare * doc improvement; smaller scale in perf * capture get_index by reference --- modules/core/include/opencv2/core.hpp | 7 + modules/core/perf/perf_arithm.cpp | 27 ++++ modules/core/src/matrix_transform.cpp | 218 ++++++++++++++++++++++++++ modules/core/test/test_arithm.cpp | 133 ++++++++++++++++ 4 files changed, 385 insertions(+) diff --git a/modules/core/include/opencv2/core.hpp b/modules/core/include/opencv2/core.hpp index d9a21701f2..7b5108fcc4 100644 --- a/modules/core/include/opencv2/core.hpp +++ b/modules/core/include/opencv2/core.hpp @@ -1118,6 +1118,13 @@ CV_EXPORTS_W void flip(InputArray src, OutputArray dst, int flipCode); */ CV_EXPORTS_W void flipND(InputArray src, OutputArray dst, int axis); +/** @brief Broadcast the given Mat to the given shape. + * @param src input array + * @param shape target shape. Should be a list of CV_32S numbers. Note that negative values are not supported. 
+ * @param dst output array that has the given shape + */ +CV_EXPORTS_W void broadcast(InputArray src, InputArray shape, OutputArray dst); + enum RotateFlags { ROTATE_90_CLOCKWISE = 0, //!, perf::MatType, std::vector>>; typedef Size_MatType BinaryOpTest; +PERF_TEST_P_(BroadcastTest, basic) +{ + std::vector shape_src = get<0>(GetParam()); + int dt_type = get<1>(GetParam()); + std::vector shape_dst = get<2>(GetParam()); + + cv::Mat src(static_cast(shape_src.size()), shape_src.data(), dt_type); + cv::Mat dst(static_cast(shape_dst.size()), shape_dst.data(), dt_type); + + cv::randu(src, -1.f, 1.f); + + TEST_CYCLE() cv::broadcast(src, shape_dst, dst); + + SANITY_CHECK_NOTHING(); +} + +INSTANTIATE_TEST_CASE_P(/*nothing*/ , BroadcastTest, + testing::Combine( + testing::Values(std::vector{1, 100, 800}, + std::vector{10, 1, 800}, + std::vector{10, 100, 1}), + testing::Values(CV_32FC1), + testing::Values(std::vector{10, 100, 800}) + ) +); + PERF_TEST_P_(BinaryOpTest, min) { Size sz = get<0>(GetParam()); diff --git a/modules/core/src/matrix_transform.cpp b/modules/core/src/matrix_transform.cpp index 744ee69b0d..43bf9be057 100644 --- a/modules/core/src/matrix_transform.cpp +++ b/modules/core/src/matrix_transform.cpp @@ -7,6 +7,7 @@ #include "opencv2/core/detail/dispatch_helper.impl.hpp" #include // std::swap_ranges +#include // std::accumulate namespace cv { @@ -857,6 +858,223 @@ void flipND(InputArray _src, OutputArray _dst, int _axis) flipNDImpl(dst.ptr(), dst.size.p, dst.step.p, axis); } +/* + This function first prepends 1 to each tensor shape to have a common max_ndims dimension, then flatten non-broadcast dimensions. +*/ +static bool _flatten_for_broadcast(int narrays, int max_ndims, const int* ndims, const int** orig_shape, + int** flatten_shape, size_t** flatten_step) { + int i, j, k; + + // step 1. + // * make all inputs and the output max_ndims-dimensional. 
+ // * compute proper step's + for (i = max_ndims - 1; i >= 0; i-- ) { + for (k = 0; k < narrays; k++) { + j = ndims[k] - (max_ndims - i); + int sz_i = j >= 0 ? orig_shape[k][j] : 1; + size_t st_i = i == max_ndims - 1 ? 1 : flatten_step[k][i+1] * flatten_shape[k][i+1]; + flatten_shape[k][i] = sz_i; + flatten_step[k][i] = st_i; + if (flatten_shape[k][i] == 0) + return false; + } + } + + // step 2. Let's do the flattening first, + // since we'd need proper values of steps to check continuity. + // this loop is probably the most tricky part + // in the whole implementation of broadcasting. + j = max_ndims-1; + for (i = j - 1; i >= 0; i--) { + bool all_contiguous = true, all_scalars = true, all_consistent = true; + for(k = 0; k < narrays; k++) { + size_t st = flatten_step[k][j] * flatten_shape[k][j]; + bool prev_scalar = flatten_shape[k][j] == 1; + bool scalar = flatten_shape[k][i] == 1; + all_contiguous = all_contiguous && (st == flatten_step[k][i]); + all_scalars = all_scalars && scalar; + all_consistent = all_consistent && (scalar == prev_scalar); + } + if (all_contiguous && (all_consistent || all_scalars)) { + for(k = 0; k < narrays; k++) + flatten_shape[k][j] *= flatten_shape[k][i]; + } else { + j--; + if (i < j) { + for(k = 0; k < narrays; k++) { + flatten_shape[k][j] = flatten_shape[k][i]; + flatten_step[k][j] = flatten_step[k][i]; + } + } + } + } + + // step 3. Set some step's to 0's. + for (i = max_ndims-1; i >= j; i--) { + for (k = 0; k < narrays; k++) + flatten_step[k][i] = flatten_shape[k][i] == 1 ? 
0 : flatten_step[k][i]; + } + for (; i >= 0; i--) { + for (k = 0; k < narrays; k++) { + flatten_step[k][i] = 0; + flatten_shape[k][i] = 1; + } + } + return true; +} + +void broadcast(InputArray _src, InputArray _shape, OutputArray _dst) { + CV_INSTRUMENT_REGION(); + + Mat src = _src.getMat(); + CV_CheckTrue(src.isContinuous(), "broadcast: input array must be contiguous"); + CV_CheckChannelsEQ(src.channels(), 1, "broadcast: input array must be single channel"); + + Mat shape = _shape.getMat(); + CV_CheckTypeEQ(shape.type(), CV_32S, "broadcast: target shape must be of type int32"); + const auto dims_shape = static_cast(shape.total()); + const auto *ptr_shape = shape.ptr(); + + // check valid shape, 1D/0D Mat would fail in the following checks + const auto dims_src = src.dims; + CV_CheckLE(dims_src, dims_shape, + "broadcast: dimension of input array must be less than or equal to dimension of target shape"); + std::vector shape_src{src.size.p, src.size.p + dims_src}; + if (shape_src.size() < static_cast(dims_shape)) { + shape_src.insert(shape_src.begin(), dims_shape - shape_src.size(), 1); + } + for (int i = 0; i < static_cast(shape_src.size()); ++i) { + const auto *shape_target = ptr_shape; + if (shape_src[i] != 1) { + CV_CheckEQ(shape_src[i], shape_target[i], "target shape must be equal to input shape or 1"); + } + } + + // impl + _dst.create(dims_shape, shape.ptr(), src.type()); + Mat dst = _dst.getMat(); + std::vector is_same_shape(dims_shape, 0); + for (int i = 0; i < static_cast(shape_src.size()); ++i) { + if (shape_src[i] == ptr_shape[i]) { + is_same_shape[i] = 1; + } + } + // copy if same shape + if (std::accumulate(is_same_shape.begin(), is_same_shape.end(), 1, std::multiplies()) != 0) { + const auto *p_src = src.ptr(); + auto *p_dst = dst.ptr(); + std::memcpy(p_dst, p_src, dst.total() * dst.elemSize()); + return; + } + // other cases + int max_ndims = std::max(dims_src, dims_shape); + const int all_ndims[2] = {src.dims, dst.dims}; + const int* orig_shapes[2] 
= {src.size.p, dst.size.p}; + cv::AutoBuffer buff(max_ndims * 4); + int* flatten_shapes[2] = {(int*)buff.data(), (int*)(buff.data() + max_ndims)}; + size_t* flatten_steps[2] = {(size_t*)(buff.data() + 2 * max_ndims), (size_t*)(buff.data() + 3 * max_ndims)}; + if (_flatten_for_broadcast(2, max_ndims, all_ndims, orig_shapes, flatten_shapes, flatten_steps)) { + size_t src_dp = flatten_steps[0][max_ndims - 1]; + size_t dst_dp = flatten_steps[1][max_ndims - 1]; + CV_Assert(dst_dp == 1); + CV_Assert(max_ndims >= 2); // >= 3? + size_t rowstep_src = flatten_steps[0][max_ndims - 2]; + size_t rowstep_dst = flatten_steps[1][max_ndims - 2]; + const char* ptr_src = src.ptr(); + char* ptr_dst = dst.ptr(); + size_t esz = src.elemSize(); + int nrows = flatten_shapes[1][max_ndims - 2]; + int ncols = flatten_shapes[1][max_ndims - 1]; + int nplanes = 1; + CV_Check(esz, esz == 1 || esz == 2 || esz == 4 || esz == 8, "broadcast: not supported data type"); + + for (int k = 0; k < max_ndims - 2; k++) { + nplanes *= flatten_shapes[1][k]; + } + for (int plane_idx = 0; plane_idx < nplanes; plane_idx++) { + size_t offset_src = 0, offset_dst = 0; + size_t idx = (size_t)plane_idx; + for (int k = max_ndims - 3; k >= 0; k--) { + size_t prev_idx = idx / flatten_shapes[1][k]; + size_t i_k = (int)(idx - prev_idx * flatten_shapes[1][k]); + offset_src += i_k * flatten_steps[0][k]; + offset_dst += i_k * flatten_steps[1][k]; + idx = prev_idx; + } + + #define OPENCV_CORE_BROADCAST_LOOP(_Tp) \ + for (int i = 0; i < nrows; i++) { \ + const _Tp *ptr_src_ = (const _Tp*)ptr_src + offset_src + rowstep_src * i; \ + _Tp *ptr_dst_ = (_Tp*)ptr_dst + offset_dst + rowstep_dst * i; \ + if (src_dp == 1) { \ + for (int j = 0; j < ncols; j++) { \ + ptr_dst_[j] = ptr_src_[j]; \ + } \ + } else { \ + _Tp x = *ptr_src_; \ + for (int j = 0; j < ncols; j++) { \ + ptr_dst_[j] = x; \ + } \ + } \ + } + + if (esz == 1) { + OPENCV_CORE_BROADCAST_LOOP(int8_t); + } else if (esz == 2) { + OPENCV_CORE_BROADCAST_LOOP(int16_t); + } else 
if (esz == 4) { + OPENCV_CORE_BROADCAST_LOOP(int32_t); + } else if (esz == 8) { + OPENCV_CORE_BROADCAST_LOOP(int64_t); + } else { + CV_Error(cv::Error::StsNotImplemented, ""); + } + #undef OPENCV_CORE_BROADCAST_LOOP + } + } else { + // initial copy (src to dst) + std::vector step_src{src.step.p, src.step.p + dims_src}; + if (step_src.size() < static_cast(dims_shape)) { + step_src.insert(step_src.begin(), dims_shape - step_src.size(), step_src[0]); + } + for (size_t i = 0; i < src.total(); ++i) { + size_t t = i; + size_t src_offset = 0, dst_offset = 0; + for (int j = static_cast(shape_src.size() - 1); j >= 0; --j) { + size_t idx = t / shape_src[j]; + size_t offset = static_cast(t - idx * shape_src[j]); + src_offset += offset * step_src[j]; + dst_offset += offset * dst.step[j]; + t = idx; + } + const auto *p_src = src.ptr(); + auto *p_dst = dst.ptr(); + std::memcpy(p_dst + dst_offset, p_src + src_offset, dst.elemSize()); + } + // broadcast copy (dst inplace) + std::vector cumulative_shape(dims_shape, 1); + int total = static_cast(dst.total()); + for (int i = dims_shape - 1; i >= 0; --i) { + cumulative_shape[i] = static_cast(total / ptr_shape[i]); + total = cumulative_shape[i]; + } + for (int i = dims_shape - 1; i >= 0; --i) { + if (is_same_shape[i] == 1) { + continue; + } + auto step = dst.step[i]; + auto *p_dst = dst.ptr(); + for (int j = 0; j < cumulative_shape[i]; j++) { + for (int k = 0; k < ptr_shape[i] - 1; k++) { + std::memcpy(p_dst + step, p_dst, step); + p_dst += step; + } + p_dst += step; + } + } + } +} + void rotate(InputArray _src, OutputArray _dst, int rotateMode) { CV_Assert(_src.dims() <= 2); diff --git a/modules/core/test/test_arithm.cpp b/modules/core/test/test_arithm.cpp index ea9cda56be..848a2e8b6a 100644 --- a/modules/core/test/test_arithm.cpp +++ b/modules/core/test/test_arithm.cpp @@ -2268,6 +2268,139 @@ INSTANTIATE_TEST_CASE_P(Arithm, FlipND, testing::Combine( testing::Values(perf::MatType(CV_8UC1), CV_32FC1) )); +TEST(BroadcastTo, basic) { + 
std::vector shape_src{2, 1}; + std::vector data_src{1, 2}; + Mat src(static_cast(shape_src.size()), shape_src.data(), CV_32SC1, data_src.data()); + + auto get_index = [](const std::vector& shape, size_t cnt) { + std::vector index(shape.size()); + size_t t = cnt; + for (int i = static_cast(shape.size() - 1); i >= 0; --i) { + size_t idx = t / shape[i]; + index[i] = static_cast(t - idx * shape[i]); + t = idx; + } + return index; + }; + + auto fn_verify = [&get_index](const Mat& ref, const Mat& res) { + // check type + EXPECT_EQ(ref.type(), res.type()); + // check shape + EXPECT_EQ(ref.dims, res.dims); + for (int i = 0; i < ref.dims; ++i) { + EXPECT_EQ(ref.size[i], res.size[i]); + } + // check value + std::vector shape{ref.size.p, ref.size.p + ref.dims}; + for (size_t i = 0; i < ref.total(); ++i) { + auto index = get_index(shape, i); + switch (ref.type()) { + case CV_32SC1: { + ASSERT_EQ(ref.at(index.data()), res.at(index.data())); + } break; + case CV_8UC1: { + ASSERT_EQ(ref.at(index.data()), res.at(index.data())); + } break; + case CV_32FC1: { + ASSERT_EQ(ref.at(index.data()), res.at(index.data())); + } break; + default: FAIL() << "Unsupported type: " << ref.type(); + } + } + }; + + { + std::vector shape{4, 2, 3}; + std::vector data_ref{ + 1, 1, 1, // [0, 0, :] + 2, 2, 2, // [0, 1, :] + 1, 1, 1, // [1, 0, :] + 2, 2, 2, // [1, 1, :] + 1, 1, 1, // [2, 0, :] + 2, 2, 2, // [2, 1, :] + 1, 1, 1, // [3, 0, :] + 2, 2, 2 // [3, 1, :] + }; + Mat ref(static_cast(shape.size()), shape.data(), src.type(), data_ref.data()); + Mat dst; + broadcast(src, shape, dst); + fn_verify(ref, dst); + } + + { + Mat _src; + src.convertTo(_src, CV_8U); + std::vector shape{4, 2, 3}; + std::vector data_ref{ + 1, 1, 1, // [0, 0, :] + 2, 2, 2, // [0, 1, :] + 1, 1, 1, // [1, 0, :] + 2, 2, 2, // [1, 1, :] + 1, 1, 1, // [2, 0, :] + 2, 2, 2, // [2, 1, :] + 1, 1, 1, // [3, 0, :] + 2, 2, 2 // [3, 1, :] + }; + Mat ref(static_cast(shape.size()), shape.data(), _src.type(), data_ref.data()); + Mat dst; + 
broadcast(_src, shape, dst); + fn_verify(ref, dst); + } + + { + Mat _src; + src.convertTo(_src, CV_32F); + std::vector shape{1, 1, 2, 1}; // {2, 1} + std::vector data_ref{ + 1.f, // [0, 0, 0, 0] + 2.f, // [0, 0, 1, 0] + }; + Mat ref(static_cast(shape.size()), shape.data(), _src.type(), data_ref.data()); + Mat dst; + broadcast(_src, shape, dst); + fn_verify(ref, dst); + } + + { + std::vector _shape_src{2, 3, 4}; + std::vector _data_src{ + 1.f, 2.f, 3.f, 4.f, // [0, 0, :] + 2.f, 3.f, 4.f, 5.f, // [0, 1, :] + 3.f, 4.f, 5.f, 6.f, // [0, 2, :] + + 4.f, 5.f, 6.f, 7.f, // [1, 0, :] + 5.f, 6.f, 7.f, 8.f, // [1, 1, :] + 6.f, 7.f, 8.f, 9.f, // [1, 2, :] + }; + Mat _src(static_cast(_shape_src.size()), _shape_src.data(), CV_32FC1, _data_src.data()); + + std::vector shape{2, 1, 2, 3, 4}; + std::vector data_ref{ + 1.f, 2.f, 3.f, 4.f, // [0, 0, 0, 0, :] + 2.f, 3.f, 4.f, 5.f, // [0, 0, 0, 1, :] + 3.f, 4.f, 5.f, 6.f, // [0, 0, 0, 2, :] + + 4.f, 5.f, 6.f, 7.f, // [0, 0, 1, 0, :] + 5.f, 6.f, 7.f, 8.f, // [0, 0, 1, 1, :] + 6.f, 7.f, 8.f, 9.f, // [0, 0, 1, 2, :] + + 1.f, 2.f, 3.f, 4.f, // [1, 0, 0, 0, :] + 2.f, 3.f, 4.f, 5.f, // [1, 0, 0, 1, :] + 3.f, 4.f, 5.f, 6.f, // [1, 0, 0, 2, :] + + 4.f, 5.f, 6.f, 7.f, // [1, 0, 1, 0, :] + 5.f, 6.f, 7.f, 8.f, // [1, 0, 1, 1, :] + 6.f, 7.f, 8.f, 9.f, // [1, 0, 1, 2, :] + }; + Mat ref(static_cast(shape.size()), shape.data(), _src.type(), data_ref.data()); + Mat dst; + broadcast(_src, shape, dst); + fn_verify(ref, dst); + } +} + TEST(Core_minMaxIdx, regression_9207_2) { const int rows = 13; From 72bb8bb73cb97e9049a5dede776f47f39d29703c Mon Sep 17 00:00:00 2001 From: Kumataro Date: Sat, 2 Sep 2023 10:03:59 +0000 Subject: [PATCH 32/57] core: arm64: v_round() works with round to nearest, ties to even. 
--- modules/core/include/opencv2/core/hal/intrin_neon.hpp | 4 ++-- modules/core/test/test_intrin_utils.hpp | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/modules/core/include/opencv2/core/hal/intrin_neon.hpp b/modules/core/include/opencv2/core/hal/intrin_neon.hpp index 6f8973231b..14eb180819 100644 --- a/modules/core/include/opencv2/core/hal/intrin_neon.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_neon.hpp @@ -1997,12 +1997,12 @@ inline v_int32x4 v_trunc(const v_float32x4& a) inline v_int32x4 v_round(const v_float64x2& a) { static const int32x2_t zero = vdup_n_s32(0); - return v_int32x4(vcombine_s32(vmovn_s64(vcvtaq_s64_f64(a.val)), zero)); + return v_int32x4(vcombine_s32(vmovn_s64(vcvtnq_s64_f64(a.val)), zero)); } inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b) { - return v_int32x4(vcombine_s32(vmovn_s64(vcvtaq_s64_f64(a.val)), vmovn_s64(vcvtaq_s64_f64(b.val)))); + return v_int32x4(vcombine_s32(vmovn_s64(vcvtnq_s64_f64(a.val)), vmovn_s64(vcvtnq_s64_f64(b.val)))); } inline v_int32x4 v_floor(const v_float64x2& a) diff --git a/modules/core/test/test_intrin_utils.hpp b/modules/core/test/test_intrin_utils.hpp index 481e6bb1f2..1ece6de82f 100644 --- a/modules/core/test/test_intrin_utils.hpp +++ b/modules/core/test/test_intrin_utils.hpp @@ -1475,12 +1475,15 @@ template struct TheTest TheTest & test_float_math() { typedef typename V_RegTraits::round_reg Ri; - Data data1, data2, data3; + Data data1, data1_border, data2, data3; + // See https://github.com/opencv/opencv/issues/24213 + data1_border *= 0.5; data1 *= 1.1; data2 += 10; - R a1 = data1, a2 = data2, a3 = data3; + R a1 = data1, a1_border = data1_border, a2 = data2, a3 = data3; Data resB = v_round(a1), + resB_border = v_round(a1_border), resC = v_trunc(a1), resD = v_floor(a1), resE = v_ceil(a1); @@ -1493,6 +1496,7 @@ template struct TheTest { SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(cvRound(data1[i]), resB[i]); + EXPECT_EQ(cvRound(data1_border[i]), 
resB_border[i]); EXPECT_EQ((typename VTraits::lane_type)data1[i], resC[i]); EXPECT_EQ(cvFloor(data1[i]), resD[i]); EXPECT_EQ(cvCeil(data1[i]), resE[i]); From d19fc1264b4a05a7933361e57e0a301c54c1a661 Mon Sep 17 00:00:00 2001 From: Dmitry Matveev Date: Mon, 4 Sep 2023 11:48:53 +0200 Subject: [PATCH 33/57] Merge pull request #24178 from dmatveev:dm/streaming_queue G-API: Introduce a Queue Source #24178 - Added a new IStreamSource class: in fact, a wrapper over a concurrent queue; - Added minimal example on how it can be used; - Extended IStreamSource with optional "halt" interface to break the blocking calls in the emitter threads when required to stop. - Introduced a QueueInput class which allows to pass the whole graph's input vector at once. In fact it is a thin wrapper atop of individual Queue Sources. There is a hidden trap found with our type system as described in https://github.com/orgs/g-api-org/discussions/2 While it works even in this form, it should be addressed somewhere in the 5.0 timeframe. ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. 
- [x] The feature is well documented and sample code can be built with the project CMake --- modules/gapi/CMakeLists.txt | 3 + .../include/opencv2/gapi/gtype_traits.hpp | 6 +- .../opencv2/gapi/streaming/queue_source.hpp | 67 +++++++++ .../include/opencv2/gapi/streaming/source.hpp | 7 +- modules/gapi/src/compiler/gislandmodel.hpp | 1 + .../gapi/src/executor/gstreamingexecutor.cpp | 13 ++ modules/gapi/src/streaming/queue_source.cpp | 98 ++++++++++++++ .../gapi_streaming_queue_source_tests.cpp | 127 ++++++++++++++++++ 8 files changed, 319 insertions(+), 3 deletions(-) create mode 100644 modules/gapi/include/opencv2/gapi/streaming/queue_source.hpp create mode 100644 modules/gapi/src/streaming/queue_source.cpp create mode 100644 modules/gapi/test/streaming/gapi_streaming_queue_source_tests.cpp diff --git a/modules/gapi/CMakeLists.txt b/modules/gapi/CMakeLists.txt index e30cb77e9e..2caeb02ae2 100644 --- a/modules/gapi/CMakeLists.txt +++ b/modules/gapi/CMakeLists.txt @@ -190,6 +190,9 @@ set(gapi_srcs src/backends/ov/bindings_ov.cpp src/backends/python/gpythonbackend.cpp + # Queue Streaming source + src/streaming/queue_source.cpp + # OpenVPL Streaming source src/streaming/onevpl/source.cpp src/streaming/onevpl/source_priv.cpp diff --git a/modules/gapi/include/opencv2/gapi/gtype_traits.hpp b/modules/gapi/include/opencv2/gapi/gtype_traits.hpp index b56175788f..a1703a52cb 100644 --- a/modules/gapi/include/opencv2/gapi/gtype_traits.hpp +++ b/modules/gapi/include/opencv2/gapi/gtype_traits.hpp @@ -141,8 +141,10 @@ namespace detail template struct GTypeOf > { using type = cv::GArray; }; template struct GTypeOf { using type = cv::GOpaque;}; template<> struct GTypeOf { using type = cv::GFrame; }; - // FIXME: This is not quite correct since IStreamSource may produce not only Mat but also Scalar - // and vector data. TODO: Extend the type dispatching on these types too. 
+ + // FIXME: This is not quite correct since IStreamSource may + // produce not only Mat but also MediaFrame, Scalar and vector + // data. TODO: Extend the type dispatching on these types too. template<> struct GTypeOf { using type = cv::GMat;}; template using g_type_of_t = typename GTypeOf::type; diff --git a/modules/gapi/include/opencv2/gapi/streaming/queue_source.hpp b/modules/gapi/include/opencv2/gapi/streaming/queue_source.hpp new file mode 100644 index 0000000000..bd385ed16e --- /dev/null +++ b/modules/gapi/include/opencv2/gapi/streaming/queue_source.hpp @@ -0,0 +1,67 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2023 Intel Corporation + +#ifndef OPENCV_GAPI_STREAMING_QUEUE_SOURCE_HPP +#define OPENCV_GAPI_STREAMING_QUEUE_SOURCE_HPP + +#include // shared_ptr +#include // is_base_of + +#include // GRunArgs +#include // GMetaArg + all descr_of +#include // IStreamSource + +namespace cv { +namespace gapi { +namespace wip { +struct Data; // fwd-declare to avoid circular? header dependencies + +class GAPI_EXPORTS QueueSourceBase: public cv::gapi::wip::IStreamSource { + class Priv; + std::shared_ptr m_priv; + // FIXME: Need to understand how it works with IStreamSource's shared_from_this + // Can we avoid having too many shared_ptrs here? + +public: + explicit QueueSourceBase(const cv::GMetaArg &m); + void push(Data &&data); + virtual bool pull(Data &data) override; + virtual void halt() override; + virtual GMetaArg descr_of() const override; + virtual ~QueueSourceBase() = default; +}; + +/** + * @brief Queued streaming pipeline source. 
+ * + */ +template +class QueueSource final: public QueueSourceBase +{ +public: + using Meta = decltype(cv::descr_of(T{})); + explicit QueueSource(Meta m) : QueueSourceBase(GMetaArg{m}) { + } + void push(T t) { + QueueSourceBase::push(Data{t}); + } +}; + +class GAPI_EXPORTS QueueInput { + std::vector > m_sources; + +public: + explicit QueueInput(const cv::GMetaArgs &args); + + void push(cv::GRunArgs &&ins); + operator cv::GRunArgs(); +}; + +} // namespace wip +} // namespace gapi +} // namespace cv + +#endif // OPENCV_GAPI_STREAMING_SOURCE_HPP diff --git a/modules/gapi/include/opencv2/gapi/streaming/source.hpp b/modules/gapi/include/opencv2/gapi/streaming/source.hpp index 6597cad8f8..267469ad1b 100644 --- a/modules/gapi/include/opencv2/gapi/streaming/source.hpp +++ b/modules/gapi/include/opencv2/gapi/streaming/source.hpp @@ -16,7 +16,7 @@ namespace cv { namespace gapi { namespace wip { - struct Data; // "forward-declaration" of GRunArg +struct Data; // forward-declaration of Data to avoid circular dependencies /** * @brief Abstract streaming pipeline source. @@ -43,6 +43,11 @@ public: Ptr ptr() { return shared_from_this(); } virtual bool pull(Data &data) = 0; virtual GMetaArg descr_of() const = 0; + virtual void halt() { + // Do nothing by default to maintain compatibility with the existing sources... 
+ // In fact needs to be decorated atop of the child classes to maintain the behavior + // FIXME: Make it mandatory in OpenCV 5.0 + }; virtual ~IStreamSource() = default; }; diff --git a/modules/gapi/src/compiler/gislandmodel.hpp b/modules/gapi/src/compiler/gislandmodel.hpp index 3a1a8d5ab9..ade13a6f33 100644 --- a/modules/gapi/src/compiler/gislandmodel.hpp +++ b/modules/gapi/src/compiler/gislandmodel.hpp @@ -192,6 +192,7 @@ class GIslandEmitter public: // Obtain next value from the emitter virtual bool pull(GRunArg &) = 0; + virtual void halt() = 0; virtual ~GIslandEmitter() = default; }; diff --git a/modules/gapi/src/executor/gstreamingexecutor.cpp b/modules/gapi/src/executor/gstreamingexecutor.cpp index 124b27f39c..6a397faca6 100644 --- a/modules/gapi/src/executor/gstreamingexecutor.cpp +++ b/modules/gapi/src/executor/gstreamingexecutor.cpp @@ -41,6 +41,10 @@ using namespace cv::gimpl::stream; class VideoEmitter final: public cv::gimpl::GIslandEmitter { cv::gapi::wip::IStreamSource::Ptr src; + virtual void halt() override { + src->halt(); + } + virtual bool pull(cv::GRunArg &arg) override { // FIXME: probably we can maintain a pool of (then) pre-allocated // buffers to avoid runtime allocations. @@ -62,6 +66,10 @@ public: class ConstEmitter final: public cv::gimpl::GIslandEmitter { cv::GRunArg m_arg; + virtual void halt() override { + // Not used here, but in fact can be used. 
+ } + virtual bool pull(cv::GRunArg &arg) override { arg = const_cast(m_arg); // FIXME: variant workaround return true; @@ -1918,6 +1926,11 @@ void cv::gimpl::GStreamingExecutor::stop() for (auto &q : m_emitter_queues) { q.push(stream::Cmd{stream::Stop{}}); } + // Also kindly ask emitter object to halt to break the blocking src->pull() + // loop + for (auto &nh : m_emitters) { + m_gim.metadata(nh).get().object->halt(); + } // Pull messages from the final queue to ensure completion Cmd cmd; diff --git a/modules/gapi/src/streaming/queue_source.cpp b/modules/gapi/src/streaming/queue_source.cpp new file mode 100644 index 0000000000..59fde09c44 --- /dev/null +++ b/modules/gapi/src/streaming/queue_source.cpp @@ -0,0 +1,98 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2023 Intel Corporation + +#include +#include + +#include + +#include +#include + +#include "executor/conc_queue.hpp" + +namespace cv { +namespace gapi { +namespace wip { + +class QueueSourceBase::Priv { +public: + explicit Priv(const cv::GMetaArg &meta) { + m = meta; + halted = false; + } + + cv::GMetaArg m; + cv::gapi::own::concurrent_bounded_queue q; + int64_t c = 0; + std::atomic halted; +}; + +QueueSourceBase::QueueSourceBase(const cv::GMetaArg &m) + : m_priv(new Priv(m)) { +} + +void QueueSourceBase::push(Data &&data) { + + // Tag data with seq_id/ts + const auto now = std::chrono::system_clock::now(); + const auto dur = std::chrono::duration_cast + (now.time_since_epoch()); + data.meta[cv::gapi::streaming::meta_tag::timestamp] = int64_t{dur.count()}; + data.meta[cv::gapi::streaming::meta_tag::seq_id] = int64_t{m_priv->c++}; + + m_priv->q.push(data); +} + +bool QueueSourceBase::pull(Data &data) { + m_priv->q.pop(data); + + if (m_priv->halted) { + return false; + } + return true; +} + +void QueueSourceBase::halt() { + 
m_priv->halted.store(true); + m_priv->q.push(cv::GRunArg{}); +} + +cv::GMetaArg QueueSourceBase::descr_of() const { + return m_priv->m; +} + +QueueInput::QueueInput(const cv::GMetaArgs &args) { + for (auto &&m : args) { + m_sources.emplace_back(new cv::gapi::wip::QueueSourceBase(m)); + } +} + +void QueueInput::push(cv::GRunArgs &&args) { + GAPI_Assert(m_sources.size() == args.size()); + for (auto && it : ade::util::zip(ade::util::toRange(m_sources), + ade::util::toRange(args))) + { + auto &src = std::get<0>(it); + auto &obj = std::get<1>(it); + + Data d; + d = obj; + src->push(std::move(d)); + } +} + +QueueInput::operator cv::GRunArgs () { + cv::GRunArgs args; + for (auto &&s : m_sources) { + args.push_back(s->ptr()); + } + return args; +} + +} // wip +} // gapi +} // cv diff --git a/modules/gapi/test/streaming/gapi_streaming_queue_source_tests.cpp b/modules/gapi/test/streaming/gapi_streaming_queue_source_tests.cpp new file mode 100644 index 0000000000..093e654715 --- /dev/null +++ b/modules/gapi/test/streaming/gapi_streaming_queue_source_tests.cpp @@ -0,0 +1,127 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2023 Intel Corporation + + +#include "../test_precomp.hpp" + +#include +#include +#include + +namespace opencv_test +{ + +TEST(GAPI_Streaming_Queue_Source, SmokeTest) { + // This is more like an example on G-API Queue Source + + cv::GMat in; + cv::GMat out = in + 1; + cv::GStreamingCompiled comp = cv::GComputation(in, out).compileStreaming(); + + // Queue source needs to know format information to maintain contracts + auto src = std::make_shared > + (cv::GMatDesc{CV_8U, 1, cv::Size{128, 128}}); + + comp.setSource(cv::gin(src->ptr())); + comp.start(); + + // It is perfectly legal to start a pipeline at this point - the source was passed. 
+ // Now we can push data through the source and get the pipeline results. + + cv::Mat eye = cv::Mat::eye(cv::Size{128, 128}, CV_8UC1); + src->push(eye); // Push I (identity matrix) + src->push(eye*2); // Push I*2 + + // Now its time to pop. The data could be already processed at this point. + // Note the queue source queues are unbounded to avoid deadlocks + + cv::Mat result; + ASSERT_TRUE(comp.pull(cv::gout(result))); + EXPECT_EQ(0, cvtest::norm(eye + 1, result, NORM_INF)); + + ASSERT_TRUE(comp.pull(cv::gout(result))); + EXPECT_EQ(0, cvtest::norm(eye*2 + 1, result, NORM_INF)); +} + +TEST(GAPI_Streaming_Queue_Source, Mixed) { + // Mixing a regular "live" source (which runs on its own) with a + // manually controlled queue source may make a little sense, but + // is perfectly legal and possible. + + cv::GMat in1; + cv::GMat in2; + cv::GMat out = in2 - in1; + cv::GStreamingCompiled comp = cv::GComputation(in1, in2, out).compileStreaming(); + + // Queue source needs to know format information to maintain contracts + auto src1 = std::make_shared > + (cv::GMatDesc{CV_8U, 3, cv::Size{768, 576}}); + + std::shared_ptr src2; + auto path = findDataFile("cv/video/768x576.avi"); + try { + src2 = cv::gapi::wip::make_src(path); + } catch(...) { + throw SkipTestException("Video file can not be opened"); + } + + comp.setSource(cv::gin(src1->ptr(), src2)); // FIXME: quite inconsistent + comp.start(); + + cv::Mat eye = cv::Mat::eye(cv::Size{768, 576}, CV_8UC3); + src1->push(eye); // Push I (identity matrix) + src1->push(eye); // Push I (again) + + cv::Mat ref, result; + cv::VideoCapture cap(path); + + cap >> ref; + ASSERT_TRUE(comp.pull(cv::gout(result))); + EXPECT_EQ(0, cvtest::norm(ref - eye, result, NORM_INF)); + + cap >> ref; + ASSERT_TRUE(comp.pull(cv::gout(result))); + EXPECT_EQ(0, cvtest::norm(ref - eye, result, NORM_INF)); +} + +TEST(GAPI_Streaming_Queue_Input, SmokeTest) { + + // Queue Input: a tiny wrapper atop of multiple queue sources. 
+ // Allows users to pass all input data at once. + + cv::GMat in1; + cv::GScalar in2; + cv::GMat out = in1 + in2; + cv::GStreamingCompiled comp = cv::GComputation(cv::GIn(in1, in2), cv::GOut(out)) + .compileStreaming(); + + // FIXME: This API is too raw + cv::gapi::wip::QueueInput input({ + cv::GMetaArg{ cv::GMatDesc{CV_8U, 1, cv::Size{64,64} } }, + cv::GMetaArg{ cv::empty_scalar_desc() } + }); + comp.setSource(input); // Implicit conversion allows it to be passed as-is. + comp.start(); + + // Push data via queue input + cv::Mat eye = cv::Mat::eye(cv::Size{64, 64}, CV_8UC1); + input.push(cv::gin(eye, cv::Scalar(1))); + input.push(cv::gin(eye, cv::Scalar(2))); + input.push(cv::gin(eye, cv::Scalar(3))); + + // Pop data and validate + cv::Mat result; + ASSERT_TRUE(comp.pull(cv::gout(result))); + EXPECT_EQ(0, cvtest::norm(eye+1, result, NORM_INF)); + + ASSERT_TRUE(comp.pull(cv::gout(result))); + EXPECT_EQ(0, cvtest::norm(eye+2, result, NORM_INF)); + + ASSERT_TRUE(comp.pull(cv::gout(result))); + EXPECT_EQ(0, cvtest::norm(eye+3, result, NORM_INF)); +} + +} // namespace opencv_test From 114c23e41108a68a6dce5ec9ab8a900bccc47637 Mon Sep 17 00:00:00 2001 From: alexander-varjo <118199184+alexander-varjo@users.noreply.github.com> Date: Mon, 4 Sep 2023 17:49:45 +0300 Subject: [PATCH 34/57] Merge pull request #23607 from alexander-varjo:alexander-varjo-patch-1 Fix crash in ap3p #23607 ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [ ] I agree to contribute to the project under Apache 2 License. 
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake --- modules/calib3d/src/ap3p.cpp | 65 ++----------------- modules/calib3d/test/test_solvepnp_ransac.cpp | 62 ++++++++++++++++++ 2 files changed, 69 insertions(+), 58 deletions(-) diff --git a/modules/calib3d/src/ap3p.cpp b/modules/calib3d/src/ap3p.cpp index 582b201b36..79da0f13a7 100644 --- a/modules/calib3d/src/ap3p.cpp +++ b/modules/calib3d/src/ap3p.cpp @@ -1,5 +1,6 @@ #include "precomp.hpp" #include "ap3p.h" +#include "polynom_solver.h" #include #include @@ -8,63 +9,10 @@ static inline double cbrt(double x) { return (double)cv::cubeRoot((float)x); }; #endif namespace { -void solveQuartic(const double *factors, double *realRoots) { - const double &a4 = factors[0]; - const double &a3 = factors[1]; - const double &a2 = factors[2]; - const double &a1 = factors[3]; - const double &a0 = factors[4]; - - double a4_2 = a4 * a4; - double a3_2 = a3 * a3; - double a4_3 = a4_2 * a4; - double a2a4 = a2 * a4; - - double p4 = (8 * a2a4 - 3 * a3_2) / (8 * a4_2); - double q4 = (a3_2 * a3 - 4 * a2a4 * a3 + 8 * a1 * a4_2) / (8 * a4_3); - double r4 = (256 * a0 * a4_3 - 3 * (a3_2 * a3_2) - 64 * a1 * a3 * a4_2 + 16 * a2a4 * a3_2) / (256 * (a4_3 * a4)); - - double p3 = ((p4 * p4) / 12 + r4) / 3; // /=-3 - double q3 = (72 * r4 * p4 - 2 * p4 * p4 * p4 - 27 * q4 * q4) / 432; // /=2 - - double t; // *=2 - std::complex w; - if (q3 >= 0) - w = -std::sqrt(static_cast >(q3 * q3 - p3 * p3 * p3)) - q3; - else - w = std::sqrt(static_cast >(q3 * q3 - p3 * p3 * p3)) - q3; - if (w.imag() == 0.0) { - 
w.real(std::cbrt(w.real())); - t = 2.0 * (w.real() + p3 / w.real()); - } else { - w = pow(w, 1.0 / 3); - t = 4.0 * w.real(); - } - - std::complex sqrt_2m = sqrt(static_cast >(-2 * p4 / 3 + t)); - double B_4A = -a3 / (4 * a4); - double complex1 = 4 * p4 / 3 + t; -#if defined(__clang__) && defined(__arm__) && (__clang_major__ == 3 || __clang_major__ == 4) && !defined(__ANDROID__) - // details: https://github.com/opencv/opencv/issues/11135 - // details: https://github.com/opencv/opencv/issues/11056 - std::complex complex2 = 2 * q4; - complex2 = std::complex(complex2.real() / sqrt_2m.real(), 0); -#else - std::complex complex2 = 2 * q4 / sqrt_2m; -#endif - double sqrt_2m_rh = sqrt_2m.real() / 2; - double sqrt1 = sqrt(-(complex1 + complex2)).real() / 2; - realRoots[0] = B_4A + sqrt_2m_rh + sqrt1; - realRoots[1] = B_4A + sqrt_2m_rh - sqrt1; - double sqrt2 = sqrt(-(complex1 - complex2)).real() / 2; - realRoots[2] = B_4A - sqrt_2m_rh + sqrt2; - realRoots[3] = B_4A - sqrt_2m_rh - sqrt2; -} - -void polishQuarticRoots(const double *coeffs, double *roots) { +void polishQuarticRoots(const double *coeffs, double *roots, int nb_roots) { const int iterations = 2; for (int i = 0; i < iterations; ++i) { - for (int j = 0; j < 4; ++j) { + for (int j = 0; j < nb_roots; ++j) { double error = (((coeffs[0] * roots[j] + coeffs[1]) * roots[j] + coeffs[2]) * roots[j] + coeffs[3]) * roots[j] + coeffs[4]; @@ -227,8 +175,9 @@ int ap3p::computePoses(const double featureVectors[3][4], 2 * (g6 * g7 - g1 * g2 - g3 * g4), g7 * g7 - g2 * g2 - g4 * g4}; double s[4]; - solveQuartic(coeffs, s); - polishQuarticRoots(coeffs, s); + int nb_roots = solve_deg4(coeffs[0], coeffs[1], coeffs[2], coeffs[3], coeffs[4], + s[0], s[1], s[2], s[3]); + polishQuarticRoots(coeffs, s, nb_roots); double temp[3]; vect_cross(k1, nl, temp); @@ -254,7 +203,7 @@ int ap3p::computePoses(const double featureVectors[3][4], double reproj_errors[4]; int nb_solutions = 0; - for (int i = 0; i < 4; ++i) { + for (int i = 0; i < nb_roots; 
++i) { double ctheta1p = s[i]; if (abs(ctheta1p) > 1) continue; diff --git a/modules/calib3d/test/test_solvepnp_ransac.cpp b/modules/calib3d/test/test_solvepnp_ransac.cpp index 759b9650a8..a9ed88f0f5 100644 --- a/modules/calib3d/test/test_solvepnp_ransac.cpp +++ b/modules/calib3d/test/test_solvepnp_ransac.cpp @@ -41,6 +41,7 @@ //M*/ #include "test_precomp.hpp" +#include "opencv2/core/utils/logger.hpp" namespace opencv_test { namespace { @@ -2258,4 +2259,65 @@ TEST(Calib3d_SolvePnP, inputShape) } } +bool hasNan(const cv::Mat& mat) +{ + bool has = false; + if (mat.type() == CV_32F) + { + for(int i = 0; i < static_cast(mat.total()); i++) + has |= cvIsNaN(mat.at(i)) != 0; + } + else if (mat.type() == CV_64F) + { + for(int i = 0; i < static_cast(mat.total()); i++) + has |= cvIsNaN(mat.at(i)) != 0; + } + else + { + has = true; + CV_LOG_ERROR(NULL, "check hasNan called with unsupported type!"); + } + + return has; +} + +TEST(AP3P, ctheta1p_nan_23607) +{ + // the task is not well defined and may not converge (empty R, t) or should + // converge to some non-NaN solution + const std::array cameraPts = { + cv::Point2d{0.042784865945577621, 0.59844839572906494}, + cv::Point2d{-0.028428621590137482, 0.60354739427566528}, + cv::Point2d{0.0046037044376134872, 0.70674681663513184} + }; + const std::array modelPts = { + cv::Point3d{-0.043258000165224075, 0.020459245890378952, -0.0069921980611979961}, + cv::Point3d{-0.045648999512195587, 0.0029820732306689024, 0.0079000638797879219}, + cv::Point3d{-0.043276999145746231, -0.013622495345771313, 0.0080113131552934647} + }; + + std::vector R, t; + solveP3P(modelPts, cameraPts, Mat::eye(3, 3, CV_64F), Mat(), R, t, SOLVEPNP_AP3P); + + EXPECT_EQ(R.size(), 2ul); + EXPECT_EQ(t.size(), 2ul); + + // Try apply rvec and tvec to get model points from camera points. 
+ Mat pts = Mat(modelPts).reshape(1, 3); + Mat expected = Mat(cameraPts).reshape(1, 3); + for (size_t i = 0; i < R.size(); ++i) { + EXPECT_TRUE(!hasNan(R[i])); + EXPECT_TRUE(!hasNan(t[i])); + + Mat transform; + cv::Rodrigues(R[i], transform); + Mat res = pts * transform.t(); + for (int j = 0; j < 3; ++j) { + res.row(j) += t[i].reshape(1, 1); + res.row(j) /= res.row(j).at(2); + } + EXPECT_LE(cvtest::norm(res.colRange(0, 2), expected, NORM_INF), 3e-16); + } +} + }} // namespace From ca527040e2bee65bce6df50773252779d3335dfb Mon Sep 17 00:00:00 2001 From: Alex Date: Mon, 4 Sep 2023 18:28:28 +0300 Subject: [PATCH 35/57] fix refineDetectedMarkers, add test --- .../misc/python/test/test_objdetect_aruco.py | 33 +++++++++++++++++++ .../objdetect/src/aruco/aruco_detector.cpp | 8 ++++- 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/modules/objdetect/misc/python/test/test_objdetect_aruco.py b/modules/objdetect/misc/python/test/test_objdetect_aruco.py index d63a19cd2f..dda58b6460 100644 --- a/modules/objdetect/misc/python/test/test_objdetect_aruco.py +++ b/modules/objdetect/misc/python/test/test_objdetect_aruco.py @@ -186,6 +186,39 @@ class aruco_objdetect_test(NewOpenCVTests): self.assertEqual((1, 4, 2), refine_corners[0].shape) np.testing.assert_array_equal(corners, refine_corners) + def test_charuco_refine(self): + aruco_dict = cv.aruco.getPredefinedDictionary(cv.aruco.DICT_6X6_50) + board_size = (3, 4) + board = cv.aruco.CharucoBoard(board_size, 1., .7, aruco_dict) + aruco_detector = cv.aruco.ArucoDetector(aruco_dict) + charuco_detector = cv.aruco.CharucoDetector(board) + cell_size = 100 + image = board.generateImage((cell_size*board_size[0], cell_size*board_size[1])) + camera = np.array([[1, 0, 0.5], + [0, 1, 0.5], + [0, 0, 1]]) + dist = np.array([0, 0, 0, 0, 0], dtype=np.float32).reshape(1, -1) + + # generate gold corners of the ArUco markers for the test + gold_corners = np.array(board.getObjPoints())[:, :, 0:2]*cell_size + + # detect corners + 
markerCorners, markerIds, _ = aruco_detector.detectMarkers(image) + + # test refine + rejected = [markerCorners[-1]] + markerCorners, markerIds = markerCorners[:-1], markerIds[:-1] + markerCorners, markerIds, _, _ = aruco_detector.refineDetectedMarkers(image, board, markerCorners, markerIds, + rejected, cameraMatrix=camera, distCoeffs=dist) + + charucoCorners, charucoIds, _, _ = charuco_detector.detectBoard(image, markerCorners=markerCorners, + markerIds=markerIds) + self.assertEqual(len(charucoIds), 6) + self.assertEqual(len(markerIds), 6) + + for i, id in enumerate(markerIds.reshape(-1)): + np.testing.assert_allclose(gold_corners[id], markerCorners[i].reshape(4, 2), 0.01, 1.) + def test_write_read_dictionary(self): try: aruco_dict = cv.aruco.getPredefinedDictionary(cv.aruco.DICT_5X5_50) diff --git a/modules/objdetect/src/aruco/aruco_detector.cpp b/modules/objdetect/src/aruco/aruco_detector.cpp index 395bb49338..84ccc6e323 100644 --- a/modules/objdetect/src/aruco/aruco_detector.cpp +++ b/modules/objdetect/src/aruco/aruco_detector.cpp @@ -1000,7 +1000,13 @@ static inline void _projectUndetectedMarkers(const Board &board, InputOutputArra OutputArray undetectedMarkersIds) { Mat rvec, tvec; // first estimate board pose with the current avaible markers Mat objPoints, imgPoints; // object and image points for the solvePnP function - board.matchImagePoints(detectedCorners, detectedIds, objPoints, imgPoints); + // To refine corners of ArUco markers the function refineDetectedMarkers() find an aruco markers pose from 3D-2D point correspondences. + // To find 3D-2D point correspondences uses matchImagePoints(). + // The method matchImagePoints() works with ArUco corners (in Board/GridBoard cases) or with ChArUco corners (in CharucoBoard case). + // To refine corners of ArUco markers we need work with ArUco corners only in all boards. + // To call matchImagePoints() with ArUco corners for all boards we need to call matchImagePoints() from base class Board. 
+ // The method matchImagePoints() implemented in Pimpl and we need to create temp Board object to call the base method. + Board(board.getObjPoints(), board.getDictionary(), board.getIds()).matchImagePoints(detectedCorners, detectedIds, objPoints, imgPoints); if (objPoints.total() < 4ull) // at least one marker from board so rvec and tvec are valid return; solvePnP(objPoints, imgPoints, cameraMatrix, distCoeffs, rvec, tvec); From c91c631ae26bc5257c67754ed6b8dc2a67f60915 Mon Sep 17 00:00:00 2001 From: Philipp Schrader Date: Thu, 31 Aug 2023 11:43:53 -0700 Subject: [PATCH 36/57] Fix "use after free" issue in `essential_solver.cpp` The address sanitizer highlighted this issue in our code base. It looks like the code is currently grabbing a pointer to a temporary object and then performing operations on it. I printed some information right before the asan crash: eigensolver address: 0x7f0ad95032f0 eigensolver size: 4528 eig_vecs_ ptr: 0x7f0ad95045e0 eig_vecs_ offset: 4848 This shows that `eig_vecs_` points past the end of `eigensolver`. In other words, it points at the temporary object created by the `eigensolver.eigenvectors()` call. Compare the docs for `.eigenvalues()`: https://eigen.tuxfamily.org/dox/classEigen_1_1EigenSolver.html#a0f507ad7ab14797882f474ca8f2773e7 to the docs for `.eigenvectors()`: https://eigen.tuxfamily.org/dox/classEigen_1_1EigenSolver.html#a66288022802172e3ee059283b26201d7 The difference in return types is interesting. `.eigenvalues()` returns a reference. But `.eigenvectors()` returns a matrix. This patch here fixes the problem by saving the temporary object and then grabbing a pointer into it. 
This is a curated snippet of the original asan failure: ==12==ERROR: AddressSanitizer: stack-use-after-scope on address 0x7fc633704640 at pc 0x7fc64f7f1593 bp 0x7ffe8875fc90 sp 0x7ffe8875fc88 READ of size 8 at 0x7fc633704640 thread T0 #0 0x7fc64f7f1592 in cv::usac::EssentialMinimalSolverStewenius5ptsImpl::estimate(std::__1::vector > const&, std::__1::vector >&) const /proc/self/cwd/external/com_github_opencv_opencv/modules/calib3d/src/usac/essential_solver.cpp:181:48 #1 0x7fc64f915d92 in cv::usac::EssentialEstimatorImpl::estimateModels(std::__1::vector > const&, std::__1::vector >&) const /proc/self/cwd/external/com_github_opencv_opencv/modules/calib3d/src/usac/estimator.cpp:110:46 #2 0x7fc64fa74fb0 in cv::usac::Ransac::run(cv::Ptr&) /proc/self/cwd/external/com_github_opencv_opencv/modules/calib3d/src/usac/ransac_solvers.cpp:152:58 #3 0x7fc64fa6cd8e in cv::usac::run(cv::Ptr const&, cv::_InputArray const&, cv::_InputArray const&, int, cv::Ptr&, cv::_InputArray const&, cv::_InputArray const&, cv::_InputArray const&, cv::_InputArray const&) /proc/self/cwd/external/com_github_opencv_opencv/modules/calib3d/src/usac/ransac_solvers.cpp:1010:16 #4 0x7fc64fa6fb46 in cv::usac::findEssentialMat(cv::_InputArray const&, cv::_InputArray const&, cv::_InputArray const&, int, double, double, cv::_OutputArray const&) /proc/self/cwd/external/com_github_opencv_opencv/modules/calib3d/src/usac/ransac_solvers.cpp:527:9 #5 0x7fc64f3b5522 in cv::findEssentialMat(cv::_InputArray const&, cv::_InputArray const&, cv::_InputArray const&, int, double, double, int, cv::_OutputArray const&) /proc/self/cwd/external/com_github_opencv_opencv/modules/calib3d/src/five-point.cpp:437:16 #6 0x7fc64f3b7e00 in cv::findEssentialMat(cv::_InputArray const&, cv::_InputArray const&, cv::_InputArray const&, int, double, double, cv::_OutputArray const&) /proc/self/cwd/external/com_github_opencv_opencv/modules/calib3d/src/five-point.cpp:486:12 ... 
Address 0x7fc633704640 is located in stack of thread T0 at offset 17984 in frame #0 0x7fc64f7ed4ff in cv::usac::EssentialMinimalSolverStewenius5ptsImpl::estimate(std::__1::vector > const&, std::__1::vector >&) const /proc/self/cwd/external/com_github_opencv_opencv/modules/calib3d/src/usac/essential_solver.cpp:36 This frame has 63 object(s): [32, 56) 'coefficients' (line 38) [96, 384) 'ee' (line 55) ... [13040, 17568) 'eigensolver' (line 142) [17824, 17840) 'ref.tmp518' (line 143) [17856, 17872) 'ref.tmp523' (line 144) [17888, 19488) 'ref.tmp524' (line 144) <== Memory access at offset 17984 is inside this variable [19616, 19640) 'ref.tmp532' (line 169) ... The crash report says that we're accessing a temporary object from line 144 when we shouldn't be. Line 144 looks like this: https://github.com/opencv/opencv/blob/4.6.0/modules/calib3d/src/usac/essential_solver.cpp#L144 const auto * const eig_vecs_ = (double *) eigensolver.eigenvectors().real().data(); We are using version 4.6.0 for this, but the problem is present on the 4.x branch. Note that I am dropping the .real() call here. I think that is safe because of the code further down (line 277 in the most recent version): const int eig_i = 20 * i + 12; // eigen stores imaginary values too The code appears to expect to have to skip doubles for the imaginary parts of the complex numbers. Admittedly, I couldn't find a test case that exercised this code path to validate correctness. 
--- modules/calib3d/src/usac/essential_solver.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/calib3d/src/usac/essential_solver.cpp b/modules/calib3d/src/usac/essential_solver.cpp index 504fec6ab5..434db6d373 100644 --- a/modules/calib3d/src/usac/essential_solver.cpp +++ b/modules/calib3d/src/usac/essential_solver.cpp @@ -239,7 +239,8 @@ public: // (5) Compute the left eigenvectors of the action matrix Eigen::EigenSolver> eigensolver(action_mat_eig); const Eigen::VectorXcd &eigenvalues = eigensolver.eigenvalues(); - const auto * const eig_vecs_ = (double *) eigensolver.eigenvectors().real().data(); + const Eigen::MatrixXcd eigenvectors = eigensolver.eigenvectors(); + const auto * const eig_vecs_ = (double *) eigenvectors.data(); #else Matx A = constraint_mat.colRange(0, 10), B = constraint_mat.colRange(10, 20), eliminated_mat; From 84f32bbb243dbcd64c7abb8d99dbdf5ed58518cc Mon Sep 17 00:00:00 2001 From: Wanli Date: Tue, 5 Sep 2023 13:44:15 +0800 Subject: [PATCH 37/57] increase Fast Math threshold --- modules/dnn/test/test_caffe_importer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp index 3f5458a873..66eff49979 100644 --- a/modules/dnn/test/test_caffe_importer.cpp +++ b/modules/dnn/test/test_caffe_importer.cpp @@ -731,7 +731,7 @@ TEST_P(Test_Caffe_nets, FasterRCNN_vgg16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif - double scoreDiff = 0.0, iouDiff = 0.0; + double scoreDiff = 0.001, iouDiff = 0.03; #if defined(INF_ENGINE_RELEASE) if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); @@ -779,7 +779,7 @@ TEST_P(Test_Caffe_nets, FasterRCNN_zf) 0, 7, 0.988779, 469.849, 75.1756, 718.64, 186.762, 0, 12, 0.967198, 138.588, 206.843, 329.766, 553.176); - double scoreDiff = 
0.0, iouDiff = 0.0; + double scoreDiff = 0.003, iouDiff = 0.07; if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { scoreDiff = 0.02; iouDiff = 0.13; From f280e3cbd9ba07ab2d0464cb4aefc31bdbe47665 Mon Sep 17 00:00:00 2001 From: Alexander Lyulkov Date: Thu, 31 Aug 2023 23:35:38 +0800 Subject: [PATCH 38/57] Fixed bug with the last 4 bytes in MJPEG encoder. --- modules/videoio/src/cap_mjpeg_encoder.cpp | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/modules/videoio/src/cap_mjpeg_encoder.cpp b/modules/videoio/src/cap_mjpeg_encoder.cpp index efac4093ae..2e7452cf17 100644 --- a/modules/videoio/src/cap_mjpeg_encoder.cpp +++ b/modules/videoio/src/cap_mjpeg_encoder.cpp @@ -268,7 +268,7 @@ public: m_buffer_list[0].finish(); m_data_len = m_buffer_list[0].get_len(); - m_last_bit_len = m_buffer_list[0].get_bits_free() ? 32 - m_buffer_list[0].get_bits_free() : 0; + m_last_bit_len = 32 - m_buffer_list[0].get_bits_free(); return m_buffer_list[0].get_data(); } @@ -331,9 +331,14 @@ public: } //bits == 0 means that last element shouldn't be used. - m_output_buffer[m_data_len++] = currval; - - m_last_bit_len = -bits; + if (bits != 0) { + m_output_buffer[m_data_len++] = currval; + m_last_bit_len = -bits; + } + else + { + m_last_bit_len = 32; + } return &m_output_buffer[0]; } @@ -1167,8 +1172,6 @@ public: fdct_qtab(_fdct_qtab), cat_table(_cat_table) { -#if 0 // disable parallel processing due to buffer overrun bug: https://github.com/opencv/opencv/issues/19634 - //empirically found value. if number of pixels is less than that value there is no sense to parallelize it. 
const int min_pixels_count = 96*96; @@ -1194,12 +1197,6 @@ public: stripes_count = std::min(stripes_count, max_stripes); -#else - if (nstripes > 1) - CV_LOG_ONCE_WARNING(NULL, "VIDEOIO/MJPEG: parallel processing is disabled: https://github.com/opencv/opencv/issues/19634"); - stripes_count = 1; -#endif - m_buffer_list.allocate_buffers(stripes_count, (height*width*2)/stripes_count); } From 6ae7caaa0107b8e1507b8de40c754c908f24a243 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Tue, 5 Sep 2023 10:44:56 +0300 Subject: [PATCH 39/57] Merge pull request #24216 from dkurt:inter_lines_less_compute Minor optimization of two lines intersection #24216 ### Pull Request Readiness Checklist Not significant, but we can reduce number of multiplications while compute two lines intersection. Both methods are used heavily in their modules. See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. 
- [x] The feature is well documented and sample code can be built with the project CMake --- modules/imgproc/src/geometry.cpp | 7 +++---- modules/objdetect/src/qrcode.cpp | 13 ++++--------- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/modules/imgproc/src/geometry.cpp b/modules/imgproc/src/geometry.cpp index 701c3a647f..77d3d16e02 100644 --- a/modules/imgproc/src/geometry.cpp +++ b/modules/imgproc/src/geometry.cpp @@ -328,17 +328,16 @@ static LineSegmentIntersection parallelInt( Point2f a, Point2f b, Point2f c, Poi static LineSegmentIntersection intersectLineSegments( Point2f a, Point2f b, Point2f c, Point2f d, Point2f& p, Point2f& q ) { - double denom = a.x * (double)(d.y - c.y) + b.x * (double)(c.y - d.y) + - d.x * (double)(b.y - a.y) + c.x * (double)(a.y - b.y); + double denom = (a.x - b.x) * (double)(d.y - c.y) - (a.y - b.y) * (double)(d.x - c.x); // If denom is zero, then segments are parallel: handle separately. if( denom == 0. ) return parallelInt(a, b, c, d, p, q); - double num = a.x * (double)(d.y - c.y) + c.x * (double)(a.y - d.y) + d.x * (double)(c.y - a.y); + double num = (d.y - a.y) * (double)(a.x - c.x) + (a.x - d.x) * (double)(a.y - c.y); double s = num / denom; - num = a.x * (double)(b.y - c.y) + b.x * (double)(c.y - a.y) + c.x * (double)(a.y - b.y); + num = (b.y - a.y) * (double)(a.x - c.x) + (c.y - a.y) * (double)(b.x - a.x); double t = num / denom; p.x = (float)(a.x + s*(b.x - a.x)); diff --git a/modules/objdetect/src/qrcode.cpp b/modules/objdetect/src/qrcode.cpp index 9f64c64462..1df46a9bb5 100644 --- a/modules/objdetect/src/qrcode.cpp +++ b/modules/objdetect/src/qrcode.cpp @@ -68,19 +68,14 @@ static void updatePointsResult(OutputArray points_, const vector& point static Point2f intersectionLines(Point2f a1, Point2f a2, Point2f b1, Point2f b2) { + // Try to solve a two lines intersection (a1, a2) and (b1, b2) as a system of equations: + // a2 + u * (a1 - a2) = b2 + v * (b1 - b2) const float divisor = (a1.x - a2.x) * (b1.y - 
b2.y) - (a1.y - a2.y) * (b1.x - b2.x); const float eps = 0.001f; if (abs(divisor) < eps) return a2; - Point2f result_square_angle( - ((a1.x * a2.y - a1.y * a2.x) * (b1.x - b2.x) - - (b1.x * b2.y - b1.y * b2.x) * (a1.x - a2.x)) / - divisor, - ((a1.x * a2.y - a1.y * a2.x) * (b1.y - b2.y) - - (b1.x * b2.y - b1.y * b2.x) * (a1.y - a2.y)) / - divisor - ); - return result_square_angle; + const float u = ((b2.x - a2.x) * (b1.y - b2.y) + (b1.x - b2.x) * (a2.y - b2.y)) / divisor; + return a2 + u * (a1 - a2); } // / | b From 639836ebf03ce39112b8c09e44bd0daa02d53f60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20B=C3=B6ken?= Date: Mon, 4 Sep 2023 11:31:06 +0200 Subject: [PATCH 40/57] Added CMake configuration OPENCV_DNN_BACKEND_DEFAULT --- CMakeLists.txt | 4 ++++ modules/dnn/CMakeLists.txt | 3 +++ modules/dnn/include/opencv2/dnn/dnn.hpp | 7 +------ modules/dnn/src/dnn_params.cpp | 2 +- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d14b7af439..2a214a1a91 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1672,6 +1672,10 @@ else() endif() endif() +if(BUILD_opencv_dnn) + status(" Default DNN backend:" ${OPENCV_DNN_BACKEND_DEFAULT}) +endif() + if(WITH_EIGEN OR HAVE_EIGEN) status(" Eigen:" HAVE_EIGEN THEN "YES (ver ${EIGEN_WORLD_VERSION}.${EIGEN_MAJOR_VERSION}.${EIGEN_MINOR_VERSION})" ELSE NO) endif() diff --git a/modules/dnn/CMakeLists.txt b/modules/dnn/CMakeLists.txt index 60cc77ca8b..896ce5ded7 100644 --- a/modules/dnn/CMakeLists.txt +++ b/modules/dnn/CMakeLists.txt @@ -227,6 +227,9 @@ if(TARGET ocv.3rdparty.openvino AND OPENCV_DNN_OPENVINO) endif() endif() +set(OPENCV_DNN_BACKEND_DEFAULT "DNN_BACKEND_OPENCV" CACHE STRING "Default backend used by the DNN module") +ocv_append_source_file_compile_definitions("${CMAKE_CURRENT_LIST_DIR}/src/dnn_params.cpp" "OPENCV_DNN_BACKEND_DEFAULT=${OPENCV_DNN_BACKEND_DEFAULT}") + ocv_install_used_external_targets(${libs} ${dnn_runtime_libs}) diff --git 
a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index d61f7191bc..2bd3f790b4 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -69,9 +69,7 @@ CV__DNN_INLINE_NS_BEGIN */ enum Backend { - //! DNN_BACKEND_DEFAULT equals to DNN_BACKEND_INFERENCE_ENGINE if - //! OpenCV is built with Intel OpenVINO or - //! DNN_BACKEND_OPENCV otherwise. + //! DNN_BACKEND_DEFAULT equals to OPENCV_DNN_BACKEND_DEFAULT, which can be defined using CMake or a configuration parameter DNN_BACKEND_DEFAULT = 0, DNN_BACKEND_HALIDE, DNN_BACKEND_INFERENCE_ENGINE, //!< Intel OpenVINO computational backend @@ -688,9 +686,6 @@ CV__DNN_INLINE_NS_BEGIN * @brief Ask network to use specific computation backend where it supported. * @param[in] backendId backend identifier. * @see Backend - * - * If OpenCV is compiled with Intel's Inference Engine library, DNN_BACKEND_DEFAULT - * means DNN_BACKEND_INFERENCE_ENGINE. Otherwise it equals to DNN_BACKEND_OPENCV. */ CV_WRAP void setPreferableBackend(int backendId); diff --git a/modules/dnn/src/dnn_params.cpp b/modules/dnn/src/dnn_params.cpp index 86a43db757..19d453012c 100644 --- a/modules/dnn/src/dnn_params.cpp +++ b/modules/dnn/src/dnn_params.cpp @@ -36,7 +36,7 @@ bool getParam_DNN_OPENCL_ALLOW_ALL_DEVICES() int getParam_DNN_BACKEND_DEFAULT() { static int PARAM_DNN_BACKEND_DEFAULT = (int)utils::getConfigurationParameterSizeT("OPENCV_DNN_BACKEND_DEFAULT", - (size_t)DNN_BACKEND_OPENCV + (size_t)OPENCV_DNN_BACKEND_DEFAULT ); return PARAM_DNN_BACKEND_DEFAULT; } From cca4ee2e464d3ca43babe9eadbd900aca22fe9fa Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Tue, 5 Sep 2023 14:35:37 +0300 Subject: [PATCH 41/57] Revert PR 24186 as it force skip tests. 
--- modules/core/test/test_misc.cpp | 36 -------------------- modules/python/test/tests_common.py | 2 -- modules/ts/include/opencv2/ts/ts_ext.hpp | 42 +++--------------------- modules/ts/src/ts_tags.cpp | 13 +++----- 4 files changed, 8 insertions(+), 85 deletions(-) diff --git a/modules/core/test/test_misc.cpp b/modules/core/test/test_misc.cpp index cb89dcf573..8ed0afe771 100644 --- a/modules/core/test/test_misc.cpp +++ b/modules/core/test/test_misc.cpp @@ -917,41 +917,5 @@ REGISTER_TYPED_TEST_CASE_P(Rect_Test, Overflows); typedef ::testing::Types RectTypes; INSTANTIATE_TYPED_TEST_CASE_P(Negative_Test, Rect_Test, RectTypes); -// Expected that SkipTestException thrown in the constructor should skip test but not fail -struct TestFixtureSkip: public ::testing::Test { - TestFixtureSkip(bool throwEx = true) { - if (throwEx) { - throw SkipTestException("Skip test at constructor"); - } - } -}; - -TEST_F(TestFixtureSkip, NoBodyRun) { - FAIL() << "Unreachable code called"; -} - -// Check no test body started in case of skip exception at static SetUpTestCase -struct TestSetUpTestCaseSkip: public ::testing::Test { - static void SetUpTestCase() { - throw SkipTestException("Skip test at SetUpTestCase"); - } -}; - -TEST_F(TestSetUpTestCaseSkip, NoBodyRun) { - FAIL() << "Unreachable code called"; -} -TEST_F(TestSetUpTestCaseSkip, NoBodyRun2) { - FAIL() << "Unreachable code called"; -} - -struct TestSetUpSkip: public ::testing::Test { - virtual void SetUp() { - throw SkipTestException("Skip test at SetUp"); - } -}; - -TEST_F(TestSetUpSkip, NoBodyRun) { - FAIL() << "Unreachable code called"; -} }} // namespace diff --git a/modules/python/test/tests_common.py b/modules/python/test/tests_common.py index d673dd7b78..ec49f46d0d 100644 --- a/modules/python/test/tests_common.py +++ b/modules/python/test/tests_common.py @@ -36,8 +36,6 @@ class NewOpenCVTests(unittest.TestCase): return candidate if required: self.fail('File ' + filename + ' not found') - else: - self.skipTest('File ' + 
filename + ' not found') return None diff --git a/modules/ts/include/opencv2/ts/ts_ext.hpp b/modules/ts/include/opencv2/ts/ts_ext.hpp index 4603dba4f7..efa4860510 100644 --- a/modules/ts/include/opencv2/ts/ts_ext.hpp +++ b/modules/ts/include/opencv2/ts/ts_ext.hpp @@ -47,18 +47,6 @@ bool checkBigDataTests(); } \ } \ -#define CV__TEST_SETUP_IMPL(parent_class) \ - { \ - try { \ - parent_class::SetUp(); \ - } catch (const cvtest::details::SkipTestExceptionBase& e) { \ - printf("[ SKIP ] %s\n", e.what()); \ - } \ - } - -struct DummyTest : public ::testing::Test { - virtual void TestBody() CV_OVERRIDE {} -}; #undef TEST #define TEST_(test_case_name, test_name, parent_class, bodyMethodName, BODY_ATTR, BODY_IMPL) \ @@ -72,17 +60,6 @@ struct DummyTest : public ::testing::Test { GTEST_DISALLOW_COPY_AND_ASSIGN_(\ GTEST_TEST_CLASS_NAME_(test_case_name, test_name));\ };\ - class test_case_name##test_name##_factory : public ::testing::internal::TestFactoryBase { \ - public:\ - virtual ::testing::Test* CreateTest() { \ - try { \ - return new GTEST_TEST_CLASS_NAME_(test_case_name, test_name); \ - } catch (const cvtest::details::SkipTestExceptionBase& e) { \ - printf("[ SKIP ] %s\n", e.what()); \ - return new DummyTest(); \ - } \ - } \ - };\ \ ::testing::TestInfo* const GTEST_TEST_CLASS_NAME_(test_case_name, test_name)\ ::test_info_ =\ @@ -92,7 +69,8 @@ struct DummyTest : public ::testing::Test { (::testing::internal::GetTestTypeId()), \ parent_class::SetUpTestCase, \ parent_class::TearDownTestCase, \ - new test_case_name##test_name##_factory);\ + new ::testing::internal::TestFactoryImpl<\ + GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>);\ void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody() BODY_IMPL( #test_case_name "_" #test_name ) \ void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::bodyMethodName() @@ -131,22 +109,10 @@ struct DummyTest : public ::testing::Test { private:\ virtual void TestBody() CV_OVERRIDE;\ virtual void Body(); \ - virtual void 
SetUp() CV_OVERRIDE; \ static ::testing::TestInfo* const test_info_ GTEST_ATTRIBUTE_UNUSED_;\ GTEST_DISALLOW_COPY_AND_ASSIGN_(\ GTEST_TEST_CLASS_NAME_(test_fixture, test_name));\ };\ - class test_fixture##test_name##_factory : public ::testing::internal::TestFactoryBase { \ - public:\ - virtual ::testing::Test* CreateTest() { \ - try { \ - return new GTEST_TEST_CLASS_NAME_(test_fixture, test_name); \ - } catch (const cvtest::details::SkipTestExceptionBase& e) { \ - printf("[ SKIP ] %s\n", e.what()); \ - return new DummyTest(); \ - } \ - } \ - };\ \ ::testing::TestInfo* const GTEST_TEST_CLASS_NAME_(test_fixture, test_name)\ ::test_info_ =\ @@ -156,9 +122,9 @@ struct DummyTest : public ::testing::Test { (::testing::internal::GetTypeId()), \ test_fixture::SetUpTestCase, \ test_fixture::TearDownTestCase, \ - new test_fixture##test_name##_factory);\ + new ::testing::internal::TestFactoryImpl<\ + GTEST_TEST_CLASS_NAME_(test_fixture, test_name)>);\ void GTEST_TEST_CLASS_NAME_(test_fixture, test_name)::TestBody() CV__TEST_BODY_IMPL( #test_fixture "_" #test_name ) \ - void GTEST_TEST_CLASS_NAME_(test_fixture, test_name)::SetUp() CV__TEST_SETUP_IMPL(test_fixture) \ void GTEST_TEST_CLASS_NAME_(test_fixture, test_name)::Body() // Don't use directly diff --git a/modules/ts/src/ts_tags.cpp b/modules/ts/src/ts_tags.cpp index 21653e17ee..8bed1b739f 100644 --- a/modules/ts/src/ts_tags.cpp +++ b/modules/ts/src/ts_tags.cpp @@ -11,7 +11,7 @@ namespace cvtest { static bool printTestTag = false; static std::vector currentDirectTestTags, currentImpliedTestTags; -static std::vector skipped_tests; +static std::vector skipped_tests; static std::map& getTestTagsSkipCounts() { @@ -26,7 +26,7 @@ static std::map& getTestTagsSkipExtraCounts() void testTagIncreaseSkipCount(const std::string& tag, bool isMain, bool appendSkipTests) { if (appendSkipTests) - skipped_tests.push_back(::testing::UnitTest::GetInstance()->current_test_case()); + 
skipped_tests.push_back(::testing::UnitTest::GetInstance()->current_test_info()); std::map& counts = isMain ? getTestTagsSkipCounts() : getTestTagsSkipExtraCounts(); std::map::iterator i = counts.find(tag); if (i == counts.end()) @@ -280,11 +280,6 @@ static bool isTestTagSkipped(const std::string& testTag, CV_OUT std::string& ski void checkTestTags() { - if (std::find(skipped_tests.begin(), skipped_tests.end(), - ::testing::UnitTest::GetInstance()->current_test_case()) != skipped_tests.end()) { - throw details::SkipTestExceptionBase(false); - } - std::string skipTag; const std::vector& testTags = currentDirectTestTags; { @@ -312,7 +307,7 @@ void checkTestTags() } if (found != tags.size()) { - skipped_tests.push_back(::testing::UnitTest::GetInstance()->current_test_case()); + skipped_tests.push_back(::testing::UnitTest::GetInstance()->current_test_info()); throw details::SkipTestExceptionBase("Test tags don't pass required tags list (--test_tag parameter)", true); } } @@ -346,7 +341,7 @@ void checkTestTags() if (!skip_message.empty()) { - skipped_tests.push_back(::testing::UnitTest::GetInstance()->current_test_case()); + skipped_tests.push_back(::testing::UnitTest::GetInstance()->current_test_info()); throw details::SkipTestExceptionBase(skip_message, true); } } From c4c2e2e796b829e05260ecfac6e8264e807c4df7 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Tue, 5 Sep 2023 15:21:10 +0300 Subject: [PATCH 42/57] Merge pull request #24214 from dkurt:distanceTransform_big_step Fix distanceTransform for inputs with large step and height #24214 ### Pull Request Readiness Checklist resolves https://github.com/opencv/opencv/issues/23895 See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. 
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake --- modules/imgproc/src/distransform.cpp | 47 +++++++++++-------- .../imgproc/test/test_distancetransform.cpp | 24 ++++++++++ 2 files changed, 51 insertions(+), 20 deletions(-) diff --git a/modules/imgproc/src/distransform.cpp b/modules/imgproc/src/distransform.cpp index e88ba44cfb..8f2a20e0d0 100644 --- a/modules/imgproc/src/distransform.cpp +++ b/modules/imgproc/src/distransform.cpp @@ -78,7 +78,7 @@ distanceTransform_3x3( const Mat& _src, Mat& _temp, Mat& _dist, const float* met const uchar* src = _src.ptr(); int* temp = _temp.ptr(); - float* dist = _dist.ptr(); + float* dist = _dist.ptr(_dist.rows - 1); int srcstep = (int)(_src.step/sizeof(src[0])); int step = (int)(_temp.step/sizeof(temp[0])); int dststep = (int)(_dist.step/sizeof(dist[0])); @@ -87,11 +87,10 @@ distanceTransform_3x3( const Mat& _src, Mat& _temp, Mat& _dist, const float* met initTopBottom( _temp, BORDER ); // forward pass + unsigned int* tmp = (unsigned int*)(temp + BORDER*step) + BORDER; + const uchar* s = src; for( i = 0; i < size.height; i++ ) { - const uchar* s = src + i*srcstep; - unsigned int* tmp = (unsigned int*)(temp + (i+BORDER)*step) + BORDER; - for( j = 0; j < BORDER; j++ ) tmp[-j-1] = tmp[size.width + j] = INIT_DIST0; @@ -111,13 +110,15 @@ distanceTransform_3x3( const Mat& _src, Mat& _temp, Mat& _dist, const float* met tmp[j] = t0; } } + tmp += step; + s += srcstep; } // backward pass + float* d = (float*)dist; for( i = size.height - 1; i >= 0; i-- ) { - float* d = (float*)(dist + i*dststep); - 
unsigned int* tmp = (unsigned int*)(temp + (i+BORDER)*step) + BORDER; + tmp -= step; for( j = size.width - 1; j >= 0; j-- ) { @@ -137,6 +138,7 @@ distanceTransform_3x3( const Mat& _src, Mat& _temp, Mat& _dist, const float* met t0 = (t0 > DIST_MAX) ? DIST_MAX : t0; d[j] = (float)(t0 * scale); } + d -= dststep; } } @@ -153,7 +155,7 @@ distanceTransform_5x5( const Mat& _src, Mat& _temp, Mat& _dist, const float* met const uchar* src = _src.ptr(); int* temp = _temp.ptr(); - float* dist = _dist.ptr(); + float* dist = _dist.ptr(_dist.rows - 1); int srcstep = (int)(_src.step/sizeof(src[0])); int step = (int)(_temp.step/sizeof(temp[0])); int dststep = (int)(_dist.step/sizeof(dist[0])); @@ -162,11 +164,10 @@ distanceTransform_5x5( const Mat& _src, Mat& _temp, Mat& _dist, const float* met initTopBottom( _temp, BORDER ); // forward pass + unsigned int* tmp = (unsigned int*)(temp + BORDER*step) + BORDER; + const uchar* s = src; for( i = 0; i < size.height; i++ ) { - const uchar* s = src + i*srcstep; - unsigned int* tmp = (unsigned int*)(temp + (i+BORDER)*step) + BORDER; - for( j = 0; j < BORDER; j++ ) tmp[-j-1] = tmp[size.width + j] = INIT_DIST0; @@ -194,13 +195,15 @@ distanceTransform_5x5( const Mat& _src, Mat& _temp, Mat& _dist, const float* met tmp[j] = t0; } } + tmp += step; + s += srcstep; } // backward pass + float* d = (float*)dist; for( i = size.height - 1; i >= 0; i-- ) { - float* d = (float*)(dist + i*dststep); - unsigned int* tmp = (unsigned int*)(temp + (i+BORDER)*step) + BORDER; + tmp -= step; for( j = size.width - 1; j >= 0; j-- ) { @@ -228,6 +231,7 @@ distanceTransform_5x5( const Mat& _src, Mat& _temp, Mat& _dist, const float* met t0 = (t0 > DIST_MAX) ? 
DIST_MAX : t0; d[j] = (float)(t0 * scale); } + d -= dststep; } } @@ -245,7 +249,7 @@ distanceTransformEx_5x5( const Mat& _src, Mat& _temp, Mat& _dist, Mat& _labels, const uchar* src = _src.ptr(); int* temp = _temp.ptr(); - float* dist = _dist.ptr(); + float* dist = _dist.ptr(_dist.rows - 1); int* labels = _labels.ptr(); int srcstep = (int)(_src.step/sizeof(src[0])); int step = (int)(_temp.step/sizeof(temp[0])); @@ -256,12 +260,11 @@ distanceTransformEx_5x5( const Mat& _src, Mat& _temp, Mat& _dist, Mat& _labels, initTopBottom( _temp, BORDER ); // forward pass + const uchar* s = src; + unsigned int* tmp = (unsigned int*)(temp + BORDER*step) + BORDER; + int* lls = (int*)labels; for( i = 0; i < size.height; i++ ) { - const uchar* s = src + i*srcstep; - unsigned int* tmp = (unsigned int*)(temp + (i+BORDER)*step) + BORDER; - int* lls = (int*)(labels + i*lstep); - for( j = 0; j < BORDER; j++ ) tmp[-j-1] = tmp[size.width + j] = INIT_DIST0; @@ -330,14 +333,17 @@ distanceTransformEx_5x5( const Mat& _src, Mat& _temp, Mat& _dist, Mat& _labels, lls[j] = l0; } } + s += srcstep; + tmp += step; + lls += lstep; } // backward pass + float* d = (float*)dist; for( i = size.height - 1; i >= 0; i-- ) { - float* d = (float*)(dist + i*dststep); - unsigned int* tmp = (unsigned int*)(temp + (i+BORDER)*step) + BORDER; - int* lls = (int*)(labels + i*lstep); + tmp -= step; + lls -= lstep; for( j = size.width - 1; j >= 0; j-- ) { @@ -399,6 +405,7 @@ distanceTransformEx_5x5( const Mat& _src, Mat& _temp, Mat& _dist, Mat& _labels, t0 = (t0 > DIST_MAX) ? 
DIST_MAX : t0; d[j] = (float)(t0 * scale); } + d -= dststep; } } diff --git a/modules/imgproc/test/test_distancetransform.cpp b/modules/imgproc/test/test_distancetransform.cpp index 742595631a..00bca5b5b2 100644 --- a/modules/imgproc/test/test_distancetransform.cpp +++ b/modules/imgproc/test/test_distancetransform.cpp @@ -344,4 +344,28 @@ TEST(Imgproc_DistanceTransform, large_square_22732) EXPECT_EQ(0, nerrs) << "reference distance map is different from computed one at " << nerrs << " pixels\n"; } +BIGDATA_TEST(Imgproc_DistanceTransform, issue_23895_3x3) +{ + Mat src = Mat::zeros(50000, 50000, CV_8U), dist; + distanceTransform(src.col(0), dist, DIST_L2, DIST_MASK_3); + int nz = countNonZero(dist); + EXPECT_EQ(nz, 0); +} + +BIGDATA_TEST(Imgproc_DistanceTransform, issue_23895_5x5) +{ + Mat src = Mat::zeros(50000, 50000, CV_8U), dist; + distanceTransform(src.col(0), dist, DIST_L2, DIST_MASK_5); + int nz = countNonZero(dist); + EXPECT_EQ(nz, 0); +} + +BIGDATA_TEST(Imgproc_DistanceTransform, issue_23895_5x5_labels) +{ + Mat src = Mat::zeros(50000, 50000, CV_8U), dist, labels; + distanceTransform(src.col(0), dist, labels, DIST_L2, DIST_MASK_5); + int nz = countNonZero(dist); + EXPECT_EQ(nz, 0); +} + }} // namespace From 2c53e3f53d00b23d8479dfa9a9ca346ff9f36130 Mon Sep 17 00:00:00 2001 From: Yuriy Chernyshov Date: Tue, 5 Sep 2023 16:05:47 +0200 Subject: [PATCH 43/57] Merge pull request #24204 from georgthegreat:mser-license Properly preserve chi_table license as mandated by BSD-3-Clause #24204 Amend reference to online hosted file with the full license quotation as mandated by the original license. 
--- modules/features2d/3rdparty/mscr/chi_table.h | 135 ++++++++++++++++++ .../3rdparty/mscr/chi_table_LICENSE.txt | 28 ++++ modules/features2d/CMakeLists.txt | 2 + modules/features2d/src/mser.cpp | 118 ++------------- 4 files changed, 173 insertions(+), 110 deletions(-) create mode 100644 modules/features2d/3rdparty/mscr/chi_table.h create mode 100644 modules/features2d/3rdparty/mscr/chi_table_LICENSE.txt diff --git a/modules/features2d/3rdparty/mscr/chi_table.h b/modules/features2d/3rdparty/mscr/chi_table.h new file mode 100644 index 0000000000..c0e9bae046 --- /dev/null +++ b/modules/features2d/3rdparty/mscr/chi_table.h @@ -0,0 +1,135 @@ +/* +** +** License Agreement +** For chi_table.h +** +** Copyright (C) 2007 Per-Erik Forssen, all rights reserved. +** +** Redistribution and use in source and binary forms, with or without modification, +** are permitted provided that the following conditions are met: +** +** * Redistribution's of source code must retain the above copyright notice, +** this list of conditions and the following disclaimer. +** +** * Redistribution's in binary form must reproduce the above copyright notice, +** this list of conditions and the following disclaimer in the documentation +** and/or other materials provided with the distribution. +** +** * The name of the copyright holders may not be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** This software is provided by the copyright holders and contributors "as is" and +** any express or implied warranties, including, but not limited to, the implied +** warranties of merchantability and fitness for a particular purpose are disclaimed. 
+** In no event shall the Intel Corporation or contributors be liable for any direct, +** indirect, incidental, special, exemplary, or consequential damages +** (including, but not limited to, procurement of substitute goods or services; +** loss of use, data, or profits; or business interruption) however caused +** and on any theory of liability, whether in contract, strict liability, +** or tort (including negligence or otherwise) arising in any way out of +** the use of this software, even if advised of the possibility of such damage. +** +** Content origin: http://users.isy.liu.se/cvl/perfo/software/chi_table.h +*/ +#define TABLE_SIZE 400 + +static double chitab3[]={0, 0.0150057, 0.0239478, 0.0315227, + 0.0383427, 0.0446605, 0.0506115, 0.0562786, + 0.0617174, 0.0669672, 0.0720573, 0.0770099, + 0.081843, 0.0865705, 0.0912043, 0.0957541, + 0.100228, 0.104633, 0.108976, 0.113261, + 0.117493, 0.121676, 0.125814, 0.12991, + 0.133967, 0.137987, 0.141974, 0.145929, + 0.149853, 0.15375, 0.15762, 0.161466, + 0.165287, 0.169087, 0.172866, 0.176625, + 0.180365, 0.184088, 0.187794, 0.191483, + 0.195158, 0.198819, 0.202466, 0.2061, + 0.209722, 0.213332, 0.216932, 0.220521, + 0.2241, 0.22767, 0.231231, 0.234783, + 0.238328, 0.241865, 0.245395, 0.248918, + 0.252435, 0.255947, 0.259452, 0.262952, + 0.266448, 0.269939, 0.273425, 0.276908, + 0.280386, 0.283862, 0.287334, 0.290803, + 0.29427, 0.297734, 0.301197, 0.304657, + 0.308115, 0.311573, 0.315028, 0.318483, + 0.321937, 0.32539, 0.328843, 0.332296, + 0.335749, 0.339201, 0.342654, 0.346108, + 0.349562, 0.353017, 0.356473, 0.35993, + 0.363389, 0.366849, 0.37031, 0.373774, + 0.377239, 0.380706, 0.384176, 0.387648, + 0.391123, 0.3946, 0.39808, 0.401563, + 0.405049, 0.408539, 0.412032, 0.415528, + 0.419028, 0.422531, 0.426039, 0.429551, + 0.433066, 0.436586, 0.440111, 0.44364, + 0.447173, 0.450712, 0.454255, 0.457803, + 0.461356, 0.464915, 0.468479, 0.472049, + 0.475624, 0.479205, 0.482792, 0.486384, + 0.489983, 0.493588, 0.4972, 
0.500818, + 0.504442, 0.508073, 0.511711, 0.515356, + 0.519008, 0.522667, 0.526334, 0.530008, + 0.533689, 0.537378, 0.541075, 0.54478, + 0.548492, 0.552213, 0.555942, 0.55968, + 0.563425, 0.56718, 0.570943, 0.574715, + 0.578497, 0.582287, 0.586086, 0.589895, + 0.593713, 0.597541, 0.601379, 0.605227, + 0.609084, 0.612952, 0.61683, 0.620718, + 0.624617, 0.628526, 0.632447, 0.636378, + 0.64032, 0.644274, 0.648239, 0.652215, + 0.656203, 0.660203, 0.664215, 0.668238, + 0.672274, 0.676323, 0.680384, 0.684457, + 0.688543, 0.692643, 0.696755, 0.700881, + 0.70502, 0.709172, 0.713339, 0.717519, + 0.721714, 0.725922, 0.730145, 0.734383, + 0.738636, 0.742903, 0.747185, 0.751483, + 0.755796, 0.760125, 0.76447, 0.768831, + 0.773208, 0.777601, 0.782011, 0.786438, + 0.790882, 0.795343, 0.799821, 0.804318, + 0.808831, 0.813363, 0.817913, 0.822482, + 0.827069, 0.831676, 0.836301, 0.840946, + 0.84561, 0.850295, 0.854999, 0.859724, + 0.864469, 0.869235, 0.874022, 0.878831, + 0.883661, 0.888513, 0.893387, 0.898284, + 0.903204, 0.908146, 0.913112, 0.918101, + 0.923114, 0.928152, 0.933214, 0.938301, + 0.943413, 0.94855, 0.953713, 0.958903, + 0.964119, 0.969361, 0.974631, 0.979929, + 0.985254, 0.990608, 0.99599, 1.0014, + 1.00684, 1.01231, 1.01781, 1.02335, + 1.02891, 1.0345, 1.04013, 1.04579, + 1.05148, 1.05721, 1.06296, 1.06876, + 1.07459, 1.08045, 1.08635, 1.09228, + 1.09826, 1.10427, 1.11032, 1.1164, + 1.12253, 1.1287, 1.1349, 1.14115, + 1.14744, 1.15377, 1.16015, 1.16656, + 1.17303, 1.17954, 1.18609, 1.19269, + 1.19934, 1.20603, 1.21278, 1.21958, + 1.22642, 1.23332, 1.24027, 1.24727, + 1.25433, 1.26144, 1.26861, 1.27584, + 1.28312, 1.29047, 1.29787, 1.30534, + 1.31287, 1.32046, 1.32812, 1.33585, + 1.34364, 1.3515, 1.35943, 1.36744, + 1.37551, 1.38367, 1.39189, 1.4002, + 1.40859, 1.41705, 1.42561, 1.43424, + 1.44296, 1.45177, 1.46068, 1.46967, + 1.47876, 1.48795, 1.49723, 1.50662, + 1.51611, 1.52571, 1.53541, 1.54523, + 1.55517, 1.56522, 1.57539, 1.58568, + 1.59611, 1.60666, 1.61735, 
1.62817, + 1.63914, 1.65025, 1.66152, 1.67293, + 1.68451, 1.69625, 1.70815, 1.72023, + 1.73249, 1.74494, 1.75757, 1.77041, + 1.78344, 1.79669, 1.81016, 1.82385, + 1.83777, 1.85194, 1.86635, 1.88103, + 1.89598, 1.91121, 1.92674, 1.94257, + 1.95871, 1.97519, 1.99201, 2.0092, + 2.02676, 2.04471, 2.06309, 2.08189, + 2.10115, 2.12089, 2.14114, 2.16192, + 2.18326, 2.2052, 2.22777, 2.25101, + 2.27496, 2.29966, 2.32518, 2.35156, + 2.37886, 2.40717, 2.43655, 2.46709, + 2.49889, 2.53206, 2.56673, 2.60305, + 2.64117, 2.6813, 2.72367, 2.76854, + 2.81623, 2.86714, 2.92173, 2.98059, + 3.04446, 3.1143, 3.19135, 3.27731, + 3.37455, 3.48653, 3.61862, 3.77982, + 3.98692, 4.2776, 4.77167, 133.333 }; diff --git a/modules/features2d/3rdparty/mscr/chi_table_LICENSE.txt b/modules/features2d/3rdparty/mscr/chi_table_LICENSE.txt new file mode 100644 index 0000000000..66b272dd2d --- /dev/null +++ b/modules/features2d/3rdparty/mscr/chi_table_LICENSE.txt @@ -0,0 +1,28 @@ + License Agreement + For chi_table.h + +Copyright (C) 2007 Per-Erik Forssen, all rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistribution's of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistribution's in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * The name of the copyright holders may not be used to endorse or promote products + derived from this software without specific prior written permission. + +This software is provided by the copyright holders and contributors "as is" and +any express or implied warranties, including, but not limited to, the implied +warranties of merchantability and fitness for a particular purpose are disclaimed. 
+In no event shall the Intel Corporation or contributors be liable for any direct, +indirect, incidental, special, exemplary, or consequential damages +(including, but not limited to, procurement of substitute goods or services; +loss of use, data, or profits; or business interruption) however caused +and on any theory of liability, whether in contract, strict liability, +or tort (including negligence or otherwise) arising in any way out of +the use of this software, even if advised of the possibility of such damage. diff --git a/modules/features2d/CMakeLists.txt b/modules/features2d/CMakeLists.txt index a586d4606e..91fea8bcc8 100644 --- a/modules/features2d/CMakeLists.txt +++ b/modules/features2d/CMakeLists.txt @@ -7,3 +7,5 @@ if(DEBUG_opencv_features2d) list(APPEND debug_modules opencv_highgui) endif() ocv_define_module(features2d opencv_imgproc ${debug_modules} OPTIONAL opencv_flann WRAP java objc python js) + +ocv_install_3rdparty_licenses(mscr "${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/mscr/chi_table_LICENSE.txt") diff --git a/modules/features2d/src/mser.cpp b/modules/features2d/src/mser.cpp index d59ed39574..5c8db481b1 100644 --- a/modules/features2d/src/mser.cpp +++ b/modules/features2d/src/mser.cpp @@ -30,18 +30,23 @@ * OpenCV functions for MSER extraction * * 1. there are two different implementation of MSER, one for gray image, one for color image - * 2. the gray image algorithm is taken from: Linear Time Maximally Stable Extremal Regions; + * 2. the gray image algorithm is taken from: + * Linear Time Maximally Stable Extremal Regions; * the paper claims to be faster than union-find method; * it actually get 1.5~2m/s on my centrino L7200 1.2GHz laptop. - * 3. the color image algorithm is taken from: Maximally Stable Colour Regions for Recognition and Match; + * 3. 
the color image algorithm is taken from: + * Maximally Stable Colour Regions for Recognition and Match; * it should be much slower than gray image method ( 3~4 times ); - * the chi_table.h file is taken directly from paper's source code which is distributed under permissive BSD-like license: http://users.isy.liu.se/cvl/perfo/software/chi_table.h + * the chi_table.h file is taken directly from the paper's source code: + * http://users.isy.liu.se/cvl/perfo/software/chi_table.h + * license (BSD-like) is located in the file: 3rdparty/mscr/chi_table_LICENSE.txt * 4. though the name is *contours*, the result actually is a list of point set. */ #include "precomp.hpp" #include "opencv2/imgproc/imgproc_c.h" #include +#include "../3rdparty/mscr/chi_table.h" namespace cv { @@ -613,113 +618,6 @@ the color MSER has not been completely refactored yet. We leave it mostly as-is, with just enough changes to convert C structures to C++ ones and add support for color images into MSER_Impl::detectAndLabel. 
*/ - -const int TABLE_SIZE = 400; - -static const float chitab3[]= -{ - 0.f, 0.0150057f, 0.0239478f, 0.0315227f, - 0.0383427f, 0.0446605f, 0.0506115f, 0.0562786f, - 0.0617174f, 0.0669672f, 0.0720573f, 0.0770099f, - 0.081843f, 0.0865705f, 0.0912043f, 0.0957541f, - 0.100228f, 0.104633f, 0.108976f, 0.113261f, - 0.117493f, 0.121676f, 0.125814f, 0.12991f, - 0.133967f, 0.137987f, 0.141974f, 0.145929f, - 0.149853f, 0.15375f, 0.15762f, 0.161466f, - 0.165287f, 0.169087f, 0.172866f, 0.176625f, - 0.180365f, 0.184088f, 0.187794f, 0.191483f, - 0.195158f, 0.198819f, 0.202466f, 0.2061f, - 0.209722f, 0.213332f, 0.216932f, 0.220521f, - 0.2241f, 0.22767f, 0.231231f, 0.234783f, - 0.238328f, 0.241865f, 0.245395f, 0.248918f, - 0.252435f, 0.255947f, 0.259452f, 0.262952f, - 0.266448f, 0.269939f, 0.273425f, 0.276908f, - 0.280386f, 0.283862f, 0.287334f, 0.290803f, - 0.29427f, 0.297734f, 0.301197f, 0.304657f, - 0.308115f, 0.311573f, 0.315028f, 0.318483f, - 0.321937f, 0.32539f, 0.328843f, 0.332296f, - 0.335749f, 0.339201f, 0.342654f, 0.346108f, - 0.349562f, 0.353017f, 0.356473f, 0.35993f, - 0.363389f, 0.366849f, 0.37031f, 0.373774f, - 0.377239f, 0.380706f, 0.384176f, 0.387648f, - 0.391123f, 0.3946f, 0.39808f, 0.401563f, - 0.405049f, 0.408539f, 0.412032f, 0.415528f, - 0.419028f, 0.422531f, 0.426039f, 0.429551f, - 0.433066f, 0.436586f, 0.440111f, 0.44364f, - 0.447173f, 0.450712f, 0.454255f, 0.457803f, - 0.461356f, 0.464915f, 0.468479f, 0.472049f, - 0.475624f, 0.479205f, 0.482792f, 0.486384f, - 0.489983f, 0.493588f, 0.4972f, 0.500818f, - 0.504442f, 0.508073f, 0.511711f, 0.515356f, - 0.519008f, 0.522667f, 0.526334f, 0.530008f, - 0.533689f, 0.537378f, 0.541075f, 0.54478f, - 0.548492f, 0.552213f, 0.555942f, 0.55968f, - 0.563425f, 0.56718f, 0.570943f, 0.574715f, - 0.578497f, 0.582287f, 0.586086f, 0.589895f, - 0.593713f, 0.597541f, 0.601379f, 0.605227f, - 0.609084f, 0.612952f, 0.61683f, 0.620718f, - 0.624617f, 0.628526f, 0.632447f, 0.636378f, - 0.64032f, 0.644274f, 0.648239f, 0.652215f, - 0.656203f, 
0.660203f, 0.664215f, 0.668238f, - 0.672274f, 0.676323f, 0.680384f, 0.684457f, - 0.688543f, 0.692643f, 0.696755f, 0.700881f, - 0.70502f, 0.709172f, 0.713339f, 0.717519f, - 0.721714f, 0.725922f, 0.730145f, 0.734383f, - 0.738636f, 0.742903f, 0.747185f, 0.751483f, - 0.755796f, 0.760125f, 0.76447f, 0.768831f, - 0.773208f, 0.777601f, 0.782011f, 0.786438f, - 0.790882f, 0.795343f, 0.799821f, 0.804318f, - 0.808831f, 0.813363f, 0.817913f, 0.822482f, - 0.827069f, 0.831676f, 0.836301f, 0.840946f, - 0.84561f, 0.850295f, 0.854999f, 0.859724f, - 0.864469f, 0.869235f, 0.874022f, 0.878831f, - 0.883661f, 0.888513f, 0.893387f, 0.898284f, - 0.903204f, 0.908146f, 0.913112f, 0.918101f, - 0.923114f, 0.928152f, 0.933214f, 0.938301f, - 0.943413f, 0.94855f, 0.953713f, 0.958903f, - 0.964119f, 0.969361f, 0.974631f, 0.979929f, - 0.985254f, 0.990608f, 0.99599f, 1.0014f, - 1.00684f, 1.01231f, 1.01781f, 1.02335f, - 1.02891f, 1.0345f, 1.04013f, 1.04579f, - 1.05148f, 1.05721f, 1.06296f, 1.06876f, - 1.07459f, 1.08045f, 1.08635f, 1.09228f, - 1.09826f, 1.10427f, 1.11032f, 1.1164f, - 1.12253f, 1.1287f, 1.1349f, 1.14115f, - 1.14744f, 1.15377f, 1.16015f, 1.16656f, - 1.17303f, 1.17954f, 1.18609f, 1.19269f, - 1.19934f, 1.20603f, 1.21278f, 1.21958f, - 1.22642f, 1.23332f, 1.24027f, 1.24727f, - 1.25433f, 1.26144f, 1.26861f, 1.27584f, - 1.28312f, 1.29047f, 1.29787f, 1.30534f, - 1.31287f, 1.32046f, 1.32812f, 1.33585f, - 1.34364f, 1.3515f, 1.35943f, 1.36744f, - 1.37551f, 1.38367f, 1.39189f, 1.4002f, - 1.40859f, 1.41705f, 1.42561f, 1.43424f, - 1.44296f, 1.45177f, 1.46068f, 1.46967f, - 1.47876f, 1.48795f, 1.49723f, 1.50662f, - 1.51611f, 1.52571f, 1.53541f, 1.54523f, - 1.55517f, 1.56522f, 1.57539f, 1.58568f, - 1.59611f, 1.60666f, 1.61735f, 1.62817f, - 1.63914f, 1.65025f, 1.66152f, 1.67293f, - 1.68451f, 1.69625f, 1.70815f, 1.72023f, - 1.73249f, 1.74494f, 1.75757f, 1.77041f, - 1.78344f, 1.79669f, 1.81016f, 1.82385f, - 1.83777f, 1.85194f, 1.86635f, 1.88103f, - 1.89598f, 1.91121f, 1.92674f, 1.94257f, - 1.95871f, 
1.97519f, 1.99201f, 2.0092f, - 2.02676f, 2.04471f, 2.06309f, 2.08189f, - 2.10115f, 2.12089f, 2.14114f, 2.16192f, - 2.18326f, 2.2052f, 2.22777f, 2.25101f, - 2.27496f, 2.29966f, 2.32518f, 2.35156f, - 2.37886f, 2.40717f, 2.43655f, 2.46709f, - 2.49889f, 2.53206f, 2.56673f, 2.60305f, - 2.64117f, 2.6813f, 2.72367f, 2.76854f, - 2.81623f, 2.86714f, 2.92173f, 2.98059f, - 3.04446f, 3.1143f, 3.19135f, 3.27731f, - 3.37455f, 3.48653f, 3.61862f, 3.77982f, - 3.98692f, 4.2776f, 4.77167f, 133.333f -}; - struct MSCRNode; struct TempMSCR From 178fdbbda89d1afbb6d311662588dc7bf0780371 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Tue, 5 Sep 2023 18:08:28 +0300 Subject: [PATCH 44/57] Merge pull request #24196 from dkurt:ov_backend_cleanups Use ngraph::Output in OpenVINO backend wrapper #24196 ### Pull Request Readiness Checklist resolves https://github.com/opencv/opencv/issues/24102 * Use `ngraph::Output>` insead of `std::shared_ptr` as a backend wrapper. It lets access to multi-output nodes: https://github.com/opencv/opencv/blob/588ddf1b181aa7243144b27d65fc7690fb89e344/modules/dnn/src/net_openvino.cpp#L501-L504 * All layers can be customizable with OpenVINO >= 2022.1. nGraph reference code used for default layer implementation does not required CPU plugin also (might be tested by commenting CPU plugin at `/opt/intel/openvino/runtime/lib/intel64/plugins.xml`). * Correct inference if only intermediate blobs requested. See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. 
- [x] The feature is well documented and sample code can be built with the project CMake --- modules/dnn/src/ie_ngraph.cpp | 21 ++++--- modules/dnn/src/ie_ngraph.hpp | 6 +- modules/dnn/src/layers/batch_norm_layer.cpp | 2 +- modules/dnn/src/layers/blank_layer.cpp | 2 +- modules/dnn/src/layers/concat_layer.cpp | 4 +- modules/dnn/src/layers/convolution_layer.cpp | 8 +-- .../dnn/src/layers/crop_and_resize_layer.cpp | 2 +- modules/dnn/src/layers/elementwise_layers.cpp | 28 ++++----- modules/dnn/src/layers/eltwise_layer.cpp | 15 +++-- modules/dnn/src/layers/flatten_layer.cpp | 2 +- .../dnn/src/layers/fully_connected_layer.cpp | 2 +- modules/dnn/src/layers/lrn_layer.cpp | 2 +- .../dnn/src/layers/max_unpooling_layer.cpp | 4 +- modules/dnn/src/layers/mvn_layer.cpp | 2 +- .../dnn/src/layers/nary_eltwise_layers.cpp | 6 +- .../dnn/src/layers/normalize_bbox_layer.cpp | 8 +-- modules/dnn/src/layers/pooling_layer.cpp | 2 +- modules/dnn/src/layers/proposal_layer.cpp | 4 +- modules/dnn/src/layers/region_layer.cpp | 4 +- modules/dnn/src/layers/resize_layer.cpp | 2 +- modules/dnn/src/layers/scale_layer.cpp | 18 +++--- modules/dnn/src/layers/slice_layer.cpp | 2 +- modules/dnn/src/layers/softmax_layer.cpp | 2 +- modules/dnn/src/net_openvino.cpp | 63 ++++++++++++++----- modules/dnn/test/test_halide_layers.cpp | 11 +++- modules/dnn/test/test_tflite_importer.cpp | 9 +-- 26 files changed, 136 insertions(+), 95 deletions(-) diff --git a/modules/dnn/src/ie_ngraph.cpp b/modules/dnn/src/ie_ngraph.cpp index 140d4b0d2f..f9341febb5 100644 --- a/modules/dnn/src/ie_ngraph.cpp +++ b/modules/dnn/src/ie_ngraph.cpp @@ -383,11 +383,17 @@ public: #endif // OpenVINO >= 2022.1 -InfEngineNgraphNode::InfEngineNgraphNode(std::shared_ptr&& _node) - : BackendNode(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH), node(std::move(_node)) {} +InfEngineNgraphNode::InfEngineNgraphNode(ngraph::Output&& _node) + : BackendNode(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH), node(std::move(_node)) { + CV_Assert(node.get_node()); + 
CV_Assert(node.get_node_shared_ptr()); +} -InfEngineNgraphNode::InfEngineNgraphNode(const std::shared_ptr& _node) - : BackendNode(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH), node(_node) {} +InfEngineNgraphNode::InfEngineNgraphNode(const ngraph::Output& _node) + : BackendNode(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH), node(_node) { + CV_Assert(node.get_node()); + CV_Assert(node.get_node_shared_ptr()); +} InfEngineNgraphNode::InfEngineNgraphNode(const std::vector >& nodes, Ptr& cvLayer_, std::vector& inputs, @@ -420,7 +426,7 @@ InfEngineNgraphNode::InfEngineNgraphNode(const std::vector >& n } void InfEngineNgraphNode::setName(const std::string& name) { - node->set_friendly_name(name); + node.get_node()->set_friendly_name(name); } InfEngineNgraphNet::InfEngineNgraphNet(detail::NetImplBase& netImpl) @@ -441,8 +447,7 @@ InfEngineNgraphNet::InfEngineNgraphNet(detail::NetImplBase& netImpl, InferenceEn void InfEngineNgraphNet::addOutput(const Ptr& node) { CV_Assert(node); - CV_Assert(node->node); - const std::string& name = node->node->get_friendly_name(); + const std::string& name = node->node.get_node()->get_friendly_name(); requestedOutputs.insert({name, node.get()}); } @@ -458,7 +463,7 @@ void InfEngineNgraphNet::createNet(Target targetId) { CV_Assert(output_node_it->second); auto out = std::make_shared(output_node_it->second->node); #if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2022_1) - out->set_friendly_name(output_node_it->first + (output_node_it->second->node->get_output_size() == 1 ? "" : ".0")); + out->set_friendly_name(output_node_it->first + (output_node_it->second->node.get_node()->get_output_size() == 1 ? 
"" : ".0")); #endif outs.push_back(out); } diff --git a/modules/dnn/src/ie_ngraph.hpp b/modules/dnn/src/ie_ngraph.hpp index 7bb0ac09df..cc8f53ca5c 100644 --- a/modules/dnn/src/ie_ngraph.hpp +++ b/modules/dnn/src/ie_ngraph.hpp @@ -93,13 +93,13 @@ public: std::vector& inputs, std::vector& outputs, std::vector& internals); - InfEngineNgraphNode(std::shared_ptr&& _node); - InfEngineNgraphNode(const std::shared_ptr& _node); + InfEngineNgraphNode(ngraph::Output&& _node); + InfEngineNgraphNode(const ngraph::Output& _node); void setName(const std::string& name); // Inference Engine network object that allows to obtain the outputs of this layer. - std::shared_ptr node; + ngraph::Output node; Ptr net; Ptr cvLayer; }; diff --git a/modules/dnn/src/layers/batch_norm_layer.cpp b/modules/dnn/src/layers/batch_norm_layer.cpp index b90ee934ef..1d95096e60 100644 --- a/modules/dnn/src/layers/batch_norm_layer.cpp +++ b/modules/dnn/src/layers/batch_norm_layer.cpp @@ -457,7 +457,7 @@ public: virtual Ptr initNgraph(const std::vector >& inputs, const std::vector >& nodes) CV_OVERRIDE { auto ieInpNode = nodes[0].dynamicCast()->node; - std::vector shape(ieInpNode->get_shape().size(), 1); + std::vector shape(ieInpNode.get_shape().size(), 1); shape[1] = weights_.total(); auto weight = std::make_shared(ngraph::element::f32, ngraph::Shape(shape), weights_.data); auto bias = std::make_shared(ngraph::element::f32, ngraph::Shape(shape), bias_.data); diff --git a/modules/dnn/src/layers/blank_layer.cpp b/modules/dnn/src/layers/blank_layer.cpp index 3095e2d6c9..16de23b15e 100644 --- a/modules/dnn/src/layers/blank_layer.cpp +++ b/modules/dnn/src/layers/blank_layer.cpp @@ -148,7 +148,7 @@ public: virtual Ptr initNgraph(const std::vector >& inputs, const std::vector >& nodes) CV_OVERRIDE { - auto& ieInpNode = nodes[0].dynamicCast()->node; + auto ieInpNode = nodes[0].dynamicCast()->node; ngraph::OutputVector inp{ieInpNode}; auto blank = std::make_shared(inp, 0); return Ptr(new InfEngineNgraphNode(blank)); 
diff --git a/modules/dnn/src/layers/concat_layer.cpp b/modules/dnn/src/layers/concat_layer.cpp index 6bd3dcdea5..a5af16f32e 100644 --- a/modules/dnn/src/layers/concat_layer.cpp +++ b/modules/dnn/src/layers/concat_layer.cpp @@ -392,7 +392,7 @@ public: virtual Ptr initNgraph(const std::vector >& inputs, const std::vector >& nodes) CV_OVERRIDE { - const int numDims = nodes[0].dynamicCast()->node->get_shape().size(); + const int numDims = nodes[0].dynamicCast()->node.get_shape().size(); const int cAxis = normalize_axis(axis, numDims); std::vector maxDims(numDims, 0); @@ -403,7 +403,7 @@ public: auto inp = nodes[i].dynamicCast()->node; inp_nodes.push_back(inp); - std::vector inpShape = inp->get_shape(); + std::vector inpShape = inp.get_shape(); for (int i = 0; i < numDims; ++i) maxDims[i] = std::max(maxDims[i], inpShape[i]); } diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index 0488dc462d..d6e0aba1c6 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -822,13 +822,13 @@ public: CV_Assert(!blobs.empty()); CV_Assert_N(inputs.size() >= 1, nodes.size() >= 1); auto& ieInpNode = nodes[0].dynamicCast()->node; - std::vector dims = ieInpNode->get_shape(); + std::vector dims = ieInpNode.get_shape(); CV_Check(dims.size(), dims.size() >= 3 && dims.size() <= 5, ""); - std::shared_ptr ieWeights = nodes.size() > 1 ? nodes[1].dynamicCast()->node : nullptr; + ngraph::Output ieWeights; if (nodes.size() > 1) - CV_Assert(ieWeights); // dynamic_cast should not fail + ieWeights = nodes[1].dynamicCast()->node; const int inpCn = dims[1]; - const int inpGroupCn = nodes.size() > 1 ? ieWeights->get_shape()[1] : blobs[0].size[1]; + const int inpGroupCn = nodes.size() > 1 ? 
ieWeights.get_shape()[1] : blobs[0].size[1]; const int group = inpCn / inpGroupCn; std::vector kernel_shape; diff --git a/modules/dnn/src/layers/crop_and_resize_layer.cpp b/modules/dnn/src/layers/crop_and_resize_layer.cpp index eb8822870f..a6f58f8983 100644 --- a/modules/dnn/src/layers/crop_and_resize_layer.cpp +++ b/modules/dnn/src/layers/crop_and_resize_layer.cpp @@ -133,7 +133,7 @@ public: auto input = nodes[0].dynamicCast()->node; auto rois = nodes[1].dynamicCast()->node; - auto rois_shape = rois->get_shape(); + auto rois_shape = rois.get_shape(); std::vector dims(rois_shape.begin(), rois_shape.end()), offsets(4, 0); offsets[3] = 2; dims[3] = 7; diff --git a/modules/dnn/src/layers/elementwise_layers.cpp b/modules/dnn/src/layers/elementwise_layers.cpp index 3bcd53f95c..4247511879 100644 --- a/modules/dnn/src/layers/elementwise_layers.cpp +++ b/modules/dnn/src/layers/elementwise_layers.cpp @@ -490,7 +490,7 @@ struct ReLUFunctor : public BaseFunctor #endif #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { if (slope) { auto param = std::make_shared(ngraph::element::f32, ngraph::Shape{1}, &slope); @@ -674,7 +674,7 @@ struct ReLU6Functor : public BaseFunctor #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { return std::make_shared(node, minValue, maxValue); } @@ -796,7 +796,7 @@ struct BaseDefaultFunctor : public BaseFunctor #endif // HAVE_CANN #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { CV_Error(Error::StsNotImplemented, ""); } @@ -929,7 +929,7 @@ struct TanHFunctor : public BaseDefaultFunctor #endif // HAVE_CANN #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { return 
std::make_shared(node); } @@ -998,7 +998,7 @@ struct SwishFunctor : public BaseDefaultFunctor #endif // HAVE_CANN #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { auto sigmoid = std::make_shared(node); return std::make_shared(node, sigmoid); @@ -1074,7 +1074,7 @@ struct MishFunctor : public BaseDefaultFunctor #endif // HAVE_CANN #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { float one = 1.0f; auto constant = std::make_shared(ngraph::element::f32, ngraph::Shape{1}, &one); @@ -1157,7 +1157,7 @@ struct SigmoidFunctor : public BaseDefaultFunctor #endif // HAVE_CANN #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { return std::make_shared(node); } @@ -1237,7 +1237,7 @@ struct ELUFunctor : public BaseDefaultFunctor #endif // HAVE_CANN #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { return std::make_shared(node, alpha); } @@ -1307,7 +1307,7 @@ struct AbsValFunctor : public BaseDefaultFunctor #endif // HAVE_CANN #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { float coeff = -0.999999f; // float coeff = preferableTarget == DNN_TARGET_MYRIAD ? 
-0.999f : -0.999999f; @@ -1603,7 +1603,7 @@ struct SqrtFunctor : public BaseDefaultFunctor #endif // HAVE_HALIDE #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { return std::make_shared(node); } @@ -2329,7 +2329,7 @@ struct PowerFunctor : public BaseFunctor #endif // HAVE_CANN #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { auto scale_node = std::make_shared(ngraph::element::f32, ngraph::Shape{1}, &scale); @@ -2439,7 +2439,7 @@ struct ExpFunctor : public BaseDefaultFunctor #endif // HAVE_HALIDE #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { auto scale_node = std::make_shared(ngraph::element::f32, ngraph::Shape{1}, &normScale); @@ -2598,7 +2598,7 @@ struct ChannelsPReLUFunctor : public BaseFunctor #endif // HAVE_CANN #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { const size_t numChannels = scale.total(); auto slope = std::make_shared(ngraph::element::f32, ngraph::Shape{numChannels}, scale.data); @@ -2678,7 +2678,7 @@ struct PReLUFunctor : public ChannelsPReLUFunctor } #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { auto shape = getShape(scale); auto slope = std::make_shared(ngraph::element::f32, shape, scale.ptr()); diff --git a/modules/dnn/src/layers/eltwise_layer.cpp b/modules/dnn/src/layers/eltwise_layer.cpp index 8ed1b799eb..49b3c02de3 100644 --- a/modules/dnn/src/layers/eltwise_layer.cpp +++ b/modules/dnn/src/layers/eltwise_layer.cpp @@ -896,12 +896,14 @@ public: virtual Ptr initNgraph(const std::vector >& inputs, const std::vector >& nodes) CV_OVERRIDE { + CV_Assert(nodes.size() 
>= 2); auto curr_node = nodes[0].dynamicCast()->node; if (!coeffs.empty()) { auto coeff = std::make_shared(ngraph::element::f32, ngraph::Shape{1}, &coeffs[0]); curr_node = std::make_shared(curr_node, coeff, ngraph::op::AutoBroadcastType::NUMPY); } + std::shared_ptr res; for (size_t i = 1; i < nodes.size(); i++) { auto next_node = nodes[i].dynamicCast()->node; @@ -910,15 +912,16 @@ public: next_node = std::make_shared(next_node, coeff, ngraph::op::AutoBroadcastType::NUMPY); } switch (op) { - case SUM: curr_node = std::make_shared(curr_node, next_node); break; - case PROD: curr_node = std::make_shared(curr_node, next_node); break; - case DIV: curr_node = std::make_shared(curr_node, next_node); break; - case MAX: curr_node = std::make_shared(curr_node, next_node); break; - case MIN: curr_node = std::make_shared(curr_node, next_node); break; + case SUM: res = std::make_shared(curr_node, next_node); break; + case PROD: res = std::make_shared(curr_node, next_node); break; + case DIV: res = std::make_shared(curr_node, next_node); break; + case MAX: res = std::make_shared(curr_node, next_node); break; + case MIN: res = std::make_shared(curr_node, next_node); break; default: CV_Error(Error::StsNotImplemented, "Unsupported eltwise operation"); } + curr_node = res; } - return Ptr(new InfEngineNgraphNode(curr_node)); + return Ptr(new InfEngineNgraphNode(res)); } #endif // HAVE_DNN_NGRAPH diff --git a/modules/dnn/src/layers/flatten_layer.cpp b/modules/dnn/src/layers/flatten_layer.cpp index 6a502af7e9..9ff3bec38b 100644 --- a/modules/dnn/src/layers/flatten_layer.cpp +++ b/modules/dnn/src/layers/flatten_layer.cpp @@ -209,7 +209,7 @@ public: const std::vector >& nodes) CV_OVERRIDE { auto& ieInpNode = nodes[0].dynamicCast()->node; - std::vector dims = ieInpNode->get_shape(); + std::vector dims = ieInpNode.get_shape(); int numAxes = dims.size(); int startAxis = normalize_axis(_startAxis, numAxes); diff --git a/modules/dnn/src/layers/fully_connected_layer.cpp 
b/modules/dnn/src/layers/fully_connected_layer.cpp index 9cdb31023c..f03af7c1fb 100644 --- a/modules/dnn/src/layers/fully_connected_layer.cpp +++ b/modules/dnn/src/layers/fully_connected_layer.cpp @@ -803,7 +803,7 @@ public: } else { - std::vector shape(1 + normalize_axis(axis, ieInpNode->get_shape().size()), 0); + std::vector shape(1 + normalize_axis(axis, ieInpNode.get_shape().size()), 0); shape[shape.size() - 1] = -1; auto inp = std::make_shared( ieInpNode, diff --git a/modules/dnn/src/layers/lrn_layer.cpp b/modules/dnn/src/layers/lrn_layer.cpp index 61c2224e36..f8de64cb32 100644 --- a/modules/dnn/src/layers/lrn_layer.cpp +++ b/modules/dnn/src/layers/lrn_layer.cpp @@ -480,7 +480,7 @@ public: if (type != SPATIAL_NRM) { axes = {1}; } else { - axes.resize(ieInpNode->get_shape().size() - 2); + axes.resize(ieInpNode.get_shape().size() - 2); std::iota(axes.begin(), axes.end(), 2); } auto ngraph_axes = std::make_shared(ngraph::element::i64, ngraph::Shape{axes.size()}, axes.data()); diff --git a/modules/dnn/src/layers/max_unpooling_layer.cpp b/modules/dnn/src/layers/max_unpooling_layer.cpp index 6a599408e1..7ed6c64ae8 100644 --- a/modules/dnn/src/layers/max_unpooling_layer.cpp +++ b/modules/dnn/src/layers/max_unpooling_layer.cpp @@ -194,7 +194,7 @@ public: std::vector inpShapes(nodes.size()); std::vector outShapes, internals; for (int i = 0; i < nodes.size(); ++i) { - std::vector shape = nodes[i].dynamicCast()->node->get_shape(); + std::vector shape = nodes[i].dynamicCast()->node.get_shape(); inpShapes[i] = std::vector(shape.begin(), shape.end()); } getMemoryShapes(inpShapes, 1, outShapes, internals); @@ -213,7 +213,7 @@ public: std::make_shared(ngraph::element::i32, ngraph::Shape{1}, &newShape), true ); - if (indices->get_element_type() != ngraph::element::i32 && indices->get_element_type() != ngraph::element::i64) { + if (indices.get_element_type() != ngraph::element::i32 && indices.get_element_type() != ngraph::element::i64) { indices = std::make_shared(indices, 
ngraph::element::i64); } diff --git a/modules/dnn/src/layers/mvn_layer.cpp b/modules/dnn/src/layers/mvn_layer.cpp index dc23656b7a..aae53fa327 100644 --- a/modules/dnn/src/layers/mvn_layer.cpp +++ b/modules/dnn/src/layers/mvn_layer.cpp @@ -390,7 +390,7 @@ public: auto mvn = std::make_shared(ieInpNode, acrossChannels, normVariance, eps); #else int64_t start_axis = acrossChannels ? 1 : 2; - std::vector axes_v(ieInpNode->get_shape().size() - start_axis); + std::vector axes_v(ieInpNode.get_shape().size() - start_axis); std::iota(axes_v.begin(), axes_v.end(), start_axis); auto axes = std::make_shared(ngraph::element::i64, ngraph::Shape{axes_v.size()}, axes_v.data()); auto mvn = std::make_shared(ieInpNode, axes, normVariance, eps, ngraph::op::MVNEpsMode::INSIDE_SQRT); diff --git a/modules/dnn/src/layers/nary_eltwise_layers.cpp b/modules/dnn/src/layers/nary_eltwise_layers.cpp index fadbf58244..8572eee995 100644 --- a/modules/dnn/src/layers/nary_eltwise_layers.cpp +++ b/modules/dnn/src/layers/nary_eltwise_layers.cpp @@ -900,12 +900,12 @@ public: auto& inp0 = nodes[0].dynamicCast()->node; auto& inp1 = nodes[1].dynamicCast()->node; - if (inp0->get_element_type() != inp1->get_element_type()) { + if (inp0.get_element_type() != inp1.get_element_type()) { auto dtype = preferableTarget == DNN_TARGET_OPENCL_FP16 || preferableTarget == DNN_TARGET_MYRIAD ? 
ngraph::element::f16 : ngraph::element::f32; - if (inp0->get_element_type() != dtype) + if (inp0.get_element_type() != dtype) inp0 = std::make_shared(inp0, dtype); - if (inp1->get_element_type() != dtype) + if (inp1.get_element_type() != dtype) inp1 = std::make_shared(inp1, dtype); } diff --git a/modules/dnn/src/layers/normalize_bbox_layer.cpp b/modules/dnn/src/layers/normalize_bbox_layer.cpp index f0ad6e6f61..431eeab82d 100644 --- a/modules/dnn/src/layers/normalize_bbox_layer.cpp +++ b/modules/dnn/src/layers/normalize_bbox_layer.cpp @@ -273,21 +273,21 @@ public: const std::vector >& nodes) CV_OVERRIDE { auto& ieInpNode = nodes[0].dynamicCast()->node; - const size_t batch = ieInpNode->get_shape()[0]; - const size_t numChannels = ieInpNode->get_shape()[1]; + const size_t batch = ieInpNode.get_shape()[0]; + const size_t numChannels = ieInpNode.get_shape()[1]; std::vector axes_data; if (!acrossSpatial) { axes_data.push_back(1); } else { - axes_data.resize(ieInpNode->get_shape().size() - 1); + axes_data.resize(ieInpNode.get_shape().size() - 1); std::iota(axes_data.begin(), axes_data.end(), 1); } auto axes = std::make_shared(ngraph::element::i64, ngraph::Shape{axes_data.size()}, axes_data); auto norm = std::make_shared(ieInpNode, axes, epsilon, ngraph::op::EpsMode::ADD); CV_Assert(blobs.empty() || numChannels == blobs[0].total()); - std::vector shape(ieInpNode->get_shape().size(), 1); + std::vector shape(ieInpNode.get_shape().size(), 1); shape[0] = blobs.empty() ? 
1 : batch; shape[1] = numChannels; if (!blobs.empty()) diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index 1337657127..a75382d8a5 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -601,7 +601,7 @@ public: return Ptr(new InfEngineNgraphNode(ave_pool)); } else if (type == SUM) { - ngraph::Shape inpShape = ieInpNode->get_shape(); + ngraph::Shape inpShape = ieInpNode.get_shape(); CV_Assert(inpShape.size() == 2 + kernel_size.size()); std::vector axes; for (size_t i = 0; i < kernel_size.size(); i++) diff --git a/modules/dnn/src/layers/proposal_layer.cpp b/modules/dnn/src/layers/proposal_layer.cpp index e9edcf1547..2f2a33cc6f 100644 --- a/modules/dnn/src/layers/proposal_layer.cpp +++ b/modules/dnn/src/layers/proposal_layer.cpp @@ -366,10 +366,10 @@ public: auto& class_logits = nodes[1].dynamicCast()->node; auto& image_shape = nodes[2].dynamicCast()->node; - CV_Assert_N(image_shape->get_shape().size() == 2, image_shape->get_shape().front() == 1); + CV_Assert_N(image_shape.get_shape().size() == 2, image_shape.get_shape().front() == 1); auto shape = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, - std::vector{(int64_t)image_shape->get_shape().back()}); + std::vector{(int64_t)image_shape.get_shape().back()}); auto reshape = std::make_shared(image_shape, shape, true); auto proposal = std::make_shared(class_probs, class_logits, reshape, attr); diff --git a/modules/dnn/src/layers/region_layer.cpp b/modules/dnn/src/layers/region_layer.cpp index 7ab8cdd93f..49952b4c83 100644 --- a/modules/dnn/src/layers/region_layer.cpp +++ b/modules/dnn/src/layers/region_layer.cpp @@ -466,7 +466,7 @@ public: const std::vector >& nodes) CV_OVERRIDE { auto& input = nodes[0].dynamicCast()->node; - auto parent_shape = input->get_shape(); + auto parent_shape = input.get_shape(); int64_t b = parent_shape[0]; int64_t h = parent_shape[1]; int64_t w = parent_shape[2]; @@ -567,7 +567,7 @@ 
public: int hNorm, wNorm; if (nodes.size() > 1) { - auto node_1_shape = nodes[1].dynamicCast()->node->get_shape(); + auto node_1_shape = nodes[1].dynamicCast()->node.get_shape(); hNorm = node_1_shape[2]; wNorm = node_1_shape[3]; } diff --git a/modules/dnn/src/layers/resize_layer.cpp b/modules/dnn/src/layers/resize_layer.cpp index 607adb8aa1..fe27748319 100644 --- a/modules/dnn/src/layers/resize_layer.cpp +++ b/modules/dnn/src/layers/resize_layer.cpp @@ -443,7 +443,7 @@ public: std::vector shape = {outHeight, outWidth}; auto out_shape = std::make_shared(ngraph::element::i64, ngraph::Shape{2}, shape.data()); - auto& input_shape = ieInpNode->get_shape(); + auto& input_shape = ieInpNode.get_shape(); CV_Assert_N(input_shape[2] != 0, input_shape[3] != 0); std::vector scales = {static_cast(outHeight) / input_shape[2], static_cast(outWidth) / input_shape[3]}; auto scales_shape = std::make_shared(ngraph::element::f32, ngraph::Shape{2}, scales.data()); diff --git a/modules/dnn/src/layers/scale_layer.cpp b/modules/dnn/src/layers/scale_layer.cpp index 5338ab2215..2a4e1a05d5 100644 --- a/modules/dnn/src/layers/scale_layer.cpp +++ b/modules/dnn/src/layers/scale_layer.cpp @@ -331,34 +331,36 @@ public: virtual Ptr initNgraph(const std::vector >& inputs, const std::vector >& nodes) CV_OVERRIDE { auto ieInpNode0 = nodes[0].dynamicCast()->node; - auto ieInpNode1 = nodes.size() > 1 ? 
nodes[1].dynamicCast()->node : nullptr; + ngraph::Output ieInpNode1; + if (nodes.size() > 1) + ieInpNode1 = nodes[1].dynamicCast()->node; size_t numChannels = 1; if (blobs.empty()) - for (const size_t& dim : ieInpNode1->get_shape()) + for (const size_t& dim : ieInpNode1.get_shape()) numChannels *= dim; else numChannels = blobs[0].total(); - std::vector shape(ieInpNode0->get_shape().size(), 1); + std::vector shape(ieInpNode0.get_shape().size(), 1); int cAxis = normalize_axis(axis, shape.size()); shape[cAxis] = numChannels; - auto node = ieInpNode0; + std::shared_ptr node; if (hasWeights) { - auto weight = blobs.empty() ? ieInpNode1 : + ngraph::Output weight = blobs.empty() ? ieInpNode1 : std::make_shared(ngraph::element::f32, ngraph::Shape(shape), blobs[0].data); #if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2021_2) - node = std::make_shared(node, weight, ngraph::op::AutoBroadcastType::NUMPY); + node = std::make_shared(ieInpNode0, weight, ngraph::op::AutoBroadcastType::NUMPY); #else - node = std::make_shared(node, weight, ngraph::op::AutoBroadcastType::NUMPY); + node = std::make_shared(ieInpNode0, weight, ngraph::op::AutoBroadcastType::NUMPY); #endif } if (hasBias || !hasWeights) { - std::shared_ptr bias; + ngraph::Output bias; if (hasBias) { bias = blobs.empty() ? 
ieInpNode1 : diff --git a/modules/dnn/src/layers/slice_layer.cpp b/modules/dnn/src/layers/slice_layer.cpp index d3675e23a5..c44d18182e 100644 --- a/modules/dnn/src/layers/slice_layer.cpp +++ b/modules/dnn/src/layers/slice_layer.cpp @@ -759,7 +759,7 @@ public: { CV_Assert_N(nodes.size() <= 2); auto& ieInpNode = nodes[0].dynamicCast()->node; - CV_Assert(finalSliceRanges[0].size() == ieInpNode->get_shape().size()); + CV_Assert(finalSliceRanges[0].size() == ieInpNode.get_shape().size()); std::vector offsets, dims; for (int i = 0; i < finalSliceRanges[0].size(); ++i) diff --git a/modules/dnn/src/layers/softmax_layer.cpp b/modules/dnn/src/layers/softmax_layer.cpp index b74f2b6791..faab6a565f 100644 --- a/modules/dnn/src/layers/softmax_layer.cpp +++ b/modules/dnn/src/layers/softmax_layer.cpp @@ -385,7 +385,7 @@ public: const std::vector >& nodes) CV_OVERRIDE { auto& ieInpNode = nodes[0].dynamicCast()->node; - int axis = normalize_axis(axisRaw, ieInpNode->get_shape().size()); + int axis = normalize_axis(axisRaw, ieInpNode.get_shape().size()); auto softmax = std::make_shared(ieInpNode, axis); if (logSoftMax) return Ptr(new InfEngineNgraphNode(std::make_shared(softmax))); diff --git a/modules/dnn/src/net_openvino.cpp b/modules/dnn/src/net_openvino.cpp index e974ce34a3..4d08edeaaa 100644 --- a/modules/dnn/src/net_openvino.cpp +++ b/modules/dnn/src/net_openvino.cpp @@ -321,8 +321,10 @@ void NetImplOpenVINO::initBackend(const std::vector& blobsToKeep_) return; } +#if INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2022_1) bool supportsCPUFallback = !isArmComputePlugin() && (preferableTarget == DNN_TARGET_CPU || openvino::checkTarget(DNN_TARGET_CPU)); +#endif // Build Inference Engine networks from sets of layers that support this // backend. 
Split a whole model on several Inference Engine networks if @@ -341,6 +343,10 @@ void NetImplOpenVINO::initBackend(const std::vector& blobsToKeep_) bool fused = ld.skip; Ptr layer = ld.layerInstance; +#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2022_1) + if (ld.id == 0) + continue; +#else if (!fused && !layer->supportBackend(preferableBackend)) { CV_LOG_DEBUG(NULL, "DNN/IE: NOT supported!"); @@ -355,17 +361,6 @@ void NetImplOpenVINO::initBackend(const std::vector& blobsToKeep_) } } - // TODO: fix these workarounds - if (preferableTarget == DNN_TARGET_MYRIAD || - preferableTarget == DNN_TARGET_HDDL || - preferableTarget == DNN_TARGET_OPENCL || - preferableTarget == DNN_TARGET_OPENCL_FP16) - customizable &= ld.type != "Concat"; - - if (preferableTarget == DNN_TARGET_OPENCL || - preferableTarget == DNN_TARGET_OPENCL_FP16) - customizable &= ld.type != "Power"; - if (preferableTarget == DNN_TARGET_OPENCL) customizable &= ld.type != "Eltwise"; @@ -390,6 +385,7 @@ void NetImplOpenVINO::initBackend(const std::vector& blobsToKeep_) continue; } } +#endif ld.skip = true; // Initially skip all Inference Engine supported layers. // Create a new network if one of inputs from different Inference Engine graph. 
@@ -478,7 +474,7 @@ void NetImplOpenVINO::initBackend(const std::vector& blobsToKeep_) int oid = ld.inputBlobsId[i].oid; auto ieInpNode = inputNodes[i].dynamicCast(); - const auto& ngraph_input_node = ieInpNode->node; + const auto& ngraph_input_node = ieInpNode->node.get_node_shared_ptr(); CV_LOG_DEBUG(NULL, "DNN/IE: bind output port " << lid << ":" << oid << " (" << ngraph_input_node->get_friendly_name() << ":" << ngraph_input_node->get_type_info().name << ")"); if ((oid == 0 && ngraph_input_node->get_output_size() == 1) || lid == 0) @@ -498,10 +494,7 @@ void NetImplOpenVINO::initBackend(const std::vector& blobsToKeep_) } CV_CheckLT((size_t)oid, ngraph_input_node->get_output_size(), ""); #if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4) - // FIXIT refactor ".initNgraph()" API to use Output - // WA: use Concat to emulate Identity operation with requested output port - auto oid_node = std::make_shared(ngraph::OutputVector { ngraph_input_node->output(oid) }, 0); - inputNodes[i] = Ptr(new InfEngineNgraphNode(oid_node)); + inputNodes[i] = new InfEngineNgraphNode(ngraph_input_node->output(oid)); #elif INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_3) inputNodes[i] = Ptr(new InfEngineNgraphNode(ieInpNode->node->get_output_as_single_output_node(oid))); #else @@ -556,6 +549,36 @@ void NetImplOpenVINO::initBackend(const std::vector& blobsToKeep_) addNgraphOutputs(ld); } + // User may choose to return only intermediate blobs but not network's result (see Test_TFLite.max_unpooling) + // Such layers should not be skipped when forwardLayer is called. 
+ // Also, perform a sanity check that there is no double inferred networks (a single skip=false per unique net instance) + std::set> uniqueNets; + if (!blobsToKeep_.empty()) + { + LayerPin latestLayerPin = getLatestLayerPin(blobsToKeep_); + for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it) + { + LayerData& ld = it->second; + auto iter = ld.backendNodes.find(preferableBackend); + if (iter == ld.backendNodes.end()) + continue; + + Ptr& node = iter->second; + if (node.empty()) + continue; + + Ptr ieNode = node.dynamicCast(); + if (ieNode.empty()) + continue; + + if (ld.id == latestLayerPin.lid) { + ld.skip = false; + uniqueNets.insert(ieNode->net); + break; + } + } + } + // Initialize all networks. for (MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it) { @@ -578,9 +601,15 @@ void NetImplOpenVINO::initBackend(const std::vector& blobsToKeep_) { ieNode->net->addOutput(ieNode); ieNode->net->createNet((Target)preferableTarget); - ld.skip = false; + if (uniqueNets.find(ieNode->net) == uniqueNets.end()) { + ld.skip = false; + uniqueNets.insert(ieNode->net); + } } } +#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2022_1) + CV_Assert(uniqueNets.size() == 1); +#endif } diff --git a/modules/dnn/test/test_halide_layers.cpp b/modules/dnn/test/test_halide_layers.cpp index 3629f720fb..12e62c754a 100644 --- a/modules/dnn/test/test_halide_layers.cpp +++ b/modules/dnn/test/test_halide_layers.cpp @@ -425,6 +425,13 @@ TEST_P(FullyConnected, Accuracy) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X); } #endif + // https://github.com/openvinotoolkit/openvino/issues/19436 + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && targetId == DNN_TARGET_OPENCL_FP16 && batch == 16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16); +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2023000000) + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && targetId == DNN_TARGET_OPENCL && batch == 16) + 
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL); +#endif Mat weights(outChannels, inChannels * inSize.height * inSize.width, CV_32F); randu(weights, -1.0f, 1.0f); @@ -454,11 +461,13 @@ TEST_P(FullyConnected, Accuracy) if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && targetId == DNN_TARGET_OPENCL_FP16) { l1 = 0.01; + if (INF_ENGINE_VER_MAJOR_GE(2023000000)) + lInf = 0.016; } if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && targetId == DNN_TARGET_OPENCL) { l1 = 5e-3; - lInf = 7e-3; + lInf = INF_ENGINE_VER_MAJOR_GE(2023000000) ? 0.016 : 7e-3; } #endif if (targetId == DNN_TARGET_CUDA_FP16) diff --git a/modules/dnn/test/test_tflite_importer.cpp b/modules/dnn/test/test_tflite_importer.cpp index beb586f126..4f3a8b4a96 100644 --- a/modules/dnn/test/test_tflite_importer.cpp +++ b/modules/dnn/test/test_tflite_importer.cpp @@ -157,14 +157,7 @@ TEST_P(Test_TFLite, max_unpooling) net.setInput(input); std::vector > outs; - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { - // TODO: seems like a bug with a retrieving intermediate tensors - net.forward(outs, {"conv2d_transpose_4", "p_re_lu_1", "max_pooling_with_argmax2d", "conv2d_86", "max_unpooling2d_2"}); - outs.erase(outs.begin()); - } - else { - net.forward(outs, {"p_re_lu_1", "max_pooling_with_argmax2d", "conv2d_86", "max_unpooling2d_2"}); - } + net.forward(outs, {"p_re_lu_1", "max_pooling_with_argmax2d", "conv2d_86", "max_unpooling2d_2"}); ASSERT_EQ(outs.size(), 4); ASSERT_EQ(outs[0].size(), 1); From 494d201fda24d2862e37302c73d3b1febfe47a5f Mon Sep 17 00:00:00 2001 From: Yuriy Chernyshov Date: Mon, 28 Aug 2023 19:16:19 +0300 Subject: [PATCH 45/57] Add missing includes --- apps/visualisation/opencv_visualisation.cpp | 1 + modules/core/include/opencv2/core/opencl/opencl_info.hpp | 1 + modules/core/src/check.cpp | 2 ++ 3 files changed, 4 insertions(+) diff --git a/apps/visualisation/opencv_visualisation.cpp b/apps/visualisation/opencv_visualisation.cpp index 85e9697aad..9b7fcd9f48 100644 --- 
a/apps/visualisation/opencv_visualisation.cpp +++ b/apps/visualisation/opencv_visualisation.cpp @@ -60,6 +60,7 @@ Created by: Puttemans Steven - April 2016 #include #include +#include using namespace std; using namespace cv; diff --git a/modules/core/include/opencv2/core/opencl/opencl_info.hpp b/modules/core/include/opencv2/core/opencl/opencl_info.hpp index 3ead76e5c4..0f0de893ca 100644 --- a/modules/core/include/opencv2/core/opencl/opencl_info.hpp +++ b/modules/core/include/opencv2/core/opencl/opencl_info.hpp @@ -3,6 +3,7 @@ // of this distribution and at http://opencv.org/license.html. #include +#include #include #include diff --git a/modules/core/src/check.cpp b/modules/core/src/check.cpp index ffd9b302bf..2891f3a2e3 100644 --- a/modules/core/src/check.cpp +++ b/modules/core/src/check.cpp @@ -4,6 +4,8 @@ #include "precomp.hpp" +#include + #include "opencv2/core/check.hpp" namespace cv { From 674c618471f2f7c57d6ca51a6638667864b6ebc8 Mon Sep 17 00:00:00 2001 From: CSBVision Date: Tue, 8 Aug 2023 13:31:32 +0200 Subject: [PATCH 46/57] Update dnn_utils.cpp --- modules/dnn/src/dnn_utils.cpp | 67 ++++++++++++++++++++-------------- modules/dnn/test/test_misc.cpp | 22 +++++++++++ 2 files changed, 61 insertions(+), 28 deletions(-) diff --git a/modules/dnn/src/dnn_utils.cpp b/modules/dnn/src/dnn_utils.cpp index 18c7e975eb..d4d7dda008 100644 --- a/modules/dnn/src/dnn_utils.cpp +++ b/modules/dnn/src/dnn_utils.cpp @@ -5,6 +5,7 @@ #include "precomp.hpp" #include +#include namespace cv { @@ -100,15 +101,29 @@ void blobFromImagesWithParams(InputArrayOfArrays images_, OutputArray blob_, con images_.getMatVector(images); CV_Assert(!images.empty()); - int nch = images[0].channels(); - Scalar scalefactor = param.scalefactor; - if (param.ddepth == CV_8U) { - CV_Assert(scalefactor == Scalar::all(1.0) && "Scaling is not supported for CV_8U blob depth"); + CV_Assert(param.scalefactor == Scalar::all(1.0) && "Scaling is not supported for CV_8U blob depth"); CV_Assert(param.mean == 
Scalar() && "Mean subtraction is not supported for CV_8U blob depth"); } + int nch = images[0].channels(); + Scalar scalefactor = param.scalefactor; + Scalar mean = param.mean; + + if (param.swapRB) + { + if (nch > 2) + { + std::swap(mean[0], mean[2]); + std::swap(scalefactor[0], scalefactor[2]); + } + else + { + CV_LOG_WARNING(NULL, "Red/blue color swapping requires at least three image channels."); + } + } + for (size_t i = 0; i < images.size(); i++) { Size imgSize = images[i].size(); @@ -126,34 +141,26 @@ void blobFromImagesWithParams(InputArrayOfArrays images_, OutputArray blob_, con size); images[i] = images[i](crop); } + else if (param.paddingmode == DNN_PMODE_LETTERBOX) + { + float resizeFactor = std::min(size.width / (float)imgSize.width, + size.height / (float)imgSize.height); + int rh = int(imgSize.height * resizeFactor); + int rw = int(imgSize.width * resizeFactor); + resize(images[i], images[i], Size(rw, rh), INTER_LINEAR); + + int top = (size.height - rh)/2; + int bottom = size.height - top - rh; + int left = (size.width - rw)/2; + int right = size.width - left - rw; + copyMakeBorder(images[i], images[i], top, bottom, left, right, BORDER_CONSTANT); + } else { - if (param.paddingmode == DNN_PMODE_LETTERBOX) - { - float resizeFactor = std::min(size.width / (float)imgSize.width, - size.height / (float)imgSize.height); - int rh = int(imgSize.height * resizeFactor); - int rw = int(imgSize.width * resizeFactor); - resize(images[i], images[i], Size(rw, rh), INTER_LINEAR); - - int top = (size.height - rh)/2; - int bottom = size.height - top - rh; - int left = (size.width - rw)/2; - int right = size.width - left - rw; - copyMakeBorder(images[i], images[i], top, bottom, left, right, BORDER_CONSTANT); - } - else - resize(images[i], images[i], size, 0, 0, INTER_LINEAR); + resize(images[i], images[i], size, 0, 0, INTER_LINEAR); } } - Scalar mean = param.mean; - if (param.swapRB) - { - std::swap(mean[0], mean[2]); - std::swap(scalefactor[0], scalefactor[2]); - } - 
if (images[i].depth() == CV_8U && param.ddepth == CV_32F) images[i].convertTo(images[i], CV_32F); @@ -220,18 +227,22 @@ void blobFromImagesWithParams(InputArrayOfArrays images_, OutputArray blob_, con CV_Assert(image.depth() == blob_.depth()); CV_Assert(image.channels() == image0.channels()); CV_Assert(image.size() == image0.size()); - if (param.swapRB) + if (nch > 2 && param.swapRB) { Mat tmpRB; cvtColor(image, tmpRB, COLOR_BGR2RGB); tmpRB.copyTo(Mat(tmpRB.rows, tmpRB.cols, subMatType, blob.ptr((int)i, 0))); } else + { image.copyTo(Mat(image.rows, image.cols, subMatType, blob.ptr((int)i, 0))); + } } } else + { CV_Error(Error::StsUnsupportedFormat, "Unsupported data layout in blobFromImagesWithParams function."); + } } void imagesFromBlob(const cv::Mat& blob_, OutputArrayOfArrays images_) diff --git a/modules/dnn/test/test_misc.cpp b/modules/dnn/test/test_misc.cpp index 4ee3e013cb..0c5fb28c5d 100644 --- a/modules/dnn/test/test_misc.cpp +++ b/modules/dnn/test/test_misc.cpp @@ -120,6 +120,28 @@ TEST(blobFromImageWithParams_4ch, letter_box) EXPECT_EQ(0, cvtest::norm(targetBlob, blob, NORM_INF)); } +TEST(blobFromImagesWithParams_4ch, multi_image) +{ + Mat img(10, 10, CV_8UC4, cv::Scalar(0, 1, 2, 3)); + Scalar scalefactor(0.1, 0.2, 0.3, 0.4); + + Image2BlobParams param; + param.scalefactor = scalefactor; + param.datalayout = DNN_LAYOUT_NHWC; + + Mat blobs = blobFromImagesWithParams(std::vector { img, 2*img }, param); + vector ranges; + ranges.push_back(Range(0, 1)); + ranges.push_back(Range(0, blobs.size[1])); + ranges.push_back(Range(0, blobs.size[2])); + ranges.push_back(Range(0, blobs.size[3])); + Mat blob0 = blobs(ranges); + ranges[0] = Range(1, 2); + Mat blob1 = blobs(ranges); + + EXPECT_EQ(0, cvtest::norm(2*blob0, blob1, NORM_INF)); +} + TEST(readNet, Regression) { Net net = readNet(findDataFile("dnn/squeezenet_v1.1.prototxt"), From 8a415c881ab9e13d8d1319ca8fdbc1d3c3076aec Mon Sep 17 00:00:00 2001 From: Yuriy Chernyshov Date: Wed, 6 Sep 2023 13:45:28 +0300 
Subject: [PATCH 47/57] Add missing std namespace qualifiers --- .../calib3d/test/test_affine2d_estimator.cpp | 4 ++-- .../calib3d/test/test_affine3d_estimator.cpp | 4 ++-- .../test/test_affine_partial2d_estimator.cpp | 4 ++-- modules/calib3d/test/test_stereomatching.cpp | 8 ++++---- .../test/test_translation3d_estimator.cpp | 4 ++-- modules/core/test/test_countnonzero.cpp | 2 +- .../test/test_descriptors_regression.cpp | 2 +- modules/imgproc/test/test_histograms.cpp | 4 ++-- modules/objdetect/src/qrcode.cpp | 18 +++++++++--------- modules/objdetect/test/test_cascadeandhog.cpp | 2 +- 10 files changed, 26 insertions(+), 26 deletions(-) diff --git a/modules/calib3d/test/test_affine2d_estimator.cpp b/modules/calib3d/test/test_affine2d_estimator.cpp index 95f1235105..2282dc3240 100644 --- a/modules/calib3d/test/test_affine2d_estimator.cpp +++ b/modules/calib3d/test/test_affine2d_estimator.cpp @@ -115,8 +115,8 @@ TEST_P(EstimateAffine2D, testNPoints) EXPECT_NEAR(0., cvtest::norm(aff_est, aff, NORM_INF), 1e-4); - bool inliers_good = count(inliers.begin(), inliers.end(), 1) == m && - m == accumulate(inliers.begin(), inliers.begin() + m, 0); + bool inliers_good = std::count(inliers.begin(), inliers.end(), 1) == m && + m == std::accumulate(inliers.begin(), inliers.begin() + m, 0); EXPECT_TRUE(inliers_good); } diff --git a/modules/calib3d/test/test_affine3d_estimator.cpp b/modules/calib3d/test/test_affine3d_estimator.cpp index 3f1b50e5f2..bb639a4018 100644 --- a/modules/calib3d/test/test_affine3d_estimator.cpp +++ b/modules/calib3d/test/test_affine3d_estimator.cpp @@ -160,8 +160,8 @@ bool CV_Affine3D_EstTest::testNPoints() return false; } - bool outl_good = count(outl.begin(), outl.end(), 1) == m && - m == accumulate(outl.begin(), outl.begin() + m, 0); + bool outl_good = std::count(outl.begin(), outl.end(), 1) == m && + m == std::accumulate(outl.begin(), outl.begin() + m, 0); if (!outl_good) { diff --git a/modules/calib3d/test/test_affine_partial2d_estimator.cpp 
b/modules/calib3d/test/test_affine_partial2d_estimator.cpp index 0be25ee7eb..dbbb4da0d9 100644 --- a/modules/calib3d/test/test_affine_partial2d_estimator.cpp +++ b/modules/calib3d/test/test_affine_partial2d_estimator.cpp @@ -125,8 +125,8 @@ TEST_P(EstimateAffinePartial2D, testNPoints) EXPECT_NEAR(0., cvtest::norm(aff_est, aff, NORM_INF), 1e-4); - bool inliers_good = count(inliers.begin(), inliers.end(), 1) == m && - m == accumulate(inliers.begin(), inliers.begin() + m, 0); + bool inliers_good = std::count(inliers.begin(), inliers.end(), 1) == m && + m == std::accumulate(inliers.begin(), inliers.begin() + m, 0); EXPECT_TRUE(inliers_good); } diff --git a/modules/calib3d/test/test_stereomatching.cpp b/modules/calib3d/test/test_stereomatching.cpp index 02d1823d2d..c17d92292a 100644 --- a/modules/calib3d/test/test_stereomatching.cpp +++ b/modules/calib3d/test/test_stereomatching.cpp @@ -740,8 +740,8 @@ public: CV_StereoBMTest() { name = "stereobm"; - fill(rmsEps.begin(), rmsEps.end(), 0.4f); - fill(fracEps.begin(), fracEps.end(), 0.022f); + std::fill(rmsEps.begin(), rmsEps.end(), 0.4f); + std::fill(fracEps.begin(), fracEps.end(), 0.022f); } protected: @@ -866,8 +866,8 @@ public: CV_StereoSGBMTest() { name = "stereosgbm"; - fill(rmsEps.begin(), rmsEps.end(), 0.25f); - fill(fracEps.begin(), fracEps.end(), 0.01f); + std::fill(rmsEps.begin(), rmsEps.end(), 0.25f); + std::fill(fracEps.begin(), fracEps.end(), 0.01f); } protected: diff --git a/modules/calib3d/test/test_translation3d_estimator.cpp b/modules/calib3d/test/test_translation3d_estimator.cpp index 88ad40e0f8..97c20e5033 100644 --- a/modules/calib3d/test/test_translation3d_estimator.cpp +++ b/modules/calib3d/test/test_translation3d_estimator.cpp @@ -91,8 +91,8 @@ TEST(Calib3d_EstimateTranslation3D, testNPoints) << "aff est: " << trans_est << endl << "aff ref: " << trans; - bool outl_good = count(outl.begin(), outl.end(), 1) == m && - m == accumulate(outl.begin(), outl.begin() + m, 0); + bool outl_good = 
std::count(outl.begin(), outl.end(), 1) == m && + m == std::accumulate(outl.begin(), outl.begin() + m, 0); EXPECT_TRUE(outl_good); } diff --git a/modules/core/test/test_countnonzero.cpp b/modules/core/test/test_countnonzero.cpp index fe14affb9c..41eaceb189 100644 --- a/modules/core/test/test_countnonzero.cpp +++ b/modules/core/test/test_countnonzero.cpp @@ -259,7 +259,7 @@ TEST_P (CountNonZeroND, ndim) const int ONE_SIZE = 5; vector sizes(dims); - fill(sizes.begin(), sizes.end(), ONE_SIZE); + std::fill(sizes.begin(), sizes.end(), ONE_SIZE); Mat data(sizes, CV_MAKETYPE(type, 1)); data = 0; diff --git a/modules/features2d/test/test_descriptors_regression.cpp b/modules/features2d/test/test_descriptors_regression.cpp index 0258fea0f3..e44edb0769 100644 --- a/modules/features2d/test/test_descriptors_regression.cpp +++ b/modules/features2d/test/test_descriptors_regression.cpp @@ -142,7 +142,7 @@ TEST_P(DescriptorImage, no_crash) { vector fnames; glob(cvtest::TS::ptr()->get_data_path() + pattern, fnames, false); - sort(fnames.begin(), fnames.end()); + std::sort(fnames.begin(), fnames.end()); Ptr akaze_mldb = AKAZE::create(AKAZE::DESCRIPTOR_MLDB); Ptr akaze_mldb_upright = AKAZE::create(AKAZE::DESCRIPTOR_MLDB_UPRIGHT); diff --git a/modules/imgproc/test/test_histograms.cpp b/modules/imgproc/test/test_histograms.cpp index b57af774f2..efd045d31b 100644 --- a/modules/imgproc/test/test_histograms.cpp +++ b/modules/imgproc/test/test_histograms.cpp @@ -1198,7 +1198,7 @@ void CV_CalcHistTest::run_func(void) } std::vector imagesv(cdims); - copy(images.begin(), images.begin() + cdims, imagesv.begin()); + std::copy(images.begin(), images.begin() + cdims, imagesv.begin()); Mat mask = images[CV_MAX_DIM]; if( !CV_IS_SPARSE_HIST(hist[0]) ) @@ -1493,7 +1493,7 @@ void CV_CalcBackProjectTest::run_func(void) } std::vector imagesv(hdims); - copy(images.begin(), images.begin() + hdims, imagesv.begin()); + std::copy(images.begin(), images.begin() + hdims, imagesv.begin()); cv::Mat dst = 
images[CV_MAX_DIM+1]; diff --git a/modules/objdetect/src/qrcode.cpp b/modules/objdetect/src/qrcode.cpp index 1df46a9bb5..f4df6555da 100644 --- a/modules/objdetect/src/qrcode.cpp +++ b/modules/objdetect/src/qrcode.cpp @@ -1249,14 +1249,14 @@ bool QRDecode::computeSidesPoints(const vector &result_integer_hull) { if (points.front().x > points.back().x) { - reverse(points.begin(), points.end()); + std::reverse(points.begin(), points.end()); } } else { if (points.front().y > points.back().y) { - reverse(points.begin(), points.end()); + std::reverse(points.begin(), points.end()); } } if (points.empty()) @@ -1632,7 +1632,7 @@ bool QRDecode::findPatternsVerticesPoints(vector > &patterns_verti } if ((int)min_angle_pnts_indexes.size() == num_vertices) { break; } } - sort(min_angle_pnts_indexes.begin(), min_angle_pnts_indexes.end()); + std::sort(min_angle_pnts_indexes.begin(), min_angle_pnts_indexes.end()); vector contour_vertices_points; @@ -1761,11 +1761,11 @@ bool QRDecode::findTempPatternsAddingPoints(vector } if (abs(p1.x - p2.x) > abs(p1.y - p2.y)) { - sort(points.begin(), points.end(), sortPointsByX()); + std::sort(points.begin(), points.end(), sortPointsByX()); } else { - sort(points.begin(), points.end(), sortPointsByY()); + std::sort(points.begin(), points.end(), sortPointsByY()); } temp_patterns_add_points.push_back(std::pair >(idx_curved_side,points)); @@ -1909,11 +1909,11 @@ void QRDecode::completeAndSortSides() Point p2 = it->second.back(); if (abs(p1.x - p2.x) > abs(p1.y - p2.y)) { - sort(it->second.begin(), it->second.end(), sortPointsByX()); + std::sort(it->second.begin(), it->second.end(), sortPointsByX()); } else { - sort(it->second.begin(), it->second.end(), sortPointsByY()); + std::sort(it->second.begin(), it->second.end(), sortPointsByY()); } } } @@ -2075,8 +2075,8 @@ bool QRDecode::divideIntoEvenSegments(vector > &segments_points) Point2f segment_start = segments_points[i][j]; Point2f segment_end = segments_points[i][j + 1]; vector::iterator it_start, 
it_end, it; - it_start = find(spline_lines[i].begin(), spline_lines[i].end(), segment_start); - it_end = find(spline_lines[i].begin(), spline_lines[i].end(), segment_end); + it_start = std::find(spline_lines[i].begin(), spline_lines[i].end(), segment_start); + it_end = std::find(spline_lines[i].begin(), spline_lines[i].end(), segment_end); float max_dist_to_line = 0.0; for (it = it_start; it != it_end; it++) { diff --git a/modules/objdetect/test/test_cascadeandhog.cpp b/modules/objdetect/test/test_cascadeandhog.cpp index 4151b899e3..0a68bd9bb3 100644 --- a/modules/objdetect/test/test_cascadeandhog.cpp +++ b/modules/objdetect/test/test_cascadeandhog.cpp @@ -355,7 +355,7 @@ int CV_DetectorTest::validate( int detectorIdx, vector >& objects ) map[minIdx] = 1; } } - noPair += (int)count_if( map.begin(), map.end(), isZero ); + noPair += (int)std::count_if( map.begin(), map.end(), isZero ); totalNoPair += noPair; /*if( noPair > cvRound(valRects.size()*eps.noPair)+1 ) From d0de575aef0b3383f183f6951a64dddc02a7563a Mon Sep 17 00:00:00 2001 From: beanjoy <120680451@qq.com> Date: Thu, 7 Sep 2023 18:06:39 +0800 Subject: [PATCH 48/57] Merge pull request #24142 from beanjoy:4.x MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Modify the outputVideoFormat after changing the output format in MSMF backend #24142 After changing the output format, need to modify the outputVideoFormat, otherwise the outputVideoFormat is always CV_CAP_MODE_BGR, and an error will occur when converting the format in retrieveVideoFrame(), and will always enter "case CV_CAP_MODE_BGR:" process. ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [ ] I agree to contribute to the project under Apache 2 License. 
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [ ] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake Co-authored-by: 李龙 --- modules/videoio/src/cap_msmf.cpp | 7 ++++++- modules/videoio/test/test_camera.cpp | 15 +++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/modules/videoio/src/cap_msmf.cpp b/modules/videoio/src/cap_msmf.cpp index a55f919ed1..4b234b8cae 100644 --- a/modules/videoio/src/cap_msmf.cpp +++ b/modules/videoio/src/cap_msmf.cpp @@ -1159,7 +1159,12 @@ bool CvCapture_MSMF::configureVideoOutput(MediaType newType, cv::uint32_t outFor { initStream(dwVideoStreamIndex, nativeFormat); } - return initStream(dwVideoStreamIndex, newFormat); + if (!initStream(dwVideoStreamIndex, newFormat)) + { + return false; + } + outputVideoFormat = outFormat; + return true; } bool CvCapture_MSMF::configureOutput() diff --git a/modules/videoio/test/test_camera.cpp b/modules/videoio/test/test_camera.cpp index fc269959c3..8b0f0efe83 100644 --- a/modules/videoio/test/test_camera.cpp +++ b/modules/videoio/test/test_camera.cpp @@ -119,6 +119,21 @@ TEST(DISABLED_videoio_camera, v4l_read_mjpg) capture.release(); } +TEST(DISABLED_videoio_camera, msmf_read_yuyv) +{ + VideoCapture capture(CAP_MSMF); + ASSERT_TRUE(capture.isOpened()); + ASSERT_TRUE(capture.set(CAP_PROP_FOURCC, VideoWriter::fourcc('Y', 'U', 'Y', 'V'))); + std::cout << "Camera 0 via " << capture.getBackendName() << " backend" << std::endl; + std::cout << "Frame width: " << capture.get(CAP_PROP_FRAME_WIDTH) << std::endl; + std::cout << " height: " << capture.get(CAP_PROP_FRAME_HEIGHT) << std::endl; + std::cout 
<< "Capturing FPS: " << capture.get(CAP_PROP_FPS) << std::endl; + int fourcc = (int)capture.get(CAP_PROP_FOURCC); + std::cout << "FOURCC code: " << cv::format("0x%8x", fourcc) << std::endl; + test_readFrames(capture); + capture.release(); +} + TEST(DISABLED_videoio_camera, v4l_open_mjpg) { VideoCapture capture; From e8f94182f577894410cc59d5d20979dff69d8878 Mon Sep 17 00:00:00 2001 From: jason_w Date: Thu, 7 Sep 2023 20:47:00 +0800 Subject: [PATCH 49/57] Merge pull request #24180 from MambaWong:4.x Fixed the channels when capturing yuv422 with v4l2 backend #24180 example to reproduce the problem ```cpp #include #include #include #include #include using namespace cv; using namespace std; void help_func(VideoCapture& cap) { int height = cap.get(cv::CAP_PROP_FRAME_HEIGHT); int width = cap.get(cv::CAP_PROP_FRAME_WIDTH); int pixel_type = cap.get(cv::CAP_PROP_FORMAT); int channels = CV_MAT_CN(pixel_type); int pixel_bytes = CV_ELEM_SIZE(pixel_type); bool to_bgr = static_cast(cap.get(cv::CAP_PROP_CONVERT_RGB)); std::cout << "backend: " << cap.getBackendName() << std::endl; std::cout << std::hex << "fourcc: " << static_cast(cap.get(cv::CAP_PROP_FOURCC)) << std::endl; std::cout << std::boolalpha << "to_bgr: " << to_bgr << std::endl; std::cout << std::dec << "height: " << height << " width: " << width << " channels: " << channels << " pixel_bytes: " << pixel_bytes << std::endl; std::cout << "-----------------------------------------" << std::endl; } int main(int, char**) { VideoCapture cap; cap.open("/dev/video0"); if (!cap.isOpened()) { cerr << "ERROR! 
Unable to open camera\n"; return -1; } { help_func(cap); } { cap.set(cv::CAP_PROP_FRAME_HEIGHT, 1080); cap.set(cv::CAP_PROP_FRAME_WIDTH, 1920); cap.set(cv::CAP_PROP_CONVERT_RGB, 0); help_func(cap); } // { // cap.set(cv::CAP_PROP_CONVERT_RGB, 0); // cap.set(cv::CAP_PROP_FRAME_HEIGHT, 1080); // cap.set(cv::CAP_PROP_FRAME_WIDTH, 1920); // help_func(cap); // } Mat frame; int frame_idx = 0; while (cap.read(frame)) { std::cout << "frame index: " << frame_idx++ << std::endl; help_func(cap); if (frame.empty()) { cerr << "ERROR! blank frame grabbed\n"; break; } Mat bgr; if (cap.get(cv::CAP_PROP_CONVERT_RGB)) { bgr = frame; } else { cv::cvtColor(frame, bgr, cv::COLOR_YUV2BGR_YUYV); } imshow("frame", bgr); if (waitKey(5) >= 0) { break; } } return 0; } ``` The above code will get the wrong channels. By changing lines 41-45 like below, can get the correct channels. code This is because `cap.set(cv::CAP_PROP_FRAME_HEIGHT, 1080);` and `cap.set(cv::CAP_PROP_FRAME_WIDTH, 1920);` reinitialize the `frame`, but `cap.set(cv::CAP_PROP_CONVERT_RGB, 0);` not. Log info. log We can also observe that we get the correct channels in the while loop. This is because: https://github.com/opencv/opencv/blob/ca0bd70cde431b1dd211254011dd9bcf965f582f/modules/videoio/src/cap_v4l.cpp#L2309-L2310 reinitialize the `frame`. 
--- modules/videoio/src/cap_v4l.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/videoio/src/cap_v4l.cpp b/modules/videoio/src/cap_v4l.cpp index 905c79e42f..5b282f1966 100644 --- a/modules/videoio/src/cap_v4l.cpp +++ b/modules/videoio/src/cap_v4l.cpp @@ -2155,6 +2155,7 @@ bool CvCaptureCAM_V4L::setProperty( int property_id, double _value ) }else{ convert_rgb = false; releaseFrame(); + v4l2_create_frame(); return true; } case cv::CAP_PROP_FOURCC: From e5ff41ec9bdbf1d81d095f82e3b87ce913dd69a7 Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 7 Sep 2023 14:09:01 +0300 Subject: [PATCH 50/57] fixes extendDictionary, add test --- modules/objdetect/src/aruco/aruco_dictionary.cpp | 3 ++- modules/objdetect/test/test_boarddetection.cpp | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/modules/objdetect/src/aruco/aruco_dictionary.cpp b/modules/objdetect/src/aruco/aruco_dictionary.cpp index f73cea3357..3d5f9b1bfd 100644 --- a/modules/objdetect/src/aruco/aruco_dictionary.cpp +++ b/modules/objdetect/src/aruco/aruco_dictionary.cpp @@ -355,6 +355,7 @@ static int _getSelfDistance(const Mat &marker) { Dictionary extendDictionary(int nMarkers, int markerSize, const Dictionary &baseDictionary, int randomSeed) { + CV_Assert(nMarkers > 0); RNG rng((uint64)(randomSeed)); Dictionary out = Dictionary(Mat(), markerSize); @@ -370,7 +371,7 @@ Dictionary extendDictionary(int nMarkers, int markerSize, const Dictionary &base // if baseDictionary is provided, calculate its intermarker distance if(baseDictionary.bytesList.rows > 0) { CV_Assert(baseDictionary.markerSize == markerSize); - out.bytesList = baseDictionary.bytesList.clone(); + out.bytesList = baseDictionary.bytesList.rowRange(0, min(nMarkers, baseDictionary.bytesList.rows)).clone(); int minDistance = markerSize * markerSize + 1; for(int i = 0; i < out.bytesList.rows; i++) { diff --git a/modules/objdetect/test/test_boarddetection.cpp b/modules/objdetect/test/test_boarddetection.cpp index 
e47e6c3cb6..0c99e6de61 100644 --- a/modules/objdetect/test/test_boarddetection.cpp +++ b/modules/objdetect/test/test_boarddetection.cpp @@ -318,4 +318,12 @@ TEST(CV_ArucoGenerateBoard, regression_1226) { }); } +TEST(CV_ArucoDictionary, extendDictionary) { + aruco::Dictionary base_dictionary = aruco::getPredefinedDictionary(aruco::DICT_4X4_250); + aruco::Dictionary custom_dictionary = aruco::extendDictionary(150, 4, base_dictionary); + + ASSERT_EQ(custom_dictionary.bytesList.rows, 150); + ASSERT_EQ(cv::norm(custom_dictionary.bytesList, base_dictionary.bytesList.rowRange(0, 150)), 0.); +} + }} // namespace From ceeb01dce5f6358df0c7b784b04fead14603a85d Mon Sep 17 00:00:00 2001 From: Alexander Lyulkov Date: Fri, 8 Sep 2023 12:44:22 +0700 Subject: [PATCH 51/57] Replaced torch7 by onnx model in fast-neural-style dnn sample --- samples/dnn/fast_neural_style.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/samples/dnn/fast_neural_style.py b/samples/dnn/fast_neural_style.py index 912c2f0832..22b8217b3a 100644 --- a/samples/dnn/fast_neural_style.py +++ b/samples/dnn/fast_neural_style.py @@ -5,15 +5,15 @@ import argparse parser = argparse.ArgumentParser( description='This script is used to run style transfer models from ' - 'https://github.com/jcjohnson/fast-neural-style using OpenCV') + 'https://github.com/onnx/models/tree/main/vision/style_transfer/fast_neural_style using OpenCV') parser.add_argument('--input', help='Path to image or video. 
Skip to capture frames from camera') -parser.add_argument('--model', help='Path to .t7 model') +parser.add_argument('--model', help='Path to .onnx model') parser.add_argument('--width', default=-1, type=int, help='Resize input to specific width.') parser.add_argument('--height', default=-1, type=int, help='Resize input to specific height.') parser.add_argument('--median_filter', default=0, type=int, help='Kernel size of postprocessing blurring.') args = parser.parse_args() -net = cv.dnn.readNetFromTorch(cv.samples.findFile(args.model)) +net = cv.dnn.readNetFromONNX(cv.samples.findFile(args.model)) net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV) if args.input: @@ -31,16 +31,12 @@ while cv.waitKey(1) < 0: inWidth = args.width if args.width != -1 else frame.shape[1] inHeight = args.height if args.height != -1 else frame.shape[0] inp = cv.dnn.blobFromImage(frame, 1.0, (inWidth, inHeight), - (103.939, 116.779, 123.68), swapRB=False, crop=False) + swapRB=True, crop=False) net.setInput(inp) out = net.forward() out = out.reshape(3, out.shape[2], out.shape[3]) - out[0] += 103.939 - out[1] += 116.779 - out[2] += 123.68 - out /= 255 out = out.transpose(1, 2, 0) t, _ = net.getPerfProfile() @@ -50,4 +46,7 @@ while cv.waitKey(1) < 0: if args.median_filter: out = cv.medianBlur(out, args.median_filter) + out = np.clip(out, 0, 255) + out = out.astype(np.uint8) + cv.imshow('Styled image', out) From 0367a12b920a553fdc5349e3eebedf4808bce2b2 Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Fri, 8 Sep 2023 12:36:46 +0300 Subject: [PATCH 52/57] Check that cv::merge input matrices are not empty. 
--- modules/core/src/merge.dispatch.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/core/src/merge.dispatch.cpp b/modules/core/src/merge.dispatch.cpp index b95dc7345d..abde21e0df 100644 --- a/modules/core/src/merge.dispatch.cpp +++ b/modules/core/src/merge.dispatch.cpp @@ -118,6 +118,7 @@ void merge(const Mat* mv, size_t n, OutputArray _dst) CV_INSTRUMENT_REGION(); CV_Assert( mv && n > 0 ); + CV_Assert(!mv[0].empty()); int depth = mv[0].depth(); bool allch1 = true; From 910db5c9b7015e623dabf13b591e40a9b577a3c4 Mon Sep 17 00:00:00 2001 From: Alexander Lyulkov Date: Fri, 8 Sep 2023 18:36:13 +0700 Subject: [PATCH 53/57] changed readNetFromONNX to readNet --- samples/dnn/fast_neural_style.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/dnn/fast_neural_style.py b/samples/dnn/fast_neural_style.py index 22b8217b3a..43b8b121d6 100644 --- a/samples/dnn/fast_neural_style.py +++ b/samples/dnn/fast_neural_style.py @@ -13,7 +13,7 @@ parser.add_argument('--height', default=-1, type=int, help='Resize input to spec parser.add_argument('--median_filter', default=0, type=int, help='Kernel size of postprocessing blurring.') args = parser.parse_args() -net = cv.dnn.readNetFromONNX(cv.samples.findFile(args.model)) +net = cv.dnn.readNet(cv.samples.findFile(args.model)) net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV) if args.input: From 91cf0d18430631df0b9cdd7480667df46e9bc0f2 Mon Sep 17 00:00:00 2001 From: alexlyulkov Date: Fri, 8 Sep 2023 19:36:01 +0700 Subject: [PATCH 54/57] Merge pull request #24244 from alexlyulkov:al/update-dnn-js-face-recognition-sample Replaced torch7 by onnx model in js_face_recognition dnn sample #24244 Changed face recognition model in js_face_recognition dnn sample: replaced torch7 model from https://github.com/pyannote/pyannote-data by ONNX model from https://github.com/opencv/opencv_zoo/tree/main/models/face_recognition_sface --- samples/dnn/js_face_recognition.html | 12 ++++++------ 1 file changed, 6 
insertions(+), 6 deletions(-) diff --git a/samples/dnn/js_face_recognition.html b/samples/dnn/js_face_recognition.html index d94ead1e58..5893a5cf13 100644 --- a/samples/dnn/js_face_recognition.html +++ b/samples/dnn/js_face_recognition.html @@ -40,7 +40,7 @@ function detectFaces(img) { //! [Get 128 floating points feature vector] function face2vec(face) { - var blob = cv.blobFromImage(face, 1.0 / 255, {width: 96, height: 96}, [0, 0, 0, 0], true, false) + var blob = cv.blobFromImage(face, 1.0, {width: 112, height: 112}, [0, 0, 0, 0], true, false) netRecogn.setInput(blob); var vec = netRecogn.forward(); blob.delete(); @@ -71,15 +71,15 @@ function loadModels(callback) { var utils = new Utils(''); var proto = 'https://raw.githubusercontent.com/opencv/opencv/4.x/samples/dnn/face_detector/deploy_lowres.prototxt'; var weights = 'https://raw.githubusercontent.com/opencv/opencv_3rdparty/dnn_samples_face_detector_20180205_fp16/res10_300x300_ssd_iter_140000_fp16.caffemodel'; - var recognModel = 'https://raw.githubusercontent.com/pyannote/pyannote-data/master/openface.nn4.small2.v1.t7'; + var recognModel = 'https://media.githubusercontent.com/media/opencv/opencv_zoo/main/models/face_recognition_sface/face_recognition_sface_2021dec.onnx'; utils.createFileFromUrl('face_detector.prototxt', proto, () => { document.getElementById('status').innerHTML = 'Downloading face_detector.caffemodel'; utils.createFileFromUrl('face_detector.caffemodel', weights, () => { document.getElementById('status').innerHTML = 'Downloading OpenFace model'; - utils.createFileFromUrl('face_recognition.t7', recognModel, () => { + utils.createFileFromUrl('face_recognition_sface_2021dec.onnx', recognModel, () => { document.getElementById('status').innerHTML = ''; netDet = cv.readNetFromCaffe('face_detector.prototxt', 'face_detector.caffemodel'); - netRecogn = cv.readNetFromTorch('face_recognition.t7'); + netRecogn = cv.readNet('face_recognition_sface_2021dec.onnx'); callback(); }); }); @@ -121,8 +121,8 @@ 
function main() { persons[name] = face2vec(face).clone(); var canvas = document.createElement("canvas"); - canvas.setAttribute("width", 96); - canvas.setAttribute("height", 96); + canvas.setAttribute("width", 112); + canvas.setAttribute("height", 112); var cell = document.getElementById("targetImgs").insertCell(0); cell.appendChild(canvas); From c319735d9b1e21760bb51260fa155596892a6348 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sat, 9 Sep 2023 03:19:45 +0000 Subject: [PATCH 55/57] js: include LUT support --- modules/js/test/test_core.js | 41 ++++++++++++++++++++++++++++++++ modules/js/test/test_mat.js | 2 +- modules/js/test/tests.html | 5 ++-- modules/js/test/tests.js | 11 ++++++--- platforms/js/opencv_js.config.py | 1 + 5 files changed, 54 insertions(+), 6 deletions(-) create mode 100644 modules/js/test/test_core.js diff --git a/modules/js/test/test_core.js b/modules/js/test/test_core.js new file mode 100644 index 0000000000..14d4ffe72b --- /dev/null +++ b/modules/js/test/test_core.js @@ -0,0 +1,41 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +if (typeof module !== 'undefined' && module.exports) { + // The environment is Node.js + var cv = require('./opencv.js'); // eslint-disable-line no-var +} + +QUnit.module('Core', {}); + +QUnit.test('test_LUT', function(assert) { + // test LUT + { + let src = cv.matFromArray(3, 3, cv.CV_8UC1, [255, 128, 0, 0, 128, 255, 1, 2, 254]); + let lutTable = []; + for (let i = 0; i < 256; i++) + { + lutTable[i] = 255 - i; + } + let lut = cv.matFromArray(1, 256, cv.CV_8UC1, lutTable); + let dst = new cv.Mat(); + + cv.LUT(src, lut, dst); + + //console.log(dst.data); + assert.equal(dst.ucharAt(0), 0); + assert.equal(dst.ucharAt(1), 127); + assert.equal(dst.ucharAt(2), 255); + assert.equal(dst.ucharAt(3), 255); + assert.equal(dst.ucharAt(4), 127); + assert.equal(dst.ucharAt(5), 0); + assert.equal(dst.ucharAt(6), 254); + assert.equal(dst.ucharAt(7), 253); + assert.equal(dst.ucharAt(8), 1); + + src.delete(); + lut.delete(); + dst.delete(); + } +}); diff --git a/modules/js/test/test_mat.js b/modules/js/test/test_mat.js index 409ed1b123..fd3611cd2c 100644 --- a/modules/js/test/test_mat.js +++ b/modules/js/test/test_mat.js @@ -73,7 +73,7 @@ if (typeof module !== 'undefined' && module.exports) { var cv = require('./opencv.js'); // eslint-disable-line no-var } -QUnit.module('Core', {}); +QUnit.module('CoreMat', {}); QUnit.test('test_mat_creation', function(assert) { // Mat constructors. 
diff --git a/modules/js/test/tests.html b/modules/js/test/tests.html index de64ca7a29..b20013ec63 100644 --- a/modules/js/test/tests.html +++ b/modules/js/test/tests.html @@ -52,12 +52,12 @@ if (window.cv instanceof Promise) { window.cv.then((target) => { window.cv = target; - //console.log(cv.getBuildInformation()); + console.log(cv.getBuildInformation()); QUnit.start(); }) } else { // for backward compatible - // console.log(cv.getBuildInformation()); + console.log(cv.getBuildInformation()); QUnit.start(); } }, @@ -108,6 +108,7 @@ + diff --git a/modules/js/test/tests.js b/modules/js/test/tests.js index f3156f6ea0..74a4b87e45 100644 --- a/modules/js/test/tests.js +++ b/modules/js/test/tests.js @@ -44,10 +44,15 @@ testrunner.options.maxBlockDuration = 20000; // cause opencv_js.js need time to testrunner.run( { code: 'opencv.js', - tests: ['test_mat.js', 'test_utils.js', 'test_imgproc.js', - 'test_objdetect.js', 'test_video.js', 'test_features2d.js', + tests: ['test_mat.js', + 'test_utils.js', + 'test_core.js', + 'test_imgproc.js', + 'test_objdetect.js', + 'test_video.js', + 'test_features2d.js', 'test_photo.js', - 'test_calib3d.js' + 'test_calib3d.js', ], }, function(err, report) { diff --git a/platforms/js/opencv_js.config.py b/platforms/js/opencv_js.config.py index 69891ea71a..5dca863bef 100644 --- a/platforms/js/opencv_js.config.py +++ b/platforms/js/opencv_js.config.py @@ -9,6 +9,7 @@ core = { 'perspectiveTransform', 'polarToCart', 'pow', 'randn', 'randu', 'reduce', 'repeat', 'rotate', 'setIdentity', 'setRNGSeed', 'solve', 'solvePoly', 'split', 'sqrt', 'subtract', 'trace', 'transform', 'transpose', 'vconcat', 'setLogLevel', 'getLogLevel', + 'LUT', ], 'Algorithm': [], } From 5dc5b2785884736f2889402502f35020b0481f45 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Sat, 9 Sep 2023 20:38:59 +0300 Subject: [PATCH 56/57] Enable build with OpenVINO in Debug --- modules/dnn/src/net_openvino.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/modules/dnn/src/net_openvino.cpp b/modules/dnn/src/net_openvino.cpp index 4d08edeaaa..c274f44a87 100644 --- a/modules/dnn/src/net_openvino.cpp +++ b/modules/dnn/src/net_openvino.cpp @@ -252,7 +252,7 @@ void NetImplOpenVINO::addNgraphOutputs(LayerData& ld) CV_Assert(!ieInpNode->net.empty()); if (layerNet != ieInpNode->net) { - CV_LOG_DEBUG(NULL, "DNN/IE: pin output between subnets: " << ieInpNode->node->get_friendly_name()); + CV_LOG_DEBUG(NULL, "DNN/IE: pin output between subnets: " << ieInpNode->node.get_node()->get_friendly_name()); ieInpNode->net->addOutput(ieInpNode); } } From 02525abd9fed88c39a393285f3c78880efa09101 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sun, 10 Sep 2023 13:11:01 +0000 Subject: [PATCH 57/57] cmake: revise OPENCV_DNN_BACKEND_DEFAULT integration - disable message on default value --- CMakeLists.txt | 2 +- modules/dnn/CMakeLists.txt | 7 ++++--- modules/dnn/src/dnn_params.cpp | 4 ++++ 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2a214a1a91..40d80e112c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1672,7 +1672,7 @@ else() endif() endif() -if(BUILD_opencv_dnn) +if(BUILD_opencv_dnn AND OPENCV_DNN_BACKEND_DEFAULT) status(" Default DNN backend:" ${OPENCV_DNN_BACKEND_DEFAULT}) endif() diff --git a/modules/dnn/CMakeLists.txt b/modules/dnn/CMakeLists.txt index 896ce5ded7..774e3c7b5a 100644 --- a/modules/dnn/CMakeLists.txt +++ b/modules/dnn/CMakeLists.txt @@ -227,9 +227,10 @@ if(TARGET ocv.3rdparty.openvino AND OPENCV_DNN_OPENVINO) endif() endif() -set(OPENCV_DNN_BACKEND_DEFAULT "DNN_BACKEND_OPENCV" CACHE STRING "Default backend used by the DNN module") -ocv_append_source_file_compile_definitions("${CMAKE_CURRENT_LIST_DIR}/src/dnn_params.cpp" "OPENCV_DNN_BACKEND_DEFAULT=${OPENCV_DNN_BACKEND_DEFAULT}") - +set(OPENCV_DNN_BACKEND_DEFAULT "" CACHE STRING "Default backend used by the DNN module (DNN_BACKEND_OPENCV if empty)") +if(OPENCV_DNN_BACKEND_DEFAULT) + 
ocv_append_source_file_compile_definitions("${CMAKE_CURRENT_LIST_DIR}/src/dnn_params.cpp" "OPENCV_DNN_BACKEND_DEFAULT=${OPENCV_DNN_BACKEND_DEFAULT}") +endif() ocv_install_used_external_targets(${libs} ${dnn_runtime_libs}) diff --git a/modules/dnn/src/dnn_params.cpp b/modules/dnn/src/dnn_params.cpp index 19d453012c..a76f4cd512 100644 --- a/modules/dnn/src/dnn_params.cpp +++ b/modules/dnn/src/dnn_params.cpp @@ -36,7 +36,11 @@ bool getParam_DNN_OPENCL_ALLOW_ALL_DEVICES() int getParam_DNN_BACKEND_DEFAULT() { static int PARAM_DNN_BACKEND_DEFAULT = (int)utils::getConfigurationParameterSizeT("OPENCV_DNN_BACKEND_DEFAULT", +#ifdef OPENCV_DNN_BACKEND_DEFAULT (size_t)OPENCV_DNN_BACKEND_DEFAULT +#else + (size_t)DNN_BACKEND_OPENCV +#endif ); return PARAM_DNN_BACKEND_DEFAULT; }