From 57da72d4449b4f0ea5950e82733375e60316d7c5 Mon Sep 17 00:00:00 2001 From: Sean McBride Date: Fri, 2 Jun 2023 10:57:28 -0400 Subject: [PATCH 01/57] Fixed invalid cast and unaligned memory access Although acceptible to Intel CPUs, it's still undefined behaviour according to the C++ standard. It can be replaced with memcpy, which makes the code simpler, and it generates the same assembly code with gcc and clang with -O2 (verified with godbolt). Also expanded the test to include other little endian CPUs by testing for __LITTLE_ENDIAN__. --- modules/core/src/persistence.cpp | 33 +++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/modules/core/src/persistence.cpp b/modules/core/src/persistence.cpp index 0d64bab094..6a71c1ff03 100644 --- a/modules/core/src/persistence.cpp +++ b/modules/core/src/persistence.cpp @@ -295,16 +295,20 @@ int decodeSimpleFormat( const char* dt ) } -#if defined __i386__ || defined(_M_IX86) || defined __x86_64__ || defined(_M_X64) -#define CV_UNALIGNED_LITTLE_ENDIAN_MEM_ACCESS 1 +#if defined __i386__ || defined(_M_IX86) || defined __x86_64__ || defined(_M_X64) || \ + (defined (__LITTLE_ENDIAN__) && __LITTLE_ENDIAN__) +#define CV_LITTLE_ENDIAN_MEM_ACCESS 1 #else -#define CV_UNALIGNED_LITTLE_ENDIAN_MEM_ACCESS 0 +#define CV_LITTLE_ENDIAN_MEM_ACCESS 0 #endif static inline int readInt(const uchar* p) { -#if CV_UNALIGNED_LITTLE_ENDIAN_MEM_ACCESS - return *(const int*)p; + // On little endian CPUs, both branches produce the same result. On big endian, only the else branch does. +#if CV_LITTLE_ENDIAN_MEM_ACCESS + int val; + memcpy(&val, p, sizeof(val)); + return val; #else int val = (int)(p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24)); return val; @@ -313,8 +317,11 @@ static inline int readInt(const uchar* p) static inline double readReal(const uchar* p) { -#if CV_UNALIGNED_LITTLE_ENDIAN_MEM_ACCESS - return *(const double*)p; + // On little endian CPUs, both branches produce the same result. 
On big endian, only the else branch does. +#if CV_LITTLE_ENDIAN_MEM_ACCESS + double val; + memcpy(&val, p, sizeof(val)); + return val; #else unsigned val0 = (unsigned)(p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24)); unsigned val1 = (unsigned)(p[4] | (p[5] << 8) | (p[6] << 16) | (p[7] << 24)); @@ -326,9 +333,9 @@ static inline double readReal(const uchar* p) static inline void writeInt(uchar* p, int ival) { -#if CV_UNALIGNED_LITTLE_ENDIAN_MEM_ACCESS - int* ip = (int*)p; - *ip = ival; + // On little endian CPUs, both branches produce the same result. On big endian, only the else branch does. +#if CV_LITTLE_ENDIAN_MEM_ACCESS + memcpy(p, &ival, sizeof(ival)); #else p[0] = (uchar)ival; p[1] = (uchar)(ival >> 8); @@ -339,9 +346,9 @@ static inline void writeInt(uchar* p, int ival) static inline void writeReal(uchar* p, double fval) { -#if CV_UNALIGNED_LITTLE_ENDIAN_MEM_ACCESS - double* fp = (double*)p; - *fp = fval; + // On little endian CPUs, both branches produce the same result. On big endian, only the else branch does. +#if CV_LITTLE_ENDIAN_MEM_ACCESS + memcpy(p, &fval, sizeof(fval)); #else Cv64suf v; v.f = fval; From d25d44156b67a290a769553b5889cd6636983cf5 Mon Sep 17 00:00:00 2001 From: Wang Kai Date: Sun, 2 Jul 2023 15:33:52 +0800 Subject: [PATCH 02/57] removing unreachable codes in `gbackend` --- modules/gapi/src/api/gbackend.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/modules/gapi/src/api/gbackend.cpp b/modules/gapi/src/api/gbackend.cpp index efbe17a305..46c8dc1640 100644 --- a/modules/gapi/src/api/gbackend.cpp +++ b/modules/gapi/src/api/gbackend.cpp @@ -36,7 +36,6 @@ cv::gapi::GBackend::Priv::compile(const ade::Graph&, { // ...and this method is here for the same reason! 
GAPI_Error("InternalError"); - return {}; } std::unique_ptr @@ -224,7 +223,6 @@ void bindOutArg(Mag& mag, const RcDesc &rc, const GRunArgP &arg, HandleRMat hand default: util::throw_error(std::logic_error("Unsupported GShape type")); - break; } } @@ -256,7 +254,6 @@ void resetInternalData(Mag& mag, const Data &d) default: util::throw_error(std::logic_error("Unsupported GShape type")); - break; } } @@ -284,7 +281,6 @@ cv::GRunArg getArg(const Mag& mag, const RcDesc &ref) mag.meta().at(ref.id)); default: util::throw_error(std::logic_error("Unsupported GShape type")); - break; } } @@ -327,7 +323,6 @@ cv::GRunArgP getObjPtr(Mag& mag, const RcDesc &rc, bool is_umat) default: util::throw_error(std::logic_error("Unsupported GShape type")); - break; } } @@ -359,7 +354,6 @@ void writeBack(const Mag& mag, const RcDesc &rc, GRunArgP &g_arg) default: util::throw_error(std::logic_error("Unsupported GShape type")); - break; } } From 68968eda8dffb4ceedc46088c7740f04f585acfd Mon Sep 17 00:00:00 2001 From: Kumataro Date: Tue, 1 Aug 2023 18:56:20 +0900 Subject: [PATCH 03/57] videoio: doc: add odd width or height limitation for FFMPEG --- modules/videoio/include/opencv2/videoio.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modules/videoio/include/opencv2/videoio.hpp b/modules/videoio/include/opencv2/videoio.hpp index dbed243b56..eb2e803b3c 100644 --- a/modules/videoio/include/opencv2/videoio.hpp +++ b/modules/videoio/include/opencv2/videoio.hpp @@ -1036,6 +1036,9 @@ public: - Most codecs are lossy. If you want lossless video file you need to use a lossless codecs (eg. FFMPEG FFV1, Huffman HFYU, Lagarith LAGS, etc...) - If FFMPEG is enabled, using `codec=0; fps=0;` you can create an uncompressed (raw) video file. + - If FFMPEG is used, we allow frames of odd width or height, but in this case we truncate + the rightmost column/the bottom row. Probably, this should be handled more elegantly, + but some internal functions inside FFMPEG swscale require even width/height. 
*/ CV_WRAP VideoWriter(const String& filename, int fourcc, double fps, Size frameSize, bool isColor = true); From bea0c1b660ea94d707b5805fc7adaa764fcfdfd2 Mon Sep 17 00:00:00 2001 From: cudawarped <12133430+cudawarped@users.noreply.github.com> Date: Tue, 1 Aug 2023 15:09:37 +0300 Subject: [PATCH 04/57] cuda: Fix GpuMat::copyTo and GpuMat::converTo python bindings --- modules/core/include/opencv2/core/cuda.hpp | 46 ++++++++++++--- modules/python/test/test_cuda.py | 68 ++++++++++++++++++++++ 2 files changed, 105 insertions(+), 9 deletions(-) diff --git a/modules/core/include/opencv2/core/cuda.hpp b/modules/core/include/opencv2/core/cuda.hpp index 5dca06df98..9d210ed7b5 100644 --- a/modules/core/include/opencv2/core/cuda.hpp +++ b/modules/core/include/opencv2/core/cuda.hpp @@ -198,16 +198,32 @@ public: CV_WRAP GpuMat clone() const; //! copies the GpuMat content to device memory (Blocking call) - CV_WRAP void copyTo(OutputArray dst) const; + void copyTo(OutputArray dst) const; + //! bindings overload which copies the GpuMat content to device memory (Blocking call) + CV_WRAP void copyTo(CV_OUT GpuMat& dst) const { + copyTo(static_cast(dst)); + } //! copies the GpuMat content to device memory (Non-Blocking call) - CV_WRAP void copyTo(OutputArray dst, Stream& stream) const; + void copyTo(OutputArray dst, Stream& stream) const; + //! bindings overload which copies the GpuMat content to device memory (Non-Blocking call) + CV_WRAP void copyTo(CV_OUT GpuMat& dst, Stream& stream) const { + copyTo(static_cast(dst), stream); + } //! copies those GpuMat elements to "m" that are marked with non-zero mask elements (Blocking call) - CV_WRAP void copyTo(OutputArray dst, InputArray mask) const; + void copyTo(OutputArray dst, InputArray mask) const; + //! 
bindings overload which copies those GpuMat elements to "m" that are marked with non-zero mask elements (Blocking call) + CV_WRAP void copyTo(CV_OUT GpuMat& dst, GpuMat& mask) const { + copyTo(static_cast(dst), static_cast(mask)); + } //! copies those GpuMat elements to "m" that are marked with non-zero mask elements (Non-Blocking call) - CV_WRAP void copyTo(OutputArray dst, InputArray mask, Stream& stream) const; + void copyTo(OutputArray dst, InputArray mask, Stream& stream) const; + //! bindings overload which copies those GpuMat elements to "m" that are marked with non-zero mask elements (Non-Blocking call) + CV_WRAP void copyTo(CV_OUT GpuMat& dst, GpuMat& mask, Stream& stream) const { + copyTo(static_cast(dst), static_cast(mask), stream); + } //! sets some of the GpuMat elements to s (Blocking call) CV_WRAP GpuMat& setTo(Scalar s); @@ -222,19 +238,31 @@ public: CV_WRAP GpuMat& setTo(Scalar s, InputArray mask, Stream& stream); //! converts GpuMat to another datatype (Blocking call) - CV_WRAP void convertTo(OutputArray dst, int rtype) const; + void convertTo(OutputArray dst, int rtype) const; //! converts GpuMat to another datatype (Non-Blocking call) - CV_WRAP void convertTo(OutputArray dst, int rtype, Stream& stream) const; + void convertTo(OutputArray dst, int rtype, Stream& stream) const; + //! bindings overload which converts GpuMat to another datatype (Non-Blocking call) + CV_WRAP void convertTo(CV_OUT GpuMat& dst, int rtype, Stream& stream) const { + convertTo(static_cast(dst), rtype, stream); + } //! converts GpuMat to another datatype with scaling (Blocking call) - CV_WRAP void convertTo(OutputArray dst, int rtype, double alpha, double beta = 0.0) const; + void convertTo(OutputArray dst, int rtype, double alpha, double beta = 0.0) const; + //! 
bindings overload which converts GpuMat to another datatype with scaling(Blocking call) + CV_WRAP void convertTo(CV_OUT GpuMat& dst, int rtype, double alpha = 1.0, double beta = 0.0) const { + convertTo(static_cast(dst), rtype, alpha, beta); + } //! converts GpuMat to another datatype with scaling (Non-Blocking call) - CV_WRAP void convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const; + void convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const; //! converts GpuMat to another datatype with scaling (Non-Blocking call) - CV_WRAP void convertTo(OutputArray dst, int rtype, double alpha, double beta, Stream& stream) const; + void convertTo(OutputArray dst, int rtype, double alpha, double beta, Stream& stream) const; + //! bindings overload which converts GpuMat to another datatype with scaling (Non-Blocking call) + CV_WRAP void convertTo(CV_OUT GpuMat& dst, int rtype, double alpha, double beta, Stream& stream) const { + convertTo(static_cast(dst), rtype, alpha, beta, stream); + } CV_WRAP void assignTo(GpuMat& m, int type = -1) const; diff --git a/modules/python/test/test_cuda.py b/modules/python/test/test_cuda.py index 851a23e880..c886342832 100644 --- a/modules/python/test/test_cuda.py +++ b/modules/python/test/test_cuda.py @@ -70,6 +70,74 @@ class cuda_test(NewOpenCVTests): self.assertTrue(cuMat.step == 0) self.assertTrue(cuMat.size() == (0, 0)) + def test_cuda_convertTo(self): + # setup + npMat_8UC4 = (np.random.random((128, 128, 4)) * 255).astype(np.uint8) + npMat_32FC4 = npMat_8UC4.astype(np.single) + new_type = cv.CV_32FC4 + + # sync + # in/out + cuMat_8UC4 = cv.cuda_GpuMat(npMat_8UC4) + cuMat_32FC4 = cv.cuda_GpuMat(cuMat_8UC4.size(), new_type) + cuMat_32FC4_out = cuMat_8UC4.convertTo(new_type, cuMat_32FC4) + self.assertTrue(cuMat_32FC4.cudaPtr() == cuMat_32FC4_out.cudaPtr()) + npMat_32FC4_out = cuMat_32FC4.download() + self.assertTrue(np.array_equal(npMat_32FC4, npMat_32FC4_out)) + # out + cuMat_32FC4_out = 
cuMat_8UC4.convertTo(new_type) + npMat_32FC4_out = cuMat_32FC4.download() + self.assertTrue(np.array_equal(npMat_32FC4, npMat_32FC4_out)) + + # async + stream = cv.cuda.Stream() + cuMat_32FC4 = cv.cuda_GpuMat(cuMat_8UC4.size(), new_type) + cuMat_32FC4_out = cuMat_8UC4.convertTo(new_type, cuMat_32FC4) + # in/out + cuMat_32FC4_out = cuMat_8UC4.convertTo(new_type, 1, 0, stream, cuMat_32FC4) + self.assertTrue(cuMat_32FC4.cudaPtr() == cuMat_32FC4_out.cudaPtr()) + npMat_32FC4_out = cuMat_32FC4.download(stream) + stream.waitForCompletion() + self.assertTrue(np.array_equal(npMat_32FC4, npMat_32FC4_out)) + # out + cuMat_32FC4_out = cuMat_8UC4.convertTo(new_type, 1, 0, stream) + npMat_32FC4_out = cuMat_32FC4.download(stream) + stream.waitForCompletion() + self.assertTrue(np.array_equal(npMat_32FC4, npMat_32FC4_out)) + + def test_cuda_copyTo(self): + # setup + npMat_8UC4 = (np.random.random((128, 128, 4)) * 255).astype(np.uint8) + + # sync + # in/out + cuMat_8UC4 = cv.cuda_GpuMat(npMat_8UC4) + cuMat_8UC4_dst = cv.cuda_GpuMat(cuMat_8UC4.size(), cuMat_8UC4.type()) + cuMat_8UC4_out = cuMat_8UC4.copyTo(cuMat_8UC4_dst) + self.assertTrue(cuMat_8UC4_out.cudaPtr() == cuMat_8UC4_dst.cudaPtr()) + npMat_8UC4_out = cuMat_8UC4_out.download() + self.assertTrue(np.array_equal(npMat_8UC4, npMat_8UC4_out)) + # out + cuMat_8UC4_out = cuMat_8UC4.copyTo() + npMat_8UC4_out = cuMat_8UC4_out.download() + self.assertTrue(np.array_equal(npMat_8UC4, npMat_8UC4_out)) + + # async + stream = cv.cuda.Stream() + # in/out + cuMat_8UC4 = cv.cuda_GpuMat(npMat_8UC4) + cuMat_8UC4_dst = cv.cuda_GpuMat(cuMat_8UC4.size(), cuMat_8UC4.type()) + cuMat_8UC4_out = cuMat_8UC4.copyTo(cuMat_8UC4_dst, stream) + self.assertTrue(cuMat_8UC4_out.cudaPtr() == cuMat_8UC4_out.cudaPtr()) + npMat_8UC4_out = cuMat_8UC4_dst.download(stream) + stream.waitForCompletion() + self.assertTrue(np.array_equal(npMat_8UC4, npMat_8UC4_out)) + # out + cuMat_8UC4_out = cuMat_8UC4.copyTo(stream) + npMat_8UC4_out = cuMat_8UC4_out.download(stream) + 
stream.waitForCompletion() + self.assertTrue(np.array_equal(npMat_8UC4, npMat_8UC4_out)) + def test_cuda_denoising(self): self.assertEqual(True, hasattr(cv.cuda, 'fastNlMeansDenoising')) self.assertEqual(True, hasattr(cv.cuda, 'fastNlMeansDenoisingColored')) From e1d0f07c9099a01487bd345109f9b768d7f257bc Mon Sep 17 00:00:00 2001 From: Mihir Patil Date: Wed, 2 Aug 2023 00:01:37 -0400 Subject: [PATCH 05/57] highgui(cocoa): fix fullscreen behavior --- modules/highgui/src/window_cocoa.mm | 39 +++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/modules/highgui/src/window_cocoa.mm b/modules/highgui/src/window_cocoa.mm index 86f38d0ae8..5800ab3f94 100644 --- a/modules/highgui/src/window_cocoa.mm +++ b/modules/highgui/src/window_cocoa.mm @@ -195,6 +195,9 @@ CV_IMPL void cvDestroyWindow( const char* name) //cout << "cvDestroyWindow" << endl; CVWindow *window = cvGetWindow(name); if(window) { + if ([window styleMask] & NSFullScreenWindowMask) { + [window toggleFullScreen:nil]; + } [window close]; [windows removeObjectForKey:[NSString stringWithFormat:@"%s", name]]; } @@ -701,7 +704,11 @@ double cvGetModeWindow_COCOA( const char* name ) void cvSetModeWindow_COCOA( const char* name, double prop_value ) { CVWindow *window = nil; + +#if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_7 NSDictionary *fullscreenOptions = nil; +#endif + NSAutoreleasePool* localpool = nil; CV_FUNCNAME( "cvSetModeWindow_COCOA" ); @@ -724,7 +731,35 @@ void cvSetModeWindow_COCOA( const char* name, double prop_value ) } localpool = [[NSAutoreleasePool alloc] init]; + + // std::cout << "setting mode" << std::endl; +#if MAC_OS_X_VERSION_MAX_ALLOWED > MAC_OS_X_VERSION_10_6 + if ( ([window styleMask] & NSFullScreenWindowMask) && prop_value==CV_WINDOW_NORMAL ) + { + // std::cout << "exiting fullscreen" << std::endl; + [window toggleFullScreen:nil]; + window.status=CV_WINDOW_NORMAL; + } + else if( !([window styleMask] & NSFullScreenWindowMask) && 
prop_value==CV_WINDOW_FULLSCREEN ) + { + // std::cout << "entering fullscreen" << std::endl; + [window setCollectionBehavior:NSWindowCollectionBehaviorFullScreenPrimary]; + + NSScreen* screen = [window screen]; + + NSRect frame = [screen frame]; + [window setFrame:frame display:YES]; + + [window setContentSize:frame.size]; + + [window toggleFullScreen:nil]; + + [window setFrameTopLeftPoint: frame.origin]; + + window.status=CV_WINDOW_FULLSCREEN; + } +#else fullscreenOptions = [NSDictionary dictionaryWithObject:[NSNumber numberWithBool:YES] forKey:NSFullScreenModeSetting]; if ( [[window contentView] isInFullScreenMode] && prop_value==CV_WINDOW_NORMAL ) { @@ -736,7 +771,7 @@ void cvSetModeWindow_COCOA( const char* name, double prop_value ) [[window contentView] enterFullScreenMode:[NSScreen mainScreen] withOptions:fullscreenOptions]; window.status=CV_WINDOW_FULLSCREEN; } - +#endif [localpool drain]; __END__; @@ -810,7 +845,7 @@ void cvSetPropTopmost_COCOA( const char* name, const bool topmost ) CV_ERROR( CV_StsNullPtr, "NULL window" ); } - if ([[window contentView] isInFullScreenMode]) + if (([window styleMask] & NSFullScreenWindowMask)) { EXIT; } From e4ad7e3778d0be868568085f0a1fc6b0d84d9cfe Mon Sep 17 00:00:00 2001 From: cudawarped <12133430+cudawarped@users.noreply.github.com> Date: Wed, 19 Jul 2023 07:59:05 +0300 Subject: [PATCH 06/57] VideoCapture: remove decoder initialization when CAP_PROP_FORMAT== -1 (rawMode == true) --- modules/videoio/include/opencv2/videoio.hpp | 2 +- modules/videoio/src/cap_ffmpeg_impl.hpp | 105 +++++++++++++------- modules/videoio/test/test_ffmpeg.cpp | 30 ++++++ 3 files changed, 102 insertions(+), 35 deletions(-) diff --git a/modules/videoio/include/opencv2/videoio.hpp b/modules/videoio/include/opencv2/videoio.hpp index dbed243b56..f40afbb4a2 100644 --- a/modules/videoio/include/opencv2/videoio.hpp +++ b/modules/videoio/include/opencv2/videoio.hpp @@ -140,7 +140,7 @@ enum VideoCaptureAPIs { */ enum VideoCaptureProperties { 
CAP_PROP_POS_MSEC =0, //!< Current position of the video file in milliseconds. - CAP_PROP_POS_FRAMES =1, //!< 0-based index of the frame to be decoded/captured next. + CAP_PROP_POS_FRAMES =1, //!< 0-based index of the frame to be decoded/captured next. When the index i is set in RAW mode (CAP_PROP_FORMAT == -1) this will seek to the key frame k, where k <= i. CAP_PROP_POS_AVI_RATIO =2, //!< Relative position of the video file: 0=start of the film, 1=end of the film. CAP_PROP_FRAME_WIDTH =3, //!< Width of the frames in the video stream. CAP_PROP_FRAME_HEIGHT =4, //!< Height of the frames in the video stream. diff --git a/modules/videoio/src/cap_ffmpeg_impl.hpp b/modules/videoio/src/cap_ffmpeg_impl.hpp index 982bc5c87d..e4431b323e 100644 --- a/modules/videoio/src/cap_ffmpeg_impl.hpp +++ b/modules/videoio/src/cap_ffmpeg_impl.hpp @@ -580,6 +580,7 @@ struct CvCapture_FFMPEG bool processRawPacket(); bool rawMode; bool rawModeInitialized; + bool rawSeek; bool convertRGB; AVPacket packet_filtered; #if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(58, 20, 100) @@ -633,6 +634,7 @@ void CvCapture_FFMPEG::init() rawMode = false; rawModeInitialized = false; + rawSeek = false; convertRGB = true; memset(&packet_filtered, 0, sizeof(packet_filtered)); av_init_packet(&packet_filtered); @@ -1051,33 +1053,35 @@ bool CvCapture_FFMPEG::open(const char* _filename, const VideoCaptureParameters& return false; } } - if (params.has(CAP_PROP_HW_ACCELERATION)) - { - va_type = params.get(CAP_PROP_HW_ACCELERATION); + if(!rawMode) { + if (params.has(CAP_PROP_HW_ACCELERATION)) + { + va_type = params.get(CAP_PROP_HW_ACCELERATION); #if !USE_AV_HW_CODECS - if (va_type != VIDEO_ACCELERATION_NONE && va_type != VIDEO_ACCELERATION_ANY) - { - CV_LOG_ERROR(NULL, "VIDEOIO/FFMPEG: FFmpeg backend is build without acceleration support. Can't handle CAP_PROP_HW_ACCELERATION parameter. 
Bailout"); - return false; - } + if (va_type != VIDEO_ACCELERATION_NONE && va_type != VIDEO_ACCELERATION_ANY) + { + CV_LOG_ERROR(NULL, "VIDEOIO/FFMPEG: FFmpeg backend is build without acceleration support. Can't handle CAP_PROP_HW_ACCELERATION parameter. Bailout"); + return false; + } #endif - } - if (params.has(CAP_PROP_HW_DEVICE)) - { - hw_device = params.get(CAP_PROP_HW_DEVICE); - if (va_type == VIDEO_ACCELERATION_NONE && hw_device != -1) - { - CV_LOG_ERROR(NULL, "VIDEOIO/FFMPEG: Invalid usage of CAP_PROP_HW_DEVICE without requested H/W acceleration. Bailout"); - return false; } - if (va_type == VIDEO_ACCELERATION_ANY && hw_device != -1) + if (params.has(CAP_PROP_HW_DEVICE)) { - CV_LOG_ERROR(NULL, "VIDEOIO/FFMPEG: Invalid usage of CAP_PROP_HW_DEVICE with 'ANY' H/W acceleration. Bailout"); - return false; + hw_device = params.get(CAP_PROP_HW_DEVICE); + if (va_type == VIDEO_ACCELERATION_NONE && hw_device != -1) + { + CV_LOG_ERROR(NULL, "VIDEOIO/FFMPEG: Invalid usage of CAP_PROP_HW_DEVICE without requested H/W acceleration. Bailout"); + return false; + } + if (va_type == VIDEO_ACCELERATION_ANY && hw_device != -1) + { + CV_LOG_ERROR(NULL, "VIDEOIO/FFMPEG: Invalid usage of CAP_PROP_HW_DEVICE with 'ANY' H/W acceleration. 
Bailout"); + return false; + } + } + if (params.has(CAP_PROP_HW_ACCELERATION_USE_OPENCL)) { + use_opencl = params.get(CAP_PROP_HW_ACCELERATION_USE_OPENCL); } - } - if (params.has(CAP_PROP_HW_ACCELERATION_USE_OPENCL)) { - use_opencl = params.get(CAP_PROP_HW_ACCELERATION_USE_OPENCL); } #if USE_AV_INTERRUPT_CALLBACK if (params.has(CAP_PROP_OPEN_TIMEOUT_MSEC)) @@ -1153,6 +1157,23 @@ bool CvCapture_FFMPEG::open(const char* _filename, const VideoCaptureParameters& CV_LOG_WARNING(NULL, "Unable to read codec parameters from stream (" << _opencv_ffmpeg_get_error_string(err) << ")"); goto exit_func; } + + if (rawMode) { + video_stream = av_find_best_stream(ic, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0); + if (video_stream < 0) { + close(); + return false; + } + video_st = ic->streams[video_stream]; +#ifndef CV_FFMPEG_CODECPAR + frame.height = video_st->codec->height; + frame.width = video_st->codec->width; +#else + frame.height = video_st->codecpar->height; + frame.width = video_st->codecpar->width; +#endif + return true; + } for(i = 0; i < ic->nb_streams; i++) { #ifndef CV_FFMPEG_CODECPAR @@ -1440,6 +1461,10 @@ bool CvCapture_FFMPEG::processRawPacket() bool CvCapture_FFMPEG::grabFrame() { + if (rawSeek) { + rawSeek = false; + return true; + } bool valid = false; static const size_t max_read_attempts = cv::utils::getConfigurationParameterSizeT("OPENCV_FFMPEG_READ_ATTEMPTS", 4096); @@ -1447,7 +1472,7 @@ bool CvCapture_FFMPEG::grabFrame() size_t cur_read_attempts = 0; size_t cur_decode_attempts = 0; - if( !ic || !video_st || !context ) return false; + if( !ic || !video_st || (!rawMode && !context) ) return false; if( ic->streams[video_stream]->nb_frames > 0 && frame_number > ic->streams[video_stream]->nb_frames ) @@ -1464,7 +1489,7 @@ bool CvCapture_FFMPEG::grabFrame() #if USE_AV_SEND_FRAME_API // check if we can receive frame from previously decoded packet - valid = avcodec_receive_frame(context, picture) >= 0; + valid = rawMode ? 
false : avcodec_receive_frame(context, picture) >= 0; #endif // get the next frame @@ -1548,12 +1573,16 @@ bool CvCapture_FFMPEG::grabFrame() } if (valid) { - if( picture_pts == AV_NOPTS_VALUE_ ) - picture_pts = picture->CV_FFMPEG_PTS_FIELD != AV_NOPTS_VALUE_ && picture->CV_FFMPEG_PTS_FIELD != 0 ? picture->CV_FFMPEG_PTS_FIELD : picture->pkt_dts; - frame_number++; + if (picture_pts == AV_NOPTS_VALUE_) { + if (!rawMode) + picture_pts = picture->CV_FFMPEG_PTS_FIELD != AV_NOPTS_VALUE_ && picture->CV_FFMPEG_PTS_FIELD != 0 ? picture->CV_FFMPEG_PTS_FIELD : picture->pkt_dts; + else + picture_pts = packet.pts != AV_NOPTS_VALUE_ && packet.pts != 0 ? packet.pts : packet.dts; + frame_number++; + } } - if (!rawMode && valid && first_frame_number < 0) + if (valid && first_frame_number < 0) first_frame_number = dts_to_frame_number(picture_pts); #if USE_AV_INTERRUPT_CALLBACK @@ -1567,7 +1596,7 @@ bool CvCapture_FFMPEG::grabFrame() bool CvCapture_FFMPEG::retrieveFrame(int flag, unsigned char** data, int* step, int* width, int* height, int* cn, int* depth) { - if (!video_st || !context) + if (!video_st || (!rawMode && !context)) return false; if (rawMode || flag == extraDataIdx) @@ -1735,7 +1764,7 @@ static inline double getCodecIdFourcc(const AVCodecID codec_id) double CvCapture_FFMPEG::getProperty( int property_id ) const { - if( !video_st || !context ) return 0; + if( !video_st || (!rawMode && !context) ) return 0; switch( property_id ) { @@ -1814,7 +1843,8 @@ double CvCapture_FFMPEG::getProperty( int property_id ) const //ic->start_time_realtime is in microseconds return ((double)ic->start_time_realtime); case CAP_PROP_N_THREADS: - return static_cast(context->thread_count); + if (!rawMode) + return static_cast(context->thread_count); default: break; } @@ -1910,9 +1940,11 @@ void CvCapture_FFMPEG::get_rotation_angle() void CvCapture_FFMPEG::seek(int64_t _frame_number) { - CV_Assert(context); + if (!rawMode) { + CV_Assert(context); + } _frame_number = std::min(_frame_number, 
get_total_frames()); - int delta = 16; + int delta = !rawMode ? 16 : 0; // if we have not grabbed a single frame before first seek, let's read the first frame // and get some valuable information during the process @@ -1927,7 +1959,8 @@ void CvCapture_FFMPEG::seek(int64_t _frame_number) double time_base = r2d(ic->streams[video_stream]->time_base); time_stamp += (int64_t)(sec / time_base + 0.5); if (get_total_frames() > 1) av_seek_frame(ic, video_stream, time_stamp, AVSEEK_FLAG_BACKWARD); - avcodec_flush_buffers(context); + if(!rawMode) + avcodec_flush_buffers(context); if( _frame_number > 0 ) { grabFrame(); @@ -1935,6 +1968,10 @@ void CvCapture_FFMPEG::seek(int64_t _frame_number) if( _frame_number > 1 ) { frame_number = dts_to_frame_number(picture_pts) - first_frame_number; + if (rawMode) { + rawSeek = true; + break; + } //printf("_frame_number = %d, frame_number = %d, delta = %d\n", // (int)_frame_number, (int)frame_number, delta); diff --git a/modules/videoio/test/test_ffmpeg.cpp b/modules/videoio/test/test_ffmpeg.cpp index 35d425d5c1..0496b8c369 100644 --- a/modules/videoio/test/test_ffmpeg.cpp +++ b/modules/videoio/test/test_ffmpeg.cpp @@ -476,6 +476,16 @@ static void ffmpeg_check_read_raw(VideoCapture& cap) EXPECT_EQ(CV_8UC1, data.type()) << "CV_8UC1 != " << typeToString(data.type()); EXPECT_TRUE(data.rows == 1 || data.cols == 1) << data.size; EXPECT_EQ((size_t)37118, data.total()); + +#ifndef WIN32 + // 12 is the nearset key frame to frame 18 + EXPECT_TRUE(cap.set(CAP_PROP_POS_FRAMES, 18.)); + EXPECT_EQ(cap.get(CAP_PROP_POS_FRAMES), 12.); + cap >> data; + EXPECT_EQ(CV_8UC1, data.type()) << "CV_8UC1 != " << typeToString(data.type()); + EXPECT_TRUE(data.rows == 1 || data.cols == 1) << data.size; + EXPECT_EQ((size_t)8726, data.total()); +#endif } TEST(videoio_ffmpeg, ffmpeg_check_extra_data) @@ -506,6 +516,16 @@ TEST(videoio_ffmpeg, open_with_property) CAP_PROP_FORMAT, -1 // demux only })); + // confirm properties are returned without initializing AVCodecContext 
+ EXPECT_EQ(cap.get(CAP_PROP_FORMAT), -1); + EXPECT_EQ(static_cast(cap.get(CAP_PROP_FOURCC)), fourccFromString("FMP4")); +#ifndef WIN32 + EXPECT_EQ(cap.get(CAP_PROP_N_THREADS), 0.0); +#endif + EXPECT_EQ(cap.get(CAP_PROP_FRAME_HEIGHT), 384.0); + EXPECT_EQ(cap.get(CAP_PROP_FRAME_WIDTH), 672.0); + EXPECT_EQ(cap.get(CAP_PROP_FRAME_COUNT), 125); + EXPECT_EQ(cap.get(CAP_PROP_FPS), 24.0); ffmpeg_check_read_raw(cap); } @@ -519,6 +539,16 @@ TEST(videoio_ffmpeg, create_with_property) CAP_PROP_FORMAT, -1 // demux only }); + // confirm properties are returned without initializing AVCodecContext + EXPECT_TRUE(cap.get(CAP_PROP_FORMAT) == -1); + EXPECT_EQ(static_cast(cap.get(CAP_PROP_FOURCC)), fourccFromString("FMP4")); +#ifndef WIN32 + EXPECT_EQ(cap.get(CAP_PROP_N_THREADS), 0.0); +#endif + EXPECT_EQ(cap.get(CAP_PROP_FRAME_HEIGHT), 384.0); + EXPECT_EQ(cap.get(CAP_PROP_FRAME_WIDTH), 672.0); + EXPECT_EQ(cap.get(CAP_PROP_FRAME_COUNT), 125); + EXPECT_EQ(cap.get(CAP_PROP_FPS), 24.0); ffmpeg_check_read_raw(cap); } From afb406f1dea030fbb39654584778a5dd5c363464 Mon Sep 17 00:00:00 2001 From: Mihir Patil Date: Sun, 6 Aug 2023 20:10:05 -0400 Subject: [PATCH 07/57] style: remove trailing whitespace --- modules/highgui/src/window_cocoa.mm | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/highgui/src/window_cocoa.mm b/modules/highgui/src/window_cocoa.mm index 5800ab3f94..5e34b502db 100644 --- a/modules/highgui/src/window_cocoa.mm +++ b/modules/highgui/src/window_cocoa.mm @@ -197,7 +197,7 @@ CV_IMPL void cvDestroyWindow( const char* name) if(window) { if ([window styleMask] & NSFullScreenWindowMask) { [window toggleFullScreen:nil]; - } + } [window close]; [windows removeObjectForKey:[NSString stringWithFormat:@"%s", name]]; } @@ -731,7 +731,7 @@ void cvSetModeWindow_COCOA( const char* name, double prop_value ) } localpool = [[NSAutoreleasePool alloc] init]; - + // std::cout << "setting mode" << std::endl; #if MAC_OS_X_VERSION_MAX_ALLOWED > MAC_OS_X_VERSION_10_6 
if ( ([window styleMask] & NSFullScreenWindowMask) && prop_value==CV_WINDOW_NORMAL ) @@ -746,11 +746,11 @@ void cvSetModeWindow_COCOA( const char* name, double prop_value ) // std::cout << "entering fullscreen" << std::endl; [window setCollectionBehavior:NSWindowCollectionBehaviorFullScreenPrimary]; - NSScreen* screen = [window screen]; + NSScreen* screen = [window screen]; NSRect frame = [screen frame]; [window setFrame:frame display:YES]; - + [window setContentSize:frame.size]; [window toggleFullScreen:nil]; From ba70ec99b3c119d549fabd955e276dae73c4a9b0 Mon Sep 17 00:00:00 2001 From: Yuantao Feng Date: Wed, 9 Aug 2023 02:26:02 -0400 Subject: [PATCH 08/57] Merge pull request #24122 from fengyuentau:remove_tengine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dnn: cleanup of tengine backend #24122 🚀 Cleanup for OpenCV 5.0. Tengine backend is added for convolution layer speedup on ARM CPUs, but it is not maintained and the convolution layer on our default backend has reached similar performance to that of Tengine. Tengine backend related PRs: - https://github.com/opencv/opencv/pull/16724 - https://github.com/opencv/opencv/pull/18323 ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. 
- [x] The feature is well documented and sample code can be built with the project CMake --- 3rdparty/libtengine/tengine.cmake | 80 ---- CMakeLists.txt | 10 - cmake/OpenCVFindTengine.cmake | 78 ---- cmake/mirrors/custom.cmake | 7 +- cmake/mirrors/gitcode.cmake | 5 +- .../config_reference.markdown | 1 - modules/dnn/CMakeLists.txt | 10 - modules/dnn/src/layers/convolution_layer.cpp | 85 ---- .../include/tengine_graph_convolution.hpp | 53 --- .../src/tengine_graph_convolution.cpp | 370 ------------------ 10 files changed, 3 insertions(+), 696 deletions(-) delete mode 100644 3rdparty/libtengine/tengine.cmake delete mode 100644 cmake/OpenCVFindTengine.cmake delete mode 100644 modules/dnn/src/tengine4dnn/include/tengine_graph_convolution.hpp delete mode 100644 modules/dnn/src/tengine4dnn/src/tengine_graph_convolution.cpp diff --git a/3rdparty/libtengine/tengine.cmake b/3rdparty/libtengine/tengine.cmake deleted file mode 100644 index ee8f0cb86f..0000000000 --- a/3rdparty/libtengine/tengine.cmake +++ /dev/null @@ -1,80 +0,0 @@ -# COPYRIGHT -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# License); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# -# Copyright (c) 2020, OPEN AI LAB -# Author: qtang@openailab.com or https://github.com/BUG1989 -# qli@openailab.com -# sqfu@openailab.com - -SET(TENGINE_COMMIT_VERSION "e89cf8870de2ff0a80cfe626c0b52b2a16fb302e") -SET(OCV_TENGINE_DIR "${OpenCV_BINARY_DIR}/3rdparty/libtengine") -SET(OCV_TENGINE_SOURCE_PATH "${OCV_TENGINE_DIR}/Tengine-${TENGINE_COMMIT_VERSION}") - -IF(EXISTS "${OCV_TENGINE_SOURCE_PATH}") - MESSAGE(STATUS "Tengine is exist already at: ${OCV_TENGINE_SOURCE_PATH}") - - SET(Tengine_FOUND ON) - SET(BUILD_TENGINE ON) -ELSE() - SET(OCV_TENGINE_FILENAME "${TENGINE_COMMIT_VERSION}.zip")#name - SET(OCV_TENGINE_URL "https://github.com/OAID/Tengine/archive/") #url - SET(tengine_md5sum 23f61ebb1dd419f1207d8876496289c5) #md5sum - - ocv_download(FILENAME ${OCV_TENGINE_FILENAME} - HASH ${tengine_md5sum} - URL - "${OPENCV_TENGINE_URL}" - "$ENV{OPENCV_TENGINE_URL}" - "${OCV_TENGINE_URL}" - DESTINATION_DIR "${OCV_TENGINE_DIR}" - ID TENGINE - STATUS res - UNPACK RELATIVE_URL) - - if (NOT res) - MESSAGE(STATUS "TENGINE DOWNLOAD FAILED. Turning Tengine_FOUND off.") - SET(Tengine_FOUND OFF) - else () - MESSAGE(STATUS "TENGINE DOWNLOAD success . ") - - SET(Tengine_FOUND ON) - SET(BUILD_TENGINE ON) - endif() -ENDIF() - -if(BUILD_TENGINE) - SET(HAVE_TENGINE 1) - - if(NOT ANDROID) - # linux system - if(CMAKE_SYSTEM_PROCESSOR STREQUAL arm) - SET(TENGINE_TOOLCHAIN_FLAG "-march=armv7-a") - elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL aarch64) ## AARCH64 - SET(TENGINE_TOOLCHAIN_FLAG "-march=armv8-a") - endif() - endif() - - SET(BUILT_IN_OPENCV ON) ## set for tengine compile discern . 
- SET(Tengine_INCLUDE_DIR "${OCV_TENGINE_SOURCE_PATH}/include" CACHE INTERNAL "") - if(EXISTS "${OCV_TENGINE_SOURCE_PATH}/CMakeLists.txt") - add_subdirectory("${OCV_TENGINE_SOURCE_PATH}" "${OCV_TENGINE_DIR}/build") - else() - message(WARNING "TENGINE: Missing 'CMakeLists.txt' in source code package: ${OCV_TENGINE_SOURCE_PATH}") - endif() - SET(Tengine_LIB "tengine" CACHE INTERNAL "") -endif() diff --git a/CMakeLists.txt b/CMakeLists.txt index 4dbcab578b..d14b7af439 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -462,9 +462,6 @@ OCV_OPTION(WITH_ANDROID_MEDIANDK "Use Android Media NDK for Video I/O (Android)" OCV_OPTION(WITH_ANDROID_NATIVE_CAMERA "Use Android NDK for Camera I/O (Android)" (ANDROID_NATIVE_API_LEVEL GREATER 23) VISIBLE_IF ANDROID VERIFY HAVE_ANDROID_NATIVE_CAMERA) -OCV_OPTION(WITH_TENGINE "Include Arm Inference Tengine support" OFF - VISIBLE_IF (ARM OR AARCH64) AND (UNIX OR ANDROID) AND NOT IOS - VERIFY HAVE_TENGINE) OCV_OPTION(WITH_ONNX "Include Microsoft ONNX Runtime support" OFF VISIBLE_IF TRUE VERIFY HAVE_ONNX) @@ -761,9 +758,6 @@ include(cmake/OpenCVFindLibsPerf.cmake) include(cmake/OpenCVFindLAPACK.cmake) include(cmake/OpenCVFindProtobuf.cmake) include(cmake/OpenCVDetectFlatbuffers.cmake) -if(WITH_TENGINE) - include(cmake/OpenCVFindTengine.cmake) -endif() if(WITH_TIMVX) include(cmake/OpenCVFindTIMVX.cmake) endif() @@ -1612,10 +1606,6 @@ if(WITH_VA OR HAVE_VA) status(" VA:" HAVE_VA THEN "YES" ELSE NO) endif() -if(WITH_TENGINE OR HAVE_TENGINE) - status(" Tengine:" HAVE_TENGINE THEN "YES (${TENGINE_LIBRARIES})" ELSE NO) -endif() - if(WITH_LAPACK OR HAVE_LAPACK) status(" Lapack:" HAVE_LAPACK THEN "YES (${LAPACK_LIBRARIES})" ELSE NO) endif() diff --git a/cmake/OpenCVFindTengine.cmake b/cmake/OpenCVFindTengine.cmake deleted file mode 100644 index 2d33f5c993..0000000000 --- a/cmake/OpenCVFindTengine.cmake +++ /dev/null @@ -1,78 +0,0 @@ -# COPYRIGHT -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license 
agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# License); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -# Copyright (c) 2020, OPEN AI LAB -# Author: qtang@openailab.com or https://github.com/BUG1989 -# - -# ---------------------------------------------------------------------------- -# Path for Tengine binaries -# ---------------------------------------------------------------------------- -set(OPENCV_LIBTENGINE_ROOT_DIR "" CACHE PATH "Path to TENGINE binaries installation") - -IF(OPENCV_LIBTENGINE_ROOT_DIR AND NOT BUILD_TENGINE) - - MESSAGE(STATUS "TENGINE:-- Use binaries at ${OPENCV_LIBTENGINE_ROOT_DIR}") - - SET(Tengine_FOUND ON) - set(BUILD_TENGINE OFF) - - SET(Tengine_INCLUDE_DIR "${OPENCV_LIBTENGINE_ROOT_DIR}/include" CACHE PATH "TENGINE include dir") - SET(Tengine_LIB "${OPENCV_LIBTENGINE_ROOT_DIR}/lib/libtengine.a" CACHE PATH "TENGINE library dir") - -ELSE() - IF(ANDROID) - IF(OPENCV_TENGINE_FORCE_ANDROID) - # nothing, use Android - ELSEIF(OPENCV_TENGINE_SKIP_ANDROID) - set(Tengine_FOUND OFF) - set(HAVE_TENGINE FALSE) - return() - ELSEIF(NOT DEFINED ANDROID_NDK_REVISION) - MESSAGE(STATUS "Android NDK version Tengine not support: ANDROID_NDK_REVISION is not defined") - set(Tengine_FOUND OFF) - set(HAVE_TENGINE FALSE) - return() - ELSEIF(ANDROID_NDK_REVISION VERSION_LESS 14) - MESSAGE(STATUS "Android NDK version Tengine not support: 
ANDROID_NDK_REVISION=${ANDROID_NDK_REVISION}") - set(Tengine_FOUND OFF) - set(HAVE_TENGINE FALSE) - return() - ENDIF() - ENDIF() - MESSAGE(STATUS "TENGINE:-- Build Tengine from source code. ") - include("${OpenCV_SOURCE_DIR}/3rdparty/libtengine/tengine.cmake") -ENDIF() - -IF(NOT Tengine_LIB) - SET(Tengine_FOUND OFF) - MESSAGE(STATUS "#### Could not find Tengine lib. Turning Tengine_FOUND off") -ENDIF() - -IF (Tengine_FOUND) - MESSAGE(STATUS "Found Tengine include: ${Tengine_INCLUDE_DIR}") - MESSAGE(STATUS "Found Tengine libraries: ${Tengine_LIB}") - set(HAVE_TENGINE 1) - set(TENGINE_LIBRARIES ${Tengine_LIB}) - set(TENGINE_INCLUDE_DIRS ${Tengine_INCLUDE_DIR}) -ENDIF (Tengine_FOUND) - -MARK_AS_ADVANCED( - Tengine_INCLUDE_DIR - Tengine_LIB -) diff --git a/cmake/mirrors/custom.cmake b/cmake/mirrors/custom.cmake index 3cdf700e19..8c421471f3 100644 --- a/cmake/mirrors/custom.cmake +++ b/cmake/mirrors/custom.cmake @@ -1,15 +1,12 @@ # Gitlab-style mirror # CMake scripts look for opencv/opencv_3rdparty, -# OAID/Tengine, 01org/tbb(oneAPI/oneTBB), opencv/ade +# 01org/tbb(oneAPI/oneTBB), opencv/ade # from OPENCV_DOWNLOAD_MIRROR ocv_update(OPENCV_DOWNLOAD_MIRROR_URL "") ###### # Download via commit id ###### -# Tengine -ocv_update(TENGINE_PKG_MD5_CUSTOM "") -ocv_update(TENGINE_PKG_MD5_ORIGINAL 23f61ebb1dd419f1207d8876496289c5) # same as tengine_md5sum for TENGINE commit of e89cf8870de2ff0a80cfe626c0b52b2a16fb302e # NVIDIA_OPTICAL_FLOW ocv_update(NVIDIA_OPTICAL_FLOW_PKG_MD5_GITCODE "") ocv_update(NVIDIA_OPTICAL_FLOW_PKG_MD5_ORIGINAL a73cd48b18dcc0cc8933b30796074191) @@ -77,7 +74,7 @@ else() ocv_download_url_custom_usercontent(opencv) elseif(DL_ID STREQUAL "wechat_qrcode") ocv_download_url_gitcode_usercontent(WeChatCV) - elseif((DL_ID STREQUAL "TENGINE") OR (DL_ID STREQUAL "NVIDIA_OPTICAL_FLOW") OR (DL_ID STREQUAL "TIM-VX")) + elseif((DL_ID STREQUAL "NVIDIA_OPTICAL_FLOW") OR (DL_ID STREQUAL "TIM-VX")) ocv_download_url_custom_archive_commit_id() elseif(DL_ID STREQUAL "TBB") 
ocv_download_url_custom_archive_release() diff --git a/cmake/mirrors/gitcode.cmake b/cmake/mirrors/gitcode.cmake index c9d41e7458..e208a87245 100644 --- a/cmake/mirrors/gitcode.cmake +++ b/cmake/mirrors/gitcode.cmake @@ -1,9 +1,6 @@ ###### # Download via commit id ###### -# Tengine -ocv_update(TENGINE_PKG_MD5_GITCODE 1b5908632b557275cd6e85b0c03f9690) -ocv_update(TENGINE_PKG_MD5_ORIGINAL 23f61ebb1dd419f1207d8876496289c5) # same as tengine_md5sum for TENGINE commit of e89cf8870de2ff0a80cfe626c0b52b2a16fb302e # NVIDIA_OPTICAL_FLOW ocv_update(NVIDIA_OPTICAL_FLOW_PKG_MD5_GITCODE 8d5b7eeb24d6ca9c6bcfdff4196d5b47) ocv_update(NVIDIA_OPTICAL_FLOW_PKG_MD5_ORIGINAL a73cd48b18dcc0cc8933b30796074191) @@ -74,7 +71,7 @@ if((DL_ID STREQUAL "FFMPEG") OR (DL_ID STREQUAL "IPPICV") OR (DL_ID STREQUAL "da ocv_download_url_gitcode_usercontent(opencv) elseif(DL_ID STREQUAL "wechat_qrcode") ocv_download_url_gitcode_usercontent(mirrors/WeChatCV) -elseif((DL_ID STREQUAL "TENGINE") OR (DL_ID STREQUAL "NVIDIA_OPTICAL_FLOW") OR (DL_ID STREQUAL "TIM-VX")) +elseif((DL_ID STREQUAL "NVIDIA_OPTICAL_FLOW") OR (DL_ID STREQUAL "TIM-VX")) ocv_download_url_gitcode_archive_commit_id() elseif(DL_ID STREQUAL "TBB") ocv_download_url_gitcode_archive_release(OPENCV_TBB_SUBDIR) diff --git a/doc/tutorials/introduction/config_reference/config_reference.markdown b/doc/tutorials/introduction/config_reference/config_reference.markdown index 3ed87e5bdf..2528baf41d 100644 --- a/doc/tutorials/introduction/config_reference/config_reference.markdown +++ b/doc/tutorials/introduction/config_reference/config_reference.markdown @@ -484,7 +484,6 @@ OpenCV have own DNN inference module which have own build-in engine, but can als | `OPENCV_DNN_CUDA` | _OFF_ | Enable CUDA backend. [CUDA](https://en.wikipedia.org/wiki/CUDA), CUBLAS and [CUDNN](https://developer.nvidia.com/cudnn) must be installed. 
| | `WITH_HALIDE` | _OFF_ | Use experimental [Halide](https://en.wikipedia.org/wiki/Halide_(programming_language)) backend which can generate optimized code for dnn-layers at runtime. Halide must be installed. | | `WITH_VULKAN` | _OFF_ | Enable experimental [Vulkan](https://en.wikipedia.org/wiki/Vulkan_(API)) backend. Does not require additional dependencies, but can use external Vulkan headers (`VULKAN_INCLUDE_DIRS`). | -| `WITH_TENGINE` | _OFF_ | Enable experimental [Tengine](https://github.com/OAID/Tengine) backend for ARM CPUs. Tengine library must be installed. | # Installation layout {#tutorial_config_reference_install} diff --git a/modules/dnn/CMakeLists.txt b/modules/dnn/CMakeLists.txt index 804b78ead2..60cc77ca8b 100644 --- a/modules/dnn/CMakeLists.txt +++ b/modules/dnn/CMakeLists.txt @@ -58,11 +58,6 @@ endif() ocv_cmake_hook_append(INIT_MODULE_SOURCES_opencv_dnn "${CMAKE_CURRENT_LIST_DIR}/cmake/hooks/INIT_MODULE_SOURCES_opencv_dnn.cmake") -if(HAVE_TENGINE) - ocv_target_compile_definitions(${the_module} PRIVATE "HAVE_TENGINE=1") -endif() - - if(MSVC) add_definitions( -D_CRT_SECURE_NO_WARNINGS=1 ) ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4244 /wd4267 /wd4018 /wd4355 /wd4800 /wd4251 /wd4996 /wd4146 @@ -172,11 +167,6 @@ else() set(sources_options ${sources_options} EXCLUDE_CUDA) endif() -if(HAVE_TENGINE) - list(APPEND include_dirs ${TENGINE_INCLUDE_DIRS}) - list(APPEND libs -Wl,--whole-archive ${TENGINE_LIBRARIES} -Wl,--no-whole-archive) -endif() - if(HAVE_TIMVX) list(APPEND include_dirs ${TIMVX_INCLUDE_DIR}) list(APPEND libs -Wl,--whole-archive ${TIMVX_LIBRARY} -Wl,--no-whole-archive) diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index 2787d64880..0ed2bb7feb 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -62,9 +62,6 @@ #include "opencl_kernels_dnn.hpp" using namespace cv::dnn::ocl4dnn; #endif -#ifdef HAVE_TENGINE -#include 
"../tengine4dnn/include/tengine_graph_convolution.hpp" -#endif #ifdef HAVE_CUDA #include "../cuda4dnn/primitives/convolution.hpp" @@ -267,10 +264,6 @@ public: float power; #endif -#ifdef HAVE_TENGINE - teng_graph_t tengine_graph; -#endif - #ifdef HAVE_CUDA cuda4dnn::ConvolutionConfiguration::FusionMode cudaFusionMode; cuda4dnn::ConvolutionConfiguration::ActivationType cudaActType; @@ -289,20 +282,8 @@ public: #ifdef HAVE_CUDA cudaFusionMode = cuda4dnn::ConvolutionConfiguration::FusionMode::NONE; cudaActType = cuda4dnn::ConvolutionConfiguration::ActivationType::IDENTITY; -#endif -#ifdef HAVE_TENGINE - tengine_graph=NULL; #endif } -#ifdef HAVE_TENGINE - ~ConvolutionLayerImpl() - { - if(NULL != tengine_graph ) - { - tengine_release(tengine_graph); - } - } -#endif MatShape computeColRowShape(const MatShape &inpShape, const MatShape &outShape) const CV_OVERRIDE { @@ -466,13 +447,6 @@ public: for(int i = 0; i < numOutput; i++ ) biasvec[i] = biasMat.at(i); } -#ifdef HAVE_TENGINE - if(NULL != tengine_graph ) - { - tengine_release(tengine_graph); - tengine_graph = NULL ; - } -#endif #ifdef HAVE_OPENCL convolutionOp.release(); #endif @@ -1305,65 +1279,6 @@ public: } } -#ifdef HAVE_TENGINE - bool tengine_ret = false; - - std::vector teng_in, teng_out; - inputs_arr.getMatVector(teng_in); - outputs_arr.getMatVector(teng_out); - - int inch = teng_in[0].size[1]; // inch - int in_h = teng_in[0].size[2]; // in_h - int in_w = teng_in[0].size[3]; // in_w - - int out_b = teng_out[0].size[0]; // out batch size - int outch = teng_out[0].size[1]; // outch - int out_h = teng_out[0].size[2]; // out_h - int out_w = teng_out[0].size[3]; // out_w - - float *input_ = teng_in[0].ptr(); - float *output_ = teng_out[0].ptr(); - float *kernel_ = weightsMat.ptr(); - float *teg_bias = &biasvec[0]; - - int nstripes = std::max(getNumThreads(), 1); - - /* tengine_init will run when first time. 
*/ - if(NULL == tengine_graph) - { - // pads_begin: 0 - pad_top, 1 - pad_left - // pads_end: 0 - pad_bottom, 1 - pad_right - // pad_h0: pad_top, pad_h1: pad_bottom - // pad_w0: pad_left, pad_w1: pad_right - tengine_graph = tengine_init(name.c_str(), input_, inch, ngroups, in_h, in_w, - output_, out_b, outch, out_h, out_w, - kernel_, kernel_size.size(), kernel.height, kernel.width, - teg_bias, stride.height, stride.width, - pads_begin[0], pads_end[0], pads_begin[1], pads_end[1], dilation.height, dilation.width, - weightsMat.step1(), padMode, tengine_graph, nstripes); - // printf("Init(%s): input=%p(%d %d %d %d ),output=%p(%d %d %d %d ),kernel=%p(%ld %d %d ), bias=%p ," - // "stride(%d %d), pad(%d %d %d %d), dilation(%d %d) ,weightsMat=%ld, padMode=%s ,tengine_graph = %p \n", - // name.c_str(),input_, inch, ngroups, in_h, in_w, - // output_, out_b, outch, out_h, out_w, - // kernel_, kernel_size.size(), kernel.height, kernel.width, - // teg_bias, stride.height, stride.width, - // pads_begin[0], pads_end[0], pads_begin[1], pads_end[1], dilation.height, dilation.width, - // weightsMat.step1(), padMode.c_str() ,tengine_graph); - } - if(NULL != tengine_graph) - { - tengine_ret = tengine_forward(tengine_graph); - } - /* activation */ - if((true == tengine_ret) && activ ) - { - int out_cstep = out_h * out_w; // out_cstep - - ActivationLayer* activ_ = activ.get(); - activ_->forwardSlice(output_, output_, out_cstep, out_cstep, 0, outch); - } - if(false == tengine_ret) -#endif { int nstripes = std::max(getNumThreads(), 1); int conv_dim = CONV_2D; diff --git a/modules/dnn/src/tengine4dnn/include/tengine_graph_convolution.hpp b/modules/dnn/src/tengine4dnn/include/tengine_graph_convolution.hpp deleted file mode 100644 index 8ec99c9685..0000000000 --- a/modules/dnn/src/tengine4dnn/include/tengine_graph_convolution.hpp +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * License); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/* - * Copyright (c) 2020, OPEN AI LAB - * Author: qtang@openailab.com - */ - -#ifndef TENGINE_GRAPH_CONVOLUTION_HPP -#define TENGINE_GRAPH_CONVOLUTION_HPP - -#define FLOAT_TO_REALSIZE (4) -#ifdef HAVE_TENGINE - -#include "tengine_c_api.h" - -namespace cv -{ -namespace dnn -{ -// pad_h0: pad_top -// pad_h1: pad_bottom -// pad_w0: pad_left -// pad_w1: pad_right -teng_graph_t tengine_init(const char* name , float* input_, int inch, int group, int in_h, int in_w, - float *output_, int out_b, int outch, int out_h, int out_w, - float *kernel_,int kernel_s , int kernel_h, int kernel_w, - float *teg_bias, int stride_h, int stride_w, - int pad_h0, int pad_h1, int pad_w0, int pad_w1, int dilation_h, int dilation_w, - size_t wstep, const std::string padMode , teng_graph_t& graph, int nstripes) ; - -bool tengine_forward(teng_graph_t& graph) ; -bool tengine_release(teng_graph_t& graph) ; -} -} -#endif -#endif /* TENGINE_GRAPH_CONVOLUTION_HPP */ \ No newline at end of file diff --git a/modules/dnn/src/tengine4dnn/src/tengine_graph_convolution.cpp b/modules/dnn/src/tengine4dnn/src/tengine_graph_convolution.cpp deleted file mode 100644 index d35937006c..0000000000 --- a/modules/dnn/src/tengine4dnn/src/tengine_graph_convolution.cpp +++ /dev/null @@ -1,370 +0,0 @@ -/* - * Licensed to the 
Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * License); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/* - * Copyright (c) 2020, OPEN AI LAB - * Author: qtang@openailab.com - */ - -#include "../../precomp.hpp" -#include -#include - -#include -#include - -#include "../include/tengine_graph_convolution.hpp" - -#ifdef HAVE_TENGINE - -#include "tengine_c_api.h" - - -namespace cv -{ -namespace dnn -{ -static int create_input_node(teng_graph_t graph, const char* node_name, int inch, int in_h, int in_w) -{ - node_t node = teng_create_graph_node(graph, node_name, "InputOp"); - tensor_t tensor = teng_create_graph_tensor(graph, node_name, TENGINE_DT_FP32); - teng_set_node_output_tensor(node, 0, tensor, TENSOR_TYPE_INPUT); - - int dims[4] = {1, inch, in_h, in_w}; - teng_set_tensor_shape(tensor, dims, 4); - - teng_release_graph_tensor(tensor); - teng_release_graph_node(node); - - return 0; -} - -static int create_conv_node(teng_graph_t graph, const char* node_name, const char* input_name, int in_h, int in_w, int out_h, int out_w, - int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h0, int pad_h1, int pad_w0, int pad_w1, int inch, int outch, int group, - int dilation_h, int dilation_w, int activation, std::string padMode) -{ - node_t conv_node = teng_create_graph_node(graph, 
node_name, "Convolution"); - tensor_t input_tensor = teng_get_graph_tensor(graph, input_name); - - if (input_tensor == NULL) - { - CV_LOG_WARNING(NULL,"Tengine: input_tensor is NULL." ); - return -1; - } - - teng_set_node_input_tensor(conv_node, 0, input_tensor); - teng_release_graph_tensor(input_tensor); - - /* output */ - tensor_t output_tensor = teng_create_graph_tensor(graph, node_name, TENGINE_DT_FP32); - - teng_set_node_output_tensor(conv_node, 0, output_tensor, TENSOR_TYPE_VAR); - teng_release_graph_tensor(output_tensor); - - /* weight */ - std::string weight_name(node_name); - weight_name += "/weight"; - - node_t w_node = teng_create_graph_node(graph, weight_name.c_str(), "Const"); - tensor_t w_tensor = teng_create_graph_tensor(graph, weight_name.c_str(), TENGINE_DT_FP32); - teng_set_node_output_tensor(w_node, 0, w_tensor, TENSOR_TYPE_CONST); - teng_set_node_input_tensor(conv_node, 1, w_tensor); - int w_dims[] = {outch, inch / group, kernel_h, kernel_w}; - - teng_set_tensor_shape(w_tensor, w_dims, 4); - - teng_release_graph_node(w_node); - teng_release_graph_tensor(w_tensor); - - /* bias */ - std::string bias_name(node_name); - bias_name += "/bias"; - - node_t b_node = teng_create_graph_node(graph, bias_name.c_str(), "Const"); - tensor_t b_tensor = teng_create_graph_tensor(graph, bias_name.c_str(), TENGINE_DT_FP32); - teng_set_node_output_tensor(b_node, 0, b_tensor, TENSOR_TYPE_CONST); - int b_dims[] = {outch}; - - teng_set_tensor_shape(b_tensor, b_dims, 1); - - teng_set_node_input_tensor(conv_node, 2, b_tensor); - teng_release_graph_node(b_node); - teng_release_graph_tensor(b_tensor); - - if (!padMode.empty()) - { - if (padMode == "SAME") - { - int out_h_temp = (in_h-kernel_h + 2*pad_h0)/stride_h + 1; - int out_w_temp = (in_w-kernel_w + 2*pad_w0)/stride_w + 1; - - if (out_h_temp < out_h) - pad_h1 += 1; - if (out_w_temp < out_w) - pad_w1 += 1; - } - } - - /* attr */ - teng_set_node_attr_int(conv_node, "kernel_h", &kernel_h); - 
teng_set_node_attr_int(conv_node, "kernel_w", &kernel_w); - teng_set_node_attr_int(conv_node, "stride_h", &stride_h); - teng_set_node_attr_int(conv_node, "stride_w", &stride_w); - teng_set_node_attr_int(conv_node, "pad_h0", &pad_h0); - teng_set_node_attr_int(conv_node, "pad_w0", &pad_w0); - teng_set_node_attr_int(conv_node, "pad_h1", &pad_h1); - teng_set_node_attr_int(conv_node, "pad_w1", &pad_w1); - teng_set_node_attr_int(conv_node, "output_channel", &outch); - teng_set_node_attr_int(conv_node, "input_channel", &inch); - teng_set_node_attr_int(conv_node, "group", &group); - teng_set_node_attr_int(conv_node, "dilation_h", &dilation_h); - teng_set_node_attr_int(conv_node, "dilation_w", &dilation_w); - // set_node_attr_int(conv_node, "activation", &activation); - - teng_release_graph_node(conv_node); - - return 0; -} - -static teng_graph_t create_conv_graph(const char* layer_name, float* input_data, int inch, int group, int in_h, int in_w, - float* output_data, int outch, int out_h, int out_w, - int kernel_h, int kernel_w, - int stride_h,int stride_w, - int pad_h0, int pad_h1, int pad_w0, int pad_w1, int dilation_h, int dilation_w, int activation, - float* teg_weight, float* teg_bias, std::string padMode, int nstripes) -{ - node_t conv_node = NULL; - - tensor_t input_tensor = NULL; - tensor_t output_tensor = NULL; - tensor_t weight_tensor = NULL; - tensor_t bias_tensor = NULL; - - /* create graph for convolution */ - int in_size = in_h * in_w * inch; - int out_size = out_h * out_w * outch; - int weight_size = outch * (inch / group) * kernel_w * kernel_h; - int bias_size = outch; - - int buf_size = 0; - int input_num = 0; - - /* create graph */ - teng_graph_t graph = teng_create_graph(NULL, NULL, NULL); - bool ok = true; - - if(graph == NULL) - { - CV_LOG_WARNING(NULL,"Tengine: create_graph failed." 
); - ok = false; - } - - const char* input_name = "data"; - const char* conv_name = layer_name; - - if (ok && create_input_node(graph, input_name, inch, in_h, in_w) < 0) - { - CV_LOG_WARNING(NULL,"Tengine: create_input_node failed." ); - ok = false; - } - - if (ok && create_conv_node(graph, conv_name, input_name, in_h, in_w, out_h, out_w, kernel_h, kernel_w, - stride_h, stride_w, pad_h0, pad_h1, pad_w0, pad_w1, inch, outch, group, dilation_h, dilation_w, activation, padMode) < 0) - { - CV_LOG_WARNING(NULL,"Tengine: create conv node failed." ); - ok = false; - } - - /* set input/output node */ - const char* inputs_name[] = {input_name}; - const char* outputs_name[] = {conv_name}; - - if (ok && teng_set_graph_input_node(graph, inputs_name, sizeof(inputs_name) / sizeof(char*)) < 0) - { - CV_LOG_WARNING(NULL,"Tengine: set inputs failed." ); - ok = false; - } - - if (ok && teng_set_graph_output_node(graph, outputs_name, sizeof(outputs_name) / sizeof(char*)) < 0) - { - CV_LOG_WARNING(NULL,"Tengine: set outputs failed." 
); - ok = false; - } - - /* set input data */ - if (ok) - { - input_tensor = teng_get_graph_input_tensor(graph, 0, 0); - buf_size = teng_get_tensor_buffer_size(input_tensor); - if (buf_size != in_size * FLOAT_TO_REALSIZE) - { - CV_LOG_WARNING(NULL,"Tengine: Input data size check failed."); - ok = false; - } - } - - if (ok) - { - teng_set_tensor_buffer(input_tensor, (float *)input_data, buf_size); - teng_release_graph_tensor(input_tensor); - - /* create convolution node */ - /* set weight node */ - conv_node = teng_get_graph_node(graph, conv_name); - weight_tensor = teng_get_node_input_tensor(conv_node, 1); - buf_size = teng_get_tensor_buffer_size(weight_tensor); - - if (buf_size != weight_size * FLOAT_TO_REALSIZE) - { - CV_LOG_WARNING(NULL,"Tengine: Input weight size check failed."); - ok = false; - } - } - - if (ok) - { - teng_set_tensor_buffer(weight_tensor, teg_weight, buf_size); - - /* set bias node */ - input_num = teng_get_node_input_number(conv_node); - if (input_num > 2) - { - bias_tensor = teng_get_node_input_tensor(conv_node, 2); - buf_size = teng_get_tensor_buffer_size(bias_tensor); - if (buf_size != bias_size * FLOAT_TO_REALSIZE) - { - CV_LOG_WARNING(NULL,"Tengine: Input bias size check failed."); - ok = false; - } - else teng_set_tensor_buffer(bias_tensor, teg_bias, buf_size); - } - } - - /* prerun */ - if (ok && teng_prerun_graph_multithread(graph, TENGINE_CLUSTER_BIG, nstripes) < 0) - { - CV_LOG_WARNING(NULL, "Tengine: prerun_graph failed."); - ok = false; - } - - if (ok) - { - /* set output data */ - output_tensor = teng_get_node_output_tensor(conv_node, 0); - int ret = teng_set_tensor_buffer(output_tensor, output_data, out_size * FLOAT_TO_REALSIZE); - if(ret) - { - CV_LOG_WARNING(NULL,"Tengine: Set output tensor buffer failed." 
); - ok = false; - } - } - - if (false == ok) - { - teng_destroy_graph(graph) ; - return NULL ; - } - return graph; -} -static bool tengine_init_flag = false; -teng_graph_t tengine_init(const char* layer_name, float* input_, int inch, int group, int in_h, int in_w, - float *output_, int out_b, int outch, int out_h, int out_w, - float *kernel_, int kernel_s ,int kernel_h, int kernel_w, - float *teg_bias, int stride_h, int stride_w, - int pad_h0, int pad_h1, int pad_w0, int pad_w1, int dilation_h, int dilation_w, - size_t wstep, const std::string padMode, teng_graph_t &graph, int nstripes) -{ - std::vector teg_weight_vec; - float *teg_weight = NULL; - int kernel_inwh = (inch / group) * kernel_w * kernel_h; - // Do not using the activation fuse mode, just convolution only. - int activation = -1; - - if (!(kernel_s == 2 && kernel_h == kernel_w - && dilation_h == dilation_w && stride_h == stride_w - && out_b == 1 && pad_h0 < 10 && pad_h1 < 10 && pad_w0 < 10 && pad_w1 < 10)) // just for Conv2D - { - // printf("return : just for Conv2D\n"); - return NULL; - } - - { - /* printf("Tengine(%s): input (1 x %d x %d x %d),output (%d x %d x %d x %d), kernel (%d x %d), stride (%d x %d), dilation (%d x %d), pad (%d x %d).\n", - layer_name, inch, in_h, in_w, - out_b, outch, out_h, out_w, - kernel_w, kernel_h, - stride_w, stride_h, - dilation_w, dilation_h, - pad_h0, pad_h1, pad_w0, pad_w1); - */ - // weight - if (kernel_inwh != wstep) - { - teg_weight_vec.resize(kernel_inwh * outch); - teg_weight = &teg_weight_vec[0]; - for (int i=0; i Date: Wed, 9 Aug 2023 18:43:49 +0800 Subject: [PATCH 09/57] Fixed bug when MSMF webcamera doesn't start when build with VIDEOIO_PLUGIN_ALL --- modules/videoio/src/cap_msmf.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/modules/videoio/src/cap_msmf.cpp b/modules/videoio/src/cap_msmf.cpp index 78eefc34a3..a55f919ed1 100644 --- a/modules/videoio/src/cap_msmf.cpp +++ b/modules/videoio/src/cap_msmf.cpp @@ -2719,8 +2719,6 @@ CvResult CV_API_CALL 
cv_capture_open_with_params( if (!handle) return CV_ERROR_FAIL; *handle = NULL; - if (!filename) - return CV_ERROR_FAIL; CaptureT* cap = 0; try { From f834736307c8328340aea48908484052170c9224 Mon Sep 17 00:00:00 2001 From: chaebkimm Date: Wed, 9 Aug 2023 19:46:25 +0900 Subject: [PATCH 10/57] Merge pull request #24116 from chaebkimm/update-samples-python-tst_scene_render Fix python sample code (tst_scene_render) #24116 Fix bug of python sample code (samples/python/tst_scene_render.py) when backGr or fgr is None (#24114) 1) pass shape tuple to np.zeros arguments instead of integers 2) change np.int to int ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [o] I agree to contribute to the project under Apache 2 License. - [o] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [o] The PR is proposed to the proper branch - [o] There is a reference to the original bug report and related work - [o] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. 
- [o] The feature is well documented and sample code can be built with the project CMake --- samples/python/tst_scene_render.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/python/tst_scene_render.py b/samples/python/tst_scene_render.py index 9d09ea7b9e..c3eb69ef9c 100644 --- a/samples/python/tst_scene_render.py +++ b/samples/python/tst_scene_render.py @@ -25,7 +25,7 @@ class TestSceneRender(): if bgImg is not None: self.sceneBg = bgImg.copy() else: - self.sceneBg = np.zeros(defaultSize, defaultSize, np.uint8) + self.sceneBg = np.zeros((defaultSize, defaultSize,3), np.uint8) self.w = self.sceneBg.shape[0] self.h = self.sceneBg.shape[1] @@ -85,7 +85,7 @@ class TestSceneRender(): img[self.currentCenter[0]:self.currentCenter[0]+self.foreground.shape[0], self.currentCenter[1]:self.currentCenter[1]+self.foreground.shape[1]] = self.foreground else: - self.currentRect = self.initialRect + np.int( 30*cos(self.time*self.speed) + 50*sin(self.time*self.speed)) + self.currentRect = self.initialRect + int( 30*cos(self.time*self.speed) + 50*sin(self.time*self.speed)) if self.deformation: self.currentRect[1:3] += int(self.h/20*cos(self.time)) cv.fillConvexPoly(img, self.currentRect, (0, 0, 255)) From 53dfd9536a569b824cb083a1d6f5f9f3df0b05be Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Thu, 10 Aug 2023 11:39:29 +0300 Subject: [PATCH 11/57] videoio: fix camera opening with GStreamer plugin --- modules/videoio/src/cap_gstreamer.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/modules/videoio/src/cap_gstreamer.cpp b/modules/videoio/src/cap_gstreamer.cpp index fc031d2b5f..305d527ce9 100644 --- a/modules/videoio/src/cap_gstreamer.cpp +++ b/modules/videoio/src/cap_gstreamer.cpp @@ -2825,8 +2825,6 @@ CvResult CV_API_CALL cv_capture_open_with_params( if (!handle) return CV_ERROR_FAIL; *handle = NULL; - if (!filename) - return CV_ERROR_FAIL; GStreamerCapture *cap = 0; try { From 82de5b3a67a961a52e6f5a6e58d9e8ec8264b7eb Mon Sep 17 00:00:00 2001 
From: Samuel Thibault Date: Thu, 10 Aug 2023 22:43:46 +0200 Subject: [PATCH 12/57] Fix GNU/Hurd build It has the usual Unix filesystem operations. --- .../core/include/opencv2/core/utils/filesystem.private.hpp | 3 ++- modules/core/src/utils/filesystem.cpp | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/modules/core/include/opencv2/core/utils/filesystem.private.hpp b/modules/core/include/opencv2/core/utils/filesystem.private.hpp index c32be15c61..70df64f0d4 100644 --- a/modules/core/include/opencv2/core/utils/filesystem.private.hpp +++ b/modules/core/include/opencv2/core/utils/filesystem.private.hpp @@ -12,7 +12,8 @@ # elif defined WINRT || defined _WIN32_WCE /* not supported */ # elif defined __ANDROID__ || defined __linux__ || defined _WIN32 || \ - defined __FreeBSD__ || defined __bsdi__ || defined __HAIKU__ + defined __FreeBSD__ || defined __bsdi__ || defined __HAIKU__ || \ + defined __GNU__ # define OPENCV_HAVE_FILESYSTEM_SUPPORT 1 # elif defined(__APPLE__) # include diff --git a/modules/core/src/utils/filesystem.cpp b/modules/core/src/utils/filesystem.cpp index 415323490d..24f69ccb59 100644 --- a/modules/core/src/utils/filesystem.cpp +++ b/modules/core/src/utils/filesystem.cpp @@ -34,7 +34,7 @@ #include #include #include -#elif defined __linux__ || defined __APPLE__ || defined __HAIKU__ || defined __FreeBSD__ +#elif defined __linux__ || defined __APPLE__ || defined __HAIKU__ || defined __FreeBSD__ || defined __GNU__ #include #include #include @@ -343,7 +343,7 @@ private: Impl& operator=(const Impl&); // disabled }; -#elif defined __linux__ || defined __APPLE__ || defined __HAIKU__ || defined __FreeBSD__ +#elif defined __linux__ || defined __APPLE__ || defined __HAIKU__ || defined __FreeBSD__ || defined __GNU__ struct FileLock::Impl { @@ -457,7 +457,7 @@ cv::String getCacheDirectory(const char* sub_directory_name, const char* configu default_cache_path = "/tmp/"; CV_LOG_WARNING(NULL, "Using world accessible cache directory. 
This may be not secure: " << default_cache_path); } -#elif defined __linux__ || defined __HAIKU__ || defined __FreeBSD__ +#elif defined __linux__ || defined __HAIKU__ || defined __FreeBSD__ || defined __GNU__ // https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html if (default_cache_path.empty()) { From 0dd7769bb12b518ff2d54ee6aace74db01688116 Mon Sep 17 00:00:00 2001 From: HAN Liutong Date: Fri, 11 Aug 2023 13:33:33 +0800 Subject: [PATCH 13/57] Merge pull request #23980 from hanliutong:rewrite-core Rewrite Universal Intrinsic code by using new API: Core module. #23980 The goal of this PR is to match and modify all SIMD code blocks guarded by `CV_SIMD` macro in the `opencv/modules/core` folder and rewrite them by using the new Universal Intrinsic API. The patch is almost auto-generated by using the [rewriter](https://github.com/hanliutong/rewriter), related PR #23885. Most of the files have been rewritten, but I marked this PR as draft because, the `CV_SIMD` macro also exists in the following files, and the reasons why they are not rewrited are: 1. ~~code design for fixed-size SIMD (v_int16x8, v_float32x4, etc.), need to manually rewrite.~~ Rewrited - ./modules/core/src/stat.simd.hpp - ./modules/core/src/matrix_transform.cpp - ./modules/core/src/matmul.simd.hpp 2. Vector types are wrapped in other class/struct, that are not supported by the compiler in variable-length backends. Can not be rewrited directly. - ./modules/core/src/mathfuncs_core.simd.hpp ```cpp struct v_atan_f32 { explicit v_atan_f32(const float& scale) { ... } v_float32 compute(const v_float32& y, const v_float32& x) { ... } ... v_float32 val90; // sizeless type can not used in a class v_float32 val180; v_float32 val360; v_float32 s; }; ``` 3. 
The API interface does not support/does not match - ./modules/core/src/norm.cpp Use `v_popcount`, ~~waiting for #23966~~ Fixed - ./modules/core/src/has_non_zero.simd.hpp Use illegal Universal Intrinsic API: For float type, there is no logical operation `|`. Further discussion needed ```cpp /** @brief Bitwise OR Only for integer types. */ template CV_INLINE v_reg<_Tp, n> operator|(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); template CV_INLINE v_reg<_Tp, n>& operator|=(v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b); ``` ```cpp #if CV_SIMD typedef v_float32 v_type; const v_type v_zero = vx_setzero_f32(); constexpr const int unrollCount = 8; int step = v_type::nlanes * unrollCount; int len0 = len & -step; const float* srcSimdEnd = src+len0; int countSIMD = static_cast((srcSimdEnd-src)/step); while(!res && countSIMD--) { v_type v0 = vx_load(src); src += v_type::nlanes; v_type v1 = vx_load(src); src += v_type::nlanes; .... src += v_type::nlanes; v0 |= v1; //Illegal ? .... //res = v_check_any(((v0 | v4) != v_zero));//beware : (NaN != 0) returns "false" since != is mapped to _CMP_NEQ_OQ and not _CMP_NEQ_UQ res = !v_check_all(((v0 | v4) == v_zero)); } v_cleanup(); #endif ``` ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [ ] I agree to contribute to the project under Apache 2 License. - [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [ ] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. 
- [ ] The feature is well documented and sample code can be built with the project CMake --- .../opencv2/core/hal/intrin_rvv_scalable.hpp | 3 + modules/core/src/arithm.cpp | 52 +++---- modules/core/src/arithm.simd.hpp | 119 +++++++-------- modules/core/src/convert.hpp | 54 +++---- modules/core/src/convert.simd.hpp | 16 +- modules/core/src/convert_scale.simd.hpp | 30 ++-- modules/core/src/copy.cpp | 18 +-- modules/core/src/count_non_zero.simd.hpp | 72 +++++---- modules/core/src/lapack.cpp | 88 +++-------- modules/core/src/mathfuncs.cpp | 138 +++++++++--------- modules/core/src/matmul.simd.hpp | 128 ++++++++-------- modules/core/src/matrix_transform.cpp | 10 +- modules/core/src/merge.simd.hpp | 20 +-- modules/core/src/norm.cpp | 66 ++++----- modules/core/src/split.simd.hpp | 20 +-- modules/core/src/stat.simd.hpp | 26 +--- modules/core/src/sum.simd.hpp | 124 ++++++++-------- 17 files changed, 466 insertions(+), 518 deletions(-) diff --git a/modules/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp b/modules/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp index dab82489f8..6c28b44f5b 100644 --- a/modules/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp @@ -924,6 +924,9 @@ inline scalartype v_reduce_sum(const _Tpvec& a) \ return (scalartype)v_get0(res); \ } OPENCV_HAL_IMPL_RVV_REDUCE_SUM_FP(v_float32, v_float32, vfloat32m1_t, float, f32, VTraits::vlanes()) +#if CV_SIMD_SCALABLE_64F +OPENCV_HAL_IMPL_RVV_REDUCE_SUM_FP(v_float64, v_float64, vfloat64m1_t, float, f64, VTraits::vlanes()) +#endif #define OPENCV_HAL_IMPL_RVV_REDUCE(_Tpvec, func, scalartype, suffix, vl, red) \ inline scalartype v_reduce_##func(const _Tpvec& a) \ diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp index 5709ec12e4..c5e561e26e 100644 --- a/modules/core/src/arithm.cpp +++ b/modules/core/src/arithm.cpp @@ -1332,7 +1332,7 @@ struct InRange_SIMD } }; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) 
template <> struct InRange_SIMD @@ -1341,7 +1341,7 @@ struct InRange_SIMD uchar * dst, int len) const { int x = 0; - const int width = v_uint8::nlanes; + const int width = VTraits::vlanes(); for (; x <= len - width; x += width) { @@ -1349,7 +1349,7 @@ struct InRange_SIMD v_uint8 low = vx_load(src2 + x); v_uint8 high = vx_load(src3 + x); - v_store(dst + x, (values >= low) & (high >= values)); + v_store(dst + x, v_and(v_ge(values, low), v_ge(high, values))); } vx_cleanup(); return x; @@ -1363,7 +1363,7 @@ struct InRange_SIMD uchar * dst, int len) const { int x = 0; - const int width = v_int8::nlanes; + const int width = VTraits::vlanes(); for (; x <= len - width; x += width) { @@ -1371,7 +1371,7 @@ struct InRange_SIMD v_int8 low = vx_load(src2 + x); v_int8 high = vx_load(src3 + x); - v_store((schar*)(dst + x), (values >= low) & (high >= values)); + v_store((schar*)(dst + x), v_and(v_ge(values, low), v_ge(high, values))); } vx_cleanup(); return x; @@ -1385,7 +1385,7 @@ struct InRange_SIMD uchar * dst, int len) const { int x = 0; - const int width = v_uint16::nlanes * 2; + const int width = VTraits::vlanes() * 2; for (; x <= len - width; x += width) { @@ -1393,11 +1393,11 @@ struct InRange_SIMD v_uint16 low1 = vx_load(src2 + x); v_uint16 high1 = vx_load(src3 + x); - v_uint16 values2 = vx_load(src1 + x + v_uint16::nlanes); - v_uint16 low2 = vx_load(src2 + x + v_uint16::nlanes); - v_uint16 high2 = vx_load(src3 + x + v_uint16::nlanes); + v_uint16 values2 = vx_load(src1 + x + VTraits::vlanes()); + v_uint16 low2 = vx_load(src2 + x + VTraits::vlanes()); + v_uint16 high2 = vx_load(src3 + x + VTraits::vlanes()); - v_store(dst + x, v_pack((values1 >= low1) & (high1 >= values1), (values2 >= low2) & (high2 >= values2))); + v_store(dst + x, v_pack(v_and(v_ge(values1, low1), v_ge(high1, values1)), v_and(v_ge(values2, low2), v_ge(high2, values2)))); } vx_cleanup(); return x; @@ -1411,7 +1411,7 @@ struct InRange_SIMD uchar * dst, int len) const { int x = 0; - const int width = 
(int)v_int16::nlanes * 2; + const int width = (int)VTraits::vlanes() * 2; for (; x <= len - width; x += width) { @@ -1419,11 +1419,11 @@ struct InRange_SIMD v_int16 low1 = vx_load(src2 + x); v_int16 high1 = vx_load(src3 + x); - v_int16 values2 = vx_load(src1 + x + v_int16::nlanes); - v_int16 low2 = vx_load(src2 + x + v_int16::nlanes); - v_int16 high2 = vx_load(src3 + x + v_int16::nlanes); + v_int16 values2 = vx_load(src1 + x + VTraits::vlanes()); + v_int16 low2 = vx_load(src2 + x + VTraits::vlanes()); + v_int16 high2 = vx_load(src3 + x + VTraits::vlanes()); - v_store((schar*)(dst + x), v_pack((values1 >= low1) & (high1 >= values1), (values2 >= low2) & (high2 >= values2))); + v_store((schar*)(dst + x), v_pack(v_and(v_ge(values1, low1), v_ge(high1, values1)), v_and(v_ge(values2, low2), v_ge(high2, values2)))); } vx_cleanup(); return x; @@ -1437,7 +1437,7 @@ struct InRange_SIMD uchar * dst, int len) const { int x = 0; - const int width = (int)v_int32::nlanes * 2; + const int width = (int)VTraits::vlanes() * 2; for (; x <= len - width; x += width) { @@ -1445,11 +1445,11 @@ struct InRange_SIMD v_int32 low1 = vx_load(src2 + x); v_int32 high1 = vx_load(src3 + x); - v_int32 values2 = vx_load(src1 + x + v_int32::nlanes); - v_int32 low2 = vx_load(src2 + x + v_int32::nlanes); - v_int32 high2 = vx_load(src3 + x + v_int32::nlanes); + v_int32 values2 = vx_load(src1 + x + VTraits::vlanes()); + v_int32 low2 = vx_load(src2 + x + VTraits::vlanes()); + v_int32 high2 = vx_load(src3 + x + VTraits::vlanes()); - v_pack_store(dst + x, v_reinterpret_as_u16(v_pack((values1 >= low1) & (high1 >= values1), (values2 >= low2) & (high2 >= values2)))); + v_pack_store(dst + x, v_reinterpret_as_u16(v_pack(v_and(v_ge(values1, low1), v_ge(high1, values1)), v_and(v_ge(values2, low2), v_ge(high2, values2))))); } vx_cleanup(); return x; @@ -1463,7 +1463,7 @@ struct InRange_SIMD uchar * dst, int len) const { int x = 0; - const int width = (int)v_float32::nlanes * 2; + const int width = 
(int)VTraits::vlanes() * 2; for (; x <= len - width; x += width) { @@ -1471,12 +1471,12 @@ struct InRange_SIMD v_float32 low1 = vx_load(src2 + x); v_float32 high1 = vx_load(src3 + x); - v_float32 values2 = vx_load(src1 + x + v_float32::nlanes); - v_float32 low2 = vx_load(src2 + x + v_float32::nlanes); - v_float32 high2 = vx_load(src3 + x + v_float32::nlanes); + v_float32 values2 = vx_load(src1 + x + VTraits::vlanes()); + v_float32 low2 = vx_load(src2 + x + VTraits::vlanes()); + v_float32 high2 = vx_load(src3 + x + VTraits::vlanes()); - v_pack_store(dst + x, v_pack(v_reinterpret_as_u32(values1 >= low1) & v_reinterpret_as_u32(high1 >= values1), - v_reinterpret_as_u32(values2 >= low2) & v_reinterpret_as_u32(high2 >= values2))); + v_pack_store(dst + x, v_pack(v_and(v_reinterpret_as_u32(v_ge(values1, low1)), v_reinterpret_as_u32(v_ge(high1, values1))), + v_and(v_reinterpret_as_u32(v_ge(values2, low2)), v_reinterpret_as_u32(v_ge(high2, values2))))); } vx_cleanup(); return x; diff --git a/modules/core/src/arithm.simd.hpp b/modules/core/src/arithm.simd.hpp index 06ebfb7678..1c97e91fbe 100644 --- a/modules/core/src/arithm.simd.hpp +++ b/modules/core/src/arithm.simd.hpp @@ -219,7 +219,7 @@ template struct op_add { static inline Tvec r(const Tvec& a, const Tvec& b) - { return a + b; } + { return v_add(a, b); } static inline T1 r(T1 a, T1 b) { return c_add(a, b); } }; @@ -229,7 +229,7 @@ template struct op_sub { static inline Tvec r(const Tvec& a, const Tvec& b) - { return a - b; } + { return v_sub(a, b); } static inline T1 r(T1 a, T1 b) { return c_sub(a, b); } }; @@ -266,7 +266,7 @@ struct op_absdiff template<> struct op_absdiff { -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE static inline v_int8 r(const v_int8& a, const v_int8& b) { return v_absdiffs(a, b); } #endif @@ -276,7 +276,7 @@ struct op_absdiff template<> struct op_absdiff { -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE static inline v_int16 r(const v_int16& a, const v_int16& b) { return v_absdiffs(a, b); } #endif 
@@ -286,7 +286,7 @@ struct op_absdiff template<> struct op_absdiff { -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE static inline v_int32 r(const v_int32& a, const v_int32& b) { return v_reinterpret_as_s32(v_absdiff(a, b)); } #endif @@ -299,7 +299,7 @@ template struct op_or { static inline Tvec r(const Tvec& a, const Tvec& b) - { return a | b; } + { return v_or(a, b); } static inline T1 r(T1 a, T1 b) { return a | b; } }; @@ -307,7 +307,7 @@ template struct op_xor { static inline Tvec r(const Tvec& a, const Tvec& b) - { return a ^ b; } + { return v_xor(a, b); } static inline T1 r(T1 a, T1 b) { return a ^ b; } }; @@ -315,7 +315,7 @@ template struct op_and { static inline Tvec r(const Tvec& a, const Tvec& b) - { return a & b; } + { return v_and(a, b); } static inline T1 r(T1 a, T1 b) { return a & b; } }; @@ -324,14 +324,14 @@ struct op_not { // ignored b from loader level static inline Tvec r(const Tvec& a) - { return ~a; } + { return v_not(a); } static inline T1 r(T1 a, T1) { return ~a; } }; //////////////////////////// Loaders ///////////////////////////////// -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE template< template class OP, typename T1, typename Tvec> struct bin_loader @@ -396,13 +396,13 @@ template class OP, typename T1, typename Tv static void bin_loop(const T1* src1, size_t step1, const T1* src2, size_t step2, T1* dst, size_t step, int width, int height) { typedef OP op; -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE typedef bin_loader ldr; - enum {wide_step = Tvec::nlanes}; + const int wide_step = VTraits::vlanes(); #if !CV_NEON && CV_SIMD_WIDTH == 16 - enum {wide_step_l = wide_step * 2}; + const int wide_step_l = wide_step * 2; #else - enum {wide_step_l = wide_step}; + const int wide_step_l = wide_step; #endif #endif // CV_SIMD @@ -414,7 +414,7 @@ static void bin_loop(const T1* src1, size_t step1, const T1* src2, size_t step2, { int x = 0; - #if CV_SIMD + #if CV_SIMD || CV_SIMD_SCALABLE #if !CV_NEON && !CV_MSA if (is_aligned(src1, src2, dst)) { @@ 
-587,7 +587,7 @@ template struct op_cmplt { static inline Tvec r(const Tvec& a, const Tvec& b) - { return a < b; } + { return v_lt(a, b); } static inline uchar r(T1 a, T1 b) { return (uchar)-(int)(a < b); } }; @@ -596,7 +596,7 @@ template struct op_cmple { static inline Tvec r(const Tvec& a, const Tvec& b) - { return a <= b; } + { return v_le(a, b); } static inline uchar r(T1 a, T1 b) { return (uchar)-(int)(a <= b); } }; @@ -605,7 +605,7 @@ template struct op_cmpeq { static inline Tvec r(const Tvec& a, const Tvec& b) - { return a == b; } + { return v_eq(a, b); } static inline uchar r(T1 a, T1 b) { return (uchar)-(int)(a == b); } }; @@ -614,14 +614,14 @@ template struct op_cmpne { static inline Tvec r(const Tvec& a, const Tvec& b) - { return a != b; } + { return v_ne(a, b); } static inline uchar r(T1 a, T1 b) { return (uchar)-(int)(a != b); } }; //////////////////////////// Loaders ///////////////////////////////// -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE // todo: add support for RW alignment & stream template class OP, typename T1, typename Tvec> struct cmp_loader_n @@ -646,10 +646,10 @@ template class OP, typename T1, typename Tv struct cmp_loader_n { typedef OP op; - enum {step = Tvec::nlanes}; static inline void l(const T1* src1, const T1* src2, uchar* dst) { + const int step = VTraits::vlanes(); Tvec c0 = op::r(vx_load(src1), vx_load(src2)); Tvec c1 = op::r(vx_load(src1 + step), vx_load(src2 + step)); v_store(dst, v_pack_b(v_reinterpret_as_u16(c0), v_reinterpret_as_u16(c1))); @@ -660,10 +660,10 @@ template class OP, typename T1, typename Tv struct cmp_loader_n { typedef OP op; - enum {step = Tvec::nlanes}; static inline void l(const T1* src1, const T1* src2, uchar* dst) { + const int step = VTraits::vlanes(); v_uint32 c0 = v_reinterpret_as_u32(op::r(vx_load(src1), vx_load(src2))); v_uint32 c1 = v_reinterpret_as_u32(op::r(vx_load(src1 + step), vx_load(src2 + step))); v_uint32 c2 = v_reinterpret_as_u32(op::r(vx_load(src1 + step * 2), vx_load(src2 + step * 
2))); @@ -676,10 +676,10 @@ template class OP, typename T1, typename Tv struct cmp_loader_n { typedef OP op; - enum {step = Tvec::nlanes}; static inline void l(const T1* src1, const T1* src2, uchar* dst) { + const int step = VTraits::vlanes(); v_uint64 c0 = v_reinterpret_as_u64(op::r(vx_load(src1), vx_load(src2))); v_uint64 c1 = v_reinterpret_as_u64(op::r(vx_load(src1 + step), vx_load(src2 + step))); v_uint64 c2 = v_reinterpret_as_u64(op::r(vx_load(src1 + step * 2), vx_load(src2 + step * 2))); @@ -701,9 +701,9 @@ template class OP, typename T1, typename Tv static void cmp_loop(const T1* src1, size_t step1, const T1* src2, size_t step2, uchar* dst, size_t step, int width, int height) { typedef OP op; -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE typedef cmp_loader_n ldr; - enum {wide_step = Tvec::nlanes * sizeof(T1)}; + const int wide_step = VTraits::vlanes() * sizeof(T1); #endif // CV_SIMD step1 /= sizeof(T1); @@ -713,7 +713,7 @@ static void cmp_loop(const T1* src1, size_t step1, const T1* src2, size_t step2, { int x = 0; - #if CV_SIMD + #if CV_SIMD || CV_SIMD_SCALABLE for (; x <= width - wide_step; x += wide_step) { ldr::l(src1 + x, src2 + x, dst + x); @@ -880,7 +880,7 @@ DEFINE_SIMD_ALL(cmp) //////////////////////////// Loaders /////////////////////////////// -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE // todo: add support for RW alignment & stream template class OP, typename T1, typename T2, typename Tvec> struct scalar_loader_n @@ -1013,10 +1013,10 @@ template class OP, typename T2 struct scalar_loader_n { typedef OP op; - enum {step = v_int32::nlanes}; static inline void l(const int* src1, const int* src2, const T2* scalar, int* dst) { + const int step = VTraits::vlanes(); v_int32 v_src1 = vx_load(src1); v_int32 v_src2 = vx_load(src2); v_int32 v_src1s = vx_load(src1 + step); @@ -1043,6 +1043,7 @@ struct scalar_loader_n static inline void l(const int* src1, const T2* scalar, int* dst) { + const int step = VTraits::vlanes(); v_int32 v_src1 = vx_load(src1); 
v_int32 v_src1s = vx_load(src1 + step); @@ -1068,10 +1069,9 @@ template class OP, typename T2 struct scalar_loader_n { typedef OP op; - enum {step = v_float32::nlanes}; - static inline void l(const float* src1, const float* src2, const T2* scalar, float* dst) { + const int step = VTraits::vlanes(); v_float32 v_src1 = vx_load(src1); v_float32 v_src2 = vx_load(src2); v_float32 v_src1s = vx_load(src1 + step); @@ -1086,6 +1086,7 @@ struct scalar_loader_n static inline void l(const float* src1, const T2* scalar, float* dst) { + const int step = VTraits::vlanes(); v_float32 v_src1 = vx_load(src1); v_float32 v_src1s = vx_load(src1 + step); @@ -1262,10 +1263,10 @@ static void scalar_loop(const T1* src1, size_t step1, const T1* src2, size_t ste T1* dst, size_t step, int width, int height, const T2* scalar) { typedef OP op; -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE typedef scalar_loader_n ldr; - const int wide_step = sizeof(T1) > sizeof(ushort) ? Tvec::nlanes * 2 : - sizeof(T1) == sizeof(uchar) ? Tvec::nlanes / 2 : Tvec::nlanes; + const int wide_step = sizeof(T1) > sizeof(ushort) ? VTraits::vlanes() * 2 : + sizeof(T1) == sizeof(uchar) ? VTraits::vlanes() / 2 : VTraits::vlanes(); #endif // CV_SIMD step1 /= sizeof(T1); @@ -1276,7 +1277,7 @@ static void scalar_loop(const T1* src1, size_t step1, const T1* src2, size_t ste { int x = 0; - #if CV_SIMD + #if CV_SIMD || CV_SIMD_SCALABLE for (; x <= width - wide_step; x += wide_step) { ldr::l(src1 + x, src2 + x, scalar, dst + x); @@ -1308,10 +1309,10 @@ template class OP, typename T1 static void scalar_loop(const T1* src1, size_t step1, T1* dst, size_t step, int width, int height, const T2* scalar) { typedef OP op; -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE typedef scalar_loader_n ldr; - const int wide_step = sizeof(T1) > sizeof(ushort) ? Tvec::nlanes * 2 : - sizeof(T1) == sizeof(uchar) ? Tvec::nlanes / 2 : Tvec::nlanes; + const int wide_step = sizeof(T1) > sizeof(ushort) ? 
VTraits::vlanes() * 2 : + sizeof(T1) == sizeof(uchar) ? VTraits::vlanes() / 2 : VTraits::vlanes(); #endif // CV_SIMD step1 /= sizeof(T1); @@ -1321,7 +1322,7 @@ static void scalar_loop(const T1* src1, size_t step1, T1* dst, size_t step, int { int x = 0; - #if CV_SIMD + #if CV_SIMD || CV_SIMD_SCALABLE for (; x <= width - wide_step; x += wide_step) { ldr::l(src1 + x, scalar, dst + x); @@ -1428,7 +1429,7 @@ template struct op_mul { static inline Tvec r(const Tvec& a, const Tvec& b) - { return a * b; } + { return v_mul(a, b); } static inline T1 r(T1 a, T1 b) { return saturate_cast(a * b); } }; @@ -1436,11 +1437,11 @@ struct op_mul template struct op_mul_scale { -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE static inline v_float32 r(const v_float32& a, const v_float32& b, const T2* scalar) { const v_float32 v_scalar = vx_setall_f32(*scalar); - return v_scalar * a * b; + return v_mul(v_scalar , a , b); } #endif static inline T1 r(T1 a, T1 b, const T2* scalar) @@ -1456,7 +1457,7 @@ struct op_mul_scale static inline v_float64 r(const v_float64& a, const v_float64& b, const double* scalar) { const v_float64 v_scalar = vx_setall_f64(*scalar); - return v_scalar * a * b; + return v_mul(v_mul(v_scalar, a), b); } #endif static inline double r(double a, double b, const double* scalar) @@ -1569,7 +1570,7 @@ template struct op_div_f { static inline Tvec r(const Tvec& a, const Tvec& b) - { return a / b; } + { return v_div(a, b); } static inline T1 r(T1 a, T1 b) { return a / b; } }; @@ -1577,16 +1578,16 @@ struct op_div_f template struct op_div_scale { -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE static inline v_float32 r(const v_float32& a, const v_float32& b, const T2* scalar) { const v_float32 v_scalar = vx_setall_f32(*scalar); - return a * v_scalar / b; + return v_div(v_mul(a, v_scalar), b); } static inline Tvec pre(const Tvec& denom, const Tvec& res) { - const Tvec v_zero = vx_setall(0); - return v_select(denom == v_zero, v_zero, res); + const Tvec v_zero = 
vx_setall::lane_type>(0); + return v_select(v_eq(denom, v_zero), v_zero, res); } #endif static inline T1 r(T1 a, T1 denom, const T2* scalar) @@ -1599,11 +1600,11 @@ struct op_div_scale template<> struct op_div_scale { -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE static inline v_float32 r(const v_float32& a, const v_float32& b, const float* scalar) { const v_float32 v_scalar = vx_setall_f32(*scalar); - return a * v_scalar / b; + return v_div(v_mul(a, v_scalar), b); } #endif static inline float r(float a, float denom, const float* scalar) @@ -1617,7 +1618,7 @@ struct op_div_scale static inline v_float64 r(const v_float64& a, const v_float64& b, const double* scalar) { const v_float64 v_scalar = vx_setall_f64(*scalar); - return a * v_scalar / b; + return v_div(v_mul(a, v_scalar), b); } #endif static inline double r(double a, double denom, const double* scalar) @@ -1685,7 +1686,7 @@ DEFINE_SIMD_ALL(div, div_loop) template struct op_add_scale { -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE static inline v_float32 r(const v_float32& a, const v_float32& b, const T2* scalar) { const v_float32 v_alpha = vx_setall_f32(*scalar); @@ -1718,7 +1719,7 @@ struct op_add_scale template struct op_add_weighted { -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE static inline v_float32 r(const v_float32& a, const v_float32& b, const T2* scalars) { const v_float32 v_alpha = vx_setall_f32(scalars[0]); @@ -1835,16 +1836,16 @@ DEFINE_SIMD_F64(addWeighted, add_weighted_loop_d) template struct op_recip { -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE static inline v_float32 r(const v_float32& a, const T2* scalar) { const v_float32 v_scalar = vx_setall_f32(*scalar); - return v_scalar / a; + return v_div(v_scalar, a); } static inline Tvec pre(const Tvec& denom, const Tvec& res) { - const Tvec v_zero = vx_setall(0); - return v_select(denom == v_zero, v_zero, res); + const Tvec v_zero = vx_setall::lane_type>(0); + return v_select(v_eq(denom, v_zero), v_zero, res); } #endif static inline T1 r(T1 
denom, const T2* scalar) @@ -1857,11 +1858,11 @@ struct op_recip template<> struct op_recip { -#if CV_SIMD +#if CV_SIMD || CV_SIMD_SCALABLE static inline v_float32 r(const v_float32& a, const float* scalar) { const v_float32 v_scalar = vx_setall_f32(*scalar); - return v_scalar / a; + return v_div(v_scalar, a); } #endif static inline float r(float denom, const float* scalar) @@ -1875,7 +1876,7 @@ struct op_recip static inline v_float64 r(const v_float64& a, const double* scalar) { const v_float64 v_scalar = vx_setall_f64(*scalar); - return v_scalar / a; + return v_div(v_scalar, a); } #endif static inline double r(double denom, const double* scalar) diff --git a/modules/core/src/convert.hpp b/modules/core/src/convert.hpp index 4b9ddbb413..c689276218 100644 --- a/modules/core/src/convert.hpp +++ b/modules/core/src/convert.hpp @@ -11,7 +11,7 @@ namespace cv { -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) static inline void vx_load_as(const uchar* ptr, v_float32& a) { a = v_cvt_f32(v_reinterpret_as_s32(vx_load_expand_q(ptr))); } @@ -62,7 +62,7 @@ static inline void vx_load_pair_as(const schar* ptr, v_uint16& a, v_uint16& b) } static inline void vx_load_pair_as(const ushort* ptr, v_uint16& a, v_uint16& b) -{ a = vx_load(ptr); b = vx_load(ptr + v_uint16::nlanes); } +{ a = vx_load(ptr); b = vx_load(ptr + VTraits::vlanes()); } static inline void vx_load_pair_as(const uchar* ptr, v_int16& a, v_int16& b) { @@ -76,7 +76,7 @@ static inline void vx_load_pair_as(const schar* ptr, v_int16& a, v_int16& b) { v_expand(vx_load(ptr), a, b); } static inline void vx_load_pair_as(const short* ptr, v_int16& a, v_int16& b) -{ a = vx_load(ptr); b = vx_load(ptr + v_uint16::nlanes); } +{ a = vx_load(ptr); b = vx_load(ptr + VTraits::vlanes()); } static inline void vx_load_pair_as(const uchar* ptr, v_int32& a, v_int32& b) { @@ -105,7 +105,7 @@ static inline void vx_load_pair_as(const short* ptr, v_int32& a, v_int32& b) static inline void vx_load_pair_as(const int* ptr, v_int32& a, v_int32& b) 
{ a = vx_load(ptr); - b = vx_load(ptr + v_int32::nlanes); + b = vx_load(ptr + VTraits::vlanes()); } static inline void vx_load_pair_as(const uchar* ptr, v_float32& a, v_float32& b) @@ -142,18 +142,18 @@ static inline void vx_load_pair_as(const short* ptr, v_float32& a, v_float32& b) static inline void vx_load_pair_as(const int* ptr, v_float32& a, v_float32& b) { - v_int32 ia = vx_load(ptr), ib = vx_load(ptr + v_int32::nlanes); + v_int32 ia = vx_load(ptr), ib = vx_load(ptr + VTraits::vlanes()); a = v_cvt_f32(ia); b = v_cvt_f32(ib); } static inline void vx_load_pair_as(const float* ptr, v_float32& a, v_float32& b) -{ a = vx_load(ptr); b = vx_load(ptr + v_float32::nlanes); } +{ a = vx_load(ptr); b = vx_load(ptr + VTraits::vlanes()); } static inline void vx_load_pair_as(const float16_t* ptr, v_float32& a, v_float32& b) { a = vx_load_expand(ptr); - b = vx_load_expand(ptr + v_float32::nlanes); + b = vx_load_expand(ptr + VTraits::vlanes()); } static inline void v_store_pair_as(uchar* ptr, const v_uint16& a, const v_uint16& b) @@ -169,7 +169,7 @@ static inline void v_store_pair_as(schar* ptr, const v_uint16& a, const v_uint16 } static inline void v_store_pair_as(ushort* ptr, const v_uint16& a, const v_uint16& b) -{ v_store(ptr, a); v_store(ptr + v_uint16::nlanes, b); } +{ v_store(ptr, a); v_store(ptr + VTraits::vlanes(), b); } static inline void v_store_pair_as(uchar* ptr, const v_int16& a, const v_int16& b) { v_store(ptr, v_pack_u(a, b)); } @@ -178,7 +178,7 @@ static inline void v_store_pair_as(schar* ptr, const v_int16& a, const v_int16& { v_store(ptr, v_pack(a, b)); } static inline void v_store_pair_as(short* ptr, const v_int16& a, const v_int16& b) -{ v_store(ptr, a); v_store(ptr + v_int16::nlanes, b); } +{ v_store(ptr, a); v_store(ptr + VTraits::vlanes(), b); } static inline void v_store_pair_as(uchar* ptr, const v_int32& a, const v_int32& b) { v_pack_u_store(ptr, v_pack(a, b)); } @@ -195,7 +195,7 @@ static inline void v_store_pair_as(short* ptr, const v_int32& a, 
const v_int32& static inline void v_store_pair_as(int* ptr, const v_int32& a, const v_int32& b) { v_store(ptr, a); - v_store(ptr + v_int32::nlanes, b); + v_store(ptr + VTraits::vlanes(), b); } static inline void v_store_pair_as(uchar* ptr, const v_float32& a, const v_float32& b) @@ -214,24 +214,24 @@ static inline void v_store_pair_as(int* ptr, const v_float32& a, const v_float32 { v_int32 ia = v_round(a), ib = v_round(b); v_store(ptr, ia); - v_store(ptr + v_int32::nlanes, ib); + v_store(ptr + VTraits::vlanes(), ib); } static inline void v_store_pair_as(float* ptr, const v_float32& a, const v_float32& b) -{ v_store(ptr, a); v_store(ptr + v_float32::nlanes, b); } +{ v_store(ptr, a); v_store(ptr + VTraits::vlanes(), b); } -#if CV_SIMD_64F +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) static inline void vx_load_as(const double* ptr, v_float32& a) { - v_float64 v0 = vx_load(ptr), v1 = vx_load(ptr + v_float64::nlanes); + v_float64 v0 = vx_load(ptr), v1 = vx_load(ptr + VTraits::vlanes()); a = v_cvt_f32(v0, v1); } static inline void vx_load_pair_as(const double* ptr, v_int32& a, v_int32& b) { - v_float64 v0 = vx_load(ptr), v1 = vx_load(ptr + v_float64::nlanes); - v_float64 v2 = vx_load(ptr + v_float64::nlanes*2), v3 = vx_load(ptr + v_float64::nlanes*3); + v_float64 v0 = vx_load(ptr), v1 = vx_load(ptr + VTraits::vlanes()); + v_float64 v2 = vx_load(ptr + VTraits::vlanes()*2), v3 = vx_load(ptr + VTraits::vlanes()*3); v_int32 iv0 = v_round(v0), iv1 = v_round(v1); v_int32 iv2 = v_round(v2), iv3 = v_round(v3); a = v_combine_low(iv0, iv1); @@ -240,8 +240,8 @@ static inline void vx_load_pair_as(const double* ptr, v_int32& a, v_int32& b) static inline void vx_load_pair_as(const double* ptr, v_float32& a, v_float32& b) { - v_float64 v0 = vx_load(ptr), v1 = vx_load(ptr + v_float64::nlanes); - v_float64 v2 = vx_load(ptr + v_float64::nlanes*2), v3 = vx_load(ptr + v_float64::nlanes*3); + v_float64 v0 = vx_load(ptr), v1 = vx_load(ptr + VTraits::vlanes()); + v_float64 v2 = vx_load(ptr + 
VTraits::vlanes()*2), v3 = vx_load(ptr + VTraits::vlanes()*3); a = v_cvt_f32(v0, v1); b = v_cvt_f32(v2, v3); } @@ -291,7 +291,7 @@ static inline void vx_load_pair_as(const float* ptr, v_float64& a, v_float64& b) static inline void vx_load_pair_as(const double* ptr, v_float64& a, v_float64& b) { a = vx_load(ptr); - b = vx_load(ptr + v_float64::nlanes); + b = vx_load(ptr + VTraits::vlanes()); } static inline void vx_load_pair_as(const float16_t* ptr, v_float64& a, v_float64& b) @@ -305,7 +305,7 @@ static inline void v_store_as(double* ptr, const v_float32& a) { v_float64 fa0 = v_cvt_f64(a), fa1 = v_cvt_f64_high(a); v_store(ptr, fa0); - v_store(ptr + v_float64::nlanes, fa1); + v_store(ptr + VTraits::vlanes(), fa1); } static inline void v_store_pair_as(double* ptr, const v_int32& a, const v_int32& b) @@ -314,9 +314,9 @@ static inline void v_store_pair_as(double* ptr, const v_int32& a, const v_int32& v_float64 fb0 = v_cvt_f64(b), fb1 = v_cvt_f64_high(b); v_store(ptr, fa0); - v_store(ptr + v_float64::nlanes, fa1); - v_store(ptr + v_float64::nlanes*2, fb0); - v_store(ptr + v_float64::nlanes*3, fb1); + v_store(ptr + VTraits::vlanes(), fa1); + v_store(ptr + VTraits::vlanes()*2, fb0); + v_store(ptr + VTraits::vlanes()*3, fb1); } static inline void v_store_pair_as(double* ptr, const v_float32& a, const v_float32& b) @@ -325,15 +325,15 @@ static inline void v_store_pair_as(double* ptr, const v_float32& a, const v_floa v_float64 fb0 = v_cvt_f64(b), fb1 = v_cvt_f64_high(b); v_store(ptr, fa0); - v_store(ptr + v_float64::nlanes, fa1); - v_store(ptr + v_float64::nlanes*2, fb0); - v_store(ptr + v_float64::nlanes*3, fb1); + v_store(ptr + VTraits::vlanes(), fa1); + v_store(ptr + VTraits::vlanes()*2, fb0); + v_store(ptr + VTraits::vlanes()*3, fb1); } static inline void v_store_pair_as(double* ptr, const v_float64& a, const v_float64& b) { v_store(ptr, a); - v_store(ptr + v_float64::nlanes, b); + v_store(ptr + VTraits::vlanes(), b); } static inline void v_store_pair_as(int* ptr, const 
v_float64& a, const v_float64& b) diff --git a/modules/core/src/convert.simd.hpp b/modules/core/src/convert.simd.hpp index 5154041b6d..c126450a13 100644 --- a/modules/core/src/convert.simd.hpp +++ b/modules/core/src/convert.simd.hpp @@ -39,8 +39,8 @@ void cvt16f32f( const float16_t* src, float* dst, int len ) { CV_INSTRUMENT_REGION(); int j = 0; -#if CV_SIMD - const int VECSZ = v_float32::nlanes; +#if (CV_SIMD || CV_SIMD_SCALABLE) + const int VECSZ = VTraits::vlanes(); for( ; j < len; j += VECSZ ) { if( j > len - VECSZ ) @@ -60,8 +60,8 @@ void cvt32f16f( const float* src, float16_t* dst, int len ) { CV_INSTRUMENT_REGION(); int j = 0; -#if CV_SIMD - const int VECSZ = v_float32::nlanes; +#if (CV_SIMD || CV_SIMD_SCALABLE) + const int VECSZ = VTraits::vlanes(); for( ; j < len; j += VECSZ ) { if( j > len - VECSZ ) @@ -108,8 +108,8 @@ cvt_( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, Size size ) for( int i = 0; i < size.height; i++, src += sstep, dst += dstep ) { int j = 0; -#if CV_SIMD - const int VECSZ = _Twvec::nlanes*2; +#if (CV_SIMD || CV_SIMD_SCALABLE) + const int VECSZ = VTraits<_Twvec>::vlanes()*2; for( ; j < size.width; j += VECSZ ) { if( j > size.width - VECSZ ) @@ -139,8 +139,8 @@ cvt1_( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, Size size ) for( int i = 0; i < size.height; i++, src += sstep, dst += dstep ) { int j = 0; -#if CV_SIMD - const int VECSZ = _Twvec::nlanes; +#if (CV_SIMD || CV_SIMD_SCALABLE) + const int VECSZ = VTraits<_Twvec>::vlanes(); for( ; j < size.width; j += VECSZ ) { if( j > size.width - VECSZ ) diff --git a/modules/core/src/convert_scale.simd.hpp b/modules/core/src/convert_scale.simd.hpp index 2c6d55462b..c79a33f1b1 100644 --- a/modules/core/src/convert_scale.simd.hpp +++ b/modules/core/src/convert_scale.simd.hpp @@ -22,9 +22,9 @@ template inline void cvtabs_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, Size size, float a, float b ) { -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) v_float32 va = 
vx_setall_f32(a), vb = vx_setall_f32(b); - const int VECSZ = v_float32::nlanes*2; + const int VECSZ = VTraits::vlanes()*2; #endif sstep /= sizeof(src[0]); dstep /= sizeof(dst[0]); @@ -32,7 +32,7 @@ cvtabs_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, for( int i = 0; i < size.height; i++, src += sstep, dst += dstep ) { int j = 0; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) for( ; j < size.width; j += VECSZ ) { if( j > size.width - VECSZ ) @@ -58,9 +58,9 @@ template inline void cvtabs1_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, Size size, float a, float b ) { -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) v_float32 va = vx_setall_f32(a), vb = vx_setall_f32(b); - const int VECSZ = v_float32::nlanes*2; + const int VECSZ = VTraits::vlanes()*2; #endif sstep /= sizeof(src[0]); dstep /= sizeof(dst[0]); @@ -68,7 +68,7 @@ cvtabs1_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, for( int i = 0; i < size.height; i++, src += sstep, dst += dstep ) { int j = 0; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) for( ; j < size.width; j += VECSZ ) { if( j > size.width - VECSZ ) @@ -92,9 +92,9 @@ template inline void cvt_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, Size size, float a, float b ) { -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) v_float32 va = vx_setall_f32(a), vb = vx_setall_f32(b); - const int VECSZ = v_float32::nlanes*2; + const int VECSZ = VTraits::vlanes()*2; #endif sstep /= sizeof(src[0]); dstep /= sizeof(dst[0]); @@ -102,7 +102,7 @@ cvt_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, for( int i = 0; i < size.height; i++, src += sstep, dst += dstep ) { int j = 0; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) for( ; j < size.width; j += VECSZ ) { if( j > size.width - VECSZ ) @@ -128,9 +128,9 @@ template inline void cvt1_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, Size size, float a, float b ) { -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) v_float32 va = vx_setall_f32(a), vb = 
vx_setall_f32(b); - const int VECSZ = v_float32::nlanes; + const int VECSZ = VTraits::vlanes(); #endif sstep /= sizeof(src[0]); dstep /= sizeof(dst[0]); @@ -138,7 +138,7 @@ cvt1_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, for( int i = 0; i < size.height; i++, src += sstep, dst += dstep ) { int j = 0; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) for( ; j < size.width; j += VECSZ ) { if( j > size.width - VECSZ ) @@ -163,9 +163,9 @@ template inline void cvt_64f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, Size size, double a, double b ) { -#if CV_SIMD_64F +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) v_float64 va = vx_setall_f64(a), vb = vx_setall_f64(b); - const int VECSZ = v_float64::nlanes*2; + const int VECSZ = VTraits::vlanes()*2; #endif sstep /= sizeof(src[0]); dstep /= sizeof(dst[0]); @@ -173,7 +173,7 @@ cvt_64f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, for( int i = 0; i < size.height; i++, src += sstep, dst += dstep ) { int j = 0; -#if CV_SIMD_64F +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) for( ; j < size.width; j += VECSZ ) { if( j > size.width - VECSZ ) diff --git a/modules/core/src/copy.cpp b/modules/core/src/copy.cpp index 89948fb878..aab4fbd3f0 100644 --- a/modules/core/src/copy.cpp +++ b/modules/core/src/copy.cpp @@ -156,15 +156,15 @@ copyMask_(const uchar* _src, size_t sstep, const uchar* mask, size_t mste const uchar* src = (const uchar*)_src; uchar* dst = (uchar*)_dst; int x = 0; - #if CV_SIMD + #if (CV_SIMD || CV_SIMD_SCALABLE) { v_uint8 v_zero = vx_setzero_u8(); - for( ; x <= size.width - v_uint8::nlanes; x += v_uint8::nlanes ) + for( ; x <= size.width - VTraits::vlanes(); x += VTraits::vlanes() ) { v_uint8 v_src = vx_load(src + x), v_dst = vx_load(dst + x), - v_nmask = vx_load(mask + x) == v_zero; + v_nmask = v_eq(vx_load(mask + x), v_zero); v_dst = v_select(v_nmask, v_dst, v_src); v_store(dst + x, v_dst); @@ -188,23 +188,23 @@ copyMask_(const uchar* _src, size_t sstep, const uchar* mask, size_t mst const 
ushort* src = (const ushort*)_src; ushort* dst = (ushort*)_dst; int x = 0; - #if CV_SIMD + #if (CV_SIMD || CV_SIMD_SCALABLE) { v_uint8 v_zero = vx_setzero_u8(); - for( ; x <= size.width - v_uint8::nlanes; x += v_uint8::nlanes ) + for( ; x <= size.width - VTraits::vlanes(); x += VTraits::vlanes() ) { - v_uint16 v_src1 = vx_load(src + x), v_src2 = vx_load(src + x + v_uint16::nlanes), - v_dst1 = vx_load(dst + x), v_dst2 = vx_load(dst + x + v_uint16::nlanes); + v_uint16 v_src1 = vx_load(src + x), v_src2 = vx_load(src + x + VTraits::vlanes()), + v_dst1 = vx_load(dst + x), v_dst2 = vx_load(dst + x + VTraits::vlanes()); v_uint8 v_nmask1, v_nmask2; - v_uint8 v_nmask = vx_load(mask + x) == v_zero; + v_uint8 v_nmask = v_eq(vx_load(mask + x), v_zero); v_zip(v_nmask, v_nmask, v_nmask1, v_nmask2); v_dst1 = v_select(v_reinterpret_as_u16(v_nmask1), v_dst1, v_src1); v_dst2 = v_select(v_reinterpret_as_u16(v_nmask2), v_dst2, v_src2); v_store(dst + x, v_dst1); - v_store(dst + x + v_uint16::nlanes, v_dst2); + v_store(dst + x + VTraits::vlanes(), v_dst2); } } vx_cleanup(); diff --git a/modules/core/src/count_non_zero.simd.hpp b/modules/core/src/count_non_zero.simd.hpp index 6994564127..ce7c75aa54 100644 --- a/modules/core/src/count_non_zero.simd.hpp +++ b/modules/core/src/count_non_zero.simd.hpp @@ -32,8 +32,8 @@ static int countNonZero_(const T* src, int len ) static int countNonZero8u( const uchar* src, int len ) { int i=0, nz = 0; -#if CV_SIMD - int len0 = len & -v_uint8::nlanes; +#if (CV_SIMD || CV_SIMD_SCALABLE) + int len0 = len & -VTraits::vlanes(); v_uint8 v_zero = vx_setzero_u8(); v_uint8 v_one = vx_setall_u8(1); @@ -42,20 +42,20 @@ static int countNonZero8u( const uchar* src, int len ) { v_uint16 v_sum16 = vx_setzero_u16(); int j = i; - while (j < std::min(len0, i + 65280 * v_uint16::nlanes)) + while (j < std::min(len0, i + 65280 * VTraits::vlanes())) { v_uint8 v_sum8 = vx_setzero_u8(); int k = j; - for (; k < std::min(len0, j + 255 * v_uint8::nlanes); k += v_uint8::nlanes) - 
v_sum8 += v_one & (vx_load(src + k) == v_zero); + for (; k < std::min(len0, j + 255 * VTraits::vlanes()); k += VTraits::vlanes()) + v_sum8 = v_add(v_sum8, v_and(v_one, v_eq(vx_load(src + k), v_zero))); v_uint16 part1, part2; v_expand(v_sum8, part1, part2); - v_sum16 += part1 + part2; + v_sum16 = v_add(v_sum16, v_add(part1, part2)); j = k; } v_uint32 part1, part2; v_expand(v_sum16, part1, part2); - v_sum32 += part1 + part2; + v_sum32 = v_add(v_sum32, v_add(part1, part2)); i = j; } nz = i - v_reduce_sum(v_sum32); @@ -69,8 +69,8 @@ static int countNonZero8u( const uchar* src, int len ) static int countNonZero16u( const ushort* src, int len ) { int i = 0, nz = 0; -#if CV_SIMD - int len0 = len & -v_int8::nlanes; +#if (CV_SIMD || CV_SIMD_SCALABLE) + int len0 = len & -VTraits::vlanes(); v_uint16 v_zero = vx_setzero_u16(); v_int8 v_one = vx_setall_s8(1); @@ -79,20 +79,20 @@ static int countNonZero16u( const ushort* src, int len ) { v_int16 v_sum16 = vx_setzero_s16(); int j = i; - while (j < std::min(len0, i + 32766 * v_int16::nlanes)) + while (j < std::min(len0, i + 32766 * VTraits::vlanes())) { v_int8 v_sum8 = vx_setzero_s8(); int k = j; - for (; k < std::min(len0, j + 127 * v_int8::nlanes); k += v_int8::nlanes) - v_sum8 += v_one & v_pack(v_reinterpret_as_s16(vx_load(src + k) == v_zero), v_reinterpret_as_s16(vx_load(src + k + v_uint16::nlanes) == v_zero)); + for (; k < std::min(len0, j + 127 * VTraits::vlanes()); k += VTraits::vlanes()) + v_sum8 = v_add(v_sum8, v_and(v_one, v_pack(v_reinterpret_as_s16(v_eq(vx_load(src + k), v_zero)), v_reinterpret_as_s16(v_eq(vx_load(src + k + VTraits::vlanes()), v_zero))))); v_int16 part1, part2; v_expand(v_sum8, part1, part2); - v_sum16 += part1 + part2; + v_sum16 = v_add(v_sum16, v_add(part1, part2)); j = k; } v_int32 part1, part2; v_expand(v_sum16, part1, part2); - v_sum32 += part1 + part2; + v_sum32 = v_add(v_sum32, v_add(part1, part2)); i = j; } nz = i - v_reduce_sum(v_sum32); @@ -104,8 +104,8 @@ static int countNonZero16u( const 
ushort* src, int len ) static int countNonZero32s( const int* src, int len ) { int i = 0, nz = 0; -#if CV_SIMD - int len0 = len & -v_int8::nlanes; +#if (CV_SIMD || CV_SIMD_SCALABLE) + int len0 = len & -VTraits::vlanes(); v_int32 v_zero = vx_setzero_s32(); v_int8 v_one = vx_setall_s8(1); @@ -114,23 +114,20 @@ static int countNonZero32s( const int* src, int len ) { v_int16 v_sum16 = vx_setzero_s16(); int j = i; - while (j < std::min(len0, i + 32766 * v_int16::nlanes)) + while (j < std::min(len0, i + 32766 * VTraits::vlanes())) { v_int8 v_sum8 = vx_setzero_s8(); int k = j; - for (; k < std::min(len0, j + 127 * v_int8::nlanes); k += v_int8::nlanes) - v_sum8 += v_one & v_pack( - v_pack(vx_load(src + k ) == v_zero, vx_load(src + k + v_int32::nlanes) == v_zero), - v_pack(vx_load(src + k + 2*v_int32::nlanes) == v_zero, vx_load(src + k + 3*v_int32::nlanes) == v_zero) - ); + for (; k < std::min(len0, j + 127 * VTraits::vlanes()); k += VTraits::vlanes()) + v_sum8 = v_add(v_sum8, v_and(v_one, v_pack(v_pack(v_eq(vx_load(src + k), v_zero), v_eq(vx_load(src + k + VTraits::vlanes()), v_zero)), v_pack(v_eq(vx_load(src + k + 2 * VTraits::vlanes()), v_zero), v_eq(vx_load(src + k + 3 * VTraits::vlanes()), v_zero))))); v_int16 part1, part2; v_expand(v_sum8, part1, part2); - v_sum16 += part1 + part2; + v_sum16 = v_add(v_sum16, v_add(part1, part2)); j = k; } v_int32 part1, part2; v_expand(v_sum16, part1, part2); - v_sum32 += part1 + part2; + v_sum32 = v_add(v_sum32, v_add(part1, part2)); i = j; } nz = i - v_reduce_sum(v_sum32); @@ -142,8 +139,8 @@ static int countNonZero32s( const int* src, int len ) static int countNonZero32f( const float* src, int len ) { int i = 0, nz = 0; -#if CV_SIMD - int len0 = len & -v_int8::nlanes; +#if (CV_SIMD || CV_SIMD_SCALABLE) + int len0 = len & -VTraits::vlanes(); v_float32 v_zero = vx_setzero_f32(); v_int8 v_one = vx_setall_s8(1); @@ -152,23 +149,20 @@ static int countNonZero32f( const float* src, int len ) { v_int16 v_sum16 = vx_setzero_s16(); int j = 
i; - while (j < std::min(len0, i + 32766 * v_int16::nlanes)) + while (j < std::min(len0, i + 32766 * VTraits::vlanes())) { v_int8 v_sum8 = vx_setzero_s8(); int k = j; - for (; k < std::min(len0, j + 127 * v_int8::nlanes); k += v_int8::nlanes) - v_sum8 += v_one & v_pack( - v_pack(v_reinterpret_as_s32(vx_load(src + k ) == v_zero), v_reinterpret_as_s32(vx_load(src + k + v_float32::nlanes) == v_zero)), - v_pack(v_reinterpret_as_s32(vx_load(src + k + 2*v_float32::nlanes) == v_zero), v_reinterpret_as_s32(vx_load(src + k + 3*v_float32::nlanes) == v_zero)) - ); + for (; k < std::min(len0, j + 127 * VTraits::vlanes()); k += VTraits::vlanes()) + v_sum8 = v_add(v_sum8, v_and(v_one, v_pack(v_pack(v_reinterpret_as_s32(v_eq(vx_load(src + k), v_zero)), v_reinterpret_as_s32(v_eq(vx_load(src + k + VTraits::vlanes()), v_zero))), v_pack(v_reinterpret_as_s32(v_eq(vx_load(src + k + 2 * VTraits::vlanes()), v_zero)), v_reinterpret_as_s32(v_eq(vx_load(src + k + 3 * VTraits::vlanes()), v_zero)))))); v_int16 part1, part2; v_expand(v_sum8, part1, part2); - v_sum16 += part1 + part2; + v_sum16 = v_add(v_sum16, v_add(part1, part2)); j = k; } v_int32 part1, part2; v_expand(v_sum16, part1, part2); - v_sum32 += part1 + part2; + v_sum32 = v_add(v_sum32, v_add(part1, part2)); i = j; } nz = i - v_reduce_sum(v_sum32); @@ -180,21 +174,21 @@ static int countNonZero32f( const float* src, int len ) static int countNonZero64f( const double* src, int len ) { int nz = 0, i = 0; -#if CV_SIMD_64F +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) v_int64 sum1 = vx_setzero_s64(); v_int64 sum2 = vx_setzero_s64(); v_float64 zero = vx_setzero_f64(); - int step = v_float64::nlanes * 2; + int step = VTraits::vlanes() * 2; int len0 = len & -step; for(i = 0; i < len0; i += step ) { - sum1 += v_reinterpret_as_s64(vx_load(&src[i]) == zero); - sum2 += v_reinterpret_as_s64(vx_load(&src[i + step / 2]) == zero); + sum1 = v_add(sum1, v_reinterpret_as_s64(v_eq(vx_load(&src[i]), zero))); + sum2 = v_add(sum2, 
v_reinterpret_as_s64(v_eq(vx_load(&src[i + step / 2]), zero))); } // N.B the value is incremented by -1 (0xF...F) for each value - nz = i + (int)v_reduce_sum(sum1 + sum2); + nz = i + (int)v_reduce_sum(v_add(sum1, sum2)); v_cleanup(); #endif return nz + countNonZero_(src + i, len - i); diff --git a/modules/core/src/lapack.cpp b/modules/core/src/lapack.cpp index a644fe15a7..43c6d07d58 100644 --- a/modules/core/src/lapack.cpp +++ b/modules/core/src/lapack.cpp @@ -274,22 +274,21 @@ template struct VBLAS { int dot(const T*, const T*, int, T*) const { return 0; } int givens(T*, T*, int, T, T) const { return 0; } - int givensx(T*, T*, int, T, T, T*, T*) const { return 0; } }; -#if CV_SIMD +#if CV_SIMD // TODO: enable for CV_SIMD_SCALABLE_64F template<> inline int VBLAS::dot(const float* a, const float* b, int n, float* result) const { - if( n < 2*v_float32::nlanes ) + if( n < 2*VTraits::vlanes() ) return 0; int k = 0; v_float32 s0 = vx_setzero_f32(); - for( ; k <= n - v_float32::nlanes; k += v_float32::nlanes ) + for( ; k <= n - VTraits::vlanes(); k += VTraits::vlanes() ) { v_float32 a0 = vx_load(a + k); v_float32 b0 = vx_load(b + k); - s0 += a0 * b0; + s0 = v_add(s0, v_mul(a0, b0)); } *result = v_reduce_sum(s0); vx_cleanup(); @@ -299,16 +298,16 @@ template<> inline int VBLAS::dot(const float* a, const float* b, int n, f template<> inline int VBLAS::givens(float* a, float* b, int n, float c, float s) const { - if( n < v_float32::nlanes) + if( n < VTraits::vlanes()) return 0; int k = 0; v_float32 c4 = vx_setall_f32(c), s4 = vx_setall_f32(s); - for( ; k <= n - v_float32::nlanes; k += v_float32::nlanes ) + for( ; k <= n - VTraits::vlanes(); k += VTraits::vlanes() ) { v_float32 a0 = vx_load(a + k); v_float32 b0 = vx_load(b + k); - v_float32 t0 = (a0 * c4) + (b0 * s4); - v_float32 t1 = (b0 * c4) - (a0 * s4); + v_float32 t0 = v_add(v_mul(a0, c4), v_mul(b0, s4)); + v_float32 t1 = v_sub(v_mul(b0, c4), v_mul(a0, s4)); v_store(a + k, t0); v_store(b + k, t1); } @@ -317,44 +316,19 @@ 
template<> inline int VBLAS::givens(float* a, float* b, int n, float c, f } -template<> inline int VBLAS::givensx(float* a, float* b, int n, float c, float s, - float* anorm, float* bnorm) const -{ - if( n < v_float32::nlanes) - return 0; - int k = 0; - v_float32 c4 = vx_setall_f32(c), s4 = vx_setall_f32(s); - v_float32 sa = vx_setzero_f32(), sb = vx_setzero_f32(); - for( ; k <= n - v_float32::nlanes; k += v_float32::nlanes ) - { - v_float32 a0 = vx_load(a + k); - v_float32 b0 = vx_load(b + k); - v_float32 t0 = (a0 * c4) + (b0 * s4); - v_float32 t1 = (b0 * c4) - (a0 * s4); - v_store(a + k, t0); - v_store(b + k, t1); - sa += t0 + t0; - sb += t1 + t1; - } - *anorm = v_reduce_sum(sa); - *bnorm = v_reduce_sum(sb); - vx_cleanup(); - return k; -} - -#if CV_SIMD_64F +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) template<> inline int VBLAS::dot(const double* a, const double* b, int n, double* result) const { - if( n < 2*v_float64::nlanes ) + if( n < 2*VTraits::vlanes() ) return 0; int k = 0; v_float64 s0 = vx_setzero_f64(); - for( ; k <= n - v_float64::nlanes; k += v_float64::nlanes ) + for( ; k <= n - VTraits::vlanes(); k += VTraits::vlanes() ) { v_float64 a0 = vx_load(a + k); v_float64 b0 = vx_load(b + k); - s0 += a0 * b0; + s0 = v_add(s0, v_mul(a0, b0)); } double sbuf[2]; v_store(sbuf, s0); @@ -368,12 +342,12 @@ template<> inline int VBLAS::givens(double* a, double* b, int n, double { int k = 0; v_float64 c2 = vx_setall_f64(c), s2 = vx_setall_f64(s); - for( ; k <= n - v_float64::nlanes; k += v_float64::nlanes ) + for( ; k <= n - VTraits::vlanes(); k += VTraits::vlanes() ) { v_float64 a0 = vx_load(a + k); v_float64 b0 = vx_load(b + k); - v_float64 t0 = (a0 * c2) + (b0 * s2); - v_float64 t1 = (b0 * c2) - (a0 * s2); + v_float64 t0 = v_add(v_mul(a0, c2), v_mul(b0, s2)); + v_float64 t1 = v_sub(v_mul(b0, c2), v_mul(a0, s2)); v_store(a + k, t0); v_store(b + k, t1); } @@ -382,30 +356,6 @@ template<> inline int VBLAS::givens(double* a, double* b, int n, double } -template<> inline 
int VBLAS::givensx(double* a, double* b, int n, double c, double s, - double* anorm, double* bnorm) const -{ - int k = 0; - v_float64 c2 = vx_setall_f64(c), s2 = vx_setall_f64(s); - v_float64 sa = vx_setzero_f64(), sb = vx_setzero_f64(); - for( ; k <= n - v_float64::nlanes; k += v_float64::nlanes ) - { - v_float64 a0 = vx_load(a + k); - v_float64 b0 = vx_load(b + k); - v_float64 t0 = (a0 * c2) + (b0 * s2); - v_float64 t1 = (b0 * c2) - (a0 * s2); - v_store(a + k, t0); - v_store(b + k, t1); - sa += t0 * t0; - sb += t1 * t1; - } - double abuf[2], bbuf[2]; - v_store(abuf, sa); - v_store(bbuf, sb); - *anorm = abuf[0] + abuf[1]; - *bnorm = bbuf[0] + bbuf[1]; - return k; -} #endif //CV_SIMD_64F #endif //CV_SIMD @@ -916,7 +866,7 @@ double invert( InputArray _src, OutputArray _dst, int method ) #if CV_SIMD128 const float d_32f = (float)d; const v_float32x4 d_vec(d_32f, -d_32f, -d_32f, d_32f); - v_float32x4 s0 = v_load_halves((const float*)srcdata, (const float*)(srcdata + srcstep)) * d_vec;//0123//3120 + v_float32x4 s0 = v_mul(v_load_halves((const float *)srcdata, (const float *)(srcdata + srcstep)), d_vec);//0123//3120 s0 = v_extract<3>(s0, v_combine_low(v_rotate_right<1>(s0), s0)); v_store_low((float*)dstdata, s0); v_store_high((float*)(dstdata + dststep), s0); @@ -942,10 +892,10 @@ double invert( InputArray _src, OutputArray _dst, int method ) d = 1./d; #if CV_SIMD128_64F v_float64x2 det = v_setall_f64(d); - v_float64x2 s0 = v_load((const double*)srcdata) * det; - v_float64x2 s1 = v_load((const double*)(srcdata+srcstep)) * det; + v_float64x2 s0 = v_mul(v_load((const double *)srcdata), det); + v_float64x2 s1 = v_mul(v_load((const double *)(srcdata + srcstep)), det); v_float64x2 sm = v_extract<1>(s1, s0);//30 - v_float64x2 ss = v_setall(0) - v_extract<1>(s0, s1);//12 + v_float64x2 ss = v_sub(v_setall(0), v_extract<1>(s0, s1));//12 v_store((double*)dstdata, v_combine_low(sm, ss));//31 v_store((double*)(dstdata + dststep), v_combine_high(ss, sm));//20 #else diff --git 
a/modules/core/src/mathfuncs.cpp b/modules/core/src/mathfuncs.cpp index 056be63a71..9e3a1dbad2 100644 --- a/modules/core/src/mathfuncs.cpp +++ b/modules/core/src/mathfuncs.cpp @@ -614,13 +614,13 @@ void polarToCart( InputArray src1, InputArray src2, { k = 0; -#if CV_SIMD - int cWidth = v_float32::nlanes; +#if (CV_SIMD || CV_SIMD_SCALABLE) + int cWidth = VTraits::vlanes(); for( ; k <= len - cWidth; k += cWidth ) { v_float32 v_m = vx_load(mag + k); - v_store(x + k, vx_load(x + k) * v_m); - v_store(y + k, vx_load(y + k) * v_m); + v_store(x + k, v_mul(vx_load(x + k), v_m)); + v_store(y + k, v_mul(vx_load(y + k), v_m)); } vx_cleanup(); #endif @@ -741,7 +741,7 @@ struct iPow_SIMD } }; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) template <> struct iPow_SIMD @@ -751,7 +751,7 @@ struct iPow_SIMD int i = 0; v_uint32 v_1 = vx_setall_u32(1u); - for ( ; i <= len - v_uint16::nlanes; i += v_uint16::nlanes) + for ( ; i <= len - VTraits::vlanes(); i += VTraits::vlanes()) { v_uint32 v_a1 = v_1, v_a2 = v_1; v_uint16 v = vx_load_expand(src + i); @@ -763,16 +763,16 @@ struct iPow_SIMD { if (p & 1) { - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); } - v_b1 *= v_b1; - v_b2 *= v_b2; + v_b1 = v_mul(v_b1, v_b1); + v_b2 = v_mul(v_b2, v_b2); p >>= 1; } - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); v = v_pack(v_a1, v_a2); v_pack_store(dst + i, v); @@ -791,7 +791,7 @@ struct iPow_SIMD int i = 0; v_int32 v_1 = vx_setall_s32(1); - for ( ; i <= len - v_int16::nlanes; i += v_int16::nlanes) + for ( ; i <= len - VTraits::vlanes(); i += VTraits::vlanes()) { v_int32 v_a1 = v_1, v_a2 = v_1; v_int16 v = vx_load_expand(src + i); @@ -803,16 +803,16 @@ struct iPow_SIMD { if (p & 1) { - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); } - v_b1 *= v_b1; - v_b2 *= v_b2; + v_b1 = v_mul(v_b1, v_b1); + v_b2 = v_mul(v_b2, v_b2); p >>= 1; } - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, 
v_b1); + v_a2 = v_mul(v_a2, v_b2); v = v_pack(v_a1, v_a2); v_pack_store(dst + i, v); @@ -831,7 +831,7 @@ struct iPow_SIMD int i = 0; v_uint32 v_1 = vx_setall_u32(1u); - for ( ; i <= len - v_uint16::nlanes; i += v_uint16::nlanes) + for ( ; i <= len - VTraits::vlanes(); i += VTraits::vlanes()) { v_uint32 v_a1 = v_1, v_a2 = v_1; v_uint16 v = vx_load(src + i); @@ -843,16 +843,16 @@ struct iPow_SIMD { if (p & 1) { - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); } - v_b1 *= v_b1; - v_b2 *= v_b2; + v_b1 = v_mul(v_b1, v_b1); + v_b2 = v_mul(v_b2, v_b2); p >>= 1; } - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); v = v_pack(v_a1, v_a2); v_store(dst + i, v); @@ -871,7 +871,7 @@ struct iPow_SIMD int i = 0; v_int32 v_1 = vx_setall_s32(1); - for ( ; i <= len - v_int16::nlanes; i += v_int16::nlanes) + for ( ; i <= len - VTraits::vlanes(); i += VTraits::vlanes()) { v_int32 v_a1 = v_1, v_a2 = v_1; v_int16 v = vx_load(src + i); @@ -883,16 +883,16 @@ struct iPow_SIMD { if (p & 1) { - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); } - v_b1 *= v_b1; - v_b2 *= v_b2; + v_b1 = v_mul(v_b1, v_b1); + v_b2 = v_mul(v_b2, v_b2); p >>= 1; } - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); v = v_pack(v_a1, v_a2); v_store(dst + i, v); @@ -911,29 +911,29 @@ struct iPow_SIMD int i = 0; v_int32 v_1 = vx_setall_s32(1); - for ( ; i <= len - v_int32::nlanes*2; i += v_int32::nlanes*2) + for ( ; i <= len - VTraits::vlanes()*2; i += VTraits::vlanes()*2) { v_int32 v_a1 = v_1, v_a2 = v_1; - v_int32 v_b1 = vx_load(src + i), v_b2 = vx_load(src + i + v_int32::nlanes); + v_int32 v_b1 = vx_load(src + i), v_b2 = vx_load(src + i + VTraits::vlanes()); int p = power; while( p > 1 ) { if (p & 1) { - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); } - v_b1 *= v_b1; - v_b2 *= v_b2; + v_b1 = v_mul(v_b1, v_b1); + v_b2 = 
v_mul(v_b2, v_b2); p >>= 1; } - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); v_store(dst + i, v_a1); - v_store(dst + i + v_int32::nlanes, v_a2); + v_store(dst + i + VTraits::vlanes(), v_a2); } vx_cleanup(); @@ -949,34 +949,34 @@ struct iPow_SIMD int i = 0; v_float32 v_1 = vx_setall_f32(1.f); - for ( ; i <= len - v_float32::nlanes*2; i += v_float32::nlanes*2) + for ( ; i <= len - VTraits::vlanes()*2; i += VTraits::vlanes()*2) { v_float32 v_a1 = v_1, v_a2 = v_1; - v_float32 v_b1 = vx_load(src + i), v_b2 = vx_load(src + i + v_float32::nlanes); + v_float32 v_b1 = vx_load(src + i), v_b2 = vx_load(src + i + VTraits::vlanes()); int p = std::abs(power); if( power < 0 ) { - v_b1 = v_1 / v_b1; - v_b2 = v_1 / v_b2; + v_b1 = v_div(v_1, v_b1); + v_b2 = v_div(v_1, v_b2); } while( p > 1 ) { if (p & 1) { - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); } - v_b1 *= v_b1; - v_b2 *= v_b2; + v_b1 = v_mul(v_b1, v_b1); + v_b2 = v_mul(v_b2, v_b2); p >>= 1; } - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); v_store(dst + i, v_a1); - v_store(dst + i + v_float32::nlanes, v_a2); + v_store(dst + i + VTraits::vlanes(), v_a2); } vx_cleanup(); @@ -984,7 +984,7 @@ struct iPow_SIMD } }; -#if CV_SIMD_64F +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) template <> struct iPow_SIMD { @@ -993,34 +993,34 @@ struct iPow_SIMD int i = 0; v_float64 v_1 = vx_setall_f64(1.); - for ( ; i <= len - v_float64::nlanes*2; i += v_float64::nlanes*2) + for ( ; i <= len - VTraits::vlanes()*2; i += VTraits::vlanes()*2) { v_float64 v_a1 = v_1, v_a2 = v_1; - v_float64 v_b1 = vx_load(src + i), v_b2 = vx_load(src + i + v_float64::nlanes); + v_float64 v_b1 = vx_load(src + i), v_b2 = vx_load(src + i + VTraits::vlanes()); int p = std::abs(power); if( power < 0 ) { - v_b1 = v_1 / v_b1; - v_b2 = v_1 / v_b2; + v_b1 = v_div(v_1, v_b1); + v_b2 = v_div(v_1, v_b2); } while( p > 1 ) { if (p & 1) { - v_a1 *= v_b1; - v_a2 
*= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); } - v_b1 *= v_b1; - v_b2 *= v_b2; + v_b1 = v_mul(v_b1, v_b1); + v_b2 = v_mul(v_b2, v_b2); p >>= 1; } - v_a1 *= v_b1; - v_a2 *= v_b2; + v_a1 = v_mul(v_a1, v_b1); + v_a2 = v_mul(v_a2, v_b2); v_store(dst + i, v_a1); - v_store(dst + i + v_float64::nlanes, v_a2); + v_store(dst + i + VTraits::vlanes(), v_a2); } vx_cleanup(); @@ -1614,7 +1614,7 @@ void patchNaNs( InputOutputArray _a, double _val ) Cv32suf val; val.f = (float)_val; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) v_int32 v_mask1 = vx_setall_s32(0x7fffffff), v_mask2 = vx_setall_s32(0x7f800000); v_int32 v_val = vx_setall_s32(val.i); #endif @@ -1624,12 +1624,12 @@ void patchNaNs( InputOutputArray _a, double _val ) int* tptr = ptrs[0]; size_t j = 0; -#if CV_SIMD - size_t cWidth = (size_t)v_int32::nlanes; +#if (CV_SIMD || CV_SIMD_SCALABLE) + size_t cWidth = (size_t)VTraits::vlanes(); for ( ; j + cWidth <= len; j += cWidth) { v_int32 v_src = vx_load(tptr + j); - v_int32 v_cmp_mask = v_mask2 < (v_src & v_mask1); + v_int32 v_cmp_mask = v_lt(v_mask2, v_and(v_src, v_mask1)); v_int32 v_dst = v_select(v_cmp_mask, v_val, v_src); v_store(tptr + j, v_dst); } diff --git a/modules/core/src/matmul.simd.hpp b/modules/core/src/matmul.simd.hpp index 5a7f36d12b..62aacc0d63 100644 --- a/modules/core/src/matmul.simd.hpp +++ b/modules/core/src/matmul.simd.hpp @@ -1454,7 +1454,7 @@ transform_( const T* src, T* dst, const WT* m, int len, int scn, int dcn ) static void transform_8u( const uchar* src, uchar* dst, const float* m, int len, int scn, int dcn ) { -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) const int BITS = 10, SCALE = 1 << BITS; const float MAX_M = (float)(1 << (15 - BITS)); @@ -1485,7 +1485,7 @@ transform_8u( const uchar* src, uchar* dst, const float* m, int len, int scn, in v_int32 m10 = vx_setall_s32(m32[4]); v_int32 m11 = vx_setall_s32(m32[5]); int x = 0; - for (; x <= (len - v_uint8::nlanes) * nChannels; x += v_uint8::nlanes * nChannels) + for (; x <= 
(len - VTraits::vlanes()) * nChannels; x += VTraits::vlanes() * nChannels) { v_uint8 b, g, r; v_load_deinterleave(src + x, b, g, r); @@ -1499,20 +1499,20 @@ transform_8u( const uchar* src, uchar* dst, const float* m, int len, int scn, in v_int32 p1, p3; v_expand(bgl, p0, p2); v_expand(v_reinterpret_as_s16(rl), p1, p3); - dbl = v_rshr_pack(v_dotprod(v_reinterpret_as_s16(p0), m01) + p1 * m2 + m3, - v_dotprod(v_reinterpret_as_s16(p2), m01) + p3 * m2 + m3); - dgl = v_rshr_pack(v_dotprod(v_reinterpret_as_s16(p0), m45) + p1 * m6 + m7, - v_dotprod(v_reinterpret_as_s16(p2), m45) + p3 * m6 + m7); - drl = v_rshr_pack(v_dotprod(v_reinterpret_as_s16(p0), m89) + p1 * m10 + m11, - v_dotprod(v_reinterpret_as_s16(p2), m89) + p3 * m10 + m11); + dbl = v_rshr_pack(v_add(v_add(v_dotprod(v_reinterpret_as_s16(p0), m01), v_mul(p1, m2)), m3), + v_add(v_add(v_dotprod(v_reinterpret_as_s16(p2), m01), v_mul(p3, m2)), m3)); + dgl = v_rshr_pack(v_add(v_add(v_dotprod(v_reinterpret_as_s16(p0), m45), v_mul(p1, m6)), m7), + v_add(v_add(v_dotprod(v_reinterpret_as_s16(p2), m45), v_mul(p3, m6)), m7)); + drl = v_rshr_pack(v_add(v_add(v_dotprod(v_reinterpret_as_s16(p0), m89), v_mul(p1, m10)), m11), + v_add(v_add(v_dotprod(v_reinterpret_as_s16(p2), m89), v_mul(p3, m10)), m11)); v_expand(bgh, p0, p2); v_expand(v_reinterpret_as_s16(rh), p1, p3); - dbh = v_rshr_pack(v_dotprod(v_reinterpret_as_s16(p0), m01) + p1 * m2 + m3, - v_dotprod(v_reinterpret_as_s16(p2), m01) + p3 * m2 + m3); - dgh = v_rshr_pack(v_dotprod(v_reinterpret_as_s16(p0), m45) + p1 * m6 + m7, - v_dotprod(v_reinterpret_as_s16(p2), m45) + p3 * m6 + m7); - drh = v_rshr_pack(v_dotprod(v_reinterpret_as_s16(p0), m89) + p1 * m10 + m11, - v_dotprod(v_reinterpret_as_s16(p2), m89) + p3 * m10 + m11); + dbh = v_rshr_pack(v_add(v_add(v_dotprod(v_reinterpret_as_s16(p0), m01), v_mul(p1, m2)), m3), + v_add(v_add(v_dotprod(v_reinterpret_as_s16(p2), m01), v_mul(p3, m2)), m3)); + dgh = v_rshr_pack(v_add(v_add(v_dotprod(v_reinterpret_as_s16(p0), m45), v_mul(p1, 
m6)), m7), + v_add(v_add(v_dotprod(v_reinterpret_as_s16(p2), m45), v_mul(p3, m6)), m7)); + drh = v_rshr_pack(v_add(v_add(v_dotprod(v_reinterpret_as_s16(p0), m89), v_mul(p1, m10)), m11), + v_add(v_add(v_dotprod(v_reinterpret_as_s16(p2), m89), v_mul(p3, m10)), m11)); v_store_interleave(dst + x, v_pack_u(dbl, dbh), v_pack_u(dgl, dgh), v_pack_u(drl, drh)); } m32[1] = saturate_cast((m[3] + 0.5f)*SCALE); @@ -1537,7 +1537,7 @@ transform_8u( const uchar* src, uchar* dst, const float* m, int len, int scn, in static void transform_16u( const ushort* src, ushort* dst, const float* m, int len, int scn, int dcn ) { -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) if( scn == 3 && dcn == 3 ) { int x = 0; @@ -1555,7 +1555,7 @@ transform_16u( const ushort* src, ushort* dst, const float* m, int len, int scn, v_float32 m10 = vx_setall_f32(m[10]); v_float32 m11 = vx_setall_f32(m[11] - 32768.f); v_int16 delta = vx_setall_s16(-32768); - for (; x <= (len - v_uint16::nlanes)*3; x += v_uint16::nlanes*3) + for (; x <= (len - VTraits::vlanes())*3; x += VTraits::vlanes()*3) { v_uint16 b, g, r; v_load_deinterleave(src + x, b, g, r); @@ -1574,6 +1574,7 @@ transform_16u( const ushort* src, ushort* dst, const float* m, int len, int scn, v_store_interleave(dst + x, v_reinterpret_as_u16(db), v_reinterpret_as_u16(dg), v_reinterpret_as_u16(dr)); } #endif +#if CV_SIMD128 v_float32x4 _m0l(m[0], m[4], m[ 8], 0.f); v_float32x4 _m1l(m[1], m[5], m[ 9], 0.f); v_float32x4 _m2l(m[2], m[6], m[10], 0.f); @@ -1587,6 +1588,7 @@ transform_16u( const ushort* src, ushort* dst, const float* m, int len, int scn, v_store(dst + x, v_rotate_right<1>(v_reinterpret_as_u16(v_add_wrap(v_pack( v_round(v_matmuladd(v_cvt_f32(v_reinterpret_as_s32(v_load_expand(src + x ))), _m0h, _m1h, _m2h, _m3h)), v_round(v_matmuladd(v_cvt_f32(v_reinterpret_as_s32(v_load_expand(src + x + 3))), _m0l, _m1l, _m2l, _m3l))), _delta)))); +#endif //CV_SIMD128 for( ; x < len * 3; x += 3 ) { float v0 = src[x], v1 = src[x + 1], v2 = src[x + 2]; @@ 
-1606,25 +1608,25 @@ transform_16u( const ushort* src, ushort* dst, const float* m, int len, int scn, static void transform_32f( const float* src, float* dst, const float* m, int len, int scn, int dcn ) { -#if CV_SIMD && !defined(__aarch64__) && !defined(_M_ARM64) +#if (CV_SIMD || CV_SIMD_SCALABLE) && !defined(__aarch64__) && !defined(_M_ARM64) int x = 0; if( scn == 3 && dcn == 3 ) { - int idx[v_float32::nlanes/2]; - for( int i = 0; i < v_float32::nlanes/4; i++ ) + int idx[VTraits::max_nlanes/2]; + for( int i = 0; i < VTraits::vlanes()/4; i++ ) { idx[i] = 3*i; - idx[i + v_float32::nlanes/4] = 0; + idx[i + VTraits::vlanes()/4] = 0; } float _m[] = { m[0], m[4], m[ 8], 0.f, m[1], m[5], m[ 9], 0.f, m[2], m[6], m[10], 0.f, m[3], m[7], m[11], 0.f }; - v_float32 m0 = vx_lut_quads(_m , idx + v_float32::nlanes/4); - v_float32 m1 = vx_lut_quads(_m + 4, idx + v_float32::nlanes/4); - v_float32 m2 = vx_lut_quads(_m + 8, idx + v_float32::nlanes/4); - v_float32 m3 = vx_lut_quads(_m + 12, idx + v_float32::nlanes/4); - for( ; x <= len*3 - v_float32::nlanes; x += 3*v_float32::nlanes/4 ) + v_float32 m0 = vx_lut_quads(_m , idx + VTraits::vlanes()/4); + v_float32 m1 = vx_lut_quads(_m + 4, idx + VTraits::vlanes()/4); + v_float32 m2 = vx_lut_quads(_m + 8, idx + VTraits::vlanes()/4); + v_float32 m3 = vx_lut_quads(_m + 12, idx + VTraits::vlanes()/4); + for( ; x <= len*3 - VTraits::vlanes(); x += 3*VTraits::vlanes()/4 ) v_store(dst + x, v_pack_triplets(v_matmuladd(vx_lut_quads(src + x, idx), m0, m1, m2, m3))); for( ; x < len*3; x += 3 ) { @@ -1641,8 +1643,8 @@ transform_32f( const float* src, float* dst, const float* m, int len, int scn, i if( scn == 4 && dcn == 4 ) { #if CV_SIMD_WIDTH > 16 - int idx[v_float32::nlanes/4]; - for( int i = 0; i < v_float32::nlanes/4; i++ ) + int idx[VTraits::max_nlanes/4]; + for( int i = 0; i < VTraits::vlanes()/4; i++ ) idx[i] = 0; float _m[] = { m[4], m[9], m[14], m[19] }; v_float32 m0 = vx_lut_quads(m , idx); @@ -1650,12 +1652,13 @@ transform_32f( const 
float* src, float* dst, const float* m, int len, int scn, i v_float32 m2 = vx_lut_quads(m+10, idx); v_float32 m3 = vx_lut_quads(m+15, idx); v_float32 m4 = vx_lut_quads(_m, idx); - for( ; x <= len*4 - v_float32::nlanes; x += v_float32::nlanes ) + for( ; x <= len*4 - VTraits::vlanes(); x += VTraits::vlanes() ) { v_float32 v_src = vx_load(src + x); - v_store(dst + x, v_reduce_sum4(v_src * m0, v_src * m1, v_src * m2, v_src * m3) + m4); + v_store(dst + x, v_add(v_reduce_sum4(v_mul(v_src, m0), v_mul(v_src, m1), v_mul(v_src, m2), v_mul(v_src, m3)), m4)); } #endif +#if CV_SIMD128 v_float32x4 _m0 = v_load(m ); v_float32x4 _m1 = v_load(m + 5); v_float32x4 _m2 = v_load(m + 10); @@ -1666,6 +1669,17 @@ transform_32f( const float* src, float* dst, const float* m, int len, int scn, i v_float32x4 v_src = v_load(src + x); v_store(dst + x, v_reduce_sum4(v_src * _m0, v_src * _m1, v_src * _m2, v_src * _m3) + _m4); } +#else // CV_SIMD_WIDTH >= 16 && !CV_SIMD128 + for( ; x < len*4; x += 4 ) + { + float v0 = src[x], v1 = src[x+1], v2 = src[x+2], v3 = src[x+3]; + float t0 = saturate_cast(m[0]*v0 + m[1]*v1 + m[ 2]*v2 + m[ 3]*v3 + m[ 4]); + float t1 = saturate_cast(m[5]*v0 + m[6]*v1 + m[ 7]*v2 + m[ 8]*v3 + m[ 9]); + float t2 = saturate_cast(m[10]*v0 + m[11]*v1 + m[12]*v2 + m[13]*v3 + m[14]); + float t3 = saturate_cast(m[15]*v0 + m[16]*v1 + m[17]*v2 + m[18]*v3 + m[19]); + dst[x] = t0; dst[x+1] = t1; dst[x+2] = t2; dst[x+3] = t3; + } +#endif vx_cleanup(); return; } @@ -1936,9 +1950,9 @@ static void scaleAdd_32f(const float* src1, const float* src2, float* dst, { float alpha = *_alpha; int i = 0; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) v_float32 v_alpha = vx_setall_f32(alpha); - const int cWidth = v_float32::nlanes; + const int cWidth = VTraits::vlanes(); for (; i <= len - cWidth; i += cWidth) v_store(dst + i, v_muladd(vx_load(src1 + i), v_alpha, vx_load(src2 + i))); vx_cleanup(); @@ -1953,9 +1967,9 @@ static void scaleAdd_64f(const double* src1, const double* src2, double* dst, { 
double alpha = *_alpha; int i = 0; -#if CV_SIMD_64F +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) v_float64 a2 = vx_setall_f64(alpha); - const int cWidth = v_float64::nlanes; + const int cWidth = VTraits::vlanes(); for (; i <= len - cWidth; i += cWidth) v_store(dst + i, v_muladd(vx_load(src1 + i), a2, vx_load(src2 + i))); vx_cleanup(); @@ -2078,7 +2092,7 @@ MulTransposedR(const Mat& srcmat, const Mat& dstmat, const Mat& deltamat, double deltastep = deltastep ? 4 : 0; } -#if CV_SIMD_64F +#if CV_SIMD128_64F v_float64x2 v_scale = v_setall_f64(scale); #endif @@ -2090,7 +2104,7 @@ MulTransposedR(const Mat& srcmat, const Mat& dstmat, const Mat& deltamat, double for( j = i; j <= size.width - 4; j += 4 ) { -#if CV_SIMD_64F +#if CV_SIMD128_64F if (DataType::depth == CV_64F && DataType
::depth == CV_64F) { v_float64x2 s0 = v_setzero_f64(), s1 = v_setzero_f64(); @@ -2150,7 +2164,7 @@ MulTransposedR(const Mat& srcmat, const Mat& dstmat, const Mat& deltamat, double for( j = i; j <= size.width - 4; j += 4 ) { -#if CV_SIMD_64F +#if CV_SIMD128_64F if (DataType::depth == CV_64F && DataType
::depth == CV_64F) { v_float64x2 s0 = v_setzero_f64(), s1 = v_setzero_f64(); @@ -2227,7 +2241,7 @@ MulTransposedL(const Mat& srcmat, const Mat& dstmat, const Mat& deltamat, double double s = 0; const sT *tsrc1 = src + i*srcstep; const sT *tsrc2 = src + j*srcstep; -#if CV_SIMD_64F +#if CV_SIMD128_64F if (DataType::depth == CV_64F && DataType
::depth == CV_64F) { const double *v_tsrc1 = (double *)(tsrc1); @@ -2280,7 +2294,7 @@ MulTransposedL(const Mat& srcmat, const Mat& dstmat, const Mat& deltamat, double delta_buf[2] = delta_buf[3] = tdelta2[0]; tdelta2 = delta_buf; } -#if CV_SIMD_64F +#if CV_SIMD128_64F if (DataType::depth == CV_64F && DataType
::depth == CV_64F) { const double *v_tsrc2 = (double *)(tsrc2); @@ -2393,14 +2407,14 @@ double dotProd_8u(const uchar* src1, const uchar* src2, int len) double r = 0; int i = 0; -#if CV_SIMD - int len0 = len & -v_uint16::nlanes, blockSize0 = (1 << 15), blockSize; +#if (CV_SIMD || CV_SIMD_SCALABLE) + int len0 = len & -VTraits::vlanes(), blockSize0 = (1 << 15), blockSize; while (i < len0) { blockSize = std::min(len0 - i, blockSize0); v_uint32 v_sum = vx_setzero_u32(); - const int cWidth = v_uint16::nlanes; + const int cWidth = VTraits::vlanes(); int j = 0; for (; j <= blockSize - cWidth * 2; j += cWidth * 2) @@ -2414,7 +2428,7 @@ double dotProd_8u(const uchar* src1, const uchar* src2, int len) { v_int16 v_src10 = v_reinterpret_as_s16(vx_load_expand(src1 + j)); v_int16 v_src20 = v_reinterpret_as_s16(vx_load_expand(src2 + j)); - v_sum += v_reinterpret_as_u32(v_dotprod_fast(v_src10, v_src20)); + v_sum = v_add(v_sum, v_reinterpret_as_u32(v_dotprod_fast(v_src10, v_src20))); } r += (double)v_reduce_sum(v_sum); @@ -2433,14 +2447,14 @@ double dotProd_8s(const schar* src1, const schar* src2, int len) double r = 0.0; int i = 0; -#if CV_SIMD - int len0 = len & -v_int16::nlanes, blockSize0 = (1 << 14), blockSize; +#if (CV_SIMD || CV_SIMD_SCALABLE) + int len0 = len & -VTraits::vlanes(), blockSize0 = (1 << 14), blockSize; while (i < len0) { blockSize = std::min(len0 - i, blockSize0); v_int32 v_sum = vx_setzero_s32(); - const int cWidth = v_int16::nlanes; + const int cWidth = VTraits::vlanes(); int j = 0; for (; j <= blockSize - cWidth * 2; j += cWidth * 2) @@ -2473,14 +2487,14 @@ double dotProd_16u(const ushort* src1, const ushort* src2, int len) double r = 0.0; int i = 0; -#if CV_SIMD - int len0 = len & -v_uint16::nlanes, blockSize0 = (1 << 24), blockSize; +#if (CV_SIMD || CV_SIMD_SCALABLE) + int len0 = len & -VTraits::vlanes(), blockSize0 = (1 << 24), blockSize; while (i < len0) { blockSize = std::min(len0 - i, blockSize0); v_uint64 v_sum = vx_setzero_u64(); - const int cWidth = 
v_uint16::nlanes; + const int cWidth = VTraits::vlanes(); int j = 0; for (; j <= blockSize - cWidth; j += cWidth) @@ -2505,14 +2519,14 @@ double dotProd_16s(const short* src1, const short* src2, int len) double r = 0.0; int i = 0; -#if CV_SIMD - int len0 = len & -v_int16::nlanes, blockSize0 = (1 << 24), blockSize; +#if (CV_SIMD || CV_SIMD_SCALABLE) + int len0 = len & -VTraits::vlanes(), blockSize0 = (1 << 24), blockSize; while (i < len0) { blockSize = std::min(len0 - i, blockSize0); v_int64 v_sum = vx_setzero_s64(); - const int cWidth = v_int16::nlanes; + const int cWidth = VTraits::vlanes(); int j = 0; for (; j <= blockSize - cWidth; j += cWidth) @@ -2534,10 +2548,10 @@ double dotProd_16s(const short* src1, const short* src2, int len) double dotProd_32s(const int* src1, const int* src2, int len) { -#if CV_SIMD_64F +#if CV_SIMD_64F // TODO: enable for CV_SIMD_SCALABLE_64F double r = .0; int i = 0; - const int step = v_int32::nlanes; + const int step = VTraits::vlanes(); v_float64 v_sum0 = vx_setzero_f64(); #if CV_SIMD_WIDTH == 16 const int wstep = step * 2; @@ -2572,8 +2586,8 @@ double dotProd_32f(const float* src1, const float* src2, int len) double r = 0.0; int i = 0; -#if CV_SIMD - int len0 = len & -v_float32::nlanes, blockSize0 = (1 << 13), blockSize; +#if (CV_SIMD || CV_SIMD_SCALABLE) + int len0 = len & -VTraits::vlanes(), blockSize0 = (1 << 13), blockSize; while (i < len0) { @@ -2581,7 +2595,7 @@ double dotProd_32f(const float* src1, const float* src2, int len) v_float32 v_sum = vx_setzero_f32(); int j = 0; - int cWidth = v_float32::nlanes; + int cWidth = VTraits::vlanes(); #if CV_ENABLE_UNROLLED v_float32 v_sum1 = vx_setzero_f32(); @@ -2600,7 +2614,7 @@ double dotProd_32f(const float* src1, const float* src2, int len) vx_load(src2 + j + (cWidth * 3)), v_sum3); } - v_sum += v_sum1 + v_sum2 + v_sum3; + v_sum = v_add(v_sum, v_add(v_add(v_sum1, v_sum2), v_sum3)); #endif for (; j <= blockSize - cWidth; j += cWidth) diff --git 
a/modules/core/src/matrix_transform.cpp b/modules/core/src/matrix_transform.cpp index 7f1043fbbe..744ee69b0d 100644 --- a/modules/core/src/matrix_transform.cpp +++ b/modules/core/src/matrix_transform.cpp @@ -440,7 +440,7 @@ template CV_ALWAYS_INLINE void flipHoriz_double( const static void flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz ) { -#if CV_SIMD +#if CV_SIMD128 #if CV_STRONG_ALIGNMENT size_t alignmentMark = ((size_t)src)|((size_t)dst)|sstep|dstep; #endif @@ -563,7 +563,7 @@ flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, } #endif else -#endif // CV_SIMD +#endif // CV_SIMD128 { int i, j, limit = (int)(((size.width + 1)/2)*esz); AutoBuffer _tab(size.width*esz); @@ -596,12 +596,12 @@ flipVert( const uchar* src0, size_t sstep, uchar* dst0, size_t dstep, Size size, dst0 += dstep, dst1 -= dstep ) { int i = 0; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) #if CV_STRONG_ALIGNMENT if (isAligned(src0, src1, dst0, dst1)) #endif { - for (; i <= size.width - CV_SIMD_WIDTH; i += CV_SIMD_WIDTH) + for (; i <= size.width - VTraits::vlanes(); i += VTraits::vlanes()) { v_int32 t0 = v_reinterpret_as_s32(vx_load(src0 + i)); v_int32 t1 = v_reinterpret_as_s32(vx_load(src1 + i)); @@ -612,7 +612,7 @@ flipVert( const uchar* src0, size_t sstep, uchar* dst0, size_t dstep, Size size, #if CV_STRONG_ALIGNMENT else { - for (; i <= size.width - CV_SIMD_WIDTH; i += CV_SIMD_WIDTH) + for (; i <= size.width - VTraits::vlanes(); i += VTraits::vlanes()) { v_uint8 t0 = vx_load(src0 + i); v_uint8 t1 = vx_load(src1 + i); diff --git a/modules/core/src/merge.simd.hpp b/modules/core/src/merge.simd.hpp index ad08dd8879..d67a117c7b 100644 --- a/modules/core/src/merge.simd.hpp +++ b/modules/core/src/merge.simd.hpp @@ -15,7 +15,7 @@ void merge64s(const int64** src, int64* dst, int len, int cn); #ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) /* The trick with 
STORE_UNALIGNED/STORE_ALIGNED_NOCACHE is the following: on IA there are instructions movntps and such to which @@ -38,7 +38,7 @@ void merge64s(const int64** src, int64* dst, int len, int cn); template static void vecmerge_( const T** src, T* dst, int len, int cn ) { - const int VECSZ = VecT::nlanes; + const int VECSZ = VTraits::vlanes(); int i, i0 = 0; const T* src0 = src[0]; const T* src1 = src[1]; @@ -173,8 +173,8 @@ merge_( const T** src, T* dst, int len, int cn ) void merge8u(const uchar** src, uchar* dst, int len, int cn ) { CV_INSTRUMENT_REGION(); -#if CV_SIMD - if( len >= v_uint8::nlanes && 2 <= cn && cn <= 4 ) +#if (CV_SIMD || CV_SIMD_SCALABLE) + if( len >= VTraits::vlanes() && 2 <= cn && cn <= 4 ) vecmerge_(src, dst, len, cn); else #endif @@ -184,8 +184,8 @@ void merge8u(const uchar** src, uchar* dst, int len, int cn ) void merge16u(const ushort** src, ushort* dst, int len, int cn ) { CV_INSTRUMENT_REGION(); -#if CV_SIMD - if( len >= v_uint16::nlanes && 2 <= cn && cn <= 4 ) +#if (CV_SIMD || CV_SIMD_SCALABLE) + if( len >= VTraits::vlanes() && 2 <= cn && cn <= 4 ) vecmerge_(src, dst, len, cn); else #endif @@ -195,8 +195,8 @@ void merge16u(const ushort** src, ushort* dst, int len, int cn ) void merge32s(const int** src, int* dst, int len, int cn ) { CV_INSTRUMENT_REGION(); -#if CV_SIMD - if( len >= v_int32::nlanes && 2 <= cn && cn <= 4 ) +#if (CV_SIMD || CV_SIMD_SCALABLE) + if( len >= VTraits::vlanes() && 2 <= cn && cn <= 4 ) vecmerge_(src, dst, len, cn); else #endif @@ -206,8 +206,8 @@ void merge32s(const int** src, int* dst, int len, int cn ) void merge64s(const int64** src, int64* dst, int len, int cn ) { CV_INSTRUMENT_REGION(); -#if CV_SIMD - if( len >= v_int64::nlanes && 2 <= cn && cn <= 4 ) +#if (CV_SIMD || CV_SIMD_SCALABLE) + if( len >= VTraits::vlanes() && 2 <= cn && cn <= 4 ) vecmerge_(src, dst, len, cn); else #endif diff --git a/modules/core/src/norm.cpp b/modules/core/src/norm.cpp index 69da85f291..be68efddf0 100644 --- a/modules/core/src/norm.cpp 
+++ b/modules/core/src/norm.cpp @@ -63,25 +63,25 @@ int normHamming(const uchar* a, int n, int cellSize) return -1; int i = 0; int result = 0; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) v_uint64 t = vx_setzero_u64(); if ( cellSize == 2) { v_uint16 mask = v_reinterpret_as_u16(vx_setall_u8(0x55)); - for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes) + for(; i <= n - VTraits::vlanes(); i += VTraits::vlanes()) { v_uint16 a0 = v_reinterpret_as_u16(vx_load(a + i)); - t += v_popcount(v_reinterpret_as_u64((a0 | (a0 >> 1)) & mask)); + t = v_add(t, v_popcount(v_reinterpret_as_u64(v_and(v_or(a0, v_shr<1>(a0)), mask)))); } } else // cellSize == 4 { v_uint16 mask = v_reinterpret_as_u16(vx_setall_u8(0x11)); - for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes) + for(; i <= n - VTraits::vlanes(); i += VTraits::vlanes()) { v_uint16 a0 = v_reinterpret_as_u16(vx_load(a + i)); - v_uint16 a1 = a0 | (a0 >> 2); - t += v_popcount(v_reinterpret_as_u64((a1 | (a1 >> 1)) & mask)); + v_uint16 a1 = v_or(a0, v_shr<2>(a0)); + t = v_add(t, v_popcount(v_reinterpret_as_u64(v_and(v_or(a1, v_shr<1>(a1)), mask)))); } } @@ -109,25 +109,25 @@ int normHamming(const uchar* a, const uchar* b, int n, int cellSize) return -1; int i = 0; int result = 0; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) v_uint64 t = vx_setzero_u64(); if ( cellSize == 2) { v_uint16 mask = v_reinterpret_as_u16(vx_setall_u8(0x55)); - for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes) + for(; i <= n - VTraits::vlanes(); i += VTraits::vlanes()) { - v_uint16 ab0 = v_reinterpret_as_u16(vx_load(a + i) ^ vx_load(b + i)); - t += v_popcount(v_reinterpret_as_u64((ab0 | (ab0 >> 1)) & mask)); + v_uint16 ab0 = v_reinterpret_as_u16(v_xor(vx_load(a + i), vx_load(b + i))); + t = v_add(t, v_popcount(v_reinterpret_as_u64(v_and(v_or(ab0, v_shr<1>(ab0)), mask)))); } } else // cellSize == 4 { v_uint16 mask = v_reinterpret_as_u16(vx_setall_u8(0x11)); - for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes) + for(; i <= n - 
VTraits::vlanes(); i += VTraits::vlanes()) { - v_uint16 ab0 = v_reinterpret_as_u16(vx_load(a + i) ^ vx_load(b + i)); - v_uint16 ab1 = ab0 | (ab0 >> 2); - t += v_popcount(v_reinterpret_as_u64((ab1 | (ab1 >> 1)) & mask)); + v_uint16 ab0 = v_reinterpret_as_u16(v_xor(vx_load(a + i), vx_load(b + i))); + v_uint16 ab1 = v_or(ab0, v_shr<2>(ab0)); + t = v_add(t, v_popcount(v_reinterpret_as_u64(v_and(v_or(ab1, v_shr<1>(ab1)), mask)))); } } result += (int)v_reduce_sum(t); @@ -145,21 +145,21 @@ int normHamming(const uchar* a, const uchar* b, int n, int cellSize) float normL2Sqr_(const float* a, const float* b, int n) { int j = 0; float d = 0.f; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) v_float32 v_d0 = vx_setzero_f32(), v_d1 = vx_setzero_f32(); v_float32 v_d2 = vx_setzero_f32(), v_d3 = vx_setzero_f32(); - for (; j <= n - 4 * v_float32::nlanes; j += 4 * v_float32::nlanes) + for (; j <= n - 4 * VTraits::vlanes(); j += 4 * VTraits::vlanes()) { - v_float32 t0 = vx_load(a + j) - vx_load(b + j); - v_float32 t1 = vx_load(a + j + v_float32::nlanes) - vx_load(b + j + v_float32::nlanes); + v_float32 t0 = v_sub(vx_load(a + j), vx_load(b + j)); + v_float32 t1 = v_sub(vx_load(a + j + VTraits::vlanes()), vx_load(b + j + VTraits::vlanes())); v_d0 = v_muladd(t0, t0, v_d0); - v_float32 t2 = vx_load(a + j + 2 * v_float32::nlanes) - vx_load(b + j + 2 * v_float32::nlanes); + v_float32 t2 = v_sub(vx_load(a + j + 2 * VTraits::vlanes()), vx_load(b + j + 2 * VTraits::vlanes())); v_d1 = v_muladd(t1, t1, v_d1); - v_float32 t3 = vx_load(a + j + 3 * v_float32::nlanes) - vx_load(b + j + 3 * v_float32::nlanes); + v_float32 t3 = v_sub(vx_load(a + j + 3 * VTraits::vlanes()), vx_load(b + j + 3 * VTraits::vlanes())); v_d2 = v_muladd(t2, t2, v_d2); v_d3 = v_muladd(t3, t3, v_d3); } - d = v_reduce_sum(v_d0 + v_d1 + v_d2 + v_d3); + d = v_reduce_sum(v_add(v_add(v_add(v_d0, v_d1), v_d2), v_d3)); #endif for( ; j < n; j++ ) { @@ -173,17 +173,17 @@ float normL2Sqr_(const float* a, const float* b, int n) float 
normL1_(const float* a, const float* b, int n) { int j = 0; float d = 0.f; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) v_float32 v_d0 = vx_setzero_f32(), v_d1 = vx_setzero_f32(); v_float32 v_d2 = vx_setzero_f32(), v_d3 = vx_setzero_f32(); - for (; j <= n - 4 * v_float32::nlanes; j += 4 * v_float32::nlanes) + for (; j <= n - 4 * VTraits::vlanes(); j += 4 * VTraits::vlanes()) { - v_d0 += v_absdiff(vx_load(a + j), vx_load(b + j)); - v_d1 += v_absdiff(vx_load(a + j + v_float32::nlanes), vx_load(b + j + v_float32::nlanes)); - v_d2 += v_absdiff(vx_load(a + j + 2 * v_float32::nlanes), vx_load(b + j + 2 * v_float32::nlanes)); - v_d3 += v_absdiff(vx_load(a + j + 3 * v_float32::nlanes), vx_load(b + j + 3 * v_float32::nlanes)); + v_d0 = v_add(v_d0, v_absdiff(vx_load(a + j), vx_load(b + j))); + v_d1 = v_add(v_d1, v_absdiff(vx_load(a + j + VTraits::vlanes()), vx_load(b + j + VTraits::vlanes()))); + v_d2 = v_add(v_d2, v_absdiff(vx_load(a + j + 2 * VTraits::vlanes()), vx_load(b + j + 2 * VTraits::vlanes()))); + v_d3 = v_add(v_d3, v_absdiff(vx_load(a + j + 3 * VTraits::vlanes()), vx_load(b + j + 3 * VTraits::vlanes()))); } - d = v_reduce_sum(v_d0 + v_d1 + v_d2 + v_d3); + d = v_reduce_sum(v_add(v_add(v_add(v_d0, v_d1), v_d2), v_d3)); #endif for( ; j < n; j++ ) d += std::abs(a[j] - b[j]); @@ -193,12 +193,12 @@ float normL1_(const float* a, const float* b, int n) int normL1_(const uchar* a, const uchar* b, int n) { int j = 0, d = 0; -#if CV_SIMD - for (; j <= n - 4 * v_uint8::nlanes; j += 4 * v_uint8::nlanes) +#if (CV_SIMD || CV_SIMD_SCALABLE) + for (; j <= n - 4 * VTraits::vlanes(); j += 4 * VTraits::vlanes()) d += v_reduce_sad(vx_load(a + j), vx_load(b + j)) + - v_reduce_sad(vx_load(a + j + v_uint8::nlanes), vx_load(b + j + v_uint8::nlanes)) + - v_reduce_sad(vx_load(a + j + 2 * v_uint8::nlanes), vx_load(b + j + 2 * v_uint8::nlanes)) + - v_reduce_sad(vx_load(a + j + 3 * v_uint8::nlanes), vx_load(b + j + 3 * v_uint8::nlanes)); + v_reduce_sad(vx_load(a + j + VTraits::vlanes()), 
vx_load(b + j + VTraits::vlanes())) + + v_reduce_sad(vx_load(a + j + 2 * VTraits::vlanes()), vx_load(b + j + 2 * VTraits::vlanes())) + + v_reduce_sad(vx_load(a + j + 3 * VTraits::vlanes()), vx_load(b + j + 3 * VTraits::vlanes())); #endif for( ; j < n; j++ ) d += std::abs(a[j] - b[j]); diff --git a/modules/core/src/split.simd.hpp b/modules/core/src/split.simd.hpp index 25e90c0063..88414161b8 100644 --- a/modules/core/src/split.simd.hpp +++ b/modules/core/src/split.simd.hpp @@ -15,12 +15,12 @@ void split64s(const int64* src, int64** dst, int len, int cn); #ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) // see the comments for vecmerge_ in merge.cpp template static void vecsplit_( const T* src, T** dst, int len, int cn ) { - const int VECSZ = VecT::nlanes; + const int VECSZ = VTraits::vlanes(); int i, i0 = 0; T* dst0 = dst[0]; T* dst1 = dst[1]; @@ -177,8 +177,8 @@ split_( const T* src, T** dst, int len, int cn ) void split8u(const uchar* src, uchar** dst, int len, int cn ) { CV_INSTRUMENT_REGION(); -#if CV_SIMD - if( len >= v_uint8::nlanes && 2 <= cn && cn <= 4 ) +#if (CV_SIMD || CV_SIMD_SCALABLE) + if( len >= VTraits::vlanes() && 2 <= cn && cn <= 4 ) vecsplit_(src, dst, len, cn); else #endif @@ -188,8 +188,8 @@ void split8u(const uchar* src, uchar** dst, int len, int cn ) void split16u(const ushort* src, ushort** dst, int len, int cn ) { CV_INSTRUMENT_REGION(); -#if CV_SIMD - if( len >= v_uint16::nlanes && 2 <= cn && cn <= 4 ) +#if (CV_SIMD || CV_SIMD_SCALABLE) + if( len >= VTraits::vlanes() && 2 <= cn && cn <= 4 ) vecsplit_(src, dst, len, cn); else #endif @@ -199,8 +199,8 @@ void split16u(const ushort* src, ushort** dst, int len, int cn ) void split32s(const int* src, int** dst, int len, int cn ) { CV_INSTRUMENT_REGION(); -#if CV_SIMD - if( len >= v_uint32::nlanes && 2 <= cn && cn <= 4 ) +#if (CV_SIMD || CV_SIMD_SCALABLE) + if( len >= VTraits::vlanes() && 2 <= cn && cn <= 4 ) vecsplit_(src, dst, len, cn); else #endif @@ 
-210,8 +210,8 @@ void split32s(const int* src, int** dst, int len, int cn ) void split64s(const int64* src, int64** dst, int len, int cn ) { CV_INSTRUMENT_REGION(); -#if CV_SIMD - if( len >= v_int64::nlanes && 2 <= cn && cn <= 4 ) +#if (CV_SIMD || CV_SIMD_SCALABLE) + if( len >= VTraits::vlanes() && 2 <= cn && cn <= 4 ) vecsplit_(src, dst, len, cn); else #endif diff --git a/modules/core/src/stat.simd.hpp b/modules/core/src/stat.simd.hpp index 0592f84794..a5fb05476d 100644 --- a/modules/core/src/stat.simd.hpp +++ b/modules/core/src/stat.simd.hpp @@ -33,11 +33,11 @@ int normHamming(const uchar* a, int n) int i = 0; int result = 0; -#if CV_SIMD && CV_SIMD_WIDTH > 16 +#if (CV_SIMD || CV_SIMD_SCALABLE) { v_uint64 t = vx_setzero_u64(); - for (; i <= n - v_uint8::nlanes; i += v_uint8::nlanes) - t += v_popcount(v_reinterpret_as_u64(vx_load(a + i))); + for (; i <= n - VTraits::vlanes(); i += VTraits::vlanes()) + t = v_add(t, v_popcount(v_reinterpret_as_u64(vx_load(a + i)))); result = (int)v_reduce_sum(t); vx_cleanup(); } @@ -56,13 +56,6 @@ int normHamming(const uchar* a, int n) result += CV_POPCNT_U32(*(uint*)(a + i)); } } -#elif CV_SIMD - { - v_uint64x2 t = v_setzero_u64(); - for(; i <= n - v_uint8x16::nlanes; i += v_uint8x16::nlanes) - t += v_popcount(v_reinterpret_as_u64(v_load(a + i))); - result += (int)v_reduce_sum(t); - } #endif #if CV_ENABLE_UNROLLED for(; i <= n - 4; i += 4) @@ -85,11 +78,11 @@ int normHamming(const uchar* a, const uchar* b, int n) int i = 0; int result = 0; -#if CV_SIMD && CV_SIMD_WIDTH > 16 +#if (CV_SIMD || CV_SIMD_SCALABLE) { v_uint64 t = vx_setzero_u64(); - for (; i <= n - v_uint8::nlanes; i += v_uint8::nlanes) - t += v_popcount(v_reinterpret_as_u64(vx_load(a + i) ^ vx_load(b + i))); + for (; i <= n - VTraits::vlanes(); i += VTraits::vlanes()) + t = v_add(t, v_popcount(v_reinterpret_as_u64(v_xor(vx_load(a + i), vx_load(b + i))))); result += (int)v_reduce_sum(t); } #endif @@ -107,13 +100,6 @@ int normHamming(const uchar* a, const uchar* b, int n) 
result += CV_POPCNT_U32(*(uint*)(a + i) ^ *(uint*)(b + i)); } } -#elif CV_SIMD - { - v_uint64x2 t = v_setzero_u64(); - for(; i <= n - v_uint8x16::nlanes; i += v_uint8x16::nlanes) - t += v_popcount(v_reinterpret_as_u64(v_load(a + i) ^ v_load(b + i))); - result += (int)v_reduce_sum(t); - } #endif #if CV_ENABLE_UNROLLED for(; i <= n - 4; i += 4) diff --git a/modules/core/src/sum.simd.hpp b/modules/core/src/sum.simd.hpp index 2232013b24..e20cd39b70 100644 --- a/modules/core/src/sum.simd.hpp +++ b/modules/core/src/sum.simd.hpp @@ -22,7 +22,7 @@ struct Sum_SIMD } }; -#if CV_SIMD +#if (CV_SIMD || CV_SIMD_SCALABLE) template <> struct Sum_SIMD @@ -36,41 +36,41 @@ struct Sum_SIMD int x = 0; v_uint32 v_sum = vx_setzero_u32(); - int len0 = len & -v_uint8::nlanes; + int len0 = len & -VTraits::vlanes(); while (x < len0) { - const int len_tmp = min(x + 256*v_uint16::nlanes, len0); + const int len_tmp = min(x + 256*VTraits::vlanes(), len0); v_uint16 v_sum16 = vx_setzero_u16(); - for (; x < len_tmp; x += v_uint8::nlanes) + for (; x < len_tmp; x += VTraits::vlanes()) { v_uint16 v_src0, v_src1; v_expand(vx_load(src0 + x), v_src0, v_src1); - v_sum16 += v_src0 + v_src1; + v_sum16 = v_add(v_sum16, v_add(v_src0, v_src1)); } v_uint32 v_half0, v_half1; v_expand(v_sum16, v_half0, v_half1); - v_sum += v_half0 + v_half1; + v_sum = v_add(v_sum, v_add(v_half0, v_half1)); } - if (x <= len - v_uint16::nlanes) + if (x <= len - VTraits::vlanes()) { v_uint32 v_half0, v_half1; v_expand(vx_load_expand(src0 + x), v_half0, v_half1); - v_sum += v_half0 + v_half1; - x += v_uint16::nlanes; + v_sum = v_add(v_sum, v_add(v_half0, v_half1)); + x += VTraits::vlanes(); } - if (x <= len - v_uint32::nlanes) + if (x <= len - VTraits::vlanes()) { - v_sum += vx_load_expand_q(src0 + x); - x += v_uint32::nlanes; + v_sum = v_add(v_sum, vx_load_expand_q(src0 + x)); + x += VTraits::vlanes(); } if (cn == 1) *dst += v_reduce_sum(v_sum); else { - uint32_t CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[v_uint32::nlanes]; + uint32_t 
CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[VTraits::max_nlanes]; v_store_aligned(ar, v_sum); - for (int i = 0; i < v_uint32::nlanes; ++i) + for (int i = 0; i < VTraits::vlanes(); ++i) dst[i % cn] += ar[i]; } v_cleanup(); @@ -91,41 +91,41 @@ struct Sum_SIMD int x = 0; v_int32 v_sum = vx_setzero_s32(); - int len0 = len & -v_int8::nlanes; + int len0 = len & -VTraits::vlanes(); while (x < len0) { - const int len_tmp = min(x + 256*v_int16::nlanes, len0); + const int len_tmp = min(x + 256*VTraits::vlanes(), len0); v_int16 v_sum16 = vx_setzero_s16(); - for (; x < len_tmp; x += v_int8::nlanes) + for (; x < len_tmp; x += VTraits::vlanes()) { v_int16 v_src0, v_src1; v_expand(vx_load(src0 + x), v_src0, v_src1); - v_sum16 += v_src0 + v_src1; + v_sum16 = v_add(v_sum16, v_add(v_src0, v_src1)); } v_int32 v_half0, v_half1; v_expand(v_sum16, v_half0, v_half1); - v_sum += v_half0 + v_half1; + v_sum = v_add(v_sum, v_add(v_half0, v_half1)); } - if (x <= len - v_int16::nlanes) + if (x <= len - VTraits::vlanes()) { v_int32 v_half0, v_half1; v_expand(vx_load_expand(src0 + x), v_half0, v_half1); - v_sum += v_half0 + v_half1; - x += v_int16::nlanes; + v_sum = v_add(v_sum, v_add(v_half0, v_half1)); + x += VTraits::vlanes(); } - if (x <= len - v_int32::nlanes) + if (x <= len - VTraits::vlanes()) { - v_sum += vx_load_expand_q(src0 + x); - x += v_int32::nlanes; + v_sum = v_add(v_sum, vx_load_expand_q(src0 + x)); + x += VTraits::vlanes(); } if (cn == 1) *dst += v_reduce_sum(v_sum); else { - int32_t CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[v_int32::nlanes]; + int32_t CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[VTraits::max_nlanes]; v_store_aligned(ar, v_sum); - for (int i = 0; i < v_int32::nlanes; ++i) + for (int i = 0; i < VTraits::vlanes(); ++i) dst[i % cn] += ar[i]; } v_cleanup(); @@ -146,25 +146,25 @@ struct Sum_SIMD int x = 0; v_uint32 v_sum = vx_setzero_u32(); - for (; x <= len - v_uint16::nlanes; x += v_uint16::nlanes) + for (; x <= len - VTraits::vlanes(); x += VTraits::vlanes()) { v_uint32 v_src0, v_src1; 
v_expand(vx_load(src0 + x), v_src0, v_src1); - v_sum += v_src0 + v_src1; + v_sum = v_add(v_sum, v_add(v_src0, v_src1)); } - if (x <= len - v_uint32::nlanes) + if (x <= len - VTraits::vlanes()) { - v_sum += vx_load_expand(src0 + x); - x += v_uint32::nlanes; + v_sum = v_add(v_sum, vx_load_expand(src0 + x)); + x += VTraits::vlanes(); } if (cn == 1) *dst += v_reduce_sum(v_sum); else { - uint32_t CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[v_uint32::nlanes]; + uint32_t CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[VTraits::max_nlanes]; v_store_aligned(ar, v_sum); - for (int i = 0; i < v_uint32::nlanes; ++i) + for (int i = 0; i < VTraits::vlanes(); ++i) dst[i % cn] += ar[i]; } v_cleanup(); @@ -185,25 +185,25 @@ struct Sum_SIMD int x = 0; v_int32 v_sum = vx_setzero_s32(); - for (; x <= len - v_int16::nlanes; x += v_int16::nlanes) + for (; x <= len - VTraits::vlanes(); x += VTraits::vlanes()) { v_int32 v_src0, v_src1; v_expand(vx_load(src0 + x), v_src0, v_src1); - v_sum += v_src0 + v_src1; + v_sum = v_add(v_sum, v_add(v_src0, v_src1)); } - if (x <= len - v_int32::nlanes) + if (x <= len - VTraits::vlanes()) { - v_sum += vx_load_expand(src0 + x); - x += v_int32::nlanes; + v_sum = v_add(v_sum, vx_load_expand(src0 + x)); + x += VTraits::vlanes(); } if (cn == 1) *dst += v_reduce_sum(v_sum); else { - int32_t CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[v_int32::nlanes]; + int32_t CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[VTraits::max_nlanes]; v_store_aligned(ar, v_sum); - for (int i = 0; i < v_int32::nlanes; ++i) + for (int i = 0; i < VTraits::vlanes(); ++i) dst[i % cn] += ar[i]; } v_cleanup(); @@ -212,7 +212,7 @@ struct Sum_SIMD } }; -#if CV_SIMD_64F +#if (CV_SIMD_64F || CV_SIMD_SCALABLE_64F) template <> struct Sum_SIMD { @@ -226,24 +226,24 @@ struct Sum_SIMD v_float64 v_sum0 = vx_setzero_f64(); v_float64 v_sum1 = vx_setzero_f64(); - for (; x <= len - 2 * v_int32::nlanes; x += 2 * v_int32::nlanes) + for (; x <= len - 2 * VTraits::vlanes(); x += 2 * VTraits::vlanes()) { v_int32 v_src0 = vx_load(src0 + x); - v_int32 
v_src1 = vx_load(src0 + x + v_int32::nlanes); - v_sum0 += v_cvt_f64(v_src0) + v_cvt_f64(v_src1); - v_sum1 += v_cvt_f64_high(v_src0) + v_cvt_f64_high(v_src1); + v_int32 v_src1 = vx_load(src0 + x + VTraits::vlanes()); + v_sum0 = v_add(v_sum0, v_add(v_cvt_f64(v_src0), v_cvt_f64(v_src1))); + v_sum1 = v_add(v_sum1, v_add(v_cvt_f64_high(v_src0), v_cvt_f64_high(v_src1))); } #if CV_SIMD256 || CV_SIMD512 - double CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[v_float64::nlanes]; - v_store_aligned(ar, v_sum0 + v_sum1); - for (int i = 0; i < v_float64::nlanes; ++i) + double CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[VTraits::max_nlanes]; + v_store_aligned(ar, v_add(v_sum0, v_sum1)); + for (int i = 0; i < VTraits::vlanes(); ++i) dst[i % cn] += ar[i]; #else - double CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[2 * v_float64::nlanes]; + double CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[2 * VTraits::max_nlanes]; v_store_aligned(ar, v_sum0); - v_store_aligned(ar + v_float64::nlanes, v_sum1); - for (int i = 0; i < 2 * v_float64::nlanes; ++i) + v_store_aligned(ar + VTraits::vlanes(), v_sum1); + for (int i = 0; i < 2 * VTraits::vlanes(); ++i) dst[i % cn] += ar[i]; #endif v_cleanup(); @@ -265,24 +265,24 @@ struct Sum_SIMD v_float64 v_sum0 = vx_setzero_f64(); v_float64 v_sum1 = vx_setzero_f64(); - for (; x <= len - 2 * v_float32::nlanes; x += 2 * v_float32::nlanes) + for (; x <= len - 2 * VTraits::vlanes(); x += 2 * VTraits::vlanes()) { v_float32 v_src0 = vx_load(src0 + x); - v_float32 v_src1 = vx_load(src0 + x + v_float32::nlanes); - v_sum0 += v_cvt_f64(v_src0) + v_cvt_f64(v_src1); - v_sum1 += v_cvt_f64_high(v_src0) + v_cvt_f64_high(v_src1); + v_float32 v_src1 = vx_load(src0 + x + VTraits::vlanes()); + v_sum0 = v_add(v_sum0, v_add(v_cvt_f64(v_src0), v_cvt_f64(v_src1))); + v_sum1 = v_add(v_sum1, v_add(v_cvt_f64_high(v_src0), v_cvt_f64_high(v_src1))); } #if CV_SIMD256 || CV_SIMD512 - double CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[v_float64::nlanes]; - v_store_aligned(ar, v_sum0 + v_sum1); - for (int i = 0; i < v_float64::nlanes; ++i) 
+ double CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[VTraits::max_nlanes]; + v_store_aligned(ar, v_add(v_sum0, v_sum1)); + for (int i = 0; i < VTraits::vlanes(); ++i) dst[i % cn] += ar[i]; #else - double CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[2 * v_float64::nlanes]; + double CV_DECL_ALIGNED(CV_SIMD_WIDTH) ar[2 * VTraits::max_nlanes]; v_store_aligned(ar, v_sum0); - v_store_aligned(ar + v_float64::nlanes, v_sum1); - for (int i = 0; i < 2 * v_float64::nlanes; ++i) + v_store_aligned(ar + VTraits::vlanes(), v_sum1); + for (int i = 0; i < 2 * VTraits::vlanes(); ++i) dst[i % cn] += ar[i]; #endif v_cleanup(); From a300e7e9454f80e2b08ac8c27e6bc27c479af9b2 Mon Sep 17 00:00:00 2001 From: DeePingXian Date: Sun, 13 Aug 2023 16:40:38 +0800 Subject: [PATCH 14/57] Adding support for Streamlabs Desktop Virtual Webcam Streamlabs Desktop has the same issue in https://github.com/opencv/opencv/issues/19746. This fixes it using https://github.com/opencv/opencv/pull/23460 method. --- modules/videoio/src/cap_dshow.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/videoio/src/cap_dshow.cpp b/modules/videoio/src/cap_dshow.cpp index d6b2b95545..21af06a147 100644 --- a/modules/videoio/src/cap_dshow.cpp +++ b/modules/videoio/src/cap_dshow.cpp @@ -2771,7 +2771,7 @@ int videoInput::start(int deviceID, videoDevice *VD){ if(customSize){ DebugPrintOut("SETUP: Default Format is set to %ix%i\n", currentWidth, currentHeight); - if (strcmp("OBS Virtual Camera", VD->nDeviceName) == 0) + if (strcmp("OBS Virtual Camera", VD->nDeviceName) == 0 || strcmp("Streamlabs Desktop Virtual Webcam", VD->nDeviceName) == 0) { // OBS Virtual Camera always returns S_OK on SetFormat(), even if it doesn't support // the actual format. So we have to choose a format that it supports manually, e.g. NV12. 
From a301d1c298250ecb9ca3d376cdc812729797908b Mon Sep 17 00:00:00 2001 From: Ginkgo Date: Mon, 14 Aug 2023 20:58:35 +0800 Subject: [PATCH 15/57] fix ipp_warpAffine return value error --- modules/imgproc/src/imgwarp.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp index e5d9b0defb..bbeb8223f1 100644 --- a/modules/imgproc/src/imgwarp.cpp +++ b/modules/imgproc/src/imgwarp.cpp @@ -2679,8 +2679,13 @@ static bool ipp_warpAffine( InputArray _src, OutputArray _dst, int interpolation } return true; +#else + CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(interpolation); + CV_UNUSED(borderType); CV_UNUSED(_M); CV_UNUSED(flags); + return false; #endif } + #endif namespace hal { From fb34f36c695c177d34b9303bd175925995cc3daf Mon Sep 17 00:00:00 2001 From: Mihir Patil Date: Mon, 14 Aug 2023 19:11:14 -0400 Subject: [PATCH 16/57] style: remove extraneous std::cout --- modules/highgui/src/window_cocoa.mm | 3 --- 1 file changed, 3 deletions(-) diff --git a/modules/highgui/src/window_cocoa.mm b/modules/highgui/src/window_cocoa.mm index 5e34b502db..7e364220fa 100644 --- a/modules/highgui/src/window_cocoa.mm +++ b/modules/highgui/src/window_cocoa.mm @@ -732,18 +732,15 @@ void cvSetModeWindow_COCOA( const char* name, double prop_value ) localpool = [[NSAutoreleasePool alloc] init]; - // std::cout << "setting mode" << std::endl; #if MAC_OS_X_VERSION_MAX_ALLOWED > MAC_OS_X_VERSION_10_6 if ( ([window styleMask] & NSFullScreenWindowMask) && prop_value==CV_WINDOW_NORMAL ) { - // std::cout << "exiting fullscreen" << std::endl; [window toggleFullScreen:nil]; window.status=CV_WINDOW_NORMAL; } else if( !([window styleMask] & NSFullScreenWindowMask) && prop_value==CV_WINDOW_FULLSCREEN ) { - // std::cout << "entering fullscreen" << std::endl; [window setCollectionBehavior:NSWindowCollectionBehaviorFullScreenPrimary]; NSScreen* screen = [window screen]; From ad7ecf1dbaa0235f70a2a6d9affbbbb4f452df16 Mon Sep 17 00:00:00 2001 
From: Dmitry Kurtaev Date: Tue, 15 Aug 2023 11:32:44 +0300 Subject: [PATCH 17/57] Mark OpenVINO models for G-API tests optional --- .../gapi/misc/python/test/test_gapi_infer.py | 24 ++-- .../misc/python/test/test_gapi_infer_ov.py | 16 +-- .../gapi/test/infer/gapi_infer_ie_test.cpp | 108 +++++++++--------- .../gapi/test/infer/gapi_infer_ov_tests.cpp | 4 +- 4 files changed, 76 insertions(+), 76 deletions(-) diff --git a/modules/gapi/misc/python/test/test_gapi_infer.py b/modules/gapi/misc/python/test/test_gapi_infer.py index 8ecc957e41..d075651e87 100644 --- a/modules/gapi/misc/python/test/test_gapi_infer.py +++ b/modules/gapi/misc/python/test/test_gapi_infer.py @@ -38,8 +38,8 @@ try: return root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' - model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) + weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) device_id = 'CPU' img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) @@ -73,8 +73,8 @@ try: return root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' - model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) + weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) device_id = 'CPU' img_path = self.find_file('cv/face/david2.jpg', 
[os.environ.get('OPENCV_TEST_DATA_PATH')]) @@ -112,8 +112,8 @@ try: return root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' - model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) + weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) device_id = 'CPU' rois = [(10, 15, 62, 62), (23, 50, 62, 62), (14, 100, 62, 62), (80, 50, 62, 62)] @@ -161,8 +161,8 @@ try: return root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' - model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) + weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) device_id = 'CPU' rois = [(10, 15, 62, 62), (23, 50, 62, 62), (14, 100, 62, 62), (80, 50, 62, 62)] @@ -211,8 +211,8 @@ try: return root_path = '/omz_intel_models/intel/person-detection-retail-0013/FP32/person-detection-retail-0013' - model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) + weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) img_path = self.find_file('gpu/lbpcascade/er.png', 
[os.environ.get('OPENCV_TEST_DATA_PATH')]) device_id = 'CPU' img = cv.resize(cv.imread(img_path), (544, 320)) @@ -270,8 +270,8 @@ try: return root_path = '/omz_intel_models/intel/person-detection-retail-0013/FP32/person-detection-retail-0013' - model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) + weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) img_path = self.find_file('gpu/lbpcascade/er.png', [os.environ.get('OPENCV_TEST_DATA_PATH')]) device_id = 'CPU' img = cv.resize(cv.imread(img_path), (544, 320)) diff --git a/modules/gapi/misc/python/test/test_gapi_infer_ov.py b/modules/gapi/misc/python/test/test_gapi_infer_ov.py index b4022b6e2d..f48ec96369 100644 --- a/modules/gapi/misc/python/test/test_gapi_infer_ov.py +++ b/modules/gapi/misc/python/test/test_gapi_infer_ov.py @@ -86,8 +86,8 @@ try: skip_if_openvino_not_available() root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' - model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - bin_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) + bin_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) device_id = 'CPU' img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) @@ -119,8 +119,8 @@ try: skip_if_openvino_not_available() root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' - model_path = 
self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - bin_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) + bin_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) device_id = 'CPU' img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) @@ -148,8 +148,8 @@ try: skip_if_openvino_not_available() root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' - model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - bin_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) + bin_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) device_id = 'CPU' img_path1 = self.find_file('cv/face/david1.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) @@ -190,8 +190,8 @@ try: skip_if_openvino_not_available() root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' - model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - bin_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) + bin_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')], required=False) device_id = 'CPU' img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) diff --git a/modules/gapi/test/infer/gapi_infer_ie_test.cpp 
b/modules/gapi/test/infer/gapi_infer_ie_test.cpp index 58e37040e8..92de39abfa 100644 --- a/modules/gapi/test/infer/gapi_infer_ie_test.cpp +++ b/modules/gapi/test/infer/gapi_infer_ie_test.cpp @@ -187,8 +187,8 @@ std::string compileAgeGenderBlob(const std::string& device) { cv::gapi::ie::detail::ParamDesc params; const std::string model_name = "age-gender-recognition-retail-0013"; const std::string output = model_name + ".blob"; - params.model_path = findDataFile(SUBDIR + model_name + ".xml"); - params.weights_path = findDataFile(SUBDIR + model_name + ".bin"); + params.model_path = findDataFile(SUBDIR + model_name + ".xml", false); + params.weights_path = findDataFile(SUBDIR + model_name + ".bin", false); params.device_id = device; compileBlob(params, output, IE::Precision::U8); return output; @@ -205,8 +205,8 @@ TEST(TestAgeGenderIE, InferBasicTensor) initDLDTDataPath(); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; // Load IE network, initialize input data using that. 
@@ -256,8 +256,8 @@ TEST(TestAgeGenderIE, InferBasicImage) initDLDTDataPath(); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; // FIXME: Ideally it should be an image from disk @@ -334,8 +334,8 @@ struct InferWithReshape: public ::testing::Test { reshape_dims = {1, 3, 70, 70}; initDLDTDataPath(); - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; @@ -432,8 +432,8 @@ struct ROIList: public ::testing::Test { void SetUp() { initDLDTDataPath(); - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; // FIXME: it must be cv::imread(findDataFile("../dnn/grace_hopper_227.png", false)); @@ -505,8 +505,8 @@ struct ROIListNV12: public ::testing::Test { void SetUp() { initDLDTDataPath(); - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + 
"age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; cv::Size sz{320, 240}; @@ -585,8 +585,8 @@ struct SingleROI: public ::testing::Test { void SetUp() { initDLDTDataPath(); - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; // FIXME: it must be cv::imread(findDataFile("../dnn/grace_hopper_227.png", false)); @@ -644,8 +644,8 @@ struct SingleROINV12: public ::testing::Test { void SetUp() { initDLDTDataPath(); - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; cv::Size sz{320, 240}; @@ -809,8 +809,8 @@ TEST(TestAgeGenderIE, GenericInfer) initDLDTDataPath(); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; cv::Mat in_mat(cv::Size(320, 240), CV_8UC3); @@ -859,8 +859,8 @@ TEST(TestAgeGenderIE, InvalidConfigGeneric) { initDLDTDataPath(); - std::string model_path = findDataFile(SUBDIR + 
"age-gender-recognition-retail-0013.xml"); - std::string weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + std::string model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + std::string weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); std::string device_id = "CPU"; // Configure & run G-API @@ -885,8 +885,8 @@ TEST(TestAgeGenderIE, CPUConfigGeneric) { initDLDTDataPath(); - std::string model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - std::string weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + std::string model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + std::string weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); std::string device_id = "CPU"; // Configure & run G-API @@ -912,8 +912,8 @@ TEST(TestAgeGenderIE, InvalidConfig) { initDLDTDataPath(); - std::string model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - std::string weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + std::string model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + std::string weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); std::string device_id = "CPU"; using AGInfo = std::tuple; @@ -937,8 +937,8 @@ TEST(TestAgeGenderIE, CPUConfig) { initDLDTDataPath(); - std::string model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - std::string weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + std::string model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + std::string weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); std::string device_id = "CPU"; using AGInfo = std::tuple; @@ -1017,8 +1017,8 @@ 
TEST(TestAgeGenderIE, MediaInputNV12) initDLDTDataPath(); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; cv::Size sz{320, 240}; @@ -1082,8 +1082,8 @@ TEST(TestAgeGenderIE, MediaInputBGR) initDLDTDataPath(); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; cv::Size sz{320, 240}; @@ -1134,8 +1134,8 @@ TEST(InferROI, MediaInputBGR) initDLDTDataPath(); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; cv::Size sz{320, 240}; @@ -1196,8 +1196,8 @@ TEST(InferROI, MediaInputNV12) initDLDTDataPath(); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = 
findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; cv::Size sz{320, 240}; @@ -1587,8 +1587,8 @@ TEST(Infer, TestStreamingInfer) std::string filepath = findDataFile("cv/video/768x576.avi"); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; // Load IE network, initialize input data using that. @@ -1654,8 +1654,8 @@ TEST(InferROI, TestStreamingInfer) std::string filepath = findDataFile("cv/video/768x576.avi"); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; // Load IE network, initialize input data using that. @@ -1732,8 +1732,8 @@ TEST(InferList, TestStreamingInfer) std::string filepath = findDataFile("cv/video/768x576.avi"); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; // Load IE network, initialize input data using that. 
@@ -1821,8 +1821,8 @@ TEST(Infer2, TestStreamingInfer) std::string filepath = findDataFile("cv/video/768x576.avi"); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; // Load IE network, initialize input data using that. @@ -1911,8 +1911,8 @@ TEST(InferEmptyList, TestStreamingInfer) std::string filepath = findDataFile("cv/video/768x576.avi"); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; // Load IE network, initialize input data using that. @@ -1965,8 +1965,8 @@ TEST(Infer2EmptyList, TestStreamingInfer) std::string filepath = findDataFile("cv/video/768x576.avi"); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; // Load IE network, initialize input data using that. 
@@ -2294,8 +2294,8 @@ struct LimitedSourceInfer: public ::testing::Test { GStreamingCompiled compileStreaming(int nireq) { cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; auto pp = cv::gapi::ie::Params { @@ -2348,8 +2348,8 @@ TEST(TestAgeGenderIE, InferWithBatch) constexpr int batch_size = 4; cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; cv::Mat in_mat({batch_size, 3, 62, 62}, CV_8U); @@ -3091,8 +3091,8 @@ struct AgeGenderInferTest: public ::testing::Test { void SetUp() { initDLDTDataPath(); - m_params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - m_params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + m_params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + m_params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); m_params.device_id = "CPU"; m_plugin = cv::gimpl::ie::wrap::getPlugin(m_params); @@ -3191,8 +3191,8 @@ TEST(TestAgeGenderIE, InferTensorWithPreproc) { initDLDTDataPath(); cv::gapi::ie::detail::ParamDesc params; - params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - params.weights_path = findDataFile(SUBDIR + 
"age-gender-recognition-retail-0013.bin"); + params.model_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + params.weights_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); params.device_id = "CPU"; // Load IE network, initialize input data using that. diff --git a/modules/gapi/test/infer/gapi_infer_ov_tests.cpp b/modules/gapi/test/infer/gapi_infer_ov_tests.cpp index 09b54c1a46..abce82b329 100644 --- a/modules/gapi/test/infer/gapi_infer_ov_tests.cpp +++ b/modules/gapi/test/infer/gapi_infer_ov_tests.cpp @@ -255,8 +255,8 @@ private: struct BaseAgeGenderOV: public ::testing::Test { BaseAgeGenderOV() { initDLDTDataPath(); - xml_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml"); - bin_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin"); + xml_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.xml", false); + bin_path = findDataFile(SUBDIR + "age-gender-recognition-retail-0013.bin", false); device = "CPU"; blob_path = "age-gender-recognition-retail-0013.blob"; } From 8e52c0155bc797b93604813f96f2e9fa24593f3f Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Tue, 15 Aug 2023 20:49:21 +0300 Subject: [PATCH 18/57] gapi: update ADE library to 0.1.2b --- modules/gapi/cmake/DownloadADE.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/gapi/cmake/DownloadADE.cmake b/modules/gapi/cmake/DownloadADE.cmake index e22c4f1a32..26407f4fef 100644 --- a/modules/gapi/cmake/DownloadADE.cmake +++ b/modules/gapi/cmake/DownloadADE.cmake @@ -1,7 +1,7 @@ set(ade_src_dir "${OpenCV_BINARY_DIR}/3rdparty/ade") -set(ade_filename "v0.1.2a.zip") -set(ade_subdir "ade-0.1.2a") -set(ade_md5 "fa4b3e25167319cb0fa9432ef8281945") +set(ade_filename "v0.1.2b.zip") +set(ade_subdir "ade-0.1.2b") +set(ade_md5 "4f93a0844dfc463c617d83b09011819a") ocv_download(FILENAME ${ade_filename} HASH ${ade_md5} URL From 16681d1080928d31645d94ede6ce524fa8d6f177 Mon Sep 17 
00:00:00 2001 From: MuZihao Date: Wed, 16 Aug 2023 09:34:59 +0800 Subject: [PATCH 19/57] fix the issue in layer fused --- modules/dnn/src/net_impl_fuse.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/dnn/src/net_impl_fuse.cpp b/modules/dnn/src/net_impl_fuse.cpp index 4570d2b360..dfa542bd41 100644 --- a/modules/dnn/src/net_impl_fuse.cpp +++ b/modules/dnn/src/net_impl_fuse.cpp @@ -210,7 +210,7 @@ void Net::Impl::fuseLayers(const std::vector& blobsToKeep_) if (!nextData->params.has("operation") || toLowerCase(nextData->params.get("operation")) != "add") { CV_LOG_DEBUG(NULL, "DNN/CPU: fusion with NaryEltwise or Eltwise Layer operation is not supported: " - << nextData->params.get("operation")); + << toLowerCase(nextData->params.get("operation", "sum"))); break; } From 8ad5eb521a6e64f71963efcceaf995b0930ea357 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Wed, 16 Aug 2023 15:46:11 +0300 Subject: [PATCH 20/57] Merge pull request #24120 from dkurt:actualize_dnn_links OCL_FP16 MatMul with large batch * Workaround FP16 MatMul with large batch * Fix OCL reinitialization * Higher thresholds for INT8 quantization * Try fix gemm_buffer_NT for half (columns) * Fix GEMM by rows * Add batch dimension to InnerProduct layer test * Fix Test_ONNX_conformance.Layer_Test/test_basic_conv_with_padding * Batch 16 * Replace all vload4 * Version suffix for MobileNetSSD_deploy Caffe model --- modules/dnn/misc/python/test/test_dnn.py | 6 ++-- modules/dnn/perf/perf_caffe.cpp | 4 +-- modules/dnn/perf/perf_net.cpp | 2 +- modules/dnn/src/layers/convolution_layer.cpp | 4 +-- modules/dnn/src/opencl/gemm_buffer.cl | 38 ++++++++++---------- modules/dnn/test/test_backends.cpp | 4 +-- modules/dnn/test/test_caffe_importer.cpp | 4 +-- modules/dnn/test/test_halide_layers.cpp | 18 +++++----- modules/dnn/test/test_int8_layers.cpp | 6 ++-- modules/dnn/test/test_model.cpp | 10 +++--- 10 files changed, 49 insertions(+), 47 deletions(-) diff --git 
a/modules/dnn/misc/python/test/test_dnn.py b/modules/dnn/misc/python/test/test_dnn.py index 5c91aae56f..a06c02ad2d 100644 --- a/modules/dnn/misc/python/test/test_dnn.py +++ b/modules/dnn/misc/python/test/test_dnn.py @@ -191,10 +191,10 @@ class dnn_test(NewOpenCVTests): def test_model(self): img_path = self.find_dnn_file("dnn/street.png") - weights = self.find_dnn_file("dnn/MobileNetSSD_deploy.caffemodel", required=False) - config = self.find_dnn_file("dnn/MobileNetSSD_deploy.prototxt", required=False) + weights = self.find_dnn_file("dnn/MobileNetSSD_deploy_19e3ec3.caffemodel", required=False) + config = self.find_dnn_file("dnn/MobileNetSSD_deploy_19e3ec3.prototxt", required=False) if weights is None or config is None: - raise unittest.SkipTest("Missing DNN test files (dnn/MobileNetSSD_deploy.{prototxt/caffemodel}). Verify OPENCV_DNN_TEST_DATA_PATH configuration parameter.") + raise unittest.SkipTest("Missing DNN test files (dnn/MobileNetSSD_deploy_19e3ec3.{prototxt/caffemodel}). Verify OPENCV_DNN_TEST_DATA_PATH configuration parameter.") frame = cv.imread(img_path) model = cv.dnn_DetectionModel(weights, config) diff --git a/modules/dnn/perf/perf_caffe.cpp b/modules/dnn/perf/perf_caffe.cpp index 370f06dba2..f1ba26afcc 100644 --- a/modules/dnn/perf/perf_caffe.cpp +++ b/modules/dnn/perf/perf_caffe.cpp @@ -101,8 +101,8 @@ PERF_TEST(SqueezeNet_v1_1_caffe, CaffePerfTest) PERF_TEST(MobileNet_SSD, CaffePerfTest) { - caffe::Net* net = initNet("dnn/MobileNetSSD_deploy.prototxt", - "dnn/MobileNetSSD_deploy.caffemodel"); + caffe::Net* net = initNet("dnn/MobileNetSSD_deploy_19e3ec3.prototxt", + "dnn/MobileNetSSD_deploy_19e3ec3.caffemodel"); TEST_CYCLE() net->Forward(); SANITY_CHECK_NOTHING(); } diff --git a/modules/dnn/perf/perf_net.cpp b/modules/dnn/perf/perf_net.cpp index cfbb45b173..7f852e8f7b 100644 --- a/modules/dnn/perf/perf_net.cpp +++ b/modules/dnn/perf/perf_net.cpp @@ -141,7 +141,7 @@ PERF_TEST_P_(DNNTestNetwork, MobileNet_SSD_Caffe) { if (backend == 
DNN_BACKEND_HALIDE) throw SkipTestException(""); - processNet("dnn/MobileNetSSD_deploy.caffemodel", "dnn/MobileNetSSD_deploy.prototxt", "", + processNet("dnn/MobileNetSSD_deploy_19e3ec3.caffemodel", "dnn/MobileNetSSD_deploy_19e3ec3.prototxt", "", Mat(cv::Size(300, 300), CV_32FC3)); } diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index 0ed2bb7feb..0488dc462d 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -1069,7 +1069,7 @@ public: config.pads = pads; config.stride = stride; config.dilation = dilation; - if (inputs[0].dims != 4 && inputs[0].dims != umat_blobs[0].dims) + if (inputs[0].dims != 4 && inputs[0].dims != (blobs.empty() ? umat_blobs[0].dims : blobs[0].dims)) { static bool bypassCheck = utils::getConfigurationParameterBool("OPENCV_OCL4DNN_CONVOLUTION_IGNORE_INPUT_DIMS_4_CHECK", false); if (!bypassCheck) @@ -1081,7 +1081,7 @@ public: return false; } } - config.group = inputs[0].size[1] / umat_blobs[0].size[1]; + config.group = inputs[0].size[1] / (blobs.empty() ? 
umat_blobs[0].size[1] : blobs[0].size[1]); if (config.group < 1) // config.group == 0 causes div by zero in ocl4dnn code { CV_LOG_WARNING(NULL, "DNN/OpenCL: Unsupported config.group=" << config.group diff --git a/modules/dnn/src/opencl/gemm_buffer.cl b/modules/dnn/src/opencl/gemm_buffer.cl index b345983aee..70028b0eec 100644 --- a/modules/dnn/src/opencl/gemm_buffer.cl +++ b/modules/dnn/src/opencl/gemm_buffer.cl @@ -453,14 +453,14 @@ __kernel void TEMPLATE(gemm_buffer_NT, Dtype)( int w; for(int b_tile = 0; b_tile < K; b_tile += SLM_BLOCK) { barrier(CLK_LOCAL_MEM_FENCE); - vstore4(vload4(0, (__global float *)(src1_read0 + mad24(0, K, local_index))), 0, (__local float *)(slm_brow + mad24(0, SLM_BLOCK, local_index))); - vstore4(vload4(0, (__global float *)(src1_read0 + mad24(1, K, local_index))), 0, (__local float *)(slm_brow + mad24(1, SLM_BLOCK, local_index))); - vstore4(vload4(0, (__global float *)(src1_read0 + mad24(2, K, local_index))), 0, (__local float *)(slm_brow + mad24(2, SLM_BLOCK, local_index))); - vstore4(vload4(0, (__global float *)(src1_read0 + mad24(3, K, local_index))), 0, (__local float *)(slm_brow + mad24(3, SLM_BLOCK, local_index))); - vstore4(vload4(0, (__global float *)(src1_read0 + mad24(4, K, local_index))), 0, (__local float *)(slm_brow + mad24(4, SLM_BLOCK, local_index))); - vstore4(vload4(0, (__global float *)(src1_read0 + mad24(5, K, local_index))), 0, (__local float *)(slm_brow + mad24(5, SLM_BLOCK, local_index))); - vstore4(vload4(0, (__global float *)(src1_read0 + mad24(6, K, local_index))), 0, (__local float *)(slm_brow + mad24(6, SLM_BLOCK, local_index))); - vstore4(vload4(0, (__global float *)(src1_read0 + mad24(7, K, local_index))), 0, (__local float *)(slm_brow + mad24(7, SLM_BLOCK, local_index))); + vstore8(vload8(0, src1_read0 + mad24(0, K, local_index)), 0, slm_brow + mad24(0, SLM_BLOCK, local_index)); + vstore8(vload8(0, src1_read0 + mad24(1, K, local_index)), 0, slm_brow + mad24(1, SLM_BLOCK, local_index)); + vstore8(vload8(0, 
src1_read0 + mad24(2, K, local_index)), 0, slm_brow + mad24(2, SLM_BLOCK, local_index)); + vstore8(vload8(0, src1_read0 + mad24(3, K, local_index)), 0, slm_brow + mad24(3, SLM_BLOCK, local_index)); + vstore8(vload8(0, src1_read0 + mad24(4, K, local_index)), 0, slm_brow + mad24(4, SLM_BLOCK, local_index)); + vstore8(vload8(0, src1_read0 + mad24(5, K, local_index)), 0, slm_brow + mad24(5, SLM_BLOCK, local_index)); + vstore8(vload8(0, src1_read0 + mad24(6, K, local_index)), 0, slm_brow + mad24(6, SLM_BLOCK, local_index)); + vstore8(vload8(0, src1_read0 + mad24(7, K, local_index)), 0, slm_brow + mad24(7, SLM_BLOCK, local_index)); barrier(CLK_LOCAL_MEM_FENCE); slm_brow0 = slm_brow + local_x * (TILE_K / 8); @@ -469,17 +469,17 @@ __kernel void TEMPLATE(gemm_buffer_NT, Dtype)( while( w + TILE_K <= end_w ) { Dtype8 arow; - brow0 = as_half8(vload4(0, (__local float *)(slm_brow0 + 0 * SLM_BLOCK))); - brow1 = as_half8(vload4(0, (__local float *)(slm_brow0 + 1 * SLM_BLOCK))); - brow2 = as_half8(vload4(0, (__local float *)(slm_brow0 + 2 * SLM_BLOCK))); - brow3 = as_half8(vload4(0, (__local float *)(slm_brow0 + 3 * SLM_BLOCK))); - brow4 = as_half8(vload4(0, (__local float *)(slm_brow0 + 4 * SLM_BLOCK))); - brow5 = as_half8(vload4(0, (__local float *)(slm_brow0 + 5 * SLM_BLOCK))); - brow6 = as_half8(vload4(0, (__local float *)(slm_brow0 + 6 * SLM_BLOCK))); - brow7 = as_half8(vload4(0, (__local float *)(slm_brow0 + 7 * SLM_BLOCK))); + brow0 = vload8(0, slm_brow0 + 0 * SLM_BLOCK); + brow1 = vload8(0, slm_brow0 + 1 * SLM_BLOCK); + brow2 = vload8(0, slm_brow0 + 2 * SLM_BLOCK); + brow3 = vload8(0, slm_brow0 + 3 * SLM_BLOCK); + brow4 = vload8(0, slm_brow0 + 4 * SLM_BLOCK); + brow5 = vload8(0, slm_brow0 + 5 * SLM_BLOCK); + brow6 = vload8(0, slm_brow0 + 6 * SLM_BLOCK); + brow7 = vload8(0, slm_brow0 + 7 * SLM_BLOCK); #define MM_DOT_PRODUCT( _row, _dot ) \ - arow = as_half8(vload4(0, (__global float *)(src0_read + _row * K))); \ + arow = vload8(0, src0_read + _row * K); \ _dot = mad( 
(Dtype8)(arow.s0), (Dtype8)(brow0.s0, brow1.s0, brow2.s0, brow3.s0, brow4.s0, brow5.s0, brow6.s0, brow7.s0), _dot ); \ _dot = mad( (Dtype8)(arow.s1), (Dtype8)(brow0.s1, brow1.s1, brow2.s1, brow3.s1, brow4.s1, brow5.s1, brow6.s1, brow7.s1), _dot ); \ _dot = mad( (Dtype8)(arow.s2), (Dtype8)(brow0.s2, brow1.s2, brow2.s2, brow3.s2, brow4.s2, brow5.s2, brow6.s2, brow7.s2), _dot ); \ @@ -510,7 +510,7 @@ __kernel void TEMPLATE(gemm_buffer_NT, Dtype)( Dtype8 arow; #define READ_BROW(_brow, _row) \ - _brow = as_half8(vload4(0, (__local float *)(slm_brow0 + _row * SLM_BLOCK))); \ + _brow = vload8(0, slm_brow0 + _row * SLM_BLOCK); \ _brow.s0 = (mad24(local_x, 8, w) < K) ? _brow.s0 : 0.0f; \ _brow.s1 = (mad24(local_x, 8, w + 1) < K) ? _brow.s1 : 0.0f; \ _brow.s2 = (mad24(local_x, 8, w + 2) < K) ? _brow.s2 : 0.0f; \ @@ -532,7 +532,7 @@ __kernel void TEMPLATE(gemm_buffer_NT, Dtype)( #undef READ_BROW #define MM_DOT_PRODUCT( _row, _dot ) \ - arow = as_half8(vload4(0, (__global float *)(src0_read + _row * K))); \ + arow = vload8(0, src0_read + _row * K); \ arow.s0 = (mad24(local_x, 8, w) < K) ? arow.s0 : 0.0f; \ arow.s1 = (mad24(local_x, 8, w + 1) < K) ? arow.s1 : 0.0f; \ arow.s2 = (mad24(local_x, 8, w + 2) < K) ? arow.s2 : 0.0f; \ diff --git a/modules/dnn/test/test_backends.cpp b/modules/dnn/test/test_backends.cpp index da666ace01..9570355b4f 100644 --- a/modules/dnn/test/test_backends.cpp +++ b/modules/dnn/test/test_backends.cpp @@ -194,7 +194,7 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_Caffe) float scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CPU_FP16) ? 1.5e-2 : 0.0; float iouDiff = (target == DNN_TARGET_MYRIAD) ? 0.063 : 0.0; float detectionConfThresh = (target == DNN_TARGET_MYRIAD) ? 
0.262 : FLT_MIN; - processNet("dnn/MobileNetSSD_deploy.caffemodel", "dnn/MobileNetSSD_deploy.prototxt", + processNet("dnn/MobileNetSSD_deploy_19e3ec3.caffemodel", "dnn/MobileNetSSD_deploy_19e3ec3.prototxt", inp, "detection_out", "", scoreDiff, iouDiff, detectionConfThresh); expectNoFallbacksFromIE(net); } @@ -237,7 +237,7 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_Caffe_Different_Width_Height) scoreDiff = 0.03; iouDiff = 0.08; } - processNet("dnn/MobileNetSSD_deploy.caffemodel", "dnn/MobileNetSSD_deploy.prototxt", + processNet("dnn/MobileNetSSD_deploy_19e3ec3.caffemodel", "dnn/MobileNetSSD_deploy_19e3ec3.prototxt", inp, "detection_out", "", scoreDiff, iouDiff); expectNoFallbacksFromIE(net); } diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp index 708e353aac..3f5458a873 100644 --- a/modules/dnn/test/test_caffe_importer.cpp +++ b/modules/dnn/test/test_caffe_importer.cpp @@ -290,8 +290,8 @@ TEST(Reproducibility_SSD, Accuracy) typedef testing::TestWithParam > Reproducibility_MobileNet_SSD; TEST_P(Reproducibility_MobileNet_SSD, Accuracy) { - const string proto = findDataFile("dnn/MobileNetSSD_deploy.prototxt", false); - const string model = findDataFile("dnn/MobileNetSSD_deploy.caffemodel", false); + const string proto = findDataFile("dnn/MobileNetSSD_deploy_19e3ec3.prototxt", false); + const string model = findDataFile("dnn/MobileNetSSD_deploy_19e3ec3.caffemodel", false); Net net = readNetFromCaffe(proto, model); int backendId = get<0>(GetParam()); int targetId = get<1>(GetParam()); diff --git a/modules/dnn/test/test_halide_layers.cpp b/modules/dnn/test/test_halide_layers.cpp index d8a16d3efa..3629f720fb 100644 --- a/modules/dnn/test/test_halide_layers.cpp +++ b/modules/dnn/test/test_halide_layers.cpp @@ -407,15 +407,16 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, MaxPooling, Combine( //////////////////////////////////////////////////////////////////////////////// // Fully-connected 
//////////////////////////////////////////////////////////////////////////////// -typedef TestWithParam > > FullyConnected; +typedef TestWithParam > > FullyConnected; TEST_P(FullyConnected, Accuracy) { - int inChannels = get<0>(GetParam()); - Size inSize = get<1>(GetParam()); - int outChannels = get<2>(GetParam()); - bool hasBias = get<3>(GetParam()); - Backend backendId = get<0>(get<4>(GetParam())); - Target targetId = get<1>(get<4>(GetParam())); + int batch = get<0>(GetParam()); + int inChannels = get<1>(GetParam()); + Size inSize = get<2>(GetParam()); + int outChannels = get<3>(GetParam()); + bool hasBias = get<4>(GetParam()); + Backend backendId = get<0>(get<5>(GetParam())); + Target targetId = get<1>(get<5>(GetParam())); #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2021040000) if ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && (targetId == DNN_TARGET_OPENCL_FP16 || @@ -439,7 +440,7 @@ TEST_P(FullyConnected, Accuracy) lp.type = "InnerProduct"; lp.name = "testLayer"; - int sz[] = {1, inChannels, inSize.height, inSize.width}; + int sz[] = {batch, inChannels, inSize.height, inSize.width}; Mat input(4, &sz[0], CV_32F); double l1 = 0.0; @@ -467,6 +468,7 @@ TEST_P(FullyConnected, Accuracy) } INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, FullyConnected, Combine( +/*batch*/ Values(1, 2, 4, 8, 16), /*in channels*/ Values(3, 4), /*in size*/ Values(Size(5, 4), Size(4, 5), Size(1, 1)), /*out channels*/ Values(3, 4), diff --git a/modules/dnn/test/test_int8_layers.cpp b/modules/dnn/test/test_int8_layers.cpp index 8b3cd01f29..caba112516 100644 --- a/modules/dnn/test/test_int8_layers.cpp +++ b/modules/dnn/test/test_int8_layers.cpp @@ -878,14 +878,14 @@ TEST_P(Test_Int8_nets, MobileNet_SSD) if (target == DNN_TARGET_OPENCL && !ocl::Device::getDefault().isIntel()) applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL); - Net net = readNetFromCaffe(findDataFile("dnn/MobileNetSSD_deploy.prototxt", false), - 
findDataFile("dnn/MobileNetSSD_deploy.caffemodel", false)); + Net net = readNetFromCaffe(findDataFile("dnn/MobileNetSSD_deploy_19e3ec3.prototxt", false), + findDataFile("dnn/MobileNetSSD_deploy_19e3ec3.caffemodel", false)); Mat inp = imread(_tf("street.png")); Mat blob = blobFromImage(inp, 1.0 / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false); Mat ref = blobFromNPY(_tf("mobilenet_ssd_caffe_out.npy")); - float confThreshold = FLT_MIN, scoreDiff = 0.059, iouDiff = 0.11; + float confThreshold = FLT_MIN, scoreDiff = 0.084, iouDiff = 0.43; testDetectionNet(net, blob, ref, confThreshold, scoreDiff, iouDiff); } diff --git a/modules/dnn/test/test_model.cpp b/modules/dnn/test/test_model.cpp index a19923bf28..59b51c4bc0 100644 --- a/modules/dnn/test/test_model.cpp +++ b/modules/dnn/test/test_model.cpp @@ -490,8 +490,8 @@ TEST_P(Test_Model, DetectionMobilenetSSD) refBoxes.emplace_back(left, top, width, height); } - std::string weights_file = _tf("MobileNetSSD_deploy.caffemodel", false); - std::string config_file = _tf("MobileNetSSD_deploy.prototxt"); + std::string weights_file = _tf("MobileNetSSD_deploy_19e3ec3.caffemodel", false); + std::string config_file = _tf("MobileNetSSD_deploy_19e3ec3.prototxt"); Scalar mean = Scalar(127.5, 127.5, 127.5); double scale = 1.0 / 127.5; @@ -511,7 +511,7 @@ TEST_P(Test_Model, DetectionMobilenetSSD) } else if (target == DNN_TARGET_CUDA_FP16) { - scoreDiff = 0.0021; + scoreDiff = 0.0028; iouDiff = 1e-2; } float confThreshold = FLT_MIN; @@ -595,8 +595,8 @@ TEST_P(Test_Model, Detection_normalized) std::vector refConfidences = {0.999222f}; std::vector refBoxes = {Rect2d(0, 4, 227, 222)}; - std::string weights_file = _tf("MobileNetSSD_deploy.caffemodel", false); - std::string config_file = _tf("MobileNetSSD_deploy.prototxt"); + std::string weights_file = _tf("MobileNetSSD_deploy_19e3ec3.caffemodel", false); + std::string config_file = _tf("MobileNetSSD_deploy_19e3ec3.prototxt"); Scalar mean = Scalar(127.5, 127.5, 127.5); double scale = 
1.0 / 127.5; From d792ebc5d2feb19f697260dc7ac923f27b173139 Mon Sep 17 00:00:00 2001 From: Sean McBride Date: Fri, 26 May 2023 07:57:31 -0400 Subject: [PATCH 21/57] Fixed buffer overrun; removed the last two uses of sprintf Fixed an off-by-1 buffer resize, the space for the null termination was forgotten. Prefer snprintf, which can never overflow (if given the right size). In one case I cheated and used strcpy, because I cannot figure out the buffer size at that point in the code. --- modules/core/src/persistence_xml.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/core/src/persistence_xml.cpp b/modules/core/src/persistence_xml.cpp index caba4f5bf0..6141fade2d 100644 --- a/modules/core/src/persistence_xml.cpp +++ b/modules/core/src/persistence_xml.cpp @@ -308,8 +308,8 @@ public: if( !multiline ) { - ptr = fs->resizeWriteBuffer( ptr, len + 9 ); - sprintf( ptr, "", comment ); + ptr = fs->resizeWriteBuffer( ptr, len + 5+4+1 ); + snprintf( ptr, len + 5+4+1, "", comment ); len = (int)strlen(ptr); } else @@ -344,7 +344,7 @@ public: fs->setBufferPtr(ptr); ptr = fs->flush(); } - sprintf( ptr, "-->" ); + strcpy( ptr, "-->" ); fs->setBufferPtr(ptr + 3); fs->flush(); } From f5a14532c22d45dfec4b3d91dc7caf46b5c940b0 Mon Sep 17 00:00:00 2001 From: autoantwort <41973254+autoantwort@users.noreply.github.com> Date: Thu, 17 Aug 2023 11:34:19 +0200 Subject: [PATCH 22/57] Merge pull request #24167 from autoantwort:missing-include * add missing include * Apply CR --- modules/dnn/src/op_halide.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/dnn/src/op_halide.cpp b/modules/dnn/src/op_halide.cpp index 653de36146..db1a72278e 100644 --- a/modules/dnn/src/op_halide.cpp +++ b/modules/dnn/src/op_halide.cpp @@ -14,6 +14,7 @@ #include "halide_scheduler.hpp" #include +#include #endif // HAVE_HALIDE namespace cv { From 70a58d7198dc57c44b876e2f8c7ca56890ad55fc Mon Sep 17 00:00:00 2001 From: CSBVision Date: Thu, 17 Aug 2023 12:02:29 +0200 Subject: 
[PATCH 23/57] Use STRING instead of PATH to fix #24141 --- cmake/OpenCVDetectPython.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/OpenCVDetectPython.cmake b/cmake/OpenCVDetectPython.cmake index c93eb9f9a7..88a4341856 100644 --- a/cmake/OpenCVDetectPython.cmake +++ b/cmake/OpenCVDetectPython.cmake @@ -258,7 +258,7 @@ if(NOT ${found}) set(${include_path} "${_include_path}" CACHE INTERNAL "") set(${include_dir} "${_include_dir}" CACHE PATH "Python include dir") set(${include_dir2} "${_include_dir2}" CACHE PATH "Python include dir 2") - set(${packages_path} "${_packages_path}" CACHE PATH "Where to install the python packages.") + set(${packages_path} "${_packages_path}" CACHE STRING "Where to install the python packages.") set(${numpy_include_dirs} ${_numpy_include_dirs} CACHE PATH "Path to numpy headers") set(${numpy_version} "${_numpy_version}" CACHE INTERNAL "") endif() From d88ad46978b8a08b1796685d7fb4b99ef72ebc42 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Fri, 18 Aug 2023 15:10:14 +0300 Subject: [PATCH 24/57] Remove explitit transB attribute from MatMul perf test --- modules/dnn/perf/perf_layer.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/dnn/perf/perf_layer.cpp b/modules/dnn/perf/perf_layer.cpp index 261bc5c3ca..3020dbea66 100644 --- a/modules/dnn/perf/perf_layer.cpp +++ b/modules/dnn/perf/perf_layer.cpp @@ -678,7 +678,6 @@ PERF_TEST_P_(Layer_FullyConnected, fc) lp.set("axis", input.dims - 1); lp.set("is_matmul", weights.dims > 2); lp.set("bias_term", false); - lp.set("transB", true); lp.set("num_output", (int)weights.total(0, weights.dims - 1)); lp.blobs.resize(1, weights); From a0debc3a9a4871d08bb8d07bad68df246c7207ab Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Wed, 23 Aug 2023 10:31:14 +0300 Subject: [PATCH 25/57] Enable OpenVINO max pooling with indices since 2022.1 --- modules/dnn/src/layers/pooling_layer.cpp | 7 ++++++- modules/dnn/test/test_tflite_importer.cpp | 5 +++++ 2 files changed, 11 
insertions(+), 1 deletion(-) diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index 5caaa36ba0..1337657127 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -209,7 +209,8 @@ public: #ifdef HAVE_INF_ENGINE if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { - return type != STOCHASTIC && kernel_size.size() > 1 && (kernel_size.size() != 3 || !isArmComputePlugin()); + return type != STOCHASTIC && kernel_size.size() > 1 && (kernel_size.size() != 3 || !isArmComputePlugin()) && + (!computeMaxIdx || INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2022_1)); } #endif if (backendId == DNN_BACKEND_OPENCV) @@ -615,10 +616,14 @@ public: else if (type == MAX) { std::shared_ptr max_pool; if (computeMaxIdx) { +#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2022_1) std::vector dilations(kernel_size.size(), 1); max_pool = std::make_shared(ieInpNode, ngraph::Strides(strides), ngraph::Strides(dilations), ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size), rounding_type, pad_type); +#else + CV_Error(Error::StsNotImplemented, "OpenVINO MaxPool with indices"); +#endif } else { max_pool = std::make_shared(ieInpNode, ngraph::Strides(strides), ngraph::Shape(pads_begin), ngraph::Shape(pads_end), ngraph::Shape(kernel_size), diff --git a/modules/dnn/test/test_tflite_importer.cpp b/modules/dnn/test/test_tflite_importer.cpp index 19b3f3a94a..beb586f126 100644 --- a/modules/dnn/test/test_tflite_importer.cpp +++ b/modules/dnn/test/test_tflite_importer.cpp @@ -128,6 +128,11 @@ TEST_P(Test_TFLite, max_unpooling) if (backend == DNN_BACKEND_CUDA) applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2022010000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target != 
DNN_TARGET_CPU) { if (target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); if (target == DNN_TARGET_OPENCL) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); From 81cc89a3ce1e92b24ee46bf95728efe5e6d0f29f Mon Sep 17 00:00:00 2001 From: Kumataro Date: Thu, 24 Aug 2023 04:53:11 +0900 Subject: [PATCH 26/57] Merge pull request #24179 from Kumataro:fix24145 * core:add OPENCV_IPP_MEAN/MINMAX/SUM option to enable IPP optimizations * fix: to use guard HAVE_IPP and ocv_append_source_file_compile_definitions() macro. * support OPENCV_IPP_ENABLE_ALL * add document for OPENCV_IPP_ENABLE_ALL * fix OPENCV_IPP_ENABLE_ALL comment --- .../config_reference.markdown | 11 ++++++++++ modules/core/CMakeLists.txt | 20 +++++++++++++++++++ modules/core/src/mean.dispatch.cpp | 4 ++++ modules/core/src/minmax.cpp | 2 ++ modules/core/src/sum.dispatch.cpp | 2 ++ modules/imgproc/CMakeLists.txt | 10 ++++++---- 6 files changed, 45 insertions(+), 4 deletions(-) diff --git a/doc/tutorials/introduction/config_reference/config_reference.markdown b/doc/tutorials/introduction/config_reference/config_reference.markdown index 2528baf41d..4fd256dd93 100644 --- a/doc/tutorials/introduction/config_reference/config_reference.markdown +++ b/doc/tutorials/introduction/config_reference/config_reference.markdown @@ -224,6 +224,16 @@ Following options can be used to produce special builds with instrumentation or @see [Link time optimization](https://gcc.gnu.org/wiki/LinkTimeOptimization) @see [ThinLTO](https://clang.llvm.org/docs/ThinLTO.html) +## Enable IPP optimization + +The following options can be used to enable IPP optimizations for each function, but they increase the size of the OpenCV library. All options are disabled by default. 
+ +| Option | Functions | + roughly size | +| -------| --------- | -------------- | +| `OPENCV_IPP_GAUSSIAN_BLUR` | GaussianBlur() | +8Mb | +| `OPENCV_IPP_MEAN` | mean() / meanStdDev() | +0.2Mb | +| `OPENCV_IPP_MINMAX` | minMaxLoc() / minMaxIdx() | +0.2Mb | +| `OPENCV_IPP_SUM` | sum() | +0.1Mb | # Functional features and dependencies {#tutorial_config_reference_func} @@ -565,6 +575,7 @@ Following options can be used to change installation layout for common scenarios | ------ | ------- | ----------- | | `OPENCV_ENABLE_NONFREE` | _OFF_ | Some algorithms included in the library are known to be protected by patents and are disabled by default. | | `OPENCV_FORCE_3RDPARTY_BUILD`| _OFF_ | Enable all `BUILD_` options at once. | +| `OPENCV_IPP_ENABLE_ALL`| _OFF_ | Enable all `OPENCV_IPP_` options at once. | | `ENABLE_CCACHE` | _ON_ (on Unix-like platforms) | Enable [ccache](https://en.wikipedia.org/wiki/Ccache) auto-detection. This tool wraps compiler calls and caches results, can significantly improve re-compilation time. | | `ENABLE_PRECOMPILED_HEADERS` | _ON_ (for MSVC) | Enable precompiled headers support. Improves build time. | | `BUILD_DOCS` | _OFF_ | Enable documentation build (_doxygen_, _doxygen_cpp_, _doxygen_python_, _doxygen_javadoc_ targets). [Doxygen](http://www.doxygen.org/index.html) must be installed for C++ documentation build. Python and [BeautifulSoup4](https://en.wikipedia.org/wiki/Beautiful_Soup_(HTML_parser)) must be installed for Python documentation build. Javadoc and Ant must be installed for Java documentation build (part of Java SDK). 
| diff --git a/modules/core/CMakeLists.txt b/modules/core/CMakeLists.txt index 1b3f574275..ba5b61ef5f 100644 --- a/modules/core/CMakeLists.txt +++ b/modules/core/CMakeLists.txt @@ -60,6 +60,26 @@ if(CV_TRACE AND HAVE_ITT) add_definitions(-DOPENCV_WITH_ITT=1) endif() +# https://github.com/opencv/opencv/issues/24145 +if(HAVE_IPP) + OCV_OPTION(OPENCV_IPP_ENABLE_ALL "Enable all OPENCV_IPP_ options at once" OFF) + OCV_OPTION(OPENCV_IPP_MEAN "Enable IPP optimizations for mean (+200Kb in binary size)" OPENCV_IPP_ENABLE_ALL) + OCV_OPTION(OPENCV_IPP_MINMAX "Enable IPP optimizations for minMaxLoc/minMaxIdx (+200Kb in binary size)" OPENCV_IPP_ENABLE_ALL) + OCV_OPTION(OPENCV_IPP_SUM "Enable IPP optimizations for sum (+100Kb in binary size)" OPENCV_IPP_ENABLE_ALL) + + if(OPENCV_IPP_MEAN) + ocv_append_source_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/mean.dispatch.cpp "OPENCV_IPP_MEAN=1") + endif() + + if(OPENCV_IPP_MINMAX) + ocv_append_source_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/minmax.cpp "OPENCV_IPP_MINMAX=1") + endif() + + if(OPENCV_IPP_SUM) + ocv_append_source_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/sum.dispatch.cpp "OPENCV_IPP_SUM=1") + endif() +endif() + file(GLOB lib_cuda_hdrs "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/cuda/*.hpp" "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/cuda/*.h") diff --git a/modules/core/src/mean.dispatch.cpp b/modules/core/src/mean.dispatch.cpp index 6a5275ab43..0f94e5421a 100644 --- a/modules/core/src/mean.dispatch.cpp +++ b/modules/core/src/mean.dispatch.cpp @@ -8,20 +8,24 @@ #include "opencv2/core/openvx/ovx_defs.hpp" #include "stat.hpp" +#ifndef OPENCV_IPP_MEAN #undef HAVE_IPP #undef CV_IPP_RUN_FAST #define CV_IPP_RUN_FAST(f, ...) #undef CV_IPP_RUN #define CV_IPP_RUN(c, f, ...) 
+#endif // OPENCV_IPP_MEAN #include "mean.simd.hpp" #include "mean.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content +#ifndef OPENCV_IPP_MEAN #undef HAVE_IPP #undef CV_IPP_RUN_FAST #define CV_IPP_RUN_FAST(f, ...) #undef CV_IPP_RUN #define CV_IPP_RUN(c, f, ...) +#endif // OPENCV_IPP_MEAN namespace cv { diff --git a/modules/core/src/minmax.cpp b/modules/core/src/minmax.cpp index 092c5e9234..bf2471a076 100644 --- a/modules/core/src/minmax.cpp +++ b/modules/core/src/minmax.cpp @@ -11,11 +11,13 @@ #include +#ifndef OPENCV_IPP_MINMAX #undef HAVE_IPP #undef CV_IPP_RUN_FAST #define CV_IPP_RUN_FAST(f, ...) #undef CV_IPP_RUN #define CV_IPP_RUN(c, f, ...) +#endif // OPENCV_IPP_MINMAX #define IPP_DISABLE_MINMAXIDX_MANY_ROWS 1 // see Core_MinMaxIdx.rows_overflow test diff --git a/modules/core/src/sum.dispatch.cpp b/modules/core/src/sum.dispatch.cpp index a1f7d73868..fade948336 100644 --- a/modules/core/src/sum.dispatch.cpp +++ b/modules/core/src/sum.dispatch.cpp @@ -10,11 +10,13 @@ #include "sum.simd.hpp" #include "sum.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content +#ifndef OPENCV_IPP_SUM #undef HAVE_IPP #undef CV_IPP_RUN_FAST #define CV_IPP_RUN_FAST(f, ...) #undef CV_IPP_RUN #define CV_IPP_RUN(c, f, ...) 
+#endif // OPENCV_IPP_SUM namespace cv { diff --git a/modules/imgproc/CMakeLists.txt b/modules/imgproc/CMakeLists.txt index 8ee300c320..10aed6bedd 100644 --- a/modules/imgproc/CMakeLists.txt +++ b/modules/imgproc/CMakeLists.txt @@ -12,8 +12,10 @@ ocv_add_dispatched_file(smooth SSE2 SSE4_1 AVX2) ocv_add_dispatched_file(sumpixels SSE2 AVX2 AVX512_SKX) ocv_define_module(imgproc opencv_core WRAP java objc python js) -ocv_check_environment_variables(OPENCV_IPP_GAUSSIAN_BLUR) -option(OPENCV_IPP_GAUSSIAN_BLUR "Enable IPP optimizations for GaussianBlur (+8Mb in binary size)" OFF) -if(OPENCV_IPP_GAUSSIAN_BLUR) - ocv_append_source_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/smooth.dispatch.cpp "ENABLE_IPP_GAUSSIAN_BLUR=1") +if(HAVE_IPP) + # OPENCV_IPP_ENABLE_ALL is defined in modules/core/CMakeList.txt + OCV_OPTION(OPENCV_IPP_GAUSSIAN_BLUR "Enable IPP optimizations for GaussianBlur (+8Mb in binary size)" OPENCV_IPP_ENABLE_ALL) + if(OPENCV_IPP_GAUSSIAN_BLUR) + ocv_append_source_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/smooth.dispatch.cpp "ENABLE_IPP_GAUSSIAN_BLUR=1") + endif() endif() From 588ddf1b181aa7243144b27d65fc7690fb89e344 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Fri, 25 Aug 2023 14:53:34 +0300 Subject: [PATCH 27/57] Merge pull request #24186 from dkurt:ts_fixture_constructor_skip Skip test on SkipTestException at fixture's constructor * Skip test on SkipTestException at fixture's constructor * Add warning supression * Skip Python tests if no test file found * Skip instances of test fixture with exception at SetUpTestCase * Skip test with exception at SetUp method * Try remove warning disable * Add CV_NORETURN * Remove FAIL assertion * Use findDataFile to throw Skip exception * Throw exception conditionally --- modules/core/test/test_misc.cpp | 36 ++++++++++++++++++++ modules/python/test/tests_common.py | 2 ++ modules/ts/include/opencv2/ts/ts_ext.hpp | 42 +++++++++++++++++++++--- modules/ts/src/ts_tags.cpp | 13 +++++--- 4 files 
changed, 85 insertions(+), 8 deletions(-) diff --git a/modules/core/test/test_misc.cpp b/modules/core/test/test_misc.cpp index 8ed0afe771..cb89dcf573 100644 --- a/modules/core/test/test_misc.cpp +++ b/modules/core/test/test_misc.cpp @@ -917,5 +917,41 @@ REGISTER_TYPED_TEST_CASE_P(Rect_Test, Overflows); typedef ::testing::Types RectTypes; INSTANTIATE_TYPED_TEST_CASE_P(Negative_Test, Rect_Test, RectTypes); +// Expected that SkipTestException thrown in the constructor should skip test but not fail +struct TestFixtureSkip: public ::testing::Test { + TestFixtureSkip(bool throwEx = true) { + if (throwEx) { + throw SkipTestException("Skip test at constructor"); + } + } +}; + +TEST_F(TestFixtureSkip, NoBodyRun) { + FAIL() << "Unreachable code called"; +} + +// Check no test body started in case of skip exception at static SetUpTestCase +struct TestSetUpTestCaseSkip: public ::testing::Test { + static void SetUpTestCase() { + throw SkipTestException("Skip test at SetUpTestCase"); + } +}; + +TEST_F(TestSetUpTestCaseSkip, NoBodyRun) { + FAIL() << "Unreachable code called"; +} +TEST_F(TestSetUpTestCaseSkip, NoBodyRun2) { + FAIL() << "Unreachable code called"; +} + +struct TestSetUpSkip: public ::testing::Test { + virtual void SetUp() { + throw SkipTestException("Skip test at SetUp"); + } +}; + +TEST_F(TestSetUpSkip, NoBodyRun) { + FAIL() << "Unreachable code called"; +} }} // namespace diff --git a/modules/python/test/tests_common.py b/modules/python/test/tests_common.py index ec49f46d0d..d673dd7b78 100644 --- a/modules/python/test/tests_common.py +++ b/modules/python/test/tests_common.py @@ -36,6 +36,8 @@ class NewOpenCVTests(unittest.TestCase): return candidate if required: self.fail('File ' + filename + ' not found') + else: + self.skipTest('File ' + filename + ' not found') return None diff --git a/modules/ts/include/opencv2/ts/ts_ext.hpp b/modules/ts/include/opencv2/ts/ts_ext.hpp index efa4860510..4603dba4f7 100644 --- a/modules/ts/include/opencv2/ts/ts_ext.hpp +++ 
b/modules/ts/include/opencv2/ts/ts_ext.hpp @@ -47,6 +47,18 @@ bool checkBigDataTests(); } \ } \ +#define CV__TEST_SETUP_IMPL(parent_class) \ + { \ + try { \ + parent_class::SetUp(); \ + } catch (const cvtest::details::SkipTestExceptionBase& e) { \ + printf("[ SKIP ] %s\n", e.what()); \ + } \ + } + +struct DummyTest : public ::testing::Test { + virtual void TestBody() CV_OVERRIDE {} +}; #undef TEST #define TEST_(test_case_name, test_name, parent_class, bodyMethodName, BODY_ATTR, BODY_IMPL) \ @@ -60,6 +72,17 @@ bool checkBigDataTests(); GTEST_DISALLOW_COPY_AND_ASSIGN_(\ GTEST_TEST_CLASS_NAME_(test_case_name, test_name));\ };\ + class test_case_name##test_name##_factory : public ::testing::internal::TestFactoryBase { \ + public:\ + virtual ::testing::Test* CreateTest() { \ + try { \ + return new GTEST_TEST_CLASS_NAME_(test_case_name, test_name); \ + } catch (const cvtest::details::SkipTestExceptionBase& e) { \ + printf("[ SKIP ] %s\n", e.what()); \ + return new DummyTest(); \ + } \ + } \ + };\ \ ::testing::TestInfo* const GTEST_TEST_CLASS_NAME_(test_case_name, test_name)\ ::test_info_ =\ @@ -69,8 +92,7 @@ bool checkBigDataTests(); (::testing::internal::GetTestTypeId()), \ parent_class::SetUpTestCase, \ parent_class::TearDownTestCase, \ - new ::testing::internal::TestFactoryImpl<\ - GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>);\ + new test_case_name##test_name##_factory);\ void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody() BODY_IMPL( #test_case_name "_" #test_name ) \ void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::bodyMethodName() @@ -109,10 +131,22 @@ bool checkBigDataTests(); private:\ virtual void TestBody() CV_OVERRIDE;\ virtual void Body(); \ + virtual void SetUp() CV_OVERRIDE; \ static ::testing::TestInfo* const test_info_ GTEST_ATTRIBUTE_UNUSED_;\ GTEST_DISALLOW_COPY_AND_ASSIGN_(\ GTEST_TEST_CLASS_NAME_(test_fixture, test_name));\ };\ + class test_fixture##test_name##_factory : public ::testing::internal::TestFactoryBase { \ + 
public:\ + virtual ::testing::Test* CreateTest() { \ + try { \ + return new GTEST_TEST_CLASS_NAME_(test_fixture, test_name); \ + } catch (const cvtest::details::SkipTestExceptionBase& e) { \ + printf("[ SKIP ] %s\n", e.what()); \ + return new DummyTest(); \ + } \ + } \ + };\ \ ::testing::TestInfo* const GTEST_TEST_CLASS_NAME_(test_fixture, test_name)\ ::test_info_ =\ @@ -122,9 +156,9 @@ bool checkBigDataTests(); (::testing::internal::GetTypeId()), \ test_fixture::SetUpTestCase, \ test_fixture::TearDownTestCase, \ - new ::testing::internal::TestFactoryImpl<\ - GTEST_TEST_CLASS_NAME_(test_fixture, test_name)>);\ + new test_fixture##test_name##_factory);\ void GTEST_TEST_CLASS_NAME_(test_fixture, test_name)::TestBody() CV__TEST_BODY_IMPL( #test_fixture "_" #test_name ) \ + void GTEST_TEST_CLASS_NAME_(test_fixture, test_name)::SetUp() CV__TEST_SETUP_IMPL(test_fixture) \ void GTEST_TEST_CLASS_NAME_(test_fixture, test_name)::Body() // Don't use directly diff --git a/modules/ts/src/ts_tags.cpp b/modules/ts/src/ts_tags.cpp index 8bed1b739f..21653e17ee 100644 --- a/modules/ts/src/ts_tags.cpp +++ b/modules/ts/src/ts_tags.cpp @@ -11,7 +11,7 @@ namespace cvtest { static bool printTestTag = false; static std::vector currentDirectTestTags, currentImpliedTestTags; -static std::vector skipped_tests; +static std::vector skipped_tests; static std::map& getTestTagsSkipCounts() { @@ -26,7 +26,7 @@ static std::map& getTestTagsSkipExtraCounts() void testTagIncreaseSkipCount(const std::string& tag, bool isMain, bool appendSkipTests) { if (appendSkipTests) - skipped_tests.push_back(::testing::UnitTest::GetInstance()->current_test_info()); + skipped_tests.push_back(::testing::UnitTest::GetInstance()->current_test_case()); std::map& counts = isMain ? 
getTestTagsSkipCounts() : getTestTagsSkipExtraCounts(); std::map::iterator i = counts.find(tag); if (i == counts.end()) @@ -280,6 +280,11 @@ static bool isTestTagSkipped(const std::string& testTag, CV_OUT std::string& ski void checkTestTags() { + if (std::find(skipped_tests.begin(), skipped_tests.end(), + ::testing::UnitTest::GetInstance()->current_test_case()) != skipped_tests.end()) { + throw details::SkipTestExceptionBase(false); + } + std::string skipTag; const std::vector& testTags = currentDirectTestTags; { @@ -307,7 +312,7 @@ void checkTestTags() } if (found != tags.size()) { - skipped_tests.push_back(::testing::UnitTest::GetInstance()->current_test_info()); + skipped_tests.push_back(::testing::UnitTest::GetInstance()->current_test_case()); throw details::SkipTestExceptionBase("Test tags don't pass required tags list (--test_tag parameter)", true); } } @@ -341,7 +346,7 @@ void checkTestTags() if (!skip_message.empty()) { - skipped_tests.push_back(::testing::UnitTest::GetInstance()->current_test_info()); + skipped_tests.push_back(::testing::UnitTest::GetInstance()->current_test_case()); throw details::SkipTestExceptionBase(skip_message, true); } } From 4b1a4bdb49a6eb45d868eaf2de3fc2e8d34f3ece Mon Sep 17 00:00:00 2001 From: Kumataro Date: Sun, 27 Aug 2023 19:49:37 +0900 Subject: [PATCH 28/57] imgcodecs: fix libtiff homepage --- 3rdparty/readme.txt | 4 +++- modules/imgcodecs/src/grfmt_tiff.cpp | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/3rdparty/readme.txt b/3rdparty/readme.txt index c3068521e3..0e6ce1e05e 100644 --- a/3rdparty/readme.txt +++ b/3rdparty/readme.txt @@ -39,7 +39,9 @@ libspng Portable Network Graphics library. libtiff Tag Image File Format (TIFF) Software Copyright (c) 1988-1997 Sam Leffler Copyright (c) 1991-1997 Silicon Graphics, Inc. 
- See libtiff home page http://www.libtiff.org/ + See libtiff home page #1 http://www.simplesystems.org/libtiff/ + #2 https://libtiff.gitlab.io/libtiff/ + #3 http://libtiff.maptools.org/ for details and links to the source code WITH_TIFF CMake option must be ON to add libtiff & zlib support to imgcodecs. diff --git a/modules/imgcodecs/src/grfmt_tiff.cpp b/modules/imgcodecs/src/grfmt_tiff.cpp index ed21f3f14c..4febee36db 100644 --- a/modules/imgcodecs/src/grfmt_tiff.cpp +++ b/modules/imgcodecs/src/grfmt_tiff.cpp @@ -245,7 +245,7 @@ bool TiffDecoder::readHeader() if (!tif) { // TIFFOpen() mode flags are different to fopen(). A 'b' in mode "rb" has no effect when reading. - // http://www.remotesensing.org/libtiff/man/TIFFOpen.3tiff.html + // http://www.simplesystems.org/libtiff/functions/TIFFOpen.html if ( !m_buf.empty() ) { m_buf_pos = 0; @@ -1118,7 +1118,7 @@ public: TIFF* open () { // do NOT put "wb" as the mode, because the b means "big endian" mode, not "binary" mode. - // http://www.remotesensing.org/libtiff/man/TIFFOpen.3tiff.html + // http://www.simplesystems.org/libtiff/functions/TIFFOpen.html return TIFFClientOpen( "", "w", reinterpret_cast(this), &TiffEncoderBufHelper::read, &TiffEncoderBufHelper::write, &TiffEncoderBufHelper::seek, &TiffEncoderBufHelper::close, &TiffEncoderBufHelper::size, @@ -1200,7 +1200,7 @@ static bool readParam(const std::vector& params, int key, int& value) bool TiffEncoder::writeLibTiff( const std::vector& img_vec, const std::vector& params) { // do NOT put "wb" as the mode, because the b means "big endian" mode, not "binary" mode. 
- // http://www.remotesensing.org/libtiff/man/TIFFOpen.3tiff.html + // http://www.simplesystems.org/libtiff/functions/TIFFOpen.html TIFF* tif = NULL; TiffEncoderBufHelper buf_helper(m_buf); From 8a1b998b2ba3993919deafd137a33f55fce2c962 Mon Sep 17 00:00:00 2001 From: Vincent Rabaud Date: Sun, 27 Aug 2023 19:46:24 +0200 Subject: [PATCH 29/57] Merge pull request #24194 from vrabaud:compilation_fix * Fix compilation when forcing later C++. * Remove random_shuffle. * Remove random_shuffle. --- modules/objdetect/test/test_qrcode_encode.cpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/modules/objdetect/test/test_qrcode_encode.cpp b/modules/objdetect/test/test_qrcode_encode.cpp index 14900c3078..1005793269 100644 --- a/modules/objdetect/test/test_qrcode_encode.cpp +++ b/modules/objdetect/test/test_qrcode_encode.cpp @@ -264,7 +264,8 @@ TEST(Objdetect_QRCode_Encode_Decode, regression) int true_capacity = establishCapacity(mode, version, cur_capacity); std::string input_info = symbol_set; - std::random_shuffle(input_info.begin(),input_info.end()); + std::mt19937 rand_gen {1}; + std::shuffle(input_info.begin(), input_info.end(), rand_gen); int count = 0; if((int)input_info.length() > true_capacity) { @@ -390,15 +391,8 @@ TEST(Objdetect_QRCode_Encode_Decode_Structured_Append, DISABLED_regression) std::string symbol_set = config["symbols_set"]; std::string input_info = symbol_set; -#if defined CV_CXX11 - // std::random_shuffle is deprecated since C++11 and removed in C++17. - // Use manually constructed RNG with a fixed seed and std::shuffle instead. 
std::mt19937 rand_gen {1}; std::shuffle(input_info.begin(), input_info.end(), rand_gen); -#else - SeededRandFunctor<1> rand_gen; - std::random_shuffle(input_info.begin(), input_info.end(), rand_gen); -#endif for (int j = min_stuctures_num; j < max_stuctures_num; j++) { QRCodeEncoder::Params params; From c20febdbb04064ba23ea8fa83bde1b065932a94c Mon Sep 17 00:00:00 2001 From: Sam James Date: Tue, 29 Aug 2023 02:56:55 +0100 Subject: [PATCH 30/57] Fix compilation on arm64 with FP16 when disabled If building with -mcpu=native or any other setting which implies the current CPU has FP16 but with intrinsics disabled, we mistakenly try to use it even though convolution.hpp conditionally defines it correctly based on whether we should *use it*. convolution.cpp on the other hand was mismatched and trying to use it if the CPU supported it, even if not enabled in the build system. Make the guards match. Bug: https://bugs.gentoo.org/913031 Signed-off-by: Sam James --- modules/dnn/src/layers/cpu_kernels/convolution.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/dnn/src/layers/cpu_kernels/convolution.hpp b/modules/dnn/src/layers/cpu_kernels/convolution.hpp index 22ef9a8575..5effdc2d0c 100644 --- a/modules/dnn/src/layers/cpu_kernels/convolution.hpp +++ b/modules/dnn/src/layers/cpu_kernels/convolution.hpp @@ -14,7 +14,7 @@ #define CONV_NR_FP32 28 // The FP16 can only be supported by ARM64 and with FP16 FMA supported. -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC // check FP16 FMA. +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && CV_FP16 // check FP16 FMA. 
#define CONV_ARM_FP16 1 #endif From a308dfca9856574d37abe7628b965e29861fb105 Mon Sep 17 00:00:00 2001 From: Yuantao Feng Date: Wed, 30 Aug 2023 14:53:59 +0800 Subject: [PATCH 31/57] core: add broadcast (#23965) * add broadcast_to with tests * change name * fix test * fix implicit type conversion * replace type of shape with InputArray * add perf test * add perf tests which takes care of axis * v2 from ficus expand * rename to broadcast * use randu in place of declare * doc improvement; smaller scale in perf * capture get_index by reference --- modules/core/include/opencv2/core.hpp | 7 + modules/core/perf/perf_arithm.cpp | 27 ++++ modules/core/src/matrix_transform.cpp | 218 ++++++++++++++++++++++++++ modules/core/test/test_arithm.cpp | 133 ++++++++++++++++ 4 files changed, 385 insertions(+) diff --git a/modules/core/include/opencv2/core.hpp b/modules/core/include/opencv2/core.hpp index d9a21701f2..7b5108fcc4 100644 --- a/modules/core/include/opencv2/core.hpp +++ b/modules/core/include/opencv2/core.hpp @@ -1118,6 +1118,13 @@ CV_EXPORTS_W void flip(InputArray src, OutputArray dst, int flipCode); */ CV_EXPORTS_W void flipND(InputArray src, OutputArray dst, int axis); +/** @brief Broadcast the given Mat to the given shape. + * @param src input array + * @param shape target shape. Should be a list of CV_32S numbers. Note that negative values are not supported. 
+ * @param dst output array that has the given shape + */ +CV_EXPORTS_W void broadcast(InputArray src, InputArray shape, OutputArray dst); + enum RotateFlags { ROTATE_90_CLOCKWISE = 0, //!, perf::MatType, std::vector>>; typedef Size_MatType BinaryOpTest; +PERF_TEST_P_(BroadcastTest, basic) +{ + std::vector shape_src = get<0>(GetParam()); + int dt_type = get<1>(GetParam()); + std::vector shape_dst = get<2>(GetParam()); + + cv::Mat src(static_cast(shape_src.size()), shape_src.data(), dt_type); + cv::Mat dst(static_cast(shape_dst.size()), shape_dst.data(), dt_type); + + cv::randu(src, -1.f, 1.f); + + TEST_CYCLE() cv::broadcast(src, shape_dst, dst); + + SANITY_CHECK_NOTHING(); +} + +INSTANTIATE_TEST_CASE_P(/*nothing*/ , BroadcastTest, + testing::Combine( + testing::Values(std::vector{1, 100, 800}, + std::vector{10, 1, 800}, + std::vector{10, 100, 1}), + testing::Values(CV_32FC1), + testing::Values(std::vector{10, 100, 800}) + ) +); + PERF_TEST_P_(BinaryOpTest, min) { Size sz = get<0>(GetParam()); diff --git a/modules/core/src/matrix_transform.cpp b/modules/core/src/matrix_transform.cpp index 744ee69b0d..43bf9be057 100644 --- a/modules/core/src/matrix_transform.cpp +++ b/modules/core/src/matrix_transform.cpp @@ -7,6 +7,7 @@ #include "opencv2/core/detail/dispatch_helper.impl.hpp" #include // std::swap_ranges +#include // std::accumulate namespace cv { @@ -857,6 +858,223 @@ void flipND(InputArray _src, OutputArray _dst, int _axis) flipNDImpl(dst.ptr(), dst.size.p, dst.step.p, axis); } +/* + This function first prepends 1 to each tensor shape to have a common max_ndims dimension, then flatten non-broadcast dimensions. +*/ +static bool _flatten_for_broadcast(int narrays, int max_ndims, const int* ndims, const int** orig_shape, + int** flatten_shape, size_t** flatten_step) { + int i, j, k; + + // step 1. + // * make all inputs and the output max_ndims-dimensional. 
+ // * compute proper step's + for (i = max_ndims - 1; i >= 0; i-- ) { + for (k = 0; k < narrays; k++) { + j = ndims[k] - (max_ndims - i); + int sz_i = j >= 0 ? orig_shape[k][j] : 1; + size_t st_i = i == max_ndims - 1 ? 1 : flatten_step[k][i+1] * flatten_shape[k][i+1]; + flatten_shape[k][i] = sz_i; + flatten_step[k][i] = st_i; + if (flatten_shape[k][i] == 0) + return false; + } + } + + // step 2. Let's do the flattening first, + // since we'd need proper values of steps to check continuity. + // this loop is probably the most tricky part + // in the whole implementation of broadcasting. + j = max_ndims-1; + for (i = j - 1; i >= 0; i--) { + bool all_contiguous = true, all_scalars = true, all_consistent = true; + for(k = 0; k < narrays; k++) { + size_t st = flatten_step[k][j] * flatten_shape[k][j]; + bool prev_scalar = flatten_shape[k][j] == 1; + bool scalar = flatten_shape[k][i] == 1; + all_contiguous = all_contiguous && (st == flatten_step[k][i]); + all_scalars = all_scalars && scalar; + all_consistent = all_consistent && (scalar == prev_scalar); + } + if (all_contiguous && (all_consistent || all_scalars)) { + for(k = 0; k < narrays; k++) + flatten_shape[k][j] *= flatten_shape[k][i]; + } else { + j--; + if (i < j) { + for(k = 0; k < narrays; k++) { + flatten_shape[k][j] = flatten_shape[k][i]; + flatten_step[k][j] = flatten_step[k][i]; + } + } + } + } + + // step 3. Set some step's to 0's. + for (i = max_ndims-1; i >= j; i--) { + for (k = 0; k < narrays; k++) + flatten_step[k][i] = flatten_shape[k][i] == 1 ? 
0 : flatten_step[k][i]; + } + for (; i >= 0; i--) { + for (k = 0; k < narrays; k++) { + flatten_step[k][i] = 0; + flatten_shape[k][i] = 1; + } + } + return true; +} + +void broadcast(InputArray _src, InputArray _shape, OutputArray _dst) { + CV_INSTRUMENT_REGION(); + + Mat src = _src.getMat(); + CV_CheckTrue(src.isContinuous(), "broadcast: input array must be contiguous"); + CV_CheckChannelsEQ(src.channels(), 1, "broadcast: input array must be single channel"); + + Mat shape = _shape.getMat(); + CV_CheckTypeEQ(shape.type(), CV_32S, "broadcast: target shape must be of type int32"); + const auto dims_shape = static_cast(shape.total()); + const auto *ptr_shape = shape.ptr(); + + // check valid shape, 1D/0D Mat would fail in the following checks + const auto dims_src = src.dims; + CV_CheckLE(dims_src, dims_shape, + "broadcast: dimension of input array must be less than or equal to dimension of target shape"); + std::vector shape_src{src.size.p, src.size.p + dims_src}; + if (shape_src.size() < static_cast(dims_shape)) { + shape_src.insert(shape_src.begin(), dims_shape - shape_src.size(), 1); + } + for (int i = 0; i < static_cast(shape_src.size()); ++i) { + const auto *shape_target = ptr_shape; + if (shape_src[i] != 1) { + CV_CheckEQ(shape_src[i], shape_target[i], "target shape must be equal to input shape or 1"); + } + } + + // impl + _dst.create(dims_shape, shape.ptr(), src.type()); + Mat dst = _dst.getMat(); + std::vector is_same_shape(dims_shape, 0); + for (int i = 0; i < static_cast(shape_src.size()); ++i) { + if (shape_src[i] == ptr_shape[i]) { + is_same_shape[i] = 1; + } + } + // copy if same shape + if (std::accumulate(is_same_shape.begin(), is_same_shape.end(), 1, std::multiplies()) != 0) { + const auto *p_src = src.ptr(); + auto *p_dst = dst.ptr(); + std::memcpy(p_dst, p_src, dst.total() * dst.elemSize()); + return; + } + // other cases + int max_ndims = std::max(dims_src, dims_shape); + const int all_ndims[2] = {src.dims, dst.dims}; + const int* orig_shapes[2] 
= {src.size.p, dst.size.p}; + cv::AutoBuffer buff(max_ndims * 4); + int* flatten_shapes[2] = {(int*)buff.data(), (int*)(buff.data() + max_ndims)}; + size_t* flatten_steps[2] = {(size_t*)(buff.data() + 2 * max_ndims), (size_t*)(buff.data() + 3 * max_ndims)}; + if (_flatten_for_broadcast(2, max_ndims, all_ndims, orig_shapes, flatten_shapes, flatten_steps)) { + size_t src_dp = flatten_steps[0][max_ndims - 1]; + size_t dst_dp = flatten_steps[1][max_ndims - 1]; + CV_Assert(dst_dp == 1); + CV_Assert(max_ndims >= 2); // >= 3? + size_t rowstep_src = flatten_steps[0][max_ndims - 2]; + size_t rowstep_dst = flatten_steps[1][max_ndims - 2]; + const char* ptr_src = src.ptr(); + char* ptr_dst = dst.ptr(); + size_t esz = src.elemSize(); + int nrows = flatten_shapes[1][max_ndims - 2]; + int ncols = flatten_shapes[1][max_ndims - 1]; + int nplanes = 1; + CV_Check(esz, esz == 1 || esz == 2 || esz == 4 || esz == 8, "broadcast: not supported data type"); + + for (int k = 0; k < max_ndims - 2; k++) { + nplanes *= flatten_shapes[1][k]; + } + for (int plane_idx = 0; plane_idx < nplanes; plane_idx++) { + size_t offset_src = 0, offset_dst = 0; + size_t idx = (size_t)plane_idx; + for (int k = max_ndims - 3; k >= 0; k--) { + size_t prev_idx = idx / flatten_shapes[1][k]; + size_t i_k = (int)(idx - prev_idx * flatten_shapes[1][k]); + offset_src += i_k * flatten_steps[0][k]; + offset_dst += i_k * flatten_steps[1][k]; + idx = prev_idx; + } + + #define OPENCV_CORE_BROADCAST_LOOP(_Tp) \ + for (int i = 0; i < nrows; i++) { \ + const _Tp *ptr_src_ = (const _Tp*)ptr_src + offset_src + rowstep_src * i; \ + _Tp *ptr_dst_ = (_Tp*)ptr_dst + offset_dst + rowstep_dst * i; \ + if (src_dp == 1) { \ + for (int j = 0; j < ncols; j++) { \ + ptr_dst_[j] = ptr_src_[j]; \ + } \ + } else { \ + _Tp x = *ptr_src_; \ + for (int j = 0; j < ncols; j++) { \ + ptr_dst_[j] = x; \ + } \ + } \ + } + + if (esz == 1) { + OPENCV_CORE_BROADCAST_LOOP(int8_t); + } else if (esz == 2) { + OPENCV_CORE_BROADCAST_LOOP(int16_t); + } else 
if (esz == 4) { + OPENCV_CORE_BROADCAST_LOOP(int32_t); + } else if (esz == 8) { + OPENCV_CORE_BROADCAST_LOOP(int64_t); + } else { + CV_Error(cv::Error::StsNotImplemented, ""); + } + #undef OPENCV_CORE_BROADCAST_LOOP + } + } else { + // initial copy (src to dst) + std::vector step_src{src.step.p, src.step.p + dims_src}; + if (step_src.size() < static_cast(dims_shape)) { + step_src.insert(step_src.begin(), dims_shape - step_src.size(), step_src[0]); + } + for (size_t i = 0; i < src.total(); ++i) { + size_t t = i; + size_t src_offset = 0, dst_offset = 0; + for (int j = static_cast(shape_src.size() - 1); j >= 0; --j) { + size_t idx = t / shape_src[j]; + size_t offset = static_cast(t - idx * shape_src[j]); + src_offset += offset * step_src[j]; + dst_offset += offset * dst.step[j]; + t = idx; + } + const auto *p_src = src.ptr(); + auto *p_dst = dst.ptr(); + std::memcpy(p_dst + dst_offset, p_src + src_offset, dst.elemSize()); + } + // broadcast copy (dst inplace) + std::vector cumulative_shape(dims_shape, 1); + int total = static_cast(dst.total()); + for (int i = dims_shape - 1; i >= 0; --i) { + cumulative_shape[i] = static_cast(total / ptr_shape[i]); + total = cumulative_shape[i]; + } + for (int i = dims_shape - 1; i >= 0; --i) { + if (is_same_shape[i] == 1) { + continue; + } + auto step = dst.step[i]; + auto *p_dst = dst.ptr(); + for (int j = 0; j < cumulative_shape[i]; j++) { + for (int k = 0; k < ptr_shape[i] - 1; k++) { + std::memcpy(p_dst + step, p_dst, step); + p_dst += step; + } + p_dst += step; + } + } + } +} + void rotate(InputArray _src, OutputArray _dst, int rotateMode) { CV_Assert(_src.dims() <= 2); diff --git a/modules/core/test/test_arithm.cpp b/modules/core/test/test_arithm.cpp index ea9cda56be..848a2e8b6a 100644 --- a/modules/core/test/test_arithm.cpp +++ b/modules/core/test/test_arithm.cpp @@ -2268,6 +2268,139 @@ INSTANTIATE_TEST_CASE_P(Arithm, FlipND, testing::Combine( testing::Values(perf::MatType(CV_8UC1), CV_32FC1) )); +TEST(BroadcastTo, basic) { + 
std::vector shape_src{2, 1}; + std::vector data_src{1, 2}; + Mat src(static_cast(shape_src.size()), shape_src.data(), CV_32SC1, data_src.data()); + + auto get_index = [](const std::vector& shape, size_t cnt) { + std::vector index(shape.size()); + size_t t = cnt; + for (int i = static_cast(shape.size() - 1); i >= 0; --i) { + size_t idx = t / shape[i]; + index[i] = static_cast(t - idx * shape[i]); + t = idx; + } + return index; + }; + + auto fn_verify = [&get_index](const Mat& ref, const Mat& res) { + // check type + EXPECT_EQ(ref.type(), res.type()); + // check shape + EXPECT_EQ(ref.dims, res.dims); + for (int i = 0; i < ref.dims; ++i) { + EXPECT_EQ(ref.size[i], res.size[i]); + } + // check value + std::vector shape{ref.size.p, ref.size.p + ref.dims}; + for (size_t i = 0; i < ref.total(); ++i) { + auto index = get_index(shape, i); + switch (ref.type()) { + case CV_32SC1: { + ASSERT_EQ(ref.at(index.data()), res.at(index.data())); + } break; + case CV_8UC1: { + ASSERT_EQ(ref.at(index.data()), res.at(index.data())); + } break; + case CV_32FC1: { + ASSERT_EQ(ref.at(index.data()), res.at(index.data())); + } break; + default: FAIL() << "Unsupported type: " << ref.type(); + } + } + }; + + { + std::vector shape{4, 2, 3}; + std::vector data_ref{ + 1, 1, 1, // [0, 0, :] + 2, 2, 2, // [0, 1, :] + 1, 1, 1, // [1, 0, :] + 2, 2, 2, // [1, 1, :] + 1, 1, 1, // [2, 0, :] + 2, 2, 2, // [2, 1, :] + 1, 1, 1, // [3, 0, :] + 2, 2, 2 // [3, 1, :] + }; + Mat ref(static_cast(shape.size()), shape.data(), src.type(), data_ref.data()); + Mat dst; + broadcast(src, shape, dst); + fn_verify(ref, dst); + } + + { + Mat _src; + src.convertTo(_src, CV_8U); + std::vector shape{4, 2, 3}; + std::vector data_ref{ + 1, 1, 1, // [0, 0, :] + 2, 2, 2, // [0, 1, :] + 1, 1, 1, // [1, 0, :] + 2, 2, 2, // [1, 1, :] + 1, 1, 1, // [2, 0, :] + 2, 2, 2, // [2, 1, :] + 1, 1, 1, // [3, 0, :] + 2, 2, 2 // [3, 1, :] + }; + Mat ref(static_cast(shape.size()), shape.data(), _src.type(), data_ref.data()); + Mat dst; + 
broadcast(_src, shape, dst); + fn_verify(ref, dst); + } + + { + Mat _src; + src.convertTo(_src, CV_32F); + std::vector shape{1, 1, 2, 1}; // {2, 1} + std::vector data_ref{ + 1.f, // [0, 0, 0, 0] + 2.f, // [0, 0, 1, 0] + }; + Mat ref(static_cast(shape.size()), shape.data(), _src.type(), data_ref.data()); + Mat dst; + broadcast(_src, shape, dst); + fn_verify(ref, dst); + } + + { + std::vector _shape_src{2, 3, 4}; + std::vector _data_src{ + 1.f, 2.f, 3.f, 4.f, // [0, 0, :] + 2.f, 3.f, 4.f, 5.f, // [0, 1, :] + 3.f, 4.f, 5.f, 6.f, // [0, 2, :] + + 4.f, 5.f, 6.f, 7.f, // [1, 0, :] + 5.f, 6.f, 7.f, 8.f, // [1, 1, :] + 6.f, 7.f, 8.f, 9.f, // [1, 2, :] + }; + Mat _src(static_cast(_shape_src.size()), _shape_src.data(), CV_32FC1, _data_src.data()); + + std::vector shape{2, 1, 2, 3, 4}; + std::vector data_ref{ + 1.f, 2.f, 3.f, 4.f, // [0, 0, 0, 0, :] + 2.f, 3.f, 4.f, 5.f, // [0, 0, 0, 1, :] + 3.f, 4.f, 5.f, 6.f, // [0, 0, 0, 2, :] + + 4.f, 5.f, 6.f, 7.f, // [0, 0, 1, 0, :] + 5.f, 6.f, 7.f, 8.f, // [0, 0, 1, 1, :] + 6.f, 7.f, 8.f, 9.f, // [0, 0, 1, 2, :] + + 1.f, 2.f, 3.f, 4.f, // [1, 0, 0, 0, :] + 2.f, 3.f, 4.f, 5.f, // [1, 0, 0, 1, :] + 3.f, 4.f, 5.f, 6.f, // [1, 0, 0, 2, :] + + 4.f, 5.f, 6.f, 7.f, // [1, 0, 1, 0, :] + 5.f, 6.f, 7.f, 8.f, // [1, 0, 1, 1, :] + 6.f, 7.f, 8.f, 9.f, // [1, 0, 1, 2, :] + }; + Mat ref(static_cast(shape.size()), shape.data(), _src.type(), data_ref.data()); + Mat dst; + broadcast(_src, shape, dst); + fn_verify(ref, dst); + } +} + TEST(Core_minMaxIdx, regression_9207_2) { const int rows = 13; From 72bb8bb73cb97e9049a5dede776f47f39d29703c Mon Sep 17 00:00:00 2001 From: Kumataro Date: Sat, 2 Sep 2023 10:03:59 +0000 Subject: [PATCH 32/57] core: arm64: v_round() works with round to nearest, ties to even. 
--- modules/core/include/opencv2/core/hal/intrin_neon.hpp | 4 ++-- modules/core/test/test_intrin_utils.hpp | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/modules/core/include/opencv2/core/hal/intrin_neon.hpp b/modules/core/include/opencv2/core/hal/intrin_neon.hpp index 6f8973231b..14eb180819 100644 --- a/modules/core/include/opencv2/core/hal/intrin_neon.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_neon.hpp @@ -1997,12 +1997,12 @@ inline v_int32x4 v_trunc(const v_float32x4& a) inline v_int32x4 v_round(const v_float64x2& a) { static const int32x2_t zero = vdup_n_s32(0); - return v_int32x4(vcombine_s32(vmovn_s64(vcvtaq_s64_f64(a.val)), zero)); + return v_int32x4(vcombine_s32(vmovn_s64(vcvtnq_s64_f64(a.val)), zero)); } inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b) { - return v_int32x4(vcombine_s32(vmovn_s64(vcvtaq_s64_f64(a.val)), vmovn_s64(vcvtaq_s64_f64(b.val)))); + return v_int32x4(vcombine_s32(vmovn_s64(vcvtnq_s64_f64(a.val)), vmovn_s64(vcvtnq_s64_f64(b.val)))); } inline v_int32x4 v_floor(const v_float64x2& a) diff --git a/modules/core/test/test_intrin_utils.hpp b/modules/core/test/test_intrin_utils.hpp index 481e6bb1f2..1ece6de82f 100644 --- a/modules/core/test/test_intrin_utils.hpp +++ b/modules/core/test/test_intrin_utils.hpp @@ -1475,12 +1475,15 @@ template struct TheTest TheTest & test_float_math() { typedef typename V_RegTraits::round_reg Ri; - Data data1, data2, data3; + Data data1, data1_border, data2, data3; + // See https://github.com/opencv/opencv/issues/24213 + data1_border *= 0.5; data1 *= 1.1; data2 += 10; - R a1 = data1, a2 = data2, a3 = data3; + R a1 = data1, a1_border = data1_border, a2 = data2, a3 = data3; Data resB = v_round(a1), + resB_border = v_round(a1_border), resC = v_trunc(a1), resD = v_floor(a1), resE = v_ceil(a1); @@ -1493,6 +1496,7 @@ template struct TheTest { SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_EQ(cvRound(data1[i]), resB[i]); + EXPECT_EQ(cvRound(data1_border[i]), 
resB_border[i]); EXPECT_EQ((typename VTraits::lane_type)data1[i], resC[i]); EXPECT_EQ(cvFloor(data1[i]), resD[i]); EXPECT_EQ(cvCeil(data1[i]), resE[i]); From d19fc1264b4a05a7933361e57e0a301c54c1a661 Mon Sep 17 00:00:00 2001 From: Dmitry Matveev Date: Mon, 4 Sep 2023 11:48:53 +0200 Subject: [PATCH 33/57] Merge pull request #24178 from dmatveev:dm/streaming_queue G-API: Introduce a Queue Source #24178 - Added a new IStreamSource class: in fact, a wrapper over a concurrent queue; - Added minimal example on how it can be used; - Extended IStreamSource with optional "halt" interface to break the blocking calls in the emitter threads when required to stop. - Introduced a QueueInput class which allows to pass the whole graph's input vector at once. In fact it is a thin wrapper atop of individual Queue Sources. There is a hidden trap found with our type system as described in https://github.com/orgs/g-api-org/discussions/2 While it works even in this form, it should be addressed somewhere in the 5.0 timeframe. ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. 
- [x] The feature is well documented and sample code can be built with the project CMake --- modules/gapi/CMakeLists.txt | 3 + .../include/opencv2/gapi/gtype_traits.hpp | 6 +- .../opencv2/gapi/streaming/queue_source.hpp | 67 +++++++++ .../include/opencv2/gapi/streaming/source.hpp | 7 +- modules/gapi/src/compiler/gislandmodel.hpp | 1 + .../gapi/src/executor/gstreamingexecutor.cpp | 13 ++ modules/gapi/src/streaming/queue_source.cpp | 98 ++++++++++++++ .../gapi_streaming_queue_source_tests.cpp | 127 ++++++++++++++++++ 8 files changed, 319 insertions(+), 3 deletions(-) create mode 100644 modules/gapi/include/opencv2/gapi/streaming/queue_source.hpp create mode 100644 modules/gapi/src/streaming/queue_source.cpp create mode 100644 modules/gapi/test/streaming/gapi_streaming_queue_source_tests.cpp diff --git a/modules/gapi/CMakeLists.txt b/modules/gapi/CMakeLists.txt index e30cb77e9e..2caeb02ae2 100644 --- a/modules/gapi/CMakeLists.txt +++ b/modules/gapi/CMakeLists.txt @@ -190,6 +190,9 @@ set(gapi_srcs src/backends/ov/bindings_ov.cpp src/backends/python/gpythonbackend.cpp + # Queue Streaming source + src/streaming/queue_source.cpp + # OpenVPL Streaming source src/streaming/onevpl/source.cpp src/streaming/onevpl/source_priv.cpp diff --git a/modules/gapi/include/opencv2/gapi/gtype_traits.hpp b/modules/gapi/include/opencv2/gapi/gtype_traits.hpp index b56175788f..a1703a52cb 100644 --- a/modules/gapi/include/opencv2/gapi/gtype_traits.hpp +++ b/modules/gapi/include/opencv2/gapi/gtype_traits.hpp @@ -141,8 +141,10 @@ namespace detail template struct GTypeOf > { using type = cv::GArray; }; template struct GTypeOf { using type = cv::GOpaque;}; template<> struct GTypeOf { using type = cv::GFrame; }; - // FIXME: This is not quite correct since IStreamSource may produce not only Mat but also Scalar - // and vector data. TODO: Extend the type dispatching on these types too. 
+ + // FIXME: This is not quite correct since IStreamSource may + // produce not only Mat but also MediaFrame, Scalar and vector + // data. TODO: Extend the type dispatching on these types too. template<> struct GTypeOf { using type = cv::GMat;}; template using g_type_of_t = typename GTypeOf::type; diff --git a/modules/gapi/include/opencv2/gapi/streaming/queue_source.hpp b/modules/gapi/include/opencv2/gapi/streaming/queue_source.hpp new file mode 100644 index 0000000000..bd385ed16e --- /dev/null +++ b/modules/gapi/include/opencv2/gapi/streaming/queue_source.hpp @@ -0,0 +1,67 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2023 Intel Corporation + +#ifndef OPENCV_GAPI_STREAMING_QUEUE_SOURCE_HPP +#define OPENCV_GAPI_STREAMING_QUEUE_SOURCE_HPP + +#include // shared_ptr +#include // is_base_of + +#include // GRunArgs +#include // GMetaArg + all descr_of +#include // IStreamSource + +namespace cv { +namespace gapi { +namespace wip { +struct Data; // fwd-declare to avoid circular? header dependencies + +class GAPI_EXPORTS QueueSourceBase: public cv::gapi::wip::IStreamSource { + class Priv; + std::shared_ptr m_priv; + // FIXME: Need to understand how it works with IStreamSource's shared_from_this + // Can we avoid having too many shared_ptrs here? + +public: + explicit QueueSourceBase(const cv::GMetaArg &m); + void push(Data &&data); + virtual bool pull(Data &data) override; + virtual void halt() override; + virtual GMetaArg descr_of() const override; + virtual ~QueueSourceBase() = default; +}; + +/** + * @brief Queued streaming pipeline source. 
+ * + */ +template +class QueueSource final: public QueueSourceBase +{ +public: + using Meta = decltype(cv::descr_of(T{})); + explicit QueueSource(Meta m) : QueueSourceBase(GMetaArg{m}) { + } + void push(T t) { + QueueSourceBase::push(Data{t}); + } +}; + +class GAPI_EXPORTS QueueInput { + std::vector > m_sources; + +public: + explicit QueueInput(const cv::GMetaArgs &args); + + void push(cv::GRunArgs &&ins); + operator cv::GRunArgs(); +}; + +} // namespace wip +} // namespace gapi +} // namespace cv + +#endif // OPENCV_GAPI_STREAMING_SOURCE_HPP diff --git a/modules/gapi/include/opencv2/gapi/streaming/source.hpp b/modules/gapi/include/opencv2/gapi/streaming/source.hpp index 6597cad8f8..267469ad1b 100644 --- a/modules/gapi/include/opencv2/gapi/streaming/source.hpp +++ b/modules/gapi/include/opencv2/gapi/streaming/source.hpp @@ -16,7 +16,7 @@ namespace cv { namespace gapi { namespace wip { - struct Data; // "forward-declaration" of GRunArg +struct Data; // forward-declaration of Data to avoid circular dependencies /** * @brief Abstract streaming pipeline source. @@ -43,6 +43,11 @@ public: Ptr ptr() { return shared_from_this(); } virtual bool pull(Data &data) = 0; virtual GMetaArg descr_of() const = 0; + virtual void halt() { + // Do nothing by default to maintain compatibility with the existing sources... 
+ // In fact needs to be decorated atop of the child classes to maintain the behavior + // FIXME: Make it mandatory in OpenCV 5.0 + }; virtual ~IStreamSource() = default; }; diff --git a/modules/gapi/src/compiler/gislandmodel.hpp b/modules/gapi/src/compiler/gislandmodel.hpp index 3a1a8d5ab9..ade13a6f33 100644 --- a/modules/gapi/src/compiler/gislandmodel.hpp +++ b/modules/gapi/src/compiler/gislandmodel.hpp @@ -192,6 +192,7 @@ class GIslandEmitter public: // Obtain next value from the emitter virtual bool pull(GRunArg &) = 0; + virtual void halt() = 0; virtual ~GIslandEmitter() = default; }; diff --git a/modules/gapi/src/executor/gstreamingexecutor.cpp b/modules/gapi/src/executor/gstreamingexecutor.cpp index 124b27f39c..6a397faca6 100644 --- a/modules/gapi/src/executor/gstreamingexecutor.cpp +++ b/modules/gapi/src/executor/gstreamingexecutor.cpp @@ -41,6 +41,10 @@ using namespace cv::gimpl::stream; class VideoEmitter final: public cv::gimpl::GIslandEmitter { cv::gapi::wip::IStreamSource::Ptr src; + virtual void halt() override { + src->halt(); + } + virtual bool pull(cv::GRunArg &arg) override { // FIXME: probably we can maintain a pool of (then) pre-allocated // buffers to avoid runtime allocations. @@ -62,6 +66,10 @@ public: class ConstEmitter final: public cv::gimpl::GIslandEmitter { cv::GRunArg m_arg; + virtual void halt() override { + // Not used here, but in fact can be used. 
+ } + virtual bool pull(cv::GRunArg &arg) override { arg = const_cast(m_arg); // FIXME: variant workaround return true; @@ -1918,6 +1926,11 @@ void cv::gimpl::GStreamingExecutor::stop() for (auto &q : m_emitter_queues) { q.push(stream::Cmd{stream::Stop{}}); } + // Also kindly ask emitter object to halt to break the blocking src->pull() + // loop + for (auto &nh : m_emitters) { + m_gim.metadata(nh).get().object->halt(); + } // Pull messages from the final queue to ensure completion Cmd cmd; diff --git a/modules/gapi/src/streaming/queue_source.cpp b/modules/gapi/src/streaming/queue_source.cpp new file mode 100644 index 0000000000..59fde09c44 --- /dev/null +++ b/modules/gapi/src/streaming/queue_source.cpp @@ -0,0 +1,98 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2023 Intel Corporation + +#include +#include + +#include + +#include +#include + +#include "executor/conc_queue.hpp" + +namespace cv { +namespace gapi { +namespace wip { + +class QueueSourceBase::Priv { +public: + explicit Priv(const cv::GMetaArg &meta) { + m = meta; + halted = false; + } + + cv::GMetaArg m; + cv::gapi::own::concurrent_bounded_queue q; + int64_t c = 0; + std::atomic halted; +}; + +QueueSourceBase::QueueSourceBase(const cv::GMetaArg &m) + : m_priv(new Priv(m)) { +} + +void QueueSourceBase::push(Data &&data) { + + // Tag data with seq_id/ts + const auto now = std::chrono::system_clock::now(); + const auto dur = std::chrono::duration_cast + (now.time_since_epoch()); + data.meta[cv::gapi::streaming::meta_tag::timestamp] = int64_t{dur.count()}; + data.meta[cv::gapi::streaming::meta_tag::seq_id] = int64_t{m_priv->c++}; + + m_priv->q.push(data); +} + +bool QueueSourceBase::pull(Data &data) { + m_priv->q.pop(data); + + if (m_priv->halted) { + return false; + } + return true; +} + +void QueueSourceBase::halt() { + 
m_priv->halted.store(true); + m_priv->q.push(cv::GRunArg{}); +} + +cv::GMetaArg QueueSourceBase::descr_of() const { + return m_priv->m; +} + +QueueInput::QueueInput(const cv::GMetaArgs &args) { + for (auto &&m : args) { + m_sources.emplace_back(new cv::gapi::wip::QueueSourceBase(m)); + } +} + +void QueueInput::push(cv::GRunArgs &&args) { + GAPI_Assert(m_sources.size() == args.size()); + for (auto && it : ade::util::zip(ade::util::toRange(m_sources), + ade::util::toRange(args))) + { + auto &src = std::get<0>(it); + auto &obj = std::get<1>(it); + + Data d; + d = obj; + src->push(std::move(d)); + } +} + +QueueInput::operator cv::GRunArgs () { + cv::GRunArgs args; + for (auto &&s : m_sources) { + args.push_back(s->ptr()); + } + return args; +} + +} // wip +} // gapi +} // cv diff --git a/modules/gapi/test/streaming/gapi_streaming_queue_source_tests.cpp b/modules/gapi/test/streaming/gapi_streaming_queue_source_tests.cpp new file mode 100644 index 0000000000..093e654715 --- /dev/null +++ b/modules/gapi/test/streaming/gapi_streaming_queue_source_tests.cpp @@ -0,0 +1,127 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2023 Intel Corporation + + +#include "../test_precomp.hpp" + +#include +#include +#include + +namespace opencv_test +{ + +TEST(GAPI_Streaming_Queue_Source, SmokeTest) { + // This is more like an example on G-API Queue Source + + cv::GMat in; + cv::GMat out = in + 1; + cv::GStreamingCompiled comp = cv::GComputation(in, out).compileStreaming(); + + // Queue source needs to know format information to maintain contracts + auto src = std::make_shared > + (cv::GMatDesc{CV_8U, 1, cv::Size{128, 128}}); + + comp.setSource(cv::gin(src->ptr())); + comp.start(); + + // It is perfectly legal to start a pipeline at this point - the source was passed. 
+ // Now we can push data through the source and get the pipeline results. + + cv::Mat eye = cv::Mat::eye(cv::Size{128, 128}, CV_8UC1); + src->push(eye); // Push I (identity matrix) + src->push(eye*2); // Push I*2 + + // Now its time to pop. The data could be already processed at this point. + // Note the queue source queues are unbounded to avoid deadlocks + + cv::Mat result; + ASSERT_TRUE(comp.pull(cv::gout(result))); + EXPECT_EQ(0, cvtest::norm(eye + 1, result, NORM_INF)); + + ASSERT_TRUE(comp.pull(cv::gout(result))); + EXPECT_EQ(0, cvtest::norm(eye*2 + 1, result, NORM_INF)); +} + +TEST(GAPI_Streaming_Queue_Source, Mixed) { + // Mixing a regular "live" source (which runs on its own) with a + // manually controlled queue source may make a little sense, but + // is perfectly legal and possible. + + cv::GMat in1; + cv::GMat in2; + cv::GMat out = in2 - in1; + cv::GStreamingCompiled comp = cv::GComputation(in1, in2, out).compileStreaming(); + + // Queue source needs to know format information to maintain contracts + auto src1 = std::make_shared > + (cv::GMatDesc{CV_8U, 3, cv::Size{768, 576}}); + + std::shared_ptr src2; + auto path = findDataFile("cv/video/768x576.avi"); + try { + src2 = cv::gapi::wip::make_src(path); + } catch(...) { + throw SkipTestException("Video file can not be opened"); + } + + comp.setSource(cv::gin(src1->ptr(), src2)); // FIXME: quite inconsistent + comp.start(); + + cv::Mat eye = cv::Mat::eye(cv::Size{768, 576}, CV_8UC3); + src1->push(eye); // Push I (identity matrix) + src1->push(eye); // Push I (again) + + cv::Mat ref, result; + cv::VideoCapture cap(path); + + cap >> ref; + ASSERT_TRUE(comp.pull(cv::gout(result))); + EXPECT_EQ(0, cvtest::norm(ref - eye, result, NORM_INF)); + + cap >> ref; + ASSERT_TRUE(comp.pull(cv::gout(result))); + EXPECT_EQ(0, cvtest::norm(ref - eye, result, NORM_INF)); +} + +TEST(GAPI_Streaming_Queue_Input, SmokeTest) { + + // Queue Input: a tiny wrapper atop of multiple queue sources. 
+ // Allows users to pass all input data at once. + + cv::GMat in1; + cv::GScalar in2; + cv::GMat out = in1 + in2; + cv::GStreamingCompiled comp = cv::GComputation(cv::GIn(in1, in2), cv::GOut(out)) + .compileStreaming(); + + // FIXME: This API is too raw + cv::gapi::wip::QueueInput input({ + cv::GMetaArg{ cv::GMatDesc{CV_8U, 1, cv::Size{64,64} } }, + cv::GMetaArg{ cv::empty_scalar_desc() } + }); + comp.setSource(input); // Implicit conversion allows it to be passed as-is. + comp.start(); + + // Push data via queue input + cv::Mat eye = cv::Mat::eye(cv::Size{64, 64}, CV_8UC1); + input.push(cv::gin(eye, cv::Scalar(1))); + input.push(cv::gin(eye, cv::Scalar(2))); + input.push(cv::gin(eye, cv::Scalar(3))); + + // Pop data and validate + cv::Mat result; + ASSERT_TRUE(comp.pull(cv::gout(result))); + EXPECT_EQ(0, cvtest::norm(eye+1, result, NORM_INF)); + + ASSERT_TRUE(comp.pull(cv::gout(result))); + EXPECT_EQ(0, cvtest::norm(eye+2, result, NORM_INF)); + + ASSERT_TRUE(comp.pull(cv::gout(result))); + EXPECT_EQ(0, cvtest::norm(eye+3, result, NORM_INF)); +} + +} // namespace opencv_test From 114c23e41108a68a6dce5ec9ab8a900bccc47637 Mon Sep 17 00:00:00 2001 From: alexander-varjo <118199184+alexander-varjo@users.noreply.github.com> Date: Mon, 4 Sep 2023 17:49:45 +0300 Subject: [PATCH 34/57] Merge pull request #23607 from alexander-varjo:alexander-varjo-patch-1 Fix crash in ap3p #23607 ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [ ] I agree to contribute to the project under Apache 2 License. 
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake --- modules/calib3d/src/ap3p.cpp | 65 ++----------------- modules/calib3d/test/test_solvepnp_ransac.cpp | 62 ++++++++++++++++++ 2 files changed, 69 insertions(+), 58 deletions(-) diff --git a/modules/calib3d/src/ap3p.cpp b/modules/calib3d/src/ap3p.cpp index 582b201b36..79da0f13a7 100644 --- a/modules/calib3d/src/ap3p.cpp +++ b/modules/calib3d/src/ap3p.cpp @@ -1,5 +1,6 @@ #include "precomp.hpp" #include "ap3p.h" +#include "polynom_solver.h" #include #include @@ -8,63 +9,10 @@ static inline double cbrt(double x) { return (double)cv::cubeRoot((float)x); }; #endif namespace { -void solveQuartic(const double *factors, double *realRoots) { - const double &a4 = factors[0]; - const double &a3 = factors[1]; - const double &a2 = factors[2]; - const double &a1 = factors[3]; - const double &a0 = factors[4]; - - double a4_2 = a4 * a4; - double a3_2 = a3 * a3; - double a4_3 = a4_2 * a4; - double a2a4 = a2 * a4; - - double p4 = (8 * a2a4 - 3 * a3_2) / (8 * a4_2); - double q4 = (a3_2 * a3 - 4 * a2a4 * a3 + 8 * a1 * a4_2) / (8 * a4_3); - double r4 = (256 * a0 * a4_3 - 3 * (a3_2 * a3_2) - 64 * a1 * a3 * a4_2 + 16 * a2a4 * a3_2) / (256 * (a4_3 * a4)); - - double p3 = ((p4 * p4) / 12 + r4) / 3; // /=-3 - double q3 = (72 * r4 * p4 - 2 * p4 * p4 * p4 - 27 * q4 * q4) / 432; // /=2 - - double t; // *=2 - std::complex w; - if (q3 >= 0) - w = -std::sqrt(static_cast >(q3 * q3 - p3 * p3 * p3)) - q3; - else - w = std::sqrt(static_cast >(q3 * q3 - p3 * p3 * p3)) - q3; - if (w.imag() == 0.0) { - 
w.real(std::cbrt(w.real())); - t = 2.0 * (w.real() + p3 / w.real()); - } else { - w = pow(w, 1.0 / 3); - t = 4.0 * w.real(); - } - - std::complex sqrt_2m = sqrt(static_cast >(-2 * p4 / 3 + t)); - double B_4A = -a3 / (4 * a4); - double complex1 = 4 * p4 / 3 + t; -#if defined(__clang__) && defined(__arm__) && (__clang_major__ == 3 || __clang_major__ == 4) && !defined(__ANDROID__) - // details: https://github.com/opencv/opencv/issues/11135 - // details: https://github.com/opencv/opencv/issues/11056 - std::complex complex2 = 2 * q4; - complex2 = std::complex(complex2.real() / sqrt_2m.real(), 0); -#else - std::complex complex2 = 2 * q4 / sqrt_2m; -#endif - double sqrt_2m_rh = sqrt_2m.real() / 2; - double sqrt1 = sqrt(-(complex1 + complex2)).real() / 2; - realRoots[0] = B_4A + sqrt_2m_rh + sqrt1; - realRoots[1] = B_4A + sqrt_2m_rh - sqrt1; - double sqrt2 = sqrt(-(complex1 - complex2)).real() / 2; - realRoots[2] = B_4A - sqrt_2m_rh + sqrt2; - realRoots[3] = B_4A - sqrt_2m_rh - sqrt2; -} - -void polishQuarticRoots(const double *coeffs, double *roots) { +void polishQuarticRoots(const double *coeffs, double *roots, int nb_roots) { const int iterations = 2; for (int i = 0; i < iterations; ++i) { - for (int j = 0; j < 4; ++j) { + for (int j = 0; j < nb_roots; ++j) { double error = (((coeffs[0] * roots[j] + coeffs[1]) * roots[j] + coeffs[2]) * roots[j] + coeffs[3]) * roots[j] + coeffs[4]; @@ -227,8 +175,9 @@ int ap3p::computePoses(const double featureVectors[3][4], 2 * (g6 * g7 - g1 * g2 - g3 * g4), g7 * g7 - g2 * g2 - g4 * g4}; double s[4]; - solveQuartic(coeffs, s); - polishQuarticRoots(coeffs, s); + int nb_roots = solve_deg4(coeffs[0], coeffs[1], coeffs[2], coeffs[3], coeffs[4], + s[0], s[1], s[2], s[3]); + polishQuarticRoots(coeffs, s, nb_roots); double temp[3]; vect_cross(k1, nl, temp); @@ -254,7 +203,7 @@ int ap3p::computePoses(const double featureVectors[3][4], double reproj_errors[4]; int nb_solutions = 0; - for (int i = 0; i < 4; ++i) { + for (int i = 0; i < nb_roots; 
++i) { double ctheta1p = s[i]; if (abs(ctheta1p) > 1) continue; diff --git a/modules/calib3d/test/test_solvepnp_ransac.cpp b/modules/calib3d/test/test_solvepnp_ransac.cpp index 759b9650a8..a9ed88f0f5 100644 --- a/modules/calib3d/test/test_solvepnp_ransac.cpp +++ b/modules/calib3d/test/test_solvepnp_ransac.cpp @@ -41,6 +41,7 @@ //M*/ #include "test_precomp.hpp" +#include "opencv2/core/utils/logger.hpp" namespace opencv_test { namespace { @@ -2258,4 +2259,65 @@ TEST(Calib3d_SolvePnP, inputShape) } } +bool hasNan(const cv::Mat& mat) +{ + bool has = false; + if (mat.type() == CV_32F) + { + for(int i = 0; i < static_cast(mat.total()); i++) + has |= cvIsNaN(mat.at(i)) != 0; + } + else if (mat.type() == CV_64F) + { + for(int i = 0; i < static_cast(mat.total()); i++) + has |= cvIsNaN(mat.at(i)) != 0; + } + else + { + has = true; + CV_LOG_ERROR(NULL, "check hasNan called with unsupported type!"); + } + + return has; +} + +TEST(AP3P, ctheta1p_nan_23607) +{ + // the task is not well defined and may not converge (empty R, t) or should + // converge to some non-NaN solution + const std::array cameraPts = { + cv::Point2d{0.042784865945577621, 0.59844839572906494}, + cv::Point2d{-0.028428621590137482, 0.60354739427566528}, + cv::Point2d{0.0046037044376134872, 0.70674681663513184} + }; + const std::array modelPts = { + cv::Point3d{-0.043258000165224075, 0.020459245890378952, -0.0069921980611979961}, + cv::Point3d{-0.045648999512195587, 0.0029820732306689024, 0.0079000638797879219}, + cv::Point3d{-0.043276999145746231, -0.013622495345771313, 0.0080113131552934647} + }; + + std::vector R, t; + solveP3P(modelPts, cameraPts, Mat::eye(3, 3, CV_64F), Mat(), R, t, SOLVEPNP_AP3P); + + EXPECT_EQ(R.size(), 2ul); + EXPECT_EQ(t.size(), 2ul); + + // Try apply rvec and tvec to get model points from camera points. 
+ Mat pts = Mat(modelPts).reshape(1, 3); + Mat expected = Mat(cameraPts).reshape(1, 3); + for (size_t i = 0; i < R.size(); ++i) { + EXPECT_TRUE(!hasNan(R[i])); + EXPECT_TRUE(!hasNan(t[i])); + + Mat transform; + cv::Rodrigues(R[i], transform); + Mat res = pts * transform.t(); + for (int j = 0; j < 3; ++j) { + res.row(j) += t[i].reshape(1, 1); + res.row(j) /= res.row(j).at(2); + } + EXPECT_LE(cvtest::norm(res.colRange(0, 2), expected, NORM_INF), 3e-16); + } +} + }} // namespace From ca527040e2bee65bce6df50773252779d3335dfb Mon Sep 17 00:00:00 2001 From: Alex Date: Mon, 4 Sep 2023 18:28:28 +0300 Subject: [PATCH 35/57] fix refineDetectedMarkers, add test --- .../misc/python/test/test_objdetect_aruco.py | 33 +++++++++++++++++++ .../objdetect/src/aruco/aruco_detector.cpp | 8 ++++- 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/modules/objdetect/misc/python/test/test_objdetect_aruco.py b/modules/objdetect/misc/python/test/test_objdetect_aruco.py index d63a19cd2f..dda58b6460 100644 --- a/modules/objdetect/misc/python/test/test_objdetect_aruco.py +++ b/modules/objdetect/misc/python/test/test_objdetect_aruco.py @@ -186,6 +186,39 @@ class aruco_objdetect_test(NewOpenCVTests): self.assertEqual((1, 4, 2), refine_corners[0].shape) np.testing.assert_array_equal(corners, refine_corners) + def test_charuco_refine(self): + aruco_dict = cv.aruco.getPredefinedDictionary(cv.aruco.DICT_6X6_50) + board_size = (3, 4) + board = cv.aruco.CharucoBoard(board_size, 1., .7, aruco_dict) + aruco_detector = cv.aruco.ArucoDetector(aruco_dict) + charuco_detector = cv.aruco.CharucoDetector(board) + cell_size = 100 + image = board.generateImage((cell_size*board_size[0], cell_size*board_size[1])) + camera = np.array([[1, 0, 0.5], + [0, 1, 0.5], + [0, 0, 1]]) + dist = np.array([0, 0, 0, 0, 0], dtype=np.float32).reshape(1, -1) + + # generate gold corners of the ArUco markers for the test + gold_corners = np.array(board.getObjPoints())[:, :, 0:2]*cell_size + + # detect corners + 
markerCorners, markerIds, _ = aruco_detector.detectMarkers(image) + + # test refine + rejected = [markerCorners[-1]] + markerCorners, markerIds = markerCorners[:-1], markerIds[:-1] + markerCorners, markerIds, _, _ = aruco_detector.refineDetectedMarkers(image, board, markerCorners, markerIds, + rejected, cameraMatrix=camera, distCoeffs=dist) + + charucoCorners, charucoIds, _, _ = charuco_detector.detectBoard(image, markerCorners=markerCorners, + markerIds=markerIds) + self.assertEqual(len(charucoIds), 6) + self.assertEqual(len(markerIds), 6) + + for i, id in enumerate(markerIds.reshape(-1)): + np.testing.assert_allclose(gold_corners[id], markerCorners[i].reshape(4, 2), 0.01, 1.) + def test_write_read_dictionary(self): try: aruco_dict = cv.aruco.getPredefinedDictionary(cv.aruco.DICT_5X5_50) diff --git a/modules/objdetect/src/aruco/aruco_detector.cpp b/modules/objdetect/src/aruco/aruco_detector.cpp index 395bb49338..84ccc6e323 100644 --- a/modules/objdetect/src/aruco/aruco_detector.cpp +++ b/modules/objdetect/src/aruco/aruco_detector.cpp @@ -1000,7 +1000,13 @@ static inline void _projectUndetectedMarkers(const Board &board, InputOutputArra OutputArray undetectedMarkersIds) { Mat rvec, tvec; // first estimate board pose with the current avaible markers Mat objPoints, imgPoints; // object and image points for the solvePnP function - board.matchImagePoints(detectedCorners, detectedIds, objPoints, imgPoints); + // To refine corners of ArUco markers the function refineDetectedMarkers() find an aruco markers pose from 3D-2D point correspondences. + // To find 3D-2D point correspondences uses matchImagePoints(). + // The method matchImagePoints() works with ArUco corners (in Board/GridBoard cases) or with ChArUco corners (in CharucoBoard case). + // To refine corners of ArUco markers we need work with ArUco corners only in all boards. + // To call matchImagePoints() with ArUco corners for all boards we need to call matchImagePoints() from base class Board. 
+ // The method matchImagePoints() implemented in Pimpl and we need to create temp Board object to call the base method. + Board(board.getObjPoints(), board.getDictionary(), board.getIds()).matchImagePoints(detectedCorners, detectedIds, objPoints, imgPoints); if (objPoints.total() < 4ull) // at least one marker from board so rvec and tvec are valid return; solvePnP(objPoints, imgPoints, cameraMatrix, distCoeffs, rvec, tvec); From c91c631ae26bc5257c67754ed6b8dc2a67f60915 Mon Sep 17 00:00:00 2001 From: Philipp Schrader Date: Thu, 31 Aug 2023 11:43:53 -0700 Subject: [PATCH 36/57] Fix "use after free" issue in `essential_solver.cpp` The address sanitizer highlighted this issue in our code base. It looks like the code is currently grabbing a pointer to a temporary object and then performing operations on it. I printed some information right before the asan crash: eigensolver address: 0x7f0ad95032f0 eigensolver size: 4528 eig_vecs_ ptr: 0x7f0ad95045e0 eig_vecs_ offset: 4848 This shows that `eig_vecs_` points past the end of `eigensolver`. In other words, it points at the temporary object created by the `eigensolver.eigenvectors()` call. Compare the docs for `.eigenvalues()`: https://eigen.tuxfamily.org/dox/classEigen_1_1EigenSolver.html#a0f507ad7ab14797882f474ca8f2773e7 to the docs for `.eigenvectors()`: https://eigen.tuxfamily.org/dox/classEigen_1_1EigenSolver.html#a66288022802172e3ee059283b26201d7 The difference in return types is interesting. `.eigenvalues()` returns a reference. But `.eigenvectors()` returns a matrix. This patch here fixes the problem by saving the temporary object and then grabbing a pointer into it. 
This is a curated snippet of the original asan failure: ==12==ERROR: AddressSanitizer: stack-use-after-scope on address 0x7fc633704640 at pc 0x7fc64f7f1593 bp 0x7ffe8875fc90 sp 0x7ffe8875fc88 READ of size 8 at 0x7fc633704640 thread T0 #0 0x7fc64f7f1592 in cv::usac::EssentialMinimalSolverStewenius5ptsImpl::estimate(std::__1::vector > const&, std::__1::vector >&) const /proc/self/cwd/external/com_github_opencv_opencv/modules/calib3d/src/usac/essential_solver.cpp:181:48 #1 0x7fc64f915d92 in cv::usac::EssentialEstimatorImpl::estimateModels(std::__1::vector > const&, std::__1::vector >&) const /proc/self/cwd/external/com_github_opencv_opencv/modules/calib3d/src/usac/estimator.cpp:110:46 #2 0x7fc64fa74fb0 in cv::usac::Ransac::run(cv::Ptr&) /proc/self/cwd/external/com_github_opencv_opencv/modules/calib3d/src/usac/ransac_solvers.cpp:152:58 #3 0x7fc64fa6cd8e in cv::usac::run(cv::Ptr const&, cv::_InputArray const&, cv::_InputArray const&, int, cv::Ptr&, cv::_InputArray const&, cv::_InputArray const&, cv::_InputArray const&, cv::_InputArray const&) /proc/self/cwd/external/com_github_opencv_opencv/modules/calib3d/src/usac/ransac_solvers.cpp:1010:16 #4 0x7fc64fa6fb46 in cv::usac::findEssentialMat(cv::_InputArray const&, cv::_InputArray const&, cv::_InputArray const&, int, double, double, cv::_OutputArray const&) /proc/self/cwd/external/com_github_opencv_opencv/modules/calib3d/src/usac/ransac_solvers.cpp:527:9 #5 0x7fc64f3b5522 in cv::findEssentialMat(cv::_InputArray const&, cv::_InputArray const&, cv::_InputArray const&, int, double, double, int, cv::_OutputArray const&) /proc/self/cwd/external/com_github_opencv_opencv/modules/calib3d/src/five-point.cpp:437:16 #6 0x7fc64f3b7e00 in cv::findEssentialMat(cv::_InputArray const&, cv::_InputArray const&, cv::_InputArray const&, int, double, double, cv::_OutputArray const&) /proc/self/cwd/external/com_github_opencv_opencv/modules/calib3d/src/five-point.cpp:486:12 ... 
Address 0x7fc633704640 is located in stack of thread T0 at offset 17984 in frame #0 0x7fc64f7ed4ff in cv::usac::EssentialMinimalSolverStewenius5ptsImpl::estimate(std::__1::vector > const&, std::__1::vector >&) const /proc/self/cwd/external/com_github_opencv_opencv/modules/calib3d/src/usac/essential_solver.cpp:36 This frame has 63 object(s): [32, 56) 'coefficients' (line 38) [96, 384) 'ee' (line 55) ... [13040, 17568) 'eigensolver' (line 142) [17824, 17840) 'ref.tmp518' (line 143) [17856, 17872) 'ref.tmp523' (line 144) [17888, 19488) 'ref.tmp524' (line 144) <== Memory access at offset 17984 is inside this variable [19616, 19640) 'ref.tmp532' (line 169) ... The crash report says that we're accessing a temporary object from line 144 when we shouldn't be. Line 144 looks like this: https://github.com/opencv/opencv/blob/4.6.0/modules/calib3d/src/usac/essential_solver.cpp#L144 const auto * const eig_vecs_ = (double *) eigensolver.eigenvectors().real().data(); We are using version 4.6.0 for this, but the problem is present on the 4.x branch. Note that I am dropping the .real() call here. I think that is safe because of the code further down (line 277 in the most recent version): const int eig_i = 20 * i + 12; // eigen stores imaginary values too The code appears to expect to have to skip doubles for the imaginary parts of the complex numbers. Admittedly, I couldn't find a test case that exercised this code path to validate correctness. 
--- modules/calib3d/src/usac/essential_solver.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/calib3d/src/usac/essential_solver.cpp b/modules/calib3d/src/usac/essential_solver.cpp index 504fec6ab5..434db6d373 100644 --- a/modules/calib3d/src/usac/essential_solver.cpp +++ b/modules/calib3d/src/usac/essential_solver.cpp @@ -239,7 +239,8 @@ public: // (5) Compute the left eigenvectors of the action matrix Eigen::EigenSolver> eigensolver(action_mat_eig); const Eigen::VectorXcd &eigenvalues = eigensolver.eigenvalues(); - const auto * const eig_vecs_ = (double *) eigensolver.eigenvectors().real().data(); + const Eigen::MatrixXcd eigenvectors = eigensolver.eigenvectors(); + const auto * const eig_vecs_ = (double *) eigenvectors.data(); #else Matx A = constraint_mat.colRange(0, 10), B = constraint_mat.colRange(10, 20), eliminated_mat; From 84f32bbb243dbcd64c7abb8d99dbdf5ed58518cc Mon Sep 17 00:00:00 2001 From: Wanli Date: Tue, 5 Sep 2023 13:44:15 +0800 Subject: [PATCH 37/57] increase Fast Math threshold --- modules/dnn/test/test_caffe_importer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp index 3f5458a873..66eff49979 100644 --- a/modules/dnn/test/test_caffe_importer.cpp +++ b/modules/dnn/test/test_caffe_importer.cpp @@ -731,7 +731,7 @@ TEST_P(Test_Caffe_nets, FasterRCNN_vgg16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif - double scoreDiff = 0.0, iouDiff = 0.0; + double scoreDiff = 0.001, iouDiff = 0.03; #if defined(INF_ENGINE_RELEASE) if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); @@ -779,7 +779,7 @@ TEST_P(Test_Caffe_nets, FasterRCNN_zf) 0, 7, 0.988779, 469.849, 75.1756, 718.64, 186.762, 0, 12, 0.967198, 138.588, 206.843, 329.766, 553.176); - double scoreDiff = 
0.0, iouDiff = 0.0; + double scoreDiff = 0.003, iouDiff = 0.07; if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { scoreDiff = 0.02; iouDiff = 0.13; From f280e3cbd9ba07ab2d0464cb4aefc31bdbe47665 Mon Sep 17 00:00:00 2001 From: Alexander Lyulkov Date: Thu, 31 Aug 2023 23:35:38 +0800 Subject: [PATCH 38/57] Fixed bug with the last 4 bytes in MJPEG encoder. --- modules/videoio/src/cap_mjpeg_encoder.cpp | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/modules/videoio/src/cap_mjpeg_encoder.cpp b/modules/videoio/src/cap_mjpeg_encoder.cpp index efac4093ae..2e7452cf17 100644 --- a/modules/videoio/src/cap_mjpeg_encoder.cpp +++ b/modules/videoio/src/cap_mjpeg_encoder.cpp @@ -268,7 +268,7 @@ public: m_buffer_list[0].finish(); m_data_len = m_buffer_list[0].get_len(); - m_last_bit_len = m_buffer_list[0].get_bits_free() ? 32 - m_buffer_list[0].get_bits_free() : 0; + m_last_bit_len = 32 - m_buffer_list[0].get_bits_free(); return m_buffer_list[0].get_data(); } @@ -331,9 +331,14 @@ public: } //bits == 0 means that last element shouldn't be used. - m_output_buffer[m_data_len++] = currval; - - m_last_bit_len = -bits; + if (bits != 0) { + m_output_buffer[m_data_len++] = currval; + m_last_bit_len = -bits; + } + else + { + m_last_bit_len = 32; + } return &m_output_buffer[0]; } @@ -1167,8 +1172,6 @@ public: fdct_qtab(_fdct_qtab), cat_table(_cat_table) { -#if 0 // disable parallel processing due to buffer overrun bug: https://github.com/opencv/opencv/issues/19634 - //empirically found value. if number of pixels is less than that value there is no sense to parallelize it. 
const int min_pixels_count = 96*96; @@ -1194,12 +1197,6 @@ public: stripes_count = std::min(stripes_count, max_stripes); -#else - if (nstripes > 1) - CV_LOG_ONCE_WARNING(NULL, "VIDEOIO/MJPEG: parallel processing is disabled: https://github.com/opencv/opencv/issues/19634"); - stripes_count = 1; -#endif - m_buffer_list.allocate_buffers(stripes_count, (height*width*2)/stripes_count); } From 6ae7caaa0107b8e1507b8de40c754c908f24a243 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Tue, 5 Sep 2023 10:44:56 +0300 Subject: [PATCH 39/57] Merge pull request #24216 from dkurt:inter_lines_less_compute Minor optimization of two lines intersection #24216 ### Pull Request Readiness Checklist Not significant, but we can reduce number of multiplications while compute two lines intersection. Both methods are used heavily in their modules. See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. 
- [x] The feature is well documented and sample code can be built with the project CMake --- modules/imgproc/src/geometry.cpp | 7 +++---- modules/objdetect/src/qrcode.cpp | 13 ++++--------- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/modules/imgproc/src/geometry.cpp b/modules/imgproc/src/geometry.cpp index 701c3a647f..77d3d16e02 100644 --- a/modules/imgproc/src/geometry.cpp +++ b/modules/imgproc/src/geometry.cpp @@ -328,17 +328,16 @@ static LineSegmentIntersection parallelInt( Point2f a, Point2f b, Point2f c, Poi static LineSegmentIntersection intersectLineSegments( Point2f a, Point2f b, Point2f c, Point2f d, Point2f& p, Point2f& q ) { - double denom = a.x * (double)(d.y - c.y) + b.x * (double)(c.y - d.y) + - d.x * (double)(b.y - a.y) + c.x * (double)(a.y - b.y); + double denom = (a.x - b.x) * (double)(d.y - c.y) - (a.y - b.y) * (double)(d.x - c.x); // If denom is zero, then segments are parallel: handle separately. if( denom == 0. ) return parallelInt(a, b, c, d, p, q); - double num = a.x * (double)(d.y - c.y) + c.x * (double)(a.y - d.y) + d.x * (double)(c.y - a.y); + double num = (d.y - a.y) * (double)(a.x - c.x) + (a.x - d.x) * (double)(a.y - c.y); double s = num / denom; - num = a.x * (double)(b.y - c.y) + b.x * (double)(c.y - a.y) + c.x * (double)(a.y - b.y); + num = (b.y - a.y) * (double)(a.x - c.x) + (c.y - a.y) * (double)(b.x - a.x); double t = num / denom; p.x = (float)(a.x + s*(b.x - a.x)); diff --git a/modules/objdetect/src/qrcode.cpp b/modules/objdetect/src/qrcode.cpp index 9f64c64462..1df46a9bb5 100644 --- a/modules/objdetect/src/qrcode.cpp +++ b/modules/objdetect/src/qrcode.cpp @@ -68,19 +68,14 @@ static void updatePointsResult(OutputArray points_, const vector& point static Point2f intersectionLines(Point2f a1, Point2f a2, Point2f b1, Point2f b2) { + // Try to solve a two lines intersection (a1, a2) and (b1, b2) as a system of equations: + // a2 + u * (a1 - a2) = b2 + v * (b1 - b2) const float divisor = (a1.x - a2.x) * (b1.y - 
b2.y) - (a1.y - a2.y) * (b1.x - b2.x); const float eps = 0.001f; if (abs(divisor) < eps) return a2; - Point2f result_square_angle( - ((a1.x * a2.y - a1.y * a2.x) * (b1.x - b2.x) - - (b1.x * b2.y - b1.y * b2.x) * (a1.x - a2.x)) / - divisor, - ((a1.x * a2.y - a1.y * a2.x) * (b1.y - b2.y) - - (b1.x * b2.y - b1.y * b2.x) * (a1.y - a2.y)) / - divisor - ); - return result_square_angle; + const float u = ((b2.x - a2.x) * (b1.y - b2.y) + (b1.x - b2.x) * (a2.y - b2.y)) / divisor; + return a2 + u * (a1 - a2); } // / | b From 639836ebf03ce39112b8c09e44bd0daa02d53f60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20B=C3=B6ken?= Date: Mon, 4 Sep 2023 11:31:06 +0200 Subject: [PATCH 40/57] Added CMake configuration OPENCV_DNN_BACKEND_DEFAULT --- CMakeLists.txt | 4 ++++ modules/dnn/CMakeLists.txt | 3 +++ modules/dnn/include/opencv2/dnn/dnn.hpp | 7 +------ modules/dnn/src/dnn_params.cpp | 2 +- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d14b7af439..2a214a1a91 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1672,6 +1672,10 @@ else() endif() endif() +if(BUILD_opencv_dnn) + status(" Default DNN backend:" ${OPENCV_DNN_BACKEND_DEFAULT}) +endif() + if(WITH_EIGEN OR HAVE_EIGEN) status(" Eigen:" HAVE_EIGEN THEN "YES (ver ${EIGEN_WORLD_VERSION}.${EIGEN_MAJOR_VERSION}.${EIGEN_MINOR_VERSION})" ELSE NO) endif() diff --git a/modules/dnn/CMakeLists.txt b/modules/dnn/CMakeLists.txt index 60cc77ca8b..896ce5ded7 100644 --- a/modules/dnn/CMakeLists.txt +++ b/modules/dnn/CMakeLists.txt @@ -227,6 +227,9 @@ if(TARGET ocv.3rdparty.openvino AND OPENCV_DNN_OPENVINO) endif() endif() +set(OPENCV_DNN_BACKEND_DEFAULT "DNN_BACKEND_OPENCV" CACHE STRING "Default backend used by the DNN module") +ocv_append_source_file_compile_definitions("${CMAKE_CURRENT_LIST_DIR}/src/dnn_params.cpp" "OPENCV_DNN_BACKEND_DEFAULT=${OPENCV_DNN_BACKEND_DEFAULT}") + ocv_install_used_external_targets(${libs} ${dnn_runtime_libs}) diff --git 
a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index d61f7191bc..2bd3f790b4 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -69,9 +69,7 @@ CV__DNN_INLINE_NS_BEGIN */ enum Backend { - //! DNN_BACKEND_DEFAULT equals to DNN_BACKEND_INFERENCE_ENGINE if - //! OpenCV is built with Intel OpenVINO or - //! DNN_BACKEND_OPENCV otherwise. + //! DNN_BACKEND_DEFAULT equals to OPENCV_DNN_BACKEND_DEFAULT, which can be defined using CMake or a configuration parameter DNN_BACKEND_DEFAULT = 0, DNN_BACKEND_HALIDE, DNN_BACKEND_INFERENCE_ENGINE, //!< Intel OpenVINO computational backend @@ -688,9 +686,6 @@ CV__DNN_INLINE_NS_BEGIN * @brief Ask network to use specific computation backend where it supported. * @param[in] backendId backend identifier. * @see Backend - * - * If OpenCV is compiled with Intel's Inference Engine library, DNN_BACKEND_DEFAULT - * means DNN_BACKEND_INFERENCE_ENGINE. Otherwise it equals to DNN_BACKEND_OPENCV. */ CV_WRAP void setPreferableBackend(int backendId); diff --git a/modules/dnn/src/dnn_params.cpp b/modules/dnn/src/dnn_params.cpp index 86a43db757..19d453012c 100644 --- a/modules/dnn/src/dnn_params.cpp +++ b/modules/dnn/src/dnn_params.cpp @@ -36,7 +36,7 @@ bool getParam_DNN_OPENCL_ALLOW_ALL_DEVICES() int getParam_DNN_BACKEND_DEFAULT() { static int PARAM_DNN_BACKEND_DEFAULT = (int)utils::getConfigurationParameterSizeT("OPENCV_DNN_BACKEND_DEFAULT", - (size_t)DNN_BACKEND_OPENCV + (size_t)OPENCV_DNN_BACKEND_DEFAULT ); return PARAM_DNN_BACKEND_DEFAULT; } From cca4ee2e464d3ca43babe9eadbd900aca22fe9fa Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Tue, 5 Sep 2023 14:35:37 +0300 Subject: [PATCH 41/57] Revert PR 24186 as it force skip tests. 
--- modules/core/test/test_misc.cpp | 36 -------------------- modules/python/test/tests_common.py | 2 -- modules/ts/include/opencv2/ts/ts_ext.hpp | 42 +++--------------------- modules/ts/src/ts_tags.cpp | 13 +++----- 4 files changed, 8 insertions(+), 85 deletions(-) diff --git a/modules/core/test/test_misc.cpp b/modules/core/test/test_misc.cpp index cb89dcf573..8ed0afe771 100644 --- a/modules/core/test/test_misc.cpp +++ b/modules/core/test/test_misc.cpp @@ -917,41 +917,5 @@ REGISTER_TYPED_TEST_CASE_P(Rect_Test, Overflows); typedef ::testing::Types RectTypes; INSTANTIATE_TYPED_TEST_CASE_P(Negative_Test, Rect_Test, RectTypes); -// Expected that SkipTestException thrown in the constructor should skip test but not fail -struct TestFixtureSkip: public ::testing::Test { - TestFixtureSkip(bool throwEx = true) { - if (throwEx) { - throw SkipTestException("Skip test at constructor"); - } - } -}; - -TEST_F(TestFixtureSkip, NoBodyRun) { - FAIL() << "Unreachable code called"; -} - -// Check no test body started in case of skip exception at static SetUpTestCase -struct TestSetUpTestCaseSkip: public ::testing::Test { - static void SetUpTestCase() { - throw SkipTestException("Skip test at SetUpTestCase"); - } -}; - -TEST_F(TestSetUpTestCaseSkip, NoBodyRun) { - FAIL() << "Unreachable code called"; -} -TEST_F(TestSetUpTestCaseSkip, NoBodyRun2) { - FAIL() << "Unreachable code called"; -} - -struct TestSetUpSkip: public ::testing::Test { - virtual void SetUp() { - throw SkipTestException("Skip test at SetUp"); - } -}; - -TEST_F(TestSetUpSkip, NoBodyRun) { - FAIL() << "Unreachable code called"; -} }} // namespace diff --git a/modules/python/test/tests_common.py b/modules/python/test/tests_common.py index d673dd7b78..ec49f46d0d 100644 --- a/modules/python/test/tests_common.py +++ b/modules/python/test/tests_common.py @@ -36,8 +36,6 @@ class NewOpenCVTests(unittest.TestCase): return candidate if required: self.fail('File ' + filename + ' not found') - else: - self.skipTest('File ' + 
filename + ' not found') return None diff --git a/modules/ts/include/opencv2/ts/ts_ext.hpp b/modules/ts/include/opencv2/ts/ts_ext.hpp index 4603dba4f7..efa4860510 100644 --- a/modules/ts/include/opencv2/ts/ts_ext.hpp +++ b/modules/ts/include/opencv2/ts/ts_ext.hpp @@ -47,18 +47,6 @@ bool checkBigDataTests(); } \ } \ -#define CV__TEST_SETUP_IMPL(parent_class) \ - { \ - try { \ - parent_class::SetUp(); \ - } catch (const cvtest::details::SkipTestExceptionBase& e) { \ - printf("[ SKIP ] %s\n", e.what()); \ - } \ - } - -struct DummyTest : public ::testing::Test { - virtual void TestBody() CV_OVERRIDE {} -}; #undef TEST #define TEST_(test_case_name, test_name, parent_class, bodyMethodName, BODY_ATTR, BODY_IMPL) \ @@ -72,17 +60,6 @@ struct DummyTest : public ::testing::Test { GTEST_DISALLOW_COPY_AND_ASSIGN_(\ GTEST_TEST_CLASS_NAME_(test_case_name, test_name));\ };\ - class test_case_name##test_name##_factory : public ::testing::internal::TestFactoryBase { \ - public:\ - virtual ::testing::Test* CreateTest() { \ - try { \ - return new GTEST_TEST_CLASS_NAME_(test_case_name, test_name); \ - } catch (const cvtest::details::SkipTestExceptionBase& e) { \ - printf("[ SKIP ] %s\n", e.what()); \ - return new DummyTest(); \ - } \ - } \ - };\ \ ::testing::TestInfo* const GTEST_TEST_CLASS_NAME_(test_case_name, test_name)\ ::test_info_ =\ @@ -92,7 +69,8 @@ struct DummyTest : public ::testing::Test { (::testing::internal::GetTestTypeId()), \ parent_class::SetUpTestCase, \ parent_class::TearDownTestCase, \ - new test_case_name##test_name##_factory);\ + new ::testing::internal::TestFactoryImpl<\ + GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>);\ void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody() BODY_IMPL( #test_case_name "_" #test_name ) \ void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::bodyMethodName() @@ -131,22 +109,10 @@ struct DummyTest : public ::testing::Test { private:\ virtual void TestBody() CV_OVERRIDE;\ virtual void Body(); \ - virtual void 
SetUp() CV_OVERRIDE; \ static ::testing::TestInfo* const test_info_ GTEST_ATTRIBUTE_UNUSED_;\ GTEST_DISALLOW_COPY_AND_ASSIGN_(\ GTEST_TEST_CLASS_NAME_(test_fixture, test_name));\ };\ - class test_fixture##test_name##_factory : public ::testing::internal::TestFactoryBase { \ - public:\ - virtual ::testing::Test* CreateTest() { \ - try { \ - return new GTEST_TEST_CLASS_NAME_(test_fixture, test_name); \ - } catch (const cvtest::details::SkipTestExceptionBase& e) { \ - printf("[ SKIP ] %s\n", e.what()); \ - return new DummyTest(); \ - } \ - } \ - };\ \ ::testing::TestInfo* const GTEST_TEST_CLASS_NAME_(test_fixture, test_name)\ ::test_info_ =\ @@ -156,9 +122,9 @@ struct DummyTest : public ::testing::Test { (::testing::internal::GetTypeId()), \ test_fixture::SetUpTestCase, \ test_fixture::TearDownTestCase, \ - new test_fixture##test_name##_factory);\ + new ::testing::internal::TestFactoryImpl<\ + GTEST_TEST_CLASS_NAME_(test_fixture, test_name)>);\ void GTEST_TEST_CLASS_NAME_(test_fixture, test_name)::TestBody() CV__TEST_BODY_IMPL( #test_fixture "_" #test_name ) \ - void GTEST_TEST_CLASS_NAME_(test_fixture, test_name)::SetUp() CV__TEST_SETUP_IMPL(test_fixture) \ void GTEST_TEST_CLASS_NAME_(test_fixture, test_name)::Body() // Don't use directly diff --git a/modules/ts/src/ts_tags.cpp b/modules/ts/src/ts_tags.cpp index 21653e17ee..8bed1b739f 100644 --- a/modules/ts/src/ts_tags.cpp +++ b/modules/ts/src/ts_tags.cpp @@ -11,7 +11,7 @@ namespace cvtest { static bool printTestTag = false; static std::vector currentDirectTestTags, currentImpliedTestTags; -static std::vector skipped_tests; +static std::vector skipped_tests; static std::map& getTestTagsSkipCounts() { @@ -26,7 +26,7 @@ static std::map& getTestTagsSkipExtraCounts() void testTagIncreaseSkipCount(const std::string& tag, bool isMain, bool appendSkipTests) { if (appendSkipTests) - skipped_tests.push_back(::testing::UnitTest::GetInstance()->current_test_case()); + 
skipped_tests.push_back(::testing::UnitTest::GetInstance()->current_test_info()); std::map& counts = isMain ? getTestTagsSkipCounts() : getTestTagsSkipExtraCounts(); std::map::iterator i = counts.find(tag); if (i == counts.end()) @@ -280,11 +280,6 @@ static bool isTestTagSkipped(const std::string& testTag, CV_OUT std::string& ski void checkTestTags() { - if (std::find(skipped_tests.begin(), skipped_tests.end(), - ::testing::UnitTest::GetInstance()->current_test_case()) != skipped_tests.end()) { - throw details::SkipTestExceptionBase(false); - } - std::string skipTag; const std::vector& testTags = currentDirectTestTags; { @@ -312,7 +307,7 @@ void checkTestTags() } if (found != tags.size()) { - skipped_tests.push_back(::testing::UnitTest::GetInstance()->current_test_case()); + skipped_tests.push_back(::testing::UnitTest::GetInstance()->current_test_info()); throw details::SkipTestExceptionBase("Test tags don't pass required tags list (--test_tag parameter)", true); } } @@ -346,7 +341,7 @@ void checkTestTags() if (!skip_message.empty()) { - skipped_tests.push_back(::testing::UnitTest::GetInstance()->current_test_case()); + skipped_tests.push_back(::testing::UnitTest::GetInstance()->current_test_info()); throw details::SkipTestExceptionBase(skip_message, true); } } From c4c2e2e796b829e05260ecfac6e8264e807c4df7 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Tue, 5 Sep 2023 15:21:10 +0300 Subject: [PATCH 42/57] Merge pull request #24214 from dkurt:distanceTransform_big_step Fix distanceTransform for inputs with large step and height #24214 ### Pull Request Readiness Checklist resolves https://github.com/opencv/opencv/issues/23895 See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. 
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake --- modules/imgproc/src/distransform.cpp | 47 +++++++++++-------- .../imgproc/test/test_distancetransform.cpp | 24 ++++++++++ 2 files changed, 51 insertions(+), 20 deletions(-) diff --git a/modules/imgproc/src/distransform.cpp b/modules/imgproc/src/distransform.cpp index e88ba44cfb..8f2a20e0d0 100644 --- a/modules/imgproc/src/distransform.cpp +++ b/modules/imgproc/src/distransform.cpp @@ -78,7 +78,7 @@ distanceTransform_3x3( const Mat& _src, Mat& _temp, Mat& _dist, const float* met const uchar* src = _src.ptr(); int* temp = _temp.ptr(); - float* dist = _dist.ptr(); + float* dist = _dist.ptr(_dist.rows - 1); int srcstep = (int)(_src.step/sizeof(src[0])); int step = (int)(_temp.step/sizeof(temp[0])); int dststep = (int)(_dist.step/sizeof(dist[0])); @@ -87,11 +87,10 @@ distanceTransform_3x3( const Mat& _src, Mat& _temp, Mat& _dist, const float* met initTopBottom( _temp, BORDER ); // forward pass + unsigned int* tmp = (unsigned int*)(temp + BORDER*step) + BORDER; + const uchar* s = src; for( i = 0; i < size.height; i++ ) { - const uchar* s = src + i*srcstep; - unsigned int* tmp = (unsigned int*)(temp + (i+BORDER)*step) + BORDER; - for( j = 0; j < BORDER; j++ ) tmp[-j-1] = tmp[size.width + j] = INIT_DIST0; @@ -111,13 +110,15 @@ distanceTransform_3x3( const Mat& _src, Mat& _temp, Mat& _dist, const float* met tmp[j] = t0; } } + tmp += step; + s += srcstep; } // backward pass + float* d = (float*)dist; for( i = size.height - 1; i >= 0; i-- ) { - float* d = (float*)(dist + i*dststep); - 
unsigned int* tmp = (unsigned int*)(temp + (i+BORDER)*step) + BORDER; + tmp -= step; for( j = size.width - 1; j >= 0; j-- ) { @@ -137,6 +138,7 @@ distanceTransform_3x3( const Mat& _src, Mat& _temp, Mat& _dist, const float* met t0 = (t0 > DIST_MAX) ? DIST_MAX : t0; d[j] = (float)(t0 * scale); } + d -= dststep; } } @@ -153,7 +155,7 @@ distanceTransform_5x5( const Mat& _src, Mat& _temp, Mat& _dist, const float* met const uchar* src = _src.ptr(); int* temp = _temp.ptr(); - float* dist = _dist.ptr(); + float* dist = _dist.ptr(_dist.rows - 1); int srcstep = (int)(_src.step/sizeof(src[0])); int step = (int)(_temp.step/sizeof(temp[0])); int dststep = (int)(_dist.step/sizeof(dist[0])); @@ -162,11 +164,10 @@ distanceTransform_5x5( const Mat& _src, Mat& _temp, Mat& _dist, const float* met initTopBottom( _temp, BORDER ); // forward pass + unsigned int* tmp = (unsigned int*)(temp + BORDER*step) + BORDER; + const uchar* s = src; for( i = 0; i < size.height; i++ ) { - const uchar* s = src + i*srcstep; - unsigned int* tmp = (unsigned int*)(temp + (i+BORDER)*step) + BORDER; - for( j = 0; j < BORDER; j++ ) tmp[-j-1] = tmp[size.width + j] = INIT_DIST0; @@ -194,13 +195,15 @@ distanceTransform_5x5( const Mat& _src, Mat& _temp, Mat& _dist, const float* met tmp[j] = t0; } } + tmp += step; + s += srcstep; } // backward pass + float* d = (float*)dist; for( i = size.height - 1; i >= 0; i-- ) { - float* d = (float*)(dist + i*dststep); - unsigned int* tmp = (unsigned int*)(temp + (i+BORDER)*step) + BORDER; + tmp -= step; for( j = size.width - 1; j >= 0; j-- ) { @@ -228,6 +231,7 @@ distanceTransform_5x5( const Mat& _src, Mat& _temp, Mat& _dist, const float* met t0 = (t0 > DIST_MAX) ? 
DIST_MAX : t0; d[j] = (float)(t0 * scale); } + d -= dststep; } } @@ -245,7 +249,7 @@ distanceTransformEx_5x5( const Mat& _src, Mat& _temp, Mat& _dist, Mat& _labels, const uchar* src = _src.ptr(); int* temp = _temp.ptr(); - float* dist = _dist.ptr(); + float* dist = _dist.ptr(_dist.rows - 1); int* labels = _labels.ptr(); int srcstep = (int)(_src.step/sizeof(src[0])); int step = (int)(_temp.step/sizeof(temp[0])); @@ -256,12 +260,11 @@ distanceTransformEx_5x5( const Mat& _src, Mat& _temp, Mat& _dist, Mat& _labels, initTopBottom( _temp, BORDER ); // forward pass + const uchar* s = src; + unsigned int* tmp = (unsigned int*)(temp + BORDER*step) + BORDER; + int* lls = (int*)labels; for( i = 0; i < size.height; i++ ) { - const uchar* s = src + i*srcstep; - unsigned int* tmp = (unsigned int*)(temp + (i+BORDER)*step) + BORDER; - int* lls = (int*)(labels + i*lstep); - for( j = 0; j < BORDER; j++ ) tmp[-j-1] = tmp[size.width + j] = INIT_DIST0; @@ -330,14 +333,17 @@ distanceTransformEx_5x5( const Mat& _src, Mat& _temp, Mat& _dist, Mat& _labels, lls[j] = l0; } } + s += srcstep; + tmp += step; + lls += lstep; } // backward pass + float* d = (float*)dist; for( i = size.height - 1; i >= 0; i-- ) { - float* d = (float*)(dist + i*dststep); - unsigned int* tmp = (unsigned int*)(temp + (i+BORDER)*step) + BORDER; - int* lls = (int*)(labels + i*lstep); + tmp -= step; + lls -= lstep; for( j = size.width - 1; j >= 0; j-- ) { @@ -399,6 +405,7 @@ distanceTransformEx_5x5( const Mat& _src, Mat& _temp, Mat& _dist, Mat& _labels, t0 = (t0 > DIST_MAX) ? 
DIST_MAX : t0; d[j] = (float)(t0 * scale); } + d -= dststep; } } diff --git a/modules/imgproc/test/test_distancetransform.cpp b/modules/imgproc/test/test_distancetransform.cpp index 742595631a..00bca5b5b2 100644 --- a/modules/imgproc/test/test_distancetransform.cpp +++ b/modules/imgproc/test/test_distancetransform.cpp @@ -344,4 +344,28 @@ TEST(Imgproc_DistanceTransform, large_square_22732) EXPECT_EQ(0, nerrs) << "reference distance map is different from computed one at " << nerrs << " pixels\n"; } +BIGDATA_TEST(Imgproc_DistanceTransform, issue_23895_3x3) +{ + Mat src = Mat::zeros(50000, 50000, CV_8U), dist; + distanceTransform(src.col(0), dist, DIST_L2, DIST_MASK_3); + int nz = countNonZero(dist); + EXPECT_EQ(nz, 0); +} + +BIGDATA_TEST(Imgproc_DistanceTransform, issue_23895_5x5) +{ + Mat src = Mat::zeros(50000, 50000, CV_8U), dist; + distanceTransform(src.col(0), dist, DIST_L2, DIST_MASK_5); + int nz = countNonZero(dist); + EXPECT_EQ(nz, 0); +} + +BIGDATA_TEST(Imgproc_DistanceTransform, issue_23895_5x5_labels) +{ + Mat src = Mat::zeros(50000, 50000, CV_8U), dist, labels; + distanceTransform(src.col(0), dist, labels, DIST_L2, DIST_MASK_5); + int nz = countNonZero(dist); + EXPECT_EQ(nz, 0); +} + }} // namespace From 2c53e3f53d00b23d8479dfa9a9ca346ff9f36130 Mon Sep 17 00:00:00 2001 From: Yuriy Chernyshov Date: Tue, 5 Sep 2023 16:05:47 +0200 Subject: [PATCH 43/57] Merge pull request #24204 from georgthegreat:mser-license Properly preserve chi_table license as mandated by BSD-3-Clause #24204 Amend reference to online hosted file with the full license quotation as mandated by the original license. 
--- modules/features2d/3rdparty/mscr/chi_table.h | 135 ++++++++++++++++++ .../3rdparty/mscr/chi_table_LICENSE.txt | 28 ++++ modules/features2d/CMakeLists.txt | 2 + modules/features2d/src/mser.cpp | 118 ++------------- 4 files changed, 173 insertions(+), 110 deletions(-) create mode 100644 modules/features2d/3rdparty/mscr/chi_table.h create mode 100644 modules/features2d/3rdparty/mscr/chi_table_LICENSE.txt diff --git a/modules/features2d/3rdparty/mscr/chi_table.h b/modules/features2d/3rdparty/mscr/chi_table.h new file mode 100644 index 0000000000..c0e9bae046 --- /dev/null +++ b/modules/features2d/3rdparty/mscr/chi_table.h @@ -0,0 +1,135 @@ +/* +** +** License Agreement +** For chi_table.h +** +** Copyright (C) 2007 Per-Erik Forssen, all rights reserved. +** +** Redistribution and use in source and binary forms, with or without modification, +** are permitted provided that the following conditions are met: +** +** * Redistribution's of source code must retain the above copyright notice, +** this list of conditions and the following disclaimer. +** +** * Redistribution's in binary form must reproduce the above copyright notice, +** this list of conditions and the following disclaimer in the documentation +** and/or other materials provided with the distribution. +** +** * The name of the copyright holders may not be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** This software is provided by the copyright holders and contributors "as is" and +** any express or implied warranties, including, but not limited to, the implied +** warranties of merchantability and fitness for a particular purpose are disclaimed. 
+** In no event shall the Intel Corporation or contributors be liable for any direct, +** indirect, incidental, special, exemplary, or consequential damages +** (including, but not limited to, procurement of substitute goods or services; +** loss of use, data, or profits; or business interruption) however caused +** and on any theory of liability, whether in contract, strict liability, +** or tort (including negligence or otherwise) arising in any way out of +** the use of this software, even if advised of the possibility of such damage. +** +** Content origin: http://users.isy.liu.se/cvl/perfo/software/chi_table.h +*/ +#define TABLE_SIZE 400 + +static double chitab3[]={0, 0.0150057, 0.0239478, 0.0315227, + 0.0383427, 0.0446605, 0.0506115, 0.0562786, + 0.0617174, 0.0669672, 0.0720573, 0.0770099, + 0.081843, 0.0865705, 0.0912043, 0.0957541, + 0.100228, 0.104633, 0.108976, 0.113261, + 0.117493, 0.121676, 0.125814, 0.12991, + 0.133967, 0.137987, 0.141974, 0.145929, + 0.149853, 0.15375, 0.15762, 0.161466, + 0.165287, 0.169087, 0.172866, 0.176625, + 0.180365, 0.184088, 0.187794, 0.191483, + 0.195158, 0.198819, 0.202466, 0.2061, + 0.209722, 0.213332, 0.216932, 0.220521, + 0.2241, 0.22767, 0.231231, 0.234783, + 0.238328, 0.241865, 0.245395, 0.248918, + 0.252435, 0.255947, 0.259452, 0.262952, + 0.266448, 0.269939, 0.273425, 0.276908, + 0.280386, 0.283862, 0.287334, 0.290803, + 0.29427, 0.297734, 0.301197, 0.304657, + 0.308115, 0.311573, 0.315028, 0.318483, + 0.321937, 0.32539, 0.328843, 0.332296, + 0.335749, 0.339201, 0.342654, 0.346108, + 0.349562, 0.353017, 0.356473, 0.35993, + 0.363389, 0.366849, 0.37031, 0.373774, + 0.377239, 0.380706, 0.384176, 0.387648, + 0.391123, 0.3946, 0.39808, 0.401563, + 0.405049, 0.408539, 0.412032, 0.415528, + 0.419028, 0.422531, 0.426039, 0.429551, + 0.433066, 0.436586, 0.440111, 0.44364, + 0.447173, 0.450712, 0.454255, 0.457803, + 0.461356, 0.464915, 0.468479, 0.472049, + 0.475624, 0.479205, 0.482792, 0.486384, + 0.489983, 0.493588, 0.4972, 
0.500818, + 0.504442, 0.508073, 0.511711, 0.515356, + 0.519008, 0.522667, 0.526334, 0.530008, + 0.533689, 0.537378, 0.541075, 0.54478, + 0.548492, 0.552213, 0.555942, 0.55968, + 0.563425, 0.56718, 0.570943, 0.574715, + 0.578497, 0.582287, 0.586086, 0.589895, + 0.593713, 0.597541, 0.601379, 0.605227, + 0.609084, 0.612952, 0.61683, 0.620718, + 0.624617, 0.628526, 0.632447, 0.636378, + 0.64032, 0.644274, 0.648239, 0.652215, + 0.656203, 0.660203, 0.664215, 0.668238, + 0.672274, 0.676323, 0.680384, 0.684457, + 0.688543, 0.692643, 0.696755, 0.700881, + 0.70502, 0.709172, 0.713339, 0.717519, + 0.721714, 0.725922, 0.730145, 0.734383, + 0.738636, 0.742903, 0.747185, 0.751483, + 0.755796, 0.760125, 0.76447, 0.768831, + 0.773208, 0.777601, 0.782011, 0.786438, + 0.790882, 0.795343, 0.799821, 0.804318, + 0.808831, 0.813363, 0.817913, 0.822482, + 0.827069, 0.831676, 0.836301, 0.840946, + 0.84561, 0.850295, 0.854999, 0.859724, + 0.864469, 0.869235, 0.874022, 0.878831, + 0.883661, 0.888513, 0.893387, 0.898284, + 0.903204, 0.908146, 0.913112, 0.918101, + 0.923114, 0.928152, 0.933214, 0.938301, + 0.943413, 0.94855, 0.953713, 0.958903, + 0.964119, 0.969361, 0.974631, 0.979929, + 0.985254, 0.990608, 0.99599, 1.0014, + 1.00684, 1.01231, 1.01781, 1.02335, + 1.02891, 1.0345, 1.04013, 1.04579, + 1.05148, 1.05721, 1.06296, 1.06876, + 1.07459, 1.08045, 1.08635, 1.09228, + 1.09826, 1.10427, 1.11032, 1.1164, + 1.12253, 1.1287, 1.1349, 1.14115, + 1.14744, 1.15377, 1.16015, 1.16656, + 1.17303, 1.17954, 1.18609, 1.19269, + 1.19934, 1.20603, 1.21278, 1.21958, + 1.22642, 1.23332, 1.24027, 1.24727, + 1.25433, 1.26144, 1.26861, 1.27584, + 1.28312, 1.29047, 1.29787, 1.30534, + 1.31287, 1.32046, 1.32812, 1.33585, + 1.34364, 1.3515, 1.35943, 1.36744, + 1.37551, 1.38367, 1.39189, 1.4002, + 1.40859, 1.41705, 1.42561, 1.43424, + 1.44296, 1.45177, 1.46068, 1.46967, + 1.47876, 1.48795, 1.49723, 1.50662, + 1.51611, 1.52571, 1.53541, 1.54523, + 1.55517, 1.56522, 1.57539, 1.58568, + 1.59611, 1.60666, 1.61735, 
1.62817, + 1.63914, 1.65025, 1.66152, 1.67293, + 1.68451, 1.69625, 1.70815, 1.72023, + 1.73249, 1.74494, 1.75757, 1.77041, + 1.78344, 1.79669, 1.81016, 1.82385, + 1.83777, 1.85194, 1.86635, 1.88103, + 1.89598, 1.91121, 1.92674, 1.94257, + 1.95871, 1.97519, 1.99201, 2.0092, + 2.02676, 2.04471, 2.06309, 2.08189, + 2.10115, 2.12089, 2.14114, 2.16192, + 2.18326, 2.2052, 2.22777, 2.25101, + 2.27496, 2.29966, 2.32518, 2.35156, + 2.37886, 2.40717, 2.43655, 2.46709, + 2.49889, 2.53206, 2.56673, 2.60305, + 2.64117, 2.6813, 2.72367, 2.76854, + 2.81623, 2.86714, 2.92173, 2.98059, + 3.04446, 3.1143, 3.19135, 3.27731, + 3.37455, 3.48653, 3.61862, 3.77982, + 3.98692, 4.2776, 4.77167, 133.333 }; diff --git a/modules/features2d/3rdparty/mscr/chi_table_LICENSE.txt b/modules/features2d/3rdparty/mscr/chi_table_LICENSE.txt new file mode 100644 index 0000000000..66b272dd2d --- /dev/null +++ b/modules/features2d/3rdparty/mscr/chi_table_LICENSE.txt @@ -0,0 +1,28 @@ + License Agreement + For chi_table.h + +Copyright (C) 2007 Per-Erik Forssen, all rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistribution's of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistribution's in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * The name of the copyright holders may not be used to endorse or promote products + derived from this software without specific prior written permission. + +This software is provided by the copyright holders and contributors "as is" and +any express or implied warranties, including, but not limited to, the implied +warranties of merchantability and fitness for a particular purpose are disclaimed. 
+In no event shall the Intel Corporation or contributors be liable for any direct, +indirect, incidental, special, exemplary, or consequential damages +(including, but not limited to, procurement of substitute goods or services; +loss of use, data, or profits; or business interruption) however caused +and on any theory of liability, whether in contract, strict liability, +or tort (including negligence or otherwise) arising in any way out of +the use of this software, even if advised of the possibility of such damage. diff --git a/modules/features2d/CMakeLists.txt b/modules/features2d/CMakeLists.txt index a586d4606e..91fea8bcc8 100644 --- a/modules/features2d/CMakeLists.txt +++ b/modules/features2d/CMakeLists.txt @@ -7,3 +7,5 @@ if(DEBUG_opencv_features2d) list(APPEND debug_modules opencv_highgui) endif() ocv_define_module(features2d opencv_imgproc ${debug_modules} OPTIONAL opencv_flann WRAP java objc python js) + +ocv_install_3rdparty_licenses(mscr "${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/mscr/chi_table_LICENSE.txt") diff --git a/modules/features2d/src/mser.cpp b/modules/features2d/src/mser.cpp index d59ed39574..5c8db481b1 100644 --- a/modules/features2d/src/mser.cpp +++ b/modules/features2d/src/mser.cpp @@ -30,18 +30,23 @@ * OpenCV functions for MSER extraction * * 1. there are two different implementation of MSER, one for gray image, one for color image - * 2. the gray image algorithm is taken from: Linear Time Maximally Stable Extremal Regions; + * 2. the gray image algorithm is taken from: + * Linear Time Maximally Stable Extremal Regions; * the paper claims to be faster than union-find method; * it actually get 1.5~2m/s on my centrino L7200 1.2GHz laptop. - * 3. the color image algorithm is taken from: Maximally Stable Colour Regions for Recognition and Match; + * 3. 
the color image algorithm is taken from: + * Maximally Stable Colour Regions for Recognition and Match; * it should be much slower than gray image method ( 3~4 times ); - * the chi_table.h file is taken directly from paper's source code which is distributed under permissive BSD-like license: http://users.isy.liu.se/cvl/perfo/software/chi_table.h + * the chi_table.h file is taken directly from the paper's source code: + * http://users.isy.liu.se/cvl/perfo/software/chi_table.h + * license (BSD-like) is located in the file: 3rdparty/mscr/chi_table_LICENSE.txt * 4. though the name is *contours*, the result actually is a list of point set. */ #include "precomp.hpp" #include "opencv2/imgproc/imgproc_c.h" #include +#include "../3rdparty/mscr/chi_table.h" namespace cv { @@ -613,113 +618,6 @@ the color MSER has not been completely refactored yet. We leave it mostly as-is, with just enough changes to convert C structures to C++ ones and add support for color images into MSER_Impl::detectAndLabel. 
*/ - -const int TABLE_SIZE = 400; - -static const float chitab3[]= -{ - 0.f, 0.0150057f, 0.0239478f, 0.0315227f, - 0.0383427f, 0.0446605f, 0.0506115f, 0.0562786f, - 0.0617174f, 0.0669672f, 0.0720573f, 0.0770099f, - 0.081843f, 0.0865705f, 0.0912043f, 0.0957541f, - 0.100228f, 0.104633f, 0.108976f, 0.113261f, - 0.117493f, 0.121676f, 0.125814f, 0.12991f, - 0.133967f, 0.137987f, 0.141974f, 0.145929f, - 0.149853f, 0.15375f, 0.15762f, 0.161466f, - 0.165287f, 0.169087f, 0.172866f, 0.176625f, - 0.180365f, 0.184088f, 0.187794f, 0.191483f, - 0.195158f, 0.198819f, 0.202466f, 0.2061f, - 0.209722f, 0.213332f, 0.216932f, 0.220521f, - 0.2241f, 0.22767f, 0.231231f, 0.234783f, - 0.238328f, 0.241865f, 0.245395f, 0.248918f, - 0.252435f, 0.255947f, 0.259452f, 0.262952f, - 0.266448f, 0.269939f, 0.273425f, 0.276908f, - 0.280386f, 0.283862f, 0.287334f, 0.290803f, - 0.29427f, 0.297734f, 0.301197f, 0.304657f, - 0.308115f, 0.311573f, 0.315028f, 0.318483f, - 0.321937f, 0.32539f, 0.328843f, 0.332296f, - 0.335749f, 0.339201f, 0.342654f, 0.346108f, - 0.349562f, 0.353017f, 0.356473f, 0.35993f, - 0.363389f, 0.366849f, 0.37031f, 0.373774f, - 0.377239f, 0.380706f, 0.384176f, 0.387648f, - 0.391123f, 0.3946f, 0.39808f, 0.401563f, - 0.405049f, 0.408539f, 0.412032f, 0.415528f, - 0.419028f, 0.422531f, 0.426039f, 0.429551f, - 0.433066f, 0.436586f, 0.440111f, 0.44364f, - 0.447173f, 0.450712f, 0.454255f, 0.457803f, - 0.461356f, 0.464915f, 0.468479f, 0.472049f, - 0.475624f, 0.479205f, 0.482792f, 0.486384f, - 0.489983f, 0.493588f, 0.4972f, 0.500818f, - 0.504442f, 0.508073f, 0.511711f, 0.515356f, - 0.519008f, 0.522667f, 0.526334f, 0.530008f, - 0.533689f, 0.537378f, 0.541075f, 0.54478f, - 0.548492f, 0.552213f, 0.555942f, 0.55968f, - 0.563425f, 0.56718f, 0.570943f, 0.574715f, - 0.578497f, 0.582287f, 0.586086f, 0.589895f, - 0.593713f, 0.597541f, 0.601379f, 0.605227f, - 0.609084f, 0.612952f, 0.61683f, 0.620718f, - 0.624617f, 0.628526f, 0.632447f, 0.636378f, - 0.64032f, 0.644274f, 0.648239f, 0.652215f, - 0.656203f, 
0.660203f, 0.664215f, 0.668238f, - 0.672274f, 0.676323f, 0.680384f, 0.684457f, - 0.688543f, 0.692643f, 0.696755f, 0.700881f, - 0.70502f, 0.709172f, 0.713339f, 0.717519f, - 0.721714f, 0.725922f, 0.730145f, 0.734383f, - 0.738636f, 0.742903f, 0.747185f, 0.751483f, - 0.755796f, 0.760125f, 0.76447f, 0.768831f, - 0.773208f, 0.777601f, 0.782011f, 0.786438f, - 0.790882f, 0.795343f, 0.799821f, 0.804318f, - 0.808831f, 0.813363f, 0.817913f, 0.822482f, - 0.827069f, 0.831676f, 0.836301f, 0.840946f, - 0.84561f, 0.850295f, 0.854999f, 0.859724f, - 0.864469f, 0.869235f, 0.874022f, 0.878831f, - 0.883661f, 0.888513f, 0.893387f, 0.898284f, - 0.903204f, 0.908146f, 0.913112f, 0.918101f, - 0.923114f, 0.928152f, 0.933214f, 0.938301f, - 0.943413f, 0.94855f, 0.953713f, 0.958903f, - 0.964119f, 0.969361f, 0.974631f, 0.979929f, - 0.985254f, 0.990608f, 0.99599f, 1.0014f, - 1.00684f, 1.01231f, 1.01781f, 1.02335f, - 1.02891f, 1.0345f, 1.04013f, 1.04579f, - 1.05148f, 1.05721f, 1.06296f, 1.06876f, - 1.07459f, 1.08045f, 1.08635f, 1.09228f, - 1.09826f, 1.10427f, 1.11032f, 1.1164f, - 1.12253f, 1.1287f, 1.1349f, 1.14115f, - 1.14744f, 1.15377f, 1.16015f, 1.16656f, - 1.17303f, 1.17954f, 1.18609f, 1.19269f, - 1.19934f, 1.20603f, 1.21278f, 1.21958f, - 1.22642f, 1.23332f, 1.24027f, 1.24727f, - 1.25433f, 1.26144f, 1.26861f, 1.27584f, - 1.28312f, 1.29047f, 1.29787f, 1.30534f, - 1.31287f, 1.32046f, 1.32812f, 1.33585f, - 1.34364f, 1.3515f, 1.35943f, 1.36744f, - 1.37551f, 1.38367f, 1.39189f, 1.4002f, - 1.40859f, 1.41705f, 1.42561f, 1.43424f, - 1.44296f, 1.45177f, 1.46068f, 1.46967f, - 1.47876f, 1.48795f, 1.49723f, 1.50662f, - 1.51611f, 1.52571f, 1.53541f, 1.54523f, - 1.55517f, 1.56522f, 1.57539f, 1.58568f, - 1.59611f, 1.60666f, 1.61735f, 1.62817f, - 1.63914f, 1.65025f, 1.66152f, 1.67293f, - 1.68451f, 1.69625f, 1.70815f, 1.72023f, - 1.73249f, 1.74494f, 1.75757f, 1.77041f, - 1.78344f, 1.79669f, 1.81016f, 1.82385f, - 1.83777f, 1.85194f, 1.86635f, 1.88103f, - 1.89598f, 1.91121f, 1.92674f, 1.94257f, - 1.95871f, 
1.97519f, 1.99201f, 2.0092f, - 2.02676f, 2.04471f, 2.06309f, 2.08189f, - 2.10115f, 2.12089f, 2.14114f, 2.16192f, - 2.18326f, 2.2052f, 2.22777f, 2.25101f, - 2.27496f, 2.29966f, 2.32518f, 2.35156f, - 2.37886f, 2.40717f, 2.43655f, 2.46709f, - 2.49889f, 2.53206f, 2.56673f, 2.60305f, - 2.64117f, 2.6813f, 2.72367f, 2.76854f, - 2.81623f, 2.86714f, 2.92173f, 2.98059f, - 3.04446f, 3.1143f, 3.19135f, 3.27731f, - 3.37455f, 3.48653f, 3.61862f, 3.77982f, - 3.98692f, 4.2776f, 4.77167f, 133.333f -}; - struct MSCRNode; struct TempMSCR From 178fdbbda89d1afbb6d311662588dc7bf0780371 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Tue, 5 Sep 2023 18:08:28 +0300 Subject: [PATCH 44/57] Merge pull request #24196 from dkurt:ov_backend_cleanups Use ngraph::Output in OpenVINO backend wrapper #24196 ### Pull Request Readiness Checklist resolves https://github.com/opencv/opencv/issues/24102 * Use `ngraph::Output>` insead of `std::shared_ptr` as a backend wrapper. It lets access to multi-output nodes: https://github.com/opencv/opencv/blob/588ddf1b181aa7243144b27d65fc7690fb89e344/modules/dnn/src/net_openvino.cpp#L501-L504 * All layers can be customizable with OpenVINO >= 2022.1. nGraph reference code used for default layer implementation does not required CPU plugin also (might be tested by commenting CPU plugin at `/opt/intel/openvino/runtime/lib/intel64/plugins.xml`). * Correct inference if only intermediate blobs requested. See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. 
- [x] The feature is well documented and sample code can be built with the project CMake --- modules/dnn/src/ie_ngraph.cpp | 21 ++++--- modules/dnn/src/ie_ngraph.hpp | 6 +- modules/dnn/src/layers/batch_norm_layer.cpp | 2 +- modules/dnn/src/layers/blank_layer.cpp | 2 +- modules/dnn/src/layers/concat_layer.cpp | 4 +- modules/dnn/src/layers/convolution_layer.cpp | 8 +-- .../dnn/src/layers/crop_and_resize_layer.cpp | 2 +- modules/dnn/src/layers/elementwise_layers.cpp | 28 ++++----- modules/dnn/src/layers/eltwise_layer.cpp | 15 +++-- modules/dnn/src/layers/flatten_layer.cpp | 2 +- .../dnn/src/layers/fully_connected_layer.cpp | 2 +- modules/dnn/src/layers/lrn_layer.cpp | 2 +- .../dnn/src/layers/max_unpooling_layer.cpp | 4 +- modules/dnn/src/layers/mvn_layer.cpp | 2 +- .../dnn/src/layers/nary_eltwise_layers.cpp | 6 +- .../dnn/src/layers/normalize_bbox_layer.cpp | 8 +-- modules/dnn/src/layers/pooling_layer.cpp | 2 +- modules/dnn/src/layers/proposal_layer.cpp | 4 +- modules/dnn/src/layers/region_layer.cpp | 4 +- modules/dnn/src/layers/resize_layer.cpp | 2 +- modules/dnn/src/layers/scale_layer.cpp | 18 +++--- modules/dnn/src/layers/slice_layer.cpp | 2 +- modules/dnn/src/layers/softmax_layer.cpp | 2 +- modules/dnn/src/net_openvino.cpp | 63 ++++++++++++++----- modules/dnn/test/test_halide_layers.cpp | 11 +++- modules/dnn/test/test_tflite_importer.cpp | 9 +-- 26 files changed, 136 insertions(+), 95 deletions(-) diff --git a/modules/dnn/src/ie_ngraph.cpp b/modules/dnn/src/ie_ngraph.cpp index 140d4b0d2f..f9341febb5 100644 --- a/modules/dnn/src/ie_ngraph.cpp +++ b/modules/dnn/src/ie_ngraph.cpp @@ -383,11 +383,17 @@ public: #endif // OpenVINO >= 2022.1 -InfEngineNgraphNode::InfEngineNgraphNode(std::shared_ptr&& _node) - : BackendNode(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH), node(std::move(_node)) {} +InfEngineNgraphNode::InfEngineNgraphNode(ngraph::Output&& _node) + : BackendNode(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH), node(std::move(_node)) { + CV_Assert(node.get_node()); + 
CV_Assert(node.get_node_shared_ptr()); +} -InfEngineNgraphNode::InfEngineNgraphNode(const std::shared_ptr& _node) - : BackendNode(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH), node(_node) {} +InfEngineNgraphNode::InfEngineNgraphNode(const ngraph::Output& _node) + : BackendNode(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH), node(_node) { + CV_Assert(node.get_node()); + CV_Assert(node.get_node_shared_ptr()); +} InfEngineNgraphNode::InfEngineNgraphNode(const std::vector >& nodes, Ptr& cvLayer_, std::vector& inputs, @@ -420,7 +426,7 @@ InfEngineNgraphNode::InfEngineNgraphNode(const std::vector >& n } void InfEngineNgraphNode::setName(const std::string& name) { - node->set_friendly_name(name); + node.get_node()->set_friendly_name(name); } InfEngineNgraphNet::InfEngineNgraphNet(detail::NetImplBase& netImpl) @@ -441,8 +447,7 @@ InfEngineNgraphNet::InfEngineNgraphNet(detail::NetImplBase& netImpl, InferenceEn void InfEngineNgraphNet::addOutput(const Ptr& node) { CV_Assert(node); - CV_Assert(node->node); - const std::string& name = node->node->get_friendly_name(); + const std::string& name = node->node.get_node()->get_friendly_name(); requestedOutputs.insert({name, node.get()}); } @@ -458,7 +463,7 @@ void InfEngineNgraphNet::createNet(Target targetId) { CV_Assert(output_node_it->second); auto out = std::make_shared(output_node_it->second->node); #if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2022_1) - out->set_friendly_name(output_node_it->first + (output_node_it->second->node->get_output_size() == 1 ? "" : ".0")); + out->set_friendly_name(output_node_it->first + (output_node_it->second->node.get_node()->get_output_size() == 1 ? 
"" : ".0")); #endif outs.push_back(out); } diff --git a/modules/dnn/src/ie_ngraph.hpp b/modules/dnn/src/ie_ngraph.hpp index 7bb0ac09df..cc8f53ca5c 100644 --- a/modules/dnn/src/ie_ngraph.hpp +++ b/modules/dnn/src/ie_ngraph.hpp @@ -93,13 +93,13 @@ public: std::vector& inputs, std::vector& outputs, std::vector& internals); - InfEngineNgraphNode(std::shared_ptr&& _node); - InfEngineNgraphNode(const std::shared_ptr& _node); + InfEngineNgraphNode(ngraph::Output&& _node); + InfEngineNgraphNode(const ngraph::Output& _node); void setName(const std::string& name); // Inference Engine network object that allows to obtain the outputs of this layer. - std::shared_ptr node; + ngraph::Output node; Ptr net; Ptr cvLayer; }; diff --git a/modules/dnn/src/layers/batch_norm_layer.cpp b/modules/dnn/src/layers/batch_norm_layer.cpp index b90ee934ef..1d95096e60 100644 --- a/modules/dnn/src/layers/batch_norm_layer.cpp +++ b/modules/dnn/src/layers/batch_norm_layer.cpp @@ -457,7 +457,7 @@ public: virtual Ptr initNgraph(const std::vector >& inputs, const std::vector >& nodes) CV_OVERRIDE { auto ieInpNode = nodes[0].dynamicCast()->node; - std::vector shape(ieInpNode->get_shape().size(), 1); + std::vector shape(ieInpNode.get_shape().size(), 1); shape[1] = weights_.total(); auto weight = std::make_shared(ngraph::element::f32, ngraph::Shape(shape), weights_.data); auto bias = std::make_shared(ngraph::element::f32, ngraph::Shape(shape), bias_.data); diff --git a/modules/dnn/src/layers/blank_layer.cpp b/modules/dnn/src/layers/blank_layer.cpp index 3095e2d6c9..16de23b15e 100644 --- a/modules/dnn/src/layers/blank_layer.cpp +++ b/modules/dnn/src/layers/blank_layer.cpp @@ -148,7 +148,7 @@ public: virtual Ptr initNgraph(const std::vector >& inputs, const std::vector >& nodes) CV_OVERRIDE { - auto& ieInpNode = nodes[0].dynamicCast()->node; + auto ieInpNode = nodes[0].dynamicCast()->node; ngraph::OutputVector inp{ieInpNode}; auto blank = std::make_shared(inp, 0); return Ptr(new InfEngineNgraphNode(blank)); 
diff --git a/modules/dnn/src/layers/concat_layer.cpp b/modules/dnn/src/layers/concat_layer.cpp index 6bd3dcdea5..a5af16f32e 100644 --- a/modules/dnn/src/layers/concat_layer.cpp +++ b/modules/dnn/src/layers/concat_layer.cpp @@ -392,7 +392,7 @@ public: virtual Ptr initNgraph(const std::vector >& inputs, const std::vector >& nodes) CV_OVERRIDE { - const int numDims = nodes[0].dynamicCast()->node->get_shape().size(); + const int numDims = nodes[0].dynamicCast()->node.get_shape().size(); const int cAxis = normalize_axis(axis, numDims); std::vector maxDims(numDims, 0); @@ -403,7 +403,7 @@ public: auto inp = nodes[i].dynamicCast()->node; inp_nodes.push_back(inp); - std::vector inpShape = inp->get_shape(); + std::vector inpShape = inp.get_shape(); for (int i = 0; i < numDims; ++i) maxDims[i] = std::max(maxDims[i], inpShape[i]); } diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index 0488dc462d..d6e0aba1c6 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -822,13 +822,13 @@ public: CV_Assert(!blobs.empty()); CV_Assert_N(inputs.size() >= 1, nodes.size() >= 1); auto& ieInpNode = nodes[0].dynamicCast()->node; - std::vector dims = ieInpNode->get_shape(); + std::vector dims = ieInpNode.get_shape(); CV_Check(dims.size(), dims.size() >= 3 && dims.size() <= 5, ""); - std::shared_ptr ieWeights = nodes.size() > 1 ? nodes[1].dynamicCast()->node : nullptr; + ngraph::Output ieWeights; if (nodes.size() > 1) - CV_Assert(ieWeights); // dynamic_cast should not fail + ieWeights = nodes[1].dynamicCast()->node; const int inpCn = dims[1]; - const int inpGroupCn = nodes.size() > 1 ? ieWeights->get_shape()[1] : blobs[0].size[1]; + const int inpGroupCn = nodes.size() > 1 ? 
ieWeights.get_shape()[1] : blobs[0].size[1]; const int group = inpCn / inpGroupCn; std::vector kernel_shape; diff --git a/modules/dnn/src/layers/crop_and_resize_layer.cpp b/modules/dnn/src/layers/crop_and_resize_layer.cpp index eb8822870f..a6f58f8983 100644 --- a/modules/dnn/src/layers/crop_and_resize_layer.cpp +++ b/modules/dnn/src/layers/crop_and_resize_layer.cpp @@ -133,7 +133,7 @@ public: auto input = nodes[0].dynamicCast()->node; auto rois = nodes[1].dynamicCast()->node; - auto rois_shape = rois->get_shape(); + auto rois_shape = rois.get_shape(); std::vector dims(rois_shape.begin(), rois_shape.end()), offsets(4, 0); offsets[3] = 2; dims[3] = 7; diff --git a/modules/dnn/src/layers/elementwise_layers.cpp b/modules/dnn/src/layers/elementwise_layers.cpp index 3bcd53f95c..4247511879 100644 --- a/modules/dnn/src/layers/elementwise_layers.cpp +++ b/modules/dnn/src/layers/elementwise_layers.cpp @@ -490,7 +490,7 @@ struct ReLUFunctor : public BaseFunctor #endif #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { if (slope) { auto param = std::make_shared(ngraph::element::f32, ngraph::Shape{1}, &slope); @@ -674,7 +674,7 @@ struct ReLU6Functor : public BaseFunctor #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { return std::make_shared(node, minValue, maxValue); } @@ -796,7 +796,7 @@ struct BaseDefaultFunctor : public BaseFunctor #endif // HAVE_CANN #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { CV_Error(Error::StsNotImplemented, ""); } @@ -929,7 +929,7 @@ struct TanHFunctor : public BaseDefaultFunctor #endif // HAVE_CANN #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { return 
std::make_shared(node); } @@ -998,7 +998,7 @@ struct SwishFunctor : public BaseDefaultFunctor #endif // HAVE_CANN #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { auto sigmoid = std::make_shared(node); return std::make_shared(node, sigmoid); @@ -1074,7 +1074,7 @@ struct MishFunctor : public BaseDefaultFunctor #endif // HAVE_CANN #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { float one = 1.0f; auto constant = std::make_shared(ngraph::element::f32, ngraph::Shape{1}, &one); @@ -1157,7 +1157,7 @@ struct SigmoidFunctor : public BaseDefaultFunctor #endif // HAVE_CANN #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { return std::make_shared(node); } @@ -1237,7 +1237,7 @@ struct ELUFunctor : public BaseDefaultFunctor #endif // HAVE_CANN #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { return std::make_shared(node, alpha); } @@ -1307,7 +1307,7 @@ struct AbsValFunctor : public BaseDefaultFunctor #endif // HAVE_CANN #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { float coeff = -0.999999f; // float coeff = preferableTarget == DNN_TARGET_MYRIAD ? 
-0.999f : -0.999999f; @@ -1603,7 +1603,7 @@ struct SqrtFunctor : public BaseDefaultFunctor #endif // HAVE_HALIDE #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { return std::make_shared(node); } @@ -2329,7 +2329,7 @@ struct PowerFunctor : public BaseFunctor #endif // HAVE_CANN #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { auto scale_node = std::make_shared(ngraph::element::f32, ngraph::Shape{1}, &scale); @@ -2439,7 +2439,7 @@ struct ExpFunctor : public BaseDefaultFunctor #endif // HAVE_HALIDE #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { auto scale_node = std::make_shared(ngraph::element::f32, ngraph::Shape{1}, &normScale); @@ -2598,7 +2598,7 @@ struct ChannelsPReLUFunctor : public BaseFunctor #endif // HAVE_CANN #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { const size_t numChannels = scale.total(); auto slope = std::make_shared(ngraph::element::f32, ngraph::Shape{numChannels}, scale.data); @@ -2678,7 +2678,7 @@ struct PReLUFunctor : public ChannelsPReLUFunctor } #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const std::shared_ptr& node) + std::shared_ptr initNgraphAPI(const ngraph::Output& node) { auto shape = getShape(scale); auto slope = std::make_shared(ngraph::element::f32, shape, scale.ptr()); diff --git a/modules/dnn/src/layers/eltwise_layer.cpp b/modules/dnn/src/layers/eltwise_layer.cpp index 8ed1b799eb..49b3c02de3 100644 --- a/modules/dnn/src/layers/eltwise_layer.cpp +++ b/modules/dnn/src/layers/eltwise_layer.cpp @@ -896,12 +896,14 @@ public: virtual Ptr initNgraph(const std::vector >& inputs, const std::vector >& nodes) CV_OVERRIDE { + CV_Assert(nodes.size() 
>= 2); auto curr_node = nodes[0].dynamicCast()->node; if (!coeffs.empty()) { auto coeff = std::make_shared(ngraph::element::f32, ngraph::Shape{1}, &coeffs[0]); curr_node = std::make_shared(curr_node, coeff, ngraph::op::AutoBroadcastType::NUMPY); } + std::shared_ptr res; for (size_t i = 1; i < nodes.size(); i++) { auto next_node = nodes[i].dynamicCast()->node; @@ -910,15 +912,16 @@ public: next_node = std::make_shared(next_node, coeff, ngraph::op::AutoBroadcastType::NUMPY); } switch (op) { - case SUM: curr_node = std::make_shared(curr_node, next_node); break; - case PROD: curr_node = std::make_shared(curr_node, next_node); break; - case DIV: curr_node = std::make_shared(curr_node, next_node); break; - case MAX: curr_node = std::make_shared(curr_node, next_node); break; - case MIN: curr_node = std::make_shared(curr_node, next_node); break; + case SUM: res = std::make_shared(curr_node, next_node); break; + case PROD: res = std::make_shared(curr_node, next_node); break; + case DIV: res = std::make_shared(curr_node, next_node); break; + case MAX: res = std::make_shared(curr_node, next_node); break; + case MIN: res = std::make_shared(curr_node, next_node); break; default: CV_Error(Error::StsNotImplemented, "Unsupported eltwise operation"); } + curr_node = res; } - return Ptr(new InfEngineNgraphNode(curr_node)); + return Ptr(new InfEngineNgraphNode(res)); } #endif // HAVE_DNN_NGRAPH diff --git a/modules/dnn/src/layers/flatten_layer.cpp b/modules/dnn/src/layers/flatten_layer.cpp index 6a502af7e9..9ff3bec38b 100644 --- a/modules/dnn/src/layers/flatten_layer.cpp +++ b/modules/dnn/src/layers/flatten_layer.cpp @@ -209,7 +209,7 @@ public: const std::vector >& nodes) CV_OVERRIDE { auto& ieInpNode = nodes[0].dynamicCast()->node; - std::vector dims = ieInpNode->get_shape(); + std::vector dims = ieInpNode.get_shape(); int numAxes = dims.size(); int startAxis = normalize_axis(_startAxis, numAxes); diff --git a/modules/dnn/src/layers/fully_connected_layer.cpp 
b/modules/dnn/src/layers/fully_connected_layer.cpp index 9cdb31023c..f03af7c1fb 100644 --- a/modules/dnn/src/layers/fully_connected_layer.cpp +++ b/modules/dnn/src/layers/fully_connected_layer.cpp @@ -803,7 +803,7 @@ public: } else { - std::vector shape(1 + normalize_axis(axis, ieInpNode->get_shape().size()), 0); + std::vector shape(1 + normalize_axis(axis, ieInpNode.get_shape().size()), 0); shape[shape.size() - 1] = -1; auto inp = std::make_shared( ieInpNode, diff --git a/modules/dnn/src/layers/lrn_layer.cpp b/modules/dnn/src/layers/lrn_layer.cpp index 61c2224e36..f8de64cb32 100644 --- a/modules/dnn/src/layers/lrn_layer.cpp +++ b/modules/dnn/src/layers/lrn_layer.cpp @@ -480,7 +480,7 @@ public: if (type != SPATIAL_NRM) { axes = {1}; } else { - axes.resize(ieInpNode->get_shape().size() - 2); + axes.resize(ieInpNode.get_shape().size() - 2); std::iota(axes.begin(), axes.end(), 2); } auto ngraph_axes = std::make_shared(ngraph::element::i64, ngraph::Shape{axes.size()}, axes.data()); diff --git a/modules/dnn/src/layers/max_unpooling_layer.cpp b/modules/dnn/src/layers/max_unpooling_layer.cpp index 6a599408e1..7ed6c64ae8 100644 --- a/modules/dnn/src/layers/max_unpooling_layer.cpp +++ b/modules/dnn/src/layers/max_unpooling_layer.cpp @@ -194,7 +194,7 @@ public: std::vector inpShapes(nodes.size()); std::vector outShapes, internals; for (int i = 0; i < nodes.size(); ++i) { - std::vector shape = nodes[i].dynamicCast()->node->get_shape(); + std::vector shape = nodes[i].dynamicCast()->node.get_shape(); inpShapes[i] = std::vector(shape.begin(), shape.end()); } getMemoryShapes(inpShapes, 1, outShapes, internals); @@ -213,7 +213,7 @@ public: std::make_shared(ngraph::element::i32, ngraph::Shape{1}, &newShape), true ); - if (indices->get_element_type() != ngraph::element::i32 && indices->get_element_type() != ngraph::element::i64) { + if (indices.get_element_type() != ngraph::element::i32 && indices.get_element_type() != ngraph::element::i64) { indices = std::make_shared(indices, 
ngraph::element::i64); } diff --git a/modules/dnn/src/layers/mvn_layer.cpp b/modules/dnn/src/layers/mvn_layer.cpp index dc23656b7a..aae53fa327 100644 --- a/modules/dnn/src/layers/mvn_layer.cpp +++ b/modules/dnn/src/layers/mvn_layer.cpp @@ -390,7 +390,7 @@ public: auto mvn = std::make_shared(ieInpNode, acrossChannels, normVariance, eps); #else int64_t start_axis = acrossChannels ? 1 : 2; - std::vector axes_v(ieInpNode->get_shape().size() - start_axis); + std::vector axes_v(ieInpNode.get_shape().size() - start_axis); std::iota(axes_v.begin(), axes_v.end(), start_axis); auto axes = std::make_shared(ngraph::element::i64, ngraph::Shape{axes_v.size()}, axes_v.data()); auto mvn = std::make_shared(ieInpNode, axes, normVariance, eps, ngraph::op::MVNEpsMode::INSIDE_SQRT); diff --git a/modules/dnn/src/layers/nary_eltwise_layers.cpp b/modules/dnn/src/layers/nary_eltwise_layers.cpp index fadbf58244..8572eee995 100644 --- a/modules/dnn/src/layers/nary_eltwise_layers.cpp +++ b/modules/dnn/src/layers/nary_eltwise_layers.cpp @@ -900,12 +900,12 @@ public: auto& inp0 = nodes[0].dynamicCast()->node; auto& inp1 = nodes[1].dynamicCast()->node; - if (inp0->get_element_type() != inp1->get_element_type()) { + if (inp0.get_element_type() != inp1.get_element_type()) { auto dtype = preferableTarget == DNN_TARGET_OPENCL_FP16 || preferableTarget == DNN_TARGET_MYRIAD ? 
ngraph::element::f16 : ngraph::element::f32; - if (inp0->get_element_type() != dtype) + if (inp0.get_element_type() != dtype) inp0 = std::make_shared(inp0, dtype); - if (inp1->get_element_type() != dtype) + if (inp1.get_element_type() != dtype) inp1 = std::make_shared(inp1, dtype); } diff --git a/modules/dnn/src/layers/normalize_bbox_layer.cpp b/modules/dnn/src/layers/normalize_bbox_layer.cpp index f0ad6e6f61..431eeab82d 100644 --- a/modules/dnn/src/layers/normalize_bbox_layer.cpp +++ b/modules/dnn/src/layers/normalize_bbox_layer.cpp @@ -273,21 +273,21 @@ public: const std::vector >& nodes) CV_OVERRIDE { auto& ieInpNode = nodes[0].dynamicCast()->node; - const size_t batch = ieInpNode->get_shape()[0]; - const size_t numChannels = ieInpNode->get_shape()[1]; + const size_t batch = ieInpNode.get_shape()[0]; + const size_t numChannels = ieInpNode.get_shape()[1]; std::vector axes_data; if (!acrossSpatial) { axes_data.push_back(1); } else { - axes_data.resize(ieInpNode->get_shape().size() - 1); + axes_data.resize(ieInpNode.get_shape().size() - 1); std::iota(axes_data.begin(), axes_data.end(), 1); } auto axes = std::make_shared(ngraph::element::i64, ngraph::Shape{axes_data.size()}, axes_data); auto norm = std::make_shared(ieInpNode, axes, epsilon, ngraph::op::EpsMode::ADD); CV_Assert(blobs.empty() || numChannels == blobs[0].total()); - std::vector shape(ieInpNode->get_shape().size(), 1); + std::vector shape(ieInpNode.get_shape().size(), 1); shape[0] = blobs.empty() ? 
1 : batch; shape[1] = numChannels; if (!blobs.empty()) diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index 1337657127..a75382d8a5 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -601,7 +601,7 @@ public: return Ptr(new InfEngineNgraphNode(ave_pool)); } else if (type == SUM) { - ngraph::Shape inpShape = ieInpNode->get_shape(); + ngraph::Shape inpShape = ieInpNode.get_shape(); CV_Assert(inpShape.size() == 2 + kernel_size.size()); std::vector axes; for (size_t i = 0; i < kernel_size.size(); i++) diff --git a/modules/dnn/src/layers/proposal_layer.cpp b/modules/dnn/src/layers/proposal_layer.cpp index e9edcf1547..2f2a33cc6f 100644 --- a/modules/dnn/src/layers/proposal_layer.cpp +++ b/modules/dnn/src/layers/proposal_layer.cpp @@ -366,10 +366,10 @@ public: auto& class_logits = nodes[1].dynamicCast()->node; auto& image_shape = nodes[2].dynamicCast()->node; - CV_Assert_N(image_shape->get_shape().size() == 2, image_shape->get_shape().front() == 1); + CV_Assert_N(image_shape.get_shape().size() == 2, image_shape.get_shape().front() == 1); auto shape = std::make_shared(ngraph::element::i64, ngraph::Shape{1}, - std::vector{(int64_t)image_shape->get_shape().back()}); + std::vector{(int64_t)image_shape.get_shape().back()}); auto reshape = std::make_shared(image_shape, shape, true); auto proposal = std::make_shared(class_probs, class_logits, reshape, attr); diff --git a/modules/dnn/src/layers/region_layer.cpp b/modules/dnn/src/layers/region_layer.cpp index 7ab8cdd93f..49952b4c83 100644 --- a/modules/dnn/src/layers/region_layer.cpp +++ b/modules/dnn/src/layers/region_layer.cpp @@ -466,7 +466,7 @@ public: const std::vector >& nodes) CV_OVERRIDE { auto& input = nodes[0].dynamicCast()->node; - auto parent_shape = input->get_shape(); + auto parent_shape = input.get_shape(); int64_t b = parent_shape[0]; int64_t h = parent_shape[1]; int64_t w = parent_shape[2]; @@ -567,7 +567,7 @@ 
public: int hNorm, wNorm; if (nodes.size() > 1) { - auto node_1_shape = nodes[1].dynamicCast()->node->get_shape(); + auto node_1_shape = nodes[1].dynamicCast()->node.get_shape(); hNorm = node_1_shape[2]; wNorm = node_1_shape[3]; } diff --git a/modules/dnn/src/layers/resize_layer.cpp b/modules/dnn/src/layers/resize_layer.cpp index 607adb8aa1..fe27748319 100644 --- a/modules/dnn/src/layers/resize_layer.cpp +++ b/modules/dnn/src/layers/resize_layer.cpp @@ -443,7 +443,7 @@ public: std::vector shape = {outHeight, outWidth}; auto out_shape = std::make_shared(ngraph::element::i64, ngraph::Shape{2}, shape.data()); - auto& input_shape = ieInpNode->get_shape(); + auto& input_shape = ieInpNode.get_shape(); CV_Assert_N(input_shape[2] != 0, input_shape[3] != 0); std::vector scales = {static_cast(outHeight) / input_shape[2], static_cast(outWidth) / input_shape[3]}; auto scales_shape = std::make_shared(ngraph::element::f32, ngraph::Shape{2}, scales.data()); diff --git a/modules/dnn/src/layers/scale_layer.cpp b/modules/dnn/src/layers/scale_layer.cpp index 5338ab2215..2a4e1a05d5 100644 --- a/modules/dnn/src/layers/scale_layer.cpp +++ b/modules/dnn/src/layers/scale_layer.cpp @@ -331,34 +331,36 @@ public: virtual Ptr initNgraph(const std::vector >& inputs, const std::vector >& nodes) CV_OVERRIDE { auto ieInpNode0 = nodes[0].dynamicCast()->node; - auto ieInpNode1 = nodes.size() > 1 ? 
nodes[1].dynamicCast()->node : nullptr; + ngraph::Output ieInpNode1; + if (nodes.size() > 1) + ieInpNode1 = nodes[1].dynamicCast()->node; size_t numChannels = 1; if (blobs.empty()) - for (const size_t& dim : ieInpNode1->get_shape()) + for (const size_t& dim : ieInpNode1.get_shape()) numChannels *= dim; else numChannels = blobs[0].total(); - std::vector shape(ieInpNode0->get_shape().size(), 1); + std::vector shape(ieInpNode0.get_shape().size(), 1); int cAxis = normalize_axis(axis, shape.size()); shape[cAxis] = numChannels; - auto node = ieInpNode0; + std::shared_ptr node; if (hasWeights) { - auto weight = blobs.empty() ? ieInpNode1 : + ngraph::Output weight = blobs.empty() ? ieInpNode1 : std::make_shared(ngraph::element::f32, ngraph::Shape(shape), blobs[0].data); #if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2021_2) - node = std::make_shared(node, weight, ngraph::op::AutoBroadcastType::NUMPY); + node = std::make_shared(ieInpNode0, weight, ngraph::op::AutoBroadcastType::NUMPY); #else - node = std::make_shared(node, weight, ngraph::op::AutoBroadcastType::NUMPY); + node = std::make_shared(ieInpNode0, weight, ngraph::op::AutoBroadcastType::NUMPY); #endif } if (hasBias || !hasWeights) { - std::shared_ptr bias; + ngraph::Output bias; if (hasBias) { bias = blobs.empty() ? 
ieInpNode1 : diff --git a/modules/dnn/src/layers/slice_layer.cpp b/modules/dnn/src/layers/slice_layer.cpp index d3675e23a5..c44d18182e 100644 --- a/modules/dnn/src/layers/slice_layer.cpp +++ b/modules/dnn/src/layers/slice_layer.cpp @@ -759,7 +759,7 @@ public: { CV_Assert_N(nodes.size() <= 2); auto& ieInpNode = nodes[0].dynamicCast()->node; - CV_Assert(finalSliceRanges[0].size() == ieInpNode->get_shape().size()); + CV_Assert(finalSliceRanges[0].size() == ieInpNode.get_shape().size()); std::vector offsets, dims; for (int i = 0; i < finalSliceRanges[0].size(); ++i) diff --git a/modules/dnn/src/layers/softmax_layer.cpp b/modules/dnn/src/layers/softmax_layer.cpp index b74f2b6791..faab6a565f 100644 --- a/modules/dnn/src/layers/softmax_layer.cpp +++ b/modules/dnn/src/layers/softmax_layer.cpp @@ -385,7 +385,7 @@ public: const std::vector >& nodes) CV_OVERRIDE { auto& ieInpNode = nodes[0].dynamicCast()->node; - int axis = normalize_axis(axisRaw, ieInpNode->get_shape().size()); + int axis = normalize_axis(axisRaw, ieInpNode.get_shape().size()); auto softmax = std::make_shared(ieInpNode, axis); if (logSoftMax) return Ptr(new InfEngineNgraphNode(std::make_shared(softmax))); diff --git a/modules/dnn/src/net_openvino.cpp b/modules/dnn/src/net_openvino.cpp index e974ce34a3..4d08edeaaa 100644 --- a/modules/dnn/src/net_openvino.cpp +++ b/modules/dnn/src/net_openvino.cpp @@ -321,8 +321,10 @@ void NetImplOpenVINO::initBackend(const std::vector& blobsToKeep_) return; } +#if INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2022_1) bool supportsCPUFallback = !isArmComputePlugin() && (preferableTarget == DNN_TARGET_CPU || openvino::checkTarget(DNN_TARGET_CPU)); +#endif // Build Inference Engine networks from sets of layers that support this // backend. 
Split a whole model on several Inference Engine networks if @@ -341,6 +343,10 @@ void NetImplOpenVINO::initBackend(const std::vector& blobsToKeep_) bool fused = ld.skip; Ptr layer = ld.layerInstance; +#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2022_1) + if (ld.id == 0) + continue; +#else if (!fused && !layer->supportBackend(preferableBackend)) { CV_LOG_DEBUG(NULL, "DNN/IE: NOT supported!"); @@ -355,17 +361,6 @@ void NetImplOpenVINO::initBackend(const std::vector& blobsToKeep_) } } - // TODO: fix these workarounds - if (preferableTarget == DNN_TARGET_MYRIAD || - preferableTarget == DNN_TARGET_HDDL || - preferableTarget == DNN_TARGET_OPENCL || - preferableTarget == DNN_TARGET_OPENCL_FP16) - customizable &= ld.type != "Concat"; - - if (preferableTarget == DNN_TARGET_OPENCL || - preferableTarget == DNN_TARGET_OPENCL_FP16) - customizable &= ld.type != "Power"; - if (preferableTarget == DNN_TARGET_OPENCL) customizable &= ld.type != "Eltwise"; @@ -390,6 +385,7 @@ void NetImplOpenVINO::initBackend(const std::vector& blobsToKeep_) continue; } } +#endif ld.skip = true; // Initially skip all Inference Engine supported layers. // Create a new network if one of inputs from different Inference Engine graph. 
@@ -478,7 +474,7 @@ void NetImplOpenVINO::initBackend(const std::vector& blobsToKeep_) int oid = ld.inputBlobsId[i].oid; auto ieInpNode = inputNodes[i].dynamicCast(); - const auto& ngraph_input_node = ieInpNode->node; + const auto& ngraph_input_node = ieInpNode->node.get_node_shared_ptr(); CV_LOG_DEBUG(NULL, "DNN/IE: bind output port " << lid << ":" << oid << " (" << ngraph_input_node->get_friendly_name() << ":" << ngraph_input_node->get_type_info().name << ")"); if ((oid == 0 && ngraph_input_node->get_output_size() == 1) || lid == 0) @@ -498,10 +494,7 @@ void NetImplOpenVINO::initBackend(const std::vector& blobsToKeep_) } CV_CheckLT((size_t)oid, ngraph_input_node->get_output_size(), ""); #if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_4) - // FIXIT refactor ".initNgraph()" API to use Output - // WA: use Concat to emulate Identity operation with requested output port - auto oid_node = std::make_shared(ngraph::OutputVector { ngraph_input_node->output(oid) }, 0); - inputNodes[i] = Ptr(new InfEngineNgraphNode(oid_node)); + inputNodes[i] = new InfEngineNgraphNode(ngraph_input_node->output(oid)); #elif INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2020_3) inputNodes[i] = Ptr(new InfEngineNgraphNode(ieInpNode->node->get_output_as_single_output_node(oid))); #else @@ -556,6 +549,36 @@ void NetImplOpenVINO::initBackend(const std::vector& blobsToKeep_) addNgraphOutputs(ld); } + // User may choose to return only intermediate blobs but not network's result (see Test_TFLite.max_unpooling) + // Such layers should not be skipped when forwardLayer is called. 
+ // Also, perform a sanity check that there is no double inferred networks (a single skip=false per unique net instance) + std::set> uniqueNets; + if (!blobsToKeep_.empty()) + { + LayerPin latestLayerPin = getLatestLayerPin(blobsToKeep_); + for (MapIdToLayerData::iterator it = layers.begin(); it != layers.end(); ++it) + { + LayerData& ld = it->second; + auto iter = ld.backendNodes.find(preferableBackend); + if (iter == ld.backendNodes.end()) + continue; + + Ptr& node = iter->second; + if (node.empty()) + continue; + + Ptr ieNode = node.dynamicCast(); + if (ieNode.empty()) + continue; + + if (ld.id == latestLayerPin.lid) { + ld.skip = false; + uniqueNets.insert(ieNode->net); + break; + } + } + } + // Initialize all networks. for (MapIdToLayerData::reverse_iterator it = layers.rbegin(); it != layers.rend(); ++it) { @@ -578,9 +601,15 @@ void NetImplOpenVINO::initBackend(const std::vector& blobsToKeep_) { ieNode->net->addOutput(ieNode); ieNode->net->createNet((Target)preferableTarget); - ld.skip = false; + if (uniqueNets.find(ieNode->net) == uniqueNets.end()) { + ld.skip = false; + uniqueNets.insert(ieNode->net); + } } } +#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2022_1) + CV_Assert(uniqueNets.size() == 1); +#endif } diff --git a/modules/dnn/test/test_halide_layers.cpp b/modules/dnn/test/test_halide_layers.cpp index 3629f720fb..12e62c754a 100644 --- a/modules/dnn/test/test_halide_layers.cpp +++ b/modules/dnn/test/test_halide_layers.cpp @@ -425,6 +425,13 @@ TEST_P(FullyConnected, Accuracy) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X); } #endif + // https://github.com/openvinotoolkit/openvino/issues/19436 + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && targetId == DNN_TARGET_OPENCL_FP16 && batch == 16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16); +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2023000000) + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && targetId == DNN_TARGET_OPENCL && batch == 16) + 
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL); +#endif Mat weights(outChannels, inChannels * inSize.height * inSize.width, CV_32F); randu(weights, -1.0f, 1.0f); @@ -454,11 +461,13 @@ TEST_P(FullyConnected, Accuracy) if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && targetId == DNN_TARGET_OPENCL_FP16) { l1 = 0.01; + if (INF_ENGINE_VER_MAJOR_GE(2023000000)) + lInf = 0.016; } if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && targetId == DNN_TARGET_OPENCL) { l1 = 5e-3; - lInf = 7e-3; + lInf = INF_ENGINE_VER_MAJOR_GE(2023000000) ? 0.016 : 7e-3; } #endif if (targetId == DNN_TARGET_CUDA_FP16) diff --git a/modules/dnn/test/test_tflite_importer.cpp b/modules/dnn/test/test_tflite_importer.cpp index beb586f126..4f3a8b4a96 100644 --- a/modules/dnn/test/test_tflite_importer.cpp +++ b/modules/dnn/test/test_tflite_importer.cpp @@ -157,14 +157,7 @@ TEST_P(Test_TFLite, max_unpooling) net.setInput(input); std::vector > outs; - if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) { - // TODO: seems like a bug with a retrieving intermediate tensors - net.forward(outs, {"conv2d_transpose_4", "p_re_lu_1", "max_pooling_with_argmax2d", "conv2d_86", "max_unpooling2d_2"}); - outs.erase(outs.begin()); - } - else { - net.forward(outs, {"p_re_lu_1", "max_pooling_with_argmax2d", "conv2d_86", "max_unpooling2d_2"}); - } + net.forward(outs, {"p_re_lu_1", "max_pooling_with_argmax2d", "conv2d_86", "max_unpooling2d_2"}); ASSERT_EQ(outs.size(), 4); ASSERT_EQ(outs[0].size(), 1); From 494d201fda24d2862e37302c73d3b1febfe47a5f Mon Sep 17 00:00:00 2001 From: Yuriy Chernyshov Date: Mon, 28 Aug 2023 19:16:19 +0300 Subject: [PATCH 45/57] Add missing includes --- apps/visualisation/opencv_visualisation.cpp | 1 + modules/core/include/opencv2/core/opencl/opencl_info.hpp | 1 + modules/core/src/check.cpp | 2 ++ 3 files changed, 4 insertions(+) diff --git a/apps/visualisation/opencv_visualisation.cpp b/apps/visualisation/opencv_visualisation.cpp index 85e9697aad..9b7fcd9f48 100644 --- 
a/apps/visualisation/opencv_visualisation.cpp +++ b/apps/visualisation/opencv_visualisation.cpp @@ -60,6 +60,7 @@ Created by: Puttemans Steven - April 2016 #include #include +#include using namespace std; using namespace cv; diff --git a/modules/core/include/opencv2/core/opencl/opencl_info.hpp b/modules/core/include/opencv2/core/opencl/opencl_info.hpp index 3ead76e5c4..0f0de893ca 100644 --- a/modules/core/include/opencv2/core/opencl/opencl_info.hpp +++ b/modules/core/include/opencv2/core/opencl/opencl_info.hpp @@ -3,6 +3,7 @@ // of this distribution and at http://opencv.org/license.html. #include +#include #include #include diff --git a/modules/core/src/check.cpp b/modules/core/src/check.cpp index ffd9b302bf..2891f3a2e3 100644 --- a/modules/core/src/check.cpp +++ b/modules/core/src/check.cpp @@ -4,6 +4,8 @@ #include "precomp.hpp" +#include + #include "opencv2/core/check.hpp" namespace cv { From 674c618471f2f7c57d6ca51a6638667864b6ebc8 Mon Sep 17 00:00:00 2001 From: CSBVision Date: Tue, 8 Aug 2023 13:31:32 +0200 Subject: [PATCH 46/57] Update dnn_utils.cpp --- modules/dnn/src/dnn_utils.cpp | 67 ++++++++++++++++++++-------------- modules/dnn/test/test_misc.cpp | 22 +++++++++++ 2 files changed, 61 insertions(+), 28 deletions(-) diff --git a/modules/dnn/src/dnn_utils.cpp b/modules/dnn/src/dnn_utils.cpp index 18c7e975eb..d4d7dda008 100644 --- a/modules/dnn/src/dnn_utils.cpp +++ b/modules/dnn/src/dnn_utils.cpp @@ -5,6 +5,7 @@ #include "precomp.hpp" #include +#include namespace cv { @@ -100,15 +101,29 @@ void blobFromImagesWithParams(InputArrayOfArrays images_, OutputArray blob_, con images_.getMatVector(images); CV_Assert(!images.empty()); - int nch = images[0].channels(); - Scalar scalefactor = param.scalefactor; - if (param.ddepth == CV_8U) { - CV_Assert(scalefactor == Scalar::all(1.0) && "Scaling is not supported for CV_8U blob depth"); + CV_Assert(param.scalefactor == Scalar::all(1.0) && "Scaling is not supported for CV_8U blob depth"); CV_Assert(param.mean == 
Scalar() && "Mean subtraction is not supported for CV_8U blob depth"); } + int nch = images[0].channels(); + Scalar scalefactor = param.scalefactor; + Scalar mean = param.mean; + + if (param.swapRB) + { + if (nch > 2) + { + std::swap(mean[0], mean[2]); + std::swap(scalefactor[0], scalefactor[2]); + } + else + { + CV_LOG_WARNING(NULL, "Red/blue color swapping requires at least three image channels."); + } + } + for (size_t i = 0; i < images.size(); i++) { Size imgSize = images[i].size(); @@ -126,34 +141,26 @@ void blobFromImagesWithParams(InputArrayOfArrays images_, OutputArray blob_, con size); images[i] = images[i](crop); } + else if (param.paddingmode == DNN_PMODE_LETTERBOX) + { + float resizeFactor = std::min(size.width / (float)imgSize.width, + size.height / (float)imgSize.height); + int rh = int(imgSize.height * resizeFactor); + int rw = int(imgSize.width * resizeFactor); + resize(images[i], images[i], Size(rw, rh), INTER_LINEAR); + + int top = (size.height - rh)/2; + int bottom = size.height - top - rh; + int left = (size.width - rw)/2; + int right = size.width - left - rw; + copyMakeBorder(images[i], images[i], top, bottom, left, right, BORDER_CONSTANT); + } else { - if (param.paddingmode == DNN_PMODE_LETTERBOX) - { - float resizeFactor = std::min(size.width / (float)imgSize.width, - size.height / (float)imgSize.height); - int rh = int(imgSize.height * resizeFactor); - int rw = int(imgSize.width * resizeFactor); - resize(images[i], images[i], Size(rw, rh), INTER_LINEAR); - - int top = (size.height - rh)/2; - int bottom = size.height - top - rh; - int left = (size.width - rw)/2; - int right = size.width - left - rw; - copyMakeBorder(images[i], images[i], top, bottom, left, right, BORDER_CONSTANT); - } - else - resize(images[i], images[i], size, 0, 0, INTER_LINEAR); + resize(images[i], images[i], size, 0, 0, INTER_LINEAR); } } - Scalar mean = param.mean; - if (param.swapRB) - { - std::swap(mean[0], mean[2]); - std::swap(scalefactor[0], scalefactor[2]); - } - 
if (images[i].depth() == CV_8U && param.ddepth == CV_32F) images[i].convertTo(images[i], CV_32F); @@ -220,18 +227,22 @@ void blobFromImagesWithParams(InputArrayOfArrays images_, OutputArray blob_, con CV_Assert(image.depth() == blob_.depth()); CV_Assert(image.channels() == image0.channels()); CV_Assert(image.size() == image0.size()); - if (param.swapRB) + if (nch > 2 && param.swapRB) { Mat tmpRB; cvtColor(image, tmpRB, COLOR_BGR2RGB); tmpRB.copyTo(Mat(tmpRB.rows, tmpRB.cols, subMatType, blob.ptr((int)i, 0))); } else + { image.copyTo(Mat(image.rows, image.cols, subMatType, blob.ptr((int)i, 0))); + } } } else + { CV_Error(Error::StsUnsupportedFormat, "Unsupported data layout in blobFromImagesWithParams function."); + } } void imagesFromBlob(const cv::Mat& blob_, OutputArrayOfArrays images_) diff --git a/modules/dnn/test/test_misc.cpp b/modules/dnn/test/test_misc.cpp index 4ee3e013cb..0c5fb28c5d 100644 --- a/modules/dnn/test/test_misc.cpp +++ b/modules/dnn/test/test_misc.cpp @@ -120,6 +120,28 @@ TEST(blobFromImageWithParams_4ch, letter_box) EXPECT_EQ(0, cvtest::norm(targetBlob, blob, NORM_INF)); } +TEST(blobFromImagesWithParams_4ch, multi_image) +{ + Mat img(10, 10, CV_8UC4, cv::Scalar(0, 1, 2, 3)); + Scalar scalefactor(0.1, 0.2, 0.3, 0.4); + + Image2BlobParams param; + param.scalefactor = scalefactor; + param.datalayout = DNN_LAYOUT_NHWC; + + Mat blobs = blobFromImagesWithParams(std::vector { img, 2*img }, param); + vector ranges; + ranges.push_back(Range(0, 1)); + ranges.push_back(Range(0, blobs.size[1])); + ranges.push_back(Range(0, blobs.size[2])); + ranges.push_back(Range(0, blobs.size[3])); + Mat blob0 = blobs(ranges); + ranges[0] = Range(1, 2); + Mat blob1 = blobs(ranges); + + EXPECT_EQ(0, cvtest::norm(2*blob0, blob1, NORM_INF)); +} + TEST(readNet, Regression) { Net net = readNet(findDataFile("dnn/squeezenet_v1.1.prototxt"), From 8a415c881ab9e13d8d1319ca8fdbc1d3c3076aec Mon Sep 17 00:00:00 2001 From: Yuriy Chernyshov Date: Wed, 6 Sep 2023 13:45:28 +0300 
Subject: [PATCH 47/57] Add missing std namespace qualifiers --- .../calib3d/test/test_affine2d_estimator.cpp | 4 ++-- .../calib3d/test/test_affine3d_estimator.cpp | 4 ++-- .../test/test_affine_partial2d_estimator.cpp | 4 ++-- modules/calib3d/test/test_stereomatching.cpp | 8 ++++---- .../test/test_translation3d_estimator.cpp | 4 ++-- modules/core/test/test_countnonzero.cpp | 2 +- .../test/test_descriptors_regression.cpp | 2 +- modules/imgproc/test/test_histograms.cpp | 4 ++-- modules/objdetect/src/qrcode.cpp | 18 +++++++++--------- modules/objdetect/test/test_cascadeandhog.cpp | 2 +- 10 files changed, 26 insertions(+), 26 deletions(-) diff --git a/modules/calib3d/test/test_affine2d_estimator.cpp b/modules/calib3d/test/test_affine2d_estimator.cpp index 95f1235105..2282dc3240 100644 --- a/modules/calib3d/test/test_affine2d_estimator.cpp +++ b/modules/calib3d/test/test_affine2d_estimator.cpp @@ -115,8 +115,8 @@ TEST_P(EstimateAffine2D, testNPoints) EXPECT_NEAR(0., cvtest::norm(aff_est, aff, NORM_INF), 1e-4); - bool inliers_good = count(inliers.begin(), inliers.end(), 1) == m && - m == accumulate(inliers.begin(), inliers.begin() + m, 0); + bool inliers_good = std::count(inliers.begin(), inliers.end(), 1) == m && + m == std::accumulate(inliers.begin(), inliers.begin() + m, 0); EXPECT_TRUE(inliers_good); } diff --git a/modules/calib3d/test/test_affine3d_estimator.cpp b/modules/calib3d/test/test_affine3d_estimator.cpp index 3f1b50e5f2..bb639a4018 100644 --- a/modules/calib3d/test/test_affine3d_estimator.cpp +++ b/modules/calib3d/test/test_affine3d_estimator.cpp @@ -160,8 +160,8 @@ bool CV_Affine3D_EstTest::testNPoints() return false; } - bool outl_good = count(outl.begin(), outl.end(), 1) == m && - m == accumulate(outl.begin(), outl.begin() + m, 0); + bool outl_good = std::count(outl.begin(), outl.end(), 1) == m && + m == std::accumulate(outl.begin(), outl.begin() + m, 0); if (!outl_good) { diff --git a/modules/calib3d/test/test_affine_partial2d_estimator.cpp 
b/modules/calib3d/test/test_affine_partial2d_estimator.cpp index 0be25ee7eb..dbbb4da0d9 100644 --- a/modules/calib3d/test/test_affine_partial2d_estimator.cpp +++ b/modules/calib3d/test/test_affine_partial2d_estimator.cpp @@ -125,8 +125,8 @@ TEST_P(EstimateAffinePartial2D, testNPoints) EXPECT_NEAR(0., cvtest::norm(aff_est, aff, NORM_INF), 1e-4); - bool inliers_good = count(inliers.begin(), inliers.end(), 1) == m && - m == accumulate(inliers.begin(), inliers.begin() + m, 0); + bool inliers_good = std::count(inliers.begin(), inliers.end(), 1) == m && + m == std::accumulate(inliers.begin(), inliers.begin() + m, 0); EXPECT_TRUE(inliers_good); } diff --git a/modules/calib3d/test/test_stereomatching.cpp b/modules/calib3d/test/test_stereomatching.cpp index 02d1823d2d..c17d92292a 100644 --- a/modules/calib3d/test/test_stereomatching.cpp +++ b/modules/calib3d/test/test_stereomatching.cpp @@ -740,8 +740,8 @@ public: CV_StereoBMTest() { name = "stereobm"; - fill(rmsEps.begin(), rmsEps.end(), 0.4f); - fill(fracEps.begin(), fracEps.end(), 0.022f); + std::fill(rmsEps.begin(), rmsEps.end(), 0.4f); + std::fill(fracEps.begin(), fracEps.end(), 0.022f); } protected: @@ -866,8 +866,8 @@ public: CV_StereoSGBMTest() { name = "stereosgbm"; - fill(rmsEps.begin(), rmsEps.end(), 0.25f); - fill(fracEps.begin(), fracEps.end(), 0.01f); + std::fill(rmsEps.begin(), rmsEps.end(), 0.25f); + std::fill(fracEps.begin(), fracEps.end(), 0.01f); } protected: diff --git a/modules/calib3d/test/test_translation3d_estimator.cpp b/modules/calib3d/test/test_translation3d_estimator.cpp index 88ad40e0f8..97c20e5033 100644 --- a/modules/calib3d/test/test_translation3d_estimator.cpp +++ b/modules/calib3d/test/test_translation3d_estimator.cpp @@ -91,8 +91,8 @@ TEST(Calib3d_EstimateTranslation3D, testNPoints) << "aff est: " << trans_est << endl << "aff ref: " << trans; - bool outl_good = count(outl.begin(), outl.end(), 1) == m && - m == accumulate(outl.begin(), outl.begin() + m, 0); + bool outl_good = 
std::count(outl.begin(), outl.end(), 1) == m && + m == std::accumulate(outl.begin(), outl.begin() + m, 0); EXPECT_TRUE(outl_good); } diff --git a/modules/core/test/test_countnonzero.cpp b/modules/core/test/test_countnonzero.cpp index fe14affb9c..41eaceb189 100644 --- a/modules/core/test/test_countnonzero.cpp +++ b/modules/core/test/test_countnonzero.cpp @@ -259,7 +259,7 @@ TEST_P (CountNonZeroND, ndim) const int ONE_SIZE = 5; vector sizes(dims); - fill(sizes.begin(), sizes.end(), ONE_SIZE); + std::fill(sizes.begin(), sizes.end(), ONE_SIZE); Mat data(sizes, CV_MAKETYPE(type, 1)); data = 0; diff --git a/modules/features2d/test/test_descriptors_regression.cpp b/modules/features2d/test/test_descriptors_regression.cpp index 0258fea0f3..e44edb0769 100644 --- a/modules/features2d/test/test_descriptors_regression.cpp +++ b/modules/features2d/test/test_descriptors_regression.cpp @@ -142,7 +142,7 @@ TEST_P(DescriptorImage, no_crash) { vector fnames; glob(cvtest::TS::ptr()->get_data_path() + pattern, fnames, false); - sort(fnames.begin(), fnames.end()); + std::sort(fnames.begin(), fnames.end()); Ptr akaze_mldb = AKAZE::create(AKAZE::DESCRIPTOR_MLDB); Ptr akaze_mldb_upright = AKAZE::create(AKAZE::DESCRIPTOR_MLDB_UPRIGHT); diff --git a/modules/imgproc/test/test_histograms.cpp b/modules/imgproc/test/test_histograms.cpp index b57af774f2..efd045d31b 100644 --- a/modules/imgproc/test/test_histograms.cpp +++ b/modules/imgproc/test/test_histograms.cpp @@ -1198,7 +1198,7 @@ void CV_CalcHistTest::run_func(void) } std::vector imagesv(cdims); - copy(images.begin(), images.begin() + cdims, imagesv.begin()); + std::copy(images.begin(), images.begin() + cdims, imagesv.begin()); Mat mask = images[CV_MAX_DIM]; if( !CV_IS_SPARSE_HIST(hist[0]) ) @@ -1493,7 +1493,7 @@ void CV_CalcBackProjectTest::run_func(void) } std::vector imagesv(hdims); - copy(images.begin(), images.begin() + hdims, imagesv.begin()); + std::copy(images.begin(), images.begin() + hdims, imagesv.begin()); cv::Mat dst = 
images[CV_MAX_DIM+1]; diff --git a/modules/objdetect/src/qrcode.cpp b/modules/objdetect/src/qrcode.cpp index 1df46a9bb5..f4df6555da 100644 --- a/modules/objdetect/src/qrcode.cpp +++ b/modules/objdetect/src/qrcode.cpp @@ -1249,14 +1249,14 @@ bool QRDecode::computeSidesPoints(const vector &result_integer_hull) { if (points.front().x > points.back().x) { - reverse(points.begin(), points.end()); + std::reverse(points.begin(), points.end()); } } else { if (points.front().y > points.back().y) { - reverse(points.begin(), points.end()); + std::reverse(points.begin(), points.end()); } } if (points.empty()) @@ -1632,7 +1632,7 @@ bool QRDecode::findPatternsVerticesPoints(vector > &patterns_verti } if ((int)min_angle_pnts_indexes.size() == num_vertices) { break; } } - sort(min_angle_pnts_indexes.begin(), min_angle_pnts_indexes.end()); + std::sort(min_angle_pnts_indexes.begin(), min_angle_pnts_indexes.end()); vector contour_vertices_points; @@ -1761,11 +1761,11 @@ bool QRDecode::findTempPatternsAddingPoints(vector } if (abs(p1.x - p2.x) > abs(p1.y - p2.y)) { - sort(points.begin(), points.end(), sortPointsByX()); + std::sort(points.begin(), points.end(), sortPointsByX()); } else { - sort(points.begin(), points.end(), sortPointsByY()); + std::sort(points.begin(), points.end(), sortPointsByY()); } temp_patterns_add_points.push_back(std::pair >(idx_curved_side,points)); @@ -1909,11 +1909,11 @@ void QRDecode::completeAndSortSides() Point p2 = it->second.back(); if (abs(p1.x - p2.x) > abs(p1.y - p2.y)) { - sort(it->second.begin(), it->second.end(), sortPointsByX()); + std::sort(it->second.begin(), it->second.end(), sortPointsByX()); } else { - sort(it->second.begin(), it->second.end(), sortPointsByY()); + std::sort(it->second.begin(), it->second.end(), sortPointsByY()); } } } @@ -2075,8 +2075,8 @@ bool QRDecode::divideIntoEvenSegments(vector > &segments_points) Point2f segment_start = segments_points[i][j]; Point2f segment_end = segments_points[i][j + 1]; vector::iterator it_start, 
it_end, it; - it_start = find(spline_lines[i].begin(), spline_lines[i].end(), segment_start); - it_end = find(spline_lines[i].begin(), spline_lines[i].end(), segment_end); + it_start = std::find(spline_lines[i].begin(), spline_lines[i].end(), segment_start); + it_end = std::find(spline_lines[i].begin(), spline_lines[i].end(), segment_end); float max_dist_to_line = 0.0; for (it = it_start; it != it_end; it++) { diff --git a/modules/objdetect/test/test_cascadeandhog.cpp b/modules/objdetect/test/test_cascadeandhog.cpp index 4151b899e3..0a68bd9bb3 100644 --- a/modules/objdetect/test/test_cascadeandhog.cpp +++ b/modules/objdetect/test/test_cascadeandhog.cpp @@ -355,7 +355,7 @@ int CV_DetectorTest::validate( int detectorIdx, vector >& objects ) map[minIdx] = 1; } } - noPair += (int)count_if( map.begin(), map.end(), isZero ); + noPair += (int)std::count_if( map.begin(), map.end(), isZero ); totalNoPair += noPair; /*if( noPair > cvRound(valRects.size()*eps.noPair)+1 ) From d0de575aef0b3383f183f6951a64dddc02a7563a Mon Sep 17 00:00:00 2001 From: beanjoy <120680451@qq.com> Date: Thu, 7 Sep 2023 18:06:39 +0800 Subject: [PATCH 48/57] Merge pull request #24142 from beanjoy:4.x MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Modify the outputVideoFormat after changing the output format in MSMF backend #24142 After changing the output format, need to modify the outputVideoFormat, otherwise the outputVideoFormat is always CV_CAP_MODE_BGR, and an error will occur when converting the format in retrieveVideoFrame(), and will always enter "case CV_CAP_MODE_BGR:" process. ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [ ] I agree to contribute to the project under Apache 2 License. 
- [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [ ] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake Co-authored-by: 李龙 --- modules/videoio/src/cap_msmf.cpp | 7 ++++++- modules/videoio/test/test_camera.cpp | 15 +++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/modules/videoio/src/cap_msmf.cpp b/modules/videoio/src/cap_msmf.cpp index a55f919ed1..4b234b8cae 100644 --- a/modules/videoio/src/cap_msmf.cpp +++ b/modules/videoio/src/cap_msmf.cpp @@ -1159,7 +1159,12 @@ bool CvCapture_MSMF::configureVideoOutput(MediaType newType, cv::uint32_t outFor { initStream(dwVideoStreamIndex, nativeFormat); } - return initStream(dwVideoStreamIndex, newFormat); + if (!initStream(dwVideoStreamIndex, newFormat)) + { + return false; + } + outputVideoFormat = outFormat; + return true; } bool CvCapture_MSMF::configureOutput() diff --git a/modules/videoio/test/test_camera.cpp b/modules/videoio/test/test_camera.cpp index fc269959c3..8b0f0efe83 100644 --- a/modules/videoio/test/test_camera.cpp +++ b/modules/videoio/test/test_camera.cpp @@ -119,6 +119,21 @@ TEST(DISABLED_videoio_camera, v4l_read_mjpg) capture.release(); } +TEST(DISABLED_videoio_camera, msmf_read_yuyv) +{ + VideoCapture capture(CAP_MSMF); + ASSERT_TRUE(capture.isOpened()); + ASSERT_TRUE(capture.set(CAP_PROP_FOURCC, VideoWriter::fourcc('Y', 'U', 'Y', 'V'))); + std::cout << "Camera 0 via " << capture.getBackendName() << " backend" << std::endl; + std::cout << "Frame width: " << capture.get(CAP_PROP_FRAME_WIDTH) << std::endl; + std::cout << " height: " << capture.get(CAP_PROP_FRAME_HEIGHT) << std::endl; + std::cout 
<< "Capturing FPS: " << capture.get(CAP_PROP_FPS) << std::endl; + int fourcc = (int)capture.get(CAP_PROP_FOURCC); + std::cout << "FOURCC code: " << cv::format("0x%8x", fourcc) << std::endl; + test_readFrames(capture); + capture.release(); +} + TEST(DISABLED_videoio_camera, v4l_open_mjpg) { VideoCapture capture; From e8f94182f577894410cc59d5d20979dff69d8878 Mon Sep 17 00:00:00 2001 From: jason_w Date: Thu, 7 Sep 2023 20:47:00 +0800 Subject: [PATCH 49/57] Merge pull request #24180 from MambaWong:4.x Fixed the channels when capturing yuv422 with v4l2 backend #24180 example to reproduce the problem ```cpp #include #include #include #include #include using namespace cv; using namespace std; void help_func(VideoCapture& cap) { int height = cap.get(cv::CAP_PROP_FRAME_HEIGHT); int width = cap.get(cv::CAP_PROP_FRAME_WIDTH); int pixel_type = cap.get(cv::CAP_PROP_FORMAT); int channels = CV_MAT_CN(pixel_type); int pixel_bytes = CV_ELEM_SIZE(pixel_type); bool to_bgr = static_cast(cap.get(cv::CAP_PROP_CONVERT_RGB)); std::cout << "backend: " << cap.getBackendName() << std::endl; std::cout << std::hex << "fourcc: " << static_cast(cap.get(cv::CAP_PROP_FOURCC)) << std::endl; std::cout << std::boolalpha << "to_bgr: " << to_bgr << std::endl; std::cout << std::dec << "height: " << height << " width: " << width << " channels: " << channels << " pixel_bytes: " << pixel_bytes << std::endl; std::cout << "-----------------------------------------" << std::endl; } int main(int, char**) { VideoCapture cap; cap.open("/dev/video0"); if (!cap.isOpened()) { cerr << "ERROR! 
Unable to open camera\n"; return -1; } { help_func(cap); } { cap.set(cv::CAP_PROP_FRAME_HEIGHT, 1080); cap.set(cv::CAP_PROP_FRAME_WIDTH, 1920); cap.set(cv::CAP_PROP_CONVERT_RGB, 0); help_func(cap); } // { // cap.set(cv::CAP_PROP_CONVERT_RGB, 0); // cap.set(cv::CAP_PROP_FRAME_HEIGHT, 1080); // cap.set(cv::CAP_PROP_FRAME_WIDTH, 1920); // help_func(cap); // } Mat frame; int frame_idx = 0; while (cap.read(frame)) { std::cout << "frame index: " << frame_idx++ << std::endl; help_func(cap); if (frame.empty()) { cerr << "ERROR! blank frame grabbed\n"; break; } Mat bgr; if (cap.get(cv::CAP_PROP_CONVERT_RGB)) { bgr = frame; } else { cv::cvtColor(frame, bgr, cv::COLOR_YUV2BGR_YUYV); } imshow("frame", bgr); if (waitKey(5) >= 0) { break; } } return 0; } ``` The above code will get the wrong channels. By changing lines 41-45 like below, can get the correct channels. code This is because `cap.set(cv::CAP_PROP_FRAME_HEIGHT, 1080);` and `cap.set(cv::CAP_PROP_FRAME_WIDTH, 1920);` reinitialize the `frame`, but `cap.set(cv::CAP_PROP_CONVERT_RGB, 0);` not. Log info. log We can also observe that we get the correct channels in the while loop. This is because: https://github.com/opencv/opencv/blob/ca0bd70cde431b1dd211254011dd9bcf965f582f/modules/videoio/src/cap_v4l.cpp#L2309-L2310 reinitialize the `frame`. 
--- modules/videoio/src/cap_v4l.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/videoio/src/cap_v4l.cpp b/modules/videoio/src/cap_v4l.cpp index 905c79e42f..5b282f1966 100644 --- a/modules/videoio/src/cap_v4l.cpp +++ b/modules/videoio/src/cap_v4l.cpp @@ -2155,6 +2155,7 @@ bool CvCaptureCAM_V4L::setProperty( int property_id, double _value ) }else{ convert_rgb = false; releaseFrame(); + v4l2_create_frame(); return true; } case cv::CAP_PROP_FOURCC: From e5ff41ec9bdbf1d81d095f82e3b87ce913dd69a7 Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 7 Sep 2023 14:09:01 +0300 Subject: [PATCH 50/57] fixes extendDictionary, add test --- modules/objdetect/src/aruco/aruco_dictionary.cpp | 3 ++- modules/objdetect/test/test_boarddetection.cpp | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/modules/objdetect/src/aruco/aruco_dictionary.cpp b/modules/objdetect/src/aruco/aruco_dictionary.cpp index f73cea3357..3d5f9b1bfd 100644 --- a/modules/objdetect/src/aruco/aruco_dictionary.cpp +++ b/modules/objdetect/src/aruco/aruco_dictionary.cpp @@ -355,6 +355,7 @@ static int _getSelfDistance(const Mat &marker) { Dictionary extendDictionary(int nMarkers, int markerSize, const Dictionary &baseDictionary, int randomSeed) { + CV_Assert(nMarkers > 0); RNG rng((uint64)(randomSeed)); Dictionary out = Dictionary(Mat(), markerSize); @@ -370,7 +371,7 @@ Dictionary extendDictionary(int nMarkers, int markerSize, const Dictionary &base // if baseDictionary is provided, calculate its intermarker distance if(baseDictionary.bytesList.rows > 0) { CV_Assert(baseDictionary.markerSize == markerSize); - out.bytesList = baseDictionary.bytesList.clone(); + out.bytesList = baseDictionary.bytesList.rowRange(0, min(nMarkers, baseDictionary.bytesList.rows)).clone(); int minDistance = markerSize * markerSize + 1; for(int i = 0; i < out.bytesList.rows; i++) { diff --git a/modules/objdetect/test/test_boarddetection.cpp b/modules/objdetect/test/test_boarddetection.cpp index 
e47e6c3cb6..0c99e6de61 100644 --- a/modules/objdetect/test/test_boarddetection.cpp +++ b/modules/objdetect/test/test_boarddetection.cpp @@ -318,4 +318,12 @@ TEST(CV_ArucoGenerateBoard, regression_1226) { }); } +TEST(CV_ArucoDictionary, extendDictionary) { + aruco::Dictionary base_dictionary = aruco::getPredefinedDictionary(aruco::DICT_4X4_250); + aruco::Dictionary custom_dictionary = aruco::extendDictionary(150, 4, base_dictionary); + + ASSERT_EQ(custom_dictionary.bytesList.rows, 150); + ASSERT_EQ(cv::norm(custom_dictionary.bytesList, base_dictionary.bytesList.rowRange(0, 150)), 0.); +} + }} // namespace From ceeb01dce5f6358df0c7b784b04fead14603a85d Mon Sep 17 00:00:00 2001 From: Alexander Lyulkov Date: Fri, 8 Sep 2023 12:44:22 +0700 Subject: [PATCH 51/57] Replaced torch7 by onnx model in fast-neural-style dnn sample --- samples/dnn/fast_neural_style.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/samples/dnn/fast_neural_style.py b/samples/dnn/fast_neural_style.py index 912c2f0832..22b8217b3a 100644 --- a/samples/dnn/fast_neural_style.py +++ b/samples/dnn/fast_neural_style.py @@ -5,15 +5,15 @@ import argparse parser = argparse.ArgumentParser( description='This script is used to run style transfer models from ' - 'https://github.com/jcjohnson/fast-neural-style using OpenCV') + 'https://github.com/onnx/models/tree/main/vision/style_transfer/fast_neural_style using OpenCV') parser.add_argument('--input', help='Path to image or video. 
Skip to capture frames from camera') -parser.add_argument('--model', help='Path to .t7 model') +parser.add_argument('--model', help='Path to .onnx model') parser.add_argument('--width', default=-1, type=int, help='Resize input to specific width.') parser.add_argument('--height', default=-1, type=int, help='Resize input to specific height.') parser.add_argument('--median_filter', default=0, type=int, help='Kernel size of postprocessing blurring.') args = parser.parse_args() -net = cv.dnn.readNetFromTorch(cv.samples.findFile(args.model)) +net = cv.dnn.readNetFromONNX(cv.samples.findFile(args.model)) net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV) if args.input: @@ -31,16 +31,12 @@ while cv.waitKey(1) < 0: inWidth = args.width if args.width != -1 else frame.shape[1] inHeight = args.height if args.height != -1 else frame.shape[0] inp = cv.dnn.blobFromImage(frame, 1.0, (inWidth, inHeight), - (103.939, 116.779, 123.68), swapRB=False, crop=False) + swapRB=True, crop=False) net.setInput(inp) out = net.forward() out = out.reshape(3, out.shape[2], out.shape[3]) - out[0] += 103.939 - out[1] += 116.779 - out[2] += 123.68 - out /= 255 out = out.transpose(1, 2, 0) t, _ = net.getPerfProfile() @@ -50,4 +46,7 @@ while cv.waitKey(1) < 0: if args.median_filter: out = cv.medianBlur(out, args.median_filter) + out = np.clip(out, 0, 255) + out = out.astype(np.uint8) + cv.imshow('Styled image', out) From 0367a12b920a553fdc5349e3eebedf4808bce2b2 Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Fri, 8 Sep 2023 12:36:46 +0300 Subject: [PATCH 52/57] Check that cv::merge input matrices are not empty. 
--- modules/core/src/merge.dispatch.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/core/src/merge.dispatch.cpp b/modules/core/src/merge.dispatch.cpp index b95dc7345d..abde21e0df 100644 --- a/modules/core/src/merge.dispatch.cpp +++ b/modules/core/src/merge.dispatch.cpp @@ -118,6 +118,7 @@ void merge(const Mat* mv, size_t n, OutputArray _dst) CV_INSTRUMENT_REGION(); CV_Assert( mv && n > 0 ); + CV_Assert(!mv[0].empty()); int depth = mv[0].depth(); bool allch1 = true; From 910db5c9b7015e623dabf13b591e40a9b577a3c4 Mon Sep 17 00:00:00 2001 From: Alexander Lyulkov Date: Fri, 8 Sep 2023 18:36:13 +0700 Subject: [PATCH 53/57] changed readNetFromONNX to readNet --- samples/dnn/fast_neural_style.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/dnn/fast_neural_style.py b/samples/dnn/fast_neural_style.py index 22b8217b3a..43b8b121d6 100644 --- a/samples/dnn/fast_neural_style.py +++ b/samples/dnn/fast_neural_style.py @@ -13,7 +13,7 @@ parser.add_argument('--height', default=-1, type=int, help='Resize input to spec parser.add_argument('--median_filter', default=0, type=int, help='Kernel size of postprocessing blurring.') args = parser.parse_args() -net = cv.dnn.readNetFromONNX(cv.samples.findFile(args.model)) +net = cv.dnn.readNet(cv.samples.findFile(args.model)) net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV) if args.input: From 91cf0d18430631df0b9cdd7480667df46e9bc0f2 Mon Sep 17 00:00:00 2001 From: alexlyulkov Date: Fri, 8 Sep 2023 19:36:01 +0700 Subject: [PATCH 54/57] Merge pull request #24244 from alexlyulkov:al/update-dnn-js-face-recognition-sample Replaced torch7 by onnx model in js_face_recognition dnn sample #24244 Changed face recognition model in js_face_recognition dnn sample: replaced torch7 model from https://github.com/pyannote/pyannote-data by ONNX model from https://github.com/opencv/opencv_zoo/tree/main/models/face_recognition_sface --- samples/dnn/js_face_recognition.html | 12 ++++++------ 1 file changed, 6 
insertions(+), 6 deletions(-) diff --git a/samples/dnn/js_face_recognition.html b/samples/dnn/js_face_recognition.html index d94ead1e58..5893a5cf13 100644 --- a/samples/dnn/js_face_recognition.html +++ b/samples/dnn/js_face_recognition.html @@ -40,7 +40,7 @@ function detectFaces(img) { //! [Get 128 floating points feature vector] function face2vec(face) { - var blob = cv.blobFromImage(face, 1.0 / 255, {width: 96, height: 96}, [0, 0, 0, 0], true, false) + var blob = cv.blobFromImage(face, 1.0, {width: 112, height: 112}, [0, 0, 0, 0], true, false) netRecogn.setInput(blob); var vec = netRecogn.forward(); blob.delete(); @@ -71,15 +71,15 @@ function loadModels(callback) { var utils = new Utils(''); var proto = 'https://raw.githubusercontent.com/opencv/opencv/4.x/samples/dnn/face_detector/deploy_lowres.prototxt'; var weights = 'https://raw.githubusercontent.com/opencv/opencv_3rdparty/dnn_samples_face_detector_20180205_fp16/res10_300x300_ssd_iter_140000_fp16.caffemodel'; - var recognModel = 'https://raw.githubusercontent.com/pyannote/pyannote-data/master/openface.nn4.small2.v1.t7'; + var recognModel = 'https://media.githubusercontent.com/media/opencv/opencv_zoo/main/models/face_recognition_sface/face_recognition_sface_2021dec.onnx'; utils.createFileFromUrl('face_detector.prototxt', proto, () => { document.getElementById('status').innerHTML = 'Downloading face_detector.caffemodel'; utils.createFileFromUrl('face_detector.caffemodel', weights, () => { document.getElementById('status').innerHTML = 'Downloading OpenFace model'; - utils.createFileFromUrl('face_recognition.t7', recognModel, () => { + utils.createFileFromUrl('face_recognition_sface_2021dec.onnx', recognModel, () => { document.getElementById('status').innerHTML = ''; netDet = cv.readNetFromCaffe('face_detector.prototxt', 'face_detector.caffemodel'); - netRecogn = cv.readNetFromTorch('face_recognition.t7'); + netRecogn = cv.readNet('face_recognition_sface_2021dec.onnx'); callback(); }); }); @@ -121,8 +121,8 @@ 
function main() { persons[name] = face2vec(face).clone(); var canvas = document.createElement("canvas"); - canvas.setAttribute("width", 96); - canvas.setAttribute("height", 96); + canvas.setAttribute("width", 112); + canvas.setAttribute("height", 112); var cell = document.getElementById("targetImgs").insertCell(0); cell.appendChild(canvas); From c319735d9b1e21760bb51260fa155596892a6348 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sat, 9 Sep 2023 03:19:45 +0000 Subject: [PATCH 55/57] js: include LUT support --- modules/js/test/test_core.js | 41 ++++++++++++++++++++++++++++++++ modules/js/test/test_mat.js | 2 +- modules/js/test/tests.html | 5 ++-- modules/js/test/tests.js | 11 ++++++--- platforms/js/opencv_js.config.py | 1 + 5 files changed, 54 insertions(+), 6 deletions(-) create mode 100644 modules/js/test/test_core.js diff --git a/modules/js/test/test_core.js b/modules/js/test/test_core.js new file mode 100644 index 0000000000..14d4ffe72b --- /dev/null +++ b/modules/js/test/test_core.js @@ -0,0 +1,41 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +if (typeof module !== 'undefined' && module.exports) { + // The environment is Node.js + var cv = require('./opencv.js'); // eslint-disable-line no-var +} + +QUnit.module('Core', {}); + +QUnit.test('test_LUT', function(assert) { + // test LUT + { + let src = cv.matFromArray(3, 3, cv.CV_8UC1, [255, 128, 0, 0, 128, 255, 1, 2, 254]); + let lutTable = []; + for (let i = 0; i < 256; i++) + { + lutTable[i] = 255 - i; + } + let lut = cv.matFromArray(1, 256, cv.CV_8UC1, lutTable); + let dst = new cv.Mat(); + + cv.LUT(src, lut, dst); + + //console.log(dst.data); + assert.equal(dst.ucharAt(0), 0); + assert.equal(dst.ucharAt(1), 127); + assert.equal(dst.ucharAt(2), 255); + assert.equal(dst.ucharAt(3), 255); + assert.equal(dst.ucharAt(4), 127); + assert.equal(dst.ucharAt(5), 0); + assert.equal(dst.ucharAt(6), 254); + assert.equal(dst.ucharAt(7), 253); + assert.equal(dst.ucharAt(8), 1); + + src.delete(); + lut.delete(); + dst.delete(); + } +}); diff --git a/modules/js/test/test_mat.js b/modules/js/test/test_mat.js index 409ed1b123..fd3611cd2c 100644 --- a/modules/js/test/test_mat.js +++ b/modules/js/test/test_mat.js @@ -73,7 +73,7 @@ if (typeof module !== 'undefined' && module.exports) { var cv = require('./opencv.js'); // eslint-disable-line no-var } -QUnit.module('Core', {}); +QUnit.module('CoreMat', {}); QUnit.test('test_mat_creation', function(assert) { // Mat constructors. 
diff --git a/modules/js/test/tests.html b/modules/js/test/tests.html index de64ca7a29..b20013ec63 100644 --- a/modules/js/test/tests.html +++ b/modules/js/test/tests.html @@ -52,12 +52,12 @@ if (window.cv instanceof Promise) { window.cv.then((target) => { window.cv = target; - //console.log(cv.getBuildInformation()); + console.log(cv.getBuildInformation()); QUnit.start(); }) } else { // for backward compatible - // console.log(cv.getBuildInformation()); + console.log(cv.getBuildInformation()); QUnit.start(); } }, @@ -108,6 +108,7 @@ + diff --git a/modules/js/test/tests.js b/modules/js/test/tests.js index f3156f6ea0..74a4b87e45 100644 --- a/modules/js/test/tests.js +++ b/modules/js/test/tests.js @@ -44,10 +44,15 @@ testrunner.options.maxBlockDuration = 20000; // cause opencv_js.js need time to testrunner.run( { code: 'opencv.js', - tests: ['test_mat.js', 'test_utils.js', 'test_imgproc.js', - 'test_objdetect.js', 'test_video.js', 'test_features2d.js', + tests: ['test_mat.js', + 'test_utils.js', + 'test_core.js', + 'test_imgproc.js', + 'test_objdetect.js', + 'test_video.js', + 'test_features2d.js', 'test_photo.js', - 'test_calib3d.js' + 'test_calib3d.js', ], }, function(err, report) { diff --git a/platforms/js/opencv_js.config.py b/platforms/js/opencv_js.config.py index 69891ea71a..5dca863bef 100644 --- a/platforms/js/opencv_js.config.py +++ b/platforms/js/opencv_js.config.py @@ -9,6 +9,7 @@ core = { 'perspectiveTransform', 'polarToCart', 'pow', 'randn', 'randu', 'reduce', 'repeat', 'rotate', 'setIdentity', 'setRNGSeed', 'solve', 'solvePoly', 'split', 'sqrt', 'subtract', 'trace', 'transform', 'transpose', 'vconcat', 'setLogLevel', 'getLogLevel', + 'LUT', ], 'Algorithm': [], } From 5dc5b2785884736f2889402502f35020b0481f45 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Sat, 9 Sep 2023 20:38:59 +0300 Subject: [PATCH 56/57] Enable build with OpenVINO in Debug --- modules/dnn/src/net_openvino.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/modules/dnn/src/net_openvino.cpp b/modules/dnn/src/net_openvino.cpp index 4d08edeaaa..c274f44a87 100644 --- a/modules/dnn/src/net_openvino.cpp +++ b/modules/dnn/src/net_openvino.cpp @@ -252,7 +252,7 @@ void NetImplOpenVINO::addNgraphOutputs(LayerData& ld) CV_Assert(!ieInpNode->net.empty()); if (layerNet != ieInpNode->net) { - CV_LOG_DEBUG(NULL, "DNN/IE: pin output between subnets: " << ieInpNode->node->get_friendly_name()); + CV_LOG_DEBUG(NULL, "DNN/IE: pin output between subnets: " << ieInpNode->node.get_node()->get_friendly_name()); ieInpNode->net->addOutput(ieInpNode); } } From 02525abd9fed88c39a393285f3c78880efa09101 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sun, 10 Sep 2023 13:11:01 +0000 Subject: [PATCH 57/57] cmake: revise OPENCV_DNN_BACKEND_DEFAULT integration - disable message on default value --- CMakeLists.txt | 2 +- modules/dnn/CMakeLists.txt | 7 ++++--- modules/dnn/src/dnn_params.cpp | 4 ++++ 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2a214a1a91..40d80e112c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1672,7 +1672,7 @@ else() endif() endif() -if(BUILD_opencv_dnn) +if(BUILD_opencv_dnn AND OPENCV_DNN_BACKEND_DEFAULT) status(" Default DNN backend:" ${OPENCV_DNN_BACKEND_DEFAULT}) endif() diff --git a/modules/dnn/CMakeLists.txt b/modules/dnn/CMakeLists.txt index 896ce5ded7..774e3c7b5a 100644 --- a/modules/dnn/CMakeLists.txt +++ b/modules/dnn/CMakeLists.txt @@ -227,9 +227,10 @@ if(TARGET ocv.3rdparty.openvino AND OPENCV_DNN_OPENVINO) endif() endif() -set(OPENCV_DNN_BACKEND_DEFAULT "DNN_BACKEND_OPENCV" CACHE STRING "Default backend used by the DNN module") -ocv_append_source_file_compile_definitions("${CMAKE_CURRENT_LIST_DIR}/src/dnn_params.cpp" "OPENCV_DNN_BACKEND_DEFAULT=${OPENCV_DNN_BACKEND_DEFAULT}") - +set(OPENCV_DNN_BACKEND_DEFAULT "" CACHE STRING "Default backend used by the DNN module (DNN_BACKEND_OPENCV if empty)") +if(OPENCV_DNN_BACKEND_DEFAULT) + 
ocv_append_source_file_compile_definitions("${CMAKE_CURRENT_LIST_DIR}/src/dnn_params.cpp" "OPENCV_DNN_BACKEND_DEFAULT=${OPENCV_DNN_BACKEND_DEFAULT}") +endif() ocv_install_used_external_targets(${libs} ${dnn_runtime_libs}) diff --git a/modules/dnn/src/dnn_params.cpp b/modules/dnn/src/dnn_params.cpp index 19d453012c..a76f4cd512 100644 --- a/modules/dnn/src/dnn_params.cpp +++ b/modules/dnn/src/dnn_params.cpp @@ -36,7 +36,11 @@ bool getParam_DNN_OPENCL_ALLOW_ALL_DEVICES() int getParam_DNN_BACKEND_DEFAULT() { static int PARAM_DNN_BACKEND_DEFAULT = (int)utils::getConfigurationParameterSizeT("OPENCV_DNN_BACKEND_DEFAULT", +#ifdef OPENCV_DNN_BACKEND_DEFAULT (size_t)OPENCV_DNN_BACKEND_DEFAULT +#else + (size_t)DNN_BACKEND_OPENCV +#endif ); return PARAM_DNN_BACKEND_DEFAULT; }