From 91e078be932726e2d3ad03954590aa0cc438670b Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Mon, 7 Apr 2025 14:11:13 +0300 Subject: [PATCH] Dropped inefficient (disabled) IPP integration for LUT. --- modules/core/include/opencv2/core/private.hpp | 1 - modules/core/src/lut.cpp | 182 ------------------ 2 files changed, 183 deletions(-) diff --git a/modules/core/include/opencv2/core/private.hpp b/modules/core/include/opencv2/core/private.hpp index 230f692c92..140264086f 100644 --- a/modules/core/include/opencv2/core/private.hpp +++ b/modules/core/include/opencv2/core/private.hpp @@ -213,7 +213,6 @@ T* allocSingletonNew() { return new(allocSingletonNewBuffer(sizeof(T))) T(); } // Temporary disabled named IPP region. Performance #define IPP_DISABLE_PERF_COPYMAKE 1 // performance variations -#define IPP_DISABLE_PERF_LUT 1 // there are no performance benefits (PR #2653) #define IPP_DISABLE_PERF_TRUE_DIST_MT 1 // cv::distanceTransform OpenCV MT performance is better #define IPP_DISABLE_PERF_CANNY_MT 1 // cv::Canny OpenCV MT performance is better diff --git a/modules/core/src/lut.cpp b/modules/core/src/lut.cpp index 20e12e4b92..090ba50d5e 100644 --- a/modules/core/src/lut.cpp +++ b/modules/core/src/lut.cpp @@ -104,184 +104,6 @@ static bool ocl_LUT(InputArray _src, InputArray _lut, OutputArray _dst) #endif -#if defined(HAVE_IPP) -#if !IPP_DISABLE_PERF_LUT // there are no performance benefits (PR #2653) -namespace ipp { - -class IppLUTParallelBody_LUTC1 : public ParallelLoopBody -{ -public: - bool* ok; - const Mat& src_; - const Mat& lut_; - Mat& dst_; - - int width; - size_t elemSize1; - - IppLUTParallelBody_LUTC1(const Mat& src, const Mat& lut, Mat& dst, bool* _ok) - : ok(_ok), src_(src), lut_(lut), dst_(dst) - { - width = dst.cols * dst.channels(); - elemSize1 = CV_ELEM_SIZE1(dst.depth()); - - CV_DbgAssert(elemSize1 == 1 || elemSize1 == 4); - *ok = true; - } - - void operator()( const cv::Range& range ) const - { - if (!*ok) - return; - - const int row0 = range.start; - const int row1 = range.end; - - Mat src = src_.rowRange(row0, row1); - Mat dst = dst_.rowRange(row0, row1); - - IppiSize sz = { width, dst.rows }; - - if (elemSize1 == 1) - { - if (CV_INSTRUMENT_FUN_IPP(ippiLUTPalette_8u_C1R, (const Ipp8u*)src.data, (int)src.step[0], dst.data, (int)dst.step[0], sz, lut_.data, 8) >= 0) - return; - } - else if (elemSize1 == 4) - { - if (CV_INSTRUMENT_FUN_IPP(ippiLUTPalette_8u32u_C1R, (const Ipp8u*)src.data, (int)src.step[0], (Ipp32u*)dst.data, (int)dst.step[0], sz, (Ipp32u*)lut_.data, 8) >= 0) - return; - } - *ok = false; - } -private: - IppLUTParallelBody_LUTC1(const IppLUTParallelBody_LUTC1&); - IppLUTParallelBody_LUTC1& operator=(const IppLUTParallelBody_LUTC1&); -}; - -class IppLUTParallelBody_LUTCN : public ParallelLoopBody -{ -public: - bool *ok; - const Mat& src_; - const Mat& lut_; - Mat& dst_; - - int lutcn; - - uchar* lutBuffer; - uchar* lutTable[4]; - - IppLUTParallelBody_LUTCN(const Mat& src, const Mat& lut, Mat& dst, bool* _ok) - : ok(_ok), src_(src), lut_(lut), dst_(dst), lutBuffer(NULL) - { - lutcn = lut.channels(); - IppiSize sz256 = {256, 1}; - - size_t elemSize1 = dst.elemSize1(); - CV_DbgAssert(elemSize1 == 1); - lutBuffer = (uchar*)CV_IPP_MALLOC(256 * (int)elemSize1 * 4); - lutTable[0] = lutBuffer + 0; - lutTable[1] = lutBuffer + 1 * 256 * elemSize1; - lutTable[2] = lutBuffer + 2 * 256 * elemSize1; - lutTable[3] = lutBuffer + 3 * 256 * elemSize1; - - CV_DbgAssert(lutcn == 3 || lutcn == 4); - if (lutcn == 3) - { - IppStatus status = CV_INSTRUMENT_FUN_IPP(ippiCopy_8u_C3P3R, lut.ptr(), (int)lut.step[0], lutTable, (int)lut.step[0], sz256); - if (status < 0) - return; - } - else if (lutcn == 4) - { - IppStatus status = CV_INSTRUMENT_FUN_IPP(ippiCopy_8u_C4P4R, lut.ptr(), (int)lut.step[0], lutTable, (int)lut.step[0], sz256); - if (status < 0) - return; - } - - *ok = true; - } - - ~IppLUTParallelBody_LUTCN() - { - if (lutBuffer != NULL) - ippFree(lutBuffer); - lutBuffer = NULL; - lutTable[0] = NULL; - } - - void operator()( const cv::Range& range ) const - { - if (!*ok) - return; - - const int row0 = range.start; - const int row1 = range.end; - - Mat src = src_.rowRange(row0, row1); - Mat dst = dst_.rowRange(row0, row1); - - if (lutcn == 3) - { - if (CV_INSTRUMENT_FUN_IPP(ippiLUTPalette_8u_C3R, src.ptr(), (int)src.step[0], dst.ptr(), (int)dst.step[0], ippiSize(dst.size()), lutTable, 8) >= 0) - return; - } - else if (lutcn == 4) - { - if (CV_INSTRUMENT_FUN_IPP(ippiLUTPalette_8u_C4R, src.ptr(), (int)src.step[0], dst.ptr(), (int)dst.step[0], ippiSize(dst.size()), lutTable, 8) >= 0) - return; - } - *ok = false; - } -private: - IppLUTParallelBody_LUTCN(const IppLUTParallelBody_LUTCN&); - IppLUTParallelBody_LUTCN& operator=(const IppLUTParallelBody_LUTCN&); -}; -} // namespace ipp - -static bool ipp_lut(Mat &src, Mat &lut, Mat &dst) -{ - CV_INSTRUMENT_REGION_IPP(); - - int lutcn = lut.channels(); - - if(src.dims > 2) - return false; - - bool ok = false; - Ptr body; - - size_t elemSize1 = CV_ELEM_SIZE1(dst.depth()); - - if (lutcn == 1) - { - ParallelLoopBody* p = new ipp::IppLUTParallelBody_LUTC1(src, lut, dst, &ok); - body.reset(p); - } - else if ((lutcn == 3 || lutcn == 4) && elemSize1 == 1) - { - ParallelLoopBody* p = new ipp::IppLUTParallelBody_LUTCN(src, lut, dst, &ok); - body.reset(p); - } - - if (body != NULL && ok) - { - Range all(0, dst.rows); - if (dst.total()>>18) - parallel_for_(all, *body, (double)std::max((size_t)1, dst.total()>>16)); - else - (*body)(all); - if (ok) - return true; - } - - return false; -} - -#endif -#endif // IPP - class LUTParallelBody : public ParallelLoopBody { public: @@ -348,10 +170,6 @@ void cv::LUT( InputArray _src, InputArray _lut, OutputArray _dst ) CALL_HAL(LUT, cv_hal_lut, src.data, src.step, src.type(), lut.data, lut.elemSize1(), lutcn, dst.data, dst.step, src.cols, src.rows); -#if !IPP_DISABLE_PERF_LUT - CV_IPP_RUN(_src.dims() <= 2, ipp_lut(src, lut, dst)); -#endif - if (_src.dims() <= 2) { bool ok = false;