From 7a9ce585f0d3cfa3b5d6c534a9d6cfb0674c282e Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 1 May 2025 20:54:15 +0000 Subject: [PATCH] core(ocl): fix POWN OpenCL implementation --- modules/core/perf/opencl/perf_arithm.cpp | 4 +-- modules/core/src/mathfuncs.cpp | 35 +++++++++++++++++++-- modules/core/src/opencl/arithm.cl | 27 +++++++++++++--- modules/core/test/ocl/test_arithm.cpp | 40 ++++++++++++++++++------ 4 files changed, 89 insertions(+), 17 deletions(-) diff --git a/modules/core/perf/opencl/perf_arithm.cpp b/modules/core/perf/opencl/perf_arithm.cpp index 42f5244b7f..2ee7862aa8 100644 --- a/modules/core/perf/opencl/perf_arithm.cpp +++ b/modules/core/perf/opencl/perf_arithm.cpp @@ -693,7 +693,7 @@ OCL_PERF_TEST_P(PowFixture, Pow, ::testing::Combine( ///////////// iPow //////////////////////// OCL_PERF_TEST_P(PowFixture, iPow, ::testing::Combine( - OCL_TEST_SIZES, OCL_PERF_ENUM(CV_8UC1, CV_8SC1,CV_16UC1,CV_16SC1,CV_32SC1))) + OCL_TEST_SIZES, OCL_PERF_ENUM(CV_8UC1, CV_8UC3, CV_8SC1, CV_16UC1, CV_16SC1, CV_32SC1, CV_32FC1, CV_64FC1))) { const Size_MatType_t params = GetParam(); const Size srcSize = get<0>(params); @@ -705,7 +705,7 @@ OCL_PERF_TEST_P(PowFixture, iPow, ::testing::Combine( randu(src, 0, 100); declare.in(src).out(dst); - OCL_TEST_CYCLE() cv::pow(src, 7.0, dst); + OCL_TEST_CYCLE() cv::pow(src, 3, dst); SANITY_CHECK_NOTHING(); } diff --git a/modules/core/src/mathfuncs.cpp b/modules/core/src/mathfuncs.cpp index 0604d2c33f..ac037e38c7 100644 --- a/modules/core/src/mathfuncs.cpp +++ b/modules/core/src/mathfuncs.cpp @@ -938,9 +938,40 @@ static bool ocl_pow(InputArray _src, double power, OutputArray _dst, bool issqrt = std::abs(power - 0.5) < DBL_EPSILON; const char * const op = issqrt ? "OP_SQRT" : is_ipower ? "OP_POWN" : "OP_POW"; + // Note: channels are unrolled + + std::string extra_opts =""; + if (is_ipower) + { + int wdepth = CV_32F; + if (depth == CV_64F) + wdepth = CV_64F; + else if (depth == CV_16F) + wdepth = CV_16F; + + char cvt[2][50]; + extra_opts = format( + " -D srcT1=%s -DsrcT1_C1=%s" + " -D srcT2=int -D workST=int" + " -D workT=%s -D wdepth=%d -D convertToWT1=%s" + " -D convertToDT=%s" + " -D workT1=%s", + ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), + ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), + ocl::typeToStr(CV_MAKE_TYPE(wdepth, 1)), + wdepth, + ocl::convertTypeStr(depth, wdepth, 1, cvt[0], sizeof(cvt[0])), + ocl::convertTypeStr(wdepth, depth, 1, cvt[1], sizeof(cvt[1])), + ocl::typeToStr(wdepth) + ); + } + ocl::Kernel k("KF", ocl::core::arithm_oclsrc, - format("-D dstT=%s -D DEPTH_dst=%d -D rowsPerWI=%d -D %s -D UNARY_OP%s", - ocl::typeToStr(depth), depth, rowsPerWI, op, + format("-D cn=%d -D dstT=%s -D dstT_C1=%s -D DEPTH_dst=%d -D rowsPerWI=%d -D %s%s%s%s", + 1, + ocl::typeToStr(depth), ocl::typeToStr(depth), depth, rowsPerWI, op, + " -D UNARY_OP=1", + extra_opts.empty() ? "" : extra_opts.c_str(), doubleSupport ? " -D DOUBLE_SUPPORT" : "")); if (k.empty()) return false; diff --git a/modules/core/src/opencl/arithm.cl b/modules/core/src/opencl/arithm.cl index 301cea9f98..bbf3b83c2c 100644 --- a/modules/core/src/opencl/arithm.cl +++ b/modules/core/src/opencl/arithm.cl @@ -80,6 +80,10 @@ #error "Kernel configuration error: ambiguous 'depth' value is defined, use 'DEPTH_dst' instead" #endif +#define CAT__(x, y) x ## y +#define CAT_(x, y) CAT__(x, y) +#define CAT(x, y) CAT_(x, y) + #if DEPTH_dst < 5 /* CV_32F */ #define CV_DST_TYPE_IS_INTEGER @@ -325,9 +329,12 @@ #define PROCESS_ELEM storedst(pow(srcelem1, srcelem2)) #elif defined OP_POWN -#undef workT -#define workT int -#define PROCESS_ELEM storedst(pown(srcelem1, srcelem2)) +#if cn > 1 +#define PROCESS_INIT CAT(int, cn) powi = (CAT(int, cn))srcelem2; +#else // cn +#define PROCESS_INIT int powi = srcelem2; +#endif +#define PROCESS_ELEM storedst(convertToDT(pown(srcelem1, powi))) #elif defined OP_SQRT #if CV_DST_TYPE_FIT_32F @@ -469,7 +476,7 @@ #define srcelem2 srcelem2_ #endif -#if cn == 3 +#if !defined(PROCESS_INIT) && cn == 3 #undef srcelem2 #define srcelem2 (workT)(srcelem2_.x, srcelem2_.y, srcelem2_.z) #endif @@ -517,6 +524,10 @@ __kernel void KF(__global const uchar * srcptr1, int srcstep1, int srcoffset1, int x = get_global_id(0); int y0 = get_global_id(1) * rowsPerWI; +#ifdef PROCESS_INIT + PROCESS_INIT +#endif + if (x < cols) { int mask_index = mad24(y0, maskstep, x + maskoffset); @@ -542,6 +553,10 @@ __kernel void KF(__global const uchar * srcptr1, int srcstep1, int srcoffset1, int x = get_global_id(0); int y0 = get_global_id(1) * rowsPerWI; +#ifdef PROCESS_INIT + PROCESS_INIT +#endif + if (x < cols) { int src1_index = mad24(y0, srcstep1, mad24(x, (int)sizeof(srcT1_C1) * cn, srcoffset1)); @@ -564,6 +579,10 @@ __kernel void KF(__global const uchar * srcptr1, int srcstep1, int srcoffset1, int x = get_global_id(0); int y0 = get_global_id(1) * rowsPerWI; +#ifdef PROCESS_INIT + PROCESS_INIT +#endif + if (x < cols) { int mask_index = mad24(y0, maskstep, x + maskoffset); diff --git a/modules/core/test/ocl/test_arithm.cpp b/modules/core/test/ocl/test_arithm.cpp index da7a003f11..2f20091c32 100644 --- a/modules/core/test/ocl/test_arithm.cpp +++ b/modules/core/test/ocl/test_arithm.cpp @@ -132,19 +132,25 @@ PARAM_TEST_CASE(ArithmTestBase, MatDepth, Channels, bool) use_roi = GET_PARAM(2); } - void generateTestData(bool with_val_in_range = false) + void generateTestData(bool with_val_in_range = false, + double minVal1 = std::numeric_limits::quiet_NaN(), double maxVal1 = std::numeric_limits::quiet_NaN(), + double minVal2 = std::numeric_limits::quiet_NaN(), double maxVal2 = std::numeric_limits::quiet_NaN() + ) { const int type = CV_MAKE_TYPE(depth, cn); - double minV = cvtest::getMinVal(type); - double maxV = cvtest::getMaxVal(type); + double minV1 = cvIsNaN(minVal1) ? 2 : minVal1; + double maxV1 = cvIsNaN(maxVal1) ? 11 : maxVal1; + + double minV2 = cvIsNaN(minVal2) ? std::max(-1540., cvtest::getMinVal(type)) : minVal2; + double maxV2 = cvIsNaN(maxVal2) ? std::min(1740., cvtest::getMaxVal(type)) : maxVal2; Size roiSize = randomSize(1, MAX_VALUE); Border src1Border = randomBorder(0, use_roi ? MAX_VALUE : 0); - randomSubMat(src1, src1_roi, roiSize, src1Border, type, 2, 11); // FIXIT: Test with minV, maxV + randomSubMat(src1, src1_roi, roiSize, src1Border, type, minV1, maxV1); // FIXIT: Test with minV, maxV Border src2Border = randomBorder(0, use_roi ? MAX_VALUE : 0); - randomSubMat(src2, src2_roi, roiSize, src2Border, type, std::max(-1540., minV), std::min(1740., maxV)); + randomSubMat(src2, src2_roi, roiSize, src2Border, type, minV2, maxV2); Border dst1Border = randomBorder(0, use_roi ? MAX_VALUE : 0); randomSubMat(dst1, dst1_roi, roiSize, dst1Border, type, 5, 16); @@ -162,8 +168,8 @@ PARAM_TEST_CASE(ArithmTestBase, MatDepth, Channels, bool) if (with_val_in_range) { - val_in_range = cv::Scalar(rng.uniform(minV, maxV), rng.uniform(minV, maxV), - rng.uniform(minV, maxV), rng.uniform(minV, maxV)); + val_in_range = cv::Scalar(rng.uniform(minV1, maxV1), rng.uniform(minV1, maxV1), + rng.uniform(minV1, maxV1), rng.uniform(minV1, maxV1)); } UMAT_UPLOAD_INPUT_PARAMETER(src1); @@ -844,14 +850,30 @@ OCL_TEST_P(Pow, Mat) for (int j = 0; j < 1/*test_loop_times*/; j++) for (int k = 0, size = sizeof(pows) / sizeof(double); k < size; ++k) { - SCOPED_TRACE(pows[k]); + SCOPED_TRACE(cv::format("POW=%g", pows[k])); - generateTestData(); + generateTestData(false, 1, 3); OCL_OFF(cv::pow(src1_roi, pows[k], dst1_roi)); OCL_ON(cv::pow(usrc1_roi, pows[k], udst1_roi)); OCL_EXPECT_MATS_NEAR_RELATIVE(dst1, 1e-5); + + if (cvtest::debugLevel >= 100) + { + cv::Rect roi(0, 0, 4, 4); + std::cout << src1_roi(roi) << std::endl; + std::cout << dst1_roi(roi) << std::endl; + std::cout << udst1_roi(roi) << std::endl; + + Mat diff; + cv::absdiff(dst1_roi, udst1_roi, diff); + std::cout << std::endl << diff(roi) << std::endl; + + std::cout << std::endl << dst1_roi << std::endl; + std::cout << std::endl << udst1_roi << std::endl; + std::cout << std::endl << diff << std::endl; + } } }