Merge pull request #27274 from opencv-pushbot:gitee/alalek/fix_26328

core(OCL): fix POWN OpenCL implementation
This commit is contained in:
Alexander Smorkalov 2025-05-03 14:44:39 +03:00 committed by GitHub
commit 806eb4767c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 89 additions and 17 deletions

View File

@ -693,7 +693,7 @@ OCL_PERF_TEST_P(PowFixture, Pow, ::testing::Combine(
///////////// iPow ////////////////////////
OCL_PERF_TEST_P(PowFixture, iPow, ::testing::Combine(
OCL_TEST_SIZES, OCL_PERF_ENUM(CV_8UC1, CV_8SC1,CV_16UC1,CV_16SC1,CV_32SC1)))
OCL_TEST_SIZES, OCL_PERF_ENUM(CV_8UC1, CV_8UC3, CV_8SC1, CV_16UC1, CV_16SC1, CV_32SC1, CV_32FC1, CV_64FC1)))
{
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
@ -705,7 +705,7 @@ OCL_PERF_TEST_P(PowFixture, iPow, ::testing::Combine(
randu(src, 0, 100);
declare.in(src).out(dst);
OCL_TEST_CYCLE() cv::pow(src, 7.0, dst);
OCL_TEST_CYCLE() cv::pow(src, 3, dst);
SANITY_CHECK_NOTHING();
}

View File

@ -938,9 +938,40 @@ static bool ocl_pow(InputArray _src, double power, OutputArray _dst,
bool issqrt = std::abs(power - 0.5) < DBL_EPSILON;
const char * const op = issqrt ? "OP_SQRT" : is_ipower ? "OP_POWN" : "OP_POW";
// Note: channels are unrolled
std::string extra_opts ="";
if (is_ipower)
{
int wdepth = CV_32F;
if (depth == CV_64F)
wdepth = CV_64F;
else if (depth == CV_16F)
wdepth = CV_16F;
char cvt[2][50];
extra_opts = format(
" -D srcT1=%s -DsrcT1_C1=%s"
" -D srcT2=int -D workST=int"
" -D workT=%s -D wdepth=%d -D convertToWT1=%s"
" -D convertToDT=%s"
" -D workT1=%s",
ocl::typeToStr(CV_MAKE_TYPE(depth, 1)),
ocl::typeToStr(CV_MAKE_TYPE(depth, 1)),
ocl::typeToStr(CV_MAKE_TYPE(wdepth, 1)),
wdepth,
ocl::convertTypeStr(depth, wdepth, 1, cvt[0], sizeof(cvt[0])),
ocl::convertTypeStr(wdepth, depth, 1, cvt[1], sizeof(cvt[1])),
ocl::typeToStr(wdepth)
);
}
ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
format("-D dstT=%s -D DEPTH_dst=%d -D rowsPerWI=%d -D %s -D UNARY_OP%s",
ocl::typeToStr(depth), depth, rowsPerWI, op,
format("-D cn=%d -D dstT=%s -D dstT_C1=%s -D DEPTH_dst=%d -D rowsPerWI=%d -D %s%s%s%s",
1,
ocl::typeToStr(depth), ocl::typeToStr(depth), depth, rowsPerWI, op,
" -D UNARY_OP=1",
extra_opts.empty() ? "" : extra_opts.c_str(),
doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
if (k.empty())
return false;

View File

@ -80,6 +80,10 @@
#error "Kernel configuration error: ambiguous 'depth' value is defined, use 'DEPTH_dst' instead"
#endif
#define CAT__(x, y) x ## y
#define CAT_(x, y) CAT__(x, y)
#define CAT(x, y) CAT_(x, y)
#if DEPTH_dst < 5 /* CV_32F */
#define CV_DST_TYPE_IS_INTEGER
@ -325,9 +329,12 @@
#define PROCESS_ELEM storedst(pow(srcelem1, srcelem2))
#elif defined OP_POWN
#undef workT
#define workT int
#define PROCESS_ELEM storedst(pown(srcelem1, srcelem2))
#if cn > 1
#define PROCESS_INIT CAT(int, cn) powi = (CAT(int, cn))srcelem2;
#else // cn
#define PROCESS_INIT int powi = srcelem2;
#endif
#define PROCESS_ELEM storedst(convertToDT(pown(srcelem1, powi)))
#elif defined OP_SQRT
#if CV_DST_TYPE_FIT_32F
@ -469,7 +476,7 @@
#define srcelem2 srcelem2_
#endif
#if cn == 3
#if !defined(PROCESS_INIT) && cn == 3
#undef srcelem2
#define srcelem2 (workT)(srcelem2_.x, srcelem2_.y, srcelem2_.z)
#endif
@ -517,6 +524,10 @@ __kernel void KF(__global const uchar * srcptr1, int srcstep1, int srcoffset1,
int x = get_global_id(0);
int y0 = get_global_id(1) * rowsPerWI;
#ifdef PROCESS_INIT
PROCESS_INIT
#endif
if (x < cols)
{
int mask_index = mad24(y0, maskstep, x + maskoffset);
@ -542,6 +553,10 @@ __kernel void KF(__global const uchar * srcptr1, int srcstep1, int srcoffset1,
int x = get_global_id(0);
int y0 = get_global_id(1) * rowsPerWI;
#ifdef PROCESS_INIT
PROCESS_INIT
#endif
if (x < cols)
{
int src1_index = mad24(y0, srcstep1, mad24(x, (int)sizeof(srcT1_C1) * cn, srcoffset1));
@ -564,6 +579,10 @@ __kernel void KF(__global const uchar * srcptr1, int srcstep1, int srcoffset1,
int x = get_global_id(0);
int y0 = get_global_id(1) * rowsPerWI;
#ifdef PROCESS_INIT
PROCESS_INIT
#endif
if (x < cols)
{
int mask_index = mad24(y0, maskstep, x + maskoffset);

View File

@ -132,19 +132,25 @@ PARAM_TEST_CASE(ArithmTestBase, MatDepth, Channels, bool)
use_roi = GET_PARAM(2);
}
void generateTestData(bool with_val_in_range = false)
void generateTestData(bool with_val_in_range = false,
double minVal1 = std::numeric_limits<double>::quiet_NaN(), double maxVal1 = std::numeric_limits<double>::quiet_NaN(),
double minVal2 = std::numeric_limits<double>::quiet_NaN(), double maxVal2 = std::numeric_limits<double>::quiet_NaN()
)
{
const int type = CV_MAKE_TYPE(depth, cn);
double minV = cvtest::getMinVal(type);
double maxV = cvtest::getMaxVal(type);
double minV1 = cvIsNaN(minVal1) ? 2 : minVal1;
double maxV1 = cvIsNaN(maxVal1) ? 11 : maxVal1;
double minV2 = cvIsNaN(minVal2) ? std::max(-1540., cvtest::getMinVal(type)) : minVal2;
double maxV2 = cvIsNaN(maxVal2) ? std::min(1740., cvtest::getMaxVal(type)) : maxVal2;
Size roiSize = randomSize(1, MAX_VALUE);
Border src1Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
randomSubMat(src1, src1_roi, roiSize, src1Border, type, 2, 11); // FIXIT: Test with minV, maxV
randomSubMat(src1, src1_roi, roiSize, src1Border, type, minV1, maxV1); // FIXIT: Test with minV, maxV
Border src2Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
randomSubMat(src2, src2_roi, roiSize, src2Border, type, std::max(-1540., minV), std::min(1740., maxV));
randomSubMat(src2, src2_roi, roiSize, src2Border, type, minV2, maxV2);
Border dst1Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
randomSubMat(dst1, dst1_roi, roiSize, dst1Border, type, 5, 16);
@ -162,8 +168,8 @@ PARAM_TEST_CASE(ArithmTestBase, MatDepth, Channels, bool)
if (with_val_in_range)
{
val_in_range = cv::Scalar(rng.uniform(minV, maxV), rng.uniform(minV, maxV),
rng.uniform(minV, maxV), rng.uniform(minV, maxV));
val_in_range = cv::Scalar(rng.uniform(minV1, maxV1), rng.uniform(minV1, maxV1),
rng.uniform(minV1, maxV1), rng.uniform(minV1, maxV1));
}
UMAT_UPLOAD_INPUT_PARAMETER(src1);
@ -844,14 +850,30 @@ OCL_TEST_P(Pow, Mat)
for (int j = 0; j < 1/*test_loop_times*/; j++)
for (int k = 0, size = sizeof(pows) / sizeof(double); k < size; ++k)
{
SCOPED_TRACE(pows[k]);
SCOPED_TRACE(cv::format("POW=%g", pows[k]));
generateTestData();
generateTestData(false, 1, 3);
OCL_OFF(cv::pow(src1_roi, pows[k], dst1_roi));
OCL_ON(cv::pow(usrc1_roi, pows[k], udst1_roi));
OCL_EXPECT_MATS_NEAR_RELATIVE(dst1, 1e-5);
if (cvtest::debugLevel >= 100)
{
cv::Rect roi(0, 0, 4, 4);
std::cout << src1_roi(roi) << std::endl;
std::cout << dst1_roi(roi) << std::endl;
std::cout << udst1_roi(roi) << std::endl;
Mat diff;
cv::absdiff(dst1_roi, udst1_roi, diff);
std::cout << std::endl << diff(roi) << std::endl;
std::cout << std::endl << dst1_roi << std::endl;
std::cout << std::endl << udst1_roi << std::endl;
std::cout << std::endl << diff << std::endl;
}
}
}