diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp
index f733dd11fb..0cff7b5fdd 100644
--- a/modules/core/src/ocl.cpp
+++ b/modules/core/src/ocl.cpp
@@ -2379,7 +2379,7 @@ struct Program::Impl
             size_t retsz = 0;
             retval = clGetProgramBuildInfo(handle, (cl_device_id)deviceList[0],
                                            CL_PROGRAM_BUILD_LOG, 0, 0, &retsz);
-            if( retval >= 0 && retsz > 0 )
+            if( retval >= 0 && retsz > 1 )
             {
                 AutoBuffer bufbuf(retsz + 16);
                 char* buf = bufbuf;
diff --git a/modules/core/src/opencl/arithm.cl b/modules/core/src/opencl/arithm.cl
index b4cdb53f2c..9c86057caa 100644
--- a/modules/core/src/opencl/arithm.cl
+++ b/modules/core/src/opencl/arithm.cl
@@ -58,10 +58,10 @@
 */
 
 #ifdef DOUBLE_SUPPORT
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef cl_amd_fp64
 #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
 #define CV_EPSILON DBL_EPSILON
 #define CV_PI M_PI
@@ -76,12 +76,18 @@
 
 #ifndef workT
 
+    #ifndef srcT1
     #define srcT1 dstT
+    #endif
+    #ifndef srcT2
     #define srcT2 dstT
+    #endif
     #define workT dstT
-    #define srcelem1 *(__global dstT*)(srcptr1 + src1_index)
-    #define srcelem2 *(__global dstT*)(srcptr2 + src2_index)
+    #define srcelem1 *(__global srcT1*)(srcptr1 + src1_index)
+    #define srcelem2 *(__global srcT2*)(srcptr2 + src2_index)
+    #ifndef convertToDT
     #define convertToDT noconvert
+    #endif
 
 #else
 
@@ -160,6 +166,11 @@
 #elif defined OP_MAG
 #define PROCESS_ELEM dstelem = hypot(srcelem1, srcelem2)
 
+#elif defined OP_ABS_NOSAT
+#define PROCESS_ELEM \
+    dstT v = convertToDT(srcelem1); \
+    dstelem = v >= 0 ? v : -v
+
 #elif defined OP_PHASE_RADIANS
 #define PROCESS_ELEM \
     workT tmp = atan2(srcelem2, srcelem1); \
diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp
index b19be3b476..d04857ce81 100644
--- a/modules/core/src/stat.cpp
+++ b/modules/core/src/stat.cpp
@@ -1760,15 +1760,83 @@ static NormDiffFunc getNormDiffFunc(int normType, int depth)
 
 }
 
+namespace cv {
+
+// OpenCL path for the single-array norm; handles NORM_INF, NORM_L1 and NORM_L2 only.
+// Returns false when the norm type is anything else, when the input is CV_64F but the
+// default device reports no double support, or when the abs kernel is empty or fails
+// to run. On a true return the computed norm has been written to 'result'.
+static bool ocl_norm( InputArray _src, int normType, double & result )
+{
+    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
+    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
+
+    if ( !(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2) ||
+         (!doubleSupport && depth == CV_64F))
+        return false;
+
+    UMat src = _src.getUMat();
+
+    if (normType == NORM_INF)
+    {
+        UMat abssrc;
+
+        if (depth != CV_8U && depth != CV_16U)
+        {
+            int wdepth = std::max(CV_32S, depth);
+            char cvt[50];
+
+            ocl::Kernel kabs("KF", ocl::core::arithm_oclsrc,
+                             format("-D UNARY_OP -D OP_ABS_NOSAT -D dstT=%s -D srcT1=%s -D convertToDT=%s%s",
+                                    ocl::typeToStr(wdepth), ocl::typeToStr(depth),
+                                    ocl::convertTypeStr(depth, wdepth, 1, cvt),
+                                    doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
+            if (kabs.empty())
+                return false;
+
+            abssrc.create(src.size(), CV_MAKE_TYPE(wdepth, cn));
+            kabs.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(abssrc, cn));
+
+            // cast explicitly: int -> size_t inside a braced initializer is a narrowing conversion
+            size_t globalsize[2] = { (size_t)src.cols * cn, (size_t)src.rows };
+            if (!kabs.run(2, globalsize, NULL, false))
+                return false;
+        }
+        else
+            abssrc = src; // CV_8U / CV_16U: no abs kernel is needed, use the input directly
+
+        cv::minMaxIdx(abssrc.reshape(1), NULL, &result);
+    }
+    else if (normType == NORM_L1 || normType == NORM_L2)
+    {
+        Scalar s;
+        bool unstype = depth == CV_8U || depth == CV_16U;
+
+        // NOTE(review): the outcome of ocl_sum is not checked here -- confirm it cannot fail
+        ocl_sum(src.reshape(1), s, normType == NORM_L2 ?
+            OCL_OP_SUM_SQR : (unstype ? OCL_OP_SUM : OCL_OP_SUM_ABS) );
+        result = normType == NORM_L1 ? s[0] : std::sqrt(s[0]);
+    }
+
+    return true;
+}
+
+}
+
 double cv::norm( InputArray _src, int normType, InputArray _mask )
 {
-    Mat src = _src.getMat(), mask = _mask.getMat();
-    int depth = src.depth(), cn = src.channels();
-
-    normType &= 7;
+    normType &= NORM_TYPE_MASK;
     CV_Assert( normType == NORM_INF || normType == NORM_L1 ||
                normType == NORM_L2 || normType == NORM_L2SQR ||
-               ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src.type() == CV_8U) );
+               ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && _src.type() == CV_8U) );
+
+    double _result = 0;
+    // use the OpenCL implementation when it accepts the input; otherwise fall through to the code below
+    if (ocl::useOpenCL() && _mask.empty() && _src.isUMat() && _src.dims() <= 2 && ocl_norm(_src, normType, _result))
+        return _result;
+
+    Mat src = _src.getMat(), mask = _mask.getMat();
+    int depth = src.depth(), cn = src.channels();
 
 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
     size_t total_size = src.total();
@@ -2047,9 +2115,62 @@ double cv::norm( InputArray _src, int normType, InputArray _mask )
     return result.d;
 }
 
+namespace cv {
+
+// OpenCL path for the norm of the difference of two arrays (NORM_INF, NORM_L1 or
+// NORM_L2, optionally combined with NORM_RELATIVE). A kernel built with OP_ABSDIFF
+// fills a temporary UMat 'diff' whose norm is then taken; for NORM_RELATIVE the value
+// is divided by norm(src2) + DBL_EPSILON. Returns false when it cannot handle the request.
+static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, double & result )
+{
+    int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
+    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
+    bool relative = (normType & NORM_RELATIVE) != 0;
+    normType &= ~NORM_RELATIVE;
+
+    if ( !(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2) ||
+         (!doubleSupport && depth == CV_64F))
+        return false;
+
+    int wdepth = std::max(CV_32S, depth);
+    char cvt[50];
+    ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
+                  format("-D BINARY_OP -D OP_ABSDIFF -D dstT=%s -D workT=dstT -D srcT1=%s -D srcT2=srcT1"
+                         " -D convertToDT=%s -D convertToWT1=convertToDT -D convertToWT2=convertToDT%s",
+                         ocl::typeToStr(wdepth), ocl::typeToStr(depth),
+                         ocl::convertTypeStr(depth, wdepth, 1, cvt),
+                         doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
+    if (k.empty())
+        return false;
+
+    UMat src1 = _src1.getUMat(), src2 = _src2.getUMat(), diff(src1.size(), CV_MAKE_TYPE(wdepth, cn));
+    k.args(ocl::KernelArg::ReadOnlyNoSize(src1), ocl::KernelArg::ReadOnlyNoSize(src2),
+           ocl::KernelArg::WriteOnly(diff, cn));
+
+    // cast explicitly: int -> size_t inside a braced initializer is a narrowing conversion
+    size_t globalsize[2] = { (size_t)diff.cols * cn, (size_t)diff.rows };
+    if (!k.run(2, globalsize, NULL, false))
+        return false;
+
+    result = cv::norm(diff, normType);
+    if (relative)
+        result /= cv::norm(src2, normType) + DBL_EPSILON;
+
+    return true;
+}
+
+}
 
 double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask )
 {
+    CV_Assert( _src1.size() == _src2.size() && _src1.type() == _src2.type() );
+
+    double _result = 0;
+    // use the OpenCL implementation when it accepts the inputs; otherwise fall through to the code below
+    if (ocl::useOpenCL() && _mask.empty() && _src1.isUMat() && _src2.isUMat() &&
+        _src1.dims() <= 2 && _src2.dims() <= 2 && ocl_norm(_src1, _src2, normType, _result))
+        return _result;
+
     if( normType & CV_RELATIVE )
     {
 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
@@ -2135,7 +2256,7 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m
     Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
     int depth = src1.depth(), cn = src1.channels();
 
-    CV_Assert( src1.size == src2.size && src1.type() == src2.type() );
+    CV_Assert( src1.size == src2.size );
 
     normType &= 7;
     CV_Assert( normType == NORM_INF || normType == NORM_L1 ||
diff --git a/modules/core/test/ocl/test_arithm.cpp b/modules/core/test/ocl/test_arithm.cpp
index 844be7bdf2..045fbd7c49 100644
--- a/modules/core/test/ocl/test_arithm.cpp
+++ b/modules/core/test/ocl/test_arithm.cpp
@@ -795,8 +795,8 @@ struct RepeatTestCase :
     {
         const int type = CV_MAKE_TYPE(depth, cn);
 
-        nx = 2;//randomInt(1, 4);
-        ny = 2;//randomInt(1, 4);
+        nx = randomInt(1, 4);
+        ny = randomInt(1, 4);
 
         Size srcRoiSize = randomSize(1, MAX_VALUE);
         Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
@@ -813,7 +813,7 @@ struct RepeatTestCase :
 
 typedef RepeatTestCase Repeat;
 
-OCL_TEST_P(Repeat, DISABLED_Mat)
+OCL_TEST_P(Repeat, Mat)
 {
     for (int i = 0; i < test_loop_times; ++i)
     {
@@ -1004,6 +1004,111 @@ OCL_TEST_P(Flip, BOTH)
     }
 }
 
+//////////////////////////////// Norm /////////////////////////////////////////////////
+
+// Predicate for EXPECT_PRED3: true when |actual - expected| is below eps scaled by
+// (1 + |actual|). Written without a division so that actual == 0 does not yield NaN
+// (or a negative ratio for a negative 'actual') and fail the comparison spuriously.
+static bool relativeError(double actual, double expected, double eps)
+{
+    return std::abs(actual - expected) < eps * (1 + std::abs(actual));
+}
+
+typedef ArithmTestBase Norm;
+
+OCL_TEST_P(Norm, NORM_INF_1arg)
+{
+    for (int j = 0; j < test_loop_times; j++)
+    {
+        generateTestData();
+
+        OCL_OFF(const double cpuRes = cv::norm(src1_roi, NORM_INF));
+        OCL_ON(const double gpuRes = cv::norm(usrc1_roi, NORM_INF));
+
+        EXPECT_NEAR(cpuRes, gpuRes, 0.1);
+    }
+}
+
+OCL_TEST_P(Norm, NORM_L1_1arg)
+{
+    for (int j = 0; j < test_loop_times; j++)
+    {
+        generateTestData();
+
+        OCL_OFF(const double cpuRes = cv::norm(src1_roi, NORM_L1));
+        OCL_ON(const double gpuRes = cv::norm(usrc1_roi, NORM_L1));
+
+        EXPECT_PRED3(relativeError, cpuRes, gpuRes, 1e-6);
+    }
+}
+
+OCL_TEST_P(Norm, NORM_L2_1arg)
+{
+    for (int j = 0; j < test_loop_times; j++)
+    {
+        generateTestData();
+
+        OCL_OFF(const double cpuRes = cv::norm(src1_roi, NORM_L2));
+        OCL_ON(const double gpuRes = cv::norm(usrc1_roi, NORM_L2));
+
+        EXPECT_PRED3(relativeError, cpuRes, gpuRes, 1e-6);
+    }
+}
+
+OCL_TEST_P(Norm, NORM_INF_2args)
+{
+    for (int relative = 0; relative < 2; ++relative)
+        for (int j = 0; j < test_loop_times; j++)
+        {
+            generateTestData();
+
+            int type = NORM_INF;
+            if (relative == 1)
+                type |= NORM_RELATIVE;
+
+            OCL_OFF(const double cpuRes = cv::norm(src1_roi, src2_roi, type));
+            OCL_ON(const double gpuRes = cv::norm(usrc1_roi, usrc2_roi, type));
+
+            EXPECT_NEAR(cpuRes, gpuRes, 0.1);
+        }
+}
+
+OCL_TEST_P(Norm, NORM_L1_2args)
+{
+    for (int relative = 0; relative < 2; ++relative)
+        for (int j = 0; j < test_loop_times; j++)
+        {
+            generateTestData();
+
+            int type = NORM_L1;
+            if (relative == 1)
+                type |= NORM_RELATIVE;
+
+            OCL_OFF(const double cpuRes = cv::norm(src1_roi, src2_roi, type));
+            OCL_ON(const double gpuRes = cv::norm(usrc1_roi, usrc2_roi, type));
+
+            EXPECT_PRED3(relativeError, cpuRes, gpuRes, 1e-6);
+        }
+}
+
+OCL_TEST_P(Norm, NORM_L2_2args)
+{
+    for (int relative = 0; relative < 2; ++relative)
+        for (int j = 0; j < test_loop_times; j++)
+        {
+            generateTestData();
+
+            int type = NORM_L2;
+            if (relative == 1)
+                type |= NORM_RELATIVE;
+
+            OCL_OFF(const double cpuRes = cv::norm(src1_roi, src2_roi, type));
+            OCL_ON(const double gpuRes = cv::norm(usrc1_roi, usrc2_roi, type));
+
+            EXPECT_PRED3(relativeError, cpuRes, gpuRes, 1e-6);
+        }
+}
+
 //////////////////////////////////////// Instantiation /////////////////////////////////////////
 
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine(::testing::Values(CV_8U, CV_8S), OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
@@ -1017,10 +1122,10 @@ OCL_INSTANTIATE_TEST_CASE_P(Arithm, Absdiff, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHA
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, CartToPolar, Combine(testing::Values(CV_32F, CV_64F), OCL_ALL_CHANNELS, Bool()));
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, PolarToCart, Combine(testing::Values(CV_32F, CV_64F), OCL_ALL_CHANNELS, Bool()));
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, Transpose, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
-//OCL_INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_and, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
-//OCL_INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_not, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
-//OCL_INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_xor, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
-//OCL_INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_or, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
+OCL_INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_and, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
+OCL_INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_not, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
+OCL_INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_xor, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
+OCL_INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_or, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, Pow, Combine(testing::Values(CV_32F, CV_64F), OCL_ALL_CHANNELS, Bool()));
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, Compare, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, AddWeighted, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
@@ -1033,7 +1138,8 @@ OCL_INSTANTIATE_TEST_CASE_P(Arithm, Log, Combine(::testing::Values(CV_32F, CV_64
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, Exp, Combine(::testing::Values(CV_32F, CV_64F), OCL_ALL_CHANNELS, Bool()));
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, Phase, Combine(::testing::Values(CV_32F, CV_64F), OCL_ALL_CHANNELS, Bool()));
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, Magnitude, Combine(::testing::Values(CV_32F, CV_64F), OCL_ALL_CHANNELS, Bool()));
-OCL_INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
+OCL_INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
+OCL_INSTANTIATE_TEST_CASE_P(Arithm, Norm, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
 
 } } // namespace cvtest::ocl
 