From 9d23a0cb819e12cc706ec4623ffea071d651b3ab Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Thu, 21 Nov 2013 23:25:17 +0400 Subject: [PATCH] fixed ocl_arithm_op; fix for 3-channel images is needed --- modules/core/include/opencv2/core/mat.hpp | 25 --------- modules/core/src/arithm.cpp | 60 ++++++++-------------- modules/core/src/ocl.cpp | 1 - modules/core/src/opencl/arithm.cl | 12 +---- modules/core/test/ocl/test_arithm.cpp | 56 ++++++++++++++++++-- modules/ts/include/opencv2/ts/ocl_test.hpp | 43 ++++++++++++---- 6 files changed, 109 insertions(+), 88 deletions(-) diff --git a/modules/core/include/opencv2/core/mat.hpp b/modules/core/include/opencv2/core/mat.hpp index 79fac33ae1..2f38f8bbb8 100644 --- a/modules/core/include/opencv2/core/mat.hpp +++ b/modules/core/include/opencv2/core/mat.hpp @@ -2340,31 +2340,6 @@ CV_EXPORTS MatExpr max(double s, const Mat& a); CV_EXPORTS MatExpr abs(const Mat& m); CV_EXPORTS MatExpr abs(const MatExpr& e); -namespace traits { - -template -struct GetMatForRead -{ -}; -template <> -struct GetMatForRead -{ - static const Mat get(const Mat& m) { return m; } -}; -template <> -struct GetMatForRead -{ - static const Mat get(const UMat& m) { return m.getMat(ACCESS_READ); } -}; - -} // namespace traits - -template -const Mat getMatForRead(const T& mat) -{ - return traits::GetMatForRead::get(mat); -} - } // cv #include "opencv2/core/mat.inl.hpp" diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp index 9c36d18e59..c3ba8c6d64 100644 --- a/modules/core/src/arithm.cpp +++ b/modules/core/src/arithm.cpp @@ -1283,47 +1283,29 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, int type1 = _src1.type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1); bool haveMask = !_mask.empty(); - if( (haveMask || haveScalar) && cn > 4 ) + if( ((haveMask || haveScalar) && cn > 4) || cn == 3) // TODO need fix for 3 channels return false; - int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype), wdepth = CV_MAT_DEPTH(wtype); + int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype), wdepth = std::max(CV_32S, CV_MAT_DEPTH(wtype)); wtype = CV_MAKETYPE(wdepth, cn); - int type2 = haveScalar ? _src2.type() : wtype, depth2 = CV_MAT_DEPTH(type2); + int type2 = haveScalar ? wtype : _src2.type(), depth2 = CV_MAT_DEPTH(type2); + int kercn = haveMask || haveScalar ? cn : 1; UMat src1 = _src1.getUMat(), src2; UMat dst = _dst.getUMat(), mask = _mask.getUMat(); - char opts[1024]; - int kercn = haveMask || haveScalar ? cn : 1; - - if( (depth1 == depth2 || haveScalar) && ddepth == depth1 && wdepth == depth1 ) - { - const char* oclopstr = oclop2str[oclop]; - if( wdepth <= CV_16S ) - { - oclopstr = oclop == OCL_OP_ADD ? "OCL_OP_ADD_SAT" : - oclop == OCL_OP_SUB ? "OCL_OP_SUB_SAT" : - oclop == OCL_OP_RSUB ? "OCL_OP_RSUB_SAT" : oclopstr; - } - sprintf(opts, "-D %s%s -D %s -D dstT=%s", - (haveMask ? "MASK_" : ""), (haveScalar ? "UNARY_OP" : "BINARY_OP"), - oclop2str[oclop], ocl::typeToStr(CV_MAKETYPE(ddepth, kercn))); - } - else - { - char cvtstr[3][32]; - sprintf(opts, "-D %s%s -D %s -D srcT1=%s -D srcT2=%s " - "-D dstT=%s -D workT=%s -D convertToWT1=%s " - "-D convertToWT2=%s -D convertToDT=%s", - (haveMask ? "MASK_" : ""), (haveScalar ? "UNARY_OP" : "BINARY_OP"), - oclop2str[oclop], ocl::typeToStr(CV_MAKETYPE(depth1, kercn)), - ocl::typeToStr(CV_MAKETYPE(depth2, kercn)), - ocl::typeToStr(CV_MAKETYPE(ddepth, kercn)), - ocl::typeToStr(CV_MAKETYPE(wdepth, kercn)), - ocl::convertTypeStr(depth1, wdepth, kercn, cvtstr[0]), - ocl::convertTypeStr(depth2, wdepth, kercn, cvtstr[1]), - ocl::convertTypeStr(wdepth, ddepth, kercn, cvtstr[2])); - } + char cvtstr[3][32], opts[1024]; + sprintf(opts, "-D %s%s -D %s -D srcT1=%s -D srcT2=%s " + "-D dstT=%s -D workT=%s -D convertToWT1=%s " + "-D convertToWT2=%s -D convertToDT=%s", + (haveMask ? "MASK_" : ""), (haveScalar ? "UNARY_OP" : "BINARY_OP"), + oclop2str[oclop], ocl::typeToStr(CV_MAKETYPE(depth1, kercn)), + ocl::typeToStr(CV_MAKETYPE(depth2, kercn)), + ocl::typeToStr(CV_MAKETYPE(ddepth, kercn)), + ocl::typeToStr(CV_MAKETYPE(wdepth, kercn)), + ocl::convertTypeStr(depth1, wdepth, kercn, cvtstr[0]), + ocl::convertTypeStr(depth2, wdepth, kercn, cvtstr[1]), + ocl::convertTypeStr(wdepth, ddepth, kercn, cvtstr[2])); const uchar* usrdata_p = (const uchar*)usrdata; const double* usrdata_d = (const double*)usrdata; @@ -1336,7 +1318,6 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, usrdata_f[i] = (float)usrdata_d[i]; usrdata_p = (const uchar*)usrdata_f; } - size_t usrdata_esz = CV_ELEM_SIZE(wdepth); ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts); if( k.empty() ) @@ -1368,6 +1349,7 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, } else { + size_t usrdata_esz = CV_ELEM_SIZE(wdepth); src2 = _src2.getUMat(); ocl::KernelArg src2arg = ocl::KernelArg::ReadOnlyNoSize(src2, cscale); @@ -1392,8 +1374,8 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, } } - size_t globalsize[] = { src1.cols*(cn/kercn), src1.rows }; - return k.run(2, globalsize, 0, false); + size_t globalsize[] = { src1.cols*cscale, src1.rows }; + return k.run(2, globalsize, NULL, false); } @@ -1410,8 +1392,7 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, int wtype, dims1 = psrc1->dims(), dims2 = psrc2->dims(); Size sz1 = dims1 <= 2 ? psrc1->size() : Size(); Size sz2 = dims2 <= 2 ? psrc2->size() : Size(); - bool use_opencl = (kind1 == _InputArray::UMAT || kind2 == _InputArray::UMAT || _dst.kind() == _OutputArray::UMAT) && - ocl::useOpenCL() && dims1 <= 2 && dims2 <= 2; + bool use_opencl = _dst.kind() == _OutputArray::UMAT && ocl::useOpenCL() && dims1 <= 2 && dims2 <= 2; bool src1Scalar = checkScalar(*psrc1, type2, kind1, kind2); bool src2Scalar = checkScalar(*psrc2, type1, kind2, kind1); @@ -1426,6 +1407,7 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, (!usrdata ? type1 : std::max(depth1, CV_32F)), usrdata, oclop, false)) return; + Mat src1 = psrc1->getMat(), src2 = psrc2->getMat(), dst = _dst.getMat(); Size sz = getContinuousSize(src1, src2, dst, src1.channels()); tab[depth1](src1.data, src1.step, src2.data, src2.step, dst.data, dst.step, sz, usrdata); diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index 8cd004723b..c042f0259f 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -2347,7 +2347,6 @@ struct Program::Impl void** deviceList = deviceListBuf; for( i = 0; i < n; i++ ) deviceList[i] = ctx.device(i).ptr(); - printf("Building the OpenCL program ...\n"); retval = clBuildProgram(handle, n, (const cl_device_id*)deviceList, buildflags.c_str(), 0, 0); diff --git a/modules/core/src/opencl/arithm.cl b/modules/core/src/opencl/arithm.cl index 114abe8405..2f1915cd08 100644 --- a/modules/core/src/opencl/arithm.cl +++ b/modules/core/src/opencl/arithm.cl @@ -89,21 +89,12 @@ #define EXTRA_PARAMS -#if defined OP_ADD_SAT -#define PROCESS_ELEM dstelem = add_sat(srcelem1, srcelem2) - -#elif defined OP_ADD +#if defined OP_ADD #define PROCESS_ELEM dstelem = convertToDT(srcelem1 + srcelem2) -#elif defined OP_SUB_SAT -#define PROCESS_ELEM dstelem = sub_sat(srcelem1, srcelem2) - #elif defined OP_SUB #define PROCESS_ELEM dstelem = convertToDT(srcelem1 - srcelem2) -#elif defined OP_RSUB_SAT -#define PROCESS_ELEM dstelem = sub_sat(srcelem2, srcelem1) - #elif defined OP_RSUB #define PROCESS_ELEM dstelem = convertToDT(srcelem2 - srcelem1) @@ -226,7 +217,6 @@ __kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1, int dst_index = mad24(y, dststep, x*(int)sizeof(dstT) + dstoffset); PROCESS_ELEM; - //printf("(x=%d, y=%d). %d, %d, %d\n", x, y, (int)srcelem1, (int)srcelem2, (int)dstelem); } } diff --git a/modules/core/test/ocl/test_arithm.cpp b/modules/core/test/ocl/test_arithm.cpp index 7763f399ae..51f50c1c47 100644 --- a/modules/core/test/ocl/test_arithm.cpp +++ b/modules/core/test/ocl/test_arithm.cpp @@ -149,7 +149,7 @@ OCL_TEST_P(Add, Scalar) generateTestData(); OCL_OFF(cv::add(src1_roi, val, dst1_roi)); - OCL_ON(cv::add(usrc1_roi, val, udst1_roi)); + OCL_ON(cv::add(val, usrc1_roi, udst1_roi)); Near(1e-5); } } @@ -166,12 +166,62 @@ OCL_TEST_P(Add, Scalar_Mask) } } +//////////////////////////////////////// Subtract ////////////////////////////////////////////// +typedef ArithmTestBase Subtract; + +OCL_TEST_P(Subtract, Mat) +{ + for (int j = 0; j < test_loop_times; j++) + { + generateTestData(); + + OCL_OFF(cv::subtract(src1_roi, src2_roi, dst1_roi)); + OCL_ON(cv::subtract(usrc1_roi, usrc2_roi, udst1_roi)); + Near(0); + } +} + +OCL_TEST_P(Subtract, Mat_Mask) +{ + for (int j = 0; j < test_loop_times; j++) + { + generateTestData(); + + OCL_OFF(cv::subtract(src1_roi, src2_roi, dst1_roi, mask_roi)); + OCL_ON(cv::subtract(usrc1_roi, usrc2_roi, udst1_roi, umask_roi)); + Near(0); + } +} + +OCL_TEST_P(Subtract, Scalar) +{ + for (int j = 0; j < test_loop_times; j++) + { + generateTestData(); + + OCL_OFF(cv::subtract(val, src1_roi, dst1_roi)); + OCL_ON(cv::subtract(val, usrc1_roi, udst1_roi)); + Near(1e-5); + } +} + +OCL_TEST_P(Subtract, Scalar_Mask) +{ + for (int j = 0; j < test_loop_times; j++) + { + generateTestData(); + + OCL_OFF(cv::subtract(src1_roi, val, dst1_roi, mask_roi)); + OCL_ON(cv::subtract(usrc1_roi, val, udst1_roi, umask_roi)); + Near(1e-5); + } +} //////////////////////////////////////// Instantiation ///////////////////////////////////////// -// TODO FIXIT Invalid "add" implementation -//OCL_INSTANTIATE_TEST_CASE_P(Arithm, Add, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool())); +OCL_INSTANTIATE_TEST_CASE_P(Arithm, Add, Combine(OCL_ALL_DEPTHS, ::testing::Values(1, 2, 4), Bool())); +OCL_INSTANTIATE_TEST_CASE_P(Arithm, Subtract, Combine(OCL_ALL_DEPTHS, ::testing::Values(1, 2, 4), Bool())); } } // namespace cvtest::ocl diff --git a/modules/ts/include/opencv2/ts/ocl_test.hpp b/modules/ts/include/opencv2/ts/ocl_test.hpp index 5c6aaef799..eb4e2a0d2b 100644 --- a/modules/ts/include/opencv2/ts/ocl_test.hpp +++ b/modules/ts/include/opencv2/ts/ocl_test.hpp @@ -60,6 +60,31 @@ namespace ocl { using namespace cv; using namespace testing; +namespace traits { + +template +struct GetMatForRead +{ +}; +template <> +struct GetMatForRead +{ + static const Mat get(const Mat& m) { return m; } +}; +template <> +struct GetMatForRead +{ + static const Mat get(const UMat& m) { return m.getMat(ACCESS_READ); } +}; + +} // namespace traits + +template +const Mat getMatForRead(const T& mat) +{ + return traits::GetMatForRead::get(mat); +} + extern int test_loop_times; #define MAX_VALUE 357 @@ -203,32 +228,32 @@ struct TestUtils template static double checkNorm(const T1& m) { - return checkNorm(cv::getMatForRead(m)); + return checkNorm(getMatForRead(m)); } template static double checkNorm(const T1& m1, const T2& m2) { - return checkNorm(cv::getMatForRead(m1), cv::getMatForRead(m2)); + return checkNorm(getMatForRead(m1), getMatForRead(m2)); } template static double checkSimilarity(const T1& m1, const T2& m2) { - return checkSimilarity(cv::getMatForRead(m1), cv::getMatForRead(m2)); + return checkSimilarity(getMatForRead(m1), getMatForRead(m2)); } template static inline double checkNormRelative(const T1& m1, const T2& m2) { - const Mat _m1 = cv::getMatForRead(m1); - const Mat _m2 = cv::getMatForRead(m2); + const Mat _m1 = getMatForRead(m1); + const Mat _m2 = getMatForRead(m2); return checkNormRelative(_m1, _m2); } template static void showDiff(const T1& src, const T2& gold, const T3& actual, double eps, bool alwaysShow = false) { - const Mat _src = cv::getMatForRead(src); - const Mat _gold = cv::getMatForRead(gold); - const Mat _actual = cv::getMatForRead(actual); + const Mat _src = getMatForRead(src); + const Mat _gold = getMatForRead(gold); + const Mat _actual = getMatForRead(actual); showDiff(_src, _gold, _actual, eps, alwaysShow); } }; @@ -277,7 +302,7 @@ IMPLEMENT_PARAM_CLASS(Channels, int) #define OCL_OFF(fn) cv::ocl::setUseOpenCL(false); fn #define OCL_ON(fn) cv::ocl::setUseOpenCL(true); fn -#define OCL_ALL_DEPTHS Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F) +#define OCL_ALL_DEPTHS Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F) #define OCL_ALL_CHANNELS Values(1, 2, 3, 4) #define OCL_INSTANTIATE_TEST_CASE_P(prefix, test_case_name, generator) \