From 49474903cb6eebf37a68f7f6f43e3c67db33bafd Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Tue, 3 Dec 2013 00:41:07 +0400 Subject: [PATCH] refactoring --- modules/core/src/arithm.cpp | 18 ++++++++------- modules/core/src/mathfuncs.cpp | 38 ++++++++++++++++++------------- modules/core/src/matrix.cpp | 6 +++-- modules/core/src/opencl/reduce.cl | 14 ++++-------- modules/core/src/stat.cpp | 14 ++++++++---- 5 files changed, 51 insertions(+), 39 deletions(-) diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp index 72c27c5397..bcd11d2566 100644 --- a/modules/core/src/arithm.cpp +++ b/modules/core/src/arithm.cpp @@ -2598,14 +2598,6 @@ static bool ocl_compare(InputArray _src1, InputArray _src2, OutputArray _dst, in if (!doubleSupport && (depth == CV_64F || _src2.depth() == CV_64F)) return false; - CV_Assert(type == type2); - UMat src1 = _src1.getUMat(), src2 = _src2.getUMat(); - Size size = src1.size(); - CV_Assert(size == src2.size()); - - _dst.create(size, CV_8UC(cn)); - UMat dst = _dst.getUMat(); - const char * const operationMap[] = { "==", ">", ">=", "<", "<=", "!=" }; ocl::Kernel k("KF", ocl::core::arithm_oclsrc, format("-D BINARY_OP -D srcT1=%s -D workT=srcT1" @@ -2613,6 +2605,16 @@ static bool ocl_compare(InputArray _src1, InputArray _src2, OutputArray _dst, in ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), operationMap[op], doubleSupport ? " -D DOUBLE_SUPPORT" : "")); + if (k.empty()) + return false; + + CV_Assert(type == type2); + UMat src1 = _src1.getUMat(), src2 = _src2.getUMat(); + Size size = src1.size(); + CV_Assert(size == src2.size()); + + _dst.create(size, CV_8UC(cn)); + UMat dst = _dst.getUMat(); k.args(ocl::KernelArg::ReadOnlyNoSize(src1), ocl::KernelArg::ReadOnlyNoSize(src2), diff --git a/modules/core/src/mathfuncs.cpp b/modules/core/src/mathfuncs.cpp index aa94e03d0a..79959435d3 100644 --- a/modules/core/src/mathfuncs.cpp +++ b/modules/core/src/mathfuncs.cpp @@ -508,6 +508,14 @@ static bool ocl_cartToPolar( InputArray _src1, InputArray _src2, (depth == CV_64F && !doubleSupport) ) return false; + ocl::Kernel k("KF", ocl::core::arithm_oclsrc, + format("-D BINARY_OP -D dstT=%s -D OP_CTP_%s%s", + ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), + angleInDegrees ? "AD" : "AR", + doubleSupport ? " -D DOUBLE_SUPPORT" : "")); + if (k.empty()) + return false; + UMat src1 = _src1.getUMat(), src2 = _src2.getUMat(); Size size = src1.size(); CV_Assert( size == src2.size() ); @@ -516,12 +524,6 @@ static bool ocl_cartToPolar( InputArray _src1, InputArray _src2, _dst2.create(size, type); UMat dst1 = _dst1.getUMat(), dst2 = _dst2.getUMat(); - ocl::Kernel k("KF", ocl::core::arithm_oclsrc, - format("-D BINARY_OP -D dstT=%s -D OP_CTP_%s%s", - ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), - angleInDegrees ? "AD" : "AR", - doubleSupport ? " -D DOUBLE_SUPPORT" : "")); - k.args(ocl::KernelArg::ReadOnlyNoSize(src1), ocl::KernelArg::ReadOnlyNoSize(src2), ocl::KernelArg::WriteOnly(dst1, cn), @@ -690,6 +692,14 @@ static bool ocl_polarToCart( InputArray _mag, InputArray _angle, if ( !doubleSupport && depth == CV_64F ) return false; + ocl::Kernel k("KF", ocl::core::arithm_oclsrc, + format("-D dstT=%s -D BINARY_OP -D OP_PTC_%s%s", + ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), + angleInDegrees ? "AD" : "AR", + doubleSupport ? " -D DOUBLE_SUPPORT" : "")); + if (k.empty()) + return false; + UMat mag = _mag.getUMat(), angle = _angle.getUMat(); Size size = angle.size(); CV_Assert(mag.size() == size); @@ -698,12 +708,6 @@ static bool ocl_polarToCart( InputArray _mag, InputArray _angle, _dst2.create(size, type); UMat dst1 = _dst1.getUMat(), dst2 = _dst2.getUMat(); - ocl::Kernel k("KF", ocl::core::arithm_oclsrc, - format("-D dstT=%s -D BINARY_OP -D OP_PTC_%s%s", - ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), - angleInDegrees ? "AD" : "AR", - doubleSupport ? " -D DOUBLE_SUPPORT" : "")); - k.args(ocl::KernelArg::ReadOnlyNoSize(mag), ocl::KernelArg::ReadOnlyNoSize(angle), ocl::KernelArg::WriteOnly(dst1, cn), ocl::KernelArg::WriteOnlyNoSize(dst2)); @@ -2037,13 +2041,15 @@ static bool ocl_pow(InputArray _src, double power, OutputArray _dst) (depth == CV_64F && !doubleSupport) ) return false; - UMat src = _src.getUMat(); - _dst.create(src.size(), type); - UMat dst = _dst.getUMat(); - ocl::Kernel k("KF", ocl::core::arithm_oclsrc, format("-D dstT=%s -D OP_POW -D UNARY_OP%s", ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), doubleSupport ? " -D DOUBLE_SUPPORT" : "")); + if (k.empty()) + return false; + + UMat src = _src.getUMat(); + _dst.create(src.size(), type); + UMat dst = _dst.getUMat(); ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src), dstarg = ocl::KernelArg::WriteOnly(dst, cn); diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp index b2b164e45f..4e9be9807c 100644 --- a/modules/core/src/matrix.cpp +++ b/modules/core/src/matrix.cpp @@ -2378,10 +2378,12 @@ static bool ocl_setIdentity( InputOutputArray _m, const Scalar& s ) if (cn == 3) return false; - UMat m = _m.getUMat(); - ocl::Kernel k("setIdentity", ocl::core::set_identity_oclsrc, format("-D T=%s", ocl::memopTypeToStr(type))); + if (k.empty()) + return false; + + UMat m = _m.getUMat(); k.args(ocl::KernelArg::WriteOnly(m), ocl::KernelArg::Constant(Mat(1, 1, type, s))); size_t globalsize[2] = { m.cols, m.rows }; diff --git a/modules/core/src/opencl/reduce.cl b/modules/core/src/opencl/reduce.cl index 2ba36e8595..526cc51010 100644 --- a/modules/core/src/opencl/reduce.cl +++ b/modules/core/src/opencl/reduce.cl @@ -66,7 +66,7 @@ dstT temp = convertToDT(src[0]); \ FUNC(accumulator, temp) #define REDUCE_LOCAL_1 \ - localmem[lid] += accumulator + localmem[lid - WGS2_ALIGNED] += accumulator #define REDUCE_LOCAL_2 \ localmem[lid] += localmem[lid2] @@ -78,7 +78,7 @@ #define REDUCE_GLOBAL \ accumulator += src[0] == zero ? zero : one #define REDUCE_LOCAL_1 \ - localmem[lid] += accumulator + localmem[lid - WGS2_ALIGNED] += accumulator #define REDUCE_LOCAL_2 \ localmem[lid] += localmem[lid2] @@ -95,10 +95,6 @@ __kernel void reduce(__global const uchar * srcptr, int step, int offset, int co int id = get_global_id(0); __local dstT localmem[WGS2_ALIGNED]; - if (lid < WGS2_ALIGNED) - localmem[lid] = (dstT)(0); - barrier(CLK_LOCAL_MEM_FENCE); - DEFINE_ACCUMULATOR; for (int grain = groupnum * WGS; id < total; id += grain) @@ -108,11 +104,11 @@ __kernel void reduce(__global const uchar * srcptr, int step, int offset, int co REDUCE_GLOBAL; } - if (lid >= WGS2_ALIGNED) - localmem[lid - WGS2_ALIGNED] = accumulator; + if (lid < WGS2_ALIGNED) + localmem[lid] = accumulator; barrier(CLK_LOCAL_MEM_FENCE); - if (lid < WGS2_ALIGNED) + if (lid >= WGS2_ALIGNED) REDUCE_LOCAL_1; barrier(CLK_LOCAL_MEM_FENCE); diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp index 3feb2db6ed..b19be3b476 100644 --- a/modules/core/src/stat.cpp +++ b/modules/core/src/stat.cpp @@ -480,7 +480,6 @@ static bool ocl_sum( InputArray _src, Scalar & res, int sum_op ) size_t wgs = ocl::Device::getDefault().maxWorkGroupSize(); int ddepth = std::max(CV_32S, depth), dtype = CV_MAKE_TYPE(ddepth, cn); - UMat src = _src.getUMat(), db(1, dbsize, dtype); int wgs2_aligned = 1; while (wgs2_aligned < (int)wgs) @@ -494,6 +493,10 @@ static bool ocl_sum( InputArray _src, Scalar & res, int sum_op ) ocl::typeToStr(type), ocl::typeToStr(dtype), ocl::convertTypeStr(depth, ddepth, cn, cvt), opMap[sum_op], (int)wgs, wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "")); + if (k.empty()) + return false; + + UMat src = _src.getUMat(), db(1, dbsize, dtype); k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(), dbsize, ocl::KernelArg::PtrWriteOnly(db)); @@ -611,7 +614,7 @@ namespace cv { static bool ocl_countNonZero( InputArray _src, int & res ) { - int depth = _src.depth(); + int type = _src.type(), depth = CV_MAT_DEPTH(type); bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; if (depth == CV_64F && !doubleSupport) @@ -619,7 +622,6 @@ static bool ocl_countNonZero( InputArray _src, int & res ) int dbsize = ocl::Device::getDefault().maxComputeUnits(); size_t wgs = ocl::Device::getDefault().maxWorkGroupSize(); - UMat src = _src.getUMat(), db(1, dbsize, CV_32SC1); int wgs2_aligned = 1; while (wgs2_aligned < (int)wgs) @@ -628,8 +630,12 @@ static bool ocl_countNonZero( InputArray _src, int & res ) ocl::Kernel k("reduce", ocl::core::reduce_oclsrc, format("-D srcT=%s -D OP_COUNT_NON_ZERO -D WGS=%d -D WGS2_ALIGNED=%d%s", - ocl::typeToStr(src.type()), (int)wgs, + ocl::typeToStr(type), (int)wgs, wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "")); + if (k.empty()) + return false; + + UMat src = _src.getUMat(), db(1, dbsize, CV_32SC1); k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(), dbsize, ocl::KernelArg::PtrWriteOnly(db));