diff --git a/modules/imgproc/src/opencl/bilateral.cl b/modules/imgproc/src/opencl/bilateral.cl index f459cfc850..013be8015d 100644 --- a/modules/imgproc/src/opencl/bilateral.cl +++ b/modules/imgproc/src/opencl/bilateral.cl @@ -32,6 +32,28 @@ // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. +#if cn != 3 +#define loadpix(addr) *(__global const uchar_t *)(addr) +#define storepix(val, addr) *(__global uchar_t *)(addr) = val +#define TSIZE cn +#else +#define loadpix(addr) vload3(0, (__global const uchar *)(addr)) +#define storepix(val, addr) vstore3(val, 0, (__global uchar *)(addr)) +#define TSIZE 3 +#endif + +#if cn == 1 +#define SUM(a) a +#elif cn == 2 +#define SUM(a) a.x + a.y +#elif cn == 3 +#define SUM(a) a.x + a.y + a.z +#elif cn == 4 +#define SUM(a) a.x + a.y + a.z + a.w +#else +#error "cn should be <= 4" +#endif + __kernel void bilateral(__global const uchar * src, int src_step, int src_offset, __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols, __constant float * color_weight, __constant float * space_weight, __constant int * space_ofs) @@ -41,19 +63,23 @@ __kernel void bilateral(__global const uchar * src, int src_step, int src_offset if (y < dst_rows && x < dst_cols) { - int src_index = mad24(y + radius, src_step, x + radius + src_offset); - int dst_index = mad24(y, dst_step, x + dst_offset); - float sum = 0.f, wsum = 0.f; - int val0 = convert_int(src[src_index]); + int src_index = mad24(y + radius, src_step, mad24(x + radius, TSIZE, src_offset)); + int dst_index = mad24(y, dst_step, mad24(x, TSIZE, dst_offset)); + + float_t sum = (float_t)(0.0f); + float wsum = 0.0f; + int_t val0 = convert_int_t(loadpix(src + src_index)); #pragma unroll for (int k = 0; k < maxk; k++ ) { - int val = convert_int(src[src_index + space_ofs[k]]); - float w = space_weight[k] * color_weight[abs(val - val0)]; - sum += (float)(val) * w; + int_t val = 
convert_int_t(loadpix(src + src_index + space_ofs[k])); + uint_t diff = abs(val - val0); + float w = space_weight[k] * color_weight[SUM(diff)]; + sum += convert_float_t(val) * (float_t)(w); wsum += w; } - dst[dst_index] = convert_uchar_rtz(sum / wsum + 0.5f); + + storepix(convert_uchar_t(sum / (float_t)(wsum)), dst + dst_index); } } diff --git a/modules/imgproc/src/smooth.cpp b/modules/imgproc/src/smooth.cpp index 40687a226c..0641bc8fea 100644 --- a/modules/imgproc/src/smooth.cpp +++ b/modules/imgproc/src/smooth.cpp @@ -2210,10 +2210,10 @@ static bool ocl_bilateralFilter_8u(InputArray _src, OutputArray _dst, int d, double sigma_color, double sigma_space, int borderType) { - int type = _src.type(), cn = CV_MAT_CN(type); + int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); int i, j, maxk, radius; - if ( type != CV_8UC1 ) + if (depth != CV_8U || cn > 4) return false; if (sigma_color <= 0) @@ -2240,9 +2240,9 @@ static bool ocl_bilateralFilter_8u(InputArray _src, OutputArray _dst, int d, std::vector<float> _color_weight(cn * 256); std::vector<float> _space_weight(d * d); std::vector<int> _space_ofs(d * d); - float *color_weight = &_color_weight[0]; - float *space_weight = &_space_weight[0]; - int *space_ofs = &_space_ofs[0]; + float * const color_weight = &_color_weight[0]; + float * const space_weight = &_space_weight[0]; + int * const space_ofs = &_space_ofs[0]; // initialize color-related bilateral filter coefficients for( i = 0; i < 256 * cn; i++ ) @@ -2256,11 +2256,19 @@ static bool ocl_bilateralFilter_8u(InputArray _src, OutputArray _dst, int d, if ( r > radius ) continue; space_weight[maxk] = (float)std::exp(r * r * gauss_space_coeff); - space_ofs[maxk++] = (int)(i * temp.step + j); + space_ofs[maxk++] = (int)(i * temp.step + j * cn); } + char cvt[3][40]; + String cnstr = cn > 1 ? 
format("%d", cn) : ""; ocl::Kernel k("bilateral", ocl::imgproc::bilateral_oclsrc, - format("-D radius=%d -D maxk=%d", radius, maxk)); + format("-D radius=%d -D maxk=%d -D cn=%d -D int_t=%s -D uint_t=uint%s -D convert_int_t=%s" + " -D uchar_t=%s -D float_t=%s -D convert_float_t=%s -D convert_uchar_t=%s", + radius, maxk, cn, ocl::typeToStr(CV_32SC(cn)), cnstr.c_str(), + ocl::convertTypeStr(CV_8U, CV_32S, cn, cvt[0]), + ocl::typeToStr(type), ocl::typeToStr(CV_32FC(cn)), + ocl::convertTypeStr(CV_32S, CV_32F, cn, cvt[1]), + ocl::convertTypeStr(CV_32F, CV_8U, cn, cvt[2]))); if (k.empty()) return false; diff --git a/modules/imgproc/test/ocl/test_filters.cpp b/modules/imgproc/test/ocl/test_filters.cpp index 55d7bc6d1b..d2819a388c 100644 --- a/modules/imgproc/test/ocl/test_filters.cpp +++ b/modules/imgproc/test/ocl/test_filters.cpp @@ -290,8 +290,6 @@ OCL_TEST_P(MorphologyEx, Mat) } } - - ////////////////////////////////////////////////////////////////////////////////////////////////////////////// #define FILTER_BORDER_SET_NO_ISOLATED \ @@ -309,7 +307,7 @@ OCL_TEST_P(MorphologyEx, Mat) #define FILTER_TYPES Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4) OCL_INSTANTIATE_TEST_CASE_P(Filter, Bilateral, Combine( - Values((MatType)CV_8UC1), + Values(CV_8UC1, CV_8UC3), Values(5, 9), // kernel size Values(Size(0, 0)), // not used FILTER_BORDER_SET_NO_ISOLATED, @@ -372,7 +370,6 @@ OCL_INSTANTIATE_TEST_CASE_P(Filter, MorphologyEx, Combine( Values(1.0, 2.0, 3.0), Bool())); - } } // namespace cvtest::ocl #endif // HAVE_OPENCL