diff --git a/modules/photo/src/fast_nlmeans_denoising_opencl.hpp b/modules/photo/src/fast_nlmeans_denoising_opencl.hpp index 41264045c3..2fa11a351d 100644 --- a/modules/photo/src/fast_nlmeans_denoising_opencl.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_opencl.hpp @@ -89,13 +89,13 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h, char buf[4][40]; String opts = format("-D OP_CALC_FASTNLMEANS -D TEMPLATE_SIZE=%d -D SEARCH_SIZE=%d" - " -D sample_t=%s -D pixel_t=%s -D int_t=%s" + " -D pixel_t=%s -D int_t=%s" " -D weight_t=%s -D sum_t=%s -D convert_sum_t=%s" " -D BLOCK_COLS=%d -D BLOCK_ROWS=%d" " -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d" " -D convert_int_t=%s -D cn=%d -D psz=%d -D convert_pixel_t=%s%s", templateWindowSize, searchWindowSize, - ocl::typeToStr(depth), ocl::typeToStr(type), ocl::typeToStr(CV_32SC(cn)), + ocl::typeToStr(type), ocl::typeToStr(CV_32SC(cn)), depth == CV_8U ? ocl::typeToStr(CV_32S) : "long", depth == CV_8U ? ocl::typeToStr(CV_32SC(cn)) : (sprintf(buf[0], "long%d", cn), buf[0]), @@ -103,7 +103,8 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h, (sprintf(buf[1], "convert_long%d", cn), buf[1]), BLOCK_COLS, BLOCK_ROWS, ctaSize, templateWindowHalfWize, searchWindowHalfSize, - ocl::convertTypeStr(depth, CV_32S, cn, buf[2]), cn, cn == 3 ? 4 : cn, + ocl::convertTypeStr(depth, CV_32S, cn, buf[2]), cn, + (depth == CV_8U ? sizeof(uchar) : sizeof(ushort)) * (cn == 3 ? 4 : cn), ocl::convertTypeStr(CV_32S, depth, cn, buf[3]), abs ? " -D ABS" : ""); ocl::Kernel k("fastNlMeansDenoising", ocl::photo::nlmeans_oclsrc, opts); diff --git a/modules/photo/src/opencl/nlmeans.cl b/modules/photo/src/opencl/nlmeans.cl index 91b0123547..11837a5fcd 100644 --- a/modules/photo/src/opencl/nlmeans.cl +++ b/modules/photo/src/opencl/nlmeans.cl @@ -97,7 +97,7 @@ inline int calcDistUpDown(pixel_t down_value, pixel_t down_value_t, pixel_t up_v #define COND if (x == 0 && y == 0) -inline void calcFirstElementInRow(__global const sample_t * src, int src_step, int src_offset, +inline void calcFirstElementInRow(__global const uchar * src, int src_step, int src_offset, __local int * dists, int y, int x, int id, __global int * col_dists, __global int * up_col_dists) { @@ -129,8 +129,8 @@ inline void calcFirstElementInRow(__global const sample_t * src, int src_step, i dist += value; } - src_current = (__global const pixel_t *)((__global const sample_t *)src_current + src_step); - src_template = (__global const pixel_t *)((__global const sample_t *)src_template + src_step); + src_current = (__global const pixel_t *)((__global const uchar *)src_current + src_step); + src_template = (__global const pixel_t *)((__global const uchar *)src_template + src_step); } #pragma unroll @@ -142,7 +142,7 @@ inline void calcFirstElementInRow(__global const sample_t * src, int src_step, i } } -inline void calcElementInFirstRow(__global const sample_t * src, int src_step, int src_offset, +inline void calcElementInFirstRow(__global const uchar * src, int src_step, int src_offset, __local int * dists, int y, int x0, int x, int id, int first, __global int * col_dists, __global int * up_col_dists) { @@ -164,8 +164,8 @@ inline void calcElementInFirstRow(__global const sample_t * src, int src_step, i { col_dist += calcDist(src_current[0], src_template[0]); - src_current = (__global const pixel_t *)((__global const sample_t *)src_current + src_step); - src_template = (__global const pixel_t *)((__global const sample_t *)src_template + src_step); + src_current = (__global const pixel_t *)((__global const uchar *)src_current + src_step); + src_template = (__global const pixel_t *)((__global const uchar *)src_template + src_step); } dists[i] += col_dist - col_dists_current[first]; @@ -174,7 +174,7 @@ inline void calcElementInFirstRow(__global const sample_t * src, int src_step, i } } -inline void calcElement(__global const sample_t * src, int src_step, int src_offset, +inline void calcElement(__global const uchar * src, int src_step, int src_offset, __local int * dists, int y, int x0, int x, int id, int first, __global int * col_dists, __global int * up_col_dists) { @@ -207,9 +207,9 @@ inline void calcElement(__global const sample_t * src, int src_step, int src_off } } -inline void convolveWindow(__global const sample_t * src, int src_step, int src_offset, +inline void convolveWindow(__global const uchar * src, int src_step, int src_offset, __local int * dists, __global const int * almostDist2Weight, - __global sample_t * dst, int dst_step, int dst_offset, + __global uchar * dst, int dst_step, int dst_offset, int y, int x, int id, __local weight_t * weights_local, __local sum_t * weighted_sum_local, int almostTemplateWindowSizeSqBinShift) { @@ -255,9 +255,9 @@ inline void convolveWindow(__global const sample_t * src, int src_step, int src_ } } -__kernel void fastNlMeansDenoising(__global const sample_t * src, int src_step, int src_offset, - __global sample_t * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols, - __global const int * almostDist2Weight, __global sample_t * buffer, +__kernel void fastNlMeansDenoising(__global const uchar * src, int src_step, int src_offset, + __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols, + __global const int * almostDist2Weight, __global uchar * buffer, int almostTemplateWindowSizeSqBinShift) { int block_x = get_group_id(0), nblocks_x = get_num_groups(0); @@ -277,11 +277,6 @@ __kernel void fastNlMeansDenoising(__global const sample_t * src, int src_step, __global int * col_dists = (__global int *)(buffer + block_data_start * sizeof(int)); __global int * up_col_dists = col_dists + SEARCH_SIZE_SQ * TEMPLATE_SIZE; - src_step /= sizeof(sample_t); - src_offset /= sizeof(sample_t); - dst_step /= sizeof(sample_t); - dst_offset /= sizeof(sample_t); - for (int y = y0; y < y1; ++y) for (int x = x0; x < x1; ++x) {