diff --git a/modules/core/src/copy.cpp b/modules/core/src/copy.cpp index be93577e9f..ef0f220d50 100644 --- a/modules/core/src/copy.cpp +++ b/modules/core/src/copy.cpp @@ -989,7 +989,8 @@ namespace cv { static bool ocl_copyMakeBorder( InputArray _src, OutputArray _dst, int top, int bottom, int left, int right, int borderType, const Scalar& value ) { - int type = _src.type(), cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type); + int type = _src.type(), cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type), + rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1; bool isolated = (borderType & BORDER_ISOLATED) != 0; borderType &= ~cv::BORDER_ISOLATED; @@ -1001,12 +1002,10 @@ static bool ocl_copyMakeBorder( InputArray _src, OutputArray _dst, int top, int const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101" }; int scalarcn = cn == 3 ? 4 : cn; int sctype = CV_MAKETYPE(depth, scalarcn); - String buildOptions = format( - "-D T=%s -D %s " - "-D T1=%s -D cn=%d -D ST=%s", - ocl::memopTypeToStr(type), borderMap[borderType], - ocl::memopTypeToStr(depth), cn, ocl::memopTypeToStr(sctype) - ); + String buildOptions = format("-D T=%s -D %s -D T1=%s -D cn=%d -D ST=%s -D rowsPerWI=%d", + ocl::memopTypeToStr(type), borderMap[borderType], + ocl::memopTypeToStr(depth), cn, + ocl::memopTypeToStr(sctype), rowsPerWI); ocl::Kernel k("copyMakeBorder", ocl::core::copymakeborder_oclsrc, buildOptions); if (k.empty()) @@ -1042,7 +1041,7 @@ static bool ocl_copyMakeBorder( InputArray _src, OutputArray _dst, int top, int k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst), top, left, ocl::KernelArg::Constant(Mat(1, 1, sctype, value))); - size_t globalsize[2] = { dst.cols, dst.rows }; + size_t globalsize[2] = { dst.cols, (dst.rows + rowsPerWI - 1) / rowsPerWI }; return k.run(2, globalsize, NULL, false); } diff --git a/modules/core/src/opencl/copymakeborder.cl b/modules/core/src/opencl/copymakeborder.cl index 55239ced9a..a78285e382 100644 --- a/modules/core/src/opencl/copymakeborder.cl +++ b/modules/core/src/opencl/copymakeborder.cl @@ -55,27 +55,18 @@ #endif #ifdef BORDER_CONSTANT -#define EXTRAPOLATE(x, y, v) v = scalar; +#define EXTRAPOLATE(x, cols) \ + ; #elif defined BORDER_REPLICATE -#define EXTRAPOLATE(x, y, v) \ - { \ - x = clamp(x, 0, src_cols - 1); \ - y = clamp(y, 0, src_rows - 1); \ - v = loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset))); \ - } +#define EXTRAPOLATE(x, cols) \ + x = clamp(x, 0, cols - 1); #elif defined BORDER_WRAP -#define EXTRAPOLATE(x, y, v) \ +#define EXTRAPOLATE(x, cols) \ { \ if (x < 0) \ - x -= ((x - src_cols + 1) / src_cols) * src_cols; \ - if (x >= src_cols) \ - x %= src_cols; \ - \ - if (y < 0) \ - y -= ((y - src_rows + 1) / src_rows) * src_rows; \ - if( y >= src_rows ) \ - y %= src_rows; \ - v = loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset))); \ + x -= ((x - cols + 1) / cols) * cols; \ + if (x >= cols) \ + x %= cols; \ } #elif defined(BORDER_REFLECT) || defined(BORDER_REFLECT_101) #ifdef BORDER_REFLECT @@ -83,10 +74,10 @@ #else #define DELTA int delta = 1 #endif -#define EXTRAPOLATE(x, y, v) \ +#define EXTRAPOLATE(x, cols) \ { \ DELTA; \ - if (src_cols == 1) \ + if (cols == 1) \ x = 0; \ else \ do \ @@ -94,58 +85,56 @@ if( x < 0 ) \ x = -x - 1 + delta; \ else \ - x = src_cols - 1 - (x - src_cols) - delta; \ + x = cols - 1 - (x - cols) - delta; \ } \ - while (x >= src_cols || x < 0); \ - \ - if (src_rows == 1) \ - y = 0; \ - else \ - do \ - { \ - if( y < 0 ) \ - y = -y - 1 + delta; \ - else \ - y = src_rows - 1 - (y - src_rows) - delta; \ - } \ - while (y >= src_rows || y < 0); \ - v = loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset))); \ + while (x >= cols || x < 0); \ } #else -#error No extrapolation method +#error "No extrapolation method" #endif -#define NEED_EXTRAPOLATION(gx, gy) (gx >= src_cols || gy >= src_rows || gx < 0 || gy < 0) +#define NEED_EXTRAPOLATION(x, cols) (x >= cols || x < 0) __kernel void copyMakeBorder(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols, __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols, int top, int left, ST nVal) { int x = get_global_id(0); - int y = get_global_id(1); + int y0 = get_global_id(1) * rowsPerWI; #ifdef BORDER_CONSTANT T scalar = convertScalar(nVal); #endif - if (x < dst_cols && y < dst_rows) + if (x < dst_cols) { - int src_x = x - left; - int src_y = y - top; + int src_x = x - left, src_y; + int dst_index = mad24(y0, dst_step, mad24(x, (int)TSIZE, dst_offset)); - int dst_index = mad24(y, dst_step, mad24(x, (int)TSIZE, dst_offset)); - __global T * dst = (__global T *)(dstptr + dst_index); + if (NEED_EXTRAPOLATION(src_x, src_cols)) + { +#ifdef BORDER_CONSTANT + for (int y = y0, y1 = min(y0 + rowsPerWI, dst_rows); y < y1; ++y, dst_index += dst_step) + storepix(scalar, dstptr + dst_index); + return; +#endif + EXTRAPOLATE(src_x, src_cols) + } + src_x = mad24(src_x, TSIZE, src_offset); - T v; - if (NEED_EXTRAPOLATION(src_x, src_y)) + for (int y = y0, y1 = min(y0 + rowsPerWI, dst_rows); y < y1; ++y, dst_index += dst_step) { - EXTRAPOLATE(src_x, src_y, v) + src_y = y - top; + if (NEED_EXTRAPOLATION(src_y, src_rows)) + { + EXTRAPOLATE(src_y, src_rows) +#ifdef BORDER_CONSTANT + storepix(scalar, dstptr + dst_index); + continue; +#endif + } + int src_index = mad24(src_y, src_step, src_x); + storepix(loadpix(srcptr + src_index), dstptr + dst_index); } - else - { - int src_index = mad24(src_y, src_step, mad24(src_x, TSIZE, src_offset)); - v = loadpix(srcptr + src_index); - } - storepix(v, dst); } }