mirror of
https://github.com/opencv/opencv.git
synced 2025-08-05 22:19:14 +08:00
morph ocl kernel for erode and dilate filter
This kernel is for CV_8UC1 format and 3x3 kernel size, It is about 33% ~ 55% faster than current ocl kernel with below perf test python ./modules/ts/misc/run.py -t imgproc --gtest_filter=OCL_ErodeFixture* python ./modules/ts/misc/run.py -t imgproc --gtest_filter=OCL_DilateFixture* Also add accuracy test cases for this kernel, the test command is ./bin/opencv_test_imgproc --gtest_filter=OCL_Filter/MorphFilter3x3* Signed-off-by: Li Peng <peng.li@intel.com>
This commit is contained in:
parent
18327aed30
commit
35198b84a4
@ -1477,6 +1477,78 @@ Ptr<Morph> Morph ::create(int op, int src_type, int dst_type, int max_width, int
|
||||
|
||||
#define ROUNDUP(sz, n) ((sz) + (n) - 1 - (((sz) + (n) - 1) % (n)))
|
||||
|
||||
static bool ocl_morph3x3_8UC1( InputArray _src, OutputArray _dst, InputArray _kernel, Point anchor,
|
||||
int op, int actual_op = -1, InputArray _extraMat = noArray())
|
||||
{
|
||||
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
||||
Size ksize = _kernel.size();
|
||||
|
||||
Mat kernel8u;
|
||||
String processing;
|
||||
|
||||
bool haveExtraMat = !_extraMat.empty();
|
||||
CV_Assert(actual_op <= 3 || haveExtraMat);
|
||||
|
||||
_kernel.getMat().convertTo(kernel8u, CV_8U);
|
||||
for (int y = 0; y < kernel8u.rows; ++y)
|
||||
for (int x = 0; x < kernel8u.cols; ++x)
|
||||
if (kernel8u.at<uchar>(y, x) != 0)
|
||||
processing += format("PROCESS(%d,%d)", y, x);
|
||||
|
||||
if (anchor.x < 0)
|
||||
anchor.x = ksize.width / 2;
|
||||
if (anchor.y < 0)
|
||||
anchor.y = ksize.height / 2;
|
||||
|
||||
if (actual_op < 0)
|
||||
actual_op = op;
|
||||
|
||||
if (type != CV_8UC1 ||
|
||||
!((_src.offset() == 0) && (_src.step() % 4 == 0)) ||
|
||||
!((_src.cols() % 16 == 0) && (_src.rows() % 2 == 0)) ||
|
||||
!(anchor.x == 1 && anchor.y == 1) ||
|
||||
!(ksize.width == 3 && ksize.height == 3))
|
||||
return false;
|
||||
|
||||
Size size = _src.size();
|
||||
size_t globalsize[2] = { 0, 0 };
|
||||
size_t localsize[2] = { 0, 0 };
|
||||
|
||||
globalsize[0] = size.width / 16;
|
||||
globalsize[1] = size.height / 2;
|
||||
|
||||
static const char * const op2str[] = { "OP_ERODE", "OP_DILATE", NULL, NULL, "OP_GRADIENT", "OP_TOPHAT", "OP_BLACKHAT" };
|
||||
String opts = format("-D PROCESS_ELEM_=%s -D %s%s", processing.c_str(), op2str[op],
|
||||
actual_op == op ? "" : cv::format(" -D %s", op2str[actual_op]).c_str());
|
||||
|
||||
ocl::Kernel k;
|
||||
k.create("morph3x3_8UC1_cols16_rows2", cv::ocl::imgproc::morph3x3_oclsrc, opts);
|
||||
|
||||
if (k.empty())
|
||||
return false;
|
||||
|
||||
UMat src = _src.getUMat();
|
||||
_dst.create(size, CV_MAKETYPE(depth, cn));
|
||||
if (!(_dst.offset() == 0 && _dst.step() % 4 == 0))
|
||||
return false;
|
||||
UMat dst = _dst.getUMat();
|
||||
UMat extraMat = _extraMat.getUMat();
|
||||
|
||||
int idxArg = k.set(0, ocl::KernelArg::PtrReadOnly(src));
|
||||
idxArg = k.set(idxArg, (int)src.step);
|
||||
idxArg = k.set(idxArg, ocl::KernelArg::PtrWriteOnly(dst));
|
||||
idxArg = k.set(idxArg, (int)dst.step);
|
||||
idxArg = k.set(idxArg, (int)dst.rows);
|
||||
idxArg = k.set(idxArg, (int)dst.cols);
|
||||
|
||||
if (haveExtraMat)
|
||||
{
|
||||
idxArg = k.set(idxArg, ocl::KernelArg::ReadOnlyNoSize(extraMat));
|
||||
}
|
||||
|
||||
return k.run(2, globalsize, (localsize[0] == 0) ? NULL : localsize, false);
|
||||
}
|
||||
|
||||
static bool ocl_morphSmall( InputArray _src, OutputArray _dst, InputArray _kernel, Point anchor, int borderType,
|
||||
int op, int actual_op = -1, InputArray _extraMat = noArray())
|
||||
{
|
||||
@ -1676,6 +1748,9 @@ static bool ocl_morphOp(InputArray _src, OutputArray _dst, InputArray _kernel,
|
||||
#endif
|
||||
)
|
||||
{
|
||||
if (ocl_morph3x3_8UC1(_src, _dst, kernel, anchor, op, actual_op, _extraMat))
|
||||
return true;
|
||||
|
||||
if (ocl_morphSmall(_src, _dst, kernel, anchor, borderType, op, actual_op, _extraMat))
|
||||
return true;
|
||||
}
|
||||
|
119
modules/imgproc/src/opencl/morph3x3.cl
Normal file
119
modules/imgproc/src/opencl/morph3x3.cl
Normal file
@ -0,0 +1,119 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#ifdef OP_ERODE
|
||||
#define OP(m1, m2) min(m1, m2)
|
||||
#define VAL UCHAR_MAX
|
||||
#endif
|
||||
|
||||
#ifdef OP_DILATE
|
||||
#define OP(m1, m2) max(m1, m2)
|
||||
#define VAL 0
|
||||
#endif
|
||||
|
||||
#if defined OP_GRADIENT || defined OP_TOPHAT || defined OP_BLACKHAT
|
||||
#define EXTRA_PARAMS , __global const uchar * matptr, int mat_step, int mat_offset
|
||||
#else
|
||||
#define EXTRA_PARAMS
|
||||
#endif
|
||||
|
||||
#define PROCESS(_y, _x) \
|
||||
line_out[0] = OP(line_out[0], arr[_x + 3 * _y]); \
|
||||
line_out[1] = OP(line_out[1], arr[_x + 3 * (_y + 1)]);
|
||||
|
||||
#define PROCESS_ELEM \
|
||||
line_out[0] = (uchar16)VAL; \
|
||||
line_out[1] = (uchar16)VAL; \
|
||||
PROCESS_ELEM_
|
||||
|
||||
__kernel void morph3x3_8UC1_cols16_rows2(__global const uint* src, int src_step,
|
||||
__global uint* dst, int dst_step,
|
||||
int rows, int cols
|
||||
EXTRA_PARAMS)
|
||||
{
|
||||
int block_x = get_global_id(0);
|
||||
int y = get_global_id(1) * 2;
|
||||
int ssx = 1, dsx = 1;
|
||||
|
||||
if ((block_x * 16) >= cols || y >= rows) return;
|
||||
|
||||
uchar a; uchar16 b; uchar c;
|
||||
uchar d; uchar16 e; uchar f;
|
||||
uchar g; uchar16 h; uchar i;
|
||||
uchar j; uchar16 k; uchar l;
|
||||
|
||||
uchar16 line[4];
|
||||
uchar16 line_out[2];
|
||||
|
||||
int src_index = block_x * 4 * ssx + (y - 1) * (src_step / 4);
|
||||
line[0] = (y == 0) ? (uchar16)VAL: as_uchar16(vload4(0, src + src_index));
|
||||
line[1] = as_uchar16(vload4(0, src + src_index + (src_step / 4)));
|
||||
line[2] = as_uchar16(vload4(0, src + src_index + 2 * (src_step / 4)));
|
||||
line[3] = (y == (rows - 2)) ? (uchar16)VAL: as_uchar16(vload4(0, src + src_index + 3 * (src_step / 4)));
|
||||
|
||||
__global uchar *src_p = (__global uchar *)src;
|
||||
bool line_end = ((block_x + 1) * 16 == cols);
|
||||
|
||||
src_index = block_x * 16 * ssx + (y - 1) * src_step;
|
||||
|
||||
a = (block_x == 0 || y == 0) ? VAL : src_p[src_index - 1];
|
||||
b = line[0];
|
||||
c = (line_end || y == 0) ? VAL : src_p[src_index + 16];
|
||||
|
||||
d = (block_x == 0) ? VAL : src_p[src_index + src_step - 1];
|
||||
e = line[1];
|
||||
f = line_end ? VAL : src_p[src_index + src_step + 16];
|
||||
|
||||
g = (block_x == 0) ? VAL : src_p[src_index + 2 * src_step - 1];
|
||||
h = line[2];
|
||||
i = line_end ? VAL : src_p[src_index + 2 * src_step + 16];
|
||||
|
||||
j = (block_x == 0 || y == (rows - 2)) ? VAL : src_p[src_index + 3 * src_step - 1];
|
||||
k = line[3];
|
||||
l = (line_end || y == (rows - 2)) ? VAL : src_p[src_index + 3 * src_step + 16];
|
||||
|
||||
uchar16 arr[12];
|
||||
arr[0] = (uchar16)(a, b.s01234567, b.s89ab, b.scde);
|
||||
arr[1] = b;
|
||||
arr[2] = (uchar16)(b.s12345678, b.s9abc, b.sdef, c);
|
||||
arr[3] = (uchar16)(d, e.s01234567, e.s89ab, e.scde);
|
||||
arr[4] = e;
|
||||
arr[5] = (uchar16)(e.s12345678, e.s9abc, e.sdef, f);
|
||||
arr[6] = (uchar16)(g, h.s01234567, h.s89ab, h.scde);
|
||||
arr[7] = h;
|
||||
arr[8] = (uchar16)(h.s12345678, h.s9abc, h.sdef, i);
|
||||
arr[9] = (uchar16)(j, k.s01234567, k.s89ab, k.scde);
|
||||
arr[10] = k;
|
||||
arr[11] = (uchar16)(k.s12345678, k.s9abc, k.sdef, l);
|
||||
|
||||
PROCESS_ELEM;
|
||||
|
||||
int dst_index = block_x * 4 * dsx + y * (dst_step / 4);
|
||||
|
||||
#if defined OP_GRADIENT || defined OP_TOPHAT || defined OP_BLACKHAT
|
||||
int mat_index = y * mat_step + block_x * 16 * ssx + mat_offset;
|
||||
uchar16 val0 = vload16(0, matptr + mat_index);
|
||||
uchar16 val1 = vload16(0, matptr + mat_index + mat_step);
|
||||
|
||||
#ifdef OP_GRADIENT
|
||||
line_out[0] = convert_uchar16_sat(convert_int16(line_out[0]) - convert_int16(val0));
|
||||
line_out[1] = convert_uchar16_sat(convert_int16(line_out[1]) - convert_int16(val1));
|
||||
vstore4(as_uint4(line_out[0]), 0, dst + dst_index);
|
||||
vstore4(as_uint4(line_out[1]), 0, dst + dst_index + (dst_step / 4));
|
||||
#elif defined OP_TOPHAT
|
||||
line_out[0] = convert_uchar16_sat(convert_int16(val0) - convert_int16(line_out[0]));
|
||||
line_out[1] = convert_uchar16_sat(convert_int16(val1) - convert_int16(line_out[1]));
|
||||
vstore4(as_uint4(line_out[0]), 0, dst + dst_index);
|
||||
vstore4(as_uint4(line_out[1]), 0, dst + dst_index + (dst_step / 4));
|
||||
#elif defined OP_BLACKHAT
|
||||
line_out[0] = convert_uchar16_sat(convert_int16(line_out[0]) - convert_int16(val0));
|
||||
line_out[1] = convert_uchar16_sat(convert_int16(line_out[1]) - convert_int16(val1));
|
||||
vstore4(as_uint4(line_out[0]), 0, dst + dst_index);
|
||||
vstore4(as_uint4(line_out[1]), 0, dst + dst_index + (dst_step / 4));
|
||||
#endif
|
||||
#else
|
||||
vstore4(as_uint4(line_out[0]), 0, dst + dst_index);
|
||||
vstore4(as_uint4(line_out[1]), 0, dst + dst_index + (dst_step / 4));
|
||||
#endif
|
||||
}
|
@ -273,6 +273,85 @@ OCL_TEST_P(Dilate, Mat)
|
||||
}
|
||||
}
|
||||
|
||||
PARAM_TEST_CASE(MorphFilter3x3_cols16_rows2_Base, MatType,
|
||||
int, // kernel size
|
||||
Size, // dx, dy
|
||||
BorderType, // border type
|
||||
double, // optional parameter
|
||||
bool, // roi or not
|
||||
int) // width multiplier
|
||||
{
|
||||
int type, borderType, ksize;
|
||||
Size size;
|
||||
double param;
|
||||
bool useRoi;
|
||||
int widthMultiple;
|
||||
|
||||
TEST_DECLARE_INPUT_PARAMETER(src);
|
||||
TEST_DECLARE_OUTPUT_PARAMETER(dst);
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
type = GET_PARAM(0);
|
||||
ksize = GET_PARAM(1);
|
||||
size = GET_PARAM(2);
|
||||
borderType = GET_PARAM(3);
|
||||
param = GET_PARAM(4);
|
||||
useRoi = GET_PARAM(5);
|
||||
widthMultiple = GET_PARAM(6);
|
||||
}
|
||||
|
||||
void random_roi()
|
||||
{
|
||||
size = Size(3, 3);
|
||||
|
||||
Size roiSize = randomSize(size.width, MAX_VALUE, size.height, MAX_VALUE);
|
||||
roiSize.width = std::max(size.width + 13, roiSize.width & (~0xf));
|
||||
roiSize.height = std::max(size.height + 1, roiSize.height & (~0x1));
|
||||
|
||||
Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
|
||||
randomSubMat(src, src_roi, roiSize, srcBorder, type, 5, 256);
|
||||
|
||||
Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
|
||||
randomSubMat(dst, dst_roi, roiSize, dstBorder, type, -60, 70);
|
||||
|
||||
UMAT_UPLOAD_INPUT_PARAMETER(src);
|
||||
UMAT_UPLOAD_OUTPUT_PARAMETER(dst);
|
||||
}
|
||||
|
||||
void Near()
|
||||
{
|
||||
Near(1, false);
|
||||
}
|
||||
|
||||
void Near(double threshold, bool relative)
|
||||
{
|
||||
if (relative)
|
||||
OCL_EXPECT_MATS_NEAR_RELATIVE(dst, threshold);
|
||||
else
|
||||
OCL_EXPECT_MATS_NEAR(dst, threshold);
|
||||
}
|
||||
};
|
||||
|
||||
typedef MorphFilter3x3_cols16_rows2_Base MorphFilter3x3_cols16_rows2;
|
||||
|
||||
OCL_TEST_P(MorphFilter3x3_cols16_rows2, Mat)
|
||||
{
|
||||
Size kernelSize(ksize, ksize);
|
||||
int iterations = (int)param;
|
||||
|
||||
for (int j = 0; j < test_loop_times; j++)
|
||||
{
|
||||
random_roi();
|
||||
Mat kernel = ksize==0 ? Mat() : randomMat(kernelSize, CV_8UC1, 0, 3);
|
||||
|
||||
OCL_OFF(cv::dilate(src_roi, dst_roi, kernel, Point(-1, -1), iterations) );
|
||||
OCL_ON(cv::dilate(usrc_roi, udst_roi, kernel, Point(-1, -1), iterations) );
|
||||
|
||||
Near();
|
||||
}
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// MorphologyEx
|
||||
IMPLEMENT_PARAM_CLASS(MorphOp, int)
|
||||
@ -429,6 +508,15 @@ OCL_INSTANTIATE_TEST_CASE_P(Filter, Dilate, Combine(
|
||||
Bool(),
|
||||
Values(1))); // not used
|
||||
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Filter, MorphFilter3x3_cols16_rows2, Combine(
|
||||
Values((MatType)CV_8UC1),
|
||||
Values(0, 3), // kernel size, 0 means kernel = Mat()
|
||||
Values(Size(0, 0)), // not used
|
||||
Values((BorderType)BORDER_CONSTANT),
|
||||
Values(1.0, 2.0, 3.0),
|
||||
Bool(),
|
||||
Values(1))); // not used
|
||||
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Filter, MorphologyEx, Combine(
|
||||
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
|
||||
Values(3, 5, 7), // kernel size
|
||||
|
Loading…
Reference in New Issue
Block a user