mirror of
https://github.com/opencv/opencv.git
synced 2024-11-25 11:40:44 +08:00
Merge pull request #7653 from pengli:deriv
This commit is contained in:
commit
c93fb14dd2
@ -517,6 +517,66 @@ static bool ipp_sobel(InputArray _src, OutputArray _dst, int ddepth, int dx, int
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
namespace cv
|
||||
{
|
||||
static bool ocl_sepFilter3x3_8UC1(InputArray _src, OutputArray _dst, int ddepth,
|
||||
InputArray _kernelX, InputArray _kernelY, double delta, int borderType)
|
||||
{
|
||||
const ocl::Device & dev = ocl::Device::getDefault();
|
||||
int type = _src.type(), sdepth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
||||
|
||||
if ( !(dev.isIntel() && (type == CV_8UC1) && (ddepth == CV_8U) &&
|
||||
(_src.offset() == 0) && (_src.step() % 4 == 0) &&
|
||||
(_src.cols() % 16 == 0) && (_src.rows() % 2 == 0)) )
|
||||
return false;
|
||||
|
||||
Mat kernelX = _kernelX.getMat().reshape(1, 1);
|
||||
if (kernelX.cols % 2 != 1)
|
||||
return false;
|
||||
Mat kernelY = _kernelY.getMat().reshape(1, 1);
|
||||
if (kernelY.cols % 2 != 1)
|
||||
return false;
|
||||
|
||||
if (ddepth < 0)
|
||||
ddepth = sdepth;
|
||||
|
||||
Size size = _src.size();
|
||||
size_t globalsize[2] = { 0, 0 };
|
||||
size_t localsize[2] = { 0, 0 };
|
||||
|
||||
globalsize[0] = size.width / 16;
|
||||
globalsize[1] = size.height / 2;
|
||||
|
||||
const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", 0, "BORDER_REFLECT_101" };
|
||||
char build_opts[1024];
|
||||
sprintf(build_opts, "-D %s %s%s", borderMap[borderType],
|
||||
ocl::kernelToStr(kernelX, CV_32F, "KERNEL_MATRIX_X").c_str(),
|
||||
ocl::kernelToStr(kernelY, CV_32F, "KERNEL_MATRIX_Y").c_str());
|
||||
|
||||
ocl::Kernel kernel("sepFilter3x3_8UC1_cols16_rows2", cv::ocl::imgproc::sepFilter3x3_oclsrc, build_opts);
|
||||
if (kernel.empty())
|
||||
return false;
|
||||
|
||||
UMat src = _src.getUMat();
|
||||
_dst.create(size, CV_MAKETYPE(ddepth, cn));
|
||||
if (!(_dst.offset() == 0 && _dst.step() % 4 == 0))
|
||||
return false;
|
||||
UMat dst = _dst.getUMat();
|
||||
|
||||
int idxArg = kernel.set(0, ocl::KernelArg::PtrReadOnly(src));
|
||||
idxArg = kernel.set(idxArg, (int)src.step);
|
||||
idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dst));
|
||||
idxArg = kernel.set(idxArg, (int)dst.step);
|
||||
idxArg = kernel.set(idxArg, (int)dst.rows);
|
||||
idxArg = kernel.set(idxArg, (int)dst.cols);
|
||||
idxArg = kernel.set(idxArg, static_cast<float>(delta));
|
||||
|
||||
return kernel.run(2, globalsize, (localsize[0] == 0) ? NULL : localsize, false);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void cv::Sobel( InputArray _src, OutputArray _dst, int ddepth, int dx, int dy,
|
||||
int ksize, double scale, double delta, int borderType )
|
||||
{
|
||||
@ -554,6 +614,11 @@ void cv::Sobel( InputArray _src, OutputArray _dst, int ddepth, int dx, int dy,
|
||||
else
|
||||
ky *= scale;
|
||||
}
|
||||
|
||||
CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2 && ksize == 3 &&
|
||||
(size_t)_src.rows() > ky.total() && (size_t)_src.cols() > kx.total(),
|
||||
ocl_sepFilter3x3_8UC1(_src, _dst, ddepth, kx, ky, delta, borderType));
|
||||
|
||||
sepFilter2D( _src, _dst, ddepth, kx, ky, Point(-1, -1), delta, borderType );
|
||||
}
|
||||
|
||||
@ -593,6 +658,11 @@ void cv::Scharr( InputArray _src, OutputArray _dst, int ddepth, int dx, int dy,
|
||||
else
|
||||
ky *= scale;
|
||||
}
|
||||
|
||||
CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2 &&
|
||||
(size_t)_src.rows() > ky.total() && (size_t)_src.cols() > kx.total(),
|
||||
ocl_sepFilter3x3_8UC1(_src, _dst, ddepth, kx, ky, delta, borderType));
|
||||
|
||||
sepFilter2D( _src, _dst, ddepth, kx, ky, Point(-1, -1), delta, borderType );
|
||||
}
|
||||
|
||||
|
135
modules/imgproc/src/opencl/sepFilter3x3.cl
Normal file
135
modules/imgproc/src/opencl/sepFilter3x3.cl
Normal file
@ -0,0 +1,135 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#define DIG(a) a,
|
||||
__constant float kx[] = { KERNEL_MATRIX_X };
|
||||
__constant float ky[] = { KERNEL_MATRIX_Y };
|
||||
|
||||
#define OP(delta, y, x) (convert_float16(arr[(y + delta) * 3 + x]) * ky[y] * kx[x])
|
||||
|
||||
__kernel void sepFilter3x3_8UC1_cols16_rows2(__global const uint* src, int src_step,
|
||||
__global uint* dst, int dst_step,
|
||||
int rows, int cols, float delta)
|
||||
{
|
||||
int block_x = get_global_id(0);
|
||||
int y = get_global_id(1) * 2;
|
||||
int ssx, dsx;
|
||||
|
||||
if ((block_x * 16) >= cols || y >= rows) return;
|
||||
|
||||
uint4 line[4];
|
||||
uint4 line_out[2];
|
||||
uchar a; uchar16 b; uchar c;
|
||||
uchar d; uchar16 e; uchar f;
|
||||
uchar g; uchar16 h; uchar i;
|
||||
uchar j; uchar16 k; uchar l;
|
||||
|
||||
ssx = dsx = 1;
|
||||
int src_index = block_x * 4 * ssx + (y - 1) * (src_step / 4);
|
||||
line[1] = vload4(0, src + src_index + (src_step / 4));
|
||||
line[2] = vload4(0, src + src_index + 2 * (src_step / 4));
|
||||
|
||||
#ifdef BORDER_CONSTANT
|
||||
line[0] = (y == 0) ? (uint4)0 : vload4(0, src + src_index);
|
||||
line[3] = (y == (rows - 2)) ? (uint4)0 : vload4(0, src + src_index + 3 * (src_step / 4));
|
||||
#elif defined BORDER_REFLECT_101
|
||||
line[0] = (y == 0) ? line[2] : vload4(0, src + src_index);
|
||||
line[3] = (y == (rows - 2)) ? line[1] : vload4(0, src + src_index + 3 * (src_step / 4));
|
||||
#elif defined (BORDER_REPLICATE) || defined(BORDER_REFLECT)
|
||||
line[0] = (y == 0) ? line[1] : vload4(0, src + src_index);
|
||||
line[3] = (y == (rows - 2)) ? line[2] : vload4(0, src + src_index + 3 * (src_step / 4));
|
||||
#endif
|
||||
|
||||
__global uchar *src_p = (__global uchar *)src;
|
||||
|
||||
src_index = block_x * 16 * ssx + (y - 1) * src_step;
|
||||
bool line_end = ((block_x + 1) * 16 == cols);
|
||||
|
||||
b = as_uchar16(line[0]);
|
||||
e = as_uchar16(line[1]);
|
||||
h = as_uchar16(line[2]);
|
||||
k = as_uchar16(line[3]);
|
||||
|
||||
#ifdef BORDER_CONSTANT
|
||||
a = (block_x == 0 || y == 0) ? 0 : src_p[src_index - 1];
|
||||
c = (line_end || y == 0) ? 0 : src_p[src_index + 16];
|
||||
|
||||
d = (block_x == 0) ? 0 : src_p[src_index + src_step - 1];
|
||||
f = line_end ? 0 : src_p[src_index + src_step + 16];
|
||||
|
||||
g = (block_x == 0) ? 0 : src_p[src_index + 2 * src_step - 1];
|
||||
i = line_end ? 0 : src_p[src_index + 2 * src_step + 16];
|
||||
|
||||
j = (block_x == 0 || y == (rows - 2)) ? 0 : src_p[src_index + 3 * src_step - 1];
|
||||
l = (line_end || y == (rows - 2))? 0 : src_p[src_index + 3 * src_step + 16];
|
||||
|
||||
#elif defined BORDER_REFLECT_101
|
||||
int offset;
|
||||
offset = (y == 0) ? (2 * src_step) : 0;
|
||||
|
||||
a = (block_x == 0) ? src_p[src_index + offset + 1] : src_p[src_index + offset - 1];
|
||||
c = line_end ? src_p[src_index + offset + 14] : src_p[src_index + offset + 16];
|
||||
|
||||
d = (block_x == 0) ? src_p[src_index + src_step + 1] : src_p[src_index + src_step - 1];
|
||||
f = line_end ? src_p[src_index + src_step + 14] : src_p[src_index + src_step + 16];
|
||||
|
||||
g = (block_x == 0) ? src_p[src_index + 2 * src_step + 1] : src_p[src_index + 2 * src_step - 1];
|
||||
i = line_end ? src_p[src_index + 2 * src_step + 14] : src_p[src_index + 2 * src_step + 16];
|
||||
|
||||
offset = (y == (rows - 2)) ? (1 * src_step) : (3 * src_step);
|
||||
|
||||
j = (block_x == 0) ? src_p[src_index + offset + 1] : src_p[src_index + offset - 1];
|
||||
l = line_end ? src_p[src_index + offset + 14] : src_p[src_index + offset + 16];
|
||||
|
||||
#elif defined (BORDER_REPLICATE) || defined(BORDER_REFLECT)
|
||||
int offset;
|
||||
offset = (y == 0) ? (1 * src_step) : 0;
|
||||
|
||||
a = (block_x == 0) ? src_p[src_index + offset] : src_p[src_index + offset - 1];
|
||||
c = line_end ? src_p[src_index + offset + 15] : src_p[src_index + offset + 16];
|
||||
|
||||
d = (block_x == 0) ? src_p[src_index + src_step] : src_p[src_index + src_step - 1];
|
||||
f = line_end ? src_p[src_index + src_step + 15] : src_p[src_index + src_step + 16];
|
||||
|
||||
g = (block_x == 0) ? src_p[src_index + 2 * src_step] : src_p[src_index + 2 * src_step - 1];
|
||||
i = line_end ? src_p[src_index + 2 * src_step + 15] : src_p[src_index + 2 * src_step + 16];
|
||||
|
||||
offset = (y == (rows - 2)) ? (2 * src_step) : (3 * src_step);
|
||||
|
||||
j = (block_x == 0) ? src_p[src_index + offset] : src_p[src_index + offset - 1];
|
||||
l = line_end ? src_p[src_index + offset + 15] : src_p[src_index + offset + 16];
|
||||
|
||||
#endif
|
||||
|
||||
uchar16 arr[12];
|
||||
float16 sum[2];
|
||||
|
||||
arr[0] = (uchar16)(a, b.s0123, b.s456789ab, b.scde);
|
||||
arr[1] = b;
|
||||
arr[2] = (uchar16)(b.s123, b.s4567, b.s89abcdef, c);
|
||||
arr[3] = (uchar16)(d, e.s0123, e.s456789ab, e.scde);
|
||||
arr[4] = e;
|
||||
arr[5] = (uchar16)(e.s123, e.s4567, e.s89abcdef, f);
|
||||
arr[6] = (uchar16)(g, h.s0123, h.s456789ab, h.scde);
|
||||
arr[7] = h;
|
||||
arr[8] = (uchar16)(h.s123, h.s4567, h.s89abcdef, i);
|
||||
arr[9] = (uchar16)(j, k.s0123, k.s456789ab, k.scde);
|
||||
arr[10] = k;
|
||||
arr[11] = (uchar16)(k.s123, k.s4567, k.s89abcdef, l);
|
||||
|
||||
sum[0] = OP(0, 0, 0) + OP(0, 0, 1) + OP(0, 0, 2) +
|
||||
OP(0, 1, 0) + OP(0, 1, 1) + OP(0, 1, 2) +
|
||||
OP(0, 2, 0) + OP(0, 2, 1) + OP(0, 2, 2);
|
||||
|
||||
sum[1] = OP(1, 0, 0) + OP(1, 0, 1) + OP(1, 0, 2) +
|
||||
OP(1, 1, 0) + OP(1, 1, 1) + OP(1, 1, 2) +
|
||||
OP(1, 2, 0) + OP(1, 2, 1) + OP(1, 2, 2);
|
||||
|
||||
line_out[0] = as_uint4(convert_uchar16_sat_rte(sum[0] + delta));
|
||||
line_out[1] = as_uint4(convert_uchar16_sat_rte(sum[1] + delta));
|
||||
|
||||
int dst_index = block_x * 4 * dsx + y * (dst_step / 4);
|
||||
vstore4(line_out[0], 0, dst + dst_index);
|
||||
vstore4(line_out[1], 0, dst + dst_index + (dst_step / 4));
|
||||
}
|
@ -187,6 +187,84 @@ OCL_TEST_P(SobelTest, Mat)
|
||||
}
|
||||
}
|
||||
|
||||
PARAM_TEST_CASE(Deriv3x3_cols16_rows2_Base, MatType,
|
||||
int, // kernel size
|
||||
Size, // dx, dy
|
||||
BorderType, // border type
|
||||
double, // optional parameter
|
||||
bool, // roi or not
|
||||
int) // width multiplier
|
||||
{
|
||||
int type, borderType, ksize;
|
||||
Size size;
|
||||
double param;
|
||||
bool useRoi;
|
||||
int widthMultiple;
|
||||
|
||||
TEST_DECLARE_INPUT_PARAMETER(src);
|
||||
TEST_DECLARE_OUTPUT_PARAMETER(dst);
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
type = GET_PARAM(0);
|
||||
ksize = GET_PARAM(1);
|
||||
size = GET_PARAM(2);
|
||||
borderType = GET_PARAM(3);
|
||||
param = GET_PARAM(4);
|
||||
useRoi = GET_PARAM(5);
|
||||
widthMultiple = GET_PARAM(6);
|
||||
}
|
||||
|
||||
void random_roi()
|
||||
{
|
||||
size = Size(3, 3);
|
||||
|
||||
Size roiSize = randomSize(size.width, MAX_VALUE, size.height, MAX_VALUE);
|
||||
roiSize.width = std::max(size.width + 13, roiSize.width & (~0xf));
|
||||
roiSize.height = std::max(size.height + 1, roiSize.height & (~0x1));
|
||||
|
||||
Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
|
||||
randomSubMat(src, src_roi, roiSize, srcBorder, type, 5, 256);
|
||||
|
||||
Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
|
||||
randomSubMat(dst, dst_roi, roiSize, dstBorder, type, -60, 70);
|
||||
|
||||
UMAT_UPLOAD_INPUT_PARAMETER(src);
|
||||
UMAT_UPLOAD_OUTPUT_PARAMETER(dst);
|
||||
}
|
||||
|
||||
void Near()
|
||||
{
|
||||
Near(1, false);
|
||||
}
|
||||
|
||||
void Near(double threshold, bool relative)
|
||||
{
|
||||
if (relative)
|
||||
OCL_EXPECT_MATS_NEAR_RELATIVE(dst, threshold);
|
||||
else
|
||||
OCL_EXPECT_MATS_NEAR(dst, threshold);
|
||||
}
|
||||
};
|
||||
|
||||
typedef Deriv3x3_cols16_rows2_Base Sobel3x3_cols16_rows2;
|
||||
|
||||
OCL_TEST_P(Sobel3x3_cols16_rows2, Mat)
|
||||
{
|
||||
int dx = size.width, dy = size.height;
|
||||
double scale = param;
|
||||
|
||||
for (int j = 0; j < test_loop_times; j++)
|
||||
{
|
||||
random_roi();
|
||||
|
||||
OCL_OFF(cv::Sobel(src_roi, dst_roi, -1, dx, dy, ksize, scale, /* delta */0, borderType));
|
||||
OCL_ON(cv::Sobel(usrc_roi, udst_roi, -1, dx, dy, ksize, scale, /* delta */0, borderType));
|
||||
|
||||
Near();
|
||||
}
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Scharr
|
||||
|
||||
@ -208,6 +286,24 @@ OCL_TEST_P(ScharrTest, Mat)
|
||||
}
|
||||
}
|
||||
|
||||
typedef Deriv3x3_cols16_rows2_Base Scharr3x3_cols16_rows2;
|
||||
|
||||
OCL_TEST_P(Scharr3x3_cols16_rows2, Mat)
|
||||
{
|
||||
int dx = size.width, dy = size.height;
|
||||
double scale = param;
|
||||
|
||||
for (int j = 0; j < test_loop_times; j++)
|
||||
{
|
||||
random_roi();
|
||||
|
||||
OCL_OFF(cv::Scharr(src_roi, dst_roi, -1, dx, dy, scale, /* delta */ 0, borderType));
|
||||
OCL_ON(cv::Scharr(usrc_roi, udst_roi, -1, dx, dy, scale, /* delta */ 0, borderType));
|
||||
|
||||
Near();
|
||||
}
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// GaussianBlur
|
||||
|
||||
@ -552,6 +648,15 @@ OCL_INSTANTIATE_TEST_CASE_P(Filter, SobelTest, Combine(
|
||||
Bool(),
|
||||
Values(1))); // not used
|
||||
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Filter, Sobel3x3_cols16_rows2, Combine(
|
||||
Values((MatType)CV_8UC1),
|
||||
Values(3), // kernel size
|
||||
Values(Size(1, 0), Size(1, 1), Size(2, 0), Size(2, 1)), // dx, dy
|
||||
FILTER_BORDER_SET_NO_WRAP_NO_ISOLATED,
|
||||
Values(0.0), // not used
|
||||
Bool(),
|
||||
Values(1))); // not used
|
||||
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Filter, ScharrTest, Combine(
|
||||
FILTER_TYPES,
|
||||
Values(0), // not used
|
||||
@ -561,6 +666,15 @@ OCL_INSTANTIATE_TEST_CASE_P(Filter, ScharrTest, Combine(
|
||||
Bool(),
|
||||
Values(1))); // not used
|
||||
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Filter, Scharr3x3_cols16_rows2, Combine(
|
||||
FILTER_TYPES,
|
||||
Values(0), // not used
|
||||
Values(Size(0, 1), Size(1, 0)), // dx, dy
|
||||
FILTER_BORDER_SET_NO_WRAP_NO_ISOLATED,
|
||||
Values(1.0, 0.2), // kernel scale
|
||||
Bool(),
|
||||
Values(1))); // not used
|
||||
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Filter, GaussianBlurTest, Combine(
|
||||
FILTER_TYPES,
|
||||
Values(3, 5), // kernel size
|
||||
|
Loading…
Reference in New Issue
Block a user