mirror of
https://github.com/opencv/opencv.git
synced 2025-01-18 22:44:02 +08:00
generalized OpenCL version of cv::sepFilter2D; removed some restrictions and added 3-channels support
This commit is contained in:
parent
b449b0bf71
commit
291458a859
@ -4317,8 +4317,8 @@ String kernelToStr(InputArray _kernel, int ddepth, const char * name)
|
||||
if (ddepth != depth)
|
||||
kernel.convertTo(kernel, ddepth);
|
||||
|
||||
typedef std::string (*func_t)(const Mat &);
|
||||
static const func_t funcs[] = { kerToStr<uchar>, kerToStr<char>, kerToStr<ushort>,kerToStr<short>,
|
||||
typedef std::string (* func_t)(const Mat &);
|
||||
static const func_t funcs[] = { kerToStr<uchar>, kerToStr<char>, kerToStr<ushort>, kerToStr<short>,
|
||||
kerToStr<int>, kerToStr<float>, kerToStr<double>, 0 };
|
||||
const func_t func = funcs[depth];
|
||||
CV_Assert(func != 0);
|
||||
|
@ -41,7 +41,6 @@
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
#define CV_OPENCL_RUN_ASSERT
|
||||
#include "opencl_kernels.hpp"
|
||||
#include <sstream>
|
||||
|
||||
@ -3135,7 +3134,7 @@ template<typename ST, class CastOp, class VecOp> struct Filter2D : public BaseFi
|
||||
// b e h b e h 0 0
|
||||
// c f i c f i 0 0
|
||||
template <typename T>
|
||||
static int _prepareKernelFilter2D(std::vector<T>& data, const Mat &kernel)
|
||||
static int _prepareKernelFilter2D(std::vector<T> & data, const Mat & kernel)
|
||||
{
|
||||
Mat _kernel; kernel.convertTo(_kernel, DataDepth<T>::value);
|
||||
int size_y_aligned = ROUNDUP(kernel.rows * 2, 4);
|
||||
@ -3318,11 +3317,16 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,
|
||||
return kernel.run(2, globalsize, localsize, true);
|
||||
}
|
||||
|
||||
static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor, int borderType)
|
||||
static bool ocl_sepRowFilter2D(const UMat & src, UMat & buf, const Mat & kernelX, int anchor,
|
||||
int borderType, int ddepth, bool fast8uc1)
|
||||
{
|
||||
int type = src.type(), cn = CV_MAT_CN(type), sdepth = CV_MAT_DEPTH(type);
|
||||
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
|
||||
Size bufSize = buf.size();
|
||||
|
||||
if (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F))
|
||||
return false;
|
||||
|
||||
#ifdef ANDROID
|
||||
size_t localsize[2] = {16, 10};
|
||||
#else
|
||||
@ -3330,7 +3334,7 @@ static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor,
|
||||
#endif
|
||||
|
||||
size_t globalsize[2] = {DIVUP(bufSize.width, localsize[0]) * localsize[0], DIVUP(bufSize.height, localsize[1]) * localsize[1]};
|
||||
if (type == CV_8UC1)
|
||||
if (fast8uc1)
|
||||
globalsize[0] = DIVUP((bufSize.width + 3) >> 2, localsize[0]) * localsize[0];
|
||||
|
||||
int radiusX = anchor, radiusY = (buf.rows - src.rows) >> 1;
|
||||
@ -3346,20 +3350,21 @@ static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor,
|
||||
|
||||
char cvt[40];
|
||||
cv::String build_options = cv::format("-D RADIUSX=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D %s -D %s"
|
||||
" -D srcT=%s -D dstT=%s -D convertToDstT=%s -D srcT1=%s -D dstT1=%s",
|
||||
" -D srcT=%s -D dstT=%s -D convertToDstT=%s -D srcT1=%s -D dstT1=%s%s",
|
||||
radiusX, (int)localsize[0], (int)localsize[1], cn, btype,
|
||||
extra_extrapolation ? "EXTRA_EXTRAPOLATION" : "NO_EXTRA_EXTRAPOLATION",
|
||||
isolated ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED",
|
||||
ocl::typeToStr(type), ocl::typeToStr(CV_32FC(cn)),
|
||||
ocl::convertTypeStr(sdepth, CV_32F, cn, cvt),
|
||||
ocl::typeToStr(sdepth), ocl::typeToStr(CV_32F));
|
||||
ocl::typeToStr(sdepth), ocl::typeToStr(CV_32F),
|
||||
doubleSupport ? " -D DOUBLE_SUPPORT" : "");
|
||||
build_options += ocl::kernelToStr(kernelX, CV_32F);
|
||||
|
||||
Size srcWholeSize; Point srcOffset;
|
||||
src.locateROI(srcWholeSize, srcOffset);
|
||||
|
||||
String kernelName("row_filter");
|
||||
if (type == CV_8UC1)
|
||||
if (fast8uc1)
|
||||
kernelName += "_C1_D0";
|
||||
|
||||
ocl::Kernel k(kernelName.c_str(), cv::ocl::imgproc::filterSepRow_oclsrc,
|
||||
@ -3367,39 +3372,47 @@ static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor,
|
||||
if (k.empty())
|
||||
return false;
|
||||
|
||||
k.args(ocl::KernelArg::PtrReadOnly(src), (int)(src.step / src.elemSize()), srcOffset.x,
|
||||
srcOffset.y, src.cols, src.rows, srcWholeSize.width, srcWholeSize.height,
|
||||
ocl::KernelArg::PtrWriteOnly(buf), (int)(buf.step / buf.elemSize()),
|
||||
buf.cols, buf.rows, radiusY);
|
||||
if (fast8uc1)
|
||||
k.args(ocl::KernelArg::PtrReadOnly(src), (int)(src.step / src.elemSize()), srcOffset.x,
|
||||
srcOffset.y, src.cols, src.rows, srcWholeSize.width, srcWholeSize.height,
|
||||
ocl::KernelArg::PtrWriteOnly(buf), (int)(buf.step / buf.elemSize()),
|
||||
buf.cols, buf.rows, radiusY);
|
||||
else
|
||||
k.args(ocl::KernelArg::PtrReadOnly(src), (int)src.step, srcOffset.x,
|
||||
srcOffset.y, src.cols, src.rows, srcWholeSize.width, srcWholeSize.height,
|
||||
ocl::KernelArg::PtrWriteOnly(buf), (int)buf.step, buf.cols, buf.rows, radiusY);
|
||||
|
||||
return k.run(2, globalsize, localsize, false);
|
||||
}
|
||||
|
||||
static bool ocl_sepColFilter2D(const UMat &buf, UMat &dst, Mat &kernelY, int anchor)
|
||||
static bool ocl_sepColFilter2D(const UMat & buf, UMat & dst, const Mat & kernelY, int anchor)
|
||||
{
|
||||
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
|
||||
if (dst.depth() == CV_64F && !doubleSupport)
|
||||
return false;
|
||||
|
||||
#ifdef ANDROID
|
||||
size_t localsize[2] = {16, 10};
|
||||
size_t localsize[2] = { 16, 10 };
|
||||
#else
|
||||
size_t localsize[2] = {16, 16};
|
||||
size_t localsize[2] = { 16, 16 };
|
||||
#endif
|
||||
size_t globalsize[2] = {0, 0};
|
||||
size_t globalsize[2] = { 0, 0 };
|
||||
|
||||
int dtype = dst.type(), cn = CV_MAT_CN(dtype), ddepth = CV_MAT_DEPTH(dtype);
|
||||
Size sz = dst.size();
|
||||
|
||||
globalsize[1] = DIVUP(sz.height, localsize[1]) * localsize[1];
|
||||
|
||||
if (dtype == CV_8UC2)
|
||||
globalsize[0] = DIVUP((sz.width + 1) / 2, localsize[0]) * localsize[0];
|
||||
else
|
||||
globalsize[0] = DIVUP(sz.width, localsize[0]) * localsize[0];
|
||||
globalsize[0] = DIVUP(sz.width, localsize[0]) * localsize[0];
|
||||
|
||||
char cvt[40];
|
||||
cv::String build_options = cv::format("-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d"
|
||||
" -D srcT=%s -D dstT=%s -D convertToDstT=%s",
|
||||
" -D srcT=%s -D dstT=%s -D convertToDstT=%s"
|
||||
" -D srcT1=%s -D dstT1=%s%s",
|
||||
anchor, (int)localsize[0], (int)localsize[1], cn,
|
||||
ocl::typeToStr(buf.type()), ocl::typeToStr(dtype),
|
||||
ocl::convertTypeStr(CV_32F, ddepth, cn, cvt));
|
||||
ocl::convertTypeStr(CV_32F, ddepth, cn, cvt),
|
||||
ocl::typeToStr(CV_32F), ocl::typeToStr(ddepth),
|
||||
doubleSupport ? " -D DOUBLE_SUPPORT" : "");
|
||||
build_options += ocl::kernelToStr(kernelY, CV_32F);
|
||||
|
||||
ocl::Kernel k("col_filter", cv::ocl::imgproc::filterSepCol_oclsrc,
|
||||
@ -3407,13 +3420,13 @@ static bool ocl_sepColFilter2D(const UMat &buf, UMat &dst, Mat &kernelY, int anc
|
||||
if (k.empty())
|
||||
return false;
|
||||
|
||||
k.args(ocl::KernelArg::PtrReadOnly(buf), (int)(buf.step / buf.elemSize()), buf.cols,
|
||||
buf.rows, ocl::KernelArg::PtrWriteOnly(dst), (int)(dst.offset / dst.elemSize()),
|
||||
(int)(dst.step / dst.elemSize()), dst.cols, dst.rows);
|
||||
k.args(ocl::KernelArg::ReadOnly(buf), ocl::KernelArg::WriteOnly(dst));
|
||||
|
||||
return k.run(2, globalsize, localsize, false);
|
||||
}
|
||||
|
||||
#if 0
|
||||
|
||||
const int optimizedSepFilterLocalSize = 16;
|
||||
|
||||
static bool ocl_sepFilter2D_SinglePass(InputArray _src, OutputArray _dst,
|
||||
@ -3471,18 +3484,19 @@ static bool ocl_sepFilter2D_SinglePass(InputArray _src, OutputArray _dst,
|
||||
return k.run(2, gt2, lt2, false);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
|
||||
InputArray _kernelX, InputArray _kernelY, Point anchor,
|
||||
double delta, int borderType )
|
||||
{
|
||||
Size imgSize = _src.size();
|
||||
// Size imgSize = _src.size();
|
||||
|
||||
if (abs(delta)> FLT_MIN)
|
||||
return false;
|
||||
|
||||
int type = _src.type(), cn = CV_MAT_CN(type);
|
||||
if ( !( (type == CV_8UC1 || type == CV_8UC4 || type == CV_32FC1 || type == CV_32FC4) &&
|
||||
(ddepth == CV_32F || ddepth == CV_16S || ddepth == CV_8U || ddepth < 0) ) )
|
||||
if (cn > 4)
|
||||
return false;
|
||||
|
||||
Mat kernelX = _kernelX.getMat().reshape(1, 1);
|
||||
@ -3501,9 +3515,6 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
|
||||
if (ddepth < 0)
|
||||
ddepth = sdepth;
|
||||
|
||||
// printf("%d %d\n", imgSize.width, optimizedSepFilterLocalSize + (kernelX.rows >> 1));
|
||||
// printf("%d %d\n", imgSize.height, optimizedSepFilterLocalSize + (kernelY.rows >> 1));
|
||||
|
||||
// CV_OCL_RUN_(kernelY.rows <= 21 && kernelX.rows <= 21 &&
|
||||
// imgSize.width > optimizedSepFilterLocalSize + (kernelX.rows >> 1) &&
|
||||
// imgSize.height > optimizedSepFilterLocalSize + (kernelY.rows >> 1),
|
||||
@ -3512,20 +3523,19 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
|
||||
UMat src = _src.getUMat();
|
||||
Size srcWholeSize; Point srcOffset;
|
||||
src.locateROI(srcWholeSize, srcOffset);
|
||||
if ( (0 != (srcOffset.x % 4)) ||
|
||||
(0 != (src.cols % 4)) ||
|
||||
(0 != ((src.step / src.elemSize()) % 4))
|
||||
)
|
||||
return false;
|
||||
|
||||
bool fast8uc1 = type == CV_8UC1 && srcOffset.x % 4 == 0 &&
|
||||
src.cols % 4 == 0 && src.step % 4 == 0;
|
||||
|
||||
Size srcSize = src.size();
|
||||
Size bufSize(srcSize.width, srcSize.height + kernelY.cols - 1);
|
||||
UMat buf; buf.create(bufSize, CV_MAKETYPE(CV_32F, cn));
|
||||
if (!ocl_sepRowFilter2D(src, buf, kernelX, anchor.x, borderType))
|
||||
UMat buf(bufSize, CV_32FC(cn));
|
||||
if (!ocl_sepRowFilter2D(src, buf, kernelX, anchor.x, borderType, ddepth, fast8uc1))
|
||||
return false;
|
||||
|
||||
_dst.create(srcSize, CV_MAKETYPE(ddepth, cn));
|
||||
UMat dst = _dst.getUMat();
|
||||
|
||||
return ocl_sepColFilter2D(buf, dst, kernelY, anchor.y);
|
||||
}
|
||||
|
||||
|
@ -34,29 +34,36 @@
|
||||
//
|
||||
//
|
||||
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define READ_TIMES_COL ((2*(RADIUSY+LSIZE1)-1)/LSIZE1)
|
||||
#define RADIUS 1
|
||||
|
||||
#define noconvert
|
||||
|
||||
/**********************************************************************************
|
||||
These kernels are written for separable filters such as Sobel, Scharr, GaussianBlur.
|
||||
Now(6/29/2011) the kernels only support 8U data type and the anchor of the convovle
|
||||
kernel must be in the center. ROI is not supported either.
|
||||
Each kernels read 4 elements(not 4 pixels), save them to LDS and read the data needed
|
||||
from LDS to calculate the result.
|
||||
The length of the convovle kernel supported is only related to the MAX size of LDS,
|
||||
which is HW related.
|
||||
Niko
|
||||
6/29/2011
|
||||
The info above maybe obsolete.
|
||||
***********************************************************************************/
|
||||
#if CN != 3
|
||||
#define loadpix(addr) *(__global const srcT *)(addr)
|
||||
#define storepix(val, addr) *(__global dstT *)(addr) = val
|
||||
#define SRCSIZE (int)sizeof(srcT)
|
||||
#define DSTSIZE (int)sizeof(dstT)
|
||||
#else
|
||||
#define loadpix(addr) vload3(0, (__global const srcT1 *)(addr))
|
||||
#define storepix(val, addr) vstore3(val, 0, (__global dstT1 *)(addr))
|
||||
#define SRCSIZE (int)sizeof(srcT1)*3
|
||||
#define DSTSIZE (int)sizeof(dstT1)*3
|
||||
#endif
|
||||
|
||||
#define DIG(a) a,
|
||||
__constant float mat_kernel[] = { COEFF };
|
||||
|
||||
__kernel void col_filter(__global const srcT * src, int src_step_in_pixel, int src_whole_cols, int src_whole_rows,
|
||||
__global dstT * dst, int dst_offset_in_pixel, int dst_step_in_pixel, int dst_cols, int dst_rows)
|
||||
__kernel void col_filter(__global const uchar * src, int src_step, int src_offset, int src_whole_rows, int src_whole_cols,
|
||||
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
@ -64,8 +71,8 @@ __kernel void col_filter(__global const srcT * src, int src_step_in_pixel, int s
|
||||
int l_x = get_local_id(0);
|
||||
int l_y = get_local_id(1);
|
||||
|
||||
int start_addr = mad24(y, src_step_in_pixel, x);
|
||||
int end_addr = mad24(src_whole_rows - 1, src_step_in_pixel, src_whole_cols);
|
||||
int start_addr = mad24(y, src_step, x * SRCSIZE);
|
||||
int end_addr = mad24(src_whole_rows - 1, src_step, src_whole_cols * SRCSIZE);
|
||||
|
||||
srcT sum, temp[READ_TIMES_COL];
|
||||
__local srcT LDS_DAT[LSIZE1 * READ_TIMES_COL][LSIZE0 + 1];
|
||||
@ -73,9 +80,9 @@ __kernel void col_filter(__global const srcT * src, int src_step_in_pixel, int s
|
||||
// read pixels from src
|
||||
for (int i = 0; i < READ_TIMES_COL; ++i)
|
||||
{
|
||||
int current_addr = mad24(i, LSIZE1 * src_step_in_pixel, start_addr);
|
||||
int current_addr = mad24(i, LSIZE1 * src_step, start_addr);
|
||||
current_addr = current_addr < end_addr ? current_addr : 0;
|
||||
temp[i] = src[current_addr];
|
||||
temp[i] = loadpix(src + current_addr);
|
||||
}
|
||||
|
||||
// save pixels to lds
|
||||
@ -95,7 +102,7 @@ __kernel void col_filter(__global const srcT * src, int src_step_in_pixel, int s
|
||||
// write the result to dst
|
||||
if (x < dst_cols && y < dst_rows)
|
||||
{
|
||||
start_addr = mad24(y, dst_step_in_pixel, x + dst_offset_in_pixel);
|
||||
dst[start_addr] = convertToDstT(sum);
|
||||
start_addr = mad24(y, dst_step, mad24(DSTSIZE, x, dst_offset));
|
||||
storepix(convertToDstT(sum), dst + start_addr);
|
||||
}
|
||||
}
|
||||
|
@ -34,6 +34,14 @@
|
||||
//
|
||||
//
|
||||
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define READ_TIMES_ROW ((2*(RADIUSX+LSIZE0)-1)/LSIZE0) //for c4 only
|
||||
#define RADIUS 1
|
||||
|
||||
@ -117,16 +125,16 @@
|
||||
|
||||
#define noconvert
|
||||
|
||||
#if cn != 3
|
||||
#if CN != 3
|
||||
#define loadpix(addr) *(__global const srcT *)(addr)
|
||||
#define storepix(val, addr) *(__global dstT *)(addr) = val
|
||||
#define SRCSIZE ((int)sizeof(srcT))
|
||||
#define DSTSIZE ((int)sizeof(dstT))
|
||||
#define SRCSIZE (int)sizeof(srcT)
|
||||
#define DSTSIZE (int)sizeof(dstT)
|
||||
#else
|
||||
#define loadpix(addr) vload3(0, (__global const srcT1 *)(addr))
|
||||
#define storepix(val, addr) vstore3(val, 0, (__global dstT1 *)(addr))
|
||||
#define SRCSIZE ((int)sizeof(srcT1)*3)
|
||||
#define DSTSIZE ((int)sizeof(dstT1)*3)
|
||||
#define SRCSIZE (int)sizeof(srcT1)*3
|
||||
#define DSTSIZE (int)sizeof(dstT1)*3
|
||||
#endif
|
||||
|
||||
#define DIG(a) a,
|
||||
@ -269,32 +277,33 @@ __kernel void row_filter_C1_D0(__global const uchar * src, int src_step_in_pixel
|
||||
dst[start_addr] = sum.x;
|
||||
}
|
||||
|
||||
__kernel void row_filter(__global const srcT * src, int src_step_in_pixel, int src_offset_x, int src_offset_y,
|
||||
__kernel void row_filter(__global const uchar * src, int src_step, int src_offset_x, int src_offset_y,
|
||||
int src_cols, int src_rows, int src_whole_cols, int src_whole_rows,
|
||||
__global dstT * dst, int dst_step_in_pixel, int dst_cols, int dst_rows,
|
||||
__global uchar * dst, int dst_step, int dst_cols, int dst_rows,
|
||||
int radiusy)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
int l_x = get_local_id(0);
|
||||
int l_y = get_local_id(1);
|
||||
|
||||
int start_x = x + src_offset_x - RADIUSX;
|
||||
int start_y = y + src_offset_y - radiusy;
|
||||
int start_addr = mad24(start_y, src_step_in_pixel, start_x);
|
||||
int start_addr = mad24(start_y, src_step, start_x * SRCSIZE);
|
||||
|
||||
dstT sum;
|
||||
srcT temp[READ_TIMES_ROW];
|
||||
|
||||
__local srcT LDS_DAT[LSIZE1][READ_TIMES_ROW * LSIZE0 + 1];
|
||||
#ifdef BORDER_CONSTANT
|
||||
int end_addr = mad24(src_whole_rows - 1, src_step_in_pixel, src_whole_cols);
|
||||
int end_addr = mad24(src_whole_rows - 1, src_step, src_whole_cols * SRCSIZE);
|
||||
|
||||
// read pixels from src
|
||||
for (int i = 0; i < READ_TIMES_ROW; i++)
|
||||
{
|
||||
int current_addr = mad24(i, LSIZE0, start_addr);
|
||||
current_addr = current_addr < end_addr && current_addr > 0 ? current_addr : 0;
|
||||
temp[i] = src[current_addr];
|
||||
int current_addr = mad24(i, LSIZE0 * SRCSIZE, start_addr);
|
||||
current_addr = current_addr < end_addr && current_addr >= 0 ? current_addr : 0;
|
||||
temp[i] = loadpix(src + current_addr);
|
||||
}
|
||||
|
||||
// judge if read out of boundary
|
||||
@ -312,8 +321,7 @@ __kernel void row_filter(__global const srcT * src, int src_step_in_pixel, int s
|
||||
}
|
||||
#endif
|
||||
#else
|
||||
int index[READ_TIMES_ROW];
|
||||
int s_x, s_y;
|
||||
int index[READ_TIMES_ROW], s_x, s_y;
|
||||
|
||||
// judge if read out of boundary
|
||||
for (int i = 0; i < READ_TIMES_ROW; ++i)
|
||||
@ -328,12 +336,12 @@ __kernel void row_filter(__global const srcT * src, int src_step_in_pixel, int s
|
||||
EXTRAPOLATE(s_x, 0, src_whole_cols);
|
||||
EXTRAPOLATE(s_y, 0, src_whole_rows);
|
||||
#endif
|
||||
index[i] = mad24(s_y, src_step_in_pixel, s_x);
|
||||
index[i] = mad24(s_y, src_step, s_x * SRCSIZE);
|
||||
}
|
||||
|
||||
// read pixels from src
|
||||
for (int i = 0; i < READ_TIMES_ROW; ++i)
|
||||
temp[i] = src[index[i]];
|
||||
temp[i] = loadpix(src + index[i]);
|
||||
#endif // BORDER_CONSTANT
|
||||
|
||||
// save pixels to lds
|
||||
@ -349,10 +357,11 @@ __kernel void row_filter(__global const srcT * src, int src_step_in_pixel, int s
|
||||
temp[1] = LDS_DAT[l_y][l_x + RADIUSX + i];
|
||||
sum += mad(convertToDstT(temp[0]), mat_kernel[RADIUSX - i], convertToDstT(temp[1]) * mat_kernel[RADIUSX + i]);
|
||||
}
|
||||
|
||||
// write the result to dst
|
||||
if (x < dst_cols && y < dst_rows)
|
||||
{
|
||||
start_addr = mad24(y, dst_step_in_pixel, x);
|
||||
dst[start_addr] = sum;
|
||||
start_addr = mad24(y, dst_step, x * DSTSIZE);
|
||||
storepix(sum, dst + start_addr);
|
||||
}
|
||||
}
|
||||
|
@ -312,7 +312,7 @@ OCL_TEST_P(MorphologyEx, Mat)
|
||||
(int)BORDER_REFLECT|BORDER_ISOLATED, (int)BORDER_WRAP|BORDER_ISOLATED, \
|
||||
(int)BORDER_REFLECT_101|BORDER_ISOLATED*/) // WRAP and ISOLATED are not supported by cv:: version
|
||||
|
||||
#define FILTER_TYPES Values(CV_8UC1, CV_8UC2, CV_8UC4, CV_32FC1, CV_32FC4, CV_64FC1, CV_64FC4)
|
||||
#define FILTER_TYPES Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4)
|
||||
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Filter, Bilateral, Combine(
|
||||
Values((MatType)CV_8UC1),
|
||||
|
@ -75,9 +75,9 @@ PARAM_TEST_CASE(SepFilter2D, MatDepth, Channels, BorderType, bool, bool)
|
||||
void random_roi()
|
||||
{
|
||||
Size ksize = randomSize(kernelMinSize, kernelMaxSize);
|
||||
if (1 != (ksize.width % 2))
|
||||
if (1 != ksize.width % 2)
|
||||
ksize.width++;
|
||||
if (1 != (ksize.height % 2))
|
||||
if (1 != ksize.height % 2)
|
||||
ksize.height++;
|
||||
|
||||
Mat temp = randomMat(Size(ksize.width, 1), CV_MAKE_TYPE(CV_32F, 1), -MAX_VALUE, MAX_VALUE);
|
||||
@ -86,24 +86,22 @@ PARAM_TEST_CASE(SepFilter2D, MatDepth, Channels, BorderType, bool, bool)
|
||||
cv::normalize(temp, kernelY, 1.0, 0.0, NORM_L1);
|
||||
|
||||
Size roiSize = randomSize(ksize.width + 16, MAX_VALUE, ksize.height + 20, MAX_VALUE);
|
||||
std::cout << roiSize << std::endl;
|
||||
int rest = roiSize.width % 4;
|
||||
if (0 != rest)
|
||||
if (rest != 0)
|
||||
roiSize.width += (4 - rest);
|
||||
Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
|
||||
rest = srcBorder.lef % 4;
|
||||
if (0 != rest)
|
||||
if (rest != 0)
|
||||
srcBorder.lef += (4 - rest);
|
||||
rest = srcBorder.rig % 4;
|
||||
if (0 != rest)
|
||||
if (rest != 0)
|
||||
srcBorder.rig += (4 - rest);
|
||||
randomSubMat(src, src_roi, roiSize, srcBorder, type, -MAX_VALUE, MAX_VALUE);
|
||||
|
||||
Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
|
||||
randomSubMat(dst, dst_roi, roiSize, dstBorder, type, -MAX_VALUE, MAX_VALUE);
|
||||
|
||||
anchor.x = -1;
|
||||
anchor.y = -1;
|
||||
anchor.x = anchor.y = -1;
|
||||
|
||||
UMAT_UPLOAD_INPUT_PARAMETER(src)
|
||||
UMAT_UPLOAD_OUTPUT_PARAMETER(dst)
|
||||
@ -128,11 +126,10 @@ OCL_TEST_P(SepFilter2D, Mat)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
OCL_INSTANTIATE_TEST_CASE_P(ImageProc, SepFilter2D,
|
||||
Combine(
|
||||
Values(CV_8U, CV_32F),
|
||||
Values(1, 4),
|
||||
OCL_ALL_CHANNELS,
|
||||
Values(
|
||||
(BorderType)BORDER_CONSTANT,
|
||||
(BorderType)BORDER_REPLICATE,
|
||||
|
Loading…
Reference in New Issue
Block a user