Merge pull request #2525 from ilya-lavrenov:tapi_sep

This commit is contained in:
Andrey Pavlenko 2014-03-24 22:43:34 +04:00 committed by OpenCV Buildbot
commit 9e1124d24a
4 changed files with 18 additions and 16 deletions

View File

@ -3385,7 +3385,7 @@ static bool ocl_sepRowFilter2D(const UMat & src, UMat & buf, const Mat & kernelX
return k.run(2, globalsize, localsize, false);
}
static bool ocl_sepColFilter2D(const UMat & buf, UMat & dst, const Mat & kernelY, int anchor)
static bool ocl_sepColFilter2D(const UMat & buf, UMat & dst, const Mat & kernelY, double delta, int anchor)
{
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
if (dst.depth() == CV_64F && !doubleSupport)
@ -3420,7 +3420,8 @@ static bool ocl_sepColFilter2D(const UMat & buf, UMat & dst, const Mat & kernelY
if (k.empty())
return false;
k.args(ocl::KernelArg::ReadOnly(buf), ocl::KernelArg::WriteOnly(dst));
k.args(ocl::KernelArg::ReadOnly(buf), ocl::KernelArg::WriteOnly(dst),
static_cast<float>(delta));
return k.run(2, globalsize, localsize, false);
}
@ -3429,7 +3430,7 @@ const int optimizedSepFilterLocalSize = 16;
static bool ocl_sepFilter2D_SinglePass(InputArray _src, OutputArray _dst,
Mat row_kernel, Mat col_kernel,
int borderType, int ddepth)
double delta, int borderType, int ddepth)
{
Size size = _src.size(), wholeSize;
Point origin;
@ -3477,7 +3478,8 @@ static bool ocl_sepFilter2D_SinglePass(InputArray _src, OutputArray _dst,
src.locateROI(wholeSize, origin);
k.args(ocl::KernelArg::PtrReadOnly(src), (int)src_step, src_offset_x, src_offset_y,
wholeSize.height, wholeSize.width, ocl::KernelArg::WriteOnly(dst));
wholeSize.height, wholeSize.width, ocl::KernelArg::WriteOnly(dst),
static_cast<float>(delta));
return k.run(2, gt2, lt2, false);
}
@ -3489,9 +3491,6 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
const ocl::Device & d = ocl::Device::getDefault();
Size imgSize = _src.size();
if (abs(delta)> FLT_MIN)
return false;
int type = _src.type(), sdepth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
if (cn > 4)
return false;
@ -3511,7 +3510,8 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
imgSize.height > optimizedSepFilterLocalSize + (kernelY.cols >> 1) &&
(!(borderType & BORDER_ISOLATED) || _src.offset() == 0) && anchor == Point(-1, -1) &&
(d.isIntel() || (d.isAMD() && !d.hostUnifiedMemory())),
ocl_sepFilter2D_SinglePass(_src, _dst, kernelX, kernelY, borderType, ddepth), true)
ocl_sepFilter2D_SinglePass(_src, _dst, kernelX, kernelY, delta,
borderType & ~BORDER_ISOLATED, ddepth), true)
if (anchor.x < 0)
anchor.x = kernelX.cols >> 1;
@ -3534,7 +3534,7 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
_dst.create(srcSize, CV_MAKETYPE(ddepth, cn));
UMat dst = _dst.getUMat();
return ocl_sepColFilter2D(buf, dst, kernelY, anchor.y);
return ocl_sepColFilter2D(buf, dst, kernelY, delta, anchor.y);
}
#endif

View File

@ -63,7 +63,7 @@
__constant float mat_kernel[] = { COEFF };
__kernel void col_filter(__global const uchar * src, int src_step, int src_offset, int src_whole_rows, int src_whole_cols,
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols)
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols, float delta)
{
int x = get_global_id(0);
int y = get_global_id(1);
@ -103,6 +103,6 @@ __kernel void col_filter(__global const uchar * src, int src_step, int src_offse
if (x < dst_cols && y < dst_rows)
{
start_addr = mad24(y, dst_step, mad24(DSTSIZE, x, dst_offset));
storepix(convertToDstT(sum), dst + start_addr);
storepix(convertToDstT(sum + (srcT)(delta)), dst + start_addr);
}
}

View File

@ -104,7 +104,7 @@ __constant float mat_kernelX[] = { KERNEL_MATRIX_X };
__constant float mat_kernelY[] = { KERNEL_MATRIX_Y };
__kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int srcOffsetY, int height, int width,
__global uchar* Dst, int dst_step, int dst_offset, int dst_rows, int dst_cols)
__global uchar* Dst, int dst_step, int dst_offset, int dst_rows, int dst_cols, float delta)
{
// RADIUSX, RADIUSY are filter dimensions
// BLK_X, BLK_Y are local workgroup sizes
@ -182,6 +182,6 @@ __kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int
for (i=0; i<=2*RADIUSX; i++)
sum = mad(lsmemDy[liy][lix+i], mat_kernelX[i], sum);
//store result into destination image
storepix(convertToDstT(sum), Dst + mad24(y, dst_step, mad24(x, DSTSIZE, dst_offset)));
// store result into destination image
storepix(convertToDstT(sum + (WT)(delta)), Dst + mad24(y, dst_step, mad24(x, DSTSIZE, dst_offset)));
}

View File

@ -61,6 +61,7 @@ PARAM_TEST_CASE(SepFilter2D, MatDepth, Channels, BorderType, bool, bool)
int borderType;
bool useRoi;
Mat kernelX, kernelY;
double delta;
TEST_DECLARE_INPUT_PARAMETER(src);
TEST_DECLARE_OUTPUT_PARAMETER(dst);
@ -93,6 +94,7 @@ PARAM_TEST_CASE(SepFilter2D, MatDepth, Channels, BorderType, bool, bool)
randomSubMat(dst, dst_roi, roiSize, dstBorder, type, -MAX_VALUE, MAX_VALUE);
anchor.x = anchor.y = -1;
delta = randomDouble(-100, 100);
UMAT_UPLOAD_INPUT_PARAMETER(src);
UMAT_UPLOAD_OUTPUT_PARAMETER(dst);
@ -110,8 +112,8 @@ OCL_TEST_P(SepFilter2D, Mat)
{
random_roi();
OCL_OFF(cv::sepFilter2D(src_roi, dst_roi, -1, kernelX, kernelY, anchor, 0.0, borderType));
OCL_ON(cv::sepFilter2D(usrc_roi, udst_roi, -1, kernelX, kernelY, anchor, 0.0, borderType));
OCL_OFF(cv::sepFilter2D(src_roi, dst_roi, -1, kernelX, kernelY, anchor, delta, borderType));
OCL_ON(cv::sepFilter2D(usrc_roi, udst_roi, -1, kernelX, kernelY, anchor, delta, borderType));
Near(1.0);
}