mirror of
https://github.com/opencv/opencv.git
synced 2024-11-28 13:10:12 +08:00
Merge pull request #2525 from ilya-lavrenov:tapi_sep
This commit is contained in:
commit
9e1124d24a
@ -3385,7 +3385,7 @@ static bool ocl_sepRowFilter2D(const UMat & src, UMat & buf, const Mat & kernelX
|
||||
return k.run(2, globalsize, localsize, false);
|
||||
}
|
||||
|
||||
static bool ocl_sepColFilter2D(const UMat & buf, UMat & dst, const Mat & kernelY, int anchor)
|
||||
static bool ocl_sepColFilter2D(const UMat & buf, UMat & dst, const Mat & kernelY, double delta, int anchor)
|
||||
{
|
||||
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
|
||||
if (dst.depth() == CV_64F && !doubleSupport)
|
||||
@ -3420,7 +3420,8 @@ static bool ocl_sepColFilter2D(const UMat & buf, UMat & dst, const Mat & kernelY
|
||||
if (k.empty())
|
||||
return false;
|
||||
|
||||
k.args(ocl::KernelArg::ReadOnly(buf), ocl::KernelArg::WriteOnly(dst));
|
||||
k.args(ocl::KernelArg::ReadOnly(buf), ocl::KernelArg::WriteOnly(dst),
|
||||
static_cast<float>(delta));
|
||||
|
||||
return k.run(2, globalsize, localsize, false);
|
||||
}
|
||||
@ -3429,7 +3430,7 @@ const int optimizedSepFilterLocalSize = 16;
|
||||
|
||||
static bool ocl_sepFilter2D_SinglePass(InputArray _src, OutputArray _dst,
|
||||
Mat row_kernel, Mat col_kernel,
|
||||
int borderType, int ddepth)
|
||||
double delta, int borderType, int ddepth)
|
||||
{
|
||||
Size size = _src.size(), wholeSize;
|
||||
Point origin;
|
||||
@ -3477,7 +3478,8 @@ static bool ocl_sepFilter2D_SinglePass(InputArray _src, OutputArray _dst,
|
||||
src.locateROI(wholeSize, origin);
|
||||
|
||||
k.args(ocl::KernelArg::PtrReadOnly(src), (int)src_step, src_offset_x, src_offset_y,
|
||||
wholeSize.height, wholeSize.width, ocl::KernelArg::WriteOnly(dst));
|
||||
wholeSize.height, wholeSize.width, ocl::KernelArg::WriteOnly(dst),
|
||||
static_cast<float>(delta));
|
||||
|
||||
return k.run(2, gt2, lt2, false);
|
||||
}
|
||||
@ -3489,9 +3491,6 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
|
||||
const ocl::Device & d = ocl::Device::getDefault();
|
||||
Size imgSize = _src.size();
|
||||
|
||||
if (abs(delta)> FLT_MIN)
|
||||
return false;
|
||||
|
||||
int type = _src.type(), sdepth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
||||
if (cn > 4)
|
||||
return false;
|
||||
@ -3511,7 +3510,8 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
|
||||
imgSize.height > optimizedSepFilterLocalSize + (kernelY.cols >> 1) &&
|
||||
(!(borderType & BORDER_ISOLATED) || _src.offset() == 0) && anchor == Point(-1, -1) &&
|
||||
(d.isIntel() || (d.isAMD() && !d.hostUnifiedMemory())),
|
||||
ocl_sepFilter2D_SinglePass(_src, _dst, kernelX, kernelY, borderType, ddepth), true)
|
||||
ocl_sepFilter2D_SinglePass(_src, _dst, kernelX, kernelY, delta,
|
||||
borderType & ~BORDER_ISOLATED, ddepth), true)
|
||||
|
||||
if (anchor.x < 0)
|
||||
anchor.x = kernelX.cols >> 1;
|
||||
@ -3534,7 +3534,7 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
|
||||
_dst.create(srcSize, CV_MAKETYPE(ddepth, cn));
|
||||
UMat dst = _dst.getUMat();
|
||||
|
||||
return ocl_sepColFilter2D(buf, dst, kernelY, anchor.y);
|
||||
return ocl_sepColFilter2D(buf, dst, kernelY, delta, anchor.y);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -63,7 +63,7 @@
|
||||
__constant float mat_kernel[] = { COEFF };
|
||||
|
||||
__kernel void col_filter(__global const uchar * src, int src_step, int src_offset, int src_whole_rows, int src_whole_cols,
|
||||
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols)
|
||||
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols, float delta)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
@ -103,6 +103,6 @@ __kernel void col_filter(__global const uchar * src, int src_step, int src_offse
|
||||
if (x < dst_cols && y < dst_rows)
|
||||
{
|
||||
start_addr = mad24(y, dst_step, mad24(DSTSIZE, x, dst_offset));
|
||||
storepix(convertToDstT(sum), dst + start_addr);
|
||||
storepix(convertToDstT(sum + (srcT)(delta)), dst + start_addr);
|
||||
}
|
||||
}
|
||||
|
@ -104,7 +104,7 @@ __constant float mat_kernelX[] = { KERNEL_MATRIX_X };
|
||||
__constant float mat_kernelY[] = { KERNEL_MATRIX_Y };
|
||||
|
||||
__kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int srcOffsetY, int height, int width,
|
||||
__global uchar* Dst, int dst_step, int dst_offset, int dst_rows, int dst_cols)
|
||||
__global uchar* Dst, int dst_step, int dst_offset, int dst_rows, int dst_cols, float delta)
|
||||
{
|
||||
// RADIUSX, RADIUSY are filter dimensions
|
||||
// BLK_X, BLK_Y are local workgroup sizes
|
||||
@ -182,6 +182,6 @@ __kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int
|
||||
for (i=0; i<=2*RADIUSX; i++)
|
||||
sum = mad(lsmemDy[liy][lix+i], mat_kernelX[i], sum);
|
||||
|
||||
//store result into destination image
|
||||
storepix(convertToDstT(sum), Dst + mad24(y, dst_step, mad24(x, DSTSIZE, dst_offset)));
|
||||
// store result into destination image
|
||||
storepix(convertToDstT(sum + (WT)(delta)), Dst + mad24(y, dst_step, mad24(x, DSTSIZE, dst_offset)));
|
||||
}
|
||||
|
@ -61,6 +61,7 @@ PARAM_TEST_CASE(SepFilter2D, MatDepth, Channels, BorderType, bool, bool)
|
||||
int borderType;
|
||||
bool useRoi;
|
||||
Mat kernelX, kernelY;
|
||||
double delta;
|
||||
|
||||
TEST_DECLARE_INPUT_PARAMETER(src);
|
||||
TEST_DECLARE_OUTPUT_PARAMETER(dst);
|
||||
@ -93,6 +94,7 @@ PARAM_TEST_CASE(SepFilter2D, MatDepth, Channels, BorderType, bool, bool)
|
||||
randomSubMat(dst, dst_roi, roiSize, dstBorder, type, -MAX_VALUE, MAX_VALUE);
|
||||
|
||||
anchor.x = anchor.y = -1;
|
||||
delta = randomDouble(-100, 100);
|
||||
|
||||
UMAT_UPLOAD_INPUT_PARAMETER(src);
|
||||
UMAT_UPLOAD_OUTPUT_PARAMETER(dst);
|
||||
@ -110,8 +112,8 @@ OCL_TEST_P(SepFilter2D, Mat)
|
||||
{
|
||||
random_roi();
|
||||
|
||||
OCL_OFF(cv::sepFilter2D(src_roi, dst_roi, -1, kernelX, kernelY, anchor, 0.0, borderType));
|
||||
OCL_ON(cv::sepFilter2D(usrc_roi, udst_roi, -1, kernelX, kernelY, anchor, 0.0, borderType));
|
||||
OCL_OFF(cv::sepFilter2D(src_roi, dst_roi, -1, kernelX, kernelY, anchor, delta, borderType));
|
||||
OCL_ON(cv::sepFilter2D(usrc_roi, udst_roi, -1, kernelX, kernelY, anchor, delta, borderType));
|
||||
|
||||
Near(1.0);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user