mirror of
https://github.com/opencv/opencv.git
synced 2025-06-07 17:44:04 +08:00
added 3-channels support to optimized version
This commit is contained in:
parent
63d8a61b9b
commit
2875ce60ea
@ -118,6 +118,8 @@ public:
|
|||||||
|
|
||||||
virtual int kind() const;
|
virtual int kind() const;
|
||||||
virtual int dims(int i=-1) const;
|
virtual int dims(int i=-1) const;
|
||||||
|
virtual int cols(int i=-1) const;
|
||||||
|
virtual int rows(int i=-1) const;
|
||||||
virtual Size size(int i=-1) const;
|
virtual Size size(int i=-1) const;
|
||||||
virtual int sizend(int* sz, int i=-1) const;
|
virtual int sizend(int* sz, int i=-1) const;
|
||||||
virtual bool sameSize(const _InputArray& arr) const;
|
virtual bool sameSize(const _InputArray& arr) const;
|
||||||
|
@ -1416,6 +1416,16 @@ int _InputArray::kind() const
|
|||||||
return flags & KIND_MASK;
|
return flags & KIND_MASK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int _InputArray::rows(int i) const
|
||||||
|
{
|
||||||
|
return size(i).height;
|
||||||
|
}
|
||||||
|
|
||||||
|
int _InputArray::cols(int i) const
|
||||||
|
{
|
||||||
|
return size(i).width;
|
||||||
|
}
|
||||||
|
|
||||||
Size _InputArray::size(int i) const
|
Size _InputArray::size(int i) const
|
||||||
{
|
{
|
||||||
int k = kind();
|
int k = kind();
|
||||||
@ -2078,45 +2088,45 @@ void _OutputArray::create(Size _sz, int mtype, int i, bool allowTransposed, int
|
|||||||
create(2, sizes, mtype, i, allowTransposed, fixedDepthMask);
|
create(2, sizes, mtype, i, allowTransposed, fixedDepthMask);
|
||||||
}
|
}
|
||||||
|
|
||||||
void _OutputArray::create(int rows, int cols, int mtype, int i, bool allowTransposed, int fixedDepthMask) const
|
void _OutputArray::create(int _rows, int _cols, int mtype, int i, bool allowTransposed, int fixedDepthMask) const
|
||||||
{
|
{
|
||||||
int k = kind();
|
int k = kind();
|
||||||
if( k == MAT && i < 0 && !allowTransposed && fixedDepthMask == 0 )
|
if( k == MAT && i < 0 && !allowTransposed && fixedDepthMask == 0 )
|
||||||
{
|
{
|
||||||
CV_Assert(!fixedSize() || ((Mat*)obj)->size.operator()() == Size(cols, rows));
|
CV_Assert(!fixedSize() || ((Mat*)obj)->size.operator()() == Size(_cols, _rows));
|
||||||
CV_Assert(!fixedType() || ((Mat*)obj)->type() == mtype);
|
CV_Assert(!fixedType() || ((Mat*)obj)->type() == mtype);
|
||||||
((Mat*)obj)->create(rows, cols, mtype);
|
((Mat*)obj)->create(_rows, _cols, mtype);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if( k == UMAT && i < 0 && !allowTransposed && fixedDepthMask == 0 )
|
if( k == UMAT && i < 0 && !allowTransposed && fixedDepthMask == 0 )
|
||||||
{
|
{
|
||||||
CV_Assert(!fixedSize() || ((UMat*)obj)->size.operator()() == Size(cols, rows));
|
CV_Assert(!fixedSize() || ((UMat*)obj)->size.operator()() == Size(_cols, _rows));
|
||||||
CV_Assert(!fixedType() || ((UMat*)obj)->type() == mtype);
|
CV_Assert(!fixedType() || ((UMat*)obj)->type() == mtype);
|
||||||
((UMat*)obj)->create(rows, cols, mtype);
|
((UMat*)obj)->create(_rows, _cols, mtype);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if( k == GPU_MAT && i < 0 && !allowTransposed && fixedDepthMask == 0 )
|
if( k == GPU_MAT && i < 0 && !allowTransposed && fixedDepthMask == 0 )
|
||||||
{
|
{
|
||||||
CV_Assert(!fixedSize() || ((cuda::GpuMat*)obj)->size() == Size(cols, rows));
|
CV_Assert(!fixedSize() || ((cuda::GpuMat*)obj)->size() == Size(_cols, _rows));
|
||||||
CV_Assert(!fixedType() || ((cuda::GpuMat*)obj)->type() == mtype);
|
CV_Assert(!fixedType() || ((cuda::GpuMat*)obj)->type() == mtype);
|
||||||
((cuda::GpuMat*)obj)->create(rows, cols, mtype);
|
((cuda::GpuMat*)obj)->create(_rows, _cols, mtype);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if( k == OPENGL_BUFFER && i < 0 && !allowTransposed && fixedDepthMask == 0 )
|
if( k == OPENGL_BUFFER && i < 0 && !allowTransposed && fixedDepthMask == 0 )
|
||||||
{
|
{
|
||||||
CV_Assert(!fixedSize() || ((ogl::Buffer*)obj)->size() == Size(cols, rows));
|
CV_Assert(!fixedSize() || ((ogl::Buffer*)obj)->size() == Size(_cols, _rows));
|
||||||
CV_Assert(!fixedType() || ((ogl::Buffer*)obj)->type() == mtype);
|
CV_Assert(!fixedType() || ((ogl::Buffer*)obj)->type() == mtype);
|
||||||
((ogl::Buffer*)obj)->create(rows, cols, mtype);
|
((ogl::Buffer*)obj)->create(_rows, _cols, mtype);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if( k == CUDA_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 )
|
if( k == CUDA_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 )
|
||||||
{
|
{
|
||||||
CV_Assert(!fixedSize() || ((cuda::CudaMem*)obj)->size() == Size(cols, rows));
|
CV_Assert(!fixedSize() || ((cuda::CudaMem*)obj)->size() == Size(_cols, _rows));
|
||||||
CV_Assert(!fixedType() || ((cuda::CudaMem*)obj)->type() == mtype);
|
CV_Assert(!fixedType() || ((cuda::CudaMem*)obj)->type() == mtype);
|
||||||
((cuda::CudaMem*)obj)->create(rows, cols, mtype);
|
((cuda::CudaMem*)obj)->create(_rows, _cols, mtype);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
int sizes[] = {rows, cols};
|
int sizes[] = {_rows, _cols};
|
||||||
create(2, sizes, mtype, i, allowTransposed, fixedDepthMask);
|
create(2, sizes, mtype, i, allowTransposed, fixedDepthMask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3428,7 +3428,7 @@ static bool ocl_sepColFilter2D(const UMat & buf, UMat & dst, const Mat & kernelY
|
|||||||
const int optimizedSepFilterLocalSize = 16;
|
const int optimizedSepFilterLocalSize = 16;
|
||||||
|
|
||||||
static bool ocl_sepFilter2D_SinglePass(InputArray _src, OutputArray _dst,
|
static bool ocl_sepFilter2D_SinglePass(InputArray _src, OutputArray _dst,
|
||||||
InputArray _row_kernel, InputArray _col_kernel,
|
Mat row_kernel, Mat col_kernel,
|
||||||
int borderType, int ddepth)
|
int borderType, int ddepth)
|
||||||
{
|
{
|
||||||
Size size = _src.size(), wholeSize;
|
Size size = _src.size(), wholeSize;
|
||||||
@ -3439,7 +3439,7 @@ static bool ocl_sepFilter2D_SinglePass(InputArray _src, OutputArray _dst,
|
|||||||
size_t src_step = _src.step(), src_offset = _src.offset();
|
size_t src_step = _src.step(), src_offset = _src.offset();
|
||||||
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
|
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
|
||||||
|
|
||||||
if ((src_offset % src_step) % esz != 0 || (!doubleSupport && sdepth == CV_64F) ||
|
if ((src_offset % src_step) % esz != 0 || (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F)) ||
|
||||||
!(borderType == BORDER_CONSTANT || borderType == BORDER_REPLICATE ||
|
!(borderType == BORDER_CONSTANT || borderType == BORDER_REPLICATE ||
|
||||||
borderType == BORDER_REFLECT || borderType == BORDER_WRAP ||
|
borderType == BORDER_REFLECT || borderType == BORDER_WRAP ||
|
||||||
borderType == BORDER_REFLECT_101))
|
borderType == BORDER_REFLECT_101))
|
||||||
@ -3454,10 +3454,10 @@ static bool ocl_sepFilter2D_SinglePass(InputArray _src, OutputArray _dst,
|
|||||||
|
|
||||||
String opts = cv::format("-D BLK_X=%d -D BLK_Y=%d -D RADIUSX=%d -D RADIUSY=%d%s%s"
|
String opts = cv::format("-D BLK_X=%d -D BLK_Y=%d -D RADIUSX=%d -D RADIUSY=%d%s%s"
|
||||||
" -D srcT=%s -D convertToWT=%s -D WT=%s -D dstT=%s -D convertToDstT=%s"
|
" -D srcT=%s -D convertToWT=%s -D WT=%s -D dstT=%s -D convertToDstT=%s"
|
||||||
" -D %s -D srcT1=%s -D dstT1=%s -D cn=%d", (int)lt2[0], (int)lt2[1],
|
" -D %s -D srcT1=%s -D dstT1=%s -D CN=%d", (int)lt2[0], (int)lt2[1],
|
||||||
_row_kernel.size().height / 2, _col_kernel.size().height / 2,
|
row_kernel.cols / 2, col_kernel.cols / 2,
|
||||||
ocl::kernelToStr(_row_kernel, CV_32F, "KERNEL_MATRIX_X").c_str(),
|
ocl::kernelToStr(row_kernel, CV_32F, "KERNEL_MATRIX_X").c_str(),
|
||||||
ocl::kernelToStr(_col_kernel, CV_32F, "KERNEL_MATRIX_Y").c_str(),
|
ocl::kernelToStr(col_kernel, CV_32F, "KERNEL_MATRIX_Y").c_str(),
|
||||||
ocl::typeToStr(stype), ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]),
|
ocl::typeToStr(stype), ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]),
|
||||||
ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)), ocl::typeToStr(dtype),
|
ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)), ocl::typeToStr(dtype),
|
||||||
ocl::convertTypeStr(wdepth, ddepth, cn, cvt[1]), borderMap[borderType],
|
ocl::convertTypeStr(wdepth, ddepth, cn, cvt[1]), borderMap[borderType],
|
||||||
@ -3486,12 +3486,13 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
|
|||||||
InputArray _kernelX, InputArray _kernelY, Point anchor,
|
InputArray _kernelX, InputArray _kernelY, Point anchor,
|
||||||
double delta, int borderType )
|
double delta, int borderType )
|
||||||
{
|
{
|
||||||
|
const ocl::Device & d = ocl::Device::getDefault();
|
||||||
Size imgSize = _src.size();
|
Size imgSize = _src.size();
|
||||||
|
|
||||||
if (abs(delta)> FLT_MIN)
|
if (abs(delta)> FLT_MIN)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
int type = _src.type(), cn = CV_MAT_CN(type);
|
int type = _src.type(), sdepth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
||||||
if (cn > 4)
|
if (cn > 4)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
@ -3502,21 +3503,21 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
|
|||||||
if (kernelY.cols % 2 != 1)
|
if (kernelY.cols % 2 != 1)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
int sdepth = CV_MAT_DEPTH(type);
|
if (ddepth < 0)
|
||||||
|
ddepth = sdepth;
|
||||||
|
|
||||||
|
CV_OCL_RUN_(kernelY.cols <= 21 && kernelX.cols <= 21 &&
|
||||||
|
imgSize.width > optimizedSepFilterLocalSize + (kernelX.cols >> 1) &&
|
||||||
|
imgSize.height > optimizedSepFilterLocalSize + (kernelY.cols >> 1) &&
|
||||||
|
(!(borderType & BORDER_ISOLATED) || _src.offset() == 0) && anchor == Point(-1, -1) &&
|
||||||
|
(d.isIntel() || (d.isAMD() && !d.hostUnifiedMemory())),
|
||||||
|
ocl_sepFilter2D_SinglePass(_src, _dst, kernelX, kernelY, borderType, ddepth), true)
|
||||||
|
|
||||||
if (anchor.x < 0)
|
if (anchor.x < 0)
|
||||||
anchor.x = kernelX.cols >> 1;
|
anchor.x = kernelX.cols >> 1;
|
||||||
if (anchor.y < 0)
|
if (anchor.y < 0)
|
||||||
anchor.y = kernelY.cols >> 1;
|
anchor.y = kernelY.cols >> 1;
|
||||||
|
|
||||||
if (ddepth < 0)
|
|
||||||
ddepth = sdepth;
|
|
||||||
|
|
||||||
CV_OCL_RUN_(kernelY.rows <= 21 && kernelX.rows <= 21 &&
|
|
||||||
imgSize.width > optimizedSepFilterLocalSize + (kernelX.rows >> 1) &&
|
|
||||||
imgSize.height > optimizedSepFilterLocalSize + (kernelY.rows >> 1) &&
|
|
||||||
(borderType & BORDER_ISOLATED) != 0,
|
|
||||||
ocl_sepFilter2D_SinglePass(_src, _dst, _kernelX, _kernelY, borderType, ddepth), true)
|
|
||||||
|
|
||||||
UMat src = _src.getUMat();
|
UMat src = _src.getUMat();
|
||||||
Size srcWholeSize; Point srcOffset;
|
Size srcWholeSize; Point srcOffset;
|
||||||
src.locateROI(srcWholeSize, srcOffset);
|
src.locateROI(srcWholeSize, srcOffset);
|
||||||
|
@ -74,8 +74,19 @@
|
|||||||
#error No extrapolation method
|
#error No extrapolation method
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define SRC(_x,_y) convertToWT(((global srcT*)(Src+(_y)*src_step))[_x])
|
#if CN != 3
|
||||||
#define DST(_x,_y) (((global dstT*)(Dst+dst_offset+(_y)*dst_step))[_x])
|
#define loadpix(addr) *(__global const srcT *)(addr)
|
||||||
|
#define storepix(val, addr) *(__global dstT *)(addr) = val
|
||||||
|
#define SRCSIZE (int)sizeof(srcT)
|
||||||
|
#define DSTSIZE (int)sizeof(dstT)
|
||||||
|
#else
|
||||||
|
#define loadpix(addr) vload3(0, (__global const srcT1 *)(addr))
|
||||||
|
#define storepix(val, addr) vstore3(val, 0, (__global dstT1 *)(addr))
|
||||||
|
#define SRCSIZE (int)sizeof(srcT1)*3
|
||||||
|
#define DSTSIZE (int)sizeof(dstT1)*3
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define SRC(_x,_y) convertToWT(loadpix(Src + mad24(_y, src_step, SRCSIZE * _x)))
|
||||||
|
|
||||||
#ifdef BORDER_CONSTANT
|
#ifdef BORDER_CONSTANT
|
||||||
// CCCCCC|abcdefgh|CCCCCCC
|
// CCCCCC|abcdefgh|CCCCCCC
|
||||||
@ -172,5 +183,5 @@ __kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int
|
|||||||
sum = mad(lsmemDy[liy][lix+i], mat_kernelX[i], sum);
|
sum = mad(lsmemDy[liy][lix+i], mat_kernelX[i], sum);
|
||||||
|
|
||||||
//store result into destination image
|
//store result into destination image
|
||||||
DST(x,y) = convertToDstT(sum);
|
storepix(convertToDstT(sum), Dst + mad24(y, dst_step, mad24(x, DSTSIZE, dst_offset)));
|
||||||
}
|
}
|
||||||
|
@ -86,16 +86,7 @@ PARAM_TEST_CASE(SepFilter2D, MatDepth, Channels, BorderType, bool, bool)
|
|||||||
cv::normalize(temp, kernelY, 1.0, 0.0, NORM_L1);
|
cv::normalize(temp, kernelY, 1.0, 0.0, NORM_L1);
|
||||||
|
|
||||||
Size roiSize = randomSize(ksize.width, MAX_VALUE, ksize.height, MAX_VALUE);
|
Size roiSize = randomSize(ksize.width, MAX_VALUE, ksize.height, MAX_VALUE);
|
||||||
int rest = roiSize.width % 4;
|
|
||||||
if (rest != 0)
|
|
||||||
roiSize.width += (4 - rest);
|
|
||||||
Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
|
Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
|
||||||
rest = srcBorder.lef % 4;
|
|
||||||
if (rest != 0)
|
|
||||||
srcBorder.lef += (4 - rest);
|
|
||||||
rest = srcBorder.rig % 4;
|
|
||||||
if (rest != 0)
|
|
||||||
srcBorder.rig += (4 - rest);
|
|
||||||
randomSubMat(src, src_roi, roiSize, srcBorder, type, -MAX_VALUE, MAX_VALUE);
|
randomSubMat(src, src_roi, roiSize, srcBorder, type, -MAX_VALUE, MAX_VALUE);
|
||||||
|
|
||||||
Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
|
Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
|
||||||
@ -115,7 +106,7 @@ PARAM_TEST_CASE(SepFilter2D, MatDepth, Channels, BorderType, bool, bool)
|
|||||||
|
|
||||||
OCL_TEST_P(SepFilter2D, Mat)
|
OCL_TEST_P(SepFilter2D, Mat)
|
||||||
{
|
{
|
||||||
for (int j = 0; j < test_loop_times + 1; j++)
|
for (int j = 0; j < test_loop_times + 3; j++)
|
||||||
{
|
{
|
||||||
random_roi();
|
random_roi();
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user