mirror of
https://github.com/opencv/opencv.git
synced 2025-07-31 01:47:12 +08:00
imgproc: dispatch box_filter
This commit is contained in:
parent
ce3c92eb1f
commit
5a01227aa1
@ -1,6 +1,7 @@
|
||||
set(the_description "Image Processing")
|
||||
ocv_add_dispatched_file(accum SSE4_1 AVX AVX2)
|
||||
ocv_add_dispatched_file(bilateral_filter SSE2 AVX2)
|
||||
ocv_add_dispatched_file(box_filter SSE2 SSE4_1 AVX2)
|
||||
ocv_add_dispatched_file(filter SSE2 SSE4_1 AVX2)
|
||||
ocv_add_dispatched_file(color_hsv SSE2 SSE4_1 AVX2)
|
||||
ocv_add_dispatched_file(color_rgb SSE2 SSE4_1 AVX2)
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -42,21 +42,25 @@
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "opencv2/core/hal/intrin.hpp"
|
||||
#include "opencl_kernels_imgproc.hpp"
|
||||
|
||||
#include "opencv2/core/openvx/ovx_defs.hpp"
|
||||
namespace cv {
|
||||
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
|
||||
// forward declarations
|
||||
Ptr<BaseRowFilter> getRowSumFilter(int srcType, int sumType, int ksize, int anchor);
|
||||
Ptr<BaseColumnFilter> getColumnSumFilter(int sumType, int dstType, int ksize, int anchor, double scale);
|
||||
Ptr<FilterEngine> createBoxFilter(int srcType, int dstType, Size ksize,
|
||||
Point anchor, bool normalize, int borderType);
|
||||
|
||||
namespace cv
|
||||
{
|
||||
Ptr<BaseRowFilter> getSqrRowSumFilter(int srcType, int sumType, int ksize, int anchor);
|
||||
|
||||
|
||||
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
|
||||
/****************************************************************************************\
|
||||
Box Filter
|
||||
\****************************************************************************************/
|
||||
|
||||
namespace {
|
||||
template<typename T, typename ST>
|
||||
struct RowSum :
|
||||
public BaseRowFilter
|
||||
@ -70,6 +74,8 @@ struct RowSum :
|
||||
|
||||
virtual void operator()(const uchar* src, uchar* dst, int width, int cn) CV_OVERRIDE
|
||||
{
|
||||
CV_INSTRUMENT_REGION();
|
||||
|
||||
const T* S = (const T*)src;
|
||||
ST* D = (ST*)dst;
|
||||
int i = 0, k, ksz_cn = ksize*cn;
|
||||
@ -183,6 +189,8 @@ struct ColumnSum :
|
||||
|
||||
virtual void operator()(const uchar** src, uchar* dst, int dststep, int count, int width) CV_OVERRIDE
|
||||
{
|
||||
CV_INSTRUMENT_REGION();
|
||||
|
||||
int i;
|
||||
ST* SUM;
|
||||
bool haveScale = scale != 1;
|
||||
@ -281,6 +289,8 @@ struct ColumnSum<int, uchar> :
|
||||
|
||||
virtual void operator()(const uchar** src, uchar* dst, int dststep, int count, int width) CV_OVERRIDE
|
||||
{
|
||||
CV_INSTRUMENT_REGION();
|
||||
|
||||
int* SUM;
|
||||
bool haveScale = scale != 1;
|
||||
double _scale = scale;
|
||||
@ -408,9 +418,6 @@ struct ColumnSum<int, uchar> :
|
||||
}
|
||||
dst += dststep;
|
||||
}
|
||||
#if CV_SIMD
|
||||
vx_cleanup();
|
||||
#endif
|
||||
}
|
||||
|
||||
double scale;
|
||||
@ -452,6 +459,8 @@ public BaseColumnFilter
|
||||
|
||||
virtual void operator()(const uchar** src, uchar* dst, int dststep, int count, int width) CV_OVERRIDE
|
||||
{
|
||||
CV_INSTRUMENT_REGION();
|
||||
|
||||
const int ds = divScale;
|
||||
const int dd = divDelta;
|
||||
ushort* SUM;
|
||||
@ -586,9 +595,6 @@ public BaseColumnFilter
|
||||
}
|
||||
dst += dststep;
|
||||
}
|
||||
#if CV_SIMD
|
||||
vx_cleanup();
|
||||
#endif
|
||||
}
|
||||
|
||||
double scale;
|
||||
@ -616,6 +622,8 @@ struct ColumnSum<int, short> :
|
||||
|
||||
virtual void operator()(const uchar** src, uchar* dst, int dststep, int count, int width) CV_OVERRIDE
|
||||
{
|
||||
CV_INSTRUMENT_REGION();
|
||||
|
||||
int i;
|
||||
int* SUM;
|
||||
bool haveScale = scale != 1;
|
||||
@ -739,9 +747,6 @@ struct ColumnSum<int, short> :
|
||||
}
|
||||
dst += dststep;
|
||||
}
|
||||
#if CV_SIMD
|
||||
vx_cleanup();
|
||||
#endif
|
||||
}
|
||||
|
||||
double scale;
|
||||
@ -767,6 +772,8 @@ struct ColumnSum<int, ushort> :
|
||||
|
||||
virtual void operator()(const uchar** src, uchar* dst, int dststep, int count, int width) CV_OVERRIDE
|
||||
{
|
||||
CV_INSTRUMENT_REGION();
|
||||
|
||||
int* SUM;
|
||||
bool haveScale = scale != 1;
|
||||
double _scale = scale;
|
||||
@ -888,9 +895,6 @@ struct ColumnSum<int, ushort> :
|
||||
}
|
||||
dst += dststep;
|
||||
}
|
||||
#if CV_SIMD
|
||||
vx_cleanup();
|
||||
#endif
|
||||
}
|
||||
|
||||
double scale;
|
||||
@ -915,6 +919,8 @@ struct ColumnSum<int, int> :
|
||||
|
||||
virtual void operator()(const uchar** src, uchar* dst, int dststep, int count, int width) CV_OVERRIDE
|
||||
{
|
||||
CV_INSTRUMENT_REGION();
|
||||
|
||||
int* SUM;
|
||||
bool haveScale = scale != 1;
|
||||
double _scale = scale;
|
||||
@ -1022,9 +1028,6 @@ struct ColumnSum<int, int> :
|
||||
}
|
||||
dst += dststep;
|
||||
}
|
||||
#if CV_SIMD
|
||||
vx_cleanup();
|
||||
#endif
|
||||
}
|
||||
|
||||
double scale;
|
||||
@ -1050,6 +1053,8 @@ struct ColumnSum<int, float> :
|
||||
|
||||
virtual void operator()(const uchar** src, uchar* dst, int dststep, int count, int width) CV_OVERRIDE
|
||||
{
|
||||
CV_INSTRUMENT_REGION();
|
||||
|
||||
int* SUM;
|
||||
bool haveScale = scale != 1;
|
||||
double _scale = scale;
|
||||
@ -1154,9 +1159,6 @@ struct ColumnSum<int, float> :
|
||||
}
|
||||
dst += dststep;
|
||||
}
|
||||
#if CV_SIMD
|
||||
vx_cleanup();
|
||||
#endif
|
||||
}
|
||||
|
||||
double scale;
|
||||
@ -1164,243 +1166,13 @@ struct ColumnSum<int, float> :
|
||||
std::vector<int> sum;
|
||||
};
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
} // namespace anon
|
||||
|
||||
static bool ocl_boxFilter3x3_8UC1( InputArray _src, OutputArray _dst, int ddepth,
|
||||
Size ksize, Point anchor, int borderType, bool normalize )
|
||||
|
||||
Ptr<BaseRowFilter> getRowSumFilter(int srcType, int sumType, int ksize, int anchor)
|
||||
{
|
||||
const ocl::Device & dev = ocl::Device::getDefault();
|
||||
int type = _src.type(), sdepth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
||||
CV_INSTRUMENT_REGION();
|
||||
|
||||
if (ddepth < 0)
|
||||
ddepth = sdepth;
|
||||
|
||||
if (anchor.x < 0)
|
||||
anchor.x = ksize.width / 2;
|
||||
if (anchor.y < 0)
|
||||
anchor.y = ksize.height / 2;
|
||||
|
||||
if ( !(dev.isIntel() && (type == CV_8UC1) &&
|
||||
(_src.offset() == 0) && (_src.step() % 4 == 0) &&
|
||||
(_src.cols() % 16 == 0) && (_src.rows() % 2 == 0) &&
|
||||
(anchor.x == 1) && (anchor.y == 1) &&
|
||||
(ksize.width == 3) && (ksize.height == 3)) )
|
||||
return false;
|
||||
|
||||
float alpha = 1.0f / (ksize.height * ksize.width);
|
||||
Size size = _src.size();
|
||||
size_t globalsize[2] = { 0, 0 };
|
||||
size_t localsize[2] = { 0, 0 };
|
||||
const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", 0, "BORDER_REFLECT_101" };
|
||||
|
||||
globalsize[0] = size.width / 16;
|
||||
globalsize[1] = size.height / 2;
|
||||
|
||||
char build_opts[1024];
|
||||
sprintf(build_opts, "-D %s %s", borderMap[borderType], normalize ? "-D NORMALIZE" : "");
|
||||
|
||||
ocl::Kernel kernel("boxFilter3x3_8UC1_cols16_rows2", cv::ocl::imgproc::boxFilter3x3_oclsrc, build_opts);
|
||||
if (kernel.empty())
|
||||
return false;
|
||||
|
||||
UMat src = _src.getUMat();
|
||||
_dst.create(size, CV_MAKETYPE(ddepth, cn));
|
||||
if (!(_dst.offset() == 0 && _dst.step() % 4 == 0))
|
||||
return false;
|
||||
UMat dst = _dst.getUMat();
|
||||
|
||||
int idxArg = kernel.set(0, ocl::KernelArg::PtrReadOnly(src));
|
||||
idxArg = kernel.set(idxArg, (int)src.step);
|
||||
idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dst));
|
||||
idxArg = kernel.set(idxArg, (int)dst.step);
|
||||
idxArg = kernel.set(idxArg, (int)dst.rows);
|
||||
idxArg = kernel.set(idxArg, (int)dst.cols);
|
||||
if (normalize)
|
||||
idxArg = kernel.set(idxArg, (float)alpha);
|
||||
|
||||
return kernel.run(2, globalsize, (localsize[0] == 0) ? NULL : localsize, false);
|
||||
}
|
||||
|
||||
// OpenCL implementation shared by boxFilter() and sqrBoxFilter().
//
// _src/_dst   - input / output arrays; _dst is (re)created as size x CV_MAKETYPE(ddepth, cn).
// ddepth      - requested output depth; a negative value means "same as source".
// ksize       - box kernel size.
// anchor      - kernel anchor; a negative coordinate selects the kernel centre.
// borderType  - border mode, optionally OR-ed with BORDER_ISOLATED.
// normalize   - when true the kernel is scaled by 1 / (ksize.width * ksize.height).
// sqr         - when true the kernels are built with -D SQR (used by sqrBoxFilter()).
//
// Returns true when the kernel was launched; false tells the caller to fall
// back to another implementation.
static bool ocl_boxFilter( InputArray _src, OutputArray _dst, int ddepth,
                           Size ksize, Point anchor, int borderType, bool normalize, bool sqr = false )
{
    const ocl::Device & dev = ocl::Device::getDefault();
    int type = _src.type(), sdepth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), esz = CV_ELEM_SIZE(type);
    bool doubleSupport = dev.doubleFPConfig() > 0;

    if (ddepth < 0)
        ddepth = sdepth;

    if (cn > 4 || (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F)) ||
        _src.offset() % esz != 0 || _src.step() % esz != 0)
        return false;

    if (anchor.x < 0)
        anchor.x = ksize.width / 2;
    if (anchor.y < 0)
        anchor.y = ksize.height / 2;

    bool isolated = (borderType & BORDER_ISOLATED) != 0;
    borderType &= ~BORDER_ISOLATED;

    // Border mode -> kernel macro name. The table has a hole (null entry) and
    // only five slots, so the index must be validated before it is handed to
    // a "%s" conversion below: a null or out-of-range entry would be
    // undefined behaviour. Unsupported modes are rejected here so the caller
    // falls back to the non-OpenCL path.
    const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", 0, "BORDER_REFLECT_101" };
    const int borderMapSize = (int)(sizeof(borderMap) / sizeof(borderMap[0]));
    if (borderType < 0 || borderType >= borderMapSize || borderMap[borderType] == 0)
        return false;

    int computeUnits = dev.maxComputeUnits();
    float alpha = 1.0f / (ksize.height * ksize.width);
    Size size = _src.size(), wholeSize;
    int wdepth = std::max(CV_32F, std::max(ddepth, sdepth)),
        wtype = CV_MAKE_TYPE(wdepth, cn), dtype = CV_MAKE_TYPE(ddepth, cn);

    size_t globalsize[2] = { (size_t)size.width, (size_t)size.height };
    size_t localsize_general[2] = { 0, 1 }, * localsize = NULL;

    UMat src = _src.getUMat();
    if (!isolated)
    {
        // Non-isolated borders may read pixels of the parent matrix around the ROI.
        Point ofs;
        src.locateROI(wholeSize, ofs);
    }

    int h = isolated ? size.height : wholeSize.height;
    int w = isolated ? size.width : wholeSize.width;

    size_t maxWorkItemSizes[32];
    dev.maxWorkItemSizes(maxWorkItemSizes);
    int tryWorkItems = (int)maxWorkItemSizes[0];

    ocl::Kernel kernel;

    if (dev.isIntel() && !(dev.type() & ocl::Device::TYPE_CPU) &&
        ((ksize.width < 5 && ksize.height < 5 && esz <= 4) ||
         (ksize.width == 5 && ksize.height == 5 && cn == 1)))
    {
        // Small-kernel path ("filterSmall" kernel) for Intel non-CPU devices.
        if (w < ksize.width || h < ksize.height)
            return false;

        // Figure out what vector size to use for loading the pixels.
        int pxLoadNumPixels = cn != 1 || size.width % 4 ? 1 : 4;
        int pxLoadVecSize = cn * pxLoadNumPixels;

        // Figure out how many pixels per work item to compute in X and Y
        // directions.  Too many and we run out of registers.
        int pxPerWorkItemX = 1, pxPerWorkItemY = 1;
        if (cn <= 2 && ksize.width <= 4 && ksize.height <= 4)
        {
            pxPerWorkItemX = size.width % 8 ? size.width % 4 ? size.width % 2 ? 1 : 2 : 4 : 8;
            pxPerWorkItemY = size.height % 2 ? 1 : 2;
        }
        else if (cn < 4 || (ksize.width <= 4 && ksize.height <= 4))
        {
            pxPerWorkItemX = size.width % 2 ? 1 : 2;
            pxPerWorkItemY = size.height % 2 ? 1 : 2;
        }
        globalsize[0] = size.width / pxPerWorkItemX;
        globalsize[1] = size.height / pxPerWorkItemY;

        // Need some padding in the private array for pixels
        int privDataWidth = roundUp(pxPerWorkItemX + ksize.width - 1, pxLoadNumPixels);

        // Make the global size a nice round number so the runtime can pick
        // from reasonable choices for the workgroup size
        const int wgRound = 256;
        globalsize[0] = roundUp(globalsize[0], wgRound);

        // Build options are assembled with format() (as in the generic path
        // below) instead of sprintf() into a fixed-size stack buffer.
        char cvt[2][40];
        String build_options = format("-D cn=%d "
                "-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d "
                "-D PX_LOAD_VEC_SIZE=%d -D PX_LOAD_NUM_PX=%d "
                "-D PX_PER_WI_X=%d -D PX_PER_WI_Y=%d -D PRIV_DATA_WIDTH=%d -D %s -D %s "
                "-D PX_LOAD_X_ITERATIONS=%d -D PX_LOAD_Y_ITERATIONS=%d "
                "-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D WT=%s -D WT1=%s "
                "-D convertToWT=%s -D convertToDstT=%s%s%s -D PX_LOAD_FLOAT_VEC_CONV=convert_%s -D OP_BOX_FILTER",
                cn, anchor.x, anchor.y, ksize.width, ksize.height,
                pxLoadVecSize, pxLoadNumPixels,
                pxPerWorkItemX, pxPerWorkItemY, privDataWidth, borderMap[borderType],
                isolated ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED",
                privDataWidth / pxLoadNumPixels, pxPerWorkItemY + ksize.height - 1,
                ocl::typeToStr(type), ocl::typeToStr(sdepth), ocl::typeToStr(dtype),
                ocl::typeToStr(ddepth), ocl::typeToStr(wtype), ocl::typeToStr(wdepth),
                ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]),
                ocl::convertTypeStr(wdepth, ddepth, cn, cvt[1]),
                normalize ? " -D NORMALIZE" : "", sqr ? " -D SQR" : "",
                ocl::typeToStr(CV_MAKE_TYPE(wdepth, pxLoadVecSize)) //PX_LOAD_FLOAT_VEC_CONV
                );

        if (!kernel.create("filterSmall", cv::ocl::imgproc::filterSmall_oclsrc, build_options))
            return false;
    }
    else
    {
        // Generic path ("boxFilter" kernel): retry with a smaller work-group
        // width until the compiled kernel accepts it.
        localsize = localsize_general;
        for ( ; ; )
        {
            int BLOCK_SIZE_X = tryWorkItems, BLOCK_SIZE_Y = std::min(ksize.height * 10, size.height);

            while (BLOCK_SIZE_X > 32 && BLOCK_SIZE_X >= ksize.width * 2 && BLOCK_SIZE_X > size.width * 2)
                BLOCK_SIZE_X /= 2;
            while (BLOCK_SIZE_Y < BLOCK_SIZE_X / 8 && BLOCK_SIZE_Y * computeUnits * 32 < size.height)
                BLOCK_SIZE_Y *= 2;

            if (ksize.width > BLOCK_SIZE_X || w < ksize.width || h < ksize.height)
                return false;

            char cvt[2][50];
            String opts = format("-D LOCAL_SIZE_X=%d -D BLOCK_SIZE_Y=%d -D ST=%s -D DT=%s -D WT=%s -D convertToDT=%s -D convertToWT=%s"
                                 " -D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d -D %s%s%s%s%s"
                                 " -D ST1=%s -D DT1=%s -D cn=%d",
                                 BLOCK_SIZE_X, BLOCK_SIZE_Y, ocl::typeToStr(type), ocl::typeToStr(CV_MAKE_TYPE(ddepth, cn)),
                                 ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)),
                                 ocl::convertTypeStr(wdepth, ddepth, cn, cvt[0]),
                                 ocl::convertTypeStr(sdepth, wdepth, cn, cvt[1]),
                                 anchor.x, anchor.y, ksize.width, ksize.height, borderMap[borderType],
                                 isolated ? " -D BORDER_ISOLATED" : "", doubleSupport ? " -D DOUBLE_SUPPORT" : "",
                                 normalize ? " -D NORMALIZE" : "", sqr ? " -D SQR" : "",
                                 ocl::typeToStr(sdepth), ocl::typeToStr(ddepth), cn);

            localsize[0] = BLOCK_SIZE_X;
            globalsize[0] = divUp(size.width, BLOCK_SIZE_X - (ksize.width - 1)) * BLOCK_SIZE_X;
            globalsize[1] = divUp(size.height, BLOCK_SIZE_Y);

            kernel.create("boxFilter", cv::ocl::imgproc::boxFilter_oclsrc, opts);
            if (kernel.empty())
                return false;

            size_t kernelWorkGroupSize = kernel.workGroupSize();
            if (localsize[0] <= kernelWorkGroupSize)
                break;
            if (BLOCK_SIZE_X < (int)kernelWorkGroupSize)
                return false;

            tryWorkItems = (int)kernelWorkGroupSize;
        }
    }

    _dst.create(size, CV_MAKETYPE(ddepth, cn));
    UMat dst = _dst.getUMat();

    // Kernel arguments: source pointer/step, ROI offset and end coordinates
    // inside the readable area, destination, and (optionally) the scale.
    int idxArg = kernel.set(0, ocl::KernelArg::PtrReadOnly(src));
    idxArg = kernel.set(idxArg, (int)src.step);
    int srcOffsetX = (int)((src.offset % src.step) / src.elemSize());
    int srcOffsetY = (int)(src.offset / src.step);
    int srcEndX = isolated ? srcOffsetX + size.width : wholeSize.width;
    int srcEndY = isolated ? srcOffsetY + size.height : wholeSize.height;
    idxArg = kernel.set(idxArg, srcOffsetX);
    idxArg = kernel.set(idxArg, srcOffsetY);
    idxArg = kernel.set(idxArg, srcEndX);
    idxArg = kernel.set(idxArg, srcEndY);
    idxArg = kernel.set(idxArg, ocl::KernelArg::WriteOnly(dst));
    if (normalize)
        idxArg = kernel.set(idxArg, (float)alpha);

    return kernel.run(2, globalsize, localsize, false);
}
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
|
||||
cv::Ptr<cv::BaseRowFilter> cv::getRowSumFilter(int srcType, int sumType, int ksize, int anchor)
|
||||
{
|
||||
int sdepth = CV_MAT_DEPTH(srcType), ddepth = CV_MAT_DEPTH(sumType);
|
||||
CV_Assert( CV_MAT_CN(sumType) == CV_MAT_CN(srcType) );
|
||||
|
||||
@ -1434,9 +1206,10 @@ cv::Ptr<cv::BaseRowFilter> cv::getRowSumFilter(int srcType, int sumType, int ksi
|
||||
}
|
||||
|
||||
|
||||
cv::Ptr<cv::BaseColumnFilter> cv::getColumnSumFilter(int sumType, int dstType, int ksize,
|
||||
int anchor, double scale)
|
||||
Ptr<BaseColumnFilter> getColumnSumFilter(int sumType, int dstType, int ksize, int anchor, double scale)
|
||||
{
|
||||
CV_INSTRUMENT_REGION();
|
||||
|
||||
int sdepth = CV_MAT_DEPTH(sumType), ddepth = CV_MAT_DEPTH(dstType);
|
||||
CV_Assert( CV_MAT_CN(sumType) == CV_MAT_CN(dstType) );
|
||||
|
||||
@ -1474,9 +1247,11 @@ cv::Ptr<cv::BaseColumnFilter> cv::getColumnSumFilter(int sumType, int dstType, i
|
||||
}
|
||||
|
||||
|
||||
cv::Ptr<cv::FilterEngine> cv::createBoxFilter( int srcType, int dstType, Size ksize,
|
||||
Point anchor, bool normalize, int borderType )
|
||||
Ptr<FilterEngine> createBoxFilter(int srcType, int dstType, Size ksize,
|
||||
Point anchor, bool normalize, int borderType)
|
||||
{
|
||||
CV_INSTRUMENT_REGION();
|
||||
|
||||
int sdepth = CV_MAT_DEPTH(srcType);
|
||||
int cn = CV_MAT_CN(srcType), sumType = CV_64F;
|
||||
if( sdepth == CV_8U && CV_MAT_DEPTH(dstType) == CV_8U &&
|
||||
@ -1496,199 +1271,12 @@ cv::Ptr<cv::FilterEngine> cv::createBoxFilter( int srcType, int dstType, Size ks
|
||||
srcType, dstType, sumType, borderType );
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENVX
namespace cv
{
namespace ovx {
    // Size threshold for the Box3x3 kernel: images with fewer pixels than 640x480 are skipped.
    template <> inline bool skipSmallImages<VX_KERNEL_BOX_3x3>(int w, int h) { return w*h < 640 * 480; }
}

// OpenVX implementation of a normalized 3x3 box filter on CV_8UC1 images.
// Returns false whenever the request is outside what this path handles, so
// that the caller can continue with another implementation.
static bool openvx_boxfilter(InputArray _src, OutputArray _dst, int ddepth,
                             Size ksize, Point anchor,
                             bool normalize, int borderType)
{
    if (ddepth < 0)
        ddepth = CV_8UC1;

    // Only normalized 8-bit single-channel filtering is handled.
    if (_src.type() != CV_8UC1 || ddepth != CV_8U || !normalize)
        return false;

    // The image must be at least as large as the 3x3 kernel.
    if (_src.cols() < 3 || _src.rows() < 3)
        return false;

    // Exactly a 3x3 kernel with a centred (or default) anchor.
    if (ksize.width != 3 || ksize.height != 3)
        return false;
    if (anchor.x >= 0 && anchor.x != 1)
        return false;
    if (anchor.y >= 0 && anchor.y != 1)
        return false;

    if (ovx::skipSmallImages<VX_KERNEL_BOX_3x3>(_src.cols(), _src.rows()))
        return false;

    Mat src = _src.getMat();

    // Process isolated borders only
    const bool isolatedRequested = (borderType & BORDER_ISOLATED) != 0;
    if (!isolatedRequested && src.isSubmatrix())
        return false;

    // Translate the border mode; anything but constant/replicate is unsupported.
    const int borderMode = borderType & ~BORDER_ISOLATED;
    vx_enum border;
    if (borderMode == BORDER_CONSTANT)
        border = VX_BORDER_CONSTANT;
    else if (borderMode == BORDER_REPLICATE)
        border = VX_BORDER_REPLICATE;
    else
        return false;

    _dst.create(src.size(), CV_8UC1);
    Mat dst = _dst.getMat();

    try
    {
        ivx::Context ctx = ovx::getOpenVXContext();

        // When source and destination share the same buffer, filter from a copy.
        Mat a;
        if (dst.data == src.data)
            src.copyTo(a);
        else
            a = src;

        ivx::Image ia = ivx::Image::createFromHandle(ctx, VX_DF_IMAGE_U8,
            ivx::Image::createAddressing(a.cols, a.rows, 1, (vx_int32)(a.step)), a.data);
        ivx::Image ib = ivx::Image::createFromHandle(ctx, VX_DF_IMAGE_U8,
            ivx::Image::createAddressing(dst.cols, dst.rows, 1, (vx_int32)(dst.step)), dst.data);

        //ATTENTION: VX_CONTEXT_IMMEDIATE_BORDER attribute change could lead to strange issues in multi-threaded environments
        //since OpenVX standard says nothing about thread-safety for now
        ivx::border_t prevBorder = ctx.immediateBorder();
        ctx.setImmediateBorder(border, (vx_uint8)(0));
        ivx::IVX_CHECK_STATUS(vxuBox3x3(ctx, ia, ib));
        ctx.setImmediateBorder(prevBorder);
    }
    catch (const ivx::RuntimeError & e)
    {
        VX_DbgThrow(e.what());
    }
    catch (const ivx::WrapperError & e)
    {
        VX_DbgThrow(e.what());
    }

    return true;
}
}
#endif
|
||||
|
||||
#if defined(HAVE_IPP)
namespace cv
{
// IPP (Integration Wrappers) implementation of the normalized box filter.
// Returns false when IPP cannot or should not be used for this request, so
// that the caller can continue with another implementation.
static bool ipp_boxfilter(Mat &src, Mat &dst, Size ksize, Point anchor, bool normalize, int borderType)
{
#ifdef HAVE_IPP_IW
    CV_INSTRUMENT_REGION_IPP();

#if IPP_VERSION_X100 < 201801
    const bool sse42Only = ipp::getIppTopFeatures() == ippCPUID_SSE42;
    const int depth = src.depth();
    const int channels = src.channels();
    const bool is16bit = depth == CV_16S || depth == CV_16U;
    const bool is8bit = depth == CV_8U;
    const bool kernelAbove5 = ksize.width > 5 || ksize.height > 5;

    // Problem with SSE42 optimization for 16s and some 8u modes
    if (sse42Only &&
        ((is16bit && (channels == 3 || channels == 4)) ||
         (is8bit && channels == 3 && kernelAbove5)))
        return false;

    // Other optimizations have some degradations too
    if ((is16bit && channels == 4) ||
        (is8bit && channels == 1 && kernelAbove5))
        return false;
#endif

    // Only the normalized variant is routed to IPP.
    if (!normalize)
        return false;

    if (!ippiCheckAnchor(anchor, ksize))
        return false;

    try
    {
        ::ipp::IwiImage       iwSrc = ippiGetImage(src);
        ::ipp::IwiImage       iwDst = ippiGetImage(dst);
        ::ipp::IwiSize        iwKSize = ippiGetSize(ksize);
        ::ipp::IwiBorderSize  borderSize(iwKSize);
        ::ipp::IwiBorderType  ippBorder(ippiGetBorder(iwSrc, borderType, borderSize));
        if (!ippBorder)
            return false;

        CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterBox, iwSrc, iwDst, iwKSize, ::ipp::IwDefault(), ippBorder);
    }
    catch (const ::ipp::IwException &)
    {
        return false;
    }

    return true;
#else
    CV_UNUSED(src);
    CV_UNUSED(dst);
    CV_UNUSED(ksize);
    CV_UNUSED(anchor);
    CV_UNUSED(normalize);
    CV_UNUSED(borderType);
    return false;
#endif
}
}
#endif
|
||||
|
||||
|
||||
// Box filter entry point. Tries, in order, the OpenCL kernels, the HAL hook,
// OpenVX and IPP; if none of them handles the request, the generic
// FilterEngine built by createBoxFilter() is applied.
void cv::boxFilter( InputArray _src, OutputArray _dst, int ddepth,
                    Size ksize, Point anchor,
                    bool normalize, int borderType )
{
    CV_INSTRUMENT_REGION();

    CV_OCL_RUN(_dst.isUMat() &&
               (borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT ||
                borderType == BORDER_REFLECT || borderType == BORDER_REFLECT_101),
               ocl_boxFilter3x3_8UC1(_src, _dst, ddepth, ksize, anchor, borderType, normalize))

    CV_OCL_RUN(_dst.isUMat(), ocl_boxFilter(_src, _dst, ddepth, ksize, anchor, borderType, normalize))

    Mat src = _src.getMat();
    const int stype = src.type();
    const int sdepth = CV_MAT_DEPTH(stype);
    const int cn = CV_MAT_CN(stype);

    // A negative ddepth means "same depth as the source".
    if( ddepth < 0 )
        ddepth = sdepth;

    _dst.create( src.size(), CV_MAKETYPE(ddepth, cn) );
    Mat dst = _dst.getMat();

    const bool isolated = (borderType & BORDER_ISOLATED) != 0;

    // For a normalized filter with an isolated, non-constant border, the
    // kernel is collapsed along any dimension where the image is one pixel.
    if( borderType != BORDER_CONSTANT && normalize && isolated )
    {
        if( src.rows == 1 )
            ksize.height = 1;
        if( src.cols == 1 )
            ksize.width = 1;
    }

    // Size of the parent matrix and offset of the ROI inside it; without
    // BORDER_ISOLATED these are taken from locateROI().
    Point roiOffset;
    Size wholeSize(src.cols, src.rows);
    if( !isolated )
        src.locateROI( wholeSize, roiOffset );

    const int marginRight = wholeSize.width - src.cols - roiOffset.x;
    const int marginBottom = wholeSize.height - src.rows - roiOffset.y;

    CALL_HAL(boxFilter, cv_hal_boxFilter, src.ptr(), src.step, dst.ptr(), dst.step, src.cols, src.rows, sdepth, ddepth, cn,
             roiOffset.x, roiOffset.y, marginRight, marginBottom, ksize.width, ksize.height,
             anchor.x, anchor.y, normalize, borderType&~BORDER_ISOLATED);

    CV_OVX_RUN(true,
               openvx_boxfilter(src, dst, ddepth, ksize, anchor, normalize, borderType))

    CV_IPP_RUN_FAST(ipp_boxfilter(src, dst, ksize, anchor, normalize, borderType));

    // The generic engine receives the border mode without the isolated flag.
    borderType = (borderType&~BORDER_ISOLATED);

    Ptr<FilterEngine> engine = createBoxFilter( src.type(), dst.type(),
                                                ksize, anchor, normalize, borderType );

    engine->apply( src, dst, wholeSize, roiOffset );
}
|
||||
|
||||
|
||||
// Normalized box blur: forwards to boxFilter() with the source depth kept
// (ddepth = -1) and normalization enabled.
void cv::blur( InputArray src, OutputArray dst,
               Size ksize, Point anchor, int borderType )
{
    CV_INSTRUMENT_REGION();

    const int sameDepthAsSource = -1;
    const bool normalized = true;
    boxFilter( src, dst, sameDepthAsSource, ksize, anchor, normalized, borderType );
}
|
||||
|
||||
|
||||
/****************************************************************************************\
|
||||
Squared Box Filter
|
||||
\****************************************************************************************/
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace {
|
||||
|
||||
template<typename T, typename ST>
|
||||
struct SqrRowSum :
|
||||
@ -1703,6 +1291,8 @@ struct SqrRowSum :
|
||||
|
||||
virtual void operator()(const uchar* src, uchar* dst, int width, int cn) CV_OVERRIDE
|
||||
{
|
||||
CV_INSTRUMENT_REGION();
|
||||
|
||||
const T* S = (const T*)src;
|
||||
ST* D = (ST*)dst;
|
||||
int i = 0, k, ksz_cn = ksize*cn;
|
||||
@ -1727,7 +1317,9 @@ struct SqrRowSum :
|
||||
}
|
||||
};
|
||||
|
||||
static Ptr<BaseRowFilter> getSqrRowSumFilter(int srcType, int sumType, int ksize, int anchor)
|
||||
} // namespace anon
|
||||
|
||||
Ptr<BaseRowFilter> getSqrRowSumFilter(int srcType, int sumType, int ksize, int anchor)
|
||||
{
|
||||
int sdepth = CV_MAT_DEPTH(srcType), ddepth = CV_MAT_DEPTH(sumType);
|
||||
CV_Assert( CV_MAT_CN(sumType) == CV_MAT_CN(srcType) );
|
||||
@ -1753,52 +1345,6 @@ static Ptr<BaseRowFilter> getSqrRowSumFilter(int srcType, int sumType, int ksize
|
||||
srcType, sumType));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Box filter over squared pixel values. Tries the OpenCL kernel first (with
// the "sqr" flag set) and otherwise runs a FilterEngine made of the squared
// row-sum filter followed by the column-sum filter.
void cv::sqrBoxFilter( InputArray _src, OutputArray _dst, int ddepth,
                       Size ksize, Point anchor,
                       bool normalize, int borderType )
{
    CV_INSTRUMENT_REGION();

    const int srcType = _src.type();
    const int sdepth = CV_MAT_DEPTH(srcType);
    const int cn = CV_MAT_CN(srcType);
    const Size size = _src.size();

    // Default output depth: CV_32F for sources below CV_32F, CV_64F otherwise.
    if( ddepth < 0 )
        ddepth = sdepth < CV_32F ? CV_32F : CV_64F;

    // For a normalized filter with a non-constant border, the kernel is
    // collapsed along any dimension where the image is one pixel.
    if( borderType != BORDER_CONSTANT && normalize )
    {
        if( size.height == 1 )
            ksize.height = 1;
        if( size.width == 1 )
            ksize.width = 1;
    }

    CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2,
               ocl_boxFilter(_src, _dst, ddepth, ksize, anchor, borderType, normalize, true))

    // Intermediate sums are CV_32S for 8-bit input and CV_64F for everything else.
    const int sumDepth = (sdepth == CV_8U) ? CV_32S : CV_64F;
    const int sumType = CV_MAKETYPE( sumDepth, cn );
    const int dstType = CV_MAKETYPE( ddepth, cn );

    Mat src = _src.getMat();
    _dst.create( size, dstType );
    Mat dst = _dst.getMat();

    const double scale = normalize ? 1./(ksize.width*ksize.height) : 1;

    Ptr<BaseRowFilter> rowFilter = getSqrRowSumFilter( srcType, sumType, ksize.width, anchor.x );
    Ptr<BaseColumnFilter> columnFilter = getColumnSumFilter( sumType, dstType, ksize.height, anchor.y, scale );

    Ptr<FilterEngine> engine = makePtr<FilterEngine>(Ptr<BaseFilter>(), rowFilter, columnFilter,
                                                     srcType, dstType, sumType, borderType );

    // Size of the parent matrix and offset of the ROI inside it.
    Point roiOffset;
    Size wholeSize(src.cols, src.rows);
    src.locateROI( wholeSize, roiOffset );

    engine->apply( src, dst, wholeSize, roiOffset );
}
|
||||
|
||||
/* End of file. */
|
||||
#endif
|
||||
CV_CPU_OPTIMIZATION_NAMESPACE_END
|
||||
} // namespace
|
||||
|
Loading…
Reference in New Issue
Block a user