Merge pull request #1360 from vpisarev:24_ipp_dft_canny_morphrect

This commit is contained in:
Roman Donchenko 2013-08-30 17:55:07 +04:00 committed by OpenCV Buildbot
commit 46d74d7c6b
3 changed files with 154 additions and 81 deletions

View File

@ -50,6 +50,13 @@ namespace cv
# pragma warning(disable: 4748)
#endif
// Switch for the IPP-based DFT code paths guarded by "#ifdef USE_IPP_DFT":
// it is defined only when the build has IPP and its major version is 7 or newer.
#undef USE_IPP_DFT
#if defined(HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
#define USE_IPP_DFT 1
#endif
/****************************************************************************************\
Discrete Fourier Transform
\****************************************************************************************/
@ -455,7 +462,7 @@ template<> struct DFT_VecR4<float>
#endif
#ifdef HAVE_IPP
#ifdef USE_IPP_DFT
static void ippsDFTFwd_CToC( const Complex<float>* src, Complex<float>* dst,
const void* spec, uchar* buf)
{
@ -517,7 +524,7 @@ DFT( const Complex<T>* src, Complex<T>* dst, int n,
int nf, const int* factors, const int* itab,
const Complex<T>* wave, int tab_size,
const void*
#ifdef HAVE_IPP
#ifdef USE_IPP_DFT
spec
#endif
, Complex<T>* buf,
@ -537,7 +544,7 @@ DFT( const Complex<T>* src, Complex<T>* dst, int n,
T scale = (T)_scale;
int tab_step;
#ifdef HAVE_IPP
#ifdef USE_IPP_DFT
if( spec )
{
if( !inv )
@ -957,7 +964,7 @@ DFT( const Complex<T>* src, Complex<T>* dst, int n,
template<typename T> static void
RealDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab,
const Complex<T>* wave, int tab_size, const void*
#ifdef HAVE_IPP
#ifdef USE_IPP_DFT
spec
#endif
,
@ -968,11 +975,18 @@ RealDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab,
int j, n2 = n >> 1;
dst += complex_output;
#ifdef HAVE_IPP
#ifdef USE_IPP_DFT
if( spec )
{
ippsDFTFwd_RToPack( src, dst, spec, (uchar*)buf );
goto finalize;
if( complex_output )
{
dst[-1] = dst[0];
dst[0] = 0;
if( (n & 1) == 0 )
dst[n] = 0;
}
return;
}
#endif
assert( tab_size == n );
@ -1056,14 +1070,10 @@ RealDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab,
}
}
#ifdef HAVE_IPP
finalize:
#endif
if( complex_output && (n & 1) == 0 )
{
dst[-1] = dst[0];
dst[0] = 0;
if( (n & 1) == 0 )
dst[n] = 0;
}
}
@ -1076,7 +1086,7 @@ template<typename T> static void
CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab,
const Complex<T>* wave, int tab_size,
const void*
#ifdef HAVE_IPP
#ifdef USE_IPP_DFT
spec
#endif
, Complex<T>* buf,
@ -1097,7 +1107,7 @@ CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab,
((T*)src)[1] = src[0];
src++;
}
#ifdef HAVE_IPP
#ifdef USE_IPP_DFT
if( spec )
{
ippsDFTInv_PackToR( src, dst, spec, (uchar*)buf );
@ -1225,7 +1235,7 @@ CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab,
}
}
#ifdef HAVE_IPP
#ifdef USE_IPP_DFT
finalize:
#endif
if( complex_input )
@ -1458,7 +1468,7 @@ static void CCSIDFT_64f( const double* src, double* dst, int n, int nf, int* fac
}
#ifdef HAVE_IPP
#ifdef USE_IPP_DFT
typedef IppStatus (CV_STDCALL* IppDFTGetSizeFunc)(int, int, IppHintAlgorithm, int*, int*, int*);
typedef IppStatus (CV_STDCALL* IppDFTInitFunc)(int, int, IppHintAlgorithm, void*, uchar*);
#endif
@ -1486,7 +1496,7 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
int elem_size = (int)src.elemSize1(), complex_elem_size = elem_size*2;
int factors[34];
bool inplace_transform = false;
#ifdef HAVE_IPP
#ifdef USE_IPP_DFT
AutoBuffer<uchar> ippbuf;
int ipp_norm_flag = !(flags & DFT_SCALE) ? 8 : inv ? 2 : 1;
#endif
@ -1546,12 +1556,8 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
}
spec = 0;
#ifdef HAVE_IPP
if(
#if IPP_VERSION_MAJOR >= 7
depth == CV_32F && // IPP 7.x and 8.0 have bug somewhere in double-precision DFT
#endif
len*count >= 64 ) // use IPP DFT if available
#ifdef USE_IPP_DFT
if( len*count >= 64 ) // use IPP DFT if available
{
int specsize=0, initsize=0, worksize=0;
IppDFTGetSizeFunc getSizeFunc = 0;

View File

@ -41,6 +41,50 @@
#include "precomp.hpp"
// Switch for the IPP-based Canny path guarded by "#ifdef USE_IPP_CANNY":
// it is defined only when the build has IPP and its major version is 7 or newer.
#undef USE_IPP_CANNY
#if defined(HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
#define USE_IPP_CANNY 1
#endif
#ifdef USE_IPP_CANNY
namespace cv
{
static bool ippCanny(const Mat& _src, Mat& _dst, float low, float high)
{
int size = 0, size1 = 0;
IppiSize roi = { _src.cols, _src.rows };
ippiFilterSobelNegVertGetBufferSize_8u16s_C1R(roi, ippMskSize3x3, &size);
ippiFilterSobelHorizGetBufferSize_8u16s_C1R(roi, ippMskSize3x3, &size1);
size = std::max(size, size1);
ippiCannyGetSize(roi, &size1);
size = std::max(size, size1);
AutoBuffer<uchar> buf(size + 64);
uchar* buffer = alignPtr((uchar*)buf, 32);
Mat _dx(_src.rows, _src.cols, CV_16S);
if( ippiFilterSobelNegVertBorder_8u16s_C1R(_src.data, (int)_src.step,
_dx.ptr<short>(), (int)_dx.step, roi,
ippMskSize3x3, ippBorderRepl, 0, buffer) < 0 )
return false;
Mat _dy(_src.rows, _src.cols, CV_16S);
if( ippiFilterSobelHorizBorder_8u16s_C1R(_src.data, (int)_src.step,
_dy.ptr<short>(), (int)_dy.step, roi,
ippMskSize3x3, ippBorderRepl, 0, buffer) < 0 )
return false;
if( ippiCanny_16s8u_C1R(_dx.ptr<short>(), (int)_dx.step,
_dy.ptr<short>(), (int)_dy.step,
_dst.data, (int)_dst.step, roi, low, high, buffer) < 0 )
return false;
return true;
}
}
#endif
void cv::Canny( InputArray _src, OutputArray _dst,
double low_thresh, double high_thresh,
int aperture_size, bool L2gradient )
@ -61,20 +105,26 @@ void cv::Canny( InputArray _src, OutputArray _dst,
if ((aperture_size & 1) == 0 || (aperture_size != -1 && (aperture_size < 3 || aperture_size > 7)))
CV_Error(CV_StsBadFlag, "");
if (low_thresh > high_thresh)
std::swap(low_thresh, high_thresh);
#ifdef HAVE_TEGRA_OPTIMIZATION
if (tegra::canny(src, dst, low_thresh, high_thresh, aperture_size, L2gradient))
return;
#endif
#ifdef USE_IPP_CANNY
    // ippCanny() returns bool: test it directly. Comparing a bool with ">= 0" is
    // always true, which would make this function return even when the IPP path
    // failed, skipping the generic implementation below.
    // The IPP path uses single-channel primitives, so multi-channel input is
    // left to the generic code as well.
    if( aperture_size == 3 && !L2gradient && src.channels() == 1 &&
        ippCanny(src, dst, (float)low_thresh, (float)high_thresh) )
        return;
#endif
const int cn = src.channels();
cv::Mat dx(src.rows, src.cols, CV_16SC(cn));
cv::Mat dy(src.rows, src.cols, CV_16SC(cn));
Mat dx(src.rows, src.cols, CV_16SC(cn));
Mat dy(src.rows, src.cols, CV_16SC(cn));
cv::Sobel(src, dx, CV_16S, 1, 0, aperture_size, 1, 0, cv::BORDER_REPLICATE);
cv::Sobel(src, dy, CV_16S, 0, 1, aperture_size, 1, 0, cv::BORDER_REPLICATE);
if (low_thresh > high_thresh)
std::swap(low_thresh, high_thresh);
Sobel(src, dx, CV_16S, 1, 0, aperture_size, 1, 0, cv::BORDER_REPLICATE);
Sobel(src, dy, CV_16S, 0, 1, aperture_size, 1, 0, cv::BORDER_REPLICATE);
if (L2gradient)
{
@ -88,7 +138,7 @@ void cv::Canny( InputArray _src, OutputArray _dst,
int high = cvFloor(high_thresh);
ptrdiff_t mapstep = src.cols + 2;
cv::AutoBuffer<uchar> buffer((src.cols+2)*(src.rows+2) + cn * mapstep * 3 * sizeof(int));
AutoBuffer<uchar> buffer((src.cols+2)*(src.rows+2) + cn * mapstep * 3 * sizeof(int));
int* mag_buf[3];
mag_buf[0] = (int*)(uchar*)buffer;

View File

@ -1137,7 +1137,8 @@ private:
};
#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
static bool IPPMorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kernel, const Point &anchor)
static bool IPPMorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kernel,
const Size& ksize, const Point &anchor, bool rectKernel)
{
int type = src.type();
const Mat* _src = &src;
@ -1149,55 +1150,65 @@ static bool IPPMorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kerne
}
//DEPRECATED. Allocates and initializes morphology state structure for erosion or dilation operation.
typedef IppStatus (CV_STDCALL* ippiMorphologyInitAllocFunc)(int, const void*, IppiSize, IppiPoint, IppiMorphState **);
ippiMorphologyInitAllocFunc ippInitAllocFunc =
type == CV_8UC1 ? (ippiMorphologyInitAllocFunc)ippiMorphologyInitAlloc_8u_C1R :
type == CV_8UC3 ? (ippiMorphologyInitAllocFunc)ippiMorphologyInitAlloc_8u_C3R :
type == CV_8UC4 ? (ippiMorphologyInitAllocFunc)ippiMorphologyInitAlloc_8u_C4R :
type == CV_32FC1 ? (ippiMorphologyInitAllocFunc)ippiMorphologyInitAlloc_32f_C1R :
type == CV_32FC3 ? (ippiMorphologyInitAllocFunc)ippiMorphologyInitAlloc_32f_C3R :
type == CV_32FC4 ? (ippiMorphologyInitAllocFunc)ippiMorphologyInitAlloc_32f_C4R :
0;
typedef IppStatus (CV_STDCALL* ippiMorphologyBorderReplicateFunc)(const void*, int, void *, int, IppiSize, IppiBorderType, IppiMorphState *);
ippiMorphologyBorderReplicateFunc ippFunc = 0;
switch( op )
typedef IppStatus (CV_STDCALL* ippiMorphologyBorderReplicateFunc)(const void*, int, void *, int,
IppiSize, IppiBorderType, IppiMorphState *);
typedef IppStatus (CV_STDCALL* ippiFilterMinMaxGetBufferSizeFunc)(int, IppiSize, int*);
typedef IppStatus (CV_STDCALL* ippiFilterMinMaxBorderReplicateFunc)(const void*, int, void*, int,
IppiSize, IppiSize, IppiPoint, void*);
ippiMorphologyInitAllocFunc initAllocFunc = 0;
ippiMorphologyBorderReplicateFunc morphFunc = 0;
ippiFilterMinMaxGetBufferSizeFunc getBufSizeFunc = 0;
ippiFilterMinMaxBorderReplicateFunc morphRectFunc = 0;
#define IPP_MORPH_CASE(type, flavor) \
case type: \
initAllocFunc = (ippiMorphologyInitAllocFunc)ippiMorphologyInitAlloc_##flavor; \
morphFunc = op == MORPH_ERODE ? (ippiMorphologyBorderReplicateFunc)ippiErodeBorderReplicate_##flavor : \
(ippiMorphologyBorderReplicateFunc)ippiDilateBorderReplicate_##flavor; \
getBufSizeFunc = (ippiFilterMinMaxGetBufferSizeFunc)ippiFilterMinGetBufferSize_##flavor; \
morphRectFunc = op == MORPH_ERODE ? (ippiFilterMinMaxBorderReplicateFunc)ippiFilterMinBorderReplicate_##flavor : \
(ippiFilterMinMaxBorderReplicateFunc)ippiFilterMaxBorderReplicate_##flavor; \
break
switch( type )
{
case MORPH_DILATE:
{
ippFunc =
type == CV_8UC1 ? (ippiMorphologyBorderReplicateFunc)ippiDilateBorderReplicate_8u_C1R :
type == CV_8UC3 ? (ippiMorphologyBorderReplicateFunc)ippiDilateBorderReplicate_8u_C3R :
type == CV_8UC4 ? (ippiMorphologyBorderReplicateFunc)ippiDilateBorderReplicate_8u_C4R :
type == CV_32FC1 ? (ippiMorphologyBorderReplicateFunc)ippiDilateBorderReplicate_32f_C1R :
type == CV_32FC3 ? (ippiMorphologyBorderReplicateFunc)ippiDilateBorderReplicate_32f_C3R :
type == CV_32FC4 ? (ippiMorphologyBorderReplicateFunc)ippiDilateBorderReplicate_32f_C4R :
0;
break;
IPP_MORPH_CASE(CV_8UC1, 8u_C1R);
IPP_MORPH_CASE(CV_8UC3, 8u_C3R);
IPP_MORPH_CASE(CV_8UC4, 8u_C4R);
IPP_MORPH_CASE(CV_32FC1, 32f_C1R);
IPP_MORPH_CASE(CV_32FC3, 32f_C3R);
IPP_MORPH_CASE(CV_32FC4, 32f_C4R);
default:
return false;
}
case MORPH_ERODE:
{
ippFunc =
type == CV_8UC1 ? (ippiMorphologyBorderReplicateFunc)ippiErodeBorderReplicate_8u_C1R :
type == CV_8UC3 ? (ippiMorphologyBorderReplicateFunc)ippiErodeBorderReplicate_8u_C3R :
type == CV_8UC4 ? (ippiMorphologyBorderReplicateFunc)ippiErodeBorderReplicate_8u_C4R :
type == CV_32FC1 ? (ippiMorphologyBorderReplicateFunc)ippiErodeBorderReplicate_32f_C1R :
type == CV_32FC3 ? (ippiMorphologyBorderReplicateFunc)ippiErodeBorderReplicate_32f_C3R :
type == CV_32FC4 ? (ippiMorphologyBorderReplicateFunc)ippiErodeBorderReplicate_32f_C4R :
0;
break;
}
}
if( ippFunc && ippInitAllocFunc)
#undef IPP_MORPH_CASE
IppiSize roiSize = {src.cols, src.rows};
IppiSize kernelSize = {ksize.width, ksize.height};
IppiPoint point = {anchor.x, anchor.y};
if( !rectKernel && morphFunc && initAllocFunc )
{
IppiMorphState* pState;
IppiSize roiSize = {src.cols, src.rows};
IppiSize kernelSize = {kernel.cols, kernel.rows};
IppiPoint point = {anchor.x, anchor.y};
if( ippInitAllocFunc( roiSize.width, kernel.data, kernelSize, point, &pState ) < 0 )
if( initAllocFunc( roiSize.width, kernel.data, kernelSize, point, &pState ) < 0 )
return false;
bool is_ok = ippFunc( _src->data, _src->step[0], dst.data, dst.step[0], roiSize, ippBorderRepl, pState ) >= 0;
bool is_ok = morphFunc( _src->data, (int)_src->step[0],
dst.data, (int)dst.step[0],
roiSize, ippBorderRepl, pState ) >= 0;
ippiMorphologyFree(pState);
return is_ok;
}
else if( rectKernel && morphRectFunc && getBufSizeFunc )
{
int bufSize = 0;
if( getBufSizeFunc( src.cols, kernelSize, &bufSize) < 0 )
return false;
AutoBuffer<uchar> buf(bufSize + 64);
uchar* buffer = alignPtr((uchar*)buf, 32);
return morphRectFunc(_src->data, (int)_src->step[0], dst.data, (int)dst.step[0],
roiSize, kernelSize, point, buffer) >= 0;
}
return false;
}
@ -1211,7 +1222,7 @@ static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst,
!( borderType == cv::BORDER_REPLICATE || (borderType == cv::BORDER_CONSTANT && borderValue == morphologyDefaultBorderValue()) )
|| !( op == MORPH_DILATE || op == MORPH_ERODE) )
return false;
if( borderType == cv::BORDER_CONSTANT )
if( borderType == cv::BORDER_CONSTANT && kernel.data )
{
int x, y;
for( y = 0; y < kernel.rows; y++ )
@ -1250,23 +1261,29 @@ static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst,
return true;
}
bool rectKernel = false;
if( !kernel.data )
{
kernel = getStructuringElement(MORPH_RECT, Size(1+iterations*2,1+iterations*2));
ksize = Size(1+iterations*2,1+iterations*2);
normanchor = Point(iterations, iterations);
rectKernel = true;
iterations = 1;
}
else if( iterations > 1 && countNonZero(kernel) == kernel.rows*kernel.cols )
else if( iterations >= 1 && countNonZero(kernel) == kernel.rows*kernel.cols )
{
normanchor = Point(normanchor.x*iterations, normanchor.y*iterations);
kernel = getStructuringElement(MORPH_RECT,
Size(ksize.width + (iterations-1)*(ksize.width-1),
ksize = Size(ksize.width + (iterations-1)*(ksize.width-1),
ksize.height + (iterations-1)*(ksize.height-1)),
normanchor);
normanchor = Point(normanchor.x*iterations, normanchor.y*iterations);
kernel = Mat();
rectKernel = true;
iterations = 1;
}
return IPPMorphReplicate( op, src, dst, kernel, normanchor );
// TODO: implement the case of iterations > 1.
if( iterations > 1 )
return false;
return IPPMorphReplicate( op, src, dst, kernel, ksize, normanchor, rectKernel );
}
#endif
@ -1459,7 +1476,7 @@ static void convertConvKernel( const IplConvKernel* src, cv::Mat& dst, cv::Point
int i, size = src->nRows*src->nCols;
for( i = 0; i < size; i++ )
dst.data[i] = (uchar)src->values[i];
dst.data[i] = (uchar)(src->values[i] != 0);
}