Merge pull request #5493 from lupustr3:pvlasov/ipp9_fixes

This commit is contained in:
Alexander Alekhin 2015-10-13 14:24:09 +00:00
commit 466a98f7c3
7 changed files with 112 additions and 44 deletions

View File

@ -241,6 +241,26 @@ static inline IppDataType ippiGetDataType(int depth)
depth == CV_64F ? ipp64f : (IppDataType)-1;
}
// IPP temporary buffer helper
// Scoped owner of a temporary buffer obtained from ippMalloc().
//
// The buffer is released with ippFree() when the object is destroyed, so
// early returns in the calling code do not leak IPP allocations. The object
// converts implicitly to T* / const T*, which lets it be passed directly to
// IPP functions that expect a raw pointer.
//
// T is only the pointer type handed out; `size` is forwarded to ippMalloc()
// unchanged (callers in this file pass a byte count), and no constructors or
// destructors of T are ever run.
template<typename T>
class IppAutoBuffer
{
public:
    // Creates an empty holder that owns nothing.
    IppAutoBuffer() : m_pBuffer(NULL) {}

    // Creates a holder and immediately allocates `size` via ippMalloc().
    IppAutoBuffer(int size) : m_pBuffer(NULL) { Alloc(size); }

    // Frees the owned buffer, if any.
    ~IppAutoBuffer() { Release(); }

    // Allocates a new buffer of `size` and returns it.
    // Any buffer already owned is freed first, so calling Alloc() repeatedly
    // does not leak the previous allocation.
    // The result is whatever ippMalloc() returned and is not checked here.
    T* Alloc(int size)
    {
        Release();
        m_pBuffer = (T*)ippMalloc(size);
        return m_pBuffer;
    }

    // Frees the owned buffer and resets the holder to the empty state.
    // The pointer is cleared so that a later Release() or the destructor
    // cannot free the same block twice.
    void Release()
    {
        if (m_pBuffer)
        {
            ippFree(m_pBuffer);
            m_pBuffer = NULL;
        }
    }

    // Raw access to the owned buffer (NULL when empty).
    inline operator T* () { return m_pBuffer; }
    inline operator const T* () const { return m_pBuffer; }

private:
    // Copying is disabled: two holders owning the same pointer would free it
    // twice. Declared but intentionally not defined, so an accidental copy
    // fails to compile (outside the class) or to link (inside it).
    IppAutoBuffer(const IppAutoBuffer &);
    IppAutoBuffer& operator =(const IppAutoBuffer &);

    T* m_pBuffer; // owned ippMalloc() block, or NULL
};
#else
#define IPP_VERSION_X100 0
#endif

View File

@ -3131,7 +3131,7 @@ static double dotProd_16u(const ushort* src1, const ushort* src2, int len)
static double dotProd_16s(const short* src1, const short* src2, int len)
{
#if (ARITHM_USE_IPP == 1)
#if (ARITHM_USE_IPP == 1) && (IPP_VERSION_X100 != 900) // bug in IPP 9.0.0
CV_IPP_CHECK()
{
double r = 0;

View File

@ -1318,6 +1318,12 @@ public:
ippFeatures = ippCPUID_SSE;
else if(env == "sse2")
ippFeatures = ippCPUID_SSE2;
else if(env == "sse3")
ippFeatures = ippCPUID_SSE3;
else if(env == "ssse3")
ippFeatures = ippCPUID_SSSE3;
else if(env == "sse41")
ippFeatures = ippCPUID_SSE41;
else if(env == "sse42")
ippFeatures = ippCPUID_SSE42;
else if(env == "avx")

View File

@ -4579,7 +4579,11 @@ static bool ipp_filter2D( InputArray _src, OutputArray _dst, int ddepth,
int stype = src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype),
ktype = kernel.type(), kdepth = CV_MAT_DEPTH(ktype);
bool isolated = (borderType & BORDER_ISOLATED) != 0;
#if IPP_VERSION_X100 >= 900
Point ippAnchor((kernel.cols-1)/2, (kernel.rows-1)/2);
#else
Point ippAnchor(kernel.cols >> 1, kernel.rows >> 1);
#endif
int borderTypeNI = borderType & ~BORDER_ISOLATED;
IppiBorderType ippBorderType = ippiGetBorderType(borderTypeNI);
@ -4610,24 +4614,64 @@ static bool ipp_filter2D( InputArray _src, OutputArray _dst, int ddepth,
if ((status = ippiFilterBorderGetSize(kernelSize, dstRoiSize, dataType, kernelType, cn, &specSize, &bufsize)) >= 0)
{
IppiFilterBorderSpec * spec = (IppiFilterBorderSpec *)ippMalloc(specSize);
Ipp8u * buffer = ippsMalloc_8u(bufsize);
IppAutoBuffer<IppiFilterBorderSpec> spec(specSize);
IppAutoBuffer<Ipp8u> buffer(bufsize);
Ipp32f borderValue[4] = { 0, 0, 0, 0 };
Mat reversedKernel;
flip(kernel, reversedKernel, -1);
if ((kdepth == CV_32F && (status = ippiFilterBorderInit_32f((const Ipp32f *)reversedKernel.data, kernelSize,
dataType, cn, ippRndFinancial, spec)) >= 0 ) ||
(kdepth == CV_16S && (status = ippiFilterBorderInit_16s((const Ipp16s *)reversedKernel.data,
kernelSize, 0, dataType, cn, ippRndFinancial, spec)) >= 0))
if(kdepth == CV_32F)
{
status = ippFunc(src.data, (int)src.step, dst.data, (int)dst.step, dstRoiSize,
ippBorderType, borderValue, spec, buffer);
}
Ipp32f *pKerBuffer = (Ipp32f*)kernel.data;
IppAutoBuffer<Ipp32f> kerTmp;
int kerStep = sizeof(Ipp32f)*kernelSize.width;
#if IPP_VERSION_X100 >= 900
if(kernel.step != kerStep)
{
kerTmp.Alloc(kerStep*kernelSize.height);
if(ippiCopy_32f_C1R((Ipp32f*)kernel.data, (int)kernel.step, kerTmp, kerStep, kernelSize) < 0)
return false;
pKerBuffer = kerTmp;
}
#else
kerTmp.Alloc(kerStep*kernelSize.height);
Mat kerFlip(Size(kernelSize.width, kernelSize.height), CV_32FC1, kerTmp, kerStep);
flip(kernel, kerFlip, -1);
pKerBuffer = kerTmp;
#endif
ippsFree(buffer);
ippsFree(spec);
if((status = ippiFilterBorderInit_32f(pKerBuffer, kernelSize,
dataType, cn, ippRndFinancial, spec)) >= 0 )
{
status = ippFunc(src.data, (int)src.step, dst.data, (int)dst.step, dstRoiSize,
ippBorderType, borderValue, spec, buffer);
}
}
else if(kdepth == CV_16S)
{
Ipp16s *pKerBuffer = (Ipp16s*)kernel.data;
IppAutoBuffer<Ipp16s> kerTmp;
int kerStep = sizeof(Ipp16s)*kernelSize.width;
#if IPP_VERSION_X100 >= 900
if(kernel.step != kerStep)
{
kerTmp.Alloc(kerStep*kernelSize.height);
if(ippiCopy_16s_C1R((Ipp16s*)kernel.data, (int)kernel.step, kerTmp, kerStep, kernelSize) < 0)
return false;
pKerBuffer = kerTmp;
}
#else
kerTmp.Alloc(kerStep*kernelSize.height);
Mat kerFlip(Size(kernelSize.width, kernelSize.height), CV_16SC1, kerTmp, kerStep);
flip(kernel, kerFlip, -1);
pKerBuffer = kerTmp;
#endif
if((status = ippiFilterBorderInit_16s(pKerBuffer, kernelSize,
0, dataType, cn, ippRndFinancial, spec)) >= 0)
{
status = ippFunc(src.data, (int)src.step, dst.data, (int)dst.step, dstRoiSize,
ippBorderType, borderValue, spec, buffer);
}
}
}
if (status >= 0)

View File

@ -1231,17 +1231,18 @@ static bool ipp_MorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kern
}
else
{
#if IPP_VERSION_X100 != 900 // Problems with accuracy in 9.0.0
#if IPP_VERSION_X100 >= 900
if (((kernel.cols - 1) / 2 != anchor.x) || ((kernel.rows - 1) / 2 != anchor.y)) // Arbitrary anchor is no longer supported since IPP 9.0.0
if (((kernelSize.width - 1) / 2 != anchor.x) || ((kernelSize.height - 1) / 2 != anchor.y)) // Arbitrary anchor is no longer supported since IPP 9.0.0
return false;
#define IPP_MORPH_CASE(cvtype, flavor, data_type) \
#define IPP_MORPH_CASE(cvtype, flavor, data_type, cn) \
case cvtype: \
{\
if (op == MORPH_ERODE)\
{\
int bufSize = 0;\
if (0 > ippiFilterMinBorderGetBufferSize(roiSize, kernelSize, ipp##data_type, 1, &bufSize))\
if (0 > ippiFilterMinBorderGetBufferSize(roiSize, kernelSize, ipp##data_type, cn, &bufSize))\
return false;\
AutoBuffer<uchar> buf(bufSize + 64);\
uchar* buffer = alignPtr((uchar*)buf, 32);\
@ -1250,7 +1251,7 @@ static bool ipp_MorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kern
else\
{\
int bufSize = 0;\
if (0 > ippiFilterMaxBorderGetBufferSize(roiSize, kernelSize, ipp##data_type, 1, &bufSize))\
if (0 > ippiFilterMaxBorderGetBufferSize(roiSize, kernelSize, ipp##data_type, cn, &bufSize))\
return false;\
AutoBuffer<uchar> buf(bufSize + 64);\
uchar* buffer = alignPtr((uchar*)buf, 32);\
@ -1261,7 +1262,7 @@ static bool ipp_MorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kern
#else
IppiPoint point = {anchor.x, anchor.y};
#define IPP_MORPH_CASE(cvtype, flavor, data_type) \
#define IPP_MORPH_CASE(cvtype, flavor, data_type, cn) \
case cvtype: \
{\
int bufSize = 0;\
@ -1279,17 +1280,18 @@ static bool ipp_MorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kern
CV_SUPPRESS_DEPRECATED_START
switch (type)
{
IPP_MORPH_CASE(CV_8UC1, 8u_C1R, 8u);
IPP_MORPH_CASE(CV_8UC3, 8u_C3R, 8u);
IPP_MORPH_CASE(CV_8UC4, 8u_C4R, 8u);
IPP_MORPH_CASE(CV_32FC1, 32f_C1R, 32f);
IPP_MORPH_CASE(CV_32FC3, 32f_C3R, 32f);
IPP_MORPH_CASE(CV_32FC4, 32f_C4R, 32f);
IPP_MORPH_CASE(CV_8UC1, 8u_C1R, 8u, 1);
IPP_MORPH_CASE(CV_8UC3, 8u_C3R, 8u, 3);
IPP_MORPH_CASE(CV_8UC4, 8u_C4R, 8u, 4);
IPP_MORPH_CASE(CV_32FC1, 32f_C1R, 32f, 1);
IPP_MORPH_CASE(CV_32FC3, 32f_C3R, 32f, 3);
IPP_MORPH_CASE(CV_32FC4, 32f_C4R, 32f, 4);
default:
;
}
CV_SUPPRESS_DEPRECATED_END
#undef IPP_MORPH_CASE
#endif
}
#else
CV_UNUSED(op); CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(kernel); CV_UNUSED(ksize); CV_UNUSED(anchor); CV_UNUSED(rectKernel);

View File

@ -1695,32 +1695,33 @@ static bool ipp_GaussianBlur( InputArray _src, OutputArray _dst, Size ksize,
if (ippiFilterGaussianGetBufferSize(roiSize, (Ipp32u)ksize.width, dataType, cn, &specSize, &bufferSize) >= 0)
{
IppFilterGaussianSpec * pSpec = (IppFilterGaussianSpec *)ippMalloc(specSize);
Ipp8u * pBuffer = (Ipp8u*)ippMalloc(bufferSize);
IppAutoBuffer<IppFilterGaussianSpec> spec(specSize);
IppAutoBuffer<Ipp8u> buffer(bufferSize);
if (ippiFilterGaussianInit(roiSize, (Ipp32u)ksize.width, (Ipp32f)sigma1, ippBorder, dataType, 1, pSpec, pBuffer) >= 0)
if (ippiFilterGaussianInit(roiSize, (Ipp32u)ksize.width, (Ipp32f)sigma1, ippBorder, dataType, cn, spec, buffer) >= 0)
{
#define IPP_FILTER_GAUSS_C1(ippfavor) \
{ \
typedef Ipp##ippfavor ippType; \
ippType borderValues = 0; \
status = ippiFilterGaussianBorder_##ippfavor##_C1R(src.ptr<ippType>(), (int)src.step, \
dst.ptr<ippType>(), (int)dst.step, roiSize, borderValues, pSpec, pBuffer); \
Ipp##ippfavor borderValues = 0; \
status = ippiFilterGaussianBorder_##ippfavor##_C1R(src.ptr<Ipp##ippfavor>(), (int)src.step, \
dst.ptr<Ipp##ippfavor>(), (int)dst.step, roiSize, borderValues, spec, buffer); \
}
#define IPP_FILTER_GAUSS_CN(ippfavor, ippcn) \
{ \
typedef Ipp##ippfavor ippType; \
ippType borderValues[] = { 0, 0, 0 }; \
status = ippiFilterGaussianBorder_##ippfavor##_C##ippcn##R(src.ptr<ippType>(), (int)src.step, \
dst.ptr<ippType>(), (int)dst.step, roiSize, borderValues, pSpec, pBuffer); \
Ipp##ippfavor borderValues[] = { 0, 0, 0 }; \
status = ippiFilterGaussianBorder_##ippfavor##_C##ippcn##R(src.ptr<Ipp##ippfavor>(), (int)src.step, \
dst.ptr<Ipp##ippfavor>(), (int)dst.step, roiSize, borderValues, spec, buffer); \
}
IppStatus status = ippStsErr;
#if !HAVE_ICV
#if IPP_VERSION_X100 > 901 // Buffer overflow in IPP
if (type == CV_8UC1)
IPP_FILTER_GAUSS_C1(8u)
else if (type == CV_8UC3)
else
#endif
if (type == CV_8UC3)
IPP_FILTER_GAUSS_CN(8u, 3)
else if (type == CV_16UC1)
IPP_FILTER_GAUSS_C1(16u)
@ -1737,11 +1738,6 @@ static bool ipp_GaussianBlur( InputArray _src, OutputArray _dst, Size ksize,
if (type == CV_32FC1)
IPP_FILTER_GAUSS_C1(32f)
if (pSpec)
ippFree(pSpec);
if (pBuffer)
ippFree(pBuffer);
if(status >= 0)
return true;

View File

@ -425,7 +425,7 @@ namespace cv
{
static bool ipp_integral(InputArray _src, OutputArray _sum, OutputArray _sqsum, OutputArray _tilted, int sdepth, int sqdepth)
{
#if !defined(HAVE_IPP_ICV_ONLY) // Disabled on ICV due invalid results
#if !defined(HAVE_IPP_ICV_ONLY) && (IPP_VERSION_X100 != 900) // Disabled on ICV due invalid results
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
if( sdepth <= 0 )
sdepth = depth == CV_8U ? CV_32S : CV_64F;