mirror of
https://github.com/opencv/opencv.git
synced 2024-12-15 18:09:11 +08:00
93ffebc273
- initialize arithmetic dispatcher - add new universal intrinsic v_absdiffs - add new universal intrinsic v_pack_b - add accumulate version of universal intrinsic v_round - fix sse/avx2:uint8 multiplication overflow - reimplement arithmetic, logic and comparison operations into wide universal intrinsics with full support for all types - reimplement IPP arithmetic, logic and comparison operations in a separate file arithm_ipp.hpp - avoid scalar multiplication if scaling factor eq 1 and use integer multiplication - move C arithmetic operations to precomp.hpp and delete [arithm_simd|arithm_core].hpp - add compatibility with new opencv4 divide policy
417 lines
17 KiB
C++
417 lines
17 KiB
C++
// This file is part of OpenCV project.
|
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
|
// of this distribution and at http://opencv.org/license.html
|
|
#if ARITHM_USE_IPP
|
|
|
|
namespace cv { namespace hal {
|
|
|
|
//=======================================
|
|
// Arithmetic and logical operations
|
|
// +, -, *, /, &, |, ^, ~, abs ...
|
|
//=======================================
|
|
|
|
// Shared body for the IPP-backed element-wise wrappers below. It expands
// inside a function that has `dst`, `width`, `height` and the mutable steps
// `step1`, `step2`, `step` in scope, and makes that function return:
//   0 - CV_IPP_CHECK_COND is false, or `fun` reported a negative status
//       (setIppErrorStatus() is called in the latter case);
//   1 - `fun` returned a non-negative status.
// For a single row all three steps are reset to the packed row size,
// width * sizeof(dst[0]), before `fun` is invoked with the given arguments.
#define ARITHM_IPP_BIN(fun, ...)                            \
do {                                                        \
    if (!CV_IPP_CHECK_COND)                                 \
        return 0;                                           \
    if (height == 1)                                        \
        step1 = step2 = step = width * sizeof(dst[0]);      \
    if (CV_INSTRUMENT_FUN_IPP(fun, __VA_ARGS__) < 0)        \
    {                                                       \
        setIppErrorStatus();                                \
        return 0;                                           \
    }                                                       \
    CV_IMPL_ADD(CV_IMPL_IPP);                               \
    return 1;                                               \
} while(0)
|
|
|
|
//=======================================
|
|
// Addition
|
|
//=======================================
|
|
|
|
inline int arithm_ipp_add8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
|
|
uchar* dst, size_t step, int width, int height)
|
|
{
|
|
ARITHM_IPP_BIN(ippiAdd_8u_C1RSfs, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), 0);
|
|
}
|
|
|
|
inline int arithm_ipp_add16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
|
|
ushort* dst, size_t step, int width, int height)
|
|
{
|
|
ARITHM_IPP_BIN(ippiAdd_16u_C1RSfs, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), 0);
|
|
}
|
|
|
|
inline int arithm_ipp_add16s(const short* src1, size_t step1, const short* src2, size_t step2,
|
|
short* dst, size_t step, int width, int height)
|
|
{
|
|
ARITHM_IPP_BIN(ippiAdd_16s_C1RSfs, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), 0);
|
|
}
|
|
|
|
inline int arithm_ipp_add32f(const float* src1, size_t step1, const float* src2, size_t step2,
|
|
float* dst, size_t step, int width, int height)
|
|
{
|
|
ARITHM_IPP_BIN(ippiAdd_32f_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
|
|
}
|
|
|
|
// No IPP addition path for these element types: each stub discards its
// arguments and evaluates to 0 ("not handled").
#define arithm_ipp_add8s(...)  0
#define arithm_ipp_add32s(...) 0
#define arithm_ipp_add64f(...) 0
|
|
|
|
//=======================================
|
|
// Subtract
|
|
//=======================================
|
|
|
|
inline int arithm_ipp_sub8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
|
|
uchar* dst, size_t step, int width, int height)
|
|
{
|
|
ARITHM_IPP_BIN(ippiSub_8u_C1RSfs, src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(width, height), 0);
|
|
}
|
|
|
|
inline int arithm_ipp_sub16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
|
|
ushort* dst, size_t step, int width, int height)
|
|
{
|
|
ARITHM_IPP_BIN(ippiSub_16u_C1RSfs, src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(width, height), 0);
|
|
}
|
|
|
|
inline int arithm_ipp_sub16s(const short* src1, size_t step1, const short* src2, size_t step2,
|
|
short* dst, size_t step, int width, int height)
|
|
{
|
|
ARITHM_IPP_BIN(ippiSub_16s_C1RSfs, src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(width, height), 0);
|
|
}
|
|
|
|
inline int arithm_ipp_sub32f(const float* src1, size_t step1, const float* src2, size_t step2,
|
|
float* dst, size_t step, int width, int height)
|
|
{
|
|
ARITHM_IPP_BIN(ippiSub_32f_C1R, src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(width, height));
|
|
}
|
|
|
|
// No IPP subtraction path for these element types: each stub discards its
// arguments and evaluates to 0 ("not handled").
#define arithm_ipp_sub8s(...)  0
#define arithm_ipp_sub32s(...) 0
#define arithm_ipp_sub64f(...) 0
|
|
|
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Shared body for the row-wise min/max wrappers below. It expands inside a
// function that has `src1`, `src2`, `dst`, `width`, `height` and the mutable
// steps `step1`, `step2`, `step` in scope.
// `fun` is called once per row as fun(row1, row2, rowDst, width); after each
// successful row the three row pointers advance by their step, in bytes.
// The enclosing function returns:
//   0 - CV_IPP_CHECK_COND is false, or fewer/more than `height` rows were
//       processed (a row reported a negative status, or `height` is negative);
//       setIppErrorStatus() is called in the latter case;
//   1 - exactly `height` rows were processed.
// For a single row all three steps are reset to width * sizeof(dst[0]).
#define ARITHM_IPP_MIN_MAX(fun, type)                                   \
do {                                                                    \
    if (!CV_IPP_CHECK_COND)                                             \
        return 0;                                                       \
    type* lhsRow = (type*)src1;                                         \
    type* rhsRow = (type*)src2;                                         \
    type* outRow = dst;                                                 \
    if (height == 1)                                                    \
        step1 = step2 = step = width * sizeof(dst[0]);                  \
    int row = 0;                                                        \
    while (row < height &&                                              \
           CV_INSTRUMENT_FUN_IPP(fun, lhsRow, rhsRow, outRow, width) >= 0) \
    {                                                                   \
        lhsRow = (type*)((uchar*)lhsRow + step1);                       \
        rhsRow = (type*)((uchar*)rhsRow + step2);                       \
        outRow = (type*)((uchar*)outRow + step);                        \
        ++row;                                                          \
    }                                                                   \
    if (row != height)                                                  \
    {                                                                   \
        setIppErrorStatus();                                            \
        return 0;                                                       \
    }                                                                   \
    CV_IMPL_ADD(CV_IMPL_IPP);                                           \
    return 1;                                                           \
} while(0)
|
|
|
|
//=======================================
|
|
// Max
|
|
//=======================================
|
|
|
|
inline int arithm_ipp_max8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
|
|
uchar* dst, size_t step, int width, int height)
|
|
{
|
|
ARITHM_IPP_MIN_MAX(ippsMaxEvery_8u, uchar);
|
|
}
|
|
|
|
inline int arithm_ipp_max16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
|
|
ushort* dst, size_t step, int width, int height)
|
|
{
|
|
ARITHM_IPP_MIN_MAX(ippsMaxEvery_16u, ushort);
|
|
}
|
|
|
|
inline int arithm_ipp_max32f(const float* src1, size_t step1, const float* src2, size_t step2,
|
|
float* dst, size_t step, int width, int height)
|
|
{
|
|
ARITHM_IPP_MIN_MAX(ippsMaxEvery_32f, float);
|
|
}
|
|
|
|
inline int arithm_ipp_max64f(const double* src1, size_t step1, const double* src2, size_t step2,
|
|
double* dst, size_t step, int width, int height)
|
|
{
|
|
ARITHM_IPP_MIN_MAX(ippsMaxEvery_64f, double);
|
|
}
|
|
|
|
// No IPP maximum path for these element types: each stub discards its
// arguments and evaluates to 0 ("not handled").
#define arithm_ipp_max8s(...)  0
#define arithm_ipp_max16s(...) 0
#define arithm_ipp_max32s(...) 0
|
|
|
|
//=======================================
|
|
// Min
|
|
//=======================================
|
|
|
|
inline int arithm_ipp_min8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
|
|
uchar* dst, size_t step, int width, int height)
|
|
{
|
|
ARITHM_IPP_MIN_MAX(ippsMinEvery_8u, uchar);
|
|
}
|
|
|
|
inline int arithm_ipp_min16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
|
|
ushort* dst, size_t step, int width, int height)
|
|
{
|
|
ARITHM_IPP_MIN_MAX(ippsMinEvery_16u, ushort);
|
|
}
|
|
|
|
inline int arithm_ipp_min32f(const float* src1, size_t step1, const float* src2,size_t step2,
|
|
float* dst, size_t step, int width, int height)
|
|
{
|
|
ARITHM_IPP_MIN_MAX(ippsMinEvery_32f, float);
|
|
}
|
|
|
|
inline int arithm_ipp_min64f(const double* src1, size_t step1, const double* src2, size_t step2,
|
|
double* dst, size_t step, int width, int height)
|
|
{
|
|
ARITHM_IPP_MIN_MAX(ippsMinEvery_64f, double);
|
|
}
|
|
|
|
// No IPP minimum path for these element types: each stub discards its
// arguments and evaluates to 0 ("not handled").
#define arithm_ipp_min8s(...)  0
#define arithm_ipp_min16s(...) 0
#define arithm_ipp_min32s(...) 0
|
|
|
|
//=======================================
|
|
// AbsDiff
|
|
//=======================================
|
|
|
|
inline int arithm_ipp_absdiff8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
|
|
uchar* dst, size_t step, int width, int height)
|
|
{
|
|
ARITHM_IPP_BIN(ippiAbsDiff_8u_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
|
|
}
|
|
|
|
inline int arithm_ipp_absdiff16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
|
|
ushort* dst, size_t step, int width, int height)
|
|
{
|
|
ARITHM_IPP_BIN(ippiAbsDiff_16u_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
|
|
}
|
|
|
|
inline int arithm_ipp_absdiff32f(const float* src1, size_t step1, const float* src2, size_t step2,
|
|
float* dst, size_t step, int width, int height)
|
|
{
|
|
ARITHM_IPP_BIN(ippiAbsDiff_32f_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
|
|
}
|
|
// No IPP absolute-difference path for these element types: each stub
// discards its arguments and evaluates to 0 ("not handled").
#define arithm_ipp_absdiff8s(...)  0
#define arithm_ipp_absdiff16s(...) 0
#define arithm_ipp_absdiff32s(...) 0
#define arithm_ipp_absdiff64f(...) 0
|
|
|
|
//=======================================
|
|
// Logical
|
|
//=======================================
|
|
|
|
inline int arithm_ipp_and8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
|
|
uchar* dst, size_t step, int width, int height)
|
|
{
|
|
ARITHM_IPP_BIN(ippiAnd_8u_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
|
|
}
|
|
|
|
inline int arithm_ipp_or8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
|
|
uchar* dst, size_t step, int width, int height)
|
|
{
|
|
ARITHM_IPP_BIN(ippiOr_8u_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
|
|
}
|
|
|
|
inline int arithm_ipp_xor8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
|
|
uchar* dst, size_t step, int width, int height)
|
|
{
|
|
ARITHM_IPP_BIN(ippiXor_8u_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
|
|
}
|
|
|
|
inline int arithm_ipp_not8u(const uchar* src1, size_t step1, uchar* dst, size_t step, int width, int height)
|
|
{
|
|
if (!CV_IPP_CHECK_COND)
|
|
return 0;
|
|
if (height == 1)
|
|
step1 = step = width * sizeof(dst[0]);
|
|
if (0 <= CV_INSTRUMENT_FUN_IPP(ippiNot_8u_C1R, src1, (int)step1, dst, (int)step, ippiSize(width, height)))
|
|
{
|
|
CV_IMPL_ADD(CV_IMPL_IPP);
|
|
return 1;
|
|
}
|
|
setIppErrorStatus();
|
|
return 0;
|
|
}
|
|
|
|
//=======================================
|
|
// Compare
|
|
//=======================================
|
|
|
|
// Shared body for the IPP-backed comparison wrappers below. It expands inside
// a function that has `cmpop`, `dst`, `width`, `height` and the mutable steps
// `step1`, `step2`, `step` in scope. `cmpop` is translated with
// arithm_ipp_convert_cmp() and the result is appended as the last argument of
// `fun`. The enclosing function returns:
//   0 - CV_IPP_CHECK_COND is false, `cmpop` has no IPP equivalent, or `fun`
//       reported a negative status (setIppErrorStatus() is called only in
//       the last case);
//   1 - `fun` returned a non-negative status.
// For a single row all three steps are reset to width * sizeof(dst[0]).
//
// The "unsupported" sentinel is (IppCmpOp)-1. It is tested after an explicit
// cast to int: comparing the enum itself against 0 is always false when the
// compiler gives the enum an unsigned underlying type, which would let the
// sentinel through to IPP.
#define ARITHM_IPP_CMP(fun, ...)                            \
do {                                                        \
    if (!CV_IPP_CHECK_COND)                                 \
        return 0;                                           \
    IppCmpOp op = arithm_ipp_convert_cmp(cmpop);            \
    if ((int)op < 0)                                        \
        return 0;                                           \
    if (height == 1)                                        \
        step1 = step2 = step = width * sizeof(dst[0]);      \
    if (0 <= CV_INSTRUMENT_FUN_IPP(fun, __VA_ARGS__, op))   \
    {                                                       \
        CV_IMPL_ADD(CV_IMPL_IPP);                           \
        return 1;                                           \
    }                                                       \
    setIppErrorStatus();                                    \
    return 0;                                               \
} while(0)
|
|
|
|
// Translates a comparison code (CMP_EQ, CMP_GT, CMP_GE, CMP_LT, CMP_LE) into
// the matching IppCmpOp value. Any other code yields the sentinel
// (IppCmpOp)-1, meaning "no IPP equivalent".
inline IppCmpOp arithm_ipp_convert_cmp(int cmpop)
{
    IppCmpOp ippOp = (IppCmpOp)-1;
    switch (cmpop)
    {
    case CMP_EQ: ippOp = ippCmpEq;        break;
    case CMP_GT: ippOp = ippCmpGreater;   break;
    case CMP_GE: ippOp = ippCmpGreaterEq; break;
    case CMP_LT: ippOp = ippCmpLess;      break;
    case CMP_LE: ippOp = ippCmpLessEq;    break;
    default:                              break;
    }
    return ippOp;
}
|
|
|
|
inline int arithm_ipp_cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
|
|
uchar* dst, size_t step, int width, int height, int cmpop)
|
|
{
|
|
ARITHM_IPP_CMP(ippiCompare_8u_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
|
|
}
|
|
|
|
inline int arithm_ipp_cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
|
|
uchar* dst, size_t step, int width, int height, int cmpop)
|
|
{
|
|
ARITHM_IPP_CMP(ippiCompare_16u_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
|
|
}
|
|
|
|
inline int arithm_ipp_cmp16s(const short* src1, size_t step1, const short* src2, size_t step2,
|
|
uchar* dst, size_t step, int width, int height, int cmpop)
|
|
{
|
|
ARITHM_IPP_CMP(ippiCompare_16s_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
|
|
}
|
|
|
|
inline int arithm_ipp_cmp32f(const float* src1, size_t step1, const float* src2, size_t step2,
|
|
uchar* dst, size_t step, int width, int height, int cmpop)
|
|
{
|
|
ARITHM_IPP_CMP(ippiCompare_32f_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
|
|
}
|
|
|
|
// No IPP comparison path for these element types: each stub discards its
// arguments and evaluates to 0 ("not handled").
#define arithm_ipp_cmp8s(...)  0
#define arithm_ipp_cmp32s(...) 0
#define arithm_ipp_cmp64f(...) 0
|
|
|
|
//=======================================
|
|
// Multiply
|
|
//=======================================
|
|
|
|
// Shared body for the IPP-backed multiplication wrappers below. It expands
// inside a function that has `scale`, `dst`, `width`, `height` and the
// mutable steps `step1`, `step2`, `step` in scope.
// IPP is used only for unscaled products: `scale` is narrowed to float and
// must be within FLT_EPSILON of 1, otherwise the function returns 0 without
// calling `fun`. The enclosing function returns:
//   0 - CV_IPP_CHECK_COND is false, the scale is not 1, or `fun` reported a
//       negative status (setIppErrorStatus() is called only in the last case);
//   1 - `fun` returned a non-negative status.
// For a single row all three steps are reset to width * sizeof(dst[0]),
// matching ARITHM_IPP_BIN and ARITHM_IPP_CMP, so IPP always sees steps
// consistent with the row it is given.
#define ARITHM_IPP_MUL(fun, ...)                            \
do {                                                        \
    if (!CV_IPP_CHECK_COND)                                 \
        return 0;                                           \
    float fscale = (float)scale;                            \
    if (std::fabs(fscale - 1) > FLT_EPSILON)                \
        return 0;                                           \
    if (height == 1)                                        \
        step1 = step2 = step = width * sizeof(dst[0]);      \
    if (0 <= CV_INSTRUMENT_FUN_IPP(fun, __VA_ARGS__))       \
    {                                                       \
        CV_IMPL_ADD(CV_IMPL_IPP);                           \
        return 1;                                           \
    }                                                       \
    setIppErrorStatus();                                    \
    return 0;                                               \
} while(0)
|
|
|
|
inline int arithm_ipp_mul8u(const uchar *src1, size_t step1, const uchar *src2, size_t step2,
|
|
uchar *dst, size_t step, int width, int height, double scale)
|
|
{
|
|
ARITHM_IPP_MUL(ippiMul_8u_C1RSfs, src1, (int)step1, src2, (int)step2,dst, (int)step, ippiSize(width, height), 0);
|
|
}
|
|
inline int arithm_ipp_mul16u(const ushort *src1, size_t step1, const ushort *src2, size_t step2,
|
|
ushort *dst, size_t step, int width, int height, double scale)
|
|
{
|
|
ARITHM_IPP_MUL(ippiMul_16u_C1RSfs, src1, (int)step1, src2, (int)step2,dst, (int)step, ippiSize(width, height), 0);
|
|
}
|
|
|
|
inline int arithm_ipp_mul16s(const short *src1, size_t step1, const short *src2, size_t step2,
|
|
short *dst, size_t step, int width, int height, double scale)
|
|
{
|
|
ARITHM_IPP_MUL(ippiMul_16s_C1RSfs, src1, (int)step1, src2, (int)step2,dst, (int)step, ippiSize(width, height), 0);
|
|
}
|
|
|
|
inline int arithm_ipp_mul32f(const float *src1, size_t step1, const float *src2, size_t step2,
|
|
float *dst, size_t step, int width, int height, double scale)
|
|
{
|
|
ARITHM_IPP_MUL(ippiMul_32f_C1R, src1, (int)step1, src2, (int)step2,dst, (int)step, ippiSize(width, height));
|
|
}
|
|
|
|
// No IPP multiplication path for these element types: each stub discards
// its arguments and evaluates to 0 ("not handled").
#define arithm_ipp_mul8s(...)  0
#define arithm_ipp_mul32s(...) 0
#define arithm_ipp_mul64f(...) 0
|
|
|
|
//=======================================
|
|
// Div
|
|
//=======================================
|
|
|
|
// Division has no IPP path for any element type: every stub discards its
// arguments and evaluates to 0 ("not handled").
#define arithm_ipp_div8u(...)  0
#define arithm_ipp_div8s(...)  0
#define arithm_ipp_div16u(...) 0
#define arithm_ipp_div16s(...) 0
#define arithm_ipp_div32s(...) 0
#define arithm_ipp_div32f(...) 0
#define arithm_ipp_div64f(...) 0
|
|
|
|
//=======================================
|
|
// AddWeighted
|
|
//=======================================
|
|
|
|
// Weighted addition has no IPP path for any element type: every stub
// discards its arguments and evaluates to 0 ("not handled").
#define arithm_ipp_addWeighted8u(...)  0
#define arithm_ipp_addWeighted8s(...)  0
#define arithm_ipp_addWeighted16u(...) 0
#define arithm_ipp_addWeighted16s(...) 0
#define arithm_ipp_addWeighted32s(...) 0
#define arithm_ipp_addWeighted32f(...) 0
#define arithm_ipp_addWeighted64f(...) 0
|
|
|
|
//=======================================
|
|
// Reciprocal
|
|
//=======================================
|
|
|
|
// The reciprocal operation has no IPP path for any element type: every stub
// discards its arguments and evaluates to 0 ("not handled").
#define arithm_ipp_recip8u(...)  0
#define arithm_ipp_recip8s(...)  0
#define arithm_ipp_recip16u(...) 0
#define arithm_ipp_recip16s(...) 0
#define arithm_ipp_recip32s(...) 0
#define arithm_ipp_recip32f(...) 0
#define arithm_ipp_recip64f(...) 0
|
|
|
|
/** Empty stub block to copy when adding a new operation "fun"
    (insert the operation name after the "arithm_ipp_" prefix):
#define arithm_ipp_8u(...) 0
#define arithm_ipp_8s(...) 0
#define arithm_ipp_16u(...) 0
#define arithm_ipp_16s(...) 0
#define arithm_ipp_32s(...) 0
#define arithm_ipp_32f(...) 0
#define arithm_ipp_64f(...) 0
**/
|
|
|
|
}} // cv::hal::
|
|
|
|
// Tries the IPP implementation `fun` with the given arguments and returns
// from the enclosing (void) function when it evaluates to non-zero, i.e.
// when IPP handled the request; otherwise execution continues after the
// macro. Expands to a bare brace block, so it can be used without a trailing
// semicolon.
#define ARITHM_CALL_IPP(fun, ...)                 \
{                                                 \
    if (__CV_EXPAND(fun(__VA_ARGS__)))            \
    {                                             \
        return;                                   \
    }                                             \
}
|
|
|
|
#endif // ARITHM_USE_IPP
|
|
|
|
|
|
// Without IPP the dispatch macro expands to nothing, so call sites fall
// straight through to the non-IPP implementation.
#if !ARITHM_USE_IPP
    #define ARITHM_CALL_IPP(...)
#endif // !ARITHM_USE_IPP