opencv/modules/core/src/arithm_ipp.hpp
Sayed Adel 93ffebc273 core: reimplement SIMD arithmetic, logic and comparison operations into wide universal intrinsics
- initialize arithmetic dispatcher
  - add new universal intrinsic v_absdiffs
  - add new universal intrinsic v_pack_b
  - add accumulate version of universal intrinsic v_round
  - fix sse/avx2:uint8 multiplication overflow
  - reimplement arithmetic, logic and comparison operations into wide universal intrinsics
    with full support for all types
  - reimplement IPP arithmetic, logic and comparison operations in a sperate file arithm_ipp.hpp
  - avoid scalar multiplication if scaling factor eq 1 and use integer multiplication
  - move C arithmetic operations to precomp.hpp and delete [arithm_simd|arithm_core].hpp
  - add compatibility with new opencv4 divide policy
2018-10-30 12:48:31 +02:00

417 lines
17 KiB
C++

// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html
#if ARITHM_USE_IPP
namespace cv { namespace hal {
//=======================================
// Arithmetic and logical operations
// +, -, *, /, &, |, ^, ~, abs ...
//=======================================
#define ARITHM_IPP_BIN(fun, ...) \
do { \
if (!CV_IPP_CHECK_COND) \
return 0; \
if (height == 1) \
step1 = step2 = step = width * sizeof(dst[0]); \
if (0 <= CV_INSTRUMENT_FUN_IPP(fun, __VA_ARGS__)) \
{ \
CV_IMPL_ADD(CV_IMPL_IPP); \
return 1; \
} \
setIppErrorStatus(); \
return 0; \
} while(0)
//=======================================
// Addition
//=======================================
inline int arithm_ipp_add8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
uchar* dst, size_t step, int width, int height)
{
ARITHM_IPP_BIN(ippiAdd_8u_C1RSfs, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), 0);
}
inline int arithm_ipp_add16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
ushort* dst, size_t step, int width, int height)
{
ARITHM_IPP_BIN(ippiAdd_16u_C1RSfs, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), 0);
}
inline int arithm_ipp_add16s(const short* src1, size_t step1, const short* src2, size_t step2,
short* dst, size_t step, int width, int height)
{
ARITHM_IPP_BIN(ippiAdd_16s_C1RSfs, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), 0);
}
inline int arithm_ipp_add32f(const float* src1, size_t step1, const float* src2, size_t step2,
float* dst, size_t step, int width, int height)
{
ARITHM_IPP_BIN(ippiAdd_32f_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
}
#define arithm_ipp_add8s(...) 0
#define arithm_ipp_add32s(...) 0
#define arithm_ipp_add64f(...) 0
//=======================================
// Subtract
//=======================================
inline int arithm_ipp_sub8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
uchar* dst, size_t step, int width, int height)
{
ARITHM_IPP_BIN(ippiSub_8u_C1RSfs, src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(width, height), 0);
}
inline int arithm_ipp_sub16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
ushort* dst, size_t step, int width, int height)
{
ARITHM_IPP_BIN(ippiSub_16u_C1RSfs, src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(width, height), 0);
}
inline int arithm_ipp_sub16s(const short* src1, size_t step1, const short* src2, size_t step2,
short* dst, size_t step, int width, int height)
{
ARITHM_IPP_BIN(ippiSub_16s_C1RSfs, src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(width, height), 0);
}
inline int arithm_ipp_sub32f(const float* src1, size_t step1, const float* src2, size_t step2,
float* dst, size_t step, int width, int height)
{
ARITHM_IPP_BIN(ippiSub_32f_C1R, src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(width, height));
}
#define arithm_ipp_sub8s(...) 0
#define arithm_ipp_sub32s(...) 0
#define arithm_ipp_sub64f(...) 0
///////////////////////////////////////////////////////////////////////////////////////////////////
#define ARITHM_IPP_MIN_MAX(fun, type) \
do { \
if (!CV_IPP_CHECK_COND) \
return 0; \
type* s1 = (type*)src1; \
type* s2 = (type*)src2; \
type* d = dst; \
if (height == 1) \
step1 = step2 = step = width * sizeof(dst[0]); \
int i = 0; \
for(; i < height; i++) \
{ \
if (0 > CV_INSTRUMENT_FUN_IPP(fun, s1, s2, d, width)) \
break; \
s1 = (type*)((uchar*)s1 + step1); \
s2 = (type*)((uchar*)s2 + step2); \
d = (type*)((uchar*)d + step); \
} \
if (i == height) \
{ \
CV_IMPL_ADD(CV_IMPL_IPP); \
return 1; \
} \
setIppErrorStatus(); \
return 0; \
} while(0)
//=======================================
// Max
//=======================================
inline int arithm_ipp_max8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
uchar* dst, size_t step, int width, int height)
{
ARITHM_IPP_MIN_MAX(ippsMaxEvery_8u, uchar);
}
inline int arithm_ipp_max16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
ushort* dst, size_t step, int width, int height)
{
ARITHM_IPP_MIN_MAX(ippsMaxEvery_16u, ushort);
}
inline int arithm_ipp_max32f(const float* src1, size_t step1, const float* src2, size_t step2,
float* dst, size_t step, int width, int height)
{
ARITHM_IPP_MIN_MAX(ippsMaxEvery_32f, float);
}
inline int arithm_ipp_max64f(const double* src1, size_t step1, const double* src2, size_t step2,
double* dst, size_t step, int width, int height)
{
ARITHM_IPP_MIN_MAX(ippsMaxEvery_64f, double);
}
#define arithm_ipp_max8s(...) 0
#define arithm_ipp_max16s(...) 0
#define arithm_ipp_max32s(...) 0
//=======================================
// Min
//=======================================
inline int arithm_ipp_min8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
uchar* dst, size_t step, int width, int height)
{
ARITHM_IPP_MIN_MAX(ippsMinEvery_8u, uchar);
}
inline int arithm_ipp_min16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
ushort* dst, size_t step, int width, int height)
{
ARITHM_IPP_MIN_MAX(ippsMinEvery_16u, ushort);
}
inline int arithm_ipp_min32f(const float* src1, size_t step1, const float* src2,size_t step2,
float* dst, size_t step, int width, int height)
{
ARITHM_IPP_MIN_MAX(ippsMinEvery_32f, float);
}
inline int arithm_ipp_min64f(const double* src1, size_t step1, const double* src2, size_t step2,
double* dst, size_t step, int width, int height)
{
ARITHM_IPP_MIN_MAX(ippsMinEvery_64f, double);
}
#define arithm_ipp_min8s(...) 0
#define arithm_ipp_min16s(...) 0
#define arithm_ipp_min32s(...) 0
//=======================================
// AbsDiff
//=======================================
inline int arithm_ipp_absdiff8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
uchar* dst, size_t step, int width, int height)
{
ARITHM_IPP_BIN(ippiAbsDiff_8u_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
}
inline int arithm_ipp_absdiff16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
ushort* dst, size_t step, int width, int height)
{
ARITHM_IPP_BIN(ippiAbsDiff_16u_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
}
inline int arithm_ipp_absdiff32f(const float* src1, size_t step1, const float* src2, size_t step2,
float* dst, size_t step, int width, int height)
{
ARITHM_IPP_BIN(ippiAbsDiff_32f_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
}
#define arithm_ipp_absdiff8s(...) 0
#define arithm_ipp_absdiff16s(...) 0
#define arithm_ipp_absdiff32s(...) 0
#define arithm_ipp_absdiff64f(...) 0
//=======================================
// Logical
//=======================================
inline int arithm_ipp_and8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
uchar* dst, size_t step, int width, int height)
{
ARITHM_IPP_BIN(ippiAnd_8u_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
}
inline int arithm_ipp_or8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
uchar* dst, size_t step, int width, int height)
{
ARITHM_IPP_BIN(ippiOr_8u_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
}
inline int arithm_ipp_xor8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
uchar* dst, size_t step, int width, int height)
{
ARITHM_IPP_BIN(ippiXor_8u_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
}
inline int arithm_ipp_not8u(const uchar* src1, size_t step1, uchar* dst, size_t step, int width, int height)
{
if (!CV_IPP_CHECK_COND)
return 0;
if (height == 1)
step1 = step = width * sizeof(dst[0]);
if (0 <= CV_INSTRUMENT_FUN_IPP(ippiNot_8u_C1R, src1, (int)step1, dst, (int)step, ippiSize(width, height)))
{
CV_IMPL_ADD(CV_IMPL_IPP);
return 1;
}
setIppErrorStatus();
return 0;
}
//=======================================
// Compare
//=======================================
#define ARITHM_IPP_CMP(fun, ...) \
do { \
if (!CV_IPP_CHECK_COND) \
return 0; \
IppCmpOp op = arithm_ipp_convert_cmp(cmpop); \
if (op < 0) \
return 0; \
if (height == 1) \
step1 = step2 = step = width * sizeof(dst[0]); \
if (0 <= CV_INSTRUMENT_FUN_IPP(fun, __VA_ARGS__, op)) \
{ \
CV_IMPL_ADD(CV_IMPL_IPP); \
return 1; \
} \
setIppErrorStatus(); \
return 0; \
} while(0)
inline IppCmpOp arithm_ipp_convert_cmp(int cmpop)
{
switch(cmpop)
{
case CMP_EQ: return ippCmpEq;
case CMP_GT: return ippCmpGreater;
case CMP_GE: return ippCmpGreaterEq;
case CMP_LT: return ippCmpLess;
case CMP_LE: return ippCmpLessEq;
default: return (IppCmpOp)-1;
}
}
inline int arithm_ipp_cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
uchar* dst, size_t step, int width, int height, int cmpop)
{
ARITHM_IPP_CMP(ippiCompare_8u_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
}
inline int arithm_ipp_cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
uchar* dst, size_t step, int width, int height, int cmpop)
{
ARITHM_IPP_CMP(ippiCompare_16u_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
}
inline int arithm_ipp_cmp16s(const short* src1, size_t step1, const short* src2, size_t step2,
uchar* dst, size_t step, int width, int height, int cmpop)
{
ARITHM_IPP_CMP(ippiCompare_16s_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
}
inline int arithm_ipp_cmp32f(const float* src1, size_t step1, const float* src2, size_t step2,
uchar* dst, size_t step, int width, int height, int cmpop)
{
ARITHM_IPP_CMP(ippiCompare_32f_C1R, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height));
}
#define arithm_ipp_cmp8s(...) 0
#define arithm_ipp_cmp32s(...) 0
#define arithm_ipp_cmp64f(...) 0
//=======================================
// Multiply
//=======================================
#define ARITHM_IPP_MUL(fun, ...) \
do { \
if (!CV_IPP_CHECK_COND) \
return 0; \
float fscale = (float)scale; \
if (std::fabs(fscale - 1) > FLT_EPSILON) \
return 0; \
if (0 <= CV_INSTRUMENT_FUN_IPP(fun, __VA_ARGS__)) \
{ \
CV_IMPL_ADD(CV_IMPL_IPP); \
return 1; \
} \
setIppErrorStatus(); \
return 0; \
} while(0)
inline int arithm_ipp_mul8u(const uchar *src1, size_t step1, const uchar *src2, size_t step2,
uchar *dst, size_t step, int width, int height, double scale)
{
ARITHM_IPP_MUL(ippiMul_8u_C1RSfs, src1, (int)step1, src2, (int)step2,dst, (int)step, ippiSize(width, height), 0);
}
inline int arithm_ipp_mul16u(const ushort *src1, size_t step1, const ushort *src2, size_t step2,
ushort *dst, size_t step, int width, int height, double scale)
{
ARITHM_IPP_MUL(ippiMul_16u_C1RSfs, src1, (int)step1, src2, (int)step2,dst, (int)step, ippiSize(width, height), 0);
}
inline int arithm_ipp_mul16s(const short *src1, size_t step1, const short *src2, size_t step2,
short *dst, size_t step, int width, int height, double scale)
{
ARITHM_IPP_MUL(ippiMul_16s_C1RSfs, src1, (int)step1, src2, (int)step2,dst, (int)step, ippiSize(width, height), 0);
}
inline int arithm_ipp_mul32f(const float *src1, size_t step1, const float *src2, size_t step2,
float *dst, size_t step, int width, int height, double scale)
{
ARITHM_IPP_MUL(ippiMul_32f_C1R, src1, (int)step1, src2, (int)step2,dst, (int)step, ippiSize(width, height));
}
#define arithm_ipp_mul8s(...) 0
#define arithm_ipp_mul32s(...) 0
#define arithm_ipp_mul64f(...) 0
//=======================================
// Div
//=======================================
#define arithm_ipp_div8u(...) 0
#define arithm_ipp_div8s(...) 0
#define arithm_ipp_div16u(...) 0
#define arithm_ipp_div16s(...) 0
#define arithm_ipp_div32s(...) 0
#define arithm_ipp_div32f(...) 0
#define arithm_ipp_div64f(...) 0
//=======================================
// AddWeighted
//=======================================
#define arithm_ipp_addWeighted8u(...) 0
#define arithm_ipp_addWeighted8s(...) 0
#define arithm_ipp_addWeighted16u(...) 0
#define arithm_ipp_addWeighted16s(...) 0
#define arithm_ipp_addWeighted32s(...) 0
#define arithm_ipp_addWeighted32f(...) 0
#define arithm_ipp_addWeighted64f(...) 0
//=======================================
// Reciprocial
//=======================================
#define arithm_ipp_recip8u(...) 0
#define arithm_ipp_recip8s(...) 0
#define arithm_ipp_recip16u(...) 0
#define arithm_ipp_recip16s(...) 0
#define arithm_ipp_recip32s(...) 0
#define arithm_ipp_recip32f(...) 0
#define arithm_ipp_recip64f(...) 0
/** empty block in case if you have "fun"
#define arithm_ipp_8u(...) 0
#define arithm_ipp_8s(...) 0
#define arithm_ipp_16u(...) 0
#define arithm_ipp_16s(...) 0
#define arithm_ipp_32s(...) 0
#define arithm_ipp_32f(...) 0
#define arithm_ipp_64f(...) 0
**/
}} // cv::hal::
#define ARITHM_CALL_IPP(fun, ...) \
{ \
if (__CV_EXPAND(fun(__VA_ARGS__))) \
return; \
}
#endif // ARITHM_USE_IPP
#if !ARITHM_USE_IPP
#define ARITHM_CALL_IPP(...)
#endif