Merge pull request #15370 from alalek:core_fastmath_hpp_update

This commit is contained in:
Alexander Alekhin 2019-08-22 17:05:38 +00:00
commit 601096f360

View File

@ -47,12 +47,6 @@
#include "opencv2/core/cvdef.h" #include "opencv2/core/cvdef.h"
#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
&& defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
#include <emmintrin.h>
#endif
//! @addtogroup core_utils //! @addtogroup core_utils
//! @{ //! @{
@ -70,19 +64,24 @@
# endif # endif
#endif #endif
#ifdef HAVE_TEGRA_OPTIMIZATION #if defined(__CUDACC__)
# include "tegra_round.hpp" // nothing, intrinsics/asm code is not supported
#endif #else
#if ((defined _MSC_VER && defined _M_X64) \
|| (defined __GNUC__ && defined __x86_64__ && defined __SSE2__)) \
&& !defined(OPENCV_SKIP_INCLUDE_EMMINTRIN_H)
#include <emmintrin.h>
#endif
#if defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8 && !defined (__CUDACC__) #if defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8 \
# include <altivec.h> && !defined(OPENCV_SKIP_INCLUDE_ALTIVEC_H)
#endif #include <altivec.h>
#endif
#if ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || \ #if defined(CV_INLINE_ROUND_FLT)
defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION // user-specified version
#define CV_INLINE_ROUND_DBL(value) TEGRA_ROUND_DBL(value); // CV_INLINE_ROUND_DBL should be defined too
#define CV_INLINE_ROUND_FLT(value) TEGRA_ROUND_FLT(value); #elif defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__
#elif defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__ && !defined(__CUDACC__)
// 1. general scheme // 1. general scheme
#define ARM_ROUND(_value, _asm_string) \ #define ARM_ROUND(_value, _asm_string) \
int res; \ int res; \
@ -98,7 +97,7 @@
#endif #endif
// 3. version for float // 3. version for float
#define CV_INLINE_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]") #define CV_INLINE_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]")
#elif defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8 && !defined (__CUDACC__) #elif defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8
// P8 and newer machines can convert fp32/64 to int quickly. // P8 and newer machines can convert fp32/64 to int quickly.
#define CV_INLINE_ROUND_DBL(value) \ #define CV_INLINE_ROUND_DBL(value) \
int out; \ int out; \
@ -108,40 +107,86 @@
// FP32 also works with FP64 routine above // FP32 also works with FP64 routine above
#define CV_INLINE_ROUND_FLT(value) CV_INLINE_ROUND_DBL(value) #define CV_INLINE_ROUND_FLT(value) CV_INLINE_ROUND_DBL(value)
#endif
#ifdef _ARCH_PWR9 #ifdef CV_INLINE_ISINF_FLT
// user-specified version
// CV_INLINE_ISINF_DBL should be defined too
#elif defined __PPC64__ && defined _ARCH_PWR9 && defined(scalar_test_data_class)
#define CV_INLINE_ISINF_DBL(value) return scalar_test_data_class(value, 0x30); #define CV_INLINE_ISINF_DBL(value) return scalar_test_data_class(value, 0x30);
#define CV_INLINE_ISNAN_DBL(value) return scalar_test_data_class(value, 0x40);
#define CV_INLINE_ISINF_FLT(value) CV_INLINE_ISINF_DBL(value) #define CV_INLINE_ISINF_FLT(value) CV_INLINE_ISINF_DBL(value)
#endif
#ifdef CV_INLINE_ISNAN_FLT
// user-specified version
// CV_INLINE_ISNAN_DBL should be defined too
#elif defined __PPC64__ && defined _ARCH_PWR9 && defined(scalar_test_data_class)
#define CV_INLINE_ISNAN_DBL(value) return scalar_test_data_class(value, 0x40);
#define CV_INLINE_ISNAN_FLT(value) CV_INLINE_ISNAN_DBL(value) #define CV_INLINE_ISNAN_FLT(value) CV_INLINE_ISNAN_DBL(value)
#endif #endif
#elif defined CV_ICC || defined __GNUC__
#define CV_INLINE_ROUND_DBL(value) return (int)(lrint(value));
#define CV_INLINE_ROUND_FLT(value) return (int)(lrintf(value));
#endif
#if defined __PPC64__ && !defined OPENCV_USE_FASTMATH_GCC_BUILTINS #if !defined(OPENCV_USE_FASTMATH_BUILTINS) \
/* Let GCC inline C math functions when available. Dedicated hardware is available to && ( \
round and covert FP values. */ defined(__x86_64__) || defined(__i686__) \
#define OPENCV_USE_FASTMATH_GCC_BUILTINS || defined(__arm__) \
#endif || defined(__PPC64__) \
)
/* Use builtin C math functions when available. Dedicated hardware is available to
round and convert FP values. */
#define OPENCV_USE_FASTMATH_BUILTINS 1
#endif
/* Enable GCC builtin math functions if possible, desired, and available. /* Enable builtin math functions if possible, desired, and available.
Note, not all math functions inline equally. E.g lrint will not inline Note, not all math functions inline equally. E.g lrint will not inline
without the -fno-math-errno option. */ without the -fno-math-errno option. */
#if defined OPENCV_USE_FASTMATH_GCC_BUILTINS && defined __GNUC__ && !defined __clang__ && !defined (__CUDACC__) #if defined(CV_ICC)
#define _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS // nothing
#endif #elif defined(OPENCV_USE_FASTMATH_BUILTINS) && OPENCV_USE_FASTMATH_BUILTINS
#if defined(__clang__)
/* Allow overrides for some functions which may benefit from tuning. Likewise, #define CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS
note that isinf is not used as the return value is signed. */ #if !defined(CV_INLINE_ISNAN_DBL) && __has_builtin(__builtin_isnan)
#if defined _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS && !defined CV_INLINE_ISNAN_DBL
#define CV_INLINE_ISNAN_DBL(value) return __builtin_isnan(value); #define CV_INLINE_ISNAN_DBL(value) return __builtin_isnan(value);
#endif #endif
#if !defined(CV_INLINE_ISNAN_FLT) && __has_builtin(__builtin_isnan)
#if defined _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS && !defined CV_INLINE_ISNAN_FLT #define CV_INLINE_ISNAN_FLT(value) return __builtin_isnan(value);
#endif
#if !defined(CV_INLINE_ISINF_DBL) && __has_builtin(__builtin_isinf)
#define CV_INLINE_ISINF_DBL(value) return __builtin_isinf(value);
#endif
#if !defined(CV_INLINE_ISINF_FLT) && __has_builtin(__builtin_isinf)
#define CV_INLINE_ISINF_FLT(value) return __builtin_isinf(value);
#endif
#elif defined(__GNUC__)
#define CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS
#if !defined(CV_INLINE_ISNAN_DBL)
#define CV_INLINE_ISNAN_DBL(value) return __builtin_isnan(value);
#endif
#if !defined(CV_INLINE_ISNAN_FLT)
#define CV_INLINE_ISNAN_FLT(value) return __builtin_isnanf(value); #define CV_INLINE_ISNAN_FLT(value) return __builtin_isnanf(value);
#endif #endif
#if !defined(CV_INLINE_ISINF_DBL)
#define CV_INLINE_ISINF_DBL(value) return __builtin_isinf(value);
#endif
#if !defined(CV_INLINE_ISINF_FLT)
#define CV_INLINE_ISINF_FLT(value) return __builtin_isinff(value);
#endif
#elif defined(_MSC_VER)
#if !defined(CV_INLINE_ISNAN_DBL)
#define CV_INLINE_ISNAN_DBL(value) return isnan(value);
#endif
#if !defined(CV_INLINE_ISNAN_FLT)
#define CV_INLINE_ISNAN_FLT(value) return isnan(value);
#endif
#if !defined(CV_INLINE_ISINF_DBL)
#define CV_INLINE_ISINF_DBL(value) return isinf(value);
#endif
#if !defined(CV_INLINE_ISINF_FLT)
#define CV_INLINE_ISINF_FLT(value) return isinf(value);
#endif
#endif
#endif
#endif // defined(__CUDACC__)
/** @brief Rounds floating-point number to the nearest integer /** @brief Rounds floating-point number to the nearest integer
@ -151,8 +196,11 @@
CV_INLINE int CV_INLINE int
cvRound( double value ) cvRound( double value )
{ {
#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \ #if defined CV_INLINE_ROUND_DBL
&& defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__) CV_INLINE_ROUND_DBL(value);
#elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
&& defined __SSE2__ && !defined __APPLE__) || CV_SSE2) \
&& !defined(__CUDACC__)
__m128d t = _mm_set_sd( value ); __m128d t = _mm_set_sd( value );
return _mm_cvtsd_si32(t); return _mm_cvtsd_si32(t);
#elif defined _MSC_VER && defined _M_IX86 #elif defined _MSC_VER && defined _M_IX86
@ -163,8 +211,8 @@ cvRound( double value )
fistp t; fistp t;
} }
return t; return t;
#elif defined CV_INLINE_ROUND_DBL #elif defined CV_ICC || defined __GNUC__
CV_INLINE_ROUND_DBL(value); return (int)(lrint(value));
#else #else
/* it's ok if round does not comply with IEEE754 standard; /* it's ok if round does not comply with IEEE754 standard;
the tests should allow +/-1 difference when the tested functions use round */ the tests should allow +/-1 difference when the tested functions use round */
@ -182,7 +230,10 @@ cvRound( double value )
*/ */
CV_INLINE int cvFloor( double value ) CV_INLINE int cvFloor( double value )
{ {
#if defined _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS #if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \
&& ( \
defined(__PPC64__) \
)
return __builtin_floor(value); return __builtin_floor(value);
#else #else
int i = (int)value; int i = (int)value;
@ -199,7 +250,10 @@ CV_INLINE int cvFloor( double value )
*/ */
CV_INLINE int cvCeil( double value ) CV_INLINE int cvCeil( double value )
{ {
#if defined _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS #if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \
&& ( \
defined(__PPC64__) \
)
return __builtin_ceil(value); return __builtin_ceil(value);
#else #else
int i = (int)value; int i = (int)value;
@ -235,6 +289,11 @@ CV_INLINE int cvIsInf( double value )
{ {
#if defined CV_INLINE_ISINF_DBL #if defined CV_INLINE_ISINF_DBL
CV_INLINE_ISINF_DBL(value); CV_INLINE_ISINF_DBL(value);
#elif defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) || defined(__PPC64__)
    /* 64-bit targets: classify the value with a single integer compare on
       its bit pattern. Only the sign bit (bit 63) is masked off; the rest
       must equal "exponent all ones, mantissa all zeros".
       Every mantissa bit has to take part in the comparison: a mask that
       keeps just the upper 32 bits would also accept NaNs whose payload
       lies entirely in the low word (e.g. 0x7ff0000000000001). */
    Cv64suf ieee754;
    ieee754.f = value;
    return (ieee754.u & 0x7fffffffffffffff) ==
                        0x7ff0000000000000;
#else #else
Cv64suf ieee754; Cv64suf ieee754;
ieee754.f = value; ieee754.f = value;
@ -248,8 +307,11 @@ CV_INLINE int cvIsInf( double value )
/** @overload */ /** @overload */
CV_INLINE int cvRound(float value) CV_INLINE int cvRound(float value)
{ {
#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \ #if defined CV_INLINE_ROUND_FLT
&& defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__) CV_INLINE_ROUND_FLT(value);
#elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
&& defined __SSE2__ && !defined __APPLE__) || CV_SSE2) \
&& !defined(__CUDACC__)
__m128 t = _mm_set_ss( value ); __m128 t = _mm_set_ss( value );
return _mm_cvtss_si32(t); return _mm_cvtss_si32(t);
#elif defined _MSC_VER && defined _M_IX86 #elif defined _MSC_VER && defined _M_IX86
@ -260,8 +322,8 @@ CV_INLINE int cvRound(float value)
fistp t; fistp t;
} }
return t; return t;
#elif defined CV_INLINE_ROUND_FLT #elif defined CV_ICC || defined __GNUC__
CV_INLINE_ROUND_FLT(value); return (int)(lrintf(value));
#else #else
/* it's ok if round does not comply with IEEE754 standard; /* it's ok if round does not comply with IEEE754 standard;
the tests should allow +/-1 difference when the tested functions use round */ the tests should allow +/-1 difference when the tested functions use round */
@ -278,7 +340,10 @@ CV_INLINE int cvRound( int value )
/** @overload */ /** @overload */
CV_INLINE int cvFloor( float value ) CV_INLINE int cvFloor( float value )
{ {
#if defined _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS #if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \
&& ( \
defined(__PPC64__) \
)
return __builtin_floorf(value); return __builtin_floorf(value);
#else #else
int i = (int)value; int i = (int)value;
@ -295,7 +360,10 @@ CV_INLINE int cvFloor( int value )
/** @overload */ /** @overload */
CV_INLINE int cvCeil( float value ) CV_INLINE int cvCeil( float value )
{ {
#if defined _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS #if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \
&& ( \
defined(__PPC64__) \
)
return __builtin_ceilf(value); return __builtin_ceilf(value);
#else #else
int i = (int)value; int i = (int)value;