diff --git a/modules/core/include/opencv2/core/fast_math.hpp b/modules/core/include/opencv2/core/fast_math.hpp index b1e8c4202d..0e8187cac0 100644 --- a/modules/core/include/opencv2/core/fast_math.hpp +++ b/modules/core/include/opencv2/core/fast_math.hpp @@ -47,12 +47,6 @@ #include "opencv2/core/cvdef.h" -#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \ - && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__) -#include -#endif - - //! @addtogroup core_utils //! @{ @@ -70,19 +64,24 @@ # endif #endif -#ifdef HAVE_TEGRA_OPTIMIZATION -# include "tegra_round.hpp" -#endif +#if defined(__CUDACC__) + // nothing, intrinsics/asm code is not supported +#else + #if ((defined _MSC_VER && defined _M_X64) \ + || (defined __GNUC__ && defined __x86_64__ && defined __SSE2__)) \ + && !defined(OPENCV_SKIP_INCLUDE_EMMINTRIN_H) + #include + #endif -#if defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8 && !defined (__CUDACC__) -# include -#endif + #if defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8 \ + && !defined(OPENCV_SKIP_INCLUDE_ALTIVEC_H) + #include + #endif -#if ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || \ - defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION - #define CV_INLINE_ROUND_DBL(value) TEGRA_ROUND_DBL(value); - #define CV_INLINE_ROUND_FLT(value) TEGRA_ROUND_FLT(value); -#elif defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__ && !defined(__CUDACC__) + #if defined(CV_INLINE_ROUND_FLT) + // user-specified version + // CV_INLINE_ROUND_DBL should be defined too + #elif defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__ // 1. general scheme #define ARM_ROUND(_value, _asm_string) \ int res; \ @@ -98,7 +97,7 @@ #endif // 3. version for float #define CV_INLINE_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]") -#elif defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8 && !defined (__CUDACC__) + #elif defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8 // P8 and newer machines can convert fp32/64 to int quickly. #define CV_INLINE_ROUND_DBL(value) \ int out; \ @@ -108,40 +107,86 @@ // FP32 also works with FP64 routine above #define CV_INLINE_ROUND_FLT(value) CV_INLINE_ROUND_DBL(value) + #endif - #ifdef _ARCH_PWR9 - #define CV_INLINE_ISINF_DBL(value) return scalar_test_data_class(value, 0x30); - #define CV_INLINE_ISNAN_DBL(value) return scalar_test_data_class(value, 0x40); - #define CV_INLINE_ISINF_FLT(value) CV_INLINE_ISINF_DBL(value) - #define CV_INLINE_ISNAN_FLT(value) CV_INLINE_ISNAN_DBL(value) + #ifdef CV_INLINE_ISINF_FLT + // user-specified version + // CV_INLINE_ISINF_DBL should be defined too + #elif defined __PPC64__ && defined _ARCH_PWR9 && defined(scalar_test_data_class) + #define CV_INLINE_ISINF_DBL(value) return scalar_test_data_class(value, 0x30); + #define CV_INLINE_ISINF_FLT(value) CV_INLINE_ISINF_DBL(value) + #endif + + #ifdef CV_INLINE_ISNAN_FLT + // user-specified version + // CV_INLINE_ISNAN_DBL should be defined too + #elif defined __PPC64__ && defined _ARCH_PWR9 && defined(scalar_test_data_class) + #define CV_INLINE_ISNAN_DBL(value) return scalar_test_data_class(value, 0x40); + #define CV_INLINE_ISNAN_FLT(value) CV_INLINE_ISNAN_DBL(value) + #endif + + #if !defined(OPENCV_USE_FASTMATH_BUILTINS) \ + && ( \ + defined(__x86_64__) || defined(__i686__) \ + || defined(__arm__) \ + || defined(__PPC64__) \ + ) + /* Let builtin C math functions when available. Dedicated hardware is available to + round and convert FP values. */ + #define OPENCV_USE_FASTMATH_BUILTINS 1 + #endif + + /* Enable builtin math functions if possible, desired, and available. + Note, not all math functions inline equally. E.g lrint will not inline + without the -fno-math-errno option. */ + #if defined(CV_ICC) + // nothing + #elif defined(OPENCV_USE_FASTMATH_BUILTINS) && OPENCV_USE_FASTMATH_BUILTINS + #if defined(__clang__) + #define CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS + #if !defined(CV_INLINE_ISNAN_DBL) && __has_builtin(__builtin_isnan) + #define CV_INLINE_ISNAN_DBL(value) return __builtin_isnan(value); + #endif + #if !defined(CV_INLINE_ISNAN_FLT) && __has_builtin(__builtin_isnan) + #define CV_INLINE_ISNAN_FLT(value) return __builtin_isnan(value); + #endif + #if !defined(CV_INLINE_ISINF_DBL) && __has_builtin(__builtin_isinf) + #define CV_INLINE_ISINF_DBL(value) return __builtin_isinf(value); + #endif + #if !defined(CV_INLINE_ISINF_FLT) && __has_builtin(__builtin_isinf) + #define CV_INLINE_ISINF_FLT(value) return __builtin_isinf(value); + #endif + #elif defined(__GNUC__) + #define CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS + #if !defined(CV_INLINE_ISNAN_DBL) + #define CV_INLINE_ISNAN_DBL(value) return __builtin_isnan(value); + #endif + #if !defined(CV_INLINE_ISNAN_FLT) + #define CV_INLINE_ISNAN_FLT(value) return __builtin_isnanf(value); + #endif + #if !defined(CV_INLINE_ISINF_DBL) + #define CV_INLINE_ISINF_DBL(value) return __builtin_isinf(value); + #endif + #if !defined(CV_INLINE_ISINF_FLT) + #define CV_INLINE_ISINF_FLT(value) return __builtin_isinff(value); + #endif + #elif defined(_MSC_VER) + #if !defined(CV_INLINE_ISNAN_DBL) + #define CV_INLINE_ISNAN_DBL(value) return isnan(value); + #endif + #if !defined(CV_INLINE_ISNAN_FLT) + #define CV_INLINE_ISNAN_FLT(value) return isnan(value); + #endif + #if !defined(CV_INLINE_ISINF_DBL) + #define CV_INLINE_ISINF_DBL(value) return isinf(value); + #endif + #if !defined(CV_INLINE_ISINF_FLT) + #define CV_INLINE_ISINF_FLT(value) return isinf(value); + #endif #endif -#elif defined CV_ICC || defined __GNUC__ - #define CV_INLINE_ROUND_DBL(value) return (int)(lrint(value)); - #define CV_INLINE_ROUND_FLT(value) return (int)(lrintf(value)); -#endif + #endif -#if defined __PPC64__ && !defined OPENCV_USE_FASTMATH_GCC_BUILTINS - /* Let GCC inline C math functions when available. Dedicated hardware is available to - round and covert FP values. */ - #define OPENCV_USE_FASTMATH_GCC_BUILTINS -#endif - -/* Enable GCC builtin math functions if possible, desired, and available. - Note, not all math functions inline equally. E.g lrint will not inline - without the -fno-math-errno option. */ -#if defined OPENCV_USE_FASTMATH_GCC_BUILTINS && defined __GNUC__ && !defined __clang__ && !defined (__CUDACC__) - #define _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS -#endif - -/* Allow overrides for some functions which may benefit from tuning. Likewise, - note that isinf is not used as the return value is signed. */ -#if defined _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS && !defined CV_INLINE_ISNAN_DBL - #define CV_INLINE_ISNAN_DBL(value) return __builtin_isnan(value); -#endif - -#if defined _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS && !defined CV_INLINE_ISNAN_FLT - #define CV_INLINE_ISNAN_FLT(value) return __builtin_isnanf(value); -#endif +#endif // defined(__CUDACC__) /** @brief Rounds floating-point number to the nearest integer @@ -151,8 +196,11 @@ CV_INLINE int cvRound( double value ) { -#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \ - && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__) +#if defined CV_INLINE_ROUND_DBL + CV_INLINE_ROUND_DBL(value); +#elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \ + && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) \ + && !defined(__CUDACC__) __m128d t = _mm_set_sd( value ); return _mm_cvtsd_si32(t); #elif defined _MSC_VER && defined _M_IX86 @@ -163,8 +211,8 @@ cvRound( double value ) fistp t; } return t; -#elif defined CV_INLINE_ROUND_DBL - CV_INLINE_ROUND_DBL(value); +#elif defined CV_ICC || defined __GNUC__ + return (int)(lrint(value)); #else /* it's ok if round does not comply with IEEE754 standard; the tests should allow +/-1 difference when the tested functions use round */ @@ -182,7 +230,10 @@ cvRound( double value ) */ CV_INLINE int cvFloor( double value ) { -#if defined _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS +#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \ + && ( \ + defined(__PPC64__) \ + ) return __builtin_floor(value); #else int i = (int)value; @@ -199,7 +250,10 @@ CV_INLINE int cvFloor( double value ) */ CV_INLINE int cvCeil( double value ) { -#if defined _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS +#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \ + && ( \ + defined(__PPC64__) \ + ) return __builtin_ceil(value); #else int i = (int)value; @@ -235,6 +289,11 @@ CV_INLINE int cvIsInf( double value ) { #if defined CV_INLINE_ISINF_DBL CV_INLINE_ISINF_DBL(value); +#elif defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) || defined(__PPC64__) + Cv64suf ieee754; + ieee754.f = value; + return (ieee754.u & 0x7fffffff00000000) == + 0x7ff0000000000000; #else Cv64suf ieee754; ieee754.f = value; @@ -248,8 +307,11 @@ CV_INLINE int cvIsInf( double value ) /** @overload */ CV_INLINE int cvRound(float value) { -#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \ - && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__) +#if defined CV_INLINE_ROUND_FLT + CV_INLINE_ROUND_FLT(value); +#elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \ + && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) \ + && !defined(__CUDACC__) __m128 t = _mm_set_ss( value ); return _mm_cvtss_si32(t); #elif defined _MSC_VER && defined _M_IX86 @@ -260,8 +322,8 @@ CV_INLINE int cvRound(float value) fistp t; } return t; -#elif defined CV_INLINE_ROUND_FLT - CV_INLINE_ROUND_FLT(value); +#elif defined CV_ICC || defined __GNUC__ + return (int)(lrintf(value)); #else /* it's ok if round does not comply with IEEE754 standard; the tests should allow +/-1 difference when the tested functions use round */ @@ -278,7 +340,10 @@ CV_INLINE int cvRound( int value ) /** @overload */ CV_INLINE int cvFloor( float value ) { -#if defined _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS +#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \ + && ( \ + defined(__PPC64__) \ + ) return __builtin_floorf(value); #else int i = (int)value; @@ -295,7 +360,10 @@ CV_INLINE int cvFloor( int value ) /** @overload */ CV_INLINE int cvCeil( float value ) { -#if defined _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS +#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \ + && ( \ + defined(__PPC64__) \ + ) return __builtin_ceilf(value); #else int i = (int)value;