mirror of
https://github.com/opencv/opencv.git
synced 2024-11-30 14:29:49 +08:00
Merge pull request #15370 from alalek:core_fastmath_hpp_update
This commit is contained in:
commit
601096f360
@ -47,12 +47,6 @@
|
||||
|
||||
#include "opencv2/core/cvdef.h"
|
||||
|
||||
#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
|
||||
&& defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
|
||||
//! @addtogroup core_utils
|
||||
//! @{
|
||||
|
||||
@ -70,19 +64,24 @@
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_TEGRA_OPTIMIZATION
|
||||
# include "tegra_round.hpp"
|
||||
#endif
|
||||
#if defined(__CUDACC__)
|
||||
// nothing, intrinsics/asm code is not supported
|
||||
#else
|
||||
#if ((defined _MSC_VER && defined _M_X64) \
|
||||
|| (defined __GNUC__ && defined __x86_64__ && defined __SSE2__)) \
|
||||
&& !defined(OPENCV_SKIP_INCLUDE_EMMINTRIN_H)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
#if defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8 && !defined (__CUDACC__)
|
||||
# include <altivec.h>
|
||||
#endif
|
||||
#if defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8 \
|
||||
&& !defined(OPENCV_SKIP_INCLUDE_ALTIVEC_H)
|
||||
#include <altivec.h>
|
||||
#endif
|
||||
|
||||
#if ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || \
|
||||
defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
|
||||
#define CV_INLINE_ROUND_DBL(value) TEGRA_ROUND_DBL(value);
|
||||
#define CV_INLINE_ROUND_FLT(value) TEGRA_ROUND_FLT(value);
|
||||
#elif defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__ && !defined(__CUDACC__)
|
||||
#if defined(CV_INLINE_ROUND_FLT)
|
||||
// user-specified version
|
||||
// CV_INLINE_ROUND_DBL should be defined too
|
||||
#elif defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__
|
||||
// 1. general scheme
|
||||
#define ARM_ROUND(_value, _asm_string) \
|
||||
int res; \
|
||||
@ -98,7 +97,7 @@
|
||||
#endif
|
||||
// 3. version for float
|
||||
#define CV_INLINE_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]")
|
||||
#elif defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8 && !defined (__CUDACC__)
|
||||
#elif defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8
|
||||
// P8 and newer machines can convert fp32/64 to int quickly.
|
||||
#define CV_INLINE_ROUND_DBL(value) \
|
||||
int out; \
|
||||
@ -108,40 +107,86 @@
|
||||
|
||||
// FP32 also works with FP64 routine above
|
||||
#define CV_INLINE_ROUND_FLT(value) CV_INLINE_ROUND_DBL(value)
|
||||
#endif
|
||||
|
||||
#ifdef _ARCH_PWR9
|
||||
#define CV_INLINE_ISINF_DBL(value) return scalar_test_data_class(value, 0x30);
|
||||
#define CV_INLINE_ISNAN_DBL(value) return scalar_test_data_class(value, 0x40);
|
||||
#define CV_INLINE_ISINF_FLT(value) CV_INLINE_ISINF_DBL(value)
|
||||
#define CV_INLINE_ISNAN_FLT(value) CV_INLINE_ISNAN_DBL(value)
|
||||
#ifdef CV_INLINE_ISINF_FLT
|
||||
// user-specified version
|
||||
// CV_INLINE_ISINF_DBL should be defined too
|
||||
#elif defined __PPC64__ && defined _ARCH_PWR9 && defined(scalar_test_data_class)
|
||||
#define CV_INLINE_ISINF_DBL(value) return scalar_test_data_class(value, 0x30);
|
||||
#define CV_INLINE_ISINF_FLT(value) CV_INLINE_ISINF_DBL(value)
|
||||
#endif
|
||||
|
||||
#ifdef CV_INLINE_ISNAN_FLT
|
||||
// user-specified version
|
||||
// CV_INLINE_ISNAN_DBL should be defined too
|
||||
#elif defined __PPC64__ && defined _ARCH_PWR9 && defined(scalar_test_data_class)
|
||||
#define CV_INLINE_ISNAN_DBL(value) return scalar_test_data_class(value, 0x40);
|
||||
#define CV_INLINE_ISNAN_FLT(value) CV_INLINE_ISNAN_DBL(value)
|
||||
#endif
|
||||
|
||||
#if !defined(OPENCV_USE_FASTMATH_BUILTINS) \
|
||||
&& ( \
|
||||
defined(__x86_64__) || defined(__i686__) \
|
||||
|| defined(__arm__) \
|
||||
|| defined(__PPC64__) \
|
||||
)
|
||||
/* Use builtin C math functions when available. Dedicated hardware is available to
|
||||
round and convert FP values. */
|
||||
#define OPENCV_USE_FASTMATH_BUILTINS 1
|
||||
#endif
|
||||
|
||||
/* Enable builtin math functions if possible, desired, and available.
|
||||
Note, not all math functions inline equally. E.g. lrint will not inline
|
||||
without the -fno-math-errno option. */
|
||||
#if defined(CV_ICC)
|
||||
// nothing
|
||||
#elif defined(OPENCV_USE_FASTMATH_BUILTINS) && OPENCV_USE_FASTMATH_BUILTINS
|
||||
#if defined(__clang__)
|
||||
#define CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS
|
||||
#if !defined(CV_INLINE_ISNAN_DBL) && __has_builtin(__builtin_isnan)
|
||||
#define CV_INLINE_ISNAN_DBL(value) return __builtin_isnan(value);
|
||||
#endif
|
||||
#if !defined(CV_INLINE_ISNAN_FLT) && __has_builtin(__builtin_isnan)
|
||||
#define CV_INLINE_ISNAN_FLT(value) return __builtin_isnan(value);
|
||||
#endif
|
||||
#if !defined(CV_INLINE_ISINF_DBL) && __has_builtin(__builtin_isinf)
|
||||
#define CV_INLINE_ISINF_DBL(value) return __builtin_isinf(value);
|
||||
#endif
|
||||
#if !defined(CV_INLINE_ISINF_FLT) && __has_builtin(__builtin_isinf)
|
||||
#define CV_INLINE_ISINF_FLT(value) return __builtin_isinf(value);
|
||||
#endif
|
||||
#elif defined(__GNUC__)
|
||||
#define CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS
|
||||
#if !defined(CV_INLINE_ISNAN_DBL)
|
||||
#define CV_INLINE_ISNAN_DBL(value) return __builtin_isnan(value);
|
||||
#endif
|
||||
#if !defined(CV_INLINE_ISNAN_FLT)
|
||||
#define CV_INLINE_ISNAN_FLT(value) return __builtin_isnanf(value);
|
||||
#endif
|
||||
#if !defined(CV_INLINE_ISINF_DBL)
|
||||
#define CV_INLINE_ISINF_DBL(value) return __builtin_isinf(value);
|
||||
#endif
|
||||
#if !defined(CV_INLINE_ISINF_FLT)
|
||||
#define CV_INLINE_ISINF_FLT(value) return __builtin_isinff(value);
|
||||
#endif
|
||||
#elif defined(_MSC_VER)
|
||||
#if !defined(CV_INLINE_ISNAN_DBL)
|
||||
#define CV_INLINE_ISNAN_DBL(value) return isnan(value);
|
||||
#endif
|
||||
#if !defined(CV_INLINE_ISNAN_FLT)
|
||||
#define CV_INLINE_ISNAN_FLT(value) return isnan(value);
|
||||
#endif
|
||||
#if !defined(CV_INLINE_ISINF_DBL)
|
||||
#define CV_INLINE_ISINF_DBL(value) return isinf(value);
|
||||
#endif
|
||||
#if !defined(CV_INLINE_ISINF_FLT)
|
||||
#define CV_INLINE_ISINF_FLT(value) return isinf(value);
|
||||
#endif
|
||||
#endif
|
||||
#elif defined CV_ICC || defined __GNUC__
|
||||
#define CV_INLINE_ROUND_DBL(value) return (int)(lrint(value));
|
||||
#define CV_INLINE_ROUND_FLT(value) return (int)(lrintf(value));
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined __PPC64__ && !defined OPENCV_USE_FASTMATH_GCC_BUILTINS
|
||||
/* Let GCC inline C math functions when available. Dedicated hardware is available to
|
||||
round and convert FP values. */
|
||||
#define OPENCV_USE_FASTMATH_GCC_BUILTINS
|
||||
#endif
|
||||
|
||||
/* Enable GCC builtin math functions if possible, desired, and available.
|
||||
Note, not all math functions inline equally. E.g. lrint will not inline
|
||||
without the -fno-math-errno option. */
|
||||
#if defined OPENCV_USE_FASTMATH_GCC_BUILTINS && defined __GNUC__ && !defined __clang__ && !defined (__CUDACC__)
|
||||
#define _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS
|
||||
#endif
|
||||
|
||||
/* Allow overrides for some functions which may benefit from tuning. Likewise,
|
||||
note that isinf is not used as the return value is signed. */
|
||||
#if defined _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS && !defined CV_INLINE_ISNAN_DBL
|
||||
#define CV_INLINE_ISNAN_DBL(value) return __builtin_isnan(value);
|
||||
#endif
|
||||
|
||||
#if defined _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS && !defined CV_INLINE_ISNAN_FLT
|
||||
#define CV_INLINE_ISNAN_FLT(value) return __builtin_isnanf(value);
|
||||
#endif
|
||||
#endif // defined(__CUDACC__)
|
||||
|
||||
/** @brief Rounds floating-point number to the nearest integer
|
||||
|
||||
@ -151,8 +196,11 @@
|
||||
CV_INLINE int
|
||||
cvRound( double value )
|
||||
{
|
||||
#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
|
||||
&& defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__)
|
||||
#if defined CV_INLINE_ROUND_DBL
|
||||
CV_INLINE_ROUND_DBL(value);
|
||||
#elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
|
||||
&& defined __SSE2__ && !defined __APPLE__) || CV_SSE2) \
|
||||
&& !defined(__CUDACC__)
|
||||
__m128d t = _mm_set_sd( value );
|
||||
return _mm_cvtsd_si32(t);
|
||||
#elif defined _MSC_VER && defined _M_IX86
|
||||
@ -163,8 +211,8 @@ cvRound( double value )
|
||||
fistp t;
|
||||
}
|
||||
return t;
|
||||
#elif defined CV_INLINE_ROUND_DBL
|
||||
CV_INLINE_ROUND_DBL(value);
|
||||
#elif defined CV_ICC || defined __GNUC__
|
||||
return (int)(lrint(value));
|
||||
#else
|
||||
/* it's ok if round does not comply with IEEE754 standard;
|
||||
the tests should allow +/-1 difference when the tested functions use round */
|
||||
@ -182,7 +230,10 @@ cvRound( double value )
|
||||
*/
|
||||
CV_INLINE int cvFloor( double value )
|
||||
{
|
||||
#if defined _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS
|
||||
#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \
|
||||
&& ( \
|
||||
defined(__PPC64__) \
|
||||
)
|
||||
return __builtin_floor(value);
|
||||
#else
|
||||
int i = (int)value;
|
||||
@ -199,7 +250,10 @@ CV_INLINE int cvFloor( double value )
|
||||
*/
|
||||
CV_INLINE int cvCeil( double value )
|
||||
{
|
||||
#if defined _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS
|
||||
#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \
|
||||
&& ( \
|
||||
defined(__PPC64__) \
|
||||
)
|
||||
return __builtin_ceil(value);
|
||||
#else
|
||||
int i = (int)value;
|
||||
@ -235,6 +289,11 @@ CV_INLINE int cvIsInf( double value )
|
||||
{
|
||||
#if defined CV_INLINE_ISINF_DBL
|
||||
CV_INLINE_ISINF_DBL(value);
|
||||
#elif defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) || defined(__PPC64__)
|
||||
Cv64suf ieee754;
|
||||
ieee754.f = value;
|
||||
return (ieee754.u & 0x7fffffff00000000) ==
|
||||
0x7ff0000000000000;
|
||||
#else
|
||||
Cv64suf ieee754;
|
||||
ieee754.f = value;
|
||||
@ -248,8 +307,11 @@ CV_INLINE int cvIsInf( double value )
|
||||
/** @overload */
|
||||
CV_INLINE int cvRound(float value)
|
||||
{
|
||||
#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
|
||||
&& defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__)
|
||||
#if defined CV_INLINE_ROUND_FLT
|
||||
CV_INLINE_ROUND_FLT(value);
|
||||
#elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
|
||||
&& defined __SSE2__ && !defined __APPLE__) || CV_SSE2) \
|
||||
&& !defined(__CUDACC__)
|
||||
__m128 t = _mm_set_ss( value );
|
||||
return _mm_cvtss_si32(t);
|
||||
#elif defined _MSC_VER && defined _M_IX86
|
||||
@ -260,8 +322,8 @@ CV_INLINE int cvRound(float value)
|
||||
fistp t;
|
||||
}
|
||||
return t;
|
||||
#elif defined CV_INLINE_ROUND_FLT
|
||||
CV_INLINE_ROUND_FLT(value);
|
||||
#elif defined CV_ICC || defined __GNUC__
|
||||
return (int)(lrintf(value));
|
||||
#else
|
||||
/* it's ok if round does not comply with IEEE754 standard;
|
||||
the tests should allow +/-1 difference when the tested functions use round */
|
||||
@ -278,7 +340,10 @@ CV_INLINE int cvRound( int value )
|
||||
/** @overload */
|
||||
CV_INLINE int cvFloor( float value )
|
||||
{
|
||||
#if defined _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS
|
||||
#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \
|
||||
&& ( \
|
||||
defined(__PPC64__) \
|
||||
)
|
||||
return __builtin_floorf(value);
|
||||
#else
|
||||
int i = (int)value;
|
||||
@ -295,7 +360,10 @@ CV_INLINE int cvFloor( int value )
|
||||
/** @overload */
|
||||
CV_INLINE int cvCeil( float value )
|
||||
{
|
||||
#if defined _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS
|
||||
#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \
|
||||
&& ( \
|
||||
defined(__PPC64__) \
|
||||
)
|
||||
return __builtin_ceilf(value);
|
||||
#else
|
||||
int i = (int)value;
|
||||
|
Loading…
Reference in New Issue
Block a user