* cleaned up the cvRound(), cvFloor() and cvCeil() implementations; removed the old non-banker's-rounding branch (the one that rounded halves away from zero) completely

* enable the use of GCC/clang __builtin_*() functions more broadly
This commit is contained in:
Vadim Pisarevsky 2022-06-24 14:58:32 +03:00
parent a6ca48a1c2
commit b5adffd5c2

View File

@ -128,12 +128,8 @@
#define CV_INLINE_ISNAN_FLT(value) CV_INLINE_ISNAN_DBL(value) #define CV_INLINE_ISNAN_FLT(value) CV_INLINE_ISNAN_DBL(value)
#endif #endif
#if !defined(OPENCV_USE_FASTMATH_BUILTINS) \ #if !defined(OPENCV_USE_FASTMATH_BUILTINS) && \
&& ( \ (defined __GNUC__ || defined __clang__ || defined _MSC_VER)
defined(__x86_64__) || defined(__i686__) \
|| defined(__arm__) \
|| defined(__PPC64__) \
)
/* Use builtin C math functions when available. Dedicated hardware is available to /* Use builtin C math functions when available. Dedicated hardware is available to
round and convert FP values. */ round and convert FP values. */
#define OPENCV_USE_FASTMATH_BUILTINS 1 #define OPENCV_USE_FASTMATH_BUILTINS 1
@ -201,9 +197,7 @@ cvRound( double value )
{ {
#if defined CV_INLINE_ROUND_DBL #if defined CV_INLINE_ROUND_DBL
CV_INLINE_ROUND_DBL(value); CV_INLINE_ROUND_DBL(value);
#elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \ #elif (defined _MSC_VER && defined _M_X64) && !defined(__CUDACC__)
&& defined __SSE2__ && !defined __APPLE__) || CV_SSE2) \
&& !defined(__CUDACC__)
__m128d t = _mm_set_sd( value ); __m128d t = _mm_set_sd( value );
return _mm_cvtsd_si32(t); return _mm_cvtsd_si32(t);
#elif defined _MSC_VER && defined _M_IX86 #elif defined _MSC_VER && defined _M_IX86
@ -214,12 +208,11 @@ cvRound( double value )
fistp t; fistp t;
} }
return t; return t;
#elif defined CV_ICC || defined __GNUC__ #elif defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || \
return (int)(lrint(value)); defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS
return (int)__builtin_lrint(value);
#else #else
/* it's ok if round does not comply with IEEE754 standard; return (int)lrint(value);
the tests should allow +/-1 difference when the tested functions use round */
return (int)(value + (value >= 0 ? 0.5 : -0.5));
#endif #endif
} }
@ -233,11 +226,9 @@ cvRound( double value )
*/ */
CV_INLINE int cvFloor( double value ) CV_INLINE int cvFloor( double value )
{ {
#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \ #if defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || \
&& ( \ defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS
defined(__PPC64__) \ return (int)__builtin_floor(value);
)
return __builtin_floor(value);
#else #else
int i = (int)value; int i = (int)value;
return i - (i > value); return i - (i > value);
@ -253,11 +244,9 @@ CV_INLINE int cvFloor( double value )
*/ */
CV_INLINE int cvCeil( double value ) CV_INLINE int cvCeil( double value )
{ {
#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \ #if defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || \
&& ( \ defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS
defined(__PPC64__) \ return (int)__builtin_ceil(value);
)
return __builtin_ceil(value);
#else #else
int i = (int)value; int i = (int)value;
return i + (i < value); return i + (i < value);
@ -312,9 +301,7 @@ CV_INLINE int cvRound(float value)
{ {
#if defined CV_INLINE_ROUND_FLT #if defined CV_INLINE_ROUND_FLT
CV_INLINE_ROUND_FLT(value); CV_INLINE_ROUND_FLT(value);
#elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \ #elif (defined _MSC_VER && defined _M_X64) && !defined(__CUDACC__)
&& defined __SSE2__ && !defined __APPLE__) || CV_SSE2) \
&& !defined(__CUDACC__)
__m128 t = _mm_set_ss( value ); __m128 t = _mm_set_ss( value );
return _mm_cvtss_si32(t); return _mm_cvtss_si32(t);
#elif defined _MSC_VER && defined _M_IX86 #elif defined _MSC_VER && defined _M_IX86
@ -325,12 +312,11 @@ CV_INLINE int cvRound(float value)
fistp t; fistp t;
} }
return t; return t;
#elif defined CV_ICC || defined __GNUC__ #elif defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || \
return (int)(lrintf(value)); defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS
return (int)__builtin_lrintf(value);
#else #else
/* it's ok if round does not comply with IEEE754 standard; return (int)lrintf(value);
the tests should allow +/-1 difference when the tested functions use round */
return (int)(value + (value >= 0 ? 0.5f : -0.5f));
#endif #endif
} }
@ -343,11 +329,9 @@ CV_INLINE int cvRound( int value )
/** @overload */ /** @overload */
CV_INLINE int cvFloor( float value ) CV_INLINE int cvFloor( float value )
{ {
#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \ #if defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || \
&& ( \ defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS
defined(__PPC64__) \ return (int)__builtin_floorf(value);
)
return __builtin_floorf(value);
#else #else
int i = (int)value; int i = (int)value;
return i - (i > value); return i - (i > value);
@ -363,11 +347,9 @@ CV_INLINE int cvFloor( int value )
/** @overload */ /** @overload */
CV_INLINE int cvCeil( float value ) CV_INLINE int cvCeil( float value )
{ {
#if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \ #if defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || \
&& ( \ defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS
defined(__PPC64__) \ return (int)__builtin_ceilf(value);
)
return __builtin_ceilf(value);
#else #else
int i = (int)value; int i = (int)value;
return i + (i < value); return i + (i < value);