mirror of
https://github.com/opencv/opencv.git
synced 2025-06-07 17:44:04 +08:00
Merge pull request #15122 from pmur:fast-math-improvements
This commit is contained in:
commit
13ecd5bb25
@ -74,7 +74,15 @@
|
|||||||
# include "tegra_round.hpp"
|
# include "tegra_round.hpp"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__ && !defined(__CUDACC__)
|
#if defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8 && !defined (__CUDACC__)
|
||||||
|
# include <altivec.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || \
|
||||||
|
defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
|
||||||
|
#define CV_INLINE_ROUND_DBL(value) TEGRA_ROUND_DBL(value);
|
||||||
|
#define CV_INLINE_ROUND_FLT(value) TEGRA_ROUND_FLT(value);
|
||||||
|
#elif defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__ && !defined(__CUDACC__)
|
||||||
// 1. general scheme
|
// 1. general scheme
|
||||||
#define ARM_ROUND(_value, _asm_string) \
|
#define ARM_ROUND(_value, _asm_string) \
|
||||||
int res; \
|
int res; \
|
||||||
@ -84,12 +92,55 @@
|
|||||||
return res
|
return res
|
||||||
// 2. version for double
|
// 2. version for double
|
||||||
#ifdef __clang__
|
#ifdef __clang__
|
||||||
#define ARM_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %[value] \n vmov %[res], %[temp]")
|
#define CV_INLINE_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %[value] \n vmov %[res], %[temp]")
|
||||||
#else
|
#else
|
||||||
#define ARM_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %P[value] \n vmov %[res], %[temp]")
|
#define CV_INLINE_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %P[value] \n vmov %[res], %[temp]")
|
||||||
#endif
|
#endif
|
||||||
// 3. version for float
|
// 3. version for float
|
||||||
#define ARM_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]")
|
#define CV_INLINE_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]")
|
||||||
|
#elif defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8 && !defined (__CUDACC__)
|
||||||
|
// P8 and newer machines can convert fp32/64 to int quickly.
|
||||||
|
#define CV_INLINE_ROUND_DBL(value) \
|
||||||
|
int out; \
|
||||||
|
double temp; \
|
||||||
|
__asm__( "fctiw %[temp],%[in]\n\tmffprwz %[out],%[temp]\n\t" : [out] "=r" (out), [temp] "=d" (temp) : [in] "d" ((double)(value)) : ); \
|
||||||
|
return out;
|
||||||
|
|
||||||
|
// FP32 also works with FP64 routine above
|
||||||
|
#define CV_INLINE_ROUND_FLT(value) CV_INLINE_ROUND_DBL(value)
|
||||||
|
|
||||||
|
#ifdef _ARCH_PWR9
|
||||||
|
#define CV_INLINE_ISINF_DBL(value) return scalar_test_data_class(value, 0x30);
|
||||||
|
#define CV_INLINE_ISNAN_DBL(value) return scalar_test_data_class(value, 0x40);
|
||||||
|
#define CV_INLINE_ISINF_FLT(value) CV_INLINE_ISINF_DBL(value)
|
||||||
|
#define CV_INLINE_ISNAN_FLT(value) CV_INLINE_ISNAN_DBL(value)
|
||||||
|
#endif
|
||||||
|
#elif defined CV_ICC || defined __GNUC__
|
||||||
|
#define CV_INLINE_ROUND_DBL(value) return (int)(lrint(value));
|
||||||
|
#define CV_INLINE_ROUND_FLT(value) return (int)(lrintf(value));
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined __PPC64__ && !defined OPENCV_USE_FASTMATH_GCC_BUILTINS
|
||||||
|
/* Let GCC inline C math functions when available. Dedicated hardware is available to
|
||||||
|
round and convert FP values. */
|
||||||
|
#define OPENCV_USE_FASTMATH_GCC_BUILTINS
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Enable GCC builtin math functions if possible, desired, and available.
|
||||||
|
Note, not all math functions inline equally. E.g. lrint will not inline
|
||||||
|
without the -fno-math-errno option. */
|
||||||
|
#if defined OPENCV_USE_FASTMATH_GCC_BUILTINS && defined __GNUC__ && !defined __clang__ && !defined (__CUDACC__)
|
||||||
|
#define _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Allow overrides for some functions which may benefit from tuning. Likewise,
|
||||||
|
note that isinf is not used as the return value is signed. */
|
||||||
|
#if defined _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS && !defined CV_INLINE_ISNAN_DBL
|
||||||
|
#define CV_INLINE_ISNAN_DBL(value) return __builtin_isnan(value);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS && !defined CV_INLINE_ISNAN_FLT
|
||||||
|
#define CV_INLINE_ISNAN_FLT(value) return __builtin_isnanf(value);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/** @brief Rounds floating-point number to the nearest integer
|
/** @brief Rounds floating-point number to the nearest integer
|
||||||
@ -112,15 +163,8 @@ cvRound( double value )
|
|||||||
fistp t;
|
fistp t;
|
||||||
}
|
}
|
||||||
return t;
|
return t;
|
||||||
#elif ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || \
|
#elif defined CV_INLINE_ROUND_DBL
|
||||||
defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
|
CV_INLINE_ROUND_DBL(value);
|
||||||
TEGRA_ROUND_DBL(value);
|
|
||||||
#elif defined CV_ICC || defined __GNUC__
|
|
||||||
# if defined ARM_ROUND_DBL
|
|
||||||
ARM_ROUND_DBL(value);
|
|
||||||
# else
|
|
||||||
return (int)lrint(value);
|
|
||||||
# endif
|
|
||||||
#else
|
#else
|
||||||
/* it's ok if round does not comply with IEEE754 standard;
|
/* it's ok if round does not comply with IEEE754 standard;
|
||||||
the tests should allow +/-1 difference when the tested functions use round */
|
the tests should allow +/-1 difference when the tested functions use round */
|
||||||
@ -138,8 +182,12 @@ cvRound( double value )
|
|||||||
*/
|
*/
|
||||||
CV_INLINE int cvFloor( double value )
|
CV_INLINE int cvFloor( double value )
|
||||||
{
|
{
|
||||||
|
#if defined _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS
|
||||||
|
return __builtin_floor(value);
|
||||||
|
#else
|
||||||
int i = (int)value;
|
int i = (int)value;
|
||||||
return i - (i > value);
|
return i - (i > value);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/** @brief Rounds floating-point number to the nearest integer not smaller than the original.
|
/** @brief Rounds floating-point number to the nearest integer not smaller than the original.
|
||||||
@ -151,8 +199,12 @@ CV_INLINE int cvFloor( double value )
|
|||||||
*/
|
*/
|
||||||
CV_INLINE int cvCeil( double value )
|
CV_INLINE int cvCeil( double value )
|
||||||
{
|
{
|
||||||
|
#if defined _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS
|
||||||
|
return __builtin_ceil(value);
|
||||||
|
#else
|
||||||
int i = (int)value;
|
int i = (int)value;
|
||||||
return i + (i < value);
|
return i + (i < value);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/** @brief Determines if the argument is Not A Number.
|
/** @brief Determines if the argument is Not A Number.
|
||||||
@ -163,10 +215,14 @@ CV_INLINE int cvCeil( double value )
|
|||||||
otherwise. */
|
otherwise. */
|
||||||
CV_INLINE int cvIsNaN( double value )
|
CV_INLINE int cvIsNaN( double value )
|
||||||
{
|
{
|
||||||
|
#if defined CV_INLINE_ISNAN_DBL
|
||||||
|
CV_INLINE_ISNAN_DBL(value);
|
||||||
|
#else
|
||||||
Cv64suf ieee754;
|
Cv64suf ieee754;
|
||||||
ieee754.f = value;
|
ieee754.f = value;
|
||||||
return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) +
|
return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) +
|
||||||
((unsigned)ieee754.u != 0) > 0x7ff00000;
|
((unsigned)ieee754.u != 0) > 0x7ff00000;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/** @brief Determines if the argument is Infinity.
|
/** @brief Determines if the argument is Infinity.
|
||||||
@ -177,10 +233,14 @@ CV_INLINE int cvIsNaN( double value )
|
|||||||
and 0 otherwise. */
|
and 0 otherwise. */
|
||||||
CV_INLINE int cvIsInf( double value )
|
CV_INLINE int cvIsInf( double value )
|
||||||
{
|
{
|
||||||
|
#if defined CV_INLINE_ISINF_DBL
|
||||||
|
CV_INLINE_ISINF_DBL(value);
|
||||||
|
#else
|
||||||
Cv64suf ieee754;
|
Cv64suf ieee754;
|
||||||
ieee754.f = value;
|
ieee754.f = value;
|
||||||
return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) == 0x7ff00000 &&
|
return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) == 0x7ff00000 &&
|
||||||
(unsigned)ieee754.u == 0;
|
(unsigned)ieee754.u == 0;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
@ -200,15 +260,8 @@ CV_INLINE int cvRound(float value)
|
|||||||
fistp t;
|
fistp t;
|
||||||
}
|
}
|
||||||
return t;
|
return t;
|
||||||
#elif ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || \
|
#elif defined CV_INLINE_ROUND_FLT
|
||||||
defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
|
CV_INLINE_ROUND_FLT(value);
|
||||||
TEGRA_ROUND_FLT(value);
|
|
||||||
#elif defined CV_ICC || defined __GNUC__
|
|
||||||
# if defined ARM_ROUND_FLT
|
|
||||||
ARM_ROUND_FLT(value);
|
|
||||||
# else
|
|
||||||
return (int)lrintf(value);
|
|
||||||
# endif
|
|
||||||
#else
|
#else
|
||||||
/* it's ok if round does not comply with IEEE754 standard;
|
/* it's ok if round does not comply with IEEE754 standard;
|
||||||
the tests should allow +/-1 difference when the tested functions use round */
|
the tests should allow +/-1 difference when the tested functions use round */
|
||||||
@ -225,8 +278,12 @@ CV_INLINE int cvRound( int value )
|
|||||||
/** @overload */
|
/** @overload */
|
||||||
CV_INLINE int cvFloor( float value )
|
CV_INLINE int cvFloor( float value )
|
||||||
{
|
{
|
||||||
|
#if defined _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS
|
||||||
|
return __builtin_floorf(value);
|
||||||
|
#else
|
||||||
int i = (int)value;
|
int i = (int)value;
|
||||||
return i - (i > value);
|
return i - (i > value);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/** @overload */
|
/** @overload */
|
||||||
@ -238,8 +295,12 @@ CV_INLINE int cvFloor( int value )
|
|||||||
/** @overload */
|
/** @overload */
|
||||||
CV_INLINE int cvCeil( float value )
|
CV_INLINE int cvCeil( float value )
|
||||||
{
|
{
|
||||||
|
#if defined _OPENCV_FASTMATH_ENABLE_GCC_MATH_BUILTINS
|
||||||
|
return __builtin_ceilf(value);
|
||||||
|
#else
|
||||||
int i = (int)value;
|
int i = (int)value;
|
||||||
return i + (i < value);
|
return i + (i < value);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/** @overload */
|
/** @overload */
|
||||||
@ -251,17 +312,25 @@ CV_INLINE int cvCeil( int value )
|
|||||||
/** @overload */
|
/** @overload */
|
||||||
CV_INLINE int cvIsNaN( float value )
|
CV_INLINE int cvIsNaN( float value )
|
||||||
{
|
{
|
||||||
|
#if defined CV_INLINE_ISNAN_FLT
|
||||||
|
CV_INLINE_ISNAN_FLT(value);
|
||||||
|
#else
|
||||||
Cv32suf ieee754;
|
Cv32suf ieee754;
|
||||||
ieee754.f = value;
|
ieee754.f = value;
|
||||||
return (ieee754.u & 0x7fffffff) > 0x7f800000;
|
return (ieee754.u & 0x7fffffff) > 0x7f800000;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/** @overload */
|
/** @overload */
|
||||||
CV_INLINE int cvIsInf( float value )
|
CV_INLINE int cvIsInf( float value )
|
||||||
{
|
{
|
||||||
|
#if defined CV_INLINE_ISINF_FLT
|
||||||
|
CV_INLINE_ISINF_FLT(value);
|
||||||
|
#else
|
||||||
Cv32suf ieee754;
|
Cv32suf ieee754;
|
||||||
ieee754.f = value;
|
ieee754.f = value;
|
||||||
return (ieee754.u & 0x7fffffff) == 0x7f800000;
|
return (ieee754.u & 0x7fffffff) == 0x7f800000;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // __cplusplus
|
#endif // __cplusplus
|
||||||
|
@ -4,42 +4,52 @@ namespace opencv_test
|
|||||||
{
|
{
|
||||||
using namespace perf;
|
using namespace perf;
|
||||||
|
|
||||||
template <typename T>
|
#define DECL_ROUND_TEST(NAME, OP, EXTRA) \
|
||||||
static void CvRoundMat(const cv::Mat & src, cv::Mat & dst)
|
template <typename T> \
|
||||||
{
|
static void OP ## Mat(const cv::Mat & src, cv::Mat & dst) \
|
||||||
for (int y = 0; y < dst.rows; ++y)
|
{ \
|
||||||
{
|
for (int y = 0; y < dst.rows; ++y) \
|
||||||
const T * sptr = src.ptr<T>(y);
|
{ \
|
||||||
int * dptr = dst.ptr<int>(y);
|
const T * sptr = src.ptr<T>(y); \
|
||||||
|
int * dptr = dst.ptr<int>(y); \
|
||||||
for (int x = 0; x < dst.cols; ++x)
|
\
|
||||||
dptr[x] = cvRound(sptr[x]);
|
for (int x = 0; x < dst.cols; ++x) \
|
||||||
}
|
dptr[x] = OP(sptr[x]) EXTRA; \
|
||||||
}
|
} \
|
||||||
|
} \
|
||||||
PERF_TEST_P(Size_MatType, CvRound_Float,
|
\
|
||||||
testing::Combine(testing::Values(TYPICAL_MAT_SIZES),
|
PERF_TEST_P(Size_MatType, CvRound_Float ## NAME, \
|
||||||
testing::Values(CV_32FC1, CV_64FC1)))
|
testing::Combine(testing::Values(TYPICAL_MAT_SIZES), \
|
||||||
{
|
testing::Values(CV_32FC1, CV_64FC1))) \
|
||||||
Size size = get<0>(GetParam());
|
{ \
|
||||||
int type = get<1>(GetParam()), depth = CV_MAT_DEPTH(type);
|
Size size = get<0>(GetParam()); \
|
||||||
|
int type = get<1>(GetParam()), depth = CV_MAT_DEPTH(type); \
|
||||||
cv::Mat src(size, type), dst(size, CV_32SC1);
|
\
|
||||||
|
cv::Mat src(size, type), dst(size, CV_32SC1); \
|
||||||
declare.in(src, WARMUP_RNG).out(dst);
|
\
|
||||||
|
declare.in(src, WARMUP_RNG).out(dst); \
|
||||||
if (depth == CV_32F)
|
\
|
||||||
{
|
if (depth == CV_32F) \
|
||||||
TEST_CYCLE()
|
{ \
|
||||||
CvRoundMat<float>(src, dst);
|
TEST_CYCLE() \
|
||||||
}
|
OP ## Mat<float>(src, dst); \
|
||||||
else if (depth == CV_64F)
|
} \
|
||||||
{
|
else if (depth == CV_64F) \
|
||||||
TEST_CYCLE()
|
{ \
|
||||||
CvRoundMat<double>(src, dst);
|
TEST_CYCLE() \
|
||||||
|
OP ## Mat<double>(src, dst); \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
SANITY_CHECK_NOTHING(); \
|
||||||
}
|
}
|
||||||
|
|
||||||
SANITY_CHECK_NOTHING();
|
DECL_ROUND_TEST(,cvRound,)
|
||||||
}
|
DECL_ROUND_TEST(_Ceil,cvCeil,)
|
||||||
|
DECL_ROUND_TEST(_Floor,cvFloor,)
|
||||||
|
|
||||||
|
/* For FP classification tests, try to test them in a way which uses
|
||||||
|
branching logic and avoids extra FP logic. */
|
||||||
|
DECL_ROUND_TEST(_NaN,cvIsNaN, ? 1 : 2)
|
||||||
|
DECL_ROUND_TEST(_Inf,cvIsInf, ? 1 : 2)
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
@ -3923,5 +3923,59 @@ TEST(Core_SoftFloat, CvRound)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
static void checkRounding(T in, int outCeil, int outFloor)
|
||||||
|
{
|
||||||
|
EXPECT_EQ(outCeil,cvCeil(in));
|
||||||
|
EXPECT_EQ(outFloor,cvFloor(in));
|
||||||
|
|
||||||
|
/* cvRound is not expected to be IEEE compliant. The implementation
|
||||||
|
should round to one of the above. */
|
||||||
|
EXPECT_TRUE((cvRound(in) == outCeil) || (cvRound(in) == outFloor));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(Core_FastMath, InlineRoundingOps)
|
||||||
|
{
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
double in;
|
||||||
|
int outCeil;
|
||||||
|
int outFloor;
|
||||||
|
} values[] =
|
||||||
|
{
|
||||||
|
// Values are chosen to convert to binary float 32/64 exactly
|
||||||
|
{ 1.0, 1, 1 },
|
||||||
|
{ 1.5, 2, 1 },
|
||||||
|
{ -1.5, -1, -2}
|
||||||
|
};
|
||||||
|
|
||||||
|
for (int i = 0, maxi = sizeof(values) / sizeof(values[0]); i < maxi; i++)
|
||||||
|
{
|
||||||
|
checkRounding<double>(values[i].in, values[i].outCeil, values[i].outFloor);
|
||||||
|
checkRounding<float>((float)values[i].in, values[i].outCeil, values[i].outFloor);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(Core_FastMath, InlineNaN)
|
||||||
|
{
|
||||||
|
EXPECT_EQ( cvIsNaN((float) NAN), 1);
|
||||||
|
EXPECT_EQ( cvIsNaN((float) -NAN), 1);
|
||||||
|
EXPECT_EQ( cvIsNaN(0.0f), 0);
|
||||||
|
EXPECT_EQ( cvIsNaN((double) NAN), 1);
|
||||||
|
EXPECT_EQ( cvIsNaN((double) -NAN), 1);
|
||||||
|
EXPECT_EQ( cvIsNaN(0.0), 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(Core_FastMath, InlineIsInf)
|
||||||
|
{
|
||||||
|
// Assume HUGE_VAL is infinity. Strictly speaking, may not always be true.
|
||||||
|
EXPECT_EQ( cvIsInf((float) HUGE_VAL), 1);
|
||||||
|
EXPECT_EQ( cvIsInf((float) -HUGE_VAL), 1);
|
||||||
|
EXPECT_EQ( cvIsInf(0.0f), 0);
|
||||||
|
EXPECT_EQ( cvIsInf((double) HUGE_VAL), 1);
|
||||||
|
EXPECT_EQ( cvIsInf((double) -HUGE_VAL), 1);
|
||||||
|
EXPECT_EQ( cvIsInf(0.0), 0);
|
||||||
|
}
|
||||||
|
|
||||||
}} // namespace
|
}} // namespace
|
||||||
/* End of file. */
|
/* End of file. */
|
||||||
|
Loading…
Reference in New Issue
Block a user