From 3cce299a789d3d504146ea73dd7f5e6dec474dd4 Mon Sep 17 00:00:00 2001 From: Clement Courbet Date: Mon, 17 Jul 2023 14:40:57 +0200 Subject: [PATCH] Use intrinsics for `cvRound` on x86 and x86_64 `__GNUC__` (clang/gcc linux) too. We've measured a 7x improvement in speed for `cvRound` using the intrinsic. --- modules/core/include/opencv2/core/fast_math.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/core/include/opencv2/core/fast_math.hpp b/modules/core/include/opencv2/core/fast_math.hpp index 47a2948222..9ec984d7aa 100644 --- a/modules/core/include/opencv2/core/fast_math.hpp +++ b/modules/core/include/opencv2/core/fast_math.hpp @@ -201,7 +201,7 @@ cvRound( double value ) { #if defined CV_INLINE_ROUND_DBL CV_INLINE_ROUND_DBL(value); -#elif (defined _MSC_VER && defined _M_X64) && !defined(__CUDACC__) +#elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __SSE2__)) && !defined(__CUDACC__) __m128d t = _mm_set_sd( value ); return _mm_cvtsd_si32(t); #elif defined _MSC_VER && defined _M_IX86 @@ -323,7 +323,7 @@ CV_INLINE int cvRound(float value) { #if defined CV_INLINE_ROUND_FLT CV_INLINE_ROUND_FLT(value); -#elif (defined _MSC_VER && defined _M_X64) && !defined(__CUDACC__) +#elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __SSE2__)) && !defined(__CUDACC__) __m128 t = _mm_set_ss( value ); return _mm_cvtss_si32(t); #elif defined _MSC_VER && defined _M_IX86