mirror of
https://github.com/opencv/opencv.git
synced 2024-11-28 05:06:29 +08:00
Use intrinsics for cvRound on x86 and x86_64 with __GNUC__ (clang/gcc on Linux) too.
We've measured a 7x improvement in speed for `cvRound` using the intrinsic.
This commit is contained in:
parent
0052d46b8e
commit
3cce299a78
@@ -201,7 +201,7 @@ cvRound( double value )
|
||||
{
|
||||
#if defined CV_INLINE_ROUND_DBL
|
||||
CV_INLINE_ROUND_DBL(value);
|
||||
#elif (defined _MSC_VER && defined _M_X64) && !defined(__CUDACC__)
|
||||
#elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __SSE2__)) && !defined(__CUDACC__)
|
||||
__m128d t = _mm_set_sd( value );
|
||||
return _mm_cvtsd_si32(t);
|
||||
#elif defined _MSC_VER && defined _M_IX86
|
||||
@@ -323,7 +323,7 @@ CV_INLINE int cvRound(float value)
|
||||
{
|
||||
#if defined CV_INLINE_ROUND_FLT
|
||||
CV_INLINE_ROUND_FLT(value);
|
||||
#elif (defined _MSC_VER && defined _M_X64) && !defined(__CUDACC__)
|
||||
#elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __SSE2__)) && !defined(__CUDACC__)
|
||||
__m128 t = _mm_set_ss( value );
|
||||
return _mm_cvtss_si32(t);
|
||||
#elif defined _MSC_VER && defined _M_IX86
|
||||
|
Loading…
Reference in New Issue
Block a user