Merge pull request #15738 from ChipKerchner:bugInt64x2Comparison

Fixing bug with comparison of v_int64x2 or v_uint64x2 * Casting v_uint64x2 to v_float64x2 and comparing does NOT work in all cases. Rewrite using epi64 instructions - faster too. * Fix bad merge. * Fix equal comparison for non-SSE4.1. Add test cases for v_int64x2 comparisons. * Try to fix merge conflict. * Only test v_int64x2 comparisons if CV_SIMD_64F. * Fix compiler warning.
2025-07-25 22:57:53 +08:00 · 2019-10-22 09:37:20 -04:00 · 2019-10-22 09:37:20 -04:00 · 5a6a49405d
commit 5a6a49405d
parent 1864b64f64
2 changed files with 64 additions and 5 deletions
--- a/modules/core/include/opencv2/core/hal/intrin_sse.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_sse.hpp
@ -1220,14 +1220,23 @@ inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \
 OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(v_float32x4, ps)
 OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(v_float64x2, pd)
-#define OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(_Tpvec, cast) \
+#if CV_SSE4_1
 // SSE4.1 variant: operator == compares the 64-bit lanes directly with
 // _mm_cmpeq_epi64; operator != is the bitwise complement of operator ==.
 #define OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(_Tpvec) \
 inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
-{ return cast(v_reinterpret_as_f64(a) == v_reinterpret_as_f64(b)); } \
+{ return _Tpvec(_mm_cmpeq_epi64(a.val, b.val)); } \
 inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
-{ return cast(v_reinterpret_as_f64(a) != v_reinterpret_as_f64(b)); }
+{ return ~(a == b); }
 #else
 // Fallback variant without a 64-bit integer compare: operator == compares
 // the four 32-bit elements with _mm_cmpeq_epi32, then ANDs that result with
 // a copy in which adjacent 32-bit elements are swapped (_MM_SHUFFLE(2, 3, 0, 1)),
 // so a 64-bit lane is all ones only when both of its 32-bit halves matched.
 // operator != is the bitwise complement of operator ==.
 #define OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(_Tpvec) \
 inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
 { __m128i cmp = _mm_cmpeq_epi32(a.val, b.val); \
  return _Tpvec(_mm_and_si128(cmp, _mm_shuffle_epi32(cmp, _MM_SHUFFLE(2, 3, 0, 1)))); } \
 inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
 { return ~(a == b); }
 #endif
-OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_uint64x2, v_reinterpret_as_u64)
+OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_uint64x2)
-OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_int64x2, v_reinterpret_as_s64)
+OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_int64x2)
 // Returns the result of _mm_cmpord_ps(a, a) wrapped in a v_float32x4: an
 // "ordered" self-comparison, which holds only for lanes that are not NaN.
 inline v_float32x4 v_not_nan(const v_float32x4& a)
 { return v_float32x4(_mm_cmpord_ps(a.val, a.val)); }
--- a/modules/core/test/test_intrin_utils.hpp
+++ b/modules/core/test/test_intrin_utils.hpp
@ -1442,6 +1442,50 @@ template<typename R> struct TheTest
        return *this;
    }
 #endif
 #if CV_SIMD_64F
    // Checks the vector operators == and != of R lane by lane against the
    // scalar comparison of the same source data, in two scenarios:
    //   1) both inputs identical except lane 0;
    //   2) every lane of both inputs set to (LaneType)-1.
    // A result lane is only tested for being zero / non-zero; the exact mask
    // value is not verified. Returns *this so calls can be chained.
    TheTest & test_cmp64()
    {
        Data<R> dataA, dataB;
        // These initial register values are overwritten below before first use.
        R a = dataA, b = dataB;

        // Scenario 1: make A an exact copy of B ...
        for (int i = 0; i < R::nlanes; ++i)
        {
            dataA[i] = dataB[i];
        }
        // ... then perturb lane 0 so it differs between the two inputs.
        dataA[0]++;
        a = dataA, b = dataB;

        Data<R> resC = (a == b);
        Data<R> resD = (a != b);

        for (int i = 0; i < R::nlanes; ++i)
        {
            SCOPED_TRACE(cv::format("i=%d", i));
            // Vector result lane must be non-zero exactly when the scalar comparison is true.
            EXPECT_EQ(dataA[i] == dataB[i], resC[i] != 0);
            EXPECT_EQ(dataA[i] != dataB[i], resD[i] != 0);
        }

        // Scenario 2: equal inputs whose lanes are all (LaneType)-1.
        // NOTE(review): for 64-bit integer lanes this is the all-ones bit pattern,
        // which read as a double is a NaN -- presumably chosen because a
        // float-based equality would report such lanes as unequal; confirm.
        for (int i = 0; i < R::nlanes; ++i)
        {
            dataA[i] = dataB[i] = (LaneType)-1;
        }

        a = dataA, b = dataB;

        resC = (a == b);
        resD = (a != b);

        for (int i = 0; i < R::nlanes; ++i)
        {
            SCOPED_TRACE(cv::format("i=%d", i));
            EXPECT_EQ(dataA[i] == dataB[i], resC[i] != 0);
            EXPECT_EQ(dataA[i] != dataB[i], resD[i] != 0);
        }
        return *this;
    }
 #endif
 };
@ -1657,6 +1701,9 @@ void test_hal_intrin_uint64()
    TheTest<v_uint64>()
        .test_loadstore()
        .test_addsub()
 #if CV_SIMD_64F
        .test_cmp64()
 #endif
        .test_shift<1>().test_shift<8>()
        .test_logic()
        .test_reverse()
@ -1671,6 +1718,9 @@ void test_hal_intrin_int64()
    TheTest<v_int64>()
        .test_loadstore()
        .test_addsub()
 #if CV_SIMD_64F
        .test_cmp64()
 #endif
        .test_shift<1>().test_shift<8>()
        .test_logic()
        .test_reverse()