From fe59a5695f9afd9cbf02fd20a1551ed0d4dfeac8 Mon Sep 17 00:00:00 2001
From: Alexander Alekhin
Date: Mon, 27 Feb 2023 03:17:46 +0000
Subject: [PATCH] core(simd): 64-bit integer EQ/NE without misused 64F guard

---
NOTE(review): this copy was rebuilt from a whitespace-collapsed extract of
the patch. Line breaks and blank lines are reconstructed from the hunk line
counts and the diffstat; indentation is assumed to be 4 spaces. Template
lists that the extraction stripped (<typename R>, <R>, <R::nlanes>, <int n>,
<v_uint64>, <v_int64>) are restored from context - confirm against the
upstream commit before applying. Summary of the change: v_uint64x2/v_int64x2
operator== / operator!= no longer depend on CV_SIMD128_64F (native vceqq_u64
on AArch64, a 32-bit compare + vrev64q_u32 + AND fallback elsewhere), and
test_cmp64() now runs unconditionally.

 .../include/opencv2/core/hal/intrin_cpp.hpp   |  8 +-
 .../include/opencv2/core/hal/intrin_neon.hpp  | 54 +++++++++----
 modules/core/test/test_intrin_utils.hpp       | 78 +++++++++++--------
 3 files changed, 86 insertions(+), 54 deletions(-)

diff --git a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp
index 46222140e6..9a97376898 100644
--- a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp
@@ -879,14 +879,10 @@ OPENCV_HAL_IMPL_CMP_OP(<=)
 For all types except 64-bit integer values. */
 OPENCV_HAL_IMPL_CMP_OP(>=)
 
-/** @brief Equal comparison
-
-For all types except 64-bit integer values. */
+/** @brief Equal comparison */
 OPENCV_HAL_IMPL_CMP_OP(==)
 
-/** @brief Not equal comparison
-
-For all types except 64-bit integer values. */
+/** @brief Not equal comparison */
 OPENCV_HAL_IMPL_CMP_OP(!=)
 
 template<int n>
diff --git a/modules/core/include/opencv2/core/hal/intrin_neon.hpp b/modules/core/include/opencv2/core/hal/intrin_neon.hpp
index 5792694a40..3897cee12b 100644
--- a/modules/core/include/opencv2/core/hal/intrin_neon.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_neon.hpp
@@ -1038,18 +1038,6 @@ OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float64x2, v_min, vminq_f64)
 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float64x2, v_max, vmaxq_f64)
 #endif
 
-#if CV_SIMD128_64F
-inline int64x2_t vmvnq_s64(int64x2_t a)
-{
-    int64x2_t vx = vreinterpretq_s64_u32(vdupq_n_u32(0xFFFFFFFF));
-    return veorq_s64(a, vx);
-}
-inline uint64x2_t vmvnq_u64(uint64x2_t a)
-{
-    uint64x2_t vx = vreinterpretq_u64_u32(vdupq_n_u32(0xFFFFFFFF));
-    return veorq_u64(a, vx);
-}
-#endif
 #define OPENCV_HAL_IMPL_NEON_INT_CMP_OP(_Tpvec, cast, suffix, not_suffix) \
 inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
 { return _Tpvec(cast(vceqq_##suffix(a.val, b.val))); } \
@@ -1071,9 +1059,47 @@ OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int16x8, vreinterpretq_s16_u16, s16, u16)
 OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint32x4, OPENCV_HAL_NOP, u32, u32)
 OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int32x4, vreinterpretq_s32_u32, s32, u32)
 OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_float32x4, vreinterpretq_f32_u32, f32, u32)
+#if defined(__aarch64__) || defined(_M_ARM64)
+static inline uint64x2_t vmvnq_u64(uint64x2_t a)
+{
+    uint64x2_t vx = vreinterpretq_u64_u32(vdupq_n_u32(0xFFFFFFFF));
+    return veorq_u64(a, vx);
+}
+//OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint64x2, OPENCV_HAL_NOP, u64, u64)
+//OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int64x2, vreinterpretq_s64_u64, s64, u64)
+static inline v_uint64x2 operator == (const v_uint64x2& a, const v_uint64x2& b)
+{ return v_uint64x2(vceqq_u64(a.val, b.val)); }
+static inline v_uint64x2 operator != (const v_uint64x2& a, const v_uint64x2& b)
+{ return v_uint64x2(vmvnq_u64(vceqq_u64(a.val, b.val))); }
+static inline v_int64x2 operator == (const v_int64x2& a, const v_int64x2& b)
+{ return v_int64x2(vreinterpretq_s64_u64(vceqq_s64(a.val, b.val))); }
+static inline v_int64x2 operator != (const v_int64x2& a, const v_int64x2& b)
+{ return v_int64x2(vreinterpretq_s64_u64(vmvnq_u64(vceqq_s64(a.val, b.val)))); }
+#else
+static inline v_uint64x2 operator == (const v_uint64x2& a, const v_uint64x2& b)
+{
+    uint32x4_t cmp = vceqq_u32(vreinterpretq_u32_u64(a.val), vreinterpretq_u32_u64(b.val));
+    uint32x4_t swapped = vrev64q_u32(cmp);
+    return v_uint64x2(vreinterpretq_u64_u32(vandq_u32(cmp, swapped)));
+}
+static inline v_uint64x2 operator != (const v_uint64x2& a, const v_uint64x2& b)
+{
+    uint32x4_t cmp = vceqq_u32(vreinterpretq_u32_u64(a.val), vreinterpretq_u32_u64(b.val));
+    uint32x4_t swapped = vrev64q_u32(cmp);
+    uint64x2_t v_eq = vreinterpretq_u64_u32(vandq_u32(cmp, swapped));
+    uint64x2_t vx = vreinterpretq_u64_u32(vdupq_n_u32(0xFFFFFFFF));
+    return v_uint64x2(veorq_u64(v_eq, vx));
+}
+static inline v_int64x2 operator == (const v_int64x2& a, const v_int64x2& b)
+{
+    return v_reinterpret_as_s64(v_reinterpret_as_u64(a) == v_reinterpret_as_u64(b));
+}
+static inline v_int64x2 operator != (const v_int64x2& a, const v_int64x2& b)
+{
+    return v_reinterpret_as_s64(v_reinterpret_as_u64(a) != v_reinterpret_as_u64(b));
+}
+#endif
 #if CV_SIMD128_64F
-OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint64x2, OPENCV_HAL_NOP, u64, u64)
-OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int64x2, vreinterpretq_s64_u64, s64, u64)
 OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_float64x2, vreinterpretq_f64_u64, f64, u64)
 #endif
 
diff --git a/modules/core/test/test_intrin_utils.hpp b/modules/core/test/test_intrin_utils.hpp
index 3f196f1342..da1f26790c 100644
--- a/modules/core/test/test_intrin_utils.hpp
+++ b/modules/core/test/test_intrin_utils.hpp
@@ -97,7 +97,7 @@ template <typename R> struct Data
     {
         *this = r;
     }
-    operator R ()
+    operator R () const
     {
         return initializer<R::nlanes>().init(*this);
     }
@@ -1559,11 +1559,34 @@ template<typename R> struct TheTest
     }
 #endif
 
-#if CV_SIMD_64F
+    void do_check_cmp64(const Data<R>& dataA, const Data<R>& dataB)
+    {
+        R a = dataA;
+        R b = dataB;
+
+        Data<R> dataEQ = (a == b);
+        Data<R> dataNE = (a != b);
+
+        for (int i = 0; i < R::nlanes; ++i)
+        {
+            SCOPED_TRACE(cv::format("i=%d", i));
+            if (cvtest::debugLevel > 0) cout << "i=" << i << " ( " << dataA[i] << " vs " << dataB[i] << " ): eq=" << dataEQ[i] << " ne=" << dataNE[i] << endl;
+            EXPECT_NE((LaneType)dataEQ[i], (LaneType)dataNE[i]);
+            if (dataA[i] == dataB[i])
+                EXPECT_EQ((LaneType)-1, (LaneType)dataEQ[i]);
+            else
+                EXPECT_EQ((LaneType)0, (LaneType)dataEQ[i]);
+            if (dataA[i] != dataB[i])
+                EXPECT_EQ((LaneType)-1, (LaneType)dataNE[i]);
+            else
+                EXPECT_EQ((LaneType)0, (LaneType)dataNE[i]);
+        }
+    }
+
     TheTest & test_cmp64()
     {
-        Data<R> dataA, dataB;
-        R a = dataA, b = dataB;
+        Data<R> dataA;
+        Data<R> dataB;
 
         for (int i = 0; i < R::nlanes; ++i)
         {
@@ -1571,37 +1594,25 @@ template<typename R> struct TheTest
         }
         dataA[0]++;
 
-        a = dataA, b = dataB;
+        do_check_cmp64(dataA, dataB);
+        do_check_cmp64(dataB, dataA);
 
-        Data<R> resC = (a == b);
-        Data<R> resD = (a != b);
+        dataA[0] = dataB[0];
+        dataA[1] += (((LaneType)1) << 32);
+        do_check_cmp64(dataA, dataB);
+        do_check_cmp64(dataB, dataA);
 
-        for (int i = 0; i < R::nlanes; ++i)
-        {
-            SCOPED_TRACE(cv::format("i=%d", i));
-            EXPECT_EQ(dataA[i] == dataB[i], resC[i] != 0);
-            EXPECT_EQ(dataA[i] != dataB[i], resD[i] != 0);
-        }
+        dataA[0] = (LaneType)-1;
+        dataB[0] = (LaneType)-1;
+        dataA[1] = (LaneType)-1;
+        dataB[1] = (LaneType)2;
 
-        for (int i = 0; i < R::nlanes; ++i)
-        {
-            dataA[i] = dataB[i] = (LaneType)-1;
-        }
+        do_check_cmp64(dataA, dataB);
+        do_check_cmp64(dataB, dataA);
 
-        a = dataA, b = dataB;
-
-        resC = (a == b);
-        resD = (a != b);
-
-        for (int i = 0; i < R::nlanes; ++i)
-        {
-            SCOPED_TRACE(cv::format("i=%d", i));
-            EXPECT_EQ(dataA[i] == dataB[i], resC[i] != 0);
-            EXPECT_EQ(dataA[i] != dataB[i], resD[i] != 0);
-        }
         return *this;
     }
-#endif
+
 };
 
 
@@ -1837,9 +1848,8 @@ void test_hal_intrin_uint64()
     TheTest<v_uint64>()
         .test_loadstore()
         .test_addsub()
-#if CV_SIMD_64F
         .test_cmp64()
-#endif
+        //.test_cmp() - not declared as supported
         .test_shift<1>().test_shift<8>()
         .test_logic()
         .test_reverse()
@@ -1857,9 +1867,8 @@ void test_hal_intrin_int64()
     TheTest<v_int64>()
         .test_loadstore()
         .test_addsub()
-#if CV_SIMD_64F
         .test_cmp64()
-#endif
+        //.test_cmp() - not declared as supported
         .test_shift<1>().test_shift<8>()
         .test_logic()
         .test_reverse()
@@ -1936,7 +1945,8 @@ void test_hal_intrin_float64()
         .test_rotate<2>().test_rotate<3>()
 #endif
         ;
-
+#else
+    std::cout << "SKIP: CV_SIMD_64F is not available" << std::endl;
 #endif
 }
 