From f0fb91f2d42dba32fde8dc19c94bf4decddc4b39 Mon Sep 17 00:00:00 2001 From: Vitaly Tuzov Date: Thu, 23 May 2019 13:45:14 +0300 Subject: [PATCH] Fixed v_signmask implementation for AVX2, updated universal intrinsics tests. --- modules/core/include/opencv2/core/hal/intrin_avx.hpp | 10 +++------- modules/core/test/test_intrin_utils.hpp | 9 ++++----- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/modules/core/include/opencv2/core/hal/intrin_avx.hpp b/modules/core/include/opencv2/core/hal/intrin_avx.hpp index 91e4483444..cd7490bb0d 100644 --- a/modules/core/include/opencv2/core/hal/intrin_avx.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_avx.hpp @@ -1227,18 +1227,14 @@ inline int v_signmask(const v_uint8x32& a) { return v_signmask(v_reinterpret_as_s8(a)); } inline int v_signmask(const v_int16x16& a) -{ - v_int8x32 v = v_int8x32(_mm256_packs_epi16(a.val, a.val)); - return v_signmask(v) & 255; -} +{ return v_signmask(v_pack(a, a)) & 0xFFFF; } inline int v_signmask(const v_uint16x16& a) { return v_signmask(v_reinterpret_as_s16(a)); } inline int v_signmask(const v_int32x8& a) { - __m256i a16 = _mm256_packs_epi32(a.val, a.val); - v_int8x32 v = v_int8x32(_mm256_packs_epi16(a16, a16)); - return v_signmask(v) & 15; + v_int16x16 a16 = v_pack(a, a); + return v_signmask(v_pack(a16, a16)) & 0xFF; } inline int v_signmask(const v_uint32x8& a) { return v_signmask(v_reinterpret_as_s32(a)); } diff --git a/modules/core/test/test_intrin_utils.hpp b/modules/core/test/test_intrin_utils.hpp index 6ead0ecc60..3cd1145985 100644 --- a/modules/core/test/test_intrin_utils.hpp +++ b/modules/core/test/test_intrin_utils.hpp @@ -32,8 +32,7 @@ template <> struct initializer<64> return R(d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15], d[16], d[17], d[18], d[19], d[20], d[21], d[22], d[23], d[24], d[25], d[26], d[27], d[28], d[29], d[30], d[31], d[32], d[33], d[34], d[35], d[36], d[37], d[38], d[39], d[40], d[41], d[42], d[43], d[44], d[45], d[46], d[47], - d[48], d[49], d[50], d[51], d[52], d[53], d[54], d[55], d[56], d[57], d[58], d[59], d[50], d[51], d[52], d[53], - d[54], d[55], d[56], d[57], d[58], d[59], d[60], d[61], d[62], d[63]); + d[48], d[49], d[50], d[51], d[52], d[53], d[54], d[55], d[56], d[57], d[58], d[59], d[60], d[61], d[62], d[63]); } }; @@ -660,7 +659,7 @@ template struct TheTest { SCOPED_TRACE(cv::format("i=%d", i)); EXPECT_COMPARE_EQ((float)std::sqrt(dataA[i]), (float)resB[i]); - EXPECT_COMPARE_EQ(1/(float)std::sqrt(dataA[i]), (float)resC[i]); + EXPECT_COMPARE_EQ((float)(1/std::sqrt(dataA[i])), (float)resC[i]); EXPECT_COMPARE_EQ((float)abs(dataA[i]), (float)resE[i]); } @@ -808,8 +807,8 @@ template struct TheTest dataC *= (LaneType)-1; R a = dataA, b = dataB, c = dataC, d = dataD, e = dataE; - int m = v_signmask(a); - EXPECT_EQ(2, m); + EXPECT_EQ(2, v_signmask(a)); + EXPECT_EQ(2 | (1 << (R::nlanes / 2)) | (1 << (R::nlanes - 1)), v_signmask(b)); EXPECT_EQ(false, v_check_all(a)); EXPECT_EQ(false, v_check_all(b));