mirror of
https://github.com/opencv/opencv.git
synced 2025-06-11 11:45:30 +08:00
Fixed v_reduce_sad intrinsics implementation and added tests
This commit is contained in:
parent
5c0a98cfb6
commit
18d10d6b86
@ -1141,12 +1141,16 @@ inline v_float32x8 v_reduce_sum4(const v_float32x8& a, const v_float32x8& b,
|
||||
|
||||
inline unsigned v_reduce_sad(const v_uint8x32& a, const v_uint8x32& b)
|
||||
{
|
||||
return (unsigned)_v_cvtsi256_si32(_mm256_sad_epu8(a.val, b.val));
|
||||
__m256i half = _mm256_sad_epu8(a.val, b.val);
|
||||
__m128i quarter = _mm_add_epi32(_v256_extract_low(half), _v256_extract_high(half));
|
||||
return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(quarter, _mm_unpackhi_epi64(quarter, quarter)));
|
||||
}
|
||||
inline unsigned v_reduce_sad(const v_int8x32& a, const v_int8x32& b)
|
||||
{
|
||||
__m256i half = _mm256_set1_epi8(0x7f);
|
||||
return (unsigned)_v_cvtsi256_si32(_mm256_sad_epu8(_mm256_add_epi8(a.val, half), _mm256_add_epi8(b.val, half)));
|
||||
half = _mm256_sad_epu8(_mm256_add_epi8(a.val, half), _mm256_add_epi8(b.val, half));
|
||||
__m128i quarter = _mm_add_epi32(_v256_extract_low(half), _v256_extract_high(half));
|
||||
return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(quarter, _mm_unpackhi_epi64(quarter, quarter)));
|
||||
}
|
||||
inline unsigned v_reduce_sad(const v_uint16x16& a, const v_uint16x16& b)
|
||||
{
|
||||
|
@ -1486,13 +1486,14 @@ OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_float32x4, float, min, std::min)
|
||||
|
||||
inline unsigned v_reduce_sad(const v_uint8x16& a, const v_uint8x16& b)
|
||||
{
|
||||
return (unsigned)_mm_cvtsi128_si32(_mm_sad_epu8(a.val, b.val));
|
||||
__m128i half = _mm_sad_epu8(a.val, b.val);
|
||||
return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(half, _mm_unpackhi_epi64(half, half)));
|
||||
}
|
||||
inline unsigned v_reduce_sad(const v_int8x16& a, const v_int8x16& b)
|
||||
{
|
||||
__m128i half = _mm_set1_epi8(0x7f);
|
||||
return (unsigned)_mm_cvtsi128_si32(_mm_sad_epu8(_mm_add_epi8(a.val, half),
|
||||
_mm_add_epi8(b.val, half)));
|
||||
half = _mm_sad_epu8(_mm_add_epi8(a.val, half), _mm_add_epi8(b.val, half));
|
||||
return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(half, _mm_unpackhi_epi64(half, half)));
|
||||
}
|
||||
inline unsigned v_reduce_sad(const v_uint16x8& a, const v_uint16x8& b)
|
||||
{
|
||||
|
@ -770,6 +770,15 @@ template<typename R> struct TheTest
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Verifies v_reduce_sad for register type R against a closed-form expected value.
TheTest & test_reduce_sad()
{
    // NOTE(review): the expected total of nlanes^2/4 presumes a default
    // Data<R> holds 1..nlanes and Data<R>(v) holds v in every lane -
    // confirm against the Data definition.
    Data<R> lhsData;
    Data<R> rhsData(R::nlanes/2);
    R lhs = lhsData;
    R rhs = rhsData;
    const unsigned expectedSad = (unsigned)(R::nlanes*R::nlanes/4);
    EXPECT_EQ(expectedSad, v_reduce_sad(lhs, rhs));
    return *this;
}
|
||||
|
||||
TheTest & test_mask()
|
||||
{
|
||||
typedef typename V_RegTraits<R>::int_reg int_reg;
|
||||
@ -1320,6 +1329,7 @@ void test_hal_intrin_uint8()
|
||||
.test_logic()
|
||||
.test_min_max()
|
||||
.test_absdiff()
|
||||
.test_reduce_sad()
|
||||
.test_mask()
|
||||
.test_popcount()
|
||||
.test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()
|
||||
@ -1358,6 +1368,7 @@ void test_hal_intrin_int8()
|
||||
.test_absdiff()
|
||||
.test_absdiffs()
|
||||
.test_abs()
|
||||
.test_reduce_sad()
|
||||
.test_mask()
|
||||
.test_popcount()
|
||||
.test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()
|
||||
@ -1387,6 +1398,7 @@ void test_hal_intrin_uint16()
|
||||
.test_min_max()
|
||||
.test_absdiff()
|
||||
.test_reduce()
|
||||
.test_reduce_sad()
|
||||
.test_mask()
|
||||
.test_popcount()
|
||||
.test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()
|
||||
@ -1418,6 +1430,7 @@ void test_hal_intrin_int16()
|
||||
.test_absdiffs()
|
||||
.test_abs()
|
||||
.test_reduce()
|
||||
.test_reduce_sad()
|
||||
.test_mask()
|
||||
.test_popcount()
|
||||
.test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()
|
||||
@ -1446,6 +1459,7 @@ void test_hal_intrin_uint32()
|
||||
.test_min_max()
|
||||
.test_absdiff()
|
||||
.test_reduce()
|
||||
.test_reduce_sad()
|
||||
.test_mask()
|
||||
.test_popcount()
|
||||
.test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>()
|
||||
@ -1473,6 +1487,7 @@ void test_hal_intrin_int32()
|
||||
.test_min_max()
|
||||
.test_absdiff()
|
||||
.test_reduce()
|
||||
.test_reduce_sad()
|
||||
.test_mask()
|
||||
.test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>()
|
||||
.test_unpack()
|
||||
@ -1528,6 +1543,7 @@ void test_hal_intrin_float32()
|
||||
.test_min_max()
|
||||
.test_float_absdiff()
|
||||
.test_reduce()
|
||||
.test_reduce_sad()
|
||||
.test_mask()
|
||||
.test_unpack()
|
||||
.test_float_math()
|
||||
|
Loading…
Reference in New Issue
Block a user