Updated v_popcount description, reference implementation and test.

This commit is contained in:
Vitaly Tuzov 2019-04-03 11:45:38 +03:00
parent 96ab78dc4f
commit 1220dd4877
3 changed files with 24 additions and 25 deletions

View File

@ -603,27 +603,20 @@ static const unsigned char popCountTable[] =
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
}; };
/** @brief Count the 1 bits in the vector and return 4 values /** @brief Count the 1 bits in the vector lanes and return result as corresponding unsigned type
Scheme: Scheme:
@code @code
{A1 A2 A3 ...} => popcount(A1) {A1 A2 A3 ...} => {popcount(A1), popcount(A2), popcount(A3), ...}
@endcode @endcode
Any types but result will be in v_uint32x4*/ For all integer types. */
template<typename _Tp, int n> inline v_uint32x4 v_popcount(const v_reg<_Tp, n>& a) template<typename _Tp, int n>
inline v_reg<typename V_TypeTraits<_Tp>::abs_type, n> v_popcount(const v_reg<_Tp, n>& a)
{ {
v_uint8x16 b; v_reg<typename V_TypeTraits<_Tp>::abs_type, n> b = v_reg<typename V_TypeTraits<_Tp>::abs_type, n>::zero();
b = v_reinterpret_as_u8(a); for( int i = 0; i < n*sizeof(_Tp); i++ )
for( int i = 0; i < v_uint8x16::nlanes; i++ ) b.s[i/sizeof(_Tp)] += popCountTable[v_reinterpret_as_u8(a).s[i]];
{ return b;
b.s[i] = popCountTable[b.s[i]];
}
v_uint32x4 c;
for( int i = 0; i < v_uint32x4::nlanes; i++ )
{
c.s[i] = b.s[i*4] + b.s[i*4+1] + b.s[i*4+2] + b.s[i*4+3];
}
return c;
} }

View File

@ -75,7 +75,7 @@ int normHamming(const uchar* a, int n)
v_uint64 t = vx_setzero_u64(); v_uint64 t = vx_setzero_u64();
for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes) for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes)
t += v_popcount(v_reinterpret_as_u64(vx_load(a + i))); t += v_popcount(v_reinterpret_as_u64(vx_load(a + i)));
result += v_reduce_sum(t); result += (int)v_reduce_sum(t);
} }
#endif // CV_SIMD #endif // CV_SIMD
#if CV_ENABLE_UNROLLED #if CV_ENABLE_UNROLLED
@ -144,7 +144,7 @@ int normHamming(const uchar* a, const uchar* b, int n)
v_uint64 t = vx_setzero_u64(); v_uint64 t = vx_setzero_u64();
for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes) for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes)
t += v_popcount(v_reinterpret_as_u64(vx_load(a + i) ^ vx_load(b + i))); t += v_popcount(v_reinterpret_as_u64(vx_load(a + i) ^ vx_load(b + i)));
result += v_reduce_sum(t); result += (int)v_reduce_sum(t);
} }
#endif // CV_SIMD #endif // CV_SIMD
#if CV_ENABLE_UNROLLED #if CV_ENABLE_UNROLLED

View File

@ -686,18 +686,24 @@ template<typename R> struct TheTest
TheTest & test_popcount() TheTest & test_popcount()
{ {
typedef typename V_RegTraits<R>::u_reg Ru;
static unsigned popcountTable[] = { static unsigned popcountTable[] = {
0, 1, 2, 4, 5, 7, 9, 12, 13, 15, 17, 20, 22, 25, 28, 32, 33, 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, //0x00-0x0f
35, 37, 40, 42, 45, 48, 52, 54, 57, 60, 64, 67, 71, 75, 80, 81, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, //0x10-0x1f
83, 85, 88, 90, 93, 96, 100, 102, 105, 108, 112, 115, 119, 123, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, //0x20-0x2f
128, 130, 133, 136, 140, 143, 147, 151, 156, 159, 163, 167, 172, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, //0x30-0x3f
176, 181, 186, 192, 193 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, //0x40-0x4f
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, //0x50-0x5f
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, //0x60-0x6f
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, //0x70-0x7f
1 //0x80
}; };
Data<R> dataA; Data<R> dataA;
R a = dataA; R a = dataA;
unsigned resB = (unsigned)v_reduce_sum(v_popcount(a)); Data<Ru> resB = v_popcount(a);
EXPECT_EQ(popcountTable[R::nlanes], resB); for (int i = 0; i < Ru::nlanes; ++i)
EXPECT_EQ(popcountTable[i + 1], resB[i]);
return *this; return *this;
} }