mirror of
https://github.com/opencv/opencv.git
synced 2025-01-18 06:03:15 +08:00
improve MomentsInTile_SIMD<uchar, int, int>()
This commit is contained in:
parent
b34272f8a2
commit
065d631f0e
@ -227,16 +227,15 @@ struct MomentsInTile_SIMD<uchar, int, int>
|
||||
|
||||
if( useSIMD )
|
||||
{
|
||||
__m128i qx_init = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
__m128i dx = _mm_set1_epi16(8);
|
||||
__m128i z = _mm_setzero_si128(), qx0 = z, qx1 = z, qx2 = z, qx3 = z, qx = qx_init;
|
||||
__m128i z = _mm_setzero_si128(), qx0 = z, qx1 = z, qx2 = z, qx3 = z, qx = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
|
||||
for( ; x <= len - 8; x += 8 )
|
||||
{
|
||||
__m128i p = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(ptr + x)), z);
|
||||
__m128i sx = _mm_mullo_epi16(qx, qx);
|
||||
|
||||
qx0 = _mm_add_epi32(qx0, _mm_sad_epu8(p, z));
|
||||
qx0 = _mm_add_epi16(qx0, p);
|
||||
qx1 = _mm_add_epi32(qx1, _mm_madd_epi16(p, qx));
|
||||
qx2 = _mm_add_epi32(qx2, _mm_madd_epi16(p, sx));
|
||||
qx3 = _mm_add_epi32(qx3, _mm_madd_epi16( _mm_mullo_epi16(p, qx), sx));
|
||||
@ -244,14 +243,21 @@ struct MomentsInTile_SIMD<uchar, int, int>
|
||||
qx = _mm_add_epi16(qx, dx);
|
||||
}
|
||||
|
||||
_mm_store_si128((__m128i*)buf, qx0);
|
||||
x0 = buf[0] + buf[1] + buf[2] + buf[3];
|
||||
_mm_store_si128((__m128i*)buf, qx1);
|
||||
x1 = buf[0] + buf[1] + buf[2] + buf[3];
|
||||
_mm_store_si128((__m128i*)buf, qx2);
|
||||
x2 = buf[0] + buf[1] + buf[2] + buf[3];
|
||||
_mm_store_si128((__m128i*)buf, qx3);
|
||||
x3 = buf[0] + buf[1] + buf[2] + buf[3];
|
||||
__m128i qx01_lo = _mm_unpacklo_epi32(qx0, qx1);
|
||||
__m128i qx23_lo = _mm_unpacklo_epi32(qx2, qx3);
|
||||
__m128i qx01_hi = _mm_unpackhi_epi32(qx0, qx1);
|
||||
__m128i qx23_hi = _mm_unpackhi_epi32(qx2, qx3);
|
||||
qx01_lo = _mm_add_epi32(qx01_lo, qx01_hi);
|
||||
qx23_lo = _mm_add_epi32(qx23_lo, qx23_hi);
|
||||
__m128i qx0123_lo = _mm_unpacklo_epi64(qx01_lo, qx23_lo);
|
||||
__m128i qx0123_hi = _mm_unpackhi_epi64(qx01_lo, qx23_lo);
|
||||
qx0123_lo = _mm_add_epi32(qx0123_lo, qx0123_hi);
|
||||
_mm_store_si128((__m128i*)buf, qx0123_lo);
|
||||
|
||||
x0 = (buf[0] & 0xffff) + (buf[0] >> 16);
|
||||
x1 = buf[1];
|
||||
x2 = buf[2];
|
||||
x3 = buf[3];
|
||||
}
|
||||
|
||||
return x;
|
||||
|
Loading…
Reference in New Issue
Block a user