mirror of
https://github.com/opencv/opencv.git
synced 2025-06-11 11:45:30 +08:00
Merge pull request #12121 from maver1:amatyuko/sse2_convert_with_saturation_fix
This commit is contained in:
commit
23022f3ffb
@ -494,7 +494,12 @@ void v_rshr_pack_store(ushort* ptr, const v_uint32x4& a)
|
||||
// Pack two vectors of four signed 32-bit lanes into one vector of eight
// unsigned 16-bit lanes, saturating every value to the range [0, 65535].
// Lanes taken from `a` occupy the low half of the result and lanes taken
// from `b` the high half (the ordering produced by _mm_packs_epi32).
//
// SSE2 only offers a *signed* saturating 32->16 pack, so the values are
// biased by -32768 before packing and the bias is removed afterwards in
// 16-bit arithmetic.
inline v_uint16x8 v_pack_u(const v_int32x4& a, const v_int32x4& b)
{
    const __m128i zero = _mm_set1_epi32(0);
    const __m128i delta32 = _mm_set1_epi32(32768);

    // Preliminarily saturate negative values to zero. Without this step the
    // 32-bit subtraction below wraps around for strongly negative inputs
    // (below INT32_MIN + 32768) and they would saturate to 65535 instead of 0.
    // The compare yields an all-ones mask only for lanes > 0, so the AND
    // keeps positive lanes and clears the rest.
    __m128i a1 = _mm_and_si128(a.val, _mm_cmpgt_epi32(a.val, zero));
    __m128i b1 = _mm_and_si128(b.val, _mm_cmpgt_epi32(b.val, zero));

    // Shift [0, 65535] onto [-32768, 32767] so the signed saturating pack
    // clamps the upper bound correctly.
    __m128i r = _mm_packs_epi32(_mm_sub_epi32(a1, delta32), _mm_sub_epi32(b1, delta32));

    // Undo the bias; subtracting -32768 in wrapping 16-bit arithmetic is the
    // same as adding 32768.
    return v_uint16x8(_mm_sub_epi16(r, _mm_set1_epi16(-32768)));
}
|
||||
|
||||
|
@ -453,9 +453,9 @@ struct Cvt_SIMD<int, uchar>
|
||||
{
|
||||
v_int32x4 v_src1 = v_load(src + x), v_src2 = v_load(src + x + cWidth);
|
||||
v_int32x4 v_src3 = v_load(src + x + cWidth * 2), v_src4 = v_load(src + x + cWidth * 3);
|
||||
v_uint16x8 v_dst1 = v_pack_u(v_src1, v_src2);
|
||||
v_uint16x8 v_dst2 = v_pack_u(v_src3, v_src4);
|
||||
v_store(dst + x, v_pack(v_dst1, v_dst2));
|
||||
v_int16x8 v_dst1 = v_pack(v_src1, v_src2);
|
||||
v_int16x8 v_dst2 = v_pack(v_src3, v_src4);
|
||||
v_store(dst + x, v_pack_u(v_dst1, v_dst2));
|
||||
}
|
||||
}
|
||||
return x;
|
||||
|
Loading…
Reference in New Issue
Block a user