Merge pull request #8911 from alalek:fix_vsum4

This commit is contained in:
Alexander Alekhin 2017-06-14 12:00:58 +00:00
commit cf86f88c71
2 changed files with 24 additions and 0 deletions

View File

@ -1129,9 +1129,15 @@ OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(v_float32x4, float, __m128, ps, _mm_castps_s
/**
 * Reduces four float vectors at once: each lane of the result holds the sum
 * of all four lanes of one input.
 *
 *   result = (sum(a), sum(b), sum(c), sum(d))
 */
inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
                                 const v_float32x4& c, const v_float32x4& d)
{
#if CV_SSE3
    // Two rounds of horizontal adds:
    //   round 1 -> (a0+a1, a2+a3, b0+b1, b2+b3) and (c0+c1, c2+c3, d0+d1, d2+d3)
    //   round 2 -> (sum(a), sum(b), sum(c), sum(d))
    const __m128 pairs_ab = _mm_hadd_ps(a.val, b.val);
    const __m128 pairs_cd = _mm_hadd_ps(c.val, d.val);
    const __m128 totals = _mm_hadd_ps(pairs_ab, pairs_cd);
    return v_float32x4(totals);
#else
    // Without horizontal adds: interleave, then add the low and high halves.
    const __m128 ac_lo = _mm_unpacklo_ps(a.val, c.val);   // (a0, c0, a1, c1)
    const __m128 ac_hi = _mm_unpackhi_ps(a.val, c.val);   // (a2, c2, a3, c3)
    const __m128 bd_lo = _mm_unpacklo_ps(b.val, d.val);   // (b0, d0, b1, d1)
    const __m128 bd_hi = _mm_unpackhi_ps(b.val, d.val);   // (b2, d2, b3, d3)

    const __m128 ac = _mm_add_ps(ac_lo, ac_hi);           // (a0+a2, c0+c2, a1+a3, c1+c3)
    const __m128 bd = _mm_add_ps(bd_lo, bd_hi);           // (b0+b2, d0+d2, b1+b3, d1+d3)

    // A second interleave lines up the partial sums of a, b, c, d lane by lane.
    const __m128 even_parts = _mm_unpacklo_ps(ac, bd);    // (a0+a2, b0+b2, c0+c2, d0+d2)
    const __m128 odd_parts = _mm_unpackhi_ps(ac, bd);     // (a1+a3, b1+b3, c1+c3, d1+d3)
    return v_float32x4(_mm_add_ps(even_parts, odd_parts));
#endif
}
// Macro instantiation for v_uint32x4 / unsigned with the "max" operation and std::max.
// NOTE(review): the macro is defined outside this view; presumably it generates the
// 4-lane v_reduce_max overload — confirm against the macro definition.
OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_uint32x4, unsigned, max, std::max)

View File

@ -741,6 +741,23 @@ template<typename R> struct TheTest
return *this;
}
/**
 * Verifies v_reduce_sum4: lane i of the result must equal the sum of all
 * lanes of the i-th input vector.
 *
 * @return *this, so test cases can be chained.
 */
TheTest & test_reduce_sum4()
{
    R a(0.1f, 0.02f, 0.003f, 0.0004f);
    R b(1, 20, 300, 4000);
    R c(10, 2, 0.3f, 0.04f);
    R d(1, 2, 3, 4);

    R sum = v_reduce_sum4(a, b, c, d);

    Data<R> res = sum;
    // Lanes 0 and 2 add decimal fractions that are not exactly representable
    // in binary floating point, and the order of additions differs between
    // implementations (horizontal adds vs. unpack/add). Compare them with
    // gtest's ULP-based tolerance instead of bit-exact equality.
    EXPECT_FLOAT_EQ(0.1234f, res[0]);
    EXPECT_FLOAT_EQ(12.34f, res[2]);
    // Lanes 1 and 3 add small integers, which is exact in any order.
    EXPECT_EQ(4321.0f, res[1]);
    EXPECT_EQ(10.0f, res[3]);
    return *this;
}
TheTest & test_loadstore_fp16()
{
#if CV_FP16 && CV_SIMD128
@ -986,6 +1003,7 @@ TEST(hal_intrin, float32x4) {
.test_float_cvt64()
.test_matmul()
.test_transpose()
.test_reduce_sum4()
;
}