mirror of
https://github.com/opencv/opencv.git
synced 2025-01-19 06:53:50 +08:00
Merge pull request #8911 from alalek:fix_vsum4
This commit is contained in:
commit
cf86f88c71
@ -1129,9 +1129,15 @@ OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(v_float32x4, float, __m128, ps, _mm_castps_s
|
||||
/** Reduces four float vectors in one call.
 *
 *  Lane i of the result holds the sum of the four lanes of the i-th argument,
 *  i.e. the result is (sum(a), sum(b), sum(c), sum(d)).
 *
 *  The two code paths add the lanes in a different order: (x0+x1)+(x2+x3) when
 *  CV_SSE3 is set, (x0+x2)+(x1+x3) otherwise. Their results may therefore differ
 *  in floating-point rounding.
 */
inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
                                 const v_float32x4& c, const v_float32x4& d)
{
#if CV_SSE3
    // Two rounds of horizontal adds: first adjacent pairs inside each input,
    // then adjacent pairs of those partial sums.
    const __m128 pairs_ab = _mm_hadd_ps(a.val, b.val);   // (a0+a1, a2+a3, b0+b1, b2+b3)
    const __m128 pairs_cd = _mm_hadd_ps(c.val, d.val);   // (c0+c1, c2+c3, d0+d1, d2+d3)
    const __m128 totals = _mm_hadd_ps(pairs_ab, pairs_cd);
    return v_float32x4(totals);
#else
    // Fallback without horizontal add: interleave the inputs and use vertical adds.
    const __m128 ac_low  = _mm_unpacklo_ps(a.val, c.val);   // (a0, c0, a1, c1)
    const __m128 ac_high = _mm_unpackhi_ps(a.val, c.val);   // (a2, c2, a3, c3)
    const __m128 bd_low  = _mm_unpacklo_ps(b.val, d.val);   // (b0, d0, b1, d1)
    const __m128 bd_high = _mm_unpackhi_ps(b.val, d.val);   // (b2, d2, b3, d3)

    const __m128 half_ac = _mm_add_ps(ac_low, ac_high);     // (a0+a2, c0+c2, a1+a3, c1+c3)
    const __m128 half_bd = _mm_add_ps(bd_low, bd_high);     // (b0+b2, d0+d2, b1+b3, d1+d3)

    // Interleaving the half sums puts the a/b/c/d partials into lanes 0..3.
    const __m128 even_part = _mm_unpacklo_ps(half_ac, half_bd);
    const __m128 odd_part  = _mm_unpackhi_ps(half_ac, half_bd);
    return v_float32x4(_mm_add_ps(even_part, odd_part));
#endif
}
|
||||
|
||||
// Invokes OPENCV_HAL_IMPL_SSE_REDUCE_OP_4 for v_uint32x4 / unsigned with the
// names `max` and std::max.
// NOTE(review): the macro body is not visible in this hunk; presumably it defines
// the 4-lane v_reduce_max overload - confirm against the macro definition.
OPENCV_HAL_IMPL_SSE_REDUCE_OP_4(v_uint32x4, unsigned, max, std::max)
|
||||
|
@ -741,6 +741,23 @@ template<typename R> struct TheTest
|
||||
return *this;
|
||||
}
|
||||
|
||||
/** Checks v_reduce_sum4: lane i of the result must equal the sum of the four
 *  lanes of the i-th input vector.
 *
 *  Sums are compared with EXPECT_FLOAT_EQ rather than exact equality. The
 *  implementation may add the lanes in a different order depending on the
 *  code path, so a sum such as 0.1 + 0.02 + 0.003 + 0.0004 can differ from the
 *  literal 0.1234f in the last bits.
 *
 *  @return *this, so that test calls can be chained.
 */
TheTest & test_reduce_sum4()
{
    // Each input uses distinct decimal digits per lane so that a lane mix-up
    // shows up as a visibly wrong sum.
    R a(0.1f, 0.02f, 0.003f, 0.0004f);
    R b(1, 20, 300, 4000);
    R c(10, 2, 0.3f, 0.04f);
    R d(1, 2, 3, 4);

    R sum = v_reduce_sum4(a, b, c, d);

    Data<R> res = sum;
    EXPECT_FLOAT_EQ(0.1234f, res[0]);   // sum of a
    EXPECT_FLOAT_EQ(4321.0f, res[1]);   // sum of b
    EXPECT_FLOAT_EQ(12.34f, res[2]);    // sum of c
    EXPECT_FLOAT_EQ(10.0f, res[3]);     // sum of d
    return *this;
}
|
||||
|
||||
TheTest & test_loadstore_fp16()
|
||||
{
|
||||
#if CV_FP16 && CV_SIMD128
|
||||
@ -986,6 +1003,7 @@ TEST(hal_intrin, float32x4) {
|
||||
.test_float_cvt64()
|
||||
.test_matmul()
|
||||
.test_transpose()
|
||||
.test_reduce_sum4()
|
||||
;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user