From 2610a47c897cf168aebe33c80883eae38022e353 Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Wed, 8 Nov 2017 17:37:29 +0800 Subject: [PATCH] core:ppc Fix 2 interleave logic errors in vsx_utils.hpp When elements are 64 bits wide, vec_st_interleave() and vec_ld_deinterleave() don't interleave 4 elements correctly. For vec_st_interleave(), the following is saved into memory: a0 b0 a1 b1 c0 d0 c1 d1 -> expected: a0 b0 c0 d0 a1 b1 c1 d1 For vec_ld_deinterleave(), the following is loaded into a b c d for the memory sequence { 1 2 3 4 5 6 7 8 }: a: 1 3 b: 2 4 c: 5 7 d: 6 8 -> expected: a: 1 5 b: 2 6 c: 3 7 d: 4 8 This patch corrects this behavior. Signed-off-by: Simon Guo --- modules/core/include/opencv2/core/vsx_utils.hpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/modules/core/include/opencv2/core/vsx_utils.hpp b/modules/core/include/opencv2/core/vsx_utils.hpp index 3ce190b9b6..592d996662 100644 --- a/modules/core/include/opencv2/core/vsx_utils.hpp +++ b/modules/core/include/opencv2/core/vsx_utils.hpp @@ -797,8 +797,8 @@ FORCE_INLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b, \ const Tvec& c, const Tvec& d, Tp* ptr) \ { \ st_func(vec_mergeh(a, b), 0, ptr); \ - st_func(vec_mergel(a, b), 2, ptr); \ - st_func(vec_mergeh(c, d), 4, ptr); \ + st_func(vec_mergeh(c, d), 2, ptr); \ + st_func(vec_mergel(a, b), 4, ptr); \ st_func(vec_mergel(c, d), 6, ptr); \ } \ FORCE_INLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b) \ @@ -813,12 +813,12 @@ FORCE_INLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b, \ { \ Tvec v0 = ld_func(0, ptr); \ Tvec v1 = ld_func(2, ptr); \ - a = vec_mergeh(v0, v1); \ - b = vec_mergel(v0, v1); \ - v0 = ld_func(4, ptr); \ - v1 = ld_func(6, ptr); \ - c = vec_mergeh(v0, v1); \ - d = vec_mergel(v0, v1); \ + Tvec v2 = ld_func(4, ptr); \ + Tvec v3 = ld_func(6, ptr); \ + a = vec_mergeh(v0, v2); \ + b = vec_mergel(v0, v2); \ + c = vec_mergeh(v1, v3); \ + d = vec_mergel(v1, v3); \ } 
VSX_IMPL_ST_D_INTERLEAVE_64(int64, vec_dword2, vsx_ld2, vsx_st2) VSX_IMPL_ST_D_INTERLEAVE_64(uint64, vec_udword2, vsx_ld2, vsx_st2)