mirror of
https://github.com/opencv/opencv.git
synced 2025-08-05 22:19:14 +08:00
core:ppc Fix 2 interleave logic errors in vsx_utils.hpp
When elements are 64 bits wide, the four-vector forms of vec_st_interleave() and vec_ld_deinterleave() do not interleave the elements correctly. For vec_st_interleave(), the following is stored to memory: a0 b0 a1 b1 c0 d0 c1 d1, whereas the expected layout is: a0 b0 c0 d0 a1 b1 c1 d1. For vec_ld_deinterleave(), given the memory contents { 1 2 3 4 5 6 7 8 }, the following is loaded into a, b, c, d: a: 1 3; b: 2 4; c: 5 7; d: 6 8, whereas the expected result is: a: 1 5; b: 2 6; c: 3 7; d: 4 8. This patch corrects both behaviors. Signed-off-by: Simon Guo <wei.guo.simon@gmail.com>
This commit is contained in:
parent
e89501a3ad
commit
2610a47c89
@ -797,8 +797,8 @@ FORCE_INLINE(void) vec_st_interleave(const Tvec& a, const Tvec& b, \
|
||||
const Tvec& c, const Tvec& d, Tp* ptr) \
|
||||
{ \
|
||||
st_func(vec_mergeh(a, b), 0, ptr); \
|
||||
st_func(vec_mergel(a, b), 2, ptr); \
|
||||
st_func(vec_mergeh(c, d), 4, ptr); \
|
||||
st_func(vec_mergeh(c, d), 2, ptr); \
|
||||
st_func(vec_mergel(a, b), 4, ptr); \
|
||||
st_func(vec_mergel(c, d), 6, ptr); \
|
||||
} \
|
||||
FORCE_INLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b) \
|
||||
@ -813,12 +813,12 @@ FORCE_INLINE(void) vec_ld_deinterleave(const Tp* ptr, Tvec& a, Tvec& b, \
|
||||
{ \
|
||||
Tvec v0 = ld_func(0, ptr); \
|
||||
Tvec v1 = ld_func(2, ptr); \
|
||||
a = vec_mergeh(v0, v1); \
|
||||
b = vec_mergel(v0, v1); \
|
||||
v0 = ld_func(4, ptr); \
|
||||
v1 = ld_func(6, ptr); \
|
||||
c = vec_mergeh(v0, v1); \
|
||||
d = vec_mergel(v0, v1); \
|
||||
Tvec v2 = ld_func(4, ptr); \
|
||||
Tvec v3 = ld_func(6, ptr); \
|
||||
a = vec_mergeh(v0, v2); \
|
||||
b = vec_mergel(v0, v2); \
|
||||
c = vec_mergeh(v1, v3); \
|
||||
d = vec_mergel(v1, v3); \
|
||||
}
|
||||
VSX_IMPL_ST_D_INTERLEAVE_64(int64, vec_dword2, vsx_ld2, vsx_st2)
|
||||
VSX_IMPL_ST_D_INTERLEAVE_64(uint64, vec_udword2, vsx_ld2, vsx_st2)
|
||||
|
Loading…
Reference in New Issue
Block a user