Merge pull request #23973 from mshabunin:riscv-unaligned-access

RISC-V: fix unaligned loads and stores
This commit is contained in:
Alexander Smorkalov 2023-07-12 14:51:56 +03:00 committed by GitHub
commit 85f0074f23
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 16 additions and 16 deletions

View File

@@ -603,10 +603,10 @@ flipVert( const uchar* src0, size_t sstep, uchar* dst0, size_t dstep, Size size,
{
for (; i <= size.width - CV_SIMD_WIDTH; i += CV_SIMD_WIDTH)
{
v_int32 t0 = vx_load((int*)(src0 + i));
v_int32 t1 = vx_load((int*)(src1 + i));
v_store((int*)(dst0 + i), t1);
v_store((int*)(dst1 + i), t0);
v_int32 t0 = v_reinterpret_as_s32(vx_load(src0 + i));
v_int32 t1 = v_reinterpret_as_s32(vx_load(src1 + i));
v_store(dst0 + i, v_reinterpret_as_u8(t1));
v_store(dst1 + i, v_reinterpret_as_u8(t0));
}
}
#if CV_STRONG_ALIGNMENT

View File

@@ -184,9 +184,9 @@ public:
for( ; bayer <= bayer_end - 18; bayer += 14, dst += 14 )
{
v_uint16x8 r0 = v_load((ushort*)bayer);
v_uint16x8 r1 = v_load((ushort*)(bayer+bayer_step));
v_uint16x8 r2 = v_load((ushort*)(bayer+bayer_step*2));
v_uint16x8 r0 = v_reinterpret_as_u16(v_load(bayer));
v_uint16x8 r1 = v_reinterpret_as_u16(v_load(bayer+bayer_step));
v_uint16x8 r2 = v_reinterpret_as_u16(v_load(bayer+bayer_step*2));
v_uint16x8 b1 = ((r0 << 8) >> 7) + ((r2 << 8) >> 7);
v_uint16x8 b0 = v_rotate_right<1>(b1) + b1;
@@ -265,9 +265,9 @@ public:
for( ; bayer <= bayer_end - 18; bayer += 14, dst += 42 )
{
v_uint16x8 r0 = v_load((ushort*)bayer);
v_uint16x8 r1 = v_load((ushort*)(bayer+bayer_step));
v_uint16x8 r2 = v_load((ushort*)(bayer+bayer_step*2));
v_uint16x8 r0 = v_reinterpret_as_u16(v_load(bayer));
v_uint16x8 r1 = v_reinterpret_as_u16(v_load(bayer+bayer_step));
v_uint16x8 r2 = v_reinterpret_as_u16(v_load(bayer+bayer_step*2));
v_uint16x8 b1 = (r0 & masklo) + (r2 & masklo);
v_uint16x8 nextb1 = v_rotate_right<1>(b1);
@@ -398,9 +398,9 @@ public:
for( ; bayer <= bayer_end - 18; bayer += 14, dst += 56 )
{
v_uint16x8 r0 = v_load((ushort*)bayer);
v_uint16x8 r1 = v_load((ushort*)(bayer+bayer_step));
v_uint16x8 r2 = v_load((ushort*)(bayer+bayer_step*2));
v_uint16x8 r0 = v_reinterpret_as_u16(v_load(bayer));
v_uint16x8 r1 = v_reinterpret_as_u16(v_load(bayer+bayer_step));
v_uint16x8 r2 = v_reinterpret_as_u16(v_load(bayer+bayer_step*2));
v_uint16x8 b1 = (r0 & masklo) + (r2 & masklo);
v_uint16x8 nextb1 = v_rotate_right<1>(b1);
@@ -494,9 +494,9 @@ public:
B G B G | B G B G | B G B G | B G B G
*/
v_uint16x8 r0 = v_load((ushort*)bayer);
v_uint16x8 r1 = v_load((ushort*)(bayer+bayer_step));
v_uint16x8 r2 = v_load((ushort*)(bayer+bayer_step*2));
v_uint16x8 r0 = v_reinterpret_as_u16(v_load(bayer));
v_uint16x8 r1 = v_reinterpret_as_u16(v_load(bayer+bayer_step));
v_uint16x8 r2 = v_reinterpret_as_u16(v_load(bayer+bayer_step*2));
v_uint16x8 b1 = (r0 & masklow) + (r2 & masklow);
v_uint16x8 nextb1 = v_rotate_right<1>(b1);