Mirror of https://github.com/opencv/opencv.git (synced 2025-08-06 14:36:36 +08:00)
SymmRowSmallVec_8u32s 1x3 asymmetric
NEON speedup: 1.95x
Auto-vect speedup: 1.17x
Test kernel: [-2, 0, 2]
This commit is contained in:
parent 969a218057
commit 2e7b9a2c0f
@@ -2370,9 +2370,32 @@ struct SymmRowSmallVec_8u32s
|
|||||||
vst1q_s32((int32_t *)(dst + i), vmovl_s16(vget_low_s16(y0)));
|
vst1q_s32((int32_t *)(dst + i), vmovl_s16(vget_low_s16(y0)));
|
||||||
vst1q_s32((int32_t *)(dst + i + 4), vmovl_s16(vget_high_s16(y0)));
|
vst1q_s32((int32_t *)(dst + i + 4), vmovl_s16(vget_high_s16(y0)));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
return 0;
|
int32x4_t k32 = vdupq_n_s32(0);
|
||||||
|
k32 = vld1q_lane_s32(kx + 1, k32, 1);
|
||||||
|
|
||||||
|
int16x4_t k = vqmovn_s32(k32);
|
||||||
|
|
||||||
|
uint8x8_t z = vdup_n_u8(0);
|
||||||
|
|
||||||
|
for( ; i <= width - 8; i += 8, src += 8 )
|
||||||
|
{
|
||||||
|
uint8x8_t x0, x1;
|
||||||
|
x0 = vld1_u8( (uint8_t *) (src - cn) );
|
||||||
|
x1 = vld1_u8( (uint8_t *) (src + cn) );
|
||||||
|
|
||||||
|
int16x8_t y0;
|
||||||
|
int32x4_t y1, y2;
|
||||||
|
y0 = vsubq_s16(vreinterpretq_s16_u16(vaddl_u8(x1, z)),
|
||||||
|
vreinterpretq_s16_u16(vaddl_u8(x0, z)));
|
||||||
|
y1 = vmull_lane_s16(vget_low_s16(y0), k, 1);
|
||||||
|
y2 = vmull_lane_s16(vget_high_s16(y0), k, 1);
|
||||||
|
|
||||||
|
vst1q_s32((int32_t *)(dst + i), y1);
|
||||||
|
vst1q_s32((int32_t *)(dst + i + 4), y2);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if( _ksize == 5 )
|
else if( _ksize == 5 )
|
||||||
|
Loading…
Reference in New Issue
Block a user