fixes in VResizeLinearVec_32s8u

This commit is contained in:
Ilya Lavrenov 2014-10-12 12:40:41 -07:00
parent a4dd2162f2
commit 15af3f7114

View File

@@ -917,22 +917,26 @@ struct VResizeLinearVec_32s8u
int16x8_t v_src0 = vcombine_s16(vmovn_s32(v_src00), vmovn_s32(v_src01));
int16x8_t v_src1 = vcombine_s16(vmovn_s32(v_src10), vmovn_s32(v_src11));
int16x8_t v_dst0 = vmlaq_s16(vmulq_s16(v_src0, v_b0), v_src1, v_b1);
int16x8_t v_dst0 = vaddq_s16(vshrq_n_s16(vqdmulhq_s16(v_src0, v_b0), 1),
vshrq_n_s16(vqdmulhq_s16(v_src1, v_b1), 1));
v_dst0 = vshrq_n_s16(vaddq_s16(v_dst0, v_delta), 2);
v_src00 = vshrq_n_s32(vld1q_s32(S0 + x + 8), 4), v_src10 = vshrq_n_s32(vld1q_s32(S1 + x + 8), 4);
v_src01 = vshrq_n_s32(vld1q_s32(S0 + x + 12), 4), v_src11 = vshrq_n_s32(vld1q_s32(S1 + x + 12), 4);
v_src00 = vshrq_n_s32(vld1q_s32(S0 + x + 8), 4);
v_src10 = vshrq_n_s32(vld1q_s32(S1 + x + 8), 4);
v_src01 = vshrq_n_s32(vld1q_s32(S0 + x + 12), 4);
v_src11 = vshrq_n_s32(vld1q_s32(S1 + x + 12), 4);
v_src0 = vcombine_s16(vmovn_s32(v_src00), vmovn_s32(v_src01));
v_src1 = vcombine_s16(vmovn_s32(v_src10), vmovn_s32(v_src11));
int16x8_t v_dst1 = vmlaq_s16(vmulq_s16(v_src0, v_b0), v_src1, v_b1);
int16x8_t v_dst1 = vaddq_s16(vshrq_n_s16(vqdmulhq_s16(v_src0, v_b0), 1),
vshrq_n_s16(vqdmulhq_s16(v_src1, v_b1), 1));
v_dst1 = vshrq_n_s16(vaddq_s16(v_dst1, v_delta), 2);
vst1q_u8(dst + x, vcombine_u8(vqmovun_s16(v_dst0), vqmovun_s16(v_dst1)));
}
return 0;
return x;
}
};