Mirror of https://github.com/opencv/opencv.git (last synced 2025-06-11 20:09:23 +08:00)
Fix for incorrect calcOpticalFlowPyrLK result evaluation with ARM NEON
This commit is contained in:
Parent: 44bda8fbb7
Commit: d8dc6caf09
@ -294,7 +294,7 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
|
||||
|
||||
#if CV_NEON
|
||||
|
||||
int CV_DECL_ALIGNED(16) nA11[] = {0, 0, 0, 0}, nA12[] = {0, 0, 0, 0}, nA22[] = {0, 0, 0, 0};
|
||||
float CV_DECL_ALIGNED(16) nA11[] = { 0, 0, 0, 0 }, nA12[] = { 0, 0, 0, 0 }, nA22[] = { 0, 0, 0, 0 };
|
||||
const int shifter1 = -(W_BITS - 5); //negative so it shifts right
|
||||
const int shifter2 = -(W_BITS);
|
||||
|
||||
@ -406,19 +406,19 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
|
||||
q6 = vaddq_s32(q6, q8);
|
||||
|
||||
q7 = vmull_s16(d4d5.val[0], d28);
|
||||
int32x4_t nq0 = vmull_s16(d4d5.val[1], d28);
|
||||
int32x4_t q14 = vmull_s16(d4d5.val[1], d28);
|
||||
q8 = vmull_s16(d6d7.val[0], d29);
|
||||
int32x4_t q15 = vmull_s16(d6d7.val[1], d29);
|
||||
|
||||
q7 = vaddq_s32(q7, q8);
|
||||
nq0 = vaddq_s32(nq0, q15);
|
||||
q14 = vaddq_s32(q14, q15);
|
||||
|
||||
q4 = vaddq_s32(q4, q7);
|
||||
q6 = vaddq_s32(q6, nq0);
|
||||
q6 = vaddq_s32(q6, q14);
|
||||
|
||||
int32x4_t nq1 = vld1q_s32(nA12);
|
||||
int32x4_t nq2 = vld1q_s32(nA22);
|
||||
nq0 = vld1q_s32(nA11);
|
||||
float32x4_t nq0 = vld1q_f32(nA11);
|
||||
float32x4_t nq1 = vld1q_f32(nA12);
|
||||
float32x4_t nq2 = vld1q_f32(nA22);
|
||||
|
||||
q4 = vqrshlq_s32(q4, q12);
|
||||
q6 = vqrshlq_s32(q6, q12);
|
||||
@ -427,13 +427,13 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
|
||||
q8 = vmulq_s32(q4, q6);
|
||||
q15 = vmulq_s32(q6, q6);
|
||||
|
||||
nq0 = vaddq_s32(nq0, q7);
|
||||
nq1 = vaddq_s32(nq1, q8);
|
||||
nq2 = vaddq_s32(nq2, q15);
|
||||
nq0 = vaddq_f32(nq0, vreinterpretq_f32_s32(q7));
|
||||
nq1 = vaddq_f32(nq1, vreinterpretq_f32_s32(q8));
|
||||
nq2 = vaddq_f32(nq2, vreinterpretq_f32_s32(q15));
|
||||
|
||||
vst1q_s32(nA11, nq0);
|
||||
vst1q_s32(nA12, nq1);
|
||||
vst1q_s32(nA22, nq2);
|
||||
vst1q_f32(nA11, nq0);
|
||||
vst1q_f32(nA12, nq1);
|
||||
vst1q_f32(nA22, nq2);
|
||||
|
||||
int16x4_t d8 = vmovn_s32(q4);
|
||||
int16x4_t d12 = vmovn_s32(q6);
|
||||
@ -474,9 +474,9 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
|
||||
#endif
|
||||
|
||||
#if CV_NEON
|
||||
iA11 += (float)(nA11[0] + nA11[1] + nA11[2] + nA11[3]);
|
||||
iA12 += (float)(nA12[0] + nA12[1] + nA12[2] + nA12[3]);
|
||||
iA22 += (float)(nA22[0] + nA22[1] + nA22[2] + nA22[3]);
|
||||
iA11 += nA11[0] + nA11[1] + nA11[2] + nA11[3];
|
||||
iA12 += nA12[0] + nA12[1] + nA12[2] + nA12[3];
|
||||
iA22 += nA22[0] + nA22[1] + nA22[2] + nA22[3];
|
||||
#endif
|
||||
|
||||
A11 = iA11*FLT_SCALE;
|
||||
@ -530,7 +530,7 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
|
||||
#endif
|
||||
|
||||
#if CV_NEON
|
||||
int CV_DECL_ALIGNED(16) nB1[] = {0,0,0,0}, nB2[] = {0,0,0,0};
|
||||
float CV_DECL_ALIGNED(16) nB1[] = { 0,0,0,0 }, nB2[] = { 0,0,0,0 };
|
||||
|
||||
const int16x4_t d26_2 = vdup_n_s16((int16_t)iw00);
|
||||
const int16x4_t d27_2 = vdup_n_s16((int16_t)iw01);
|
||||
@ -625,8 +625,8 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
|
||||
nq5 = vqrshlq_s32(nq5, q11);
|
||||
|
||||
int16x8x2_t q0q1 = vld2q_s16(dIptr);
|
||||
nq11 = vld1q_s32(nB1);
|
||||
int32x4_t nq15 = vld1q_s32(nB2);
|
||||
float32x4_t nB1v = vld1q_f32(nB1);
|
||||
float32x4_t nB2v = vld1q_f32(nB2);
|
||||
|
||||
nq4 = vsubq_s32(nq4, nq6);
|
||||
nq5 = vsubq_s32(nq5, nq8);
|
||||
@ -646,11 +646,11 @@ void cv::detail::LKTrackerInvoker::operator()(const Range& range) const
|
||||
nq9 = vaddq_s32(nq9, nq10);
|
||||
nq4 = vaddq_s32(nq4, nq5);
|
||||
|
||||
nq11 = vaddq_s32(nq11, nq9);
|
||||
nq15 = vaddq_s32(nq15, nq4);
|
||||
nB1v = vaddq_f32(nB1v, vreinterpretq_f32_s32(nq9));
|
||||
nB2v = vaddq_f32(nB2v, vreinterpretq_f32_s32(nq4));
|
||||
|
||||
vst1q_s32(nB1, nq11);
|
||||
vst1q_s32(nB2, nq15);
|
||||
vst1q_f32(nB1, nB1v);
|
||||
vst1q_f32(nB2, nB2v);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user