/*
 * By downloading, copying, installing or using the software you agree to this license.
 * If you do not agree to this license, do not download, install,
 * copy or use the software.
 *
 *
 *                          License Agreement
 *               For Open Source Computer Vision Library
 *                       (3-clause BSD License)
 *
 * Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved.
 * Third party copyrights are property of their respective owners.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *
 *   * Neither the names of the copyright holders nor the names of the contributors
 *     may be used to endorse or promote products derived from this software
 *     without specific prior written permission.
 *
 * This software is provided by the copyright holders and contributors "as is" and
 * any express or implied warranties, including, but not limited to, the implied
 * warranties of merchantability and fitness for a particular purpose are disclaimed.
 * In no event shall copyright holders or contributors be liable for any direct,
 * indirect, incidental, special, exemplary, or consequential damages
 * (including, but not limited to, procurement of substitute goods or services;
 * loss of use, data, or profits; or business interruption) however caused
 * and on any theory of liability, whether in contract, strict liability,
 * or tort (including negligence or otherwise) arising in any way out of
 * the use of this software, even if advised of the possibility of such damage.
 */
#include "common.hpp"
#include "saturate_cast.hpp"
#include <vector>
#include <float.h> // For FLT_EPSILON

namespace CAROTENE_NS {

#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))
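// CV_DESCALE(x, n) is a rounding right shift: it adds half of the divisor
// (1 << (n-1)) before shifting, so fixed-point sums are rounded to nearest
// instead of truncated.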
/*
 * Pyramidal Lucas-Kanade Optical Flow level processing
 */
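// Parameter summary (inferred from the code below, not from external documentation):
//   prevData/prevStride                 - previous image at this pyramid level
//   prevDerivData/prevDerivStride       - interleaved (dI/dx, dI/dy) derivatives of prevData
//   nextData/nextStride                 - current image at this pyramid level
//   prevPts/nextPts                     - interleaved x,y point coordinates, ptCount points each
//   status/err                          - optional per-point tracking status and error output
//   winSize                             - integration window size
//   terminationCount/terminationEpsilon - iteration limit and minimum squared step length
//   level/maxLevel                      - current and coarsest pyramid level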
void pyrLKOptFlowLevel(const Size2D &size, s32 cn,
                       const u8 *prevData, ptrdiff_t prevStride,
                       const s16 *prevDerivData, ptrdiff_t prevDerivStride,
                       const u8 *nextData, ptrdiff_t nextStride,
                       u32 ptCount,
                       const f32 *prevPts, f32 *nextPts,
                       u8 *status, f32 *err,
                       const Size2D &winSize,
                       u32 terminationCount, f64 terminationEpsilon,
                       u32 level, u32 maxLevel, bool useInitialFlow, bool getMinEigenVals,
                       f32 minEigThreshold)
{
    internal::assertSupportedConfiguration();
#ifdef CAROTENE_NEON
    f32 halfWinX = (winSize.width-1)*0.5f, halfWinY = (winSize.height-1)*0.5f;
    s32 cn2 = cn*2;

    std::vector<s16> _buf(winSize.total()*(cn + cn2));
    s16* IWinBuf = &_buf[0];
    s32 IWinBufStride = winSize.width*cn;
    s16* derivIWinBuf = &_buf[winSize.total()*cn];
    s32 derivIWinBufStride = winSize.width*cn2;

    for( u32 ptidx = 0; ptidx < ptCount; ptidx++ )
    {
        f32 levscale = (1./(1 << level));
        u32 ptref = ptidx << 1;
        f32 prevPtX = prevPts[ptref+0]*levscale;
        f32 prevPtY = prevPts[ptref+1]*levscale;
        f32 nextPtX;
        f32 nextPtY;
        if( level == maxLevel )
        {
            if( useInitialFlow )
            {
                nextPtX = nextPts[ptref+0]*levscale;
                nextPtY = nextPts[ptref+1]*levscale;
            }
            else
            {
                nextPtX = prevPtX;
                nextPtY = prevPtY;
            }
        }
        else
        {
            nextPtX = nextPts[ptref+0]*2.f;
            nextPtY = nextPts[ptref+1]*2.f;
        }
        nextPts[ptref+0] = nextPtX;
        nextPts[ptref+1] = nextPtY;

        s32 iprevPtX, iprevPtY;
        s32 inextPtX, inextPtY;
        prevPtX -= halfWinX;
        prevPtY -= halfWinY;
        iprevPtX = floor(prevPtX);
        iprevPtY = floor(prevPtY);

        if( iprevPtX < -(s32)winSize.width || iprevPtX >= (s32)size.width ||
            iprevPtY < -(s32)winSize.height || iprevPtY >= (s32)size.height )
        {
            if( level == 0 )
            {
                if( status )
                    status[ptidx] = false;
                if( err )
                    err[ptidx] = 0;
            }
            continue;
        }

        f32 a = prevPtX - iprevPtX;
        f32 b = prevPtY - iprevPtY;
        const s32 W_BITS = 14, W_BITS1 = 14;
        const f32 FLT_SCALE = 1.f/(1 << 20);
        s32 iw00 = round((1.f - a)*(1.f - b)*(1 << W_BITS));
        s32 iw01 = round(a*(1.f - b)*(1 << W_BITS));
        s32 iw10 = round((1.f - a)*b*(1 << W_BITS));
        s32 iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
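        // The four iw** values are Q14 fixed-point bilinear interpolation weights
        // for the sub-pixel offset (a, b); they sum to exactly 1 << W_BITS.
        // FLT_SCALE is applied later to bring the accumulated sums back to a
        // convenient floating-point range.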

        s32 dstep = prevDerivStride/sizeof(s16);
        f32 A11 = 0, A12 = 0, A22 = 0;

        int16x4_t viw00 = vmov_n_s16((s16)iw00);
        int16x4_t viw01 = vmov_n_s16((s16)iw01);
        int16x4_t viw10 = vmov_n_s16((s16)iw10);
        int16x4_t viw11 = vmov_n_s16((s16)iw11);

        float32x4_t vA11 = vmovq_n_f32(0);
        float32x4_t vA12 = vmovq_n_f32(0);
        float32x4_t vA22 = vmovq_n_f32(0);

        s32 wwcn = winSize.width*cn;

        // extract the patch from the first image, compute covariance matrix of derivatives
        s32 x = 0;
        for(s32 y = 0; y < (s32)winSize.height; y++ )
        {
            const u8* src = prevData + prevStride*(y + iprevPtY) + iprevPtX*cn;
            const s16* dsrc = prevDerivData + dstep*(y + iprevPtY) + iprevPtX*cn2;

            s16* Iptr = IWinBuf + y*IWinBufStride;
            s16* dIptr = derivIWinBuf + y*derivIWinBufStride;

            internal::prefetch(src + x + prevStride * 2, 0);
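            // Bilinearly interpolate the previous-image window at the sub-pixel
            // location, 8 pixels at a time with widening NEON multiplies, then
            // narrow with a rounding shift by W_BITS1-5 so IWinBuf keeps
            // 5 fractional bits of precision.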
            for(x = 0; x <= wwcn - 8; x += 8)
            {
                uint8x8_t vsrc00 = vld1_u8(src + x);
                uint8x8_t vsrc10 = vld1_u8(src + x + prevStride);
                uint8x8_t vsrc01 = vld1_u8(src + x + cn);
                uint8x8_t vsrc11 = vld1_u8(src + x + prevStride + cn);

                int16x8_t vs00 = vreinterpretq_s16_u16(vmovl_u8(vsrc00));
                int16x8_t vs10 = vreinterpretq_s16_u16(vmovl_u8(vsrc10));
                int16x8_t vs01 = vreinterpretq_s16_u16(vmovl_u8(vsrc01));
                int16x8_t vs11 = vreinterpretq_s16_u16(vmovl_u8(vsrc11));

                int32x4_t vsuml = vmull_s16(vget_low_s16(vs00), viw00);
                int32x4_t vsumh = vmull_s16(vget_high_s16(vs10), viw10);

                vsuml = vmlal_s16(vsuml, vget_low_s16(vs01), viw01);
                vsumh = vmlal_s16(vsumh, vget_high_s16(vs11), viw11);

                vsuml = vmlal_s16(vsuml, vget_low_s16(vs10), viw10);
                vsumh = vmlal_s16(vsumh, vget_high_s16(vs00), viw00);

                vsuml = vmlal_s16(vsuml, vget_low_s16(vs11), viw11);
                vsumh = vmlal_s16(vsumh, vget_high_s16(vs01), viw01);

                int16x4_t vsumnl = vrshrn_n_s32(vsuml, W_BITS1-5);
                int16x4_t vsumnh = vrshrn_n_s32(vsumh, W_BITS1-5);

                vst1q_s16(Iptr + x, vcombine_s16(vsumnl, vsumnh));
            }
            for(; x <= wwcn - 4; x += 4)
            {
                uint8x8_t vsrc00 = vld1_u8(src + x);
                uint8x8_t vsrc10 = vld1_u8(src + x + prevStride);
                uint8x8_t vsrc01 = vld1_u8(src + x + cn);
                uint8x8_t vsrc11 = vld1_u8(src + x + prevStride + cn);

                int16x4_t vs00 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(vsrc00)));
                int16x4_t vs10 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(vsrc10)));
                int16x4_t vs01 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(vsrc01)));
                int16x4_t vs11 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(vsrc11)));

                int32x4_t vsuml1 = vmull_s16(vs00, viw00);
                int32x4_t vsuml2 = vmull_s16(vs01, viw01);
                vsuml1 = vmlal_s16(vsuml1, vs10, viw10);
                vsuml2 = vmlal_s16(vsuml2, vs11, viw11);
                int32x4_t vsuml = vaddq_s32(vsuml1, vsuml2);

                int16x4_t vsumnl = vrshrn_n_s32(vsuml, W_BITS1-5);

                vst1_s16(Iptr + x, vsumnl);
            }

            internal::prefetch(dsrc + dstep * 2, 0);
            for(x = 0; x <= wwcn - 4; x += 4, dsrc += 4*2, dIptr += 4*2 )
            {
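                // The condition below is never true (__GNUC_MINOR__ is never
                // negative), so the intrinsics branch after #else is what gets
                // compiled; the inline-asm variant appears to be kept only for
                // reference.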
#if __GNUC_MINOR__ < 0
                __asm__ (
                    "vld2.16 {d0-d1}, [%[dsrc00]]          \n\t"
                    "vld2.16 {d2-d3}, [%[dsrc10]]          \n\t"
                    "vld2.16 {d4-d5}, [%[dsrc01]]          \n\t"
                    "vld2.16 {d6-d7}, [%[dsrc11]]          \n\t"
                    "vmull.s16 q4, d3, %P[viw10]           \n\t"
                    "vmull.s16 q5, d0, %P[viw00]           \n\t"
                    "vmlal.s16 q4, d7, %P[viw11]           \n\t"
                    "vmlal.s16 q5, d4, %P[viw01]           \n\t"
                    "vmlal.s16 q4, d1, %P[viw00]           \n\t"
                    "vmlal.s16 q5, d2, %P[viw10]           \n\t"
                    "vmlal.s16 q4, d5, %P[viw01]           \n\t"
                    "vmlal.s16 q5, d6, %P[viw11]           \n\t"
                    "vrshrn.s32 d13, q4, %[W_BITS1]        \n\t"
                    "vrshrn.s32 d12, q5, %[W_BITS1]        \n\t"
                    "vmull.s16 q3, d13, d13                \n\t"
                    "vmull.s16 q4, d12, d12                \n\t"
                    "vmull.s16 q5, d13, d12                \n\t"
                    "vcvt.f32.s32 q3, q3                   \n\t"
                    "vcvt.f32.s32 q4, q4                   \n\t"
                    "vcvt.f32.s32 q5, q5                   \n\t"
                    "vadd.f32 %q[vA22], q3                 \n\t"
                    "vadd.f32 %q[vA11], q4                 \n\t"
                    "vadd.f32 %q[vA12], q5                 \n\t"
                    "vst2.16 {d12-d13}, [%[out]]           \n\t"
                    : [vA22] "=w" (vA22),
                      [vA11] "=w" (vA11),
                      [vA12] "=w" (vA12)
                    : "0" (vA22),
                      "1" (vA11),
                      "2" (vA12),
                      [out] "r" (dIptr),
                      [dsrc00] "r" (dsrc),
                      [dsrc10] "r" (dsrc + dstep),
                      [dsrc01] "r" (dsrc + cn2),
                      [dsrc11] "r" (dsrc + dstep + cn2),
                      [viw00] "w" (viw00),
                      [viw10] "w" (viw10),
                      [viw01] "w" (viw01),
                      [viw11] "w" (viw11),
                      [W_BITS1] "I" (W_BITS1)
                    : "d0","d1","d2","d3","d4","d5","d6","d7","d8","d9","d10","d11","d12","d13"
                );
#else
                int16x4x2_t vdsrc00 = vld2_s16(dsrc);
                int16x4x2_t vdsrc10 = vld2_s16(dsrc + dstep);
                int16x4x2_t vdsrc01 = vld2_s16(dsrc + cn2);
                int16x4x2_t vdsrc11 = vld2_s16(dsrc + dstep + cn2);

                int32x4_t vsumy = vmull_s16(vdsrc10.val[1], viw10);
                int32x4_t vsumx = vmull_s16(vdsrc00.val[0], viw00);

                vsumy = vmlal_s16(vsumy, vdsrc11.val[1], viw11);
                vsumx = vmlal_s16(vsumx, vdsrc01.val[0], viw01);

                vsumy = vmlal_s16(vsumy, vdsrc00.val[1], viw00);
                vsumx = vmlal_s16(vsumx, vdsrc10.val[0], viw10);

                vsumy = vmlal_s16(vsumy, vdsrc01.val[1], viw01);
                vsumx = vmlal_s16(vsumx, vdsrc11.val[0], viw11);

                int16x4_t vsumny = vrshrn_n_s32(vsumy, W_BITS1);
                int16x4_t vsumnx = vrshrn_n_s32(vsumx, W_BITS1);

                int32x4_t va22i = vmull_s16(vsumny, vsumny);
                int32x4_t va11i = vmull_s16(vsumnx, vsumnx);
                int32x4_t va12i = vmull_s16(vsumnx, vsumny);

                float32x4_t va22f = vcvtq_f32_s32(va22i);
                float32x4_t va11f = vcvtq_f32_s32(va11i);
                float32x4_t va12f = vcvtq_f32_s32(va12i);

                vA22 = vaddq_f32(vA22, va22f);
                vA11 = vaddq_f32(vA11, va11f);
                vA12 = vaddq_f32(vA12, va12f);

                int16x4x2_t vsum;
                vsum.val[0] = vsumnx;
                vsum.val[1] = vsumny;
                vst2_s16(dIptr, vsum);
#endif
            }

            for( ; x < wwcn; x++, dsrc += 2, dIptr += 2 )
            {
                s32 ival = CV_DESCALE(src[x]*iw00 + src[x+cn]*iw01 +
                                      src[x+prevStride]*iw10 + src[x+prevStride+cn]*iw11, W_BITS1-5);
                s32 ixval = CV_DESCALE(dsrc[0]*iw00 + dsrc[cn2]*iw01 +
                                       dsrc[dstep]*iw10 + dsrc[dstep+cn2]*iw11, W_BITS1);
                s32 iyval = CV_DESCALE(dsrc[1]*iw00 + dsrc[cn2+1]*iw01 + dsrc[dstep+1]*iw10 +
                                       dsrc[dstep+cn2+1]*iw11, W_BITS1);
                Iptr[x] = (s16)ival;
                dIptr[0] = (s16)ixval;
                dIptr[1] = (s16)iyval;

                A11 += (f32)(ixval*ixval);
                A12 += (f32)(ixval*iyval);
                A22 += (f32)(iyval*iyval);
            }
        }
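        // A11, A12, A22 (plus the NEON partial sums vA11, vA12, vA22) accumulate
        // sums of Ix*Ix, Ix*Iy and Iy*Iy over the window, i.e. the 2x2 spatial
        // gradient matrix G = [[A11, A12], [A12, A22]] of the Lucas-Kanade system.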
        f32 A11buf[2], A12buf[2], A22buf[2];
        vst1_f32(A11buf, vadd_f32(vget_low_f32(vA11), vget_high_f32(vA11)));
        vst1_f32(A12buf, vadd_f32(vget_low_f32(vA12), vget_high_f32(vA12)));
        vst1_f32(A22buf, vadd_f32(vget_low_f32(vA22), vget_high_f32(vA22)));
        A11 += A11buf[0] + A11buf[1];
        A12 += A12buf[0] + A12buf[1];
        A22 += A22buf[0] + A22buf[1];

        A11 *= FLT_SCALE;
        A12 *= FLT_SCALE;
        A22 *= FLT_SCALE;
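        // D is the determinant of G and minEig its smaller eigenvalue, normalized
        // by the number of pixels in the window; points with a nearly singular G
        // cannot be tracked reliably and are rejected below.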
        f32 D = A11*A22 - A12*A12;
        f32 minEig = (A22 + A11 - std::sqrt((A11-A22)*(A11-A22) +
                      4.f*A12*A12))/(2*winSize.width*winSize.height);

        if( err && getMinEigenVals )
            err[ptidx] = (f32)minEig;

        if( minEig < minEigThreshold || D < FLT_EPSILON )
        {
            if( level == 0 && status )
                status[ptidx] = false;
            continue;
        }

        D = 1.f/D;

        nextPtX -= halfWinX;
        nextPtY -= halfWinY;
        f32 prevDeltaX = 0;
        f32 prevDeltaY = 0;
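        // Iterative refinement: each iteration interpolates the window in the
        // next image, builds the mismatch vector b = sum(diff * [Ix, Iy]) and
        // takes the Newton step delta = -G^-1 * b, until the step is small
        // enough, the iteration limit is reached, or the point leaves the image.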
        for(u32 j = 0; j < terminationCount; j++ )
        {
            inextPtX = floor(nextPtX);
            inextPtY = floor(nextPtY);

            if( inextPtX < -(s32)winSize.width || inextPtX >= (s32)size.width ||
                inextPtY < -(s32)winSize.height || inextPtY >= (s32)size.height )
            {
                if( level == 0 && status )
                    status[ptidx] = false;
                break;
            }

            a = nextPtX - inextPtX;
            b = nextPtY - inextPtY;
            iw00 = round((1.f - a)*(1.f - b)*(1 << W_BITS));
            iw01 = round(a*(1.f - b)*(1 << W_BITS));
            iw10 = round((1.f - a)*b*(1 << W_BITS));
            iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
            f32 b1 = 0, b2 = 0;

            viw00 = vmov_n_s16((s16)iw00);
            viw01 = vmov_n_s16((s16)iw01);
            viw10 = vmov_n_s16((s16)iw10);
            viw11 = vmov_n_s16((s16)iw11);

            float32x4_t vb1 = vmovq_n_f32(0);
            float32x4_t vb2 = vmovq_n_f32(0);

            for(s32 y = 0; y < (s32)winSize.height; y++ )
            {
                const u8* Jptr = nextData + nextStride*(y + inextPtY) + inextPtX*cn;
                const s16* Iptr = IWinBuf + y*IWinBufStride;
                const s16* dIptr = derivIWinBuf + y*derivIWinBufStride;

                x = 0;

                internal::prefetch(Jptr, nextStride * 2);
                internal::prefetch(Iptr, IWinBufStride/2);
                internal::prefetch(dIptr, derivIWinBufStride/2);
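                // Interpolate the next-image window with the same Q14 weights and
                // accumulate diff * (Ix, Iy) into the NEON partial sums vb1, vb2;
                // diff is the interpolated next-image value minus the stored
                // previous-image window value.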
                for( ; x <= wwcn - 8; x += 8, dIptr += 8*2 )
                {
                    uint8x8_t vj00 = vld1_u8(Jptr + x);
                    uint8x8_t vj10 = vld1_u8(Jptr + x + nextStride);
                    uint8x8_t vj01 = vld1_u8(Jptr + x + cn);
                    uint8x8_t vj11 = vld1_u8(Jptr + x + nextStride + cn);
                    int16x8_t vI = vld1q_s16(Iptr + x);
                    int16x8x2_t vDerivI = vld2q_s16(dIptr);

                    int16x8_t vs00 = vreinterpretq_s16_u16(vmovl_u8(vj00));
                    int16x8_t vs10 = vreinterpretq_s16_u16(vmovl_u8(vj10));
                    int16x8_t vs01 = vreinterpretq_s16_u16(vmovl_u8(vj01));
                    int16x8_t vs11 = vreinterpretq_s16_u16(vmovl_u8(vj11));

                    int32x4_t vsuml = vmull_s16(vget_low_s16(vs00), viw00);
                    int32x4_t vsumh = vmull_s16(vget_high_s16(vs10), viw10);

                    vsuml = vmlal_s16(vsuml, vget_low_s16(vs01), viw01);
                    vsumh = vmlal_s16(vsumh, vget_high_s16(vs11), viw11);

                    vsuml = vmlal_s16(vsuml, vget_low_s16(vs10), viw10);
                    vsumh = vmlal_s16(vsumh, vget_high_s16(vs00), viw00);

                    vsuml = vmlal_s16(vsuml, vget_low_s16(vs11), viw11);
                    vsumh = vmlal_s16(vsumh, vget_high_s16(vs01), viw01);

                    int16x4_t vsumnl = vrshrn_n_s32(vsuml, W_BITS1-5);
                    int16x4_t vsumnh = vrshrn_n_s32(vsumh, W_BITS1-5);

                    int16x8_t diff = vqsubq_s16(vcombine_s16(vsumnl, vsumnh), vI);

                    int32x4_t vb1l = vmull_s16(vget_low_s16(diff), vget_low_s16(vDerivI.val[0]));
                    int32x4_t vb2h = vmull_s16(vget_high_s16(diff), vget_high_s16(vDerivI.val[1]));
                    int32x4_t vb1i = vmlal_s16(vb1l, vget_high_s16(diff), vget_high_s16(vDerivI.val[0]));
                    int32x4_t vb2i = vmlal_s16(vb2h, vget_low_s16(diff), vget_low_s16(vDerivI.val[1]));

                    float32x4_t vb1f = vcvtq_f32_s32(vb1i);
                    float32x4_t vb2f = vcvtq_f32_s32(vb2i);

                    vb1 = vaddq_f32(vb1, vb1f);
                    vb2 = vaddq_f32(vb2, vb2f);
                }

                for( ; x < wwcn; x++, dIptr += 2 )
                {
                    s32 diff = CV_DESCALE(Jptr[x]*iw00 + Jptr[x+cn]*iw01 +
                                          Jptr[x+nextStride]*iw10 + Jptr[x+nextStride+cn]*iw11,
                                          W_BITS1-5) - Iptr[x];
                    b1 += (f32)(diff*dIptr[0]);
                    b2 += (f32)(diff*dIptr[1]);
                }
            }

            f32 bbuf[2];
            float32x2_t vb = vpadd_f32(vadd_f32(vget_low_f32(vb1), vget_high_f32(vb1)), vadd_f32(vget_low_f32(vb2), vget_high_f32(vb2)));
            vst1_f32(bbuf, vb);
            b1 += bbuf[0];
            b2 += bbuf[1];

            b1 *= FLT_SCALE;
            b2 *= FLT_SCALE;
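            // Solve the 2x2 system G * delta = -b using the precomputed inverse
            // determinant D (Cramer's rule written out explicitly).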
            f32 deltaX = (f32)((A12*b2 - A22*b1) * D);
            f32 deltaY = (f32)((A12*b1 - A11*b2) * D);

            nextPtX += deltaX;
            nextPtY += deltaY;
            nextPts[ptref+0] = nextPtX + halfWinX;
            nextPts[ptref+1] = nextPtY + halfWinY;

            if( ((double)deltaX*deltaX + (double)deltaY*deltaY) <= terminationEpsilon )
                break;
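            // If the last two steps nearly cancel each other the solution is
            // oscillating between two positions; settle halfway and stop.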
            if( j > 0 && std::abs(deltaX + prevDeltaX) < 0.01 &&
                std::abs(deltaY + prevDeltaY) < 0.01 )
            {
                nextPts[ptref+0] -= deltaX*0.5f;
                nextPts[ptref+1] -= deltaY*0.5f;
                break;
            }
            prevDeltaX = deltaX;
            prevDeltaY = deltaY;
        }

        if( status && status[ptidx] && err && level == 0 && !getMinEigenVals )
        {
            f32 nextPointX = nextPts[ptref+0] - halfWinX;
            f32 nextPointY = nextPts[ptref+1] - halfWinY;

            s32 inextPointX = floor(nextPointX);
            s32 inextPointY = floor(nextPointY);

            if( inextPointX < -(s32)winSize.width || inextPointX >= (s32)size.width ||
                inextPointY < -(s32)winSize.height || inextPointY >= (s32)size.height )
            {
                if( status )
                    status[ptidx] = false;
                continue;
            }

            f32 aa = nextPointX - inextPointX;
            f32 bb = nextPointY - inextPointY;
            iw00 = round((1.f - aa)*(1.f - bb)*(1 << W_BITS));
            iw01 = round(aa*(1.f - bb)*(1 << W_BITS));
            iw10 = round((1.f - aa)*bb*(1 << W_BITS));
            iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
            f32 errval = 0.f;
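            // Accumulate the absolute difference between the interpolated
            // next-image window and the stored previous-image window; both carry
            // 5 fractional bits, so the final error is divided by 32 in addition
            // to the number of window elements.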
            for(s32 y = 0; y < (s32)winSize.height; y++ )
            {
                const u8* Jptr = nextData + nextStride*(y + inextPointY) + inextPointX*cn;
                const s16* Iptr = IWinBuf + y*IWinBufStride;

                for( x = 0; x < wwcn; x++ )
                {
                    s32 diff = CV_DESCALE(Jptr[x]*iw00 + Jptr[x+cn]*iw01 +
                                          Jptr[x+nextStride]*iw10 + Jptr[x+nextStride+cn]*iw11,
                                          W_BITS1-5) - Iptr[x];
                    errval += std::abs((f32)diff);
                }
            }
            err[ptidx] = errval / (32*wwcn*winSize.height);
        }
    }
#else
    (void)size;
    (void)cn;
    (void)prevData;
    (void)prevStride;
    (void)prevDerivData;
    (void)prevDerivStride;
    (void)nextData;
    (void)nextStride;
    (void)prevPts;
    (void)nextPts;
    (void)status;
    (void)err;
    (void)winSize;
    (void)terminationCount;
    (void)terminationEpsilon;
    (void)level;
    (void)maxLevel;
    (void)useInitialFlow;
    (void)getMinEigenVals;
    (void)minEigThreshold;
    (void)ptCount;
#endif
}

} // CAROTENE_NS