/*M/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // // By downloading, copying, installing or using the software you agree to this license. // If you do not agree to this license, do not download, install, // copy or use the software. // // // License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2000, Intel Corporation, all rights reserved. // Copyright (C) 2013, OpenCV Foundation, all rights reserved. // Third party copyrights are property of their respective owners. // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // // * Redistribution's of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // // * Redistribution's in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // // This software is provided by the copyright holders and contributors "as is" and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, // indirect, incidental, special, exemplary, or consequential damages // (including, but not limited to, procurement of substitute goods or services; // loss of use, data, or profits; or business interruption) however caused // and on any theory of liability, whether in contract, strict liability, // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. // //M*/ #include "precomp.hpp" #include #include #include "lkpyramid.hpp" #include "opencl_kernels_video.hpp" #include "opencv2/core/hal/intrin.hpp" #include "opencv2/core/openvx/ovx_defs.hpp" #define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n)) namespace { static void calcSharrDeriv(const cv::Mat& src, cv::Mat& dst) { using namespace cv; using cv::detail::deriv_type; int rows = src.rows, cols = src.cols, cn = src.channels(), colsn = cols*cn, depth = src.depth(); CV_Assert(depth == CV_8U); dst.create(rows, cols, CV_MAKETYPE(DataType::depth, cn*2)); #ifdef HAVE_TEGRA_OPTIMIZATION if (tegra::useTegra() && tegra::calcSharrDeriv(src, dst)) return; #endif int x, y, delta = (int)alignSize((cols + 2)*cn, 16); AutoBuffer _tempBuf(delta*2 + 64); deriv_type *trow0 = alignPtr(_tempBuf + cn, 16), *trow1 = alignPtr(trow0 + delta, 16); #if CV_SIMD128 v_int16x8 c3 = v_setall_s16(3), c10 = v_setall_s16(10); bool haveSIMD = checkHardwareSupport(CV_CPU_SSE2) || checkHardwareSupport(CV_CPU_NEON); #endif for( y = 0; y < rows; y++ ) { const uchar* srow0 = src.ptr(y > 0 ? y-1 : rows > 1 ? 1 : 0); const uchar* srow1 = src.ptr(y); const uchar* srow2 = src.ptr(y < rows-1 ? y+1 : rows > 1 ? rows-2 : 0); deriv_type* drow = dst.ptr(y); // do vertical convolution x = 0; #if CV_SIMD128 if(haveSIMD) { for( ; x <= colsn - 8; x += 8 ) { v_int16x8 s0 = v_reinterpret_as_s16(v_load_expand(srow0 + x)); v_int16x8 s1 = v_reinterpret_as_s16(v_load_expand(srow1 + x)); v_int16x8 s2 = v_reinterpret_as_s16(v_load_expand(srow2 + x)); v_int16x8 t1 = s2 - s0; v_int16x8 t0 = (s0 + s2) * c3 + s1 * c10; v_store(trow0 + x, t0); v_store(trow1 + x, t1); } } #endif for( ; x < colsn; x++ ) { int t0 = (srow0[x] + srow2[x])*3 + srow1[x]*10; int t1 = srow2[x] - srow0[x]; trow0[x] = (deriv_type)t0; trow1[x] = (deriv_type)t1; } // make border int x0 = (cols > 1 ? 1 : 0)*cn, x1 = (cols > 1 ? cols-2 : 0)*cn; for( int k = 0; k < cn; k++ ) { trow0[-cn + k] = trow0[x0 + k]; trow0[colsn + k] = trow0[x1 + k]; trow1[-cn + k] = trow1[x0 + k]; trow1[colsn + k] = trow1[x1 + k]; } // do horizontal convolution, interleave the results and store them to dst x = 0; #if CV_SIMD128 if(haveSIMD) { for( ; x <= colsn - 8; x += 8 ) { v_int16x8 s0 = v_load(trow0 + x - cn); v_int16x8 s1 = v_load(trow0 + x + cn); v_int16x8 s2 = v_load(trow1 + x - cn); v_int16x8 s3 = v_load(trow1 + x); v_int16x8 s4 = v_load(trow1 + x + cn); v_int16x8 t0 = s1 - s0; v_int16x8 t1 = ((s2 + s4) * c3) + (s3 * c10); v_store_interleave((drow + x*2), t0, t1); } } #endif for( ; x < colsn; x++ ) { deriv_type t0 = (deriv_type)(trow0[x+cn] - trow0[x-cn]); deriv_type t1 = (deriv_type)((trow1[x+cn] + trow1[x-cn])*3 + trow1[x]*10); drow[x*2] = t0; drow[x*2+1] = t1; } } } }//namespace cv::detail::LKTrackerInvoker::LKTrackerInvoker( const Mat& _prevImg, const Mat& _prevDeriv, const Mat& _nextImg, const Point2f* _prevPts, Point2f* _nextPts, uchar* _status, float* _err, Size _winSize, TermCriteria _criteria, int _level, int _maxLevel, int _flags, float _minEigThreshold ) { prevImg = &_prevImg; prevDeriv = &_prevDeriv; nextImg = &_nextImg; prevPts = _prevPts; nextPts = _nextPts; status = _status; err = _err; winSize = _winSize; criteria = _criteria; level = _level; maxLevel = _maxLevel; flags = _flags; minEigThreshold = _minEigThreshold; } #if defined __arm__ && !CV_NEON typedef int64 acctype; typedef int itemtype; #else typedef float acctype; typedef float itemtype; #endif void cv::detail::LKTrackerInvoker::operator()(const Range& range) const { CV_INSTRUMENT_REGION() Point2f halfWin((winSize.width-1)*0.5f, (winSize.height-1)*0.5f); const Mat& I = *prevImg; const Mat& J = *nextImg; const Mat& derivI = *prevDeriv; int j, cn = I.channels(), cn2 = cn*2; cv::AutoBuffer _buf(winSize.area()*(cn + cn2)); int derivDepth = DataType::depth; Mat IWinBuf(winSize, CV_MAKETYPE(derivDepth, cn), (deriv_type*)_buf); Mat derivIWinBuf(winSize, CV_MAKETYPE(derivDepth, cn2), (deriv_type*)_buf + winSize.area()*cn); for( int ptidx = range.start; ptidx < range.end; ptidx++ ) { Point2f prevPt = prevPts[ptidx]*(float)(1./(1 << level)); Point2f nextPt; if( level == maxLevel ) { if( flags & OPTFLOW_USE_INITIAL_FLOW ) nextPt = nextPts[ptidx]*(float)(1./(1 << level)); else nextPt = prevPt; } else nextPt = nextPts[ptidx]*2.f; nextPts[ptidx] = nextPt; Point2i iprevPt, inextPt; prevPt -= halfWin; iprevPt.x = cvFloor(prevPt.x); iprevPt.y = cvFloor(prevPt.y); if( iprevPt.x < -winSize.width || iprevPt.x >= derivI.cols || iprevPt.y < -winSize.height || iprevPt.y >= derivI.rows ) { if( level == 0 ) { if( status ) status[ptidx] = false; if( err ) err[ptidx] = 0; } continue; } float a = prevPt.x - iprevPt.x; float b = prevPt.y - iprevPt.y; const int W_BITS = 14, W_BITS1 = 14; const float FLT_SCALE = 1.f/(1 << 20); int iw00 = cvRound((1.f - a)*(1.f - b)*(1 << W_BITS)); int iw01 = cvRound(a*(1.f - b)*(1 << W_BITS)); int iw10 = cvRound((1.f - a)*b*(1 << W_BITS)); int iw11 = (1 << W_BITS) - iw00 - iw01 - iw10; int dstep = (int)(derivI.step/derivI.elemSize1()); int stepI = (int)(I.step/I.elemSize1()); int stepJ = (int)(J.step/J.elemSize1()); acctype iA11 = 0, iA12 = 0, iA22 = 0; float A11, A12, A22; #if CV_SSE2 __m128i qw0 = _mm_set1_epi32(iw00 + (iw01 << 16)); __m128i qw1 = _mm_set1_epi32(iw10 + (iw11 << 16)); __m128i z = _mm_setzero_si128(); __m128i qdelta_d = _mm_set1_epi32(1 << (W_BITS1-1)); __m128i qdelta = _mm_set1_epi32(1 << (W_BITS1-5-1)); __m128 qA11 = _mm_setzero_ps(), qA12 = _mm_setzero_ps(), qA22 = _mm_setzero_ps(); #endif #if CV_NEON float CV_DECL_ALIGNED(16) nA11[] = { 0, 0, 0, 0 }, nA12[] = { 0, 0, 0, 0 }, nA22[] = { 0, 0, 0, 0 }; const int shifter1 = -(W_BITS - 5); //negative so it shifts right const int shifter2 = -(W_BITS); const int16x4_t d26 = vdup_n_s16((int16_t)iw00); const int16x4_t d27 = vdup_n_s16((int16_t)iw01); const int16x4_t d28 = vdup_n_s16((int16_t)iw10); const int16x4_t d29 = vdup_n_s16((int16_t)iw11); const int32x4_t q11 = vdupq_n_s32((int32_t)shifter1); const int32x4_t q12 = vdupq_n_s32((int32_t)shifter2); #endif // extract the patch from the first image, compute covariation matrix of derivatives int x, y; for( y = 0; y < winSize.height; y++ ) { const uchar* src = I.ptr() + (y + iprevPt.y)*stepI + iprevPt.x*cn; const deriv_type* dsrc = derivI.ptr() + (y + iprevPt.y)*dstep + iprevPt.x*cn2; deriv_type* Iptr = IWinBuf.ptr(y); deriv_type* dIptr = derivIWinBuf.ptr(y); x = 0; #if CV_SSE2 for( ; x <= winSize.width*cn - 4; x += 4, dsrc += 4*2, dIptr += 4*2 ) { __m128i v00, v01, v10, v11, t0, t1; v00 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int*)(src + x)), z); v01 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int*)(src + x + cn)), z); v10 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int*)(src + x + stepI)), z); v11 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const int*)(src + x + stepI + cn)), z); t0 = _mm_add_epi32(_mm_madd_epi16(_mm_unpacklo_epi16(v00, v01), qw0), _mm_madd_epi16(_mm_unpacklo_epi16(v10, v11), qw1)); t0 = _mm_srai_epi32(_mm_add_epi32(t0, qdelta), W_BITS1-5); _mm_storel_epi64((__m128i*)(Iptr + x), _mm_packs_epi32(t0,t0)); v00 = _mm_loadu_si128((const __m128i*)(dsrc)); v01 = _mm_loadu_si128((const __m128i*)(dsrc + cn2)); v10 = _mm_loadu_si128((const __m128i*)(dsrc + dstep)); v11 = _mm_loadu_si128((const __m128i*)(dsrc + dstep + cn2)); t0 = _mm_add_epi32(_mm_madd_epi16(_mm_unpacklo_epi16(v00, v01), qw0), _mm_madd_epi16(_mm_unpacklo_epi16(v10, v11), qw1)); t1 = _mm_add_epi32(_mm_madd_epi16(_mm_unpackhi_epi16(v00, v01), qw0), _mm_madd_epi16(_mm_unpackhi_epi16(v10, v11), qw1)); t0 = _mm_srai_epi32(_mm_add_epi32(t0, qdelta_d), W_BITS1); t1 = _mm_srai_epi32(_mm_add_epi32(t1, qdelta_d), W_BITS1); v00 = _mm_packs_epi32(t0, t1); // Ix0 Iy0 Ix1 Iy1 ... _mm_storeu_si128((__m128i*)dIptr, v00); t0 = _mm_srai_epi32(v00, 16); // Iy0 Iy1 Iy2 Iy3 t1 = _mm_srai_epi32(_mm_slli_epi32(v00, 16), 16); // Ix0 Ix1 Ix2 Ix3 __m128 fy = _mm_cvtepi32_ps(t0); __m128 fx = _mm_cvtepi32_ps(t1); qA22 = _mm_add_ps(qA22, _mm_mul_ps(fy, fy)); qA12 = _mm_add_ps(qA12, _mm_mul_ps(fx, fy)); qA11 = _mm_add_ps(qA11, _mm_mul_ps(fx, fx)); } #endif #if CV_NEON for( ; x <= winSize.width*cn - 4; x += 4, dsrc += 4*2, dIptr += 4*2 ) { uint8x8_t d0 = vld1_u8(&src[x]); uint8x8_t d2 = vld1_u8(&src[x+cn]); uint16x8_t q0 = vmovl_u8(d0); uint16x8_t q1 = vmovl_u8(d2); int32x4_t q5 = vmull_s16(vget_low_s16(vreinterpretq_s16_u16(q0)), d26); int32x4_t q6 = vmull_s16(vget_low_s16(vreinterpretq_s16_u16(q1)), d27); uint8x8_t d4 = vld1_u8(&src[x + stepI]); uint8x8_t d6 = vld1_u8(&src[x + stepI + cn]); uint16x8_t q2 = vmovl_u8(d4); uint16x8_t q3 = vmovl_u8(d6); int32x4_t q7 = vmull_s16(vget_low_s16(vreinterpretq_s16_u16(q2)), d28); int32x4_t q8 = vmull_s16(vget_low_s16(vreinterpretq_s16_u16(q3)), d29); q5 = vaddq_s32(q5, q6); q7 = vaddq_s32(q7, q8); q5 = vaddq_s32(q5, q7); int16x4x2_t d0d1 = vld2_s16(dsrc); int16x4x2_t d2d3 = vld2_s16(&dsrc[cn2]); q5 = vqrshlq_s32(q5, q11); int32x4_t q4 = vmull_s16(d0d1.val[0], d26); q6 = vmull_s16(d0d1.val[1], d26); int16x4_t nd0 = vmovn_s32(q5); q7 = vmull_s16(d2d3.val[0], d27); q8 = vmull_s16(d2d3.val[1], d27); vst1_s16(&Iptr[x], nd0); int16x4x2_t d4d5 = vld2_s16(&dsrc[dstep]); int16x4x2_t d6d7 = vld2_s16(&dsrc[dstep+cn2]); q4 = vaddq_s32(q4, q7); q6 = vaddq_s32(q6, q8); q7 = vmull_s16(d4d5.val[0], d28); int32x4_t q14 = vmull_s16(d4d5.val[1], d28); q8 = vmull_s16(d6d7.val[0], d29); int32x4_t q15 = vmull_s16(d6d7.val[1], d29); q7 = vaddq_s32(q7, q8); q14 = vaddq_s32(q14, q15); q4 = vaddq_s32(q4, q7); q6 = vaddq_s32(q6, q14); float32x4_t nq0 = vld1q_f32(nA11); float32x4_t nq1 = vld1q_f32(nA12); float32x4_t nq2 = vld1q_f32(nA22); q4 = vqrshlq_s32(q4, q12); q6 = vqrshlq_s32(q6, q12); q7 = vmulq_s32(q4, q4); q8 = vmulq_s32(q4, q6); q15 = vmulq_s32(q6, q6); nq0 = vaddq_f32(nq0, vcvtq_f32_s32(q7)); nq1 = vaddq_f32(nq1, vcvtq_f32_s32(q8)); nq2 = vaddq_f32(nq2, vcvtq_f32_s32(q15)); vst1q_f32(nA11, nq0); vst1q_f32(nA12, nq1); vst1q_f32(nA22, nq2); int16x4_t d8 = vmovn_s32(q4); int16x4_t d12 = vmovn_s32(q6); int16x4x2_t d8d12; d8d12.val[0] = d8; d8d12.val[1] = d12; vst2_s16(dIptr, d8d12); } #endif for( ; x < winSize.width*cn; x++, dsrc += 2, dIptr += 2 ) { int ival = CV_DESCALE(src[x]*iw00 + src[x+cn]*iw01 + src[x+stepI]*iw10 + src[x+stepI+cn]*iw11, W_BITS1-5); int ixval = CV_DESCALE(dsrc[0]*iw00 + dsrc[cn2]*iw01 + dsrc[dstep]*iw10 + dsrc[dstep+cn2]*iw11, W_BITS1); int iyval = CV_DESCALE(dsrc[1]*iw00 + dsrc[cn2+1]*iw01 + dsrc[dstep+1]*iw10 + dsrc[dstep+cn2+1]*iw11, W_BITS1); Iptr[x] = (short)ival; dIptr[0] = (short)ixval; dIptr[1] = (short)iyval; iA11 += (itemtype)(ixval*ixval); iA12 += (itemtype)(ixval*iyval); iA22 += (itemtype)(iyval*iyval); } } #if CV_SSE2 float CV_DECL_ALIGNED(16) A11buf[4], A12buf[4], A22buf[4]; _mm_store_ps(A11buf, qA11); _mm_store_ps(A12buf, qA12); _mm_store_ps(A22buf, qA22); iA11 += A11buf[0] + A11buf[1] + A11buf[2] + A11buf[3]; iA12 += A12buf[0] + A12buf[1] + A12buf[2] + A12buf[3]; iA22 += A22buf[0] + A22buf[1] + A22buf[2] + A22buf[3]; #endif #if CV_NEON iA11 += nA11[0] + nA11[1] + nA11[2] + nA11[3]; iA12 += nA12[0] + nA12[1] + nA12[2] + nA12[3]; iA22 += nA22[0] + nA22[1] + nA22[2] + nA22[3]; #endif A11 = iA11*FLT_SCALE; A12 = iA12*FLT_SCALE; A22 = iA22*FLT_SCALE; float D = A11*A22 - A12*A12; float minEig = (A22 + A11 - std::sqrt((A11-A22)*(A11-A22) + 4.f*A12*A12))/(2*winSize.width*winSize.height); if( err && (flags & OPTFLOW_LK_GET_MIN_EIGENVALS) != 0 ) err[ptidx] = (float)minEig; if( minEig < minEigThreshold || D < FLT_EPSILON ) { if( level == 0 && status ) status[ptidx] = false; continue; } D = 1.f/D; nextPt -= halfWin; Point2f prevDelta; for( j = 0; j < criteria.maxCount; j++ ) { inextPt.x = cvFloor(nextPt.x); inextPt.y = cvFloor(nextPt.y); if( inextPt.x < -winSize.width || inextPt.x >= J.cols || inextPt.y < -winSize.height || inextPt.y >= J.rows ) { if( level == 0 && status ) status[ptidx] = false; break; } a = nextPt.x - inextPt.x; b = nextPt.y - inextPt.y; iw00 = cvRound((1.f - a)*(1.f - b)*(1 << W_BITS)); iw01 = cvRound(a*(1.f - b)*(1 << W_BITS)); iw10 = cvRound((1.f - a)*b*(1 << W_BITS)); iw11 = (1 << W_BITS) - iw00 - iw01 - iw10; acctype ib1 = 0, ib2 = 0; float b1, b2; #if CV_SSE2 qw0 = _mm_set1_epi32(iw00 + (iw01 << 16)); qw1 = _mm_set1_epi32(iw10 + (iw11 << 16)); __m128 qb0 = _mm_setzero_ps(), qb1 = _mm_setzero_ps(); #endif #if CV_NEON float CV_DECL_ALIGNED(16) nB1[] = { 0,0,0,0 }, nB2[] = { 0,0,0,0 }; const int16x4_t d26_2 = vdup_n_s16((int16_t)iw00); const int16x4_t d27_2 = vdup_n_s16((int16_t)iw01); const int16x4_t d28_2 = vdup_n_s16((int16_t)iw10); const int16x4_t d29_2 = vdup_n_s16((int16_t)iw11); #endif for( y = 0; y < winSize.height; y++ ) { const uchar* Jptr = J.ptr() + (y + inextPt.y)*stepJ + inextPt.x*cn; const deriv_type* Iptr = IWinBuf.ptr(y); const deriv_type* dIptr = derivIWinBuf.ptr(y); x = 0; #if CV_SSE2 for( ; x <= winSize.width*cn - 8; x += 8, dIptr += 8*2 ) { __m128i diff0 = _mm_loadu_si128((const __m128i*)(Iptr + x)), diff1; __m128i v00 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(Jptr + x)), z); __m128i v01 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(Jptr + x + cn)), z); __m128i v10 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(Jptr + x + stepJ)), z); __m128i v11 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(Jptr + x + stepJ + cn)), z); __m128i t0 = _mm_add_epi32(_mm_madd_epi16(_mm_unpacklo_epi16(v00, v01), qw0), _mm_madd_epi16(_mm_unpacklo_epi16(v10, v11), qw1)); __m128i t1 = _mm_add_epi32(_mm_madd_epi16(_mm_unpackhi_epi16(v00, v01), qw0), _mm_madd_epi16(_mm_unpackhi_epi16(v10, v11), qw1)); t0 = _mm_srai_epi32(_mm_add_epi32(t0, qdelta), W_BITS1-5); t1 = _mm_srai_epi32(_mm_add_epi32(t1, qdelta), W_BITS1-5); diff0 = _mm_subs_epi16(_mm_packs_epi32(t0, t1), diff0); diff1 = _mm_unpackhi_epi16(diff0, diff0); diff0 = _mm_unpacklo_epi16(diff0, diff0); // It0 It0 It1 It1 ... v00 = _mm_loadu_si128((const __m128i*)(dIptr)); // Ix0 Iy0 Ix1 Iy1 ... v01 = _mm_loadu_si128((const __m128i*)(dIptr + 8)); v10 = _mm_unpacklo_epi16(v00, v01); v11 = _mm_unpackhi_epi16(v00, v01); v00 = _mm_unpacklo_epi16(diff0, diff1); v01 = _mm_unpackhi_epi16(diff0, diff1); v00 = _mm_madd_epi16(v00, v10); v11 = _mm_madd_epi16(v01, v11); qb0 = _mm_add_ps(qb0, _mm_cvtepi32_ps(v00)); qb1 = _mm_add_ps(qb1, _mm_cvtepi32_ps(v11)); } #endif #if CV_NEON for( ; x <= winSize.width*cn - 8; x += 8, dIptr += 8*2 ) { uint8x8_t d0 = vld1_u8(&Jptr[x]); uint8x8_t d2 = vld1_u8(&Jptr[x+cn]); uint8x8_t d4 = vld1_u8(&Jptr[x+stepJ]); uint8x8_t d6 = vld1_u8(&Jptr[x+stepJ+cn]); uint16x8_t q0 = vmovl_u8(d0); uint16x8_t q1 = vmovl_u8(d2); uint16x8_t q2 = vmovl_u8(d4); uint16x8_t q3 = vmovl_u8(d6); int32x4_t nq4 = vmull_s16(vget_low_s16(vreinterpretq_s16_u16(q0)), d26_2); int32x4_t nq5 = vmull_s16(vget_high_s16(vreinterpretq_s16_u16(q0)), d26_2); int32x4_t nq6 = vmull_s16(vget_low_s16(vreinterpretq_s16_u16(q1)), d27_2); int32x4_t nq7 = vmull_s16(vget_high_s16(vreinterpretq_s16_u16(q1)), d27_2); int32x4_t nq8 = vmull_s16(vget_low_s16(vreinterpretq_s16_u16(q2)), d28_2); int32x4_t nq9 = vmull_s16(vget_high_s16(vreinterpretq_s16_u16(q2)), d28_2); int32x4_t nq10 = vmull_s16(vget_low_s16(vreinterpretq_s16_u16(q3)), d29_2); int32x4_t nq11 = vmull_s16(vget_high_s16(vreinterpretq_s16_u16(q3)), d29_2); nq4 = vaddq_s32(nq4, nq6); nq5 = vaddq_s32(nq5, nq7); nq8 = vaddq_s32(nq8, nq10); nq9 = vaddq_s32(nq9, nq11); int16x8_t q6 = vld1q_s16(&Iptr[x]); nq4 = vaddq_s32(nq4, nq8); nq5 = vaddq_s32(nq5, nq9); nq8 = vmovl_s16(vget_high_s16(q6)); nq6 = vmovl_s16(vget_low_s16(q6)); nq4 = vqrshlq_s32(nq4, q11); nq5 = vqrshlq_s32(nq5, q11); int16x8x2_t q0q1 = vld2q_s16(dIptr); float32x4_t nB1v = vld1q_f32(nB1); float32x4_t nB2v = vld1q_f32(nB2); nq4 = vsubq_s32(nq4, nq6); nq5 = vsubq_s32(nq5, nq8); int32x4_t nq2 = vmovl_s16(vget_low_s16(q0q1.val[0])); int32x4_t nq3 = vmovl_s16(vget_high_s16(q0q1.val[0])); nq7 = vmovl_s16(vget_low_s16(q0q1.val[1])); nq8 = vmovl_s16(vget_high_s16(q0q1.val[1])); nq9 = vmulq_s32(nq4, nq2); nq10 = vmulq_s32(nq5, nq3); nq4 = vmulq_s32(nq4, nq7); nq5 = vmulq_s32(nq5, nq8); nq9 = vaddq_s32(nq9, nq10); nq4 = vaddq_s32(nq4, nq5); nB1v = vaddq_f32(nB1v, vcvtq_f32_s32(nq9)); nB2v = vaddq_f32(nB2v, vcvtq_f32_s32(nq4)); vst1q_f32(nB1, nB1v); vst1q_f32(nB2, nB2v); } #endif for( ; x < winSize.width*cn; x++, dIptr += 2 ) { int diff = CV_DESCALE(Jptr[x]*iw00 + Jptr[x+cn]*iw01 + Jptr[x+stepJ]*iw10 + Jptr[x+stepJ+cn]*iw11, W_BITS1-5) - Iptr[x]; ib1 += (itemtype)(diff*dIptr[0]); ib2 += (itemtype)(diff*dIptr[1]); } } #if CV_SSE2 float CV_DECL_ALIGNED(16) bbuf[4]; _mm_store_ps(bbuf, _mm_add_ps(qb0, qb1)); ib1 += bbuf[0] + bbuf[2]; ib2 += bbuf[1] + bbuf[3]; #endif #if CV_NEON ib1 += (float)(nB1[0] + nB1[1] + nB1[2] + nB1[3]); ib2 += (float)(nB2[0] + nB2[1] + nB2[2] + nB2[3]); #endif b1 = ib1*FLT_SCALE; b2 = ib2*FLT_SCALE; Point2f delta( (float)((A12*b2 - A22*b1) * D), (float)((A12*b1 - A11*b2) * D)); //delta = -delta; nextPt += delta; nextPts[ptidx] = nextPt + halfWin; if( delta.ddot(delta) <= criteria.epsilon ) break; if( j > 0 && std::abs(delta.x + prevDelta.x) < 0.01 && std::abs(delta.y + prevDelta.y) < 0.01 ) { nextPts[ptidx] -= delta*0.5f; break; } prevDelta = delta; } CV_Assert(status != NULL); if( status[ptidx] && err && level == 0 && (flags & OPTFLOW_LK_GET_MIN_EIGENVALS) == 0 ) { Point2f nextPoint = nextPts[ptidx] - halfWin; Point inextPoint; inextPoint.x = cvFloor(nextPoint.x); inextPoint.y = cvFloor(nextPoint.y); if( inextPoint.x < -winSize.width || inextPoint.x >= J.cols || inextPoint.y < -winSize.height || inextPoint.y >= J.rows ) { if( status ) status[ptidx] = false; continue; } float aa = nextPoint.x - inextPoint.x; float bb = nextPoint.y - inextPoint.y; iw00 = cvRound((1.f - aa)*(1.f - bb)*(1 << W_BITS)); iw01 = cvRound(aa*(1.f - bb)*(1 << W_BITS)); iw10 = cvRound((1.f - aa)*bb*(1 << W_BITS)); iw11 = (1 << W_BITS) - iw00 - iw01 - iw10; float errval = 0.f; for( y = 0; y < winSize.height; y++ ) { const uchar* Jptr = J.ptr() + (y + inextPoint.y)*stepJ + inextPoint.x*cn; const deriv_type* Iptr = IWinBuf.ptr(y); for( x = 0; x < winSize.width*cn; x++ ) { int diff = CV_DESCALE(Jptr[x]*iw00 + Jptr[x+cn]*iw01 + Jptr[x+stepJ]*iw10 + Jptr[x+stepJ+cn]*iw11, W_BITS1-5) - Iptr[x]; errval += std::abs((float)diff); } } err[ptidx] = errval * 1.f/(32*winSize.width*cn*winSize.height); } } } int cv::buildOpticalFlowPyramid(InputArray _img, OutputArrayOfArrays pyramid, Size winSize, int maxLevel, bool withDerivatives, int pyrBorder, int derivBorder, bool tryReuseInputImage) { CV_INSTRUMENT_REGION() Mat img = _img.getMat(); CV_Assert(img.depth() == CV_8U && winSize.width > 2 && winSize.height > 2 ); int pyrstep = withDerivatives ? 2 : 1; pyramid.create(1, (maxLevel + 1) * pyrstep, 0 /*type*/, -1, true, 0); int derivType = CV_MAKETYPE(DataType::depth, img.channels() * 2); //level 0 bool lvl0IsSet = false; if(tryReuseInputImage && img.isSubmatrix() && (pyrBorder & BORDER_ISOLATED) == 0) { Size wholeSize; Point ofs; img.locateROI(wholeSize, ofs); if (ofs.x >= winSize.width && ofs.y >= winSize.height && ofs.x + img.cols + winSize.width <= wholeSize.width && ofs.y + img.rows + winSize.height <= wholeSize.height) { pyramid.getMatRef(0) = img; lvl0IsSet = true; } } if(!lvl0IsSet) { Mat& temp = pyramid.getMatRef(0); if(!temp.empty()) temp.adjustROI(winSize.height, winSize.height, winSize.width, winSize.width); if(temp.type() != img.type() || temp.cols != winSize.width*2 + img.cols || temp.rows != winSize.height * 2 + img.rows) temp.create(img.rows + winSize.height*2, img.cols + winSize.width*2, img.type()); if(pyrBorder == BORDER_TRANSPARENT) img.copyTo(temp(Rect(winSize.width, winSize.height, img.cols, img.rows))); else copyMakeBorder(img, temp, winSize.height, winSize.height, winSize.width, winSize.width, pyrBorder); temp.adjustROI(-winSize.height, -winSize.height, -winSize.width, -winSize.width); } Size sz = img.size(); Mat prevLevel = pyramid.getMatRef(0); Mat thisLevel = prevLevel; for(int level = 0; level <= maxLevel; ++level) { if (level != 0) { Mat& temp = pyramid.getMatRef(level * pyrstep); if(!temp.empty()) temp.adjustROI(winSize.height, winSize.height, winSize.width, winSize.width); if(temp.type() != img.type() || temp.cols != winSize.width*2 + sz.width || temp.rows != winSize.height * 2 + sz.height) temp.create(sz.height + winSize.height*2, sz.width + winSize.width*2, img.type()); thisLevel = temp(Rect(winSize.width, winSize.height, sz.width, sz.height)); pyrDown(prevLevel, thisLevel, sz); if(pyrBorder != BORDER_TRANSPARENT) copyMakeBorder(thisLevel, temp, winSize.height, winSize.height, winSize.width, winSize.width, pyrBorder|BORDER_ISOLATED); temp.adjustROI(-winSize.height, -winSize.height, -winSize.width, -winSize.width); } if(withDerivatives) { Mat& deriv = pyramid.getMatRef(level * pyrstep + 1); if(!deriv.empty()) deriv.adjustROI(winSize.height, winSize.height, winSize.width, winSize.width); if(deriv.type() != derivType || deriv.cols != winSize.width*2 + sz.width || deriv.rows != winSize.height * 2 + sz.height) deriv.create(sz.height + winSize.height*2, sz.width + winSize.width*2, derivType); Mat derivI = deriv(Rect(winSize.width, winSize.height, sz.width, sz.height)); calcSharrDeriv(thisLevel, derivI); if(derivBorder != BORDER_TRANSPARENT) copyMakeBorder(derivI, deriv, winSize.height, winSize.height, winSize.width, winSize.width, derivBorder|BORDER_ISOLATED); deriv.adjustROI(-winSize.height, -winSize.height, -winSize.width, -winSize.width); } sz = Size((sz.width+1)/2, (sz.height+1)/2); if( sz.width <= winSize.width || sz.height <= winSize.height ) { pyramid.create(1, (level + 1) * pyrstep, 0 /*type*/, -1, true, 0);//check this return level; } prevLevel = thisLevel; } return maxLevel; } namespace cv { namespace { class SparsePyrLKOpticalFlowImpl : public SparsePyrLKOpticalFlow { struct dim3 { unsigned int x, y, z; dim3() : x(0), y(0), z(0) { } }; public: SparsePyrLKOpticalFlowImpl(Size winSize_ = Size(21,21), int maxLevel_ = 3, TermCriteria criteria_ = TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, 30, 0.01), int flags_ = 0, double minEigThreshold_ = 1e-4) : winSize(winSize_), maxLevel(maxLevel_), criteria(criteria_), flags(flags_), minEigThreshold(minEigThreshold_) #ifdef HAVE_OPENCL , iters(criteria_.maxCount), derivLambda(criteria_.epsilon), useInitialFlow(0 != (flags_ & OPTFLOW_LK_GET_MIN_EIGENVALS)), waveSize(0) #endif { } virtual Size getWinSize() const CV_OVERRIDE { return winSize;} virtual void setWinSize(Size winSize_) CV_OVERRIDE { winSize = winSize_;} virtual int getMaxLevel() const CV_OVERRIDE { return maxLevel;} virtual void setMaxLevel(int maxLevel_) CV_OVERRIDE { maxLevel = maxLevel_;} virtual TermCriteria getTermCriteria() const CV_OVERRIDE { return criteria;} virtual void setTermCriteria(TermCriteria& crit_) CV_OVERRIDE { criteria=crit_;} virtual int getFlags() const CV_OVERRIDE { return flags; } virtual void setFlags(int flags_) CV_OVERRIDE { flags=flags_;} virtual double getMinEigThreshold() const CV_OVERRIDE { return minEigThreshold;} virtual void setMinEigThreshold(double minEigThreshold_) CV_OVERRIDE { minEigThreshold=minEigThreshold_;} virtual void calc(InputArray prevImg, InputArray nextImg, InputArray prevPts, InputOutputArray nextPts, OutputArray status, OutputArray err = cv::noArray()) CV_OVERRIDE; private: #ifdef HAVE_OPENCL bool checkParam() { iters = std::min(std::max(iters, 0), 100); derivLambda = std::min(std::max(derivLambda, 0.0), 1.0); if (derivLambda < 0) return false; if (maxLevel < 0 || winSize.width <= 2 || winSize.height <= 2) return false; if (winSize.width < 8 || winSize.height < 8 || winSize.width > 24 || winSize.height > 24) return false; calcPatchSize(); if (patch.x <= 0 || patch.x >= 6 || patch.y <= 0 || patch.y >= 6) return false; if (!initWaveSize()) return false; return true; } bool sparse(const UMat &prevImg, const UMat &nextImg, const UMat &prevPts, UMat &nextPts, UMat &status, UMat &err) { if (!checkParam()) return false; UMat temp1 = (useInitialFlow ? nextPts : prevPts).reshape(1); UMat temp2 = nextPts.reshape(1); multiply(1.0f / (1 << maxLevel) /2.0f, temp1, temp2); status.setTo(Scalar::all(1)); // build the image pyramids. std::vector prevPyr; prevPyr.resize(maxLevel + 1); std::vector nextPyr; nextPyr.resize(maxLevel + 1); // allocate buffers with aligned pitch to be able to use cl_khr_image2d_from_buffer extension // This is the required pitch alignment in pixels int pitchAlign = (int)ocl::Device::getDefault().imagePitchAlignment(); if (pitchAlign>0) { prevPyr[0] = UMat(prevImg.rows,(prevImg.cols+pitchAlign-1)&(-pitchAlign),CV_32FC1).colRange(0,prevImg.cols); nextPyr[0] = UMat(nextImg.rows,(nextImg.cols+pitchAlign-1)&(-pitchAlign),CV_32FC1).colRange(0,nextImg.cols); for (int level = 1; level <= maxLevel; ++level) { int cols,rows; // allocate buffers with aligned pitch to be able to use image on buffer extension cols = (prevPyr[level - 1].cols+1)/2; rows = (prevPyr[level - 1].rows+1)/2; prevPyr[level] = UMat(rows,(cols+pitchAlign-1)&(-pitchAlign),prevPyr[level-1].type()).colRange(0,cols); cols = (nextPyr[level - 1].cols+1)/2; rows = (nextPyr[level - 1].rows+1)/2; nextPyr[level] = UMat(rows,(cols+pitchAlign-1)&(-pitchAlign),nextPyr[level-1].type()).colRange(0,cols); } } prevImg.convertTo(prevPyr[0], CV_32F); nextImg.convertTo(nextPyr[0], CV_32F); for (int level = 1; level <= maxLevel; ++level) { pyrDown(prevPyr[level - 1], prevPyr[level]); pyrDown(nextPyr[level - 1], nextPyr[level]); } // dI/dx ~ Ix, dI/dy ~ Iy for (int level = maxLevel; level >= 0; level--) { if (!lkSparse_run(prevPyr[level], nextPyr[level], prevPts, nextPts, status, err, prevPts.cols, level)) return false; } return true; } #endif Size winSize; int maxLevel; TermCriteria criteria; int flags; double minEigThreshold; #ifdef HAVE_OPENCL int iters; double derivLambda; bool useInitialFlow; int waveSize; bool initWaveSize() { waveSize = 1; if (isDeviceCPU()) return true; ocl::Kernel kernel; if (!kernel.create("lkSparse", cv::ocl::video::pyrlk_oclsrc, "")) return false; waveSize = (int)kernel.preferedWorkGroupSizeMultiple(); return true; } dim3 patch; void calcPatchSize() { dim3 block; if (winSize.width > 32 && winSize.width > 2 * winSize.height) { block.x = 32; block.y = 8; } else { block.x = 16; block.y = 16; } patch.x = (winSize.width + block.x - 1) / block.x; patch.y = (winSize.height + block.y - 1) / block.y; block.z = patch.z = 1; } bool lkSparse_run(UMat &I, UMat &J, const UMat &prevPts, UMat &nextPts, UMat &status, UMat& err, int ptcount, int level) { size_t localThreads[3] = { 8, 8}; size_t globalThreads[3] = { 8 * (size_t)ptcount, 8}; char calcErr = (0 == level) ? 1 : 0; int wsx = 1, wsy = 1; if(winSize.width < 16) wsx = 0; if(winSize.height < 16) wsy = 0; cv::String build_options; if (isDeviceCPU()) build_options = " -D CPU"; else build_options = cv::format("-D WAVE_SIZE=%d -D WSX=%d -D WSY=%d", waveSize, wsx, wsy); ocl::Kernel kernel; if (!kernel.create("lkSparse", cv::ocl::video::pyrlk_oclsrc, build_options)) return false; CV_Assert(I.depth() == CV_32F && J.depth() == CV_32F); ocl::Image2D imageI(I, false, ocl::Image2D::canCreateAlias(I)); ocl::Image2D imageJ(J, false, ocl::Image2D::canCreateAlias(J)); int idxArg = 0; idxArg = kernel.set(idxArg, imageI); //image2d_t I idxArg = kernel.set(idxArg, imageJ); //image2d_t J idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(prevPts)); // __global const float2* prevPts idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(nextPts)); // __global const float2* nextPts idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(status)); // __global uchar* status idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(err)); // __global float* err idxArg = kernel.set(idxArg, (int)level); // const int level idxArg = kernel.set(idxArg, (int)I.rows); // const int rows idxArg = kernel.set(idxArg, (int)I.cols); // const int cols idxArg = kernel.set(idxArg, (int)patch.x); // int PATCH_X idxArg = kernel.set(idxArg, (int)patch.y); // int PATCH_Y idxArg = kernel.set(idxArg, (int)winSize.width); // int c_winSize_x idxArg = kernel.set(idxArg, (int)winSize.height); // int c_winSize_y idxArg = kernel.set(idxArg, (int)iters); // int c_iters idxArg = kernel.set(idxArg, (char)calcErr); //char calcErr return kernel.run(2, globalThreads, localThreads, true); // sync=true because ocl::Image2D lifetime is not handled well for temp UMat } private: inline static bool isDeviceCPU() { return (cv::ocl::Device::TYPE_CPU == cv::ocl::Device::getDefault().type()); } bool ocl_calcOpticalFlowPyrLK(InputArray _prevImg, InputArray _nextImg, InputArray _prevPts, InputOutputArray _nextPts, OutputArray _status, OutputArray _err) { if (0 != (OPTFLOW_LK_GET_MIN_EIGENVALS & flags)) return false; if (!cv::ocl::Device::getDefault().imageSupport()) return false; if (_nextImg.size() != _prevImg.size()) return false; int typePrev = _prevImg.type(); int typeNext = _nextImg.type(); if ((1 != CV_MAT_CN(typePrev)) || (1 != CV_MAT_CN(typeNext))) return false; if ((0 != CV_MAT_DEPTH(typePrev)) || (0 != CV_MAT_DEPTH(typeNext))) return false; if (_prevPts.empty() || _prevPts.type() != CV_32FC2 || (!_prevPts.isContinuous())) return false; if ((1 != _prevPts.size().height) && (1 != _prevPts.size().width)) return false; size_t npoints = _prevPts.total(); if (useInitialFlow) { if (_nextPts.empty() || _nextPts.type() != CV_32FC2 || (!_prevPts.isContinuous())) return false; if ((1 != _nextPts.size().height) && (1 != _nextPts.size().width)) return false; if (_nextPts.total() != npoints) return false; } else { _nextPts.create(_prevPts.size(), _prevPts.type()); } if (!checkParam()) return false; UMat umatErr; if (_err.needed()) { _err.create((int)npoints, 1, CV_32FC1); umatErr = _err.getUMat(); } else umatErr.create((int)npoints, 1, CV_32FC1); _status.create((int)npoints, 1, CV_8UC1); UMat umatNextPts = _nextPts.getUMat(); UMat umatStatus = _status.getUMat(); return sparse(_prevImg.getUMat(), _nextImg.getUMat(), _prevPts.getUMat(), umatNextPts, umatStatus, umatErr); } #endif #ifdef HAVE_OPENVX bool openvx_pyrlk(InputArray _prevImg, InputArray _nextImg, InputArray _prevPts, InputOutputArray _nextPts, OutputArray _status, OutputArray _err) { using namespace ivx; // Pyramids as inputs are not acceptable because there's no (direct or simple) way // to build vx_pyramid on user data if(_prevImg.kind() != _InputArray::MAT || _nextImg.kind() != _InputArray::MAT) return false; Mat prevImgMat = _prevImg.getMat(), nextImgMat = _nextImg.getMat(); if(prevImgMat.type() != CV_8UC1 || nextImgMat.type() != CV_8UC1) return false; if (ovx::skipSmallImages(prevImgMat.cols, prevImgMat.rows)) return false; CV_Assert(prevImgMat.size() == nextImgMat.size()); Mat prevPtsMat = _prevPts.getMat(); int checkPrev = prevPtsMat.checkVector(2, CV_32F, false); CV_Assert( checkPrev >= 0 ); size_t npoints = checkPrev; if( !(flags & OPTFLOW_USE_INITIAL_FLOW) ) _nextPts.create(prevPtsMat.size(), prevPtsMat.type(), -1, true); Mat nextPtsMat = _nextPts.getMat(); CV_Assert( nextPtsMat.checkVector(2, CV_32F, false) == (int)npoints ); _status.create((int)npoints, 1, CV_8U, -1, true); Mat statusMat = _status.getMat(); uchar* status = statusMat.ptr(); for(size_t i = 0; i < npoints; i++ ) status[i] = true; // OpenVX doesn't return detection errors if( _err.needed() ) { return false; } try { Context context = ovx::getOpenVXContext(); if(context.vendorID() == VX_ID_KHRONOS) { // PyrLK in OVX 1.0.1 performs vxCommitImagePatch incorrecty and crashes if(VX_VERSION == VX_VERSION_1_0) return false; // Implementation ignores border mode // So check that minimal size of image in pyramid is big enough int width = prevImgMat.cols, height = prevImgMat.rows; for(int i = 0; i < maxLevel+1; i++) { if(width < winSize.width + 1 || height < winSize.height + 1) return false; else { width /= 2; height /= 2; } } } Image prevImg = Image::createFromHandle(context, Image::matTypeToFormat(prevImgMat.type()), Image::createAddressing(prevImgMat), (void*)prevImgMat.data); Image nextImg = Image::createFromHandle(context, Image::matTypeToFormat(nextImgMat.type()), Image::createAddressing(nextImgMat), (void*)nextImgMat.data); Graph graph = Graph::create(context); Pyramid prevPyr = Pyramid::createVirtual(graph, (vx_size)maxLevel+1, VX_SCALE_PYRAMID_HALF, prevImg.width(), prevImg.height(), prevImg.format()); Pyramid nextPyr = Pyramid::createVirtual(graph, (vx_size)maxLevel+1, VX_SCALE_PYRAMID_HALF, nextImg.width(), nextImg.height(), nextImg.format()); ivx::Node::create(graph, VX_KERNEL_GAUSSIAN_PYRAMID, prevImg, prevPyr); ivx::Node::create(graph, VX_KERNEL_GAUSSIAN_PYRAMID, nextImg, nextPyr); Array prevPts = Array::create(context, VX_TYPE_KEYPOINT, npoints); Array estimatedPts = Array::create(context, VX_TYPE_KEYPOINT, npoints); Array nextPts = Array::create(context, VX_TYPE_KEYPOINT, npoints); std::vector vxPrevPts(npoints), vxEstPts(npoints), vxNextPts(npoints); for(size_t i = 0; i < npoints; i++) { vx_keypoint_t& prevPt = vxPrevPts[i]; vx_keypoint_t& estPt = vxEstPts[i]; prevPt.x = prevPtsMat.at(i).x; prevPt.y = prevPtsMat.at(i).y; estPt.x = nextPtsMat.at(i).x; estPt.y = nextPtsMat.at(i).y; prevPt.tracking_status = estPt.tracking_status = vx_true_e; } prevPts.addItems(vxPrevPts); estimatedPts.addItems(vxEstPts); if( (criteria.type & TermCriteria::COUNT) == 0 ) criteria.maxCount = 30; else criteria.maxCount = std::min(std::max(criteria.maxCount, 0), 100); if( (criteria.type & TermCriteria::EPS) == 0 ) criteria.epsilon = 0.01; else criteria.epsilon = std::min(std::max(criteria.epsilon, 0.), 10.); criteria.epsilon *= criteria.epsilon; vx_enum termEnum = (criteria.type == TermCriteria::COUNT) ? VX_TERM_CRITERIA_ITERATIONS : (criteria.type == TermCriteria::EPS) ? VX_TERM_CRITERIA_EPSILON : VX_TERM_CRITERIA_BOTH; //minEigThreshold is fixed to 0.0001f ivx::Scalar termination = ivx::Scalar::create(context, termEnum); ivx::Scalar epsilon = ivx::Scalar::create(context, criteria.epsilon); ivx::Scalar numIterations = ivx::Scalar::create(context, criteria.maxCount); ivx::Scalar useInitial = ivx::Scalar::create(context, (vx_bool)(flags & OPTFLOW_USE_INITIAL_FLOW)); //assume winSize is square ivx::Scalar windowSize = ivx::Scalar::create(context, (vx_size)winSize.width); ivx::Node::create(graph, VX_KERNEL_OPTICAL_FLOW_PYR_LK, prevPyr, nextPyr, prevPts, estimatedPts, nextPts, termination, epsilon, numIterations, useInitial, windowSize); graph.verify(); graph.process(); nextPts.copyTo(vxNextPts); for(size_t i = 0; i < npoints; i++) { vx_keypoint_t kp = vxNextPts[i]; nextPtsMat.at(i) = Point2f(kp.x, kp.y); statusMat.at(i) = (bool)kp.tracking_status; } #ifdef VX_VERSION_1_1 //we should take user memory back before release //(it's not done automatically according to standard) prevImg.swapHandle(); nextImg.swapHandle(); #endif } catch (RuntimeError & e) { VX_DbgThrow(e.what()); } catch (WrapperError & e) { VX_DbgThrow(e.what()); } return true; } #endif }; void SparsePyrLKOpticalFlowImpl::calc( InputArray _prevImg, InputArray _nextImg, InputArray _prevPts, InputOutputArray _nextPts, OutputArray _status, OutputArray _err) { CV_INSTRUMENT_REGION() CV_OCL_RUN(ocl::isOpenCLActivated() && (_prevImg.isUMat() || _nextImg.isUMat()) && ocl::Image2D::isFormatSupported(CV_32F, 1, false), ocl_calcOpticalFlowPyrLK(_prevImg, _nextImg, _prevPts, _nextPts, _status, _err)) // Disabled due to bad accuracy CV_OVX_RUN(false, openvx_pyrlk(_prevImg, _nextImg, _prevPts, _nextPts, _status, _err)) Mat prevPtsMat = _prevPts.getMat(); const int derivDepth = DataType::depth; CV_Assert( maxLevel >= 0 && winSize.width > 2 && winSize.height > 2 ); int level=0, i, npoints; CV_Assert( (npoints = prevPtsMat.checkVector(2, CV_32F, true)) >= 0 ); if( npoints == 0 ) { _nextPts.release(); _status.release(); _err.release(); return; } if( !(flags & OPTFLOW_USE_INITIAL_FLOW) ) _nextPts.create(prevPtsMat.size(), prevPtsMat.type(), -1, true); Mat nextPtsMat = _nextPts.getMat(); CV_Assert( nextPtsMat.checkVector(2, CV_32F, true) == npoints ); const Point2f* prevPts = prevPtsMat.ptr(); Point2f* nextPts = nextPtsMat.ptr(); _status.create((int)npoints, 1, CV_8U, -1, true); Mat statusMat = _status.getMat(), errMat; CV_Assert( statusMat.isContinuous() ); uchar* status = statusMat.ptr(); float* err = 0; for( i = 0; i < npoints; i++ ) status[i] = true; if( _err.needed() ) { _err.create((int)npoints, 1, CV_32F, -1, true); errMat = _err.getMat(); CV_Assert( errMat.isContinuous() ); err = errMat.ptr(); } std::vector prevPyr, nextPyr; int levels1 = -1; int lvlStep1 = 1; int levels2 = -1; int lvlStep2 = 1; if(_prevImg.kind() == _InputArray::STD_VECTOR_MAT) { _prevImg.getMatVector(prevPyr); levels1 = int(prevPyr.size()) - 1; CV_Assert(levels1 >= 0); if (levels1 % 2 == 1 && prevPyr[0].channels() * 2 == prevPyr[1].channels() && prevPyr[1].depth() == derivDepth) { lvlStep1 = 2; levels1 /= 2; } // ensure that pyramid has reqired padding if(levels1 > 0) { Size fullSize; Point ofs; prevPyr[lvlStep1].locateROI(fullSize, ofs); CV_Assert(ofs.x >= winSize.width && ofs.y >= winSize.height && ofs.x + prevPyr[lvlStep1].cols + winSize.width <= fullSize.width && ofs.y + prevPyr[lvlStep1].rows + winSize.height <= fullSize.height); } if(levels1 < maxLevel) maxLevel = levels1; } if(_nextImg.kind() == _InputArray::STD_VECTOR_MAT) { _nextImg.getMatVector(nextPyr); levels2 = int(nextPyr.size()) - 1; CV_Assert(levels2 >= 0); if (levels2 % 2 == 1 && nextPyr[0].channels() * 2 == nextPyr[1].channels() && nextPyr[1].depth() == derivDepth) { lvlStep2 = 2; levels2 /= 2; } // ensure that pyramid has reqired padding if(levels2 > 0) { Size fullSize; Point ofs; nextPyr[lvlStep2].locateROI(fullSize, ofs); CV_Assert(ofs.x >= winSize.width && ofs.y >= winSize.height && ofs.x + nextPyr[lvlStep2].cols + winSize.width <= fullSize.width && ofs.y + nextPyr[lvlStep2].rows + winSize.height <= fullSize.height); } if(levels2 < maxLevel) maxLevel = levels2; } if (levels1 < 0) maxLevel = buildOpticalFlowPyramid(_prevImg, prevPyr, winSize, maxLevel, false); if (levels2 < 0) maxLevel = buildOpticalFlowPyramid(_nextImg, nextPyr, winSize, maxLevel, false); if( (criteria.type & TermCriteria::COUNT) == 0 ) criteria.maxCount = 30; else criteria.maxCount = std::min(std::max(criteria.maxCount, 0), 100); if( (criteria.type & TermCriteria::EPS) == 0 ) criteria.epsilon = 0.01; else criteria.epsilon = std::min(std::max(criteria.epsilon, 0.), 10.); criteria.epsilon *= criteria.epsilon; // dI/dx ~ Ix, dI/dy ~ Iy Mat derivIBuf; if(lvlStep1 == 1) derivIBuf.create(prevPyr[0].rows + winSize.height*2, prevPyr[0].cols + winSize.width*2, CV_MAKETYPE(derivDepth, prevPyr[0].channels() * 2)); for( level = maxLevel; level >= 0; level-- ) { Mat derivI; if(lvlStep1 == 1) { Size imgSize = prevPyr[level * lvlStep1].size(); Mat _derivI( imgSize.height + winSize.height*2, imgSize.width + winSize.width*2, derivIBuf.type(), derivIBuf.ptr() ); derivI = _derivI(Rect(winSize.width, winSize.height, imgSize.width, imgSize.height)); calcSharrDeriv(prevPyr[level * lvlStep1], derivI); copyMakeBorder(derivI, _derivI, winSize.height, winSize.height, winSize.width, winSize.width, BORDER_CONSTANT|BORDER_ISOLATED); } else derivI = prevPyr[level * lvlStep1 + 1]; CV_Assert(prevPyr[level * lvlStep1].size() == nextPyr[level * lvlStep2].size()); CV_Assert(prevPyr[level * lvlStep1].type() == nextPyr[level * lvlStep2].type()); #ifdef HAVE_TEGRA_OPTIMIZATION typedef tegra::LKTrackerInvoker LKTrackerInvoker; #else typedef cv::detail::LKTrackerInvoker LKTrackerInvoker; #endif parallel_for_(Range(0, npoints), LKTrackerInvoker(prevPyr[level * lvlStep1], derivI, nextPyr[level * lvlStep2], prevPts, nextPts, status, err, winSize, criteria, level, maxLevel, flags, (float)minEigThreshold)); } } } // namespace } // namespace cv cv::Ptr cv::SparsePyrLKOpticalFlow::create(Size winSize, int maxLevel, TermCriteria crit, int flags, double minEigThreshold){ return makePtr(winSize,maxLevel,crit,flags,minEigThreshold); } void cv::calcOpticalFlowPyrLK( InputArray _prevImg, InputArray _nextImg, InputArray _prevPts, InputOutputArray _nextPts, OutputArray _status, OutputArray _err, Size winSize, int maxLevel, TermCriteria criteria, int flags, double minEigThreshold ) { Ptr optflow = cv::SparsePyrLKOpticalFlow::create(winSize,maxLevel,criteria,flags,minEigThreshold); optflow->calc(_prevImg,_nextImg,_prevPts,_nextPts,_status,_err); } namespace cv { static void getRTMatrix( const Point2f* a, const Point2f* b, int count, Mat& M, bool fullAffine ) { CV_Assert( M.isContinuous() ); if( fullAffine ) { double sa[6][6]={{0.}}, sb[6]={0.}; Mat A( 6, 6, CV_64F, &sa[0][0] ), B( 6, 1, CV_64F, sb ); Mat MM = M.reshape(1, 6); for( int i = 0; i < count; i++ ) { sa[0][0] += a[i].x*a[i].x; sa[0][1] += a[i].y*a[i].x; sa[0][2] += a[i].x; sa[1][1] += a[i].y*a[i].y; sa[1][2] += a[i].y; sb[0] += a[i].x*b[i].x; sb[1] += a[i].y*b[i].x; sb[2] += b[i].x; sb[3] += a[i].x*b[i].y; sb[4] += a[i].y*b[i].y; sb[5] += b[i].y; } sa[3][4] = sa[4][3] = sa[1][0] = sa[0][1]; sa[3][5] = sa[5][3] = sa[2][0] = sa[0][2]; sa[4][5] = sa[5][4] = sa[2][1] = sa[1][2]; sa[3][3] = sa[0][0]; sa[4][4] = sa[1][1]; sa[5][5] = sa[2][2] = count; solve( A, B, MM, DECOMP_EIG ); } else { double sa[4][4]={{0.}}, sb[4]={0.}, m[4] = {0}; Mat A( 4, 4, CV_64F, sa ), B( 4, 1, CV_64F, sb ); Mat MM( 4, 1, CV_64F, m ); for( int i = 0; i < count; i++ ) { sa[0][0] += a[i].x*a[i].x + a[i].y*a[i].y; sa[0][2] += a[i].x; sa[0][3] += a[i].y; sb[0] += a[i].x*b[i].x + a[i].y*b[i].y; sb[1] += a[i].x*b[i].y - a[i].y*b[i].x; sb[2] += b[i].x; sb[3] += b[i].y; } sa[1][1] = sa[0][0]; sa[2][1] = sa[1][2] = -sa[0][3]; sa[3][1] = sa[1][3] = sa[2][0] = sa[0][2]; sa[2][2] = sa[3][3] = count; sa[3][0] = sa[0][3]; solve( A, B, MM, DECOMP_EIG ); double* om = M.ptr(); om[0] = om[4] = m[0]; om[1] = -m[1]; om[3] = m[1]; om[2] = m[2]; om[5] = m[3]; } } } cv::Mat cv::estimateRigidTransform( InputArray src1, InputArray src2, bool fullAffine ) { CV_INSTRUMENT_REGION() Mat M(2, 3, CV_64F), A = src1.getMat(), B = src2.getMat(); const int COUNT = 15; const int WIDTH = 160, HEIGHT = 120; const int RANSAC_MAX_ITERS = 500; const int RANSAC_SIZE0 = 3; const double RANSAC_GOOD_RATIO = 0.5; std::vector pA, pB; std::vector good_idx; std::vector status; double scale = 1.; int i, j, k, k1; RNG rng((uint64)-1); int good_count = 0; if( A.size() != B.size() ) CV_Error( Error::StsUnmatchedSizes, "Both input images must have the same size" ); if( A.type() != B.type() ) CV_Error( Error::StsUnmatchedFormats, "Both input images must have the same data type" ); int count = A.checkVector(2); if( count > 0 ) { A.reshape(2, count).convertTo(pA, CV_32F); B.reshape(2, count).convertTo(pB, CV_32F); } else if( A.depth() == CV_8U ) { int cn = A.channels(); CV_Assert( cn == 1 || cn == 3 || cn == 4 ); Size sz0 = A.size(); Size sz1(WIDTH, HEIGHT); scale = std::max(1., std::max( (double)sz1.width/sz0.width, (double)sz1.height/sz0.height )); sz1.width = cvRound( sz0.width * scale ); sz1.height = cvRound( sz0.height * scale ); bool equalSizes = sz1.width == sz0.width && sz1.height == sz0.height; if( !equalSizes || cn != 1 ) { Mat sA, sB; if( cn != 1 ) { Mat gray; cvtColor(A, gray, COLOR_BGR2GRAY); resize(gray, sA, sz1, 0., 0., INTER_AREA); cvtColor(B, gray, COLOR_BGR2GRAY); resize(gray, sB, sz1, 0., 0., INTER_AREA); } else { resize(A, sA, sz1, 0., 0., INTER_AREA); resize(B, sB, sz1, 0., 0., INTER_AREA); } A = sA; B = sB; } int count_y = COUNT; int count_x = cvRound((double)COUNT*sz1.width/sz1.height); count = count_x * count_y; pA.resize(count); pB.resize(count); status.resize(count); for( i = 0, k = 0; i < count_y; i++ ) for( j = 0; j < count_x; j++, k++ ) { pA[k].x = (j+0.5f)*sz1.width/count_x; pA[k].y = (i+0.5f)*sz1.height/count_y; } // find the corresponding points in B calcOpticalFlowPyrLK(A, B, pA, pB, status, noArray(), Size(21, 21), 3, TermCriteria(TermCriteria::MAX_ITER,40,0.1)); // repack the remained points for( i = 0, k = 0; i < count; i++ ) if( status[i] ) { if( i > k ) { pA[k] = pA[i]; pB[k] = pB[i]; } k++; } count = k; pA.resize(count); pB.resize(count); } else CV_Error( Error::StsUnsupportedFormat, "Both input images must have either 8uC1 or 8uC3 type" ); good_idx.resize(count); if( count < RANSAC_SIZE0 ) return Mat(); Rect brect = boundingRect(pB); // RANSAC stuff: // 1. find the consensus for( k = 0; k < RANSAC_MAX_ITERS; k++ ) { int idx[RANSAC_SIZE0]; Point2f a[RANSAC_SIZE0]; Point2f b[RANSAC_SIZE0]; // choose random 3 non-coplanar points from A & B for( i = 0; i < RANSAC_SIZE0; i++ ) { for( k1 = 0; k1 < RANSAC_MAX_ITERS; k1++ ) { idx[i] = rng.uniform(0, count); for( j = 0; j < i; j++ ) { if( idx[j] == idx[i] ) break; // check that the points are not very close one each other if( fabs(pA[idx[i]].x - pA[idx[j]].x) + fabs(pA[idx[i]].y - pA[idx[j]].y) < FLT_EPSILON ) break; if( fabs(pB[idx[i]].x - pB[idx[j]].x) + fabs(pB[idx[i]].y - pB[idx[j]].y) < FLT_EPSILON ) break; } if( j < i ) continue; if( i+1 == RANSAC_SIZE0 ) { // additional check for non-complanar vectors a[0] = pA[idx[0]]; a[1] = pA[idx[1]]; a[2] = pA[idx[2]]; b[0] = pB[idx[0]]; b[1] = pB[idx[1]]; b[2] = pB[idx[2]]; double dax1 = a[1].x - a[0].x, day1 = a[1].y - a[0].y; double dax2 = a[2].x - a[0].x, day2 = a[2].y - a[0].y; double dbx1 = b[1].x - b[0].x, dby1 = b[1].y - b[0].y; double dbx2 = b[2].x - b[0].x, dby2 = b[2].y - b[0].y; const double eps = 0.01; if( fabs(dax1*day2 - day1*dax2) < eps*std::sqrt(dax1*dax1+day1*day1)*std::sqrt(dax2*dax2+day2*day2) || fabs(dbx1*dby2 - dby1*dbx2) < eps*std::sqrt(dbx1*dbx1+dby1*dby1)*std::sqrt(dbx2*dbx2+dby2*dby2) ) continue; } break; } if( k1 >= RANSAC_MAX_ITERS ) break; } if( i < RANSAC_SIZE0 ) continue; // estimate the transformation using 3 points getRTMatrix( a, b, 3, M, fullAffine ); const double* m = M.ptr(); for( i = 0, good_count = 0; i < count; i++ ) { if( std::abs( m[0]*pA[i].x + m[1]*pA[i].y + m[2] - pB[i].x ) + std::abs( m[3]*pA[i].x + m[4]*pA[i].y + m[5] - pB[i].y ) < std::max(brect.width,brect.height)*0.05 ) good_idx[good_count++] = i; } if( good_count >= count*RANSAC_GOOD_RATIO ) break; } if( k >= RANSAC_MAX_ITERS ) return Mat(); if( good_count < count ) { for( i = 0; i < good_count; i++ ) { j = good_idx[i]; pA[i] = pA[j]; pB[i] = pB[j]; } } getRTMatrix( &pA[0], &pB[0], good_count, M, fullAffine ); M.at(0, 2) /= scale; M.at(1, 2) /= scale; return M; } /* End of file. */