diff --git a/modules/imgproc/src/pyramids.cpp b/modules/imgproc/src/pyramids.cpp
index 6fb61bf6cd..ec4427f219 100644
--- a/modules/imgproc/src/pyramids.cpp
+++ b/modules/imgproc/src/pyramids.cpp
@@ -719,29 +719,45 @@ template <> int PyrUpVecV<float, float>(float** src, float** dst, int width)
 
 #endif
 
+template<class CastOp, class VecOp>
+struct PyrDownInvoker : ParallelLoopBody
+{
+    PyrDownInvoker(const Mat& src, const Mat& dst, int borderType, int **tabR, int **tabM, int **tabL)
+    {
+        _src = &src;
+        _dst = &dst;
+        _borderType = borderType;
+        _tabR = tabR;
+        _tabM = tabM;
+        _tabL = tabL;
+    }
+
+    void operator()(const Range& range) const CV_OVERRIDE;
+
+    int **_tabR;
+    int **_tabM;
+    int **_tabL;
+    const Mat *_src;
+    const Mat *_dst;
+    int _borderType;
+};
+
 template<class CastOp, class VecOp> void
 pyrDown_( const Mat& _src, Mat& _dst, int borderType )
 {
     const int PD_SZ = 5;
-    typedef typename CastOp::type1 WT;
-    typedef typename CastOp::rtype T;
-
     CV_Assert( !_src.empty() );
     Size ssize = _src.size(), dsize = _dst.size();
     int cn = _src.channels();
-    int bufstep = (int)alignSize(dsize.width*cn, 16);
-    AutoBuffer<WT> _buf(bufstep*PD_SZ + 16);
-    WT* buf = alignPtr((WT*)_buf.data(), 16);
+
     int tabL[CV_CN_MAX*(PD_SZ+2)], tabR[CV_CN_MAX*(PD_SZ+2)];
     AutoBuffer<int> _tabM(dsize.width*cn);
     int* tabM = _tabM.data();
-    WT* rows[PD_SZ];
-    CastOp castOp;
 
     CV_Assert( ssize.width > 0 && ssize.height > 0 &&
                std::abs(dsize.width*2 - ssize.width) <= 2 &&
                std::abs(dsize.height*2 - ssize.height) <= 2 );
-    int sy0 = -PD_SZ/2, sy = sy0, width0 = std::min((ssize.width-PD_SZ/2-1)/2 + 1, dsize.width);
+    int width0 = std::min((ssize.width-PD_SZ/2-1)/2 + 1, dsize.width);
 
     for (int x = 0; x <= PD_SZ+1; x++)
     {
@@ -754,27 +770,51 @@ pyrDown_( const Mat& _src, Mat& _dst, int borderType )
         }
     }
 
+    for (int x = 0; x < dsize.width*cn; x++)
+        tabM[x] = (x/cn)*2*cn + x % cn;
+
+    int *tabLPtr = tabL;
+    int *tabRPtr = tabR;
+
+    cv::parallel_for_(Range(0,dsize.height), cv::PyrDownInvoker<CastOp, VecOp>(_src, _dst, borderType, &tabRPtr, &tabM, &tabLPtr), cv::getNumThreads());
+}
+
+template<class CastOp, class VecOp>
+void PyrDownInvoker<CastOp, VecOp>::operator()(const Range& range) const
+{
+    const int PD_SZ = 5;
+    typedef typename CastOp::type1 WT;
+    typedef typename CastOp::rtype T;
+    Size ssize = _src->size(), dsize = _dst->size();
+    int cn = _src->channels();
+    int bufstep = (int)alignSize(dsize.width*cn, 16);
+    AutoBuffer<WT> _buf(bufstep*PD_SZ + 16);
+    WT* buf = alignPtr((WT*)_buf.data(), 16);
+    WT* rows[PD_SZ];
+    CastOp castOp;
+
+    int sy0 = -PD_SZ/2, sy = range.start * 2 + sy0, width0 = std::min((ssize.width-PD_SZ/2-1)/2 + 1, dsize.width);
+
     ssize.width *= cn;
     dsize.width *= cn;
     width0 *= cn;
 
-    for (int x = 0; x < dsize.width; x++)
-        tabM[x] = (x/cn)*2*cn + x % cn;
-
-    for (int y = 0; y < dsize.height; y++)
+    for (int y = range.start; y < range.end; y++)
     {
-        T* dst = _dst.ptr<T>(y);
+        T* dst = (T*)_dst->ptr<T>(y);
         WT *row0, *row1, *row2, *row3, *row4;
 
         // fill the ring buffer (horizontal convolution and decimation)
-        for( ; sy <= y*2 + 2; sy++ )
+        int sy_limit = y*2 + 2;
+        for( ; sy <= sy_limit; sy++ )
         {
             WT* row = buf + ((sy - sy0) % PD_SZ)*bufstep;
-            int _sy = borderInterpolate(sy, ssize.height, borderType);
-            const T* src = _src.ptr<T>(_sy);
+            int _sy = borderInterpolate(sy, ssize.height, _borderType);
+            const T* src = _src->ptr<T>(_sy);
 
             do
             {
                 int x = 0;
+                const int* tabL = *_tabL;
                 for( ; x < cn; x++ )
                 {
                     row[x] = src[tabL[x+cn*2]]*6 + (src[tabL[x+cn]] + src[tabL[x+cn*3]])*4 +
@@ -832,13 +872,14 @@ pyrDown_( const Mat& _src, Mat& _dst, int borderType )
                 {
                     for( ; x < width0; x++ )
                     {
-                        int sx = tabM[x];
+                        int sx = (*_tabM)[x];
                         row[x] = src[sx]*6 + (src[sx - cn] + src[sx + cn])*4 +
                                  src[sx - cn*2] + src[sx + cn*2];
                     }
                 }
 
                 // tabR
+                const int* tabR = *_tabR;
                 for (int x_ = 0; x < dsize.width; x++, x_++)
                 {
                     row[x] = src[tabR[x_+cn*2]]*6 + (src[tabR[x_+cn]] + src[tabR[x_+cn*3]])*4 +
diff --git a/modules/video/src/lkpyramid.cpp b/modules/video/src/lkpyramid.cpp
index 3e81f3be58..22c6874f59 100644
--- a/modules/video/src/lkpyramid.cpp
+++ b/modules/video/src/lkpyramid.cpp
@@ -56,9 +56,18 @@ static void calcSharrDeriv(const cv::Mat& src, cv::Mat& dst)
 {
     using namespace cv;
     using cv::detail::deriv_type;
-    int rows = src.rows, cols = src.cols, cn = src.channels(), colsn = cols*cn, depth = src.depth();
+    int rows = src.rows, cols = src.cols, cn = src.channels(), depth = src.depth();
     CV_Assert(depth == CV_8U);
     dst.create(rows, cols, CV_MAKETYPE(DataType<deriv_type>::depth, cn*2));
+    parallel_for_(Range(0, rows), cv::detail::SharrDerivInvoker(src, dst), cv::getNumThreads());
+}
+
+}//namespace
+
+void cv::detail::SharrDerivInvoker::operator()(const Range& range) const
+{
+    using cv::detail::deriv_type;
+    int rows = src.rows, cols = src.cols, cn = src.channels(), colsn = cols*cn;
 
 #ifdef HAVE_TEGRA_OPTIMIZATION
     if (tegra::useTegra() && tegra::calcSharrDeriv(src, dst))
@@ -73,12 +82,12 @@ static void calcSharrDeriv(const cv::Mat& src, cv::Mat& dst)
     v_int16x8 c3 = v_setall_s16(3), c10 = v_setall_s16(10);
 #endif
 
-    for( y = 0; y < rows; y++ )
+    for( y = range.start; y < range.end; y++ )
     {
         const uchar* srow0 = src.ptr<uchar>(y > 0 ? y-1 : rows > 1 ? 1 : 0);
         const uchar* srow1 = src.ptr<uchar>(y);
         const uchar* srow2 = src.ptr<uchar>(y < rows-1 ? y+1 : rows > 1 ? rows-2 : 0);
-        deriv_type* drow = dst.ptr<deriv_type>(y);
+        deriv_type* drow = (deriv_type *)dst.ptr<deriv_type>(y);
 
         // do vertical convolution
         x = 0;
@@ -143,8 +152,6 @@ static void calcSharrDeriv(const cv::Mat& src, cv::Mat& dst)
     }
 }
 
-}//namespace
-
 cv::detail::LKTrackerInvoker::LKTrackerInvoker(
                       const Mat& _prevImg, const Mat& _prevDeriv, const Mat& _nextImg,
                       const Point2f* _prevPts, Point2f* _nextPts,
diff --git a/modules/video/src/lkpyramid.hpp b/modules/video/src/lkpyramid.hpp
index 9e62d06b81..16b0da189e 100644
--- a/modules/video/src/lkpyramid.hpp
+++ b/modules/video/src/lkpyramid.hpp
@@ -7,6 +7,18 @@ namespace detail
 
     typedef short deriv_type;
 
+    struct SharrDerivInvoker : ParallelLoopBody
+    {
+        SharrDerivInvoker(const Mat& _src, const Mat& _dst)
+            : src(_src), dst(_dst)
+        { }
+
+        void operator()(const Range& range) const CV_OVERRIDE;
+
+        const Mat& src;
+        const Mat& dst;
+    };
+
     struct LKTrackerInvoker : ParallelLoopBody
     {
         LKTrackerInvoker( const Mat& _prevImg, const Mat& _prevDeriv, const Mat& _nextImg,
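
For context, every hunk above applies the same cv::ParallelLoopBody / cv::parallel_for_ idiom: the per-row loop body moves into an invoker whose operator() processes only the rows in [range.start, range.end), so each worker touches a disjoint band of rows and no locking is needed. Below is a minimal, self-contained sketch of that idiom, not part of the patch; RowScaleInvoker and scaleRowsParallel are hypothetical names invented for illustration, while ParallelLoopBody, parallel_for_, Range, and getNumThreads are the real OpenCV API the patch uses.

// Illustrative sketch of the row-parallel pattern; hypothetical names.
#include <opencv2/core.hpp>

struct RowScaleInvoker : cv::ParallelLoopBody
{
    RowScaleInvoker(const cv::Mat& src, cv::Mat& dst) : src_(src), dst_(dst) {}

    // Called concurrently by the framework; each call receives a disjoint
    // band of rows [range.start, range.end), so no synchronization is needed.
    void operator()(const cv::Range& range) const CV_OVERRIDE
    {
        for (int y = range.start; y < range.end; y++)
        {
            const uchar* s = src_.ptr<uchar>(y);
            uchar* d = dst_.ptr<uchar>(y);
            for (int x = 0; x < src_.cols * src_.channels(); x++)
                d[x] = cv::saturate_cast<uchar>(s[x] * 2);
        }
    }

    const cv::Mat& src_;
    cv::Mat& dst_;
};

void scaleRowsParallel(const cv::Mat& src, cv::Mat& dst)
{
    CV_Assert(src.depth() == CV_8U);
    dst.create(src.size(), src.type());
    // Same splitting scheme as the patch: one Range over all rows,
    // with getNumThreads() as the suggested number of stripes.
    cv::parallel_for_(cv::Range(0, src.rows), RowScaleInvoker(src, dst),
                      cv::getNumThreads());
}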