mirror of
https://github.com/opencv/opencv.git
synced 2024-11-24 11:10:21 +08:00
Merge pull request #15799 from Cpitis:feature/parallelization
Parallelize pyrDown & calcSharrDeriv * ::pyrDown has been parallelized * calcSharrDeriv parallelized * Fixed whitespace * Set granularity based on the number of threads enabled * Granularity changed to cv::getNumThreads(), so each thread should now receive a stripe of 1/n of the rows * imgproc: move PyrDownInvoker<CastOp>::operator() implementation * imgproc(pyramid): remove syloopboundary() * video: SharrDerivInvoker replace 'Mat*' => 'Mat&' fields
This commit is contained in:
parent
c2f2ea6b85
commit
d2e02779c4
@ -719,29 +719,45 @@ template <> int PyrUpVecV<float, float>(float** src, float** dst, int width)
|
||||
|
||||
#endif
|
||||
|
||||
template<class CastOp>
|
||||
struct PyrDownInvoker : ParallelLoopBody
|
||||
{
|
||||
PyrDownInvoker(const Mat& src, const Mat& dst, int borderType, int **tabR, int **tabM, int **tabL)
|
||||
{
|
||||
_src = &src;
|
||||
_dst = &dst;
|
||||
_borderType = borderType;
|
||||
_tabR = tabR;
|
||||
_tabM = tabM;
|
||||
_tabL = tabL;
|
||||
}
|
||||
|
||||
void operator()(const Range& range) const CV_OVERRIDE;
|
||||
|
||||
int **_tabR;
|
||||
int **_tabM;
|
||||
int **_tabL;
|
||||
const Mat *_src;
|
||||
const Mat *_dst;
|
||||
int _borderType;
|
||||
};
|
||||
|
||||
template<class CastOp> void
|
||||
pyrDown_( const Mat& _src, Mat& _dst, int borderType )
|
||||
{
|
||||
const int PD_SZ = 5;
|
||||
typedef typename CastOp::type1 WT;
|
||||
typedef typename CastOp::rtype T;
|
||||
|
||||
CV_Assert( !_src.empty() );
|
||||
Size ssize = _src.size(), dsize = _dst.size();
|
||||
int cn = _src.channels();
|
||||
int bufstep = (int)alignSize(dsize.width*cn, 16);
|
||||
AutoBuffer<WT> _buf(bufstep*PD_SZ + 16);
|
||||
WT* buf = alignPtr((WT*)_buf.data(), 16);
|
||||
|
||||
int tabL[CV_CN_MAX*(PD_SZ+2)], tabR[CV_CN_MAX*(PD_SZ+2)];
|
||||
AutoBuffer<int> _tabM(dsize.width*cn);
|
||||
int* tabM = _tabM.data();
|
||||
WT* rows[PD_SZ];
|
||||
CastOp castOp;
|
||||
|
||||
CV_Assert( ssize.width > 0 && ssize.height > 0 &&
|
||||
std::abs(dsize.width*2 - ssize.width) <= 2 &&
|
||||
std::abs(dsize.height*2 - ssize.height) <= 2 );
|
||||
int sy0 = -PD_SZ/2, sy = sy0, width0 = std::min((ssize.width-PD_SZ/2-1)/2 + 1, dsize.width);
|
||||
int width0 = std::min((ssize.width-PD_SZ/2-1)/2 + 1, dsize.width);
|
||||
|
||||
for (int x = 0; x <= PD_SZ+1; x++)
|
||||
{
|
||||
@ -754,27 +770,51 @@ pyrDown_( const Mat& _src, Mat& _dst, int borderType )
|
||||
}
|
||||
}
|
||||
|
||||
for (int x = 0; x < dsize.width*cn; x++)
|
||||
tabM[x] = (x/cn)*2*cn + x % cn;
|
||||
|
||||
int *tabLPtr = tabL;
|
||||
int *tabRPtr = tabR;
|
||||
|
||||
cv::parallel_for_(Range(0,dsize.height), cv::PyrDownInvoker<CastOp>(_src, _dst, borderType, &tabRPtr, &tabM, &tabLPtr), cv::getNumThreads());
|
||||
}
|
||||
|
||||
template<class CastOp>
|
||||
void PyrDownInvoker<CastOp>::operator()(const Range& range) const
|
||||
{
|
||||
const int PD_SZ = 5;
|
||||
typedef typename CastOp::type1 WT;
|
||||
typedef typename CastOp::rtype T;
|
||||
Size ssize = _src->size(), dsize = _dst->size();
|
||||
int cn = _src->channels();
|
||||
int bufstep = (int)alignSize(dsize.width*cn, 16);
|
||||
AutoBuffer<WT> _buf(bufstep*PD_SZ + 16);
|
||||
WT* buf = alignPtr((WT*)_buf.data(), 16);
|
||||
WT* rows[PD_SZ];
|
||||
CastOp castOp;
|
||||
|
||||
int sy0 = -PD_SZ/2, sy = range.start * 2 + sy0, width0 = std::min((ssize.width-PD_SZ/2-1)/2 + 1, dsize.width);
|
||||
|
||||
ssize.width *= cn;
|
||||
dsize.width *= cn;
|
||||
width0 *= cn;
|
||||
|
||||
for (int x = 0; x < dsize.width; x++)
|
||||
tabM[x] = (x/cn)*2*cn + x % cn;
|
||||
|
||||
for (int y = 0; y < dsize.height; y++)
|
||||
for (int y = range.start; y < range.end; y++)
|
||||
{
|
||||
T* dst = _dst.ptr<T>(y);
|
||||
T* dst = (T*)_dst->ptr<T>(y);
|
||||
WT *row0, *row1, *row2, *row3, *row4;
|
||||
|
||||
// fill the ring buffer (horizontal convolution and decimation)
|
||||
for( ; sy <= y*2 + 2; sy++ )
|
||||
int sy_limit = y*2 + 2;
|
||||
for( ; sy <= sy_limit; sy++ )
|
||||
{
|
||||
WT* row = buf + ((sy - sy0) % PD_SZ)*bufstep;
|
||||
int _sy = borderInterpolate(sy, ssize.height, borderType);
|
||||
const T* src = _src.ptr<T>(_sy);
|
||||
int _sy = borderInterpolate(sy, ssize.height, _borderType);
|
||||
const T* src = _src->ptr<T>(_sy);
|
||||
|
||||
do {
|
||||
int x = 0;
|
||||
const int* tabL = *_tabL;
|
||||
for( ; x < cn; x++ )
|
||||
{
|
||||
row[x] = src[tabL[x+cn*2]]*6 + (src[tabL[x+cn]] + src[tabL[x+cn*3]])*4 +
|
||||
@ -832,13 +872,14 @@ pyrDown_( const Mat& _src, Mat& _dst, int borderType )
|
||||
{
|
||||
for( ; x < width0; x++ )
|
||||
{
|
||||
int sx = tabM[x];
|
||||
int sx = (*_tabM)[x];
|
||||
row[x] = src[sx]*6 + (src[sx - cn] + src[sx + cn])*4 +
|
||||
src[sx - cn*2] + src[sx + cn*2];
|
||||
}
|
||||
}
|
||||
|
||||
// tabR
|
||||
const int* tabR = *_tabR;
|
||||
for (int x_ = 0; x < dsize.width; x++, x_++)
|
||||
{
|
||||
row[x] = src[tabR[x_+cn*2]]*6 + (src[tabR[x_+cn]] + src[tabR[x_+cn*3]])*4 +
|
||||
|
@ -56,9 +56,18 @@ static void calcSharrDeriv(const cv::Mat& src, cv::Mat& dst)
|
||||
{
|
||||
using namespace cv;
|
||||
using cv::detail::deriv_type;
|
||||
int rows = src.rows, cols = src.cols, cn = src.channels(), colsn = cols*cn, depth = src.depth();
|
||||
int rows = src.rows, cols = src.cols, cn = src.channels(), depth = src.depth();
|
||||
CV_Assert(depth == CV_8U);
|
||||
dst.create(rows, cols, CV_MAKETYPE(DataType<deriv_type>::depth, cn*2));
|
||||
parallel_for_(Range(0, rows), cv::detail::SharrDerivInvoker(src, dst), cv::getNumThreads());
|
||||
}
|
||||
|
||||
}//namespace
|
||||
|
||||
void cv::detail::SharrDerivInvoker::operator()(const Range& range) const
|
||||
{
|
||||
using cv::detail::deriv_type;
|
||||
int rows = src.rows, cols = src.cols, cn = src.channels(), colsn = cols*cn;
|
||||
|
||||
#ifdef HAVE_TEGRA_OPTIMIZATION
|
||||
if (tegra::useTegra() && tegra::calcSharrDeriv(src, dst))
|
||||
@ -73,12 +82,12 @@ static void calcSharrDeriv(const cv::Mat& src, cv::Mat& dst)
|
||||
v_int16x8 c3 = v_setall_s16(3), c10 = v_setall_s16(10);
|
||||
#endif
|
||||
|
||||
for( y = 0; y < rows; y++ )
|
||||
for( y = range.start; y < range.end; y++ )
|
||||
{
|
||||
const uchar* srow0 = src.ptr<uchar>(y > 0 ? y-1 : rows > 1 ? 1 : 0);
|
||||
const uchar* srow1 = src.ptr<uchar>(y);
|
||||
const uchar* srow2 = src.ptr<uchar>(y < rows-1 ? y+1 : rows > 1 ? rows-2 : 0);
|
||||
deriv_type* drow = dst.ptr<deriv_type>(y);
|
||||
deriv_type* drow = (deriv_type *)dst.ptr<deriv_type>(y);
|
||||
|
||||
// do vertical convolution
|
||||
x = 0;
|
||||
@ -143,8 +152,6 @@ static void calcSharrDeriv(const cv::Mat& src, cv::Mat& dst)
|
||||
}
|
||||
}
|
||||
|
||||
}//namespace
|
||||
|
||||
cv::detail::LKTrackerInvoker::LKTrackerInvoker(
|
||||
const Mat& _prevImg, const Mat& _prevDeriv, const Mat& _nextImg,
|
||||
const Point2f* _prevPts, Point2f* _nextPts,
|
||||
|
@ -7,6 +7,18 @@ namespace detail
|
||||
|
||||
typedef short deriv_type;
|
||||
|
||||
struct SharrDerivInvoker : ParallelLoopBody
|
||||
{
|
||||
SharrDerivInvoker(const Mat& _src, const Mat& _dst)
|
||||
: src(_src), dst(_dst)
|
||||
{ }
|
||||
|
||||
void operator()(const Range& range) const CV_OVERRIDE;
|
||||
|
||||
const Mat& src;
|
||||
const Mat& dst;
|
||||
};
|
||||
|
||||
struct LKTrackerInvoker : ParallelLoopBody
|
||||
{
|
||||
LKTrackerInvoker( const Mat& _prevImg, const Mat& _prevDeriv, const Mat& _nextImg,
|
||||
|
Loading…
Reference in New Issue
Block a user