mirror of
https://github.com/opencv/opencv.git
synced 2025-01-18 22:44:02 +08:00
BufferArea: initial version, usage in StereoBM
New class BufferArea is used to hide complexity of buffers allocations and allow instrumentation with valgrind and sanitizers.
This commit is contained in:
parent
6ad390a1cd
commit
55cdeaa6dd
@ -48,8 +48,10 @@
|
||||
#include "precomp.hpp"
|
||||
#include <stdio.h>
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
#include "opencl_kernels_calib3d.hpp"
|
||||
#include "opencv2/core/hal/intrin.hpp"
|
||||
#include "opencv2/core/utils/buffer_area.private.hpp"
|
||||
|
||||
namespace cv
|
||||
{
|
||||
@ -85,6 +87,19 @@ struct StereoBMParams
|
||||
Rect roi1, roi2;
|
||||
int disp12MaxDiff;
|
||||
int dispType;
|
||||
|
||||
inline bool useShorts() const
|
||||
{
|
||||
return preFilterCap <= 31 && SADWindowSize <= 21;
|
||||
}
|
||||
inline bool useFilterSpeckles() const
|
||||
{
|
||||
return speckleRange >= 0 && speckleWindowSize > 0;
|
||||
}
|
||||
inline bool useNormPrefilter() const
|
||||
{
|
||||
return preFilterType == StereoBM::PREFILTER_NORMALIZED_RESPONSE;
|
||||
}
|
||||
};
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
@ -110,10 +125,10 @@ static bool ocl_prefilter_norm(InputArray _input, OutputArray _output, int winsi
|
||||
}
|
||||
#endif
|
||||
|
||||
static void prefilterNorm( const Mat& src, Mat& dst, int winsize, int ftzero, uchar* buf )
|
||||
static void prefilterNorm( const Mat& src, Mat& dst, int winsize, int ftzero, int *buf )
|
||||
{
|
||||
int x, y, wsz2 = winsize/2;
|
||||
int* vsum = (int*)alignPtr(buf + (wsz2 + 1)*sizeof(vsum[0]), 32);
|
||||
int* vsum = buf + (wsz2 + 1);
|
||||
int scale_g = winsize*winsize/8, scale_s = (1024 + scale_g)/(scale_g*2);
|
||||
const int OFS = 256*5, TABSZ = OFS*2 + 256;
|
||||
uchar tab[TABSZ];
|
||||
@ -309,13 +324,77 @@ inline int dispDescale(int v1, int v2, int d)
|
||||
return (int)(v1*256 + (d != 0 ? v2*256/d : 0)); // no need to add 127, this will be converted to float
|
||||
}
|
||||
|
||||
|
||||
class BufferBM
|
||||
{
|
||||
static const int TABSZ = 256;
|
||||
public:
|
||||
std::vector<int*> sad;
|
||||
std::vector<int*> hsad;
|
||||
std::vector<int*> htext;
|
||||
std::vector<uchar*> cbuf0;
|
||||
std::vector<ushort*> sad_short;
|
||||
std::vector<ushort*> hsad_short;
|
||||
int *prefilter[2];
|
||||
uchar tab[TABSZ];
|
||||
private:
|
||||
utils::BufferArea area;
|
||||
|
||||
public:
|
||||
BufferBM(size_t nstripes, size_t width, size_t height, const StereoBMParams& params)
|
||||
: sad(nstripes, NULL),
|
||||
hsad(nstripes, NULL),
|
||||
htext(nstripes, NULL),
|
||||
cbuf0(nstripes, NULL),
|
||||
sad_short(nstripes, NULL),
|
||||
hsad_short(nstripes, NULL)
|
||||
{
|
||||
const int wsz = params.SADWindowSize;
|
||||
const int ndisp = params.numDisparities;
|
||||
const int ftzero = params.preFilterCap;
|
||||
for (size_t i = 0; i < nstripes; ++i)
|
||||
{
|
||||
// 1D: [1][ ndisp ][1]
|
||||
#if CV_SIMD
|
||||
if (params.useShorts())
|
||||
area.allocate(sad_short[i], ndisp + 2);
|
||||
else
|
||||
#endif
|
||||
area.allocate(sad[i], ndisp + 2);
|
||||
|
||||
// 2D: [ wsz/2 + 1 ][ height ][ wsz/2 + 1 ] * [ ndisp ]
|
||||
#if CV_SIMD
|
||||
if (params.useShorts())
|
||||
area.allocate(hsad_short[i], (height + wsz + 2) * ndisp);
|
||||
else
|
||||
#endif
|
||||
area.allocate(hsad[i], (height + wsz + 2) * ndisp);
|
||||
|
||||
// 1D: [ wsz/2 + 1 ][ height ][ wsz/2 + 1 ]
|
||||
area.allocate(htext[i], (height + wsz + 2));
|
||||
|
||||
// 3D: [ wsz/2 + 1 ][ height ][ wsz/2 + 1 ] * [ ndisp ] * [ wsz/2 + 1 ][ wsz/2 + 1 ]
|
||||
area.allocate(cbuf0[i], ((height + wsz + 2) * ndisp * (wsz + 2) + 256));
|
||||
}
|
||||
if (params.useNormPrefilter())
|
||||
{
|
||||
for (size_t i = 0; i < 2; ++i)
|
||||
area.allocate(prefilter[0], width + params.preFilterSize + 2);
|
||||
}
|
||||
area.commit();
|
||||
|
||||
// static table
|
||||
for (int x = 0; x < TABSZ; x++)
|
||||
tab[x] = (uchar)std::abs(x - ftzero);
|
||||
}
|
||||
};
|
||||
|
||||
#if CV_SIMD
|
||||
template <typename dType>
|
||||
static void findStereoCorrespondenceBM_SIMD( const Mat& left, const Mat& right,
|
||||
Mat& disp, Mat& cost, StereoBMParams& state,
|
||||
uchar* buf, int _dy0, int _dy1 )
|
||||
Mat& disp, Mat& cost, const StereoBMParams& state,
|
||||
int _dy0, int _dy1, const BufferBM & bufX, size_t bufNum )
|
||||
{
|
||||
const int ALIGN = CV_SIMD_WIDTH;
|
||||
int x, y, d;
|
||||
int wsz = state.SADWindowSize, wsz2 = wsz/2;
|
||||
int dy0 = MIN(_dy0, wsz2+1), dy1 = MIN(_dy1, wsz2+1);
|
||||
@ -325,15 +404,13 @@ static void findStereoCorrespondenceBM_SIMD( const Mat& left, const Mat& right,
|
||||
int rofs = -MIN(ndisp - 1 + mindisp, 0);
|
||||
int width = left.cols, height = left.rows;
|
||||
int width1 = width - rofs - ndisp + 1;
|
||||
int ftzero = state.preFilterCap;
|
||||
int textureThreshold = state.textureThreshold;
|
||||
int uniquenessRatio = state.uniquenessRatio;
|
||||
const int disp_shift = dispShiftTemplate<dType>::value;
|
||||
dType FILTERED = (dType)((mindisp - 1) << disp_shift);
|
||||
|
||||
ushort *sad, *hsad0, *hsad, *hsad_sub;
|
||||
int *htext;
|
||||
uchar *cbuf0, *cbuf;
|
||||
ushort *hsad, *hsad_sub;
|
||||
uchar *cbuf;
|
||||
const uchar* lptr0 = left.ptr() + lofs;
|
||||
const uchar* rptr0 = right.ptr() + rofs;
|
||||
const uchar *lptr, *lptr_sub, *rptr;
|
||||
@ -343,23 +420,20 @@ static void findStereoCorrespondenceBM_SIMD( const Mat& left, const Mat& right,
|
||||
int cstep = (height + dy0 + dy1)*ndisp;
|
||||
short costbuf = 0;
|
||||
int coststep = cost.data ? (int)(cost.step/sizeof(costbuf)) : 0;
|
||||
const int TABSZ = 256;
|
||||
uchar tab[TABSZ];
|
||||
const uchar * tab = bufX.tab;
|
||||
short v_seq[v_int16::nlanes];
|
||||
for (short i = 0; i < v_int16::nlanes; ++i)
|
||||
v_seq[i] = i;
|
||||
|
||||
sad = (ushort*)alignPtr(buf + sizeof(sad[0]), ALIGN);
|
||||
hsad0 = (ushort*)alignPtr(sad + ndisp + 1 + dy0*ndisp, ALIGN);
|
||||
htext = (int*)alignPtr((int*)(hsad0 + (height+dy1)*ndisp) + wsz2 + 2, ALIGN);
|
||||
cbuf0 = (uchar*)alignPtr((uchar*)(htext + height + wsz2 + 2) + dy0*ndisp, ALIGN);
|
||||
|
||||
for( x = 0; x < TABSZ; x++ )
|
||||
tab[x] = (uchar)std::abs(x - ftzero);
|
||||
ushort *sad = bufX.sad_short[bufNum] + 1;
|
||||
ushort *hsad0 = bufX.hsad_short[bufNum] + (wsz2 + 1) * ndisp;
|
||||
int *htext = bufX.htext[bufNum] + (wsz2 + 1);
|
||||
uchar *cbuf0 = bufX.cbuf0[bufNum] + (wsz2 + 1) * ndisp;
|
||||
|
||||
// initialize buffers
|
||||
memset( hsad0 - dy0*ndisp, 0, (height + dy0 + dy1)*ndisp*sizeof(hsad0[0]) );
|
||||
memset( htext - wsz2 - 1, 0, (height + wsz + 1)*sizeof(htext[0]) );
|
||||
memset(sad - 1, 0, (ndisp + 2) * sizeof(sad[0]));
|
||||
memset(hsad0 - dy0 * ndisp, 0, (height + wsz + 2) * ndisp * sizeof(hsad[0]));
|
||||
memset(htext - dy0, 0, (height + wsz + 2) * sizeof(htext[0]));
|
||||
|
||||
for( x = -wsz2-1; x < wsz2; x++ )
|
||||
{
|
||||
@ -594,10 +668,9 @@ template <typename mType>
|
||||
static void
|
||||
findStereoCorrespondenceBM( const Mat& left, const Mat& right,
|
||||
Mat& disp, Mat& cost, const StereoBMParams& state,
|
||||
uchar* buf, int _dy0, int _dy1 )
|
||||
int _dy0, int _dy1, const BufferBM & bufX, size_t bufNum )
|
||||
{
|
||||
|
||||
const int ALIGN = CV_SIMD_WIDTH;
|
||||
int x, y, d;
|
||||
int wsz = state.SADWindowSize, wsz2 = wsz/2;
|
||||
int dy0 = MIN(_dy0, wsz2+1), dy1 = MIN(_dy1, wsz2+1);
|
||||
@ -607,14 +680,13 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
|
||||
int rofs = -MIN(ndisp - 1 + mindisp, 0);
|
||||
int width = left.cols, height = left.rows;
|
||||
int width1 = width - rofs - ndisp + 1;
|
||||
int ftzero = state.preFilterCap;
|
||||
int textureThreshold = state.textureThreshold;
|
||||
int uniquenessRatio = state.uniquenessRatio;
|
||||
const int disp_shift = dispShiftTemplate<mType>::value;
|
||||
mType FILTERED = (mType)((mindisp - 1) << disp_shift);
|
||||
|
||||
int *sad, *hsad0, *hsad, *hsad_sub, *htext;
|
||||
uchar *cbuf0, *cbuf;
|
||||
int *hsad, *hsad_sub;
|
||||
uchar *cbuf;
|
||||
const uchar* lptr0 = left.ptr() + lofs;
|
||||
const uchar* rptr0 = right.ptr() + rofs;
|
||||
const uchar *lptr, *lptr_sub, *rptr;
|
||||
@ -624,8 +696,7 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
|
||||
int cstep = (height+dy0+dy1)*ndisp;
|
||||
int costbuf = 0;
|
||||
int coststep = cost.data ? (int)(cost.step/sizeof(costbuf)) : 0;
|
||||
const int TABSZ = 256;
|
||||
uchar tab[TABSZ];
|
||||
const uchar * tab = bufX.tab;
|
||||
|
||||
#if CV_SIMD
|
||||
int v_seq[v_int32::nlanes];
|
||||
@ -634,17 +705,15 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
|
||||
v_int32 d0_4 = vx_load(v_seq), dd_4 = vx_setall_s32(v_int32::nlanes);
|
||||
#endif
|
||||
|
||||
sad = (int*)alignPtr(buf + sizeof(sad[0]), ALIGN);
|
||||
hsad0 = (int*)alignPtr(sad + ndisp + 1 + dy0*ndisp, ALIGN);
|
||||
htext = (int*)alignPtr((int*)(hsad0 + (height+dy1)*ndisp) + wsz2 + 2, ALIGN);
|
||||
cbuf0 = (uchar*)alignPtr((uchar*)(htext + height + wsz2 + 2) + dy0*ndisp, ALIGN);
|
||||
|
||||
for( x = 0; x < TABSZ; x++ )
|
||||
tab[x] = (uchar)std::abs(x - ftzero);
|
||||
int *sad = bufX.sad[bufNum] + 1;
|
||||
int *hsad0 = bufX.hsad[bufNum] + (wsz2 + 1) * ndisp;
|
||||
int *htext = bufX.htext[bufNum] + (wsz2 + 1);
|
||||
uchar *cbuf0 = bufX.cbuf0[bufNum] + (wsz2 + 1) * ndisp;
|
||||
|
||||
// initialize buffers
|
||||
memset( hsad0 - dy0*ndisp, 0, (height + dy0 + dy1)*ndisp*sizeof(hsad0[0]) );
|
||||
memset( htext - wsz2 - 1, 0, (height + wsz + 1)*sizeof(htext[0]) );
|
||||
memset(sad - 1, 0, (ndisp + 2) * sizeof(sad[0]));
|
||||
memset(hsad0 - dy0 * ndisp, 0, (height + wsz + 2) * ndisp * sizeof(hsad[0]));
|
||||
memset(htext - dy0, 0, (height + wsz + 2) * sizeof(htext[0]));
|
||||
|
||||
for( x = -wsz2-1; x < wsz2; x++ )
|
||||
{
|
||||
@ -890,7 +959,7 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
|
||||
#ifdef HAVE_OPENCL
|
||||
static bool ocl_prefiltering(InputArray left0, InputArray right0, OutputArray left, OutputArray right, StereoBMParams* state)
|
||||
{
|
||||
if( state->preFilterType == StereoBM::PREFILTER_NORMALIZED_RESPONSE )
|
||||
if (state->useNormPrefilter())
|
||||
{
|
||||
if(!ocl_prefilter_norm( left0, left, state->preFilterSize, state->preFilterCap))
|
||||
return false;
|
||||
@ -911,29 +980,28 @@ static bool ocl_prefiltering(InputArray left0, InputArray right0, OutputArray le
|
||||
struct PrefilterInvoker : public ParallelLoopBody
|
||||
{
|
||||
PrefilterInvoker(const Mat& left0, const Mat& right0, Mat& left, Mat& right,
|
||||
uchar* buf0, uchar* buf1, StereoBMParams* _state)
|
||||
const BufferBM &bufX_, const StereoBMParams &state_)
|
||||
: bufX(bufX_), state(state_)
|
||||
{
|
||||
imgs0[0] = &left0; imgs0[1] = &right0;
|
||||
imgs[0] = &left; imgs[1] = &right;
|
||||
buf[0] = buf0; buf[1] = buf1;
|
||||
state = _state;
|
||||
}
|
||||
|
||||
void operator()(const Range& range) const CV_OVERRIDE
|
||||
{
|
||||
for( int i = range.start; i < range.end; i++ )
|
||||
{
|
||||
if( state->preFilterType == StereoBM::PREFILTER_NORMALIZED_RESPONSE )
|
||||
prefilterNorm( *imgs0[i], *imgs[i], state->preFilterSize, state->preFilterCap, buf[i] );
|
||||
if (state.useNormPrefilter())
|
||||
prefilterNorm( *imgs0[i], *imgs[i], state.preFilterSize, state.preFilterCap, bufX.prefilter[i] );
|
||||
else
|
||||
prefilterXSobel( *imgs0[i], *imgs[i], state->preFilterCap );
|
||||
prefilterXSobel( *imgs0[i], *imgs[i], state.preFilterCap );
|
||||
}
|
||||
}
|
||||
|
||||
const Mat* imgs0[2];
|
||||
Mat* imgs[2];
|
||||
uchar* buf[2];
|
||||
StereoBMParams* state;
|
||||
const BufferBM &bufX;
|
||||
const StereoBMParams &state;
|
||||
};
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
@ -986,18 +1054,17 @@ static bool ocl_stereobm( InputArray _left, InputArray _right,
|
||||
struct FindStereoCorrespInvoker : public ParallelLoopBody
|
||||
{
|
||||
FindStereoCorrespInvoker( const Mat& _left, const Mat& _right,
|
||||
Mat& _disp, StereoBMParams* _state,
|
||||
int _nstripes, size_t _stripeBufSize,
|
||||
bool _useShorts, Rect _validDisparityRect,
|
||||
Mat& _slidingSumBuf, Mat& _cost )
|
||||
Mat& _disp, const StereoBMParams &_state,
|
||||
int _nstripes,
|
||||
Rect _validDisparityRect,
|
||||
Mat& _cost, const BufferBM & buf_ )
|
||||
: state(_state), buf(buf_)
|
||||
{
|
||||
CV_Assert( _disp.type() == CV_16S || _disp.type() == CV_32S );
|
||||
left = &_left; right = &_right;
|
||||
disp = &_disp; state = _state;
|
||||
nstripes = _nstripes; stripeBufSize = _stripeBufSize;
|
||||
useShorts = _useShorts;
|
||||
disp = &_disp;
|
||||
nstripes = _nstripes;
|
||||
validDisparityRect = _validDisparityRect;
|
||||
slidingSumBuf = &_slidingSumBuf;
|
||||
cost = &_cost;
|
||||
}
|
||||
|
||||
@ -1006,11 +1073,10 @@ struct FindStereoCorrespInvoker : public ParallelLoopBody
|
||||
int cols = left->cols, rows = left->rows;
|
||||
int _row0 = std::min(cvRound(range.start * rows / nstripes), rows);
|
||||
int _row1 = std::min(cvRound(range.end * rows / nstripes), rows);
|
||||
uchar *ptr = slidingSumBuf->ptr() + range.start * stripeBufSize;
|
||||
|
||||
int dispShift = disp->type() == CV_16S ? DISPARITY_SHIFT_16S :
|
||||
DISPARITY_SHIFT_32S;
|
||||
int FILTERED = (state->minDisparity - 1) << dispShift;
|
||||
int FILTERED = (state.minDisparity - 1) << dispShift;
|
||||
|
||||
Rect roi = validDisparityRect & Rect(0, _row0, cols, _row1 - _row0);
|
||||
if( roi.height == 0 )
|
||||
@ -1033,27 +1099,27 @@ struct FindStereoCorrespInvoker : public ParallelLoopBody
|
||||
Mat left_i = left->rowRange(row0, row1);
|
||||
Mat right_i = right->rowRange(row0, row1);
|
||||
Mat disp_i = disp->rowRange(row0, row1);
|
||||
Mat cost_i = state->disp12MaxDiff >= 0 ? cost->rowRange(row0, row1) : Mat();
|
||||
Mat cost_i = state.disp12MaxDiff >= 0 ? cost->rowRange(row0, row1) : Mat();
|
||||
|
||||
#if CV_SIMD
|
||||
if (useShorts)
|
||||
if (state.useShorts())
|
||||
{
|
||||
if( disp_i.type() == CV_16S)
|
||||
findStereoCorrespondenceBM_SIMD<short>( left_i, right_i, disp_i, cost_i, *state, ptr, row0, rows - row1 );
|
||||
findStereoCorrespondenceBM_SIMD<short>( left_i, right_i, disp_i, cost_i, state, row0, rows - row1, buf, range.start );
|
||||
else
|
||||
findStereoCorrespondenceBM_SIMD<int>( left_i, right_i, disp_i, cost_i, *state, ptr, row0, rows - row1);
|
||||
findStereoCorrespondenceBM_SIMD<int>( left_i, right_i, disp_i, cost_i, state, row0, rows - row1, buf, range.start);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
if( disp_i.type() == CV_16S )
|
||||
findStereoCorrespondenceBM<short>( left_i, right_i, disp_i, cost_i, *state, ptr, row0, rows - row1 );
|
||||
findStereoCorrespondenceBM<short>( left_i, right_i, disp_i, cost_i, state, row0, rows - row1, buf, range.start );
|
||||
else
|
||||
findStereoCorrespondenceBM<int>( left_i, right_i, disp_i, cost_i, *state, ptr, row0, rows - row1 );
|
||||
findStereoCorrespondenceBM<int>( left_i, right_i, disp_i, cost_i, state, row0, rows - row1, buf, range.start );
|
||||
}
|
||||
|
||||
if( state->disp12MaxDiff >= 0 )
|
||||
validateDisparity( disp_i, cost_i, state->minDisparity, state->numDisparities, state->disp12MaxDiff );
|
||||
if( state.disp12MaxDiff >= 0 )
|
||||
validateDisparity( disp_i, cost_i, state.minDisparity, state.numDisparities, state.disp12MaxDiff );
|
||||
|
||||
if( roi.x > 0 )
|
||||
{
|
||||
@ -1069,13 +1135,12 @@ struct FindStereoCorrespInvoker : public ParallelLoopBody
|
||||
|
||||
protected:
|
||||
const Mat *left, *right;
|
||||
Mat* disp, *slidingSumBuf, *cost;
|
||||
StereoBMParams *state;
|
||||
Mat* disp, *cost;
|
||||
const StereoBMParams &state;
|
||||
|
||||
int nstripes;
|
||||
size_t stripeBufSize;
|
||||
bool useShorts;
|
||||
Rect validDisparityRect;
|
||||
const BufferBM & buf;
|
||||
};
|
||||
|
||||
class StereoBMImpl CV_FINAL : public StereoBM
|
||||
@ -1149,7 +1214,7 @@ public:
|
||||
disp_shift = DISPARITY_SHIFT_16S;
|
||||
FILTERED = (params.minDisparity - 1) << disp_shift;
|
||||
|
||||
if( params.speckleRange >= 0 && params.speckleWindowSize > 0 )
|
||||
if (params.useFilterSpeckles())
|
||||
filterSpeckles(disparr.getMat(), FILTERED, params.speckleWindowSize, params.speckleRange, slidingSumBuf);
|
||||
if (dtype == CV_32F)
|
||||
disparr.getUMat().convertTo(disparr, CV_32FC1, 1./(1 << disp_shift), 0);
|
||||
@ -1192,44 +1257,39 @@ public:
|
||||
disp = dispbuf;
|
||||
}
|
||||
|
||||
int wsz = params.SADWindowSize;
|
||||
int bufSize0 = (int)((ndisp + 2)*sizeof(int));
|
||||
bufSize0 += (int)((height+wsz+2)*ndisp*sizeof(int));
|
||||
bufSize0 += (int)((height + wsz + 2)*sizeof(int));
|
||||
bufSize0 += (int)((height+wsz+2)*ndisp*(wsz+2)*sizeof(uchar) + 256);
|
||||
{
|
||||
const double SAD_overhead_coeff = 10.0;
|
||||
const double N0 = 8000000 / (params.useShorts() ? 1 : 4); // approx tbb's min number instructions reasonable for one thread
|
||||
const double maxStripeSize = std::min(
|
||||
std::max(
|
||||
N0 / (width * ndisp),
|
||||
(params.SADWindowSize-1) * SAD_overhead_coeff
|
||||
),
|
||||
(double)height
|
||||
);
|
||||
const int nstripes = cvCeil(height / maxStripeSize);
|
||||
BufferBM localBuf(nstripes, width, height, params);
|
||||
|
||||
int bufSize1 = (int)((width + params.preFilterSize + 2) * sizeof(int) + 256);
|
||||
int bufSize2 = 0;
|
||||
if( params.speckleRange >= 0 && params.speckleWindowSize > 0 )
|
||||
bufSize2 = width*height*(sizeof(Point_<short>) + sizeof(int) + sizeof(uchar));
|
||||
// Prefiltering
|
||||
parallel_for_(Range(0, 2), PrefilterInvoker(left0, right0, left, right, localBuf, params), 1);
|
||||
|
||||
bool useShorts = params.preFilterCap <= 31 && params.SADWindowSize <= 21;
|
||||
const double SAD_overhead_coeff = 10.0;
|
||||
double N0 = 8000000 / (useShorts ? 1 : 4); // approx tbb's min number instructions reasonable for one thread
|
||||
double maxStripeSize = std::min(std::max(N0 / (width * ndisp), (wsz-1) * SAD_overhead_coeff), (double)height);
|
||||
int nstripes = cvCeil(height / maxStripeSize);
|
||||
int bufSize = std::max(bufSize0 * nstripes, std::max(bufSize1 * 2, bufSize2));
|
||||
|
||||
if( slidingSumBuf.cols < bufSize )
|
||||
slidingSumBuf.create( 1, bufSize, CV_8U );
|
||||
Rect validDisparityRect(0, 0, width, height), R1 = params.roi1, R2 = params.roi2;
|
||||
validDisparityRect = getValidDisparityROI(!R1.empty() ? R1 : validDisparityRect,
|
||||
!R2.empty() ? R2 : validDisparityRect,
|
||||
params.minDisparity, params.numDisparities,
|
||||
params.SADWindowSize);
|
||||
|
||||
uchar *_buf = slidingSumBuf.ptr();
|
||||
FindStereoCorrespInvoker invoker(left, right, disp, params, nstripes, validDisparityRect, cost, localBuf);
|
||||
parallel_for_(Range(0, nstripes), invoker);
|
||||
|
||||
parallel_for_(Range(0, 2), PrefilterInvoker(left0, right0, left, right, _buf, _buf + bufSize1, ¶ms), 1);
|
||||
if (params.useFilterSpeckles())
|
||||
{
|
||||
slidingSumBuf.create( 1, width * height * (sizeof(Point_<short>) + sizeof(int) + sizeof(uchar)), CV_8U );
|
||||
filterSpeckles(disp, FILTERED, params.speckleWindowSize, params.speckleRange, slidingSumBuf);
|
||||
}
|
||||
|
||||
Rect validDisparityRect(0, 0, width, height), R1 = params.roi1, R2 = params.roi2;
|
||||
validDisparityRect = getValidDisparityROI(!R1.empty() ? R1 : validDisparityRect,
|
||||
!R2.empty() ? R2 : validDisparityRect,
|
||||
params.minDisparity, params.numDisparities,
|
||||
params.SADWindowSize);
|
||||
|
||||
parallel_for_(Range(0, nstripes),
|
||||
FindStereoCorrespInvoker(left, right, disp, ¶ms, nstripes,
|
||||
bufSize0, useShorts, validDisparityRect,
|
||||
slidingSumBuf, cost));
|
||||
|
||||
if( params.speckleRange >= 0 && params.speckleWindowSize > 0 )
|
||||
filterSpeckles(disp, FILTERED, params.speckleWindowSize, params.speckleRange, slidingSumBuf);
|
||||
}
|
||||
|
||||
if (disp0.data != disp.data)
|
||||
disp.convertTo(disp0, disp0.type(), 1./(1 << disp_shift), 0);
|
||||
|
103
modules/core/include/opencv2/core/utils/buffer_area.private.hpp
Normal file
103
modules/core/include/opencv2/core/utils/buffer_area.private.hpp
Normal file
@ -0,0 +1,103 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
#ifndef OPENCV_UTILS_BUFFER_AREA_HPP
|
||||
#define OPENCV_UTILS_BUFFER_AREA_HPP
|
||||
|
||||
#include <opencv2/core/base.hpp>
|
||||
#include <opencv2/core/private.hpp>
|
||||
#include <opencv2/core/utility.hpp>
|
||||
#include <vector>
|
||||
|
||||
namespace cv { namespace utils {
|
||||
|
||||
//! @addtogroup core_utils
|
||||
//! @{
|
||||
|
||||
/** @brief Manages memory block shared by multiple buffers.
|
||||
|
||||
This class allows to allocate one large memory block and split it into several smaller
|
||||
non-overlapping buffers. In safe mode each buffer allocation will be performed independently,
|
||||
this mode allows dynamic memory access instrumentation using valgrind or memory sanitizer.
|
||||
|
||||
Safe mode can be explicitly switched ON in constructor. It will also be enabled when compiling with
|
||||
memory sanitizer support or in runtime with the environment variable `OPENCV_BUFFER_AREA_ALWAYS_SAFE`.
|
||||
|
||||
Example of usage:
|
||||
@code
|
||||
int * buf1 = 0;
|
||||
double * buf2 = 0;
|
||||
cv::utils::BufferArea area;
|
||||
area.allocate(buf1, 200); // buf1 = new int[200];
|
||||
area.allocate(buf2, 1000, 64); // buf2 = new double[1000]; - aligned by 64
|
||||
area.commit();
|
||||
@endcode
|
||||
|
||||
@note This class is considered private and should be used only in OpenCV itself. API can be changed.
|
||||
*/
|
||||
class CV_EXPORTS BufferArea
|
||||
{
|
||||
public:
|
||||
/** @brief Class constructor.
|
||||
|
||||
@param safe Enable _safe_ operation mode, each allocation will be performed independently.
|
||||
*/
|
||||
BufferArea(bool safe = false);
|
||||
|
||||
/** @brief Class destructor
|
||||
|
||||
All allocated memory well be freed. Each bound pointer will be reset to NULL.
|
||||
*/
|
||||
~BufferArea();
|
||||
|
||||
/** @brief Bind a pointer to local area.
|
||||
|
||||
BufferArea will store reference to the pointer and allocation parameters effectively owning the
|
||||
pointer and allocated memory. This operation has the same parameters and does the same job
|
||||
as the operator `new`, except allocation can be performed later during the BufferArea::commit call.
|
||||
|
||||
@param ptr Reference to a pointer of type T. Must be NULL
|
||||
@param count Count of objects to be allocated, it has the same meaning as in the operator `new`.
|
||||
@param alignment Alignment of allocated memory. same meaning as in the operator `new` (C++17).
|
||||
Must be divisible by sizeof(T). Must be power of two.
|
||||
|
||||
@note In safe mode allocation will be performed immediatly.
|
||||
*/
|
||||
template <typename T>
|
||||
void allocate(T*&ptr, size_t count, ushort alignment = sizeof(T))
|
||||
{
|
||||
CV_Assert(ptr == NULL);
|
||||
CV_Assert(count > 0);
|
||||
CV_Assert(alignment > 0);
|
||||
CV_Assert(alignment % sizeof(T) == 0);
|
||||
CV_Assert((alignment & (alignment - 1)) == 0);
|
||||
allocate_((void**)(&ptr), static_cast<ushort>(sizeof(T)), count, alignment);
|
||||
}
|
||||
|
||||
/** @brief Allocate memory and initialize all bound pointers
|
||||
|
||||
Each pointer bound to the area with the BufferArea::allocate will be initialized and will be set
|
||||
to point to a memory block with requested size and alignment.
|
||||
|
||||
@note Does nothing in safe mode as all allocations will be performed by BufferArea::allocate
|
||||
*/
|
||||
void commit();
|
||||
|
||||
private:
|
||||
BufferArea(const BufferArea &); // = delete
|
||||
BufferArea &operator=(const BufferArea &); // = delete
|
||||
void allocate_(void **ptr, ushort type_size, size_t count, ushort alignment);
|
||||
|
||||
private:
|
||||
class Block;
|
||||
std::vector<Block> blocks;
|
||||
void * oneBuf;
|
||||
size_t totalSize;
|
||||
const bool safe;
|
||||
};
|
||||
|
||||
//! @}
|
||||
|
||||
}} // cv::utils::
|
||||
|
||||
#endif
|
121
modules/core/src/buffer_area.cpp
Normal file
121
modules/core/src/buffer_area.cpp
Normal file
@ -0,0 +1,121 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "opencv2/core/utils/buffer_area.private.hpp"
|
||||
#include "opencv2/core/utils/configuration.private.hpp"
|
||||
|
||||
#ifdef OPENCV_ENABLE_MEMORY_SANITIZER
|
||||
#define BUFFER_AREA_DEFAULT_MODE true
|
||||
#else
|
||||
#define BUFFER_AREA_DEFAULT_MODE false
|
||||
#endif
|
||||
|
||||
static bool CV_BUFFER_AREA_OVERRIDE_SAFE_MODE =
|
||||
cv::utils::getConfigurationParameterBool("OPENCV_BUFFER_AREA_ALWAYS_SAFE", BUFFER_AREA_DEFAULT_MODE);
|
||||
|
||||
namespace cv { namespace utils {
|
||||
|
||||
//==================================================================================================
|
||||
|
||||
class BufferArea::Block
|
||||
{
|
||||
private:
|
||||
inline size_t reserve_count() const
|
||||
{
|
||||
return alignment / type_size - 1;
|
||||
}
|
||||
public:
|
||||
Block(void **ptr_, ushort type_size_, size_t count_, ushort alignment_)
|
||||
: ptr(ptr_), raw_mem(0), count(count_), type_size(type_size_), alignment(alignment_)
|
||||
{
|
||||
CV_Assert(ptr && *ptr == NULL);
|
||||
}
|
||||
void cleanup() const
|
||||
{
|
||||
CV_Assert(ptr && *ptr);
|
||||
*ptr = 0;
|
||||
if (raw_mem)
|
||||
fastFree(raw_mem);
|
||||
}
|
||||
size_t getByteCount() const
|
||||
{
|
||||
return type_size * (count + reserve_count());
|
||||
}
|
||||
void real_allocate()
|
||||
{
|
||||
CV_Assert(ptr && *ptr == NULL);
|
||||
const size_t allocated_count = count + reserve_count();
|
||||
raw_mem = fastMalloc(type_size * allocated_count);
|
||||
if (alignment != type_size)
|
||||
{
|
||||
*ptr = alignPtr(raw_mem, alignment);
|
||||
CV_Assert(reinterpret_cast<size_t>(*ptr) % alignment == 0);
|
||||
CV_Assert(static_cast<uchar*>(*ptr) + type_size * count <= static_cast<uchar*>(raw_mem) + type_size * allocated_count);
|
||||
}
|
||||
else
|
||||
{
|
||||
*ptr = raw_mem;
|
||||
}
|
||||
}
|
||||
void * fast_allocate(void * buf) const
|
||||
{
|
||||
CV_Assert(ptr && *ptr == NULL);
|
||||
buf = alignPtr(buf, alignment);
|
||||
CV_Assert(reinterpret_cast<size_t>(buf) % alignment == 0);
|
||||
*ptr = buf;
|
||||
return static_cast<void*>(static_cast<uchar*>(*ptr) + type_size * count);
|
||||
}
|
||||
private:
|
||||
void **ptr;
|
||||
void * raw_mem;
|
||||
size_t count;
|
||||
ushort type_size;
|
||||
ushort alignment;
|
||||
};
|
||||
|
||||
//==================================================================================================
|
||||
|
||||
// Starts with no shared buffer and nothing requested. Safe mode is active when the caller
// asks for it or when the global override flag (see the top of this file) is set.
BufferArea::BufferArea(bool safe_)
    : oneBuf(0)
    , totalSize(0)
    , safe(CV_BUFFER_AREA_OVERRIDE_SAFE_MODE || safe_)
{
}
|
||||
|
||||
// Lets every block reset its bound pointer (and free its own memory, if it has any),
// then releases the shared buffer when one was allocated.
BufferArea::~BufferArea()
{
    const size_t blockCount = blocks.size();
    for (size_t idx = 0; idx < blockCount; ++idx)
    {
        const Block & current = blocks[idx];
        current.cleanup();
    }
    if (oneBuf != 0)
        fastFree(oneBuf);
}
|
||||
|
||||
void BufferArea::allocate_(void **ptr, ushort type_size, size_t count, ushort alignment)
|
||||
{
|
||||
blocks.push_back(Block(ptr, type_size, count, alignment));
|
||||
if (safe)
|
||||
blocks.back().real_allocate();
|
||||
else
|
||||
totalSize += blocks.back().getByteCount();
|
||||
}
|
||||
|
||||
void BufferArea::commit()
|
||||
{
|
||||
if (!safe)
|
||||
{
|
||||
CV_Assert(totalSize > 0);
|
||||
CV_Assert(oneBuf == NULL);
|
||||
CV_Assert(!blocks.empty());
|
||||
oneBuf = fastMalloc(totalSize);
|
||||
void * ptr = oneBuf;
|
||||
for(std::vector<Block>::const_iterator i = blocks.begin(); i != blocks.end(); ++i)
|
||||
{
|
||||
ptr = i->fast_allocate(ptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//==================================================================================================
|
||||
|
||||
}} // cv::utils::
|
@ -3,6 +3,7 @@
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
#include "test_precomp.hpp"
|
||||
#include "opencv2/core/utils/logger.hpp"
|
||||
#include "opencv2/core/utils/buffer_area.private.hpp"
|
||||
|
||||
#include "test_utils_tls.impl.hpp"
|
||||
|
||||
@ -303,4 +304,132 @@ TEST(Samples, findFile_missing)
|
||||
cv::utils::logging::setLogLevel(prev);
|
||||
}
|
||||
|
||||
// Returns true when the half-open element ranges [first, first + first_num) and
// [second, second + second_num) have at least one element in common.
// An empty range overlaps nothing.
template <typename T>
inline bool buffers_overlap(T * first, size_t first_num, T * second, size_t second_num)
{
    if (first_num == 0 || second_num == 0)
        return false;
    // Two non-empty half-open ranges intersect exactly when each one starts before the
    // other one ends. Unlike a check of "does an endpoint of `first` fall inside `second`",
    // this also catches `first` fully containing `second` and ranges sharing the same end.
    return (first < second + second_num) && (second < first + first_num);
}
|
||||
|
||||
typedef testing::TestWithParam<bool> BufferArea;
|
||||
|
||||
// Binds three pointers of different element types, commits, and checks that all of them
// are set and suitably aligned; after the area is destroyed they must be NULL again.
TEST_P(BufferArea, basic)
{
    const bool safe = GetParam();
    const size_t SZ = 3;

    int * int_ptr = NULL;
    uchar * uchar_ptr = NULL;
    double * dbl_ptr = NULL;

    {
        cv::utils::BufferArea area(safe);
        area.allocate(int_ptr, SZ);
        area.allocate(uchar_ptr, SZ);
        area.allocate(dbl_ptr, SZ);
        area.commit();

        // every bound pointer has to be initialized
        ASSERT_TRUE(int_ptr != NULL);
        ASSERT_TRUE(uchar_ptr != NULL);
        ASSERT_TRUE(dbl_ptr != NULL);

        // the default alignment is the element size
        EXPECT_EQ((size_t)0, (size_t)int_ptr % sizeof(int));
        EXPECT_EQ((size_t)0, (size_t)dbl_ptr % sizeof(double));
    }

    // the area is gone: pointers must have been reset
    EXPECT_TRUE(int_ptr == NULL);
    EXPECT_TRUE(uchar_ptr == NULL);
    EXPECT_TRUE(dbl_ptr == NULL);
}
|
||||
|
||||
// Requests buffers whose alignment doubles from one to the next and checks that each one
// honours its alignment and does not overlap the buffer requested right after it.
TEST_P(BufferArea, align)
{
    const bool safe = GetParam();
    const size_t SZ = 3;
    const size_t CNT = 5;
    typedef int T;
    T * buffers[CNT] = {0};

    {
        cv::utils::BufferArea area(safe);

        // SZ elements per buffer, alignment = sizeof(T) * 2^i
        for (size_t i = 0; i < CNT; ++i)
        {
            const ushort ALIGN = static_cast<ushort>(sizeof(T) << i);
            EXPECT_TRUE(buffers[i] == NULL);
            area.allocate(buffers[i], SZ, ALIGN);
        }
        area.commit();

        for (size_t i = 0; i < CNT; ++i)
        {
            const ushort ALIGN = static_cast<ushort>(sizeof(T) << i);
            EXPECT_TRUE(buffers[i] != NULL);
            EXPECT_EQ((size_t)0, reinterpret_cast<size_t>(buffers[i]) % ALIGN);

            // the last buffer has no successor to compare with
            if (i + 1 < CNT)
            {
                SCOPED_TRACE(i);
                EXPECT_FALSE(buffers_overlap(buffers[i], SZ, buffers[i + 1], SZ))
                    << "Buffers overlap: "
                    << buffers[i] << " (" << SZ << " elems)"
                    << " and "
                    << buffers[i + 1] << " (" << SZ << " elems)"
                    << " (element size: " << sizeof(T) << ")";
            }
        }
    }

    // destroying the area resets all bound pointers
    for (size_t i = 0; i < CNT; ++i)
    {
        EXPECT_TRUE(buffers[i] == NULL);
    }
}
|
||||
|
||||
// Requests many buffers of growing length that all share one fixed alignment and checks
// alignment and absence of overlap between neighbouring buffers.
TEST_P(BufferArea, default_align)
{
    const bool safe = GetParam();
    const size_t CNT = 100;
    const ushort ALIGN = 64;
    typedef int T;
    T * buffers[CNT] = {0};

    {
        cv::utils::BufferArea area(safe);

        // buffer i holds i + 1 elements (1 .. CNT), each aligned to ALIGN bytes
        for (size_t i = 0; i < CNT; ++ i)
        {
            EXPECT_TRUE(buffers[i] == NULL);
            area.allocate(buffers[i], i + 1, ALIGN);
        }
        area.commit();

        for (size_t i = 0; i < CNT; ++i)
        {
            EXPECT_TRUE(buffers[i] != NULL);
            EXPECT_EQ((size_t)0, reinterpret_cast<size_t>(buffers[i]) % ALIGN);

            // the last buffer has no successor to compare with
            if (i + 1 < CNT)
            {
                SCOPED_TRACE(i);
                EXPECT_FALSE(buffers_overlap(buffers[i], i + 1, buffers[i + 1], i + 2))
                    << "Buffers overlap: "
                    << buffers[i] << " (" << i + 1 << " elems)"
                    << " and "
                    << buffers[i + 1] << " (" << i + 2 << " elems)"
                    << " (element size: " << sizeof(T) << ")";
            }
        }
    }
}
|
||||
|
||||
// Invalid requests must be rejected with an exception.
TEST_P(BufferArea, bad)
{
    const bool safe = GetParam();
    int * ptr = 0;
    cv::utils::BufferArea area(safe);

    // zero elements
    EXPECT_ANY_THROW(area.allocate(ptr, 0)); // bad size

    // zero alignment, then an alignment that is not a multiple of sizeof(int)
    EXPECT_ANY_THROW(area.allocate(ptr, 1, 0)); // bad alignment
    EXPECT_ANY_THROW(area.allocate(ptr, 1, 3)); // bad alignment

    // the pointer to bind must be NULL
    ptr = (int*)1;
    EXPECT_ANY_THROW(area.allocate(ptr, 1)); // non-zero pointer
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(/**/, BufferArea, testing::Values(true, false));
|
||||
|
||||
|
||||
}} // namespace
|
||||
|
Loading…
Reference in New Issue
Block a user