BufferArea: initial version, usage in StereoBM

The new class BufferArea hides the complexity of buffer allocations and allows instrumentation with valgrind and sanitizers.
This commit is contained in:
Maksim Shabunin 2020-01-23 14:25:58 +03:00
parent 6ad390a1cd
commit 55cdeaa6dd
4 changed files with 513 additions and 100 deletions

View File

@ -48,8 +48,10 @@
#include "precomp.hpp"
#include <stdio.h>
#include <limits>
#include <vector>
#include "opencl_kernels_calib3d.hpp"
#include "opencv2/core/hal/intrin.hpp"
#include "opencv2/core/utils/buffer_area.private.hpp"
namespace cv
{
@ -85,6 +87,19 @@ struct StereoBMParams
Rect roi1, roi2;
int disp12MaxDiff;
int dispType;
// True when both preFilterCap (<= 31) and SADWindowSize (<= 21) are within the limits
// checked here; callers use it to select the ushort buffers / SIMD code path.
inline bool useShorts() const
{
    const bool capFits = preFilterCap <= 31;
    const bool windowFits = SADWindowSize <= 21;
    return capFits && windowFits;
}
// True when speckle filtering is configured: a non-negative speckleRange together with a
// positive speckleWindowSize.
inline bool useFilterSpeckles() const
{
    if (speckleRange < 0)
        return false;
    return speckleWindowSize > 0;
}
// True when the configured pre-filter is StereoBM::PREFILTER_NORMALIZED_RESPONSE.
inline bool useNormPrefilter() const
{
    const bool isNormalizedResponse = (StereoBM::PREFILTER_NORMALIZED_RESPONSE == preFilterType);
    return isNormalizedResponse;
}
};
#ifdef HAVE_OPENCL
@ -110,10 +125,10 @@ static bool ocl_prefilter_norm(InputArray _input, OutputArray _output, int winsi
}
#endif
static void prefilterNorm( const Mat& src, Mat& dst, int winsize, int ftzero, uchar* buf )
static void prefilterNorm( const Mat& src, Mat& dst, int winsize, int ftzero, int *buf )
{
int x, y, wsz2 = winsize/2;
int* vsum = (int*)alignPtr(buf + (wsz2 + 1)*sizeof(vsum[0]), 32);
int* vsum = buf + (wsz2 + 1);
int scale_g = winsize*winsize/8, scale_s = (1024 + scale_g)/(scale_g*2);
const int OFS = 256*5, TABSZ = OFS*2 + 256;
uchar tab[TABSZ];
@ -309,13 +324,77 @@ inline int dispDescale(int v1, int v2, int d)
return (int)(v1*256 + (d != 0 ? v2*256/d : 0)); // no need to add 127, this will be converted to float
}
/** Scratch buffers used by the block-matching stereo correspondence code.

All buffers are bound to a single utils::BufferArea, one set per stripe plus (optionally) two
prefilter buffers, and are released when the BufferBM object is destroyed.

NOTE: `area` must stay declared after every pointer it is bound to. Members are destroyed in
reverse declaration order, and the BufferArea destructor writes NULL through the bound pointers,
so the vectors holding those pointers must still be alive at that point.
*/
class BufferBM
{
    static const int TABSZ = 256;
public:
    // Per-stripe buffers (index = stripe number). Depending on params.useShorts() (and CV_SIMD)
    // either the int or the ushort variant of sad/hsad is allocated; the other stays NULL.
    std::vector<int*> sad;
    std::vector<int*> hsad;
    std::vector<int*> htext;
    std::vector<uchar*> cbuf0;
    std::vector<ushort*> sad_short;
    std::vector<ushort*> hsad_short;
    // One buffer per input image; allocated only when params.useNormPrefilter() is true,
    // otherwise both entries stay NULL.
    int *prefilter[2];
    // Lookup table: tab[x] = |x - preFilterCap|
    uchar tab[TABSZ];
private:
    utils::BufferArea area;
public:
    /** Binds and allocates all buffers.
    @param nstripes number of stripes, i.e. number of per-stripe buffer sets
    @param width image width, used to size the prefilter buffers
    @param height image height, used to size the per-stripe buffers
    @param params algorithm parameters (window size, number of disparities, prefilter settings)
    */
    BufferBM(size_t nstripes, size_t width, size_t height, const StereoBMParams& params)
        : sad(nstripes, NULL),
          hsad(nstripes, NULL),
          htext(nstripes, NULL),
          cbuf0(nstripes, NULL),
          sad_short(nstripes, NULL),
          hsad_short(nstripes, NULL)
    {
        // BufferArea::allocate requires the bound pointer to be NULL, so the raw array must be
        // initialised explicitly (it is also read as-is when the norm prefilter is not used).
        prefilter[0] = prefilter[1] = NULL;

        const int wsz = params.SADWindowSize;
        const int ndisp = params.numDisparities;
        const int ftzero = params.preFilterCap;
        for (size_t i = 0; i < nstripes; ++i)
        {
            // 1D: [1][ ndisp ][1]
#if CV_SIMD
            if (params.useShorts())
                area.allocate(sad_short[i], ndisp + 2);
            else
#endif
                area.allocate(sad[i], ndisp + 2);
            // 2D: [ wsz/2 + 1 ][ height ][ wsz/2 + 1 ] * [ ndisp ]
#if CV_SIMD
            if (params.useShorts())
                area.allocate(hsad_short[i], (height + wsz + 2) * ndisp);
            else
#endif
                area.allocate(hsad[i], (height + wsz + 2) * ndisp);
            // 1D: [ wsz/2 + 1 ][ height ][ wsz/2 + 1 ]
            area.allocate(htext[i], (height + wsz + 2));
            // 3D: [ wsz/2 + 1 ][ height ][ wsz/2 + 1 ] * [ ndisp ] * [ wsz/2 + 1 ][ wsz/2 + 1 ]
            area.allocate(cbuf0[i], ((height + wsz + 2) * ndisp * (wsz + 2) + 256));
        }
        if (params.useNormPrefilter())
        {
            // One buffer per image: each iteration must bind its own slot. Binding prefilter[0]
            // twice would leave prefilter[1] unallocated and register the same pointer twice.
            for (size_t i = 0; i < 2; ++i)
                area.allocate(prefilter[i], width + params.preFilterSize + 2);
        }
        area.commit();
        // static table
        for (int x = 0; x < TABSZ; x++)
            tab[x] = (uchar)std::abs(x - ftzero);
    }
};
#if CV_SIMD
template <typename dType>
static void findStereoCorrespondenceBM_SIMD( const Mat& left, const Mat& right,
Mat& disp, Mat& cost, StereoBMParams& state,
uchar* buf, int _dy0, int _dy1 )
Mat& disp, Mat& cost, const StereoBMParams& state,
int _dy0, int _dy1, const BufferBM & bufX, size_t bufNum )
{
const int ALIGN = CV_SIMD_WIDTH;
int x, y, d;
int wsz = state.SADWindowSize, wsz2 = wsz/2;
int dy0 = MIN(_dy0, wsz2+1), dy1 = MIN(_dy1, wsz2+1);
@ -325,15 +404,13 @@ static void findStereoCorrespondenceBM_SIMD( const Mat& left, const Mat& right,
int rofs = -MIN(ndisp - 1 + mindisp, 0);
int width = left.cols, height = left.rows;
int width1 = width - rofs - ndisp + 1;
int ftzero = state.preFilterCap;
int textureThreshold = state.textureThreshold;
int uniquenessRatio = state.uniquenessRatio;
const int disp_shift = dispShiftTemplate<dType>::value;
dType FILTERED = (dType)((mindisp - 1) << disp_shift);
ushort *sad, *hsad0, *hsad, *hsad_sub;
int *htext;
uchar *cbuf0, *cbuf;
ushort *hsad, *hsad_sub;
uchar *cbuf;
const uchar* lptr0 = left.ptr() + lofs;
const uchar* rptr0 = right.ptr() + rofs;
const uchar *lptr, *lptr_sub, *rptr;
@ -343,23 +420,20 @@ static void findStereoCorrespondenceBM_SIMD( const Mat& left, const Mat& right,
int cstep = (height + dy0 + dy1)*ndisp;
short costbuf = 0;
int coststep = cost.data ? (int)(cost.step/sizeof(costbuf)) : 0;
const int TABSZ = 256;
uchar tab[TABSZ];
const uchar * tab = bufX.tab;
short v_seq[v_int16::nlanes];
for (short i = 0; i < v_int16::nlanes; ++i)
v_seq[i] = i;
sad = (ushort*)alignPtr(buf + sizeof(sad[0]), ALIGN);
hsad0 = (ushort*)alignPtr(sad + ndisp + 1 + dy0*ndisp, ALIGN);
htext = (int*)alignPtr((int*)(hsad0 + (height+dy1)*ndisp) + wsz2 + 2, ALIGN);
cbuf0 = (uchar*)alignPtr((uchar*)(htext + height + wsz2 + 2) + dy0*ndisp, ALIGN);
for( x = 0; x < TABSZ; x++ )
tab[x] = (uchar)std::abs(x - ftzero);
ushort *sad = bufX.sad_short[bufNum] + 1;
ushort *hsad0 = bufX.hsad_short[bufNum] + (wsz2 + 1) * ndisp;
int *htext = bufX.htext[bufNum] + (wsz2 + 1);
uchar *cbuf0 = bufX.cbuf0[bufNum] + (wsz2 + 1) * ndisp;
// initialize buffers
memset( hsad0 - dy0*ndisp, 0, (height + dy0 + dy1)*ndisp*sizeof(hsad0[0]) );
memset( htext - wsz2 - 1, 0, (height + wsz + 1)*sizeof(htext[0]) );
memset(sad - 1, 0, (ndisp + 2) * sizeof(sad[0]));
memset(hsad0 - dy0 * ndisp, 0, (height + wsz + 2) * ndisp * sizeof(hsad[0]));
memset(htext - dy0, 0, (height + wsz + 2) * sizeof(htext[0]));
for( x = -wsz2-1; x < wsz2; x++ )
{
@ -594,10 +668,9 @@ template <typename mType>
static void
findStereoCorrespondenceBM( const Mat& left, const Mat& right,
Mat& disp, Mat& cost, const StereoBMParams& state,
uchar* buf, int _dy0, int _dy1 )
int _dy0, int _dy1, const BufferBM & bufX, size_t bufNum )
{
const int ALIGN = CV_SIMD_WIDTH;
int x, y, d;
int wsz = state.SADWindowSize, wsz2 = wsz/2;
int dy0 = MIN(_dy0, wsz2+1), dy1 = MIN(_dy1, wsz2+1);
@ -607,14 +680,13 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
int rofs = -MIN(ndisp - 1 + mindisp, 0);
int width = left.cols, height = left.rows;
int width1 = width - rofs - ndisp + 1;
int ftzero = state.preFilterCap;
int textureThreshold = state.textureThreshold;
int uniquenessRatio = state.uniquenessRatio;
const int disp_shift = dispShiftTemplate<mType>::value;
mType FILTERED = (mType)((mindisp - 1) << disp_shift);
int *sad, *hsad0, *hsad, *hsad_sub, *htext;
uchar *cbuf0, *cbuf;
int *hsad, *hsad_sub;
uchar *cbuf;
const uchar* lptr0 = left.ptr() + lofs;
const uchar* rptr0 = right.ptr() + rofs;
const uchar *lptr, *lptr_sub, *rptr;
@ -624,8 +696,7 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
int cstep = (height+dy0+dy1)*ndisp;
int costbuf = 0;
int coststep = cost.data ? (int)(cost.step/sizeof(costbuf)) : 0;
const int TABSZ = 256;
uchar tab[TABSZ];
const uchar * tab = bufX.tab;
#if CV_SIMD
int v_seq[v_int32::nlanes];
@ -634,17 +705,15 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
v_int32 d0_4 = vx_load(v_seq), dd_4 = vx_setall_s32(v_int32::nlanes);
#endif
sad = (int*)alignPtr(buf + sizeof(sad[0]), ALIGN);
hsad0 = (int*)alignPtr(sad + ndisp + 1 + dy0*ndisp, ALIGN);
htext = (int*)alignPtr((int*)(hsad0 + (height+dy1)*ndisp) + wsz2 + 2, ALIGN);
cbuf0 = (uchar*)alignPtr((uchar*)(htext + height + wsz2 + 2) + dy0*ndisp, ALIGN);
for( x = 0; x < TABSZ; x++ )
tab[x] = (uchar)std::abs(x - ftzero);
int *sad = bufX.sad[bufNum] + 1;
int *hsad0 = bufX.hsad[bufNum] + (wsz2 + 1) * ndisp;
int *htext = bufX.htext[bufNum] + (wsz2 + 1);
uchar *cbuf0 = bufX.cbuf0[bufNum] + (wsz2 + 1) * ndisp;
// initialize buffers
memset( hsad0 - dy0*ndisp, 0, (height + dy0 + dy1)*ndisp*sizeof(hsad0[0]) );
memset( htext - wsz2 - 1, 0, (height + wsz + 1)*sizeof(htext[0]) );
memset(sad - 1, 0, (ndisp + 2) * sizeof(sad[0]));
memset(hsad0 - dy0 * ndisp, 0, (height + wsz + 2) * ndisp * sizeof(hsad[0]));
memset(htext - dy0, 0, (height + wsz + 2) * sizeof(htext[0]));
for( x = -wsz2-1; x < wsz2; x++ )
{
@ -890,7 +959,7 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
#ifdef HAVE_OPENCL
static bool ocl_prefiltering(InputArray left0, InputArray right0, OutputArray left, OutputArray right, StereoBMParams* state)
{
if( state->preFilterType == StereoBM::PREFILTER_NORMALIZED_RESPONSE )
if (state->useNormPrefilter())
{
if(!ocl_prefilter_norm( left0, left, state->preFilterSize, state->preFilterCap))
return false;
@ -911,29 +980,28 @@ static bool ocl_prefiltering(InputArray left0, InputArray right0, OutputArray le
struct PrefilterInvoker : public ParallelLoopBody
{
PrefilterInvoker(const Mat& left0, const Mat& right0, Mat& left, Mat& right,
uchar* buf0, uchar* buf1, StereoBMParams* _state)
const BufferBM &bufX_, const StereoBMParams &state_)
: bufX(bufX_), state(state_)
{
imgs0[0] = &left0; imgs0[1] = &right0;
imgs[0] = &left; imgs[1] = &right;
buf[0] = buf0; buf[1] = buf1;
state = _state;
}
void operator()(const Range& range) const CV_OVERRIDE
{
for( int i = range.start; i < range.end; i++ )
{
if( state->preFilterType == StereoBM::PREFILTER_NORMALIZED_RESPONSE )
prefilterNorm( *imgs0[i], *imgs[i], state->preFilterSize, state->preFilterCap, buf[i] );
if (state.useNormPrefilter())
prefilterNorm( *imgs0[i], *imgs[i], state.preFilterSize, state.preFilterCap, bufX.prefilter[i] );
else
prefilterXSobel( *imgs0[i], *imgs[i], state->preFilterCap );
prefilterXSobel( *imgs0[i], *imgs[i], state.preFilterCap );
}
}
const Mat* imgs0[2];
Mat* imgs[2];
uchar* buf[2];
StereoBMParams* state;
const BufferBM &bufX;
const StereoBMParams &state;
};
#ifdef HAVE_OPENCL
@ -986,18 +1054,17 @@ static bool ocl_stereobm( InputArray _left, InputArray _right,
struct FindStereoCorrespInvoker : public ParallelLoopBody
{
FindStereoCorrespInvoker( const Mat& _left, const Mat& _right,
Mat& _disp, StereoBMParams* _state,
int _nstripes, size_t _stripeBufSize,
bool _useShorts, Rect _validDisparityRect,
Mat& _slidingSumBuf, Mat& _cost )
Mat& _disp, const StereoBMParams &_state,
int _nstripes,
Rect _validDisparityRect,
Mat& _cost, const BufferBM & buf_ )
: state(_state), buf(buf_)
{
CV_Assert( _disp.type() == CV_16S || _disp.type() == CV_32S );
left = &_left; right = &_right;
disp = &_disp; state = _state;
nstripes = _nstripes; stripeBufSize = _stripeBufSize;
useShorts = _useShorts;
disp = &_disp;
nstripes = _nstripes;
validDisparityRect = _validDisparityRect;
slidingSumBuf = &_slidingSumBuf;
cost = &_cost;
}
@ -1006,11 +1073,10 @@ struct FindStereoCorrespInvoker : public ParallelLoopBody
int cols = left->cols, rows = left->rows;
int _row0 = std::min(cvRound(range.start * rows / nstripes), rows);
int _row1 = std::min(cvRound(range.end * rows / nstripes), rows);
uchar *ptr = slidingSumBuf->ptr() + range.start * stripeBufSize;
int dispShift = disp->type() == CV_16S ? DISPARITY_SHIFT_16S :
DISPARITY_SHIFT_32S;
int FILTERED = (state->minDisparity - 1) << dispShift;
int FILTERED = (state.minDisparity - 1) << dispShift;
Rect roi = validDisparityRect & Rect(0, _row0, cols, _row1 - _row0);
if( roi.height == 0 )
@ -1033,27 +1099,27 @@ struct FindStereoCorrespInvoker : public ParallelLoopBody
Mat left_i = left->rowRange(row0, row1);
Mat right_i = right->rowRange(row0, row1);
Mat disp_i = disp->rowRange(row0, row1);
Mat cost_i = state->disp12MaxDiff >= 0 ? cost->rowRange(row0, row1) : Mat();
Mat cost_i = state.disp12MaxDiff >= 0 ? cost->rowRange(row0, row1) : Mat();
#if CV_SIMD
if (useShorts)
if (state.useShorts())
{
if( disp_i.type() == CV_16S)
findStereoCorrespondenceBM_SIMD<short>( left_i, right_i, disp_i, cost_i, *state, ptr, row0, rows - row1 );
findStereoCorrespondenceBM_SIMD<short>( left_i, right_i, disp_i, cost_i, state, row0, rows - row1, buf, range.start );
else
findStereoCorrespondenceBM_SIMD<int>( left_i, right_i, disp_i, cost_i, *state, ptr, row0, rows - row1);
findStereoCorrespondenceBM_SIMD<int>( left_i, right_i, disp_i, cost_i, state, row0, rows - row1, buf, range.start);
}
else
#endif
{
if( disp_i.type() == CV_16S )
findStereoCorrespondenceBM<short>( left_i, right_i, disp_i, cost_i, *state, ptr, row0, rows - row1 );
findStereoCorrespondenceBM<short>( left_i, right_i, disp_i, cost_i, state, row0, rows - row1, buf, range.start );
else
findStereoCorrespondenceBM<int>( left_i, right_i, disp_i, cost_i, *state, ptr, row0, rows - row1 );
findStereoCorrespondenceBM<int>( left_i, right_i, disp_i, cost_i, state, row0, rows - row1, buf, range.start );
}
if( state->disp12MaxDiff >= 0 )
validateDisparity( disp_i, cost_i, state->minDisparity, state->numDisparities, state->disp12MaxDiff );
if( state.disp12MaxDiff >= 0 )
validateDisparity( disp_i, cost_i, state.minDisparity, state.numDisparities, state.disp12MaxDiff );
if( roi.x > 0 )
{
@ -1069,13 +1135,12 @@ struct FindStereoCorrespInvoker : public ParallelLoopBody
protected:
const Mat *left, *right;
Mat* disp, *slidingSumBuf, *cost;
StereoBMParams *state;
Mat* disp, *cost;
const StereoBMParams &state;
int nstripes;
size_t stripeBufSize;
bool useShorts;
Rect validDisparityRect;
const BufferBM & buf;
};
class StereoBMImpl CV_FINAL : public StereoBM
@ -1149,7 +1214,7 @@ public:
disp_shift = DISPARITY_SHIFT_16S;
FILTERED = (params.minDisparity - 1) << disp_shift;
if( params.speckleRange >= 0 && params.speckleWindowSize > 0 )
if (params.useFilterSpeckles())
filterSpeckles(disparr.getMat(), FILTERED, params.speckleWindowSize, params.speckleRange, slidingSumBuf);
if (dtype == CV_32F)
disparr.getUMat().convertTo(disparr, CV_32FC1, 1./(1 << disp_shift), 0);
@ -1192,44 +1257,39 @@ public:
disp = dispbuf;
}
int wsz = params.SADWindowSize;
int bufSize0 = (int)((ndisp + 2)*sizeof(int));
bufSize0 += (int)((height+wsz+2)*ndisp*sizeof(int));
bufSize0 += (int)((height + wsz + 2)*sizeof(int));
bufSize0 += (int)((height+wsz+2)*ndisp*(wsz+2)*sizeof(uchar) + 256);
{
const double SAD_overhead_coeff = 10.0;
const double N0 = 8000000 / (params.useShorts() ? 1 : 4); // approx tbb's min number instructions reasonable for one thread
const double maxStripeSize = std::min(
std::max(
N0 / (width * ndisp),
(params.SADWindowSize-1) * SAD_overhead_coeff
),
(double)height
);
const int nstripes = cvCeil(height / maxStripeSize);
BufferBM localBuf(nstripes, width, height, params);
int bufSize1 = (int)((width + params.preFilterSize + 2) * sizeof(int) + 256);
int bufSize2 = 0;
if( params.speckleRange >= 0 && params.speckleWindowSize > 0 )
bufSize2 = width*height*(sizeof(Point_<short>) + sizeof(int) + sizeof(uchar));
// Prefiltering
parallel_for_(Range(0, 2), PrefilterInvoker(left0, right0, left, right, localBuf, params), 1);
bool useShorts = params.preFilterCap <= 31 && params.SADWindowSize <= 21;
const double SAD_overhead_coeff = 10.0;
double N0 = 8000000 / (useShorts ? 1 : 4); // approx tbb's min number instructions reasonable for one thread
double maxStripeSize = std::min(std::max(N0 / (width * ndisp), (wsz-1) * SAD_overhead_coeff), (double)height);
int nstripes = cvCeil(height / maxStripeSize);
int bufSize = std::max(bufSize0 * nstripes, std::max(bufSize1 * 2, bufSize2));
if( slidingSumBuf.cols < bufSize )
slidingSumBuf.create( 1, bufSize, CV_8U );
Rect validDisparityRect(0, 0, width, height), R1 = params.roi1, R2 = params.roi2;
validDisparityRect = getValidDisparityROI(!R1.empty() ? R1 : validDisparityRect,
!R2.empty() ? R2 : validDisparityRect,
params.minDisparity, params.numDisparities,
params.SADWindowSize);
uchar *_buf = slidingSumBuf.ptr();
FindStereoCorrespInvoker invoker(left, right, disp, params, nstripes, validDisparityRect, cost, localBuf);
parallel_for_(Range(0, nstripes), invoker);
parallel_for_(Range(0, 2), PrefilterInvoker(left0, right0, left, right, _buf, _buf + bufSize1, &params), 1);
if (params.useFilterSpeckles())
{
slidingSumBuf.create( 1, width * height * (sizeof(Point_<short>) + sizeof(int) + sizeof(uchar)), CV_8U );
filterSpeckles(disp, FILTERED, params.speckleWindowSize, params.speckleRange, slidingSumBuf);
}
Rect validDisparityRect(0, 0, width, height), R1 = params.roi1, R2 = params.roi2;
validDisparityRect = getValidDisparityROI(!R1.empty() ? R1 : validDisparityRect,
!R2.empty() ? R2 : validDisparityRect,
params.minDisparity, params.numDisparities,
params.SADWindowSize);
parallel_for_(Range(0, nstripes),
FindStereoCorrespInvoker(left, right, disp, &params, nstripes,
bufSize0, useShorts, validDisparityRect,
slidingSumBuf, cost));
if( params.speckleRange >= 0 && params.speckleWindowSize > 0 )
filterSpeckles(disp, FILTERED, params.speckleWindowSize, params.speckleRange, slidingSumBuf);
}
if (disp0.data != disp.data)
disp.convertTo(disp0, disp0.type(), 1./(1 << disp_shift), 0);

View File

@ -0,0 +1,103 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef OPENCV_UTILS_BUFFER_AREA_HPP
#define OPENCV_UTILS_BUFFER_AREA_HPP
#include <opencv2/core/base.hpp>
#include <opencv2/core/private.hpp>
#include <opencv2/core/utility.hpp>
#include <vector>
namespace cv { namespace utils {
//! @addtogroup core_utils
//! @{
/** @brief Manages memory block shared by multiple buffers.
This class allows to allocate one large memory block and split it into several smaller
non-overlapping buffers. In safe mode each buffer allocation will be performed independently,
this mode allows dynamic memory access instrumentation using valgrind or memory sanitizer.
Safe mode can be explicitly switched ON in constructor. It will also be enabled when compiling with
memory sanitizer support or in runtime with the environment variable `OPENCV_BUFFER_AREA_ALWAYS_SAFE`.
Example of usage:
@code
int * buf1 = 0;
double * buf2 = 0;
cv::utils::BufferArea area;
area.allocate(buf1, 200); // buf1 = new int[200];
area.allocate(buf2, 1000, 64); // buf2 = new double[1000]; - aligned by 64
area.commit();
@endcode
@note This class is considered private and should be used only in OpenCV itself. API can be changed.
*/
class CV_EXPORTS BufferArea
{
public:
/** @brief Class constructor.
@param safe Enable _safe_ operation mode, each allocation will be performed independently.
*/
BufferArea(bool safe = false);
/** @brief Class destructor
All allocated memory will be freed. Each bound pointer will be reset to NULL.
*/
~BufferArea();
/** @brief Bind a pointer to local area.
BufferArea will store reference to the pointer and allocation parameters effectively owning the
pointer and allocated memory. This operation has the same parameters and does the same job
as the operator `new`, except allocation can be performed later during the BufferArea::commit call.
Because the area keeps the address of the pointer variable and writes NULL through it on destruction,
the pointer variable itself must remain valid for the whole lifetime of the BufferArea.
@param ptr Reference to a pointer of type T. Must be NULL
@param count Count of objects to be allocated, it has the same meaning as in the operator `new`.
Must be greater than zero.
@param alignment Alignment of allocated memory. Same meaning as in the operator `new` (C++17).
Must be divisible by sizeof(T). Must be power of two.
@note In safe mode allocation will be performed immediately.
@note Violating any of the requirements above triggers CV_Assert.
*/
template <typename T>
void allocate(T*&ptr, size_t count, ushort alignment = sizeof(T))
{
CV_Assert(ptr == NULL);
CV_Assert(count > 0);
CV_Assert(alignment > 0);
CV_Assert(alignment % sizeof(T) == 0);
CV_Assert((alignment & (alignment - 1)) == 0);
allocate_((void**)(&ptr), static_cast<ushort>(sizeof(T)), count, alignment);
}
/** @brief Allocate memory and initialize all bound pointers
Each pointer bound to the area with the BufferArea::allocate will be initialized and will be set
to point to a memory block with requested size and alignment.
@note Does nothing in safe mode as all allocations will be performed by BufferArea::allocate
*/
void commit();
private:
// Copying is disabled: declared but intentionally not defined.
BufferArea(const BufferArea &); // = delete
BufferArea &operator=(const BufferArea &); // = delete
// Type-erased implementation behind the allocate() template.
void allocate_(void **ptr, ushort type_size, size_t count, ushort alignment);
private:
class Block;
// One entry per bound pointer, in binding order.
std::vector<Block> blocks;
// Shared memory block created by commit() in non-safe mode; stays NULL otherwise.
void * oneBuf;
// Accumulated byte size requested for oneBuf (non-safe mode only).
size_t totalSize;
// Operation mode, fixed at construction.
const bool safe;
};
//! @}
}} // cv::utils::
#endif

View File

@ -0,0 +1,121 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "opencv2/core/utils/buffer_area.private.hpp"
#include "opencv2/core/utils/configuration.private.hpp"
// Safe mode defaults to ON when OpenCV is built with memory sanitizer support.
#ifdef OPENCV_ENABLE_MEMORY_SANITIZER
#define BUFFER_AREA_DEFAULT_MODE true
#else
#define BUFFER_AREA_DEFAULT_MODE false
#endif
// Override initialised from the OPENCV_BUFFER_AREA_ALWAYS_SAFE configuration parameter
// (falling back to BUFFER_AREA_DEFAULT_MODE). When true, every BufferArea operates in safe
// mode regardless of the value passed to its constructor.
static bool CV_BUFFER_AREA_OVERRIDE_SAFE_MODE =
cv::utils::getConfigurationParameterBool("OPENCV_BUFFER_AREA_ALWAYS_SAFE", BUFFER_AREA_DEFAULT_MODE);
namespace cv { namespace utils {
//==================================================================================================
// One pointer bound to a BufferArea together with its allocation parameters.
// A block is either allocated on its own (real_allocate, safe mode) or carved out of the
// shared buffer (fast_allocate, non-safe mode).
class BufferArea::Block
{
private:
    // Extra elements added by real_allocate() so that the aligned pointer still leaves room
    // for `count` elements.
    // NOTE(review): this is sufficient only if fastMalloc returns memory aligned to at least
    // type_size; the assertions in real_allocate() guard the result.
    inline size_t reserve_count() const
    {
        return alignment / type_size - 1;
    }
public:
    // ptr_ must be non-NULL and must point to a NULL pointer.
    Block(void **ptr_, ushort type_size_, size_t count_, ushort alignment_)
        : ptr(ptr_), raw_mem(0), count(count_), type_size(type_size_), alignment(alignment_)
    {
        CV_Assert(ptr && *ptr == NULL);
    }
    // Resets the bound pointer to NULL and frees the individually allocated memory, if any.
    // This is called from ~BufferArea(), possibly for an area that was never committed (for
    // example when an exception interrupted the allocate()/commit() sequence), so it must not
    // throw: an assertion failure here would escape a destructor and terminate the program.
    void cleanup() const
    {
        if (ptr)
            *ptr = 0;
        if (raw_mem)
            fastFree(raw_mem);
    }
    // Number of bytes this block may consume inside the shared buffer.
    // The previous block can end at an arbitrary byte offset (e.g. 3 uchar elements followed by
    // a double buffer), so up to `alignment - 1` padding bytes may be needed before this block.
    // Reserving only `alignment - type_size` bytes would under-size the shared buffer.
    size_t getByteCount() const
    {
        return type_size * count + (static_cast<size_t>(alignment) - 1);
    }
    // Safe mode: allocates this block independently and sets the bound pointer.
    void real_allocate()
    {
        CV_Assert(ptr && *ptr == NULL);
        const size_t allocated_count = count + reserve_count();
        raw_mem = fastMalloc(type_size * allocated_count);
        if (alignment != type_size)
        {
            *ptr = alignPtr(raw_mem, alignment);
            CV_Assert(reinterpret_cast<size_t>(*ptr) % alignment == 0);
            CV_Assert(static_cast<uchar*>(*ptr) + type_size * count <= static_cast<uchar*>(raw_mem) + type_size * allocated_count);
        }
        else
        {
            *ptr = raw_mem;
        }
    }
    // Non-safe mode: places this block at the first suitably aligned address at or after `buf`,
    // sets the bound pointer and returns the address one past the end of the block.
    void * fast_allocate(void * buf) const
    {
        CV_Assert(ptr && *ptr == NULL);
        buf = alignPtr(buf, alignment);
        CV_Assert(reinterpret_cast<size_t>(buf) % alignment == 0);
        *ptr = buf;
        return static_cast<void*>(static_cast<uchar*>(*ptr) + type_size * count);
    }
private:
    void **ptr;       // address of the user's pointer variable
    void * raw_mem;   // memory owned by this block (safe mode only), 0 otherwise
    size_t count;     // number of elements requested
    ushort type_size; // size of one element in bytes
    ushort alignment; // requested alignment in bytes
};
//==================================================================================================
// Creates an empty area. Safe mode is active when requested explicitly or when the
// CV_BUFFER_AREA_OVERRIDE_SAFE_MODE override is set.
BufferArea::BufferArea(bool safe_)
    : oneBuf(NULL)
    , totalSize(0)
    , safe(CV_BUFFER_AREA_OVERRIDE_SAFE_MODE || safe_)
{
}
// Cleans up every bound block first, then releases the shared buffer if one was created.
BufferArea::~BufferArea()
{
    const size_t blockCount = blocks.size();
    for (size_t idx = 0; idx < blockCount; ++idx)
    {
        blocks[idx].cleanup();
    }
    if (oneBuf != NULL)
    {
        fastFree(oneBuf);
    }
}
void BufferArea::allocate_(void **ptr, ushort type_size, size_t count, ushort alignment)
{
blocks.push_back(Block(ptr, type_size, count, alignment));
if (safe)
blocks.back().real_allocate();
else
totalSize += blocks.back().getByteCount();
}
// Non-safe mode: allocates the shared buffer and hands out consecutive pieces of it to the
// registered blocks in binding order. Safe mode: nothing to do.
void BufferArea::commit()
{
    if (safe)
        return;

    CV_Assert(totalSize > 0);
    CV_Assert(oneBuf == NULL);
    CV_Assert(!blocks.empty());

    oneBuf = fastMalloc(totalSize);
    void * cursor = oneBuf;
    for (size_t idx = 0; idx < blocks.size(); ++idx)
    {
        // each block returns the address right after its own end
        cursor = blocks[idx].fast_allocate(cursor);
    }
}
//==================================================================================================
}} // cv::utils::

View File

@ -3,6 +3,7 @@
// of this distribution and at http://opencv.org/license.html.
#include "test_precomp.hpp"
#include "opencv2/core/utils/logger.hpp"
#include "opencv2/core/utils/buffer_area.private.hpp"
#include "test_utils_tls.impl.hpp"
@ -303,4 +304,132 @@ TEST(Samples, findFile_missing)
cv::utils::logging::setLogLevel(prev);
}
// Returns true when the half-open element ranges [first, first + first_num) and
// [second, second + second_num) share at least one element.
// Two ranges intersect exactly when each one starts before the other one ends; this also
// covers full containment and ranges that end at the same address.
template <typename T>
inline bool buffers_overlap(T * first, size_t first_num, T * second, size_t second_num)
{
    T * const first_end = first + first_num;
    T * const second_end = second + second_num;
    return (first < second_end) && (second < first_end);
}
typedef testing::TestWithParam<bool> BufferArea;
// Binds three pointers of different element types, checks that they are populated and
// aligned to their element size after commit(), and that they are reset to NULL once the
// area goes out of scope.
TEST_P(BufferArea, basic)
{
    const bool safeMode = GetParam();
    const size_t elemCount = 3;
    int * ints = NULL;
    uchar * bytes = NULL;
    double * doubles = NULL;
    {
        cv::utils::BufferArea area(safeMode);
        area.allocate(ints, elemCount);
        area.allocate(bytes, elemCount);
        area.allocate(doubles, elemCount);
        area.commit();

        ASSERT_TRUE(ints != NULL);
        ASSERT_TRUE(bytes != NULL);
        ASSERT_TRUE(doubles != NULL);

        const size_t intMisalign = (size_t)ints % sizeof(int);
        const size_t dblMisalign = (size_t)doubles % sizeof(double);
        EXPECT_EQ((size_t)0, intMisalign);
        EXPECT_EQ((size_t)0, dblMisalign);
    }
    // the area has been destroyed here
    EXPECT_TRUE(ints == NULL);
    EXPECT_TRUE(bytes == NULL);
    EXPECT_TRUE(doubles == NULL);
}
// Allocates several equally sized buffers whose alignment doubles from one buffer to the
// next, then verifies alignment, that neighbouring buffers do not overlap, and that all
// pointers are reset once the area is destroyed.
TEST_P(BufferArea, align)
{
    typedef int T;
    const size_t SZ = 3;
    const size_t CNT = 5;
    const bool safeMode = GetParam();
    T * buffers[CNT] = {0};
    {
        cv::utils::BufferArea area(safeMode);
        // bind buffers of 3 elements each with growing alignment (power of two)
        for (size_t idx = 0; idx < CNT; ++idx)
        {
            const ushort ALIGN = static_cast<ushort>(sizeof(T) << idx);
            EXPECT_TRUE(buffers[idx] == NULL);
            area.allocate(buffers[idx], SZ, ALIGN);
        }
        area.commit();
        for (size_t idx = 0; idx < CNT; ++idx)
        {
            const ushort ALIGN = static_cast<ushort>(sizeof(T) << idx);
            EXPECT_TRUE(buffers[idx] != NULL);
            EXPECT_EQ((size_t)0, reinterpret_cast<size_t>(buffers[idx]) % ALIGN);
            if (idx + 1 < CNT)
            {
                SCOPED_TRACE(idx);
                T * const current = buffers[idx];
                T * const next = buffers[idx + 1];
                EXPECT_FALSE(buffers_overlap(current, SZ, next, SZ))
                    << "Buffers overlap: "
                    << current << " (" << SZ << " elems)"
                    << " and "
                    << next << " (" << SZ << " elems)"
                    << " (element size: " << sizeof(T) << ")";
            }
        }
    }
    // the area has been destroyed: every bound pointer must be NULL again
    for (size_t idx = 0; idx < CNT; ++idx)
    {
        EXPECT_TRUE(buffers[idx] == NULL);
    }
}
// Allocates many buffers of different sizes, all with the same explicit 64-byte alignment,
// then verifies alignment, that neighbouring buffers do not overlap, and that all pointers
// are reset once the area is destroyed.
TEST_P(BufferArea, default_align)
{
    const bool safe = GetParam();
    const size_t CNT = 100;
    const ushort ALIGN = 64;
    typedef int T;
    T * buffers[CNT] = {0};
    {
        cv::utils::BufferArea area(safe);
        // allocate buffers with 1-100 elements, each aligned to ALIGN bytes
        for (size_t i = 0; i < CNT; ++ i)
        {
            EXPECT_TRUE(buffers[i] == NULL);
            area.allocate(buffers[i], i + 1, ALIGN);
        }
        area.commit();
        for (size_t i = 0; i < CNT; ++i)
        {
            EXPECT_TRUE(buffers[i] != NULL);
            EXPECT_EQ((size_t)0, reinterpret_cast<size_t>(buffers[i]) % ALIGN);
            if (i < CNT - 1)
            {
                SCOPED_TRACE(i);
                EXPECT_FALSE(buffers_overlap(buffers[i], i + 1, buffers[i + 1], i + 2))
                    << "Buffers overlap: "
                    << buffers[i] << " (" << i + 1 << " elems)"
                    << " and "
                    << buffers[i + 1] << " (" << i + 2 << " elems)"
                    << " (element size: " << sizeof(T) << ")";
            }
        }
    }
    // same post-condition as in the `align` test: destroying the area resets every pointer
    for (size_t i = 0; i < CNT; ++i)
    {
        EXPECT_TRUE(buffers[i] == NULL);
    }
}
// Every invalid allocate() request must be rejected with an exception.
TEST_P(BufferArea, bad)
{
    const bool safeMode = GetParam();
    int * target = 0;
    cv::utils::BufferArea area(safeMode);

    // zero element count
    EXPECT_ANY_THROW(area.allocate(target, 0)); // bad size
    // zero alignment
    EXPECT_ANY_THROW(area.allocate(target, 1, 0)); // bad alignment
    // alignment that is neither a power of two nor a multiple of sizeof(int)
    EXPECT_ANY_THROW(area.allocate(target, 1, 3)); // bad alignment

    // a pointer that is already set must not be bound
    target = (int*)1;
    EXPECT_ANY_THROW(area.allocate(target, 1)); // non-zero pointer
}
INSTANTIATE_TEST_CASE_P(/**/, BufferArea, testing::Values(true, false));
}} // namespace