diff --git a/modules/calib3d/src/stereobm.cpp b/modules/calib3d/src/stereobm.cpp index a7c7bfd849..afc404bffe 100644 --- a/modules/calib3d/src/stereobm.cpp +++ b/modules/calib3d/src/stereobm.cpp @@ -48,8 +48,10 @@ #include "precomp.hpp" #include #include +#include #include "opencl_kernels_calib3d.hpp" #include "opencv2/core/hal/intrin.hpp" +#include "opencv2/core/utils/buffer_area.private.hpp" namespace cv { @@ -85,6 +87,19 @@ struct StereoBMParams Rect roi1, roi2; int disp12MaxDiff; int dispType; + + inline bool useShorts() const + { + return preFilterCap <= 31 && SADWindowSize <= 21; + } + inline bool useFilterSpeckles() const + { + return speckleRange >= 0 && speckleWindowSize > 0; + } + inline bool useNormPrefilter() const + { + return preFilterType == StereoBM::PREFILTER_NORMALIZED_RESPONSE; + } }; #ifdef HAVE_OPENCL @@ -110,10 +125,10 @@ static bool ocl_prefilter_norm(InputArray _input, OutputArray _output, int winsi } #endif -static void prefilterNorm( const Mat& src, Mat& dst, int winsize, int ftzero, uchar* buf ) +static void prefilterNorm( const Mat& src, Mat& dst, int winsize, int ftzero, int *buf ) { int x, y, wsz2 = winsize/2; - int* vsum = (int*)alignPtr(buf + (wsz2 + 1)*sizeof(vsum[0]), 32); + int* vsum = buf + (wsz2 + 1); int scale_g = winsize*winsize/8, scale_s = (1024 + scale_g)/(scale_g*2); const int OFS = 256*5, TABSZ = OFS*2 + 256; uchar tab[TABSZ]; @@ -309,13 +324,77 @@ inline int dispDescale(int v1, int v2, int d) return (int)(v1*256 + (d != 0 ? 
v2*256/d : 0)); // no need to add 127, this will be converted to float } + +class BufferBM +{ + static const int TABSZ = 256; +public: + std::vector sad; + std::vector hsad; + std::vector htext; + std::vector cbuf0; + std::vector sad_short; + std::vector hsad_short; + int *prefilter[2]; + uchar tab[TABSZ]; +private: + utils::BufferArea area; + +public: + BufferBM(size_t nstripes, size_t width, size_t height, const StereoBMParams& params) + : sad(nstripes, NULL), + hsad(nstripes, NULL), + htext(nstripes, NULL), + cbuf0(nstripes, NULL), + sad_short(nstripes, NULL), + hsad_short(nstripes, NULL) + { + const int wsz = params.SADWindowSize; + const int ndisp = params.numDisparities; + const int ftzero = params.preFilterCap; + for (size_t i = 0; i < nstripes; ++i) + { + // 1D: [1][ ndisp ][1] +#if CV_SIMD + if (params.useShorts()) + area.allocate(sad_short[i], ndisp + 2); + else +#endif + area.allocate(sad[i], ndisp + 2); + + // 2D: [ wsz/2 + 1 ][ height ][ wsz/2 + 1 ] * [ ndisp ] +#if CV_SIMD + if (params.useShorts()) + area.allocate(hsad_short[i], (height + wsz + 2) * ndisp); + else +#endif + area.allocate(hsad[i], (height + wsz + 2) * ndisp); + + // 1D: [ wsz/2 + 1 ][ height ][ wsz/2 + 1 ] + area.allocate(htext[i], (height + wsz + 2)); + + // 3D: [ wsz/2 + 1 ][ height ][ wsz/2 + 1 ] * [ ndisp ] * [ wsz/2 + 1 ][ wsz/2 + 1 ] + area.allocate(cbuf0[i], ((height + wsz + 2) * ndisp * (wsz + 2) + 256)); + } + if (params.useNormPrefilter()) + { + for (size_t i = 0; i < 2; ++i) + area.allocate(prefilter[0], width + params.preFilterSize + 2); + } + area.commit(); + + // static table + for (int x = 0; x < TABSZ; x++) + tab[x] = (uchar)std::abs(x - ftzero); + } +}; + #if CV_SIMD template static void findStereoCorrespondenceBM_SIMD( const Mat& left, const Mat& right, - Mat& disp, Mat& cost, StereoBMParams& state, - uchar* buf, int _dy0, int _dy1 ) + Mat& disp, Mat& cost, const StereoBMParams& state, + int _dy0, int _dy1, const BufferBM & bufX, size_t bufNum ) { - const int ALIGN = 
CV_SIMD_WIDTH; int x, y, d; int wsz = state.SADWindowSize, wsz2 = wsz/2; int dy0 = MIN(_dy0, wsz2+1), dy1 = MIN(_dy1, wsz2+1); @@ -325,15 +404,13 @@ static void findStereoCorrespondenceBM_SIMD( const Mat& left, const Mat& right, int rofs = -MIN(ndisp - 1 + mindisp, 0); int width = left.cols, height = left.rows; int width1 = width - rofs - ndisp + 1; - int ftzero = state.preFilterCap; int textureThreshold = state.textureThreshold; int uniquenessRatio = state.uniquenessRatio; const int disp_shift = dispShiftTemplate::value; dType FILTERED = (dType)((mindisp - 1) << disp_shift); - ushort *sad, *hsad0, *hsad, *hsad_sub; - int *htext; - uchar *cbuf0, *cbuf; + ushort *hsad, *hsad_sub; + uchar *cbuf; const uchar* lptr0 = left.ptr() + lofs; const uchar* rptr0 = right.ptr() + rofs; const uchar *lptr, *lptr_sub, *rptr; @@ -343,23 +420,20 @@ static void findStereoCorrespondenceBM_SIMD( const Mat& left, const Mat& right, int cstep = (height + dy0 + dy1)*ndisp; short costbuf = 0; int coststep = cost.data ? 
(int)(cost.step/sizeof(costbuf)) : 0; - const int TABSZ = 256; - uchar tab[TABSZ]; + const uchar * tab = bufX.tab; short v_seq[v_int16::nlanes]; for (short i = 0; i < v_int16::nlanes; ++i) v_seq[i] = i; - sad = (ushort*)alignPtr(buf + sizeof(sad[0]), ALIGN); - hsad0 = (ushort*)alignPtr(sad + ndisp + 1 + dy0*ndisp, ALIGN); - htext = (int*)alignPtr((int*)(hsad0 + (height+dy1)*ndisp) + wsz2 + 2, ALIGN); - cbuf0 = (uchar*)alignPtr((uchar*)(htext + height + wsz2 + 2) + dy0*ndisp, ALIGN); - - for( x = 0; x < TABSZ; x++ ) - tab[x] = (uchar)std::abs(x - ftzero); + ushort *sad = bufX.sad_short[bufNum] + 1; + ushort *hsad0 = bufX.hsad_short[bufNum] + (wsz2 + 1) * ndisp; + int *htext = bufX.htext[bufNum] + (wsz2 + 1); + uchar *cbuf0 = bufX.cbuf0[bufNum] + (wsz2 + 1) * ndisp; // initialize buffers - memset( hsad0 - dy0*ndisp, 0, (height + dy0 + dy1)*ndisp*sizeof(hsad0[0]) ); - memset( htext - wsz2 - 1, 0, (height + wsz + 1)*sizeof(htext[0]) ); + memset(sad - 1, 0, (ndisp + 2) * sizeof(sad[0])); + memset(hsad0 - dy0 * ndisp, 0, (height + wsz + 2) * ndisp * sizeof(hsad[0])); + memset(htext - dy0, 0, (height + wsz + 2) * sizeof(htext[0])); for( x = -wsz2-1; x < wsz2; x++ ) { @@ -594,10 +668,9 @@ template static void findStereoCorrespondenceBM( const Mat& left, const Mat& right, Mat& disp, Mat& cost, const StereoBMParams& state, - uchar* buf, int _dy0, int _dy1 ) + int _dy0, int _dy1, const BufferBM & bufX, size_t bufNum ) { - const int ALIGN = CV_SIMD_WIDTH; int x, y, d; int wsz = state.SADWindowSize, wsz2 = wsz/2; int dy0 = MIN(_dy0, wsz2+1), dy1 = MIN(_dy1, wsz2+1); @@ -607,14 +680,13 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right, int rofs = -MIN(ndisp - 1 + mindisp, 0); int width = left.cols, height = left.rows; int width1 = width - rofs - ndisp + 1; - int ftzero = state.preFilterCap; int textureThreshold = state.textureThreshold; int uniquenessRatio = state.uniquenessRatio; const int disp_shift = dispShiftTemplate::value; mType FILTERED = (mType)((mindisp - 
1) << disp_shift); - int *sad, *hsad0, *hsad, *hsad_sub, *htext; - uchar *cbuf0, *cbuf; + int *hsad, *hsad_sub; + uchar *cbuf; const uchar* lptr0 = left.ptr() + lofs; const uchar* rptr0 = right.ptr() + rofs; const uchar *lptr, *lptr_sub, *rptr; @@ -624,8 +696,7 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right, int cstep = (height+dy0+dy1)*ndisp; int costbuf = 0; int coststep = cost.data ? (int)(cost.step/sizeof(costbuf)) : 0; - const int TABSZ = 256; - uchar tab[TABSZ]; + const uchar * tab = bufX.tab; #if CV_SIMD int v_seq[v_int32::nlanes]; @@ -634,17 +705,15 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right, v_int32 d0_4 = vx_load(v_seq), dd_4 = vx_setall_s32(v_int32::nlanes); #endif - sad = (int*)alignPtr(buf + sizeof(sad[0]), ALIGN); - hsad0 = (int*)alignPtr(sad + ndisp + 1 + dy0*ndisp, ALIGN); - htext = (int*)alignPtr((int*)(hsad0 + (height+dy1)*ndisp) + wsz2 + 2, ALIGN); - cbuf0 = (uchar*)alignPtr((uchar*)(htext + height + wsz2 + 2) + dy0*ndisp, ALIGN); - - for( x = 0; x < TABSZ; x++ ) - tab[x] = (uchar)std::abs(x - ftzero); + int *sad = bufX.sad[bufNum] + 1; + int *hsad0 = bufX.hsad[bufNum] + (wsz2 + 1) * ndisp; + int *htext = bufX.htext[bufNum] + (wsz2 + 1); + uchar *cbuf0 = bufX.cbuf0[bufNum] + (wsz2 + 1) * ndisp; // initialize buffers - memset( hsad0 - dy0*ndisp, 0, (height + dy0 + dy1)*ndisp*sizeof(hsad0[0]) ); - memset( htext - wsz2 - 1, 0, (height + wsz + 1)*sizeof(htext[0]) ); + memset(sad - 1, 0, (ndisp + 2) * sizeof(sad[0])); + memset(hsad0 - dy0 * ndisp, 0, (height + wsz + 2) * ndisp * sizeof(hsad[0])); + memset(htext - dy0, 0, (height + wsz + 2) * sizeof(htext[0])); for( x = -wsz2-1; x < wsz2; x++ ) { @@ -890,7 +959,7 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right, #ifdef HAVE_OPENCL static bool ocl_prefiltering(InputArray left0, InputArray right0, OutputArray left, OutputArray right, StereoBMParams* state) { - if( state->preFilterType == StereoBM::PREFILTER_NORMALIZED_RESPONSE ) + if 
(state->useNormPrefilter()) { if(!ocl_prefilter_norm( left0, left, state->preFilterSize, state->preFilterCap)) return false; @@ -911,29 +980,28 @@ static bool ocl_prefiltering(InputArray left0, InputArray right0, OutputArray le struct PrefilterInvoker : public ParallelLoopBody { PrefilterInvoker(const Mat& left0, const Mat& right0, Mat& left, Mat& right, - uchar* buf0, uchar* buf1, StereoBMParams* _state) + const BufferBM &bufX_, const StereoBMParams &state_) + : bufX(bufX_), state(state_) { imgs0[0] = &left0; imgs0[1] = &right0; imgs[0] = &left; imgs[1] = &right; - buf[0] = buf0; buf[1] = buf1; - state = _state; } void operator()(const Range& range) const CV_OVERRIDE { for( int i = range.start; i < range.end; i++ ) { - if( state->preFilterType == StereoBM::PREFILTER_NORMALIZED_RESPONSE ) - prefilterNorm( *imgs0[i], *imgs[i], state->preFilterSize, state->preFilterCap, buf[i] ); + if (state.useNormPrefilter()) + prefilterNorm( *imgs0[i], *imgs[i], state.preFilterSize, state.preFilterCap, bufX.prefilter[i] ); else - prefilterXSobel( *imgs0[i], *imgs[i], state->preFilterCap ); + prefilterXSobel( *imgs0[i], *imgs[i], state.preFilterCap ); } } const Mat* imgs0[2]; Mat* imgs[2]; - uchar* buf[2]; - StereoBMParams* state; + const BufferBM &bufX; + const StereoBMParams &state; }; #ifdef HAVE_OPENCL @@ -986,18 +1054,17 @@ static bool ocl_stereobm( InputArray _left, InputArray _right, struct FindStereoCorrespInvoker : public ParallelLoopBody { FindStereoCorrespInvoker( const Mat& _left, const Mat& _right, - Mat& _disp, StereoBMParams* _state, - int _nstripes, size_t _stripeBufSize, - bool _useShorts, Rect _validDisparityRect, - Mat& _slidingSumBuf, Mat& _cost ) + Mat& _disp, const StereoBMParams &_state, + int _nstripes, + Rect _validDisparityRect, + Mat& _cost, const BufferBM & buf_ ) + : state(_state), buf(buf_) { CV_Assert( _disp.type() == CV_16S || _disp.type() == CV_32S ); left = &_left; right = &_right; - disp = &_disp; state = _state; - nstripes = _nstripes; 
stripeBufSize = _stripeBufSize; - useShorts = _useShorts; + disp = &_disp; + nstripes = _nstripes; validDisparityRect = _validDisparityRect; - slidingSumBuf = &_slidingSumBuf; cost = &_cost; } @@ -1006,11 +1073,10 @@ struct FindStereoCorrespInvoker : public ParallelLoopBody int cols = left->cols, rows = left->rows; int _row0 = std::min(cvRound(range.start * rows / nstripes), rows); int _row1 = std::min(cvRound(range.end * rows / nstripes), rows); - uchar *ptr = slidingSumBuf->ptr() + range.start * stripeBufSize; int dispShift = disp->type() == CV_16S ? DISPARITY_SHIFT_16S : DISPARITY_SHIFT_32S; - int FILTERED = (state->minDisparity - 1) << dispShift; + int FILTERED = (state.minDisparity - 1) << dispShift; Rect roi = validDisparityRect & Rect(0, _row0, cols, _row1 - _row0); if( roi.height == 0 ) @@ -1033,27 +1099,27 @@ struct FindStereoCorrespInvoker : public ParallelLoopBody Mat left_i = left->rowRange(row0, row1); Mat right_i = right->rowRange(row0, row1); Mat disp_i = disp->rowRange(row0, row1); - Mat cost_i = state->disp12MaxDiff >= 0 ? cost->rowRange(row0, row1) : Mat(); + Mat cost_i = state.disp12MaxDiff >= 0 ? 
cost->rowRange(row0, row1) : Mat(); #if CV_SIMD - if (useShorts) + if (state.useShorts()) { if( disp_i.type() == CV_16S) - findStereoCorrespondenceBM_SIMD( left_i, right_i, disp_i, cost_i, *state, ptr, row0, rows - row1 ); + findStereoCorrespondenceBM_SIMD( left_i, right_i, disp_i, cost_i, state, row0, rows - row1, buf, range.start ); else - findStereoCorrespondenceBM_SIMD( left_i, right_i, disp_i, cost_i, *state, ptr, row0, rows - row1); + findStereoCorrespondenceBM_SIMD( left_i, right_i, disp_i, cost_i, state, row0, rows - row1, buf, range.start); } else #endif { if( disp_i.type() == CV_16S ) - findStereoCorrespondenceBM( left_i, right_i, disp_i, cost_i, *state, ptr, row0, rows - row1 ); + findStereoCorrespondenceBM( left_i, right_i, disp_i, cost_i, state, row0, rows - row1, buf, range.start ); else - findStereoCorrespondenceBM( left_i, right_i, disp_i, cost_i, *state, ptr, row0, rows - row1 ); + findStereoCorrespondenceBM( left_i, right_i, disp_i, cost_i, state, row0, rows - row1, buf, range.start ); } - if( state->disp12MaxDiff >= 0 ) - validateDisparity( disp_i, cost_i, state->minDisparity, state->numDisparities, state->disp12MaxDiff ); + if( state.disp12MaxDiff >= 0 ) + validateDisparity( disp_i, cost_i, state.minDisparity, state.numDisparities, state.disp12MaxDiff ); if( roi.x > 0 ) { @@ -1069,13 +1135,12 @@ struct FindStereoCorrespInvoker : public ParallelLoopBody protected: const Mat *left, *right; - Mat* disp, *slidingSumBuf, *cost; - StereoBMParams *state; + Mat* disp, *cost; + const StereoBMParams &state; int nstripes; - size_t stripeBufSize; - bool useShorts; Rect validDisparityRect; + const BufferBM & buf; }; class StereoBMImpl CV_FINAL : public StereoBM @@ -1149,7 +1214,7 @@ public: disp_shift = DISPARITY_SHIFT_16S; FILTERED = (params.minDisparity - 1) << disp_shift; - if( params.speckleRange >= 0 && params.speckleWindowSize > 0 ) + if (params.useFilterSpeckles()) filterSpeckles(disparr.getMat(), FILTERED, params.speckleWindowSize, 
params.speckleRange, slidingSumBuf); if (dtype == CV_32F) disparr.getUMat().convertTo(disparr, CV_32FC1, 1./(1 << disp_shift), 0); @@ -1192,44 +1257,39 @@ public: disp = dispbuf; } - int wsz = params.SADWindowSize; - int bufSize0 = (int)((ndisp + 2)*sizeof(int)); - bufSize0 += (int)((height+wsz+2)*ndisp*sizeof(int)); - bufSize0 += (int)((height + wsz + 2)*sizeof(int)); - bufSize0 += (int)((height+wsz+2)*ndisp*(wsz+2)*sizeof(uchar) + 256); + { + const double SAD_overhead_coeff = 10.0; + const double N0 = 8000000 / (params.useShorts() ? 1 : 4); // approx tbb's min number instructions reasonable for one thread + const double maxStripeSize = std::min( + std::max( + N0 / (width * ndisp), + (params.SADWindowSize-1) * SAD_overhead_coeff + ), + (double)height + ); + const int nstripes = cvCeil(height / maxStripeSize); + BufferBM localBuf(nstripes, width, height, params); - int bufSize1 = (int)((width + params.preFilterSize + 2) * sizeof(int) + 256); - int bufSize2 = 0; - if( params.speckleRange >= 0 && params.speckleWindowSize > 0 ) - bufSize2 = width*height*(sizeof(Point_) + sizeof(int) + sizeof(uchar)); + // Prefiltering + parallel_for_(Range(0, 2), PrefilterInvoker(left0, right0, left, right, localBuf, params), 1); - bool useShorts = params.preFilterCap <= 31 && params.SADWindowSize <= 21; - const double SAD_overhead_coeff = 10.0; - double N0 = 8000000 / (useShorts ? 1 : 4); // approx tbb's min number instructions reasonable for one thread - double maxStripeSize = std::min(std::max(N0 / (width * ndisp), (wsz-1) * SAD_overhead_coeff), (double)height); - int nstripes = cvCeil(height / maxStripeSize); - int bufSize = std::max(bufSize0 * nstripes, std::max(bufSize1 * 2, bufSize2)); - if( slidingSumBuf.cols < bufSize ) - slidingSumBuf.create( 1, bufSize, CV_8U ); + Rect validDisparityRect(0, 0, width, height), R1 = params.roi1, R2 = params.roi2; + validDisparityRect = getValidDisparityROI(!R1.empty() ? R1 : validDisparityRect, + !R2.empty() ? 
R2 : validDisparityRect, + params.minDisparity, params.numDisparities, + params.SADWindowSize); - uchar *_buf = slidingSumBuf.ptr(); + FindStereoCorrespInvoker invoker(left, right, disp, params, nstripes, validDisparityRect, cost, localBuf); + parallel_for_(Range(0, nstripes), invoker); - parallel_for_(Range(0, 2), PrefilterInvoker(left0, right0, left, right, _buf, _buf + bufSize1, &params), 1); + if (params.useFilterSpeckles()) + { + slidingSumBuf.create( 1, width * height * (sizeof(Point_) + sizeof(int) + sizeof(uchar)), CV_8U ); + filterSpeckles(disp, FILTERED, params.speckleWindowSize, params.speckleRange, slidingSumBuf); + } - Rect validDisparityRect(0, 0, width, height), R1 = params.roi1, R2 = params.roi2; - validDisparityRect = getValidDisparityROI(!R1.empty() ? R1 : validDisparityRect, - !R2.empty() ? R2 : validDisparityRect, - params.minDisparity, params.numDisparities, - params.SADWindowSize); - - parallel_for_(Range(0, nstripes), - FindStereoCorrespInvoker(left, right, disp, &params, nstripes, - bufSize0, useShorts, validDisparityRect, - slidingSumBuf, cost)); - - if( params.speckleRange >= 0 && params.speckleWindowSize > 0 ) - filterSpeckles(disp, FILTERED, params.speckleWindowSize, params.speckleRange, slidingSumBuf); + } if (disp0.data != disp.data) disp.convertTo(disp0, disp0.type(), 1./(1 << disp_shift), 0); diff --git a/modules/core/include/opencv2/core/utils/buffer_area.private.hpp b/modules/core/include/opencv2/core/utils/buffer_area.private.hpp new file mode 100644 index 0000000000..141ad2c502 --- /dev/null +++ b/modules/core/include/opencv2/core/utils/buffer_area.private.hpp @@ -0,0 +1,103 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +#ifndef OPENCV_UTILS_BUFFER_AREA_HPP +#define OPENCV_UTILS_BUFFER_AREA_HPP + +#include +#include +#include +#include + +namespace cv { namespace utils { + +//! 
@addtogroup core_utils +//! @{ + +/** @brief Manages memory block shared by multiple buffers. + +This class allows to allocate one large memory block and split it into several smaller +non-overlapping buffers. In safe mode each buffer allocation will be performed independently, +this mode allows dynamic memory access instrumentation using valgrind or memory sanitizer. + +Safe mode can be explicitly switched ON in constructor. It will also be enabled when compiling with +memory sanitizer support or in runtime with the environment variable `OPENCV_BUFFER_AREA_ALWAYS_SAFE`. + +Example of usage: +@code +int * buf1 = 0; +double * buf2 = 0; +cv::utils::BufferArea area; +area.allocate(buf1, 200); // buf1 = new int[200]; +area.allocate(buf2, 1000, 64); // buf2 = new double[1000]; - aligned by 64 +area.commit(); +@endcode + +@note This class is considered private and should be used only in OpenCV itself. API can be changed. +*/ +class CV_EXPORTS BufferArea +{ +public: + /** @brief Class constructor. + + @param safe Enable _safe_ operation mode, each allocation will be performed independently. + */ + BufferArea(bool safe = false); + + /** @brief Class destructor + + All allocated memory will be freed. Each bound pointer will be reset to NULL. + */ + ~BufferArea(); + + /** @brief Bind a pointer to local area. + + BufferArea will store reference to the pointer and allocation parameters effectively owning the + pointer and allocated memory. This operation has the same parameters and does the same job + as the operator `new`, except allocation can be performed later during the BufferArea::commit call. + + @param ptr Reference to a pointer of type T. Must be NULL + @param count Count of objects to be allocated, it has the same meaning as in the operator `new`. + @param alignment Alignment of allocated memory. Same meaning as in the operator `new` (C++17). + Must be divisible by sizeof(T). Must be power of two. + + @note In safe mode allocation will be performed immediately. 
+ */ + template + void allocate(T*&ptr, size_t count, ushort alignment = sizeof(T)) + { + CV_Assert(ptr == NULL); + CV_Assert(count > 0); + CV_Assert(alignment > 0); + CV_Assert(alignment % sizeof(T) == 0); + CV_Assert((alignment & (alignment - 1)) == 0); + allocate_((void**)(&ptr), static_cast(sizeof(T)), count, alignment); + } + + /** @brief Allocate memory and initialize all bound pointers + + Each pointer bound to the area with the BufferArea::allocate will be initialized and will be set + to point to a memory block with requested size and alignment. + + @note Does nothing in safe mode as all allocations will be performed by BufferArea::allocate + */ + void commit(); + +private: + BufferArea(const BufferArea &); // = delete + BufferArea &operator=(const BufferArea &); // = delete + void allocate_(void **ptr, ushort type_size, size_t count, ushort alignment); + +private: + class Block; + std::vector blocks; + void * oneBuf; + size_t totalSize; + const bool safe; +}; + +//! @} + +}} // cv::utils:: + +#endif diff --git a/modules/core/src/buffer_area.cpp b/modules/core/src/buffer_area.cpp new file mode 100644 index 0000000000..2a41c72f45 --- /dev/null +++ b/modules/core/src/buffer_area.cpp @@ -0,0 +1,121 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#include "opencv2/core/utils/buffer_area.private.hpp" +#include "opencv2/core/utils/configuration.private.hpp" + +#ifdef OPENCV_ENABLE_MEMORY_SANITIZER +#define BUFFER_AREA_DEFAULT_MODE true +#else +#define BUFFER_AREA_DEFAULT_MODE false +#endif + +static bool CV_BUFFER_AREA_OVERRIDE_SAFE_MODE = + cv::utils::getConfigurationParameterBool("OPENCV_BUFFER_AREA_ALWAYS_SAFE", BUFFER_AREA_DEFAULT_MODE); + +namespace cv { namespace utils { + +//================================================================================================== + +class BufferArea::Block +{ +private: + inline size_t reserve_count() const + { + return alignment / type_size - 1; + } +public: + Block(void **ptr_, ushort type_size_, size_t count_, ushort alignment_) + : ptr(ptr_), raw_mem(0), count(count_), type_size(type_size_), alignment(alignment_) + { + CV_Assert(ptr && *ptr == NULL); + } + void cleanup() const + { + CV_Assert(ptr && *ptr); + *ptr = 0; + if (raw_mem) + fastFree(raw_mem); + } + size_t getByteCount() const + { + return type_size * (count + reserve_count()); + } + void real_allocate() + { + CV_Assert(ptr && *ptr == NULL); + const size_t allocated_count = count + reserve_count(); + raw_mem = fastMalloc(type_size * allocated_count); + if (alignment != type_size) + { + *ptr = alignPtr(raw_mem, alignment); + CV_Assert(reinterpret_cast(*ptr) % alignment == 0); + CV_Assert(static_cast(*ptr) + type_size * count <= static_cast(raw_mem) + type_size * allocated_count); + } + else + { + *ptr = raw_mem; + } + } + void * fast_allocate(void * buf) const + { + CV_Assert(ptr && *ptr == NULL); + buf = alignPtr(buf, alignment); + CV_Assert(reinterpret_cast(buf) % alignment == 0); + *ptr = buf; + return static_cast(static_cast(*ptr) + type_size * count); + } +private: + void **ptr; + void * raw_mem; + size_t count; + ushort type_size; + ushort alignment; +}; + +//================================================================================================== + +BufferArea::BufferArea(bool 
safe_) : + oneBuf(0), + totalSize(0), + safe(safe_ || CV_BUFFER_AREA_OVERRIDE_SAFE_MODE) +{ +} + +BufferArea::~BufferArea() +{ + for(std::vector::const_iterator i = blocks.begin(); i != blocks.end(); ++i) + i->cleanup(); + if (oneBuf) + fastFree(oneBuf); +} + +void BufferArea::allocate_(void **ptr, ushort type_size, size_t count, ushort alignment) +{ + blocks.push_back(Block(ptr, type_size, count, alignment)); + if (safe) + blocks.back().real_allocate(); + else + totalSize += blocks.back().getByteCount(); +} + +void BufferArea::commit() +{ + if (!safe) + { + CV_Assert(totalSize > 0); + CV_Assert(oneBuf == NULL); + CV_Assert(!blocks.empty()); + oneBuf = fastMalloc(totalSize); + void * ptr = oneBuf; + for(std::vector::const_iterator i = blocks.begin(); i != blocks.end(); ++i) + { + ptr = i->fast_allocate(ptr); + } + } +} + +//================================================================================================== + +}} // cv::utils:: diff --git a/modules/core/test/test_utils.cpp b/modules/core/test/test_utils.cpp index 2bae77892b..87891488ec 100644 --- a/modules/core/test/test_utils.cpp +++ b/modules/core/test/test_utils.cpp @@ -3,6 +3,7 @@ // of this distribution and at http://opencv.org/license.html. 
#include "test_precomp.hpp" #include "opencv2/core/utils/logger.hpp" +#include "opencv2/core/utils/buffer_area.private.hpp" #include "test_utils_tls.impl.hpp" @@ -303,4 +304,132 @@ TEST(Samples, findFile_missing) cv::utils::logging::setLogLevel(prev); } +template +inline bool buffers_overlap(T * first, size_t first_num, T * second, size_t second_num) +{ + // cerr << "[" << (void*)first << " : " << (void*)(first + first_num) << ")"; + // cerr << " X "; + // cerr << "[" << (void*)second << " : " << (void*)(second + second_num) << ")"; + // cerr << endl; + bool res = false; + res |= (second <= first) && (first < second + second_num); + res |= (second < first + first_num) && (first + first_num < second + second_num); + return res; +} + +typedef testing::TestWithParam BufferArea; + +TEST_P(BufferArea, basic) +{ + const bool safe = GetParam(); + const size_t SZ = 3; + int * int_ptr = NULL; + uchar * uchar_ptr = NULL; + double * dbl_ptr = NULL; + { + cv::utils::BufferArea area(safe); + area.allocate(int_ptr, SZ); + area.allocate(uchar_ptr, SZ); + area.allocate(dbl_ptr, SZ); + area.commit(); + ASSERT_TRUE(int_ptr != NULL); + ASSERT_TRUE(uchar_ptr != NULL); + ASSERT_TRUE(dbl_ptr != NULL); + EXPECT_EQ((size_t)0, (size_t)int_ptr % sizeof(int)); + EXPECT_EQ((size_t)0, (size_t)dbl_ptr % sizeof(double)); + } + EXPECT_TRUE(int_ptr == NULL); + EXPECT_TRUE(uchar_ptr == NULL); + EXPECT_TRUE(dbl_ptr == NULL); +} + +TEST_P(BufferArea, align) +{ + const bool safe = GetParam(); + const size_t SZ = 3; + const size_t CNT = 5; + typedef int T; + T * buffers[CNT] = {0}; + { + cv::utils::BufferArea area(safe); + // allocate buffers with 3 elements with growing alignment (power of two) + for (size_t i = 0; i < CNT; ++i) + { + const ushort ALIGN = static_cast(sizeof(T) << i); + EXPECT_TRUE(buffers[i] == NULL); + area.allocate(buffers[i], SZ, ALIGN); + } + area.commit(); + for (size_t i = 0; i < CNT; ++i) + { + const ushort ALIGN = static_cast(sizeof(T) << i); + EXPECT_TRUE(buffers[i] != NULL); 
+ EXPECT_EQ((size_t)0, reinterpret_cast(buffers[i]) % ALIGN); + if (i < CNT - 1) + { + SCOPED_TRACE(i); + EXPECT_FALSE(buffers_overlap(buffers[i], SZ, buffers[i + 1], SZ)) + << "Buffers overlap: " + << buffers[i] << " (" << SZ << " elems)" + << " and " + << buffers[i + 1] << " (" << SZ << " elems)" + << " (element size: " << sizeof(T) << ")"; + } + } + } + for (size_t i = 0; i < CNT; ++i) + { + EXPECT_TRUE(buffers[i] == NULL); + } +} + +TEST_P(BufferArea, default_align) +{ + const bool safe = GetParam(); + const size_t CNT = 100; + const ushort ALIGN = 64; + typedef int T; + T * buffers[CNT] = {0}; + { + cv::utils::BufferArea area(safe); + // allocate buffers with 1-99 elements with default alignment + for (size_t i = 0; i < CNT; ++ i) + { + EXPECT_TRUE(buffers[i] == NULL); + area.allocate(buffers[i], i + 1, ALIGN); + } + area.commit(); + for (size_t i = 0; i < CNT; ++i) + { + EXPECT_TRUE(buffers[i] != NULL); + EXPECT_EQ((size_t)0, reinterpret_cast(buffers[i]) % ALIGN); + if (i < CNT - 1) + { + SCOPED_TRACE(i); + EXPECT_FALSE(buffers_overlap(buffers[i], i + 1, buffers[i + 1], i + 2)) + << "Buffers overlap: " + << buffers[i] << " (" << i + 1 << " elems)" + << " and " + << buffers[i + 1] << " (" << i + 2 << " elems)" + << " (element size: " << sizeof(T) << ")"; + } + } + } +} + +TEST_P(BufferArea, bad) +{ + const bool safe = GetParam(); + int * ptr = 0; + cv::utils::BufferArea area(safe); + EXPECT_ANY_THROW(area.allocate(ptr, 0)); // bad size + EXPECT_ANY_THROW(area.allocate(ptr, 1, 0)); // bad alignment + EXPECT_ANY_THROW(area.allocate(ptr, 1, 3)); // bad alignment + ptr = (int*)1; + EXPECT_ANY_THROW(area.allocate(ptr, 1)); // non-zero pointer +} + +INSTANTIATE_TEST_CASE_P(/**/, BufferArea, testing::Values(true, false)); + + }} // namespace