Reduced direct TBB dependencies

This commit is contained in:
Maksim Shabunin 2018-04-06 14:21:15 +03:00
parent 875b4e212a
commit b88609a921
7 changed files with 4 additions and 791 deletions

View File

@ -34,42 +34,6 @@ using cv::ParallelLoopBody;
#include "cvconfig.h" #include "cvconfig.h"
#ifdef HAVE_TBB
# include "tbb/tbb.h"
# include "tbb/task.h"
# undef min
# undef max
#endif
#ifdef HAVE_TBB
typedef tbb::blocked_range<int> BlockedRange;
template<typename Body> static inline
void parallel_for( const BlockedRange& range, const Body& body )
{
tbb::parallel_for(range, body);
}
#else
class BlockedRange
{
public:
BlockedRange() : _begin(0), _end(0), _grainsize(0) {}
BlockedRange(int b, int e, int g=1) : _begin(b), _end(e), _grainsize(g) {}
int begin() const { return _begin; }
int end() const { return _end; }
int grainsize() const { return _grainsize; }
protected:
int _begin, _end, _grainsize;
};
template<typename Body> static inline
void parallel_for( const BlockedRange& range, const Body& body )
{
body(range);
}
#endif
using namespace std; using namespace std;
static inline double static inline double

View File

@ -107,7 +107,3 @@ endif()
if(TBB_INTERFACE_VERSION LESS 6000) # drop support of versions < 4.0 if(TBB_INTERFACE_VERSION LESS 6000) # drop support of versions < 4.0
set(HAVE_TBB FALSE) set(HAVE_TBB FALSE)
endif() endif()
if(HAVE_TBB)
list(APPEND OPENCV_LINKER_LIBS tbb)
endif()

View File

@ -13,6 +13,10 @@ if(WINRT AND CMAKE_SYSTEM_NAME MATCHES WindowsStore AND CMAKE_SYSTEM_VERSION MAT
list(APPEND extra_libs ole32.lib) list(APPEND extra_libs ole32.lib)
endif() endif()
if(HAVE_TBB)
list(APPEND extra_libs tbb)
endif()
if(DEFINED WINRT AND NOT DEFINED ENABLE_WINRT_MODE_NATIVE) if(DEFINED WINRT AND NOT DEFINED ENABLE_WINRT_MODE_NATIVE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /ZW") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /ZW")
endif() endif()

View File

@ -61,37 +61,10 @@
# include "opencv2/core/eigen.hpp" # include "opencv2/core/eigen.hpp"
#endif #endif
#ifdef HAVE_TBB
# include "tbb/tbb.h"
# include "tbb/task.h"
# undef min
# undef max
#endif
//! @cond IGNORED //! @cond IGNORED
namespace cv namespace cv
{ {
#ifdef HAVE_TBB
typedef tbb::blocked_range<int> BlockedRange;
template<typename Body> static inline
void parallel_for( const BlockedRange& range, const Body& body )
{
tbb::parallel_for(range, body);
}
typedef tbb::split Split;
template<typename Body> static inline
void parallel_reduce( const BlockedRange& range, Body& body )
{
tbb::parallel_reduce(range, body);
}
typedef tbb::concurrent_vector<Rect> ConcurrentRectVector;
#else
class BlockedRange class BlockedRange
{ {
public: public:
@ -119,7 +92,6 @@ namespace cv
{ {
body(range); body(range);
} }
#endif
// Returns a static string if there is a parallel framework, // Returns a static string if there is a parallel framework,
// NULL otherwise. // NULL otherwise.

View File

@ -169,13 +169,11 @@ static void histPrepareImages( const Mat* images, int nimages, const int* channe
deltas[dims*2 + 1] = (int)(mask.step/mask.elemSize1()); deltas[dims*2 + 1] = (int)(mask.step/mask.elemSize1());
} }
#ifndef HAVE_TBB
if( isContinuous ) if( isContinuous )
{ {
imsize.width *= imsize.height; imsize.width *= imsize.height;
imsize.height = 1; imsize.height = 1;
} }
#endif
if( !ranges ) if( !ranges )
{ {
@ -213,544 +211,6 @@ static void histPrepareImages( const Mat* images, int nimages, const int* channe
////////////////////////////////// C A L C U L A T E H I S T O G R A M //////////////////////////////////// ////////////////////////////////// C A L C U L A T E H I S T O G R A M ////////////////////////////////////
#ifdef HAVE_TBB
enum {one = 1, two, three}; // array elements number
template<typename T>
class calcHist1D_Invoker
{
public:
calcHist1D_Invoker( const std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas,
Mat& hist, const double* _uniranges, int sz, int dims,
Size& imageSize )
: mask_(_ptrs[dims]),
mstep_(_deltas[dims*2 + 1]),
imageWidth_(imageSize.width),
histogramSize_(hist.size()), histogramType_(hist.type()),
globalHistogram_((tbb::atomic<int>*)hist.data)
{
p_[0] = ((T**)&_ptrs[0])[0];
step_[0] = (&_deltas[0])[1];
d_[0] = (&_deltas[0])[0];
a_[0] = (&_uniranges[0])[0];
b_[0] = (&_uniranges[0])[1];
size_[0] = sz;
}
void operator()( const BlockedRange& range ) const
{
T* p0 = p_[0] + range.begin() * (step_[0] + imageWidth_*d_[0]);
uchar* mask = mask_ + range.begin()*mstep_;
for( int row = range.begin(); row < range.end(); row++, p0 += step_[0] )
{
if( !mask_ )
{
for( int x = 0; x < imageWidth_; x++, p0 += d_[0] )
{
int idx = cvFloor(*p0*a_[0] + b_[0]);
if( (unsigned)idx < (unsigned)size_[0] )
{
globalHistogram_[idx].fetch_and_add(1);
}
}
}
else
{
for( int x = 0; x < imageWidth_; x++, p0 += d_[0] )
{
if( mask[x] )
{
int idx = cvFloor(*p0*a_[0] + b_[0]);
if( (unsigned)idx < (unsigned)size_[0] )
{
globalHistogram_[idx].fetch_and_add(1);
}
}
}
mask += mstep_;
}
}
}
private:
calcHist1D_Invoker operator=(const calcHist1D_Invoker&);
T* p_[one];
uchar* mask_;
int step_[one];
int d_[one];
int mstep_;
double a_[one];
double b_[one];
int size_[one];
int imageWidth_;
Size histogramSize_;
int histogramType_;
tbb::atomic<int>* globalHistogram_;
};
template<typename T>
class calcHist2D_Invoker
{
public:
calcHist2D_Invoker( const std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas,
Mat& hist, const double* _uniranges, const int* size,
int dims, Size& imageSize, size_t* hstep )
: mask_(_ptrs[dims]),
mstep_(_deltas[dims*2 + 1]),
imageWidth_(imageSize.width),
histogramSize_(hist.size()), histogramType_(hist.type()),
globalHistogram_(hist.data)
{
p_[0] = ((T**)&_ptrs[0])[0]; p_[1] = ((T**)&_ptrs[0])[1];
step_[0] = (&_deltas[0])[1]; step_[1] = (&_deltas[0])[3];
d_[0] = (&_deltas[0])[0]; d_[1] = (&_deltas[0])[2];
a_[0] = (&_uniranges[0])[0]; a_[1] = (&_uniranges[0])[2];
b_[0] = (&_uniranges[0])[1]; b_[1] = (&_uniranges[0])[3];
size_[0] = size[0]; size_[1] = size[1];
hstep_[0] = hstep[0];
}
void operator()(const BlockedRange& range) const
{
T* p0 = p_[0] + range.begin()*(step_[0] + imageWidth_*d_[0]);
T* p1 = p_[1] + range.begin()*(step_[1] + imageWidth_*d_[1]);
uchar* mask = mask_ + range.begin()*mstep_;
for( int row = range.begin(); row < range.end(); row++, p0 += step_[0], p1 += step_[1] )
{
if( !mask_ )
{
for( int x = 0; x < imageWidth_; x++, p0 += d_[0], p1 += d_[1] )
{
int idx0 = cvFloor(*p0*a_[0] + b_[0]);
int idx1 = cvFloor(*p1*a_[1] + b_[1]);
if( (unsigned)idx0 < (unsigned)size_[0] && (unsigned)idx1 < (unsigned)size_[1] )
( (tbb::atomic<int>*)(globalHistogram_ + hstep_[0]*idx0) )[idx1].fetch_and_add(1);
}
}
else
{
for( int x = 0; x < imageWidth_; x++, p0 += d_[0], p1 += d_[1] )
{
if( mask[x] )
{
int idx0 = cvFloor(*p0*a_[0] + b_[0]);
int idx1 = cvFloor(*p1*a_[1] + b_[1]);
if( (unsigned)idx0 < (unsigned)size_[0] && (unsigned)idx1 < (unsigned)size_[1] )
((tbb::atomic<int>*)(globalHistogram_ + hstep_[0]*idx0))[idx1].fetch_and_add(1);
}
}
mask += mstep_;
}
}
}
private:
calcHist2D_Invoker operator=(const calcHist2D_Invoker&);
T* p_[two];
uchar* mask_;
int step_[two];
int d_[two];
int mstep_;
double a_[two];
double b_[two];
int size_[two];
const int imageWidth_;
size_t hstep_[one];
Size histogramSize_;
int histogramType_;
uchar* globalHistogram_;
};
template<typename T>
class calcHist3D_Invoker
{
public:
calcHist3D_Invoker( const std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas,
Size imsize, Mat& hist, const double* uniranges, int _dims,
size_t* hstep, int* size )
: mask_(_ptrs[_dims]),
mstep_(_deltas[_dims*2 + 1]),
imageWidth_(imsize.width),
globalHistogram_(hist.data)
{
p_[0] = ((T**)&_ptrs[0])[0]; p_[1] = ((T**)&_ptrs[0])[1]; p_[2] = ((T**)&_ptrs[0])[2];
step_[0] = (&_deltas[0])[1]; step_[1] = (&_deltas[0])[3]; step_[2] = (&_deltas[0])[5];
d_[0] = (&_deltas[0])[0]; d_[1] = (&_deltas[0])[2]; d_[2] = (&_deltas[0])[4];
a_[0] = uniranges[0]; a_[1] = uniranges[2]; a_[2] = uniranges[4];
b_[0] = uniranges[1]; b_[1] = uniranges[3]; b_[2] = uniranges[5];
size_[0] = size[0]; size_[1] = size[1]; size_[2] = size[2];
hstep_[0] = hstep[0]; hstep_[1] = hstep[1];
}
void operator()( const BlockedRange& range ) const
{
T* p0 = p_[0] + range.begin()*(imageWidth_*d_[0] + step_[0]);
T* p1 = p_[1] + range.begin()*(imageWidth_*d_[1] + step_[1]);
T* p2 = p_[2] + range.begin()*(imageWidth_*d_[2] + step_[2]);
uchar* mask = mask_ + range.begin()*mstep_;
for( int i = range.begin(); i < range.end(); i++, p0 += step_[0], p1 += step_[1], p2 += step_[2] )
{
if( !mask_ )
{
for( int x = 0; x < imageWidth_; x++, p0 += d_[0], p1 += d_[1], p2 += d_[2] )
{
int idx0 = cvFloor(*p0*a_[0] + b_[0]);
int idx1 = cvFloor(*p1*a_[1] + b_[1]);
int idx2 = cvFloor(*p2*a_[2] + b_[2]);
if( (unsigned)idx0 < (unsigned)size_[0] &&
(unsigned)idx1 < (unsigned)size_[1] &&
(unsigned)idx2 < (unsigned)size_[2] )
{
( (tbb::atomic<int>*)(globalHistogram_ + hstep_[0]*idx0 + hstep_[1]*idx1) )[idx2].fetch_and_add(1);
}
}
}
else
{
for( int x = 0; x < imageWidth_; x++, p0 += d_[0], p1 += d_[1], p2 += d_[2] )
{
if( mask[x] )
{
int idx0 = cvFloor(*p0*a_[0] + b_[0]);
int idx1 = cvFloor(*p1*a_[1] + b_[1]);
int idx2 = cvFloor(*p2*a_[2] + b_[2]);
if( (unsigned)idx0 < (unsigned)size_[0] &&
(unsigned)idx1 < (unsigned)size_[1] &&
(unsigned)idx2 < (unsigned)size_[2] )
{
( (tbb::atomic<int>*)(globalHistogram_ + hstep_[0]*idx0 + hstep_[1]*idx1) )[idx2].fetch_and_add(1);
}
}
}
mask += mstep_;
}
}
}
static bool isFit( const Mat& histogram, const Size imageSize )
{
return ( imageSize.width * imageSize.height >= 320*240
&& histogram.total() >= 8*8*8 );
}
private:
calcHist3D_Invoker operator=(const calcHist3D_Invoker&);
T* p_[three];
uchar* mask_;
int step_[three];
int d_[three];
const int mstep_;
double a_[three];
double b_[three];
int size_[three];
int imageWidth_;
size_t hstep_[two];
uchar* globalHistogram_;
};
class CalcHist1D_8uInvoker
{
public:
CalcHist1D_8uInvoker( const std::vector<uchar*>& ptrs, const std::vector<int>& deltas,
Size imsize, Mat& hist, int dims, const std::vector<size_t>& tab,
tbb::mutex* lock )
: mask_(ptrs[dims]),
mstep_(deltas[dims*2 + 1]),
imageWidth_(imsize.width),
imageSize_(imsize),
histSize_(hist.size()), histType_(hist.type()),
tab_((size_t*)&tab[0]),
histogramWriteLock_(lock),
globalHistogram_(hist.data)
{
p_[0] = (&ptrs[0])[0];
step_[0] = (&deltas[0])[1];
d_[0] = (&deltas[0])[0];
}
void operator()( const BlockedRange& range ) const
{
int localHistogram[256] = { 0, };
uchar* mask = mask_;
uchar* p0 = p_[0];
int x;
tbb::mutex::scoped_lock lock;
if( !mask_ )
{
int n = (imageWidth_ - 4) / 4 + 1;
int tail = imageWidth_ - n*4;
int xN = 4*n;
p0 += (xN*d_[0] + tail*d_[0] + step_[0]) * range.begin();
}
else
{
p0 += (imageWidth_*d_[0] + step_[0]) * range.begin();
mask += mstep_*range.begin();
}
for( int i = range.begin(); i < range.end(); i++, p0 += step_[0] )
{
if( !mask_ )
{
if( d_[0] == 1 )
{
for( x = 0; x <= imageWidth_ - 4; x += 4 )
{
int t0 = p0[x], t1 = p0[x+1];
localHistogram[t0]++; localHistogram[t1]++;
t0 = p0[x+2]; t1 = p0[x+3];
localHistogram[t0]++; localHistogram[t1]++;
}
p0 += x;
}
else
{
for( x = 0; x <= imageWidth_ - 4; x += 4 )
{
int t0 = p0[0], t1 = p0[d_[0]];
localHistogram[t0]++; localHistogram[t1]++;
p0 += d_[0]*2;
t0 = p0[0]; t1 = p0[d_[0]];
localHistogram[t0]++; localHistogram[t1]++;
p0 += d_[0]*2;
}
}
for( ; x < imageWidth_; x++, p0 += d_[0] )
{
localHistogram[*p0]++;
}
}
else
{
for( x = 0; x < imageWidth_; x++, p0 += d_[0] )
{
if( mask[x] )
{
localHistogram[*p0]++;
}
}
mask += mstep_;
}
}
lock.acquire(*histogramWriteLock_);
for(int i = 0; i < 256; i++ )
{
size_t hidx = tab_[i];
if( hidx < OUT_OF_RANGE )
{
*(int*)((globalHistogram_ + hidx)) += localHistogram[i];
}
}
lock.release();
}
static bool isFit( const Mat& histogram, const Size imageSize )
{
return ( histogram.total() >= 8
&& imageSize.width * imageSize.height >= 160*120 );
}
private:
uchar* p_[one];
uchar* mask_;
int mstep_;
int step_[one];
int d_[one];
int imageWidth_;
Size imageSize_;
Size histSize_;
int histType_;
size_t* tab_;
tbb::mutex* histogramWriteLock_;
uchar* globalHistogram_;
};
class CalcHist2D_8uInvoker
{
public:
CalcHist2D_8uInvoker( const std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas,
Size imsize, Mat& hist, int dims, const std::vector<size_t>& _tab,
tbb::mutex* lock )
: mask_(_ptrs[dims]),
mstep_(_deltas[dims*2 + 1]),
imageWidth_(imsize.width),
histSize_(hist.size()), histType_(hist.type()),
tab_((size_t*)&_tab[0]),
histogramWriteLock_(lock),
globalHistogram_(hist.data)
{
p_[0] = (uchar*)(&_ptrs[0])[0]; p_[1] = (uchar*)(&_ptrs[0])[1];
step_[0] = (&_deltas[0])[1]; step_[1] = (&_deltas[0])[3];
d_[0] = (&_deltas[0])[0]; d_[1] = (&_deltas[0])[2];
}
void operator()( const BlockedRange& range ) const
{
uchar* p0 = p_[0] + range.begin()*(step_[0] + imageWidth_*d_[0]);
uchar* p1 = p_[1] + range.begin()*(step_[1] + imageWidth_*d_[1]);
uchar* mask = mask_ + range.begin()*mstep_;
Mat localHist = Mat::zeros(histSize_, histType_);
uchar* localHistData = localHist.data;
tbb::mutex::scoped_lock lock;
for(int i = range.begin(); i < range.end(); i++, p0 += step_[0], p1 += step_[1])
{
if( !mask_ )
{
for( int x = 0; x < imageWidth_; x++, p0 += d_[0], p1 += d_[1] )
{
size_t idx = tab_[*p0] + tab_[*p1 + 256];
if( idx < OUT_OF_RANGE )
{
++*(int*)(localHistData + idx);
}
}
}
else
{
for( int x = 0; x < imageWidth_; x++, p0 += d_[0], p1 += d_[1] )
{
size_t idx;
if( mask[x] && (idx = tab_[*p0] + tab_[*p1 + 256]) < OUT_OF_RANGE )
{
++*(int*)(localHistData + idx);
}
}
mask += mstep_;
}
}
lock.acquire(*histogramWriteLock_);
for(int i = 0; i < histSize_.width*histSize_.height; i++)
{
((int*)globalHistogram_)[i] += ((int*)localHistData)[i];
}
lock.release();
}
static bool isFit( const Mat& histogram, const Size imageSize )
{
return ( (histogram.total() > 4*4 && histogram.total() <= 116*116
&& imageSize.width * imageSize.height >= 320*240)
|| (histogram.total() > 116*116 && imageSize.width * imageSize.height >= 1280*720) );
}
private:
uchar* p_[two];
uchar* mask_;
int step_[two];
int d_[two];
int mstep_;
int imageWidth_;
Size histSize_;
int histType_;
size_t* tab_;
tbb::mutex* histogramWriteLock_;
uchar* globalHistogram_;
};
class CalcHist3D_8uInvoker
{
public:
CalcHist3D_8uInvoker( const std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas,
Size imsize, Mat& hist, int dims, const std::vector<size_t>& tab )
: mask_(_ptrs[dims]),
mstep_(_deltas[dims*2 + 1]),
histogramSize_(hist.size.p), histogramType_(hist.type()),
imageWidth_(imsize.width),
tab_((size_t*)&tab[0]),
globalHistogram_(hist.data)
{
p_[0] = (uchar*)(&_ptrs[0])[0]; p_[1] = (uchar*)(&_ptrs[0])[1]; p_[2] = (uchar*)(&_ptrs[0])[2];
step_[0] = (&_deltas[0])[1]; step_[1] = (&_deltas[0])[3]; step_[2] = (&_deltas[0])[5];
d_[0] = (&_deltas[0])[0]; d_[1] = (&_deltas[0])[2]; d_[2] = (&_deltas[0])[4];
}
void operator()( const BlockedRange& range ) const
{
uchar* p0 = p_[0] + range.begin()*(step_[0] + imageWidth_*d_[0]);
uchar* p1 = p_[1] + range.begin()*(step_[1] + imageWidth_*d_[1]);
uchar* p2 = p_[2] + range.begin()*(step_[2] + imageWidth_*d_[2]);
uchar* mask = mask_ + range.begin()*mstep_;
for(int i = range.begin(); i < range.end(); i++, p0 += step_[0], p1 += step_[1], p2 += step_[2] )
{
if( !mask_ )
{
for( int x = 0; x < imageWidth_; x++, p0 += d_[0], p1 += d_[1], p2 += d_[2] )
{
size_t idx = tab_[*p0] + tab_[*p1 + 256] + tab_[*p2 + 512];
if( idx < OUT_OF_RANGE )
{
( *(tbb::atomic<int>*)(globalHistogram_ + idx) ).fetch_and_add(1);
}
}
}
else
{
for( int x = 0; x < imageWidth_; x++, p0 += d_[0], p1 += d_[1], p2 += d_[2] )
{
size_t idx;
if( mask[x] && (idx = tab_[*p0] + tab_[*p1 + 256] + tab_[*p2 + 512]) < OUT_OF_RANGE )
{
(*(tbb::atomic<int>*)(globalHistogram_ + idx)).fetch_and_add(1);
}
}
mask += mstep_;
}
}
}
static bool isFit( const Mat& histogram, const Size imageSize )
{
return ( histogram.total() >= 128*128*128
&& imageSize.width * imageSize.width >= 320*240 );
}
private:
uchar* p_[three];
uchar* mask_;
int mstep_;
int step_[three];
int d_[three];
int* histogramSize_;
int histogramType_;
int imageWidth_;
size_t* tab_;
uchar* globalHistogram_;
};
static void
callCalcHist2D_8u( std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas,
Size imsize, Mat& hist, int dims, std::vector<size_t>& _tab )
{
int grainSize = imsize.height / tbb::task_scheduler_init::default_num_threads();
tbb::mutex histogramWriteLock;
CalcHist2D_8uInvoker body(_ptrs, _deltas, imsize, hist, dims, _tab, &histogramWriteLock);
parallel_for(BlockedRange(0, imsize.height, grainSize), body);
}
static void
callCalcHist3D_8u( std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas,
Size imsize, Mat& hist, int dims, std::vector<size_t>& _tab )
{
CalcHist3D_8uInvoker body(_ptrs, _deltas, imsize, hist, dims, _tab);
parallel_for(BlockedRange(0, imsize.height), body);
}
#endif
template<typename T> static void template<typename T> static void
calcHist_( std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas, calcHist_( std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas,
@ -778,10 +238,6 @@ calcHist_( std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas,
if( dims == 1 ) if( dims == 1 )
{ {
#ifdef HAVE_TBB
calcHist1D_Invoker<T> body(_ptrs, _deltas, hist, _uniranges, size[0], dims, imsize);
parallel_for(BlockedRange(0, imsize.height), body);
#else
double a = uniranges[0], b = uniranges[1]; double a = uniranges[0], b = uniranges[1];
int sz = size[0], d0 = deltas[0], step0 = deltas[1]; int sz = size[0], d0 = deltas[0], step0 = deltas[1];
const T* p0 = (const T*)ptrs[0]; const T* p0 = (const T*)ptrs[0];
@ -804,15 +260,10 @@ calcHist_( std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas,
((int*)H)[idx]++; ((int*)H)[idx]++;
} }
} }
#endif //HAVE_TBB
return; return;
} }
else if( dims == 2 ) else if( dims == 2 )
{ {
#ifdef HAVE_TBB
calcHist2D_Invoker<T> body(_ptrs, _deltas, hist, _uniranges, size, dims, imsize, hstep);
parallel_for(BlockedRange(0, imsize.height), body);
#else
double a0 = uniranges[0], b0 = uniranges[1], a1 = uniranges[2], b1 = uniranges[3]; double a0 = uniranges[0], b0 = uniranges[1], a1 = uniranges[2], b1 = uniranges[3];
int sz0 = size[0], sz1 = size[1]; int sz0 = size[0], sz1 = size[1];
int d0 = deltas[0], step0 = deltas[1], int d0 = deltas[0], step0 = deltas[1],
@ -841,19 +292,10 @@ calcHist_( std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas,
((int*)(H + hstep0*idx0))[idx1]++; ((int*)(H + hstep0*idx0))[idx1]++;
} }
} }
#endif //HAVE_TBB
return; return;
} }
else if( dims == 3 ) else if( dims == 3 )
{ {
#ifdef HAVE_TBB
if( calcHist3D_Invoker<T>::isFit(hist, imsize) )
{
calcHist3D_Invoker<T> body(_ptrs, _deltas, imsize, hist, uniranges, dims, hstep, size);
parallel_for(BlockedRange(0, imsize.height), body);
return;
}
#endif
double a0 = uniranges[0], b0 = uniranges[1], double a0 = uniranges[0], b0 = uniranges[1],
a1 = uniranges[2], b1 = uniranges[3], a1 = uniranges[2], b1 = uniranges[3],
a2 = uniranges[4], b2 = uniranges[5]; a2 = uniranges[4], b2 = uniranges[5];
@ -1009,18 +451,6 @@ calcHist_8u( std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas,
if( dims == 1 ) if( dims == 1 )
{ {
#ifdef HAVE_TBB
if( CalcHist1D_8uInvoker::isFit(hist, imsize) )
{
int treadsNumber = tbb::task_scheduler_init::default_num_threads();
int grainSize = imsize.height/treadsNumber;
tbb::mutex histogramWriteLock;
CalcHist1D_8uInvoker body(_ptrs, _deltas, imsize, hist, dims, _tab, &histogramWriteLock);
parallel_for(BlockedRange(0, imsize.height, grainSize), body);
return;
}
#endif
int d0 = deltas[0], step0 = deltas[1]; int d0 = deltas[0], step0 = deltas[1];
int matH[256] = { 0, }; int matH[256] = { 0, };
const uchar* p0 = (const uchar*)ptrs[0]; const uchar* p0 = (const uchar*)ptrs[0];
@ -1069,13 +499,6 @@ calcHist_8u( std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas,
} }
else if( dims == 2 ) else if( dims == 2 )
{ {
#ifdef HAVE_TBB
if( CalcHist2D_8uInvoker::isFit(hist, imsize) )
{
callCalcHist2D_8u(_ptrs, _deltas, imsize, hist, dims, _tab);
return;
}
#endif
int d0 = deltas[0], step0 = deltas[1], int d0 = deltas[0], step0 = deltas[1],
d1 = deltas[2], step1 = deltas[3]; d1 = deltas[2], step1 = deltas[3];
const uchar* p0 = (const uchar*)ptrs[0]; const uchar* p0 = (const uchar*)ptrs[0];
@ -1101,13 +524,6 @@ calcHist_8u( std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas,
} }
else if( dims == 3 ) else if( dims == 3 )
{ {
#ifdef HAVE_TBB
if( CalcHist3D_8uInvoker::isFit(hist, imsize) )
{
callCalcHist3D_8u(_ptrs, _deltas, imsize, hist, dims, _tab);
return;
}
#endif
int d0 = deltas[0], step0 = deltas[1], int d0 = deltas[0], step0 = deltas[1],
d1 = deltas[2], step1 = deltas[3], d1 = deltas[2], step1 = deltas[3],
d2 = deltas[4], step2 = deltas[5]; d2 = deltas[4], step2 = deltas[5];

View File

@ -101,9 +101,6 @@ public:
context(_context), depthGenerator(_depthGenerator), imageGenerator(_imageGenerator), context(_context), depthGenerator(_depthGenerator), imageGenerator(_imageGenerator),
maxBufferSize(_maxBufferSize), isCircleBuffer(_isCircleBuffer), maxTimeDuration(_maxTimeDuration) maxBufferSize(_maxBufferSize), isCircleBuffer(_isCircleBuffer), maxTimeDuration(_maxTimeDuration)
{ {
#ifdef HAVE_TBB
task = 0;
#endif
CV_Assert( depthGenerator.IsValid() ); CV_Assert( depthGenerator.IsValid() );
CV_Assert( imageGenerator.IsValid() ); CV_Assert( imageGenerator.IsValid() );
@ -112,9 +109,6 @@ public:
void setMaxBufferSize( int _maxBufferSize ) void setMaxBufferSize( int _maxBufferSize )
{ {
maxBufferSize = _maxBufferSize; maxBufferSize = _maxBufferSize;
#ifdef HAVE_TBB
task->setMaxBufferSize();
#endif
} }
inline int getMaxBufferSize() const { return maxBufferSize; } inline int getMaxBufferSize() const { return maxBufferSize; }
@ -132,9 +126,7 @@ public:
while( task->grab(depthMetaData, imageMetaData) == false ) while( task->grab(depthMetaData, imageMetaData) == false )
{ {
#ifndef HAVE_TBB
task->spin(); task->spin();
#endif
} }
return true; return true;
@ -144,22 +136,12 @@ public:
{ {
CV_Assert( depthGenerator.IsValid() ); CV_Assert( depthGenerator.IsValid() );
CV_Assert( imageGenerator.IsValid() ); CV_Assert( imageGenerator.IsValid() );
#ifdef HAVE_TBB
task = new( tbb::task::allocate_root() ) TBBApproximateSynchronizerTask( *this );
tbb::task::enqueue(*task);
#else
task.reset( new ApproximateSynchronizer( *this ) ); task.reset( new ApproximateSynchronizer( *this ) );
#endif
} }
void finish() void finish()
{ {
#ifdef HAVE_TBB
if( task )
tbb::task::destroy( *task );
#else
task.release(); task.release();
#endif
} }
bool isRun() const { return task != 0; } bool isRun() const { return task != 0; }
@ -305,120 +287,7 @@ private:
std::queue<cv::Ptr<xn::ImageMetaData> > imageQueue; std::queue<cv::Ptr<xn::ImageMetaData> > imageQueue;
}; };
#ifdef HAVE_TBB
// If there is TBB the synchronization will be executed in own thread.
class TBBApproximateSynchronizer: public ApproximateSynchronizerBase
{
public:
TBBApproximateSynchronizer( ApproximateSyncGrabber& _approxSyncGrabber ) :
ApproximateSynchronizerBase(_approxSyncGrabber)
{
setMaxBufferSize();
}
void setMaxBufferSize()
{
int maxBufferSize = approxSyncGrabber.getMaxBufferSize();
if( maxBufferSize >= 0 )
{
depthQueue.set_capacity( maxBufferSize );
imageQueue.set_capacity( maxBufferSize );
}
}
virtual inline bool isSpinContinue() const { return true; }
virtual inline void pushDepthMetaData( xn::DepthMetaData& depthMetaData )
{
cv::Ptr<xn::DepthMetaData> depthPtr = cv::makePtr<xn::DepthMetaData>(), tmp;
depthPtr->CopyFrom(depthMetaData);
tbb::mutex mtx;
mtx.lock();
if( depthQueue.try_push(depthPtr) == false )
{
if( approxSyncGrabber.getIsCircleBuffer() )
{
CV_Assert( depthQueue.try_pop(tmp) );
CV_Assert( depthQueue.try_push(depthPtr) );
}
}
mtx.unlock();
}
virtual inline void pushImageMetaData( xn::ImageMetaData& imageMetaData )
{
cv::Ptr<xn::ImageMetaData> imagePtr = cv::makePtr<xn::ImageMetaData>(), tmp;
imagePtr->CopyFrom(imageMetaData);
tbb::mutex mtx;
mtx.lock();
if( imageQueue.try_push(imagePtr) == false )
{
if( approxSyncGrabber.getIsCircleBuffer() )
{
CV_Assert( imageQueue.try_pop(tmp) );
CV_Assert( imageQueue.try_push(imagePtr) );
}
}
mtx.unlock();
}
virtual inline bool popDepthMetaData( xn::DepthMetaData& depthMetaData )
{
cv::Ptr<xn::DepthMetaData> depthPtr;
bool isPop = depthQueue.try_pop(depthPtr);
if( isPop )
depthMetaData.CopyFrom(*depthPtr);
return isPop;
}
virtual inline bool popImageMetaData( xn::ImageMetaData& imageMetaData )
{
cv::Ptr<xn::ImageMetaData> imagePtr;
bool isPop = imageQueue.try_pop(imagePtr);
if( isPop )
imageMetaData.CopyFrom(*imagePtr);
return isPop;
}
private:
tbb::concurrent_bounded_queue<cv::Ptr<xn::DepthMetaData> > depthQueue;
tbb::concurrent_bounded_queue<cv::Ptr<xn::ImageMetaData> > imageQueue;
};
class TBBApproximateSynchronizerTask: public tbb::task
{
public:
TBBApproximateSynchronizerTask( ApproximateSyncGrabber& approxSyncGrabber ) :
synchronizer(approxSyncGrabber)
{}
void setMaxBufferSize()
{
synchronizer.setMaxBufferSize();
}
bool grab( xn::DepthMetaData& depthMetaData,
xn::ImageMetaData& imageMetaData )
{
return synchronizer.grab( depthMetaData, imageMetaData );
}
private:
tbb::task* execute()
{
synchronizer.spin();
return 0;
}
TBBApproximateSynchronizer synchronizer;
};
#endif // HAVE_TBB
#ifdef HAVE_TBB
TBBApproximateSynchronizerTask* task;
#else
cv::Ptr<ApproximateSynchronizer> task; cv::Ptr<ApproximateSynchronizer> task;
#endif
}; };
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@ -430,11 +299,7 @@ public:
static const int INVALID_PIXEL_VAL = 0; static const int INVALID_PIXEL_VAL = 0;
static const int INVALID_COORDINATE_VAL = 0; static const int INVALID_COORDINATE_VAL = 0;
#ifdef HAVE_TBB
static const int DEFAULT_MAX_BUFFER_SIZE = 8;
#else
static const int DEFAULT_MAX_BUFFER_SIZE = 2; static const int DEFAULT_MAX_BUFFER_SIZE = 2;
#endif
static const int DEFAULT_IS_CIRCLE_BUFFER = 0; static const int DEFAULT_IS_CIRCLE_BUFFER = 0;
static const int DEFAULT_MAX_TIME_DURATION = 20; static const int DEFAULT_MAX_TIME_DURATION = 20;

View File

@ -78,11 +78,7 @@ public:
static const int INVALID_PIXEL_VAL = 0; static const int INVALID_PIXEL_VAL = 0;
static const int INVALID_COORDINATE_VAL = 0; static const int INVALID_COORDINATE_VAL = 0;
#ifdef HAVE_TBB
static const int DEFAULT_MAX_BUFFER_SIZE = 8;
#else
static const int DEFAULT_MAX_BUFFER_SIZE = 2; static const int DEFAULT_MAX_BUFFER_SIZE = 2;
#endif
static const int DEFAULT_IS_CIRCLE_BUFFER = 0; static const int DEFAULT_IS_CIRCLE_BUFFER = 0;
static const int DEFAULT_MAX_TIME_DURATION = 20; static const int DEFAULT_MAX_TIME_DURATION = 20;