From 5a6114e29975f66240ab7acae6e35fca65064a2c Mon Sep 17 00:00:00 2001 From: Alexandre Benoit Date: Fri, 31 Aug 2012 14:14:57 +0200 Subject: [PATCH] updated code for more flexible parallelisation : TBB parallel for loops are replaced by opencv parallel_for_ wrapper --- modules/contrib/src/basicretinafilter.cpp | 28 ++++++------ modules/contrib/src/basicretinafilter.hpp | 54 +++++++++++------------ modules/contrib/src/magnoretinafilter.cpp | 4 +- modules/contrib/src/magnoretinafilter.hpp | 22 ++++----- modules/contrib/src/parvoretinafilter.cpp | 4 +- modules/contrib/src/parvoretinafilter.hpp | 22 ++++----- modules/contrib/src/retinacolor.cpp | 14 +++--- modules/contrib/src/retinacolor.hpp | 26 +++++------ modules/contrib/src/templatebuffer.hpp | 19 ++++---- 9 files changed, 96 insertions(+), 97 deletions(-) diff --git a/modules/contrib/src/basicretinafilter.cpp b/modules/contrib/src/basicretinafilter.cpp index a4270aa74d..c9e6a92e4d 100644 --- a/modules/contrib/src/basicretinafilter.cpp +++ b/modules/contrib/src/basicretinafilter.cpp @@ -345,8 +345,8 @@ void BasicRetinaFilter::_localLuminanceAdaptation(const float *inputFrame, const //float tempMeanValue=meanLuminance+_meanInputValue*_tau; updateCompressionParameter(meanLuminance); } -#ifdef HAVE_TBB - tbb::parallel_for(tbb::blocked_range(0,_filterOutput.getNBpixels()), Parallel_localAdaptation(localLuminance, inputFrame, outputFrame, _localLuminanceFactor, _localLuminanceAddon, _maxInputValue), tbb::auto_partitioner()); +#ifdef MAKE_PARALLEL + cv::parallel_for_(cv::Range(0,_filterOutput.getNBpixels()), Parallel_localAdaptation(localLuminance, inputFrame, outputFrame, _localLuminanceFactor, _localLuminanceAddon, _maxInputValue)); #else //std::cout<(IDrowStart,IDrowEnd), Parallel_horizontalCausalFilter_addInput(inputFrame, outputFrame, IDrowStart, _filterOutput.getNBcolumns(), _a, _tau), tbb::auto_partitioner()); +#ifdef MAKE_PARALLEL + cv::parallel_for_(cv::Range(IDrowStart,IDrowEnd), Parallel_horizontalCausalFilter_addInput(inputFrame, outputFrame, IDrowStart, _filterOutput.getNBcolumns(), _a, _tau)); #else for (unsigned int IDrow=IDrowStart; IDrow(IDrowStart,IDrowEnd), Parallel_horizontalAnticausalFilter(outputFrame, IDrowEnd, _filterOutput.getNBcolumns(), _a ), tbb::auto_partitioner()); +#ifdef MAKE_PARALLEL + cv::parallel_for_(cv::Range(IDrowStart,IDrowEnd), Parallel_horizontalAnticausalFilter(outputFrame, IDrowEnd, _filterOutput.getNBcolumns(), _a )); #else for (unsigned int IDrow=IDrowStart; IDrow(IDcolumnStart,IDcolumnEnd), Parallel_verticalCausalFilter(outputFrame, _filterOutput.getNBrows(), _filterOutput.getNBcolumns(), _a ), tbb::auto_partitioner()); +#ifdef MAKE_PARALLEL + cv::parallel_for_(cv::Range(IDcolumnStart,IDcolumnEnd), Parallel_verticalCausalFilter(outputFrame, _filterOutput.getNBrows(), _filterOutput.getNBcolumns(), _a )); #else for (unsigned int IDcolumn=IDcolumnStart; IDcolumn(IDcolumnStart,IDcolumnEnd), Parallel_verticalAnticausalFilter_multGain(outputFrame, _filterOutput.getNBrows(), _filterOutput.getNBcolumns(), _a, _gain ), tbb::auto_partitioner()); +#ifdef MAKE_PARALLEL + cv::parallel_for_(cv::Range(IDcolumnStart,IDcolumnEnd), Parallel_verticalAnticausalFilter_multGain(outputFrame, _filterOutput.getNBrows(), _filterOutput.getNBcolumns(), _a, _gain )); #else float* offset=outputFrame+_filterOutput.getNBpixels()-_filterOutput.getNBcolumns(); //#pragma omp parallel for @@ -819,8 +819,8 @@ void BasicRetinaFilter::_horizontalCausalFilter_Irregular_addInput(const float * // horizontal anticausal filter (basic way, no add on) void BasicRetinaFilter::_horizontalAnticausalFilter_Irregular(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd, const float *spatialConstantBuffer) { -#ifdef HAVE_TBB - tbb::parallel_for(tbb::blocked_range(IDrowStart,IDrowEnd), Parallel_horizontalAnticausalFilter_Irregular(outputFrame, spatialConstantBuffer, IDrowEnd, _filterOutput.getNBcolumns()), tbb::auto_partitioner()); +#ifdef MAKE_PARALLEL + cv::parallel_for_(cv::Range(IDrowStart,IDrowEnd), Parallel_horizontalAnticausalFilter_Irregular(outputFrame, spatialConstantBuffer, IDrowEnd, _filterOutput.getNBcolumns())); #else register float* outputPTR=outputFrame+IDrowEnd*(_filterOutput.getNBcolumns())-1; register const float* spatialConstantPTR=spatialConstantBuffer+IDrowEnd*(_filterOutput.getNBcolumns())-1; @@ -841,8 +841,8 @@ void BasicRetinaFilter::_horizontalAnticausalFilter_Irregular(float *outputFrame // vertical anticausal filter void BasicRetinaFilter::_verticalCausalFilter_Irregular(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd, const float *spatialConstantBuffer) { -#ifdef HAVE_TBB - tbb::parallel_for(tbb::blocked_range(IDcolumnStart,IDcolumnEnd), Parallel_verticalCausalFilter_Irregular(outputFrame, spatialConstantBuffer, _filterOutput.getNBrows(), _filterOutput.getNBcolumns()), tbb::auto_partitioner()); +#ifdef MAKE_PARALLEL + cv::parallel_for_(cv::Range(IDcolumnStart,IDcolumnEnd), Parallel_verticalCausalFilter_Irregular(outputFrame, spatialConstantBuffer, _filterOutput.getNBrows(), _filterOutput.getNBcolumns())); #else for (unsigned int IDcolumn=IDcolumnStart; IDcolumn main idea paralellise main filters loops, then, only the most used methods are parallelized... TODO : increase the number of parallelised methods as necessary ** ==> functors names = Parallel_$$$ where $$$= the name of the serial method that is parallelised ** ==> functors constructors can differ from the parameters used with their related serial functions */ #define _DEBUG_TBB // define DEBUG_TBB in order to display additionnal data on stdout - class Parallel_horizontalAnticausalFilter + class Parallel_horizontalAnticausalFilter: public cv::ParallelLoopBody { private: float *outputFrame; @@ -465,16 +465,16 @@ protected: #endif } - void operator()( const tbb::blocked_range& r ) const { + virtual void operator()( const Range& r ) const { #ifdef DEBUG_TBB std::cout<<"Parallel_horizontalAnticausalFilter::operator() :" <<"\n\t range size="< declare usefull generic tools template -class Parallel_clipBufferValues +class Parallel_clipBufferValues: public cv::ParallelLoopBody { private: type *bufferToClip; @@ -89,9 +88,9 @@ public: Parallel_clipBufferValues(type* bufferToProcess, const type min, const type max) : bufferToClip(bufferToProcess), minValue(min), maxValue(max){} - void operator()( const tbb::blocked_range& r ) const { - register type *inputOutputBufferPTR=bufferToClip+r.begin(); - for (register unsigned int jf = r.begin(); jf != r.end(); ++jf, ++inputOutputBufferPTR) + virtual void operator()( const cv::Range &r ) const { + register type *inputOutputBufferPTR=bufferToClip+r.start; + for (register int jf = r.start; jf != r.end; ++jf, ++inputOutputBufferPTR) { if (*inputOutputBufferPTR>maxValue) *inputOutputBufferPTR=maxValue; @@ -389,8 +388,8 @@ public: std::cout<<"this->min()"<min()<<"minThreshold="<Buffer(); -#ifdef HAVE_TBB // call the TemplateBuffer TBB clipping method - tbb::parallel_for(tbb::blocked_range(0,this->size()), Parallel_clipBufferValues(bufferPTR, updatedLowValue, updatedHighValue), tbb::auto_partitioner()); +#ifdef MAKE_PARALLEL // call the TemplateBuffer TBB clipping method + parallel_for_(tbb::blocked_range(0,this->size()), Parallel_clipBufferValues(bufferPTR, updatedLowValue, updatedHighValue)); #else for (unsigned int i=0;isize();++i, ++bufferPTR)