Retina module is now parallelized thanks to the TBB library. Speed increase expected on multicore plateforms

This commit is contained in:
noob 2012-08-29 17:44:21 +02:00
parent c78884c780
commit 424bc609b6
9 changed files with 577 additions and 169 deletions

View File

@ -316,28 +316,50 @@ void BasicRetinaFilter::runFilter_LocalAdapdation_autonomous(const std::valarray
_spatiotemporalLPfilter(get_data(inputFrame), &_filterOutput[0]);
_localLuminanceAdaptation(get_data(inputFrame), &_filterOutput[0], &outputFrame[0]);
}
// local luminance adaptation of the input in regard of localLuminance buffer
void BasicRetinaFilter::_localLuminanceAdaptation(const float *inputFrame, const float *localLuminance, float *outputFrame)
{
float meanLuminance=0;
const float *luminancePTR=inputFrame;
for (unsigned int i=0;i<_filterOutput.getNBpixels();++i)
meanLuminance+=*(luminancePTR++);
meanLuminance/=_filterOutput.getNBpixels();
//float tempMeanValue=meanLuminance+_meanInputValue*_tau;
updateCompressionParameter(meanLuminance);
// local luminance adaptation of the input in regard of localLuminance buffer, the input is rewrited and becomes the output
void BasicRetinaFilter::_localLuminanceAdaptation(float *inputOutputFrame, const float *localLuminance)
{
_localLuminanceAdaptation(inputOutputFrame, localLuminance, inputOutputFrame, false);
/* const float *localLuminancePTR=localLuminance;
float *inputOutputFramePTR=inputOutputFrame;
for (register unsigned int IDpixel=0 ; IDpixel<_filterOutput.getNBpixels() ; ++IDpixel, ++inputOutputFramePTR)
{
float X0=*(localLuminancePTR++)*_localLuminanceFactor+_localLuminanceAddon;
*(inputOutputFramePTR) = (_maxInputValue+X0)**inputOutputFramePTR/(*inputOutputFramePTR +X0+0.00000000001);
}
*/
}
// local luminance adaptation of the input in regard of localLuminance buffer
void BasicRetinaFilter::_localLuminanceAdaptation(const float *inputFrame, const float *localLuminance, float *outputFrame, const bool updateLuminanceMean)
{
if (updateLuminanceMean)
{ float meanLuminance=0;
const float *luminancePTR=inputFrame;
for (unsigned int i=0;i<_filterOutput.getNBpixels();++i)
meanLuminance+=*(luminancePTR++);
meanLuminance/=_filterOutput.getNBpixels();
//float tempMeanValue=meanLuminance+_meanInputValue*_tau;
updateCompressionParameter(meanLuminance);
}
#ifdef HAVE_TBB
tbb::parallel_for(tbb::blocked_range<size_t>(0,_filterOutput.getNBpixels()), Parallel_localAdaptation(localLuminance, inputFrame, outputFrame, _localLuminanceFactor, _localLuminanceAddon, _maxInputValue), tbb::auto_partitioner());
#else
//std::cout<<meanLuminance<<std::endl;
const float *localLuminancePTR=localLuminance;
const float *inputFramePTR=inputFrame;
float *outputFramePTR=outputFrame;
for (register unsigned int IDpixel=0 ; IDpixel<_filterOutput.getNBpixels() ; ++IDpixel, ++inputFramePTR)
for (register unsigned int IDpixel=0 ; IDpixel<_filterOutput.getNBpixels() ; ++IDpixel, ++inputFramePTR, ++outputFramePTR)
{
float X0=*(localLuminancePTR++)*_localLuminanceFactor+_localLuminanceAddon;
// TODO : the following line can lead to a divide by zero ! A small offset is added, take care if the offset is too large in case of High Dynamic Range images which can use very small values...
*(outputFramePTR++) = (_maxInputValue+X0)**inputFramePTR/(*inputFramePTR +X0+0.00000000001f);
*(outputFramePTR) = (_maxInputValue+X0)**inputFramePTR/(*inputFramePTR +X0+0.00000000001);
//std::cout<<"BasicRetinaFilter::inputFrame[IDpixel]=%f, X0=%f, outputFrame[IDpixel]=%f\n", inputFrame[IDpixel], X0, outputFrame[IDpixel]);
}
#endif
}
// local adaptation applied on a range of values which can be positive and negative
@ -355,27 +377,6 @@ void BasicRetinaFilter::_localLuminanceAdaptationPosNegValues(const float *input
}
}
// local luminance adaptation of the input in regard of localLuminance buffer, the input is rewrited and becomes the output
void BasicRetinaFilter::_localLuminanceAdaptation(float *inputOutputFrame, const float *localLuminance)
{
/*float meanLuminance=0;
const float *luminancePTR=inputOutputFrame;
for (unsigned int i=0;i<_filterOutput.getNBpixels();++i)
meanLuminance+=*(luminancePTR++);
meanLuminance/=_filterOutput.getNBpixels();
//float tempMeanValue=meanLuminance+_meanInputValue*_tau;
updateCompressionParameter(meanLuminance);
*/
const float *localLuminancePTR=localLuminance;
float *inputOutputFramePTR=inputOutputFrame;
for (register unsigned int IDpixel=0 ; IDpixel<_filterOutput.getNBpixels() ; ++IDpixel, ++inputOutputFramePTR)
{
float X0=*(localLuminancePTR++)*_localLuminanceFactor+_localLuminanceAddon;
*(inputOutputFramePTR) = (_maxInputValue+X0)**inputOutputFramePTR/(*inputOutputFramePTR +X0);
}
}
///////////////////////////////////////////////////////////////////////
/// Spatio temporal Low Pass filter functions
// run LP filter and save result in the basic retina element buffer
@ -465,7 +466,9 @@ void BasicRetinaFilter::_horizontalCausalFilter(float *outputFrame, unsigned int
// horizontal causal filter which adds the input inside
void BasicRetinaFilter::_horizontalCausalFilter_addInput(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd)
{
//#pragma omp parallel for
#ifdef HAVE_TBB
tbb::parallel_for(tbb::blocked_range<size_t>(IDrowStart,IDrowEnd), Parallel_horizontalCausalFilter_addInput(inputFrame, outputFrame, IDrowStart, _filterOutput.getNBcolumns(), _a, _tau), tbb::auto_partitioner());
#else
for (unsigned int IDrow=IDrowStart; IDrow<IDrowEnd; ++IDrow)
{
register float* outputPTR=outputFrame+(IDrowStart+IDrow)*_filterOutput.getNBcolumns();
@ -477,14 +480,16 @@ void BasicRetinaFilter::_horizontalCausalFilter_addInput(const float *inputFrame
*(outputPTR++) = result;
}
}
#endif
}
// horizontal anticausal filter (basic way, no add on)
void BasicRetinaFilter::_horizontalAnticausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd)
{
//#pragma omp parallel for
#ifdef HAVE_TBB
tbb::parallel_for(tbb::blocked_range<size_t>(IDrowStart,IDrowEnd), Parallel_horizontalAnticausalFilter(outputFrame, IDrowEnd, _filterOutput.getNBcolumns(), _a ), tbb::auto_partitioner());
#else
for (unsigned int IDrow=IDrowStart; IDrow<IDrowEnd; ++IDrow)
{
register float* outputPTR=outputFrame+(IDrowEnd-IDrow)*(_filterOutput.getNBcolumns())-1;
@ -495,9 +500,9 @@ void BasicRetinaFilter::_horizontalAnticausalFilter(float *outputFrame, unsigned
*(outputPTR--) = result;
}
}
#endif
}
// horizontal anticausal filter which multiplies the output by _gain
void BasicRetinaFilter::_horizontalAnticausalFilter_multGain(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd)
{
@ -518,8 +523,10 @@ void BasicRetinaFilter::_horizontalAnticausalFilter_multGain(float *outputFrame,
// vertical anticausal filter
void BasicRetinaFilter::_verticalCausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd)
{
//#pragma omp parallel for
for (unsigned int IDcolumn=IDcolumnStart; IDcolumn<IDcolumnEnd; ++IDcolumn)
#ifdef HAVE_TBB
tbb::parallel_for(tbb::blocked_range<size_t>(IDcolumnStart,IDcolumnEnd), Parallel_verticalCausalFilter(outputFrame, _filterOutput.getNBrows(), _filterOutput.getNBcolumns(), _a ), tbb::auto_partitioner());
#else
for (unsigned int IDcolumn=IDcolumnStart; IDcolumn<IDcolumnEnd; ++IDcolumn)
{
register float result=0;
register float *outputPTR=outputFrame+IDcolumn;
@ -532,6 +539,7 @@ void BasicRetinaFilter::_verticalCausalFilter(float *outputFrame, unsigned int I
}
}
#endif
}
@ -558,7 +566,10 @@ void BasicRetinaFilter::_verticalAnticausalFilter(float *outputFrame, unsigned i
// vertical anticausal filter which multiplies the output by _gain
void BasicRetinaFilter::_verticalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd)
{
float* offset=outputFrame+_filterOutput.getNBpixels()-_filterOutput.getNBcolumns();
#ifdef HAVE_TBB
tbb::parallel_for(tbb::blocked_range<size_t>(IDcolumnStart,IDcolumnEnd), Parallel_verticalAnticausalFilter_multGain(outputFrame, _filterOutput.getNBrows(), _filterOutput.getNBcolumns(), _a, _gain ), tbb::auto_partitioner());
#else
float* offset=outputFrame+_filterOutput.getNBpixels()-_filterOutput.getNBcolumns();
//#pragma omp parallel for
for (unsigned int IDcolumn=IDcolumnStart; IDcolumn<IDcolumnEnd; ++IDcolumn)
{
@ -573,7 +584,7 @@ void BasicRetinaFilter::_verticalAnticausalFilter_multGain(float *outputFrame, u
}
}
#endif
}
/////////////////////////////////////////
@ -745,8 +756,8 @@ void BasicRetinaFilter::_spatiotemporalLPfilter_Irregular(float *inputOutputFram
// launch the serie of 1D directional filters in order to compute the 2D low pass filter
_horizontalCausalFilter_Irregular(inputOutputFrame, 0, (int)_filterOutput.getNBrows());
_horizontalAnticausalFilter_Irregular(inputOutputFrame, 0, (int)_filterOutput.getNBrows());
_verticalCausalFilter_Irregular(inputOutputFrame, 0, (int)_filterOutput.getNBcolumns());
_horizontalAnticausalFilter_Irregular(inputOutputFrame, 0, (int)_filterOutput.getNBrows(), &_progressiveSpatialConstant[0]);
_verticalCausalFilter_Irregular(inputOutputFrame, 0, (int)_filterOutput.getNBcolumns(), &_progressiveSpatialConstant[0]);
_verticalAnticausalFilter_Irregular_multGain(inputOutputFrame, 0, (int)_filterOutput.getNBcolumns());
}
@ -765,8 +776,8 @@ void BasicRetinaFilter::_spatiotemporalLPfilter_Irregular(const float *inputFram
// launch the serie of 1D directional filters in order to compute the 2D low pass filter
_horizontalCausalFilter_Irregular_addInput(inputFrame, outputFrame, 0, (int)_filterOutput.getNBrows());
_horizontalAnticausalFilter_Irregular(outputFrame, 0, (int)_filterOutput.getNBrows());
_verticalCausalFilter_Irregular(outputFrame, 0, (int)_filterOutput.getNBcolumns());
_horizontalAnticausalFilter_Irregular(outputFrame, 0, (int)_filterOutput.getNBrows(), &_progressiveSpatialConstant[0]);
_verticalCausalFilter_Irregular(outputFrame, 0, (int)_filterOutput.getNBcolumns(), &_progressiveSpatialConstant[0]);
_verticalAnticausalFilter_Irregular_multGain(outputFrame, 0, (int)_filterOutput.getNBcolumns());
}
@ -806,10 +817,13 @@ void BasicRetinaFilter::_horizontalCausalFilter_Irregular_addInput(const float *
}
// horizontal anticausal filter (basic way, no add on)
void BasicRetinaFilter::_horizontalAnticausalFilter_Irregular(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd)
void BasicRetinaFilter::_horizontalAnticausalFilter_Irregular(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd, const float *spatialConstantBuffer)
{
#ifdef HAVE_TBB
tbb::parallel_for(tbb::blocked_range<size_t>(IDrowStart,IDrowEnd), Parallel_horizontalAnticausalFilter_Irregular(outputFrame, spatialConstantBuffer, IDrowEnd, _filterOutput.getNBcolumns()), tbb::auto_partitioner());
#else
register float* outputPTR=outputFrame+IDrowEnd*(_filterOutput.getNBcolumns())-1;
register const float* spatialConstantPTR=&_progressiveSpatialConstant[0]+IDrowEnd*(_filterOutput.getNBcolumns())-1;
register const float* spatialConstantPTR=spatialConstantBuffer+IDrowEnd*(_filterOutput.getNBcolumns())-1;
for (unsigned int IDrow=IDrowStart; IDrow<IDrowEnd; ++IDrow)
{
@ -820,18 +834,21 @@ void BasicRetinaFilter::_horizontalAnticausalFilter_Irregular(float *outputFrame
*(outputPTR--) = result;
}
}
#endif
}
// vertical anticausal filter
void BasicRetinaFilter::_verticalCausalFilter_Irregular(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd)
void BasicRetinaFilter::_verticalCausalFilter_Irregular(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd, const float *spatialConstantBuffer)
{
#ifdef HAVE_TBB
tbb::parallel_for(tbb::blocked_range<size_t>(IDcolumnStart,IDcolumnEnd), Parallel_verticalCausalFilter_Irregular(outputFrame, spatialConstantBuffer, _filterOutput.getNBrows(), _filterOutput.getNBcolumns()), tbb::auto_partitioner());
#else
for (unsigned int IDcolumn=IDcolumnStart; IDcolumn<IDcolumnEnd; ++IDcolumn)
{
register float result=0;
register float *outputPTR=outputFrame+IDcolumn;
register const float *spatialConstantPTR=&_progressiveSpatialConstant[0]+IDcolumn;
register const float *spatialConstantPTR=spatialConstantBuffer+IDcolumn;
for (unsigned int index=0; index<_filterOutput.getNBrows(); ++index)
{
result = *(outputPTR) + *(spatialConstantPTR) * result;
@ -840,6 +857,7 @@ void BasicRetinaFilter::_verticalCausalFilter_Irregular(float *outputFrame, unsi
spatialConstantPTR+=_filterOutput.getNBcolumns();
}
}
#endif
}
// vertical anticausal filter which multiplies the output by _gain

View File

@ -393,48 +393,261 @@ protected:
// LP filter that squares the input and computes the output ONLY on the areas where the integrationAreas map are TRUE
void _localSquaringSpatioTemporalLPfilter(const float *inputFrame, float *LPfilterOutput, const unsigned int *integrationAreas, const unsigned int filterIndex=0);
// local luminance adaptation of the input in regard of localLuminance buffer
void _localLuminanceAdaptation(const float *inputFrame, const float *localLuminance, float *outputFrame);
// local luminance adaptation of the input in regard of localLuminance buffer, the input is rewrited and becomes the output
void _localLuminanceAdaptation(float *inputOutputFrame, const float *localLuminance);
// local adaptation applied on a range of values which can be positive and negative
void _localLuminanceAdaptationPosNegValues(const float *inputFrame, const float *localLuminance, float *outputFrame);
// local luminance adaptation of the input in regard of localLuminance buffer
void _localLuminanceAdaptation(const float *inputFrame, const float *localLuminance, float *outputFrame, const bool updateLuminanceMean=true);
// local luminance adaptation of the input in regard of localLuminance buffer, the input is rewrited and becomes the output
void _localLuminanceAdaptation(float *inputOutputFrame, const float *localLuminance);
// local adaptation applied on a range of values which can be positive and negative
void _localLuminanceAdaptationPosNegValues(const float *inputFrame, const float *localLuminance, float *outputFrame);
//////////////////////////////////////////////////////////////
// 1D directional filters used for the 2D low pass filtering
// 1D filters with image input
void _horizontalCausalFilter_addInput(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
// 1D filters with image input that is squared in the function
void _squaringHorizontalCausalFilter(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
// vertical anticausal filter that returns the mean value of its result
float _verticalAnticausalFilter_returnMeanValue(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd);
// 1D filters with image input
void _horizontalCausalFilter_addInput(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
// 1D filters with image input that is squared in the function // parallelized with TBB
void _squaringHorizontalCausalFilter(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
// vertical anticausal filter that returns the mean value of its result
float _verticalAnticausalFilter_returnMeanValue(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd);
// most simple functions: only perform 1D filtering with output=input (no add on)
void _horizontalCausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
void _horizontalAnticausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
void _verticalCausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd);
void _verticalAnticausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd);
// most simple functions: only perform 1D filtering with output=input (no add on)
void _horizontalCausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
void _horizontalAnticausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd); // parallelized with TBB
void _verticalCausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd); // parallelized with TBB
void _verticalAnticausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd);
// perform 1D filtering with output with varrying spatial coefficient
void _horizontalCausalFilter_Irregular(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
void _horizontalCausalFilter_Irregular_addInput(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
void _horizontalAnticausalFilter_Irregular(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
void _verticalCausalFilter_Irregular(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd);
void _verticalAnticausalFilter_Irregular_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd);
// perform 1D filtering with output with varrying spatial coefficient
void _horizontalCausalFilter_Irregular(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
void _horizontalCausalFilter_Irregular_addInput(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
void _horizontalAnticausalFilter_Irregular(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd, const float *spatialConstantBuffer); // parallelized with TBB
void _verticalCausalFilter_Irregular(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd, const float *spatialConstantBuffer); // parallelized with TBB
void _verticalAnticausalFilter_Irregular_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd);
// 1D filters in which the output is multiplied by _gain
void _verticalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd); // this functions affects _gain at the output
void _horizontalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd); // this functions affects _gain at the output
// 1D filters in which the output is multiplied by _gain
void _verticalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd); // this functions affects _gain at the output // parallelized with TBB
void _horizontalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd); // this functions affects _gain at the output
// LP filter on specific parts of the picture instead of all the image
// same functions (some of them) but take a binary flag to allow integration, false flag means, 0 at the output...
void _local_squaringHorizontalCausalFilter(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd, const unsigned int *integrationAreas);
void _local_horizontalAnticausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd, const unsigned int *integrationAreas);
void _local_verticalCausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd, const unsigned int *integrationAreas);
void _local_verticalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd, const unsigned int *integrationAreas); // this functions affects _gain at the output
// LP filter on specific parts of the picture instead of all the image
// same functions (some of them) but take a binary flag to allow integration, false flag means, 0 at the output...
void _local_squaringHorizontalCausalFilter(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd, const unsigned int *integrationAreas);
void _local_horizontalAnticausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd, const unsigned int *integrationAreas);
void _local_verticalCausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd, const unsigned int *integrationAreas);
void _local_verticalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd, const unsigned int *integrationAreas); // this functions affects _gain at the output
#ifdef HAVE_TBB
/******************************************************
** IF TBB is useable, then, main loops are parallelized using these functors
** ==> main idea paralellise main filters loops, then, only the most used methods are parallelized... TODO : increase the number of parallelised methods as necessary
** ==> functors names = Parallel_$$$ where $$$= the name of the serial method that is parallelised
** ==> functors constructors can differ from the parameters used with their related serial functions
*/
#define _DEBUG_TBB // define DEBUG_TBB in order to display additionnal data on stdout
class Parallel_horizontalAnticausalFilter
{
private:
float *outputFrame;
const unsigned int IDrowEnd, nbColumns;
const float filterParam_a;
public:
// constructor which takes the input image pointer reference reference and limits
Parallel_horizontalAnticausalFilter(float *bufferToProcess, const unsigned int idEnd, const unsigned int nbCols, const float a )
:outputFrame(bufferToProcess), IDrowEnd(idEnd), nbColumns(nbCols), filterParam_a(a)
{
#ifdef DEBUG_TBB
std::cout<<"Parallel_horizontalAnticausalFilter::Parallel_horizontalAnticausalFilter :"
<<"\n\t idEnd="<<IDrowEnd
<<"\n\t nbCols="<<nbColumns
<<"\n\t filterParam="<<filterParam_a
<<std::endl;
#endif
}
void operator()( const tbb::blocked_range<size_t>& r ) const {
#ifdef DEBUG_TBB
std::cout<<"Parallel_horizontalAnticausalFilter::operator() :"
<<"\n\t range size="<<r.size()
<<"\n\t first index="<<r.begin()
//<<"\n\t last index="<<filterParam
<<std::endl;
#endif
for (size_t IDrow=r.begin(); IDrow!=r.end(); ++IDrow)
{
register float* outputPTR=outputFrame+(IDrowEnd-IDrow)*(nbColumns)-1;
register float result=0;
for (unsigned int index=0; index<nbColumns; ++index)
{
result = *(outputPTR)+ filterParam_a* result;
*(outputPTR--) = result;
}
}
}
};
class Parallel_horizontalCausalFilter_addInput
{
private:
const float *inputFrame;
float *outputFrame;
const unsigned int IDrowStart, nbColumns;
const float filterParam_a, filterParam_tau;
public:
Parallel_horizontalCausalFilter_addInput(const float *bufferToAddAsInputProcess, float *bufferToProcess, const unsigned int idStart, const unsigned int nbCols, const float a, const float tau)
:inputFrame(bufferToAddAsInputProcess), outputFrame(bufferToProcess), IDrowStart(idStart), nbColumns(nbCols), filterParam_a(a), filterParam_tau(tau){}
void operator()( const tbb::blocked_range<size_t>& r ) const {
for (unsigned int IDrow=r.begin(); IDrow!=r.end(); ++IDrow)
{
register float* outputPTR=outputFrame+(IDrowStart+IDrow)*nbColumns;
register const float* inputPTR=inputFrame+(IDrowStart+IDrow)*nbColumns;
register float result=0;
for (unsigned int index=0; index<nbColumns; ++index)
{
result = *(inputPTR++) + filterParam_tau**(outputPTR)+ filterParam_a* result;
*(outputPTR++) = result;
}
}
}
};
class Parallel_verticalCausalFilter
{
private:
float *outputFrame;
const unsigned int nbRows, nbColumns;
const float filterParam_a;
public:
Parallel_verticalCausalFilter(float *bufferToProcess, const unsigned int nbRws, const unsigned int nbCols, const float a )
:outputFrame(bufferToProcess), nbRows(nbRws), nbColumns(nbCols), filterParam_a(a){}
void operator()( const tbb::blocked_range<size_t>& r ) const {
for (unsigned int IDcolumn=r.begin(); IDcolumn!=r.end(); ++IDcolumn)
{
register float result=0;
register float *outputPTR=outputFrame+IDcolumn;
for (unsigned int index=0; index<nbRows; ++index)
{
result = *(outputPTR) + filterParam_a * result;
*(outputPTR) = result;
outputPTR+=nbColumns;
}
}
}
};
class Parallel_verticalAnticausalFilter_multGain
{
private:
float *outputFrame;
const unsigned int nbRows, nbColumns;
const float filterParam_a, filterParam_gain;
public:
Parallel_verticalAnticausalFilter_multGain(float *bufferToProcess, const unsigned int nbRws, const unsigned int nbCols, const float a, const float gain)
:outputFrame(bufferToProcess), nbRows(nbRws), nbColumns(nbCols), filterParam_a(a), filterParam_gain(gain){}
void operator()( const tbb::blocked_range<size_t>& r ) const {
float* offset=outputFrame+nbColumns*nbRows-nbColumns;
for (unsigned int IDcolumn=r.begin(); IDcolumn!=r.end(); ++IDcolumn)
{
register float result=0;
register float *outputPTR=offset+IDcolumn;
for (unsigned int index=0; index<nbRows; ++index)
{
result = *(outputPTR) + filterParam_a * result;
*(outputPTR) = filterParam_gain*result;
outputPTR-=nbColumns;
}
}
}
};
class Parallel_localAdaptation
{
private:
const float *localLuminance, *inputFrame;
float *outputFrame;
const float localLuminanceFactor, localLuminanceAddon, maxInputValue;
public:
Parallel_localAdaptation(const float *localLum, const float *inputImg, float *bufferToProcess, const float localLuminanceFact, const float localLuminanceAdd, const float maxInputVal)
:localLuminance(localLum), inputFrame(inputImg),outputFrame(bufferToProcess), localLuminanceFactor(localLuminanceFact), localLuminanceAddon(localLuminanceAdd), maxInputValue(maxInputVal) {};
void operator()( const tbb::blocked_range<size_t>& r ) const {
const float *localLuminancePTR=localLuminance+r.begin();
const float *inputFramePTR=inputFrame+r.begin();
float *outputFramePTR=outputFrame+r.begin();
for (register unsigned int IDpixel=r.begin() ; IDpixel!=r.end() ; ++IDpixel, ++inputFramePTR, ++outputFramePTR)
{
float X0=*(localLuminancePTR++)*localLuminanceFactor+localLuminanceAddon;
// TODO : the following line can lead to a divide by zero ! A small offset is added, take care if the offset is too large in case of High Dynamic Range images which can use very small values...
*(outputFramePTR) = (maxInputValue+X0)**inputFramePTR/(*inputFramePTR +X0+0.00000000001);
//std::cout<<"BasicRetinaFilter::inputFrame[IDpixel]=%f, X0=%f, outputFrame[IDpixel]=%f\n", inputFrame[IDpixel], X0, outputFrame[IDpixel]);
}
}
};
//////////////////////////////////////////
/// Specific filtering methods which manage non const spatial filtering parameter (used By retinacolor and LogProjectors)
class Parallel_horizontalAnticausalFilter_Irregular
{
private:
float *outputFrame;
const float *spatialConstantBuffer;
const unsigned int IDrowEnd, nbColumns;
public:
Parallel_horizontalAnticausalFilter_Irregular(float *bufferToProcess, const float *spatialConst, const unsigned int idEnd, const unsigned int nbCols)
:outputFrame(bufferToProcess), spatialConstantBuffer(spatialConst), IDrowEnd(idEnd), nbColumns(nbCols){}
void operator()( const tbb::blocked_range<size_t>& r ) const {
for (size_t IDrow=r.begin(); IDrow!=r.end(); ++IDrow)
{
register float* outputPTR=outputFrame+(IDrowEnd-IDrow)*(nbColumns)-1;
register const float* spatialConstantPTR=spatialConstantBuffer+(IDrowEnd-IDrow)*(nbColumns)-1;
register float result=0;
for (unsigned int index=0; index<nbColumns; ++index)
{
result = *(outputPTR)+ *(spatialConstantPTR--)* result;
*(outputPTR--) = result;
}
}
}
};
class Parallel_verticalCausalFilter_Irregular
{
private:
float *outputFrame;
const float *spatialConstantBuffer;
const unsigned int nbRows, nbColumns;
public:
Parallel_verticalCausalFilter_Irregular(float *bufferToProcess, const float *spatialConst, const unsigned int nbRws, const unsigned int nbCols)
:outputFrame(bufferToProcess), spatialConstantBuffer(spatialConst), nbRows(nbRws), nbColumns(nbCols){}
void operator()( const tbb::blocked_range<size_t>& r ) const {
for (unsigned int IDcolumn=r.begin(); IDcolumn!=r.end(); ++IDcolumn)
{
register float result=0;
register float *outputPTR=outputFrame+IDcolumn;
register const float* spatialConstantPTR=spatialConstantBuffer+IDcolumn;
for (unsigned int index=0; index<nbRows; ++index)
{
result = *(outputPTR) + *(spatialConstantPTR) * result;
*(outputPTR) = result;
outputPTR+=nbColumns;
spatialConstantPTR+=nbColumns;
}
}
}
};
#endif
};

View File

@ -153,6 +153,9 @@ void MagnoRetinaFilter::setCoefficientsTable(const float parasolCells_beta, cons
void MagnoRetinaFilter::_amacrineCellsComputing(const float *OPL_ON, const float *OPL_OFF)
{
#ifdef HAVE_TBB
tbb::parallel_for(tbb::blocked_range<size_t>(0,_filterOutput.getNBpixels()), Parallel_amacrineCellsComputing(OPL_ON, OPL_OFF, &_previousInput_ON[0], &_previousInput_OFF[0], &_amacrinCellsTempOutput_ON[0], &_amacrinCellsTempOutput_OFF[0], _temporalCoefficient), tbb::auto_partitioner());
#else
register const float *OPL_ON_PTR=OPL_ON;
register const float *OPL_OFF_PTR=OPL_OFF;
register float *previousInput_ON_PTR= &_previousInput_ON[0];
@ -175,6 +178,7 @@ void MagnoRetinaFilter::_amacrineCellsComputing(const float *OPL_ON, const float
*(previousInput_OFF_PTR++)=*(OPL_OFF_PTR++);
}
#endif
}
// launch filter that runs all the IPL filter

View File

@ -190,10 +190,52 @@ private:
// varialbles
float _temporalCoefficient;
// amacrine cells filter : high pass temporal filter
void _amacrineCellsComputing(const float *ONinput, const float *OFFinput);
// amacrine cells filter : high pass temporal filter
void _amacrineCellsComputing(const float *ONinput, const float *OFFinput);
#ifdef HAVE_TBB
/******************************************************
** IF TBB is useable, then, main loops are parallelized using these functors
** ==> main idea paralellise main filters loops, then, only the most used methods are parallelized... TODO : increase the number of parallelised methods as necessary
** ==> functors names = Parallel_$$$ where $$$= the name of the serial method that is parallelised
** ==> functors constructors can differ from the parameters used with their related serial functions
*/
class Parallel_amacrineCellsComputing
{
private:
const float *OPL_ON, *OPL_OFF;
float *previousInput_ON, *previousInput_OFF, *amacrinCellsTempOutput_ON, *amacrinCellsTempOutput_OFF;
const float temporalCoefficient;
public:
Parallel_amacrineCellsComputing(const float *OPL_ON_PTR, const float *OPL_OFF_PTR, float *previousInput_ON_PTR, float *previousInput_OFF_PTR, float *amacrinCellsTempOutput_ON_PTR, float *amacrinCellsTempOutput_OFF_PTR, float temporalCoefficientVal)
:OPL_ON(OPL_ON_PTR), OPL_OFF(OPL_OFF_PTR), previousInput_ON(previousInput_ON_PTR), previousInput_OFF(previousInput_OFF_PTR), amacrinCellsTempOutput_ON(amacrinCellsTempOutput_ON_PTR), amacrinCellsTempOutput_OFF(amacrinCellsTempOutput_OFF_PTR), temporalCoefficient(temporalCoefficientVal) {}
void operator()( const tbb::blocked_range<size_t>& r ) const {
register const float *OPL_ON_PTR=OPL_ON+r.begin();
register const float *OPL_OFF_PTR=OPL_OFF+r.begin();
register float *previousInput_ON_PTR= previousInput_ON+r.begin();
register float *previousInput_OFF_PTR= previousInput_OFF+r.begin();
register float *amacrinCellsTempOutput_ON_PTR= amacrinCellsTempOutput_ON+r.begin();
register float *amacrinCellsTempOutput_OFF_PTR= amacrinCellsTempOutput_OFF+r.begin();
for (unsigned int IDpixel=r.begin() ; IDpixel!=r.end(); ++IDpixel)
{
/* Compute ON and OFF amacrin cells high pass temporal filter */
float magnoXonPixelResult = temporalCoefficient*(*amacrinCellsTempOutput_ON_PTR+ *OPL_ON_PTR-*previousInput_ON_PTR);
*(amacrinCellsTempOutput_ON_PTR++)=((float)(magnoXonPixelResult>0))*magnoXonPixelResult;
float magnoXoffPixelResult = temporalCoefficient*(*amacrinCellsTempOutput_OFF_PTR+ *OPL_OFF_PTR-*previousInput_OFF_PTR);
*(amacrinCellsTempOutput_OFF_PTR++)=((float)(magnoXoffPixelResult>0))*magnoXoffPixelResult;
/* prepare next loop */
*(previousInput_ON_PTR++)=*(OPL_ON_PTR++);
*(previousInput_OFF_PTR++)=*(OPL_OFF_PTR++);
}
}
};
#endif
};
}

View File

@ -199,17 +199,20 @@ const std::valarray<float> &ParvoRetinaFilter::runFilter(const std::valarray<flo
return (*_parvocellularOutputONminusOFF);
}
void ParvoRetinaFilter::_OPL_OnOffWaysComputing()
void ParvoRetinaFilter::_OPL_OnOffWaysComputing() // WARNING : this method requires many buffer accesses, parallelizing can increase bandwith & core efficacy
{
// loop that makes the difference between photoreceptor cells output and horizontal cells
// positive part goes on the ON way, negative pat goes on the OFF way
register float *photoreceptorsOutput_PTR= &_photoreceptorsOutput[0];
register float *horizontalCellsOutput_PTR= &_horizontalCellsOutput[0];
register float *bipolarCellsON_PTR = &_bipolarCellsOutputON[0];
register float *bipolarCellsOFF_PTR = &_bipolarCellsOutputOFF[0];
register float *parvocellularOutputON_PTR= &_parvocellularOutputON[0];
register float *parvocellularOutputOFF_PTR= &_parvocellularOutputOFF[0];
#ifdef HAVE_TBB
tbb::parallel_for(tbb::blocked_range<size_t>(0,_filterOutput.getNBpixels()), Parallel_OPL_OnOffWaysComputing(&_photoreceptorsOutput[0], &_horizontalCellsOutput[0], &_bipolarCellsOutputON[0], &_bipolarCellsOutputOFF[0], &_parvocellularOutputON[0], &_parvocellularOutputOFF[0]), tbb::auto_partitioner());
#else
float *photoreceptorsOutput_PTR= &_photoreceptorsOutput[0];
float *horizontalCellsOutput_PTR= &_horizontalCellsOutput[0];
float *bipolarCellsON_PTR = &_bipolarCellsOutputON[0];
float *bipolarCellsOFF_PTR = &_bipolarCellsOutputOFF[0];
float *parvocellularOutputON_PTR= &_parvocellularOutputON[0];
float *parvocellularOutputOFF_PTR= &_parvocellularOutputOFF[0];
// compute bipolar cells response equal to photoreceptors minus horizontal cells response
// and copy the result on parvo cellular outputs... keeping time before their local contrast adaptation for final result
for (register unsigned int IDpixel=0 ; IDpixel<_filterOutput.getNBpixels() ; ++IDpixel)
@ -222,6 +225,7 @@ void ParvoRetinaFilter::_OPL_OnOffWaysComputing()
*(parvocellularOutputON_PTR++)=*(bipolarCellsON_PTR++) = isPositive*pixelDifference;
*(parvocellularOutputOFF_PTR++)=*(bipolarCellsOFF_PTR++)= (isPositive-1.0f)*pixelDifference;
}
#endif
}
}

View File

@ -216,6 +216,45 @@ private:
// private functions
void _OPL_OnOffWaysComputing();
#ifdef HAVE_TBB
/******************************************************
** IF TBB is useable, then, main loops are parallelized using these functors
** ==> main idea paralellise main filters loops, then, only the most used methods are parallelized... TODO : increase the number of parallelised methods as necessary
** ==> functors names = Parallel_$$$ where $$$= the name of the serial method that is parallelised
** ==> functors constructors can differ from the parameters used with their related serial functions
*/
class Parallel_OPL_OnOffWaysComputing
{
private:
float *photoreceptorsOutput, *horizontalCellsOutput, *bipolarCellsON, *bipolarCellsOFF, *parvocellularOutputON, *parvocellularOutputOFF;
public:
Parallel_OPL_OnOffWaysComputing(float *photoreceptorsOutput_PTR, float *horizontalCellsOutput_PTR, float *bipolarCellsON_PTR, float *bipolarCellsOFF_PTR, float *parvocellularOutputON_PTR, float *parvocellularOutputOFF_PTR)
:photoreceptorsOutput(photoreceptorsOutput_PTR), horizontalCellsOutput(horizontalCellsOutput_PTR), bipolarCellsON(bipolarCellsON_PTR), bipolarCellsOFF(bipolarCellsOFF_PTR), parvocellularOutputON(parvocellularOutputON_PTR), parvocellularOutputOFF(parvocellularOutputOFF_PTR) {}
void operator()( const tbb::blocked_range<size_t>& r ) const {
// compute bipolar cells response equal to photoreceptors minus horizontal cells response
// and copy the result on parvo cellular outputs... keeping time before their local contrast adaptation for final result
float *photoreceptorsOutput_PTR= photoreceptorsOutput+r.begin();
float *horizontalCellsOutput_PTR= horizontalCellsOutput+r.begin();
float *bipolarCellsON_PTR = bipolarCellsON+r.begin();
float *bipolarCellsOFF_PTR = bipolarCellsOFF+r.begin();
float *parvocellularOutputON_PTR= parvocellularOutputON+r.begin();
float *parvocellularOutputOFF_PTR= parvocellularOutputOFF+r.begin();
for (register unsigned int IDpixel=r.begin() ; IDpixel!=r.end() ; ++IDpixel)
{
float pixelDifference = *(photoreceptorsOutput_PTR++) -*(horizontalCellsOutput_PTR++);
// test condition to allow write pixelDifference in ON or OFF buffer and 0 in the over
float isPositive=(float) (pixelDifference>0.0f);
// ON and OFF channels writing step
*(parvocellularOutputON_PTR++)=*(bipolarCellsON_PTR++) = isPositive*pixelDifference;
*(parvocellularOutputOFF_PTR++)=*(bipolarCellsOFF_PTR++)= (isPositive-1.0f)*pixelDifference;
}
}
};
#endif
};
}
#endif

View File

@ -89,7 +89,7 @@ RetinaColor::RetinaColor(const unsigned int NBrows, const unsigned int NBcolumns
_demultiplexedColorFrame(NBrows*NBcolumns*3),
_chrominance(NBrows*NBcolumns*3),
_colorLocalDensity(NBrows*NBcolumns*3),
_imageGradient(NBrows*NBcolumns*3)
_imageGradient(NBrows*NBcolumns*2)
{
// link to parent buffers (let's recycle !)
_luminance=&_filterOutput;
@ -126,12 +126,12 @@ RetinaColor::~RetinaColor()
void RetinaColor::clearAllBuffers()
{
BasicRetinaFilter::clearAllBuffers();
_tempMultiplexedFrame=0;
_demultiplexedTempBuffer=0;
_tempMultiplexedFrame=0.f;
_demultiplexedTempBuffer=0.f;
_demultiplexedColorFrame=0;
_chrominance=0;
_imageGradient=1;
_demultiplexedColorFrame=0.f;
_chrominance=0.f;
_imageGradient=0.57f;
}
/**
@ -149,7 +149,7 @@ void RetinaColor::resize(const unsigned int NBrows, const unsigned int NBcolumns
_demultiplexedColorFrame.resize(NBrows*NBcolumns*3);
_chrominance.resize(NBrows*NBcolumns*3);
_colorLocalDensity.resize(NBrows*NBcolumns*3);
_imageGradient.resize(NBrows*NBcolumns*3);
_imageGradient.resize(NBrows*NBcolumns*2);
// link to parent buffers (let's recycle !)
_luminance=&_filterOutput;
@ -325,15 +325,15 @@ void RetinaColor::runColorDemultiplexing(const std::valarray<float> &multiplexed
}else
{
register const float *multiplexedColorFramePTR1= get_data(multiplexedColorFrame);
for (unsigned int indexc=0; indexc<_filterOutput.getNBpixels() ; ++indexc, ++chrominancePTR, ++colorLocalDensityPTR, ++luminance, ++multiplexedColorFramePTR1)
register const float *multiplexedColorFramePTR= get_data(multiplexedColorFrame);
for (unsigned int indexc=0; indexc<_filterOutput.getNBpixels() ; ++indexc, ++chrominancePTR, ++colorLocalDensityPTR, ++luminance, ++multiplexedColorFramePTR)
{
// normalize by photoreceptors density
float Cr=*(chrominancePTR)*_colorLocalDensity[indexc];
float Cg=*(chrominancePTR+_filterOutput.getNBpixels())*_colorLocalDensity[indexc+_filterOutput.getNBpixels()];
float Cb=*(chrominancePTR+_filterOutput.getDoubleNBpixels())*_colorLocalDensity[indexc+_filterOutput.getDoubleNBpixels()];
*luminance=(Cr+Cg+Cb)*_pG;
_demultiplexedTempBuffer[_colorSampling[indexc]] = *multiplexedColorFramePTR1 - *luminance;
_demultiplexedTempBuffer[_colorSampling[indexc]] = *multiplexedColorFramePTR - *luminance;
}
@ -349,8 +349,9 @@ void RetinaColor::runColorDemultiplexing(const std::valarray<float> &multiplexed
_adaptiveSpatialLPfilter(&_demultiplexedTempBuffer[0]+_filterOutput.getNBpixels(), &_demultiplexedColorFrame[0]+_filterOutput.getNBpixels());
_adaptiveSpatialLPfilter(&_demultiplexedTempBuffer[0]+_filterOutput.getDoubleNBpixels(), &_demultiplexedColorFrame[0]+_filterOutput.getDoubleNBpixels());
for (unsigned int index=0; index<_filterOutput.getNBpixels()*3 ; ++index) // cette boucle pourrait <20>tre supprimee en passant la densit<69> <20> la fonction de filtrage
_demultiplexedColorFrame[index] /= _chrominance[index];
/* for (unsigned int index=0; index<_filterOutput.getNBpixels()*3 ; ++index) // cette boucle pourrait <20>tre supprimee en passant la densit<69> <20> la fonction de filtrage
_demultiplexedColorFrame[index] /= _chrominance[index];*/
_demultiplexedColorFrame/=_chrominance; // more optimal ;o)
// compute and substract the residual luminance
for (unsigned int index=0; index<_filterOutput.getNBpixels() ; ++index)
@ -432,6 +433,9 @@ void RetinaColor::clipRGBOutput_0_maxInputValue(float *inputOutputBuffer, const
if (inputOutputBuffer==NULL)
inputOutputBuffer= &_demultiplexedColorFrame[0];
#ifdef HAVE_TBB // call the TemplateBuffer TBB clipping method
tbb::parallel_for(tbb::blocked_range<size_t>(0,_filterOutput.getNBpixels()*3), Parallel_clipBufferValues<float>(inputOutputBuffer, 0, maxInputValue), tbb::auto_partitioner());
#else
register float *inputOutputBufferPTR=inputOutputBuffer;
for (register unsigned int jf = 0; jf < _filterOutput.getNBpixels()*3; ++jf, ++inputOutputBufferPTR)
{
@ -440,6 +444,7 @@ void RetinaColor::clipRGBOutput_0_maxInputValue(float *inputOutputBuffer, const
else if (*inputOutputBufferPTR<0)
*inputOutputBufferPTR=0;
}
#endif
//std::cout<<"RetinaColor::...normalizing RGB frame OK"<<std::endl;
}
@ -535,8 +540,8 @@ void RetinaColor::_applyRIFfilter(const float *sourceBuffer, float *destinationB
void RetinaColor::_getNormalizedContoursImage(const float *inputFrame, float *outputFrame)
{
float maxValue=0;
float normalisationFactor=1.f/3;
float maxValue=0.f;
float normalisationFactor=1.f/3.f;
for (unsigned int indexr=1 ; indexr<_filterOutput.getNBrows()-1; ++indexr)
{
for (unsigned int indexc=1 ; indexc<_filterOutput.getNBcolumns()-1; ++indexc)
@ -564,19 +569,23 @@ void RetinaColor::_adaptiveSpatialLPfilter(const float *inputFrame, float *outpu
_gain = (1-0.57f)*(1-0.57f)*(1-0.06f)*(1-0.06f);
// launch the serie of 1D directional filters in order to compute the 2D low pass filter
// -> horizontal filters work with the first layer of imageGradient
_adaptiveHorizontalCausalFilter_addInput(inputFrame, outputFrame, 0, _filterOutput.getNBrows());
_adaptiveHorizontalAnticausalFilter(outputFrame, 0, _filterOutput.getNBrows());
_adaptiveVerticalCausalFilter(outputFrame, 0, _filterOutput.getNBcolumns());
_horizontalAnticausalFilter_Irregular(outputFrame, 0, _filterOutput.getNBrows(), &_imageGradient[0]);
// -> horizontal filters work with the second layer of imageGradient
_verticalCausalFilter_Irregular(outputFrame, 0, _filterOutput.getNBcolumns(), &_imageGradient[0]+_filterOutput.getNBpixels());
_adaptiveVerticalAnticausalFilter_multGain(outputFrame, 0, _filterOutput.getNBcolumns());
}
// horizontal causal filter which adds the input inside
// horizontal causal filter which adds the input inside... replaces the parent _horizontalCausalFilter_Irregular_addInput by avoiding a product for each pixel
void RetinaColor::_adaptiveHorizontalCausalFilter_addInput(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd)
{
#ifdef HAVE_TBB
tbb::parallel_for(tbb::blocked_range<size_t>(IDrowStart,IDrowEnd), Parallel_adaptiveHorizontalCausalFilter_addInput(inputFrame, outputFrame, &_imageGradient[0], _filterOutput.getNBcolumns()), tbb::auto_partitioner());
#else
register float* outputPTR=outputFrame+IDrowStart*_filterOutput.getNBcolumns();
register const float* inputPTR=inputFrame+IDrowStart*_filterOutput.getNBcolumns();
register float *imageGradientPTR= &_imageGradient[0]+IDrowStart*_filterOutput.getNBcolumns();
register const float *imageGradientPTR= &_imageGradient[0]+IDrowStart*_filterOutput.getNBcolumns();
for (unsigned int IDrow=IDrowStart; IDrow<IDrowEnd; ++IDrow)
{
register float result=0;
@ -589,51 +598,17 @@ void RetinaColor::_adaptiveHorizontalCausalFilter_addInput(const float *inputFra
}
// std::cout<<" "<<std::endl;
}
#endif
}
// horizontal anticausal filter (basic way, no add on)
void RetinaColor::_adaptiveHorizontalAnticausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd)
{
register float* outputPTR=outputFrame+IDrowEnd*(_filterOutput.getNBcolumns())-1;
register float *imageGradientPTR= &_imageGradient[0]+IDrowEnd*(_filterOutput.getNBcolumns())-1;
for (unsigned int IDrow=IDrowStart; IDrow<IDrowEnd; ++IDrow)
{
register float result=0;
for (unsigned int index=0; index<_filterOutput.getNBcolumns(); ++index)
{
result = *(outputPTR)+ (*imageGradientPTR)* result;
*(outputPTR--) = result;
--imageGradientPTR;
}
}
}
// vertical anticausal filter
void RetinaColor::_adaptiveVerticalCausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd)
{
for (unsigned int IDcolumn=IDcolumnStart; IDcolumn<IDcolumnEnd; ++IDcolumn)
{
register float result=0;
register float *outputPTR=outputFrame+IDcolumn;
register float *imageGradientPTR= &_imageGradient[0]+IDcolumn;
for (unsigned int index=0; index<_filterOutput.getNBrows(); ++index)
{
result = *(outputPTR) + (*(imageGradientPTR+_filterOutput.getNBpixels())) * result;
*(outputPTR) = result;
outputPTR+=_filterOutput.getNBcolumns();
imageGradientPTR+=_filterOutput.getNBcolumns();
}
}
}
// vertical anticausal filter which multiplies the output by _gain
// vertical anticausal filter which multiplies the output by _gain... replaces the parent _verticalAnticausalFilter_multGain by avoiding a product for each pixel and taking into account the second layer of the _imageGradient buffer
void RetinaColor::_adaptiveVerticalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd)
{
#ifdef HAVE_TBB
tbb::parallel_for(tbb::blocked_range<size_t>(IDcolumnStart,IDcolumnEnd), Parallel_adaptiveVerticalAnticausalFilter_multGain(outputFrame, &_imageGradient[0]+_filterOutput.getNBpixels(), _filterOutput.getNBrows(), _filterOutput.getNBcolumns(), _gain), tbb::auto_partitioner());
#else
float* outputOffset=outputFrame+_filterOutput.getNBpixels()-_filterOutput.getNBcolumns();
float* gradOffset= &_imageGradient[0]+_filterOutput.getNBpixels()-_filterOutput.getNBcolumns();
float* gradOffset= &_imageGradient[0]+_filterOutput.getNBpixels()*2-_filterOutput.getNBcolumns();
for (unsigned int IDcolumn=IDcolumnStart; IDcolumn<IDcolumnEnd; ++IDcolumn)
{
@ -642,12 +617,13 @@ void RetinaColor::_adaptiveVerticalAnticausalFilter_multGain(float *outputFrame,
register float *imageGradientPTR=gradOffset+IDcolumn;
for (unsigned int index=0; index<_filterOutput.getNBrows(); ++index)
{
result = *(outputPTR) + (*(imageGradientPTR+_filterOutput.getNBpixels())) * result;
result = *(outputPTR) + (*(imageGradientPTR)) * result;
*(outputPTR) = _gain*result;
outputPTR-=_filterOutput.getNBcolumns();
imageGradientPTR-=_filterOutput.getNBcolumns();
}
}
#endif
}
///////////////////////////

View File

@ -248,9 +248,7 @@ protected:
void _getNormalizedContoursImage(const float *inputFrame, float *outputFrame);
// -> special adaptive filters dedicated to low pass filtering on the chrominance (skeeps filtering on the edges)
void _adaptiveSpatialLPfilter(const float *inputFrame, float *outputFrame);
void _adaptiveHorizontalCausalFilter_addInput(const float *inputFrame, float *outputFrame, const unsigned int IDrowStart, const unsigned int IDrowEnd);
void _adaptiveHorizontalAnticausalFilter(float *outputFrame, const unsigned int IDrowStart, const unsigned int IDrowEnd);
void _adaptiveVerticalCausalFilter(float *outputFrame, const unsigned int IDcolumnStart, const unsigned int IDcolumnEnd);
void _adaptiveHorizontalCausalFilter_addInput(const float *inputFrame, float *outputFrame, const unsigned int IDrowStart, const unsigned int IDrowEnd); // TBB parallelized
void _adaptiveVerticalAnticausalFilter_multGain(float *outputFrame, const unsigned int IDcolumnStart, const unsigned int IDcolumnEnd);
void _computeGradient(const float *luminance);
void _normalizeOutputs_0_maxOutputValue(void);
@ -258,6 +256,84 @@ protected:
// color space transform
void _applyImageColorSpaceConversion(const std::valarray<float> &inputFrame, std::valarray<float> &outputFrame, const float *transformTable);
#ifdef HAVE_TBB
/******************************************************
** IF TBB is useable, then, main loops are parallelized using these functors
** ==> main idea paralellise main filters loops, then, only the most used methods are parallelized... TODO : increase the number of parallelised methods as necessary
** ==> functors names = Parallel_$$$ where $$$= the name of the serial method that is parallelised
** ==> functors constructors can differ from the parameters used with their related serial functions
*/
/* Template :
class
{
private:
public:
Parallel_()
: {}
void operator()( const tbb::blocked_range<size_t>& r ) const {
}
}:
*/
class Parallel_adaptiveHorizontalCausalFilter_addInput
{
private:
float *outputFrame;
const float *inputFrame, *imageGradient;
const unsigned int nbColumns;
public:
Parallel_adaptiveHorizontalCausalFilter_addInput(const float *inputImg, float *bufferToProcess, const float *imageGrad, const unsigned int nbCols)
:outputFrame(bufferToProcess), inputFrame(inputImg), imageGradient(imageGrad), nbColumns(nbCols) {};
void operator()( const tbb::blocked_range<size_t>& r ) const {
register float* outputPTR=outputFrame+r.begin()*nbColumns;
register const float* inputPTR=inputFrame+r.begin()*nbColumns;
register const float *imageGradientPTR= imageGradient+r.begin()*nbColumns;
for (unsigned int IDrow=r.begin(); IDrow!=r.end(); ++IDrow)
{
register float result=0;
for (unsigned int index=0; index<nbColumns; ++index)
{
result = *(inputPTR++) + (*imageGradientPTR++)* result;
*(outputPTR++) = result;
}
}
}
};
class Parallel_adaptiveVerticalAnticausalFilter_multGain
{
private:
float *outputFrame;
const float *imageGradient;
const unsigned int nbRows, nbColumns;
const float filterParam_gain;
public:
Parallel_adaptiveVerticalAnticausalFilter_multGain(float *bufferToProcess, const float *imageGrad, const unsigned int nbRws, const unsigned int nbCols, const float gain)
:outputFrame(bufferToProcess), imageGradient(imageGrad), nbRows(nbRws), nbColumns(nbCols), filterParam_gain(gain){}
void operator()( const tbb::blocked_range<size_t>& r ) const {
float* offset=outputFrame+nbColumns*nbRows-nbColumns;
const float* gradOffset= imageGradient+nbColumns*nbRows-nbColumns;
for (unsigned int IDcolumn=r.begin(); IDcolumn!=r.end(); ++IDcolumn)
{
register float result=0;
register float *outputPTR=offset+IDcolumn;
register const float *imageGradientPTR=gradOffset+IDcolumn;
for (unsigned int index=0; index<nbRows; ++index)
{
result = *(outputPTR) + *(imageGradientPTR) * result;
*(outputPTR) = filterParam_gain*result;
outputPTR-=nbColumns;
imageGradientPTR-=nbColumns;
}
}
}
};
#endif
};
}

View File

@ -70,6 +70,38 @@
#include <iostream>
#include <cmath>
//// If TBB is used
// ==> then include required includes
#ifdef HAVE_TBB
#include "tbb/parallel_for.h"
#include "tbb/blocked_range.h"
// ==> declare usefull generic tools
template <class type>
class Parallel_clipBufferValues
{
private:
type *bufferToClip;
const type minValue, maxValue;
public:
Parallel_clipBufferValues(type* bufferToProcess, const type min, const type max)
: bufferToClip(bufferToProcess), minValue(min), maxValue(max){}
void operator()( const tbb::blocked_range<size_t>& r ) const {
register type *inputOutputBufferPTR=bufferToClip+r.begin();
for (register unsigned int jf = r.begin(); jf != r.end(); ++jf, ++inputOutputBufferPTR)
{
if (*inputOutputBufferPTR>maxValue)
*inputOutputBufferPTR=maxValue;
else if (*inputOutputBufferPTR<minValue)
*inputOutputBufferPTR=minValue;
}
}
};
#endif
//#define __TEMPLATEBUFFERDEBUG //define TEMPLATEBUFFERDEBUG in order to display debug information
namespace cv
@ -351,21 +383,25 @@ public:
}
}
std::cout<<"Tdebug"<<std::endl;
std::cout<<"deltaL="<<deltaL<<", deltaH="<<deltaH<<std::endl;
std::cout<<"this->max()"<<this->max()<<"maxThreshold="<<maxThreshold<<"updatedHighValue="<<updatedHighValue<<std::endl;
std::cout<<"this->min()"<<this->min()<<"minThreshold="<<minThreshold<<"updatedLowValue="<<updatedLowValue<<std::endl;
// clipping values outside than the updated thresholds
bufferPTR=this->Buffer();
for (unsigned int i=0;i<this->size();++i, ++bufferPTR)
{
if (*bufferPTR<updatedLowValue)
*bufferPTR=updatedLowValue;
else if (*bufferPTR>updatedHighValue)
*bufferPTR=updatedHighValue;
}
std::cout<<"Tdebug"<<std::endl;
std::cout<<"deltaL="<<deltaL<<", deltaH="<<deltaH<<std::endl;
std::cout<<"this->max()"<<this->max()<<"maxThreshold="<<maxThreshold<<"updatedHighValue="<<updatedHighValue<<std::endl;
std::cout<<"this->min()"<<this->min()<<"minThreshold="<<minThreshold<<"updatedLowValue="<<updatedLowValue<<std::endl;
// clipping values outside than the updated thresholds
bufferPTR=this->Buffer();
#ifdef HAVE_TBB // call the TemplateBuffer TBB clipping method
tbb::parallel_for(tbb::blocked_range<size_t>(0,this->size()), Parallel_clipBufferValues<type>(bufferPTR, updatedLowValue, updatedHighValue), tbb::auto_partitioner());
#else
normalizeGrayOutput_0_maxOutputValue(this->Buffer(), this->size(), maxOutputValue);
for (unsigned int i=0;i<this->size();++i, ++bufferPTR)
{
if (*bufferPTR<updatedLowValue)
*bufferPTR=updatedLowValue;
else if (*bufferPTR>updatedHighValue)
*bufferPTR=updatedHighValue;
}
#endif
normalizeGrayOutput_0_maxOutputValue(this->Buffer(), this->size(), maxOutputValue);
}