mirror of
https://github.com/opencv/opencv.git
synced 2025-01-18 06:03:15 +08:00
Retina module is now parallelized thanks to the TBB library. Speed increase expected on multicore plateforms
This commit is contained in:
parent
c78884c780
commit
424bc609b6
@ -316,28 +316,50 @@ void BasicRetinaFilter::runFilter_LocalAdapdation_autonomous(const std::valarray
|
||||
_spatiotemporalLPfilter(get_data(inputFrame), &_filterOutput[0]);
|
||||
_localLuminanceAdaptation(get_data(inputFrame), &_filterOutput[0], &outputFrame[0]);
|
||||
}
|
||||
// local luminance adaptation of the input in regard of localLuminance buffer
|
||||
void BasicRetinaFilter::_localLuminanceAdaptation(const float *inputFrame, const float *localLuminance, float *outputFrame)
|
||||
{
|
||||
float meanLuminance=0;
|
||||
const float *luminancePTR=inputFrame;
|
||||
for (unsigned int i=0;i<_filterOutput.getNBpixels();++i)
|
||||
meanLuminance+=*(luminancePTR++);
|
||||
meanLuminance/=_filterOutput.getNBpixels();
|
||||
//float tempMeanValue=meanLuminance+_meanInputValue*_tau;
|
||||
|
||||
updateCompressionParameter(meanLuminance);
|
||||
// local luminance adaptation of the input in regard of localLuminance buffer, the input is rewrited and becomes the output
|
||||
void BasicRetinaFilter::_localLuminanceAdaptation(float *inputOutputFrame, const float *localLuminance)
|
||||
{
|
||||
_localLuminanceAdaptation(inputOutputFrame, localLuminance, inputOutputFrame, false);
|
||||
|
||||
/* const float *localLuminancePTR=localLuminance;
|
||||
float *inputOutputFramePTR=inputOutputFrame;
|
||||
|
||||
for (register unsigned int IDpixel=0 ; IDpixel<_filterOutput.getNBpixels() ; ++IDpixel, ++inputOutputFramePTR)
|
||||
{
|
||||
float X0=*(localLuminancePTR++)*_localLuminanceFactor+_localLuminanceAddon;
|
||||
*(inputOutputFramePTR) = (_maxInputValue+X0)**inputOutputFramePTR/(*inputOutputFramePTR +X0+0.00000000001);
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
// local luminance adaptation of the input in regard of localLuminance buffer
|
||||
void BasicRetinaFilter::_localLuminanceAdaptation(const float *inputFrame, const float *localLuminance, float *outputFrame, const bool updateLuminanceMean)
|
||||
{
|
||||
if (updateLuminanceMean)
|
||||
{ float meanLuminance=0;
|
||||
const float *luminancePTR=inputFrame;
|
||||
for (unsigned int i=0;i<_filterOutput.getNBpixels();++i)
|
||||
meanLuminance+=*(luminancePTR++);
|
||||
meanLuminance/=_filterOutput.getNBpixels();
|
||||
//float tempMeanValue=meanLuminance+_meanInputValue*_tau;
|
||||
updateCompressionParameter(meanLuminance);
|
||||
}
|
||||
#ifdef HAVE_TBB
|
||||
tbb::parallel_for(tbb::blocked_range<size_t>(0,_filterOutput.getNBpixels()), Parallel_localAdaptation(localLuminance, inputFrame, outputFrame, _localLuminanceFactor, _localLuminanceAddon, _maxInputValue), tbb::auto_partitioner());
|
||||
#else
|
||||
//std::cout<<meanLuminance<<std::endl;
|
||||
const float *localLuminancePTR=localLuminance;
|
||||
const float *inputFramePTR=inputFrame;
|
||||
float *outputFramePTR=outputFrame;
|
||||
for (register unsigned int IDpixel=0 ; IDpixel<_filterOutput.getNBpixels() ; ++IDpixel, ++inputFramePTR)
|
||||
for (register unsigned int IDpixel=0 ; IDpixel<_filterOutput.getNBpixels() ; ++IDpixel, ++inputFramePTR, ++outputFramePTR)
|
||||
{
|
||||
float X0=*(localLuminancePTR++)*_localLuminanceFactor+_localLuminanceAddon;
|
||||
// TODO : the following line can lead to a divide by zero ! A small offset is added, take care if the offset is too large in case of High Dynamic Range images which can use very small values...
|
||||
*(outputFramePTR++) = (_maxInputValue+X0)**inputFramePTR/(*inputFramePTR +X0+0.00000000001f);
|
||||
*(outputFramePTR) = (_maxInputValue+X0)**inputFramePTR/(*inputFramePTR +X0+0.00000000001);
|
||||
//std::cout<<"BasicRetinaFilter::inputFrame[IDpixel]=%f, X0=%f, outputFrame[IDpixel]=%f\n", inputFrame[IDpixel], X0, outputFrame[IDpixel]);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// local adaptation applied on a range of values which can be positive and negative
|
||||
@ -355,27 +377,6 @@ void BasicRetinaFilter::_localLuminanceAdaptationPosNegValues(const float *input
|
||||
}
|
||||
}
|
||||
|
||||
// local luminance adaptation of the input in regard of localLuminance buffer, the input is rewrited and becomes the output
|
||||
void BasicRetinaFilter::_localLuminanceAdaptation(float *inputOutputFrame, const float *localLuminance)
|
||||
{
|
||||
/*float meanLuminance=0;
|
||||
const float *luminancePTR=inputOutputFrame;
|
||||
for (unsigned int i=0;i<_filterOutput.getNBpixels();++i)
|
||||
meanLuminance+=*(luminancePTR++);
|
||||
meanLuminance/=_filterOutput.getNBpixels();
|
||||
//float tempMeanValue=meanLuminance+_meanInputValue*_tau;
|
||||
|
||||
updateCompressionParameter(meanLuminance);
|
||||
*/
|
||||
const float *localLuminancePTR=localLuminance;
|
||||
float *inputOutputFramePTR=inputOutputFrame;
|
||||
|
||||
for (register unsigned int IDpixel=0 ; IDpixel<_filterOutput.getNBpixels() ; ++IDpixel, ++inputOutputFramePTR)
|
||||
{
|
||||
float X0=*(localLuminancePTR++)*_localLuminanceFactor+_localLuminanceAddon;
|
||||
*(inputOutputFramePTR) = (_maxInputValue+X0)**inputOutputFramePTR/(*inputOutputFramePTR +X0);
|
||||
}
|
||||
}
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
/// Spatio temporal Low Pass filter functions
|
||||
// run LP filter and save result in the basic retina element buffer
|
||||
@ -465,7 +466,9 @@ void BasicRetinaFilter::_horizontalCausalFilter(float *outputFrame, unsigned int
|
||||
// horizontal causal filter which adds the input inside
|
||||
void BasicRetinaFilter::_horizontalCausalFilter_addInput(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd)
|
||||
{
|
||||
//#pragma omp parallel for
|
||||
#ifdef HAVE_TBB
|
||||
tbb::parallel_for(tbb::blocked_range<size_t>(IDrowStart,IDrowEnd), Parallel_horizontalCausalFilter_addInput(inputFrame, outputFrame, IDrowStart, _filterOutput.getNBcolumns(), _a, _tau), tbb::auto_partitioner());
|
||||
#else
|
||||
for (unsigned int IDrow=IDrowStart; IDrow<IDrowEnd; ++IDrow)
|
||||
{
|
||||
register float* outputPTR=outputFrame+(IDrowStart+IDrow)*_filterOutput.getNBcolumns();
|
||||
@ -477,14 +480,16 @@ void BasicRetinaFilter::_horizontalCausalFilter_addInput(const float *inputFrame
|
||||
*(outputPTR++) = result;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
// horizontal anticausal filter (basic way, no add on)
|
||||
void BasicRetinaFilter::_horizontalAnticausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd)
|
||||
{
|
||||
|
||||
//#pragma omp parallel for
|
||||
#ifdef HAVE_TBB
|
||||
tbb::parallel_for(tbb::blocked_range<size_t>(IDrowStart,IDrowEnd), Parallel_horizontalAnticausalFilter(outputFrame, IDrowEnd, _filterOutput.getNBcolumns(), _a ), tbb::auto_partitioner());
|
||||
#else
|
||||
for (unsigned int IDrow=IDrowStart; IDrow<IDrowEnd; ++IDrow)
|
||||
{
|
||||
register float* outputPTR=outputFrame+(IDrowEnd-IDrow)*(_filterOutput.getNBcolumns())-1;
|
||||
@ -495,9 +500,9 @@ void BasicRetinaFilter::_horizontalAnticausalFilter(float *outputFrame, unsigned
|
||||
*(outputPTR--) = result;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
// horizontal anticausal filter which multiplies the output by _gain
|
||||
void BasicRetinaFilter::_horizontalAnticausalFilter_multGain(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd)
|
||||
{
|
||||
@ -518,8 +523,10 @@ void BasicRetinaFilter::_horizontalAnticausalFilter_multGain(float *outputFrame,
|
||||
// vertical anticausal filter
|
||||
void BasicRetinaFilter::_verticalCausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd)
|
||||
{
|
||||
//#pragma omp parallel for
|
||||
for (unsigned int IDcolumn=IDcolumnStart; IDcolumn<IDcolumnEnd; ++IDcolumn)
|
||||
#ifdef HAVE_TBB
|
||||
tbb::parallel_for(tbb::blocked_range<size_t>(IDcolumnStart,IDcolumnEnd), Parallel_verticalCausalFilter(outputFrame, _filterOutput.getNBrows(), _filterOutput.getNBcolumns(), _a ), tbb::auto_partitioner());
|
||||
#else
|
||||
for (unsigned int IDcolumn=IDcolumnStart; IDcolumn<IDcolumnEnd; ++IDcolumn)
|
||||
{
|
||||
register float result=0;
|
||||
register float *outputPTR=outputFrame+IDcolumn;
|
||||
@ -532,6 +539,7 @@ void BasicRetinaFilter::_verticalCausalFilter(float *outputFrame, unsigned int I
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@ -558,7 +566,10 @@ void BasicRetinaFilter::_verticalAnticausalFilter(float *outputFrame, unsigned i
|
||||
// vertical anticausal filter which multiplies the output by _gain
|
||||
void BasicRetinaFilter::_verticalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd)
|
||||
{
|
||||
float* offset=outputFrame+_filterOutput.getNBpixels()-_filterOutput.getNBcolumns();
|
||||
#ifdef HAVE_TBB
|
||||
tbb::parallel_for(tbb::blocked_range<size_t>(IDcolumnStart,IDcolumnEnd), Parallel_verticalAnticausalFilter_multGain(outputFrame, _filterOutput.getNBrows(), _filterOutput.getNBcolumns(), _a, _gain ), tbb::auto_partitioner());
|
||||
#else
|
||||
float* offset=outputFrame+_filterOutput.getNBpixels()-_filterOutput.getNBcolumns();
|
||||
//#pragma omp parallel for
|
||||
for (unsigned int IDcolumn=IDcolumnStart; IDcolumn<IDcolumnEnd; ++IDcolumn)
|
||||
{
|
||||
@ -573,7 +584,7 @@ void BasicRetinaFilter::_verticalAnticausalFilter_multGain(float *outputFrame, u
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
/////////////////////////////////////////
|
||||
@ -745,8 +756,8 @@ void BasicRetinaFilter::_spatiotemporalLPfilter_Irregular(float *inputOutputFram
|
||||
|
||||
// launch the serie of 1D directional filters in order to compute the 2D low pass filter
|
||||
_horizontalCausalFilter_Irregular(inputOutputFrame, 0, (int)_filterOutput.getNBrows());
|
||||
_horizontalAnticausalFilter_Irregular(inputOutputFrame, 0, (int)_filterOutput.getNBrows());
|
||||
_verticalCausalFilter_Irregular(inputOutputFrame, 0, (int)_filterOutput.getNBcolumns());
|
||||
_horizontalAnticausalFilter_Irregular(inputOutputFrame, 0, (int)_filterOutput.getNBrows(), &_progressiveSpatialConstant[0]);
|
||||
_verticalCausalFilter_Irregular(inputOutputFrame, 0, (int)_filterOutput.getNBcolumns(), &_progressiveSpatialConstant[0]);
|
||||
_verticalAnticausalFilter_Irregular_multGain(inputOutputFrame, 0, (int)_filterOutput.getNBcolumns());
|
||||
|
||||
}
|
||||
@ -765,8 +776,8 @@ void BasicRetinaFilter::_spatiotemporalLPfilter_Irregular(const float *inputFram
|
||||
|
||||
// launch the serie of 1D directional filters in order to compute the 2D low pass filter
|
||||
_horizontalCausalFilter_Irregular_addInput(inputFrame, outputFrame, 0, (int)_filterOutput.getNBrows());
|
||||
_horizontalAnticausalFilter_Irregular(outputFrame, 0, (int)_filterOutput.getNBrows());
|
||||
_verticalCausalFilter_Irregular(outputFrame, 0, (int)_filterOutput.getNBcolumns());
|
||||
_horizontalAnticausalFilter_Irregular(outputFrame, 0, (int)_filterOutput.getNBrows(), &_progressiveSpatialConstant[0]);
|
||||
_verticalCausalFilter_Irregular(outputFrame, 0, (int)_filterOutput.getNBcolumns(), &_progressiveSpatialConstant[0]);
|
||||
_verticalAnticausalFilter_Irregular_multGain(outputFrame, 0, (int)_filterOutput.getNBcolumns());
|
||||
|
||||
}
|
||||
@ -806,10 +817,13 @@ void BasicRetinaFilter::_horizontalCausalFilter_Irregular_addInput(const float *
|
||||
}
|
||||
|
||||
// horizontal anticausal filter (basic way, no add on)
|
||||
void BasicRetinaFilter::_horizontalAnticausalFilter_Irregular(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd)
|
||||
void BasicRetinaFilter::_horizontalAnticausalFilter_Irregular(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd, const float *spatialConstantBuffer)
|
||||
{
|
||||
#ifdef HAVE_TBB
|
||||
tbb::parallel_for(tbb::blocked_range<size_t>(IDrowStart,IDrowEnd), Parallel_horizontalAnticausalFilter_Irregular(outputFrame, spatialConstantBuffer, IDrowEnd, _filterOutput.getNBcolumns()), tbb::auto_partitioner());
|
||||
#else
|
||||
register float* outputPTR=outputFrame+IDrowEnd*(_filterOutput.getNBcolumns())-1;
|
||||
register const float* spatialConstantPTR=&_progressiveSpatialConstant[0]+IDrowEnd*(_filterOutput.getNBcolumns())-1;
|
||||
register const float* spatialConstantPTR=spatialConstantBuffer+IDrowEnd*(_filterOutput.getNBcolumns())-1;
|
||||
|
||||
for (unsigned int IDrow=IDrowStart; IDrow<IDrowEnd; ++IDrow)
|
||||
{
|
||||
@ -820,18 +834,21 @@ void BasicRetinaFilter::_horizontalAnticausalFilter_Irregular(float *outputFrame
|
||||
*(outputPTR--) = result;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
// vertical anticausal filter
|
||||
void BasicRetinaFilter::_verticalCausalFilter_Irregular(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd)
|
||||
void BasicRetinaFilter::_verticalCausalFilter_Irregular(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd, const float *spatialConstantBuffer)
|
||||
{
|
||||
#ifdef HAVE_TBB
|
||||
tbb::parallel_for(tbb::blocked_range<size_t>(IDcolumnStart,IDcolumnEnd), Parallel_verticalCausalFilter_Irregular(outputFrame, spatialConstantBuffer, _filterOutput.getNBrows(), _filterOutput.getNBcolumns()), tbb::auto_partitioner());
|
||||
#else
|
||||
for (unsigned int IDcolumn=IDcolumnStart; IDcolumn<IDcolumnEnd; ++IDcolumn)
|
||||
{
|
||||
register float result=0;
|
||||
register float *outputPTR=outputFrame+IDcolumn;
|
||||
register const float *spatialConstantPTR=&_progressiveSpatialConstant[0]+IDcolumn;
|
||||
register const float *spatialConstantPTR=spatialConstantBuffer+IDcolumn;
|
||||
for (unsigned int index=0; index<_filterOutput.getNBrows(); ++index)
|
||||
{
|
||||
result = *(outputPTR) + *(spatialConstantPTR) * result;
|
||||
@ -840,6 +857,7 @@ void BasicRetinaFilter::_verticalCausalFilter_Irregular(float *outputFrame, unsi
|
||||
spatialConstantPTR+=_filterOutput.getNBcolumns();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// vertical anticausal filter which multiplies the output by _gain
|
||||
|
@ -393,48 +393,261 @@ protected:
|
||||
// LP filter that squares the input and computes the output ONLY on the areas where the integrationAreas map are TRUE
|
||||
void _localSquaringSpatioTemporalLPfilter(const float *inputFrame, float *LPfilterOutput, const unsigned int *integrationAreas, const unsigned int filterIndex=0);
|
||||
|
||||
// local luminance adaptation of the input in regard of localLuminance buffer
|
||||
void _localLuminanceAdaptation(const float *inputFrame, const float *localLuminance, float *outputFrame);
|
||||
// local luminance adaptation of the input in regard of localLuminance buffer, the input is rewrited and becomes the output
|
||||
void _localLuminanceAdaptation(float *inputOutputFrame, const float *localLuminance);
|
||||
// local adaptation applied on a range of values which can be positive and negative
|
||||
void _localLuminanceAdaptationPosNegValues(const float *inputFrame, const float *localLuminance, float *outputFrame);
|
||||
// local luminance adaptation of the input in regard of localLuminance buffer
|
||||
void _localLuminanceAdaptation(const float *inputFrame, const float *localLuminance, float *outputFrame, const bool updateLuminanceMean=true);
|
||||
// local luminance adaptation of the input in regard of localLuminance buffer, the input is rewrited and becomes the output
|
||||
void _localLuminanceAdaptation(float *inputOutputFrame, const float *localLuminance);
|
||||
// local adaptation applied on a range of values which can be positive and negative
|
||||
void _localLuminanceAdaptationPosNegValues(const float *inputFrame, const float *localLuminance, float *outputFrame);
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
// 1D directional filters used for the 2D low pass filtering
|
||||
|
||||
// 1D filters with image input
|
||||
void _horizontalCausalFilter_addInput(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
|
||||
// 1D filters with image input that is squared in the function
|
||||
void _squaringHorizontalCausalFilter(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
|
||||
// vertical anticausal filter that returns the mean value of its result
|
||||
float _verticalAnticausalFilter_returnMeanValue(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd);
|
||||
// 1D filters with image input
|
||||
void _horizontalCausalFilter_addInput(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
|
||||
// 1D filters with image input that is squared in the function // parallelized with TBB
|
||||
void _squaringHorizontalCausalFilter(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
|
||||
// vertical anticausal filter that returns the mean value of its result
|
||||
float _verticalAnticausalFilter_returnMeanValue(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd);
|
||||
|
||||
// most simple functions: only perform 1D filtering with output=input (no add on)
|
||||
void _horizontalCausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
|
||||
void _horizontalAnticausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
|
||||
void _verticalCausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd);
|
||||
void _verticalAnticausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd);
|
||||
// most simple functions: only perform 1D filtering with output=input (no add on)
|
||||
void _horizontalCausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
|
||||
void _horizontalAnticausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd); // parallelized with TBB
|
||||
void _verticalCausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd); // parallelized with TBB
|
||||
void _verticalAnticausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd);
|
||||
|
||||
// perform 1D filtering with output with varrying spatial coefficient
|
||||
void _horizontalCausalFilter_Irregular(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
|
||||
void _horizontalCausalFilter_Irregular_addInput(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
|
||||
void _horizontalAnticausalFilter_Irregular(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
|
||||
void _verticalCausalFilter_Irregular(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd);
|
||||
void _verticalAnticausalFilter_Irregular_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd);
|
||||
// perform 1D filtering with output with varrying spatial coefficient
|
||||
void _horizontalCausalFilter_Irregular(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
|
||||
void _horizontalCausalFilter_Irregular_addInput(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd);
|
||||
void _horizontalAnticausalFilter_Irregular(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd, const float *spatialConstantBuffer); // parallelized with TBB
|
||||
void _verticalCausalFilter_Irregular(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd, const float *spatialConstantBuffer); // parallelized with TBB
|
||||
void _verticalAnticausalFilter_Irregular_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd);
|
||||
|
||||
|
||||
// 1D filters in which the output is multiplied by _gain
|
||||
void _verticalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd); // this functions affects _gain at the output
|
||||
void _horizontalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd); // this functions affects _gain at the output
|
||||
// 1D filters in which the output is multiplied by _gain
|
||||
void _verticalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd); // this functions affects _gain at the output // parallelized with TBB
|
||||
void _horizontalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd); // this functions affects _gain at the output
|
||||
|
||||
// LP filter on specific parts of the picture instead of all the image
|
||||
// same functions (some of them) but take a binary flag to allow integration, false flag means, 0 at the output...
|
||||
void _local_squaringHorizontalCausalFilter(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd, const unsigned int *integrationAreas);
|
||||
void _local_horizontalAnticausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd, const unsigned int *integrationAreas);
|
||||
void _local_verticalCausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd, const unsigned int *integrationAreas);
|
||||
void _local_verticalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd, const unsigned int *integrationAreas); // this functions affects _gain at the output
|
||||
// LP filter on specific parts of the picture instead of all the image
|
||||
// same functions (some of them) but take a binary flag to allow integration, false flag means, 0 at the output...
|
||||
void _local_squaringHorizontalCausalFilter(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd, const unsigned int *integrationAreas);
|
||||
void _local_horizontalAnticausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd, const unsigned int *integrationAreas);
|
||||
void _local_verticalCausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd, const unsigned int *integrationAreas);
|
||||
void _local_verticalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd, const unsigned int *integrationAreas); // this functions affects _gain at the output
|
||||
|
||||
#ifdef HAVE_TBB
|
||||
/******************************************************
|
||||
** IF TBB is useable, then, main loops are parallelized using these functors
|
||||
** ==> main idea paralellise main filters loops, then, only the most used methods are parallelized... TODO : increase the number of parallelised methods as necessary
|
||||
** ==> functors names = Parallel_$$$ where $$$= the name of the serial method that is parallelised
|
||||
** ==> functors constructors can differ from the parameters used with their related serial functions
|
||||
*/
|
||||
|
||||
#define _DEBUG_TBB // define DEBUG_TBB in order to display additionnal data on stdout
|
||||
class Parallel_horizontalAnticausalFilter
|
||||
{
|
||||
private:
|
||||
float *outputFrame;
|
||||
const unsigned int IDrowEnd, nbColumns;
|
||||
const float filterParam_a;
|
||||
public:
|
||||
// constructor which takes the input image pointer reference reference and limits
|
||||
Parallel_horizontalAnticausalFilter(float *bufferToProcess, const unsigned int idEnd, const unsigned int nbCols, const float a )
|
||||
:outputFrame(bufferToProcess), IDrowEnd(idEnd), nbColumns(nbCols), filterParam_a(a)
|
||||
{
|
||||
#ifdef DEBUG_TBB
|
||||
std::cout<<"Parallel_horizontalAnticausalFilter::Parallel_horizontalAnticausalFilter :"
|
||||
<<"\n\t idEnd="<<IDrowEnd
|
||||
<<"\n\t nbCols="<<nbColumns
|
||||
<<"\n\t filterParam="<<filterParam_a
|
||||
<<std::endl;
|
||||
#endif
|
||||
}
|
||||
|
||||
void operator()( const tbb::blocked_range<size_t>& r ) const {
|
||||
|
||||
#ifdef DEBUG_TBB
|
||||
std::cout<<"Parallel_horizontalAnticausalFilter::operator() :"
|
||||
<<"\n\t range size="<<r.size()
|
||||
<<"\n\t first index="<<r.begin()
|
||||
//<<"\n\t last index="<<filterParam
|
||||
<<std::endl;
|
||||
#endif
|
||||
for (size_t IDrow=r.begin(); IDrow!=r.end(); ++IDrow)
|
||||
{
|
||||
register float* outputPTR=outputFrame+(IDrowEnd-IDrow)*(nbColumns)-1;
|
||||
register float result=0;
|
||||
for (unsigned int index=0; index<nbColumns; ++index)
|
||||
{
|
||||
result = *(outputPTR)+ filterParam_a* result;
|
||||
*(outputPTR--) = result;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class Parallel_horizontalCausalFilter_addInput
|
||||
{
|
||||
private:
|
||||
const float *inputFrame;
|
||||
float *outputFrame;
|
||||
const unsigned int IDrowStart, nbColumns;
|
||||
const float filterParam_a, filterParam_tau;
|
||||
public:
|
||||
Parallel_horizontalCausalFilter_addInput(const float *bufferToAddAsInputProcess, float *bufferToProcess, const unsigned int idStart, const unsigned int nbCols, const float a, const float tau)
|
||||
:inputFrame(bufferToAddAsInputProcess), outputFrame(bufferToProcess), IDrowStart(idStart), nbColumns(nbCols), filterParam_a(a), filterParam_tau(tau){}
|
||||
|
||||
void operator()( const tbb::blocked_range<size_t>& r ) const {
|
||||
for (unsigned int IDrow=r.begin(); IDrow!=r.end(); ++IDrow)
|
||||
{
|
||||
register float* outputPTR=outputFrame+(IDrowStart+IDrow)*nbColumns;
|
||||
register const float* inputPTR=inputFrame+(IDrowStart+IDrow)*nbColumns;
|
||||
register float result=0;
|
||||
for (unsigned int index=0; index<nbColumns; ++index)
|
||||
{
|
||||
result = *(inputPTR++) + filterParam_tau**(outputPTR)+ filterParam_a* result;
|
||||
*(outputPTR++) = result;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class Parallel_verticalCausalFilter
|
||||
{
|
||||
private:
|
||||
float *outputFrame;
|
||||
const unsigned int nbRows, nbColumns;
|
||||
const float filterParam_a;
|
||||
public:
|
||||
Parallel_verticalCausalFilter(float *bufferToProcess, const unsigned int nbRws, const unsigned int nbCols, const float a )
|
||||
:outputFrame(bufferToProcess), nbRows(nbRws), nbColumns(nbCols), filterParam_a(a){}
|
||||
|
||||
void operator()( const tbb::blocked_range<size_t>& r ) const {
|
||||
for (unsigned int IDcolumn=r.begin(); IDcolumn!=r.end(); ++IDcolumn)
|
||||
{
|
||||
register float result=0;
|
||||
register float *outputPTR=outputFrame+IDcolumn;
|
||||
|
||||
for (unsigned int index=0; index<nbRows; ++index)
|
||||
{
|
||||
result = *(outputPTR) + filterParam_a * result;
|
||||
*(outputPTR) = result;
|
||||
outputPTR+=nbColumns;
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class Parallel_verticalAnticausalFilter_multGain
|
||||
{
|
||||
private:
|
||||
float *outputFrame;
|
||||
const unsigned int nbRows, nbColumns;
|
||||
const float filterParam_a, filterParam_gain;
|
||||
public:
|
||||
Parallel_verticalAnticausalFilter_multGain(float *bufferToProcess, const unsigned int nbRws, const unsigned int nbCols, const float a, const float gain)
|
||||
:outputFrame(bufferToProcess), nbRows(nbRws), nbColumns(nbCols), filterParam_a(a), filterParam_gain(gain){}
|
||||
|
||||
void operator()( const tbb::blocked_range<size_t>& r ) const {
|
||||
float* offset=outputFrame+nbColumns*nbRows-nbColumns;
|
||||
for (unsigned int IDcolumn=r.begin(); IDcolumn!=r.end(); ++IDcolumn)
|
||||
{
|
||||
register float result=0;
|
||||
register float *outputPTR=offset+IDcolumn;
|
||||
|
||||
for (unsigned int index=0; index<nbRows; ++index)
|
||||
{
|
||||
result = *(outputPTR) + filterParam_a * result;
|
||||
*(outputPTR) = filterParam_gain*result;
|
||||
outputPTR-=nbColumns;
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class Parallel_localAdaptation
|
||||
{
|
||||
private:
|
||||
const float *localLuminance, *inputFrame;
|
||||
float *outputFrame;
|
||||
const float localLuminanceFactor, localLuminanceAddon, maxInputValue;
|
||||
public:
|
||||
Parallel_localAdaptation(const float *localLum, const float *inputImg, float *bufferToProcess, const float localLuminanceFact, const float localLuminanceAdd, const float maxInputVal)
|
||||
:localLuminance(localLum), inputFrame(inputImg),outputFrame(bufferToProcess), localLuminanceFactor(localLuminanceFact), localLuminanceAddon(localLuminanceAdd), maxInputValue(maxInputVal) {};
|
||||
|
||||
void operator()( const tbb::blocked_range<size_t>& r ) const {
|
||||
const float *localLuminancePTR=localLuminance+r.begin();
|
||||
const float *inputFramePTR=inputFrame+r.begin();
|
||||
float *outputFramePTR=outputFrame+r.begin();
|
||||
for (register unsigned int IDpixel=r.begin() ; IDpixel!=r.end() ; ++IDpixel, ++inputFramePTR, ++outputFramePTR)
|
||||
{
|
||||
float X0=*(localLuminancePTR++)*localLuminanceFactor+localLuminanceAddon;
|
||||
// TODO : the following line can lead to a divide by zero ! A small offset is added, take care if the offset is too large in case of High Dynamic Range images which can use very small values...
|
||||
*(outputFramePTR) = (maxInputValue+X0)**inputFramePTR/(*inputFramePTR +X0+0.00000000001);
|
||||
//std::cout<<"BasicRetinaFilter::inputFrame[IDpixel]=%f, X0=%f, outputFrame[IDpixel]=%f\n", inputFrame[IDpixel], X0, outputFrame[IDpixel]);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////////////////////////
|
||||
/// Specific filtering methods which manage non const spatial filtering parameter (used By retinacolor and LogProjectors)
|
||||
class Parallel_horizontalAnticausalFilter_Irregular
|
||||
{
|
||||
private:
|
||||
float *outputFrame;
|
||||
const float *spatialConstantBuffer;
|
||||
const unsigned int IDrowEnd, nbColumns;
|
||||
public:
|
||||
Parallel_horizontalAnticausalFilter_Irregular(float *bufferToProcess, const float *spatialConst, const unsigned int idEnd, const unsigned int nbCols)
|
||||
:outputFrame(bufferToProcess), spatialConstantBuffer(spatialConst), IDrowEnd(idEnd), nbColumns(nbCols){}
|
||||
|
||||
void operator()( const tbb::blocked_range<size_t>& r ) const {
|
||||
|
||||
for (size_t IDrow=r.begin(); IDrow!=r.end(); ++IDrow)
|
||||
{
|
||||
register float* outputPTR=outputFrame+(IDrowEnd-IDrow)*(nbColumns)-1;
|
||||
register const float* spatialConstantPTR=spatialConstantBuffer+(IDrowEnd-IDrow)*(nbColumns)-1;
|
||||
register float result=0;
|
||||
for (unsigned int index=0; index<nbColumns; ++index)
|
||||
{
|
||||
result = *(outputPTR)+ *(spatialConstantPTR--)* result;
|
||||
*(outputPTR--) = result;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class Parallel_verticalCausalFilter_Irregular
|
||||
{
|
||||
private:
|
||||
float *outputFrame;
|
||||
const float *spatialConstantBuffer;
|
||||
const unsigned int nbRows, nbColumns;
|
||||
public:
|
||||
Parallel_verticalCausalFilter_Irregular(float *bufferToProcess, const float *spatialConst, const unsigned int nbRws, const unsigned int nbCols)
|
||||
:outputFrame(bufferToProcess), spatialConstantBuffer(spatialConst), nbRows(nbRws), nbColumns(nbCols){}
|
||||
|
||||
void operator()( const tbb::blocked_range<size_t>& r ) const {
|
||||
for (unsigned int IDcolumn=r.begin(); IDcolumn!=r.end(); ++IDcolumn)
|
||||
{
|
||||
register float result=0;
|
||||
register float *outputPTR=outputFrame+IDcolumn;
|
||||
register const float* spatialConstantPTR=spatialConstantBuffer+IDcolumn;
|
||||
for (unsigned int index=0; index<nbRows; ++index)
|
||||
{
|
||||
result = *(outputPTR) + *(spatialConstantPTR) * result;
|
||||
*(outputPTR) = result;
|
||||
outputPTR+=nbColumns;
|
||||
spatialConstantPTR+=nbColumns;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
};
|
||||
|
||||
|
@ -153,6 +153,9 @@ void MagnoRetinaFilter::setCoefficientsTable(const float parasolCells_beta, cons
|
||||
|
||||
void MagnoRetinaFilter::_amacrineCellsComputing(const float *OPL_ON, const float *OPL_OFF)
|
||||
{
|
||||
#ifdef HAVE_TBB
|
||||
tbb::parallel_for(tbb::blocked_range<size_t>(0,_filterOutput.getNBpixels()), Parallel_amacrineCellsComputing(OPL_ON, OPL_OFF, &_previousInput_ON[0], &_previousInput_OFF[0], &_amacrinCellsTempOutput_ON[0], &_amacrinCellsTempOutput_OFF[0], _temporalCoefficient), tbb::auto_partitioner());
|
||||
#else
|
||||
register const float *OPL_ON_PTR=OPL_ON;
|
||||
register const float *OPL_OFF_PTR=OPL_OFF;
|
||||
register float *previousInput_ON_PTR= &_previousInput_ON[0];
|
||||
@ -175,6 +178,7 @@ void MagnoRetinaFilter::_amacrineCellsComputing(const float *OPL_ON, const float
|
||||
*(previousInput_OFF_PTR++)=*(OPL_OFF_PTR++);
|
||||
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// launch filter that runs all the IPL filter
|
||||
|
@ -190,10 +190,52 @@ private:
|
||||
// varialbles
|
||||
float _temporalCoefficient;
|
||||
|
||||
// amacrine cells filter : high pass temporal filter
|
||||
void _amacrineCellsComputing(const float *ONinput, const float *OFFinput);
|
||||
// amacrine cells filter : high pass temporal filter
|
||||
void _amacrineCellsComputing(const float *ONinput, const float *OFFinput);
|
||||
#ifdef HAVE_TBB
|
||||
/******************************************************
|
||||
** IF TBB is useable, then, main loops are parallelized using these functors
|
||||
** ==> main idea paralellise main filters loops, then, only the most used methods are parallelized... TODO : increase the number of parallelised methods as necessary
|
||||
** ==> functors names = Parallel_$$$ where $$$= the name of the serial method that is parallelised
|
||||
** ==> functors constructors can differ from the parameters used with their related serial functions
|
||||
*/
|
||||
class Parallel_amacrineCellsComputing
|
||||
{
|
||||
private:
|
||||
const float *OPL_ON, *OPL_OFF;
|
||||
float *previousInput_ON, *previousInput_OFF, *amacrinCellsTempOutput_ON, *amacrinCellsTempOutput_OFF;
|
||||
const float temporalCoefficient;
|
||||
public:
|
||||
Parallel_amacrineCellsComputing(const float *OPL_ON_PTR, const float *OPL_OFF_PTR, float *previousInput_ON_PTR, float *previousInput_OFF_PTR, float *amacrinCellsTempOutput_ON_PTR, float *amacrinCellsTempOutput_OFF_PTR, float temporalCoefficientVal)
|
||||
:OPL_ON(OPL_ON_PTR), OPL_OFF(OPL_OFF_PTR), previousInput_ON(previousInput_ON_PTR), previousInput_OFF(previousInput_OFF_PTR), amacrinCellsTempOutput_ON(amacrinCellsTempOutput_ON_PTR), amacrinCellsTempOutput_OFF(amacrinCellsTempOutput_OFF_PTR), temporalCoefficient(temporalCoefficientVal) {}
|
||||
|
||||
void operator()( const tbb::blocked_range<size_t>& r ) const {
|
||||
register const float *OPL_ON_PTR=OPL_ON+r.begin();
|
||||
register const float *OPL_OFF_PTR=OPL_OFF+r.begin();
|
||||
register float *previousInput_ON_PTR= previousInput_ON+r.begin();
|
||||
register float *previousInput_OFF_PTR= previousInput_OFF+r.begin();
|
||||
register float *amacrinCellsTempOutput_ON_PTR= amacrinCellsTempOutput_ON+r.begin();
|
||||
register float *amacrinCellsTempOutput_OFF_PTR= amacrinCellsTempOutput_OFF+r.begin();
|
||||
|
||||
for (unsigned int IDpixel=r.begin() ; IDpixel!=r.end(); ++IDpixel)
|
||||
{
|
||||
|
||||
/* Compute ON and OFF amacrin cells high pass temporal filter */
|
||||
float magnoXonPixelResult = temporalCoefficient*(*amacrinCellsTempOutput_ON_PTR+ *OPL_ON_PTR-*previousInput_ON_PTR);
|
||||
*(amacrinCellsTempOutput_ON_PTR++)=((float)(magnoXonPixelResult>0))*magnoXonPixelResult;
|
||||
|
||||
float magnoXoffPixelResult = temporalCoefficient*(*amacrinCellsTempOutput_OFF_PTR+ *OPL_OFF_PTR-*previousInput_OFF_PTR);
|
||||
*(amacrinCellsTempOutput_OFF_PTR++)=((float)(magnoXoffPixelResult>0))*magnoXoffPixelResult;
|
||||
|
||||
/* prepare next loop */
|
||||
*(previousInput_ON_PTR++)=*(OPL_ON_PTR++);
|
||||
*(previousInput_OFF_PTR++)=*(OPL_OFF_PTR++);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
#endif
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -199,17 +199,20 @@ const std::valarray<float> &ParvoRetinaFilter::runFilter(const std::valarray<flo
|
||||
return (*_parvocellularOutputONminusOFF);
|
||||
}
|
||||
|
||||
void ParvoRetinaFilter::_OPL_OnOffWaysComputing()
|
||||
void ParvoRetinaFilter::_OPL_OnOffWaysComputing() // WARNING : this method requires many buffer accesses, parallelizing can increase bandwith & core efficacy
|
||||
{
|
||||
// loop that makes the difference between photoreceptor cells output and horizontal cells
|
||||
// positive part goes on the ON way, negative pat goes on the OFF way
|
||||
register float *photoreceptorsOutput_PTR= &_photoreceptorsOutput[0];
|
||||
register float *horizontalCellsOutput_PTR= &_horizontalCellsOutput[0];
|
||||
register float *bipolarCellsON_PTR = &_bipolarCellsOutputON[0];
|
||||
register float *bipolarCellsOFF_PTR = &_bipolarCellsOutputOFF[0];
|
||||
register float *parvocellularOutputON_PTR= &_parvocellularOutputON[0];
|
||||
register float *parvocellularOutputOFF_PTR= &_parvocellularOutputOFF[0];
|
||||
|
||||
#ifdef HAVE_TBB
|
||||
tbb::parallel_for(tbb::blocked_range<size_t>(0,_filterOutput.getNBpixels()), Parallel_OPL_OnOffWaysComputing(&_photoreceptorsOutput[0], &_horizontalCellsOutput[0], &_bipolarCellsOutputON[0], &_bipolarCellsOutputOFF[0], &_parvocellularOutputON[0], &_parvocellularOutputOFF[0]), tbb::auto_partitioner());
|
||||
#else
|
||||
float *photoreceptorsOutput_PTR= &_photoreceptorsOutput[0];
|
||||
float *horizontalCellsOutput_PTR= &_horizontalCellsOutput[0];
|
||||
float *bipolarCellsON_PTR = &_bipolarCellsOutputON[0];
|
||||
float *bipolarCellsOFF_PTR = &_bipolarCellsOutputOFF[0];
|
||||
float *parvocellularOutputON_PTR= &_parvocellularOutputON[0];
|
||||
float *parvocellularOutputOFF_PTR= &_parvocellularOutputOFF[0];
|
||||
// compute bipolar cells response equal to photoreceptors minus horizontal cells response
|
||||
// and copy the result on parvo cellular outputs... keeping time before their local contrast adaptation for final result
|
||||
for (register unsigned int IDpixel=0 ; IDpixel<_filterOutput.getNBpixels() ; ++IDpixel)
|
||||
@ -222,6 +225,7 @@ void ParvoRetinaFilter::_OPL_OnOffWaysComputing()
|
||||
*(parvocellularOutputON_PTR++)=*(bipolarCellsON_PTR++) = isPositive*pixelDifference;
|
||||
*(parvocellularOutputOFF_PTR++)=*(bipolarCellsOFF_PTR++)= (isPositive-1.0f)*pixelDifference;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -216,6 +216,45 @@ private:
|
||||
// private functions
|
||||
void _OPL_OnOffWaysComputing();
|
||||
|
||||
#ifdef HAVE_TBB
|
||||
/******************************************************
|
||||
** IF TBB is useable, then, main loops are parallelized using these functors
|
||||
** ==> main idea paralellise main filters loops, then, only the most used methods are parallelized... TODO : increase the number of parallelised methods as necessary
|
||||
** ==> functors names = Parallel_$$$ where $$$= the name of the serial method that is parallelised
|
||||
** ==> functors constructors can differ from the parameters used with their related serial functions
|
||||
*/
|
||||
class Parallel_OPL_OnOffWaysComputing
|
||||
{
|
||||
private:
|
||||
float *photoreceptorsOutput, *horizontalCellsOutput, *bipolarCellsON, *bipolarCellsOFF, *parvocellularOutputON, *parvocellularOutputOFF;
|
||||
public:
|
||||
Parallel_OPL_OnOffWaysComputing(float *photoreceptorsOutput_PTR, float *horizontalCellsOutput_PTR, float *bipolarCellsON_PTR, float *bipolarCellsOFF_PTR, float *parvocellularOutputON_PTR, float *parvocellularOutputOFF_PTR)
|
||||
:photoreceptorsOutput(photoreceptorsOutput_PTR), horizontalCellsOutput(horizontalCellsOutput_PTR), bipolarCellsON(bipolarCellsON_PTR), bipolarCellsOFF(bipolarCellsOFF_PTR), parvocellularOutputON(parvocellularOutputON_PTR), parvocellularOutputOFF(parvocellularOutputOFF_PTR) {}
|
||||
|
||||
void operator()( const tbb::blocked_range<size_t>& r ) const {
|
||||
// compute bipolar cells response equal to photoreceptors minus horizontal cells response
|
||||
// and copy the result on parvo cellular outputs... keeping time before their local contrast adaptation for final result
|
||||
float *photoreceptorsOutput_PTR= photoreceptorsOutput+r.begin();
|
||||
float *horizontalCellsOutput_PTR= horizontalCellsOutput+r.begin();
|
||||
float *bipolarCellsON_PTR = bipolarCellsON+r.begin();
|
||||
float *bipolarCellsOFF_PTR = bipolarCellsOFF+r.begin();
|
||||
float *parvocellularOutputON_PTR= parvocellularOutputON+r.begin();
|
||||
float *parvocellularOutputOFF_PTR= parvocellularOutputOFF+r.begin();
|
||||
|
||||
for (register unsigned int IDpixel=r.begin() ; IDpixel!=r.end() ; ++IDpixel)
|
||||
{
|
||||
float pixelDifference = *(photoreceptorsOutput_PTR++) -*(horizontalCellsOutput_PTR++);
|
||||
// test condition to allow write pixelDifference in ON or OFF buffer and 0 in the over
|
||||
float isPositive=(float) (pixelDifference>0.0f);
|
||||
|
||||
// ON and OFF channels writing step
|
||||
*(parvocellularOutputON_PTR++)=*(bipolarCellsON_PTR++) = isPositive*pixelDifference;
|
||||
*(parvocellularOutputOFF_PTR++)=*(bipolarCellsOFF_PTR++)= (isPositive-1.0f)*pixelDifference;
|
||||
}
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
};
|
||||
}
|
||||
#endif
|
||||
|
@ -89,7 +89,7 @@ RetinaColor::RetinaColor(const unsigned int NBrows, const unsigned int NBcolumns
|
||||
_demultiplexedColorFrame(NBrows*NBcolumns*3),
|
||||
_chrominance(NBrows*NBcolumns*3),
|
||||
_colorLocalDensity(NBrows*NBcolumns*3),
|
||||
_imageGradient(NBrows*NBcolumns*3)
|
||||
_imageGradient(NBrows*NBcolumns*2)
|
||||
{
|
||||
// link to parent buffers (let's recycle !)
|
||||
_luminance=&_filterOutput;
|
||||
@ -126,12 +126,12 @@ RetinaColor::~RetinaColor()
|
||||
void RetinaColor::clearAllBuffers()
|
||||
{
|
||||
BasicRetinaFilter::clearAllBuffers();
|
||||
_tempMultiplexedFrame=0;
|
||||
_demultiplexedTempBuffer=0;
|
||||
_tempMultiplexedFrame=0.f;
|
||||
_demultiplexedTempBuffer=0.f;
|
||||
|
||||
_demultiplexedColorFrame=0;
|
||||
_chrominance=0;
|
||||
_imageGradient=1;
|
||||
_demultiplexedColorFrame=0.f;
|
||||
_chrominance=0.f;
|
||||
_imageGradient=0.57f;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -149,7 +149,7 @@ void RetinaColor::resize(const unsigned int NBrows, const unsigned int NBcolumns
|
||||
_demultiplexedColorFrame.resize(NBrows*NBcolumns*3);
|
||||
_chrominance.resize(NBrows*NBcolumns*3);
|
||||
_colorLocalDensity.resize(NBrows*NBcolumns*3);
|
||||
_imageGradient.resize(NBrows*NBcolumns*3);
|
||||
_imageGradient.resize(NBrows*NBcolumns*2);
|
||||
|
||||
// link to parent buffers (let's recycle !)
|
||||
_luminance=&_filterOutput;
|
||||
@ -325,15 +325,15 @@ void RetinaColor::runColorDemultiplexing(const std::valarray<float> &multiplexed
|
||||
|
||||
}else
|
||||
{
|
||||
register const float *multiplexedColorFramePTR1= get_data(multiplexedColorFrame);
|
||||
for (unsigned int indexc=0; indexc<_filterOutput.getNBpixels() ; ++indexc, ++chrominancePTR, ++colorLocalDensityPTR, ++luminance, ++multiplexedColorFramePTR1)
|
||||
register const float *multiplexedColorFramePTR= get_data(multiplexedColorFrame);
|
||||
for (unsigned int indexc=0; indexc<_filterOutput.getNBpixels() ; ++indexc, ++chrominancePTR, ++colorLocalDensityPTR, ++luminance, ++multiplexedColorFramePTR)
|
||||
{
|
||||
// normalize by photoreceptors density
|
||||
float Cr=*(chrominancePTR)*_colorLocalDensity[indexc];
|
||||
float Cg=*(chrominancePTR+_filterOutput.getNBpixels())*_colorLocalDensity[indexc+_filterOutput.getNBpixels()];
|
||||
float Cb=*(chrominancePTR+_filterOutput.getDoubleNBpixels())*_colorLocalDensity[indexc+_filterOutput.getDoubleNBpixels()];
|
||||
*luminance=(Cr+Cg+Cb)*_pG;
|
||||
_demultiplexedTempBuffer[_colorSampling[indexc]] = *multiplexedColorFramePTR1 - *luminance;
|
||||
_demultiplexedTempBuffer[_colorSampling[indexc]] = *multiplexedColorFramePTR - *luminance;
|
||||
|
||||
}
|
||||
|
||||
@ -349,8 +349,9 @@ void RetinaColor::runColorDemultiplexing(const std::valarray<float> &multiplexed
|
||||
_adaptiveSpatialLPfilter(&_demultiplexedTempBuffer[0]+_filterOutput.getNBpixels(), &_demultiplexedColorFrame[0]+_filterOutput.getNBpixels());
|
||||
_adaptiveSpatialLPfilter(&_demultiplexedTempBuffer[0]+_filterOutput.getDoubleNBpixels(), &_demultiplexedColorFrame[0]+_filterOutput.getDoubleNBpixels());
|
||||
|
||||
for (unsigned int index=0; index<_filterOutput.getNBpixels()*3 ; ++index) // cette boucle pourrait <20>tre supprimee en passant la densit<69> <20> la fonction de filtrage
|
||||
_demultiplexedColorFrame[index] /= _chrominance[index];
|
||||
/* for (unsigned int index=0; index<_filterOutput.getNBpixels()*3 ; ++index) // cette boucle pourrait <20>tre supprimee en passant la densit<69> <20> la fonction de filtrage
|
||||
_demultiplexedColorFrame[index] /= _chrominance[index];*/
|
||||
_demultiplexedColorFrame/=_chrominance; // more optimal ;o)
|
||||
|
||||
// compute and substract the residual luminance
|
||||
for (unsigned int index=0; index<_filterOutput.getNBpixels() ; ++index)
|
||||
@ -432,6 +433,9 @@ void RetinaColor::clipRGBOutput_0_maxInputValue(float *inputOutputBuffer, const
|
||||
if (inputOutputBuffer==NULL)
|
||||
inputOutputBuffer= &_demultiplexedColorFrame[0];
|
||||
|
||||
#ifdef HAVE_TBB // call the TemplateBuffer TBB clipping method
|
||||
tbb::parallel_for(tbb::blocked_range<size_t>(0,_filterOutput.getNBpixels()*3), Parallel_clipBufferValues<float>(inputOutputBuffer, 0, maxInputValue), tbb::auto_partitioner());
|
||||
#else
|
||||
register float *inputOutputBufferPTR=inputOutputBuffer;
|
||||
for (register unsigned int jf = 0; jf < _filterOutput.getNBpixels()*3; ++jf, ++inputOutputBufferPTR)
|
||||
{
|
||||
@ -440,6 +444,7 @@ void RetinaColor::clipRGBOutput_0_maxInputValue(float *inputOutputBuffer, const
|
||||
else if (*inputOutputBufferPTR<0)
|
||||
*inputOutputBufferPTR=0;
|
||||
}
|
||||
#endif
|
||||
//std::cout<<"RetinaColor::...normalizing RGB frame OK"<<std::endl;
|
||||
}
|
||||
|
||||
@ -535,8 +540,8 @@ void RetinaColor::_applyRIFfilter(const float *sourceBuffer, float *destinationB
|
||||
|
||||
void RetinaColor::_getNormalizedContoursImage(const float *inputFrame, float *outputFrame)
|
||||
{
|
||||
float maxValue=0;
|
||||
float normalisationFactor=1.f/3;
|
||||
float maxValue=0.f;
|
||||
float normalisationFactor=1.f/3.f;
|
||||
for (unsigned int indexr=1 ; indexr<_filterOutput.getNBrows()-1; ++indexr)
|
||||
{
|
||||
for (unsigned int indexc=1 ; indexc<_filterOutput.getNBcolumns()-1; ++indexc)
|
||||
@ -564,19 +569,23 @@ void RetinaColor::_adaptiveSpatialLPfilter(const float *inputFrame, float *outpu
|
||||
_gain = (1-0.57f)*(1-0.57f)*(1-0.06f)*(1-0.06f);
|
||||
|
||||
// launch the serie of 1D directional filters in order to compute the 2D low pass filter
|
||||
// -> horizontal filters work with the first layer of imageGradient
|
||||
_adaptiveHorizontalCausalFilter_addInput(inputFrame, outputFrame, 0, _filterOutput.getNBrows());
|
||||
_adaptiveHorizontalAnticausalFilter(outputFrame, 0, _filterOutput.getNBrows());
|
||||
_adaptiveVerticalCausalFilter(outputFrame, 0, _filterOutput.getNBcolumns());
|
||||
_horizontalAnticausalFilter_Irregular(outputFrame, 0, _filterOutput.getNBrows(), &_imageGradient[0]);
|
||||
// -> horizontal filters work with the second layer of imageGradient
|
||||
_verticalCausalFilter_Irregular(outputFrame, 0, _filterOutput.getNBcolumns(), &_imageGradient[0]+_filterOutput.getNBpixels());
|
||||
_adaptiveVerticalAnticausalFilter_multGain(outputFrame, 0, _filterOutput.getNBcolumns());
|
||||
|
||||
}
|
||||
|
||||
// horizontal causal filter which adds the input inside
|
||||
// horizontal causal filter which adds the input inside... replaces the parent _horizontalCausalFilter_Irregular_addInput by avoiding a product for each pixel
|
||||
void RetinaColor::_adaptiveHorizontalCausalFilter_addInput(const float *inputFrame, float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd)
|
||||
{
|
||||
#ifdef HAVE_TBB
|
||||
tbb::parallel_for(tbb::blocked_range<size_t>(IDrowStart,IDrowEnd), Parallel_adaptiveHorizontalCausalFilter_addInput(inputFrame, outputFrame, &_imageGradient[0], _filterOutput.getNBcolumns()), tbb::auto_partitioner());
|
||||
#else
|
||||
register float* outputPTR=outputFrame+IDrowStart*_filterOutput.getNBcolumns();
|
||||
register const float* inputPTR=inputFrame+IDrowStart*_filterOutput.getNBcolumns();
|
||||
register float *imageGradientPTR= &_imageGradient[0]+IDrowStart*_filterOutput.getNBcolumns();
|
||||
register const float *imageGradientPTR= &_imageGradient[0]+IDrowStart*_filterOutput.getNBcolumns();
|
||||
for (unsigned int IDrow=IDrowStart; IDrow<IDrowEnd; ++IDrow)
|
||||
{
|
||||
register float result=0;
|
||||
@ -589,51 +598,17 @@ void RetinaColor::_adaptiveHorizontalCausalFilter_addInput(const float *inputFra
|
||||
}
|
||||
// std::cout<<" "<<std::endl;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// horizontal anticausal filter (basic way, no add on)
|
||||
void RetinaColor::_adaptiveHorizontalAnticausalFilter(float *outputFrame, unsigned int IDrowStart, unsigned int IDrowEnd)
|
||||
{
|
||||
register float* outputPTR=outputFrame+IDrowEnd*(_filterOutput.getNBcolumns())-1;
|
||||
register float *imageGradientPTR= &_imageGradient[0]+IDrowEnd*(_filterOutput.getNBcolumns())-1;
|
||||
|
||||
for (unsigned int IDrow=IDrowStart; IDrow<IDrowEnd; ++IDrow)
|
||||
{
|
||||
register float result=0;
|
||||
for (unsigned int index=0; index<_filterOutput.getNBcolumns(); ++index)
|
||||
{
|
||||
result = *(outputPTR)+ (*imageGradientPTR)* result;
|
||||
*(outputPTR--) = result;
|
||||
--imageGradientPTR;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// vertical anticausal filter
|
||||
void RetinaColor::_adaptiveVerticalCausalFilter(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd)
|
||||
{
|
||||
for (unsigned int IDcolumn=IDcolumnStart; IDcolumn<IDcolumnEnd; ++IDcolumn)
|
||||
{
|
||||
register float result=0;
|
||||
register float *outputPTR=outputFrame+IDcolumn;
|
||||
register float *imageGradientPTR= &_imageGradient[0]+IDcolumn;
|
||||
|
||||
for (unsigned int index=0; index<_filterOutput.getNBrows(); ++index)
|
||||
{
|
||||
result = *(outputPTR) + (*(imageGradientPTR+_filterOutput.getNBpixels())) * result;
|
||||
*(outputPTR) = result;
|
||||
outputPTR+=_filterOutput.getNBcolumns();
|
||||
imageGradientPTR+=_filterOutput.getNBcolumns();
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// vertical anticausal filter which multiplies the output by _gain
|
||||
// vertical anticausal filter which multiplies the output by _gain... replaces the parent _verticalAnticausalFilter_multGain by avoiding a product for each pixel and taking into account the second layer of the _imageGradient buffer
|
||||
void RetinaColor::_adaptiveVerticalAnticausalFilter_multGain(float *outputFrame, unsigned int IDcolumnStart, unsigned int IDcolumnEnd)
|
||||
{
|
||||
#ifdef HAVE_TBB
|
||||
tbb::parallel_for(tbb::blocked_range<size_t>(IDcolumnStart,IDcolumnEnd), Parallel_adaptiveVerticalAnticausalFilter_multGain(outputFrame, &_imageGradient[0]+_filterOutput.getNBpixels(), _filterOutput.getNBrows(), _filterOutput.getNBcolumns(), _gain), tbb::auto_partitioner());
|
||||
#else
|
||||
float* outputOffset=outputFrame+_filterOutput.getNBpixels()-_filterOutput.getNBcolumns();
|
||||
float* gradOffset= &_imageGradient[0]+_filterOutput.getNBpixels()-_filterOutput.getNBcolumns();
|
||||
float* gradOffset= &_imageGradient[0]+_filterOutput.getNBpixels()*2-_filterOutput.getNBcolumns();
|
||||
|
||||
for (unsigned int IDcolumn=IDcolumnStart; IDcolumn<IDcolumnEnd; ++IDcolumn)
|
||||
{
|
||||
@ -642,12 +617,13 @@ void RetinaColor::_adaptiveVerticalAnticausalFilter_multGain(float *outputFrame,
|
||||
register float *imageGradientPTR=gradOffset+IDcolumn;
|
||||
for (unsigned int index=0; index<_filterOutput.getNBrows(); ++index)
|
||||
{
|
||||
result = *(outputPTR) + (*(imageGradientPTR+_filterOutput.getNBpixels())) * result;
|
||||
result = *(outputPTR) + (*(imageGradientPTR)) * result;
|
||||
*(outputPTR) = _gain*result;
|
||||
outputPTR-=_filterOutput.getNBcolumns();
|
||||
imageGradientPTR-=_filterOutput.getNBcolumns();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
///////////////////////////
|
||||
|
@ -248,9 +248,7 @@ protected:
|
||||
void _getNormalizedContoursImage(const float *inputFrame, float *outputFrame);
|
||||
// -> special adaptive filters dedicated to low pass filtering on the chrominance (skeeps filtering on the edges)
|
||||
void _adaptiveSpatialLPfilter(const float *inputFrame, float *outputFrame);
|
||||
void _adaptiveHorizontalCausalFilter_addInput(const float *inputFrame, float *outputFrame, const unsigned int IDrowStart, const unsigned int IDrowEnd);
|
||||
void _adaptiveHorizontalAnticausalFilter(float *outputFrame, const unsigned int IDrowStart, const unsigned int IDrowEnd);
|
||||
void _adaptiveVerticalCausalFilter(float *outputFrame, const unsigned int IDcolumnStart, const unsigned int IDcolumnEnd);
|
||||
void _adaptiveHorizontalCausalFilter_addInput(const float *inputFrame, float *outputFrame, const unsigned int IDrowStart, const unsigned int IDrowEnd); // TBB parallelized
|
||||
void _adaptiveVerticalAnticausalFilter_multGain(float *outputFrame, const unsigned int IDcolumnStart, const unsigned int IDcolumnEnd);
|
||||
void _computeGradient(const float *luminance);
|
||||
void _normalizeOutputs_0_maxOutputValue(void);
|
||||
@ -258,6 +256,84 @@ protected:
|
||||
// color space transform
|
||||
void _applyImageColorSpaceConversion(const std::valarray<float> &inputFrame, std::valarray<float> &outputFrame, const float *transformTable);
|
||||
|
||||
#ifdef HAVE_TBB
|
||||
/******************************************************
|
||||
** IF TBB is useable, then, main loops are parallelized using these functors
|
||||
** ==> main idea paralellise main filters loops, then, only the most used methods are parallelized... TODO : increase the number of parallelised methods as necessary
|
||||
** ==> functors names = Parallel_$$$ where $$$= the name of the serial method that is parallelised
|
||||
** ==> functors constructors can differ from the parameters used with their related serial functions
|
||||
*/
|
||||
|
||||
/* Template :
|
||||
class
|
||||
{
|
||||
private:
|
||||
|
||||
public:
|
||||
Parallel_()
|
||||
: {}
|
||||
|
||||
void operator()( const tbb::blocked_range<size_t>& r ) const {
|
||||
|
||||
}
|
||||
}:
|
||||
*/
|
||||
class Parallel_adaptiveHorizontalCausalFilter_addInput
|
||||
{
|
||||
private:
|
||||
float *outputFrame;
|
||||
const float *inputFrame, *imageGradient;
|
||||
const unsigned int nbColumns;
|
||||
public:
|
||||
Parallel_adaptiveHorizontalCausalFilter_addInput(const float *inputImg, float *bufferToProcess, const float *imageGrad, const unsigned int nbCols)
|
||||
:outputFrame(bufferToProcess), inputFrame(inputImg), imageGradient(imageGrad), nbColumns(nbCols) {};
|
||||
|
||||
void operator()( const tbb::blocked_range<size_t>& r ) const {
|
||||
register float* outputPTR=outputFrame+r.begin()*nbColumns;
|
||||
register const float* inputPTR=inputFrame+r.begin()*nbColumns;
|
||||
register const float *imageGradientPTR= imageGradient+r.begin()*nbColumns;
|
||||
for (unsigned int IDrow=r.begin(); IDrow!=r.end(); ++IDrow)
|
||||
{
|
||||
register float result=0;
|
||||
for (unsigned int index=0; index<nbColumns; ++index)
|
||||
{
|
||||
result = *(inputPTR++) + (*imageGradientPTR++)* result;
|
||||
*(outputPTR++) = result;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class Parallel_adaptiveVerticalAnticausalFilter_multGain
|
||||
{
|
||||
private:
|
||||
float *outputFrame;
|
||||
const float *imageGradient;
|
||||
const unsigned int nbRows, nbColumns;
|
||||
const float filterParam_gain;
|
||||
public:
|
||||
Parallel_adaptiveVerticalAnticausalFilter_multGain(float *bufferToProcess, const float *imageGrad, const unsigned int nbRws, const unsigned int nbCols, const float gain)
|
||||
:outputFrame(bufferToProcess), imageGradient(imageGrad), nbRows(nbRws), nbColumns(nbCols), filterParam_gain(gain){}
|
||||
|
||||
void operator()( const tbb::blocked_range<size_t>& r ) const {
|
||||
float* offset=outputFrame+nbColumns*nbRows-nbColumns;
|
||||
const float* gradOffset= imageGradient+nbColumns*nbRows-nbColumns;
|
||||
for (unsigned int IDcolumn=r.begin(); IDcolumn!=r.end(); ++IDcolumn)
|
||||
{
|
||||
register float result=0;
|
||||
register float *outputPTR=offset+IDcolumn;
|
||||
register const float *imageGradientPTR=gradOffset+IDcolumn;
|
||||
for (unsigned int index=0; index<nbRows; ++index)
|
||||
{
|
||||
result = *(outputPTR) + *(imageGradientPTR) * result;
|
||||
*(outputPTR) = filterParam_gain*result;
|
||||
outputPTR-=nbColumns;
|
||||
imageGradientPTR-=nbColumns;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
#endif
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -70,6 +70,38 @@
|
||||
#include <iostream>
|
||||
#include <cmath>
|
||||
|
||||
|
||||
//// If TBB is used
|
||||
// ==> then include required includes
|
||||
#ifdef HAVE_TBB
|
||||
#include "tbb/parallel_for.h"
|
||||
#include "tbb/blocked_range.h"
|
||||
|
||||
// ==> declare usefull generic tools
|
||||
template <class type>
|
||||
class Parallel_clipBufferValues
|
||||
{
|
||||
private:
|
||||
type *bufferToClip;
|
||||
const type minValue, maxValue;
|
||||
|
||||
public:
|
||||
Parallel_clipBufferValues(type* bufferToProcess, const type min, const type max)
|
||||
: bufferToClip(bufferToProcess), minValue(min), maxValue(max){}
|
||||
|
||||
void operator()( const tbb::blocked_range<size_t>& r ) const {
|
||||
register type *inputOutputBufferPTR=bufferToClip+r.begin();
|
||||
for (register unsigned int jf = r.begin(); jf != r.end(); ++jf, ++inputOutputBufferPTR)
|
||||
{
|
||||
if (*inputOutputBufferPTR>maxValue)
|
||||
*inputOutputBufferPTR=maxValue;
|
||||
else if (*inputOutputBufferPTR<minValue)
|
||||
*inputOutputBufferPTR=minValue;
|
||||
}
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
//#define __TEMPLATEBUFFERDEBUG //define TEMPLATEBUFFERDEBUG in order to display debug information
|
||||
|
||||
namespace cv
|
||||
@ -351,21 +383,25 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
std::cout<<"Tdebug"<<std::endl;
|
||||
std::cout<<"deltaL="<<deltaL<<", deltaH="<<deltaH<<std::endl;
|
||||
std::cout<<"this->max()"<<this->max()<<"maxThreshold="<<maxThreshold<<"updatedHighValue="<<updatedHighValue<<std::endl;
|
||||
std::cout<<"this->min()"<<this->min()<<"minThreshold="<<minThreshold<<"updatedLowValue="<<updatedLowValue<<std::endl;
|
||||
// clipping values outside than the updated thresholds
|
||||
bufferPTR=this->Buffer();
|
||||
for (unsigned int i=0;i<this->size();++i, ++bufferPTR)
|
||||
{
|
||||
if (*bufferPTR<updatedLowValue)
|
||||
*bufferPTR=updatedLowValue;
|
||||
else if (*bufferPTR>updatedHighValue)
|
||||
*bufferPTR=updatedHighValue;
|
||||
}
|
||||
std::cout<<"Tdebug"<<std::endl;
|
||||
std::cout<<"deltaL="<<deltaL<<", deltaH="<<deltaH<<std::endl;
|
||||
std::cout<<"this->max()"<<this->max()<<"maxThreshold="<<maxThreshold<<"updatedHighValue="<<updatedHighValue<<std::endl;
|
||||
std::cout<<"this->min()"<<this->min()<<"minThreshold="<<minThreshold<<"updatedLowValue="<<updatedLowValue<<std::endl;
|
||||
// clipping values outside than the updated thresholds
|
||||
bufferPTR=this->Buffer();
|
||||
#ifdef HAVE_TBB // call the TemplateBuffer TBB clipping method
|
||||
tbb::parallel_for(tbb::blocked_range<size_t>(0,this->size()), Parallel_clipBufferValues<type>(bufferPTR, updatedLowValue, updatedHighValue), tbb::auto_partitioner());
|
||||
#else
|
||||
|
||||
normalizeGrayOutput_0_maxOutputValue(this->Buffer(), this->size(), maxOutputValue);
|
||||
for (unsigned int i=0;i<this->size();++i, ++bufferPTR)
|
||||
{
|
||||
if (*bufferPTR<updatedLowValue)
|
||||
*bufferPTR=updatedLowValue;
|
||||
else if (*bufferPTR>updatedHighValue)
|
||||
*bufferPTR=updatedHighValue;
|
||||
}
|
||||
#endif
|
||||
normalizeGrayOutput_0_maxOutputValue(this->Buffer(), this->size(), maxOutputValue);
|
||||
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user