opencv/modules/video/src/bgfg_KNN.cpp

654 lines
22 KiB
C++
Executable File

/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
//#include <math.h>
#include "precomp.hpp"
namespace cv
{
/*!
The class implements the following algorithm:
"Efficient Adaptive Density Estimation per Image Pixel for the Task of Background Subtraction"
Z.Zivkovic, F. van der Heijden
Pattern Recognition Letters, vol. 27, no. 7, pages 773-780, 2006
http://www.zoranz.net/Publications/zivkovicPRL2006.pdf
*/
// default parameters of gaussian background detection algorithm
static const int defaultHistory2 = 500; // Learning rate; alpha = 1/defaultHistory2
static const int defaultNsamples = 7; // number of samples saved in memory
static const float defaultDist2Threshold = 20.0f*20.0f;//threshold on distance from the sample
// additional parameters
static const unsigned char defaultnShadowDetection2 = (unsigned char)127; // value to use in the segmentation mask for shadows, set 0 not to do shadow detection
static const float defaultfTau = 0.5f; // Tau - shadow threshold, see the paper for explanation
class BackgroundSubtractorKNNImpl : public BackgroundSubtractorKNN
{
public:
//! the default constructor
BackgroundSubtractorKNNImpl()
{
frameSize = Size(0,0);
frameType = 0;
nframes = 0;
history = defaultHistory2;
//set parameters
// N - the number of samples stored in memory per model
nN = defaultNsamples;
//kNN - k nearest neighbour - number on NN for detecting background - default K=[0.1*nN]
nkNN=MAX(1,cvRound(0.1*nN*3+0.40));
//Tb - Threshold Tb*kernelwidth
fTb = defaultDist2Threshold;
// Shadow detection
bShadowDetection = 1;//turn on
nShadowDetection = defaultnShadowDetection2;
fTau = defaultfTau;// Tau - shadow threshold
name_ = "BackgroundSubtractor.KNN";
}
//! the full constructor that takes the length of the history,
// the number of gaussian mixtures, the background ratio parameter and the noise strength
BackgroundSubtractorKNNImpl(int _history, float _dist2Threshold, bool _bShadowDetection=true)
{
frameSize = Size(0,0);
frameType = 0;
nframes = 0;
history = _history > 0 ? _history : defaultHistory2;
//set parameters
// N - the number of samples stored in memory per model
nN = defaultNsamples;
//kNN - k nearest neighbour - number on NN for detcting background - default K=[0.1*nN]
nkNN=MAX(1,cvRound(0.1*nN*3+0.40));
//Tb - Threshold Tb*kernelwidth
fTb = _dist2Threshold>0? _dist2Threshold : defaultDist2Threshold;
bShadowDetection = _bShadowDetection;
nShadowDetection = defaultnShadowDetection2;
fTau = defaultfTau;
name_ = "BackgroundSubtractor.KNN";
}
//! the destructor
~BackgroundSubtractorKNNImpl() {}
//! the update operator
void apply(InputArray image, OutputArray fgmask, double learningRate=-1);
//! computes a background image which are the mean of all background gaussians
virtual void getBackgroundImage(OutputArray backgroundImage) const;
//! re-initiaization method
void initialize(Size _frameSize, int _frameType)
{
frameSize = _frameSize;
frameType = _frameType;
nframes = 0;
int nchannels = CV_MAT_CN(frameType);
CV_Assert( nchannels <= CV_CN_MAX );
// Reserve memory for the model
int size=frameSize.height*frameSize.width;
// for each sample of 3 speed pixel models each pixel bg model we store ...
// values + flag (nchannels+1 values)
bgmodel.create( 1,(nN * 3) * (nchannels+1)* size,CV_8U);
//index through the three circular lists
aModelIndexShort.create(1,size,CV_8U);
aModelIndexMid.create(1,size,CV_8U);
aModelIndexLong.create(1,size,CV_8U);
//when to update next
nNextShortUpdate.create(1,size,CV_8U);
nNextMidUpdate.create(1,size,CV_8U);
nNextLongUpdate.create(1,size,CV_8U);
//Reset counters
nShortCounter = 0;
nMidCounter = 0;
nLongCounter = 0;
aModelIndexShort = Scalar::all(0);//random? //((m_nN)*rand())/(RAND_MAX+1);//0...m_nN-1
aModelIndexMid = Scalar::all(0);
aModelIndexLong = Scalar::all(0);
nNextShortUpdate = Scalar::all(0);
nNextMidUpdate = Scalar::all(0);
nNextLongUpdate = Scalar::all(0);
}
virtual int getHistory() const { return history; }
virtual void setHistory(int _nframes) { history = _nframes; }
virtual int getNSamples() const { return nN; }
virtual void setNSamples(int _nN) { nN = _nN; }//needs reinitialization!
virtual int getkNNSamples() const { return nkNN; }
virtual void setkNNSamples(int _nkNN) { nkNN = _nkNN; }
virtual double getDist2Threshold() const { return fTb; }
virtual void setDist2Threshold(double _dist2Threshold) { fTb = (float)_dist2Threshold; }
virtual bool getDetectShadows() const { return bShadowDetection; }
virtual void setDetectShadows(bool detectshadows) { bShadowDetection = detectshadows; }
virtual int getShadowValue() const { return nShadowDetection; }
virtual void setShadowValue(int value) { nShadowDetection = (uchar)value; }
virtual double getShadowThreshold() const { return fTau; }
virtual void setShadowThreshold(double value) { fTau = (float)value; }
virtual void write(FileStorage& fs) const
{
fs << "name" << name_
<< "history" << history
<< "nsamples" << nN
<< "nKNN" << nkNN
<< "dist2Threshold" << fTb
<< "detectShadows" << (int)bShadowDetection
<< "shadowValue" << (int)nShadowDetection
<< "shadowThreshold" << fTau;
}
virtual void read(const FileNode& fn)
{
CV_Assert( (String)fn["name"] == name_ );
history = (int)fn["history"];
nN = (int)fn["nsamples"];
nkNN = (int)fn["nKNN"];
fTb = (float)fn["dist2Threshold"];
bShadowDetection = (int)fn["detectShadows"] != 0;
nShadowDetection = saturate_cast<uchar>((int)fn["shadowValue"]);
fTau = (float)fn["shadowThreshold"];
}
protected:
Size frameSize;
int frameType;
int nframes;
/////////////////////////
//very important parameters - things you will change
////////////////////////
int history;
//alpha=1/history - speed of update - if the time interval you want to average over is T
//set alpha=1/history. It is also usefull at start to make T slowly increase
//from 1 until the desired T
float fTb;
//Tb - threshold on the squared distance from the sample used to decide if it is well described
//by the background model or not. A typical value could be 2 sigma
//and that is Tb=2*2*10*10 =400; where we take typical pixel level sigma=10
/////////////////////////
//less important parameters - things you might change but be carefull
////////////////////////
int nN;//totlal number of samples
int nkNN;//number on NN for detcting background - default K=[0.1*nN]
//shadow detection parameters
bool bShadowDetection;//default 1 - do shadow detection
unsigned char nShadowDetection;//do shadow detection - insert this value as the detection result - 127 default value
float fTau;
// Tau - shadow threshold. The shadow is detected if the pixel is darker
//version of the background. Tau is a threshold on how much darker the shadow can be.
//Tau= 0.5 means that if pixel is more than 2 times darker then it is not shadow
//See: Prati,Mikic,Trivedi,Cucchiarra,"Detecting Moving Shadows...",IEEE PAMI,2003.
//model data
int nLongCounter;//circular counter
int nMidCounter;
int nShortCounter;
Mat bgmodel; // model data pixel values
Mat aModelIndexShort;// index into the models
Mat aModelIndexMid;
Mat aModelIndexLong;
Mat nNextShortUpdate;//random update points per model
Mat nNextMidUpdate;
Mat nNextLongUpdate;
String name_;
};
//{ to do - paralelization ...
//struct KNNInvoker....
CV_INLINE void
_cvUpdatePixelBackgroundNP( long pixel,const uchar* data, int nchannels, int m_nN,
uchar* m_aModel,
uchar* m_nNextLongUpdate,
uchar* m_nNextMidUpdate,
uchar* m_nNextShortUpdate,
uchar* m_aModelIndexLong,
uchar* m_aModelIndexMid,
uchar* m_aModelIndexShort,
int m_nLongCounter,
int m_nMidCounter,
int m_nShortCounter,
int m_nLongUpdate,
int m_nMidUpdate,
int m_nShortUpdate,
uchar include
)
{
// hold the offset
int ndata=1+nchannels;
long offsetLong = ndata * (pixel * m_nN * 3 + m_aModelIndexLong[pixel] + m_nN * 2);
long offsetMid = ndata * (pixel * m_nN * 3 + m_aModelIndexMid[pixel] + m_nN * 1);
long offsetShort = ndata * (pixel * m_nN * 3 + m_aModelIndexShort[pixel]);
// Long update?
if (m_nNextLongUpdate[pixel] == m_nLongCounter)
{
// add the oldest pixel from Mid to the list of values (for each color)
memcpy(&m_aModel[offsetLong],&m_aModel[offsetMid],ndata*sizeof(unsigned char));
// increase the index
m_aModelIndexLong[pixel] = (m_aModelIndexLong[pixel] >= (m_nN-1)) ? 0 : (m_aModelIndexLong[pixel] + 1);
};
if (m_nLongCounter == (m_nLongUpdate-1))
{
//m_nNextLongUpdate[pixel] = (uchar)(((m_nLongUpdate)*(rand()-1))/RAND_MAX);//0,...m_nLongUpdate-1;
m_nNextLongUpdate[pixel] = (uchar)( rand() % m_nLongUpdate );//0,...m_nLongUpdate-1;
};
// Mid update?
if (m_nNextMidUpdate[pixel] == m_nMidCounter)
{
// add this pixel to the list of values (for each color)
memcpy(&m_aModel[offsetMid],&m_aModel[offsetShort],ndata*sizeof(unsigned char));
// increase the index
m_aModelIndexMid[pixel] = (m_aModelIndexMid[pixel] >= (m_nN-1)) ? 0 : (m_aModelIndexMid[pixel] + 1);
};
if (m_nMidCounter == (m_nMidUpdate-1))
{
m_nNextMidUpdate[pixel] = (uchar)( rand() % m_nMidUpdate );
};
// Short update?
if (m_nNextShortUpdate[pixel] == m_nShortCounter)
{
// add this pixel to the list of values (for each color)
memcpy(&m_aModel[offsetShort],data,ndata*sizeof(unsigned char));
//set the include flag
m_aModel[offsetShort+nchannels]=include;
// increase the index
m_aModelIndexShort[pixel] = (m_aModelIndexShort[pixel] >= (m_nN-1)) ? 0 : (m_aModelIndexShort[pixel] + 1);
};
if (m_nShortCounter == (m_nShortUpdate-1))
{
m_nNextShortUpdate[pixel] = (uchar)( rand() % m_nShortUpdate );
};
};
CV_INLINE int
_cvCheckPixelBackgroundNP(long pixel,
const uchar* data, int nchannels,
int m_nN,
uchar* m_aModel,
float m_fTb,
int m_nkNN,
float tau,
int m_nShadowDetection,
uchar& include)
{
int Pbf = 0; // the total probability that this pixel is background
int Pb = 0; //background model probability
float dData[CV_CN_MAX];
//uchar& include=data[nchannels];
include=0;//do we include this pixel into background model?
int ndata=nchannels+1;
long posPixel = pixel * ndata * m_nN * 3;
// float k;
// now increase the probability for each pixel
for (int n = 0; n < m_nN*3; n++)
{
uchar* mean_m = &m_aModel[posPixel + n*ndata];
//calculate difference and distance
float dist2;
if( nchannels == 3 )
{
dData[0] = (float)mean_m[0] - data[0];
dData[1] = (float)mean_m[1] - data[1];
dData[2] = (float)mean_m[2] - data[2];
dist2 = dData[0]*dData[0] + dData[1]*dData[1] + dData[2]*dData[2];
}
else
{
dist2 = 0.f;
for( int c = 0; c < nchannels; c++ )
{
dData[c] = (float)mean_m[c] - data[c];
dist2 += dData[c]*dData[c];
}
}
if (dist2<m_fTb)
{
Pbf++;//all
//background only
//if(m_aModel[subPosPixel + nchannels])//indicator
if(mean_m[nchannels])//indicator
{
Pb++;
if (Pb >= m_nkNN)//Tb
{
include=1;//include
return 1;//background ->exit
};
}
};
};
//include?
if (Pbf>=m_nkNN)//m_nTbf)
{
include=1;
}
int Ps = 0; // the total probability that this pixel is background shadow
// Detected as moving object, perform shadow detection
if (m_nShadowDetection)
{
for (int n = 0; n < m_nN*3; n++)
{
//long subPosPixel = posPixel + n*ndata;
uchar* mean_m = &m_aModel[posPixel + n*ndata];
if(mean_m[nchannels])//check only background
{
float numerator = 0.0f;
float denominator = 0.0f;
for( int c = 0; c < nchannels; c++ )
{
numerator += (float)data[c] * mean_m[c];
denominator += (float)mean_m[c] * mean_m[c];
}
// no division by zero allowed
if( denominator == 0 )
return 0;
// if tau < a < 1 then also check the color distortion
if( numerator <= denominator && numerator >= tau*denominator )
{
float a = numerator / denominator;
float dist2a = 0.0f;
for( int c = 0; c < nchannels; c++ )
{
float dD= a*mean_m[c] - data[c];
dist2a += dD*dD;
}
if (dist2a<m_fTb*a*a)
{
Ps++;
if (Ps >= m_nkNN)//shadow
return 2;
};
};
};
};
}
return 0;
};
CV_INLINE void
icvUpdatePixelBackgroundNP(const Mat& _src, Mat& _dst,
Mat& _bgmodel,
Mat& _nNextLongUpdate,
Mat& _nNextMidUpdate,
Mat& _nNextShortUpdate,
Mat& _aModelIndexLong,
Mat& _aModelIndexMid,
Mat& _aModelIndexShort,
int& _nLongCounter,
int& _nMidCounter,
int& _nShortCounter,
int _nN,
float _fAlphaT,
float _fTb,
int _nkNN,
float _fTau,
int _bShadowDetection,
uchar nShadowDetection
)
{
int nchannels = CV_MAT_CN(_src.type());
//model
uchar* m_aModel=_bgmodel.ptr(0);
uchar* m_nNextLongUpdate=_nNextLongUpdate.ptr(0);
uchar* m_nNextMidUpdate=_nNextMidUpdate.ptr(0);
uchar* m_nNextShortUpdate=_nNextShortUpdate.ptr(0);
uchar* m_aModelIndexLong=_aModelIndexLong.ptr(0);
uchar* m_aModelIndexMid=_aModelIndexMid.ptr(0);
uchar* m_aModelIndexShort=_aModelIndexShort.ptr(0);
//some constants
int m_nN=_nN;
float m_fAlphaT=_fAlphaT;
float m_fTb=_fTb;//Tb - threshold on the distance
float m_fTau=_fTau;
int m_nkNN=_nkNN;
int m_bShadowDetection=_bShadowDetection;
//recalculate update rates - in case alpha is changed
// calculate update parameters (using alpha)
int Kshort,Kmid,Klong;
//approximate exponential learning curve
Kshort=(int)(log(0.7)/log(1-m_fAlphaT))+1;//Kshort
Kmid=(int)(log(0.4)/log(1-m_fAlphaT))-Kshort+1;//Kmid
Klong=(int)(log(0.1)/log(1-m_fAlphaT))-Kshort-Kmid+1;//Klong
//refresh rates
int m_nShortUpdate = (Kshort/m_nN)+1;
int m_nMidUpdate = (Kmid/m_nN)+1;
int m_nLongUpdate = (Klong/m_nN)+1;
//int m_nShortUpdate = MAX((Kshort/m_nN),m_nN);
//int m_nMidUpdate = MAX((Kmid/m_nN),m_nN);
//int m_nLongUpdate = MAX((Klong/m_nN),m_nN);
//update counters for the refresh rate
int m_nLongCounter=_nLongCounter;
int m_nMidCounter=_nMidCounter;
int m_nShortCounter=_nShortCounter;
_nShortCounter++;//0,1,...,m_nShortUpdate-1
_nMidCounter++;
_nLongCounter++;
if (_nShortCounter >= m_nShortUpdate) _nShortCounter = 0;
if (_nMidCounter >= m_nMidUpdate) _nMidCounter = 0;
if (_nLongCounter >= m_nLongUpdate) _nLongCounter = 0;
//go through the image
long i = 0;
for (long y = 0; y < _src.rows; y++)
{
for (long x = 0; x < _src.cols; x++)
{
const uchar* data = _src.ptr(y, x);
//update model+ background subtract
uchar include=0;
int result= _cvCheckPixelBackgroundNP(i, data, nchannels,
m_nN, m_aModel, m_fTb,m_nkNN, m_fTau,m_bShadowDetection,include);
_cvUpdatePixelBackgroundNP(i,data,nchannels,
m_nN, m_aModel,
m_nNextLongUpdate,
m_nNextMidUpdate,
m_nNextShortUpdate,
m_aModelIndexLong,
m_aModelIndexMid,
m_aModelIndexShort,
m_nLongCounter,
m_nMidCounter,
m_nShortCounter,
m_nLongUpdate,
m_nMidUpdate,
m_nShortUpdate,
include
);
switch (result)
{
case 0:
//foreground
*_dst.ptr(y, x) = 255;
break;
case 1:
//background
*_dst.ptr(y, x) = 0;
break;
case 2:
//shadow
*_dst.ptr(y, x) = nShadowDetection;
break;
}
i++;
}
}
};
void BackgroundSubtractorKNNImpl::apply(InputArray _image, OutputArray _fgmask, double learningRate)
{
Mat image = _image.getMat();
bool needToInitialize = nframes == 0 || learningRate >= 1 || image.size() != frameSize || image.type() != frameType;
if( needToInitialize )
initialize(image.size(), image.type());
_fgmask.create( image.size(), CV_8U );
Mat fgmask = _fgmask.getMat();
++nframes;
learningRate = learningRate >= 0 && nframes > 1 ? learningRate : 1./std::min( 2*nframes, history );
CV_Assert(learningRate >= 0);
//parallel_for_(Range(0, image.rows),
// KNNInvoker(image, fgmask,
icvUpdatePixelBackgroundNP(image, fgmask,
bgmodel,
nNextLongUpdate,
nNextMidUpdate,
nNextShortUpdate,
aModelIndexLong,
aModelIndexMid,
aModelIndexShort,
nLongCounter,
nMidCounter,
nShortCounter,
nN,
(float)learningRate,
fTb,
nkNN,
fTau,
bShadowDetection,
nShadowDetection
);
}
void BackgroundSubtractorKNNImpl::getBackgroundImage(OutputArray backgroundImage) const
{
int nchannels = CV_MAT_CN(frameType);
//CV_Assert( nchannels == 3 );
Mat meanBackground(frameSize, CV_8UC3, Scalar::all(0));
int ndata=nchannels+1;
int modelstep=(ndata * nN * 3);
const uchar* pbgmodel=bgmodel.ptr(0);
for(int row=0; row<meanBackground.rows; row++)
{
for(int col=0; col<meanBackground.cols; col++)
{
for (int n = 0; n < nN*3; n++)
{
const uchar* mean_m = &pbgmodel[n*ndata];
if (mean_m[nchannels])
{
meanBackground.at<Vec3b>(row, col) = Vec3b(mean_m);
break;
}
}
pbgmodel=pbgmodel+modelstep;
}
}
switch(CV_MAT_CN(frameType))
{
case 1:
{
std::vector<Mat> channels;
split(meanBackground, channels);
channels[0].copyTo(backgroundImage);
break;
}
case 3:
{
meanBackground.copyTo(backgroundImage);
break;
}
default:
CV_Error(Error::StsUnsupportedFormat, "");
}
}
Ptr<BackgroundSubtractorKNN> createBackgroundSubtractorKNN(int _history, double _threshold2,
bool _bShadowDetection)
{
return makePtr<BackgroundSubtractorKNNImpl>(_history, (float)_threshold2, _bShadowDetection);
}
}
/* End of file. */