opencv/doc/features2d_detection_description.tex
2010-11-26 12:06:07 +00:00

831 lines
31 KiB
TeX

\section{Feature detection and description}
\ifCpp
\cvCppFunc{FAST}
Detects corners using FAST algorithm by E. Rosten (''Machine learning for high-speed corner detection'', 2006).
\fi
\cvdefCpp{
void FAST( const Mat\& image, vector<KeyPoint>\& keypoints,
int threshold, bool nonmaxSupression=true );
}
\begin{description}
\cvarg{image}{The image. Keypoints (corners) will be detected on this.}
\cvarg{keypoints}{Keypoints detected on the image.}
\cvarg{threshold}{Threshold on difference between intensity of center pixel and
pixels on circle around this pixel. See description of the algorithm.}
\cvarg{nonmaxSupression}{If it is true then non-maximum supression will be applied to detected corners (keypoints). }
\end{description}
\ifCPy
\ifPy
\cvclass{CvSURFPoint}
A SURF keypoint, represented as a tuple \texttt{((x, y), laplacian, size, dir, hessian)}.
\begin{description}
\cvarg{x}{x-coordinate of the feature within the image}
\cvarg{y}{y-coordinate of the feature within the image}
\cvarg{laplacian}{-1, 0 or +1. sign of the laplacian at the point. Can be used to speedup feature comparison since features with laplacians of different signs can not match}
\cvarg{size}{size of the feature}
\cvarg{dir}{orientation of the feature: 0..360 degrees}
\cvarg{hessian}{value of the hessian (can be used to approximately estimate the feature strengths; see also params.hessianThreshold)}
\end{description}
\fi
\cvCPyFunc{ExtractSURF}
Extracts Speeded Up Robust Features from an image.
\cvdefC{
void cvExtractSURF( \par const CvArr* image,\par const CvArr* mask,\par CvSeq** keypoints,\par CvSeq** descriptors,\par CvMemStorage* storage,\par CvSURFParams params );
}
\cvdefPy{ExtractSURF(image,mask,storage,params)-> (keypoints,descriptors)}
\begin{description}
\cvarg{image}{The input 8-bit grayscale image}
\cvarg{mask}{The optional input 8-bit mask. The features are only found in the areas that contain more than 50\% of non-zero mask pixels}
\ifC
\cvarg{keypoints}{The output parameter; double pointer to the sequence of keypoints. The sequence of CvSURFPoint structures is as follows:}
\begin{lstlisting}
typedef struct CvSURFPoint
{
CvPoint2D32f pt; // position of the feature within the image
int laplacian; // -1, 0 or +1. sign of the laplacian at the point.
// can be used to speedup feature comparison
// (normally features with laplacians of different
// signs can not match)
int size; // size of the feature
float dir; // orientation of the feature: 0..360 degrees
float hessian; // value of the hessian (can be used to
// approximately estimate the feature strengths;
// see also params.hessianThreshold)
}
CvSURFPoint;
\end{lstlisting}
\cvarg{descriptors}{The optional output parameter; double pointer to the sequence of descriptors. Depending on the params.extended value, each element of the sequence will be either a 64-element or a 128-element floating-point (\texttt{CV\_32F}) vector. If the parameter is NULL, the descriptors are not computed}
\else
\cvarg{keypoints}{sequence of keypoints.}
\cvarg{descriptors}{sequence of descriptors. Each SURF descriptor is a list of floats, of length 64 or 128.}
\fi
\cvarg{storage}{Memory storage where keypoints and descriptors will be stored}
\ifC
\cvarg{params}{Various algorithm parameters put to the structure CvSURFParams:}
\begin{lstlisting}
typedef struct CvSURFParams
{
int extended; // 0 means basic descriptors (64 elements each),
// 1 means extended descriptors (128 elements each)
double hessianThreshold; // only features with keypoint.hessian
// larger than that are extracted.
// good default value is ~300-500 (can depend on the
// average local contrast and sharpness of the image).
// user can further filter out some features based on
// their hessian values and other characteristics.
int nOctaves; // the number of octaves to be used for extraction.
// With each next octave the feature size is doubled
// (3 by default)
int nOctaveLayers; // The number of layers within each octave
// (4 by default)
}
CvSURFParams;
CvSURFParams cvSURFParams(double hessianThreshold, int extended=0);
// returns default parameters
\end{lstlisting}
\else
\cvarg{params}{Various algorithm parameters in a tuple \texttt{(extended, hessianThreshold, nOctaves, nOctaveLayers)}:
\begin{description}
\cvarg{extended}{0 means basic descriptors (64 elements each), 1 means extended descriptors (128 elements each)}
\cvarg{hessianThreshold}{only features with hessian larger than that are extracted. good default value is ~300-500 (can depend on the average local contrast and sharpness of the image). user can further filter out some features based on their hessian values and other characteristics.}
\cvarg{nOctaves}{the number of octaves to be used for extraction. With each next octave the feature size is doubled (3 by default)}
\cvarg{nOctaveLayers}{The number of layers within each octave (4 by default)}
\end{description}}
\fi
\end{description}
The function cvExtractSURF finds robust features in the image, as
described in \cite{Bay06}. For each feature it returns its location, size,
orientation and optionally the descriptor, basic or extended. The function
can be used for object tracking and localization, image stitching etc.
\ifC
See the
\texttt{find\_obj.cpp} demo in OpenCV samples directory.
\else
To extract strong SURF features from an image
\begin{lstlisting}
>>> import cv
>>> im = cv.LoadImageM("building.jpg", cv.CV_LOAD_IMAGE_GRAYSCALE)
>>> (keypoints, descriptors) = cv.ExtractSURF(im, None, cv.CreateMemStorage(), (0, 30000, 3, 1))
>>> print len(keypoints), len(descriptors)
6 6
>>> for ((x, y), laplacian, size, dir, hessian) in keypoints:
... print "x=\%d y=\%d laplacian=\%d size=\%d dir=\%f hessian=\%f" \% (x, y, laplacian, size, dir, hessian)
x=30 y=27 laplacian=-1 size=31 dir=69.778503 hessian=36979.789062
x=296 y=197 laplacian=1 size=33 dir=111.081039 hessian=31514.349609
x=296 y=266 laplacian=1 size=32 dir=107.092300 hessian=31477.908203
x=254 y=284 laplacian=1 size=31 dir=279.137360 hessian=34169.800781
x=498 y=525 laplacian=-1 size=33 dir=278.006592 hessian=31002.759766
x=777 y=281 laplacian=1 size=70 dir=167.940964 hessian=35538.363281
\end{lstlisting}
\fi
\cvCPyFunc{GetStarKeypoints}
Retrieves keypoints using the StarDetector algorithm.
\cvdefC{
CvSeq* cvGetStarKeypoints( \par const CvArr* image,\par CvMemStorage* storage,\par CvStarDetectorParams params=cvStarDetectorParams() );
}
\cvdefPy{GetStarKeypoints(image,storage,params)-> keypoints}
\begin{description}
\cvarg{image}{The input 8-bit grayscale image}
\cvarg{storage}{Memory storage where the keypoints will be stored}
\ifC
\cvarg{params}{Various algorithm parameters given to the structure CvStarDetectorParams:}
\begin{lstlisting}
typedef struct CvStarDetectorParams
{
int maxSize; // maximal size of the features detected. The following
// values of the parameter are supported:
// 4, 6, 8, 11, 12, 16, 22, 23, 32, 45, 46, 64, 90, 128
int responseThreshold; // threshold for the approximatd laplacian,
// used to eliminate weak features
int lineThresholdProjected; // another threshold for laplacian to
// eliminate edges
int lineThresholdBinarized; // another threshold for the feature
// scale to eliminate edges
int suppressNonmaxSize; // linear size of a pixel neighborhood
// for non-maxima suppression
}
CvStarDetectorParams;
\end{lstlisting}
\else
\cvarg{params}{Various algorithm parameters in a tuple \texttt{(maxSize, responseThreshold, lineThresholdProjected, lineThresholdBinarized, suppressNonmaxSize)}:
\begin{description}
\cvarg{maxSize}{maximal size of the features detected. The following values of the parameter are supported: 4, 6, 8, 11, 12, 16, 22, 23, 32, 45, 46, 64, 90, 128}
\cvarg{responseThreshold}{threshold for the approximatd laplacian, used to eliminate weak features}
\cvarg{lineThresholdProjected}{another threshold for laplacian to eliminate edges}
\cvarg{lineThresholdBinarized}{another threshold for the feature scale to eliminate edges}
\cvarg{suppressNonmaxSize}{linear size of a pixel neighborhood for non-maxima suppression}
\end{description}
}
\fi
\end{description}
The function GetStarKeypoints extracts keypoints that are local
scale-space extremas. The scale-space is constructed by computing
approximate values of laplacians with different sigma's at each
pixel. Instead of using pyramids, a popular approach to save computing
time, all of the laplacians are computed at each pixel of the original
high-resolution image. But each approximate laplacian value is computed
in O(1) time regardless of the sigma, thanks to the use of integral
images. The algorithm is based on the paper
Agrawal08
, but instead
of a square, hexagon or octagon it uses an 8-end star shape, hence the name,
consisting of overlapping upright and tilted squares.
\ifC
Each computed feature is represented by the following structure:
\begin{lstlisting}
typedef struct CvStarKeypoint
{
CvPoint pt; // coordinates of the feature
int size; // feature size, see CvStarDetectorParams::maxSize
float response; // the approximated laplacian value at that point.
}
CvStarKeypoint;
inline CvStarKeypoint cvStarKeypoint(CvPoint pt, int size, float response);
\end{lstlisting}
\else
Each keypoint is represented by a tuple \texttt{((x, y), size, response)}:
\begin{description}
\cvarg{x, y}{Screen coordinates of the keypoint}
\cvarg{size}{feature size, up to \texttt{maxSize}}
\cvarg{response}{approximated laplacian value for the keypoint}
\end{description}
\fi
\ifC
Below is the small usage sample:
\begin{lstlisting}
#include "cv.h"
#include "highgui.h"
int main(int argc, char** argv)
{
const char* filename = argc > 1 ? argv[1] : "lena.jpg";
IplImage* img = cvLoadImage( filename, 0 ), *cimg;
CvMemStorage* storage = cvCreateMemStorage(0);
CvSeq* keypoints = 0;
int i;
if( !img )
return 0;
cvNamedWindow( "image", 1 );
cvShowImage( "image", img );
cvNamedWindow( "features", 1 );
cimg = cvCreateImage( cvGetSize(img), 8, 3 );
cvCvtColor( img, cimg, CV_GRAY2BGR );
keypoints = cvGetStarKeypoints( img, storage, cvStarDetectorParams(45) );
for( i = 0; i < (keypoints ? keypoints->total : 0); i++ )
{
CvStarKeypoint kpt = *(CvStarKeypoint*)cvGetSeqElem(keypoints, i);
int r = kpt.size/2;
cvCircle( cimg, kpt.pt, r, CV_RGB(0,255,0));
cvLine( cimg, cvPoint(kpt.pt.x + r, kpt.pt.y + r),
cvPoint(kpt.pt.x - r, kpt.pt.y - r), CV_RGB(0,255,0));
cvLine( cimg, cvPoint(kpt.pt.x - r, kpt.pt.y + r),
cvPoint(kpt.pt.x + r, kpt.pt.y - r), CV_RGB(0,255,0));
}
cvShowImage( "features", cimg );
cvWaitKey();
}
\end{lstlisting}
\fi
\fi
\ifCpp
\cvclass{MSER}
Maximally-Stable Extremal Region Extractor
\begin{lstlisting}
class MSER : public CvMSERParams
{
public:
// default constructor
MSER();
// constructor that initializes all the algorithm parameters
MSER( int _delta, int _min_area, int _max_area,
float _max_variation, float _min_diversity,
int _max_evolution, double _area_threshold,
double _min_margin, int _edge_blur_size );
// runs the extractor on the specified image; returns the MSERs,
// each encoded as a contour (vector<Point>, see findContours)
// the optional mask marks the area where MSERs are searched for
void operator()( const Mat& image, vector<vector<Point> >& msers, const Mat& mask ) const;
};
\end{lstlisting}
The class encapsulates all the parameters of MSER (see \url{http://en.wikipedia.org/wiki/Maximally_stable_extremal_regions}) extraction algorithm.
\cvclass{StarDetector}
Implements Star keypoint detector
\begin{lstlisting}
class StarDetector : CvStarDetectorParams
{
public:
// default constructor
StarDetector();
// the full constructor initialized all the algorithm parameters:
// maxSize - maximum size of the features. The following
// values of the parameter are supported:
// 4, 6, 8, 11, 12, 16, 22, 23, 32, 45, 46, 64, 90, 128
// responseThreshold - threshold for the approximated laplacian,
// used to eliminate weak features. The larger it is,
// the less features will be retrieved
// lineThresholdProjected - another threshold for the laplacian to
// eliminate edges
// lineThresholdBinarized - another threshold for the feature
// size to eliminate edges.
// The larger the 2 threshold, the more points you get.
StarDetector(int maxSize, int responseThreshold,
int lineThresholdProjected,
int lineThresholdBinarized,
int suppressNonmaxSize);
// finds keypoints in an image
void operator()(const Mat& image, vector<KeyPoint>& keypoints) const;
};
\end{lstlisting}
The class implements a modified version of CenSurE keypoint detector described in
\cite{Agrawal08}
\cvclass{SIFT}
Class for extracting keypoints and computing descriptors using approach named Scale Invariant Feature Transform (SIFT).
\begin{lstlisting}
class CV_EXPORTS SIFT
{
public:
struct CommonParams
{
static const int DEFAULT_NOCTAVES = 4;
static const int DEFAULT_NOCTAVE_LAYERS = 3;
static const int DEFAULT_FIRST_OCTAVE = -1;
enum{ FIRST_ANGLE = 0, AVERAGE_ANGLE = 1 };
CommonParams();
CommonParams( int _nOctaves, int _nOctaveLayers, int _firstOctave,
int _angleMode );
int nOctaves, nOctaveLayers, firstOctave;
int angleMode;
};
struct DetectorParams
{
static double GET_DEFAULT_THRESHOLD()
{ return 0.04 / SIFT::CommonParams::DEFAULT_NOCTAVE_LAYERS / 2.0; }
static double GET_DEFAULT_EDGE_THRESHOLD() { return 10.0; }
DetectorParams();
DetectorParams( double _threshold, double _edgeThreshold );
double threshold, edgeThreshold;
};
struct DescriptorParams
{
static double GET_DEFAULT_MAGNIFICATION() { return 3.0; }
static const bool DEFAULT_IS_NORMALIZE = true;
static const int DESCRIPTOR_SIZE = 128;
DescriptorParams();
DescriptorParams( double _magnification, bool _isNormalize,
bool _recalculateAngles );
double magnification;
bool isNormalize;
bool recalculateAngles;
};
SIFT();
//! sift-detector constructor
SIFT( double _threshold, double _edgeThreshold,
int _nOctaves=CommonParams::DEFAULT_NOCTAVES,
int _nOctaveLayers=CommonParams::DEFAULT_NOCTAVE_LAYERS,
int _firstOctave=CommonParams::DEFAULT_FIRST_OCTAVE,
int _angleMode=CommonParams::FIRST_ANGLE );
//! sift-descriptor constructor
SIFT( double _magnification, bool _isNormalize=true,
bool _recalculateAngles = true,
int _nOctaves=CommonParams::DEFAULT_NOCTAVES,
int _nOctaveLayers=CommonParams::DEFAULT_NOCTAVE_LAYERS,
int _firstOctave=CommonParams::DEFAULT_FIRST_OCTAVE,
int _angleMode=CommonParams::FIRST_ANGLE );
SIFT( const CommonParams& _commParams,
const DetectorParams& _detectorParams = DetectorParams(),
const DescriptorParams& _descriptorParams = DescriptorParams() );
//! returns the descriptor size in floats (128)
int descriptorSize() const { return DescriptorParams::DESCRIPTOR_SIZE; }
//! finds the keypoints using SIFT algorithm
void operator()(const Mat& img, const Mat& mask,
vector<KeyPoint>& keypoints) const;
//! finds the keypoints and computes descriptors for them using SIFT algorithm.
//! Optionally it can compute descriptors for the user-provided keypoints
void operator()(const Mat& img, const Mat& mask,
vector<KeyPoint>& keypoints,
Mat& descriptors,
bool useProvidedKeypoints=false) const;
CommonParams getCommonParams () const { return commParams; }
DetectorParams getDetectorParams () const { return detectorParams; }
DescriptorParams getDescriptorParams () const { return descriptorParams; }
protected:
...
};
\end{lstlisting}
\cvclass{SURF}
Class for extracting Speeded Up Robust Features from an image.
\begin{lstlisting}
class SURF : public CvSURFParams
{
public:
// default constructor
SURF();
// constructor that initializes all the algorithm parameters
SURF(double _hessianThreshold, int _nOctaves=4,
int _nOctaveLayers=2, bool _extended=false);
// returns the number of elements in each descriptor (64 or 128)
int descriptorSize() const;
// detects keypoints using fast multi-scale Hessian detector
void operator()(const Mat& img, const Mat& mask,
vector<KeyPoint>& keypoints) const;
// detects keypoints and computes the SURF descriptors for them;
// output vector "descriptors" stores elements of descriptors and has size
// equal descriptorSize()*keypoints.size() as each descriptor is
// descriptorSize() elements of this vector.
void operator()(const Mat& img, const Mat& mask,
vector<KeyPoint>& keypoints,
vector<float>& descriptors,
bool useProvidedKeypoints=false) const;
};
\end{lstlisting}
The class \texttt{SURF} implements Speeded Up Robust Features descriptor \cite{Bay06}.
There is fast multi-scale Hessian keypoint detector that can be used to find the keypoints
(which is the default option), but the descriptors can be also computed for the user-specified keypoints.
The function can be used for object tracking and localization, image stitching etc. See the
\texttt{find\_obj.cpp} demo in OpenCV samples directory.
\cvclass{RandomizedTree}
The class contains base structure for \texttt{RTreeClassifier}
\begin{lstlisting}
class CV_EXPORTS RandomizedTree
{
public:
friend class RTreeClassifier;
RandomizedTree();
~RandomizedTree();
void train(std::vector<BaseKeypoint> const& base_set,
cv::RNG &rng, int depth, int views,
size_t reduced_num_dim, int num_quant_bits);
void train(std::vector<BaseKeypoint> const& base_set,
cv::RNG &rng, PatchGenerator &make_patch, int depth,
int views, size_t reduced_num_dim, int num_quant_bits);
// following two funcs are EXPERIMENTAL
//(do not use unless you know exactly what you do)
static void quantizeVector(float *vec, int dim, int N, float bnds[2],
int clamp_mode=0);
static void quantizeVector(float *src, int dim, int N, float bnds[2],
uchar *dst);
// patch_data must be a 32x32 array (no row padding)
float* getPosterior(uchar* patch_data);
const float* getPosterior(uchar* patch_data) const;
uchar* getPosterior2(uchar* patch_data);
void read(const char* file_name, int num_quant_bits);
void read(std::istream &is, int num_quant_bits);
void write(const char* file_name) const;
void write(std::ostream &os) const;
int classes() { return classes_; }
int depth() { return depth_; }
void discardFloatPosteriors() { freePosteriors(1); }
inline void applyQuantization(int num_quant_bits)
{ makePosteriors2(num_quant_bits); }
private:
int classes_;
int depth_;
int num_leaves_;
std::vector<RTreeNode> nodes_;
float **posteriors_; // 16-bytes aligned posteriors
uchar **posteriors2_; // 16-bytes aligned posteriors
std::vector<int> leaf_counts_;
void createNodes(int num_nodes, cv::RNG &rng);
void allocPosteriorsAligned(int num_leaves, int num_classes);
void freePosteriors(int which);
// which: 1=posteriors_, 2=posteriors2_, 3=both
void init(int classes, int depth, cv::RNG &rng);
void addExample(int class_id, uchar* patch_data);
void finalize(size_t reduced_num_dim, int num_quant_bits);
int getIndex(uchar* patch_data) const;
inline float* getPosteriorByIndex(int index);
inline uchar* getPosteriorByIndex2(int index);
inline const float* getPosteriorByIndex(int index) const;
void convertPosteriorsToChar();
void makePosteriors2(int num_quant_bits);
void compressLeaves(size_t reduced_num_dim);
void estimateQuantPercForPosteriors(float perc[2]);
};
\end{lstlisting}
\cvCppFunc{RandomizedTree::train}
Trains a randomized tree using input set of keypoints
\cvdefCpp{
void train(std::vector<BaseKeypoint> const\& base\_set, cv::RNG \&rng,
PatchGenerator \&make\_patch, int depth, int views, size\_t reduced\_num\_dim,
int num\_quant\_bits);
}
\cvdefCpp{
void train(std::vector<BaseKeypoint> const\& base\_set, cv::RNG \&rng,
PatchGenerator \&make\_patch, int depth, int views, size\_t reduced\_num\_dim,
int num\_quant\_bits);
}
\begin{description}
\cvarg{base\_set} {Vector of \texttt{BaseKeypoint} type. Contains keypoints from the image are used for training}
\cvarg{rng} {Random numbers generator is used for training}
\cvarg{make\_patch} {Patch generator is used for training}
\cvarg{depth} {Maximum tree depth}
%\cvarg{views} {}
\cvarg{reduced\_num\_dim} {Number of dimensions are used in compressed signature}
\cvarg{num\_quant\_bits} {Number of bits are used for quantization}
\end{description}
\cvCppFunc{RandomizedTree::read}
Reads pre-saved randomized tree from file or stream
\cvdefCpp{read(const char* file\_name, int num\_quant\_bits)}
\cvdefCpp{read(std::istream \&is, int num\_quant\_bits)}
\begin{description}
\cvarg{file\_name}{Filename of file contains randomized tree data}
\cvarg{is}{Input stream associated with file contains randomized tree data}
\cvarg{num\_quant\_bits} {Number of bits are used for quantization}
\end{description}
\cvCppFunc{RandomizedTree::write}
Writes current randomized tree to a file or stream
\cvdefCpp{void write(const char* file\_name) const;}
\cvdefCpp{void write(std::ostream \&os) const;}
\begin{description}
\cvarg{file\_name}{Filename of file where randomized tree data will be stored}
\cvarg{is}{Output stream associated with file where randomized tree data will be stored}
\end{description}
\cvCppFunc{RandomizedTree::applyQuantization}
Applies quantization to the current randomized tree
\cvdefCpp{void applyQuantization(int num\_quant\_bits)}
\begin{description}
\cvarg{num\_quant\_bits} {Number of bits are used for quantization}
\end{description}
\cvclass{RTreeNode}
The class contains base structure for \texttt{RandomizedTree}
\begin{lstlisting}
struct RTreeNode
{
short offset1, offset2;
RTreeNode() {}
RTreeNode(uchar x1, uchar y1, uchar x2, uchar y2)
: offset1(y1*PATCH_SIZE + x1),
offset2(y2*PATCH_SIZE + x2)
{}
//! Left child on 0, right child on 1
inline bool operator() (uchar* patch_data) const
{
return patch_data[offset1] > patch_data[offset2];
}
};
\end{lstlisting}
\cvclass{RTreeClassifier}
The class contains \texttt{RTreeClassifier}. It represents calonder descriptor which was originally introduced by Michael Calonder
\begin{lstlisting}
class CV_EXPORTS RTreeClassifier
{
public:
static const int DEFAULT_TREES = 48;
static const size_t DEFAULT_NUM_QUANT_BITS = 4;
RTreeClassifier();
void train(std::vector<BaseKeypoint> const& base_set,
cv::RNG &rng,
int num_trees = RTreeClassifier::DEFAULT_TREES,
int depth = DEFAULT_DEPTH,
int views = DEFAULT_VIEWS,
size_t reduced_num_dim = DEFAULT_REDUCED_NUM_DIM,
int num_quant_bits = DEFAULT_NUM_QUANT_BITS,
bool print_status = true);
void train(std::vector<BaseKeypoint> const& base_set,
cv::RNG &rng,
PatchGenerator &make_patch,
int num_trees = RTreeClassifier::DEFAULT_TREES,
int depth = DEFAULT_DEPTH,
int views = DEFAULT_VIEWS,
size_t reduced_num_dim = DEFAULT_REDUCED_NUM_DIM,
int num_quant_bits = DEFAULT_NUM_QUANT_BITS,
bool print_status = true);
// sig must point to a memory block of at least
//classes()*sizeof(float|uchar) bytes
void getSignature(IplImage *patch, uchar *sig);
void getSignature(IplImage *patch, float *sig);
void getSparseSignature(IplImage *patch, float *sig,
float thresh);
static int countNonZeroElements(float *vec, int n, double tol=1e-10);
static inline void safeSignatureAlloc(uchar **sig, int num_sig=1,
int sig_len=176);
static inline uchar* safeSignatureAlloc(int num_sig=1,
int sig_len=176);
inline int classes() { return classes_; }
inline int original_num_classes()
{ return original_num_classes_; }
void setQuantization(int num_quant_bits);
void discardFloatPosteriors();
void read(const char* file_name);
void read(std::istream &is);
void write(const char* file_name) const;
void write(std::ostream &os) const;
std::vector<RandomizedTree> trees_;
private:
int classes_;
int num_quant_bits_;
uchar **posteriors_;
ushort *ptemp_;
int original_num_classes_;
bool keep_floats_;
};
\end{lstlisting}
\cvCppFunc{RTreeClassifier::train}
Trains a randomized tree classificator using input set of keypoints
\cvdefCpp{
void train(std::vector<BaseKeypoint> const\& base\_set,
cv::RNG \&rng,
int num\_trees = RTreeClassifier::DEFAULT\_TREES,
int depth = DEFAULT\_DEPTH,
int views = DEFAULT\_VIEWS,
size\_t reduced\_num\_dim = DEFAULT\_REDUCED\_NUM\_DIM,
int num\_quant\_bits = DEFAULT\_NUM\_QUANT\_BITS, bool print\_status = true);
}
\cvdefCpp{
void train(std::vector<BaseKeypoint> const\& base\_set,
cv::RNG \&rng,
PatchGenerator \&make\_patch,
int num\_trees = RTreeClassifier::DEFAULT\_TREES,
int depth = DEFAULT\_DEPTH,
int views = DEFAULT\_VIEWS,
size\_t reduced\_num\_dim = DEFAULT\_REDUCED\_NUM\_DIM,
int num\_quant\_bits = DEFAULT\_NUM\_QUANT\_BITS, bool print\_status = true);
}
\begin{description}
\cvarg{base\_set} {Vector of \texttt{BaseKeypoint} type. Contains keypoints from the image are used for training}
\cvarg{rng} {Random numbers generator is used for training}
\cvarg{make\_patch} {Patch generator is used for training}
\cvarg{num\_trees} {Number of randomized trees used in RTreeClassificator}
\cvarg{depth} {Maximum tree depth}
%\cvarg{views} {}
\cvarg{reduced\_num\_dim} {Number of dimensions are used in compressed signature}
\cvarg{num\_quant\_bits} {Number of bits are used for quantization}
\cvarg{print\_status} {Print current status of training on the console}
\end{description}
\cvCppFunc{RTreeClassifier::getSignature}
Returns signature for image patch
\cvdefCpp{
void getSignature(IplImage *patch, uchar *sig)
}
\cvdefCpp{
void getSignature(IplImage *patch, float *sig)
}
\begin{description}
\cvarg{patch} {Image patch to calculate signature for}
\cvarg{sig} {Output signature (array dimension is \texttt{reduced\_num\_dim)}}
\end{description}
\cvCppFunc{RTreeClassifier::getSparseSignature}
The function is simular to \texttt{getSignature} but uses the threshold for removing all signature elements less than the threshold. So that the signature is compressed
\cvdefCpp{
void getSparseSignature(IplImage *patch, float *sig,
float thresh);
}
\begin{description}
\cvarg{patch} {Image patch to calculate signature for}
\cvarg{sig} {Output signature (array dimension is \texttt{reduced\_num\_dim)}}
\cvarg{tresh} {The threshold that is used for compressing the signature}
\end{description}
\cvCppFunc{RTreeClassifier::countNonZeroElements}
The function returns the number of non-zero elements in the input array.
\cvdefCpp{
static int countNonZeroElements(float *vec, int n, double tol=1e-10);
}
\begin{description}
\cvarg{vec}{Input vector contains float elements}
\cvarg{n}{Input vector size}
\cvarg{tol} {The threshold used for elements counting. We take all elements are less than \texttt{tol} as zero elements}
\end{description}
\cvCppFunc{RTreeClassifier::read}
Reads pre-saved RTreeClassifier from file or stream
\cvdefCpp{read(const char* file\_name)}
\cvdefCpp{read(std::istream \&is)}
\begin{description}
\cvarg{file\_name}{Filename of file contains randomized tree data}
\cvarg{is}{Input stream associated with file contains randomized tree data}
\end{description}
\cvCppFunc{RTreeClassifier::write}
Writes current RTreeClassifier to a file or stream
\cvdefCpp{void write(const char* file\_name) const;}
\cvdefCpp{void write(std::ostream \&os) const;}
\begin{description}
\cvarg{file\_name}{Filename of file where randomized tree data will be stored}
\cvarg{is}{Output stream associated with file where randomized tree data will be stored}
\end{description}
\cvCppFunc{RTreeClassifier::setQuantization}
Applies quantization to the current randomized tree
\cvdefCpp{void setQuantization(int num\_quant\_bits)}
\begin{description}
\cvarg{num\_quant\_bits} {Number of bits are used for quantization}
\end{description}
Below there is an example of \texttt{RTreeClassifier} usage for feature matching. There are test and train images and we extract features from both with SURF. Output is $best\_corr$ and $best\_corr\_idx$ arrays which keep the best probabilities and corresponding features indexes for every train feature.
% ===== Example. Using RTreeClassifier for features matching =====
\begin{lstlisting}
CvMemStorage* storage = cvCreateMemStorage(0);
CvSeq *objectKeypoints = 0, *objectDescriptors = 0;
CvSeq *imageKeypoints = 0, *imageDescriptors = 0;
CvSURFParams params = cvSURFParams(500, 1);
cvExtractSURF( test_image, 0, &imageKeypoints, &imageDescriptors,
storage, params );
cvExtractSURF( train_image, 0, &objectKeypoints, &objectDescriptors,
storage, params );
cv::RTreeClassifier detector;
int patch_width = cv::PATCH_SIZE;
iint patch_height = cv::PATCH_SIZE;
vector<cv::BaseKeypoint> base_set;
int i=0;
CvSURFPoint* point;
for (i=0;i<(n_points > 0 ? n_points : objectKeypoints->total);i++)
{
point=(CvSURFPoint*)cvGetSeqElem(objectKeypoints,i);
base_set.push_back(
cv::BaseKeypoint(point->pt.x,point->pt.y,train_image));
}
//Detector training
cv::RNG rng( cvGetTickCount() );
cv::PatchGenerator gen(0,255,2,false,0.7,1.3,-CV_PI/3,CV_PI/3,
-CV_PI/3,CV_PI/3);
printf("RTree Classifier training...\n");
detector.train(base_set,rng,gen,24,cv::DEFAULT_DEPTH,2000,
(int)base_set.size(), detector.DEFAULT_NUM_QUANT_BITS);
printf("Done\n");
float* signature = new float[detector.original_num_classes()];
float* best_corr;
int* best_corr_idx;
if (imageKeypoints->total > 0)
{
best_corr = new float[imageKeypoints->total];
best_corr_idx = new int[imageKeypoints->total];
}
for(i=0; i < imageKeypoints->total; i++)
{
point=(CvSURFPoint*)cvGetSeqElem(imageKeypoints,i);
int part_idx = -1;
float prob = 0.0f;
CvRect roi = cvRect((int)(point->pt.x) - patch_width/2,
(int)(point->pt.y) - patch_height/2,
patch_width, patch_height);
cvSetImageROI(test_image, roi);
roi = cvGetImageROI(test_image);
if(roi.width != patch_width || roi.height != patch_height)
{
best_corr_idx[i] = part_idx;
best_corr[i] = prob;
}
else
{
cvSetImageROI(test_image, roi);
IplImage* roi_image =
cvCreateImage(cvSize(roi.width, roi.height),
test_image->depth, test_image->nChannels);
cvCopy(test_image,roi_image);
detector.getSignature(roi_image, signature);
for (int j = 0; j< detector.original_num_classes();j++)
{
if (prob < signature[j])
{
part_idx = j;
prob = signature[j];
}
}
best_corr_idx[i] = part_idx;
best_corr[i] = prob;
if (roi_image)
cvReleaseImage(&roi_image);
}
cvResetImageROI(test_image);
}
\end{lstlisting}
\fi