/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                        Intel License Agreement
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of Intel Corporation may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_ML_PRECOMP_HPP__
#define __OPENCV_ML_PRECOMP_HPP__

#include "opencv2/core.hpp"
|
2013-03-13 20:22:44 +08:00
|
|
|
#include "opencv2/ml.hpp"
|
2010-05-12 01:44:00 +08:00
|
|
|
#include "opencv2/core/core_c.h"
|
2013-03-15 00:10:54 +08:00
|
|
|
#include "opencv2/core/utility.hpp"
|
2013-04-01 21:29:10 +08:00
|
|
|
|
|
|
|
#include "opencv2/core/private.hpp"
|
2010-05-12 01:44:00 +08:00
|
|
|
|
|
|
|
#include <assert.h>
|
|
|
|
#include <float.h>
|
|
|
|
#include <limits.h>
|
|
|
|
#include <math.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <time.h>
|
2014-07-30 03:54:23 +08:00
|
|
|
#include <vector>
|
2010-05-12 01:44:00 +08:00
|
|
|
|
2014-07-30 03:54:23 +08:00
|
|
|
/****************************************************************************************\
*                                 Main struct definitions                                *
\****************************************************************************************/

/* log(2*PI) */
#define CV_LOG2PI (1.8378770664093454835606594728112)

namespace cv
{
namespace ml
{
    using std::vector;

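    // Decodes the split direction for categorical value 'idx' from the bit mask 'subset':
    // the macro evaluates to -1 when the bit for 'idx' is set (the category belongs to the
    // subset) and to +1 otherwise; the sign is then used to route a sample to one child or
    // the other. For example, with subset[0] == 0x5 (categories 0 and 2 marked),
    // CV_DTREE_CAT_DIR(2, subset) == -1 and CV_DTREE_CAT_DIR(1, subset) == +1.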
#define CV_DTREE_CAT_DIR(idx,subset) \
    (2*((subset[(idx)>>5]&(1 << ((idx) & 31)))==0)-1)

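    // Comparators for sorting by indirection: cmp_lt_idx orders integer indices by the
    // values they refer to in 'arr', cmp_lt_ptr orders pointers by the values they point to.
    // Illustrative use (hypothetical names):
    //     std::vector<int> order;
    //     setRangeVector(order, n);                                    // 0, 1, ..., n-1
    //     std::sort(order.begin(), order.end(), cmp_lt_idx<float>(values));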
    template<typename _Tp> struct cmp_lt_idx
    {
        cmp_lt_idx(const _Tp* _arr) : arr(_arr) {}
        bool operator ()(int a, int b) const { return arr[a] < arr[b]; }
        const _Tp* arr;
    };

    template<typename _Tp> struct cmp_lt_ptr
    {
        cmp_lt_ptr() {}
        bool operator ()(const _Tp* a, const _Tp* b) const { return *a < *b; }
    };

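    // Fills 'vec' with the identity permutation 0, 1, ..., n-1.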
    static inline void setRangeVector(std::vector<int>& vec, int n)
    {
        vec.resize(n);
        for( int i = 0; i < n; i++ )
            vec[i] = i;
    }

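    // Helpers that store/load a TermCriteria as named "epsilon"/"iterations" entries of the
    // current FileStorage/FileNode scope; readTermCrit treats a missing or non-positive entry
    // as "criterion not set". Illustrative round trip (hypothetical fs/fn handles):
    //     writeTermCrit(fs, TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 100, 1e-6));
    //     TermCriteria tc = readTermCrit(fn);   // tc.maxCount == 100, tc.epsilon == 1e-6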
    static inline void writeTermCrit(FileStorage& fs, const TermCriteria& termCrit)
    {
        if( (termCrit.type & TermCriteria::EPS) != 0 )
            fs << "epsilon" << termCrit.epsilon;
        if( (termCrit.type & TermCriteria::COUNT) != 0 )
            fs << "iterations" << termCrit.maxCount;
    }

    static inline TermCriteria readTermCrit(const FileNode& fn)
    {
        TermCriteria termCrit;
        double epsilon = (double)fn["epsilon"];
        if( epsilon > 0 )
        {
            termCrit.type |= TermCriteria::EPS;
            termCrit.epsilon = epsilon;
        }
        int iters = (int)fn["iterations"];
        if( iters > 0 )
        {
            termCrit.type |= TermCriteria::COUNT;
            termCrit.maxCount = iters;
        }
        return termCrit;
    }

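    // Training parameters shared by the decision-tree based models. The setters validate
    // their input: maxCategories is clamped to [2, 15], maxDepth to [0, 25], minSampleCount
    // to >= 1, and CVFolds == 1 is treated the same as 0 (no cross-validation pruning).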
    struct TreeParams
    {
        TreeParams();
        TreeParams( int maxDepth, int minSampleCount,
                    double regressionAccuracy, bool useSurrogates,
                    int maxCategories, int CVFolds,
                    bool use1SERule, bool truncatePrunedTree,
                    const Mat& priors );

        inline void setMaxCategories(int val)
        {
            if( val < 2 )
                CV_Error( CV_StsOutOfRange, "max_categories should be >= 2" );
            maxCategories = std::min(val, 15 );
        }
        inline void setMaxDepth(int val)
        {
            if( val < 0 )
                CV_Error( CV_StsOutOfRange, "max_depth should be >= 0" );
            maxDepth = std::min( val, 25 );
        }
        inline void setMinSampleCount(int val)
        {
            minSampleCount = std::max(val, 1);
        }
        inline void setCVFolds(int val)
        {
            if( val < 0 )
                CV_Error( CV_StsOutOfRange,
                          "params.CVFolds should be =0 (the tree is not pruned) "
                          "or n>0 (tree is pruned using n-fold cross-validation)" );
            if( val == 1 )
                val = 0;
            CVFolds = val;
        }
        inline void setRegressionAccuracy(float val)
        {
            if( val < 0 )
                CV_Error( CV_StsOutOfRange, "params.regression_accuracy should be >= 0" );
            regressionAccuracy = val;
        }

        inline int getMaxCategories() const { return maxCategories; }
        inline int getMaxDepth() const { return maxDepth; }
        inline int getMinSampleCount() const { return minSampleCount; }
        inline int getCVFolds() const { return CVFolds; }
        inline float getRegressionAccuracy() const { return regressionAccuracy; }

        CV_IMPL_PROPERTY(bool, UseSurrogates, useSurrogates)
        CV_IMPL_PROPERTY(bool, Use1SERule, use1SERule)
        CV_IMPL_PROPERTY(bool, TruncatePrunedTree, truncatePrunedTree)
        CV_IMPL_PROPERTY_S(cv::Mat, Priors, priors)

    public:
        bool useSurrogates;
        bool use1SERule;
        bool truncatePrunedTree;
        Mat priors;

    protected:
        int maxCategories;
        int maxDepth;
        int minSampleCount;
        int CVFolds;
        float regressionAccuracy;
    };

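    // Additional training parameters used by the random forest implementation (RTrees).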
    struct RTreeParams
    {
        RTreeParams();
        RTreeParams(bool calcVarImportance, int nactiveVars, TermCriteria termCrit );
        bool calcVarImportance;
        int nactiveVars;
        TermCriteria termCrit;
    };

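    // Additional training parameters used by the boosted-trees implementation (Boost).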
    struct BoostTreeParams
    {
        BoostTreeParams();
        BoostTreeParams(int boostType, int weakCount, double weightTrimRate);
        int boostType;
        int weakCount;
        double weightTrimRate;
    };

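    // Implementation of the DTrees interface; the ensemble models (random forests, boosting)
    // build on it, growing each tree via addTree()/addNodeAndTrySplit() and storing the
    // result in the shared nodes/splits/subsets arrays below.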
    class DTreesImpl : public DTrees
    {
    public:
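        // "Working" node used while a tree is being grown: parent/left/right/split are
        // indices into the WorkData arrays (-1 means none); Tn, alpha, complexity and the
        // *_risk/*_error fields carry the bookkeeping for cost-complexity pruning (pruneCV).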
        struct WNode
        {
            WNode()
            {
                class_idx = sample_count = depth = complexity = 0;
                parent = left = right = split = defaultDir = -1;
                Tn = INT_MAX;
                value = maxlr = alpha = node_risk = tree_risk = tree_error = 0.;
            }

            int class_idx;
            double Tn;
            double value;

            int parent;
            int left;
            int right;
            int defaultDir;

            int split;

            int sample_count;
            int depth;
            double maxlr;

            // global pruning data
            int complexity;
            double alpha;
            double node_risk, tree_risk, tree_error;
        };

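        // "Working" split used while a tree is being grown: the tested variable, the
        // threshold 'c' for ordered variables or 'subsetOfs' (offset into wsubsets) for
        // categorical ones, the split quality, and 'next', which chains further splits of
        // the same node (e.g. surrogate splits).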
        struct WSplit
        {
            WSplit()
            {
                varIdx = next = 0;
                inversed = false;
                quality = c = 0.f;
                subsetOfs = -1;
            }

            int varIdx;
            bool inversed;
            float quality;
            int next;
            float c;
            int subsetOfs;
        };

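        // Scratch data for one training run: the training samples plus the working nodes,
        // splits and category subsets, cross-validation bookkeeping (cv_*), sample weights
        // and responses.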
        struct WorkData
        {
            WorkData(const Ptr<TrainData>& _data);

            Ptr<TrainData> data;
            vector<WNode> wnodes;
            vector<WSplit> wsplits;
            vector<int> wsubsets;
            vector<double> cv_Tn;
            vector<double> cv_node_risk;
            vector<double> cv_node_error;
            vector<int> cv_labels;
            vector<double> sample_weights;
            vector<int> cat_responses;
            vector<double> ord_responses;
            vector<int> sidx;
            int maxSubsetSize;
        };

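        // Generated get/set properties of the DTrees interface; each forwards to the
        // corresponding field of 'params'.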
        CV_WRAP_SAME_PROPERTY(int, MaxCategories, params)
        CV_WRAP_SAME_PROPERTY(int, MaxDepth, params)
        CV_WRAP_SAME_PROPERTY(int, MinSampleCount, params)
        CV_WRAP_SAME_PROPERTY(int, CVFolds, params)
        CV_WRAP_SAME_PROPERTY(bool, UseSurrogates, params)
        CV_WRAP_SAME_PROPERTY(bool, Use1SERule, params)
        CV_WRAP_SAME_PROPERTY(bool, TruncatePrunedTree, params)
        CV_WRAP_SAME_PROPERTY(float, RegressionAccuracy, params)
        CV_WRAP_SAME_PROPERTY_S(cv::Mat, Priors, params)

        DTreesImpl();
        virtual ~DTreesImpl();
        virtual void clear();

        String getDefaultName() const { return "opencv_ml_dtree"; }
        bool isTrained() const { return !roots.empty(); }
        bool isClassifier() const { return _isClassifier; }
        int getVarCount() const { return varType.empty() ? 0 : (int)(varType.size() - 1); }
        int getCatCount(int vi) const { return catOfs[vi][1] - catOfs[vi][0]; }
        int getSubsetSize(int vi) const { return (getCatCount(vi) + 31)/32; }

        virtual void setDParams(const TreeParams& _params);
        virtual void startTraining( const Ptr<TrainData>& trainData, int flags );
        virtual void endTraining();
        virtual void initCompVarIdx();
        virtual bool train( const Ptr<TrainData>& trainData, int flags );

        virtual int addTree( const vector<int>& sidx );
        virtual int addNodeAndTrySplit( int parent, const vector<int>& sidx );
        virtual const vector<int>& getActiveVars();
        virtual int findBestSplit( const vector<int>& _sidx );
        virtual void calcValue( int nidx, const vector<int>& _sidx );

        virtual WSplit findSplitOrdClass( int vi, const vector<int>& _sidx, double initQuality );

        // simple k-means, slightly modified to take into account the "weight" (L1-norm) of each vector.
        virtual void clusterCategories( const double* vectors, int n, int m, double* csums, int k, int* labels );
        virtual WSplit findSplitCatClass( int vi, const vector<int>& _sidx, double initQuality, int* subset );

        virtual WSplit findSplitOrdReg( int vi, const vector<int>& _sidx, double initQuality );
        virtual WSplit findSplitCatReg( int vi, const vector<int>& _sidx, double initQuality, int* subset );

        virtual int calcDir( int splitidx, const vector<int>& _sidx, vector<int>& _sleft, vector<int>& _sright );
        virtual int pruneCV( int root );

        virtual double updateTreeRNC( int root, double T, int fold );
        virtual bool cutTree( int root, double T, int fold, double min_alpha );
        virtual float predictTrees( const Range& range, const Mat& sample, int flags ) const;
        virtual float predict( InputArray inputs, OutputArray outputs, int flags ) const;

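        // FileStorage (de)serialization of the training parameters and of the trained trees,
        // used when a model is saved to or loaded from a file.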
        virtual void writeTrainingParams( FileStorage& fs ) const;
        virtual void writeParams( FileStorage& fs ) const;
        virtual void writeSplit( FileStorage& fs, int splitidx ) const;
        virtual void writeNode( FileStorage& fs, int nidx, int depth ) const;
        virtual void writeTree( FileStorage& fs, int root ) const;
        virtual void write( FileStorage& fs ) const;

        virtual void readParams( const FileNode& fn );
        virtual int readSplit( const FileNode& fn );
        virtual int readNode( const FileNode& fn );
        virtual int readTree( const FileNode& fn );
        virtual void read( const FileNode& fn );

        virtual const std::vector<int>& getRoots() const { return roots; }
        virtual const std::vector<Node>& getNodes() const { return nodes; }
        virtual const std::vector<Split>& getSplits() const { return splits; }
        virtual const std::vector<int>& getSubsets() const { return subsets; }

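        // Model state: the training parameters, the variable bookkeeping (indices, types,
        // category maps) and the compact Node/Split representation of the trained trees
        // exposed through getRoots()/getNodes()/getSplits()/getSubsets().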
        TreeParams params;

        vector<int> varIdx;
        vector<int> compVarIdx;
        vector<uchar> varType;
        vector<Vec2i> catOfs;
        vector<int> catMap;
        vector<int> roots;
        vector<Node> nodes;
        vector<Split> splits;
        vector<int> subsets;
        vector<int> classLabels;
        vector<float> missingSubst;
        vector<int> varMapping;
        bool _isClassifier;

        Ptr<WorkData> w;
    };

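    // Reads a node that may have been stored either as a matrix (MAP) or as a plain
    // sequence (SEQ) into a std::vector; nodes of any other type are left untouched.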
    template <typename T>
    static inline void readVectorOrMat(const FileNode & node, std::vector<T> & v)
    {
        if (node.type() == FileNode::MAP)
        {
            Mat m;
            node >> m;
            m.copyTo(v);
        }
        else if (node.type() == FileNode::SEQ)
        {
            node >> v;
        }
    }

}}

#endif /* __OPENCV_ML_PRECOMP_HPP__ */