opencv/samples/cpp/tree_engine.cpp
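
/*
 * tree_engine.cpp
 *
 * Trains and evaluates the OpenCV ml tree-based models on a CSV dataset:
 * a single decision tree (DTrees), gentle boosting (Boost, only for
 * regression or 2-class data) and a random forest (RTrees), printing
 * train/test errors and the forest's variable importance estimates.
 */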

#include "opencv2/ml.hpp"
#include "opencv2/core.hpp"
#include "opencv2/core/utility.hpp"
#include <stdio.h>
#include <iostream>
#include <string>
#include <map>

using namespace cv;
using namespace cv::ml;
static void help(char** argv)
{
    printf(
        "\nThis sample demonstrates how to use different decision trees and forests including boosting and random trees.\n"
        "Usage:\n\t%s [-r=<response_column>] [-ts=type_spec] <csv filename>\n"
        "where -r=<response_column> specifies the 0-based index of the response (0 by default)\n"
        "-ts= specifies the var type spec in the form ord[n1,n2-n3,n4-n5,...]cat[m1-m2,m3,m4-m5,...]\n"
        "<csv filename> is the name of training data file in comma-separated value format\n\n", argv[0]);
}
static void train_and_print_errs(Ptr<StatModel> model, const Ptr<TrainData>& data)
{
    bool ok = model->train(data);
    if( !ok )
    {
        printf("Training failed\n");
    }
    else
    {
        // calcError() evaluates the trained model on the training subset
        // (test=false) or on the test subset (test=true) of the same data
        printf( "train error: %f\n", model->calcError(data, false, noArray()) );
        printf( "test error: %f\n\n", model->calcError(data, true, noArray()) );
    }
}
int main(int argc, char** argv)
{
    cv::CommandLineParser parser(argc, argv, "{ help h | | }{r | 0 | }{ts | | }{@input | | }");
    if (parser.has("help"))
    {
        help(argv);
        return 0;
    }
    std::string filename = parser.get<std::string>("@input");
    int response_idx;
    std::string typespec;
    response_idx = parser.get<int>("r");
    typespec = parser.get<std::string>("ts");
    if( filename.empty() || !parser.check() )
    {
        parser.printErrors();
        help(argv);
        return 0;
    }
    printf("\nReading in %s...\n\n", filename.c_str());
    const double train_test_split_ratio = 0.5;

    Ptr<TrainData> data = TrainData::loadFromCSV(filename, 0, response_idx, response_idx+1, typespec);
    if( data.empty() )
    {
        printf("ERROR: File %s cannot be read\n", filename.c_str());
        return 0;
    }

    data->setTrainTestSplitRatio(train_test_split_ratio);
    std::cout << "Test/Train: " << data->getNTestSamples() << "/" << data->getNTrainSamples() << std::endl;
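
    // Single decision tree: depth <= 10, no surrogate splits,
    // no cross-validation pruning (CVFolds = 0)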
    printf("======DTREE=====\n");
    Ptr<DTrees> dtree = DTrees::create();
    dtree->setMaxDepth(10);
    dtree->setMinSampleCount(2);
    dtree->setRegressionAccuracy(0);
    dtree->setUseSurrogates(false);
    dtree->setMaxCategories(16);
    dtree->setCVFolds(0);
    dtree->setUse1SERule(false);
    dtree->setTruncatePrunedTree(false);
    dtree->setPriors(Mat());
    train_and_print_errs(dtree, data);
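
    // Boost is a binary classifier, so boosting is only attempted when the
    // data is a regression problem or a 2-class classification problem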
    if( (int)data->getClassLabels().total() <= 2 ) // regression or 2-class classification problem
    {
        printf("======BOOST=====\n");
        Ptr<Boost> boost = Boost::create();
        boost->setBoostType(Boost::GENTLE);
        boost->setWeakCount(100);
        boost->setWeightTrimRate(0.95);
        boost->setMaxDepth(2);
        boost->setUseSurrogates(false);
        boost->setPriors(Mat());
        train_and_print_errs(boost, data);
    }
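
    // Random forest: at most 100 trees (via the termination criteria),
    // with variable importance computation enabled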
    printf("======RTREES=====\n");
    Ptr<RTrees> rtrees = RTrees::create();
    rtrees->setMaxDepth(10);
    rtrees->setMinSampleCount(2);
    rtrees->setRegressionAccuracy(0);
    rtrees->setUseSurrogates(false);
    rtrees->setMaxCategories(16);
    rtrees->setPriors(Mat());
    rtrees->setCalculateVarImportance(true);
    rtrees->setActiveVarCount(0);
    rtrees->setTermCriteria(TermCriteria(TermCriteria::MAX_ITER, 100, 0));
    train_and_print_errs(rtrees, data);
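
    // Run the trained forest on all samples, then print the per-variable
    // importance estimated during training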
    cv::Mat ref_labels = data->getClassLabels();
    cv::Mat test_data = data->getTestSampleIdx();
    cv::Mat predict_labels;
    rtrees->predict(data->getSamples(), predict_labels);

    cv::Mat variable_importance = rtrees->getVarImportance();
    std::cout << "Estimated variable importance" << std::endl;
    for (int i = 0; i < variable_importance.rows; i++) {
        std::cout << "Variable " << i << ": " << variable_importance.at<float>(i, 0) << std::endl;
    }
    return 0;
}