made everything compile and even run somehow

Vadim Pisarevsky 2014-08-03 01:41:09 +04:00
parent 10b60f8d16
commit c20ff6ce19
31 changed files with 11910 additions and 9061 deletions

@ -1,4 +1,4 @@
set(OPENCV_TRAINCASCADE_DEPS opencv_core opencv_ml opencv_imgproc opencv_photo opencv_objdetect opencv_imgcodecs opencv_videoio opencv_highgui opencv_calib3d opencv_video opencv_features2d)
set(OPENCV_TRAINCASCADE_DEPS opencv_core opencv_imgproc opencv_objdetect opencv_imgcodecs opencv_highgui opencv_calib3d opencv_features2d)
ocv_check_dependencies(${OPENCV_TRAINCASCADE_DEPS})
if(NOT OCV_DEPENDENCIES_FOUND)
@ -10,13 +10,10 @@ project(traincascade)
ocv_include_directories("${CMAKE_CURRENT_SOURCE_DIR}" "${OpenCV_SOURCE_DIR}/include/opencv")
ocv_include_modules(${OPENCV_TRAINCASCADE_DEPS})
set(traincascade_files traincascade.cpp
cascadeclassifier.cpp cascadeclassifier.h
boost.cpp boost.h features.cpp traincascade_features.h
haarfeatures.cpp haarfeatures.h
lbpfeatures.cpp lbpfeatures.h
HOGfeatures.cpp HOGfeatures.h
imagestorage.cpp imagestorage.h)
file(GLOB SRCS *.cpp)
file(GLOB HDRS *.h*)
set(traincascade_files ${SRCS} ${HDRS})
set(the_target opencv_traincascade)
add_executable(${the_target} ${traincascade_files})

@ -2,7 +2,7 @@
#define _OPENCV_BOOST_H_
#include "traincascade_features.h"
#include "ml.h"
#include "old_ml.hpp"
struct CvCascadeBoostParams : CvBoostParams
{

@ -7,8 +7,6 @@
#include "lbpfeatures.h"
#include "HOGfeatures.h" //new
#include "boost.h"
#include "cv.h"
#include "cxcore.h"
#define CC_CASCADE_FILENAME "cascade.xml"
#define CC_PARAMS_FILENAME "params.xml"

apps/traincascade/old_ml.hpp (new file, 2165 lines)

File diff suppressed because it is too large.

File diff suppressed because it is too large.

@ -0,0 +1,792 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "old_ml_precomp.hpp"
#include <ctype.h>
#define MISS_VAL FLT_MAX
#define CV_VAR_MISS 0
CvTrainTestSplit::CvTrainTestSplit()
{
train_sample_part_mode = CV_COUNT;
train_sample_part.count = -1;
mix = false;
}
CvTrainTestSplit::CvTrainTestSplit( int _train_sample_count, bool _mix )
{
train_sample_part_mode = CV_COUNT;
train_sample_part.count = _train_sample_count;
mix = _mix;
}
CvTrainTestSplit::CvTrainTestSplit( float _train_sample_portion, bool _mix )
{
train_sample_part_mode = CV_PORTION;
train_sample_part.portion = _train_sample_portion;
mix = _mix;
}
////////////////
CvMLData::CvMLData()
{
values = missing = var_types = var_idx_mask = response_out = var_idx_out = var_types_out = 0;
train_sample_idx = test_sample_idx = 0;
header_lines_number = 0;
sample_idx = 0;
response_idx = -1;
train_sample_count = -1;
delimiter = ',';
miss_ch = '?';
//flt_separator = '.';
rng = &cv::theRNG();
}
CvMLData::~CvMLData()
{
clear();
}
void CvMLData::free_train_test_idx()
{
cvReleaseMat( &train_sample_idx );
cvReleaseMat( &test_sample_idx );
sample_idx = 0;
}
void CvMLData::clear()
{
class_map.clear();
cvReleaseMat( &values );
cvReleaseMat( &missing );
cvReleaseMat( &var_types );
cvReleaseMat( &var_idx_mask );
cvReleaseMat( &response_out );
cvReleaseMat( &var_idx_out );
cvReleaseMat( &var_types_out );
free_train_test_idx();
total_class_count = 0;
response_idx = -1;
train_sample_count = -1;
}
void CvMLData::set_header_lines_number( int idx )
{
header_lines_number = std::max(0, idx);
}
int CvMLData::get_header_lines_number() const
{
return header_lines_number;
}
static char *fgets_chomp(char *str, int n, FILE *stream)
{
char *head = fgets(str, n, stream);
if( head )
{
for(char *tail = head + strlen(head) - 1; tail >= head; --tail)
{
if( *tail != '\r' && *tail != '\n' )
break;
*tail = '\0';
}
}
return head;
}
int CvMLData::read_csv(const char* filename)
{
const int M = 1000000;
const char str_delimiter[3] = { ' ', delimiter, '\0' };
FILE* file = 0;
CvMemStorage* storage;
CvSeq* seq;
char *ptr;
float* el_ptr;
CvSeqReader reader;
int cols_count = 0;
uchar *var_types_ptr = 0;
clear();
file = fopen( filename, "rt" );
if( !file )
return -1;
std::vector<char> _buf(M);
char* buf = &_buf[0];
// skip header lines
for( int i = 0; i < header_lines_number; i++ )
{
if( fgets( buf, M, file ) == 0 )
{
fclose(file);
return -1;
}
}
// read the first data line and determine the number of variables
if( !fgets_chomp( buf, M, file ))
{
fclose(file);
return -1;
}
ptr = buf;
while( *ptr == ' ' )
ptr++;
for( ; *ptr != '\0'; )
{
if(*ptr == delimiter || *ptr == ' ')
{
cols_count++;
ptr++;
while( *ptr == ' ' ) ptr++;
}
else
ptr++;
}
cols_count++;
if ( cols_count == 0)
{
fclose(file);
return -1;
}
// create temporary memory storage to store the whole database
el_ptr = new float[cols_count];
storage = cvCreateMemStorage();
seq = cvCreateSeq( 0, sizeof(*seq), cols_count*sizeof(float), storage );
var_types = cvCreateMat( 1, cols_count, CV_8U );
cvZero( var_types );
var_types_ptr = var_types->data.ptr;
for(;;)
{
char *token = NULL;
int type;
token = strtok(buf, str_delimiter);
if (!token)
break;
for (int i = 0; i < cols_count-1; i++)
{
str_to_flt_elem( token, el_ptr[i], type);
var_types_ptr[i] |= type;
token = strtok(NULL, str_delimiter);
if (!token)
{
fclose(file);
delete [] el_ptr;
return -1;
}
}
str_to_flt_elem( token, el_ptr[cols_count-1], type);
var_types_ptr[cols_count-1] |= type;
cvSeqPush( seq, el_ptr );
if( !fgets_chomp( buf, M, file ) )
break;
}
fclose(file);
values = cvCreateMat( seq->total, cols_count, CV_32FC1 );
missing = cvCreateMat( seq->total, cols_count, CV_8U );
var_idx_mask = cvCreateMat( 1, values->cols, CV_8UC1 );
cvSet( var_idx_mask, cvRealScalar(1) );
train_sample_count = seq->total;
cvStartReadSeq( seq, &reader );
for(int i = 0; i < seq->total; i++ )
{
const float* sdata = (float*)reader.ptr;
float* ddata = values->data.fl + cols_count*i;
uchar* dm = missing->data.ptr + cols_count*i;
for( int j = 0; j < cols_count; j++ )
{
ddata[j] = sdata[j];
dm[j] = ( fabs( MISS_VAL - sdata[j] ) <= FLT_EPSILON );
}
CV_NEXT_SEQ_ELEM( seq->elem_size, reader );
}
if ( cvNorm( missing, 0, CV_L1 ) <= FLT_EPSILON )
cvReleaseMat( &missing );
cvReleaseMemStorage( &storage );
delete []el_ptr;
return 0;
}
const CvMat* CvMLData::get_values() const
{
return values;
}
const CvMat* CvMLData::get_missing() const
{
CV_FUNCNAME( "CvMLData::get_missing" );
__BEGIN__;
if ( !values )
CV_ERROR( CV_StsInternal, "data is empty" );
__END__;
return missing;
}
const std::map<cv::String, int>& CvMLData::get_class_labels_map() const
{
return class_map;
}
void CvMLData::str_to_flt_elem( const char* token, float& flt_elem, int& type)
{
char* stopstring = NULL;
flt_elem = (float)strtod( token, &stopstring );
assert( stopstring );
type = CV_VAR_ORDERED;
if ( *stopstring == miss_ch && strlen(stopstring) == 1 ) // missing value
{
flt_elem = MISS_VAL;
type = CV_VAR_MISS;
}
else
{
if ( (*stopstring != 0) && (*stopstring != '\n') && (strcmp(stopstring, "\r\n") != 0) ) // class label
{
int idx = class_map[token];
if ( idx == 0)
{
total_class_count++;
idx = total_class_count;
class_map[token] = idx;
}
flt_elem = (float)idx;
type = CV_VAR_CATEGORICAL;
}
}
}
void CvMLData::set_delimiter(char ch)
{
CV_FUNCNAME( "CvMLData::set_delimited" );
__BEGIN__;
if (ch == miss_ch /*|| ch == flt_separator*/)
CV_ERROR(CV_StsBadArg, "delimited, miss_character and flt_separator must be different");
delimiter = ch;
__END__;
}
char CvMLData::get_delimiter() const
{
return delimiter;
}
void CvMLData::set_miss_ch(char ch)
{
CV_FUNCNAME( "CvMLData::set_miss_ch" );
__BEGIN__;
if (ch == delimiter/* || ch == flt_separator*/)
CV_ERROR(CV_StsBadArg, "delimited, miss_character and flt_separator must be different");
miss_ch = ch;
__END__;
}
char CvMLData::get_miss_ch() const
{
return miss_ch;
}
void CvMLData::set_response_idx( int idx )
{
CV_FUNCNAME( "CvMLData::set_response_idx" );
__BEGIN__;
if ( !values )
CV_ERROR( CV_StsInternal, "data is empty" );
if ( idx >= values->cols)
CV_ERROR( CV_StsBadArg, "idx value is not correct" );
if ( response_idx >= 0 )
change_var_idx( response_idx, true );
if ( idx >= 0 )
change_var_idx( idx, false );
response_idx = idx;
__END__;
}
int CvMLData::get_response_idx() const
{
CV_FUNCNAME( "CvMLData::get_response_idx" );
__BEGIN__;
if ( !values )
CV_ERROR( CV_StsInternal, "data is empty" );
__END__;
return response_idx;
}
void CvMLData::change_var_type( int var_idx, int type )
{
CV_FUNCNAME( "CvMLData::change_var_type" );
__BEGIN__;
int var_count = 0;
if ( !values )
CV_ERROR( CV_StsInternal, "data is empty" );
var_count = values->cols;
if ( var_idx < 0 || var_idx >= var_count)
CV_ERROR( CV_StsBadArg, "var_idx is not correct" );
if ( type != CV_VAR_ORDERED && type != CV_VAR_CATEGORICAL)
CV_ERROR( CV_StsBadArg, "type is not correct" );
assert( var_types );
if ( var_types->data.ptr[var_idx] == CV_VAR_CATEGORICAL && type == CV_VAR_ORDERED)
CV_ERROR( CV_StsBadArg, "it`s impossible to assign CV_VAR_ORDERED type to categorical variable" );
var_types->data.ptr[var_idx] = (uchar)type;
__END__;
return;
}
void CvMLData::set_var_types( const char* str )
{
CV_FUNCNAME( "CvMLData::set_var_types" );
__BEGIN__;
const char* ord = 0, *cat = 0;
int var_count = 0, set_var_type_count = 0;
if ( !values )
CV_ERROR( CV_StsInternal, "data is empty" );
var_count = values->cols;
assert( var_types );
ord = strstr( str, "ord" );
cat = strstr( str, "cat" );
if ( !ord && !cat )
CV_ERROR( CV_StsBadArg, "types string is not correct" );
if ( !ord && strlen(cat) == 3 ) // str == "cat"
{
cvSet( var_types, cvScalarAll(CV_VAR_CATEGORICAL) );
return;
}
if ( !cat && strlen(ord) == 3 ) // str == "ord"
{
cvSet( var_types, cvScalarAll(CV_VAR_ORDERED) );
return;
}
if ( ord ) // parse ord str
{
char* stopstring = NULL;
if ( ord[3] != '[')
CV_ERROR( CV_StsBadArg, "types string is not correct" );
ord += 4; // pass "ord["
do
{
int b1 = (int)strtod( ord, &stopstring );
if ( *stopstring == 0 || (*stopstring != ',' && *stopstring != ']' && *stopstring != '-') )
CV_ERROR( CV_StsBadArg, "types string is not correct" );
ord = stopstring + 1;
if ( (stopstring[0] == ',') || (stopstring[0] == ']'))
{
if ( var_types->data.ptr[b1] == CV_VAR_CATEGORICAL)
CV_ERROR( CV_StsBadArg, "it`s impossible to assign CV_VAR_ORDERED type to categorical variable" );
var_types->data.ptr[b1] = CV_VAR_ORDERED;
set_var_type_count++;
}
else
{
if ( stopstring[0] == '-')
{
int b2 = (int)strtod( ord, &stopstring);
if ( (*stopstring == 0) || (*stopstring != ',' && *stopstring != ']') )
CV_ERROR( CV_StsBadArg, "types string is not correct" );
ord = stopstring + 1;
for (int i = b1; i <= b2; i++)
{
if ( var_types->data.ptr[i] == CV_VAR_CATEGORICAL)
CV_ERROR( CV_StsBadArg, "it`s impossible to assign CV_VAR_ORDERED type to categorical variable" );
var_types->data.ptr[i] = CV_VAR_ORDERED;
}
set_var_type_count += b2 - b1 + 1;
}
else
CV_ERROR( CV_StsBadArg, "types string is not correct" );
}
}
while (*stopstring != ']');
if ( stopstring[1] != '\0' && stopstring[1] != ',')
CV_ERROR( CV_StsBadArg, "types string is not correct" );
}
if ( cat ) // parse cat str
{
char* stopstring = NULL;
if ( cat[3] != '[')
CV_ERROR( CV_StsBadArg, "types string is not correct" );
cat += 4; // pass "cat["
do
{
int b1 = (int)strtod( cat, &stopstring );
if ( *stopstring == 0 || (*stopstring != ',' && *stopstring != ']' && *stopstring != '-') )
CV_ERROR( CV_StsBadArg, "types string is not correct" );
cat = stopstring + 1;
if ( (stopstring[0] == ',') || (stopstring[0] == ']'))
{
var_types->data.ptr[b1] = CV_VAR_CATEGORICAL;
set_var_type_count++;
}
else
{
if ( stopstring[0] == '-')
{
int b2 = (int)strtod( cat, &stopstring);
if ( (*stopstring == 0) || (*stopstring != ',' && *stopstring != ']') )
CV_ERROR( CV_StsBadArg, "types string is not correct" );
cat = stopstring + 1;
for (int i = b1; i <= b2; i++)
var_types->data.ptr[i] = CV_VAR_CATEGORICAL;
set_var_type_count += b2 - b1 + 1;
}
else
CV_ERROR( CV_StsBadArg, "types string is not correct" );
}
}
while (*stopstring != ']');
if ( stopstring[1] != '\0' && stopstring[1] != ',')
CV_ERROR( CV_StsBadArg, "types string is not correct" );
}
if (set_var_type_count != var_count)
CV_ERROR( CV_StsBadArg, "types string is not correct" );
__END__;
}
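// The grammar accepted above is "ord"/"cat", either bare (all variables get
// that type) or followed by a bracketed list of indices and index ranges.
// A usage sketch for a hypothetical 16-column data set:
//
//     CvMLData data;
//     data.set_var_types("ord[0-13],cat[14,15]"); // 0..13 ordered, 14..15 categorical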
const CvMat* CvMLData::get_var_types()
{
CV_FUNCNAME( "CvMLData::get_var_types" );
__BEGIN__;
uchar *var_types_out_ptr = 0;
int avcount, vt_size;
if ( !values )
CV_ERROR( CV_StsInternal, "data is empty" );
assert( var_idx_mask );
avcount = cvFloor( cvNorm( var_idx_mask, 0, CV_L1 ) );
vt_size = avcount + (response_idx >= 0);
if ( avcount == values->cols || (avcount == values->cols-1 && response_idx == values->cols-1) )
return var_types;
if ( !var_types_out || ( var_types_out && var_types_out->cols != vt_size ) )
{
cvReleaseMat( &var_types_out );
var_types_out = cvCreateMat( 1, vt_size, CV_8UC1 );
}
var_types_out_ptr = var_types_out->data.ptr;
for( int i = 0; i < var_types->cols; i++)
{
if (i == response_idx || !var_idx_mask->data.ptr[i]) continue;
*var_types_out_ptr = var_types->data.ptr[i];
var_types_out_ptr++;
}
if ( response_idx >= 0 )
*var_types_out_ptr = var_types->data.ptr[response_idx];
__END__;
return var_types_out;
}
int CvMLData::get_var_type( int var_idx ) const
{
return var_types->data.ptr[var_idx];
}
const CvMat* CvMLData::get_responses()
{
CV_FUNCNAME( "CvMLData::get_responses_ptr" );
__BEGIN__;
int var_count = 0;
if ( !values )
CV_ERROR( CV_StsInternal, "data is empty" );
var_count = values->cols;
if ( response_idx < 0 || response_idx >= var_count )
return 0;
if ( !response_out )
response_out = cvCreateMatHeader( values->rows, 1, CV_32FC1 );
else
cvInitMatHeader( response_out, values->rows, 1, CV_32FC1);
cvGetCol( values, response_out, response_idx );
__END__;
return response_out;
}
void CvMLData::set_train_test_split( const CvTrainTestSplit * spl)
{
CV_FUNCNAME( "CvMLData::set_division" );
__BEGIN__;
int sample_count = 0;
if ( !values )
CV_ERROR( CV_StsInternal, "data is empty" );
sample_count = values->rows;
float train_sample_portion;
if (spl->train_sample_part_mode == CV_COUNT)
{
train_sample_count = spl->train_sample_part.count;
if (train_sample_count > sample_count)
CV_ERROR( CV_StsBadArg, "train samples count is not correct" );
train_sample_count = train_sample_count<=0 ? sample_count : train_sample_count;
}
else // spl->train_sample_part_mode == CV_PORTION
{
train_sample_portion = spl->train_sample_part.portion;
if ( train_sample_portion > 1)
CV_ERROR( CV_StsBadArg, "train samples count is not correct" );
train_sample_portion = train_sample_portion <= FLT_EPSILON ||
1 - train_sample_portion <= FLT_EPSILON ? 1 : train_sample_portion;
train_sample_count = std::max(1, cvFloor( train_sample_portion * sample_count ));
}
if ( train_sample_count == sample_count )
{
free_train_test_idx();
return;
}
if ( train_sample_idx && train_sample_idx->cols != train_sample_count )
free_train_test_idx();
if ( !sample_idx)
{
int test_sample_count = sample_count- train_sample_count;
sample_idx = (int*)cvAlloc( sample_count * sizeof(sample_idx[0]) );
for (int i = 0; i < sample_count; i++ )
sample_idx[i] = i;
train_sample_idx = cvCreateMatHeader( 1, train_sample_count, CV_32SC1 );
*train_sample_idx = cvMat( 1, train_sample_count, CV_32SC1, &sample_idx[0] );
CV_Assert(test_sample_count > 0);
test_sample_idx = cvCreateMatHeader( 1, test_sample_count, CV_32SC1 );
*test_sample_idx = cvMat( 1, test_sample_count, CV_32SC1, &sample_idx[train_sample_count] );
}
mix = spl->mix;
if ( mix )
mix_train_and_test_idx();
__END__;
}
const CvMat* CvMLData::get_train_sample_idx() const
{
CV_FUNCNAME( "CvMLData::get_train_sample_idx" );
__BEGIN__;
if ( !values )
CV_ERROR( CV_StsInternal, "data is empty" );
__END__;
return train_sample_idx;
}
const CvMat* CvMLData::get_test_sample_idx() const
{
CV_FUNCNAME( "CvMLData::get_test_sample_idx" );
__BEGIN__;
if ( !values )
CV_ERROR( CV_StsInternal, "data is empty" );
__END__;
return test_sample_idx;
}
void CvMLData::mix_train_and_test_idx()
{
CV_FUNCNAME( "CvMLData::mix_train_and_test_idx" );
__BEGIN__;
if ( !values )
CV_ERROR( CV_StsInternal, "data is empty" );
__END__;
if ( !sample_idx)
return;
if ( train_sample_count > 0 && train_sample_count < values->rows )
{
int n = values->rows;
for (int i = 0; i < n; i++)
{
int a = (*rng)(n);
int b = (*rng)(n);
int t;
CV_SWAP( sample_idx[a], sample_idx[b], t );
}
}
}
const CvMat* CvMLData::get_var_idx()
{
CV_FUNCNAME( "CvMLData::get_var_idx" );
__BEGIN__;
int avcount = 0;
if ( !values )
CV_ERROR( CV_StsInternal, "data is empty" );
assert( var_idx_mask );
avcount = cvFloor( cvNorm( var_idx_mask, 0, CV_L1 ) );
int* vidx;
if ( avcount == values->cols )
return 0;
if ( !var_idx_out || ( var_idx_out && var_idx_out->cols != avcount ) )
{
cvReleaseMat( &var_idx_out );
var_idx_out = cvCreateMat( 1, avcount, CV_32SC1);
if ( response_idx >=0 )
var_idx_mask->data.ptr[response_idx] = 0;
}
vidx = var_idx_out->data.i;
for(int i = 0; i < var_idx_mask->cols; i++)
if ( var_idx_mask->data.ptr[i] )
{
*vidx = i;
vidx++;
}
__END__;
return var_idx_out;
}
// compatibility wrapper for the historically misspelled name; forwards to change_var_idx
void CvMLData::chahge_var_idx( int vi, bool state )
{
change_var_idx( vi, state );
}
void CvMLData::change_var_idx( int vi, bool state )
{
CV_FUNCNAME( "CvMLData::change_var_idx" );
__BEGIN__;
int var_count = 0;
if ( !values )
CV_ERROR( CV_StsInternal, "data is empty" );
var_count = values->cols;
if ( vi < 0 || vi >= var_count)
CV_ERROR( CV_StsBadArg, "variable index is not correct" );
assert( var_idx_mask );
var_idx_mask->data.ptr[vi] = state;
__END__;
}
/* End of file. */
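A minimal sketch of driving the resurrected CvMLData loader end to end; the file name, the response column, and the split parameters are hypothetical:

#include "old_ml.hpp"

static int load_dataset()
{
    CvMLData data;
    if( data.read_csv("data.csv") != 0 )    // hypothetical CSV file; returns -1 on failure
        return -1;
    data.set_response_idx(0);               // hypothetical: labels live in column 0
    CvTrainTestSplit split(0.8f, true);     // 80% of samples for training, shuffled
    data.set_train_test_split(&split);
    const CvMat* values = data.get_values();
    const CvMat* train_idx = data.get_train_sample_idx();
    (void)values; (void)train_idx;          // hand these to a legacy CvStatModel
    return 0;
}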

File diff suppressed because it is too large.

@ -0,0 +1,376 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_PRECOMP_H__
#define __OPENCV_PRECOMP_H__
#include "opencv2/core.hpp"
#include "old_ml.hpp"
#include "opencv2/core/core_c.h"
#include "opencv2/core/utility.hpp"
#include "opencv2/core/private.hpp"
#include <assert.h>
#include <float.h>
#include <limits.h>
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#define ML_IMPL CV_IMPL
#define __BEGIN__ __CV_BEGIN__
#define __END__ __CV_END__
#define EXIT __CV_EXIT__
#define CV_MAT_ELEM_FLAG( mat, type, comp, vect, tflag ) \
(( tflag == CV_ROW_SAMPLE ) \
? (CV_MAT_ELEM( mat, type, comp, vect )) \
: (CV_MAT_ELEM( mat, type, vect, comp )))
/* Convert matrix to vector */
#define ICV_MAT2VEC( mat, vdata, vstep, num ) \
if( MIN( (mat).rows, (mat).cols ) != 1 ) \
CV_ERROR( CV_StsBadArg, "" ); \
(vdata) = ((mat).data.ptr); \
if( (mat).rows == 1 ) \
{ \
(vstep) = CV_ELEM_SIZE( (mat).type ); \
(num) = (mat).cols; \
} \
else \
{ \
(vstep) = (mat).step; \
(num) = (mat).rows; \
}
/* get raw data */
#define ICV_RAWDATA( mat, flags, rdata, sstep, cstep, m, n ) \
(rdata) = (mat).data.ptr; \
if( CV_IS_ROW_SAMPLE( flags ) ) \
{ \
(sstep) = (mat).step; \
(cstep) = CV_ELEM_SIZE( (mat).type ); \
(m) = (mat).rows; \
(n) = (mat).cols; \
} \
else \
{ \
(cstep) = (mat).step; \
(sstep) = CV_ELEM_SIZE( (mat).type ); \
(n) = (mat).rows; \
(m) = (mat).cols; \
}
#define ICV_IS_MAT_OF_TYPE( mat, mat_type) \
(CV_IS_MAT( mat ) && CV_MAT_TYPE( mat->type ) == (mat_type) && \
(mat)->cols > 0 && (mat)->rows > 0)
/*
uchar* data; int sstep, cstep; - trainData->data
uchar* classes; int clstep; int ncl;- trainClasses
uchar* tmask; int tmstep; int ntm; - typeMask
uchar* missed;int msstep, mcstep; -missedMeasurements...
int mm, mn; == m,n == size,dim
uchar* sidx;int sistep; - sampleIdx
uchar* cidx;int cistep; - compIdx
int k, l; == n,m == dim,size (length of cidx, sidx)
int m, n; == size,dim
*/
#define ICV_DECLARE_TRAIN_ARGS() \
uchar* data; \
int sstep, cstep; \
uchar* classes; \
int clstep; \
int ncl; \
uchar* tmask; \
int tmstep; \
int ntm; \
uchar* missed; \
int msstep, mcstep; \
int mm, mn; \
uchar* sidx; \
int sistep; \
uchar* cidx; \
int cistep; \
int k, l; \
int m, n; \
\
data = classes = tmask = missed = sidx = cidx = NULL; \
sstep = cstep = clstep = ncl = tmstep = ntm = msstep = mcstep = mm = mn = 0; \
sistep = cistep = k = l = m = n = 0;
#define ICV_TRAIN_DATA_REQUIRED( param, flags ) \
if( !ICV_IS_MAT_OF_TYPE( (param), CV_32FC1 ) ) \
{ \
CV_ERROR( CV_StsBadArg, "Invalid " #param " parameter" ); \
} \
else \
{ \
ICV_RAWDATA( *(param), (flags), data, sstep, cstep, m, n ); \
k = n; \
l = m; \
}
#define ICV_TRAIN_CLASSES_REQUIRED( param ) \
if( !ICV_IS_MAT_OF_TYPE( (param), CV_32FC1 ) ) \
{ \
CV_ERROR( CV_StsBadArg, "Invalid " #param " parameter" ); \
} \
else \
{ \
ICV_MAT2VEC( *(param), classes, clstep, ncl ); \
if( m != ncl ) \
{ \
CV_ERROR( CV_StsBadArg, "Unmatched sizes" ); \
} \
}
#define ICV_ARG_NULL( param ) \
if( (param) != NULL ) \
{ \
CV_ERROR( CV_StsBadArg, #param " parameter must be NULL" ); \
}
#define ICV_MISSED_MEASUREMENTS_OPTIONAL( param, flags ) \
if( param ) \
{ \
if( !ICV_IS_MAT_OF_TYPE( param, CV_8UC1 ) ) \
{ \
CV_ERROR( CV_StsBadArg, "Invalid " #param " parameter" ); \
} \
else \
{ \
ICV_RAWDATA( *(param), (flags), missed, msstep, mcstep, mm, mn ); \
if( mm != m || mn != n ) \
{ \
CV_ERROR( CV_StsBadArg, "Unmatched sizes" ); \
} \
} \
}
#define ICV_COMP_IDX_OPTIONAL( param ) \
if( param ) \
{ \
if( !ICV_IS_MAT_OF_TYPE( param, CV_32SC1 ) ) \
{ \
CV_ERROR( CV_StsBadArg, "Invalid " #param " parameter" ); \
} \
else \
{ \
ICV_MAT2VEC( *(param), cidx, cistep, k ); \
if( k > n ) \
CV_ERROR( CV_StsBadArg, "Invalid " #param " parameter" ); \
} \
}
#define ICV_SAMPLE_IDX_OPTIONAL( param ) \
if( param ) \
{ \
if( !ICV_IS_MAT_OF_TYPE( param, CV_32SC1 ) ) \
{ \
CV_ERROR( CV_StsBadArg, "Invalid " #param " parameter" ); \
} \
else \
{ \
ICV_MAT2VEC( *(param), sidx, sistep, l ); \
if( l > m ) \
CV_ERROR( CV_StsBadArg, "Invalid " #param " parameter" ); \
} \
}
/****************************************************************************************/
#define ICV_CONVERT_FLOAT_ARRAY_TO_MATRICE( array, matrice ) \
{ \
CvMat a, b; \
int dims = (matrice)->cols; \
int nsamples = (matrice)->rows; \
int type = CV_MAT_TYPE((matrice)->type); \
int i, offset = dims; \
\
CV_ASSERT( type == CV_32FC1 || type == CV_64FC1 ); \
offset *= ((type == CV_32FC1) ? sizeof(float) : sizeof(double));\
\
b = cvMat( 1, dims, CV_32FC1 ); \
cvGetRow( matrice, &a, 0 ); \
for( i = 0; i < nsamples; i++, a.data.ptr += offset ) \
{ \
b.data.fl = (float*)array[i]; \
CV_CALL( cvConvert( &b, &a ) ); \
} \
}
/****************************************************************************************\
* Auxiliary functions declarations *
\****************************************************************************************/
/* Generates <num_of_clusters> class centers as uniform random vectors inside the
parallelepiped where <data> is concentrated. Vectors in <data> should have
horizontal orientation. If <centers> != NULL, the function doesn't allocate any
memory and stores the generated centers in <centers>, returning <centers>.
If <centers> == NULL, the function allocates the memory and creates the matrix.
Centers are supposed to be oriented horizontally. */
CvMat* icvGenerateRandomClusterCenters( int seed,
const CvMat* data,
int num_of_clusters,
CvMat* centers CV_DEFAULT(0));
/* Fills <labels> using <probs> by choosing the maximal probability. Outliers are
determined by <outlier_thresh> and get the cluster label (-1). The function also
makes sure there are no "empty" clusters, filling them with the maximal probability
vector. If probs_sums != NULL, fills it with the sums of probabilities for each
sample (useful for normalizing the probability matrix of FCM) */
void icvFindClusterLabels( const CvMat* probs, float outlier_thresh, float r,
const CvMat* labels );
typedef struct CvSparseVecElem32f
{
int idx;
float val;
}
CvSparseVecElem32f;
/* Prepare training data and related parameters */
#define CV_TRAIN_STATMODEL_DEFRAGMENT_TRAIN_DATA 1
#define CV_TRAIN_STATMODEL_SAMPLES_AS_ROWS 2
#define CV_TRAIN_STATMODEL_SAMPLES_AS_COLUMNS 4
#define CV_TRAIN_STATMODEL_CATEGORICAL_RESPONSE 8
#define CV_TRAIN_STATMODEL_ORDERED_RESPONSE 16
#define CV_TRAIN_STATMODEL_RESPONSES_ON_OUTPUT 32
#define CV_TRAIN_STATMODEL_ALWAYS_COPY_TRAIN_DATA 64
#define CV_TRAIN_STATMODEL_SPARSE_AS_SPARSE 128
int
cvPrepareTrainData( const char* /*funcname*/,
const CvMat* train_data, int tflag,
const CvMat* responses, int response_type,
const CvMat* var_idx,
const CvMat* sample_idx,
bool always_copy_data,
const float*** out_train_samples,
int* _sample_count,
int* _var_count,
int* _var_all,
CvMat** out_responses,
CvMat** out_response_map,
CvMat** out_var_idx,
CvMat** out_sample_idx=0 );
void
cvSortSamplesByClasses( const float** samples, const CvMat* classes,
int* class_ranges, const uchar** mask CV_DEFAULT(0) );
void
cvCombineResponseMaps (CvMat* _responses,
const CvMat* old_response_map,
CvMat* new_response_map,
CvMat** out_response_map);
void
cvPreparePredictData( const CvArr* sample, int dims_all, const CvMat* comp_idx,
int class_count, const CvMat* prob, float** row_sample,
int as_sparse CV_DEFAULT(0) );
/* copies clustering [or batch "predict"] results
(labels and/or centers and/or probs) back to the output arrays */
void
cvWritebackLabels( const CvMat* labels, CvMat* dst_labels,
const CvMat* centers, CvMat* dst_centers,
const CvMat* probs, CvMat* dst_probs,
const CvMat* sample_idx, int samples_all,
const CvMat* comp_idx, int dims_all );
#define cvWritebackResponses cvWritebackLabels
#define XML_FIELD_NAME "_name"
CvFileNode* icvFileNodeGetChild(CvFileNode* father, const char* name);
CvFileNode* icvFileNodeGetChildArrayElem(CvFileNode* father, const char* name,int index);
CvFileNode* icvFileNodeGetNext(CvFileNode* n, const char* name);
void cvCheckTrainData( const CvMat* train_data, int tflag,
const CvMat* missing_mask,
int* var_all, int* sample_all );
CvMat* cvPreprocessIndexArray( const CvMat* idx_arr, int data_arr_size, bool check_for_duplicates=false );
CvMat* cvPreprocessVarType( const CvMat* type_mask, const CvMat* var_idx,
int var_all, int* response_type );
CvMat* cvPreprocessOrderedResponses( const CvMat* responses,
const CvMat* sample_idx, int sample_all );
CvMat* cvPreprocessCategoricalResponses( const CvMat* responses,
const CvMat* sample_idx, int sample_all,
CvMat** out_response_map, CvMat** class_counts=0 );
const float** cvGetTrainSamples( const CvMat* train_data, int tflag,
const CvMat* var_idx, const CvMat* sample_idx,
int* _var_count, int* _sample_count,
bool always_copy_data=false );
namespace cv
{
struct DTreeBestSplitFinder
{
DTreeBestSplitFinder(){ splitSize = 0, tree = 0; node = 0; }
DTreeBestSplitFinder( CvDTree* _tree, CvDTreeNode* _node);
DTreeBestSplitFinder( const DTreeBestSplitFinder& finder, Split );
virtual ~DTreeBestSplitFinder() {}
virtual void operator()(const BlockedRange& range);
void join( DTreeBestSplitFinder& rhs );
Ptr<CvDTreeSplit> bestSplit;
Ptr<CvDTreeSplit> split;
int splitSize;
CvDTree* tree;
CvDTreeNode* node;
};
struct ForestTreeBestSplitFinder : DTreeBestSplitFinder
{
ForestTreeBestSplitFinder() : DTreeBestSplitFinder() {}
ForestTreeBestSplitFinder( CvForestTree* _tree, CvDTreeNode* _node );
ForestTreeBestSplitFinder( const ForestTreeBestSplitFinder& finder, Split );
virtual void operator()(const BlockedRange& range);
};
}
#endif /* __OPENCV_PRECOMP_H__ */

File diff suppressed because it is too large.

@ -1,6 +1,4 @@
#include "opencv2/core.hpp"
#include "cv.h"
#include "cascadeclassifier.h"
using namespace std;

@ -2,9 +2,6 @@
#define _OPENCV_FEATURES_H_
#include "imagestorage.h"
#include "cxcore.h"
#include "cv.h"
#include "ml.h"
#include <stdio.h>
#define FEATURES "features"

@ -135,7 +135,7 @@ public:
virtual Mat getCatMap() const = 0;
virtual void setTrainTestSplit(int count, bool shuffle=true) = 0;
virtual void setTrainTestSplitRatio(float ratio, bool shuffle=true) = 0;
virtual void setTrainTestSplitRatio(double ratio, bool shuffle=true) = 0;
virtual void shuffleTrainTest() = 0;
static Mat getSubVector(const Mat& vec, const Mat& idx);
@ -156,7 +156,6 @@ class CV_EXPORTS_W StatModel : public Algorithm
{
public:
enum { UPDATE_MODEL = 1, RAW_OUTPUT=1, COMPRESSED_INPUT=2, PREPROCESSED_INPUT=4 };
virtual ~StatModel();
virtual void clear();
virtual int getVarCount() const = 0;
@ -164,16 +163,30 @@ public:
virtual bool isTrained() const = 0;
virtual bool isClassifier() const = 0;
virtual bool train( const Ptr<TrainData>& trainData, int flags=0 ) = 0;
virtual bool train( const Ptr<TrainData>& trainData, int flags=0 );
virtual bool train( InputArray samples, int layout, InputArray responses );
virtual float calcError( const Ptr<TrainData>& data, bool test, OutputArray resp ) const;
virtual float predict( InputArray samples, OutputArray results=noArray(), int flags=0 ) const = 0;
template<typename _Tp> static Ptr<_Tp> load(const String& filename)
{
FileStorage fs(filename, FileStorage::READ);
Ptr<_Tp> p = _Tp::create();
p->read(fs.getFirstTopLevelNode());
return p->isTrained() ? p : Ptr<_Tp>();
Ptr<_Tp> model = _Tp::create();
model->read(fs.getFirstTopLevelNode());
return model->isTrained() ? model : Ptr<_Tp>();
}
template<typename _Tp> static Ptr<_Tp> train(const Ptr<TrainData>& data, const typename _Tp::Params& p, int flags=0)
{
Ptr<_Tp> model = _Tp::create(p);
return !model.empty() && model->train(data, flags) ? model : Ptr<_Tp>();
}
template<typename _Tp> static Ptr<_Tp> train(InputArray samples, int layout, InputArray responses,
const typename _Tp::Params& p, int flags=0)
{
Ptr<_Tp> model = _Tp::create(p);
return !model.empty() && model->train(TrainData::create(samples, layout, responses), flags) ? model : Ptr<_Tp>();
}
virtual void save(const String& filename) const;
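The new static helpers collapse the usual create-train-check sequence into one call; a minimal usage sketch against the declarations above (the data and file names are hypothetical):

Ptr<TrainData> tdata = TrainData::create(samples, ROW_SAMPLE, responses);
Ptr<SVM> svm = StatModel::train<SVM>(tdata, SVM::Params());    // empty Ptr on failure
if( !svm.empty() )
    svm->save("svm.xml");                                      // hypothetical path
Ptr<SVM> restored = StatModel::load<SVM>("svm.xml");           // empty Ptr if untrained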
@ -192,11 +205,17 @@ public:
class CV_EXPORTS_W NormalBayesClassifier : public StatModel
{
public:
virtual ~NormalBayesClassifier();
class CV_EXPORTS_W_MAP Params
{
public:
Params();
};
virtual float predictProb( InputArray inputs, OutputArray outputs,
OutputArray outputProbs, int flags=0 ) const = 0;
virtual void setParams(const Params& params) = 0;
virtual Params getParams() const = 0;
static Ptr<NormalBayesClassifier> create();
static Ptr<NormalBayesClassifier> create(const Params& params=Params());
};
/****************************************************************************************\
@ -207,13 +226,21 @@ public:
class CV_EXPORTS_W KNearest : public StatModel
{
public:
virtual void setDefaultK(int k) = 0;
virtual int getDefaultK() const = 0;
class CV_EXPORTS_W_MAP Params
{
public:
Params(int defaultK=10, bool isclassifier=true);
int defaultK;
bool isclassifier;
};
virtual void setParams(const Params& p) = 0;
virtual Params getParams() const = 0;
virtual float findNearest( InputArray samples, int k,
OutputArray results,
OutputArray neighborResponses=noArray(),
OutputArray dist=noArray() ) const = 0;
static Ptr<KNearest> create(bool isclassifier=true);
static Ptr<KNearest> create(const Params& params=Params());
};
/****************************************************************************************\
@ -247,7 +274,6 @@ public:
class CV_EXPORTS Kernel : public Algorithm
{
public:
virtual ~Kernel();
virtual int getType() const = 0;
virtual void calc( int vcount, int n, const float* vecs, const float* another, float* results ) = 0;
};
@ -261,8 +287,6 @@ public:
// SVM params type
enum { C=0, GAMMA=1, P=2, NU=3, COEF=4, DEGREE=5 };
virtual ~SVM();
virtual bool trainAuto( const Ptr<TrainData>& data, int kFold = 10,
ParamGrid Cgrid = SVM::getDefaultGrid(SVM::C),
ParamGrid gammaGrid = SVM::getDefaultGrid(SVM::GAMMA),
@ -399,8 +423,6 @@ public:
int subsetOfs;
};
virtual ~DTrees();
virtual void setDParams(const Params& p);
virtual Params getDParams() const;
@ -464,7 +486,6 @@ public:
// Boosting type
enum { DISCRETE=0, REAL=1, LOGIT=2, GENTLE=3 };
virtual ~Boost();
virtual Params getBParams() const = 0;
virtual void setBParams(const Params& p) = 0;
@ -491,7 +512,6 @@ public:
};
enum {SQUARED_LOSS=0, ABSOLUTE_LOSS, HUBER_LOSS=3, DEVIANCE_LOSS};
virtual ~GBTrees();
virtual void setK(int k) = 0;
@ -513,10 +533,16 @@ public:
struct CV_EXPORTS_W_MAP Params
{
Params();
Params( TermCriteria termCrit, int trainMethod, double param1, double param2=0 );
Params( const Mat& layerSizes, int activateFunc, double fparam1, double fparam2,
TermCriteria termCrit, int trainMethod, double param1, double param2=0 );
enum { BACKPROP=0, RPROP=1 };
CV_PROP_RW Mat layerSizes;
CV_PROP_RW int activateFunc;
CV_PROP_RW double fparam1;
CV_PROP_RW double fparam2;
CV_PROP_RW TermCriteria termCrit;
CV_PROP_RW int trainMethod;
@ -527,23 +553,17 @@ public:
CV_PROP_RW double rpDW0, rpDWPlus, rpDWMinus, rpDWMin, rpDWMax;
};
virtual ~ANN_MLP();
// possible activation functions
enum { IDENTITY = 0, SIGMOID_SYM = 1, GAUSSIAN = 2 };
// available training flags
enum { UPDATE_WEIGHTS = 1, NO_INPUT_SCALE = 2, NO_OUTPUT_SCALE = 4 };
virtual Mat getLayerSizes() const = 0;
virtual Mat getWeights(int layerIdx) const = 0;
virtual void setParams(const Params& p) = 0;
virtual Params getParams() const = 0;
static Ptr<ANN_MLP> create(InputArray layerSizes=noArray(),
const Params& params=Params(),
int activateFunc=ANN_MLP::SIGMOID_SYM,
double fparam1=0, double fparam2=0);
static Ptr<ANN_MLP> create(const Params& params=Params());
};
/****************************************************************************************\
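Since layerSizes and the activation function now travel inside Params, building a network is a single create call; a sketch with a hypothetical 16-input, 26-output topology, mirroring the updated test code further below:

Mat layerSizes = (Mat_<int>(1, 4) << 16, 100, 100, 26);   // hypothetical topology
Ptr<ANN_MLP> ann = ANN_MLP::create(ANN_MLP::Params(layerSizes, ANN_MLP::SIGMOID_SYM, 0, 0,
                       TermCriteria(TermCriteria::COUNT, 300, 0.01),
                       ANN_MLP::Params::RPROP, 0.1));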

@ -42,10 +42,11 @@
namespace cv { namespace ml {
ANN_MLP::~ANN_MLP() {}
ANN_MLP::Params::Params()
{
layerSizes = Mat();
activateFunc = SIGMOID_SYM;
fparam1 = fparam2 = 0;
termCrit = TermCriteria( TermCriteria::COUNT + TermCriteria::EPS, 1000, 0.01 );
trainMethod = RPROP;
bpDWScale = bpMomentScale = 0.1;
@ -54,8 +55,13 @@ ANN_MLP::Params::Params()
}
ANN_MLP::Params::Params( TermCriteria _termCrit, int _trainMethod, double _param1, double _param2 )
ANN_MLP::Params::Params( const Mat& _layerSizes, int _activateFunc, double _fparam1, double _fparam2,
TermCriteria _termCrit, int _trainMethod, double _param1, double _param2 )
{
layerSizes = _layerSizes;
activateFunc = _activateFunc;
fparam1 = _fparam1;
fparam2 = _fparam2;
termCrit = _termCrit;
trainMethod = _trainMethod;
bpDWScale = bpMomentScale = 0.1;
@ -95,15 +101,25 @@ public:
clear();
}
ANN_MLPImpl( const Mat& _layer_sizes, int _activ_func,
double _f_param1, double _f_param2 )
ANN_MLPImpl( const Params& p )
{
clear();
create( _layer_sizes, _activ_func, _f_param1, _f_param2 );
setParams(p);
}
virtual ~ANN_MLPImpl() {}
void setParams(const Params& p)
{
params = p;
create( params.layerSizes );
set_activ_func( params.activateFunc, params.fparam1, params.fparam2 );
}
Params getParams() const
{
return params;
}
void clear()
{
min_val = max_val = min_val1 = max_val1 = 0.;
@ -183,16 +199,13 @@ public:
}
}
void create( InputArray _layer_sizes, int _activ_func,
double _f_param1, double _f_param2 )
void create( InputArray _layer_sizes )
{
clear();
_layer_sizes.copyTo(layer_sizes);
int l_count = layer_count();
set_activ_func( _activ_func, _f_param1, _f_param2 );
weights.resize(l_count + 2);
max_lsize = 0;
@ -665,16 +678,6 @@ public:
calc_output_scale( outputs, flags );
}
void setParams( const Params& _params )
{
params = _params;
}
Params getParams() const
{
return params;
}
bool train( const Ptr<TrainData>& trainData, int flags )
{
const int MAX_ITER = 1000;
@ -1240,7 +1243,7 @@ public:
vector<int> _layer_sizes;
fn["layer_sizes"] >> _layer_sizes;
create( _layer_sizes, SIGMOID_SYM, 0, 0 );
create( _layer_sizes );
int i, l_count = layer_count();
read_params(fn);
@ -1307,15 +1310,9 @@ public:
};
Ptr<ANN_MLP> ANN_MLP::create(InputArray _layerSizes,
const ANN_MLP::Params& params,
int activateFunc,
double fparam1, double fparam2)
Ptr<ANN_MLP> ANN_MLP::create(const ANN_MLP::Params& params)
{
Mat layerSizes = _layerSizes.getMat();
Ptr<ANN_MLPImpl> ann = makePtr<ANN_MLPImpl>(layerSizes, activateFunc, fparam1, fparam2);
ann->setParams(params);
Ptr<ANN_MLPImpl> ann = makePtr<ANN_MLPImpl>(params);
return ann;
}

@ -54,8 +54,6 @@ log_ratio( double val )
}
Boost::~Boost() {}
Boost::Params::Params()
{
boostType = Boost::REAL;
@ -106,6 +104,7 @@ public:
void startTraining( const Ptr<TrainData>& trainData, int flags )
{
DTreesImpl::startTraining(trainData, flags);
sumResult.assign(w->sidx.size(), 0.);
if( bparams.boostType != Boost::DISCRETE )
{
@ -114,14 +113,10 @@ public:
w->ord_responses.resize(n);
double a = -1, b = 1;
if( bparams.boostType == Boost::REAL )
a = 0;
else if( bparams.boostType == Boost::LOGIT )
if( bparams.boostType == Boost::LOGIT )
{
sumResult.assign(w->sidx.size(), 0.);
a = -2, b = 2;
}
for( i = 0; i < n; i++ )
w->ord_responses[i] = w->cat_responses[i] > 0 ? b : a;
}
@ -197,7 +192,7 @@ public:
}
else if( bparams.boostType == Boost::REAL )
{
double p = node->value;
double p = (node->value+1)*0.5;
node->value = 0.5*log_ratio(p);
}
}
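The REAL branch fix accounts for ord_responses now staying at ±1 (the old a = 0 special case above was removed), so a leaf value v, a weighted mean of ±1 responses, is first remapped to a probability estimate before the standard Real AdaBoost half-log-odds transform:

p = \frac{v + 1}{2}, \qquad f = \frac{1}{2}\ln\frac{p}{1-p}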
@ -227,7 +222,7 @@ public:
{
int i, n = (int)w->sidx.size();
int nvars = (int)varIdx.size();
double sumw = 0.;
double sumw = 0., C = 1.;
cv::AutoBuffer<double> buf(n*3 + nvars);
double* result = buf;
float* sbuf = (float*)(result + n*3);
@ -261,7 +256,7 @@ public:
if( sumw != 0 )
err /= sumw;
double C = -log_ratio( err );
C = -log_ratio( err );
double scale = std::exp(C);
sumw = 0;
@ -289,6 +284,7 @@ public:
for( i = 0; i < n; i++ )
{
int si = w->sidx[i];
CV_Assert( std::abs(w->ord_responses[si]) == 1 );
double wval = w->sample_weights[si]*std::exp(-result[i]*w->ord_responses[si]);
sumw += wval;
w->sample_weights[si] = wval;
@ -330,6 +326,20 @@ public:
}
else
CV_Error(CV_StsNotImplemented, "Unknown boosting type");
/*if( bparams.boostType != Boost::LOGIT )
{
double err = 0;
for( i = 0; i < n; i++ )
{
sumResult[i] += result[i]*C;
if( bparams.boostType != Boost::DISCRETE )
err += sumResult[i]*w->ord_responses[w->sidx[i]] < 0;
else
err += sumResult[i]*w->cat_responses[w->sidx[i]] < 0;
}
printf("%d trees. C=%.2f, training error=%.1f%%, working set size=%d (out of %d)\n", (int)roots.size(), C, err*100./n, (int)sidx.size(), n);
}*/
// renormalize weights
if( sumw > FLT_EPSILON )

@ -379,7 +379,7 @@ public:
tempCatOfs.push_back(ofs);
std::copy(labels.begin(), labels.end(), std::back_inserter(tempCatMap));
}
else if( haveMissing )
else
{
tempCatOfs.push_back(Vec2i(0, 0));
/*Mat missing_i = layout == ROW_SAMPLE ? missing.col(i) : missing.row(i);
@ -741,9 +741,9 @@ public:
CV_Error( CV_StsBadArg, "type of some variables is not specified" );
}
void setTrainTestSplitRatio(float ratio, bool shuffle)
void setTrainTestSplitRatio(double ratio, bool shuffle)
{
CV_Assert( 0 <= ratio && ratio <= 1 );
CV_Assert( 0. <= ratio && ratio <= 1. );
setTrainTestSplit(cvRound(getNSamples()*ratio), shuffle);
}
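setTrainTestSplitRatio now takes a double and validates the range; a usage sketch (data hypothetical):

Ptr<TrainData> tdata = TrainData::create(samples, ROW_SAMPLE, responses);
tdata->setTrainTestSplitRatio(0.8, true);   // 80% train, shuffled
tdata->shuffleTrainTest();                  // can reshuffle the split later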

@ -50,7 +50,6 @@ ParamGrid::ParamGrid(double _minVal, double _maxVal, double _logStep)
logStep = std::max(_logStep, 1.);
}
StatModel::~StatModel() {}
void StatModel::clear() {}
int StatModel::getVarCount() const { return 0; }
@ -61,6 +60,11 @@ bool StatModel::train( const Ptr<TrainData>&, int )
return false;
}
bool StatModel::train( InputArray samples, int layout, InputArray responses )
{
return train(TrainData::create(samples, layout, responses));
}
float StatModel::calcError( const Ptr<TrainData>& data, bool testerr, OutputArray _resp ) const
{
Mat samples = data->getSamples();
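The added train overload forwards raw arrays through TrainData::create, so callers no longer need to build a TrainData object by hand; a sketch (data hypothetical):

Ptr<KNearest> knn = KNearest::create();
knn->train(samples, ROW_SAMPLE, responses);  // wraps TrainData::create(...) internally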

@ -49,18 +49,27 @@
namespace cv {
namespace ml {
KNearest::Params::Params(int k, bool isclassifier_)
{
defaultK = k;
isclassifier = isclassifier_;
}
class KNearestImpl : public KNearest
{
public:
KNearestImpl(bool __isClassifier=true)
KNearestImpl(const Params& p)
{
defaultK = 3;
_isClassifier = __isClassifier;
params = p;
}
virtual ~KNearestImpl() {}
bool isClassifier() const { return _isClassifier; }
Params getParams() const { return params; }
void setParams(const Params& p) { params = p; }
bool isClassifier() const { return params.isclassifier; }
bool isTrained() const { return !samples.empty(); }
String getDefaultModelName() const { return "opencv_ml_knn"; }
@ -188,7 +197,7 @@ public:
if( results || testidx+range.start == 0 )
{
if( !_isClassifier || k == 1 )
if( !params.isclassifier || k == 1 )
{
float s = 0.f;
for( j = 0; j < k; j++ )
@ -316,12 +325,13 @@ public:
float predict(InputArray inputs, OutputArray outputs, int) const
{
return findNearest( inputs, defaultK, outputs, noArray(), noArray() );
return findNearest( inputs, params.defaultK, outputs, noArray(), noArray() );
}
void write( FileStorage& fs ) const
{
fs << "is_classifier" << (int)_isClassifier;
fs << "is_classifier" << (int)params.isclassifier;
fs << "default_k" << params.defaultK;
fs << "samples" << samples;
fs << "responses" << responses;
@ -330,24 +340,21 @@ public:
void read( const FileNode& fn )
{
clear();
_isClassifier = (int)fn["is_classifier"] != 0;
params.isclassifier = (int)fn["is_classifier"] != 0;
params.defaultK = (int)fn["default_k"];
fn["samples"] >> samples;
fn["responses"] >> responses;
}
void setDefaultK(int _k) { defaultK = _k; }
int getDefaultK() const { return defaultK; }
Mat samples;
Mat responses;
bool _isClassifier;
int defaultK;
Params params;
};
Ptr<KNearest> KNearest::create(bool isClassifier)
Ptr<KNearest> KNearest::create(const Params& p)
{
return makePtr<KNearestImpl>(isClassifier);
return makePtr<KNearestImpl>(p);
}
}
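defaultK now lives in Params and is serialized next to is_classifier, so it survives a save/load round trip; a sketch (paths and data hypothetical):

Ptr<KNearest> knn = KNearest::create(KNearest::Params(7, true));
knn->train(trainData, ROW_SAMPLE, trainLabels);
knn->save("knn.xml");                                   // also writes default_k
Ptr<KNearest> restored = StatModel::load<KNearest>("knn.xml");
float r = restored->predict(querySample);               // uses the restored defaultK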

@ -43,7 +43,7 @@
namespace cv {
namespace ml {
NormalBayesClassifier::~NormalBayesClassifier() {}
NormalBayesClassifier::Params::Params() {}
class NormalBayesClassifierImpl : public NormalBayesClassifier
{
@ -53,6 +53,9 @@ public:
nallvars = 0;
}
void setParams(const Params&) {}
Params getParams() const { return Params(); }
bool train( const Ptr<TrainData>& trainData, int flags )
{
const float min_variation = FLT_EPSILON;
@ -452,7 +455,7 @@ public:
};
Ptr<NormalBayesClassifier> NormalBayesClassifier::create()
Ptr<NormalBayesClassifier> NormalBayesClassifier::create(const Params&)
{
Ptr<NormalBayesClassifierImpl> p = makePtr<NormalBayesClassifierImpl>();
return p;

@ -134,8 +134,6 @@ SVM::Params::Params( int _svmType, int _kernelType,
termCrit = _termCrit;
}
SVM::Kernel::~Kernel() {}
/////////////////////////////////////// SVM kernel ///////////////////////////////////////
class SVMKernelImpl : public SVM::Kernel
{
@ -358,7 +356,51 @@ static void sortSamplesByClasses( const Mat& _samples, const Mat& _responses,
//////////////////////// SVM implementation //////////////////////////////
SVM::~SVM() {}
ParamGrid SVM::getDefaultGrid( int param_id )
{
ParamGrid grid;
if( param_id == SVM::C )
{
grid.minVal = 0.1;
grid.maxVal = 500;
grid.logStep = 5; // total iterations = 5
}
else if( param_id == SVM::GAMMA )
{
grid.minVal = 1e-5;
grid.maxVal = 0.6;
grid.logStep = 15; // total iterations = 4
}
else if( param_id == SVM::P )
{
grid.minVal = 0.01;
grid.maxVal = 100;
grid.logStep = 7; // total iterations = 4
}
else if( param_id == SVM::NU )
{
grid.minVal = 0.01;
grid.maxVal = 0.2;
grid.logStep = 3; // total iterations = 3
}
else if( param_id == SVM::COEF )
{
grid.minVal = 0.1;
grid.maxVal = 300;
grid.logStep = 14; // total iterations = 3
}
else if( param_id == SVM::DEGREE )
{
grid.minVal = 0.01;
grid.maxVal = 4;
grid.logStep = 7; // total iterations = 3
}
else
cvError( CV_StsBadArg, "SVM::getDefaultGrid", "Invalid type of parameter "
"(use one of SVM::C, SVM::GAMMA et al.)", __FILE__, __LINE__ );
return grid;
}
class SVMImpl : public SVM
{
@ -371,52 +413,6 @@ public:
int ofs;
};
virtual ParamGrid getDefaultGrid( int param_id ) const
{
ParamGrid grid;
if( param_id == SVM::C )
{
grid.minVal = 0.1;
grid.maxVal = 500;
grid.logStep = 5; // total iterations = 5
}
else if( param_id == SVM::GAMMA )
{
grid.minVal = 1e-5;
grid.maxVal = 0.6;
grid.logStep = 15; // total iterations = 4
}
else if( param_id == SVM::P )
{
grid.minVal = 0.01;
grid.maxVal = 100;
grid.logStep = 7; // total iterations = 4
}
else if( param_id == SVM::NU )
{
grid.minVal = 0.01;
grid.maxVal = 0.2;
grid.logStep = 3; // total iterations = 3
}
else if( param_id == SVM::COEF )
{
grid.minVal = 0.1;
grid.maxVal = 300;
grid.logStep = 14; // total iterations = 3
}
else if( param_id == SVM::DEGREE )
{
grid.minVal = 0.01;
grid.maxVal = 4;
grid.logStep = 7; // total iterations = 3
}
else
cvError( CV_StsBadArg, "SVM::getDefaultGrid", "Invalid type of parameter "
"(use one of SVM::C, SVM::GAMMA et al.)", __FILE__, __LINE__ );
return grid;
}
// Generalized SMO+SVMlight algorithm
// Solves:
//
@ -1568,6 +1564,9 @@ public:
if( svmType == C_SVC || svmType == NU_SVC )
{
responses = data->getTrainNormCatResponses();
if( responses.empty() )
CV_Error(CV_StsBadArg, "in the case of classification problem the responses must be categorical; "
"either specify varType when creating TrainData, or pass integer responses");
class_labels = data->getClassLabels();
}
else
@ -1793,7 +1792,7 @@ public:
{
int svmType = svm->params.svmType;
int sv_total = svm->sv.rows;
int class_count = !svm->class_labels.empty() ? svm->class_labels.cols : svmType == ONE_CLASS ? 1 : 0;
int class_count = !svm->class_labels.empty() ? (int)svm->class_labels.total() : svmType == ONE_CLASS ? 1 : 0;
AutoBuffer<float> _buffer(sv_total + (class_count+1)*2);
float* buffer = _buffer;
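With getDefaultGrid turned into a static member instead of a virtual on the implementation, the grids can be fetched without an instance; a trainAuto sketch (data hypothetical):

Ptr<SVM> svm = SVM::create(SVM::Params());
svm->trainAuto(TrainData::create(samples, ROW_SAMPLE, responses), 10,
               SVM::getDefaultGrid(SVM::C), SVM::getDefaultGrid(SVM::GAMMA),
               SVM::getDefaultGrid(SVM::P), SVM::getDefaultGrid(SVM::NU),
               SVM::getDefaultGrid(SVM::COEF), SVM::getDefaultGrid(SVM::DEGREE));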

@ -48,8 +48,6 @@ namespace ml {
using std::vector;
DTrees::~DTrees() {}
void DTrees::setDParams(const DTrees::Params&)
{
CV_Error(CV_StsNotImplemented, "");

@ -313,7 +313,7 @@ void CV_KNearestTest::run( int /*start_from*/ )
int code = cvtest::TS::OK;
Ptr<KNearest> knearest = KNearest::create(true);
knearest->train(TrainData::create(trainData, cv::ml::ROW_SAMPLE, trainLabels), 0);
knearest->train(trainData, cv::ml::ROW_SAMPLE, trainLabels);
knearest->findNearest( testData, 4, bestLabels);
float err;
if( !calcErr( bestLabels, testLabels, sizes, err, true ) )

@ -371,8 +371,9 @@ int CV_MLBaseTest::train( int testCaseIdx )
data->getVarIdx(), data->getTrainSampleIdx());
int layer_sz[] = { data->getNAllVars(), 100, 100, (int)cls_map.size() };
Mat layer_sizes( 1, (int)(sizeof(layer_sz)/sizeof(layer_sz[0])), CV_32S, layer_sz );
model = ANN_MLP::create(layer_sizes, ANN_MLP::Params(TermCriteria(TermCriteria::COUNT,300,0.01),
str_to_ann_train_method(train_method_str), param1, param2));
model = ANN_MLP::create(ANN_MLP::Params(layer_sizes, ANN_MLP::SIGMOID_SYM, 0, 0,
TermCriteria(TermCriteria::COUNT,300,0.01),
str_to_ann_train_method(train_method_str), param1, param2));
}
else if( modelName == CV_DTREE )
{

File diff suppressed because it is too large.

@ -2326,14 +2326,14 @@ static void removeBowImageDescriptorsByCount( vector<ObdImage>& images, vector<M
CV_Assert( bowImageDescriptors.size() == objectPresent.size() );
}
static void setSVMParams( const SVM::Params& svmParams, Mat& class_wts_cv, const Mat& responses, bool balanceClasses )
static void setSVMParams( SVM::Params& svmParams, Mat& class_wts_cv, const Mat& responses, bool balanceClasses )
{
int pos_ex = countNonZero(responses == 1);
int neg_ex = countNonZero(responses == -1);
cout << pos_ex << " positive training samples; " << neg_ex << " negative training samples" << endl;
svmParams.svm_type = CvSVM::C_SVC;
svmParams.kernel_type = CvSVM::RBF;
svmParams.svmType = SVM::C_SVC;
svmParams.kernelType = SVM::RBF;
if( balanceClasses )
{
Mat class_wts( 2, 1, CV_32FC1 );
@ -2351,43 +2351,44 @@ static void setSVMParams( const SVM::Params& svmParams, Mat& class_wts_cv, const
class_wts.at<float>(1) = static_cast<float>(pos_ex)/static_cast<float>(pos_ex+neg_ex);
}
class_wts_cv = class_wts;
svmParams.class_weights = &class_wts_cv;
svmParams.classWeights = class_wts_cv;
}
}
static void setSVMTrainAutoParams( CvParamGrid& c_grid, CvParamGrid& gamma_grid,
CvParamGrid& p_grid, CvParamGrid& nu_grid,
CvParamGrid& coef_grid, CvParamGrid& degree_grid )
static void setSVMTrainAutoParams( ParamGrid& c_grid, ParamGrid& gamma_grid,
ParamGrid& p_grid, ParamGrid& nu_grid,
ParamGrid& coef_grid, ParamGrid& degree_grid )
{
c_grid = CvSVM::get_default_grid(CvSVM::C);
c_grid = SVM::getDefaultGrid(SVM::C);
gamma_grid = CvSVM::get_default_grid(CvSVM::GAMMA);
gamma_grid = SVM::getDefaultGrid(SVM::GAMMA);
p_grid = CvSVM::get_default_grid(CvSVM::P);
p_grid.step = 0;
p_grid = SVM::getDefaultGrid(SVM::P);
p_grid.logStep = 0;
nu_grid = CvSVM::get_default_grid(CvSVM::NU);
nu_grid.step = 0;
nu_grid = SVM::getDefaultGrid(SVM::NU);
nu_grid.logStep = 0;
coef_grid = CvSVM::get_default_grid(CvSVM::COEF);
coef_grid.step = 0;
coef_grid = SVM::getDefaultGrid(SVM::COEF);
coef_grid.logStep = 0;
degree_grid = CvSVM::get_default_grid(CvSVM::DEGREE);
degree_grid.step = 0;
degree_grid = SVM::getDefaultGrid(SVM::DEGREE);
degree_grid.logStep = 0;
}
static void trainSVMClassifier( CvSVM& svm, const SVMTrainParamsExt& svmParamsExt, const string& objClassName, VocData& vocData,
static Ptr<SVM> trainSVMClassifier( const SVMTrainParamsExt& svmParamsExt, const string& objClassName, VocData& vocData,
Ptr<BOWImgDescriptorExtractor>& bowExtractor, const Ptr<FeatureDetector>& fdetector,
const string& resPath )
{
/* first check if a previously trained svm for the current class has been saved to file */
string svmFilename = resPath + svmsDir + "/" + objClassName + ".xml.gz";
Ptr<SVM> svm;
FileStorage fs( svmFilename, FileStorage::READ);
if( fs.isOpened() )
{
cout << "*** LOADING SVM CLASSIFIER FOR CLASS " << objClassName << " ***" << endl;
svm.load( svmFilename.c_str() );
svm = StatModel::load<SVM>( svmFilename );
}
else
{
@ -2438,20 +2439,24 @@ static void trainSVMClassifier( CvSVM& svm, const SVMTrainParamsExt& svmParamsEx
}
cout << "TRAINING SVM FOR CLASS ..." << objClassName << "..." << endl;
CvSVMParams svmParams;
CvMat class_wts_cv;
SVM::Params svmParams;
Mat class_wts_cv;
setSVMParams( svmParams, class_wts_cv, responses, svmParamsExt.balanceClasses );
CvParamGrid c_grid, gamma_grid, p_grid, nu_grid, coef_grid, degree_grid;
svm = SVM::create(svmParams);
ParamGrid c_grid, gamma_grid, p_grid, nu_grid, coef_grid, degree_grid;
setSVMTrainAutoParams( c_grid, gamma_grid, p_grid, nu_grid, coef_grid, degree_grid );
svm.train_auto( trainData, responses, Mat(), Mat(), svmParams, 10, c_grid, gamma_grid, p_grid, nu_grid, coef_grid, degree_grid );
svm->trainAuto(TrainData::create(trainData, ROW_SAMPLE, responses), 10,
c_grid, gamma_grid, p_grid, nu_grid, coef_grid, degree_grid);
cout << "SVM TRAINING FOR CLASS " << objClassName << " COMPLETED" << endl;
svm.save( svmFilename.c_str() );
svm->save( svmFilename );
cout << "SAVED CLASSIFIER TO FILE" << endl;
}
return svm;
}
static void computeConfidences( CvSVM& svm, const string& objClassName, VocData& vocData,
static void computeConfidences( const Ptr<SVM>& svm, const string& objClassName, VocData& vocData,
Ptr<BOWImgDescriptorExtractor>& bowExtractor, const Ptr<FeatureDetector>& fdetector,
const string& resPath )
{
@ -2477,12 +2482,12 @@ static void computeConfidences( CvSVM& svm, const string& objClassName, VocData&
if( imageIdx == 0 )
{
// In the first iteration, determine the sign of the positive class
float classVal = confidences[imageIdx] = svm.predict( bowImageDescriptors[imageIdx], false );
float scoreVal = confidences[imageIdx] = svm.predict( bowImageDescriptors[imageIdx], true );
float classVal = confidences[imageIdx] = svm->predict( bowImageDescriptors[imageIdx], noArray(), 0 );
float scoreVal = confidences[imageIdx] = svm->predict( bowImageDescriptors[imageIdx], noArray(), StatModel::RAW_OUTPUT );
signMul = (classVal < 0) == (scoreVal < 0) ? 1.f : -1.f;
}
// svm output of decision function
confidences[imageIdx] = signMul * svm.predict( bowImageDescriptors[imageIdx], true );
confidences[imageIdx] = signMul * svm->predict( bowImageDescriptors[imageIdx], noArray(), StatModel::RAW_OUTPUT );
}
cout << "WRITING QUERY RESULTS TO VOC RESULTS FILE FOR CLASS " << objClassName << "..." << endl;
@ -2592,9 +2597,8 @@ int main(int argc, char** argv)
for( size_t classIdx = 0; classIdx < objClasses.size(); ++classIdx )
{
// Train a classifier on train dataset
CvSVM svm;
trainSVMClassifier( svm, svmTrainParamsExt, objClasses[classIdx], vocData,
bowExtractor, featureDetector, resPath );
Ptr<SVM> svm = trainSVMClassifier( svmTrainParamsExt, objClasses[classIdx], vocData,
bowExtractor, featureDetector, resPath );
// Now use the classifier over all images on the test dataset and rank according to score order
// also calculating precision-recall etc.

@ -179,10 +179,7 @@ build_rtrees_classifier( const string& data_filename,
// create classifier by using <data> and <responses>
cout << "Training the classifier ...\n";
Ptr<TrainData> tdata = prepare_train_data(data, responses, ntrain_samples);
// 3. train classifier
model = RTrees::create(RTrees::Params(10,10,0,false,15,Mat(),true,4,TC(100,0.01f)));
model->train( tdata );
model = StatModel::train<RTrees>(tdata, RTrees::Params(10,10,0,false,15,Mat(),true,4,TC(100,0.01f)));
cout << endl;
}
@ -267,10 +264,12 @@ build_boost_classifier( const string& data_filename,
Ptr<TrainData> tdata = TrainData::create(new_data, ROW_SAMPLE, new_responses,
noArray(), noArray(), noArray(), var_type);
model = Boost::create(Boost::Params(Boost::REAL, 100, 0.95, 5, false, Mat() ));
vector<double> priors(2);
priors[0] = 1;
priors[1] = 26;
cout << "Training the classifier (may take a few minutes)...\n";
model->train(tdata);
model = StatModel::train<Boost>(tdata, Boost::Params(Boost::GENTLE, 100, 0.95, 5, false, Mat(priors) ));
cout << endl;
}
@ -333,7 +332,6 @@ build_mlp_classifier( const string& data_filename,
if( !ok )
return ok;
int i, j;
Ptr<ANN_MLP> model;
int nsamples_all = data.rows;
@ -360,14 +358,14 @@ build_mlp_classifier( const string& data_filename,
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
Mat train_data = data.rowRange(0, ntrain_samples);
Mat new_responses = Mat::zeros( ntrain_samples, class_count, CV_32F );
Mat train_responses = Mat::zeros( ntrain_samples, class_count, CV_32F );
// 1. unroll the responses
cout << "Unrolling the responses...\n";
for( i = 0; i < ntrain_samples; i++ )
for( int i = 0; i < ntrain_samples; i++ )
{
int cls_label = responses.at<int>(i) - 'A';
new_responses.at<float>(i, cls_label) = 1.f;
int cls_label = responses.at<int>(i) - 'A';
train_responses.at<float>(i, cls_label) = 1.f;
}
// 2. train classifier
@ -385,180 +383,63 @@ build_mlp_classifier( const string& data_filename,
int max_iter = 1000;
#endif
mlp.train( &train_data, new_responses, 0, 0,
ANN_MLP::Params(TC(max_iter,0), method, method_param));
Ptr<TrainData> tdata = TrainData::create(train_data, ROW_SAMPLE, train_responses);
model = ANN_MLP::create();
mlp.create( &layer_sizes );
printf( "Training the classifier (may take a few minutes)...\n");
cvReleaseMat( &new_responses );
printf("\n");
cout << "Training the classifier (may take a few minutes)...\n";
model = StatModel::train<ANN_MLP>(tdata, ANN_MLP::Params(layer_sizes, ANN_MLP::SIGMOID_SYM, 0, 0, TC(max_iter,0), method, method_param));
cout << endl;
}
Mat mlp_response;
// compute prediction error on train and test data
for( i = 0; i < nsamples_all; i++ )
{
int best_class;
CvMat sample;
cvGetRow( data, &sample, i );
CvPoint max_loc;
mlp.predict( &sample, mlp_response );
cvMinMaxLoc( mlp_response, 0, 0, 0, &max_loc, 0 );
best_class = max_loc.x + 'A';
int r = fabs((double)best_class - responses->data.fl[i]) < FLT_EPSILON ? 1 : 0;
if( i < ntrain_samples )
train_hr += r;
else
test_hr += r;
}
test_hr /= (double)(nsamples_all-ntrain_samples);
train_hr /= (double)ntrain_samples;
printf( "Recognition rate: train = %.1f%%, test = %.1f%%\n",
train_hr*100., test_hr*100. );
if( !filename_to_save.empty() )
model->save( filename_to_save );
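// test_and_save_classifier() presumably runs the model over the train and test
// ranges, prints the recognition rates and saves the model when a filename is
// given; 'A' is the offset added to the predicted class index for comparison.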
test_and_save_classifier(model, data, responses, ntrain_samples, 'A', filename_to_save);
return true;
}
static bool
build_knearest_classifier( const string& data_filename, int K )
{
const int var_count = 16;
Mat data;
CvMat train_data;
Mat responses;
bool ok = read_num_class_data( data_filename, 16, &data, &responses );
if( !ok )
return ok;
int nsamples_all = 0, ntrain_samples = 0;
Ptr<KNearest> model;
nsamples_all = data->rows;
ntrain_samples = (int)(nsamples_all*0.8);
int nsamples_all = data.rows;
int ntrain_samples = (int)(nsamples_all*0.8);
// 1. unroll the responses
printf( "Unrolling the responses...\n");
cvGetRows( data, &train_data, 0, ntrain_samples );
// create classifier by using <data> and <responses>
cout << "Training the classifier ...\n";
Ptr<TrainData> tdata = prepare_train_data(data, responses, ntrain_samples);
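// KNearest::Params(K, true): K is the number of neighbours; the second flag
// presumably selects classification (majority vote) rather than regression.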
model = StatModel::train<KNearest>(tdata, KNearest::Params(K, true));
cout << endl;
// 2. train classifier
CvMat* train_resp = cvCreateMat( ntrain_samples, 1, CV_32FC1);
for (int i = 0; i < ntrain_samples; i++)
train_resp->data.fl[i] = responses->data.fl[i];
Ptr<KNearest> model = KNearest::create(true);
model->train(train_data, train_resp);
CvMat* nearests = cvCreateMat( (nsamples_all - ntrain_samples), K, CV_32FC1);
float* _sample = new float[var_count * (nsamples_all - ntrain_samples)];
CvMat sample = cvMat( nsamples_all - ntrain_samples, 16, CV_32FC1, _sample );
float* true_results = new float[nsamples_all - ntrain_samples];
for (int j = ntrain_samples; j < nsamples_all; j++)
{
float *s = data->data.fl + j * var_count;
for (int i = 0; i < var_count; i++)
{
sample.data.fl[(j - ntrain_samples) * var_count + i] = s[i];
}
true_results[j - ntrain_samples] = responses->data.fl[j];
}
CvMat *result = cvCreateMat(1, nsamples_all - ntrain_samples, CV_32FC1);
knearest.find_nearest(&sample, K, result, 0, nearests, 0);
int true_resp = 0;
int accuracy = 0;
for (int i = 0; i < nsamples_all - ntrain_samples; i++)
{
if (result->data.fl[i] == true_results[i])
true_resp++;
for(int k = 0; k < K; k++ )
{
if( nearests->data.fl[i * K + k] == true_results[i])
accuracy++;
}
}
printf("true_resp = %f%%\tavg accuracy = %f%%\n", (float)true_resp / (nsamples_all - ntrain_samples) * 100,
(float)accuracy / (nsamples_all - ntrain_samples) / K * 100);
delete[] true_results;
delete[] _sample;
cvReleaseMat( &train_resp );
cvReleaseMat( &nearests );
cvReleaseMat( &result );
cvReleaseMat( &data );
cvReleaseMat( &responses );
return 0;
test_and_save_classifier(model, data, responses, ntrain_samples, 0, string());
return true;
}
static bool
build_nbayes_classifier( const string& data_filename )
{
const int var_count = 16;
Mat data;
CvMat train_data;
Mat responses;
bool ok = read_num_class_data( data_filename, 16, &data, &responses );
if( !ok )
return ok;
int nsamples_all = 0, ntrain_samples = 0;
Ptr<NormalBayesClassifier> model;
nsamples_all = data->rows;
ntrain_samples = (int)(nsamples_all*0.5);
int nsamples_all = data.rows;
int ntrain_samples = (int)(nsamples_all*0.8);
// 1. unroll the responses
printf( "Unrolling the responses...\n");
cvGetRows( data, &train_data, 0, ntrain_samples );
// create classifier by using <data> and <responses>
cout << "Training the classifier ...\n";
Ptr<TrainData> tdata = prepare_train_data(data, responses, ntrain_samples);
model = StatModel::train<NormalBayesClassifier>(tdata, NormalBayesClassifier::Params());
cout << endl;
// 2. train classifier
CvMat* train_resp = cvCreateMat( ntrain_samples, 1, CV_32FC1);
for (int i = 0; i < ntrain_samples; i++)
train_resp->data.fl[i] = responses->data.fl[i];
CvNormalBayesClassifier nbayes(&train_data, train_resp);
float* _sample = new float[var_count * (nsamples_all - ntrain_samples)];
CvMat sample = cvMat( nsamples_all - ntrain_samples, 16, CV_32FC1, _sample );
float* true_results = new float[nsamples_all - ntrain_samples];
for (int j = ntrain_samples; j < nsamples_all; j++)
{
float *s = data->data.fl + j * var_count;
for (int i = 0; i < var_count; i++)
{
sample.data.fl[(j - ntrain_samples) * var_count + i] = s[i];
}
true_results[j - ntrain_samples] = responses->data.fl[j];
}
CvMat *result = cvCreateMat(1, nsamples_all - ntrain_samples, CV_32FC1);
nbayes.predict(&sample, result);
int true_resp = 0;
//int accuracy = 0;
for (int i = 0; i < nsamples_all - ntrain_samples; i++)
{
if (result->data.fl[i] == true_results[i])
true_resp++;
}
printf("true_resp = %f%%\n", (float)true_resp / (nsamples_all - ntrain_samples) * 100);
delete[] true_results;
delete[] _sample;
cvReleaseMat( &train_resp );
cvReleaseMat( &result );
cvReleaseMat( &data );
cvReleaseMat( &responses );
return 0;
test_and_save_classifier(model, data, responses, ntrain_samples, 0, string());
return true;
}
static bool
@ -568,95 +449,47 @@ build_svm_classifier( const string& data_filename,
{
Mat data;
Mat responses;
CvMat* train_resp = 0;
CvMat train_data;
int nsamples_all = 0, ntrain_samples = 0;
int var_count;
Ptr<SVM> model;
bool ok = read_num_class_data( data_filename, 16, &data, &responses );
if( !ok )
return ok;
////////// SVM parameters ///////////////////////////////
CvSVMParams param;
param.kernel_type=CvSVM::LINEAR;
param.svm_type=CvSVM::C_SVC;
param.C=1;
///////////////////////////////////////////////////////////
Ptr<SVM> model;
printf( "The database %s is loaded.\n", data_filename );
nsamples_all = data->rows;
ntrain_samples = (int)(nsamples_all*0.1);
var_count = data->cols;
int nsamples_all = data.rows;
int ntrain_samples = (int)(nsamples_all*0.8);
// Create or load SVM classifier
if( filename_to_load )
if( !filename_to_load.empty() )
{
// load classifier from the specified file
svm.load( filename_to_load );
model = load_classifier<SVM>(filename_to_load);
if( model.empty() )
return false;
ntrain_samples = 0;
if( svm.get_var_count() == 0 )
{
printf( "Could not read the classifier %s\n", filename_to_load );
return -1;
}
printf( "The classifier %s is loaded.\n", filename_to_load );
}
else
{
// train classifier
printf( "Training the classifier (may take a few minutes)...\n");
cvGetRows( data, &train_data, 0, ntrain_samples );
train_resp = cvCreateMat( ntrain_samples, 1, CV_32FC1);
for (int i = 0; i < ntrain_samples; i++)
train_resp->data.fl[i] = responses->data.fl[i];
svm.train(&train_data, train_resp, 0, 0, param);
// create classifier by using <data> and <responses>
cout << "Training the classifier ...\n";
Ptr<TrainData> tdata = prepare_train_data(data, responses, ntrain_samples);
SVM::Params params;
params.svmType = SVM::C_SVC;
params.kernelType = SVM::LINEAR;
params.C = 1;
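// Linear C-support-vector classification; C controls how strongly margin
// violations are penalized.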
model = StatModel::train<SVM>(tdata, params);
cout << endl;
}
// classification
std::vector<float> _sample(var_count * (nsamples_all - ntrain_samples));
CvMat sample = cvMat( nsamples_all - ntrain_samples, 16, CV_32FC1, &_sample[0] );
std::vector<float> true_results(nsamples_all - ntrain_samples);
for (int j = ntrain_samples; j < nsamples_all; j++)
{
float *s = data->data.fl + j * var_count;
for (int i = 0; i < var_count; i++)
{
sample.data.fl[(j - ntrain_samples) * var_count + i] = s[i];
}
true_results[j - ntrain_samples] = responses->data.fl[j];
}
CvMat *result = cvCreateMat(1, nsamples_all - ntrain_samples, CV_32FC1);
printf("Classification (may take a few minutes)...\n");
double t = (double)cvGetTickCount();
svm.predict(&sample, result);
t = (double)cvGetTickCount() - t;
printf("Prediction type: %gms\n", t/(cvGetTickFrequency()*1000.));
int true_resp = 0;
for (int i = 0; i < nsamples_all - ntrain_samples; i++)
{
if (result->data.fl[i] == true_results[i])
true_resp++;
}
printf("true_resp = %f%%\n", (float)true_resp / (nsamples_all - ntrain_samples) * 100);
if( !filename_to_save.empty() )
model->save( filename_to_save );
test_and_save_classifier(model, data, responses, ntrain_samples, 0, filename_to_save);
return true;
}
int main( int argc, char *argv[] )
{
char* filename_to_save = 0;
char* filename_to_load = 0;
char default_data_filename[] = "./letter-recognition.data";
char* data_filename = default_data_filename;
string filename_to_save = "";
string filename_to_load = "";
string data_filename = "./letter-recognition.data";
int method = 0;
int i;
@ -685,15 +518,15 @@ int main( int argc, char *argv[] )
{
method = 2;
}
else if ( strcmp(argv[i], "-knearest") == 0)
else if( strcmp(argv[i], "-knearest") == 0 || strcmp(argv[i], "-knn") == 0 )
{
method = 3;
}
else if ( strcmp(argv[i], "-nbayes") == 0)
else if( strcmp(argv[i], "-nbayes") == 0)
{
method = 4;
}
else if ( strcmp(argv[i], "-svm") == 0)
else if( strcmp(argv[i], "-svm") == 0)
{
method = 5;
}

View File

@ -1,322 +0,0 @@
#include "opencv2/core/core_c.h"
#include "opencv2/ml/ml.hpp"
#include <stdio.h>
static void help()
{
printf("\nThis program demonstrated the use of OpenCV's decision tree function for learning and predicting data\n"
"Usage :\n"
"./mushroom <path to agaricus-lepiota.data>\n"
"\n"
"The sample demonstrates how to build a decision tree for classifying mushrooms.\n"
"It uses the sample base agaricus-lepiota.data from UCI Repository, here is the link:\n"
"\n"
"Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).\n"
"UCI Repository of machine learning databases\n"
"[http://www.ics.uci.edu/~mlearn/MLRepository.html].\n"
"Irvine, CA: University of California, Department of Information and Computer Science.\n"
"\n"
"// loads the mushroom database, which is a text file, containing\n"
"// one training sample per row, all the input variables and the output variable are categorical,\n"
"// the values are encoded by characters.\n\n");
}
static int mushroom_read_database( const char* filename, CvMat** data, CvMat** missing, CvMat** responses )
{
const int M = 1024;
FILE* f = fopen( filename, "rt" );
CvMemStorage* storage;
CvSeq* seq;
char buf[M+2], *ptr;
float* el_ptr;
CvSeqReader reader;
int i, j, var_count = 0;
if( !f )
return 0;
// read the first line and determine the number of variables
if( !fgets( buf, M, f ))
{
fclose(f);
return 0;
}
for( ptr = buf; *ptr != '\0'; ptr++ )
var_count += *ptr == ',';
assert( ptr - buf == (var_count+1)*2 );
// create temporary memory storage to store the whole database
el_ptr = new float[var_count+1];
storage = cvCreateMemStorage();
seq = cvCreateSeq( 0, sizeof(*seq), (var_count+1)*sizeof(float), storage );
for(;;)
{
for( i = 0; i <= var_count; i++ )
{
int c = buf[i*2];
el_ptr[i] = c == '?' ? -1.f : (float)c;
}
if( i != var_count+1 )
break;
cvSeqPush( seq, el_ptr );
if( !fgets( buf, M, f ) || !strchr( buf, ',' ) )
break;
}
fclose(f);
// allocate the output matrices and copy the base there
*data = cvCreateMat( seq->total, var_count, CV_32F );
*missing = cvCreateMat( seq->total, var_count, CV_8U );
*responses = cvCreateMat( seq->total, 1, CV_32F );
cvStartReadSeq( seq, &reader );
for( i = 0; i < seq->total; i++ )
{
const float* sdata = (float*)reader.ptr + 1;
float* ddata = data[0]->data.fl + var_count*i;
float* dr = responses[0]->data.fl + i;
uchar* dm = missing[0]->data.ptr + var_count*i;
for( j = 0; j < var_count; j++ )
{
ddata[j] = sdata[j];
dm[j] = sdata[j] < 0;
}
*dr = sdata[-1];
CV_NEXT_SEQ_ELEM( seq->elem_size, reader );
}
cvReleaseMemStorage( &storage );
delete [] el_ptr;
return 1;
}
static CvDTree* mushroom_create_dtree( const CvMat* data, const CvMat* missing,
const CvMat* responses, float p_weight )
{
CvDTree* dtree;
CvMat* var_type;
int i, hr1 = 0, hr2 = 0, p_total = 0;
float priors[] = { 1, p_weight };
var_type = cvCreateMat( data->cols + 1, 1, CV_8U );
cvSet( var_type, cvScalarAll(CV_VAR_CATEGORICAL) ); // all the variables are categorical
dtree = new CvDTree;
dtree->train( data, CV_ROW_SAMPLE, responses, 0, 0, var_type, missing,
CvDTreeParams( 8, // max depth
10, // min sample count
0, // regression accuracy: N/A here
true, // compute surrogate split, as we have missing data
15, // max number of categories (use sub-optimal algorithm for larger numbers)
10, // the number of cross-validation folds
true, // use 1SE rule => smaller tree
true, // throw away the pruned tree branches
priors // the array of priors, the bigger p_weight, the more attention
// to the poisonous mushrooms
// (a mushroom will be judged to be poisonous with a higher chance)
));
// compute hit-rate on the training database, demonstrates predict usage.
for( i = 0; i < data->rows; i++ )
{
CvMat sample, mask;
cvGetRow( data, &sample, i );
cvGetRow( missing, &mask, i );
double r = dtree->predict( &sample, &mask )->value;
int d = fabs(r - responses->data.fl[i]) >= FLT_EPSILON;
if( d )
{
if( r != 'p' )
hr1++;
else
hr2++;
}
p_total += responses->data.fl[i] == 'p';
}
printf( "Results on the training database:\n"
"\tPoisonous mushrooms mis-predicted: %d (%g%%)\n"
"\tFalse-alarms: %d (%g%%)\n", hr1, (double)hr1*100/p_total,
hr2, (double)hr2*100/(data->rows - p_total) );
cvReleaseMat( &var_type );
return dtree;
}
static const char* var_desc[] =
{
"cap shape (bell=b,conical=c,convex=x,flat=f)",
"cap surface (fibrous=f,grooves=g,scaly=y,smooth=s)",
"cap color (brown=n,buff=b,cinnamon=c,gray=g,green=r,\n\tpink=p,purple=u,red=e,white=w,yellow=y)",
"bruises? (bruises=t,no=f)",
"odor (almond=a,anise=l,creosote=c,fishy=y,foul=f,\n\tmusty=m,none=n,pungent=p,spicy=s)",
"gill attachment (attached=a,descending=d,free=f,notched=n)",
"gill spacing (close=c,crowded=w,distant=d)",
"gill size (broad=b,narrow=n)",
"gill color (black=k,brown=n,buff=b,chocolate=h,gray=g,\n\tgreen=r,orange=o,pink=p,purple=u,red=e,white=w,yellow=y)",
"stalk shape (enlarging=e,tapering=t)",
"stalk root (bulbous=b,club=c,cup=u,equal=e,rhizomorphs=z,rooted=r)",
"stalk surface above ring (ibrous=f,scaly=y,silky=k,smooth=s)",
"stalk surface below ring (ibrous=f,scaly=y,silky=k,smooth=s)",
"stalk color above ring (brown=n,buff=b,cinnamon=c,gray=g,orange=o,\n\tpink=p,red=e,white=w,yellow=y)",
"stalk color below ring (brown=n,buff=b,cinnamon=c,gray=g,orange=o,\n\tpink=p,red=e,white=w,yellow=y)",
"veil type (partial=p,universal=u)",
"veil color (brown=n,orange=o,white=w,yellow=y)",
"ring number (none=n,one=o,two=t)",
"ring type (cobwebby=c,evanescent=e,flaring=f,large=l,\n\tnone=n,pendant=p,sheathing=s,zone=z)",
"spore print color (black=k,brown=n,buff=b,chocolate=h,green=r,\n\torange=o,purple=u,white=w,yellow=y)",
"population (abundant=a,clustered=c,numerous=n,\n\tscattered=s,several=v,solitary=y)",
"habitat (grasses=g,leaves=l,meadows=m,paths=p\n\turban=u,waste=w,woods=d)",
0
};
static void print_variable_importance( CvDTree* dtree )
{
const CvMat* var_importance = dtree->get_var_importance();
int i;
char input[1000];
if( !var_importance )
{
printf( "Error: Variable importance can not be retrieved\n" );
return;
}
printf( "Print variable importance information? (y/n) " );
int values_read = scanf( "%1s", input );
CV_Assert(values_read == 1);
if( input[0] != 'y' && input[0] != 'Y' )
return;
for( i = 0; i < var_importance->cols*var_importance->rows; i++ )
{
double val = var_importance->data.db[i];
char buf[100];
int len = (int)(strchr( var_desc[i], '(' ) - var_desc[i] - 1);
strncpy( buf, var_desc[i], len );
buf[len] = '\0';
printf( "%s", buf );
printf( ": %g%%\n", val*100. );
}
}
static void interactive_classification( CvDTree* dtree )
{
char input[1000];
const CvDTreeNode* root;
CvDTreeTrainData* data;
if( !dtree )
return;
root = dtree->get_root();
data = dtree->get_data();
for(;;)
{
const CvDTreeNode* node;
printf( "Start/Proceed with interactive mushroom classification (y/n): " );
int values_read = scanf( "%1s", input );
CV_Assert(values_read == 1);
if( input[0] != 'y' && input[0] != 'Y' )
break;
printf( "Enter 1-letter answers, '?' for missing/unknown value...\n" );
// custom version of predict
node = root;
for(;;)
{
CvDTreeSplit* split = node->split;
int dir = 0;
if( !node->left || node->Tn <= dtree->get_pruned_tree_idx() || !node->split )
break;
for( ; split != 0; )
{
int vi = split->var_idx, j;
int count = data->cat_count->data.i[vi];
const int* map = data->cat_map->data.i + data->cat_ofs->data.i[vi];
printf( "%s: ", var_desc[vi] );
values_read = scanf( "%1s", input );
CV_Assert(values_read == 1);
if( input[0] == '?' )
{
split = split->next;
continue;
}
// convert the input character to the normalized value of the variable
for( j = 0; j < count; j++ )
if( map[j] == input[0] )
break;
if( j < count )
{
dir = (split->subset[j>>5] & (1 << (j&31))) ? -1 : 1;
if( split->inversed )
dir = -dir;
break;
}
else
printf( "Error: unrecognized value\n" );
}
if( !dir )
{
printf( "Impossible to classify the sample\n");
node = 0;
break;
}
node = dir < 0 ? node->left : node->right;
}
if( node )
printf( "Prediction result: the mushroom is %s\n",
node->class_idx == 0 ? "EDIBLE" : "POISONOUS" );
printf( "\n-----------------------------\n" );
}
}
int main( int argc, char** argv )
{
CvMat *data = 0, *missing = 0, *responses = 0;
CvDTree* dtree;
const char* base_path = argc >= 2 ? argv[1] : "agaricus-lepiota.data";
help();
if( !mushroom_read_database( base_path, &data, &missing, &responses ) )
{
printf( "\nUnable to load the training database\n\n");
help();
return -1;
}
dtree = mushroom_create_dtree( data, missing, responses,
10 // poisonous mushrooms will have 10x higher weight in the decision tree
);
cvReleaseMat( &data );
cvReleaseMat( &missing );
cvReleaseMat( &responses );
print_variable_importance( dtree );
interactive_classification( dtree );
delete dtree;
return 0;
}

View File

@ -102,8 +102,7 @@ static void predict_and_paint(const Ptr<StatModel>& model, Mat& dst)
static void find_decision_boundary_NBC()
{
// learn classifier
Ptr<NormalBayesClassifier> normalBayesClassifier = NormalBayesClassifier::create();
normalBayesClassifier->train(prepare_train_data());
Ptr<NormalBayesClassifier> normalBayesClassifier = StatModel::train<NormalBayesClassifier>(prepare_train_data(), NormalBayesClassifier::Params());
predict_and_paint(normalBayesClassifier, imgDst);
}
@ -113,10 +112,7 @@ static void find_decision_boundary_NBC()
#if _KNN_
static void find_decision_boundary_KNN( int K )
{
Ptr<KNearest> knn = KNearest::create(true);
knn->setDefaultK(K);
knn->train(prepare_train_data());
Ptr<KNearest> knn = StatModel::train<KNearest>(prepare_train_data(), KNearest::Params(K, true));
predict_and_paint(knn, imgDst);
}
#endif
@ -124,9 +120,7 @@ static void find_decision_boundary_KNN( int K )
#if _SVM_
static void find_decision_boundary_SVM( SVM::Params params )
{
Ptr<SVM> svm = SVM::create(params);
svm->train(prepare_train_data());
Ptr<SVM> svm = StatModel::train<SVM>(prepare_train_data(), params);
predict_and_paint(svm, imgDst);
Mat sv = svm->getSupportVectors();
@ -149,8 +143,7 @@ static void find_decision_boundary_DT()
params.use1SERule = false;
params.truncatePrunedTree = false;
Ptr<DTrees> dtree = DTrees::create(params);
dtree->train(prepare_train_data());
Ptr<DTrees> dtree = StatModel::train<DTrees>(prepare_train_data(), params);
predict_and_paint(dtree, imgDst);
}
@ -167,8 +160,7 @@ static void find_decision_boundary_BT()
Mat() // priors
);
Ptr<Boost> boost = Boost::create(params);
boost->train(prepare_train_data());
Ptr<Boost> boost = StatModel::train<Boost>(prepare_train_data(), params);
predict_and_paint(boost, imgDst);
}
@ -185,8 +177,7 @@ static void find_decision_boundary_GBT()
false // use_surrogates )
);
Ptr<GBTrees> gbtrees = GBTrees::create(params);
gbtrees->train(prepare_train_data());
Ptr<GBTrees> gbtrees = StatModel::train<GBTrees>(prepare_train_data(), params);
predict_and_paint(gbtrees, imgDst);
}
#endif
@ -205,8 +196,7 @@ static void find_decision_boundary_RF()
TermCriteria(TermCriteria::MAX_ITER, 5, 0) // max_num_of_trees_in_the_forest,
);
Ptr<RTrees> rtrees = RTrees::create(params);
rtrees->train(prepare_train_data());
Ptr<RTrees> rtrees = StatModel::train<RTrees>(prepare_train_data(), params);
predict_and_paint(rtrees, imgDst);
}
@ -215,9 +205,8 @@ static void find_decision_boundary_RF()
#if _ANN_
static void find_decision_boundary_ANN( const Mat& layer_sizes )
{
ANN_MLP::Params params(TermCriteria(TermCriteria::MAX_ITER+TermCriteria::EPS, 300, FLT_EPSILON),
ANN_MLP::Params params(layer_sizes, ANN_MLP::SIGMOID_SYM, 1, 1, TermCriteria(TermCriteria::MAX_ITER+TermCriteria::EPS, 300, FLT_EPSILON),
ANN_MLP::Params::BACKPROP, 0.001);
Ptr<ANN_MLP> ann = ANN_MLP::create(layer_sizes, params, ANN_MLP::SIGMOID_SYM, 1, 1 );
Mat trainClasses = Mat::zeros( trainedPoints.size(), classColors.size(), CV_32FC1 );
for( int i = 0; i < trainClasses.rows; i++ )
@ -228,7 +217,7 @@ static void find_decision_boundary_ANN( const Mat& layer_sizes )
Mat samples = prepare_train_samples(trainedPoints);
Ptr<TrainData> tdata = TrainData::create(samples, ROW_SAMPLE, trainClasses);
ann->train(tdata);
Ptr<ANN_MLP> ann = StatModel::train<ANN_MLP>(tdata, params);
predict_and_paint(ann, imgDst);
}
#endif
@ -340,18 +329,15 @@ int main()
img.copyTo( imgDst );
#if _NBC_
find_decision_boundary_NBC();
namedWindow( "NormalBayesClassifier", WINDOW_AUTOSIZE );
imshow( "NormalBayesClassifier", imgDst );
#endif
#if _KNN_
int K = 3;
find_decision_boundary_KNN( K );
namedWindow( "kNN", WINDOW_AUTOSIZE );
imshow( "kNN", imgDst );
K = 15;
find_decision_boundary_KNN( K );
namedWindow( "kNN2", WINDOW_AUTOSIZE );
imshow( "kNN2", imgDst );
#endif
@ -369,36 +355,30 @@ int main()
params.termCrit = TermCriteria(TermCriteria::MAX_ITER+TermCriteria::EPS, 1000, 0.01);
find_decision_boundary_SVM( params );
namedWindow( "classificationSVM1", WINDOW_AUTOSIZE );
imshow( "classificationSVM1", imgDst );
params.C = 10;
find_decision_boundary_SVM( params );
namedWindow( "classificationSVM2", WINDOW_AUTOSIZE );
imshow( "classificationSVM2", imgDst );
#endif
#if _DT_
find_decision_boundary_DT();
namedWindow( "DT", WINDOW_AUTOSIZE );
imshow( "DT", imgDst );
#endif
#if _BT_
find_decision_boundary_BT();
namedWindow( "BT", WINDOW_AUTOSIZE );
imshow( "BT", imgDst);
#endif
#if _GBT_
find_decision_boundary_GBT();
namedWindow( "GBT", WINDOW_AUTOSIZE );
imshow( "GBT", imgDst);
#endif
#if _RF_
find_decision_boundary_RF();
namedWindow( "RF", WINDOW_AUTOSIZE );
imshow( "RF", imgDst);
#endif
@ -408,13 +388,11 @@ int main()
layer_sizes1.at<int>(1) = 5;
layer_sizes1.at<int>(2) = classColors.size();
find_decision_boundary_ANN( layer_sizes1 );
namedWindow( "ANN", WINDOW_AUTOSIZE );
imshow( "ANN", imgDst );
#endif
#if _EM_
find_decision_boundary_EM();
namedWindow( "EM", WINDOW_AUTOSIZE );
imshow( "EM", imgDst );
#endif
}

View File

@ -8,9 +8,10 @@
#include <time.h>
using namespace cv;
using namespace cv::ml;
using namespace std;
void get_svm_detector(const SVM& svm, vector< float > & hog_detector );
void get_svm_detector(const Ptr<SVM>& svm, vector< float > & hog_detector );
void convert_to_ml(const std::vector< cv::Mat > & train_samples, cv::Mat& trainData );
void load_images( const string & prefix, const string & filename, vector< Mat > & img_lst );
void sample_neg( const vector< Mat > & full_neg_lst, vector< Mat > & neg_lst, const Size & size );
@ -20,49 +21,24 @@ void train_svm( const vector< Mat > & gradient_lst, const vector< int > & labels
void draw_locations( Mat & img, const vector< Rect > & locations, const Scalar & color );
void test_it( const Size & size );
void get_svm_detector(const SVM& svm, vector< float > & hog_detector )
void get_svm_detector(const Ptr<SVM>& svm, vector< float > & hog_detector )
{
// get the number of variables
const int var_all = svm.get_var_count();
// get the number of support vectors
const int sv_total = svm.get_support_vector_count();
// get the decision function
const CvSVMDecisionFunc* decision_func = svm.get_decision_function();
// get the support vectors
const float** sv = new const float*[ sv_total ];
for( int i = 0 ; i < sv_total ; ++i )
sv[ i ] = svm.get_support_vector(i);
Mat sv = svm->getSupportVectors();
const int sv_total = sv.rows;
// get the decision function
Mat alpha, svidx;
double rho = svm->getDecisionFunction(0, alpha, svidx);
CV_Assert( var_all > 0 &&
sv_total > 0 &&
decision_func != 0 &&
decision_func->alpha != 0 &&
decision_func->sv_count == sv_total );
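// For a linear SVM the decision function can be folded into a single
// compressed support vector with weight alpha == 1, which is what the
// asserts below verify.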
CV_Assert( alpha.total() == 1 && svidx.total() == 1 && sv_total == 1 );
CV_Assert( (alpha.type() == CV_64F && alpha.at<double>(0) == 1.) ||
(alpha.type() == CV_32F && alpha.at<float>(0) == 1.f) );
CV_Assert( sv.type() == CV_32F );
hog_detector.clear();
float svi = 0.f;
hog_detector.clear(); // clear any previous contents
hog_detector.reserve( var_all + 1 ); // reserve space up front for efficiency
/**
* hog_detector^i = \sum_j support_vector_j^i * \alpha_j
* hog_detector^dim = -\rho
*/
for( int i = 0 ; i < var_all ; ++i )
{
svi = 0.f;
for( int j = 0 ; j < sv_total ; ++j )
{
if( decision_func->sv_index != NULL ) // sometimes the sv_index isn't stored in the YML/XML.
svi += (float)( sv[decision_func->sv_index[j]][i] * decision_func->alpha[ j ] );
else
svi += (float)( sv[j][i] * decision_func->alpha[ j ] );
}
hog_detector.push_back( svi );
}
hog_detector.push_back( (float)-decision_func->rho );
delete[] sv;
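// HOGDescriptor::setSVMDetector() expects exactly this layout: the weight
// vector followed by the bias term -rho as the last element.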
hog_detector.resize(sv.cols + 1);
memcpy(&hog_detector[0], sv.data, sv.cols*sizeof(hog_detector[0]));
hog_detector[sv.cols] = (float)-rho;
}
@ -263,7 +239,7 @@ Mat get_hogdescriptor_visu(const Mat& color_origImg, vector<float>& descriptorVa
int mx = drawX + cellSize/2;
int my = drawY + cellSize/2;
rectangle(visu, Point((int)(drawX*zoomFac), (int)(drawY*zoomFac)), Point((int)((drawX+cellSize)*zoomFac), (int)((drawY+cellSize)*zoomFac)), CV_RGB(100,100,100), 1);
rectangle(visu, Point((int)(drawX*zoomFac), (int)(drawY*zoomFac)), Point((int)((drawX+cellSize)*zoomFac), (int)((drawY+cellSize)*zoomFac)), Scalar(100,100,100), 1);
// draw in each cell all 9 gradient strengths
for (int bin=0; bin<gradientBinSize; bin++)
@ -288,7 +264,7 @@ Mat get_hogdescriptor_visu(const Mat& color_origImg, vector<float>& descriptorVa
float y2 = my + dirVecY * currentGradStrength * maxVecLen * scale;
// draw gradient visualization
line(visu, Point((int)(x1*zoomFac),(int)(y1*zoomFac)), Point((int)(x2*zoomFac),(int)(y2*zoomFac)), CV_RGB(0,255,0), 1);
line(visu, Point((int)(x1*zoomFac),(int)(y1*zoomFac)), Point((int)(x2*zoomFac),(int)(y2*zoomFac)), Scalar(0,255,0), 1);
} // for (all bins)
@ -337,28 +313,26 @@ void compute_hog( const vector< Mat > & img_lst, vector< Mat > & gradient_lst, c
void train_svm( const vector< Mat > & gradient_lst, const vector< int > & labels )
{
SVM svm;
/* Default values to train SVM */
SVMParams params;
SVM::Params params;
params.coef0 = 0.0;
params.degree = 3;
params.term_crit.epsilon = 1e-3;
params.termCrit.epsilon = 1e-3;
params.gamma = 0;
params.kernel_type = SVM::LINEAR;
params.kernelType = SVM::LINEAR;
params.nu = 0.5;
params.p = 0.1; // for EPSILON_SVR, epsilon in loss function?
params.C = 0.01; // From paper, soft classifier
params.svm_type = SVM::EPS_SVR; // C_SVC; // EPSILON_SVR; // may be also NU_SVR; // do regression task
params.svmType = SVM::EPS_SVR; // C_SVC; // EPSILON_SVR; // may be also NU_SVR; // do regression task
Mat train_data;
convert_to_ml( gradient_lst, train_data );
clog << "Start training...";
svm.train( train_data, Mat( labels ), Mat(), Mat(), params );
Ptr<SVM> svm = StatModel::train<SVM>(train_data, ROW_SAMPLE, Mat(labels), params);
clog << "...[done]" << endl;
svm.save( "my_people_detector.yml" );
svm->save( "my_people_detector.yml" );
}
void draw_locations( Mat & img, const vector< Rect > & locations, const Scalar & color )
@ -380,7 +354,7 @@ void test_it( const Size & size )
Scalar reference( 0, 255, 0 );
Scalar trained( 0, 0, 255 );
Mat img, draw;
SVM svm;
Ptr<SVM> svm;
HOGDescriptor hog;
HOGDescriptor my_hog;
my_hog.winSize = size;
@ -388,7 +362,7 @@ void test_it( const Size & size )
vector< Rect > locations;
// Load the trained SVM.
svm.load( "my_people_detector.yml" );
svm = StatModel::load<SVM>( "my_people_detector.yml" );
// Set the trained svm to my_hog
vector< float > hog_detector;
get_svm_detector( svm, hog_detector );

View File

@ -1,63 +1,35 @@
#include "opencv2/ml/ml.hpp"
#include "opencv2/core/core_c.h"
#include "opencv2/core/core.hpp"
#include "opencv2/core/utility.hpp"
#include <stdio.h>
#include <string>
#include <map>
using namespace cv;
using namespace cv::ml;
static void help()
{
printf(
"\nThis sample demonstrates how to use different decision trees and forests including boosting and random trees:\n"
"CvDTree dtree;\n"
"CvBoost boost;\n"
"CvRTrees rtrees;\n"
"CvERTrees ertrees;\n"
"CvGBTrees gbtrees;\n"
"Call:\n\t./tree_engine [-r <response_column>] [-c] <csv filename>\n"
"\nThis sample demonstrates how to use different decision trees and forests including boosting and random trees.\n"
"Usage:\n\t./tree_engine [-r <response_column>] [-ts type_spec] <csv filename>\n"
"where -r <response_column> specified the 0-based index of the response (0 by default)\n"
"-c specifies that the response is categorical (it's ordered by default) and\n"
"-ts specifies the var type spec in the form ord[n1,n2-n3,n4-n5,...]cat[m1-m2,m3,m4-m5,...]\n"
"<csv filename> is the name of training data file in comma-separated value format\n\n");
}
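// Example invocation (with a hypothetical data file and type spec):
//   ./tree_engine -r 0 -ts cat[0-22] agaricus-lepiota.data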
static int count_classes(CvMLData& data)
static void train_and_print_errs(Ptr<StatModel> model, const Ptr<TrainData>& data)
{
cv::Mat r = cv::cvarrToMat(data.get_responses());
std::map<int, int> rmap;
int i, n = (int)r.total();
for( i = 0; i < n; i++ )
bool ok = model->train(data);
if( !ok )
{
float val = r.at<float>(i);
int ival = cvRound(val);
if( ival != val )
return -1;
rmap[ival] = 1;
printf("Training failed\n");
}
return (int)rmap.size();
}
static void print_result(float train_err, float test_err, const CvMat* _var_imp)
{
printf( "train error %f\n", train_err );
printf( "test error %f\n\n", test_err );
if (_var_imp)
else
{
cv::Mat var_imp = cv::cvarrToMat(_var_imp), sorted_idx;
cv::sortIdx(var_imp, sorted_idx, CV_SORT_EVERY_ROW + CV_SORT_DESCENDING);
printf( "variable importance:\n" );
int i, n = (int)var_imp.total();
int type = var_imp.type();
CV_Assert(type == CV_32F || type == CV_64F);
for( i = 0; i < n; i++)
{
int k = sorted_idx.at<int>(i);
printf( "%d\t%f\n", k, type == CV_32F ? var_imp.at<float>(k) : var_imp.at<double>(k));
}
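// calcError(data, false, ...) computes the error on the train split,
// calcError(data, true, ...) on the test split configured via
// setTrainTestSplitRatio() in main().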
printf( "train error: %f\n", model->calcError(data, false, noArray()) );
printf( "test error: %f\n\n", model->calcError(data, true, noArray()) );
}
printf("\n");
}
int main(int argc, char** argv)
@ -69,14 +41,14 @@ int main(int argc, char** argv)
}
const char* filename = 0;
int response_idx = 0;
bool categorical_response = false;
std::string typespec;
for(int i = 1; i < argc; i++)
{
if(strcmp(argv[i], "-r") == 0)
sscanf(argv[++i], "%d", &response_idx);
else if(strcmp(argv[i], "-c") == 0)
categorical_response = true;
else if(strcmp(argv[i], "-ts") == 0)
typespec = argv[++i];
else if(argv[i][0] != '-' )
filename = argv[i];
else
@ -88,52 +60,32 @@ int main(int argc, char** argv)
}
printf("\nReading in %s...\n\n",filename);
CvDTree dtree;
CvBoost boost;
CvRTrees rtrees;
CvERTrees ertrees;
CvGBTrees gbtrees;
const double train_test_split_ratio = 0.5;
CvMLData data;
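// loadFromCSV(filename, number of header lines to skip, first response column,
// one past the last response column, variable type spec string).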
Ptr<TrainData> data = TrainData::loadFromCSV(filename, 0, response_idx, response_idx+1, typespec);
CvTrainTestSplit spl( 0.5f );
if ( data.read_csv( filename ) == 0)
if( data.empty() )
{
data.set_response_idx( response_idx );
if(categorical_response)
data.change_var_type( response_idx, CV_VAR_CATEGORICAL );
data.set_train_test_split( &spl );
printf("======DTREE=====\n");
dtree.train( &data, CvDTreeParams( 10, 2, 0, false, 16, 0, false, false, 0 ));
print_result( dtree.calc_error( &data, CV_TRAIN_ERROR), dtree.calc_error( &data, CV_TEST_ERROR ), dtree.get_var_importance() );
if( categorical_response && count_classes(data) == 2 )
{
printf("======BOOST=====\n");
boost.train( &data, CvBoostParams(CvBoost::DISCRETE, 100, 0.95, 2, false, 0));
print_result( boost.calc_error( &data, CV_TRAIN_ERROR ), boost.calc_error( &data, CV_TEST_ERROR ), 0 ); //doesn't compute importance
}
printf("======RTREES=====\n");
rtrees.train( &data, CvRTParams( 10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER ));
print_result( rtrees.calc_error( &data, CV_TRAIN_ERROR), rtrees.calc_error( &data, CV_TEST_ERROR ), rtrees.get_var_importance() );
printf("======ERTREES=====\n");
ertrees.train( &data, CvRTParams( 18, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER ));
print_result( ertrees.calc_error( &data, CV_TRAIN_ERROR), ertrees.calc_error( &data, CV_TEST_ERROR ), ertrees.get_var_importance() );
printf("======GBTREES=====\n");
if (categorical_response)
gbtrees.train( &data, CvGBTreesParams(CvGBTrees::DEVIANCE_LOSS, 100, 0.1f, 0.8f, 5, false));
else
gbtrees.train( &data, CvGBTreesParams(CvGBTrees::SQUARED_LOSS, 100, 0.1f, 0.8f, 5, false));
print_result( gbtrees.calc_error( &data, CV_TRAIN_ERROR), gbtrees.calc_error( &data, CV_TEST_ERROR ), 0 ); //doesn't compute importance
printf("ERROR: File %s can not be read\n", filename);
return 0;
}
else
printf("File can not be read");
data->setTrainTestSplitRatio(train_test_split_ratio);
printf("======DTREE=====\n");
Ptr<DTrees> dtree = DTrees::create(DTrees::Params( 10, 2, 0, false, 16, 0, false, false, Mat() ));
train_and_print_errs(dtree, data);
if( (int)data->getClassLabels().total() <= 2 ) // regression or 2-class classification problem
{
printf("======BOOST=====\n");
Ptr<Boost> boost = Boost::create(Boost::Params(Boost::GENTLE, 100, 0.95, 2, false, Mat()));
train_and_print_errs(boost, data);
}
printf("======RTREES=====\n");
Ptr<RTrees> rtrees = RTrees::create(RTrees::Params(10, 2, 0, false, 16, Mat(), false, 0, TermCriteria(TermCriteria::MAX_ITER, 100, 0)));
train_and_print_errs(rtrees, data);
return 0;
}

View File

@ -4,29 +4,29 @@
#include <opencv2/ml/ml.hpp>
using namespace cv;
using namespace cv::ml;
int main()
int main(int, char**)
{
// Data for visual representation
int width = 512, height = 512;
Mat image = Mat::zeros(height, width, CV_8UC3);
// Set up training data
float labels[4] = {1.0, -1.0, -1.0, -1.0};
Mat labelsMat(4, 1, CV_32FC1, labels);
int labels[4] = {1, -1, -1, -1};
Mat labelsMat(4, 1, CV_32SC1, labels);
float trainingData[4][2] = { {501, 10}, {255, 10}, {501, 255}, {10, 501} };
Mat trainingDataMat(4, 2, CV_32FC1, trainingData);
// Set up SVM's parameters
CvSVMParams params;
params.svm_type = CvSVM::C_SVC;
params.kernel_type = CvSVM::LINEAR;
params.term_crit = cvTermCriteria(CV_TERMCRIT_ITER, 100, 1e-6);
SVM::Params params;
params.svmType = SVM::C_SVC;
params.kernelType = SVM::LINEAR;
params.termCrit = TermCriteria(TermCriteria::MAX_ITER, 100, 1e-6);
// Train the SVM
CvSVM SVM;
SVM.train(trainingDataMat, labelsMat, Mat(), Mat(), params);
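// StatModel::train<SVM>() creates and trains the model in one call; the new
// ml API expects classification labels as integers, hence the CV_32SC1
// labelsMat above.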
Ptr<SVM> svm = StatModel::train<SVM>(trainingDataMat, ROW_SAMPLE, labelsMat, params);
Vec3b green(0,255,0), blue (255,0,0);
// Show the decision regions given by the SVM
@ -34,30 +34,30 @@ int main()
for (int j = 0; j < image.cols; ++j)
{
Mat sampleMat = (Mat_<float>(1,2) << j,i);
float response = SVM.predict(sampleMat);
float response = svm->predict(sampleMat);
if (response == 1)
image.at<Vec3b>(i,j) = green;
else if (response == -1)
image.at<Vec3b>(i,j) = blue;
image.at<Vec3b>(i,j) = blue;
}
// Show the training data
int thickness = -1;
int lineType = 8;
circle( image, Point(501, 10), 5, Scalar( 0, 0, 0), thickness, lineType);
circle( image, Point(255, 10), 5, Scalar(255, 255, 255), thickness, lineType);
circle( image, Point(501, 255), 5, Scalar(255, 255, 255), thickness, lineType);
circle( image, Point( 10, 501), 5, Scalar(255, 255, 255), thickness, lineType);
circle( image, Point(501, 10), 5, Scalar( 0, 0, 0), thickness, lineType );
circle( image, Point(255, 10), 5, Scalar(255, 255, 255), thickness, lineType );
circle( image, Point(501, 255), 5, Scalar(255, 255, 255), thickness, lineType );
circle( image, Point( 10, 501), 5, Scalar(255, 255, 255), thickness, lineType );
// Show support vectors
thickness = 2;
lineType = 8;
int c = SVM.get_support_vector_count();
Mat sv = svm->getSupportVectors();
for (int i = 0; i < c; ++i)
for (int i = 0; i < sv.rows; ++i)
{
const float* v = SVM.get_support_vector(i);
const float* v = sv.ptr<float>(i);
circle( image, Point( (int) v[0], (int) v[1]), 6, Scalar(128, 128, 128), thickness, lineType);
}

View File

@ -8,6 +8,7 @@
#define FRAC_LINEAR_SEP 0.9f // Fraction of samples which compose the linearly separable part
using namespace cv;
using namespace cv::ml;
using namespace std;
static void help()
@ -30,7 +31,7 @@ int main()
//--------------------- 1. Set up training data randomly ---------------------------------------
Mat trainData(2*NTRAINING_SAMPLES, 2, CV_32FC1);
Mat labels (2*NTRAINING_SAMPLES, 1, CV_32FC1);
Mat labels (2*NTRAINING_SAMPLES, 1, CV_32SC1);
RNG rng(100); // Random value generation class
@ -71,16 +72,15 @@ int main()
labels.rowRange(NTRAINING_SAMPLES, 2*NTRAINING_SAMPLES).setTo(2); // Class 2
//------------------------ 2. Set up the support vector machines parameters --------------------
CvSVMParams params;
params.svm_type = SVM::C_SVC;
SVM::Params params;
params.svmType = SVM::C_SVC;
params.C = 0.1;
params.kernel_type = SVM::LINEAR;
params.term_crit = TermCriteria(CV_TERMCRIT_ITER, (int)1e7, 1e-6);
params.kernelType = SVM::LINEAR;
params.termCrit = TermCriteria(TermCriteria::MAX_ITER, (int)1e7, 1e-6);
//------------------------ 3. Train the svm ----------------------------------------------------
cout << "Starting training process" << endl;
CvSVM svm;
svm.train(trainData, labels, Mat(), Mat(), params);
Ptr<SVM> svm = StatModel::train<SVM>(trainData, ROW_SAMPLE, labels, params);
cout << "Finished training process" << endl;
//------------------------ 4. Show the decision regions ----------------------------------------
@ -89,7 +89,7 @@ int main()
for (int j = 0; j < I.cols; ++j)
{
Mat sampleMat = (Mat_<float>(1,2) << i, j);
float response = svm.predict(sampleMat);
float response = svm->predict(sampleMat);
if (response == 1) I.at<Vec3b>(j, i) = green;
else if (response == 2) I.at<Vec3b>(j, i) = blue;
@ -117,11 +117,11 @@ int main()
//------------------------- 6. Show support vectors --------------------------------------------
thick = 2;
lineType = 8;
int x = svm.get_support_vector_count();
Mat sv = svm->getSupportVectors();
for (int i = 0; i < x; ++i)
for (int i = 0; i < sv.rows; ++i)
{
const float* v = svm.get_support_vector(i);
const float* v = sv.ptr<float>(i);
circle( I, Point( (int) v[0], (int) v[1]), 6, Scalar(128, 128, 128), thick, lineType);
}