From dc1b01e002e7d8dae898264e5e98663a2115a140 Mon Sep 17 00:00:00 2001
From: Abe Friesen
Date: Mon, 2 Nov 2015 16:16:49 -0800
Subject: [PATCH 1/3] - LogisticRegressionImpl::predict() was changed to return the predicted value and to only write to the OutputArray results if specified (no longer segfaults).

- Refactored batch and mini_batch training to use a common gradient computation function (removed duplicate code).
- Altered the cost computation so that NAN is not computed unnecessarily.
- Greatly simplified (and sped up) the code that appends a column of 1s to the data.
- Minor code cleanup.
---
 modules/ml/src/lr.cpp | 195 +++++++++++++++++++-----------------------
 1 file changed, 89 insertions(+), 106 deletions(-)

diff --git a/modules/ml/src/lr.cpp b/modules/ml/src/lr.cpp
index 24fc29f67c..9bef74c4be 100644
--- a/modules/ml/src/lr.cpp
+++ b/modules/ml/src/lr.cpp
@@ -108,8 +108,9 @@ public:
 protected:
     Mat calc_sigmoid(const Mat& data) const;
     double compute_cost(const Mat& _data, const Mat& _labels, const Mat& _init_theta);
-    Mat compute_batch_gradient(const Mat& _data, const Mat& _labels, const Mat& _init_theta);
-    Mat compute_mini_batch_gradient(const Mat& _data, const Mat& _labels, const Mat& _init_theta);
+    void compute_gradient(const Mat& _data, const Mat& _labels, const Mat &_theta, const double _lambda, Mat & _gradient );
+    Mat batch_gradient_descent(const Mat& _data, const Mat& _labels, const Mat& _init_theta);
+    Mat mini_batch_gradient_descent(const Mat& _data, const Mat& _labels, const Mat& _init_theta);
     bool set_label_map(const Mat& _labels_i);
     Mat remap_labels(const Mat& _labels_i, const map<int, int>& lmap) const;
 protected:
@@ -156,13 +157,8 @@ bool LogisticRegressionImpl::train(const Ptr<TrainData>& trainData, int)
     int num_classes = (int) this->forward_mapper.size();
 
     // add a column of ones
-    Mat data_t = Mat::zeros(_data_i.rows, _data_i.cols+1, CV_32F);
-    vconcat(Mat(_data_i.rows, 1, _data_i.type(), Scalar::all(1.0)), data_t.col(0));
-
-    for (int i=1;i<data_t.cols;i++)
-    {
-        vconcat(_data_i.col(i-1), data_t.col(i));
-    }
+    Mat data_t;
+    hconcat( cv::Mat::ones( _data_i.rows, 1, CV_32F ), _data_i, data_t );
@@ ... @@ bool LogisticRegressionImpl::train(const Ptr<TrainData>& trainData, int)
     {
         labels_l.convertTo(labels, CV_32F);
         if(this->params.train_method == LogisticRegression::BATCH)
-            new_theta = compute_batch_gradient(data_t, labels, init_theta);
+            new_theta = batch_gradient_descent(data_t, labels, init_theta);
         else
-            new_theta = compute_mini_batch_gradient(data_t, labels, init_theta);
+            new_theta = mini_batch_gradient_descent(data_t, labels, init_theta);
         thetas = new_theta.t();
     }
     else
@@ -204,9 +200,9 @@ bool LogisticRegressionImpl::train(const Ptr<TrainData>& trainData, int)
         {
             new_local_labels = (labels_l == it->second)/255;
             new_local_labels.convertTo(labels, CV_32F);
             if(this->params.train_method == LogisticRegression::BATCH)
-                new_theta = compute_batch_gradient(data_t, labels, init_theta);
+                new_theta = batch_gradient_descent(data_t, labels, init_theta);
             else
-                new_theta = compute_mini_batch_gradient(data_t, labels, init_theta);
+                new_theta = mini_batch_gradient_descent(data_t, labels, init_theta);
             hconcat(new_theta.t(), thetas.row(ii));
             ii += 1;
         }
@@ -221,13 +217,15 @@ bool LogisticRegressionImpl::train(const Ptr<TrainData>& trainData, int)
     return ok;
 }
 
-float LogisticRegressionImpl::predict(InputArray samples, OutputArray results, int) const
+float LogisticRegressionImpl::predict(InputArray samples, OutputArray results, int flags) const
 {
     /* returns a class of the predicted class
     class names can be 1,2,3,4, .... etc */
     Mat thetas, data, pred_labs;
     data = samples.getMat();
 
+    const bool rawout = flags & StatModel::RAW_OUTPUT;
+
     // check if learnt_mats array is populated
     if(this->learnt_thetas.total()<=0)
     {
@@ -239,18 +237,17 @@ float LogisticRegressionImpl::predict(InputArray samples, OutputArray results, i
     }
 
     // add a column of ones
-    Mat data_t = Mat::zeros(data.rows, data.cols+1, CV_32F);
-    for (int i=0;i<data_t.cols;i++)
-    {
-        if(i==0)
-        {
-            vconcat(Mat(data.rows, 1, data.type(), Scalar::all(1.0)), data_t.col(i));
-            continue;
-        }
-        vconcat(data.col(i-1), data_t.col(i));
-    }
+    Mat data_t;
+    hconcat( cv::Mat::ones( data.rows, 1, CV_32F ), data, data_t );
 
-    this->learnt_thetas.convertTo(thetas, CV_32F);
+    if ( learnt_thetas.type() == CV_32F )
+    {
+        thetas = learnt_thetas;
+    }
+    else
+    {
+        this->learnt_thetas.convertTo( thetas, CV_32F );
+    }
 
     CV_Assert(thetas.rows > 0);
 
@@ -292,9 +289,21 @@ float LogisticRegressionImpl::predict(InputArray samples, OutputArray results, i
     pred_labs = remap_labels(labels_c, this->reverse_mapper);
     // convert pred_labs to integer type
     pred_labs.convertTo(pred_labs, CV_32S);
-    pred_labs.copyTo(results);
-    // TODO: determine
-    return 0;
+
+    // return either the labels or the raw output
+    if ( results.needed() )
+    {
+        if ( rawout )
+        {
+            pred_m.copyTo( results );
+        }
+        else
+        {
+            pred_labs.copyTo(results);
+        }
+    }
+
+    return ( pred_labs.empty() ? 0 : pred_labs.at< int >( 0 ) );
 }
 
 Mat LogisticRegressionImpl::calc_sigmoid(const Mat& data) const
@@ -320,7 +329,6 @@ double LogisticRegressionImpl::compute_cost(const Mat& _data, const Mat& _labels
     n = _data.cols;
 
     theta_b = _init_theta(Range(1, n), Range::all());
-    multiply(theta_b, theta_b, theta_c, 1);
 
     if (params.norm != REG_DISABLE)
     {
@@ -334,31 +342,66 @@ double LogisticRegressionImpl::compute_cost(const Mat& _data, const Mat& _labels
     else
     {
         // assuming it to be L2 by default
+        multiply(theta_b, theta_b, theta_c, 1);
         rparameter = (llambda/(2*m)) * sum(theta_c)[0];
     }
 
-    d_a = calc_sigmoid(_data* _init_theta);
-
-
+    d_a = calc_sigmoid(_data * _init_theta);
     log(d_a, d_a);
     multiply(d_a, _labels, d_a);
 
-    d_b = 1 - calc_sigmoid(_data * _init_theta);
+    // use the fact that: log(1 - sigmoid(x)) = log(sigmoid(-x))
+    d_b = calc_sigmoid(- _data * _init_theta);
     log(d_b, d_b);
     multiply(d_b, 1-_labels, d_b);
 
+    double sda = sum(d_a)[0];
+    double sdb = sum(d_b)[0];
+
     cost = (-1.0/m) * (sum(d_a)[0] + sum(d_b)[0]);
     cost = cost + rparameter;
 
+    if(cvIsNaN( cost ) == 1)
+    {
+        CV_Error( CV_StsBadArg, "check training parameters. Invalid training classifier" );
+    }
+
     return cost;
 }
 
-Mat LogisticRegressionImpl::compute_batch_gradient(const Mat& _data, const Mat& _labels, const Mat& _init_theta)
+
+void LogisticRegressionImpl::compute_gradient(const Mat& _data, const Mat& _labels, const Mat &_theta, const double _lambda, Mat & _gradient )
+{
+    const int m = _data.rows;
+    Mat pcal_a, pcal_b, pcal_ab;
+
+    const Mat z = _data * _theta;
+
+    CV_Assert( _gradient.rows == _theta.rows && _gradient.cols == _theta.cols );
+
+    pcal_a = calc_sigmoid(z) - _labels;
+    pcal_b = _data(Range::all(), Range(0,1));
+    multiply(pcal_a, pcal_b, pcal_ab, 1);
+
+    _gradient.row(0) = ((float)1/m) * sum(pcal_ab)[0];
+
+    //cout<<"for each training data entry"<<endl;
+    for(int ii = 1;ii<_gradient.rows;ii++)
+    {
+        pcal_b = _data(Range::all(), Range(ii,ii+1));
+        multiply(pcal_a, pcal_b, pcal_ab, 1);
+
+        _gradient.row(ii) = (1.0/m)*sum(pcal_ab)[0] + (_lambda/m) * _theta.row(ii);
+    }
+}
+
+Mat LogisticRegressionImpl::batch_gradient_descent(const Mat& _data, const Mat& _labels, const Mat& _init_theta)
 {
     // implements batch gradient descent
     if(this->params.alpha<=0)
     {
-        CV_Error( CV_StsBadArg, "check training parameters for the classifier" );
+        CV_Error( CV_StsBadArg, "check training parameters (learning rate) for the classifier" );
     }
 
     if(this->params.num_iters <= 0)
     {
@@ -369,11 +412,8 @@ Mat LogisticRegressionImpl::compute_batch_gradient(const Mat& _data, const Mat&
     int llambda = 0;
     double ccost;
     int m, n;
-    Mat pcal_a;
-    Mat pcal_b;
-    Mat pcal_ab;
-    Mat gradient;
     Mat theta_p = _init_theta.clone();
+    Mat gradient( theta_p.rows, theta_p.cols, theta_p.type() );
 
     m = _data.rows;
     n = _data.cols;
@@ -384,45 +424,17 @@ Mat LogisticRegressionImpl::compute_batch_gradient(const Mat& _data, const Mat&
 
     for(int i = 0;i<this->params.num_iters;i++)
     {
+        // this seems to only be called to ensure that cost is not NaN
         ccost = compute_cost(_data, _labels, theta_p);
 
-        if( cvIsNaN( ccost ) )
-        {
-            CV_Error( CV_StsBadArg, "check training parameters. Invalid training classifier" );
-        }
-
-        pcal_b = calc_sigmoid((_data*theta_p) - _labels);
-
-        pcal_a = (static_cast<double>(1/m)) * _data.t();
-
-        gradient = pcal_a * pcal_b;
-
-        pcal_a = calc_sigmoid(_data*theta_p) - _labels;
-
-        pcal_b = _data(Range::all(), Range(0,1));
-
-        multiply(pcal_a, pcal_b, pcal_ab, 1);
-
-        gradient.row(0) = ((float)1/m) * sum(pcal_ab)[0];
-
-        pcal_b = _data(Range::all(), Range(1,n));
-
-        //cout<<"for each training data entry"<<endl;
-        for(int ii = 1;ii<gradient.rows;ii++)
-        {
-            pcal_b = _data(Range::all(), Range(ii,ii+1));
-            multiply(pcal_a, pcal_b, pcal_ab, 1);
-
-            gradient.row(ii) = (1.0/m)*sum(pcal_ab)[0] + (llambda/m) * theta_p.row(ii);
-        }
-
+        compute_gradient( _data, _labels, theta_p, llambda, gradient );
+
         theta_p = theta_p - ( static_cast<double>(this->params.alpha)/m)*gradient;
     }
 
     return theta_p;
 }
 
-Mat LogisticRegressionImpl::compute_mini_batch_gradient(const Mat& _data, const Mat& _labels, const Mat& _init_theta)
+Mat LogisticRegressionImpl::mini_batch_gradient_descent(const Mat& _data, const Mat& _labels, const Mat& _init_theta)
 {
     // implements batch gradient descent
     int lambda_l = 0;
     double ccost;
     int m, n;
     int j = 0;
     int size_b = this->params.mini_batch_size;
@@ -441,11 +453,8 @@ Mat LogisticRegressionImpl::compute_mini_batch_gradient(const Mat& _data, const
         CV_Error( CV_StsBadArg, "number of iterations cannot be zero or a negative number" );
     }
 
-    Mat pcal_a;
-    Mat pcal_b;
-    Mat pcal_ab;
-    Mat gradient;
     Mat theta_p = _init_theta.clone();
+    Mat gradient( theta_p.rows, theta_p.cols, theta_p.type() );
     Mat data_d;
     Mat labels_l;
@@ -470,44 +479,18 @@ Mat LogisticRegressionImpl::compute_mini_batch_gradient(const Mat& _data, const
 
         m = data_d.rows;
         n = data_d.cols;
 
+        // this seems to only be called to ensure that cost is not NaN
        ccost = compute_cost(data_d, labels_l, theta_p);
 
-        if( cvIsNaN( ccost ) == 1)
-        {
-            CV_Error( CV_StsBadArg, "check training parameters. Invalid training classifier" );
-        }
-
-        pcal_b = calc_sigmoid((data_d*theta_p) - labels_l);
-
-        pcal_a = (static_cast<double>(1/m)) * data_d.t();
-
-        gradient = pcal_a * pcal_b;
-
-        pcal_a = calc_sigmoid(data_d*theta_p) - labels_l;
-
-        pcal_b = data_d(Range::all(), Range(0,1));
-
-        multiply(pcal_a, pcal_b, pcal_ab, 1);
-
-        gradient.row(0) = ((float)1/m) * sum(pcal_ab)[0];
-
-        pcal_b = data_d(Range::all(), Range(1,n));
-
-        for(int k = 1;k<gradient.rows;k++)
-        {
-            pcal_b = data_d(Range::all(), Range(k,k+1));
-            multiply(pcal_a, pcal_b, pcal_ab, 1);
-
-            gradient.row(k) = (1.0/m)*sum(pcal_ab)[0] + (lambda_l/m) * theta_p.row(k);
-        }
-
+        compute_gradient(data_d, labels_l, theta_p, lambda_l, gradient);
+
         theta_p = theta_p - ( static_cast<double>(this->params.alpha)/m)*gradient;
 
-        j+=this->params.mini_batch_size;
+        j += this->params.mini_batch_size;
 
-        if(j+size_b>_data.rows)
-        {
-            // if parsed through all data variables
-            break;
+        // if parsed through all data variables
+        if (j >= _data.rows) {
+            j = 0;
         }
     }
     return theta_p;

From db78fcd11acfe07f8ca852018421b54298315b3c Mon Sep 17 00:00:00 2001
From: Abe Friesen
Date: Mon, 2 Nov 2015 17:17:35 -0800
Subject: [PATCH 2/3] Removed unused variables.

---
 modules/ml/src/lr.cpp | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/modules/ml/src/lr.cpp b/modules/ml/src/lr.cpp
index 9bef74c4be..988aa313ed 100644
--- a/modules/ml/src/lr.cpp
+++ b/modules/ml/src/lr.cpp
@@ -355,9 +355,6 @@ double LogisticRegressionImpl::compute_cost(const Mat& _data, const Mat& _labels
     log(d_b, d_b);
     multiply(d_b, 1-_labels, d_b);
 
-    double sda = sum(d_a)[0];
-    double sdb = sum(d_b)[0];
-
     cost = (-1.0/m) * (sum(d_a)[0] + sum(d_b)[0]);
     cost = cost + rparameter;
 
@@ -410,12 +407,10 @@ Mat LogisticRegressionImpl::batch_gradient_descent(const Mat& _data, const Mat&
     }
 
     int llambda = 0;
-    double ccost;
-    int m, n;
+    int m;
     Mat theta_p = _init_theta.clone();
     Mat gradient( theta_p.rows, theta_p.cols, theta_p.type() );
     m = _data.rows;
-    n = _data.cols;
 
     if (params.norm != REG_DISABLE)
     {
@@ -425,7 +420,7 @@ Mat LogisticRegressionImpl::batch_gradient_descent(const Mat& _data, const Mat&
     for(int i = 0;i<this->params.num_iters;i++)
     {
         // this seems to only be called to ensure that cost is not NaN
-        ccost = compute_cost(_data, _labels, theta_p);
+        compute_cost(_data, _labels, theta_p);
 
         compute_gradient( _data, _labels, theta_p, llambda, gradient );
 
@@ -438,8 +433,7 @@ Mat LogisticRegressionImpl::mini_batch_gradient_descent(const Mat& _data, const
 {
     // implements batch gradient descent
     int lambda_l = 0;
-    double ccost;
-    int m, n;
+    int m;
     int j = 0;
     int size_b = this->params.mini_batch_size;
 
@@ -477,10 +471,9 @@ Mat LogisticRegressionImpl::mini_batch_gradient_descent(const Mat& _data, const
         }
 
         m = data_d.rows;
-        n = data_d.cols;
 
         // this seems to only be called to ensure that cost is not NaN
-        ccost = compute_cost(data_d, labels_l, theta_p);
+        compute_cost(data_d, labels_l, theta_p);
 
         compute_gradient(data_d, labels_l, theta_p, lambda_l, gradient);
 

From d367e159c13ccb39fb44a236b52615d898fc7ecb Mon Sep 17 00:00:00 2001
From: Abe Friesen
Date: Mon, 2 Nov 2015 17:36:27 -0800
Subject: [PATCH 3/3] Added cast to float to remove warning

---
 modules/ml/src/lr.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/ml/src/lr.cpp b/modules/ml/src/lr.cpp
index 988aa313ed..a378947914 100644
--- a/modules/ml/src/lr.cpp
+++ b/modules/ml/src/lr.cpp
@@ -303,7 +303,7 @@ float LogisticRegressionImpl::predict(InputArray samples, OutputArray results, i
         }
     }
 
-    return ( pred_labs.empty() ? 0 : pred_labs.at< int >( 0 ) );
+    return ( pred_labs.empty() ? 0.f : (float) pred_labs.at< int >( 0 ) );
 }
 
 Mat LogisticRegressionImpl::calc_sigmoid(const Mat& data) const
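
The snippet below is not part of the patch series; it is a minimal usage sketch of the predict() contract these patches describe: the return value is the predicted label of the first sample (a float after PATCH 3/3), the OutputArray is written only when one is supplied (so calling predict without results no longer segfaults, per the PATCH 1/3 message), and StatModel::RAW_OUTPUT requests the raw responses instead of the remapped labels. The toy data, parameter values, and variable names are made up for illustration; only the cv::ml::LogisticRegression / StatModel calls themselves are standard OpenCV API.

```cpp
#include <opencv2/ml.hpp>
#include <iostream>

int main()
{
    using namespace cv;
    using namespace cv::ml;

    // hypothetical 2-class toy problem (for illustration only)
    Mat samples = (Mat_<float>(4, 1) << 1.f, 2.f, 8.f, 9.f);
    Mat labels  = (Mat_<float>(4, 1) << 0.f, 0.f, 1.f, 1.f);

    Ptr<LogisticRegression> lr = LogisticRegression::create();
    lr->setLearningRate(0.01);
    lr->setIterations(100);
    lr->setTrainMethod(LogisticRegression::BATCH);
    lr->train(samples, ROW_SAMPLE, labels);

    Mat query = (Mat_<float>(1, 1) << 8.5f);

    // no OutputArray: per the patched predict(), nothing is written and the
    // predicted label of the (first) query sample is returned directly
    float label = lr->predict(query);

    // OutputArray + RAW_OUTPUT: per the patch, the raw per-class responses
    // (pred_m) are written instead of the remapped integer labels
    Mat raw;
    lr->predict(query, raw, StatModel::RAW_OUTPUT);

    std::cout << "predicted label = " << label
              << ", raw output size = " << raw.rows << "x" << raw.cols << std::endl;
    return 0;
}
```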