2007-03-08 04:03:40 +08:00
|
|
|
|
/**********************************************************************
|
|
|
|
|
* File: linlsq.cpp (Formerly llsq.c)
|
|
|
|
|
* Description: Linear Least squares fitting code.
|
2018-07-01 00:28:24 +08:00
|
|
|
|
* Author: Ray Smith
|
2007-03-08 04:03:40 +08:00
|
|
|
|
*
|
|
|
|
|
* (C) Copyright 1991, Hewlett-Packard Ltd.
|
|
|
|
|
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
|
** you may not use this file except in compliance with the License.
|
|
|
|
|
** You may obtain a copy of the License at
|
|
|
|
|
** http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
** Unless required by applicable law or agreed to in writing, software
|
|
|
|
|
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
|
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
|
** See the License for the specific language governing permissions and
|
|
|
|
|
** limitations under the License.
|
|
|
|
|
*
|
|
|
|
|
**********************************************************************/
|
|
|
|
|
|
2018-09-19 00:51:11 +08:00
|
|
|
|
#include <cstdio>
|
2018-10-16 23:57:26 +08:00
|
|
|
|
#include <cmath> // for std::sqrt
|
2018-07-01 00:28:24 +08:00
|
|
|
|
#include "errcode.h"
|
|
|
|
|
#include "linlsq.h"
|
2007-03-08 04:03:40 +08:00
|
|
|
|
|
|
|
|
|
const ERRCODE EMPTY_LLSQ = "Can't delete from an empty LLSQ";
|
|
|
|
|
|
|
|
|
|
/**********************************************************************
|
|
|
|
|
* LLSQ::clear
|
|
|
|
|
*
|
|
|
|
|
* Function to initialize a LLSQ.
|
|
|
|
|
**********************************************************************/
|
|
|
|
|
|
2011-03-22 05:44:45 +08:00
|
|
|
|
void LLSQ::clear() { // initialize
|
|
|
|
|
total_weight = 0.0; // no elements
|
|
|
|
|
sigx = 0.0; // update accumulators
|
|
|
|
|
sigy = 0.0;
|
|
|
|
|
sigxx = 0.0;
|
|
|
|
|
sigxy = 0.0;
|
|
|
|
|
sigyy = 0.0;
|
2007-03-08 04:03:40 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**********************************************************************
|
|
|
|
|
* LLSQ::add
|
|
|
|
|
*
|
|
|
|
|
* Add an element to the accumulator.
|
|
|
|
|
**********************************************************************/
|
|
|
|
|
|
2011-03-22 05:44:45 +08:00
|
|
|
|
void LLSQ::add(double x, double y) { // add an element
|
|
|
|
|
total_weight++; // count elements
|
|
|
|
|
sigx += x; // update accumulators
|
2007-03-08 04:03:40 +08:00
|
|
|
|
sigy += y;
|
|
|
|
|
sigxx += x * x;
|
|
|
|
|
sigxy += x * y;
|
|
|
|
|
sigyy += y * y;
|
|
|
|
|
}
|
2011-03-22 05:44:45 +08:00
|
|
|
|
// Adds an element with a specified weight.
|
|
|
|
|
void LLSQ::add(double x, double y, double weight) {
|
|
|
|
|
total_weight += weight;
|
|
|
|
|
sigx += x * weight; // update accumulators
|
|
|
|
|
sigy += y * weight;
|
|
|
|
|
sigxx += x * x * weight;
|
|
|
|
|
sigxy += x * y * weight;
|
|
|
|
|
sigyy += y * y * weight;
|
|
|
|
|
}
|
|
|
|
|
// Adds a whole LLSQ.
|
|
|
|
|
void LLSQ::add(const LLSQ& other) {
|
|
|
|
|
total_weight += other.total_weight;
|
|
|
|
|
sigx += other.sigx; // update accumulators
|
|
|
|
|
sigy += other.sigy;
|
|
|
|
|
sigxx += other.sigxx;
|
|
|
|
|
sigxy += other.sigxy;
|
|
|
|
|
sigyy += other.sigyy;
|
|
|
|
|
}
|
2007-03-08 04:03:40 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**********************************************************************
|
|
|
|
|
* LLSQ::remove
|
|
|
|
|
*
|
|
|
|
|
* Delete an element from the acculuator.
|
|
|
|
|
**********************************************************************/
|
|
|
|
|
|
2011-03-22 05:44:45 +08:00
|
|
|
|
void LLSQ::remove(double x, double y) { // delete an element
|
|
|
|
|
if (total_weight <= 0.0) // illegal
|
2016-12-13 00:20:05 +08:00
|
|
|
|
EMPTY_LLSQ.error("LLSQ::remove", ABORT, nullptr);
|
2011-03-22 05:44:45 +08:00
|
|
|
|
total_weight--; // count elements
|
|
|
|
|
sigx -= x; // update accumulators
|
2007-03-08 04:03:40 +08:00
|
|
|
|
sigy -= y;
|
|
|
|
|
sigxx -= x * x;
|
|
|
|
|
sigxy -= x * y;
|
|
|
|
|
sigyy -= y * y;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**********************************************************************
|
|
|
|
|
* LLSQ::m
|
|
|
|
|
*
|
|
|
|
|
* Return the gradient of the line fit.
|
|
|
|
|
**********************************************************************/
|
|
|
|
|
|
2011-03-22 05:44:45 +08:00
|
|
|
|
double LLSQ::m() const { // get gradient
|
|
|
|
|
double covar = covariance();
|
|
|
|
|
double x_var = x_variance();
|
|
|
|
|
if (x_var != 0.0)
|
|
|
|
|
return covar / x_var;
|
2007-03-08 04:03:40 +08:00
|
|
|
|
else
|
2011-03-22 05:44:45 +08:00
|
|
|
|
return 0.0; // too little
|
2007-03-08 04:03:40 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**********************************************************************
|
|
|
|
|
* LLSQ::c
|
|
|
|
|
*
|
|
|
|
|
* Return the constant of the line fit.
|
|
|
|
|
**********************************************************************/
|
|
|
|
|
|
2011-03-22 05:44:45 +08:00
|
|
|
|
double LLSQ::c(double m) const { // get constant
|
|
|
|
|
if (total_weight > 0.0)
|
|
|
|
|
return (sigy - m * sigx) / total_weight;
|
2007-03-08 04:03:40 +08:00
|
|
|
|
else
|
2011-03-22 05:44:45 +08:00
|
|
|
|
return 0; // too little
|
2007-03-08 04:03:40 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**********************************************************************
|
|
|
|
|
* LLSQ::rms
|
|
|
|
|
*
|
|
|
|
|
* Return the rms error of the fit.
|
|
|
|
|
**********************************************************************/
|
|
|
|
|
|
2011-03-22 05:44:45 +08:00
|
|
|
|
double LLSQ::rms(double m, double c) const { // get error
|
|
|
|
|
double error; // total error
|
2007-03-08 04:03:40 +08:00
|
|
|
|
|
2011-03-22 05:44:45 +08:00
|
|
|
|
if (total_weight > 0) {
|
|
|
|
|
error = sigyy + m * (m * sigxx + 2 * (c * sigx - sigxy)) + c *
|
|
|
|
|
(total_weight * c - 2 * sigy);
|
2007-03-08 04:03:40 +08:00
|
|
|
|
if (error >= 0)
|
2018-10-16 23:57:26 +08:00
|
|
|
|
error = std::sqrt(error / total_weight); // sqrt of mean
|
2007-03-08 04:03:40 +08:00
|
|
|
|
else
|
|
|
|
|
error = 0;
|
2011-03-22 05:44:45 +08:00
|
|
|
|
} else {
|
|
|
|
|
error = 0; // too little
|
2007-03-08 04:03:40 +08:00
|
|
|
|
}
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**********************************************************************
|
2011-03-22 05:44:45 +08:00
|
|
|
|
* LLSQ::pearson
|
2007-03-08 04:03:40 +08:00
|
|
|
|
*
|
2011-03-22 05:44:45 +08:00
|
|
|
|
* Return the pearson product moment correlation coefficient.
|
2007-03-08 04:03:40 +08:00
|
|
|
|
**********************************************************************/
|
|
|
|
|
|
2011-03-22 05:44:45 +08:00
|
|
|
|
double LLSQ::pearson() const { // get correlation
|
2018-05-28 00:40:13 +08:00
|
|
|
|
double r = 0.0; // Correlation is 0 if insufficient data.
|
2007-03-08 04:03:40 +08:00
|
|
|
|
|
2011-03-22 05:44:45 +08:00
|
|
|
|
double covar = covariance();
|
|
|
|
|
if (covar != 0.0) {
|
|
|
|
|
double var_product = x_variance() * y_variance();
|
|
|
|
|
if (var_product > 0.0)
|
2018-10-16 23:57:26 +08:00
|
|
|
|
r = covar / std::sqrt(var_product);
|
2007-03-08 04:03:40 +08:00
|
|
|
|
}
|
2011-03-22 05:44:45 +08:00
|
|
|
|
return r;
|
2007-03-08 04:03:40 +08:00
|
|
|
|
}
|
|
|
|
|
|
2011-03-22 05:44:45 +08:00
|
|
|
|
// Returns the x,y means as an FCOORD.
|
|
|
|
|
FCOORD LLSQ::mean_point() const {
|
|
|
|
|
if (total_weight > 0.0) {
|
|
|
|
|
return FCOORD(sigx / total_weight, sigy / total_weight);
|
|
|
|
|
} else {
|
|
|
|
|
return FCOORD(0.0f, 0.0f);
|
2007-03-08 04:03:40 +08:00
|
|
|
|
}
|
2011-03-22 05:44:45 +08:00
|
|
|
|
}
|
|
|
|
|
|
2013-09-21 03:43:47 +08:00
|
|
|
|
// Returns the sqrt of the mean squared error measured perpendicular from the
|
|
|
|
|
// line through mean_point() in the direction dir.
|
|
|
|
|
//
|
|
|
|
|
// Derivation:
|
|
|
|
|
// Lemma: Let v and x_i (i=1..N) be a k-dimensional vectors (1xk matrices).
|
|
|
|
|
// Let % be dot product and ' be transpose. Note that:
|
|
|
|
|
// Sum[i=1..N] (v % x_i)^2
|
|
|
|
|
// = v * [x_1' x_2' ... x_N'] * [x_1' x_2' .. x_N']' * v'
|
|
|
|
|
// If x_i have average 0 we have:
|
|
|
|
|
// = v * (N * COVARIANCE_MATRIX(X)) * v'
|
|
|
|
|
// Expanded for the case that k = 2, where we treat the dimensions
|
|
|
|
|
// as x_i and y_i, this is:
|
|
|
|
|
// = v * (N * [VAR(X), COV(X,Y); COV(X,Y) VAR(Y)]) * v'
|
|
|
|
|
// Now, we are trying to calculate the mean squared error, where v is
|
|
|
|
|
// perpendicular to our line of interest:
|
|
|
|
|
// Mean squared error
|
|
|
|
|
// = E [ (v % (x_i - x_avg))) ^2 ]
|
|
|
|
|
// = Sum (v % (x_i - x_avg))^2 / N
|
|
|
|
|
// = v * N * [VAR(X) COV(X,Y); COV(X,Y) VAR(Y)] / N * v'
|
|
|
|
|
// = v * [VAR(X) COV(X,Y); COV(X,Y) VAR(Y)] * v'
|
|
|
|
|
// = code below
|
|
|
|
|
double LLSQ::rms_orth(const FCOORD &dir) const {
|
|
|
|
|
FCOORD v = !dir;
|
|
|
|
|
v.normalise();
|
2018-10-16 23:57:26 +08:00
|
|
|
|
return std::sqrt(v.x() * v.x() * x_variance() +
|
|
|
|
|
2 * v.x() * v.y() * covariance() +
|
|
|
|
|
v.y() * v.y() * y_variance());
|
2013-09-21 03:43:47 +08:00
|
|
|
|
}
|
|
|
|
|
|
2011-03-22 05:44:45 +08:00
|
|
|
|
// Returns the direction of the fitted line as a unit vector, using the
|
|
|
|
|
// least mean squared perpendicular distance. The line runs through the
|
|
|
|
|
// mean_point, i.e. a point p on the line is given by:
|
|
|
|
|
// p = mean_point() + lambda * vector_fit() for some real number lambda.
|
|
|
|
|
// Note that the result (0<=x<=1, -1<=y<=-1) is directionally ambiguous
|
|
|
|
|
// and may be negated without changing its meaning.
|
2013-09-21 03:43:47 +08:00
|
|
|
|
// Fitting a line m + 𝜆v to a set of N points Pi = (xi, yi), where
|
|
|
|
|
// m is the mean point (𝝁, 𝝂) and
|
|
|
|
|
// v is the direction vector (cos𝜃, sin𝜃)
|
|
|
|
|
// The perpendicular distance of each Pi from the line is:
|
|
|
|
|
// (Pi - m) x v, where x is the scalar cross product.
|
|
|
|
|
// Total squared error is thus:
|
|
|
|
|
// E = ∑((xi - 𝝁)sin𝜃 - (yi - 𝝂)cos𝜃)²
|
|
|
|
|
// = ∑(xi - 𝝁)²sin²𝜃 - 2∑(xi - 𝝁)(yi - 𝝂)sin𝜃 cos𝜃 + ∑(yi - 𝝂)²cos²𝜃
|
|
|
|
|
// = NVar(xi)sin²𝜃 - 2NCovar(xi, yi)sin𝜃 cos𝜃 + NVar(yi)cos²𝜃 (Eq 1)
|
|
|
|
|
// where Var(xi) is the variance of xi,
|
|
|
|
|
// and Covar(xi, yi) is the covariance of xi, yi.
|
|
|
|
|
// Taking the derivative wrt 𝜃 and setting to 0 to obtain the min/max:
|
|
|
|
|
// 0 = 2NVar(xi)sin𝜃 cos𝜃 -2NCovar(xi, yi)(cos²𝜃 - sin²𝜃) -2NVar(yi)sin𝜃 cos𝜃
|
|
|
|
|
// => Covar(xi, yi)(cos²𝜃 - sin²𝜃) = (Var(xi) - Var(yi))sin𝜃 cos𝜃
|
|
|
|
|
// Using double angles:
|
|
|
|
|
// 2Covar(xi, yi)cos2𝜃 = (Var(xi) - Var(yi))sin2𝜃 (Eq 2)
|
|
|
|
|
// So 𝜃 = 0.5 atan2(2Covar(xi, yi), Var(xi) - Var(yi)) (Eq 3)
|
|
|
|
|
|
|
|
|
|
// Because it involves 2𝜃 , Eq 2 has 2 solutions 90 degrees apart, but which
|
|
|
|
|
// is the min and which is the max? From Eq1:
|
|
|
|
|
// E/N = Var(xi)sin²𝜃 - 2Covar(xi, yi)sin𝜃 cos𝜃 + Var(yi)cos²𝜃
|
|
|
|
|
// and 90 degrees away, using sin/cos equivalences:
|
|
|
|
|
// E'/N = Var(xi)cos²𝜃 + 2Covar(xi, yi)sin𝜃 cos𝜃 + Var(yi)sin²𝜃
|
|
|
|
|
// The second error is smaller (making it the minimum) iff
|
|
|
|
|
// E'/N < E/N ie:
|
|
|
|
|
// (Var(xi) - Var(yi))(cos²𝜃 - sin²𝜃) < -4Covar(xi, yi)sin𝜃 cos𝜃
|
|
|
|
|
// Using double angles:
|
|
|
|
|
// (Var(xi) - Var(yi))cos2𝜃 < -2Covar(xi, yi)sin2𝜃 (InEq 1)
|
|
|
|
|
// But atan2(2Covar(xi, yi), Var(xi) - Var(yi)) picks 2𝜃 such that:
|
|
|
|
|
// sgn(cos2𝜃) = sgn(Var(xi) - Var(yi)) and sgn(sin2𝜃) = sgn(Covar(xi, yi))
|
|
|
|
|
// so InEq1 can *never* be true, making the atan2 result *always* the min!
|
|
|
|
|
// In the degenerate case, where Covar(xi, yi) = 0 AND Var(xi) = Var(yi),
|
|
|
|
|
// the 2 solutions have equal error and the inequality is still false.
|
|
|
|
|
// Therefore the solution really is as trivial as Eq 3.
|
|
|
|
|
|
|
|
|
|
// This is equivalent to returning the Principal Component in PCA, or the
|
|
|
|
|
// eigenvector corresponding to the largest eigenvalue in the covariance
|
|
|
|
|
// matrix. However, atan2 is much simpler! The one reference I found that
|
|
|
|
|
// uses this formula is http://web.mit.edu/18.06/www/Essays/tlsfit.pdf but
|
|
|
|
|
// that is still a much more complex derivation. It seems Pearson had already
|
|
|
|
|
// found this simple solution in 1901.
|
|
|
|
|
// http://books.google.com/books?id=WXwvAQAAIAAJ&pg=PA559
|
2011-03-22 05:44:45 +08:00
|
|
|
|
FCOORD LLSQ::vector_fit() const {
|
|
|
|
|
double x_var = x_variance();
|
|
|
|
|
double y_var = y_variance();
|
|
|
|
|
double covar = covariance();
|
2013-09-21 03:43:47 +08:00
|
|
|
|
double theta = 0.5 * atan2(2.0 * covar, x_var - y_var);
|
|
|
|
|
FCOORD result(cos(theta), sin(theta));
|
2011-03-22 05:44:45 +08:00
|
|
|
|
return result;
|
2007-03-08 04:03:40 +08:00
|
|
|
|
}
|