mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-07 18:27:48 +08:00
42144b9698
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@870 d0cd1f9f-072b-0410-8dd7-cf729c803f20
135 lines
4.7 KiB
C++
135 lines
4.7 KiB
C++
/**********************************************************************
|
|
* File: linlsq.h (Formerly llsq.h)
|
|
* Description: Linear Least squares fitting code.
|
|
* Author: Ray Smith
|
|
* Created: Thu Sep 12 08:44:51 BST 1991
|
|
*
|
|
* (C) Copyright 1991, Hewlett-Packard Ltd.
|
|
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
** you may not use this file except in compliance with the License.
|
|
** You may obtain a copy of the License at
|
|
** http://www.apache.org/licenses/LICENSE-2.0
|
|
** Unless required by applicable law or agreed to in writing, software
|
|
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
** See the License for the specific language governing permissions and
|
|
** limitations under the License.
|
|
*
|
|
**********************************************************************/
|
|
|
|
#ifndef TESSERACT_CCSTRUCT_LINLSQ_H_
|
|
#define TESSERACT_CCSTRUCT_LINLSQ_H_
|
|
|
|
#include "points.h"
|
|
#include "params.h"
|
|
|
|
class LLSQ {
|
|
public:
|
|
LLSQ() { // constructor
|
|
clear(); // set to zeros
|
|
}
|
|
void clear(); // initialize
|
|
|
|
// Adds an element with a weight of 1.
|
|
void add(double x, double y);
|
|
// Adds an element with a specified weight.
|
|
void add(double x, double y, double weight);
|
|
// Adds a whole LLSQ.
|
|
void add(const LLSQ& other);
|
|
// Deletes an element with a weight of 1.
|
|
void remove(double x, double y);
|
|
inT32 count() const { // no of elements
|
|
return static_cast<int>(total_weight + 0.5);
|
|
}
|
|
|
|
double m() const; // get gradient
|
|
double c(double m) const; // get constant
|
|
double rms(double m, double c) const; // get error
|
|
double pearson() const; // get correlation coefficient.
|
|
|
|
// Returns the x,y means as an FCOORD.
|
|
FCOORD mean_point() const;
|
|
|
|
// Returns the average sum of squared perpendicular error from a line
|
|
// through mean_point() in the direction dir.
|
|
double rms_orth(const FCOORD &dir) const;
|
|
|
|
// Returns the direction of the fitted line as a unit vector, using the
|
|
// least mean squared perpendicular distance. The line runs through the
|
|
// mean_point, i.e. a point p on the line is given by:
|
|
// p = mean_point() + lambda * vector_fit() for some real number lambda.
|
|
// Note that the result (0<=x<=1, -1<=y<=-1) is directionally ambiguous
|
|
// and may be negated without changing its meaning, since a line is only
|
|
// unique to a range of pi radians.
|
|
// Modernists prefer to think of this as an Eigenvalue problem, but
|
|
// Pearson had the simple solution in 1901.
|
|
//
|
|
// Note that this is equivalent to returning the Principal Component in PCA,
|
|
// or the eigenvector corresponding to the largest eigenvalue in the
|
|
// covariance matrix.
|
|
FCOORD vector_fit() const;
|
|
|
|
// Returns the covariance.
|
|
double covariance() const {
|
|
if (total_weight > 0.0)
|
|
return (sigxy - sigx * sigy / total_weight) / total_weight;
|
|
else
|
|
return 0.0;
|
|
}
|
|
double x_variance() const {
|
|
if (total_weight > 0.0)
|
|
return (sigxx - sigx * sigx / total_weight) / total_weight;
|
|
else
|
|
return 0.0;
|
|
}
|
|
double y_variance() const {
|
|
if (total_weight > 0.0)
|
|
return (sigyy - sigy * sigy / total_weight) / total_weight;
|
|
else
|
|
return 0.0;
|
|
}
|
|
|
|
private:
|
|
double total_weight; // no of elements or sum of weights.
|
|
double sigx; // sum of x
|
|
double sigy; // sum of y
|
|
double sigxx; // sum x squared
|
|
double sigxy; // sum of xy
|
|
double sigyy; // sum y squared
|
|
};
|
|
|
|
|
|
// Returns the median value of the vector, given that the values are
|
|
// circular, with the given modulus. Values may be signed or unsigned,
|
|
// eg range from -pi to pi (modulus 2pi) or from 0 to 2pi (modulus 2pi).
|
|
// NOTE that the array is shuffled, but the time taken is linear.
|
|
// An assumption is made that most of the values are spread over no more than
|
|
// half the range, but wrap-around is accounted for if the median is near
|
|
// the wrap-around point.
|
|
// Cannot be a member of GenericVector, as it makes heavy used of LLSQ.
|
|
// T must be an integer or float/double type.
|
|
template<typename T> T MedianOfCircularValues(T modulus, GenericVector<T>* v) {
|
|
LLSQ stats;
|
|
T halfrange = static_cast<T>(modulus / 2);
|
|
int num_elements = v->size();
|
|
for (int i = 0; i < num_elements; ++i) {
|
|
stats.add((*v)[i], (*v)[i] + halfrange);
|
|
}
|
|
bool offset_needed = stats.y_variance() < stats.x_variance();
|
|
if (offset_needed) {
|
|
for (int i = 0; i < num_elements; ++i) {
|
|
(*v)[i] += halfrange;
|
|
}
|
|
}
|
|
int median_index = v->choose_nth_item(num_elements / 2);
|
|
if (offset_needed) {
|
|
for (int i = 0; i < num_elements; ++i) {
|
|
(*v)[i] -= halfrange;
|
|
}
|
|
}
|
|
return (*v)[median_index];
|
|
}
|
|
|
|
|
|
#endif // TESSERACT_CCSTRUCT_LINLSQ_H_
|