tesseract/ccstruct/detlinefit.h
theraysmith 903a4ffe9d Changes to ccstruct for 3.00
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@289 d0cd1f9f-072b-0410-8dd7-cf729c803f20
2009-07-11 02:14:57 +00:00

81 lines
3.4 KiB
C++

///////////////////////////////////////////////////////////////////////
// File: detlinefit.h
// Description: Deterministic least upper-quartile squares line fitting.
// Author: Ray Smith
// Created: Thu Feb 28 14:35:01 PDT 2008
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_CCSTRUCT_DETLINEFIT_H_
#define TESSERACT_CCSTRUCT_DETLINEFIT_H_
#include "points.h"
namespace tesseract {
// This class fits a line to a set of ICOORD points.
// There is no restriction on the direction of the line, as it
// uses a vector method, ie no concern over infinite gradients.
// The fitted line has the least upper quartile of squares of perpendicular
// distances of all source points from the line, subject to the constraint
// that the line is made from one of the pairs of [{p1,p2,p3},{pn-2, pn-1, pn}]
// i.e. the 9 combinations of one of the first 3 and last 3 points.
// A fundamental assumption of this algorithm is that one of the first 3 and
// one of the last 3 points are near the best line fit.
// The points must be Added in line order for the algorithm to work properly.
// No floating point calculations are needed* to make an accurate fit,
// and no random numbers are needed** so the algorithm is deterministic,
// architecture-stable, and compiler-stable as well as stable to minor
// changes in the input.
// *A single floating point division is used to compute each line's distance.
// This is unlikely to result in choice of a different line, but if it does,
// it would be easy to replace with a 64 bit integer calculation.
// **Random numbers are used in the nth_item function, but the worst
// non-determinism that can result is picking a different result among equals,
// and that wouldn't make any difference to the end-result distance, so the
// randomness does not affect the determinism of the algorithm. The random
// numbers are only there to guarantee average linear time.
// Fitting time is linear, but with a high constant, as it tries 9 different
// lines and computes the distance of all points each time.
// This class is aimed at replacing the LLSQ (linear least squares) and
// LMS (least median of squares) classes that are currently used for most
// of the line fitting in Tesseract.
class DetLineFit {
public:
DetLineFit();
~DetLineFit();
// Delete all Added points.
void Clear();
// Add a new point. Takes a copy - the pt doesn't need to stay in scope.
// Add must be called on points in sequence along the line.
void Add(const ICOORD& pt);
// Fit a line to the points, returning the fitted line as a pair of
// points, and the upper quartile error.
double Fit(ICOORD* pt1, ICOORD* pt2);
private:
double ComputeErrors(const ICOORD start, const ICOORD end, int* distances);
ICOORDELT_LIST pt_list_; // All the added points.
};
} // namespace tesseract.
#endif // TESSERACT_CCSTRUCT_DETLINEFIT_H_