mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-25 03:29:05 +08:00
af319b4d90
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@883 d0cd1f9f-072b-0410-8dd7-cf729c803f20
873 lines
35 KiB
C++
873 lines
35 KiB
C++
///////////////////////////////////////////////////////////////////////
|
|
// File: baselinedetect.cpp
|
|
// Description: Initial Baseline Determination.
|
|
// Copyright 2012 Google Inc. All Rights Reserved.
|
|
// Author: rays@google.com (Ray Smith)
|
|
// Created: Mon Apr 30 10:15:31 PDT 2012
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
///////////////////////////////////////////////////////////////////////
|
|
|
|
#ifdef _MSC_VER
|
|
#define _USE_MATH_DEFINES
|
|
#endif // _MSC_VER
|
|
|
|
#include "baselinedetect.h"
|
|
|
|
#include <math.h>
|
|
#include "allheaders.h"
|
|
#include "blobbox.h"
|
|
#include "detlinefit.h"
|
|
#include "drawtord.h"
|
|
#include "helpers.h"
|
|
#include "linlsq.h"
|
|
#include "makerow.h"
|
|
#include "textord.h"
|
|
#include "tprintf.h"
|
|
#include "underlin.h"
|
|
|
|
// Tuning constants for baseline detection. Those described as a "fraction of
// line spacing" are multiplied by a line spacing estimate where they are used.

// Number of displacement modes kept in displacement_modes_.
const int kMaxDisplacementsModes = 3;
// Number of points to skip at each end when retrying the initial fit.
const int kNumSkipPoints = 3;
// Max angle deviation (in radians) allowed to keep the independent baseline.
const double kMaxSkewDeviation = 1.0 / 64;
// Fraction of line spacing estimate for quantization of blob displacements.
const double kOffsetQuantizationFactor = 3.0 / 64;
// Fraction of line spacing estimate for computing blob fit error.
const double kFitHalfrangeFactor = 6.0 / 64;
// Max fraction of line spacing allowed before a baseline counts as badly
// fitting.
const double kMaxBaselineError = 3.0 / 64;
// Multiple of linespacing that sets max_blob_size in TO_BLOCK.
// Copied from textord_excess_blobsize.
const double kMaxBlobSizeMultiple = 1.3;
// Min fraction of linespacing gaps that should be close to the model before
// we will force the linespacing model on all the lines.
const double kMinFittingLinespacings = 0.25;
// A y-coordinate within a textline that is to be debugged. Uncommenting the
// define enables the extra #ifdef kDebugYCoord diagnostics in this file.
//#define kDebugYCoord 1525
|
|
|
|
namespace tesseract {
|
|
|
|
// Wraps the blob list of the given TO_ROW. The baseline starts out as a
// degenerate (0,0)->(0,0) line that is marked not good, the bounding box is
// computed from the blobs, and the three tolerances are each set to a fixed
// fraction of line_spacing.
BaselineRow::BaselineRow(double line_spacing, TO_ROW* to_row)
    : blobs_(to_row->blob_list()),
      baseline_pt1_(0.0f, 0.0f),
      baseline_pt2_(0.0f, 0.0f),
      baseline_error_(0.0),
      good_baseline_(false) {
  // Threshold above which a fit counts as bad.
  max_baseline_error_ = line_spacing * kMaxBaselineError;
  // Half-width of the window used by constrained fits.
  fit_halfrange_ = line_spacing * kFitHalfrangeFactor;
  // Scale factor for rounding blob displacements to ints.
  disp_quant_factor_ = line_spacing * kOffsetQuantizationFactor;
  ComputeBoundingBox();
}
|
|
|
|
// Sets the TO_ROW with the output straight line.
|
|
void BaselineRow::SetupOldLineParameters(TO_ROW* row) const {
|
|
// TODO(rays) get rid of this when m and c are no longer used.
|
|
double gradient = tan(BaselineAngle());
|
|
// para_c is the actual intercept of the baseline on the y-axis.
|
|
float para_c = StraightYAtX(0.0);
|
|
row->set_line(gradient, para_c, baseline_error_);
|
|
row->set_parallel_line(gradient, para_c, baseline_error_);
|
|
}
|
|
|
|
// Outputs diagnostic information.
|
|
void BaselineRow::Print() const {
|
|
tprintf("Baseline (%g,%g)->(%g,%g), angle=%g, intercept=%g\n",
|
|
baseline_pt1_.x(), baseline_pt1_.y(),
|
|
baseline_pt2_.x(), baseline_pt2_.y(),
|
|
BaselineAngle(), StraightYAtX(0.0));
|
|
tprintf("Quant factor=%g, error=%g, good=%d, box:",
|
|
disp_quant_factor_, baseline_error_, good_baseline_);
|
|
bounding_box_.print();
|
|
}
|
|
|
|
// Returns the skew angle (in radians) of the current baseline in [-pi,pi].
|
|
double BaselineRow::BaselineAngle() const {
|
|
FCOORD baseline_dir(baseline_pt2_ - baseline_pt1_);
|
|
double angle = baseline_dir.angle();
|
|
// Baseline directions are only unique in a range of pi so constrain to
|
|
// [-pi/2, pi/2].
|
|
return fmod(angle + M_PI * 1.5, M_PI) - M_PI * 0.5;
|
|
}
|
|
|
|
// Computes and returns the linespacing at the middle of the overlap
|
|
// between this and other.
|
|
double BaselineRow::SpaceBetween(const BaselineRow& other) const {
|
|
// Find the x-centre of overlap of the lines.
|
|
float x = (MAX(bounding_box_.left(), other.bounding_box_.left()) +
|
|
MIN(bounding_box_.right(), other.bounding_box_.right())) / 2;
|
|
// Find the vertical centre between them.
|
|
float y = (StraightYAtX(x) + other.StraightYAtX(x)) / 2.0f;
|
|
// Find the perpendicular distance of (x,y) from each line.
|
|
FCOORD pt(x, y);
|
|
return PerpDistanceFromBaseline(pt) + other.PerpDistanceFromBaseline(pt);
|
|
}
|
|
|
|
// Computes and returns the displacement of the center of the line
|
|
// perpendicular to the given direction.
|
|
double BaselineRow::PerpDisp(const FCOORD& direction) const {
|
|
float middle_x = (bounding_box_.left() + bounding_box_.right()) / 2.0f;
|
|
FCOORD middle_pos(middle_x, StraightYAtX(middle_x));
|
|
return direction * middle_pos / direction.length();
|
|
}
|
|
|
|
// Computes the y coordinate at the given x using the straight baseline
|
|
// defined by baseline_pt1_ and baseline_pt2__.
|
|
double BaselineRow::StraightYAtX(double x) const {
|
|
double denominator = baseline_pt2_.x() - baseline_pt1_.x();
|
|
if (denominator == 0.0)
|
|
return (baseline_pt1_.y() + baseline_pt2_.y()) / 2.0;
|
|
return baseline_pt1_.y() +
|
|
(x - baseline_pt1_.x()) * (baseline_pt2_.y() - baseline_pt1_.y()) /
|
|
denominator;
|
|
}
|
|
|
|
// Fits a straight baseline to the points. Returns true if it had enough
|
|
// points to be reasonably sure of the fitted baseline.
|
|
// If use_box_bottoms is false, baselines positions are formed by
|
|
// considering the outlines of the blobs.
|
|
bool BaselineRow::FitBaseline(bool use_box_bottoms) {
|
|
// Deterministic fitting is used wherever possible.
|
|
fitter_.Clear();
|
|
// Linear least squares is a backup if the DetLineFit produces a bad line.
|
|
LLSQ llsq;
|
|
BLOBNBOX_IT blob_it(blobs_);
|
|
|
|
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
|
|
BLOBNBOX* blob = blob_it.data();
|
|
if (!use_box_bottoms) blob->EstimateBaselinePosition();
|
|
const TBOX& box = blob->bounding_box();
|
|
int x_middle = (box.left() + box.right()) / 2;
|
|
#ifdef kDebugYCoord
|
|
if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) {
|
|
tprintf("Box bottom = %d, baseline pos=%d for box at:",
|
|
box.bottom(), blob->baseline_position());
|
|
box.print();
|
|
}
|
|
#endif
|
|
fitter_.Add(ICOORD(x_middle, blob->baseline_position()), box.width() / 2);
|
|
llsq.add(x_middle, blob->baseline_position());
|
|
}
|
|
// Fit the line.
|
|
ICOORD pt1, pt2;
|
|
baseline_error_ = fitter_.Fit(&pt1, &pt2);
|
|
baseline_pt1_ = pt1;
|
|
baseline_pt2_ = pt2;
|
|
if (baseline_error_ > max_baseline_error_ &&
|
|
fitter_.SufficientPointsForIndependentFit()) {
|
|
// The fit was bad but there were plenty of points, so try skipping
|
|
// the first and last few, and use the new line if it dramatically improves
|
|
// the error of fit.
|
|
double error = fitter_.Fit(kNumSkipPoints, kNumSkipPoints, &pt1, &pt2);
|
|
if (error < baseline_error_ / 2.0) {
|
|
baseline_error_ = error;
|
|
baseline_pt1_ = pt1;
|
|
baseline_pt2_ = pt2;
|
|
}
|
|
}
|
|
int debug = 0;
|
|
#ifdef kDebugYCoord
|
|
Print();
|
|
debug = bounding_box_.bottom() < kDebugYCoord &&
|
|
bounding_box_.top() > kDebugYCoord
|
|
? 3 : 2;
|
|
#endif
|
|
// Now we obtained a direction from that fit, see if we can improve the
|
|
// fit using the same direction and some other start point.
|
|
FCOORD direction(pt2 - pt1);
|
|
double target_offset = direction * pt1;
|
|
good_baseline_ = false;
|
|
FitConstrainedIfBetter(debug, direction, 0.0, target_offset);
|
|
// Wild lines can be produced because DetLineFit allows vertical lines, but
|
|
// vertical text has been rotated so angles over pi/4 should be disallowed.
|
|
// Near vertical lines can still be produced by vertically aligned components
|
|
// on very short lines.
|
|
double angle = BaselineAngle();
|
|
if (fabs(angle) > M_PI * 0.25) {
|
|
// Use the llsq fit as a backup.
|
|
baseline_pt1_ = llsq.mean_point();
|
|
baseline_pt2_ = baseline_pt1_ + FCOORD(1.0f, llsq.m());
|
|
// TODO(rays) get rid of this when m and c are no longer used.
|
|
double m = llsq.m();
|
|
double c = llsq.c(m);
|
|
baseline_error_ = llsq.rms(m, c);
|
|
good_baseline_ = false;
|
|
}
|
|
return good_baseline_;
|
|
}
|
|
|
|
// Modifies an existing result of FitBaseline to be parallel to the given
|
|
// direction vector if that produces a better result.
|
|
void BaselineRow::AdjustBaselineToParallel(int debug,
|
|
const FCOORD& direction) {
|
|
SetupBlobDisplacements(direction);
|
|
if (displacement_modes_.empty())
|
|
return;
|
|
#ifdef kDebugYCoord
|
|
if (bounding_box_.bottom() < kDebugYCoord &&
|
|
bounding_box_.top() > kDebugYCoord && debug < 3)
|
|
debug = 3;
|
|
#endif
|
|
FitConstrainedIfBetter(debug, direction, 0.0, displacement_modes_[0]);
|
|
}
|
|
|
|
// Modifies the baseline to snap to the textline grid if the existing
|
|
// result is not good enough.
|
|
double BaselineRow::AdjustBaselineToGrid(int debug,
|
|
const FCOORD& direction,
|
|
double line_spacing,
|
|
double line_offset) {
|
|
if (blobs_->empty()) {
|
|
if (debug > 1) {
|
|
tprintf("Row empty at:");
|
|
bounding_box_.print();
|
|
}
|
|
return line_offset;
|
|
}
|
|
// Find the displacement_modes_ entry nearest to the grid.
|
|
double best_error = 0.0;
|
|
int best_index = -1;
|
|
for (int i = 0; i < displacement_modes_.size(); ++i) {
|
|
double blob_y = displacement_modes_[i];
|
|
double error = BaselineBlock::SpacingModelError(blob_y, line_spacing,
|
|
line_offset);
|
|
if (debug > 1) {
|
|
tprintf("Mode at %g has error %g from model \n", blob_y, error);
|
|
}
|
|
if (best_index < 0 || error < best_error) {
|
|
best_error = error;
|
|
best_index = i;
|
|
}
|
|
}
|
|
// We will move the baseline only if the chosen mode is close enough to the
|
|
// model.
|
|
double model_margin = max_baseline_error_ - best_error;
|
|
if (best_index >= 0 && model_margin > 0.0) {
|
|
// But if the current baseline is already close to the mode there is no
|
|
// point, and only the potential to damage accuracy by changing its angle.
|
|
double perp_disp = PerpDisp(direction);
|
|
double shift = displacement_modes_[best_index] - perp_disp;
|
|
if (fabs(shift) > max_baseline_error_) {
|
|
if (debug > 1) {
|
|
tprintf("Attempting linespacing model fit with mode %g to row at:",
|
|
displacement_modes_[best_index]);
|
|
bounding_box_.print();
|
|
}
|
|
FitConstrainedIfBetter(debug, direction, model_margin,
|
|
displacement_modes_[best_index]);
|
|
} else if (debug > 1) {
|
|
tprintf("Linespacing model only moves current line by %g for row at:",
|
|
shift);
|
|
bounding_box_.print();
|
|
}
|
|
} else if (debug > 1) {
|
|
tprintf("Linespacing model not close enough to any mode for row at:");
|
|
bounding_box_.print();
|
|
}
|
|
return fmod(PerpDisp(direction), line_spacing);
|
|
}
|
|
|
|
// Sets up displacement_modes_ with the top few modes of the perpendicular
|
|
// distance of each blob from the given direction vector, after rounding.
|
|
void BaselineRow::SetupBlobDisplacements(const FCOORD& direction) {
|
|
// Set of perpendicular displacements of the blob bottoms from the required
|
|
// baseline direction.
|
|
GenericVector<double> perp_blob_dists;
|
|
displacement_modes_.truncate(0);
|
|
// Gather the skew-corrected position of every blob.
|
|
double min_dist = MAX_FLOAT32;
|
|
double max_dist = -MAX_FLOAT32;
|
|
BLOBNBOX_IT blob_it(blobs_);
|
|
bool debug = false;
|
|
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
|
|
BLOBNBOX* blob = blob_it.data();
|
|
const TBOX& box = blob->bounding_box();
|
|
#ifdef kDebugYCoord
|
|
if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) debug = true;
|
|
#endif
|
|
FCOORD blob_pos((box.left() + box.right()) / 2.0f,
|
|
blob->baseline_position());
|
|
double offset = direction * blob_pos;
|
|
perp_blob_dists.push_back(offset);
|
|
if (debug) {
|
|
tprintf("Displacement %g for blob at:", offset);
|
|
box.print();
|
|
}
|
|
UpdateRange(offset, &min_dist, &max_dist);
|
|
}
|
|
// Set up a histogram using disp_quant_factor_ as the bucket size.
|
|
STATS dist_stats(IntCastRounded(min_dist / disp_quant_factor_),
|
|
IntCastRounded(max_dist / disp_quant_factor_) + 1);
|
|
for (int i = 0; i < perp_blob_dists.size(); ++i) {
|
|
dist_stats.add(IntCastRounded(perp_blob_dists[i] / disp_quant_factor_), 1);
|
|
}
|
|
GenericVector<KDPairInc<float, int> > scaled_modes;
|
|
dist_stats.top_n_modes(kMaxDisplacementsModes, &scaled_modes);
|
|
if (debug) {
|
|
for (int i = 0; i < scaled_modes.size(); ++i) {
|
|
tprintf("Top mode = %g * %d\n",
|
|
scaled_modes[i].key * disp_quant_factor_, scaled_modes[i].data);
|
|
}
|
|
}
|
|
for (int i = 0; i < scaled_modes.size(); ++i)
|
|
displacement_modes_.push_back(disp_quant_factor_ * scaled_modes[i].key);
|
|
}
|
|
|
|
// Fits a line in the given direction to blobs that are close to the given
|
|
// target_offset perpendicular displacement from the direction. The fit
|
|
// error is allowed to be cheat_allowance worse than the existing fit, and
|
|
// will still be used.
|
|
// If cheat_allowance > 0, the new fit will be good and replace the current
|
|
// fit if it has better fit (with cheat) OR its error is below
|
|
// max_baseline_error_ and the old fit is marked bad.
|
|
// Otherwise the new fit will only replace the old if it is really better,
|
|
// or the old fit is marked bad and the new fit has sufficient points, as
|
|
// well as being within the max_baseline_error_.
|
|
void BaselineRow::FitConstrainedIfBetter(int debug,
|
|
const FCOORD& direction,
|
|
double cheat_allowance,
|
|
double target_offset) {
|
|
double halfrange = fit_halfrange_ * direction.length();
|
|
double min_dist = target_offset - halfrange;
|
|
double max_dist = target_offset + halfrange;
|
|
ICOORD line_pt;
|
|
double new_error = fitter_.ConstrainedFit(direction, min_dist, max_dist,
|
|
debug > 2, &line_pt);
|
|
// Allow cheat_allowance off the new error
|
|
new_error -= cheat_allowance;
|
|
double old_angle = BaselineAngle();
|
|
double new_angle = direction.angle();
|
|
if (debug > 1) {
|
|
tprintf("Constrained error = %g, original = %g",
|
|
new_error, baseline_error_);
|
|
tprintf(" angles = %g, %g, delta=%g vs threshold %g\n",
|
|
old_angle, new_angle,
|
|
new_angle - old_angle, kMaxSkewDeviation);
|
|
}
|
|
bool new_good_baseline = new_error <= max_baseline_error_ &&
|
|
(cheat_allowance > 0.0 || fitter_.SufficientPointsForIndependentFit());
|
|
// The new will replace the old if any are true:
|
|
// 1. the new error is better
|
|
// 2. the old is NOT good, but the new is
|
|
// 3. there is a wild angular difference between them (assuming that the new
|
|
// is a better guess at the angle.)
|
|
if (new_error <= baseline_error_ ||
|
|
(!good_baseline_ && new_good_baseline) ||
|
|
fabs(new_angle - old_angle) > kMaxSkewDeviation) {
|
|
baseline_error_ = new_error;
|
|
baseline_pt1_ = line_pt;
|
|
baseline_pt2_ = baseline_pt1_ + direction;
|
|
good_baseline_ = new_good_baseline;
|
|
if (debug > 1) {
|
|
tprintf("Replacing with constrained baseline, good = %d\n",
|
|
good_baseline_);
|
|
}
|
|
} else if (debug > 1) {
|
|
tprintf("Keeping old baseline\n");
|
|
}
|
|
}
|
|
|
|
// Returns the perpendicular distance of the point from the straight
|
|
// baseline.
|
|
double BaselineRow::PerpDistanceFromBaseline(const FCOORD& pt) const {
|
|
FCOORD baseline_vector(baseline_pt2_ - baseline_pt1_);
|
|
FCOORD offset_vector(pt - baseline_pt1_);
|
|
double distance = baseline_vector * offset_vector;
|
|
return sqrt(distance * distance / baseline_vector.sqlength());
|
|
}
|
|
|
|
// Computes the bounding box of the row.
|
|
void BaselineRow::ComputeBoundingBox() {
|
|
BLOBNBOX_IT it(blobs_);
|
|
TBOX box;
|
|
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
|
box += it.data()->bounding_box();
|
|
}
|
|
bounding_box_ = box;
|
|
}
|
|
|
|
|
|
// Wraps the given TO_BLOCK. The line spacing starts from the value held by
// the block, with zero skew, offset and model error. Every TO_ROW in the
// block has its blobs sorted with blob_x_order and gets a new BaselineRow
// appended to rows_ in list order.
BaselineBlock::BaselineBlock(int debug_level, bool non_text, TO_BLOCK* block)
    : block_(block), debug_level_(debug_level), non_text_block_(non_text),
      good_skew_angle_(false), skew_angle_(0.0),
      line_spacing_(block->line_spacing), line_offset_(0.0), model_error_(0.0) {
  TO_ROW_IT row_it(block_->get_rows());
  row_it.mark_cycle_pt();
  while (!row_it.cycled_list()) {
    TO_ROW* to_row = row_it.data();
    // Sort the blobs on the rows.
    to_row->blob_list()->sort(blob_x_order);
    rows_.push_back(new BaselineRow(block->line_spacing, to_row));
    row_it.forward();
  }
}
|
|
|
|
// Computes and returns the absolute error of the given perp_disp from the
|
|
// given linespacing model.
|
|
double BaselineBlock::SpacingModelError(double perp_disp, double line_spacing,
|
|
double line_offset) {
|
|
// Round to the nearest multiple of line_spacing + line offset.
|
|
int multiple = IntCastRounded((perp_disp - line_offset) / line_spacing);
|
|
double model_y = line_spacing * multiple + line_offset;
|
|
return fabs(perp_disp - model_y);
|
|
}
|
|
|
|
// Fits straight line baselines and computes the skew angle from the
|
|
// median angle. Returns true if a good angle is found.
|
|
// If use_box_bottoms is false, baseline positions are formed by
|
|
// considering the outlines of the blobs.
|
|
bool BaselineBlock::FitBaselinesAndFindSkew(bool use_box_bottoms) {
|
|
if (non_text_block_) return false;
|
|
GenericVector<double> angles;
|
|
for (int r = 0; r < rows_.size(); ++r) {
|
|
BaselineRow* row = rows_[r];
|
|
if (row->FitBaseline(use_box_bottoms)) {
|
|
double angle = row->BaselineAngle();
|
|
angles.push_back(angle);
|
|
}
|
|
if (debug_level_ > 1)
|
|
row->Print();
|
|
}
|
|
|
|
if (!angles.empty()) {
|
|
skew_angle_ = MedianOfCircularValues(M_PI, &angles);
|
|
good_skew_angle_ = true;
|
|
} else {
|
|
skew_angle_ = 0.0f;
|
|
good_skew_angle_ = false;
|
|
}
|
|
if (debug_level_ > 0) {
|
|
tprintf("Initial block skew angle = %g, good = %d\n",
|
|
skew_angle_, good_skew_angle_);
|
|
}
|
|
return good_skew_angle_;
|
|
}
|
|
|
|
// Refits the baseline to a constrained angle, using the stored block
|
|
// skew if good enough, otherwise the supplied default skew.
|
|
void BaselineBlock::ParallelizeBaselines(double default_block_skew) {
|
|
if (non_text_block_) return;
|
|
if (!good_skew_angle_) skew_angle_ = default_block_skew;
|
|
if (debug_level_ > 0)
|
|
tprintf("Adjusting block to skew angle %g\n", skew_angle_);
|
|
FCOORD direction(cos(skew_angle_), sin(skew_angle_));
|
|
for (int r = 0; r < rows_.size(); ++r) {
|
|
BaselineRow* row = rows_[r];
|
|
row->AdjustBaselineToParallel(debug_level_, direction);
|
|
if (debug_level_ > 1)
|
|
row->Print();
|
|
}
|
|
if (rows_.size() < 3 || !ComputeLineSpacing())
|
|
return;
|
|
// Enforce the line spacing model on all lines that don't yet have a good
|
|
// baseline.
|
|
// Start by finding the row that is best fitted to the model.
|
|
int best_row = 0;
|
|
double best_error = SpacingModelError(rows_[0]->PerpDisp(direction),
|
|
line_spacing_, line_offset_);
|
|
for (int r = 1; r < rows_.size(); ++r) {
|
|
double error = SpacingModelError(rows_[r]->PerpDisp(direction),
|
|
line_spacing_, line_offset_);
|
|
if (error < best_error) {
|
|
best_error = error;
|
|
best_row = r;
|
|
}
|
|
}
|
|
// Starting at the best fitting row, work outwards, syncing the offset.
|
|
double offset = line_offset_;
|
|
for (int r = best_row + 1; r < rows_.size(); ++r) {
|
|
offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction,
|
|
line_spacing_, offset);
|
|
}
|
|
offset = line_offset_;
|
|
for (int r = best_row - 1; r >= 0; --r) {
|
|
offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction,
|
|
line_spacing_, offset);
|
|
}
|
|
}
|
|
|
|
// Sets the parameters in TO_BLOCK that are needed by subsequent processes.
|
|
void BaselineBlock::SetupBlockParameters() const {
|
|
if (line_spacing_ > 0.0) {
|
|
// Where was block_line_spacing set before?
|
|
float min_spacing = MIN(block_->line_spacing, line_spacing_);
|
|
if (min_spacing < block_->line_size)
|
|
block_->line_size = min_spacing;
|
|
block_->line_spacing = line_spacing_;
|
|
block_->baseline_offset = line_offset_;
|
|
block_->max_blob_size = line_spacing_ * kMaxBlobSizeMultiple;
|
|
}
|
|
// Setup the parameters on all the rows.
|
|
TO_ROW_IT row_it(block_->get_rows());
|
|
for (int r = 0; r < rows_.size(); ++r, row_it.forward()) {
|
|
BaselineRow* row = rows_[r];
|
|
TO_ROW* to_row = row_it.data();
|
|
row->SetupOldLineParameters(to_row);
|
|
}
|
|
}
|
|
|
|
// Processing that is required before fitting baseline splines, but requires
|
|
// linear baselines in order to be successful:
|
|
// Removes noise if required
|
|
// Separates out underlines
|
|
// Pre-associates blob fragments.
|
|
// TODO(rays/joeliu) This entire section of code is inherited from the past
|
|
// and could be improved/eliminated.
|
|
// page_tr is used to size a debug window.
|
|
void BaselineBlock::PrepareForSplineFitting(ICOORD page_tr, bool remove_noise) {
|
|
if (non_text_block_) return;
|
|
if (remove_noise) {
|
|
vigorous_noise_removal(block_);
|
|
}
|
|
FCOORD rotation(1.0f, 0.0f);
|
|
double gradient = tan(skew_angle_);
|
|
separate_underlines(block_, gradient, rotation, true);
|
|
pre_associate_blobs(page_tr, block_, rotation, true);
|
|
}
|
|
|
|
// Fits splines to the textlines, or creates fake QSPLINES from the straight
|
|
// baselines that are already on the TO_ROWs.
|
|
// As a side-effect, computes the xheights of the rows and the block.
|
|
// Although x-height estimation is conceptually separate, it is part of
|
|
// detecting perspective distortion and therefore baseline fitting.
|
|
void BaselineBlock::FitBaselineSplines(bool enable_splines,
|
|
bool show_final_rows,
|
|
Textord* textord) {
|
|
double gradient = tan(skew_angle_);
|
|
FCOORD rotation(1.0f, 0.0f);
|
|
|
|
if (enable_splines) {
|
|
textord->make_spline_rows(block_, gradient, show_final_rows);
|
|
} else {
|
|
// Make a fake spline from the existing line.
|
|
TBOX block_box= block_->block->bounding_box();
|
|
TO_ROW_IT row_it = block_->get_rows();
|
|
for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
|
|
TO_ROW* row = row_it.data();
|
|
inT32 xstarts[2] = { block_box.left(), block_box.right() };
|
|
double coeffs[3] = { 0.0, row->line_m(), row->line_c() };
|
|
row->baseline = QSPLINE(1, xstarts, coeffs);
|
|
textord->compute_row_xheight(row, block_->block->classify_rotation(),
|
|
row->line_m(), block_->line_size);
|
|
}
|
|
}
|
|
textord->compute_block_xheight(block_, gradient);
|
|
block_->block->set_xheight(block_->xheight);
|
|
if (textord_restore_underlines) // fix underlines
|
|
restore_underlined_blobs(block_);
|
|
}
|
|
|
|
// Draws the (straight) baselines and final blobs colored according to
|
|
// what was discarded as noise and what is associated with each row.
|
|
void BaselineBlock::DrawFinalRows(const ICOORD& page_tr) {
|
|
#ifndef GRAPHICS_DISABLED
|
|
if (non_text_block_) return;
|
|
double gradient = tan(skew_angle_);
|
|
FCOORD rotation(1.0f, 0.0f);
|
|
int left_edge = block_->block->bounding_box().left();
|
|
ScrollView* win = create_to_win(page_tr);
|
|
ScrollView::Color colour = ScrollView::RED;
|
|
TO_ROW_IT row_it = block_->get_rows();
|
|
for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
|
|
plot_parallel_row(row_it.data(), gradient, left_edge, colour, rotation);
|
|
colour = static_cast<ScrollView::Color>(colour + 1);
|
|
if (colour > ScrollView::MAGENTA)
|
|
colour = ScrollView::RED;
|
|
}
|
|
plot_blob_list(win, &block_->blobs, ScrollView::MAGENTA, ScrollView::WHITE);
|
|
// Show discarded blobs.
|
|
plot_blob_list(win, &block_->underlines,
|
|
ScrollView::YELLOW, ScrollView::CORAL);
|
|
if (block_->blobs.length() > 0)
|
|
tprintf("%d blobs discarded as noise\n", block_->blobs.length());
|
|
draw_meanlines(block_, gradient, left_edge, ScrollView::WHITE, rotation);
|
|
#endif
|
|
}
|
|
|
|
// Calls plot(pix_in) on the baseline of every TO_ROW in the block.
// Does nothing for non-text blocks.
void BaselineBlock::DrawPixSpline(Pix* pix_in) {
  if (non_text_block_) return;
  TO_ROW_IT row_it = block_->get_rows();
  row_it.mark_cycle_pt();
  while (!row_it.cycled_list()) {
    row_it.data()->baseline.plot(pix_in);
    row_it.forward();
  }
}
|
|
|
|
// Top-level line-spacing calculation. Computes an estimate of the line-
|
|
// spacing, using the current baselines in the TO_ROWS of the block, and
|
|
// then refines it by fitting a regression line to the baseline positions
|
|
// as a function of their integer index.
|
|
// Returns true if it seems that the model is a reasonable fit to the
|
|
// observations.
|
|
bool BaselineBlock::ComputeLineSpacing() {
|
|
FCOORD direction(cos(skew_angle_), sin(skew_angle_));
|
|
GenericVector<double> row_positions;
|
|
ComputeBaselinePositions(direction, &row_positions);
|
|
if (row_positions.size() < 2) return false;
|
|
EstimateLineSpacing();
|
|
RefineLineSpacing(row_positions);
|
|
// Verify that the model is reasonable.
|
|
double max_baseline_error = kMaxBaselineError * line_spacing_;
|
|
int non_trivial_gaps = 0;
|
|
int fitting_gaps = 0;
|
|
for (int i = 1; i < row_positions.size(); ++i) {
|
|
double row_gap = fabs(row_positions[i - 1] - row_positions[i]);
|
|
if (row_gap > max_baseline_error) {
|
|
++non_trivial_gaps;
|
|
if (fabs(row_gap - line_spacing_) <= max_baseline_error)
|
|
++fitting_gaps;
|
|
}
|
|
}
|
|
if (debug_level_ > 0) {
|
|
tprintf("Spacing %g, in %d rows, %d gaps fitted out of %d non-trivial\n",
|
|
line_spacing_, row_positions.size(), fitting_gaps,
|
|
non_trivial_gaps);
|
|
}
|
|
return fitting_gaps > non_trivial_gaps * kMinFittingLinespacings;
|
|
}
|
|
|
|
// Computes the deskewed vertical position of each baseline in the block and
|
|
// stores them in the given vector.
|
|
// This is calculated as the perpendicular distance of the middle of each
|
|
// baseline (in case it has a different skew angle) from the line passing
|
|
// through the origin parallel to the block baseline angle.
|
|
// NOTE that "distance" above is a signed quantity so we can tell which side
|
|
// of the block baseline a line sits, hence the function and argument name
|
|
// positions not distances.
|
|
void BaselineBlock::ComputeBaselinePositions(const FCOORD& direction,
|
|
GenericVector<double>* positions) {
|
|
positions->clear();
|
|
for (int r = 0; r < rows_.size(); ++r) {
|
|
BaselineRow* row = rows_[r];
|
|
const TBOX& row_box = row->bounding_box();
|
|
float x_middle = (row_box.left() + row_box.right()) / 2.0f;
|
|
FCOORD row_pos(x_middle, static_cast<float>(row->StraightYAtX(x_middle)));
|
|
float offset = direction * row_pos;
|
|
positions->push_back(offset);
|
|
}
|
|
}
|
|
|
|
// Computes an estimate of the line spacing of the block from the median
|
|
// of the spacings between adjacent overlapping textlines.
|
|
void BaselineBlock::EstimateLineSpacing() {
|
|
GenericVector<float> spacings;
|
|
for (int r = 0; r < rows_.size(); ++r) {
|
|
BaselineRow* row = rows_[r];
|
|
// Exclude silly lines.
|
|
if (fabs(row->BaselineAngle()) > M_PI * 0.25) continue;
|
|
// Find the first row after row that overlaps it significantly.
|
|
const TBOX& row_box = row->bounding_box();
|
|
int r2;
|
|
for (r2 = r + 1; r2 < rows_.size() &&
|
|
!row_box.major_x_overlap(rows_[r2]->bounding_box());
|
|
++r2);
|
|
if (r2 < rows_.size()) {
|
|
BaselineRow* row2 = rows_[r2];
|
|
// Exclude silly lines.
|
|
if (fabs(row2->BaselineAngle()) > M_PI * 0.25) continue;
|
|
float spacing = row->SpaceBetween(*row2);
|
|
spacings.push_back(spacing);
|
|
}
|
|
}
|
|
// If we have at least one value, use it, otherwise leave the previous
|
|
// value unchanged.
|
|
if (!spacings.empty()) {
|
|
line_spacing_ = spacings[spacings.choose_nth_item(spacings.size() / 2)];
|
|
if (debug_level_ > 1)
|
|
tprintf("Estimate of linespacing = %g\n", line_spacing_);
|
|
}
|
|
}
|
|
|
|
// Refines the line spacing of the block by fitting a regression
|
|
// line to the deskewed y-position of each baseline as a function of its
|
|
// estimated line index, allowing for a small error in the initial linespacing
|
|
// and choosing the best available model.
|
|
void BaselineBlock::RefineLineSpacing(const GenericVector<double>& positions) {
|
|
double spacings[3], offsets[3], errors[3];
|
|
int index_range;
|
|
errors[0] = FitLineSpacingModel(positions, line_spacing_,
|
|
&spacings[0], &offsets[0], &index_range);
|
|
if (index_range > 1) {
|
|
double spacing_plus = line_spacing_ / (1.0 + 1.0 / index_range);
|
|
// Try the hypotheses that there might be index_range +/- 1 line spaces.
|
|
errors[1] = FitLineSpacingModel(positions, spacing_plus,
|
|
&spacings[1], &offsets[1], NULL);
|
|
double spacing_minus = line_spacing_ / (1.0 - 1.0 / index_range);
|
|
errors[2] = FitLineSpacingModel(positions, spacing_minus,
|
|
&spacings[2], &offsets[2], NULL);
|
|
for (int i = 1; i <= 2; ++i) {
|
|
if (errors[i] < errors[0]) {
|
|
spacings[0] = spacings[i];
|
|
offsets[0] = offsets[i];
|
|
errors[0] = errors[i];
|
|
}
|
|
}
|
|
}
|
|
if (spacings[0] > 0.0) {
|
|
line_spacing_ = spacings[0];
|
|
line_offset_ = offsets[0];
|
|
model_error_ = errors[0];
|
|
if (debug_level_ > 0) {
|
|
tprintf("Final linespacing model = %g + offset %g, error %g\n",
|
|
line_spacing_, line_offset_, model_error_);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Given an initial estimate of line spacing (m_in) and the positions of each
|
|
// baseline, computes the line spacing of the block more accurately in m_out,
|
|
// and the corresponding intercept in c_out, and the number of spacings seen
|
|
// in index_delta. Returns the error of fit to the line spacing model.
|
|
// Uses a simple linear regression, but optimized the offset using the median.
|
|
double BaselineBlock::FitLineSpacingModel(
|
|
const GenericVector<double>& positions, double m_in,
|
|
double* m_out, double* c_out, int* index_delta) {
|
|
if (m_in == 0.0f || positions.size() < 2) {
|
|
*m_out = m_in;
|
|
*c_out = 0.0;
|
|
if (index_delta != NULL) *index_delta = 0;
|
|
return 0.0;
|
|
}
|
|
GenericVector<double> offsets;
|
|
// Get the offset (remainder) linespacing for each line and choose the median.
|
|
for (int i = 0; i < positions.size(); ++i)
|
|
offsets.push_back(fmod(positions[i], m_in));
|
|
// Get the median offset.
|
|
double median_offset = MedianOfCircularValues(m_in, &offsets);
|
|
// Now fit a line to quantized line number and offset.
|
|
LLSQ llsq;
|
|
int min_index = MAX_INT32;
|
|
int max_index = -MAX_INT32;
|
|
for (int i = 0; i < positions.size(); ++i) {
|
|
double y_pos = positions[i];
|
|
int row_index = IntCastRounded((y_pos - median_offset) / m_in);
|
|
UpdateRange(row_index, &min_index, &max_index);
|
|
llsq.add(row_index, y_pos);
|
|
}
|
|
// Get the refined line spacing.
|
|
*m_out = llsq.m();
|
|
// Use the median offset rather than the mean.
|
|
offsets.truncate(0);
|
|
for (int i = 0; i < positions.size(); ++i)
|
|
offsets.push_back(fmod(positions[i], *m_out));
|
|
// Get the median offset.
|
|
if (debug_level_ > 2) {
|
|
for (int i = 0; i < offsets.size(); ++i)
|
|
tprintf("%d: %g\n", i, offsets[i]);
|
|
}
|
|
*c_out = MedianOfCircularValues(*m_out, &offsets);
|
|
if (debug_level_ > 1) {
|
|
tprintf("Median offset = %g, compared to mean of %g.\n",
|
|
*c_out, llsq.c(*m_out));
|
|
}
|
|
// Index_delta is the number of hypothesized line gaps present.
|
|
if (index_delta != NULL)
|
|
*index_delta = max_index - min_index;
|
|
// Use the regression model's intercept to compute the error, as it may be
|
|
// a full line-spacing in disagreement with the median.
|
|
double rms_error = llsq.rms(*m_out, llsq.c(*m_out));
|
|
if (debug_level_ > 1) {
|
|
tprintf("Linespacing of y=%g x + %g improved to %g x + %g, rms=%g\n",
|
|
m_in, median_offset, *m_out, *c_out, rms_error);
|
|
}
|
|
return rms_error;
|
|
}
|
|
|
|
|
|
// Records the debug level and page skew, starts with no debug image and an
// empty debug file prefix, and wraps every TO_BLOCK in blocks in a new
// BaselineBlock appended to blocks_.
BaselineDetect::BaselineDetect(int debug_level, const FCOORD& page_skew,
                               TO_BLOCK_LIST* blocks)
    : page_skew_(page_skew), debug_level_(debug_level), pix_debug_(NULL),
      debug_file_prefix_("") {
  TO_BLOCK_IT block_it(blocks);
  block_it.mark_cycle_pt();
  while (!block_it.cycled_list()) {
    TO_BLOCK* source_block = block_it.data();
    POLY_BLOCK* outline = source_block->block->poly_block();
    // About non-text blocks: on output they are expected to hold a single
    // empty word per incoming text line, marking out the polygonal bounds of
    // the block. Ideally they would need no baselines at all, but make_words
    // currently crashes when a baseline and xheight are missing. They are
    // therefore included here and flagged for special treatment.
    // A block without a polygon is treated as text.
    const bool is_text = outline == NULL || outline->IsText();
    blocks_.push_back(new BaselineBlock(debug_level_, !is_text, source_block));
    block_it.forward();
  }
}
|
|
|
|
// Releases the debug image handle held in pix_debug_.
BaselineDetect::~BaselineDetect() { pixDestroy(&pix_debug_); }
|
|
|
|
// Finds the initial baselines for each TO_ROW in each TO_BLOCK, gathers
|
|
// block-wise and page-wise data to smooth small blocks/rows, and applies
|
|
// smoothing based on block/page-level skew and block-level linespacing.
|
|
void BaselineDetect::ComputeStraightBaselines(bool use_box_bottoms) {
|
|
GenericVector<double> block_skew_angles;
|
|
for (int i = 0; i < blocks_.size(); ++i) {
|
|
BaselineBlock* bl_block = blocks_[i];
|
|
if (debug_level_ > 0)
|
|
tprintf("Fitting initial baselines...\n");
|
|
if (bl_block->FitBaselinesAndFindSkew(use_box_bottoms)) {
|
|
block_skew_angles.push_back(bl_block->skew_angle());
|
|
}
|
|
}
|
|
// Compute a page-wide default skew for blocks with too little information.
|
|
double default_block_skew = page_skew_.angle();
|
|
if (!block_skew_angles.empty()) {
|
|
default_block_skew = MedianOfCircularValues(M_PI, &block_skew_angles);
|
|
}
|
|
if (debug_level_ > 0) {
|
|
tprintf("Page skew angle = %g\n", default_block_skew);
|
|
}
|
|
// Set bad lines in each block to the default block skew and then force fit
|
|
// a linespacing model where it makes sense to do so.
|
|
for (int i = 0; i < blocks_.size(); ++i) {
|
|
BaselineBlock* bl_block = blocks_[i];
|
|
bl_block->ParallelizeBaselines(default_block_skew);
|
|
bl_block->SetupBlockParameters(); // This replaced compute_row_stats.
|
|
}
|
|
}
|
|
|
|
// Computes the baseline splines for each TO_ROW in each TO_BLOCK and
|
|
// other associated side-effects, including pre-associating blobs, computing
|
|
// x-heights and displaying debug information.
|
|
// NOTE that ComputeStraightBaselines must have been called first as this
|
|
// sets up data in the TO_ROWs upon which this function depends.
|
|
void BaselineDetect::ComputeBaselineSplinesAndXheights(const ICOORD& page_tr,
|
|
bool enable_splines,
|
|
bool remove_noise,
|
|
bool show_final_rows,
|
|
Textord* textord) {
|
|
Pix* pix_spline = pix_debug_ ? pixConvertTo32(pix_debug_) : NULL;
|
|
for (int i = 0; i < blocks_.size(); ++i) {
|
|
BaselineBlock* bl_block = blocks_[i];
|
|
bl_block->PrepareForSplineFitting(page_tr, remove_noise);
|
|
bl_block->FitBaselineSplines(enable_splines, show_final_rows, textord);
|
|
if (pix_spline) {
|
|
bl_block->DrawPixSpline(pix_spline);
|
|
}
|
|
if (show_final_rows) {
|
|
bl_block->DrawFinalRows(page_tr);
|
|
}
|
|
}
|
|
|
|
if (pix_spline) {
|
|
STRING outfile_name = debug_file_prefix_ + "_spline.png";
|
|
pixWrite(outfile_name.string(), pix_spline, IFF_PNG);
|
|
pixDestroy(&pix_spline);
|
|
}
|
|
}
|
|
|
|
// Sets the image used for debug drawing and the filename prefix used for
// debug output. Any previously held debug image handle is released first, and
// a clone of pixIn is kept in its place.
void BaselineDetect::SetDebugImage(Pix* pixIn, const STRING& output_path) {
  debug_file_prefix_ = output_path;
  // Let go of the old image before taking a handle on the new one.
  pixDestroy(&pix_debug_);
  pix_debug_ = pixClone(pixIn);
}
|
|
|
|
} // namespace tesseract.
|