tesseract/textord/baselinedetect.cpp

873 lines
35 KiB
C++
Raw Normal View History

///////////////////////////////////////////////////////////////////////
// File: baselinedetect.cpp
// Description: Initial Baseline Determination.
// Copyright 2012 Google Inc. All Rights Reserved.
// Author: rays@google.com (Ray Smith)
// Created: Mon Apr 30 10:15:31 PDT 2012
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifdef _MSC_VER
#define _USE_MATH_DEFINES
#endif // _MSC_VER
#include "baselinedetect.h"
#include <math.h>
#include "allheaders.h"
#include "blobbox.h"
#include "detlinefit.h"
#include "drawtord.h"
#include "helpers.h"
#include "linlsq.h"
#include "makerow.h"
#include "textord.h"
#include "tprintf.h"
#include "underlin.h"
// Number of displacement modes kept in displacement_modes_;
const int kMaxDisplacementsModes = 3;
// Number of points to skip when retrying initial fit.
const int kNumSkipPoints = 3;
// Max angle deviation (in radians) allowed to keep the independent baseline.
const double kMaxSkewDeviation = 1.0 / 64;
// Fraction of line spacing estimate for quantization of blob displacements.
const double kOffsetQuantizationFactor = 3.0 / 64;
// Fraction of line spacing estimate for computing blob fit error.
const double kFitHalfrangeFactor = 6.0 / 64;
// Max fraction of line spacing allowed before a baseline counts as badly fitting.
const double kMaxBaselineError = 3.0 / 64;
// Multiple of linespacing that sets max_blob_size in TO_BLOCK.
// Copied from textord_excess_blobsize.
const double kMaxBlobSizeMultiple = 1.3;
// Min fraction of linespacing gaps that should be close to the model before
// we will force the linespacing model on all the lines.
const double kMinFittingLinespacings = 0.25;
// A y-coordinate within a textline that is to be debugged.
//#define kDebugYCoord 1525
namespace tesseract {
BaselineRow::BaselineRow(double line_spacing, TO_ROW* to_row)
: blobs_(to_row->blob_list()),
baseline_pt1_(0.0f, 0.0f), baseline_pt2_(0.0f, 0.0f),
baseline_error_(0.0), good_baseline_(false) {
ComputeBoundingBox();
// Compute a scale factor for rounding to ints.
disp_quant_factor_ = kOffsetQuantizationFactor * line_spacing;
fit_halfrange_ = kFitHalfrangeFactor * line_spacing;
max_baseline_error_ = kMaxBaselineError * line_spacing;
}
// Sets the TO_ROW with the output straight line.
void BaselineRow::SetupOldLineParameters(TO_ROW* row) const {
// TODO(rays) get rid of this when m and c are no longer used.
double gradient = tan(BaselineAngle());
// para_c is the actual intercept of the baseline on the y-axis.
float para_c = StraightYAtX(0.0);
row->set_line(gradient, para_c, baseline_error_);
row->set_parallel_line(gradient, para_c, baseline_error_);
}
// Outputs diagnostic information.
void BaselineRow::Print() const {
tprintf("Baseline (%g,%g)->(%g,%g), angle=%g, intercept=%g\n",
baseline_pt1_.x(), baseline_pt1_.y(),
baseline_pt2_.x(), baseline_pt2_.y(),
BaselineAngle(), StraightYAtX(0.0));
tprintf("Quant factor=%g, error=%g, good=%d, box:",
disp_quant_factor_, baseline_error_, good_baseline_);
bounding_box_.print();
}
// Returns the skew angle (in radians) of the current baseline in [-pi,pi].
double BaselineRow::BaselineAngle() const {
FCOORD baseline_dir(baseline_pt2_ - baseline_pt1_);
double angle = baseline_dir.angle();
// Baseline directions are only unique in a range of pi so constrain to
// [-pi/2, pi/2].
return fmod(angle + M_PI * 1.5, M_PI) - M_PI * 0.5;
}
// Computes and returns the linespacing at the middle of the overlap
// between this and other.
double BaselineRow::SpaceBetween(const BaselineRow& other) const {
// Find the x-centre of overlap of the lines.
float x = (MAX(bounding_box_.left(), other.bounding_box_.left()) +
MIN(bounding_box_.right(), other.bounding_box_.right())) / 2;
// Find the vertical centre between them.
float y = (StraightYAtX(x) + other.StraightYAtX(x)) / 2.0f;
// Find the perpendicular distance of (x,y) from each line.
FCOORD pt(x, y);
return PerpDistanceFromBaseline(pt) + other.PerpDistanceFromBaseline(pt);
}
// Computes and returns the displacement of the center of the line
// perpendicular to the given direction.
double BaselineRow::PerpDisp(const FCOORD& direction) const {
float middle_x = (bounding_box_.left() + bounding_box_.right()) / 2.0f;
FCOORD middle_pos(middle_x, StraightYAtX(middle_x));
return direction * middle_pos / direction.length();
}
// Computes the y coordinate at the given x using the straight baseline
// defined by baseline_pt1_ and baseline_pt2__.
double BaselineRow::StraightYAtX(double x) const {
double denominator = baseline_pt2_.x() - baseline_pt1_.x();
if (denominator == 0.0)
return (baseline_pt1_.y() + baseline_pt2_.y()) / 2.0;
return baseline_pt1_.y() +
(x - baseline_pt1_.x()) * (baseline_pt2_.y() - baseline_pt1_.y()) /
denominator;
}
// Fits a straight baseline to the points. Returns true if it had enough
// points to be reasonably sure of the fitted baseline.
// If use_box_bottoms is false, baselines positions are formed by
// considering the outlines of the blobs.
bool BaselineRow::FitBaseline(bool use_box_bottoms) {
// Deterministic fitting is used wherever possible.
fitter_.Clear();
// Linear least squares is a backup if the DetLineFit produces a bad line.
LLSQ llsq;
BLOBNBOX_IT blob_it(blobs_);
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
BLOBNBOX* blob = blob_it.data();
if (!use_box_bottoms) blob->EstimateBaselinePosition();
const TBOX& box = blob->bounding_box();
int x_middle = (box.left() + box.right()) / 2;
#ifdef kDebugYCoord
if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) {
tprintf("Box bottom = %d, baseline pos=%d for box at:",
box.bottom(), blob->baseline_position());
box.print();
}
#endif
fitter_.Add(ICOORD(x_middle, blob->baseline_position()), box.width() / 2);
llsq.add(x_middle, blob->baseline_position());
}
// Fit the line.
ICOORD pt1, pt2;
baseline_error_ = fitter_.Fit(&pt1, &pt2);
baseline_pt1_ = pt1;
baseline_pt2_ = pt2;
if (baseline_error_ > max_baseline_error_ &&
fitter_.SufficientPointsForIndependentFit()) {
// The fit was bad but there were plenty of points, so try skipping
// the first and last few, and use the new line if it dramatically improves
// the error of fit.
double error = fitter_.Fit(kNumSkipPoints, kNumSkipPoints, &pt1, &pt2);
if (error < baseline_error_ / 2.0) {
baseline_error_ = error;
baseline_pt1_ = pt1;
baseline_pt2_ = pt2;
}
}
int debug = 0;
#ifdef kDebugYCoord
Print();
debug = bounding_box_.bottom() < kDebugYCoord &&
bounding_box_.top() > kDebugYCoord
? 3 : 2;
#endif
// Now we obtained a direction from that fit, see if we can improve the
// fit using the same direction and some other start point.
FCOORD direction(pt2 - pt1);
double target_offset = direction * pt1;
good_baseline_ = false;
FitConstrainedIfBetter(debug, direction, 0.0, target_offset);
// Wild lines can be produced because DetLineFit allows vertical lines, but
// vertical text has been rotated so angles over pi/4 should be disallowed.
// Near vertical lines can still be produced by vertically aligned components
// on very short lines.
double angle = BaselineAngle();
if (fabs(angle) > M_PI * 0.25) {
// Use the llsq fit as a backup.
baseline_pt1_ = llsq.mean_point();
baseline_pt2_ = baseline_pt1_ + FCOORD(1.0f, llsq.m());
// TODO(rays) get rid of this when m and c are no longer used.
double m = llsq.m();
double c = llsq.c(m);
baseline_error_ = llsq.rms(m, c);
good_baseline_ = false;
}
return good_baseline_;
}
// Modifies an existing result of FitBaseline to be parallel to the given
// direction vector if that produces a better result.
void BaselineRow::AdjustBaselineToParallel(int debug,
const FCOORD& direction) {
SetupBlobDisplacements(direction);
if (displacement_modes_.empty())
return;
#ifdef kDebugYCoord
if (bounding_box_.bottom() < kDebugYCoord &&
bounding_box_.top() > kDebugYCoord && debug < 3)
debug = 3;
#endif
FitConstrainedIfBetter(debug, direction, 0.0, displacement_modes_[0]);
}
// Modifies the baseline to snap to the textline grid if the existing
// result is not good enough.
double BaselineRow::AdjustBaselineToGrid(int debug,
const FCOORD& direction,
double line_spacing,
double line_offset) {
if (blobs_->empty()) {
if (debug > 1) {
tprintf("Row empty at:");
bounding_box_.print();
}
return line_offset;
}
// Find the displacement_modes_ entry nearest to the grid.
double best_error = 0.0;
int best_index = -1;
for (int i = 0; i < displacement_modes_.size(); ++i) {
double blob_y = displacement_modes_[i];
double error = BaselineBlock::SpacingModelError(blob_y, line_spacing,
line_offset);
if (debug > 1) {
tprintf("Mode at %g has error %g from model \n", blob_y, error);
}
if (best_index < 0 || error < best_error) {
best_error = error;
best_index = i;
}
}
// We will move the baseline only if the chosen mode is close enough to the
// model.
double model_margin = max_baseline_error_ - best_error;
if (best_index >= 0 && model_margin > 0.0) {
// But if the current baseline is already close to the mode there is no
// point, and only the potential to damage accuracy by changing its angle.
double perp_disp = PerpDisp(direction);
double shift = displacement_modes_[best_index] - perp_disp;
if (fabs(shift) > max_baseline_error_) {
if (debug > 1) {
tprintf("Attempting linespacing model fit with mode %g to row at:",
displacement_modes_[best_index]);
bounding_box_.print();
}
FitConstrainedIfBetter(debug, direction, model_margin,
displacement_modes_[best_index]);
} else if (debug > 1) {
tprintf("Linespacing model only moves current line by %g for row at:",
shift);
bounding_box_.print();
}
} else if (debug > 1) {
tprintf("Linespacing model not close enough to any mode for row at:");
bounding_box_.print();
}
return fmod(PerpDisp(direction), line_spacing);
}
// Sets up displacement_modes_ with the top few modes of the perpendicular
// distance of each blob from the given direction vector, after rounding.
void BaselineRow::SetupBlobDisplacements(const FCOORD& direction) {
// Set of perpendicular displacements of the blob bottoms from the required
// baseline direction.
GenericVector<double> perp_blob_dists;
displacement_modes_.truncate(0);
// Gather the skew-corrected position of every blob.
double min_dist = MAX_FLOAT32;
double max_dist = -MAX_FLOAT32;
BLOBNBOX_IT blob_it(blobs_);
bool debug = false;
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
BLOBNBOX* blob = blob_it.data();
const TBOX& box = blob->bounding_box();
#ifdef kDebugYCoord
if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) debug = true;
#endif
FCOORD blob_pos((box.left() + box.right()) / 2.0f,
blob->baseline_position());
double offset = direction * blob_pos;
perp_blob_dists.push_back(offset);
if (debug) {
tprintf("Displacement %g for blob at:", offset);
box.print();
}
UpdateRange(offset, &min_dist, &max_dist);
}
// Set up a histogram using disp_quant_factor_ as the bucket size.
STATS dist_stats(IntCastRounded(min_dist / disp_quant_factor_),
IntCastRounded(max_dist / disp_quant_factor_) + 1);
for (int i = 0; i < perp_blob_dists.size(); ++i) {
dist_stats.add(IntCastRounded(perp_blob_dists[i] / disp_quant_factor_), 1);
}
GenericVector<KDPairInc<float, int> > scaled_modes;
dist_stats.top_n_modes(kMaxDisplacementsModes, &scaled_modes);
if (debug) {
for (int i = 0; i < scaled_modes.size(); ++i) {
tprintf("Top mode = %g * %d\n",
scaled_modes[i].key * disp_quant_factor_, scaled_modes[i].data);
}
}
for (int i = 0; i < scaled_modes.size(); ++i)
displacement_modes_.push_back(disp_quant_factor_ * scaled_modes[i].key);
}
// Fits a line in the given direction to blobs that are close to the given
// target_offset perpendicular displacement from the direction. The fit
// error is allowed to be cheat_allowance worse than the existing fit, and
// will still be used.
// If cheat_allowance > 0, the new fit will be good and replace the current
// fit if it has better fit (with cheat) OR its error is below
// max_baseline_error_ and the old fit is marked bad.
// Otherwise the new fit will only replace the old if it is really better,
// or the old fit is marked bad and the new fit has sufficient points, as
// well as being within the max_baseline_error_.
void BaselineRow::FitConstrainedIfBetter(int debug,
const FCOORD& direction,
double cheat_allowance,
double target_offset) {
double halfrange = fit_halfrange_ * direction.length();
double min_dist = target_offset - halfrange;
double max_dist = target_offset + halfrange;
ICOORD line_pt;
double new_error = fitter_.ConstrainedFit(direction, min_dist, max_dist,
debug > 2, &line_pt);
// Allow cheat_allowance off the new error
new_error -= cheat_allowance;
double old_angle = BaselineAngle();
double new_angle = direction.angle();
if (debug > 1) {
tprintf("Constrained error = %g, original = %g",
new_error, baseline_error_);
tprintf(" angles = %g, %g, delta=%g vs threshold %g\n",
old_angle, new_angle,
new_angle - old_angle, kMaxSkewDeviation);
}
bool new_good_baseline = new_error <= max_baseline_error_ &&
(cheat_allowance > 0.0 || fitter_.SufficientPointsForIndependentFit());
// The new will replace the old if any are true:
// 1. the new error is better
// 2. the old is NOT good, but the new is
// 3. there is a wild angular difference between them (assuming that the new
// is a better guess at the angle.)
if (new_error <= baseline_error_ ||
(!good_baseline_ && new_good_baseline) ||
fabs(new_angle - old_angle) > kMaxSkewDeviation) {
baseline_error_ = new_error;
baseline_pt1_ = line_pt;
baseline_pt2_ = baseline_pt1_ + direction;
good_baseline_ = new_good_baseline;
if (debug > 1) {
tprintf("Replacing with constrained baseline, good = %d\n",
good_baseline_);
}
} else if (debug > 1) {
tprintf("Keeping old baseline\n");
}
}
// Returns the perpendicular distance of the point from the straight
// baseline.
double BaselineRow::PerpDistanceFromBaseline(const FCOORD& pt) const {
FCOORD baseline_vector(baseline_pt2_ - baseline_pt1_);
FCOORD offset_vector(pt - baseline_pt1_);
double distance = baseline_vector * offset_vector;
return sqrt(distance * distance / baseline_vector.sqlength());
}
// Computes the bounding box of the row.
void BaselineRow::ComputeBoundingBox() {
BLOBNBOX_IT it(blobs_);
TBOX box;
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
box += it.data()->bounding_box();
}
bounding_box_ = box;
}
BaselineBlock::BaselineBlock(int debug_level, bool non_text, TO_BLOCK* block)
: block_(block), debug_level_(debug_level), non_text_block_(non_text),
good_skew_angle_(false), skew_angle_(0.0),
line_spacing_(block->line_spacing), line_offset_(0.0), model_error_(0.0) {
TO_ROW_IT row_it(block_->get_rows());
for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
// Sort the blobs on the rows.
row_it.data()->blob_list()->sort(blob_x_order);
rows_.push_back(new BaselineRow(block->line_spacing, row_it.data()));
}
}
// Computes and returns the absolute error of the given perp_disp from the
// given linespacing model.
double BaselineBlock::SpacingModelError(double perp_disp, double line_spacing,
double line_offset) {
// Round to the nearest multiple of line_spacing + line offset.
int multiple = IntCastRounded((perp_disp - line_offset) / line_spacing);
double model_y = line_spacing * multiple + line_offset;
return fabs(perp_disp - model_y);
}
// Fits straight line baselines and computes the skew angle from the
// median angle. Returns true if a good angle is found.
// If use_box_bottoms is false, baseline positions are formed by
// considering the outlines of the blobs.
bool BaselineBlock::FitBaselinesAndFindSkew(bool use_box_bottoms) {
if (non_text_block_) return false;
GenericVector<double> angles;
for (int r = 0; r < rows_.size(); ++r) {
BaselineRow* row = rows_[r];
if (row->FitBaseline(use_box_bottoms)) {
double angle = row->BaselineAngle();
angles.push_back(angle);
}
if (debug_level_ > 1)
row->Print();
}
if (!angles.empty()) {
skew_angle_ = MedianOfCircularValues(M_PI, &angles);
good_skew_angle_ = true;
} else {
skew_angle_ = 0.0f;
good_skew_angle_ = false;
}
if (debug_level_ > 0) {
tprintf("Initial block skew angle = %g, good = %d\n",
skew_angle_, good_skew_angle_);
}
return good_skew_angle_;
}
// Refits the baseline to a constrained angle, using the stored block
// skew if good enough, otherwise the supplied default skew.
void BaselineBlock::ParallelizeBaselines(double default_block_skew) {
if (non_text_block_) return;
if (!good_skew_angle_) skew_angle_ = default_block_skew;
if (debug_level_ > 0)
tprintf("Adjusting block to skew angle %g\n", skew_angle_);
FCOORD direction(cos(skew_angle_), sin(skew_angle_));
for (int r = 0; r < rows_.size(); ++r) {
BaselineRow* row = rows_[r];
row->AdjustBaselineToParallel(debug_level_, direction);
if (debug_level_ > 1)
row->Print();
}
if (rows_.size() < 3 || !ComputeLineSpacing())
return;
// Enforce the line spacing model on all lines that don't yet have a good
// baseline.
// Start by finding the row that is best fitted to the model.
int best_row = 0;
double best_error = SpacingModelError(rows_[0]->PerpDisp(direction),
line_spacing_, line_offset_);
for (int r = 1; r < rows_.size(); ++r) {
double error = SpacingModelError(rows_[r]->PerpDisp(direction),
line_spacing_, line_offset_);
if (error < best_error) {
best_error = error;
best_row = r;
}
}
// Starting at the best fitting row, work outwards, syncing the offset.
double offset = line_offset_;
for (int r = best_row + 1; r < rows_.size(); ++r) {
offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction,
line_spacing_, offset);
}
offset = line_offset_;
for (int r = best_row - 1; r >= 0; --r) {
offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction,
line_spacing_, offset);
}
}
// Sets the parameters in TO_BLOCK that are needed by subsequent processes.
void BaselineBlock::SetupBlockParameters() const {
if (line_spacing_ > 0.0) {
// Where was block_line_spacing set before?
float min_spacing = MIN(block_->line_spacing, line_spacing_);
if (min_spacing < block_->line_size)
block_->line_size = min_spacing;
block_->line_spacing = line_spacing_;
block_->baseline_offset = line_offset_;
block_->max_blob_size = line_spacing_ * kMaxBlobSizeMultiple;
}
// Setup the parameters on all the rows.
TO_ROW_IT row_it(block_->get_rows());
for (int r = 0; r < rows_.size(); ++r, row_it.forward()) {
BaselineRow* row = rows_[r];
TO_ROW* to_row = row_it.data();
row->SetupOldLineParameters(to_row);
}
}
// Processing that is required before fitting baseline splines, but requires
// linear baselines in order to be successful:
// Removes noise if required
// Separates out underlines
// Pre-associates blob fragments.
// TODO(rays/joeliu) This entire section of code is inherited from the past
// and could be improved/eliminated.
// page_tr is used to size a debug window.
void BaselineBlock::PrepareForSplineFitting(ICOORD page_tr, bool remove_noise) {
if (non_text_block_) return;
if (remove_noise) {
vigorous_noise_removal(block_);
}
FCOORD rotation(1.0f, 0.0f);
double gradient = tan(skew_angle_);
separate_underlines(block_, gradient, rotation, true);
pre_associate_blobs(page_tr, block_, rotation, true);
}
// Fits splines to the textlines, or creates fake QSPLINES from the straight
// baselines that are already on the TO_ROWs.
// As a side-effect, computes the xheights of the rows and the block.
// Although x-height estimation is conceptually separate, it is part of
// detecting perspective distortion and therefore baseline fitting.
void BaselineBlock::FitBaselineSplines(bool enable_splines,
bool show_final_rows,
Textord* textord) {
double gradient = tan(skew_angle_);
FCOORD rotation(1.0f, 0.0f);
if (enable_splines) {
textord->make_spline_rows(block_, gradient, show_final_rows);
} else {
// Make a fake spline from the existing line.
TBOX block_box= block_->block->bounding_box();
TO_ROW_IT row_it = block_->get_rows();
for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
TO_ROW* row = row_it.data();
inT32 xstarts[2] = { block_box.left(), block_box.right() };
double coeffs[3] = { 0.0, row->line_m(), row->line_c() };
row->baseline = QSPLINE(1, xstarts, coeffs);
textord->compute_row_xheight(row, block_->block->classify_rotation(),
row->line_m(), block_->line_size);
}
}
textord->compute_block_xheight(block_, gradient);
block_->block->set_xheight(block_->xheight);
if (textord_restore_underlines) // fix underlines
restore_underlined_blobs(block_);
}
// Draws the (straight) baselines and final blobs colored according to
// what was discarded as noise and what is associated with each row.
void BaselineBlock::DrawFinalRows(const ICOORD& page_tr) {
#ifndef GRAPHICS_DISABLED
if (non_text_block_) return;
double gradient = tan(skew_angle_);
FCOORD rotation(1.0f, 0.0f);
int left_edge = block_->block->bounding_box().left();
ScrollView* win = create_to_win(page_tr);
ScrollView::Color colour = ScrollView::RED;
TO_ROW_IT row_it = block_->get_rows();
for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
plot_parallel_row(row_it.data(), gradient, left_edge, colour, rotation);
colour = static_cast<ScrollView::Color>(colour + 1);
if (colour > ScrollView::MAGENTA)
colour = ScrollView::RED;
}
plot_blob_list(win, &block_->blobs, ScrollView::MAGENTA, ScrollView::WHITE);
// Show discarded blobs.
plot_blob_list(win, &block_->underlines,
ScrollView::YELLOW, ScrollView::CORAL);
if (block_->blobs.length() > 0)
tprintf("%d blobs discarded as noise\n", block_->blobs.length());
draw_meanlines(block_, gradient, left_edge, ScrollView::WHITE, rotation);
#endif
}
void BaselineBlock::DrawPixSpline(Pix* pix_in) {
if (non_text_block_) return;
TO_ROW_IT row_it = block_->get_rows();
for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
row_it.data()->baseline.plot(pix_in);
}
}
// Top-level line-spacing calculation. Computes an estimate of the line-
// spacing, using the current baselines in the TO_ROWS of the block, and
// then refines it by fitting a regression line to the baseline positions
// as a function of their integer index.
// Returns true if it seems that the model is a reasonable fit to the
// observations.
bool BaselineBlock::ComputeLineSpacing() {
FCOORD direction(cos(skew_angle_), sin(skew_angle_));
GenericVector<double> row_positions;
ComputeBaselinePositions(direction, &row_positions);
if (row_positions.size() < 2) return false;
EstimateLineSpacing();
RefineLineSpacing(row_positions);
// Verify that the model is reasonable.
double max_baseline_error = kMaxBaselineError * line_spacing_;
int non_trivial_gaps = 0;
int fitting_gaps = 0;
for (int i = 1; i < row_positions.size(); ++i) {
double row_gap = fabs(row_positions[i - 1] - row_positions[i]);
if (row_gap > max_baseline_error) {
++non_trivial_gaps;
if (fabs(row_gap - line_spacing_) <= max_baseline_error)
++fitting_gaps;
}
}
if (debug_level_ > 0) {
tprintf("Spacing %g, in %d rows, %d gaps fitted out of %d non-trivial\n",
line_spacing_, row_positions.size(), fitting_gaps,
non_trivial_gaps);
}
return fitting_gaps > non_trivial_gaps * kMinFittingLinespacings;
}
// Computes the deskewed vertical position of each baseline in the block and
// stores them in the given vector.
// This is calculated as the perpendicular distance of the middle of each
// baseline (in case it has a different skew angle) from the line passing
// through the origin parallel to the block baseline angle.
// NOTE that "distance" above is a signed quantity so we can tell which side
// of the block baseline a line sits, hence the function and argument name
// positions not distances.
void BaselineBlock::ComputeBaselinePositions(const FCOORD& direction,
GenericVector<double>* positions) {
positions->clear();
for (int r = 0; r < rows_.size(); ++r) {
BaselineRow* row = rows_[r];
const TBOX& row_box = row->bounding_box();
float x_middle = (row_box.left() + row_box.right()) / 2.0f;
FCOORD row_pos(x_middle, static_cast<float>(row->StraightYAtX(x_middle)));
float offset = direction * row_pos;
positions->push_back(offset);
}
}
// Computes an estimate of the line spacing of the block from the median
// of the spacings between adjacent overlapping textlines.
void BaselineBlock::EstimateLineSpacing() {
GenericVector<float> spacings;
for (int r = 0; r < rows_.size(); ++r) {
BaselineRow* row = rows_[r];
// Exclude silly lines.
if (fabs(row->BaselineAngle()) > M_PI * 0.25) continue;
// Find the first row after row that overlaps it significantly.
const TBOX& row_box = row->bounding_box();
int r2;
for (r2 = r + 1; r2 < rows_.size() &&
!row_box.major_x_overlap(rows_[r2]->bounding_box());
++r2);
if (r2 < rows_.size()) {
BaselineRow* row2 = rows_[r2];
// Exclude silly lines.
if (fabs(row2->BaselineAngle()) > M_PI * 0.25) continue;
float spacing = row->SpaceBetween(*row2);
spacings.push_back(spacing);
}
}
// If we have at least one value, use it, otherwise leave the previous
// value unchanged.
if (!spacings.empty()) {
line_spacing_ = spacings[spacings.choose_nth_item(spacings.size() / 2)];
if (debug_level_ > 1)
tprintf("Estimate of linespacing = %g\n", line_spacing_);
}
}
// Refines the line spacing of the block by fitting a regression
// line to the deskewed y-position of each baseline as a function of its
// estimated line index, allowing for a small error in the initial linespacing
// and choosing the best available model.
void BaselineBlock::RefineLineSpacing(const GenericVector<double>& positions) {
double spacings[3], offsets[3], errors[3];
int index_range;
errors[0] = FitLineSpacingModel(positions, line_spacing_,
&spacings[0], &offsets[0], &index_range);
if (index_range > 1) {
double spacing_plus = line_spacing_ / (1.0 + 1.0 / index_range);
// Try the hypotheses that there might be index_range +/- 1 line spaces.
errors[1] = FitLineSpacingModel(positions, spacing_plus,
&spacings[1], &offsets[1], NULL);
double spacing_minus = line_spacing_ / (1.0 - 1.0 / index_range);
errors[2] = FitLineSpacingModel(positions, spacing_minus,
&spacings[2], &offsets[2], NULL);
for (int i = 1; i <= 2; ++i) {
if (errors[i] < errors[0]) {
spacings[0] = spacings[i];
offsets[0] = offsets[i];
errors[0] = errors[i];
}
}
}
if (spacings[0] > 0.0) {
line_spacing_ = spacings[0];
line_offset_ = offsets[0];
model_error_ = errors[0];
if (debug_level_ > 0) {
tprintf("Final linespacing model = %g + offset %g, error %g\n",
line_spacing_, line_offset_, model_error_);
}
}
}
// Given an initial estimate of line spacing (m_in) and the positions of each
// baseline, computes the line spacing of the block more accurately in m_out,
// and the corresponding intercept in c_out, and the number of spacings seen
// in index_delta. Returns the error of fit to the line spacing model.
// Uses a simple linear regression, but optimized the offset using the median.
double BaselineBlock::FitLineSpacingModel(
const GenericVector<double>& positions, double m_in,
double* m_out, double* c_out, int* index_delta) {
if (m_in == 0.0f || positions.size() < 2) {
*m_out = m_in;
*c_out = 0.0;
if (index_delta != NULL) *index_delta = 0;
return 0.0;
}
GenericVector<double> offsets;
// Get the offset (remainder) linespacing for each line and choose the median.
for (int i = 0; i < positions.size(); ++i)
offsets.push_back(fmod(positions[i], m_in));
// Get the median offset.
double median_offset = MedianOfCircularValues(m_in, &offsets);
// Now fit a line to quantized line number and offset.
LLSQ llsq;
int min_index = MAX_INT32;
int max_index = -MAX_INT32;
for (int i = 0; i < positions.size(); ++i) {
double y_pos = positions[i];
int row_index = IntCastRounded((y_pos - median_offset) / m_in);
UpdateRange(row_index, &min_index, &max_index);
llsq.add(row_index, y_pos);
}
// Get the refined line spacing.
*m_out = llsq.m();
// Use the median offset rather than the mean.
offsets.truncate(0);
for (int i = 0; i < positions.size(); ++i)
offsets.push_back(fmod(positions[i], *m_out));
// Get the median offset.
if (debug_level_ > 2) {
for (int i = 0; i < offsets.size(); ++i)
tprintf("%d: %g\n", i, offsets[i]);
}
*c_out = MedianOfCircularValues(*m_out, &offsets);
if (debug_level_ > 1) {
tprintf("Median offset = %g, compared to mean of %g.\n",
*c_out, llsq.c(*m_out));
}
// Index_delta is the number of hypothesized line gaps present.
if (index_delta != NULL)
*index_delta = max_index - min_index;
// Use the regression model's intercept to compute the error, as it may be
// a full line-spacing in disagreement with the median.
double rms_error = llsq.rms(*m_out, llsq.c(*m_out));
if (debug_level_ > 1) {
tprintf("Linespacing of y=%g x + %g improved to %g x + %g, rms=%g\n",
m_in, median_offset, *m_out, *c_out, rms_error);
}
return rms_error;
}
BaselineDetect::BaselineDetect(int debug_level, const FCOORD& page_skew,
TO_BLOCK_LIST* blocks)
: page_skew_(page_skew), debug_level_(debug_level), pix_debug_(NULL),
debug_file_prefix_("") {
TO_BLOCK_IT it(blocks);
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
TO_BLOCK* to_block = it.data();
BLOCK* block = to_block->block;
POLY_BLOCK* pb = block->poly_block();
// A note about non-text blocks.
// On output, non-text blocks are supposed to contain a single empty word
// in each incoming text line. These mark out the polygonal bounds of the
// block. Ideally no baselines should be required, but currently
// make_words crashes if a baseline and xheight are not provided, so we
// include non-text blocks here, but flag them for special treatment.
bool non_text = pb != NULL && !pb->IsText();
blocks_.push_back(new BaselineBlock(debug_level_, non_text, to_block));
}
}
BaselineDetect::~BaselineDetect() {
pixDestroy(&pix_debug_);
}
// Finds the initial baselines for each TO_ROW in each TO_BLOCK, gathers
// block-wise and page-wise data to smooth small blocks/rows, and applies
// smoothing based on block/page-level skew and block-level linespacing.
void BaselineDetect::ComputeStraightBaselines(bool use_box_bottoms) {
GenericVector<double> block_skew_angles;
for (int i = 0; i < blocks_.size(); ++i) {
BaselineBlock* bl_block = blocks_[i];
if (debug_level_ > 0)
tprintf("Fitting initial baselines...\n");
if (bl_block->FitBaselinesAndFindSkew(use_box_bottoms)) {
block_skew_angles.push_back(bl_block->skew_angle());
}
}
// Compute a page-wide default skew for blocks with too little information.
double default_block_skew = page_skew_.angle();
if (!block_skew_angles.empty()) {
default_block_skew = MedianOfCircularValues(M_PI, &block_skew_angles);
}
if (debug_level_ > 0) {
tprintf("Page skew angle = %g\n", default_block_skew);
}
// Set bad lines in each block to the default block skew and then force fit
// a linespacing model where it makes sense to do so.
for (int i = 0; i < blocks_.size(); ++i) {
BaselineBlock* bl_block = blocks_[i];
bl_block->ParallelizeBaselines(default_block_skew);
bl_block->SetupBlockParameters(); // This replaced compute_row_stats.
}
}
// Computes the baseline splines for each TO_ROW in each TO_BLOCK and
// other associated side-effects, including pre-associating blobs, computing
// x-heights and displaying debug information.
// NOTE that ComputeStraightBaselines must have been called first as this
// sets up data in the TO_ROWs upon which this function depends.
void BaselineDetect::ComputeBaselineSplinesAndXheights(const ICOORD& page_tr,
bool enable_splines,
bool remove_noise,
bool show_final_rows,
Textord* textord) {
Pix* pix_spline = pix_debug_ ? pixConvertTo32(pix_debug_) : NULL;
for (int i = 0; i < blocks_.size(); ++i) {
BaselineBlock* bl_block = blocks_[i];
bl_block->PrepareForSplineFitting(page_tr, remove_noise);
bl_block->FitBaselineSplines(enable_splines, show_final_rows, textord);
if (pix_spline) {
bl_block->DrawPixSpline(pix_spline);
}
if (show_final_rows) {
bl_block->DrawFinalRows(page_tr);
}
}
if (pix_spline) {
STRING outfile_name = debug_file_prefix_ + "_spline.png";
pixWrite(outfile_name.string(), pix_spline, IFF_PNG);
pixDestroy(&pix_spline);
}
}
void BaselineDetect::SetDebugImage(Pix* pixIn, const STRING& output_path) {
pixDestroy(&pix_debug_);
pix_debug_ = pixClone(pixIn);
debug_file_prefix_ = output_path;
}
} // namespace tesseract.