mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-12-23 06:57:50 +08:00
bef8cad38d
Most of them were found by codespell. Signed-off-by: Stefan Weil <sw@weilnetz.de>
1090 lines
38 KiB
C++
1090 lines
38 KiB
C++
/**********************************************************************
|
|
* File: blobbox.cpp (Formerly blobnbox.c)
|
|
* Description: Code for the textord blob class.
|
|
* Author: Ray Smith
|
|
* Created: Thu Jul 30 09:08:51 BST 1992
|
|
*
|
|
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
|
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
** you may not use this file except in compliance with the License.
|
|
** You may obtain a copy of the License at
|
|
** http://www.apache.org/licenses/LICENSE-2.0
|
|
** Unless required by applicable law or agreed to in writing, software
|
|
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
** See the License for the specific language governing permissions and
|
|
** limitations under the License.
|
|
*
|
|
**********************************************************************/
|
|
|
|
// Include automatically generated configuration file if running autoconf.
|
|
#ifdef HAVE_CONFIG_H
|
|
#include "config_auto.h"
|
|
#endif
|
|
|
|
#include "blobbox.h"
|
|
#include "allheaders.h"
|
|
#include "blobs.h"
|
|
#include "helpers.h"
|
|
#include "normalis.h"
|
|
|
|
#define PROJECTION_MARGIN 10 //arbitrary
|
|
#define EXTERN
|
|
|
|
ELISTIZE (BLOBNBOX) ELIST2IZE (TO_ROW) ELISTIZE (TO_BLOCK)
|
|
|
|
// Smallest allowed x component (cosine) of a rotation applied to a
// diacritic blob: rotations of up to 30 degrees are allowed.
const double kCosSmallAngle = 0.866;
// Min aspect ratio for a joined word to indicate an obvious flow direction.
const double kDefiniteAspectRatio = 2.0;
// Multiple of the box perimeter that the adjusted outline perimeter must
// exceed for a blob to count as a complex shape (a joined word).
const double kComplexShapePerimeterRatio = 1.5;
// Min multiple of linesize for medium-sized blobs in ReFilterBlobs.
const double kMinMediumSizeRatio = 0.25;
// Max multiple of linesize for medium-sized blobs in ReFilterBlobs.
const double kMaxMediumSizeRatio = 4.0;
|
|
|
|
// Rotates the box and the underlying blob.
|
|
void BLOBNBOX::rotate(FCOORD rotation) {
|
|
cblob_ptr->rotate(rotation);
|
|
rotate_box(rotation);
|
|
compute_bounding_box();
|
|
}
|
|
|
|
// Reflect the box in the y-axis, leaving the underlying blob untouched.
|
|
void BLOBNBOX::reflect_box_in_y_axis() {
|
|
int left = -box.right();
|
|
box.set_right(-box.left());
|
|
box.set_left(left);
|
|
}
|
|
|
|
// Rotates the box by the angle given by rotation.
|
|
// If the blob is a diacritic, then only small rotations for skew
|
|
// correction can be applied.
|
|
void BLOBNBOX::rotate_box(FCOORD rotation) {
|
|
if (IsDiacritic()) {
|
|
ASSERT_HOST(rotation.x() >= kCosSmallAngle)
|
|
ICOORD top_pt((box.left() + box.right()) / 2, base_char_top_);
|
|
ICOORD bottom_pt(top_pt.x(), base_char_bottom_);
|
|
top_pt.rotate(rotation);
|
|
base_char_top_ = top_pt.y();
|
|
bottom_pt.rotate(rotation);
|
|
base_char_bottom_ = bottom_pt.y();
|
|
box.rotate(rotation);
|
|
} else {
|
|
box.rotate(rotation);
|
|
set_diacritic_box(box);
|
|
}
|
|
}
|
|
|
|
/**********************************************************************
|
|
* BLOBNBOX::merge
|
|
*
|
|
* Merge this blob with the given blob, which should be after this.
|
|
**********************************************************************/
|
|
void BLOBNBOX::merge( //merge blobs
|
|
BLOBNBOX *nextblob //blob to join with
|
|
) {
|
|
box += nextblob->box; //merge boxes
|
|
set_diacritic_box(box);
|
|
nextblob->joined = TRUE;
|
|
}
|
|
|
|
|
|
// Merge this with other, taking the outlines from other.
|
|
// Other is not deleted, but left for the caller to handle.
|
|
void BLOBNBOX::really_merge(BLOBNBOX* other) {
|
|
if (cblob_ptr != NULL && other->cblob_ptr != NULL) {
|
|
C_OUTLINE_IT ol_it(cblob_ptr->out_list());
|
|
ol_it.add_list_after(other->cblob_ptr->out_list());
|
|
}
|
|
compute_bounding_box();
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* BLOBNBOX::chop
|
|
*
|
|
* Chop this blob into equal sized pieces using the x height as a guide.
|
|
* The blob is not actually chopped. Instead, fake blobs are inserted
|
|
* with the relevant bounding boxes.
|
|
**********************************************************************/
|
|
|
|
void BLOBNBOX::chop( //chop blobs
|
|
BLOBNBOX_IT *start_it, //location of this
|
|
BLOBNBOX_IT *end_it, //iterator
|
|
FCOORD rotation, //for landscape
|
|
float xheight //of line
|
|
) {
|
|
inT16 blobcount; //no of blobs
|
|
BLOBNBOX *newblob; //fake blob
|
|
BLOBNBOX *blob; //current blob
|
|
inT16 blobindex; //number of chop
|
|
inT16 leftx; //left edge of blob
|
|
float blobwidth; //width of each
|
|
float rightx; //right edge to scan
|
|
float ymin, ymax; //limits of new blob
|
|
float test_ymin, test_ymax; //limits of part blob
|
|
ICOORD bl, tr; //corners of box
|
|
BLOBNBOX_IT blob_it; //blob iterator
|
|
|
|
//get no of chops
|
|
blobcount = (inT16) floor (box.width () / xheight);
|
|
if (blobcount > 1 && cblob_ptr != NULL) {
|
|
//width of each
|
|
blobwidth = (float) (box.width () + 1) / blobcount;
|
|
for (blobindex = blobcount - 1, rightx = box.right ();
|
|
blobindex >= 0; blobindex--, rightx -= blobwidth) {
|
|
ymin = (float) MAX_INT32;
|
|
ymax = (float) -MAX_INT32;
|
|
blob_it = *start_it;
|
|
do {
|
|
blob = blob_it.data ();
|
|
find_cblob_vlimits(blob->cblob_ptr, rightx - blobwidth,
|
|
rightx,
|
|
/*rotation, */ test_ymin, test_ymax);
|
|
blob_it.forward ();
|
|
UpdateRange(test_ymin, test_ymax, &ymin, &ymax);
|
|
}
|
|
while (blob != end_it->data ());
|
|
if (ymin < ymax) {
|
|
leftx = (inT16) floor (rightx - blobwidth);
|
|
if (leftx < box.left ())
|
|
leftx = box.left (); //clip to real box
|
|
bl = ICOORD (leftx, (inT16) floor (ymin));
|
|
tr = ICOORD ((inT16) ceil (rightx), (inT16) ceil (ymax));
|
|
if (blobindex == 0)
|
|
box = TBOX (bl, tr); //change box
|
|
else {
|
|
newblob = new BLOBNBOX;
|
|
//box is all it has
|
|
newblob->box = TBOX (bl, tr);
|
|
//stay on current
|
|
newblob->base_char_top_ = tr.y();
|
|
newblob->base_char_bottom_ = bl.y();
|
|
end_it->add_after_stay_put (newblob);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Returns the box gaps between this and its neighbours_ in an array
|
|
// indexed by BlobNeighbourDir.
|
|
void BLOBNBOX::NeighbourGaps(int gaps[BND_COUNT]) const {
|
|
for (int dir = 0; dir < BND_COUNT; ++dir) {
|
|
gaps[dir] = MAX_INT16;
|
|
BLOBNBOX* neighbour = neighbours_[dir];
|
|
if (neighbour != NULL) {
|
|
TBOX n_box = neighbour->bounding_box();
|
|
if (dir == BND_LEFT || dir == BND_RIGHT) {
|
|
gaps[dir] = box.x_gap(n_box);
|
|
} else {
|
|
gaps[dir] = box.y_gap(n_box);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// Returns the min and max horizontal and vertical gaps (from NeighbourGaps)
|
|
// modified so that if the max exceeds the max dimension of the blob, and
|
|
// the min is less, the max is replaced with the min.
|
|
// The objective is to catch cases where there is only a single neighbour
|
|
// and avoid reporting the other gap as a ridiculously large number
|
|
void BLOBNBOX::MinMaxGapsClipped(int* h_min, int* h_max,
|
|
int* v_min, int* v_max) const {
|
|
int max_dimension = MAX(box.width(), box.height());
|
|
int gaps[BND_COUNT];
|
|
NeighbourGaps(gaps);
|
|
*h_min = MIN(gaps[BND_LEFT], gaps[BND_RIGHT]);
|
|
*h_max = MAX(gaps[BND_LEFT], gaps[BND_RIGHT]);
|
|
if (*h_max > max_dimension && *h_min < max_dimension) *h_max = *h_min;
|
|
*v_min = MIN(gaps[BND_ABOVE], gaps[BND_BELOW]);
|
|
*v_max = MAX(gaps[BND_ABOVE], gaps[BND_BELOW]);
|
|
if (*v_max > max_dimension && *v_min < max_dimension) *v_max = *v_min;
|
|
}
|
|
|
|
// NULLs out any neighbours that are DeletableNoise to remove references.
|
|
void BLOBNBOX::CleanNeighbours() {
|
|
for (int dir = 0; dir < BND_COUNT; ++dir) {
|
|
BLOBNBOX* neighbour = neighbours_[dir];
|
|
if (neighbour != NULL && neighbour->DeletableNoise()) {
|
|
neighbours_[dir] = NULL;
|
|
good_stroke_neighbours_[dir] = false;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Returns positive if there is at least one side neighbour that has a similar
|
|
// stroke width and is not on the other side of a rule line.
|
|
int BLOBNBOX::GoodTextBlob() const {
|
|
int score = 0;
|
|
for (int dir = 0; dir < BND_COUNT; ++dir) {
|
|
BlobNeighbourDir bnd = static_cast<BlobNeighbourDir>(dir);
|
|
if (good_stroke_neighbour(bnd))
|
|
++score;
|
|
}
|
|
return score;
|
|
}
|
|
|
|
// Returns the number of side neighbours that are of type BRT_NOISE.
|
|
int BLOBNBOX::NoisyNeighbours() const {
|
|
int count = 0;
|
|
for (int dir = 0; dir < BND_COUNT; ++dir) {
|
|
BlobNeighbourDir bnd = static_cast<BlobNeighbourDir>(dir);
|
|
BLOBNBOX* blob = neighbour(bnd);
|
|
if (blob != NULL && blob->region_type() == BRT_NOISE)
|
|
++count;
|
|
}
|
|
return count;
|
|
}
|
|
|
|
// Returns true, and sets vert_possible/horz_possible if the blob has some
|
|
// feature that makes it individually appear to flow one way.
|
|
// eg if it has a high aspect ratio, yet has a complex shape, such as a
|
|
// joined word in Latin, Arabic, or Hindi, rather than being a -, I, l, 1 etc.
|
|
bool BLOBNBOX::DefiniteIndividualFlow() {
|
|
if (cblob() == NULL) return false;
|
|
int box_perimeter = 2 * (box.height() + box.width());
|
|
if (box.width() > box.height() * kDefiniteAspectRatio) {
|
|
// Attempt to distinguish a wide joined word from a dash.
|
|
// If it is a dash, then its perimeter is approximately
|
|
// 2 * (box width + stroke width), but more if the outline is noisy,
|
|
// so perimeter - 2*(box width + stroke width) should be close to zero.
|
|
// A complex shape such as a joined word should have a much larger value.
|
|
int perimeter = cblob()->perimeter();
|
|
if (vert_stroke_width() > 0 || perimeter <= 0)
|
|
perimeter -= 2 * vert_stroke_width();
|
|
else
|
|
perimeter -= 4 * cblob()->area() / perimeter;
|
|
perimeter -= 2 * box.width();
|
|
// Use a multiple of the box perimeter as a threshold.
|
|
if (perimeter > kComplexShapePerimeterRatio * box_perimeter) {
|
|
set_vert_possible(false);
|
|
set_horz_possible(true);
|
|
return true;
|
|
}
|
|
}
|
|
if (box.height() > box.width() * kDefiniteAspectRatio) {
|
|
// As above, but for a putative vertical word vs a I/1/l.
|
|
int perimeter = cblob()->perimeter();
|
|
if (horz_stroke_width() > 0 || perimeter <= 0)
|
|
perimeter -= 2 * horz_stroke_width();
|
|
else
|
|
perimeter -= 4 * cblob()->area() / perimeter;
|
|
perimeter -= 2 * box.height();
|
|
if (perimeter > kComplexShapePerimeterRatio * box_perimeter) {
|
|
set_vert_possible(true);
|
|
set_horz_possible(false);
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Returns true if there is no tabstop violation in merging this and other.
|
|
bool BLOBNBOX::ConfirmNoTabViolation(const BLOBNBOX& other) const {
|
|
if (box.left() < other.box.left() && box.left() < other.left_rule_)
|
|
return false;
|
|
if (other.box.left() < box.left() && other.box.left() < left_rule_)
|
|
return false;
|
|
if (box.right() > other.box.right() && box.right() > other.right_rule_)
|
|
return false;
|
|
if (other.box.right() > box.right() && other.box.right() > right_rule_)
|
|
return false;
|
|
return true;
|
|
}
|
|
|
|
// Returns true if other has a similar stroke width to this.
|
|
bool BLOBNBOX::MatchingStrokeWidth(const BLOBNBOX& other,
|
|
double fractional_tolerance,
|
|
double constant_tolerance) const {
|
|
// The perimeter-based width is used as a backup in case there is
|
|
// no information in the blob.
|
|
double p_width = area_stroke_width();
|
|
double n_p_width = other.area_stroke_width();
|
|
float h_tolerance = horz_stroke_width_ * fractional_tolerance
|
|
+ constant_tolerance;
|
|
float v_tolerance = vert_stroke_width_ * fractional_tolerance
|
|
+ constant_tolerance;
|
|
double p_tolerance = p_width * fractional_tolerance
|
|
+ constant_tolerance;
|
|
bool h_zero = horz_stroke_width_ == 0.0f || other.horz_stroke_width_ == 0.0f;
|
|
bool v_zero = vert_stroke_width_ == 0.0f || other.vert_stroke_width_ == 0.0f;
|
|
bool h_ok = !h_zero && NearlyEqual(horz_stroke_width_,
|
|
other.horz_stroke_width_, h_tolerance);
|
|
bool v_ok = !v_zero && NearlyEqual(vert_stroke_width_,
|
|
other.vert_stroke_width_, v_tolerance);
|
|
bool p_ok = h_zero && v_zero && NearlyEqual(p_width, n_p_width, p_tolerance);
|
|
// For a match, at least one of the horizontal and vertical widths
|
|
// must match, and the other one must either match or be zero.
|
|
// Only if both are zero will we look at the perimeter metric.
|
|
return p_ok || ((v_ok || h_ok) && (h_ok || h_zero) && (v_ok || v_zero));
|
|
}
|
|
|
|
// Returns a bounding box of the outline contained within the
|
|
// given horizontal range.
|
|
TBOX BLOBNBOX::BoundsWithinLimits(int left, int right) {
|
|
FCOORD no_rotation(1.0f, 0.0f);
|
|
float top = box.top();
|
|
float bottom = box.bottom();
|
|
if (cblob_ptr != NULL) {
|
|
find_cblob_limits(cblob_ptr, static_cast<float>(left),
|
|
static_cast<float>(right), no_rotation,
|
|
bottom, top);
|
|
}
|
|
|
|
if (top < bottom) {
|
|
top = box.top();
|
|
bottom = box.bottom();
|
|
}
|
|
FCOORD bot_left(left, bottom);
|
|
FCOORD top_right(right, top);
|
|
TBOX shrunken_box(bot_left);
|
|
TBOX shrunken_box2(top_right);
|
|
shrunken_box += shrunken_box2;
|
|
return shrunken_box;
|
|
}
|
|
|
|
// Estimates and stores the baseline position based on the shape of the
|
|
// outline.
|
|
void BLOBNBOX::EstimateBaselinePosition() {
|
|
baseline_y_ = box.bottom(); // The default.
|
|
if (cblob_ptr == NULL) return;
|
|
baseline_y_ = cblob_ptr->EstimateBaselinePosition();
|
|
}
|
|
|
|
// Helper to call CleanNeighbours on all blobs on the list.
|
|
void BLOBNBOX::CleanNeighbours(BLOBNBOX_LIST* blobs) {
|
|
BLOBNBOX_IT blob_it(blobs);
|
|
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
|
|
blob_it.data()->CleanNeighbours();
|
|
}
|
|
}
|
|
|
|
// Helper to delete all the deletable blobs on the list.
|
|
void BLOBNBOX::DeleteNoiseBlobs(BLOBNBOX_LIST* blobs) {
|
|
BLOBNBOX_IT blob_it(blobs);
|
|
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
|
|
BLOBNBOX* blob = blob_it.data();
|
|
if (blob->DeletableNoise()) {
|
|
delete blob->cblob();
|
|
delete blob_it.extract();
|
|
}
|
|
}
|
|
}
|
|
|
|
// Helper to compute edge offsets for all the blobs on the list.
|
|
// See coutln.h for an explanation of edge offsets.
|
|
void BLOBNBOX::ComputeEdgeOffsets(Pix* thresholds, Pix* grey,
|
|
BLOBNBOX_LIST* blobs) {
|
|
int grey_height = 0;
|
|
int thr_height = 0;
|
|
int scale_factor = 1;
|
|
if (thresholds != NULL && grey != NULL) {
|
|
grey_height = pixGetHeight(grey);
|
|
thr_height = pixGetHeight(thresholds);
|
|
scale_factor =
|
|
IntCastRounded(static_cast<double>(grey_height) / thr_height);
|
|
}
|
|
BLOBNBOX_IT blob_it(blobs);
|
|
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
|
|
BLOBNBOX* blob = blob_it.data();
|
|
if (blob->cblob() != NULL) {
|
|
// Get the threshold that applies to this blob.
|
|
l_uint32 threshold = 128;
|
|
if (thresholds != NULL && grey != NULL) {
|
|
const TBOX& box = blob->cblob()->bounding_box();
|
|
// Transform the coordinates if required.
|
|
TPOINT pt((box.left() + box.right()) / 2,
|
|
(box.top() + box.bottom()) / 2);
|
|
pixGetPixel(thresholds, pt.x / scale_factor,
|
|
thr_height - 1 - pt.y / scale_factor, &threshold);
|
|
}
|
|
blob->cblob()->ComputeEdgeOffsets(threshold, grey);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
#ifndef GRAPHICS_DISABLED
|
|
// Helper to draw all the blobs on the list in the given body_colour,
|
|
// with child outlines in the child_colour.
|
|
void BLOBNBOX::PlotBlobs(BLOBNBOX_LIST* list,
|
|
ScrollView::Color body_colour,
|
|
ScrollView::Color child_colour,
|
|
ScrollView* win) {
|
|
BLOBNBOX_IT it(list);
|
|
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
|
it.data()->plot(win, body_colour, child_colour);
|
|
}
|
|
}
|
|
|
|
// Helper to draw only DeletableNoise blobs (unowned, BRT_NOISE) on the
|
|
// given list in the given body_colour, with child outlines in the
|
|
// child_colour.
|
|
void BLOBNBOX::PlotNoiseBlobs(BLOBNBOX_LIST* list,
|
|
ScrollView::Color body_colour,
|
|
ScrollView::Color child_colour,
|
|
ScrollView* win) {
|
|
BLOBNBOX_IT it(list);
|
|
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
|
BLOBNBOX* blob = it.data();
|
|
if (blob->DeletableNoise())
|
|
blob->plot(win, body_colour, child_colour);
|
|
}
|
|
}
|
|
|
|
// Maps a region type and text flow type to the colour used to display a
// blob. Line and image regions have a fixed colour; the colour of unknown
// and text regions also depends on flow_type. Any other region type is
// GREY.
ScrollView::Color BLOBNBOX::TextlineColor(BlobRegionType region_type,
                                          BlobTextFlowType flow_type) {
  switch (region_type) {
    case BRT_HLINE:
      return ScrollView::BROWN;
    case BRT_VLINE:
      return ScrollView::DARK_GREEN;
    case BRT_RECTIMAGE:
      return ScrollView::RED;
    case BRT_POLYIMAGE:
      return ScrollView::ORANGE;
    case BRT_UNKNOWN:
      if (flow_type == BTFT_NONTEXT)
        return ScrollView::CYAN;
      return ScrollView::WHITE;
    case BRT_VERT_TEXT:
      switch (flow_type) {
        case BTFT_STRONG_CHAIN:
        case BTFT_TEXT_ON_IMAGE:
          return ScrollView::GREEN;
        case BTFT_CHAIN:
          return ScrollView::LIME_GREEN;
        default:
          return ScrollView::YELLOW;
      }
    case BRT_TEXT:
      switch (flow_type) {
        case BTFT_STRONG_CHAIN:
          return ScrollView::BLUE;
        case BTFT_TEXT_ON_IMAGE:
          return ScrollView::LIGHT_BLUE;
        case BTFT_CHAIN:
          return ScrollView::MEDIUM_BLUE;
        case BTFT_LEADER:
          return ScrollView::WHEAT;
        case BTFT_NONTEXT:
          return ScrollView::PINK;
        default:
          return ScrollView::MAGENTA;
      }
    default:
      return ScrollView::GREY;
  }
}
|
|
|
|
// Keep in sync with BlobRegionType.
|
|
ScrollView::Color BLOBNBOX::BoxColor() const {
|
|
return TextlineColor(region_type_, flow_);
|
|
}
|
|
|
|
// Plots the C_BLOB of this blob, if it has one, in the given window.
// blob_colour is used for the outer outlines and child_colour for holes.
void BLOBNBOX::plot(ScrollView* window,
                    ScrollView::Color blob_colour,
                    ScrollView::Color child_colour) {
  if (cblob_ptr == NULL)
    return;  // nothing to draw
  cblob_ptr->plot(window, blob_colour, child_colour);
}
|
|
#endif
|
|
/**********************************************************************
|
|
* find_cblob_limits
|
|
*
|
|
* Scan the outlines of the cblob to locate the y min and max
|
|
* between the given x limits.
|
|
**********************************************************************/
|
|
|
|
void find_cblob_limits( //get y limits
|
|
C_BLOB *blob, //blob to search
|
|
float leftx, //x limits
|
|
float rightx,
|
|
FCOORD rotation, //for landscape
|
|
float &ymin, //output y limits
|
|
float &ymax) {
|
|
inT16 stepindex; //current point
|
|
ICOORD pos; //current coords
|
|
ICOORD vec; //rotated step
|
|
C_OUTLINE *outline; //current outline
|
|
//outlines
|
|
C_OUTLINE_IT out_it = blob->out_list ();
|
|
|
|
ymin = (float) MAX_INT32;
|
|
ymax = (float) -MAX_INT32;
|
|
for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
|
|
outline = out_it.data ();
|
|
pos = outline->start_pos (); //get coords
|
|
pos.rotate (rotation);
|
|
for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) {
|
|
//inside
|
|
if (pos.x () >= leftx && pos.x () <= rightx) {
|
|
UpdateRange(pos.y(), &ymin, &ymax);
|
|
}
|
|
vec = outline->step (stepindex);
|
|
vec.rotate (rotation);
|
|
pos += vec; //move to next
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* find_cblob_vlimits
|
|
*
|
|
* Scan the outlines of the cblob to locate the y min and max
|
|
* between the given x limits.
|
|
**********************************************************************/
|
|
|
|
void find_cblob_vlimits( //get y limits
|
|
C_BLOB *blob, //blob to search
|
|
float leftx, //x limits
|
|
float rightx,
|
|
float &ymin, //output y limits
|
|
float &ymax) {
|
|
inT16 stepindex; //current point
|
|
ICOORD pos; //current coords
|
|
ICOORD vec; //rotated step
|
|
C_OUTLINE *outline; //current outline
|
|
//outlines
|
|
C_OUTLINE_IT out_it = blob->out_list ();
|
|
|
|
ymin = (float) MAX_INT32;
|
|
ymax = (float) -MAX_INT32;
|
|
for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
|
|
outline = out_it.data ();
|
|
pos = outline->start_pos (); //get coords
|
|
for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) {
|
|
//inside
|
|
if (pos.x () >= leftx && pos.x () <= rightx) {
|
|
UpdateRange(pos.y(), &ymin, &ymax);
|
|
}
|
|
vec = outline->step (stepindex);
|
|
pos += vec; //move to next
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* find_cblob_hlimits
|
|
*
|
|
* Scan the outlines of the cblob to locate the x min and max
|
|
* between the given y limits.
|
|
**********************************************************************/
|
|
|
|
void find_cblob_hlimits( //get x limits
|
|
C_BLOB *blob, //blob to search
|
|
float bottomy, //y limits
|
|
float topy,
|
|
float &xmin, //output x limits
|
|
float &xmax) {
|
|
inT16 stepindex; //current point
|
|
ICOORD pos; //current coords
|
|
ICOORD vec; //rotated step
|
|
C_OUTLINE *outline; //current outline
|
|
//outlines
|
|
C_OUTLINE_IT out_it = blob->out_list ();
|
|
|
|
xmin = (float) MAX_INT32;
|
|
xmax = (float) -MAX_INT32;
|
|
for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
|
|
outline = out_it.data ();
|
|
pos = outline->start_pos (); //get coords
|
|
for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) {
|
|
//inside
|
|
if (pos.y () >= bottomy && pos.y () <= topy) {
|
|
UpdateRange(pos.x(), &xmin, &xmax);
|
|
}
|
|
vec = outline->step (stepindex);
|
|
pos += vec; //move to next
|
|
}
|
|
}
|
|
}
|
|
|
|
/**********************************************************************
|
|
* crotate_cblob
|
|
*
|
|
* Rotate the copy by the given vector and return a C_BLOB.
|
|
**********************************************************************/
|
|
|
|
C_BLOB *crotate_cblob( //rotate it
|
|
C_BLOB *blob, //blob to search
|
|
FCOORD rotation //for landscape
|
|
) {
|
|
C_OUTLINE_LIST out_list; //output outlines
|
|
//input outlines
|
|
C_OUTLINE_IT in_it = blob->out_list ();
|
|
//output outlines
|
|
C_OUTLINE_IT out_it = &out_list;
|
|
|
|
for (in_it.mark_cycle_pt (); !in_it.cycled_list (); in_it.forward ()) {
|
|
out_it.add_after_then_move (new C_OUTLINE (in_it.data (), rotation));
|
|
}
|
|
return new C_BLOB (&out_list);
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* box_next
|
|
*
|
|
* Compute the bounding box of this blob with merging of x overlaps
|
|
* but no pre-chopping.
|
|
* Then move the iterator on to the start of the next blob.
|
|
**********************************************************************/
|
|
|
|
TBOX box_next( //get bounding box
|
|
BLOBNBOX_IT *it //iterator to blobds
|
|
) {
|
|
BLOBNBOX *blob; //current blob
|
|
TBOX result; //total box
|
|
|
|
blob = it->data ();
|
|
result = blob->bounding_box ();
|
|
do {
|
|
it->forward ();
|
|
blob = it->data ();
|
|
if (blob->cblob() == NULL)
|
|
//was pre-chopped
|
|
result += blob->bounding_box ();
|
|
}
|
|
//until next real blob
|
|
while ((blob->cblob() == NULL) || blob->joined_to_prev());
|
|
return result;
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* box_next_pre_chopped
|
|
*
|
|
* Compute the bounding box of this blob with merging of x overlaps
|
|
* but WITH pre-chopping.
|
|
* Then move the iterator on to the start of the next pre-chopped blob.
|
|
**********************************************************************/
|
|
|
|
TBOX box_next_pre_chopped( //get bounding box
|
|
BLOBNBOX_IT *it //iterator to blobds
|
|
) {
|
|
BLOBNBOX *blob; //current blob
|
|
TBOX result; //total box
|
|
|
|
blob = it->data ();
|
|
result = blob->bounding_box ();
|
|
do {
|
|
it->forward ();
|
|
blob = it->data ();
|
|
}
|
|
//until next real blob
|
|
while (blob->joined_to_prev ());
|
|
return result;
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* TO_ROW::TO_ROW
|
|
*
|
|
* Constructor to make a row from a blob.
|
|
**********************************************************************/
|
|
|
|
TO_ROW::TO_ROW ( //constructor
|
|
BLOBNBOX * blob, //first blob
|
|
float top, //corrected top
|
|
float bottom, //of row
|
|
float row_size //ideal
|
|
) {
|
|
clear();
|
|
y_min = bottom;
|
|
y_max = top;
|
|
initial_y_min = bottom;
|
|
|
|
float diff; //in size
|
|
BLOBNBOX_IT it = &blobs; //list of blobs
|
|
|
|
it.add_to_end (blob);
|
|
diff = top - bottom - row_size;
|
|
if (diff > 0) {
|
|
y_max -= diff / 2;
|
|
y_min += diff / 2;
|
|
}
|
|
//very small object
|
|
else if ((top - bottom) * 3 < row_size) {
|
|
diff = row_size / 3 + bottom - top;
|
|
y_max += diff / 2;
|
|
y_min -= diff / 2;
|
|
}
|
|
}
|
|
|
|
// Prints the pitch, spacing and size members of the row on one line
// using tprintf.
void TO_ROW::print() const {
  tprintf("pitch=%d, fp=%g, fps=%g, fpns=%g, prs=%g, prns=%g,"
          " spacing=%g xh=%g y_origin=%g xev=%d, asc=%g, desc=%g,"
          " body=%g, minsp=%d maxnsp=%d, thr=%d kern=%g sp=%g\n",
          pitch_decision, fixed_pitch, fp_space, fp_nonsp,
          pr_space, pr_nonsp,
          spacing, xheight, y_origin, xheight_evidence,
          ascrise, descdrop,
          body_size, min_space, max_nonspace, space_threshold,
          kern_size, space_size);
}
|
|
|
|
/**********************************************************************
|
|
* TO_ROW:add_blob
|
|
*
|
|
* Add the blob to the end of the row.
|
|
**********************************************************************/
|
|
|
|
void TO_ROW::add_blob( //constructor
|
|
BLOBNBOX *blob, //first blob
|
|
float top, //corrected top
|
|
float bottom, //of row
|
|
float row_size //ideal
|
|
) {
|
|
float allowed; //allowed expansion
|
|
float available; //expansion
|
|
BLOBNBOX_IT it = &blobs; //list of blobs
|
|
|
|
it.add_to_end (blob);
|
|
allowed = row_size + y_min - y_max;
|
|
if (allowed > 0) {
|
|
available = top > y_max ? top - y_max : 0;
|
|
if (bottom < y_min)
|
|
//total available
|
|
available += y_min - bottom;
|
|
if (available > 0) {
|
|
available += available; //do it gradually
|
|
if (available < allowed)
|
|
available = allowed;
|
|
if (bottom < y_min)
|
|
y_min -= (y_min - bottom) * allowed / available;
|
|
if (top > y_max)
|
|
y_max += (top - y_max) * allowed / available;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* TO_ROW:insert_blob
|
|
*
|
|
* Add the blob to the row in the correct position.
|
|
**********************************************************************/
|
|
|
|
void TO_ROW::insert_blob( //constructor
|
|
BLOBNBOX *blob //first blob
|
|
) {
|
|
BLOBNBOX_IT it = &blobs; //list of blobs
|
|
|
|
if (it.empty ())
|
|
it.add_before_then_move (blob);
|
|
else {
|
|
it.mark_cycle_pt ();
|
|
while (!it.cycled_list ()
|
|
&& it.data ()->bounding_box ().left () <=
|
|
blob->bounding_box ().left ())
|
|
it.forward ();
|
|
if (it.cycled_list ())
|
|
it.add_to_end (blob);
|
|
else
|
|
it.add_before_stay_put (blob);
|
|
}
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* TO_ROW::compute_vertical_projection
|
|
*
|
|
* Compute the vertical projection of a TO_ROW from its blobs.
|
|
**********************************************************************/
|
|
|
|
void TO_ROW::compute_vertical_projection() { //project whole row
|
|
TBOX row_box; //bound of row
|
|
BLOBNBOX *blob; //current blob
|
|
TBOX blob_box; //bounding box
|
|
BLOBNBOX_IT blob_it = blob_list ();
|
|
|
|
if (blob_it.empty ())
|
|
return;
|
|
row_box = blob_it.data ()->bounding_box ();
|
|
for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ())
|
|
row_box += blob_it.data ()->bounding_box ();
|
|
|
|
projection.set_range (row_box.left () - PROJECTION_MARGIN,
|
|
row_box.right () + PROJECTION_MARGIN);
|
|
projection_left = row_box.left () - PROJECTION_MARGIN;
|
|
projection_right = row_box.right () + PROJECTION_MARGIN;
|
|
for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
|
|
blob = blob_it.data();
|
|
if (blob->cblob() != NULL)
|
|
vertical_cblob_projection(blob->cblob(), &projection);
|
|
}
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* TO_ROW::clear
|
|
*
|
|
* Zero out all scalar members.
|
|
**********************************************************************/
|
|
void TO_ROW::clear() {
|
|
all_caps = 0;
|
|
used_dm_model = 0;
|
|
projection_left = 0;
|
|
projection_right = 0;
|
|
pitch_decision = PITCH_DUNNO;
|
|
fixed_pitch = 0.0;
|
|
fp_space = 0.0;
|
|
fp_nonsp = 0.0;
|
|
pr_space = 0.0;
|
|
pr_nonsp = 0.0;
|
|
spacing = 0.0;
|
|
xheight = 0.0;
|
|
xheight_evidence = 0;
|
|
body_size = 0.0;
|
|
ascrise = 0.0;
|
|
descdrop = 0.0;
|
|
min_space = 0;
|
|
max_nonspace = 0;
|
|
space_threshold = 0;
|
|
kern_size = 0.0;
|
|
space_size = 0.0;
|
|
y_min = 0.0;
|
|
y_max = 0.0;
|
|
initial_y_min = 0.0;
|
|
m = 0.0;
|
|
c = 0.0;
|
|
error = 0.0;
|
|
para_c = 0.0;
|
|
para_error = 0.0;
|
|
y_origin = 0.0;
|
|
credibility = 0.0;
|
|
num_repeated_sets_ = -1;
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* vertical_cblob_projection
|
|
*
|
|
* Compute the vertical projection of a cblob from its outlines
|
|
* and add to the given STATS.
|
|
**********************************************************************/
|
|
|
|
void vertical_cblob_projection( //project outlines
|
|
C_BLOB *blob, //blob to project
|
|
STATS *stats //output
|
|
) {
|
|
//outlines of blob
|
|
C_OUTLINE_IT out_it = blob->out_list ();
|
|
|
|
for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
|
|
vertical_coutline_projection (out_it.data (), stats);
|
|
}
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* vertical_coutline_projection
|
|
*
|
|
* Compute the vertical projection of a outline from its outlines
|
|
* and add to the given STATS.
|
|
**********************************************************************/
|
|
|
|
void vertical_coutline_projection( //project outlines
|
|
C_OUTLINE *outline, //outline to project
|
|
STATS *stats //output
|
|
) {
|
|
ICOORD pos; //current point
|
|
ICOORD step; //edge step
|
|
inT32 length; //of outline
|
|
inT16 stepindex; //current step
|
|
C_OUTLINE_IT out_it = outline->child ();
|
|
|
|
pos = outline->start_pos ();
|
|
length = outline->pathlength ();
|
|
for (stepindex = 0; stepindex < length; stepindex++) {
|
|
step = outline->step (stepindex);
|
|
if (step.x () > 0) {
|
|
stats->add (pos.x (), -pos.y ());
|
|
} else if (step.x () < 0) {
|
|
stats->add (pos.x () - 1, pos.y ());
|
|
}
|
|
pos += step;
|
|
}
|
|
|
|
for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
|
|
vertical_coutline_projection (out_it.data (), stats);
|
|
}
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* TO_BLOCK::TO_BLOCK
|
|
*
|
|
* Constructor to make a TO_BLOCK from a real block.
|
|
**********************************************************************/
|
|
|
|
TO_BLOCK::TO_BLOCK( //make a block
|
|
BLOCK *src_block //real block
|
|
) {
|
|
clear();
|
|
block = src_block;
|
|
}
|
|
|
|
// Deletes the C_BLOB of every BLOBNBOX on the list. The BLOBNBOXes
// themselves are left on the list.
// A BLOBNBOX generally doesn't own its blobs, so when it does, they have
// to be deleted explicitly.
static void clear_blobnboxes(BLOBNBOX_LIST* boxes) {
  BLOBNBOX_IT box_it = boxes;
  box_it.mark_cycle_pt();
  while (!box_it.cycled_list()) {
    // Deleting a NULL pointer is a no-op, so no check is needed.
    delete box_it.data()->cblob();
    box_it.forward();
  }
}
|
|
|
|
/**********************************************************************
|
|
* TO_BLOCK::clear
|
|
*
|
|
* Zero out all scalar members.
|
|
**********************************************************************/
|
|
void TO_BLOCK::clear() {
|
|
block = NULL;
|
|
pitch_decision = PITCH_DUNNO;
|
|
line_spacing = 0.0;
|
|
line_size = 0.0;
|
|
max_blob_size = 0.0;
|
|
baseline_offset = 0.0;
|
|
xheight = 0.0;
|
|
fixed_pitch = 0.0;
|
|
kern_size = 0.0;
|
|
space_size = 0.0;
|
|
min_space = 0;
|
|
max_nonspace = 0;
|
|
fp_space = 0.0;
|
|
fp_nonsp = 0.0;
|
|
pr_space = 0.0;
|
|
pr_nonsp = 0.0;
|
|
key_row = NULL;
|
|
}
|
|
|
|
|
|
TO_BLOCK::~TO_BLOCK() {
  // Any BLOBNBOXes still on these lists at this stage own their blobs, so
  // the blobs are deleted here, list by list.
  BLOBNBOX_LIST* const owned_lists[] = {
    &blobs, &underlines, &noise_blobs, &small_blobs, &large_blobs
  };
  const int num_lists = sizeof(owned_lists) / sizeof(owned_lists[0]);
  for (int i = 0; i < num_lists; ++i) {
    clear_blobnboxes(owned_lists[i]);
  }
}
|
|
|
|
// Helper function to divide the input blobs over noise, small, medium
|
|
// and large lists. Blobs small in height and (small in width or large in width)
|
|
// go in the noise list. Dash (-) candidates go in the small list, and
|
|
// medium and large are by height.
|
|
// SIDE-EFFECT: reset all blobs to initial state by calling Init().
|
|
static void SizeFilterBlobs(int min_height, int max_height,
|
|
BLOBNBOX_LIST* src_list,
|
|
BLOBNBOX_LIST* noise_list,
|
|
BLOBNBOX_LIST* small_list,
|
|
BLOBNBOX_LIST* medium_list,
|
|
BLOBNBOX_LIST* large_list) {
|
|
BLOBNBOX_IT noise_it(noise_list);
|
|
BLOBNBOX_IT small_it(small_list);
|
|
BLOBNBOX_IT medium_it(medium_list);
|
|
BLOBNBOX_IT large_it(large_list);
|
|
for (BLOBNBOX_IT src_it(src_list); !src_it.empty(); src_it.forward()) {
|
|
BLOBNBOX* blob = src_it.extract();
|
|
blob->ReInit();
|
|
int width = blob->bounding_box().width();
|
|
int height = blob->bounding_box().height();
|
|
if (height < min_height &&
|
|
(width < min_height || width > max_height))
|
|
noise_it.add_after_then_move(blob);
|
|
else if (height > max_height)
|
|
large_it.add_after_then_move(blob);
|
|
else if (height < min_height)
|
|
small_it.add_after_then_move(blob);
|
|
else
|
|
medium_it.add_after_then_move(blob);
|
|
}
|
|
}
|
|
|
|
// Reorganize the blob lists with a different definition of small, medium
|
|
// and large, compared to the original definition.
|
|
// Height is still the primary filter key, but medium width blobs of small
|
|
// height become small, and very wide blobs of small height stay noise, along
|
|
// with small dot-shaped blobs.
|
|
void TO_BLOCK::ReSetAndReFilterBlobs() {
|
|
int min_height = IntCastRounded(kMinMediumSizeRatio * line_size);
|
|
int max_height = IntCastRounded(kMaxMediumSizeRatio * line_size);
|
|
BLOBNBOX_LIST noise_list;
|
|
BLOBNBOX_LIST small_list;
|
|
BLOBNBOX_LIST medium_list;
|
|
BLOBNBOX_LIST large_list;
|
|
SizeFilterBlobs(min_height, max_height, &blobs,
|
|
&noise_list, &small_list, &medium_list, &large_list);
|
|
SizeFilterBlobs(min_height, max_height, &large_blobs,
|
|
&noise_list, &small_list, &medium_list, &large_list);
|
|
SizeFilterBlobs(min_height, max_height, &small_blobs,
|
|
&noise_list, &small_list, &medium_list, &large_list);
|
|
SizeFilterBlobs(min_height, max_height, &noise_blobs,
|
|
&noise_list, &small_list, &medium_list, &large_list);
|
|
BLOBNBOX_IT blob_it(&blobs);
|
|
blob_it.add_list_after(&medium_list);
|
|
blob_it.set_to_list(&large_blobs);
|
|
blob_it.add_list_after(&large_list);
|
|
blob_it.set_to_list(&small_blobs);
|
|
blob_it.add_list_after(&small_list);
|
|
blob_it.set_to_list(&noise_blobs);
|
|
blob_it.add_list_after(&noise_list);
|
|
}
|
|
|
|
// Deletes noise blobs from all lists where not owned by a ColPartition.
|
|
void TO_BLOCK::DeleteUnownedNoise() {
|
|
BLOBNBOX::CleanNeighbours(&blobs);
|
|
BLOBNBOX::CleanNeighbours(&small_blobs);
|
|
BLOBNBOX::CleanNeighbours(&noise_blobs);
|
|
BLOBNBOX::CleanNeighbours(&large_blobs);
|
|
BLOBNBOX::DeleteNoiseBlobs(&blobs);
|
|
BLOBNBOX::DeleteNoiseBlobs(&small_blobs);
|
|
BLOBNBOX::DeleteNoiseBlobs(&noise_blobs);
|
|
BLOBNBOX::DeleteNoiseBlobs(&large_blobs);
|
|
}
|
|
|
|
// Computes and stores the edge offsets on each blob for use in feature
|
|
// extraction, using greyscale if the supplied grey and thresholds pixes
|
|
// are 8-bit or otherwise (if NULL or not 8 bit) the original binary
|
|
// edge step outlines.
|
|
// Thresholds must either be the same size as grey or an integer down-scale
|
|
// of grey.
|
|
// See coutln.h for an explanation of edge offsets.
|
|
void TO_BLOCK::ComputeEdgeOffsets(Pix* thresholds, Pix* grey) {
|
|
BLOBNBOX::ComputeEdgeOffsets(thresholds, grey, &blobs);
|
|
BLOBNBOX::ComputeEdgeOffsets(thresholds, grey, &small_blobs);
|
|
BLOBNBOX::ComputeEdgeOffsets(thresholds, grey, &noise_blobs);
|
|
}
|
|
|
|
#ifndef GRAPHICS_DISABLED
|
|
// Draw the noise blobs from all lists in red.
|
|
void TO_BLOCK::plot_noise_blobs(ScrollView* win) {
|
|
BLOBNBOX::PlotNoiseBlobs(&noise_blobs, ScrollView::RED, ScrollView::RED, win);
|
|
BLOBNBOX::PlotNoiseBlobs(&small_blobs, ScrollView::RED, ScrollView::RED, win);
|
|
BLOBNBOX::PlotNoiseBlobs(&large_blobs, ScrollView::RED, ScrollView::RED, win);
|
|
BLOBNBOX::PlotNoiseBlobs(&blobs, ScrollView::RED, ScrollView::RED, win);
|
|
}
|
|
|
|
// Draw the blobs on the various lists in the block in different colors.
|
|
void TO_BLOCK::plot_graded_blobs(ScrollView* win) {
|
|
BLOBNBOX::PlotBlobs(&noise_blobs, ScrollView::CORAL, ScrollView::BLUE, win);
|
|
BLOBNBOX::PlotBlobs(&small_blobs, ScrollView::GOLDENROD, ScrollView::YELLOW,
|
|
win);
|
|
BLOBNBOX::PlotBlobs(&large_blobs, ScrollView::DARK_GREEN, ScrollView::YELLOW,
|
|
win);
|
|
BLOBNBOX::PlotBlobs(&blobs, ScrollView::WHITE, ScrollView::BROWN, win);
|
|
}
|
|
|
|
/**********************************************************************
|
|
* plot_blob_list
|
|
*
|
|
* Draw a list of blobs.
|
|
**********************************************************************/
|
|
|
|
void plot_blob_list(ScrollView* win, // window to draw in
|
|
BLOBNBOX_LIST *list, // blob list
|
|
ScrollView::Color body_colour, // colour to draw
|
|
ScrollView::Color child_colour) { // colour of child
|
|
BLOBNBOX_IT it = list;
|
|
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
|
it.data()->plot(win, body_colour, child_colour);
|
|
}
|
|
}
|
|
#endif // GRAPHICS_DISABLED
|