tesseract/textord/colpartitiongrid.cpp
Ray Smith 0e868ef377 Major change to improve layout analysis for heavily diacritic languages:
Tha, Vie, Kan, Tel etc.
There is a new overlap detector that detects when diacritics
cause a big increase in textline overlap. In such cases, diacritics from
overlap regions are kept separate from layout analysis completely, allowing
textline formation to happen without them. The diacritics are then assigned
to 0, 1 or 2 close words at the end of layout analysis, using and modifying
an old noise detection data path.
The stored diacritics are used or not during recognition according to the
character classifier's liking for them.
2015-05-12 16:47:02 -07:00

1801 lines
73 KiB
C++

///////////////////////////////////////////////////////////////////////
// File: colpartitionrid.h
// Description: Class collecting code that acts on a BBGrid of ColPartitions.
// Author: Ray Smith
// Created: Mon Oct 05 08:42:01 PDT 2009
//
// (C) Copyright 2009, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifdef HAVE_CONFIG_H
#include "config_auto.h"
#endif
#include "colpartitiongrid.h"
#include "colpartitionset.h"
#include "imagefind.h"
namespace tesseract {
BOOL_VAR(textord_tabfind_show_color_fit, false, "Show stroke widths");
// Max pad factor used to search the neighbourhood of a partition to smooth
// partition types.
const int kMaxPadFactor = 6;
// Max multiple of size (min(height, width)) for the distance of the nearest
// neighbour for the change of type to be used.
const int kMaxNeighbourDistFactor = 4;
// Max RMS color noise to compare colors.
const int kMaxRMSColorNoise = 128;
// Minimum number of blobs in text to make a strong text partition.
const int kHorzStrongTextlineCount = 10;
// Maximum number of lines in a credible figure caption.
const int kMaxCaptionLines = 7;
// Min ratio between biggest and smallest gap to bound a caption.
const double kMinCaptionGapRatio = 2.0;
// Min ratio between biggest gap and mean line height to bound a caption.
const double kMinCaptionGapHeightRatio = 0.5;
// Min fraction of ColPartition height to be overlapping for margin purposes.
const double kMarginOverlapFraction = 0.25;
// Size ratio required to consider an unmerged overlapping partition to be big.
const double kBigPartSizeRatio = 1.75;
// Allowed proportional change in stroke width to match for smoothing.
const double kStrokeWidthFractionTolerance = 0.25;
// Allowed constant change in stroke width to match for smoothing.
const double kStrokeWidthConstantTolerance = 2.0;
// Fraction of gridsize to allow arbitrary overlap between partitions.
const double kTinyEnoughTextlineOverlapFraction = 0.25;
// Max vertical distance of neighbouring ColPartition as a multiple of
// partition height for it to be a partner.
// TODO(rays) fix the problem that causes a larger number to not work well.
// The value needs to be larger as sparse text blocks in a page that gets
// marked as single column will not find adjacent lines as partners, and
// will merge horizontally distant, but aligned lines. See rep.4B3 p5.
// The value needs to be small because double-spaced legal docs written
// in a single column, but justified courier have widely spaced lines
// that need to get merged before they partner-up with the lines above
// and below. See legal.3B5 p13/17. Neither of these should depend on
// the value of kMaxPartitionSpacing to be successful, and ColPartition
// merging needs attention to fix this problem.
const double kMaxPartitionSpacing = 1.75;
// Margin by which text has to beat image or vice-versa to make a firm
// decision in GridSmoothNeighbour.
const int kSmoothDecisionMargin = 4;
ColPartitionGrid::ColPartitionGrid() {
}
ColPartitionGrid::ColPartitionGrid(int gridsize,
const ICOORD& bleft, const ICOORD& tright)
: BBGrid<ColPartition, ColPartition_CLIST, ColPartition_C_IT>(gridsize,
bleft, tright) {
}
ColPartitionGrid::~ColPartitionGrid() {
}
// Handles a click event in a display window.
void ColPartitionGrid::HandleClick(int x, int y) {
BBGrid<ColPartition,
ColPartition_CLIST, ColPartition_C_IT>::HandleClick(x, y);
// Run a radial search for partitions that overlap.
ColPartitionGridSearch radsearch(this);
radsearch.SetUniqueMode(true);
radsearch.StartRadSearch(x, y, 1);
ColPartition* neighbour;
FCOORD click(x, y);
while ((neighbour = radsearch.NextRadSearch()) != NULL) {
TBOX nbox = neighbour->bounding_box();
if (nbox.contains(click)) {
tprintf("Block box:");
neighbour->bounding_box().print();
neighbour->Print();
}
}
}
// Merges ColPartitions in the grid that look like they belong in the same
// textline.
// For all partitions in the grid, calls the box_cb permanent callback
// to compute the search box, seaches the box, and if a candidate is found,
// calls the confirm_cb to check any more rules. If the confirm_cb returns
// true, then the partitions are merged.
// Both callbacks are deleted before returning.
void ColPartitionGrid::Merges(
TessResultCallback2<bool, ColPartition*, TBOX*>* box_cb,
TessResultCallback2<bool, const ColPartition*,
const ColPartition*>* confirm_cb) {
// Iterate the ColPartitions in the grid.
ColPartitionGridSearch gsearch(this);
gsearch.StartFullSearch();
ColPartition* part;
while ((part = gsearch.NextFullSearch()) != NULL) {
if (MergePart(box_cb, confirm_cb, part))
gsearch.RepositionIterator();
}
delete box_cb;
delete confirm_cb;
}
// For the given partition, calls the box_cb permanent callback
// to compute the search box, searches the box, and if a candidate is found,
// calls the confirm_cb to check any more rules. If the confirm_cb returns
// true, then the partitions are merged.
// Returns true if the partition is consumed by one or more merges.
bool ColPartitionGrid::MergePart(
TessResultCallback2<bool, ColPartition*, TBOX*>* box_cb,
TessResultCallback2<bool, const ColPartition*,
const ColPartition*>* confirm_cb,
ColPartition* part) {
if (part->IsUnMergeableType())
return false;
bool any_done = false;
// Repeatedly merge part while we find a best merge candidate that works.
bool merge_done = false;
do {
merge_done = false;
TBOX box = part->bounding_box();
bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom());
if (debug) {
tprintf("Merge candidate:");
box.print();
}
// Set up a rectangle search bounded by the part.
if (!box_cb->Run(part, &box))
continue;
// Create a list of merge candidates.
ColPartition_CLIST merge_candidates;
FindMergeCandidates(part, box, debug, &merge_candidates);
// Find the best merge candidate based on minimal overlap increase.
int overlap_increase;
ColPartition* neighbour = BestMergeCandidate(part, &merge_candidates, debug,
confirm_cb,
&overlap_increase);
if (neighbour != NULL && overlap_increase <= 0) {
if (debug) {
tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n",
part->HCoreOverlap(*neighbour), part->VCoreOverlap(*neighbour),
overlap_increase);
}
// Looks like a good candidate so merge it.
RemoveBBox(neighbour);
// We will modify the box of part, so remove it from the grid, merge
// it and then re-insert it into the grid.
RemoveBBox(part);
part->Absorb(neighbour, NULL);
InsertBBox(true, true, part);
merge_done = true;
any_done = true;
} else if (neighbour != NULL) {
if (debug) {
tprintf("Overlapped when merged with increase %d: ", overlap_increase);
neighbour->bounding_box().print();
}
} else if (debug) {
tprintf("No candidate neighbour returned\n");
}
} while (merge_done);
return any_done;
}
// Returns true if the given part and merge candidate might believably
// be part of a single text line according to the default rules.
// In general we only want to merge partitions that look like they
// are on the same text line, ie their median limits overlap, but we have
// to make exceptions for diacritics and stray punctuation.
static bool OKMergeCandidate(const ColPartition* part,
const ColPartition* candidate,
bool debug) {
const TBOX& part_box = part->bounding_box();
if (candidate == part)
return false; // Ignore itself.
if (!part->TypesMatch(*candidate) || candidate->IsUnMergeableType())
return false; // Don't mix inappropriate types.
const TBOX& c_box = candidate->bounding_box();
if (debug) {
tprintf("Examining merge candidate:");
c_box.print();
}
// Candidates must be within a reasonable distance.
if (candidate->IsVerticalType() || part->IsVerticalType()) {
int h_dist = -part->HCoreOverlap(*candidate);
if (h_dist >= MAX(part_box.width(), c_box.width()) / 2) {
if (debug)
tprintf("Too far away: h_dist = %d\n", h_dist);
return false;
}
} else {
// Coarse filter by vertical distance between partitions.
int v_dist = -part->VCoreOverlap(*candidate);
if (v_dist >= MAX(part_box.height(), c_box.height()) / 2) {
if (debug)
tprintf("Too far away: v_dist = %d\n", v_dist);
return false;
}
// Candidates must either overlap in median y,
// or part or candidate must be an acceptable diacritic.
if (!part->VSignificantCoreOverlap(*candidate) &&
!part->OKDiacriticMerge(*candidate, debug) &&
!candidate->OKDiacriticMerge(*part, debug)) {
if (debug)
tprintf("Candidate fails overlap and diacritic tests!\n");
return false;
}
}
return true;
}
// Helper function to compute the increase in overlap of the parts list of
// Colpartitions with the combination of merge1 and merge2, compared to
// the overlap with them uncombined.
// An overlap is not counted if passes the OKMergeOverlap test with ok_overlap
// as the pixel overlap limit. merge1 and merge2 must both be non-NULL.
static int IncreaseInOverlap(const ColPartition* merge1,
const ColPartition* merge2,
int ok_overlap,
ColPartition_CLIST* parts) {
ASSERT_HOST(merge1 != NULL && merge2 != NULL);
int total_area = 0;
ColPartition_C_IT it(parts);
TBOX merged_box(merge1->bounding_box());
merged_box += merge2->bounding_box();
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
ColPartition* part = it.data();
if (part == merge1 || part == merge2)
continue;
TBOX part_box = part->bounding_box();
// Compute the overlap of the merged box with part.
int overlap_area = part_box.intersection(merged_box).area();
if (overlap_area > 0 && !part->OKMergeOverlap(*merge1, *merge2,
ok_overlap, false)) {
total_area += overlap_area;
// Subtract the overlap of merge1 and merge2 individually.
overlap_area = part_box.intersection(merge1->bounding_box()).area();
if (overlap_area > 0)
total_area -= overlap_area;
TBOX intersection_box = part_box.intersection(merge2->bounding_box());
overlap_area = intersection_box.area();
if (overlap_area > 0) {
total_area -= overlap_area;
// Add back the 3-way area.
intersection_box &= merge1->bounding_box(); // In-place intersection.
overlap_area = intersection_box.area();
if (overlap_area > 0)
total_area += overlap_area;
}
}
}
return total_area;
}
// Helper function to test that each partition in candidates is either a
// good diacritic merge with part or an OK merge candidate with all others
// in the candidates list.
// ASCII Art Scenario:
// We sometimes get text such as "join-this" where the - is actually a long
// dash culled from a standard set of extra characters that don't match the
// font of the text. This makes its strokewidth not match and forms a broken
// set of 3 partitions for "join", "-" and "this" and the dash may slightly
// overlap BOTH words.
// ------- -------
// | ==== |
// ------- -------
// The standard merge rule: "you can merge 2 partitions as long as there is
// no increase in overlap elsewhere" fails miserably here. Merge any pair
// of partitions and the combined box overlaps more with the third than
// before. To allow the merge, we need to consider whether it is safe to
// merge everything, without merging separate text lines. For that we need
// everything to be an OKMergeCandidate (which is supposed to prevent
// separate text lines merging), but this is hard for diacritics to satisfy,
// so an alternative to being OKMergeCandidate with everything is to be an
// OKDiacriticMerge with part as the base character.
static bool TestCompatibleCandidates(const ColPartition& part, bool debug,
ColPartition_CLIST* candidates) {
ColPartition_C_IT it(candidates);
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
ColPartition* candidate = it.data();
if (!candidate->OKDiacriticMerge(part, false)) {
ColPartition_C_IT it2(it);
for (it2.mark_cycle_pt(); !it2.cycled_list(); it2.forward()) {
ColPartition* candidate2 = it2.data();
if (candidate2 != candidate &&
!OKMergeCandidate(candidate, candidate2, false)) {
if (debug) {
tprintf("NC overlap failed:Candidate:");
candidate2->bounding_box().print();
tprintf("fails to be a good merge with:");
candidate->bounding_box().print();
}
return false;
}
}
}
}
return true;
}
// Computes and returns the total overlap of all partitions in the grid.
// If overlap_grid is non-null, it is filled with a grid that holds empty
// partitions representing the union of all overlapped partitions.
int ColPartitionGrid::ComputeTotalOverlap(ColPartitionGrid** overlap_grid) {
int total_overlap = 0;
// Iterate the ColPartitions in the grid.
ColPartitionGridSearch gsearch(this);
gsearch.StartFullSearch();
ColPartition* part;
while ((part = gsearch.NextFullSearch()) != NULL) {
ColPartition_CLIST neighbors;
const TBOX& part_box = part->bounding_box();
FindOverlappingPartitions(part_box, part, &neighbors);
ColPartition_C_IT n_it(&neighbors);
bool any_part_overlap = false;
for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) {
const TBOX& n_box = n_it.data()->bounding_box();
int overlap = n_box.intersection(part_box).area();
if (overlap > 0 && overlap_grid != NULL) {
if (*overlap_grid == NULL) {
*overlap_grid = new ColPartitionGrid(gridsize(), bleft(), tright());
}
(*overlap_grid)->InsertBBox(true, true, n_it.data()->ShallowCopy());
if (!any_part_overlap) {
(*overlap_grid)->InsertBBox(true, true, part->ShallowCopy());
}
}
any_part_overlap = true;
total_overlap += overlap;
}
}
return total_overlap;
}
// Finds all the ColPartitions in the grid that overlap with the given
// box and returns them SortByBoxLeft(ed) and uniqued in the given list.
// Any partition equal to not_this (may be NULL) is excluded.
void ColPartitionGrid::FindOverlappingPartitions(const TBOX& box,
const ColPartition* not_this,
ColPartition_CLIST* parts) {
ColPartitionGridSearch rsearch(this);
rsearch.StartRectSearch(box);
ColPartition* part;
while ((part = rsearch.NextRectSearch()) != NULL) {
if (part != not_this)
parts->add_sorted(SortByBoxLeft<ColPartition>, true, part);
}
}
// Finds and returns the best candidate ColPartition to merge with part,
// selected from the candidates list, based on the minimum increase in
// pairwise overlap among all the partitions overlapped by the combined box.
// If overlap_increase is not NULL then it returns the increase in overlap
// that would result from the merge.
// confirm_cb is a permanent callback that (if non-null) will be used to
// confirm the validity of a proposed merge candidate before selecting it.
//
// ======HOW MERGING WORKS======
// The problem:
// We want to merge all the parts of a textline together, but avoid merging
// separate textlines. Diacritics, i dots, punctuation, and broken characters
// are examples of small bits that need merging with the main textline.
// Drop-caps and descenders in one line that touch ascenders in the one below
// are examples of cases where we don't want to merge.
//
// The solution:
// Merges that increase overlap among other partitions are generally bad.
// Those that don't increase overlap (much) and minimize the total area
// seem to be good.
//
// Ascii art example:
// The text:
// groggy descenders
// minimum ascenders
// The boxes: The === represents a small box near or overlapping the lower box.
// -----------------
// | |
// -----------------
// -===-------------
// | |
// -----------------
// In considering what to do with the small === box, we find the 2 larger
// boxes as neighbours and possible merge candidates, but merging with the
// upper box increases overlap with the lower box, whereas merging with the
// lower box does not increase overlap.
// If the small === box didn't overlap either to start with, total area
// would be minimized by merging with the nearer (lower) box.
//
// This is a simple example. In reality, we have to allow some increase
// in overlap, or tightly spaced text would end up in bits.
ColPartition* ColPartitionGrid::BestMergeCandidate(
const ColPartition* part, ColPartition_CLIST* candidates, bool debug,
TessResultCallback2<bool, const ColPartition*, const ColPartition*>* confirm_cb,
int* overlap_increase) {
if (overlap_increase != NULL)
*overlap_increase = 0;
if (candidates->empty())
return NULL;
int ok_overlap =
static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
// The best neighbour to merge with is the one that causes least
// total pairwise overlap among all the neighbours.
// If more than one offers the same total overlap, choose the one
// with the least total area.
const TBOX& part_box = part->bounding_box();
ColPartition_C_IT it(candidates);
ColPartition* best_candidate = NULL;
// Find the total combined box of all candidates and the original.
TBOX full_box(part_box);
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
ColPartition* candidate = it.data();
full_box += candidate->bounding_box();
}
// Keep valid neighbours in a list.
ColPartition_CLIST neighbours;
// Now run a rect search of the merged box for overlapping neighbours, as
// we need anything that might be overlapped by the merged box.
FindOverlappingPartitions(full_box, part, &neighbours);
if (debug) {
tprintf("Finding best merge candidate from %d, %d neighbours for box:",
candidates->length(), neighbours.length());
part_box.print();
}
// If the best increase in overlap is positive, then we also check the
// worst non-candidate overlap. This catches the case of multiple good
// candidates that overlap each other when merged. If the worst
// non-candidate overlap is better than the best overlap, then return
// the worst non-candidate overlap instead.
ColPartition_CLIST non_candidate_neighbours;
non_candidate_neighbours.set_subtract(SortByBoxLeft<ColPartition>, true,
&neighbours, candidates);
int worst_nc_increase = 0;
int best_increase = MAX_INT32;
int best_area = 0;
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
ColPartition* candidate = it.data();
if (confirm_cb != NULL && !confirm_cb->Run(part, candidate)) {
if (debug) {
tprintf("Candidate not confirmed:");
candidate->bounding_box().print();
}
continue;
}
int increase = IncreaseInOverlap(part, candidate, ok_overlap, &neighbours);
const TBOX& cand_box = candidate->bounding_box();
if (best_candidate == NULL || increase < best_increase) {
best_candidate = candidate;
best_increase = increase;
best_area = cand_box.bounding_union(part_box).area() - cand_box.area();
if (debug) {
tprintf("New best merge candidate has increase %d, area %d, over box:",
increase, best_area);
full_box.print();
candidate->Print();
}
} else if (increase == best_increase) {
int area = cand_box.bounding_union(part_box).area() - cand_box.area();
if (area < best_area) {
best_area = area;
best_candidate = candidate;
}
}
increase = IncreaseInOverlap(part, candidate, ok_overlap,
&non_candidate_neighbours);
if (increase > worst_nc_increase)
worst_nc_increase = increase;
}
if (best_increase > 0) {
// If the worst non-candidate increase is less than the best increase
// including the candidates, then all the candidates can merge together
// and the increase in outside overlap would be less, so use that result,
// but only if each candidate is either a good diacritic merge with part,
// or an ok merge candidate with all the others.
// See TestCompatibleCandidates for more explanation and a picture.
if (worst_nc_increase < best_increase &&
TestCompatibleCandidates(*part, debug, candidates)) {
best_increase = worst_nc_increase;
}
}
if (overlap_increase != NULL)
*overlap_increase = best_increase;
return best_candidate;
}
// Helper to remove the given box from the given partition, put it in its
// own partition, and add to the partition list.
static void RemoveBadBox(BLOBNBOX* box, ColPartition* part,
ColPartition_LIST* part_list) {
part->RemoveBox(box);
ColPartition::MakeBigPartition(box, part_list);
}
// Split partitions where it reduces overlap between their bounding boxes.
// ColPartitions are after all supposed to be a partitioning of the blobs
// AND of the space on the page!
// Blobs that cause overlaps get removed, put in individual partitions
// and added to the big_parts list. They are most likely characters on
// 2 textlines that touch, or something big like a dropcap.
void ColPartitionGrid::SplitOverlappingPartitions(
ColPartition_LIST* big_parts) {
int ok_overlap =
static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
// Iterate the ColPartitions in the grid.
ColPartitionGridSearch gsearch(this);
gsearch.StartFullSearch();
ColPartition* part;
while ((part = gsearch.NextFullSearch()) != NULL) {
// Set up a rectangle search bounded by the part.
const TBOX& box = part->bounding_box();
ColPartitionGridSearch rsearch(this);
rsearch.SetUniqueMode(true);
rsearch.StartRectSearch(box);
int unresolved_overlaps = 0;
ColPartition* neighbour;
while ((neighbour = rsearch.NextRectSearch()) != NULL) {
if (neighbour == part)
continue;
const TBOX& neighbour_box = neighbour->bounding_box();
if (neighbour->OKMergeOverlap(*part, *part, ok_overlap, false) &&
part->OKMergeOverlap(*neighbour, *neighbour, ok_overlap, false))
continue; // The overlap is OK both ways.
// If removal of the biggest box from either partition eliminates the
// overlap, and it is much bigger than the box left behind, then
// it is either a drop-cap, an inter-line join, or some junk that
// we don't want anyway, so put it in the big_parts list.
if (!part->IsSingleton()) {
BLOBNBOX* excluded = part->BiggestBox();
TBOX shrunken = part->BoundsWithoutBox(excluded);
if (!shrunken.overlap(neighbour_box) &&
excluded->bounding_box().height() >
kBigPartSizeRatio * shrunken.height()) {
// Removing the biggest box fixes the overlap, so do it!
gsearch.RemoveBBox();
RemoveBadBox(excluded, part, big_parts);
InsertBBox(true, true, part);
gsearch.RepositionIterator();
break;
}
} else if (box.contains(neighbour_box)) {
++unresolved_overlaps;
continue; // No amount of splitting will fix it.
}
if (!neighbour->IsSingleton()) {
BLOBNBOX* excluded = neighbour->BiggestBox();
TBOX shrunken = neighbour->BoundsWithoutBox(excluded);
if (!shrunken.overlap(box) &&
excluded->bounding_box().height() >
kBigPartSizeRatio * shrunken.height()) {
// Removing the biggest box fixes the overlap, so do it!
rsearch.RemoveBBox();
RemoveBadBox(excluded, neighbour, big_parts);
InsertBBox(true, true, neighbour);
gsearch.RepositionIterator();
break;
}
}
int part_overlap_count = part->CountOverlappingBoxes(neighbour_box);
int neighbour_overlap_count = neighbour->CountOverlappingBoxes(box);
ColPartition* right_part = NULL;
if (neighbour_overlap_count <= part_overlap_count ||
part->IsSingleton()) {
// Try to split the neighbour to reduce overlap.
BLOBNBOX* split_blob = neighbour->OverlapSplitBlob(box);
if (split_blob != NULL) {
rsearch.RemoveBBox();
right_part = neighbour->SplitAtBlob(split_blob);
InsertBBox(true, true, neighbour);
ASSERT_HOST(right_part != NULL);
}
} else {
// Try to split part to reduce overlap.
BLOBNBOX* split_blob = part->OverlapSplitBlob(neighbour_box);
if (split_blob != NULL) {
gsearch.RemoveBBox();
right_part = part->SplitAtBlob(split_blob);
InsertBBox(true, true, part);
ASSERT_HOST(right_part != NULL);
}
}
if (right_part != NULL) {
InsertBBox(true, true, right_part);
gsearch.RepositionIterator();
rsearch.RepositionIterator();
break;
}
}
if (unresolved_overlaps > 2 && part->IsSingleton()) {
// This part is no good so just add to big_parts.
RemoveBBox(part);
ColPartition_IT big_it(big_parts);
part->set_block_owned(true);
big_it.add_to_end(part);
gsearch.RepositionIterator();
}
}
}
// Filters partitions of source_type by looking at local neighbours.
// Where a majority of neighbours have a text type, the partitions are
// changed to text, where the neighbours have image type, they are changed
// to image, and partitions that have no definite neighbourhood type are
// left unchanged.
// im_box and rerotation are used to map blob coordinates onto the
// nontext_map, which is used to prevent the spread of text neighbourhoods
// into images.
// Returns true if anything was changed.
bool ColPartitionGrid::GridSmoothNeighbours(BlobTextFlowType source_type,
Pix* nontext_map,
const TBOX& im_box,
const FCOORD& rotation) {
// Iterate the ColPartitions in the grid.
ColPartitionGridSearch gsearch(this);
gsearch.StartFullSearch();
ColPartition* part;
bool any_changed = false;
while ((part = gsearch.NextFullSearch()) != NULL) {
if (part->flow() != source_type || BLOBNBOX::IsLineType(part->blob_type()))
continue;
const TBOX& box = part->bounding_box();
bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom());
if (SmoothRegionType(nontext_map, im_box, rotation, debug, part))
any_changed = true;
}
return any_changed;
}
// Compute the mean RGB of the light and dark pixels in each ColPartition
// and also the rms error in the linearity of color.
void ColPartitionGrid::ComputePartitionColors(Pix* scaled_color,
int scaled_factor,
const FCOORD& rerotation) {
if (scaled_color == NULL)
return;
Pix* color_map1 = NULL;
Pix* color_map2 = NULL;
Pix* rms_map = NULL;
if (textord_tabfind_show_color_fit) {
int width = pixGetWidth(scaled_color);
int height = pixGetHeight(scaled_color);
color_map1 = pixCreate(width, height, 32);
color_map2 = pixCreate(width, height, 32);
rms_map = pixCreate(width, height, 8);
}
// Iterate the ColPartitions in the grid.
ColPartitionGridSearch gsearch(this);
gsearch.StartFullSearch();
ColPartition* part;
while ((part = gsearch.NextFullSearch()) != NULL) {
TBOX part_box = part->bounding_box();
part_box.rotate_large(rerotation);
ImageFind::ComputeRectangleColors(part_box, scaled_color,
scaled_factor,
color_map1, color_map2, rms_map,
part->color1(), part->color2());
}
if (color_map1 != NULL) {
pixWrite("swcolorinput.png", scaled_color, IFF_PNG);
pixWrite("swcolor1.png", color_map1, IFF_PNG);
pixWrite("swcolor2.png", color_map2, IFF_PNG);
pixWrite("swrms.png", rms_map, IFF_PNG);
pixDestroy(&color_map1);
pixDestroy(&color_map2);
pixDestroy(&rms_map);
}
}
// Reflects the grid and its colpartitions in the y-axis, assuming that
// all blob boxes have already been done.
void ColPartitionGrid::ReflectInYAxis() {
ColPartition_LIST parts;
ColPartition_IT part_it(&parts);
// Iterate the ColPartitions in the grid to extract them.
ColPartitionGridSearch gsearch(this);
gsearch.StartFullSearch();
ColPartition* part;
while ((part = gsearch.NextFullSearch()) != NULL) {
part_it.add_after_then_move(part);
}
ICOORD bot_left(-tright().x(), bleft().y());
ICOORD top_right(-bleft().x(), tright().y());
// Reinitializing the grid with reflected coords also clears all the
// pointers, so parts will now own the ColPartitions. (Briefly).
Init(gridsize(), bot_left, top_right);
for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
part = part_it.extract();
part->ReflectInYAxis();
InsertBBox(true, true, part);
}
}
// Transforms the grid of partitions to the output blocks, putting each
// partition into a separate block. We don't really care about the order,
// as we just want to get as much text as possible without trying to organize
// it into proper blocks or columns.
// TODO(rays) some kind of sort function would be useful and probably better
// than the default here, which is to sort by order of the grid search.
void ColPartitionGrid::ExtractPartitionsAsBlocks(BLOCK_LIST* blocks,
TO_BLOCK_LIST* to_blocks) {
TO_BLOCK_IT to_block_it(to_blocks);
BLOCK_IT block_it(blocks);
// All partitions will be put on this list and deleted on return.
ColPartition_LIST parts;
ColPartition_IT part_it(&parts);
// Iterate the ColPartitions in the grid to extract them.
ColPartitionGridSearch gsearch(this);
gsearch.StartFullSearch();
ColPartition* part;
while ((part = gsearch.NextFullSearch()) != NULL) {
part_it.add_after_then_move(part);
// The partition has to be at least vaguely like text.
BlobRegionType blob_type = part->blob_type();
if (BLOBNBOX::IsTextType(blob_type) ||
(blob_type == BRT_UNKNOWN && part->boxes_count() > 1)) {
PolyBlockType type = blob_type == BRT_VERT_TEXT ? PT_VERTICAL_TEXT
: PT_FLOWING_TEXT;
// Get metrics from the row that will be used for the block.
TBOX box = part->bounding_box();
int median_width = part->median_width();
int median_height = part->median_size();
// Turn the partition into a TO_ROW.
TO_ROW* row = part->MakeToRow();
if (row == NULL) {
// This partition is dead.
part->DeleteBoxes();
continue;
}
BLOCK* block = new BLOCK("", true, 0, 0, box.left(), box.bottom(),
box.right(), box.top());
block->set_poly_block(new POLY_BLOCK(box, type));
TO_BLOCK* to_block = new TO_BLOCK(block);
TO_ROW_IT row_it(to_block->get_rows());
row_it.add_after_then_move(row);
// We haven't differentially rotated vertical and horizontal text at
// this point, so use width or height as appropriate.
if (blob_type == BRT_VERT_TEXT) {
to_block->line_size = static_cast<float>(median_width);
to_block->line_spacing = static_cast<float>(box.width());
to_block->max_blob_size = static_cast<float>(box.width() + 1);
} else {
to_block->line_size = static_cast<float>(median_height);
to_block->line_spacing = static_cast<float>(box.height());
to_block->max_blob_size = static_cast<float>(box.height() + 1);
}
block_it.add_to_end(block);
to_block_it.add_to_end(to_block);
} else {
// This partition is dead.
part->DeleteBoxes();
}
}
Clear();
// Now it is safe to delete the ColPartitions as parts goes out of scope.
}
// Rotates the grid and its colpartitions by the given angle, assuming that
// all blob boxes have already been done.
void ColPartitionGrid::Deskew(const FCOORD& deskew) {
ColPartition_LIST parts;
ColPartition_IT part_it(&parts);
// Iterate the ColPartitions in the grid to extract them.
ColPartitionGridSearch gsearch(this);
gsearch.StartFullSearch();
ColPartition* part;
while ((part = gsearch.NextFullSearch()) != NULL) {
part_it.add_after_then_move(part);
}
// Rebuild the grid to the new size.
TBOX grid_box(bleft_, tright_);
grid_box.rotate_large(deskew);
Init(gridsize(), grid_box.botleft(), grid_box.topright());
// Reinitializing the grid with rotated coords also clears all the
// pointers, so parts will now own the ColPartitions. (Briefly).
for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
part = part_it.extract();
part->ComputeLimits();
InsertBBox(true, true, part);
}
}
// Sets the left and right tabs of the partitions in the grid.
void ColPartitionGrid::SetTabStops(TabFind* tabgrid) {
// Iterate the ColPartitions in the grid.
ColPartitionGridSearch gsearch(this);
gsearch.StartFullSearch();
ColPartition* part;
while ((part = gsearch.NextFullSearch()) != NULL) {
const TBOX& part_box = part->bounding_box();
TabVector* left_line = tabgrid->LeftTabForBox(part_box, true, false);
// If the overlapping line is not a left tab, try for non-overlapping.
if (left_line != NULL && !left_line->IsLeftTab())
left_line = tabgrid->LeftTabForBox(part_box, false, false);
if (left_line != NULL && left_line->IsLeftTab())
part->SetLeftTab(left_line);
TabVector* right_line = tabgrid->RightTabForBox(part_box, true, false);
if (right_line != NULL && !right_line->IsRightTab())
right_line = tabgrid->RightTabForBox(part_box, false, false);
if (right_line != NULL && right_line->IsRightTab())
part->SetRightTab(right_line);
part->SetColumnGoodness(tabgrid->WidthCB());
}
}
// Makes the ColPartSets and puts them in the PartSetVector ready
// for finding column bounds. Returns false if no partitions were found.
bool ColPartitionGrid::MakeColPartSets(PartSetVector* part_sets) {
ColPartition_LIST* part_lists = new ColPartition_LIST[gridheight()];
part_sets->reserve(gridheight());
// Iterate the ColPartitions in the grid to get parts onto lists for the
// y bottom of each.
ColPartitionGridSearch gsearch(this);
gsearch.StartFullSearch();
ColPartition* part;
bool any_parts_found = false;
while ((part = gsearch.NextFullSearch()) != NULL) {
BlobRegionType blob_type = part->blob_type();
if (blob_type != BRT_NOISE &&
(blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) {
int grid_x, grid_y;
const TBOX& part_box = part->bounding_box();
GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y);
ColPartition_IT part_it(&part_lists[grid_y]);
part_it.add_to_end(part);
any_parts_found = true;
}
}
if (any_parts_found) {
for (int grid_y = 0; grid_y < gridheight(); ++grid_y) {
ColPartitionSet* line_set = NULL;
if (!part_lists[grid_y].empty()) {
line_set = new ColPartitionSet(&part_lists[grid_y]);
}
part_sets->push_back(line_set);
}
}
delete [] part_lists;
return any_parts_found;
}
// Makes a single ColPartitionSet consisting of a single ColPartition that
// represents the total horizontal extent of the significant content on the
// page. Used for the single column setting in place of automatic detection.
// Returns NULL if the page is empty of significant content.
ColPartitionSet* ColPartitionGrid::MakeSingleColumnSet(WidthCallback* cb) {
ColPartition* single_column_part = NULL;
// Iterate the ColPartitions in the grid to get parts onto lists for the
// y bottom of each.
ColPartitionGridSearch gsearch(this);
gsearch.StartFullSearch();
ColPartition* part;
while ((part = gsearch.NextFullSearch()) != NULL) {
BlobRegionType blob_type = part->blob_type();
if (blob_type != BRT_NOISE &&
(blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) {
// Consider for single column.
BlobTextFlowType flow = part->flow();
if ((blob_type == BRT_TEXT &&
(flow == BTFT_STRONG_CHAIN || flow == BTFT_CHAIN ||
flow == BTFT_LEADER || flow == BTFT_TEXT_ON_IMAGE)) ||
blob_type == BRT_RECTIMAGE || blob_type == BRT_POLYIMAGE) {
if (single_column_part == NULL) {
single_column_part = part->ShallowCopy();
single_column_part->set_blob_type(BRT_TEXT);
// Copy the tabs from itself to properly setup the margins.
single_column_part->CopyLeftTab(*single_column_part, false);
single_column_part->CopyRightTab(*single_column_part, false);
} else {
if (part->left_key() < single_column_part->left_key())
single_column_part->CopyLeftTab(*part, false);
if (part->right_key() > single_column_part->right_key())
single_column_part->CopyRightTab(*part, false);
}
}
}
}
if (single_column_part != NULL) {
// Make a ColPartitionSet out of the single_column_part as a candidate
// for the single column case.
single_column_part->SetColumnGoodness(cb);
return new ColPartitionSet(single_column_part);
}
return NULL;
}
// Mark the BLOBNBOXes in each partition as being owned by that partition.
void ColPartitionGrid::ClaimBoxes() {
// Iterate the ColPartitions in the grid.
ColPartitionGridSearch gsearch(this);
gsearch.StartFullSearch();
ColPartition* part;
while ((part = gsearch.NextFullSearch()) != NULL) {
part->ClaimBoxes();
}
}
// Retypes all the blobs referenced by the partitions in the grid.
// Image blobs are found and returned in the im_blobs list, as they are not
// owned by the block.
void ColPartitionGrid::ReTypeBlobs(BLOBNBOX_LIST* im_blobs) {
BLOBNBOX_IT im_blob_it(im_blobs);
ColPartition_LIST dead_parts;
ColPartition_IT dead_part_it(&dead_parts);
// Iterate the ColPartitions in the grid.
ColPartitionGridSearch gsearch(this);
gsearch.StartFullSearch();
ColPartition* part;
while ((part = gsearch.NextFullSearch()) != NULL) {
BlobRegionType blob_type = part->blob_type();
BlobTextFlowType flow = part->flow();
bool any_blobs_moved = false;
if (blob_type == BRT_POLYIMAGE || blob_type == BRT_RECTIMAGE) {
BLOBNBOX_C_IT blob_it(part->boxes());
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
BLOBNBOX* blob = blob_it.data();
im_blob_it.add_after_then_move(blob);
}
} else if (blob_type != BRT_NOISE) {
// Make sure the blobs are marked with the correct type and flow.
BLOBNBOX_C_IT blob_it(part->boxes());
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
BLOBNBOX* blob = blob_it.data();
if (blob->region_type() == BRT_NOISE) {
// TODO(rays) Deprecated. Change this section to an assert to verify
// and then delete.
ASSERT_HOST(blob->cblob()->area() != 0);
blob->set_owner(NULL);
blob_it.extract();
any_blobs_moved = true;
} else {
blob->set_region_type(blob_type);
if (blob->flow() != BTFT_LEADER)
blob->set_flow(flow);
}
}
}
if (blob_type == BRT_NOISE || part->boxes()->empty()) {
BLOBNBOX_C_IT blob_it(part->boxes());
part->DisownBoxes();
dead_part_it.add_to_end(part);
gsearch.RemoveBBox();
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
BLOBNBOX* blob = blob_it.data();
if (blob->cblob()->area() == 0) {
// Any blob with zero area is a fake image blob and should be deleted.
delete blob->cblob();
delete blob;
}
}
} else if (any_blobs_moved) {
gsearch.RemoveBBox();
part->ComputeLimits();
InsertBBox(true, true, part);
gsearch.RepositionIterator();
}
}
}
// The boxes within the partitions have changed (by deskew) so recompute
// the bounds of all the partitions and reinsert them into the grid.
void ColPartitionGrid::RecomputeBounds(int gridsize,
const ICOORD& bleft,
const ICOORD& tright,
const ICOORD& vertical) {
ColPartition_LIST saved_parts;
ColPartition_IT part_it(&saved_parts);
// Iterate the ColPartitions in the grid to get parts onto a list.
ColPartitionGridSearch gsearch(this);
gsearch.StartFullSearch();
ColPartition* part;
while ((part = gsearch.NextFullSearch()) != NULL) {
part_it.add_to_end(part);
}
// Reinitialize grid to the new size.
Init(gridsize, bleft, tright);
// Recompute the bounds of the parts and put them back in the new grid.
for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
part = part_it.extract();
part->set_vertical(vertical);
part->ComputeLimits();
InsertBBox(true, true, part);
}
}
// Improves the margins of the ColPartitions in the grid by calling
// FindPartitionMargins on each.
// best_columns, which may be NULL, is an array of pointers indicating the
// column set at each y-coordinate in the grid.
// best_columns is usually the best_columns_ member of ColumnFinder.
void ColPartitionGrid::GridFindMargins(ColPartitionSet** best_columns) {
// Iterate the ColPartitions in the grid.
ColPartitionGridSearch gsearch(this);
gsearch.StartFullSearch();
ColPartition* part;
while ((part = gsearch.NextFullSearch()) != NULL) {
// Set up a rectangle search x-bounded by the column and y by the part.
ColPartitionSet* columns = best_columns != NULL
? best_columns[gsearch.GridY()]
: NULL;
FindPartitionMargins(columns, part);
const TBOX& box = part->bounding_box();
if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom())) {
tprintf("Computed margins for part:");
part->Print();
}
}
}
// Improves the margins of the ColPartitions in the list by calling
// FindPartitionMargins on each.
// best_columns, which may be NULL, is an array of pointers indicating the
// column set at each y-coordinate in the grid.
// best_columns is usually the best_columns_ member of ColumnFinder.
void ColPartitionGrid::ListFindMargins(ColPartitionSet** best_columns,
ColPartition_LIST* parts) {
ColPartition_IT part_it(parts);
for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
ColPartition* part = part_it.data();
ColPartitionSet* columns = NULL;
if (best_columns != NULL) {
TBOX part_box = part->bounding_box();
// Get the columns from the y grid coord.
int grid_x, grid_y;
GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y);
columns = best_columns[grid_y];
}
FindPartitionMargins(columns, part);
}
}
// Deletes all the partitions in the grid after disowning all the blobs.
void ColPartitionGrid::DeleteParts() {
ColPartition_LIST dead_parts;
ColPartition_IT dead_it(&dead_parts);
ColPartitionGridSearch gsearch(this);
gsearch.StartFullSearch();
ColPartition* part;
while ((part = gsearch.NextFullSearch()) != NULL) {
part->DisownBoxes();
dead_it.add_to_end(part); // Parts will be deleted on return.
}
Clear();
}
// Deletes all the partitions in the grid that are of type BRT_UNKNOWN and
// all the blobs in them.
void ColPartitionGrid::DeleteUnknownParts(TO_BLOCK* block) {
ColPartitionGridSearch gsearch(this);
gsearch.StartFullSearch();
ColPartition* part;
while ((part = gsearch.NextFullSearch()) != NULL) {
if (part->blob_type() == BRT_UNKNOWN) {
gsearch.RemoveBBox();
// Once marked, the blobs will be swept up by DeleteUnownedNoise.
part->set_flow(BTFT_NONTEXT);
part->set_blob_type(BRT_NOISE);
part->SetBlobTypes();
part->DisownBoxes();
delete part;
}
}
block->DeleteUnownedNoise();
}
// Deletes all the partitions in the grid that are NOT of flow type BTFT_LEADER.
void ColPartitionGrid::DeleteNonLeaderParts() {
ColPartitionGridSearch gsearch(this);
gsearch.StartFullSearch();
ColPartition* part;
while ((part = gsearch.NextFullSearch()) != NULL) {
if (part->flow() != BTFT_LEADER) {
gsearch.RemoveBBox();
if (part->ReleaseNonLeaderBoxes()) {
InsertBBox(true, true, part);
gsearch.RepositionIterator();
} else {
delete part;
}
}
}
}
// Finds and marks text partitions that represent figure captions.
void ColPartitionGrid::FindFigureCaptions() {
// For each image region find its best candidate text caption region,
// if any and mark it as such.
ColPartitionGridSearch gsearch(this);
gsearch.StartFullSearch();
ColPartition* part;
while ((part = gsearch.NextFullSearch()) != NULL) {
if (part->IsImageType()) {
const TBOX& part_box = part->bounding_box();
bool debug = AlignedBlob::WithinTestRegion(2, part_box.left(),
part_box.bottom());
ColPartition* best_caption = NULL;
int best_dist = 0; // Distance to best_caption.
int best_upper = 0; // Direction of best_caption.
// Handle both lower and upper directions.
for (int upper = 0; upper < 2; ++upper) {
ColPartition_C_IT partner_it(upper ? part->upper_partners()
: part->lower_partners());
// If there are no image partners, then this direction is ok.
for (partner_it.mark_cycle_pt(); !partner_it.cycled_list();
partner_it.forward()) {
ColPartition* partner = partner_it.data();
if (partner->IsImageType()) {
break;
}
}
if (!partner_it.cycled_list()) continue;
// Find the nearest totally overlapping text partner.
for (partner_it.mark_cycle_pt(); !partner_it.cycled_list();
partner_it.forward()) {
ColPartition* partner = partner_it.data();
if (!partner->IsTextType() || partner->type() == PT_TABLE) continue;
const TBOX& partner_box = partner->bounding_box();
if (debug) {
tprintf("Finding figure captions for image part:");
part_box.print();
tprintf("Considering partner:");
partner_box.print();
}
if (partner_box.left() >= part_box.left() &&
partner_box.right() <= part_box.right()) {
int dist = partner_box.y_gap(part_box);
if (best_caption == NULL || dist < best_dist) {
best_dist = dist;
best_caption = partner;
best_upper = upper;
}
}
}
}
if (best_caption != NULL) {
if (debug) {
tprintf("Best caption candidate:");
best_caption->bounding_box().print();
}
// We have a candidate caption. Qualify it as being separable from
// any body text. We are looking for either a small number of lines
// or a big gap that indicates a separation from the body text.
int line_count = 0;
int biggest_gap = 0;
int smallest_gap = MAX_INT16;
int total_height = 0;
int mean_height = 0;
ColPartition* end_partner = NULL;
ColPartition* next_partner = NULL;
for (ColPartition* partner = best_caption; partner != NULL &&
line_count <= kMaxCaptionLines;
partner = next_partner) {
if (!partner->IsTextType()) {
end_partner = partner;
break;
}
++line_count;
total_height += partner->bounding_box().height();
next_partner = partner->SingletonPartner(best_upper);
if (next_partner != NULL) {
int gap = partner->bounding_box().y_gap(
next_partner->bounding_box());
if (gap > biggest_gap) {
biggest_gap = gap;
end_partner = next_partner;
mean_height = total_height / line_count;
} else if (gap < smallest_gap) {
smallest_gap = gap;
}
// If the gap looks big compared to the text size and the smallest
// gap seen so far, then we can stop.
if (biggest_gap > mean_height * kMinCaptionGapHeightRatio &&
biggest_gap > smallest_gap * kMinCaptionGapRatio)
break;
}
}
if (debug) {
tprintf("Line count=%d, biggest gap %d, smallest%d, mean height %d\n",
line_count, biggest_gap, smallest_gap, mean_height);
if (end_partner != NULL) {
tprintf("End partner:");
end_partner->bounding_box().print();
}
}
if (next_partner == NULL && line_count <= kMaxCaptionLines)
end_partner = NULL; // No gap, but line count is small.
if (line_count <= kMaxCaptionLines) {
// This is a qualified caption. Mark the text as caption.
for (ColPartition* partner = best_caption; partner != NULL &&
partner != end_partner;
partner = next_partner) {
partner->set_type(PT_CAPTION_TEXT);
partner->SetBlobTypes();
if (debug) {
tprintf("Set caption type for partition:");
partner->bounding_box().print();
}
next_partner = partner->SingletonPartner(best_upper);
}
}
}
}
}
}
//////// Functions that manipulate ColPartitions in the part_grid_ /////
//////// to find chains of partner partitions of the same type. ///////
// For every ColPartition in the grid, finds its upper and lower neighbours.
void ColPartitionGrid::FindPartitionPartners() {
ColPartitionGridSearch gsearch(this);
gsearch.StartFullSearch();
ColPartition* part;
while ((part = gsearch.NextFullSearch()) != NULL) {
if (part->IsVerticalType()) {
FindVPartitionPartners(true, part);
FindVPartitionPartners(false, part);
} else {
FindPartitionPartners(true, part);
FindPartitionPartners(false, part);
}
}
}
// Finds the best partner in the given direction for the given partition.
// Stores the result with AddPartner.
void ColPartitionGrid::FindPartitionPartners(bool upper, ColPartition* part) {
if (part->type() == PT_NOISE)
return; // Noise is not allowed to partner anything.
const TBOX& box = part->bounding_box();
int top = part->median_top();
int bottom = part->median_bottom();
int height = top - bottom;
int mid_y = (bottom + top) / 2;
ColPartitionGridSearch vsearch(this);
// Search down for neighbour below
vsearch.StartVerticalSearch(box.left(), box.right(), part->MidY());
ColPartition* neighbour;
ColPartition* best_neighbour = NULL;
int best_dist = MAX_INT32;
while ((neighbour = vsearch.NextVerticalSearch(!upper)) != NULL) {
if (neighbour == part || neighbour->type() == PT_NOISE)
continue; // Noise is not allowed to partner anything.
int neighbour_bottom = neighbour->median_bottom();
int neighbour_top = neighbour->median_top();
int neighbour_y = (neighbour_bottom + neighbour_top) / 2;
if (upper != (neighbour_y > mid_y))
continue;
if (!part->HOverlaps(*neighbour) && !part->WithinSameMargins(*neighbour))
continue;
if (!part->TypesMatch(*neighbour)) {
if (best_neighbour == NULL)
best_neighbour = neighbour;
continue;
}
int dist = upper ? neighbour_bottom - top : bottom - neighbour_top;
if (dist <= kMaxPartitionSpacing * height) {
if (dist < best_dist) {
best_dist = dist;
best_neighbour = neighbour;
}
} else {
break;
}
}
if (best_neighbour != NULL)
part->AddPartner(upper, best_neighbour);
}
// Finds the best partner in the given direction for the given partition.
// Stores the result with AddPartner.
void ColPartitionGrid::FindVPartitionPartners(bool to_the_left,
ColPartition* part) {
if (part->type() == PT_NOISE)
return; // Noise is not allowed to partner anything.
const TBOX& box = part->bounding_box();
int left = part->median_left();
int right = part->median_right();
int width = right - left;
int mid_x = (left + right) / 2;
ColPartitionGridSearch hsearch(this);
// Search left for neighbour to_the_left
hsearch.StartSideSearch(mid_x, box.bottom(), box.top());
ColPartition* neighbour;
ColPartition* best_neighbour = NULL;
int best_dist = MAX_INT32;
while ((neighbour = hsearch.NextSideSearch(to_the_left)) != NULL) {
if (neighbour == part || neighbour->type() == PT_NOISE)
continue; // Noise is not allowed to partner anything.
int neighbour_left = neighbour->median_left();
int neighbour_right = neighbour->median_right();
int neighbour_x = (neighbour_left + neighbour_right) / 2;
if (to_the_left != (neighbour_x < mid_x))
continue;
if (!part->VOverlaps(*neighbour))
continue;
if (!part->TypesMatch(*neighbour))
continue; // Only match to other vertical text.
int dist = to_the_left ? left - neighbour_right : neighbour_left - right;
if (dist <= kMaxPartitionSpacing * width) {
if (dist < best_dist || best_neighbour == NULL) {
best_dist = dist;
best_neighbour = neighbour;
}
} else {
break;
}
}
// For vertical partitions, the upper partner is to the left, and lower is
// to the right.
if (best_neighbour != NULL)
part->AddPartner(to_the_left, best_neighbour);
}
// For every ColPartition with multiple partners in the grid, reduces the
// number of partners to 0 or 1. If get_desperate is true, goes to more
// desperate merge methods to merge flowing text before breaking partnerships.
void ColPartitionGrid::RefinePartitionPartners(bool get_desperate) {
ColPartitionGridSearch gsearch(this);
// Refine in type order so that chasing multiple partners can be done
// before eliminating type mis-matching partners.
for (int type = PT_UNKNOWN + 1; type <= PT_COUNT; type++) {
// Iterate the ColPartitions in the grid.
gsearch.StartFullSearch();
ColPartition* part;
while ((part = gsearch.NextFullSearch()) != NULL) {
part->RefinePartners(static_cast<PolyBlockType>(type),
get_desperate, this);
// Iterator may have been messed up by a merge.
gsearch.RepositionIterator();
}
}
}
// ========================== PRIVATE CODE ========================
// Finds and returns a list of candidate ColPartitions to merge with part.
// The candidates must overlap search_box, and when merged must not
// overlap any other partitions that are not overlapped by each individually.
void ColPartitionGrid::FindMergeCandidates(const ColPartition* part,
const TBOX& search_box, bool debug,
ColPartition_CLIST* candidates) {
int ok_overlap =
static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
const TBOX& part_box = part->bounding_box();
// Now run the rect search.
ColPartitionGridSearch rsearch(this);
rsearch.SetUniqueMode(true);
rsearch.StartRectSearch(search_box);
ColPartition* candidate;
while ((candidate = rsearch.NextRectSearch()) != NULL) {
if (!OKMergeCandidate(part, candidate, debug))
continue;
const TBOX& c_box = candidate->bounding_box();
// Candidate seems to be a potential merge with part. If one contains
// the other, then the merge is a no-brainer. Otherwise, search the
// combined box to see if anything else is inappropriately overlapped.
if (!part_box.contains(c_box) && !c_box.contains(part_box)) {
// Search the combined rectangle to see if anything new is overlapped.
// This is a preliminary test designed to quickly weed-out stupid
// merge candidates that would create a big list of overlapped objects
// for the squared-order overlap analysis. Eg. vertical and horizontal
// line-like objects that overlap real text when merged:
// || ==========================
// ||
// || r e a l t e x t
// ||
// ||
TBOX merged_box(part_box);
merged_box += c_box;
ColPartitionGridSearch msearch(this);
msearch.SetUniqueMode(true);
msearch.StartRectSearch(merged_box);
ColPartition* neighbour;
while ((neighbour = msearch.NextRectSearch()) != NULL) {
if (neighbour == part || neighbour == candidate)
continue; // Ignore itself.
if (neighbour->OKMergeOverlap(*part, *candidate, ok_overlap, false))
continue; // This kind of merge overlap is OK.
TBOX n_box = neighbour->bounding_box();
// The overlap is OK if:
// * the n_box already overlapped the part or the candidate OR
// * the n_box is a suitable merge with either part or candidate
if (!n_box.overlap(part_box) && !n_box.overlap(c_box) &&
!OKMergeCandidate(part, neighbour, false) &&
!OKMergeCandidate(candidate, neighbour, false))
break;
}
if (neighbour != NULL) {
if (debug) {
tprintf("Combined box overlaps another that is not OK despite"
" allowance of %d:", ok_overlap);
neighbour->bounding_box().print();
tprintf("Reason:");
OKMergeCandidate(part, neighbour, true);
tprintf("...and:");
OKMergeCandidate(candidate, neighbour, true);
tprintf("Overlap:");
neighbour->OKMergeOverlap(*part, *candidate, ok_overlap, true);
}
continue;
}
}
if (debug) {
tprintf("Adding candidate:");
candidate->bounding_box().print();
}
// Unique elements as they arrive.
candidates->add_sorted(SortByBoxLeft<ColPartition>, true, candidate);
}
}
// Smoothes the region type/flow type of the given part by looking at local
// neigbours and the given image mask. Searches a padded rectangle with the
// padding truncated on one size of the part's box in turn for each side,
// using the result (if any) that has the least distance to all neighbours
// that contribute to the decision. This biases in favor of rectangular
// regions without completely enforcing them.
// If a good decision cannot be reached, the part is left unchanged.
// im_box and rerotation are used to map blob coordinates onto the
// nontext_map, which is used to prevent the spread of text neighbourhoods
// into images.
// Returns true if the partition was changed.
bool ColPartitionGrid::SmoothRegionType(Pix* nontext_map,
const TBOX& im_box,
const FCOORD& rerotation,
bool debug,
ColPartition* part) {
const TBOX& part_box = part->bounding_box();
if (debug) {
tprintf("Smooothing part at:");
part_box.print();
}
BlobRegionType best_type = BRT_UNKNOWN;
int best_dist = MAX_INT32;
int max_dist = MIN(part_box.width(), part_box.height());
max_dist = MAX(max_dist * kMaxNeighbourDistFactor, gridsize() * 2);
// Search with the pad truncated on each side of the box in turn.
bool any_image = false;
bool all_image = true;
for (int d = 0; d < BND_COUNT; ++d) {
int dist;
BlobNeighbourDir dir = static_cast<BlobNeighbourDir>(d);
BlobRegionType type = SmoothInOneDirection(dir, nontext_map, im_box,
rerotation, debug, *part,
&dist);
if (debug) {
tprintf("Result in dir %d = %d at dist %d\n", dir, type, dist);
}
if (type != BRT_UNKNOWN && dist < best_dist) {
best_dist = dist;
best_type = type;
}
if (type == BRT_POLYIMAGE)
any_image = true;
else
all_image = false;
}
if (best_dist > max_dist)
return false; // Too far away to set the type with it.
if (part->flow() == BTFT_STRONG_CHAIN && !all_image) {
return false; // We are not modifying it.
}
BlobRegionType new_type = part->blob_type();
BlobTextFlowType new_flow = part->flow();
if (best_type == BRT_TEXT && !any_image) {
new_flow = BTFT_STRONG_CHAIN;
new_type = BRT_TEXT;
} else if (best_type == BRT_VERT_TEXT && !any_image) {
new_flow = BTFT_STRONG_CHAIN;
new_type = BRT_VERT_TEXT;
} else if (best_type == BRT_POLYIMAGE) {
new_flow = BTFT_NONTEXT;
new_type = BRT_UNKNOWN;
}
if (new_type != part->blob_type() || new_flow != part->flow()) {
part->set_flow(new_flow);
part->set_blob_type(new_type);
part->SetBlobTypes();
if (debug) {
tprintf("Modified part:");
part->Print();
}
return true;
} else {
return false;
}
}
// Sets up a search box based on the part_box, padded in all directions
// except direction. Also setup dist_scaling to weight x,y distances according
// to the given direction.
static void ComputeSearchBoxAndScaling(BlobNeighbourDir direction,
const TBOX& part_box,
int min_padding,
TBOX* search_box,
ICOORD* dist_scaling) {
*search_box = part_box;
// Generate a pad value based on the min dimension of part_box, but at least
// min_padding and then scaled by kMaxPadFactor.
int padding = MIN(part_box.height(), part_box.width());
padding = MAX(padding, min_padding);
padding *= kMaxPadFactor;
search_box->pad(padding, padding);
// Truncate the box in the appropriate direction and make the distance
// metric slightly biased in the truncated direction.
switch (direction) {
case BND_LEFT:
search_box->set_left(part_box.left());
*dist_scaling = ICOORD(2, 1);
break;
case BND_BELOW:
search_box->set_bottom(part_box.bottom());
*dist_scaling = ICOORD(1, 2);
break;
case BND_RIGHT:
search_box->set_right(part_box.right());
*dist_scaling = ICOORD(2, 1);
break;
case BND_ABOVE:
search_box->set_top(part_box.top());
*dist_scaling = ICOORD(1, 2);
break;
default:
ASSERT_HOST(false);
}
}
// Local enum used by SmoothInOneDirection and AccumulatePartDistances
// for the different types of partition neighbour.
enum NeighbourPartitionType {
NPT_HTEXT, // Definite horizontal text.
NPT_VTEXT, // Definite vertical text.
NPT_WEAK_HTEXT, // Weakly horizontal text. Counts as HTEXT for HTEXT, but
// image for image and VTEXT.
NPT_WEAK_VTEXT, // Weakly vertical text. Counts as VTEXT for VTEXT, but
// image for image and HTEXT.
NPT_IMAGE, // Defininte non-text.
NPT_COUNT // Number of array elements.
};
// Executes the search for SmoothRegionType in a single direction.
// Creates a bounding box that is padded in all directions except direction,
// and searches it for other partitions. Finds the nearest collection of
// partitions that makes a decisive result (if any) and returns the type
// and the distance of the collection. If there are any pixels in the
// nontext_map, then the decision is biased towards image.
BlobRegionType ColPartitionGrid::SmoothInOneDirection(
BlobNeighbourDir direction, Pix* nontext_map,
const TBOX& im_box, const FCOORD& rerotation,
bool debug, const ColPartition& part, int* best_distance) {
// Set up a rectangle search bounded by the part.
TBOX part_box = part.bounding_box();
TBOX search_box;
ICOORD dist_scaling;
ComputeSearchBoxAndScaling(direction, part_box, gridsize(),
&search_box, &dist_scaling);
bool image_region = ImageFind::CountPixelsInRotatedBox(search_box, im_box,
rerotation,
nontext_map) > 0;
GenericVector<int> dists[NPT_COUNT];
AccumulatePartDistances(part, dist_scaling, search_box,
nontext_map, im_box, rerotation, debug, dists);
// By iteratively including the next smallest distance across the vectors,
// (as in a merge sort) we can use the vector indices as counts of each type
// and find the nearest set of objects that give us a definite decision.
int counts[NPT_COUNT];
memset(counts, 0, sizeof(counts[0]) * NPT_COUNT);
// If there is image in the search box, tip the balance in image's favor.
int image_bias = image_region ? kSmoothDecisionMargin / 2 : 0;
BlobRegionType text_dir = part.blob_type();
BlobTextFlowType flow_type = part.flow();
int min_dist = 0;
do {
// Find the minimum new entry across the vectors
min_dist = MAX_INT32;
for (int i = 0; i < NPT_COUNT; ++i) {
if (counts[i] < dists[i].size() && dists[i][counts[i]] < min_dist)
min_dist = dists[i][counts[i]];
}
// Step all the indices/counts forward to include min_dist.
for (int i = 0; i < NPT_COUNT; ++i) {
while (counts[i] < dists[i].size() && dists[i][counts[i]] <= min_dist)
++counts[i];
}
*best_distance = min_dist;
if (debug) {
tprintf("Totals: htext=%d+%d, vtext=%d+%d, image=%d+%d, at dist=%d\n",
counts[NPT_HTEXT], counts[NPT_WEAK_HTEXT],
counts[NPT_VTEXT], counts[NPT_WEAK_VTEXT],
counts[NPT_IMAGE], image_bias, min_dist);
}
// See if we have a decision yet.
int image_count = counts[NPT_IMAGE];
int htext_score = counts[NPT_HTEXT] + counts[NPT_WEAK_HTEXT] -
(image_count + counts[NPT_WEAK_VTEXT]);
int vtext_score = counts[NPT_VTEXT] + counts[NPT_WEAK_VTEXT] -
(image_count + counts[NPT_WEAK_HTEXT]);
if (image_count > 0 &&
image_bias - htext_score >= kSmoothDecisionMargin &&
image_bias - vtext_score >= kSmoothDecisionMargin) {
*best_distance = dists[NPT_IMAGE][0];
if (dists[NPT_WEAK_VTEXT].size() > 0 &&
*best_distance > dists[NPT_WEAK_VTEXT][0])
*best_distance = dists[NPT_WEAK_VTEXT][0];
if (dists[NPT_WEAK_HTEXT].size() > 0 &&
*best_distance > dists[NPT_WEAK_HTEXT][0])
*best_distance = dists[NPT_WEAK_HTEXT][0];
return BRT_POLYIMAGE;
}
if ((text_dir != BRT_VERT_TEXT || flow_type != BTFT_CHAIN) &&
counts[NPT_HTEXT] > 0 && htext_score >= kSmoothDecisionMargin) {
*best_distance = dists[NPT_HTEXT][0];
return BRT_TEXT;
} else if ((text_dir != BRT_TEXT || flow_type != BTFT_CHAIN) &&
counts[NPT_VTEXT] > 0 && vtext_score >= kSmoothDecisionMargin) {
*best_distance = dists[NPT_VTEXT][0];
return BRT_VERT_TEXT;
}
} while (min_dist < MAX_INT32);
return BRT_UNKNOWN;
}
// Counts the partitions in the given search_box by appending the gap
// distance (scaled by dist_scaling) of the part from the base_part to the
// vector of the appropriate type for the partition. Prior to return, the
// vectors in the dists array are sorted in increasing order.
// The nontext_map (+im_box, rerotation) is used to make text invisible if
// there is non-text in between.
// dists must be an array of GenericVectors of size NPT_COUNT.
void ColPartitionGrid::AccumulatePartDistances(const ColPartition& base_part,
const ICOORD& dist_scaling,
const TBOX& search_box,
Pix* nontext_map,
const TBOX& im_box,
const FCOORD& rerotation,
bool debug,
GenericVector<int>* dists) {
const TBOX& part_box = base_part.bounding_box();
ColPartitionGridSearch rsearch(this);
rsearch.SetUniqueMode(true);
rsearch.StartRectSearch(search_box);
ColPartition* neighbour;
// Search for compatible neighbours with a similar strokewidth, but not
// on the other side of a tab vector.
while ((neighbour = rsearch.NextRectSearch()) != NULL) {
if (neighbour->IsUnMergeableType() ||
!base_part.ConfirmNoTabViolation(*neighbour) ||
neighbour == &base_part)
continue;
TBOX nbox = neighbour->bounding_box();
BlobRegionType n_type = neighbour->blob_type();
if ((n_type == BRT_TEXT || n_type == BRT_VERT_TEXT) &&
!ImageFind::BlankImageInBetween(part_box, nbox, im_box, rerotation,
nontext_map))
continue; // Text not visible the other side of image.
if (BLOBNBOX::IsLineType(n_type))
continue; // Don't use horizontal lines as neighbours.
int x_gap = MAX(part_box.x_gap(nbox), 0);
int y_gap = MAX(part_box.y_gap(nbox), 0);
int n_dist = x_gap * dist_scaling.x() + y_gap* dist_scaling.y();
if (debug) {
tprintf("Part has x-gap=%d, y=%d, dist=%d at:",
x_gap, y_gap, n_dist);
nbox.print();
}
// Truncate the number of boxes, so text doesn't get too much advantage.
int n_boxes = MIN(neighbour->boxes_count(), kSmoothDecisionMargin);
BlobTextFlowType n_flow = neighbour->flow();
GenericVector<int>* count_vector = NULL;
if (n_flow == BTFT_STRONG_CHAIN) {
if (n_type == BRT_TEXT)
count_vector = &dists[NPT_HTEXT];
else
count_vector = &dists[NPT_VTEXT];
if (debug) {
tprintf("%s %d\n", n_type == BRT_TEXT ? "Htext" : "Vtext", n_boxes);
}
} else if ((n_type == BRT_TEXT || n_type == BRT_VERT_TEXT) &&
(n_flow == BTFT_CHAIN || n_flow == BTFT_NEIGHBOURS)) {
// Medium text counts as weak, and all else counts as image.
if (n_type == BRT_TEXT)
count_vector = &dists[NPT_WEAK_HTEXT];
else
count_vector = &dists[NPT_WEAK_VTEXT];
if (debug) tprintf("Weak %d\n", n_boxes);
} else {
count_vector = &dists[NPT_IMAGE];
if (debug) tprintf("Image %d\n", n_boxes);
}
if (count_vector != NULL) {
for (int i = 0; i < n_boxes; ++i)
count_vector->push_back(n_dist);
}
if (debug) {
neighbour->Print();
}
}
for (int i = 0; i < NPT_COUNT; ++i)
dists[i].sort();
}
// Improves the margins of the part ColPartition by searching for
// neighbours that vertically overlap significantly.
// columns may be NULL, and indicates the assigned column structure this
// is applicable to part.
void ColPartitionGrid::FindPartitionMargins(ColPartitionSet* columns,
ColPartition* part) {
// Set up a rectangle search x-bounded by the column and y by the part.
TBOX box = part->bounding_box();
int y = part->MidY();
// Initial left margin is based on the column, if there is one.
int left_margin = bleft().x();
int right_margin = tright().x();
if (columns != NULL) {
ColPartition* column = columns->ColumnContaining(box.left(), y);
if (column != NULL)
left_margin = column->LeftAtY(y);
column = columns->ColumnContaining(box.right(), y);
if (column != NULL)
right_margin = column->RightAtY(y);
}
left_margin -= kColumnWidthFactor;
right_margin += kColumnWidthFactor;
// Search for ColPartitions that reduce the margin.
left_margin = FindMargin(box.left() + box.height(), true, left_margin,
box.bottom(), box.top(), part);
part->set_left_margin(left_margin);
// Search for ColPartitions that reduce the margin.
right_margin = FindMargin(box.right() - box.height(), false, right_margin,
box.bottom(), box.top(), part);
part->set_right_margin(right_margin);
}
// Starting at x, and going in the specified direction, upto x_limit, finds
// the margin for the given y range by searching sideways,
// and ignoring not_this.
int ColPartitionGrid::FindMargin(int x, bool right_to_left, int x_limit,
int y_bottom, int y_top,
const ColPartition* not_this) {
int height = y_top - y_bottom;
// Iterate the ColPartitions in the grid.
ColPartitionGridSearch side_search(this);
side_search.SetUniqueMode(true);
side_search.StartSideSearch(x, y_bottom, y_top);
ColPartition* part;
while ((part = side_search.NextSideSearch(right_to_left)) != NULL) {
// Ignore itself.
if (part == not_this) // || part->IsLineType())
continue;
// Must overlap by enough, based on the min of the heights, so
// large partitions can't smash through small ones.
TBOX box = part->bounding_box();
int min_overlap = MIN(height, box.height());
min_overlap = static_cast<int>(min_overlap * kMarginOverlapFraction + 0.5);
int y_overlap = MIN(y_top, box.top()) - MAX(y_bottom, box.bottom());
if (y_overlap < min_overlap)
continue;
// Must be going the right way.
int x_edge = right_to_left ? box.right() : box.left();
if ((x_edge < x) != right_to_left)
continue;
// If we have gone past x_limit, then x_limit will do.
if ((x_edge < x_limit) == right_to_left)
break;
// It reduces x limit, so save the new one.
x_limit = x_edge;
}
return x_limit;
}
} // namespace tesseract.