/////////////////////////////////////////////////////////////////////// // File: colpartition.cpp // Description: Class to hold partitions of the page that correspond // roughly to text lines. // Author: Ray Smith // Created: Thu Aug 14 10:54:01 PDT 2008 // // (C) Copyright 2008, Google Inc. // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // /////////////////////////////////////////////////////////////////////// #ifdef _MSC_VER #pragma warning(disable:4244) // Conversion warnings #endif #ifdef HAVE_CONFIG_H #include "config_auto.h" #endif #include "colpartition.h" #include "colpartitiongrid.h" #include "colpartitionset.h" #include "detlinefit.h" #include "dppoint.h" #include "imagefind.h" #include "workingpartset.h" namespace tesseract { ELIST2IZE(ColPartition) CLISTIZE(ColPartition) //////////////// ColPartition Implementation //////////////// // Maximum change in spacing (in inches) to ignore. const double kMaxSpacingDrift = 1.0 / 72; // 1/72 is one point. // Maximum fraction of line height used as an additional allowance // for top spacing. const double kMaxTopSpacingFraction = 0.25; // What multiple of the largest line height should be used as an upper bound // for whether lines are in the same text block? const double kMaxSameBlockLineSpacing = 3; // Maximum ratio of sizes for lines to be considered the same size. const double kMaxSizeRatio = 1.5; // Fraction of max of leader width and gap for max IQR of gaps. const double kMaxLeaderGapFractionOfMax = 0.25; // Fraction of min of leader width and gap for max IQR of gaps. const double kMaxLeaderGapFractionOfMin = 0.5; // Minimum number of blobs to be considered a leader. const int kMinLeaderCount = 5; // Minimum score for a STRONG_CHAIN textline. const int kMinStrongTextValue = 6; // Minimum score for a CHAIN textline. const int kMinChainTextValue = 3; // Minimum number of blobs for strong horizontal text lines. const int kHorzStrongTextlineCount = 8; // Minimum height (in image pixels) for strong horizontal text lines. const int kHorzStrongTextlineHeight = 10; // Minimum aspect ratio for strong horizontal text lines. const int kHorzStrongTextlineAspect = 5; // Maximum upper quartile error allowed on a baseline fit as a fraction // of height. const double kMaxBaselineError = 0.4375; // Min coverage for a good baseline between vectors const double kMinBaselineCoverage = 0.5; // Max RMS color noise to compare colors. const int kMaxRMSColorNoise = 128; // Maximum distance to allow a partition color to be to use that partition // in smoothing neighbouring types. This is a squared distance. const int kMaxColorDistance = 900; // blob_type is the blob_region_type_ of the blobs in this partition. // Vertical is the direction of logical vertical on the possibly skewed image. ColPartition::ColPartition(BlobRegionType blob_type, const ICOORD& vertical) : left_margin_(-MAX_INT32), right_margin_(MAX_INT32), median_bottom_(MAX_INT32), median_top_(-MAX_INT32), median_size_(0), median_left_(MAX_INT32), median_right_(-MAX_INT32), median_width_(0), blob_type_(blob_type), flow_(BTFT_NONE), good_blob_score_(0), good_width_(false), good_column_(false), left_key_tab_(false), right_key_tab_(false), left_key_(0), right_key_(0), type_(PT_UNKNOWN), vertical_(vertical), working_set_(NULL), last_add_was_vertical_(false), block_owned_(false), desperately_merged_(false), first_column_(-1), last_column_(-1), column_set_(NULL), side_step_(0), top_spacing_(0), bottom_spacing_(0), type_before_table_(PT_UNKNOWN), inside_table_column_(false), nearest_neighbor_above_(NULL), nearest_neighbor_below_(NULL), space_above_(0), space_below_(0), space_to_left_(0), space_to_right_(0), owns_blobs_(true) { memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_)); } // Constructs a fake ColPartition with a single fake BLOBNBOX, all made // from a single TBOX. // WARNING: Despite being on C_LISTs, the BLOBNBOX owns the C_BLOB and // the ColPartition owns the BLOBNBOX!!! // Call DeleteBoxes before deleting the ColPartition. ColPartition* ColPartition::FakePartition(const TBOX& box, PolyBlockType block_type, BlobRegionType blob_type, BlobTextFlowType flow) { ColPartition* part = new ColPartition(blob_type, ICOORD(0, 1)); part->set_type(block_type); part->set_flow(flow); part->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(box))); part->set_left_margin(box.left()); part->set_right_margin(box.right()); part->SetBlobTypes(); part->ComputeLimits(); part->ClaimBoxes(); return part; } // Constructs and returns a ColPartition with the given real BLOBNBOX, // and sets it up to be a "big" partition (single-blob partition bigger // than the surrounding text that may be a dropcap, two or more vertically // touching characters, or some graphic element. // If the given list is not NULL, the partition is also added to the list. ColPartition* ColPartition::MakeBigPartition(BLOBNBOX* box, ColPartition_LIST* big_part_list) { box->set_owner(NULL); ColPartition* single = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1)); single->set_flow(BTFT_NONE); single->AddBox(box); single->ComputeLimits(); single->ClaimBoxes(); single->SetBlobTypes(); single->set_block_owned(true); if (big_part_list != NULL) { ColPartition_IT part_it(big_part_list); part_it.add_to_end(single); } return single; } ColPartition::~ColPartition() { // Remove this as a partner of all partners, as we don't want them // referring to a deleted object. ColPartition_C_IT it(&upper_partners_); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { it.data()->RemovePartner(false, this); } it.set_to_list(&lower_partners_); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { it.data()->RemovePartner(true, this); } } // Constructs a fake ColPartition with no BLOBNBOXes to represent a // horizontal or vertical line, given a type and a bounding box. ColPartition* ColPartition::MakeLinePartition(BlobRegionType blob_type, const ICOORD& vertical, int left, int bottom, int right, int top) { ColPartition* part = new ColPartition(blob_type, vertical); part->bounding_box_ = TBOX(left, bottom, right, top); part->median_bottom_ = bottom; part->median_top_ = top; part->median_size_ = top - bottom; part->median_width_ = right - left; part->left_key_ = part->BoxLeftKey(); part->right_key_ = part->BoxRightKey(); return part; } // Adds the given box to the partition, updating the partition bounds. // The list of boxes in the partition is updated, ensuring that no box is // recorded twice, and the boxes are kept in increasing left position. void ColPartition::AddBox(BLOBNBOX* bbox) { TBOX box = bbox->bounding_box(); // Update the partition limits. if (boxes_.length() == 0) { bounding_box_ = box; } else { bounding_box_ += box; } if (IsVerticalType()) { if (!last_add_was_vertical_) { boxes_.sort(SortByBoxBottom); last_add_was_vertical_ = true; } boxes_.add_sorted(SortByBoxBottom, true, bbox); } else { if (last_add_was_vertical_) { boxes_.sort(SortByBoxLeft); last_add_was_vertical_ = false; } boxes_.add_sorted(SortByBoxLeft, true, bbox); } if (!left_key_tab_) left_key_ = BoxLeftKey(); if (!right_key_tab_) right_key_ = BoxRightKey(); if (TabFind::WithinTestRegion(2, box.left(), box.bottom())) tprintf("Added box (%d,%d)->(%d,%d) left_blob_x_=%d, right_blob_x_ = %d\n", box.left(), box.bottom(), box.right(), box.top(), bounding_box_.left(), bounding_box_.right()); } // Removes the given box from the partition, updating the bounds. void ColPartition::RemoveBox(BLOBNBOX* box) { BLOBNBOX_C_IT bb_it(&boxes_); for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { if (box == bb_it.data()) { bb_it.extract(); ComputeLimits(); return; } } } // Returns the tallest box in the partition, as measured perpendicular to the // presumed flow of text. BLOBNBOX* ColPartition::BiggestBox() { BLOBNBOX* biggest = NULL; BLOBNBOX_C_IT bb_it(&boxes_); for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { BLOBNBOX* bbox = bb_it.data(); if (IsVerticalType()) { if (biggest == NULL || bbox->bounding_box().width() > biggest->bounding_box().width()) biggest = bbox; } else { if (biggest == NULL || bbox->bounding_box().height() > biggest->bounding_box().height()) biggest = bbox; } } return biggest; } // Returns the bounding box excluding the given box. TBOX ColPartition::BoundsWithoutBox(BLOBNBOX* box) { TBOX result; BLOBNBOX_C_IT bb_it(&boxes_); for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { if (box != bb_it.data()) { result += bb_it.data()->bounding_box(); } } return result; } // Claims the boxes in the boxes_list by marking them with a this owner // pointer. If a box is already owned, then it must be owned by this. void ColPartition::ClaimBoxes() { BLOBNBOX_C_IT bb_it(&boxes_); for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { BLOBNBOX* bblob = bb_it.data(); ColPartition* other = bblob->owner(); if (other == NULL) { // Normal case: ownership is available. bblob->set_owner(this); } else { ASSERT_HOST(other == this); } } } // NULL the owner of the blobs in this partition, so they can be deleted // independently of the ColPartition. void ColPartition::DisownBoxes() { BLOBNBOX_C_IT bb_it(&boxes_); for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { BLOBNBOX* bblob = bb_it.data(); ASSERT_HOST(bblob->owner() == this || bblob->owner() == NULL); bblob->set_owner(NULL); } } // NULL the owner of the blobs in this partition that are owned by this // partition, so they can be deleted independently of the ColPartition. // Any blobs that are not owned by this partition get to keep their owner // without an assert failure. void ColPartition::DisownBoxesNoAssert() { BLOBNBOX_C_IT bb_it(&boxes_); for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { BLOBNBOX* bblob = bb_it.data(); if (bblob->owner() == this) bblob->set_owner(NULL); } } // NULLs the owner of the blobs in this partition that are owned by this // partition and not leader blobs, removing them from the boxes_ list, thus // turning this partition back to a leader partition if it contains a leader, // or otherwise leaving it empty. Returns true if any boxes remain. bool ColPartition::ReleaseNonLeaderBoxes() { BLOBNBOX_C_IT bb_it(&boxes_); for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { BLOBNBOX* bblob = bb_it.data(); if (bblob->flow() != BTFT_LEADER) { if (bblob->owner() == this) bblob->set_owner(NULL); bb_it.extract(); } } if (bb_it.empty()) return false; flow_ = BTFT_LEADER; ComputeLimits(); return true; } // Delete the boxes that this partition owns. void ColPartition::DeleteBoxes() { // Although the boxes_ list is a C_LIST, in some cases it owns the // BLOBNBOXes, as the ColPartition takes ownership from the grid, // and the BLOBNBOXes own the underlying C_BLOBs. for (BLOBNBOX_C_IT bb_it(&boxes_); !bb_it.empty(); bb_it.forward()) { BLOBNBOX* bblob = bb_it.extract(); delete bblob->cblob(); delete bblob; } } // Reflects the partition in the y-axis, assuming that its blobs have // already been done. Corrects only a limited part of the members, since // this function is assumed to be used shortly after initial creation, which // is before a lot of the members are used. void ColPartition::ReflectInYAxis() { BLOBNBOX_CLIST reversed_boxes; BLOBNBOX_C_IT reversed_it(&reversed_boxes); // Reverse the order of the boxes_. BLOBNBOX_C_IT bb_it(&boxes_); for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { reversed_it.add_before_then_move(bb_it.extract()); } bb_it.add_list_after(&reversed_boxes); ASSERT_HOST(!left_key_tab_ && !right_key_tab_); int tmp = left_margin_; left_margin_ = -right_margin_; right_margin_ = -tmp; ComputeLimits(); } // Returns true if this is a legal partition - meaning that the conditions // left_margin <= bounding_box left // left_key <= bounding box left key // bounding box left <= bounding box right // and likewise for right margin and key // are all met. bool ColPartition::IsLegal() { if (bounding_box_.left() > bounding_box_.right()) { if (textord_debug_bugs) { tprintf("Bounding box invalid\n"); Print(); } return false; // Bounding box invalid. } if (left_margin_ > bounding_box_.left() || right_margin_ < bounding_box_.right()) { if (textord_debug_bugs) { tprintf("Margins invalid\n"); Print(); } return false; // Margins invalid. } if (left_key_ > BoxLeftKey() || right_key_ < BoxRightKey()) { if (textord_debug_bugs) { tprintf("Key inside box: %d v %d or %d v %d\n", left_key_, BoxLeftKey(), right_key_, BoxRightKey()); Print(); } return false; // Keys inside the box. } return true; } // Returns true if the left and right edges are approximately equal. bool ColPartition::MatchingColumns(const ColPartition& other) const { int y = (MidY() + other.MidY()) / 2; if (!NearlyEqual(other.LeftAtY(y) / kColumnWidthFactor, LeftAtY(y) / kColumnWidthFactor, 1)) return false; if (!NearlyEqual(other.RightAtY(y) / kColumnWidthFactor, RightAtY(y) / kColumnWidthFactor, 1)) return false; return true; } // Returns true if the colors match for two text partitions. bool ColPartition::MatchingTextColor(const ColPartition& other) const { if (color1_[L_ALPHA_CHANNEL] > kMaxRMSColorNoise && other.color1_[L_ALPHA_CHANNEL] > kMaxRMSColorNoise) return false; // Too noisy. // Colors must match for other to count. double d_this1_o = ImageFind::ColorDistanceFromLine(other.color1_, other.color2_, color1_); double d_this2_o = ImageFind::ColorDistanceFromLine(other.color1_, other.color2_, color2_); double d_o1_this = ImageFind::ColorDistanceFromLine(color1_, color2_, other.color1_); double d_o2_this = ImageFind::ColorDistanceFromLine(color1_, color2_, other.color2_); // All 4 distances must be small enough. return d_this1_o < kMaxColorDistance && d_this2_o < kMaxColorDistance && d_o1_this < kMaxColorDistance && d_o2_this < kMaxColorDistance; } // Returns true if the sizes match for two text partitions, // taking orientation into account. See also SizesSimilar. bool ColPartition::MatchingSizes(const ColPartition& other) const { if (blob_type_ == BRT_VERT_TEXT || other.blob_type_ == BRT_VERT_TEXT) return !TabFind::DifferentSizes(median_width_, other.median_width_); else return !TabFind::DifferentSizes(median_size_, other.median_size_); } // Returns true if there is no tabstop violation in merging this and other. bool ColPartition::ConfirmNoTabViolation(const ColPartition& other) const { if (bounding_box_.right() < other.bounding_box_.left() && bounding_box_.right() < other.LeftBlobRule()) return false; if (other.bounding_box_.right() < bounding_box_.left() && other.bounding_box_.right() < LeftBlobRule()) return false; if (bounding_box_.left() > other.bounding_box_.right() && bounding_box_.left() > other.RightBlobRule()) return false; if (other.bounding_box_.left() > bounding_box_.right() && other.bounding_box_.left() > RightBlobRule()) return false; return true; } // Returns true if other has a similar stroke width to this. bool ColPartition::MatchingStrokeWidth(const ColPartition& other, double fractional_tolerance, double constant_tolerance) const { int match_count = 0; int nonmatch_count = 0; BLOBNBOX_C_IT box_it(const_cast(&boxes_)); BLOBNBOX_C_IT other_it(const_cast(&other.boxes_)); box_it.mark_cycle_pt(); other_it.mark_cycle_pt(); while (!box_it.cycled_list() && !other_it.cycled_list()) { if (box_it.data()->MatchingStrokeWidth(*other_it.data(), fractional_tolerance, constant_tolerance)) ++match_count; else ++nonmatch_count; box_it.forward(); other_it.forward(); } return match_count > nonmatch_count; } // Returns true if base is an acceptable diacritic base char merge // with this as the diacritic. // Returns true if: // (1) this is a ColPartition containing only diacritics, and // (2) the base characters indicated on the diacritics all believably lie // within the text line of the candidate ColPartition. bool ColPartition::OKDiacriticMerge(const ColPartition& candidate, bool debug) const { BLOBNBOX_C_IT it(const_cast(&boxes_)); int min_top = MAX_INT32; int max_bottom = -MAX_INT32; for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { BLOBNBOX* blob = it.data(); if (!blob->IsDiacritic()) { if (debug) { tprintf("Blob is not a diacritic:"); blob->bounding_box().print(); } return false; // All blobs must have diacritic bases. } if (blob->base_char_top() < min_top) min_top = blob->base_char_top(); if (blob->base_char_bottom() > max_bottom) max_bottom = blob->base_char_bottom(); } // If the intersection of all vertical ranges of all base characters // overlaps the median range of this, then it is OK. bool result = min_top > candidate.median_bottom_ && max_bottom < candidate.median_top_; if (debug) { if (result) tprintf("OKDiacritic!\n"); else tprintf("y ranges don\'t overlap: %d-%d / %d-%d\n", max_bottom, min_top, median_bottom_, median_top_); } return result; } // Sets the sort key using either the tab vector, or the bounding box if // the tab vector is NULL. If the tab_vector lies inside the bounding_box, // use the edge of the box as a key any way. void ColPartition::SetLeftTab(const TabVector* tab_vector) { if (tab_vector != NULL) { left_key_ = tab_vector->sort_key(); left_key_tab_ = left_key_ <= BoxLeftKey(); } else { left_key_tab_ = false; } if (!left_key_tab_) left_key_ = BoxLeftKey(); } // As SetLeftTab, but with the right. void ColPartition::SetRightTab(const TabVector* tab_vector) { if (tab_vector != NULL) { right_key_ = tab_vector->sort_key(); right_key_tab_ = right_key_ >= BoxRightKey(); } else { right_key_tab_ = false; } if (!right_key_tab_) right_key_ = BoxRightKey(); } // Copies the left/right tab from the src partition, but if take_box is // true, copies the box instead and uses that as a key. void ColPartition::CopyLeftTab(const ColPartition& src, bool take_box) { left_key_tab_ = take_box ? false : src.left_key_tab_; if (left_key_tab_) { left_key_ = src.left_key_; } else { bounding_box_.set_left(XAtY(src.BoxLeftKey(), MidY())); left_key_ = BoxLeftKey(); } if (left_margin_ > bounding_box_.left()) left_margin_ = src.left_margin_; } // As CopyLeftTab, but with the right. void ColPartition::CopyRightTab(const ColPartition& src, bool take_box) { right_key_tab_ = take_box ? false : src.right_key_tab_; if (right_key_tab_) { right_key_ = src.right_key_; } else { bounding_box_.set_right(XAtY(src.BoxRightKey(), MidY())); right_key_ = BoxRightKey(); } if (right_margin_ < bounding_box_.right()) right_margin_ = src.right_margin_; } // Returns the left rule line x coord of the leftmost blob. int ColPartition::LeftBlobRule() const { BLOBNBOX_C_IT it(const_cast(&boxes_)); return it.data()->left_rule(); } // Returns the right rule line x coord of the rightmost blob. int ColPartition::RightBlobRule() const { BLOBNBOX_C_IT it(const_cast(&boxes_)); it.move_to_last(); return it.data()->right_rule(); } float ColPartition::SpecialBlobsDensity(const BlobSpecialTextType type) const { ASSERT_HOST(type < BSTT_COUNT); return special_blobs_densities_[type]; } int ColPartition::SpecialBlobsCount(const BlobSpecialTextType type) { ASSERT_HOST(type < BSTT_COUNT); BLOBNBOX_C_IT blob_it(&boxes_); int count = 0; for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { BLOBNBOX* blob = blob_it.data(); BlobSpecialTextType blob_type = blob->special_text_type(); if (blob_type == type) { count++; } } return count; } void ColPartition::SetSpecialBlobsDensity( const BlobSpecialTextType type, const float density) { ASSERT_HOST(type < BSTT_COUNT); special_blobs_densities_[type] = density; } void ColPartition::ComputeSpecialBlobsDensity() { memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_)); if (boxes_.empty()) { return; } BLOBNBOX_C_IT blob_it(&boxes_); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { BLOBNBOX* blob = blob_it.data(); BlobSpecialTextType type = blob->special_text_type(); special_blobs_densities_[type]++; } for (int type = 0; type < BSTT_COUNT; ++type) { special_blobs_densities_[type] /= boxes_.length(); } } // Add a partner above if upper, otherwise below. // Add them uniquely and keep the list sorted by box left. // Partnerships are added symmetrically to partner and this. void ColPartition::AddPartner(bool upper, ColPartition* partner) { if (upper) { partner->lower_partners_.add_sorted(SortByBoxLeft, true, this); upper_partners_.add_sorted(SortByBoxLeft, true, partner); } else { partner->upper_partners_.add_sorted(SortByBoxLeft, true, this); lower_partners_.add_sorted(SortByBoxLeft, true, partner); } } // Removes the partner from this, but does not remove this from partner. // This asymmetric removal is so as not to mess up the iterator that is // working on partner's partner list. void ColPartition::RemovePartner(bool upper, ColPartition* partner) { ColPartition_C_IT it(upper ? &upper_partners_ : &lower_partners_); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { if (it.data() == partner) { it.extract(); break; } } } // Returns the partner if the given partner is a singleton, otherwise NULL. ColPartition* ColPartition::SingletonPartner(bool upper) { ColPartition_CLIST* partners = upper ? &upper_partners_ : &lower_partners_; if (!partners->singleton()) return NULL; ColPartition_C_IT it(partners); return it.data(); } // Merge with the other partition and delete it. void ColPartition::Absorb(ColPartition* other, WidthCallback* cb) { // The result has to either own all of the blobs or none of them. // Verify the flag is consisent. ASSERT_HOST(owns_blobs() == other->owns_blobs()); // TODO(nbeato): check owns_blobs better. Right now owns_blobs // should always be true when this is called. So there is no issues. if (TabFind::WithinTestRegion(2, bounding_box_.left(), bounding_box_.bottom()) || TabFind::WithinTestRegion(2, other->bounding_box_.left(), other->bounding_box_.bottom())) { tprintf("Merging:"); Print(); other->Print(); } // Update the special_blobs_densities_. memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_)); for (int type = 0; type < BSTT_COUNT; ++type) { int w1 = boxes_.length(), w2 = other->boxes_.length(); float new_val = special_blobs_densities_[type] * w1 + other->special_blobs_densities_[type] * w2; if (!w1 || !w2) { special_blobs_densities_[type] = new_val / (w1 + w2); } } // Merge the two sorted lists. BLOBNBOX_C_IT it(&boxes_); BLOBNBOX_C_IT it2(&other->boxes_); for (; !it2.empty(); it2.forward()) { BLOBNBOX* bbox2 = it2.extract(); ColPartition* prev_owner = bbox2->owner(); if (prev_owner != other && prev_owner != NULL) { // A blob on other's list is owned by someone else; let them have it. continue; } ASSERT_HOST(prev_owner == other || prev_owner == NULL); if (prev_owner == other) bbox2->set_owner(this); it.add_to_end(bbox2); } left_margin_ = MIN(left_margin_, other->left_margin_); right_margin_ = MAX(right_margin_, other->right_margin_); if (other->left_key_ < left_key_) { left_key_ = other->left_key_; left_key_tab_ = other->left_key_tab_; } if (other->right_key_ > right_key_) { right_key_ = other->right_key_; right_key_tab_ = other->right_key_tab_; } // Combine the flow and blob_type in a sensible way. // Dominant flows stay. if (!DominatesInMerge(flow_, other->flow_)) { flow_ = other->flow_; blob_type_ = other->blob_type_; } SetBlobTypes(); if (IsVerticalType()) { boxes_.sort(SortByBoxBottom); last_add_was_vertical_ = true; } else { boxes_.sort(SortByBoxLeft); last_add_was_vertical_ = false; } ComputeLimits(); // Fix partner lists. other is going away, so remove it as a // partner of all its partners and add this in its place. for (int upper = 0; upper < 2; ++upper) { ColPartition_CLIST partners; ColPartition_C_IT part_it(&partners); part_it.add_list_after(upper ? &other->upper_partners_ : &other->lower_partners_); for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) { ColPartition* partner = part_it.extract(); partner->RemovePartner(!upper, other); partner->RemovePartner(!upper, this); partner->AddPartner(!upper, this); } } delete other; if (cb != NULL) { SetColumnGoodness(cb); } } // Merge1 and merge2 are candidates to be merged, yet their combined box // overlaps this. Is that allowed? // Returns true if the overlap between this and the merged pair of // merge candidates is sufficiently trivial to be allowed. // The merged box can graze the edge of this by the ok_box_overlap // if that exceeds the margin to the median top and bottom. // ok_box_overlap should be set by the caller appropriate to the sizes of // the text involved, and is usually a fraction of the median size of merge1 // and/or merge2, or this. // TODO(rays) Determine whether vertical text needs to be considered. bool ColPartition::OKMergeOverlap(const ColPartition& merge1, const ColPartition& merge2, int ok_box_overlap, bool debug) { // Vertical partitions are not allowed to be involved. if (IsVerticalType() || merge1.IsVerticalType() || merge2.IsVerticalType()) { if (debug) tprintf("Vertical partition\n"); return false; } // The merging partitions must strongly overlap each other. if (!merge1.VSignificantCoreOverlap(merge2)) { if (debug) tprintf("Voverlap %d (%d)\n", merge1.VCoreOverlap(merge2), merge1.VSignificantCoreOverlap(merge2)); return false; } // The merged box must not overlap the median bounds of this. TBOX merged_box(merge1.bounding_box()); merged_box += merge2.bounding_box(); if (merged_box.bottom() < median_top_ && merged_box.top() > median_bottom_ && merged_box.bottom() < bounding_box_.top() - ok_box_overlap && merged_box.top() > bounding_box_.bottom() + ok_box_overlap) { if (debug) tprintf("Excessive box overlap\n"); return false; } // Looks OK! return true; } // Find the blob at which to split this to minimize the overlap with the // given box. Returns the first blob to go in the second partition. BLOBNBOX* ColPartition::OverlapSplitBlob(const TBOX& box) { if (boxes_.empty() || boxes_.singleton()) return NULL; BLOBNBOX_C_IT it(&boxes_); TBOX left_box(it.data()->bounding_box()); for (it.forward(); !it.at_first(); it.forward()) { BLOBNBOX* bbox = it.data(); left_box += bbox->bounding_box(); if (left_box.overlap(box)) return bbox; } return NULL; } // Split this partition keeping the first half in this and returning // the second half. // Splits by putting the split_blob and the blobs that follow // in the second half, and the rest in the first half. ColPartition* ColPartition::SplitAtBlob(BLOBNBOX* split_blob) { ColPartition* split_part = ShallowCopy(); split_part->set_owns_blobs(owns_blobs()); BLOBNBOX_C_IT it(&boxes_); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { BLOBNBOX* bbox = it.data(); ColPartition* prev_owner = bbox->owner(); ASSERT_HOST(!owns_blobs() || prev_owner == this || prev_owner == NULL); if (bbox == split_blob || !split_part->boxes_.empty()) { split_part->AddBox(it.extract()); if (owns_blobs() && prev_owner != NULL) bbox->set_owner(split_part); } } ASSERT_HOST(!it.empty()); if (split_part->IsEmpty()) { // Split part ended up with nothing. Possible if split_blob is not // in the list of blobs. delete split_part; return NULL; } right_key_tab_ = false; split_part->left_key_tab_ = false; ComputeLimits(); // TODO(nbeato) Merge Ray's CL like this: // if (owns_blobs()) // SetBlobTextlineGoodness(); split_part->ComputeLimits(); // TODO(nbeato) Merge Ray's CL like this: // if (split_part->owns_blobs()) // split_part->SetBlobTextlineGoodness(); return split_part; } // Split this partition at the given x coordinate, returning the right // half and keeping the left half in this. ColPartition* ColPartition::SplitAt(int split_x) { if (split_x <= bounding_box_.left() || split_x >= bounding_box_.right()) return NULL; // There will be no change. ColPartition* split_part = ShallowCopy(); split_part->set_owns_blobs(owns_blobs()); BLOBNBOX_C_IT it(&boxes_); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { BLOBNBOX* bbox = it.data(); ColPartition* prev_owner = bbox->owner(); ASSERT_HOST(!owns_blobs() || prev_owner == this || prev_owner == NULL); const TBOX& box = bbox->bounding_box(); if (box.left() >= split_x) { split_part->AddBox(it.extract()); if (owns_blobs() && prev_owner != NULL) bbox->set_owner(split_part); } } if (it.empty()) { // Possible if split-x passes through the first blob. it.add_list_after(&split_part->boxes_); } ASSERT_HOST(!it.empty()); if (split_part->IsEmpty()) { // Split part ended up with nothing. Possible if split_x passes // through the last blob. delete split_part; return NULL; } right_key_tab_ = false; split_part->left_key_tab_ = false; right_margin_ = split_x; split_part->left_margin_ = split_x; ComputeLimits(); split_part->ComputeLimits(); return split_part; } // Recalculates all the coordinate limits of the partition. void ColPartition::ComputeLimits() { bounding_box_ = TBOX(); // Clear it BLOBNBOX_C_IT it(&boxes_); BLOBNBOX* bbox = NULL; int non_leader_count = 0; if (it.empty()) { bounding_box_.set_left(left_margin_); bounding_box_.set_right(right_margin_); bounding_box_.set_bottom(0); bounding_box_.set_top(0); } else { for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { bbox = it.data(); bounding_box_ += bbox->bounding_box(); if (bbox->flow() != BTFT_LEADER) ++non_leader_count; } } if (!left_key_tab_) left_key_ = BoxLeftKey(); if (left_key_ > BoxLeftKey() && textord_debug_bugs) { // TODO(rays) investigate the causes of these error messages, to find // out if they are genuinely harmful, or just indicative of junk input. tprintf("Computed left-illegal partition\n"); Print(); } if (!right_key_tab_) right_key_ = BoxRightKey(); if (right_key_ < BoxRightKey() && textord_debug_bugs) { tprintf("Computed right-illegal partition\n"); Print(); } if (it.empty()) return; if (IsImageType() || blob_type() == BRT_RECTIMAGE || blob_type() == BRT_POLYIMAGE) { median_top_ = bounding_box_.top(); median_bottom_ = bounding_box_.bottom(); median_size_ = bounding_box_.height(); median_left_ = bounding_box_.left(); median_right_ = bounding_box_.right(); median_width_ = bounding_box_.width(); } else { STATS top_stats(bounding_box_.bottom(), bounding_box_.top() + 1); STATS bottom_stats(bounding_box_.bottom(), bounding_box_.top() + 1); STATS size_stats(0, bounding_box_.height() + 1); STATS left_stats(bounding_box_.left(), bounding_box_.right() + 1); STATS right_stats(bounding_box_.left(), bounding_box_.right() + 1); STATS width_stats(0, bounding_box_.width() + 1); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { bbox = it.data(); if (non_leader_count == 0 || bbox->flow() != BTFT_LEADER) { TBOX box = bbox->bounding_box(); int area = box.area(); top_stats.add(box.top(), area); bottom_stats.add(box.bottom(), area); size_stats.add(box.height(), area); left_stats.add(box.left(), area); right_stats.add(box.right(), area); width_stats.add(box.width(), area); } } median_top_ = static_cast(top_stats.median() + 0.5); median_bottom_ = static_cast(bottom_stats.median() + 0.5); median_size_ = static_cast(size_stats.median() + 0.5); median_left_ = static_cast(left_stats.median() + 0.5); median_right_ = static_cast(right_stats.median() + 0.5); median_width_ = static_cast(width_stats.median() + 0.5); } if (right_margin_ < bounding_box_.right() && textord_debug_bugs) { tprintf("Made partition with bad right coords"); Print(); } if (left_margin_ > bounding_box_.left() && textord_debug_bugs) { tprintf("Made partition with bad left coords"); Print(); } // Fix partner lists. The bounding box has changed and partners are stored // in bounding box order, so remove and reinsert this as a partner // of all its partners. for (int upper = 0; upper < 2; ++upper) { ColPartition_CLIST partners; ColPartition_C_IT part_it(&partners); part_it.add_list_after(upper ? &upper_partners_ : &lower_partners_); for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) { ColPartition* partner = part_it.extract(); partner->RemovePartner(!upper, this); partner->AddPartner(!upper, this); } } if (TabFind::WithinTestRegion(2, bounding_box_.left(), bounding_box_.bottom())) { tprintf("Recomputed box for partition %p\n", this); Print(); } } // Returns the number of boxes that overlap the given box. int ColPartition::CountOverlappingBoxes(const TBOX& box) { BLOBNBOX_C_IT it(&boxes_); int overlap_count = 0; for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { BLOBNBOX* bbox = it.data(); if (box.overlap(bbox->bounding_box())) ++overlap_count; } return overlap_count; } // Computes and sets the type_ and first_column_, last_column_ and column_set_. // resolution refers to the ppi resolution of the image. void ColPartition::SetPartitionType(int resolution, ColPartitionSet* columns) { int first_spanned_col = -1; ColumnSpanningType span_type = columns->SpanningType(resolution, bounding_box_.left(), bounding_box_.right(), MIN(bounding_box_.height(), bounding_box_.width()), MidY(), left_margin_, right_margin_, &first_column_, &last_column_, &first_spanned_col); column_set_ = columns; if (first_column_ < last_column_ && span_type == CST_PULLOUT && !IsLineType()) { // Unequal columns may indicate that the pullout spans one of the columns // it lies in, so force it to be allocated to just that column. if (first_spanned_col >= 0) { first_column_ = first_spanned_col; last_column_ = first_spanned_col; } else { if ((first_column_ & 1) == 0) last_column_ = first_column_; else if ((last_column_ & 1) == 0) first_column_ = last_column_; else first_column_ = last_column_ = (first_column_ + last_column_) / 2; } } type_ = PartitionType(span_type); } // Returns the PartitionType from the current BlobRegionType and a column // flow spanning type ColumnSpanningType, generated by // ColPartitionSet::SpanningType, that indicates how the partition sits // in the columns. PolyBlockType ColPartition::PartitionType(ColumnSpanningType flow) const { if (flow == CST_NOISE) { if (blob_type_ != BRT_HLINE && blob_type_ != BRT_VLINE && blob_type_ != BRT_RECTIMAGE && blob_type_ != BRT_VERT_TEXT) return PT_NOISE; flow = CST_FLOWING; } switch (blob_type_) { case BRT_NOISE: return PT_NOISE; case BRT_HLINE: return PT_HORZ_LINE; case BRT_VLINE: return PT_VERT_LINE; case BRT_RECTIMAGE: case BRT_POLYIMAGE: switch (flow) { case CST_FLOWING: return PT_FLOWING_IMAGE; case CST_HEADING: return PT_HEADING_IMAGE; case CST_PULLOUT: return PT_PULLOUT_IMAGE; default: ASSERT_HOST(!"Undefined flow type for image!"); } break; case BRT_VERT_TEXT: return PT_VERTICAL_TEXT; case BRT_TEXT: case BRT_UNKNOWN: default: switch (flow) { case CST_FLOWING: return PT_FLOWING_TEXT; case CST_HEADING: return PT_HEADING_TEXT; case CST_PULLOUT: return PT_PULLOUT_TEXT; default: ASSERT_HOST(!"Undefined flow type for text!"); } } ASSERT_HOST(!"Should never get here!"); return PT_NOISE; } // Returns the first and last column touched by this partition. // resolution refers to the ppi resolution of the image. void ColPartition::ColumnRange(int resolution, ColPartitionSet* columns, int* first_col, int* last_col) { int first_spanned_col = -1; ColumnSpanningType span_type = columns->SpanningType(resolution, bounding_box_.left(), bounding_box_.right(), MIN(bounding_box_.height(), bounding_box_.width()), MidY(), left_margin_, right_margin_, first_col, last_col, &first_spanned_col); type_ = PartitionType(span_type); } // Sets the internal flags good_width_ and good_column_. void ColPartition::SetColumnGoodness(WidthCallback* cb) { int y = MidY(); int width = RightAtY(y) - LeftAtY(y); good_width_ = cb->Run(width); good_column_ = blob_type_ == BRT_TEXT && left_key_tab_ && right_key_tab_; } // Determines whether the blobs in this partition mostly represent // a leader (fixed pitch sequence) and sets the member blobs accordingly. // Note that height is assumed to have been tested elsewhere, and that this // function will find most fixed-pitch text as leader without a height filter. // Leader detection is limited to sequences of identical width objects, // such as .... or ----, so patterns, such as .-.-.-.-. will not be found. bool ColPartition::MarkAsLeaderIfMonospaced() { bool result = false; // Gather statistics on the gaps between blobs and the widths of the blobs. int part_width = bounding_box_.width(); STATS gap_stats(0, part_width); STATS width_stats(0, part_width); BLOBNBOX_C_IT it(&boxes_); BLOBNBOX* prev_blob = it.data(); prev_blob->set_flow(BTFT_NEIGHBOURS); width_stats.add(prev_blob->bounding_box().width(), 1); int blob_count = 1; for (it.forward(); !it.at_first(); it.forward()) { BLOBNBOX* blob = it.data(); int left = blob->bounding_box().left(); int right = blob->bounding_box().right(); gap_stats.add(left - prev_blob->bounding_box().right(), 1); width_stats.add(right - left, 1); blob->set_flow(BTFT_NEIGHBOURS); prev_blob = blob; ++blob_count; } double median_gap = gap_stats.median(); double median_width = width_stats.median(); double max_width = MAX(median_gap, median_width); double min_width = MIN(median_gap, median_width); double gap_iqr = gap_stats.ile(0.75f) - gap_stats.ile(0.25f); if (textord_debug_tabfind >= 4) { tprintf("gap iqr = %g, blob_count=%d, limits=%g,%g\n", gap_iqr, blob_count, max_width * kMaxLeaderGapFractionOfMax, min_width * kMaxLeaderGapFractionOfMin); } if (gap_iqr < max_width * kMaxLeaderGapFractionOfMax && gap_iqr < min_width * kMaxLeaderGapFractionOfMin && blob_count >= kMinLeaderCount) { // This is stable enough to be called a leader, so check the widths. // Since leader dashes can join, run a dp cutting algorithm and go // on the cost. int offset = static_cast(ceil(gap_iqr * 2)); int min_step = static_cast(median_gap + median_width + 0.5); int max_step = min_step + offset; min_step -= offset; // Pad the buffer with min_step/2 on each end. int part_left = bounding_box_.left() - min_step / 2; part_width += min_step; DPPoint* projection = new DPPoint[part_width]; for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { BLOBNBOX* blob = it.data(); int left = blob->bounding_box().left(); int right = blob->bounding_box().right(); int height = blob->bounding_box().height(); for (int x = left; x < right; ++x) { projection[left - part_left].AddLocalCost(height); } } DPPoint* best_end = DPPoint::Solve(min_step, max_step, false, &DPPoint::CostWithVariance, part_width, projection); if (best_end != NULL && best_end->total_cost() < blob_count) { // Good enough. Call it a leader. result = true; bool modified_blob_list = false; for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { BLOBNBOX* blob = it.data(); TBOX box = blob->bounding_box(); // If the first or last blob is spaced too much, don't mark it. if (it.at_first()) { int gap = it.data_relative(1)->bounding_box().left() - blob->bounding_box().right(); if (blob->bounding_box().width() + gap > max_step) { it.extract(); modified_blob_list = true; continue; } } if (it.at_last()) { int gap = blob->bounding_box().left() - it.data_relative(-1)->bounding_box().right(); if (blob->bounding_box().width() + gap > max_step) { it.extract(); modified_blob_list = true; break; } } blob->set_region_type(BRT_TEXT); blob->set_flow(BTFT_LEADER); } if (modified_blob_list) ComputeLimits(); blob_type_ = BRT_TEXT; flow_ = BTFT_LEADER; } else if (textord_debug_tabfind) { if (best_end == NULL) { tprintf("No path\n"); } else { tprintf("Total cost = %d vs allowed %d\n", best_end->total_cost() < blob_count); } } delete [] projection; } return result; } // Given the result of TextlineProjection::EvaluateColPartition, (positive for // horizontal text, negative for vertical text, and near zero for non-text), // sets the blob_type_ and flow_ for this partition to indicate whether it // is strongly or weakly vertical or horizontal text, or non-text. // The function assumes that the blob neighbours are valid (from // StrokeWidth::SetNeighbours) and that those neighbours have their // region_type() set. void ColPartition::SetRegionAndFlowTypesFromProjectionValue(int value) { int blob_count = 0; // Total # blobs. int good_blob_score_ = 0; // Total # good strokewidth neighbours. int noisy_count = 0; // Total # neighbours marked as noise. int hline_count = 0; int vline_count = 0; BLOBNBOX_C_IT it(&boxes_); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { BLOBNBOX* blob = it.data(); ++blob_count; noisy_count += blob->NoisyNeighbours(); good_blob_score_ += blob->GoodTextBlob(); if (blob->region_type() == BRT_HLINE) ++hline_count; if (blob->region_type() == BRT_VLINE) ++vline_count; } flow_ = BTFT_NEIGHBOURS; blob_type_ = BRT_UNKNOWN; if (hline_count > vline_count) { flow_ = BTFT_NONE; blob_type_ = BRT_HLINE; } else if (vline_count > hline_count) { flow_ = BTFT_NONE; blob_type_ = BRT_VLINE; } else if (value < -1 || 1 < value) { int long_side; int short_side; if (value > 0) { long_side = bounding_box_.width(); short_side = bounding_box_.height(); blob_type_ = BRT_TEXT; } else { long_side = bounding_box_.height(); short_side = bounding_box_.width(); blob_type_ = BRT_VERT_TEXT; } // We will combine the old metrics using aspect ratio and blob counts // with the input value by allowing a strong indication to flip the // STRONG_CHAIN/CHAIN flow values. int strong_score = blob_count >= kHorzStrongTextlineCount ? 1 : 0; if (short_side > kHorzStrongTextlineHeight) ++strong_score; if (short_side * kHorzStrongTextlineAspect < long_side) ++strong_score; if (abs(value) >= kMinStrongTextValue) flow_ = BTFT_STRONG_CHAIN; else if (abs(value) >= kMinChainTextValue) flow_ = BTFT_CHAIN; else flow_ = BTFT_NEIGHBOURS; // Upgrade chain to strong chain if the other indicators are good if (flow_ == BTFT_CHAIN && strong_score == 3) flow_ = BTFT_STRONG_CHAIN; // Downgrade strong vertical text to chain if the indicators are bad. if (flow_ == BTFT_STRONG_CHAIN && value < 0 && strong_score < 2) flow_ = BTFT_CHAIN; } if (flow_ == BTFT_NEIGHBOURS) { // Check for noisy neighbours. if (noisy_count >= blob_count) { flow_ = BTFT_NONTEXT; blob_type_= BRT_NOISE; } } if (TabFind::WithinTestRegion(2, bounding_box_.left(), bounding_box_.bottom())) { tprintf("RegionFlowTypesFromProjectionValue count=%d, noisy=%d, score=%d,", blob_count, noisy_count, good_blob_score_); tprintf(" Projection value=%d, flow=%d, blob_type=%d\n", value, flow_, blob_type_); Print(); } SetBlobTypes(); } // Sets all blobs with the partition blob type and flow, but never overwrite // leader blobs, as we need to be able to identify them later. void ColPartition::SetBlobTypes() { if (!owns_blobs()) return; BLOBNBOX_C_IT it(&boxes_); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { BLOBNBOX* blob = it.data(); if (blob->flow() != BTFT_LEADER) blob->set_flow(flow_); blob->set_region_type(blob_type_); ASSERT_HOST(blob->owner() == NULL || blob->owner() == this); } } // Returns true if a decent baseline can be fitted through the blobs. // Works for both horizontal and vertical text. bool ColPartition::HasGoodBaseline() { // Approximation of the baseline. DetLineFit linepoints; // Calculation of the mean height on this line segment. Note that these // variable names apply to the context of a horizontal line, and work // analogously, rather than literally in the case of a vertical line. int total_height = 0; int coverage = 0; int height_count = 0; int width = 0; BLOBNBOX_C_IT it(&boxes_); TBOX box(it.data()->bounding_box()); // Accumulate points representing the baseline at the middle of each blob, // but add an additional point for each end of the line. This makes it // harder to fit a severe skew angle, as it is most likely not right. if (IsVerticalType()) { // For a vertical line, use the right side as the baseline. ICOORD first_pt(box.right(), box.bottom()); // Use the bottom-right of the first (bottom) box, the top-right of the // last, and the middle-right of all others. linepoints.Add(first_pt); for (it.forward(); !it.at_last(); it.forward()) { BLOBNBOX* blob = it.data(); box = blob->bounding_box(); ICOORD box_pt(box.right(), (box.top() + box.bottom()) / 2); linepoints.Add(box_pt); total_height += box.width(); coverage += box.height(); ++height_count; } box = it.data()->bounding_box(); ICOORD last_pt(box.right(), box.top()); linepoints.Add(last_pt); width = last_pt.y() - first_pt.y(); } else { // Horizontal lines use the bottom as the baseline. TBOX box(it.data()->bounding_box()); // Use the bottom-left of the first box, the the bottom-right of the last, // and the middle of all others. ICOORD first_pt(box.left(), box.bottom()); linepoints.Add(first_pt); for (it.forward(); !it.at_last(); it.forward()) { BLOBNBOX* blob = it.data(); box = blob->bounding_box(); ICOORD box_pt((box.left() + box.right()) / 2, box.bottom()); linepoints.Add(box_pt); total_height += box.height(); coverage += box.width(); ++height_count; } box = it.data()->bounding_box(); ICOORD last_pt(box.right(), box.bottom()); linepoints.Add(last_pt); width = last_pt.x() - first_pt.x(); } // Maximum median error allowed to be a good text line. double max_error = kMaxBaselineError * total_height / height_count; ICOORD start_pt, end_pt; double error = linepoints.Fit(&start_pt, &end_pt); return error < max_error && coverage >= kMinBaselineCoverage * width; } // Adds this ColPartition to a matching WorkingPartSet if one can be found, // otherwise starts a new one in the appropriate column, ending the previous. void ColPartition::AddToWorkingSet(const ICOORD& bleft, const ICOORD& tright, int resolution, ColPartition_LIST* used_parts, WorkingPartSet_LIST* working_sets) { if (block_owned_) return; // Done it already. block_owned_ = true; WorkingPartSet_IT it(working_sets); // If there is an upper partner use its working_set_ directly. ColPartition* partner = SingletonPartner(true); if (partner != NULL && partner->working_set_ != NULL) { working_set_ = partner->working_set_; working_set_->AddPartition(this); return; } if (partner != NULL && textord_debug_bugs) { tprintf("Partition with partner has no working set!:"); Print(); partner->Print(); } // Search for the column that the left edge fits in. WorkingPartSet* work_set = NULL; it.move_to_first(); int col_index = 0; for (it.mark_cycle_pt(); !it.cycled_list() && col_index != first_column_; it.forward(), ++col_index); if (textord_debug_tabfind >= 2) { tprintf("Match is %s for:", (col_index & 1) ? "Real" : "Between"); Print(); } if (it.cycled_list() && textord_debug_bugs) { tprintf("Target column=%d, only had %d\n", first_column_, col_index); } ASSERT_HOST(!it.cycled_list()); work_set = it.data(); // If last_column_ != first_column, then we need to scoop up all blocks // between here and the last_column_ and put back in work_set. if (!it.cycled_list() && last_column_ != first_column_ && !IsPulloutType()) { // Find the column that the right edge falls in. BLOCK_LIST completed_blocks; TO_BLOCK_LIST to_blocks; for (; !it.cycled_list() && col_index <= last_column_; it.forward(), ++col_index) { WorkingPartSet* end_set = it.data(); end_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts, &completed_blocks, &to_blocks); } work_set->InsertCompletedBlocks(&completed_blocks, &to_blocks); } working_set_ = work_set; work_set->AddPartition(this); } // From the given block_parts list, builds one or more BLOCKs and // corresponding TO_BLOCKs, such that the line spacing is uniform in each. // Created blocks are appended to the end of completed_blocks and to_blocks. // The used partitions are put onto used_parts, as they may still be referred // to in the partition grid. bleft, tright and resolution are the bounds // and resolution of the original image. void ColPartition::LineSpacingBlocks(const ICOORD& bleft, const ICOORD& tright, int resolution, ColPartition_LIST* block_parts, ColPartition_LIST* used_parts, BLOCK_LIST* completed_blocks, TO_BLOCK_LIST* to_blocks) { int page_height = tright.y() - bleft.y(); // Compute the initial spacing stats. ColPartition_IT it(block_parts); int part_count = 0; int max_line_height = 0; // TODO(joeliu): We should add some special logic for PT_INLINE_EQUATION type // because their line spacing with their neighbors maybe smaller and their // height may be slightly larger. for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { ColPartition* part = it.data(); ASSERT_HOST(!part->boxes()->empty()); STATS side_steps(0, part->bounding_box().height()); if (part->bounding_box().height() > max_line_height) max_line_height = part->bounding_box().height(); BLOBNBOX_C_IT blob_it(part->boxes()); int prev_bottom = blob_it.data()->bounding_box().bottom(); for (blob_it.forward(); !blob_it.at_first(); blob_it.forward()) { BLOBNBOX* blob = blob_it.data(); int bottom = blob->bounding_box().bottom(); int step = bottom - prev_bottom; if (step < 0) step = -step; side_steps.add(step, 1); prev_bottom = bottom; } part->set_side_step(static_cast(side_steps.median() + 0.5)); if (!it.at_last()) { ColPartition* next_part = it.data_relative(1); part->set_bottom_spacing(part->median_bottom() - next_part->median_bottom()); part->set_top_spacing(part->median_top() - next_part->median_top()); } else { part->set_bottom_spacing(page_height); part->set_top_spacing(page_height); } if (textord_debug_tabfind) { part->Print(); tprintf("side step = %.2f, top spacing = %d, bottom spacing=%d\n", side_steps.median(), part->top_spacing(), part->bottom_spacing()); } ++part_count; } if (part_count == 0) return; SmoothSpacings(resolution, page_height, block_parts); // Move the partitions into individual block lists and make the blocks. BLOCK_IT block_it(completed_blocks); TO_BLOCK_IT to_block_it(to_blocks); ColPartition_LIST spacing_parts; ColPartition_IT sp_block_it(&spacing_parts); int same_block_threshold = max_line_height * kMaxSameBlockLineSpacing; for (it.mark_cycle_pt(); !it.empty();) { ColPartition* part = it.extract(); sp_block_it.add_to_end(part); it.forward(); if (it.empty() || part->bottom_spacing() > same_block_threshold || !part->SpacingsEqual(*it.data(), resolution)) { // There is a spacing boundary. Check to see if it.data() belongs // better in the current block or the next one. if (!it.empty() && part->bottom_spacing() <= same_block_threshold) { ColPartition* next_part = it.data(); // If there is a size match one-way, then the middle line goes with // its matched size, otherwise it goes with the smallest spacing. ColPartition* third_part = it.at_last() ? NULL : it.data_relative(1); if (textord_debug_tabfind) { tprintf("Spacings unequal: upper:%d/%d, lower:%d/%d," " sizes %d %d %d\n", part->top_spacing(), part->bottom_spacing(), next_part->top_spacing(), next_part->bottom_spacing(), part->median_size(), next_part->median_size(), third_part != NULL ? third_part->median_size() : 0); } // We can only consider adding the next line to the block if the sizes // match and the lines are close enough for their size. if (part->SizesSimilar(*next_part) && next_part->median_size() * kMaxSameBlockLineSpacing > part->bottom_spacing() && part->median_size() * kMaxSameBlockLineSpacing > part->top_spacing()) { // Even now, we can only add it as long as the third line doesn't // match in the same way and have a smaller bottom spacing. if (third_part == NULL || !next_part->SizesSimilar(*third_part) || third_part->median_size() * kMaxSameBlockLineSpacing <= next_part->bottom_spacing() || next_part->median_size() * kMaxSameBlockLineSpacing <= next_part->top_spacing() || next_part->bottom_spacing() > part->bottom_spacing()) { // Add to the current block. sp_block_it.add_to_end(it.extract()); it.forward(); if (textord_debug_tabfind) { tprintf("Added line to current block.\n"); } } } } TO_BLOCK* to_block = MakeBlock(bleft, tright, &spacing_parts, used_parts); if (to_block != NULL) { to_block_it.add_to_end(to_block); block_it.add_to_end(to_block->block); } sp_block_it.set_to_list(&spacing_parts); } else { if (textord_debug_tabfind && !it.empty()) { ColPartition* next_part = it.data(); tprintf("Spacings equal: upper:%d/%d, lower:%d/%d\n", part->top_spacing(), part->bottom_spacing(), next_part->top_spacing(), next_part->bottom_spacing(), part->median_size(), next_part->median_size()); } } } } // Helper function to clip the input pos to the given bleft, tright bounds. static void ClipCoord(const ICOORD& bleft, const ICOORD& tright, ICOORD* pos) { if (pos->x() < bleft.x()) pos->set_x(bleft.x()); if (pos->x() > tright.x()) pos->set_x(tright.x()); if (pos->y() < bleft.y()) pos->set_y(bleft.y()); if (pos->y() > tright.y()) pos->set_y(tright.y()); } // Helper moves the blobs from the given list of block_parts into the block // itself. Sets up the block for (old) textline formation correctly for // vertical and horizontal text. The partitions are moved to used_parts // afterwards, as they cannot be deleted yet. static TO_BLOCK* MoveBlobsToBlock(bool vertical_text, int line_spacing, BLOCK* block, ColPartition_LIST* block_parts, ColPartition_LIST* used_parts) { // Make a matching TO_BLOCK and put all the BLOBNBOXes from the parts in it. // Move all the parts to a done list as they are no longer needed, except // that have have to continue to exist until the part grid is deleted. // Compute the median blob size as we go, as the block needs to know. TBOX block_box(block->bounding_box()); STATS sizes(0, MAX(block_box.width(), block_box.height())); bool text_type = block->poly_block()->IsText(); ColPartition_IT it(block_parts); TO_BLOCK* to_block = new TO_BLOCK(block); BLOBNBOX_IT blob_it(&to_block->blobs); ColPartition_IT used_it(used_parts); for (it.move_to_first(); !it.empty(); it.forward()) { ColPartition* part = it.extract(); // Transfer blobs from all regions to the output blocks. // Blobs for non-text regions will be used to define the polygonal // bounds of the region. for (BLOBNBOX_C_IT bb_it(part->boxes()); !bb_it.empty(); bb_it.forward()) { BLOBNBOX* bblob = bb_it.extract(); if (bblob->owner() != part) { tprintf("Ownership incorrect for blob:"); bblob->bounding_box().print(); tprintf("Part="); part->Print(); if (bblob->owner() == NULL) { tprintf("Not owned\n"); } else { tprintf("Owner part:"); bblob->owner()->Print(); } } ASSERT_HOST(bblob->owner() == part); // Assert failure here is caused by arbitrarily changing the partition // type without also changing the blob type, such as in // InsertSmallBlobsAsUnknowns. ASSERT_HOST(!text_type || bblob->region_type() >= BRT_UNKNOWN); C_OUTLINE_LIST* outlines = bblob->cblob()->out_list(); C_OUTLINE_IT ol_it(outlines); ASSERT_HOST(!text_type || ol_it.data()->pathlength() > 0); if (vertical_text) sizes.add(bblob->bounding_box().width(), 1); else sizes.add(bblob->bounding_box().height(), 1); blob_it.add_after_then_move(bblob); } used_it.add_to_end(part); } if (text_type && blob_it.empty()) { delete block; delete to_block; return NULL; } to_block->line_size = sizes.median(); if (vertical_text) { int block_width = block->bounding_box().width(); if (block_width < line_spacing) line_spacing = block_width; to_block->line_spacing = static_cast(line_spacing); to_block->max_blob_size = static_cast(block_width + 1); } else { int block_height = block->bounding_box().height(); if (block_height < line_spacing) line_spacing = block_height; to_block->line_spacing = static_cast(line_spacing); to_block->max_blob_size = static_cast(block_height + 1); } return to_block; } // Constructs a block from the given list of partitions. // Arguments are as LineSpacingBlocks above. TO_BLOCK* ColPartition::MakeBlock(const ICOORD& bleft, const ICOORD& tright, ColPartition_LIST* block_parts, ColPartition_LIST* used_parts) { if (block_parts->empty()) return NULL; // Nothing to do. ColPartition_IT it(block_parts); ColPartition* part = it.data(); PolyBlockType type = part->type(); if (type == PT_VERTICAL_TEXT) return MakeVerticalTextBlock(bleft, tright, block_parts, used_parts); // LineSpacingBlocks has handed us a collection of evenly spaced lines and // put the average spacing in each partition, so we can just take the // linespacing from the first partition. int line_spacing = part->bottom_spacing(); if (line_spacing < part->median_size()) line_spacing = part->bounding_box().height(); ICOORDELT_LIST vertices; ICOORDELT_IT vert_it(&vertices); ICOORD start, end; int min_x = MAX_INT32; int max_x = -MAX_INT32; int min_y = MAX_INT32; int max_y = -MAX_INT32; int iteration = 0; do { if (iteration == 0) ColPartition::LeftEdgeRun(&it, &start, &end); else ColPartition::RightEdgeRun(&it, &start, &end); ClipCoord(bleft, tright, &start); ClipCoord(bleft, tright, &end); vert_it.add_after_then_move(new ICOORDELT(start)); vert_it.add_after_then_move(new ICOORDELT(end)); UpdateRange(start.x(), &min_x, &max_x); UpdateRange(end.x(), &min_x, &max_x); UpdateRange(start.y(), &min_y, &max_y); UpdateRange(end.y(), &min_y, &max_y); if ((iteration == 0 && it.at_first()) || (iteration == 1 && it.at_last())) { ++iteration; it.move_to_last(); } } while (iteration < 2); if (textord_debug_tabfind) tprintf("Making block at (%d,%d)->(%d,%d)\n", min_x, min_y, max_x, max_y); BLOCK* block = new BLOCK("", true, 0, 0, min_x, min_y, max_x, max_y); block->set_poly_block(new POLY_BLOCK(&vertices, type)); return MoveBlobsToBlock(false, line_spacing, block, block_parts, used_parts); } // Constructs a block from the given list of vertical text partitions. // Currently only creates rectangular blocks. TO_BLOCK* ColPartition::MakeVerticalTextBlock(const ICOORD& bleft, const ICOORD& tright, ColPartition_LIST* block_parts, ColPartition_LIST* used_parts) { if (block_parts->empty()) return NULL; // Nothing to do. ColPartition_IT it(block_parts); ColPartition* part = it.data(); TBOX block_box = part->bounding_box(); int line_spacing = block_box.width(); PolyBlockType type = it.data()->type(); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { block_box += it.data()->bounding_box(); } if (textord_debug_tabfind) { tprintf("Making block at:"); block_box.print(); } BLOCK* block = new BLOCK("", true, 0, 0, block_box.left(), block_box.bottom(), block_box.right(), block_box.top()); block->set_poly_block(new POLY_BLOCK(block_box, type)); return MoveBlobsToBlock(true, line_spacing, block, block_parts, used_parts); } // Makes a TO_ROW matching this and moves all the blobs to it, transferring // ownership to to returned TO_ROW. TO_ROW* ColPartition::MakeToRow() { BLOBNBOX_C_IT blob_it(&boxes_); TO_ROW* row = NULL; int line_size = IsVerticalType() ? median_width_ : median_size_; // Add all the blobs to a single TO_ROW. for (; !blob_it.empty(); blob_it.forward()) { BLOBNBOX* blob = blob_it.extract(); // blob->compute_bounding_box(); int top = blob->bounding_box().top(); int bottom = blob->bounding_box().bottom(); if (row == NULL) { row = new TO_ROW(blob, static_cast(top), static_cast(bottom), static_cast(line_size)); } else { row->add_blob(blob, static_cast(top), static_cast(bottom), static_cast(line_size)); } } return row; } // Returns a copy of everything except the list of boxes. The resulting // ColPartition is only suitable for keeping in a column candidate list. ColPartition* ColPartition::ShallowCopy() const { ColPartition* part = new ColPartition(blob_type_, vertical_); part->left_margin_ = left_margin_; part->right_margin_ = right_margin_; part->bounding_box_ = bounding_box_; memcpy(part->special_blobs_densities_, special_blobs_densities_, sizeof(special_blobs_densities_)); part->median_bottom_ = median_bottom_; part->median_top_ = median_top_; part->median_size_ = median_size_; part->median_left_ = median_left_; part->median_right_ = median_right_; part->median_width_ = median_width_; part->good_width_ = good_width_; part->good_column_ = good_column_; part->left_key_tab_ = left_key_tab_; part->right_key_tab_ = right_key_tab_; part->type_ = type_; part->flow_ = flow_; part->left_key_ = left_key_; part->right_key_ = right_key_; part->first_column_ = first_column_; part->last_column_ = last_column_; part->owns_blobs_ = false; return part; } ColPartition* ColPartition::CopyButDontOwnBlobs() { ColPartition* copy = ShallowCopy(); copy->set_owns_blobs(false); BLOBNBOX_C_IT inserter(copy->boxes()); BLOBNBOX_C_IT traverser(boxes()); for (traverser.mark_cycle_pt(); !traverser.cycled_list(); traverser.forward()) inserter.add_after_then_move(traverser.data()); return copy; } #ifndef GRAPHICS_DISABLED // Provides a color for BBGrid to draw the rectangle. // Must be kept in sync with PolyBlockType. ScrollView::Color ColPartition::BoxColor() const { if (type_ == PT_UNKNOWN) return BLOBNBOX::TextlineColor(blob_type_, flow_); return POLY_BLOCK::ColorForPolyBlockType(type_); } #endif // GRAPHICS_DISABLED // Keep in sync with BlobRegionType. static char kBlobTypes[BRT_COUNT + 1] = "NHSRIUVT"; // Prints debug information on this. void ColPartition::Print() const { int y = MidY(); tprintf("ColPart:%c(M%d-%c%d-B%d/%d,%d/%d)->(%dB-%d%c-%dM/%d,%d/%d)" " w-ok=%d, v-ok=%d, type=%d%c%d, fc=%d, lc=%d, boxes=%d" " ts=%d bs=%d ls=%d rs=%d\n", boxes_.empty() ? 'E' : ' ', left_margin_, left_key_tab_ ? 'T' : 'B', LeftAtY(y), bounding_box_.left(), median_left_, bounding_box_.bottom(), median_bottom_, bounding_box_.right(), RightAtY(y), right_key_tab_ ? 'T' : 'B', right_margin_, median_right_, bounding_box_.top(), median_top_, good_width_, good_column_, type_, kBlobTypes[blob_type_], flow_, first_column_, last_column_, boxes_.length(), space_above_, space_below_, space_to_left_, space_to_right_); } // Prints debug information on the colors. void ColPartition::PrintColors() { tprintf("Colors:(%d, %d, %d)%d -> (%d, %d, %d)\n", color1_[COLOR_RED], color1_[COLOR_GREEN], color1_[COLOR_BLUE], color1_[L_ALPHA_CHANNEL], color2_[COLOR_RED], color2_[COLOR_GREEN], color2_[COLOR_BLUE]); } // Sets the types of all partitions in the run to be the max of the types. void ColPartition::SmoothPartnerRun(int working_set_count) { STATS left_stats(0, working_set_count); STATS right_stats(0, working_set_count); PolyBlockType max_type = type_; ColPartition* partner; for (partner = SingletonPartner(false); partner != NULL; partner = partner->SingletonPartner(false)) { if (partner->type_ > max_type) max_type = partner->type_; if (column_set_ == partner->column_set_) { left_stats.add(partner->first_column_, 1); right_stats.add(partner->last_column_, 1); } } type_ = max_type; // TODO(rays) Either establish that it isn't necessary to set the columns, // or find a way to do it that does not cause an assert failure in // AddToWorkingSet. #if 0 first_column_ = left_stats.mode(); last_column_ = right_stats.mode(); if (last_column_ < first_column_) last_column_ = first_column_; #endif for (partner = SingletonPartner(false); partner != NULL; partner = partner->SingletonPartner(false)) { partner->type_ = max_type; #if 0 // See TODO above if (column_set_ == partner->column_set_) { partner->first_column_ = first_column_; partner->last_column_ = last_column_; } #endif } } // ======= Scenario common to all Refine*Partners* functions ======= // ColPartitions are aiming to represent textlines, or horizontal slices // of images, and we are trying to form bi-directional (upper/lower) chains // of UNIQUE partner ColPartitions that can be made into blocks. // The ColPartitions have previously been typed (see SetPartitionType) // according to a combination of the content type and // how they lie on the columns. We want to chain text into // groups of a single type, but image ColPartitions may have been typed // differently in different parts of the image, due to being non-rectangular. // // We previously ran a search for upper and lower partners, but there may // be more than one, and they may be of mixed types, so now we wish to // refine the partners down to at most one. // A heading may have multiple partners: // =============================== // ======== ========== ========= // ======== ========== ========= // but it should be a different type. // A regular flowing text line may have multiple partners: // ================== =================== // ======= ================= =========== // This could be the start of a pull-out, or it might all be in a single // column and might be caused by tightly spaced text, bold words, bullets, // funny punctuation etc, all of which can cause textlines to be split into // multiple ColPartitions. Pullouts and figure captions should now be different // types so we can more aggressively merge groups of partners that all sit // in a single column. // // Cleans up the partners of the given type so that there is at most // one partner. This makes block creation simpler. // If get_desperate is true, goes to more desperate merge methods // to merge flowing text before breaking partnerships. void ColPartition::RefinePartners(PolyBlockType type, bool get_desperate, ColPartitionGrid* grid) { if (TypesSimilar(type_, type)) { RefinePartnersInternal(true, get_desperate, grid); RefinePartnersInternal(false, get_desperate, grid); } else if (type == PT_COUNT) { // This is the final pass. Make sure only the correctly typed // partners surivive, however many there are. RefinePartnersByType(true, &upper_partners_); RefinePartnersByType(false, &lower_partners_); // It is possible for a merge to have given a partition multiple // partners again, so the last resort is to use overlap which is // guaranteed to leave at most one partner left. if (!upper_partners_.empty() && !upper_partners_.singleton()) RefinePartnersByOverlap(true, &upper_partners_); if (!lower_partners_.empty() && !lower_partners_.singleton()) RefinePartnersByOverlap(false, &lower_partners_); } } ////////////////// PRIVATE CODE ///////////////////////////// // Cleans up the partners above if upper is true, else below. // If get_desperate is true, goes to more desperate merge methods // to merge flowing text before breaking partnerships. void ColPartition::RefinePartnersInternal(bool upper, bool get_desperate, ColPartitionGrid* grid) { ColPartition_CLIST* partners = upper ? &upper_partners_ : &lower_partners_; if (!partners->empty() && !partners->singleton()) { RefinePartnersByType(upper, partners); if (!partners->empty() && !partners->singleton()) { // Check for transitive partnerships and break the cycle. RefinePartnerShortcuts(upper, partners); if (!partners->empty() && !partners->singleton()) { // Types didn't fix it. Flowing text keeps the one with the longest // sequence of singleton matching partners. All others max overlap. if (TypesSimilar(type_, PT_FLOWING_TEXT) && get_desperate) { RefineTextPartnersByMerge(upper, false, partners, grid); if (!partners->empty() && !partners->singleton()) RefineTextPartnersByMerge(upper, true, partners, grid); } // The last resort is to use overlap. if (!partners->empty() && !partners->singleton()) RefinePartnersByOverlap(upper, partners); } } } } // Cleans up the partners above if upper is true, else below. // Restricts the partners to only desirable types. For text and BRT_HLINE this // means the same type_ , and for image types it means any image type. void ColPartition::RefinePartnersByType(bool upper, ColPartition_CLIST* partners) { bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(), bounding_box_.bottom()); if (debug) { tprintf("Refining %d %s partners by type for:\n", partners->length(), upper ? "Upper" : "Lower"); Print(); } ColPartition_C_IT it(partners); // Purify text by type. if (!IsImageType() && !IsLineType() && type() != PT_TABLE) { // Keep only partners matching type_. // Exception: PT_VERTICAL_TEXT is allowed to stay with the other // text types if it is the only partner. for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { ColPartition* partner = it.data(); if (!TypesSimilar(type_, partner->type_)) { if (debug) { tprintf("Removing partner:"); partner->Print(); } partner->RemovePartner(!upper, this); it.extract(); } else if (debug) { tprintf("Keeping partner:"); partner->Print(); } } } else { // Only polyimages are allowed to have partners of any kind! for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { ColPartition* partner = it.data(); if (partner->blob_type() != BRT_POLYIMAGE || blob_type() != BRT_POLYIMAGE) { if (debug) { tprintf("Removing partner:"); partner->Print(); } partner->RemovePartner(!upper, this); it.extract(); } else if (debug) { tprintf("Keeping partner:"); partner->Print(); } } } } // Cleans up the partners above if upper is true, else below. // Remove transitive partnerships: this<->a, and a<->b and this<->b. // Gets rid of this<->b, leaving a clean chain. // Also if we have this<->a and a<->this, then gets rid of this<->a, as // this has multiple partners. void ColPartition::RefinePartnerShortcuts(bool upper, ColPartition_CLIST* partners) { bool done_any = false; do { done_any = false; ColPartition_C_IT it(partners); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { ColPartition* a = it.data(); // Check for a match between all of a's partners (it1/b1) and all // of this's partners (it2/b2). ColPartition_C_IT it1(upper ? &a->upper_partners_ : &a->lower_partners_); for (it1.mark_cycle_pt(); !it1.cycled_list(); it1.forward()) { ColPartition* b1 = it1.data(); if (b1 == this) { done_any = true; it.extract(); a->RemovePartner(!upper, this); break; } ColPartition_C_IT it2(partners); for (it2.mark_cycle_pt(); !it2.cycled_list(); it2.forward()) { ColPartition* b2 = it2.data(); if (b1 == b2) { // Jackpot! b2 should not be a partner of this. it2.extract(); b2->RemovePartner(!upper, this); done_any = true; // That potentially invalidated all the iterators, so break out // and start again. break; } } if (done_any) break; } if (done_any) break; } } while (done_any && !partners->empty() && !partners->singleton()); } // Cleans up the partners above if upper is true, else below. // If multiple text partners can be merged, (with each other, NOT with this), // then do so. // If desperate is true, then an increase in overlap with the merge is // allowed. If the overlap increases, then the desperately_merged_ flag // is set, indicating that the textlines probably need to be regenerated // by aggressive line fitting/splitting, as there are probably vertically // joined blobs that cross textlines. void ColPartition::RefineTextPartnersByMerge(bool upper, bool desperate, ColPartition_CLIST* partners, ColPartitionGrid* grid) { bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(), bounding_box_.bottom()); if (debug) { tprintf("Refining %d %s partners by merge for:\n", partners->length(), upper ? "Upper" : "Lower"); Print(); } while (!partners->empty() && !partners->singleton()) { // Absorb will mess up the iterators, so we have to merge one partition // at a time and rebuild the iterators each time. ColPartition_C_IT it(partners); ColPartition* part = it.data(); // Gather a list of merge candidates, from the list of partners, that // are all in the same single column. See general scenario comment above. ColPartition_CLIST candidates; ColPartition_C_IT cand_it(&candidates); for (it.forward(); !it.at_first(); it.forward()) { ColPartition* candidate = it.data(); if (part->first_column_ == candidate->last_column_ && part->last_column_ == candidate->first_column_) cand_it.add_after_then_move(it.data()); } int overlap_increase; ColPartition* candidate = grid->BestMergeCandidate(part, &candidates, debug, NULL, &overlap_increase); if (candidate != NULL && (overlap_increase <= 0 || desperate)) { if (debug) { tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n", part->HCoreOverlap(*candidate), part->VCoreOverlap(*candidate), overlap_increase); } // Remove before merge and re-insert to keep the integrity of the grid. grid->RemoveBBox(candidate); grid->RemoveBBox(part); part->Absorb(candidate, NULL); // We modified the box of part, so re-insert it into the grid. grid->InsertBBox(true, true, part); if (overlap_increase > 0) part->desperately_merged_ = true; } else { break; // Can't merge. } } } // Cleans up the partners above if upper is true, else below. // Keep the partner with the biggest overlap. void ColPartition::RefinePartnersByOverlap(bool upper, ColPartition_CLIST* partners) { bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(), bounding_box_.bottom()); if (debug) { tprintf("Refining %d %s partners by overlap for:\n", partners->length(), upper ? "Upper" : "Lower"); Print(); } ColPartition_C_IT it(partners); ColPartition* best_partner = it.data(); // Find the partner with the best overlap. int best_overlap = 0; for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { ColPartition* partner = it.data(); int overlap = MIN(bounding_box_.right(), partner->bounding_box_.right()) - MAX(bounding_box_.left(), partner->bounding_box_.left()); if (overlap > best_overlap) { best_overlap = overlap; best_partner = partner; } } // Keep only the best partner. for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { ColPartition* partner = it.data(); if (partner != best_partner) { if (debug) { tprintf("Removing partner:"); partner->Print(); } partner->RemovePartner(!upper, this); it.extract(); } } } // Return true if bbox belongs better in this than other. bool ColPartition::ThisPartitionBetter(BLOBNBOX* bbox, const ColPartition& other) { TBOX box = bbox->bounding_box(); // Margins take priority. int left = box.left(); int right = box.right(); if (left < left_margin_ || right > right_margin_) return false; if (left < other.left_margin_ || right > other.right_margin_) return true; int top = box.top(); int bottom = box.bottom(); int this_overlap = MIN(top, median_top_) - MAX(bottom, median_bottom_); int other_overlap = MIN(top, other.median_top_) - MAX(bottom, other.median_bottom_); int this_miss = median_top_ - median_bottom_ - this_overlap; int other_miss = other.median_top_ - other.median_bottom_ - other_overlap; if (TabFind::WithinTestRegion(3, box.left(), box.bottom())) { tprintf("Unique on (%d,%d)->(%d,%d) overlap %d/%d, miss %d/%d, mt=%d/%d\n", box.left(), box.bottom(), box.right(), box.top(), this_overlap, other_overlap, this_miss, other_miss, median_top_, other.median_top_); } if (this_miss < other_miss) return true; if (this_miss > other_miss) return false; if (this_overlap > other_overlap) return true; if (this_overlap < other_overlap) return false; return median_top_ >= other.median_top_; } // Returns the median line-spacing between the current position and the end // of the list. // The iterator is passed by value so the iteration does not modify the // caller's iterator. static int MedianSpacing(int page_height, ColPartition_IT it) { STATS stats(0, page_height); while (!it.cycled_list()) { ColPartition* part = it.data(); it.forward(); stats.add(part->bottom_spacing(), 1); stats.add(part->top_spacing(), 1); } return static_cast(stats.median() + 0.5); } // Returns true if this column partition is in the same column as // part. This function will only work after the SetPartitionType function // has been called on both column partitions. This is useful for // doing a SideSearch when you want things in the same page column. // // Currently called by the table detection code to identify if potential table // partitions exist in the same column. bool ColPartition::IsInSameColumnAs(const ColPartition& part) const { // Overlap does not occur when last < part.first or first > part.last. // In other words, one is completely to the side of the other. // This is just DeMorgan's law applied to that so the function returns true. return (last_column_ >= part.first_column_) && (first_column_ <= part.last_column_); } // Smoothes the spacings in the list into groups of equal linespacing. // resolution is the resolution of the original image, used as a basis // for thresholds in change of spacing. page_height is in pixels. void ColPartition::SmoothSpacings(int resolution, int page_height, ColPartition_LIST* parts) { // The task would be trivial if we didn't have to allow for blips - // occasional offsets in spacing caused by anomalous text, such as all // caps, groups of descenders, joined words, Arabic etc. // The neighbourhood stores a consecutive group of partitions so that // blips can be detected correctly, yet conservatively enough to not // mistake genuine spacing changes for blips. See example below. ColPartition* neighbourhood[PN_COUNT]; ColPartition_IT it(parts); it.mark_cycle_pt(); // Although we know nothing about the spacings is this list, the median is // used as an approximation to allow blips. // If parts of this block aren't spaced to the median, then we can't // accept blips in those parts, but we'll recalculate it each time we // split the block, so the median becomes more likely to match all the text. int median_space = MedianSpacing(page_height, it); ColPartition_IT start_it(it); ColPartition_IT end_it(it); for (int i = 0; i < PN_COUNT; ++i) { if (i < PN_UPPER || it.cycled_list()) { neighbourhood[i] = NULL; } else { if (i == PN_LOWER) end_it = it; neighbourhood[i] = it.data(); it.forward(); } } while (neighbourhood[PN_UPPER] != NULL) { // Test for end of a group. Normally SpacingsEqual is true within a group, // but in the case of a blip, it will be false. Here is an example: // Line enum Spacing below (spacing between tops of lines) // 1 ABOVE2 20 // 2 ABOVE1 20 // 3 UPPER 15 // 4 LOWER 25 // 5 BELOW1 20 // 6 BELOW2 20 // Line 4 is all in caps (regular caps), so the spacing between line 3 // and line 4 (looking at the tops) is smaller than normal, and the // spacing between line 4 and line 5 is larger than normal, but the // two of them add to twice the normal spacing. // The following if has to accept unequal spacings 3 times to pass the // blip (20/15, 15/25 and 25/20) // When the blip is in the middle, OKSpacingBlip tests that one of // ABOVE1 and BELOW1 matches the median. // The first time, everything is shifted down 1, so we present // OKSpacingBlip with neighbourhood+1 and check that PN_UPPER is median. // The last time, everything is shifted up 1, so we present OKSpacingBlip // with neighbourhood-1 and check that PN_LOWER matches the median. if (neighbourhood[PN_LOWER] == NULL || (!neighbourhood[PN_UPPER]->SpacingsEqual(*neighbourhood[PN_LOWER], resolution) && !OKSpacingBlip(resolution, median_space, neighbourhood) && (!OKSpacingBlip(resolution, median_space, neighbourhood - 1) || !neighbourhood[PN_LOWER]->SpacingEqual(median_space, resolution)) && (!OKSpacingBlip(resolution, median_space, neighbourhood + 1) || !neighbourhood[PN_UPPER]->SpacingEqual(median_space, resolution)))) { // The group has ended. PN_UPPER is the last member. // Compute the mean spacing over the group. ColPartition_IT sum_it(start_it); ColPartition* last_part = neighbourhood[PN_UPPER]; double total_bottom = 0.0; double total_top = 0.0; int total_count = 0; ColPartition* upper = sum_it.data(); // We do not process last_part, as its spacing is different. while (upper != last_part) { total_bottom += upper->bottom_spacing(); total_top += upper->top_spacing(); ++total_count; sum_it.forward(); upper = sum_it.data(); } if (total_count > 0) { // There were at least 2 lines, so set them all to the mean. int top_spacing = static_cast(total_top / total_count + 0.5); int bottom_spacing = static_cast(total_bottom / total_count + 0.5); if (textord_debug_tabfind) { tprintf("Spacing run ended. Cause:"); if (neighbourhood[PN_LOWER] == NULL) { tprintf("No more lines\n"); } else { tprintf("Spacing change. Spacings:\n"); for (int i = 0; i < PN_COUNT; ++i) { if (neighbourhood[i] == NULL) { tprintf("NULL"); if (i > 0 && neighbourhood[i - 1] != NULL) { if (neighbourhood[i - 1]->SingletonPartner(false) != NULL) { tprintf(" Lower partner:"); neighbourhood[i - 1]->SingletonPartner(false)->Print(); } else { tprintf(" NULL lower partner:\n"); } } else { tprintf("\n"); } } else { tprintf("Top = %d, bottom = %d\n", neighbourhood[i]->top_spacing(), neighbourhood[i]->bottom_spacing()); } } } tprintf("Mean spacing = %d/%d\n", top_spacing, bottom_spacing); } sum_it = start_it; upper = sum_it.data(); while (upper != last_part) { upper->set_top_spacing(top_spacing); upper->set_bottom_spacing(bottom_spacing); if (textord_debug_tabfind) { tprintf("Setting mean on:"); upper->Print(); } sum_it.forward(); upper = sum_it.data(); } } // PN_LOWER starts the next group and end_it is the next start_it. start_it = end_it; // Recalculate the median spacing to maximize the chances of detecting // spacing blips. median_space = MedianSpacing(page_height, end_it); } // Shuffle pointers. for (int j = 1; j < PN_COUNT; ++j) { neighbourhood[j - 1] = neighbourhood[j]; } if (it.cycled_list()) { neighbourhood[PN_COUNT - 1] = NULL; } else { neighbourhood[PN_COUNT - 1] = it.data(); it.forward(); } end_it.forward(); } } // Returns true if the parts array of pointers to partitions matches the // condition for a spacing blip. See SmoothSpacings for what this means // and how it is used. bool ColPartition::OKSpacingBlip(int resolution, int median_spacing, ColPartition** parts) { if (parts[PN_UPPER] == NULL || parts[PN_LOWER] == NULL) return false; // The blip is OK if upper and lower sum to an OK value and at least // one of above1 and below1 is equal to the median. return parts[PN_UPPER]->SummedSpacingOK(*parts[PN_LOWER], median_spacing, resolution) && ((parts[PN_ABOVE1] != NULL && parts[PN_ABOVE1]->SpacingEqual(median_spacing, resolution)) || (parts[PN_BELOW1] != NULL && parts[PN_BELOW1]->SpacingEqual(median_spacing, resolution))); } // Returns true if both the top and bottom spacings of this match the given // spacing to within suitable margins dictated by the image resolution. bool ColPartition::SpacingEqual(int spacing, int resolution) const { int bottom_error = BottomSpacingMargin(resolution); int top_error = TopSpacingMargin(resolution); return NearlyEqual(bottom_spacing_, spacing, bottom_error) && NearlyEqual(top_spacing_, spacing, top_error); } // Returns true if both the top and bottom spacings of this and other // match to within suitable margins dictated by the image resolution. bool ColPartition::SpacingsEqual(const ColPartition& other, int resolution) const { int bottom_error = MAX(BottomSpacingMargin(resolution), other.BottomSpacingMargin(resolution)); int top_error = MAX(TopSpacingMargin(resolution), other.TopSpacingMargin(resolution)); return NearlyEqual(bottom_spacing_, other.bottom_spacing_, bottom_error) && (NearlyEqual(top_spacing_, other.top_spacing_, top_error) || NearlyEqual(top_spacing_ + other.top_spacing_, bottom_spacing_ * 2, bottom_error)); } // Returns true if the sum spacing of this and other match the given // spacing (or twice the given spacing) to within a suitable margin dictated // by the image resolution. bool ColPartition::SummedSpacingOK(const ColPartition& other, int spacing, int resolution) const { int bottom_error = MAX(BottomSpacingMargin(resolution), other.BottomSpacingMargin(resolution)); int top_error = MAX(TopSpacingMargin(resolution), other.TopSpacingMargin(resolution)); int bottom_total = bottom_spacing_ + other.bottom_spacing_; int top_total = top_spacing_ + other.top_spacing_; return (NearlyEqual(spacing, bottom_total, bottom_error) && NearlyEqual(spacing, top_total, top_error)) || (NearlyEqual(spacing * 2, bottom_total, bottom_error) && NearlyEqual(spacing * 2, top_total, top_error)); } // Returns a suitable spacing margin that can be applied to bottoms of // text lines, based on the resolution and the stored side_step_. int ColPartition::BottomSpacingMargin(int resolution) const { return static_cast(kMaxSpacingDrift * resolution + 0.5) + side_step_; } // Returns a suitable spacing margin that can be applied to tops of // text lines, based on the resolution and the stored side_step_. int ColPartition::TopSpacingMargin(int resolution) const { return static_cast(kMaxTopSpacingFraction * median_size_ + 0.5) + BottomSpacingMargin(resolution); } // Returns true if the median text sizes of this and other agree to within // a reasonable multiplicative factor. bool ColPartition::SizesSimilar(const ColPartition& other) const { return median_size_ <= other.median_size_ * kMaxSizeRatio && other.median_size_ <= median_size_ * kMaxSizeRatio; } // Helper updates margin_left and margin_right, being the bounds of the left // margin of part of a block. Returns false and does not update the bounds if // this partition has a disjoint margin with the established margin. static bool UpdateLeftMargin(const ColPartition& part, int* margin_left, int* margin_right) { const TBOX& part_box = part.bounding_box(); int top = part_box.top(); int bottom = part_box.bottom(); int tl_key = part.SortKey(part.left_margin(), top); int tr_key = part.SortKey(part_box.left(), top); int bl_key = part.SortKey(part.left_margin(), bottom); int br_key = part.SortKey(part_box.left(), bottom); int left_key = MAX(tl_key, bl_key); int right_key = MIN(tr_key, br_key); if (left_key <= *margin_right && right_key >= *margin_left) { // This part is good - let's keep it. *margin_right = MIN(*margin_right, right_key); *margin_left = MAX(*margin_left, left_key); return true; } return false; } // Computes and returns in start, end a line segment formed from a // forwards-iterated group of left edges of partitions that satisfy the // condition that the intersection of the left margins is non-empty, ie the // rightmost left margin is to the left of the leftmost left bounding box edge. // On return the iterator is set to the start of the next run. void ColPartition::LeftEdgeRun(ColPartition_IT* part_it, ICOORD* start, ICOORD* end) { ColPartition* part = part_it->data(); ColPartition* start_part = part; int start_y = part->bounding_box_.top(); if (!part_it->at_first()) { int prev_bottom = part_it->data_relative(-1)->bounding_box_.bottom(); if (prev_bottom < start_y) start_y = prev_bottom; else if (prev_bottom > start_y) start_y = (start_y + prev_bottom) / 2; } int end_y = part->bounding_box_.bottom(); int margin_right = MAX_INT32; int margin_left = -MAX_INT32; UpdateLeftMargin(*part, &margin_left, &margin_right); do { part_it->forward(); part = part_it->data(); } while (!part_it->at_first() && UpdateLeftMargin(*part, &margin_left, &margin_right)); // The run ended. If we were pushed inwards, compute the next run and // extend it backwards into the run we just calculated to find the end of // this run that provides a tight box. int next_margin_right = MAX_INT32; int next_margin_left = -MAX_INT32; UpdateLeftMargin(*part, &next_margin_left, &next_margin_right); if (next_margin_left > margin_right) { ColPartition_IT next_it(*part_it); do { next_it.forward(); part = next_it.data(); } while (!next_it.at_first() && UpdateLeftMargin(*part, &next_margin_left, &next_margin_right)); // Now extend the next run backwards into the original run to get the // tightest fit. do { part_it->backward(); part = part_it->data(); } while (part != start_part && UpdateLeftMargin(*part, &next_margin_left, &next_margin_right)); part_it->forward(); } // Now calculate the end_y. part = part_it->data_relative(-1); end_y = part->bounding_box_.bottom(); if (!part_it->at_first() && part_it->data()->bounding_box_.top() < end_y) end_y = (end_y + part_it->data()->bounding_box_.top()) / 2; start->set_y(start_y); start->set_x(part->XAtY(margin_right, start_y)); end->set_y(end_y); end->set_x(part->XAtY(margin_right, end_y)); if (textord_debug_tabfind && !part_it->at_first()) tprintf("Left run from y=%d to %d terminated with sum %d-%d, new %d-%d\n", start_y, end_y, part->XAtY(margin_left, end_y), end->x(), part->left_margin_, part->bounding_box_.left()); } // Helper updates margin_left and margin_right, being the bounds of the right // margin of part of a block. Returns false and does not update the bounds if // this partition has a disjoint margin with the established margin. static bool UpdateRightMargin(const ColPartition& part, int* margin_left, int* margin_right) { const TBOX& part_box = part.bounding_box(); int top = part_box.top(); int bottom = part_box.bottom(); int tl_key = part.SortKey(part_box.right(), top); int tr_key = part.SortKey(part.right_margin(), top); int bl_key = part.SortKey(part_box.right(), bottom); int br_key = part.SortKey(part.right_margin(), bottom); int left_key = MAX(tl_key, bl_key); int right_key = MIN(tr_key, br_key); if (left_key <= *margin_right && right_key >= *margin_left) { // This part is good - let's keep it. *margin_right = MIN(*margin_right, right_key); *margin_left = MAX(*margin_left, left_key); return true; } return false; } // Computes and returns in start, end a line segment formed from a // backwards-iterated group of right edges of partitions that satisfy the // condition that the intersection of the right margins is non-empty, ie the // leftmost right margin is to the right of the rightmost right bounding box // edge. // On return the iterator is set to the start of the next run. void ColPartition::RightEdgeRun(ColPartition_IT* part_it, ICOORD* start, ICOORD* end) { ColPartition* part = part_it->data(); ColPartition* start_part = part; int start_y = part->bounding_box_.bottom(); if (!part_it->at_last()) { int next_y = part_it->data_relative(1)->bounding_box_.top(); if (next_y > start_y) start_y = next_y; else if (next_y < start_y) start_y = (start_y + next_y) / 2; } int end_y = part->bounding_box_.top(); int margin_right = MAX_INT32; int margin_left = -MAX_INT32; UpdateRightMargin(*part, &margin_left, &margin_right); do { part_it->backward(); part = part_it->data(); } while (!part_it->at_last() && UpdateRightMargin(*part, &margin_left, &margin_right)); // The run ended. If we were pushed inwards, compute the next run and // extend it backwards to find the end of this run for a tight box. int next_margin_right = MAX_INT32; int next_margin_left = -MAX_INT32; UpdateRightMargin(*part, &next_margin_left, &next_margin_right); if (next_margin_right < margin_left) { ColPartition_IT next_it(*part_it); do { next_it.backward(); part = next_it.data(); } while (!next_it.at_last() && UpdateRightMargin(*part, &next_margin_left, &next_margin_right)); // Now extend the next run forwards into the original run to get the // tightest fit. do { part_it->forward(); part = part_it->data(); } while (part != start_part && UpdateRightMargin(*part, &next_margin_left, &next_margin_right)); part_it->backward(); } // Now calculate the end_y. part = part_it->data_relative(1); end_y = part->bounding_box().top(); if (!part_it->at_last() && part_it->data()->bounding_box_.bottom() > end_y) end_y = (end_y + part_it->data()->bounding_box_.bottom()) / 2; start->set_y(start_y); start->set_x(part->XAtY(margin_left, start_y)); end->set_y(end_y); end->set_x(part->XAtY(margin_left, end_y)); if (textord_debug_tabfind && !part_it->at_last()) tprintf("Right run from y=%d to %d terminated with sum %d-%d, new %d-%d\n", start_y, end_y, end->x(), part->XAtY(margin_right, end_y), part->bounding_box_.right(), part->right_margin_); } } // namespace tesseract.