From 6a0f8e8c07b89599ebf30039387fadb58a29ca17 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Fri, 3 Aug 2018 08:46:38 +0200 Subject: [PATCH] ColPartition: Rename median_size_ -> median_height_ This implements a TODO. Rename also some related items. Signed-off-by: Stefan Weil --- src/textord/colfind.cpp | 4 ++-- src/textord/colpartition.cpp | 40 ++++++++++++++++---------------- src/textord/colpartition.h | 11 ++++----- src/textord/colpartitiongrid.cpp | 2 +- src/textord/strokewidth.cpp | 20 ++++++++-------- src/textord/tablefind.cpp | 24 +++++++++---------- 6 files changed, 50 insertions(+), 51 deletions(-) diff --git a/src/textord/colfind.cpp b/src/textord/colfind.cpp index c8e6d0bf..5ed4f334 100644 --- a/src/textord/colfind.cpp +++ b/src/textord/colfind.cpp @@ -1177,12 +1177,12 @@ void ColumnFinder::InsertRemainingNoise(TO_BLOCK* block) { } } if (best_part != nullptr && - best_distance < kMaxDistToPartSizeRatio * best_part->median_size()) { + best_distance < kMaxDistToPartSizeRatio * best_part->median_height()) { // Close enough to merge. if (debug) { tprintf("Adding noise blob with distance %d, thr=%g:box:", best_distance, - kMaxDistToPartSizeRatio * best_part->median_size()); + kMaxDistToPartSizeRatio * best_part->median_height()); blob->bounding_box().print(); tprintf("To partition:"); best_part->Print(); diff --git a/src/textord/colpartition.cpp b/src/textord/colpartition.cpp index ad2c6d88..629e196a 100644 --- a/src/textord/colpartition.cpp +++ b/src/textord/colpartition.cpp @@ -80,7 +80,7 @@ const int kMaxColorDistance = 900; // Vertical is the direction of logical vertical on the possibly skewed image. 
ColPartition::ColPartition(BlobRegionType blob_type, const ICOORD& vertical) : left_margin_(-INT32_MAX), right_margin_(INT32_MAX), - median_bottom_(INT32_MAX), median_top_(-INT32_MAX), median_size_(0), + median_bottom_(INT32_MAX), median_top_(-INT32_MAX), median_height_(0), median_left_(INT32_MAX), median_right_(-INT32_MAX), median_width_(0), blob_type_(blob_type), flow_(BTFT_NONE), good_blob_score_(0), good_width_(false), good_column_(false), @@ -163,7 +163,7 @@ ColPartition* ColPartition::MakeLinePartition(BlobRegionType blob_type, part->bounding_box_ = TBOX(left, bottom, right, top); part->median_bottom_ = bottom; part->median_top_ = top; - part->median_size_ = top - bottom; + part->median_height_ = top - bottom; part->median_left_ = left; part->median_right_ = right; part->median_width_ = right - left; @@ -416,7 +416,7 @@ bool ColPartition::MatchingSizes(const ColPartition& other) const { if (blob_type_ == BRT_VERT_TEXT || other.blob_type_ == BRT_VERT_TEXT) return !TabFind::DifferentSizes(median_width_, other.median_width_); else - return !TabFind::DifferentSizes(median_size_, other.median_size_); + return !TabFind::DifferentSizes(median_height_, other.median_height_); } // Returns true if there is no tabstop violation in merging this and other. 
@@ -904,14 +904,14 @@ void ColPartition::ComputeLimits() { blob_type() == BRT_POLYIMAGE) { median_top_ = bounding_box_.top(); median_bottom_ = bounding_box_.bottom(); - median_size_ = bounding_box_.height(); + median_height_ = bounding_box_.height(); median_left_ = bounding_box_.left(); median_right_ = bounding_box_.right(); median_width_ = bounding_box_.width(); } else { STATS top_stats(bounding_box_.bottom(), bounding_box_.top() + 1); STATS bottom_stats(bounding_box_.bottom(), bounding_box_.top() + 1); - STATS size_stats(0, bounding_box_.height() + 1); + STATS height_stats(0, bounding_box_.height() + 1); STATS left_stats(bounding_box_.left(), bounding_box_.right() + 1); STATS right_stats(bounding_box_.left(), bounding_box_.right() + 1); STATS width_stats(0, bounding_box_.width() + 1); @@ -922,7 +922,7 @@ void ColPartition::ComputeLimits() { int area = box.area(); top_stats.add(box.top(), area); bottom_stats.add(box.bottom(), area); - size_stats.add(box.height(), area); + height_stats.add(box.height(), area); left_stats.add(box.left(), area); right_stats.add(box.right(), area); width_stats.add(box.width(), area); @@ -930,7 +930,7 @@ void ColPartition::ComputeLimits() { } median_top_ = static_cast<int>(top_stats.median() + 0.5); median_bottom_ = static_cast<int>(bottom_stats.median() + 0.5); - median_size_ = static_cast<int>(size_stats.median() + 0.5); + median_height_ = static_cast<int>(height_stats.median() + 0.5); median_left_ = static_cast<int>(left_stats.median() + 0.5); median_right_ = static_cast<int>(right_stats.median() + 0.5); median_width_ = static_cast<int>(width_stats.median() + 0.5); @@ -1492,23 +1492,23 @@ void ColPartition::LineSpacingBlocks(const ICOORD& bleft, const ICOORD& tright, " sizes %d %d %d\n", part->top_spacing(), part->bottom_spacing(), next_part->top_spacing(), next_part->bottom_spacing(), - part->median_size(), next_part->median_size(), - third_part != nullptr ? 
third_part->median_size() : 0); + part->median_height(), next_part->median_height(), + third_part != nullptr ? third_part->median_height() : 0); } // We can only consider adding the next line to the block if the sizes // match and the lines are close enough for their size. if (part->SizesSimilar(*next_part) && - next_part->median_size() * kMaxSameBlockLineSpacing > + next_part->median_height() * kMaxSameBlockLineSpacing > part->bottom_spacing() && - part->median_size() * kMaxSameBlockLineSpacing > + part->median_height() * kMaxSameBlockLineSpacing > part->top_spacing()) { // Even now, we can only add it as long as the third line doesn't // match in the same way and have a smaller bottom spacing. if (third_part == nullptr || !next_part->SizesSimilar(*third_part) || - third_part->median_size() * kMaxSameBlockLineSpacing <= + third_part->median_height() * kMaxSameBlockLineSpacing <= next_part->bottom_spacing() || - next_part->median_size() * kMaxSameBlockLineSpacing <= + next_part->median_height() * kMaxSameBlockLineSpacing <= next_part->top_spacing() || next_part->bottom_spacing() > part->bottom_spacing()) { // Add to the current block. @@ -1532,7 +1532,7 @@ void ColPartition::LineSpacingBlocks(const ICOORD& bleft, const ICOORD& tright, tprintf("Spacings equal: upper:%d/%d, lower:%d/%d, median:%d/%d\n", part->top_spacing(), part->bottom_spacing(), next_part->top_spacing(), next_part->bottom_spacing(), - part->median_size(), next_part->median_size()); + part->median_height(), next_part->median_height()); } } } @@ -1647,7 +1647,7 @@ TO_BLOCK* ColPartition::MakeBlock(const ICOORD& bleft, const ICOORD& tright, // put the average spacing in each partition, so we can just take the // linespacing from the first partition. 
int line_spacing = part->bottom_spacing(); - if (line_spacing < part->median_size()) + if (line_spacing < part->median_height()) line_spacing = part->bounding_box().height(); ICOORDELT_LIST vertices; ICOORDELT_IT vert_it(&vertices); @@ -1715,7 +1715,7 @@ TO_BLOCK* ColPartition::MakeVerticalTextBlock(const ICOORD& bleft, TO_ROW* ColPartition::MakeToRow() { BLOBNBOX_C_IT blob_it(&boxes_); TO_ROW* row = nullptr; - int line_size = IsVerticalType() ? median_width_ : median_size_; + int line_size = IsVerticalType() ? median_width_ : median_height_; // Add all the blobs to a single TO_ROW. for (; !blob_it.empty(); blob_it.forward()) { BLOBNBOX* blob = blob_it.extract(); @@ -1746,7 +1746,7 @@ ColPartition* ColPartition::ShallowCopy() const { sizeof(special_blobs_densities_)); part->median_bottom_ = median_bottom_; part->median_top_ = median_top_; - part->median_size_ = median_size_; + part->median_height_ = median_height_; part->median_left_ = median_left_; part->median_right_ = median_right_; part->median_width_ = median_width_; @@ -2398,15 +2398,15 @@ int ColPartition::BottomSpacingMargin(int resolution) const { // Returns a suitable spacing margin that can be applied to tops of // text lines, based on the resolution and the stored side_step_. int ColPartition::TopSpacingMargin(int resolution) const { - return static_cast<int>(kMaxTopSpacingFraction * median_size_ + 0.5) + + return static_cast<int>(kMaxTopSpacingFraction * median_height_ + 0.5) + BottomSpacingMargin(resolution); } // Returns true if the median text sizes of this and other agree to within // a reasonable multiplicative factor. 
bool ColPartition::SizesSimilar(const ColPartition& other) const { - return median_size_ <= other.median_size_ * kMaxSizeRatio && - other.median_size_ <= median_size_ * kMaxSizeRatio; + return median_height_ <= other.median_height_ * kMaxSizeRatio && + other.median_height_ <= median_height_ * kMaxSizeRatio; } // Helper updates margin_left and margin_right, being the bounds of the left diff --git a/src/textord/colpartition.h b/src/textord/colpartition.h index c712adda..abd40e30 100644 --- a/src/textord/colpartition.h +++ b/src/textord/colpartition.h @@ -134,11 +134,11 @@ class ColPartition : public ELIST2_LINK { int median_right() const { return median_right_; } - int median_size() const { - return median_size_; + int median_height() const { + return median_height_; } - void set_median_size(int size) { - median_size_ = size; + void set_median_height(int height) { + median_height_ = height; } int median_width() const { return median_width_; @@ -839,8 +839,7 @@ class ColPartition : public ELIST2_LINK { int median_bottom_; int median_top_; // Median height of blobs in this partition. - // TODO(rays) rename median_height_. - int median_size_; + int median_height_; // Median left and right of blobs in this partition. int median_left_; int median_right_; diff --git a/src/textord/colpartitiongrid.cpp b/src/textord/colpartitiongrid.cpp index e06f4a66..3ed8152a 100644 --- a/src/textord/colpartitiongrid.cpp +++ b/src/textord/colpartitiongrid.cpp @@ -694,7 +694,7 @@ void ColPartitionGrid::ExtractPartitionsAsBlocks(BLOCK_LIST* blocks, // Get metrics from the row that will be used for the block. TBOX box = part->bounding_box(); int median_width = part->median_width(); - int median_height = part->median_size(); + int median_height = part->median_height(); // Turn the partition into a TO_ROW. 
TO_ROW* row = part->MakeToRow(); if (row == nullptr) { diff --git a/src/textord/strokewidth.cpp b/src/textord/strokewidth.cpp index 6992a587..84399bc8 100644 --- a/src/textord/strokewidth.cpp +++ b/src/textord/strokewidth.cpp @@ -297,21 +297,21 @@ void StrokeWidth::RemoveLineResidue(ColPartition_LIST* big_part_list) { box.bottom()); // Find the largest object in the search box not equal to bbox. BlobGridSearch rsearch(this); - int max_size = 0; + int max_height = 0; BLOBNBOX* n; rsearch.StartRectSearch(search_box); while ((n = rsearch.NextRectSearch()) != nullptr) { if (n == bbox) continue; TBOX nbox = n->bounding_box(); - if (nbox.height() > max_size) { - max_size = nbox.height(); + if (nbox.height() > max_height) { + max_height = nbox.height(); } } if (debug) { - tprintf("Max neighbour size=%d for candidate line box at:", max_size); + tprintf("Max neighbour size=%d for candidate line box at:", max_height); box.print(); } - if (max_size * kLineResidueSizeRatio < box.height()) { + if (max_height * kLineResidueSizeRatio < box.height()) { #ifndef GRAPHICS_DISABLED if (leaders_win_ != nullptr) { // We are debugging, so display deleted in pink blobs in the same @@ -582,7 +582,7 @@ bool StrokeWidth::FixBrokenCJK(TO_BLOCK* block) { BLOBNBOX_LIST* blobs = &block->blobs; int median_height = UpperQuartileCJKSize(gridsize(), blobs); int max_dist = static_cast<int>(median_height * kCJKBrokenDistanceFraction); - int max_size = static_cast<int>(median_height * kCJKAspectRatio); + int max_height = static_cast<int>(median_height * kCJKAspectRatio); int num_fixed = 0; BLOBNBOX_IT blob_it(blobs); @@ -594,12 +594,12 @@ bool StrokeWidth::FixBrokenCJK(TO_BLOCK* block) { bool debug = AlignedBlob::WithinTestRegion(3, bbox.left(), bbox.bottom()); if (debug) { - tprintf("Checking for Broken CJK (max size=%d):", max_size); + tprintf("Checking for Broken CJK (max size=%d):", max_height); bbox.print(); } // Generate a list of blobs that overlap or are near enough to merge. 
BLOBNBOX_CLIST overlapped_blobs; - AccumulateOverlaps(blob, debug, max_size, max_dist, + AccumulateOverlaps(blob, debug, max_height, max_dist, &bbox, &overlapped_blobs); if (!overlapped_blobs.empty()) { // There are overlapping blobs, so qualify them as being satisfactory @@ -1596,10 +1596,10 @@ bool StrokeWidth::DiacriticBlob(BlobGrid* small_grid, BLOBNBOX* blob) { if (debug) tprintf("xgap=%d, y=%d, total dist=%d\n", x_gap, y_gap, total_distance); if (total_distance > - neighbour->owner()->median_size() * kMaxDiacriticDistanceRatio) { + neighbour->owner()->median_height() * kMaxDiacriticDistanceRatio) { if (debug) { tprintf("Neighbour with median size %d too far away:", - neighbour->owner()->median_size()); + neighbour->owner()->median_height()); neighbour->bounding_box().print(); } continue; // Diacritics must not be too distant. diff --git a/src/textord/tablefind.cpp b/src/textord/tablefind.cpp index ba5d0a4f..f7ee3d27 100644 --- a/src/textord/tablefind.cpp +++ b/src/textord/tablefind.cpp @@ -81,7 +81,7 @@ const double kMaxBlobOverlapFactor = 4.0; const double kMaxTableCellXheight = 2.0; // Maximum line spacing between a table column header and column contents -// for merging the two (as a multiple of the partition's median_size). +// for merging the two (as a multiple of the partition's median_height). const int kMaxColumnHeaderDistance = 4; // Minimum ratio of num_table_partitions to num_text_partitions in a column @@ -493,7 +493,7 @@ bool TableFinder::AllowTextPartition(const ColPartition& part) const { const int median_area = global_median_xheight_ * global_median_blob_width_; const double kAreaPerBlobRequired = median_area * kAllowTextArea; // Keep comparisons strictly greater to disallow 0! 
- return part.median_size() > kHeightRequired && + return part.median_height() > kHeightRequired && part.median_width() > kWidthRequired && part.bounding_box().area() > kAreaPerBlobRequired * part.boxes_count(); } @@ -724,7 +724,7 @@ void TableFinder::SetGlobalSpacings(ColPartitionGrid* grid) { // table find runs. Alternative solution. // part->ComputeLimits(); if (part->IsTextType()) { - // xheight_stats.add(part->median_size(), part->boxes_count()); + // xheight_stats.add(part->median_height(), part->boxes_count()); // width_stats.add(part->median_width(), part->boxes_count()); // This loop can be removed when above issues are fixed. @@ -835,7 +835,7 @@ void TableFinder::MarkPartitionsUsingLocalInformation() { if (!part->IsTextType()) // Only consider text partitions continue; // Only consider partitions in dominant font size or smaller - if (part->median_size() > kMaxTableCellXheight * global_median_xheight_) + if (part->median_height() > kMaxTableCellXheight * global_median_xheight_) continue; // Mark partitions with a large gap, or no significant gap as // table partitions. @@ -863,7 +863,7 @@ bool TableFinder::HasWideOrNoInterWordGap(ColPartition* part) const { BLOBNBOX_C_IT it(part_boxes); // Check if this is a relatively small partition (such as a single word) if (part->bounding_box().width() < - kMinBoxesInTextPartition * part->median_size() && + kMinBoxesInTextPartition * part->median_height() && part_boxes->length() < kMinBoxesInTextPartition) return true; @@ -876,8 +876,8 @@ bool TableFinder::HasWideOrNoInterWordGap(ColPartition* part) const { // Text partition gap limits. If this is text (and not a table), // there should be at least one gap larger than min_gap and no gap // larger than max_gap. 
- const double max_gap = kMaxGapInTextPartition * part->median_size(); - const double min_gap = kMinMaxGapInTextPartition * part->median_size(); + const double max_gap = kMaxGapInTextPartition * part->median_height(); + const double min_gap = kMinMaxGapInTextPartition * part->median_height(); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { BLOBNBOX* blob = it.data(); @@ -895,7 +895,7 @@ bool TableFinder::HasWideOrNoInterWordGap(ColPartition* part) const { // More likely case, the blobs slightly overlap. This can happen // with diacritics (accents) or broken alphabet symbols (characters). // Merge boxes together by taking max of right sides. - if (-gap < part->median_size() * kMaxBlobOverlapFactor) { + if (-gap < part->median_height() * kMaxBlobOverlapFactor) { previous_x1 = std::max(previous_x1, current_x1); continue; } @@ -918,7 +918,7 @@ bool TableFinder::HasWideOrNoInterWordGap(ColPartition* part) const { // Since no large gap was found, return false if the partition is too // long to be a data cell if (part->bounding_box().width() > - kMaxBoxesInDataPartition * part->median_size() || + kMaxBoxesInDataPartition * part->median_height() || part_boxes->length() > kMaxBoxesInDataPartition) return false; @@ -1051,7 +1051,7 @@ void TableFinder::FilterParagraphEndings() { // TODO(nbeato): This would be untrue if the text was right aligned. // How often is that? if (part->space_to_left() > - kMaxParagraphEndingLeftSpaceMultiple * part->median_size()) + kMaxParagraphEndingLeftSpaceMultiple * part->median_height()) continue; // The line above it should be right aligned (assuming justified format). // Since we can't assume justified text, we compare whitespace to text. 
@@ -1647,7 +1647,7 @@ bool TableFinder::HLineBelongsToTable(const ColPartition& part, extra_space_to_left++; continue; } - int space_threshold = kSideSpaceMargin * part.median_size(); + int space_threshold = kSideSpaceMargin * part.median_height(); if (extra_part->space_to_right() > space_threshold) extra_space_to_right++; if (extra_part->space_to_left() > space_threshold) @@ -1672,7 +1672,7 @@ void TableFinder::IncludeLeftOutColumnHeaders(TBOX* table_box) { while ((neighbor = vsearch.NextVerticalSearch(false)) != nullptr) { // Max distance to find a table heading. const int max_distance = kMaxColumnHeaderDistance * - neighbor->median_size(); + neighbor->median_height(); int table_top = table_box->top(); const TBOX& box = neighbor->bounding_box(); // Do not continue if the next box is way above