ColPartition: Rename median_size_ -> median_height_

This implements a TODO. Also rename some related items (accessors, local variables and comments).

Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
Stefan Weil 2018-08-03 08:46:38 +02:00
parent 4370714779
commit 6a0f8e8c07
6 changed files with 50 additions and 51 deletions

View File

@ -1177,12 +1177,12 @@ void ColumnFinder::InsertRemainingNoise(TO_BLOCK* block) {
}
}
if (best_part != nullptr &&
best_distance < kMaxDistToPartSizeRatio * best_part->median_size()) {
best_distance < kMaxDistToPartSizeRatio * best_part->median_height()) {
// Close enough to merge.
if (debug) {
tprintf("Adding noise blob with distance %d, thr=%g:box:",
best_distance,
kMaxDistToPartSizeRatio * best_part->median_size());
kMaxDistToPartSizeRatio * best_part->median_height());
blob->bounding_box().print();
tprintf("To partition:");
best_part->Print();

View File

@ -80,7 +80,7 @@ const int kMaxColorDistance = 900;
// Vertical is the direction of logical vertical on the possibly skewed image.
ColPartition::ColPartition(BlobRegionType blob_type, const ICOORD& vertical)
: left_margin_(-INT32_MAX), right_margin_(INT32_MAX),
median_bottom_(INT32_MAX), median_top_(-INT32_MAX), median_size_(0),
median_bottom_(INT32_MAX), median_top_(-INT32_MAX), median_height_(0),
median_left_(INT32_MAX), median_right_(-INT32_MAX), median_width_(0),
blob_type_(blob_type), flow_(BTFT_NONE), good_blob_score_(0),
good_width_(false), good_column_(false),
@ -163,7 +163,7 @@ ColPartition* ColPartition::MakeLinePartition(BlobRegionType blob_type,
part->bounding_box_ = TBOX(left, bottom, right, top);
part->median_bottom_ = bottom;
part->median_top_ = top;
part->median_size_ = top - bottom;
part->median_height_ = top - bottom;
part->median_left_ = left;
part->median_right_ = right;
part->median_width_ = right - left;
@ -416,7 +416,7 @@ bool ColPartition::MatchingSizes(const ColPartition& other) const {
if (blob_type_ == BRT_VERT_TEXT || other.blob_type_ == BRT_VERT_TEXT)
return !TabFind::DifferentSizes(median_width_, other.median_width_);
else
return !TabFind::DifferentSizes(median_size_, other.median_size_);
return !TabFind::DifferentSizes(median_height_, other.median_height_);
}
// Returns true if there is no tabstop violation in merging this and other.
@ -904,14 +904,14 @@ void ColPartition::ComputeLimits() {
blob_type() == BRT_POLYIMAGE) {
median_top_ = bounding_box_.top();
median_bottom_ = bounding_box_.bottom();
median_size_ = bounding_box_.height();
median_height_ = bounding_box_.height();
median_left_ = bounding_box_.left();
median_right_ = bounding_box_.right();
median_width_ = bounding_box_.width();
} else {
STATS top_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
STATS bottom_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
STATS size_stats(0, bounding_box_.height() + 1);
STATS height_stats(0, bounding_box_.height() + 1);
STATS left_stats(bounding_box_.left(), bounding_box_.right() + 1);
STATS right_stats(bounding_box_.left(), bounding_box_.right() + 1);
STATS width_stats(0, bounding_box_.width() + 1);
@ -922,7 +922,7 @@ void ColPartition::ComputeLimits() {
int area = box.area();
top_stats.add(box.top(), area);
bottom_stats.add(box.bottom(), area);
size_stats.add(box.height(), area);
height_stats.add(box.height(), area);
left_stats.add(box.left(), area);
right_stats.add(box.right(), area);
width_stats.add(box.width(), area);
@ -930,7 +930,7 @@ void ColPartition::ComputeLimits() {
}
median_top_ = static_cast<int>(top_stats.median() + 0.5);
median_bottom_ = static_cast<int>(bottom_stats.median() + 0.5);
median_size_ = static_cast<int>(size_stats.median() + 0.5);
median_height_ = static_cast<int>(height_stats.median() + 0.5);
median_left_ = static_cast<int>(left_stats.median() + 0.5);
median_right_ = static_cast<int>(right_stats.median() + 0.5);
median_width_ = static_cast<int>(width_stats.median() + 0.5);
@ -1492,23 +1492,23 @@ void ColPartition::LineSpacingBlocks(const ICOORD& bleft, const ICOORD& tright,
" sizes %d %d %d\n",
part->top_spacing(), part->bottom_spacing(),
next_part->top_spacing(), next_part->bottom_spacing(),
part->median_size(), next_part->median_size(),
third_part != nullptr ? third_part->median_size() : 0);
part->median_height(), next_part->median_height(),
third_part != nullptr ? third_part->median_height() : 0);
}
// We can only consider adding the next line to the block if the sizes
// match and the lines are close enough for their size.
if (part->SizesSimilar(*next_part) &&
next_part->median_size() * kMaxSameBlockLineSpacing >
next_part->median_height() * kMaxSameBlockLineSpacing >
part->bottom_spacing() &&
part->median_size() * kMaxSameBlockLineSpacing >
part->median_height() * kMaxSameBlockLineSpacing >
part->top_spacing()) {
// Even now, we can only add it as long as the third line doesn't
// match in the same way and have a smaller bottom spacing.
if (third_part == nullptr ||
!next_part->SizesSimilar(*third_part) ||
third_part->median_size() * kMaxSameBlockLineSpacing <=
third_part->median_height() * kMaxSameBlockLineSpacing <=
next_part->bottom_spacing() ||
next_part->median_size() * kMaxSameBlockLineSpacing <=
next_part->median_height() * kMaxSameBlockLineSpacing <=
next_part->top_spacing() ||
next_part->bottom_spacing() > part->bottom_spacing()) {
// Add to the current block.
@ -1532,7 +1532,7 @@ void ColPartition::LineSpacingBlocks(const ICOORD& bleft, const ICOORD& tright,
tprintf("Spacings equal: upper:%d/%d, lower:%d/%d, median:%d/%d\n",
part->top_spacing(), part->bottom_spacing(),
next_part->top_spacing(), next_part->bottom_spacing(),
part->median_size(), next_part->median_size());
part->median_height(), next_part->median_height());
}
}
}
@ -1647,7 +1647,7 @@ TO_BLOCK* ColPartition::MakeBlock(const ICOORD& bleft, const ICOORD& tright,
// put the average spacing in each partition, so we can just take the
// linespacing from the first partition.
int line_spacing = part->bottom_spacing();
if (line_spacing < part->median_size())
if (line_spacing < part->median_height())
line_spacing = part->bounding_box().height();
ICOORDELT_LIST vertices;
ICOORDELT_IT vert_it(&vertices);
@ -1715,7 +1715,7 @@ TO_BLOCK* ColPartition::MakeVerticalTextBlock(const ICOORD& bleft,
TO_ROW* ColPartition::MakeToRow() {
BLOBNBOX_C_IT blob_it(&boxes_);
TO_ROW* row = nullptr;
int line_size = IsVerticalType() ? median_width_ : median_size_;
int line_size = IsVerticalType() ? median_width_ : median_height_;
// Add all the blobs to a single TO_ROW.
for (; !blob_it.empty(); blob_it.forward()) {
BLOBNBOX* blob = blob_it.extract();
@ -1746,7 +1746,7 @@ ColPartition* ColPartition::ShallowCopy() const {
sizeof(special_blobs_densities_));
part->median_bottom_ = median_bottom_;
part->median_top_ = median_top_;
part->median_size_ = median_size_;
part->median_height_ = median_height_;
part->median_left_ = median_left_;
part->median_right_ = median_right_;
part->median_width_ = median_width_;
@ -2398,15 +2398,15 @@ int ColPartition::BottomSpacingMargin(int resolution) const {
// Returns a suitable spacing margin that can be applied to tops of
// text lines, based on the resolution and the stored side_step_.
int ColPartition::TopSpacingMargin(int resolution) const {
return static_cast<int>(kMaxTopSpacingFraction * median_size_ + 0.5) +
return static_cast<int>(kMaxTopSpacingFraction * median_height_ + 0.5) +
BottomSpacingMargin(resolution);
}
// Returns true if the median text sizes of this and other agree to within
// a reasonable multiplicative factor.
bool ColPartition::SizesSimilar(const ColPartition& other) const {
return median_size_ <= other.median_size_ * kMaxSizeRatio &&
other.median_size_ <= median_size_ * kMaxSizeRatio;
return median_height_ <= other.median_height_ * kMaxSizeRatio &&
other.median_height_ <= median_height_ * kMaxSizeRatio;
}
// Helper updates margin_left and margin_right, being the bounds of the left

View File

@ -134,11 +134,11 @@ class ColPartition : public ELIST2_LINK {
int median_right() const {
return median_right_;
}
int median_size() const {
return median_size_;
int median_height() const {
return median_height_;
}
void set_median_size(int size) {
median_size_ = size;
void set_median_height(int height) {
median_height_ = height;
}
int median_width() const {
return median_width_;
@ -839,8 +839,7 @@ class ColPartition : public ELIST2_LINK {
int median_bottom_;
int median_top_;
// Median height of blobs in this partition.
// TODO(rays) rename median_height_.
int median_size_;
int median_height_;
// Median left and right of blobs in this partition.
int median_left_;
int median_right_;

View File

@ -694,7 +694,7 @@ void ColPartitionGrid::ExtractPartitionsAsBlocks(BLOCK_LIST* blocks,
// Get metrics from the row that will be used for the block.
TBOX box = part->bounding_box();
int median_width = part->median_width();
int median_height = part->median_size();
int median_height = part->median_height();
// Turn the partition into a TO_ROW.
TO_ROW* row = part->MakeToRow();
if (row == nullptr) {

View File

@ -297,21 +297,21 @@ void StrokeWidth::RemoveLineResidue(ColPartition_LIST* big_part_list) {
box.bottom());
// Find the largest object in the search box not equal to bbox.
BlobGridSearch rsearch(this);
int max_size = 0;
int max_height = 0;
BLOBNBOX* n;
rsearch.StartRectSearch(search_box);
while ((n = rsearch.NextRectSearch()) != nullptr) {
if (n == bbox) continue;
TBOX nbox = n->bounding_box();
if (nbox.height() > max_size) {
max_size = nbox.height();
if (nbox.height() > max_height) {
max_height = nbox.height();
}
}
if (debug) {
tprintf("Max neighbour size=%d for candidate line box at:", max_size);
tprintf("Max neighbour size=%d for candidate line box at:", max_height);
box.print();
}
if (max_size * kLineResidueSizeRatio < box.height()) {
if (max_height * kLineResidueSizeRatio < box.height()) {
#ifndef GRAPHICS_DISABLED
if (leaders_win_ != nullptr) {
// We are debugging, so display deleted in pink blobs in the same
@ -582,7 +582,7 @@ bool StrokeWidth::FixBrokenCJK(TO_BLOCK* block) {
BLOBNBOX_LIST* blobs = &block->blobs;
int median_height = UpperQuartileCJKSize(gridsize(), blobs);
int max_dist = static_cast<int>(median_height * kCJKBrokenDistanceFraction);
int max_size = static_cast<int>(median_height * kCJKAspectRatio);
int max_height = static_cast<int>(median_height * kCJKAspectRatio);
int num_fixed = 0;
BLOBNBOX_IT blob_it(blobs);
@ -594,12 +594,12 @@ bool StrokeWidth::FixBrokenCJK(TO_BLOCK* block) {
bool debug = AlignedBlob::WithinTestRegion(3, bbox.left(),
bbox.bottom());
if (debug) {
tprintf("Checking for Broken CJK (max size=%d):", max_size);
tprintf("Checking for Broken CJK (max size=%d):", max_height);
bbox.print();
}
// Generate a list of blobs that overlap or are near enough to merge.
BLOBNBOX_CLIST overlapped_blobs;
AccumulateOverlaps(blob, debug, max_size, max_dist,
AccumulateOverlaps(blob, debug, max_height, max_dist,
&bbox, &overlapped_blobs);
if (!overlapped_blobs.empty()) {
// There are overlapping blobs, so qualify them as being satisfactory
@ -1596,10 +1596,10 @@ bool StrokeWidth::DiacriticBlob(BlobGrid* small_grid, BLOBNBOX* blob) {
if (debug) tprintf("xgap=%d, y=%d, total dist=%d\n",
x_gap, y_gap, total_distance);
if (total_distance >
neighbour->owner()->median_size() * kMaxDiacriticDistanceRatio) {
neighbour->owner()->median_height() * kMaxDiacriticDistanceRatio) {
if (debug) {
tprintf("Neighbour with median size %d too far away:",
neighbour->owner()->median_size());
neighbour->owner()->median_height());
neighbour->bounding_box().print();
}
continue; // Diacritics must not be too distant.

View File

@ -81,7 +81,7 @@ const double kMaxBlobOverlapFactor = 4.0;
const double kMaxTableCellXheight = 2.0;
// Maximum line spacing between a table column header and column contents
// for merging the two (as a multiple of the partition's median_size).
// for merging the two (as a multiple of the partition's median_height).
const int kMaxColumnHeaderDistance = 4;
// Minimum ratio of num_table_partitions to num_text_partitions in a column
@ -493,7 +493,7 @@ bool TableFinder::AllowTextPartition(const ColPartition& part) const {
const int median_area = global_median_xheight_ * global_median_blob_width_;
const double kAreaPerBlobRequired = median_area * kAllowTextArea;
// Keep comparisons strictly greater to disallow 0!
return part.median_size() > kHeightRequired &&
return part.median_height() > kHeightRequired &&
part.median_width() > kWidthRequired &&
part.bounding_box().area() > kAreaPerBlobRequired * part.boxes_count();
}
@ -724,7 +724,7 @@ void TableFinder::SetGlobalSpacings(ColPartitionGrid* grid) {
// table find runs. Alternative solution.
// part->ComputeLimits();
if (part->IsTextType()) {
// xheight_stats.add(part->median_size(), part->boxes_count());
// xheight_stats.add(part->median_height(), part->boxes_count());
// width_stats.add(part->median_width(), part->boxes_count());
// This loop can be removed when above issues are fixed.
@ -835,7 +835,7 @@ void TableFinder::MarkPartitionsUsingLocalInformation() {
if (!part->IsTextType()) // Only consider text partitions
continue;
// Only consider partitions in dominant font size or smaller
if (part->median_size() > kMaxTableCellXheight * global_median_xheight_)
if (part->median_height() > kMaxTableCellXheight * global_median_xheight_)
continue;
// Mark partitions with a large gap, or no significant gap as
// table partitions.
@ -863,7 +863,7 @@ bool TableFinder::HasWideOrNoInterWordGap(ColPartition* part) const {
BLOBNBOX_C_IT it(part_boxes);
// Check if this is a relatively small partition (such as a single word)
if (part->bounding_box().width() <
kMinBoxesInTextPartition * part->median_size() &&
kMinBoxesInTextPartition * part->median_height() &&
part_boxes->length() < kMinBoxesInTextPartition)
return true;
@ -876,8 +876,8 @@ bool TableFinder::HasWideOrNoInterWordGap(ColPartition* part) const {
// Text partition gap limits. If this is text (and not a table),
// there should be at least one gap larger than min_gap and no gap
// larger than max_gap.
const double max_gap = kMaxGapInTextPartition * part->median_size();
const double min_gap = kMinMaxGapInTextPartition * part->median_size();
const double max_gap = kMaxGapInTextPartition * part->median_height();
const double min_gap = kMinMaxGapInTextPartition * part->median_height();
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
BLOBNBOX* blob = it.data();
@ -895,7 +895,7 @@ bool TableFinder::HasWideOrNoInterWordGap(ColPartition* part) const {
// More likely case, the blobs slightly overlap. This can happen
// with diacritics (accents) or broken alphabet symbols (characters).
// Merge boxes together by taking max of right sides.
if (-gap < part->median_size() * kMaxBlobOverlapFactor) {
if (-gap < part->median_height() * kMaxBlobOverlapFactor) {
previous_x1 = std::max(previous_x1, current_x1);
continue;
}
@ -918,7 +918,7 @@ bool TableFinder::HasWideOrNoInterWordGap(ColPartition* part) const {
// Since no large gap was found, return false if the partition is too
// long to be a data cell
if (part->bounding_box().width() >
kMaxBoxesInDataPartition * part->median_size() ||
kMaxBoxesInDataPartition * part->median_height() ||
part_boxes->length() > kMaxBoxesInDataPartition)
return false;
@ -1051,7 +1051,7 @@ void TableFinder::FilterParagraphEndings() {
// TODO(nbeato): This would be untrue if the text was right aligned.
// How often is that?
if (part->space_to_left() >
kMaxParagraphEndingLeftSpaceMultiple * part->median_size())
kMaxParagraphEndingLeftSpaceMultiple * part->median_height())
continue;
// The line above it should be right aligned (assuming justified format).
// Since we can't assume justified text, we compare whitespace to text.
@ -1647,7 +1647,7 @@ bool TableFinder::HLineBelongsToTable(const ColPartition& part,
extra_space_to_left++;
continue;
}
int space_threshold = kSideSpaceMargin * part.median_size();
int space_threshold = kSideSpaceMargin * part.median_height();
if (extra_part->space_to_right() > space_threshold)
extra_space_to_right++;
if (extra_part->space_to_left() > space_threshold)
@ -1672,7 +1672,7 @@ void TableFinder::IncludeLeftOutColumnHeaders(TBOX* table_box) {
while ((neighbor = vsearch.NextVerticalSearch(false)) != nullptr) {
// Max distance to find a table heading.
const int max_distance = kMaxColumnHeaderDistance *
neighbor->median_size();
neighbor->median_height();
int table_top = table_box->top();
const TBOX& box = neighbor->bounding_box();
// Do not continue if the next box is way above