mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-12-11 23:19:04 +08:00
Merge pull request #1822 from stweil/clean
ColPartition: Rename median_size_ -> median_height_
This commit is contained in:
commit
e9b4e21e6f
@ -1177,12 +1177,12 @@ void ColumnFinder::InsertRemainingNoise(TO_BLOCK* block) {
|
||||
}
|
||||
}
|
||||
if (best_part != nullptr &&
|
||||
best_distance < kMaxDistToPartSizeRatio * best_part->median_size()) {
|
||||
best_distance < kMaxDistToPartSizeRatio * best_part->median_height()) {
|
||||
// Close enough to merge.
|
||||
if (debug) {
|
||||
tprintf("Adding noise blob with distance %d, thr=%g:box:",
|
||||
best_distance,
|
||||
kMaxDistToPartSizeRatio * best_part->median_size());
|
||||
kMaxDistToPartSizeRatio * best_part->median_height());
|
||||
blob->bounding_box().print();
|
||||
tprintf("To partition:");
|
||||
best_part->Print();
|
||||
|
@ -80,7 +80,7 @@ const int kMaxColorDistance = 900;
|
||||
// Vertical is the direction of logical vertical on the possibly skewed image.
|
||||
ColPartition::ColPartition(BlobRegionType blob_type, const ICOORD& vertical)
|
||||
: left_margin_(-INT32_MAX), right_margin_(INT32_MAX),
|
||||
median_bottom_(INT32_MAX), median_top_(-INT32_MAX), median_size_(0),
|
||||
median_bottom_(INT32_MAX), median_top_(-INT32_MAX), median_height_(0),
|
||||
median_left_(INT32_MAX), median_right_(-INT32_MAX), median_width_(0),
|
||||
blob_type_(blob_type), flow_(BTFT_NONE), good_blob_score_(0),
|
||||
good_width_(false), good_column_(false),
|
||||
@ -163,7 +163,7 @@ ColPartition* ColPartition::MakeLinePartition(BlobRegionType blob_type,
|
||||
part->bounding_box_ = TBOX(left, bottom, right, top);
|
||||
part->median_bottom_ = bottom;
|
||||
part->median_top_ = top;
|
||||
part->median_size_ = top - bottom;
|
||||
part->median_height_ = top - bottom;
|
||||
part->median_left_ = left;
|
||||
part->median_right_ = right;
|
||||
part->median_width_ = right - left;
|
||||
@ -416,7 +416,7 @@ bool ColPartition::MatchingSizes(const ColPartition& other) const {
|
||||
if (blob_type_ == BRT_VERT_TEXT || other.blob_type_ == BRT_VERT_TEXT)
|
||||
return !TabFind::DifferentSizes(median_width_, other.median_width_);
|
||||
else
|
||||
return !TabFind::DifferentSizes(median_size_, other.median_size_);
|
||||
return !TabFind::DifferentSizes(median_height_, other.median_height_);
|
||||
}
|
||||
|
||||
// Returns true if there is no tabstop violation in merging this and other.
|
||||
@ -904,14 +904,14 @@ void ColPartition::ComputeLimits() {
|
||||
blob_type() == BRT_POLYIMAGE) {
|
||||
median_top_ = bounding_box_.top();
|
||||
median_bottom_ = bounding_box_.bottom();
|
||||
median_size_ = bounding_box_.height();
|
||||
median_height_ = bounding_box_.height();
|
||||
median_left_ = bounding_box_.left();
|
||||
median_right_ = bounding_box_.right();
|
||||
median_width_ = bounding_box_.width();
|
||||
} else {
|
||||
STATS top_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
|
||||
STATS bottom_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
|
||||
STATS size_stats(0, bounding_box_.height() + 1);
|
||||
STATS height_stats(0, bounding_box_.height() + 1);
|
||||
STATS left_stats(bounding_box_.left(), bounding_box_.right() + 1);
|
||||
STATS right_stats(bounding_box_.left(), bounding_box_.right() + 1);
|
||||
STATS width_stats(0, bounding_box_.width() + 1);
|
||||
@ -922,7 +922,7 @@ void ColPartition::ComputeLimits() {
|
||||
int area = box.area();
|
||||
top_stats.add(box.top(), area);
|
||||
bottom_stats.add(box.bottom(), area);
|
||||
size_stats.add(box.height(), area);
|
||||
height_stats.add(box.height(), area);
|
||||
left_stats.add(box.left(), area);
|
||||
right_stats.add(box.right(), area);
|
||||
width_stats.add(box.width(), area);
|
||||
@ -930,7 +930,7 @@ void ColPartition::ComputeLimits() {
|
||||
}
|
||||
median_top_ = static_cast<int>(top_stats.median() + 0.5);
|
||||
median_bottom_ = static_cast<int>(bottom_stats.median() + 0.5);
|
||||
median_size_ = static_cast<int>(size_stats.median() + 0.5);
|
||||
median_height_ = static_cast<int>(height_stats.median() + 0.5);
|
||||
median_left_ = static_cast<int>(left_stats.median() + 0.5);
|
||||
median_right_ = static_cast<int>(right_stats.median() + 0.5);
|
||||
median_width_ = static_cast<int>(width_stats.median() + 0.5);
|
||||
@ -1492,23 +1492,23 @@ void ColPartition::LineSpacingBlocks(const ICOORD& bleft, const ICOORD& tright,
|
||||
" sizes %d %d %d\n",
|
||||
part->top_spacing(), part->bottom_spacing(),
|
||||
next_part->top_spacing(), next_part->bottom_spacing(),
|
||||
part->median_size(), next_part->median_size(),
|
||||
third_part != nullptr ? third_part->median_size() : 0);
|
||||
part->median_height(), next_part->median_height(),
|
||||
third_part != nullptr ? third_part->median_height() : 0);
|
||||
}
|
||||
// We can only consider adding the next line to the block if the sizes
|
||||
// match and the lines are close enough for their size.
|
||||
if (part->SizesSimilar(*next_part) &&
|
||||
next_part->median_size() * kMaxSameBlockLineSpacing >
|
||||
next_part->median_height() * kMaxSameBlockLineSpacing >
|
||||
part->bottom_spacing() &&
|
||||
part->median_size() * kMaxSameBlockLineSpacing >
|
||||
part->median_height() * kMaxSameBlockLineSpacing >
|
||||
part->top_spacing()) {
|
||||
// Even now, we can only add it as long as the third line doesn't
|
||||
// match in the same way and have a smaller bottom spacing.
|
||||
if (third_part == nullptr ||
|
||||
!next_part->SizesSimilar(*third_part) ||
|
||||
third_part->median_size() * kMaxSameBlockLineSpacing <=
|
||||
third_part->median_height() * kMaxSameBlockLineSpacing <=
|
||||
next_part->bottom_spacing() ||
|
||||
next_part->median_size() * kMaxSameBlockLineSpacing <=
|
||||
next_part->median_height() * kMaxSameBlockLineSpacing <=
|
||||
next_part->top_spacing() ||
|
||||
next_part->bottom_spacing() > part->bottom_spacing()) {
|
||||
// Add to the current block.
|
||||
@ -1532,7 +1532,7 @@ void ColPartition::LineSpacingBlocks(const ICOORD& bleft, const ICOORD& tright,
|
||||
tprintf("Spacings equal: upper:%d/%d, lower:%d/%d, median:%d/%d\n",
|
||||
part->top_spacing(), part->bottom_spacing(),
|
||||
next_part->top_spacing(), next_part->bottom_spacing(),
|
||||
part->median_size(), next_part->median_size());
|
||||
part->median_height(), next_part->median_height());
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1647,7 +1647,7 @@ TO_BLOCK* ColPartition::MakeBlock(const ICOORD& bleft, const ICOORD& tright,
|
||||
// put the average spacing in each partition, so we can just take the
|
||||
// linespacing from the first partition.
|
||||
int line_spacing = part->bottom_spacing();
|
||||
if (line_spacing < part->median_size())
|
||||
if (line_spacing < part->median_height())
|
||||
line_spacing = part->bounding_box().height();
|
||||
ICOORDELT_LIST vertices;
|
||||
ICOORDELT_IT vert_it(&vertices);
|
||||
@ -1715,7 +1715,7 @@ TO_BLOCK* ColPartition::MakeVerticalTextBlock(const ICOORD& bleft,
|
||||
TO_ROW* ColPartition::MakeToRow() {
|
||||
BLOBNBOX_C_IT blob_it(&boxes_);
|
||||
TO_ROW* row = nullptr;
|
||||
int line_size = IsVerticalType() ? median_width_ : median_size_;
|
||||
int line_size = IsVerticalType() ? median_width_ : median_height_;
|
||||
// Add all the blobs to a single TO_ROW.
|
||||
for (; !blob_it.empty(); blob_it.forward()) {
|
||||
BLOBNBOX* blob = blob_it.extract();
|
||||
@ -1746,7 +1746,7 @@ ColPartition* ColPartition::ShallowCopy() const {
|
||||
sizeof(special_blobs_densities_));
|
||||
part->median_bottom_ = median_bottom_;
|
||||
part->median_top_ = median_top_;
|
||||
part->median_size_ = median_size_;
|
||||
part->median_height_ = median_height_;
|
||||
part->median_left_ = median_left_;
|
||||
part->median_right_ = median_right_;
|
||||
part->median_width_ = median_width_;
|
||||
@ -2398,15 +2398,15 @@ int ColPartition::BottomSpacingMargin(int resolution) const {
|
||||
// Returns a suitable spacing margin that can be applied to tops of
|
||||
// text lines, based on the resolution and the stored side_step_.
|
||||
int ColPartition::TopSpacingMargin(int resolution) const {
|
||||
return static_cast<int>(kMaxTopSpacingFraction * median_size_ + 0.5) +
|
||||
return static_cast<int>(kMaxTopSpacingFraction * median_height_ + 0.5) +
|
||||
BottomSpacingMargin(resolution);
|
||||
}
|
||||
|
||||
// Returns true if the median text sizes of this and other agree to within
|
||||
// a reasonable multiplicative factor.
|
||||
bool ColPartition::SizesSimilar(const ColPartition& other) const {
|
||||
return median_size_ <= other.median_size_ * kMaxSizeRatio &&
|
||||
other.median_size_ <= median_size_ * kMaxSizeRatio;
|
||||
return median_height_ <= other.median_height_ * kMaxSizeRatio &&
|
||||
other.median_height_ <= median_height_ * kMaxSizeRatio;
|
||||
}
|
||||
|
||||
// Helper updates margin_left and margin_right, being the bounds of the left
|
||||
|
@ -134,11 +134,11 @@ class ColPartition : public ELIST2_LINK {
|
||||
int median_right() const {
|
||||
return median_right_;
|
||||
}
|
||||
int median_size() const {
|
||||
return median_size_;
|
||||
int median_height() const {
|
||||
return median_height_;
|
||||
}
|
||||
void set_median_size(int size) {
|
||||
median_size_ = size;
|
||||
void set_median_height(int height) {
|
||||
median_height_ = height;
|
||||
}
|
||||
int median_width() const {
|
||||
return median_width_;
|
||||
@ -839,8 +839,7 @@ class ColPartition : public ELIST2_LINK {
|
||||
int median_bottom_;
|
||||
int median_top_;
|
||||
// Median height of blobs in this partition.
|
||||
// TODO(rays) rename median_height_.
|
||||
int median_size_;
|
||||
int median_height_;
|
||||
// Median left and right of blobs in this partition.
|
||||
int median_left_;
|
||||
int median_right_;
|
||||
|
@ -694,7 +694,7 @@ void ColPartitionGrid::ExtractPartitionsAsBlocks(BLOCK_LIST* blocks,
|
||||
// Get metrics from the row that will be used for the block.
|
||||
TBOX box = part->bounding_box();
|
||||
int median_width = part->median_width();
|
||||
int median_height = part->median_size();
|
||||
int median_height = part->median_height();
|
||||
// Turn the partition into a TO_ROW.
|
||||
TO_ROW* row = part->MakeToRow();
|
||||
if (row == nullptr) {
|
||||
|
@ -297,21 +297,21 @@ void StrokeWidth::RemoveLineResidue(ColPartition_LIST* big_part_list) {
|
||||
box.bottom());
|
||||
// Find the largest object in the search box not equal to bbox.
|
||||
BlobGridSearch rsearch(this);
|
||||
int max_size = 0;
|
||||
int max_height = 0;
|
||||
BLOBNBOX* n;
|
||||
rsearch.StartRectSearch(search_box);
|
||||
while ((n = rsearch.NextRectSearch()) != nullptr) {
|
||||
if (n == bbox) continue;
|
||||
TBOX nbox = n->bounding_box();
|
||||
if (nbox.height() > max_size) {
|
||||
max_size = nbox.height();
|
||||
if (nbox.height() > max_height) {
|
||||
max_height = nbox.height();
|
||||
}
|
||||
}
|
||||
if (debug) {
|
||||
tprintf("Max neighbour size=%d for candidate line box at:", max_size);
|
||||
tprintf("Max neighbour size=%d for candidate line box at:", max_height);
|
||||
box.print();
|
||||
}
|
||||
if (max_size * kLineResidueSizeRatio < box.height()) {
|
||||
if (max_height * kLineResidueSizeRatio < box.height()) {
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
if (leaders_win_ != nullptr) {
|
||||
// We are debugging, so display deleted in pink blobs in the same
|
||||
@ -582,7 +582,7 @@ bool StrokeWidth::FixBrokenCJK(TO_BLOCK* block) {
|
||||
BLOBNBOX_LIST* blobs = &block->blobs;
|
||||
int median_height = UpperQuartileCJKSize(gridsize(), blobs);
|
||||
int max_dist = static_cast<int>(median_height * kCJKBrokenDistanceFraction);
|
||||
int max_size = static_cast<int>(median_height * kCJKAspectRatio);
|
||||
int max_height = static_cast<int>(median_height * kCJKAspectRatio);
|
||||
int num_fixed = 0;
|
||||
BLOBNBOX_IT blob_it(blobs);
|
||||
|
||||
@ -594,12 +594,12 @@ bool StrokeWidth::FixBrokenCJK(TO_BLOCK* block) {
|
||||
bool debug = AlignedBlob::WithinTestRegion(3, bbox.left(),
|
||||
bbox.bottom());
|
||||
if (debug) {
|
||||
tprintf("Checking for Broken CJK (max size=%d):", max_size);
|
||||
tprintf("Checking for Broken CJK (max size=%d):", max_height);
|
||||
bbox.print();
|
||||
}
|
||||
// Generate a list of blobs that overlap or are near enough to merge.
|
||||
BLOBNBOX_CLIST overlapped_blobs;
|
||||
AccumulateOverlaps(blob, debug, max_size, max_dist,
|
||||
AccumulateOverlaps(blob, debug, max_height, max_dist,
|
||||
&bbox, &overlapped_blobs);
|
||||
if (!overlapped_blobs.empty()) {
|
||||
// There are overlapping blobs, so qualify them as being satisfactory
|
||||
@ -1596,10 +1596,10 @@ bool StrokeWidth::DiacriticBlob(BlobGrid* small_grid, BLOBNBOX* blob) {
|
||||
if (debug) tprintf("xgap=%d, y=%d, total dist=%d\n",
|
||||
x_gap, y_gap, total_distance);
|
||||
if (total_distance >
|
||||
neighbour->owner()->median_size() * kMaxDiacriticDistanceRatio) {
|
||||
neighbour->owner()->median_height() * kMaxDiacriticDistanceRatio) {
|
||||
if (debug) {
|
||||
tprintf("Neighbour with median size %d too far away:",
|
||||
neighbour->owner()->median_size());
|
||||
neighbour->owner()->median_height());
|
||||
neighbour->bounding_box().print();
|
||||
}
|
||||
continue; // Diacritics must not be too distant.
|
||||
|
@ -81,7 +81,7 @@ const double kMaxBlobOverlapFactor = 4.0;
|
||||
const double kMaxTableCellXheight = 2.0;
|
||||
|
||||
// Maximum line spacing between a table column header and column contents
|
||||
// for merging the two (as a multiple of the partition's median_size).
|
||||
// for merging the two (as a multiple of the partition's median_height).
|
||||
const int kMaxColumnHeaderDistance = 4;
|
||||
|
||||
// Minimum ratio of num_table_partitions to num_text_partitions in a column
|
||||
@ -493,7 +493,7 @@ bool TableFinder::AllowTextPartition(const ColPartition& part) const {
|
||||
const int median_area = global_median_xheight_ * global_median_blob_width_;
|
||||
const double kAreaPerBlobRequired = median_area * kAllowTextArea;
|
||||
// Keep comparisons strictly greater to disallow 0!
|
||||
return part.median_size() > kHeightRequired &&
|
||||
return part.median_height() > kHeightRequired &&
|
||||
part.median_width() > kWidthRequired &&
|
||||
part.bounding_box().area() > kAreaPerBlobRequired * part.boxes_count();
|
||||
}
|
||||
@ -724,7 +724,7 @@ void TableFinder::SetGlobalSpacings(ColPartitionGrid* grid) {
|
||||
// table find runs. Alternative solution.
|
||||
// part->ComputeLimits();
|
||||
if (part->IsTextType()) {
|
||||
// xheight_stats.add(part->median_size(), part->boxes_count());
|
||||
// xheight_stats.add(part->median_height(), part->boxes_count());
|
||||
// width_stats.add(part->median_width(), part->boxes_count());
|
||||
|
||||
// This loop can be removed when above issues are fixed.
|
||||
@ -835,7 +835,7 @@ void TableFinder::MarkPartitionsUsingLocalInformation() {
|
||||
if (!part->IsTextType()) // Only consider text partitions
|
||||
continue;
|
||||
// Only consider partitions in dominant font size or smaller
|
||||
if (part->median_size() > kMaxTableCellXheight * global_median_xheight_)
|
||||
if (part->median_height() > kMaxTableCellXheight * global_median_xheight_)
|
||||
continue;
|
||||
// Mark partitions with a large gap, or no significant gap as
|
||||
// table partitions.
|
||||
@ -863,7 +863,7 @@ bool TableFinder::HasWideOrNoInterWordGap(ColPartition* part) const {
|
||||
BLOBNBOX_C_IT it(part_boxes);
|
||||
// Check if this is a relatively small partition (such as a single word)
|
||||
if (part->bounding_box().width() <
|
||||
kMinBoxesInTextPartition * part->median_size() &&
|
||||
kMinBoxesInTextPartition * part->median_height() &&
|
||||
part_boxes->length() < kMinBoxesInTextPartition)
|
||||
return true;
|
||||
|
||||
@ -876,8 +876,8 @@ bool TableFinder::HasWideOrNoInterWordGap(ColPartition* part) const {
|
||||
// Text partition gap limits. If this is text (and not a table),
|
||||
// there should be at least one gap larger than min_gap and no gap
|
||||
// larger than max_gap.
|
||||
const double max_gap = kMaxGapInTextPartition * part->median_size();
|
||||
const double min_gap = kMinMaxGapInTextPartition * part->median_size();
|
||||
const double max_gap = kMaxGapInTextPartition * part->median_height();
|
||||
const double min_gap = kMinMaxGapInTextPartition * part->median_height();
|
||||
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||
BLOBNBOX* blob = it.data();
|
||||
@ -895,7 +895,7 @@ bool TableFinder::HasWideOrNoInterWordGap(ColPartition* part) const {
|
||||
// More likely case, the blobs slightly overlap. This can happen
|
||||
// with diacritics (accents) or broken alphabet symbols (characters).
|
||||
// Merge boxes together by taking max of right sides.
|
||||
if (-gap < part->median_size() * kMaxBlobOverlapFactor) {
|
||||
if (-gap < part->median_height() * kMaxBlobOverlapFactor) {
|
||||
previous_x1 = std::max(previous_x1, current_x1);
|
||||
continue;
|
||||
}
|
||||
@ -918,7 +918,7 @@ bool TableFinder::HasWideOrNoInterWordGap(ColPartition* part) const {
|
||||
// Since no large gap was found, return false if the partition is too
|
||||
// long to be a data cell
|
||||
if (part->bounding_box().width() >
|
||||
kMaxBoxesInDataPartition * part->median_size() ||
|
||||
kMaxBoxesInDataPartition * part->median_height() ||
|
||||
part_boxes->length() > kMaxBoxesInDataPartition)
|
||||
return false;
|
||||
|
||||
@ -1051,7 +1051,7 @@ void TableFinder::FilterParagraphEndings() {
|
||||
// TODO(nbeato): This would be untrue if the text was right aligned.
|
||||
// How often is that?
|
||||
if (part->space_to_left() >
|
||||
kMaxParagraphEndingLeftSpaceMultiple * part->median_size())
|
||||
kMaxParagraphEndingLeftSpaceMultiple * part->median_height())
|
||||
continue;
|
||||
// The line above it should be right aligned (assuming justified format).
|
||||
// Since we can't assume justified text, we compare whitespace to text.
|
||||
@ -1647,7 +1647,7 @@ bool TableFinder::HLineBelongsToTable(const ColPartition& part,
|
||||
extra_space_to_left++;
|
||||
continue;
|
||||
}
|
||||
int space_threshold = kSideSpaceMargin * part.median_size();
|
||||
int space_threshold = kSideSpaceMargin * part.median_height();
|
||||
if (extra_part->space_to_right() > space_threshold)
|
||||
extra_space_to_right++;
|
||||
if (extra_part->space_to_left() > space_threshold)
|
||||
@ -1672,7 +1672,7 @@ void TableFinder::IncludeLeftOutColumnHeaders(TBOX* table_box) {
|
||||
while ((neighbor = vsearch.NextVerticalSearch(false)) != nullptr) {
|
||||
// Max distance to find a table heading.
|
||||
const int max_distance = kMaxColumnHeaderDistance *
|
||||
neighbor->median_size();
|
||||
neighbor->median_height();
|
||||
int table_top = table_box->top();
|
||||
const TBOX& box = neighbor->bounding_box();
|
||||
// Do not continue if the next box is way above
|
||||
|
Loading…
Reference in New Issue
Block a user