mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-12-05 02:47:00 +08:00
ColPartition: Rename median_size_ -> median_height_
This implements a TODO. Also rename some related items (accessors, local variables and comments). Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
parent
4370714779
commit
6a0f8e8c07
@ -1177,12 +1177,12 @@ void ColumnFinder::InsertRemainingNoise(TO_BLOCK* block) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (best_part != nullptr &&
|
if (best_part != nullptr &&
|
||||||
best_distance < kMaxDistToPartSizeRatio * best_part->median_size()) {
|
best_distance < kMaxDistToPartSizeRatio * best_part->median_height()) {
|
||||||
// Close enough to merge.
|
// Close enough to merge.
|
||||||
if (debug) {
|
if (debug) {
|
||||||
tprintf("Adding noise blob with distance %d, thr=%g:box:",
|
tprintf("Adding noise blob with distance %d, thr=%g:box:",
|
||||||
best_distance,
|
best_distance,
|
||||||
kMaxDistToPartSizeRatio * best_part->median_size());
|
kMaxDistToPartSizeRatio * best_part->median_height());
|
||||||
blob->bounding_box().print();
|
blob->bounding_box().print();
|
||||||
tprintf("To partition:");
|
tprintf("To partition:");
|
||||||
best_part->Print();
|
best_part->Print();
|
||||||
|
@ -80,7 +80,7 @@ const int kMaxColorDistance = 900;
|
|||||||
// Vertical is the direction of logical vertical on the possibly skewed image.
|
// Vertical is the direction of logical vertical on the possibly skewed image.
|
||||||
ColPartition::ColPartition(BlobRegionType blob_type, const ICOORD& vertical)
|
ColPartition::ColPartition(BlobRegionType blob_type, const ICOORD& vertical)
|
||||||
: left_margin_(-INT32_MAX), right_margin_(INT32_MAX),
|
: left_margin_(-INT32_MAX), right_margin_(INT32_MAX),
|
||||||
median_bottom_(INT32_MAX), median_top_(-INT32_MAX), median_size_(0),
|
median_bottom_(INT32_MAX), median_top_(-INT32_MAX), median_height_(0),
|
||||||
median_left_(INT32_MAX), median_right_(-INT32_MAX), median_width_(0),
|
median_left_(INT32_MAX), median_right_(-INT32_MAX), median_width_(0),
|
||||||
blob_type_(blob_type), flow_(BTFT_NONE), good_blob_score_(0),
|
blob_type_(blob_type), flow_(BTFT_NONE), good_blob_score_(0),
|
||||||
good_width_(false), good_column_(false),
|
good_width_(false), good_column_(false),
|
||||||
@ -163,7 +163,7 @@ ColPartition* ColPartition::MakeLinePartition(BlobRegionType blob_type,
|
|||||||
part->bounding_box_ = TBOX(left, bottom, right, top);
|
part->bounding_box_ = TBOX(left, bottom, right, top);
|
||||||
part->median_bottom_ = bottom;
|
part->median_bottom_ = bottom;
|
||||||
part->median_top_ = top;
|
part->median_top_ = top;
|
||||||
part->median_size_ = top - bottom;
|
part->median_height_ = top - bottom;
|
||||||
part->median_left_ = left;
|
part->median_left_ = left;
|
||||||
part->median_right_ = right;
|
part->median_right_ = right;
|
||||||
part->median_width_ = right - left;
|
part->median_width_ = right - left;
|
||||||
@ -416,7 +416,7 @@ bool ColPartition::MatchingSizes(const ColPartition& other) const {
|
|||||||
if (blob_type_ == BRT_VERT_TEXT || other.blob_type_ == BRT_VERT_TEXT)
|
if (blob_type_ == BRT_VERT_TEXT || other.blob_type_ == BRT_VERT_TEXT)
|
||||||
return !TabFind::DifferentSizes(median_width_, other.median_width_);
|
return !TabFind::DifferentSizes(median_width_, other.median_width_);
|
||||||
else
|
else
|
||||||
return !TabFind::DifferentSizes(median_size_, other.median_size_);
|
return !TabFind::DifferentSizes(median_height_, other.median_height_);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns true if there is no tabstop violation in merging this and other.
|
// Returns true if there is no tabstop violation in merging this and other.
|
||||||
@ -904,14 +904,14 @@ void ColPartition::ComputeLimits() {
|
|||||||
blob_type() == BRT_POLYIMAGE) {
|
blob_type() == BRT_POLYIMAGE) {
|
||||||
median_top_ = bounding_box_.top();
|
median_top_ = bounding_box_.top();
|
||||||
median_bottom_ = bounding_box_.bottom();
|
median_bottom_ = bounding_box_.bottom();
|
||||||
median_size_ = bounding_box_.height();
|
median_height_ = bounding_box_.height();
|
||||||
median_left_ = bounding_box_.left();
|
median_left_ = bounding_box_.left();
|
||||||
median_right_ = bounding_box_.right();
|
median_right_ = bounding_box_.right();
|
||||||
median_width_ = bounding_box_.width();
|
median_width_ = bounding_box_.width();
|
||||||
} else {
|
} else {
|
||||||
STATS top_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
|
STATS top_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
|
||||||
STATS bottom_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
|
STATS bottom_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
|
||||||
STATS size_stats(0, bounding_box_.height() + 1);
|
STATS height_stats(0, bounding_box_.height() + 1);
|
||||||
STATS left_stats(bounding_box_.left(), bounding_box_.right() + 1);
|
STATS left_stats(bounding_box_.left(), bounding_box_.right() + 1);
|
||||||
STATS right_stats(bounding_box_.left(), bounding_box_.right() + 1);
|
STATS right_stats(bounding_box_.left(), bounding_box_.right() + 1);
|
||||||
STATS width_stats(0, bounding_box_.width() + 1);
|
STATS width_stats(0, bounding_box_.width() + 1);
|
||||||
@ -922,7 +922,7 @@ void ColPartition::ComputeLimits() {
|
|||||||
int area = box.area();
|
int area = box.area();
|
||||||
top_stats.add(box.top(), area);
|
top_stats.add(box.top(), area);
|
||||||
bottom_stats.add(box.bottom(), area);
|
bottom_stats.add(box.bottom(), area);
|
||||||
size_stats.add(box.height(), area);
|
height_stats.add(box.height(), area);
|
||||||
left_stats.add(box.left(), area);
|
left_stats.add(box.left(), area);
|
||||||
right_stats.add(box.right(), area);
|
right_stats.add(box.right(), area);
|
||||||
width_stats.add(box.width(), area);
|
width_stats.add(box.width(), area);
|
||||||
@ -930,7 +930,7 @@ void ColPartition::ComputeLimits() {
|
|||||||
}
|
}
|
||||||
median_top_ = static_cast<int>(top_stats.median() + 0.5);
|
median_top_ = static_cast<int>(top_stats.median() + 0.5);
|
||||||
median_bottom_ = static_cast<int>(bottom_stats.median() + 0.5);
|
median_bottom_ = static_cast<int>(bottom_stats.median() + 0.5);
|
||||||
median_size_ = static_cast<int>(size_stats.median() + 0.5);
|
median_height_ = static_cast<int>(height_stats.median() + 0.5);
|
||||||
median_left_ = static_cast<int>(left_stats.median() + 0.5);
|
median_left_ = static_cast<int>(left_stats.median() + 0.5);
|
||||||
median_right_ = static_cast<int>(right_stats.median() + 0.5);
|
median_right_ = static_cast<int>(right_stats.median() + 0.5);
|
||||||
median_width_ = static_cast<int>(width_stats.median() + 0.5);
|
median_width_ = static_cast<int>(width_stats.median() + 0.5);
|
||||||
@ -1492,23 +1492,23 @@ void ColPartition::LineSpacingBlocks(const ICOORD& bleft, const ICOORD& tright,
|
|||||||
" sizes %d %d %d\n",
|
" sizes %d %d %d\n",
|
||||||
part->top_spacing(), part->bottom_spacing(),
|
part->top_spacing(), part->bottom_spacing(),
|
||||||
next_part->top_spacing(), next_part->bottom_spacing(),
|
next_part->top_spacing(), next_part->bottom_spacing(),
|
||||||
part->median_size(), next_part->median_size(),
|
part->median_height(), next_part->median_height(),
|
||||||
third_part != nullptr ? third_part->median_size() : 0);
|
third_part != nullptr ? third_part->median_height() : 0);
|
||||||
}
|
}
|
||||||
// We can only consider adding the next line to the block if the sizes
|
// We can only consider adding the next line to the block if the sizes
|
||||||
// match and the lines are close enough for their size.
|
// match and the lines are close enough for their size.
|
||||||
if (part->SizesSimilar(*next_part) &&
|
if (part->SizesSimilar(*next_part) &&
|
||||||
next_part->median_size() * kMaxSameBlockLineSpacing >
|
next_part->median_height() * kMaxSameBlockLineSpacing >
|
||||||
part->bottom_spacing() &&
|
part->bottom_spacing() &&
|
||||||
part->median_size() * kMaxSameBlockLineSpacing >
|
part->median_height() * kMaxSameBlockLineSpacing >
|
||||||
part->top_spacing()) {
|
part->top_spacing()) {
|
||||||
// Even now, we can only add it as long as the third line doesn't
|
// Even now, we can only add it as long as the third line doesn't
|
||||||
// match in the same way and have a smaller bottom spacing.
|
// match in the same way and have a smaller bottom spacing.
|
||||||
if (third_part == nullptr ||
|
if (third_part == nullptr ||
|
||||||
!next_part->SizesSimilar(*third_part) ||
|
!next_part->SizesSimilar(*third_part) ||
|
||||||
third_part->median_size() * kMaxSameBlockLineSpacing <=
|
third_part->median_height() * kMaxSameBlockLineSpacing <=
|
||||||
next_part->bottom_spacing() ||
|
next_part->bottom_spacing() ||
|
||||||
next_part->median_size() * kMaxSameBlockLineSpacing <=
|
next_part->median_height() * kMaxSameBlockLineSpacing <=
|
||||||
next_part->top_spacing() ||
|
next_part->top_spacing() ||
|
||||||
next_part->bottom_spacing() > part->bottom_spacing()) {
|
next_part->bottom_spacing() > part->bottom_spacing()) {
|
||||||
// Add to the current block.
|
// Add to the current block.
|
||||||
@ -1532,7 +1532,7 @@ void ColPartition::LineSpacingBlocks(const ICOORD& bleft, const ICOORD& tright,
|
|||||||
tprintf("Spacings equal: upper:%d/%d, lower:%d/%d, median:%d/%d\n",
|
tprintf("Spacings equal: upper:%d/%d, lower:%d/%d, median:%d/%d\n",
|
||||||
part->top_spacing(), part->bottom_spacing(),
|
part->top_spacing(), part->bottom_spacing(),
|
||||||
next_part->top_spacing(), next_part->bottom_spacing(),
|
next_part->top_spacing(), next_part->bottom_spacing(),
|
||||||
part->median_size(), next_part->median_size());
|
part->median_height(), next_part->median_height());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1647,7 +1647,7 @@ TO_BLOCK* ColPartition::MakeBlock(const ICOORD& bleft, const ICOORD& tright,
|
|||||||
// put the average spacing in each partition, so we can just take the
|
// put the average spacing in each partition, so we can just take the
|
||||||
// linespacing from the first partition.
|
// linespacing from the first partition.
|
||||||
int line_spacing = part->bottom_spacing();
|
int line_spacing = part->bottom_spacing();
|
||||||
if (line_spacing < part->median_size())
|
if (line_spacing < part->median_height())
|
||||||
line_spacing = part->bounding_box().height();
|
line_spacing = part->bounding_box().height();
|
||||||
ICOORDELT_LIST vertices;
|
ICOORDELT_LIST vertices;
|
||||||
ICOORDELT_IT vert_it(&vertices);
|
ICOORDELT_IT vert_it(&vertices);
|
||||||
@ -1715,7 +1715,7 @@ TO_BLOCK* ColPartition::MakeVerticalTextBlock(const ICOORD& bleft,
|
|||||||
TO_ROW* ColPartition::MakeToRow() {
|
TO_ROW* ColPartition::MakeToRow() {
|
||||||
BLOBNBOX_C_IT blob_it(&boxes_);
|
BLOBNBOX_C_IT blob_it(&boxes_);
|
||||||
TO_ROW* row = nullptr;
|
TO_ROW* row = nullptr;
|
||||||
int line_size = IsVerticalType() ? median_width_ : median_size_;
|
int line_size = IsVerticalType() ? median_width_ : median_height_;
|
||||||
// Add all the blobs to a single TO_ROW.
|
// Add all the blobs to a single TO_ROW.
|
||||||
for (; !blob_it.empty(); blob_it.forward()) {
|
for (; !blob_it.empty(); blob_it.forward()) {
|
||||||
BLOBNBOX* blob = blob_it.extract();
|
BLOBNBOX* blob = blob_it.extract();
|
||||||
@ -1746,7 +1746,7 @@ ColPartition* ColPartition::ShallowCopy() const {
|
|||||||
sizeof(special_blobs_densities_));
|
sizeof(special_blobs_densities_));
|
||||||
part->median_bottom_ = median_bottom_;
|
part->median_bottom_ = median_bottom_;
|
||||||
part->median_top_ = median_top_;
|
part->median_top_ = median_top_;
|
||||||
part->median_size_ = median_size_;
|
part->median_height_ = median_height_;
|
||||||
part->median_left_ = median_left_;
|
part->median_left_ = median_left_;
|
||||||
part->median_right_ = median_right_;
|
part->median_right_ = median_right_;
|
||||||
part->median_width_ = median_width_;
|
part->median_width_ = median_width_;
|
||||||
@ -2398,15 +2398,15 @@ int ColPartition::BottomSpacingMargin(int resolution) const {
|
|||||||
// Returns a suitable spacing margin that can be applied to tops of
|
// Returns a suitable spacing margin that can be applied to tops of
|
||||||
// text lines, based on the resolution and the stored side_step_.
|
// text lines, based on the resolution and the stored side_step_.
|
||||||
int ColPartition::TopSpacingMargin(int resolution) const {
|
int ColPartition::TopSpacingMargin(int resolution) const {
|
||||||
return static_cast<int>(kMaxTopSpacingFraction * median_size_ + 0.5) +
|
return static_cast<int>(kMaxTopSpacingFraction * median_height_ + 0.5) +
|
||||||
BottomSpacingMargin(resolution);
|
BottomSpacingMargin(resolution);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns true if the median text sizes of this and other agree to within
|
// Returns true if the median text sizes of this and other agree to within
|
||||||
// a reasonable multiplicative factor.
|
// a reasonable multiplicative factor.
|
||||||
bool ColPartition::SizesSimilar(const ColPartition& other) const {
|
bool ColPartition::SizesSimilar(const ColPartition& other) const {
|
||||||
return median_size_ <= other.median_size_ * kMaxSizeRatio &&
|
return median_height_ <= other.median_height_ * kMaxSizeRatio &&
|
||||||
other.median_size_ <= median_size_ * kMaxSizeRatio;
|
other.median_height_ <= median_height_ * kMaxSizeRatio;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Helper updates margin_left and margin_right, being the bounds of the left
|
// Helper updates margin_left and margin_right, being the bounds of the left
|
||||||
|
@ -134,11 +134,11 @@ class ColPartition : public ELIST2_LINK {
|
|||||||
int median_right() const {
|
int median_right() const {
|
||||||
return median_right_;
|
return median_right_;
|
||||||
}
|
}
|
||||||
int median_size() const {
|
int median_height() const {
|
||||||
return median_size_;
|
return median_height_;
|
||||||
}
|
}
|
||||||
void set_median_size(int size) {
|
void set_median_height(int height) {
|
||||||
median_size_ = size;
|
median_height_ = height;
|
||||||
}
|
}
|
||||||
int median_width() const {
|
int median_width() const {
|
||||||
return median_width_;
|
return median_width_;
|
||||||
@ -839,8 +839,7 @@ class ColPartition : public ELIST2_LINK {
|
|||||||
int median_bottom_;
|
int median_bottom_;
|
||||||
int median_top_;
|
int median_top_;
|
||||||
// Median height of blobs in this partition.
|
// Median height of blobs in this partition.
|
||||||
// TODO(rays) rename median_height_.
|
int median_height_;
|
||||||
int median_size_;
|
|
||||||
// Median left and right of blobs in this partition.
|
// Median left and right of blobs in this partition.
|
||||||
int median_left_;
|
int median_left_;
|
||||||
int median_right_;
|
int median_right_;
|
||||||
|
@ -694,7 +694,7 @@ void ColPartitionGrid::ExtractPartitionsAsBlocks(BLOCK_LIST* blocks,
|
|||||||
// Get metrics from the row that will be used for the block.
|
// Get metrics from the row that will be used for the block.
|
||||||
TBOX box = part->bounding_box();
|
TBOX box = part->bounding_box();
|
||||||
int median_width = part->median_width();
|
int median_width = part->median_width();
|
||||||
int median_height = part->median_size();
|
int median_height = part->median_height();
|
||||||
// Turn the partition into a TO_ROW.
|
// Turn the partition into a TO_ROW.
|
||||||
TO_ROW* row = part->MakeToRow();
|
TO_ROW* row = part->MakeToRow();
|
||||||
if (row == nullptr) {
|
if (row == nullptr) {
|
||||||
|
@ -297,21 +297,21 @@ void StrokeWidth::RemoveLineResidue(ColPartition_LIST* big_part_list) {
|
|||||||
box.bottom());
|
box.bottom());
|
||||||
// Find the largest object in the search box not equal to bbox.
|
// Find the largest object in the search box not equal to bbox.
|
||||||
BlobGridSearch rsearch(this);
|
BlobGridSearch rsearch(this);
|
||||||
int max_size = 0;
|
int max_height = 0;
|
||||||
BLOBNBOX* n;
|
BLOBNBOX* n;
|
||||||
rsearch.StartRectSearch(search_box);
|
rsearch.StartRectSearch(search_box);
|
||||||
while ((n = rsearch.NextRectSearch()) != nullptr) {
|
while ((n = rsearch.NextRectSearch()) != nullptr) {
|
||||||
if (n == bbox) continue;
|
if (n == bbox) continue;
|
||||||
TBOX nbox = n->bounding_box();
|
TBOX nbox = n->bounding_box();
|
||||||
if (nbox.height() > max_size) {
|
if (nbox.height() > max_height) {
|
||||||
max_size = nbox.height();
|
max_height = nbox.height();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (debug) {
|
if (debug) {
|
||||||
tprintf("Max neighbour size=%d for candidate line box at:", max_size);
|
tprintf("Max neighbour size=%d for candidate line box at:", max_height);
|
||||||
box.print();
|
box.print();
|
||||||
}
|
}
|
||||||
if (max_size * kLineResidueSizeRatio < box.height()) {
|
if (max_height * kLineResidueSizeRatio < box.height()) {
|
||||||
#ifndef GRAPHICS_DISABLED
|
#ifndef GRAPHICS_DISABLED
|
||||||
if (leaders_win_ != nullptr) {
|
if (leaders_win_ != nullptr) {
|
||||||
// We are debugging, so display deleted in pink blobs in the same
|
// We are debugging, so display deleted in pink blobs in the same
|
||||||
@ -582,7 +582,7 @@ bool StrokeWidth::FixBrokenCJK(TO_BLOCK* block) {
|
|||||||
BLOBNBOX_LIST* blobs = &block->blobs;
|
BLOBNBOX_LIST* blobs = &block->blobs;
|
||||||
int median_height = UpperQuartileCJKSize(gridsize(), blobs);
|
int median_height = UpperQuartileCJKSize(gridsize(), blobs);
|
||||||
int max_dist = static_cast<int>(median_height * kCJKBrokenDistanceFraction);
|
int max_dist = static_cast<int>(median_height * kCJKBrokenDistanceFraction);
|
||||||
int max_size = static_cast<int>(median_height * kCJKAspectRatio);
|
int max_height = static_cast<int>(median_height * kCJKAspectRatio);
|
||||||
int num_fixed = 0;
|
int num_fixed = 0;
|
||||||
BLOBNBOX_IT blob_it(blobs);
|
BLOBNBOX_IT blob_it(blobs);
|
||||||
|
|
||||||
@ -594,12 +594,12 @@ bool StrokeWidth::FixBrokenCJK(TO_BLOCK* block) {
|
|||||||
bool debug = AlignedBlob::WithinTestRegion(3, bbox.left(),
|
bool debug = AlignedBlob::WithinTestRegion(3, bbox.left(),
|
||||||
bbox.bottom());
|
bbox.bottom());
|
||||||
if (debug) {
|
if (debug) {
|
||||||
tprintf("Checking for Broken CJK (max size=%d):", max_size);
|
tprintf("Checking for Broken CJK (max size=%d):", max_height);
|
||||||
bbox.print();
|
bbox.print();
|
||||||
}
|
}
|
||||||
// Generate a list of blobs that overlap or are near enough to merge.
|
// Generate a list of blobs that overlap or are near enough to merge.
|
||||||
BLOBNBOX_CLIST overlapped_blobs;
|
BLOBNBOX_CLIST overlapped_blobs;
|
||||||
AccumulateOverlaps(blob, debug, max_size, max_dist,
|
AccumulateOverlaps(blob, debug, max_height, max_dist,
|
||||||
&bbox, &overlapped_blobs);
|
&bbox, &overlapped_blobs);
|
||||||
if (!overlapped_blobs.empty()) {
|
if (!overlapped_blobs.empty()) {
|
||||||
// There are overlapping blobs, so qualify them as being satisfactory
|
// There are overlapping blobs, so qualify them as being satisfactory
|
||||||
@ -1596,10 +1596,10 @@ bool StrokeWidth::DiacriticBlob(BlobGrid* small_grid, BLOBNBOX* blob) {
|
|||||||
if (debug) tprintf("xgap=%d, y=%d, total dist=%d\n",
|
if (debug) tprintf("xgap=%d, y=%d, total dist=%d\n",
|
||||||
x_gap, y_gap, total_distance);
|
x_gap, y_gap, total_distance);
|
||||||
if (total_distance >
|
if (total_distance >
|
||||||
neighbour->owner()->median_size() * kMaxDiacriticDistanceRatio) {
|
neighbour->owner()->median_height() * kMaxDiacriticDistanceRatio) {
|
||||||
if (debug) {
|
if (debug) {
|
||||||
tprintf("Neighbour with median size %d too far away:",
|
tprintf("Neighbour with median size %d too far away:",
|
||||||
neighbour->owner()->median_size());
|
neighbour->owner()->median_height());
|
||||||
neighbour->bounding_box().print();
|
neighbour->bounding_box().print();
|
||||||
}
|
}
|
||||||
continue; // Diacritics must not be too distant.
|
continue; // Diacritics must not be too distant.
|
||||||
|
@ -81,7 +81,7 @@ const double kMaxBlobOverlapFactor = 4.0;
|
|||||||
const double kMaxTableCellXheight = 2.0;
|
const double kMaxTableCellXheight = 2.0;
|
||||||
|
|
||||||
// Maximum line spacing between a table column header and column contents
|
// Maximum line spacing between a table column header and column contents
|
||||||
// for merging the two (as a multiple of the partition's median_size).
|
// for merging the two (as a multiple of the partition's median_height).
|
||||||
const int kMaxColumnHeaderDistance = 4;
|
const int kMaxColumnHeaderDistance = 4;
|
||||||
|
|
||||||
// Minimum ratio of num_table_partitions to num_text_partitions in a column
|
// Minimum ratio of num_table_partitions to num_text_partitions in a column
|
||||||
@ -493,7 +493,7 @@ bool TableFinder::AllowTextPartition(const ColPartition& part) const {
|
|||||||
const int median_area = global_median_xheight_ * global_median_blob_width_;
|
const int median_area = global_median_xheight_ * global_median_blob_width_;
|
||||||
const double kAreaPerBlobRequired = median_area * kAllowTextArea;
|
const double kAreaPerBlobRequired = median_area * kAllowTextArea;
|
||||||
// Keep comparisons strictly greater to disallow 0!
|
// Keep comparisons strictly greater to disallow 0!
|
||||||
return part.median_size() > kHeightRequired &&
|
return part.median_height() > kHeightRequired &&
|
||||||
part.median_width() > kWidthRequired &&
|
part.median_width() > kWidthRequired &&
|
||||||
part.bounding_box().area() > kAreaPerBlobRequired * part.boxes_count();
|
part.bounding_box().area() > kAreaPerBlobRequired * part.boxes_count();
|
||||||
}
|
}
|
||||||
@ -724,7 +724,7 @@ void TableFinder::SetGlobalSpacings(ColPartitionGrid* grid) {
|
|||||||
// table find runs. Alternative solution.
|
// table find runs. Alternative solution.
|
||||||
// part->ComputeLimits();
|
// part->ComputeLimits();
|
||||||
if (part->IsTextType()) {
|
if (part->IsTextType()) {
|
||||||
// xheight_stats.add(part->median_size(), part->boxes_count());
|
// xheight_stats.add(part->median_height(), part->boxes_count());
|
||||||
// width_stats.add(part->median_width(), part->boxes_count());
|
// width_stats.add(part->median_width(), part->boxes_count());
|
||||||
|
|
||||||
// This loop can be removed when above issues are fixed.
|
// This loop can be removed when above issues are fixed.
|
||||||
@ -835,7 +835,7 @@ void TableFinder::MarkPartitionsUsingLocalInformation() {
|
|||||||
if (!part->IsTextType()) // Only consider text partitions
|
if (!part->IsTextType()) // Only consider text partitions
|
||||||
continue;
|
continue;
|
||||||
// Only consider partitions in dominant font size or smaller
|
// Only consider partitions in dominant font size or smaller
|
||||||
if (part->median_size() > kMaxTableCellXheight * global_median_xheight_)
|
if (part->median_height() > kMaxTableCellXheight * global_median_xheight_)
|
||||||
continue;
|
continue;
|
||||||
// Mark partitions with a large gap, or no significant gap as
|
// Mark partitions with a large gap, or no significant gap as
|
||||||
// table partitions.
|
// table partitions.
|
||||||
@ -863,7 +863,7 @@ bool TableFinder::HasWideOrNoInterWordGap(ColPartition* part) const {
|
|||||||
BLOBNBOX_C_IT it(part_boxes);
|
BLOBNBOX_C_IT it(part_boxes);
|
||||||
// Check if this is a relatively small partition (such as a single word)
|
// Check if this is a relatively small partition (such as a single word)
|
||||||
if (part->bounding_box().width() <
|
if (part->bounding_box().width() <
|
||||||
kMinBoxesInTextPartition * part->median_size() &&
|
kMinBoxesInTextPartition * part->median_height() &&
|
||||||
part_boxes->length() < kMinBoxesInTextPartition)
|
part_boxes->length() < kMinBoxesInTextPartition)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
@ -876,8 +876,8 @@ bool TableFinder::HasWideOrNoInterWordGap(ColPartition* part) const {
|
|||||||
// Text partition gap limits. If this is text (and not a table),
|
// Text partition gap limits. If this is text (and not a table),
|
||||||
// there should be at least one gap larger than min_gap and no gap
|
// there should be at least one gap larger than min_gap and no gap
|
||||||
// larger than max_gap.
|
// larger than max_gap.
|
||||||
const double max_gap = kMaxGapInTextPartition * part->median_size();
|
const double max_gap = kMaxGapInTextPartition * part->median_height();
|
||||||
const double min_gap = kMinMaxGapInTextPartition * part->median_size();
|
const double min_gap = kMinMaxGapInTextPartition * part->median_height();
|
||||||
|
|
||||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||||
BLOBNBOX* blob = it.data();
|
BLOBNBOX* blob = it.data();
|
||||||
@ -895,7 +895,7 @@ bool TableFinder::HasWideOrNoInterWordGap(ColPartition* part) const {
|
|||||||
// More likely case, the blobs slightly overlap. This can happen
|
// More likely case, the blobs slightly overlap. This can happen
|
||||||
// with diacritics (accents) or broken alphabet symbols (characters).
|
// with diacritics (accents) or broken alphabet symbols (characters).
|
||||||
// Merge boxes together by taking max of right sides.
|
// Merge boxes together by taking max of right sides.
|
||||||
if (-gap < part->median_size() * kMaxBlobOverlapFactor) {
|
if (-gap < part->median_height() * kMaxBlobOverlapFactor) {
|
||||||
previous_x1 = std::max(previous_x1, current_x1);
|
previous_x1 = std::max(previous_x1, current_x1);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -918,7 +918,7 @@ bool TableFinder::HasWideOrNoInterWordGap(ColPartition* part) const {
|
|||||||
// Since no large gap was found, return false if the partition is too
|
// Since no large gap was found, return false if the partition is too
|
||||||
// long to be a data cell
|
// long to be a data cell
|
||||||
if (part->bounding_box().width() >
|
if (part->bounding_box().width() >
|
||||||
kMaxBoxesInDataPartition * part->median_size() ||
|
kMaxBoxesInDataPartition * part->median_height() ||
|
||||||
part_boxes->length() > kMaxBoxesInDataPartition)
|
part_boxes->length() > kMaxBoxesInDataPartition)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
@ -1051,7 +1051,7 @@ void TableFinder::FilterParagraphEndings() {
|
|||||||
// TODO(nbeato): This would be untrue if the text was right aligned.
|
// TODO(nbeato): This would be untrue if the text was right aligned.
|
||||||
// How often is that?
|
// How often is that?
|
||||||
if (part->space_to_left() >
|
if (part->space_to_left() >
|
||||||
kMaxParagraphEndingLeftSpaceMultiple * part->median_size())
|
kMaxParagraphEndingLeftSpaceMultiple * part->median_height())
|
||||||
continue;
|
continue;
|
||||||
// The line above it should be right aligned (assuming justified format).
|
// The line above it should be right aligned (assuming justified format).
|
||||||
// Since we can't assume justified text, we compare whitespace to text.
|
// Since we can't assume justified text, we compare whitespace to text.
|
||||||
@ -1647,7 +1647,7 @@ bool TableFinder::HLineBelongsToTable(const ColPartition& part,
|
|||||||
extra_space_to_left++;
|
extra_space_to_left++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
int space_threshold = kSideSpaceMargin * part.median_size();
|
int space_threshold = kSideSpaceMargin * part.median_height();
|
||||||
if (extra_part->space_to_right() > space_threshold)
|
if (extra_part->space_to_right() > space_threshold)
|
||||||
extra_space_to_right++;
|
extra_space_to_right++;
|
||||||
if (extra_part->space_to_left() > space_threshold)
|
if (extra_part->space_to_left() > space_threshold)
|
||||||
@ -1672,7 +1672,7 @@ void TableFinder::IncludeLeftOutColumnHeaders(TBOX* table_box) {
|
|||||||
while ((neighbor = vsearch.NextVerticalSearch(false)) != nullptr) {
|
while ((neighbor = vsearch.NextVerticalSearch(false)) != nullptr) {
|
||||||
// Max distance to find a table heading.
|
// Max distance to find a table heading.
|
||||||
const int max_distance = kMaxColumnHeaderDistance *
|
const int max_distance = kMaxColumnHeaderDistance *
|
||||||
neighbor->median_size();
|
neighbor->median_height();
|
||||||
int table_top = table_box->top();
|
int table_top = table_box->top();
|
||||||
const TBOX& box = neighbor->bounding_box();
|
const TBOX& box = neighbor->bounding_box();
|
||||||
// Do not continue if the next box is way above
|
// Do not continue if the next box is way above
|
||||||
|
Loading…
Reference in New Issue
Block a user