Major improvements to layout analysis for better image detection, diacritic detection, better textline finding, better tabstop finding

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@648 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
theraysmith@gmail.com 2012-02-02 02:53:04 +00:00
parent 04068c7055
commit 6e3d810c1d
42 changed files with 8993 additions and 2833 deletions

View File

@ -6,14 +6,15 @@ AM_CPPFLAGS = \
-I$(top_srcdir)/cutil -I$(top_srcdir)/classify -I$(top_srcdir)/dict
include_HEADERS = \
alignedblob.h bbgrid.h blkocc.h \
colfind.h colpartition.h colpartitionset.h \
alignedblob.h bbgrid.h blkocc.h blobgrid.h \
ccnontextdetect.h cjkpitch.h colfind.h colpartition.h colpartitionset.h \
colpartitiongrid.h \
devanagari_processing.h drawedg.h drawtord.h edgblob.h edgloop.h \
equationdetectbase.h \
fpchop.h gap_map.h imagefind.h linefind.h makerow.h oldbasel.h \
pithsync.h pitsync1.h scanedg.h sortflts.h strokewidth.h \
tabfind.h tablefind.h tabvector.h \
tablerecog.h textord.h \
tablerecog.h textlineprojection.h textord.h \
topitch.h tordmain.h tovars.h \
underlin.h wordseg.h workingpartset.h
@ -34,13 +35,14 @@ libtesseract_textord_la_LIBADD = \
endif
libtesseract_textord_la_SOURCES = \
alignedblob.cpp bbgrid.cpp blkocc.cpp \
colfind.cpp colpartition.cpp colpartitionset.cpp \
alignedblob.cpp bbgrid.cpp blkocc.cpp blobgrid.cpp \
ccnontextdetect.cpp cjkpitch.cpp colfind.cpp colpartition.cpp colpartitionset.cpp \
colpartitiongrid.cpp devanagari_processing.cpp \
drawedg.cpp drawtord.cpp edgblob.cpp edgloop.cpp \
equationdetectbase.cpp \
fpchop.cpp gap_map.cpp imagefind.cpp linefind.cpp makerow.cpp oldbasel.cpp \
pithsync.cpp pitsync1.cpp scanedg.cpp sortflts.cpp strokewidth.cpp \
tabfind.cpp tablefind.cpp tabvector.cpp \
tablerecog.cpp textord.cpp \
tablerecog.cpp textlineprojection.cpp textord.cpp \
topitch.cpp tordmain.cpp tospace.cpp tovars.cpp \
underlin.cpp wordseg.cpp workingpartset.cpp

View File

@ -39,11 +39,11 @@ namespace tesseract {
// Fraction of resolution used as alignment tolerance for aligned tabs.
const double kAlignedFraction = 0.03125;
// Fraction of resolution used as alignment tolerance for ragged tabs.
const double kRaggedFraction = 0.5;
const double kRaggedFraction = 2.5;
// Fraction of height used as a minimum gutter gap for aligned blobs.
const double kAlignedGapFraction = 0.75;
// Fraction of height used as a minimum gutter gap for ragged tabs.
const double kRaggedGapFraction = 3.0;
const double kRaggedGapFraction = 1.0;
// Constant number of pixels used as alignment tolerance for line finding.
const int kVLineAlignment = 3;
// Constant number of pixels used as gutter gap tolerance for line finding.
@ -163,7 +163,7 @@ void AlignedBlobParams::set_vertical(int vertical_x, int vertical_y) {
AlignedBlob::AlignedBlob(int gridsize,
const ICOORD& bleft, const ICOORD& tright)
: BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>(gridsize, bleft, tright) {
: BlobGrid(gridsize, bleft, tright) {
}
AlignedBlob::~AlignedBlob() {
@ -196,24 +196,24 @@ ScrollView* AlignedBlob::DisplayTabs(const char* window_name,
int bottom_y = box.bottom();
TabType tabtype = bbox->left_tab_type();
if (tabtype != TT_NONE) {
if (tabtype == TT_UNCONFIRMED)
if (tabtype == TT_MAYBE_ALIGNED)
tab_win->Pen(ScrollView::BLUE);
else if (tabtype == TT_MAYBE_RAGGED)
tab_win->Pen(ScrollView::YELLOW);
else if (tabtype == TT_CONFIRMED)
tab_win->Pen(ScrollView::GREEN);
else if (tabtype == TT_FAKE)
tab_win->Pen(ScrollView::YELLOW);
else
tab_win->Pen(ScrollView::GREY);
tab_win->Line(left_x, top_y, left_x, bottom_y);
}
tabtype = bbox->right_tab_type();
if (tabtype != TT_NONE) {
if (tabtype == TT_UNCONFIRMED)
if (tabtype == TT_MAYBE_ALIGNED)
tab_win->Pen(ScrollView::MAGENTA);
else if (tabtype == TT_MAYBE_RAGGED)
tab_win->Pen(ScrollView::ORANGE);
else if (tabtype == TT_CONFIRMED)
tab_win->Pen(ScrollView::RED);
else if (tabtype == TT_FAKE)
tab_win->Pen(ScrollView::ORANGE);
else
tab_win->Pen(ScrollView::GREY);
tab_win->Line(right_x, top_y, right_x, bottom_y);
@ -224,6 +224,17 @@ ScrollView* AlignedBlob::DisplayTabs(const char* window_name,
return tab_win;
}
// Helper: adds up line_crossings() over every blob in the list and returns
// true when that total is 2 or more.
static bool AtLeast2LineCrossings(BLOBNBOX_CLIST* blobs) {
  int crossing_sum = 0;
  BLOBNBOX_C_IT blob_it(blobs);
  blob_it.mark_cycle_pt();
  while (!blob_it.cycled_list()) {
    crossing_sum += blob_it.data()->line_crossings();
    blob_it.forward();
  }
  return crossing_sum > 1;
}
// Finds a vector corresponding to a set of vertically aligned blob edges
// running through the given box. The type of vector returned and the
// search parameters are determined by the AlignedBlobParams.
@ -237,11 +248,13 @@ TabVector* AlignedBlob::FindVerticalAlignment(AlignedBlobParams align_params,
int ext_start_y, ext_end_y;
BLOBNBOX_CLIST good_points;
// Search up and then down from the starting bbox.
TBOX box = bbox->bounding_box();
bool debug = WithinTestRegion(2, box.left(), box.bottom());
int pt_count = AlignTabs(align_params, false, bbox, &good_points, &ext_end_y);
pt_count += AlignTabs(align_params, true, bbox, &good_points, &ext_start_y);
BLOBNBOX_C_IT it(&good_points);
it.move_to_last();
TBOX box = it.data()->bounding_box();
box = it.data()->bounding_box();
int end_y = box.top();
int end_x = align_params.right_tab ? box.right() : box.left();
it.move_to_first();
@ -251,9 +264,14 @@ TabVector* AlignedBlob::FindVerticalAlignment(AlignedBlobParams align_params,
// Acceptable tab vectors must have a minimum number of points,
// have a minimum acceptable length, and have a minimum gradient.
// The gradient corresponds to the skew angle.
if (pt_count >= align_params.min_points &&
// Ragged tabs don't need to satisfy the gradient condition, as they
// will always end up parallel to the vertical direction.
bool at_least_2_crossings = AtLeast2LineCrossings(&good_points);
if ((pt_count >= align_params.min_points &&
end_y - start_y >= align_params.min_length &&
end_y - start_y >= abs(end_x - start_x) * kMinTabGradient) {
(align_params.ragged ||
end_y - start_y >= abs(end_x - start_x) * kMinTabGradient)) ||
at_least_2_crossings) {
int confirmed_points = 0;
// Count existing confirmed points to see if vector is acceptable.
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
@ -270,7 +288,7 @@ TabVector* AlignedBlob::FindVerticalAlignment(AlignedBlobParams align_params,
if (!align_params.ragged ||
confirmed_points + confirmed_points < pt_count) {
const TBOX& box = bbox->bounding_box();
if (WithinTestRegion(2, box.left(), box.bottom())) {
if (debug) {
tprintf("Confirming tab vector of %d pts starting at %d,%d\n",
pt_count, box.left(), box.bottom());
}
@ -282,6 +300,9 @@ TabVector* AlignedBlob::FindVerticalAlignment(AlignedBlobParams align_params,
} else {
bbox->set_left_tab_type(align_params.confirmed_type);
}
if (debug) {
bbox->bounding_box().print();
}
}
// Now make the vector and return it.
TabVector* result = TabVector::FitVector(align_params.alignment,
@ -289,12 +310,21 @@ TabVector* AlignedBlob::FindVerticalAlignment(AlignedBlobParams align_params,
ext_start_y, ext_end_y,
&good_points,
vertical_x, vertical_y);
if (WithinTestRegion(2, box.left(), box.bottom())) {
result->set_intersects_other_lines(at_least_2_crossings);
if (debug) {
tprintf("Box was %d, %d\n", box.left(), box.bottom());
result->Print("After fitting");
}
return result;
} else if (debug) {
tprintf("Ragged tab used too many used points: %d out of %d\n",
confirmed_points, pt_count);
}
} else if (debug) {
tprintf("Tab vector failed basic tests: pt count %d vs min %d, "
"length %d vs min %d, min grad %g\n",
pt_count, align_params.min_points, end_y - start_y,
align_params.min_length, abs(end_x - start_x) * kMinTabGradient);
}
return NULL;
}
@ -310,13 +340,18 @@ int AlignedBlob::AlignTabs(const AlignedBlobParams& params,
BLOBNBOX_C_IT it(good_points);
TBOX box = bbox->bounding_box();
bool debug = WithinTestRegion(2, box.left(), box.bottom());
if (debug) {
tprintf("Starting alignment run at blob:");
box.print();
}
int x_start = params.right_tab ? box.right() : box.left();
while (bbox != NULL) {
// Add the blob to the list if the appropriate side is a tab candidate,
// or if we are working on a ragged tab.
if (((params.right_tab && bbox->right_tab_type() != TT_NONE) ||
(!params.right_tab && bbox->left_tab_type() != TT_NONE) ||
params.ragged) &&
TabType type = params.right_tab ? bbox->right_tab_type()
: bbox->left_tab_type();
if (((type != TT_NONE && type != TT_MAYBE_RAGGED) || params.ragged) &&
(it.empty() || it.data() != bbox)) {
if (top_to_bottom)
it.add_before_then_move(bbox);
@ -335,6 +370,10 @@ int AlignedBlob::AlignTabs(const AlignedBlobParams& params,
x_start = params.right_tab ? box.right() : box.left();
}
}
if (debug) {
tprintf("Alignment run ended with %d pts at blob:", ptcount);
box.print();
}
return ptcount;
}
@ -417,15 +456,12 @@ BLOBNBOX* AlignedBlob::FindAlignedBlob(const AlignedBlobParams& p,
// waiting for a sequence of blobs in a line to end.
// NextVerticalSearch alone does not guarantee this, as there may be
// more than one blob in a grid cell. See comment in AlignTabs.
if ((n_y < start_y) != top_to_bottom || n_y == start_y)
if ((n_y < start_y) != top_to_bottom || nbox.y_overlap(box))
continue; // Only look in the required direction.
if (result != NULL &&
((top_to_bottom && n_y < result->bounding_box().bottom()) ||
(!top_to_bottom && n_y > result->bounding_box().top())))
if (result != NULL && result->bounding_box().y_gap(nbox) > gridsize())
return result; // This result is clear.
if (backup_result != NULL && p.ragged &&
((top_to_bottom && n_y < backup_result->bounding_box().bottom()) ||
(!top_to_bottom && n_y > backup_result->bounding_box().top())))
if (backup_result != NULL && p.ragged && result == NULL &&
backup_result->bounding_box().y_gap(nbox) > gridsize())
return backup_result; // This result is clear.
// If the neighbouring blob is the wrong side of a separator line, then it
@ -446,7 +482,7 @@ BLOBNBOX* AlignedBlob::FindAlignedBlob(const AlignedBlobParams& p,
n_right > x_at_n_y + p.r_align_tolerance &&
(p.ragged || n_left < x_at_n_y + p.gutter_fraction * nbox.height())) {
// In the gutter so end of line.
if (bbox->right_tab_type() >= TT_UNCONFIRMED)
if (bbox->right_tab_type() >= TT_MAYBE_ALIGNED)
bbox->set_right_tab_type(TT_DELETED);
*end_y = top_to_bottom ? nbox.top() : nbox.bottom();
if (WithinTestRegion(2, x_start, start_y))
@ -458,7 +494,7 @@ BLOBNBOX* AlignedBlob::FindAlignedBlob(const AlignedBlobParams& p,
n_right > x_at_n_y - p.min_gutter &&
(p.ragged || n_right > x_at_n_y - p.gutter_fraction * nbox.height())) {
// In the gutter so end of line.
if (bbox->left_tab_type() >= TT_UNCONFIRMED)
if (bbox->left_tab_type() >= TT_MAYBE_ALIGNED)
bbox->set_left_tab_type(TT_DELETED);
*end_y = top_to_bottom ? nbox.top() : nbox.bottom();
if (WithinTestRegion(2, x_start, start_y))
@ -476,15 +512,23 @@ BLOBNBOX* AlignedBlob::FindAlignedBlob(const AlignedBlobParams& p,
tprintf("aligned, seeking%d, l=%d, r=%d\n",
p.right_tab, neighbour->left_tab_type(),
neighbour->right_tab_type());
if ((p.right_tab && neighbour->right_tab_type() != TT_NONE) ||
(!p.right_tab && neighbour->left_tab_type() != TT_NONE)) {
TabType n_type = p.right_tab ? neighbour->right_tab_type()
: neighbour->left_tab_type();
if (n_type != TT_NONE && (p.ragged || n_type != TT_MAYBE_RAGGED)) {
if (result == NULL) {
result = neighbour;
} else {
// Keep the closest neighbour.
int old_y = (result->bounding_box().top() +
result->bounding_box().bottom()) / 2;
if (abs(n_y - start_y) < abs(old_y - start_y))
// Keep the closest neighbour by Euclidean distance.
// This prevents it from picking a tab blob in another column.
const TBOX& old_box = result->bounding_box();
int x_diff = p.right_tab ? old_box.right() : old_box.left();
x_diff -= x_at_n_y;
int y_diff = (old_box.top() + old_box.bottom()) / 2 - start_y;
int old_dist = x_diff * x_diff + y_diff * y_diff;
x_diff = n_x - x_at_n_y;
y_diff = n_y - start_y;
int new_dist = x_diff * x_diff + y_diff * y_diff;
if (new_dist < old_dist)
result = neighbour;
}
} else if (backup_result == NULL) {

View File

@ -80,7 +80,7 @@ struct AlignedBlobParams {
// The AlignedBlob class contains code to find vertically aligned blobs.
// This is factored out into a separate class, so it can be used by both
// vertical line finding (LineFind) and tabstop finding (TabFind).
class AlignedBlob : public BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> {
class AlignedBlob : public BlobGrid {
public:
AlignedBlob(int gridsize, const ICOORD& bleft, const ICOORD& tright);
virtual ~AlignedBlob();

View File

@ -153,6 +153,61 @@ IntGrid* IntGrid::NeighbourhoodSum() const {
return sumgrid;
}
// Returns true if more than half the area of the rect is covered by grid
// cells that are over the theshold.
bool IntGrid::RectMostlyOverThreshold(const TBOX& rect, int threshold) const {
int min_x, min_y, max_x, max_y;
GridCoords(rect.left(), rect.bottom(), &min_x, &min_y);
GridCoords(rect.right(), rect.top(), &max_x, &max_y);
int total_area = 0;
for (int y = min_y; y <= max_y; ++y) {
for (int x = min_x; x <= max_x; ++x) {
int value = GridCellValue(x, y);
if (value > threshold) {
TBOX cell_box(x * gridsize_, y * gridsize_,
(x + 1) * gridsize_, (y + 1) * gridsize_);
cell_box &= rect; // This is in-place box intersection.
total_area += cell_box.area();
}
}
}
return total_area * 2 > rect.area();
}
// Returns true if at least one grid cell touched by the given rectangle
// holds the value zero, false if every such cell is non-zero.
bool IntGrid::AnyZeroInRect(const TBOX& rect) const {
  int first_col, first_row, last_col, last_row;
  GridCoords(rect.left(), rect.bottom(), &first_col, &first_row);
  GridCoords(rect.right(), rect.top(), &last_col, &last_row);
  for (int row = first_row; row <= last_row; ++row) {
    // Advance along the row until a zero cell or the end of the range.
    int col = first_col;
    while (col <= last_col && GridCellValue(col, row) != 0)
      ++col;
    if (col <= last_col)
      return true;  // Stopped early, so a zero was found.
  }
  return false;
}
// Builds a 1 bit-per-pixel pix covering the grid area at full resolution, in
// which every cell whose value exceeds the threshold is painted as a filled
// square. The caller must pixDestroy the result.
// A cell is left unpainted if any of its 4-neighbours has a value of zero or
// less, so the edge cells of a region are not marked.
// NOTE(review): for cells on the grid border the neighbour lookups use
// coordinates of -1 or gridwidth()/gridheight(); this presumably relies on
// GridCellValue clipping its arguments - confirm.
Pix* IntGrid::ThresholdToPix(int threshold) const {
  const int top_y = tright().y();
  Pix* mask = pixCreate(tright().x() - bleft().x(), top_y - bleft().y(), 1);
  const int cell = gridsize();
  for (int row = 0; row < gridheight(); ++row) {
    for (int col = 0; col < gridwidth(); ++col) {
      if (GridCellValue(col, row) <= threshold)
        continue;
      const bool has_all_neighbours =
          GridCellValue(col - 1, row) > 0 && GridCellValue(col + 1, row) > 0 &&
          GridCellValue(col, row - 1) > 0 && GridCellValue(col, row + 1) > 0;
      if (!has_all_neighbours)
        continue;
      // Grid rows count upwards, pix rows count downwards, hence the flip.
      pixRasterop(mask, col * cell, top_y - ((row + 1) * cell),
                  cell, cell, PIX_SET, NULL, 0, 0);
    }
  }
  return mask;
}
// Make a Pix of the correct scaled size for the TraceOutline functions.
Pix* GridReducedPix(const TBOX& box, int gridsize,
ICOORD bleft, int* left, int* bottom) {
@ -232,4 +287,3 @@ Pix* TraceBlockOnReducedPix(BLOCK* block, int gridsize,
}
} // namespace tesseract.

View File

@ -123,8 +123,7 @@ class IntGrid : public GridBase {
IntGrid* NeighbourhoodSum() const;
int GridCellValue(int grid_x, int grid_y) const {
ASSERT_HOST(grid_x >= 0 && grid_x < gridwidth());
ASSERT_HOST(grid_y >= 0 && grid_y < gridheight());
ClipGridCoords(&grid_x, &grid_y);
return grid_[grid_y * gridwidth_ + grid_x];
}
void SetGridCell(int grid_x, int grid_y, int value) {
@ -132,6 +131,16 @@ class IntGrid : public GridBase {
ASSERT_HOST(grid_y >= 0 && grid_y < gridheight());
grid_[grid_y * gridwidth_ + grid_x] = value;
}
// Returns true if more than half the area of the rect is covered by grid
// cells that are over the threshold.
bool RectMostlyOverThreshold(const TBOX& rect, int threshold) const;
// Returns true if any cell value in the given rectangle is zero.
bool AnyZeroInRect(const TBOX& rect) const;
// Returns a full-resolution binary pix in which each cell over the given
// threshold is filled as a black square. pixDestroy after use.
Pix* ThresholdToPix(int threshold) const;
private:
int* grid_; // 2-d array of ints.
@ -373,6 +382,24 @@ int SortByBoxLeft(const void* void1, const void* void2) {
return p1->bounding_box().top() - p2->bounding_box().top();
}
// qsort-style comparator that orders BBC objects right-to-left, i.e. by
// descending bounding_box().right(). Ties are broken by descending left(),
// then ascending bottom(), then ascending top().
// Both arguments point at a BBC pointer, not at a BBC itself.
template<class BBC>
int SortRightToLeft(const void* void1, const void* void2) {
  // Strip one level of indirection to get at the actual objects.
  const BBC* first = *reinterpret_cast<const BBC* const *>(void1);
  const BBC* second = *reinterpret_cast<const BBC* const *>(void2);
  int diff = second->bounding_box().right() - first->bounding_box().right();
  if (diff == 0)
    diff = second->bounding_box().left() - first->bounding_box().left();
  if (diff == 0)
    diff = first->bounding_box().bottom() - second->bounding_box().bottom();
  if (diff == 0)
    diff = first->bounding_box().top() - second->bounding_box().top();
  return diff;
}
// Sort function to sort a BBC by bounding_box().bottom().
template<class BBC>
int SortByBoxBottom(const void* void1, const void* void2) {
@ -859,6 +886,9 @@ void GridSearch<BBC, BBC_CLIST, BBC_C_IT>::RemoveBBox() {
template<class BBC, class BBC_CLIST, class BBC_C_IT>
void GridSearch<BBC, BBC_CLIST, BBC_C_IT>::RepositionIterator() {
// Something was deleted, so we have little choice but to clear the
// returns list.
returns_.shallow_clear();
// Reset the iterator back to one past the previous return.
// If the previous_return_ is no longer in the list, then
// next_return_ serves as a backup.

44
textord/blobgrid.cpp Normal file
View File

@ -0,0 +1,44 @@
///////////////////////////////////////////////////////////////////////
// File: blobgrid.cpp
// Description: BBGrid of BLOBNBOX with useful BLOBNBOX-specific methods.
// Copyright 2011 Google Inc. All Rights Reserved.
// Author: rays@google.com (Ray Smith)
// Created: Sat Jun 11 10:30:01 PST 2011
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include "blobgrid.h"
namespace tesseract {
// Constructs the grid by forwarding gridsize, bleft and tright unchanged to
// the BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> base class.
BlobGrid::BlobGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright)
: BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>(gridsize, bleft, tright) {
}
// Nothing to release here; BlobGrid adds no data members of its own.
BlobGrid::~BlobGrid() {
}
// Adds every blob in the given list to the grid, with both x and y
// spreading enabled. Blobs flagged joined_to_prev() are skipped.
// The blobs are not removed from the source list, so the source list keeps
// ownership of them.
void BlobGrid::InsertBlobList(BLOBNBOX_LIST* blobs) {
  BLOBNBOX_IT list_it(blobs);
  for (list_it.mark_cycle_pt(); !list_it.cycled_list(); list_it.forward()) {
    BLOBNBOX* candidate = list_it.data();
    if (candidate->joined_to_prev())
      continue;
    InsertBBox(true, true, candidate);
  }
}
} // namespace tesseract.

46
textord/blobgrid.h Normal file
View File

@ -0,0 +1,46 @@
///////////////////////////////////////////////////////////////////////
// File: blobgrid.h
// Description: BBGrid of BLOBNBOX with useful BLOBNBOX-specific methods.
// Copyright 2011 Google Inc. All Rights Reserved.
// Author: rays@google.com (Ray Smith)
// Created: Sat Jun 11 10:26:01 PST 2011
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_TEXTORD_BLOBGRID_H_
#define TESSERACT_TEXTORD_BLOBGRID_H_
#include "bbgrid.h"
#include "blobbox.h"
CLISTIZEH(BLOBNBOX)
namespace tesseract {
typedef GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> BlobGridSearch;
// A BBGrid specialised for BLOBNBOX elements, adding BLOBNBOX-specific
// helper methods on top of the generic grid.
class BlobGrid : public BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> {
public:
// gridsize, bleft and tright are passed straight through to BBGrid.
BlobGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright);
virtual ~BlobGrid();
// Inserts all the blobs from the given list, with x and y spreading,
// without removing from the source list, so ownership remains with the
// source list.
void InsertBlobList(BLOBNBOX_LIST* blobs);
};
} // namespace tesseract.
#endif // TESSERACT_TEXTORD_BLOBGRID_H_

310
textord/ccnontextdetect.cpp Normal file
View File

@ -0,0 +1,310 @@
///////////////////////////////////////////////////////////////////////
// File: ccnontextdetect.cpp
// Description: Connected-Component-based photo (non-text) detection.
// Copyright 2011 Google Inc. All Rights Reserved.
// Author: rays@google.com (Ray Smith)
// Created: Sat Jun 11 10:12:01 PST 2011
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include "ccnontextdetect.h"
#include "imagefind.h"
#include "strokewidth.h"
namespace tesseract {
// Max number of neighbour small objects per squared gridsize before a grid
// cell becomes image.
const double kMaxSmallNeighboursPerPix = 1.0 / 32;
// Max number of small blobs a large blob may overlap before it is rejected
// and determined to be image.
const int kMaxLargeOverlapsWithSmall = 3;
// Max number of small blobs a medium blob may overlap before it is rejected
// and determined to be image. Larger than for large blobs as medium blobs
// may be complex Chinese characters. Very large Chinese characters are going
// to overlap more medium blobs than small.
const int kMaxMediumOverlapsWithSmall = 12;
// Max number of normal blobs a large blob may overlap before it is rejected
// and determined to be image. This is set higher to allow for drop caps, which
// may overlap a lot of good text blobs.
const int kMaxLargeOverlapsWithMedium = 12;
// Multiplier of original noise_count used to test for the case of spreading
// noise beyond where it should really be.
const int kOriginalNoiseMultiple = 8;
// Pixel padding for noise blobs when rendering on the image
// mask to encourage them to join together. Make it too big and images
// will fatten out too much and have to be clipped to text.
const int kNoisePadding = 4;
// Fraction of max_noise_count_ to be added to the noise count if there is
// photo mask in the background.
const double kPhotoOffsetFraction = 0.375;
// Min ratio of perimeter^2/16area for a "good" blob in estimating noise
// density. Good blobs are supposed to be highly likely real text.
// We consider a square to have unit ratio, where A=(p/4)^2, hence the factor
// of 16. Digital circles are weird and have a minimum ratio of pi/64, not
// the 1/(4pi) that you would expect.
const double kMinGoodTextPARatio = 1.5;
// Constructs an empty detector grid. max_noise_count_ is set to
// kMaxSmallNeighboursPerPix scaled by the cell area (gridsize squared) and
// truncated to int; noise_density_ starts out NULL until it is computed.
CCNonTextDetect::CCNonTextDetect(int gridsize,
const ICOORD& bleft, const ICOORD& tright)
: BlobGrid(gridsize, bleft, tright),
max_noise_count_(static_cast<int>(kMaxSmallNeighboursPerPix *
gridsize * gridsize)),
noise_density_(NULL) {
// TODO(rays) break max_noise_count_ out into an area-proportional
// value, as now plus an additive constant for the number of text blobs
// in the 3x3 neighbourhood - maybe 9.
}
// Frees the noise density grid, which this object owns once it has been
// computed. Deleting is safe when it is still NULL.
CCNonTextDetect::~CCNonTextDetect() {
delete noise_density_;
}
// Creates and returns a Pix with the same resolution as the original
// in which 1 (black) pixels represent likely non text (photo, line drawing)
// areas of the page, deleting from the blob_block the blobs that were
// determined to be non-text.
// The photo_map is used to bias the decision towards non-text, rather than
// supplying a definite decision.
// The blob_block is the usual result of connected component analysis,
// holding the detected blobs.
// If debug is true, intermediate masks are written to junknoisemask.png and
// junkccphotomask.png, and a ScrollView window shows kept and deleted blobs;
// the function then waits for that window's SVET_DESTROY event before
// returning.
// The returned Pix should be pixDestroyed after use.
Pix* CCNonTextDetect::ComputeNonTextMask(bool debug, Pix* photo_map,
TO_BLOCK* blob_block) {
// Insert the smallest blobs into the grid.
InsertBlobList(&blob_block->small_blobs);
InsertBlobList(&blob_block->noise_blobs);
// Add the medium blobs that don't have a good strokewidth neighbour.
// Those that do go into good_grid as an antidote to spreading beyond the
// real reaches of a noise region.
BlobGrid good_grid(gridsize(), bleft(), tright());
BLOBNBOX_IT blob_it(&blob_block->blobs);
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
BLOBNBOX* blob = blob_it.data();
// Compute (perimeter / 4)^2 / enclosed_area, which is 1 for a square.
double perimeter_area_ratio = blob->cblob()->perimeter() / 4.0;
perimeter_area_ratio *= perimeter_area_ratio / blob->enclosed_area();
// Blobs failing either the GoodTextBlob or the ratio test count as noise
// candidates in this grid; the rest are treated as likely good text.
if (blob->GoodTextBlob() == 0 || perimeter_area_ratio < kMinGoodTextPARatio)
InsertBBox(true, true, blob);
else
good_grid.InsertBBox(true, true, blob);
}
noise_density_ = ComputeNoiseDensity(debug, photo_map, &good_grid);
good_grid.Clear(); // Not needed any more.
// Start the mask from the cells whose noise density exceeds the limit.
Pix* pix = noise_density_->ThresholdToPix(max_noise_count_);
if (debug) {
pixWrite("junknoisemask.png", pix, IFF_PNG);
}
ScrollView* win = NULL;
if (debug) {
win = MakeWindow(0, 400, "Photo Mask Blobs");
}
// Large and medium blobs are not text if they overlap with "a lot" of small
// blobs.
MarkAndDeleteNonTextBlobs(&blob_block->large_blobs,
kMaxLargeOverlapsWithSmall,
win, ScrollView::DARK_GREEN, pix);
MarkAndDeleteNonTextBlobs(&blob_block->blobs, kMaxMediumOverlapsWithSmall,
win, ScrollView::WHITE, pix);
// Clear the grid of small blobs and insert the medium blobs.
Clear();
InsertBlobList(&blob_block->blobs);
MarkAndDeleteNonTextBlobs(&blob_block->large_blobs,
kMaxLargeOverlapsWithMedium,
win, ScrollView::DARK_GREEN, pix);
// Clear again before we start deleting the blobs in the grid.
Clear();
// With the overlap test disabled (-1), only the noise density test applies.
MarkAndDeleteNonTextBlobs(&blob_block->noise_blobs, -1,
win, ScrollView::CORAL, pix);
MarkAndDeleteNonTextBlobs(&blob_block->small_blobs, -1,
win, ScrollView::GOLDENROD, pix);
MarkAndDeleteNonTextBlobs(&blob_block->blobs, -1,
win, ScrollView::WHITE, pix);
if (debug) {
win->Update();
pixWrite("junkccphotomask.png", pix, IFF_PNG);
delete win->AwaitEvent(SVET_DESTROY);
delete win;
}
return pix;
}
// Builds and returns the noise density IntGrid, at the same gridsize as this
// grid. The starting value of each cell is the NeighbourhoodSum of the
// per-cell element counts of this grid, which is expected to hold the small
// and medium blobs that are more likely non-text. good_grid holds the blobs
// considered most likely good text.
// Two corrections are then applied per cell:
//  - a cell at or below max_noise_count_ that would exceed it once the photo
//    offset is added gets that offset added if
//    ImageFind::BoundsWithinRect succeeds on the cell's area of photo_map,
//    biasing the decision towards non-text without making it definite;
//  - a cell above max_noise_count_ that contains good blobs and whose own
//    count is low is reset to zero, as most of its density came from
//    neighbouring cells.
// The caller owns the returned grid.
IntGrid* CCNonTextDetect::ComputeNoiseDensity(bool debug, Pix* photo_map,
                                              BlobGrid* good_grid) {
  IntGrid* own_counts = CountCellElements();
  IntGrid* density = own_counts->NeighbourhoodSum();
  IntGrid* text_counts = good_grid->CountCellElements();
  const int map_height = pixGetHeight(photo_map);
  const int photo_bias =
      IntCastRounded(max_noise_count_ * kPhotoOffsetFraction);
  for (int row = 0; row < gridheight(); ++row) {
    for (int col = 0; col < gridwidth(); ++col) {
      const int noise = density->GridCellValue(col, row);
      if (noise <= max_noise_count_) {
        // Below the limit: only the photo bias can apply.
        if (noise + photo_bias > max_noise_count_) {
          // Cell bounds in image coordinates, where y runs downwards.
          int left = col * gridsize();
          int right = left + gridsize();
          int bottom = map_height - row * gridsize();
          int top = bottom - gridsize();
          if (ImageFind::BoundsWithinRect(photo_map, &left, &top, &right,
                                          &bottom)) {
            density->SetGridCell(col, row, noise + photo_bias);
          }
        }
        continue;
      }
      // Above the limit: trimming applies only where there is good text.
      const int good = text_counts->GridCellValue(col, row);
      if (good <= 0)
        continue;
      const int original = own_counts->GridCellValue(col, row);
      if (debug) {
        tprintf("At %d, %d, noise = %d, good=%d, orig=%d, thr=%d\n",
                col * gridsize(), row * gridsize(),
                noise, good, original, max_noise_count_);
      }
      if (original * kOriginalNoiseMultiple <= max_noise_count_)
        density->SetGridCell(col, row, 0);
    }
  }
  delete own_counts;
  delete text_counts;
  return density;
}
// Helper to expand a box in one of the 4 directions by the given pad,
// provided it does not expand into any cell with a zero noise density.
// The directions are tried in the order right, left, up, down, and the first
// expansion that touches no zero cell is returned.
// If none of those is possible, try expanding all round by kNoisePadding.
// If that also touches a zero cell, the original box is returned unchanged.
static TBOX AttemptBoxExpansion(const TBOX& box, const IntGrid& noise_density,
                                int pad) {
  TBOX expanded_box(box);
  expanded_box.set_right(box.right() + pad);
  if (!noise_density.AnyZeroInRect(expanded_box))
    return expanded_box;
  expanded_box = box;
  expanded_box.set_left(box.left() - pad);
  if (!noise_density.AnyZeroInRect(expanded_box))
    return expanded_box;
  expanded_box = box;
  expanded_box.set_top(box.top() + pad);
  if (!noise_density.AnyZeroInRect(expanded_box))
    return expanded_box;
  expanded_box = box;
  // The y axis points up, so growing downwards means lowering the bottom.
  // Adding the pad here would shrink (or even invert) the box instead.
  expanded_box.set_bottom(box.bottom() - pad);
  if (!noise_density.AnyZeroInRect(expanded_box))
    return expanded_box;
  expanded_box = box;
  expanded_box.pad(kNoisePadding, kNoisePadding);
  if (!noise_density.AnyZeroInRect(expanded_box))
    return expanded_box;
  return box;
}
// Tests each blob in the list to see if it is certain non-text using 2
// conditions:
// 1. blob overlaps a cell with high value in noise_density_ (previously set
// by ComputeNoiseDensity).
// OR 2. The blob overlaps more than max_blob_overlaps in *this grid. This
// condition is disabled with max_blob_overlaps == -1.
// If it does, the blob is declared non-text, and is used to mark up the
// nontext_mask. Such blobs are fully deleted, and non-noise blobs have their
// neighbours reset, as they may now point to deleted data.
// WARNING: The blobs in the given list may be in the *this grid, but they are
// not removed from it. If any deleted blobs might be in *this, then this must
// be Clear()ed immediately after MarkAndDeleteNonTextBlobs is called.
// If the win is not NULL, deleted blobs are drawn on it in red, and kept
// blobs are drawn on it in ok_color.
void CCNonTextDetect::MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST* blobs,
                                                int max_blob_overlaps,
                                                ScrollView* win,
                                                ScrollView::Color ok_color,
                                                Pix* nontext_mask) {
  // Vertical extent of the grid, used to flip blob coordinates (y up) into
  // mask coordinates (y down). Both terms must be y coordinates.
  int imageheight = tright().y() - bleft().y();
  BLOBNBOX_IT blob_it(blobs);
  // Collects the rejected BLOBNBOXes so they are destroyed on return.
  BLOBNBOX_LIST dead_blobs;
  BLOBNBOX_IT dead_it(&dead_blobs);
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    BLOBNBOX* blob = blob_it.data();
    TBOX box = blob->bounding_box();
    if (!noise_density_->RectMostlyOverThreshold(box, max_noise_count_) &&
        (max_blob_overlaps < 0 ||
         !BlobOverlapsTooMuch(blob, max_blob_overlaps))) {
      // Kept blob: its neighbour pointers may refer to blobs deleted here.
      blob->ClearNeighbours();
      if (win != NULL)
        blob->plot(win, ok_color, ok_color);
    } else {
      if (noise_density_->AnyZeroInRect(box)) {
        // There is a danger that the bounding box may overlap real text, so
        // we need to render the outline.
        Pix* blob_pix = blob->cblob()->render_outline();
        pixRasterop(nontext_mask, box.left(), imageheight - box.top(),
                    box.width(), box.height(), PIX_SRC | PIX_DST,
                    blob_pix, 0, 0);
        pixDestroy(&blob_pix);
      } else {
        if (box.area() < gridsize() * gridsize()) {
          // It is a really bad idea to make lots of small components in the
          // photo mask, so try to join it to a bigger area by expanding the
          // box in a way that does not touch any zero noise density cell.
          box = AttemptBoxExpansion(box, *noise_density_, gridsize());
        }
        // All overlapped cells are non-zero, so just mark the rectangle.
        pixRasterop(nontext_mask, box.left(), imageheight - box.top(),
                    box.width(), box.height(), PIX_SET, NULL, 0, 0);
      }
      if (win != NULL)
        blob->plot(win, ScrollView::RED, ScrollView::RED);
      // It is safe to delete the cblob now, as it isn't used by the grid
      // or BlobOverlapsTooMuch, and the BLOBNBOXes will go away with the
      // dead_blobs list.
      // TODO(rays) delete the delete when the BLOBNBOX destructor deletes
      // the cblob.
      delete blob->cblob();
      dead_it.add_to_end(blob_it.extract());
    }
  }
}
// Reports whether the given blob has a major overlap with more than
// max_overlaps of the blobs stored in this grid. The search stops as soon
// as the limit is exceeded.
bool CCNonTextDetect::BlobOverlapsTooMuch(BLOBNBOX* blob, int max_overlaps) {
  // Rectangle search of the grid over the bounding box of the blob, with
  // unique mode switched on after the search has been started.
  BlobGridSearch grid_search(this);
  TBOX blob_box = blob->bounding_box();
  grid_search.StartRectSearch(blob_box);
  grid_search.SetUniqueMode(true);
  int num_overlaps = 0;
  // The count test comes first, so a negative max_overlaps never touches
  // the search and falls straight through to the false return.
  for (BLOBNBOX* other = NULL;
       num_overlaps <= max_overlaps &&
       (other = grid_search.NextRectSearch()) != NULL;) {
    if (!blob_box.major_overlap(other->bounding_box()))
      continue;
    if (++num_overlaps > max_overlaps)
      return true;
  }
  return false;
}
} // namespace tesseract.

87
textord/ccnontextdetect.h Normal file
View File

@ -0,0 +1,87 @@
///////////////////////////////////////////////////////////////////////
// File: ccnontextdetect.h
// Description: Connected-Component-based non-text detection.
// Copyright 2011 Google Inc. All Rights Reserved.
// Author: rays@google.com (Ray Smith)
// Created: Sat Jun 11 09:52:01 PST 2011
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_TEXTORD_CCPHOTODETECT_H_
#define TESSERACT_TEXTORD_CCPHOTODETECT_H_
#include "blobgrid.h"
#include "scrollview.h"
namespace tesseract {
// The CCNonTextDetect class contains grid-based operations on blobs to create
// a full-resolution image mask analogous yet complementary to
// pixGenHalftoneMask as it is better at line-drawings, graphs and charts.
// It is itself a BlobGrid, and the blobs held in that grid are the ones that
// BlobOverlapsTooMuch counts overlaps against.
class CCNonTextDetect : public BlobGrid {
public:
// Constructs the detector from a grid size and the bottom-left and top-right
// corners of the area covered.
// NOTE(review): the arguments are presumably forwarded to the BlobGrid base;
// the constructor body is not visible from this header.
CCNonTextDetect(int gridsize, const ICOORD& bleft, const ICOORD& tright);
virtual ~CCNonTextDetect();
// Creates and returns a Pix with the same resolution as the original
// in which 1 (black) pixels represent likely non text (photo, line drawing)
// areas of the page, deleting from the blob_block the blobs that were
// determined to be non-text.
// The photo_map (binary image mask) is used to bias the decision towards
// non-text, rather than supplying a definite decision.
// The blob_block is the usual result of connected component analysis,
// holding the detected blobs.
// The returned Pix should be pixDestroy()ed after use.
Pix* ComputeNonTextMask(bool debug, Pix* photo_map, TO_BLOCK* blob_block);
private:
// Computes and returns the noise_density IntGrid, at the same gridsize as
// this by summing the number of small elements in a 3x3 neighbourhood of
// each grid cell. good_grid is filled with blobs that are considered most
// likely good text, and this is filled with small and medium blobs that are
// more likely non-text.
// The photo_map is used to bias the decision towards non-text, rather than
// supplying a definite decision.
IntGrid* ComputeNoiseDensity(bool debug, Pix* photo_map, BlobGrid* good_grid);
// Tests each blob in the list to see if it is certain non-text using 2
// conditions:
// 1. blob overlaps a cell with high value in noise_density_ (previously set
// by ComputeNoiseDensity).
// OR 2. The blob overlaps more than max_blob_overlaps in *this grid. This
// condition is disabled with max_blob_overlaps == -1.
// If it does, the blob is declared non-text, and is used to mark up the
// nontext_mask. Such blobs are fully deleted, and non-noise blobs have their
// neighbours reset, as they may now point to deleted data.
// WARNING: The blobs in the blobs list may also be in the *this grid, but
// they are not removed from it. If any deleted blobs might be in *this, then
// this must be Clear()ed immediately after MarkAndDeleteNonTextBlobs is
// called.
// If the win is not NULL, deleted blobs are drawn on it in red, and kept
// blobs are drawn on it in ok_color.
void MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST* blobs,
int max_blob_overlaps,
ScrollView* win, ScrollView::Color ok_color,
Pix* nontext_mask);
// Returns true if the given blob overlaps more than max_overlaps blobs
// in the current grid.
bool BlobOverlapsTooMuch(BLOBNBOX* blob, int max_overlaps);
// Max entry in noise_density_ before the cell is declared noisy.
int max_noise_count_;
// Completed noise density map, which we keep around to use for secondary
// noise detection.
IntGrid* noise_density_;
};
} // namespace tesseract.
#endif // TESSERACT_TEXTORD_CCPHOTODETECT_H_

1094
textord/cjkpitch.cpp Normal file

File diff suppressed because it is too large Load Diff

72
textord/cjkpitch.h Normal file
View File

@ -0,0 +1,72 @@
///////////////////////////////////////////////////////////////////////
// File: cjkpitch.h
// Description: Code to determine fixed pitchness and the pitch if fixed,
// for CJK text.
// Copyright 2011 Google Inc. All Rights Reserved.
// Author: takenaka@google.com (Hiroshi Takenaka)
// Created: Mon Jun 27 12:48:35 JST 2011
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef CJKPITCH_H_
#define CJKPITCH_H_
#include "blobbox.h"
#include "notdll.h"
// Function to test the "fixed-pitchness" of the input text and to estimate
// character pitch parameters for it, based on a CJK fixed-pitch layout
// model.
//
// This function assumes that a fixed-pitch CJK text has the following
// characteristics:
//
// - Most glyphs are designed to fit within the same sized square
// (imaginary body). Also they are aligned to the center of their
// imaginary bodies.
// - The imaginary body is always a regular rectangle.
// - There may be some extra space between character bodies
// (tracking).
// - There may be some extra space after punctuation.
// - The text is *not* space-delimited. Thus spaces are rare.
// - A character may consist of multiple unconnected blobs.
//
// The function works in two passes. On pass 1, it looks for "good"
// blobs that have the same pitch on both sides and look like a
// complete CJK character. It then estimates the character pitch for
// every row, based on those good blobs. If not enough good blobs can
// be found for a row, then the pitch is estimated from other rows
// with similar character height instead.
//
// Pass 2 is an iterative process to fit the blobs into fixed-pitch
// character cells. Once we have estimated the character pitch, blobs
// that are almost as large as the pitch can be considered to be
// complete characters. And once we know that some characters are
// complete characters, we can estimate the region occupied by their
// neighbors. And so on.
//
// We repeat the process until all ambiguities are resolved. Then we
// make the final decision about the fixed-pitchness of each row and
// compute pitch and spacing parameters.
//
// (If a row is considered to be proportional, pitch_decision for the
// row is set to PITCH_CORR_PROP and the later phase
// (i.e. Textord::to_spacing()) should determine its spacing
// parameters.)
//
// This function doesn't provide all the information required by
// fixed_pitch_words(), and the rows need to be processed with
// make_prop_words() even if they are fixed pitched.
void compute_fixed_pitch_cjk(ICOORD page_tr, // top right
TO_BLOCK_LIST *port_blocks); // input list
#endif // CJKPITCH_H_

File diff suppressed because it is too large Load Diff

View File

@ -25,25 +25,28 @@
#include "colpartitiongrid.h"
#include "colpartitionset.h"
#include "ocrblock.h"
#include "textlineprojection.h"
class ScrollView;
class TO_BLOCK;
class STATS;
class BLOCK_LIST;
struct Boxa;
struct Pixa;
class DENORM;
class ScrollView;
class STATS;
class TO_BLOCK;
namespace tesseract {
extern BOOL_VAR_H(textord_tabfind_find_tables, false, "run table detection");
class StrokeWidth;
class LineSpacing;
class TempColumn_LIST;
class ColSegment_LIST;
class ColumnGroup_LIST;
class ColPartitionSet;
class ColPartitionSet_LIST;
class ColSegment_LIST;
class ColumnGroup_LIST;
class LineSpacing;
class StrokeWidth;
class TempColumn_LIST;
class EquationDetectBase;
// The ColumnFinder class finds columns in the grid.
class ColumnFinder : public TabFind {
@ -59,25 +62,54 @@ class ColumnFinder : public TabFind {
int vertical_x, int vertical_y);
virtual ~ColumnFinder();
// Accessors for testing
// Returns the denorm_ member as a pointer-to-const; the caller gets no
// ownership and the ColumnFinder itself is not modified.
const DENORM* denorm() const {
return denorm_;
}
// Returns the address of the projection_ member as a pointer-to-const.
// The pointer refers to storage inside this ColumnFinder.
const TextlineProjection* projection() const {
return &projection_;
}
// ======================================================================
// The main function of ColumnFinder is broken into pieces to facilitate
// optional insertion of orientation and script detection in an efficient
// way. The calling sequence IS MANDATORY however, whether or not
// OSD is being used:
// 1. Construction.
// 2. IsVerticallyAlignedText.
// 3. CorrectOrientation.
// 4. FindBlocks.
// 5. Destruction. Use of a single column finder for multiple images does not
// 2. SetupAndFilterNoise.
// 3. IsVerticallyAlignedText.
// 4. CorrectOrientation.
// 5. FindBlocks.
// 6. Destruction. Use of a single column finder for multiple images does not
// make sense.
// Throughout these steps, the ColPartitions are owned by part_grid_, which
// means that it must be kept correct. Exception: big_parts_ owns its
// own ColPartitions.
// The BLOBNBOXes are owned by the input TO_BLOCK for the whole time, except
// for a phase in FindBlocks before TransformToBlocks, when they become
// owned by the ColPartitions. The owner() ColPartition of a BLOBNBOX
// indicates more of a betrothal for the majority of layout analysis, ie
// which ColPartition will take ownership when the blobs are released from
// the input TO_BLOCK. Exception: image_bblobs_ owns the fake blobs that
// are part of the image regions, as they are not on any TO_BLOCK list.
// TODO(rays) break up column finder further into smaller classes, as
// there is a lot more to it than column finding now.
// ======================================================================
// Tests for vertical alignment of text (returning true if so), and
// generates a list of blobs for orientation and script detection. Note that
// the vertical alignment may be due to text whose writing direction is
// vertical, like say Japanese, or due to text whose writing direction is
// Performs initial processing on the blobs in the input_block:
// Setup the part_grid, stroke_width_, nontext_map_.
// Obvious noise blobs are filtered out and used to mark the nontext_map_.
// Initial stroke-width analysis is used to get local text alignment
// direction, so the textline projection_ map can be setup.
// On return, IsVerticallyAlignedText may be called (now optionally) to
// determine the gross textline alignment of the page.
void SetupAndFilterNoise(Pix* photo_mask_pix, TO_BLOCK* input_block);
// Tests for vertical alignment of text (returning true if so), and generates
// a list of blobs (in osd_blobs) for orientation and script detection.
// block is the single block for the whole page or rectangle to be OCRed.
// Note that the vertical alignment may be due to text whose writing direction
// is vertical, like say Japanese, or due to text whose writing direction is
// horizontal but whose text appears vertically aligned because the image is
// not the right way up.
bool IsVerticallyAlignedText(TO_BLOCK* block, BLOBNBOX_CLIST* osd_blobs);
@ -96,25 +128,32 @@ class ColumnFinder : public TabFind {
void CorrectOrientation(TO_BLOCK* block, bool vertical_text_lines,
int recognition_rotation);
// Finds the text and image blocks, returning them in the blocks and to_blocks
// lists. (Each TO_BLOCK points to the basic BLOCK and adds more information.)
// If boxa and pixa are not NULL, they are assumed to be the output of
// ImageFinder::FindImages, and are used to generate image blocks.
// The input boxa and pixa are destroyed.
// Imageheight should be the pixel height of the original image.
// The input block is the result of a call to find_components, and contains
// the blobs found in the image. These blobs will be removed and placed
// in the output blocks, while unused ones will be deleted.
// Finds blocks of text, image, rule line, table etc, returning them in the
// blocks and to_blocks lists.
// (Each TO_BLOCK points to the basic BLOCK and adds more information.)
// Image blocks are generated by a combination of photo_mask_pix (which may
// NOT be NULL) and the rejected text found during preliminary textline
// finding.
// The input_block is the result of a call to find_components, and contains
// the blobs found in the image or rectangle to be OCRed. These blobs will be
// removed and placed in the output blocks, while unused ones will be deleted.
// If single_column is true, the input is treated as single column, but
// it is still divided into blocks of equal line spacing/text size.
// Returns -1 if the user requested retry with more debug info.
int FindBlocks(bool single_column, int imageheight,
TO_BLOCK* block, Boxa* boxa, Pixa* pixa,
// scaled_color is scaled down by scaled_factor from the input color image,
// and may be NULL if the input was not color.
// Returns -1 if the user hits the 'd' key in the blocks window while running
// in debug mode, which requests a retry with more debug info.
int FindBlocks(bool single_column,
Pix* scaled_color, int scaled_factor,
TO_BLOCK* block, Pix* photo_mask_pix,
BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks);
// Get the rotation required to deskew, and its inverse rotation.
void GetDeskewVectors(FCOORD* deskew, FCOORD* reskew);
// Set the equation detection pointer.
void SetEquationDetect(EquationDetectBase* detect);
private:
// Displays the blob and block bounding boxes in a window called Blocks.
void DisplayBlocks(BLOCK_LIST* blocks);
@ -122,75 +161,11 @@ class ColumnFinder : public TabFind {
// best_columns_.
void DisplayColumnBounds(PartSetVector* sets);
// Converts the arrays of Box/Pix to a list of C_OUTLINE, and then to blobs.
// The output is a list of C_BLOBs for the images, but the C_OUTLINEs
// contain no data.
void ExtractImageBlobs(int image_height, Boxa* boxa, Pixa* pixa);
////// Functions involved in making the initial ColPartitions. /////
// Creates the initial ColPartitions, and puts them in a ColPartitionSet
// for each grid y coordinate, storing the ColPartitionSets in part_sets_.
// After creating the ColPartitonSets, attempts to merge them where they
// overlap and unique the BLOBNBOXes within.
// The return value is the number of ColPartitionSets made.
int MakeColumnPartitions();
// Partition the BLOBNBOXES horizontally at the given grid y, creating a
// ColPartitionSet which is returned. NULL is returned if there are no
// BLOBNBOXES at the given grid y.
ColPartitionSet* PartitionsAtGridY(int grid_y);
// Insert the blobs in the given list into the main grid and for
// each one also make it a separate unknown partition.
// If filter is true, use only the blobs that are above a threshold in
// size or a non-isolated.
void InsertSmallBlobsAsUnknowns(bool filter, BLOBNBOX_LIST* blobs);
// Helper function for PartitionsAtGridY, with a long argument list.
// This bbox is of unknown type, so it is added to an unk_partition.
// If the edge is past the unk_right_margin then unk_partition has to be
// completed and a new one made. See CompletePartition and StartPartition
// for the other args.
void ProcessUnknownBlob(int page_edge, BLOBNBOX* bbox,
ColPartition** unk_partition,
ColPartition_IT* unk_part_it,
TabVector** unk_right_line,
int* unk_right_margin,
int* unk_prev_margin,
bool* unk_edge_is_left);
// Creates and returns a new ColPartition of the given start_type
// and adds the given bbox to it.
// Also finds the left and right tabvectors that bound the textline, setting
// the members of the returned ColPartition appropriately:
// If the left tabvector is less constraining than the input left_margin
// (assumed to be the right edge of the previous partition), then the
// tabvector is ignored and the left_margin used instead.
// If the right tabvector is more constraining than the input *right_margin,
// (probably the right edge of the page), then the *right_margin is adjusted
// to use the tabvector.
// *edge_is_left is set to true if the right tabvector is good and used as the
// margin, so we can include blobs that overhang the tabvector in this
// partition.
ColPartition* StartPartition(BlobRegionType start_type, int left_margin,
BLOBNBOX* bbox, TabVector** right_line,
int* right_margin, bool* edge_is_left);
// Completes the given partition, and adds it to the given iterator.
// The right_margin on input is the left edge of the next blob if there is
// one. The right tab vector plus a margin is used as the right margin if
// it is more constraining than the next blob, but if there are no more
// blobs, we want the right margin to make it to the page edge.
// The return value is the next left margin, being the right edge of the
// bounding box of blobs.
int CompletePartition(bool no_more_blobs, int page_edge,
TabVector* right_line, int* right_margin,
ColPartition** partition, ColPartition_IT* part_it);
////// Functions involved in determining the columns used on the page. /////
// Makes an ordered list of candidates to partition the width of the page
// into columns using the part_sets_.
// See AddToColumnSetsIfUnique for the ordering.
// If single_column, then it just makes a single page-wide fake column.
void MakeColumnCandidates(bool single_column);
// Sets up column_sets_ (the determined column layout at each horizontal
// slice). Returns false if the page is empty.
bool MakeColumns(bool single_column);
// Attempt to improve the column_candidates by expanding the columns
// and adding new partitions from the partition sets in src_sets.
// Src_sets may be equal to column_candidates, in which case it will
@ -201,10 +176,10 @@ class ColumnFinder : public TabFind {
void PrintColumnCandidates(const char* title);
// Finds the optimal set of columns that cover the entire image with as
// few changes in column partition as possible.
void AssignColumns();
void AssignColumns(const PartSetVector& part_sets);
// Finds the biggest range in part_sets_ that has no assigned column, but
// column assignment is possible.
bool BiggestUnassignedRange(const bool* any_columns_possible,
bool BiggestUnassignedRange(int set_count, const bool* any_columns_possible,
int* start, int* end);
// Finds the modal compatible column_set_ index within the given range.
int RangeModalColumnSet(int** column_set_costs, const int* assigned_costs,
@ -236,19 +211,21 @@ class ColumnFinder : public TabFind {
//////// Functions that manipulate ColPartitions in the part_grid_ /////
//////// to split, merge, find margins, and find types. //////////////
// Removes the ColPartitions from part_sets_, the ColPartitionSets that
// contain them, and puts them in the part_grid_ after ensuring that no
// BLOBNBOX is owned by more than one of them.
void MovePartitionsToGrid();
// Hoovers up all un-owned blobs and deletes them.
// The rest get released from the block so the ColPartitions can pass
// ownership to the output blocks.
void ReleaseBlobsAndCleanupUnused(TO_BLOCK* block);
// Splits partitions that cross columns where they have nothing in the gap.
void GridSplitPartitions();
// Merges partitions where there is vertical overlap, within a single column,
// and the horizontal gap is small enough.
void GridMergePartitions();
// Resolves unknown partitions from the unknown_parts_ list by merging them
// with a close neighbour, inserting them into the grid with a known type,
// or declaring them to be noise.
void GridInsertUnknowns();
// Inserts remaining noise blobs into the most applicable partition if any.
// If there is no applicable partition, then the blobs are deleted.
void InsertRemainingNoise(TO_BLOCK* block);
// Remove partitions that come from horizontal lines that look like
// underlines, but are not part of a table.
void GridRemoveUnderlinePartitions();
// Add horizontal line separators as partitions.
void GridInsertHLinePartitions();
// Add vertical line separators as partitions.
@ -272,22 +249,34 @@ class ColumnFinder : public TabFind {
// Transform the grid of partitions to the output blocks.
void TransformToBlocks(BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks);
// Reflect the blob boxes (but not the outlines) in the y-axis so that
// the blocks get created in the correct RTL order. Rotates the blobs
// in the input_block and the bblobs list.
// The reflection is undone in RotateAndReskewBlocks by
// reflecting the blocks themselves, and then recomputing the blob bounding
// boxes.
void ReflectForRtl(TO_BLOCK* input_block, BLOBNBOX_LIST* bblobs);
// Undo the deskew that was done in FindTabVectors, as recognition is done
// without correcting blobs or blob outlines for skew.
// Reskew the completed blocks to put them back to the original rotated coords
// that were created by CorrectOrientation.
// If the input_is_rtl, then reflect the blocks in the y-axis to undo the
// reflection that was done before FindTabVectors.
// Blocks that were identified as vertical text (relative to the rotated
// coordinates) are further rotated so the text lines are horizontal.
// blob polygonal outlines are rotated to match the position of the blocks
// that they are in, and their bounding boxes are recalculated to be accurate.
// Record appropriate inverse transformations and required
// classifier transformation in the blocks.
void RotateAndReskewBlocks(TO_BLOCK_LIST* to_blocks);
void RotateAndReskewBlocks(bool input_is_rtl, TO_BLOCK_LIST* to_blocks);
// Move all the small and noise blobs into the main blobs list of
// the block from the to_blocks list that contains them.
void MoveSmallBlobs(BLOBNBOX_LIST* bblobs, TO_BLOCK_LIST* to_blocks);
// Computes the rotations for the block (to make textlines horizontal) and
// for the blobs (for classification) and sets the appropriate members
// of the given block.
// Returns the rotation that needs to be applied to the blobs to make
// them sit in the rotated block.
FCOORD ComputeBlockAndClassifyRotation(BLOCK* block);
// The minimum gutter width to apply for finding columns.
// Modified when vertical text is detected to prevent detection of
@ -305,9 +294,6 @@ class ColumnFinder : public TabFind {
FCOORD rerotate_;
// The additional rotation vector needed to rotate text for recognition.
FCOORD text_rotation_;
// The part_sets_ are the initial text-line-like partition of the grid,
// and is a vector of ColPartitionSets.
PartSetVector part_sets_;
// The column_sets_ contain the ordered candidate ColPartitionSets that
// define the possible divisions of the page into columns.
PartSetVector column_sets_;
@ -322,14 +308,31 @@ class ColumnFinder : public TabFind {
// turned into regions, but are kept around because they are referenced
// by the part_grid_.
ColPartition_LIST good_parts_;
// List of ColPartitions of unknown type.
ColPartition_LIST unknown_parts_;
// List of ColPartitions that are big and might be dropcap or vertically
// joined.
ColPartition_LIST big_parts_;
// List of ColPartitions that have been declared noise.
ColPartition_LIST noise_parts_;
// The fake blobs that are made from the input boxa/pixa pair.
// The fake blobs that are made from the images.
BLOBNBOX_LIST image_bblobs_;
// Horizontal line separators.
TabVector_LIST horizontal_lines_;
// Image map of photo/noise areas on the page.
Pix* nontext_map_;
// Textline projection map.
TextlineProjection projection_;
// Sequence of DENORMS that indicate how to get back to the original image
// coordinate space. The destructor must delete all the DENORMs in the chain.
DENORM* denorm_;
// Various debug windows that automatically go away on completion.
ScrollView* input_blobs_win_;
// The equation region detector pointer. Note: This pointer is passed in by
// member function SetEquationDetect, and releasing it is NOT owned by this
// class.
EquationDetectBase* equation_detect_;
// Allow a subsequent instance to reuse the blocks window.
// Not thread-safe, but multiple threads shouldn't be using windows anyway.
static ScrollView* blocks_win_;

File diff suppressed because it is too large Load Diff

View File

@ -90,7 +90,18 @@ class ColPartition : public ELIST2_LINK {
// WARNING: Despite being on C_LISTs, the BLOBNBOX owns the C_BLOB and
// the ColPartition owns the BLOBNBOX!!!
// Call DeleteBoxes before deleting the ColPartition.
static ColPartition* FakePartition(const TBOX& box);
static ColPartition* FakePartition(const TBOX& box,
PolyBlockType block_type,
BlobRegionType blob_type,
BlobTextFlowType flow);
// Constructs and returns a ColPartition with the given real BLOBNBOX,
// and sets it up to be a "big" partition (single-blob partition bigger
// than the surrounding text that may be a dropcap, two or more vertically
// touching characters, or some graphic element).
// If the given list is not NULL, the partition is also added to the list.
static ColPartition* MakeBigPartition(BLOBNBOX* box,
ColPartition_LIST* big_part_list);
~ColPartition();
@ -116,6 +127,12 @@ class ColPartition : public ELIST2_LINK {
int median_bottom() const {
return median_bottom_;
}
// Returns median_left_, the left-hand value that HCoreOverlap uses when
// measuring horizontal overlap by median.
int median_left() const {
return median_left_;
}
// Returns median_right_, the right-hand value that HCoreOverlap uses when
// measuring horizontal overlap by median.
int median_right() const {
return median_right_;
}
int median_size() const {
return median_size_;
}
@ -185,6 +202,12 @@ class ColPartition : public ELIST2_LINK {
void set_working_set(WorkingPartSet* working_set) {
working_set_ = working_set;
}
// Returns the block_owned_ flag.
// NOTE(review): presumably true once the partition has been handed over to
// an output block - confirm against the users of set_block_owned.
bool block_owned() const {
return block_owned_;
}
// Sets the block_owned_ flag to the given value. Nothing else is updated.
void set_block_owned(bool owned) {
block_owned_ = owned;
}
bool desperately_merged() const {
return desperately_merged_;
}
@ -342,33 +365,39 @@ class ColPartition : public ELIST2_LINK {
bool HOverlaps(const ColPartition& other) const {
return bounding_box_.x_overlap(other.bounding_box_);
}
// Returns true if this and other can be combined without putting a
// horizontal step in either left or right edge.
bool HCompatible(const ColPartition& other) const {
return left_margin_ <= other.bounding_box_.left() &&
bounding_box_.left() >= other.left_margin_ &&
bounding_box_.right() <= other.right_margin_ &&
right_margin_ >= other.bounding_box_.right();
// Returns true if this and other's bounding boxes overlap vertically.
// TODO(rays) Make HOverlaps and VOverlaps truly symmetric.
bool VOverlaps(const ColPartition& other) const {
return bounding_box_.y_gap(other.bounding_box_) < 0;
}
// Returns the vertical overlap (by median) of this and other.
// WARNING! Only makes sense on horizontal partitions!
int VOverlap(const ColPartition& other) const {
int VCoreOverlap(const ColPartition& other) const {
return MIN(median_top_, other.median_top_) -
MAX(median_bottom_, other.median_bottom_);
}
// Returns the horizontal overlap (by median) of this and other.
// WARNING! Only makes sense on vertical partitions!
int HOverlap(const ColPartition& other) const {
int HCoreOverlap(const ColPartition& other) const {
return MIN(median_right_, other.median_right_) -
MAX(median_left_, other.median_left_);
}
// Returns true if this and other overlap significantly vertically.
bool VOverlaps(const ColPartition& other) const {
int overlap = VOverlap(other);
// WARNING! Only makes sense on horizontal partitions!
bool VSignificantCoreOverlap(const ColPartition& other) const {
int overlap = VCoreOverlap(other);
int height = MIN(median_top_ - median_bottom_,
other.median_top_ - other.median_bottom_);
return overlap * 3 > height;
}
// Reports whether this and other could be merged into one block without
// creating a horizontal step in its left or right edge: the bounding box
// of each partition must lie within the margins of the other one.
bool WithinSameMargins(const ColPartition& other) const {
  // Left edges: neither box may start to the left of the other's margin.
  if (other.bounding_box_.left() < left_margin_)
    return false;
  if (bounding_box_.left() < other.left_margin_)
    return false;
  // Right edges: neither box may end to the right of the other's margin.
  if (bounding_box_.right() > other.right_margin_)
    return false;
  return other.bounding_box_.right() <= right_margin_;
}
// Returns true if the region types (aligned_text_) match.
// Lines never match anything, as they should never be merged or chained.
bool TypesMatch(const ColPartition& other) const {
@ -379,6 +408,13 @@ class ColPartition : public ELIST2_LINK {
!BLOBNBOX::IsLineType(type1) && !BLOBNBOX::IsLineType(type2);
}
// Reports whether two block types count as similar: either they are the
// same type, or one is PT_FLOWING_TEXT and the other is PT_INLINE_EQUATION
// (in either order).
static bool TypesSimilar(PolyBlockType type1, PolyBlockType type2) {
  if (type1 == type2)
    return true;
  // From here on the types differ, so only the text/equation pairing is left.
  if (type1 == PT_FLOWING_TEXT)
    return type2 == PT_INLINE_EQUATION;
  if (type2 == PT_FLOWING_TEXT)
    return type1 == PT_INLINE_EQUATION;
  return false;
}
// Returns true if partitions is of horizontal line type
bool IsLineType() const {
return PTIsLineType(type_);
@ -430,8 +466,8 @@ class ColPartition : public ELIST2_LINK {
TBOX BoundsWithoutBox(BLOBNBOX* box);
// Claims the boxes in the boxes_list by marking them with a this owner
// pointer. If a box is already owned, then run Unique on it.
void ClaimBoxes(WidthCallback* cb);
// pointer.
void ClaimBoxes();
// NULL the owner of the blobs in this partition, so they can be deleted
// independently of the ColPartition.
@ -440,6 +476,12 @@ class ColPartition : public ELIST2_LINK {
// Delete the boxes that this partition owns.
void DeleteBoxes();
// Reflects the partition in the y-axis, assuming that its blobs have
// already been done. Corrects only a limited part of the members, since
// this function is assumed to be used shortly after initial creation, which
// is before a lot of the members are used.
void ReflectInYAxis();
// Returns true if this is a legal partition - meaning that the conditions
// left_margin <= bounding_box left
// left_key <= bounding box left key
@ -451,6 +493,9 @@ class ColPartition : public ELIST2_LINK {
// Returns true if the left and right edges are approximately equal.
bool MatchingColumns(const ColPartition& other) const;
// Returns true if the colors match for two text partitions.
bool MatchingTextColor(const ColPartition& other) const;
// Returns true if the sizes match for two text partitions,
// taking orientation into account
bool MatchingSizes(const ColPartition& other) const;
@ -482,6 +527,19 @@ class ColPartition : public ELIST2_LINK {
// Returns the right rule line x coord of the rightmost blob.
int RightBlobRule() const;
// Returns the density value for a particular BlobSpecialTextType.
float SpecialBlobsDensity(const BlobSpecialTextType type) const;
// Returns the number of blobs for a particular BlobSpecialTextType.
int SpecialBlobsCount(const BlobSpecialTextType type);
// Set the density value for a particular BlobSpecialTextType, should ONLY be
// used for debugging or testing. In production code, use
// ComputeSpecialBlobsDensity instead.
void SetSpecialBlobsDensity(
const BlobSpecialTextType type, const float density);
// Compute the SpecialTextType density of blobs, where we assume
// that the SpecialTextType in the boxes_ has been set.
void ComputeSpecialBlobsDensity();
// Add a partner above if upper, otherwise below.
// Add them uniquely and keep the list sorted by box left.
// Partnerships are added symmetrically to partner and this.
@ -496,9 +554,6 @@ class ColPartition : public ELIST2_LINK {
// Merge with the other partition and delete it.
void Absorb(ColPartition* other, WidthCallback* cb);
// Shares out any common boxes amongst the partitions, ensuring that no
// box stays in both. Returns true if anything was done.
bool Unique(ColPartition* other, WidthCallback* cb);
// Returns true if the overlap between this and the merged pair of
// merge candidates is sufficiently trivial to be allowed.
// The merged box can graze the edge of this by the ok_box_overlap
@ -551,10 +606,20 @@ class ColPartition : public ELIST2_LINK {
// Leader detection is limited to sequences of identical width objects,
// such as .... or ----, so patterns, such as .-.-.-.-. will not be found.
bool MarkAsLeaderIfMonospaced();
// Given the result of TextlineProjection::EvaluateColPartition, (positive for
// horizontal text, negative for vertical text, and near zero for non-text),
// sets the blob_type_ and flow_ for this partition to indicate whether it
// is strongly or weakly vertical or horizontal text, or non-text.
void SetRegionAndFlowTypesFromProjectionValue(int value);
// Sets all blobs with the partition blob type and flow.
// Sets all blobs with the partition blob type and flow, but never overwrite
// leader blobs, as we need to be able to identify them later.
void SetBlobTypes();
// Returns true if a decent baseline can be fitted through the blobs.
// Works for both horizontal and vertical text.
bool HasGoodBaseline();
// Adds this ColPartition to a matching WorkingPartSet if one can be found,
// otherwise starts a new one in the appropriate column, ending the previous.
void AddToWorkingSet(const ICOORD& bleft, const ICOORD& tright,
@ -579,6 +644,13 @@ class ColPartition : public ELIST2_LINK {
ColPartition_LIST* block_parts,
ColPartition_LIST* used_parts);
// Constructs a block from the given list of vertical text partitions.
// Currently only creates rectangular blocks.
static TO_BLOCK* MakeVerticalTextBlock(const ICOORD& bleft,
const ICOORD& tright,
ColPartition_LIST* block_parts,
ColPartition_LIST* used_parts);
// Returns a copy of everything except the list of boxes. The resulting
// ColPartition is only suitable for keeping in a column candidate list.
@ -769,6 +841,8 @@ class ColPartition : public ELIST2_LINK {
ColPartition_CLIST lower_partners_;
// The WorkingPartSet it lives in while blocks are being made.
WorkingPartSet* working_set_;
// Flag is true when AddBox is sorting vertically, false otherwise.
bool last_add_was_vertical_;
// True when the partition's ownership has been taken from the grid and
// placed in a working set, or, after that, in the good_parts_ list.
bool block_owned_;
@ -809,6 +883,8 @@ class ColPartition : public ELIST2_LINK {
uinT8 color1_[kRGBRMSColors];
uinT8 color2_[kRGBRMSColors];
bool owns_blobs_; // Does the partition own its blobs?
// The density of special blobs.
float special_blobs_densities_[BSTT_COUNT];
};
// Typedef it now in case it becomes a class later.

File diff suppressed because it is too large Load Diff

View File

@ -22,6 +22,7 @@
#include "bbgrid.h"
#include "colpartition.h"
#include "colpartitionset.h"
namespace tesseract {
@ -36,11 +37,32 @@ class ColPartitionGrid : public BBGrid<ColPartition,
ColPartitionGrid();
ColPartitionGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright);
~ColPartitionGrid();
virtual ~ColPartitionGrid();
// Handles a click event in a display window.
void HandleClick(int x, int y);
// Merges ColPartitions in the grid that look like they belong in the same
// textline.
// For all partitions in the grid, calls the box_cb permanent callback
// to compute the search box, searches the box, and if a candidate is found,
// calls the confirm_cb to check any more rules. If the confirm_cb returns
// true, then the partitions are merged.
// Both callbacks are deleted before returning.
void Merges(TessResultCallback2<bool, ColPartition*, TBOX*>* box_cb,
TessResultCallback2<bool, const ColPartition*,
const ColPartition*>* confirm_cb);
// For the given partition, calls the box_cb permanent callback
// to compute the search box, searches the box, and if a candidate is found,
// calls the confirm_cb to check any more rules. If the confirm_cb returns
// true, then the partitions are merged.
// Returns true if the partition is consumed by one or more merges.
bool MergePart(TessResultCallback2<bool, ColPartition*, TBOX*>* box_cb,
TessResultCallback2<bool, const ColPartition*,
const ColPartition*>* confirm_cb,
ColPartition* part);
// Finds all the ColPartitions in the grid that overlap with the given
// box and returns them SortByBoxLeft(ed) and uniqued in the given list.
// Any partition equal to not_this (may be NULL) is excluded.
@ -59,6 +81,68 @@ class ColPartitionGrid : public BBGrid<ColPartition,
const ColPartition*>* confirm_cb,
int* overlap_increase);
// Split partitions where it reduces overlap between their bounding boxes.
// ColPartitions are after all supposed to be a partitioning of the blobs
// AND of the space on the page!
// Blobs that cause overlaps get removed, put in individual partitions
// and added to the big_parts list. They are most likely characters on
// 2 textlines that touch, or something big like a dropcap.
void SplitOverlappingPartitions(ColPartition_LIST* big_parts);
// Filters partitions of source_type by looking at local neighbours.
// Where a majority of neighbours have a text type, the partitions are
// changed to text, where the neighbours have image type, they are changed
// to image, and partitions that have no definite neighbourhood type are
// left unchanged.
// im_box and rerotation are used to map blob coordinates onto the
// nontext_map, which is used to prevent the spread of text neighbourhoods
// into images.
// Returns true if anything was changed.
bool GridSmoothNeighbours(BlobTextFlowType source_type, Pix* nontext_map,
const TBOX& im_box, const FCOORD& rerotation);
// Compute the mean RGB of the light and dark pixels in each ColPartition
// and also the rms error in the linearity of color.
void ComputePartitionColors(Pix* scaled_color, int scaled_factor,
const FCOORD& rerotation);
// Reflects the grid and its colpartitions in the y-axis, assuming that
// all blob boxes have already been done.
void ReflectInYAxis();
// Rotates the grid and its colpartitions by the given angle, assuming that
// all blob boxes have already been done.
void Deskew(const FCOORD& deskew);
// Sets the left and right tabs of the partitions in the grid.
void SetTabStops(TabFind* tabgrid);
// Makes the ColPartSets and puts them in the PartSetVector ready
// for finding column bounds. Returns false if no partitions were found.
// Each ColPartition in the grid is placed in a single ColPartSet based
// on the bottom-left of its bounding box.
bool MakeColPartSets(PartSetVector* part_sets);
// Makes a single ColPartitionSet consisting of a single ColPartition that
// represents the total horizontal extent of the significant content on the
// page. Used for the single column setting in place of automatic detection.
// Returns NULL if the page is empty of significant content.
ColPartitionSet* MakeSingleColumnSet(WidthCallback* cb);
// Mark the BLOBNBOXes in each partition as being owned by that partition.
void ClaimBoxes();
// Retypes all the blobs referenced by the partitions in the grid.
// Image blobs are sliced on the grid boundaries to give the tab finder
// a better handle on the edges of the images, and the actual blobs are
// returned in the im_blobs list, as they are not owned by the block.
void ReTypeBlobs(BLOBNBOX_LIST* im_blobs);
// The boxes within the partitions have changed (by deskew) so recompute
// the bounds of all the partitions and reinsert them into the grid.
void RecomputeBounds(int gridsize, const ICOORD& bleft,
const ICOORD& tright, const ICOORD& vertical);
// Improves the margins of the ColPartitions in the grid by calling
// FindPartitionMargins on each.
void GridFindMargins(ColPartitionSet** best_columns);
@ -68,6 +152,13 @@ class ColPartitionGrid : public BBGrid<ColPartition,
void ListFindMargins(ColPartitionSet** best_columns,
ColPartition_LIST* parts);
// Deletes all the partitions in the grid after disowning all the blobs.
void DeleteParts();
// Deletes all the partitions in the grid that are of type BRT_UNKNOWN and
// all the blobs in them.
void DeleteUnknownParts(TO_BLOCK* block);
// Finds and marks text partitions that represent figure captions.
void FindFigureCaptions();
@ -78,12 +169,64 @@ class ColPartitionGrid : public BBGrid<ColPartition,
// Finds the best partner in the given direction for the given partition.
// Stores the result with AddPartner.
void FindPartitionPartners(bool upper, ColPartition* part);
// Finds the best partner in the given direction for the given partition.
// Stores the result with AddPartner.
void FindVPartitionPartners(bool to_the_left, ColPartition* part);
// For every ColPartition with multiple partners in the grid, reduces the
// number of partners to 0 or 1. If get_desperate is true, goes to more
// desperate merge methods to merge flowing text before breaking partnerships.
void RefinePartitionPartners(bool get_desperate);
private:
// Finds and returns a list of candidate ColPartitions to merge with part.
// The candidates must overlap search_box, and when merged must not
// overlap any other partitions that are not overlapped by each individually.
void FindMergeCandidates(const ColPartition* part, const TBOX& search_box,
bool debug, ColPartition_CLIST* candidates);
// Smoothes the region type/flow type of the given part by looking at local
// neighbours and the given image mask. Searches a padded rectangle with the
// padding truncated on one side of the part's box in turn for each side,
// using the result (if any) that has the least distance to all neighbours
// that contribute to the decision. This biases in favor of rectangular
// regions without completely enforcing them.
// If a good decision cannot be reached, the part is left unchanged.
// im_box and rerotation are used to map blob coordinates onto the
// nontext_map, which is used to prevent the spread of text neighbourhoods
// into images.
// Returns true if the partition was changed.
bool SmoothRegionType(Pix* nontext_map,
const TBOX& im_box,
const FCOORD& rerotation,
bool debug,
ColPartition* part);
// Executes the search for SmoothRegionType in a single direction.
// Creates a bounding box that is padded in all directions except direction,
// and searches it for other partitions. Finds the nearest collection of
// partitions that makes a decisive result (if any) and returns the type
// and the distance of the collection. If there are any pixels in the
// nontext_map, then the decision is biased towards image.
BlobRegionType SmoothInOneDirection(BlobNeighbourDir direction,
Pix* nontext_map,
const TBOX& im_box,
const FCOORD& rerotation,
bool debug,
const ColPartition& part,
int* best_distance);
// Counts the partitions in the given search_box by appending the gap
// distance (scaled by dist_scaling) of the part from the base_part to the
// vector of the appropriate type for the partition. Prior to return, the
// vectors in the dists array are sorted in increasing order.
// dists must be an array of GenericVectors of size NPT_COUNT.
void AccumulatePartDistances(const ColPartition& base_part,
const ICOORD& dist_scaling,
const TBOX& search_box,
Pix* nontext_map,
const TBOX& im_box,
const FCOORD& rerotation,
bool debug,
GenericVector<int>* dists);
// Improves the margins of the ColPartition by searching for
// neighbours that vertically overlap significantly.
void FindPartitionMargins(ColPartitionSet* columns, ColPartition* part);

View File

@ -66,79 +66,13 @@ ColPartition* ColPartitionSet::ColumnContaining(int x, int y) {
return NULL;
}
// Insert the ColPartitions in our list into the given grid.
void ColPartitionSet::ReturnParts(ColPartition_LIST* parts) {
ColPartition_IT it(parts);
it.add_list_before(&parts_);
}
// Merge any significantly overlapping partitions within the this and other,
// and unique the boxes so that no two partitions use the same box.
// Return true if any changes were made to either set.
bool ColPartitionSet::MergeOverlaps(ColPartitionSet* other, WidthCallback* cb) {
bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(),
bounding_box_.bottom()) ||
TabFind::WithinTestRegion(2, other->bounding_box_.left(),
other->bounding_box_.bottom());
if (debug) {
tprintf("Considering merge on:\n");
Print();
other->Print();
// Extract all the parts from the list, relinquishing ownership.
void ColPartitionSet::RelinquishParts() {
ColPartition_IT it(&parts_);
while (!it.empty()) {
it.extract();
it.forward();
}
ColPartition_IT it1(&parts_);
ColPartition_IT it2(&other->parts_);
bool any_merged = false;
it1.mark_cycle_pt();
it2.mark_cycle_pt();
// Iterate the two lists in parallel, using the fact that they are
// sorted by x-coord to keep the iterators in sync.
while (!it1.cycled_list() && !it2.cycled_list()) {
any_merged = false;
ColPartition* part1 = it1.data();
ColPartition* part2 = it2.data();
if (debug) {
tprintf("Vover=%d, HOver=%d, Hcompatible=%d, typesmatch=%d\n",
part1->VOverlaps(*part2), part1->HOverlaps(*part2),
part1->HCompatible(*part2), part1->TypesMatch(*part2));
}
if (part1->VOverlaps(*part2) &&
part1->HCompatible(*part2) && part1->TypesMatch(*part2)) {
// Partitions seem to be mergeable, so absorb part1 into part2.
part1->Absorb(it2.extract(), cb);
any_merged = true;
it1.forward();
it2.forward();
} else if (part1->HOverlaps(*part2) && part1->TypesMatch(*part2) &&
part1->Unique(part2, cb)) {
// Unique moved some boxes, so check to see in either partition was
// left empty. If not, any_merged is not set true.
if (part1->IsEmpty()) {
any_merged = true;
delete it1.extract();
it1.forward();
continue;
}
if (part2->IsEmpty()) {
any_merged = true;
delete it2.extract();
it2.forward();
continue;
}
}
if (!any_merged) {
// Move on the iterator that point to the leftmost partition.
if (part1->IsLeftOf(*part2)) {
it1.forward();
} else {
it2.forward();
}
}
}
if (any_merged) {
ComputeCoverage();
other->ComputeCoverage();
}
return any_merged;
}
// Attempt to improve this by adding partitions or expanding partitions.
@ -245,13 +179,13 @@ void ColPartitionSet::AddToColumnSetsIfUnique(PartSetVector* column_sets,
}
for (int i = 0; i < column_sets->size(); ++i) {
ColPartitionSet* columns = column_sets->get(i);
// In ordering the column set candidates, total_coverage_ is king,
// followed by good_column_count_ and then total column_count.
bool better = total_coverage_ > columns->total_coverage_;
if (total_coverage_ == columns->total_coverage_) {
// In ordering the column set candidates, good_coverage_ is king,
// followed by good_column_count_ and then bad_coverage_.
bool better = good_coverage_ > columns->good_coverage_;
if (good_coverage_ == columns->good_coverage_) {
better = good_column_count_ > columns->good_column_count_;
if (good_column_count_ == columns->good_column_count_) {
better = parts_.length() > columns->parts_.length();
better = bad_coverage_ > columns->bad_coverage_;
}
}
if (better) {
@ -278,7 +212,7 @@ void ColPartitionSet::AddToColumnSetsIfUnique(PartSetVector* column_sets,
bool ColPartitionSet::CompatibleColumns(bool debug, ColPartitionSet* other,
WidthCallback* cb) {
if (debug) {
tprintf("CompatibleColumns testing compability\n");
tprintf("CompatibleColumns testing compatibility\n");
Print();
other->Print();
}
@ -295,7 +229,7 @@ bool ColPartitionSet::CompatibleColumns(bool debug, ColPartitionSet* other,
tprintf("CompatibleColumns ignoring image partition\n");
part->Print();
}
continue; // Image partitions are irrelevant to column compability.
continue; // Image partitions are irrelevant to column compatibility.
}
int y = part->MidY();
int left = part->bounding_box().left();
@ -331,30 +265,15 @@ bool ColPartitionSet::CompatibleColumns(bool debug, ColPartitionSet* other,
ColPartition* next_left_col = ColumnContaining(next_left, y);
if (right_col == next_left_col) {
// There is a column break in this column.
// Check for the difference between different column layout and
// a pull-out block.
int part_box_width = part->bounding_box().width();
int part_margin_width = part->right_margin() - part->left_margin();
int next_box_width = next_part->bounding_box().width();
int next_margin_width = next_part->right_margin() -
next_part->left_margin();
int next_right = next_part->bounding_box().right();
if (part_box_width < next_margin_width &&
next_box_width < part_margin_width) {
// This can be due to a figure caption within a column, a pull-out
// block, or a simple broken textline that remains to be merged:
// all allowed, or a change in column layout: not allowed.
// If both partitions are of good width, then it is likely
// a change in column layout, otherwise probably an allowed situation.
if (part->good_width() && next_part->good_width()) {
if (debug) {
tprintf("CompatibleColumns false due to equal sized columns\n");
tprintf("part1 %d-%d = %d, part2 %d-%d = %d\n",
left, right, part->ColumnWidth(),
next_left, next_right, next_part->ColumnWidth());
right_col->Print();
}
return false; // Must be a new column layout as they are equal size.
}
ColPartition* next_right_col = ColumnContaining(next_right, y);
if (left_col == right_col && next_right_col == next_left_col) {
// Column completely contains both. Not allowed.
if (debug) {
tprintf("CompatibleColumns false due to containing 2 partitions\n");
int next_right = next_part->bounding_box().right();
tprintf("CompatibleColumns false due to 2 parts of good width\n");
tprintf("part1 %d-%d, part2 %d-%d\n",
left, right, next_left, next_right);
right_col->Print();
@ -654,8 +573,9 @@ void ColPartitionSet::AccumulateColumnWidthsAndGaps(int* total_width,
// Provide debug output for this ColPartitionSet and all the ColPartitions.
void ColPartitionSet::Print() {
ColPartition_IT it(&parts_);
tprintf("Partition set of %d parts, %d good, coverage=%d (%d,%d)->(%d,%d)\n",
it.length(), good_column_count_, total_coverage_,
tprintf("Partition set of %d parts, %d good, coverage=%d+%d"
" (%d,%d)->(%d,%d)\n",
it.length(), good_column_count_, good_coverage_, bad_coverage_,
bounding_box_.left(), bounding_box_.bottom(),
bounding_box_.right(), bounding_box_.top());
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
@ -669,13 +589,7 @@ void ColPartitionSet::Print() {
// Add the given partition to the list in the appropriate place.
void ColPartitionSet::AddPartition(ColPartition* new_part,
ColPartition_IT* it) {
bounding_box_ += new_part->bounding_box();
if (new_part->good_column() || new_part->good_width()) {
total_coverage_ += new_part->ColumnWidth();
++good_column_count_;
if (new_part->good_width())
++good_column_count_;
}
AddPartitionCoverageAndBox(*new_part);
int new_right = new_part->right_key();
if (it->data()->left_key() >= new_right)
it->add_before_stay_put(new_part);
@ -683,22 +597,50 @@ void ColPartitionSet::AddPartition(ColPartition* new_part,
it->add_after_stay_put(new_part);
}
// Compute the coverage and good column count.
// Compute the coverage and good column count. Coverage is the amount of the
// width of the page (in pixels) that is covered by ColPartitions, which are
// used to provide candidate column layouts.
// Coverage is split into good and bad. Good coverage is provided by
// ColPartitions of a frequent width (according to the callback function
// provided by TabFinder::WidthCB, which accesses stored statistics on the
// widths of ColPartitions) and bad coverage is provided by all other
// ColPartitions, even if they have tab vectors at both sides. Thus:
// |-----------------------------------------------------------------|
// | Double width heading |
// |-----------------------------------------------------------------|
// |-------------------------------| |-------------------------------|
// | Common width ColPartition | | Common width ColPartition |
// |-------------------------------| |-------------------------------|
// the layout with two common-width columns has better coverage than the
// double width heading, because the coverage is "good," even though less in
// total coverage than the heading, because the heading coverage is "bad."
void ColPartitionSet::ComputeCoverage() {
// Count the number of good columns and sum their width.
ColPartition_IT it(&parts_);
good_column_count_ = 0;
total_coverage_ = 0;
good_coverage_ = 0;
bad_coverage_ = 0;
bounding_box_ = TBOX();
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
ColPartition* part = it.data();
bounding_box_ += part->bounding_box();
if (part->good_column() || part->good_width()) {
total_coverage_ += part->ColumnWidth();
AddPartitionCoverageAndBox(*part);
}
}
// Adds the coverage, column count and box for a single partition,
// without adding it to the list. (Helper factored from ComputeCoverage.)
void ColPartitionSet::AddPartitionCoverageAndBox(const ColPartition& part) {
bounding_box_ += part.bounding_box();
int coverage = part.ColumnWidth();
if (part.good_width()) {
good_coverage_ += coverage;
good_column_count_ += 2;
} else {
if (part.blob_type() < BRT_UNKNOWN)
coverage /= 2;
if (part.good_column())
++good_column_count_;
if (part->good_width())
++good_column_count_;
}
bad_coverage_ += coverage;
}
}

View File

@ -66,13 +66,8 @@ class ColPartitionSet : public ELIST_LINK {
// Return the bounding boxes of columns at the given y-range
void GetColumnBoxes(int y_bottom, int y_top, ColSegment_LIST *segments);
// Move the parts to the output list, giving up ownership.
void ReturnParts(ColPartition_LIST* parts);
// Merge any significantly overlapping partitions within the this and other,
// and unique the boxes so that no two partitions use the same box.
// Return true if any changes were made to either set.
bool MergeOverlaps(ColPartitionSet* other, WidthCallback* cb);
// Extract all the parts from the list, relinquishing ownership.
void RelinquishParts();
// Attempt to improve this by adding partitions or expanding partitions.
void ImproveColumnCandidate(WidthCallback* cb, PartSetVector* src_sets);
@ -133,15 +128,37 @@ class ColPartitionSet : public ELIST_LINK {
// Add the given partition to the list in the appropriate place.
void AddPartition(ColPartition* new_part, ColPartition_IT* it);
// Compute the coverage and good column count.
// Compute the coverage and good column count. Coverage is the amount of the
// width of the page (in pixels) that is covered by ColPartitions, which are
// used to provide candidate column layouts.
// Coverage is split into good and bad. Good coverage is provided by
// ColPartitions of a frequent width (according to the callback function
// provided by TabFinder::WidthCB, which accesses stored statistics on the
// widths of ColPartitions) and bad coverage is provided by all other
// ColPartitions, even if they have tab vectors at both sides. Thus:
// |-----------------------------------------------------------------|
// | Double width heading |
// |-----------------------------------------------------------------|
// |-------------------------------| |-------------------------------|
// | Common width ColPartition | | Common width ColPartition |
// |-------------------------------| |-------------------------------|
// the layout with two common-width columns has better coverage than the
// double width heading, because the coverage is "good," even though less in
// total coverage than the heading, because the heading coverage is "bad."
void ComputeCoverage();
// Adds the coverage, column count and box for a single partition,
// without adding it to the list. (Helper factored from ComputeCoverage.)
void AddPartitionCoverageAndBox(const ColPartition& part);
// The partitions in this column candidate.
ColPartition_LIST parts_;
// The number of partitions that have a frequent column width.
int good_column_count_;
// Total width of all the ColPartitions.
int total_coverage_;
// Total width of all the good ColPartitions.
int good_coverage_;
// Total width of all the bad ColPartitions.
int bad_coverage_;
// Bounding box of all partitions in the set.
TBOX bounding_box_;
};

View File

@ -165,36 +165,6 @@ bool ShiroRekhaSplitter::Split(bool split_for_pageseg) {
return true;
}
// This method changes the input page image and pix_binary to be the same as
// the splitted image owned by this object.
// Any of the parameters can be NULL.
void ShiroRekhaSplitter::CopySplittedImageTo(IMAGE* page_image,
Pix** pix_binary) const {
ASSERT_HOST(splitted_image_);
if (pix_binary) {
pixDestroy(pix_binary);
*pix_binary = pixClone(splitted_image_);
}
if (page_image) {
page_image->FromPix(splitted_image_);
}
}
// This method changes the input page image and pix_binary to be the same as
// the original image provided to this object.
// Any of the parameters can be NULL.
void ShiroRekhaSplitter::CopyOriginalImageTo(IMAGE* page_image,
Pix** pix_binary) const {
ASSERT_HOST(orig_pix_);
if (pix_binary) {
pixDestroy(pix_binary);
*pix_binary = pixClone(orig_pix_);
}
if (page_image) {
page_image->FromPix(orig_pix_);
}
}
// Method to perform a close operation on the input image. The xheight
// estimate decides the size of sel used.
void ShiroRekhaSplitter::PerformClose(Pix* pix, int xheight_estimate) {
@ -395,7 +365,8 @@ void ShiroRekhaSplitter::RefreshSegmentationWithNewBlobs(
C_BLOB_LIST not_found_blobs;
RefreshWordBlobsFromNewBlobs(segmentation_block_list_,
new_blobs,
&not_found_blobs);
((devanagari_split_debugimage && debug_image_) ?
&not_found_blobs : NULL));
if (devanagari_split_debuglevel > 0) {
tprintf("After refreshing blobs:\n");
@ -525,4 +496,4 @@ void PixelHistogram::ConstructHorizontalCountHist(Pix* pix) {
numaDestroy(&counts);
}
}
} // namespace tesseract.

View File

@ -80,16 +80,6 @@ class ShiroRekhaSplitter {
// splitting. If false, the ocr_split_strategy_ is used.
bool Split(bool split_for_pageseg);
// This method changes the input page image and pix_binary to be the same as
// the splitted image owned by this object.
// Any of the parameters can be NULL.
void CopySplittedImageTo(IMAGE* page_image, Pix** pix_binary) const;
// This method changes the input page image and pix_binary to be the same as
// the original image provided to this object.
// Any of the parameters can be NULL.
void CopyOriginalImageTo(IMAGE* page_image, Pix** pix_binary) const;
// Clears the memory held by this object.
void Clear();
@ -212,5 +202,6 @@ class ShiroRekhaSplitter {
// performed before CCs are run through splitting.
};
}
} // namespace tesseract.
#endif // TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_

View File

@ -419,12 +419,11 @@ void empty_buckets( // find blobs
out_it.set_to_list(&outlines);
do {
parent_it = bucket_it; // find outermost
do
bucket_it.forward();
while (!bucket_it.at_first()
&& !(*parent_it.data() < *bucket_it.data()));
}
while (!bucket_it.at_first());
do {
bucket_it.forward();
} while (!bucket_it.at_first() &&
!(*parent_it.data() < *bucket_it.data()));
} while (!bucket_it.at_first());
// move to new list
out_it.add_after_then_move(parent_it.extract());

File diff suppressed because it is too large Load Diff

View File

@ -21,21 +21,40 @@
#ifndef TESSERACT_TEXTORD_IMAGEFIND_H__
#define TESSERACT_TEXTORD_IMAGEFIND_H__
#include "host.h"
struct Boxa;
struct Pix;
struct Pixa;
class TBOX;
class FCOORD;
class TO_BLOCK;
class BLOBNBOX_LIST;
namespace tesseract {
// The ImageFinder class is a simple static function wrapper class that
class ColPartitionGrid;
class ColPartition_LIST;
class TabFind;
// The ImageFind class is a simple static function wrapper class that
// exposes the FindImages function and some useful helper functions.
class ImageFinder {
class ImageFind {
public:
// Finds image regions within the source pix (page image) and returns
// the image regions as a Boxa, Pixa pair, analogous to pixConnComp.
// Finds image regions within the BINARY source pix (page image) and returns
// the image regions as a mask image.
// The returned pix may be NULL, meaning no images found.
// If not NULL, it must be PixDestroyed by the caller.
static Pix* FindImages(Pix* pix);
// Generates a Boxa, Pixa pair from the input binary (image mask) pix,
// analogous to pixConnComp, except that connected components which are nearly
// rectangular are replaced with solid rectangles.
// The returned boxa, pixa may be NULL, meaning no images found.
// If not NULL, they must be destroyed by the caller.
static void FindImages(Pix* pix, Boxa** boxa, Pixa** pixa);
// Resolution of pix should match the source image (Tesseract::pix_binary_)
// so the output coordinate systems match.
static void ConnCompAndRectangularize(Pix* pix, Boxa** boxa, Pixa** pixa);
// Returns true if there is a rectangle in the source pix, such that all
// pixel rows and column slices outside of it have less than
@ -54,9 +73,84 @@ class ImageFinder {
// Given an input pix, and a bounding rectangle, the sides of the rectangle
// are shrunk inwards until they bound any black pixels found within the
// original rectangle.
static void BoundsWithinRect(Pix* pix, int* x_start, int* y_start,
// original rectangle. Returns false if the rectangle contains no black
// pixels at all.
static bool BoundsWithinRect(Pix* pix, int* x_start, int* y_start,
int* x_end, int* y_end);
// Given a point in 3-D (RGB) space, returns the squared Euclidean distance
// of the point from the given line, defined by a pair of points in the 3-D
// (RGB) space, line1 and line2.
static double ColorDistanceFromLine(const uinT8* line1, const uinT8* line2,
const uinT8* point);
// Returns the leptonica combined code for the given RGB triplet.
static uinT32 RGB(uinT32 r, uinT32 g, uinT32 b);
// Returns the input value clipped to a uinT8.
static uinT8 ClipToByte(double pixel);
// Computes the light and dark extremes of color in the given rectangle of
// the given pix, which is factor smaller than the coordinate system in rect.
// The light and dark points are taken to be the upper and lower 8th-ile of
// the most deviant of R, G and B. The value of the other 2 channels are
// computed by linear fit against the most deviant.
// The colors of the two points are returned in color1 and color2, with the
// alpha channel set to a scaled mean rms of the fits.
// If color_map1 is not null then it and color_map2 get rect pasted in them
// with the two calculated colors, and rms map gets a pasted rect of the rms.
// color_map1, color_map2 and rms_map are assumed to be the same scale as pix.
static void ComputeRectangleColors(const TBOX& rect, Pix* pix, int factor,
Pix* color_map1, Pix* color_map2,
Pix* rms_map,
uinT8* color1, uinT8* color2);
// Returns true if there are no black pixels in between the boxes.
// The im_box must represent the bounding box of the pix in tesseract
// coordinates, which may be negative, due to rotations to make the textlines
// horizontal. The boxes are rotated by rotation, which should undo such
// rotations, before mapping them onto the pix.
static bool BlankImageInBetween(const TBOX& box1, const TBOX& box2,
const TBOX& im_box, const FCOORD& rotation,
Pix* pix);
// Returns the number of pixels in box in the pix.
// The im_box must represent the bounding box of the pix in tesseract
// coordinates, which may be negative, due to rotations to make the textlines
// horizontal. The boxes are rotated by rotation, which should undo such
// rotations, before mapping them onto the pix.
static int CountPixelsInRotatedBox(TBOX box, const TBOX& im_box,
const FCOORD& rotation, Pix* pix);
// Locates all the image partitions in the part_grid, that were found by a
// previous call to FindImagePartitions, marks them in the image_mask,
// removes them from the grid, and deletes them. This makes it possible to
// call FindImagePartitions again to produce less broken-up and less
// overlapping image partitions.
// rerotation specifies how to rotate the partition coords to match
// the image_mask, since this function is used after orientation correction.
static void TransferImagePartsToImageMask(const FCOORD& rerotation,
ColPartitionGrid* part_grid,
Pix* image_mask);
// Runs a CC analysis on the image_pix mask image, and creates
// image partitions from them, cutting out strong text, and merging with
// nearby image regions such that they don't interfere with text.
// Rotation and rerotation specify how to rotate image coords to match
// the blob and partition coords and back again.
// The input/output part_grid owns all the created partitions, and
// the partitions own all the fake blobs that belong in the partitions.
// Since the other blobs in the other partitions will be owned by the block,
// ColPartitionGrid::ReTypeBlobs must be called afterwards to fix this
// situation and collect the image blobs.
static void FindImagePartitions(Pix* image_pix,
const FCOORD& rotation,
const FCOORD& rerotation,
TO_BLOCK* block,
TabFind* tab_grid,
ColPartitionGrid* part_grid,
ColPartition_LIST* big_parts);
};
} // namespace tesseract.

View File

@ -34,129 +34,283 @@
#endif
#include "allheaders.h"
BOOL_VAR(textord_tabfind_show_vlines, false, "Show vertical rule lines");
namespace tesseract {
/// Denominator of resolution makes max pixel width to allow thin lines.
const int kThinLineFraction = 30;
const int kThinLineFraction = 20;
/// Denominator of resolution makes min pixels to demand line lengths to be.
const int kMinLineLengthFraction = 8;
const int kMinLineLengthFraction = 4;
/// Spacing of cracks across the page to break up tall vertical lines.
const int kCrackSpacing = 100;
/// Grid size used by line finder. Not very critical.
const int kLineFindGridSize = 50;
// Min width of a line in pixels to be considered thick.
const int kMinThickLineWidth = 12;
// Max size of line residue. (The pixels that fail the long thin opening, and
// therefore don't make it to the candidate line mask, but are nevertheless
// part of the line.)
const int kMaxLineResidue = 6;
// Min length in inches of a line segment that exceeds kMinThickLineWidth in
// thickness. (Such lines shouldn't break by simple image degradation.)
const double kThickLengthMultiple = 0.75;
// Max fraction of line box area that can be occupied by non-line pixels.
const double kMaxNonLineDensity = 0.25;
// Max height of a music stave in inches.
const double kMaxStaveHeight = 1.0;
// Minimum fraction of pixels in a music rectangle connected to the staves.
const double kMinMusicPixelFraction = 0.75;
// Finds vertical line objects in the given pix.
// Erases the unused blobs from the line_pix image, taking into account
// whether this was a horizontal or vertical line set.
static void RemoveUnusedLineSegments(bool horizontal_lines,
BLOBNBOX_LIST* line_bblobs,
Pix* line_pix) {
int height = pixGetHeight(line_pix);
BLOBNBOX_IT bbox_it(line_bblobs);
for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
BLOBNBOX* blob = bbox_it.data();
if (blob->left_tab_type() == TT_MAYBE_ALIGNED) {
const TBOX& box = blob->bounding_box();
Box* pixbox = NULL;
if (horizontal_lines) {
// Horizontal lines are in tess format and also have x and y flipped
// (to use FindVerticalAlignment) so we have to flip x and y and then
// convert to Leptonica by height - flipped x (ie the right edge).
// See GetLineBoxes for more explanation.
pixbox = boxCreate(box.bottom(), height - box.right(),
box.height(), box.width());
} else {
// For vertical lines, just flip upside-down to convert to Leptonica.
// The y position of the box in Leptonica terms is the distance from
// the top of the image to the top of the box.
pixbox = boxCreate(box.left(), height - box.top(),
box.width(), box.height());
}
pixClearInRect(line_pix, pixbox);
boxDestroy(&pixbox);
}
}
}
// Removes the lines in line_pix from src_pix (in place), together with the
// line residue: components that touch a line but are not part of the
// non_line_pix mask. non_line_pix is assumed to have been prepared to the
// required accuracy already. resolution is not used by this helper.
static void SubtractLinesAndResidue(Pix* line_pix, Pix* non_line_pix,
                                    int resolution, Pix* src_pix) {
  // Step 1: take the line pixels themselves out of the source.
  pixSubtract(src_pix, src_pix, line_pix);
  // Step 2: grow the lines by a 3x3 brick so that they reach any residue
  // lying right next to them.
  Pix* grown_lines = pixDilateBrick(NULL, line_pix, 3, 3);
  // Step 3: whatever is left in the source that is not a known non-line is
  // candidate residue.
  Pix* candidate_residue = pixSubtract(NULL, src_pix, non_line_pix);
  // Step 4: flood from the grown lines through the candidate residue
  // (8-connected) to collect all of the residue attached to the lines.
  pixSeedfillBinary(grown_lines, grown_lines, candidate_residue, 8);
  // Step 5: remove the collected residue from the source image.
  pixSubtract(src_pix, src_pix, grown_lines);
  pixDestroy(&candidate_residue);
  pixDestroy(&grown_lines);
}
// Estimates the maximum stroke width in the given binary image as twice the
// largest value found in its distance function image.
static int MaxStrokeWidth(Pix* pix) {
  Pix* dist_pix = pixDistanceFunction(pix, 4, 8, L_BOUNDARY_BG);
  const int num_cols = pixGetWidth(dist_pix);
  const int num_rows = pixGetHeight(dist_pix);
  const int words_per_line = pixGetWpl(dist_pix);
  // Scan the distance image one raster line at a time, reading one byte per
  // pixel and tracking the largest distance seen.
  int largest_dist = 0;
  l_uint32* row = pixGetData(dist_pix);
  for (int r = 0; r < num_rows; ++r, row += words_per_line) {
    for (int c = 0; c < num_cols; ++c) {
      const int dist = GET_DATA_BYTE(row, c);
      if (largest_dist < dist)
        largest_dist = dist;
    }
  }
  pixDestroy(&dist_pix);
  // The distance is measured from the boundary, so the width is double it.
  return 2 * largest_dist;
}
// Returns the number of 8-connected components of intersection_pix that lie
// within line_box. Returns 0 if intersection_pix is NULL, or if the region
// cannot be clipped or analysed.
static int NumTouchingIntersections(Box* line_box, Pix* intersection_pix) {
  if (intersection_pix == NULL) return 0;
  Pix* rect_pix = pixClipRectangle(intersection_pix, line_box, NULL);
  // Nothing to count if the clip failed (e.g. the box misses the image).
  if (rect_pix == NULL) return 0;
  Boxa* boxa = pixConnComp(rect_pix, NULL, 8);
  pixDestroy(&rect_pix);
  // The result is a count, so report zero (not a bool) on failure.
  if (boxa == NULL) return 0;
  int result = boxaGetCount(boxa);
  boxaDestroy(&boxa);
  return result;
}
// Returns the number of black pixels of nonline_pix found in the box made by
// adding line_width to both sides of the line bounding box. (Increasing the
// smallest dimension of the bounding box.) The grown box is clipped to the
// bounds of nonline_pix. Returns 0 if the region cannot be clipped or counted.
static int CountPixelsAdjacentToLine(int line_width, Box* line_box,
                                     Pix* nonline_pix) {
  l_int32 x, y, box_width, box_height;
  boxGetGeometry(line_box, &x, &y, &box_width, &box_height);
  if (box_width > box_height) {
    // Horizontal line: grow upwards and downwards.
    int bottom = MIN(pixGetHeight(nonline_pix), y + box_height + line_width);
    y = MAX(0, y - line_width);
    box_height = bottom - y;
  } else {
    // Vertical line: grow to the left and to the right.
    int right = MIN(pixGetWidth(nonline_pix), x + box_width + line_width);
    x = MAX(0, x - line_width);
    box_width = right - x;
  }
  Box* box = boxCreate(x, y, box_width, box_height);
  Pix* rect_pix = pixClipRectangle(nonline_pix, box, NULL);
  boxDestroy(&box);
  // Start from zero so that a failed clip or count cannot leak an
  // uninitialized value back to the caller.
  l_int32 result = 0;
  if (rect_pix != NULL) {
    pixCountPixels(rect_pix, &result, NULL);
    pixDestroy(&rect_pix);
  }
  return result;
}
// Helper erases false-positive line segments from the input/output line_pix.
// 1. Since thick lines shouldn't really break up, we can eliminate some false
// positives by marking segments that are at least kMinThickLineWidth
// thickness, yet have a length less than min_thick_length.
// 2. Lines that don't have at least 2 intersections with other lines and have
// a lot of neighbouring non-lines are probably not lines (perhaps arabic
// or Hindi words, or underlines.)
// Bad line components are erased from line_pix.
// Returns the number of remaining connected components.
static int FilterFalsePositives(int resolution, Pix* nonline_pix,
Pix* intersection_pix, Pix* line_pix) {
int min_thick_length = static_cast<int>(resolution * kThickLengthMultiple);
Pixa* pixa = NULL;
Boxa* boxa = pixConnComp(line_pix, &pixa, 8);
// Iterate over the boxes to remove false positives.
int nboxes = boxaGetCount(boxa);
int remaining_boxes = nboxes;
for (int i = 0; i < nboxes; ++i) {
Box* box = boxaGetBox(boxa, i, L_CLONE);
l_int32 x, y, box_width, box_height;
boxGetGeometry(box, &x, &y, &box_width, &box_height);
Pix* comp_pix = pixaGetPix(pixa, i, L_CLONE);
int max_width = MaxStrokeWidth(comp_pix);
pixDestroy(&comp_pix);
bool bad_line = false;
// If the length is too short to stand-alone as a line, and the box width
// is thick enough, and the stroke width is thick enough it is bad.
if (box_width >= kMinThickLineWidth && box_height >= kMinThickLineWidth &&
box_width < min_thick_length && box_height < min_thick_length &&
max_width > kMinThickLineWidth) {
// Too thick for the length.
bad_line = true;
}
if (!bad_line &&
(intersection_pix == NULL ||
NumTouchingIntersections(box, intersection_pix) < 2)) {
// Test non-line density near the line.
int nonline_count = CountPixelsAdjacentToLine(max_width, box,
nonline_pix);
if (nonline_count > box_height * box_width * kMaxNonLineDensity)
bad_line = true;
}
if (bad_line) {
// Not a good line.
pixClearInRect(line_pix, box);
--remaining_boxes;
}
boxDestroy(&box);
}
pixaDestroy(&pixa);
boxaDestroy(&boxa);
return remaining_boxes;
}
// Finds vertical and horizontal line objects in the given pix.
// Uses the given resolution to determine size thresholds instead of any
// that may be present in the pix.
// The output vertical_x and vertical_y contain a sum of the output vectors,
// thereby giving the mean vertical direction.
// If pix_music_mask != NULL, and music is detected, a mask of the staves
// and anything that is connected (bars, notes etc.) will be returned in
// pix_music_mask, the mask subtracted from pix, and the lines will not
// appear in v_lines or h_lines.
// The output vectors are owned by the list and Frozen (cannot refit) by
// having no boxes, as there is no need to refit or merge separator lines.
void LineFinder::FindVerticalLines(int resolution, Pix* pix,
int* vertical_x, int* vertical_y,
TabVector_LIST* vectors) {
Pix* line_pix;
Boxa* boxes = GetVLineBoxes(resolution, pix, &line_pix);
C_BLOB_LIST line_cblobs;
int width = pixGetWidth(pix);
int height = pixGetHeight(pix);
ConvertBoxaToBlobs(width, height, &boxes, &line_cblobs);
// Make the BLOBNBOXes from the C_BLOBs.
BLOBNBOX_LIST line_bblobs;
C_BLOB_IT blob_it(&line_cblobs);
BLOBNBOX_IT bbox_it(&line_bblobs);
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
C_BLOB* cblob = blob_it.data();
BLOBNBOX* bblob = new BLOBNBOX(cblob);
bbox_it.add_to_end(bblob);
// The detected lines are removed from the pix.
void LineFinder::FindAndRemoveLines(int resolution, bool debug, Pix* pix,
int* vertical_x, int* vertical_y,
Pix** pix_music_mask,
TabVector_LIST* v_lines,
TabVector_LIST* h_lines) {
if (pix == NULL || vertical_x == NULL || vertical_y == NULL) {
tprintf("Error in parameters for LineFinder::FindAndRemoveLines\n");
return;
}
ICOORD bleft(0, 0);
ICOORD tright(width, height);
FindLineVectors(bleft, tright, &line_bblobs, vertical_x, vertical_y, vectors);
if (!vectors->empty()) {
// Some lines were found, so erase the unused blobs from the line image
// and then subtract the line image from the source.
bbox_it.move_to_first();
for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
BLOBNBOX* blob = bbox_it.data();
if (blob->left_tab_type() == TT_UNCONFIRMED) {
const TBOX& box = blob->bounding_box();
Box* pixbox = boxCreate(box.left(), height - box.top(),
box.width(), box.height());
pixClearInRect(line_pix, pixbox);
boxDestroy(&pixbox);
}
Pix* pix_vline = NULL;
Pix* pix_non_vline = NULL;
Pix* pix_hline = NULL;
Pix* pix_non_hline = NULL;
Pix* pix_intersections = NULL;
Pixa* pixa_display = debug ? pixaCreate(0) : NULL;
GetLineMasks(resolution, pix, &pix_vline, &pix_non_vline, &pix_hline,
&pix_non_hline, &pix_intersections, pix_music_mask,
pixa_display);
// Find lines, convert to TabVector_LIST and remove those that are used.
FindAndRemoveVLines(resolution, pix_intersections, vertical_x, vertical_y,
&pix_vline, pix_non_vline, pix, v_lines);
if (pix_hline != NULL) {
// Recompute intersections and re-filter false positive h-lines.
if (pix_vline != NULL)
pixAnd(pix_intersections, pix_vline, pix_hline);
else
pixDestroy(&pix_intersections);
if (!FilterFalsePositives(resolution, pix_non_hline, pix_intersections,
pix_hline)) {
pixDestroy(&pix_hline);
}
pixDilateBrick(line_pix, line_pix, 1, 3);
pixSubtract(pix, pix, line_pix);
if (textord_tabfind_show_vlines)
pixWrite("vlinesclean.png", line_pix, IFF_PNG);
ICOORD vertical;
vertical.set_with_shrink(*vertical_x, *vertical_y);
TabVector::MergeSimilarTabVectors(vertical, vectors, NULL);
}
pixDestroy(&line_pix);
}
FindAndRemoveHLines(resolution, pix_intersections, *vertical_x, *vertical_y,
&pix_hline, pix_non_hline, pix, h_lines);
if (pixa_display != NULL && pix_vline != NULL)
pixaAddPix(pixa_display, pix_vline, L_CLONE);
if (pixa_display != NULL && pix_hline != NULL)
pixaAddPix(pixa_display, pix_hline, L_CLONE);
if (pix_vline != NULL && pix_hline != NULL) {
// Remove joins (intersections) where lines cross, and the residue.
// Recalculate the intersections, since some lines have been deleted.
pixAnd(pix_intersections, pix_vline, pix_hline);
// Fatten up the intersections and seed-fill to get the intersection
// residue.
Pix* pix_join_residue = pixDilateBrick(NULL, pix_intersections, 5, 5);
pixSeedfillBinary(pix_join_residue, pix_join_residue, pix, 8);
// Now remove the intersection residue.
pixSubtract(pix, pix, pix_join_residue);
pixDestroy(&pix_join_residue);
}
// Remove any detected music.
if (pix_music_mask != NULL && *pix_music_mask != NULL) {
if (pixa_display != NULL)
pixaAddPix(pixa_display, *pix_music_mask, L_CLONE);
pixSubtract(pix, pix, *pix_music_mask);
}
if (pixa_display != NULL)
pixaAddPix(pixa_display, pix, L_CLONE);
// Finds horizontal line objects in the given pix.
// Uses the given resolution to determine size thresholds instead of any
// that may be present in the pix.
// The output vectors are owned by the list and Frozen (cannot refit) by
// having no boxes, as there is no need to refit or merge separator lines.
void LineFinder::FindHorizontalLines(int resolution, Pix* pix,
TabVector_LIST* vectors) {
Pix* line_pix;
Boxa* boxes = GetHLineBoxes(resolution, pix, &line_pix);
C_BLOB_LIST line_cblobs;
int width = pixGetWidth(pix);
int height = pixGetHeight(pix);
ConvertBoxaToBlobs(height, width, &boxes, &line_cblobs);
// Make the BLOBNBOXes from the C_BLOBs.
BLOBNBOX_LIST line_bblobs;
C_BLOB_IT blob_it(&line_cblobs);
BLOBNBOX_IT bbox_it(&line_bblobs);
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
C_BLOB* cblob = blob_it.data();
BLOBNBOX* bblob = new BLOBNBOX(cblob);
bbox_it.add_to_end(bblob);
pixDestroy(&pix_vline);
pixDestroy(&pix_non_vline);
pixDestroy(&pix_hline);
pixDestroy(&pix_non_hline);
pixDestroy(&pix_intersections);
if (pixa_display != NULL) {
#if LIBLEPT_MINOR_VERSION >= 69 || LIBLEPT_MAJOR_VERSION > 1
pixaConvertToPdf(pixa_display, resolution, 1.0f, 0, 0, "LineFinding",
"vhlinefinding.pdf");
#endif
pixaDestroy(&pixa_display);
}
ICOORD bleft(0, 0);
ICOORD tright(height, width);
int vertical_x, vertical_y;
FindLineVectors(bleft, tright, &line_bblobs, &vertical_x, &vertical_y,
vectors);
if (!vectors->empty()) {
// Some lines were found, so erase the unused blobs from the line image
// and then subtract the line image from the source.
bbox_it.move_to_first();
for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
BLOBNBOX* blob = bbox_it.data();
if (blob->left_tab_type() == TT_UNCONFIRMED) {
const TBOX& box = blob->bounding_box();
// Coords are in tess format so flip x and y and then convert
// to leptonica by height - y.
Box* pixbox = boxCreate(box.bottom(), height - box.right(),
box.height(), box.width());
pixClearInRect(line_pix, pixbox);
boxDestroy(&pixbox);
}
}
pixDilateBrick(line_pix, line_pix, 3, 1);
pixSubtract(pix, pix, line_pix);
if (textord_tabfind_show_vlines)
pixWrite("hlinesclean.png", line_pix, IFF_PNG);
ICOORD vertical;
vertical.set_with_shrink(vertical_x, vertical_y);
TabVector::MergeSimilarTabVectors(vertical, vectors, NULL);
// Iterate the vectors to flip them.
TabVector_IT h_it(vectors);
for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
h_it.data()->XYFlip();
}
}
pixDestroy(&line_pix);
}
// Converts the Boxa array to a list of C_BLOB, getting rid of severely
@ -176,8 +330,8 @@ void LineFinder::ConvertBoxaToBlobs(int image_width, int image_height,
// Make a C_OUTLINE from the leptonica box. This is a bit of a hack,
// as there is no outline, just a bounding box, but with some very
// small changes to coutln.cpp, it works nicely.
ICOORD top_left(x, image_height - y);
ICOORD bot_right(x + width, image_height - (y + height));
ICOORD top_left(x, y);
ICOORD bot_right(x + width, y + height);
CRACKEDGE startpt;
startpt.pos = top_left;
C_OUTLINE* outline = new C_OUTLINE(&startpt, top_left, bot_right, 0);
@ -197,6 +351,85 @@ void LineFinder::ConvertBoxaToBlobs(int image_width, int image_height,
boxaDestroy(boxes);
}
// Finds vertical line objects in pix_vline and removes the from src_pix.
// Uses the given resolution to determine size thresholds instead of any
// that may be present in the pix.
// The output vertical_x and vertical_y contain a sum of the output vectors,
// thereby giving the mean vertical direction.
// The output vectors are owned by the list and Frozen (cannot refit) by
// having no boxes, as there is no need to refit or merge separator lines.
// If no good lines are found, pix_vline is destroyed.
// None of the input pointers may be NULL, and if *pix_vline is NULL then
// the function does nothing.
void LineFinder::FindAndRemoveVLines(int resolution,
Pix* pix_intersections,
int* vertical_x, int* vertical_y,
Pix** pix_vline, Pix* pix_non_vline,
Pix* src_pix, TabVector_LIST* vectors) {
if (pix_vline == NULL || *pix_vline == NULL) return;
C_BLOB_LIST line_cblobs;
BLOBNBOX_LIST line_bblobs;
GetLineBoxes(false, *pix_vline, pix_intersections,
&line_cblobs, &line_bblobs);
int width = pixGetWidth(src_pix);
int height = pixGetHeight(src_pix);
ICOORD bleft(0, 0);
ICOORD tright(width, height);
FindLineVectors(bleft, tright, &line_bblobs, vertical_x, vertical_y, vectors);
if (!vectors->empty()) {
RemoveUnusedLineSegments(false, &line_bblobs, *pix_vline);
SubtractLinesAndResidue(*pix_vline, pix_non_vline, resolution, src_pix);
ICOORD vertical;
vertical.set_with_shrink(*vertical_x, *vertical_y);
TabVector::MergeSimilarTabVectors(vertical, vectors, NULL);
} else {
pixDestroy(pix_vline);
}
}
// Finds horizontal line objects in pix_hline and removes them from src_pix.
// Uses the given resolution to determine size thresholds instead of any
// that may be present in the pix.
// The output vertical_x and vertical_y contain a sum of the output vectors,
// thereby giving the mean vertical direction.
// The output vectors are owned by the list and Frozen (cannot refit) by
// having no boxes, as there is no need to refit or merge separator lines.
// If no good lines are found, pix_hline is destroyed.
// None of the input pointers may be NULL, and if *pix_hline is NULL then
// the function does nothing.
void LineFinder::FindAndRemoveHLines(int resolution,
Pix* pix_intersections,
int vertical_x, int vertical_y,
Pix** pix_hline, Pix* pix_non_hline,
Pix* src_pix, TabVector_LIST* vectors) {
if (pix_hline == NULL || *pix_hline == NULL) return;
C_BLOB_LIST line_cblobs;
BLOBNBOX_LIST line_bblobs;
GetLineBoxes(true, *pix_hline, pix_intersections, &line_cblobs, &line_bblobs);
int width = pixGetWidth(src_pix);
int height = pixGetHeight(src_pix);
ICOORD bleft(0, 0);
ICOORD tright(height, width);
FindLineVectors(bleft, tright, &line_bblobs, &vertical_x, &vertical_y,
vectors);
if (!vectors->empty()) {
RemoveUnusedLineSegments(true, &line_bblobs, *pix_hline);
SubtractLinesAndResidue(*pix_hline, pix_non_hline, resolution, src_pix);
ICOORD vertical;
vertical.set_with_shrink(vertical_x, vertical_y);
TabVector::MergeSimilarTabVectors(vertical, vectors, NULL);
// Iterate the vectors to flip them. x and y were flipped for horizontal
// lines, so FindLineVectors can work just with the vertical case.
// See GetLineBoxes for more on the flip.
TabVector_IT h_it(vectors);
for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
h_it.data()->XYFlip();
}
} else {
pixDestroy(pix_hline);
}
}
// Finds vertical lines in the given list of BLOBNBOXes. bleft and tright
// are the bounds of the image on which the input line_bblobs were found.
// The input line_bblobs list is const really.
@ -213,7 +446,7 @@ void LineFinder::FindLineVectors(const ICOORD& bleft, const ICOORD& tright,
AlignedBlob blob_grid(kLineFindGridSize, bleft, tright);
for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
BLOBNBOX* bblob = bbox_it.data();
bblob->set_left_tab_type(TT_UNCONFIRMED);
bblob->set_left_tab_type(TT_MAYBE_ALIGNED);
bblob->set_left_rule(bleft.x());
bblob->set_right_rule(tright.x());
bblob->set_left_crossing_rule(bleft.x());
@ -221,20 +454,18 @@ void LineFinder::FindLineVectors(const ICOORD& bleft, const ICOORD& tright,
blob_grid.InsertBBox(false, true, bblob);
++b_count;
}
if (textord_debug_tabfind)
tprintf("Inserted %d line blobs into grid\n", b_count);
if (b_count == 0)
return;
// Search the entire grid, looking for vertical line vectors.
GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> lsearch(&blob_grid);
BlobGridSearch lsearch(&blob_grid);
BLOBNBOX* bbox;
TabVector_IT vector_it(vectors);
*vertical_x = 0;
*vertical_y = 1;
lsearch.StartFullSearch();
while ((bbox = lsearch.NextFullSearch()) != NULL) {
if (bbox->left_tab_type() == TT_UNCONFIRMED) {
if (bbox->left_tab_type() == TT_MAYBE_ALIGNED) {
const TBOX& box = bbox->bounding_box();
if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom()))
tprintf("Finding line vector starting at bbox (%d,%d)\n",
@ -249,89 +480,268 @@ void LineFinder::FindLineVectors(const ICOORD& bleft, const ICOORD& tright,
}
}
}
ScrollView* line_win = NULL;
if (textord_tabfind_show_vlines) {
line_win = blob_grid.MakeWindow(0, 50, "Vlines");
blob_grid.DisplayBoxes(line_win);
line_win = blob_grid.DisplayTabs("Vlines", line_win);
}
}
// Get a set of bounding boxes of possible vertical lines in the image.
// The input resolution overrides any resolution set in src_pix.
// The output line_pix contains just all the detected lines.
Boxa* LineFinder::GetVLineBoxes(int resolution, Pix* src_pix, Pix** line_pix) {
// Remove any parts of 1 inch/kThinLineFraction wide or more, by opening
// away the thin lines and subtracting what's left.
// This is very generous and will leave in even quite wide lines.
Pix* pixt1 = pixOpenBrick(NULL, src_pix, resolution / kThinLineFraction, 1);
pixSubtract(pixt1, src_pix, pixt1);
// Spread sideways to allow for some skew.
Pix* pixt2 = pixDilateBrick(NULL, pixt1, 3, 1);
// Now keep only tall stuff of height at least 1 inch/kMinLineLengthFraction.
pixOpenBrick(pixt1, pixt2, 1, resolution / kMinLineLengthFraction);
pixDestroy(&pixt2);
// Put a single pixel crack in every line at an arbitrary spacing,
// so they break up and the bounding boxes can be used to get the
// direction accurately enough without needing outlines.
int wpl = pixGetWpl(pixt1);
int height = pixGetHeight(pixt1);
l_uint32* data = pixGetData(pixt1);
for (int y = kCrackSpacing; y < height; y += kCrackSpacing) {
memset(data + wpl * y, 0, wpl * sizeof(*data));
// Returns a Pix music mask if music is detected.
// Any vertical line that has at least 5 intersections in sufficient density
// is taken to be a bar. Bars are used as a seed and the entire touching
// component is added to the output music mask and subtracted from the lines.
// Returns NULL and does minimal work if no music is found.
static Pix* FilterMusic(int resolution, Pix* pix_closed,
Pix* pix_vline, Pix* pix_hline,
l_int32* v_empty, l_int32* h_empty) {
int max_stave_height = static_cast<int>(resolution * kMaxStaveHeight);
Pix* intersection_pix = pixAnd(NULL, pix_vline, pix_hline);
Boxa* boxa = pixConnComp(pix_vline, NULL, 8);
// Iterate over the boxes to find music bars.
int nboxes = boxaGetCount(boxa);
Pix* music_mask = NULL;
for (int i = 0; i < nboxes; ++i) {
Box* box = boxaGetBox(boxa, i, L_CLONE);
l_int32 x, y, box_width, box_height;
boxGetGeometry(box, &x, &y, &box_width, &box_height);
int joins = NumTouchingIntersections(box, intersection_pix);
// Test for the join density being at least 5 per max_stave_height,
// ie (joins-1)/box_height >= (5-1)/max_stave_height.
if (joins >= 5 && (joins - 1) * max_stave_height >= 4 * box_height) {
// This is a music bar. Add to the mask.
if (music_mask == NULL)
music_mask = pixCreate(pixGetWidth(pix_vline), pixGetHeight(pix_vline),
1);
pixSetInRect(music_mask, box);
}
boxDestroy(&box);
}
if (textord_tabfind_show_vlines)
pixWrite("vlines.png", pixt1, IFF_PNG);
Boxa* boxa = pixConnComp(pixt1, NULL, 8);
*line_pix = pixt1;
return boxa;
}
// Get a set of bounding boxes of possible horizontal lines in the image.
// The input resolution overrides any resolution set in src_pix.
// The output line_pix contains just all the detected lines.
// The output boxes undergo the transformation (x,y)->(height-y,x) so the
// lines can be found with a vertical line finder afterwards.
// This transformation allows a simple x/y flip to reverse it in tesseract
// coordinates and it is faster to flip the lines than rotate the image.
Boxa* LineFinder::GetHLineBoxes(int resolution, Pix* src_pix, Pix** line_pix) {
// Remove any parts of 1 inch/kThinLineFraction high or more, by opening
// away the thin lines and subtracting what's left.
// This is very generous and will leave in even quite wide lines.
Pix* pixt1 = pixOpenBrick(NULL, src_pix, 1, resolution / kThinLineFraction);
pixSubtract(pixt1, src_pix, pixt1);
// Spread vertically to allow for some skew.
Pix* pixt2 = pixDilateBrick(NULL, pixt1, 1, 3);
// Now keep only wide stuff of width at least 1 inch/kMinLineLengthFraction.
pixOpenBrick(pixt1, pixt2, resolution / kMinLineLengthFraction, 1);
pixDestroy(&pixt2);
// Put a single pixel crack in every line at an arbitrary spacing,
// so they break up and the bounding boxes can be used to get the
// direction accurately enough without needing outlines.
int wpl = pixGetWpl(pixt1);
int width = pixGetWidth(pixt1);
int height = pixGetHeight(pixt1);
l_uint32* data = pixGetData(pixt1);
for (int y = 0; y < height; ++y, data += wpl) {
for (int x = kCrackSpacing; x < width; x += kCrackSpacing) {
CLEAR_DATA_BIT(data, x);
boxaDestroy(&boxa);
pixDestroy(&intersection_pix);
if (music_mask != NULL) {
// The mask currently contains just the bars. Use the mask as a seed
// and the pix_closed as the mask for a seedfill to get all the
// intersecting staves.
pixSeedfillBinary(music_mask, music_mask, pix_closed, 8);
// Filter out false positives. CCs in the music_mask should be the vast
// majority of the pixels in their bounding boxes, as we expect just a
// tiny amount of text, a few phrase marks, and crescendo etc left.
Boxa* boxa = pixConnComp(music_mask, NULL, 8);
// Iterate over the boxes to find music components.
int nboxes = boxaGetCount(boxa);
for (int i = 0; i < nboxes; ++i) {
Box* box = boxaGetBox(boxa, i, L_CLONE);
Pix* rect_pix = pixClipRectangle(music_mask, box, NULL);
l_int32 music_pixels;
pixCountPixels(rect_pix, &music_pixels, NULL);
pixDestroy(&rect_pix);
rect_pix = pixClipRectangle(pix_closed, box, NULL);
l_int32 all_pixels;
pixCountPixels(rect_pix, &all_pixels, NULL);
pixDestroy(&rect_pix);
if (music_pixels < kMinMusicPixelFraction * all_pixels) {
// False positive. Delete from the music mask.
pixClearInRect(music_mask, box);
}
boxDestroy(&box);
}
l_int32 no_remaining_music;
boxaDestroy(&boxa);
pixZero(music_mask, &no_remaining_music);
if (no_remaining_music) {
pixDestroy(&music_mask);
} else {
pixSubtract(pix_vline, pix_vline, music_mask);
pixSubtract(pix_hline, pix_hline, music_mask);
// We may have deleted all the lines
pixZero(pix_vline, v_empty);
pixZero(pix_hline, h_empty);
}
}
if (textord_tabfind_show_vlines)
pixWrite("hlines.png", pixt1, IFF_PNG);
Boxa* boxa = pixConnComp(pixt1, NULL, 8);
*line_pix = pixt1;
return music_mask;
}
// Iterate the boxes to flip x and y.
int nboxes = boxaGetCount(boxa);
for (int i = 0; i < nboxes; ++i) {
l_int32 x, y, box_width, box_height;
boxaGetBoxGeometry(boxa, i, &x, &y, &box_width, &box_height);
Box* box = boxCreate(height - (y + box_height),
width - (x + box_width), box_height, box_width);
boxaReplaceBox(boxa, i, box);
// Most of the heavy lifting of line finding. Given src_pix and its separate
// resolution, returns image masks:
// pix_vline candidate vertical lines.
// pix_non_vline pixels that didn't look like vertical lines.
// pix_hline candidate horizontal lines.
// pix_non_hline pixels that didn't look like horizontal lines.
// pix_intersections pixels where vertical and horizontal lines meet.
// pix_music_mask candidate music staves.
// This function promises to initialize all the output (2nd level) pointers,
// but any of the returns that are empty will be NULL on output.
// None of the input (1st level) pointers may be NULL except pix_music_mask,
// which will disable music detection, and pixa_display.
// If pixa_display is not NULL, the size thresholds are printed and the
// intermediate and final images are added to it for debugging.
void LineFinder::GetLineMasks(int resolution, Pix* src_pix,
Pix** pix_vline, Pix** pix_non_vline,
Pix** pix_hline, Pix** pix_non_hline,
Pix** pix_intersections, Pix** pix_music_mask,
Pixa* pixa_display) {
// Both size thresholds are fractions of an inch, expressed in pixels.
int max_line_width = resolution / kThinLineFraction;
int min_line_length = resolution / kMinLineLengthFraction;
if (pixa_display != NULL) {
tprintf("Image resolution = %d, max line width = %d, min length=%d\n",
resolution, max_line_width, min_line_length);
}
int closing_brick = max_line_width / 3;
// Close up small holes, making it less likely that false alarms are found
// in thickened text (as it will become more solid) and also smoothing over
// some line breaks and nicks in the edges of the lines.
Pix* pix_closed = pixCloseBrick(NULL, src_pix, closing_brick, closing_brick);
if (pixa_display != NULL)
pixaAddPix(pixa_display, pix_closed, L_CLONE);
// Open up with a big box to detect solid areas, which can then be subtracted.
// This is very generous and will leave in even quite wide lines.
Pix* pix_solid = pixOpenBrick(NULL, pix_closed, max_line_width,
max_line_width);
if (pixa_display != NULL)
pixaAddPix(pixa_display, pix_solid, L_CLONE);
// pix_hollow is the closed image with the solid areas taken out.
Pix* pix_hollow = pixSubtract(NULL, pix_closed, pix_solid);
pixDestroy(&pix_solid);
// Now open up in both directions independently to find lines of at least
// 1 inch/kMinLineLengthFraction in length.
if (pixa_display != NULL)
pixaAddPix(pixa_display, pix_hollow, L_CLONE);
// A 1 x min_line_length brick for vertical candidates and a
// min_line_length x 1 brick for horizontal candidates.
*pix_vline = pixOpenBrick(NULL, pix_hollow, 1, min_line_length);
*pix_hline = pixOpenBrick(NULL, pix_hollow, min_line_length, 1);
pixDestroy(&pix_hollow);
// Lines are sufficiently rare, that it is worth checking for a zero image.
l_int32 v_empty = 0;
l_int32 h_empty = 0;
pixZero(*pix_vline, &v_empty);
pixZero(*pix_hline, &h_empty);
if (pix_music_mask != NULL) {
// Music detection is only attempted when both line directions are present.
// FilterMusic may update v_empty and h_empty, as it subtracts any music
// that it finds from both of the line masks.
if (!v_empty && !h_empty) {
*pix_music_mask = FilterMusic(resolution, pix_closed,
*pix_vline, *pix_hline,
&v_empty, &h_empty);
} else {
*pix_music_mask = NULL;
}
}
pixDestroy(&pix_closed);
// pix_nonlines and extra_non_hlines are local temporaries; both are
// destroyed before returning on every path that creates them.
Pix* pix_nonlines = NULL;
*pix_intersections = NULL;
Pix* extra_non_hlines = NULL;
if (!v_empty) {
// Subtract both line candidates from the source to get definite non-lines.
pix_nonlines = pixSubtract(NULL, src_pix, *pix_vline);
if (!h_empty) {
pixSubtract(pix_nonlines, pix_nonlines, *pix_hline);
// Intersections are a useful indicator for likelihood of being a line.
*pix_intersections = pixAnd(NULL, *pix_vline, *pix_hline);
// Candidate vlines are not hlines (apart from the intersections)
// and vice versa.
extra_non_hlines = pixSubtract(NULL, *pix_vline, *pix_intersections);
}
// Erode horizontally by kMaxLineResidue and seed-fill back within
// pix_nonlines, so that only whole components that survive the erosion
// count as non-vertical-line pixels.
*pix_non_vline = pixErodeBrick(NULL, pix_nonlines, kMaxLineResidue, 1);
pixSeedfillBinary(*pix_non_vline, *pix_non_vline, pix_nonlines, 8);
if (!h_empty) {
// Candidate hlines are not vlines.
pixOr(*pix_non_vline, *pix_non_vline, *pix_hline);
pixSubtract(*pix_non_vline, *pix_non_vline, *pix_intersections);
}
// FilterFalsePositives returns the number of components remaining.
if (!FilterFalsePositives(resolution, *pix_non_vline, *pix_intersections,
*pix_vline))
pixDestroy(pix_vline); // No candidates left.
} else {
// No vertical lines.
pixDestroy(pix_vline);
*pix_non_vline = NULL;
if (!h_empty) {
// With no vertical candidates, the non-lines are the source minus just
// the horizontal candidates, and pix_intersections stays NULL.
pix_nonlines = pixSubtract(NULL, src_pix, *pix_hline);
}
}
if (h_empty) {
// No horizontal lines.
pixDestroy(pix_hline);
*pix_non_hline = NULL;
if (v_empty) {
// Nothing found in either direction. pix_nonlines was never created, so
// there is nothing left to display or to free.
return;
}
} else {
// Same erode and seed-fill as for the vertical case, but eroding
// vertically this time.
*pix_non_hline = pixErodeBrick(NULL, pix_nonlines, 1, kMaxLineResidue);
pixSeedfillBinary(*pix_non_hline, *pix_non_hline, pix_nonlines, 8);
if (extra_non_hlines != NULL) {
// Add the vertical candidates (less the intersections) to the pixels
// that are not horizontal lines.
pixOr(*pix_non_hline, *pix_non_hline, extra_non_hlines);
pixDestroy(&extra_non_hlines);
}
if (!FilterFalsePositives(resolution, *pix_non_hline, *pix_intersections,
*pix_hline))
pixDestroy(pix_hline); // No candidates left.
}
// Add whichever results exist to the debug display.
if (pixa_display != NULL) {
if (*pix_vline != NULL) pixaAddPix(pixa_display, *pix_vline, L_CLONE);
if (*pix_hline != NULL) pixaAddPix(pixa_display, *pix_hline, L_CLONE);
if (pix_nonlines != NULL) pixaAddPix(pixa_display, pix_nonlines, L_CLONE);
if (*pix_non_vline != NULL)
pixaAddPix(pixa_display, *pix_non_vline, L_CLONE);
if (*pix_non_hline != NULL)
pixaAddPix(pixa_display, *pix_non_hline, L_CLONE);
if (*pix_intersections != NULL)
pixaAddPix(pixa_display, *pix_intersections, L_CLONE);
if (pix_music_mask != NULL && *pix_music_mask != NULL)
pixaAddPix(pixa_display, *pix_music_mask, L_CLONE);
}
pixDestroy(&pix_nonlines);
}
// Returns a list of boxes corresponding to the candidate line segments. Sets
// the line_crossings member of the boxes so we can later determine the number
// of intersections touched by a full line.
// horizontal_lines selects which way the lines are cracked and how the
// resulting bounding boxes are transformed (see below).
// pix_lines is modified in place: single-pixel cracks are cut into it.
// pix_intersections is passed to NumTouchingIntersections for each segment.
// line_cblobs receives the C_BLOBs made from the connected components of the
// cracked pix_lines, and line_bblobs receives one new BLOBNBOX per C_BLOB.
void LineFinder::GetLineBoxes(bool horizontal_lines,
                              Pix* pix_lines, Pix* pix_intersections,
                              C_BLOB_LIST* line_cblobs,
                              BLOBNBOX_LIST* line_bblobs) {
  // Put a single pixel crack in every line at an arbitrary spacing,
  // so they break up and the bounding boxes can be used to get the
  // direction accurately enough without needing outlines.
  int wpl = pixGetWpl(pix_lines);
  int width = pixGetWidth(pix_lines);
  int height = pixGetHeight(pix_lines);
  l_uint32* data = pixGetData(pix_lines);
  if (horizontal_lines) {
    // Clear one pixel in every kCrackSpacing columns on each row.
    for (int y = 0; y < height; ++y, data += wpl) {
      for (int x = kCrackSpacing; x < width; x += kCrackSpacing) {
        CLEAR_DATA_BIT(data, x);
      }
    }
  } else {
    // Clear one whole row in every kCrackSpacing rows.
    for (int y = kCrackSpacing; y < height; y += kCrackSpacing) {
      memset(data + wpl * y, 0, wpl * sizeof(*data));
    }
  }
  // Get the individual connected components
  Boxa* boxa = pixConnComp(pix_lines, NULL, 8);
  // NOTE(review): boxa is handed over by address; presumably
  // ConvertBoxaToBlobs is responsible for destroying it - confirm.
  ConvertBoxaToBlobs(width, height, &boxa, line_cblobs);
  // Make the BLOBNBOXes from the C_BLOBs.
  C_BLOB_IT blob_it(line_cblobs);
  BLOBNBOX_IT bbox_it(line_bblobs);
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    C_BLOB* cblob = blob_it.data();
    BLOBNBOX* bblob = new BLOBNBOX(cblob);
    bbox_it.add_to_end(bblob);
    // Determine whether the line segment touches two intersections.
    const TBOX& bbox = bblob->bounding_box();
    Box* box = boxCreate(bbox.left(), bbox.bottom(),
                         bbox.width(), bbox.height());
    bblob->set_line_crossings(NumTouchingIntersections(box, pix_intersections));
    boxDestroy(&box);
    // Transform the bounding box prior to finding lines. To save writing
    // two line finders, flip x and y for horizontal lines and re-use the
    // tab-stop detection code. For vertical lines we still have to flip the
    // y-coordinates to switch from leptonica coords to tesseract coords.
    if (horizontal_lines) {
      // Note that we have Leptonica coords stored in a Tesseract box, so that
      // bbox.bottom(), being the MIN y coord, is actually the top, so to get
      // back to Leptonica coords in RemoveUnusedLineSegments, we have to
      // use height - box.right() as the top, which looks very odd.
      TBOX new_box(height - bbox.top(), bbox.left(),
                   height - bbox.bottom(), bbox.right());
      bblob->set_bounding_box(new_box);
    } else {
      TBOX new_box(bbox.left(), height - bbox.top(),
                   bbox.right(), height - bbox.bottom());
      bblob->set_bounding_box(new_box);
    }
  }
  // This function returns void: the results are delivered through
  // line_cblobs and line_bblobs, so there is nothing to return here.
}
} // namespace tesseract.

View File

@ -21,8 +21,9 @@
#ifndef TESSERACT_TEXTORD_LINEFIND_H__
#define TESSERACT_TEXTORD_LINEFIND_H__
struct Pix;
struct Boxa;
struct Pix;
struct Pixa;
class C_BLOB_LIST;
class BLOBNBOX_LIST;
class ICOORD;
@ -38,7 +39,8 @@ class TabVector_LIST;
class LineFinder {
public:
/**
* Finds vertical line objects in the given pix.
* Finds vertical and horizontal line objects in the given pix and removes
* them.
*
* Uses the given resolution to determine size thresholds instead of any
* that may be present in the pix.
@ -46,24 +48,21 @@ class LineFinder {
* The output vertical_x and vertical_y contain a sum of the output vectors,
* thereby giving the mean vertical direction.
*
* The output vectors are owned by the list and Frozen (cannot refit) by
* having no boxes, as there is no need to refit or merge separator lines.
*/
static void FindVerticalLines(int resolution, Pix* pix,
int* vertical_x, int* vertical_y,
TabVector_LIST* vectors);
/**
* Finds horizontal line objects in the given pix.
*
* Uses the given resolution to determine size thresholds instead of any
* that may be present in the pix.
* If pix_music_mask != NULL, and music is detected, a mask of the staves
* and anything that is connected (bars, notes etc.) will be returned in
* pix_music_mask, the mask subtracted from pix, and the lines will not
* appear in v_lines or h_lines.
*
* The output vectors are owned by the list and Frozen (cannot refit) by
* having no boxes, as there is no need to refit or merge separator lines.
*
* The detected lines are removed from the pix.
*/
static void FindHorizontalLines(int resolution, Pix* pix,
TabVector_LIST* vectors);
static void FindAndRemoveLines(int resolution, bool debug, Pix* pix,
int* vertical_x, int* vertical_y,
Pix** pix_music_mask,
TabVector_LIST* v_lines,
TabVector_LIST* h_lines);
/**
* Converts the Boxa array to a list of C_BLOB, getting rid of severely
@ -78,43 +77,71 @@ class LineFinder {
Boxa** boxes, C_BLOB_LIST* blobs);
private:
/**
* Finds vertical lines in the given list of BLOBNBOXes. bleft and tright
* are the bounds of the image on which the input line_bblobs were found.
*
* The input line_bblobs list is const really.
*
* The output vertical_x and vertical_y are the total of all the vectors.
* The output list of TabVector makes no reference to the input BLOBNBOXes.
*/
// Finds vertical line objects in pix_vline and removes them from src_pix.
// Uses the given resolution to determine size thresholds instead of any
// that may be present in the pix.
// The output vertical_x and vertical_y contain a sum of the output vectors,
// thereby giving the mean vertical direction.
// The output vectors are owned by the list and Frozen (cannot refit) by
// having no boxes, as there is no need to refit or merge separator lines.
// If no good lines are found, pix_vline is destroyed.
static void FindAndRemoveVLines(int resolution,
Pix* pix_intersections,
int* vertical_x, int* vertical_y,
Pix** pix_vline, Pix* pix_non_vline,
Pix* src_pix, TabVector_LIST* vectors);
// Finds horizontal line objects in pix_hline and removes them from src_pix.
// Uses the given resolution to determine size thresholds instead of any
// that may be present in the pix.
// Unlike FindAndRemoveVLines, vertical_x and vertical_y are passed by value,
// so no sum of the output vectors is returned to the caller through them.
// The output vectors are owned by the list and Frozen (cannot refit) by
// having no boxes, as there is no need to refit or merge separator lines.
// If no good lines are found, pix_hline is destroyed.
static void FindAndRemoveHLines(int resolution,
Pix* pix_intersections,
int vertical_x, int vertical_y,
Pix** pix_hline, Pix* pix_non_hline,
Pix* src_pix, TabVector_LIST* vectors);
// Finds vertical lines in the given list of BLOBNBOXes. bleft and tright
// are the bounds of the image on which the input line_bblobs were found.
// The input line_bblobs list is const really.
// The output vertical_x and vertical_y are the total of all the vectors.
// The output list of TabVector makes no reference to the input BLOBNBOXes.
static void FindLineVectors(const ICOORD& bleft, const ICOORD& tright,
BLOBNBOX_LIST* line_bblobs,
int* vertical_x, int* vertical_y,
TabVector_LIST* vectors);
/**
* Get a set of bounding boxes of possible vertical lines in the image.
*
* The input resolution overrides any resolution set in src_pix.
*
* The output line_pix contains just all the detected lines.
*/
static Boxa* GetVLineBoxes(int resolution, Pix* src_pix, Pix** line_pix);
// Most of the heavy lifting of line finding. Given src_pix and its separate
// resolution, returns image masks:
// pix_vline candidate vertical lines.
// pix_non_vline pixels that didn't look like vertical lines.
// pix_hline candidate horizontal lines.
// pix_non_hline pixels that didn't look like horizontal lines.
// pix_intersections pixels where vertical and horizontal lines meet.
// pix_music_mask candidate music staves.
// This function promises to initialize all the output (2nd level) pointers,
// but any of the returns that are empty will be NULL on output.
// None of the input (1st level) pointers may be NULL except pix_music_mask,
// which will disable music detection, and pixa_display, which is for debug.
static void GetLineMasks(int resolution, Pix* src_pix,
Pix** pix_vline, Pix** pix_non_vline,
Pix** pix_hline, Pix** pix_non_hline,
Pix** pix_intersections, Pix** pix_music_mask,
Pixa* pixa_display);
/**
* Get a set of bounding boxes of possible horizontal lines in the image.
*
* The input resolution overrides any resolution set in src_pix.
*
* The output line_pix contains just all the detected lines.
*
* The output boxes undergo the transformation (x,y)->(height-y,x) so the
* lines can be found with a vertical line finder afterwards.
*
* This transformation allows a simple x/y flip to reverse it in tesseract
* coordinates and it is faster to flip the lines than rotate the image.
*/
static Boxa* GetHLineBoxes(int resolution, Pix* src_pix, Pix** line_pix);
// Returns a list of boxes corresponding to the candidate line segments. Sets
// the line_crossings member of the boxes so we can later determine the number
// of intersections touched by a full line.
static void GetLineBoxes(bool horizontal_lines,
Pix* pix_lines, Pix* pix_intersections,
C_BLOB_LIST* line_cblobs,
BLOBNBOX_LIST* line_bblobs);
};
} // namespace tesseract.

View File

@ -312,6 +312,9 @@ void compute_page_skew( //get average gradient
blob_count = 0;
for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
block_it.forward ()) {
POLY_BLOCK* pb = block_it.data()->block->poly_block();
if (pb != NULL && !pb->IsText())
continue; // Pretend non-text blocks don't exist.
row_count += block_it.data ()->get_rows ()->length ();
//count up rows
row_it.set_to_list (block_it.data ()->get_rows ());
@ -332,6 +335,9 @@ void compute_page_skew( //get average gradient
row_index = 0;
for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
block_it.forward ()) {
POLY_BLOCK* pb = block_it.data()->block->poly_block();
if (pb != NULL && !pb->IsText())
continue; // Pretend non-text blocks don't exist.
row_it.set_to_list (block_it.data ()->get_rows ());
for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
row = row_it.data ();
@ -359,6 +365,9 @@ void compute_page_skew( //get average gradient
//desperate
for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
block_it.forward ()) {
POLY_BLOCK* pb = block_it.data()->block->poly_block();
if (pb != NULL && !pb->IsText())
continue; // Pretend non-text blocks don't exist.
row_it.set_to_list (block_it.data ()->get_rows ());
for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
row_it.forward ()) {
@ -593,8 +602,11 @@ void Textord::cleanup_rows_fitting(ICOORD page_tr, // top right
if (textord_heavy_nr) {
vigorous_noise_removal(block);
}
separate_underlines(block, gradient, rotation, testing_on);
pre_associate_blobs(page_tr, block, rotation, testing_on);
POLY_BLOCK* pb = block->block->poly_block();
if (pb == NULL || pb->IsText()) {
separate_underlines(block, gradient, rotation, testing_on);
pre_associate_blobs(page_tr, block, rotation, testing_on);
}
#ifndef GRAPHICS_DISABLED
if (textord_show_final_rows && testing_on) {

File diff suppressed because it is too large Load Diff

View File

@ -20,38 +20,55 @@
#ifndef TESSERACT_TEXTORD_STROKEWIDTH_H__
#define TESSERACT_TEXTORD_STROKEWIDTH_H__
#include "bbgrid.h" // Base class.
#include "blobbox.h" // BlobNeighourDir.
#include "tabvector.h" // For BLOBNBOX_CLIST.
#include "blobgrid.h" // Base class.
#include "colpartitiongrid.h"
#include "textlineprojection.h"
class TO_BLOCK;
class DENORM;
class ScrollView;
class TO_BLOCK;
namespace tesseract {
class ColPartition_LIST;
class TabFind;
class TextlineProjection;
// Misc enums to clarify bool arguments for direction-controlling args.
enum LeftOrRight {
LR_LEFT,
LR_RIGHT
};
/**
* The StrokeWidth class holds all the normal and large blobs.
* It is used to find good large blobs and move them to the normal blobs
* by virtue of having a reasonable strokewidth compatible neighbour.
*/
class StrokeWidth : public BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> {
class StrokeWidth : public BlobGrid {
public:
StrokeWidth(int gridsize, const ICOORD& bleft, const ICOORD& tright);
virtual ~StrokeWidth();
// Sets the neighbours member of the medium-sized blobs in the block.
// Searches on 4 sides of each blob for similar-sized, similar-strokewidth
// blobs and sets pointers to the good neighbours.
void SetNeighboursOnMediumBlobs(TO_BLOCK* block);
// Sets the neighbour/textline writing direction members of the medium
// and large blobs with optional repair of broken CJK characters first.
// Repair of broken CJK is needed here because broken CJK characters
// can fool the textline direction detection algorithm.
void FindTextlineDirectionAndFixBrokenCJK(bool cjk_merge,
TO_BLOCK* input_block);
// To save computation, the process of generating partitions is broken
// into the following 4 steps:
// TestVerticalTextDirection
// CorrectForRotation (used only if a rotation is to be applied)
// FindLeaderPartitions
// TODO(rays) Coming soon:
// GradeBlobsIntoPartitions.
// which will replace entirely the old call sequence of:
// InsertBlobsOld
// MoveGoodLargeBlobs.
// These functions are all required, in sequence, except for
// CorrectForRotation, which is not needed if no rotation is applied.
@ -59,36 +76,50 @@ class StrokeWidth : public BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> {
// returns true if the majority are vertical.
// If the blobs are rotated, it is necessary to call CorrectForRotation
// after rotating everything, otherwise the work done here will be enough.
// If cjk_merge is true, it will attempt to merge broken cjk characters.
// If osd_blobs is not null, a list of blobs from the dominant textline
// direction are returned for use in orientation and script detection.
bool TestVerticalTextDirection(bool cjk_merge,
TO_BLOCK* block, TabFind* line_grid,
bool TestVerticalTextDirection(TO_BLOCK* block,
BLOBNBOX_CLIST* osd_blobs);
// Corrects the data structures for the given rotation.
void CorrectForRotation(const FCOORD& rotation, TO_BLOCK* block);
void CorrectForRotation(const FCOORD& rerotation,
ColPartitionGrid* part_grid);
// Finds leader partitions and inserts them into the given grid.
void FindLeaderPartitions(TO_BLOCK* block, TabFind* line_grid);
void FindLeaderPartitions(TO_BLOCK* block,
ColPartitionGrid* part_grid);
// Finds those blobs that look like bits of vertical lines, which would
// otherwise screw up layout analysis, and marks them as noise.
void RemoveLineResidue(ColPartition_LIST* big_part_list);
// Types all the blobs as vertical text or horizontal text or unknown and
// puts them into initial ColPartitions in the supplied part_grid.
// rerotation determines how to get back to the image coordinates from the
// blob coordinates (since they may have been rotated for vertical text).
// block is the single block for the whole page or rectangle to be OCRed.
// nontext_pix (full-size), is a binary mask used to prevent merges across
// photo/text boundaries. It is not kept beyond this function.
// denorm provides a mapping back to the image from the current blob
// coordinate space.
// projection provides a measure of textline density over the image and
// provides functions to assist with diacritic detection. It should be a
// pointer to a new TextlineProjection, and will be setup here.
// part_grid is the output grid of textline partitions.
// Large blobs that cause overlap are put in separate partitions and added
// to the big_parts list.
void GradeBlobsIntoPartitions(const FCOORD& rerotation,
TO_BLOCK* block,
Pix* nontext_pix,
const DENORM* denorm,
TextlineProjection* projection,
ColPartitionGrid* part_grid,
ColPartition_LIST* big_parts);
// Handles a click event in a display window.
virtual void HandleClick(int x, int y);
// Puts the block blobs (normal and large) into the grid.
void InsertBlobsOld(TO_BLOCK* block, TabFind* line_grid);
// Moves the large blobs that have good stroke-width neighbours to the normal
// blobs list.
void MoveGoodLargeBlobs(int resolution, TO_BLOCK* block);
private:
// Reorganize the blob lists with a different definition of small, medium
// and large, compared to the original definition.
// Height is still the primary filter key, but medium width blobs of small
// height become medium, and very wide blobs of small height stay small.
void ReFilterBlobs(TO_BLOCK* block);
// Computes the noise_density_ by summing the number of elements in a
// neighbourhood of each grid cell.
void ComputeNoiseDensity(TO_BLOCK* block, TabFind* line_grid);
@ -96,20 +127,25 @@ class StrokeWidth : public BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> {
// Detects and marks leader dots/dashes.
// Leaders are horizontal chains of small or noise blobs that look
// monospace according to ColPartition::MarkAsLeaderIfMonospaced().
// Detected leaders become the only occupants of small_blobs list.
// Detected leaders become the only occupants of the block->small_blobs list.
// Non-leader small blobs get moved to the blobs list.
// Non-leader noise blobs remain singletons in the noise list.
// All small and noise blobs in high density regions are marked BTFT_NONTEXT.
void FindLeadersAndMarkNoise(bool final, TO_BLOCK* block, TabFind* line_grid,
// block is the single block for the whole page or rectangle to be OCRed.
// leader_parts is the output.
void FindLeadersAndMarkNoise(TO_BLOCK* block,
ColPartition_LIST* leader_parts);
// Puts the block blobs (normal and large) into the grid.
void InsertBlobs(TO_BLOCK* block, TabFind* line_grid);
/** Inserts the block blobs (normal and large) into this grid.
* Blobs remain owned by the block. */
void InsertBlobs(TO_BLOCK* block);
// Fix broken CJK characters, using the fake joined blobs mechanism.
// Blobs are really merged, ie the master takes all the outlines and the
// others are deleted.
void FixBrokenCJK(BLOBNBOX_LIST* blobs, TabFind* line_grid);
// Returns true if sufficient blobs are merged that it may be worth running
// again, due to a better estimate of character size.
bool FixBrokenCJK(TO_BLOCK* block);
// Collect blobs that overlap or are within max_dist of the input bbox.
// Return them in the list of blobs and expand the bbox to be the union
@ -119,16 +155,21 @@ class StrokeWidth : public BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> {
int max_size, int max_dist,
TBOX* bbox, BLOBNBOX_CLIST* blobs);
// Finds the textline direction to be horizontal or vertical according
// to distance to neighbours and 1st and 2nd order neighbours.
// Non-text tends to end up without a definite direction.
void FindTextlineFlowDirection(bool final);
// For each blob in this grid, Finds the textline direction to be horizontal
// or vertical according to distance to neighbours and 1st and 2nd order
// neighbours. Non-text tends to end up without a definite direction.
// Result is setting of the neighbours and vert_possible/horz_possible
// flags in the BLOBNBOXes currently in this grid.
// This function is called more than once if page orientation is uncertain,
// so display_if_debugging is true on the final call to display the results.
void FindTextlineFlowDirection(bool display_if_debugging);
// Sets the neighbours and good_stroke_neighbours members of the blob by
// searching close on all 4 sides.
// When finding leader dots/dashes, there is a slightly different rule for
// what makes a good neighbour.
void SetNeighbours(bool leaders, BLOBNBOX* blob);
// If activate_line_trap, then line-like objects are found and isolated.
void SetNeighbours(bool leaders, bool activate_line_trap, BLOBNBOX* blob);
// Sets the good_stroke_neighbours member of the blob if it has a
// GoodNeighbour on the given side.
@ -151,26 +192,111 @@ class StrokeWidth : public BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> {
// changed. Otherwise, only ambiguous blobs are processed.
void SmoothNeighbourTypes(BLOBNBOX* blob, bool desperate);
// Sets the leader_on_left or leader_on_right flags for blobs
// that are next to one end of the given leader partition.
// If left_of_part is true, then look at the left side of the partition for
// blobs on which to set the leader_on_right flag.
void MarkLeaderNeighbours(const ColPartition* part, bool left_of_part);
// Checks the left or right side of the given leader partition and sets the
// (opposite) leader_on_right or leader_on_left flags for blobs
// that are next to the given side of the given leader partition.
void MarkLeaderNeighbours(const ColPartition* part, LeftOrRight side);
// Partition creation. Accumulates vertical and horizontal text chains,
// puts the remaining blobs in as unknowns, and then merges/splits to
// minimize overlap and smoothes the types with neighbours and the color
// image if provided. rerotation is used to rotate the coordinate space
// back to the nontext_map_ image.
void FindInitialPartitions(const FCOORD& rerotation,
TO_BLOCK* block,
ColPartitionGrid* part_grid,
ColPartition_LIST* big_parts);
// Finds vertical chains of text-like blobs and puts them in ColPartitions.
void FindVerticalTextChains(ColPartitionGrid* part_grid);
// Finds horizontal chains of text-like blobs and puts them in ColPartitions.
void FindHorizontalTextChains(ColPartitionGrid* part_grid);
// Finds diacritics and saves their base character in the blob.
void TestDiacritics(ColPartitionGrid* part_grid, TO_BLOCK* block);
// Searches this grid for an appropriately close and sized neighbour of the
// given [small] blob. If such a blob is found, the diacritic base is saved
// in the blob and true is returned.
// The small_grid is a secondary grid that contains the small/noise objects
// that are not in this grid, but may be useful for determining a connection
// between blob and its potential base character. (See DiacriticXGapFilled.)
bool DiacriticBlob(BlobGrid* small_grid, BLOBNBOX* blob);
// Returns true if there is no gap between the base char and the diacritic
// bigger than a fraction of the height of the base char:
// Eg: line end.....'
// The quote is a long way from the end of the line, yet it needs to be a
// diacritic. To determine that the quote is not part of an image, or
// a different text block, we check for other marks in the gap between
// the base char and the diacritic.
// '<--Diacritic
// |---------|
// | |<-toobig-gap->
// | Base |<ok gap>
// |---------| x<-----Dot occupying gap
// The grid is const really.
bool DiacriticXGapFilled(BlobGrid* grid, const TBOX& diacritic_box,
const TBOX& base_box);
// Merges diacritics with the ColPartition of the base character blob.
void MergeDiacritics(TO_BLOCK* block, ColPartitionGrid* part_grid);
// Any blobs on the large_blobs list of block that are still unowned by a
// ColPartition, are probably drop-cap or vertically touching so the blobs
// are removed to the big_parts list and treated separately.
void RemoveLargeUnusedBlobs(TO_BLOCK* block,
ColPartitionGrid* part_grid,
ColPartition_LIST* big_parts);
// All remaining unused blobs are put in individual ColPartitions.
void PartitionRemainingBlobs(ColPartitionGrid* part_grid);
// If combine, put all blobs in the cell_list into a single partition,
// otherwise put each one into its own partition.
void MakePartitionsFromCellList(bool combine,
ColPartitionGrid* part_grid,
BLOBNBOX_CLIST* cell_list);
// Helper function to finish setting up a ColPartition and insert into
// part_grid.
void CompletePartition(ColPartition* part, ColPartitionGrid* part_grid);
// Merge partitions where the merge appears harmless.
void EasyMerges(ColPartitionGrid* part_grid);
// Compute a search box based on the orientation of the partition.
// Returns true if a suitable box can be calculated.
// Callback for EasyMerges.
bool OrientationSearchBox(ColPartition* part, TBOX* box);
// Merge confirmation callback for EasyMerges.
bool ConfirmEasyMerge(const ColPartition* p1, const ColPartition* p2);
// Returns true if there is no significant noise in between the boxes.
bool NoNoiseInBetween(const TBOX& box1, const TBOX& box2) const;
// Displays the blobs colored according to the number of good neighbours
// and the vertical/horizontal flow.
ScrollView* DisplayGoodBlobs(const char* window_name, int x, int y);
// Displays blobs colored according to whether or not they are diacritics.
ScrollView* DisplayDiacritics(const char* window_name,
int x, int y, TO_BLOCK* block);
private:
// Returns true if there is at least one side neighbour that has a similar
// stroke width.
bool GoodTextBlob(BLOBNBOX* blob);
// Grid to indicate the dot noise density at each grid coord.
IntGrid* noise_density_;
// Image map of photo/noise areas on the page. Borrowed pointer (not owned.)
Pix* nontext_map_;
// Textline projection map. Borrowed pointer.
TextlineProjection* projection_;
// DENORM used by projection_ to get back to image coords. Borrowed pointer.
const DENORM* denorm_;
// Bounding box of the grid.
TBOX grid_box_;
// Rerotation to get back to the original image.
FCOORD rerotation_;
// Windows for debug display.
ScrollView* leaders_win_;
ScrollView* initial_widths_win_;
ScrollView* widths_win_;
ScrollView* chains_win_;
ScrollView* diacritics_win_;
ScrollView* textlines_win_;
ScrollView* smoothed_win_;
};
} // namespace tesseract.

File diff suppressed because it is too large Load Diff

View File

@ -45,6 +45,7 @@ namespace tesseract {
typedef TessResultCallback1<bool, int> WidthCallback;
struct AlignedBlobParams;
class ColPartitionGrid;
/** Pixel resolution of column width estimates. */
const int kColumnWidthFactor = 20;
@ -67,30 +68,40 @@ class TabFind : public AlignedBlob {
/**
* Insert a list of blobs into the given grid (not necessarily this).
* If take_ownership is true, then the blobs are removed from the source list.
* See InsertBlob for the other arguments.
* It would seem to make more sense to swap this and grid, but this way
* around allows grid to not be derived from TabFind, eg a ColPartitionGrid,
* while the grid that provides the tab stops(this) has to be derived from
* TabFind.
*/
void InsertBlobList(bool h_spread, bool v_spread, bool large,
BLOBNBOX_LIST* blobs, bool take_ownership,
BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>* grid);
void InsertBlobsToGrid(bool h_spread, bool v_spread,
BLOBNBOX_LIST* blobs,
BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>* grid);
/**
* Insert a single blob into the given grid (not necessarily this).
* If h_spread, then all cells covered horizontally by the box are
* used, otherwise, just the bottom-left. Similarly for v_spread.
* If large, then insert only if the bounding box doesn't intersect
* anything else already in the grid. Returns true if the blob was inserted.
* A side effect is that the left and right rule edges of the blob are
* set according to the tab vectors in this (not grid).
*/
bool InsertBlob(bool h_spread, bool v_spread, bool large, BLOBNBOX* blob,
bool InsertBlob(bool h_spread, bool v_spread, BLOBNBOX* blob,
BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>* grid);
// Calls SetBlobRuleEdges for all the blobs in the given block.
void SetBlockRuleEdges(TO_BLOCK* block);
// Sets the left and right rule and crossing_rules for the blobs in the given
// list by finding the next outermost tabvectors for each blob.
void SetBlobRuleEdges(BLOBNBOX_LIST* blobs);
// Returns the gutter width of the given TabVector between the given y limits.
// Also returns x-shift to be added to the vector to clear any intersecting
// blobs. The shift is deducted from the returned gutter.
// If ignore_unmergeables is true, then blobs of UnMergeableType are
// ignored as if they don't exist. (Used for text on image.)
// max_gutter_width is used as the maximum width worth searching for in case
// there is nothing near the TabVector.
int GutterWidth(int bottom_y, int top_y, const TabVector& v,
bool ignore_unmergeables, int max_gutter_width,
int* required_shift);
/**
* Find the gutter width and distance to inner neighbour for the given blob.
@ -100,20 +111,6 @@ class TabFind : public AlignedBlob {
BLOBNBOX* bbox, int* gutter_width,
int* neighbour_gap);
/**
* Find the next adjacent (to left or right) blob on this text line,
* with the constraint that it must vertically significantly overlap
* the input box.
*/
BLOBNBOX* AdjacentBlob(const BLOBNBOX* bbox,
bool right_to_left, int gap_limit);
/**
* Compute and return, but do not set the type as being BRT_TEXT or
* BRT_UNKNOWN according to how well it forms a text line.
*/
BlobRegionType ComputeBlobType(BLOBNBOX* blob);
/**
* Return the x-coord that corresponds to the right edge for the given
* box. If there is a rule line to the right that vertically overlaps it,
@ -192,16 +189,24 @@ class TabFind : public AlignedBlob {
/**
* Top-level function to find TabVectors in an input page block.
* Returns false if the detected skew angle is impossible.
* Applies the detected skew angle to deskew the tabs, blobs and part_grid.
*/
bool FindTabVectors(TabVector_LIST* hlines,
BLOBNBOX_LIST* image_blobs, TO_BLOCK* block,
int min_gutter_width,
ColPartitionGrid* part_grid,
FCOORD* deskew, FCOORD* reskew);
// Top-level function that does not find TabVectors in an input page block,
// but instead sets up for single column mode.
void DontFindTabVectors(BLOBNBOX_LIST* image_blobs,
TO_BLOCK* block, FCOORD* deskew, FCOORD* reskew);
// Cleans up the lists of blobs in the block ready for use by TabFind.
// Large blobs that look like text are moved to the main blobs list.
// Main blobs that are superseded by the image blobs are deleted.
void TidyBlobs(TO_BLOCK* block);
// Helper function to setup search limits for *TabForBox.
void SetupTabSearch(int x, int y, int* min_key, int* max_key);
@ -229,15 +234,33 @@ class TabFind : public AlignedBlob {
TabVector_LIST* horizontal_lines,
int* min_gutter_width);
// Clear the grid and get rid of the tab vectors, but not separators,
// ready to start again.
void Reset();
// Reflect the separator tab vectors and the grids in the y-axis.
// Can only be called after Reset!
void ReflectInYAxis();
private:
// For each box in the grid, decide whether it is a candidate tab-stop,
// and if so add it to the tab_grid_.
// and if so add it to the left and right tab boxes.
ScrollView* FindTabBoxes(int min_gutter_width);
// Return true if this box looks like a candidate tab stop, and set
// the appropriate tab type(s) to TT_UNCONFIRMED.
bool TestBoxForTabs(BLOBNBOX* bbox, int min_gutter_width);
// Returns true if there is nothing in the rectangle of width min_gutter to
// the left of bbox.
bool ConfirmRaggedLeft(BLOBNBOX* bbox, int min_gutter);
// Returns true if there is nothing in the rectangle of width min_gutter to
// the right of bbox.
bool ConfirmRaggedRight(BLOBNBOX* bbox, int min_gutter);
// Returns true if there is nothing in the given search_box that vertically
// overlaps target_box other than target_box itself.
bool NothingYOverlapsInBox(const TBOX& search_box, const TBOX& target_box);
// Fills the list of TabVector with the tabstops found in the grid,
// and estimates the logical vertical direction.
void FindAllTabVectors(int min_gutter_width);
@ -272,13 +295,17 @@ class TabFind : public AlignedBlob {
// Trace textlines from one side to the other of each tab vector, saving
// the most frequent column widths found in a list so that a given width
// can be tested for being a common width with a simple callback function.
void ComputeColumnWidths(ScrollView* tab_win);
void ComputeColumnWidths(ScrollView* tab_win,
ColPartitionGrid* part_grid);
// Set the region_type_ member for all the blobs in the grid.
void ComputeBlobGoodness();
// Find column width and pair-up tab vectors with existing ColPartitions.
void ApplyPartitionsToColumnWidths(ColPartitionGrid* part_grid,
STATS* col_widths);
// Set the region_type_ member of the blob, if not already known.
void SetBlobRegionType(BLOBNBOX* blob);
// Helper makes the list of common column widths in column_widths_ from the
// input col_widths. Destroys the content of col_widths by repeatedly
// finding the mode and erasing the peak.
void MakeColumnWidths(int col_widths_size, STATS* col_widths);
// Mark blobs as being in a vertical text line where that is the case.
void MarkVerticalText();
@ -288,48 +315,14 @@ class TabFind : public AlignedBlob {
// points (< kMinLinesInColumn), then 0 is returned.
int FindMedianGutterWidth(TabVector_LIST* tab_vectors);
// If this box looks like it is on a textline in the given direction,
// return the width of the textline-like group of blobs, and the number
// of blobs found.
// For more detail see FindTextlineSegment below.
int FindTextlineWidth(bool right_to_left, BLOBNBOX* bbox, int* blob_count);
// Search from the given tabstop bbox to the next opposite
// tabstop bbox on the same text line, which may be itself.
// Returns true if the search is successful, and sets
// start_pt, end_pt to the fitted baseline, width to the measured
// width of the text line (column width estimate.)
bool TraceTextline(BLOBNBOX* bbox, ICOORD* start_pt, ICOORD* end_pt,
int* left_edge, int* right_edge);
// Search from the given bbox in the given direction until the next tab
// vector is found or a significant horizontal gap is found.
// Returns the width of the line if the search is successful, (defined
// as good coverage of the width and a good fitting baseline) and sets
// start_pt, end_pt to the fitted baseline, left_blob, right_blob to
// the ends of the line. Returns zero otherwise.
// Sets blob_count to the number of blobs found on the line.
// On input, either both left_vector and right_vector should be NULL,
// indicating a basic search, or both left_vector and right_vector should
// be not NULL and one of *left_vector and *right_vector should be not NULL,
// in which case the search is strictly between tab vectors and will return
// zero if a gap is found before the opposite tab vector is reached, or a
// conflicting tab vector is found.
// If ignore_images is true, then blobs with aligned_text() < 0 are treated
// as if they do not exist.
int FindTextlineSegment(bool right_to_lefts, bool ignore_images,
BLOBNBOX* bbox, int* blob_count,
ICOORD* start_pt, ICOORD* end_pt,
TabVector** left_vector, TabVector** right_vector,
BLOBNBOX** left_blob, BLOBNBOX** right_blob);
// Find the next adjacent (to left or right) blob on this text line,
// with the constraint that it must vertically significantly overlap
// the [top_y, bottom_y] range.
// If ignore_images is true, then blobs with aligned_text() < 0 are treated
// as if they do not exist.
BLOBNBOX* AdjacentBlob(const BLOBNBOX* bbox,
bool right_to_left, bool ignore_images,
bool look_left, bool ignore_images,
double min_overlap_fraction,
int gap_limit, int top_y, int bottom_y);
// Add a bi-directional partner relationship between the left
@ -373,8 +366,9 @@ class TabFind : public AlignedBlob {
ICOORDELT_LIST column_widths_; //< List of commonly occurring widths.
/** Callback to test an int for being a common width. */
WidthCallback* width_cb_;
/** Instance of the base class that contains only candidate tab stops. */
BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>* tab_grid_;
// Sets of bounding boxes that are candidate tab stops.
GenericVector<BLOBNBOX*> left_tab_boxes_;
GenericVector<BLOBNBOX*> right_tab_boxes_;
};
} // namespace tesseract.

View File

@ -970,7 +970,7 @@ bool TableFinder::HasLeaderAdjacent(const ColPartition& part) {
if (!part.IsInSameColumnAs(*leader))
break;
// There should be a significant vertical overlap
if (!leader->VOverlaps(part))
if (!leader->VSignificantCoreOverlap(part))
continue;
// Leader passed all tests, so it is adjacent.
return true;
@ -2112,9 +2112,17 @@ void TableFinder::MakeTableBlocks(ColPartitionGrid* grid,
}
// Insert table colpartition back to part_grid_
if (table_partition) {
table_partition->SetPartitionType(resolution_,
all_columns[table_search.GridY()]);
// To match the columns used when transforming to blocks, the new table
// partition must have its first and last column set at the grid y that
// corresponds to its bottom.
const TBOX& table_box = table_partition->bounding_box();
int grid_x, grid_y;
grid->GridCoords(table_box.left(), table_box.bottom(), &grid_x, &grid_y);
table_partition->SetPartitionType(resolution_, all_columns[grid_y]);
table_partition->set_table_type();
table_partition->set_blob_type(BRT_TEXT);
table_partition->set_flow(BTFT_CHAIN);
table_partition->SetBlobTypes();
grid->InsertBBox(true, true, table_partition);
}
}

View File

@ -26,6 +26,7 @@
#include "colfind.h"
#include "colpartitionset.h"
#include "detlinefit.h"
#include "statistc.h"
// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
@ -52,7 +53,7 @@ const double kLineCountReciprocal = 4.0;
// Constant add-on for minimum gutter for aligned tabs.
const double kMinAlignedGutter = 0.25;
// Constant add-on for minimum gutter for ragged tabs.
const double kMinRaggedGutter = 2.0;
const double kMinRaggedGutter = 1.5;
double_VAR(textord_tabvector_vertical_gap_fraction, 0.5,
"max fraction of mean blob width allowed for vertical gaps in vertical text");
@ -205,7 +206,8 @@ TabVector::TabVector(const TabVector& src, TabAlignment alignment,
const ICOORD& vertical_skew, BLOBNBOX* blob)
: extended_ymin_(src.extended_ymin_), extended_ymax_(src.extended_ymax_),
sort_key_(0), percent_score_(0), mean_width_(0),
needs_refit_(true), needs_evaluation_(true), alignment_(alignment),
needs_refit_(true), needs_evaluation_(true), intersects_other_lines_(false),
alignment_(alignment),
top_constraints_(NULL), bottom_constraints_(NULL) {
BLOBNBOX_C_IT it(&boxes_);
it.add_to_end(blob);
@ -236,6 +238,7 @@ TabVector* TabVector::ShallowCopy() const {
copy->alignment_ = alignment_;
copy->extended_ymax_ = extended_ymax_;
copy->extended_ymin_ = extended_ymin_;
copy->intersects_other_lines_ = intersects_other_lines_;
return copy;
}
@ -373,6 +376,9 @@ void TabVector::MergeSimilarTabVectors(const ICOORD& vertical,
v1->Print("by deleting");
}
v2->MergeWith(vertical, it1.extract());
if (textord_debug_tabfind) {
v2->Print("Producing");
}
ICOORD merged_vector = v2->endpt();
merged_vector -= v2->startpt();
if (abs(merged_vector.x()) > 100) {
@ -604,13 +610,19 @@ void TabVector::Evaluate(const ICOORD& vertical, TabFind* finder) {
++height_count;
}
mean_height /= height_count;
int max_gutter = kGutterMultiple * mean_height;
if (IsRagged()) {
// Ragged edges face a tougher test in that the gap must always be within
// the height of the blob.
max_gutter = kGutterToNeighbourRatio * mean_height;
}
STATS gutters(0, max_gutter + 1);
// Evaluate the boxes for their goodness, calculating the coverage as we go.
// Remove boxes that are not good and shorten the list to the first and
// last good boxes.
bool deleted_a_box = false;
int mean_gutter = 0;
int gutter_count = 0;
int num_deleted_boxes = 0;
bool text_on_image = false;
int good_length = 0;
const TBOX* prev_good_box = NULL;
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
@ -618,8 +630,10 @@ void TabVector::Evaluate(const ICOORD& vertical, TabFind* finder) {
const TBOX& box = bbox->bounding_box();
int mid_y = (box.top() + box.bottom()) / 2;
if (TabFind::WithinTestRegion(2, XAtY(box.bottom()), box.bottom())) {
if (!debug)
if (!debug) {
tprintf("After already deleting %d boxes, ", num_deleted_boxes);
Print("Starting evaluation");
}
debug = true;
}
// A good box is one where the nearest neighbour on the inside is closer
@ -627,17 +641,11 @@ void TabVector::Evaluate(const ICOORD& vertical, TabFind* finder) {
// (of the putative column).
bool left = IsLeftTab();
int tab_x = XAtY(mid_y);
int max_gutter = kGutterMultiple * mean_height;
if (IsRagged()) {
// Ragged edges face a tougher test in that the gap must always be within
// the height of the blob.
max_gutter = kGutterToNeighbourRatio * mean_height;
}
int gutter_width;
int neighbour_gap;
finder->GutterWidthAndNeighbourGap(tab_x, mean_height, max_gutter, left,
bbox, &gutter_width, &neighbour_gap);
if (TabFind::WithinTestRegion(2, tab_x, mid_y)) {
if (debug) {
tprintf("Box (%d,%d)->(%d,%d) has gutter %d, ndist %d\n",
box.left(), box.bottom(), box.right(), box.top(),
gutter_width, neighbour_gap);
@ -646,8 +654,7 @@ void TabVector::Evaluate(const ICOORD& vertical, TabFind* finder) {
if (neighbour_gap * kGutterToNeighbourRatio <= gutter_width) {
// A good box contributes its height to the good_length.
good_length += box.top() - box.bottom();
mean_gutter += gutter_width;
++gutter_count;
gutters.add(gutter_width, 1);
// Two good boxes together contribute the gap between them
// to the good_length as well, as long as the gap is not
// too big.
@ -667,6 +674,8 @@ void TabVector::Evaluate(const ICOORD& vertical, TabFind* finder) {
SetYStart(box.bottom());
}
prev_good_box = &box;
if (bbox->flow() == BTFT_TEXT_ON_IMAGE)
text_on_image = true;
} else {
// Get rid of boxes that are not good.
if (debug) {
@ -675,7 +684,7 @@ void TabVector::Evaluate(const ICOORD& vertical, TabFind* finder) {
gutter_width, neighbour_gap);
}
it.extract();
deleted_a_box = true;
++num_deleted_boxes;
}
}
if (debug) {
@ -684,8 +693,10 @@ void TabVector::Evaluate(const ICOORD& vertical, TabFind* finder) {
// If there are any good boxes, do it again, except this time get rid of
// boxes that have a gutter that is a small fraction of the mean gutter.
// This filters out ends that run into a coincidental gap in the text.
if (gutter_count > 0) {
mean_gutter /= gutter_count;
int search_top = endpt_.y();
int search_bottom = startpt_.y();
int median_gutter = IntCastRounded(gutters.median());
if (gutters.get_total() > 0) {
prev_good_box = NULL;
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
BLOBNBOX* bbox = it.data();
@ -706,21 +717,23 @@ void TabVector::Evaluate(const ICOORD& vertical, TabFind* finder) {
finder->GutterWidthAndNeighbourGap(tab_x, mean_height, max_gutter, left,
bbox, &gutter_width, &neighbour_gap);
// Now we can make the test.
if (gutter_width >= mean_gutter * kMinGutterFraction) {
if (gutter_width >= median_gutter * kMinGutterFraction) {
if (prev_good_box == NULL) {
// Adjust the start to the first good box.
SetYStart(box.bottom());
search_bottom = box.top();
}
prev_good_box = &box;
search_top = box.bottom();
} else {
// Get rid of boxes that are not good.
if (TabFind::WithinTestRegion(2, tab_x, mid_y)) {
if (debug) {
tprintf("Bad Box (%d,%d)->(%d,%d) with gutter %d, mean gutter %d\n",
box.left(), box.bottom(), box.right(), box.top(),
gutter_width, mean_gutter);
gutter_width, median_gutter);
}
it.extract();
deleted_a_box = true;
// Count this deletion. The counter must only be incremented: assigning to
// the result of the pre-increment would overwrite the running total with 1.
++num_deleted_boxes;
}
}
}
@ -730,7 +743,7 @@ void TabVector::Evaluate(const ICOORD& vertical, TabFind* finder) {
// Compute the percentage of the vector that is occupied by good boxes.
int length = endpt_.y() - startpt_.y();
percent_score_ = 100 * good_length / length;
if (deleted_a_box) {
if (num_deleted_boxes > 0) {
needs_refit_ = true;
FitAndEvaluateIfNeeded(vertical, finder);
if (boxes_.empty())
@ -738,11 +751,19 @@ void TabVector::Evaluate(const ICOORD& vertical, TabFind* finder) {
}
// Test the gutter over the whole vector, instead of just at the boxes.
int required_shift;
int gutter_width = finder->GutterWidth(startpt_.y(), endpt_.y(), *this,
&required_shift);
if (search_bottom > search_top) {
search_bottom = startpt_.y();
search_top = endpt_.y();
}
double min_gutter_width = kLineCountReciprocal / boxes_.length();
min_gutter_width += IsRagged() ? kMinRaggedGutter : kMinAlignedGutter;
min_gutter_width *= mean_height;
int max_gutter_width = IntCastRounded(min_gutter_width) + 1;
if (median_gutter > max_gutter_width)
max_gutter_width = median_gutter;
int gutter_width = finder->GutterWidth(search_bottom, search_top, *this,
text_on_image, max_gutter_width,
&required_shift);
if (gutter_width < min_gutter_width) {
if (debug) {
tprintf("Rejecting bad tab Vector with %d gutter vs %g min\n",

View File

@ -20,6 +20,7 @@
#ifndef TESSERACT_TEXTORD_TABVECTOR_H__
#define TESSERACT_TEXTORD_TABVECTOR_H__
#include "blobgrid.h"
#include "clst.h"
#include "elst.h"
#include "elst2.h"
@ -29,8 +30,6 @@
class BLOBNBOX;
class ScrollView;
CLISTIZEH(BLOBNBOX)
namespace tesseract {
@ -56,8 +55,6 @@ enum TabAlignment {
class TabFind;
class TabVector;
class TabConstraint;
typedef BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> BlobGrid;
typedef GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> BlobGridSearch;
ELIST2IZEH(TabVector)
CLISTIZEH(TabVector)
@ -179,6 +176,12 @@ class TabVector : public ELIST2_LINK {
void set_endpt(const ICOORD& end) {
endpt_ = end;
}
// Returns the stored intersects_other_lines_ flag.
bool intersects_other_lines() const {
return intersects_other_lines_;
}
// Sets the stored intersects_other_lines_ flag to the given value.
void set_intersects_other_lines(bool value) {
intersects_other_lines_ = value;
}
// Inline quasi-accessors that require some computation.
@ -258,6 +261,21 @@ class TabVector : public ELIST2_LINK {
endpt_.set_x(x);
}
// Mirrors the tab vector about the y-axis: the x-coordinates of both end
// points and the sort key change sign, and a left alignment type is swapped
// for the matching right alignment type (and vice versa). Any other
// alignment type is left as it is.
void ReflectInYAxis() {
  startpt_.set_x(-startpt_.x());
  endpt_.set_x(-endpt_.x());
  sort_key_ = -sort_key_;
  switch (alignment_) {
    case TA_LEFT_ALIGNED:
      alignment_ = TA_RIGHT_ALIGNED;
      break;
    case TA_RIGHT_ALIGNED:
      alignment_ = TA_LEFT_ALIGNED;
      break;
    case TA_LEFT_RAGGED:
      alignment_ = TA_RIGHT_RAGGED;
      break;
    case TA_RIGHT_RAGGED:
      alignment_ = TA_LEFT_RAGGED;
      break;
    default:
      break;
  }
}
// Separate function to compute the sort key for a given coordinate pair.
static int SortKey(const ICOORD& vertical, int x, int y) {
ICOORD pt(x, y);
@ -393,6 +411,8 @@ class TabVector : public ELIST2_LINK {
bool needs_refit_;
// True if a fit has been done, so re-evaluation is needed.
bool needs_evaluation_;
// True if a separator line intersects at least 2 other lines.
bool intersects_other_lines_;
// The type of this TabVector.
TabAlignment alignment_;
// The list of boxes whose edges are aligned at this TabVector.

View File

@ -0,0 +1,764 @@
// Copyright 2011 Google Inc. All Rights Reserved.
// Author: rays@google.com (Ray Smith)
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "textlineprojection.h"
#include "allheaders.h"
#include "bbgrid.h" // Base class.
#include "blobbox.h" // BlobNeighourDir.
#include "blobs.h"
#include "colpartition.h"
#include "normalis.h"
// Padding factor to use on definitely oriented blobs.
const int kOrientedPadFactor = 8;
// Padding factor to use on not definitely oriented blobs.
const int kDefaultPadFactor = 2;
// Penalty factor for going away from the line center.
const int kWrongWayPenalty = 4;
// Ratio between parallel gap and perpendicular gap used to measure total
// distance of a box from a target box in curved textline space.
// The parallel gap is treated more favorably by this factor to allow catching
// quotes and ellipsis at the end of textlines.
const int kParaPerpDistRatio = 4;
// Multiple of scale_factor_ that the inter-line gap must be before we start
// padding the increment box perpendicular to the text line.
const int kMinLineSpacingFactor = 4;
// Maximum tab-stop overrun for horizontal padding, in projection pixels.
const int kMaxTabStopOverrun = 6;
namespace tesseract {
// Creates an empty projection (no pix, origin at zero) and derives the
// image-to-projection scale factor from the given resolution.
TextlineProjection::TextlineProjection(int resolution)
    : x_origin_(0), y_origin_(0), pix_(NULL) {
  // Aim for a projection map of about 100 ppi whatever the input resolution,
  // but never let the scale factor fall below 1.
  int rounded_scale = IntCastRounded(resolution / 100.0);
  scale_factor_ = rounded_scale < 1 ? 1 : rounded_scale;
}
// Releases the projection pix via pixDestroy.
TextlineProjection::~TextlineProjection() {
pixDestroy(&pix_);
}
// Builds the projection map from the blobs and large_blobs lists of the
// input_block. rotation converts to image coords and nontext_map is the
// full-resolution map of areas to avoid; both are handed on to ProjectBlobs.
// Any previously built projection is discarded first.
// Assumptions during construction:
//   The rotation is a multiple of 90 degrees, ie no deskew yet.
//   The blobs have had their left and right rules set to also limit
//   the range of projection.
void TextlineProjection::ConstructProjection(TO_BLOCK* input_block,
                                             const FCOORD& rotation,
                                             Pix* nontext_map) {
  pixDestroy(&pix_);
  // The projection covers the same area as the nontext_map.
  TBOX image_box(0, 0, pixGetWidth(nontext_map), pixGetHeight(nontext_map));
  x_origin_ = 0;
  y_origin_ = image_box.height();
  // Scale the image dimensions down, rounding up, to get the map size.
  int map_width = (image_box.width() + scale_factor_ - 1) / scale_factor_;
  int map_height = (image_box.height() + scale_factor_ - 1) / scale_factor_;
  pix_ = pixCreate(map_width, map_height, 8);
  ProjectBlobs(&input_block->blobs, rotation, image_box, nontext_map);
  ProjectBlobs(&input_block->large_blobs, rotation, image_box, nontext_map);
  // Replace the raw projection with its block-convolved version.
  // (A 2, 2 convolution was also tried here.)
  Pix* smoothed_pix = pixBlockconv(pix_, 1, 1);
  pixDestroy(&pix_);
  pix_ = smoothed_pix;
}
// Draws the bounding box of every blob in the list into win, with the pen
// color indicating textline quality: yellow for uniquely vertical blobs,
// red for blobs that BoxOutOfHTextline rejects, and blue for the rest.
void TextlineProjection::PlotGradedBlobs(BLOBNBOX_LIST* blobs,
                                         ScrollView* win) {
  BLOBNBOX_IT blob_it(blobs);
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    BLOBNBOX* blob = blob_it.data();
    const TBOX& box = blob->bounding_box();
    bool out_of_line = BoxOutOfHTextline(box, NULL, false);
    if (blob->UniquelyVertical()) {
      win->Pen(ScrollView::YELLOW);
    } else if (out_of_line) {
      win->Pen(ScrollView::RED);
    } else {
      win->Pen(ScrollView::BLUE);
    }
    win->Rectangle(box.left(), box.bottom(), box.right(), box.top());
  }
  win->Update();
}
// Transfers blobs that do not appear to sit well on a textline from the
// input blobs list to the end of the small_blobs list, clearing their
// neighbours on the way. Uniquely vertical blobs are never moved.
// This keeps such blobs out of initial textline finding so that diacritics
// do not form incorrect textlines. (Introduced mainly to fix Thai.)
void TextlineProjection::MoveNonTextlineBlobs(
    BLOBNBOX_LIST* blobs, BLOBNBOX_LIST* small_blobs) const {
  BLOBNBOX_IT blob_it(blobs);
  BLOBNBOX_IT small_blob_it(small_blobs);
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    BLOBNBOX* blob = blob_it.data();
    const TBOX& box = blob->bounding_box();
    bool debug = AlignedBlob::WithinTestRegion(2, box.left(),
                                               box.bottom());
    // Blobs that lie within a textline stay where they are.
    if (!BoxOutOfHTextline(box, NULL, debug))
      continue;
    // So do blobs that are uniquely vertical.
    if (blob->UniquelyVertical())
      continue;
    blob->ClearNeighbours();
    small_blob_it.add_to_end(blob_it.extract());
  }
}
// Renders the projection as a 32-bit false-color image for debugging.
// Each 8-bit projection value is mapped onto a three-segment color ramp:
//   0..17    blue rises from 0 to 255,
//   18..144  green rises with blue held at 255,
//   145..255 red rises with green and blue held at 255.
// The window display is disabled (see the TODO), so the image is currently
// written to "projection.png" instead.
void TextlineProjection::DisplayProjection() const {
  int width = pixGetWidth(pix_);
  int height = pixGetHeight(pix_);
  Pix* pixc = pixCreate(width, height, 32);
  int src_wpl = pixGetWpl(pix_);
  int col_wpl = pixGetWpl(pixc);
  uinT32* src_data = pixGetData(pix_);
  uinT32* col_data = pixGetData(pixc);
  // Walk both images a row at a time, advancing each by its own words/line.
  for (int y = 0; y < height; ++y, src_data += src_wpl, col_data += col_wpl) {
    for (int x = 0; x < width; ++x) {
      int pixel = GET_DATA_BYTE(src_data, x);
      l_uint32 result;
      if (pixel <= 17) {
        composeRGBPixel(0, 0, pixel * 15, &result);
      } else if (pixel < 145) {
        // The bound is exclusive: at 145 the green value (pixel - 17) * 2
        // would be 256, which does not fit in an 8-bit channel. 145 is
        // handled by the final branch, which yields full green instead.
        composeRGBPixel(0, (pixel - 17) * 2, 255, &result);
      } else {
        composeRGBPixel((pixel - 145) * 2, 255, 255, &result);
      }
      col_data[x] = result;
    }
  }
#if 0
  // TODO(rays) uncomment when scrollview can display non-binary images.
  ScrollView* win = new ScrollView("Projection", 0, 0,
                                   width, height, width, height);
  win->Image(pixc, 0, 0);
  win->Update();
#else
  pixWrite("projection.png", pixc, IFF_PNG);
#endif
  pixDestroy(&pixc);
}
// Compute the distance of the box from the partition using curved projection
// space. As DistanceOfBoxFromBox, except that the direction is taken from
// the ColPartition and the median bounds of the ColPartition are used as
// the to_box.
int TextlineProjection::DistanceOfBoxFromPartition(const TBOX& box,
const ColPartition& part,
const DENORM* denorm,
bool debug) const {
// Compute a partition box that uses the median top/bottom of the blobs
// within and median left/right for vertical.
TBOX part_box = part.bounding_box();
if (part.IsHorizontalType()) {
part_box.set_top(part.median_top());
part_box.set_bottom(part.median_bottom());
} else {
part_box.set_left(part.median_left());
part_box.set_right(part.median_right());
}
// Now use DistanceOfBoxFromBox to make the actual calculation.
return DistanceOfBoxFromBox(box, part_box, part.IsHorizontalType(),
denorm, debug);
}
// Compute the distance from the from_box to the to_box using curved
// projection space. Separation that involves a decrease in projection
// density (moving from the from_box to the to_box) is weighted more heavily
// than constant density, and an increase is weighted less.
// If horizontal_textline is true, then curved space is used vertically,
// as for a diacritic on the edge of a textline.
// The projection uses original image coords, so denorm is used to get
// back to the image coords from box/part space.
// How the calculation works: Think of a diacritic near a textline.
// Distance is measured from the far side of the from_box to the near side of
// the to_box. Shown is the horizontal textline case.
// |------^-----|
// | from | box |
// |------|-----|
// perpendicular |
// <------v-------->|--------------------|
// parallel | to box |
// |--------------------|
// Perpendicular distance uses "curved space" See VerticalDistance below.
// Parallel distance is linear.
// Result is perpendicular_gap + parallel_gap / kParaPerpDistRatio.
int TextlineProjection::DistanceOfBoxFromBox(const TBOX& from_box,
const TBOX& to_box,
bool horizontal_textline,
const DENORM* denorm,
bool debug) const {
// The parallel_gap is the horizontal gap between a horizontal textline and
// the box. Analogous for vertical.
int parallel_gap = 0;
// start_pt is the box end of the line to be modified for curved space.
TPOINT start_pt;
// end_pt is the partition end of the line to be modified for curved space.
TPOINT end_pt;
if (horizontal_textline) {
parallel_gap = from_box.x_gap(to_box) + from_box.width();
start_pt.x = (from_box.left() + from_box.right()) / 2;
end_pt.x = start_pt.x;
if (from_box.top() - to_box.top() >= to_box.bottom() - from_box.bottom()) {
start_pt.y = from_box.top();
end_pt.y = MIN(to_box.top(), start_pt.y);
} else {
start_pt.y = from_box.bottom();
end_pt.y = MAX(to_box.bottom(), start_pt.y);
}
} else {
parallel_gap = from_box.y_gap(to_box) + from_box.height();
if (from_box.right() - to_box.right() >= to_box.left() - from_box.left()) {
start_pt.x = from_box.right();
end_pt.x = MIN(to_box.right(), start_pt.x);
} else {
start_pt.x = from_box.left();
end_pt.x = MAX(to_box.left(), start_pt.x);
}
start_pt.y = (from_box.bottom() + from_box.top()) / 2;
end_pt.y = start_pt.y;
}
// The perpendicular gap is the max vertical distance gap out of:
// top of from_box to to_box top and bottom of from_box to to_box bottom.
// This value is then modified for curved projection space.
// Analogous for vertical.
int perpendicular_gap = 0;
// If start_pt == end_pt, then the from_box lies entirely within the to_box
// (in the perpendicular direction), so we don't need to calculate the
// perpendicular_gap.
if (start_pt.x != end_pt.x || start_pt.y != end_pt.y) {
if (denorm != NULL) {
// Denormalize the start and end.
denorm->DenormTransform(start_pt, &start_pt);
denorm->DenormTransform(end_pt, &end_pt);
}
if (abs(start_pt.y - end_pt.y) >= abs(start_pt.x - end_pt.x)) {
perpendicular_gap = VerticalDistance(debug, start_pt.x, start_pt.y,
end_pt.y);
} else {
perpendicular_gap = HorizontalDistance(debug, start_pt.x, end_pt.x,
start_pt.y);
}
}
// The parallel_gap weighs less than the perpendicular_gap.
return perpendicular_gap + parallel_gap / kParaPerpDistRatio;
}
// Compute the distance between (x, y1) and (x, y2) using the rule that
// a decrease in textline density is weighted more heavily than an increase.
// The coordinates are in source image space, ie processed by any denorm
// already, but not yet scaled by scale_factor_.
// Going from the outside of a textline to the inside should measure much
// less distance than going from the inside of a textline to the outside.
// How it works:
// An increase is cheap (getting closer to a textline).
// Constant costs unity.
// A decrease is expensive (getting further from a textline).
// Pixels in projection map Counted distance
// 2
// 3 1/x
// 3 1
// 2 x
// 5 1/x
// 7 1/x
// Total: 1 + x + 3/x where x = kWrongWayPenalty.
int TextlineProjection::VerticalDistance(bool debug, int x,
int y1, int y2) const {
x = ImageXToProjectionX(x);
y1 = ImageYToProjectionY(y1);
y2 = ImageYToProjectionY(y2);
if (y1 == y2) return 0;
int wpl = pixGetWpl(pix_);
int step = y1 < y2 ? 1 : -1;
uinT32* data = pixGetData(pix_) + y1 * wpl;
wpl *= step;
int prev_pixel = GET_DATA_BYTE(data, x);
int distance = 0;
int right_way_steps = 0;
for (int y = y1; y != y2; y += step) {
data += wpl;
int pixel = GET_DATA_BYTE(data, x);
if (debug)
tprintf("At (%d,%d), pix = %d, prev=%d\n",
x, y + step, pixel, prev_pixel);
if (pixel < prev_pixel)
distance += kWrongWayPenalty;
else if (pixel > prev_pixel)
++right_way_steps;
else
++distance;
prev_pixel = pixel;
}
return distance * scale_factor_ +
right_way_steps * scale_factor_ / kWrongWayPenalty;
}
// Compute the distance between (x1, y) and (x2, y) using the rule that
// a decrease in textline density is weighted more heavily than an increase.
int TextlineProjection::HorizontalDistance(bool debug, int x1, int x2,
int y) const {
x1 = ImageXToProjectionX(x1);
x2 = ImageXToProjectionX(x2);
y = ImageYToProjectionY(y);
if (x1 == x2) return 0;
int wpl = pixGetWpl(pix_);
int step = x1 < x2 ? 1 : -1;
uinT32* data = pixGetData(pix_) + y * wpl;
int prev_pixel = GET_DATA_BYTE(data, x1);
int distance = 0;
int right_way_steps = 0;
for (int x = x1; x != x2; x += step) {
int pixel = GET_DATA_BYTE(data, x + step);
if (debug)
tprintf("At (%d,%d), pix = %d, prev=%d\n",
x + step, y, pixel, prev_pixel);
if (pixel < prev_pixel)
distance += kWrongWayPenalty;
else if (pixel > prev_pixel)
++right_way_steps;
else
++distance;
prev_pixel = pixel;
}
return distance * scale_factor_ +
right_way_steps * scale_factor_ / kWrongWayPenalty;
}
// Returns true if the box appears to lie outside of a horizontal textline.
// Such blobs are potentially diacritics (even if large in Thai) and should
// be kept away from initial textline finding.
// The decision uses the unclipped top and bottom gradients of the box.
bool TextlineProjection::BoxOutOfHTextline(const TBOX& box,
                                           const DENORM* denorm,
                                           bool debug) const {
  int top_grad = 0;
  int bottom_grad = 0;
  EvaluateBoxInternal(box, denorm, debug, &top_grad, &bottom_grad, NULL, NULL);
  // A large combined gradient means the box is strongly in a textline.
  if (top_grad + bottom_grad >= 6) return false;
  // Medium strength: if either gradient is negative, the box is likely
  // outside the body of the textline.
  return top_grad < 0 || bottom_grad < 0;
}
// Evaluates the textlineiness of a ColPartition. Uses EvaluateBox below,
// but uses the median top/bottom for horizontal and median left/right for
// vertical instead of the bounding box edges.
// Evaluates for both horizontal and vertical and returns the best result,
// with a positive value for horizontal and a negative value for vertical.
int TextlineProjection::EvaluateColPartition(const ColPartition& part,
const DENORM* denorm,
bool debug) const {
if (part.IsSingleton())
return EvaluateBox(part.bounding_box(), denorm, debug);
// Test vertical orientation.
TBOX box = part.bounding_box();
// Use the partition median for left/right.
box.set_left(part.median_left());
box.set_right(part.median_right());
int vresult = EvaluateBox(box, denorm, debug);
// Test horizontal orientation.
box = part.bounding_box();
// Use the partition median for top/bottom.
box.set_top(part.median_top());
box.set_bottom(part.median_bottom());
int hresult = EvaluateBox(box, denorm, debug);
if (debug) {
tprintf("Partition hresult=%d, vresult=%d from:", hresult, vresult);
part.bounding_box().print();
part.Print();
}
return hresult >= -vresult ? hresult : vresult;
}
// Computes the mean projection gradients over the horizontal and vertical
// edges of the box:
// -h-h-h-h-h-h
// |------------| mean=htop -v|+v--------+v|-v
// |+h+h+h+h+h+h| -v|+v +v|-v
// | | -v|+v +v|-v
// | box | -v|+v box +v|-v
// | | -v|+v +v|-v
// |+h+h+h+h+h+h| -v|+v +v|-v
// |------------| mean=hbot -v|+v--------+v|-v
// -h-h-h-h-h-h
// mean=vleft mean=vright
//
// Returns MAX(htop,hbot) - MAX(vleft,vright), which is a positive number
// for a horizontal textline, a negative number for a vertical textline,
// and near zero for undecided. Undecided is most likely non-text.
// All the gradients are truncated to remain non-negative, since negative
// horizontal gradients don't give any indication of being vertical and
// vice versa.
// Additional complexity: The coordinates have to be transformed to original
// image coordinates with denorm (if not null), scaled to match the projection
// pix, and THEN stepped out 2 pixels each way from the edge to compute the
// gradient. Three positions are tried, each measuring the gradient over a
// 4-pixel spread: (+3/-1), (+2/-2), (+1/-3). This complexity is handled by
// several layers of helpers below.
// This is a thin wrapper: it forwards to EvaluateBoxInternal without asking
// for any of the individual gradients.
int TextlineProjection::EvaluateBox(const TBOX& box, const DENORM* denorm,
bool debug) const {
return EvaluateBoxInternal(box, denorm, debug, NULL, NULL, NULL, NULL);
}
// Internal version of EvaluateBox that can also return the unclipped
// gradients alongside the EvaluateBox result.
// hgrad1 and hgrad2 receive the top and bottom edge gradients (those that
// matter for a horizontal textline) and are only written if both are
// non-NULL. vgrad1 and vgrad2 likewise receive the left and right edge
// gradients, and are only written if both are non-NULL.
// The return value is MAX(top, bottom) - MAX(left, right), computed from the
// gradients after clipping each of them to be non-negative.
int TextlineProjection::EvaluateBoxInternal(const TBOX& box,
                                            const DENORM* denorm, bool debug,
                                            int* hgrad1, int* hgrad2,
                                            int* vgrad1, int* vgrad2) const {
  // The bottom and right gradients are negated so that a good edge gives a
  // positive value on all four sides.
  int top_gradient = BestMeanGradientInRow(denorm, box.left(), box.right(),
                                           box.top(), true);
  int bottom_gradient = -BestMeanGradientInRow(denorm, box.left(), box.right(),
                                               box.bottom(), false);
  int left_gradient = BestMeanGradientInColumn(denorm, box.left(), box.bottom(),
                                               box.top(), true);
  int right_gradient = -BestMeanGradientInColumn(denorm, box.right(),
                                                 box.bottom(), box.top(),
                                                 false);
  if (debug) {
    tprintf("Gradients: top = %d, bottom = %d, left= %d, right= %d for box:",
            top_gradient, bottom_gradient, left_gradient, right_gradient);
    box.print();
  }
  // Hand back the unclipped gradients where the caller asked for them.
  const bool want_horizontal = hgrad1 != NULL && hgrad2 != NULL;
  if (want_horizontal) {
    *hgrad1 = top_gradient;
    *hgrad2 = bottom_gradient;
  }
  const bool want_vertical = vgrad1 != NULL && vgrad2 != NULL;
  if (want_vertical) {
    *vgrad1 = left_gradient;
    *vgrad2 = right_gradient;
  }
  // Negative gradients carry no information, so clip them at zero before
  // taking the strongest edge in each direction.
  int best_horizontal = MAX(MAX(top_gradient, 0), MAX(bottom_gradient, 0));
  int best_vertical = MAX(MAX(left_gradient, 0), MAX(right_gradient, 0));
  return best_horizontal - best_vertical;
}
// Helper returns the mean gradient value across the horizontal row at the
// given y (in the external coordinates) running from min_x to max_x.
// A gradient is the mean of the line segment at a positive offset minus the
// mean of the line segment at a negative offset, with the offsets 4 pixels
// apart. Three offset pairs are tried: -2/+2, -1/+3 and -3/+1.
// This gives a positive value for a good top edge and negative for bottom.
// Returns the largest of the three gradients if best_is_max is true, and the
// smallest otherwise.
int TextlineProjection::BestMeanGradientInRow(const DENORM* denorm,
                                              inT16 min_x, inT16 max_x, inT16 y,
                                              bool best_is_max) const {
  // Offset pairs in the order they are tried; the first is the baseline.
  static const int kUpperOffsets[3] = {-2, -1, -3};
  static const int kLowerOffsets[3] = {2, 3, 1};
  TPOINT start_pt(min_x, y);
  TPOINT end_pt(max_x, y);
  int best_gradient = 0;
  for (int trial = 0; trial < 3; ++trial) {
    int upper = MeanPixelsInLineSegment(denorm, kUpperOffsets[trial],
                                        start_pt, end_pt);
    int lower = MeanPixelsInLineSegment(denorm, kLowerOffsets[trial],
                                        start_pt, end_pt);
    int gradient = lower - upper;
    // The first trial always sets the baseline; later trials replace it only
    // if they are better in the requested direction.
    if (trial == 0 || (gradient > best_gradient) == best_is_max)
      best_gradient = gradient;
  }
  return best_gradient;
}
// Helper returns the mean gradient value for the vertical column at the
// given x, (in the external coordinates) by subtracting the mean of the
// transformed column 2 pixels left from the mean of the transformed column
// 2 pixels to the right.
// This gives a positive value for a good left edge and negative for right.
// Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge.
int TextlineProjection::BestMeanGradientInColumn(const DENORM* denorm, inT16 x,
inT16 min_y, inT16 max_y,
bool best_is_max) const {
TPOINT start_pt(x, min_y);
TPOINT end_pt(x, max_y);
int left = MeanPixelsInLineSegment(denorm, -2, start_pt, end_pt);
int right = MeanPixelsInLineSegment(denorm, 2, start_pt, end_pt);
int best_gradient = right - left;
left = MeanPixelsInLineSegment(denorm, -1, start_pt, end_pt);
right = MeanPixelsInLineSegment(denorm, 3, start_pt, end_pt);
int gradient = right - left;
if ((gradient > best_gradient) == best_is_max)
best_gradient = gradient;
left = MeanPixelsInLineSegment(denorm, -3, start_pt, end_pt);
right = MeanPixelsInLineSegment(denorm, 1, start_pt, end_pt);
gradient = right - left;
if ((gradient > best_gradient) == best_is_max)
best_gradient = gradient;
return best_gradient;
}
// Helper returns the mean pixel value over the line between the start_pt and
// end_pt (inclusive), but shifted perpendicular to the line in the projection
// image by offset pixels. For simplicity, it is assumed that the vector is
// either nearly horizontal or nearly vertical. It works on skewed textlines!
// The end points are in external coordinates, and will be denormalized with
// the denorm if not NULL before further conversion to pix coordinates.
// After all the conversions, the offset is added to the direction
// perpendicular to the line direction. The offset is thus in projection image
// coordinates, which allows the caller to get a guaranteed displacement
// between pixels used to calculate gradients.
// Returns 0 if the two end points coincide after conversion to pix
// coordinates.
int TextlineProjection::MeanPixelsInLineSegment(const DENORM* denorm,
                                                int offset,
                                                TPOINT start_pt,
                                                TPOINT end_pt) const {
  TransformToPixCoords(denorm, &start_pt);
  TransformToPixCoords(denorm, &end_pt);
  TruncateToImageBounds(&start_pt);
  TruncateToImageBounds(&end_pt);
  int wpl = pixGetWpl(pix_);
  uinT32* data = pixGetData(pix_);
  int total = 0;
  int count = 0;
  int x_delta = end_pt.x - start_pt.x;
  int y_delta = end_pt.y - start_pt.y;
  if (abs(x_delta) >= abs(y_delta)) {
    // |x_delta| >= |y_delta|, so a zero x_delta means a zero-length segment.
    if (x_delta == 0)
      return 0;
    // Horizontal line. Add the offset vertically.
    int x_step = x_delta > 0 ? 1 : -1;
    // Correct offset for rotation, keeping it anti-clockwise of the delta.
    offset *= x_step;
    start_pt.y += offset;
    end_pt.y += offset;
    // The shift may have pushed the line off the image, so clip again.
    TruncateToImageBounds(&start_pt);
    TruncateToImageBounds(&end_pt);
    x_delta = end_pt.x - start_pt.x;
    y_delta = end_pt.y - start_pt.y;
    // Number of pixels on the line, counting both end points.
    count = x_delta * x_step + 1;
    // Visit every x from start_pt.x to end_pt.x inclusive, so that the number
    // of pixels summed matches count. Stopping at end_pt.x would leave out
    // the final pixel and bias the mean low.
    const int x_stop = end_pt.x + x_step;
    for (int x = start_pt.x; x != x_stop; x += x_step) {
      // Interpolate y along the (possibly skewed) line.
      int y = start_pt.y + DivRounded(y_delta * (x - start_pt.x), x_delta);
      total += GET_DATA_BYTE(data + wpl * y, x);
    }
  } else {
    // Vertical line. Add the offset horizontally.
    // Here |y_delta| > |x_delta| >= 0, so y_delta cannot be zero.
    int y_step = y_delta > 0 ? 1 : -1;
    // Correct offset for rotation, keeping it anti-clockwise of the delta.
    // Pix holds the image with y=0 at the top, so the offset is negated.
    offset *= -y_step;
    start_pt.x += offset;
    end_pt.x += offset;
    // The shift may have pushed the line off the image, so clip again.
    TruncateToImageBounds(&start_pt);
    TruncateToImageBounds(&end_pt);
    x_delta = end_pt.x - start_pt.x;
    y_delta = end_pt.y - start_pt.y;
    // Number of pixels on the line, counting both end points.
    count = y_delta * y_step + 1;
    // Visit every y from start_pt.y to end_pt.y inclusive (see above).
    const int y_stop = end_pt.y + y_step;
    for (int y = start_pt.y; y != y_stop; y += y_step) {
      // Interpolate x along the (possibly skewed) line.
      int x = start_pt.x + DivRounded(x_delta * (y - start_pt.y), y_delta);
      total += GET_DATA_BYTE(data + wpl * y, x);
    }
  }
  return DivRounded(total, count);
}
// Given an input pix, and a box, the sides of the box are shrunk inwards until
// they bound any black pixels found within the original box.
// The function converts between tesseract coords and the pix coords assuming
// that this pix is full resolution equal in size to the original image.
// Returns an empty box if there are no black pixels in the source box, or if
// the source box cannot be represented as a Leptonica Box.
static TBOX BoundsWithinBox(Pix* pix, const TBOX& box) {
  TBOX result_box;
  int im_height = pixGetHeight(pix);
  // Tesseract boxes are bottom-up; the pix is top-down, so flip y.
  Box* input_box = boxCreate(box.left(), im_height - box.top(),
                             box.width(), box.height());
  // boxCreate returns NULL for geometry it rejects (e.g. a degenerate box).
  // Per the Leptonica documentation, pixClipBoxToForeground treats a NULL box
  // as "search the whole image", which would report foreground from outside
  // the requested box, so bail out with the empty result instead.
  if (input_box == NULL)
    return result_box;
  Box* output_box = NULL;
  pixClipBoxToForeground(pix, input_box, NULL, &output_box);
  // output_box stays NULL when no foreground pixels were found.
  if (output_box != NULL) {
    l_int32 x, y, width, height;
    boxGetGeometry(output_box, &x, &y, &width, &height);
    result_box.set_left(x);
    result_box.set_right(x + width);
    // Convert back from top-down pix coords to bottom-up tesseract coords.
    result_box.set_top(im_height - y);
    result_box.set_bottom(result_box.top() - height);
    boxDestroy(&output_box);
  }
  boxDestroy(&input_box);
  return result_box;
}
// Splits the given box in half at x_middle or y_middle according to split_on_x
// and checks for nontext_map pixels in each half. Reduces the bbox so that it
// still includes the middle point, but does not touch any fg pixels in
// nontext_map. An empty box may be returned if there is no such box.
static void TruncateBoxToMissNonText(int x_middle, int y_middle,
bool split_on_x, Pix* nontext_map,
TBOX* bbox) {
TBOX box1(*bbox);
TBOX box2(*bbox);
TBOX im_box;
if (split_on_x) {
box1.set_right(x_middle);
im_box = BoundsWithinBox(nontext_map, box1);
if (!im_box.null_box()) box1.set_left(im_box.right());
box2.set_left(x_middle);
im_box = BoundsWithinBox(nontext_map, box2);
if (!im_box.null_box()) box2.set_right(im_box.left());
} else {
box1.set_bottom(y_middle);
im_box = BoundsWithinBox(nontext_map, box1);
if (!im_box.null_box()) box1.set_top(im_box.bottom());
box2.set_top(y_middle);
im_box = BoundsWithinBox(nontext_map, box2);
if (!im_box.null_box()) box2.set_bottom(im_box.top());
}
box1 += box2;
*bbox = box1;
}
// Helper function to add 1 to a rectangle in source image coords to the
// internal projection pix_.
void TextlineProjection::IncrementRectangle8Bit(const TBOX& box) {
int scaled_left = ImageXToProjectionX(box.left());
int scaled_top = ImageYToProjectionY(box.top());
int scaled_right = ImageXToProjectionX(box.right());
int scaled_bottom = ImageYToProjectionY(box.bottom());
int wpl = pixGetWpl(pix_);
uinT32* data = pixGetData(pix_) + scaled_top * wpl;
for (int y = scaled_top; y <= scaled_bottom; ++y) {
for (int x = scaled_left; x <= scaled_right; ++x) {
int pixel = GET_DATA_BYTE(data, x);
if (pixel < 255)
SET_DATA_BYTE(data, x, pixel + 1);
}
data += wpl;
}
}
// Adds every blob in the list to the projection.
// rotation is a multiple of 90 degrees that takes blob coords to nontext_map
// coords, and nontext_map_box is the bounds of the nontext_map.
// Each blob box is padded by PadBlobBox (which also applies the blob's rule
// line limits), rotated, clipped to nontext_map_box and then cut back by
// TruncateBoxToMissNonText so that it avoids set pixels in the nontext_map.
void TextlineProjection::ProjectBlobs(BLOBNBOX_LIST* blobs,
                                      const FCOORD& rotation,
                                      const TBOX& nontext_map_box,
                                      Pix* nontext_map) {
  BLOBNBOX_IT it(blobs);
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    BLOBNBOX* current = it.data();
    TBOX spread_box = current->bounding_box();
    // Take the centre before padding changes the box.
    ICOORD centre((spread_box.left() + spread_box.right()) / 2,
                  (spread_box.bottom() + spread_box.top()) / 2);
    bool horizontal_spread = PadBlobBox(current, &spread_box);
    // Move both the box and its centre into nontext_map coords.
    spread_box.rotate(rotation);
    centre.rotate(rotation);
    // A rotation with a zero x component swaps the axes, and with them the
    // direction of spreading.
    if (rotation.x() == 0.0f)
      horizontal_spread = !horizontal_spread;
    // In-place intersection: keep only the part inside the map.
    spread_box &= nontext_map_box;
    // Cut the spread back where it would run into non-text pixels.
    TruncateBoxToMissNonText(centre.x(), centre.y(), horizontal_spread,
                             nontext_map, &spread_box);
    if (spread_box.area() <= 0)
      continue;  // Nothing left to project.
    IncrementRectangle8Bit(spread_box);
  }
}
// Grows *bbox according to whether the blob sits on a horizontal or a
// vertical text line, then pulls the left and right edges back so that they
// do not overrun the blob's left/right rules by more than a fixed limit.
// Returns true if the padding was applied in the horizontal direction.
bool TextlineProjection::PadBlobBox(BLOBNBOX* blob, TBOX* bbox) {
  // Padding perpendicular to the textline helps diacritics get absorbed when
  // the text is well spaced out, but with tightly spaced text it would wipe
  // out the blank space between textlines in the projection map. It is
  // therefore only applied when the nearest neighbours are further away than
  // spacing_limit.
  const int spacing_limit = scale_factor_ * kMinLineSpacingFactor;
  int pad_x = 0;
  int pad_y = 0;
  bool horizontal = false;
  if (blob->UniquelyHorizontal()) {
    horizontal = true;
    pad_x = bbox->height() * kOrientedPadFactor;
    // With no close neighbour above or below, also pad vertically by a
    // single pixel of the projection, to help join diacritics to the line.
    BLOBNBOX* above = blob->neighbour(BND_ABOVE);
    BLOBNBOX* below = blob->neighbour(BND_BELOW);
    if ((above == NULL ||
         bbox->y_gap(above->bounding_box()) > spacing_limit) &&
        (below == NULL ||
         bbox->y_gap(below->bounding_box()) > spacing_limit)) {
      pad_y = scale_factor_;
    }
  } else if (blob->UniquelyVertical()) {
    pad_y = bbox->width() * kOrientedPadFactor;
    // Mirror image of the horizontal case, using the side neighbours.
    BLOBNBOX* left = blob->neighbour(BND_LEFT);
    BLOBNBOX* right = blob->neighbour(BND_RIGHT);
    if ((left == NULL ||
         bbox->x_gap(left->bounding_box()) > spacing_limit) &&
        (right == NULL ||
         bbox->x_gap(right->bounding_box()) > spacing_limit)) {
      pad_x = scale_factor_;
    }
  } else {
    // Orientation is not unique. Pad in each direction in which the blob has
    // a neighbour that names this blob as its own neighbour in return.
    BLOBNBOX* above = blob->neighbour(BND_ABOVE);
    BLOBNBOX* below = blob->neighbour(BND_BELOW);
    if ((above != NULL && above->neighbour(BND_BELOW) == blob) ||
        (below != NULL && below->neighbour(BND_ABOVE) == blob)) {
      pad_y = bbox->width() * kDefaultPadFactor;
    }
    BLOBNBOX* right = blob->neighbour(BND_RIGHT);
    BLOBNBOX* left = blob->neighbour(BND_LEFT);
    if ((right != NULL && right->neighbour(BND_LEFT) == blob) ||
        (left != NULL && left->neighbour(BND_RIGHT) == blob)) {
      pad_x = bbox->height() * kDefaultPadFactor;
      horizontal = true;
    }
  }
  bbox->pad(pad_x, pad_y);
  // Now shrink horizontally to avoid stepping more than overrun_limit over a
  // tab-stop.
  const int overrun_limit = scale_factor_ * kMaxTabStopOverrun;
  const int min_left = blob->left_rule() - overrun_limit;
  if (bbox->left() < min_left)
    bbox->set_left(min_left);
  const int max_right = blob->right_rule() + overrun_limit;
  if (bbox->right() > max_right)
    bbox->set_right(max_right);
  return horizontal;
}
// Converts *pt in place to pix_ coordinates. If denorm is not NULL the point
// is first denormalized with it; the result is then mapped through
// ImageXToProjectionX/ImageYToProjectionY.
void TextlineProjection::TransformToPixCoords(const DENORM* denorm,
                                              TPOINT* pt) const {
  // Undo the normalization, if there was any.
  if (denorm != NULL)
    denorm->DenormTransform(*pt, pt);
  const int projection_x = ImageXToProjectionX(pt->x);
  const int projection_y = ImageYToProjectionY(pt->y);
  pt->x = projection_x;
  pt->y = projection_y;
}
// Clips *pt in place so that it addresses a pixel inside pix_, i.e.
// 0 <= x <= width - 1 and 0 <= y <= height - 1.
void TextlineProjection::TruncateToImageBounds(TPOINT* pt) const {
  const int last_x = pixGetWidth(pix_) - 1;
  pt->x = ClipToRange<int>(pt->x, 0, last_x);
  const int last_y = pixGetHeight(pix_) - 1;
  pt->y = ClipToRange<int>(pt->y, 0, last_y);
}
// Transform tesseract image coordinates to coordinates used in the projection.
// The x coordinate is taken relative to x_origin_, divided by scale_factor_
// and clipped to the width of pix_.
int TextlineProjection::ImageXToProjectionX(int x) const {
  const int scaled = (x - x_origin_) / scale_factor_;
  const int last_column = pixGetWidth(pix_) - 1;
  return ClipToRange(scaled, 0, last_column);
}
int TextlineProjection::ImageYToProjectionY(int y) const {
  // y is measured downwards from y_origin_, scaled down by scale_factor_ and
  // clipped to the height of pix_.
  const int scaled = (y_origin_ - y) / scale_factor_;
  const int last_row = pixGetHeight(pix_) - 1;
  return ClipToRange(scaled, 0, last_row);
}
} // namespace tesseract.

View File

@ -0,0 +1,206 @@
// Copyright 2011 Google Inc. All Rights Reserved.
// Author: rays@google.com (Ray Smith)
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_TEXTORD_TEXTLINEPROJECTION_H_
#define TESSERACT_TEXTORD_TEXTLINEPROJECTION_H_
#include "blobgrid.h" // For BlobGrid
class DENORM;
struct Pix;
struct TPOINT;
namespace tesseract {
class ColPartition;
// Simple class to encapsulate the computation of an image representing
// local textline density, and function(s) to make use of it.
// The underlying principle is that if you smear connected components
// horizontally (vertically for components on a vertically written textline)
// and count the number of smeared components in an image, then the resulting
// image shows the density of the textlines at each image position.
class TextlineProjection {
public:
// The down-scaling factor is computed to obtain a projection resolution
// of about 100 dpi, whatever the input.
explicit TextlineProjection(int resolution);
~TextlineProjection();
// Build the projection profile given the input_block containing lists of
// blobs, a rotation to convert to image coords,
// and a full-resolution nontext_map, marking out areas to avoid.
// During construction, we have the following assumptions:
// The rotation is a multiple of 90 degrees, ie no deskew yet.
// The blobs have had their left and right rules set to also limit
// the range of projection.
void ConstructProjection(TO_BLOCK* input_block,
const FCOORD& rotation, Pix* nontext_map);
// Display the blobs in the window colored according to textline quality.
void PlotGradedBlobs(BLOBNBOX_LIST* blobs, ScrollView* win);
// Moves blobs that look like they don't sit well on a textline from the
// input blobs list to the output small_blobs list.
// This gets them away from initial textline finding to stop diacritics
// from forming incorrect textlines. (Introduced mainly to fix Thai.)
void MoveNonTextlineBlobs(BLOBNBOX_LIST* blobs,
BLOBNBOX_LIST* small_blobs) const;
// Create a window and display the projection in it.
void DisplayProjection() const;
// Compute the distance of the box from the partition using curved projection
// space. As DistanceOfBoxFromBox, except that the direction is taken from
// the ColPartition and the median bounds of the ColPartition are used as
// the to_box.
int DistanceOfBoxFromPartition(const TBOX& box, const ColPartition& part,
const DENORM* denorm, bool debug) const;
// Compute the distance from the from_box to the to_box using curved
// projection space. Separation that involves a decrease in projection
// density (moving from the from_box to the to_box) is weighted more heavily
// than constant density, and an increase is weighted less.
// If horizontal_textline is true, then curved space is used vertically,
// as for a diacritic on the edge of a textline.
// The projection uses original image coords, so denorm is used to get
// back to the image coords from box/part space.
int DistanceOfBoxFromBox(const TBOX& from_box, const TBOX& to_box,
bool horizontal_textline,
const DENORM* denorm, bool debug) const;
// Compute the distance between (x, y1) and (x, y2) using the rule that
// a decrease in textline density is weighted more heavily than an increase.
// The coordinates are in source image space, ie processed by any denorm
// already, but not yet scaled by scale_factor_.
// Going from the outside of a textline to the inside should measure much
// less distance than going from the inside of a textline to the outside.
int VerticalDistance(bool debug, int x, int y1, int y2) const;
// Compute the distance between (x1, y) and (x2, y) using the rule that
// a decrease in textline density is weighted more heavily than an increase.
int HorizontalDistance(bool debug, int x1, int x2, int y) const;
// Returns true if the blob appears to be outside of a horizontal textline.
// Such blobs are potentially diacritics (even if large in Thai) and should
// be kept away from initial textline finding.
bool BoxOutOfHTextline(const TBOX& box, const DENORM* denorm,
bool debug) const;
// Evaluates the textlineiness of a ColPartition. Uses EvaluateBox below,
// but uses the median top/bottom for horizontal and median left/right for
// vertical instead of the bounding box edges.
// Evaluates for both horizontal and vertical and returns the best result,
// with a positive value for horizontal and a negative value for vertical.
int EvaluateColPartition(const ColPartition& part, const DENORM* denorm,
bool debug) const;
// Computes the mean projection gradients over the horizontal and vertical
// edges of the box:
//   -h-h-h-h-h-h
//  |------------| mean=htop   -v|+v--------+v|-v
//  |+h+h+h+h+h+h|             -v|+v        +v|-v
//  |            |             -v|+v        +v|-v
//  |    box     |             -v|+v  box   +v|-v
//  |            |             -v|+v        +v|-v
//  |+h+h+h+h+h+h|             -v|+v        +v|-v
//  |------------| mean=hbot   -v|+v--------+v|-v
//   -h-h-h-h-h-h
//                           mean=vleft  mean=vright
//
// Returns MAX(htop,hbot) - MAX(vleft,vright), which is a positive number
// for a horizontal textline, a negative number for a vertical textline,
// and near zero for undecided. Undecided is most likely non-text.
int EvaluateBox(const TBOX& box, const DENORM* denorm, bool debug) const;
private:
// Internal version of EvaluateBox returns the unclipped gradients as well
// as the result of EvaluateBox.
// hgrad1 and hgrad2 receive the top and bottom gradients (the gradients for
// the horizontal textline), and vgrad1 and vgrad2 receive the left and right
// gradients. Each pair is only written if both of its pointers are non-NULL.
int EvaluateBoxInternal(const TBOX& box, const DENORM* denorm, bool debug,
int* hgrad1, int* hgrad2,
int* vgrad1, int* vgrad2) const;
// Helper returns the mean gradient value for the horizontal row at the given
// y, (in the external coordinates) by subtracting the mean of the transformed
// row 2 pixels above from the mean of the transformed row 2 pixels below.
// This gives a positive value for a good top edge and negative for bottom.
// Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge,
// where best is the largest gradient if best_is_max is true, and the
// smallest otherwise.
int BestMeanGradientInRow(const DENORM* denorm, inT16 min_x, inT16 max_x,
inT16 y, bool best_is_max) const;
// Helper returns the mean gradient value for the vertical column at the
// given x, (in the external coordinates) by subtracting the mean of the
// transformed column 2 pixels left from the mean of the transformed column
// 2 pixels to the right.
// This gives a positive value for a good left edge and negative for right.
// Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge,
// where best is the largest gradient if best_is_max is true, and the
// smallest otherwise.
int BestMeanGradientInColumn(const DENORM* denorm, inT16 x, inT16 min_y,
inT16 max_y, bool best_is_max) const;
// Helper returns the mean pixel value over the line between the start_pt and
// end_pt (inclusive), but shifted perpendicular to the line in the projection
// image by offset pixels. For simplicity, it is assumed that the vector is
// either nearly horizontal or nearly vertical. It works on skewed textlines!
// The end points are in external coordinates, and will be denormalized with
// the denorm if not NULL before further conversion to pix coordinates.
// After all the conversions, the offset is added to the direction
// perpendicular to the line direction. The offset is thus in projection image
// coordinates, which allows the caller to get a guaranteed displacement
// between pixels used to calculate gradients.
// Returns 0 if the end points coincide after conversion to pix coordinates.
int MeanPixelsInLineSegment(const DENORM* denorm, int offset,
TPOINT start_pt, TPOINT end_pt) const;
// Helper function to add 1 to a rectangle in source image coords to the
// internal projection pix_. Pixel values saturate at 255.
void IncrementRectangle8Bit(const TBOX& box);
// Inserts a list of blobs into the projection.
// Rotation is a multiple of 90 degrees to get from blob coords to
// nontext_map coords, image_box is the bounds of the nontext_map.
// Blobs are spread horizontally or vertically according to their internal
// flags, but the spreading is truncated by set pixels in the nontext_map
// and also by the horizontal rule line limits on the blobs.
void ProjectBlobs(BLOBNBOX_LIST* blobs, const FCOORD& rotation,
const TBOX& image_box, Pix* nontext_map);
// Pads the bounding box of the given blob according to whether it is on
// a horizontal or vertical text line, taking into account tab-stops near
// the blob. The bbox is modified in place.
// Returns true if padding was in the horizontal direction.
bool PadBlobBox(BLOBNBOX* blob, TBOX* bbox);
// Helper denormalizes the TPOINT with the denorm if not NULL, then
// converts to pix_ coordinates. The point is modified in place.
void TransformToPixCoords(const DENORM* denorm, TPOINT* pt) const;
// Helper truncates the TPOINT to be within the pix_.
void TruncateToImageBounds(TPOINT* pt) const;
// Transform tesseract coordinates to coordinates used in the pix.
// The results are clipped to lie within the bounds of pix_.
int ImageXToProjectionX(int x) const;
int ImageYToProjectionY(int y) const;
// The down-sampling scale factor used in building the image.
int scale_factor_;
// The blob coordinates of the top-left (origin of the pix_) in tesseract
// coordinates. Used to transform the bottom-up tesseract coordinates to
// the top-down coordinates of the pix.
int x_origin_;
int y_origin_;
// The image of horizontally smeared blob boxes summed to provide a
// textline density map. As with a horizontal projection, the map has
// dips in the gaps between textlines.
Pix* pix_;
};
} // namespace tesseract.
#endif // TESSERACT_TEXTORD_TEXTLINEPROJECTION_H_

View File

@ -27,7 +27,7 @@
namespace tesseract {
Textord::Textord(CCStruct* ccstruct)
: ccstruct_(ccstruct),
: ccstruct_(ccstruct), use_cjk_fp_model_(false),
// makerow.cpp ///////////////////////////////////////////
BOOL_MEMBER(textord_single_height_mode, false,
"Script has no xheight, so use a single mode",
@ -317,6 +317,13 @@ void Textord::TextordPage(PageSegMode pageseg_mode,
to_block->get_rows(), to_block->block->row_list());
}
cleanup_blocks(blocks); // Remove empties.
// Compute the margins for each row in the block, to be used later for
// paragraph detection.
BLOCK_IT b_it(blocks);
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
b_it.data()->compute_row_margins();
}
#ifndef GRAPHICS_DISABLED
close_to_win();
#endif

View File

@ -50,6 +50,13 @@ class Textord {
// than one, clean up and leave only the best.
void CleanupSingleRowResult(PageSegMode pageseg_mode, PAGE_RES* page_res);
bool use_cjk_fp_model() const {
return use_cjk_fp_model_;
}
void set_use_cjk_fp_model(bool flag) {
use_cjk_fp_model_ = flag;
}
// tospace.cpp ///////////////////////////////////////////
void to_spacing(
ICOORD page_tr, //topright of page
@ -64,6 +71,7 @@ class Textord {
// tordmain.cpp ///////////////////////////////////////////
void find_components(Pix* pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks);
void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, BOOL8 testing_on);
private:
// For underlying memory management and other utilities.
CCStruct* ccstruct_;
@ -71,6 +79,8 @@ class Textord {
// The size of the input image.
ICOORD page_tr_;
bool use_cjk_fp_model_;
// makerow.cpp ///////////////////////////////////////////
// Make the textlines inside each block.
void MakeRows(PageSegMode pageseg_mode, const FCOORD& skew,

View File

@ -112,9 +112,11 @@ void compute_fixed_pitch(ICOORD page_tr, // top right
}
block_index = 1;
for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
block_it.forward ()) {
for (block_it.mark_cycle_pt(); !block_it.cycled_list();
block_it.forward()) {
block = block_it.data ();
POLY_BLOCK* pb = block->block->poly_block();
if (pb != NULL && !pb->IsText()) continue; // Non-text doesn't exist!
row_it.set_to_list (block->get_rows ());
row_index = 1;
for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
@ -166,9 +168,11 @@ void fix_row_pitch(TO_ROW *bad_row, // row to fix
block_stats.set_range (0, maxwidth);
like_stats.set_range (0, maxwidth);
block_index = 1;
for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
block_it.forward ()) {
block = block_it.data ();
for (block_it.mark_cycle_pt(); !block_it.cycled_list();
block_it.forward()) {
block = block_it.data();
POLY_BLOCK* pb = block->block->poly_block();
if (pb != NULL && !pb->IsText()) continue; // Non text doesn't exist!
row_index = 1;
row_it.set_to_list (block->get_rows ());
for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
@ -568,7 +572,8 @@ BOOL8 try_rows_fixed( //find line stats
row = row_it.data ();
ASSERT_HOST (row->xheight > 0);
maxwidth = (inT32) ceil (row->xheight * textord_words_maxspace);
if (row->fixed_pitch > 0 && fixed_pitch_row (row, block_index)) {
if (row->fixed_pitch > 0 &&
fixed_pitch_row(row, block->block, block_index)) {
if (row->fixed_pitch == 0) {
lower = row->pr_nonsp;
upper = row->pr_space;
@ -971,9 +976,9 @@ BOOL8 find_row_pitch( //find lines
* The larger threshold is the word gap threshold.
**********************************************************************/
BOOL8 fixed_pitch_row( //find lines
TO_ROW *row, //row to do
inT32 block_index //block_number
BOOL8 fixed_pitch_row(TO_ROW *row, // row to do
BLOCK* block,
inT32 block_index // block_number
) {
const char *res_string; //pitch result
inT16 mid_cuts; //no of cheap cuts
@ -984,7 +989,8 @@ BOOL8 fixed_pitch_row( //find lines
non_space = row->fp_nonsp;
if (non_space > row->fixed_pitch)
non_space = row->fixed_pitch;
if (textord_all_prop) {
POLY_BLOCK* pb = block != NULL ? block->poly_block() : NULL;
if (textord_all_prop || (pb != NULL && !pb->IsText())) {
// Set the decision to definitely proportional.
pitch_sd = textord_words_def_prop * row->fixed_pitch;
row->pitch_decision = PITCH_DEF_PROP;
@ -1755,6 +1761,10 @@ void print_pitch_sd( //find fp cells
**********************************************************************/
void find_repeated_chars(TO_BLOCK *block, // Block to search.
BOOL8 testing_on) { // Debug mode.
POLY_BLOCK* pb = block->block->poly_block();
if (pb != NULL && !pb->IsText())
return; // Don't find repeated chars in non-text blocks.
TO_ROW *row;
BLOBNBOX_IT box_it;
BLOBNBOX_IT search_it; // forward search

View File

@ -107,6 +107,7 @@ BOOL8 find_row_pitch( //find lines
);
BOOL8 fixed_pitch_row( //find lines
TO_ROW *row, //row to do
BLOCK* block,
inT32 block_index //block_number
);
BOOL8 count_pitch_stats( //find lines

View File

@ -29,6 +29,7 @@
#include "pitsync1.h"
#include "tovars.h"
#include "topitch.h"
#include "cjkpitch.h"
#include "textord.h"
#include "fpchop.h"
#include "wordseg.h"
@ -101,7 +102,6 @@ void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST* real_rows) {
*
* Arrange the blobs into words.
*/
void make_words(tesseract::Textord *textord,
ICOORD page_tr, // top right
float gradient, // page skew
@ -110,8 +110,12 @@ void make_words(tesseract::Textord *textord,
TO_BLOCK_IT block_it; // iterator
TO_BLOCK *block; // current block
compute_fixed_pitch(page_tr, port_blocks, gradient, FCOORD(0.0f, -1.0f),
!(BOOL8) textord_test_landscape);
if (textord->use_cjk_fp_model()) {
compute_fixed_pitch_cjk(page_tr, port_blocks);
} else {
compute_fixed_pitch(page_tr, port_blocks, gradient, FCOORD(0.0f, -1.0f),
!(BOOL8) textord_test_landscape);
}
textord->to_spacing(page_tr, port_blocks);
block_it.set_to_list(port_blocks);
for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
@ -525,24 +529,26 @@ void make_real_words(
row = row_it.data ();
if (row->blob_list ()->empty () && !row->rep_words.empty ()) {
real_row = make_rep_words (row, block);
}
else if (!row->blob_list()->empty()) {
} else if (!row->blob_list()->empty()) {
// In a fixed pitch document, some lines may be detected as fixed pitch
// while others don't, and will go through different path.
// For non-space delimited language like CJK, fixed pitch chop always
// leave the entire line as one word. We can force consistent chopping
// with force_make_prop_words flag.
POLY_BLOCK* pb = block->block->poly_block();
if (textord_chopper_test) {
real_row = textord->make_blob_words (row, rotation);
} else if (textord_force_make_prop_words ||
row->pitch_decision == PITCH_DEF_PROP ||
row->pitch_decision == PITCH_CORR_PROP) {
(pb != NULL && !pb->IsText()) ||
row->pitch_decision == PITCH_DEF_PROP ||
row->pitch_decision == PITCH_CORR_PROP) {
real_row = textord->make_prop_words (row, rotation);
} else if (row->pitch_decision == PITCH_DEF_FIXED ||
row->pitch_decision == PITCH_CORR_FIXED) {
real_row = fixed_pitch_words (row, rotation);
} else
} else {
ASSERT_HOST(FALSE);
}
}
if (real_row != NULL) {
//put row in block

View File

@ -108,9 +108,11 @@ void WorkingPartSet::MakeBlocks(const ICOORD& bleft, const ICOORD& tright,
ColPartition* next_block_part = part_it_.data();
const TBOX& part_box = part->bounding_box();
const TBOX& next_box = next_block_part->bounding_box();
// In addition to the same type, the next box must not be above the
// current box, nor (if image) too far below.
if (next_block_part->type() == part->type() &&
PolyBlockType type = part->type(), next_type = next_block_part->type();
if (ColPartition::TypesSimilar(type, next_type) &&
next_box.bottom() <= part_box.top() &&
(text_block ||
part_box.bottom() - next_box.top() < part_box.height()))
@ -139,4 +141,3 @@ void WorkingPartSet::MakeBlocks(const ICOORD& bleft, const ICOORD& tright,
}
} // namespace tesseract.