mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-24 02:59:07 +08:00
Major improvements to layout analysis for better image detection, diacritic detection, better textline finding, better tabstop finding
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@648 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
parent
04068c7055
commit
6e3d810c1d
@ -6,14 +6,15 @@ AM_CPPFLAGS = \
|
||||
-I$(top_srcdir)/cutil -I$(top_srcdir)/classify -I$(top_srcdir)/dict
|
||||
|
||||
include_HEADERS = \
|
||||
alignedblob.h bbgrid.h blkocc.h \
|
||||
colfind.h colpartition.h colpartitionset.h \
|
||||
alignedblob.h bbgrid.h blkocc.h blobgrid.h \
|
||||
ccnontextdetect.h cjkpitch.h colfind.h colpartition.h colpartitionset.h \
|
||||
colpartitiongrid.h \
|
||||
devanagari_processing.h drawedg.h drawtord.h edgblob.h edgloop.h \
|
||||
equationdetectbase.h \
|
||||
fpchop.h gap_map.h imagefind.h linefind.h makerow.h oldbasel.h \
|
||||
pithsync.h pitsync1.h scanedg.h sortflts.h strokewidth.h \
|
||||
tabfind.h tablefind.h tabvector.h \
|
||||
tablerecog.h textord.h \
|
||||
tablerecog.h textlineprojection.h textord.h \
|
||||
topitch.h tordmain.h tovars.h \
|
||||
underlin.h wordseg.h workingpartset.h
|
||||
|
||||
@ -34,13 +35,14 @@ libtesseract_textord_la_LIBADD = \
|
||||
endif
|
||||
|
||||
libtesseract_textord_la_SOURCES = \
|
||||
alignedblob.cpp bbgrid.cpp blkocc.cpp \
|
||||
colfind.cpp colpartition.cpp colpartitionset.cpp \
|
||||
alignedblob.cpp bbgrid.cpp blkocc.cpp blobgrid.cpp \
|
||||
ccnontextdetect.cpp cjkpitch.cpp colfind.cpp colpartition.cpp colpartitionset.cpp \
|
||||
colpartitiongrid.cpp devanagari_processing.cpp \
|
||||
drawedg.cpp drawtord.cpp edgblob.cpp edgloop.cpp \
|
||||
equationdetectbase.cpp \
|
||||
fpchop.cpp gap_map.cpp imagefind.cpp linefind.cpp makerow.cpp oldbasel.cpp \
|
||||
pithsync.cpp pitsync1.cpp scanedg.cpp sortflts.cpp strokewidth.cpp \
|
||||
tabfind.cpp tablefind.cpp tabvector.cpp \
|
||||
tablerecog.cpp textord.cpp \
|
||||
tablerecog.cpp textlineprojection.cpp textord.cpp \
|
||||
topitch.cpp tordmain.cpp tospace.cpp tovars.cpp \
|
||||
underlin.cpp wordseg.cpp workingpartset.cpp
|
||||
|
@ -39,11 +39,11 @@ namespace tesseract {
|
||||
// Fraction of resolution used as alignment tolerance for aligned tabs.
|
||||
const double kAlignedFraction = 0.03125;
|
||||
// Fraction of resolution used as alignment tolerance for ragged tabs.
|
||||
const double kRaggedFraction = 0.5;
|
||||
const double kRaggedFraction = 2.5;
|
||||
// Fraction of height used as a minimum gutter gap for aligned blobs.
|
||||
const double kAlignedGapFraction = 0.75;
|
||||
// Fraction of height used as a minimum gutter gap for ragged tabs.
|
||||
const double kRaggedGapFraction = 3.0;
|
||||
const double kRaggedGapFraction = 1.0;
|
||||
// Constant number of pixels used as alignment tolerance for line finding.
|
||||
const int kVLineAlignment = 3;
|
||||
// Constant number of pixels used as gutter gap tolerance for line finding.
|
||||
@ -163,7 +163,7 @@ void AlignedBlobParams::set_vertical(int vertical_x, int vertical_y) {
|
||||
|
||||
AlignedBlob::AlignedBlob(int gridsize,
|
||||
const ICOORD& bleft, const ICOORD& tright)
|
||||
: BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>(gridsize, bleft, tright) {
|
||||
: BlobGrid(gridsize, bleft, tright) {
|
||||
}
|
||||
|
||||
AlignedBlob::~AlignedBlob() {
|
||||
@ -196,24 +196,24 @@ ScrollView* AlignedBlob::DisplayTabs(const char* window_name,
|
||||
int bottom_y = box.bottom();
|
||||
TabType tabtype = bbox->left_tab_type();
|
||||
if (tabtype != TT_NONE) {
|
||||
if (tabtype == TT_UNCONFIRMED)
|
||||
if (tabtype == TT_MAYBE_ALIGNED)
|
||||
tab_win->Pen(ScrollView::BLUE);
|
||||
else if (tabtype == TT_MAYBE_RAGGED)
|
||||
tab_win->Pen(ScrollView::YELLOW);
|
||||
else if (tabtype == TT_CONFIRMED)
|
||||
tab_win->Pen(ScrollView::GREEN);
|
||||
else if (tabtype == TT_FAKE)
|
||||
tab_win->Pen(ScrollView::YELLOW);
|
||||
else
|
||||
tab_win->Pen(ScrollView::GREY);
|
||||
tab_win->Line(left_x, top_y, left_x, bottom_y);
|
||||
}
|
||||
tabtype = bbox->right_tab_type();
|
||||
if (tabtype != TT_NONE) {
|
||||
if (tabtype == TT_UNCONFIRMED)
|
||||
if (tabtype == TT_MAYBE_ALIGNED)
|
||||
tab_win->Pen(ScrollView::MAGENTA);
|
||||
else if (tabtype == TT_MAYBE_RAGGED)
|
||||
tab_win->Pen(ScrollView::ORANGE);
|
||||
else if (tabtype == TT_CONFIRMED)
|
||||
tab_win->Pen(ScrollView::RED);
|
||||
else if (tabtype == TT_FAKE)
|
||||
tab_win->Pen(ScrollView::ORANGE);
|
||||
else
|
||||
tab_win->Pen(ScrollView::GREY);
|
||||
tab_win->Line(right_x, top_y, right_x, bottom_y);
|
||||
@ -224,6 +224,17 @@ ScrollView* AlignedBlob::DisplayTabs(const char* window_name,
|
||||
return tab_win;
|
||||
}
|
||||
|
||||
// Helper returns true if the total number of line_crossings of all the blobs
|
||||
// in the list is at least 2.
|
||||
static bool AtLeast2LineCrossings(BLOBNBOX_CLIST* blobs) {
|
||||
BLOBNBOX_C_IT it(blobs);
|
||||
int total_crossings = 0;
|
||||
// Every blob in the list is visited; the loop does not stop early once the
// running total reaches 2.
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||
total_crossings += it.data()->line_crossings();
|
||||
}
|
||||
// The threshold applies to the sum over the whole list, not to any single
// blob.
return total_crossings >= 2;
|
||||
}
|
||||
|
||||
// Finds a vector corresponding to a set of vertically aligned blob edges
|
||||
// running through the given box. The type of vector returned and the
|
||||
// search parameters are determined by the AlignedBlobParams.
|
||||
@ -237,11 +248,13 @@ TabVector* AlignedBlob::FindVerticalAlignment(AlignedBlobParams align_params,
|
||||
int ext_start_y, ext_end_y;
|
||||
BLOBNBOX_CLIST good_points;
|
||||
// Search up and then down from the starting bbox.
|
||||
TBOX box = bbox->bounding_box();
|
||||
bool debug = WithinTestRegion(2, box.left(), box.bottom());
|
||||
int pt_count = AlignTabs(align_params, false, bbox, &good_points, &ext_end_y);
|
||||
pt_count += AlignTabs(align_params, true, bbox, &good_points, &ext_start_y);
|
||||
BLOBNBOX_C_IT it(&good_points);
|
||||
it.move_to_last();
|
||||
TBOX box = it.data()->bounding_box();
|
||||
box = it.data()->bounding_box();
|
||||
int end_y = box.top();
|
||||
int end_x = align_params.right_tab ? box.right() : box.left();
|
||||
it.move_to_first();
|
||||
@ -251,9 +264,14 @@ TabVector* AlignedBlob::FindVerticalAlignment(AlignedBlobParams align_params,
|
||||
// Acceptable tab vectors must have a minimum number of points,
|
||||
// have a minimum acceptable length, and have a minimum gradient.
|
||||
// The gradient corresponds to the skew angle.
|
||||
if (pt_count >= align_params.min_points &&
|
||||
// Ragged tabs don't need to satisfy the gradient condition, as they
|
||||
// will always end up parallel to the vertical direction.
|
||||
bool at_least_2_crossings = AtLeast2LineCrossings(&good_points);
|
||||
if ((pt_count >= align_params.min_points &&
|
||||
end_y - start_y >= align_params.min_length &&
|
||||
end_y - start_y >= abs(end_x - start_x) * kMinTabGradient) {
|
||||
(align_params.ragged ||
|
||||
end_y - start_y >= abs(end_x - start_x) * kMinTabGradient)) ||
|
||||
at_least_2_crossings) {
|
||||
int confirmed_points = 0;
|
||||
// Count existing confirmed points to see if vector is acceptable.
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||
@ -270,7 +288,7 @@ TabVector* AlignedBlob::FindVerticalAlignment(AlignedBlobParams align_params,
|
||||
if (!align_params.ragged ||
|
||||
confirmed_points + confirmed_points < pt_count) {
|
||||
const TBOX& box = bbox->bounding_box();
|
||||
if (WithinTestRegion(2, box.left(), box.bottom())) {
|
||||
if (debug) {
|
||||
tprintf("Confirming tab vector of %d pts starting at %d,%d\n",
|
||||
pt_count, box.left(), box.bottom());
|
||||
}
|
||||
@ -282,6 +300,9 @@ TabVector* AlignedBlob::FindVerticalAlignment(AlignedBlobParams align_params,
|
||||
} else {
|
||||
bbox->set_left_tab_type(align_params.confirmed_type);
|
||||
}
|
||||
if (debug) {
|
||||
bbox->bounding_box().print();
|
||||
}
|
||||
}
|
||||
// Now make the vector and return it.
|
||||
TabVector* result = TabVector::FitVector(align_params.alignment,
|
||||
@ -289,12 +310,21 @@ TabVector* AlignedBlob::FindVerticalAlignment(AlignedBlobParams align_params,
|
||||
ext_start_y, ext_end_y,
|
||||
&good_points,
|
||||
vertical_x, vertical_y);
|
||||
if (WithinTestRegion(2, box.left(), box.bottom())) {
|
||||
result->set_intersects_other_lines(at_least_2_crossings);
|
||||
if (debug) {
|
||||
tprintf("Box was %d, %d\n", box.left(), box.bottom());
|
||||
result->Print("After fitting");
|
||||
}
|
||||
return result;
|
||||
} else if (debug) {
|
||||
tprintf("Ragged tab used too many used points: %d out of %d\n",
|
||||
confirmed_points, pt_count);
|
||||
}
|
||||
} else if (debug) {
|
||||
tprintf("Tab vector failed basic tests: pt count %d vs min %d, "
|
||||
"length %d vs min %d, min grad %g\n",
|
||||
pt_count, align_params.min_points, end_y - start_y,
|
||||
align_params.min_length, abs(end_x - start_x) * kMinTabGradient);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
@ -310,13 +340,18 @@ int AlignedBlob::AlignTabs(const AlignedBlobParams& params,
|
||||
BLOBNBOX_C_IT it(good_points);
|
||||
|
||||
TBOX box = bbox->bounding_box();
|
||||
bool debug = WithinTestRegion(2, box.left(), box.bottom());
|
||||
if (debug) {
|
||||
tprintf("Starting alignment run at blob:");
|
||||
box.print();
|
||||
}
|
||||
int x_start = params.right_tab ? box.right() : box.left();
|
||||
while (bbox != NULL) {
|
||||
// Add the blob to the list if the appropriate side is a tab candidate,
|
||||
// or if we are working on a ragged tab.
|
||||
if (((params.right_tab && bbox->right_tab_type() != TT_NONE) ||
|
||||
(!params.right_tab && bbox->left_tab_type() != TT_NONE) ||
|
||||
params.ragged) &&
|
||||
TabType type = params.right_tab ? bbox->right_tab_type()
|
||||
: bbox->left_tab_type();
|
||||
if (((type != TT_NONE && type != TT_MAYBE_RAGGED) || params.ragged) &&
|
||||
(it.empty() || it.data() != bbox)) {
|
||||
if (top_to_bottom)
|
||||
it.add_before_then_move(bbox);
|
||||
@ -335,6 +370,10 @@ int AlignedBlob::AlignTabs(const AlignedBlobParams& params,
|
||||
x_start = params.right_tab ? box.right() : box.left();
|
||||
}
|
||||
}
|
||||
if (debug) {
|
||||
tprintf("Alignment run ended with %d pts at blob:", ptcount);
|
||||
box.print();
|
||||
}
|
||||
return ptcount;
|
||||
}
|
||||
|
||||
@ -417,15 +456,12 @@ BLOBNBOX* AlignedBlob::FindAlignedBlob(const AlignedBlobParams& p,
|
||||
// waiting for a sequence of blobs in a line to end.
|
||||
// NextVerticalSearch alone does not guarantee this, as there may be
|
||||
// more than one blob in a grid cell. See comment in AlignTabs.
|
||||
if ((n_y < start_y) != top_to_bottom || n_y == start_y)
|
||||
if ((n_y < start_y) != top_to_bottom || nbox.y_overlap(box))
|
||||
continue; // Only look in the required direction.
|
||||
if (result != NULL &&
|
||||
((top_to_bottom && n_y < result->bounding_box().bottom()) ||
|
||||
(!top_to_bottom && n_y > result->bounding_box().top())))
|
||||
if (result != NULL && result->bounding_box().y_gap(nbox) > gridsize())
|
||||
return result; // This result is clear.
|
||||
if (backup_result != NULL && p.ragged &&
|
||||
((top_to_bottom && n_y < backup_result->bounding_box().bottom()) ||
|
||||
(!top_to_bottom && n_y > backup_result->bounding_box().top())))
|
||||
if (backup_result != NULL && p.ragged && result == NULL &&
|
||||
backup_result->bounding_box().y_gap(nbox) > gridsize())
|
||||
return backup_result; // This result is clear.
|
||||
|
||||
// If the neighbouring blob is the wrong side of a separator line, then it
|
||||
@ -446,7 +482,7 @@ BLOBNBOX* AlignedBlob::FindAlignedBlob(const AlignedBlobParams& p,
|
||||
n_right > x_at_n_y + p.r_align_tolerance &&
|
||||
(p.ragged || n_left < x_at_n_y + p.gutter_fraction * nbox.height())) {
|
||||
// In the gutter so end of line.
|
||||
if (bbox->right_tab_type() >= TT_UNCONFIRMED)
|
||||
if (bbox->right_tab_type() >= TT_MAYBE_ALIGNED)
|
||||
bbox->set_right_tab_type(TT_DELETED);
|
||||
*end_y = top_to_bottom ? nbox.top() : nbox.bottom();
|
||||
if (WithinTestRegion(2, x_start, start_y))
|
||||
@ -458,7 +494,7 @@ BLOBNBOX* AlignedBlob::FindAlignedBlob(const AlignedBlobParams& p,
|
||||
n_right > x_at_n_y - p.min_gutter &&
|
||||
(p.ragged || n_right > x_at_n_y - p.gutter_fraction * nbox.height())) {
|
||||
// In the gutter so end of line.
|
||||
if (bbox->left_tab_type() >= TT_UNCONFIRMED)
|
||||
if (bbox->left_tab_type() >= TT_MAYBE_ALIGNED)
|
||||
bbox->set_left_tab_type(TT_DELETED);
|
||||
*end_y = top_to_bottom ? nbox.top() : nbox.bottom();
|
||||
if (WithinTestRegion(2, x_start, start_y))
|
||||
@ -476,15 +512,23 @@ BLOBNBOX* AlignedBlob::FindAlignedBlob(const AlignedBlobParams& p,
|
||||
tprintf("aligned, seeking%d, l=%d, r=%d\n",
|
||||
p.right_tab, neighbour->left_tab_type(),
|
||||
neighbour->right_tab_type());
|
||||
if ((p.right_tab && neighbour->right_tab_type() != TT_NONE) ||
|
||||
(!p.right_tab && neighbour->left_tab_type() != TT_NONE)) {
|
||||
TabType n_type = p.right_tab ? neighbour->right_tab_type()
|
||||
: neighbour->left_tab_type();
|
||||
if (n_type != TT_NONE && (p.ragged || n_type != TT_MAYBE_RAGGED)) {
|
||||
if (result == NULL) {
|
||||
result = neighbour;
|
||||
} else {
|
||||
// Keep the closest neighbour.
|
||||
int old_y = (result->bounding_box().top() +
|
||||
result->bounding_box().bottom()) / 2;
|
||||
if (abs(n_y - start_y) < abs(old_y - start_y))
|
||||
// Keep the closest neighbour by Euclidean distance.
|
||||
// This prevents it from picking a tab blob in another column.
|
||||
const TBOX& old_box = result->bounding_box();
|
||||
int x_diff = p.right_tab ? old_box.right() : old_box.left();
|
||||
x_diff -= x_at_n_y;
|
||||
int y_diff = (old_box.top() + old_box.bottom()) / 2 - start_y;
|
||||
int old_dist = x_diff * x_diff + y_diff * y_diff;
|
||||
x_diff = n_x - x_at_n_y;
|
||||
y_diff = n_y - start_y;
|
||||
int new_dist = x_diff * x_diff + y_diff * y_diff;
|
||||
if (new_dist < old_dist)
|
||||
result = neighbour;
|
||||
}
|
||||
} else if (backup_result == NULL) {
|
||||
|
@ -80,7 +80,7 @@ struct AlignedBlobParams {
|
||||
// The AlignedBlob class contains code to find vertically aligned blobs.
|
||||
// This is factored out into a separate class, so it can be used by both
|
||||
// vertical line finding (LineFind) and tabstop finding (TabFind).
|
||||
class AlignedBlob : public BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> {
|
||||
class AlignedBlob : public BlobGrid {
|
||||
public:
|
||||
AlignedBlob(int gridsize, const ICOORD& bleft, const ICOORD& tright);
|
||||
virtual ~AlignedBlob();
|
||||
|
@ -153,6 +153,61 @@ IntGrid* IntGrid::NeighbourhoodSum() const {
|
||||
return sumgrid;
|
||||
}
|
||||
|
||||
// Returns true if more than half the area of the rect is covered by grid
|
||||
// cells that are over the threshold.
|
||||
bool IntGrid::RectMostlyOverThreshold(const TBOX& rect, int threshold) const {
|
||||
int min_x, min_y, max_x, max_y;
|
||||
// GridCoords fills in the inclusive range of cell indices that is iterated
// below, from the bottom-left and top-right corners of rect.
GridCoords(rect.left(), rect.bottom(), &min_x, &min_y);
|
||||
GridCoords(rect.right(), rect.top(), &max_x, &max_y);
|
||||
// Accumulates the area of rect that lies inside cells over the threshold.
int total_area = 0;
|
||||
for (int y = min_y; y <= max_y; ++y) {
|
||||
for (int x = min_x; x <= max_x; ++x) {
|
||||
int value = GridCellValue(x, y);
|
||||
// Strictly greater: a cell exactly at the threshold does not count.
if (value > threshold) {
|
||||
// NOTE(review): cell_box is built from the cell index times gridsize_
// with no bleft() offset, so this looks like it assumes rect is in a
// frame whose origin is the grid origin - confirm against GridCoords.
TBOX cell_box(x * gridsize_, y * gridsize_,
|
||||
(x + 1) * gridsize_, (y + 1) * gridsize_);
|
||||
cell_box &= rect; // This is in-place box intersection.
|
||||
total_area += cell_box.area();
|
||||
}
|
||||
}
|
||||
}
|
||||
// "More than half" is tested in integers by doubling the covered area.
return total_area * 2 > rect.area();
|
||||
}
|
||||
|
||||
// Returns true if any cell value in the given rectangle is zero.
|
||||
bool IntGrid::AnyZeroInRect(const TBOX& rect) const {
|
||||
int min_x, min_y, max_x, max_y;
|
||||
// GridCoords fills in the inclusive range of cell indices scanned below,
// from the bottom-left and top-right corners of rect.
GridCoords(rect.left(), rect.bottom(), &min_x, &min_y);
|
||||
GridCoords(rect.right(), rect.top(), &max_x, &max_y);
|
||||
for (int y = min_y; y <= max_y; ++y) {
|
||||
for (int x = min_x; x <= max_x; ++x) {
|
||||
// Stop at the first zero cell; the remaining cells are not examined.
if (GridCellValue(x, y) == 0)
|
||||
return true;
|
||||
}
|
||||
}
|
||||
// Every cell in the scanned range was non-zero.
return false;
|
||||
}
|
||||
|
||||
// Returns a full-resolution binary pix in which each cell over the given
|
||||
// threshold is filled as a black square. pixDestroy after use.
|
||||
// Edge cells, which have a zero 4-neighbour, are not marked.
|
||||
Pix* IntGrid::ThresholdToPix(int threshold) const {
|
||||
// 1 bit-per-pixel image whose size is the extent between bleft() and
// tright().
Pix* pix = pixCreate(tright().x() - bleft().x(),
|
||||
tright().y() - bleft().y(), 1);
|
||||
int cellsize = gridsize();
|
||||
for (int y = 0; y < gridheight(); ++y) {
|
||||
for (int x = 0; x < gridwidth(); ++x) {
|
||||
// A cell is painted only if it is strictly over the threshold and all
// four of its 4-neighbours are non-zero.
// NOTE(review): on the outermost rows and columns the neighbour reads
// use indices -1, gridwidth() or gridheight(); this relies on
// GridCellValue tolerating out-of-range coordinates - confirm.
if (GridCellValue(x, y) > threshold &&
|
||||
GridCellValue(x - 1, y) > 0 && GridCellValue(x + 1, y) > 0 &&
|
||||
GridCellValue(x, y - 1) > 0 && GridCellValue(x, y + 1) > 0) {
|
||||
// The y coordinate is measured down from tright().y(), presumably
// because Pix rows run top-down while grid rows run bottom-up.
// NOTE(review): x is not offset by bleft().x() and y is not offset by
// bleft().y(); looks like this assumes bleft() is the origin - confirm.
pixRasterop(pix, x * cellsize, tright().y() - ((y + 1) * cellsize),
|
||||
cellsize, cellsize, PIX_SET, NULL, 0, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
return pix;
|
||||
}
|
||||
|
||||
// Make a Pix of the correct scaled size for the TraceOutline functions.
|
||||
Pix* GridReducedPix(const TBOX& box, int gridsize,
|
||||
ICOORD bleft, int* left, int* bottom) {
|
||||
@ -232,4 +287,3 @@ Pix* TraceBlockOnReducedPix(BLOCK* block, int gridsize,
|
||||
}
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
|
@ -123,8 +123,7 @@ class IntGrid : public GridBase {
|
||||
IntGrid* NeighbourhoodSum() const;
|
||||
|
||||
int GridCellValue(int grid_x, int grid_y) const {
|
||||
ASSERT_HOST(grid_x >= 0 && grid_x < gridwidth());
|
||||
ASSERT_HOST(grid_y >= 0 && grid_y < gridheight());
|
||||
ClipGridCoords(&grid_x, &grid_y);
|
||||
return grid_[grid_y * gridwidth_ + grid_x];
|
||||
}
|
||||
void SetGridCell(int grid_x, int grid_y, int value) {
|
||||
@ -132,6 +131,16 @@ class IntGrid : public GridBase {
|
||||
ASSERT_HOST(grid_y >= 0 && grid_y < gridheight());
|
||||
grid_[grid_y * gridwidth_ + grid_x] = value;
|
||||
}
|
||||
// Returns true if more than half the area of the rect is covered by grid
|
||||
// cells that are over the threshold.
|
||||
bool RectMostlyOverThreshold(const TBOX& rect, int threshold) const;
|
||||
|
||||
// Returns true if any cell value in the given rectangle is zero.
|
||||
bool AnyZeroInRect(const TBOX& rect) const;
|
||||
|
||||
// Returns a full-resolution binary pix in which each cell over the given
|
||||
// threshold is filled as a black square. pixDestroy after use.
|
||||
Pix* ThresholdToPix(int threshold) const;
|
||||
|
||||
private:
|
||||
int* grid_; // 2-d array of ints.
|
||||
@ -373,6 +382,24 @@ int SortByBoxLeft(const void* void1, const void* void2) {
|
||||
return p1->bounding_box().top() - p2->bounding_box().top();
|
||||
}
|
||||
|
||||
// Sort function to sort a BBC by bounding_box().right() in right-to-left order.
|
||||
template<class BBC>
|
||||
int SortRightToLeft(const void* void1, const void* void2) {
|
||||
// The void*s are actually doubly indirected, so get rid of one level.
|
||||
const BBC* p1 = *reinterpret_cast<const BBC* const *>(void1);
|
||||
const BBC* p2 = *reinterpret_cast<const BBC* const *>(void2);
|
||||
// Primary key: right edge, larger values first (p2 - p1 is negative when
// p1 is further right).
int result = p2->bounding_box().right() - p1->bounding_box().right();
|
||||
if (result != 0)
|
||||
return result;
|
||||
// First tie-break: left edge, also larger values first.
result = p2->bounding_box().left() - p1->bounding_box().left();
|
||||
if (result != 0)
|
||||
return result;
|
||||
// Second tie-break: bottom edge, smaller values first.
result = p1->bounding_box().bottom() - p2->bounding_box().bottom();
|
||||
if (result != 0)
|
||||
return result;
|
||||
// Final tie-break: top edge, smaller values first. Boxes with identical
// edges compare equal (0).
return p1->bounding_box().top() - p2->bounding_box().top();
|
||||
}
|
||||
|
||||
// Sort function to sort a BBC by bounding_box().bottom().
|
||||
template<class BBC>
|
||||
int SortByBoxBottom(const void* void1, const void* void2) {
|
||||
@ -859,6 +886,9 @@ void GridSearch<BBC, BBC_CLIST, BBC_C_IT>::RemoveBBox() {
|
||||
|
||||
template<class BBC, class BBC_CLIST, class BBC_C_IT>
|
||||
void GridSearch<BBC, BBC_CLIST, BBC_C_IT>::RepositionIterator() {
|
||||
// Something was deleted, so we have little choice but to clear the
|
||||
// returns list.
|
||||
returns_.shallow_clear();
|
||||
// Reset the iterator back to one past the previous return.
|
||||
// If the previous_return_ is no longer in the list, then
|
||||
// next_return_ serves as a backup.
|
||||
|
44
textord/blobgrid.cpp
Normal file
44
textord/blobgrid.cpp
Normal file
@ -0,0 +1,44 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: blobgrid.cpp
|
||||
// Description: BBGrid of BLOBNBOX with useful BLOBNBOX-specific methods.
|
||||
// Copyright 2011 Google Inc. All Rights Reserved.
|
||||
// Author: rays@google.com (Ray Smith)
|
||||
// Created: Sat Jun 11 10:30:01 PST 2011
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "blobgrid.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Constructs the grid by forwarding gridsize, bleft and tright unchanged to
// the BBGrid base class; the constructor body itself does nothing.
BlobGrid::BlobGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright)
|
||||
: BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>(gridsize, bleft, tright) {
|
||||
}
|
||||
|
||||
// Empty destructor body: BlobGrid releases nothing itself here.
BlobGrid::~BlobGrid() {
|
||||
}
|
||||
|
||||
// Inserts all the blobs from the given list, with x and y spreading,
|
||||
// without removing from the source list, so ownership remains with the
|
||||
// source list.
|
||||
void BlobGrid::InsertBlobList(BLOBNBOX_LIST* blobs) {
|
||||
BLOBNBOX_IT blob_it(blobs);
|
||||
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
|
||||
BLOBNBOX* blob = blob_it.data();
|
||||
// Blobs flagged joined_to_prev() are skipped and never enter the grid.
if (!blob->joined_to_prev())
|
||||
// Both spreading flags are passed as true; only the pointer is handed to
// the grid, and the blob is not removed from the source list here.
InsertBBox(true, true, blob);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace tesseract.
|
46
textord/blobgrid.h
Normal file
46
textord/blobgrid.h
Normal file
@ -0,0 +1,46 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: blobgrid.h
|
||||
// Description: BBGrid of BLOBNBOX with useful BLOBNBOX-specific methods.
|
||||
// Copyright 2011 Google Inc. All Rights Reserved.
|
||||
// Author: rays@google.com (Ray Smith)
|
||||
// Created: Sat Jun 11 10:26:01 PST 2011
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
#ifndef TESSERACT_TEXTORD_BLOBGRID_H_
|
||||
#define TESSERACT_TEXTORD_BLOBGRID_H_
|
||||
|
||||
#include "bbgrid.h"
|
||||
#include "blobbox.h"
|
||||
|
||||
CLISTIZEH(BLOBNBOX)
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
typedef GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> BlobGridSearch;
|
||||
|
||||
// A BBGrid specialised for BLOBNBOX elements (with BLOBNBOX_CLIST cells and
// BLOBNBOX_C_IT iterators) that adds a bulk-insert helper for blob lists.
class BlobGrid : public BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> {
|
||||
public:
|
||||
// gridsize, bleft and tright are passed through to the BBGrid base class.
BlobGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright);
|
||||
virtual ~BlobGrid();
|
||||
|
||||
// Inserts all the blobs from the given list, with x and y spreading,
|
||||
// without removing from the source list, so ownership remains with the
|
||||
// source list.
|
||||
void InsertBlobList(BLOBNBOX_LIST* blobs);
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_TEXTORD_BLOBGRID_H_
|
310
textord/ccnontextdetect.cpp
Normal file
310
textord/ccnontextdetect.cpp
Normal file
@ -0,0 +1,310 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: ccnontextdetect.cpp
|
||||
// Description: Connected-Component-based photo (non-text) detection.
|
||||
// Copyright 2011 Google Inc. All Rights Reserved.
|
||||
// Author: rays@google.com (Ray Smith)
|
||||
// Created: Sat Jun 11 10:12:01 PST 2011
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "ccnontextdetect.h"
|
||||
#include "imagefind.h"
|
||||
#include "strokewidth.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Max number of neighbour small objects per squared gridsize before a grid
|
||||
// cell becomes image.
|
||||
const double kMaxSmallNeighboursPerPix = 1.0 / 32;
|
||||
// Max number of small blobs a large blob may overlap before it is rejected
|
||||
// and determined to be image.
|
||||
const int kMaxLargeOverlapsWithSmall = 3;
|
||||
// Max number of small blobs a medium blob may overlap before it is rejected
|
||||
// and determined to be image. Larger than for large blobs as medium blobs
|
||||
// may be complex Chinese characters. Very large Chinese characters are going
|
||||
// to overlap more medium blobs than small.
|
||||
const int kMaxMediumOverlapsWithSmall = 12;
|
||||
// Max number of normal blobs a large blob may overlap before it is rejected
|
||||
// and determined to be image. This is set higher to allow for drop caps, which
|
||||
// may overlap a lot of good text blobs.
|
||||
const int kMaxLargeOverlapsWithMedium = 12;
|
||||
// Multiplier of original noise_count used to test for the case of spreading
|
||||
// noise beyond where it should really be.
|
||||
const int kOriginalNoiseMultiple = 8;
|
||||
// Pixel padding for noise blobs when rendering on the image
|
||||
// mask to encourage them to join together. Make it too big and images
|
||||
// will fatten out too much and have to be clipped to text.
|
||||
const int kNoisePadding = 4;
|
||||
// Fraction of max_noise_count_ to be added to the noise count if there is
|
||||
// photo mask in the background.
|
||||
const double kPhotoOffsetFraction = 0.375;
|
||||
// Min ratio of perimeter^2/16area for a "good" blob in estimating noise
|
||||
// density. Good blobs are supposed to be highly likely real text.
|
||||
// We consider a square to have unit ratio, where A=(p/4)^2, hence the factor
|
||||
// of 16. Digital circles are weird and have a minimum ratio of pi/64, not
|
||||
// the 1/(4pi) that you would expect.
|
||||
const double kMinGoodTextPARatio = 1.5;
|
||||
|
||||
// Sets up the underlying BlobGrid and derives the per-cell noise threshold
// from the cell area: max_noise_count_ is kMaxSmallNeighboursPerPix times
// gridsize squared, truncated to int. noise_density_ starts out NULL.
CCNonTextDetect::CCNonTextDetect(int gridsize,
|
||||
const ICOORD& bleft, const ICOORD& tright)
|
||||
: BlobGrid(gridsize, bleft, tright),
|
||||
max_noise_count_(static_cast<int>(kMaxSmallNeighboursPerPix *
|
||||
gridsize * gridsize)),
|
||||
noise_density_(NULL) {
|
||||
// TODO(rays) break max_noise_count_ out into an area-proportional
|
||||
// value, as now plus an additive constant for the number of text blobs
|
||||
// in the 3x3 neighbourhood - maybe 9.
|
||||
}
|
||||
|
||||
// Frees the noise density grid held in noise_density_ (NULL until
// ComputeNonTextMask assigns it).
CCNonTextDetect::~CCNonTextDetect() {
|
||||
delete noise_density_;
|
||||
}
|
||||
|
||||
// Creates and returns a Pix with the same resolution as the original
|
||||
// in which 1 (black) pixels represent likely non text (photo, line drawing)
|
||||
// areas of the page, deleting from the blob_block the blobs that were
|
||||
// determined to be non-text.
|
||||
// The photo_map is used to bias the decision towards non-text, rather than
|
||||
// supplying definite decision.
|
||||
// The blob_block is the usual result of connected component analysis,
|
||||
// holding the detected blobs.
|
||||
// The returned Pix should be PixDestroyed after use.
|
||||
Pix* CCNonTextDetect::ComputeNonTextMask(bool debug, Pix* photo_map,
|
||||
TO_BLOCK* blob_block) {
|
||||
// Insert the smallest blobs into the grid.
|
||||
InsertBlobList(&blob_block->small_blobs);
|
||||
InsertBlobList(&blob_block->noise_blobs);
|
||||
// Add the medium blobs that don't have a good strokewidth neighbour.
|
||||
// Those that do go into good_grid as an antidote to spreading beyond the
|
||||
// real reaches of a noise region.
|
||||
BlobGrid good_grid(gridsize(), bleft(), tright());
|
||||
BLOBNBOX_IT blob_it(&blob_block->blobs);
|
||||
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
|
||||
BLOBNBOX* blob = blob_it.data();
|
||||
// The next two statements compute (perimeter / 4)^2 / enclosed_area.
double perimeter_area_ratio = blob->cblob()->perimeter() / 4.0;
|
||||
// NOTE(review): divides by enclosed_area() with no zero check - confirm it
// cannot be zero for blobs in this list.
perimeter_area_ratio *= perimeter_area_ratio / blob->enclosed_area();
|
||||
// A blob goes into this (noise) grid if GoodTextBlob() is 0 or its ratio
// is below kMinGoodTextPARatio; otherwise it goes into good_grid.
if (blob->GoodTextBlob() == 0 || perimeter_area_ratio < kMinGoodTextPARatio)
|
||||
InsertBBox(true, true, blob);
|
||||
else
|
||||
good_grid.InsertBBox(true, true, blob);
|
||||
}
|
||||
// NOTE(review): noise_density_ is overwritten without deleting any previous
// value, and the destructor deletes only the current one, so a second call
// on the same object would presumably leak - confirm this is called once.
noise_density_ = ComputeNoiseDensity(debug, photo_map, &good_grid);
|
||||
good_grid.Clear(); // Not needed any more.
|
||||
// The initial mask comes from thresholding the density grid at
// max_noise_count_; the calls below are handed the same pix.
Pix* pix = noise_density_->ThresholdToPix(max_noise_count_);
|
||||
if (debug) {
|
||||
pixWrite("junknoisemask.png", pix, IFF_PNG);
|
||||
}
|
||||
// win stays NULL unless debug is set, and is passed on as-is below.
ScrollView* win = NULL;
|
||||
if (debug) {
|
||||
win = MakeWindow(0, 400, "Photo Mask Blobs");
|
||||
}
|
||||
// Large and medium blobs are not text if they overlap with "a lot" of small
|
||||
// blobs.
|
||||
MarkAndDeleteNonTextBlobs(&blob_block->large_blobs,
|
||||
kMaxLargeOverlapsWithSmall,
|
||||
win, ScrollView::DARK_GREEN, pix);
|
||||
MarkAndDeleteNonTextBlobs(&blob_block->blobs, kMaxMediumOverlapsWithSmall,
|
||||
win, ScrollView::WHITE, pix);
|
||||
// Clear the grid of small blobs and insert the medium blobs.
|
||||
Clear();
|
||||
InsertBlobList(&blob_block->blobs);
|
||||
MarkAndDeleteNonTextBlobs(&blob_block->large_blobs,
|
||||
kMaxLargeOverlapsWithMedium,
|
||||
win, ScrollView::DARK_GREEN, pix);
|
||||
// Clear again before we start deleting the blobs in the grid.
|
||||
Clear();
|
||||
// The remaining passes run with an empty grid and -1 as the overlap limit.
MarkAndDeleteNonTextBlobs(&blob_block->noise_blobs, -1,
|
||||
win, ScrollView::CORAL, pix);
|
||||
MarkAndDeleteNonTextBlobs(&blob_block->small_blobs, -1,
|
||||
win, ScrollView::GOLDENROD, pix);
|
||||
MarkAndDeleteNonTextBlobs(&blob_block->blobs, -1,
|
||||
win, ScrollView::WHITE, pix);
|
||||
if (debug) {
|
||||
// Debug mode writes the final mask to disk and waits on the window's
// SVET_DESTROY event before deleting the window.
win->Update();
|
||||
pixWrite("junkccphotomask.png", pix, IFF_PNG);
|
||||
delete win->AwaitEvent(SVET_DESTROY);
|
||||
delete win;
|
||||
}
|
||||
return pix;
|
||||
}
|
||||
|
||||
// Computes and returns the noise_density IntGrid, at the same gridsize as
|
||||
// this by summing the number of small elements in a 3x3 neighbourhood of
|
||||
// each grid cell. good_grid is filled with blobs that are considered most
|
||||
// likely good text, and this is filled with small and medium blobs that are
|
||||
// more likely non-text.
|
||||
// The photo_map is used to bias the decision towards non-text, rather than
|
||||
// supplying definite decision.
|
||||
IntGrid* CCNonTextDetect::ComputeNoiseDensity(bool debug, Pix* photo_map,
|
||||
BlobGrid* good_grid) {
|
||||
// noise_counts: per-cell element counts of this grid.
// noise_density: NeighbourhoodSum() of those counts; this is the grid that
// is adjusted below and returned.
// good_counts: per-cell element counts of good_grid.
IntGrid* noise_counts = CountCellElements();
|
||||
IntGrid* noise_density = noise_counts->NeighbourhoodSum();
|
||||
IntGrid* good_counts = good_grid->CountCellElements();
|
||||
// Now increase noise density in photo areas, to bias the decision and
|
||||
// minimize hallucinated text on image, but trim the noise_density where
|
||||
// there are good blobs and the original count is low in non-photo areas,
|
||||
// indicating that most of the result came from neighbouring cells.
|
||||
int height = pixGetHeight(photo_map);
|
||||
int photo_offset = IntCastRounded(max_noise_count_ * kPhotoOffsetFraction);
|
||||
for (int y = 0; y < gridheight(); ++y) {
|
||||
for (int x = 0; x < gridwidth(); ++x) {
|
||||
// noise is read once, before any SetGridCell for this cell, so all the
// tests below see the unadjusted density.
int noise = noise_density->GridCellValue(x, y);
|
||||
// Only cells at or below the threshold that adding photo_offset would
// push over it are candidates for the photo bias.
if (max_noise_count_ < noise + photo_offset &&
|
||||
noise <= max_noise_count_) {
|
||||
// Test for photo.
|
||||
// The cell rectangle is expressed with y measured down from the pix
// height, so bottom is numerically larger than top.
int left = x * gridsize();
|
||||
int right = left + gridsize();
|
||||
int bottom = height - y * gridsize();
|
||||
int top = bottom - gridsize();
|
||||
if (ImageFind::BoundsWithinRect(photo_map, &left, &top, &right,
|
||||
&bottom)) {
|
||||
noise_density->SetGridCell(x, y, noise + photo_offset);
|
||||
}
|
||||
}
|
||||
// Debug trace for over-threshold cells that also contain good blobs.
if (debug && noise > max_noise_count_ &&
|
||||
good_counts->GridCellValue(x, y) > 0) {
|
||||
tprintf("At %d, %d, noise = %d, good=%d, orig=%d, thr=%d\n",
|
||||
x * gridsize(), y * gridsize(),
|
||||
noise_density->GridCellValue(x, y),
|
||||
good_counts->GridCellValue(x, y),
|
||||
noise_counts->GridCellValue(x, y), max_noise_count_);
|
||||
}
|
||||
// Zero the density where the cell is over the threshold, holds good
// blobs, and its own count times kOriginalNoiseMultiple is still within
// the threshold.
if (noise > max_noise_count_ &&
|
||||
good_counts->GridCellValue(x, y) > 0 &&
|
||||
noise_counts->GridCellValue(x, y) * kOriginalNoiseMultiple <=
|
||||
max_noise_count_) {
|
||||
noise_density->SetGridCell(x, y, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
// The two count grids are temporaries and are freed here; noise_density is
// handed back to the caller.
delete noise_counts;
|
||||
delete good_counts;
|
||||
return noise_density;
|
||||
}
|
||||
|
||||
// Helper to expand a box in one of the 4 directions by the given pad,
|
||||
// provided it does not expand into any cell with a zero noise density.
|
||||
// If that is not possible, try expanding all round by a small constant.
|
||||
static TBOX AttemptBoxExpansion(const TBOX& box, const IntGrid& noise_density,
|
||||
int pad) {
|
||||
TBOX expanded_box(box);
|
||||
expanded_box.set_right(box.right() + pad);
|
||||
if (!noise_density.AnyZeroInRect(expanded_box))
|
||||
return expanded_box;
|
||||
expanded_box = box;
|
||||
expanded_box.set_left(box.left() - pad);
|
||||
if (!noise_density.AnyZeroInRect(expanded_box))
|
||||
return expanded_box;
|
||||
expanded_box = box;
|
||||
expanded_box.set_top(box.top() + pad);
|
||||
if (!noise_density.AnyZeroInRect(expanded_box))
|
||||
return expanded_box;
|
||||
expanded_box = box;
|
||||
expanded_box.set_bottom(box.bottom() + pad);
|
||||
if (!noise_density.AnyZeroInRect(expanded_box))
|
||||
return expanded_box;
|
||||
expanded_box = box;
|
||||
expanded_box.pad(kNoisePadding, kNoisePadding);
|
||||
if (!noise_density.AnyZeroInRect(expanded_box))
|
||||
return expanded_box;
|
||||
return box;
|
||||
}
|
||||
|
||||
// Tests each blob in the list to see if it is certain non-text using 2
|
||||
// conditions:
|
||||
// 1. blob overlaps a cell with high value in noise_density_ (previously set
|
||||
// by ComputeNoiseDensity).
|
||||
// OR 2. The blob overlaps more than max_blob_overlaps in *this grid. This
|
||||
// condition is disabled with max_blob_overlaps == -1.
|
||||
// If it does, the blob is declared non-text, and is used to mark up the
|
||||
// nontext_mask. Such blobs are fully deleted, and non-noise blobs have their
|
||||
// neighbours reset, as they may now point to deleted data.
|
||||
// WARNING: The blobs list blobs may be in the *this grid, but they are
|
||||
// not removed. If any deleted blobs might be in *this, then this must be
|
||||
// Clear()ed immediately after MarkAndDeleteNonTextBlobs is called.
|
||||
// If the win is not NULL, deleted blobs are drawn on it in red, and kept
|
||||
// blobs are drawn on it in ok_color.
|
||||
void CCNonTextDetect::MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST* blobs,
|
||||
int max_blob_overlaps,
|
||||
ScrollView* win,
|
||||
ScrollView::Color ok_color,
|
||||
Pix* nontext_mask) {
|
||||
int imageheight = tright().y() - bleft().x();
|
||||
BLOBNBOX_IT blob_it(blobs);
|
||||
BLOBNBOX_LIST dead_blobs;
|
||||
BLOBNBOX_IT dead_it(&dead_blobs);
|
||||
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
|
||||
BLOBNBOX* blob = blob_it.data();
|
||||
TBOX box = blob->bounding_box();
|
||||
if (!noise_density_->RectMostlyOverThreshold(box, max_noise_count_) &&
|
||||
(max_blob_overlaps < 0 ||
|
||||
!BlobOverlapsTooMuch(blob, max_blob_overlaps))) {
|
||||
blob->ClearNeighbours();
|
||||
if (win != NULL)
|
||||
blob->plot(win, ok_color, ok_color);
|
||||
} else {
|
||||
if (noise_density_->AnyZeroInRect(box)) {
|
||||
// There is a danger that the bounding box may overlap real text, so
|
||||
// we need to render the outline.
|
||||
Pix* blob_pix = blob->cblob()->render_outline();
|
||||
pixRasterop(nontext_mask, box.left(), imageheight - box.top(),
|
||||
box.width(), box.height(), PIX_SRC | PIX_DST,
|
||||
blob_pix, 0, 0);
|
||||
pixDestroy(&blob_pix);
|
||||
} else {
|
||||
if (box.area() < gridsize() * gridsize()) {
|
||||
// It is a really bad idea to make lots of small components in the
|
||||
// photo mask, so try to join it to a bigger area by expanding the
|
||||
// box in a way that does not touch any zero noise density cell.
|
||||
box = AttemptBoxExpansion(box, *noise_density_, gridsize());
|
||||
}
|
||||
// All overlapped cells are non-zero, so just mark the rectangle.
|
||||
pixRasterop(nontext_mask, box.left(), imageheight - box.top(),
|
||||
box.width(), box.height(), PIX_SET, NULL, 0, 0);
|
||||
}
|
||||
if (win != NULL)
|
||||
blob->plot(win, ScrollView::RED, ScrollView::RED);
|
||||
// It is safe to delete the cblob now, as it isn't used by the grid
|
||||
// or BlobOverlapsTooMuch, and the BLOBNBOXes will go away with the
|
||||
// dead_blobs list.
|
||||
// TODO(rays) delete the delete when the BLOBNBOX destructor deletes
|
||||
// the cblob.
|
||||
delete blob->cblob();
|
||||
dead_it.add_to_end(blob_it.extract());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true if the given blob overlaps more than max_overlaps blobs
|
||||
// in the current grid.
|
||||
bool CCNonTextDetect::BlobOverlapsTooMuch(BLOBNBOX* blob, int max_overlaps) {
|
||||
// Search the grid to see what intersects it.
|
||||
// Setup a Rectangle search for overlapping this blob.
|
||||
BlobGridSearch rsearch(this);
|
||||
TBOX box = blob->bounding_box();
|
||||
rsearch.StartRectSearch(box);
|
||||
rsearch.SetUniqueMode(true);
|
||||
BLOBNBOX* neighbour;
|
||||
int overlap_count = 0;
|
||||
while (overlap_count <= max_overlaps &&
|
||||
(neighbour = rsearch.NextRectSearch()) != NULL) {
|
||||
if (box.major_overlap(neighbour->bounding_box())) {
|
||||
++overlap_count;
|
||||
if (overlap_count > max_overlaps)
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace tesseract.
|
87
textord/ccnontextdetect.h
Normal file
87
textord/ccnontextdetect.h
Normal file
@ -0,0 +1,87 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: ccnontextdetect.h
|
||||
// Description: Connected-Component-based non-text detection.
|
||||
// Copyright 2011 Google Inc. All Rights Reserved.
|
||||
// Author: rays@google.com (Ray Smith)
|
||||
// Created: Sat Jun 11 09:52:01 PST 2011
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_TEXTORD_CCPHOTODETECT_H_
|
||||
#define TESSERACT_TEXTORD_CCPHOTODETECT_H_
|
||||
|
||||
#include "blobgrid.h"
|
||||
#include "scrollview.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// The CCNonTextDetect class contains grid-based operations on blobs to create
|
||||
// a full-resolution image mask analogous yet complementary to
|
||||
// pixGenHalftoneMask as it is better at line-drawings, graphs and charts.
|
||||
class CCNonTextDetect : public BlobGrid {
|
||||
public:
|
||||
CCNonTextDetect(int gridsize, const ICOORD& bleft, const ICOORD& tright);
|
||||
virtual ~CCNonTextDetect();
|
||||
|
||||
// Creates and returns a Pix with the same resolution as the original
|
||||
// in which 1 (black) pixels represent likely non text (photo, line drawing)
|
||||
// areas of the page, deleting from the blob_block the blobs that were
|
||||
// determined to be non-text.
|
||||
// The photo_map (binary image mask) is used to bias the decision towards
|
||||
// non-text, rather than supplying a definite decision.
|
||||
// The blob_block is the usual result of connected component analysis,
|
||||
// holding the detected blobs.
|
||||
// The returned Pix should be PixDestroyed after use.
|
||||
Pix* ComputeNonTextMask(bool debug, Pix* photo_map, TO_BLOCK* blob_block);
|
||||
|
||||
private:
|
||||
// Computes and returns the noise_density IntGrid, at the same gridsize as
|
||||
// this by summing the number of small elements in a 3x3 neighbourhood of
|
||||
// each grid cell. good_grid is filled with blobs that are considered most
|
||||
// likely good text, and this is filled with small and medium blobs that are
|
||||
// more likely non-text.
|
||||
// The photo_map is used to bias the decision towards non-text, rather than
|
||||
// supplying definite decision.
|
||||
IntGrid* ComputeNoiseDensity(bool debug, Pix* photo_map, BlobGrid* good_grid);
|
||||
|
||||
// Tests each blob in the list to see if it is certain non-text using 2
|
||||
// conditions:
|
||||
// 1. blob overlaps a cell with high value in noise_density_ (previously set
|
||||
// by ComputeNoiseDensity).
|
||||
// OR 2. The blob overlaps more than max_blob_overlaps in *this grid. This
|
||||
// condition is disabled with max_blob_overlaps == -1.
|
||||
// If it does, the blob is declared non-text, and is used to mark up the
|
||||
// nontext_mask. Such blobs are fully deleted, and non-noise blobs have their
|
||||
// neighbours reset, as they may now point to deleted data.
|
||||
// WARNING: The blobs list blobs may be in the *this grid, but they are
|
||||
// not removed. If any deleted blobs might be in *this, then this must be
|
||||
// Clear()ed immediately after MarkAndDeleteNonTextBlobs is called.
|
||||
// If the win is not NULL, deleted blobs are drawn on it in red, and kept
|
||||
void MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST* blobs,
|
||||
int max_blob_overlaps,
|
||||
ScrollView* win, ScrollView::Color ok_color,
|
||||
Pix* nontext_mask);
|
||||
// Returns true if the given blob overlaps more than max_overlaps blobs
|
||||
// in the current grid.
|
||||
bool BlobOverlapsTooMuch(BLOBNBOX* blob, int max_overlaps);
|
||||
|
||||
// Max entry in noise_density_ before the cell is declared noisy.
|
||||
int max_noise_count_;
|
||||
// Completed noise density map, which we keep around to use for secondary
|
||||
// noise detection.
|
||||
IntGrid* noise_density_;
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_TEXTORD_CCPHOTODETECT_H_
|
1094
textord/cjkpitch.cpp
Normal file
1094
textord/cjkpitch.cpp
Normal file
File diff suppressed because it is too large
Load Diff
72
textord/cjkpitch.h
Normal file
72
textord/cjkpitch.h
Normal file
@ -0,0 +1,72 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: cjkpitch.h
|
||||
// Description: Code to determine fixed pitchness and the pitch if fixed,
|
||||
// for CJK text.
|
||||
// Copyright 2011 Google Inc. All Rights Reserved.
|
||||
// Author: takenaka@google.com (Hiroshi Takenaka)
|
||||
// Created: Mon Jun 27 12:48:35 JST 2011
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
#ifndef CJKPITCH_H_
|
||||
#define CJKPITCH_H_
|
||||
|
||||
#include "blobbox.h"
|
||||
#include "notdll.h"
|
||||
|
||||
// Function to test the "fixed-pitchness" of the input text and estimate
|
||||
// character pitch parameters for it, based on CJK fixed-pitch layout
|
||||
// model.
|
||||
//
|
||||
// This function assumes that a fixed-pitch CJK text has following
|
||||
// characteristics:
|
||||
//
|
||||
// - Most glyphs are designed to fit within the same sized square
|
||||
// (imaginary body). Also they are aligned to the center of their
|
||||
// imaginary bodies.
|
||||
// - The imaginary body is always a regular rectangle.
|
||||
// - There may be some extra space between character bodies
|
||||
// (tracking).
|
||||
// - There may be some extra space after punctuations.
|
||||
// - The text is *not* space-delimited. Thus spaces are rare.
|
||||
// - A character may consist of multiple unconnected blobs.
|
||||
//
|
||||
// And the function works in two passes. On pass 1, it looks for such
|
||||
// "good" blobs that have the same pitch on both sides and
|
||||
// looks like a complete CJK character. Then estimates the character
|
||||
// pitch for every row, based on those good blobs. If we couldn't find
|
||||
// enough good blobs for a row, then the pitch is estimated from other
|
||||
// rows with similar character height instead.
|
||||
//
|
||||
// Pass 2 is an iterative process to fit the blobs into fixed-pitch
|
||||
// character cells. Once we have estimated the character pitch, blobs
|
||||
// that are almost as large as the pitch can be considered to be
|
||||
// complete characters. And once we know that some characters are
|
||||
// complete characters, we can estimate the region occupied by its
|
||||
// neighbors. And so on.
|
||||
//
|
||||
// We repeat the process until all ambiguities are resolved. Then make
|
||||
// the final decision about fixed-pitchness of each row and compute
|
||||
// pitch and spacing parameters.
|
||||
//
|
||||
// (If a row is considered to be proportional, pitch_decision for the
|
||||
// row is set to PITCH_CORR_PROP and the later phase
|
||||
// (i.e. Textord::to_spacing()) should determine its spacing
|
||||
// parameters)
|
||||
//
|
||||
// This function doesn't provide all information required by
|
||||
// fixed_pitch_words() and the rows need to be processed with
|
||||
// make_prop_words() even if they are fixed pitched.
|
||||
void compute_fixed_pitch_cjk(ICOORD page_tr, // top right
|
||||
TO_BLOCK_LIST *port_blocks); // input list
|
||||
|
||||
#endif // CJKPITCH_H_
|
1324
textord/colfind.cpp
1324
textord/colfind.cpp
File diff suppressed because it is too large
Load Diff
@ -25,25 +25,28 @@
|
||||
#include "colpartitiongrid.h"
|
||||
#include "colpartitionset.h"
|
||||
#include "ocrblock.h"
|
||||
#include "textlineprojection.h"
|
||||
|
||||
class ScrollView;
|
||||
class TO_BLOCK;
|
||||
class STATS;
|
||||
class BLOCK_LIST;
|
||||
struct Boxa;
|
||||
struct Pixa;
|
||||
class DENORM;
|
||||
class ScrollView;
|
||||
class STATS;
|
||||
class TO_BLOCK;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
extern BOOL_VAR_H(textord_tabfind_find_tables, false, "run table detection");
|
||||
|
||||
class StrokeWidth;
|
||||
class LineSpacing;
|
||||
class TempColumn_LIST;
|
||||
class ColSegment_LIST;
|
||||
class ColumnGroup_LIST;
|
||||
class ColPartitionSet;
|
||||
class ColPartitionSet_LIST;
|
||||
class ColSegment_LIST;
|
||||
class ColumnGroup_LIST;
|
||||
class LineSpacing;
|
||||
class StrokeWidth;
|
||||
class TempColumn_LIST;
|
||||
class EquationDetectBase;
|
||||
|
||||
// The ColumnFinder class finds columns in the grid.
|
||||
class ColumnFinder : public TabFind {
|
||||
@ -59,25 +62,54 @@ class ColumnFinder : public TabFind {
|
||||
int vertical_x, int vertical_y);
|
||||
virtual ~ColumnFinder();
|
||||
|
||||
// Accessors for testing
|
||||
const DENORM* denorm() const {
|
||||
return denorm_;
|
||||
}
|
||||
const TextlineProjection* projection() const {
|
||||
return &projection_;
|
||||
}
|
||||
|
||||
// ======================================================================
|
||||
// The main function of ColumnFinder is broken into pieces to facilitate
|
||||
// optional insertion of orientation and script detection in an efficient
|
||||
// way. The calling sequence IS MANDATORY however, whether or not
|
||||
// OSD is being used:
|
||||
// 1. Construction.
|
||||
// 2. IsVerticallyAlignedText.
|
||||
// 3. CorrectOrientation.
|
||||
// 4. FindBlocks.
|
||||
// 5. Destruction. Use of a single column finder for multiple images does not
|
||||
// 2. SetupAndFilterNoise.
|
||||
// 3. IsVerticallyAlignedText.
|
||||
// 4. CorrectOrientation.
|
||||
// 5. FindBlocks.
|
||||
// 6. Destruction. Use of a single column finder for multiple images does not
|
||||
// make sense.
|
||||
// Throughout these steps, the ColPartitions are owned by part_grid_, which
|
||||
// means that it must be kept correct. Exception: big_parts_ owns its
|
||||
// own ColPartitions.
|
||||
// The BLOBNBOXes are owned by the input TO_BLOCK for the whole time, except
|
||||
// for a phase in FindBlocks before TransformToBlocks, when they become
|
||||
// owned by the ColPartitions. The owner() ColPartition of a BLOBNBOX
|
||||
// indicates more of a betrothal for the majority of layout analysis, ie
|
||||
// which ColPartition will take ownership when the blobs are released from
|
||||
// the input TO_BLOCK. Exception: image_bblobs_ owns the fake blobs that
|
||||
// are part of the image regions, as they are not on any TO_BLOCK list.
|
||||
// TODO(rays) break up column finder further into smaller classes, as
|
||||
// there is a lot more to it than column finding now.
|
||||
// ======================================================================
|
||||
|
||||
// Tests for vertical alignment of text (returning true if so), and
|
||||
// generates a list of blobs for orientation and script detection. Note that
|
||||
// the vertical alignment may be due to text whose writing direction is
|
||||
// vertical, like say Japanese, or due to text whose writing direction is
|
||||
// Performs initial processing on the blobs in the input_block:
|
||||
// Setup the part_grid, stroke_width_, nontext_map_.
|
||||
// Obvious noise blobs are filtered out and used to mark the nontext_map_.
|
||||
// Initial stroke-width analysis is used to get local text alignment
|
||||
// direction, so the textline projection_ map can be setup.
|
||||
// On return, IsVerticallyAlignedText may be called (now optionally) to
|
||||
// determine the gross textline alignment of the page.
|
||||
void SetupAndFilterNoise(Pix* photo_mask_pix, TO_BLOCK* input_block);
|
||||
|
||||
// Tests for vertical alignment of text (returning true if so), and generates
|
||||
// a list of blobs (in osd_blobs) for orientation and script detection.
|
||||
// block is the single block for the whole page or rectangle to be OCRed.
|
||||
// Note that the vertical alignment may be due to text whose writing direction
|
||||
// is vertical, like say Japanese, or due to text whose writing direction is
|
||||
// horizontal but whose text appears vertically aligned because the image is
|
||||
// not the right way up.
|
||||
bool IsVerticallyAlignedText(TO_BLOCK* block, BLOBNBOX_CLIST* osd_blobs);
|
||||
@ -96,25 +128,32 @@ class ColumnFinder : public TabFind {
|
||||
void CorrectOrientation(TO_BLOCK* block, bool vertical_text_lines,
|
||||
int recognition_rotation);
|
||||
|
||||
// Finds the text and image blocks, returning them in the blocks and to_blocks
|
||||
// lists. (Each TO_BLOCK points to the basic BLOCK and adds more information.)
|
||||
// If boxa and pixa are not NULL, they are assumed to be the output of
|
||||
// ImageFinder::FindImages, and are used to generate image blocks.
|
||||
// The input boxa and pixa are destroyed.
|
||||
// Imageheight should be the pixel height of the original image.
|
||||
// The input block is the result of a call to find_components, and contains
|
||||
// the blobs found in the image. These blobs will be removed and placed
|
||||
// in the output blocks, while unused ones will be deleted.
|
||||
// Finds blocks of text, image, rule line, table etc, returning them in the
|
||||
// blocks and to_blocks lists.
|
||||
// (Each TO_BLOCK points to the basic BLOCK and adds more information.)
|
||||
// Image blocks are generated by a combination of photo_mask_pix (which may
|
||||
// NOT be NULL) and the rejected text found during preliminary textline
|
||||
// finding.
|
||||
// The input_block is the result of a call to find_components, and contains
|
||||
// the blobs found in the image or rectangle to be OCRed. These blobs will be
|
||||
// removed and placed in the output blocks, while unused ones will be deleted.
|
||||
// If single_column is true, the input is treated as single column, but
|
||||
// it is still divided into blocks of equal line spacing/text size.
|
||||
// Returns -1 if the user requested retry with more debug info.
|
||||
int FindBlocks(bool single_column, int imageheight,
|
||||
TO_BLOCK* block, Boxa* boxa, Pixa* pixa,
|
||||
// scaled_color is scaled down by scaled_factor from the input color image,
|
||||
// and may be NULL if the input was not color.
|
||||
// Returns -1 if the user hits the 'd' key in the blocks window while running
|
||||
// in debug mode, which requests a retry with more debug info.
|
||||
int FindBlocks(bool single_column,
|
||||
Pix* scaled_color, int scaled_factor,
|
||||
TO_BLOCK* block, Pix* photo_mask_pix,
|
||||
BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks);
|
||||
|
||||
// Get the rotation required to deskew, and its inverse rotation.
|
||||
void GetDeskewVectors(FCOORD* deskew, FCOORD* reskew);
|
||||
|
||||
// Set the equation detection pointer.
|
||||
void SetEquationDetect(EquationDetectBase* detect);
|
||||
|
||||
private:
|
||||
// Displays the blob and block bounding boxes in a window called Blocks.
|
||||
void DisplayBlocks(BLOCK_LIST* blocks);
|
||||
@ -122,75 +161,11 @@ class ColumnFinder : public TabFind {
|
||||
// best_columns_.
|
||||
void DisplayColumnBounds(PartSetVector* sets);
|
||||
|
||||
// Converts the arrays of Box/Pix to a list of C_OUTLINE, and then to blobs.
|
||||
// The output is a list of C_BLOBs for the images, but the C_OUTLINEs
|
||||
// contain no data.
|
||||
void ExtractImageBlobs(int image_height, Boxa* boxa, Pixa* pixa);
|
||||
|
||||
////// Functions involved in making the initial ColPartitions. /////
|
||||
|
||||
// Creates the initial ColPartitions, and puts them in a ColPartitionSet
|
||||
// for each grid y coordinate, storing the ColPartitionSets in part_sets_.
|
||||
// After creating the ColPartitonSets, attempts to merge them where they
|
||||
// overlap and unique the BLOBNBOXes within.
|
||||
// The return value is the number of ColPartitionSets made.
|
||||
int MakeColumnPartitions();
|
||||
// Partition the BLOBNBOXES horizontally at the given grid y, creating a
|
||||
// ColPartitionSet which is returned. NULL is returned if there are no
|
||||
// BLOBNBOXES at the given grid y.
|
||||
ColPartitionSet* PartitionsAtGridY(int grid_y);
|
||||
// Insert the blobs in the given list into the main grid and for
|
||||
// each one also make it a separate unknown partition.
|
||||
// If filter is true, use only the blobs that are above a threshold in
|
||||
// size or a non-isolated.
|
||||
void InsertSmallBlobsAsUnknowns(bool filter, BLOBNBOX_LIST* blobs);
|
||||
// Helper function for PartitionsAtGridY, with a long argument list.
|
||||
// This bbox is of unknown type, so it is added to an unk_partition.
|
||||
// If the edge is past the unk_right_margin then unk_partition has to be
|
||||
// completed and a new one made. See CompletePartition and StartPartition
|
||||
// for the other args.
|
||||
void ProcessUnknownBlob(int page_edge, BLOBNBOX* bbox,
|
||||
ColPartition** unk_partition,
|
||||
ColPartition_IT* unk_part_it,
|
||||
TabVector** unk_right_line,
|
||||
int* unk_right_margin,
|
||||
int* unk_prev_margin,
|
||||
bool* unk_edge_is_left);
|
||||
// Creates and returns a new ColPartition of the given start_type
|
||||
// and adds the given bbox to it.
|
||||
// Also finds the left and right tabvectors that bound the textline, setting
|
||||
// the members of the returned ColPartition appropriately:
|
||||
// If the left tabvector is less constraining than the input left_margin
|
||||
// (assumed to be the right edge of the previous partition), then the
|
||||
// tabvector is ignored and the left_margin used instead.
|
||||
// If the right tabvector is more constraining than the input *right_margin,
|
||||
// (probably the right edge of the page), then the *right_margin is adjusted
|
||||
// to use the tabvector.
|
||||
// *edge_is_left is set to true if the right tabvector is good and used as the
|
||||
// margin, so we can include blobs that overhang the tabvector in this
|
||||
// partition.
|
||||
ColPartition* StartPartition(BlobRegionType start_type, int left_margin,
|
||||
BLOBNBOX* bbox, TabVector** right_line,
|
||||
int* right_margin, bool* edge_is_left);
|
||||
// Completes the given partition, and adds it to the given iterator.
|
||||
// The right_margin on input is the left edge of the next blob if there is
|
||||
// one. The right tab vector plus a margin is used as the right margin if
|
||||
// it is more constraining than the next blob, but if there are no more
|
||||
// blobs, we want the right margin to make it to the page edge.
|
||||
// The return value is the next left margin, being the right edge of the
|
||||
// bounding box of blobs.
|
||||
int CompletePartition(bool no_more_blobs, int page_edge,
|
||||
TabVector* right_line, int* right_margin,
|
||||
ColPartition** partition, ColPartition_IT* part_it);
|
||||
|
||||
|
||||
////// Functions involved in determining the columns used on the page. /////
|
||||
|
||||
// Makes an ordered list of candidates to partition the width of the page
|
||||
// into columns using the part_sets_.
|
||||
// See AddToColumnSetsIfUnique for the ordering.
|
||||
// If single_column, then it just makes a single page-wide fake column.
|
||||
void MakeColumnCandidates(bool single_column);
|
||||
// Sets up column_sets_ (the determined column layout at each horizontal
|
||||
// slice). Returns false if the page is empty.
|
||||
bool MakeColumns(bool single_column);
|
||||
// Attempt to improve the column_candidates by expanding the columns
|
||||
// and adding new partitions from the partition sets in src_sets.
|
||||
// Src_sets may be equal to column_candidates, in which case it will
|
||||
@ -201,10 +176,10 @@ class ColumnFinder : public TabFind {
|
||||
void PrintColumnCandidates(const char* title);
|
||||
// Finds the optimal set of columns that cover the entire image with as
|
||||
// few changes in column partition as possible.
|
||||
void AssignColumns();
|
||||
void AssignColumns(const PartSetVector& part_sets);
|
||||
// Finds the biggest range in part_sets_ that has no assigned column, but
|
||||
// column assignment is possible.
|
||||
bool BiggestUnassignedRange(const bool* any_columns_possible,
|
||||
bool BiggestUnassignedRange(int set_count, const bool* any_columns_possible,
|
||||
int* start, int* end);
|
||||
// Finds the modal compatible column_set_ index within the given range.
|
||||
int RangeModalColumnSet(int** column_set_costs, const int* assigned_costs,
|
||||
@ -236,19 +211,21 @@ class ColumnFinder : public TabFind {
|
||||
//////// Functions that manipulate ColPartitions in the part_grid_ /////
|
||||
//////// to split, merge, find margins, and find types. //////////////
|
||||
|
||||
// Removes the ColPartitions from part_sets_, the ColPartitionSets that
|
||||
// contain them, and puts them in the part_grid_ after ensuring that no
|
||||
// BLOBNBOX is owned by more than one of them.
|
||||
void MovePartitionsToGrid();
|
||||
// Hoovers up all un-owned blobs and deletes them.
|
||||
// The rest get released from the block so the ColPartitions can pass
|
||||
// ownership to the output blocks.
|
||||
void ReleaseBlobsAndCleanupUnused(TO_BLOCK* block);
|
||||
// Splits partitions that cross columns where they have nothing in the gap.
|
||||
void GridSplitPartitions();
|
||||
// Merges partitions where there is vertical overlap, within a single column,
|
||||
// and the horizontal gap is small enough.
|
||||
void GridMergePartitions();
|
||||
// Resolves unknown partitions from the unknown_parts_ list by merging them
|
||||
// with a close neighbour, inserting them into the grid with a known type,
|
||||
// or declaring them to be noise.
|
||||
void GridInsertUnknowns();
|
||||
// Inserts remaining noise blobs into the most applicable partition if any.
|
||||
// If there is no applicable partition, then the blobs are deleted.
|
||||
void InsertRemainingNoise(TO_BLOCK* block);
|
||||
// Remove partitions that come from horizontal lines that look like
|
||||
// underlines, but are not part of a table.
|
||||
void GridRemoveUnderlinePartitions();
|
||||
// Add horizontal line separators as partitions.
|
||||
void GridInsertHLinePartitions();
|
||||
// Add vertical line separators as partitions.
|
||||
@ -272,22 +249,34 @@ class ColumnFinder : public TabFind {
|
||||
// Transform the grid of partitions to the output blocks.
|
||||
void TransformToBlocks(BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks);
|
||||
|
||||
// Reflect the blob boxes (but not the outlines) in the y-axis so that
|
||||
// the blocks get created in the correct RTL order. Rotates the blobs
|
||||
// in the input_block and the bblobs list.
|
||||
// The reflection is undone in RotateAndReskewBlocks by
|
||||
// reflecting the blocks themselves, and then recomputing the blob bounding
|
||||
// boxes.
|
||||
void ReflectForRtl(TO_BLOCK* input_block, BLOBNBOX_LIST* bblobs);
|
||||
|
||||
// Undo the deskew that was done in FindTabVectors, as recognition is done
|
||||
// without correcting blobs or blob outlines for skew.
|
||||
// Reskew the completed blocks to put them back to the original rotated coords
|
||||
// that were created by CorrectOrientation.
|
||||
// If the input_is_rtl, then reflect the blocks in the y-axis to undo the
|
||||
// reflection that was done before FindTabVectors.
|
||||
// Blocks that were identified as vertical text (relative to the rotated
|
||||
// coordinates) are further rotated so the text lines are horizontal.
|
||||
// blob polygonal outlines are rotated to match the position of the blocks
|
||||
// that they are in, and their bounding boxes are recalculated to be accurate.
|
||||
// Record appropriate inverse transformations and required
|
||||
// classifier transformation in the blocks.
|
||||
void RotateAndReskewBlocks(TO_BLOCK_LIST* to_blocks);
|
||||
void RotateAndReskewBlocks(bool input_is_rtl, TO_BLOCK_LIST* to_blocks);
|
||||
|
||||
|
||||
// Move all the small and noise blobs into the main blobs list of
|
||||
// the block from the to_blocks list that contains them.
|
||||
void MoveSmallBlobs(BLOBNBOX_LIST* bblobs, TO_BLOCK_LIST* to_blocks);
|
||||
// Computes the rotations for the block (to make textlines horizontal) and
|
||||
// for the blobs (for classification) and sets the appropriate members
|
||||
// of the given block.
|
||||
// Returns the rotation that needs to be applied to the blobs to make
|
||||
// them sit in the rotated block.
|
||||
FCOORD ComputeBlockAndClassifyRotation(BLOCK* block);
|
||||
|
||||
// The minimum gutter width to apply for finding columns.
|
||||
// Modified when vertical text is detected to prevent detection of
|
||||
@ -305,9 +294,6 @@ class ColumnFinder : public TabFind {
|
||||
FCOORD rerotate_;
|
||||
// The additional rotation vector needed to rotate text for recognition.
|
||||
FCOORD text_rotation_;
|
||||
// The part_sets_ are the initial text-line-like partition of the grid,
|
||||
// and is a vector of ColPartitionSets.
|
||||
PartSetVector part_sets_;
|
||||
// The column_sets_ contain the ordered candidate ColPartitionSets that
|
||||
// define the possible divisions of the page into columns.
|
||||
PartSetVector column_sets_;
|
||||
@ -322,14 +308,31 @@ class ColumnFinder : public TabFind {
|
||||
// turned into regions, but are kept around because they are referenced
|
||||
// by the part_grid_.
|
||||
ColPartition_LIST good_parts_;
|
||||
// List of ColPartitions of unknown type.
|
||||
ColPartition_LIST unknown_parts_;
|
||||
// List of ColPartitions that are big and might be dropcap or vertically
|
||||
// joined.
|
||||
ColPartition_LIST big_parts_;
|
||||
// List of ColPartitions that have been declared noise.
|
||||
ColPartition_LIST noise_parts_;
|
||||
// The fake blobs that are made from the input boxa/pixa pair.
|
||||
// The fake blobs that are made from the images.
|
||||
BLOBNBOX_LIST image_bblobs_;
|
||||
// Horizontal line separators.
|
||||
TabVector_LIST horizontal_lines_;
|
||||
// Image map of photo/noise areas on the page.
|
||||
Pix* nontext_map_;
|
||||
// Textline projection map.
|
||||
TextlineProjection projection_;
|
||||
// Sequence of DENORMS that indicate how to get back to the original image
|
||||
// coordinate space. The destructor must delete all the DENORMs in the chain.
|
||||
DENORM* denorm_;
|
||||
|
||||
// Various debug windows that automatically go away on completion.
|
||||
ScrollView* input_blobs_win_;
|
||||
|
||||
// The equation region detector pointer. Note: This pointer is passed in by
|
||||
// member function SetEquationDetect, and releasing it is NOT owned by this
|
||||
// class.
|
||||
EquationDetectBase* equation_detect_;
|
||||
|
||||
// Allow a subsequent instance to reuse the blocks window.
|
||||
// Not thread-safe, but multiple threads shouldn't be using windows anyway.
|
||||
static ScrollView* blocks_win_;
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -90,7 +90,18 @@ class ColPartition : public ELIST2_LINK {
|
||||
// WARNING: Despite being on C_LISTs, the BLOBNBOX owns the C_BLOB and
|
||||
// the ColPartition owns the BLOBNBOX!!!
|
||||
// Call DeleteBoxes before deleting the ColPartition.
|
||||
static ColPartition* FakePartition(const TBOX& box);
|
||||
static ColPartition* FakePartition(const TBOX& box,
|
||||
PolyBlockType block_type,
|
||||
BlobRegionType blob_type,
|
||||
BlobTextFlowType flow);
|
||||
|
||||
// Constructs and returns a ColPartition with the given real BLOBNBOX,
|
||||
// and sets it up to be a "big" partition (single-blob partition bigger
|
||||
// than the surrounding text that may be a dropcap, two or more vertically
|
||||
// touching characters, or some graphic element.
|
||||
// If the given list is not NULL, the partition is also added to the list.
|
||||
static ColPartition* MakeBigPartition(BLOBNBOX* box,
|
||||
ColPartition_LIST* big_part_list);
|
||||
|
||||
~ColPartition();
|
||||
|
||||
@ -116,6 +127,12 @@ class ColPartition : public ELIST2_LINK {
|
||||
int median_bottom() const {
|
||||
return median_bottom_;
|
||||
}
|
||||
int median_left() const {
|
||||
return median_left_;
|
||||
}
|
||||
int median_right() const {
|
||||
return median_right_;
|
||||
}
|
||||
int median_size() const {
|
||||
return median_size_;
|
||||
}
|
||||
@ -185,6 +202,12 @@ class ColPartition : public ELIST2_LINK {
|
||||
void set_working_set(WorkingPartSet* working_set) {
|
||||
working_set_ = working_set;
|
||||
}
|
||||
bool block_owned() const {
|
||||
return block_owned_;
|
||||
}
|
||||
void set_block_owned(bool owned) {
|
||||
block_owned_ = owned;
|
||||
}
|
||||
bool desperately_merged() const {
|
||||
return desperately_merged_;
|
||||
}
|
||||
@ -342,33 +365,39 @@ class ColPartition : public ELIST2_LINK {
|
||||
bool HOverlaps(const ColPartition& other) const {
|
||||
return bounding_box_.x_overlap(other.bounding_box_);
|
||||
}
|
||||
// Returns true if this and other can be combined without putting a
|
||||
// horizontal step in either left or right edge.
|
||||
bool HCompatible(const ColPartition& other) const {
|
||||
return left_margin_ <= other.bounding_box_.left() &&
|
||||
bounding_box_.left() >= other.left_margin_ &&
|
||||
bounding_box_.right() <= other.right_margin_ &&
|
||||
right_margin_ >= other.bounding_box_.right();
|
||||
// Returns true if this and other's bounding boxes overlap vertically.
|
||||
// TODO(rays) Make HOverlaps and VOverlaps truly symmetric.
|
||||
bool VOverlaps(const ColPartition& other) const {
|
||||
return bounding_box_.y_gap(other.bounding_box_) < 0;
|
||||
}
|
||||
// Returns the vertical overlap (by median) of this and other.
|
||||
// WARNING! Only makes sense on horizontal partitions!
|
||||
int VOverlap(const ColPartition& other) const {
|
||||
int VCoreOverlap(const ColPartition& other) const {
|
||||
return MIN(median_top_, other.median_top_) -
|
||||
MAX(median_bottom_, other.median_bottom_);
|
||||
}
|
||||
// Returns the horizontal overlap (by median) of this and other.
|
||||
// WARNING! Only makes sense on vertical partitions!
|
||||
int HOverlap(const ColPartition& other) const {
|
||||
int HCoreOverlap(const ColPartition& other) const {
|
||||
return MIN(median_right_, other.median_right_) -
|
||||
MAX(median_left_, other.median_left_);
|
||||
}
|
||||
// Returns true if this and other overlap significantly vertically.
|
||||
bool VOverlaps(const ColPartition& other) const {
|
||||
int overlap = VOverlap(other);
|
||||
// WARNING! Only makes sense on horizontal partitions!
|
||||
bool VSignificantCoreOverlap(const ColPartition& other) const {
|
||||
int overlap = VCoreOverlap(other);
|
||||
int height = MIN(median_top_ - median_bottom_,
|
||||
other.median_top_ - other.median_bottom_);
|
||||
return overlap * 3 > height;
|
||||
}
|
||||
// Returns true if this and other can be combined without putting a
|
||||
// horizontal step in either left or right edge of the resulting block.
|
||||
bool WithinSameMargins(const ColPartition& other) const {
|
||||
return left_margin_ <= other.bounding_box_.left() &&
|
||||
bounding_box_.left() >= other.left_margin_ &&
|
||||
bounding_box_.right() <= other.right_margin_ &&
|
||||
right_margin_ >= other.bounding_box_.right();
|
||||
}
|
||||
// Returns true if the region types (aligned_text_) match.
|
||||
// Lines never match anything, as they should never be merged or chained.
|
||||
bool TypesMatch(const ColPartition& other) const {
|
||||
@ -379,6 +408,13 @@ class ColPartition : public ELIST2_LINK {
|
||||
!BLOBNBOX::IsLineType(type1) && !BLOBNBOX::IsLineType(type2);
|
||||
}
|
||||
|
||||
// Returns true if the types are similar to each other.
|
||||
static bool TypesSimilar(PolyBlockType type1, PolyBlockType type2) {
|
||||
return (type1 == type2 ||
|
||||
(type1 == PT_FLOWING_TEXT && type2 == PT_INLINE_EQUATION) ||
|
||||
(type2 == PT_FLOWING_TEXT && type1 == PT_INLINE_EQUATION));
|
||||
}
|
||||
|
||||
// Returns true if this partition is of horizontal line type
|
||||
bool IsLineType() const {
|
||||
return PTIsLineType(type_);
|
||||
@ -430,8 +466,8 @@ class ColPartition : public ELIST2_LINK {
|
||||
TBOX BoundsWithoutBox(BLOBNBOX* box);
|
||||
|
||||
// Claims the boxes in the boxes_list by marking them with a this owner
|
||||
// pointer. If a box is already owned, then run Unique on it.
|
||||
void ClaimBoxes(WidthCallback* cb);
|
||||
// pointer.
|
||||
void ClaimBoxes();
|
||||
|
||||
// NULL the owner of the blobs in this partition, so they can be deleted
|
||||
// independently of the ColPartition.
|
||||
@ -440,6 +476,12 @@ class ColPartition : public ELIST2_LINK {
|
||||
// Delete the boxes that this partition owns.
|
||||
void DeleteBoxes();
|
||||
|
||||
// Reflects the partition in the y-axis, assuming that its blobs have
|
||||
// already been done. Corrects only a limited part of the members, since
|
||||
// this function is assumed to be used shortly after initial creation, which
|
||||
// is before a lot of the members are used.
|
||||
void ReflectInYAxis();
|
||||
|
||||
// Returns true if this is a legal partition - meaning that the conditions
|
||||
// left_margin <= bounding_box left
|
||||
// left_key <= bounding box left key
|
||||
@ -451,6 +493,9 @@ class ColPartition : public ELIST2_LINK {
|
||||
// Returns true if the left and right edges are approximately equal.
|
||||
bool MatchingColumns(const ColPartition& other) const;
|
||||
|
||||
// Returns true if the colors match for two text partitions.
|
||||
bool MatchingTextColor(const ColPartition& other) const;
|
||||
|
||||
// Returns true if the sizes match for two text partitions,
|
||||
// taking orientation into account
|
||||
bool MatchingSizes(const ColPartition& other) const;
|
||||
@ -482,6 +527,19 @@ class ColPartition : public ELIST2_LINK {
|
||||
// Returns the right rule line x coord of the rightmost blob.
|
||||
int RightBlobRule() const;
|
||||
|
||||
// Returns the density value for a particular BlobSpecialTextType.
|
||||
float SpecialBlobsDensity(const BlobSpecialTextType type) const;
|
||||
// Returns the number of blobs for a particular BlobSpecialTextType.
|
||||
int SpecialBlobsCount(const BlobSpecialTextType type);
|
||||
// Set the density value for a particular BlobSpecialTextType, should ONLY be
|
||||
// used for debugging or testing. In production code, use
|
||||
// ComputeSpecialBlobsDensity instead.
|
||||
void SetSpecialBlobsDensity(
|
||||
const BlobSpecialTextType type, const float density);
|
||||
// Compute the SpecialTextType density of blobs, where we assume
|
||||
// that the SpecialTextType in the boxes_ has been set.
|
||||
void ComputeSpecialBlobsDensity();
|
||||
|
||||
// Add a partner above if upper, otherwise below.
|
||||
// Add them uniquely and keep the list sorted by box left.
|
||||
// Partnerships are added symmetrically to partner and this.
|
||||
@ -496,9 +554,6 @@ class ColPartition : public ELIST2_LINK {
|
||||
// Merge with the other partition and delete it.
|
||||
void Absorb(ColPartition* other, WidthCallback* cb);
|
||||
|
||||
// Shares out any common boxes amongst the partitions, ensuring that no
|
||||
// box stays in both. Returns true if anything was done.
|
||||
bool Unique(ColPartition* other, WidthCallback* cb);
|
||||
// Returns true if the overlap between this and the merged pair of
|
||||
// merge candidates is sufficiently trivial to be allowed.
|
||||
// The merged box can graze the edge of this by the ok_box_overlap
|
||||
@ -551,10 +606,20 @@ class ColPartition : public ELIST2_LINK {
|
||||
// Leader detection is limited to sequences of identical width objects,
|
||||
// such as .... or ----, so patterns, such as .-.-.-.-. will not be found.
|
||||
bool MarkAsLeaderIfMonospaced();
|
||||
// Given the result of TextlineProjection::EvaluateColPartition, (positive for
|
||||
// horizontal text, negative for vertical text, and near zero for non-text),
|
||||
// sets the blob_type_ and flow_ for this partition to indicate whether it
|
||||
// is strongly or weakly vertical or horizontal text, or non-text.
|
||||
void SetRegionAndFlowTypesFromProjectionValue(int value);
|
||||
|
||||
// Sets all blobs with the partition blob type and flow.
|
||||
// Sets all blobs with the partition blob type and flow, but never overwrite
|
||||
// leader blobs, as we need to be able to identify them later.
|
||||
void SetBlobTypes();
|
||||
|
||||
// Returns true if a decent baseline can be fitted through the blobs.
|
||||
// Works for both horizontal and vertical text.
|
||||
bool HasGoodBaseline();
|
||||
|
||||
// Adds this ColPartition to a matching WorkingPartSet if one can be found,
|
||||
// otherwise starts a new one in the appropriate column, ending the previous.
|
||||
void AddToWorkingSet(const ICOORD& bleft, const ICOORD& tright,
|
||||
@ -579,6 +644,13 @@ class ColPartition : public ELIST2_LINK {
|
||||
ColPartition_LIST* block_parts,
|
||||
ColPartition_LIST* used_parts);
|
||||
|
||||
// Constructs a block from the given list of vertical text partitions.
|
||||
// Currently only creates rectangular blocks.
|
||||
static TO_BLOCK* MakeVerticalTextBlock(const ICOORD& bleft,
|
||||
const ICOORD& tright,
|
||||
ColPartition_LIST* block_parts,
|
||||
ColPartition_LIST* used_parts);
|
||||
|
||||
|
||||
// Returns a copy of everything except the list of boxes. The resulting
|
||||
// ColPartition is only suitable for keeping in a column candidate list.
|
||||
@ -769,6 +841,8 @@ class ColPartition : public ELIST2_LINK {
|
||||
ColPartition_CLIST lower_partners_;
|
||||
// The WorkingPartSet it lives in while blocks are being made.
|
||||
WorkingPartSet* working_set_;
|
||||
// Flag is true when AddBox is sorting vertically, false otherwise.
|
||||
bool last_add_was_vertical_;
|
||||
// True when the partition's ownership has been taken from the grid and
|
||||
// placed in a working set, or, after that, in the good_parts_ list.
|
||||
bool block_owned_;
|
||||
@ -809,6 +883,8 @@ class ColPartition : public ELIST2_LINK {
|
||||
uinT8 color1_[kRGBRMSColors];
|
||||
uinT8 color2_[kRGBRMSColors];
|
||||
bool owns_blobs_; // Does the partition own its blobs?
|
||||
// The density of special blobs.
|
||||
float special_blobs_densities_[BSTT_COUNT];
|
||||
};
|
||||
|
||||
// Typedef it now in case it becomes a class later.
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -22,6 +22,7 @@
|
||||
|
||||
#include "bbgrid.h"
|
||||
#include "colpartition.h"
|
||||
#include "colpartitionset.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
@ -36,11 +37,32 @@ class ColPartitionGrid : public BBGrid<ColPartition,
|
||||
ColPartitionGrid();
|
||||
ColPartitionGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright);
|
||||
|
||||
~ColPartitionGrid();
|
||||
virtual ~ColPartitionGrid();
|
||||
|
||||
// Handles a click event in a display window.
|
||||
void HandleClick(int x, int y);
|
||||
|
||||
// Merges ColPartitions in the grid that look like they belong in the same
|
||||
// textline.
|
||||
// For all partitions in the grid, calls the box_cb permanent callback
|
||||
// to compute the search box, searches the box, and if a candidate is found,
|
||||
// calls the confirm_cb to check any more rules. If the confirm_cb returns
|
||||
// true, then the partitions are merged.
|
||||
// Both callbacks are deleted before returning.
|
||||
void Merges(TessResultCallback2<bool, ColPartition*, TBOX*>* box_cb,
|
||||
TessResultCallback2<bool, const ColPartition*,
|
||||
const ColPartition*>* confirm_cb);
|
||||
|
||||
// For the given partition, calls the box_cb permanent callback
|
||||
// to compute the search box, searches the box, and if a candidate is found,
|
||||
// calls the confirm_cb to check any more rules. If the confirm_cb returns
|
||||
// true, then the partitions are merged.
|
||||
// Returns true if the partition is consumed by one or more merges.
|
||||
bool MergePart(TessResultCallback2<bool, ColPartition*, TBOX*>* box_cb,
|
||||
TessResultCallback2<bool, const ColPartition*,
|
||||
const ColPartition*>* confirm_cb,
|
||||
ColPartition* part);
|
||||
|
||||
// Finds all the ColPartitions in the grid that overlap with the given
|
||||
// box and returns them SortByBoxLeft(ed) and uniqued in the given list.
|
||||
// Any partition equal to not_this (may be NULL) is excluded.
|
||||
@ -59,6 +81,68 @@ class ColPartitionGrid : public BBGrid<ColPartition,
|
||||
const ColPartition*>* confirm_cb,
|
||||
int* overlap_increase);
|
||||
|
||||
// Split partitions where it reduces overlap between their bounding boxes.
|
||||
// ColPartitions are after all supposed to be a partitioning of the blobs
|
||||
// AND of the space on the page!
|
||||
// Blobs that cause overlaps get removed, put in individual partitions
|
||||
// and added to the big_parts list. They are most likely characters on
|
||||
// 2 textlines that touch, or something big like a dropcap.
|
||||
void SplitOverlappingPartitions(ColPartition_LIST* big_parts);
|
||||
|
||||
// Filters partitions of source_type by looking at local neighbours.
|
||||
// Where a majority of neighbours have a text type, the partitions are
|
||||
// changed to text, where the neighbours have image type, they are changed
|
||||
// to image, and partitions that have no definite neighbourhood type are
|
||||
// left unchanged.
|
||||
// im_box and rerotation are used to map blob coordinates onto the
|
||||
// nontext_map, which is used to prevent the spread of text neighbourhoods
|
||||
// into images.
|
||||
// Returns true if anything was changed.
|
||||
bool GridSmoothNeighbours(BlobTextFlowType source_type, Pix* nontext_map,
|
||||
const TBOX& im_box, const FCOORD& rerotation);
|
||||
|
||||
// Compute the mean RGB of the light and dark pixels in each ColPartition
|
||||
// and also the rms error in the linearity of color.
|
||||
void ComputePartitionColors(Pix* scaled_color, int scaled_factor,
|
||||
const FCOORD& rerotation);
|
||||
|
||||
// Reflects the grid and its colpartitions in the y-axis, assuming that
|
||||
// all blob boxes have already been done.
|
||||
void ReflectInYAxis();
|
||||
|
||||
// Rotates the grid and its colpartitions by the given angle, assuming that
|
||||
// all blob boxes have already been done.
|
||||
void Deskew(const FCOORD& deskew);
|
||||
|
||||
// Sets the left and right tabs of the partitions in the grid.
|
||||
void SetTabStops(TabFind* tabgrid);
|
||||
|
||||
// Makes the ColPartSets and puts them in the PartSetVector ready
|
||||
// for finding column bounds. Returns false if no partitions were found.
|
||||
// Each ColPartition in the grid is placed in a single ColPartSet based
|
||||
// on the bottom-left of its bounding box.
|
||||
bool MakeColPartSets(PartSetVector* part_sets);
|
||||
|
||||
// Makes a single ColPartitionSet consisting of a single ColPartition that
|
||||
// represents the total horizontal extent of the significant content on the
|
||||
// page. Used for the single column setting in place of automatic detection.
|
||||
// Returns NULL if the page is empty of significant content.
|
||||
ColPartitionSet* MakeSingleColumnSet(WidthCallback* cb);
|
||||
|
||||
// Mark the BLOBNBOXes in each partition as being owned by that partition.
|
||||
void ClaimBoxes();
|
||||
|
||||
// Retypes all the blobs referenced by the partitions in the grid.
|
||||
// Image blobs are sliced on the grid boundaries to give the tab finder
|
||||
// a better handle on the edges of the images, and the actual blobs are
|
||||
// returned in the im_blobs list, as they are not owned by the block.
|
||||
void ReTypeBlobs(BLOBNBOX_LIST* im_blobs);
|
||||
|
||||
// The boxes within the partitions have changed (by deskew) so recompute
|
||||
// the bounds of all the partitions and reinsert them into the grid.
|
||||
void RecomputeBounds(int gridsize, const ICOORD& bleft,
|
||||
const ICOORD& tright, const ICOORD& vertical);
|
||||
|
||||
// Improves the margins of the ColPartitions in the grid by calling
|
||||
// FindPartitionMargins on each.
|
||||
void GridFindMargins(ColPartitionSet** best_columns);
|
||||
@ -68,6 +152,13 @@ class ColPartitionGrid : public BBGrid<ColPartition,
|
||||
void ListFindMargins(ColPartitionSet** best_columns,
|
||||
ColPartition_LIST* parts);
|
||||
|
||||
// Deletes all the partitions in the grid after disowning all the blobs.
|
||||
void DeleteParts();
|
||||
|
||||
// Deletes all the partitions in the grid that are of type BRT_UNKNOWN and
|
||||
// all the blobs in them.
|
||||
void DeleteUnknownParts(TO_BLOCK* block);
|
||||
|
||||
// Finds and marks text partitions that represent figure captions.
|
||||
void FindFigureCaptions();
|
||||
|
||||
@ -78,12 +169,64 @@ class ColPartitionGrid : public BBGrid<ColPartition,
|
||||
// Finds the best partner in the given direction for the given partition.
|
||||
// Stores the result with AddPartner.
|
||||
void FindPartitionPartners(bool upper, ColPartition* part);
|
||||
// Finds the best partner in the given direction for the given partition.
|
||||
// Stores the result with AddPartner.
|
||||
void FindVPartitionPartners(bool to_the_left, ColPartition* part);
|
||||
// For every ColPartition with multiple partners in the grid, reduces the
|
||||
// number of partners to 0 or 1. If get_desperate is true, goes to more
|
||||
// desperate merge methods to merge flowing text before breaking partnerships.
|
||||
void RefinePartitionPartners(bool get_desperate);
|
||||
|
||||
private:
|
||||
// Finds and returns a list of candidate ColPartitions to merge with part.
|
||||
// The candidates must overlap search_box, and when merged must not
|
||||
// overlap any other partitions that are not overlapped by each individually.
|
||||
void FindMergeCandidates(const ColPartition* part, const TBOX& search_box,
|
||||
bool debug, ColPartition_CLIST* candidates);
|
||||
|
||||
// Smoothes the region type/flow type of the given part by looking at local
|
||||
// neighbours and the given image mask. Searches a padded rectangle with the
|
||||
// padding truncated on one side of the part's box in turn for each side,
|
||||
// using the result (if any) that has the least distance to all neighbours
|
||||
// that contribute to the decision. This biases in favor of rectangular
|
||||
// regions without completely enforcing them.
|
||||
// If a good decision cannot be reached, the part is left unchanged.
|
||||
// im_box and rerotation are used to map blob coordinates onto the
|
||||
// nontext_map, which is used to prevent the spread of text neighbourhoods
|
||||
// into images.
|
||||
// Returns true if the partition was changed.
|
||||
bool SmoothRegionType(Pix* nontext_map,
|
||||
const TBOX& im_box,
|
||||
const FCOORD& rerotation,
|
||||
bool debug,
|
||||
ColPartition* part);
|
||||
// Executes the search for SmoothRegionType in a single direction.
|
||||
// Creates a bounding box that is padded in all directions except direction,
|
||||
// and searches it for other partitions. Finds the nearest collection of
|
||||
// partitions that makes a decisive result (if any) and returns the type
|
||||
// and the distance of the collection. If there are any pixels in the
|
||||
// nontext_map, then the decision is biased towards image.
|
||||
BlobRegionType SmoothInOneDirection(BlobNeighbourDir direction,
|
||||
Pix* nontext_map,
|
||||
const TBOX& im_box,
|
||||
const FCOORD& rerotation,
|
||||
bool debug,
|
||||
const ColPartition& part,
|
||||
int* best_distance);
|
||||
// Counts the partitions in the given search_box by appending the gap
|
||||
// distance (scaled by dist_scaling) of the part from the base_part to the
|
||||
// vector of the appropriate type for the partition. Prior to return, the
|
||||
// vectors in the dists array are sorted in increasing order.
|
||||
// dists must be an array of GenericVectors of size NPT_COUNT.
|
||||
void AccumulatePartDistances(const ColPartition& base_part,
|
||||
const ICOORD& dist_scaling,
|
||||
const TBOX& search_box,
|
||||
Pix* nontext_map,
|
||||
const TBOX& im_box,
|
||||
const FCOORD& rerotation,
|
||||
bool debug,
|
||||
GenericVector<int>* dists);
|
||||
|
||||
// Improves the margins of the ColPartition by searching for
|
||||
// neighbours that vertically overlap significantly.
|
||||
void FindPartitionMargins(ColPartitionSet* columns, ColPartition* part);
|
||||
|
@ -66,79 +66,13 @@ ColPartition* ColPartitionSet::ColumnContaining(int x, int y) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Insert the ColPartitions in our list into the given grid.
|
||||
void ColPartitionSet::ReturnParts(ColPartition_LIST* parts) {
|
||||
ColPartition_IT it(parts);
|
||||
it.add_list_before(&parts_);
|
||||
}
|
||||
|
||||
// Merge any significantly overlapping partitions within the this and other,
|
||||
// and unique the boxes so that no two partitions use the same box.
|
||||
// Return true if any changes were made to either set.
|
||||
bool ColPartitionSet::MergeOverlaps(ColPartitionSet* other, WidthCallback* cb) {
|
||||
bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(),
|
||||
bounding_box_.bottom()) ||
|
||||
TabFind::WithinTestRegion(2, other->bounding_box_.left(),
|
||||
other->bounding_box_.bottom());
|
||||
if (debug) {
|
||||
tprintf("Considering merge on:\n");
|
||||
Print();
|
||||
other->Print();
|
||||
// Extract all the parts from the list, relinquishing ownership.
|
||||
void ColPartitionSet::RelinquishParts() {
|
||||
ColPartition_IT it(&parts_);
|
||||
while (!it.empty()) {
|
||||
it.extract();
|
||||
it.forward();
|
||||
}
|
||||
ColPartition_IT it1(&parts_);
|
||||
ColPartition_IT it2(&other->parts_);
|
||||
bool any_merged = false;
|
||||
it1.mark_cycle_pt();
|
||||
it2.mark_cycle_pt();
|
||||
// Iterate the two lists in parallel, using the fact that they are
|
||||
// sorted by x-coord to keep the iterators in sync.
|
||||
while (!it1.cycled_list() && !it2.cycled_list()) {
|
||||
any_merged = false;
|
||||
ColPartition* part1 = it1.data();
|
||||
ColPartition* part2 = it2.data();
|
||||
if (debug) {
|
||||
tprintf("Vover=%d, HOver=%d, Hcompatible=%d, typesmatch=%d\n",
|
||||
part1->VOverlaps(*part2), part1->HOverlaps(*part2),
|
||||
part1->HCompatible(*part2), part1->TypesMatch(*part2));
|
||||
}
|
||||
if (part1->VOverlaps(*part2) &&
|
||||
part1->HCompatible(*part2) && part1->TypesMatch(*part2)) {
|
||||
// Partitions seem to be mergeable, so absorb part1 into part2.
|
||||
part1->Absorb(it2.extract(), cb);
|
||||
any_merged = true;
|
||||
it1.forward();
|
||||
it2.forward();
|
||||
} else if (part1->HOverlaps(*part2) && part1->TypesMatch(*part2) &&
|
||||
part1->Unique(part2, cb)) {
|
||||
// Unique moved some boxes, so check to see in either partition was
|
||||
// left empty. If not, any_merged is not set true.
|
||||
if (part1->IsEmpty()) {
|
||||
any_merged = true;
|
||||
delete it1.extract();
|
||||
it1.forward();
|
||||
continue;
|
||||
}
|
||||
if (part2->IsEmpty()) {
|
||||
any_merged = true;
|
||||
delete it2.extract();
|
||||
it2.forward();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (!any_merged) {
|
||||
// Move on the iterator that point to the leftmost partition.
|
||||
if (part1->IsLeftOf(*part2)) {
|
||||
it1.forward();
|
||||
} else {
|
||||
it2.forward();
|
||||
}
|
||||
}
|
||||
}
|
||||
if (any_merged) {
|
||||
ComputeCoverage();
|
||||
other->ComputeCoverage();
|
||||
}
|
||||
return any_merged;
|
||||
}
|
||||
|
||||
// Attempt to improve this by adding partitions or expanding partitions.
|
||||
@ -245,13 +179,13 @@ void ColPartitionSet::AddToColumnSetsIfUnique(PartSetVector* column_sets,
|
||||
}
|
||||
for (int i = 0; i < column_sets->size(); ++i) {
|
||||
ColPartitionSet* columns = column_sets->get(i);
|
||||
// In ordering the column set candidates, total_coverage_ is king,
|
||||
// followed by good_column_count_ and then total column_count.
|
||||
bool better = total_coverage_ > columns->total_coverage_;
|
||||
if (total_coverage_ == columns->total_coverage_) {
|
||||
// In ordering the column set candidates, good_coverage_ is king,
|
||||
// followed by good_column_count_ and then bad_coverage_.
|
||||
bool better = good_coverage_ > columns->good_coverage_;
|
||||
if (good_coverage_ == columns->good_coverage_) {
|
||||
better = good_column_count_ > columns->good_column_count_;
|
||||
if (good_column_count_ == columns->good_column_count_) {
|
||||
better = parts_.length() > columns->parts_.length();
|
||||
better = bad_coverage_ > columns->bad_coverage_;
|
||||
}
|
||||
}
|
||||
if (better) {
|
||||
@ -278,7 +212,7 @@ void ColPartitionSet::AddToColumnSetsIfUnique(PartSetVector* column_sets,
|
||||
bool ColPartitionSet::CompatibleColumns(bool debug, ColPartitionSet* other,
|
||||
WidthCallback* cb) {
|
||||
if (debug) {
|
||||
tprintf("CompatibleColumns testing compability\n");
|
||||
tprintf("CompatibleColumns testing compatibility\n");
|
||||
Print();
|
||||
other->Print();
|
||||
}
|
||||
@ -295,7 +229,7 @@ bool ColPartitionSet::CompatibleColumns(bool debug, ColPartitionSet* other,
|
||||
tprintf("CompatibleColumns ignoring image partition\n");
|
||||
part->Print();
|
||||
}
|
||||
continue; // Image partitions are irrelevant to column compability.
|
||||
continue; // Image partitions are irrelevant to column compatibility.
|
||||
}
|
||||
int y = part->MidY();
|
||||
int left = part->bounding_box().left();
|
||||
@ -331,30 +265,15 @@ bool ColPartitionSet::CompatibleColumns(bool debug, ColPartitionSet* other,
|
||||
ColPartition* next_left_col = ColumnContaining(next_left, y);
|
||||
if (right_col == next_left_col) {
|
||||
// There is a column break in this column.
|
||||
// Check for the difference between different column layout and
|
||||
// a pull-out block.
|
||||
int part_box_width = part->bounding_box().width();
|
||||
int part_margin_width = part->right_margin() - part->left_margin();
|
||||
int next_box_width = next_part->bounding_box().width();
|
||||
int next_margin_width = next_part->right_margin() -
|
||||
next_part->left_margin();
|
||||
int next_right = next_part->bounding_box().right();
|
||||
if (part_box_width < next_margin_width &&
|
||||
next_box_width < part_margin_width) {
|
||||
// This can be due to a figure caption within a column, a pull-out
|
||||
// block, or a simple broken textline that remains to be merged:
|
||||
// all allowed, or a change in column layout: not allowed.
|
||||
// If both partitions are of good width, then it is likely
|
||||
// a change in column layout, otherwise probably an allowed situation.
|
||||
if (part->good_width() && next_part->good_width()) {
|
||||
if (debug) {
|
||||
tprintf("CompatibleColumns false due to equal sized columns\n");
|
||||
tprintf("part1 %d-%d = %d, part2 %d-%d = %d\n",
|
||||
left, right, part->ColumnWidth(),
|
||||
next_left, next_right, next_part->ColumnWidth());
|
||||
right_col->Print();
|
||||
}
|
||||
return false; // Must be a new column layout as they are equal size.
|
||||
}
|
||||
ColPartition* next_right_col = ColumnContaining(next_right, y);
|
||||
if (left_col == right_col && next_right_col == next_left_col) {
|
||||
// Column completely contains both. Not allowed.
|
||||
if (debug) {
|
||||
tprintf("CompatibleColumns false due to containing 2 partitions\n");
|
||||
int next_right = next_part->bounding_box().right();
|
||||
tprintf("CompatibleColumns false due to 2 parts of good width\n");
|
||||
tprintf("part1 %d-%d, part2 %d-%d\n",
|
||||
left, right, next_left, next_right);
|
||||
right_col->Print();
|
||||
@ -654,8 +573,9 @@ void ColPartitionSet::AccumulateColumnWidthsAndGaps(int* total_width,
|
||||
// Provide debug output for this ColPartitionSet and all the ColPartitions.
|
||||
void ColPartitionSet::Print() {
|
||||
ColPartition_IT it(&parts_);
|
||||
tprintf("Partition set of %d parts, %d good, coverage=%d (%d,%d)->(%d,%d)\n",
|
||||
it.length(), good_column_count_, total_coverage_,
|
||||
tprintf("Partition set of %d parts, %d good, coverage=%d+%d"
|
||||
" (%d,%d)->(%d,%d)\n",
|
||||
it.length(), good_column_count_, good_coverage_, bad_coverage_,
|
||||
bounding_box_.left(), bounding_box_.bottom(),
|
||||
bounding_box_.right(), bounding_box_.top());
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||
@ -669,13 +589,7 @@ void ColPartitionSet::Print() {
|
||||
// Add the given partition to the list in the appropriate place.
|
||||
void ColPartitionSet::AddPartition(ColPartition* new_part,
|
||||
ColPartition_IT* it) {
|
||||
bounding_box_ += new_part->bounding_box();
|
||||
if (new_part->good_column() || new_part->good_width()) {
|
||||
total_coverage_ += new_part->ColumnWidth();
|
||||
++good_column_count_;
|
||||
if (new_part->good_width())
|
||||
++good_column_count_;
|
||||
}
|
||||
AddPartitionCoverageAndBox(*new_part);
|
||||
int new_right = new_part->right_key();
|
||||
if (it->data()->left_key() >= new_right)
|
||||
it->add_before_stay_put(new_part);
|
||||
@ -683,22 +597,50 @@ void ColPartitionSet::AddPartition(ColPartition* new_part,
|
||||
it->add_after_stay_put(new_part);
|
||||
}
|
||||
|
||||
// Compute the coverage and good column count.
|
||||
// Compute the coverage and good column count. Coverage is the amount of the
|
||||
// width of the page (in pixels) that is covered by ColPartitions, which are
|
||||
// used to provide candidate column layouts.
|
||||
// Coverage is split into good and bad. Good coverage is provided by
|
||||
// ColPartitions of a frequent width (according to the callback function
|
||||
// provided by TabFinder::WidthCB, which accesses stored statistics on the
|
||||
// widths of ColPartitions) and bad coverage is provided by all other
|
||||
// ColPartitions, even if they have tab vectors at both sides. Thus:
|
||||
// |-----------------------------------------------------------------|
|
||||
// | Double width heading |
|
||||
// |-----------------------------------------------------------------|
|
||||
// |-------------------------------| |-------------------------------|
|
||||
// | Common width ColPartition | | Common width ColPartition |
|
||||
// |-------------------------------| |-------------------------------|
|
||||
// the layout with two common-width columns has better coverage than the
|
||||
// double width heading, because the coverage is "good," even though less in
|
||||
// total coverage than the heading, because the heading coverage is "bad."
|
||||
void ColPartitionSet::ComputeCoverage() {
|
||||
// Count the number of good columns and sum their width.
|
||||
ColPartition_IT it(&parts_);
|
||||
good_column_count_ = 0;
|
||||
total_coverage_ = 0;
|
||||
good_coverage_ = 0;
|
||||
bad_coverage_ = 0;
|
||||
bounding_box_ = TBOX();
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||
ColPartition* part = it.data();
|
||||
bounding_box_ += part->bounding_box();
|
||||
if (part->good_column() || part->good_width()) {
|
||||
total_coverage_ += part->ColumnWidth();
|
||||
AddPartitionCoverageAndBox(*part);
|
||||
}
|
||||
}
|
||||
|
||||
// Adds the coverage, column count and box for a single partition,
|
||||
// without adding it to the list. (Helper factored from ComputeCoverage.)
|
||||
void ColPartitionSet::AddPartitionCoverageAndBox(const ColPartition& part) {
|
||||
bounding_box_ += part.bounding_box();
|
||||
int coverage = part.ColumnWidth();
|
||||
if (part.good_width()) {
|
||||
good_coverage_ += coverage;
|
||||
good_column_count_ += 2;
|
||||
} else {
|
||||
if (part.blob_type() < BRT_UNKNOWN)
|
||||
coverage /= 2;
|
||||
if (part.good_column())
|
||||
++good_column_count_;
|
||||
if (part->good_width())
|
||||
++good_column_count_;
|
||||
}
|
||||
bad_coverage_ += coverage;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -66,13 +66,8 @@ class ColPartitionSet : public ELIST_LINK {
|
||||
// Return the bounding boxes of columns at the given y-range
|
||||
void GetColumnBoxes(int y_bottom, int y_top, ColSegment_LIST *segments);
|
||||
|
||||
// Move the parts to the output list, giving up ownership.
|
||||
void ReturnParts(ColPartition_LIST* parts);
|
||||
|
||||
// Merge any significantly overlapping partitions within the this and other,
|
||||
// and unique the boxes so that no two partitions use the same box.
|
||||
// Return true if any changes were made to either set.
|
||||
bool MergeOverlaps(ColPartitionSet* other, WidthCallback* cb);
|
||||
// Extract all the parts from the list, relinquishing ownership.
|
||||
void RelinquishParts();
|
||||
|
||||
// Attempt to improve this by adding partitions or expanding partitions.
|
||||
void ImproveColumnCandidate(WidthCallback* cb, PartSetVector* src_sets);
|
||||
@ -133,15 +128,37 @@ class ColPartitionSet : public ELIST_LINK {
|
||||
// Add the given partition to the list in the appropriate place.
|
||||
void AddPartition(ColPartition* new_part, ColPartition_IT* it);
|
||||
|
||||
// Compute the coverage and good column count.
|
||||
// Compute the coverage and good column count. Coverage is the amount of the
|
||||
// width of the page (in pixels) that is covered by ColPartitions, which are
|
||||
// used to provide candidate column layouts.
|
||||
// Coverage is split into good and bad. Good coverage is provided by
|
||||
// ColPartitions of a frequent width (according to the callback function
|
||||
// provided by TabFinder::WidthCB, which accesses stored statistics on the
|
||||
// widths of ColPartitions) and bad coverage is provided by all other
|
||||
// ColPartitions, even if they have tab vectors at both sides. Thus:
|
||||
// |-----------------------------------------------------------------|
|
||||
// | Double width heading |
|
||||
// |-----------------------------------------------------------------|
|
||||
// |-------------------------------| |-------------------------------|
|
||||
// | Common width ColPartition | | Common width ColPartition |
|
||||
// |-------------------------------| |-------------------------------|
|
||||
// the layout with two common-width columns has better coverage than the
|
||||
// double width heading, because the coverage is "good," even though less in
|
||||
// total coverage than the heading, because the heading coverage is "bad."
|
||||
void ComputeCoverage();
|
||||
|
||||
// Adds the coverage, column count and box for a single partition,
|
||||
// without adding it to the list. (Helper factored from ComputeCoverage.)
|
||||
void AddPartitionCoverageAndBox(const ColPartition& part);
|
||||
|
||||
// The partitions in this column candidate.
|
||||
ColPartition_LIST parts_;
|
||||
// The number of partitions that have a frequent column width.
|
||||
int good_column_count_;
|
||||
// Total width of all the ColPartitions.
|
||||
int total_coverage_;
|
||||
// Total width of all the good ColPartitions.
|
||||
int good_coverage_;
|
||||
// Total width of all the bad ColPartitions.
|
||||
int bad_coverage_;
|
||||
// Bounding box of all partitions in the set.
|
||||
TBOX bounding_box_;
|
||||
};
|
||||
|
@ -165,36 +165,6 @@ bool ShiroRekhaSplitter::Split(bool split_for_pageseg) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// This method changes the input page image and pix_binary to be the same as
|
||||
// the splitted image owned by this object.
|
||||
// Any of the parameters can be NULL.
|
||||
void ShiroRekhaSplitter::CopySplittedImageTo(IMAGE* page_image,
|
||||
Pix** pix_binary) const {
|
||||
ASSERT_HOST(splitted_image_);
|
||||
if (pix_binary) {
|
||||
pixDestroy(pix_binary);
|
||||
*pix_binary = pixClone(splitted_image_);
|
||||
}
|
||||
if (page_image) {
|
||||
page_image->FromPix(splitted_image_);
|
||||
}
|
||||
}
|
||||
|
||||
// This method changes the input page image and pix_binary to be the same as
|
||||
// the original image provided to this object.
|
||||
// Any of the parameters can be NULL.
|
||||
void ShiroRekhaSplitter::CopyOriginalImageTo(IMAGE* page_image,
|
||||
Pix** pix_binary) const {
|
||||
ASSERT_HOST(orig_pix_);
|
||||
if (pix_binary) {
|
||||
pixDestroy(pix_binary);
|
||||
*pix_binary = pixClone(orig_pix_);
|
||||
}
|
||||
if (page_image) {
|
||||
page_image->FromPix(orig_pix_);
|
||||
}
|
||||
}
|
||||
|
||||
// Method to perform a close operation on the input image. The xheight
|
||||
// estimate decides the size of sel used.
|
||||
void ShiroRekhaSplitter::PerformClose(Pix* pix, int xheight_estimate) {
|
||||
@ -395,7 +365,8 @@ void ShiroRekhaSplitter::RefreshSegmentationWithNewBlobs(
|
||||
C_BLOB_LIST not_found_blobs;
|
||||
RefreshWordBlobsFromNewBlobs(segmentation_block_list_,
|
||||
new_blobs,
|
||||
¬_found_blobs);
|
||||
((devanagari_split_debugimage && debug_image_) ?
|
||||
¬_found_blobs : NULL));
|
||||
|
||||
if (devanagari_split_debuglevel > 0) {
|
||||
tprintf("After refreshing blobs:\n");
|
||||
@ -525,4 +496,4 @@ void PixelHistogram::ConstructHorizontalCountHist(Pix* pix) {
|
||||
numaDestroy(&counts);
|
||||
}
|
||||
|
||||
}
|
||||
} // namespace tesseract.
|
||||
|
@ -80,16 +80,6 @@ class ShiroRekhaSplitter {
|
||||
// splitting. If false, the ocr_split_strategy_ is used.
|
||||
bool Split(bool split_for_pageseg);
|
||||
|
||||
// This method changes the input page image and pix_binary to be the same as
|
||||
// the splitted image owned by this object.
|
||||
// Any of the parameters can be NULL.
|
||||
void CopySplittedImageTo(IMAGE* page_image, Pix** pix_binary) const;
|
||||
|
||||
// This method changes the input page image and pix_binary to be the same as
|
||||
// the original image provided to this object.
|
||||
// Any of the parameters can be NULL.
|
||||
void CopyOriginalImageTo(IMAGE* page_image, Pix** pix_binary) const;
|
||||
|
||||
// Clears the memory held by this object.
|
||||
void Clear();
|
||||
|
||||
@ -212,5 +202,6 @@ class ShiroRekhaSplitter {
|
||||
// performed before CCs are run through splitting.
|
||||
};
|
||||
|
||||
}
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
|
||||
|
@ -419,12 +419,11 @@ void empty_buckets( // find blobs
|
||||
out_it.set_to_list(&outlines);
|
||||
do {
|
||||
parent_it = bucket_it; // find outermost
|
||||
do
|
||||
bucket_it.forward();
|
||||
while (!bucket_it.at_first()
|
||||
&& !(*parent_it.data() < *bucket_it.data()));
|
||||
}
|
||||
while (!bucket_it.at_first());
|
||||
do {
|
||||
bucket_it.forward();
|
||||
} while (!bucket_it.at_first() &&
|
||||
!(*parent_it.data() < *bucket_it.data()));
|
||||
} while (!bucket_it.at_first());
|
||||
|
||||
// move to new list
|
||||
out_it.add_after_then_move(parent_it.extract());
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -21,21 +21,40 @@
|
||||
#ifndef TESSERACT_TEXTORD_IMAGEFIND_H__
|
||||
#define TESSERACT_TEXTORD_IMAGEFIND_H__
|
||||
|
||||
#include "host.h"
|
||||
|
||||
struct Boxa;
|
||||
struct Pix;
|
||||
struct Pixa;
|
||||
class TBOX;
|
||||
class FCOORD;
|
||||
class TO_BLOCK;
|
||||
class BLOBNBOX_LIST;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// The ImageFinder class is a simple static function wrapper class that
|
||||
class ColPartitionGrid;
|
||||
class ColPartition_LIST;
|
||||
class TabFind;
|
||||
|
||||
// The ImageFind class is a simple static function wrapper class that
|
||||
// exposes the FindImages function and some useful helper functions.
|
||||
class ImageFinder {
|
||||
class ImageFind {
|
||||
public:
|
||||
// Finds image regions within the source pix (page image) and returns
|
||||
// the image regions as a Boxa, Pixa pair, analgous to pixConnComp.
|
||||
// Finds image regions within the BINARY source pix (page image) and returns
|
||||
// the image regions as a mask image.
|
||||
// The returned pix may be NULL, meaning no images found.
|
||||
// If not NULL, it must be PixDestroyed by the caller.
|
||||
static Pix* FindImages(Pix* pix);
|
||||
|
||||
// Generates a Boxa, Pixa pair from the input binary (image mask) pix,
|
||||
// analogous to pixConnComp, except that connected components which are nearly
|
||||
// rectangular are replaced with solid rectangles.
|
||||
// The returned boxa, pixa may be NULL, meaning no images found.
|
||||
// If not NULL, they must be destroyed by the caller.
|
||||
static void FindImages(Pix* pix, Boxa** boxa, Pixa** pixa);
|
||||
// Resolution of pix should match the source image (Tesseract::pix_binary_)
|
||||
// so the output coordinate systems match.
|
||||
static void ConnCompAndRectangularize(Pix* pix, Boxa** boxa, Pixa** pixa);
|
||||
|
||||
// Returns true if there is a rectangle in the source pix, such that all
|
||||
// pixel rows and column slices outside of it have less than
|
||||
@ -54,9 +73,84 @@ class ImageFinder {
|
||||
|
||||
// Given an input pix, and a bounding rectangle, the sides of the rectangle
|
||||
// are shrunk inwards until they bound any black pixels found within the
|
||||
// original rectangle.
|
||||
static void BoundsWithinRect(Pix* pix, int* x_start, int* y_start,
|
||||
// original rectangle. Returns false if the rectangle contains no black
|
||||
// pixels at all.
|
||||
static bool BoundsWithinRect(Pix* pix, int* x_start, int* y_start,
|
||||
int* x_end, int* y_end);
|
||||
|
||||
// Given a point in 3-D (RGB) space, returns the squared Euclidean distance
|
||||
// of the point from the given line, defined by a pair of points in the 3-D
|
||||
// (RGB) space, line1 and line2.
|
||||
static double ColorDistanceFromLine(const uinT8* line1, const uinT8* line2,
|
||||
const uinT8* point);
|
||||
|
||||
// Returns the leptonica combined code for the given RGB triplet.
|
||||
static uinT32 RGB(uinT32 r, uinT32 g, uinT32 b);
|
||||
|
||||
// Returns the input value clipped to a uinT8.
|
||||
static uinT8 ClipToByte(double pixel);
|
||||
|
||||
// Computes the light and dark extremes of color in the given rectangle of
|
||||
// the given pix, which is factor smaller than the coordinate system in rect.
|
||||
// The light and dark points are taken to be the upper and lower 8th-ile of
|
||||
// the most deviant of R, G and B. The value of the other 2 channels are
|
||||
// computed by linear fit against the most deviant.
|
||||
// The colors of the two point are returned in color1 and color2, with the
|
||||
// alpha channel set to a scaled mean rms of the fits.
|
||||
// If color_map1 is not null then it and color_map2 get rect pasted in them
|
||||
// with the two calculated colors, and rms map gets a pasted rect of the rms.
|
||||
// color_map1, color_map2 and rms_map are assumed to be the same scale as pix.
|
||||
static void ComputeRectangleColors(const TBOX& rect, Pix* pix, int factor,
|
||||
Pix* color_map1, Pix* color_map2,
|
||||
Pix* rms_map,
|
||||
uinT8* color1, uinT8* color2);
|
||||
|
||||
// Returns true if there are no black pixels in between the boxes.
|
||||
// The im_box must represent the bounding box of the pix in tesseract
|
||||
// coordinates, which may be negative, due to rotations to make the textlines
|
||||
// horizontal. The boxes are rotated by rotation, which should undo such
|
||||
// rotations, before mapping them onto the pix.
|
||||
static bool BlankImageInBetween(const TBOX& box1, const TBOX& box2,
|
||||
const TBOX& im_box, const FCOORD& rotation,
|
||||
Pix* pix);
|
||||
|
||||
// Returns the number of pixels in box in the pix.
|
||||
// The im_box must represent the bounding box of the pix in tesseract
|
||||
// coordinates, which may be negative, due to rotations to make the textlines
|
||||
// horizontal. The boxes are rotated by rotation, which should undo such
|
||||
// rotations, before mapping them onto the pix.
|
||||
static int CountPixelsInRotatedBox(TBOX box, const TBOX& im_box,
|
||||
const FCOORD& rotation, Pix* pix);
|
||||
|
||||
|
||||
// Locates all the image partitions in the part_grid, that were found by a
|
||||
// previous call to FindImagePartitions, marks them in the image_mask,
|
||||
// removes them from the grid, and deletes them. This makes it possible to
|
||||
// call FindImagePartitions again to produce less broken-up and less
|
||||
// overlapping image partitions.
|
||||
// rerotation specifies how to rotate the partition coords to match
|
||||
// the image_mask, since this function is used after orientation correction.
|
||||
static void TransferImagePartsToImageMask(const FCOORD& rerotation,
|
||||
ColPartitionGrid* part_grid,
|
||||
Pix* image_mask);
|
||||
|
||||
// Runs a CC analysis on the image_pix mask image, and creates
|
||||
// image partitions from them, cutting out strong text, and merging with
|
||||
// nearby image regions such that they don't interfere with text.
|
||||
// Rotation and rerotation specify how to rotate image coords to match
|
||||
// the blob and partition coords and back again.
|
||||
// The input/output part_grid owns all the created partitions, and
|
||||
// the partitions own all the fake blobs that belong in the partitions.
|
||||
// Since the other blobs in the other partitions will be owned by the block,
|
||||
// ColPartitionGrid::ReTypeBlobs must be called afterwards to fix this
|
||||
// situation and collect the image blobs.
|
||||
static void FindImagePartitions(Pix* image_pix,
|
||||
const FCOORD& rotation,
|
||||
const FCOORD& rerotation,
|
||||
TO_BLOCK* block,
|
||||
TabFind* tab_grid,
|
||||
ColPartitionGrid* part_grid,
|
||||
ColPartition_LIST* big_parts);
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
@ -34,129 +34,283 @@
|
||||
#endif
|
||||
#include "allheaders.h"
|
||||
|
||||
BOOL_VAR(textord_tabfind_show_vlines, false, "Show vertical rule lines");
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
/// Denominator of resolution makes max pixel width to allow thin lines.
|
||||
const int kThinLineFraction = 30;
|
||||
const int kThinLineFraction = 20;
|
||||
/// Denominator of resolution makes min pixels to demand line lengths to be.
|
||||
const int kMinLineLengthFraction = 8;
|
||||
const int kMinLineLengthFraction = 4;
|
||||
/// Spacing of cracks across the page to break up tall vertical lines.
|
||||
const int kCrackSpacing = 100;
|
||||
/// Grid size used by line finder. Not very critical.
|
||||
const int kLineFindGridSize = 50;
|
||||
// Min width of a line in pixels to be considered thick.
|
||||
const int kMinThickLineWidth = 12;
|
||||
// Max size of line residue. (The pixels that fail the long thin opening, and
|
||||
// therefore don't make it to the candidate line mask, but are nevertheless
|
||||
// part of the line.)
|
||||
const int kMaxLineResidue = 6;
|
||||
// Min length in inches of a line segment that exceeds kMinThickLineWidth in
|
||||
// thickness. (Such lines shouldn't break by simple image degradation.)
|
||||
const double kThickLengthMultiple = 0.75;
|
||||
// Max fraction of line box area that can be occupied by non-line pixels.
|
||||
const double kMaxNonLineDensity = 0.25;
|
||||
// Max height of a music stave in inches.
|
||||
const double kMaxStaveHeight = 1.0;
|
||||
// Minimum fraction of pixels in a music rectangle connected to the staves.
|
||||
const double kMinMusicPixelFraction = 0.75;
|
||||
|
||||
// Finds vertical line objects in the given pix.
|
||||
// Erases the unused blobs from the line_pix image, taking into account
|
||||
// whether this was a horizontal or vertical line set.
|
||||
static void RemoveUnusedLineSegments(bool horizontal_lines,
|
||||
BLOBNBOX_LIST* line_bblobs,
|
||||
Pix* line_pix) {
|
||||
int height = pixGetHeight(line_pix);
|
||||
BLOBNBOX_IT bbox_it(line_bblobs);
|
||||
for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
|
||||
BLOBNBOX* blob = bbox_it.data();
|
||||
if (blob->left_tab_type() == TT_MAYBE_ALIGNED) {
|
||||
const TBOX& box = blob->bounding_box();
|
||||
Box* pixbox = NULL;
|
||||
if (horizontal_lines) {
|
||||
// Horizontal lines are in tess format and also have x and y flipped
|
||||
// (to use FindVerticalAlignment) so we have to flip x and y and then
|
||||
// convert to Leptonica by height - flipped x (ie the right edge).
|
||||
// See GetLineBoxes for more explanation.
|
||||
pixbox = boxCreate(box.bottom(), height - box.right(),
|
||||
box.height(), box.width());
|
||||
|
||||
} else {
|
||||
// For vertical lines, just flip upside-down to convert to Leptonica.
|
||||
// The y position of the box in Leptonica terms is the distance from
|
||||
// the top of the image to the top of the box.
|
||||
pixbox = boxCreate(box.left(), height - box.top(),
|
||||
box.width(), box.height());
|
||||
}
|
||||
pixClearInRect(line_pix, pixbox);
|
||||
boxDestroy(&pixbox);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Helper subtracts the line_pix image from the src_pix, and removes residue
|
||||
// as well by removing components that touch the line, but are not in the
|
||||
// non_line_pix mask. It is assumed that the non_line_pix mask has already
|
||||
// been prepared to required accuracy.
|
||||
static void SubtractLinesAndResidue(Pix* line_pix, Pix* non_line_pix,
|
||||
int resolution, Pix* src_pix) {
|
||||
// First remove the lines themselves.
|
||||
pixSubtract(src_pix, src_pix, line_pix);
|
||||
// Subtract the non-lines from the image to get the residue.
|
||||
Pix* residue_pix = pixSubtract(NULL, src_pix, non_line_pix);
|
||||
// Dilate the lines so they touch the residue.
|
||||
Pix* fat_line_pix = pixDilateBrick(NULL, line_pix, 3, 3);
|
||||
// Seed fill the fat lines to get all the residue.
|
||||
pixSeedfillBinary(fat_line_pix, fat_line_pix, residue_pix, 8);
|
||||
// Subtract the residue from the original image.
|
||||
pixSubtract(src_pix, src_pix, fat_line_pix);
|
||||
pixDestroy(&fat_line_pix);
|
||||
pixDestroy(&residue_pix);
|
||||
}
|
||||
|
||||
// Returns the maximum strokewidth in the given binary image by doubling
|
||||
// the maximum of the distance function.
|
||||
static int MaxStrokeWidth(Pix* pix) {
|
||||
Pix* dist_pix = pixDistanceFunction(pix, 4, 8, L_BOUNDARY_BG);
|
||||
int width = pixGetWidth(dist_pix);
|
||||
int height = pixGetHeight(dist_pix);
|
||||
int wpl = pixGetWpl(dist_pix);
|
||||
l_uint32* data = pixGetData(dist_pix);
|
||||
// Find the maximum value in the distance image.
|
||||
int max_dist = 0;
|
||||
for (int y = 0; y < height; ++y) {
|
||||
for (int x = 0; x < width; ++x) {
|
||||
int pixel = GET_DATA_BYTE(data, x);
|
||||
if (pixel > max_dist)
|
||||
max_dist = pixel;
|
||||
}
|
||||
data += wpl;
|
||||
}
|
||||
pixDestroy(&dist_pix);
|
||||
return max_dist * 2;
|
||||
}
|
||||
|
||||
// Returns the number of components in the intersection_pix touched by line_box.
|
||||
static int NumTouchingIntersections(Box* line_box, Pix* intersection_pix) {
|
||||
if (intersection_pix == NULL) return 0;
|
||||
Pix* rect_pix = pixClipRectangle(intersection_pix, line_box, NULL);
|
||||
Boxa* boxa = pixConnComp(rect_pix, NULL, 8);
|
||||
pixDestroy(&rect_pix);
|
||||
if (boxa == NULL) return false;
|
||||
int result = boxaGetCount(boxa);
|
||||
boxaDestroy(&boxa);
|
||||
return result;
|
||||
}
|
||||
|
||||
// Returns the number of black pixels found in the box made by adding the line
|
||||
// width to both sides of the line bounding box. (Increasing the smallest
|
||||
// dimension of the bounding box.)
|
||||
static int CountPixelsAdjacentToLine(int line_width, Box* line_box,
|
||||
Pix* nonline_pix) {
|
||||
l_int32 x, y, box_width, box_height;
|
||||
boxGetGeometry(line_box, &x, &y, &box_width, &box_height);
|
||||
if (box_width > box_height) {
|
||||
// horizontal line.
|
||||
int bottom = MIN(pixGetHeight(nonline_pix), y + box_height + line_width);
|
||||
y = MAX(0, y - line_width);
|
||||
box_height = bottom - y;
|
||||
} else {
|
||||
// Vertical line.
|
||||
int right = MIN(pixGetWidth(nonline_pix), x + box_width + line_width);
|
||||
x = MAX(0, x - line_width);
|
||||
box_width = right - x;
|
||||
}
|
||||
Box* box = boxCreate(x, y, box_width, box_height);
|
||||
Pix* rect_pix = pixClipRectangle(nonline_pix, box, NULL);
|
||||
boxDestroy(&box);
|
||||
l_int32 result;
|
||||
pixCountPixels(rect_pix, &result, NULL);
|
||||
pixDestroy(&rect_pix);
|
||||
return result;
|
||||
}
|
||||
|
||||
// Helper erases false-positive line segments from the input/output line_pix.
|
||||
// 1. Since thick lines shouldn't really break up, we can eliminate some false
|
||||
// positives by marking segments that are at least kMinThickLineWidth
|
||||
// thickness, yet have a length less than min_thick_length.
|
||||
// 2. Lines that don't have at least 2 intersections with other lines and have
|
||||
// a lot of neighbouring non-lines are probably not lines (perhaps arabic
|
||||
// or Hindi words, or underlines.)
|
||||
// Bad line components are erased from line_pix.
|
||||
// Returns the number of remaining connected components.
|
||||
static int FilterFalsePositives(int resolution, Pix* nonline_pix,
|
||||
Pix* intersection_pix, Pix* line_pix) {
|
||||
int min_thick_length = static_cast<int>(resolution * kThickLengthMultiple);
|
||||
Pixa* pixa = NULL;
|
||||
Boxa* boxa = pixConnComp(line_pix, &pixa, 8);
|
||||
// Iterate over the boxes to remove false positives.
|
||||
int nboxes = boxaGetCount(boxa);
|
||||
int remaining_boxes = nboxes;
|
||||
for (int i = 0; i < nboxes; ++i) {
|
||||
Box* box = boxaGetBox(boxa, i, L_CLONE);
|
||||
l_int32 x, y, box_width, box_height;
|
||||
boxGetGeometry(box, &x, &y, &box_width, &box_height);
|
||||
Pix* comp_pix = pixaGetPix(pixa, i, L_CLONE);
|
||||
int max_width = MaxStrokeWidth(comp_pix);
|
||||
pixDestroy(&comp_pix);
|
||||
bool bad_line = false;
|
||||
// If the length is too short to stand-alone as a line, and the box width
|
||||
// is thick enough, and the stroke width is thick enough it is bad.
|
||||
if (box_width >= kMinThickLineWidth && box_height >= kMinThickLineWidth &&
|
||||
box_width < min_thick_length && box_height < min_thick_length &&
|
||||
max_width > kMinThickLineWidth) {
|
||||
// Too thick for the length.
|
||||
bad_line = true;
|
||||
}
|
||||
if (!bad_line &&
|
||||
(intersection_pix == NULL ||
|
||||
NumTouchingIntersections(box, intersection_pix) < 2)) {
|
||||
// Test non-line density near the line.
|
||||
int nonline_count = CountPixelsAdjacentToLine(max_width, box,
|
||||
nonline_pix);
|
||||
if (nonline_count > box_height * box_width * kMaxNonLineDensity)
|
||||
bad_line = true;
|
||||
}
|
||||
if (bad_line) {
|
||||
// Not a good line.
|
||||
pixClearInRect(line_pix, box);
|
||||
--remaining_boxes;
|
||||
}
|
||||
boxDestroy(&box);
|
||||
}
|
||||
pixaDestroy(&pixa);
|
||||
boxaDestroy(&boxa);
|
||||
return remaining_boxes;
|
||||
}
|
||||
|
||||
// Finds vertical and horizontal line objects in the given pix.
|
||||
// Uses the given resolution to determine size thresholds instead of any
|
||||
// that may be present in the pix.
|
||||
// The output vertical_x and vertical_y contain a sum of the output vectors,
|
||||
// thereby giving the mean vertical direction.
|
||||
// If pix_music_mask != NULL, and music is detected, a mask of the staves
|
||||
// and anything that is connected (bars, notes etc.) will be returned in
|
||||
// pix_music_mask, the mask subtracted from pix, and the lines will not
|
||||
// appear in v_lines or h_lines.
|
||||
// The output vectors are owned by the list and Frozen (cannot refit) by
|
||||
// having no boxes, as there is no need to refit or merge separator lines.
|
||||
void LineFinder::FindVerticalLines(int resolution, Pix* pix,
|
||||
int* vertical_x, int* vertical_y,
|
||||
TabVector_LIST* vectors) {
|
||||
Pix* line_pix;
|
||||
Boxa* boxes = GetVLineBoxes(resolution, pix, &line_pix);
|
||||
C_BLOB_LIST line_cblobs;
|
||||
int width = pixGetWidth(pix);
|
||||
int height = pixGetHeight(pix);
|
||||
ConvertBoxaToBlobs(width, height, &boxes, &line_cblobs);
|
||||
// Make the BLOBNBOXes from the C_BLOBs.
|
||||
BLOBNBOX_LIST line_bblobs;
|
||||
C_BLOB_IT blob_it(&line_cblobs);
|
||||
BLOBNBOX_IT bbox_it(&line_bblobs);
|
||||
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
|
||||
C_BLOB* cblob = blob_it.data();
|
||||
BLOBNBOX* bblob = new BLOBNBOX(cblob);
|
||||
bbox_it.add_to_end(bblob);
|
||||
// The detected lines are removed from the pix.
|
||||
void LineFinder::FindAndRemoveLines(int resolution, bool debug, Pix* pix,
|
||||
int* vertical_x, int* vertical_y,
|
||||
Pix** pix_music_mask,
|
||||
TabVector_LIST* v_lines,
|
||||
TabVector_LIST* h_lines) {
|
||||
if (pix == NULL || vertical_x == NULL || vertical_y == NULL) {
|
||||
tprintf("Error in parameters for LineFinder::FindAndRemoveLines\n");
|
||||
return;
|
||||
}
|
||||
ICOORD bleft(0, 0);
|
||||
ICOORD tright(width, height);
|
||||
FindLineVectors(bleft, tright, &line_bblobs, vertical_x, vertical_y, vectors);
|
||||
if (!vectors->empty()) {
|
||||
// Some lines were found, so erase the unused blobs from the line image
|
||||
// and then subtract the line image from the source.
|
||||
bbox_it.move_to_first();
|
||||
for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
|
||||
BLOBNBOX* blob = bbox_it.data();
|
||||
if (blob->left_tab_type() == TT_UNCONFIRMED) {
|
||||
const TBOX& box = blob->bounding_box();
|
||||
Box* pixbox = boxCreate(box.left(), height - box.top(),
|
||||
box.width(), box.height());
|
||||
pixClearInRect(line_pix, pixbox);
|
||||
boxDestroy(&pixbox);
|
||||
}
|
||||
Pix* pix_vline = NULL;
|
||||
Pix* pix_non_vline = NULL;
|
||||
Pix* pix_hline = NULL;
|
||||
Pix* pix_non_hline = NULL;
|
||||
Pix* pix_intersections = NULL;
|
||||
Pixa* pixa_display = debug ? pixaCreate(0) : NULL;
|
||||
GetLineMasks(resolution, pix, &pix_vline, &pix_non_vline, &pix_hline,
|
||||
&pix_non_hline, &pix_intersections, pix_music_mask,
|
||||
pixa_display);
|
||||
// Find lines, convert to TabVector_LIST and remove those that are used.
|
||||
FindAndRemoveVLines(resolution, pix_intersections, vertical_x, vertical_y,
|
||||
&pix_vline, pix_non_vline, pix, v_lines);
|
||||
if (pix_hline != NULL) {
|
||||
// Recompute intersections and re-filter false positive h-lines.
|
||||
if (pix_vline != NULL)
|
||||
pixAnd(pix_intersections, pix_vline, pix_hline);
|
||||
else
|
||||
pixDestroy(&pix_intersections);
|
||||
if (!FilterFalsePositives(resolution, pix_non_hline, pix_intersections,
|
||||
pix_hline)) {
|
||||
pixDestroy(&pix_hline);
|
||||
}
|
||||
pixDilateBrick(line_pix, line_pix, 1, 3);
|
||||
pixSubtract(pix, pix, line_pix);
|
||||
if (textord_tabfind_show_vlines)
|
||||
pixWrite("vlinesclean.png", line_pix, IFF_PNG);
|
||||
ICOORD vertical;
|
||||
vertical.set_with_shrink(*vertical_x, *vertical_y);
|
||||
TabVector::MergeSimilarTabVectors(vertical, vectors, NULL);
|
||||
}
|
||||
pixDestroy(&line_pix);
|
||||
}
|
||||
FindAndRemoveHLines(resolution, pix_intersections, *vertical_x, *vertical_y,
|
||||
&pix_hline, pix_non_hline, pix, h_lines);
|
||||
if (pixa_display != NULL && pix_vline != NULL)
|
||||
pixaAddPix(pixa_display, pix_vline, L_CLONE);
|
||||
if (pixa_display != NULL && pix_hline != NULL)
|
||||
pixaAddPix(pixa_display, pix_hline, L_CLONE);
|
||||
if (pix_vline != NULL && pix_hline != NULL) {
|
||||
// Remove joins (intersections) where lines cross, and the residue.
|
||||
// Recalculate the intersections, since some lines have been deleted.
|
||||
pixAnd(pix_intersections, pix_vline, pix_hline);
|
||||
// Fatten up the intersections and seed-fill to get the intersection
|
||||
// residue.
|
||||
Pix* pix_join_residue = pixDilateBrick(NULL, pix_intersections, 5, 5);
|
||||
pixSeedfillBinary(pix_join_residue, pix_join_residue, pix, 8);
|
||||
// Now remove the intersection residue.
|
||||
pixSubtract(pix, pix, pix_join_residue);
|
||||
pixDestroy(&pix_join_residue);
|
||||
}
|
||||
// Remove any detected music.
|
||||
if (pix_music_mask != NULL && *pix_music_mask != NULL) {
|
||||
if (pixa_display != NULL)
|
||||
pixaAddPix(pixa_display, *pix_music_mask, L_CLONE);
|
||||
pixSubtract(pix, pix, *pix_music_mask);
|
||||
}
|
||||
if (pixa_display != NULL)
|
||||
pixaAddPix(pixa_display, pix, L_CLONE);
|
||||
|
||||
// Finds horizontal line objects in the given pix.
|
||||
// Uses the given resolution to determine size thresholds instead of any
|
||||
// that may be present in the pix.
|
||||
// The output vectors are owned by the list and Frozen (cannot refit) by
|
||||
// having no boxes, as there is no need to refit or merge separator lines.
|
||||
void LineFinder::FindHorizontalLines(int resolution, Pix* pix,
|
||||
TabVector_LIST* vectors) {
|
||||
Pix* line_pix;
|
||||
Boxa* boxes = GetHLineBoxes(resolution, pix, &line_pix);
|
||||
C_BLOB_LIST line_cblobs;
|
||||
int width = pixGetWidth(pix);
|
||||
int height = pixGetHeight(pix);
|
||||
ConvertBoxaToBlobs(height, width, &boxes, &line_cblobs);
|
||||
// Make the BLOBNBOXes from the C_BLOBs.
|
||||
BLOBNBOX_LIST line_bblobs;
|
||||
C_BLOB_IT blob_it(&line_cblobs);
|
||||
BLOBNBOX_IT bbox_it(&line_bblobs);
|
||||
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
|
||||
C_BLOB* cblob = blob_it.data();
|
||||
BLOBNBOX* bblob = new BLOBNBOX(cblob);
|
||||
bbox_it.add_to_end(bblob);
|
||||
pixDestroy(&pix_vline);
|
||||
pixDestroy(&pix_non_vline);
|
||||
pixDestroy(&pix_hline);
|
||||
pixDestroy(&pix_non_hline);
|
||||
pixDestroy(&pix_intersections);
|
||||
if (pixa_display != NULL) {
|
||||
#if LIBLEPT_MINOR_VERSION >= 69 || LIBLEPT_MAJOR_VERSION > 1
|
||||
pixaConvertToPdf(pixa_display, resolution, 1.0f, 0, 0, "LineFinding",
|
||||
"vhlinefinding.pdf");
|
||||
#endif
|
||||
pixaDestroy(&pixa_display);
|
||||
}
|
||||
ICOORD bleft(0, 0);
|
||||
ICOORD tright(height, width);
|
||||
int vertical_x, vertical_y;
|
||||
FindLineVectors(bleft, tright, &line_bblobs, &vertical_x, &vertical_y,
|
||||
vectors);
|
||||
if (!vectors->empty()) {
|
||||
// Some lines were found, so erase the unused blobs from the line image
|
||||
// and then subtract the line image from the source.
|
||||
bbox_it.move_to_first();
|
||||
for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
|
||||
BLOBNBOX* blob = bbox_it.data();
|
||||
if (blob->left_tab_type() == TT_UNCONFIRMED) {
|
||||
const TBOX& box = blob->bounding_box();
|
||||
// Coords are in tess format so flip x and y and then convert
|
||||
// to leptonica by height -y.
|
||||
Box* pixbox = boxCreate(box.bottom(), height - box.right(),
|
||||
box.height(), box.width());
|
||||
pixClearInRect(line_pix, pixbox);
|
||||
boxDestroy(&pixbox);
|
||||
}
|
||||
}
|
||||
pixDilateBrick(line_pix, line_pix, 3, 1);
|
||||
pixSubtract(pix, pix, line_pix);
|
||||
if (textord_tabfind_show_vlines)
|
||||
pixWrite("hlinesclean.png", line_pix, IFF_PNG);
|
||||
ICOORD vertical;
|
||||
vertical.set_with_shrink(vertical_x, vertical_y);
|
||||
TabVector::MergeSimilarTabVectors(vertical, vectors, NULL);
|
||||
// Iterate the vectors to flip them.
|
||||
TabVector_IT h_it(vectors);
|
||||
for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
|
||||
h_it.data()->XYFlip();
|
||||
}
|
||||
}
|
||||
pixDestroy(&line_pix);
|
||||
}
|
||||
|
||||
// Converts the Boxa array to a list of C_BLOB, getting rid of severely
|
||||
@ -176,8 +330,8 @@ void LineFinder::ConvertBoxaToBlobs(int image_width, int image_height,
|
||||
// Make a C_OUTLINE from the leptonica box. This is a bit of a hack,
|
||||
// as there is no outline, just a bounding box, but with some very
|
||||
// small changes to coutln.cpp, it works nicely.
|
||||
ICOORD top_left(x, image_height - y);
|
||||
ICOORD bot_right(x + width, image_height - (y + height));
|
||||
ICOORD top_left(x, y);
|
||||
ICOORD bot_right(x + width, y + height);
|
||||
CRACKEDGE startpt;
|
||||
startpt.pos = top_left;
|
||||
C_OUTLINE* outline = new C_OUTLINE(&startpt, top_left, bot_right, 0);
|
||||
@ -197,6 +351,85 @@ void LineFinder::ConvertBoxaToBlobs(int image_width, int image_height,
|
||||
boxaDestroy(boxes);
|
||||
}
|
||||
|
||||
// Finds vertical line objects in pix_vline and removes them from src_pix.
|
||||
// Uses the given resolution to determine size thresholds instead of any
|
||||
// that may be present in the pix.
|
||||
// The output vertical_x and vertical_y contain a sum of the output vectors,
|
||||
// thereby giving the mean vertical direction.
|
||||
// The output vectors are owned by the list and Frozen (cannot refit) by
|
||||
// having no boxes, as there is no need to refit or merge separator lines.
|
||||
// If no good lines are found, pix_vline is destroyed.
|
||||
// None of the input pointers may be NULL, and if *pix_vline is NULL then
|
||||
// the function does nothing.
|
||||
void LineFinder::FindAndRemoveVLines(int resolution,
|
||||
Pix* pix_intersections,
|
||||
int* vertical_x, int* vertical_y,
|
||||
Pix** pix_vline, Pix* pix_non_vline,
|
||||
Pix* src_pix, TabVector_LIST* vectors) {
|
||||
if (pix_vline == NULL || *pix_vline == NULL) return;
|
||||
C_BLOB_LIST line_cblobs;
|
||||
BLOBNBOX_LIST line_bblobs;
|
||||
GetLineBoxes(false, *pix_vline, pix_intersections,
|
||||
&line_cblobs, &line_bblobs);
|
||||
int width = pixGetWidth(src_pix);
|
||||
int height = pixGetHeight(src_pix);
|
||||
ICOORD bleft(0, 0);
|
||||
ICOORD tright(width, height);
|
||||
FindLineVectors(bleft, tright, &line_bblobs, vertical_x, vertical_y, vectors);
|
||||
if (!vectors->empty()) {
|
||||
RemoveUnusedLineSegments(false, &line_bblobs, *pix_vline);
|
||||
SubtractLinesAndResidue(*pix_vline, pix_non_vline, resolution, src_pix);
|
||||
ICOORD vertical;
|
||||
vertical.set_with_shrink(*vertical_x, *vertical_y);
|
||||
TabVector::MergeSimilarTabVectors(vertical, vectors, NULL);
|
||||
} else {
|
||||
pixDestroy(pix_vline);
|
||||
}
|
||||
}
|
||||
|
||||
// Finds horizontal line objects in pix_hline and removes them from src_pix.
|
||||
// Uses the given resolution to determine size thresholds instead of any
|
||||
// that may be present in the pix.
|
||||
// The output vertical_x and vertical_y contain a sum of the output vectors,
|
||||
// thereby giving the mean vertical direction.
|
||||
// The output vectors are owned by the list and Frozen (cannot refit) by
|
||||
// having no boxes, as there is no need to refit or merge separator lines.
|
||||
// If no good lines are found, pix_hline is destroyed.
|
||||
// None of the input pointers may be NULL, and if *pix_hline is NULL then
|
||||
// the function does nothing.
|
||||
void LineFinder::FindAndRemoveHLines(int resolution,
|
||||
Pix* pix_intersections,
|
||||
int vertical_x, int vertical_y,
|
||||
Pix** pix_hline, Pix* pix_non_hline,
|
||||
Pix* src_pix, TabVector_LIST* vectors) {
|
||||
if (pix_hline == NULL || *pix_hline == NULL) return;
|
||||
C_BLOB_LIST line_cblobs;
|
||||
BLOBNBOX_LIST line_bblobs;
|
||||
GetLineBoxes(true, *pix_hline, pix_intersections, &line_cblobs, &line_bblobs);
|
||||
int width = pixGetWidth(src_pix);
|
||||
int height = pixGetHeight(src_pix);
|
||||
ICOORD bleft(0, 0);
|
||||
ICOORD tright(height, width);
|
||||
FindLineVectors(bleft, tright, &line_bblobs, &vertical_x, &vertical_y,
|
||||
vectors);
|
||||
if (!vectors->empty()) {
|
||||
RemoveUnusedLineSegments(true, &line_bblobs, *pix_hline);
|
||||
SubtractLinesAndResidue(*pix_hline, pix_non_hline, resolution, src_pix);
|
||||
ICOORD vertical;
|
||||
vertical.set_with_shrink(vertical_x, vertical_y);
|
||||
TabVector::MergeSimilarTabVectors(vertical, vectors, NULL);
|
||||
// Iterate the vectors to flip them. x and y were flipped for horizontal
|
||||
// lines, so FindLineVectors can work just with the vertical case.
|
||||
// See GetLineBoxes for more on the flip.
|
||||
TabVector_IT h_it(vectors);
|
||||
for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
|
||||
h_it.data()->XYFlip();
|
||||
}
|
||||
} else {
|
||||
pixDestroy(pix_hline);
|
||||
}
|
||||
}
|
||||
|
||||
// Finds vertical lines in the given list of BLOBNBOXes. bleft and tright
|
||||
// are the bounds of the image on which the input line_bblobs were found.
|
||||
// The input line_bblobs list is const really.
|
||||
@ -213,7 +446,7 @@ void LineFinder::FindLineVectors(const ICOORD& bleft, const ICOORD& tright,
|
||||
AlignedBlob blob_grid(kLineFindGridSize, bleft, tright);
|
||||
for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
|
||||
BLOBNBOX* bblob = bbox_it.data();
|
||||
bblob->set_left_tab_type(TT_UNCONFIRMED);
|
||||
bblob->set_left_tab_type(TT_MAYBE_ALIGNED);
|
||||
bblob->set_left_rule(bleft.x());
|
||||
bblob->set_right_rule(tright.x());
|
||||
bblob->set_left_crossing_rule(bleft.x());
|
||||
@ -221,20 +454,18 @@ void LineFinder::FindLineVectors(const ICOORD& bleft, const ICOORD& tright,
|
||||
blob_grid.InsertBBox(false, true, bblob);
|
||||
++b_count;
|
||||
}
|
||||
if (textord_debug_tabfind)
|
||||
tprintf("Inserted %d line blobs into grid\n", b_count);
|
||||
if (b_count == 0)
|
||||
return;
|
||||
|
||||
// Search the entire grid, looking for vertical line vectors.
|
||||
GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> lsearch(&blob_grid);
|
||||
BlobGridSearch lsearch(&blob_grid);
|
||||
BLOBNBOX* bbox;
|
||||
TabVector_IT vector_it(vectors);
|
||||
*vertical_x = 0;
|
||||
*vertical_y = 1;
|
||||
lsearch.StartFullSearch();
|
||||
while ((bbox = lsearch.NextFullSearch()) != NULL) {
|
||||
if (bbox->left_tab_type() == TT_UNCONFIRMED) {
|
||||
if (bbox->left_tab_type() == TT_MAYBE_ALIGNED) {
|
||||
const TBOX& box = bbox->bounding_box();
|
||||
if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom()))
|
||||
tprintf("Finding line vector starting at bbox (%d,%d)\n",
|
||||
@ -249,89 +480,268 @@ void LineFinder::FindLineVectors(const ICOORD& bleft, const ICOORD& tright,
|
||||
}
|
||||
}
|
||||
}
|
||||
ScrollView* line_win = NULL;
|
||||
if (textord_tabfind_show_vlines) {
|
||||
line_win = blob_grid.MakeWindow(0, 50, "Vlines");
|
||||
blob_grid.DisplayBoxes(line_win);
|
||||
line_win = blob_grid.DisplayTabs("Vlines", line_win);
|
||||
}
|
||||
}
|
||||
|
||||
// Get a set of bounding boxes of possible vertical lines in the image.
|
||||
// The input resolution overrides any resolution set in src_pix.
|
||||
// The output line_pix contains just all the detected lines.
|
||||
Boxa* LineFinder::GetVLineBoxes(int resolution, Pix* src_pix, Pix** line_pix) {
|
||||
// Remove any parts of 1 inch/kThinLineFraction wide or more, by opening
|
||||
// away the thin lines and subtracting what's left.
|
||||
// This is very generous and will leave in even quite wide lines.
|
||||
Pix* pixt1 = pixOpenBrick(NULL, src_pix, resolution / kThinLineFraction, 1);
|
||||
pixSubtract(pixt1, src_pix, pixt1);
|
||||
// Spread sideways to allow for some skew.
|
||||
Pix* pixt2 = pixDilateBrick(NULL, pixt1, 3, 1);
|
||||
// Now keep only tall stuff of height at least 1 inch/kMinLineLengthFraction.
|
||||
pixOpenBrick(pixt1, pixt2, 1, resolution / kMinLineLengthFraction);
|
||||
pixDestroy(&pixt2);
|
||||
// Put a single pixel crack in every line at an arbitrary spacing,
|
||||
// so they break up and the bounding boxes can be used to get the
|
||||
// direction accurately enough without needing outlines.
|
||||
int wpl = pixGetWpl(pixt1);
|
||||
int height = pixGetHeight(pixt1);
|
||||
l_uint32* data = pixGetData(pixt1);
|
||||
for (int y = kCrackSpacing; y < height; y += kCrackSpacing) {
|
||||
memset(data + wpl * y, 0, wpl * sizeof(*data));
|
||||
// Returns a Pix music mask if music is detected.
|
||||
// Any vertical line that has at least 5 intersections in sufficient density
|
||||
// is taken to be a bar. Bars are used as a seed and the entire touching
|
||||
// component is added to the output music mask and subtracted from the lines.
|
||||
// Returns NULL and does minimal work if no music is found.
|
||||
static Pix* FilterMusic(int resolution, Pix* pix_closed,
|
||||
Pix* pix_vline, Pix* pix_hline,
|
||||
l_int32* v_empty, l_int32* h_empty) {
|
||||
int max_stave_height = static_cast<int>(resolution * kMaxStaveHeight);
|
||||
Pix* intersection_pix = pixAnd(NULL, pix_vline, pix_hline);
|
||||
Boxa* boxa = pixConnComp(pix_vline, NULL, 8);
|
||||
// Iterate over the boxes to find music bars.
|
||||
int nboxes = boxaGetCount(boxa);
|
||||
Pix* music_mask = NULL;
|
||||
for (int i = 0; i < nboxes; ++i) {
|
||||
Box* box = boxaGetBox(boxa, i, L_CLONE);
|
||||
l_int32 x, y, box_width, box_height;
|
||||
boxGetGeometry(box, &x, &y, &box_width, &box_height);
|
||||
int joins = NumTouchingIntersections(box, intersection_pix);
|
||||
// Test for the join density being at least 5 per max_stave_height,
|
||||
// ie (joins-1)/box_height >= (5-1)/max_stave_height.
|
||||
if (joins >= 5 && (joins - 1) * max_stave_height >= 4 * box_height) {
|
||||
// This is a music bar. Add to the mask.
|
||||
if (music_mask == NULL)
|
||||
music_mask = pixCreate(pixGetWidth(pix_vline), pixGetHeight(pix_vline),
|
||||
1);
|
||||
pixSetInRect(music_mask, box);
|
||||
}
|
||||
boxDestroy(&box);
|
||||
}
|
||||
if (textord_tabfind_show_vlines)
|
||||
pixWrite("vlines.png", pixt1, IFF_PNG);
|
||||
Boxa* boxa = pixConnComp(pixt1, NULL, 8);
|
||||
*line_pix = pixt1;
|
||||
return boxa;
|
||||
}
|
||||
|
||||
// Get a set of bounding boxes of possible horizontal lines in the image.
|
||||
// The input resolution overrides any resolution set in src_pix.
|
||||
// The output line_pix contains just all the detected lines.
|
||||
// The output boxes undergo the transformation (x,y)->(height-y,x) so the
|
||||
// lines can be found with a vertical line finder afterwards.
|
||||
// This transformation allows a simple x/y flip to reverse it in tesseract
|
||||
// coordinates and it is faster to flip the lines than rotate the image.
|
||||
Boxa* LineFinder::GetHLineBoxes(int resolution, Pix* src_pix, Pix** line_pix) {
|
||||
// Remove any parts of 1 inch/kThinLineFraction high or more, by opening
|
||||
// away the thin lines and subtracting what's left.
|
||||
// This is very generous and will leave in even quite wide lines.
|
||||
Pix* pixt1 = pixOpenBrick(NULL, src_pix, 1, resolution / kThinLineFraction);
|
||||
pixSubtract(pixt1, src_pix, pixt1);
|
||||
// Spread vertically to allow for some skew.
|
||||
Pix* pixt2 = pixDilateBrick(NULL, pixt1, 1, 3);
|
||||
// Now keep only wide stuff of width at least 1 inch/kMinLineLengthFraction.
|
||||
pixOpenBrick(pixt1, pixt2, resolution / kMinLineLengthFraction, 1);
|
||||
pixDestroy(&pixt2);
|
||||
// Put a single pixel crack in every line at an arbitrary spacing,
|
||||
// so they break up and the bounding boxes can be used to get the
|
||||
// direction accurately enough without needing outlines.
|
||||
int wpl = pixGetWpl(pixt1);
|
||||
int width = pixGetWidth(pixt1);
|
||||
int height = pixGetHeight(pixt1);
|
||||
l_uint32* data = pixGetData(pixt1);
|
||||
for (int y = 0; y < height; ++y, data += wpl) {
|
||||
for (int x = kCrackSpacing; x < width; x += kCrackSpacing) {
|
||||
CLEAR_DATA_BIT(data, x);
|
||||
boxaDestroy(&boxa);
|
||||
pixDestroy(&intersection_pix);
|
||||
if (music_mask != NULL) {
|
||||
// The mask currently contains just the bars. Use the mask as a seed
|
||||
// and the pix_closed as the mask for a seedfill to get all the
|
||||
// intersecting staves.
|
||||
pixSeedfillBinary(music_mask, music_mask, pix_closed, 8);
|
||||
// Filter out false positives. CCs in the music_mask should be the vast
|
||||
// majority of the pixels in their bounding boxes, as we expect just a
|
||||
// tiny amount of text, a few phrase marks, and crescendo etc left.
|
||||
Boxa* boxa = pixConnComp(music_mask, NULL, 8);
|
||||
// Iterate over the boxes to find music components.
|
||||
int nboxes = boxaGetCount(boxa);
|
||||
for (int i = 0; i < nboxes; ++i) {
|
||||
Box* box = boxaGetBox(boxa, i, L_CLONE);
|
||||
Pix* rect_pix = pixClipRectangle(music_mask, box, NULL);
|
||||
l_int32 music_pixels;
|
||||
pixCountPixels(rect_pix, &music_pixels, NULL);
|
||||
pixDestroy(&rect_pix);
|
||||
rect_pix = pixClipRectangle(pix_closed, box, NULL);
|
||||
l_int32 all_pixels;
|
||||
pixCountPixels(rect_pix, &all_pixels, NULL);
|
||||
pixDestroy(&rect_pix);
|
||||
if (music_pixels < kMinMusicPixelFraction * all_pixels) {
|
||||
// False positive. Delete from the music mask.
|
||||
pixClearInRect(music_mask, box);
|
||||
}
|
||||
boxDestroy(&box);
|
||||
}
|
||||
l_int32 no_remaining_music;
|
||||
boxaDestroy(&boxa);
|
||||
pixZero(music_mask, &no_remaining_music);
|
||||
if (no_remaining_music) {
|
||||
pixDestroy(&music_mask);
|
||||
} else {
|
||||
pixSubtract(pix_vline, pix_vline, music_mask);
|
||||
pixSubtract(pix_hline, pix_hline, music_mask);
|
||||
// We may have deleted all the lines
|
||||
pixZero(pix_vline, v_empty);
|
||||
pixZero(pix_hline, h_empty);
|
||||
}
|
||||
}
|
||||
if (textord_tabfind_show_vlines)
|
||||
pixWrite("hlines.png", pixt1, IFF_PNG);
|
||||
Boxa* boxa = pixConnComp(pixt1, NULL, 8);
|
||||
*line_pix = pixt1;
|
||||
return music_mask;
|
||||
}
|
||||
|
||||
// Iterate the boxes to flip x and y.
|
||||
int nboxes = boxaGetCount(boxa);
|
||||
for (int i = 0; i < nboxes; ++i) {
|
||||
l_int32 x, y, box_width, box_height;
|
||||
boxaGetBoxGeometry(boxa, i, &x, &y, &box_width, &box_height);
|
||||
Box* box = boxCreate(height - (y + box_height),
|
||||
width - (x + box_width), box_height, box_width);
|
||||
boxaReplaceBox(boxa, i, box);
|
||||
// Most of the heavy lifting of line finding. Given src_pix and its separate
|
||||
// resolution, returns image masks:
|
||||
// pix_vline candidate vertical lines.
|
||||
// pix_non_vline pixels that didn't look like vertical lines.
|
||||
// pix_hline candidate horizontal lines.
|
||||
// pix_non_hline pixels that didn't look like horizontal lines.
|
||||
// pix_intersections pixels where vertical and horizontal lines meet.
|
||||
// pix_music_mask candidate music staves.
|
||||
// This function promises to initialize all the output (2nd level) pointers,
|
||||
// but any of the returns that are empty will be NULL on output.
|
||||
// None of the input (1st level) pointers may be NULL except pix_music_mask,
|
||||
// which will disable music detection, and pixa_display.
|
||||
void LineFinder::GetLineMasks(int resolution, Pix* src_pix,
|
||||
Pix** pix_vline, Pix** pix_non_vline,
|
||||
Pix** pix_hline, Pix** pix_non_hline,
|
||||
Pix** pix_intersections, Pix** pix_music_mask,
|
||||
Pixa* pixa_display) {
|
||||
int max_line_width = resolution / kThinLineFraction;
|
||||
int min_line_length = resolution / kMinLineLengthFraction;
|
||||
if (pixa_display != NULL) {
|
||||
tprintf("Image resolution = %d, max line width = %d, min length=%d\n",
|
||||
resolution, max_line_width, min_line_length);
|
||||
}
|
||||
int closing_brick = max_line_width / 3;
|
||||
|
||||
// Close up small holes, making it less likely that false alarms are found
|
||||
// in thickened text (as it will become more solid) and also smoothing over
|
||||
// some line breaks and nicks in the edges of the lines.
|
||||
Pix* pix_closed = pixCloseBrick(NULL, src_pix, closing_brick, closing_brick);
|
||||
if (pixa_display != NULL)
|
||||
pixaAddPix(pixa_display, pix_closed, L_CLONE);
|
||||
// Open up with a big box to detect solid areas, which can then be subtracted.
|
||||
// This is very generous and will leave in even quite wide lines.
|
||||
Pix* pix_solid = pixOpenBrick(NULL, pix_closed, max_line_width,
|
||||
max_line_width);
|
||||
if (pixa_display != NULL)
|
||||
pixaAddPix(pixa_display, pix_solid, L_CLONE);
|
||||
Pix* pix_hollow = pixSubtract(NULL, pix_closed, pix_solid);
|
||||
pixDestroy(&pix_solid);
|
||||
// Now open up in both directions independently to find lines of at least
|
||||
// 1 inch/kMinLineLengthFraction in length.
|
||||
if (pixa_display != NULL)
|
||||
pixaAddPix(pixa_display, pix_hollow, L_CLONE);
|
||||
*pix_vline = pixOpenBrick(NULL, pix_hollow, 1, min_line_length);
|
||||
*pix_hline = pixOpenBrick(NULL, pix_hollow, min_line_length, 1);
|
||||
pixDestroy(&pix_hollow);
|
||||
// Lines are sufficiently rare, that it is worth checking for a zero image.
|
||||
l_int32 v_empty = 0;
|
||||
l_int32 h_empty = 0;
|
||||
pixZero(*pix_vline, &v_empty);
|
||||
pixZero(*pix_hline, &h_empty);
|
||||
if (pix_music_mask != NULL) {
|
||||
if (!v_empty && !h_empty) {
|
||||
*pix_music_mask = FilterMusic(resolution, pix_closed,
|
||||
*pix_vline, *pix_hline,
|
||||
&v_empty, &h_empty);
|
||||
} else {
|
||||
*pix_music_mask = NULL;
|
||||
}
|
||||
}
|
||||
pixDestroy(&pix_closed);
|
||||
Pix* pix_nonlines = NULL;
|
||||
*pix_intersections = NULL;
|
||||
Pix* extra_non_hlines = NULL;
|
||||
if (!v_empty) {
|
||||
// Subtract both line candidates from the source to get definite non-lines.
|
||||
pix_nonlines = pixSubtract(NULL, src_pix, *pix_vline);
|
||||
if (!h_empty) {
|
||||
pixSubtract(pix_nonlines, pix_nonlines, *pix_hline);
|
||||
// Intersections are a useful indicator for likelihood of being a line.
|
||||
*pix_intersections = pixAnd(NULL, *pix_vline, *pix_hline);
|
||||
// Candidate vlines are not hlines (apart from the intersections)
|
||||
// and vice versa.
|
||||
extra_non_hlines = pixSubtract(NULL, *pix_vline, *pix_intersections);
|
||||
}
|
||||
*pix_non_vline = pixErodeBrick(NULL, pix_nonlines, kMaxLineResidue, 1);
|
||||
pixSeedfillBinary(*pix_non_vline, *pix_non_vline, pix_nonlines, 8);
|
||||
if (!h_empty) {
|
||||
// Candidate hlines are not vlines.
|
||||
pixOr(*pix_non_vline, *pix_non_vline, *pix_hline);
|
||||
pixSubtract(*pix_non_vline, *pix_non_vline, *pix_intersections);
|
||||
}
|
||||
if (!FilterFalsePositives(resolution, *pix_non_vline, *pix_intersections,
|
||||
*pix_vline))
|
||||
pixDestroy(pix_vline); // No candidates left.
|
||||
} else {
|
||||
// No vertical lines.
|
||||
pixDestroy(pix_vline);
|
||||
*pix_non_vline = NULL;
|
||||
if (!h_empty) {
|
||||
pix_nonlines = pixSubtract(NULL, src_pix, *pix_hline);
|
||||
}
|
||||
}
|
||||
if (h_empty) {
|
||||
pixDestroy(pix_hline);
|
||||
*pix_non_hline = NULL;
|
||||
if (v_empty) {
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
*pix_non_hline = pixErodeBrick(NULL, pix_nonlines, 1, kMaxLineResidue);
|
||||
pixSeedfillBinary(*pix_non_hline, *pix_non_hline, pix_nonlines, 8);
|
||||
if (extra_non_hlines != NULL) {
|
||||
pixOr(*pix_non_hline, *pix_non_hline, extra_non_hlines);
|
||||
pixDestroy(&extra_non_hlines);
|
||||
}
|
||||
if (!FilterFalsePositives(resolution, *pix_non_hline, *pix_intersections,
|
||||
*pix_hline))
|
||||
pixDestroy(pix_hline); // No candidates left.
|
||||
}
|
||||
if (pixa_display != NULL) {
|
||||
if (*pix_vline != NULL) pixaAddPix(pixa_display, *pix_vline, L_CLONE);
|
||||
if (*pix_hline != NULL) pixaAddPix(pixa_display, *pix_hline, L_CLONE);
|
||||
if (pix_nonlines != NULL) pixaAddPix(pixa_display, pix_nonlines, L_CLONE);
|
||||
if (*pix_non_vline != NULL)
|
||||
pixaAddPix(pixa_display, *pix_non_vline, L_CLONE);
|
||||
if (*pix_non_hline != NULL)
|
||||
pixaAddPix(pixa_display, *pix_non_hline, L_CLONE);
|
||||
if (*pix_intersections != NULL)
|
||||
pixaAddPix(pixa_display, *pix_intersections, L_CLONE);
|
||||
if (pix_music_mask != NULL && *pix_music_mask != NULL)
|
||||
pixaAddPix(pixa_display, *pix_music_mask, L_CLONE);
|
||||
}
|
||||
pixDestroy(&pix_nonlines);
|
||||
}
|
||||
|
||||
// Returns a list of boxes corresponding to the candidate line segments. Sets
|
||||
// the line_crossings member of the boxes so we can later determine the number
|
||||
// of intersections touched by a full line.
|
||||
void LineFinder::GetLineBoxes(bool horizontal_lines,
|
||||
Pix* pix_lines, Pix* pix_intersections,
|
||||
C_BLOB_LIST* line_cblobs,
|
||||
BLOBNBOX_LIST* line_bblobs) {
|
||||
// Put a single pixel crack in every line at an arbitrary spacing,
|
||||
// so they break up and the bounding boxes can be used to get the
|
||||
// direction accurately enough without needing outlines.
|
||||
int wpl = pixGetWpl(pix_lines);
|
||||
int width = pixGetWidth(pix_lines);
|
||||
int height = pixGetHeight(pix_lines);
|
||||
l_uint32* data = pixGetData(pix_lines);
|
||||
if (horizontal_lines) {
|
||||
for (int y = 0; y < height; ++y, data += wpl) {
|
||||
for (int x = kCrackSpacing; x < width; x += kCrackSpacing) {
|
||||
CLEAR_DATA_BIT(data, x);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (int y = kCrackSpacing; y < height; y += kCrackSpacing) {
|
||||
memset(data + wpl * y, 0, wpl * sizeof(*data));
|
||||
}
|
||||
}
|
||||
// Get the individual connected components
|
||||
Boxa* boxa = pixConnComp(pix_lines, NULL, 8);
|
||||
ConvertBoxaToBlobs(width, height, &boxa, line_cblobs);
|
||||
// Make the BLOBNBOXes from the C_BLOBs.
|
||||
C_BLOB_IT blob_it(line_cblobs);
|
||||
BLOBNBOX_IT bbox_it(line_bblobs);
|
||||
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
|
||||
C_BLOB* cblob = blob_it.data();
|
||||
BLOBNBOX* bblob = new BLOBNBOX(cblob);
|
||||
bbox_it.add_to_end(bblob);
|
||||
// Determine whether the line segment touches two intersections.
|
||||
const TBOX& bbox = bblob->bounding_box();
|
||||
Box* box = boxCreate(bbox.left(), bbox.bottom(),
|
||||
bbox.width(), bbox.height());
|
||||
bblob->set_line_crossings(NumTouchingIntersections(box, pix_intersections));
|
||||
boxDestroy(&box);
|
||||
// Transform the bounding box prior to finding lines. To save writing
|
||||
// two line finders, flip x and y for horizontal lines and re-use the
|
||||
// tab-stop detection code. For vertical lines we still have to flip the
|
||||
// y-coordinates to switch from leptonica coords to tesseract coords.
|
||||
if (horizontal_lines) {
|
||||
// Note that we have Leptonica coords stored in a Tesseract box, so that
|
||||
// bbox.bottom(), being the MIN y coord, is actually the top, so to get
|
||||
// back to Leptonica coords in RemoveUnusedLineSegments, we have to
|
||||
// use height - box.right() as the top, which looks very odd.
|
||||
TBOX new_box(height - bbox.top(), bbox.left(),
|
||||
height - bbox.bottom(), bbox.right());
|
||||
bblob->set_bounding_box(new_box);
|
||||
} else {
|
||||
TBOX new_box(bbox.left(), height - bbox.top(),
|
||||
bbox.right(), height - bbox.bottom());
|
||||
bblob->set_bounding_box(new_box);
|
||||
}
|
||||
}
|
||||
return boxa;
|
||||
}
|
||||
|
||||
} // namespace tesseract.
|
||||
|
@ -21,8 +21,9 @@
|
||||
#ifndef TESSERACT_TEXTORD_LINEFIND_H__
|
||||
#define TESSERACT_TEXTORD_LINEFIND_H__
|
||||
|
||||
struct Pix;
|
||||
struct Boxa;
|
||||
struct Pix;
|
||||
struct Pixa;
|
||||
class C_BLOB_LIST;
|
||||
class BLOBNBOX_LIST;
|
||||
class ICOORD;
|
||||
@ -38,7 +39,8 @@ class TabVector_LIST;
|
||||
class LineFinder {
|
||||
public:
|
||||
/**
|
||||
* Finds vertical line objects in the given pix.
|
||||
* Finds vertical and horizontal line objects in the given pix and removes
|
||||
* them.
|
||||
*
|
||||
* Uses the given resolution to determine size thresholds instead of any
|
||||
* that may be present in the pix.
|
||||
@ -46,24 +48,21 @@ class LineFinder {
|
||||
* The output vertical_x and vertical_y contain a sum of the output vectors,
|
||||
* thereby giving the mean vertical direction.
|
||||
*
|
||||
* The output vectors are owned by the list and Frozen (cannot refit) by
|
||||
* having no boxes, as there is no need to refit or merge separator lines.
|
||||
*/
|
||||
static void FindVerticalLines(int resolution, Pix* pix,
|
||||
int* vertical_x, int* vertical_y,
|
||||
TabVector_LIST* vectors);
|
||||
|
||||
/**
|
||||
* Finds horizontal line objects in the given pix.
|
||||
*
|
||||
* Uses the given resolution to determine size thresholds instead of any
|
||||
* that may be present in the pix.
|
||||
* If pix_music_mask != NULL, and music is detected, a mask of the staves
|
||||
* and anything that is connected (bars, notes etc.) will be returned in
|
||||
* pix_music_mask, the mask subtracted from pix, and the lines will not
|
||||
* appear in v_lines or h_lines.
|
||||
*
|
||||
* The output vectors are owned by the list and Frozen (cannot refit) by
|
||||
* having no boxes, as there is no need to refit or merge separator lines.
|
||||
*
|
||||
* The detected lines are removed from the pix.
|
||||
*/
|
||||
static void FindHorizontalLines(int resolution, Pix* pix,
|
||||
TabVector_LIST* vectors);
|
||||
static void FindAndRemoveLines(int resolution, bool debug, Pix* pix,
|
||||
int* vertical_x, int* vertical_y,
|
||||
Pix** pix_music_mask,
|
||||
TabVector_LIST* v_lines,
|
||||
TabVector_LIST* h_lines);
|
||||
|
||||
/**
|
||||
* Converts the Boxa array to a list of C_BLOB, getting rid of severely
|
||||
@ -78,43 +77,71 @@ class LineFinder {
|
||||
Boxa** boxes, C_BLOB_LIST* blobs);
|
||||
|
||||
private:
|
||||
/**
|
||||
* Finds vertical lines in the given list of BLOBNBOXes. bleft and tright
|
||||
* are the bounds of the image on which the input line_bblobs were found.
|
||||
*
|
||||
* The input line_bblobs list is const really.
|
||||
*
|
||||
* The output vertical_x and vertical_y are the total of all the vectors.
|
||||
* The output list of TabVector makes no reference to the input BLOBNBOXes.
|
||||
*/
|
||||
// Finds vertical line objects in pix_vline and removes them from src_pix.
|
||||
// Uses the given resolution to determine size thresholds instead of any
|
||||
// that may be present in the pix.
|
||||
// The output vertical_x and vertical_y contain a sum of the output vectors,
|
||||
// thereby giving the mean vertical direction.
|
||||
// The output vectors are owned by the list and Frozen (cannot refit) by
|
||||
// having no boxes, as there is no need to refit or merge separator lines.
|
||||
// If no good lines are found, pix_vline is destroyed.
|
||||
static void FindAndRemoveVLines(int resolution,
|
||||
Pix* pix_intersections,
|
||||
int* vertical_x, int* vertical_y,
|
||||
Pix** pix_vline, Pix* pix_non_vline,
|
||||
Pix* src_pix, TabVector_LIST* vectors);
|
||||
|
||||
|
||||
// Finds horizontal line objects in pix_hline and removes them from src_pix.
|
||||
// Uses the given resolution to determine size thresholds instead of any
|
||||
// that may be present in the pix.
|
||||
// The output vertical_x and vertical_y contain a sum of the output vectors,
|
||||
// thereby giving the mean vertical direction.
|
||||
// The output vectors are owned by the list and Frozen (cannot refit) by
|
||||
// having no boxes, as there is no need to refit or merge separator lines.
|
||||
// If no good lines are found, pix_hline is destroyed.
|
||||
static void FindAndRemoveHLines(int resolution,
|
||||
Pix* pix_intersections,
|
||||
int vertical_x, int vertical_y,
|
||||
Pix** pix_hline, Pix* pix_non_hline,
|
||||
Pix* src_pix, TabVector_LIST* vectors);
|
||||
|
||||
// Finds vertical lines in the given list of BLOBNBOXes. bleft and tright
|
||||
// are the bounds of the image on which the input line_bblobs were found.
|
||||
// The input line_bblobs list is const really.
|
||||
// The output vertical_x and vertical_y are the total of all the vectors.
|
||||
// The output list of TabVector makes no reference to the input BLOBNBOXes.
|
||||
static void FindLineVectors(const ICOORD& bleft, const ICOORD& tright,
|
||||
BLOBNBOX_LIST* line_bblobs,
|
||||
int* vertical_x, int* vertical_y,
|
||||
TabVector_LIST* vectors);
|
||||
|
||||
/**
|
||||
* Get a set of bounding boxes of possible vertical lines in the image.
|
||||
*
|
||||
* The input resolution overrides any resolution set in src_pix.
|
||||
*
|
||||
* The output line_pix contains just all the detected lines.
|
||||
*/
|
||||
static Boxa* GetVLineBoxes(int resolution, Pix* src_pix, Pix** line_pix);
|
||||
// Most of the heavy lifting of line finding. Given src_pix and its separate
|
||||
// resolution, returns image masks:
|
||||
// Returns image masks:
|
||||
// pix_vline candidate vertical lines.
|
||||
// pix_non_vline pixels that didn't look like vertical lines.
|
||||
// pix_hline candidate horizontal lines.
|
||||
// pix_non_hline pixels that didn't look like horizontal lines.
|
||||
// pix_intersections pixels where vertical and horizontal lines meet.
|
||||
// pix_music_mask candidate music staves.
|
||||
// This function promises to initialize all the output (2nd level) pointers,
|
||||
// but any of the returns that are empty will be NULL on output.
|
||||
// None of the input (1st level) pointers may be NULL except pix_music_mask,
|
||||
// which will disable music detection, and pixa_display, which is for debug.
|
||||
static void GetLineMasks(int resolution, Pix* src_pix,
|
||||
Pix** pix_vline, Pix** pix_non_vline,
|
||||
Pix** pix_hline, Pix** pix_non_hline,
|
||||
Pix** pix_intersections, Pix** pix_music_mask,
|
||||
Pixa* pixa_display);
|
||||
|
||||
/**
|
||||
* Get a set of bounding boxes of possible horizontal lines in the image.
|
||||
*
|
||||
* The input resolution overrides any resolution set in src_pix.
|
||||
*
|
||||
* The output line_pix contains just all the detected lines.
|
||||
*
|
||||
* The output boxes undergo the transformation (x,y)->(height-y,x) so the
|
||||
* lines can be found with a vertical line finder afterwards.
|
||||
*
|
||||
* This transformation allows a simple x/y flip to reverse it in tesseract
|
||||
* coordinates and it is faster to flip the lines than rotate the image.
|
||||
*/
|
||||
static Boxa* GetHLineBoxes(int resolution, Pix* src_pix, Pix** line_pix);
|
||||
// Returns a list of boxes corresponding to the candidate line segments. Sets
|
||||
// the line_crossings member of the boxes so we can later determine the number
|
||||
// of intersections touched by a full line.
|
||||
static void GetLineBoxes(bool horizontal_lines,
|
||||
Pix* pix_lines, Pix* pix_intersections,
|
||||
C_BLOB_LIST* line_cblobs,
|
||||
BLOBNBOX_LIST* line_bblobs);
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
@ -312,6 +312,9 @@ void compute_page_skew( //get average gradient
|
||||
blob_count = 0;
|
||||
for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
|
||||
block_it.forward ()) {
|
||||
POLY_BLOCK* pb = block_it.data()->block->poly_block();
|
||||
if (pb != NULL && !pb->IsText())
|
||||
continue; // Pretend non-text blocks don't exist.
|
||||
row_count += block_it.data ()->get_rows ()->length ();
|
||||
//count up rows
|
||||
row_it.set_to_list (block_it.data ()->get_rows ());
|
||||
@ -332,6 +335,9 @@ void compute_page_skew( //get average gradient
|
||||
row_index = 0;
|
||||
for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
|
||||
block_it.forward ()) {
|
||||
POLY_BLOCK* pb = block_it.data()->block->poly_block();
|
||||
if (pb != NULL && !pb->IsText())
|
||||
continue; // Pretend non-text blocks don't exist.
|
||||
row_it.set_to_list (block_it.data ()->get_rows ());
|
||||
for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
|
||||
row = row_it.data ();
|
||||
@ -359,6 +365,9 @@ void compute_page_skew( //get average gradient
|
||||
//desperate
|
||||
for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
|
||||
block_it.forward ()) {
|
||||
POLY_BLOCK* pb = block_it.data()->block->poly_block();
|
||||
if (pb != NULL && !pb->IsText())
|
||||
continue; // Pretend non-text blocks don't exist.
|
||||
row_it.set_to_list (block_it.data ()->get_rows ());
|
||||
for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
|
||||
row_it.forward ()) {
|
||||
@ -593,8 +602,11 @@ void Textord::cleanup_rows_fitting(ICOORD page_tr, // top right
|
||||
if (textord_heavy_nr) {
|
||||
vigorous_noise_removal(block);
|
||||
}
|
||||
separate_underlines(block, gradient, rotation, testing_on);
|
||||
pre_associate_blobs(page_tr, block, rotation, testing_on);
|
||||
POLY_BLOCK* pb = block->block->poly_block();
|
||||
if (pb == NULL || pb->IsText()) {
|
||||
separate_underlines(block, gradient, rotation, testing_on);
|
||||
pre_associate_blobs(page_tr, block, rotation, testing_on);
|
||||
}
|
||||
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
if (textord_show_final_rows && testing_on) {
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -20,38 +20,55 @@
|
||||
#ifndef TESSERACT_TEXTORD_STROKEWIDTH_H__
|
||||
#define TESSERACT_TEXTORD_STROKEWIDTH_H__
|
||||
|
||||
#include "bbgrid.h" // Base class.
|
||||
#include "blobbox.h" // BlobNeighbourDir.
|
||||
#include "tabvector.h" // For BLOBNBOX_CLIST.
|
||||
#include "blobgrid.h" // Base class.
|
||||
#include "colpartitiongrid.h"
|
||||
#include "textlineprojection.h"
|
||||
|
||||
class TO_BLOCK;
|
||||
class DENORM;
|
||||
class ScrollView;
|
||||
class TO_BLOCK;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class ColPartition_LIST;
|
||||
class TabFind;
|
||||
class TextlineProjection;
|
||||
|
||||
// Misc enums to clarify bool arguments for direction-controlling args.
|
||||
enum LeftOrRight {
|
||||
LR_LEFT,  // Selects the left side (e.g. of a leader partition).
|
||||
LR_RIGHT  // Selects the right side (e.g. of a leader partition).
|
||||
};
|
||||
|
||||
/**
|
||||
* The StrokeWidth class holds all the normal and large blobs.
|
||||
* It is used to find good large blobs and move them to the normal blobs
|
||||
* by virtue of having a reasonable strokewidth compatible neighbour.
|
||||
*/
|
||||
class StrokeWidth : public BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> {
|
||||
class StrokeWidth : public BlobGrid {
|
||||
public:
|
||||
StrokeWidth(int gridsize, const ICOORD& bleft, const ICOORD& tright);
|
||||
virtual ~StrokeWidth();
|
||||
|
||||
// Sets the neighbours member of the medium-sized blobs in the block.
|
||||
// Searches on 4 sides of each blob for similar-sized, similar-strokewidth
|
||||
// blobs and sets pointers to the good neighbours.
|
||||
void SetNeighboursOnMediumBlobs(TO_BLOCK* block);
|
||||
|
||||
// Sets the neighbour/textline writing direction members of the medium
|
||||
// and large blobs with optional repair of broken CJK characters first.
|
||||
// Repair of broken CJK is needed here because broken CJK characters
|
||||
// can fool the textline direction detection algorithm.
|
||||
void FindTextlineDirectionAndFixBrokenCJK(bool cjk_merge,
|
||||
TO_BLOCK* input_block);
|
||||
|
||||
// To save computation, the process of generating partitions is broken
|
||||
// into the following 4 steps:
|
||||
// TestVerticalTextDirection
|
||||
// CorrectForRotation (used only if a rotation is to be applied)
|
||||
// FindLeaderPartitions
|
||||
// TODO(rays) Coming soon:
|
||||
// GradeBlobsIntoPartitions.
|
||||
// which will replace entirely the old call sequence of:
|
||||
// InsertBlobsOld
|
||||
// MoveGoodLargeBlobs.
|
||||
// These functions are all required, in sequence, except for
|
||||
// CorrectForRotation, which is not needed if no rotation is applied.
|
||||
|
||||
@ -59,36 +76,50 @@ class StrokeWidth : public BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> {
|
||||
// returns true if the majority are vertical.
|
||||
// If the blobs are rotated, it is necessary to call CorrectForRotation
|
||||
// after rotating everything, otherwise the work done here will be enough.
|
||||
// If cjk_merge is true, it will attempt to merge broken cjk characters.
|
||||
// If osd_blobs is not null, a list of blobs from the dominant textline
|
||||
// direction are returned for use in orientation and script detection.
|
||||
bool TestVerticalTextDirection(bool cjk_merge,
|
||||
TO_BLOCK* block, TabFind* line_grid,
|
||||
bool TestVerticalTextDirection(TO_BLOCK* block,
|
||||
BLOBNBOX_CLIST* osd_blobs);
|
||||
|
||||
// Corrects the data structures for the given rotation.
|
||||
void CorrectForRotation(const FCOORD& rotation, TO_BLOCK* block);
|
||||
void CorrectForRotation(const FCOORD& rerotation,
|
||||
ColPartitionGrid* part_grid);
|
||||
|
||||
// Finds leader partitions and inserts them into the given grid.
|
||||
void FindLeaderPartitions(TO_BLOCK* block, TabFind* line_grid);
|
||||
void FindLeaderPartitions(TO_BLOCK* block,
|
||||
ColPartitionGrid* part_grid);
|
||||
|
||||
// Finds and marks as noise those blobs that look like bits of vertical lines
|
||||
// that would otherwise screw up layout analysis.
|
||||
void RemoveLineResidue(ColPartition_LIST* big_part_list);
|
||||
|
||||
// Types all the blobs as vertical text or horizontal text or unknown and
|
||||
// puts them into initial ColPartitions in the supplied part_grid.
|
||||
// rerotation determines how to get back to the image coordinates from the
|
||||
// blob coordinates (since they may have been rotated for vertical text).
|
||||
// block is the single block for the whole page or rectangle to be OCRed.
|
||||
// nontext_pix (full-size), is a binary mask used to prevent merges across
|
||||
// photo/text boundaries. It is not kept beyond this function.
|
||||
// denorm provides a mapping back to the image from the current blob
|
||||
// coordinate space.
|
||||
// projection provides a measure of textline density over the image and
|
||||
// provides functions to assist with diacritic detection. It should be a
|
||||
// pointer to a new TextlineProjection, and will be setup here.
|
||||
// part_grid is the output grid of textline partitions.
|
||||
// Large blobs that cause overlap are put in separate partitions and added
|
||||
// to the big_parts list.
|
||||
void GradeBlobsIntoPartitions(const FCOORD& rerotation,
|
||||
TO_BLOCK* block,
|
||||
Pix* nontext_pix,
|
||||
const DENORM* denorm,
|
||||
TextlineProjection* projection,
|
||||
ColPartitionGrid* part_grid,
|
||||
ColPartition_LIST* big_parts);
|
||||
|
||||
// Handles a click event in a display window.
|
||||
virtual void HandleClick(int x, int y);
|
||||
|
||||
// Puts the block blobs (normal and large) into the grid.
|
||||
void InsertBlobsOld(TO_BLOCK* block, TabFind* line_grid);
|
||||
|
||||
// Moves the large blobs that have good stroke-width neighbours to the normal
|
||||
// blobs list.
|
||||
void MoveGoodLargeBlobs(int resolution, TO_BLOCK* block);
|
||||
|
||||
private:
|
||||
// Reorganize the blob lists with a different definition of small, medium
|
||||
// and large, compared to the original definition.
|
||||
// Height is still the primary filter key, but medium width blobs of small
|
||||
// height become medium, and very wide blobs of small height stay small.
|
||||
void ReFilterBlobs(TO_BLOCK* block);
|
||||
|
||||
// Computes the noise_density_ by summing the number of elements in a
|
||||
// neighbourhood of each grid cell.
|
||||
void ComputeNoiseDensity(TO_BLOCK* block, TabFind* line_grid);
|
||||
@ -96,20 +127,25 @@ class StrokeWidth : public BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> {
|
||||
// Detects and marks leader dots/dashes.
|
||||
// Leaders are horizontal chains of small or noise blobs that look
|
||||
// monospace according to ColPartition::MarkAsLeaderIfMonospaced().
|
||||
// Detected leaders become the only occupants of small_blobs list.
|
||||
// Detected leaders become the only occupants of the block->small_blobs list.
|
||||
// Non-leader small blobs get moved to the blobs list.
|
||||
// Non-leader noise blobs remain singletons in the noise list.
|
||||
// All small and noise blobs in high density regions are marked BTFT_NONTEXT.
|
||||
void FindLeadersAndMarkNoise(bool final, TO_BLOCK* block, TabFind* line_grid,
|
||||
// block is the single block for the whole page or rectangle to be OCRed.
|
||||
// leader_parts is the output.
|
||||
void FindLeadersAndMarkNoise(TO_BLOCK* block,
|
||||
ColPartition_LIST* leader_parts);
|
||||
|
||||
// Puts the block blobs (normal and large) into the grid.
|
||||
void InsertBlobs(TO_BLOCK* block, TabFind* line_grid);
|
||||
/** Inserts the block blobs (normal and large) into this grid.
|
||||
* Blobs remain owned by the block. */
|
||||
void InsertBlobs(TO_BLOCK* block);
|
||||
|
||||
// Fix broken CJK characters, using the fake joined blobs mechanism.
|
||||
// Blobs are really merged, ie the master takes all the outlines and the
|
||||
// others are deleted.
|
||||
void FixBrokenCJK(BLOBNBOX_LIST* blobs, TabFind* line_grid);
|
||||
// Returns true if sufficient blobs are merged that it may be worth running
|
||||
// again, due to a better estimate of character size.
|
||||
bool FixBrokenCJK(TO_BLOCK* block);
|
||||
|
||||
// Collect blobs that overlap or are within max_dist of the input bbox.
|
||||
// Return them in the list of blobs and expand the bbox to be the union
|
||||
@ -119,16 +155,21 @@ class StrokeWidth : public BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> {
|
||||
int max_size, int max_dist,
|
||||
TBOX* bbox, BLOBNBOX_CLIST* blobs);
|
||||
|
||||
// Finds the textline direction to be horizontal or vertical according
|
||||
// to distance to neighbours and 1st and 2nd order neighbours.
|
||||
// Non-text tends to end up without a definite direction.
|
||||
void FindTextlineFlowDirection(bool final);
|
||||
// For each blob in this grid, finds the textline direction to be horizontal
|
||||
// or vertical according to distance to neighbours and 1st and 2nd order
|
||||
// neighbours. Non-text tends to end up without a definite direction.
|
||||
// Result is setting of the neighbours and vert_possible/horz_possible
|
||||
// flags in the BLOBNBOXes currently in this grid.
|
||||
// This function is called more than once if page orientation is uncertain,
|
||||
// so display_if_debugging is true on the final call to display the results.
|
||||
void FindTextlineFlowDirection(bool display_if_debugging);
|
||||
|
||||
// Sets the neighbours and good_stroke_neighbours members of the blob by
|
||||
// searching close on all 4 sides.
|
||||
// When finding leader dots/dashes, there is a slightly different rule for
|
||||
// what makes a good neighbour.
|
||||
void SetNeighbours(bool leaders, BLOBNBOX* blob);
|
||||
// If activate_line_trap, then line-like objects are found and isolated.
|
||||
void SetNeighbours(bool leaders, bool activate_line_trap, BLOBNBOX* blob);
|
||||
|
||||
// Sets the good_stroke_neighbours member of the blob if it has a
|
||||
// GoodNeighbour on the given side.
|
||||
@ -151,26 +192,111 @@ class StrokeWidth : public BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> {
|
||||
// changed. Otherwise, only ambiguous blobs are processed.
|
||||
void SmoothNeighbourTypes(BLOBNBOX* blob, bool desperate);
|
||||
|
||||
// Sets the leader_on_left or leader_on_right flags for blobs
|
||||
// that are next to one end of the given leader partition.
|
||||
// If left_of_part is true, then look at the left side of the partition for
|
||||
// blobs on which to set the leader_on_right flag.
|
||||
void MarkLeaderNeighbours(const ColPartition* part, bool left_of_part);
|
||||
// Checks the left or right side of the given leader partition and sets the
|
||||
// (opposite) leader_on_right or leader_on_left flags for blobs
|
||||
// that are next to the given side of the given leader partition.
|
||||
void MarkLeaderNeighbours(const ColPartition* part, LeftOrRight side);
|
||||
|
||||
// Partition creation. Accumulates vertical and horizontal text chains,
|
||||
// puts the remaining blobs in as unknowns, and then merges/splits to
|
||||
// minimize overlap and smooths the types with neighbours and the color
|
||||
// image if provided. rerotation is used to rotate the coordinate space
|
||||
// back to the nontext_map_ image.
|
||||
void FindInitialPartitions(const FCOORD& rerotation,
|
||||
TO_BLOCK* block,
|
||||
ColPartitionGrid* part_grid,
|
||||
ColPartition_LIST* big_parts);
|
||||
// Finds vertical chains of text-like blobs and puts them in ColPartitions.
|
||||
void FindVerticalTextChains(ColPartitionGrid* part_grid);
|
||||
// Finds horizontal chains of text-like blobs and puts them in ColPartitions.
|
||||
void FindHorizontalTextChains(ColPartitionGrid* part_grid);
|
||||
// Finds diacritics and saves their base character in the blob.
|
||||
void TestDiacritics(ColPartitionGrid* part_grid, TO_BLOCK* block);
|
||||
// Searches this grid for an appropriately close and sized neighbour of the
|
||||
// given [small] blob. If such a blob is found, the diacritic base is saved
|
||||
// in the blob and true is returned.
|
||||
// The small_grid is a secondary grid that contains the small/noise objects
|
||||
// that are not in this grid, but may be useful for determining a connection
|
||||
// between blob and its potential base character. (See DiacriticXGapFilled.)
|
||||
bool DiacriticBlob(BlobGrid* small_grid, BLOBNBOX* blob);
|
||||
// Returns true if there is no gap between the base char and the diacritic
|
||||
// bigger than a fraction of the height of the base char:
|
||||
// Eg: line end.....'
|
||||
// The quote is a long way from the end of the line, yet it needs to be a
|
||||
// diacritic. To determine that the quote is not part of an image, or
|
||||
// a different text block, we check for other marks in the gap between
|
||||
// the base char and the diacritic.
|
||||
// '<--Diacritic
|
||||
// |---------|
|
||||
// | |<-toobig-gap->
|
||||
// | Base |<ok gap>
|
||||
// |---------| x<-----Dot occupying gap
|
||||
// The grid is const really.
|
||||
bool DiacriticXGapFilled(BlobGrid* grid, const TBOX& diacritic_box,
|
||||
const TBOX& base_box);
|
||||
// Merges diacritics with the ColPartition of the base character blob.
|
||||
void MergeDiacritics(TO_BLOCK* block, ColPartitionGrid* part_grid);
|
||||
// Any blobs on the large_blobs list of block that are still unowned by a
|
||||
// ColPartition, are probably drop-cap or vertically touching so the blobs
|
||||
// are removed to the big_parts list and treated separately.
|
||||
void RemoveLargeUnusedBlobs(TO_BLOCK* block,
|
||||
ColPartitionGrid* part_grid,
|
||||
ColPartition_LIST* big_parts);
|
||||
|
||||
// All remaining unused blobs are put in individual ColPartitions.
|
||||
void PartitionRemainingBlobs(ColPartitionGrid* part_grid);
|
||||
|
||||
// If combine, put all blobs in the cell_list into a single partition,
|
||||
// otherwise put each one into its own partition.
|
||||
void MakePartitionsFromCellList(bool combine,
|
||||
ColPartitionGrid* part_grid,
|
||||
BLOBNBOX_CLIST* cell_list);
|
||||
|
||||
// Helper function to finish setting up a ColPartition and insert into
|
||||
// part_grid.
|
||||
void CompletePartition(ColPartition* part, ColPartitionGrid* part_grid);
|
||||
|
||||
// Merge partitions where the merge appears harmless.
|
||||
void EasyMerges(ColPartitionGrid* part_grid);
|
||||
|
||||
// Compute a search box based on the orientation of the partition.
|
||||
// Returns true if a suitable box can be calculated.
|
||||
// Callback for EasyMerges.
|
||||
bool OrientationSearchBox(ColPartition* part, TBOX* box);
|
||||
|
||||
// Merge confirmation callback for EasyMerges.
|
||||
bool ConfirmEasyMerge(const ColPartition* p1, const ColPartition* p2);
|
||||
|
||||
// Returns true if there is no significant noise in between the boxes.
|
||||
bool NoNoiseInBetween(const TBOX& box1, const TBOX& box2) const;
|
||||
|
||||
// Displays the blobs colored according to the number of good neighbours
|
||||
// and the vertical/horizontal flow.
|
||||
ScrollView* DisplayGoodBlobs(const char* window_name, int x, int y);
|
||||
|
||||
// Displays blobs colored according to whether or not they are diacritics.
|
||||
ScrollView* DisplayDiacritics(const char* window_name,
|
||||
int x, int y, TO_BLOCK* block);
|
||||
|
||||
private:
|
||||
// Returns true if there is at least one side neighbour that has a similar
|
||||
// stroke width.
|
||||
bool GoodTextBlob(BLOBNBOX* blob);
|
||||
// Grid to indicate the dot noise density at each grid coord.
|
||||
IntGrid* noise_density_;
|
||||
// Image map of photo/noise areas on the page. Borrowed pointer (not owned.)
|
||||
Pix* nontext_map_;
|
||||
// Textline projection map. Borrowed pointer.
|
||||
TextlineProjection* projection_;
|
||||
// DENORM used by projection_ to get back to image coords. Borrowed pointer.
|
||||
const DENORM* denorm_;
|
||||
// Bounding box of the grid.
|
||||
TBOX grid_box_;
|
||||
// Rerotation to get back to the original image.
|
||||
FCOORD rerotation_;
|
||||
// Windows for debug display.
|
||||
ScrollView* leaders_win_;
|
||||
ScrollView* initial_widths_win_;
|
||||
ScrollView* widths_win_;
|
||||
ScrollView* chains_win_;
|
||||
ScrollView* diacritics_win_;
|
||||
ScrollView* textlines_win_;
|
||||
ScrollView* smoothed_win_;
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -45,6 +45,7 @@ namespace tesseract {
|
||||
typedef TessResultCallback1<bool, int> WidthCallback;
|
||||
|
||||
struct AlignedBlobParams;
|
||||
class ColPartitionGrid;
|
||||
|
||||
/** Pixel resolution of column width estimates. */
|
||||
const int kColumnWidthFactor = 20;
|
||||
@ -67,30 +68,40 @@ class TabFind : public AlignedBlob {
|
||||
|
||||
/**
|
||||
* Insert a list of blobs into the given grid (not necessarily this).
|
||||
* If take_ownership is true, then the blobs are removed from the source list.
|
||||
* See InsertBlob for the other arguments.
|
||||
* It would seem to make more sense to swap this and grid, but this way
|
||||
* around allows grid to not be derived from TabFind, eg a ColPartitionGrid,
|
||||
* while the grid that provides the tab stops(this) has to be derived from
|
||||
* TabFind.
|
||||
*/
|
||||
void InsertBlobList(bool h_spread, bool v_spread, bool large,
|
||||
BLOBNBOX_LIST* blobs, bool take_ownership,
|
||||
BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>* grid);
|
||||
void InsertBlobsToGrid(bool h_spread, bool v_spread,
|
||||
BLOBNBOX_LIST* blobs,
|
||||
BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>* grid);
|
||||
|
||||
/**
|
||||
* Insert a single blob into the given grid (not necessarily this).
|
||||
* If h_spread, then all cells covered horizontally by the box are
|
||||
* used, otherwise, just the bottom-left. Similarly for v_spread.
|
||||
* If large, then insert only if the bounding box doesn't intersect
|
||||
* anything else already in the grid. Returns true if the blob was inserted.
|
||||
* A side effect is that the left and right rule edges of the blob are
|
||||
* set according to the tab vectors in this (not grid).
|
||||
*/
|
||||
bool InsertBlob(bool h_spread, bool v_spread, bool large, BLOBNBOX* blob,
|
||||
bool InsertBlob(bool h_spread, bool v_spread, BLOBNBOX* blob,
|
||||
BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>* grid);
|
||||
|
||||
// Calls SetBlobRuleEdges for all the blobs in the given block.
|
||||
void SetBlockRuleEdges(TO_BLOCK* block);
|
||||
// Sets the left and right rule and crossing_rules for the blobs in the given
|
||||
// list by finding the next outermost tabvectors for each blob.
|
||||
void SetBlobRuleEdges(BLOBNBOX_LIST* blobs);
|
||||
|
||||
// Returns the gutter width of the given TabVector between the given y limits.
|
||||
// Also returns x-shift to be added to the vector to clear any intersecting
|
||||
// blobs. The shift is deducted from the returned gutter.
|
||||
// If ignore_unmergeables is true, then blobs of UnMergeableType are
|
||||
// ignored as if they don't exist. (Used for text on image.)
|
||||
// max_gutter_width is used as the maximum width worth searching for in case
|
||||
// there is nothing near the TabVector.
|
||||
int GutterWidth(int bottom_y, int top_y, const TabVector& v,
|
||||
bool ignore_unmergeables, int max_gutter_width,
|
||||
int* required_shift);
|
||||
/**
|
||||
* Find the gutter width and distance to inner neighbour for the given blob.
|
||||
@ -100,20 +111,6 @@ class TabFind : public AlignedBlob {
|
||||
BLOBNBOX* bbox, int* gutter_width,
|
||||
int* neighbour_gap);
|
||||
|
||||
/**
|
||||
* Find the next adjacent (to left or right) blob on this text line,
|
||||
* with the constraint that it must vertically significantly overlap
|
||||
* the input box.
|
||||
*/
|
||||
BLOBNBOX* AdjacentBlob(const BLOBNBOX* bbox,
|
||||
bool right_to_left, int gap_limit);
|
||||
|
||||
/**
|
||||
* Compute and return, but do not set the type as being BRT_TEXT or
|
||||
* BRT_UNKNOWN according to how well it forms a text line.
|
||||
*/
|
||||
BlobRegionType ComputeBlobType(BLOBNBOX* blob);
|
||||
|
||||
/**
|
||||
* Return the x-coord that corresponds to the right edge for the given
|
||||
* box. If there is a rule line to the right that vertically overlaps it,
|
||||
@ -192,16 +189,24 @@ class TabFind : public AlignedBlob {
|
||||
/**
|
||||
* Top-level function to find TabVectors in an input page block.
|
||||
* Returns false if the detected skew angle is impossible.
|
||||
* Applies the detected skew angle to deskew the tabs, blobs and part_grid.
|
||||
*/
|
||||
bool FindTabVectors(TabVector_LIST* hlines,
|
||||
BLOBNBOX_LIST* image_blobs, TO_BLOCK* block,
|
||||
int min_gutter_width,
|
||||
ColPartitionGrid* part_grid,
|
||||
FCOORD* deskew, FCOORD* reskew);
|
||||
|
||||
// Top-level function to not find TabVectors in an input page block,
|
||||
// but setup for single column mode.
|
||||
void DontFindTabVectors(BLOBNBOX_LIST* image_blobs,
|
||||
TO_BLOCK* block, FCOORD* deskew, FCOORD* reskew);
|
||||
|
||||
// Cleans up the lists of blobs in the block ready for use by TabFind.
|
||||
// Large blobs that look like text are moved to the main blobs list.
|
||||
// Main blobs that are superseded by the image blobs are deleted.
|
||||
void TidyBlobs(TO_BLOCK* block);
|
||||
|
||||
// Helper function to setup search limits for *TabForBox.
|
||||
void SetupTabSearch(int x, int y, int* min_key, int* max_key);
|
||||
|
||||
@ -229,15 +234,33 @@ class TabFind : public AlignedBlob {
|
||||
TabVector_LIST* horizontal_lines,
|
||||
int* min_gutter_width);
|
||||
|
||||
// Clear the grid and get rid of the tab vectors, but not separators,
|
||||
// ready to start again.
|
||||
void Reset();
|
||||
|
||||
// Reflect the separator tab vectors and the grids in the y-axis.
|
||||
// Can only be called after Reset!
|
||||
void ReflectInYAxis();
|
||||
|
||||
private:
|
||||
// For each box in the grid, decide whether it is a candidate tab-stop,
|
||||
// and if so add it to the tab_grid_.
|
||||
// and if so add it to the left and right tab boxes.
|
||||
ScrollView* FindTabBoxes(int min_gutter_width);
|
||||
|
||||
// Return true if this box looks like a candidate tab stop, and set
|
||||
// the appropriate tab type(s) to TT_UNCONFIRMED.
|
||||
bool TestBoxForTabs(BLOBNBOX* bbox, int min_gutter_width);
|
||||
|
||||
// Returns true if there is nothing in the rectangle of width min_gutter to
|
||||
// the left of bbox.
|
||||
bool ConfirmRaggedLeft(BLOBNBOX* bbox, int min_gutter);
|
||||
// Returns true if there is nothing in the rectangle of width min_gutter to
|
||||
// the right of bbox.
|
||||
bool ConfirmRaggedRight(BLOBNBOX* bbox, int min_gutter);
|
||||
// Returns true if there is nothing in the given search_box that vertically
|
||||
// overlaps target_box other than target_box itself.
|
||||
bool NothingYOverlapsInBox(const TBOX& search_box, const TBOX& target_box);
|
||||
|
||||
// Fills the list of TabVector with the tabstops found in the grid,
|
||||
// and estimates the logical vertical direction.
|
||||
void FindAllTabVectors(int min_gutter_width);
|
||||
@ -272,13 +295,17 @@ class TabFind : public AlignedBlob {
|
||||
// Trace textlines from one side to the other of each tab vector, saving
|
||||
// the most frequent column widths found in a list so that a given width
|
||||
// can be tested for being a common width with a simple callback function.
|
||||
void ComputeColumnWidths(ScrollView* tab_win);
|
||||
void ComputeColumnWidths(ScrollView* tab_win,
|
||||
ColPartitionGrid* part_grid);
|
||||
|
||||
// Set the region_type_ member for all the blobs in the grid.
|
||||
void ComputeBlobGoodness();
|
||||
// Find column width and pair-up tab vectors with existing ColPartitions.
|
||||
void ApplyPartitionsToColumnWidths(ColPartitionGrid* part_grid,
|
||||
STATS* col_widths);
|
||||
|
||||
// Set the region_type_ member of the blob, if not already known.
|
||||
void SetBlobRegionType(BLOBNBOX* blob);
|
||||
// Helper makes the list of common column widths in column_widths_ from the
|
||||
// input col_widths. Destroys the content of col_widths by repeatedly
|
||||
// finding the mode and erasing the peak.
|
||||
void MakeColumnWidths(int col_widths_size, STATS* col_widths);
|
||||
|
||||
// Mark blobs as being in a vertical text line where that is the case.
|
||||
void MarkVerticalText();
|
||||
@ -288,48 +315,14 @@ class TabFind : public AlignedBlob {
|
||||
// points (< kMinLinesInColumn), then 0 is returned.
|
||||
int FindMedianGutterWidth(TabVector_LIST* tab_vectors);
|
||||
|
||||
// If this box looks like it is on a textline in the given direction,
|
||||
// return the width of the textline-like group of blobs, and the number
|
||||
// of blobs found.
|
||||
// For more detail see FindTextlineSegment below.
|
||||
int FindTextlineWidth(bool right_to_left, BLOBNBOX* bbox, int* blob_count);
|
||||
|
||||
// Search from the given tabstop bbox to the next opposite
|
||||
// tabstop bbox on the same text line, which may be itself.
|
||||
// Returns true if the search is successful, and sets
|
||||
// start_pt, end_pt to the fitted baseline, width to the measured
|
||||
// width of the text line (column width estimate.)
|
||||
bool TraceTextline(BLOBNBOX* bbox, ICOORD* start_pt, ICOORD* end_pt,
|
||||
int* left_edge, int* right_edge);
|
||||
|
||||
// Search from the given bbox in the given direction until the next tab
|
||||
// vector is found or a significant horizontal gap is found.
|
||||
// Returns the width of the line if the search is successful, (defined
|
||||
// as good coverage of the width and a good fitting baseline) and sets
|
||||
// start_pt, end_pt to the fitted baseline, left_blob, right_blob to
|
||||
// the ends of the line. Returns zero otherwise.
|
||||
// Sets blob_count to the number of blobs found on the line.
|
||||
// On input, either both left_vector and right_vector should be NULL,
|
||||
// indicating a basic search, or both left_vector and right_vector should
|
||||
// be not NULL and one of *left_vector and *right_vector should be not NULL,
|
||||
// in which case the search is strictly between tab vectors and will return
|
||||
// zero if a gap is found before the opposite tab vector is reached, or a
|
||||
// conflicting tab vector is found.
|
||||
// If ignore_images is true, then blobs with aligned_text() < 0 are treated
|
||||
// as if they do not exist.
|
||||
int FindTextlineSegment(bool right_to_lefts, bool ignore_images,
|
||||
BLOBNBOX* bbox, int* blob_count,
|
||||
ICOORD* start_pt, ICOORD* end_pt,
|
||||
TabVector** left_vector, TabVector** right_vector,
|
||||
BLOBNBOX** left_blob, BLOBNBOX** right_blob);
|
||||
|
||||
// Find the next adjacent (to left or right) blob on this text line,
|
||||
// with the constraint that it must vertically significantly overlap
|
||||
// the [top_y, bottom_y] range.
|
||||
// If ignore_images is true, then blobs with aligned_text() < 0 are treated
|
||||
// as if they do not exist.
|
||||
BLOBNBOX* AdjacentBlob(const BLOBNBOX* bbox,
|
||||
bool right_to_left, bool ignore_images,
|
||||
bool look_left, bool ignore_images,
|
||||
double min_overlap_fraction,
|
||||
int gap_limit, int top_y, int bottom_y);
|
||||
|
||||
// Add a bi-directional partner relationship between the left
|
||||
@ -373,8 +366,9 @@ class TabFind : public AlignedBlob {
|
||||
ICOORDELT_LIST column_widths_; //< List of commonly occurring widths.
|
||||
/** Callback to test an int for being a common width. */
|
||||
WidthCallback* width_cb_;
|
||||
/** Instance of the base class that contains only candidate tab stops. */
|
||||
BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>* tab_grid_;
|
||||
// Sets of bounding boxes that are candidate tab stops.
|
||||
GenericVector<BLOBNBOX*> left_tab_boxes_;
|
||||
GenericVector<BLOBNBOX*> right_tab_boxes_;
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
@ -970,7 +970,7 @@ bool TableFinder::HasLeaderAdjacent(const ColPartition& part) {
|
||||
if (!part.IsInSameColumnAs(*leader))
|
||||
break;
|
||||
// There should be a significant vertical overlap
|
||||
if (!leader->VOverlaps(part))
|
||||
if (!leader->VSignificantCoreOverlap(part))
|
||||
continue;
|
||||
// Leader passed all tests, so it is adjacent.
|
||||
return true;
|
||||
@ -2112,9 +2112,17 @@ void TableFinder::MakeTableBlocks(ColPartitionGrid* grid,
|
||||
}
|
||||
// Insert table colpartition back to part_grid_
|
||||
if (table_partition) {
|
||||
table_partition->SetPartitionType(resolution_,
|
||||
all_columns[table_search.GridY()]);
|
||||
// To match the columns used when transforming to blocks, the new table
|
||||
// partition must have its first and last column set at the grid y that
|
||||
// corresponds to its bottom.
|
||||
const TBOX& table_box = table_partition->bounding_box();
|
||||
int grid_x, grid_y;
|
||||
grid->GridCoords(table_box.left(), table_box.bottom(), &grid_x, &grid_y);
|
||||
table_partition->SetPartitionType(resolution_, all_columns[grid_y]);
|
||||
table_partition->set_table_type();
|
||||
table_partition->set_blob_type(BRT_TEXT);
|
||||
table_partition->set_flow(BTFT_CHAIN);
|
||||
table_partition->SetBlobTypes();
|
||||
grid->InsertBBox(true, true, table_partition);
|
||||
}
|
||||
}
|
||||
|
@ -26,6 +26,7 @@
|
||||
#include "colfind.h"
|
||||
#include "colpartitionset.h"
|
||||
#include "detlinefit.h"
|
||||
#include "statistc.h"
|
||||
|
||||
// Include automatically generated configuration file if running autoconf.
|
||||
#ifdef HAVE_CONFIG_H
|
||||
@ -52,7 +53,7 @@ const double kLineCountReciprocal = 4.0;
|
||||
// Constant add-on for minimum gutter for aligned tabs.
|
||||
const double kMinAlignedGutter = 0.25;
|
||||
// Constant add-on for minimum gutter for ragged tabs.
|
||||
const double kMinRaggedGutter = 2.0;
|
||||
const double kMinRaggedGutter = 1.5;
|
||||
|
||||
double_VAR(textord_tabvector_vertical_gap_fraction, 0.5,
|
||||
"max fraction of mean blob width allowed for vertical gaps in vertical text");
|
||||
@ -205,7 +206,8 @@ TabVector::TabVector(const TabVector& src, TabAlignment alignment,
|
||||
const ICOORD& vertical_skew, BLOBNBOX* blob)
|
||||
: extended_ymin_(src.extended_ymin_), extended_ymax_(src.extended_ymax_),
|
||||
sort_key_(0), percent_score_(0), mean_width_(0),
|
||||
needs_refit_(true), needs_evaluation_(true), alignment_(alignment),
|
||||
needs_refit_(true), needs_evaluation_(true), intersects_other_lines_(false),
|
||||
alignment_(alignment),
|
||||
top_constraints_(NULL), bottom_constraints_(NULL) {
|
||||
BLOBNBOX_C_IT it(&boxes_);
|
||||
it.add_to_end(blob);
|
||||
@ -236,6 +238,7 @@ TabVector* TabVector::ShallowCopy() const {
|
||||
copy->alignment_ = alignment_;
|
||||
copy->extended_ymax_ = extended_ymax_;
|
||||
copy->extended_ymin_ = extended_ymin_;
|
||||
copy->intersects_other_lines_ = intersects_other_lines_;
|
||||
return copy;
|
||||
}
|
||||
|
||||
@ -373,6 +376,9 @@ void TabVector::MergeSimilarTabVectors(const ICOORD& vertical,
|
||||
v1->Print("by deleting");
|
||||
}
|
||||
v2->MergeWith(vertical, it1.extract());
|
||||
if (textord_debug_tabfind) {
|
||||
v2->Print("Producing");
|
||||
}
|
||||
ICOORD merged_vector = v2->endpt();
|
||||
merged_vector -= v2->startpt();
|
||||
if (abs(merged_vector.x()) > 100) {
|
||||
@ -604,13 +610,19 @@ void TabVector::Evaluate(const ICOORD& vertical, TabFind* finder) {
|
||||
++height_count;
|
||||
}
|
||||
mean_height /= height_count;
|
||||
int max_gutter = kGutterMultiple * mean_height;
|
||||
if (IsRagged()) {
|
||||
// Ragged edges face a tougher test in that the gap must always be within
|
||||
// the height of the blob.
|
||||
max_gutter = kGutterToNeighbourRatio * mean_height;
|
||||
}
|
||||
|
||||
STATS gutters(0, max_gutter + 1);
|
||||
// Evaluate the boxes for their goodness, calculating the coverage as we go.
|
||||
// Remove boxes that are not good and shorten the list to the first and
|
||||
// last good boxes.
|
||||
bool deleted_a_box = false;
|
||||
int mean_gutter = 0;
|
||||
int gutter_count = 0;
|
||||
int num_deleted_boxes = 0;
|
||||
bool text_on_image = false;
|
||||
int good_length = 0;
|
||||
const TBOX* prev_good_box = NULL;
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||
@ -618,8 +630,10 @@ void TabVector::Evaluate(const ICOORD& vertical, TabFind* finder) {
|
||||
const TBOX& box = bbox->bounding_box();
|
||||
int mid_y = (box.top() + box.bottom()) / 2;
|
||||
if (TabFind::WithinTestRegion(2, XAtY(box.bottom()), box.bottom())) {
|
||||
if (!debug)
|
||||
if (!debug) {
|
||||
tprintf("After already deleting %d boxes, ", num_deleted_boxes);
|
||||
Print("Starting evaluation");
|
||||
}
|
||||
debug = true;
|
||||
}
|
||||
// A good box is one where the nearest neighbour on the inside is closer
|
||||
@ -627,17 +641,11 @@ void TabVector::Evaluate(const ICOORD& vertical, TabFind* finder) {
|
||||
// (of the putative column).
|
||||
bool left = IsLeftTab();
|
||||
int tab_x = XAtY(mid_y);
|
||||
int max_gutter = kGutterMultiple * mean_height;
|
||||
if (IsRagged()) {
|
||||
// Ragged edges face a tougher test in that the gap must always be within
|
||||
// the height of the blob.
|
||||
max_gutter = kGutterToNeighbourRatio * mean_height;
|
||||
}
|
||||
int gutter_width;
|
||||
int neighbour_gap;
|
||||
finder->GutterWidthAndNeighbourGap(tab_x, mean_height, max_gutter, left,
|
||||
bbox, &gutter_width, &neighbour_gap);
|
||||
if (TabFind::WithinTestRegion(2, tab_x, mid_y)) {
|
||||
if (debug) {
|
||||
tprintf("Box (%d,%d)->(%d,%d) has gutter %d, ndist %d\n",
|
||||
box.left(), box.bottom(), box.right(), box.top(),
|
||||
gutter_width, neighbour_gap);
|
||||
@ -646,8 +654,7 @@ void TabVector::Evaluate(const ICOORD& vertical, TabFind* finder) {
|
||||
if (neighbour_gap * kGutterToNeighbourRatio <= gutter_width) {
|
||||
// A good box contributes its height to the good_length.
|
||||
good_length += box.top() - box.bottom();
|
||||
mean_gutter += gutter_width;
|
||||
++gutter_count;
|
||||
gutters.add(gutter_width, 1);
|
||||
// Two good boxes together contribute the gap between them
|
||||
// to the good_length as well, as long as the gap is not
|
||||
// too big.
|
||||
@ -667,6 +674,8 @@ void TabVector::Evaluate(const ICOORD& vertical, TabFind* finder) {
|
||||
SetYStart(box.bottom());
|
||||
}
|
||||
prev_good_box = &box;
|
||||
if (bbox->flow() == BTFT_TEXT_ON_IMAGE)
|
||||
text_on_image = true;
|
||||
} else {
|
||||
// Get rid of boxes that are not good.
|
||||
if (debug) {
|
||||
@ -675,7 +684,7 @@ void TabVector::Evaluate(const ICOORD& vertical, TabFind* finder) {
|
||||
gutter_width, neighbour_gap);
|
||||
}
|
||||
it.extract();
|
||||
deleted_a_box = true;
|
||||
++num_deleted_boxes;
|
||||
}
|
||||
}
|
||||
if (debug) {
|
||||
@ -684,8 +693,10 @@ void TabVector::Evaluate(const ICOORD& vertical, TabFind* finder) {
|
||||
// If there are any good boxes, do it again, except this time get rid of
|
||||
// boxes that have a gutter that is a small fraction of the mean gutter.
|
||||
// This filters out ends that run into a coincidental gap in the text.
|
||||
if (gutter_count > 0) {
|
||||
mean_gutter /= gutter_count;
|
||||
int search_top = endpt_.y();
|
||||
int search_bottom = startpt_.y();
|
||||
int median_gutter = IntCastRounded(gutters.median());
|
||||
if (gutters.get_total() > 0) {
|
||||
prev_good_box = NULL;
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||
BLOBNBOX* bbox = it.data();
|
||||
@ -706,21 +717,23 @@ void TabVector::Evaluate(const ICOORD& vertical, TabFind* finder) {
|
||||
finder->GutterWidthAndNeighbourGap(tab_x, mean_height, max_gutter, left,
|
||||
bbox, &gutter_width, &neighbour_gap);
|
||||
// Now we can make the test.
|
||||
if (gutter_width >= mean_gutter * kMinGutterFraction) {
|
||||
if (gutter_width >= median_gutter * kMinGutterFraction) {
|
||||
if (prev_good_box == NULL) {
|
||||
// Adjust the start to the first good box.
|
||||
SetYStart(box.bottom());
|
||||
search_bottom = box.top();
|
||||
}
|
||||
prev_good_box = &box;
|
||||
search_top = box.bottom();
|
||||
} else {
|
||||
// Get rid of boxes that are not good.
|
||||
if (TabFind::WithinTestRegion(2, tab_x, mid_y)) {
|
||||
if (debug) {
|
||||
tprintf("Bad Box (%d,%d)->(%d,%d) with gutter %d, mean gutter %d\n",
|
||||
box.left(), box.bottom(), box.right(), box.top(),
|
||||
gutter_width, mean_gutter);
|
||||
gutter_width, median_gutter);
|
||||
}
|
||||
it.extract();
|
||||
deleted_a_box = true;
|
||||
++num_deleted_boxes = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -730,7 +743,7 @@ void TabVector::Evaluate(const ICOORD& vertical, TabFind* finder) {
|
||||
// Compute the percentage of the vector that is occupied by good boxes.
|
||||
int length = endpt_.y() - startpt_.y();
|
||||
percent_score_ = 100 * good_length / length;
|
||||
if (deleted_a_box) {
|
||||
if (num_deleted_boxes > 0) {
|
||||
needs_refit_ = true;
|
||||
FitAndEvaluateIfNeeded(vertical, finder);
|
||||
if (boxes_.empty())
|
||||
@ -738,11 +751,19 @@ void TabVector::Evaluate(const ICOORD& vertical, TabFind* finder) {
|
||||
}
|
||||
// Test the gutter over the whole vector, instead of just at the boxes.
|
||||
int required_shift;
|
||||
int gutter_width = finder->GutterWidth(startpt_.y(), endpt_.y(), *this,
|
||||
&required_shift);
|
||||
if (search_bottom > search_top) {
|
||||
search_bottom = startpt_.y();
|
||||
search_top = endpt_.y();
|
||||
}
|
||||
double min_gutter_width = kLineCountReciprocal / boxes_.length();
|
||||
min_gutter_width += IsRagged() ? kMinRaggedGutter : kMinAlignedGutter;
|
||||
min_gutter_width *= mean_height;
|
||||
int max_gutter_width = IntCastRounded(min_gutter_width) + 1;
|
||||
if (median_gutter > max_gutter_width)
|
||||
max_gutter_width = median_gutter;
|
||||
int gutter_width = finder->GutterWidth(search_bottom, search_top, *this,
|
||||
text_on_image, max_gutter_width,
|
||||
&required_shift);
|
||||
if (gutter_width < min_gutter_width) {
|
||||
if (debug) {
|
||||
tprintf("Rejecting bad tab Vector with %d gutter vs %g min\n",
|
||||
|
@ -20,6 +20,7 @@
|
||||
#ifndef TESSERACT_TEXTORD_TABVECTOR_H__
|
||||
#define TESSERACT_TEXTORD_TABVECTOR_H__
|
||||
|
||||
#include "blobgrid.h"
|
||||
#include "clst.h"
|
||||
#include "elst.h"
|
||||
#include "elst2.h"
|
||||
@ -29,8 +30,6 @@
|
||||
class BLOBNBOX;
|
||||
class ScrollView;
|
||||
|
||||
CLISTIZEH(BLOBNBOX)
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
|
||||
@ -56,8 +55,6 @@ enum TabAlignment {
|
||||
class TabFind;
|
||||
class TabVector;
|
||||
class TabConstraint;
|
||||
typedef BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> BlobGrid;
|
||||
typedef GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> BlobGridSearch;
|
||||
|
||||
ELIST2IZEH(TabVector)
|
||||
CLISTIZEH(TabVector)
|
||||
@ -179,6 +176,12 @@ class TabVector : public ELIST2_LINK {
|
||||
void set_endpt(const ICOORD& end) {
|
||||
endpt_ = end;
|
||||
}
|
||||
bool intersects_other_lines() const {
|
||||
return intersects_other_lines_;
|
||||
}
|
||||
// Stores value in the intersects_other_lines_ flag.
void set_intersects_other_lines(bool value) { intersects_other_lines_ = value; }
|
||||
|
||||
// Inline quasi-accessors that require some computation.
|
||||
|
||||
@ -258,6 +261,21 @@ class TabVector : public ELIST2_LINK {
|
||||
endpt_.set_x(x);
|
||||
}
|
||||
|
||||
// Reflect the tab vector in the y-axis.
|
||||
void ReflectInYAxis() {
|
||||
startpt_.set_x(-startpt_.x());
|
||||
endpt_.set_x(-endpt_.x());
|
||||
sort_key_ = -sort_key_;
|
||||
if (alignment_ == TA_LEFT_ALIGNED)
|
||||
alignment_ = TA_RIGHT_ALIGNED;
|
||||
else if (alignment_ == TA_RIGHT_ALIGNED)
|
||||
alignment_ = TA_LEFT_ALIGNED;
|
||||
if (alignment_ == TA_LEFT_RAGGED)
|
||||
alignment_ = TA_RIGHT_RAGGED;
|
||||
else if (alignment_ == TA_RIGHT_RAGGED)
|
||||
alignment_ = TA_LEFT_RAGGED;
|
||||
}
|
||||
|
||||
// Separate function to compute the sort key for a given coordinate pair.
|
||||
static int SortKey(const ICOORD& vertical, int x, int y) {
|
||||
ICOORD pt(x, y);
|
||||
@ -393,6 +411,8 @@ class TabVector : public ELIST2_LINK {
|
||||
bool needs_refit_;
|
||||
// True if a fit has been done, so re-evaluation is needed.
|
||||
bool needs_evaluation_;
|
||||
// True if a separator line intersects at least 2 other lines.
|
||||
bool intersects_other_lines_;
|
||||
// The type of this TabVector.
|
||||
TabAlignment alignment_;
|
||||
// The list of boxes whose edges are aligned at this TabVector.
|
||||
|
764
textord/textlineprojection.cpp
Normal file
764
textord/textlineprojection.cpp
Normal file
@ -0,0 +1,764 @@
|
||||
// Copyright 2011 Google Inc. All Rights Reserved.
|
||||
// Author: rays@google.com (Ray Smith)
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "textlineprojection.h"
|
||||
#include "allheaders.h"
|
||||
#include "bbgrid.h" // Base class.
|
||||
#include "blobbox.h" // BlobNeighourDir.
|
||||
#include "blobs.h"
|
||||
#include "colpartition.h"
|
||||
#include "normalis.h"
|
||||
|
||||
// Padding factor applied to blobs whose orientation is definite.
const int kOrientedPadFactor = 8;
// Padding factor applied to blobs whose orientation is not definite.
const int kDefaultPadFactor = 2;
// Cost multiplier for a step that moves away from the line center.
const int kWrongWayPenalty = 4;
// Ratio of parallel gap to perpendicular gap when measuring the total
// distance of a box from a target box in curved textline space.
// The parallel gap is divided by this factor, i.e. treated more favorably,
// so that quotes and ellipsis at the end of textlines can be caught.
const int kParaPerpDistRatio = 4;
// The inter-line gap must reach this multiple of scale_factor_ before the
// increment box is padded perpendicular to the text line.
const int kMinLineSpacingFactor = 4;
// Largest tab-stop overrun allowed for horizontal padding, measured in
// projection pixels.
const int kMaxTabStopOverrun = 6;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Constructs an empty projection (no pix yet, origin at 0,0).
// resolution is the input resolution; the scale factor is chosen so that the
// projection map is about 100 ppi whatever the input, and is never below 1.
TextlineProjection::TextlineProjection(int resolution)
    : x_origin_(0), y_origin_(0), pix_(NULL) {
  const int rounded_scale = IntCastRounded(resolution / 100.0);
  scale_factor_ = rounded_scale < 1 ? 1 : rounded_scale;
}
|
||||
// Releases the projection image held in pix_.
TextlineProjection::~TextlineProjection() { pixDestroy(&pix_); }
|
||||
|
||||
// Build the projection profile given the input_block containing lists of
|
||||
// blobs, a rotation to convert to image coords,
|
||||
// and a full-resolution nontext_map, marking out areas to avoid.
|
||||
// During construction, we have the following assumptions:
|
||||
// The rotation is a multiple of 90 degrees, ie no deskew yet.
|
||||
// The blobs have had their left and right rules set to also limit
|
||||
// the range of projection.
|
||||
void TextlineProjection::ConstructProjection(TO_BLOCK* input_block,
|
||||
const FCOORD& rotation,
|
||||
Pix* nontext_map) {
|
||||
pixDestroy(&pix_);
|
||||
TBOX image_box(0, 0, pixGetWidth(nontext_map), pixGetHeight(nontext_map));
|
||||
x_origin_ = 0;
|
||||
y_origin_ = image_box.height();
|
||||
int width = (image_box.width() + scale_factor_ - 1) / scale_factor_;
|
||||
int height = (image_box.height() + scale_factor_ - 1) / scale_factor_;
|
||||
|
||||
pix_ = pixCreate(width, height, 8);
|
||||
ProjectBlobs(&input_block->blobs, rotation, image_box, nontext_map);
|
||||
ProjectBlobs(&input_block->large_blobs, rotation, image_box, nontext_map);
|
||||
Pix* final_pix = pixBlockconv(pix_, 1, 1);
|
||||
// Pix* final_pix = pixBlockconv(pix_, 2, 2);
|
||||
pixDestroy(&pix_);
|
||||
pix_ = final_pix;
|
||||
}
|
||||
|
||||
// Display the blobs in the window colored according to textline quality.
|
||||
void TextlineProjection::PlotGradedBlobs(BLOBNBOX_LIST* blobs,
|
||||
ScrollView* win) {
|
||||
BLOBNBOX_IT it(blobs);
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||
BLOBNBOX* blob = it.data();
|
||||
const TBOX& box = blob->bounding_box();
|
||||
bool bad_box = BoxOutOfHTextline(box, NULL, false);
|
||||
if (blob->UniquelyVertical())
|
||||
win->Pen(ScrollView::YELLOW);
|
||||
else
|
||||
win->Pen(bad_box ? ScrollView::RED : ScrollView::BLUE);
|
||||
win->Rectangle(box.left(), box.bottom(), box.right(), box.top());
|
||||
}
|
||||
win->Update();
|
||||
}
|
||||
|
||||
// Moves blobs that look like they don't sit well on a textline from the
|
||||
// input blobs list to the output small_blobs list.
|
||||
// This gets them away from initial textline finding to stop diacritics
|
||||
// from forming incorrect textlines. (Introduced mainly to fix Thai.)
|
||||
void TextlineProjection::MoveNonTextlineBlobs(
|
||||
BLOBNBOX_LIST* blobs, BLOBNBOX_LIST* small_blobs) const {
|
||||
BLOBNBOX_IT it(blobs);
|
||||
BLOBNBOX_IT small_it(small_blobs);
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||
BLOBNBOX* blob = it.data();
|
||||
const TBOX& box = blob->bounding_box();
|
||||
bool debug = AlignedBlob::WithinTestRegion(2, box.left(),
|
||||
box.bottom());
|
||||
if (BoxOutOfHTextline(box, NULL, debug) && !blob->UniquelyVertical()) {
|
||||
blob->ClearNeighbours();
|
||||
small_it.add_to_end(it.extract());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Create a window and display the projection in it.
|
||||
void TextlineProjection::DisplayProjection() const {
|
||||
int width = pixGetWidth(pix_);
|
||||
int height = pixGetHeight(pix_);
|
||||
Pix* pixc = pixCreate(width, height, 32);
|
||||
int src_wpl = pixGetWpl(pix_);
|
||||
int col_wpl = pixGetWpl(pixc);
|
||||
uinT32* src_data = pixGetData(pix_);
|
||||
uinT32* col_data = pixGetData(pixc);
|
||||
for (int y = 0; y < height; ++y, src_data += src_wpl, col_data += col_wpl) {
|
||||
for (int x = 0; x < width; ++x) {
|
||||
int pixel = GET_DATA_BYTE(src_data, x);
|
||||
l_uint32 result;
|
||||
if (pixel <= 17)
|
||||
composeRGBPixel(0, 0, pixel * 15, &result);
|
||||
else if (pixel <= 145)
|
||||
composeRGBPixel(0, (pixel - 17) * 2, 255, &result);
|
||||
else
|
||||
composeRGBPixel((pixel - 145) * 2, 255, 255, &result);
|
||||
col_data[x] = result;
|
||||
}
|
||||
}
|
||||
#if 0
|
||||
// TODO(rays) uncomment when scrollview can display non-binary images.
|
||||
ScrollView* win = new ScrollView("Projection", 0, 0,
|
||||
width, height, width, height);
|
||||
win->Image(pixc, 0, 0);
|
||||
win->Update();
|
||||
#else
|
||||
pixWrite("projection.png", pixc, IFF_PNG);
|
||||
#endif
|
||||
pixDestroy(&pixc);
|
||||
}
|
||||
|
||||
// Compute the distance of the box from the partition using curved projection
|
||||
// space. As DistanceOfBoxFromBox, except that the direction is taken from
|
||||
// the ColPartition and the median bounds of the ColPartition are used as
|
||||
// the to_box.
|
||||
int TextlineProjection::DistanceOfBoxFromPartition(const TBOX& box,
|
||||
const ColPartition& part,
|
||||
const DENORM* denorm,
|
||||
bool debug) const {
|
||||
// Compute a partition box that uses the median top/bottom of the blobs
|
||||
// within and median left/right for vertical.
|
||||
TBOX part_box = part.bounding_box();
|
||||
if (part.IsHorizontalType()) {
|
||||
part_box.set_top(part.median_top());
|
||||
part_box.set_bottom(part.median_bottom());
|
||||
} else {
|
||||
part_box.set_left(part.median_left());
|
||||
part_box.set_right(part.median_right());
|
||||
}
|
||||
// Now use DistanceOfBoxFromBox to make the actual calculation.
|
||||
return DistanceOfBoxFromBox(box, part_box, part.IsHorizontalType(),
|
||||
denorm, debug);
|
||||
}
|
||||
|
||||
// Compute the distance from the from_box to the to_box using curved
|
||||
// projection space. Separation that involves a decrease in projection
|
||||
// density (moving from the from_box to the to_box) is weighted more heavily
|
||||
// than constant density, and an increase is weighted less.
|
||||
// If horizontal_textline is true, then curved space is used vertically,
|
||||
// as for a diacritic on the edge of a textline.
|
||||
// The projection uses original image coords, so denorm is used to get
|
||||
// back to the image coords from box/part space.
|
||||
// How the calculation works: Think of a diacritic near a textline.
|
||||
// Distance is measured from the far side of the from_box to the near side of
|
||||
// the to_box. Shown is the horizontal textline case.
|
||||
// |------^-----|
|
||||
// | from | box |
|
||||
// |------|-----|
|
||||
// perpendicular |
|
||||
// <------v-------->|--------------------|
|
||||
// parallel | to box |
|
||||
// |--------------------|
|
||||
// Perpendicular distance uses "curved space" See VerticalDistance below.
|
||||
// Parallel distance is linear.
|
||||
// Result is perpendicular_gap + parallel_gap / kParaPerpDistRatio.
|
||||
int TextlineProjection::DistanceOfBoxFromBox(const TBOX& from_box,
|
||||
const TBOX& to_box,
|
||||
bool horizontal_textline,
|
||||
const DENORM* denorm,
|
||||
bool debug) const {
|
||||
// The parallel_gap is the horizontal gap between a horizontal textline and
|
||||
// the box. Analogous for vertical.
|
||||
int parallel_gap = 0;
|
||||
// start_pt is the box end of the line to be modified for curved space.
|
||||
TPOINT start_pt;
|
||||
// end_pt is the partition end of the line to be modified for curved space.
|
||||
TPOINT end_pt;
|
||||
if (horizontal_textline) {
|
||||
parallel_gap = from_box.x_gap(to_box) + from_box.width();
|
||||
start_pt.x = (from_box.left() + from_box.right()) / 2;
|
||||
end_pt.x = start_pt.x;
|
||||
if (from_box.top() - to_box.top() >= to_box.bottom() - from_box.bottom()) {
|
||||
start_pt.y = from_box.top();
|
||||
end_pt.y = MIN(to_box.top(), start_pt.y);
|
||||
} else {
|
||||
start_pt.y = from_box.bottom();
|
||||
end_pt.y = MAX(to_box.bottom(), start_pt.y);
|
||||
}
|
||||
} else {
|
||||
parallel_gap = from_box.y_gap(to_box) + from_box.height();
|
||||
if (from_box.right() - to_box.right() >= to_box.left() - from_box.left()) {
|
||||
start_pt.x = from_box.right();
|
||||
end_pt.x = MIN(to_box.right(), start_pt.x);
|
||||
} else {
|
||||
start_pt.x = from_box.left();
|
||||
end_pt.x = MAX(to_box.left(), start_pt.x);
|
||||
}
|
||||
start_pt.y = (from_box.bottom() + from_box.top()) / 2;
|
||||
end_pt.y = start_pt.y;
|
||||
}
|
||||
// The perpendicular gap is the max vertical distance gap out of:
|
||||
// top of from_box to to_box top and bottom of from_box to to_box bottom.
|
||||
// This value is then modified for curved projection space.
|
||||
// Analogous for vertical.
|
||||
int perpendicular_gap = 0;
|
||||
// If start_pt == end_pt, then the from_box lies entirely within the to_box
|
||||
// (in the perpendicular direction), so we don't need to calculate the
|
||||
// perpendicular_gap.
|
||||
if (start_pt.x != end_pt.x || start_pt.y != end_pt.y) {
|
||||
if (denorm != NULL) {
|
||||
// Denormalize the start and end.
|
||||
denorm->DenormTransform(start_pt, &start_pt);
|
||||
denorm->DenormTransform(end_pt, &end_pt);
|
||||
}
|
||||
if (abs(start_pt.y - end_pt.y) >= abs(start_pt.x - end_pt.x)) {
|
||||
perpendicular_gap = VerticalDistance(debug, start_pt.x, start_pt.y,
|
||||
end_pt.y);
|
||||
} else {
|
||||
perpendicular_gap = HorizontalDistance(debug, start_pt.x, end_pt.x,
|
||||
start_pt.y);
|
||||
}
|
||||
}
|
||||
// The parallel_gap weighs less than the perpendicular_gap.
|
||||
return perpendicular_gap + parallel_gap / kParaPerpDistRatio;
|
||||
}
|
||||
|
||||
// Compute the distance between (x, y1) and (x, y2) using the rule that
|
||||
// a decrease in textline density is weighted more heavily than an increase.
|
||||
// The coordinates are in source image space, ie processed by any denorm
|
||||
// already, but not yet scaled by scale_factor_.
|
||||
// Going from the outside of a textline to the inside should measure much
|
||||
// less distance than going from the inside of a textline to the outside.
|
||||
// How it works:
|
||||
// An increase is cheap (getting closer to a textline).
|
||||
// Constant costs unity.
|
||||
// A decrease is expensive (getting further from a textline).
|
||||
// Pixels in projection map Counted distance
|
||||
// 2
|
||||
// 3 1/x
|
||||
// 3 1
|
||||
// 2 x
|
||||
// 5 1/x
|
||||
// 7 1/x
|
||||
// Total: 1 + x + 3/x where x = kWrongWayPenalty.
|
||||
int TextlineProjection::VerticalDistance(bool debug, int x,
|
||||
int y1, int y2) const {
|
||||
x = ImageXToProjectionX(x);
|
||||
y1 = ImageYToProjectionY(y1);
|
||||
y2 = ImageYToProjectionY(y2);
|
||||
if (y1 == y2) return 0;
|
||||
int wpl = pixGetWpl(pix_);
|
||||
int step = y1 < y2 ? 1 : -1;
|
||||
uinT32* data = pixGetData(pix_) + y1 * wpl;
|
||||
wpl *= step;
|
||||
int prev_pixel = GET_DATA_BYTE(data, x);
|
||||
int distance = 0;
|
||||
int right_way_steps = 0;
|
||||
for (int y = y1; y != y2; y += step) {
|
||||
data += wpl;
|
||||
int pixel = GET_DATA_BYTE(data, x);
|
||||
if (debug)
|
||||
tprintf("At (%d,%d), pix = %d, prev=%d\n",
|
||||
x, y + step, pixel, prev_pixel);
|
||||
if (pixel < prev_pixel)
|
||||
distance += kWrongWayPenalty;
|
||||
else if (pixel > prev_pixel)
|
||||
++right_way_steps;
|
||||
else
|
||||
++distance;
|
||||
prev_pixel = pixel;
|
||||
}
|
||||
return distance * scale_factor_ +
|
||||
right_way_steps * scale_factor_ / kWrongWayPenalty;
|
||||
}
|
||||
|
||||
// Compute the distance between (x1, y) and (x2, y) using the rule that
|
||||
// a decrease in textline density is weighted more heavily than an increase.
|
||||
int TextlineProjection::HorizontalDistance(bool debug, int x1, int x2,
|
||||
int y) const {
|
||||
x1 = ImageXToProjectionX(x1);
|
||||
x2 = ImageXToProjectionX(x2);
|
||||
y = ImageYToProjectionY(y);
|
||||
if (x1 == x2) return 0;
|
||||
int wpl = pixGetWpl(pix_);
|
||||
int step = x1 < x2 ? 1 : -1;
|
||||
uinT32* data = pixGetData(pix_) + y * wpl;
|
||||
int prev_pixel = GET_DATA_BYTE(data, x1);
|
||||
int distance = 0;
|
||||
int right_way_steps = 0;
|
||||
for (int x = x1; x != x2; x += step) {
|
||||
int pixel = GET_DATA_BYTE(data, x + step);
|
||||
if (debug)
|
||||
tprintf("At (%d,%d), pix = %d, prev=%d\n",
|
||||
x + step, y, pixel, prev_pixel);
|
||||
if (pixel < prev_pixel)
|
||||
distance += kWrongWayPenalty;
|
||||
else if (pixel > prev_pixel)
|
||||
++right_way_steps;
|
||||
else
|
||||
++distance;
|
||||
prev_pixel = pixel;
|
||||
}
|
||||
return distance * scale_factor_ +
|
||||
right_way_steps * scale_factor_ / kWrongWayPenalty;
|
||||
}
|
||||
|
||||
// Returns true if the blob appears to be outside of a textline.
|
||||
// Such blobs are potentially diacritics (even if large in Thai) and should
|
||||
// be kept away from initial textline finding.
|
||||
bool TextlineProjection::BoxOutOfHTextline(const TBOX& box,
|
||||
const DENORM* denorm,
|
||||
bool debug) const {
|
||||
int grad1 = 0;
|
||||
int grad2 = 0;
|
||||
EvaluateBoxInternal(box, denorm, debug, &grad1, &grad2, NULL, NULL);
|
||||
int worst_result = MIN(grad1, grad2);
|
||||
int total_result = grad1 + grad2;
|
||||
if (total_result >= 6) return false; // Strongly in textline.
|
||||
// Medium strength: if either gradient is negative, it is likely outside
|
||||
// the body of the textline.
|
||||
if (worst_result < 0)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Evaluates the textlineiness of a ColPartition. Uses EvaluateBox below,
|
||||
// but uses the median top/bottom for horizontal and median left/right for
|
||||
// vertical instead of the bounding box edges.
|
||||
// Evaluates for both horizontal and vertical and returns the best result,
|
||||
// with a positive value for horizontal and a negative value for vertical.
|
||||
int TextlineProjection::EvaluateColPartition(const ColPartition& part,
|
||||
const DENORM* denorm,
|
||||
bool debug) const {
|
||||
if (part.IsSingleton())
|
||||
return EvaluateBox(part.bounding_box(), denorm, debug);
|
||||
// Test vertical orientation.
|
||||
TBOX box = part.bounding_box();
|
||||
// Use the partition median for left/right.
|
||||
box.set_left(part.median_left());
|
||||
box.set_right(part.median_right());
|
||||
int vresult = EvaluateBox(box, denorm, debug);
|
||||
|
||||
// Test horizontal orientation.
|
||||
box = part.bounding_box();
|
||||
// Use the partition median for top/bottom.
|
||||
box.set_top(part.median_top());
|
||||
box.set_bottom(part.median_bottom());
|
||||
int hresult = EvaluateBox(box, denorm, debug);
|
||||
if (debug) {
|
||||
tprintf("Partition hresult=%d, vresult=%d from:", hresult, vresult);
|
||||
part.bounding_box().print();
|
||||
part.Print();
|
||||
}
|
||||
return hresult >= -vresult ? hresult : vresult;
|
||||
}
|
||||
|
||||
// Computes the mean projection gradients over the horizontal and vertical
|
||||
// edges of the box:
|
||||
// -h-h-h-h-h-h
|
||||
// |------------| mean=htop -v|+v--------+v|-v
|
||||
// |+h+h+h+h+h+h| -v|+v +v|-v
|
||||
// | | -v|+v +v|-v
|
||||
// | box | -v|+v box +v|-v
|
||||
// | | -v|+v +v|-v
|
||||
// |+h+h+h+h+h+h| -v|+v +v|-v
|
||||
// |------------| mean=hbot -v|+v--------+v|-v
|
||||
// -h-h-h-h-h-h
|
||||
// mean=vleft mean=vright
|
||||
//
|
||||
// Returns MAX(htop,hbot) - MAX(vleft,vright), which is a positive number
|
||||
// for a horizontal textline, a negative number for a vertical textline,
|
||||
// and near zero for undecided. Undecided is most likely non-text.
|
||||
// All the gradients are truncated to remain non-negative, since negative
|
||||
// horizontal gradients don't give any indication of being vertical and
|
||||
// vice versa.
|
||||
// Additional complexity: The coordinates have to be transformed to original
|
||||
// image coordinates with denorm (if not null), scaled to match the projection
|
||||
// pix, and THEN step out 2 pixels each way from the edge to compute the
|
||||
// gradient, and tries 3 positions, each measuring the gradient over a
|
||||
// 4-pixel spread: (+3/-1), (+2/-2), (+1/-3). This complexity is handled by
|
||||
// several layers of helpers below.
|
||||
int TextlineProjection::EvaluateBox(const TBOX& box, const DENORM* denorm,
|
||||
bool debug) const {
|
||||
return EvaluateBoxInternal(box, denorm, debug, NULL, NULL, NULL, NULL);
|
||||
}
|
||||
|
||||
// Internal version of EvaluateBox returns the unclipped gradients as well
|
||||
// as the result of EvaluateBox.
|
||||
// hgrad1 and hgrad2 are the gradients for the horizontal textline.
|
||||
int TextlineProjection::EvaluateBoxInternal(const TBOX& box,
|
||||
const DENORM* denorm, bool debug,
|
||||
int* hgrad1, int* hgrad2,
|
||||
int* vgrad1, int* vgrad2) const {
|
||||
int top_gradient = BestMeanGradientInRow(denorm, box.left(), box.right(),
|
||||
box.top(), true);
|
||||
int bottom_gradient = -BestMeanGradientInRow(denorm, box.left(), box.right(),
|
||||
box.bottom(), false);
|
||||
int left_gradient = BestMeanGradientInColumn(denorm, box.left(), box.bottom(),
|
||||
box.top(), true);
|
||||
int right_gradient = -BestMeanGradientInColumn(denorm, box.right(),
|
||||
box.bottom(), box.top(),
|
||||
false);
|
||||
int top_clipped = MAX(top_gradient, 0);
|
||||
int bottom_clipped = MAX(bottom_gradient, 0);
|
||||
int left_clipped = MAX(left_gradient, 0);
|
||||
int right_clipped = MAX(right_gradient, 0);
|
||||
if (debug) {
|
||||
tprintf("Gradients: top = %d, bottom = %d, left= %d, right= %d for box:",
|
||||
top_gradient, bottom_gradient, left_gradient, right_gradient);
|
||||
box.print();
|
||||
}
|
||||
int result = MAX(top_clipped, bottom_clipped) -
|
||||
MAX(left_clipped, right_clipped);
|
||||
if (hgrad1 != NULL && hgrad2 != NULL) {
|
||||
*hgrad1 = top_gradient;
|
||||
*hgrad2 = bottom_gradient;
|
||||
}
|
||||
if (vgrad1 != NULL && vgrad2 != NULL) {
|
||||
*vgrad1 = left_gradient;
|
||||
*vgrad2 = right_gradient;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Helper returns the mean gradient value for the horizontal row at the given
|
||||
// y, (in the external coordinates) by subtracting the mean of the transformed
|
||||
// row 2 pixels above from the mean of the transformed row 2 pixels below.
|
||||
// This gives a positive value for a good top edge and negative for bottom.
|
||||
// Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge.
|
||||
int TextlineProjection::BestMeanGradientInRow(const DENORM* denorm,
|
||||
inT16 min_x, inT16 max_x, inT16 y,
|
||||
bool best_is_max) const {
|
||||
TPOINT start_pt(min_x, y);
|
||||
TPOINT end_pt(max_x, y);
|
||||
int upper = MeanPixelsInLineSegment(denorm, -2, start_pt, end_pt);
|
||||
int lower = MeanPixelsInLineSegment(denorm, 2, start_pt, end_pt);
|
||||
int best_gradient = lower - upper;
|
||||
upper = MeanPixelsInLineSegment(denorm, -1, start_pt, end_pt);
|
||||
lower = MeanPixelsInLineSegment(denorm, 3, start_pt, end_pt);
|
||||
int gradient = lower - upper;
|
||||
if ((gradient > best_gradient) == best_is_max)
|
||||
best_gradient = gradient;
|
||||
upper = MeanPixelsInLineSegment(denorm, -3, start_pt, end_pt);
|
||||
lower = MeanPixelsInLineSegment(denorm, 1, start_pt, end_pt);
|
||||
gradient = lower - upper;
|
||||
if ((gradient > best_gradient) == best_is_max)
|
||||
best_gradient = gradient;
|
||||
return best_gradient;
|
||||
}
|
||||
|
||||
// Helper returns the mean gradient value for the vertical column at the
|
||||
// given x, (in the external coordinates) by subtracting the mean of the
|
||||
// transformed column 2 pixels left from the mean of the transformed column
|
||||
// 2 pixels to the right.
|
||||
// This gives a positive value for a good left edge and negative for right.
|
||||
// Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge.
|
||||
int TextlineProjection::BestMeanGradientInColumn(const DENORM* denorm, inT16 x,
|
||||
inT16 min_y, inT16 max_y,
|
||||
bool best_is_max) const {
|
||||
TPOINT start_pt(x, min_y);
|
||||
TPOINT end_pt(x, max_y);
|
||||
int left = MeanPixelsInLineSegment(denorm, -2, start_pt, end_pt);
|
||||
int right = MeanPixelsInLineSegment(denorm, 2, start_pt, end_pt);
|
||||
int best_gradient = right - left;
|
||||
left = MeanPixelsInLineSegment(denorm, -1, start_pt, end_pt);
|
||||
right = MeanPixelsInLineSegment(denorm, 3, start_pt, end_pt);
|
||||
int gradient = right - left;
|
||||
if ((gradient > best_gradient) == best_is_max)
|
||||
best_gradient = gradient;
|
||||
left = MeanPixelsInLineSegment(denorm, -3, start_pt, end_pt);
|
||||
right = MeanPixelsInLineSegment(denorm, 1, start_pt, end_pt);
|
||||
gradient = right - left;
|
||||
if ((gradient > best_gradient) == best_is_max)
|
||||
best_gradient = gradient;
|
||||
return best_gradient;
|
||||
}
|
||||
|
||||
// Helper returns the mean pixel value over the line between the start_pt and
|
||||
// end_pt (inclusive), but shifted perpendicular to the line in the projection
|
||||
// image by offset pixels. For simplicity, it is assumed that the vector is
|
||||
// either nearly horizontal or nearly vertical. It works on skewed textlines!
|
||||
// The end points are in external coordinates, and will be denormalized with
|
||||
// the denorm if not NULL before further conversion to pix coordinates.
|
||||
// After all the conversions, the offset is added to the direction
|
||||
// perpendicular to the line direction. The offset is thus in projection image
|
||||
// coordinates, which allows the caller to get a guaranteed displacement
|
||||
// between pixels used to calculate gradients.
|
||||
int TextlineProjection::MeanPixelsInLineSegment(const DENORM* denorm,
|
||||
int offset,
|
||||
TPOINT start_pt,
|
||||
TPOINT end_pt) const {
|
||||
TransformToPixCoords(denorm, &start_pt);
|
||||
TransformToPixCoords(denorm, &end_pt);
|
||||
TruncateToImageBounds(&start_pt);
|
||||
TruncateToImageBounds(&end_pt);
|
||||
int wpl = pixGetWpl(pix_);
|
||||
uinT32* data = pixGetData(pix_);
|
||||
int total = 0;
|
||||
int count = 0;
|
||||
int x_delta = end_pt.x - start_pt.x;
|
||||
int y_delta = end_pt.y - start_pt.y;
|
||||
if (abs(x_delta) >= abs(y_delta)) {
|
||||
if (x_delta == 0)
|
||||
return 0;
|
||||
// Horizontal line. Add the offset vertically.
|
||||
int x_step = x_delta > 0 ? 1 : -1;
|
||||
// Correct offset for rotation, keeping it anti-clockwise of the delta.
|
||||
offset *= x_step;
|
||||
start_pt.y += offset;
|
||||
end_pt.y += offset;
|
||||
TruncateToImageBounds(&start_pt);
|
||||
TruncateToImageBounds(&end_pt);
|
||||
x_delta = end_pt.x - start_pt.x;
|
||||
y_delta = end_pt.y - start_pt.y;
|
||||
count = x_delta * x_step + 1;
|
||||
for (int x = start_pt.x; x != end_pt.x; x += x_step) {
|
||||
int y = start_pt.y + DivRounded(y_delta * (x - start_pt.x), x_delta);
|
||||
total += GET_DATA_BYTE(data + wpl * y, x);
|
||||
}
|
||||
} else {
|
||||
// Vertical line. Add the offset horizontally.
|
||||
int y_step = y_delta > 0 ? 1 : -1;
|
||||
// Correct offset for rotation, keeping it anti-clockwise of the delta.
|
||||
// Pix holds the image with y=0 at the top, so the offset is negated.
|
||||
offset *= -y_step;
|
||||
start_pt.x += offset;
|
||||
end_pt.x += offset;
|
||||
TruncateToImageBounds(&start_pt);
|
||||
TruncateToImageBounds(&end_pt);
|
||||
x_delta = end_pt.x - start_pt.x;
|
||||
y_delta = end_pt.y - start_pt.y;
|
||||
count = y_delta * y_step + 1;
|
||||
for (int y = start_pt.y; y != end_pt.y; y += y_step) {
|
||||
int x = start_pt.x + DivRounded(x_delta * (y - start_pt.y), y_delta);
|
||||
total += GET_DATA_BYTE(data + wpl * y, x);
|
||||
}
|
||||
}
|
||||
return DivRounded(total, count);
|
||||
}
|
||||
|
||||
// Given an input pix, and a box, the sides of the box are shrunk inwards until
|
||||
// they bound any black pixels found within the original box.
|
||||
// The function converts between tesseract coords and the pix coords assuming
|
||||
// that this pix is full resolution equal in size to the original image.
|
||||
// Returns an empty box if there are no black pixels in the source box.
|
||||
static TBOX BoundsWithinBox(Pix* pix, const TBOX& box) {
|
||||
int im_height = pixGetHeight(pix);
|
||||
Box* input_box = boxCreate(box.left(), im_height - box.top(),
|
||||
box.width(), box.height());
|
||||
Box* output_box = NULL;
|
||||
pixClipBoxToForeground(pix, input_box, NULL, &output_box);
|
||||
TBOX result_box;
|
||||
if (output_box != NULL) {
|
||||
l_int32 x, y, width, height;
|
||||
boxGetGeometry(output_box, &x, &y, &width, &height);
|
||||
result_box.set_left(x);
|
||||
result_box.set_right(x + width);
|
||||
result_box.set_top(im_height - y);
|
||||
result_box.set_bottom(result_box.top() - height);
|
||||
boxDestroy(&output_box);
|
||||
}
|
||||
boxDestroy(&input_box);
|
||||
return result_box;
|
||||
}
|
||||
|
||||
// Splits the given box in half at x_middle or y_middle according to split_on_x
|
||||
// and checks for nontext_map pixels in each half. Reduces the bbox so that it
|
||||
// still includes the middle point, but does not touch any fg pixels in
|
||||
// nontext_map. An empty box may be returned if there is no such box.
|
||||
static void TruncateBoxToMissNonText(int x_middle, int y_middle,
|
||||
bool split_on_x, Pix* nontext_map,
|
||||
TBOX* bbox) {
|
||||
TBOX box1(*bbox);
|
||||
TBOX box2(*bbox);
|
||||
TBOX im_box;
|
||||
if (split_on_x) {
|
||||
box1.set_right(x_middle);
|
||||
im_box = BoundsWithinBox(nontext_map, box1);
|
||||
if (!im_box.null_box()) box1.set_left(im_box.right());
|
||||
box2.set_left(x_middle);
|
||||
im_box = BoundsWithinBox(nontext_map, box2);
|
||||
if (!im_box.null_box()) box2.set_right(im_box.left());
|
||||
} else {
|
||||
box1.set_bottom(y_middle);
|
||||
im_box = BoundsWithinBox(nontext_map, box1);
|
||||
if (!im_box.null_box()) box1.set_top(im_box.bottom());
|
||||
box2.set_top(y_middle);
|
||||
im_box = BoundsWithinBox(nontext_map, box2);
|
||||
if (!im_box.null_box()) box2.set_bottom(im_box.top());
|
||||
}
|
||||
box1 += box2;
|
||||
*bbox = box1;
|
||||
}
|
||||
|
||||
|
||||
// Helper function to add 1 to a rectangle in source image coords to the
|
||||
// internal projection pix_.
|
||||
void TextlineProjection::IncrementRectangle8Bit(const TBOX& box) {
|
||||
int scaled_left = ImageXToProjectionX(box.left());
|
||||
int scaled_top = ImageYToProjectionY(box.top());
|
||||
int scaled_right = ImageXToProjectionX(box.right());
|
||||
int scaled_bottom = ImageYToProjectionY(box.bottom());
|
||||
int wpl = pixGetWpl(pix_);
|
||||
uinT32* data = pixGetData(pix_) + scaled_top * wpl;
|
||||
for (int y = scaled_top; y <= scaled_bottom; ++y) {
|
||||
for (int x = scaled_left; x <= scaled_right; ++x) {
|
||||
int pixel = GET_DATA_BYTE(data, x);
|
||||
if (pixel < 255)
|
||||
SET_DATA_BYTE(data, x, pixel + 1);
|
||||
}
|
||||
data += wpl;
|
||||
}
|
||||
}
|
||||
|
||||
// Inserts a list of blobs into the projection.
|
||||
// Rotation is a multiple of 90 degrees to get from blob coords to
|
||||
// nontext_map coords, nontext_map_box is the bounds of the nontext_map.
|
||||
// Blobs are spread horizontally or vertically according to their internal
|
||||
// flags, but the spreading is truncated by set pixels in the nontext_map
|
||||
// and also by the horizontal rule line limits on the blobs.
|
||||
void TextlineProjection::ProjectBlobs(BLOBNBOX_LIST* blobs,
|
||||
const FCOORD& rotation,
|
||||
const TBOX& nontext_map_box,
|
||||
Pix* nontext_map) {
|
||||
BLOBNBOX_IT blob_it(blobs);
|
||||
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
|
||||
BLOBNBOX* blob = blob_it.data();
|
||||
TBOX bbox = blob->bounding_box();
|
||||
ICOORD middle((bbox.left() + bbox.right()) / 2,
|
||||
(bbox.bottom() + bbox.top()) / 2);
|
||||
bool spreading_horizontally = PadBlobBox(blob, &bbox);
|
||||
// Rotate to match the nontext_map.
|
||||
bbox.rotate(rotation);
|
||||
middle.rotate(rotation);
|
||||
if (rotation.x() == 0.0f)
|
||||
spreading_horizontally = !spreading_horizontally;
|
||||
// Clip to the image before applying the increments.
|
||||
bbox &= nontext_map_box; // This is in-place box intersection.
|
||||
// Check for image pixels before spreading.
|
||||
TruncateBoxToMissNonText(middle.x(), middle.y(), spreading_horizontally,
|
||||
nontext_map, &bbox);
|
||||
if (bbox.area() > 0) {
|
||||
IncrementRectangle8Bit(bbox);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Pads the bounding box of the given blob according to whether it is on
|
||||
// a horizontal or vertical text line, taking into account tab-stops near
|
||||
// the blob. Returns true if padding was in the horizontal direction.
|
||||
bool TextlineProjection::PadBlobBox(BLOBNBOX* blob, TBOX* bbox) {
|
||||
// Determine which direction to spread.
|
||||
// If text is well spaced out, it can be useful to pad perpendicular to
|
||||
// the textline direction, so as to ensure diacritics get absorbed
|
||||
// correctly, but if the text is tightly spaced, this will destroy the
|
||||
// blank space between textlines in the projection map, and that would
|
||||
// be very bad.
|
||||
int pad_limit = scale_factor_ * kMinLineSpacingFactor;
|
||||
int xpad = 0;
|
||||
int ypad = 0;
|
||||
bool padding_horizontally = false;
|
||||
if (blob->UniquelyHorizontal()) {
|
||||
xpad = bbox->height() * kOrientedPadFactor;
|
||||
padding_horizontally = true;
|
||||
// If the text appears to be very well spaced, pad the other direction by a
|
||||
// single pixel in the projection profile space to help join diacritics to
|
||||
// the textline.
|
||||
if ((blob->neighbour(BND_ABOVE) == NULL ||
|
||||
bbox->y_gap(blob->neighbour(BND_ABOVE)->bounding_box()) > pad_limit) &&
|
||||
(blob->neighbour(BND_BELOW) == NULL ||
|
||||
bbox->y_gap(blob->neighbour(BND_BELOW)->bounding_box()) > pad_limit)) {
|
||||
ypad = scale_factor_;
|
||||
}
|
||||
} else if (blob->UniquelyVertical()) {
|
||||
ypad = bbox->width() * kOrientedPadFactor;
|
||||
if ((blob->neighbour(BND_LEFT) == NULL ||
|
||||
bbox->x_gap(blob->neighbour(BND_LEFT)->bounding_box()) > pad_limit) &&
|
||||
(blob->neighbour(BND_RIGHT) == NULL ||
|
||||
bbox->x_gap(blob->neighbour(BND_RIGHT)->bounding_box()) > pad_limit)) {
|
||||
xpad = scale_factor_;
|
||||
}
|
||||
} else {
|
||||
if ((blob->neighbour(BND_ABOVE) != NULL &&
|
||||
blob->neighbour(BND_ABOVE)->neighbour(BND_BELOW) == blob) ||
|
||||
(blob->neighbour(BND_BELOW) != NULL &&
|
||||
blob->neighbour(BND_BELOW)->neighbour(BND_ABOVE) == blob)) {
|
||||
ypad = bbox->width() * kDefaultPadFactor;
|
||||
}
|
||||
if ((blob->neighbour(BND_RIGHT) != NULL &&
|
||||
blob->neighbour(BND_RIGHT)->neighbour(BND_LEFT) == blob) ||
|
||||
(blob->neighbour(BND_LEFT) != NULL &&
|
||||
blob->neighbour(BND_LEFT)->neighbour(BND_RIGHT) == blob)) {
|
||||
xpad = bbox->height() * kDefaultPadFactor;
|
||||
padding_horizontally = true;
|
||||
}
|
||||
}
|
||||
bbox->pad(xpad, ypad);
|
||||
pad_limit = scale_factor_ * kMaxTabStopOverrun;
|
||||
// Now shrink horizontally to avoid stepping more than pad_limit over a
|
||||
// tab-stop.
|
||||
if (bbox->left() < blob->left_rule() - pad_limit) {
|
||||
bbox->set_left(blob->left_rule() - pad_limit);
|
||||
}
|
||||
if (bbox->right() > blob->right_rule() + pad_limit) {
|
||||
bbox->set_right(blob->right_rule() + pad_limit);
|
||||
}
|
||||
return padding_horizontally;
|
||||
}
|
||||
|
||||
// Helper denormalizes the TPOINT with the denorm if not NULL, then
|
||||
// converts to pix_ coordinates.
|
||||
void TextlineProjection::TransformToPixCoords(const DENORM* denorm,
|
||||
TPOINT* pt) const {
|
||||
if (denorm != NULL) {
|
||||
// Denormalize the point.
|
||||
denorm->DenormTransform(*pt, pt);
|
||||
}
|
||||
pt->x = ImageXToProjectionX(pt->x);
|
||||
pt->y = ImageYToProjectionY(pt->y);
|
||||
}
|
||||
|
||||
// Helper truncates the TPOINT to be within the pix_.
|
||||
void TextlineProjection::TruncateToImageBounds(TPOINT* pt) const {
|
||||
pt->x = ClipToRange<int>(pt->x, 0, pixGetWidth(pix_) - 1);
|
||||
pt->y = ClipToRange<int>(pt->y, 0, pixGetHeight(pix_) - 1);
|
||||
}
|
||||
|
||||
// Transform tesseract image coordinates to coordinates used in the projection.
|
||||
int TextlineProjection::ImageXToProjectionX(int x) const {
|
||||
x = ClipToRange((x - x_origin_) / scale_factor_, 0, pixGetWidth(pix_) - 1);
|
||||
return x;
|
||||
}
|
||||
// Maps a tesseract image y coordinate to a row of the projection pix_.
// The distance is measured downwards from y_origin_, divided by
// scale_factor_, and the result is clamped to [0, height - 1].
int TextlineProjection::ImageYToProjectionY(int y) const {
  const int scaled = (y_origin_ - y) / scale_factor_;
  return ClipToRange(scaled, 0, pixGetHeight(pix_) - 1);
}
|
||||
|
||||
} // namespace tesseract.
|
206
textord/textlineprojection.h
Normal file
206
textord/textlineprojection.h
Normal file
@ -0,0 +1,206 @@
|
||||
// Copyright 2011 Google Inc. All Rights Reserved.
|
||||
// Author: rays@google.com (Ray Smith)
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef TESSERACT_TEXTORD_TEXTLINEPROJECTION_H_
|
||||
#define TESSERACT_TEXTORD_TEXTLINEPROJECTION_H_
|
||||
|
||||
#include "blobgrid.h" // For BlobGrid
|
||||
|
||||
class DENORM;
|
||||
struct Pix;
|
||||
struct TPOINT;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class ColPartition;
|
||||
|
||||
// Simple class to encapsulate the computation of an image representing
|
||||
// local textline density, and function(s) to make use of it.
|
||||
// The underlying principle is that if you smear connected components
|
||||
// horizontally (vertically for components on a vertically written textline)
|
||||
// and count the number of smeared components in an image, then the resulting
|
||||
// image shows the density of the textlines at each image position.
|
||||
class TextlineProjection {
|
||||
public:
|
||||
// The down-scaling factor is computed to obtain a projection resolution
|
||||
// of about 100 dpi, whatever the input.
|
||||
explicit TextlineProjection(int resolution);
|
||||
~TextlineProjection();
|
||||
|
||||
// Build the projection profile given the input_block containing lists of
|
||||
// blobs, a rotation to convert to image coords,
|
||||
// and a full-resolution nontext_map, marking out areas to avoid.
|
||||
// During construction, we have the following assumptions:
|
||||
// The rotation is a multiple of 90 degrees, ie no deskew yet.
|
||||
// The blobs have had their left and right rules set to also limit
|
||||
// the range of projection.
|
||||
void ConstructProjection(TO_BLOCK* input_block,
|
||||
const FCOORD& rotation, Pix* nontext_map);
|
||||
|
||||
// Display the blobs in the window colored according to textline quality.
|
||||
void PlotGradedBlobs(BLOBNBOX_LIST* blobs, ScrollView* win);
|
||||
|
||||
// Moves blobs that look like they don't sit well on a textline from the
|
||||
// input blobs list to the output small_blobs list.
|
||||
// This gets them away from initial textline finding to stop diacritics
|
||||
// from forming incorrect textlines. (Introduced mainly to fix Thai.)
|
||||
void MoveNonTextlineBlobs(BLOBNBOX_LIST* blobs,
|
||||
BLOBNBOX_LIST* small_blobs) const;
|
||||
|
||||
// Create a window and display the projection in it.
|
||||
void DisplayProjection() const;
|
||||
|
||||
// Compute the distance of the box from the partition using curved projection
|
||||
// space. As DistanceOfBoxFromBox, except that the direction is taken from
|
||||
// the ColPartition and the median bounds of the ColPartition are used as
|
||||
// the to_box.
|
||||
int DistanceOfBoxFromPartition(const TBOX& box, const ColPartition& part,
|
||||
const DENORM* denorm, bool debug) const;
|
||||
|
||||
// Compute the distance from the from_box to the to_box using curved
|
||||
// projection space. Separation that involves a decrease in projection
|
||||
// density (moving from the from_box to the to_box) is weighted more heavily
|
||||
// than constant density, and an increase is weighted less.
|
||||
// If horizontal_textline is true, then curved space is used vertically,
|
||||
// as for a diacritic on the edge of a textline.
|
||||
// The projection uses original image coords, so denorm is used to get
|
||||
// back to the image coords from box/part space.
|
||||
int DistanceOfBoxFromBox(const TBOX& from_box, const TBOX& to_box,
|
||||
bool horizontal_textline,
|
||||
const DENORM* denorm, bool debug) const;
|
||||
|
||||
// Compute the distance between (x, y1) and (x, y2) using the rule that
|
||||
// a decrease in textline density is weighted more heavily than an increase.
|
||||
// The coordinates are in source image space, ie processed by any denorm
|
||||
// already, but not yet scaled by scale_factor_.
|
||||
// Going from the outside of a textline to the inside should measure much
|
||||
// less distance than going from the inside of a textline to the outside.
|
||||
int VerticalDistance(bool debug, int x, int y1, int y2) const;
|
||||
|
||||
// Compute the distance between (x1, y) and (x2, y) using the rule that
|
||||
// a decrease in textline density is weighted more heavily than an increase.
|
||||
int HorizontalDistance(bool debug, int x1, int x2, int y) const;
|
||||
|
||||
// Returns true if the blob appears to be outside of a horizontal textline.
|
||||
// Such blobs are potentially diacritics (even if large in Thai) and should
|
||||
// be kept away from initial textline finding.
|
||||
bool BoxOutOfHTextline(const TBOX& box, const DENORM* denorm,
|
||||
bool debug) const;
|
||||
|
||||
// Evaluates the textlineiness of a ColPartition. Uses EvaluateBox below,
|
||||
// but uses the median top/bottom for horizontal and median left/right for
|
||||
// vertical instead of the bounding box edges.
|
||||
// Evaluates for both horizontal and vertical and returns the best result,
|
||||
// with a positive value for horizontal and a negative value for vertical.
|
||||
int EvaluateColPartition(const ColPartition& part, const DENORM* denorm,
|
||||
bool debug) const;
|
||||
|
||||
// Computes the mean projection gradients over the horizontal and vertical
|
||||
// edges of the box:
|
||||
// -h-h-h-h-h-h
|
||||
// |------------| mean=htop -v|+v--------+v|-v
|
||||
// |+h+h+h+h+h+h| -v|+v +v|-v
|
||||
// | | -v|+v +v|-v
|
||||
// | box | -v|+v box +v|-v
|
||||
// | | -v|+v +v|-v
|
||||
// |+h+h+h+h+h+h| -v|+v +v|-v
|
||||
// |------------| mean=hbot -v|+v--------+v|-v
|
||||
// -h-h-h-h-h-h
|
||||
// mean=vleft mean=vright
|
||||
//
|
||||
// Returns MAX(htop,hbot) - MAX(vleft,vright), which is a positive number
|
||||
// for a horizontal textline, a negative number for a vertical textline,
|
||||
// and near zero for undecided. Undecided is most likely non-text.
|
||||
int EvaluateBox(const TBOX& box, const DENORM* denorm, bool debug) const;
|
||||
|
||||
private:
|
||||
// Internal version of EvaluateBox returns the unclipped gradients as well
|
||||
// as the result of EvaluateBox.
|
||||
// hgrad1 and hgrad2 are the gradients for the horizontal textline.
|
||||
int EvaluateBoxInternal(const TBOX& box, const DENORM* denorm, bool debug,
|
||||
int* hgrad1, int* hgrad2,
|
||||
int* vgrad1, int* vgrad2) const;
|
||||
|
||||
// Helper returns the mean gradient value for the horizontal row at the given
|
||||
// y, (in the external coordinates) by subtracting the mean of the transformed
|
||||
// row 2 pixels above from the mean of the transformed row 2 pixels below.
|
||||
// This gives a positive value for a good top edge and negative for bottom.
|
||||
// Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge.
|
||||
int BestMeanGradientInRow(const DENORM* denorm, inT16 min_x, inT16 max_x,
|
||||
inT16 y, bool best_is_max) const;
|
||||
|
||||
// Helper returns the mean gradient value for the vertical column at the
|
||||
// given x, (in the external coordinates) by subtracting the mean of the
|
||||
// transformed column 2 pixels left from the mean of the transformed column
|
||||
// 2 pixels to the right.
|
||||
// This gives a positive value for a good left edge and negative for right.
|
||||
// Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge.
|
||||
int BestMeanGradientInColumn(const DENORM* denorm, inT16 x, inT16 min_y,
|
||||
inT16 max_y, bool best_is_max) const;
|
||||
|
||||
// Helper returns the mean pixel value over the line between the start_pt and
|
||||
// end_pt (inclusive), but shifted perpendicular to the line in the projection
|
||||
// image by offset pixels. For simplicity, it is assumed that the vector is
|
||||
// either nearly horizontal or nearly vertical. It works on skewed textlines!
|
||||
// The end points are in external coordinates, and will be denormalized with
|
||||
// the denorm if not NULL before further conversion to pix coordinates.
|
||||
// After all the conversions, the offset is added to the direction
|
||||
// perpendicular to the line direction. The offset is thus in projection image
|
||||
// coordinates, which allows the caller to get a guaranteed displacement
|
||||
// between pixels used to calculate gradients.
|
||||
int MeanPixelsInLineSegment(const DENORM* denorm, int offset,
|
||||
TPOINT start_pt, TPOINT end_pt) const;
|
||||
|
||||
// Helper function to add 1 to a rectangle in source image coords to the
|
||||
// internal projection pix_.
|
||||
void IncrementRectangle8Bit(const TBOX& box);
|
||||
// Inserts a list of blobs into the projection.
|
||||
// Rotation is a multiple of 90 degrees to get from blob coords to
|
||||
// nontext_map coords, image_box is the bounds of the nontext_map.
|
||||
// Blobs are spread horizontally or vertically according to their internal
|
||||
// flags, but the spreading is truncated by set pixels in the nontext_map
|
||||
// and also by the horizontal rule line limits on the blobs.
|
||||
void ProjectBlobs(BLOBNBOX_LIST* blobs, const FCOORD& rotation,
|
||||
const TBOX& image_box, Pix* nontext_map);
|
||||
// Pads the bounding box of the given blob according to whether it is on
|
||||
// a horizontal or vertical text line, taking into account tab-stops near
|
||||
// the blob. Returns true if padding was in the horizontal direction.
|
||||
bool PadBlobBox(BLOBNBOX* blob, TBOX* bbox);
|
||||
|
||||
// Helper denormalizes the TPOINT with the denorm if not NULL, then
|
||||
// converts to pix_ coordinates.
|
||||
void TransformToPixCoords(const DENORM* denorm, TPOINT* pt) const;
|
||||
|
||||
// Helper truncates the TPOINT to be within the pix_.
|
||||
void TruncateToImageBounds(TPOINT* pt) const;
|
||||
|
||||
// Transform tesseract coordinates to coordinates used in the pix.
|
||||
int ImageXToProjectionX(int x) const;
|
||||
int ImageYToProjectionY(int y) const;
|
||||
|
||||
// The down-sampling scale factor used in building the image.
|
||||
int scale_factor_;
|
||||
// The blob coordinates of the top-left (origin of the pix_) in tesseract
|
||||
// coordinates. Used to transform the bottom-up tesseract coordinates to
|
||||
// the top-down coordinates of the pix.
|
||||
int x_origin_;
|
||||
int y_origin_;
|
||||
// The image of horizontally smeared blob boxes summed to provide a
|
||||
// textline density map. As with a horizontal projection, the map has
|
||||
// dips in the gaps between textlines.
|
||||
Pix* pix_;
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_TEXTORD_TEXTLINEPROJECTION_H_
|
@ -27,7 +27,7 @@
|
||||
namespace tesseract {
|
||||
|
||||
Textord::Textord(CCStruct* ccstruct)
|
||||
: ccstruct_(ccstruct),
|
||||
: ccstruct_(ccstruct), use_cjk_fp_model_(false),
|
||||
// makerow.cpp ///////////////////////////////////////////
|
||||
BOOL_MEMBER(textord_single_height_mode, false,
|
||||
"Script has no xheight, so use a single mode",
|
||||
@ -317,6 +317,13 @@ void Textord::TextordPage(PageSegMode pageseg_mode,
|
||||
to_block->get_rows(), to_block->block->row_list());
|
||||
}
|
||||
cleanup_blocks(blocks); // Remove empties.
|
||||
|
||||
// Compute the margins for each row in the block, to be used later for
|
||||
// paragraph detection.
|
||||
BLOCK_IT b_it(blocks);
|
||||
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
|
||||
b_it.data()->compute_row_margins();
|
||||
}
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
close_to_win();
|
||||
#endif
|
||||
|
@ -50,6 +50,13 @@ class Textord {
|
||||
// than one, clean up and leave only the best.
|
||||
void CleanupSingleRowResult(PageSegMode pageseg_mode, PAGE_RES* page_res);
|
||||
|
||||
bool use_cjk_fp_model() const {
|
||||
return use_cjk_fp_model_;
|
||||
}
|
||||
// Stores flag as the new value of use_cjk_fp_model_.
void set_use_cjk_fp_model(bool flag) { use_cjk_fp_model_ = flag; }
|
||||
|
||||
// tospace.cpp ///////////////////////////////////////////
|
||||
void to_spacing(
|
||||
ICOORD page_tr, //topright of page
|
||||
@ -64,6 +71,7 @@ class Textord {
|
||||
// tordmain.cpp ///////////////////////////////////////////
|
||||
void find_components(Pix* pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks);
|
||||
void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, BOOL8 testing_on);
|
||||
|
||||
private:
|
||||
// For underlying memory management and other utilities.
|
||||
CCStruct* ccstruct_;
|
||||
@ -71,6 +79,8 @@ class Textord {
|
||||
// The size of the input image.
|
||||
ICOORD page_tr_;
|
||||
|
||||
bool use_cjk_fp_model_;
|
||||
|
||||
// makerow.cpp ///////////////////////////////////////////
|
||||
// Make the textlines inside each block.
|
||||
void MakeRows(PageSegMode pageseg_mode, const FCOORD& skew,
|
||||
|
@ -112,9 +112,11 @@ void compute_fixed_pitch(ICOORD page_tr, // top right
|
||||
}
|
||||
|
||||
block_index = 1;
|
||||
for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
|
||||
block_it.forward ()) {
|
||||
for (block_it.mark_cycle_pt(); !block_it.cycled_list();
|
||||
block_it.forward()) {
|
||||
block = block_it.data ();
|
||||
POLY_BLOCK* pb = block->block->poly_block();
|
||||
if (pb != NULL && !pb->IsText()) continue; // Non-text doesn't exist!
|
||||
row_it.set_to_list (block->get_rows ());
|
||||
row_index = 1;
|
||||
for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
|
||||
@ -166,9 +168,11 @@ void fix_row_pitch(TO_ROW *bad_row, // row to fix
|
||||
block_stats.set_range (0, maxwidth);
|
||||
like_stats.set_range (0, maxwidth);
|
||||
block_index = 1;
|
||||
for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
|
||||
block_it.forward ()) {
|
||||
block = block_it.data ();
|
||||
for (block_it.mark_cycle_pt(); !block_it.cycled_list();
|
||||
block_it.forward()) {
|
||||
block = block_it.data();
|
||||
POLY_BLOCK* pb = block->block->poly_block();
|
||||
if (pb != NULL && !pb->IsText()) continue; // Non text doesn't exist!
|
||||
row_index = 1;
|
||||
row_it.set_to_list (block->get_rows ());
|
||||
for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
|
||||
@ -568,7 +572,8 @@ BOOL8 try_rows_fixed( //find line stats
|
||||
row = row_it.data ();
|
||||
ASSERT_HOST (row->xheight > 0);
|
||||
maxwidth = (inT32) ceil (row->xheight * textord_words_maxspace);
|
||||
if (row->fixed_pitch > 0 && fixed_pitch_row (row, block_index)) {
|
||||
if (row->fixed_pitch > 0 &&
|
||||
fixed_pitch_row(row, block->block, block_index)) {
|
||||
if (row->fixed_pitch == 0) {
|
||||
lower = row->pr_nonsp;
|
||||
upper = row->pr_space;
|
||||
@ -971,9 +976,9 @@ BOOL8 find_row_pitch( //find lines
|
||||
* The larger threshold is the word gap threshold.
|
||||
**********************************************************************/
|
||||
|
||||
BOOL8 fixed_pitch_row( //find lines
|
||||
TO_ROW *row, //row to do
|
||||
inT32 block_index //block_number
|
||||
BOOL8 fixed_pitch_row(TO_ROW *row, // row to do
|
||||
BLOCK* block,
|
||||
inT32 block_index // block_number
|
||||
) {
|
||||
const char *res_string; //pitch result
|
||||
inT16 mid_cuts; //no of cheap cuts
|
||||
@ -984,7 +989,8 @@ BOOL8 fixed_pitch_row( //find lines
|
||||
non_space = row->fp_nonsp;
|
||||
if (non_space > row->fixed_pitch)
|
||||
non_space = row->fixed_pitch;
|
||||
if (textord_all_prop) {
|
||||
POLY_BLOCK* pb = block != NULL ? block->poly_block() : NULL;
|
||||
if (textord_all_prop || (pb != NULL && !pb->IsText())) {
|
||||
// Set the decision to definitely proportional.
|
||||
pitch_sd = textord_words_def_prop * row->fixed_pitch;
|
||||
row->pitch_decision = PITCH_DEF_PROP;
|
||||
@ -1755,6 +1761,10 @@ void print_pitch_sd( //find fp cells
|
||||
**********************************************************************/
|
||||
void find_repeated_chars(TO_BLOCK *block, // Block to search.
|
||||
BOOL8 testing_on) { // Debug mode.
|
||||
POLY_BLOCK* pb = block->block->poly_block();
|
||||
if (pb != NULL && !pb->IsText())
|
||||
return; // Don't find repeated chars in non-text blocks.
|
||||
|
||||
TO_ROW *row;
|
||||
BLOBNBOX_IT box_it;
|
||||
BLOBNBOX_IT search_it; // forward search
|
||||
|
@ -107,6 +107,7 @@ BOOL8 find_row_pitch( //find lines
|
||||
);
|
||||
BOOL8 fixed_pitch_row( //find lines
|
||||
TO_ROW *row, //row to do
|
||||
BLOCK* block,
|
||||
inT32 block_index //block_number
|
||||
);
|
||||
BOOL8 count_pitch_stats( //find lines
|
||||
|
@ -29,6 +29,7 @@
|
||||
#include "pitsync1.h"
|
||||
#include "tovars.h"
|
||||
#include "topitch.h"
|
||||
#include "cjkpitch.h"
|
||||
#include "textord.h"
|
||||
#include "fpchop.h"
|
||||
#include "wordseg.h"
|
||||
@ -101,7 +102,6 @@ void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST* real_rows) {
|
||||
*
|
||||
* Arrange the blobs into words.
|
||||
*/
|
||||
|
||||
void make_words(tesseract::Textord *textord,
|
||||
ICOORD page_tr, // top right
|
||||
float gradient, // page skew
|
||||
@ -110,8 +110,12 @@ void make_words(tesseract::Textord *textord,
|
||||
TO_BLOCK_IT block_it; // iterator
|
||||
TO_BLOCK *block; // current block
|
||||
|
||||
compute_fixed_pitch(page_tr, port_blocks, gradient, FCOORD(0.0f, -1.0f),
|
||||
!(BOOL8) textord_test_landscape);
|
||||
if (textord->use_cjk_fp_model()) {
|
||||
compute_fixed_pitch_cjk(page_tr, port_blocks);
|
||||
} else {
|
||||
compute_fixed_pitch(page_tr, port_blocks, gradient, FCOORD(0.0f, -1.0f),
|
||||
!(BOOL8) textord_test_landscape);
|
||||
}
|
||||
textord->to_spacing(page_tr, port_blocks);
|
||||
block_it.set_to_list(port_blocks);
|
||||
for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
|
||||
@ -525,24 +529,26 @@ void make_real_words(
|
||||
row = row_it.data ();
|
||||
if (row->blob_list ()->empty () && !row->rep_words.empty ()) {
|
||||
real_row = make_rep_words (row, block);
|
||||
}
|
||||
else if (!row->blob_list()->empty()) {
|
||||
} else if (!row->blob_list()->empty()) {
|
||||
// In a fixed pitch document, some lines may be detected as fixed pitch
|
||||
// while others don't, and will go through different path.
|
||||
// For non-space delimited language like CJK, fixed pitch chop always
|
||||
// leave the entire line as one word. We can force consistent chopping
|
||||
// with force_make_prop_words flag.
|
||||
POLY_BLOCK* pb = block->block->poly_block();
|
||||
if (textord_chopper_test) {
|
||||
real_row = textord->make_blob_words (row, rotation);
|
||||
} else if (textord_force_make_prop_words ||
|
||||
row->pitch_decision == PITCH_DEF_PROP ||
|
||||
row->pitch_decision == PITCH_CORR_PROP) {
|
||||
(pb != NULL && !pb->IsText()) ||
|
||||
row->pitch_decision == PITCH_DEF_PROP ||
|
||||
row->pitch_decision == PITCH_CORR_PROP) {
|
||||
real_row = textord->make_prop_words (row, rotation);
|
||||
} else if (row->pitch_decision == PITCH_DEF_FIXED ||
|
||||
row->pitch_decision == PITCH_CORR_FIXED) {
|
||||
real_row = fixed_pitch_words (row, rotation);
|
||||
} else
|
||||
} else {
|
||||
ASSERT_HOST(FALSE);
|
||||
}
|
||||
}
|
||||
if (real_row != NULL) {
|
||||
//put row in block
|
||||
|
@ -108,9 +108,11 @@ void WorkingPartSet::MakeBlocks(const ICOORD& bleft, const ICOORD& tright,
|
||||
ColPartition* next_block_part = part_it_.data();
|
||||
const TBOX& part_box = part->bounding_box();
|
||||
const TBOX& next_box = next_block_part->bounding_box();
|
||||
|
||||
// In addition to the same type, the next box must not be above the
|
||||
// current box, nor (if image) too far below.
|
||||
if (next_block_part->type() == part->type() &&
|
||||
PolyBlockType type = part->type(), next_type = next_block_part->type();
|
||||
if (ColPartition::TypesSimilar(type, next_type) &&
|
||||
next_box.bottom() <= part_box.top() &&
|
||||
(text_block ||
|
||||
part_box.bottom() - next_box.top() < part_box.height()))
|
||||
@ -139,4 +141,3 @@ void WorkingPartSet::MakeBlocks(const ICOORD& bleft, const ICOORD& tright,
|
||||
}
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user