mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-18 14:41:36 +08:00
38a6b18a5f
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@363 d0cd1f9f-072b-0410-8dd7-cf729c803f20
355 lines
14 KiB
C++
355 lines
14 KiB
C++
///////////////////////////////////////////////////////////////////////
|
|
// File: linefind.cpp
|
|
// Description: Class to find vertical lines in an image and create
|
|
// a corresponding list of empty blobs.
|
|
// Author: Ray Smith
|
|
// Created: Thu Mar 20 09:49:01 PDT 2008
|
|
//
|
|
// (C) Copyright 2008, Google Inc.
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
///////////////////////////////////////////////////////////////////////
|
|
|
|
#ifdef _MSC_VER
|
|
#pragma warning(disable:4244) // Conversion warnings
|
|
#endif
|
|
|
|
#include "linefind.h"
|
|
#include "alignedblob.h"
|
|
#include "tabvector.h"
|
|
#include "blobbox.h"
|
|
#include "edgblob.h"
|
|
// This entire file is dependent upon leptonica. If you don't have it,
|
|
// then the code doesn't do anything useful.
|
|
#ifdef HAVE_CONFIG_H
|
|
#include "config_auto.h"
|
|
#endif
|
|
#ifdef HAVE_LIBLEPT
|
|
#include "allheaders.h"
|
|
#endif
|
|
|
|
BOOL_VAR(textord_tabfind_show_vlines, false, "Show vertical rule lines");
|
|
|
|
namespace tesseract {
|
|
|
|
// Denominator of resolution makes max pixel width to allow thin lines.
|
|
const int kThinLineFraction = 30;
|
|
// Denominator of resolution makes min pixels to demand line lengths to be.
|
|
const int kMinLineLengthFraction = 8;
|
|
// Spacing of cracks across the page to break up tall vertical lines.
|
|
const int kCrackSpacing = 100;
|
|
// Grid size used by line finder. Not very critical.
|
|
const int kLineFindGridSize = 50;
|
|
|
|
// Finds vertical line objects in the given pix.
|
|
// Uses the given resolution to determine size thresholds instead of any
|
|
// that may be present in the pix.
|
|
// The output vertical_x and vertical_y contain a sum of the output vectors,
|
|
// thereby giving the mean vertical direction.
|
|
// The output vectors are owned by the list and Frozen (cannot refit) by
|
|
// having no boxes, as there is no need to refit or merge separator lines.
|
|
void LineFinder::FindVerticalLines(int resolution, Pix* pix,
|
|
int* vertical_x, int* vertical_y,
|
|
TabVector_LIST* vectors) {
|
|
#ifdef HAVE_LIBLEPT
|
|
Pix* line_pix;
|
|
Boxa* boxes = GetVLineBoxes(resolution, pix, &line_pix);
|
|
C_BLOB_LIST line_cblobs;
|
|
int width = pixGetWidth(pix);
|
|
int height = pixGetHeight(pix);
|
|
ConvertBoxaToBlobs(width, height, &boxes, &line_cblobs);
|
|
// Make the BLOBNBOXes from the C_BLOBs.
|
|
BLOBNBOX_LIST line_bblobs;
|
|
C_BLOB_IT blob_it(&line_cblobs);
|
|
BLOBNBOX_IT bbox_it(&line_bblobs);
|
|
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
|
|
C_BLOB* cblob = blob_it.data();
|
|
BLOBNBOX* bblob = new BLOBNBOX(cblob);
|
|
bbox_it.add_to_end(bblob);
|
|
}
|
|
ICOORD bleft(0, 0);
|
|
ICOORD tright(width, height);
|
|
FindLineVectors(bleft, tright, &line_bblobs, vertical_x, vertical_y, vectors);
|
|
if (!vectors->empty()) {
|
|
// Some lines were found, so erase the unused blobs from the line image
|
|
// and then subtract the line image from the source.
|
|
bbox_it.move_to_first();
|
|
for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
|
|
BLOBNBOX* blob = bbox_it.data();
|
|
if (blob->left_tab_type() == TT_UNCONFIRMED) {
|
|
const TBOX& box = blob->bounding_box();
|
|
Box* pixbox = boxCreate(box.left(), height - box.top(),
|
|
box.width(), box.height());
|
|
pixClearInRect(line_pix, pixbox);
|
|
boxDestroy(&pixbox);
|
|
}
|
|
}
|
|
pixDilateBrick(line_pix, line_pix, 1, 3);
|
|
pixSubtract(pix, pix, line_pix);
|
|
if (textord_tabfind_show_vlines)
|
|
pixWrite("vlinesclean.png", line_pix, IFF_PNG);
|
|
ICOORD vertical;
|
|
vertical.set_with_shrink(*vertical_x, *vertical_y);
|
|
TabVector::MergeSimilarTabVectors(vertical, vectors, NULL);
|
|
}
|
|
pixDestroy(&line_pix);
|
|
#endif
|
|
}
|
|
|
|
// Finds horizontal line objects in the given pix.
|
|
// Uses the given resolution to determine size thresholds instead of any
|
|
// that may be present in the pix.
|
|
// The output vectors are owned by the list and Frozen (cannot refit) by
|
|
// having no boxes, as there is no need to refit or merge separator lines.
|
|
void LineFinder::FindHorizontalLines(int resolution, Pix* pix,
|
|
TabVector_LIST* vectors) {
|
|
#ifdef HAVE_LIBLEPT
|
|
Pix* line_pix;
|
|
Boxa* boxes = GetHLineBoxes(resolution, pix, &line_pix);
|
|
C_BLOB_LIST line_cblobs;
|
|
int width = pixGetWidth(pix);
|
|
int height = pixGetHeight(pix);
|
|
ConvertBoxaToBlobs(height, width, &boxes, &line_cblobs);
|
|
// Make the BLOBNBOXes from the C_BLOBs.
|
|
BLOBNBOX_LIST line_bblobs;
|
|
C_BLOB_IT blob_it(&line_cblobs);
|
|
BLOBNBOX_IT bbox_it(&line_bblobs);
|
|
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
|
|
C_BLOB* cblob = blob_it.data();
|
|
BLOBNBOX* bblob = new BLOBNBOX(cblob);
|
|
bbox_it.add_to_end(bblob);
|
|
}
|
|
ICOORD bleft(0, 0);
|
|
ICOORD tright(height, width);
|
|
int vertical_x, vertical_y;
|
|
FindLineVectors(bleft, tright, &line_bblobs, &vertical_x, &vertical_y,
|
|
vectors);
|
|
if (!vectors->empty()) {
|
|
// Some lines were found, so erase the unused blobs from the line image
|
|
// and then subtract the line image from the source.
|
|
bbox_it.move_to_first();
|
|
for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
|
|
BLOBNBOX* blob = bbox_it.data();
|
|
if (blob->left_tab_type() == TT_UNCONFIRMED) {
|
|
const TBOX& box = blob->bounding_box();
|
|
// Coords are in tess format so filp x and y and then covert
|
|
// to leptonica by height -y.
|
|
Box* pixbox = boxCreate(box.bottom(), height - box.right(),
|
|
box.height(), box.width());
|
|
pixClearInRect(line_pix, pixbox);
|
|
boxDestroy(&pixbox);
|
|
}
|
|
}
|
|
pixDilateBrick(line_pix, line_pix, 3, 1);
|
|
pixSubtract(pix, pix, line_pix);
|
|
if (textord_tabfind_show_vlines)
|
|
pixWrite("hlinesclean.png", line_pix, IFF_PNG);
|
|
ICOORD vertical;
|
|
vertical.set_with_shrink(vertical_x, vertical_y);
|
|
TabVector::MergeSimilarTabVectors(vertical, vectors, NULL);
|
|
// Iterate the vectors to flip them.
|
|
TabVector_IT h_it(vectors);
|
|
for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
|
|
h_it.data()->XYFlip();
|
|
}
|
|
}
|
|
pixDestroy(&line_pix);
|
|
#endif
|
|
}
|
|
|
|
// Converts the Boxa array to a list of C_BLOB, getting rid of severely
|
|
// overlapping outlines and those that are children of a bigger one.
|
|
// The output is a list of C_BLOBs that are owned by the list.
|
|
// The C_OUTLINEs in the C_BLOBs contain no outline data - just empty
|
|
// bounding boxes. The Boxa is consumed and destroyed.
|
|
void LineFinder::ConvertBoxaToBlobs(int image_width, int image_height,
|
|
Boxa** boxes, C_BLOB_LIST* blobs) {
|
|
#ifdef HAVE_LIBLEPT
|
|
C_OUTLINE_LIST outlines;
|
|
C_OUTLINE_IT ol_it = &outlines;
|
|
// Iterate the boxes to convert to outlines.
|
|
int nboxes = boxaGetCount(*boxes);
|
|
for (int i = 0; i < nboxes; ++i) {
|
|
l_int32 x, y, width, height;
|
|
boxaGetBoxGeometry(*boxes, i, &x, &y, &width, &height);
|
|
// Make a C_OUTLINE from the leptonica box. This is a bit of a hack,
|
|
// as there is no outline, just a bounding box, but with some very
|
|
// small changes to coutln.cpp, it works nicely.
|
|
ICOORD top_left(x, image_height - y);
|
|
ICOORD bot_right(x + width, image_height - (y + height));
|
|
CRACKEDGE startpt;
|
|
startpt.pos = top_left;
|
|
C_OUTLINE* outline = new C_OUTLINE(&startpt, top_left, bot_right, 0);
|
|
ol_it.add_after_then_move(outline);
|
|
}
|
|
// Use outlines_to_blobs to convert the outlines to blobs and find
|
|
// overlapping and contained objects. The output list of blobs in the block
|
|
// has all the bad ones filtered out and deleted.
|
|
BLOCK block;
|
|
ICOORD page_tl(0, 0);
|
|
ICOORD page_br(image_width, image_height);
|
|
outlines_to_blobs(&block, page_tl, page_br, &outlines);
|
|
// Transfer the created blobs to the output list.
|
|
C_BLOB_IT blob_it(blobs);
|
|
blob_it.add_list_after(block.blob_list());
|
|
// The boxes aren't needed any more.
|
|
boxaDestroy(boxes);
|
|
#endif
|
|
}
|
|
|
|
// Finds vertical lines in the given list of BLOBNBOXes. bleft and tright
|
|
// are the bounds of the image on which the input line_bblobs were found.
|
|
// The input line_bblobs list is const really.
|
|
// The output vertical_x and vertical_y are the total of all the vectors.
|
|
// The output list of TabVector makes no reference to the input BLOBNBOXes.
|
|
void LineFinder::FindLineVectors(const ICOORD& bleft, const ICOORD& tright,
|
|
BLOBNBOX_LIST* line_bblobs,
|
|
int* vertical_x, int* vertical_y,
|
|
TabVector_LIST* vectors) {
|
|
BLOBNBOX_IT bbox_it(line_bblobs);
|
|
int b_count = 0;
|
|
// Put all the blobs into the grid to find the lines, and move the blobs
|
|
// to the output lists.
|
|
AlignedBlob blob_grid(kLineFindGridSize, bleft, tright);
|
|
for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
|
|
BLOBNBOX* bblob = bbox_it.data();
|
|
bblob->set_left_tab_type(TT_UNCONFIRMED);
|
|
bblob->set_left_rule(bleft.x());
|
|
bblob->set_right_rule(tright.x());
|
|
bblob->set_left_crossing_rule(bleft.x());
|
|
bblob->set_right_crossing_rule(tright.x());
|
|
blob_grid.InsertBBox(false, true, bblob);
|
|
++b_count;
|
|
}
|
|
if (textord_debug_tabfind)
|
|
tprintf("Inserted %d line blobs into grid\n", b_count);
|
|
if (b_count == 0)
|
|
return;
|
|
|
|
// Search the entire grid, looking for vertical line vectors.
|
|
GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> lsearch(&blob_grid);
|
|
BLOBNBOX* bbox;
|
|
TabVector_IT vector_it(vectors);
|
|
*vertical_x = 0;
|
|
*vertical_y = 1;
|
|
lsearch.StartFullSearch();
|
|
while ((bbox = lsearch.NextFullSearch()) != NULL) {
|
|
if (bbox->left_tab_type() == TT_UNCONFIRMED) {
|
|
const TBOX& box = bbox->bounding_box();
|
|
if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom()))
|
|
tprintf("Finding line vector starting at bbox (%d,%d)\n",
|
|
box.left(), box.bottom());
|
|
AlignedBlobParams align_params(*vertical_x, *vertical_y, box.width());
|
|
TabVector* vector = blob_grid.FindVerticalAlignment(align_params, bbox,
|
|
vertical_x,
|
|
vertical_y);
|
|
if (vector != NULL) {
|
|
vector->Freeze();
|
|
vector_it.add_to_end(vector);
|
|
}
|
|
}
|
|
}
|
|
ScrollView* line_win = NULL;
|
|
if (textord_tabfind_show_vlines) {
|
|
line_win = blob_grid.MakeWindow(0, 50, "Vlines");
|
|
blob_grid.DisplayBoxes(line_win);
|
|
line_win = blob_grid.DisplayTabs("Vlines", line_win);
|
|
}
|
|
}
|
|
|
|
// Get a set of bounding boxes of possible vertical lines in the image.
|
|
// The input resolution overrides any resolution set in src_pix.
|
|
// The output line_pix contains just all the detected lines.
|
|
Boxa* LineFinder::GetVLineBoxes(int resolution, Pix* src_pix, Pix** line_pix) {
|
|
#ifdef HAVE_LIBLEPT
|
|
// Remove any parts of 1 inch/kThinLineFraction wide or more, by opening
|
|
// away the thin lines and subtracting what's left.
|
|
// This is very generous and will leave in even quite wide lines.
|
|
Pix* pixt1 = pixOpenBrick(NULL, src_pix, resolution / kThinLineFraction, 1);
|
|
pixSubtract(pixt1, src_pix, pixt1);
|
|
// Spread sideways to allow for some skew.
|
|
Pix* pixt2 = pixDilateBrick(NULL, pixt1, 3, 1);
|
|
// Now keep only tall stuff of height at least 1 inch/kMinLineLengthFraction.
|
|
pixOpenBrick(pixt1, pixt2, 1, resolution / kMinLineLengthFraction);
|
|
pixDestroy(&pixt2);
|
|
// Put a single pixel crack in every line at an arbitrary spacing,
|
|
// so they break up and the bounding boxes can be used to get the
|
|
// direction accurately enough without needing outlines.
|
|
int wpl = pixGetWpl(pixt1);
|
|
int height = pixGetHeight(pixt1);
|
|
l_uint32* data = pixGetData(pixt1);
|
|
for (int y = kCrackSpacing; y < height; y += kCrackSpacing) {
|
|
memset(data + wpl * y, 0, wpl * sizeof(*data));
|
|
}
|
|
if (textord_tabfind_show_vlines)
|
|
pixWrite("vlines.png", pixt1, IFF_PNG);
|
|
Boxa* boxa = pixConnComp(pixt1, NULL, 8);
|
|
*line_pix = pixt1;
|
|
return boxa;
|
|
#else
|
|
return NULL;
|
|
#endif
|
|
}
|
|
|
|
// Get a set of bounding boxes of possible horizontal lines in the image.
|
|
// The input resolution overrides any resolution set in src_pix.
|
|
// The output line_pix contains just all the detected lines.
|
|
// The output boxes undergo the transformation (x,y)->(height-y,x) so the
|
|
// lines can be found with a vertical line finder afterwards.
|
|
// This transformation allows a simple x/y flip to reverse it in tesseract
|
|
// coordinates and it is faster to flip the lines than rotate the image.
|
|
Boxa* LineFinder::GetHLineBoxes(int resolution, Pix* src_pix, Pix** line_pix) {
|
|
#ifdef HAVE_LIBLEPT
|
|
// Remove any parts of 1 inch/kThinLineFraction high or more, by opening
|
|
// away the thin lines and subtracting what's left.
|
|
// This is very generous and will leave in even quite wide lines.
|
|
Pix* pixt1 = pixOpenBrick(NULL, src_pix, 1, resolution / kThinLineFraction);
|
|
pixSubtract(pixt1, src_pix, pixt1);
|
|
// Spread vertically to allow for some skew.
|
|
Pix* pixt2 = pixDilateBrick(NULL, pixt1, 1, 3);
|
|
// Now keep only wide stuff of width at least 1 inch/kMinLineLengthFraction.
|
|
pixOpenBrick(pixt1, pixt2, resolution / kMinLineLengthFraction, 1);
|
|
pixDestroy(&pixt2);
|
|
// Put a single pixel crack in every line at an arbitrary spacing,
|
|
// so they break up and the bounding boxes can be used to get the
|
|
// direction accurately enough without needing outlines.
|
|
int wpl = pixGetWpl(pixt1);
|
|
int width = pixGetWidth(pixt1);
|
|
int height = pixGetHeight(pixt1);
|
|
l_uint32* data = pixGetData(pixt1);
|
|
for (int y = 0; y < height; ++y, data += wpl) {
|
|
for (int x = kCrackSpacing; x < width; x += kCrackSpacing) {
|
|
CLEAR_DATA_BIT(data, x);
|
|
}
|
|
}
|
|
if (textord_tabfind_show_vlines)
|
|
pixWrite("hlines.png", pixt1, IFF_PNG);
|
|
Boxa* boxa = pixConnComp(pixt1, NULL, 8);
|
|
*line_pix = pixt1;
|
|
|
|
// Iterate the boxes to flip x and y.
|
|
int nboxes = boxaGetCount(boxa);
|
|
for (int i = 0; i < nboxes; ++i) {
|
|
l_int32 x, y, box_width, box_height;
|
|
boxaGetBoxGeometry(boxa, i, &x, &y, &box_width, &box_height);
|
|
Box* box = boxCreate(height - (y + box_height),
|
|
width - (x + box_width), box_height, box_width);
|
|
boxaReplaceBox(boxa, i, box);
|
|
}
|
|
return boxa;
|
|
#else
|
|
return NULL;
|
|
#endif
|
|
}
|
|
|
|
} // namespace tesseract.
|
|
|