/////////////////////////////////////////////////////////////////////// // File: linefind.cpp // Description: Class to find vertical lines in an image and create // a corresponding list of empty blobs. // Author: Ray Smith // Created: Thu Mar 20 09:49:01 PDT 2008 // // (C) Copyright 2008, Google Inc. // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // /////////////////////////////////////////////////////////////////////// #ifdef _MSC_VER #pragma warning(disable:4244) // Conversion warnings #endif #include "linefind.h" #include "alignedblob.h" #include "tabvector.h" #include "blobbox.h" #include "edgblob.h" // This entire file is dependent upon leptonica. If you don't have it, // then the code doesn't do anything useful. #ifdef HAVE_CONFIG_H #include "config_auto.h" #endif #include "allheaders.h" BOOL_VAR(textord_tabfind_show_vlines, false, "Show vertical rule lines"); namespace tesseract { /// Denominator of resolution makes max pixel width to allow thin lines. const int kThinLineFraction = 30; /// Denominator of resolution makes min pixels to demand line lengths to be. const int kMinLineLengthFraction = 8; /// Spacing of cracks across the page to break up tall vertical lines. const int kCrackSpacing = 100; /// Grid size used by line finder. Not very critical. const int kLineFindGridSize = 50; // Finds vertical line objects in the given pix. // Uses the given resolution to determine size thresholds instead of any // that may be present in the pix. // The output vertical_x and vertical_y contain a sum of the output vectors, // thereby giving the mean vertical direction. // The output vectors are owned by the list and Frozen (cannot refit) by // having no boxes, as there is no need to refit or merge separator lines. void LineFinder::FindVerticalLines(int resolution, Pix* pix, int* vertical_x, int* vertical_y, TabVector_LIST* vectors) { Pix* line_pix; Boxa* boxes = GetVLineBoxes(resolution, pix, &line_pix); C_BLOB_LIST line_cblobs; int width = pixGetWidth(pix); int height = pixGetHeight(pix); ConvertBoxaToBlobs(width, height, &boxes, &line_cblobs); // Make the BLOBNBOXes from the C_BLOBs. BLOBNBOX_LIST line_bblobs; C_BLOB_IT blob_it(&line_cblobs); BLOBNBOX_IT bbox_it(&line_bblobs); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { C_BLOB* cblob = blob_it.data(); BLOBNBOX* bblob = new BLOBNBOX(cblob); bbox_it.add_to_end(bblob); } ICOORD bleft(0, 0); ICOORD tright(width, height); FindLineVectors(bleft, tright, &line_bblobs, vertical_x, vertical_y, vectors); if (!vectors->empty()) { // Some lines were found, so erase the unused blobs from the line image // and then subtract the line image from the source. bbox_it.move_to_first(); for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) { BLOBNBOX* blob = bbox_it.data(); if (blob->left_tab_type() == TT_UNCONFIRMED) { const TBOX& box = blob->bounding_box(); Box* pixbox = boxCreate(box.left(), height - box.top(), box.width(), box.height()); pixClearInRect(line_pix, pixbox); boxDestroy(&pixbox); } } pixDilateBrick(line_pix, line_pix, 1, 3); pixSubtract(pix, pix, line_pix); if (textord_tabfind_show_vlines) pixWrite("vlinesclean.png", line_pix, IFF_PNG); ICOORD vertical; vertical.set_with_shrink(*vertical_x, *vertical_y); TabVector::MergeSimilarTabVectors(vertical, vectors, NULL); } pixDestroy(&line_pix); } // Finds horizontal line objects in the given pix. // Uses the given resolution to determine size thresholds instead of any // that may be present in the pix. // The output vectors are owned by the list and Frozen (cannot refit) by // having no boxes, as there is no need to refit or merge separator lines. void LineFinder::FindHorizontalLines(int resolution, Pix* pix, TabVector_LIST* vectors) { Pix* line_pix; Boxa* boxes = GetHLineBoxes(resolution, pix, &line_pix); C_BLOB_LIST line_cblobs; int width = pixGetWidth(pix); int height = pixGetHeight(pix); ConvertBoxaToBlobs(height, width, &boxes, &line_cblobs); // Make the BLOBNBOXes from the C_BLOBs. BLOBNBOX_LIST line_bblobs; C_BLOB_IT blob_it(&line_cblobs); BLOBNBOX_IT bbox_it(&line_bblobs); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { C_BLOB* cblob = blob_it.data(); BLOBNBOX* bblob = new BLOBNBOX(cblob); bbox_it.add_to_end(bblob); } ICOORD bleft(0, 0); ICOORD tright(height, width); int vertical_x, vertical_y; FindLineVectors(bleft, tright, &line_bblobs, &vertical_x, &vertical_y, vectors); if (!vectors->empty()) { // Some lines were found, so erase the unused blobs from the line image // and then subtract the line image from the source. bbox_it.move_to_first(); for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) { BLOBNBOX* blob = bbox_it.data(); if (blob->left_tab_type() == TT_UNCONFIRMED) { const TBOX& box = blob->bounding_box(); // Coords are in tess format so filp x and y and then covert // to leptonica by height -y. Box* pixbox = boxCreate(box.bottom(), height - box.right(), box.height(), box.width()); pixClearInRect(line_pix, pixbox); boxDestroy(&pixbox); } } pixDilateBrick(line_pix, line_pix, 3, 1); pixSubtract(pix, pix, line_pix); if (textord_tabfind_show_vlines) pixWrite("hlinesclean.png", line_pix, IFF_PNG); ICOORD vertical; vertical.set_with_shrink(vertical_x, vertical_y); TabVector::MergeSimilarTabVectors(vertical, vectors, NULL); // Iterate the vectors to flip them. TabVector_IT h_it(vectors); for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) { h_it.data()->XYFlip(); } } pixDestroy(&line_pix); } // Converts the Boxa array to a list of C_BLOB, getting rid of severely // overlapping outlines and those that are children of a bigger one. // The output is a list of C_BLOBs that are owned by the list. // The C_OUTLINEs in the C_BLOBs contain no outline data - just empty // bounding boxes. The Boxa is consumed and destroyed. void LineFinder::ConvertBoxaToBlobs(int image_width, int image_height, Boxa** boxes, C_BLOB_LIST* blobs) { C_OUTLINE_LIST outlines; C_OUTLINE_IT ol_it = &outlines; // Iterate the boxes to convert to outlines. int nboxes = boxaGetCount(*boxes); for (int i = 0; i < nboxes; ++i) { l_int32 x, y, width, height; boxaGetBoxGeometry(*boxes, i, &x, &y, &width, &height); // Make a C_OUTLINE from the leptonica box. This is a bit of a hack, // as there is no outline, just a bounding box, but with some very // small changes to coutln.cpp, it works nicely. ICOORD top_left(x, image_height - y); ICOORD bot_right(x + width, image_height - (y + height)); CRACKEDGE startpt; startpt.pos = top_left; C_OUTLINE* outline = new C_OUTLINE(&startpt, top_left, bot_right, 0); ol_it.add_after_then_move(outline); } // Use outlines_to_blobs to convert the outlines to blobs and find // overlapping and contained objects. The output list of blobs in the block // has all the bad ones filtered out and deleted. BLOCK block; ICOORD page_tl(0, 0); ICOORD page_br(image_width, image_height); outlines_to_blobs(&block, page_tl, page_br, &outlines); // Transfer the created blobs to the output list. C_BLOB_IT blob_it(blobs); blob_it.add_list_after(block.blob_list()); // The boxes aren't needed any more. boxaDestroy(boxes); } // Finds vertical lines in the given list of BLOBNBOXes. bleft and tright // are the bounds of the image on which the input line_bblobs were found. // The input line_bblobs list is const really. // The output vertical_x and vertical_y are the total of all the vectors. // The output list of TabVector makes no reference to the input BLOBNBOXes. void LineFinder::FindLineVectors(const ICOORD& bleft, const ICOORD& tright, BLOBNBOX_LIST* line_bblobs, int* vertical_x, int* vertical_y, TabVector_LIST* vectors) { BLOBNBOX_IT bbox_it(line_bblobs); int b_count = 0; // Put all the blobs into the grid to find the lines, and move the blobs // to the output lists. AlignedBlob blob_grid(kLineFindGridSize, bleft, tright); for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) { BLOBNBOX* bblob = bbox_it.data(); bblob->set_left_tab_type(TT_UNCONFIRMED); bblob->set_left_rule(bleft.x()); bblob->set_right_rule(tright.x()); bblob->set_left_crossing_rule(bleft.x()); bblob->set_right_crossing_rule(tright.x()); blob_grid.InsertBBox(false, true, bblob); ++b_count; } if (textord_debug_tabfind) tprintf("Inserted %d line blobs into grid\n", b_count); if (b_count == 0) return; // Search the entire grid, looking for vertical line vectors. GridSearch lsearch(&blob_grid); BLOBNBOX* bbox; TabVector_IT vector_it(vectors); *vertical_x = 0; *vertical_y = 1; lsearch.StartFullSearch(); while ((bbox = lsearch.NextFullSearch()) != NULL) { if (bbox->left_tab_type() == TT_UNCONFIRMED) { const TBOX& box = bbox->bounding_box(); if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom())) tprintf("Finding line vector starting at bbox (%d,%d)\n", box.left(), box.bottom()); AlignedBlobParams align_params(*vertical_x, *vertical_y, box.width()); TabVector* vector = blob_grid.FindVerticalAlignment(align_params, bbox, vertical_x, vertical_y); if (vector != NULL) { vector->Freeze(); vector_it.add_to_end(vector); } } } ScrollView* line_win = NULL; if (textord_tabfind_show_vlines) { line_win = blob_grid.MakeWindow(0, 50, "Vlines"); blob_grid.DisplayBoxes(line_win); line_win = blob_grid.DisplayTabs("Vlines", line_win); } } // Get a set of bounding boxes of possible vertical lines in the image. // The input resolution overrides any resolution set in src_pix. // The output line_pix contains just all the detected lines. Boxa* LineFinder::GetVLineBoxes(int resolution, Pix* src_pix, Pix** line_pix) { // Remove any parts of 1 inch/kThinLineFraction wide or more, by opening // away the thin lines and subtracting what's left. // This is very generous and will leave in even quite wide lines. Pix* pixt1 = pixOpenBrick(NULL, src_pix, resolution / kThinLineFraction, 1); pixSubtract(pixt1, src_pix, pixt1); // Spread sideways to allow for some skew. Pix* pixt2 = pixDilateBrick(NULL, pixt1, 3, 1); // Now keep only tall stuff of height at least 1 inch/kMinLineLengthFraction. pixOpenBrick(pixt1, pixt2, 1, resolution / kMinLineLengthFraction); pixDestroy(&pixt2); // Put a single pixel crack in every line at an arbitrary spacing, // so they break up and the bounding boxes can be used to get the // direction accurately enough without needing outlines. int wpl = pixGetWpl(pixt1); int height = pixGetHeight(pixt1); l_uint32* data = pixGetData(pixt1); for (int y = kCrackSpacing; y < height; y += kCrackSpacing) { memset(data + wpl * y, 0, wpl * sizeof(*data)); } if (textord_tabfind_show_vlines) pixWrite("vlines.png", pixt1, IFF_PNG); Boxa* boxa = pixConnComp(pixt1, NULL, 8); *line_pix = pixt1; return boxa; } // Get a set of bounding boxes of possible horizontal lines in the image. // The input resolution overrides any resolution set in src_pix. // The output line_pix contains just all the detected lines. // The output boxes undergo the transformation (x,y)->(height-y,x) so the // lines can be found with a vertical line finder afterwards. // This transformation allows a simple x/y flip to reverse it in tesseract // coordinates and it is faster to flip the lines than rotate the image. Boxa* LineFinder::GetHLineBoxes(int resolution, Pix* src_pix, Pix** line_pix) { // Remove any parts of 1 inch/kThinLineFraction high or more, by opening // away the thin lines and subtracting what's left. // This is very generous and will leave in even quite wide lines. Pix* pixt1 = pixOpenBrick(NULL, src_pix, 1, resolution / kThinLineFraction); pixSubtract(pixt1, src_pix, pixt1); // Spread vertically to allow for some skew. Pix* pixt2 = pixDilateBrick(NULL, pixt1, 1, 3); // Now keep only wide stuff of width at least 1 inch/kMinLineLengthFraction. pixOpenBrick(pixt1, pixt2, resolution / kMinLineLengthFraction, 1); pixDestroy(&pixt2); // Put a single pixel crack in every line at an arbitrary spacing, // so they break up and the bounding boxes can be used to get the // direction accurately enough without needing outlines. int wpl = pixGetWpl(pixt1); int width = pixGetWidth(pixt1); int height = pixGetHeight(pixt1); l_uint32* data = pixGetData(pixt1); for (int y = 0; y < height; ++y, data += wpl) { for (int x = kCrackSpacing; x < width; x += kCrackSpacing) { CLEAR_DATA_BIT(data, x); } } if (textord_tabfind_show_vlines) pixWrite("hlines.png", pixt1, IFF_PNG); Boxa* boxa = pixConnComp(pixt1, NULL, 8); *line_pix = pixt1; // Iterate the boxes to flip x and y. int nboxes = boxaGetCount(boxa); for (int i = 0; i < nboxes; ++i) { l_int32 x, y, box_width, box_height; boxaGetBoxGeometry(boxa, i, &x, &y, &box_width, &box_height); Box* box = boxCreate(height - (y + box_height), width - (x + box_width), box_height, box_width); boxaReplaceBox(boxa, i, box); } return boxa; } } // namespace tesseract.