/********************************************************************** * File: pagesegmain.cpp * Description: Top-level page segmenter for Tesseract. * Author: Ray Smith * Created: Thu Sep 25 17:12:01 PDT 2008 * * (C) Copyright 2008, Google Inc. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at ** http://www.apache.org/licenses/LICENSE-2.0 ** Unless required by applicable law or agreed to in writing, software ** distributed under the License is distributed on an "AS IS" BASIS, ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ** See the License for the specific language governing permissions and ** limitations under the License. * **********************************************************************/ #ifdef WIN32 #include #else #include #endif #ifdef _MSC_VER #pragma warning(disable:4244) // Conversion warnings #endif // Include automatically generated configuration file if running autoconf. #ifdef HAVE_CONFIG_H #include "config_auto.h" #endif #ifdef HAVE_LIBLEPT // Include leptonica library only if autoconf (or makefile etc) tell us to. #include "allheaders.h" #endif #include "tesseractclass.h" #include "img.h" #include "blobbox.h" #include "linefind.h" #include "imagefind.h" #include "colfind.h" #include "tabvector.h" #include "blread.h" #include "wordseg.h" #include "makerow.h" #include "baseapi.h" #include "tordmain.h" #include "tessvars.h" namespace tesseract { // Minimum believable resolution. const int kMinCredibleResolution = 70; // Default resolution used if input in not believable. const int kDefaultResolution = 300; // Segment the page according to the current value of tessedit_pageseg_mode. // If the pix_binary_ member is not NULL, it is used as the source image, // and copied to image, otherwise it just uses image as the input. // On return the blocks list owns all the constructed page layout. int Tesseract::SegmentPage(const STRING* input_file, IMAGE* image, BLOCK_LIST* blocks) { int width = image->get_xsize(); int height = image->get_ysize(); int resolution = image->get_res(); #ifdef HAVE_LIBLEPT if (pix_binary_ != NULL) { width = pixGetWidth(pix_binary_); height = pixGetHeight(pix_binary_); resolution = pixGetXRes(pix_binary_); } #endif // Zero resolution messes up the algorithms, so make sure it is credible. if (resolution < kMinCredibleResolution) resolution = kDefaultResolution; // Get page segmentation mode. PageSegMode pageseg_mode = static_cast( static_cast(tessedit_pageseg_mode)); // If a UNLV zone file can be found, use that instead of segmentation. if (pageseg_mode != tesseract::PSM_AUTO && input_file != NULL && input_file->length() > 0) { STRING name = *input_file; const char* lastdot = strrchr(name.string(), '.'); if (lastdot != NULL) name[lastdot - name.string()] = '\0'; read_unlv_file(name, width, height, blocks); } bool single_column = pageseg_mode > PSM_AUTO; if (blocks->empty()) { // No UNLV file present. Work according to the PageSegMode. // First make a single block covering the whole image. BLOCK_IT block_it(blocks); BLOCK* block = new BLOCK("", TRUE, 0, 0, 0, 0, width, height); block_it.add_to_end(block); } else { // UNLV file present. Use PSM_SINGLE_COLUMN. pageseg_mode = PSM_SINGLE_COLUMN; } TO_BLOCK_LIST land_blocks, port_blocks; TBOX page_box; if (pageseg_mode <= PSM_SINGLE_COLUMN) { if (AutoPageSeg(width, height, resolution, single_column, image, blocks, &port_blocks) < 0) { return -1; } // To create blobs from the image region bounds uncomment this line: // port_blocks.clear(); // Uncomment to go back to the old mode. } else { #if HAVE_LIBLEPT image->FromPix(pix_binary_); #endif deskew_ = FCOORD(1.0f, 0.0f); reskew_ = FCOORD(1.0f, 0.0f); } if (blocks->empty()) { tprintf("Empty page\n"); return 0; // AutoPageSeg found an empty page. } if (port_blocks.empty()) { // AutoPageSeg was not used, so we need to find_components first. find_components(blocks, &land_blocks, &port_blocks, &page_box); } else { // AutoPageSeg does not need to find_components as it did that already. page_box.set_left(0); page_box.set_bottom(0); page_box.set_right(width); page_box.set_top(height); // Filter_blobs sets up the TO_BLOCKs the same as find_components does. filter_blobs(page_box.topright(), &port_blocks, true); } TO_BLOCK_IT to_block_it(&port_blocks); ASSERT_HOST(!port_blocks.empty()); TO_BLOCK* to_block = to_block_it.data(); if (pageseg_mode <= PSM_SINGLE_BLOCK || to_block->line_size < 2) { // For now, AUTO, SINGLE_COLUMN and SINGLE_BLOCK all map to the old // textord. The difference is the number of blocks and how the are made. textord_page(page_box.topright(), blocks, &land_blocks, &port_blocks, this); } else { // SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row. float gradient = make_single_row(page_box.topright(), to_block, &port_blocks, this); if (pageseg_mode == PSM_SINGLE_LINE) { // SINGLE_LINE uses the old word maker on the single line. make_words(page_box.topright(), gradient, blocks, &land_blocks, &port_blocks, this); } else { // SINGLE_WORD and SINGLE_CHAR cram all the blobs into a // single word, and in SINGLE_CHAR mode, all the outlines // go in a single blob. make_single_word(pageseg_mode == PSM_SINGLE_CHAR, to_block->get_rows(), to_block->block->row_list()); } } return 0; } // Auto page segmentation. Divide the page image into blocks of uniform // text linespacing and images. // Width, height and resolution are derived from the input image. // If the pix is non-NULL, then it is assumed to be the input, and it is // copied to the image, otherwise the image is used directly. // The output goes in the blocks list with corresponding TO_BLOCKs in the // to_blocks list. // If single_column is true, then no attempt is made to divide the image // into columns, but multiple blocks are still made if the text is of // non-uniform linespacing. int Tesseract::AutoPageSeg(int width, int height, int resolution, bool single_column, IMAGE* image, BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks) { int vertical_x = 0; int vertical_y = 1; TabVector_LIST v_lines; TabVector_LIST h_lines; ICOORD bleft(0, 0); Boxa* boxa = NULL; Pixa* pixa = NULL; // The blocks made by the ColumnFinder. Moved to blocks before return. BLOCK_LIST found_blocks; #ifdef HAVE_LIBLEPT if (pix_binary_ != NULL) { if (textord_debug_images) { Pix* grey_pix = pixCreate(width, height, 8); // Printable images are light grey on white, but for screen display // they are black on dark grey so the other colors show up well. if (textord_debug_printable) { pixSetAll(grey_pix); pixSetMasked(grey_pix, pix_binary_, 192); } else { pixSetAllArbitrary(grey_pix, 64); pixSetMasked(grey_pix, pix_binary_, 0); } AlignedBlob::IncrementDebugPix(); pixWrite(AlignedBlob::textord_debug_pix().string(), grey_pix, IFF_PNG); pixDestroy(&grey_pix); } if (tessedit_dump_pageseg_images) pixWrite("tessinput.png", pix_binary_, IFF_PNG); // Leptonica is used to find the lines and image regions in the input. LineFinder::FindVerticalLines(resolution, pix_binary_, &vertical_x, &vertical_y, &v_lines); LineFinder::FindHorizontalLines(resolution, pix_binary_, &h_lines); if (tessedit_dump_pageseg_images) pixWrite("tessnolines.png", pix_binary_, IFF_PNG); ImageFinder::FindImages(pix_binary_, &boxa, &pixa); if (tessedit_dump_pageseg_images) pixWrite("tessnoimages.png", pix_binary_, IFF_PNG); // Copy the Pix to the IMAGE. image->FromPix(pix_binary_); if (single_column) v_lines.clear(); } #endif TO_BLOCK_LIST land_blocks, port_blocks; TBOX page_box; // The rest of the algorithm uses the usual connected components. find_components(blocks, &land_blocks, &port_blocks, &page_box); TO_BLOCK_IT to_block_it(&port_blocks); ASSERT_HOST(!to_block_it.empty()); for (to_block_it.mark_cycle_pt(); !to_block_it.cycled_list(); to_block_it.forward()) { TO_BLOCK* to_block = to_block_it.data(); TBOX blkbox = to_block->block->bounding_box(); if (to_block->line_size >= 2) { // Note: if there are multiple blocks, then v_lines, boxa, and pixa // are empty on the next iteration, but in this case, we assume // that there aren't any interesting line separators or images, since // it means that we have a pre-defined unlv zone file. ColumnFinder finder(static_cast(to_block->line_size), blkbox.botleft(), blkbox.topright(), &v_lines, &h_lines, vertical_x, vertical_y); if (finder.FindBlocks(height, resolution, single_column, to_block, boxa, pixa, &found_blocks, to_blocks) < 0) return -1; finder.ComputeDeskewVectors(&deskew_, &reskew_); boxa = NULL; pixa = NULL; } } #ifdef HAVE_LIBLEPT boxaDestroy(&boxa); pixaDestroy(&pixa); #endif blocks->clear(); BLOCK_IT block_it(blocks); // Move the found blocks to the input/output blocks. block_it.add_list_after(&found_blocks); if (textord_debug_images) { // The debug image is no longer needed so delete it. unlink(AlignedBlob::textord_debug_pix().string()); } return 0; } } // namespace tesseract.