mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-28 05:39:35 +08:00
e3b507b15e
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@161 d0cd1f9f-072b-0410-8dd7-cf729c803f20
171 lines
5.5 KiB
C++
171 lines
5.5 KiB
C++
///////////////////////////////////////////////////////////////////////
|
|
// File: pageseg.cpp
|
|
// Description: Page Segmenter
|
|
// Author: Thomas Kielbus
|
|
// Created: Wed Jul 18 10:05:01 PDT 2007
|
|
//
|
|
// (C) Copyright 2007, Google Inc.
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
///////////////////////////////////////////////////////////////////////
|
|
|
|
// Include automatically generated configuration file if running autoconf.
|
|
#ifdef HAVE_CONFIG_H
|
|
#include "config_auto.h"
|
|
#endif
|
|
|
|
#include "pageseg.h"
|
|
|
|
#include "edgloop.h"
|
|
#include "imgs.h"
|
|
#ifdef HAVE_LIBLEPT
|
|
#include "leptonica_pageseg_interface.h"
|
|
#endif
|
|
#include "polyaprx.h"
|
|
#include "tessvars.h"
|
|
#include "varable.h"
|
|
|
|
BOOL_VAR(pageseg_leptonica_activated, FALSE,
|
|
"Activate Leptonica-based segmentation");
|
|
|
|
// Create a new full-page BLOCK and append it to the given list
|
|
static void append_full_page_block(BLOCK_LIST *blocks);
|
|
|
|
// Create a new BLOCK with the given coordinates and append it to the given list
|
|
static void append_block(BLOCK_LIST *blocks,
|
|
int xmin, int ymin,
|
|
int xmax, int ymax,
|
|
POLY_BLOCK* outline);
|
|
|
|
#ifndef HAVE_LIBLEPT
|
|
// Leptonica-based page segmentation is not enabled
|
|
|
|
|
|
void segment_page(BLOCK_LIST *blocks) {
|
|
append_full_page_block(blocks);
|
|
}
|
|
|
|
|
|
#else // HAVE_LIBLEPT Leptonica-based page segmentation is enabled
|
|
|
|
|
|
// Create a list of BLOCKs from the given mask image and append them to the
|
|
// given BLOCK_LIST
|
|
static void mask_to_blocks(IMAGE* textblock_mask_image,
|
|
BLOCK_LIST *blocks);
|
|
|
|
// Extract the OUTLINEs from the image mask and append them to the given
|
|
// OUTLINE_LIST
|
|
static void mask_to_outlines(IMAGE* image, OUTLINE_LIST* outlines);
|
|
|
|
// Convert the OUTLINEs into BLOCKs and append them to the given BLOCK_LIST
|
|
static void outlines_to_blocks(OUTLINE_LIST* outlines, BLOCK_LIST* blocks);
|
|
|
|
void segment_page(BLOCK_LIST *blocks) {
|
|
IMAGE textblock_mask;
|
|
if (pageseg_leptonica_activated &&
|
|
leptonica_pageseg_get_textblock_mask(&page_image, &textblock_mask))
|
|
mask_to_blocks(&textblock_mask, blocks);
|
|
else
|
|
append_full_page_block(blocks);
|
|
}
|
|
|
|
void mask_to_outlines(IMAGE* image,
|
|
OUTLINE_LIST* outlines) {
|
|
// Get outlines of the textblock_mask_image
|
|
ICOORD page_tr(image->get_xsize(), image->get_ysize());
|
|
PDBLK full_page_block(0, 0, image->get_xsize(), image->get_ysize());
|
|
C_OUTLINE_LIST c_outlines;
|
|
C_OUTLINE_IT c_outline_it(&c_outlines);
|
|
|
|
get_outlines(NULL, image, image,
|
|
page_tr, &full_page_block, &c_outline_it);
|
|
|
|
// Convert each C_OUTLINE to an OUTLINE
|
|
OUTLINE_IT outline_it(outlines);
|
|
outline_it.move_to_last();
|
|
for (c_outline_it.mark_cycle_pt();
|
|
!c_outline_it.cycled_list();
|
|
c_outline_it.forward()) {
|
|
OUTLINE* outline;
|
|
outline = tesspoly_outline(c_outline_it.data(), 0);
|
|
outline_it.add_after_then_move(outline);
|
|
}
|
|
c_outlines.clear();
|
|
}
|
|
|
|
void outlines_to_blocks(OUTLINE_LIST* outlines, BLOCK_LIST* blocks) {
|
|
OUTLINE_IT outline_it(outlines);
|
|
|
|
// Convert each OUTLINE to a POLY_BLOCK and add it to the block list
|
|
for (outline_it.mark_cycle_pt();
|
|
!outline_it.cycled_list();
|
|
outline_it.forward()) {
|
|
// Iterate over outline POLYPT_LIST and create the corresponding
|
|
// ICOORDELT_LIST
|
|
ICOORDELT_LIST icoordelts;
|
|
ICOORDELT_IT icoordelt_it(&icoordelts);
|
|
POLYPT_LIST* polypts = outline_it.data()->polypts();
|
|
POLYPT_IT polypt_it(polypts);
|
|
|
|
for (polypt_it.mark_cycle_pt();
|
|
!polypt_it.cycled_list();
|
|
polypt_it.forward()) {
|
|
ICOORD current_icoord = ICOORD((inT16) polypt_it.data()->pos.x(),
|
|
(inT16) polypt_it.data()->pos.y());
|
|
icoordelt_it.add_after_then_move(new ICOORDELT(current_icoord));
|
|
}
|
|
|
|
// Create the POLY_BLOCK from the ICOORDELT_LIST
|
|
POLY_BLOCK* poly_block_outline = new POLY_BLOCK(&icoordelts, POLY_X);
|
|
icoordelts.clear();
|
|
|
|
// Add this text block to the block list
|
|
int xmin = poly_block_outline->bounding_box()->left();
|
|
int ymin = poly_block_outline->bounding_box()->bottom();
|
|
int xmax = poly_block_outline->bounding_box()->right();
|
|
int ymax = poly_block_outline->bounding_box()->top();
|
|
append_block(blocks, xmin, ymin, xmax, ymax, poly_block_outline);
|
|
}
|
|
}
|
|
|
|
void mask_to_blocks(IMAGE* textblock_mask_image, BLOCK_LIST *blocks) {
|
|
OUTLINE_LIST outlines;
|
|
|
|
mask_to_outlines(textblock_mask_image, &outlines);
|
|
outlines_to_blocks(&outlines, blocks);
|
|
outlines.clear();
|
|
}
|
|
|
|
|
|
#endif // HAVE_LIBLEPT
|
|
|
|
|
|
void append_block(BLOCK_LIST *blocks,
|
|
int xmin, int ymin,
|
|
int xmax, int ymax,
|
|
POLY_BLOCK* outline) {
|
|
BLOCK *block;
|
|
BLOCK_IT block_it = blocks;
|
|
|
|
block = new BLOCK("", TRUE, 0, 0, xmin, ymin, xmax, ymax);
|
|
if (outline != NULL)
|
|
block->set_poly_block(outline);
|
|
block_it.add_to_end(block);
|
|
}
|
|
|
|
void append_full_page_block(BLOCK_LIST *blocks) {
|
|
append_block(blocks,
|
|
0, 0,
|
|
page_image.get_xsize(), page_image.get_ysize(),
|
|
NULL);
|
|
}
|