mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-07 10:17:50 +08:00
4523ce9f7d
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@526 d0cd1f9f-072b-0410-8dd7-cf729c803f20
233 lines
7.7 KiB
C++
233 lines
7.7 KiB
C++
/* -*-C-*-
|
|
********************************************************************************
|
|
*
|
|
* File: pieces.c (Formerly pieces.c)
|
|
* Description:
|
|
* Author: Mark Seaman, OCR Technology
|
|
* Created: Fri Oct 16 14:37:00 1987
|
|
* Modified: Mon May 20 12:12:35 1991 (Mark Seaman) marks@hpgrlt
|
|
* Language: C
|
|
* Package: N/A
|
|
* Status: Reusable Software Component
|
|
*
|
|
* (c) Copyright 1987, Hewlett-Packard Company.
|
|
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
** you may not use this file except in compliance with the License.
|
|
** You may obtain a copy of the License at
|
|
** http://www.apache.org/licenses/LICENSE-2.0
|
|
** Unless required by applicable law or agreed to in writing, software
|
|
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
** See the License for the specific language governing permissions and
|
|
** limitations under the License.
|
|
*
|
|
*********************************************************************************/
|
|
/*----------------------------------------------------------------------
|
|
I n c l u d e s
|
|
----------------------------------------------------------------------*/
|
|
#include "pieces.h"

#include <string.h>

#include "blobs.h"
#include "freelist.h"
#include "matchtab.h"
#include "ndminx.h"
#include "plotseg.h"
#include "ratngs.h"
#include "wordclass.h"
#include "wordrec.h"
|
|
|
|
// Include automatically generated configuration file if running autoconf.
|
|
#ifdef HAVE_CONFIG_H
|
|
#include "config_auto.h"
|
|
#endif
|
|
|
|
/*----------------------------------------------------------------------
|
|
M a c r o s
|
|
----------------------------------------------------------------------*/
|
|
/**********************************************************************
 * set_bounds_entry
 *
 * Store a bounding box in slot `index` of an array of bounds elements:
 * `top_left` is assigned to the slot's topleft field and `bot_right`
 * to its botright field.
 *
 * Expands to one parenthesised comma expression, so it can be used as a
 * single statement.  `array` and `index` each appear twice in the
 * expansion; do not pass arguments with side effects.
 *
 * The definition ends on its last line (no trailing backslash), so the
 * line that follows the macro is never spliced into its body.
 **********************************************************************/
#define set_bounds_entry(array,index,top_left,bot_right)  \
  ((array)[index].topleft = (top_left),                    \
   (array)[index].botright = (bot_right))
|
|
|
|
|
|
/**********************************************************************
 * get_bounds_entry
 *
 * Read the bounding box held in slot `index` of an array of bounds
 * elements: the slot's topleft field is assigned to `top_left` and its
 * botright field to `bot_right`.  Both output arguments must therefore
 * be assignable expressions (a variable or a dereferenced pointer).
 *
 * Expands to one parenthesised comma expression, so it can be used as a
 * single statement.  `array` and `index` each appear twice in the
 * expansion; do not pass arguments with side effects.
 *
 * The definition ends on its last line (no trailing backslash), so the
 * line that follows the macro is never spliced into its body.
 **********************************************************************/
#define get_bounds_entry(array,index,top_left,bot_right)  \
  ((top_left) = (array)[index].topleft,                    \
   (bot_right) = (array)[index].botright)
|
|
|
|
|
|
/*----------------------------------------------------------------------
|
|
F u n c t i o n s
|
|
----------------------------------------------------------------------*/
|
|
|
|
|
|
/**********************************************************************
|
|
* bounds_of_piece
|
|
*
|
|
* Find the bounds of the piece that will be created by joining the
|
|
* requested collection of pieces together.
|
|
**********************************************************************/
|
|
void bounds_of_piece(BOUNDS_LIST bounds,
|
|
inT16 start,
|
|
inT16 end,
|
|
TPOINT *extreme_tl,
|
|
TPOINT *extreme_br) {
|
|
TPOINT topleft;
|
|
TPOINT botright;
|
|
inT16 x;
|
|
|
|
get_bounds_entry(bounds, start, *extreme_tl, *extreme_br);
|
|
|
|
for (x = start + 1; x <= end; x++) {
|
|
get_bounds_entry(bounds, x, topleft, botright);
|
|
|
|
extreme_tl->x = MIN (topleft.x, extreme_tl->x);
|
|
extreme_tl->y = MAX (topleft.y, extreme_tl->y);
|
|
extreme_br->x = MAX (botright.x, extreme_br->x);
|
|
extreme_br->y = MIN (botright.y, extreme_br->y);
|
|
}
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* classify_piece
|
|
*
|
|
* Create a larger piece from a collection of smaller ones. Classify
|
|
* it and return the results. Take the large piece apart to leave
|
|
* the collection of small pieces unmodified.
|
|
**********************************************************************/
|
|
namespace tesseract {
|
|
// Joins pieces start..end, classifies the blob found at list position
// `start`, then calls break_pieces() over the same range before returning.
// Returns the BLOB_CHOICE_LIST produced by classify_blob().
BLOB_CHOICE_LIST *Wordrec::classify_piece(TBLOB *pieces,
                                          SEAMS seams,
                                          inT16 start,
                                          inT16 end) {
  BLOB_CHOICE_LIST *choices;
  TBLOB *blob;
  inT16 x;

  join_pieces(pieces, seams, start, end);
  // Advance `start` links along the blob list to reach the blob to classify.
  for (blob = pieces, x = 0; x < start; x++) {
    blob = blob->next;
  }
  choices = classify_blob(blob, "pieces:", White);

  // Undo the join so the caller's list of pieces is handed back as it was.
  break_pieces(blob, seams, start, end);
#ifndef GRAPHICS_DISABLED
  // Debug display, only when graphics are compiled in and the
  // wordrec_display_segmentations setting is above 2.
  if (wordrec_display_segmentations > 2) {
    STATE current_state;
    SEARCH_STATE chunk_groups;
    set_n_ones (&current_state, array_count(seams));
    chunk_groups = bin_to_chunks(&current_state, array_count(seams));
    display_segmentation(pieces, chunk_groups);
    window_wait(segm_window);
    // chunk_groups was returned by bin_to_chunks(); release it here.
    memfree(chunk_groups);
  }
#endif

  return (choices);
}
|
|
|
|
|
|
/**********************************************************************
|
|
* get_piece_rating
|
|
*
|
|
* Check to see if this piece has already been classified. If it has
|
|
* return that rating. Otherwise build the piece from the smaller
|
|
* pieces, classify it, store the rating for later, and take the piece
|
|
* apart again.
|
|
**********************************************************************/
|
|
BLOB_CHOICE_LIST *Wordrec::get_piece_rating(MATRIX *ratings,
|
|
TBLOB *blobs,
|
|
SEAMS seams,
|
|
inT16 start,
|
|
inT16 end) {
|
|
BLOB_CHOICE_LIST *choices = ratings->get(start, end);
|
|
if (choices == NOT_CLASSIFIED) {
|
|
choices = classify_piece(blobs,
|
|
seams,
|
|
start,
|
|
end);
|
|
ratings->put(start, end, choices);
|
|
if (wordrec_debug_level > 0) {
|
|
tprintf("get_piece_rating(): updated ratings matrix\n");
|
|
ratings->print(getDict().getUnicharset());
|
|
}
|
|
}
|
|
return (choices);
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* record_blob_bounds
|
|
*
|
|
* Set up and initialize an array that holds the bounds of a set of
|
|
* blobs.
|
|
**********************************************************************/
|
|
BOUNDS_LIST Wordrec::record_blob_bounds(TBLOB *blobs) {
|
|
BOUNDS_LIST bounds;
|
|
TPOINT topleft;
|
|
TPOINT botright;
|
|
inT16 x = 0;
|
|
|
|
bounds = (BOUNDS_LIST) memalloc (count_blobs (blobs) * sizeof (BOUNDS));
|
|
|
|
for (TBLOB* blob = blobs; blob != NULL; blob = blob->next) {
|
|
blob_bounding_box(blob, &topleft, &botright);
|
|
set_bounds_entry(bounds, x, topleft, botright);
|
|
x++;
|
|
}
|
|
return (bounds);
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* record_piece_ratings
|
|
*
|
|
* Save the choices for all the pieces that have been classified into
|
|
* a matrix that can be used to look them up later. A two dimensional
|
|
* matrix is created. The indices correspond to the starting and
|
|
* ending initial piece number.
|
|
**********************************************************************/
|
|
MATRIX *Wordrec::record_piece_ratings(TBLOB *blobs) {
|
|
BOUNDS_LIST bounds;
|
|
inT16 num_blobs;
|
|
inT16 x;
|
|
inT16 y;
|
|
TPOINT tp_topleft;
|
|
TPOINT tp_botright;
|
|
unsigned int topleft;
|
|
unsigned int botright;
|
|
MATRIX *ratings;
|
|
BLOB_CHOICE_LIST *choices;
|
|
|
|
bounds = record_blob_bounds (blobs);
|
|
num_blobs = count_blobs (blobs);
|
|
ratings = new MATRIX(num_blobs);
|
|
|
|
for (x = 0; x < num_blobs; x++) {
|
|
for (y = x; y < num_blobs; y++) {
|
|
bounds_of_piece(bounds, x, y, &tp_topleft, &tp_botright);
|
|
topleft = *(unsigned int *) &tp_topleft;
|
|
botright = *(unsigned int *) &tp_botright;
|
|
choices = blob_match_table.get_match_by_bounds (topleft, botright);
|
|
if (choices != NULL) {
|
|
ratings->put(x, y, choices);
|
|
}
|
|
}
|
|
}
|
|
memfree(bounds);
|
|
return (ratings);
|
|
}
|
|
|
|
} // namespace tesseract
|