mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-07 18:27:48 +08:00
4523ce9f7d
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@526 d0cd1f9f-072b-0410-8dd7-cf729c803f20
239 lines
7.7 KiB
C++
239 lines
7.7 KiB
C++
/* -*-C-*-
|
|
********************************************************************************
|
|
*
|
|
* File: matchtab.c (Formerly matchtab.c)
|
|
* Description: Match table to retain blobs that were matched.
|
|
* Author: Mark Seaman, OCR Technology
|
|
* Created: Mon Jan 29 09:00:56 1990
|
|
* Modified: Tue Mar 19 15:09:06 1991 (Mark Seaman) marks@hpgrlt
|
|
* Language: C
|
|
* Package: N/A
|
|
* Status: Experimental (Do Not Distribute)
|
|
*
|
|
* (c) Copyright 1990, Hewlett-Packard Company.
|
|
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
** you may not use this file except in compliance with the License.
|
|
** You may obtain a copy of the License at
|
|
** http://www.apache.org/licenses/LICENSE-2.0
|
|
** Unless required by applicable law or agreed to in writing, software
|
|
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
** See the License for the specific language governing permissions and
|
|
** limitations under the License.
|
|
*
|
|
*********************************************************************************/
|
|
#include "matchtab.h"
|
|
|
|
#include "blobs.h"
|
|
#include "callcpp.h"
|
|
#include "elst.h"
|
|
#include "freelist.h"
|
|
#include "ratngs.h"
|
|
|
|
#define NUM_MATCH_ENTRIES 500 /* Entries in match_table */
|
|
|
|
/**********************************************************************
 * blank_entry
 *
 * Test whether slot x of a blob match table is unused.  A slot counts
 * as blank when both of its keys (topleft and botright) are zero; the
 * macro then evaluates to a non-zero value.
 *
 * The match_table argument is parenthesized before it is indexed, so
 * it may be any pointer expression, e.g. blank_entry(table + 1, i).
 **********************************************************************/
#define blank_entry(match_table,x) \
  (!((match_table)[x].topleft | (match_table)[x].botright))
|
|
|
|
|
|
namespace tesseract {
|
|
|
|
// Starts with no table allocated, then lets init_match_table() allocate
// the slot array and mark every slot blank.
BlobMatchTable::BlobMatchTable()
    : been_initialized_(false),
      match_table_(NULL) {
  this->init_match_table();
}
|
|
|
|
// Hands all cleanup to end_match_table(), which frees the stored ratings
// lists and the slot array.
BlobMatchTable::~BlobMatchTable() {
  this->end_match_table();
}
|
|
|
|
/**********************************************************************
|
|
* init_match_table
|
|
*
|
|
* Create and clear a match table to be used to speed up the splitter.
|
|
**********************************************************************/
|
|
void BlobMatchTable::init_match_table() {
|
|
int x;
|
|
|
|
if (been_initialized_) {
|
|
/* Reclaim old choices */
|
|
for (x = 0; x < NUM_MATCH_ENTRIES; x++) {
|
|
if ((!blank_entry(match_table_, x)) && match_table_[x].rating)
|
|
match_table_[x].rating->clear();
|
|
delete match_table_[x].rating;
|
|
}
|
|
} else {
|
|
/* Allocate memory once */
|
|
been_initialized_ = true;
|
|
match_table_ = new MATCH[NUM_MATCH_ENTRIES];
|
|
}
|
|
/* Initialize the table */
|
|
for (x = 0; x < NUM_MATCH_ENTRIES; x++) {
|
|
match_table_[x].topleft = 0;
|
|
match_table_[x].botright = 0;
|
|
match_table_[x].rating = NULL;
|
|
}
|
|
}
|
|
|
|
void BlobMatchTable::end_match_table() {
|
|
if (been_initialized_) {
|
|
init_match_table();
|
|
delete[] match_table_;
|
|
match_table_ = NULL;
|
|
been_initialized_ = false;
|
|
}
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* put_match
|
|
*
|
|
* Put a new blob and its corresponding match ratings into the match
|
|
* table.
|
|
**********************************************************************/
|
|
void BlobMatchTable::put_match(TBLOB *blob, BLOB_CHOICE_LIST *ratings) {
|
|
unsigned int topleft;
|
|
unsigned int botright;
|
|
unsigned int start;
|
|
TPOINT tp_topleft;
|
|
TPOINT tp_botright;
|
|
int x;
|
|
/* Hash into table */
|
|
blob_bounding_box(blob, &tp_topleft, &tp_botright);
|
|
topleft = *(unsigned int *) &tp_topleft;
|
|
botright = *(unsigned int *) &tp_botright;
|
|
start = (topleft * botright) % NUM_MATCH_ENTRIES;
|
|
|
|
/* Look for empty */
|
|
x = start;
|
|
do {
|
|
if (blank_entry (match_table_, x)) {
|
|
/* Add this entry */
|
|
match_table_[x].topleft = topleft;
|
|
match_table_[x].botright = botright;
|
|
// Copy ratings to match_table_[x].rating
|
|
match_table_[x].rating = new BLOB_CHOICE_LIST();
|
|
match_table_[x].rating->deep_copy(ratings, &BLOB_CHOICE::deep_copy);
|
|
return;
|
|
}
|
|
if (++x >= NUM_MATCH_ENTRIES)
|
|
x = 0;
|
|
} while (x != start);
|
|
|
|
cprintf ("error: Match table is full\n");
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* get_match
|
|
*
|
|
* Look up this blob in the match table to see if it needs to be
|
|
* matched. If it is not present then NULL is returned.
|
|
**********************************************************************/
|
|
BLOB_CHOICE_LIST *BlobMatchTable::get_match(TBLOB *blob) {
|
|
unsigned int topleft;
|
|
unsigned int botright;
|
|
TPOINT tp_topleft;
|
|
TPOINT tp_botright;
|
|
/* Do starting hash */
|
|
blob_bounding_box(blob, &tp_topleft, &tp_botright);
|
|
topleft = *(unsigned int *) &tp_topleft;
|
|
botright = *(unsigned int *) &tp_botright;
|
|
return (get_match_by_bounds (topleft, botright));
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* get_match_by_bounds
|
|
*
|
|
* Look up this blob in the match table to see if it needs to be
|
|
* matched. If it is not present then NULL is returned.
|
|
**********************************************************************/
|
|
BLOB_CHOICE_LIST *BlobMatchTable::get_match_by_bounds(unsigned int topleft,
|
|
unsigned int botright) {
|
|
unsigned int start;
|
|
int x;
|
|
/* Do starting hash */
|
|
start = (topleft * botright) % NUM_MATCH_ENTRIES;
|
|
/* Search for match */
|
|
x = start;
|
|
do {
|
|
/* Not found when blank */
|
|
if (blank_entry (match_table_, x))
|
|
break;
|
|
/* Is this the match ? */
|
|
if (match_table_[x].topleft == topleft &&
|
|
match_table_[x].botright == botright) {
|
|
BLOB_CHOICE_LIST *blist = new BLOB_CHOICE_LIST();
|
|
blist->deep_copy(match_table_[x].rating, &BLOB_CHOICE::deep_copy);
|
|
return blist;
|
|
}
|
|
if (++x >= NUM_MATCH_ENTRIES)
|
|
x = 0;
|
|
} while (x != start);
|
|
return NULL;
|
|
}
|
|
|
|
/**********************************************************************
|
|
* add_to_match
|
|
*
|
|
* Update ratings list in the match_table corresponding to the given
|
|
* blob. The function assumes that:
|
|
* -- the match table contains the initial non-NULL list with choices
|
|
* for the given blob
|
|
* -- the new ratings list is a superset of the corresponding list in
|
|
* the match_table and the unichar ids of the blob choices in the
|
|
* list are unique.
|
|
* The entries that appear in the new ratings list and not in the
|
|
* old one are added to the old ratings list in the match_table.
|
|
**********************************************************************/
|
|
void BlobMatchTable::add_to_match(TBLOB *blob, BLOB_CHOICE_LIST *ratings) {
|
|
unsigned int topleft;
|
|
unsigned int botright;
|
|
TPOINT tp_topleft;
|
|
TPOINT tp_botright;
|
|
blob_bounding_box(blob, &tp_topleft, &tp_botright);
|
|
topleft = *(unsigned int *) &tp_topleft;
|
|
botright = *(unsigned int *) &tp_botright;
|
|
unsigned int start;
|
|
int x;
|
|
/* Do starting hash */
|
|
start = (topleft * botright) % NUM_MATCH_ENTRIES;
|
|
/* Search for match */
|
|
x = start;
|
|
do {
|
|
if (blank_entry(match_table_, x)) {
|
|
fprintf(stderr, "Can not update uninitialized entry in match_table\n");
|
|
ASSERT_HOST(!blank_entry(match_table_, x));
|
|
}
|
|
if (match_table_[x].topleft == topleft &&
|
|
match_table_[x].botright == botright) {
|
|
// Copy new ratings to match_table_[x].rating.
|
|
BLOB_CHOICE_IT it;
|
|
it.set_to_list(match_table_[x].rating);
|
|
BLOB_CHOICE_IT new_it;
|
|
new_it.set_to_list(ratings);
|
|
assert(it.length() <= new_it.length());
|
|
for (it.mark_cycle_pt(), new_it.mark_cycle_pt();
|
|
!it.cycled_list() && !new_it.cycled_list(); new_it.forward()) {
|
|
if (it.data()->unichar_id() == new_it.data()->unichar_id()) {
|
|
it.forward();
|
|
} else {
|
|
it.add_before_stay_put(new BLOB_CHOICE(*(new_it.data())));
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
if (++x >= NUM_MATCH_ENTRIES)
|
|
x = 0;
|
|
} while (x != start);
|
|
}
|
|
|
|
} // namespace tesseract
|