mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-05 00:27:49 +08:00
425d593ebe
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk/trunk@2 d0cd1f9f-072b-0410-8dd7-cf729c803f20
513 lines
16 KiB
C++
513 lines
16 KiB
C++
/* -*-C-*-
|
|
********************************************************************************
|
|
*
|
|
* File: bestfirst.c (Formerly bestfirst.c)
|
|
* Description: Best first search functions
|
|
* Author: Mark Seaman, OCR Technology
|
|
* Created: Mon May 14 11:23:29 1990
|
|
* Modified: Tue Jul 30 16:08:47 1991 (Mark Seaman) marks@hpgrlt
|
|
* Language: C
|
|
* Package: N/A
|
|
* Status: Experimental (Do Not Distribute)
|
|
*
|
|
* (c) Copyright 1990, Hewlett-Packard Company.
|
|
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
** you may not use this file except in compliance with the License.
|
|
** You may obtain a copy of the License at
|
|
** http://www.apache.org/licenses/LICENSE-2.0
|
|
** Unless required by applicable law or agreed to in writing, software
|
|
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
** See the License for the specific language governing permissions and
|
|
** limitations under the License.
|
|
*
|
|
***************************************************************************/
|
|
|
|
/*----------------------------------------------------------------------
|
|
I n c l u d e s
|
|
---------------------------------------------------------------------*/
|
|
#include "bestfirst.h"
|
|
#include "heuristic.h"
|
|
#include "plotseg.h"
|
|
#include "tordvars.h"
|
|
#include "debug.h"
|
|
#include "pieces.h"
|
|
#include "stopper.h"
|
|
#include "metrics.h"
|
|
#include "states.h"
|
|
#include "bitvec.h"
|
|
#include "freelist.h"
|
|
#include "permute.h"
|
|
#include "structures.h"
|
|
#include "wordclass.h"
|
|
|
|
void call_caller();
|
|
|
|
/*----------------------------------------------------------------------
|
|
V a r i a b l e s
|
|
---------------------------------------------------------------------*/
|
|
int num_joints; /* Number of chunks - 1 */
|
|
int num_pushed = 0;
|
|
int num_popped = 0;
|
|
|
|
make_int_var (num_seg_states, 30, make_seg_states,
|
|
9, 1, set_seg_states, "Segmentation states");
|
|
|
|
make_float_var (worst_state, 1, make_worst_state,
|
|
9, 9, set_worst_state, "Worst segmentation state");
|
|
/**/
|
|
/*----------------------------------------------------------------------
|
|
F u n c t i o n s
|
|
----------------------------------------------------------------------*/
|
|
/**********************************************************************
|
|
* init_bestfirst_vars
|
|
*
|
|
* Create and initialize references to debug variables that control
|
|
* operations in this file.
|
|
**********************************************************************/
|
|
void init_bestfirst_vars() {
|
|
make_seg_states();
|
|
make_worst_state();
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* best_first_search
|
|
*
|
|
* Find the best segmentation by doing a best first search of the
|
|
* solution space.
|
|
**********************************************************************/
|
|
void best_first_search(CHUNKS_RECORD *chunks_record,
|
|
A_CHOICE *best_choice,
|
|
A_CHOICE *raw_choice,
|
|
STATE *state,
|
|
DANGERR *fixpt,
|
|
STATE *best_state,
|
|
INT32 pass) {
|
|
SEARCH_RECORD *the_search;
|
|
INT16 keep_going;
|
|
STATE guided_state;
|
|
|
|
num_joints = matrix_dimension (chunks_record->ratings) - 1;
|
|
the_search = new_search (chunks_record, num_joints,
|
|
best_choice, raw_choice, state);
|
|
|
|
#ifndef GRAPHICS_DISABLED
|
|
save_best_state(chunks_record);
|
|
#endif
|
|
start_recording();
|
|
|
|
guided_state = *state;
|
|
do {
|
|
/* Look for answer */
|
|
if (!hash_lookup (the_search->closed_states, the_search->this_state)) {
|
|
|
|
if (blob_skip) {
|
|
free_state (the_search->this_state);
|
|
break;
|
|
}
|
|
|
|
guided_state = *(the_search->this_state);
|
|
keep_going =
|
|
evaluate_state(chunks_record, the_search, fixpt, best_state, pass);
|
|
|
|
hash_add (the_search->closed_states, the_search->this_state);
|
|
|
|
if (!keep_going ||
|
|
(the_search->num_states > num_seg_states) || (blob_skip)) {
|
|
free_state (the_search->this_state);
|
|
break;
|
|
}
|
|
|
|
expand_node(chunks_record, the_search);
|
|
}
|
|
|
|
free_state (the_search->this_state);
|
|
num_popped++;
|
|
the_search->this_state = pop_queue (the_search->open_states);
|
|
}
|
|
while (the_search->this_state);
|
|
|
|
state->part1 = the_search->best_state->part1;
|
|
state->part2 = the_search->best_state->part2;
|
|
stop_recording();
|
|
delete_search(the_search);
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* chunks_width
|
|
*
|
|
* Return the width of several of the chunks (if they were joined to-
|
|
* gether.
|
|
**********************************************************************/
|
|
int chunks_width(WIDTH_RECORD *width_record, int start_chunk, int last_chunk) {
|
|
int result = 0;
|
|
int x;
|
|
|
|
for (x = start_chunk * 2; x <= last_chunk * 2; x++)
|
|
result += width_record->widths[x];
|
|
|
|
return (result);
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* delete_search
|
|
*
|
|
* Terminate the current search and free all the memory involved.
|
|
**********************************************************************/
|
|
void delete_search(SEARCH_RECORD *the_search) {
|
|
float closeness;
|
|
|
|
closeness = (the_search->num_joints ?
|
|
(hamming_distance ((unsigned long *) the_search->first_state,
|
|
(unsigned long *) the_search->best_state,
|
|
2) / (float) the_search->num_joints) : 0.0);
|
|
|
|
record_search_status (the_search->num_states,
|
|
the_search->before_best, closeness);
|
|
|
|
free_state (the_search->first_state);
|
|
free_state (the_search->best_state);
|
|
|
|
free_hash_table (the_search->closed_states);
|
|
FreeHeapData (the_search->open_states, (void_dest) free_state);
|
|
|
|
memfree(the_search);
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* evaluate_chunks
|
|
*
|
|
* A particular word level segmentation has been chosen. Evaluation
|
|
* this to find the word list that corresponds to it.
|
|
**********************************************************************/
|
|
CHOICES_LIST evaluate_chunks(CHUNKS_RECORD *chunks_record,
|
|
SEARCH_STATE search_state,
|
|
STATE *this_state,
|
|
STATE *best_state,
|
|
INT32 pass) {
|
|
CHOICES_LIST char_choices;
|
|
CHOICES this_choice;
|
|
int i;
|
|
int x = 0;
|
|
int y;
|
|
|
|
char_choices = new_choice_list ();
|
|
/* Iterate sub-paths */
|
|
for (i = 1; i <= search_state[0] + 1; i++) {
|
|
if (i > search_state[0])
|
|
y = count_blobs (chunks_record->chunks) - 1;
|
|
else
|
|
y = x + search_state[i];
|
|
|
|
if (blob_skip) {
|
|
array_free(char_choices);
|
|
return (NULL);
|
|
} /* Process one square */
|
|
/* Classify if needed */
|
|
this_choice = get_piece_rating (chunks_record->ratings,
|
|
chunks_record->chunks,
|
|
chunks_record->splits,
|
|
x, y,
|
|
chunks_record->fx,
|
|
this_state, best_state, pass, i - 1);
|
|
|
|
if (this_choice == NIL) {
|
|
array_free(char_choices);
|
|
return (NULL);
|
|
}
|
|
/* Add permuted ratings */
|
|
last_segmentation[i - 1].certainty = best_certainty (this_choice);
|
|
last_segmentation[i - 1].match = best_probability (this_choice);
|
|
|
|
last_segmentation[i - 1].width =
|
|
chunks_width (chunks_record->chunk_widths, x, y);
|
|
last_segmentation[i - 1].gap =
|
|
chunks_gap (chunks_record->chunk_widths, y);
|
|
|
|
char_choices = array_push (char_choices, this_choice);
|
|
x = y + 1;
|
|
}
|
|
return (char_choices);
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* evaluate_state
|
|
*
|
|
* Evaluate the segmentation that is represented by this state in the
|
|
* best first search. Add this state to the "states_seen" list.
|
|
**********************************************************************/
|
|
INT16 evaluate_state(CHUNKS_RECORD *chunks_record,
|
|
SEARCH_RECORD *the_search,
|
|
DANGERR *fixpt,
|
|
STATE *best_state,
|
|
INT32 pass) {
|
|
CHOICES_LIST char_choices;
|
|
SEARCH_STATE chunk_groups;
|
|
float rating_limit = class_probability (the_search->best_choice);
|
|
INT16 keep_going = TRUE;
|
|
PIECES_STATE widths;
|
|
|
|
the_search->num_states++;
|
|
chunk_groups = bin_to_chunks (the_search->this_state,
|
|
the_search->num_joints);
|
|
bin_to_pieces (the_search->this_state, the_search->num_joints, widths);
|
|
LogNewSegmentation(widths);
|
|
|
|
rating_limit = class_probability (the_search->best_choice);
|
|
|
|
char_choices =
|
|
evaluate_chunks (chunks_record, chunk_groups, the_search->this_state,
|
|
best_state, pass);
|
|
if (char_choices != NULL) {
|
|
permute_characters (char_choices,
|
|
rating_limit,
|
|
the_search->best_choice, the_search->raw_choice);
|
|
if (AcceptableChoice (char_choices, the_search->best_choice,
|
|
the_search->raw_choice, fixpt))
|
|
keep_going = FALSE;
|
|
array_free(char_choices);
|
|
}
|
|
|
|
#ifndef GRAPHICS_DISABLED
|
|
if (display_segmentations) {
|
|
display_segmentation (chunks_record->chunks, chunk_groups);
|
|
if (display_segmentations > 1)
|
|
window_wait(segm_window);
|
|
}
|
|
#endif
|
|
|
|
if (rating_limit != class_probability (the_search->best_choice)) {
|
|
the_search->before_best = the_search->num_states;
|
|
the_search->best_state->part1 = the_search->this_state->part1;
|
|
the_search->best_state->part2 = the_search->this_state->part2;
|
|
replace_char_widths(chunks_record, chunk_groups);
|
|
}
|
|
else if (char_choices != NULL)
|
|
fixpt->index = -1;
|
|
|
|
memfree(chunk_groups);
|
|
|
|
return (keep_going);
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* rebuild_current_state
|
|
*
|
|
* Evaluate the segmentation that is represented by this state in the
|
|
* best first search. Add this state to the "states_seen" list.
|
|
**********************************************************************/
|
|
CHOICES_LIST rebuild_current_state(TBLOB *blobs,
|
|
SEAMS seam_list,
|
|
STATE *state,
|
|
CHOICES_LIST old_choices,
|
|
int fx) {
|
|
CHOICES_LIST char_choices;
|
|
SEARCH_STATE search_state;
|
|
int i;
|
|
int num_joints = array_count (seam_list);
|
|
int x = 0;
|
|
int blobindex; /*current blob */
|
|
TBLOB *p_blob;
|
|
TBLOB *blob;
|
|
TBLOB *next_blob;
|
|
int y;
|
|
|
|
search_state = bin_to_chunks (state, num_joints);
|
|
|
|
char_choices = new_choice_list ();
|
|
/* Iterate sub-paths */
|
|
for (i = 1; i <= search_state[0]; i++) {
|
|
y = x + search_state[i];
|
|
x = y + 1;
|
|
char_choices = array_push (char_choices, NULL);
|
|
}
|
|
char_choices = array_push (char_choices, NULL);
|
|
|
|
y = count_blobs (blobs) - 1;
|
|
for (i = search_state[0]; i >= 0; i--) {
|
|
if (x == y) { /*single fragment */
|
|
array_value (char_choices, i) = array_value (old_choices, x);
|
|
/*grab the list */
|
|
array_value (old_choices, x) = NULL;
|
|
}
|
|
else {
|
|
join_pieces(blobs, seam_list, x, y);
|
|
for (blob = blobs, blobindex = 0, p_blob = NULL; blobindex < x;
|
|
blobindex++) {
|
|
p_blob = blob;
|
|
blob = blob->next;
|
|
}
|
|
while (blobindex < y) {
|
|
next_blob = blob->next;
|
|
blob->next = next_blob->next;
|
|
oldblob(next_blob); /*junk dead blobs */
|
|
blobindex++;
|
|
}
|
|
array_value (char_choices, i) =
|
|
(char *) classify_blob (p_blob, blob, blob->next, NULL, fx,
|
|
"rebuild", Orange, NULL, NULL, 0, 0);
|
|
}
|
|
|
|
y = x - 1;
|
|
x = y - search_state[i];
|
|
}
|
|
|
|
memfree(search_state);
|
|
free_all_choices(old_choices, x);
|
|
return (char_choices);
|
|
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* expand_node
|
|
*
|
|
* Create the states that are attached to this one. Check to see that
|
|
* each one has not already been visited. If not add it to the priority
|
|
* queue.
|
|
**********************************************************************/
|
|
void expand_node(CHUNKS_RECORD *chunks_record, SEARCH_RECORD *the_search) {
|
|
STATE old_state;
|
|
int x;
|
|
int mask = 1 << (the_search->num_joints - 1 - 32);
|
|
|
|
old_state.part1 = the_search->this_state->part1;
|
|
old_state.part2 = the_search->this_state->part2;
|
|
|
|
for (x = the_search->num_joints; x > 32; x--) {
|
|
the_search->this_state->part1 = mask ^ old_state.part1;
|
|
if (!hash_lookup (the_search->closed_states, the_search->this_state))
|
|
push_queue (the_search->open_states,
|
|
the_search->this_state,
|
|
prioritize_state (chunks_record, the_search, &old_state));
|
|
mask >>= 1;
|
|
}
|
|
|
|
if (the_search->num_joints > 32) {
|
|
mask = 1 << 31;
|
|
}
|
|
else {
|
|
mask = 1 << (the_search->num_joints - 1);
|
|
}
|
|
|
|
while (x--) {
|
|
the_search->this_state->part2 = mask ^ old_state.part2;
|
|
if (!hash_lookup (the_search->closed_states, the_search->this_state))
|
|
push_queue (the_search->open_states,
|
|
the_search->this_state,
|
|
prioritize_state (chunks_record, the_search, &old_state));
|
|
mask >>= 1;
|
|
}
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* new_search
|
|
*
|
|
* Create and initialize a new search record.
|
|
**********************************************************************/
|
|
SEARCH_RECORD *new_search(CHUNKS_RECORD *chunks_record,
|
|
int num_joints,
|
|
A_CHOICE *best_choice,
|
|
A_CHOICE *raw_choice,
|
|
STATE *state) {
|
|
SEARCH_RECORD *this_search;
|
|
|
|
this_search = (SEARCH_RECORD *) memalloc (sizeof (SEARCH_RECORD));
|
|
|
|
this_search->open_states = MakeHeap (num_seg_states * 20);
|
|
this_search->closed_states = new_hash_table ();
|
|
|
|
if (state)
|
|
this_search->this_state = new_state (state);
|
|
else
|
|
cprintf ("error: bad initial state in new_search\n");
|
|
|
|
this_search->first_state = new_state (this_search->this_state);
|
|
this_search->best_state = new_state (this_search->this_state);
|
|
|
|
this_search->best_choice = best_choice;
|
|
this_search->raw_choice = raw_choice;
|
|
|
|
this_search->num_joints = num_joints;
|
|
this_search->num_states = 0;
|
|
this_search->before_best = 0;
|
|
|
|
return (this_search);
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* pop_queue
|
|
*
|
|
* Get this state from the priority queue. It should be the state that
|
|
* has the greatest urgency to be evaluated.
|
|
**********************************************************************/
|
|
STATE *pop_queue(HEAP *queue) {
|
|
HEAPENTRY entry;
|
|
|
|
if (GetTopOfHeap (queue, &entry) == OK) {
|
|
#ifndef GRAPHICS_DISABLED
|
|
if (display_segmentations) {
|
|
cprintf ("eval state: %8.3f ", entry.Key);
|
|
print_state ("", (STATE *) entry.Data, num_joints);
|
|
}
|
|
#endif
|
|
return ((STATE *) entry.Data);
|
|
}
|
|
else {
|
|
return (NULL);
|
|
}
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* push_queue
|
|
*
|
|
* Add this state into the priority queue.
|
|
**********************************************************************/
|
|
void push_queue(HEAP *queue, STATE *state, FLOAT32 priority) {
|
|
HEAPENTRY entry;
|
|
|
|
if (SizeOfHeap (queue) < MaxSizeOfHeap (queue) && priority < worst_state) {
|
|
entry.Data = (char *) new_state (state);
|
|
num_pushed++;
|
|
entry.Key = priority;
|
|
HeapStore(queue, &entry);
|
|
}
|
|
}
|
|
|
|
|
|
/**********************************************************************
|
|
* replace_char_widths
|
|
*
|
|
* Replace the value of the char_width field in the chunks_record with
|
|
* the updated width measurements from the last_segmentation.
|
|
**********************************************************************/
|
|
void replace_char_widths(CHUNKS_RECORD *chunks_record, SEARCH_STATE state) {
|
|
WIDTH_RECORD *width_record;
|
|
int num_blobs;
|
|
int i;
|
|
|
|
free_widths (chunks_record->char_widths);
|
|
|
|
num_blobs = state[0] + 1;
|
|
width_record = (WIDTH_RECORD *) memalloc (sizeof (int) * num_blobs * 2);
|
|
width_record->num_chars = num_blobs;
|
|
|
|
for (i = 0; i < num_blobs; i++) {
|
|
|
|
width_record->widths[2 * i] = last_segmentation[i].width;
|
|
|
|
if (i + 1 < num_blobs)
|
|
width_record->widths[2 * i + 1] = last_segmentation[i].gap;
|
|
}
|
|
chunks_record->char_widths = width_record;
|
|
}
|