tesseract/ccmain/adaptions.h

/**********************************************************************
 * File:        adaptions.h  (Formerly adaptions.h)
 * Description: Functions used to adapt to blobs already confidently
 *					identified
 * Author:		Chris Newton
 * Created:		Thu Oct  7 10:17:28 BST 1993
 *
 * (C) Copyright 1992, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

#ifndef           ADAPTIONS_H
#define           ADAPTIONS_H

#include          "charsample.h"
#include          "charcut.h"
#include          "notdll.h"

// Switches for rejecting the letter 'm'; the quoted text on each line is the
// variable's own description.
// NOTE(review): BOOL_VAR_H is defined outside this header; its arguments
// appear to be (name, default value, description) -- confirm against the
// macro definition.
extern BOOL_VAR_H (tessedit_reject_ems, FALSE, "Reject all m's");
extern BOOL_VAR_H (tessedit_reject_suspect_ems, FALSE, "Reject suspect m's");
// Settings for clustering character samples: three score thresholds
// (t1, t2, t3), the acceptance fraction and minimum size a cluster needs
// before it is used for adaption, and debug/test switches.
// NOTE(review): the *_VAR_H macros are defined outside this header; the
// second argument appears to be the default value -- confirm against the
// macro definitions.
extern double_VAR_H (tessedit_cluster_t1, 0.20,
"t1 threshold for clustering samples");
extern double_VAR_H (tessedit_cluster_t2, 0.40,
"t2 threshold for clustering samples");
extern double_VAR_H (tessedit_cluster_t3, 0.12,
"Extra threshold for clustering samples, only keep a new sample if best score greater than this value");
extern double_VAR_H (tessedit_cluster_accept_fraction, 0.80,
"Largest fraction of characters in cluster for it to be used for adaption");
extern INT_VAR_H (tessedit_cluster_min_size, 3,
"Smallest number of samples in a cluster for it to be used for adaption");
extern BOOL_VAR_H (tessedit_cluster_debug, FALSE,
"Generate and print debug information for adaption by clustering");
extern BOOL_VAR_H (tessedit_use_best_sample, FALSE,
"Use best sample from cluster when adapting");
extern BOOL_VAR_H (tessedit_test_cluster_input, FALSE,
"Set reject map to enable cluster input to be measured");
// Settings for adaption with the matrix matcher: which matcher to use, which
// characters to avoid, and how prototypes and the reject map take part.
// NOTE(review): the *_VAR_H macros are defined outside this header; the
// second argument appears to be the default value -- confirm against the
// macro definitions.
extern BOOL_VAR_H (tessedit_matrix_match, TRUE, "Use matrix matcher");
// NOTE(review): the description below is identical to the one for
// tessedit_matrix_match; presumably it should mention the *old* matcher.
// Left unchanged here -- check the matching definition before editing.
extern BOOL_VAR_H (tessedit_old_matrix_match, FALSE, "Use matrix matcher");
extern BOOL_VAR_H (tessedit_mm_use_non_adaption_set, FALSE,
"Don't try to adapt to characters on this list");
extern STRING_VAR_H (tessedit_non_adaption_set, ",.;:'~@*",
"Characters to be avoided when adapting");
extern BOOL_VAR_H (tessedit_mm_adapt_using_prototypes, TRUE,
"Use prototypes when adapting");
extern BOOL_VAR_H (tessedit_mm_use_prototypes, TRUE,
"Use prototypes as clusters are built");
extern BOOL_VAR_H (tessedit_mm_use_rejmap, FALSE,
"Adapt to characters using reject map");
// NOTE(review): the comma after "using" in the description below looks like
// a typo; left unchanged so it stays in step with the definition.
extern BOOL_VAR_H (tessedit_mm_all_rejects, FALSE,
"Adapt to all characters using, matrix matcher");
extern BOOL_VAR_H (tessedit_mm_only_match_same_char, FALSE,
"Only match samples against clusters for the same character");
// Separate switch for the "m" versus "rn" ambiguity.
extern BOOL_VAR_H (tessedit_process_rns, FALSE, "Handle m - rn ambigs");
// Demo-mode settings: a switch for displaying cut images and matrix matches,
// the numbers of the two words to display, and the name of the document that
// contains them.
// NOTE(review): the *_VAR_H macros are defined outside this header; the
// second argument appears to be the default value -- confirm against the
// macro definitions.
extern BOOL_VAR_H (tessedit_demo_adaption, FALSE,
"Display cut images and matrix match for demo purposes");
extern INT_VAR_H (tessedit_demo_word1, 62,
"Word number of first word to display");
extern INT_VAR_H (tessedit_demo_word2, 64,
"Word number of second word to display");
extern STRING_VAR_H (tessedit_demo_file, "academe",
"Name of document containing demo words");
// Answers "should we adapt?" for the given word, returning a BOOL8.
// NOTE(review): the interpretation of `mode` is not visible in this header
// (presumably it selects which adaption criteria apply) -- confirm against
// the definition.
BOOL8 word_adaptable(  //should we adapt?
                     WERD_RES *word,
                     uinT16 mode);
// Sample-collection entry points. Both take a word result, the list of
// character clusters and the list of samples in `chars_waiting`, and return
// nothing.
// NOTE(review): going by the names, the first collects only 'm' samples and
// the second collects characters in general; the bodies are not in this
// header, so confirm against the definitions (including whether the lists
// are modified).
void collect_ems_for_adaption(WERD_RES *word,
                              CHAR_SAMPLES_LIST *char_clusters,
                              CHAR_SAMPLE_LIST *chars_waiting);
void collect_characters_for_adaption(WERD_RES *word,
                                     CHAR_SAMPLES_LIST *char_clusters,
                                     CHAR_SAMPLE_LIST *chars_waiting);
// Clustering helpers that operate on a cluster list (CHAR_SAMPLES_LIST) and
// a waiting list of individual samples (CHAR_SAMPLE_LIST).
// NOTE(review): the roles below are inferred from the names only -- confirm
// against the definitions:
//   cluster_sample      - presumably places one sample into a cluster or
//                         onto the waiting list.
//   check_wait_list     - presumably re-examines waiting samples against
//                         `sample` / `best_cluster`.
//   complete_clustering - presumably finishes clustering for whatever is
//                         still waiting.
void cluster_sample(CHAR_SAMPLE *sample,
                    CHAR_SAMPLES_LIST *char_clusters,
                    CHAR_SAMPLE_LIST *chars_waiting);
void check_wait_list(CHAR_SAMPLE_LIST *chars_waiting,
                     CHAR_SAMPLE *sample,
                     CHAR_SAMPLES *best_cluster);
void complete_clustering(CHAR_SAMPLES_LIST *char_clusters,
                         CHAR_SAMPLE_LIST *chars_waiting);
// Adaption entry points. Both take the same arguments as the collect_*
// functions: a word result, the cluster list and the waiting list.
// NOTE(review): going by the names, the first adapts using 'm' samples only
// and the second using samples of any character; the bodies are not in this
// header, so confirm against the definitions.
void adapt_to_good_ems(WERD_RES *word,
                       CHAR_SAMPLES_LIST *char_clusters,
                       CHAR_SAMPLE_LIST *chars_waiting);
void adapt_to_good_samples(WERD_RES *word,
                           CHAR_SAMPLES_LIST *char_clusters,
                           CHAR_SAMPLE_LIST *chars_waiting);
// Takes the cluster list and the waiting list and returns nothing.
// NOTE(review): presumably prints statistics about the collected 'm'
// samples; the output destination and format are not visible here.
void print_em_stats(CHAR_SAMPLES_LIST *char_clusters,
                    CHAR_SAMPLE_LIST *chars_waiting);
// "lines of the image": presumably describes the imlines parameter of
// clip_sample, declared next.
// Returns a pointer to a CHAR_SAMPLE built from the given pixel row and
// image lines. `pix_box` is the box of the imlines extent, and `c` is a
// single character associated with the sample.
// NOTE(review): ownership of the returned sample, the exact effect of
// `white_on_black`, and whether NULL can be returned are not visible in this
// header -- confirm against the definition.
CHAR_SAMPLE *clip_sample(PIXROW *pixrow,
                         IMAGELINE *imlines,
                         TBOX pix_box,  //box of imlines extent
                         BOOL8 white_on_black,
                         char c);
// Takes the cluster list and returns nothing.
// NOTE(review): presumably shows each cluster's prototype for inspection;
// the display mechanism is not visible in this header.
void display_cluster_prototypes(CHAR_SAMPLES_LIST *char_clusters);
// Rejection helpers; each takes a single word result and returns nothing.
// NOTE(review): going by the names, they reject every 'm' / full stop, or
// only the suspect ones, within the word; how the rejection is recorded is
// not visible in this header.
void reject_all_ems(WERD_RES *word);
void reject_all_fullstops(WERD_RES *word);
void reject_suspect_ems(WERD_RES *word);
void reject_suspect_fullstops(WERD_RES *word);
// Predicates taking a word result and a 16-bit position (`index` / `i`),
// each returning a BOOL8.
// NOTE(review): presumably the position selects a character within the word
// and the result says whether that 'm' / full stop is suspect -- confirm
// against the definitions.
BOOL8 suspect_em(WERD_RES *word, inT16 index);
BOOL8 suspect_fullstop(WERD_RES *word, inT16 i);
#endif