tesseract/ccmain/adaptions.h
2007-03-07 20:03:40 +00:00

110 lines
5.3 KiB
C

/**********************************************************************
* File: adaptions.h (Formerly adaptions.h)
* Description: Functions used to adapt to blobs already confidently
* identified
* Author: Chris Newton
* Created: Thu Oct 7 10:17:28 BST 1993
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef ADAPTIONS_H
#define ADAPTIONS_H
#include "charsample.h"
#include "charcut.h"
#include "notdll.h"
// Switches for rejecting the letter 'm'.
// Each *_VAR_H line appears to be (name, default, description); see the
// macro definitions to confirm.
extern BOOL_VAR_H(tessedit_reject_ems, FALSE, "Reject all m's");
extern BOOL_VAR_H(tessedit_reject_suspect_ems, FALSE, "Reject suspect m's");
// Tunables for adaption by clustering: thresholds, cluster size limits and
// debug/test switches.  The description strings are the authoritative text;
// the numeric arguments are presumably the defaults -- confirm against the
// corresponding definitions.
extern double_VAR_H(tessedit_cluster_t1, 0.20,
                    "t1 threshold for clustering samples");
extern double_VAR_H(tessedit_cluster_t2, 0.40,
                    "t2 threshold for clustering samples");
extern double_VAR_H(tessedit_cluster_t3, 0.12,
                    "Extra threshold for clustering samples, only keep a new sample if best score greater than this value");
extern double_VAR_H(tessedit_cluster_accept_fraction, 0.80,
                    "Largest fraction of characters in cluster for it to be used for adaption");
extern INT_VAR_H(tessedit_cluster_min_size, 3,
                 "Smallest number of samples in a cluster for it to be used for adaption");
extern BOOL_VAR_H(tessedit_cluster_debug, FALSE,
                  "Generate and print debug information for adaption by clustering");
extern BOOL_VAR_H(tessedit_use_best_sample, FALSE,
                  "Use best sample from cluster when adapting");
extern BOOL_VAR_H(tessedit_test_cluster_input, FALSE,
                  "Set reject map to enable cluster input to be measured");
// Switches controlling the matrix matcher and which characters are adapted to.
// NOTE(review): tessedit_old_matrix_match carries the same description text as
// tessedit_matrix_match, and the tessedit_mm_all_rejects description contains
// a stray comma.  Both strings are kept verbatim here; check them against the
// definitions before changing either.
extern BOOL_VAR_H(tessedit_matrix_match, TRUE, "Use matrix matcher");
extern BOOL_VAR_H(tessedit_old_matrix_match, FALSE, "Use matrix matcher");
extern BOOL_VAR_H(tessedit_mm_use_non_adaption_set, FALSE,
                  "Don't try to adapt to characters on this list");
extern STRING_VAR_H(tessedit_non_adaption_set, ",.;:'~@*",
                    "Characters to be avoided when adapting");
extern BOOL_VAR_H(tessedit_mm_adapt_using_prototypes, TRUE,
                  "Use prototypes when adapting");
extern BOOL_VAR_H(tessedit_mm_use_prototypes, TRUE,
                  "Use prototypes as clusters are built");
extern BOOL_VAR_H(tessedit_mm_use_rejmap, FALSE,
                  "Adapt to characters using reject map");
extern BOOL_VAR_H(tessedit_mm_all_rejects, FALSE,
                  "Adapt to all characters using, matrix matcher");
extern BOOL_VAR_H(tessedit_mm_only_match_same_char, FALSE,
                  "Only match samples against clusters for the same character");
extern BOOL_VAR_H(tessedit_process_rns, FALSE, "Handle m - rn ambigs");
// Demo-mode settings: a display switch, two word numbers and a document name.
extern BOOL_VAR_H(tessedit_demo_adaption, FALSE,
                  "Display cut images and matrix match for demo purposes");
extern INT_VAR_H(tessedit_demo_word1, 62,
                 "Word number of first word to display");
extern INT_VAR_H(tessedit_demo_word2, 64,
                 "Word number of second word to display");
extern STRING_VAR_H(tessedit_demo_file, "academe",
                    "Name of document containing demo words");
// Should we adapt to this word?
// Returns a BOOL8 answer for `word`.  The meaning of `mode` is not visible
// from this header -- see the definition.
BOOL8 word_adaptable(WERD_RES *word, UINT16 mode);
// Judging by the name, gathers 'm' samples from `word` for adaption, working
// with the cluster list and the waiting list -- TODO confirm against the
// definition.  Returns nothing.
void collect_ems_for_adaption(
    WERD_RES *word,
    CHAR_SAMPLES_LIST *char_clusters,
    CHAR_SAMPLE_LIST *chars_waiting);
// Judging by the name, gathers character samples from `word` for adaption,
// working with the cluster list and the waiting list -- TODO confirm against
// the definition.  Returns nothing.
void collect_characters_for_adaption(
    WERD_RES *word,
    CHAR_SAMPLES_LIST *char_clusters,
    CHAR_SAMPLE_LIST *chars_waiting);
// Takes one sample plus the cluster list and the waiting list.  Presumably
// places `sample` into a cluster or onto the waiting list -- confirm against
// the definition.  Returns nothing.
void cluster_sample(
    CHAR_SAMPLE *sample,
    CHAR_SAMPLES_LIST *char_clusters,
    CHAR_SAMPLE_LIST *chars_waiting);
// Takes the waiting list, one sample and a cluster named `best_cluster`.
// How the three interact is not visible from this header -- see the
// definition.  Returns nothing.
void check_wait_list(
    CHAR_SAMPLE_LIST *chars_waiting,
    CHAR_SAMPLE *sample,
    CHAR_SAMPLES *best_cluster);
// Operates on the cluster list and the waiting list.  The name suggests a
// final clustering pass -- TODO confirm against the definition.
void complete_clustering(
    CHAR_SAMPLES_LIST *char_clusters,
    CHAR_SAMPLE_LIST *chars_waiting);
// Same parameter list as collect_ems_for_adaption.  Presumably the adaption
// step for 'm' samples of `word` -- confirm against the definition.
void adapt_to_good_ems(
    WERD_RES *word,
    CHAR_SAMPLES_LIST *char_clusters,
    CHAR_SAMPLE_LIST *chars_waiting);
// Same parameter list as collect_characters_for_adaption.  Presumably the
// adaption step for character samples of `word` -- confirm against the
// definition.
void adapt_to_good_samples(
    WERD_RES *word,
    CHAR_SAMPLES_LIST *char_clusters,
    CHAR_SAMPLE_LIST *chars_waiting);
// Takes the cluster list and the waiting list.  By its name, reports
// statistics about 'm' samples -- the output destination is not visible here.
void print_em_stats(
    CHAR_SAMPLES_LIST *char_clusters,
    CHAR_SAMPLE_LIST *chars_waiting);
// Returns a CHAR_SAMPLE pointer built from the given arguments; who owns the
// result is not stated in this header.
// Original note on this declaration: "lines of the image" (presumably refers
// to the image-line input -- confirm against the definition).
CHAR_SAMPLE *clip_sample(
    PIXROW *pixrow,
    IMAGELINE *imlines,
    BOX pix_box,            // box of imlines extent
    BOOL8 white_on_black,
    char c);
// Takes the cluster list; by its name, displays the clusters' prototypes.
// The display mechanism is not visible from this header.
void display_cluster_prototypes(CHAR_SAMPLES_LIST *char_clusters);
// By its name, rejects every 'm' in `word` -- confirm against the definition.
void reject_all_ems(WERD_RES *word);
// By its name, rejects every full stop in `word` -- confirm against the
// definition.
void reject_all_fullstops(WERD_RES *word);
// By its name, rejects only the suspect 'm's in `word` -- confirm against the
// definition.
void reject_suspect_ems(WERD_RES *word);
// By its name, rejects only the suspect full stops in `word` -- confirm
// against the definition.
void reject_suspect_fullstops(WERD_RES *word);
// Returns a BOOL8 for the position `index` within `word`; by its name, whether
// that character is a suspect 'm'.  Valid range of `index` is not visible here.
BOOL8 suspect_em(WERD_RES *word, INT16 index);
// Returns a BOOL8 for the position `i` within `word`; by its name, whether
// that character is a suspect full stop.  Valid range of `i` is not visible
// here.
BOOL8 suspect_fullstop(WERD_RES *word, INT16 i);
#endif