tesseract/ccmain/reject.h
2007-07-18 01:15:07 +00:00

182 lines
8.4 KiB
C

/**********************************************************************
* File: reject.h (Formerly reject.h)
* Description: Rejection functions used in tessedit
* Author: Phil Cheatle
* Created: Wed Sep 23 16:50:21 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#ifndef REJECT_H
#define REJECT_H
#include "varable.h"
#include "pageres.h"
#include "notdll.h"
// Top-level rejection/acceptance controls.
// Each declaration uses a *_VAR_H macro (not defined in this header; presumably
// from varable.h - confirm). The arguments appear to be
// (variable name, default value, description string).
// NOTE(review): the description of tessedit_rejection_debug ("Adaption debug")
// and of tessedit_rejection_stats ("Show NN stats") do not obviously match the
// variable names - verify against the definitions before relying on them.
extern INT_VAR_H (tessedit_reject_mode, 5, "Rejection algorithm");
extern INT_VAR_H (tessedit_ok_mode, 5, "Acceptance decision algorithm");
extern BOOL_VAR_H (tessedit_use_nn, TRUE, "");
extern BOOL_VAR_H (tessedit_rejection_debug, FALSE, "Adaption debug");
extern BOOL_VAR_H (tessedit_rejection_stats, FALSE, "Show NN stats");
extern BOOL_VAR_H (tessedit_flip_0O, TRUE, "Contextual 0O O0 flips");
// Lower and upper aspect-ratio limits for the dot/hyphen test (both share the
// same description string).
// NOTE(review): presumably consumed by flip_hyphens() - confirm in the
// implementation file.
extern double_VAR_H (tessedit_lower_flip_hyphen, 1.5,
"Aspect ratio dot/hyphen test");
extern double_VAR_H (tessedit_upper_flip_hyphen, 1.8,
"Aspect ratio dot/hyphen test");
// Switches for the 1/I/l ("one-ell") conflict detector.
// NOTE(review): presumably read by one_ell_conflict() - confirm in the
// implementation file.
extern BOOL_VAR_H (rej_trust_doc_dawg, FALSE,
"Use DOC dawg in 11l conf. detector");
extern BOOL_VAR_H (rej_1Il_use_dict_word, FALSE, "Use dictword test");
extern BOOL_VAR_H (rej_1Il_trust_permuter_type, TRUE, "Dont double check");
extern BOOL_VAR_H (one_ell_conflict_default, TRUE,
"one_ell_conflict default");
// Neural-net (NN) matcher controls: debug switches, which confusion classes
// the NN is applied to, and its acceptance levels/thresholds.
// NOTE(review): nn_conf_test_good_qual, nn_conf_test_dict and
// nn_conf_test_sensible all carry the identical description
// "NN dodgy 1Il cross check"; the strings alone do not distinguish them, so
// check the implementation for what each flag actually gates.
extern BOOL_VAR_H (show_char_clipping, FALSE, "Show clip image window?");
extern BOOL_VAR_H (nn_debug, FALSE, "NN DEBUGGING?");
extern BOOL_VAR_H (nn_reject_debug, FALSE, "NN DEBUG each char?");
extern BOOL_VAR_H (nn_lax, FALSE, "Use 2nd rate matches");
extern BOOL_VAR_H (nn_double_check_dict, FALSE, "Double check");
extern BOOL_VAR_H (nn_conf_double_check_dict, TRUE,
"Double check for confusions");
extern BOOL_VAR_H (nn_conf_1Il, TRUE, "NN use 1Il conflicts");
extern BOOL_VAR_H (nn_conf_Ss, TRUE, "NN use Ss conflicts");
extern BOOL_VAR_H (nn_conf_hyphen, TRUE, "NN hyphen conflicts");
extern BOOL_VAR_H (nn_conf_test_good_qual, FALSE, "NN dodgy 1Il cross check");
extern BOOL_VAR_H (nn_conf_test_dict, TRUE, "NN dodgy 1Il cross check");
extern BOOL_VAR_H (nn_conf_test_sensible, TRUE, "NN dodgy 1Il cross check");
extern BOOL_VAR_H (nn_conf_strict_on_dodgy_chs, TRUE,
"Require stronger NN match");
extern double_VAR_H (nn_dodgy_char_threshold, 0.99, "min accept score");
extern INT_VAR_H (nn_conf_accept_level, 4, "NN accept dodgy 1Il matches? ");
extern INT_VAR_H (nn_conf_initial_i_level, 3,
"NN accept initial Ii match level ");
// Controls on un-rejection (no_unrej_*) and on the individual rejection
// tests (rej_*), plus the whole-word "mostly rejected" rule and the
// full-stop aspect-ratio limit.
// rej_mostly_reject_mode values, per its description string:
//   0 - never, 1 - after NN, 2 - after new x-height.
// NOTE(review): "xht" is read here as x-height - confirm.
extern BOOL_VAR_H (no_unrej_dubious_chars, TRUE,
"Dubious chars next to reject?");
extern BOOL_VAR_H (no_unrej_no_alphanum_wds, TRUE,
"Stop unrej of non A/N wds?");
extern BOOL_VAR_H (no_unrej_1Il, FALSE, "Stop unrej of 1Ilchars?");
extern BOOL_VAR_H (rej_use_tess_accepted, TRUE,
"Individual rejection control");
extern BOOL_VAR_H (rej_use_tess_blanks, TRUE, "Individual rejection control");
extern BOOL_VAR_H (rej_use_good_perm, TRUE, "Individual rejection control");
extern BOOL_VAR_H (rej_use_sensible_wd, FALSE, "Extend permuter check");
extern BOOL_VAR_H (rej_alphas_in_number_perm, FALSE, "Extend permuter check");
extern double_VAR_H (rej_whole_of_mostly_reject_word_fract, 0.85,
"if >this fract");
extern INT_VAR_H (rej_mostly_reject_mode, 1,
"0-never, 1-afterNN, 2-after new xht");
extern double_VAR_H (tessed_fullstop_aspect_ratio, 1.2,
"if >this fract then reject");
// Geometry of the NN input image (width, height, x-height, baseline nodes),
// the image-border limit for rejecting blobs near the image edge, and the NN
// score thresholds.
// NOTE(review): units are presumably pixels for the image dimensions and
// border - confirm in the implementation.
extern INT_VAR_H (net_image_width, 40, "NN input image width");
extern INT_VAR_H (net_image_height, 36, "NN input image height");
extern INT_VAR_H (net_image_x_height, 22, "NN input image x_height");
extern INT_VAR_H (tessedit_image_border, 2, "Rej blbs near image edge limit");
extern INT_VAR_H (net_bl_nodes, 20, "Number of baseline nodes");
extern double_VAR_H (nn_reject_threshold, 0.5, "NN min accept score");
extern double_VAR_H (nn_reject_head_and_shoulders, 0.6,
"top scores sep factor");
// Character sets used by the rejection logic, followed by the minimum sane
// x-height.
// "\075" is an octal escape for '=' (octal 075 == decimal 61).
// NOTE(review): why the escape is used instead of a literal '=' is not shown
// here - confirm before changing it.
// dubious_chars_left_of_reject and dubious_chars_right_of_reject have
// identical default values in this header.
extern STRING_VAR_H (ok_single_ch_non_alphanum_wds, "-?\075",
"Allow NN to unrej");
extern STRING_VAR_H (ok_repeated_ch_non_alphanum_wds, "-?*\075",
"Allow NN to unrej");
extern STRING_VAR_H (conflict_set_I_l_1, "Il1[]", "Il1 conflict set");
extern STRING_VAR_H (conflict_set_S_s, "Ss$", "Ss conflict set");
extern STRING_VAR_H (conflict_set_hyphen, "-_~", "hyphen conflict set");
extern STRING_VAR_H (dubious_chars_left_of_reject, "!'+`()-./\\<>;:^_,~\"",
"Unreliable chars");
extern STRING_VAR_H (dubious_chars_right_of_reject, "!'+`()-./\\<>;:^_,~\"",
"Unreliable chars");
extern INT_VAR_H (min_sane_x_ht_pixels, 8,
"Reject any x-ht lt or eq than this");
// Set the "done" flag for a word result.
//   word: word result to update.
//   pass: NOTE(review): presumably the recognition pass number (compare
//         make_reject_map's "1st or 2nd?") - confirm in the definition.
void set_done( //set done flag
WERD_RES *word,
INT16 pass);
// Make the reject map for a word.
//   word:         word result the map is made for.
//   blob_choices: the detailed results (per the original note).
//   row:          NOTE(review): presumably the text row holding the word -
//                 confirm in the definition.
//   pass:         which pass is running (1st or 2nd).
void make_reject_map( //make rej map for wd
WERD_RES *word,
BLOB_CHOICE_LIST_CLIST *blob_choices, //detailed results
ROW *row,
INT16 pass //1st or 2nd?
);
// NOTE(review): judging by the name (and rej_use_tess_blanks), this rejects
// blank characters in the word's result - confirm in the definition.
void reject_blanks(WERD_RES *word);
// NOTE(review): judging by the name, this rejects characters belonging to the
// I/1/l confusion set (see conflict_set_I_l_1) - confirm in the definition.
void reject_I_1_L(WERD_RES *word);
// Rejection step driven by match quality.
//   word:         word result to update.
//   blob_choices: the detailed results (per the original note).
// NOTE(review): presumably compares each match against the value from
// compute_reject_threshold() - confirm in the definition.
void reject_poor_matches(WERD_RES *word, BLOB_CHOICE_LIST_CLIST *blob_choices);
// Compute a rejection threshold from the detailed results.
//   blob_choices: the detailed results (per the original note).
// Returns the threshold as a float; how it is derived is not visible here.
float compute_reject_threshold( //compute threshold
BLOB_CHOICE_LIST_CLIST *blob_choices); //detailed results
// Comparison callback for qsort(); both arguments point to floats.
// NOTE(review): the sort direction is decided by the definition, which is not
// visible in this header.
int sort_floats( //qsort function
const void *arg1, //ptrs to floats
const void *arg2);
// NOTE(review): judging by the name and tessedit_image_border ("Rej blbs near
// image edge limit"), this rejects blobs close to the image edge - confirm in
// the definition.
void reject_edge_blobs(WERD_RES *word);
// 1/I/l ("one-ell") conflict test for a word.
//   word_res:   word result to examine.
//   update_map: NOTE(review): presumably selects whether the word's reject
//               map is modified as well as tested - confirm.
// Returns a BOOL8; NOTE(review): presumably TRUE when a conflict is found.
BOOL8 one_ell_conflict(WERD_RES *word_res, BOOL8 update_map);
// NOTE(review): presumably returns the offset within `word` of the first
// alphanumeric character, where `word_lengths` gives the length of each
// character in `word`; the value returned when none exists is not visible
// here - confirm in the definition.
INT16 first_alphanum_offset(const char *word,
const char *word_lengths);
// NOTE(review): presumably returns the character index (as opposed to the
// offset from first_alphanum_offset) of the first alphanumeric character in
// `word`; `word_lengths` presumably gives per-character lengths - confirm in
// the definition.
INT16 first_alphanum_index(const char *word,
const char *word_lengths);
// NOTE(review): presumably counts the alphabetic characters in `word`, with
// `word_lengths` giving per-character lengths - confirm in the definition.
INT16 alpha_count(const char *word,
const char *word_lengths);
// NOTE(review): presumably TRUE when `word` contains a digit other than '1',
// with `word_lengths` giving per-character lengths - confirm in the
// definition.
BOOL8 word_contains_non_1_digit(const char *word,
const char *word_lengths);
// Test a word result for ambiguity.
//   word: word result to test.
// Returns a BOOL8; NOTE(review): presumably TRUE when the word is ambiguous.
BOOL8 test_ambig_word( //test for ambiguity
WERD_RES *word);
// Ambiguity test on a word string.
//   start_word:    the original word.
//   temp_word:     alterable copy of the word.
//   test_char_pos: index of the character to alter.
// Returns a BOOL8; NOTE(review): presumably TRUE when the word is ambiguous -
// confirm in the definition.
BOOL8 ambig_word(const char *start_word, //original word
char *temp_word, //alterable copy
INT16 test_char_pos //idx to char to alter
);
// NOTE(review): presumably returns the set of characters that `c` can be
// confused with; ownership of the returned string and the result for a
// character with no ambiguities are not visible here - confirm.
const char *char_ambiguities(char c);
// Only declared when EMBEDDED is not defined.
// NOTE(review): presumably a test/debug entry point for the ambiguity code -
// confirm in the definition.
#ifndef EMBEDDED
void test_ambigs(const char *word);
#endif
// NOTE(review): presumably uses the NN matcher to un-reject characters of
// `word` (compare the "Allow NN to unrej" variables); `row` is presumably the
// row holding the word - confirm in the definition.
void nn_recover_rejects(WERD_RES *word, ROW *row);
// Match a word (the nn_ prefix suggests the NN matcher is used).
//   word: word result to match.
//   row:  NOTE(review): presumably the row holding the word - confirm.
void nn_match_word( //Match a word
WERD_RES *word,
ROW *row);
// NN match of a single character image.
//   scaled_image:      image of the character (passed by reference).
//   baseline_pos:      baseline position relative to scaled_image.
//   dict_word:         is the character part of a dictionary word?
//   checked_dict_word: carries the same note as dict_word in the original;
//                      NOTE(review): the difference between the two flags is
//                      not visible here - confirm in the definition.
//   sensible_word:     is the character part of an acceptable string?
//   centre:            is the character away from the word ends?
//   good_quality_word: NOTE(review): original note says only "initial
//                      segmentation" - exact meaning to be confirmed.
//   tess_ch:           the character to confirm.
// Returns an INT16; its meaning is not visible in this header.
INT16 nn_match_char(IMAGE &scaled_image, //of character
float baseline_pos, //rel to scaled_image
BOOL8 dict_word, //part of dict wd?
BOOL8 checked_dict_word, //part of dict wd?
BOOL8 sensible_word, //part acceptable str?
BOOL8 centre, //not at word ends?
BOOL8 good_quality_word, //initial segmentation
char tess_ch //confirm this?
);
// Evaluate an NN match result for one character.
// NOTE(review): `top`/`top_score` and `next`/`next_score` are presumably the
// best and second-best net outputs with their scores, and `tess_ch` the
// character being checked; the BOOL8 flags share their names with those of
// nn_match_char. The meaning of the INT16 result is not visible here -
// confirm all of this in the definition.
INT16 evaluate_net_match(char top,
float top_score,
char next,
float next_score,
char tess_ch,
BOOL8 dict_word,
BOOL8 checked_dict_word,
BOOL8 sensible_word,
BOOL8 centre,
BOOL8 good_quality_word);
// NOTE(review): presumably enforces no_unrej_dubious_chars ("Dubious chars
// next to reject?") using the dubious_chars_left/right_of_reject sets -
// confirm in the definition.
void dont_allow_dubious_chars(WERD_RES *word);
// NOTE(review): presumably enforces no_unrej_1Il ("Stop unrej of 1Ilchars?")
// on the word - confirm in the definition.
void dont_allow_1Il(WERD_RES *word);
// Count the alphanumerics in a word result.
//   word: word result to examine.
// Returns the count as an INT16. NOTE(review): whether rejected characters
// are included in the count is not visible here.
INT16 count_alphanums( //how many alphanums
WERD_RES *word);
// Reject the whole word if most of it is already rejected.
// NOTE(review): the cut-off is presumably
// rej_whole_of_mostly_reject_word_fract - confirm in the definition.
void reject_mostly_rejects( //rej all if most rejected
WERD_RES *word);
// NOTE(review): presumably TRUE when `word` consists of one repeated
// non-alphanumeric character (compare ok_repeated_ch_non_alphanum_wds); the
// role of `row` is not visible here - confirm in the definition.
BOOL8 repeated_nonalphanum_wd(WERD_RES *word, ROW *row);
// NOTE(review): presumably TRUE when `rep_ch_str` is made of a single
// character repeated, with `lengths` giving per-character lengths; the result
// for an empty string is not visible here - confirm in the definition.
BOOL8 repeated_ch_string(const char *rep_ch_str,
const char *lengths);
// NOTE(review): presumably a dictionary lookup of `s` that tolerates strings
// the plain lookup cannot handle; the meaning of the INT16 result is not
// visible here - confirm in the definition.
INT16 safe_dict_word(const char *s);
// NOTE(review): presumably applies the dot/hyphen aspect-ratio test governed
// by tessedit_lower_flip_hyphen / tessedit_upper_flip_hyphen to the word -
// confirm in the definition.
void flip_hyphens(WERD_RES *word);
// NOTE(review): presumably performs the contextual 0/O flips controlled by
// tessedit_flip_0O ("Contextual 0O O0 flips") - confirm in the definition.
void flip_0O(WERD_RES *word);
// NOTE(review): presumably TRUE when the `length`-byte character at `str` is
// an upper-case letter other than 'O' - confirm in the definition.
BOOL8 non_O_upper(const char* str, int length);
// NOTE(review): presumably TRUE when the `length`-byte character at `str` is
// a digit other than '0' - confirm in the definition.
BOOL8 non_0_digit(const char* str, int length);
#endif