mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-23 02:24:09 +08:00
570af48b8b
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@87 d0cd1f9f-072b-0410-8dd7-cf729c803f20
182 lines
8.4 KiB
C
182 lines
8.4 KiB
C
/**********************************************************************
|
|
* File: reject.h (Formerly reject.h)
|
|
* Description: Rejection functions used in tessedit
|
|
* Author: Phil Cheatle
|
|
* Created: Wed Sep 23 16:50:21 BST 1992
|
|
*
|
|
* (C) Copyright 1992, Hewlett-Packard Ltd.
|
|
** Licensed under the Apache License, Version 2.0 (the "License");
|
|
** you may not use this file except in compliance with the License.
|
|
** You may obtain a copy of the License at
|
|
** http://www.apache.org/licenses/LICENSE-2.0
|
|
** Unless required by applicable law or agreed to in writing, software
|
|
** distributed under the License is distributed on an "AS IS" BASIS,
|
|
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
** See the License for the specific language governing permissions and
|
|
** limitations under the License.
|
|
*
|
|
**********************************************************************/
|
|
|
|
#ifndef REJECT_H
|
|
#define REJECT_H
|
|
|
|
#include "varable.h"
|
|
#include "pageres.h"
|
|
#include "notdll.h"
|
|
|
|
extern INT_VAR_H (tessedit_reject_mode, 5, "Rejection algorithm");
|
|
extern INT_VAR_H (tessedit_ok_mode, 5, "Acceptance decision algorithm");
|
|
extern BOOL_VAR_H (tessedit_use_nn, TRUE, "");
|
|
extern BOOL_VAR_H (tessedit_rejection_debug, FALSE, "Adaption debug");
|
|
extern BOOL_VAR_H (tessedit_rejection_stats, FALSE, "Show NN stats");
|
|
extern BOOL_VAR_H (tessedit_flip_0O, TRUE, "Contextual 0O O0 flips");
|
|
extern double_VAR_H (tessedit_lower_flip_hyphen, 1.5,
|
|
"Aspect ratio dot/hyphen test");
|
|
extern double_VAR_H (tessedit_upper_flip_hyphen, 1.8,
|
|
"Aspect ratio dot/hyphen test");
|
|
extern BOOL_VAR_H (rej_trust_doc_dawg, FALSE,
|
|
"Use DOC dawg in 11l conf. detector");
|
|
extern BOOL_VAR_H (rej_1Il_use_dict_word, FALSE, "Use dictword test");
|
|
extern BOOL_VAR_H (rej_1Il_trust_permuter_type, TRUE, "Dont double check");
|
|
extern BOOL_VAR_H (one_ell_conflict_default, TRUE,
|
|
"one_ell_conflict default");
|
|
extern BOOL_VAR_H (show_char_clipping, FALSE, "Show clip image window?");
|
|
extern BOOL_VAR_H (nn_debug, FALSE, "NN DEBUGGING?");
|
|
extern BOOL_VAR_H (nn_reject_debug, FALSE, "NN DEBUG each char?");
|
|
extern BOOL_VAR_H (nn_lax, FALSE, "Use 2nd rate matches");
|
|
extern BOOL_VAR_H (nn_double_check_dict, FALSE, "Double check");
|
|
extern BOOL_VAR_H (nn_conf_double_check_dict, TRUE,
|
|
"Double check for confusions");
|
|
extern BOOL_VAR_H (nn_conf_1Il, TRUE, "NN use 1Il conflicts");
|
|
extern BOOL_VAR_H (nn_conf_Ss, TRUE, "NN use Ss conflicts");
|
|
extern BOOL_VAR_H (nn_conf_hyphen, TRUE, "NN hyphen conflicts");
|
|
extern BOOL_VAR_H (nn_conf_test_good_qual, FALSE, "NN dodgy 1Il cross check");
|
|
extern BOOL_VAR_H (nn_conf_test_dict, TRUE, "NN dodgy 1Il cross check");
|
|
extern BOOL_VAR_H (nn_conf_test_sensible, TRUE, "NN dodgy 1Il cross check");
|
|
extern BOOL_VAR_H (nn_conf_strict_on_dodgy_chs, TRUE,
|
|
"Require stronger NN match");
|
|
extern double_VAR_H (nn_dodgy_char_threshold, 0.99, "min accept score");
|
|
extern INT_VAR_H (nn_conf_accept_level, 4, "NN accept dodgy 1Il matches? ");
|
|
extern INT_VAR_H (nn_conf_initial_i_level, 3,
|
|
"NN accept initial Ii match level ");
|
|
extern BOOL_VAR_H (no_unrej_dubious_chars, TRUE,
|
|
"Dubious chars next to reject?");
|
|
extern BOOL_VAR_H (no_unrej_no_alphanum_wds, TRUE,
|
|
"Stop unrej of non A/N wds?");
|
|
extern BOOL_VAR_H (no_unrej_1Il, FALSE, "Stop unrej of 1Ilchars?");
|
|
extern BOOL_VAR_H (rej_use_tess_accepted, TRUE,
|
|
"Individual rejection control");
|
|
extern BOOL_VAR_H (rej_use_tess_blanks, TRUE, "Individual rejection control");
|
|
extern BOOL_VAR_H (rej_use_good_perm, TRUE, "Individual rejection control");
|
|
extern BOOL_VAR_H (rej_use_sensible_wd, FALSE, "Extend permuter check");
|
|
extern BOOL_VAR_H (rej_alphas_in_number_perm, FALSE, "Extend permuter check");
|
|
extern double_VAR_H (rej_whole_of_mostly_reject_word_fract, 0.85,
|
|
"if >this fract");
|
|
extern INT_VAR_H (rej_mostly_reject_mode, 1,
|
|
"0-never, 1-afterNN, 2-after new xht");
|
|
extern double_VAR_H (tessed_fullstop_aspect_ratio, 1.2,
|
|
"if >this fract then reject");
|
|
extern INT_VAR_H (net_image_width, 40, "NN input image width");
|
|
extern INT_VAR_H (net_image_height, 36, "NN input image height");
|
|
extern INT_VAR_H (net_image_x_height, 22, "NN input image x_height");
|
|
extern INT_VAR_H (tessedit_image_border, 2, "Rej blbs near image edge limit");
|
|
extern INT_VAR_H (net_bl_nodes, 20, "Number of baseline nodes");
|
|
extern double_VAR_H (nn_reject_threshold, 0.5, "NN min accept score");
|
|
extern double_VAR_H (nn_reject_head_and_shoulders, 0.6,
|
|
"top scores sep factor");
|
|
extern STRING_VAR_H (ok_single_ch_non_alphanum_wds, "-?\075",
|
|
"Allow NN to unrej");
|
|
extern STRING_VAR_H (ok_repeated_ch_non_alphanum_wds, "-?*\075",
|
|
"Allow NN to unrej");
|
|
extern STRING_VAR_H (conflict_set_I_l_1, "Il1[]", "Il1 conflict set");
|
|
extern STRING_VAR_H (conflict_set_S_s, "Ss$", "Ss conflict set");
|
|
extern STRING_VAR_H (conflict_set_hyphen, "-_~", "hyphen conflict set");
|
|
extern STRING_VAR_H (dubious_chars_left_of_reject, "!'+`()-./\\<>;:^_,~\"",
|
|
"Unreliable chars");
|
|
extern STRING_VAR_H (dubious_chars_right_of_reject, "!'+`()-./\\<>;:^_,~\"",
|
|
"Unreliable chars");
|
|
extern INT_VAR_H (min_sane_x_ht_pixels, 8,
|
|
"Reject any x-ht lt or eq than this");
|
|
// Set the done flag for a word.
// NOTE(review): `pass` is presumably the recognition pass number (1st or
// 2nd), as for make_reject_map -- confirm against the definition.
void set_done(WERD_RES *word,
              INT16 pass);
|
|
// Make the reject map for a word.
void make_reject_map(WERD_RES *word,
                     BLOB_CHOICE_LIST_CLIST *blob_choices,  //detailed results
                     ROW *row,
                     INT16 pass  //1st or 2nd?
                     );
|
|
void reject_blanks(WERD_RES *word);
|
|
void reject_I_1_L(WERD_RES *word);
|
|
//detailed results
|
|
void reject_poor_matches(WERD_RES *word, BLOB_CHOICE_LIST_CLIST *blob_choices);
|
|
// Compute the rejection threshold from the blob choices.
float compute_reject_threshold(
    BLOB_CHOICE_LIST_CLIST *blob_choices  //detailed results
    );

// Comparison callback for qsort; both arguments are pointers to floats.
int sort_floats(const void *arg1,
                const void *arg2);
|
|
void reject_edge_blobs(WERD_RES *word);
|
|
BOOL8 one_ell_conflict(WERD_RES *word_res, BOOL8 update_map);
|
|
INT16 first_alphanum_offset(const char *word,
|
|
const char *word_lengths);
|
|
INT16 first_alphanum_index(const char *word,
|
|
const char *word_lengths);
|
|
INT16 alpha_count(const char *word,
|
|
const char *word_lengths);
|
|
BOOL8 word_contains_non_1_digit(const char *word,
|
|
const char *word_lengths);
|
|
// Test a word for ambiguity.
BOOL8 test_ambig_word(WERD_RES *word);

// Ambiguity test on a string, altering one character position in a copy.
BOOL8 ambig_word(const char *start_word,  //original word
                 char *temp_word,         //alterable copy
                 INT16 test_char_pos      //idx to char to alter
                 );

// Returns a string associated with character `c`.
// NOTE(review): presumably the set of characters `c` can be confused with --
// confirm against the definition.
const char *char_ambiguities(char c);
|
|
|
|
// test_ambigs is only declared when EMBEDDED is not defined.
#ifndef EMBEDDED
void test_ambigs(const char *word);
#endif
|
|
|
|
void nn_recover_rejects(WERD_RES *word, ROW *row);
|
|
void nn_match_word( //Match a word
|
|
WERD_RES *word,
|
|
ROW *row);
|
|
//of character
|
|
INT16 nn_match_char(IMAGE &scaled_image,
|
|
float baseline_pos, //rel to scaled_image
|
|
BOOL8 dict_word, //part of dict wd?
|
|
BOOL8 checked_dict_word, //part of dict wd?
|
|
BOOL8 sensible_word, //part acceptable str?
|
|
BOOL8 centre, //not at word ends?
|
|
BOOL8 good_quality_word, //initial segmentation
|
|
char tess_ch //confirm this?
|
|
);
|
|
// Evaluate an NN match given the top two candidates and their scores.
// NOTE(review): the BOOL8 flags carry the same names as those of
// nn_match_char and presumably the same meaning -- confirm against the
// definition.
INT16 evaluate_net_match(char top,
                         float top_score,
                         char next,
                         float next_score,
                         char tess_ch,
                         BOOL8 dict_word,
                         BOOL8 checked_dict_word,
                         BOOL8 sensible_word,
                         BOOL8 centre,
                         BOOL8 good_quality_word);
|
|
void dont_allow_dubious_chars(WERD_RES *word);
|
|
|
|
void dont_allow_1Il(WERD_RES *word);
|
|
|
|
INT16 count_alphanums( //how many alphanums
|
|
WERD_RES *word);
|
|
void reject_mostly_rejects( //rej all if most rejectd
|
|
WERD_RES *word);
|
|
BOOL8 repeated_nonalphanum_wd(WERD_RES *word, ROW *row);
|
|
BOOL8 repeated_ch_string(const char *rep_ch_str,
|
|
const char *lengths);
|
|
INT16 safe_dict_word(const char *s);
|
|
void flip_hyphens(WERD_RES *word);
|
|
void flip_0O(WERD_RES *word);
|
|
BOOL8 non_O_upper(const char* str, int length);
|
|
BOOL8 non_0_digit(const char* str, int length);
|
|
#endif
|