mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-28 05:39:35 +08:00
110 lines
5.3 KiB
C
110 lines
5.3 KiB
C
|
/**********************************************************************
 * File:        adaptions.h  (Formerly adaptions.h)
 * Description: Functions used to adapt to blobs already confidently
 *              identified
 * Author:      Chris Newton
 * Created:     Thu Oct 7 10:17:28 BST 1993
 *
 * (C) Copyright 1992, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/
|
||
|
|
||
|
#ifndef ADAPTIONS_H
|
||
|
#define ADAPTIONS_H
|
||
|
|
||
|
#include "charsample.h"
|
||
|
#include "charcut.h"
|
||
|
#include "notdll.h"
|
||
|
|
||
|
// Boolean controls for rejecting 'm' characters. Each declaration carries its
// own description string; the second macro argument is presumably the default
// value (confirm against the *_VAR_H macro definitions).
extern BOOL_VAR_H (tessedit_reject_ems, FALSE, "Reject all m's");
extern BOOL_VAR_H (tessedit_reject_suspect_ems, FALSE, "Reject suspect m's");
|
||
|
extern double_VAR_H (tessedit_cluster_t1, 0.20,
|
||
|
"t1 threshold for clustering samples");
|
||
|
extern double_VAR_H (tessedit_cluster_t2, 0.40,
|
||
|
"t2 threshold for clustering samples");
|
||
|
extern double_VAR_H (tessedit_cluster_t3, 0.12,
|
||
|
"Extra threshold for clustering samples, only keep a new sample if best score greater than this value");
|
||
|
extern double_VAR_H (tessedit_cluster_accept_fraction, 0.80,
|
||
|
"Largest fraction of characters in cluster for it to be used for adaption");
|
||
|
extern INT_VAR_H (tessedit_cluster_min_size, 3,
|
||
|
"Smallest number of samples in a cluster for it to be used for adaption");
|
||
|
extern BOOL_VAR_H (tessedit_cluster_debug, FALSE,
|
||
|
"Generate and print debug information for adaption by clustering");
|
||
|
extern BOOL_VAR_H (tessedit_use_best_sample, FALSE,
|
||
|
"Use best sample from cluster when adapting");
|
||
|
extern BOOL_VAR_H (tessedit_test_cluster_input, FALSE,
|
||
|
"Set reject map to enable cluster input to be measured");
|
||
|
// Controls for the matrix matcher ("mm") used during adaption. The description
// string on each declaration is the authoritative summary; the second macro
// argument is presumably the default value (confirm against the *_VAR_H macro
// definitions).
extern BOOL_VAR_H (tessedit_matrix_match, TRUE, "Use matrix matcher");
// NOTE(review): the description below is identical to tessedit_matrix_match;
// possibly a copy-paste leftover -- confirm the intended wording.
extern BOOL_VAR_H (tessedit_old_matrix_match, FALSE, "Use matrix matcher");

// Characters excluded from adaption, and the switch that enables the list.
extern BOOL_VAR_H (tessedit_mm_use_non_adaption_set, FALSE,
                   "Don't try to adapt to characters on this list");
extern STRING_VAR_H (tessedit_non_adaption_set, ",.;:'~@*",
                     "Characters to be avoided when adapting");

// Prototype usage.
extern BOOL_VAR_H (tessedit_mm_adapt_using_prototypes, TRUE,
                   "Use prototypes when adapting");
extern BOOL_VAR_H (tessedit_mm_use_prototypes, TRUE,
                   "Use prototypes as clusters are built");

// Which characters are adapted to and how samples are matched.
extern BOOL_VAR_H (tessedit_mm_use_rejmap, FALSE,
                   "Adapt to characters using reject map");
extern BOOL_VAR_H (tessedit_mm_all_rejects, FALSE,
                   "Adapt to all characters using, matrix matcher");
extern BOOL_VAR_H (tessedit_mm_only_match_same_char, FALSE,
                   "Only match samples against clusters for the same character");
|
||
|
extern BOOL_VAR_H (tessedit_process_rns, FALSE, "Handle m - rn ambigs");
|
||
|
extern BOOL_VAR_H (tessedit_demo_adaption, FALSE,
|
||
|
"Display cut images and matrix match for demo purposes");
|
||
|
extern INT_VAR_H (tessedit_demo_word1, 62,
|
||
|
"Word number of first word to display");
|
||
|
extern INT_VAR_H (tessedit_demo_word2, 64,
|
||
|
"Word number of second word to display");
|
||
|
extern STRING_VAR_H (tessedit_demo_file, "academe",
|
||
|
"Name of document containing demo words");
|
||
|
BOOL8 word_adaptable( //should we adapt?
|
||
|
WERD_RES *word,
|
||
|
UINT16 mode);
|
||
|
void collect_ems_for_adaption(WERD_RES *word,
|
||
|
CHAR_SAMPLES_LIST *char_clusters,
|
||
|
CHAR_SAMPLE_LIST *chars_waiting);
|
||
|
void collect_characters_for_adaption(WERD_RES *word,
|
||
|
CHAR_SAMPLES_LIST *char_clusters,
|
||
|
CHAR_SAMPLE_LIST *chars_waiting);
|
||
|
void cluster_sample(CHAR_SAMPLE *sample,
|
||
|
CHAR_SAMPLES_LIST *char_clusters,
|
||
|
CHAR_SAMPLE_LIST *chars_waiting);
|
||
|
void check_wait_list(CHAR_SAMPLE_LIST *chars_waiting,
|
||
|
CHAR_SAMPLE *sample,
|
||
|
CHAR_SAMPLES *best_cluster);
|
||
|
void complete_clustering(CHAR_SAMPLES_LIST *char_clusters,
|
||
|
CHAR_SAMPLE_LIST *chars_waiting);
|
||
|
void adapt_to_good_ems(WERD_RES *word,
|
||
|
CHAR_SAMPLES_LIST *char_clusters,
|
||
|
CHAR_SAMPLE_LIST *chars_waiting);
|
||
|
void adapt_to_good_samples(WERD_RES *word,
|
||
|
CHAR_SAMPLES_LIST *char_clusters,
|
||
|
CHAR_SAMPLE_LIST *chars_waiting);
|
||
|
void print_em_stats(CHAR_SAMPLES_LIST *char_clusters,
|
||
|
CHAR_SAMPLE_LIST *chars_waiting);
|
||
|
//lines of the image
|
||
|
CHAR_SAMPLE *clip_sample(PIXROW *pixrow,
|
||
|
IMAGELINE *imlines,
|
||
|
BOX pix_box, //box of imlines extent
|
||
|
BOOL8 white_on_black,
|
||
|
char c);
|
||
|
// Display the prototypes of the given clusters (purpose inferred from the
// name; how and where they are displayed is defined by the implementation).
//   char_clusters - list of sample clusters.
void display_cluster_prototypes(CHAR_SAMPLES_LIST *char_clusters);
|
||
|
// Rejection helpers operating on a single word result. Purposes are inferred
// from the names ("ems" = 'm' characters, "fullstops" = '.' characters);
// confirm the exact rejection rules against the definitions.
//   word - word result whose characters are examined.

// Reject every 'm' in the word.
void reject_all_ems(WERD_RES *word);

// Reject every full stop in the word.
void reject_all_fullstops(WERD_RES *word);

// Reject only the 'm' characters considered suspect.
void reject_suspect_ems(WERD_RES *word);

// Reject only the full stops considered suspect.
void reject_suspect_fullstops(WERD_RES *word);
|
||
|
// Suspicion predicates for one character of a word. Purposes are inferred
// from the names; the criteria are defined by the implementation.
//   word      - word result containing the character.
//   index / i - position of the character within the word; presumably
//               zero-based (TODO confirm).
// Each returns a BOOL8 flag; presumably TRUE when the character is suspect.

// Is the 'm' at position `index` suspect?
BOOL8 suspect_em(WERD_RES *word, INT16 index);

// Is the full stop at position `i` suspect?
BOOL8 suspect_fullstop(WERD_RES *word, INT16 i);
|
||
|
#endif
|