mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-24 02:59:07 +08:00
Removed dependence on IMAGE class
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@944 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
parent
69dac05e1c
commit
67f9af58b8
@ -1,5 +1,5 @@
|
||||
AM_CPPFLAGS += -I$(top_srcdir)/cutil -I$(top_srcdir)/ccutil \
|
||||
-I$(top_srcdir)/ccstruct -I$(top_srcdir)/viewer -I$(top_srcdir)/image
|
||||
-I$(top_srcdir)/ccstruct -I$(top_srcdir)/viewer
|
||||
|
||||
if VISIBILITY
|
||||
AM_CPPFLAGS += -DTESS_EXPORTS \
|
||||
@ -19,7 +19,6 @@ libtesseract_dict_la_LIBADD = \
|
||||
../ccutil/libtesseract_ccutil.la \
|
||||
../cutil/libtesseract_cutil.la \
|
||||
../ccstruct/libtesseract_ccstruct.la \
|
||||
../image/libtesseract_image.la \
|
||||
../viewer/libtesseract_viewer.la
|
||||
endif
|
||||
|
||||
|
@ -30,128 +30,128 @@ namespace tesseract {
|
||||
|
||||
class Image;
|
||||
|
||||
Dict::Dict(Image* image_ptr)
|
||||
Dict::Dict(CCUtil* ccutil)
|
||||
: letter_is_okay_(&tesseract::Dict::def_letter_is_okay),
|
||||
probability_in_context_(&tesseract::Dict::def_probability_in_context),
|
||||
params_model_classify_(NULL),
|
||||
image_ptr_(image_ptr),
|
||||
ccutil_(ccutil),
|
||||
STRING_INIT_MEMBER(user_words_suffix, "",
|
||||
"A list of user-provided words.",
|
||||
getImage()->getCCUtil()->params()),
|
||||
getCCUtil()->params()),
|
||||
STRING_INIT_MEMBER(user_patterns_suffix, "",
|
||||
"A list of user-provided patterns.",
|
||||
getImage()->getCCUtil()->params()),
|
||||
getCCUtil()->params()),
|
||||
BOOL_INIT_MEMBER(load_system_dawg, true, "Load system word dawg.",
|
||||
getImage()->getCCUtil()->params()),
|
||||
getCCUtil()->params()),
|
||||
BOOL_INIT_MEMBER(load_freq_dawg, true, "Load frequent word dawg.",
|
||||
getImage()->getCCUtil()->params()),
|
||||
getCCUtil()->params()),
|
||||
BOOL_INIT_MEMBER(load_unambig_dawg, true, "Load unambiguous word dawg.",
|
||||
getImage()->getCCUtil()->params()),
|
||||
getCCUtil()->params()),
|
||||
BOOL_INIT_MEMBER(load_punc_dawg, true, "Load dawg with punctuation"
|
||||
" patterns.", getImage()->getCCUtil()->params()),
|
||||
" patterns.", getCCUtil()->params()),
|
||||
BOOL_INIT_MEMBER(load_number_dawg, true, "Load dawg with number"
|
||||
" patterns.", getImage()->getCCUtil()->params()),
|
||||
" patterns.", getCCUtil()->params()),
|
||||
BOOL_INIT_MEMBER(load_bigram_dawg, true, "Load dawg with special word "
|
||||
"bigrams.", getImage()->getCCUtil()->params()),
|
||||
"bigrams.", getCCUtil()->params()),
|
||||
double_MEMBER(xheight_penalty_subscripts, 0.125,
|
||||
"Score penalty (0.1 = 10%) added if there are subscripts "
|
||||
"or superscripts in a word, but it is otherwise OK.",
|
||||
getImage()->getCCUtil()->params()),
|
||||
getCCUtil()->params()),
|
||||
double_MEMBER(xheight_penalty_inconsistent, 0.25,
|
||||
"Score penalty (0.1 = 10%) added if an xheight is "
|
||||
"inconsistent.", getImage()->getCCUtil()->params()),
|
||||
"inconsistent.", getCCUtil()->params()),
|
||||
double_MEMBER(segment_penalty_dict_frequent_word, 1.0,
|
||||
"Score multiplier for word matches which have good case and"
|
||||
"are frequent in the given language (lower is better).",
|
||||
getImage()->getCCUtil()->params()),
|
||||
getCCUtil()->params()),
|
||||
double_MEMBER(segment_penalty_dict_case_ok, 1.1,
|
||||
"Score multiplier for word matches that have good case "
|
||||
"(lower is better).", getImage()->getCCUtil()->params()),
|
||||
"(lower is better).", getCCUtil()->params()),
|
||||
double_MEMBER(segment_penalty_dict_case_bad, 1.3125,
|
||||
"Default score multiplier for word matches, which may have "
|
||||
"case issues (lower is better).",
|
||||
getImage()->getCCUtil()->params()),
|
||||
getCCUtil()->params()),
|
||||
double_MEMBER(segment_penalty_ngram_best_choice, 1.24,
|
||||
"Multipler to for the best choice from the ngram model.",
|
||||
getImage()->getCCUtil()->params()),
|
||||
getCCUtil()->params()),
|
||||
double_MEMBER(segment_penalty_dict_nonword, 1.25,
|
||||
"Score multiplier for glyph fragment segmentations which "
|
||||
"do not match a dictionary word (lower is better).",
|
||||
getImage()->getCCUtil()->params()),
|
||||
getCCUtil()->params()),
|
||||
double_MEMBER(segment_penalty_garbage, 1.50,
|
||||
"Score multiplier for poorly cased strings that are not in"
|
||||
" the dictionary and generally look like garbage (lower is"
|
||||
" better).", getImage()->getCCUtil()->params()),
|
||||
" better).", getCCUtil()->params()),
|
||||
STRING_MEMBER(output_ambig_words_file, "",
|
||||
"Output file for ambiguities found in the dictionary",
|
||||
getImage()->getCCUtil()->params()),
|
||||
getCCUtil()->params()),
|
||||
INT_MEMBER(dawg_debug_level, 0, "Set to 1 for general debug info"
|
||||
", to 2 for more details, to 3 to see all the debug messages",
|
||||
getImage()->getCCUtil()->params()),
|
||||
getCCUtil()->params()),
|
||||
INT_MEMBER(hyphen_debug_level, 0, "Debug level for hyphenated words.",
|
||||
getImage()->getCCUtil()->params()),
|
||||
getCCUtil()->params()),
|
||||
INT_MEMBER(max_viterbi_list_size, 10, "Maximum size of viterbi list.",
|
||||
getImage()->getCCUtil()->params()),
|
||||
getCCUtil()->params()),
|
||||
BOOL_MEMBER(use_only_first_uft8_step, false,
|
||||
"Use only the first UTF8 step of the given string"
|
||||
" when computing log probabilities.",
|
||||
getImage()->getCCUtil()->params()),
|
||||
getCCUtil()->params()),
|
||||
double_MEMBER(certainty_scale, 20.0, "Certainty scaling factor",
|
||||
getImage()->getCCUtil()->params()),
|
||||
getCCUtil()->params()),
|
||||
double_MEMBER(stopper_nondict_certainty_base, -2.50,
|
||||
"Certainty threshold for non-dict words",
|
||||
getImage()->getCCUtil()->params()),
|
||||
getCCUtil()->params()),
|
||||
double_MEMBER(stopper_phase2_certainty_rejection_offset, 1.0,
|
||||
"Reject certainty offset",
|
||||
getImage()->getCCUtil()->params()),
|
||||
getCCUtil()->params()),
|
||||
INT_MEMBER(stopper_smallword_size, 2,
|
||||
"Size of dict word to be treated as non-dict word",
|
||||
getImage()->getCCUtil()->params()),
|
||||
getCCUtil()->params()),
|
||||
double_MEMBER(stopper_certainty_per_char, -0.50, "Certainty to add"
|
||||
" for each dict char above small word size.",
|
||||
getImage()->getCCUtil()->params()),
|
||||
getCCUtil()->params()),
|
||||
double_MEMBER(stopper_allowable_character_badness, 3.0,
|
||||
"Max certaintly variation allowed in a word (in sigma)",
|
||||
getImage()->getCCUtil()->params()),
|
||||
getCCUtil()->params()),
|
||||
INT_MEMBER(stopper_debug_level, 0, "Stopper debug level",
|
||||
getImage()->getCCUtil()->params()),
|
||||
getCCUtil()->params()),
|
||||
BOOL_MEMBER(stopper_no_acceptable_choices, false,
|
||||
"Make AcceptableChoice() always return false. Useful"
|
||||
" when there is a need to explore all segmentations",
|
||||
getImage()->getCCUtil()->params()),
|
||||
getCCUtil()->params()),
|
||||
BOOL_MEMBER(save_raw_choices, false,
|
||||
"Deprecated- backward compatablity only",
|
||||
getImage()->getCCUtil()->params()),
|
||||
getCCUtil()->params()),
|
||||
INT_MEMBER(tessedit_truncate_wordchoice_log, 10,
|
||||
"Max words to keep in list",
|
||||
getImage()->getCCUtil()->params()),
|
||||
getCCUtil()->params()),
|
||||
STRING_MEMBER(word_to_debug, "", "Word for which stopper debug"
|
||||
" information should be printed to stdout",
|
||||
getImage()->getCCUtil()->params()),
|
||||
getCCUtil()->params()),
|
||||
STRING_MEMBER(word_to_debug_lengths, "",
|
||||
"Lengths of unichars in word_to_debug",
|
||||
getImage()->getCCUtil()->params()),
|
||||
getCCUtil()->params()),
|
||||
INT_MEMBER(fragments_debug, 0, "Debug character fragments",
|
||||
getImage()->getCCUtil()->params()),
|
||||
getCCUtil()->params()),
|
||||
BOOL_MEMBER(segment_nonalphabetic_script, false,
|
||||
"Don't use any alphabetic-specific tricks."
|
||||
"Set to true in the traineddata config file for"
|
||||
" scripts that are cursive or inherently fixed-pitch",
|
||||
getImage()->getCCUtil()->params()),
|
||||
getCCUtil()->params()),
|
||||
BOOL_MEMBER(save_doc_words, 0, "Save Document Words",
|
||||
getImage()->getCCUtil()->params()),
|
||||
getCCUtil()->params()),
|
||||
double_MEMBER(doc_dict_pending_threshold, 0.0,
|
||||
"Worst certainty for using pending dictionary",
|
||||
getImage()->getCCUtil()->params()),
|
||||
getCCUtil()->params()),
|
||||
double_MEMBER(doc_dict_certainty_threshold, -2.25,
|
||||
"Worst certainty for words that can be inserted into the"
|
||||
"document dictionary", getImage()->getCCUtil()->params()),
|
||||
"document dictionary", getCCUtil()->params()),
|
||||
INT_MEMBER(max_permuter_attempts, 10000, "Maximum number of different"
|
||||
" character choices to consider during permutation."
|
||||
" This limit is especially useful when user patterns"
|
||||
" are specified, since overly generic patterns can result in"
|
||||
" dawg search exploring an overly large number of options.",
|
||||
getImage()->getCCUtil()->params()) {
|
||||
getCCUtil()->params()) {
|
||||
dang_ambigs_table_ = NULL;
|
||||
replace_ambigs_table_ = NULL;
|
||||
reject_offset_ = 0.0;
|
||||
@ -186,7 +186,7 @@ DawgCache *Dict::GlobalDawgCache() {
|
||||
|
||||
void Dict::Load(DawgCache *dawg_cache) {
|
||||
STRING name;
|
||||
STRING &lang = getImage()->getCCUtil()->lang;
|
||||
STRING &lang = getCCUtil()->lang;
|
||||
|
||||
if (dawgs_.length() != 0) this->End();
|
||||
|
||||
@ -203,7 +203,7 @@ void Dict::Load(DawgCache *dawg_cache) {
|
||||
dawg_cache_is_ours_ = true;
|
||||
}
|
||||
|
||||
TessdataManager &tessdata_manager = getImage()->getCCUtil()->tessdata_manager;
|
||||
TessdataManager &tessdata_manager = getCCUtil()->tessdata_manager;
|
||||
const char *data_file_name = tessdata_manager.GetDataFileName().string();
|
||||
|
||||
// Load dawgs_.
|
||||
@ -241,7 +241,7 @@ void Dict::Load(DawgCache *dawg_cache) {
|
||||
Trie *trie_ptr = new Trie(DAWG_TYPE_WORD, lang, USER_DAWG_PERM,
|
||||
kMaxUserDawgEdges, getUnicharset().size(),
|
||||
dawg_debug_level);
|
||||
name = getImage()->getCCUtil()->language_data_path_prefix;
|
||||
name = getCCUtil()->language_data_path_prefix;
|
||||
name += user_words_suffix;
|
||||
if (!trie_ptr->read_and_add_word_list(name.string(), getUnicharset(),
|
||||
Trie::RRP_REVERSE_IF_HAS_RTL)) {
|
||||
@ -257,7 +257,7 @@ void Dict::Load(DawgCache *dawg_cache) {
|
||||
kMaxUserDawgEdges, getUnicharset().size(),
|
||||
dawg_debug_level);
|
||||
trie_ptr->initialize_patterns(&(getUnicharset()));
|
||||
name = getImage()->getCCUtil()->language_data_path_prefix;
|
||||
name = getCCUtil()->language_data_path_prefix;
|
||||
name += user_patterns_suffix;
|
||||
if (!trie_ptr->read_pattern_list(name.string(), getUnicharset())) {
|
||||
tprintf("Error: failed to load %s\n", name.string());
|
||||
@ -599,7 +599,7 @@ void Dict::add_document_word(const WERD_CHOICE &best_choice) {
|
||||
}
|
||||
|
||||
if (save_doc_words) {
|
||||
strcpy(filename, getImage()->getCCUtil()->imagefile.string());
|
||||
strcpy(filename, getCCUtil()->imagefile.string());
|
||||
strcat(filename, ".doc");
|
||||
doc_word_file = open_file (filename, "a");
|
||||
fprintf(doc_word_file, "%s\n",
|
||||
|
23
dict/dict.h
23
dict/dict.h
@ -23,7 +23,6 @@
|
||||
#include "dawg.h"
|
||||
#include "dawg_cache.h"
|
||||
#include "host.h"
|
||||
#include "image.h"
|
||||
#include "oldlist.h"
|
||||
#include "ratngs.h"
|
||||
#include "stopper.h"
|
||||
@ -89,22 +88,22 @@ struct DawgArgs {
|
||||
|
||||
class Dict {
|
||||
public:
|
||||
Dict(Image* image_ptr);
|
||||
// Constructs a Dict bound to the given CCUtil. The pointer is stored in
// ccutil_ and handed back by getCCUtil(). It must be non-null: the
// constructor dereferences it (getCCUtil()->params()) while initializing
// the dictionary's parameter members.
Dict(CCUtil* ccutil);
|
||||
~Dict();
|
||||
const Image* getImage() const {
|
||||
return image_ptr_;
|
||||
const CCUtil* getCCUtil() const {
|
||||
return ccutil_;
|
||||
}
|
||||
Image* getImage() {
|
||||
return image_ptr_;
|
||||
CCUtil* getCCUtil() {
|
||||
return ccutil_;
|
||||
}
|
||||
const UNICHARSET& getUnicharset() const {
|
||||
return getImage()->getCCUtil()->unicharset;
|
||||
return getCCUtil()->unicharset;
|
||||
}
|
||||
UNICHARSET& getUnicharset() {
|
||||
return getImage()->getCCUtil()->unicharset;
|
||||
return getCCUtil()->unicharset;
|
||||
}
|
||||
const UnicharAmbigs &getUnicharAmbigs() const {
|
||||
return getImage()->getCCUtil()->unichar_ambigs;
|
||||
return getCCUtil()->unichar_ambigs;
|
||||
}
|
||||
|
||||
// Returns true if unichar_id is a word compounding character like - or /.
|
||||
@ -369,7 +368,7 @@ class Dict {
|
||||
const char* character,
|
||||
int character_bytes) {
|
||||
return (this->*probability_in_context_)(
|
||||
getImage()->getCCUtil()->lang.string(),
|
||||
getCCUtil()->lang.string(),
|
||||
context, context_bytes,
|
||||
character, character_bytes);
|
||||
}
|
||||
@ -397,7 +396,7 @@ class Dict {
|
||||
float CallParamsModelClassify(void *path) {
|
||||
ASSERT_HOST(params_model_classify_ != NULL); // ASSERT_HOST -> assert
|
||||
return (this->*params_model_classify_)(
|
||||
getImage()->getCCUtil()->lang.string(), path);
|
||||
getCCUtil()->lang.string(), path);
|
||||
}
|
||||
|
||||
inline void SetWildcardID(UNICHAR_ID id) { wildcard_unichar_id_ = id; }
|
||||
@ -490,7 +489,7 @@ class Dict {
|
||||
|
||||
private:
|
||||
/** Private member variables. */
|
||||
Image* image_ptr_;
|
||||
CCUtil* ccutil_;
|
||||
/**
|
||||
* Table that stores ambiguities computed during training
|
||||
* (loaded when NoDangerousAmbigs() is called for the first time).
|
||||
|
@ -37,7 +37,6 @@
|
||||
|
||||
#include <ctype.h>
|
||||
#include "dict.h"
|
||||
#include "image.h"
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
F u n c t i o n s
|
||||
|
@ -29,7 +29,6 @@
|
||||
#include "dict.h"
|
||||
#include "efio.h"
|
||||
#include "helpers.h"
|
||||
#include "image.h"
|
||||
#include "matchdefs.h"
|
||||
#include "pageres.h"
|
||||
#include "params.h"
|
||||
|
Loading…
Reference in New Issue
Block a user