Removed dependence on IMAGE class

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@944 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
theraysmith@gmail.com 2014-01-09 17:31:29 +00:00
parent 69dac05e1c
commit 67f9af58b8
5 changed files with 58 additions and 62 deletions

View File

@ -1,5 +1,5 @@
AM_CPPFLAGS += -I$(top_srcdir)/cutil -I$(top_srcdir)/ccutil \
-I$(top_srcdir)/ccstruct -I$(top_srcdir)/viewer -I$(top_srcdir)/image
-I$(top_srcdir)/ccstruct -I$(top_srcdir)/viewer
if VISIBILITY
AM_CPPFLAGS += -DTESS_EXPORTS \
@ -19,7 +19,6 @@ libtesseract_dict_la_LIBADD = \
../ccutil/libtesseract_ccutil.la \
../cutil/libtesseract_cutil.la \
../ccstruct/libtesseract_ccstruct.la \
../image/libtesseract_image.la \
../viewer/libtesseract_viewer.la
endif

View File

@ -30,128 +30,128 @@ namespace tesseract {
class Image;
Dict::Dict(Image* image_ptr)
Dict::Dict(CCUtil* ccutil)
: letter_is_okay_(&tesseract::Dict::def_letter_is_okay),
probability_in_context_(&tesseract::Dict::def_probability_in_context),
params_model_classify_(NULL),
image_ptr_(image_ptr),
ccutil_(ccutil),
STRING_INIT_MEMBER(user_words_suffix, "",
"A list of user-provided words.",
getImage()->getCCUtil()->params()),
getCCUtil()->params()),
STRING_INIT_MEMBER(user_patterns_suffix, "",
"A list of user-provided patterns.",
getImage()->getCCUtil()->params()),
getCCUtil()->params()),
BOOL_INIT_MEMBER(load_system_dawg, true, "Load system word dawg.",
getImage()->getCCUtil()->params()),
getCCUtil()->params()),
BOOL_INIT_MEMBER(load_freq_dawg, true, "Load frequent word dawg.",
getImage()->getCCUtil()->params()),
getCCUtil()->params()),
BOOL_INIT_MEMBER(load_unambig_dawg, true, "Load unambiguous word dawg.",
getImage()->getCCUtil()->params()),
getCCUtil()->params()),
BOOL_INIT_MEMBER(load_punc_dawg, true, "Load dawg with punctuation"
" patterns.", getImage()->getCCUtil()->params()),
" patterns.", getCCUtil()->params()),
BOOL_INIT_MEMBER(load_number_dawg, true, "Load dawg with number"
" patterns.", getImage()->getCCUtil()->params()),
" patterns.", getCCUtil()->params()),
BOOL_INIT_MEMBER(load_bigram_dawg, true, "Load dawg with special word "
"bigrams.", getImage()->getCCUtil()->params()),
"bigrams.", getCCUtil()->params()),
double_MEMBER(xheight_penalty_subscripts, 0.125,
"Score penalty (0.1 = 10%) added if there are subscripts "
"or superscripts in a word, but it is otherwise OK.",
getImage()->getCCUtil()->params()),
getCCUtil()->params()),
double_MEMBER(xheight_penalty_inconsistent, 0.25,
"Score penalty (0.1 = 10%) added if an xheight is "
"inconsistent.", getImage()->getCCUtil()->params()),
"inconsistent.", getCCUtil()->params()),
double_MEMBER(segment_penalty_dict_frequent_word, 1.0,
"Score multiplier for word matches which have good case and"
"are frequent in the given language (lower is better).",
getImage()->getCCUtil()->params()),
getCCUtil()->params()),
double_MEMBER(segment_penalty_dict_case_ok, 1.1,
"Score multiplier for word matches that have good case "
"(lower is better).", getImage()->getCCUtil()->params()),
"(lower is better).", getCCUtil()->params()),
double_MEMBER(segment_penalty_dict_case_bad, 1.3125,
"Default score multiplier for word matches, which may have "
"case issues (lower is better).",
getImage()->getCCUtil()->params()),
getCCUtil()->params()),
double_MEMBER(segment_penalty_ngram_best_choice, 1.24,
"Multipler to for the best choice from the ngram model.",
getImage()->getCCUtil()->params()),
getCCUtil()->params()),
double_MEMBER(segment_penalty_dict_nonword, 1.25,
"Score multiplier for glyph fragment segmentations which "
"do not match a dictionary word (lower is better).",
getImage()->getCCUtil()->params()),
getCCUtil()->params()),
double_MEMBER(segment_penalty_garbage, 1.50,
"Score multiplier for poorly cased strings that are not in"
" the dictionary and generally look like garbage (lower is"
" better).", getImage()->getCCUtil()->params()),
" better).", getCCUtil()->params()),
STRING_MEMBER(output_ambig_words_file, "",
"Output file for ambiguities found in the dictionary",
getImage()->getCCUtil()->params()),
getCCUtil()->params()),
INT_MEMBER(dawg_debug_level, 0, "Set to 1 for general debug info"
", to 2 for more details, to 3 to see all the debug messages",
getImage()->getCCUtil()->params()),
getCCUtil()->params()),
INT_MEMBER(hyphen_debug_level, 0, "Debug level for hyphenated words.",
getImage()->getCCUtil()->params()),
getCCUtil()->params()),
INT_MEMBER(max_viterbi_list_size, 10, "Maximum size of viterbi list.",
getImage()->getCCUtil()->params()),
getCCUtil()->params()),
BOOL_MEMBER(use_only_first_uft8_step, false,
"Use only the first UTF8 step of the given string"
" when computing log probabilities.",
getImage()->getCCUtil()->params()),
getCCUtil()->params()),
double_MEMBER(certainty_scale, 20.0, "Certainty scaling factor",
getImage()->getCCUtil()->params()),
getCCUtil()->params()),
double_MEMBER(stopper_nondict_certainty_base, -2.50,
"Certainty threshold for non-dict words",
getImage()->getCCUtil()->params()),
getCCUtil()->params()),
double_MEMBER(stopper_phase2_certainty_rejection_offset, 1.0,
"Reject certainty offset",
getImage()->getCCUtil()->params()),
getCCUtil()->params()),
INT_MEMBER(stopper_smallword_size, 2,
"Size of dict word to be treated as non-dict word",
getImage()->getCCUtil()->params()),
getCCUtil()->params()),
double_MEMBER(stopper_certainty_per_char, -0.50, "Certainty to add"
" for each dict char above small word size.",
getImage()->getCCUtil()->params()),
getCCUtil()->params()),
double_MEMBER(stopper_allowable_character_badness, 3.0,
"Max certaintly variation allowed in a word (in sigma)",
getImage()->getCCUtil()->params()),
getCCUtil()->params()),
INT_MEMBER(stopper_debug_level, 0, "Stopper debug level",
getImage()->getCCUtil()->params()),
getCCUtil()->params()),
BOOL_MEMBER(stopper_no_acceptable_choices, false,
"Make AcceptableChoice() always return false. Useful"
" when there is a need to explore all segmentations",
getImage()->getCCUtil()->params()),
getCCUtil()->params()),
BOOL_MEMBER(save_raw_choices, false,
"Deprecated- backward compatablity only",
getImage()->getCCUtil()->params()),
getCCUtil()->params()),
INT_MEMBER(tessedit_truncate_wordchoice_log, 10,
"Max words to keep in list",
getImage()->getCCUtil()->params()),
getCCUtil()->params()),
STRING_MEMBER(word_to_debug, "", "Word for which stopper debug"
" information should be printed to stdout",
getImage()->getCCUtil()->params()),
getCCUtil()->params()),
STRING_MEMBER(word_to_debug_lengths, "",
"Lengths of unichars in word_to_debug",
getImage()->getCCUtil()->params()),
getCCUtil()->params()),
INT_MEMBER(fragments_debug, 0, "Debug character fragments",
getImage()->getCCUtil()->params()),
getCCUtil()->params()),
BOOL_MEMBER(segment_nonalphabetic_script, false,
"Don't use any alphabetic-specific tricks."
"Set to true in the traineddata config file for"
" scripts that are cursive or inherently fixed-pitch",
getImage()->getCCUtil()->params()),
getCCUtil()->params()),
BOOL_MEMBER(save_doc_words, 0, "Save Document Words",
getImage()->getCCUtil()->params()),
getCCUtil()->params()),
double_MEMBER(doc_dict_pending_threshold, 0.0,
"Worst certainty for using pending dictionary",
getImage()->getCCUtil()->params()),
getCCUtil()->params()),
double_MEMBER(doc_dict_certainty_threshold, -2.25,
"Worst certainty for words that can be inserted into the"
"document dictionary", getImage()->getCCUtil()->params()),
"document dictionary", getCCUtil()->params()),
INT_MEMBER(max_permuter_attempts, 10000, "Maximum number of different"
" character choices to consider during permutation."
" This limit is especially useful when user patterns"
" are specified, since overly generic patterns can result in"
" dawg search exploring an overly large number of options.",
getImage()->getCCUtil()->params()) {
getCCUtil()->params()) {
dang_ambigs_table_ = NULL;
replace_ambigs_table_ = NULL;
reject_offset_ = 0.0;
@ -186,7 +186,7 @@ DawgCache *Dict::GlobalDawgCache() {
void Dict::Load(DawgCache *dawg_cache) {
STRING name;
STRING &lang = getImage()->getCCUtil()->lang;
STRING &lang = getCCUtil()->lang;
if (dawgs_.length() != 0) this->End();
@ -203,7 +203,7 @@ void Dict::Load(DawgCache *dawg_cache) {
dawg_cache_is_ours_ = true;
}
TessdataManager &tessdata_manager = getImage()->getCCUtil()->tessdata_manager;
TessdataManager &tessdata_manager = getCCUtil()->tessdata_manager;
const char *data_file_name = tessdata_manager.GetDataFileName().string();
// Load dawgs_.
@ -241,7 +241,7 @@ void Dict::Load(DawgCache *dawg_cache) {
Trie *trie_ptr = new Trie(DAWG_TYPE_WORD, lang, USER_DAWG_PERM,
kMaxUserDawgEdges, getUnicharset().size(),
dawg_debug_level);
name = getImage()->getCCUtil()->language_data_path_prefix;
name = getCCUtil()->language_data_path_prefix;
name += user_words_suffix;
if (!trie_ptr->read_and_add_word_list(name.string(), getUnicharset(),
Trie::RRP_REVERSE_IF_HAS_RTL)) {
@ -257,7 +257,7 @@ void Dict::Load(DawgCache *dawg_cache) {
kMaxUserDawgEdges, getUnicharset().size(),
dawg_debug_level);
trie_ptr->initialize_patterns(&(getUnicharset()));
name = getImage()->getCCUtil()->language_data_path_prefix;
name = getCCUtil()->language_data_path_prefix;
name += user_patterns_suffix;
if (!trie_ptr->read_pattern_list(name.string(), getUnicharset())) {
tprintf("Error: failed to load %s\n", name.string());
@ -599,7 +599,7 @@ void Dict::add_document_word(const WERD_CHOICE &best_choice) {
}
if (save_doc_words) {
strcpy(filename, getImage()->getCCUtil()->imagefile.string());
strcpy(filename, getCCUtil()->imagefile.string());
strcat(filename, ".doc");
doc_word_file = open_file (filename, "a");
fprintf(doc_word_file, "%s\n",

View File

@ -23,7 +23,6 @@
#include "dawg.h"
#include "dawg_cache.h"
#include "host.h"
#include "image.h"
#include "oldlist.h"
#include "ratngs.h"
#include "stopper.h"
@ -89,22 +88,22 @@ struct DawgArgs {
class Dict {
public:
Dict(Image* image_ptr);
// Keeps the given CCUtil pointer; params, lang and unicharset are reached
// through it via getCCUtil().
Dict(CCUtil* ccutil);
~Dict();
const Image* getImage() const {
return image_ptr_;
const CCUtil* getCCUtil() const {
return ccutil_;
}
Image* getImage() {
return image_ptr_;
CCUtil* getCCUtil() {
return ccutil_;
}
const UNICHARSET& getUnicharset() const {
return getImage()->getCCUtil()->unicharset;
return getCCUtil()->unicharset;
}
UNICHARSET& getUnicharset() {
return getImage()->getCCUtil()->unicharset;
return getCCUtil()->unicharset;
}
const UnicharAmbigs &getUnicharAmbigs() const {
return getImage()->getCCUtil()->unichar_ambigs;
return getCCUtil()->unichar_ambigs;
}
// Returns true if unichar_id is a word compounding character like - or /.
@ -369,7 +368,7 @@ class Dict {
const char* character,
int character_bytes) {
return (this->*probability_in_context_)(
getImage()->getCCUtil()->lang.string(),
getCCUtil()->lang.string(),
context, context_bytes,
character, character_bytes);
}
@ -397,7 +396,7 @@ class Dict {
float CallParamsModelClassify(void *path) {
ASSERT_HOST(params_model_classify_ != NULL); // ASSERT_HOST -> assert
return (this->*params_model_classify_)(
getImage()->getCCUtil()->lang.string(), path);
getCCUtil()->lang.string(), path);
}
inline void SetWildcardID(UNICHAR_ID id) { wildcard_unichar_id_ = id; }
@ -490,7 +489,7 @@ class Dict {
private:
/** Private member variables. */
Image* image_ptr_;
CCUtil* ccutil_;
/**
* Table that stores ambiguities computed during training
* (loaded when NoDangerousAmbigs() is called for the first time).

View File

@ -37,7 +37,6 @@
#include <ctype.h>
#include "dict.h"
#include "image.h"
/*----------------------------------------------------------------------
F u n c t i o n s

View File

@ -29,7 +29,6 @@
#include "dict.h"
#include "efio.h"
#include "helpers.h"
#include "image.h"
#include "matchdefs.h"
#include "pageres.h"
#include "params.h"