Refactored control functions to enable parallel blob classification

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@904 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
theraysmith@gmail.com 2013-11-08 20:30:56 +00:00
parent 77c1b41e4e
commit 7ec4fd7a56
25 changed files with 580 additions and 673 deletions

View File

@ -46,7 +46,7 @@ libtesseract_main_la_SOURCES = \
docqual.cpp equationdetect.cpp fixspace.cpp fixxht.cpp \
imgscale.cpp ltrresultiterator.cpp \
osdetect.cpp output.cpp pageiterator.cpp pagesegmain.cpp \
pagewalk.cpp paragraphs.cpp paramsd.cpp pgedit.cpp recogtraining.cpp \
pagewalk.cpp par_control.cpp paragraphs.cpp paramsd.cpp pgedit.cpp recogtraining.cpp \
reject.cpp resultiterator.cpp scaleimg.cpp superscript.cpp \
tesseract_cube_combiner.cpp \
tessbox.cpp tessedit.cpp tesseractclass.cpp tessvars.cpp \

View File

@ -241,10 +241,12 @@ PAGE_RES* Tesseract::SetupApplyBoxes(const GenericVector<TBOX>& boxes,
void Tesseract::MaximallyChopWord(const GenericVector<TBOX>& boxes,
BLOCK* block, ROW* row,
WERD_RES* word_res) {
if (!word_res->SetupForTessRecognition(unicharset, this, BestPix(), false,
textord_use_cjk_fp_model,
poly_allow_detailed_fx,
row, block)) {
if (!word_res->SetupForRecognition(unicharset, this, BestPix(),
tessedit_ocr_engine_mode, NULL,
classify_bln_numeric_mode,
textord_use_cjk_fp_model,
poly_allow_detailed_fx,
row, block)) {
word_res->CloneChoppedToRebuild();
return;
}

View File

@ -97,8 +97,9 @@ BOOL8 Tesseract::recog_interactive(BLOCK* block, ROW* row, WERD_RES* word_res) {
inT16 char_qual;
inT16 good_char_qual;
classify_word_and_language(&Tesseract::classify_word_pass2,
block, row, word_res);
WordData word_data(block, row, word_res);
SetupWordPassN(2, &word_data);
classify_word_and_language(&Tesseract::classify_word_pass2, &word_data);
if (tessedit_debug_quality_metrics) {
word_char_quality(word_res, row, &char_qual, &good_char_qual);
tprintf
@ -153,6 +154,111 @@ bool Tesseract::ProcessTargetWord(const TBOX& word_box,
return true;
}
// If tesseract is to be run, sets the words up ready for it.
// Collects every word on the page (optionally filtered to a target box) into
// *words and prepares each one for recognition pass pass_n, so that a
// parallel pre-recognition step can operate on a flat vector of WordData.
// target_word_box/word_config restrict processing to one word when non-NULL.
void Tesseract::SetupAllWordsPassN(int pass_n,
const TBOX* target_word_box,
const char* word_config,
PAGE_RES* page_res,
GenericVector<WordData>* words) {
// Prepare all the words.
PAGE_RES_IT page_res_it(page_res);
for (page_res_it.restart_page(); page_res_it.word() != NULL;
page_res_it.forward()) {
// On pass 1, install fake results first so that a timeout mid-page still
// leaves every word with a usable (empty) best_choice/box_word.
if (pass_n == 1)
page_res_it.word()->SetupFake(unicharset);
if (target_word_box == NULL ||
ProcessTargetWord(page_res_it.word()->word->bounding_box(),
*target_word_box, word_config, 1)) {
words->push_back(WordData(page_res_it));
}
}
// Setup all the words for recognition with polygonal approximation.
for (int w = 0; w < words->size(); ++w) {
SetupWordPassN(pass_n, &(*words)[w]);
// Chain words so each one can see its predecessor's best_choice.
if (w > 0) (*words)[w].prev_word = &(*words)[w - 1];
}
}
// Sets up the single word ready for whichever engine is to be run.
// Sets up the single word ready for whichever engine is to be run.
// Prepares word->word for recognition on pass pass_n, and, when
// sub-languages are configured, prepares a per-language copy of the word in
// word->lang_words so each sub-language Tesseract can classify independently.
void Tesseract::SetupWordPassN(int pass_n, WordData* word) {
// On pass 2 a word already marked done (and not in training mode) needs no
// re-setup; pass 1 always sets up.
if (pass_n == 1 || !word->word->done || tessedit_training_tess) {
if (pass_n == 2) {
// TODO(rays) Should we do this on pass1 too?
word->word->caps_height = 0.0;
// Fall back to the row's x-height when the word has none of its own.
if (word->word->x_height == 0.0f)
word->word->x_height = word->row->x_height();
}
// Cube doesn't get setup for pass2.
if (pass_n != 2 || tessedit_ocr_engine_mode != OEM_CUBE_ONLY) {
word->word->SetupForRecognition(
unicharset, this, BestPix(), tessedit_ocr_engine_mode, NULL,
classify_bln_numeric_mode, textord_use_cjk_fp_model,
poly_allow_detailed_fx, word->row, word->block);
}
}
if (!sub_langs_.empty()) {
if (word->lang_words.size() != sub_langs_.size()) {
// Setup the words for all the sub-languages now.
WERD_RES empty;
word->lang_words.init_to_size(sub_langs_.size(), empty);
}
for (int s = 0; s < sub_langs_.size(); ++s) {
Tesseract* lang_t = sub_langs_[s];
// A sub-language copy is (re)initialized on pass 1 always; on pass 2
// only for non-cube engines when the copy is not done (or training).
if (pass_n == 1 || (lang_t->tessedit_ocr_engine_mode != OEM_CUBE_ONLY &&
(!word->lang_words[s].done || lang_t->tessedit_training_tess))) {
word->lang_words[s].InitForRetryRecognition(*word->word);
word->lang_words[s].SetupForRecognition(
lang_t->unicharset, lang_t, BestPix(),
lang_t->tessedit_ocr_engine_mode, NULL,
lang_t->classify_bln_numeric_mode,
lang_t->textord_use_cjk_fp_model,
lang_t->poly_allow_detailed_fx, word->row, word->block);
}
}
}
}
// Runs word recognition on all the words.
// Runs word recognition on all the words.
// Classifies every prepared word in *words for pass pass_n, updating the
// progress monitor as it goes. Returns false on deadline/cancel (after
// faking out the remaining words so downstream code still sees results),
// true on normal completion.
bool Tesseract::RecogAllWordsPassN(int pass_n, ETEXT_DESC* monitor,
GenericVector<WordData>* words) {
// TODO(rays) Before this loop can be parallelized (it would yield a massive
// speed-up) all remaining member globals need to be converted to local/heap
// (eg set_pass1 and set_pass2) and an intermediate adaption pass needs to be
// added. The results will be significantly different with adaption on, and
// deterioration will need investigation.
for (int w = 0; w < words->size(); ++w) {
WordData* word = &(*words)[w];
if (monitor != NULL) {
monitor->ocr_alive = TRUE;
// Progress mapping: pass 1 covers 30-80%, pass 2 covers 80-90%.
if (pass_n == 1)
monitor->progress = 30 + 50 * w / words->size();
else
monitor->progress = 80 + 10 * w / words->size();
if (monitor->deadline_exceeded() ||
(monitor->cancel != NULL && (*monitor->cancel)(monitor->cancel_this,
words->size()))) {
// Timeout. Fake out the rest of the words.
for (; w < words->size(); ++w) {
(*words)[w].word->SetupFake(unicharset);
}
return false;
}
}
// Words that already failed segmentation are not retried.
if (word->word->tess_failed) continue;
WordRecognizer recognizer = pass_n == 1 ? &Tesseract::classify_word_pass1
: &Tesseract::classify_word_pass2;
classify_word_and_language(recognizer, word);
if (tessedit_dump_choices) {
word_dumper(NULL, word->row, word->word);
tprintf("Pass%d: %s [%s]\n", pass_n,
word->word->best_choice->unichar_string().string(),
word->word->best_choice->debug_string().string());
}
}
return true;
}
/**
* recog_all_words()
*
@ -179,27 +285,15 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
const TBOX* target_word_box,
const char* word_config,
int dopasses) {
PAGE_RES_IT page_res_it;
inT32 word_index; // current word
PAGE_RES_IT page_res_it(page_res);
if (tessedit_minimal_rej_pass1) {
tessedit_test_adaption.set_value (TRUE);
tessedit_minimal_rejection.set_value (TRUE);
}
// Before the main recognition loop below, walk through the whole page and set
// up fake words. That way, if we run out of time a user will still get the
// expected best_choice and box_words out the end; they'll just be empty.
page_res_it.page_res = page_res;
for (page_res_it.restart_page(); page_res_it.word() != NULL;
page_res_it.forward()) {
page_res_it.word()->SetupFake(unicharset);
}
if (dopasses==0 || dopasses==1) {
page_res_it.page_res=page_res;
page_res_it.restart_page();
// ****************** Pass 1 *******************
// Clear adaptive classifier at the beginning of the page if it is full.
@ -214,20 +308,15 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
if (sub_langs_[i]->AdaptiveClassifierIsFull())
sub_langs_[i]->ResetAdaptiveClassifierInternal();
}
stats_.word_count = 0;
if (monitor != NULL) {
monitor->ocr_alive = TRUE;
while (page_res_it.word() != NULL) {
stats_.word_count++;
page_res_it.forward();
}
page_res_it.restart_page();
} else {
stats_.word_count = 1;
// Set up all words ready for recognition, so that if parallelism is on
// all the input and output classes are ready to run the classifier.
GenericVector<WordData> words;
SetupAllWordsPassN(1, target_word_box, word_config, page_res, &words);
if (tessedit_parallelize) {
PrerecAllWordsPar(words);
}
word_index = 0;
stats_.word_count = words.size();
stats_.dict_words = 0;
stats_.doc_blob_quality = 0;
@ -237,56 +326,15 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
stats_.doc_good_char_quality = 0;
most_recently_used_ = this;
// Run pass 1 word recognition.
if (!RecogAllWordsPassN(1, monitor, &words)) return false;
// Pass 1 post-processing.
while (page_res_it.word() != NULL) {
set_global_loc_code(LOC_PASS1);
word_index++;
if (monitor != NULL) {
monitor->ocr_alive = TRUE;
monitor->progress = 30 + 50 * word_index / stats_.word_count;
if (monitor->deadline_exceeded() ||
(monitor->cancel != NULL && (*monitor->cancel)(monitor->cancel_this,
stats_.dict_words)))
return false;
}
if (target_word_box &&
!ProcessTargetWord(page_res_it.word()->word->bounding_box(),
*target_word_box, word_config, 1)) {
page_res_it.forward();
continue;
}
classify_word_and_language(&Tesseract::classify_word_pass1,
page_res_it.block()->block,
page_res_it.row()->row,
page_res_it.word());
if (page_res_it.word()->word->flag(W_REP_CHAR)) {
fix_rep_char(&page_res_it);
page_res_it.forward();
continue;
}
if (tessedit_dump_choices) {
word_dumper(NULL, page_res_it.row()->row, page_res_it.word());
tprintf("Pass1: %s [%s]\n",
page_res_it.word()->best_choice->unichar_string().string(),
page_res_it.word()->best_choice->debug_string().string());
}
// tessedit_test_adaption enables testing of the accuracy of the
// input to the adaptive classifier.
if (tessedit_test_adaption && !tessedit_minimal_rejection) {
if (!word_adaptable (page_res_it.word(),
tessedit_test_adaption_mode)) {
page_res_it.word()->reject_map.rej_word_tess_failure();
// FAKE PERM REJ
} else {
// Override rejection mechanisms for this word.
UNICHAR_ID space = unicharset.unichar_to_id(" ");
for (int i = 0; i < page_res_it.word()->best_choice->length(); i++) {
if ((page_res_it.word()->best_choice->unichar_id(i) != space) &&
page_res_it.word()->reject_map[i].rejected())
page_res_it.word()->reject_map[i].setrej_minimal_rej_accept();
}
}
}
// Count dict words.
if (page_res_it.word()->best_choice->permuter() == USER_DAWG_PERM)
@ -307,49 +355,26 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
if (dopasses == 1) return true;
// ****************** Pass 2 *******************
page_res_it.restart_page();
word_index = 0;
most_recently_used_ = this;
while (tessedit_tess_adaption_mode != 0x0 && !tessedit_test_adaption &&
page_res_it.word() != NULL) {
set_global_loc_code(LOC_PASS2);
word_index++;
if (monitor != NULL) {
monitor->ocr_alive = TRUE;
monitor->progress = 80 + 10 * word_index / stats_.word_count;
if (monitor->deadline_exceeded() ||
(monitor->cancel != NULL && (*monitor->cancel)(monitor->cancel_this,
stats_.dict_words)))
return false;
if (tessedit_tess_adaption_mode != 0x0 && !tessedit_test_adaption) {
page_res_it.restart_page();
GenericVector<WordData> words;
SetupAllWordsPassN(2, target_word_box, word_config, page_res, &words);
if (tessedit_parallelize) {
PrerecAllWordsPar(words);
}
// changed by jetsoft
// specific to its needs to extract one word when need
if (target_word_box &&
!ProcessTargetWord(page_res_it.word()->word->bounding_box(),
*target_word_box, word_config, 2)) {
most_recently_used_ = this;
// Run pass 2 word recognition.
if (!RecogAllWordsPassN(2, monitor, &words)) return false;
// Pass 2 post-processing.
while (page_res_it.word() != NULL) {
WERD_RES* word = page_res_it.word();
if (word->word->flag(W_REP_CHAR) && !word->done) {
fix_rep_char(&page_res_it);
page_res_it.forward();
continue;
}
page_res_it.forward();
continue;
}
// end jetsoft
classify_word_and_language(&Tesseract::classify_word_pass2,
page_res_it.block()->block,
page_res_it.row()->row,
page_res_it.word());
if (page_res_it.word()->word->flag(W_REP_CHAR) &&
!page_res_it.word()->done) {
fix_rep_char(&page_res_it);
page_res_it.forward();
continue;
}
if (tessedit_dump_choices) {
word_dumper(NULL, page_res_it.row()->row, page_res_it.word());
tprintf("Pass2: %s [%s]\n",
page_res_it.word()->best_choice->unichar_string().string(),
page_res_it.word()->best_choice->debug_string().string());
}
page_res_it.forward();
}
// The next passes can only be run if tesseract has been used, as cube
@ -384,6 +409,7 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
// Check the correctness of the final results.
blamer_pass(page_res);
}
script_pos_pass(page_res);
// Write results pass.
set_global_loc_code(LOC_WRITE_RESULTS);
@ -672,6 +698,46 @@ void Tesseract::blamer_pass(PAGE_RES* page_res) {
}
}
// Sets script positions and detects smallcaps on all output words.
// Sets script positions and detects smallcaps on all output words.
// A word is flagged small_caps when its x-height sits in a band around
// kXHeightCapRatio * block x-height AND its best choice contains at least
// one upper-case and no lower-case characters (only meaningful for scripts
// that have an x-height).
void Tesseract::script_pos_pass(PAGE_RES* page_res) {
PAGE_RES_IT page_res_it(page_res);
for (page_res_it.restart_page(); page_res_it.word() != NULL;
page_res_it.forward()) {
WERD_RES* word = page_res_it.word();
if (word->word->flag(W_REP_CHAR)) {
// NOTE(review): this forward() plus the loop's own forward() advances
// two words past a rep-char word — confirm the extra skip is intended.
page_res_it.forward();
continue;
}
float x_height = page_res_it.block()->block->x_height();
float word_x_height = word->x_height;
// Clamp the word's x-height into the range the best choice allows,
// falling back to the midpoint when it is outside.
if (word_x_height < word->best_choice->min_x_height() ||
word_x_height > word->best_choice->max_x_height()) {
word_x_height = (word->best_choice->min_x_height() +
word->best_choice->max_x_height()) / 2.0f;
}
// Test for small caps. Word capheight must be close to block xheight,
// and word must contain no lower case letters, and at least one upper case.
double small_cap_xheight = x_height * kXHeightCapRatio;
double small_cap_delta = (x_height - small_cap_xheight) / 2.0;
if (word->uch_set->script_has_xheight() &&
small_cap_xheight - small_cap_delta <= word_x_height &&
word_x_height <= small_cap_xheight + small_cap_delta) {
// Scan for upper/lower.
int num_upper = 0;
int num_lower = 0;
for (int i = 0; i < word->best_choice->length(); ++i) {
if (word->uch_set->get_isupper(word->best_choice->unichar_id(i)))
++num_upper;
else if (word->uch_set->get_islower(word->best_choice->unichar_id(i)))
++num_lower;
}
if (num_upper > 0 && num_lower == 0)
word->small_caps = true;
}
word->SetScriptPositions();
}
}
// Helper returns true if the new_word is better than the word, using a
// simple test of better certainty AND rating (to reduce false positives
// from cube) or a dictionary vs non-dictionary word.
@ -701,38 +767,33 @@ static bool NewWordBetter(const WERD_RES& word, const WERD_RES& new_word,
// Helper to recognize the word using the given (language-specific) tesseract.
// Returns true if the result was better than previously.
bool Tesseract::RetryWithLanguage(WERD_RES *word, BLOCK* block, ROW *row,
bool Tesseract::RetryWithLanguage(const WERD_RES& best_word,
WordData* word_data, WERD_RES* word,
WordRecognizer recognizer) {
if (classify_debug_level || cube_debug_level) {
tprintf("Retrying word using lang %s, oem %d\n",
lang.string(), static_cast<int>(tessedit_ocr_engine_mode));
}
// Setup a trial WERD_RES in which to classify.
WERD_RES lang_word;
lang_word.InitForRetryRecognition(*word);
// Run the recognizer on the word.
// Initial version is a bit of a hack based on better certainty and rating
// (to reduce false positives from cube) or a dictionary vs non-dictionary
// word.
(this->*recognizer)(block, row, &lang_word);
bool new_is_better = NewWordBetter(*word, lang_word,
(this->*recognizer)(word_data, word);
bool new_is_better = NewWordBetter(best_word, *word,
classify_max_rating_ratio,
classify_max_certainty_margin);
if (classify_debug_level || cube_debug_level) {
if (lang_word.best_choice == NULL) {
tprintf("New result %s better:%s\n",
if (word->best_choice == NULL) {
tprintf("NULL result %s better!\n",
new_is_better ? "IS" : "NOT");
} else {
tprintf("New result %s better:%s, r=%g, c=%g\n",
new_is_better ? "IS" : "NOT",
lang_word.best_choice->unichar_string().string(),
lang_word.best_choice->rating(),
lang_word.best_choice->certainty());
word->best_choice->unichar_string().string(),
word->best_choice->rating(),
word->best_choice->certainty());
}
}
if (new_is_better) {
word->ConsumeWordResults(&lang_word);
}
return new_is_better;
}
@ -743,9 +804,9 @@ bool Tesseract::RetryWithLanguage(WERD_RES *word, BLOCK* block, ROW *row,
// If recognition was not successful, tries all available languages until
// it gets a successful result or runs out of languages. Keeps the best result.
void Tesseract::classify_word_and_language(WordRecognizer recognizer,
BLOCK* block,
ROW *row,
WERD_RES *word) {
WordData* word_data) {
// Points to the best result. May be word or in lang_words.
WERD_RES* word = word_data->word;
clock_t start_t = clock();
if (classify_debug_level || cube_debug_level) {
tprintf("Processing word with lang %s at:",
@ -755,15 +816,23 @@ void Tesseract::classify_word_and_language(WordRecognizer recognizer,
const char* result_type = "Initial";
bool initially_done = !word->tess_failed && word->done;
if (initially_done) {
// If done on pass1, we reuse the tesseract that did it, and don't try
// any more. The only need to call the classifier at all is for the
// cube combiner and xheight fixing (which may be bogus on a done word.)
// If done on pass1, leave it as-is.
most_recently_used_ = word->tesseract;
result_type = "Already done";
} else {
if (most_recently_used_ != this) {
// Point to the word for most_recently_used_.
for (int s = 0; s < sub_langs_.size(); ++s) {
if (most_recently_used_ == sub_langs_[s]) {
word = &word_data->lang_words[s];
break;
}
}
}
(most_recently_used_->*recognizer)(word_data, word);
if (!word->tess_failed && word->tess_accepted)
result_type = "Accepted";
}
(most_recently_used_->*recognizer)(block, row, word);
if (!word->tess_failed && word->tess_accepted)
result_type = "Accepted";
if (classify_debug_level || cube_debug_level) {
tprintf("%s result: %s r=%.4g, c=%.4g, accepted=%d, adaptable=%d"
" xht=[%g,%g]\n",
@ -782,11 +851,31 @@ void Tesseract::classify_word_and_language(WordRecognizer recognizer,
if (classify_debug_level) {
tprintf("Retrying with main-Tesseract, lang: %s\n", lang.string());
}
if (RetryWithLanguage(word, block, row, recognizer)) {
most_recently_used_ = this;
if (!word->tess_failed && word->tess_accepted)
return; // No need to look at the others.
if (word_data->word->tesseract == this) {
// This is pass1, and we are trying the main language.
if (RetryWithLanguage(*word, word_data, word_data->word, recognizer)) {
most_recently_used_ = this;
word = word_data->word;
}
} else {
// This is pass2, and we are trying the main language again, but it
// has no word allocated to it, so we must re-initialize it.
WERD_RES main_word(*word_data->word);
main_word.InitForRetryRecognition(*word_data->word);
main_word.SetupForRecognition(unicharset, this, BestPix(),
tessedit_ocr_engine_mode, NULL,
classify_bln_numeric_mode,
textord_use_cjk_fp_model,
poly_allow_detailed_fx,
word_data->row, word_data->block);
if (RetryWithLanguage(*word, word_data, &main_word, recognizer)) {
most_recently_used_ = this;
word_data->word->ConsumeWordResults(&main_word);
word = word_data->word;
}
}
if (!word->tess_failed && word->tess_accepted)
return; // No need to look at the others.
}
for (int i = 0; i < sub_langs_.size(); ++i) {
@ -795,14 +884,21 @@ void Tesseract::classify_word_and_language(WordRecognizer recognizer,
tprintf("Retrying with sub-Tesseract[%d] lang: %s\n",
i, sub_langs_[i]->lang.string());
}
if (sub_langs_[i]->RetryWithLanguage(word, block, row, recognizer)) {
if (sub_langs_[i]->RetryWithLanguage(*word, word_data,
&word_data->lang_words[i],
recognizer)) {
most_recently_used_ = sub_langs_[i];
word = &word_data->lang_words[i];
if (!word->tess_failed && word->tess_accepted)
return; // No need to look at the others.
break; // No need to look at the others.
}
}
}
}
if (word != word_data->word) {
// Move the result for the best language to the main word.
word_data->word->ConsumeWordResults(word);
}
clock_t ocr_t = clock();
if (tessedit_timing_debug) {
tprintf("%s (ocr took %.2f sec)\n",
@ -817,7 +913,11 @@ void Tesseract::classify_word_and_language(WordRecognizer recognizer,
* Baseline normalize the word and pass it to Tess.
*/
void Tesseract::classify_word_pass1(BLOCK* block, ROW *row, WERD_RES *word) {
void Tesseract::classify_word_pass1(WordData* word_data, WERD_RES* word) {
ROW* row = word_data->row;
BLOCK* block = word_data->block;
prev_word_best_choice_ = word_data->prev_word != NULL
? word_data->prev_word->word->best_choice : NULL;
// If we only intend to run cube - run it and return.
if (tessedit_ocr_engine_mode == OEM_CUBE_ONLY) {
cube_word_pass1(block, row, word);
@ -880,6 +980,10 @@ bool Tesseract::TrainedXheightFix(WERD_RES *word, BLOCK* block, ROW *row) {
}
new_x_ht_word.x_height = new_x_ht;
new_x_ht_word.caps_height = 0.0;
new_x_ht_word.SetupForRecognition(
unicharset, this, BestPix(), tessedit_ocr_engine_mode, NULL,
classify_bln_numeric_mode, textord_use_cjk_fp_model,
poly_allow_detailed_fx, row, block);
match_word_pass_n(2, &new_x_ht_word, row, block);
if (!new_x_ht_word.tess_failed) {
int new_misfits = CountMisfitTops(&new_x_ht_word);
@ -916,11 +1020,15 @@ bool Tesseract::TrainedXheightFix(WERD_RES *word, BLOCK* block, ROW *row) {
* Control what to do with the word in pass 2
*/
void Tesseract::classify_word_pass2(BLOCK* block, ROW *row, WERD_RES *word) {
void Tesseract::classify_word_pass2(WordData* word_data, WERD_RES* word) {
// Return if we do not want to run Tesseract.
if (tessedit_ocr_engine_mode != OEM_TESSERACT_ONLY &&
tessedit_ocr_engine_mode != OEM_TESSERACT_CUBE_COMBINED)
return;
ROW* row = word_data->row;
BLOCK* block = word_data->block;
prev_word_best_choice_ = word_data->prev_word != NULL
? word_data->prev_word->word->best_choice : NULL;
set_global_subloc_code(SUBLOC_NORM);
check_debug_pt(word, 30);
@ -940,26 +1048,6 @@ void Tesseract::classify_word_pass2(BLOCK* block, ROW *row, WERD_RES *word) {
// Use the tops and bottoms since they are available.
TrainedXheightFix(word, block, row);
}
// Test for small caps. Word capheight must be close to block xheight,
// and word must contain no lower case letters, and at least one upper case.
double small_cap_xheight = block->x_height() * kXHeightCapRatio;
double small_cap_delta = (block->x_height() - small_cap_xheight) / 2.0;
if (unicharset.script_has_xheight() &&
small_cap_xheight - small_cap_delta <= word->x_height &&
word->x_height <= small_cap_xheight + small_cap_delta) {
// Scan for upper/lower.
int num_upper = 0;
int num_lower = 0;
for (int i = 0; i < word->best_choice->length(); ++i) {
if (unicharset.get_isupper(word->best_choice->unichar_id(i)))
++num_upper;
else if (unicharset.get_islower(word->best_choice->unichar_id(i)))
++num_lower;
}
if (num_upper > 0 && num_lower == 0)
word->small_caps = true;
}
word->SetScriptPositions();
set_global_subloc_code(SUBLOC_NORM);
}
@ -988,12 +1076,8 @@ void Tesseract::classify_word_pass2(BLOCK* block, ROW *row, WERD_RES *word) {
void Tesseract::match_word_pass_n(int pass_n, WERD_RES *word,
ROW *row, BLOCK* block) {
if (word->SetupForTessRecognition(unicharset, this, BestPix(),
classify_bln_numeric_mode,
textord_use_cjk_fp_model,
poly_allow_detailed_fx,
row, block))
tess_segment_pass_n(pass_n, word);
if (word->tess_failed) return;
tess_segment_pass_n(pass_n, word);
if (!word->tess_failed) {
if (!word->word->flag (W_REP_CHAR)) {
@ -1136,12 +1220,12 @@ void Tesseract::ExplodeRepeatedWord(BLOB_CHOICE* best_choice,
WERD_RES* rep_word =
page_res_it->InsertSimpleCloneWord(*word_res, blob_word);
// Setup the single char WERD_RES
if (rep_word->SetupForTessRecognition(*word_res->uch_set, this, BestPix(),
false,
textord_use_cjk_fp_model,
poly_allow_detailed_fx,
page_res_it->row()->row,
page_res_it->block()->block)) {
if (rep_word->SetupForRecognition(*word_res->uch_set, this, BestPix(),
tessedit_ocr_engine_mode, NULL, false,
textord_use_cjk_fp_model,
poly_allow_detailed_fx,
page_res_it->row()->row,
page_res_it->block()->block)) {
rep_word->CloneChoppedToRebuild();
BLOB_CHOICE* blob_choice = new BLOB_CHOICE(*best_choice);
rep_word->FakeClassifyWord(1, &blob_choice);

View File

@ -197,6 +197,9 @@ void Tesseract::run_cube_combiner(PAGE_RES *page_res) {
// Iterate through the word results and call cube on each word.
for (page_res_it.restart_page(); page_res_it.word () != NULL;
page_res_it.forward()) {
BLOCK* block = page_res_it.block()->block;
if (block->poly_block() != NULL && !block->poly_block()->IsText())
continue; // Don't deal with non-text blocks.
WERD_RES* word = page_res_it.word();
// Skip cube entirely if tesseract's certainty is greater than threshold.
int combiner_run_thresh = convert_prob_to_tess_certainty(
@ -210,6 +213,11 @@ void Tesseract::run_cube_combiner(PAGE_RES *page_res) {
// Setup a trial WERD_RES in which to classify with cube.
WERD_RES cube_word;
cube_word.InitForRetryRecognition(*word);
cube_word.SetupForRecognition(lang_tess->unicharset, this, BestPix(),
OEM_CUBE_ONLY,
NULL, false, false, false,
page_res_it.row()->row,
page_res_it.block()->block);
CubeObject *cube_obj = lang_tess->cube_recognize_word(
page_res_it.block()->block, &cube_word);
if (cube_obj != NULL)
@ -317,10 +325,6 @@ void Tesseract::cube_combine_word(CubeObject* cube_obj, WERD_RES* cube_word,
**********************************************************************/
bool Tesseract::cube_recognize(CubeObject *cube_obj, BLOCK* block,
WERD_RES *word) {
if (!word->SetupForCubeRecognition(unicharset, this, block)) {
return false; // Graphics block.
}
// Run cube
WordAltList *cube_alt_list = cube_obj->RecognizeWord();
if (!cube_alt_list || cube_alt_list->AltCount() <= 0) {

View File

@ -204,8 +204,9 @@ void Tesseract::match_current_words(WERD_RES_LIST &words, ROW *row,
for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
word = word_it.data();
if ((!word->part_of_combo) && (word->box_word == NULL)) {
classify_word_and_language(&Tesseract::classify_word_pass2,
block, row, word);
WordData word_data(block, row, word);
SetupWordPassN(2, &word_data);
classify_word_and_language(&Tesseract::classify_word_pass2, &word_data);
}
prev_word_best_choice_ = word->best_choice;
}

View File

@ -731,10 +731,12 @@ BOOL8 Tesseract:: word_blank_and_set_display(BLOCK* block, ROW* row,
BOOL8 Tesseract::word_bln_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
TWERD *bln_word = word_res->chopped_word;
if (bln_word == NULL) {
word_res->SetupForTessRecognition(unicharset, this, BestPix(), false,
textord_use_cjk_fp_model,
poly_allow_detailed_fx,
row, block);
word_res->SetupForRecognition(unicharset, this, BestPix(),
tessedit_ocr_engine_mode, NULL,
classify_bln_numeric_mode,
textord_use_cjk_fp_model,
poly_allow_detailed_fx,
row, block);
bln_word = word_res->chopped_word;
}
bln_word_window_handle()->Clear();
@ -963,10 +965,12 @@ void Tesseract::blob_feature_display(PAGE_RES* page_res,
if (word != NULL) {
WERD_RES word_res(word);
word_res.x_height = row->x_height();
word_res.SetupForTessRecognition(unicharset, this, BestPix(), false,
textord_use_cjk_fp_model,
poly_allow_detailed_fx,
row, block);
word_res.SetupForRecognition(unicharset, this, BestPix(),
tessedit_ocr_engine_mode, NULL,
classify_bln_numeric_mode,
textord_use_cjk_fp_model,
poly_allow_detailed_fx,
row, block);
TWERD* bln_word = word_res.chopped_word;
TBLOB* bln_blob = bln_word->blobs[0];
INT_FX_RESULT_STRUCT fx_info;

View File

@ -203,7 +203,9 @@ void Tesseract::ambigs_classify_and_output(WERD_RES *werd_res,
FILE *output_file) {
// Classify word.
fflush(stdout);
classify_word_pass1(block_res->block, row_res->row, werd_res);
WordData word_data(block_res->block, row_res->row, werd_res);
SetupWordPassN(1, &word_data);
classify_word_pass1(&word_data, werd_res);
WERD_CHOICE *best_choice = werd_res->best_choice;
ASSERT_HOST(best_choice != NULL);

View File

@ -402,6 +402,8 @@ Tesseract::Tesseract()
"for layout analysis.", this->params()),
BOOL_MEMBER(textord_equation_detect, false, "Turn on equation detector",
this->params()),
INT_MEMBER(tessedit_parallelize, 0, "Run in parallel where possible",
this->params()),
// The following parameters were deprecated and removed from their original
// locations. The parameters are temporarily kept here to give Tesseract
@ -528,7 +530,6 @@ void Tesseract::Clear() {
reskew_ = FCOORD(1.0f, 0.0f);
splitter_.Clear();
scaled_factor_ = -1;
ResetFeaturesHaveBeenExtracted();
for (int i = 0; i < sub_langs_.size(); ++i)
sub_langs_[i]->Clear();
}

View File

@ -100,10 +100,6 @@ class EquationDetect;
class Tesseract;
class TesseractCubeCombiner;
typedef void (Tesseract::*WordRecognizer)(BLOCK* block,
ROW *row,
WERD_RES *word);
// A collection of various variables for statistics and debugging.
struct TesseractStats {
TesseractStats()
@ -136,6 +132,24 @@ struct TesseractStats {
bool write_results_empty_block;
};
// Struct to hold all the pointers to relevant data for processing a word.
// Bundles the pointers needed to classify one word independently of the
// page iterator, enabling the word loop to run over a flat vector.
struct WordData {
WordData() : word(NULL), row(NULL), block(NULL), prev_word(NULL) {}
// Captures the current word/row/block from a page iterator position.
explicit WordData(const PAGE_RES_IT& page_res_it)
: word(page_res_it.word()), row(page_res_it.row()->row),
block(page_res_it.block()->block), prev_word(NULL) {}
WordData(BLOCK* block_in, ROW* row_in, WERD_RES* word_res)
: word(word_res), row(row_in), block(block_in), prev_word(NULL) {}
WERD_RES* word;       // The word to recognize (not owned).
ROW* row;             // Enclosing text row (not owned).
BLOCK* block;         // Enclosing block (not owned).
WordData* prev_word;  // Previous word in reading order, NULL for the first.
// Per-sub-language trial results, parallel to Tesseract::sub_langs_.
GenericVector<WERD_RES> lang_words;
};
typedef void (Tesseract::*WordRecognizer)(WordData* word_data, WERD_RES* word);
class Tesseract : public Wordrec {
public:
Tesseract();
@ -250,10 +264,23 @@ class Tesseract : public Wordrec {
bool single_column, bool osd, bool only_osd,
BLOCK_LIST* blocks, Tesseract* osd_tess, OSResults* osr,
TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix, Pix** music_mask_pix);
// par_control.cpp
void PrerecAllWordsPar(const GenericVector<WordData>& words);
//// control.h /////////////////////////////////////////////////////////
bool ProcessTargetWord(const TBOX& word_box, const TBOX& target_word_box,
const char* word_config, int pass);
// Sets up the words ready for whichever engine is to be run
void SetupAllWordsPassN(int pass_n,
const TBOX* target_word_box,
const char* word_config,
PAGE_RES* page_res,
GenericVector<WordData>* words);
// Sets up the single word ready for whichever engine is to be run.
void SetupWordPassN(int pass_n, WordData* word);
// Runs word recognition on all the words.
bool RecogAllWordsPassN(int pass_n, ETEXT_DESC* monitor,
GenericVector<WordData>* words);
bool recog_all_words(PAGE_RES* page_res,
ETEXT_DESC* monitor,
const TBOX* target_word_box,
@ -265,13 +292,15 @@ class Tesseract : public Wordrec {
const char* word_config);
void bigram_correction_pass(PAGE_RES *page_res);
void blamer_pass(PAGE_RES* page_res);
// Sets script positions and detects smallcaps on all output words.
void script_pos_pass(PAGE_RES* page_res);
// Helper to recognize the word using the given (language-specific) tesseract.
// Returns true if the result was better than previously.
bool RetryWithLanguage(WERD_RES *word, BLOCK* block, ROW *row,
WordRecognizer recognizer);
bool RetryWithLanguage(const WERD_RES& best_word, WordData* word_data,
WERD_RES* word, WordRecognizer recognizer);
void classify_word_and_language(WordRecognizer recognizer,
BLOCK* block, ROW *row, WERD_RES *word);
void classify_word_pass1(BLOCK* block, ROW *row, WERD_RES *word);
WordData* word_data);
void classify_word_pass1(WordData* word_data, WERD_RES* word);
void recog_pseudo_word(PAGE_RES* page_res, // blocks to check
TBOX &selection_box);
@ -282,7 +311,7 @@ class Tesseract : public Wordrec {
const char *s,
const char *lengths);
void match_word_pass_n(int pass_n, WERD_RES *word, ROW *row, BLOCK* block);
void classify_word_pass2(BLOCK* block, ROW *row, WERD_RES *word);
void classify_word_pass2(WordData* word_data, WERD_RES* word);
void ReportXhtFixResult(bool accept_new_word, float new_x_ht,
WERD_RES* word, WERD_RES* new_word);
bool RunOldFixXht(WERD_RES *word, BLOCK* block, ROW *row);
@ -936,6 +965,7 @@ class Tesseract : public Wordrec {
"Only initialize with the config file. Useful if the instance is "
"not going to be used for OCR but say only for layout analysis.");
BOOL_VAR_H(textord_equation_detect, false, "Turn on equation detector");
INT_VAR_H(tessedit_parallelize, 0, "Run in parallel where possible");
// The following parameters were deprecated and removed from their original
// locations. The parameters are temporarily kept here to give Tesseract

View File

@ -741,19 +741,36 @@ TWERD* TWERD::PolygonalCopy(bool allow_detailed_fx, WERD* src) {
// DENORMs in the blobs.
void TWERD::BLNormalize(const BLOCK* block, const ROW* row, Pix* pix,
bool inverse, float x_height, bool numeric_mode,
tesseract::OcrEngineMode hint,
const TBOX* norm_box,
DENORM* word_denorm) {
TBOX word_box = bounding_box();
if (norm_box != NULL) word_box = *norm_box;
float word_middle = (word_box.left() + word_box.right()) / 2.0f;
float input_y_offset = 0.0f;
float final_y_offset = static_cast<float>(kBlnBaselineOffset);
float scale = kBlnXHeight / x_height;
if (hint == tesseract::OEM_CUBE_ONLY || row == NULL) {
word_middle = word_box.left();
input_y_offset = word_box.bottom();
final_y_offset = 0.0f;
if (hint == tesseract::OEM_CUBE_ONLY)
scale = 1.0f;
} else {
input_y_offset = row->base_line(word_middle);
}
for (int b = 0; b < blobs.size(); ++b) {
TBLOB* blob = blobs[b];
TBOX blob_box = blob->bounding_box();
float mid_x = (blob_box.left() + blob_box.right()) / 2.0f;
float baseline = row->base_line(mid_x);
float scale = kBlnXHeight / x_height;
float baseline = input_y_offset;
float blob_scale = scale;
if (numeric_mode) {
baseline = blob_box.bottom();
scale = ClipToRange(kBlnXHeight * 4.0f / (3 * blob_box.height()),
scale, scale * 1.5f);
blob_scale = ClipToRange(kBlnXHeight * 4.0f / (3 * blob_box.height()),
scale, scale * 1.5f);
} else if (row != NULL && hint != tesseract::OEM_CUBE_ONLY) {
baseline = row->base_line(mid_x);
}
// The image will be 8-bit grey if the input was grey or color. Note that in
// a grey image 0 is black and 255 is white. If the input was binary, then
@ -761,16 +778,13 @@ void TWERD::BLNormalize(const BLOCK* block, const ROW* row, Pix* pix,
// To tell the difference pixGetDepth() will return 8 or 1.
// The inverse flag will be true iff the word has been determined to be
// white on black, and is independent of whether the pix is 8 bit or 1 bit.
blob->Normalize(block, NULL, NULL, word_middle, baseline, scale, scale,
0.0f, static_cast<float>(kBlnBaselineOffset),
inverse, pix);
blob->Normalize(block, NULL, NULL, word_middle, baseline, blob_scale,
blob_scale, 0.0f, final_y_offset, inverse, pix);
}
if (word_denorm != NULL) {
float scale = kBlnXHeight / x_height;
word_denorm->SetupNormalization(block, NULL, NULL, word_middle,
row->base_line(word_middle),
scale, scale, 0.0f,
static_cast<float>(kBlnBaselineOffset));
input_y_offset, scale, scale,
0.0f, final_y_offset);
word_denorm->set_inverse(inverse);
word_denorm->set_pix(pix);
}

View File

@ -31,6 +31,7 @@
----------------------------------------------------------------------*/
#include "clst.h"
#include "normalis.h"
#include "publictypes.h"
#include "rect.h"
#include "vecfuncs.h"
@ -316,7 +317,10 @@ struct TWERD {
// Baseline normalizes the blobs in-place, recording the normalization in the
// DENORMs in the blobs.
void BLNormalize(const BLOCK* block, const ROW* row, Pix* pix, bool inverse,
float x_height, bool numeric_mode, DENORM* word_denorm);
float x_height, bool numeric_mode,
tesseract::OcrEngineMode hint,
const TBOX* norm_box,
DENORM* word_denorm);
// Copies the data and the blobs, but leaves next untouched.
void CopyFrom(const TWERD& src);
// Deletes owned data.

View File

@ -32,6 +32,8 @@ static const double kStopperAmbiguityThresholdGain = 8.0;
// Constant offset for computing thresholds that determine the ambiguity of a
// word.
static const double kStopperAmbiguityThresholdOffset = 1.5;
// Max number of broken pieces to associate.
const int kWordrecMaxNumJoinChunks = 4;
// Computes and returns a threshold of certainty difference used to determine
// which words to keep, based on the adjustment factors of the two words.
@ -245,16 +247,25 @@ void WERD_RES::InitForRetryRecognition(const WERD_RES& source) {
// If allow_detailed_fx is true, the feature extractor will receive fine
// precision outline information, allowing smoother features and better
// features on low resolution images.
// The norm_mode_hint sets the default mode for normalization in absence
// of any of the above flags.
// norm_box is used to override the word bounding box to determine the
// normalization scale and offset.
// Returns false if the word is empty and sets up fake results.
bool WERD_RES::SetupForTessRecognition(const UNICHARSET& unicharset_in,
bool WERD_RES::SetupForRecognition(const UNICHARSET& unicharset_in,
tesseract::Tesseract* tess, Pix* pix,
int norm_mode,
const TBOX* norm_box,
bool numeric_mode,
bool use_body_size,
bool allow_detailed_fx,
ROW *row, BLOCK* block) {
ROW *row, const BLOCK* block) {
tesseract::OcrEngineMode norm_mode_hint =
static_cast<tesseract::OcrEngineMode>(norm_mode);
tesseract = tess;
POLY_BLOCK* pb = block != NULL ? block->poly_block() : NULL;
if (word->cblob_list()->empty() || (pb != NULL && !pb->IsText())) {
if ((norm_mode_hint != tesseract::OEM_CUBE_ONLY &&
word->cblob_list()->empty()) || (pb != NULL && !pb->IsText())) {
// Empty words occur when all the blobs have been moved to the rej_blobs
// list, which seems to occur frequently in junk.
SetupFake(unicharset_in);
@ -264,13 +275,17 @@ bool WERD_RES::SetupForTessRecognition(const UNICHARSET& unicharset_in,
ClearResults();
SetupWordScript(unicharset_in);
chopped_word = TWERD::PolygonalCopy(allow_detailed_fx, word);
float word_xheight = use_body_size && row->body_size() > 0.0f
float word_xheight = use_body_size && row != NULL && row->body_size() > 0.0f
? row->body_size() : x_height;
chopped_word->BLNormalize(block, row, pix, word->flag(W_INVERSE),
word_xheight, numeric_mode, &denorm);
word_xheight, numeric_mode, norm_mode_hint,
norm_box, &denorm);
blob_row = row;
SetupBasicsFromChoppedWord(unicharset_in);
SetupBlamerBundle();
int num_blobs = chopped_word->NumBlobs();
ratings = new MATRIX(num_blobs, kWordrecMaxNumJoinChunks);
tess_failed = false;
return true;
}
@ -284,30 +299,6 @@ void WERD_RES::SetupBasicsFromChoppedWord(const UNICHARSET &unicharset_in) {
ClearWordChoices();
}
// Sets up the members used in recognition:
// bln_boxes, chopped_word, seam_array, denorm, best_choice, raw_choice.
// Returns false if the word is empty and sets up fake results.
bool WERD_RES::SetupForCubeRecognition(const UNICHARSET& unicharset_in,
tesseract::Tesseract* tess,
const BLOCK* block) {
tesseract = tess;
POLY_BLOCK* pb = block != NULL ? block->poly_block() : NULL;
if (pb != NULL && !pb->IsText()) {
// Ignore words in graphic regions.
SetupFake(unicharset_in);
word->set_flag(W_REP_CHAR, false);
return false;
}
ClearResults();
SetupWordScript(unicharset_in);
TBOX word_box = word->bounding_box();
denorm.SetupNormalization(block, NULL, NULL,
word_box.left(), word_box.bottom(),
1.0f, 1.0f, 0.0f, 0.0f);
SetupBlamerBundle();
return true;
}
// Sets up the members used in recognition for an empty recognition result:
// bln_boxes, chopped_word, seam_array, denorm, best_choice, raw_choice.
void WERD_RES::SetupFake(const UNICHARSET& unicharset_in) {

View File

@ -339,7 +339,8 @@ class WERD_RES : public ELIST_LINK {
// characters purely based on their shape on the page, and by default produce
// the corresponding unicode for a left-to-right context.
const char* const BestUTF8(int blob_index, bool in_rtl_context) const {
if (blob_index < 0 || blob_index >= best_choice->length())
if (blob_index < 0 || best_choice == NULL ||
blob_index >= best_choice->length())
return NULL;
UNICHAR_ID id = best_choice->unichar_id(blob_index);
if (id < 0 || id >= uch_set->size() || id == INVALID_UNICHAR_ID)
@ -435,25 +436,22 @@ class WERD_RES : public ELIST_LINK {
// If allow_detailed_fx is true, the feature extractor will receive fine
// precision outline information, allowing smoother features and better
// features on low resolution images.
// The norm_mode sets the default mode for normalization in absence
// of any of the above flags. It should really be a tesseract::OcrEngineMode
// but is declared as int for ease of use with tessedit_ocr_engine_mode.
// Returns false if the word is empty and sets up fake results.
bool SetupForTessRecognition(const UNICHARSET& unicharset_in,
tesseract::Tesseract* tesseract, Pix* pix,
bool numeric_mode, bool use_body_size,
bool allow_detailed_fx,
ROW *row, BLOCK* block);
bool SetupForRecognition(const UNICHARSET& unicharset_in,
tesseract::Tesseract* tesseract, Pix* pix,
int norm_mode,
const TBOX* norm_box, bool numeric_mode,
bool use_body_size, bool allow_detailed_fx,
ROW *row, const BLOCK* block);
// Set up the seam array, bln_boxes, best_choice, and raw_choice to empty
// accumulators from a made chopped word. We presume the fields are already
// empty.
void SetupBasicsFromChoppedWord(const UNICHARSET &unicharset_in);
// Sets up the members used in recognition:
// bln_boxes, chopped_word, seam_array, denorm.
// Returns false if the word is empty and sets up fake results.
bool SetupForCubeRecognition(const UNICHARSET& unicharset_in,
tesseract::Tesseract* tesseract,
const BLOCK* block);
// Sets up the members used in recognition for an empty recognition result:
// bln_boxes, chopped_word, seam_array, denorm, best_choice, raw_choice.
void SetupFake(const UNICHARSET& uch);

View File

@ -530,8 +530,9 @@ void WERD_CHOICE::SetScriptPositions(bool small_caps, TWERD* word) {
// Initialize to normal.
for (int i = 0; i < length_; ++i)
script_pos_[i] = tesseract::SP_NORMAL;
if (word->blobs.empty())
if (word->blobs.empty() || word->NumBlobs() != TotalOfStates()) {
return;
}
int position_counts[4];
for (int i = 0; i < 4; i++) {

View File

@ -122,8 +122,6 @@ struct PROTO_KEY {
#define MarginalMatch(Rating) \
((Rating) > matcher_great_threshold)
#define InitIntFX() (FeaturesHaveBeenExtracted = FALSE)
/*-----------------------------------------------------------------------------
Private Function Prototypes
-----------------------------------------------------------------------------*/
@ -179,8 +177,7 @@ void Classify::AdaptiveClassifier(TBLOB *Blob,
ADAPT_RESULTS *Results = new ADAPT_RESULTS();
Results->Initialize();
if (AdaptedTemplates == NULL)
AdaptedTemplates = NewAdaptedTemplates (true);
ASSERT_HOST(AdaptedTemplates != NULL);
DoAdaptiveMatch(Blob, Results);
if (CPResults != NULL)
@ -207,7 +204,6 @@ void Classify::AdaptiveClassifier(TBLOB *Blob,
DebugAdaptiveClassifier(Blob, Results);
#endif
NumClassesOutput += Choices->length();
delete Results;
} /* AdaptiveClassifier */
@ -249,7 +245,6 @@ void Classify::LearnWord(const char* filename, WERD_RES *word) {
if (!EnableLearning || word->best_choice == NULL)
return; // Can't or won't adapt.
NumWordsAdaptedTo++;
if (classify_learning_debug_level >= 1)
tprintf("\n\nAdapting to word = %s\n",
word->best_choice->debug_string().string());
@ -480,15 +475,11 @@ void Classify::EndAdaptiveClassifier() {
FreeNormProtos();
if (AllProtosOn != NULL) {
FreeBitVector(AllProtosOn);
FreeBitVector(PrunedProtos);
FreeBitVector(AllConfigsOn);
FreeBitVector(AllProtosOff);
FreeBitVector(AllConfigsOff);
FreeBitVector(TempProtoMask);
AllProtosOn = NULL;
PrunedProtos = NULL;
AllConfigsOn = NULL;
AllProtosOff = NULL;
AllConfigsOff = NULL;
TempProtoMask = NULL;
}
@ -561,19 +552,15 @@ void Classify::InitAdaptiveClassifier(bool load_pre_trained_templates) {
static_classifier_ = new TessClassifier(false, this);
}
im_.Init(&classify_debug_level, classify_integer_matcher_multiplier);
im_.Init(&classify_debug_level);
InitIntegerFX();
AllProtosOn = NewBitVector(MAX_NUM_PROTOS);
PrunedProtos = NewBitVector(MAX_NUM_PROTOS);
AllConfigsOn = NewBitVector(MAX_NUM_CONFIGS);
AllProtosOff = NewBitVector(MAX_NUM_PROTOS);
AllConfigsOff = NewBitVector(MAX_NUM_CONFIGS);
TempProtoMask = NewBitVector(MAX_NUM_PROTOS);
set_all_bits(AllProtosOn, WordsInVectorOfSize(MAX_NUM_PROTOS));
set_all_bits(PrunedProtos, WordsInVectorOfSize(MAX_NUM_PROTOS));
set_all_bits(AllConfigsOn, WordsInVectorOfSize(MAX_NUM_CONFIGS));
zero_all_bits(AllProtosOff, WordsInVectorOfSize(MAX_NUM_PROTOS));
zero_all_bits(AllConfigsOff, WordsInVectorOfSize(MAX_NUM_CONFIGS));
for (int i = 0; i < MAX_NUM_CLASSES; i++) {
@ -617,53 +604,11 @@ void Classify::ResetAdaptiveClassifierInternal() {
NumAdaptationsFailed);
}
free_adapted_templates(AdaptedTemplates);
AdaptedTemplates = NULL;
AdaptedTemplates = NewAdaptedTemplates(true);
NumAdaptationsFailed = 0;
}
/*---------------------------------------------------------------------------*/
/**
* Print to File the statistics which have
* been gathered for the adaptive matcher.
*
* @param File open text file to print adaptive statistics to
*
* Globals: none
*
* @note Exceptions: none
* @note History: Thu Apr 18 14:37:37 1991, DSJ, Created.
*/
void Classify::PrintAdaptiveStatistics(FILE *File) {
#ifndef SECURE_NAMES
fprintf (File, "\nADAPTIVE MATCHER STATISTICS:\n");
fprintf (File, "\tNum blobs classified = %d\n", AdaptiveMatcherCalls);
fprintf (File, "\tNum classes output = %d (Avg = %4.2f)\n",
NumClassesOutput,
((AdaptiveMatcherCalls == 0) ? (0.0) :
((float) NumClassesOutput / AdaptiveMatcherCalls)));
fprintf (File, "\t\tBaseline Classifier: %4d calls (%4.2f classes/call)\n",
BaselineClassifierCalls,
((BaselineClassifierCalls == 0) ? (0.0) :
((float) NumBaselineClassesTried / BaselineClassifierCalls)));
fprintf (File, "\t\tCharNorm Classifier: %4d calls (%4.2f classes/call)\n",
CharNormClassifierCalls,
((CharNormClassifierCalls == 0) ? (0.0) :
((float) NumCharNormClassesTried / CharNormClassifierCalls)));
fprintf (File, "\t\tAmbig Classifier: %4d calls (%4.2f classes/call)\n",
AmbigClassifierCalls,
((AmbigClassifierCalls == 0) ? (0.0) :
((float) NumAmbigClassesTried / AmbigClassifierCalls)));
fprintf (File, "\nADAPTIVE LEARNER STATISTICS:\n");
fprintf (File, "\tNumber of words adapted to: %d\n", NumWordsAdaptedTo);
fprintf (File, "\tNumber of chars adapted to: %d\n", NumCharsAdaptedTo);
PrintAdaptedTemplates(File, AdaptedTemplates);
#endif
} /* PrintAdaptiveStatistics */
/*---------------------------------------------------------------------------*/
/**
@ -915,8 +860,6 @@ void Classify::AdaptToChar(TBLOB *Blob,
FEATURE_SET FloatFeatures;
int NewTempConfigId;
ResetFeaturesHaveBeenExtracted();
NumCharsAdaptedTo++;
if (!LegalClassId (ClassId))
return;
@ -932,7 +875,6 @@ void Classify::AdaptToChar(TBLOB *Blob,
if (NumFeatures <= 0)
return;
im_.SetBaseLineMatch();
// Only match configs with the matching font.
BIT_VECTOR MatchingFontConfigs = NewBitVector(MAX_NUM_PROTOS);
for (int cfg = 0; cfg < IClass->NumConfigs; ++cfg) {
@ -1004,17 +946,16 @@ void Classify::AdaptToChar(TBLOB *Blob,
void Classify::DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class) {
#ifndef GRAPHICS_DISABLED
int bloblength = 0;
INT_FEATURE_ARRAY features;
uinT8* norm_array = new uinT8[unicharset.size()];
int num_features = GetBaselineFeatures(blob, PreTrainedTemplates,
features,
norm_array, &bloblength);
delete [] norm_array;
INT_RESULT_STRUCT IntResult;
INT_FX_RESULT_STRUCT fx_info;
GenericVector<INT_FEATURE_STRUCT> bl_features;
TrainingSample* sample =
BlobToTrainingSample(*blob, classify_nonlinear_norm, &fx_info,
&bl_features);
if (sample == NULL) return;
INT_RESULT_STRUCT IntResult;
im_.Match(int_class, AllProtosOn, AllConfigsOn,
num_features, features,
bl_features.size(), &bl_features[0],
&IntResult, classify_adapt_feature_threshold,
NO_DEBUG, matcher_debug_separate_windows);
cprintf ("Best match to temp config %d = %4.1f%%.\n",
@ -1024,7 +965,7 @@ void Classify::DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class) {
ConfigMask = 1 << IntResult.Config;
ShowMatchDisplay();
im_.Match(int_class, AllProtosOn, (BIT_VECTOR)&ConfigMask,
num_features, features,
bl_features.size(), &bl_features[0],
&IntResult, classify_adapt_feature_threshold,
6 | 0x19, matcher_debug_separate_windows);
UpdateMatchDisplay();
@ -1033,50 +974,6 @@ void Classify::DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class) {
}
/*---------------------------------------------------------------------------*/
/**
* @param Blob blob to add to templates for ClassId
* @param ClassId class to add blob to
* @param FontinfoId font information from pre-trained teamples
* @param Threshold minimum match rating to existing template
*
* Globals:
* - PreTrainedTemplates current set of built-in templates
*
* @note Exceptions: none
* @note History: Thu Mar 14 09:36:03 1991, DSJ, Created.
*/
void Classify::AdaptToPunc(TBLOB *Blob,
CLASS_ID ClassId,
int FontinfoId,
FLOAT32 Threshold) {
ADAPT_RESULTS *Results = new ADAPT_RESULTS();
int i;
Results->Initialize();
CharNormClassifier(Blob, PreTrainedTemplates, Results);
RemoveBadMatches(Results);
if (Results->NumMatches != 1) {
if (classify_learning_debug_level >= 1) {
cprintf ("Rejecting punc = %s (Alternatives = ",
unicharset.id_to_unichar(ClassId));
for (i = 0; i < Results->NumMatches; i++)
tprintf("%s", unicharset.id_to_unichar(Results->match[i].unichar_id));
tprintf(")\n");
}
} else {
#ifndef SECURE_NAMES
if (classify_learning_debug_level >= 1)
cprintf ("Adapting to punc = %s, thr= %g\n",
unicharset.id_to_unichar(ClassId), Threshold);
#endif
AdaptToChar(Blob, ClassId, FontinfoId, Threshold);
}
delete Results;
} /* AdaptToPunc */
/*---------------------------------------------------------------------------*/
/**
@ -1167,50 +1064,41 @@ void Classify::AddNewResult(ADAPT_RESULTS *results,
* @note Exceptions: none
* @note History: Tue Mar 12 19:40:36 1991, DSJ, Created.
*/
void Classify::AmbigClassifier(TBLOB *Blob,
INT_TEMPLATES Templates,
ADAPT_CLASS *Classes,
UNICHAR_ID *Ambiguities,
ADAPT_RESULTS *Results) {
int NumFeatures;
INT_FEATURE_ARRAY IntFeatures;
void Classify::AmbigClassifier(
const GenericVector<INT_FEATURE_STRUCT>& int_features,
const INT_FX_RESULT_STRUCT& fx_info,
const TBLOB *blob,
INT_TEMPLATES templates,
ADAPT_CLASS *classes,
UNICHAR_ID *ambiguities,
ADAPT_RESULTS *results) {
if (int_features.empty()) return;
uinT8* CharNormArray = new uinT8[unicharset.size()];
INT_RESULT_STRUCT IntResult;
CLASS_ID ClassId;
AmbigClassifierCalls++;
NumFeatures = GetCharNormFeatures(Blob, Templates, IntFeatures,
NULL, CharNormArray,
&(Results->BlobLength));
if (NumFeatures <= 0) {
delete [] CharNormArray;
return;
}
results->BlobLength = GetCharNormFeature(fx_info, templates, NULL,
CharNormArray);
bool debug = matcher_debug_level >= 2 || classify_debug_level > 1;
if (debug)
tprintf("AM Matches = ");
int top = Blob->bounding_box().top();
int bottom = Blob->bounding_box().bottom();
while (*Ambiguities >= 0) {
ClassId = *Ambiguities;
int top = blob->bounding_box().top();
int bottom = blob->bounding_box().bottom();
while (*ambiguities >= 0) {
CLASS_ID class_id = *ambiguities;
im_.SetCharNormMatch(classify_integer_matcher_multiplier);
im_.Match(ClassForClassId(Templates, ClassId),
im_.Match(ClassForClassId(templates, class_id),
AllProtosOn, AllConfigsOn,
NumFeatures, IntFeatures,
int_features.size(), &int_features[0],
&IntResult,
classify_adapt_feature_threshold, NO_DEBUG,
matcher_debug_separate_windows);
ExpandShapesAndApplyCorrections(NULL, debug, ClassId, bottom, top, 0,
Results->BlobLength, CharNormArray,
IntResult, Results);
Ambiguities++;
NumAmbigClassesTried++;
ExpandShapesAndApplyCorrections(NULL, debug, class_id, bottom, top, 0,
results->BlobLength,
classify_integer_matcher_multiplier,
CharNormArray, IntResult, results);
ambiguities++;
}
delete [] CharNormArray;
} /* AmbigClassifier */
@ -1225,6 +1113,7 @@ void Classify::MasterMatcher(INT_TEMPLATES templates,
ADAPT_CLASS* classes,
int debug,
int num_classes,
int matcher_multiplier,
const TBOX& blob_box,
CLASS_PRUNER_RESULTS results,
ADAPT_RESULTS* final_results) {
@ -1246,7 +1135,8 @@ void Classify::MasterMatcher(INT_TEMPLATES templates,
bool debug = matcher_debug_level >= 2 || classify_debug_level > 1;
ExpandShapesAndApplyCorrections(classes, debug, class_id, bottom, top,
results[c].Rating,
final_results->BlobLength, norm_factors,
final_results->BlobLength,
matcher_multiplier, norm_factors,
int_result, final_results);
}
}
@ -1258,7 +1148,8 @@ void Classify::MasterMatcher(INT_TEMPLATES templates,
// The results are added to the final_results output.
void Classify::ExpandShapesAndApplyCorrections(
ADAPT_CLASS* classes, bool debug, int class_id, int bottom, int top,
float cp_rating, int blob_length, const uinT8* cn_factors,
float cp_rating, int blob_length, int matcher_multiplier,
const uinT8* cn_factors,
INT_RESULT_STRUCT& int_result, ADAPT_RESULTS* final_results) {
// Compute the fontinfo_ids.
int fontinfo_id = kBlankFontinfoId;
@ -1292,7 +1183,7 @@ void Classify::ExpandShapesAndApplyCorrections(
int_result.Rating,
int_result.FeatureMisses,
bottom, top, blob_length,
cn_factors);
matcher_multiplier, cn_factors);
if (c == 0 || rating < min_rating)
min_rating = rating;
if (unicharset.get_enabled(unichar_id)) {
@ -1309,7 +1200,7 @@ void Classify::ExpandShapesAndApplyCorrections(
int_result.Rating,
int_result.FeatureMisses,
bottom, top, blob_length,
cn_factors);
matcher_multiplier, cn_factors);
if (unicharset.get_enabled(class_id)) {
AddNewResult(final_results, class_id, -1, rating,
classes != NULL, int_result.Config,
@ -1325,11 +1216,12 @@ double Classify::ComputeCorrectedRating(bool debug, int unichar_id,
double cp_rating, double im_rating,
int feature_misses,
int bottom, int top,
int blob_length,
int blob_length, int matcher_multiplier,
const uinT8* cn_factors) {
// Compute class feature corrections.
double cn_corrected = im_.ApplyCNCorrection(im_rating, blob_length,
cn_factors[unichar_id]);
cn_factors[unichar_id],
matcher_multiplier);
double miss_penalty = tessedit_class_miss_scale * feature_misses;
double vertical_penalty = 0.0;
// Penalize non-alnums for being vertical misfits.
@ -1383,39 +1275,30 @@ double Classify::ComputeCorrectedRating(bool debug, int unichar_id,
* @note Exceptions: none
* @note History: Tue Mar 12 19:38:03 1991, DSJ, Created.
*/
UNICHAR_ID *Classify::BaselineClassifier(TBLOB *Blob,
ADAPT_TEMPLATES Templates,
ADAPT_RESULTS *Results) {
int NumFeatures;
UNICHAR_ID *Classify::BaselineClassifier(
TBLOB *Blob, const GenericVector<INT_FEATURE_STRUCT>& int_features,
const INT_FX_RESULT_STRUCT& fx_info,
ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results) {
if (int_features.empty()) return NULL;
int NumClasses;
INT_FEATURE_ARRAY IntFeatures;
uinT8* CharNormArray = new uinT8[unicharset.size()];
CLASS_ID ClassId;
ClearCharNormArray(CharNormArray);
BaselineClassifierCalls++;
NumFeatures = GetBaselineFeatures(Blob, Templates->Templates, IntFeatures,
CharNormArray, &Results->BlobLength);
if (NumFeatures <= 0) {
delete [] CharNormArray;
return NULL;
}
NumClasses = PruneClasses(Templates->Templates, NumFeatures, IntFeatures,
Results->BlobLength = IntCastRounded(fx_info.Length / kStandardFeatureLength);
NumClasses = PruneClasses(Templates->Templates, int_features.size(),
&int_features[0],
CharNormArray, BaselineCutoffs, Results->CPResults);
NumBaselineClassesTried += NumClasses;
if (matcher_debug_level >= 2 || classify_debug_level > 1)
cprintf ("BL Matches = ");
im_.SetBaseLineMatch();
MasterMatcher(Templates->Templates, NumFeatures, IntFeatures, CharNormArray,
Templates->Class, matcher_debug_flags, NumClasses,
MasterMatcher(Templates->Templates, int_features.size(), &int_features[0],
CharNormArray,
Templates->Class, matcher_debug_flags, NumClasses, 0,
Blob->bounding_box(), Results->CPResults, Results);
delete [] CharNormArray;
ClassId = Results->best_match.unichar_id;
CLASS_ID ClassId = Results->best_match.unichar_id;
if (ClassId == NO_CLASS)
return (NULL);
/* this is a bug - maybe should return "" */
@ -1445,17 +1328,13 @@ UNICHAR_ID *Classify::BaselineClassifier(TBLOB *Blob,
* @note History: Tue Mar 12 16:02:52 1991, DSJ, Created.
*/
int Classify::CharNormClassifier(TBLOB *blob,
INT_TEMPLATES Templates,
const TrainingSample& sample,
ADAPT_RESULTS *adapt_results) {
CharNormClassifierCalls++;
TrainingSample* sample = BlobToTrainingSample(*blob, NM_CHAR_ANISOTROPIC,
classify_nonlinear_norm);
if (sample == NULL) return 0;
// This is the length that is used for scaling ratings vs certainty.
adapt_results->BlobLength =
IntCastRounded(sample->outline_length() / kStandardFeatureLength);
IntCastRounded(sample.outline_length() / kStandardFeatureLength);
GenericVector<UnicharRating> unichar_results;
static_classifier_->UnicharClassifySample(*sample, blob->denorm().pix(), 0,
static_classifier_->UnicharClassifySample(sample, blob->denorm().pix(), 0,
-1, &unichar_results);
// Convert results to the format used internally by AdaptiveClassifier.
for (int r = 0; r < unichar_results.size(); ++r) {
@ -1468,9 +1347,7 @@ int Classify::CharNormClassifier(TBLOB *blob,
float rating = 1.0f - unichar_results[r].rating;
AddNewResult(adapt_results, unichar_id, -1, rating, false, 0, font1, font2);
}
int num_features = sample->num_features();
delete sample;
return num_features;
return sample.num_features();
} /* CharNormClassifier */
// As CharNormClassifier, but operates on a TrainingSample and outputs to
@ -1518,10 +1395,10 @@ int Classify::CharNormTrainingSample(bool pruner_only,
UnicharRating(class_id, 1.0f - adapt_results->CPResults[i].Rating));
}
} else {
im_.SetCharNormMatch(classify_integer_matcher_multiplier);
MasterMatcher(PreTrainedTemplates, num_features, sample.features(),
char_norm_array,
NULL, matcher_debug_flags, num_classes,
classify_integer_matcher_multiplier,
blob_box, adapt_results->CPResults, adapt_results);
// Convert master matcher results to output format.
for (int i = 0; i < adapt_results->NumMatches; i++) {
@ -1711,8 +1588,10 @@ void Classify::DebugAdaptiveClassifier(TBLOB *blob,
if (i == 0 || Results->match[i].rating < Results->best_match.rating)
Results->best_match = Results->match[i];
}
TrainingSample* sample = BlobToTrainingSample(*blob, NM_CHAR_ANISOTROPIC,
classify_nonlinear_norm);
INT_FX_RESULT_STRUCT fx_info;
GenericVector<INT_FEATURE_STRUCT> bl_features;
TrainingSample* sample =
BlobToTrainingSample(*blob, false, &fx_info, &bl_features);
if (sample == NULL) return;
static_classifier_->DebugDisplay(*sample, blob->denorm().pix(),
Results->best_match.unichar_id);
@ -1745,21 +1624,26 @@ void Classify::DebugAdaptiveClassifier(TBLOB *blob,
void Classify::DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results) {
UNICHAR_ID *Ambiguities;
AdaptiveMatcherCalls++;
InitIntFX();
INT_FX_RESULT_STRUCT fx_info;
GenericVector<INT_FEATURE_STRUCT> bl_features;
TrainingSample* sample =
BlobToTrainingSample(*Blob, classify_nonlinear_norm, &fx_info,
&bl_features);
if (sample == NULL) return;
if (AdaptedTemplates->NumPermClasses < matcher_permanent_classes_min ||
tess_cn_matching) {
CharNormClassifier(Blob, PreTrainedTemplates, Results);
CharNormClassifier(Blob, *sample, Results);
} else {
Ambiguities = BaselineClassifier(Blob, AdaptedTemplates, Results);
Ambiguities = BaselineClassifier(Blob, bl_features, fx_info,
AdaptedTemplates, Results);
if ((Results->NumMatches > 0 &&
MarginalMatch (Results->best_match.rating) &&
!tess_bn_matching) ||
Results->NumMatches == 0) {
CharNormClassifier(Blob, PreTrainedTemplates, Results);
CharNormClassifier(Blob, *sample, Results);
} else if (Ambiguities && *Ambiguities >= 0 && !tess_bn_matching) {
AmbigClassifier(Blob,
AmbigClassifier(bl_features, fx_info, Blob,
PreTrainedTemplates,
AdaptedTemplates->Class,
Ambiguities,
@ -1773,6 +1657,7 @@ void Classify::DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results) {
// just adding a NULL classification.
if (!Results->HasNonfragment || Results->NumMatches == 0)
ClassifyAsNoise(Results);
delete sample;
} /* DoAdaptiveMatch */
/*---------------------------------------------------------------------------*/
@ -1799,8 +1684,15 @@ UNICHAR_ID *Classify::GetAmbiguities(TBLOB *Blob,
int i;
Results->Initialize();
INT_FX_RESULT_STRUCT fx_info;
GenericVector<INT_FEATURE_STRUCT> bl_features;
TrainingSample* sample =
BlobToTrainingSample(*Blob, classify_nonlinear_norm, &fx_info,
&bl_features);
if (sample == NULL) return NULL;
CharNormClassifier(Blob, PreTrainedTemplates, Results);
CharNormClassifier(Blob, *sample, Results);
delete sample;
RemoveBadMatches(Results);
qsort((void *)Results->match, Results->NumMatches,
sizeof(ScoredClass), CompareByRating);
@ -1823,58 +1715,6 @@ UNICHAR_ID *Classify::GetAmbiguities(TBLOB *Blob,
return Ambiguities;
} /* GetAmbiguities */
/*---------------------------------------------------------------------------*/
/**
* This routine calls the integer (Hardware) feature
* extractor if it has not been called before for this blob.
* The results from the feature extractor are placed into
* globals so that they can be used in other routines without
* re-extracting the features.
* It then copies the baseline features into the IntFeatures
* array provided by the caller.
*
* @param Blob blob to extract features from
* @param Templates used to compute char norm adjustments
* @param IntFeatures array to fill with integer features
* @param CharNormArray array to fill with dummy char norm adjustments
* @param BlobLength length of blob in baseline-normalized units
*
* Globals:
* - FeaturesHaveBeenExtracted TRUE if fx has been done
* - BaselineFeatures holds extracted baseline feat
* - CharNormFeatures holds extracted char norm feat
* - FXInfo holds misc. FX info
*
* @return Number of features extracted or 0 if an error occured.
* @note Exceptions: none
* @note History: Tue May 28 10:40:52 1991, DSJ, Created.
*/
int Classify::GetBaselineFeatures(TBLOB *Blob,
INT_TEMPLATES Templates,
INT_FEATURE_ARRAY IntFeatures,
uinT8* CharNormArray,
inT32 *BlobLength) {
if (!FeaturesHaveBeenExtracted) {
FeaturesOK = ExtractIntFeat(*Blob, classify_nonlinear_norm,
BaselineFeatures, CharNormFeatures, &FXInfo);
FeaturesHaveBeenExtracted = TRUE;
}
*BlobLength = IntCastRounded(FXInfo.Length / kStandardFeatureLength);
if (!FeaturesOK) {
return 0;
}
memcpy(IntFeatures, BaselineFeatures, FXInfo.NumBL * sizeof(IntFeatures[0]));
ClearCharNormArray(CharNormArray);
return FXInfo.NumBL;
} /* GetBaselineFeatures */
void Classify::ResetFeaturesHaveBeenExtracted() {
FeaturesHaveBeenExtracted = FALSE;
}
// Returns true if the given blob looks too dissimilar to any character
// present in the classifier templates.
bool Classify::LooksLikeGarbage(TBLOB *blob) {
@ -1921,48 +1761,28 @@ bool Classify::LooksLikeGarbage(TBLOB *blob) {
* @param BlobLength length of blob in baseline-normalized units
*
* Globals:
* - FeaturesHaveBeenExtracted TRUE if fx has been done
* - BaselineFeatures holds extracted baseline feat
* - CharNormFeatures holds extracted char norm feat
* - FXInfo holds misc. FX info
*
* @return Number of features extracted or 0 if an error occured.
* @note Exceptions: none
* @note History: Tue May 28 10:40:52 1991, DSJ, Created.
*/
int Classify::GetCharNormFeatures(TBLOB *Blob,
INT_TEMPLATES Templates,
INT_FEATURE_ARRAY IntFeatures,
uinT8* PrunerNormArray,
uinT8* CharNormArray,
inT32 *BlobLength) {
FEATURE NormFeature;
FLOAT32 Baseline, Scale;
if (!FeaturesHaveBeenExtracted) {
FeaturesOK = ExtractIntFeat(*Blob, classify_nonlinear_norm,
BaselineFeatures, CharNormFeatures, &FXInfo);
FeaturesHaveBeenExtracted = TRUE;
}
*BlobLength = IntCastRounded(FXInfo.Length / kStandardFeatureLength);
if (!FeaturesOK) {
return 0;
}
memcpy(IntFeatures, CharNormFeatures, FXInfo.NumCN * sizeof(IntFeatures[0]));
NormFeature = NewFeature(&CharNormDesc);
Baseline = kBlnBaselineOffset;
Scale = MF_SCALE_FACTOR;
NormFeature->Params[CharNormY] = (FXInfo.Ymean - Baseline) * Scale;
NormFeature->Params[CharNormLength] =
FXInfo.Length * Scale / LENGTH_COMPRESSION;
NormFeature->Params[CharNormRx] = FXInfo.Rx * Scale;
NormFeature->Params[CharNormRy] = FXInfo.Ry * Scale;
ComputeCharNormArrays(NormFeature, Templates, CharNormArray, PrunerNormArray);
return FXInfo.NumCN;
} /* GetCharNormFeatures */
int Classify::GetCharNormFeature(const INT_FX_RESULT_STRUCT& fx_info,
INT_TEMPLATES templates,
uinT8* pruner_norm_array,
uinT8* char_norm_array) {
FEATURE norm_feature = NewFeature(&CharNormDesc);
float baseline = kBlnBaselineOffset;
float scale = MF_SCALE_FACTOR;
norm_feature->Params[CharNormY] = (fx_info.Ymean - baseline) * scale;
norm_feature->Params[CharNormLength] =
fx_info.Length * scale / LENGTH_COMPRESSION;
norm_feature->Params[CharNormRx] = fx_info.Rx * scale;
norm_feature->Params[CharNormRy] = fx_info.Ry * scale;
// Deletes norm_feature.
ComputeCharNormArrays(norm_feature, templates, char_norm_array,
pruner_norm_array);
return IntCastRounded(fx_info.Length / kStandardFeatureLength);
} /* GetCharNormFeature */
// Computes the char_norm_array for the unicharset and, if not NULL, the
// pruner_array as appropriate according to the existence of the shape_table.
@ -2454,7 +2274,6 @@ void Classify::ShowBestMatchFor(int shape_id,
}
INT_RESULT_STRUCT cn_result;
classify_norm_method.set_value(character);
im_.SetCharNormMatch(classify_integer_matcher_multiplier);
im_.Match(ClassForClassId(PreTrainedTemplates, shape_id),
AllProtosOn, AllConfigsOn,
num_features, features, &cn_result,

View File

@ -165,27 +165,13 @@ Classify::Classify()
AdaptedTemplates = NULL;
PreTrainedTemplates = NULL;
AllProtosOn = NULL;
PrunedProtos = NULL;
AllConfigsOn = NULL;
AllProtosOff = NULL;
AllConfigsOff = NULL;
TempProtoMask = NULL;
NormProtos = NULL;
AdaptiveMatcherCalls = 0;
BaselineClassifierCalls = 0;
CharNormClassifierCalls = 0;
AmbigClassifierCalls = 0;
NumWordsAdaptedTo = 0;
NumCharsAdaptedTo = 0;
NumBaselineClassesTried = 0;
NumCharNormClassesTried = 0;
NumAmbigClassesTried = 0;
NumClassesOutput = 0;
NumAdaptationsFailed = 0;
FeaturesHaveBeenExtracted = false;
FeaturesOK = true;
learn_debug_win_ = NULL;
learn_fragmented_word_debug_win_ = NULL;
learn_fragments_debug_win_ = NULL;

View File

@ -145,15 +145,13 @@ class Classify : public CCStruct {
int FontinfoId,
ADAPT_CLASS Class,
ADAPT_TEMPLATES Templates);
void AdaptToPunc(TBLOB *Blob,
CLASS_ID ClassId,
int FontinfoId,
FLOAT32 Threshold);
void AmbigClassifier(TBLOB *Blob,
INT_TEMPLATES Templates,
ADAPT_CLASS *Classes,
UNICHAR_ID *Ambiguities,
ADAPT_RESULTS *Results);
void AmbigClassifier(const GenericVector<INT_FEATURE_STRUCT>& int_features,
const INT_FX_RESULT_STRUCT& fx_info,
const TBLOB *blob,
INT_TEMPLATES templates,
ADAPT_CLASS *classes,
UNICHAR_ID *ambiguities,
ADAPT_RESULTS *results);
void MasterMatcher(INT_TEMPLATES templates,
inT16 num_features,
const INT_FEATURE_STRUCT* features,
@ -161,6 +159,7 @@ class Classify : public CCStruct {
ADAPT_CLASS* classes,
int debug,
int num_classes,
int matcher_multiplier,
const TBOX& blob_box,
CLASS_PRUNER_RESULTS results,
ADAPT_RESULTS* final_results);
@ -175,6 +174,7 @@ class Classify : public CCStruct {
int bottom, int top,
float cp_rating,
int blob_length,
int matcher_multiplier,
const uinT8* cn_factors,
INT_RESULT_STRUCT& int_result,
ADAPT_RESULTS* final_results);
@ -184,7 +184,8 @@ class Classify : public CCStruct {
double ComputeCorrectedRating(bool debug, int unichar_id, double cp_rating,
double im_rating, int feature_misses,
int bottom, int top,
int blob_length, const uinT8* cn_factors);
int blob_length, int matcher_multiplier,
const uinT8* cn_factors);
void ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box,
ADAPT_RESULTS *Results,
BLOB_CHOICE_LIST *Choices);
@ -246,12 +247,13 @@ class Classify : public CCStruct {
// Converts a shape_table_ index to a classifier class_id index (not a
// unichar-id!). Uses a search, so not fast.
int ShapeIDToClassID(int shape_id) const;
UNICHAR_ID *BaselineClassifier(TBLOB *Blob,
ADAPT_TEMPLATES Templates,
ADAPT_RESULTS *Results);
int CharNormClassifier(TBLOB *Blob,
INT_TEMPLATES Templates,
ADAPT_RESULTS *Results);
UNICHAR_ID *BaselineClassifier(
TBLOB *Blob, const GenericVector<INT_FEATURE_STRUCT>& int_features,
const INT_FX_RESULT_STRUCT& fx_info,
ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results);
int CharNormClassifier(TBLOB *blob,
const TrainingSample& sample,
ADAPT_RESULTS *adapt_results);
// As CharNormClassifier, but operates on a TrainingSample and outputs to
// a GenericVector of ShapeRating without conversion to classes.
@ -267,7 +269,6 @@ class Classify : public CCStruct {
void DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class);
bool AdaptableWord(WERD_RES* word);
void EndAdaptiveClassifier();
void PrintAdaptiveStatistics(FILE *File);
void SettupPass1();
void SettupPass2();
void AdaptiveClassifier(TBLOB *Blob,
@ -276,17 +277,10 @@ class Classify : public CCStruct {
void ClassifyAsNoise(ADAPT_RESULTS *Results);
void ResetAdaptiveClassifierInternal();
int GetBaselineFeatures(TBLOB *Blob,
INT_TEMPLATES Templates,
INT_FEATURE_ARRAY IntFeatures,
uinT8* CharNormArray,
inT32 *BlobLength);
int GetCharNormFeatures(TBLOB *Blob,
INT_TEMPLATES Templates,
INT_FEATURE_ARRAY IntFeatures,
uinT8* PrunerNormArray,
uinT8* CharNormArray,
inT32 *BlobLength);
int GetCharNormFeature(const INT_FX_RESULT_STRUCT& fx_info,
INT_TEMPLATES templates,
uinT8* pruner_norm_array,
uinT8* char_norm_array);
// Computes the char_norm_array for the unicharset and, if not NULL, the
// pruner_array as appropriate according to the existence of the shape_table.
// The norm_feature is deleted as it is almost certainly no longer needed.
@ -298,7 +292,6 @@ class Classify : public CCStruct {
bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config);
void UpdateAmbigsGroup(CLASS_ID class_id, TBLOB *Blob);
void ResetFeaturesHaveBeenExtracted();
bool AdaptiveClassifierIsFull() { return NumAdaptationsFailed > 0; }
bool LooksLikeGarbage(TBLOB *blob);
void RefreshDebugWindow(ScrollView **win, const char *msg,
@ -468,9 +461,7 @@ class Classify : public CCStruct {
// Create dummy proto and config masks for use with the built-in templates.
BIT_VECTOR AllProtosOn;
BIT_VECTOR PrunedProtos;
BIT_VECTOR AllConfigsOn;
BIT_VECTOR AllProtosOff;
BIT_VECTOR AllConfigsOff;
BIT_VECTOR TempProtoMask;
bool EnableLearning;
@ -504,34 +495,13 @@ class Classify : public CCStruct {
ShapeTable* shape_table_;
private:
Dict dict_;
// The currently active static classifier.
ShapeClassifier* static_classifier_;
/* variables used to hold performance statistics */
int AdaptiveMatcherCalls;
int BaselineClassifierCalls;
int CharNormClassifierCalls;
int AmbigClassifierCalls;
int NumWordsAdaptedTo;
int NumCharsAdaptedTo;
int NumBaselineClassesTried;
int NumCharNormClassesTried;
int NumAmbigClassesTried;
int NumClassesOutput;
int NumAdaptationsFailed;
/* variables used to hold onto extracted features. This is used
to map from the old scheme in which baseline features and char norm
features are extracted separately, to the new scheme in which they
are extracted at the same time. */
bool FeaturesHaveBeenExtracted;
bool FeaturesOK;
INT_FEATURE_ARRAY BaselineFeatures;
INT_FEATURE_ARRAY CharNormFeatures;
INT_FX_RESULT_STRUCT FXInfo;
// Expected number of features in the class pruner, used to penalize
// unknowns that have too few features (like a c being classified as e) so
// it doesn't recognize everything as '@' or '#'.

View File

@ -78,31 +78,19 @@ namespace tesseract {
// TODO(rays) BlobToTrainingSample must remain a global function until
// the FlexFx and FeatureDescription code can be removed and LearnBlob
// made a member of Classify.
TrainingSample* BlobToTrainingSample(const TBLOB& blob,
tesseract::NormalizationMode mode,
bool nonlinear_norm) {
INT_FX_RESULT_STRUCT fx_info;
GenericVector<INT_FEATURE_STRUCT> bl_features;
TrainingSample* BlobToTrainingSample(
const TBLOB& blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT* fx_info,
GenericVector<INT_FEATURE_STRUCT>* bl_features) {
GenericVector<INT_FEATURE_STRUCT> cn_features;
Classify::ExtractFeatures(blob, nonlinear_norm, &bl_features,
&cn_features, &fx_info, NULL);
Classify::ExtractFeatures(blob, nonlinear_norm, bl_features,
&cn_features, fx_info, NULL);
// TODO(rays) Use blob->PreciseBoundingBox() instead.
TBOX box = blob.bounding_box();
TrainingSample* sample = NULL;
if (mode == tesseract::NM_CHAR_ANISOTROPIC) {
int num_features = fx_info.NumCN;
if (num_features > 0) {
sample = TrainingSample::CopyFromFeatures(fx_info, box, &cn_features[0],
num_features);
}
} else if (mode == tesseract::NM_BASELINE) {
int num_features = fx_info.NumBL;
if (num_features > 0) {
sample = TrainingSample::CopyFromFeatures(fx_info, box, &bl_features[0],
num_features);
}
} else {
ASSERT_HOST(!"Unsupported normalization mode!");
int num_features = fx_info->NumCN;
if (num_features > 0) {
sample = TrainingSample::CopyFromFeatures(*fx_info, box, &cn_features[0],
num_features);
}
if (sample != NULL) {
// Set the bounding box (in original image coordinates) in the sample.

View File

@ -60,9 +60,9 @@ namespace tesseract {
// TODO(rays) BlobToTrainingSample must remain a global function until
// the FlexFx and FeatureDescription code can be removed and LearnBlob
// made a member of Classify.
TrainingSample* BlobToTrainingSample(const TBLOB& blob,
tesseract::NormalizationMode mode,
bool nonlinear_norm);
TrainingSample* BlobToTrainingSample(
const TBLOB& blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT* fx_info,
GenericVector<INT_FEATURE_STRUCT>* bl_features);
}
// Deprecated! Prefer tesseract::Classify::ExtractFeatures instead.

View File

@ -693,13 +693,9 @@ int IntegerMatcher::FindBadFeatures(
/*---------------------------------------------------------------------------*/
void IntegerMatcher::Init(tesseract::IntParam *classify_debug_level,
int classify_integer_matcher_multiplier) {
void IntegerMatcher::Init(tesseract::IntParam *classify_debug_level) {
classify_debug_level_ = classify_debug_level;
/* Set default mode of operation of IntegerMatcher */
SetCharNormMatch(classify_integer_matcher_multiplier);
/* Initialize table for evidence to similarity lookup */
for (int i = 0; i < SE_TABLE_SIZE; i++) {
uinT32 IntSimilarity = i << (27 - SE_TABLE_BITS);
@ -724,17 +720,6 @@ void IntegerMatcher::Init(tesseract::IntParam *classify_debug_level,
evidence_mult_mask_ = ((1 << kIntEvidenceTruncBits) - 1);
}
/*--------------------------------------------------------------------------*/
// Puts the matcher into baseline mode: a zero multiplier makes the
// char-norm correction in ApplyCNCorrection a no-op.
void IntegerMatcher::SetBaseLineMatch() {
  local_matcher_multiplier_ = 0;
}
/*--------------------------------------------------------------------------*/
// Puts the matcher into character-normalized mode: the given multiplier
// weights the normalization_factor term in ApplyCNCorrection.
void IntegerMatcher::SetCharNormMatch(int integer_matcher_multiplier) {
  local_matcher_multiplier_ = integer_matcher_multiplier;
}
/**----------------------------------------------------------------------------
Private Code
@ -1283,10 +1268,11 @@ int IntegerMatcher::FindBestMatch(
// Applies the CN normalization factor to the given rating and returns
// the modified rating.
float IntegerMatcher::ApplyCNCorrection(float rating, int blob_length,
                                        int normalization_factor,
                                        int matcher_multiplier) {
  // Blend the raw rating with the char-norm penalty: matcher_multiplier
  // acts as a pseudo-length for normalization_factor (brought to [0,1]
  // by the /256.0), so a larger multiplier weights the CN term more.
  // Resolved from an interleaved old/new diff: the member
  // local_matcher_multiplier_ is replaced by the matcher_multiplier
  // parameter, matching the declaration that takes 4 arguments.
  return (rating * blob_length +
          matcher_multiplier * normalization_factor / 256.0) /
         (blob_length + matcher_multiplier);
}
/*---------------------------------------------------------------------------*/

View File

@ -102,11 +102,7 @@ class IntegerMatcher {
IntegerMatcher() : classify_debug_level_(0) {}
void Init(tesseract::IntParam *classify_debug_level,
int classify_integer_matcher_multiplier);
void SetBaseLineMatch();
void SetCharNormMatch(int integer_matcher_multiplier);
void Init(tesseract::IntParam *classify_debug_level);
void Match(INT_CLASS ClassTemplate,
BIT_VECTOR ProtoMask,
@ -121,7 +117,7 @@ class IntegerMatcher {
// Applies the CN normalization factor to the given rating and returns
// the modified rating.
float ApplyCNCorrection(float rating, int blob_length,
int normalization_factor);
int normalization_factor, int matcher_multiplier);
int FindGoodProtos(INT_CLASS ClassTemplate,
BIT_VECTOR ProtoMask,
@ -192,7 +188,6 @@ class IntegerMatcher {
uinT32 evidence_table_mask_;
uinT32 mult_trunc_shift_bits_;
uinT32 table_trunc_shift_bits_;
inT16 local_matcher_multiplier_;
tesseract::IntParam *classify_debug_level_;
uinT32 evidence_mult_mask_;
};

View File

@ -235,8 +235,11 @@ FEATURE_SET ExtractIntCNFeatures(TBLOB *blob, const DENORM& bl_denorm,
** Exceptions: none
** History: 8/8/2011, rays, Created.
*/
tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample(
*blob, tesseract::NM_CHAR_ANISOTROPIC, false);
INT_FX_RESULT_STRUCT local_fx_info(fx_info);
GenericVector<INT_FEATURE_STRUCT> bl_features;
tesseract::TrainingSample* sample =
tesseract::BlobToTrainingSample(*blob, false, &local_fx_info,
&bl_features);
if (sample == NULL) return NULL;
int num_features = sample->num_features();
@ -267,8 +270,11 @@ FEATURE_SET ExtractIntGeoFeatures(TBLOB *blob, const DENORM& bl_denorm,
** Exceptions: none
** History: 8/8/2011, rays, Created.
*/
tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample(
*blob, tesseract::NM_CHAR_ANISOTROPIC, false);
INT_FX_RESULT_STRUCT local_fx_info(fx_info);
GenericVector<INT_FEATURE_STRUCT> bl_features;
tesseract::TrainingSample* sample =
tesseract::BlobToTrainingSample(*blob, false, &local_fx_info,
&bl_features);
if (sample == NULL) return NULL;
FEATURE_SET feature_set = NewFeatureSet(1);

View File

@ -119,6 +119,9 @@ Dict::Dict(Image* image_ptr)
"Make AcceptableChoice() always return false. Useful"
" when there is a need to explore all segmentations",
getImage()->getCCUtil()->params()),
BOOL_MEMBER(save_raw_choices, false,
"Deprecated- backward compatablity only",
getImage()->getCCUtil()->params()),
INT_MEMBER(tessedit_truncate_wordchoice_log, 10,
"Max words to keep in list",
getImage()->getCCUtil()->params()),
@ -689,7 +692,7 @@ void Dict::adjust_word(WERD_CHOICE *word,
int Dict::valid_word(const WERD_CHOICE &word, bool numbers_ok) const {
const WERD_CHOICE *word_ptr = &word;
WERD_CHOICE temp_word(word.unicharset());
if (hyphenated()) {
if (hyphenated() && hyphen_word_->unicharset() == word.unicharset()) {
copy_hyphen_info(&temp_word);
temp_word += word;
word_ptr = &temp_word;

View File

@ -613,6 +613,8 @@ class Dict {
BOOL_VAR_H(stopper_no_acceptable_choices, false,
"Make AcceptableChoice() always return false. Useful"
" when there is a need to explore all segmentations");
BOOL_VAR_H(save_raw_choices, false,
"Deprecated- backward compatability only");
INT_VAR_H(tessedit_truncate_wordchoice_log, 10, "Max words to keep in list");
STRING_VAR_H(word_to_debug, "", "Word for which stopper debug information"
" should be printed to stdout");

View File

@ -440,16 +440,32 @@ namespace tesseract {
* enough. The results are returned in the WERD_RES.
*/
void Wordrec::chop_word_main(WERD_RES *word) {
// Initial clean up.
word->ClearRatings();
int num_blobs = word->chopped_word->NumBlobs();
word->ratings = new MATRIX(num_blobs, wordrec_max_join_chunks);
// Run initial classification.
for (int b = 0; b < num_blobs; ++b) {
BLOB_CHOICE_LIST* choices = classify_piece(word->seam_array, b, b,
"Initial:", word->chopped_word,
word->blamer_bundle);
word->ratings->put(b, b, choices);
if (word->ratings == NULL) {
word->ratings = new MATRIX(num_blobs, wordrec_max_join_chunks);
}
if (word->ratings->get(0, 0) == NULL) {
// Run initial classification.
for (int b = 0; b < num_blobs; ++b) {
BLOB_CHOICE_LIST* choices = classify_piece(word->seam_array, b, b,
"Initial:", word->chopped_word,
word->blamer_bundle);
word->ratings->put(b, b, choices);
}
} else {
// Blobs have been pre-classified. Set matrix cell for all blob choices
for (int col = 0; col < word->ratings->dimension(); ++col) {
for (int row = col; row < word->ratings->dimension() &&
row < col + word->ratings->bandwidth(); ++row) {
BLOB_CHOICE_LIST* choices = word->ratings->get(col, row);
if (choices != NULL) {
BLOB_CHOICE_IT bc_it(choices);
for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) {
bc_it.data()->set_matrix_cell(col, row);
}
}
}
}
}
// Run Segmentation Search.