Refactored control functions to enable parallel blob classification

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@904 d0cd1f9f-072b-0410-8dd7-cf729c803f20
theraysmith@gmail.com 2013-11-08 20:30:56 +00:00
parent 77c1b41e4e
commit 7ec4fd7a56
25 changed files with 580 additions and 673 deletions
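
The heart of the change: word recognition is split into an explicit setup phase and a recognition phase, so per-word blob setup and classification can later fan out across threads. A minimal sketch of the new control flow in recog_all_words (condensed from the diff below, not verbatim):

    // Pass N, reorganized: prepare every word first, then recognize.
    GenericVector<WordData> words;
    SetupAllWordsPassN(1, target_word_box, word_config, page_res, &words);
    if (tessedit_parallelize) {
      PrerecAllWordsPar(words);  // new par_control.cpp entry point
    }
    if (!RecogAllWordsPassN(1, monitor, &words))
      return false;  // deadline or cancel; remaining words are faked out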

View File

@@ -46,7 +46,7 @@ libtesseract_main_la_SOURCES = \
 	docqual.cpp equationdetect.cpp fixspace.cpp fixxht.cpp \
 	imgscale.cpp ltrresultiterator.cpp \
 	osdetect.cpp output.cpp pageiterator.cpp pagesegmain.cpp \
-	pagewalk.cpp paragraphs.cpp paramsd.cpp pgedit.cpp recogtraining.cpp \
+	pagewalk.cpp par_control.cpp paragraphs.cpp paramsd.cpp pgedit.cpp recogtraining.cpp \
 	reject.cpp resultiterator.cpp scaleimg.cpp superscript.cpp \
 	tesseract_cube_combiner.cpp \
 	tessbox.cpp tessedit.cpp tesseractclass.cpp tessvars.cpp \

View File

@@ -241,7 +241,9 @@ PAGE_RES* Tesseract::SetupApplyBoxes(const GenericVector<TBOX>& boxes,
 void Tesseract::MaximallyChopWord(const GenericVector<TBOX>& boxes,
                                   BLOCK* block, ROW* row,
                                   WERD_RES* word_res) {
-  if (!word_res->SetupForTessRecognition(unicharset, this, BestPix(), false,
+  if (!word_res->SetupForRecognition(unicharset, this, BestPix(),
+                                     tessedit_ocr_engine_mode, NULL,
+                                     classify_bln_numeric_mode,
                                      textord_use_cjk_fp_model,
                                      poly_allow_detailed_fx,
                                      row, block)) {

View File

@@ -97,8 +97,9 @@ BOOL8 Tesseract::recog_interactive(BLOCK* block, ROW* row, WERD_RES* word_res) {
   inT16 char_qual;
   inT16 good_char_qual;
 
-  classify_word_and_language(&Tesseract::classify_word_pass2,
-                             block, row, word_res);
+  WordData word_data(block, row, word_res);
+  SetupWordPassN(2, &word_data);
+  classify_word_and_language(&Tesseract::classify_word_pass2, &word_data);
   if (tessedit_debug_quality_metrics) {
     word_char_quality(word_res, row, &char_qual, &good_char_qual);
     tprintf
@@ -153,6 +154,111 @@ bool Tesseract::ProcessTargetWord(const TBOX& word_box,
   return true;
 }
 
+// If tesseract is to be run, sets the words up ready for it.
+void Tesseract::SetupAllWordsPassN(int pass_n,
+                                   const TBOX* target_word_box,
+                                   const char* word_config,
+                                   PAGE_RES* page_res,
+                                   GenericVector<WordData>* words) {
+  // Prepare all the words.
+  PAGE_RES_IT page_res_it(page_res);
+  for (page_res_it.restart_page(); page_res_it.word() != NULL;
+       page_res_it.forward()) {
+    if (pass_n == 1)
+      page_res_it.word()->SetupFake(unicharset);
+    if (target_word_box == NULL ||
+        ProcessTargetWord(page_res_it.word()->word->bounding_box(),
+                          *target_word_box, word_config, 1)) {
+      words->push_back(WordData(page_res_it));
+    }
+  }
+  // Setup all the words for recognition with polygonal approximation.
+  for (int w = 0; w < words->size(); ++w) {
+    SetupWordPassN(pass_n, &(*words)[w]);
+    if (w > 0) (*words)[w].prev_word = &(*words)[w - 1];
+  }
+}
+
+// Sets up the single word ready for whichever engine is to be run.
+void Tesseract::SetupWordPassN(int pass_n, WordData* word) {
+  if (pass_n == 1 || !word->word->done || tessedit_training_tess) {
+    if (pass_n == 2) {
+      // TODO(rays) Should we do this on pass1 too?
+      word->word->caps_height = 0.0;
+      if (word->word->x_height == 0.0f)
+        word->word->x_height = word->row->x_height();
+    }
+    // Cube doesn't get setup for pass2.
+    if (pass_n != 2 || tessedit_ocr_engine_mode != OEM_CUBE_ONLY) {
+      word->word->SetupForRecognition(
+          unicharset, this, BestPix(), tessedit_ocr_engine_mode, NULL,
+          classify_bln_numeric_mode, textord_use_cjk_fp_model,
+          poly_allow_detailed_fx, word->row, word->block);
+    }
+  }
+  if (!sub_langs_.empty()) {
+    if (word->lang_words.size() != sub_langs_.size()) {
+      // Setup the words for all the sub-languages now.
+      WERD_RES empty;
+      word->lang_words.init_to_size(sub_langs_.size(), empty);
+    }
+    for (int s = 0; s < sub_langs_.size(); ++s) {
+      Tesseract* lang_t = sub_langs_[s];
+      if (pass_n == 1 || (lang_t->tessedit_ocr_engine_mode != OEM_CUBE_ONLY &&
+          (!word->lang_words[s].done || lang_t->tessedit_training_tess))) {
+        word->lang_words[s].InitForRetryRecognition(*word->word);
+        word->lang_words[s].SetupForRecognition(
+            lang_t->unicharset, lang_t, BestPix(),
+            lang_t->tessedit_ocr_engine_mode, NULL,
+            lang_t->classify_bln_numeric_mode,
+            lang_t->textord_use_cjk_fp_model,
+            lang_t->poly_allow_detailed_fx, word->row, word->block);
+      }
+    }
+  }
+}
+
+// Runs word recognition on all the words.
+bool Tesseract::RecogAllWordsPassN(int pass_n, ETEXT_DESC* monitor,
+                                   GenericVector<WordData>* words) {
+  // TODO(rays) Before this loop can be parallelized (it would yield a massive
+  // speed-up) all remaining member globals need to be converted to local/heap
+  // (eg set_pass1 and set_pass2) and an intermediate adaption pass needs to be
+  // added. The results will be significantly different with adaption on, and
+  // deterioration will need investigation.
+  for (int w = 0; w < words->size(); ++w) {
+    WordData* word = &(*words)[w];
+    if (monitor != NULL) {
+      monitor->ocr_alive = TRUE;
+      if (pass_n == 1)
+        monitor->progress = 30 + 50 * w / words->size();
+      else
+        monitor->progress = 80 + 10 * w / words->size();
+      if (monitor->deadline_exceeded() ||
+          (monitor->cancel != NULL && (*monitor->cancel)(monitor->cancel_this,
+                                                         words->size()))) {
+        // Timeout. Fake out the rest of the words.
+        for (; w < words->size(); ++w) {
+          (*words)[w].word->SetupFake(unicharset);
+        }
+        return false;
+      }
+    }
+    if (word->word->tess_failed) continue;
+    WordRecognizer recognizer = pass_n == 1 ? &Tesseract::classify_word_pass1
+                                            : &Tesseract::classify_word_pass2;
+    classify_word_and_language(recognizer, word);
+    if (tessedit_dump_choices) {
+      word_dumper(NULL, word->row, word->word);
+      tprintf("Pass%d: %s [%s]\n", pass_n,
+              word->word->best_choice->unichar_string().string(),
+              word->word->best_choice->debug_string().string());
+    }
+  }
+  return true;
+}
+
 /**
  * recog_all_words()
  *
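
Call sites that previously passed (block, row, word) to classify_word_and_language now build a WordData and run SetupWordPassN first. A minimal sketch of the new per-word calling pattern (the same shape appears in recog_interactive above and match_current_words below):

    WordData word_data(block, row, word_res);
    SetupWordPassN(2, &word_data);  // normalizes blobs, preps sub-language copies
    classify_word_and_language(&Tesseract::classify_word_pass2, &word_data);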
@@ -179,27 +285,15 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
                                 const TBOX* target_word_box,
                                 const char* word_config,
                                 int dopasses) {
-  PAGE_RES_IT page_res_it;
-  inT32 word_index;              // current word
+  PAGE_RES_IT page_res_it(page_res);
 
   if (tessedit_minimal_rej_pass1) {
     tessedit_test_adaption.set_value (TRUE);
     tessedit_minimal_rejection.set_value (TRUE);
   }
 
-  // Before the main recognition loop below, walk through the whole page and set
-  // up fake words. That way, if we run out of time a user will still get the
-  // expected best_choice and box_words out the end; they'll just be empty.
-  page_res_it.page_res = page_res;
-  for (page_res_it.restart_page(); page_res_it.word() != NULL;
-       page_res_it.forward()) {
-    page_res_it.word()->SetupFake(unicharset);
-  }
-
   if (dopasses==0 || dopasses==1) {
-    page_res_it.page_res=page_res;
     page_res_it.restart_page();
 
     // ****************** Pass 1 *******************
     // Clear adaptive classifier at the beginning of the page if it is full.
@@ -214,20 +308,15 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
       if (sub_langs_[i]->AdaptiveClassifierIsFull())
         sub_langs_[i]->ResetAdaptiveClassifierInternal();
     }
-
-    stats_.word_count = 0;
-    if (monitor != NULL) {
-      monitor->ocr_alive = TRUE;
-      while (page_res_it.word() != NULL) {
-        stats_.word_count++;
-        page_res_it.forward();
-      }
-      page_res_it.restart_page();
-    } else {
-      stats_.word_count = 1;
-    }
+    // Set up all words ready for recognition, so that if parallelism is on
+    // all the input and output classes are ready to run the classifier.
+    GenericVector<WordData> words;
+    SetupAllWordsPassN(1, target_word_box, word_config, page_res, &words);
+    if (tessedit_parallelize) {
+      PrerecAllWordsPar(words);
+    }
 
-    word_index = 0;
+    stats_.word_count = words.size();
 
     stats_.dict_words = 0;
     stats_.doc_blob_quality = 0;
@@ -237,56 +326,15 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
     stats_.doc_good_char_quality = 0;
 
     most_recently_used_ = this;
+    // Run pass 1 word recognition.
+    if (!RecogAllWordsPassN(1, monitor, &words)) return false;
+    // Pass 1 post-processing.
     while (page_res_it.word() != NULL) {
-      set_global_loc_code(LOC_PASS1);
-      word_index++;
-      if (monitor != NULL) {
-        monitor->ocr_alive = TRUE;
-        monitor->progress = 30 + 50 * word_index / stats_.word_count;
-        if (monitor->deadline_exceeded() ||
-            (monitor->cancel != NULL && (*monitor->cancel)(monitor->cancel_this,
-                                                           stats_.dict_words)))
-          return false;
-      }
-      if (target_word_box &&
-          !ProcessTargetWord(page_res_it.word()->word->bounding_box(),
-                             *target_word_box, word_config, 1)) {
-        page_res_it.forward();
-        continue;
-      }
-      classify_word_and_language(&Tesseract::classify_word_pass1,
-                                 page_res_it.block()->block,
-                                 page_res_it.row()->row,
-                                 page_res_it.word());
       if (page_res_it.word()->word->flag(W_REP_CHAR)) {
         fix_rep_char(&page_res_it);
         page_res_it.forward();
         continue;
       }
-      if (tessedit_dump_choices) {
-        word_dumper(NULL, page_res_it.row()->row, page_res_it.word());
-        tprintf("Pass1: %s [%s]\n",
-                page_res_it.word()->best_choice->unichar_string().string(),
-                page_res_it.word()->best_choice->debug_string().string());
-      }
-
-      // tessedit_test_adaption enables testing of the accuracy of the
-      // input to the adaptive classifier.
-      if (tessedit_test_adaption && !tessedit_minimal_rejection) {
-        if (!word_adaptable (page_res_it.word(),
-                             tessedit_test_adaption_mode)) {
-          page_res_it.word()->reject_map.rej_word_tess_failure();
-          // FAKE PERM REJ
-        } else {
-          // Override rejection mechanisms for this word.
-          UNICHAR_ID space = unicharset.unichar_to_id(" ");
-          for (int i = 0; i < page_res_it.word()->best_choice->length(); i++) {
-            if ((page_res_it.word()->best_choice->unichar_id(i) != space) &&
-                page_res_it.word()->reject_map[i].rejected())
-              page_res_it.word()->reject_map[i].setrej_minimal_rej_accept();
-          }
-        }
-      }
-
       // Count dict words.
       if (page_res_it.word()->best_choice->permuter() == USER_DAWG_PERM)
@@ -307,50 +355,27 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
   if (dopasses == 1) return true;
 
   // ****************** Pass 2 *******************
+  if (tessedit_tess_adaption_mode != 0x0 && !tessedit_test_adaption) {
     page_res_it.restart_page();
-  word_index = 0;
+    GenericVector<WordData> words;
+    SetupAllWordsPassN(2, target_word_box, word_config, page_res, &words);
+    if (tessedit_parallelize) {
+      PrerecAllWordsPar(words);
+    }
     most_recently_used_ = this;
-  while (tessedit_tess_adaption_mode != 0x0 && !tessedit_test_adaption &&
-         page_res_it.word() != NULL) {
-    set_global_loc_code(LOC_PASS2);
-    word_index++;
-    if (monitor != NULL) {
-      monitor->ocr_alive = TRUE;
-      monitor->progress = 80 + 10 * word_index / stats_.word_count;
-      if (monitor->deadline_exceeded() ||
-          (monitor->cancel != NULL && (*monitor->cancel)(monitor->cancel_this,
-                                                         stats_.dict_words)))
-        return false;
-    }
-
-    // changed by jetsoft
-    // specific to its needs to extract one word when need
-    if (target_word_box &&
-        !ProcessTargetWord(page_res_it.word()->word->bounding_box(),
-                           *target_word_box, word_config, 2)) {
-      page_res_it.forward();
-      continue;
-    }
-    // end jetsoft
-
-    classify_word_and_language(&Tesseract::classify_word_pass2,
-                               page_res_it.block()->block,
-                               page_res_it.row()->row,
-                               page_res_it.word());
-    if (page_res_it.word()->word->flag(W_REP_CHAR) &&
-        !page_res_it.word()->done) {
-      fix_rep_char(&page_res_it);
-      page_res_it.forward();
-      continue;
-    }
-    if (tessedit_dump_choices) {
-      word_dumper(NULL, page_res_it.row()->row, page_res_it.word());
-      tprintf("Pass2: %s [%s]\n",
-              page_res_it.word()->best_choice->unichar_string().string(),
-              page_res_it.word()->best_choice->debug_string().string());
-    }
-    page_res_it.forward();
-  }
+    // Run pass 2 word recognition.
+    if (!RecogAllWordsPassN(2, monitor, &words)) return false;
+    // Pass 2 post-processing.
+    while (page_res_it.word() != NULL) {
+      WERD_RES* word = page_res_it.word();
+      if (word->word->flag(W_REP_CHAR) && !word->done) {
+        fix_rep_char(&page_res_it);
+        page_res_it.forward();
+        continue;
+      }
+      page_res_it.forward();
+    }
+  }
 
   // The next passes can only be run if tesseract has been used, as cube
   // doesn't set all the necessary outputs in WERD_RES.
@@ -384,6 +409,7 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
     // Check the correctness of the final results.
     blamer_pass(page_res);
   }
+  script_pos_pass(page_res);
 
   // Write results pass.
   set_global_loc_code(LOC_WRITE_RESULTS);
@@ -672,6 +698,46 @@ void Tesseract::blamer_pass(PAGE_RES* page_res) {
   }
 }
 
+// Sets script positions and detects smallcaps on all output words.
+void Tesseract::script_pos_pass(PAGE_RES* page_res) {
+  PAGE_RES_IT page_res_it(page_res);
+  for (page_res_it.restart_page(); page_res_it.word() != NULL;
+       page_res_it.forward()) {
+    WERD_RES* word = page_res_it.word();
+    if (word->word->flag(W_REP_CHAR)) {
+      page_res_it.forward();
+      continue;
+    }
+    float x_height = page_res_it.block()->block->x_height();
+    float word_x_height = word->x_height;
+    if (word_x_height < word->best_choice->min_x_height() ||
+        word_x_height > word->best_choice->max_x_height()) {
+      word_x_height = (word->best_choice->min_x_height() +
+                       word->best_choice->max_x_height()) / 2.0f;
+    }
+    // Test for small caps. Word capheight must be close to block xheight,
+    // and word must contain no lower case letters, and at least one upper case.
+    double small_cap_xheight = x_height * kXHeightCapRatio;
+    double small_cap_delta = (x_height - small_cap_xheight) / 2.0;
+    if (word->uch_set->script_has_xheight() &&
+        small_cap_xheight - small_cap_delta <= word_x_height &&
+        word_x_height <= small_cap_xheight + small_cap_delta) {
+      // Scan for upper/lower.
+      int num_upper = 0;
+      int num_lower = 0;
+      for (int i = 0; i < word->best_choice->length(); ++i) {
+        if (word->uch_set->get_isupper(word->best_choice->unichar_id(i)))
+          ++num_upper;
+        else if (word->uch_set->get_islower(word->best_choice->unichar_id(i)))
+          ++num_lower;
+      }
+      if (num_upper > 0 && num_lower == 0)
+        word->small_caps = true;
+    }
+    word->SetScriptPositions();
+  }
+}
+
 // Helper returns true if the new_word is better than the word, using a
 // simple test of better certainty AND rating (to reduce false positives
 // from cube) or a dictionary vs non-dictionary word.
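
The small-caps window accepts a word x-height within half the gap between the block x-height and the estimated cap height, on either side of small_cap_xheight. As a worked example, if kXHeightCapRatio were 0.7 (an illustrative value, not taken from this diff) and the block x_height is 30, then small_cap_xheight = 21 and small_cap_delta = 4.5, so a word whose x-height lies in [16.5, 25.5] and that contains at least one upper-case and no lower-case unichars is flagged small_caps.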
@@ -701,38 +767,33 @@ static bool NewWordBetter(const WERD_RES& word, const WERD_RES& new_word,
 // Helper to recognize the word using the given (language-specific) tesseract.
 // Returns true if the result was better than previously.
-bool Tesseract::RetryWithLanguage(WERD_RES *word, BLOCK* block, ROW *row,
+bool Tesseract::RetryWithLanguage(const WERD_RES& best_word,
+                                  WordData* word_data, WERD_RES* word,
                                   WordRecognizer recognizer) {
   if (classify_debug_level || cube_debug_level) {
     tprintf("Retrying word using lang %s, oem %d\n",
             lang.string(), static_cast<int>(tessedit_ocr_engine_mode));
   }
-  // Setup a trial WERD_RES in which to classify.
-  WERD_RES lang_word;
-  lang_word.InitForRetryRecognition(*word);
   // Run the recognizer on the word.
   // Initial version is a bit of a hack based on better certainty and rating
   // (to reduce false positives from cube) or a dictionary vs non-dictionary
   // word.
-  (this->*recognizer)(block, row, &lang_word);
-  bool new_is_better = NewWordBetter(*word, lang_word,
+  (this->*recognizer)(word_data, word);
+  bool new_is_better = NewWordBetter(best_word, *word,
                                      classify_max_rating_ratio,
                                      classify_max_certainty_margin);
   if (classify_debug_level || cube_debug_level) {
-    if (lang_word.best_choice == NULL) {
-      tprintf("New result %s better:%s\n",
+    if (word->best_choice == NULL) {
+      tprintf("NULL result %s better!\n",
               new_is_better ? "IS" : "NOT");
     } else {
       tprintf("New result %s better:%s, r=%g, c=%g\n",
               new_is_better ? "IS" : "NOT",
-              lang_word.best_choice->unichar_string().string(),
-              lang_word.best_choice->rating(),
-              lang_word.best_choice->certainty());
+              word->best_choice->unichar_string().string(),
+              word->best_choice->rating(),
+              word->best_choice->certainty());
     }
   }
-  if (new_is_better) {
-    word->ConsumeWordResults(&lang_word);
-  }
   return new_is_better;
 }
@@ -743,9 +804,9 @@ bool Tesseract::RetryWithLanguage(WERD_RES *word, BLOCK* block, ROW *row,
 // If recognition was not successful, tries all available languages until
 // it gets a successful result or runs out of languages. Keeps the best result.
 void Tesseract::classify_word_and_language(WordRecognizer recognizer,
-                                           BLOCK* block,
-                                           ROW *row,
-                                           WERD_RES *word) {
+                                           WordData* word_data) {
+  // Points to the best result. May be word or in lang_words.
+  WERD_RES* word = word_data->word;
   clock_t start_t = clock();
   if (classify_debug_level || cube_debug_level) {
     tprintf("Processing word with lang %s at:",
@@ -755,15 +816,23 @@ void Tesseract::classify_word_and_language(WordRecognizer recognizer,
   const char* result_type = "Initial";
   bool initially_done = !word->tess_failed && word->done;
   if (initially_done) {
-    // If done on pass1, we reuse the tesseract that did it, and don't try
-    // any more. The only need to call the classifier at all is for the
-    // cube combiner and xheight fixing (which may be bogus on a done word.)
+    // If done on pass1, leave it as-is.
     most_recently_used_ = word->tesseract;
     result_type = "Already done";
+  } else {
+    if (most_recently_used_ != this) {
+      // Point to the word for most_recently_used_.
+      for (int s = 0; s < sub_langs_.size(); ++s) {
+        if (most_recently_used_ == sub_langs_[s]) {
+          word = &word_data->lang_words[s];
+          break;
+        }
+      }
+    }
+    (most_recently_used_->*recognizer)(word_data, word);
+    if (!word->tess_failed && word->tess_accepted)
+      result_type = "Accepted";
   }
-  (most_recently_used_->*recognizer)(block, row, word);
-  if (!word->tess_failed && word->tess_accepted)
-    result_type = "Accepted";
   if (classify_debug_level || cube_debug_level) {
     tprintf("%s result: %s r=%.4g, c=%.4g, accepted=%d, adaptable=%d"
             " xht=[%g,%g]\n",
@@ -782,12 +851,32 @@ void Tesseract::classify_word_and_language(WordRecognizer recognizer,
       if (classify_debug_level) {
         tprintf("Retrying with main-Tesseract, lang: %s\n", lang.string());
       }
-      if (RetryWithLanguage(word, block, row, recognizer)) {
-        most_recently_used_ = this;
-      }
+      if (word_data->word->tesseract == this) {
+        // This is pass1, and we are trying the main language.
+        if (RetryWithLanguage(*word, word_data, word_data->word, recognizer)) {
+          most_recently_used_ = this;
+          word = word_data->word;
+        }
+      } else {
+        // This is pass2, and we are trying the main language again, but it
+        // has no word allocated to it, so we must re-initialize it.
+        WERD_RES main_word(*word_data->word);
+        main_word.InitForRetryRecognition(*word_data->word);
+        main_word.SetupForRecognition(unicharset, this, BestPix(),
+                                      tessedit_ocr_engine_mode, NULL,
+                                      classify_bln_numeric_mode,
+                                      textord_use_cjk_fp_model,
+                                      poly_allow_detailed_fx,
+                                      word_data->row, word_data->block);
+        if (RetryWithLanguage(*word, word_data, &main_word, recognizer)) {
+          most_recently_used_ = this;
+          word_data->word->ConsumeWordResults(&main_word);
+          word = word_data->word;
+        }
+      }
       if (!word->tess_failed && word->tess_accepted)
         return;  // No need to look at the others.
     }
 
     for (int i = 0; i < sub_langs_.size(); ++i) {
       if (sub_langs_[i] != previous_used) {
@@ -795,14 +884,21 @@ void Tesseract::classify_word_and_language(WordRecognizer recognizer,
           tprintf("Retrying with sub-Tesseract[%d] lang: %s\n",
                   i, sub_langs_[i]->lang.string());
         }
-        if (sub_langs_[i]->RetryWithLanguage(word, block, row, recognizer)) {
+        if (sub_langs_[i]->RetryWithLanguage(*word, word_data,
+                                             &word_data->lang_words[i],
+                                             recognizer)) {
           most_recently_used_ = sub_langs_[i];
+          word = &word_data->lang_words[i];
           if (!word->tess_failed && word->tess_accepted)
-            return;  // No need to look at the others.
+            break;  // No need to look at the others.
         }
       }
     }
   }
+  if (word != word_data->word) {
+    // Move the result for the best language to the main word.
+    word_data->word->ConsumeWordResults(word);
+  }
   clock_t ocr_t = clock();
   if (tessedit_timing_debug) {
     tprintf("%s (ocr took %.2f sec)\n",
@@ -817,7 +913,11 @@ void Tesseract::classify_word_and_language(WordRecognizer recognizer,
  * Baseline normalize the word and pass it to Tess.
  */
-void Tesseract::classify_word_pass1(BLOCK* block, ROW *row, WERD_RES *word) {
+void Tesseract::classify_word_pass1(WordData* word_data, WERD_RES* word) {
+  ROW* row = word_data->row;
+  BLOCK* block = word_data->block;
+  prev_word_best_choice_ = word_data->prev_word != NULL
+      ? word_data->prev_word->word->best_choice : NULL;
   // If we only intend to run cube - run it and return.
   if (tessedit_ocr_engine_mode == OEM_CUBE_ONLY) {
     cube_word_pass1(block, row, word);
@@ -880,6 +980,10 @@ bool Tesseract::TrainedXheightFix(WERD_RES *word, BLOCK* block, ROW *row) {
   }
   new_x_ht_word.x_height = new_x_ht;
   new_x_ht_word.caps_height = 0.0;
+  new_x_ht_word.SetupForRecognition(
+      unicharset, this, BestPix(), tessedit_ocr_engine_mode, NULL,
+      classify_bln_numeric_mode, textord_use_cjk_fp_model,
+      poly_allow_detailed_fx, row, block);
   match_word_pass_n(2, &new_x_ht_word, row, block);
   if (!new_x_ht_word.tess_failed) {
     int new_misfits = CountMisfitTops(&new_x_ht_word);
@@ -916,11 +1020,15 @@ bool Tesseract::TrainedXheightFix(WERD_RES *word, BLOCK* block, ROW *row) {
  * Control what to do with the word in pass 2
  */
-void Tesseract::classify_word_pass2(BLOCK* block, ROW *row, WERD_RES *word) {
+void Tesseract::classify_word_pass2(WordData* word_data, WERD_RES* word) {
   // Return if we do not want to run Tesseract.
   if (tessedit_ocr_engine_mode != OEM_TESSERACT_ONLY &&
       tessedit_ocr_engine_mode != OEM_TESSERACT_CUBE_COMBINED)
     return;
+  ROW* row = word_data->row;
+  BLOCK* block = word_data->block;
+  prev_word_best_choice_ = word_data->prev_word != NULL
+      ? word_data->prev_word->word->best_choice : NULL;
 
   set_global_subloc_code(SUBLOC_NORM);
   check_debug_pt(word, 30);
@@ -940,26 +1048,6 @@ void Tesseract::classify_word_pass2(BLOCK* block, ROW *row, WERD_RES *word) {
     // Use the tops and bottoms since they are available.
     TrainedXheightFix(word, block, row);
   }
-
-  // Test for small caps. Word capheight must be close to block xheight,
-  // and word must contain no lower case letters, and at least one upper case.
-  double small_cap_xheight = block->x_height() * kXHeightCapRatio;
-  double small_cap_delta = (block->x_height() - small_cap_xheight) / 2.0;
-  if (unicharset.script_has_xheight() &&
-      small_cap_xheight - small_cap_delta <= word->x_height &&
-      word->x_height <= small_cap_xheight + small_cap_delta) {
-    // Scan for upper/lower.
-    int num_upper = 0;
-    int num_lower = 0;
-    for (int i = 0; i < word->best_choice->length(); ++i) {
-      if (unicharset.get_isupper(word->best_choice->unichar_id(i)))
-        ++num_upper;
-      else if (unicharset.get_islower(word->best_choice->unichar_id(i)))
-        ++num_lower;
-    }
-    if (num_upper > 0 && num_lower == 0)
-      word->small_caps = true;
-  }
-  word->SetScriptPositions();
 
   set_global_subloc_code(SUBLOC_NORM);
 }
@@ -988,11 +1076,7 @@ void Tesseract::classify_word_pass2(BLOCK* block, ROW *row, WERD_RES *word) {
 void Tesseract::match_word_pass_n(int pass_n, WERD_RES *word,
                                   ROW *row, BLOCK* block) {
-  if (word->SetupForTessRecognition(unicharset, this, BestPix(),
-                                    classify_bln_numeric_mode,
-                                    textord_use_cjk_fp_model,
-                                    poly_allow_detailed_fx,
-                                    row, block))
-    tess_segment_pass_n(pass_n, word);
+  if (word->tess_failed) return;
+  tess_segment_pass_n(pass_n, word);
 
   if (!word->tess_failed) {
@@ -1136,8 +1220,8 @@ void Tesseract::ExplodeRepeatedWord(BLOB_CHOICE* best_choice,
   WERD_RES* rep_word =
       page_res_it->InsertSimpleCloneWord(*word_res, blob_word);
   // Setup the single char WERD_RES
-  if (rep_word->SetupForTessRecognition(*word_res->uch_set, this, BestPix(),
-                                        false,
+  if (rep_word->SetupForRecognition(*word_res->uch_set, this, BestPix(),
+                                    tessedit_ocr_engine_mode, NULL, false,
                                     textord_use_cjk_fp_model,
                                     poly_allow_detailed_fx,
                                     page_res_it->row()->row,

View File

@@ -197,6 +197,9 @@ void Tesseract::run_cube_combiner(PAGE_RES *page_res) {
   // Iterate through the word results and call cube on each word.
   for (page_res_it.restart_page(); page_res_it.word () != NULL;
        page_res_it.forward()) {
+    BLOCK* block = page_res_it.block()->block;
+    if (block->poly_block() != NULL && !block->poly_block()->IsText())
+      continue;  // Don't deal with non-text blocks.
     WERD_RES* word = page_res_it.word();
     // Skip cube entirely if tesseract's certainty is greater than threshold.
     int combiner_run_thresh = convert_prob_to_tess_certainty(
@@ -210,6 +213,11 @@ void Tesseract::run_cube_combiner(PAGE_RES *page_res) {
       // Setup a trial WERD_RES in which to classify with cube.
       WERD_RES cube_word;
       cube_word.InitForRetryRecognition(*word);
+      cube_word.SetupForRecognition(lang_tess->unicharset, this, BestPix(),
+                                    OEM_CUBE_ONLY,
+                                    NULL, false, false, false,
+                                    page_res_it.row()->row,
+                                    page_res_it.block()->block);
       CubeObject *cube_obj = lang_tess->cube_recognize_word(
           page_res_it.block()->block, &cube_word);
       if (cube_obj != NULL)
@@ -317,10 +325,6 @@ void Tesseract::cube_combine_word(CubeObject* cube_obj, WERD_RES* cube_word,
  **********************************************************************/
 bool Tesseract::cube_recognize(CubeObject *cube_obj, BLOCK* block,
                                WERD_RES *word) {
-  if (!word->SetupForCubeRecognition(unicharset, this, block)) {
-    return false;  // Graphics block.
-  }
-
   // Run cube
   WordAltList *cube_alt_list = cube_obj->RecognizeWord();
   if (!cube_alt_list || cube_alt_list->AltCount() <= 0) {

View File

@@ -204,8 +204,9 @@ void Tesseract::match_current_words(WERD_RES_LIST &words, ROW *row,
   for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
     word = word_it.data();
     if ((!word->part_of_combo) && (word->box_word == NULL)) {
-      classify_word_and_language(&Tesseract::classify_word_pass2,
-                                 block, row, word);
+      WordData word_data(block, row, word);
+      SetupWordPassN(2, &word_data);
+      classify_word_and_language(&Tesseract::classify_word_pass2, &word_data);
     }
     prev_word_best_choice_ = word->best_choice;
   }

View File

@@ -731,7 +731,9 @@ BOOL8 Tesseract:: word_blank_and_set_display(BLOCK* block, ROW* row,
 BOOL8 Tesseract::word_bln_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
   TWERD *bln_word = word_res->chopped_word;
   if (bln_word == NULL) {
-    word_res->SetupForTessRecognition(unicharset, this, BestPix(), false,
+    word_res->SetupForRecognition(unicharset, this, BestPix(),
+                                  tessedit_ocr_engine_mode, NULL,
+                                  classify_bln_numeric_mode,
                                   textord_use_cjk_fp_model,
                                   poly_allow_detailed_fx,
                                   row, block);
@@ -963,7 +965,9 @@ void Tesseract::blob_feature_display(PAGE_RES* page_res,
   if (word != NULL) {
     WERD_RES word_res(word);
     word_res.x_height = row->x_height();
-    word_res.SetupForTessRecognition(unicharset, this, BestPix(), false,
+    word_res.SetupForRecognition(unicharset, this, BestPix(),
+                                 tessedit_ocr_engine_mode, NULL,
+                                 classify_bln_numeric_mode,
                                  textord_use_cjk_fp_model,
                                  poly_allow_detailed_fx,
                                  row, block);

View File

@@ -203,7 +203,9 @@ void Tesseract::ambigs_classify_and_output(WERD_RES *werd_res,
                                            FILE *output_file) {
   // Classify word.
   fflush(stdout);
-  classify_word_pass1(block_res->block, row_res->row, werd_res);
+  WordData word_data(block_res->block, row_res->row, werd_res);
+  SetupWordPassN(1, &word_data);
+  classify_word_pass1(&word_data, werd_res);
   WERD_CHOICE *best_choice = werd_res->best_choice;
   ASSERT_HOST(best_choice != NULL);

View File

@@ -402,6 +402,8 @@ Tesseract::Tesseract()
                 "for layout analysis.", this->params()),
     BOOL_MEMBER(textord_equation_detect, false, "Turn on equation detector",
                 this->params()),
+    INT_MEMBER(tessedit_parallelize, 0, "Run in parallel where possible",
+               this->params()),
 
     // The following parameters were deprecated and removed from their original
     // locations. The parameters are temporarily kept here to give Tesseract
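
Since tessedit_parallelize is an ordinary INT param, it can be toggled like any other variable. A minimal sketch using the public TessBaseAPI (the API call is standard Tesseract, not part of this diff):

    tesseract::TessBaseAPI api;
    api.Init(NULL, "eng");
    api.SetVariable("tessedit_parallelize", "1");  // route setup through PrerecAllWordsPar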
@@ -528,7 +530,6 @@ void Tesseract::Clear() {
   reskew_ = FCOORD(1.0f, 0.0f);
   splitter_.Clear();
   scaled_factor_ = -1;
-  ResetFeaturesHaveBeenExtracted();
   for (int i = 0; i < sub_langs_.size(); ++i)
     sub_langs_[i]->Clear();
 }

View File

@@ -100,10 +100,6 @@ class EquationDetect;
 class Tesseract;
 class TesseractCubeCombiner;
 
-typedef void (Tesseract::*WordRecognizer)(BLOCK* block,
-                                          ROW *row,
-                                          WERD_RES *word);
-
 // A collection of various variables for statistics and debugging.
 struct TesseractStats {
   TesseractStats()
@@ -136,6 +132,24 @@ struct TesseractStats {
   bool write_results_empty_block;
 };
 
+// Struct to hold all the pointers to relevant data for processing a word.
+struct WordData {
+  WordData() : word(NULL), row(NULL), block(NULL), prev_word(NULL) {}
+  explicit WordData(const PAGE_RES_IT& page_res_it)
+    : word(page_res_it.word()), row(page_res_it.row()->row),
+      block(page_res_it.block()->block), prev_word(NULL) {}
+  WordData(BLOCK* block_in, ROW* row_in, WERD_RES* word_res)
+    : word(word_res), row(row_in), block(block_in), prev_word(NULL) {}
+
+  WERD_RES* word;
+  ROW* row;
+  BLOCK* block;
+  WordData* prev_word;
+  GenericVector<WERD_RES> lang_words;
+};
+
+typedef void (Tesseract::*WordRecognizer)(WordData* word_data, WERD_RES* word);
+
 class Tesseract : public Wordrec {
  public:
   Tesseract();
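
WordData bundles everything a recognizer needs so a word can be classified without touching the page iterator, and prev_word chaining replaces the old reliance on iteration order for prev_word_best_choice_. A minimal sketch of building the vector (mirroring SetupAllWordsPassN in this diff):

    GenericVector<WordData> words;
    for (page_res_it.restart_page(); page_res_it.word() != NULL;
         page_res_it.forward()) {
      words.push_back(WordData(page_res_it));  // captures word, row, block
    }
    for (int w = 1; w < words.size(); ++w)
      words[w].prev_word = &words[w - 1];  // feeds prev_word_best_choice_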
@@ -250,10 +264,23 @@ class Tesseract : public Wordrec {
                    bool single_column, bool osd, bool only_osd,
                    BLOCK_LIST* blocks, Tesseract* osd_tess, OSResults* osr,
                    TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix, Pix** music_mask_pix);
+  // par_control.cpp
+  void PrerecAllWordsPar(const GenericVector<WordData>& words);
 
   //// control.h /////////////////////////////////////////////////////////
   bool ProcessTargetWord(const TBOX& word_box, const TBOX& target_word_box,
                          const char* word_config, int pass);
+  // Sets up the words ready for whichever engine is to be run
+  void SetupAllWordsPassN(int pass_n,
+                          const TBOX* target_word_box,
+                          const char* word_config,
+                          PAGE_RES* page_res,
+                          GenericVector<WordData>* words);
+  // Sets up the single word ready for whichever engine is to be run.
+  void SetupWordPassN(int pass_n, WordData* word);
+  // Runs word recognition on all the words.
+  bool RecogAllWordsPassN(int pass_n, ETEXT_DESC* monitor,
+                          GenericVector<WordData>* words);
   bool recog_all_words(PAGE_RES* page_res,
                        ETEXT_DESC* monitor,
                        const TBOX* target_word_box,
@@ -265,13 +292,15 @@ class Tesseract : public Wordrec {
                         const char* word_config);
   void bigram_correction_pass(PAGE_RES *page_res);
   void blamer_pass(PAGE_RES* page_res);
+  // Sets script positions and detects smallcaps on all output words.
+  void script_pos_pass(PAGE_RES* page_res);
   // Helper to recognize the word using the given (language-specific) tesseract.
   // Returns true if the result was better than previously.
-  bool RetryWithLanguage(WERD_RES *word, BLOCK* block, ROW *row,
-                         WordRecognizer recognizer);
+  bool RetryWithLanguage(const WERD_RES& best_word, WordData* word_data,
+                         WERD_RES* word, WordRecognizer recognizer);
   void classify_word_and_language(WordRecognizer recognizer,
-                                  BLOCK* block, ROW *row, WERD_RES *word);
-  void classify_word_pass1(BLOCK* block, ROW *row, WERD_RES *word);
+                                  WordData* word_data);
+  void classify_word_pass1(WordData* word_data, WERD_RES* word);
   void recog_pseudo_word(PAGE_RES* page_res,  // blocks to check
                          TBOX &selection_box);
@@ -282,7 +311,7 @@ class Tesseract : public Wordrec {
                        const char *s,
                        const char *lengths);
   void match_word_pass_n(int pass_n, WERD_RES *word, ROW *row, BLOCK* block);
-  void classify_word_pass2(BLOCK* block, ROW *row, WERD_RES *word);
+  void classify_word_pass2(WordData* word_data, WERD_RES* word);
   void ReportXhtFixResult(bool accept_new_word, float new_x_ht,
                           WERD_RES* word, WERD_RES* new_word);
   bool RunOldFixXht(WERD_RES *word, BLOCK* block, ROW *row);
@@ -936,6 +965,7 @@ class Tesseract : public Wordrec {
              "Only initialize with the config file. Useful if the instance is "
              "not going to be used for OCR but say only for layout analysis.");
   BOOL_VAR_H(textord_equation_detect, false, "Turn on equation detector");
+  INT_VAR_H(tessedit_parallelize, 0, "Run in parallel where possible");
 
   // The following parameters were deprecated and removed from their original
   // locations. The parameters are temporarily kept here to give Tesseract

View File

@@ -741,19 +741,36 @@ TWERD* TWERD::PolygonalCopy(bool allow_detailed_fx, WERD* src) {
 // DENORMs in the blobs.
 void TWERD::BLNormalize(const BLOCK* block, const ROW* row, Pix* pix,
                         bool inverse, float x_height, bool numeric_mode,
+                        tesseract::OcrEngineMode hint,
+                        const TBOX* norm_box,
                         DENORM* word_denorm) {
   TBOX word_box = bounding_box();
+  if (norm_box != NULL) word_box = *norm_box;
   float word_middle = (word_box.left() + word_box.right()) / 2.0f;
+  float input_y_offset = 0.0f;
+  float final_y_offset = static_cast<float>(kBlnBaselineOffset);
+  float scale = kBlnXHeight / x_height;
+  if (hint == tesseract::OEM_CUBE_ONLY || row == NULL) {
+    word_middle = word_box.left();
+    input_y_offset = word_box.bottom();
+    final_y_offset = 0.0f;
+    if (hint == tesseract::OEM_CUBE_ONLY)
+      scale = 1.0f;
+  } else {
+    input_y_offset = row->base_line(word_middle);
+  }
   for (int b = 0; b < blobs.size(); ++b) {
     TBLOB* blob = blobs[b];
     TBOX blob_box = blob->bounding_box();
     float mid_x = (blob_box.left() + blob_box.right()) / 2.0f;
-    float baseline = row->base_line(mid_x);
-    float scale = kBlnXHeight / x_height;
+    float baseline = input_y_offset;
+    float blob_scale = scale;
     if (numeric_mode) {
       baseline = blob_box.bottom();
-      scale = ClipToRange(kBlnXHeight * 4.0f / (3 * blob_box.height()),
+      blob_scale = ClipToRange(kBlnXHeight * 4.0f / (3 * blob_box.height()),
                           scale, scale * 1.5f);
+    } else if (row != NULL && hint != tesseract::OEM_CUBE_ONLY) {
+      baseline = row->base_line(mid_x);
     }
@@ -761,16 +778,13 @@ void TWERD::BLNormalize(const BLOCK* block, const ROW* row, Pix* pix,
     // To tell the difference pixGetDepth() will return 8 or 1.
     // The inverse flag will be true iff the word has been determined to be
     // white on black, and is independent of whether the pix is 8 bit or 1 bit.
-    blob->Normalize(block, NULL, NULL, word_middle, baseline, scale, scale,
-                    0.0f, static_cast<float>(kBlnBaselineOffset),
-                    inverse, pix);
+    blob->Normalize(block, NULL, NULL, word_middle, baseline, blob_scale,
+                    blob_scale, 0.0f, final_y_offset, inverse, pix);
   }
   if (word_denorm != NULL) {
-    float scale = kBlnXHeight / x_height;
     word_denorm->SetupNormalization(block, NULL, NULL, word_middle,
-                                    row->base_line(word_middle),
-                                    scale, scale, 0.0f,
-                                    static_cast<float>(kBlnBaselineOffset));
+                                    input_y_offset, scale, scale,
+                                    0.0f, final_y_offset);
     word_denorm->set_inverse(inverse);
     word_denorm->set_pix(pix);
   }
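
In the default (non-cube) path each blob is still scaled by kBlnXHeight / x_height about the row baseline and shifted so the baseline lands at kBlnBaselineOffset; the new hint only changes the offsets. As a worked example with the usual constants kBlnXHeight = 128 and kBlnBaselineOffset = 64: a word with x_height 32 gets scale = 128 / 32 = 4.0, so a point 8px above its baseline maps to y = 64 + 8 * 4.0 = 96 in baseline-normalized space. Under the OEM_CUBE_ONLY hint (or with no row) the scale is forced to 1.0 and final_y_offset to 0, so the word is merely translated to its bottom-left corner, matching what the removed SetupForCubeRecognition used to do.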

View File

@@ -31,6 +31,7 @@
 ----------------------------------------------------------------------*/
 
 #include "clst.h"
 #include "normalis.h"
+#include "publictypes.h"
 #include "rect.h"
 #include "vecfuncs.h"
@@ -316,7 +317,10 @@ struct TWERD {
   // Baseline normalizes the blobs in-place, recording the normalization in the
   // DENORMs in the blobs.
   void BLNormalize(const BLOCK* block, const ROW* row, Pix* pix, bool inverse,
-                   float x_height, bool numeric_mode, DENORM* word_denorm);
+                   float x_height, bool numeric_mode,
+                   tesseract::OcrEngineMode hint,
+                   const TBOX* norm_box,
+                   DENORM* word_denorm);
   // Copies the data and the blobs, but leaves next untouched.
   void CopyFrom(const TWERD& src);
   // Deletes owned data.

View File

@@ -32,6 +32,8 @@ static const double kStopperAmbiguityThresholdGain = 8.0;
 // Constant offset for computing thresholds that determine the ambiguity of a
 // word.
 static const double kStopperAmbiguityThresholdOffset = 1.5;
+// Max number of broken pieces to associate.
+const int kWordrecMaxNumJoinChunks = 4;
 
 // Computes and returns a threshold of certainty difference used to determine
 // which words to keep, based on the adjustment factors of the two words.
@@ -245,16 +247,25 @@ void WERD_RES::InitForRetryRecognition(const WERD_RES& source) {
 // If allow_detailed_fx is true, the feature extractor will receive fine
 // precision outline information, allowing smoother features and better
 // features on low resolution images.
+// The norm_mode_hint sets the default mode for normalization in absence
+// of any of the above flags.
+// norm_box is used to override the word bounding box to determine the
+// normalization scale and offset.
 // Returns false if the word is empty and sets up fake results.
-bool WERD_RES::SetupForTessRecognition(const UNICHARSET& unicharset_in,
-                                       tesseract::Tesseract* tess, Pix* pix,
-                                       bool numeric_mode,
-                                       bool use_body_size,
-                                       bool allow_detailed_fx,
-                                       ROW *row, BLOCK* block) {
+bool WERD_RES::SetupForRecognition(const UNICHARSET& unicharset_in,
+                                   tesseract::Tesseract* tess, Pix* pix,
+                                   int norm_mode,
+                                   const TBOX* norm_box,
+                                   bool numeric_mode,
+                                   bool use_body_size,
+                                   bool allow_detailed_fx,
+                                   ROW *row, const BLOCK* block) {
+  tesseract::OcrEngineMode norm_mode_hint =
+      static_cast<tesseract::OcrEngineMode>(norm_mode);
   tesseract = tess;
   POLY_BLOCK* pb = block != NULL ? block->poly_block() : NULL;
-  if (word->cblob_list()->empty() || (pb != NULL && !pb->IsText())) {
+  if ((norm_mode_hint != tesseract::OEM_CUBE_ONLY &&
+      word->cblob_list()->empty()) || (pb != NULL && !pb->IsText())) {
     // Empty words occur when all the blobs have been moved to the rej_blobs
     // list, which seems to occur frequently in junk.
     SetupFake(unicharset_in);
@@ -264,13 +275,17 @@ bool WERD_RES::SetupForTessRecognition(const UNICHARSET& unicharset_in,
   ClearResults();
   SetupWordScript(unicharset_in);
   chopped_word = TWERD::PolygonalCopy(allow_detailed_fx, word);
-  float word_xheight = use_body_size && row->body_size() > 0.0f
+  float word_xheight = use_body_size && row != NULL && row->body_size() > 0.0f
       ? row->body_size() : x_height;
   chopped_word->BLNormalize(block, row, pix, word->flag(W_INVERSE),
-                            word_xheight, numeric_mode, &denorm);
+                            word_xheight, numeric_mode, norm_mode_hint,
+                            norm_box, &denorm);
   blob_row = row;
   SetupBasicsFromChoppedWord(unicharset_in);
   SetupBlamerBundle();
+  int num_blobs = chopped_word->NumBlobs();
+  ratings = new MATRIX(num_blobs, kWordrecMaxNumJoinChunks);
+  tess_failed = false;
   return true;
 }
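
Allocating ratings up front means the word arrives at the classifier with its band matrix ready: one column per blob and a band of kWordrecMaxNumJoinChunks, so candidate classifications can cover spans of up to 4 chunks. A minimal sketch of what the new setup produces (names taken from this diff):

    // e.g. a 5-blob word after SetupForRecognition:
    int num_blobs = chopped_word->NumBlobs();                   // 5
    ratings = new MATRIX(num_blobs, kWordrecMaxNumJoinChunks);  // 5 wide, band 4
    tess_failed = false;  // cleared so match_word_pass_n can early-out on failure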
@@ -284,30 +299,6 @@ bool WERD_RES::SetupForTessRecognition(const UNICHARSET& unicharset_in,
   ClearWordChoices();
 }
 
-// Sets up the members used in recognition:
-// bln_boxes, chopped_word, seam_array, denorm, best_choice, raw_choice.
-// Returns false if the word is empty and sets up fake results.
-bool WERD_RES::SetupForCubeRecognition(const UNICHARSET& unicharset_in,
-                                       tesseract::Tesseract* tess,
-                                       const BLOCK* block) {
-  tesseract = tess;
-  POLY_BLOCK* pb = block != NULL ? block->poly_block() : NULL;
-  if (pb != NULL && !pb->IsText()) {
-    // Ignore words in graphic regions.
-    SetupFake(unicharset_in);
-    word->set_flag(W_REP_CHAR, false);
-    return false;
-  }
-  ClearResults();
-  SetupWordScript(unicharset_in);
-  TBOX word_box = word->bounding_box();
-  denorm.SetupNormalization(block, NULL, NULL,
-                            word_box.left(), word_box.bottom(),
-                            1.0f, 1.0f, 0.0f, 0.0f);
-  SetupBlamerBundle();
-  return true;
-}
-
 // Sets up the members used in recognition for an empty recognition result:
 // bln_boxes, chopped_word, seam_array, denorm, best_choice, raw_choice.
 void WERD_RES::SetupFake(const UNICHARSET& unicharset_in) {

View File

@@ -339,7 +339,8 @@ class WERD_RES : public ELIST_LINK {
   // characters purely based on their shape on the page, and by default produce
   // the corresponding unicode for a left-to-right context.
   const char* const BestUTF8(int blob_index, bool in_rtl_context) const {
-    if (blob_index < 0 || blob_index >= best_choice->length())
+    if (blob_index < 0 || best_choice == NULL ||
+        blob_index >= best_choice->length())
       return NULL;
     UNICHAR_ID id = best_choice->unichar_id(blob_index);
     if (id < 0 || id >= uch_set->size() || id == INVALID_UNICHAR_ID)
@@ -435,25 +436,22 @@ class WERD_RES : public ELIST_LINK {
   // If allow_detailed_fx is true, the feature extractor will receive fine
   // precision outline information, allowing smoother features and better
   // features on low resolution images.
+  // The norm_mode sets the default mode for normalization in absence
+  // of any of the above flags. It should really be a tesseract::OcrEngineMode
+  // but is declared as int for ease of use with tessedit_ocr_engine_mode.
   // Returns false if the word is empty and sets up fake results.
-  bool SetupForTessRecognition(const UNICHARSET& unicharset_in,
-                               tesseract::Tesseract* tesseract, Pix* pix,
-                               bool numeric_mode, bool use_body_size,
-                               bool allow_detailed_fx,
-                               ROW *row, BLOCK* block);
+  bool SetupForRecognition(const UNICHARSET& unicharset_in,
+                           tesseract::Tesseract* tesseract, Pix* pix,
+                           int norm_mode,
+                           const TBOX* norm_box, bool numeric_mode,
+                           bool use_body_size, bool allow_detailed_fx,
+                           ROW *row, const BLOCK* block);
 
   // Set up the seam array, bln_boxes, best_choice, and raw_choice to empty
   // accumulators from a made chopped word. We presume the fields are already
   // empty.
   void SetupBasicsFromChoppedWord(const UNICHARSET &unicharset_in);
 
-  // Sets up the members used in recognition:
-  // bln_boxes, chopped_word, seam_array, denorm.
-  // Returns false if the word is empty and sets up fake results.
-  bool SetupForCubeRecognition(const UNICHARSET& unicharset_in,
-                               tesseract::Tesseract* tesseract,
-                               const BLOCK* block);
-
   // Sets up the members used in recognition for an empty recognition result:
   // bln_boxes, chopped_word, seam_array, denorm, best_choice, raw_choice.
   void SetupFake(const UNICHARSET& uch);

View File

@@ -530,8 +530,9 @@ void WERD_CHOICE::SetScriptPositions(bool small_caps, TWERD* word) {
   // Initialize to normal.
   for (int i = 0; i < length_; ++i)
     script_pos_[i] = tesseract::SP_NORMAL;
-  if (word->blobs.empty())
+  if (word->blobs.empty() || word->NumBlobs() != TotalOfStates()) {
     return;
+  }
 
   int position_counts[4];
   for (int i = 0; i < 4; i++) {

View File

@@ -122,8 +122,6 @@ struct PROTO_KEY {
 #define MarginalMatch(Rating) \
 ((Rating) > matcher_great_threshold)
 
-#define InitIntFX() (FeaturesHaveBeenExtracted = FALSE)
-
 /*-----------------------------------------------------------------------------
           Private Function Prototypes
 -----------------------------------------------------------------------------*/
@@ -179,8 +177,7 @@ void Classify::AdaptiveClassifier(TBLOB *Blob,
   ADAPT_RESULTS *Results = new ADAPT_RESULTS();
   Results->Initialize();
 
-  if (AdaptedTemplates == NULL)
-    AdaptedTemplates = NewAdaptedTemplates (true);
+  ASSERT_HOST(AdaptedTemplates != NULL);
 
   DoAdaptiveMatch(Blob, Results);
   if (CPResults != NULL)
@@ -207,7 +204,6 @@ void Classify::AdaptiveClassifier(TBLOB *Blob,
     DebugAdaptiveClassifier(Blob, Results);
 #endif
 
-  NumClassesOutput += Choices->length();
   delete Results;
 }                                /* AdaptiveClassifier */
@@ -249,7 +245,6 @@ void Classify::LearnWord(const char* filename, WERD_RES *word) {
   if (!EnableLearning || word->best_choice == NULL)
     return;  // Can't or won't adapt.
 
-  NumWordsAdaptedTo++;
   if (classify_learning_debug_level >= 1)
     tprintf("\n\nAdapting to word = %s\n",
             word->best_choice->debug_string().string());
@@ -480,15 +475,11 @@ void Classify::EndAdaptiveClassifier() {
   FreeNormProtos();
   if (AllProtosOn != NULL) {
     FreeBitVector(AllProtosOn);
-    FreeBitVector(PrunedProtos);
     FreeBitVector(AllConfigsOn);
-    FreeBitVector(AllProtosOff);
     FreeBitVector(AllConfigsOff);
     FreeBitVector(TempProtoMask);
     AllProtosOn = NULL;
-    PrunedProtos = NULL;
     AllConfigsOn = NULL;
-    AllProtosOff = NULL;
     AllConfigsOff = NULL;
     TempProtoMask = NULL;
   }
@@ -561,19 +552,15 @@ void Classify::InitAdaptiveClassifier(bool load_pre_trained_templates) {
     static_classifier_ = new TessClassifier(false, this);
   }
 
-  im_.Init(&classify_debug_level, classify_integer_matcher_multiplier);
+  im_.Init(&classify_debug_level);
   InitIntegerFX();
 
   AllProtosOn = NewBitVector(MAX_NUM_PROTOS);
-  PrunedProtos = NewBitVector(MAX_NUM_PROTOS);
   AllConfigsOn = NewBitVector(MAX_NUM_CONFIGS);
-  AllProtosOff = NewBitVector(MAX_NUM_PROTOS);
   AllConfigsOff = NewBitVector(MAX_NUM_CONFIGS);
   TempProtoMask = NewBitVector(MAX_NUM_PROTOS);
   set_all_bits(AllProtosOn, WordsInVectorOfSize(MAX_NUM_PROTOS));
-  set_all_bits(PrunedProtos, WordsInVectorOfSize(MAX_NUM_PROTOS));
   set_all_bits(AllConfigsOn, WordsInVectorOfSize(MAX_NUM_CONFIGS));
-  zero_all_bits(AllProtosOff, WordsInVectorOfSize(MAX_NUM_PROTOS));
   zero_all_bits(AllConfigsOff, WordsInVectorOfSize(MAX_NUM_CONFIGS));
 
   for (int i = 0; i < MAX_NUM_CLASSES; i++) {
@ -617,53 +604,11 @@ void Classify::ResetAdaptiveClassifierInternal() {
NumAdaptationsFailed); NumAdaptationsFailed);
} }
free_adapted_templates(AdaptedTemplates); free_adapted_templates(AdaptedTemplates);
AdaptedTemplates = NULL; AdaptedTemplates = NewAdaptedTemplates(true);
NumAdaptationsFailed = 0; NumAdaptationsFailed = 0;
} }
/*---------------------------------------------------------------------------*/
/**
* Print to File the statistics which have
* been gathered for the adaptive matcher.
*
* @param File open text file to print adaptive statistics to
*
* Globals: none
*
* @note Exceptions: none
* @note History: Thu Apr 18 14:37:37 1991, DSJ, Created.
*/
void Classify::PrintAdaptiveStatistics(FILE *File) {
#ifndef SECURE_NAMES
fprintf (File, "\nADAPTIVE MATCHER STATISTICS:\n");
fprintf (File, "\tNum blobs classified = %d\n", AdaptiveMatcherCalls);
fprintf (File, "\tNum classes output = %d (Avg = %4.2f)\n",
NumClassesOutput,
((AdaptiveMatcherCalls == 0) ? (0.0) :
((float) NumClassesOutput / AdaptiveMatcherCalls)));
fprintf (File, "\t\tBaseline Classifier: %4d calls (%4.2f classes/call)\n",
BaselineClassifierCalls,
((BaselineClassifierCalls == 0) ? (0.0) :
((float) NumBaselineClassesTried / BaselineClassifierCalls)));
fprintf (File, "\t\tCharNorm Classifier: %4d calls (%4.2f classes/call)\n",
CharNormClassifierCalls,
((CharNormClassifierCalls == 0) ? (0.0) :
((float) NumCharNormClassesTried / CharNormClassifierCalls)));
fprintf (File, "\t\tAmbig Classifier: %4d calls (%4.2f classes/call)\n",
AmbigClassifierCalls,
((AmbigClassifierCalls == 0) ? (0.0) :
((float) NumAmbigClassesTried / AmbigClassifierCalls)));
fprintf (File, "\nADAPTIVE LEARNER STATISTICS:\n");
fprintf (File, "\tNumber of words adapted to: %d\n", NumWordsAdaptedTo);
fprintf (File, "\tNumber of chars adapted to: %d\n", NumCharsAdaptedTo);
PrintAdaptedTemplates(File, AdaptedTemplates);
#endif
} /* PrintAdaptiveStatistics */
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
/** /**
@ -915,8 +860,6 @@ void Classify::AdaptToChar(TBLOB *Blob,
FEATURE_SET FloatFeatures; FEATURE_SET FloatFeatures;
int NewTempConfigId; int NewTempConfigId;
ResetFeaturesHaveBeenExtracted();
NumCharsAdaptedTo++;
if (!LegalClassId (ClassId)) if (!LegalClassId (ClassId))
return; return;
@ -932,7 +875,6 @@ void Classify::AdaptToChar(TBLOB *Blob,
if (NumFeatures <= 0) if (NumFeatures <= 0)
return; return;
im_.SetBaseLineMatch();
// Only match configs with the matching font. // Only match configs with the matching font.
BIT_VECTOR MatchingFontConfigs = NewBitVector(MAX_NUM_PROTOS); BIT_VECTOR MatchingFontConfigs = NewBitVector(MAX_NUM_PROTOS);
for (int cfg = 0; cfg < IClass->NumConfigs; ++cfg) { for (int cfg = 0; cfg < IClass->NumConfigs; ++cfg) {
@ -1004,17 +946,16 @@ void Classify::AdaptToChar(TBLOB *Blob,
void Classify::DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class) { void Classify::DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class) {
#ifndef GRAPHICS_DISABLED #ifndef GRAPHICS_DISABLED
int bloblength = 0; INT_FX_RESULT_STRUCT fx_info;
INT_FEATURE_ARRAY features; GenericVector<INT_FEATURE_STRUCT> bl_features;
uinT8* norm_array = new uinT8[unicharset.size()]; TrainingSample* sample =
int num_features = GetBaselineFeatures(blob, PreTrainedTemplates, BlobToTrainingSample(*blob, classify_nonlinear_norm, &fx_info,
features, &bl_features);
norm_array, &bloblength); if (sample == NULL) return;
delete [] norm_array;
INT_RESULT_STRUCT IntResult;
INT_RESULT_STRUCT IntResult;
im_.Match(int_class, AllProtosOn, AllConfigsOn, im_.Match(int_class, AllProtosOn, AllConfigsOn,
num_features, features, bl_features.size(), &bl_features[0],
&IntResult, classify_adapt_feature_threshold, &IntResult, classify_adapt_feature_threshold,
NO_DEBUG, matcher_debug_separate_windows); NO_DEBUG, matcher_debug_separate_windows);
cprintf ("Best match to temp config %d = %4.1f%%.\n", cprintf ("Best match to temp config %d = %4.1f%%.\n",
@ -1024,7 +965,7 @@ void Classify::DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class) {
ConfigMask = 1 << IntResult.Config; ConfigMask = 1 << IntResult.Config;
ShowMatchDisplay(); ShowMatchDisplay();
im_.Match(int_class, AllProtosOn, (BIT_VECTOR)&ConfigMask, im_.Match(int_class, AllProtosOn, (BIT_VECTOR)&ConfigMask,
num_features, features, bl_features.size(), &bl_features[0],
&IntResult, classify_adapt_feature_threshold, &IntResult, classify_adapt_feature_threshold,
6 | 0x19, matcher_debug_separate_windows); 6 | 0x19, matcher_debug_separate_windows);
UpdateMatchDisplay(); UpdateMatchDisplay();
@ -1033,50 +974,6 @@ void Classify::DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class) {
} }
/*---------------------------------------------------------------------------*/
/**
* @param Blob blob to add to templates for ClassId
* @param ClassId class to add blob to
* @param FontinfoId font information from pre-trained teamples
* @param Threshold minimum match rating to existing template
*
* Globals:
* - PreTrainedTemplates current set of built-in templates
*
* @note Exceptions: none
* @note History: Thu Mar 14 09:36:03 1991, DSJ, Created.
*/
void Classify::AdaptToPunc(TBLOB *Blob,
CLASS_ID ClassId,
int FontinfoId,
FLOAT32 Threshold) {
ADAPT_RESULTS *Results = new ADAPT_RESULTS();
int i;
Results->Initialize();
CharNormClassifier(Blob, PreTrainedTemplates, Results);
RemoveBadMatches(Results);
if (Results->NumMatches != 1) {
if (classify_learning_debug_level >= 1) {
cprintf ("Rejecting punc = %s (Alternatives = ",
unicharset.id_to_unichar(ClassId));
for (i = 0; i < Results->NumMatches; i++)
tprintf("%s", unicharset.id_to_unichar(Results->match[i].unichar_id));
tprintf(")\n");
}
} else {
#ifndef SECURE_NAMES
if (classify_learning_debug_level >= 1)
cprintf ("Adapting to punc = %s, thr= %g\n",
unicharset.id_to_unichar(ClassId), Threshold);
#endif
AdaptToChar(Blob, ClassId, FontinfoId, Threshold);
}
delete Results;
} /* AdaptToPunc */
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
/** /**
@ -1167,50 +1064,41 @@ void Classify::AddNewResult(ADAPT_RESULTS *results,
* @note Exceptions: none * @note Exceptions: none
* @note History: Tue Mar 12 19:40:36 1991, DSJ, Created. * @note History: Tue Mar 12 19:40:36 1991, DSJ, Created.
*/ */
void Classify::AmbigClassifier(TBLOB *Blob, void Classify::AmbigClassifier(
INT_TEMPLATES Templates, const GenericVector<INT_FEATURE_STRUCT>& int_features,
ADAPT_CLASS *Classes, const INT_FX_RESULT_STRUCT& fx_info,
UNICHAR_ID *Ambiguities, const TBLOB *blob,
ADAPT_RESULTS *Results) { INT_TEMPLATES templates,
int NumFeatures; ADAPT_CLASS *classes,
INT_FEATURE_ARRAY IntFeatures; UNICHAR_ID *ambiguities,
ADAPT_RESULTS *results) {
if (int_features.empty()) return;
uinT8* CharNormArray = new uinT8[unicharset.size()]; uinT8* CharNormArray = new uinT8[unicharset.size()];
INT_RESULT_STRUCT IntResult; INT_RESULT_STRUCT IntResult;
CLASS_ID ClassId;
AmbigClassifierCalls++;
NumFeatures = GetCharNormFeatures(Blob, Templates, IntFeatures,
NULL, CharNormArray,
&(Results->BlobLength));
if (NumFeatures <= 0) {
delete [] CharNormArray;
return;
}
results->BlobLength = GetCharNormFeature(fx_info, templates, NULL,
CharNormArray);
bool debug = matcher_debug_level >= 2 || classify_debug_level > 1; bool debug = matcher_debug_level >= 2 || classify_debug_level > 1;
if (debug) if (debug)
tprintf("AM Matches = "); tprintf("AM Matches = ");
int top = Blob->bounding_box().top(); int top = blob->bounding_box().top();
int bottom = Blob->bounding_box().bottom(); int bottom = blob->bounding_box().bottom();
while (*Ambiguities >= 0) { while (*ambiguities >= 0) {
ClassId = *Ambiguities; CLASS_ID class_id = *ambiguities;
im_.SetCharNormMatch(classify_integer_matcher_multiplier); im_.Match(ClassForClassId(templates, class_id),
im_.Match(ClassForClassId(Templates, ClassId),
AllProtosOn, AllConfigsOn, AllProtosOn, AllConfigsOn,
NumFeatures, IntFeatures, int_features.size(), &int_features[0],
&IntResult, &IntResult,
classify_adapt_feature_threshold, NO_DEBUG, classify_adapt_feature_threshold, NO_DEBUG,
matcher_debug_separate_windows); matcher_debug_separate_windows);
ExpandShapesAndApplyCorrections(NULL, debug, ClassId, bottom, top, 0, ExpandShapesAndApplyCorrections(NULL, debug, class_id, bottom, top, 0,
Results->BlobLength, CharNormArray, results->BlobLength,
IntResult, Results); classify_integer_matcher_multiplier,
Ambiguities++; CharNormArray, IntResult, results);
ambiguities++;
NumAmbigClassesTried++;
} }
delete [] CharNormArray; delete [] CharNormArray;
} /* AmbigClassifier */ } /* AmbigClassifier */
@ -1225,6 +1113,7 @@ void Classify::MasterMatcher(INT_TEMPLATES templates,
ADAPT_CLASS* classes, ADAPT_CLASS* classes,
int debug, int debug,
int num_classes, int num_classes,
int matcher_multiplier,
const TBOX& blob_box, const TBOX& blob_box,
CLASS_PRUNER_RESULTS results, CLASS_PRUNER_RESULTS results,
ADAPT_RESULTS* final_results) { ADAPT_RESULTS* final_results) {
@ -1246,7 +1135,8 @@ void Classify::MasterMatcher(INT_TEMPLATES templates,
bool debug = matcher_debug_level >= 2 || classify_debug_level > 1; bool debug = matcher_debug_level >= 2 || classify_debug_level > 1;
ExpandShapesAndApplyCorrections(classes, debug, class_id, bottom, top, ExpandShapesAndApplyCorrections(classes, debug, class_id, bottom, top,
results[c].Rating, results[c].Rating,
final_results->BlobLength, norm_factors, final_results->BlobLength,
matcher_multiplier, norm_factors,
int_result, final_results); int_result, final_results);
} }
} }
@ -1258,7 +1148,8 @@ void Classify::MasterMatcher(INT_TEMPLATES templates,
// The results are added to the final_results output. // The results are added to the final_results output.
void Classify::ExpandShapesAndApplyCorrections( void Classify::ExpandShapesAndApplyCorrections(
ADAPT_CLASS* classes, bool debug, int class_id, int bottom, int top, ADAPT_CLASS* classes, bool debug, int class_id, int bottom, int top,
float cp_rating, int blob_length, const uinT8* cn_factors, float cp_rating, int blob_length, int matcher_multiplier,
const uinT8* cn_factors,
INT_RESULT_STRUCT& int_result, ADAPT_RESULTS* final_results) { INT_RESULT_STRUCT& int_result, ADAPT_RESULTS* final_results) {
// Compute the fontinfo_ids. // Compute the fontinfo_ids.
int fontinfo_id = kBlankFontinfoId; int fontinfo_id = kBlankFontinfoId;
@ -1292,7 +1183,7 @@ void Classify::ExpandShapesAndApplyCorrections(
int_result.Rating, int_result.Rating,
int_result.FeatureMisses, int_result.FeatureMisses,
bottom, top, blob_length, bottom, top, blob_length,
cn_factors); matcher_multiplier, cn_factors);
if (c == 0 || rating < min_rating) if (c == 0 || rating < min_rating)
min_rating = rating; min_rating = rating;
if (unicharset.get_enabled(unichar_id)) { if (unicharset.get_enabled(unichar_id)) {
@ -1309,7 +1200,7 @@ void Classify::ExpandShapesAndApplyCorrections(
int_result.Rating, int_result.Rating,
int_result.FeatureMisses, int_result.FeatureMisses,
bottom, top, blob_length, bottom, top, blob_length,
cn_factors); matcher_multiplier, cn_factors);
if (unicharset.get_enabled(class_id)) { if (unicharset.get_enabled(class_id)) {
AddNewResult(final_results, class_id, -1, rating, AddNewResult(final_results, class_id, -1, rating,
classes != NULL, int_result.Config, classes != NULL, int_result.Config,
@ -1325,11 +1216,12 @@ double Classify::ComputeCorrectedRating(bool debug, int unichar_id,
double cp_rating, double im_rating, double cp_rating, double im_rating,
int feature_misses, int feature_misses,
int bottom, int top, int bottom, int top,
int blob_length, int blob_length, int matcher_multiplier,
const uinT8* cn_factors) { const uinT8* cn_factors) {
// Compute class feature corrections. // Compute class feature corrections.
double cn_corrected = im_.ApplyCNCorrection(im_rating, blob_length, double cn_corrected = im_.ApplyCNCorrection(im_rating, blob_length,
cn_factors[unichar_id]); cn_factors[unichar_id],
matcher_multiplier);
double miss_penalty = tessedit_class_miss_scale * feature_misses; double miss_penalty = tessedit_class_miss_scale * feature_misses;
double vertical_penalty = 0.0; double vertical_penalty = 0.0;
// Penalize non-alnums for being vertical misfits. // Penalize non-alnums for being vertical misfits.
@ -1383,39 +1275,30 @@ double Classify::ComputeCorrectedRating(bool debug, int unichar_id,
* @note Exceptions: none * @note Exceptions: none
* @note History: Tue Mar 12 19:38:03 1991, DSJ, Created. * @note History: Tue Mar 12 19:38:03 1991, DSJ, Created.
*/ */
UNICHAR_ID *Classify::BaselineClassifier(TBLOB *Blob, UNICHAR_ID *Classify::BaselineClassifier(
ADAPT_TEMPLATES Templates, TBLOB *Blob, const GenericVector<INT_FEATURE_STRUCT>& int_features,
ADAPT_RESULTS *Results) { const INT_FX_RESULT_STRUCT& fx_info,
int NumFeatures; ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results) {
if (int_features.empty()) return NULL;
int NumClasses; int NumClasses;
INT_FEATURE_ARRAY IntFeatures;
uinT8* CharNormArray = new uinT8[unicharset.size()]; uinT8* CharNormArray = new uinT8[unicharset.size()];
CLASS_ID ClassId; ClearCharNormArray(CharNormArray);
BaselineClassifierCalls++; Results->BlobLength = IntCastRounded(fx_info.Length / kStandardFeatureLength);
NumClasses = PruneClasses(Templates->Templates, int_features.size(),
NumFeatures = GetBaselineFeatures(Blob, Templates->Templates, IntFeatures, &int_features[0],
CharNormArray, &Results->BlobLength);
if (NumFeatures <= 0) {
delete [] CharNormArray;
return NULL;
}
NumClasses = PruneClasses(Templates->Templates, NumFeatures, IntFeatures,
CharNormArray, BaselineCutoffs, Results->CPResults); CharNormArray, BaselineCutoffs, Results->CPResults);
NumBaselineClassesTried += NumClasses;
if (matcher_debug_level >= 2 || classify_debug_level > 1) if (matcher_debug_level >= 2 || classify_debug_level > 1)
cprintf ("BL Matches = "); cprintf ("BL Matches = ");
im_.SetBaseLineMatch(); MasterMatcher(Templates->Templates, int_features.size(), &int_features[0],
MasterMatcher(Templates->Templates, NumFeatures, IntFeatures, CharNormArray, CharNormArray,
Templates->Class, matcher_debug_flags, NumClasses, Templates->Class, matcher_debug_flags, NumClasses, 0,
Blob->bounding_box(), Results->CPResults, Results); Blob->bounding_box(), Results->CPResults, Results);
delete [] CharNormArray; delete [] CharNormArray;
ClassId = Results->best_match.unichar_id; CLASS_ID ClassId = Results->best_match.unichar_id;
if (ClassId == NO_CLASS) if (ClassId == NO_CLASS)
return (NULL); return (NULL);
/* this is a bug - maybe should return "" */ /* this is a bug - maybe should return "" */
@ -1445,17 +1328,13 @@ UNICHAR_ID *Classify::BaselineClassifier(TBLOB *Blob,
* @note History: Tue Mar 12 16:02:52 1991, DSJ, Created. * @note History: Tue Mar 12 16:02:52 1991, DSJ, Created.
*/ */
int Classify::CharNormClassifier(TBLOB *blob, int Classify::CharNormClassifier(TBLOB *blob,
INT_TEMPLATES Templates, const TrainingSample& sample,
ADAPT_RESULTS *adapt_results) { ADAPT_RESULTS *adapt_results) {
CharNormClassifierCalls++;
TrainingSample* sample = BlobToTrainingSample(*blob, NM_CHAR_ANISOTROPIC,
classify_nonlinear_norm);
if (sample == NULL) return 0;
// This is the length that is used for scaling ratings vs certainty. // This is the length that is used for scaling ratings vs certainty.
adapt_results->BlobLength = adapt_results->BlobLength =
IntCastRounded(sample->outline_length() / kStandardFeatureLength); IntCastRounded(sample.outline_length() / kStandardFeatureLength);
GenericVector<UnicharRating> unichar_results; GenericVector<UnicharRating> unichar_results;
static_classifier_->UnicharClassifySample(*sample, blob->denorm().pix(), 0, static_classifier_->UnicharClassifySample(sample, blob->denorm().pix(), 0,
-1, &unichar_results); -1, &unichar_results);
// Convert results to the format used internally by AdaptiveClassifier. // Convert results to the format used internally by AdaptiveClassifier.
for (int r = 0; r < unichar_results.size(); ++r) { for (int r = 0; r < unichar_results.size(); ++r) {
@ -1468,9 +1347,7 @@ int Classify::CharNormClassifier(TBLOB *blob,
float rating = 1.0f - unichar_results[r].rating; float rating = 1.0f - unichar_results[r].rating;
AddNewResult(adapt_results, unichar_id, -1, rating, false, 0, font1, font2); AddNewResult(adapt_results, unichar_id, -1, rating, false, 0, font1, font2);
} }
int num_features = sample->num_features(); return sample.num_features();
delete sample;
return num_features;
} /* CharNormClassifier */ } /* CharNormClassifier */
// As CharNormClassifier, but operates on a TrainingSample and outputs to // As CharNormClassifier, but operates on a TrainingSample and outputs to
@ -1518,10 +1395,10 @@ int Classify::CharNormTrainingSample(bool pruner_only,
UnicharRating(class_id, 1.0f - adapt_results->CPResults[i].Rating)); UnicharRating(class_id, 1.0f - adapt_results->CPResults[i].Rating));
} }
} else { } else {
im_.SetCharNormMatch(classify_integer_matcher_multiplier);
MasterMatcher(PreTrainedTemplates, num_features, sample.features(), MasterMatcher(PreTrainedTemplates, num_features, sample.features(),
char_norm_array, char_norm_array,
NULL, matcher_debug_flags, num_classes, NULL, matcher_debug_flags, num_classes,
classify_integer_matcher_multiplier,
blob_box, adapt_results->CPResults, adapt_results); blob_box, adapt_results->CPResults, adapt_results);
// Convert master matcher results to output format. // Convert master matcher results to output format.
for (int i = 0; i < adapt_results->NumMatches; i++) { for (int i = 0; i < adapt_results->NumMatches; i++) {
@ -1711,8 +1588,10 @@ void Classify::DebugAdaptiveClassifier(TBLOB *blob,
if (i == 0 || Results->match[i].rating < Results->best_match.rating) if (i == 0 || Results->match[i].rating < Results->best_match.rating)
Results->best_match = Results->match[i]; Results->best_match = Results->match[i];
} }
TrainingSample* sample = BlobToTrainingSample(*blob, NM_CHAR_ANISOTROPIC, INT_FX_RESULT_STRUCT fx_info;
classify_nonlinear_norm); GenericVector<INT_FEATURE_STRUCT> bl_features;
TrainingSample* sample =
BlobToTrainingSample(*blob, false, &fx_info, &bl_features);
if (sample == NULL) return; if (sample == NULL) return;
static_classifier_->DebugDisplay(*sample, blob->denorm().pix(), static_classifier_->DebugDisplay(*sample, blob->denorm().pix(),
Results->best_match.unichar_id); Results->best_match.unichar_id);
@ -1745,21 +1624,26 @@ void Classify::DebugAdaptiveClassifier(TBLOB *blob,
void Classify::DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results) { void Classify::DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results) {
UNICHAR_ID *Ambiguities; UNICHAR_ID *Ambiguities;
AdaptiveMatcherCalls++; INT_FX_RESULT_STRUCT fx_info;
InitIntFX(); GenericVector<INT_FEATURE_STRUCT> bl_features;
TrainingSample* sample =
BlobToTrainingSample(*Blob, classify_nonlinear_norm, &fx_info,
&bl_features);
if (sample == NULL) return;
if (AdaptedTemplates->NumPermClasses < matcher_permanent_classes_min || if (AdaptedTemplates->NumPermClasses < matcher_permanent_classes_min ||
tess_cn_matching) { tess_cn_matching) {
CharNormClassifier(Blob, PreTrainedTemplates, Results); CharNormClassifier(Blob, *sample, Results);
} else { } else {
Ambiguities = BaselineClassifier(Blob, AdaptedTemplates, Results); Ambiguities = BaselineClassifier(Blob, bl_features, fx_info,
AdaptedTemplates, Results);
if ((Results->NumMatches > 0 && if ((Results->NumMatches > 0 &&
MarginalMatch (Results->best_match.rating) && MarginalMatch (Results->best_match.rating) &&
!tess_bn_matching) || !tess_bn_matching) ||
Results->NumMatches == 0) { Results->NumMatches == 0) {
CharNormClassifier(Blob, PreTrainedTemplates, Results); CharNormClassifier(Blob, *sample, Results);
} else if (Ambiguities && *Ambiguities >= 0 && !tess_bn_matching) { } else if (Ambiguities && *Ambiguities >= 0 && !tess_bn_matching) {
AmbigClassifier(Blob, AmbigClassifier(bl_features, fx_info, Blob,
PreTrainedTemplates, PreTrainedTemplates,
AdaptedTemplates->Class, AdaptedTemplates->Class,
Ambiguities, Ambiguities,
@ -1773,6 +1657,7 @@ void Classify::DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results) {
// just adding a NULL classification. // just adding a NULL classification.
if (!Results->HasNonfragment || Results->NumMatches == 0) if (!Results->HasNonfragment || Results->NumMatches == 0)
ClassifyAsNoise(Results); ClassifyAsNoise(Results);
delete sample;
} /* DoAdaptiveMatch */ } /* DoAdaptiveMatch */
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
@ -1799,8 +1684,15 @@ UNICHAR_ID *Classify::GetAmbiguities(TBLOB *Blob,
int i; int i;
Results->Initialize(); Results->Initialize();
INT_FX_RESULT_STRUCT fx_info;
GenericVector<INT_FEATURE_STRUCT> bl_features;
TrainingSample* sample =
BlobToTrainingSample(*Blob, classify_nonlinear_norm, &fx_info,
&bl_features);
if (sample == NULL) return NULL;
CharNormClassifier(Blob, PreTrainedTemplates, Results); CharNormClassifier(Blob, *sample, Results);
delete sample;
RemoveBadMatches(Results); RemoveBadMatches(Results);
qsort((void *)Results->match, Results->NumMatches, qsort((void *)Results->match, Results->NumMatches,
sizeof(ScoredClass), CompareByRating); sizeof(ScoredClass), CompareByRating);
@ -1823,58 +1715,6 @@ UNICHAR_ID *Classify::GetAmbiguities(TBLOB *Blob,
return Ambiguities; return Ambiguities;
} /* GetAmbiguities */ } /* GetAmbiguities */
/*---------------------------------------------------------------------------*/
/**
* This routine calls the integer (Hardware) feature
* extractor if it has not been called before for this blob.
* The results from the feature extractor are placed into
* globals so that they can be used in other routines without
* re-extracting the features.
* It then copies the baseline features into the IntFeatures
* array provided by the caller.
*
* @param Blob blob to extract features from
* @param Templates used to compute char norm adjustments
* @param IntFeatures array to fill with integer features
* @param CharNormArray array to fill with dummy char norm adjustments
* @param BlobLength length of blob in baseline-normalized units
*
* Globals:
* - FeaturesHaveBeenExtracted TRUE if fx has been done
* - BaselineFeatures holds extracted baseline feat
* - CharNormFeatures holds extracted char norm feat
* - FXInfo holds misc. FX info
*
* @return Number of features extracted or 0 if an error occured.
* @note Exceptions: none
* @note History: Tue May 28 10:40:52 1991, DSJ, Created.
*/
int Classify::GetBaselineFeatures(TBLOB *Blob,
INT_TEMPLATES Templates,
INT_FEATURE_ARRAY IntFeatures,
uinT8* CharNormArray,
inT32 *BlobLength) {
if (!FeaturesHaveBeenExtracted) {
FeaturesOK = ExtractIntFeat(*Blob, classify_nonlinear_norm,
BaselineFeatures, CharNormFeatures, &FXInfo);
FeaturesHaveBeenExtracted = TRUE;
}
*BlobLength = IntCastRounded(FXInfo.Length / kStandardFeatureLength);
if (!FeaturesOK) {
return 0;
}
memcpy(IntFeatures, BaselineFeatures, FXInfo.NumBL * sizeof(IntFeatures[0]));
ClearCharNormArray(CharNormArray);
return FXInfo.NumBL;
} /* GetBaselineFeatures */
void Classify::ResetFeaturesHaveBeenExtracted() {
FeaturesHaveBeenExtracted = FALSE;
}
// Returns true if the given blob looks too dissimilar to any character // Returns true if the given blob looks too dissimilar to any character
// present in the classifier templates. // present in the classifier templates.
bool Classify::LooksLikeGarbage(TBLOB *blob) { bool Classify::LooksLikeGarbage(TBLOB *blob) {
@ -1921,48 +1761,28 @@ bool Classify::LooksLikeGarbage(TBLOB *blob) {
* @param BlobLength length of blob in baseline-normalized units * @param BlobLength length of blob in baseline-normalized units
* *
* Globals: * Globals:
* - FeaturesHaveBeenExtracted TRUE if fx has been done
* - BaselineFeatures holds extracted baseline feat
* - CharNormFeatures holds extracted char norm feat
* - FXInfo holds misc. FX info
* *
* @return Number of features extracted or 0 if an error occured. * @return Number of features extracted or 0 if an error occured.
* @note Exceptions: none * @note Exceptions: none
* @note History: Tue May 28 10:40:52 1991, DSJ, Created. * @note History: Tue May 28 10:40:52 1991, DSJ, Created.
*/ */
int Classify::GetCharNormFeatures(TBLOB *Blob, int Classify::GetCharNormFeature(const INT_FX_RESULT_STRUCT& fx_info,
INT_TEMPLATES Templates, INT_TEMPLATES templates,
INT_FEATURE_ARRAY IntFeatures, uinT8* pruner_norm_array,
uinT8* PrunerNormArray, uinT8* char_norm_array) {
uinT8* CharNormArray, FEATURE norm_feature = NewFeature(&CharNormDesc);
inT32 *BlobLength) { float baseline = kBlnBaselineOffset;
FEATURE NormFeature; float scale = MF_SCALE_FACTOR;
FLOAT32 Baseline, Scale; norm_feature->Params[CharNormY] = (fx_info.Ymean - baseline) * scale;
norm_feature->Params[CharNormLength] =
if (!FeaturesHaveBeenExtracted) { fx_info.Length * scale / LENGTH_COMPRESSION;
FeaturesOK = ExtractIntFeat(*Blob, classify_nonlinear_norm, norm_feature->Params[CharNormRx] = fx_info.Rx * scale;
BaselineFeatures, CharNormFeatures, &FXInfo); norm_feature->Params[CharNormRy] = fx_info.Ry * scale;
FeaturesHaveBeenExtracted = TRUE; // Deletes norm_feature.
} ComputeCharNormArrays(norm_feature, templates, char_norm_array,
pruner_norm_array);
*BlobLength = IntCastRounded(FXInfo.Length / kStandardFeatureLength); return IntCastRounded(fx_info.Length / kStandardFeatureLength);
if (!FeaturesOK) { } /* GetCharNormFeature */
return 0;
}
memcpy(IntFeatures, CharNormFeatures, FXInfo.NumCN * sizeof(IntFeatures[0]));
NormFeature = NewFeature(&CharNormDesc);
Baseline = kBlnBaselineOffset;
Scale = MF_SCALE_FACTOR;
NormFeature->Params[CharNormY] = (FXInfo.Ymean - Baseline) * Scale;
NormFeature->Params[CharNormLength] =
FXInfo.Length * Scale / LENGTH_COMPRESSION;
NormFeature->Params[CharNormRx] = FXInfo.Rx * Scale;
NormFeature->Params[CharNormRy] = FXInfo.Ry * Scale;
ComputeCharNormArrays(NormFeature, Templates, CharNormArray, PrunerNormArray);
return FXInfo.NumCN;
} /* GetCharNormFeatures */
// Computes the char_norm_array for the unicharset and, if not NULL, the // Computes the char_norm_array for the unicharset and, if not NULL, the
// pruner_array as appropriate according to the existence of the shape_table. // pruner_array as appropriate according to the existence of the shape_table.
@ -2454,7 +2274,6 @@ void Classify::ShowBestMatchFor(int shape_id,
} }
INT_RESULT_STRUCT cn_result; INT_RESULT_STRUCT cn_result;
classify_norm_method.set_value(character); classify_norm_method.set_value(character);
im_.SetCharNormMatch(classify_integer_matcher_multiplier);
im_.Match(ClassForClassId(PreTrainedTemplates, shape_id), im_.Match(ClassForClassId(PreTrainedTemplates, shape_id),
AllProtosOn, AllConfigsOn, AllProtosOn, AllConfigsOn,
num_features, features, &cn_result, num_features, features, &cn_result,
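
Taken together, the adaptmatch.cpp changes hoist feature extraction out of the individual classifiers: the FeaturesHaveBeenExtracted / BaselineFeatures / CharNormFeatures cache is gone, BlobToTrainingSample() runs once per blob, and the classifiers receive the features as arguments. A minimal sketch of the resulting calling pattern from inside Classify (illustrative only, assembled from the functions in this diff; not itself part of the commit):

    INT_FX_RESULT_STRUCT fx_info;
    GenericVector<INT_FEATURE_STRUCT> bl_features;
    // Extract baseline and char-norm features exactly once for this blob.
    TrainingSample* sample =
        BlobToTrainingSample(*blob, classify_nonlinear_norm, &fx_info,
                             &bl_features);
    if (sample != NULL) {
      // Static (pre-trained) templates consume the char-norm sample...
      CharNormClassifier(blob, *sample, results);
      // ...while the adapted templates take the explicit feature vector:
      //   BaselineClassifier(blob, bl_features, fx_info,
      //                      AdaptedTemplates, results);
      delete sample;
    }

Because nothing in this sequence touches shared mutable state any more, two threads can run it on different blobs at the same time, which is presumably what the new par_control.cpp builds on.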

View File

@@ -165,27 +165,13 @@ Classify::Classify()
   AdaptedTemplates = NULL;
   PreTrainedTemplates = NULL;
   AllProtosOn = NULL;
-  PrunedProtos = NULL;
   AllConfigsOn = NULL;
-  AllProtosOff = NULL;
   AllConfigsOff = NULL;
   TempProtoMask = NULL;
   NormProtos = NULL;
-  AdaptiveMatcherCalls = 0;
-  BaselineClassifierCalls = 0;
-  CharNormClassifierCalls = 0;
-  AmbigClassifierCalls = 0;
-  NumWordsAdaptedTo = 0;
-  NumCharsAdaptedTo = 0;
-  NumBaselineClassesTried = 0;
-  NumCharNormClassesTried = 0;
-  NumAmbigClassesTried = 0;
-  NumClassesOutput = 0;
   NumAdaptationsFailed = 0;
-  FeaturesHaveBeenExtracted = false;
-  FeaturesOK = true;
   learn_debug_win_ = NULL;
   learn_fragmented_word_debug_win_ = NULL;
   learn_fragments_debug_win_ = NULL;
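
Dropping the call counters (AdaptiveMatcherCalls, NumWordsAdaptedTo and friends) along with the FeaturesHaveBeenExtracted/FeaturesOK cache fits the stated goal of the commit: all of these were plain mutable members of Classify, written on every classification call, so concurrent blob classification would have raced on them; PrintAdaptiveStatistics, removed above, was their only reader. (Inference from the diff; the commit message states only the parallelization goal.)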

View File

@@ -145,15 +145,13 @@ class Classify : public CCStruct {
                     int FontinfoId,
                     ADAPT_CLASS Class,
                     ADAPT_TEMPLATES Templates);
-  void AdaptToPunc(TBLOB *Blob,
-                   CLASS_ID ClassId,
-                   int FontinfoId,
-                   FLOAT32 Threshold);
-  void AmbigClassifier(TBLOB *Blob,
-                       INT_TEMPLATES Templates,
-                       ADAPT_CLASS *Classes,
-                       UNICHAR_ID *Ambiguities,
-                       ADAPT_RESULTS *Results);
+  void AmbigClassifier(const GenericVector<INT_FEATURE_STRUCT>& int_features,
+                       const INT_FX_RESULT_STRUCT& fx_info,
+                       const TBLOB *blob,
+                       INT_TEMPLATES templates,
+                       ADAPT_CLASS *classes,
+                       UNICHAR_ID *ambiguities,
+                       ADAPT_RESULTS *results);
   void MasterMatcher(INT_TEMPLATES templates,
                      inT16 num_features,
                      const INT_FEATURE_STRUCT* features,
@@ -161,6 +159,7 @@ class Classify : public CCStruct {
                      ADAPT_CLASS* classes,
                      int debug,
                      int num_classes,
+                     int matcher_multiplier,
                      const TBOX& blob_box,
                      CLASS_PRUNER_RESULTS results,
                      ADAPT_RESULTS* final_results);
@@ -175,6 +174,7 @@ class Classify : public CCStruct {
                                        int bottom, int top,
                                        float cp_rating,
                                        int blob_length,
+                                       int matcher_multiplier,
                                        const uinT8* cn_factors,
                                        INT_RESULT_STRUCT& int_result,
                                        ADAPT_RESULTS* final_results);
@@ -184,7 +184,8 @@ class Classify : public CCStruct {
   double ComputeCorrectedRating(bool debug, int unichar_id, double cp_rating,
                                 double im_rating, int feature_misses,
                                 int bottom, int top,
-                                int blob_length, const uinT8* cn_factors);
+                                int blob_length, int matcher_multiplier,
+                                const uinT8* cn_factors);
   void ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box,
                                ADAPT_RESULTS *Results,
                                BLOB_CHOICE_LIST *Choices);
@@ -246,12 +247,13 @@ class Classify : public CCStruct {
   // Converts a shape_table_ index to a classifier class_id index (not a
   // unichar-id!). Uses a search, so not fast.
   int ShapeIDToClassID(int shape_id) const;
-  UNICHAR_ID *BaselineClassifier(TBLOB *Blob,
-                                 ADAPT_TEMPLATES Templates,
-                                 ADAPT_RESULTS *Results);
-  int CharNormClassifier(TBLOB *Blob,
-                         INT_TEMPLATES Templates,
-                         ADAPT_RESULTS *Results);
+  UNICHAR_ID *BaselineClassifier(
+      TBLOB *Blob, const GenericVector<INT_FEATURE_STRUCT>& int_features,
+      const INT_FX_RESULT_STRUCT& fx_info,
+      ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results);
+  int CharNormClassifier(TBLOB *blob,
+                         const TrainingSample& sample,
+                         ADAPT_RESULTS *adapt_results);

   // As CharNormClassifier, but operates on a TrainingSample and outputs to
   // a GenericVector of ShapeRating without conversion to classes.
@@ -267,7 +269,6 @@ class Classify : public CCStruct {
   void DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class);
   bool AdaptableWord(WERD_RES* word);
   void EndAdaptiveClassifier();
-  void PrintAdaptiveStatistics(FILE *File);
   void SettupPass1();
   void SettupPass2();
   void AdaptiveClassifier(TBLOB *Blob,
@@ -276,17 +277,10 @@ class Classify : public CCStruct {
   void ClassifyAsNoise(ADAPT_RESULTS *Results);
   void ResetAdaptiveClassifierInternal();

-  int GetBaselineFeatures(TBLOB *Blob,
-                          INT_TEMPLATES Templates,
-                          INT_FEATURE_ARRAY IntFeatures,
-                          uinT8* CharNormArray,
-                          inT32 *BlobLength);
-  int GetCharNormFeatures(TBLOB *Blob,
-                          INT_TEMPLATES Templates,
-                          INT_FEATURE_ARRAY IntFeatures,
-                          uinT8* PrunerNormArray,
-                          uinT8* CharNormArray,
-                          inT32 *BlobLength);
+  int GetCharNormFeature(const INT_FX_RESULT_STRUCT& fx_info,
+                         INT_TEMPLATES templates,
+                         uinT8* pruner_norm_array,
+                         uinT8* char_norm_array);
   // Computes the char_norm_array for the unicharset and, if not NULL, the
   // pruner_array as appropriate according to the existence of the shape_table.
   // The norm_feature is deleted as it is almost certainly no longer needed.
@@ -298,7 +292,6 @@ class Classify : public CCStruct {
   bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config);
   void UpdateAmbigsGroup(CLASS_ID class_id, TBLOB *Blob);

-  void ResetFeaturesHaveBeenExtracted();
   bool AdaptiveClassifierIsFull() { return NumAdaptationsFailed > 0; }
   bool LooksLikeGarbage(TBLOB *blob);
   void RefreshDebugWindow(ScrollView **win, const char *msg,
@@ -468,9 +461,7 @@ class Classify : public CCStruct {
   // Create dummy proto and config masks for use with the built-in templates.
   BIT_VECTOR AllProtosOn;
-  BIT_VECTOR PrunedProtos;
   BIT_VECTOR AllConfigsOn;
-  BIT_VECTOR AllProtosOff;
   BIT_VECTOR AllConfigsOff;
   BIT_VECTOR TempProtoMask;
   bool EnableLearning;
@@ -504,34 +495,13 @@ class Classify : public CCStruct {
   ShapeTable* shape_table_;

  private:
   Dict dict_;
   // The currently active static classifier.
   ShapeClassifier* static_classifier_;

   /* variables used to hold performance statistics */
-  int AdaptiveMatcherCalls;
-  int BaselineClassifierCalls;
-  int CharNormClassifierCalls;
-  int AmbigClassifierCalls;
-  int NumWordsAdaptedTo;
-  int NumCharsAdaptedTo;
-  int NumBaselineClassesTried;
-  int NumCharNormClassesTried;
-  int NumAmbigClassesTried;
-  int NumClassesOutput;
   int NumAdaptationsFailed;

-  /* variables used to hold onto extracted features.  This is used
-  to map from the old scheme in which baseline features and char norm
-  features are extracted separately, to the new scheme in which they
-  are extracted at the same time. */
-  bool FeaturesHaveBeenExtracted;
-  bool FeaturesOK;
-  INT_FEATURE_ARRAY BaselineFeatures;
-  INT_FEATURE_ARRAY CharNormFeatures;
-  INT_FX_RESULT_STRUCT FXInfo;
-
   // Expected number of features in the class pruner, used to penalize
   // unknowns that have too few features (like a c being classified as e) so
   // it doesn't recognize everything as '@' or '#'.

View File

@@ -78,32 +78,20 @@ namespace tesseract {
 // TODO(rays) BlobToTrainingSample must remain a global function until
 // the FlexFx and FeatureDescription code can be removed and LearnBlob
 // made a member of Classify.
-TrainingSample* BlobToTrainingSample(const TBLOB& blob,
-                                     tesseract::NormalizationMode mode,
-                                     bool nonlinear_norm) {
-  INT_FX_RESULT_STRUCT fx_info;
-  GenericVector<INT_FEATURE_STRUCT> bl_features;
+TrainingSample* BlobToTrainingSample(
+    const TBLOB& blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT* fx_info,
+    GenericVector<INT_FEATURE_STRUCT>* bl_features) {
   GenericVector<INT_FEATURE_STRUCT> cn_features;
-  Classify::ExtractFeatures(blob, nonlinear_norm, &bl_features,
-                            &cn_features, &fx_info, NULL);
+  Classify::ExtractFeatures(blob, nonlinear_norm, bl_features,
+                            &cn_features, fx_info, NULL);
   // TODO(rays) Use blob->PreciseBoundingBox() instead.
   TBOX box = blob.bounding_box();
   TrainingSample* sample = NULL;
-  if (mode == tesseract::NM_CHAR_ANISOTROPIC) {
-    int num_features = fx_info.NumCN;
-    if (num_features > 0) {
-      sample = TrainingSample::CopyFromFeatures(fx_info, box, &cn_features[0],
-                                                num_features);
-    }
-  } else if (mode == tesseract::NM_BASELINE) {
-    int num_features = fx_info.NumBL;
-    if (num_features > 0) {
-      sample = TrainingSample::CopyFromFeatures(fx_info, box, &bl_features[0],
-                                                num_features);
-    }
-  } else {
-    ASSERT_HOST(!"Unsupported normalization mode!");
-  }
+  int num_features = fx_info->NumCN;
+  if (num_features > 0) {
+    sample = TrainingSample::CopyFromFeatures(*fx_info, box, &cn_features[0],
+                                              num_features);
+  }
   if (sample != NULL) {
     // Set the bounding box (in original image coordinates) in the sample.
     TPOINT topleft, botright;
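
The reworked BlobToTrainingSample() drops the NormalizationMode argument: the returned TrainingSample is now always built from the char-norm features, while the baseline features that the old NM_BASELINE mode selected are handed back through the bl_features out-parameter, together with fx_info. A caller therefore pays for feature extraction once and can feed the baseline features to BaselineClassifier() and the sample to CharNormClassifier(), as DoAdaptiveMatch() above now does.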

View File

@@ -60,9 +60,9 @@ namespace tesseract {
 // TODO(rays) BlobToTrainingSample must remain a global function until
 // the FlexFx and FeatureDescription code can be removed and LearnBlob
 // made a member of Classify.
-TrainingSample* BlobToTrainingSample(const TBLOB& blob,
-                                     tesseract::NormalizationMode mode,
-                                     bool nonlinear_norm);
+TrainingSample* BlobToTrainingSample(
+    const TBLOB& blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT* fx_info,
+    GenericVector<INT_FEATURE_STRUCT>* bl_features);
 }

 // Deprecated! Prefer tesseract::Classify::ExtractFeatures instead.

View File

@@ -693,13 +693,9 @@ int IntegerMatcher::FindBadFeatures(
 /*---------------------------------------------------------------------------*/
-void IntegerMatcher::Init(tesseract::IntParam *classify_debug_level,
-                          int classify_integer_matcher_multiplier) {
+void IntegerMatcher::Init(tesseract::IntParam *classify_debug_level) {
   classify_debug_level_ = classify_debug_level;

-  /* Set default mode of operation of IntegerMatcher */
-  SetCharNormMatch(classify_integer_matcher_multiplier);
-
   /* Initialize table for evidence to similarity lookup */
   for (int i = 0; i < SE_TABLE_SIZE; i++) {
     uinT32 IntSimilarity = i << (27 - SE_TABLE_BITS);
@@ -724,17 +720,6 @@ void IntegerMatcher::Init(tesseract::IntParam *classify_debug_level,
   evidence_mult_mask_ = ((1 << kIntEvidenceTruncBits) - 1);
 }

-/*--------------------------------------------------------------------------*/
-void IntegerMatcher::SetBaseLineMatch() {
-  local_matcher_multiplier_ = 0;
-}
-
-
-/*--------------------------------------------------------------------------*/
-void IntegerMatcher::SetCharNormMatch(int integer_matcher_multiplier) {
-  local_matcher_multiplier_ = integer_matcher_multiplier;
-}
-
 /**----------------------------------------------------------------------------
               Private Code
@@ -1283,10 +1268,11 @@ int IntegerMatcher::FindBestMatch(
 // Applies the CN normalization factor to the given rating and returns
 // the modified rating.
 float IntegerMatcher::ApplyCNCorrection(float rating, int blob_length,
-                                        int normalization_factor) {
+                                        int normalization_factor,
+                                        int matcher_multiplier) {
   return (rating * blob_length +
-          local_matcher_multiplier_ * normalization_factor / 256.0) /
-      (blob_length + local_matcher_multiplier_);
+          matcher_multiplier * normalization_factor / 256.0) /
+      (blob_length + matcher_multiplier);
 }
 /*---------------------------------------------------------------------------*/
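
With local_matcher_multiplier_ gone, ApplyCNCorrection() is a pure function of its arguments, so concurrent matches cannot race on matcher mode. A standalone restatement of the arithmetic (a hypothetical helper mirroring the function above; not code from the commit) makes it easy to check that passing 0 reproduces the old SetBaseLineMatch() behaviour:

    #include <cassert>

    // Same formula as IntegerMatcher::ApplyCNCorrection in the diff above:
    // rating' = (rating * L + m * f / 256) / (L + m).
    static float ApplyCNCorrection(float rating, int blob_length,
                                   int normalization_factor,
                                   int matcher_multiplier) {
      return (rating * blob_length +
              matcher_multiplier * normalization_factor / 256.0) /
             (blob_length + matcher_multiplier);
    }

    int main() {
      // m == 0 is the old baseline mode: (r * L + 0) / (L + 0) == r.
      assert(ApplyCNCorrection(0.25f, 20, 128, 0) == 0.25f);
      // m > 0 blends in the char-norm factor, as the old
      // SetCharNormMatch() mode did via the removed member variable.
      assert(ApplyCNCorrection(0.25f, 20, 128, 10) > 0.25f);
      return 0;
    }

Accordingly, BaselineClassifier() now passes 0 explicitly, and the char-norm paths pass classify_integer_matcher_multiplier down through MasterMatcher().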

View File

@@ -102,11 +102,7 @@ class IntegerMatcher {
   IntegerMatcher() : classify_debug_level_(0) {}

-  void Init(tesseract::IntParam *classify_debug_level,
-            int classify_integer_matcher_multiplier);
-
-  void SetBaseLineMatch();
-  void SetCharNormMatch(int integer_matcher_multiplier);
+  void Init(tesseract::IntParam *classify_debug_level);

   void Match(INT_CLASS ClassTemplate,
              BIT_VECTOR ProtoMask,
@@ -121,7 +117,7 @@ class IntegerMatcher {
   // Applies the CN normalization factor to the given rating and returns
   // the modified rating.
   float ApplyCNCorrection(float rating, int blob_length,
-                          int normalization_factor);
+                          int normalization_factor, int matcher_multiplier);

   int FindGoodProtos(INT_CLASS ClassTemplate,
                      BIT_VECTOR ProtoMask,
@@ -192,7 +188,6 @@ class IntegerMatcher {
   uinT32 evidence_table_mask_;
   uinT32 mult_trunc_shift_bits_;
   uinT32 table_trunc_shift_bits_;
-  inT16 local_matcher_multiplier_;
   tesseract::IntParam *classify_debug_level_;
   uinT32 evidence_mult_mask_;
 };

View File

@@ -235,8 +235,11 @@ FEATURE_SET ExtractIntCNFeatures(TBLOB *blob, const DENORM& bl_denorm,
  ** Exceptions: none
  ** History: 8/8/2011, rays, Created.
  */
-  tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample(
-      *blob, tesseract::NM_CHAR_ANISOTROPIC, false);
+  INT_FX_RESULT_STRUCT local_fx_info(fx_info);
+  GenericVector<INT_FEATURE_STRUCT> bl_features;
+  tesseract::TrainingSample* sample =
+      tesseract::BlobToTrainingSample(*blob, false, &local_fx_info,
+                                      &bl_features);
   if (sample == NULL) return NULL;

   int num_features = sample->num_features();
@@ -267,8 +270,11 @@ FEATURE_SET ExtractIntGeoFeatures(TBLOB *blob, const DENORM& bl_denorm,
  ** Exceptions: none
  ** History: 8/8/2011, rays, Created.
  */
-  tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample(
-      *blob, tesseract::NM_CHAR_ANISOTROPIC, false);
+  INT_FX_RESULT_STRUCT local_fx_info(fx_info);
+  GenericVector<INT_FEATURE_STRUCT> bl_features;
+  tesseract::TrainingSample* sample =
+      tesseract::BlobToTrainingSample(*blob, false, &local_fx_info,
+                                      &bl_features);
   if (sample == NULL) return NULL;

   FEATURE_SET feature_set = NewFeatureSet(1);

View File

@@ -119,6 +119,9 @@ Dict::Dict(Image* image_ptr)
                   "Make AcceptableChoice() always return false. Useful"
                   " when there is a need to explore all segmentations",
                   getImage()->getCCUtil()->params()),
+      BOOL_MEMBER(save_raw_choices, false,
+                  "Deprecated- backward compatibility only",
+                  getImage()->getCCUtil()->params()),
       INT_MEMBER(tessedit_truncate_wordchoice_log, 10,
                  "Max words to keep in list",
                  getImage()->getCCUtil()->params()),
@@ -689,7 +692,7 @@ void Dict::adjust_word(WERD_CHOICE *word,
 int Dict::valid_word(const WERD_CHOICE &word, bool numbers_ok) const {
   const WERD_CHOICE *word_ptr = &word;
   WERD_CHOICE temp_word(word.unicharset());
-  if (hyphenated()) {
+  if (hyphenated() && hyphen_word_->unicharset() == word.unicharset()) {
     copy_hyphen_info(&temp_word);
     temp_word += word;
     word_ptr = &temp_word;
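
The extra condition in valid_word() reads as a guard for mixed-language recognition: hyphen_word_ is retained from a previously recognized word, and unichar ids are only meaningful relative to the unicharset that produced them, so appending the current word to a hyphen word from a different unicharset would combine ids from incompatible alphabets. (An inference from the code; the commit does not state the motivation.)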

View File

@@ -613,6 +613,8 @@ class Dict {
   BOOL_VAR_H(stopper_no_acceptable_choices, false,
              "Make AcceptableChoice() always return false. Useful"
              " when there is a need to explore all segmentations");
+  BOOL_VAR_H(save_raw_choices, false,
+             "Deprecated- backward compatibility only");
   INT_VAR_H(tessedit_truncate_wordchoice_log, 10, "Max words to keep in list");
   STRING_VAR_H(word_to_debug, "", "Word for which stopper debug information"
                " should be printed to stdout");

View File

@@ -440,10 +440,11 @@ namespace tesseract {
  * enough. The results are returned in the WERD_RES.
  */
 void Wordrec::chop_word_main(WERD_RES *word) {
-  // Initial clean up.
-  word->ClearRatings();
   int num_blobs = word->chopped_word->NumBlobs();
-  word->ratings = new MATRIX(num_blobs, wordrec_max_join_chunks);
-  // Run initial classification.
-  for (int b = 0; b < num_blobs; ++b) {
-    BLOB_CHOICE_LIST* choices = classify_piece(word->seam_array, b, b,
+  if (word->ratings == NULL) {
+    word->ratings = new MATRIX(num_blobs, wordrec_max_join_chunks);
+  }
+  if (word->ratings->get(0, 0) == NULL) {
+    // Run initial classification.
+    for (int b = 0; b < num_blobs; ++b) {
+      BLOB_CHOICE_LIST* choices = classify_piece(word->seam_array, b, b,
@@ -451,6 +452,21 @@ void Wordrec::chop_word_main(WERD_RES *word) {
                                                  word->blamer_bundle);
       word->ratings->put(b, b, choices);
     }
+  } else {
+    // Blobs have been pre-classified. Set matrix cell for all blob choices
+    for (int col = 0; col < word->ratings->dimension(); ++col) {
+      for (int row = col; row < word->ratings->dimension() &&
+               row < col + word->ratings->bandwidth(); ++row) {
+        BLOB_CHOICE_LIST* choices = word->ratings->get(col, row);
+        if (choices != NULL) {
+          BLOB_CHOICE_IT bc_it(choices);
+          for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) {
+            bc_it.data()->set_matrix_cell(col, row);
+          }
+        }
+      }
+    }
+  }

   // Run Segmentation Search.
   BestChoiceBundle best_choice_bundle(word->ratings->dimension());
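
The chop_word_main() change is the hook that lets blob classification happen elsewhere, e.g. on worker threads: if the diagonal of the ratings matrix is already filled, the serial chopper skips initial classification and only restores the matrix-cell coordinates on each BLOB_CHOICE. A hypothetical pre-classification step (classify_blob_for_cell() is a stand-in name, not a function from this commit) would look like:

    // Worker-thread step: classify each blob of one word and store the
    // choices on the diagonal of the ratings matrix.
    int num_blobs = word->chopped_word->NumBlobs();
    if (word->ratings == NULL)
      word->ratings = new MATRIX(num_blobs, wordrec_max_join_chunks);
    for (int b = 0; b < num_blobs; ++b)
      word->ratings->put(b, b, classify_blob_for_cell(word, b));
    // Later, on the serial path, chop_word_main(word) finds
    // word->ratings->get(0, 0) != NULL and takes the pre-classified
    // branch above instead of calling classify_piece().

The new else branch stamps the (col, row) coordinates onto such pre-classified choices, bookkeeping that classify_piece() would otherwise have handled.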