From 7ec4fd7a561eb52454eb0dcb0acbfdf8d1a0e6bd Mon Sep 17 00:00:00 2001 From: "theraysmith@gmail.com" Date: Fri, 8 Nov 2013 20:30:56 +0000 Subject: [PATCH] Refactorerd control functions to enable parallel blob classification git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@904 d0cd1f9f-072b-0410-8dd7-cf729c803f20 --- ccmain/Makefile.am | 2 +- ccmain/applybox.cpp | 10 +- ccmain/control.cpp | 432 +++++++++++++++++++++++--------------- ccmain/cube_control.cpp | 12 +- ccmain/fixspace.cpp | 5 +- ccmain/pgedit.cpp | 20 +- ccmain/recogtraining.cpp | 4 +- ccmain/tesseractclass.cpp | 3 +- ccmain/tesseractclass.h | 48 ++++- ccstruct/blobs.cpp | 36 +++- ccstruct/blobs.h | 6 +- ccstruct/pageres.cpp | 49 ++--- ccstruct/pageres.h | 24 +-- ccstruct/ratngs.cpp | 3 +- classify/adaptmatch.cpp | 387 +++++++++------------------------- classify/classify.cpp | 14 -- classify/classify.h | 74 ++----- classify/intfx.cpp | 30 +-- classify/intfx.h | 6 +- classify/intmatcher.cpp | 24 +-- classify/intmatcher.h | 9 +- classify/picofeat.cpp | 14 +- dict/dict.cpp | 5 +- dict/dict.h | 2 + wordrec/chopper.cpp | 34 ++- 25 files changed, 580 insertions(+), 673 deletions(-) diff --git a/ccmain/Makefile.am b/ccmain/Makefile.am index d3f3a70ca..eb4358085 100644 --- a/ccmain/Makefile.am +++ b/ccmain/Makefile.am @@ -46,7 +46,7 @@ libtesseract_main_la_SOURCES = \ docqual.cpp equationdetect.cpp fixspace.cpp fixxht.cpp \ imgscale.cpp ltrresultiterator.cpp \ osdetect.cpp output.cpp pageiterator.cpp pagesegmain.cpp \ - pagewalk.cpp paragraphs.cpp paramsd.cpp pgedit.cpp recogtraining.cpp \ + pagewalk.cpp par_control.cpp paragraphs.cpp paramsd.cpp pgedit.cpp recogtraining.cpp \ reject.cpp resultiterator.cpp scaleimg.cpp superscript.cpp \ tesseract_cube_combiner.cpp \ tessbox.cpp tessedit.cpp tesseractclass.cpp tessvars.cpp \ diff --git a/ccmain/applybox.cpp b/ccmain/applybox.cpp index ce46053a4..d8723854c 100644 --- a/ccmain/applybox.cpp +++ b/ccmain/applybox.cpp @@ -241,10 +241,12 @@ PAGE_RES* Tesseract::SetupApplyBoxes(const GenericVector& boxes, void Tesseract::MaximallyChopWord(const GenericVector& boxes, BLOCK* block, ROW* row, WERD_RES* word_res) { - if (!word_res->SetupForTessRecognition(unicharset, this, BestPix(), false, - textord_use_cjk_fp_model, - poly_allow_detailed_fx, - row, block)) { + if (!word_res->SetupForRecognition(unicharset, this, BestPix(), + tessedit_ocr_engine_mode, NULL, + classify_bln_numeric_mode, + textord_use_cjk_fp_model, + poly_allow_detailed_fx, + row, block)) { word_res->CloneChoppedToRebuild(); return; } diff --git a/ccmain/control.cpp b/ccmain/control.cpp index 0110f2970..c60a3d118 100644 --- a/ccmain/control.cpp +++ b/ccmain/control.cpp @@ -97,8 +97,9 @@ BOOL8 Tesseract::recog_interactive(BLOCK* block, ROW* row, WERD_RES* word_res) { inT16 char_qual; inT16 good_char_qual; - classify_word_and_language(&Tesseract::classify_word_pass2, - block, row, word_res); + WordData word_data(block, row, word_res); + SetupWordPassN(2, &word_data); + classify_word_and_language(&Tesseract::classify_word_pass2, &word_data); if (tessedit_debug_quality_metrics) { word_char_quality(word_res, row, &char_qual, &good_char_qual); tprintf @@ -153,6 +154,111 @@ bool Tesseract::ProcessTargetWord(const TBOX& word_box, return true; } +// If tesseract is to be run, sets the words up ready for it. +void Tesseract::SetupAllWordsPassN(int pass_n, + const TBOX* target_word_box, + const char* word_config, + PAGE_RES* page_res, + GenericVector* words) { + // Prepare all the words. + PAGE_RES_IT page_res_it(page_res); + for (page_res_it.restart_page(); page_res_it.word() != NULL; + page_res_it.forward()) { + if (pass_n == 1) + page_res_it.word()->SetupFake(unicharset); + if (target_word_box == NULL || + ProcessTargetWord(page_res_it.word()->word->bounding_box(), + *target_word_box, word_config, 1)) { + words->push_back(WordData(page_res_it)); + } + } + // Setup all the words for recognition with polygonal approximation. + for (int w = 0; w < words->size(); ++w) { + SetupWordPassN(pass_n, &(*words)[w]); + if (w > 0) (*words)[w].prev_word = &(*words)[w - 1]; + } +} + +// Sets up the single word ready for whichever engine is to be run. +void Tesseract::SetupWordPassN(int pass_n, WordData* word) { + if (pass_n == 1 || !word->word->done || tessedit_training_tess) { + if (pass_n == 2) { + // TODO(rays) Should we do this on pass1 too? + word->word->caps_height = 0.0; + if (word->word->x_height == 0.0f) + word->word->x_height = word->row->x_height(); + } + // Cube doesn't get setup for pass2. + if (pass_n != 2 || tessedit_ocr_engine_mode != OEM_CUBE_ONLY) { + word->word->SetupForRecognition( + unicharset, this, BestPix(), tessedit_ocr_engine_mode, NULL, + classify_bln_numeric_mode, textord_use_cjk_fp_model, + poly_allow_detailed_fx, word->row, word->block); + } + } + if (!sub_langs_.empty()) { + if (word->lang_words.size() != sub_langs_.size()) { + // Setup the words for all the sub-languages now. + WERD_RES empty; + word->lang_words.init_to_size(sub_langs_.size(), empty); + } + for (int s = 0; s < sub_langs_.size(); ++s) { + Tesseract* lang_t = sub_langs_[s]; + if (pass_n == 1 || (lang_t->tessedit_ocr_engine_mode != OEM_CUBE_ONLY && + (!word->lang_words[s].done || lang_t->tessedit_training_tess))) { + word->lang_words[s].InitForRetryRecognition(*word->word); + word->lang_words[s].SetupForRecognition( + lang_t->unicharset, lang_t, BestPix(), + lang_t->tessedit_ocr_engine_mode, NULL, + lang_t->classify_bln_numeric_mode, + lang_t->textord_use_cjk_fp_model, + lang_t->poly_allow_detailed_fx, word->row, word->block); + } + } + } +} + + +// Runs word recognition on all the words. +bool Tesseract::RecogAllWordsPassN(int pass_n, ETEXT_DESC* monitor, + GenericVector* words) { + // TODO(rays) Before this loop can be parallelized (it would yield a massive + // speed-up) all remaining member globals need to be converted to local/heap + // (eg set_pass1 and set_pass2) and an intermediate adaption pass needs to be + // added. The results will be significantly different with adaption on, and + // deterioration will need investigation. + for (int w = 0; w < words->size(); ++w) { + WordData* word = &(*words)[w]; + if (monitor != NULL) { + monitor->ocr_alive = TRUE; + if (pass_n == 1) + monitor->progress = 30 + 50 * w / words->size(); + else + monitor->progress = 80 + 10 * w / words->size(); + if (monitor->deadline_exceeded() || + (monitor->cancel != NULL && (*monitor->cancel)(monitor->cancel_this, + words->size()))) { + // Timeout. Fake out the rest of the words. + for (; w < words->size(); ++w) { + (*words)[w].word->SetupFake(unicharset); + } + return false; + } + } + if (word->word->tess_failed) continue; + WordRecognizer recognizer = pass_n == 1 ? &Tesseract::classify_word_pass1 + : &Tesseract::classify_word_pass2; + classify_word_and_language(recognizer, word); + if (tessedit_dump_choices) { + word_dumper(NULL, word->row, word->word); + tprintf("Pass%d: %s [%s]\n", pass_n, + word->word->best_choice->unichar_string().string(), + word->word->best_choice->debug_string().string()); + } + } + return true; +} + /** * recog_all_words() * @@ -179,27 +285,15 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res, const TBOX* target_word_box, const char* word_config, int dopasses) { - PAGE_RES_IT page_res_it; - inT32 word_index; // current word + PAGE_RES_IT page_res_it(page_res); if (tessedit_minimal_rej_pass1) { tessedit_test_adaption.set_value (TRUE); tessedit_minimal_rejection.set_value (TRUE); } - // Before the main recognition loop below, walk through the whole page and set - // up fake words. That way, if we run out of time a user will still get the - // expected best_choice and box_words out the end; they'll just be empty. - page_res_it.page_res = page_res; - for (page_res_it.restart_page(); page_res_it.word() != NULL; - page_res_it.forward()) { - page_res_it.word()->SetupFake(unicharset); - } - if (dopasses==0 || dopasses==1) { - page_res_it.page_res=page_res; page_res_it.restart_page(); - // ****************** Pass 1 ******************* // Clear adaptive classifier at the beginning of the page if it is full. @@ -214,20 +308,15 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res, if (sub_langs_[i]->AdaptiveClassifierIsFull()) sub_langs_[i]->ResetAdaptiveClassifierInternal(); } - - stats_.word_count = 0; - if (monitor != NULL) { - monitor->ocr_alive = TRUE; - while (page_res_it.word() != NULL) { - stats_.word_count++; - page_res_it.forward(); - } - page_res_it.restart_page(); - } else { - stats_.word_count = 1; + // Set up all words ready for recognition, so that if parallelism is on + // all the input and output classes are ready to run the classifier. + GenericVector words; + SetupAllWordsPassN(1, target_word_box, word_config, page_res, &words); + if (tessedit_parallelize) { + PrerecAllWordsPar(words); } - word_index = 0; + stats_.word_count = words.size(); stats_.dict_words = 0; stats_.doc_blob_quality = 0; @@ -237,56 +326,15 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res, stats_.doc_good_char_quality = 0; most_recently_used_ = this; + // Run pass 1 word recognition. + if (!RecogAllWordsPassN(1, monitor, &words)) return false; + // Pass 1 post-processing. while (page_res_it.word() != NULL) { - set_global_loc_code(LOC_PASS1); - word_index++; - if (monitor != NULL) { - monitor->ocr_alive = TRUE; - monitor->progress = 30 + 50 * word_index / stats_.word_count; - if (monitor->deadline_exceeded() || - (monitor->cancel != NULL && (*monitor->cancel)(monitor->cancel_this, - stats_.dict_words))) - return false; - } - if (target_word_box && - !ProcessTargetWord(page_res_it.word()->word->bounding_box(), - *target_word_box, word_config, 1)) { - page_res_it.forward(); - continue; - } - classify_word_and_language(&Tesseract::classify_word_pass1, - page_res_it.block()->block, - page_res_it.row()->row, - page_res_it.word()); if (page_res_it.word()->word->flag(W_REP_CHAR)) { fix_rep_char(&page_res_it); page_res_it.forward(); continue; } - if (tessedit_dump_choices) { - word_dumper(NULL, page_res_it.row()->row, page_res_it.word()); - tprintf("Pass1: %s [%s]\n", - page_res_it.word()->best_choice->unichar_string().string(), - page_res_it.word()->best_choice->debug_string().string()); - } - - // tessedit_test_adaption enables testing of the accuracy of the - // input to the adaptive classifier. - if (tessedit_test_adaption && !tessedit_minimal_rejection) { - if (!word_adaptable (page_res_it.word(), - tessedit_test_adaption_mode)) { - page_res_it.word()->reject_map.rej_word_tess_failure(); - // FAKE PERM REJ - } else { - // Override rejection mechanisms for this word. - UNICHAR_ID space = unicharset.unichar_to_id(" "); - for (int i = 0; i < page_res_it.word()->best_choice->length(); i++) { - if ((page_res_it.word()->best_choice->unichar_id(i) != space) && - page_res_it.word()->reject_map[i].rejected()) - page_res_it.word()->reject_map[i].setrej_minimal_rej_accept(); - } - } - } // Count dict words. if (page_res_it.word()->best_choice->permuter() == USER_DAWG_PERM) @@ -307,49 +355,26 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res, if (dopasses == 1) return true; // ****************** Pass 2 ******************* - page_res_it.restart_page(); - word_index = 0; - most_recently_used_ = this; - while (tessedit_tess_adaption_mode != 0x0 && !tessedit_test_adaption && - page_res_it.word() != NULL) { - set_global_loc_code(LOC_PASS2); - word_index++; - if (monitor != NULL) { - monitor->ocr_alive = TRUE; - monitor->progress = 80 + 10 * word_index / stats_.word_count; - if (monitor->deadline_exceeded() || - (monitor->cancel != NULL && (*monitor->cancel)(monitor->cancel_this, - stats_.dict_words))) - return false; + if (tessedit_tess_adaption_mode != 0x0 && !tessedit_test_adaption) { + page_res_it.restart_page(); + GenericVector words; + SetupAllWordsPassN(2, target_word_box, word_config, page_res, &words); + if (tessedit_parallelize) { + PrerecAllWordsPar(words); } - - // changed by jetsoft - // specific to its needs to extract one word when need - if (target_word_box && - !ProcessTargetWord(page_res_it.word()->word->bounding_box(), - *target_word_box, word_config, 2)) { + most_recently_used_ = this; + // Run pass 2 word recognition. + if (!RecogAllWordsPassN(2, monitor, &words)) return false; + // Pass 2 post-processing. + while (page_res_it.word() != NULL) { + WERD_RES* word = page_res_it.word(); + if (word->word->flag(W_REP_CHAR) && !word->done) { + fix_rep_char(&page_res_it); + page_res_it.forward(); + continue; + } page_res_it.forward(); - continue; } - // end jetsoft - - classify_word_and_language(&Tesseract::classify_word_pass2, - page_res_it.block()->block, - page_res_it.row()->row, - page_res_it.word()); - if (page_res_it.word()->word->flag(W_REP_CHAR) && - !page_res_it.word()->done) { - fix_rep_char(&page_res_it); - page_res_it.forward(); - continue; - } - if (tessedit_dump_choices) { - word_dumper(NULL, page_res_it.row()->row, page_res_it.word()); - tprintf("Pass2: %s [%s]\n", - page_res_it.word()->best_choice->unichar_string().string(), - page_res_it.word()->best_choice->debug_string().string()); - } - page_res_it.forward(); } // The next passes can only be run if tesseract has been used, as cube @@ -384,6 +409,7 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res, // Check the correctness of the final results. blamer_pass(page_res); } + script_pos_pass(page_res); // Write results pass. set_global_loc_code(LOC_WRITE_RESULTS); @@ -672,6 +698,46 @@ void Tesseract::blamer_pass(PAGE_RES* page_res) { } } +// Sets script positions and detects smallcaps on all output words. +void Tesseract::script_pos_pass(PAGE_RES* page_res) { + PAGE_RES_IT page_res_it(page_res); + for (page_res_it.restart_page(); page_res_it.word() != NULL; + page_res_it.forward()) { + WERD_RES* word = page_res_it.word(); + if (word->word->flag(W_REP_CHAR)) { + page_res_it.forward(); + continue; + } + float x_height = page_res_it.block()->block->x_height(); + float word_x_height = word->x_height; + if (word_x_height < word->best_choice->min_x_height() || + word_x_height > word->best_choice->max_x_height()) { + word_x_height = (word->best_choice->min_x_height() + + word->best_choice->max_x_height()) / 2.0f; + } + // Test for small caps. Word capheight must be close to block xheight, + // and word must contain no lower case letters, and at least one upper case. + double small_cap_xheight = x_height * kXHeightCapRatio; + double small_cap_delta = (x_height - small_cap_xheight) / 2.0; + if (word->uch_set->script_has_xheight() && + small_cap_xheight - small_cap_delta <= word_x_height && + word_x_height <= small_cap_xheight + small_cap_delta) { + // Scan for upper/lower. + int num_upper = 0; + int num_lower = 0; + for (int i = 0; i < word->best_choice->length(); ++i) { + if (word->uch_set->get_isupper(word->best_choice->unichar_id(i))) + ++num_upper; + else if (word->uch_set->get_islower(word->best_choice->unichar_id(i))) + ++num_lower; + } + if (num_upper > 0 && num_lower == 0) + word->small_caps = true; + } + word->SetScriptPositions(); + } +} + // Helper returns true if the new_word is better than the word, using a // simple test of better certainty AND rating (to reduce false positives // from cube) or a dictionary vs non-dictionary word. @@ -701,38 +767,33 @@ static bool NewWordBetter(const WERD_RES& word, const WERD_RES& new_word, // Helper to recognize the word using the given (language-specific) tesseract. // Returns true if the result was better than previously. -bool Tesseract::RetryWithLanguage(WERD_RES *word, BLOCK* block, ROW *row, +bool Tesseract::RetryWithLanguage(const WERD_RES& best_word, + WordData* word_data, WERD_RES* word, WordRecognizer recognizer) { if (classify_debug_level || cube_debug_level) { tprintf("Retrying word using lang %s, oem %d\n", lang.string(), static_cast(tessedit_ocr_engine_mode)); } - // Setup a trial WERD_RES in which to classify. - WERD_RES lang_word; - lang_word.InitForRetryRecognition(*word); // Run the recognizer on the word. // Initial version is a bit of a hack based on better certainty and rating // (to reduce false positives from cube) or a dictionary vs non-dictionary // word. - (this->*recognizer)(block, row, &lang_word); - bool new_is_better = NewWordBetter(*word, lang_word, + (this->*recognizer)(word_data, word); + bool new_is_better = NewWordBetter(best_word, *word, classify_max_rating_ratio, classify_max_certainty_margin); if (classify_debug_level || cube_debug_level) { - if (lang_word.best_choice == NULL) { - tprintf("New result %s better:%s\n", + if (word->best_choice == NULL) { + tprintf("NULL result %s better!\n", new_is_better ? "IS" : "NOT"); } else { tprintf("New result %s better:%s, r=%g, c=%g\n", new_is_better ? "IS" : "NOT", - lang_word.best_choice->unichar_string().string(), - lang_word.best_choice->rating(), - lang_word.best_choice->certainty()); + word->best_choice->unichar_string().string(), + word->best_choice->rating(), + word->best_choice->certainty()); } } - if (new_is_better) { - word->ConsumeWordResults(&lang_word); - } return new_is_better; } @@ -743,9 +804,9 @@ bool Tesseract::RetryWithLanguage(WERD_RES *word, BLOCK* block, ROW *row, // If recognition was not successful, tries all available languages until // it gets a successful result or runs out of languages. Keeps the best result. void Tesseract::classify_word_and_language(WordRecognizer recognizer, - BLOCK* block, - ROW *row, - WERD_RES *word) { + WordData* word_data) { + // Points to the best result. May be word or in lang_words. + WERD_RES* word = word_data->word; clock_t start_t = clock(); if (classify_debug_level || cube_debug_level) { tprintf("Processing word with lang %s at:", @@ -755,15 +816,23 @@ void Tesseract::classify_word_and_language(WordRecognizer recognizer, const char* result_type = "Initial"; bool initially_done = !word->tess_failed && word->done; if (initially_done) { - // If done on pass1, we reuse the tesseract that did it, and don't try - // any more. The only need to call the classifier at all is for the - // cube combiner and xheight fixing (which may be bogus on a done word.) + // If done on pass1, leave it as-is. most_recently_used_ = word->tesseract; result_type = "Already done"; + } else { + if (most_recently_used_ != this) { + // Point to the word for most_recently_used_. + for (int s = 0; s < sub_langs_.size(); ++s) { + if (most_recently_used_ == sub_langs_[s]) { + word = &word_data->lang_words[s]; + break; + } + } + } + (most_recently_used_->*recognizer)(word_data, word); + if (!word->tess_failed && word->tess_accepted) + result_type = "Accepted"; } - (most_recently_used_->*recognizer)(block, row, word); - if (!word->tess_failed && word->tess_accepted) - result_type = "Accepted"; if (classify_debug_level || cube_debug_level) { tprintf("%s result: %s r=%.4g, c=%.4g, accepted=%d, adaptable=%d" " xht=[%g,%g]\n", @@ -782,11 +851,31 @@ void Tesseract::classify_word_and_language(WordRecognizer recognizer, if (classify_debug_level) { tprintf("Retrying with main-Tesseract, lang: %s\n", lang.string()); } - if (RetryWithLanguage(word, block, row, recognizer)) { - most_recently_used_ = this; - if (!word->tess_failed && word->tess_accepted) - return; // No need to look at the others. + if (word_data->word->tesseract == this) { + // This is pass1, and we are trying the main language. + if (RetryWithLanguage(*word, word_data, word_data->word, recognizer)) { + most_recently_used_ = this; + word = word_data->word; + } + } else { + // This is pass2, and we are trying the main language again, but it + // has no word allocated to it, so we must re-initialize it. + WERD_RES main_word(*word_data->word); + main_word.InitForRetryRecognition(*word_data->word); + main_word.SetupForRecognition(unicharset, this, BestPix(), + tessedit_ocr_engine_mode, NULL, + classify_bln_numeric_mode, + textord_use_cjk_fp_model, + poly_allow_detailed_fx, + word_data->row, word_data->block); + if (RetryWithLanguage(*word, word_data, &main_word, recognizer)) { + most_recently_used_ = this; + word_data->word->ConsumeWordResults(&main_word); + word = word_data->word; + } } + if (!word->tess_failed && word->tess_accepted) + return; // No need to look at the others. } for (int i = 0; i < sub_langs_.size(); ++i) { @@ -795,14 +884,21 @@ void Tesseract::classify_word_and_language(WordRecognizer recognizer, tprintf("Retrying with sub-Tesseract[%d] lang: %s\n", i, sub_langs_[i]->lang.string()); } - if (sub_langs_[i]->RetryWithLanguage(word, block, row, recognizer)) { + if (sub_langs_[i]->RetryWithLanguage(*word, word_data, + &word_data->lang_words[i], + recognizer)) { most_recently_used_ = sub_langs_[i]; + word = &word_data->lang_words[i]; if (!word->tess_failed && word->tess_accepted) - return; // No need to look at the others. + break; // No need to look at the others. } } } } + if (word != word_data->word) { + // Move the result for the best language to the main word. + word_data->word->ConsumeWordResults(word); + } clock_t ocr_t = clock(); if (tessedit_timing_debug) { tprintf("%s (ocr took %.2f sec)\n", @@ -817,7 +913,11 @@ void Tesseract::classify_word_and_language(WordRecognizer recognizer, * Baseline normalize the word and pass it to Tess. */ -void Tesseract::classify_word_pass1(BLOCK* block, ROW *row, WERD_RES *word) { +void Tesseract::classify_word_pass1(WordData* word_data, WERD_RES* word) { + ROW* row = word_data->row; + BLOCK* block = word_data->block; + prev_word_best_choice_ = word_data->prev_word != NULL + ? word_data->prev_word->word->best_choice : NULL; // If we only intend to run cube - run it and return. if (tessedit_ocr_engine_mode == OEM_CUBE_ONLY) { cube_word_pass1(block, row, word); @@ -880,6 +980,10 @@ bool Tesseract::TrainedXheightFix(WERD_RES *word, BLOCK* block, ROW *row) { } new_x_ht_word.x_height = new_x_ht; new_x_ht_word.caps_height = 0.0; + new_x_ht_word.SetupForRecognition( + unicharset, this, BestPix(), tessedit_ocr_engine_mode, NULL, + classify_bln_numeric_mode, textord_use_cjk_fp_model, + poly_allow_detailed_fx, row, block); match_word_pass_n(2, &new_x_ht_word, row, block); if (!new_x_ht_word.tess_failed) { int new_misfits = CountMisfitTops(&new_x_ht_word); @@ -916,11 +1020,15 @@ bool Tesseract::TrainedXheightFix(WERD_RES *word, BLOCK* block, ROW *row) { * Control what to do with the word in pass 2 */ -void Tesseract::classify_word_pass2(BLOCK* block, ROW *row, WERD_RES *word) { +void Tesseract::classify_word_pass2(WordData* word_data, WERD_RES* word) { // Return if we do not want to run Tesseract. if (tessedit_ocr_engine_mode != OEM_TESSERACT_ONLY && tessedit_ocr_engine_mode != OEM_TESSERACT_CUBE_COMBINED) return; + ROW* row = word_data->row; + BLOCK* block = word_data->block; + prev_word_best_choice_ = word_data->prev_word != NULL + ? word_data->prev_word->word->best_choice : NULL; set_global_subloc_code(SUBLOC_NORM); check_debug_pt(word, 30); @@ -940,26 +1048,6 @@ void Tesseract::classify_word_pass2(BLOCK* block, ROW *row, WERD_RES *word) { // Use the tops and bottoms since they are available. TrainedXheightFix(word, block, row); } - // Test for small caps. Word capheight must be close to block xheight, - // and word must contain no lower case letters, and at least one upper case. - double small_cap_xheight = block->x_height() * kXHeightCapRatio; - double small_cap_delta = (block->x_height() - small_cap_xheight) / 2.0; - if (unicharset.script_has_xheight() && - small_cap_xheight - small_cap_delta <= word->x_height && - word->x_height <= small_cap_xheight + small_cap_delta) { - // Scan for upper/lower. - int num_upper = 0; - int num_lower = 0; - for (int i = 0; i < word->best_choice->length(); ++i) { - if (unicharset.get_isupper(word->best_choice->unichar_id(i))) - ++num_upper; - else if (unicharset.get_islower(word->best_choice->unichar_id(i))) - ++num_lower; - } - if (num_upper > 0 && num_lower == 0) - word->small_caps = true; - } - word->SetScriptPositions(); set_global_subloc_code(SUBLOC_NORM); } @@ -988,12 +1076,8 @@ void Tesseract::classify_word_pass2(BLOCK* block, ROW *row, WERD_RES *word) { void Tesseract::match_word_pass_n(int pass_n, WERD_RES *word, ROW *row, BLOCK* block) { - if (word->SetupForTessRecognition(unicharset, this, BestPix(), - classify_bln_numeric_mode, - textord_use_cjk_fp_model, - poly_allow_detailed_fx, - row, block)) - tess_segment_pass_n(pass_n, word); + if (word->tess_failed) return; + tess_segment_pass_n(pass_n, word); if (!word->tess_failed) { if (!word->word->flag (W_REP_CHAR)) { @@ -1136,12 +1220,12 @@ void Tesseract::ExplodeRepeatedWord(BLOB_CHOICE* best_choice, WERD_RES* rep_word = page_res_it->InsertSimpleCloneWord(*word_res, blob_word); // Setup the single char WERD_RES - if (rep_word->SetupForTessRecognition(*word_res->uch_set, this, BestPix(), - false, - textord_use_cjk_fp_model, - poly_allow_detailed_fx, - page_res_it->row()->row, - page_res_it->block()->block)) { + if (rep_word->SetupForRecognition(*word_res->uch_set, this, BestPix(), + tessedit_ocr_engine_mode, NULL, false, + textord_use_cjk_fp_model, + poly_allow_detailed_fx, + page_res_it->row()->row, + page_res_it->block()->block)) { rep_word->CloneChoppedToRebuild(); BLOB_CHOICE* blob_choice = new BLOB_CHOICE(*best_choice); rep_word->FakeClassifyWord(1, &blob_choice); diff --git a/ccmain/cube_control.cpp b/ccmain/cube_control.cpp index 411ea1a51..e0425e679 100644 --- a/ccmain/cube_control.cpp +++ b/ccmain/cube_control.cpp @@ -197,6 +197,9 @@ void Tesseract::run_cube_combiner(PAGE_RES *page_res) { // Iterate through the word results and call cube on each word. for (page_res_it.restart_page(); page_res_it.word () != NULL; page_res_it.forward()) { + BLOCK* block = page_res_it.block()->block; + if (block->poly_block() != NULL && !block->poly_block()->IsText()) + continue; // Don't deal with non-text blocks. WERD_RES* word = page_res_it.word(); // Skip cube entirely if tesseract's certainty is greater than threshold. int combiner_run_thresh = convert_prob_to_tess_certainty( @@ -210,6 +213,11 @@ void Tesseract::run_cube_combiner(PAGE_RES *page_res) { // Setup a trial WERD_RES in which to classify with cube. WERD_RES cube_word; cube_word.InitForRetryRecognition(*word); + cube_word.SetupForRecognition(lang_tess->unicharset, this, BestPix(), + OEM_CUBE_ONLY, + NULL, false, false, false, + page_res_it.row()->row, + page_res_it.block()->block); CubeObject *cube_obj = lang_tess->cube_recognize_word( page_res_it.block()->block, &cube_word); if (cube_obj != NULL) @@ -317,10 +325,6 @@ void Tesseract::cube_combine_word(CubeObject* cube_obj, WERD_RES* cube_word, **********************************************************************/ bool Tesseract::cube_recognize(CubeObject *cube_obj, BLOCK* block, WERD_RES *word) { - if (!word->SetupForCubeRecognition(unicharset, this, block)) { - return false; // Graphics block. - } - // Run cube WordAltList *cube_alt_list = cube_obj->RecognizeWord(); if (!cube_alt_list || cube_alt_list->AltCount() <= 0) { diff --git a/ccmain/fixspace.cpp b/ccmain/fixspace.cpp index ec568720c..24b37073c 100644 --- a/ccmain/fixspace.cpp +++ b/ccmain/fixspace.cpp @@ -204,8 +204,9 @@ void Tesseract::match_current_words(WERD_RES_LIST &words, ROW *row, for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) { word = word_it.data(); if ((!word->part_of_combo) && (word->box_word == NULL)) { - classify_word_and_language(&Tesseract::classify_word_pass2, - block, row, word); + WordData word_data(block, row, word); + SetupWordPassN(2, &word_data); + classify_word_and_language(&Tesseract::classify_word_pass2, &word_data); } prev_word_best_choice_ = word->best_choice; } diff --git a/ccmain/pgedit.cpp b/ccmain/pgedit.cpp index faf7df9c3..8812808f4 100644 --- a/ccmain/pgedit.cpp +++ b/ccmain/pgedit.cpp @@ -731,10 +731,12 @@ BOOL8 Tesseract:: word_blank_and_set_display(BLOCK* block, ROW* row, BOOL8 Tesseract::word_bln_display(BLOCK* block, ROW* row, WERD_RES* word_res) { TWERD *bln_word = word_res->chopped_word; if (bln_word == NULL) { - word_res->SetupForTessRecognition(unicharset, this, BestPix(), false, - textord_use_cjk_fp_model, - poly_allow_detailed_fx, - row, block); + word_res->SetupForRecognition(unicharset, this, BestPix(), + tessedit_ocr_engine_mode, NULL, + classify_bln_numeric_mode, + textord_use_cjk_fp_model, + poly_allow_detailed_fx, + row, block); bln_word = word_res->chopped_word; } bln_word_window_handle()->Clear(); @@ -963,10 +965,12 @@ void Tesseract::blob_feature_display(PAGE_RES* page_res, if (word != NULL) { WERD_RES word_res(word); word_res.x_height = row->x_height(); - word_res.SetupForTessRecognition(unicharset, this, BestPix(), false, - textord_use_cjk_fp_model, - poly_allow_detailed_fx, - row, block); + word_res.SetupForRecognition(unicharset, this, BestPix(), + tessedit_ocr_engine_mode, NULL, + classify_bln_numeric_mode, + textord_use_cjk_fp_model, + poly_allow_detailed_fx, + row, block); TWERD* bln_word = word_res.chopped_word; TBLOB* bln_blob = bln_word->blobs[0]; INT_FX_RESULT_STRUCT fx_info; diff --git a/ccmain/recogtraining.cpp b/ccmain/recogtraining.cpp index d9c27b9be..e4d6e5f3d 100644 --- a/ccmain/recogtraining.cpp +++ b/ccmain/recogtraining.cpp @@ -203,7 +203,9 @@ void Tesseract::ambigs_classify_and_output(WERD_RES *werd_res, FILE *output_file) { // Classify word. fflush(stdout); - classify_word_pass1(block_res->block, row_res->row, werd_res); + WordData word_data(block_res->block, row_res->row, werd_res); + SetupWordPassN(1, &word_data); + classify_word_pass1(&word_data, werd_res); WERD_CHOICE *best_choice = werd_res->best_choice; ASSERT_HOST(best_choice != NULL); diff --git a/ccmain/tesseractclass.cpp b/ccmain/tesseractclass.cpp index 40e388395..17ed433c3 100644 --- a/ccmain/tesseractclass.cpp +++ b/ccmain/tesseractclass.cpp @@ -402,6 +402,8 @@ Tesseract::Tesseract() "for layout analysis.", this->params()), BOOL_MEMBER(textord_equation_detect, false, "Turn on equation detector", this->params()), + INT_MEMBER(tessedit_parallelize, 0, "Run in parallel where possible", + this->params()), // The following parameters were deprecated and removed from their original // locations. The parameters are temporarily kept here to give Tesseract @@ -528,7 +530,6 @@ void Tesseract::Clear() { reskew_ = FCOORD(1.0f, 0.0f); splitter_.Clear(); scaled_factor_ = -1; - ResetFeaturesHaveBeenExtracted(); for (int i = 0; i < sub_langs_.size(); ++i) sub_langs_[i]->Clear(); } diff --git a/ccmain/tesseractclass.h b/ccmain/tesseractclass.h index 3c1d5fa44..311bb9460 100644 --- a/ccmain/tesseractclass.h +++ b/ccmain/tesseractclass.h @@ -100,10 +100,6 @@ class EquationDetect; class Tesseract; class TesseractCubeCombiner; -typedef void (Tesseract::*WordRecognizer)(BLOCK* block, - ROW *row, - WERD_RES *word); - // A collection of various variables for statistics and debugging. struct TesseractStats { TesseractStats() @@ -136,6 +132,24 @@ struct TesseractStats { bool write_results_empty_block; }; +// Struct to hold all the pointers to relevant data for processing a word. +struct WordData { + WordData() : word(NULL), row(NULL), block(NULL), prev_word(NULL) {} + explicit WordData(const PAGE_RES_IT& page_res_it) + : word(page_res_it.word()), row(page_res_it.row()->row), + block(page_res_it.block()->block), prev_word(NULL) {} + WordData(BLOCK* block_in, ROW* row_in, WERD_RES* word_res) + : word(word_res), row(row_in), block(block_in), prev_word(NULL) {} + + WERD_RES* word; + ROW* row; + BLOCK* block; + WordData* prev_word; + GenericVector lang_words; +}; + +typedef void (Tesseract::*WordRecognizer)(WordData* word_data, WERD_RES* word); + class Tesseract : public Wordrec { public: Tesseract(); @@ -250,10 +264,23 @@ class Tesseract : public Wordrec { bool single_column, bool osd, bool only_osd, BLOCK_LIST* blocks, Tesseract* osd_tess, OSResults* osr, TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix, Pix** music_mask_pix); + // par_control.cpp + void PrerecAllWordsPar(const GenericVector& words); //// control.h ///////////////////////////////////////////////////////// bool ProcessTargetWord(const TBOX& word_box, const TBOX& target_word_box, const char* word_config, int pass); + // Sets up the words ready for whichever engine is to be run + void SetupAllWordsPassN(int pass_n, + const TBOX* target_word_box, + const char* word_config, + PAGE_RES* page_res, + GenericVector* words); + // Sets up the single word ready for whichever engine is to be run. + void SetupWordPassN(int pass_n, WordData* word); + // Runs word recognition on all the words. + bool RecogAllWordsPassN(int pass_n, ETEXT_DESC* monitor, + GenericVector* words); bool recog_all_words(PAGE_RES* page_res, ETEXT_DESC* monitor, const TBOX* target_word_box, @@ -265,13 +292,15 @@ class Tesseract : public Wordrec { const char* word_config); void bigram_correction_pass(PAGE_RES *page_res); void blamer_pass(PAGE_RES* page_res); + // Sets script positions and detects smallcaps on all output words. + void script_pos_pass(PAGE_RES* page_res); // Helper to recognize the word using the given (language-specific) tesseract. // Returns true if the result was better than previously. - bool RetryWithLanguage(WERD_RES *word, BLOCK* block, ROW *row, - WordRecognizer recognizer); + bool RetryWithLanguage(const WERD_RES& best_word, WordData* word_data, + WERD_RES* word, WordRecognizer recognizer); void classify_word_and_language(WordRecognizer recognizer, - BLOCK* block, ROW *row, WERD_RES *word); - void classify_word_pass1(BLOCK* block, ROW *row, WERD_RES *word); + WordData* word_data); + void classify_word_pass1(WordData* word_data, WERD_RES* word); void recog_pseudo_word(PAGE_RES* page_res, // blocks to check TBOX &selection_box); @@ -282,7 +311,7 @@ class Tesseract : public Wordrec { const char *s, const char *lengths); void match_word_pass_n(int pass_n, WERD_RES *word, ROW *row, BLOCK* block); - void classify_word_pass2(BLOCK* block, ROW *row, WERD_RES *word); + void classify_word_pass2(WordData* word_data, WERD_RES* word); void ReportXhtFixResult(bool accept_new_word, float new_x_ht, WERD_RES* word, WERD_RES* new_word); bool RunOldFixXht(WERD_RES *word, BLOCK* block, ROW *row); @@ -936,6 +965,7 @@ class Tesseract : public Wordrec { "Only initialize with the config file. Useful if the instance is " "not going to be used for OCR but say only for layout analysis."); BOOL_VAR_H(textord_equation_detect, false, "Turn on equation detector"); + INT_VAR_H(tessedit_parallelize, 0, "Run in parallel where possible"); // The following parameters were deprecated and removed from their original // locations. The parameters are temporarily kept here to give Tesseract diff --git a/ccstruct/blobs.cpp b/ccstruct/blobs.cpp index fe758b0d3..a8bac832a 100644 --- a/ccstruct/blobs.cpp +++ b/ccstruct/blobs.cpp @@ -741,19 +741,36 @@ TWERD* TWERD::PolygonalCopy(bool allow_detailed_fx, WERD* src) { // DENORMs in the blobs. void TWERD::BLNormalize(const BLOCK* block, const ROW* row, Pix* pix, bool inverse, float x_height, bool numeric_mode, + tesseract::OcrEngineMode hint, + const TBOX* norm_box, DENORM* word_denorm) { TBOX word_box = bounding_box(); + if (norm_box != NULL) word_box = *norm_box; float word_middle = (word_box.left() + word_box.right()) / 2.0f; + float input_y_offset = 0.0f; + float final_y_offset = static_cast(kBlnBaselineOffset); + float scale = kBlnXHeight / x_height; + if (hint == tesseract::OEM_CUBE_ONLY || row == NULL) { + word_middle = word_box.left(); + input_y_offset = word_box.bottom(); + final_y_offset = 0.0f; + if (hint == tesseract::OEM_CUBE_ONLY) + scale = 1.0f; + } else { + input_y_offset = row->base_line(word_middle); + } for (int b = 0; b < blobs.size(); ++b) { TBLOB* blob = blobs[b]; TBOX blob_box = blob->bounding_box(); float mid_x = (blob_box.left() + blob_box.right()) / 2.0f; - float baseline = row->base_line(mid_x); - float scale = kBlnXHeight / x_height; + float baseline = input_y_offset; + float blob_scale = scale; if (numeric_mode) { baseline = blob_box.bottom(); - scale = ClipToRange(kBlnXHeight * 4.0f / (3 * blob_box.height()), - scale, scale * 1.5f); + blob_scale = ClipToRange(kBlnXHeight * 4.0f / (3 * blob_box.height()), + scale, scale * 1.5f); + } else if (row != NULL && hint != tesseract::OEM_CUBE_ONLY) { + baseline = row->base_line(mid_x); } // The image will be 8-bit grey if the input was grey or color. Note that in // a grey image 0 is black and 255 is white. If the input was binary, then @@ -761,16 +778,13 @@ void TWERD::BLNormalize(const BLOCK* block, const ROW* row, Pix* pix, // To tell the difference pixGetDepth() will return 8 or 1. // The inverse flag will be true iff the word has been determined to be // white on black, and is independent of whether the pix is 8 bit or 1 bit. - blob->Normalize(block, NULL, NULL, word_middle, baseline, scale, scale, - 0.0f, static_cast(kBlnBaselineOffset), - inverse, pix); + blob->Normalize(block, NULL, NULL, word_middle, baseline, blob_scale, + blob_scale, 0.0f, final_y_offset, inverse, pix); } if (word_denorm != NULL) { - float scale = kBlnXHeight / x_height; word_denorm->SetupNormalization(block, NULL, NULL, word_middle, - row->base_line(word_middle), - scale, scale, 0.0f, - static_cast(kBlnBaselineOffset)); + input_y_offset, scale, scale, + 0.0f, final_y_offset); word_denorm->set_inverse(inverse); word_denorm->set_pix(pix); } diff --git a/ccstruct/blobs.h b/ccstruct/blobs.h index 24aae1134..e39761b17 100644 --- a/ccstruct/blobs.h +++ b/ccstruct/blobs.h @@ -31,6 +31,7 @@ ----------------------------------------------------------------------*/ #include "clst.h" #include "normalis.h" +#include "publictypes.h" #include "rect.h" #include "vecfuncs.h" @@ -316,7 +317,10 @@ struct TWERD { // Baseline normalizes the blobs in-place, recording the normalization in the // DENORMs in the blobs. void BLNormalize(const BLOCK* block, const ROW* row, Pix* pix, bool inverse, - float x_height, bool numeric_mode, DENORM* word_denorm); + float x_height, bool numeric_mode, + tesseract::OcrEngineMode hint, + const TBOX* norm_box, + DENORM* word_denorm); // Copies the data and the blobs, but leaves next untouched. void CopyFrom(const TWERD& src); // Deletes owned data. diff --git a/ccstruct/pageres.cpp b/ccstruct/pageres.cpp index c710ee207..342a2a371 100644 --- a/ccstruct/pageres.cpp +++ b/ccstruct/pageres.cpp @@ -32,6 +32,8 @@ static const double kStopperAmbiguityThresholdGain = 8.0; // Constant offset for computing thresholds that determine the ambiguity of a // word. static const double kStopperAmbiguityThresholdOffset = 1.5; +// Max number of broken pieces to associate. +const int kWordrecMaxNumJoinChunks = 4; // Computes and returns a threshold of certainty difference used to determine // which words to keep, based on the adjustment factors of the two words. @@ -245,16 +247,25 @@ void WERD_RES::InitForRetryRecognition(const WERD_RES& source) { // If allow_detailed_fx is true, the feature extractor will receive fine // precision outline information, allowing smoother features and better // features on low resolution images. +// The norm_mode_hint sets the default mode for normalization in absence +// of any of the above flags. +// norm_box is used to override the word bounding box to determine the +// normalization scale and offset. // Returns false if the word is empty and sets up fake results. -bool WERD_RES::SetupForTessRecognition(const UNICHARSET& unicharset_in, +bool WERD_RES::SetupForRecognition(const UNICHARSET& unicharset_in, tesseract::Tesseract* tess, Pix* pix, + int norm_mode, + const TBOX* norm_box, bool numeric_mode, bool use_body_size, bool allow_detailed_fx, - ROW *row, BLOCK* block) { + ROW *row, const BLOCK* block) { + tesseract::OcrEngineMode norm_mode_hint = + static_cast(norm_mode); tesseract = tess; POLY_BLOCK* pb = block != NULL ? block->poly_block() : NULL; - if (word->cblob_list()->empty() || (pb != NULL && !pb->IsText())) { + if ((norm_mode_hint != tesseract::OEM_CUBE_ONLY && + word->cblob_list()->empty()) || (pb != NULL && !pb->IsText())) { // Empty words occur when all the blobs have been moved to the rej_blobs // list, which seems to occur frequently in junk. SetupFake(unicharset_in); @@ -264,13 +275,17 @@ bool WERD_RES::SetupForTessRecognition(const UNICHARSET& unicharset_in, ClearResults(); SetupWordScript(unicharset_in); chopped_word = TWERD::PolygonalCopy(allow_detailed_fx, word); - float word_xheight = use_body_size && row->body_size() > 0.0f + float word_xheight = use_body_size && row != NULL && row->body_size() > 0.0f ? row->body_size() : x_height; chopped_word->BLNormalize(block, row, pix, word->flag(W_INVERSE), - word_xheight, numeric_mode, &denorm); + word_xheight, numeric_mode, norm_mode_hint, + norm_box, &denorm); blob_row = row; SetupBasicsFromChoppedWord(unicharset_in); SetupBlamerBundle(); + int num_blobs = chopped_word->NumBlobs(); + ratings = new MATRIX(num_blobs, kWordrecMaxNumJoinChunks); + tess_failed = false; return true; } @@ -284,30 +299,6 @@ void WERD_RES::SetupBasicsFromChoppedWord(const UNICHARSET &unicharset_in) { ClearWordChoices(); } -// Sets up the members used in recognition: -// bln_boxes, chopped_word, seam_array, denorm, best_choice, raw_choice. -// Returns false if the word is empty and sets up fake results. -bool WERD_RES::SetupForCubeRecognition(const UNICHARSET& unicharset_in, - tesseract::Tesseract* tess, - const BLOCK* block) { - tesseract = tess; - POLY_BLOCK* pb = block != NULL ? block->poly_block() : NULL; - if (pb != NULL && !pb->IsText()) { - // Ignore words in graphic regions. - SetupFake(unicharset_in); - word->set_flag(W_REP_CHAR, false); - return false; - } - ClearResults(); - SetupWordScript(unicharset_in); - TBOX word_box = word->bounding_box(); - denorm.SetupNormalization(block, NULL, NULL, - word_box.left(), word_box.bottom(), - 1.0f, 1.0f, 0.0f, 0.0f); - SetupBlamerBundle(); - return true; -} - // Sets up the members used in recognition for an empty recognition result: // bln_boxes, chopped_word, seam_array, denorm, best_choice, raw_choice. void WERD_RES::SetupFake(const UNICHARSET& unicharset_in) { diff --git a/ccstruct/pageres.h b/ccstruct/pageres.h index d8044129a..4eb29ee4b 100644 --- a/ccstruct/pageres.h +++ b/ccstruct/pageres.h @@ -339,7 +339,8 @@ class WERD_RES : public ELIST_LINK { // characters purely based on their shape on the page, and by default produce // the corresponding unicode for a left-to-right context. const char* const BestUTF8(int blob_index, bool in_rtl_context) const { - if (blob_index < 0 || blob_index >= best_choice->length()) + if (blob_index < 0 || best_choice == NULL || + blob_index >= best_choice->length()) return NULL; UNICHAR_ID id = best_choice->unichar_id(blob_index); if (id < 0 || id >= uch_set->size() || id == INVALID_UNICHAR_ID) @@ -435,25 +436,22 @@ class WERD_RES : public ELIST_LINK { // If allow_detailed_fx is true, the feature extractor will receive fine // precision outline information, allowing smoother features and better // features on low resolution images. + // The norm_mode sets the default mode for normalization in absence + // of any of the above flags. It should really be a tesseract::OcrEngineMode + // but is declared as int for ease of use with tessedit_ocr_engine_mode. // Returns false if the word is empty and sets up fake results. - bool SetupForTessRecognition(const UNICHARSET& unicharset_in, - tesseract::Tesseract* tesseract, Pix* pix, - bool numeric_mode, bool use_body_size, - bool allow_detailed_fx, - ROW *row, BLOCK* block); + bool SetupForRecognition(const UNICHARSET& unicharset_in, + tesseract::Tesseract* tesseract, Pix* pix, + int norm_mode, + const TBOX* norm_box, bool numeric_mode, + bool use_body_size, bool allow_detailed_fx, + ROW *row, const BLOCK* block); // Set up the seam array, bln_boxes, best_choice, and raw_choice to empty // accumulators from a made chopped word. We presume the fields are already // empty. void SetupBasicsFromChoppedWord(const UNICHARSET &unicharset_in); - // Sets up the members used in recognition: - // bln_boxes, chopped_word, seam_array, denorm. - // Returns false if the word is empty and sets up fake results. - bool SetupForCubeRecognition(const UNICHARSET& unicharset_in, - tesseract::Tesseract* tesseract, - const BLOCK* block); - // Sets up the members used in recognition for an empty recognition result: // bln_boxes, chopped_word, seam_array, denorm, best_choice, raw_choice. void SetupFake(const UNICHARSET& uch); diff --git a/ccstruct/ratngs.cpp b/ccstruct/ratngs.cpp index 30608bb46..9460262c5 100644 --- a/ccstruct/ratngs.cpp +++ b/ccstruct/ratngs.cpp @@ -530,8 +530,9 @@ void WERD_CHOICE::SetScriptPositions(bool small_caps, TWERD* word) { // Initialize to normal. for (int i = 0; i < length_; ++i) script_pos_[i] = tesseract::SP_NORMAL; - if (word->blobs.empty()) + if (word->blobs.empty() || word->NumBlobs() != TotalOfStates()) { return; + } int position_counts[4]; for (int i = 0; i < 4; i++) { diff --git a/classify/adaptmatch.cpp b/classify/adaptmatch.cpp index 1ef606e3b..6051a95a0 100644 --- a/classify/adaptmatch.cpp +++ b/classify/adaptmatch.cpp @@ -122,8 +122,6 @@ struct PROTO_KEY { #define MarginalMatch(Rating) \ ((Rating) > matcher_great_threshold) -#define InitIntFX() (FeaturesHaveBeenExtracted = FALSE) - /*----------------------------------------------------------------------------- Private Function Prototypes -----------------------------------------------------------------------------*/ @@ -179,8 +177,7 @@ void Classify::AdaptiveClassifier(TBLOB *Blob, ADAPT_RESULTS *Results = new ADAPT_RESULTS(); Results->Initialize(); - if (AdaptedTemplates == NULL) - AdaptedTemplates = NewAdaptedTemplates (true); + ASSERT_HOST(AdaptedTemplates != NULL); DoAdaptiveMatch(Blob, Results); if (CPResults != NULL) @@ -207,7 +204,6 @@ void Classify::AdaptiveClassifier(TBLOB *Blob, DebugAdaptiveClassifier(Blob, Results); #endif - NumClassesOutput += Choices->length(); delete Results; } /* AdaptiveClassifier */ @@ -249,7 +245,6 @@ void Classify::LearnWord(const char* filename, WERD_RES *word) { if (!EnableLearning || word->best_choice == NULL) return; // Can't or won't adapt. - NumWordsAdaptedTo++; if (classify_learning_debug_level >= 1) tprintf("\n\nAdapting to word = %s\n", word->best_choice->debug_string().string()); @@ -480,15 +475,11 @@ void Classify::EndAdaptiveClassifier() { FreeNormProtos(); if (AllProtosOn != NULL) { FreeBitVector(AllProtosOn); - FreeBitVector(PrunedProtos); FreeBitVector(AllConfigsOn); - FreeBitVector(AllProtosOff); FreeBitVector(AllConfigsOff); FreeBitVector(TempProtoMask); AllProtosOn = NULL; - PrunedProtos = NULL; AllConfigsOn = NULL; - AllProtosOff = NULL; AllConfigsOff = NULL; TempProtoMask = NULL; } @@ -561,19 +552,15 @@ void Classify::InitAdaptiveClassifier(bool load_pre_trained_templates) { static_classifier_ = new TessClassifier(false, this); } - im_.Init(&classify_debug_level, classify_integer_matcher_multiplier); + im_.Init(&classify_debug_level); InitIntegerFX(); AllProtosOn = NewBitVector(MAX_NUM_PROTOS); - PrunedProtos = NewBitVector(MAX_NUM_PROTOS); AllConfigsOn = NewBitVector(MAX_NUM_CONFIGS); - AllProtosOff = NewBitVector(MAX_NUM_PROTOS); AllConfigsOff = NewBitVector(MAX_NUM_CONFIGS); TempProtoMask = NewBitVector(MAX_NUM_PROTOS); set_all_bits(AllProtosOn, WordsInVectorOfSize(MAX_NUM_PROTOS)); - set_all_bits(PrunedProtos, WordsInVectorOfSize(MAX_NUM_PROTOS)); set_all_bits(AllConfigsOn, WordsInVectorOfSize(MAX_NUM_CONFIGS)); - zero_all_bits(AllProtosOff, WordsInVectorOfSize(MAX_NUM_PROTOS)); zero_all_bits(AllConfigsOff, WordsInVectorOfSize(MAX_NUM_CONFIGS)); for (int i = 0; i < MAX_NUM_CLASSES; i++) { @@ -617,53 +604,11 @@ void Classify::ResetAdaptiveClassifierInternal() { NumAdaptationsFailed); } free_adapted_templates(AdaptedTemplates); - AdaptedTemplates = NULL; + AdaptedTemplates = NewAdaptedTemplates(true); NumAdaptationsFailed = 0; } -/*---------------------------------------------------------------------------*/ -/** - * Print to File the statistics which have - * been gathered for the adaptive matcher. - * - * @param File open text file to print adaptive statistics to - * - * Globals: none - * - * @note Exceptions: none - * @note History: Thu Apr 18 14:37:37 1991, DSJ, Created. - */ -void Classify::PrintAdaptiveStatistics(FILE *File) { - #ifndef SECURE_NAMES - - fprintf (File, "\nADAPTIVE MATCHER STATISTICS:\n"); - fprintf (File, "\tNum blobs classified = %d\n", AdaptiveMatcherCalls); - fprintf (File, "\tNum classes output = %d (Avg = %4.2f)\n", - NumClassesOutput, - ((AdaptiveMatcherCalls == 0) ? (0.0) : - ((float) NumClassesOutput / AdaptiveMatcherCalls))); - fprintf (File, "\t\tBaseline Classifier: %4d calls (%4.2f classes/call)\n", - BaselineClassifierCalls, - ((BaselineClassifierCalls == 0) ? (0.0) : - ((float) NumBaselineClassesTried / BaselineClassifierCalls))); - fprintf (File, "\t\tCharNorm Classifier: %4d calls (%4.2f classes/call)\n", - CharNormClassifierCalls, - ((CharNormClassifierCalls == 0) ? (0.0) : - ((float) NumCharNormClassesTried / CharNormClassifierCalls))); - fprintf (File, "\t\tAmbig Classifier: %4d calls (%4.2f classes/call)\n", - AmbigClassifierCalls, - ((AmbigClassifierCalls == 0) ? (0.0) : - ((float) NumAmbigClassesTried / AmbigClassifierCalls))); - - fprintf (File, "\nADAPTIVE LEARNER STATISTICS:\n"); - fprintf (File, "\tNumber of words adapted to: %d\n", NumWordsAdaptedTo); - fprintf (File, "\tNumber of chars adapted to: %d\n", NumCharsAdaptedTo); - - PrintAdaptedTemplates(File, AdaptedTemplates); - #endif -} /* PrintAdaptiveStatistics */ - /*---------------------------------------------------------------------------*/ /** @@ -915,8 +860,6 @@ void Classify::AdaptToChar(TBLOB *Blob, FEATURE_SET FloatFeatures; int NewTempConfigId; - ResetFeaturesHaveBeenExtracted(); - NumCharsAdaptedTo++; if (!LegalClassId (ClassId)) return; @@ -932,7 +875,6 @@ void Classify::AdaptToChar(TBLOB *Blob, if (NumFeatures <= 0) return; - im_.SetBaseLineMatch(); // Only match configs with the matching font. BIT_VECTOR MatchingFontConfigs = NewBitVector(MAX_NUM_PROTOS); for (int cfg = 0; cfg < IClass->NumConfigs; ++cfg) { @@ -1004,17 +946,16 @@ void Classify::AdaptToChar(TBLOB *Blob, void Classify::DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class) { #ifndef GRAPHICS_DISABLED - int bloblength = 0; - INT_FEATURE_ARRAY features; - uinT8* norm_array = new uinT8[unicharset.size()]; - int num_features = GetBaselineFeatures(blob, PreTrainedTemplates, - features, - norm_array, &bloblength); - delete [] norm_array; - INT_RESULT_STRUCT IntResult; + INT_FX_RESULT_STRUCT fx_info; + GenericVector bl_features; + TrainingSample* sample = + BlobToTrainingSample(*blob, classify_nonlinear_norm, &fx_info, + &bl_features); + if (sample == NULL) return; + INT_RESULT_STRUCT IntResult; im_.Match(int_class, AllProtosOn, AllConfigsOn, - num_features, features, + bl_features.size(), &bl_features[0], &IntResult, classify_adapt_feature_threshold, NO_DEBUG, matcher_debug_separate_windows); cprintf ("Best match to temp config %d = %4.1f%%.\n", @@ -1024,7 +965,7 @@ void Classify::DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class) { ConfigMask = 1 << IntResult.Config; ShowMatchDisplay(); im_.Match(int_class, AllProtosOn, (BIT_VECTOR)&ConfigMask, - num_features, features, + bl_features.size(), &bl_features[0], &IntResult, classify_adapt_feature_threshold, 6 | 0x19, matcher_debug_separate_windows); UpdateMatchDisplay(); @@ -1033,50 +974,6 @@ void Classify::DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class) { } -/*---------------------------------------------------------------------------*/ -/** - * @param Blob blob to add to templates for ClassId - * @param ClassId class to add blob to - * @param FontinfoId font information from pre-trained teamples - * @param Threshold minimum match rating to existing template - * - * Globals: - * - PreTrainedTemplates current set of built-in templates - * - * @note Exceptions: none - * @note History: Thu Mar 14 09:36:03 1991, DSJ, Created. - */ -void Classify::AdaptToPunc(TBLOB *Blob, - CLASS_ID ClassId, - int FontinfoId, - FLOAT32 Threshold) { - ADAPT_RESULTS *Results = new ADAPT_RESULTS(); - int i; - - Results->Initialize(); - CharNormClassifier(Blob, PreTrainedTemplates, Results); - RemoveBadMatches(Results); - - if (Results->NumMatches != 1) { - if (classify_learning_debug_level >= 1) { - cprintf ("Rejecting punc = %s (Alternatives = ", - unicharset.id_to_unichar(ClassId)); - - for (i = 0; i < Results->NumMatches; i++) - tprintf("%s", unicharset.id_to_unichar(Results->match[i].unichar_id)); - tprintf(")\n"); - } - } else { - #ifndef SECURE_NAMES - if (classify_learning_debug_level >= 1) - cprintf ("Adapting to punc = %s, thr= %g\n", - unicharset.id_to_unichar(ClassId), Threshold); - #endif - AdaptToChar(Blob, ClassId, FontinfoId, Threshold); - } - delete Results; -} /* AdaptToPunc */ - /*---------------------------------------------------------------------------*/ /** @@ -1167,50 +1064,41 @@ void Classify::AddNewResult(ADAPT_RESULTS *results, * @note Exceptions: none * @note History: Tue Mar 12 19:40:36 1991, DSJ, Created. */ -void Classify::AmbigClassifier(TBLOB *Blob, - INT_TEMPLATES Templates, - ADAPT_CLASS *Classes, - UNICHAR_ID *Ambiguities, - ADAPT_RESULTS *Results) { - int NumFeatures; - INT_FEATURE_ARRAY IntFeatures; +void Classify::AmbigClassifier( + const GenericVector& int_features, + const INT_FX_RESULT_STRUCT& fx_info, + const TBLOB *blob, + INT_TEMPLATES templates, + ADAPT_CLASS *classes, + UNICHAR_ID *ambiguities, + ADAPT_RESULTS *results) { + if (int_features.empty()) return; uinT8* CharNormArray = new uinT8[unicharset.size()]; INT_RESULT_STRUCT IntResult; - CLASS_ID ClassId; - - AmbigClassifierCalls++; - - NumFeatures = GetCharNormFeatures(Blob, Templates, IntFeatures, - NULL, CharNormArray, - &(Results->BlobLength)); - if (NumFeatures <= 0) { - delete [] CharNormArray; - return; - } + results->BlobLength = GetCharNormFeature(fx_info, templates, NULL, + CharNormArray); bool debug = matcher_debug_level >= 2 || classify_debug_level > 1; if (debug) tprintf("AM Matches = "); - int top = Blob->bounding_box().top(); - int bottom = Blob->bounding_box().bottom(); - while (*Ambiguities >= 0) { - ClassId = *Ambiguities; + int top = blob->bounding_box().top(); + int bottom = blob->bounding_box().bottom(); + while (*ambiguities >= 0) { + CLASS_ID class_id = *ambiguities; - im_.SetCharNormMatch(classify_integer_matcher_multiplier); - im_.Match(ClassForClassId(Templates, ClassId), + im_.Match(ClassForClassId(templates, class_id), AllProtosOn, AllConfigsOn, - NumFeatures, IntFeatures, + int_features.size(), &int_features[0], &IntResult, classify_adapt_feature_threshold, NO_DEBUG, matcher_debug_separate_windows); - ExpandShapesAndApplyCorrections(NULL, debug, ClassId, bottom, top, 0, - Results->BlobLength, CharNormArray, - IntResult, Results); - Ambiguities++; - - NumAmbigClassesTried++; + ExpandShapesAndApplyCorrections(NULL, debug, class_id, bottom, top, 0, + results->BlobLength, + classify_integer_matcher_multiplier, + CharNormArray, IntResult, results); + ambiguities++; } delete [] CharNormArray; } /* AmbigClassifier */ @@ -1225,6 +1113,7 @@ void Classify::MasterMatcher(INT_TEMPLATES templates, ADAPT_CLASS* classes, int debug, int num_classes, + int matcher_multiplier, const TBOX& blob_box, CLASS_PRUNER_RESULTS results, ADAPT_RESULTS* final_results) { @@ -1246,7 +1135,8 @@ void Classify::MasterMatcher(INT_TEMPLATES templates, bool debug = matcher_debug_level >= 2 || classify_debug_level > 1; ExpandShapesAndApplyCorrections(classes, debug, class_id, bottom, top, results[c].Rating, - final_results->BlobLength, norm_factors, + final_results->BlobLength, + matcher_multiplier, norm_factors, int_result, final_results); } } @@ -1258,7 +1148,8 @@ void Classify::MasterMatcher(INT_TEMPLATES templates, // The results are added to the final_results output. void Classify::ExpandShapesAndApplyCorrections( ADAPT_CLASS* classes, bool debug, int class_id, int bottom, int top, - float cp_rating, int blob_length, const uinT8* cn_factors, + float cp_rating, int blob_length, int matcher_multiplier, + const uinT8* cn_factors, INT_RESULT_STRUCT& int_result, ADAPT_RESULTS* final_results) { // Compute the fontinfo_ids. int fontinfo_id = kBlankFontinfoId; @@ -1292,7 +1183,7 @@ void Classify::ExpandShapesAndApplyCorrections( int_result.Rating, int_result.FeatureMisses, bottom, top, blob_length, - cn_factors); + matcher_multiplier, cn_factors); if (c == 0 || rating < min_rating) min_rating = rating; if (unicharset.get_enabled(unichar_id)) { @@ -1309,7 +1200,7 @@ void Classify::ExpandShapesAndApplyCorrections( int_result.Rating, int_result.FeatureMisses, bottom, top, blob_length, - cn_factors); + matcher_multiplier, cn_factors); if (unicharset.get_enabled(class_id)) { AddNewResult(final_results, class_id, -1, rating, classes != NULL, int_result.Config, @@ -1325,11 +1216,12 @@ double Classify::ComputeCorrectedRating(bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, - int blob_length, + int blob_length, int matcher_multiplier, const uinT8* cn_factors) { // Compute class feature corrections. double cn_corrected = im_.ApplyCNCorrection(im_rating, blob_length, - cn_factors[unichar_id]); + cn_factors[unichar_id], + matcher_multiplier); double miss_penalty = tessedit_class_miss_scale * feature_misses; double vertical_penalty = 0.0; // Penalize non-alnums for being vertical misfits. @@ -1383,39 +1275,30 @@ double Classify::ComputeCorrectedRating(bool debug, int unichar_id, * @note Exceptions: none * @note History: Tue Mar 12 19:38:03 1991, DSJ, Created. */ -UNICHAR_ID *Classify::BaselineClassifier(TBLOB *Blob, - ADAPT_TEMPLATES Templates, - ADAPT_RESULTS *Results) { - int NumFeatures; +UNICHAR_ID *Classify::BaselineClassifier( + TBLOB *Blob, const GenericVector& int_features, + const INT_FX_RESULT_STRUCT& fx_info, + ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results) { + if (int_features.empty()) return NULL; int NumClasses; - INT_FEATURE_ARRAY IntFeatures; uinT8* CharNormArray = new uinT8[unicharset.size()]; - CLASS_ID ClassId; + ClearCharNormArray(CharNormArray); - BaselineClassifierCalls++; - - NumFeatures = GetBaselineFeatures(Blob, Templates->Templates, IntFeatures, - CharNormArray, &Results->BlobLength); - if (NumFeatures <= 0) { - delete [] CharNormArray; - return NULL; - } - - NumClasses = PruneClasses(Templates->Templates, NumFeatures, IntFeatures, + Results->BlobLength = IntCastRounded(fx_info.Length / kStandardFeatureLength); + NumClasses = PruneClasses(Templates->Templates, int_features.size(), + &int_features[0], CharNormArray, BaselineCutoffs, Results->CPResults); - NumBaselineClassesTried += NumClasses; - if (matcher_debug_level >= 2 || classify_debug_level > 1) cprintf ("BL Matches = "); - im_.SetBaseLineMatch(); - MasterMatcher(Templates->Templates, NumFeatures, IntFeatures, CharNormArray, - Templates->Class, matcher_debug_flags, NumClasses, + MasterMatcher(Templates->Templates, int_features.size(), &int_features[0], + CharNormArray, + Templates->Class, matcher_debug_flags, NumClasses, 0, Blob->bounding_box(), Results->CPResults, Results); delete [] CharNormArray; - ClassId = Results->best_match.unichar_id; + CLASS_ID ClassId = Results->best_match.unichar_id; if (ClassId == NO_CLASS) return (NULL); /* this is a bug - maybe should return "" */ @@ -1445,17 +1328,13 @@ UNICHAR_ID *Classify::BaselineClassifier(TBLOB *Blob, * @note History: Tue Mar 12 16:02:52 1991, DSJ, Created. */ int Classify::CharNormClassifier(TBLOB *blob, - INT_TEMPLATES Templates, + const TrainingSample& sample, ADAPT_RESULTS *adapt_results) { - CharNormClassifierCalls++; - TrainingSample* sample = BlobToTrainingSample(*blob, NM_CHAR_ANISOTROPIC, - classify_nonlinear_norm); - if (sample == NULL) return 0; // This is the length that is used for scaling ratings vs certainty. adapt_results->BlobLength = - IntCastRounded(sample->outline_length() / kStandardFeatureLength); + IntCastRounded(sample.outline_length() / kStandardFeatureLength); GenericVector unichar_results; - static_classifier_->UnicharClassifySample(*sample, blob->denorm().pix(), 0, + static_classifier_->UnicharClassifySample(sample, blob->denorm().pix(), 0, -1, &unichar_results); // Convert results to the format used internally by AdaptiveClassifier. for (int r = 0; r < unichar_results.size(); ++r) { @@ -1468,9 +1347,7 @@ int Classify::CharNormClassifier(TBLOB *blob, float rating = 1.0f - unichar_results[r].rating; AddNewResult(adapt_results, unichar_id, -1, rating, false, 0, font1, font2); } - int num_features = sample->num_features(); - delete sample; - return num_features; + return sample.num_features(); } /* CharNormClassifier */ // As CharNormClassifier, but operates on a TrainingSample and outputs to @@ -1518,10 +1395,10 @@ int Classify::CharNormTrainingSample(bool pruner_only, UnicharRating(class_id, 1.0f - adapt_results->CPResults[i].Rating)); } } else { - im_.SetCharNormMatch(classify_integer_matcher_multiplier); MasterMatcher(PreTrainedTemplates, num_features, sample.features(), char_norm_array, NULL, matcher_debug_flags, num_classes, + classify_integer_matcher_multiplier, blob_box, adapt_results->CPResults, adapt_results); // Convert master matcher results to output format. for (int i = 0; i < adapt_results->NumMatches; i++) { @@ -1711,8 +1588,10 @@ void Classify::DebugAdaptiveClassifier(TBLOB *blob, if (i == 0 || Results->match[i].rating < Results->best_match.rating) Results->best_match = Results->match[i]; } - TrainingSample* sample = BlobToTrainingSample(*blob, NM_CHAR_ANISOTROPIC, - classify_nonlinear_norm); + INT_FX_RESULT_STRUCT fx_info; + GenericVector bl_features; + TrainingSample* sample = + BlobToTrainingSample(*blob, false, &fx_info, &bl_features); if (sample == NULL) return; static_classifier_->DebugDisplay(*sample, blob->denorm().pix(), Results->best_match.unichar_id); @@ -1745,21 +1624,26 @@ void Classify::DebugAdaptiveClassifier(TBLOB *blob, void Classify::DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results) { UNICHAR_ID *Ambiguities; - AdaptiveMatcherCalls++; - InitIntFX(); + INT_FX_RESULT_STRUCT fx_info; + GenericVector bl_features; + TrainingSample* sample = + BlobToTrainingSample(*Blob, classify_nonlinear_norm, &fx_info, + &bl_features); + if (sample == NULL) return; if (AdaptedTemplates->NumPermClasses < matcher_permanent_classes_min || tess_cn_matching) { - CharNormClassifier(Blob, PreTrainedTemplates, Results); + CharNormClassifier(Blob, *sample, Results); } else { - Ambiguities = BaselineClassifier(Blob, AdaptedTemplates, Results); + Ambiguities = BaselineClassifier(Blob, bl_features, fx_info, + AdaptedTemplates, Results); if ((Results->NumMatches > 0 && MarginalMatch (Results->best_match.rating) && !tess_bn_matching) || Results->NumMatches == 0) { - CharNormClassifier(Blob, PreTrainedTemplates, Results); + CharNormClassifier(Blob, *sample, Results); } else if (Ambiguities && *Ambiguities >= 0 && !tess_bn_matching) { - AmbigClassifier(Blob, + AmbigClassifier(bl_features, fx_info, Blob, PreTrainedTemplates, AdaptedTemplates->Class, Ambiguities, @@ -1773,6 +1657,7 @@ void Classify::DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results) { // just adding a NULL classification. if (!Results->HasNonfragment || Results->NumMatches == 0) ClassifyAsNoise(Results); + delete sample; } /* DoAdaptiveMatch */ /*---------------------------------------------------------------------------*/ @@ -1799,8 +1684,15 @@ UNICHAR_ID *Classify::GetAmbiguities(TBLOB *Blob, int i; Results->Initialize(); + INT_FX_RESULT_STRUCT fx_info; + GenericVector bl_features; + TrainingSample* sample = + BlobToTrainingSample(*Blob, classify_nonlinear_norm, &fx_info, + &bl_features); + if (sample == NULL) return NULL; - CharNormClassifier(Blob, PreTrainedTemplates, Results); + CharNormClassifier(Blob, *sample, Results); + delete sample; RemoveBadMatches(Results); qsort((void *)Results->match, Results->NumMatches, sizeof(ScoredClass), CompareByRating); @@ -1823,58 +1715,6 @@ UNICHAR_ID *Classify::GetAmbiguities(TBLOB *Blob, return Ambiguities; } /* GetAmbiguities */ -/*---------------------------------------------------------------------------*/ -/** - * This routine calls the integer (Hardware) feature - * extractor if it has not been called before for this blob. - * The results from the feature extractor are placed into - * globals so that they can be used in other routines without - * re-extracting the features. - * It then copies the baseline features into the IntFeatures - * array provided by the caller. - * - * @param Blob blob to extract features from - * @param Templates used to compute char norm adjustments - * @param IntFeatures array to fill with integer features - * @param CharNormArray array to fill with dummy char norm adjustments - * @param BlobLength length of blob in baseline-normalized units - * - * Globals: - * - FeaturesHaveBeenExtracted TRUE if fx has been done - * - BaselineFeatures holds extracted baseline feat - * - CharNormFeatures holds extracted char norm feat - * - FXInfo holds misc. FX info - * - * @return Number of features extracted or 0 if an error occured. - * @note Exceptions: none - * @note History: Tue May 28 10:40:52 1991, DSJ, Created. - */ -int Classify::GetBaselineFeatures(TBLOB *Blob, - INT_TEMPLATES Templates, - INT_FEATURE_ARRAY IntFeatures, - uinT8* CharNormArray, - inT32 *BlobLength) { - if (!FeaturesHaveBeenExtracted) { - FeaturesOK = ExtractIntFeat(*Blob, classify_nonlinear_norm, - BaselineFeatures, CharNormFeatures, &FXInfo); - FeaturesHaveBeenExtracted = TRUE; - } - - *BlobLength = IntCastRounded(FXInfo.Length / kStandardFeatureLength); - if (!FeaturesOK) { - return 0; - } - - memcpy(IntFeatures, BaselineFeatures, FXInfo.NumBL * sizeof(IntFeatures[0])); - - ClearCharNormArray(CharNormArray); - return FXInfo.NumBL; -} /* GetBaselineFeatures */ - -void Classify::ResetFeaturesHaveBeenExtracted() { - FeaturesHaveBeenExtracted = FALSE; -} - // Returns true if the given blob looks too dissimilar to any character // present in the classifier templates. bool Classify::LooksLikeGarbage(TBLOB *blob) { @@ -1921,48 +1761,28 @@ bool Classify::LooksLikeGarbage(TBLOB *blob) { * @param BlobLength length of blob in baseline-normalized units * * Globals: - * - FeaturesHaveBeenExtracted TRUE if fx has been done - * - BaselineFeatures holds extracted baseline feat - * - CharNormFeatures holds extracted char norm feat - * - FXInfo holds misc. FX info * * @return Number of features extracted or 0 if an error occured. * @note Exceptions: none * @note History: Tue May 28 10:40:52 1991, DSJ, Created. */ -int Classify::GetCharNormFeatures(TBLOB *Blob, - INT_TEMPLATES Templates, - INT_FEATURE_ARRAY IntFeatures, - uinT8* PrunerNormArray, - uinT8* CharNormArray, - inT32 *BlobLength) { - FEATURE NormFeature; - FLOAT32 Baseline, Scale; - - if (!FeaturesHaveBeenExtracted) { - FeaturesOK = ExtractIntFeat(*Blob, classify_nonlinear_norm, - BaselineFeatures, CharNormFeatures, &FXInfo); - FeaturesHaveBeenExtracted = TRUE; - } - - *BlobLength = IntCastRounded(FXInfo.Length / kStandardFeatureLength); - if (!FeaturesOK) { - return 0; - } - - memcpy(IntFeatures, CharNormFeatures, FXInfo.NumCN * sizeof(IntFeatures[0])); - - NormFeature = NewFeature(&CharNormDesc); - Baseline = kBlnBaselineOffset; - Scale = MF_SCALE_FACTOR; - NormFeature->Params[CharNormY] = (FXInfo.Ymean - Baseline) * Scale; - NormFeature->Params[CharNormLength] = - FXInfo.Length * Scale / LENGTH_COMPRESSION; - NormFeature->Params[CharNormRx] = FXInfo.Rx * Scale; - NormFeature->Params[CharNormRy] = FXInfo.Ry * Scale; - ComputeCharNormArrays(NormFeature, Templates, CharNormArray, PrunerNormArray); - return FXInfo.NumCN; -} /* GetCharNormFeatures */ +int Classify::GetCharNormFeature(const INT_FX_RESULT_STRUCT& fx_info, + INT_TEMPLATES templates, + uinT8* pruner_norm_array, + uinT8* char_norm_array) { + FEATURE norm_feature = NewFeature(&CharNormDesc); + float baseline = kBlnBaselineOffset; + float scale = MF_SCALE_FACTOR; + norm_feature->Params[CharNormY] = (fx_info.Ymean - baseline) * scale; + norm_feature->Params[CharNormLength] = + fx_info.Length * scale / LENGTH_COMPRESSION; + norm_feature->Params[CharNormRx] = fx_info.Rx * scale; + norm_feature->Params[CharNormRy] = fx_info.Ry * scale; + // Deletes norm_feature. + ComputeCharNormArrays(norm_feature, templates, char_norm_array, + pruner_norm_array); + return IntCastRounded(fx_info.Length / kStandardFeatureLength); +} /* GetCharNormFeature */ // Computes the char_norm_array for the unicharset and, if not NULL, the // pruner_array as appropriate according to the existence of the shape_table. @@ -2454,7 +2274,6 @@ void Classify::ShowBestMatchFor(int shape_id, } INT_RESULT_STRUCT cn_result; classify_norm_method.set_value(character); - im_.SetCharNormMatch(classify_integer_matcher_multiplier); im_.Match(ClassForClassId(PreTrainedTemplates, shape_id), AllProtosOn, AllConfigsOn, num_features, features, &cn_result, diff --git a/classify/classify.cpp b/classify/classify.cpp index 1eca2e9c2..166680d6e 100644 --- a/classify/classify.cpp +++ b/classify/classify.cpp @@ -165,27 +165,13 @@ Classify::Classify() AdaptedTemplates = NULL; PreTrainedTemplates = NULL; AllProtosOn = NULL; - PrunedProtos = NULL; AllConfigsOn = NULL; - AllProtosOff = NULL; AllConfigsOff = NULL; TempProtoMask = NULL; NormProtos = NULL; - AdaptiveMatcherCalls = 0; - BaselineClassifierCalls = 0; - CharNormClassifierCalls = 0; - AmbigClassifierCalls = 0; - NumWordsAdaptedTo = 0; - NumCharsAdaptedTo = 0; - NumBaselineClassesTried = 0; - NumCharNormClassesTried = 0; - NumAmbigClassesTried = 0; - NumClassesOutput = 0; NumAdaptationsFailed = 0; - FeaturesHaveBeenExtracted = false; - FeaturesOK = true; learn_debug_win_ = NULL; learn_fragmented_word_debug_win_ = NULL; learn_fragments_debug_win_ = NULL; diff --git a/classify/classify.h b/classify/classify.h index 92629da71..54f8b7773 100644 --- a/classify/classify.h +++ b/classify/classify.h @@ -145,15 +145,13 @@ class Classify : public CCStruct { int FontinfoId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates); - void AdaptToPunc(TBLOB *Blob, - CLASS_ID ClassId, - int FontinfoId, - FLOAT32 Threshold); - void AmbigClassifier(TBLOB *Blob, - INT_TEMPLATES Templates, - ADAPT_CLASS *Classes, - UNICHAR_ID *Ambiguities, - ADAPT_RESULTS *Results); + void AmbigClassifier(const GenericVector& int_features, + const INT_FX_RESULT_STRUCT& fx_info, + const TBLOB *blob, + INT_TEMPLATES templates, + ADAPT_CLASS *classes, + UNICHAR_ID *ambiguities, + ADAPT_RESULTS *results); void MasterMatcher(INT_TEMPLATES templates, inT16 num_features, const INT_FEATURE_STRUCT* features, @@ -161,6 +159,7 @@ class Classify : public CCStruct { ADAPT_CLASS* classes, int debug, int num_classes, + int matcher_multiplier, const TBOX& blob_box, CLASS_PRUNER_RESULTS results, ADAPT_RESULTS* final_results); @@ -175,6 +174,7 @@ class Classify : public CCStruct { int bottom, int top, float cp_rating, int blob_length, + int matcher_multiplier, const uinT8* cn_factors, INT_RESULT_STRUCT& int_result, ADAPT_RESULTS* final_results); @@ -184,7 +184,8 @@ class Classify : public CCStruct { double ComputeCorrectedRating(bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, - int blob_length, const uinT8* cn_factors); + int blob_length, int matcher_multiplier, + const uinT8* cn_factors); void ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices); @@ -246,12 +247,13 @@ class Classify : public CCStruct { // Converts a shape_table_ index to a classifier class_id index (not a // unichar-id!). Uses a search, so not fast. int ShapeIDToClassID(int shape_id) const; - UNICHAR_ID *BaselineClassifier(TBLOB *Blob, - ADAPT_TEMPLATES Templates, - ADAPT_RESULTS *Results); - int CharNormClassifier(TBLOB *Blob, - INT_TEMPLATES Templates, - ADAPT_RESULTS *Results); + UNICHAR_ID *BaselineClassifier( + TBLOB *Blob, const GenericVector& int_features, + const INT_FX_RESULT_STRUCT& fx_info, + ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results); + int CharNormClassifier(TBLOB *blob, + const TrainingSample& sample, + ADAPT_RESULTS *adapt_results); // As CharNormClassifier, but operates on a TrainingSample and outputs to // a GenericVector of ShapeRating without conversion to classes. @@ -267,7 +269,6 @@ class Classify : public CCStruct { void DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class); bool AdaptableWord(WERD_RES* word); void EndAdaptiveClassifier(); - void PrintAdaptiveStatistics(FILE *File); void SettupPass1(); void SettupPass2(); void AdaptiveClassifier(TBLOB *Blob, @@ -276,17 +277,10 @@ class Classify : public CCStruct { void ClassifyAsNoise(ADAPT_RESULTS *Results); void ResetAdaptiveClassifierInternal(); - int GetBaselineFeatures(TBLOB *Blob, - INT_TEMPLATES Templates, - INT_FEATURE_ARRAY IntFeatures, - uinT8* CharNormArray, - inT32 *BlobLength); - int GetCharNormFeatures(TBLOB *Blob, - INT_TEMPLATES Templates, - INT_FEATURE_ARRAY IntFeatures, - uinT8* PrunerNormArray, - uinT8* CharNormArray, - inT32 *BlobLength); + int GetCharNormFeature(const INT_FX_RESULT_STRUCT& fx_info, + INT_TEMPLATES templates, + uinT8* pruner_norm_array, + uinT8* char_norm_array); // Computes the char_norm_array for the unicharset and, if not NULL, the // pruner_array as appropriate according to the existence of the shape_table. // The norm_feature is deleted as it is almost certainly no longer needed. @@ -298,7 +292,6 @@ class Classify : public CCStruct { bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config); void UpdateAmbigsGroup(CLASS_ID class_id, TBLOB *Blob); - void ResetFeaturesHaveBeenExtracted(); bool AdaptiveClassifierIsFull() { return NumAdaptationsFailed > 0; } bool LooksLikeGarbage(TBLOB *blob); void RefreshDebugWindow(ScrollView **win, const char *msg, @@ -468,9 +461,7 @@ class Classify : public CCStruct { // Create dummy proto and config masks for use with the built-in templates. BIT_VECTOR AllProtosOn; - BIT_VECTOR PrunedProtos; BIT_VECTOR AllConfigsOn; - BIT_VECTOR AllProtosOff; BIT_VECTOR AllConfigsOff; BIT_VECTOR TempProtoMask; bool EnableLearning; @@ -504,34 +495,13 @@ class Classify : public CCStruct { ShapeTable* shape_table_; private: - Dict dict_; // The currently active static classifier. ShapeClassifier* static_classifier_; /* variables used to hold performance statistics */ - int AdaptiveMatcherCalls; - int BaselineClassifierCalls; - int CharNormClassifierCalls; - int AmbigClassifierCalls; - int NumWordsAdaptedTo; - int NumCharsAdaptedTo; - int NumBaselineClassesTried; - int NumCharNormClassesTried; - int NumAmbigClassesTried; - int NumClassesOutput; int NumAdaptationsFailed; - /* variables used to hold onto extracted features. This is used - to map from the old scheme in which baseline features and char norm - features are extracted separately, to the new scheme in which they - are extracted at the same time. */ - bool FeaturesHaveBeenExtracted; - bool FeaturesOK; - INT_FEATURE_ARRAY BaselineFeatures; - INT_FEATURE_ARRAY CharNormFeatures; - INT_FX_RESULT_STRUCT FXInfo; - // Expected number of features in the class pruner, used to penalize // unknowns that have too few features (like a c being classified as e) so // it doesn't recognize everything as '@' or '#'. diff --git a/classify/intfx.cpp b/classify/intfx.cpp index 63d6ddb1e..496cdad2c 100644 --- a/classify/intfx.cpp +++ b/classify/intfx.cpp @@ -78,31 +78,19 @@ namespace tesseract { // TODO(rays) BlobToTrainingSample must remain a global function until // the FlexFx and FeatureDescription code can be removed and LearnBlob // made a member of Classify. -TrainingSample* BlobToTrainingSample(const TBLOB& blob, - tesseract::NormalizationMode mode, - bool nonlinear_norm) { - INT_FX_RESULT_STRUCT fx_info; - GenericVector bl_features; +TrainingSample* BlobToTrainingSample( + const TBLOB& blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT* fx_info, + GenericVector* bl_features) { GenericVector cn_features; - Classify::ExtractFeatures(blob, nonlinear_norm, &bl_features, - &cn_features, &fx_info, NULL); + Classify::ExtractFeatures(blob, nonlinear_norm, bl_features, + &cn_features, fx_info, NULL); // TODO(rays) Use blob->PreciseBoundingBox() instead. TBOX box = blob.bounding_box(); TrainingSample* sample = NULL; - if (mode == tesseract::NM_CHAR_ANISOTROPIC) { - int num_features = fx_info.NumCN; - if (num_features > 0) { - sample = TrainingSample::CopyFromFeatures(fx_info, box, &cn_features[0], - num_features); - } - } else if (mode == tesseract::NM_BASELINE) { - int num_features = fx_info.NumBL; - if (num_features > 0) { - sample = TrainingSample::CopyFromFeatures(fx_info, box, &bl_features[0], - num_features); - } - } else { - ASSERT_HOST(!"Unsupported normalization mode!"); + int num_features = fx_info->NumCN; + if (num_features > 0) { + sample = TrainingSample::CopyFromFeatures(*fx_info, box, &cn_features[0], + num_features); } if (sample != NULL) { // Set the bounding box (in original image coordinates) in the sample. diff --git a/classify/intfx.h b/classify/intfx.h index 11a68377c..26c435374 100644 --- a/classify/intfx.h +++ b/classify/intfx.h @@ -60,9 +60,9 @@ namespace tesseract { // TODO(rays) BlobToTrainingSample must remain a global function until // the FlexFx and FeatureDescription code can be removed and LearnBlob // made a member of Classify. - TrainingSample* BlobToTrainingSample(const TBLOB& blob, - tesseract::NormalizationMode mode, - bool nonlinear_norm); + TrainingSample* BlobToTrainingSample( + const TBLOB& blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT* fx_info, + GenericVector* bl_features); } // Deprecated! Prefer tesseract::Classify::ExtractFeatures instead. diff --git a/classify/intmatcher.cpp b/classify/intmatcher.cpp index ea7eea958..d03a14ace 100644 --- a/classify/intmatcher.cpp +++ b/classify/intmatcher.cpp @@ -693,13 +693,9 @@ int IntegerMatcher::FindBadFeatures( /*---------------------------------------------------------------------------*/ -void IntegerMatcher::Init(tesseract::IntParam *classify_debug_level, - int classify_integer_matcher_multiplier) { +void IntegerMatcher::Init(tesseract::IntParam *classify_debug_level) { classify_debug_level_ = classify_debug_level; - /* Set default mode of operation of IntegerMatcher */ - SetCharNormMatch(classify_integer_matcher_multiplier); - /* Initialize table for evidence to similarity lookup */ for (int i = 0; i < SE_TABLE_SIZE; i++) { uinT32 IntSimilarity = i << (27 - SE_TABLE_BITS); @@ -724,17 +720,6 @@ void IntegerMatcher::Init(tesseract::IntParam *classify_debug_level, evidence_mult_mask_ = ((1 << kIntEvidenceTruncBits) - 1); } -/*--------------------------------------------------------------------------*/ -void IntegerMatcher::SetBaseLineMatch() { - local_matcher_multiplier_ = 0; -} - - -/*--------------------------------------------------------------------------*/ -void IntegerMatcher::SetCharNormMatch(int integer_matcher_multiplier) { - local_matcher_multiplier_ = integer_matcher_multiplier; -} - /**---------------------------------------------------------------------------- Private Code @@ -1283,10 +1268,11 @@ int IntegerMatcher::FindBestMatch( // Applies the CN normalization factor to the given rating and returns // the modified rating. float IntegerMatcher::ApplyCNCorrection(float rating, int blob_length, - int normalization_factor) { + int normalization_factor, + int matcher_multiplier) { return (rating * blob_length + - local_matcher_multiplier_ * normalization_factor / 256.0) / - (blob_length + local_matcher_multiplier_); + matcher_multiplier * normalization_factor / 256.0) / + (blob_length + matcher_multiplier); } /*---------------------------------------------------------------------------*/ diff --git a/classify/intmatcher.h b/classify/intmatcher.h index 5598d273a..72003bacb 100644 --- a/classify/intmatcher.h +++ b/classify/intmatcher.h @@ -102,11 +102,7 @@ class IntegerMatcher { IntegerMatcher() : classify_debug_level_(0) {} - void Init(tesseract::IntParam *classify_debug_level, - int classify_integer_matcher_multiplier); - - void SetBaseLineMatch(); - void SetCharNormMatch(int integer_matcher_multiplier); + void Init(tesseract::IntParam *classify_debug_level); void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, @@ -121,7 +117,7 @@ class IntegerMatcher { // Applies the CN normalization factor to the given rating and returns // the modified rating. float ApplyCNCorrection(float rating, int blob_length, - int normalization_factor); + int normalization_factor, int matcher_multiplier); int FindGoodProtos(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, @@ -192,7 +188,6 @@ class IntegerMatcher { uinT32 evidence_table_mask_; uinT32 mult_trunc_shift_bits_; uinT32 table_trunc_shift_bits_; - inT16 local_matcher_multiplier_; tesseract::IntParam *classify_debug_level_; uinT32 evidence_mult_mask_; }; diff --git a/classify/picofeat.cpp b/classify/picofeat.cpp index ba19fb1ca..aff0c24b1 100644 --- a/classify/picofeat.cpp +++ b/classify/picofeat.cpp @@ -235,8 +235,11 @@ FEATURE_SET ExtractIntCNFeatures(TBLOB *blob, const DENORM& bl_denorm, ** Exceptions: none ** History: 8/8/2011, rays, Created. */ - tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample( - *blob, tesseract::NM_CHAR_ANISOTROPIC, false); + INT_FX_RESULT_STRUCT local_fx_info(fx_info); + GenericVector bl_features; + tesseract::TrainingSample* sample = + tesseract::BlobToTrainingSample(*blob, false, &local_fx_info, + &bl_features); if (sample == NULL) return NULL; int num_features = sample->num_features(); @@ -267,8 +270,11 @@ FEATURE_SET ExtractIntGeoFeatures(TBLOB *blob, const DENORM& bl_denorm, ** Exceptions: none ** History: 8/8/2011, rays, Created. */ - tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample( - *blob, tesseract::NM_CHAR_ANISOTROPIC, false); + INT_FX_RESULT_STRUCT local_fx_info(fx_info); + GenericVector bl_features; + tesseract::TrainingSample* sample = + tesseract::BlobToTrainingSample(*blob, false, &local_fx_info, + &bl_features); if (sample == NULL) return NULL; FEATURE_SET feature_set = NewFeatureSet(1); diff --git a/dict/dict.cpp b/dict/dict.cpp index a21da7967..cccc2d076 100644 --- a/dict/dict.cpp +++ b/dict/dict.cpp @@ -119,6 +119,9 @@ Dict::Dict(Image* image_ptr) "Make AcceptableChoice() always return false. Useful" " when there is a need to explore all segmentations", getImage()->getCCUtil()->params()), + BOOL_MEMBER(save_raw_choices, false, + "Deprecated- backward compatablity only", + getImage()->getCCUtil()->params()), INT_MEMBER(tessedit_truncate_wordchoice_log, 10, "Max words to keep in list", getImage()->getCCUtil()->params()), @@ -689,7 +692,7 @@ void Dict::adjust_word(WERD_CHOICE *word, int Dict::valid_word(const WERD_CHOICE &word, bool numbers_ok) const { const WERD_CHOICE *word_ptr = &word; WERD_CHOICE temp_word(word.unicharset()); - if (hyphenated()) { + if (hyphenated() && hyphen_word_->unicharset() == word.unicharset()) { copy_hyphen_info(&temp_word); temp_word += word; word_ptr = &temp_word; diff --git a/dict/dict.h b/dict/dict.h index 213b2cab2..9e067973a 100644 --- a/dict/dict.h +++ b/dict/dict.h @@ -613,6 +613,8 @@ class Dict { BOOL_VAR_H(stopper_no_acceptable_choices, false, "Make AcceptableChoice() always return false. Useful" " when there is a need to explore all segmentations"); + BOOL_VAR_H(save_raw_choices, false, + "Deprecated- backward compatability only"); INT_VAR_H(tessedit_truncate_wordchoice_log, 10, "Max words to keep in list"); STRING_VAR_H(word_to_debug, "", "Word for which stopper debug information" " should be printed to stdout"); diff --git a/wordrec/chopper.cpp b/wordrec/chopper.cpp index f7603f6a0..624e79f74 100644 --- a/wordrec/chopper.cpp +++ b/wordrec/chopper.cpp @@ -440,16 +440,32 @@ namespace tesseract { * enough. The results are returned in the WERD_RES. */ void Wordrec::chop_word_main(WERD_RES *word) { - // Initial clean up. - word->ClearRatings(); int num_blobs = word->chopped_word->NumBlobs(); - word->ratings = new MATRIX(num_blobs, wordrec_max_join_chunks); - // Run initial classification. - for (int b = 0; b < num_blobs; ++b) { - BLOB_CHOICE_LIST* choices = classify_piece(word->seam_array, b, b, - "Initial:", word->chopped_word, - word->blamer_bundle); - word->ratings->put(b, b, choices); + if (word->ratings == NULL) { + word->ratings = new MATRIX(num_blobs, wordrec_max_join_chunks); + } + if (word->ratings->get(0, 0) == NULL) { + // Run initial classification. + for (int b = 0; b < num_blobs; ++b) { + BLOB_CHOICE_LIST* choices = classify_piece(word->seam_array, b, b, + "Initial:", word->chopped_word, + word->blamer_bundle); + word->ratings->put(b, b, choices); + } + } else { + // Blobs have been pre-classified. Set matrix cell for all blob choices + for (int col = 0; col < word->ratings->dimension(); ++col) { + for (int row = col; row < word->ratings->dimension() && + row < col + word->ratings->bandwidth(); ++row) { + BLOB_CHOICE_LIST* choices = word->ratings->get(col, row); + if (choices != NULL) { + BLOB_CHOICE_IT bc_it(choices); + for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) { + bc_it.data()->set_matrix_cell(col, row); + } + } + } + } } // Run Segmentation Search.