From 99edf4ccbd97b1b925a8481f8974b9d4a7fa9615 Mon Sep 17 00:00:00 2001 From: "theraysmith@gmail.com" Date: Mon, 23 Sep 2013 15:15:06 +0000 Subject: [PATCH] Refactored classifier to make it easier to add new ones and generalized feature extractor to allow fx from grey git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@873 d0cd1f9f-072b-0410-8dd7-cf729c803f20 --- classify/Makefile.am | 6 +- classify/adaptmatch.cpp | 616 ++++++------------ classify/baseline.h | 41 -- classify/blobclass.cpp | 15 +- classify/blobclass.h | 9 +- classify/classify.cpp | 66 +- classify/classify.h | 134 ++-- classify/cluster.cpp | 34 +- classify/errorcounter.cpp | 335 ++++++---- classify/errorcounter.h | 91 ++- classify/extract.cpp | 6 +- classify/extract.h | 6 +- classify/featdefs.cpp | 2 +- classify/flexfx.cpp | 14 +- classify/flexfx.h | 5 +- classify/intfeaturespace.cpp | 17 +- classify/intfx.cpp | 1090 +++++++++++++------------------- classify/intfx.h | 51 +- classify/intmatcher.h | 2 +- classify/intproto.cpp | 17 + classify/intproto.h | 12 +- classify/mastertrainer.cpp | 84 +-- classify/mastertrainer.h | 26 +- classify/mf.cpp | 7 +- classify/mf.h | 4 +- classify/mfoutline.cpp | 97 +-- classify/mfoutline.h | 15 +- classify/mfx.cpp | 22 +- classify/mfx.h | 4 +- classify/normfeat.cpp | 18 +- classify/normfeat.h | 4 +- classify/normmatch.cpp | 2 +- classify/ocrfeatures.cpp | 2 +- classify/ocrfeatures.h | 4 +- classify/picofeat.cpp | 16 +- classify/picofeat.h | 8 +- classify/shapeclassifier.cpp | 230 +++++++ classify/shapeclassifier.h | 103 +-- classify/shapetable.cpp | 320 +++++++++- classify/shapetable.h | 171 ++++- classify/speckle.cpp | 107 ---- classify/speckle.h | 35 - classify/tessclassifier.cpp | 52 +- classify/tessclassifier.h | 18 +- classify/trainingsample.cpp | 33 +- classify/trainingsample.h | 12 +- classify/trainingsampleset.cpp | 17 +- classify/trainingsampleset.h | 9 +- 48 files changed, 2192 insertions(+), 1797 deletions(-) delete mode 100644 classify/baseline.h 
create mode 100644 classify/shapeclassifier.cpp delete mode 100644 classify/speckle.cpp delete mode 100644 classify/speckle.h diff --git a/classify/Makefile.am b/classify/Makefile.am index 8b352a99f..1dc99ec8a 100644 --- a/classify/Makefile.am +++ b/classify/Makefile.am @@ -9,7 +9,7 @@ AM_CPPFLAGS += -DTESS_EXPORTS \ endif noinst_HEADERS = \ - adaptive.h baseline.h blobclass.h chartoname.h \ + adaptive.h blobclass.h chartoname.h \ classify.h cluster.h clusttool.h cutoffs.h \ errorcounter.h extern.h extract.h \ featdefs.h flexfx.h float2int.h fpoint.h fxdefs.h \ @@ -19,7 +19,7 @@ noinst_HEADERS = \ normfeat.h normmatch.h \ ocrfeatures.h outfeat.h picofeat.h protos.h \ sampleiterator.h shapeclassifier.h shapetable.h \ - speckle.h tessclassifier.h trainingsample.h trainingsampleset.h xform2d.h + tessclassifier.h trainingsample.h trainingsampleset.h xform2d.h if !USING_MULTIPLELIBS noinst_LTLIBRARIES = libtesseract_classify.la @@ -45,7 +45,7 @@ libtesseract_classify_la_SOURCES = \ mastertrainer.cpp mf.cpp mfdefs.cpp mfoutline.cpp mfx.cpp \ normfeat.cpp normmatch.cpp \ ocrfeatures.cpp outfeat.cpp picofeat.cpp protos.cpp \ - sampleiterator.cpp shapetable.cpp speckle.cpp \ + sampleiterator.cpp shapeclassifier.cpp shapetable.cpp \ tessclassifier.cpp trainingsample.cpp trainingsampleset.cpp xform2d.cpp diff --git a/classify/adaptmatch.cpp b/classify/adaptmatch.cpp index dcdbfbd20..1ef606e3b 100644 --- a/classify/adaptmatch.cpp +++ b/classify/adaptmatch.cpp @@ -31,10 +31,8 @@ #include "outfeat.h" #include "emalloc.h" #include "intfx.h" -#include "speckle.h" #include "efio.h" #include "normmatch.h" -#include "permute.h" #include "ndminx.h" #include "intproto.h" #include "const.h" @@ -167,7 +165,6 @@ namespace tesseract { * @note History: Mon Mar 11 10:00:58 1991, DSJ, Created. * * @param Blob blob to be classified - * @param denorm normalization/denormalization parameters * @param[out] Choices List of choices found by adaptive matcher. 
* @param[out] CPResults Array of CPResultStruct of size MAX_NUM_CLASSES is * filled on return with the choices found by the @@ -176,7 +173,6 @@ namespace tesseract { * */ void Classify::AdaptiveClassifier(TBLOB *Blob, - const DENORM& denorm, BLOB_CHOICE_LIST *Choices, CLASS_PRUNER_RESULTS CPResults) { assert(Choices != NULL); @@ -185,7 +181,8 @@ void Classify::AdaptiveClassifier(TBLOB *Blob, if (AdaptedTemplates == NULL) AdaptedTemplates = NewAdaptedTemplates (true); - DoAdaptiveMatch(Blob, denorm, Results); + + DoAdaptiveMatch(Blob, Results); if (CPResults != NULL) memcpy(CPResults, Results->CPResults, sizeof(CPResults[0]) * Results->NumMatches); @@ -194,32 +191,23 @@ void Classify::AdaptiveClassifier(TBLOB *Blob, qsort((void *)Results->match, Results->NumMatches, sizeof(ScoredClass), CompareByRating); RemoveExtraPuncs(Results); - ConvertMatchesToChoices(denorm, Blob->bounding_box(), Results, Choices); + ConvertMatchesToChoices(Blob->denorm(), Blob->bounding_box(), Results, + Choices); if (matcher_debug_level >= 1) { cprintf ("AD Matches = "); PrintAdaptiveMatchResults(stdout, Results); } - if (LargeSpeckle(Blob)) - AddLargeSpeckleTo(Choices); + if (LargeSpeckle(*Blob) || Choices->length() == 0) + AddLargeSpeckleTo(Results->BlobLength, Choices); #ifndef GRAPHICS_DISABLED if (classify_enable_adaptive_debugger) - DebugAdaptiveClassifier(Blob, denorm, Results); + DebugAdaptiveClassifier(Blob, Results); #endif NumClassesOutput += Choices->length(); - if (Choices->length() == 0) { - if (!classify_bln_numeric_mode) - tprintf ("Empty classification!\n"); // Should never normally happen. - Choices = new BLOB_CHOICE_LIST(); - BLOB_CHOICE_IT temp_it; - temp_it.set_to_list(Choices); - temp_it.add_to_end( - new BLOB_CHOICE(0, 50.0f, -20.0f, -1, -1, NULL, 0, 0, false)); - } - delete Results; } /* AdaptiveClassifier */ @@ -251,19 +239,14 @@ void Classify::RefreshDebugWindow(ScrollView **win, const char *msg, // Otherwise AdaptToBlob is called for adaption within a document. 
// If rejmap is not NULL, then only chars with a rejmap entry of '1' will // be learned, otherwise all chars with good correct_text are learned. -void Classify::LearnWord(const char* filename, const char *rejmap, - WERD_RES *word) { +void Classify::LearnWord(const char* filename, WERD_RES *word) { int word_len = word->correct_text.size(); if (word_len == 0) return; float* thresholds = NULL; if (filename == NULL) { // Adaption mode. - if (!EnableLearning || word->best_choice == NULL || - // If word->best_choice is not recorded at the top of accumulator's - // best choices (which could happen for choices that are - // altered with ReplaceAmbig()) we skip the adaption. - !getDict().CurrentBestChoiceIs(*(word->best_choice))) + if (!EnableLearning || word->best_choice == NULL) return; // Can't or won't adapt. NumWordsAdaptedTo++; @@ -271,11 +254,12 @@ void Classify::LearnWord(const char* filename, const char *rejmap, tprintf("\n\nAdapting to word = %s\n", word->best_choice->debug_string().string()); thresholds = new float[word_len]; - GetAdaptThresholds(word->rebuild_word, word->denorm, *word->best_choice, - *word->raw_choice, thresholds); + word->ComputeAdaptionThresholds(certainty_scale, + matcher_perfect_threshold, + matcher_good_threshold, + matcher_rating_margin, thresholds); } int start_blob = 0; - char prev_map_char = '0'; #ifndef GRAPHICS_DISABLED if (classify_debug_character_fragments) { @@ -295,9 +279,7 @@ void Classify::LearnWord(const char* filename, const char *rejmap, if (classify_debug_character_fragments) { tprintf("\nLearning %s\n", word->correct_text[ch].string()); } - char rej_map_char = rejmap != NULL ? *rejmap++ : '1'; - - if (word->correct_text[ch].length() > 0 && rej_map_char == '1') { + if (word->correct_text[ch].length() > 0) { float threshold = thresholds != NULL ? 
thresholds[ch] : 0.0f; LearnPieces(filename, start_blob, word->best_state[ch], @@ -308,14 +290,12 @@ void Classify::LearnWord(const char* filename, const char *rejmap, // that each match a whole character with at least // classify_character_fragments_garbage_certainty_threshold bool garbage = false; - TBLOB* frag_blob = word->chopped_word->blobs; - for (int i = 0; i < start_blob; ++i) frag_blob = frag_blob->next; int frag; for (frag = 0; frag < word->best_state[ch]; ++frag) { + TBLOB* frag_blob = word->chopped_word->blobs[start_blob + frag]; if (classify_character_fragments_garbage_certainty_threshold < 0) { - garbage |= LooksLikeGarbage(word->denorm, frag_blob); + garbage |= LooksLikeGarbage(frag_blob); } - frag_blob = frag_blob->next; } // Learn the fragments. if (!garbage) { @@ -346,28 +326,22 @@ void Classify::LearnWord(const char* filename, const char *rejmap, // TODO(rays): re-enable this part of the code when we switch to the // new classifier that needs to see examples of garbage. /* - char next_map_char = ch + 1 < word_len - ? (rejmap != NULL ? *rejmap : '1') - : '0'; if (word->best_state[ch] > 1) { // If the next blob is good, make junk with the rightmost fragment. - if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0 && - next_map_char == '1') { + if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0) { LearnPieces(filename, start_blob + word->best_state[ch] - 1, word->best_state[ch + 1] + 1, threshold, CST_IMPROPER, INVALID_UNICHAR, word); } // If the previous blob is good, make junk with the leftmost fragment. - if (ch > 0 && word->correct_text[ch - 1].length() > 0 && - prev_map_char == '1') { + if (ch > 0 && word->correct_text[ch - 1].length() > 0) { LearnPieces(filename, start_blob - word->best_state[ch - 1], word->best_state[ch - 1] + 1, threshold, CST_IMPROPER, INVALID_UNICHAR, word); } } // If the next blob is good, make a join with it. 
- if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0 && - next_map_char == '1') { + if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0) { STRING joined_text = word->correct_text[ch]; joined_text += word->correct_text[ch + 1]; LearnPieces(filename, start_blob, @@ -377,7 +351,6 @@ void Classify::LearnWord(const char* filename, const char *rejmap, */ } start_blob += word->best_state[ch]; - prev_map_char = rej_map_char; } delete [] thresholds; } // LearnWord. @@ -388,7 +361,7 @@ void Classify::LearnWord(const char* filename, const char *rejmap, // is called and the data will be written to a file for static training. // Otherwise AdaptToBlob is called for adaption within a document. // threshold is a magic number required by AdaptToChar and generated by -// GetAdaptThresholds. +// ComputeAdaptionThresholds. // Although it can be partly inferred from the string, segmentation is // provided to explicitly clarify the character segmentation. void Classify::LearnPieces(const char* filename, int start, int length, @@ -401,15 +374,12 @@ void Classify::LearnPieces(const char* filename, int start, int length, return; if (length > 1) { - join_pieces(word->chopped_word->blobs, word->seam_array, - start, start + length - 1); + join_pieces(word->seam_array, start, start + length - 1, + word->chopped_word); } - TBLOB* blob = word->chopped_word->blobs; - for (int i = 0; i < start; ++i) - blob = blob->next; + TBLOB* blob = word->chopped_word->blobs[start]; // Rotate the blob if needed for classification. 
- const DENORM* denorm = &word->denorm; - TBLOB* rotated_blob = blob->ClassifyNormalizeIfNeeded(&denorm); + TBLOB* rotated_blob = blob->ClassifyNormalizeIfNeeded(); if (rotated_blob == NULL) rotated_blob = blob; @@ -434,8 +404,12 @@ void Classify::LearnPieces(const char* filename, int start, int length, classify_norm_method.set_value(character); // force char norm spc 30/11/93 tess_bn_matching.set_value(false); // turn it off tess_cn_matching.set_value(false); - LearnBlob(feature_defs_, filename, rotated_blob, *denorm, - correct_text); + DENORM bl_denorm, cn_denorm; + INT_FX_RESULT_STRUCT fx_info; + SetupBLCNDenorms(*rotated_blob, classify_nonlinear_norm, + &bl_denorm, &cn_denorm, &fx_info); + LearnBlob(feature_defs_, filename, rotated_blob, bl_denorm, cn_denorm, + fx_info, correct_text); } else if (unicharset.contains_unichar(correct_text)) { UNICHAR_ID class_id = unicharset.unichar_to_id(correct_text); int font_id = word->fontinfo != NULL @@ -446,16 +420,15 @@ void Classify::LearnPieces(const char* filename, int start, int length, unicharset.id_to_unichar(class_id), threshold, font_id); // If filename is not NULL we are doing recognition // (as opposed to training), so we must have already set word fonts. - AdaptToChar(rotated_blob, *denorm, class_id, font_id, threshold); + AdaptToChar(rotated_blob, class_id, font_id, threshold); } else if (classify_debug_level >= 1) { tprintf("Can't adapt to %s not in unicharset\n", correct_text); } if (rotated_blob != blob) { delete rotated_blob; - delete denorm; } - break_pieces(blob, word->seam_array, start, start + length - 1); + break_pieces(word->seam_array, start, start + length - 1, word->chopped_word); } // LearnPieces. 
/*---------------------------------------------------------------------------*/ @@ -521,6 +494,10 @@ void Classify::EndAdaptiveClassifier() { } delete shape_table_; shape_table_ = NULL; + if (static_classifier_ != NULL) { + delete static_classifier_; + static_classifier_ = NULL; + } } /* EndAdaptiveClassifier */ @@ -581,6 +558,7 @@ void Classify::InitAdaptiveClassifier(bool load_pre_trained_templates) { ReadNormProtos(tessdata_manager.GetDataFilePtr(), tessdata_manager.GetEndOffset(TESSDATA_NORMPROTO)); if (tessdata_manager.DebugLevel() > 0) tprintf("Loaded normproto\n"); + static_classifier_ = new TessClassifier(false, this); } im_.Init(&classify_debug_level, classify_integer_matcher_multiplier); @@ -741,7 +719,6 @@ void Classify::SettupPass2() { * config in that class. * * @param Blob blob to model new class after - * @param denorm normalization/denormalization parameters * @param ClassId id of the class to be initialized * @param FontinfoId font information inferred from pre-trained templates * @param Class adapted class to be initialized @@ -756,7 +733,6 @@ void Classify::SettupPass2() { * @note History: Thu Mar 14 12:49:39 1991, DSJ, Created. */ void Classify::InitAdaptedClass(TBLOB *Blob, - const DENORM& denorm, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS Class, @@ -822,7 +798,7 @@ void Classify::InitAdaptedClass(TBLOB *Blob, cprintf ("Added new class '%s' with class id %d and %d protos.\n", unicharset.id_to_unichar(ClassId), ClassId, NumFeatures); if (classify_learning_debug_level > 1) - DisplayAdaptedChar(Blob, denorm, IClass); + DisplayAdaptedChar(Blob, IClass); } if (IsEmptyAdaptedClass(Class)) @@ -885,21 +861,19 @@ int Classify::GetAdaptiveFeatures(TBLOB *Blob, * * @param Word current word * @param BestChoiceWord best overall choice for word with context - * @param RawChoiceWord best choice for word without context * * @return TRUE or FALSE * @note Exceptions: none * @note History: Thu May 30 14:25:06 1991, DSJ, Created. 
*/ -int Classify::AdaptableWord(TWERD *Word, - const WERD_CHOICE &BestChoiceWord, - const WERD_CHOICE &RawChoiceWord) { - int BestChoiceLength = BestChoiceWord.length(); +bool Classify::AdaptableWord(WERD_RES* word) { + if (word->best_choice == NULL) return false; + int BestChoiceLength = word->best_choice->length(); float adaptable_score = getDict().segment_penalty_dict_case_ok + ADAPTABLE_WERD_ADJUSTMENT; return // rules that apply in general - simplest to compute first BestChoiceLength > 0 && - BestChoiceLength == Word->NumBlobs() && + BestChoiceLength == word->rebuild_word->NumBlobs() && BestChoiceLength <= MAX_ADAPTABLE_WERD_SIZE && // This basically ensures that the word is at least a dictionary match // (freq word, user word, system dawg word, etc). @@ -907,16 +881,14 @@ int Classify::AdaptableWord(TWERD *Word, // than higher than adaptable_score=1.1+0.05=1.15 // Since these are other flags that ensure that the word is dict word, // this check could be at times redundant. - getDict().CurrentBestChoiceAdjustFactor() <= adaptable_score && + word->best_choice->adjust_factor() <= adaptable_score && // Make sure that alternative choices are not dictionary words. - getDict().AlternativeChoicesWorseThan(adaptable_score) && - getDict().CurrentBestChoiceIs(BestChoiceWord); + word->AlternativeChoiceAdjustmentsWorseThan(adaptable_score); } /*---------------------------------------------------------------------------*/ /** * @param Blob blob to add to templates for ClassId - * @param denorm normalization/denormalization parameters * @param ClassId class to add blob to * @param FontinfoId font information from pre-trained templates * @param Threshold minimum match rating to existing template @@ -931,7 +903,6 @@ int Classify::AdaptableWord(TWERD *Word, * @note History: Thu Mar 14 09:36:03 1991, DSJ, Created. 
*/ void Classify::AdaptToChar(TBLOB *Blob, - const DENORM& denorm, CLASS_ID ClassId, int FontinfoId, FLOAT32 Threshold) { @@ -952,8 +923,7 @@ void Classify::AdaptToChar(TBLOB *Blob, Class = AdaptedTemplates->Class[ClassId]; assert(Class != NULL); if (IsEmptyAdaptedClass(Class)) { - InitAdaptedClass(Blob, denorm, ClassId, FontinfoId, Class, - AdaptedTemplates); + InitAdaptedClass(Blob, ClassId, FontinfoId, Class, AdaptedTemplates); } else { IClass = ClassForClassId (AdaptedTemplates->Templates, ClassId); @@ -999,9 +969,8 @@ void Classify::AdaptToChar(TBLOB *Blob, IntResult.Config, TempConfig->NumTimesSeen); if (TempConfigReliable(ClassId, TempConfig)) { - MakePermanent(AdaptedTemplates, ClassId, IntResult.Config, denorm, - Blob); - UpdateAmbigsGroup(ClassId, denorm, Blob); + MakePermanent(AdaptedTemplates, ClassId, IntResult.Config, Blob); + UpdateAmbigsGroup(ClassId, Blob); } } else { @@ -1009,7 +978,7 @@ void Classify::AdaptToChar(TBLOB *Blob, cprintf ("Found poor match to temp config %d = %4.1f%%.\n", IntResult.Config, (1.0 - IntResult.Rating) * 100.0); if (classify_learning_debug_level > 2) - DisplayAdaptedChar(Blob, denorm, IClass); + DisplayAdaptedChar(Blob, IClass); } NewTempConfigId = MakeNewTemporaryConfig(AdaptedTemplates, ClassId, @@ -1019,13 +988,13 @@ void Classify::AdaptToChar(TBLOB *Blob, FloatFeatures); if (NewTempConfigId >= 0 && TempConfigReliable(ClassId, TempConfigFor(Class, NewTempConfigId))) { - MakePermanent(AdaptedTemplates, ClassId, NewTempConfigId, denorm, Blob); - UpdateAmbigsGroup(ClassId, denorm, Blob); + MakePermanent(AdaptedTemplates, ClassId, NewTempConfigId, Blob); + UpdateAmbigsGroup(ClassId, Blob); } #ifndef GRAPHICS_DISABLED if (classify_learning_debug_level > 1) { - DisplayAdaptedChar(Blob, denorm, IClass); + DisplayAdaptedChar(Blob, IClass); } #endif } @@ -1033,13 +1002,12 @@ void Classify::AdaptToChar(TBLOB *Blob, } } /* AdaptToChar */ -void Classify::DisplayAdaptedChar(TBLOB* blob, const DENORM& denorm, - INT_CLASS_STRUCT* 
int_class) { +void Classify::DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class) { #ifndef GRAPHICS_DISABLED int bloblength = 0; INT_FEATURE_ARRAY features; uinT8* norm_array = new uinT8[unicharset.size()]; - int num_features = GetBaselineFeatures(blob, denorm, PreTrainedTemplates, + int num_features = GetBaselineFeatures(blob, PreTrainedTemplates, features, norm_array, &bloblength); delete [] norm_array; @@ -1068,7 +1036,6 @@ void Classify::DisplayAdaptedChar(TBLOB* blob, const DENORM& denorm, /*---------------------------------------------------------------------------*/ /** * @param Blob blob to add to templates for ClassId - * @param denorm normalization/denormalization parameters * @param ClassId class to add blob to * @param FontinfoId font information from pre-trained teamples * @param Threshold minimum match rating to existing template @@ -1080,7 +1047,6 @@ void Classify::DisplayAdaptedChar(TBLOB* blob, const DENORM& denorm, * @note History: Thu Mar 14 09:36:03 1991, DSJ, Created. 
*/ void Classify::AdaptToPunc(TBLOB *Blob, - const DENORM& denorm, CLASS_ID ClassId, int FontinfoId, FLOAT32 Threshold) { @@ -1088,7 +1054,7 @@ void Classify::AdaptToPunc(TBLOB *Blob, int i; Results->Initialize(); - CharNormClassifier(Blob, denorm, PreTrainedTemplates, Results); + CharNormClassifier(Blob, PreTrainedTemplates, Results); RemoveBadMatches(Results); if (Results->NumMatches != 1) { @@ -1106,7 +1072,7 @@ void Classify::AdaptToPunc(TBLOB *Blob, cprintf ("Adapting to punc = %s, thr= %g\n", unicharset.id_to_unichar(ClassId), Threshold); #endif - AdaptToChar(Blob, denorm, ClassId, FontinfoId, Threshold); + AdaptToChar(Blob, ClassId, FontinfoId, Threshold); } delete Results; } /* AdaptToPunc */ @@ -1193,7 +1159,6 @@ void Classify::AddNewResult(ADAPT_RESULTS *results, * - #AllConfigsOn mask that enables all configs * * @param Blob blob to be classified - * @param denorm normalization/denormalization parameters * @param Templates built-in templates to classify against * @param Classes adapted class templates * @param Ambiguities array of class id's to match against @@ -1203,7 +1168,6 @@ void Classify::AddNewResult(ADAPT_RESULTS *results, * @note History: Tue Mar 12 19:40:36 1991, DSJ, Created. 
*/ void Classify::AmbigClassifier(TBLOB *Blob, - const DENORM& denorm, INT_TEMPLATES Templates, ADAPT_CLASS *Classes, UNICHAR_ID *Ambiguities, @@ -1216,9 +1180,9 @@ void Classify::AmbigClassifier(TBLOB *Blob, AmbigClassifierCalls++; - NumFeatures = GetCharNormFeatures(Blob, denorm, Templates, IntFeatures, + NumFeatures = GetCharNormFeatures(Blob, Templates, IntFeatures, NULL, CharNormArray, - &(Results->BlobLength), NULL); + &(Results->BlobLength)); if (NumFeatures <= 0) { delete [] CharNormArray; return; @@ -1412,7 +1376,6 @@ double Classify::ComputeCorrectedRating(bool debug, int unichar_id, * - BaselineCutoffs expected num features for each class * * @param Blob blob to be classified - * @param denorm normalization/denormalization parameters * @param Templates current set of adapted templates * @param Results place to put match results * @@ -1421,7 +1384,6 @@ double Classify::ComputeCorrectedRating(bool debug, int unichar_id, * @note History: Tue Mar 12 19:38:03 1991, DSJ, Created. */ UNICHAR_ID *Classify::BaselineClassifier(TBLOB *Blob, - const DENORM& denorm, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results) { int NumFeatures; @@ -1432,9 +1394,8 @@ UNICHAR_ID *Classify::BaselineClassifier(TBLOB *Blob, BaselineClassifierCalls++; - NumFeatures = GetBaselineFeatures( - Blob, denorm, Templates->Templates, IntFeatures, CharNormArray, - &(Results->BlobLength)); + NumFeatures = GetBaselineFeatures(Blob, Templates->Templates, IntFeatures, + CharNormArray, &Results->BlobLength); if (NumFeatures <= 0) { delete [] CharNormArray; return NULL; @@ -1472,7 +1433,6 @@ UNICHAR_ID *Classify::BaselineClassifier(TBLOB *Blob, * are added to Results. 
* * @param Blob blob to be classified - * @param denorm normalization/denormalization parameters * @param Templates templates to classify unknown against * @param Results place to put match results * @@ -1484,70 +1444,52 @@ UNICHAR_ID *Classify::BaselineClassifier(TBLOB *Blob, * @note Exceptions: none * @note History: Tue Mar 12 16:02:52 1991, DSJ, Created. */ -int Classify::CharNormClassifier(TBLOB *Blob, - const DENORM& denorm, +int Classify::CharNormClassifier(TBLOB *blob, INT_TEMPLATES Templates, - ADAPT_RESULTS *Results) { - int NumFeatures; - int NumClasses; - INT_FEATURE_ARRAY IntFeatures; - + ADAPT_RESULTS *adapt_results) { CharNormClassifierCalls++; - - uinT8* CharNormArray = new uinT8[unicharset.size()]; - int num_pruner_classes = MAX(unicharset.size(), - PreTrainedTemplates->NumClasses); - uinT8* PrunerNormArray = new uinT8[num_pruner_classes]; - NumFeatures = GetCharNormFeatures(Blob, denorm, Templates, IntFeatures, - PrunerNormArray, CharNormArray, - &(Results->BlobLength), NULL); - if (NumFeatures <= 0) { - delete [] CharNormArray; - delete [] PrunerNormArray; - return 0; + TrainingSample* sample = BlobToTrainingSample(*blob, NM_CHAR_ANISOTROPIC, + classify_nonlinear_norm); + if (sample == NULL) return 0; + // This is the length that is used for scaling ratings vs certainty. + adapt_results->BlobLength = + IntCastRounded(sample->outline_length() / kStandardFeatureLength); + GenericVector unichar_results; + static_classifier_->UnicharClassifySample(*sample, blob->denorm().pix(), 0, + -1, &unichar_results); + // Convert results to the format used internally by AdaptiveClassifier. + for (int r = 0; r < unichar_results.size(); ++r) { + int unichar_id = unichar_results[r].unichar_id; + // Fonts are listed in order of preference. + int font1 = unichar_results[r].fonts.size() >= 1 + ? unichar_results[r].fonts[0] : kBlankFontinfoId; + int font2 = unichar_results[r].fonts.size() >= 2 + ? 
unichar_results[r].fonts[1] : kBlankFontinfoId; + float rating = 1.0f - unichar_results[r].rating; + AddNewResult(adapt_results, unichar_id, -1, rating, false, 0, font1, font2); } - - NumClasses = PruneClasses(Templates, NumFeatures, IntFeatures, - PrunerNormArray, - shape_table_ != NULL ? &shapetable_cutoffs_[0] - : CharNormCutoffs, - Results->CPResults); - - if (tessedit_single_match && NumClasses > 1) - NumClasses = 1; - NumCharNormClassesTried += NumClasses; - - im_.SetCharNormMatch(classify_integer_matcher_multiplier); - MasterMatcher(Templates, NumFeatures, IntFeatures, CharNormArray, - NULL, matcher_debug_flags, NumClasses, - Blob->bounding_box(), Results->CPResults, Results); - delete [] CharNormArray; - delete [] PrunerNormArray; - return NumFeatures; + int num_features = sample->num_features(); + delete sample; + return num_features; } /* CharNormClassifier */ // As CharNormClassifier, but operates on a TrainingSample and outputs to // a GenericVector of ShapeRating without conversion to classes. int Classify::CharNormTrainingSample(bool pruner_only, + int keep_this, const TrainingSample& sample, - GenericVector* results) { + GenericVector* results) { results->clear(); ADAPT_RESULTS* adapt_results = new ADAPT_RESULTS(); adapt_results->Initialize(); // Compute the bounding box of the features. int num_features = sample.num_features(); - TBOX blob_box; - for (int f = 0; f < num_features; ++f) { - const INT_FEATURE_STRUCT feature = sample.features()[f]; - TBOX fbox(feature.X, feature.Y, feature.X, feature.Y); - blob_box += fbox; - } + // Only the top and bottom of the blob_box are used by MasterMatcher, so + // fabricate right and left using top and bottom. + TBOX blob_box(sample.geo_feature(GeoBottom), sample.geo_feature(GeoBottom), + sample.geo_feature(GeoTop), sample.geo_feature(GeoTop)); // Compute the char_norm_array from the saved cn_feature. 
- FEATURE norm_feature = NewFeature(&CharNormDesc); - norm_feature->Params[CharNormY] = sample.cn_feature(CharNormY); - norm_feature->Params[CharNormLength] = sample.cn_feature(CharNormLength); - norm_feature->Params[CharNormRx] = sample.cn_feature(CharNormRx); - norm_feature->Params[CharNormRy] = sample.cn_feature(CharNormRy); + FEATURE norm_feature = sample.GetCNFeature(); uinT8* char_norm_array = new uinT8[unicharset.size()]; int num_pruner_classes = MAX(unicharset.size(), PreTrainedTemplates->NumClasses); @@ -1564,19 +1506,16 @@ int Classify::CharNormTrainingSample(bool pruner_only, : CharNormCutoffs, adapt_results->CPResults); delete [] pruner_norm_array; + if (keep_this >= 0) { + num_classes = 1; + adapt_results->CPResults[0].Class = keep_this; + } if (pruner_only) { // Convert pruner results to output format. for (int i = 0; i < num_classes; ++i) { int class_id = adapt_results->CPResults[i].Class; - int shape_id = class_id; - if (shape_table_ != NULL) { - // All shapes in a class have the same combination of unichars, so - // it doesn't really matter which config we give it, as we aren't - // trying to get the font here. - shape_id = ClassAndConfigIDToFontOrShapeID(class_id, 0); - } results->push_back( - ShapeRating(shape_id, 1.0f - adapt_results->CPResults[i].Rating)); + UnicharRating(class_id, 1.0f - adapt_results->CPResults[i].Rating)); } } else { im_.SetCharNormMatch(classify_integer_matcher_multiplier); @@ -1587,9 +1526,15 @@ int Classify::CharNormTrainingSample(bool pruner_only, // Convert master matcher results to output format. 
for (int i = 0; i < adapt_results->NumMatches; i++) { ScoredClass next = adapt_results->match[i]; - results->push_back(ShapeRating(next.shape_id, 1.0f - next.rating)); + UnicharRating rating(next.unichar_id, 1.0f - next.rating); + if (next.fontinfo_id >= 0) { + rating.fonts.push_back(next.fontinfo_id); + if (next.fontinfo_id2 >= 0) + rating.fonts.push_back(next.fontinfo_id2); + } + results->push_back(rating); } - results->sort(&ShapeRating::SortDescendingRating); + results->sort(&UnicharRating::SortDescendingRating); } delete [] char_norm_array; delete adapt_results; @@ -1694,6 +1639,7 @@ void Classify::ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box, max_matches = MAX_MATCHES; } + float best_certainty = -MAX_FLOAT32; for (int i = 0; i < Results->NumMatches; i++) { ScoredClass next = Results->match[i]; int fontinfo_id = next.fontinfo_id; @@ -1717,13 +1663,27 @@ void Classify::ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box, Rating *= rating_scale * Results->BlobLength; Certainty *= -(getDict().certainty_scale); } - inT16 min_xheight, max_xheight; + // Adapted results, by their very nature, should have good certainty. + // Those that don't are at best misleading, and often lead to errors, + // so don't accept adapted results that are too far behind the best result, + // whether adapted or static. + // TODO(rays) find some way of automatically tuning these constants. + if (Certainty > best_certainty) { + best_certainty = MIN(Certainty, classify_adapted_pruning_threshold); + } else if (adapted && + Certainty / classify_adapted_pruning_factor < best_certainty) { + continue; // Don't accept bad adapted results. 
+ } + + float min_xheight, max_xheight, yshift; denorm.XHeightRange(next.unichar_id, unicharset, box, - &min_xheight, &max_xheight); + &min_xheight, &max_xheight, &yshift); temp_it.add_to_end(new BLOB_CHOICE(next.unichar_id, Rating, Certainty, fontinfo_id, fontinfo_id2, unicharset.get_script(next.unichar_id), - min_xheight, max_xheight, adapted)); + min_xheight, max_xheight, yshift, + adapted ? BCC_ADAPTED_CLASSIFIER + : BCC_STATIC_CLASSIFIER)); contains_nonfrag |= !current_is_frag; // update contains_nonfrag choices_length++; if (choices_length >= max_matches) break; @@ -1737,7 +1697,6 @@ void Classify::ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box, /** * * @param Blob blob whose classification is being debugged - * @param denorm normalization/denormalization parameters * @param Results results of match being debugged * * Globals: none @@ -1745,39 +1704,18 @@ void Classify::ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box, * @note Exceptions: none * @note History: Wed Mar 13 16:44:41 1991, DSJ, Created. 
*/ -void Classify::DebugAdaptiveClassifier(TBLOB *Blob, - const DENORM& denorm, +void Classify::DebugAdaptiveClassifier(TBLOB *blob, ADAPT_RESULTS *Results) { + if (static_classifier_ == NULL) return; for (int i = 0; i < Results->NumMatches; i++) { - if (Results->match[i].rating < Results->best_match.rating) + if (i == 0 || Results->match[i].rating < Results->best_match.rating) Results->best_match = Results->match[i]; } - const char *Prompt = - "Left-click in IntegerMatch Window to continue or right click to debug..."; - CLASS_ID unichar_id = Results->best_match.unichar_id; - int shape_id = Results->best_match.shape_id; - bool adaptive_on = true; - bool pretrained_on = true; - - const char* debug_mode; - do { - if (!pretrained_on) - debug_mode = "Adaptive Templates Only"; - else if (!adaptive_on) - debug_mode = "PreTrained Templates Only"; - else - debug_mode = "All Templates"; - ShowMatchDisplay(); - tprintf("Debugging class %d = %s in mode %s ...", - unichar_id, unicharset.id_to_unichar(unichar_id), debug_mode); - if (shape_id >= 0 && shape_table_ != NULL) { - tprintf(" from shape %s\n", shape_table_->DebugStr(shape_id).string()); - } - ShowBestMatchFor(Blob, denorm, unichar_id, shape_id, adaptive_on, - pretrained_on, Results); - UpdateMatchDisplay(); - } while ((unichar_id = GetClassToDebug(Prompt, &adaptive_on, - &pretrained_on, &shape_id)) != 0); + TrainingSample* sample = BlobToTrainingSample(*blob, NM_CHAR_ANISOTROPIC, + classify_nonlinear_norm); + if (sample == NULL) return; + static_classifier_->DebugDisplay(*sample, blob->denorm().pix(), + Results->best_match.unichar_id); } /* DebugAdaptiveClassifier */ #endif @@ -1794,7 +1732,6 @@ void Classify::DebugAdaptiveClassifier(TBLOB *Blob, * of these classifications are merged together into Results. 
* * @param Blob blob to be classified - * @param denorm normalization/denormalization parameters * @param Results place to put match results * * Globals: @@ -1805,9 +1742,7 @@ void Classify::DebugAdaptiveClassifier(TBLOB *Blob, * @note Exceptions: none * @note History: Tue Mar 12 08:50:11 1991, DSJ, Created. */ -void Classify::DoAdaptiveMatch(TBLOB *Blob, - const DENORM& denorm, - ADAPT_RESULTS *Results) { +void Classify::DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results) { UNICHAR_ID *Ambiguities; AdaptiveMatcherCalls++; @@ -1815,16 +1750,16 @@ void Classify::DoAdaptiveMatch(TBLOB *Blob, if (AdaptedTemplates->NumPermClasses < matcher_permanent_classes_min || tess_cn_matching) { - CharNormClassifier(Blob, denorm, PreTrainedTemplates, Results); + CharNormClassifier(Blob, PreTrainedTemplates, Results); } else { - Ambiguities = BaselineClassifier(Blob, denorm, AdaptedTemplates, Results); + Ambiguities = BaselineClassifier(Blob, AdaptedTemplates, Results); if ((Results->NumMatches > 0 && MarginalMatch (Results->best_match.rating) && !tess_bn_matching) || Results->NumMatches == 0) { - CharNormClassifier(Blob, denorm, PreTrainedTemplates, Results); + CharNormClassifier(Blob, PreTrainedTemplates, Results); } else if (Ambiguities && *Ambiguities >= 0 && !tess_bn_matching) { - AmbigClassifier(Blob, denorm, + AmbigClassifier(Blob, PreTrainedTemplates, AdaptedTemplates->Class, Ambiguities, @@ -1840,43 +1775,6 @@ void Classify::DoAdaptiveMatch(TBLOB *Blob, ClassifyAsNoise(Results); } /* DoAdaptiveMatch */ -/*---------------------------------------------------------------------------*/ -/** - * This routine tries to estimate how tight the adaptation - * threshold should be set for each character in the current - * word. In general, the routine tries to set tighter - * thresholds for a character when the current set of templates - * would have made an error on that character. It tries - * to set a threshold tight enough to eliminate the error. 
- * Two different sets of rules can be used to determine the - * desired thresholds. - * - * @param Word current word - * @param denorm normalization/denormalization parameters - * @param BestChoice best choice for current word with context - * @param BestRawChoice best choice for current word without context - * @param[out] Thresholds array of thresholds to be filled in - * - * Globals: - * - matcher_good_threshold - * - matcher_perfect_threshold - * - matcher_rating_margin - * - * @return none (results are returned in Thresholds) - * @note Exceptions: none - * @note History: Fri May 31 09:22:08 1991, DSJ, Created. - */ -void Classify::GetAdaptThresholds(TWERD * Word, - const DENORM& denorm, - const WERD_CHOICE& BestChoice, - const WERD_CHOICE& BestRawChoice, - FLOAT32 Thresholds[]) { - getDict().FindClassifierErrors(matcher_perfect_threshold, - matcher_good_threshold, - matcher_rating_margin, - Thresholds); -} /* GetAdaptThresholds */ - /*---------------------------------------------------------------------------*/ /** * This routine matches blob to the built-in templates @@ -1884,7 +1782,6 @@ void Classify::GetAdaptThresholds(TWERD * Word, * class which are potential ambiguities. * * @param Blob blob to get classification ambiguities for - * @param denorm normalization/denormalization parameters * @param CorrectClass correct class for Blob * * Globals: @@ -1896,7 +1793,6 @@ void Classify::GetAdaptThresholds(TWERD * Word, * @note History: Fri Mar 15 08:08:22 1991, DSJ, Created. 
*/ UNICHAR_ID *Classify::GetAmbiguities(TBLOB *Blob, - const DENORM& denorm, CLASS_ID CorrectClass) { ADAPT_RESULTS *Results = new ADAPT_RESULTS(); UNICHAR_ID *Ambiguities; @@ -1904,7 +1800,7 @@ UNICHAR_ID *Classify::GetAmbiguities(TBLOB *Blob, Results->Initialize(); - CharNormClassifier(Blob, denorm, PreTrainedTemplates, Results); + CharNormClassifier(Blob, PreTrainedTemplates, Results); RemoveBadMatches(Results); qsort((void *)Results->match, Results->NumMatches, sizeof(ScoredClass), CompareByRating); @@ -1938,7 +1834,6 @@ UNICHAR_ID *Classify::GetAmbiguities(TBLOB *Blob, * array provided by the caller. * * @param Blob blob to extract features from - * @param denorm normalization/denormalization parameters * @param Templates used to compute char norm adjustments * @param IntFeatures array to fill with integer features * @param CharNormArray array to fill with dummy char norm adjustments @@ -1955,30 +1850,24 @@ UNICHAR_ID *Classify::GetAmbiguities(TBLOB *Blob, * @note History: Tue May 28 10:40:52 1991, DSJ, Created. 
*/ int Classify::GetBaselineFeatures(TBLOB *Blob, - const DENORM& denorm, INT_TEMPLATES Templates, INT_FEATURE_ARRAY IntFeatures, uinT8* CharNormArray, inT32 *BlobLength) { - register INT_FEATURE Src, Dest, End; - if (!FeaturesHaveBeenExtracted) { - FeaturesOK = ExtractIntFeat(Blob, denorm, BaselineFeatures, - CharNormFeatures, &FXInfo, NULL); + FeaturesOK = ExtractIntFeat(*Blob, classify_nonlinear_norm, + BaselineFeatures, CharNormFeatures, &FXInfo); FeaturesHaveBeenExtracted = TRUE; } + *BlobLength = IntCastRounded(FXInfo.Length / kStandardFeatureLength); if (!FeaturesOK) { - *BlobLength = FXInfo.NumBL; return 0; } - for (Src = BaselineFeatures, End = Src + FXInfo.NumBL, Dest = IntFeatures; - Src < End; - *Dest++ = *Src++); + memcpy(IntFeatures, BaselineFeatures, FXInfo.NumBL * sizeof(IntFeatures[0])); ClearCharNormArray(CharNormArray); - *BlobLength = FXInfo.NumBL; return FXInfo.NumBL; } /* GetBaselineFeatures */ @@ -1988,9 +1877,9 @@ void Classify::ResetFeaturesHaveBeenExtracted() { // Returns true if the given blob looks too dissimilar to any character // present in the classifier templates. 
-bool Classify::LooksLikeGarbage(const DENORM& denorm, TBLOB *blob) { +bool Classify::LooksLikeGarbage(TBLOB *blob) { BLOB_CHOICE_LIST *ratings = new BLOB_CHOICE_LIST(); - AdaptiveClassifier(blob, denorm, ratings, NULL); + AdaptiveClassifier(blob, ratings, NULL); BLOB_CHOICE_IT ratings_it(ratings); const UNICHARSET &unicharset = getDict().getUnicharset(); if (classify_debug_character_fragments) { @@ -2002,9 +1891,10 @@ bool Classify::LooksLikeGarbage(const DENORM& denorm, TBLOB *blob) { if (unicharset.get_fragment(ratings_it.data()->unichar_id()) != NULL) { continue; } + float certainty = ratings_it.data()->certainty(); delete ratings; - return (ratings_it.data()->certainty() < - classify_character_fragments_garbage_certainty_threshold); + return certainty < + classify_character_fragments_garbage_certainty_threshold; } delete ratings; return true; // no whole characters in ratings @@ -2023,14 +1913,12 @@ bool Classify::LooksLikeGarbage(const DENORM& denorm, TBLOB *blob) { * array provided by the caller. * * @param Blob blob to extract features from - * @param denorm normalization/denormalization parameters * @param Templates used to compute char norm adjustments * @param IntFeatures array to fill with integer features * @param PrunerNormArray Array of factors from blob normalization * process * @param CharNormArray array to fill with dummy char norm adjustments * @param BlobLength length of blob in baseline-normalized units - * @param FeatureOutlineArray * * Globals: * - FeaturesHaveBeenExtracted TRUE if fx has been done @@ -2043,39 +1931,29 @@ bool Classify::LooksLikeGarbage(const DENORM& denorm, TBLOB *blob) { * @note History: Tue May 28 10:40:52 1991, DSJ, Created. 
*/ int Classify::GetCharNormFeatures(TBLOB *Blob, - const DENORM& denorm, INT_TEMPLATES Templates, INT_FEATURE_ARRAY IntFeatures, uinT8* PrunerNormArray, uinT8* CharNormArray, - inT32 *BlobLength, - inT32 *FeatureOutlineArray) { - register INT_FEATURE Src, Dest, End; + inT32 *BlobLength) { FEATURE NormFeature; FLOAT32 Baseline, Scale; - inT32 FeatureOutlineIndex[MAX_NUM_INT_FEATURES]; if (!FeaturesHaveBeenExtracted) { - FeaturesOK = ExtractIntFeat(Blob, denorm, BaselineFeatures, - CharNormFeatures, &FXInfo, - FeatureOutlineIndex); + FeaturesOK = ExtractIntFeat(*Blob, classify_nonlinear_norm, + BaselineFeatures, CharNormFeatures, &FXInfo); FeaturesHaveBeenExtracted = TRUE; } + *BlobLength = IntCastRounded(FXInfo.Length / kStandardFeatureLength); if (!FeaturesOK) { - *BlobLength = FXInfo.NumBL; - return (0); + return 0; } - for (Src = CharNormFeatures, End = Src + FXInfo.NumCN, Dest = IntFeatures; - Src < End; - *Dest++ = *Src++); - for (int i = 0; FeatureOutlineArray && i < FXInfo.NumCN; ++i) { - FeatureOutlineArray[i] = FeatureOutlineIndex[i]; - } + memcpy(IntFeatures, CharNormFeatures, FXInfo.NumCN * sizeof(IntFeatures[0])); NormFeature = NewFeature(&CharNormDesc); - Baseline = BASELINE_OFFSET; + Baseline = kBlnBaselineOffset; Scale = MF_SCALE_FACTOR; NormFeature->Params[CharNormY] = (FXInfo.Ymean - Baseline) * Scale; NormFeature->Params[CharNormLength] = @@ -2083,8 +1961,7 @@ int Classify::GetCharNormFeatures(TBLOB *Blob, NormFeature->Params[CharNormRx] = FXInfo.Rx * Scale; NormFeature->Params[CharNormRy] = FXInfo.Ry * Scale; ComputeCharNormArrays(NormFeature, Templates, CharNormArray, PrunerNormArray); - *BlobLength = FXInfo.NumBL; - return (FXInfo.NumCN); + return FXInfo.NumCN; } /* GetCharNormFeatures */ // Computes the char_norm_array for the unicharset and, if not NULL, the @@ -2312,7 +2189,6 @@ PROTO_ID Classify::MakeNewTempProtos(FEATURE_SET Features, * @param Templates current set of adaptive templates * @param ClassId class containing config to be made 
permanent * @param ConfigId config to be made permanent - * @param denorm normalization/denormalization parameters * @param Blob current blob being adapted to * * Globals: none @@ -2323,7 +2199,6 @@ PROTO_ID Classify::MakeNewTempProtos(FEATURE_SET Features, void Classify::MakePermanent(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, - const DENORM& denorm, TBLOB *Blob) { UNICHAR_ID *Ambigs; TEMP_CONFIG Config; @@ -2339,7 +2214,7 @@ void Classify::MakePermanent(ADAPT_TEMPLATES Templates, Class->NumPermConfigs++; // Initialize permanent config. - Ambigs = GetAmbiguities(Blob, denorm, ClassId); + Ambigs = GetAmbiguities(Blob, ClassId); PERM_CONFIG Perm = (PERM_CONFIG) alloc_struct(sizeof(PERM_CONFIG_STRUCT), "PERM_CONFIG_STRUCT"); Perm->Ambigs = Ambigs; @@ -2555,164 +2430,48 @@ void Classify::SetAdaptiveThreshold(FLOAT32 Threshold) { /*---------------------------------------------------------------------------*/ /** - * This routine compares Blob to both sets of templates - * (adaptive and pre-trained) and then displays debug - * information for the config which matched best. + * This routine displays debug information for the best config + * of the given shape_id for the given set of features. * - * @param Blob blob to show best matching config for - * @param denorm normalization/denormalization parameters - * @param ClassId class whose configs are to be searched - * @param shape_id shape index - * @param AdaptiveOn TRUE if adaptive configs are enabled - * @param PreTrainedOn TRUE if pretrained configs are enabled - * @param Results results of match being debugged - * - * Globals: - * - PreTrainedTemplates built-in training - * - AdaptedTemplates adaptive templates - * - AllProtosOn dummy proto mask - * - AllConfigsOn dummy config mask + * @param shape_id classifier id to work with + * @param features features of the unknown character + * @param num_features Number of features in the features array. 
* * @note Exceptions: none * @note History: Fri Mar 22 08:43:52 1991, DSJ, Created. */ -void Classify::ShowBestMatchFor(TBLOB *Blob, - const DENORM& denorm, - CLASS_ID ClassId, - int shape_id, - BOOL8 AdaptiveOn, - BOOL8 PreTrainedOn, - ADAPT_RESULTS *Results) { - int NumCNFeatures = 0, NumBLFeatures = 0; - INT_FEATURE_ARRAY CNFeatures, BLFeatures; - INT_RESULT_STRUCT CNResult, BLResult; - inT32 BlobLength; - uinT32 ConfigMask; - static int next_config = -1; - - if (PreTrainedOn) next_config = -1; - - CNResult.Rating = BLResult.Rating = 2.0; - - if (!LegalClassId (ClassId)) { - cprintf ("%d is not a legal class id!!\n", ClassId); +void Classify::ShowBestMatchFor(int shape_id, + const INT_FEATURE_STRUCT* features, + int num_features) { + uinT32 config_mask; + if (UnusedClassIdIn(PreTrainedTemplates, shape_id)) { + tprintf("No built-in templates for class/shape %d\n", shape_id); return; } - - uinT8 *CNAdjust = new uinT8[MAX_NUM_CLASSES]; - uinT8 *BLAdjust = new uinT8[MAX_NUM_CLASSES]; - - if (shape_table_ == NULL) - shape_id = ClassId; - else - shape_id = ShapeIDToClassID(shape_id); - if (PreTrainedOn && shape_id >= 0) { - if (UnusedClassIdIn(PreTrainedTemplates, shape_id)) { - tprintf("No built-in templates for class/shape %d\n", shape_id); - } else { - NumCNFeatures = GetCharNormFeatures(Blob, denorm, PreTrainedTemplates, - CNFeatures, NULL, CNAdjust, - &BlobLength, NULL); - if (NumCNFeatures <= 0) { - tprintf("Illegal blob (char norm features)!\n"); - } else { - im_.SetCharNormMatch(classify_integer_matcher_multiplier); - im_.Match(ClassForClassId(PreTrainedTemplates, shape_id), - AllProtosOn, AllConfigsOn, - NumCNFeatures, CNFeatures, - &CNResult, - classify_adapt_feature_threshold, NO_DEBUG, - matcher_debug_separate_windows); - ExpandShapesAndApplyCorrections(NULL, false, shape_id, - Blob->bounding_box().bottom(), - Blob->bounding_box().top(), - 0, BlobLength, CNAdjust, - CNResult, Results); - } - } + if (num_features <= 0) { + tprintf("Illegal blob (char norm 
features)!\n"); + return; } - - if (AdaptiveOn) { - if (ClassId < 0 || ClassId >= AdaptedTemplates->Templates->NumClasses) { - tprintf("Invalid adapted class id: %d\n", ClassId); - } else if (UnusedClassIdIn(AdaptedTemplates->Templates, ClassId) || - AdaptedTemplates->Class[ClassId] == NULL || - IsEmptyAdaptedClass(AdaptedTemplates->Class[ClassId])) { - tprintf("No AD templates for class %d = %s\n", - ClassId, unicharset.id_to_unichar(ClassId)); - } else { - NumBLFeatures = GetBaselineFeatures(Blob, - denorm, - AdaptedTemplates->Templates, - BLFeatures, BLAdjust, - &BlobLength); - if (NumBLFeatures <= 0) - tprintf("Illegal blob (baseline features)!\n"); - else { - im_.SetBaseLineMatch(); - im_.Match(ClassForClassId(AdaptedTemplates->Templates, ClassId), - AllProtosOn, AllConfigsOn, - NumBLFeatures, BLFeatures, - &BLResult, - classify_adapt_feature_threshold, NO_DEBUG, - matcher_debug_separate_windows); - ExpandShapesAndApplyCorrections( - AdaptedTemplates->Class, false, - ClassId, Blob->bounding_box().bottom(), - Blob->bounding_box().top(), 0, BlobLength, CNAdjust, - BLResult, Results); - } - } - } - + INT_RESULT_STRUCT cn_result; + classify_norm_method.set_value(character); + im_.SetCharNormMatch(classify_integer_matcher_multiplier); + im_.Match(ClassForClassId(PreTrainedTemplates, shape_id), + AllProtosOn, AllConfigsOn, + num_features, features, &cn_result, + classify_adapt_feature_threshold, NO_DEBUG, + matcher_debug_separate_windows); tprintf("\n"); - if (BLResult.Rating < CNResult.Rating) { - if (next_config < 0) { - ConfigMask = 1 << BLResult.Config; - next_config = 0; - } else { - ConfigMask = 1 << next_config; - ++next_config; - } - classify_norm_method.set_value(baseline); + config_mask = 1 << cn_result.Config; - im_.SetBaseLineMatch(); - tprintf("Adaptive Class ID: %d\n", ClassId); - im_.Match(ClassForClassId(AdaptedTemplates->Templates, ClassId), - AllProtosOn, (BIT_VECTOR) &ConfigMask, - NumBLFeatures, BLFeatures, - &BLResult, - 
classify_adapt_feature_threshold, - matcher_debug_flags, - matcher_debug_separate_windows); - ExpandShapesAndApplyCorrections( - AdaptedTemplates->Class, true, - ClassId, Blob->bounding_box().bottom(), - Blob->bounding_box().top(), 0, BlobLength, CNAdjust, - BLResult, Results); - } else if (shape_id >= 0) { - ConfigMask = 1 << CNResult.Config; - classify_norm_method.set_value(character); - - tprintf("Static Shape ID: %d\n", shape_id); - im_.SetCharNormMatch(classify_integer_matcher_multiplier); - im_.Match(ClassForClassId (PreTrainedTemplates, shape_id), - AllProtosOn, (BIT_VECTOR) & ConfigMask, - NumCNFeatures, CNFeatures, - &CNResult, - classify_adapt_feature_threshold, - matcher_debug_flags, - matcher_debug_separate_windows); - ExpandShapesAndApplyCorrections(NULL, true, shape_id, - Blob->bounding_box().bottom(), - Blob->bounding_box().top(), - 0, BlobLength, CNAdjust, - CNResult, Results); - } - - // Clean up. - delete[] CNAdjust; - delete[] BLAdjust; + tprintf("Static Shape ID: %d\n", shape_id); + ShowMatchDisplay(); + im_.Match(ClassForClassId(PreTrainedTemplates, shape_id), + AllProtosOn, reinterpret_cast(&config_mask), + num_features, features, &cn_result, + classify_adapt_feature_threshold, + matcher_debug_flags, + matcher_debug_separate_windows); + UpdateMatchDisplay(); } /* ShowBestMatchFor */ // Returns a string for the classifier class_id: either the corresponding @@ -2796,8 +2555,7 @@ bool Classify::TempConfigReliable(CLASS_ID class_id, return true; } -void Classify::UpdateAmbigsGroup(CLASS_ID class_id, const DENORM& denorm, - TBLOB *Blob) { +void Classify::UpdateAmbigsGroup(CLASS_ID class_id, TBLOB *Blob) { const UnicharIdVector *ambigs = getDict().getUnicharAmbigs().ReverseAmbigsForAdaption(class_id); int ambigs_size = (ambigs == NULL) ? 
0 : ambigs->size(); @@ -2818,7 +2576,7 @@ void Classify::UpdateAmbigsGroup(CLASS_ID class_id, const DENORM& denorm, getDict().getUnicharset().debug_str( ambig_class_id).string()); } - MakePermanent(AdaptedTemplates, ambig_class_id, cfg, denorm, Blob); + MakePermanent(AdaptedTemplates, ambig_class_id, cfg, Blob); } } } diff --git a/classify/baseline.h b/classify/baseline.h deleted file mode 100644 index e4addca8b..000000000 --- a/classify/baseline.h +++ /dev/null @@ -1,41 +0,0 @@ -/* -*-C-*- - ******************************************************************************** - * - * File: baseline.h (Formerly baseline.h) - * Description: - * Author: Mark Seaman, SW Productivity - * Created: Fri Oct 16 14:37:00 1987 - * Modified: Wed Feb 27 13:39:35 1991 (Mark Seaman) marks@hpgrlt - * Language: C - * Package: N/A - * Status: Reusable Software Component - * - * (c) Copyright 1987, Hewlett-Packard Company. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - *************************************************************************/ -#ifndef BASELINE_H -#define BASELINE_H - -/*---------------------------------------------------------------------- - I n c l u d e s -----------------------------------------------------------------------*/ -#include "host.h" -#include "blobs.h" -#include "params.h" - -/*---------------------------------------------------------------------- - T y p e s -----------------------------------------------------------------------*/ -#define BASELINE_OFFSET 64 -#define BASELINE_SCALE 128 - -#endif diff --git a/classify/blobclass.cpp b/classify/blobclass.cpp index 93ea3fc03..cac3b409f 100644 --- a/classify/blobclass.cpp +++ b/classify/blobclass.cpp @@ -49,8 +49,11 @@ extern char imagefile[]; ----------------------------------------------------------------------------**/ /*---------------------------------------------------------------------------*/ +// As all TBLOBs, Blob is in baseline normalized coords. +// See SetupBLCNDenorms in intfx.cpp for other args. void LearnBlob(const FEATURE_DEFS_STRUCT &FeatureDefs, const STRING& filename, - TBLOB * Blob, const DENORM& denorm, const char* BlobText) { + TBLOB * Blob, const DENORM& bl_denorm, const DENORM& cn_denorm, + const INT_FX_RESULT_STRUCT& fx_info, const char* BlobText) { /* ** Parameters: ** Blob blob whose micro-features are to be learned @@ -95,18 +98,20 @@ void LearnBlob(const FEATURE_DEFS_STRUCT &FeatureDefs, const STRING& filename, cprintf("TRAINING ... 
Font name = %s\n", CurrFontName.string()); } - LearnBlob(FeatureDefs, FeatureFile, Blob, denorm, BlobText, - CurrFontName.string()); + LearnBlob(FeatureDefs, FeatureFile, Blob, bl_denorm, cn_denorm, fx_info, + BlobText, CurrFontName.string()); } // LearnBlob void LearnBlob(const FEATURE_DEFS_STRUCT &FeatureDefs, FILE* FeatureFile, - TBLOB* Blob, const DENORM& denorm, + TBLOB* Blob, const DENORM& bl_denorm, const DENORM& cn_denorm, + const INT_FX_RESULT_STRUCT& fx_info, const char* BlobText, const char* FontName) { CHAR_DESC CharDesc; ASSERT_HOST(FeatureFile != NULL); - CharDesc = ExtractBlobFeatures(FeatureDefs, denorm, Blob); + CharDesc = ExtractBlobFeatures(FeatureDefs, bl_denorm, cn_denorm, fx_info, + Blob); if (CharDesc == NULL) { cprintf("LearnBLob: CharDesc was NULL. Aborting.\n"); return; diff --git a/classify/blobclass.h b/classify/blobclass.h index 57d27a0da..95510a2f9 100644 --- a/classify/blobclass.h +++ b/classify/blobclass.h @@ -40,11 +40,14 @@ Public Function Prototypes ----------------------------------------------------------------------------**/ void LearnBlob(const FEATURE_DEFS_STRUCT &FeatureDefs, const STRING& filename, - TBLOB * Blob, const DENORM& denorm, const char* BlobText); + TBLOB * Blob, const DENORM& bl_denorm, const DENORM& cn_denorm, + const INT_FX_RESULT_STRUCT& fx_info, + const char* BlobText); void LearnBlob(const FEATURE_DEFS_STRUCT &FeatureDefs, FILE* File, TBLOB* Blob, - const DENORM& denorm, const char* BlobText, - const char* FontName); + const DENORM& bl_denorm, const DENORM& cn_denorm, + const INT_FX_RESULT_STRUCT& fx_info, + const char* BlobText, const char* FontName); /**---------------------------------------------------------------------------- Global Data Definitions and Declarations diff --git a/classify/classify.cpp b/classify/classify.cpp index b88c1510f..1eca2e9c2 100644 --- a/classify/classify.cpp +++ b/classify/classify.cpp @@ -26,6 +26,7 @@ #include "intproto.h" #include "mfoutline.h" #include "scrollview.h" 
+#include "shapeclassifier.h" #include "shapetable.h" #include "unicity_table.h" #include @@ -52,6 +53,11 @@ Classify::Classify() this->params()), /* PREV DEFAULT 0.1 */ double_MEMBER(classify_max_norm_scale_y, 0.325, "Max char y-norm scale ...", this->params()), /* PREV DEFAULT 0.3 */ + double_MEMBER(classify_max_rating_ratio, 1.5, + "Veto ratio between classifier ratings", this->params()), + double_MEMBER(classify_max_certainty_margin, 5.5, + "Veto difference between classifier certainties", + this->params()), BOOL_MEMBER(tess_cn_matching, 0, "Character Normalized Matching", this->params()), BOOL_MEMBER(tess_bn_matching, 0, "Baseline Normalized Matching", @@ -65,6 +71,8 @@ Classify::Classify() "Save adapted templates to a file", this->params()), BOOL_MEMBER(classify_enable_adaptive_debugger, 0, "Enable match debugger", this->params()), + BOOL_MEMBER(classify_nonlinear_norm, 0, + "Non-linear stroke-density normalization", this->params()), INT_MEMBER(matcher_debug_level, 0, "Matcher Debug Level", this->params()), INT_MEMBER(matcher_debug_flags, 0, "Matcher Debug Flags", this->params()), INT_MEMBER(classify_learning_debug_level, 0, "Learning Debug Level: ", @@ -100,6 +108,12 @@ Classify::Classify() this->params()), double_MEMBER(tessedit_class_miss_scale, 0.00390625, "Scale factor for features not used", this->params()), + double_MEMBER(classify_adapted_pruning_factor, 2.5, + "Prune poor adapted results this much worse than best result", + this->params()), + double_MEMBER(classify_adapted_pruning_threshold, -1.0, + "Threshold at which classify_adapted_pruning_factor starts", + this->params()), INT_MEMBER(classify_adapt_proto_threshold, 230, "Threshold for good protos during adaptive 0-255", this->params()), @@ -122,19 +136,24 @@ Classify::Classify() this->params()), INT_MEMBER(classify_class_pruner_threshold, 229, "Class Pruner Threshold 0-255", this->params()), - INT_MEMBER(classify_class_pruner_multiplier, 30, + INT_MEMBER(classify_class_pruner_multiplier, 15, 
"Class Pruner Multiplier 0-255: ", this->params()), INT_MEMBER(classify_cp_cutoff_strength, 7, "Class Pruner CutoffStrength: ", this->params()), - INT_MEMBER(classify_integer_matcher_multiplier, 14, + INT_MEMBER(classify_integer_matcher_multiplier, 10, "Integer Matcher Multiplier 0-255: ", this->params()), EnableLearning(true), INT_MEMBER(il1_adaption_test, 0, "Dont adapt to i/I at beginning of word", this->params()), BOOL_MEMBER(classify_bln_numeric_mode, 0, "Assume the input is numbers [0-9].", this->params()), + double_MEMBER(speckle_large_max_size, 0.30, "Max large speckle size", + this->params()), + double_MEMBER(speckle_rating_penalty, 10.0, + "Penalty to add to worst rating for noise", this->params()), shape_table_(NULL), - dict_(&image_) { + dict_(&image_), + static_classifier_(NULL) { fontinfo_table_.set_compare_callback( NewPermanentTessCallback(CompareFontInfo)); fontinfo_table_.set_clear_callback( @@ -184,4 +203,45 @@ Classify::~Classify() { delete[] BaselineCutoffs; } + +// Takes ownership of the given classifier, and uses it for future calls +// to CharNormClassifier. +void Classify::SetStaticClassifier(ShapeClassifier* static_classifier) { + delete static_classifier_; + static_classifier_ = static_classifier; +} + +// Moved from speckle.cpp +// Adds a noise classification result that is a bit worse than the worst +// current result, or the worst possible result if no current results. +void Classify::AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST *choices) { + BLOB_CHOICE_IT bc_it(choices); + // If there is no classifier result, we will use the worst possible certainty + // and corresponding rating. + float certainty = -getDict().certainty_scale; + float rating = rating_scale * blob_length; + if (!choices->empty() && blob_length > 0) { + bc_it.move_to_last(); + BLOB_CHOICE* worst_choice = bc_it.data(); + // Add speckle_rating_penalty to worst rating, matching old value. 
+ rating = worst_choice->rating() + speckle_rating_penalty; + // Compute the rating to correspond to the certainty. (Used to be kept + // the same, but that messes up the language model search.) + certainty = -rating * getDict().certainty_scale / + (rating_scale * blob_length); + } + BLOB_CHOICE* blob_choice = new BLOB_CHOICE(UNICHAR_SPACE, rating, certainty, + -1, -1, 0, 0, MAX_FLOAT32, 0, + BCC_SPECKLE_CLASSIFIER); + bc_it.add_to_end(blob_choice); +} + +// Returns true if the blob is small enough to be a large speckle. +bool Classify::LargeSpeckle(const TBLOB &blob) { + double speckle_size = kBlnXHeight * speckle_large_max_size; + TBOX bbox = blob.bounding_box(); + return bbox.width() < speckle_size && bbox.height() < speckle_size; +} + + } // namespace tesseract diff --git a/classify/classify.h b/classify/classify.h index abdceef2d..92629da71 100644 --- a/classify/classify.h +++ b/classify/classify.h @@ -43,8 +43,10 @@ static const int kBlankFontinfoId = -2; namespace tesseract { +class ShapeClassifier; struct ShapeRating; class ShapeTable; +struct UnicharRating; // How segmented is a blob. In this enum, character refers to a classifiable // unit, but that is too long and character is usually easier to understand. @@ -67,6 +69,17 @@ class Classify : public CCStruct { return shape_table_; } + // Takes ownership of the given classifier, and uses it for future calls + // to CharNormClassifier. + void SetStaticClassifier(ShapeClassifier* static_classifier); + + // Adds a noise classification result that is a bit worse than the worst + // current result, or the worst possible result if no current results. + void AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST *choices); + + // Returns true if the blob is small enough to be a large speckle. 
+ bool LargeSpeckle(const TBLOB &blob); + /* adaptive.cpp ************************************************************/ ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset); int GetFontinfoId(ADAPT_CLASS Class, uinT8 ConfigId); @@ -112,9 +125,7 @@ class Classify : public CCStruct { // incorrectly segmented blobs. If filename is not NULL, then LearnBlob // is called and the data will be written to a file for static training. // Otherwise AdaptToBlob is called for adaption within a document. - // If rejmap is not NULL, then only chars with a rejmap entry of '1' will - // be learned, otherwise all chars with good correct_text are learned. - void LearnWord(const char* filename, const char *rejmap, WERD_RES *word); + void LearnWord(const char* filename, WERD_RES *word); // Builds a blob of length fragments, from the word, starting at start, // and then learn it, as having the given correct_text. @@ -130,18 +141,15 @@ class Classify : public CCStruct { const char* correct_text, WERD_RES *word); void InitAdaptiveClassifier(bool load_pre_trained_templates); void InitAdaptedClass(TBLOB *Blob, - const DENORM& denorm, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates); void AdaptToPunc(TBLOB *Blob, - const DENORM& denorm, CLASS_ID ClassId, int FontinfoId, FLOAT32 Threshold); void AmbigClassifier(TBLOB *Blob, - const DENORM& denorm, INT_TEMPLATES Templates, ADAPT_CLASS *Classes, UNICHAR_ID *Ambiguities, @@ -194,15 +202,8 @@ class Classify : public CCStruct { #ifndef GRAPHICS_DISABLED void DebugAdaptiveClassifier(TBLOB *Blob, - const DENORM& denorm, ADAPT_RESULTS *Results); #endif - void GetAdaptThresholds (TWERD * Word, - const DENORM& denorm, - const WERD_CHOICE& BestChoice, - const WERD_CHOICE& BestRawChoice, - FLOAT32 Thresholds[]); - PROTO_ID MakeNewTempProtos(FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], @@ -218,19 +219,14 @@ class Classify : public CCStruct { void MakePermanent(ADAPT_TEMPLATES Templates, CLASS_ID 
ClassId, int ConfigId, - const DENORM& denorm, TBLOB *Blob); void PrintAdaptiveMatchResults(FILE *File, ADAPT_RESULTS *Results); void RemoveExtraPuncs(ADAPT_RESULTS *Results); void RemoveBadMatches(ADAPT_RESULTS *Results); void SetAdaptiveThreshold(FLOAT32 Threshold); - void ShowBestMatchFor(TBLOB *Blob, - const DENORM& denorm, - CLASS_ID ClassId, - int shape_id, - BOOL8 AdaptiveOn, - BOOL8 PreTrainedOn, - ADAPT_RESULTS *Results); + void ShowBestMatchFor(int shape_id, + const INT_FEATURE_STRUCT* features, + int num_features); // Returns a string for the classifier class_id: either the corresponding // unicharset debug_str or the shape_table_ debug str. STRING ClassIDToDebugStr(const INT_TEMPLATES_STRUCT* templates, @@ -251,59 +247,46 @@ class Classify : public CCStruct { // unichar-id!). Uses a search, so not fast. int ShapeIDToClassID(int shape_id) const; UNICHAR_ID *BaselineClassifier(TBLOB *Blob, - const DENORM& denorm, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results); int CharNormClassifier(TBLOB *Blob, - const DENORM& denorm, INT_TEMPLATES Templates, ADAPT_RESULTS *Results); // As CharNormClassifier, but operates on a TrainingSample and outputs to // a GenericVector of ShapeRating without conversion to classes. 
- int CharNormTrainingSample(bool pruner_only, const TrainingSample& sample, - GenericVector* results); - UNICHAR_ID *GetAmbiguities(TBLOB *Blob, - const DENORM& denorm, - CLASS_ID CorrectClass); - void DoAdaptiveMatch(TBLOB *Blob, - const DENORM& denorm, - ADAPT_RESULTS *Results); + int CharNormTrainingSample(bool pruner_only, int keep_this, + const TrainingSample& sample, + GenericVector* results); + UNICHAR_ID *GetAmbiguities(TBLOB *Blob, CLASS_ID CorrectClass); + void DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results); void AdaptToChar(TBLOB *Blob, - const DENORM& denorm, CLASS_ID ClassId, int FontinfoId, FLOAT32 Threshold); - void DisplayAdaptedChar(TBLOB* blob, const DENORM& denorm, - INT_CLASS_STRUCT* int_class); - int AdaptableWord(TWERD *Word, - const WERD_CHOICE &BestChoiceWord, - const WERD_CHOICE &RawChoiceWord); + void DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class); + bool AdaptableWord(WERD_RES* word); void EndAdaptiveClassifier(); void PrintAdaptiveStatistics(FILE *File); void SettupPass1(); void SettupPass2(); void AdaptiveClassifier(TBLOB *Blob, - const DENORM& denorm, BLOB_CHOICE_LIST *Choices, CLASS_PRUNER_RESULTS cp_results); void ClassifyAsNoise(ADAPT_RESULTS *Results); void ResetAdaptiveClassifierInternal(); int GetBaselineFeatures(TBLOB *Blob, - const DENORM& denorm, INT_TEMPLATES Templates, INT_FEATURE_ARRAY IntFeatures, uinT8* CharNormArray, inT32 *BlobLength); int GetCharNormFeatures(TBLOB *Blob, - const DENORM& denorm, INT_TEMPLATES Templates, INT_FEATURE_ARRAY IntFeatures, uinT8* PrunerNormArray, uinT8* CharNormArray, - inT32 *BlobLength, - inT32 *FeatureOutlineIndex); + inT32 *BlobLength); // Computes the char_norm_array for the unicharset and, if not NULL, the // pruner_array as appropriate according to the existence of the shape_table. // The norm_feature is deleted as it is almost certainly no longer needed. 
@@ -313,13 +296,54 @@ class Classify : public CCStruct { uinT8* pruner_array); bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config); - void UpdateAmbigsGroup(CLASS_ID class_id, const DENORM& denorm, TBLOB *Blob); + void UpdateAmbigsGroup(CLASS_ID class_id, TBLOB *Blob); void ResetFeaturesHaveBeenExtracted(); bool AdaptiveClassifierIsFull() { return NumAdaptationsFailed > 0; } - bool LooksLikeGarbage(const DENORM& denorm, TBLOB *blob); + bool LooksLikeGarbage(TBLOB *blob); void RefreshDebugWindow(ScrollView **win, const char *msg, int y_offset, const TBOX &wbox); + // intfx.cpp + // Computes the DENORMS for bl(baseline) and cn(character) normalization + // during feature extraction. The input denorm describes the current state + // of the blob, which is usually a baseline-normalized word. + // The Transforms setup are as follows: + // Baseline Normalized (bl) Output: + // We center the grapheme by aligning the x-coordinate of its centroid with + // x=128 and leaving the already-baseline-normalized y as-is. + // + // Character Normalized (cn) Output: + // We align the grapheme's centroid at the origin and scale it + // asymmetrically in x and y so that the 2nd moments are a standard value + // (51.2) ie the result is vaguely square. + // If classify_nonlinear_norm is true: + // A non-linear normalization is setup that attempts to evenly distribute + // edges across x and y. + // + // Some of the fields of fx_info are also setup: + // Length: Total length of outline. + // Rx: Rounded y second moment. (Reversed by convention.) + // Ry: rounded x second moment. + // Xmean: Rounded x center of mass of the blob. + // Ymean: Rounded y center of mass of the blob. 
+ static void SetupBLCNDenorms(const TBLOB& blob, bool nonlinear_norm, + DENORM* bl_denorm, DENORM* cn_denorm, + INT_FX_RESULT_STRUCT* fx_info); + + // Extracts sets of 3-D features of length kStandardFeatureLength (=12.8), as + // (x,y) position and angle as measured counterclockwise from the vector + // <-1, 0>, from blob using two normalizations defined by bl_denorm and + // cn_denorm. See SetpuBLCNDenorms for definitions. + // If outline_cn_counts is not NULL, on return it contains the cumulative + // number of cn features generated for each outline in the blob (in order). + // Thus after the first outline, there were (*outline_cn_counts)[0] features, + // after the second outline, there were (*outline_cn_counts)[1] features etc. + static void ExtractFeatures(const TBLOB& blob, + bool nonlinear_norm, + GenericVector* bl_features, + GenericVector* cn_features, + INT_FX_RESULT_STRUCT* results, + GenericVector* outline_cn_counts); /* float2int.cpp ************************************************************/ void ClearCharNormArray(uinT8* char_norm_array); void ComputeIntCharNormArray(const FEATURE_STRUCT& norm_feature, @@ -336,6 +360,9 @@ class Classify : public CCStruct { UnicityTable& get_fontinfo_table() { return fontinfo_table_; } + const UnicityTable& get_fontinfo_table() const { + return fontinfo_table_; + } UnicityTable& get_fontset_table() { return fontset_table_; } @@ -365,6 +392,10 @@ class Classify : public CCStruct { double_VAR_H(classify_max_norm_scale_x, 0.325, "Max char x-norm scale ..."); double_VAR_H(classify_min_norm_scale_y, 0.0, "Min char y-norm scale ..."); double_VAR_H(classify_max_norm_scale_y, 0.325, "Max char y-norm scale ..."); + double_VAR_H(classify_max_rating_ratio, 1.5, + "Veto ratio between classifier ratings"); + double_VAR_H(classify_max_certainty_margin, 5.5, + "Veto difference between classifier certainties"); /* adaptmatch.cpp ***********************************************************/ BOOL_VAR_H(tess_cn_matching, 0, 
"Character Normalized Matching"); @@ -375,6 +406,8 @@ class Classify : public CCStruct { BOOL_VAR_H(classify_save_adapted_templates, 0, "Save adapted templates to a file"); BOOL_VAR_H(classify_enable_adaptive_debugger, 0, "Enable match debugger"); + BOOL_VAR_H(classify_nonlinear_norm, 0, + "Non-linear stroke-density normalization"); INT_VAR_H(matcher_debug_level, 0, "Matcher Debug Level"); INT_VAR_H(matcher_debug_flags, 0, "Matcher Debug Flags"); INT_VAR_H(classify_learning_debug_level, 0, "Learning Debug Level: "); @@ -398,6 +431,10 @@ class Classify : public CCStruct { double_VAR_H(certainty_scale, 20.0, "Certainty scaling factor"); double_VAR_H(tessedit_class_miss_scale, 0.00390625, "Scale factor for features not used"); + double_VAR_H(classify_adapted_pruning_factor, 2.5, + "Prune poor adapted results this much worse than best result"); + double_VAR_H(classify_adapted_pruning_threshold, -1.0, + "Threshold at which classify_adapted_pruning_factor starts"); INT_VAR_H(classify_adapt_proto_threshold, 230, "Threshold for good protos during adaptive 0-255"); INT_VAR_H(classify_adapt_feature_threshold, 230, @@ -418,11 +455,11 @@ class Classify : public CCStruct { /* intmatcher.cpp **********************************************************/ INT_VAR_H(classify_class_pruner_threshold, 229, "Class Pruner Threshold 0-255"); - INT_VAR_H(classify_class_pruner_multiplier, 30, + INT_VAR_H(classify_class_pruner_multiplier, 15, "Class Pruner Multiplier 0-255: "); INT_VAR_H(classify_cp_cutoff_strength, 7, "Class Pruner CutoffStrength: "); - INT_VAR_H(classify_integer_matcher_multiplier, 14, + INT_VAR_H(classify_integer_matcher_multiplier, 10, "Integer Matcher Multiplier 0-255: "); // Use class variables to hold onto built-in templates and adapted templates. 
@@ -453,6 +490,9 @@ class Classify : public CCStruct { INT_VAR_H(il1_adaption_test, 0, "Dont adapt to i/I at beginning of word"); BOOL_VAR_H(classify_bln_numeric_mode, 0, "Assume the input is numbers [0-9]."); + double_VAR_H(speckle_large_max_size, 0.30, "Max large speckle size"); + double_VAR_H(speckle_rating_penalty, 10.0, + "Penalty to add to worst rating for noise"); protected: IntegerMatcher im_; @@ -466,6 +506,8 @@ class Classify : public CCStruct { private: Dict dict_; + // The currently active static classifier. + ShapeClassifier* static_classifier_; /* variables used to hold performance statistics */ int AdaptiveMatcherCalls; diff --git a/classify/cluster.cpp b/classify/cluster.cpp index 964ab2b7b..6c78c6f14 100644 --- a/classify/cluster.cpp +++ b/classify/cluster.cpp @@ -15,11 +15,12 @@ ** See the License for the specific language governing permissions and ** limitations under the License. ******************************************************************************/ -#include "oldheap.h" #include "const.h" #include "cluster.h" #include "emalloc.h" +#include "genericheap.h" #include "helpers.h" +#include "kdpair.h" #include "matrix.h" #include "tprintf.h" #include "danerror.h" @@ -164,6 +165,9 @@ struct TEMPCLUSTER { CLUSTER *Neighbor; }; +typedef tesseract::KDPairInc ClusterPair; +typedef tesseract::GenericHeap ClusterHeap; + struct STATISTICS { FLOAT32 AvgVariance; FLOAT32 *CoVariance; @@ -190,7 +194,7 @@ struct CHISTRUCT{ // For use with KDWalk / MakePotentialClusters struct ClusteringContext { - HEAP *heap; // heap used to hold temp clusters, "best" on top + ClusterHeap *heap; // heap used to hold temp clusters, "best" on top TEMPCLUSTER *candidates; // array of potential clusters KDTREE *tree; // kd-tree to be searched for neighbors inT32 next; // next candidate to be used @@ -693,7 +697,7 @@ History: 5/29/89, DSJ, Created. 
******************************************************************************/ void CreateClusterTree(CLUSTERER *Clusterer) { ClusteringContext context; - HEAPENTRY HeapEntry; + ClusterPair HeapEntry; TEMPCLUSTER *PotentialCluster; // each sample and its nearest neighbor form a "potential" cluster @@ -702,12 +706,12 @@ void CreateClusterTree(CLUSTERER *Clusterer) { context.candidates = (TEMPCLUSTER *) Emalloc(Clusterer->NumberOfSamples * sizeof(TEMPCLUSTER)); context.next = 0; - context.heap = MakeHeap(Clusterer->NumberOfSamples); + context.heap = new ClusterHeap(Clusterer->NumberOfSamples); KDWalk(context.tree, (void_proc)MakePotentialClusters, &context); // form potential clusters into actual clusters - always do "best" first - while (GetTopOfHeap(context.heap, &HeapEntry) != EMPTY) { - PotentialCluster = (TEMPCLUSTER *)HeapEntry.Data; + while (context.heap->Pop(&HeapEntry)) { + PotentialCluster = HeapEntry.data; // if main cluster of potential cluster is already in another cluster // then we don't need to worry about it @@ -720,9 +724,9 @@ void CreateClusterTree(CLUSTERER *Clusterer) { else if (PotentialCluster->Neighbor->Clustered) { PotentialCluster->Neighbor = FindNearestNeighbor(context.tree, PotentialCluster->Cluster, - &HeapEntry.Key); + &HeapEntry.key); if (PotentialCluster->Neighbor != NULL) { - HeapStore(context.heap, &HeapEntry); + context.heap->Push(&HeapEntry); } } @@ -732,9 +736,9 @@ void CreateClusterTree(CLUSTERER *Clusterer) { MakeNewCluster(Clusterer, PotentialCluster); PotentialCluster->Neighbor = FindNearestNeighbor(context.tree, PotentialCluster->Cluster, - &HeapEntry.Key); + &HeapEntry.key); if (PotentialCluster->Neighbor != NULL) { - HeapStore(context.heap, &HeapEntry); + context.heap->Push(&HeapEntry); } } } @@ -745,7 +749,7 @@ void CreateClusterTree(CLUSTERER *Clusterer) { // free up the memory used by the K-D tree, heap, and temp clusters FreeKDTree(context.tree); Clusterer->KDTree = NULL; - FreeHeap(context.heap); + delete 
context.heap; memfree(context.candidates); } // CreateClusterTree @@ -763,16 +767,16 @@ void CreateClusterTree(CLUSTERER *Clusterer) { ******************************************************************************/ void MakePotentialClusters(ClusteringContext *context, CLUSTER *Cluster, inT32 Level) { - HEAPENTRY HeapEntry; + ClusterPair HeapEntry; int next = context->next; context->candidates[next].Cluster = Cluster; - HeapEntry.Data = (char *) &(context->candidates[next]); + HeapEntry.data = &(context->candidates[next]); context->candidates[next].Neighbor = FindNearestNeighbor(context->tree, context->candidates[next].Cluster, - &HeapEntry.Key); + &HeapEntry.key); if (context->candidates[next].Neighbor != NULL) { - HeapStore(context->heap, &HeapEntry); + context->heap->Push(&HeapEntry); context->next++; } } // MakePotentialClusters diff --git a/classify/errorcounter.cpp b/classify/errorcounter.cpp index 06d973546..706e534a2 100644 --- a/classify/errorcounter.cpp +++ b/classify/errorcounter.cpp @@ -27,6 +27,9 @@ namespace tesseract { +// Difference in result rating to be thought of as an "equal" choice. +const double kRatingEpsilon = 1.0 / 32; + // Tests a classifier, computing its error rate. // See errorcounter.h for description of arguments. // Iterates over the samples, calling the classifier in normal/silent mode. @@ -35,14 +38,12 @@ namespace tesseract { // with a debug flag and a keep_this argument to find out what is going on. 
double ErrorCounter::ComputeErrorRate(ShapeClassifier* classifier, int report_level, CountTypes boosting_mode, - const UnicityTable& fontinfo_table, + const FontInfoTable& fontinfo_table, const GenericVector& page_images, SampleIterator* it, double* unichar_error, double* scaled_error, STRING* fonts_report) { - int charsetsize = it->shape_table()->unicharset().size(); - int shapesize = it->CompactCharsetSize(); int fontsize = it->sample_set()->NumFonts(); - ErrorCounter counter(charsetsize, shapesize, fontsize); - GenericVector results; + ErrorCounter counter(classifier->GetUnicharset(), fontsize); + GenericVector results; clock_t start = clock(); int total_samples = 0; @@ -56,21 +57,28 @@ double ErrorCounter::ComputeErrorRate(ShapeClassifier* classifier, Pix* page_pix = 0 <= page_index && page_index < page_images.size() ? page_images[page_index] : NULL; // No debug, no keep this. - classifier->ClassifySample(*mutable_sample, page_pix, 0, INVALID_UNICHAR_ID, - &results); - if (mutable_sample->class_id() == 0) { + classifier->UnicharClassifySample(*mutable_sample, page_pix, 0, + INVALID_UNICHAR_ID, &results); + bool debug_it = false; + int correct_id = mutable_sample->class_id(); + if (counter.unicharset_.has_special_codes() && + (correct_id == UNICHAR_SPACE || correct_id == UNICHAR_JOINED || + correct_id == UNICHAR_BROKEN)) { // This is junk so use the special counter. - counter.AccumulateJunk(*it->shape_table(), results, mutable_sample); - } else if (counter.AccumulateErrors(report_level > 3, boosting_mode, - fontinfo_table, *it->shape_table(), - results, mutable_sample) && - error_samples > 0) { + debug_it = counter.AccumulateJunk(report_level > 3, + results, + mutable_sample); + } else { + debug_it = counter.AccumulateErrors(report_level > 3, boosting_mode, + fontinfo_table, + results, mutable_sample); + } + if (debug_it && error_samples > 0) { // Running debug, keep the correct answer, and debug the classifier. 
- tprintf("Error on sample %d: Classifier debug output:\n", - it->GlobalSampleIndex()); - int keep_this = it->GetSparseClassID(); - classifier->ClassifySample(*mutable_sample, page_pix, 1, keep_this, - &results); + tprintf("Error on sample %d: %s Classifier debug output:\n", + it->GlobalSampleIndex(), + it->sample_set()->SampleToString(*mutable_sample).string()); + classifier->DebugDisplay(*mutable_sample, page_pix, correct_id); --error_samples; } ++total_samples; @@ -89,12 +97,70 @@ double ErrorCounter::ComputeErrorRate(ShapeClassifier* classifier, return unscaled_error; } +// Tests a pair of classifiers, debugging errors of the new against the old. +// See errorcounter.h for description of arguments. +// Iterates over the samples, calling the classifiers in normal/silent mode. +// If the new_classifier makes a boosting_mode error that the old_classifier +// does not, it will then call the new_classifier again with a debug flag +// and a keep_this argument to find out what is going on. +void ErrorCounter::DebugNewErrors( + ShapeClassifier* new_classifier, ShapeClassifier* old_classifier, + CountTypes boosting_mode, + const FontInfoTable& fontinfo_table, + const GenericVector& page_images, SampleIterator* it) { + int fontsize = it->sample_set()->NumFonts(); + ErrorCounter old_counter(old_classifier->GetUnicharset(), fontsize); + ErrorCounter new_counter(new_classifier->GetUnicharset(), fontsize); + GenericVector results; + + int total_samples = 0; + int error_samples = 25; + int total_new_errors = 0; + // Iterate over all the samples, accumulating errors. + for (it->Begin(); !it->AtEnd(); it->Next()) { + TrainingSample* mutable_sample = it->MutableSample(); + int page_index = mutable_sample->page_num(); + Pix* page_pix = 0 <= page_index && page_index < page_images.size() + ? page_images[page_index] : NULL; + // No debug, no keep this. 
+ old_classifier->UnicharClassifySample(*mutable_sample, page_pix, 0, + INVALID_UNICHAR_ID, &results); + int correct_id = mutable_sample->class_id(); + if (correct_id != 0 && + !old_counter.AccumulateErrors(true, boosting_mode, fontinfo_table, + results, mutable_sample)) { + // old classifier was correct, check the new one. + new_classifier->UnicharClassifySample(*mutable_sample, page_pix, 0, + INVALID_UNICHAR_ID, &results); + if (correct_id != 0 && + new_counter.AccumulateErrors(true, boosting_mode, fontinfo_table, + results, mutable_sample)) { + tprintf("New Error on sample %d: Classifier debug output:\n", + it->GlobalSampleIndex()); + ++total_new_errors; + new_classifier->UnicharClassifySample(*mutable_sample, page_pix, 1, + correct_id, &results); + if (results.size() > 0 && error_samples > 0) { + new_classifier->DebugDisplay(*mutable_sample, page_pix, correct_id); + --error_samples; + } + } + } + ++total_samples; + } + tprintf("Total new errors = %d\n", total_new_errors); +} + // Constructor is private. Only anticipated use of ErrorCounter is via // the static ComputeErrorRate. 
-ErrorCounter::ErrorCounter(int charsetsize, int shapesize, int fontsize) - : scaled_error_(0.0), unichar_counts_(charsetsize, shapesize, 0) { +ErrorCounter::ErrorCounter(const UNICHARSET& unicharset, int fontsize) + : scaled_error_(0.0), rating_epsilon_(kRatingEpsilon), + unichar_counts_(unicharset.size(), unicharset.size(), 0), + ok_score_hist_(0, 101), bad_score_hist_(0, 101), + unicharset_(unicharset) { Counts empty_counts; font_counts_.init_to_size(fontsize, empty_counts); + multi_unichar_counts_.init_to_size(unicharset.size(), 0); } ErrorCounter::~ErrorCounter() { } @@ -107,13 +173,11 @@ ErrorCounter::~ErrorCounter() { // for error counting and shape_table is used to understand the relationship // between unichar_ids and shape_ids in the results bool ErrorCounter::AccumulateErrors(bool debug, CountTypes boosting_mode, - const UnicityTable& font_table, - const ShapeTable& shape_table, - const GenericVector& results, + const FontInfoTable& font_table, + const GenericVector& results, TrainingSample* sample) { int num_results = results.size(); - int res_index = 0; - bool debug_it = false; + int answer_actual_rank = -1; int font_id = sample->font_id(); int unichar_id = sample->class_id(); sample->set_is_error(false); @@ -123,107 +187,143 @@ bool ErrorCounter::AccumulateErrors(bool debug, CountTypes boosting_mode, // improve the classifier. sample->set_is_error(true); ++font_counts_[font_id].n[CT_REJECT]; - } else if (shape_table.GetShape(results[0].shape_id). - ContainsUnicharAndFont(unichar_id, font_id)) { - ++font_counts_[font_id].n[CT_SHAPE_TOP_CORRECT]; - // Unichar and font OK, but count if multiple unichars. - if (shape_table.GetShape(results[0].shape_id).size() > 1) - ++font_counts_[font_id].n[CT_OK_MULTI_UNICHAR]; } else { - // This is a top shape error. - ++font_counts_[font_id].n[CT_SHAPE_TOP_ERR]; - // Check to see if any font in the top choice has attributes that match. 
- bool attributes_match = false; - uinT32 font_props = font_table.get(font_id).properties; - const Shape& shape = shape_table.GetShape(results[0].shape_id); - for (int c = 0; c < shape.size() && !attributes_match; ++c) { - for (int f = 0; f < shape[c].font_ids.size(); ++f) { - if (font_table.get(shape[c].font_ids[f]).properties == font_props) { - attributes_match = true; - break; - } + // Find rank of correct unichar answer, using rating_epsilon_ to allow + // different answers to score as equal. (Ignoring the font.) + int epsilon_rank = 0; + int answer_epsilon_rank = -1; + int num_top_answers = 0; + double prev_rating = results[0].rating; + bool joined = false; + bool broken = false; + int res_index = 0; + while (res_index < num_results) { + if (results[res_index].rating < prev_rating - rating_epsilon_) { + ++epsilon_rank; + prev_rating = results[res_index].rating; } - } - // TODO(rays) It is easy to add counters for individual font attributes - // here if we want them. - if (!attributes_match) - ++font_counts_[font_id].n[CT_FONT_ATTR_ERR]; - if (boosting_mode == CT_SHAPE_TOP_ERR) sample->set_is_error(true); - // Find rank of correct unichar answer. (Ignoring the font.) - while (res_index < num_results && - !shape_table.GetShape(results[res_index].shape_id). - ContainsUnichar(unichar_id)) { + if (results[res_index].unichar_id == unichar_id && + answer_epsilon_rank < 0) { + answer_epsilon_rank = epsilon_rank; + answer_actual_rank = res_index; + } + if (results[res_index].unichar_id == UNICHAR_JOINED && + unicharset_.has_special_codes()) + joined = true; + else if (results[res_index].unichar_id == UNICHAR_BROKEN && + unicharset_.has_special_codes()) + broken = true; + else if (epsilon_rank == 0) + ++num_top_answers; ++res_index; } - if (res_index == 0) { + if (answer_actual_rank != 0) { + // Correct result is not absolute top. 
+ ++font_counts_[font_id].n[CT_UNICHAR_TOPTOP_ERR]; + if (boosting_mode == CT_UNICHAR_TOPTOP_ERR) sample->set_is_error(true); + } + if (answer_epsilon_rank == 0) { + ++font_counts_[font_id].n[CT_UNICHAR_TOP_OK]; // Unichar OK, but count if multiple unichars. - if (shape_table.GetShape(results[res_index].shape_id).size() > 1) { + if (num_top_answers > 1) { ++font_counts_[font_id].n[CT_OK_MULTI_UNICHAR]; + ++multi_unichar_counts_[unichar_id]; + } + // Check to see if any font in the top choice has attributes that match. + // TODO(rays) It is easy to add counters for individual font attributes + // here if we want them. + if (font_table.SetContainsFontProperties( + font_id, results[answer_actual_rank].fonts)) { + // Font attributes were matched. + // Check for multiple properties. + if (font_table.SetContainsMultipleFontProperties( + results[answer_actual_rank].fonts)) + ++font_counts_[font_id].n[CT_OK_MULTI_FONT]; + } else { + // Font attributes weren't matched. + ++font_counts_[font_id].n[CT_FONT_ATTR_ERR]; } } else { - // Count maps from unichar id to shape id. - if (num_results > 0) - ++unichar_counts_(unichar_id, results[0].shape_id); - // This is a unichar error. + // This is a top unichar error. ++font_counts_[font_id].n[CT_UNICHAR_TOP1_ERR]; if (boosting_mode == CT_UNICHAR_TOP1_ERR) sample->set_is_error(true); - if (res_index >= MIN(2, num_results)) { + // Count maps from unichar id to wrong unichar id. + ++unichar_counts_(unichar_id, results[0].unichar_id); + if (answer_epsilon_rank < 0 || answer_epsilon_rank >= 2) { // It is also a 2nd choice unichar error. ++font_counts_[font_id].n[CT_UNICHAR_TOP2_ERR]; if (boosting_mode == CT_UNICHAR_TOP2_ERR) sample->set_is_error(true); } - if (res_index >= num_results) { + if (answer_epsilon_rank < 0) { // It is also a top-n choice unichar error. 
++font_counts_[font_id].n[CT_UNICHAR_TOPN_ERR]; if (boosting_mode == CT_UNICHAR_TOPN_ERR) sample->set_is_error(true); - debug_it = debug; + answer_epsilon_rank = epsilon_rank; } } + // Compute mean number of return values and mean rank of correct answer. + font_counts_[font_id].n[CT_NUM_RESULTS] += num_results; + font_counts_[font_id].n[CT_RANK] += answer_epsilon_rank; + if (joined) + ++font_counts_[font_id].n[CT_OK_JOINED]; + if (broken) + ++font_counts_[font_id].n[CT_OK_BROKEN]; } - // Compute mean number of return values and mean rank of correct answer. - font_counts_[font_id].n[CT_NUM_RESULTS] += num_results; - font_counts_[font_id].n[CT_RANK] += res_index; // If it was an error for boosting then sum the weight. if (sample->is_error()) { scaled_error_ += sample->weight(); - } - if (debug_it) { - tprintf("%d results for char %s font %d :", - num_results, shape_table.unicharset().id_to_unichar(unichar_id), - font_id); - for (int i = 0; i < num_results; ++i) { - tprintf(" %.3f/%.3f:%s", - results[i].rating, results[i].font, - shape_table.DebugStr(results[i].shape_id).string()); + if (debug) { + tprintf("%d results for char %s font %d :", + num_results, unicharset_.id_to_unichar(unichar_id), + font_id); + for (int i = 0; i < num_results; ++i) { + tprintf(" %.3f : %s\n", + results[i].rating, + unicharset_.id_to_unichar(results[i].unichar_id)); + } + return true; } - tprintf("\n"); - return true; + int percent = 0; + if (num_results > 0) + percent = IntCastRounded(results[0].rating * 100); + bad_score_hist_.add(percent, 1); + } else { + int percent = 0; + if (answer_actual_rank >= 0) + percent = IntCastRounded(results[answer_actual_rank].rating * 100); + ok_score_hist_.add(percent, 1); } return false; } // Accumulates counts for junk. Counts only whether the junk was correctly // rejected or not. 
-void ErrorCounter::AccumulateJunk(const ShapeTable& shape_table, - const GenericVector& results, +bool ErrorCounter::AccumulateJunk(bool debug, + const GenericVector& results, TrainingSample* sample) { // For junk we accept no answer, or an explicit shape answer matching the // class id of the sample. int num_results = results.size(); int font_id = sample->font_id(); int unichar_id = sample->class_id(); - if (num_results > 0 && - !shape_table.GetShape(results[0].shape_id).ContainsUnichar(unichar_id)) { + int percent = 0; + if (num_results > 0) + percent = IntCastRounded(results[0].rating * 100); + if (num_results > 0 && results[0].unichar_id != unichar_id) { // This is a junk error. ++font_counts_[font_id].n[CT_ACCEPTED_JUNK]; sample->set_is_error(true); // It counts as an error for boosting too so sum the weight. scaled_error_ += sample->weight(); + bad_score_hist_.add(percent, 1); + return debug; } else { // Correctly rejected. ++font_counts_[font_id].n[CT_REJECTED_JUNK]; sample->set_is_error(false); + ok_score_hist_.add(percent, 1); } + return false; } // Creates a report of the error rate. The report_level controls the detail @@ -239,7 +339,7 @@ void ErrorCounter::AccumulateJunk(const ShapeTable& shape_table, // If not NULL, the report string is saved in fonts_report. // (Ignoring report_level). double ErrorCounter::ReportErrors(int report_level, CountTypes boosting_mode, - const UnicityTable& fontinfo_table, + const FontInfoTable& fontinfo_table, const SampleIterator& it, double* unichar_error, STRING* fonts_report) { @@ -251,7 +351,7 @@ double ErrorCounter::ReportErrors(int report_level, CountTypes boosting_mode, // Accumulate counts over fonts. 
totals += font_counts_[f]; STRING font_report; - if (ReportString(font_counts_[f], &font_report)) { + if (ReportString(false, font_counts_[f], &font_report)) { if (fonts_report != NULL) { *fonts_report += fontinfo_table.get(f).name; *fonts_report += ": "; @@ -264,39 +364,59 @@ double ErrorCounter::ReportErrors(int report_level, CountTypes boosting_mode, } } } + // Report the totals. + STRING total_report; + bool any_results = ReportString(true, totals, &total_report); + if (fonts_report != NULL && fonts_report->length() == 0) { + // Make sure we return something even if there were no samples. + *fonts_report = "NoSamplesFound: "; + *fonts_report += total_report; + *fonts_report += "\n"; + } if (report_level > 0) { // Report the totals. STRING total_report; - if (ReportString(totals, &total_report)) { + if (any_results) { tprintf("TOTAL Scaled Err=%.4g%%, %s\n", scaled_error_ * 100.0, total_report.string()); } // Report the worst substitution error only for now. if (totals.n[CT_UNICHAR_TOP1_ERR] > 0) { - const UNICHARSET& unicharset = it.shape_table()->unicharset(); - int charsetsize = unicharset.size(); - int shapesize = it.CompactCharsetSize(); + int charsetsize = unicharset_.size(); int worst_uni_id = 0; - int worst_shape_id = 0; + int worst_result_id = 0; int worst_err = 0; for (int u = 0; u < charsetsize; ++u) { - for (int s = 0; s < shapesize; ++s) { - if (unichar_counts_(u, s) > worst_err) { - worst_err = unichar_counts_(u, s); + for (int v = 0; v < charsetsize; ++v) { + if (unichar_counts_(u, v) > worst_err) { + worst_err = unichar_counts_(u, v); worst_uni_id = u; - worst_shape_id = s; + worst_result_id = v; } } } if (worst_err > 0) { tprintf("Worst error = %d:%s -> %s with %d/%d=%.2f%% errors\n", - worst_uni_id, unicharset.id_to_unichar(worst_uni_id), - it.shape_table()->DebugStr(worst_shape_id).string(), + worst_uni_id, unicharset_.id_to_unichar(worst_uni_id), + unicharset_.id_to_unichar(worst_result_id), worst_err, totals.n[CT_UNICHAR_TOP1_ERR], 100.0 * 
worst_err / totals.n[CT_UNICHAR_TOP1_ERR]); } } + tprintf("Multi-unichar shape use:\n"); + for (int u = 0; u < multi_unichar_counts_.size(); ++u) { + if (multi_unichar_counts_[u] > 0) { + tprintf("%d multiple answers for unichar: %s\n", + multi_unichar_counts_[u], + unicharset_.id_to_unichar(u)); + } + } + tprintf("OK Score histogram:\n"); + ok_score_hist_.print(); + tprintf("ERROR Score histogram:\n"); + bad_score_hist_.print(); } + double rates[CT_SIZE]; if (!ComputeRates(totals, rates)) return 0.0; @@ -308,32 +428,37 @@ double ErrorCounter::ReportErrors(int report_level, CountTypes boosting_mode, // Sets the report string to a combined human and machine-readable report // string of the error rates. -// Returns false if there is no data, leaving report unchanged. -bool ErrorCounter::ReportString(const Counts& counts, STRING* report) { +// Returns false if there is no data, leaving report unchanged, unless +// even_if_empty is true. +bool ErrorCounter::ReportString(bool even_if_empty, const Counts& counts, + STRING* report) { // Compute the error rates. double rates[CT_SIZE]; - if (!ComputeRates(counts, rates)) + if (!ComputeRates(counts, rates) && !even_if_empty) return false; // Using %.4g%%, the length of the output string should exactly match the // length of the format string, but in case of overflow, allow for +eddd // on each number. const int kMaxExtraLength = 5; // Length of +eddd. // Keep this format string and the snprintf in sync with the CountTypes enum. 
- const char* format_str = "ShapeErr=%.4g%%, FontAttr=%.4g%%, " - "Unichar=%.4g%%[1], %.4g%%[2], %.4g%%[n], " - "Multi=%.4g%%, Rej=%.4g%%, " + const char* format_str = "Unichar=%.4g%%[1], %.4g%%[2], %.4g%%[n], %.4g%%[T] " + "Mult=%.4g%%, Jn=%.4g%%, Brk=%.4g%%, Rej=%.4g%%, " + "FontAttr=%.4g%%, Multi=%.4g%%, " "Answers=%.3g, Rank=%.3g, " "OKjunk=%.4g%%, Badjunk=%.4g%%"; int max_str_len = strlen(format_str) + kMaxExtraLength * (CT_SIZE - 1) + 1; char* formatted_str = new char[max_str_len]; snprintf(formatted_str, max_str_len, format_str, - rates[CT_SHAPE_TOP_ERR] * 100.0, - rates[CT_FONT_ATTR_ERR] * 100.0, rates[CT_UNICHAR_TOP1_ERR] * 100.0, rates[CT_UNICHAR_TOP2_ERR] * 100.0, rates[CT_UNICHAR_TOPN_ERR] * 100.0, + rates[CT_UNICHAR_TOPTOP_ERR] * 100.0, rates[CT_OK_MULTI_UNICHAR] * 100.0, + rates[CT_OK_JOINED] * 100.0, + rates[CT_OK_BROKEN] * 100.0, rates[CT_REJECT] * 100.0, + rates[CT_FONT_ATTR_ERR] * 100.0, + rates[CT_OK_MULTI_FONT] * 100.0, rates[CT_NUM_RESULTS], rates[CT_RANK], 100.0 * rates[CT_REJECTED_JUNK], @@ -350,13 +475,9 @@ bool ErrorCounter::ReportString(const Counts& counts, STRING* report) { // Computes the error rates and returns in rates which is an array of size // CT_SIZE. Returns false if there is no data, leaving rates unchanged. bool ErrorCounter::ComputeRates(const Counts& counts, double rates[CT_SIZE]) { - int ok_samples = counts.n[CT_SHAPE_TOP_CORRECT] + counts.n[CT_SHAPE_TOP_ERR] + + int ok_samples = counts.n[CT_UNICHAR_TOP_OK] + counts.n[CT_UNICHAR_TOP1_ERR] + counts.n[CT_REJECT]; int junk_samples = counts.n[CT_REJECTED_JUNK] + counts.n[CT_ACCEPTED_JUNK]; - if (ok_samples == 0 && junk_samples == 0) { - // There is no data. - return false; - } // Compute rates for normal chars. 
double denominator = static_cast(MAX(ok_samples, 1)); for (int ct = 0; ct <= CT_RANK; ++ct) @@ -365,7 +486,7 @@ bool ErrorCounter::ComputeRates(const Counts& counts, double rates[CT_SIZE]) { denominator = static_cast(MAX(junk_samples, 1)); for (int ct = CT_REJECTED_JUNK; ct <= CT_ACCEPTED_JUNK; ++ct) rates[ct] = counts.n[ct] / denominator; - return true; + return ok_samples != 0 || junk_samples != 0; } ErrorCounter::Counts::Counts() { diff --git a/classify/errorcounter.h b/classify/errorcounter.h index 618d56878..61af5014c 100644 --- a/classify/errorcounter.h +++ b/classify/errorcounter.h @@ -18,6 +18,7 @@ #include "genericvector.h" #include "matrix.h" +#include "statistc.h" struct Pix; template class UnicityTable; @@ -25,11 +26,11 @@ template class UnicityTable; namespace tesseract { struct FontInfo; +class FontInfoTable; class SampleIterator; class ShapeClassifier; -class ShapeRating; -class ShapeTable; class TrainingSample; +class UnicharRating; // Enumeration of the different types of error count. // Error counts work as follows: @@ -37,22 +38,21 @@ class TrainingSample; // Ground truth is a valid unichar-id / font-id pair: // Number of classifier answers? // 0 >0 -// CT_REJECT BOTH unichar-id and font-id match top shape? -// __________ yes! no -// CT_SHAPE_TOP_CORRECT CT_SHAPE_TOP_ERR -// | Font attributes match? -// | yes! no -// | | CT_FONT_ATTR_ERROR -// | Top unichar-id matches? -// | yes! no -// Top shape-id has multiple unichars? CT_UNICHAR_TOP1_ERR -// yes! no 2nd shape unichar id matches? -// CT_OK_MULTI_UNICHAR ________ yes! no -// ___________________ _____ CT_UNICHAR_TOP2_ERR -// Any unichar-id matches? -// yes! no -// ______ CT_UNICHAR_TOPN_ERR -// _________________ +// CT_REJECT unichar-id matches top shape? +// __________ yes! no +// CT_UNICHAR_TOP_OK CT_UNICHAR_TOP1_ERR +// Top shape-id has multiple unichars? 2nd shape unichar id matches? +// yes! no yes! no +// CT_OK_MULTI_UNICHAR | _____ CT_UNICHAR_TOP2_ERR +// Font attributes match? 
Any unichar-id matches? +// yes! no yes! no +// CT_FONT_ATTR_OK CT_FONT_ATTR_ERR ______ CT_UNICHAR_TOPN_ERR +// | __________________ _________________ +// Top shape-id has multiple font attrs? +// yes! no +// CT_OK_MULTI_FONT +// _____________________________ +// // Note that multiple counts may be activated for a single sample! // // Ground truth is for a fragment/n-gram that is NOT in the unicharset. @@ -67,14 +67,20 @@ class TrainingSample; // // Keep in sync with the ReportString function. enum CountTypes { - CT_SHAPE_TOP_CORRECT, // Top shape id is actually correct. - CT_SHAPE_TOP_ERR, // Top shape id is not correct. - CT_FONT_ATTR_ERR, // Font attributes incorrect, ignoring unichar. + CT_UNICHAR_TOP_OK, // Top shape contains correct unichar id. + // The rank of the results in TOP1, TOP2, TOPN is determined by a gap of + // kRatingEpsilon from the first result in each group. The real top choice + // is measured using TOPTOP. CT_UNICHAR_TOP1_ERR, // Top shape does not contain correct unichar id. CT_UNICHAR_TOP2_ERR, // Top 2 shapes don't contain correct unichar id. CT_UNICHAR_TOPN_ERR, // No output shape contains correct unichar id. + CT_UNICHAR_TOPTOP_ERR, // Very top choice not correct. CT_OK_MULTI_UNICHAR, // Top shape id has correct unichar id, and others. + CT_OK_JOINED, // Top shape id is correct but marked joined. + CT_OK_BROKEN, // Top shape id is correct but marked broken. CT_REJECT, // Classifier hates this. + CT_FONT_ATTR_ERR, // Top unichar OK, but font attributes incorrect. + CT_OK_MULTI_FONT, // CT_FONT_ATTR_OK but there are multiple font attrs. CT_NUM_RESULTS, // Number of answers produced. CT_RANK, // Rank of correct answer. CT_REJECTED_JUNK, // Junk that was correctly rejected. @@ -115,12 +121,24 @@ class ErrorCounter { // * The return value is the un-weighted version of the scaled_error. 
static double ComputeErrorRate(ShapeClassifier* classifier, int report_level, CountTypes boosting_mode, - const UnicityTable& fontinfo_table, + const FontInfoTable& fontinfo_table, const GenericVector& page_images, SampleIterator* it, double* unichar_error, double* scaled_error, STRING* fonts_report); + // Tests a pair of classifiers, debugging errors of the new against the old. + // See errorcounter.h for description of arguments. + // Iterates over the samples, calling the classifiers in normal/silent mode. + // If the new_classifier makes a boosting_mode error that the old_classifier + // does not, and the appropriate, it will then call the new_classifier again + // with a debug flag and a keep_this argument to find out what is going on. + static void DebugNewErrors(ShapeClassifier* new_classifier, + ShapeClassifier* old_classifier, + CountTypes boosting_mode, + const FontInfoTable& fontinfo_table, + const GenericVector& page_images, + SampleIterator* it); private: // Simple struct to hold an array of counts. @@ -134,7 +152,7 @@ class ErrorCounter { // Constructor is private. Only anticipated use of ErrorCounter is via // the static ComputeErrorRate. - ErrorCounter(int charsetsize, int shapesize, int fontsize); + ErrorCounter(const UNICHARSET& unicharset, int fontsize); ~ErrorCounter(); // Accumulates the errors from the classifier results on a single sample. @@ -145,15 +163,13 @@ class ErrorCounter { // for error counting and shape_table is used to understand the relationship // between unichar_ids and shape_ids in the results bool AccumulateErrors(bool debug, CountTypes boosting_mode, - const UnicityTable& font_table, - const ShapeTable& shape_table, - const GenericVector& results, + const FontInfoTable& font_table, + const GenericVector& results, TrainingSample* sample); // Accumulates counts for junk. Counts only whether the junk was correctly // rejected or not. 
- void AccumulateJunk(const ShapeTable& shape_table, - const GenericVector& results, + bool AccumulateJunk(bool debug, const GenericVector& results, TrainingSample* sample); // Creates a report of the error rate. The report_level controls the detail @@ -169,15 +185,17 @@ class ErrorCounter { // If not NULL, the report string is saved in fonts_report. // (Ignoring report_level). double ReportErrors(int report_level, CountTypes boosting_mode, - const UnicityTable& fontinfo_table, + const FontInfoTable& fontinfo_table, const SampleIterator& it, double* unichar_error, STRING* fonts_report); // Sets the report string to a combined human and machine-readable report // string of the error rates. - // Returns false if there is no data, leaving report unchanged. - static bool ReportString(const Counts& counts, STRING* report); + // Returns false if there is no data, leaving report unchanged, unless + // even_if_empty is true. + static bool ReportString(bool even_if_empty, const Counts& counts, + STRING* report); // Computes the error rates and returns in rates which is an array of size // CT_SIZE. Returns false if there is no data, leaving rates unchanged. @@ -186,11 +204,22 @@ class ErrorCounter { // Total scaled error used by boosting algorithms. double scaled_error_; + // Difference in result rating to be thought of as an "equal" choice. + double rating_epsilon_; // Vector indexed by font_id from the samples of error accumulators. GenericVector font_counts_; // Counts of the results that map each unichar_id (from samples) to an // incorrect shape_id. GENERIC_2D_ARRAY unichar_counts_; + // Count of the number of times each shape_id occurs, is correct, and multi- + // unichar. + GenericVector multi_unichar_counts_; + // Histogram of scores (as percent) for correct answers. + STATS ok_score_hist_; + // Histogram of scores (as percent) for incorrect answers. + STATS bad_score_hist_; + // Unicharset for printing character ids in results. 
+ const UNICHARSET& unicharset_; }; } // namespace tesseract. diff --git a/classify/extract.cpp b/classify/extract.cpp index b8e595a7c..822c733e4 100644 --- a/classify/extract.cpp +++ b/classify/extract.cpp @@ -49,8 +49,10 @@ void ExtractorStub(); * @note History: Sun Jan 21 10:07:28 1990, DSJ, Created. */ CHAR_DESC ExtractBlobFeatures(const FEATURE_DEFS_STRUCT &FeatureDefs, - const DENORM& denorm, TBLOB *Blob) { - return (ExtractFlexFeatures(FeatureDefs, Blob, denorm)); + const DENORM& bl_denorm, const DENORM& cn_denorm, + const INT_FX_RESULT_STRUCT& fx_info, + TBLOB *Blob) { + return ExtractFlexFeatures(FeatureDefs, Blob, bl_denorm, cn_denorm, fx_info); } /* ExtractBlobFeatures */ /*----------------------------------------------------------------------------- diff --git a/classify/extract.h b/classify/extract.h index 844393157..1f80c20e4 100644 --- a/classify/extract.h +++ b/classify/extract.h @@ -26,8 +26,12 @@ class DENORM; /*----------------------------------------------------------------------------- Public Function Prototypes -----------------------------------------------------------------------------*/ +// Deprecated! Will be deleted soon! +// In the meantime, as all TBLOBs, Blob is in baseline normalized coords. +// See SetupBLCNDenorms in intfx.cpp for other args. 
CHAR_DESC ExtractBlobFeatures(const FEATURE_DEFS_STRUCT &FeatureDefs, - const DENORM& denorm, TBLOB *Blob); + const DENORM& bl_denorm, const DENORM& cn_denorm, + const INT_FX_RESULT_STRUCT& fx_info, TBLOB *Blob); /*--------------------------------------------------------------------------- Private Function Prototypes diff --git a/classify/featdefs.cpp b/classify/featdefs.cpp index 95d8cd8c3..df03bf7f5 100644 --- a/classify/featdefs.cpp +++ b/classify/featdefs.cpp @@ -19,7 +19,7 @@ Include Files and Type Defines -----------------------------------------------------------------------------*/ #ifdef _MSC_VER -#include "mathfix.h" +#include #endif #include "featdefs.h" diff --git a/classify/flexfx.cpp b/classify/flexfx.cpp index 44f975e7e..2ddbe3a02 100644 --- a/classify/flexfx.cpp +++ b/classify/flexfx.cpp @@ -28,8 +28,13 @@ Public Code ----------------------------------------------------------------------------**/ /*---------------------------------------------------------------------------*/ +// Deprecated! Will be deleted soon! +// In the meantime, as all TBLOBs, Blob is in baseline normalized coords. +// See SetupBLCNDenorms in intfx.cpp for other args. 
CHAR_DESC ExtractFlexFeatures(const FEATURE_DEFS_STRUCT &FeatureDefs, - TBLOB *Blob, const DENORM& denorm) { + TBLOB *Blob, const DENORM& bl_denorm, + const DENORM& cn_denorm, + const INT_FX_RESULT_STRUCT& fx_info) { /* ** Parameters: ** Blob blob to extract features from @@ -50,8 +55,13 @@ CHAR_DESC ExtractFlexFeatures(const FEATURE_DEFS_STRUCT &FeatureDefs, if (FeatureDefs.FeatureExtractors[Type] != NULL && FeatureDefs.FeatureExtractors[Type]->Extractor != NULL) { CharDesc->FeatureSets[Type] = - (FeatureDefs.FeatureExtractors[Type])->Extractor(Blob, denorm); + (FeatureDefs.FeatureExtractors[Type])->Extractor(Blob, + bl_denorm, + cn_denorm, + fx_info); if (CharDesc->FeatureSets[Type] == NULL) { + tprintf("Feature extractor for type %d = %s returned NULL!\n", + Type, FeatureDefs.FeatureDesc[Type]->ShortName); FreeCharDescription(CharDesc); return NULL; } diff --git a/classify/flexfx.h b/classify/flexfx.h index 52e45a6a3..21c4fa261 100644 --- a/classify/flexfx.h +++ b/classify/flexfx.h @@ -27,7 +27,10 @@ /**---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------**/ +// As with all TBLOBs this one is also baseline normalized. CHAR_DESC ExtractFlexFeatures(const FEATURE_DEFS_STRUCT &FeatureDefs, - TBLOB *Blob, const DENORM& denorm); + TBLOB *Blob, const DENORM& bl_denorm, + const DENORM& cn_denorm, + const INT_FX_RESULT_STRUCT& fx_info); #endif diff --git a/classify/intfeaturespace.cpp b/classify/intfeaturespace.cpp index 5f911dcee..866a539e7 100644 --- a/classify/intfeaturespace.cpp +++ b/classify/intfeaturespace.cpp @@ -90,8 +90,7 @@ void IntFeatureSpace::IndexAndSortFeatures( // window, or -1 if the feature is a miss. int IntFeatureSpace::XYToFeatureIndex(int x, int y) const { // Round the x,y position to a feature. Search for a valid theta. 
- INT_FEATURE_STRUCT feature = {static_cast(x), static_cast(y), - 0, 0}; + INT_FEATURE_STRUCT feature(x, y, 0); int index = -1; for (int theta = 0; theta <= MAX_UINT8 && index < 0; ++theta) { feature.Theta = theta; @@ -127,16 +126,10 @@ int IntFeatureSpace::XYToFeatureIndex(int x, int y) const { INT_FEATURE_STRUCT IntFeatureSpace::PositionFromBuckets(int x, int y, int theta) const { - INT_FEATURE_STRUCT pos = { - static_cast(ClipToRange( - (x * kIntFeatureExtent + kIntFeatureExtent / 2) / x_buckets_, - 0, MAX_UINT8)), - static_cast(ClipToRange( - (y * kIntFeatureExtent + kIntFeatureExtent / 2) / y_buckets_, - 0, MAX_UINT8)), - static_cast(ClipToRange( - DivRounded(theta * kIntFeatureExtent, theta_buckets_), - 0, MAX_UINT8))}; + INT_FEATURE_STRUCT pos( + (x * kIntFeatureExtent + kIntFeatureExtent / 2) / x_buckets_, + (y * kIntFeatureExtent + kIntFeatureExtent / 2) / y_buckets_, + DivRounded(theta * kIntFeatureExtent, theta_buckets_)); return pos; } diff --git a/classify/intfx.cpp b/classify/intfx.cpp index 0763b7afb..63d6ddb1e 100644 --- a/classify/intfx.cpp +++ b/classify/intfx.cpp @@ -1,8 +1,11 @@ /****************************************************************************** ** Filename: intfx.c ** Purpose: Integer character normalization & feature extraction - ** Author: Robert Moss + ** Author: Robert Moss, rays@google.com (Ray Smith) ** History: Tue May 21 15:51:57 MDT 1991, RWM, Created. + ** Tue Feb 28 10:42:00 PST 2012, vastly rewritten to allow + greyscale fx and non-linear + normalization. ** ** (c) Copyright Hewlett-Packard Company, 1988. 
** Licensed under the Apache License, Version 2.0 (the "License"); @@ -19,48 +22,26 @@ Include Files and Type Defines ----------------------------------------------------------------------------**/ #include "intfx.h" -#include "intmatcher.h" +#include "allheaders.h" +#include "ccutil.h" +#include "classify.h" #include "const.h" #include "helpers.h" -#include "ccutil.h" +#include "intmatcher.h" +#include "linlsq.h" +#include "ndminx.h" +#include "normalis.h" #include "statistc.h" #include "trainingsample.h" -#ifdef __UNIX__ -#endif using tesseract::TrainingSample; -/**---------------------------------------------------------------------------- - Private Function Prototypes -----------------------------------------------------------------------------**/ -int SaveFeature(); -uinT8 BinaryAnglePlusPi(inT32 Y, inT32 X); -uinT8 MySqrt2(); -void ClipRadius(); - -INT_VAR(classify_radius_gyr_min_man, 255, - "Minimum Radius of Gyration Mantissa 0-255: "); - -INT_VAR(classify_radius_gyr_min_exp, 0, - "Minimum Radius of Gyration Exponent 0-255: "); - -INT_VAR(classify_radius_gyr_max_man, 158, - "Maximum Radius of Gyration Mantissa 0-255: "); - -INT_VAR(classify_radius_gyr_max_exp, 8, - "Maximum Radius of Gyration Exponent 0-255: "); - /**---------------------------------------------------------------------------- Global Data Definitions and Declarations ----------------------------------------------------------------------------**/ -#define ATAN_TABLE_SIZE 64 - -// Look up table for arc tangent containing: -// atan(0.0) ... atan(ATAN_TABLE_SIZE - 1 / ATAN_TABLE_SIZE) -// The entries are in binary degrees where a full circle is 256 binary degrees. -static uinT8 AtanTable[ATAN_TABLE_SIZE]; // Look up table for cos and sin to turn the intfx feature angle to a vector. -// Also protected by atan_table_mutex. +// Protected by atan_table_mutex. +// The entries are in binary degrees where a full circle is 256 binary degrees. 
static float cos_table[INT_CHAR_NORM_RANGE]; static float sin_table[INT_CHAR_NORM_RANGE]; // Guards write access to AtanTable so we dont create it more than once. @@ -75,10 +56,6 @@ void InitIntegerFX() { static bool atan_table_init = false; atan_table_mutex.Lock(); if (!atan_table_init) { - for (int i = 0; i < ATAN_TABLE_SIZE; i++) { - AtanTable[i] = - (uinT8) (atan ((i / (float) ATAN_TABLE_SIZE)) * 128.0 / PI + 0.5); - } for (int i = 0; i < INT_CHAR_NORM_RANGE; ++i) { cos_table[i] = cos(i * 2 * PI / INT_CHAR_NORM_RANGE + PI); sin_table[i] = sin(i * 2 * PI / INT_CHAR_NORM_RANGE + PI); @@ -94,31 +71,435 @@ FCOORD FeatureDirection(uinT8 theta) { return FCOORD(cos_table[theta], sin_table[theta]); } -TrainingSample* GetIntFeatures(tesseract::NormalizationMode mode, - TBLOB *blob, const DENORM& denorm) { - INT_FEATURE_ARRAY blfeatures; - INT_FEATURE_ARRAY cnfeatures; +namespace tesseract { + +// Generates a TrainingSample from a TBLOB. Extracts features and sets +// the bounding box, so classifiers that operate on the image can work. +// TODO(rays) BlobToTrainingSample must remain a global function until +// the FlexFx and FeatureDescription code can be removed and LearnBlob +// made a member of Classify. +TrainingSample* BlobToTrainingSample(const TBLOB& blob, + tesseract::NormalizationMode mode, + bool nonlinear_norm) { INT_FX_RESULT_STRUCT fx_info; - ExtractIntFeat(blob, denorm, blfeatures, cnfeatures, &fx_info, NULL); + GenericVector bl_features; + GenericVector cn_features; + Classify::ExtractFeatures(blob, nonlinear_norm, &bl_features, + &cn_features, &fx_info, NULL); + // TODO(rays) Use blob->PreciseBoundingBox() instead. 
+ TBOX box = blob.bounding_box(); TrainingSample* sample = NULL; if (mode == tesseract::NM_CHAR_ANISOTROPIC) { int num_features = fx_info.NumCN; if (num_features > 0) { - sample = TrainingSample::CopyFromFeatures(fx_info, cnfeatures, + sample = TrainingSample::CopyFromFeatures(fx_info, box, &cn_features[0], num_features); } } else if (mode == tesseract::NM_BASELINE) { int num_features = fx_info.NumBL; if (num_features > 0) { - sample = TrainingSample::CopyFromFeatures(fx_info, blfeatures, + sample = TrainingSample::CopyFromFeatures(fx_info, box, &bl_features[0], num_features); } } else { ASSERT_HOST(!"Unsupported normalization mode!"); } + if (sample != NULL) { + // Set the bounding box (in original image coordinates) in the sample. + TPOINT topleft, botright; + topleft.x = box.left(); + topleft.y = box.top(); + botright.x = box.right(); + botright.y = box.bottom(); + TPOINT original_topleft, original_botright; + blob.denorm().DenormTransform(NULL, topleft, &original_topleft); + blob.denorm().DenormTransform(NULL, botright, &original_botright); + sample->set_bounding_box(TBOX(original_topleft.x, original_botright.y, + original_botright.x, original_topleft.y)); + } return sample; } +// Computes the DENORMS for bl(baseline) and cn(character) normalization +// during feature extraction. The input denorm describes the current state +// of the blob, which is usually a baseline-normalized word. +// The Transforms setup are as follows: +// Baseline Normalized (bl) Output: +// We center the grapheme by aligning the x-coordinate of its centroid with +// x=128 and leaving the already-baseline-normalized y as-is. +// +// Character Normalized (cn) Output: +// We align the grapheme's centroid at the origin and scale it +// asymmetrically in x and y so that the 2nd moments are a standard value +// (51.2) ie the result is vaguely square. +// If classify_nonlinear_norm is true: +// A non-linear normalization is setup that attempts to evenly distribute +// edges across x and y. 
+// +// Some of the fields of fx_info are also setup: +// Length: Total length of outline. +// Rx: Rounded y second moment. (Reversed by convention.) +// Ry: rounded x second moment. +// Xmean: Rounded x center of mass of the blob. +// Ymean: Rounded y center of mass of the blob. +void Classify::SetupBLCNDenorms(const TBLOB& blob, bool nonlinear_norm, + DENORM* bl_denorm, DENORM* cn_denorm, + INT_FX_RESULT_STRUCT* fx_info) { + // Compute 1st and 2nd moments of the original outline. + FCOORD center, second_moments; + int length = blob.ComputeMoments(¢er, &second_moments); + if (fx_info != NULL) { + fx_info->Length = length; + fx_info->Rx = IntCastRounded(second_moments.y()); + fx_info->Ry = IntCastRounded(second_moments.x()); + + fx_info->Xmean = IntCastRounded(center.x()); + fx_info->Ymean = IntCastRounded(center.y()); + } + // Setup the denorm for Baseline normalization. + bl_denorm->SetupNormalization(NULL, NULL, &blob.denorm(), center.x(), 128.0f, + 1.0f, 1.0f, 128.0f, 128.0f); + // Setup the denorm for character normalization. + if (nonlinear_norm) { + GenericVector > x_coords; + GenericVector > y_coords; + TBOX box; + blob.GetPreciseBoundingBox(&box); + box.pad(1, 1); + blob.GetEdgeCoords(box, &x_coords, &y_coords); + cn_denorm->SetupNonLinear(&blob.denorm(), box, MAX_UINT8, MAX_UINT8, + 0.0f, 0.0f, x_coords, y_coords); + } else { + cn_denorm->SetupNormalization(NULL, NULL, &blob.denorm(), + center.x(), center.y(), + 51.2f / second_moments.x(), + 51.2f / second_moments.y(), + 128.0f, 128.0f); + } +} + +// Helper normalizes the direction, assuming that it is at the given +// unnormed_pos, using the given denorm, starting at the root_denorm. +uinT8 NormalizeDirection(uinT8 dir, const FCOORD& unnormed_pos, + const DENORM& denorm, const DENORM* root_denorm) { + // Convert direction to a vector. 
+ FCOORD unnormed_end; + unnormed_end.from_direction(dir); + unnormed_end += unnormed_pos; + FCOORD normed_pos, normed_end; + denorm.NormTransform(root_denorm, unnormed_pos, &normed_pos); + denorm.NormTransform(root_denorm, unnormed_end, &normed_end); + normed_end -= normed_pos; + return normed_end.to_direction(); +} + +// Helper returns the mean direction vector from the given stats. Use the +// mean direction from dirs if there is information available, otherwise, use +// the fit_vector from point_diffs. +static FCOORD MeanDirectionVector(const LLSQ& point_diffs, const LLSQ& dirs, + const FCOORD& start_pt, + const FCOORD& end_pt) { + FCOORD fit_vector; + if (dirs.count() > 0) { + // There were directions, so use them. To avoid wrap-around problems, we + // have 2 accumulators in dirs: x for normal directions and y for + // directions offset by 128. We will use the one with the least variance. + FCOORD mean_pt = dirs.mean_point(); + double mean_dir = 0.0; + if (dirs.x_variance() <= dirs.y_variance()) { + mean_dir = mean_pt.x(); + } else { + mean_dir = mean_pt.y() + 128; + } + fit_vector.from_direction(Modulo(IntCastRounded(mean_dir), 256)); + } else { + // There were no directions, so we rely on the vector_fit to the points. + // Since the vector_fit is 180 degrees ambiguous, we align with the + // supplied feature_dir by making the scalar product non-negative. + FCOORD feature_dir(end_pt - start_pt); + fit_vector = point_diffs.vector_fit(); + if (fit_vector.x() == 0.0f && fit_vector.y() == 0.0f) { + // There was only a single point. Use feature_dir directly. + fit_vector = feature_dir; + } else { + // Sometimes the least mean squares fit is wrong, due to the small sample + // of points and scaling. Use a 90 degree rotated vector if that matches + // feature_dir better. + FCOORD fit_vector2 = !fit_vector; + // The fit_vector is 180 degrees ambiguous, so resolve the ambiguity by + // insisting that the scalar product with the feature_dir should be +ve. 
+ if (fit_vector % feature_dir < 0.0) + fit_vector = -fit_vector; + if (fit_vector2 % feature_dir < 0.0) + fit_vector2 = -fit_vector2; + // Even though fit_vector2 has a higher mean squared error, it might be + // a better fit, so use it if the dot product with feature_dir is bigger. + if (fit_vector2 % feature_dir > fit_vector % feature_dir) + fit_vector = fit_vector2; + } + } + return fit_vector; +} + +// Helper computes one or more features corresponding to the given points. +// Emitted features are on the line defined by: +// start_pt + lambda * (end_pt - start_pt) for scalar lambda. +// Features are spaced at feature_length intervals. +static int ComputeFeatures(const FCOORD& start_pt, const FCOORD& end_pt, + double feature_length, + GenericVector* features) { + FCOORD feature_vector(end_pt - start_pt); + if (feature_vector.x() == 0.0f && feature_vector.y() == 0.0f) return 0; + // Compute theta for the feature based on its direction. + uinT8 theta = feature_vector.to_direction(); + // Compute the number of features and lambda_step. + double target_length = feature_vector.length(); + int num_features = IntCastRounded(target_length / feature_length); + if (num_features == 0) return 0; + // Divide the length evenly into num_features pieces. + double lambda_step = 1.0 / num_features; + double lambda = lambda_step / 2.0; + for (int f = 0; f < num_features; ++f, lambda += lambda_step) { + FCOORD feature_pt(start_pt); + feature_pt += feature_vector * lambda; + INT_FEATURE_STRUCT feature(feature_pt, theta); + features->push_back(feature); + } + return num_features; +} + +// Gathers outline points and their directions from start_index into dirs by +// stepping along the outline and normalizing the coordinates until the +// required feature_length has been collected or end_index is reached. 
+// On input pos must point to the position corresponding to start_index and on +// return pos is updated to the current raw position, and pos_normed is set to +// the normed version of pos. +// Since directions wrap-around, they need special treatment to get the mean. +// Provided the cluster of directions doesn't straddle the wrap-around point, +// the simple mean works. If they do, then, unless the directions are wildly +// varying, the cluster rotated by 180 degrees will not straddle the wrap- +// around point, so mean(dir + 180 degrees) - 180 degrees will work. Since +// LLSQ conveniently stores the mean of 2 variables, we use it to store +// dir and dir+128 (128 is 180 degrees) and then use the resulting mean +// with the least variance. +static int GatherPoints(const C_OUTLINE* outline, double feature_length, + const DENORM& denorm, const DENORM* root_denorm, + int start_index, int end_index, + ICOORD* pos, FCOORD* pos_normed, + LLSQ* points, LLSQ* dirs) { + int step_length = outline->pathlength(); + ICOORD step = outline->step(start_index % step_length); + // Prev_normed is the start point of this collection and will be set on the + // first iteration, and on later iterations used to determine the length + // that has been collected. + FCOORD prev_normed; + points->clear(); + dirs->clear(); + int num_points = 0; + int index; + for (index = start_index; index <= end_index; ++index, *pos += step) { + step = outline->step(index % step_length); + int edge_weight = outline->edge_strength_at_index(index % step_length); + if (edge_weight == 0) { + // This point has conflicting gradient and step direction, so ignore it. + continue; + } + // Get the sub-pixel precise location and normalize. + FCOORD f_pos = outline->sub_pixel_pos_at_index(*pos, index % step_length); + denorm.NormTransform(root_denorm, f_pos, pos_normed); + if (num_points == 0) { + // The start of this segment. 
+ prev_normed = *pos_normed; + } else { + FCOORD offset = *pos_normed - prev_normed; + float length = offset.length(); + if (length > feature_length) { + // We have gone far enough from the start. We will use this point in + // the next set so return what we have so far. + return index; + } + } + points->add(pos_normed->x(), pos_normed->y(), edge_weight); + int direction = outline->direction_at_index(index % step_length); + if (direction >= 0) { + direction = NormalizeDirection(direction, f_pos, denorm, root_denorm); + // Use both the direction and direction +128 so we are not trying to + // take the mean of something straddling the wrap-around point. + dirs->add(direction, Modulo(direction + 128, 256)); + } + ++num_points; + } + return index; +} + +// Extracts Tesseract features and appends them to the features vector. +// Startpt to lastpt, inclusive, MUST have the same src_outline member, +// which may be NULL. The vector from lastpt to its next is included in +// the feature extraction. Hidden edges should be excluded by the caller. +// If force_poly is true, the features will be extracted from the polygonal +// approximation even if more accurate data is available. +static void ExtractFeaturesFromRun( + const EDGEPT* startpt, const EDGEPT* lastpt, + const DENORM& denorm, double feature_length, bool force_poly, + GenericVector* features) { + const EDGEPT* endpt = lastpt->next; + const C_OUTLINE* outline = startpt->src_outline; + if (outline != NULL && !force_poly) { + // Detailed information is available. We have to normalize only from + // the root_denorm to denorm. + const DENORM* root_denorm = denorm.RootDenorm(); + int total_features = 0; + // Get the features from the outline. + int step_length = outline->pathlength(); + int start_index = startpt->start_step; + // pos is the integer coordinates of the binary image steps. 
+ ICOORD pos = outline->position_at_index(start_index); + // We use an end_index that allows us to use a positive increment, but that + // may be beyond the bounds of the outline steps/ due to wrap-around, to + // so we use % step_length everywhere, except for start_index. + int end_index = lastpt->start_step + lastpt->step_count; + if (end_index <= start_index) + end_index += step_length; + LLSQ prev_points; + LLSQ prev_dirs; + FCOORD prev_normed_pos = outline->sub_pixel_pos_at_index(pos, start_index); + denorm.NormTransform(root_denorm, prev_normed_pos, &prev_normed_pos); + LLSQ points; + LLSQ dirs; + FCOORD normed_pos; + int index = GatherPoints(outline, feature_length, denorm, root_denorm, + start_index, end_index, &pos, &normed_pos, + &points, &dirs); + while (index <= end_index) { + // At each iteration we nominally have 3 accumulated sets of points and + // dirs: prev_points/dirs, points/dirs, next_points/dirs and sum them + // into sum_points/dirs, but we don't necessarily get any features out, + // so if that is the case, we keep accumulating instead of rotating the + // accumulators. + LLSQ next_points; + LLSQ next_dirs; + FCOORD next_normed_pos; + index = GatherPoints(outline, feature_length, denorm, root_denorm, + index, end_index, &pos, &next_normed_pos, + &next_points, &next_dirs); + LLSQ sum_points(prev_points); + // TODO(rays) find out why it is better to use just dirs and next_dirs + // in sum_dirs, instead of using prev_dirs as well. + LLSQ sum_dirs(dirs); + sum_points.add(points); + sum_points.add(next_points); + sum_dirs.add(next_dirs); + bool made_features = false; + // If we have some points, we can try making some features. + if (sum_points.count() > 0) { + // We have gone far enough from the start. Make a feature and restart. 
+ FCOORD fit_pt = sum_points.mean_point(); + FCOORD fit_vector = MeanDirectionVector(sum_points, sum_dirs, + prev_normed_pos, normed_pos); + // The segment to which we fit features is the line passing through + // fit_pt in direction of fit_vector that starts nearest to + // prev_normed_pos and ends nearest to normed_pos. + FCOORD start_pos = prev_normed_pos.nearest_pt_on_line(fit_pt, + fit_vector); + FCOORD end_pos = normed_pos.nearest_pt_on_line(fit_pt, fit_vector); + // Possible correction to match the adjacent polygon segment. + if (total_features == 0 && startpt != endpt) { + FCOORD poly_pos(startpt->pos.x, startpt->pos.y); + denorm.LocalNormTransform(poly_pos, &start_pos); + } + if (index > end_index && startpt != endpt) { + FCOORD poly_pos(endpt->pos.x, endpt->pos.y); + denorm.LocalNormTransform(poly_pos, &end_pos); + } + int num_features = ComputeFeatures(start_pos, end_pos, feature_length, + features); + if (num_features > 0) { + // We made some features so shuffle the accumulators. + prev_points = points; + prev_dirs = dirs; + prev_normed_pos = normed_pos; + points = next_points; + dirs = next_dirs; + made_features = true; + total_features += num_features; + } + // The end of the next set becomes the end next time around. + normed_pos = next_normed_pos; + } + if (!made_features) { + // We didn't make any features, so keep the prev accumulators and + // add the next ones into the current. + points.add(next_points); + dirs.add(next_dirs); + } + } + } else { + // There is no outline, so we are forced to use the polygonal approximation. 
+ const EDGEPT* pt = startpt; + do { + FCOORD start_pos(pt->pos.x, pt->pos.y); + FCOORD end_pos(pt->next->pos.x, pt->next->pos.y); + denorm.LocalNormTransform(start_pos, &start_pos); + denorm.LocalNormTransform(end_pos, &end_pos); + ComputeFeatures(start_pos, end_pos, feature_length, features); + } while ((pt = pt->next) != endpt); + } +} + +// Extracts sets of 3-D features of length kStandardFeatureLength (=12.8), as +// (x,y) position and angle as measured counterclockwise from the vector +// <-1, 0>, from blob using two normalizations defined by bl_denorm and +// cn_denorm. See SetpuBLCNDenorms for definitions. +// If outline_cn_counts is not NULL, on return it contains the cumulative +// number of cn features generated for each outline in the blob (in order). +// Thus after the first outline, there were (*outline_cn_counts)[0] features, +// after the second outline, there were (*outline_cn_counts)[1] features etc. +void Classify::ExtractFeatures(const TBLOB& blob, + bool nonlinear_norm, + GenericVector* bl_features, + GenericVector* cn_features, + INT_FX_RESULT_STRUCT* results, + GenericVector* outline_cn_counts) { + DENORM bl_denorm, cn_denorm; + tesseract::Classify::SetupBLCNDenorms(blob, nonlinear_norm, + &bl_denorm, &cn_denorm, results); + if (outline_cn_counts != NULL) + outline_cn_counts->truncate(0); + // Iterate the outlines. + for (TESSLINE* ol = blob.outlines; ol != NULL; ol = ol->next) { + // Iterate the polygon. + EDGEPT* loop_pt = ol->FindBestStartPt(); + EDGEPT* pt = loop_pt; + if (pt == NULL) continue; + do { + if (pt->IsHidden()) continue; + // Find a run of equal src_outline. + EDGEPT* last_pt = pt; + do { + last_pt = last_pt->next; + } while (last_pt != loop_pt && !last_pt->IsHidden() && + last_pt->src_outline == pt->src_outline); + last_pt = last_pt->prev; + // Until the adaptive classifier can be weaned off polygon segments, + // we have to force extraction from the polygon for the bl_features. 
+ ExtractFeaturesFromRun(pt, last_pt, bl_denorm, kStandardFeatureLength, + true, bl_features); + ExtractFeaturesFromRun(pt, last_pt, cn_denorm, kStandardFeatureLength, + false, cn_features); + pt = last_pt; + } while ((pt = pt->next) != loop_pt); + if (outline_cn_counts != NULL) + outline_cn_counts->push_back(cn_features->size()); + } + results->NumBL = bl_features->size(); + results->NumCN = cn_features->size(); + results->YBottom = blob.bounding_box().bottom(); + results->YTop = blob.bounding_box().top(); + results->Width = blob.bounding_box().width(); +} + +} // namespace tesseract + /*--------------------------------------------------------------------------*/ // Extract a set of standard-sized features from Blobs and write them out in @@ -140,608 +521,25 @@ TrainingSample* GetIntFeatures(tesseract::NormalizationMode mode, // We align the grapheme's centroid at the origin and scale it asymmetrically // in x and y so that the result is vaguely square. // -int ExtractIntFeat(TBLOB *Blob, - const DENORM& denorm, - INT_FEATURE_ARRAY BLFeat, - INT_FEATURE_ARRAY CNFeat, - INT_FX_RESULT_STRUCT* Results, - inT32 *FeatureOutlineArray) { - - TESSLINE *OutLine; - EDGEPT *Loop, *LoopStart, *Segment; - inT16 LastX, LastY, Xmean, Ymean; - inT32 NormX, NormY, DeltaX, DeltaY; - inT32 Xsum, Ysum; - uinT32 Ix, Iy, LengthSum; - uinT16 n; - // n - the number of features to extract from a given outline segment. - // We extract features from every outline segment longer than ~6 units. - // We chop these long segments into standard-sized features approximately - // 13 (= 64 / 5) units in length. 
- uinT8 Theta; - uinT16 NumBLFeatures, NumCNFeatures; - uinT8 RxInv, RyInv; /* x.xxxxxxx * 2^Exp */ - uinT8 RxExp, RyExp; - /* sxxxxxxxxxxxxxxxxxxxxxxx.xxxxxxxx */ - register inT32 pfX, pfY, dX, dY; - uinT16 Length; - register int i; - - Results->Length = 0; - Results->Xmean = 0; - Results->Ymean = 0; - Results->Rx = 0; - Results->Ry = 0; - Results->NumBL = 0; - Results->NumCN = 0; - Results->YBottom = MAX_UINT8; - Results->YTop = 0; - - // Calculate the centroid (Xmean, Ymean) for the blob. - // We use centroid (instead of center of bounding box or center of smallest - // enclosing circle) so the algorithm will not be too greatly influenced by - // small amounts of information at the edge of a character's bounding box. - NumBLFeatures = 0; - NumCNFeatures = 0; - OutLine = Blob->outlines; - Xsum = 0; - Ysum = 0; - LengthSum = 0; - while (OutLine != NULL) { - LoopStart = OutLine->loop; - Loop = LoopStart; - LastX = Loop->pos.x; - LastY = Loop->pos.y; - /* Check for bad loops */ - if ((Loop == NULL) || (Loop->next == NULL) || (Loop->next == LoopStart)) - return FALSE; - do { - Segment = Loop; - Loop = Loop->next; - NormX = Loop->pos.x; - NormY = Loop->pos.y; - - n = 1; - if (!Segment->IsHidden()) { - DeltaX = NormX - LastX; - DeltaY = NormY - LastY; - Length = MySqrt(DeltaX, DeltaY); - n = ((Length << 2) + Length + 32) >> 6; - if (n != 0) { - Xsum += ((LastX << 1) + DeltaX) * (int) Length; - Ysum += ((LastY << 1) + DeltaY) * (int) Length; - LengthSum += Length; - } - } - if (n != 0) { /* Throw away a point that is too close */ - LastX = NormX; - LastY = NormY; - } - } - while (Loop != LoopStart); - OutLine = OutLine->next; +// Deprecated! Prefer tesseract::Classify::ExtractFeatures instead. 
+bool ExtractIntFeat(const TBLOB& blob,
+ bool nonlinear_norm,
+ INT_FEATURE_ARRAY baseline_features,
+ INT_FEATURE_ARRAY charnorm_features,
+ INT_FX_RESULT_STRUCT* results) {
+ GenericVector<INT_FEATURE_STRUCT> bl_features;
+ GenericVector<INT_FEATURE_STRUCT> cn_features;
+ tesseract::Classify::ExtractFeatures(blob, nonlinear_norm,
+ &bl_features, &cn_features, results,
+ NULL);
+ if (bl_features.size() == 0 || cn_features.size() == 0 ||
+ bl_features.size() > MAX_NUM_INT_FEATURES ||
+ cn_features.size() > MAX_NUM_INT_FEATURES) {
+ return false; // Feature extraction failed.
+ }
- if (LengthSum == 0)
- return FALSE;
- Xmean = (Xsum / (inT32) LengthSum) >> 1;
- Ymean = (Ysum / (inT32) LengthSum) >> 1;
-
- Results->Length = LengthSum;
- Results->Xmean = Xmean;
- Results->Ymean = Ymean;
-
- // Extract Baseline normalized features,
- // and find 2nd moments (Ix, Iy) & radius of gyration (Rx, Ry).
- //
- // Ix = Sum y^2 dA, where:
- // Ix: the second moment of area about the axis x
- // dA = 1 for our standard-sized piece of outline
- // y: the perependicular distance to the x axis
- // Rx = sqrt(Ix / A)
- // Note: 1 <= Rx <= height of blob / 2
- // Ry = sqrt(Iy / A)
- // Note: 1 <= Ry <= width of blob / 2
- Ix = 0;
- Iy = 0;
- NumBLFeatures = 0;
- OutLine = Blob->outlines;
- int min_x = 0;
- int max_x = 0;
- while (OutLine != NULL) {
- LoopStart = OutLine->loop;
- Loop = LoopStart;
- LastX = Loop->pos.x - Xmean;
- LastY = Loop->pos.y;
- /* Check for bad loops */
- if ((Loop == NULL) || (Loop->next == NULL) || (Loop->next == LoopStart))
- return FALSE;
- do {
- Segment = Loop;
- Loop = Loop->next;
- NormX = Loop->pos.x - Xmean;
- NormY = Loop->pos.y;
- if (NormY < Results->YBottom)
- Results->YBottom = ClipToRange(NormY, 0, MAX_UINT8);
- if (NormY > Results->YTop)
- Results->YTop = ClipToRange(NormY, 0, MAX_UINT8);
- UpdateRange(NormX, &min_x, &max_x);
-
- n = 1;
- if (!Segment->IsHidden()) {
- DeltaX = NormX - LastX;
- DeltaY = NormY - LastY;
- Length = MySqrt(DeltaX, DeltaY);
- n = ((Length << 2) + Length + 
32) >> 6; - if (n != 0) { - Theta = BinaryAnglePlusPi(DeltaY, DeltaX); - dX = (DeltaX << 8) / n; - dY = (DeltaY << 8) / n; - pfX = (LastX << 8) + (dX >> 1); - pfY = (LastY << 8) + (dY >> 1); - Ix += ((pfY >> 8) - Ymean) * ((pfY >> 8) - Ymean); - // TODO(eger): Hmmm... Xmean is not necessarily 0. - // Figure out if we should center against Xmean for these - // features, and if so fix Iy & SaveFeature(). - Iy += (pfX >> 8) * (pfX >> 8); - if (SaveFeature(BLFeat, - NumBLFeatures, - (inT16) (pfX >> 8), - (inT16) ((pfY >> 8) - 128), - Theta) == FALSE) - return FALSE; - NumBLFeatures++; - for (i = 1; i < n; i++) { - pfX += dX; - pfY += dY; - Ix += ((pfY >> 8) - Ymean) * ((pfY >> 8) - Ymean); - Iy += (pfX >> 8) * (pfX >> 8); - if (SaveFeature(BLFeat, - NumBLFeatures, - (inT16) (pfX >> 8), - (inT16) ((pfY >> 8) - 128), - Theta) == FALSE) - return FALSE; - NumBLFeatures++; - } - } - } - if (n != 0) { /* Throw away a point that is too close */ - LastX = NormX; - LastY = NormY; - } - } - while (Loop != LoopStart); - OutLine = OutLine->next; - } - Results->Width = max_x - min_x; - if (Ix == 0) - Ix = 1; - if (Iy == 0) - Iy = 1; - RxInv = MySqrt2 (NumBLFeatures, Ix, &RxExp); - RyInv = MySqrt2 (NumBLFeatures, Iy, &RyExp); - ClipRadius(&RxInv, &RxExp, &RyInv, &RyExp); - - Results->Rx = (inT16) (51.2 / (double) RxInv * pow (2.0, (double) RxExp)); - Results->Ry = (inT16) (51.2 / (double) RyInv * pow (2.0, (double) RyExp)); - if (Results->Ry == 0) { - /* - This would result in features having 'nan' values. - Since the expression is always > 0, assign a value of 1. 
- */ - Results->Ry = 1; - } - if (Results->Rx == 0) { - Results->Rx = 1; - } - Results->NumBL = NumBLFeatures; - - // Extract character normalized features - // - // Rescale the co-ordinates to "equalize" distribution in X and Y, making - // all of the following unichars be sized to look similar: , ' 1 i - // - // We calculate co-ordinates relative to the centroid, and then scale them - // as follows (accomplishing a scale of up to 102.4 / dimension): - // y *= 51.2 / Rx [ y scaled by 0.0 ... 102.4 / height of glyph ] - // x *= 51.2 / Ry [ x scaled by 0.0 ... 102.4 / width of glyph ] - // Although tempting to think so, this does not guarantee that our range - // is within [-102.4...102.4] x [-102.4...102.4] because (Xmean, Ymean) - // is the centroid, not the center of the bounding box. Instead, we can - // only bound the result to [-204 ... 204] x [-204 ... 204] - // - NumCNFeatures = 0; - OutLine = Blob->outlines; - int OutLineIndex = -1; - while (OutLine != NULL) { - LoopStart = OutLine->loop; - Loop = LoopStart; - LastX = (Loop->pos.x - Xmean) * RyInv; - LastY = (Loop->pos.y - Ymean) * RxInv; - LastX >>= (inT8) RyExp; - LastY >>= (inT8) RxExp; - OutLineIndex++; - - /* Check for bad loops */ - if ((Loop == NULL) || (Loop->next == NULL) || (Loop->next == LoopStart)) - return FALSE; - do { - Segment = Loop; - Loop = Loop->next; - NormX = (Loop->pos.x - Xmean) * RyInv; - NormY = (Loop->pos.y - Ymean) * RxInv; - NormX >>= (inT8) RyExp; - NormY >>= (inT8) RxExp; - - n = 1; - if (!Segment->IsHidden()) { - DeltaX = NormX - LastX; - DeltaY = NormY - LastY; - Length = MySqrt(DeltaX, DeltaY); - n = ((Length << 2) + Length + 32) >> 6; - if (n != 0) { - Theta = BinaryAnglePlusPi(DeltaY, DeltaX); - dX = (DeltaX << 8) / n; - dY = (DeltaY << 8) / n; - pfX = (LastX << 8) + (dX >> 1); - pfY = (LastY << 8) + (dY >> 1); - if (SaveFeature(CNFeat, - NumCNFeatures, - (inT16) (pfX >> 8), - (inT16) (pfY >> 8), - Theta) == FALSE) - return FALSE; - if (FeatureOutlineArray) { - 
FeatureOutlineArray[NumCNFeatures] = OutLineIndex; - } - NumCNFeatures++; - for (i = 1; i < n; i++) { - pfX += dX; - pfY += dY; - if (SaveFeature(CNFeat, - NumCNFeatures, - (inT16) (pfX >> 8), - (inT16) (pfY >> 8), - Theta) == FALSE) - return FALSE; - if (FeatureOutlineArray) { - FeatureOutlineArray[NumCNFeatures] = OutLineIndex; - } - NumCNFeatures++; - } - } - } - if (n != 0) { /* Throw away a point that is too close */ - LastX = NormX; - LastY = NormY; - } - } - while (Loop != LoopStart); - OutLine = OutLine->next; - } - - Results->NumCN = NumCNFeatures; - return TRUE; -} - - -/*--------------------------------------------------------------------------*/ -// Return the "binary angle" [0..255] -// made by vector as measured counterclockwise from <-1, 0> -// The order of the arguments follows the convention of atan2(3) -uinT8 BinaryAnglePlusPi(inT32 Y, inT32 X) { - inT16 Angle, Atan; - uinT16 Ratio; - uinT32 AbsX, AbsY; - - assert ((X != 0) || (Y != 0)); - if (X < 0) - AbsX = -X; - else - AbsX = X; - if (Y < 0) - AbsY = -Y; - else - AbsY = Y; - if (AbsX > AbsY) - Ratio = AbsY * ATAN_TABLE_SIZE / AbsX; - else - Ratio = AbsX * ATAN_TABLE_SIZE / AbsY; - if (Ratio >= ATAN_TABLE_SIZE) - Ratio = ATAN_TABLE_SIZE - 1; - Atan = AtanTable[Ratio]; - if (X >= 0) - if (Y >= 0) - if (AbsX > AbsY) - Angle = Atan; - else - Angle = 64 - Atan; - else if (AbsX > AbsY) - Angle = 256 - Atan; - else - Angle = 192 + Atan; - else if (Y >= 0) - if (AbsX > AbsY) - Angle = 128 - Atan; - else - Angle = 64 + Atan; - else if (AbsX > AbsY) - Angle = 128 + Atan; - else - Angle = 192 - Atan; - - /* reverse angles to match old feature extractor: Angle += PI */ - Angle += 128; - Angle &= 255; - return (uinT8) Angle; -} - - -/*--------------------------------------------------------------------------*/ -int SaveFeature(INT_FEATURE_ARRAY FeatureArray, - uinT16 FeatureNum, - inT16 X, - inT16 Y, - uinT8 Theta) { - INT_FEATURE Feature; - - if (FeatureNum >= MAX_NUM_INT_FEATURES) - return FALSE; - - 
Feature = &(FeatureArray[FeatureNum]); - - X = X + 128; - Y = Y + 128; - - Feature->X = ClipToRange(X, 0, 255); - Feature->Y = ClipToRange(Y, 0, 255); - Feature->Theta = Theta; - Feature->CP_misses = 0; - - return TRUE; -} - - -/*---------------------------------------------------------------------------*/ -// Return floor(sqrt(min(emm, x)^2 + min(emm, y)^2)) -// where emm = EvidenceMultMask. -uinT16 MySqrt(inT32 X, inT32 Y) { - register uinT16 SqRoot; - register uinT32 Square; - register uinT16 BitLocation; - register uinT32 Sum; - const uinT32 EvidenceMultMask = - ((1 << IntegerMatcher::kIntEvidenceTruncBits) - 1); - - if (X < 0) - X = -X; - if (Y < 0) - Y = -Y; - - if (X > EvidenceMultMask) - X = EvidenceMultMask; - if (Y > EvidenceMultMask) - Y = EvidenceMultMask; - - Sum = X * X + Y * Y; - - BitLocation = (EvidenceMultMask + 1) << 1; - SqRoot = 0; - do { - Square = (SqRoot | BitLocation) * (SqRoot | BitLocation); - if (Square <= Sum) - SqRoot |= BitLocation; - BitLocation >>= 1; - } - while (BitLocation); - - return SqRoot; -} - - -/*--------------------------------------------------------------------------*/ -// Return two integers which can be used to express the sqrt(I/N): -// sqrt(I/N) = 51.2 * 2^(*Exp) / retval -uinT8 MySqrt2(uinT16 N, uinT32 I, uinT8 *Exp) { - register inT8 k; - register uinT32 N2; - register uinT8 SqRoot; - register uinT16 Square; - register uinT8 BitLocation; - register uinT16 Ratio; - - N2 = N * 41943; - - k = 9; - while ((N2 & 0xc0000000) == 0) { - N2 <<= 2; - k += 1; - } - - while ((I & 0xc0000000) == 0) { - I <<= 2; - k -= 1; - } - - if (((N2 & 0x80000000) == 0) && ((I & 0x80000000) == 0)) { - N2 <<= 1; - I <<= 1; - } - - N2 &= 0xffff0000; - I >>= 14; - Ratio = N2 / I; - - BitLocation = 128; - SqRoot = 0; - do { - Square = (SqRoot | BitLocation) * (SqRoot | BitLocation); - if (Square <= Ratio) - SqRoot |= BitLocation; - BitLocation >>= 1; - } - while (BitLocation); - - if (k < 0) { - *Exp = 0; - return 255; - } - else { - *Exp = k; 
- return SqRoot; - } -} - - -/*-------------------------------------------------------------------------*/ -void ClipRadius(uinT8 *RxInv, uinT8 *RxExp, uinT8 *RyInv, uinT8 *RyExp) { - register uinT8 AM, BM, AE, BE; - register uinT8 BitN, LastCarry; - int RxInvLarge, RyInvSmall; - - AM = classify_radius_gyr_min_man; - AE = classify_radius_gyr_min_exp; - BM = *RxInv; - BE = *RxExp; - LastCarry = 1; - while ((AM != 0) || (BM != 0)) { - if (AE > BE) { - BitN = LastCarry + (AM & 1) + 1; - AM >>= 1; - AE--; - } - else if (AE < BE) { - BitN = LastCarry + (!(BM & 1)); - BM >>= 1; - BE--; - } - else { /* AE == BE */ - BitN = LastCarry + (AM & 1) + (!(BM & 1)); - AM >>= 1; - BM >>= 1; - AE--; - BE--; - } - LastCarry = (BitN & 2) > 1; - BitN = BitN & 1; - } - BitN = LastCarry + 1; - LastCarry = (BitN & 2) > 1; - BitN = BitN & 1; - - if (BitN == 1) { - *RxInv = classify_radius_gyr_min_man; - *RxExp = classify_radius_gyr_min_exp; - } - - AM = classify_radius_gyr_min_man; - AE = classify_radius_gyr_min_exp; - BM = *RyInv; - BE = *RyExp; - LastCarry = 1; - while ((AM != 0) || (BM != 0)) { - if (AE > BE) { - BitN = LastCarry + (AM & 1) + 1; - AM >>= 1; - AE--; - } - else if (AE < BE) { - BitN = LastCarry + (!(BM & 1)); - BM >>= 1; - BE--; - } - else { /* AE == BE */ - BitN = LastCarry + (AM & 1) + (!(BM & 1)); - AM >>= 1; - BM >>= 1; - AE--; - BE--; - } - LastCarry = (BitN & 2) > 1; - BitN = BitN & 1; - } - BitN = LastCarry + 1; - LastCarry = (BitN & 2) > 1; - BitN = BitN & 1; - - if (BitN == 1) { - *RyInv = classify_radius_gyr_min_man; - *RyExp = classify_radius_gyr_min_exp; - } - - AM = classify_radius_gyr_max_man; - AE = classify_radius_gyr_max_exp; - BM = *RxInv; - BE = *RxExp; - LastCarry = 1; - while ((AM != 0) || (BM != 0)) { - if (AE > BE) { - BitN = LastCarry + (AM & 1) + 1; - AM >>= 1; - AE--; - } - else if (AE < BE) { - BitN = LastCarry + (!(BM & 1)); - BM >>= 1; - BE--; - } - else { /* AE == BE */ - BitN = LastCarry + (AM & 1) + (!(BM & 1)); - AM >>= 1; - BM >>= 1; - 
AE--; - BE--; - } - LastCarry = (BitN & 2) > 1; - BitN = BitN & 1; - } - BitN = LastCarry + 1; - LastCarry = (BitN & 2) > 1; - BitN = BitN & 1; - - if (BitN == 1) - RxInvLarge = 1; - else - RxInvLarge = 0; - - AM = *RyInv; - AE = *RyExp; - BM = classify_radius_gyr_max_man; - BE = classify_radius_gyr_max_exp; - LastCarry = 1; - while ((AM != 0) || (BM != 0)) { - if (AE > BE) { - BitN = LastCarry + (AM & 1) + 1; - AM >>= 1; - AE--; - } - else if (AE < BE) { - BitN = LastCarry + (!(BM & 1)); - BM >>= 1; - BE--; - } - else { /* AE == BE */ - BitN = LastCarry + (AM & 1) + (!(BM & 1)); - AM >>= 1; - BM >>= 1; - AE--; - BE--; - } - LastCarry = (BitN & 2) > 1; - BitN = BitN & 1; - } - BitN = LastCarry + 1; - LastCarry = (BitN & 2) > 1; - BitN = BitN & 1; - - if (BitN == 1) - RyInvSmall = 1; - else - RyInvSmall = 0; - - if (RxInvLarge && RyInvSmall) { - *RyInv = classify_radius_gyr_max_man; - *RyExp = classify_radius_gyr_max_exp; - } - + memcpy(baseline_features, &bl_features[0], + bl_features.size() * sizeof(bl_features[0])); + memcpy(charnorm_features, &cn_features[0], + cn_features.size() * sizeof(cn_features[0])); + return true; } diff --git a/classify/intfx.h b/classify/intfx.h index 30150947d..11a68377c 100644 --- a/classify/intfx.h +++ b/classify/intfx.h @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: intfx.h - ** Purpose: Interface to high level integer feature extractor. - ** Author: Robert Moss - ** History: Tue May 21 15:51:57 MDT 1991, RWM, Created. + ** Filename: intfx.h + ** Purpose: Interface to high level integer feature extractor. + ** Author: Robert Moss + ** History: Tue May 21 15:51:57 MDT 1991, RWM, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. 
** You may obtain a copy of the License at @@ -42,6 +42,9 @@ struct INT_FX_RESULT_STRUCT { uinT8 YTop; // Top of blob in BLN coords. }; +// The standard feature length +const double kStandardFeatureLength = 64.0 / 5; + /**---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------**/ @@ -51,28 +54,22 @@ void InitIntegerFX(); // theta direction in an INT_FEATURE_STRUCT. FCOORD FeatureDirection(uinT8 theta); -tesseract::TrainingSample* GetIntFeatures( - tesseract::NormalizationMode mode, TBLOB *blob, - const DENORM& denorm); +namespace tesseract { + // Generates a TrainingSample from a TBLOB. Extracts features and sets + // the bounding box, so classifiers that operate on the image can work. + // TODO(rays) BlobToTrainingSample must remain a global function until + // the FlexFx and FeatureDescription code can be removed and LearnBlob + // made a member of Classify. + TrainingSample* BlobToTrainingSample(const TBLOB& blob, + tesseract::NormalizationMode mode, + bool nonlinear_norm); +} -int ExtractIntFeat(TBLOB *Blob, - const DENORM& denorm, - INT_FEATURE_ARRAY BLFeat, - INT_FEATURE_ARRAY CNFeat, - INT_FX_RESULT_STRUCT* Results, - inT32 *FeatureOutlineArray = 0); +// Deprecated! Prefer tesseract::Classify::ExtractFeatures instead. 
+bool ExtractIntFeat(const TBLOB& blob,
+ bool nonlinear_norm,
+ INT_FEATURE_ARRAY BLFeat,
+ INT_FEATURE_ARRAY CNFeat,
+ INT_FX_RESULT_STRUCT* Results);
-uinT8 BinaryAnglePlusPi(inT32 Y, inT32 X);
-
-int SaveFeature(INT_FEATURE_ARRAY FeatureArray,
- uinT16 FeatureNum,
- inT16 X,
- inT16 Y,
- uinT8 Theta);
-
-uinT16 MySqrt(inT32 X, inT32 Y);
-
-uinT8 MySqrt2(uinT16 N, uinT32 I, uinT8 *Exp);
-
-void ClipRadius(uinT8 *RxInv, uinT8 *RxExp, uinT8 *RyInv, uinT8 *RyExp);
 #endif
diff --git a/classify/intmatcher.h b/classify/intmatcher.h
index 50dbd9796..5598d273a 100644
--- a/classify/intmatcher.h
+++ b/classify/intmatcher.h
@@ -28,7 +28,7 @@ extern BOOL_VAR_H(disable_character_fragments, FALSE,
 "Do not include character fragments in the"
 " results of the classifier");
-extern INT_VAR_H(classify_integer_matcher_multiplier, 14,
+extern INT_VAR_H(classify_integer_matcher_multiplier, 10,
 "Integer Matcher Multiplier 0-255: ");
diff --git a/classify/intproto.cpp b/classify/intproto.cpp
index 800e673d3..e5621b1d4 100644
--- a/classify/intproto.cpp
+++ b/classify/intproto.cpp
@@ -37,6 +37,7 @@
 #include "mfoutline.h"
 #include "ndminx.h"
 #include "picofeat.h"
+#include "points.h"
 #include "shapetable.h"
 #include "svmnode.h"
@@ -206,6 +207,22 @@ double_VAR(classify_pp_side_pad, 2.5, "Proto Pruner Side Pad");
 /*-----------------------------------------------------------------------------
 Public Code
-----------------------------------------------------------------------------*/
+// Builds a feature from an FCOORD for position with all the necessary
+// clipping and rounding.
+INT_FEATURE_STRUCT::INT_FEATURE_STRUCT(const FCOORD& pos, uinT8 theta)
+ : X(ClipToRange(static_cast<int>(pos.x() + 0.5), 0, 255)),
+ Y(ClipToRange(static_cast<int>(pos.y() + 0.5), 0, 255)),
+ Theta(theta),
+ CP_misses(0) {
+}
+// Builds a feature from ints with all the necessary clipping and casting.
+INT_FEATURE_STRUCT::INT_FEATURE_STRUCT(int x, int y, int theta)
+ : X(static_cast<uinT8>(ClipToRange(x, 0, MAX_UINT8))),
+ Y(static_cast<uinT8>(ClipToRange(y, 0, MAX_UINT8))),
+ Theta(static_cast<uinT8>(ClipToRange(theta, 0, MAX_UINT8))),
+ CP_misses(0) {
+}
+
 /*---------------------------------------------------------------------------*/
 /**
 * This routine adds a new class structure to a set of
diff --git a/classify/intproto.h b/classify/intproto.h
index 718689d67..302048757 100644
--- a/classify/intproto.h
+++ b/classify/intproto.h
@@ -28,6 +28,8 @@
 #include "scrollview.h"
 #include "unicharset.h"
+class FCOORD;
+
 /* define order of params in pruners */
 #define PRUNER_X 0
 #define PRUNER_Y 1
@@ -130,8 +132,14 @@ INT_TEMPLATES_STRUCT, *INT_TEMPLATES;
 #define MAX_NUM_INT_FEATURES 512
 #define INT_CHAR_NORM_RANGE 256
-struct INT_FEATURE_STRUCT
-{
+struct INT_FEATURE_STRUCT {
+ INT_FEATURE_STRUCT() : X(0), Y(0), Theta(0), CP_misses(0) { }
+ // Builds a feature from an FCOORD for position with all the necessary
+ // clipping and rounding.
+ INT_FEATURE_STRUCT(const FCOORD& pos, uinT8 theta);
+ // Builds a feature from ints with all the necessary clipping and casting.
+ INT_FEATURE_STRUCT(int x, int y, int theta); + uinT8 X; uinT8 Y; uinT8 Theta; diff --git a/classify/mastertrainer.cpp b/classify/mastertrainer.cpp index 3da91e6b1..1e69da14b 100644 --- a/classify/mastertrainer.cpp +++ b/classify/mastertrainer.cpp @@ -30,6 +30,7 @@ #include "allheaders.h" #include "boxread.h" #include "classify.h" +#include "efio.h" #include "errorcounter.h" #include "featdefs.h" #include "sampleiterator.h" @@ -58,10 +59,6 @@ MasterTrainer::MasterTrainer(NormalizationMode norm_mode, enable_shape_anaylsis_(shape_analysis), enable_replication_(replicate_samples), fragments_(NULL), prev_unichar_id_(-1), debug_level_(debug_level) { - fontinfo_table_.set_compare_callback( - NewPermanentTessCallback(CompareFontInfo)); - fontinfo_table_.set_clear_callback( - NewPermanentTessCallback(FontInfoDeleteCallback)); } MasterTrainer::~MasterTrainer() { @@ -82,10 +79,7 @@ bool MasterTrainer::Serialize(FILE* fp) const { if (!verify_samples_.Serialize(fp)) return false; if (!master_shapes_.Serialize(fp)) return false; if (!flat_shapes_.Serialize(fp)) return false; - if (!fontinfo_table_.write(fp, NewPermanentTessCallback(write_info))) - return false; - if (!fontinfo_table_.write(fp, NewPermanentTessCallback(write_spacing_info))) - return false; + if (!fontinfo_table_.Serialize(fp)) return false; if (!xheights_.Serialize(fp)) return false; return true; } @@ -106,11 +100,7 @@ bool MasterTrainer::DeSerialize(bool swap, FILE* fp) { if (!verify_samples_.DeSerialize(swap, fp)) return false; if (!master_shapes_.DeSerialize(swap, fp)) return false; if (!flat_shapes_.DeSerialize(swap, fp)) return false; - if (!fontinfo_table_.read(fp, NewPermanentTessCallback(read_info), swap)) - return false; - if (!fontinfo_table_.read(fp, NewPermanentTessCallback(read_spacing_info), - swap)) - return false; + if (!fontinfo_table_.DeSerialize(swap, fp)) return false; if (!xheights_.DeSerialize(swap, fp)) return false; return true; } @@ -122,8 +112,10 @@ void 
MasterTrainer::LoadUnicharset(const char* filename) { "Building unicharset for training from scratch...\n", filename); unicharset_.clear(); - // Space character needed to represent NIL_LIST classification. - unicharset_.unichar_insert(" "); + UNICHARSET initialized; + // Add special characters, as they were removed by the clear, but the + // default constructor puts them in. + unicharset_.AppendOtherUnicharset(initialized); } charsetsize_ = unicharset_.size(); delete [] fragments_; @@ -138,7 +130,7 @@ void MasterTrainer::LoadUnicharset(const char* filename) { // adding them to the trainer with the font_id from the content of the file. // See mftraining.cpp for a description of the file format. // If verification, then these are verification samples, not training. -void MasterTrainer::ReadTrainingSamples(FILE *fp, +void MasterTrainer::ReadTrainingSamples(const char* page_name, const FEATURE_DEFS_STRUCT& feature_defs, bool verification) { char buffer[2048]; @@ -148,6 +140,12 @@ void MasterTrainer::ReadTrainingSamples(FILE *fp, int cn_feature_type = ShortNameToFeatureType(feature_defs, kCNFeatureType); int geo_feature_type = ShortNameToFeatureType(feature_defs, kGeoFeatureType); + FILE* fp = Efopen(page_name, "rb"); + if (fp == NULL) { + tprintf("Failed to open tr file: %s\n", page_name); + return; + } + tr_filenames_.push_back(STRING(page_name)); while (fgets(buffer, sizeof(buffer), fp) != NULL) { if (buffer[0] == '\n') continue; @@ -159,6 +157,7 @@ void MasterTrainer::ReadTrainingSamples(FILE *fp, } *space++ = '\0'; int font_id = GetFontInfoId(buffer); + if (font_id < 0) font_id = 0; int page_number; STRING unichar; TBOX bounding_box; @@ -177,6 +176,7 @@ void MasterTrainer::ReadTrainingSamples(FILE *fp, FreeCharDescription(char_desc); } charsetsize_ = unicharset_.size(); + fclose(fp); } // Adds the given single sample to the trainer, setting the classid @@ -278,23 +278,23 @@ void MasterTrainer::SetupMasterShapes() { const CHAR_FRAGMENT *fragment = 
samples_.unicharset().get_fragment(c); if (fragment == NULL) - char_shapes.AppendMasterShapes(shapes); + char_shapes.AppendMasterShapes(shapes, NULL); else if (fragment->is_beginning()) - char_shapes_begin_fragment.AppendMasterShapes(shapes); + char_shapes_begin_fragment.AppendMasterShapes(shapes, NULL); else if (fragment->is_ending()) - char_shapes_end_fragment.AppendMasterShapes(shapes); + char_shapes_end_fragment.AppendMasterShapes(shapes, NULL); else - char_shapes.AppendMasterShapes(shapes); + char_shapes.AppendMasterShapes(shapes, NULL); } ClusterShapes(kMinClusteredShapes, kMaxUnicharsPerCluster, kFontMergeDistance, &char_shapes_begin_fragment); - char_shapes.AppendMasterShapes(char_shapes_begin_fragment); + char_shapes.AppendMasterShapes(char_shapes_begin_fragment, NULL); ClusterShapes(kMinClusteredShapes, kMaxUnicharsPerCluster, kFontMergeDistance, &char_shapes_end_fragment); - char_shapes.AppendMasterShapes(char_shapes_end_fragment); + char_shapes.AppendMasterShapes(char_shapes_end_fragment, NULL); ClusterShapes(kMinClusteredShapes, kMaxUnicharsPerCluster, kFontMergeDistance, &char_shapes); - master_shapes_.AppendMasterShapes(char_shapes); + master_shapes_.AppendMasterShapes(char_shapes, NULL); tprintf("Master shape_table:%s\n", master_shapes_.SummaryStr().string()); } @@ -401,7 +401,7 @@ bool MasterTrainer::LoadXHeights(const char* filename) { continue; fontinfo.name = buffer; if (!fontinfo_table_.contains(fontinfo)) continue; - int fontinfo_id = fontinfo_table_.get_id(fontinfo); + int fontinfo_id = fontinfo_table_.get_index(fontinfo); xheights_[fontinfo_id] = xht; total_xheight += xht; ++xheight_count; @@ -439,7 +439,7 @@ bool MasterTrainer::AddSpacingInfo(const char *filename) { char kerned_uch[UNICHAR_LEN]; int x_gap, x_gap_before, x_gap_after, num_kerned; ASSERT_HOST(fscanf(fontinfo_file, "%d\n", &num_unichars) == 1); - FontInfo *fi = fontinfo_table_.get_mutable(fontinfo_id); + FontInfo *fi = &fontinfo_table_.get(fontinfo_id); 
fi->init_spacing(unicharset_.size());
 FontSpacingInfo *spacing = NULL;
 for (int l = 0; l < num_unichars; ++l) {
@@ -480,11 +480,7 @@ int MasterTrainer::GetFontInfoId(const char* font_name) {
 fontinfo.name = const_cast<char*>(font_name);
 fontinfo.properties = 0; // Not used to lookup in the table
 fontinfo.universal_id = 0;
- if (!fontinfo_table_.contains(fontinfo)) {
- return -1;
- } else {
- return fontinfo_table_.get_id(fontinfo);
- }
+ return fontinfo_table_.get_index(fontinfo);
 }
 // Returns the font_id of the closest matching font name to the given
 // filename. It is assumed that a substring of the filename will match
 // one of the fonts. If more than one is matched, the longest is returned.
@@ -585,7 +581,7 @@ void MasterTrainer::WriteInttempAndPFFMTable(const UNICHARSET& unicharset,
 const char* pffmtable_file) {
 tesseract::Classify *classify = new tesseract::Classify();
 // Move the fontinfo table to classify.
- classify->get_fontinfo_table().move(&fontinfo_table_);
+ fontinfo_table_.MoveTo(&classify->get_fontinfo_table());
 INT_TEMPLATES int_templates =
 classify->CreateIntTemplates(float_classes, shape_set);
 FILE* fp = fopen(inttemp_file, "wb");
@@ -750,17 +746,29 @@ void MasterTrainer::DisplaySamples(const char* unichar_str1, int cloud_font,
 }
 #endif // GRAPHICS_DISABLED
+void MasterTrainer::TestClassifierVOld(bool replicate_samples,
+ ShapeClassifier* test_classifier,
+ ShapeClassifier* old_classifier) {
+ SampleIterator sample_it;
+ sample_it.Init(NULL, NULL, replicate_samples, &samples_);
+ ErrorCounter::DebugNewErrors(test_classifier, old_classifier,
+ CT_UNICHAR_TOPN_ERR, fontinfo_table_,
+ page_images_, &sample_it);
+}
+
 // Tests the given test_classifier on the internal samples.
 // See TestClassifier for details.
-void MasterTrainer::TestClassifierOnSamples(int report_level, +void MasterTrainer::TestClassifierOnSamples(CountTypes error_mode, + int report_level, bool replicate_samples, ShapeClassifier* test_classifier, STRING* report_string) { - TestClassifier(report_level, replicate_samples, &samples_, + TestClassifier(error_mode, report_level, replicate_samples, &samples_, test_classifier, report_string); } -// Tests the given test_classifier on the given samples +// Tests the given test_classifier on the given samples. +// error_mode indicates what counts as an error. // report_levels: // 0 = no output. // 1 = bottom-line error rate. @@ -772,14 +780,14 @@ void MasterTrainer::TestClassifierOnSamples(int report_level, // sample including replicated and systematically perturbed samples. // If report_string is non-NULL, a summary of the results for each font // is appended to the report_string. -double MasterTrainer::TestClassifier(int report_level, +double MasterTrainer::TestClassifier(CountTypes error_mode, + int report_level, bool replicate_samples, TrainingSampleSet* samples, ShapeClassifier* test_classifier, STRING* report_string) { SampleIterator sample_it; - sample_it.Init(NULL, test_classifier->GetShapeTable(), replicate_samples, - samples); + sample_it.Init(NULL, NULL, replicate_samples, samples); if (report_level > 0) { int num_samples = 0; for (sample_it.Begin(); !sample_it.AtEnd(); sample_it.Next()) @@ -791,7 +799,7 @@ double MasterTrainer::TestClassifier(int report_level, } double unichar_error = 0.0; ErrorCounter::ComputeErrorRate(test_classifier, report_level, - CT_SHAPE_TOP_ERR, fontinfo_table_, + error_mode, fontinfo_table_, page_images_, &sample_it, &unichar_error, NULL, report_string); return unichar_error; diff --git a/classify/mastertrainer.h b/classify/mastertrainer.h index 633c39750..0cc2ea11c 100644 --- a/classify/mastertrainer.h +++ b/classify/mastertrainer.h @@ -29,6 +29,7 @@ #include "cluster.h" #include "intfx.h" #include "elst.h" +#include 
"errorcounter.h" #include "featdefs.h" #include "fontinfo.h" #include "indexmapbidi.h" @@ -89,7 +90,7 @@ class MasterTrainer { // Reads the samples and their features from the given file, // adding them to the trainer with the font_id from the content of the file. // If verification, then these are verification samples, not training. - void ReadTrainingSamples(FILE *fp, + void ReadTrainingSamples(const char* page_name, const FEATURE_DEFS_STRUCT& feature_defs, bool verification); @@ -159,6 +160,12 @@ class MasterTrainer { // one of the fonts. If more than one is matched, the longest is returned. int GetBestMatchingFontInfoId(const char* filename); + // Returns the filename of the tr file corresponding to the command-line + // argument with the given index. + const STRING& GetTRFileName(int index) const { + return tr_filenames_[index]; + } + // Sets up a flat shapetable with one shape per class/font combination. void SetupFlatShapeTable(ShapeTable* shape_table); @@ -207,13 +214,19 @@ class MasterTrainer { const char* unichar_str2, int canonical_font); #endif // GRAPHICS_DISABLED + void TestClassifierVOld(bool replicate_samples, + ShapeClassifier* test_classifier, + ShapeClassifier* old_classifier); + // Tests the given test_classifier on the internal samples. // See TestClassifier for details. - void TestClassifierOnSamples(int report_level, + void TestClassifierOnSamples(CountTypes error_mode, + int report_level, bool replicate_samples, ShapeClassifier* test_classifier, STRING* report_string); // Tests the given test_classifier on the given samples + // error_mode indicates what counts as an error. // report_levels: // 0 = no output. // 1 = bottom-line error rate. @@ -225,7 +238,8 @@ class MasterTrainer { // sample including replicated and systematically perturbed samples. // If report_string is non-NULL, a summary of the results for each font // is appended to the report_string. 
- double TestClassifier(int report_level, + double TestClassifier(CountTypes error_mode, + int report_level, bool replicate_samples, TrainingSampleSet* samples, ShapeClassifier* test_classifier, @@ -263,9 +277,9 @@ class MasterTrainer { // Flat shape table has each unichar/font id pair in a separate shape. ShapeTable flat_shapes_; // Font metrics gathered from multiple files. - UnicityTable fontinfo_table_; + FontInfoTable fontinfo_table_; // Array of xheights indexed by font ids in fontinfo_table_; - GenericVector xheights_; + GenericVector xheights_; // Non-serialized data initialized by other means or used temporarily // during loading of training samples. @@ -291,6 +305,8 @@ class MasterTrainer { // Indexed by page_num_ in the samples. // These images are owned by the trainer and need to be pixDestroyed. GenericVector page_images_; + // Vector of filenames of loaded tr files. + GenericVector tr_filenames_; }; } // namespace tesseract. diff --git a/classify/mf.cpp b/classify/mf.cpp index 714f04083..ad1ba285f 100644 --- a/classify/mf.cpp +++ b/classify/mf.cpp @@ -33,7 +33,9 @@ Private Code ----------------------------------------------------------------------------**/ /*---------------------------------------------------------------------------*/ -FEATURE_SET ExtractMicros(TBLOB *Blob, const DENORM& denorm) { +FEATURE_SET ExtractMicros(TBLOB *Blob, const DENORM& bl_denorm, + const DENORM& cn_denorm, + const INT_FX_RESULT_STRUCT& fx_info) { /* ** Parameters: ** Blob blob to extract micro-features from @@ -52,7 +54,8 @@ FEATURE_SET ExtractMicros(TBLOB *Blob, const DENORM& denorm) { FEATURE Feature; MICROFEATURE OldFeature; - OldFeatures = (MICROFEATURES)BlobMicroFeatures(Blob, denorm); + OldFeatures = (MICROFEATURES)BlobMicroFeatures(Blob, bl_denorm, cn_denorm, + fx_info); if (OldFeatures == NULL) return NULL; NumFeatures = count (OldFeatures); diff --git a/classify/mf.h b/classify/mf.h index 0f5e3f64b..716f5b8c0 100644 --- a/classify/mf.h +++ b/classify/mf.h @@ 
-34,6 +34,8 @@ typedef float MicroFeature[MFCount]; /*---------------------------------------------------------------------------- Private Function Prototypes -----------------------------------------------------------------------------*/ -FEATURE_SET ExtractMicros(TBLOB *Blob, const DENORM& denorm); +FEATURE_SET ExtractMicros(TBLOB *Blob, const DENORM& bl_denorm, + const DENORM& cn_denorm, + const INT_FX_RESULT_STRUCT& fx_info); #endif diff --git a/classify/mfoutline.cpp b/classify/mfoutline.cpp index 5903f5d3a..f39ec0ec5 100644 --- a/classify/mfoutline.cpp +++ b/classify/mfoutline.cpp @@ -103,56 +103,6 @@ LIST ConvertOutlines(TESSLINE *outline, return mf_outlines; } - -/*---------------------------------------------------------------------------*/ -void ComputeOutlineStats(LIST Outlines, OUTLINE_STATS *OutlineStats) { -/* - ** Parameters: - ** Outlines list of outlines to compute stats for - ** OutlineStats place to put results - ** Globals: none - ** Operation: This routine computes several statistics about the outlines - ** in Outlines. These statistics are usually used to perform - ** anistropic normalization of all of the outlines. The - ** statistics generated are: - ** first moments about x and y axes - ** total length of all outlines - ** center of mass of all outlines - ** second moments about center of mass axes - ** radius of gyration about center of mass axes - ** Return: none (results are returned in OutlineStats) - ** Exceptions: none - ** History: Fri Dec 14 08:32:03 1990, DSJ, Created. 
- */ - MFOUTLINE Outline; - MFOUTLINE EdgePoint; - MFEDGEPT *Current; - MFEDGEPT *Last; - - InitOutlineStats(OutlineStats); - iterate(Outlines) { - Outline = (MFOUTLINE) first_node (Outlines); - - Last = PointAt (Outline); - Outline = NextPointAfter (Outline); - EdgePoint = Outline; - do { - Current = PointAt (EdgePoint); - - UpdateOutlineStats (OutlineStats, - Last->Point.x, Last->Point.y, - Current->Point.x, Current->Point.y); - - Last = Current; - EdgePoint = NextPointAfter (EdgePoint); - } - while (EdgePoint != Outline); - } - FinishOutlineStats(OutlineStats); - -} /* ComputeOutlineStats */ - - /*---------------------------------------------------------------------------*/ void FindDirectionChanges(MFOUTLINE Outline, FLOAT32 MinSlope, @@ -334,7 +284,8 @@ void NormalizeOutline(MFOUTLINE Outline, MFOUTLINE EdgePoint = Outline; do { MFEDGEPT *Current = PointAt(EdgePoint); - Current->Point.y = MF_SCALE_FACTOR * (Current->Point.y - BASELINE_OFFSET); + Current->Point.y = MF_SCALE_FACTOR * + (Current->Point.y - kBlnBaselineOffset); Current->Point.x = MF_SCALE_FACTOR * (Current->Point.x - XOrigin); EdgePoint = NextPointAfter(EdgePoint); } while (EdgePoint != Outline); @@ -365,34 +316,10 @@ void Classify::NormalizeOutlines(LIST Outlines, ** History: Fri Dec 14 08:14:55 1990, DSJ, Created. 
*/ MFOUTLINE Outline; - OUTLINE_STATS OutlineStats; - FLOAT32 BaselineScale; switch (classify_norm_method) { case character: - ComputeOutlineStats(Outlines, &OutlineStats); - - /* limit scale factor to avoid overscaling small blobs (.,`'), - thin blobs (l1ift), and merged blobs */ - *XScale = *YScale = BaselineScale = MF_SCALE_FACTOR; - *XScale *= OutlineStats.Ry; - *YScale *= OutlineStats.Rx; - if (*XScale < classify_min_norm_scale_x) - *XScale = classify_min_norm_scale_x; - if (*YScale < classify_min_norm_scale_y) - *YScale = classify_min_norm_scale_y; - if (*XScale > classify_max_norm_scale_x && - *YScale <= classify_max_norm_scale_y) - *XScale = classify_max_norm_scale_x; - *XScale = classify_char_norm_range * BaselineScale / *XScale; - *YScale = classify_char_norm_range * BaselineScale / *YScale; - - iterate(Outlines) { - Outline = (MFOUTLINE) first_node (Outlines); - CharNormalizeOutline (Outline, - OutlineStats.x, OutlineStats.y, - *XScale, *YScale); - } + ASSERT_HOST(!"How did NormalizeOutlines get called in character mode?"); break; case baseline: @@ -436,11 +363,7 @@ void ChangeDirection(MFOUTLINE Start, MFOUTLINE End, DIRECTION Direction) { /*---------------------------------------------------------------------------*/ -void CharNormalizeOutline(MFOUTLINE Outline, - FLOAT32 XCenter, - FLOAT32 YCenter, - FLOAT32 XScale, - FLOAT32 YScale) { +void CharNormalizeOutline(MFOUTLINE Outline, const DENORM& cn_denorm) { /* ** Parameters: ** Outline outline to be character normalized @@ -463,13 +386,13 @@ void CharNormalizeOutline(MFOUTLINE Outline, First = Outline; Current = First; do { - CurrentPoint = PointAt (Current); - CurrentPoint->Point.x = - (CurrentPoint->Point.x - XCenter) * XScale; - CurrentPoint->Point.y = - (CurrentPoint->Point.y - YCenter) * YScale; + CurrentPoint = PointAt(Current); + FCOORD pos(CurrentPoint->Point.x, CurrentPoint->Point.y); + cn_denorm.LocalNormTransform(pos, &pos); + CurrentPoint->Point.x = (pos.x() - MAX_UINT8 / 2) * 
MF_SCALE_FACTOR; + CurrentPoint->Point.y = (pos.y() - MAX_UINT8 / 2) * MF_SCALE_FACTOR; - Current = NextPointAfter (Current); + Current = NextPointAfter(Current); } while (Current != First); diff --git a/classify/mfoutline.h b/classify/mfoutline.h index 71dd310b8..bdf20f372 100644 --- a/classify/mfoutline.h +++ b/classify/mfoutline.h @@ -21,10 +21,10 @@ /**---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------**/ +#include "blobs.h" #include "host.h" #include "oldlist.h" #include "fpoint.h" -#include "baseline.h" #include "params.h" #define NORMAL_X_HEIGHT (0.5) @@ -68,7 +68,7 @@ typedef enum { #define AverageOf(A,B) (((A) + (B)) / 2) /* macro for computing the scale factor to use to normalize characters */ -#define MF_SCALE_FACTOR (NORMAL_X_HEIGHT / BASELINE_SCALE) +#define MF_SCALE_FACTOR (NORMAL_X_HEIGHT / kBlnXHeight) /* macros for manipulating micro-feature outlines */ #define DegenerateOutline(O) (((O) == NIL_LIST) || ((O) == list_rest(O))) @@ -93,8 +93,6 @@ LIST ConvertOutlines(TESSLINE *Outline, LIST ConvertedOutlines, OUTLINETYPE OutlineType); -void ComputeOutlineStats(LIST Outlines, OUTLINE_STATS *OutlineStats); - void FilterEdgeNoise(MFOUTLINE Outline, FLOAT32 NoiseSegmentLength); void FindDirectionChanges(MFOUTLINE Outline, @@ -119,11 +117,10 @@ void NormalizeOutline(MFOUTLINE Outline, -----------------------------------------------------------------------------*/ void ChangeDirection(MFOUTLINE Start, MFOUTLINE End, DIRECTION Direction); -void CharNormalizeOutline(MFOUTLINE Outline, - FLOAT32 XCenter, - FLOAT32 YCenter, - FLOAT32 XScale, - FLOAT32 YScale); +// Normalizes the Outline in-place using cn_denorm's local transformation, +// then converts from the integer feature range [0,255] to the clusterer +// feature range of [-0.5, 0.5]. 
+void CharNormalizeOutline(MFOUTLINE Outline, const DENORM& cn_denorm); void ComputeDirection(MFEDGEPT *Start, MFEDGEPT *Finish, diff --git a/classify/mfx.cpp b/classify/mfx.cpp index a053a051f..9f3e3d242 100644 --- a/classify/mfx.cpp +++ b/classify/mfx.cpp @@ -59,7 +59,9 @@ MICROFEATURE ExtractMicroFeature(MFOUTLINE Start, MFOUTLINE End); ----------------------------------------------------------------------------**/ /*---------------------------------------------------------------------------*/ -CHAR_FEATURES BlobMicroFeatures(TBLOB *Blob, const DENORM& denorm) { +CHAR_FEATURES BlobMicroFeatures(TBLOB *Blob, const DENORM& bl_denorm, + const DENORM& cn_denorm, + const INT_FX_RESULT_STRUCT& fx_info) { /* ** Parameters: ** Blob blob to extract micro-features from @@ -74,35 +76,25 @@ CHAR_FEATURES BlobMicroFeatures(TBLOB *Blob, const DENORM& denorm) { ** History: 7/21/89, DSJ, Created. */ MICROFEATURES MicroFeatures = NIL_LIST; - FLOAT32 XScale, YScale; LIST Outlines; LIST RemainingOutlines; MFOUTLINE Outline; - INT_FEATURE_ARRAY blfeatures; - INT_FEATURE_ARRAY cnfeatures; - INT_FX_RESULT_STRUCT results; if (Blob != NULL) { - Outlines = ConvertBlob (Blob); - if (!ExtractIntFeat(Blob, denorm, blfeatures, cnfeatures, &results)) - return NULL; - XScale = 0.2f / results.Ry; - YScale = 0.2f / results.Rx; + Outlines = ConvertBlob(Blob); RemainingOutlines = Outlines; iterate(RemainingOutlines) { Outline = (MFOUTLINE) first_node (RemainingOutlines); - CharNormalizeOutline (Outline, - results.Xmean, results.Ymean, - XScale, YScale); + CharNormalizeOutline(Outline, cn_denorm); } RemainingOutlines = Outlines; iterate(RemainingOutlines) { - Outline = (MFOUTLINE) first_node (RemainingOutlines); + Outline = (MFOUTLINE) first_node(RemainingOutlines); FindDirectionChanges(Outline, classify_min_slope, classify_max_slope); MarkDirectionChanges(Outline); - MicroFeatures = ConvertToMicroFeatures (Outline, MicroFeatures); + MicroFeatures = ConvertToMicroFeatures(Outline, MicroFeatures); 
} FreeOutlines(Outlines); } diff --git a/classify/mfx.h b/classify/mfx.h index bd3139967..7e7fe1cfb 100644 --- a/classify/mfx.h +++ b/classify/mfx.h @@ -35,6 +35,8 @@ extern double_VAR_H(classify_max_slope, 2.414213562, /**---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------**/ -CHAR_FEATURES BlobMicroFeatures(TBLOB *Blob, const DENORM& denorm); +CHAR_FEATURES BlobMicroFeatures(TBLOB *Blob, const DENORM& bl_denorm, + const DENORM& cn_denorm, + const INT_FX_RESULT_STRUCT& fx_info); #endif diff --git a/classify/normfeat.cpp b/classify/normfeat.cpp index a377a6b38..3f8013aa3 100644 --- a/classify/normfeat.cpp +++ b/classify/normfeat.cpp @@ -59,22 +59,18 @@ FLOAT32 ActualOutlineLength(FEATURE Feature) { // the x center of the grapheme's bounding box. // English: [0.011, 0.31] // -FEATURE_SET ExtractCharNormFeatures(TBLOB *blob, const DENORM& denorm) { +FEATURE_SET ExtractCharNormFeatures(TBLOB *blob, const DENORM& bl_denorm, + const DENORM& cn_denorm, + const INT_FX_RESULT_STRUCT& fx_info) { FEATURE_SET feature_set = NewFeatureSet(1); FEATURE feature = NewFeature(&CharNormDesc); - INT_FEATURE_ARRAY blfeatures; - INT_FEATURE_ARRAY cnfeatures; - INT_FX_RESULT_STRUCT FXInfo; - - ExtractIntFeat(blob, denorm, blfeatures, cnfeatures, &FXInfo); - feature->Params[CharNormY] = - MF_SCALE_FACTOR * (FXInfo.Ymean - BASELINE_OFFSET); + MF_SCALE_FACTOR * (fx_info.Ymean - kBlnBaselineOffset); feature->Params[CharNormLength] = - MF_SCALE_FACTOR * FXInfo.Length / LENGTH_COMPRESSION; - feature->Params[CharNormRx] = MF_SCALE_FACTOR * FXInfo.Rx; - feature->Params[CharNormRy] = MF_SCALE_FACTOR * FXInfo.Ry; + MF_SCALE_FACTOR * fx_info.Length / LENGTH_COMPRESSION; + feature->Params[CharNormRx] = MF_SCALE_FACTOR * fx_info.Rx; + feature->Params[CharNormRy] = MF_SCALE_FACTOR * fx_info.Ry; AddFeature(feature_set, feature); diff --git a/classify/normfeat.h 
b/classify/normfeat.h index 54bf6ae57..59703a517 100644 --- a/classify/normfeat.h +++ b/classify/normfeat.h @@ -34,6 +34,8 @@ typedef enum { ----------------------------------------------------------------------------**/ FLOAT32 ActualOutlineLength(FEATURE Feature); -FEATURE_SET ExtractCharNormFeatures(TBLOB *Blob, const DENORM& denorm); +FEATURE_SET ExtractCharNormFeatures(TBLOB *Blob, const DENORM& bl_denorm, + const DENORM& cn_denorm, + const INT_FX_RESULT_STRUCT& fx_info); #endif diff --git a/classify/normmatch.cpp b/classify/normmatch.cpp index 830181e85..9dfe5a76d 100644 --- a/classify/normmatch.cpp +++ b/classify/normmatch.cpp @@ -94,7 +94,7 @@ FLOAT32 Classify::ComputeNormMatch(CLASS_ID ClassId, PROTOTYPE *Proto; int ProtoId; - if(ClassId > NormProtos->NumProtos) { + if (ClassId > NormProtos->NumProtos) { ClassId = NO_CLASS; } diff --git a/classify/ocrfeatures.cpp b/classify/ocrfeatures.cpp index 3685c5c6b..7a791338e 100644 --- a/classify/ocrfeatures.cpp +++ b/classify/ocrfeatures.cpp @@ -230,7 +230,7 @@ void WriteFeature(FILE *File, FEATURE Feature) { int i; for (i = 0; i < Feature->Type->NumParams; i++) { -#ifndef _WIN32 +#ifndef WIN32 assert(!isnan(Feature->Params[i])); #endif fprintf(File, " %g", Feature->Params[i]); diff --git a/classify/ocrfeatures.h b/classify/ocrfeatures.h index 8ca9e5975..734b4ff07 100644 --- a/classify/ocrfeatures.h +++ b/classify/ocrfeatures.h @@ -26,6 +26,7 @@ #include class DENORM; +struct INT_FX_RESULT_STRUCT; #undef Min #undef Max @@ -78,7 +79,8 @@ typedef FEATURE_SET_STRUCT *FEATURE_SET; // classifier does not need to know the details of this data structure. 
typedef char *CHAR_FEATURES; -typedef FEATURE_SET (*FX_FUNC) (TBLOB *, const DENORM&); +typedef FEATURE_SET (*FX_FUNC)(TBLOB *, const DENORM&, const DENORM&, + const INT_FX_RESULT_STRUCT&); struct FEATURE_EXT_STRUCT { FX_FUNC Extractor; // func to extract features diff --git a/classify/picofeat.cpp b/classify/picofeat.cpp index 9f2a4ead3..ba19fb1ca 100644 --- a/classify/picofeat.cpp +++ b/classify/picofeat.cpp @@ -224,7 +224,9 @@ void NormalizePicoX(FEATURE_SET FeatureSet) { } /* NormalizePicoX */ /*---------------------------------------------------------------------------*/ -FEATURE_SET ExtractIntCNFeatures(TBLOB *blob, const DENORM& denorm) { +FEATURE_SET ExtractIntCNFeatures(TBLOB *blob, const DENORM& bl_denorm, + const DENORM& cn_denorm, + const INT_FX_RESULT_STRUCT& fx_info) { /* ** Parameters: ** blob blob to extract features from @@ -233,8 +235,8 @@ FEATURE_SET ExtractIntCNFeatures(TBLOB *blob, const DENORM& denorm) { ** Exceptions: none ** History: 8/8/2011, rays, Created. */ - tesseract::TrainingSample* sample = GetIntFeatures( - tesseract::NM_CHAR_ANISOTROPIC, blob, denorm); + tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample( + *blob, tesseract::NM_CHAR_ANISOTROPIC, false); if (sample == NULL) return NULL; int num_features = sample->num_features(); @@ -254,7 +256,9 @@ FEATURE_SET ExtractIntCNFeatures(TBLOB *blob, const DENORM& denorm) { } /* ExtractIntCNFeatures */ /*---------------------------------------------------------------------------*/ -FEATURE_SET ExtractIntGeoFeatures(TBLOB *blob, const DENORM& denorm) { +FEATURE_SET ExtractIntGeoFeatures(TBLOB *blob, const DENORM& bl_denorm, + const DENORM& cn_denorm, + const INT_FX_RESULT_STRUCT& fx_info) { /* ** Parameters: ** blob blob to extract features from @@ -263,8 +267,8 @@ FEATURE_SET ExtractIntGeoFeatures(TBLOB *blob, const DENORM& denorm) { ** Exceptions: none ** History: 8/8/2011, rays, Created. 
*/ - tesseract::TrainingSample* sample = GetIntFeatures( - tesseract::NM_CHAR_ANISOTROPIC, blob, denorm); + tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample( + *blob, tesseract::NM_CHAR_ANISOTROPIC, false); if (sample == NULL) return NULL; FEATURE_SET feature_set = NewFeatureSet(1); diff --git a/classify/picofeat.h b/classify/picofeat.h index db63d2119..ab37ba038 100644 --- a/classify/picofeat.h +++ b/classify/picofeat.h @@ -58,8 +58,12 @@ extern double_VAR_H(classify_pico_feature_length, 0.05, "Pico Feature Length"); ----------------------------------------------------------------------------**/ #define GetPicoFeatureLength() (PicoFeatureLength) -FEATURE_SET ExtractIntCNFeatures(TBLOB *Blob, const DENORM& denorm); -FEATURE_SET ExtractIntGeoFeatures(TBLOB *Blob, const DENORM& denorm); +FEATURE_SET ExtractIntCNFeatures(TBLOB *Blob, const DENORM& bl_denorm, + const DENORM& cn_denorm, + const INT_FX_RESULT_STRUCT& fx_info); +FEATURE_SET ExtractIntGeoFeatures(TBLOB *Blob, const DENORM& bl_denorm, + const DENORM& cn_denorm, + const INT_FX_RESULT_STRUCT& fx_info); /**---------------------------------------------------------------------------- Global Data Definitions and Declarations diff --git a/classify/shapeclassifier.cpp b/classify/shapeclassifier.cpp new file mode 100644 index 000000000..e357f66fc --- /dev/null +++ b/classify/shapeclassifier.cpp @@ -0,0 +1,230 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// Author: rays@google.com (Ray Smith) +/////////////////////////////////////////////////////////////////////// +// File: shapeclassifier.h +// Description: Base interface class for classifiers that return a +// shape index. +// Author: Ray Smith +// Created: Thu Dec 15 15:24:27 PST 2011 +// +// (C) Copyright 2011, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#include "shapeclassifier.h" +#include "genericvector.h" +#include "scrollview.h" +#include "shapetable.h" +#include "svmnode.h" +#include "trainingsample.h" +#include "tprintf.h" + +namespace tesseract { + +// Classifies the given [training] sample, writing to results. +// See shapeclassifier.h for a full description. +// Default implementation calls the ShapeRating version. +int ShapeClassifier::UnicharClassifySample( + const TrainingSample& sample, Pix* page_pix, int debug, + UNICHAR_ID keep_this, GenericVector* results) { + results->truncate(0); + GenericVector shape_results; + int num_shape_results = ClassifySample(sample, page_pix, debug, keep_this, + &shape_results); + const ShapeTable* shapes = GetShapeTable(); + GenericVector unichar_map; + unichar_map.init_to_size(shapes->unicharset().size(), -1); + for (int r = 0; r < num_shape_results; ++r) { + shapes->AddShapeToResults(shape_results[r], &unichar_map, results); + } + return results->size(); +} + +// Classifies the given [training] sample, writing to results. +// See shapeclassifier.h for a full description. +// Default implementation aborts. +int ShapeClassifier::ClassifySample(const TrainingSample& sample, Pix* page_pix, + int debug, int keep_this, + GenericVector* results) { + ASSERT_HOST("Must implement ClassifySample!" == NULL); + return 0; +} + +// Returns the shape that contains unichar_id that has the best result. +// If result is not NULL, it is set with the shape_id and rating. 
+// Does not need to be overridden if ClassifySample respects the keep_this +// rule. +int ShapeClassifier::BestShapeForUnichar(const TrainingSample& sample, + Pix* page_pix, UNICHAR_ID unichar_id, + ShapeRating* result) { + GenericVector results; + const ShapeTable* shapes = GetShapeTable(); + int num_results = ClassifySample(sample, page_pix, 0, unichar_id, &results); + for (int r = 0; r < num_results; ++r) { + if (shapes->GetShape(results[r].shape_id).ContainsUnichar(unichar_id)) { + if (result != NULL) + *result = results[r]; + return results[r].shape_id; + } + } + return -1; +} + +// Provides access to the UNICHARSET that this classifier works with. +// Only needs to be overridden if GetShapeTable() can return NULL. +const UNICHARSET& ShapeClassifier::GetUnicharset() const { + return GetShapeTable()->unicharset(); +} + +// Visual debugger classifies the given sample, displays the results and +// solicits user input to display other classifications. Returns when +// the user has finished with debugging the sample. +// Probably doesn't need to be overridden if the subclass provides +// DisplayClassifyAs. +void ShapeClassifier::DebugDisplay(const TrainingSample& sample, + Pix* page_pix, + UNICHAR_ID unichar_id) { + static ScrollView* terminator = NULL; + if (terminator == NULL) { + terminator = new ScrollView("XIT", 0, 0, 50, 50, 50, 50, true); + } + ScrollView* debug_win = CreateFeatureSpaceWindow("ClassifierDebug", 0, 0); + // Provide a right-click menu to choose the class. + SVMenuNode* popup_menu = new SVMenuNode(); + popup_menu->AddChild("Choose class to debug", 0, "x", "Class to debug"); + popup_menu->BuildMenu(debug_win, false); + // Display the features in green. 
+ const INT_FEATURE_STRUCT* features = sample.features(); + int num_features = sample.num_features(); + for (int f = 0; f < num_features; ++f) { + RenderIntFeature(debug_win, &features[f], ScrollView::GREEN); + } + debug_win->Update(); + GenericVector results; + // Debug classification until the user quits. + const UNICHARSET& unicharset = GetUnicharset(); + SVEvent* ev; + SVEventType ev_type; + do { + PointerVector windows; + if (unichar_id >= 0) { + tprintf("Debugging class %d = %s\n", + unichar_id, unicharset.id_to_unichar(unichar_id)); + UnicharClassifySample(sample, page_pix, 1, unichar_id, &results); + DisplayClassifyAs(sample, page_pix, unichar_id, 1, &windows); + } else { + tprintf("Invalid unichar_id: %d\n", unichar_id); + UnicharClassifySample(sample, page_pix, 1, -1, &results); + } + if (unichar_id >= 0) { + tprintf("Debugged class %d = %s\n", + unichar_id, unicharset.id_to_unichar(unichar_id)); + } + tprintf("Right-click in ClassifierDebug window to choose debug class,"); + tprintf(" Left-click or close window to quit...\n"); + UNICHAR_ID old_unichar_id; + do { + old_unichar_id = unichar_id; + ev = debug_win->AwaitEvent(SVET_ANY); + ev_type = ev->type; + if (ev_type == SVET_POPUP) { + if (unicharset.contains_unichar(ev->parameter)) { + unichar_id = unicharset.unichar_to_id(ev->parameter); + } else { + tprintf("Char class '%s' not found in unicharset", ev->parameter); + } + } + delete ev; + } while (unichar_id == old_unichar_id && + ev_type != SVET_CLICK && ev_type != SVET_DESTROY); + } while (ev_type != SVET_CLICK && ev_type != SVET_DESTROY); + delete debug_win; +} + +// Displays classification as the given shape_id. Creates as many windows +// as it feels fit, using index as a guide for placement. Adds any created +// windows to the windows output and returns a new index that may be used +// by any subsequent classifiers. Caller waits for the user to view and +// then destroys the windows by clearing the vector. 
+int ShapeClassifier::DisplayClassifyAs( + const TrainingSample& sample, Pix* page_pix, + UNICHAR_ID unichar_id, int index, + PointerVector* windows) { + // Does nothing in the default implementation. + return index; +} + +// Prints debug information on the results. +void ShapeClassifier::UnicharPrintResults( + const char* context, const GenericVector& results) const { + tprintf("%s\n", context); + for (int i = 0; i < results.size(); ++i) { + tprintf("%g: c_id=%d=%s", results[i].rating, results[i].unichar_id, + GetUnicharset().id_to_unichar(results[i].unichar_id)); + if (results[i].fonts.size() != 0) { + tprintf(" Font Vector:"); + for (int f = 0; f < results[i].fonts.size(); ++f) { + tprintf(" %d", results[i].fonts[f]); + } + } + tprintf("\n"); + } +} +void ShapeClassifier::PrintResults( + const char* context, const GenericVector& results) const { + tprintf("%s\n", context); + for (int i = 0; i < results.size(); ++i) { + tprintf("%g:", results[i].rating); + if (results[i].joined) + tprintf("[J]"); + if (results[i].broken) + tprintf("[B]"); + tprintf(" %s\n", GetShapeTable()->DebugStr(results[i].shape_id).string()); + } +} + +// Removes any result that has all its unichars covered by a better choice, +// regardless of font. +void ShapeClassifier::FilterDuplicateUnichars( + GenericVector* results) const { + GenericVector filtered_results; + // Copy results to filtered results and knock out duplicate unichars. + const ShapeTable* shapes = GetShapeTable(); + for (int r = 0; r < results->size(); ++r) { + if (r > 0) { + const Shape& shape_r = shapes->GetShape((*results)[r].shape_id); + int c; + for (c = 0; c < shape_r.size(); ++c) { + int unichar_id = shape_r[c].unichar_id; + int s; + for (s = 0; s < r; ++s) { + const Shape& shape_s = shapes->GetShape((*results)[s].shape_id); + if (shape_s.ContainsUnichar(unichar_id)) + break; // We found unichar_id. + } + if (s == r) + break; // We didn't find unichar_id. 
+ } + if (c == shape_r.size()) + continue; // We found all the unichar ids in previous answers. + } + filtered_results.push_back((*results)[r]); + } + *results = filtered_results; +} + +} // namespace tesseract. + + + + + diff --git a/classify/shapeclassifier.h b/classify/shapeclassifier.h index 08808127f..5069f375b 100644 --- a/classify/shapeclassifier.h +++ b/classify/shapeclassifier.h @@ -23,44 +23,21 @@ #ifndef TESSERACT_CLASSIFY_SHAPECLASSIFIER_H_ #define TESSERACT_CLASSIFY_SHAPECLASSIFIER_H_ +#include "unichar.h" + template class GenericVector; struct Pix; +class ScrollView; +class UNICHARSET; namespace tesseract { +template class PointerVector; +struct ShapeRating; class ShapeTable; class TrainingSample; - -// Classifier result from a low-level classification is an index into some -// ShapeTable and a rating. -struct ShapeRating { - ShapeRating() : shape_id(0), rating(0.0f), raw(0.0f), font(0.0f) {} - ShapeRating(int s, float r) - : shape_id(s), rating(r), raw(1.0f), font(0.0f) {} - - // Sort function to sort ratings appropriately by descending rating. - static int SortDescendingRating(const void* t1, const void* t2) { - const ShapeRating* a = reinterpret_cast(t1); - const ShapeRating* b = reinterpret_cast(t2); - if (a->rating > b->rating) { - return -1; - } else if (a->rating < b->rating) { - return 1; - } else { - return a->shape_id - b->shape_id; - } - } - - // Index into some shape table indicates the class of the answer. - int shape_id; - // Rating from classifier with 1.0 perfect and 0.0 impossible. - // Call it a probability if you must. - float rating; - // Subsidiary rating that a classifier may use internally. - float raw; - // Subsidiary rating that a classifier may use internally. - float font; -}; +class TrainingSampleSet; +struct UnicharRating; // Interface base class for classifiers that produce ShapeRating results. class ShapeClassifier { @@ -76,18 +53,70 @@ class ShapeClassifier { // to get the appropriate tesseract features. 
// If debug is non-zero, then various degrees of classifier dependent debug // information is provided. - // If keep_this (a shape index) is >= 0, then the results should always + // If keep_this (a UNICHAR_ID) is >= 0, then the results should always // contain keep_this, and (if possible) anything of intermediate confidence. - // (Used for answering "Why didn't it get that right?" questions.) + // (Used for answering "Why didn't it get that right?" questions.) It must + // be a UNICHAR_ID as the callers have no clue how to choose the best shape + // that may contain a desired answer. // The return value is the number of classes saved in results. - // NOTE that overriding functions MUST clear results unless the classifier - // is working with a team of such classifiers. + // NOTE that overriding functions MUST clear and sort the results by + // descending rating unless the classifier is working with a team of such + // classifiers. + // NOTE: Neither overload of ClassifySample is pure, but at least one must + // be overridden by a classifier in order for it to do anything. + virtual int UnicharClassifySample(const TrainingSample& sample, Pix* page_pix, + int debug, UNICHAR_ID keep_this, + GenericVector* results); + + protected: virtual int ClassifySample(const TrainingSample& sample, Pix* page_pix, - int debug, int keep_this, - GenericVector* results) = 0; + int debug, UNICHAR_ID keep_this, + GenericVector* results); + + public: + // Returns the shape that contains unichar_id that has the best result. + // If result is not NULL, it is set with the shape_id and rating. + // Returns -1 if ClassifySample fails to provide any result containing + // unichar_id. BestShapeForUnichar does not need to be overridden if + // ClassifySample respects the keep_this rule. + virtual int BestShapeForUnichar(const TrainingSample& sample, Pix* page_pix, + UNICHAR_ID unichar_id, ShapeRating* result); // Provides access to the ShapeTable that this classifier works with. 
virtual const ShapeTable* GetShapeTable() const = 0; + // Provides access to the UNICHARSET that this classifier works with. + // Must be overridden IFF GetShapeTable() returns NULL. + virtual const UNICHARSET& GetUnicharset() const; + + // Visual debugger classifies the given sample, displays the results and + // solicits user input to display other classifications. Returns when + // the user has finished with debugging the sample. + // Probably doesn't need to be overridden if the subclass provides + // DisplayClassifyAs. + virtual void DebugDisplay(const TrainingSample& sample, Pix* page_pix, + UNICHAR_ID unichar_id); + + + // Displays classification as the given unichar_id. Creates as many windows + // as it feels fit, using index as a guide for placement. Adds any created + // windows to the windows output and returns a new index that may be used + // by any subsequent classifiers. Caller waits for the user to view and + // then destroys the windows by clearing the vector. + virtual int DisplayClassifyAs(const TrainingSample& sample, Pix* page_pix, + UNICHAR_ID unichar_id, int index, + PointerVector* windows); + + // Prints debug information on the results. context is some introductory/title + // message. + virtual void UnicharPrintResults( + const char* context, const GenericVector& results) const; + virtual void PrintResults(const char* context, + const GenericVector& results) const; + + protected: + // Removes any result that has all its unichars covered by a better choice, + // regardless of font. + void FilterDuplicateUnichars(GenericVector* results) const; }; } // namespace tesseract. 
diff --git a/classify/shapetable.cpp b/classify/shapetable.cpp index dd0e4772c..325a0e283 100644 --- a/classify/shapetable.cpp +++ b/classify/shapetable.cpp @@ -22,12 +22,47 @@ #include "shapetable.h" +#include "bitvector.h" +#include "fontinfo.h" #include "intfeaturespace.h" #include "strngs.h" #include "unicharset.h" +#include "unicity_table.h" namespace tesseract { +// Helper function to get the index of the first result with the required +// unichar_id. If the results are sorted by rating, this will also be the +// best result with the required unichar_id. +// Returns -1 if the unichar_id is not found +int ShapeRating::FirstResultWithUnichar( + const GenericVector& results, + const ShapeTable& shape_table, + UNICHAR_ID unichar_id) { + for (int r = 0; r < results.size(); ++r) { + int shape_id = results[r].shape_id; + const Shape& shape = shape_table.GetShape(shape_id); + if (shape.ContainsUnichar(unichar_id)) { + return r; + } + } + return -1; +} + +// Helper function to get the index of the first result with the required +// unichar_id. If the results are sorted by rating, this will also be the +// best result with the required unichar_id. +// Returns -1 if the unichar_id is not found +int UnicharRating::FirstResultWithUnichar( + const GenericVector& results, + UNICHAR_ID unichar_id) { + for (int r = 0; r < results.size(); ++r) { + if (results[r].unichar_id == unichar_id) + return r; + } + return -1; +} + // Writes to the given file. Returns false in case of error. bool UnicharAndFonts::Serialize(FILE* fp) const { if (fwrite(&unichar_id, sizeof(unichar_id), 1, fp) != 1) return false; @@ -138,6 +173,39 @@ bool Shape::ContainsFont(int font_id) const { } return false; } +// Returns true if the shape contains the given font properties, ignoring +// unichar_id. 
+bool Shape::ContainsFontProperties(const FontInfoTable& font_table, + uinT32 properties) const { + for (int c = 0; c < unichars_.size(); ++c) { + GenericVector& font_list = unichars_[c].font_ids; + for (int f = 0; f < font_list.size(); ++f) { + if (font_table.get(font_list[f]).properties == properties) + return true; + } + } + return false; +} +// Returns true if the shape contains multiple different font properties, +// ignoring unichar_id. +bool Shape::ContainsMultipleFontProperties( + const FontInfoTable& font_table) const { + uinT32 properties = font_table.get(unichars_[0].font_ids[0]).properties; + for (int c = 0; c < unichars_.size(); ++c) { + GenericVector& font_list = unichars_[c].font_ids; + for (int f = 0; f < font_list.size(); ++f) { + if (font_table.get(font_list[f]).properties != properties) + return true; + } + } + return false; +} + +// Returns true if this shape is equal to other (ignoring order of unichars +// and fonts). +bool Shape::operator==(const Shape& other) const { + return IsSubsetOf(other) && other.IsSubsetOf(*this); +} // Returns true if this is a subset (including equal) of other. bool Shape::IsSubsetOf(const Shape& other) const { @@ -172,10 +240,10 @@ void Shape::SortUnichars() { unichars_sorted_ = true; } -ShapeTable::ShapeTable() : unicharset_(NULL) { +ShapeTable::ShapeTable() : unicharset_(NULL), num_fonts_(0) { } ShapeTable::ShapeTable(const UNICHARSET& unicharset) - : unicharset_(&unicharset) { + : unicharset_(&unicharset), num_fonts_(0) { } // Writes to the given file. Returns false in case of error. @@ -187,9 +255,38 @@ bool ShapeTable::Serialize(FILE* fp) const { // If swap is true, assumes a big/little-endian swap is needed. bool ShapeTable::DeSerialize(bool swap, FILE* fp) { if (!shape_table_.DeSerialize(swap, fp)) return false; + num_fonts_ = 0; return true; } +// Returns the number of fonts used in this ShapeTable, computing it if +// necessary. 
+int ShapeTable::NumFonts() const { + if (num_fonts_ <= 0) { + for (int shape_id = 0; shape_id < shape_table_.size(); ++shape_id) { + const Shape& shape = *shape_table_[shape_id]; + for (int c = 0; c < shape.size(); ++c) { + for (int f = 0; f < shape[c].font_ids.size(); ++f) { + if (shape[c].font_ids[f] >= num_fonts_) + num_fonts_ = shape[c].font_ids[f] + 1; + } + } + } + } + return num_fonts_; +} + +// Re-indexes the class_ids in the shapetable according to the given map. +// Useful in conjunction with set_unicharset. +void ShapeTable::ReMapClassIds(const GenericVector& unicharset_map) { + for (int shape_id = 0; shape_id < shape_table_.size(); ++shape_id) { + Shape* shape = shape_table_[shape_id]; + for (int c = 0; c < shape->size(); ++c) { + shape->SetUnicharId(c, unicharset_map[(*shape)[c].unichar_id]); + } + } +} + // Returns a string listing the classes/fonts in a shape. STRING ShapeTable::DebugStr(int shape_id) const { if (shape_id < 0 || shape_id >= shape_table_.size()) @@ -251,15 +348,22 @@ int ShapeTable::AddShape(int unichar_id, int font_id) { Shape* shape = new Shape; shape->AddToShape(unichar_id, font_id); shape_table_.push_back(shape); + num_fonts_ = MAX(num_fonts_, font_id + 1); return index; } -// Adds a copy of the given shape. -// Returns the assigned index. +// Adds a copy of the given shape unless it is already present. +// Returns the assigned index or index of existing shape if already present. 
int ShapeTable::AddShape(const Shape& other) { - int index = shape_table_.size(); - Shape* shape = new Shape(other); - shape_table_.push_back(shape); + int index; + for (index = 0; index < shape_table_.size() && + !(other == *shape_table_[index]); ++index) + continue; + if (index == shape_table_.size()) { + Shape* shape = new Shape(other); + shape_table_.push_back(shape); + } + num_fonts_ = 0; return index; } @@ -275,12 +379,14 @@ void ShapeTable::DeleteShape(int shape_id) { void ShapeTable::AddToShape(int shape_id, int unichar_id, int font_id) { Shape& shape = *shape_table_[shape_id]; shape.AddToShape(unichar_id, font_id); + num_fonts_ = MAX(num_fonts_, font_id + 1); } // Adds the given shape to the existing shape with the given index. void ShapeTable::AddShapeToShape(int shape_id, const Shape& other) { Shape& shape = *shape_table_[shape_id]; shape.AddShape(other); + num_fonts_ = 0; } // Returns the id of the shape that contains the given unichar and font. @@ -316,25 +422,26 @@ void ShapeTable::GetFirstUnicharAndFont(int shape_id, // a ShapeTable. int ShapeTable::BuildFromShape(const Shape& shape, const ShapeTable& master_shapes) { - int num_masters = 0; + BitVector shape_map(master_shapes.NumShapes()); for (int u_ind = 0; u_ind < shape.size(); ++u_ind) { for (int f_ind = 0; f_ind < shape[u_ind].font_ids.size(); ++f_ind) { int c = shape[u_ind].unichar_id; int f = shape[u_ind].font_ids[f_ind]; - if (FindShape(c, f) < 0) { - int shape_id = AddShape(c, f); - int master_id = master_shapes.FindShape(c, f); - if (master_id >= 0 && shape.size() > 1) { - const Shape& master = master_shapes.GetShape(master_id); - if (master.IsSubsetOf(shape) && !shape.IsSubsetOf(master)) { - // Add everything else from the master shape. 
- shape_table_[shape_id]->AddShape(master); - ++num_masters; - } - } + int master_id = master_shapes.FindShape(c, f); + if (master_id >= 0) { + shape_map.SetBit(master_id); + } else if (FindShape(c, f) < 0) { + AddShape(c, f); } } } + int num_masters = 0; + for (int s = 0; s < master_shapes.NumShapes(); ++s) { + if (shape_map[s]) { + AddShape(master_shapes.GetShape(s)); + ++num_masters; + } + } return num_masters; } @@ -381,7 +488,7 @@ void ShapeTable::ForceFontMerges(int start, int end) { } } ShapeTable compacted(*unicharset_); - compacted.AppendMasterShapes(*this); + compacted.AppendMasterShapes(*this, NULL); *this = compacted; } @@ -422,6 +529,13 @@ void ShapeTable::MergeShapes(int shape_id1, int shape_id2) { shape_table_[master_id1]->AddShape(*shape_table_[master_id2]); } +// Swaps two shape_ids. +void ShapeTable::SwapShapes(int shape_id1, int shape_id2) { + Shape* tmp = shape_table_[shape_id1]; + shape_table_[shape_id1] = shape_table_[shape_id2]; + shape_table_[shape_id2] = tmp; +} + // Returns the destination of this shape, (if merged), taking into account // the fact that the destination may itself have been merged. int ShapeTable::MasterDestinationIndex(int shape_id) const { @@ -435,11 +549,129 @@ int ShapeTable::MasterDestinationIndex(int shape_id) const { return master_id; } +// Returns false if the unichars in neither shape is a subset of the other. +bool ShapeTable::SubsetUnichar(int shape_id1, int shape_id2) const { + const Shape& shape1 = GetShape(shape_id1); + const Shape& shape2 = GetShape(shape_id2); + int c1, c2; + for (c1 = 0; c1 < shape1.size(); ++c1) { + int unichar_id1 = shape1[c1].unichar_id; + if (!shape2.ContainsUnichar(unichar_id1)) + break; + } + for (c2 = 0; c2 < shape2.size(); ++c2) { + int unichar_id2 = shape2[c2].unichar_id; + if (!shape1.ContainsUnichar(unichar_id2)) + break; + } + return c1 == shape1.size() || c2 == shape2.size(); +} + +// Returns false if the unichars in neither shape is a subset of the other. 
+bool ShapeTable::MergeSubsetUnichar(int merge_id1, int merge_id2, + int shape_id) const { + const Shape& merge1 = GetShape(merge_id1); + const Shape& merge2 = GetShape(merge_id2); + const Shape& shape = GetShape(shape_id); + int cm1, cm2, cs; + for (cs = 0; cs < shape.size(); ++cs) { + int unichar_id = shape[cs].unichar_id; + if (!merge1.ContainsUnichar(unichar_id) && + !merge2.ContainsUnichar(unichar_id)) + break; // Shape is not a subset of the merge. + } + for (cm1 = 0; cm1 < merge1.size(); ++cm1) { + int unichar_id1 = merge1[cm1].unichar_id; + if (!shape.ContainsUnichar(unichar_id1)) + break; // Merge is not a subset of shape + } + for (cm2 = 0; cm2 < merge2.size(); ++cm2) { + int unichar_id2 = merge2[cm2].unichar_id; + if (!shape.ContainsUnichar(unichar_id2)) + break; // Merge is not a subset of shape + } + return cs == shape.size() || (cm1 == merge1.size() && cm2 == merge2.size()); +} + +// Returns true if the unichar sets are equal between the shapes. +bool ShapeTable::EqualUnichars(int shape_id1, int shape_id2) const { + const Shape& shape1 = GetShape(shape_id1); + const Shape& shape2 = GetShape(shape_id2); + for (int c1 = 0; c1 < shape1.size(); ++c1) { + int unichar_id1 = shape1[c1].unichar_id; + if (!shape2.ContainsUnichar(unichar_id1)) + return false; + } + for (int c2 = 0; c2 < shape2.size(); ++c2) { + int unichar_id2 = shape2[c2].unichar_id; + if (!shape1.ContainsUnichar(unichar_id2)) + return false; + } + return true; +} + +// Returns true if the unichar sets are equal between the shapes. +bool ShapeTable::MergeEqualUnichars(int merge_id1, int merge_id2, + int shape_id) const { + const Shape& merge1 = GetShape(merge_id1); + const Shape& merge2 = GetShape(merge_id2); + const Shape& shape = GetShape(shape_id); + for (int cs = 0; cs < shape.size(); ++cs) { + int unichar_id = shape[cs].unichar_id; + if (!merge1.ContainsUnichar(unichar_id) && + !merge2.ContainsUnichar(unichar_id)) + return false; // Shape has a unichar that appears in neither merge. 
+ } + for (int cm1 = 0; cm1 < merge1.size(); ++cm1) { + int unichar_id1 = merge1[cm1].unichar_id; + if (!shape.ContainsUnichar(unichar_id1)) + return false; // Merge has a unichar that is not in shape. + } + for (int cm2 = 0; cm2 < merge2.size(); ++cm2) { + int unichar_id2 = merge2[cm2].unichar_id; + if (!shape.ContainsUnichar(unichar_id2)) + return false; // Merge has a unichar that is not in shape. + } + return true; +} + +// Returns true if there is a common unichar between the shapes. +bool ShapeTable::CommonUnichars(int shape_id1, int shape_id2) const { + const Shape& shape1 = GetShape(shape_id1); + const Shape& shape2 = GetShape(shape_id2); + for (int c1 = 0; c1 < shape1.size(); ++c1) { + int unichar_id1 = shape1[c1].unichar_id; + if (shape2.ContainsUnichar(unichar_id1)) + return true; + } + return false; +} + +// Returns true if there is a common font id between the shapes. +bool ShapeTable::CommonFont(int shape_id1, int shape_id2) const { + const Shape& shape1 = GetShape(shape_id1); + const Shape& shape2 = GetShape(shape_id2); + for (int c1 = 0; c1 < shape1.size(); ++c1) { + const GenericVector& font_list1 = shape1[c1].font_ids; + for (int f = 0; f < font_list1.size(); ++f) { + if (shape2.ContainsFont(font_list1[f])) + return true; + } + } + return false; +} + // Appends the master shapes from other to this. -void ShapeTable::AppendMasterShapes(const ShapeTable& other) { +// If not NULL, shape_map is set to map other shape_ids to this's shape_ids. 
+void ShapeTable::AppendMasterShapes(const ShapeTable& other, + GenericVector* shape_map) { + if (shape_map != NULL) + shape_map->init_to_size(other.NumShapes(), -1); for (int s = 0; s < other.shape_table_.size(); ++s) { if (other.shape_table_[s]->destination_index() < 0) { - AddShape(*other.shape_table_[s]); + int index = AddShape(*other.shape_table_[s]); + if (shape_map != NULL) + (*shape_map)[s] = index; } } } @@ -455,6 +687,46 @@ int ShapeTable::NumMasterShapes() const { } +// Adds the unichars of the given shape_id to the vector of results. Any +// unichar_id that is already present just has the fonts added to the +// font set for that result without adding a new entry in the vector. +// NOTE: it is assumed that the results are given to this function in order +// of decreasing rating. +// The unichar_map vector indicates the index of the results entry containing +// each unichar, or -1 if the unichar is not yet included in results. +void ShapeTable::AddShapeToResults(const ShapeRating& shape_rating, + GenericVector* unichar_map, + GenericVector* results)const { + if (shape_rating.joined) { + AddUnicharToResults(UNICHAR_JOINED, shape_rating.rating, unichar_map, + results); + } + if (shape_rating.broken) { + AddUnicharToResults(UNICHAR_BROKEN, shape_rating.rating, unichar_map, + results); + } + const Shape& shape = GetShape(shape_rating.shape_id); + for (int u = 0; u < shape.size(); ++u) { + int result_index = AddUnicharToResults(shape[u].unichar_id, + shape_rating.rating, + unichar_map, results); + (*results)[result_index].fonts += shape[u].font_ids; + } +} + +// Adds the given unichar_id to the results if needed, updating unichar_map +// and returning the index of unichar in results. 
+int ShapeTable::AddUnicharToResults( + int unichar_id, float rating, GenericVector* unichar_map, + GenericVector* results) const { + int result_index = unichar_map->get(unichar_id); + if (result_index < 0) { + UnicharRating result(unichar_id, rating); + result_index = results->push_back(result); + (*unichar_map)[unichar_id] = result_index; + } + return result_index; +} + + } // namespace tesseract - - diff --git a/classify/shapetable.h b/classify/shapetable.h index 0992fbcb6..87f4245fd 100644 --- a/classify/shapetable.h +++ b/classify/shapetable.h @@ -23,6 +23,8 @@ #ifndef TESSERACT_CLASSIFY_SHAPETABLE_H_ #define TESSERACT_CLASSIFY_SHAPETABLE_H_ +#include "bitvector.h" +#include "genericheap.h" #include "genericvector.h" #include "intmatcher.h" @@ -31,6 +33,113 @@ class UNICHARSET; namespace tesseract { +struct FontInfo; +class FontInfoTable; +class ShapeTable; + +// Simple struct to hold a single classifier unichar selection, a corresponding +// rating, and a list of appropriate fonts. +struct UnicharRating { + UnicharRating() : unichar_id(0), rating(0.0f) {} + UnicharRating(int u, float r) + : unichar_id(u), rating(r) {} + + // Sort function to sort ratings appropriately by descending rating. + static int SortDescendingRating(const void* t1, const void* t2) { + const UnicharRating* a = reinterpret_cast(t1); + const UnicharRating* b = reinterpret_cast(t2); + if (a->rating > b->rating) { + return -1; + } else if (a->rating < b->rating) { + return 1; + } else { + return a->unichar_id - b->unichar_id; + } + } + // Helper function to get the index of the first result with the required + // unichar_id. If the results are sorted by rating, this will also be the + // best result with the required unichar_id. + // Returns -1 if the unichar_id is not found + static int FirstResultWithUnichar(const GenericVector& results, + UNICHAR_ID unichar_id); + + // Index into some UNICHARSET table indicates the class of the answer. 
+ UNICHAR_ID unichar_id; + // Rating from classifier with 1.0 perfect and 0.0 impossible. + // Call it a probability if you must. + float rating; + // Set of fonts for this shape in order of decreasing preference. + // (There is no mechanism for storing scores for fonts as yet.) + GenericVector fonts; +}; + +// Classifier result from a low-level classification is an index into some +// ShapeTable and a rating. +struct ShapeRating { + ShapeRating() + : shape_id(0), rating(0.0f), raw(0.0f), font(0.0f), + joined(false), broken(false) {} + ShapeRating(int s, float r) + : shape_id(s), rating(r), raw(1.0f), font(0.0f), + joined(false), broken(false) {} + + // Sort function to sort ratings appropriately by descending rating. + static int SortDescendingRating(const void* t1, const void* t2) { + const ShapeRating* a = reinterpret_cast(t1); + const ShapeRating* b = reinterpret_cast(t2); + if (a->rating > b->rating) { + return -1; + } else if (a->rating < b->rating) { + return 1; + } else { + return a->shape_id - b->shape_id; + } + } + // Helper function to get the index of the first result with the required + // unichar_id. If the results are sorted by rating, this will also be the + // best result with the required unichar_id. + // Returns -1 if the unichar_id is not found + static int FirstResultWithUnichar(const GenericVector& results, + const ShapeTable& shape_table, + UNICHAR_ID unichar_id); + + // Index into some shape table indicates the class of the answer. + int shape_id; + // Rating from classifier with 1.0 perfect and 0.0 impossible. + // Call it a probability if you must. + float rating; + // Subsidiary rating that a classifier may use internally. + float raw; + // Subsidiary rating that a classifier may use internally. + float font; + // Flag indicating that the input may be joined. + bool joined; + // Flag indicating that the input may be broken (a fragment). 
+ bool broken; +}; + +// Simple struct to hold an entry for a heap-based priority queue of +// ShapeRating. +struct ShapeQueueEntry { + ShapeQueueEntry() : result(ShapeRating(0, 0.0f)), level(0) {} + ShapeQueueEntry(const ShapeRating& rating, int level0) + : result(rating), level(level0) {} + + // Sort by decreasing rating and decreasing level for equal rating. + bool operator<(const ShapeQueueEntry& other) const { + if (result.rating > other.result.rating) return true; + if (result.rating == other.result.rating) + return level > other.level; + return false; + } + + // Output from classifier. + ShapeRating result; + // Which level in the tree did this come from? + int level; +}; +typedef GenericHeap ShapeQueue; + // Simple struct to hold a set of fonts associated with a single unichar-id. // A vector of UnicharAndFonts makes a shape. struct UnicharAndFonts { @@ -83,6 +192,10 @@ class Shape { const UnicharAndFonts& operator[](int index) const { return unichars_[index]; } + // Sets the unichar_id of the given index to the new unichar_id. + void SetUnicharId(int index, int unichar_id) { + unichars_[index].unichar_id = unichar_id; + } // Adds a font_id for the given unichar_id. If the unichar_id is not // in the shape, it is added. void AddToShape(int unichar_id, int font_id); @@ -94,6 +207,16 @@ class Shape { bool ContainsUnichar(int unichar_id) const; // Returns true if the shape contains the given font, ignoring unichar_id. bool ContainsFont(int font_id) const; + // Returns true if the shape contains the given font properties, ignoring + // unichar_id. + bool ContainsFontProperties(const FontInfoTable& font_table, + uinT32 properties) const; + // Returns true if the shape contains multiple different font properties, + // ignoring unichar_id. + bool ContainsMultipleFontProperties(const FontInfoTable& font_table) const; + // Returns true if this shape is equal to other (ignoring order of unichars + // and fonts). 
+ bool operator==(const Shape& other) const; // Returns true if this is a subset (including equal) of other. bool IsSubsetOf(const Shape& other) const; // Returns true if the lists of unichar ids are the same in this and other, @@ -143,11 +266,17 @@ class ShapeTable { const UNICHARSET& unicharset() const { return *unicharset_; } + // Returns the number of fonts used in this ShapeTable, computing it if + // necessary. + int NumFonts() const; // Shapetable takes a pointer to the UNICHARSET, so it must persist for the // entire life of the ShapeTable. void set_unicharset(const UNICHARSET& unicharset) { unicharset_ = &unicharset; } + // Re-indexes the class_ids in the shapetable according to the given map. + // Useful in conjunction with set_unicharset. + void ReMapClassIds(const GenericVector& unicharset_map); // Returns a string listing the classes/fonts in a shape. STRING DebugStr(int shape_id) const; // Returns a debug string summarizing the table. @@ -156,8 +285,8 @@ class ShapeTable { // Adds a new shape starting with the given unichar_id and font_id. // Returns the assigned index. int AddShape(int unichar_id, int font_id); - // Adds a copy of the given shape. - // Returns the assigned index. + // Adds a copy of the given shape unless it is already present. + // Returns the assigned index or index of existing shape if already present. int AddShape(const Shape& other); // Removes the shape given by the shape index. All indices above are changed! void DeleteShape(int shape_id); @@ -204,10 +333,14 @@ class ShapeTable { int MergedUnicharCount(int shape_id1, int shape_id2) const; // Merges two shape_ids, leaving shape_id2 marked as merged. void MergeShapes(int shape_id1, int shape_id2); + // Swaps two shape_ids. + void SwapShapes(int shape_id1, int shape_id2); // Appends the master shapes from other to this. // Used to create a clean ShapeTable from a merged one, or to create a // copy of a ShapeTable. 
- void AppendMasterShapes(const ShapeTable& other); + // If not NULL, shape_map is set to map other shape_ids to this's shape_ids. + void AppendMasterShapes(const ShapeTable& other, + GenericVector* shape_map); // Returns the number of master shapes remaining after merging. int NumMasterShapes() const; // Returns the destination of this shape, (if merged), taking into account @@ -215,11 +348,43 @@ class ShapeTable { // For a non-merged shape, returns the input shape_id. int MasterDestinationIndex(int shape_id) const; + // Returns false if the unichars in neither shape is a subset of the other.. + bool SubsetUnichar(int shape_id1, int shape_id2) const; + // Returns false if the unichars in neither shape is a subset of the other.. + bool MergeSubsetUnichar(int merge_id1, int merge_id2, int shape_id) const; + // Returns true if the unichar sets are equal between the shapes. + bool EqualUnichars(int shape_id1, int shape_id2) const; + bool MergeEqualUnichars(int merge_id1, int merge_id2, int shape_id) const; + // Returns true if there is a common unichar between the shapes. + bool CommonUnichars(int shape_id1, int shape_id2) const; + // Returns true if there is a common font id between the shapes. + bool CommonFont(int shape_id1, int shape_id2) const; + + // Adds the unichars of the given shape_id to the vector of results. Any + // unichar_id that is already present just has the fonts added to the + // font set for that result without adding a new entry in the vector. + // NOTE: it is assumed that the results are given to this function in order + // of decreasing rating. + // The unichar_map vector indicates the index of the results entry containing + // each unichar, or -1 if the unichar is not yet included in results. 
+ void AddShapeToResults(const ShapeRating& shape_rating, + GenericVector* unichar_map, + GenericVector* results) const; + private: + // Adds the given unichar_id to the results if needed, updating unichar_map + // and returning the index of unichar in results. + int AddUnicharToResults(int unichar_id, float rating, + GenericVector* unichar_map, + GenericVector* results) const; + // Pointer to a provided unicharset used only by the Debugstr member. const UNICHARSET* unicharset_; // Vector of pointers to the Shapes in this ShapeTable. PointerVector shape_table_; + + // Cached data calculated on demand. + mutable int num_fonts_; }; } // namespace tesseract. diff --git a/classify/speckle.cpp b/classify/speckle.cpp deleted file mode 100644 index e33ce5f7c..000000000 --- a/classify/speckle.cpp +++ /dev/null @@ -1,107 +0,0 @@ -/****************************************************************************** - ** Filename: speckle.c - ** Purpose: Routines used by classifier to filter out speckle. - ** Author: Dan Johnson - ** History: Mon Mar 11 10:06:14 1991, DSJ, Created. - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- ******************************************************************************/ -/*----------------------------------------------------------------------------- - Include Files and Type Defines ------------------------------------------------------------------------------*/ -#include "speckle.h" - -#include "blobs.h" -#include "ratngs.h" -#include "params.h" - -/*----------------------------------------------------------------------------- - Global Data Definitions and Declarations ------------------------------------------------------------------------------*/ -/** define control knobs for adjusting definition of speckle*/ -double_VAR(speckle_large_max_size, 0.30, "Max large speckle size"); - -double_VAR(speckle_small_penalty, 10.0, "Small speckle penalty"); - -double_VAR(speckle_large_penalty, 10.0, "Large speckle penalty"); - -double_VAR(speckle_small_certainty, -1.0, "Small speckle certainty"); - -/*----------------------------------------------------------------------------- - Public Code ------------------------------------------------------------------------------*/ -/*---------------------------------------------------------------------------*/ -/** - * This routine adds a null choice to Choices with a - * rating equal to the worst rating in Choices plus a pad. - * The certainty of the new choice is the same as the - * certainty of the worst choice in Choices. The new choice - * is added to the end of Choices. - * - * Globals: - * - #speckle_small_penalty rating for a small speckle - * - #speckle_large_penalty rating penalty for a large speckle - * - #speckle_small_certainty certainty for a small speckle - * - * @param Choices choices to add a speckle choice to - * - * @return New Choices list with null choice added to end. - * - * Exceptions: none - * History: Mon Mar 11 11:08:11 1991, DSJ, Created. 
- */ -void AddLargeSpeckleTo(BLOB_CHOICE_LIST *Choices) { - assert(Choices != NULL); - BLOB_CHOICE *blob_choice; - BLOB_CHOICE_IT temp_it; - temp_it.set_to_list(Choices); - - // If there are no other choices, use the small speckle penalty plus - // the large speckle penalty. - if (Choices->length() == 0) { - blob_choice = - new BLOB_CHOICE(0, speckle_small_certainty + speckle_large_penalty, - speckle_small_certainty, -1, -1, NULL, 0, 0, false); - temp_it.add_to_end(blob_choice); - return; - } - - // If there are other choices, add a null choice that is slightly worse - // than the worst choice so far. - temp_it.move_to_last(); - blob_choice = temp_it.data(); // pick the worst choice - temp_it.add_to_end( - new BLOB_CHOICE(0, blob_choice->rating() + speckle_large_penalty, - blob_choice->certainty(), -1, -1, NULL, 0, 0, false)); -} /* AddLargeSpeckleTo */ - - -/*---------------------------------------------------------------------------*/ -/** - * This routine returns TRUE if both the width of height - * of Blob are less than the MaxLargeSpeckleSize. - * - * Globals: - * - #speckle_large_max_size largest allowed speckle - * - * Exceptions: none - * History: Mon Mar 11 10:06:49 1991, DSJ, Created. - * - * @param blob blob to test against speckle criteria - * - * @return TRUE if blob is speckle, FALSE otherwise. - */ -BOOL8 LargeSpeckle(TBLOB *blob) { - double speckle_size = BASELINE_SCALE * speckle_large_max_size; - TBOX bbox = blob->bounding_box(); - return (bbox.width() < speckle_size && bbox.height() < speckle_size); -} /* LargeSpeckle */ diff --git a/classify/speckle.h b/classify/speckle.h deleted file mode 100644 index 9676dc0a4..000000000 --- a/classify/speckle.h +++ /dev/null @@ -1,35 +0,0 @@ -/****************************************************************************** - ** Filename: speckle.h - ** Purpose: Interface to classifier speckle filtering routines. - ** Author: Dan Johnson - ** History: Mon Mar 11 10:14:16 1991, DSJ, Created. 
- ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - ******************************************************************************/ -#ifndef SPECKLE_H -#define SPECKLE_H - -/*----------------------------------------------------------------------------- - Include Files and Type Defines ------------------------------------------------------------------------------*/ - -#include "baseline.h" -#include "ratngs.h" - -/*----------------------------------------------------------------------------- - Public Function Prototypes ------------------------------------------------------------------------------*/ -void AddLargeSpeckleTo(BLOB_CHOICE_LIST *Choices); - -BOOL8 LargeSpeckle(TBLOB *Blob); - -#endif diff --git a/classify/tessclassifier.cpp b/classify/tessclassifier.cpp index f7735d8a2..4b6cad019 100644 --- a/classify/tessclassifier.cpp +++ b/classify/tessclassifier.cpp @@ -28,17 +28,25 @@ namespace tesseract { // Classifies the given [training] sample, writing to results. // See ShapeClassifier for a full description. 
-int TessClassifier::ClassifySample(const TrainingSample& sample, - Pix* page_pix, int debug, int keep_this, - GenericVector* results) { +int TessClassifier::UnicharClassifySample( + const TrainingSample& sample, Pix* page_pix, int debug, + UNICHAR_ID keep_this, GenericVector* results) { + int old_matcher_level = classify_->matcher_debug_level; + int old_matcher_flags = classify_->matcher_debug_flags; + int old_classify_level = classify_->classify_debug_level; if (debug) { - classify_->matcher_debug_level.set_value(debug ? 2 : 0); - classify_->matcher_debug_flags.set_value(debug ? 25 : 0); - classify_->classify_debug_level.set_value(debug ? 3 : 0); - } else { - classify_->classify_debug_level.set_value(debug ? 2 : 0); + // Explicitly set values of various control parameters to generate debug + // output if required, restoring the old values after classifying. + classify_->matcher_debug_level.set_value(2); + classify_->matcher_debug_flags.set_value(25); + classify_->classify_debug_level.set_value(3); + } + classify_->CharNormTrainingSample(pruner_only_, keep_this, sample, results); + if (debug) { + classify_->matcher_debug_level.set_value(old_matcher_level); + classify_->matcher_debug_flags.set_value(old_matcher_flags); + classify_->classify_debug_level.set_value(old_classify_level); } - classify_->CharNormTrainingSample(pruner_only_, sample, results); return results->size(); } @@ -46,6 +54,32 @@ int TessClassifier::ClassifySample(const TrainingSample& sample, const ShapeTable* TessClassifier::GetShapeTable() const { return classify_->shape_table(); } +// Provides access to the UNICHARSET that this classifier works with. +// Only needs to be overridden if GetShapeTable() can return NULL. +const UNICHARSET& TessClassifier::GetUnicharset() const { + return classify_->unicharset; +} + +// Displays classification as the given shape_id. Creates as many windows +// as it feels fit, using index as a guide for placement. 
Adds any created +// windows to the windows output and returns a new index that may be used +// by any subsequent classifiers. Caller waits for the user to view and +// then destroys the windows by clearing the vector. +int TessClassifier::DisplayClassifyAs( + const TrainingSample& sample, Pix* page_pix, int unichar_id, int index, + PointerVector* windows) { + int shape_id = unichar_id; + if (GetShapeTable() != NULL) + shape_id = BestShapeForUnichar(sample, page_pix, unichar_id, NULL); + if (shape_id < 0) return index; + if (UnusedClassIdIn(classify_->PreTrainedTemplates, shape_id)) { + tprintf("No built-in templates for class/shape %d\n", shape_id); + return index; + } + classify_->ShowBestMatchFor(shape_id, sample.features(), + sample.num_features()); + return index; +} } // namespace tesseract diff --git a/classify/tessclassifier.h b/classify/tessclassifier.h index f2483b7a0..57a04861e 100644 --- a/classify/tessclassifier.h +++ b/classify/tessclassifier.h @@ -41,11 +41,23 @@ class TessClassifier : public ShapeClassifier { // Classifies the given [training] sample, writing to results. // See ShapeClassifier for a full description. - virtual int ClassifySample(const TrainingSample& sample, Pix* page_pix, - int debug, int keep_this, - GenericVector* results); + virtual int UnicharClassifySample(const TrainingSample& sample, Pix* page_pix, + int debug, UNICHAR_ID keep_this, + GenericVector* results); // Provides access to the ShapeTable that this classifier works with. virtual const ShapeTable* GetShapeTable() const; + // Provides access to the UNICHARSET that this classifier works with. + // Only needs to be overridden if GetShapeTable() can return NULL. + virtual const UNICHARSET& GetUnicharset() const; + + // Displays classification as the given shape_id. Creates as many windows + // as it feels fit, using index as a guide for placement. 
Adds any created + // windows to the windows output and returns a new index that may be used + // by any subsequent classifiers. Caller waits for the user to view and + // then destroys the windows by clearing the vector. + virtual int DisplayClassifyAs(const TrainingSample& sample, Pix* page_pix, + int unichar_id, int index, + PointerVector* windows); private: // Indicates that this classifier is to use just the ClassPruner, or the diff --git a/classify/trainingsample.cpp b/classify/trainingsample.cpp index 450b925f3..4557da489 100644 --- a/classify/trainingsample.cpp +++ b/classify/trainingsample.cpp @@ -59,6 +59,8 @@ bool TrainingSample::Serialize(FILE* fp) const { if (fwrite(&num_features_, sizeof(num_features_), 1, fp) != 1) return false; if (fwrite(&num_micro_features_, sizeof(num_micro_features_), 1, fp) != 1) return false; + if (fwrite(&outline_length_, sizeof(outline_length_), 1, fp) != 1) + return false; if (fwrite(features_, sizeof(*features_), num_features_, fp) != num_features_) return false; if (fwrite(micro_features_, sizeof(*micro_features_), num_micro_features_, @@ -90,10 +92,13 @@ bool TrainingSample::DeSerialize(bool swap, FILE* fp) { if (fread(&num_features_, sizeof(num_features_), 1, fp) != 1) return false; if (fread(&num_micro_features_, sizeof(num_micro_features_), 1, fp) != 1) return false; + if (fread(&outline_length_, sizeof(outline_length_), 1, fp) != 1) + return false; if (swap) { ReverseN(&class_id_, sizeof(class_id_)); ReverseN(&num_features_, sizeof(num_features_)); ReverseN(&num_micro_features_, sizeof(num_micro_features_)); + ReverseN(&outline_length_, sizeof(outline_length_)); } delete [] features_; features_ = new INT_FEATURE_STRUCT[num_features_]; @@ -113,20 +118,40 @@ bool TrainingSample::DeSerialize(bool swap, FILE* fp) { // Saves the given features into a TrainingSample. 
TrainingSample* TrainingSample::CopyFromFeatures( - const INT_FX_RESULT_STRUCT& fx_info, const INT_FEATURE_STRUCT* features, + const INT_FX_RESULT_STRUCT& fx_info, + const TBOX& bounding_box, + const INT_FEATURE_STRUCT* features, int num_features) { TrainingSample* sample = new TrainingSample; sample->num_features_ = num_features; sample->features_ = new INT_FEATURE_STRUCT[num_features]; + sample->outline_length_ = fx_info.Length; memcpy(sample->features_, features, num_features * sizeof(features[0])); - sample->geo_feature_[GeoBottom] = fx_info.YBottom; - sample->geo_feature_[GeoTop] = fx_info.YTop; - sample->geo_feature_[GeoWidth] = fx_info.Width; + sample->geo_feature_[GeoBottom] = bounding_box.bottom(); + sample->geo_feature_[GeoTop] = bounding_box.top(); + sample->geo_feature_[GeoWidth] = bounding_box.width(); + + // Generate the cn_feature_ from the fx_info. + sample->cn_feature_[CharNormY] = + MF_SCALE_FACTOR * (fx_info.Ymean - kBlnBaselineOffset); + sample->cn_feature_[CharNormLength] = + MF_SCALE_FACTOR * fx_info.Length / LENGTH_COMPRESSION; + sample->cn_feature_[CharNormRx] = MF_SCALE_FACTOR * fx_info.Rx; + sample->cn_feature_[CharNormRy] = MF_SCALE_FACTOR * fx_info.Ry; + sample->features_are_indexed_ = false; sample->features_are_mapped_ = false; return sample; } +// Returns the cn_feature as a FEATURE_STRUCT* needed by cntraining. +FEATURE_STRUCT* TrainingSample::GetCNFeature() const { + FEATURE feature = NewFeature(&CharNormDesc); + for (int i = 0; i < kNumCNParams; ++i) + feature->Params[i] = cn_feature_[i]; + return feature; +} + // Constructs and returns a copy randomized by the method given by // the randomizer index. If index is out of [0, kSampleRandomSize) then // an exact copy is returned. 
diff --git a/classify/trainingsample.h b/classify/trainingsample.h index 821bbed99..6df1ce824 100644 --- a/classify/trainingsample.h +++ b/classify/trainingsample.h @@ -54,7 +54,7 @@ class TrainingSample : public ELIST_LINK { public: TrainingSample() : class_id_(INVALID_UNICHAR_ID), font_id_(0), page_num_(0), - num_features_(0), num_micro_features_(0), + num_features_(0), num_micro_features_(0), outline_length_(0), features_(NULL), micro_features_(NULL), weight_(1.0), max_dist_(0.0), sample_index_(0), features_are_indexed_(false), features_are_mapped_(false), @@ -65,8 +65,11 @@ class TrainingSample : public ELIST_LINK { // Saves the given features into a TrainingSample. The features are copied, // so may be deleted afterwards. Delete the return value after use. static TrainingSample* CopyFromFeatures(const INT_FX_RESULT_STRUCT& fx_info, + const TBOX& bounding_box, const INT_FEATURE_STRUCT* features, int num_features); + // Returns the cn_feature as a FEATURE_STRUCT* needed by cntraining. + FEATURE_STRUCT* GetCNFeature() const; // Constructs and returns a copy "randomized" by the method given by // the randomizer index. If index is out of [0, kSampleRandomSize) then // an exact copy is returned. @@ -146,6 +149,9 @@ class TrainingSample : public ELIST_LINK { const MicroFeature* micro_features() const { return micro_features_; } + int outline_length() const { + return outline_length_; + } float cn_feature(int index) const { return cn_feature_[index]; } @@ -203,6 +209,10 @@ class TrainingSample : public ELIST_LINK { int num_features_; // Number of MicroFeature in micro_features_ array. int num_micro_features_; + // Total length of outline in the baseline normalized coordinate space. + // See comment in WERD_RES class definition for a discussion of coordinate + // spaces. + int outline_length_; // Array of features. INT_FEATURE_STRUCT* features_; // Array of features. 
diff --git a/classify/trainingsampleset.cpp b/classify/trainingsampleset.cpp index 2e7f77da4..afbf3f420 100644 --- a/classify/trainingsampleset.cpp +++ b/classify/trainingsampleset.cpp @@ -67,7 +67,7 @@ bool TrainingSampleSet::FontClassInfo::DeSerialize(bool swap, FILE* fp) { return true; } -TrainingSampleSet::TrainingSampleSet(const UnicityTable& font_table) +TrainingSampleSet::TrainingSampleSet(const FontInfoTable& font_table) : num_raw_samples_(0), unicharset_size_(0), font_class_array_(NULL), fontinfo_table_(font_table) { } @@ -115,11 +115,12 @@ bool TrainingSampleSet::DeSerialize(bool swap, FILE* fp) { void TrainingSampleSet::LoadUnicharset(const char* filename) { if (!unicharset_.load_from_file(filename)) { tprintf("Failed to load unicharset from file %s\n" - "Building unicharset for boosting from scratch...\n", + "Building unicharset from scratch...\n", filename); unicharset_.clear(); - // Space character needed to represent NIL_LIST classification. - unicharset_.unichar_insert(" "); + // Add special characters as they were removed by the clear. + UNICHARSET empty; + unicharset_.AppendOtherUnicharset(empty); } unicharset_size_ = unicharset_.size(); } @@ -708,14 +709,6 @@ void TrainingSampleSet::ComputeCanonicalSamples(const IntFeatureMap& map, continue; GenericVector features2 = samples_[s2]->indexed_features(); double dist = f_table.FeatureDistance(features2); - int height = samples_[s2]->geo_feature(GeoTop) - - samples_[s2]->geo_feature(GeoBottom); - if (dist == 1.0 && height > 64) { - // TODO(rays) rethink this when the polygonal approximation goes. - // Currently it is possible for dots and other small characters - // to be completely different, even within the same class. 
- f_table.DebugFeatureDistance(features2); - } if (dist > max_dist) { max_dist = dist; if (dist > max_max_dist) { diff --git a/classify/trainingsampleset.h b/classify/trainingsampleset.h index 4ff4e86e0..4c843f41c 100644 --- a/classify/trainingsampleset.h +++ b/classify/trainingsampleset.h @@ -24,11 +24,11 @@ #include "trainingsample.h" class UNICHARSET; -template class UnicityTable; namespace tesseract { struct FontInfo; +class FontInfoTable; class IntFeatureMap; class IntFeatureSpace; class TrainingSample; @@ -42,7 +42,7 @@ class UnicharAndFonts; // metrics. class TrainingSampleSet { public: - explicit TrainingSampleSet(const UnicityTable& fontinfo_table); + explicit TrainingSampleSet(const FontInfoTable& fontinfo_table); ~TrainingSampleSet(); // Writes to the given file. Returns false in case of error. @@ -67,6 +67,9 @@ class TrainingSampleSet { int charsetsize() const { return unicharset_size_; } + const FontInfoTable& fontinfo_table() const { + return fontinfo_table_; + } // Loads an initial unicharset, or sets one up if the file cannot be read. void LoadUnicharset(const char* filename); @@ -281,7 +284,7 @@ class TrainingSampleSet { // Reference to the fontinfo_table_ in MasterTrainer. Provides names // for font_ids in the samples. Not serialized! - const UnicityTable& fontinfo_table_; + const FontInfoTable& fontinfo_table_; }; } // namespace tesseract.