diff --git a/classify/adaptive.cpp b/classify/adaptive.cpp index 0dd424a25..2397e4547 100644 --- a/classify/adaptive.cpp +++ b/classify/adaptive.cpp @@ -86,7 +86,6 @@ void FreeTempConfig(TEMP_CONFIG Config) { } /* FreeTempConfig */ - /*---------------------------------------------------------------------------*/ void FreeTempProto(void *arg) { PROTO proto = (PROTO) arg; @@ -94,6 +93,11 @@ void FreeTempProto(void *arg) { free_struct (proto, sizeof (TEMP_PROTO_STRUCT), "TEMP_PROTO_STRUCT"); } +void FreePermConfig(PERM_CONFIG Config) { + assert(Config != NULL); + Efree(Config->Ambigs); + free_struct(Config, sizeof(PERM_CONFIG_STRUCT), "PERM_CONFIG_STRUCT"); +} /*---------------------------------------------------------------------------*/ /** @@ -135,7 +139,7 @@ void free_adapted_class(ADAPT_CLASS adapt_class) { for (i = 0; i < MAX_NUM_CONFIGS; i++) { if (ConfigIsPermanent (adapt_class, i) && PermConfigFor (adapt_class, i) != NULL) - Efree (PermConfigFor (adapt_class, i)); + FreePermConfig (PermConfigFor (adapt_class, i)); else if (!ConfigIsPermanent (adapt_class, i) && TempConfigFor (adapt_class, i) != NULL) FreeTempConfig (TempConfigFor (adapt_class, i)); @@ -181,6 +185,14 @@ ADAPT_TEMPLATES Classify::NewAdaptedTemplates(bool InitFromUnicharset) { return (Templates); } /* NewAdaptedTemplates */ + +// Returns FontinfoId of the given config of the given adapted class. +int Classify::GetFontinfoId(ADAPT_CLASS Class, uinT8 ConfigId) { + return (ConfigIsPermanent(Class, ConfigId) ? + PermConfigFor(Class, ConfigId)->FontinfoId : + TempConfigFor(Class, ConfigId)->FontinfoId); +} + } // namespace tesseract /*----------------------------------------------------------------------------*/ @@ -201,13 +213,14 @@ void free_adapted_templates(ADAPT_TEMPLATES templates) { * This routine allocates and returns a new temporary config. 
* * @param MaxProtoId max id of any proto in new config + * @param FontinfoId font information from pre-trained templates * @return Ptr to new temp config. * * @note Globals: none * @note Exceptions: none * @note History: Thu Mar 14 13:28:21 1991, DSJ, Created. */ -TEMP_CONFIG NewTempConfig(int MaxProtoId) { +TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId) { TEMP_CONFIG Config; int NumProtos = MaxProtoId + 1; @@ -221,6 +234,7 @@ TEMP_CONFIG NewTempConfig(int MaxProtoId) { Config->ProtoVectorSize = WordsInVectorOfSize (NumProtos); Config->ContextsSeen = NIL_LIST; zero_all_bits (Config->Protos, Config->ProtoVectorSize); + Config->FontinfoId = FontinfoId; return (Config); @@ -388,13 +402,14 @@ ADAPT_TEMPLATES Classify::ReadAdaptedTemplates(FILE *File) { * @note History: Tue Mar 19 14:25:26 1991, DSJ, Created. */ PERM_CONFIG ReadPermConfig(FILE *File) { - PERM_CONFIG Config; + PERM_CONFIG Config = (PERM_CONFIG) alloc_struct(sizeof(PERM_CONFIG_STRUCT), + "PERM_CONFIG_STRUCT"); uinT8 NumAmbigs; - - fread ((char *) &NumAmbigs, sizeof (uinT8), 1, File); - Config = (PERM_CONFIG) Emalloc (sizeof (UNICHAR_ID) * (NumAmbigs + 1)); - fread (Config, sizeof (UNICHAR_ID), NumAmbigs, File); - Config[NumAmbigs] = -1; + fread ((char *) &NumAmbigs, sizeof(uinT8), 1, File); + Config->Ambigs = (UNICHAR_ID *)Emalloc(sizeof(UNICHAR_ID) * (NumAmbigs + 1)); + fread(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File); + Config->Ambigs[NumAmbigs] = -1; + fread(&(Config->FontinfoId), sizeof(int), 1, File); return (Config); @@ -523,12 +538,11 @@ void WritePermConfig(FILE *File, PERM_CONFIG Config) { uinT8 NumAmbigs = 0; assert (Config != NULL); - while (Config[NumAmbigs] > 0) - ++NumAmbigs; - - fwrite ((char *) &NumAmbigs, sizeof (uinT8), 1, File); - fwrite (Config, sizeof (UNICHAR_ID), NumAmbigs, File); + while (Config->Ambigs[NumAmbigs] > 0) ++NumAmbigs; + fwrite((char *) &NumAmbigs, sizeof(uinT8), 1, File); + fwrite(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File); + 
fwrite(&(Config->FontinfoId), sizeof(int), 1, File); } /* WritePermConfig */ diff --git a/classify/adaptive.h b/classify/adaptive.h index 025632e51..c67670f6c 100644 --- a/classify/adaptive.h +++ b/classify/adaptive.h @@ -43,10 +43,16 @@ typedef struct PROTO_ID MaxProtoId; LIST ContextsSeen; BIT_VECTOR Protos; + int FontinfoId; // font information inferred from pre-trained templates } TEMP_CONFIG_STRUCT; typedef TEMP_CONFIG_STRUCT *TEMP_CONFIG; -typedef UNICHAR_ID *PERM_CONFIG; +typedef struct +{ + UNICHAR_ID *Ambigs; + int FontinfoId; // font information inferred from pre-trained templates +} PERM_CONFIG_STRUCT; +typedef PERM_CONFIG_STRUCT *PERM_CONFIG; typedef union { @@ -116,7 +122,7 @@ void free_adapted_class(ADAPT_CLASS adapt_class); void free_adapted_templates(ADAPT_TEMPLATES templates); -TEMP_CONFIG NewTempConfig(int MaxProtoId); +TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId); TEMP_PROTO NewTempProto(); diff --git a/classify/adaptmatch.cpp b/classify/adaptmatch.cpp index 344cf3106..e83efc3dc 100644 --- a/classify/adaptmatch.cpp +++ b/classify/adaptmatch.cpp @@ -80,6 +80,8 @@ struct ScoredClass { FLOAT32 rating; inT16 config; inT16 config2; + inT16 fontinfo_id; + inT16 fontinfo_id2; }; struct ADAPT_RESULTS { @@ -100,6 +102,8 @@ struct ADAPT_RESULTS { best_match.rating = WORST_POSSIBLE_RATING; best_match.config = 0; best_match.config2 = 0; + best_match.fontinfo_id = kBlankFontinfoId; + best_match.fontinfo_id2 = kBlankFontinfoId; } }; @@ -403,9 +407,11 @@ void Classify::LearnPieces(const char* filename, int start, int length, } UNICHAR_ID class_id = unicharset.unichar_to_id(correct_text); if (classify_learning_debug_level >= 1) - tprintf("Adapting to char = %s, thr= %g\n", - unicharset.id_to_unichar(class_id), threshold); - AdaptToChar(blob, class_id, threshold); + tprintf("Adapting to char = %s, thr= %g font_id= %d\n", + unicharset.id_to_unichar(class_id), threshold, word->fontinfo_id); + // If filename is not NULL we are doing recognition + // (as 
opposed to training), so we must have already set word fonts. + AdaptToChar(blob, class_id, word->fontinfo_id, threshold); } break_pieces(blob, word->seam_array, start, start + length - 1); @@ -680,6 +686,7 @@ void Classify::SettupPass2() { * * @param Blob blob to model new class after * @param ClassId id of the class to be initialized + * @param FontinfoId font information inferred from pre-trained templates * @param Class adapted class to be initialized * @param Templates adapted templates to add new class to * @@ -693,6 +700,7 @@ void Classify::SettupPass2() { */ void Classify::InitAdaptedClass(TBLOB *Blob, CLASS_ID ClassId, + int FontinfoId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates) { FEATURE_SET Features; @@ -712,8 +720,8 @@ void Classify::InitAdaptedClass(TBLOB *Blob, return; } - Config = NewTempConfig (NumFeatures - 1); - TempConfigFor (Class, 0) = Config; + Config = NewTempConfig(NumFeatures - 1, FontinfoId); + TempConfigFor(Class, 0) = Config; /* this is a kludge to construct cutoffs for adapted templates */ if (Templates == AdaptedTemplates) @@ -851,6 +859,7 @@ int Classify::AdaptableWord(TWERD *Word, * @param Blob blob to add to templates for ClassId * @param LineStats statistics about text line blob is in * @param ClassId class to add blob to + * @param FontinfoId font information from pre-trained templates * @param Threshold minimum match rating to existing template * * Globals: @@ -864,6 +873,7 @@ int Classify::AdaptableWord(TWERD *Word, */ void Classify::AdaptToChar(TBLOB *Blob, CLASS_ID ClassId, + int FontinfoId, FLOAT32 Threshold) { int NumFeatures; INT_FEATURE_ARRAY IntFeatures; @@ -882,7 +892,7 @@ void Classify::AdaptToChar(TBLOB *Blob, Class = AdaptedTemplates->Class[ClassId]; assert(Class != NULL); if (IsEmptyAdaptedClass(Class)) { - InitAdaptedClass(Blob, ClassId, Class, AdaptedTemplates); + InitAdaptedClass(Blob, ClassId, FontinfoId, Class, AdaptedTemplates); } else { IClass = ClassForClassId (AdaptedTemplates->Templates, ClassId); @@ 
-892,10 +902,20 @@ void Classify::AdaptToChar(TBLOB *Blob, return; im_.SetBaseLineMatch(); - im_.Match(IClass, AllProtosOn, AllConfigsOn, + // Only match configs with the matching font. + BIT_VECTOR MatchingFontConfigs = NewBitVector(MAX_NUM_PROTOS); + for (int cfg = 0; cfg < IClass->NumConfigs; ++cfg) { + if (GetFontinfoId(Class, cfg) == FontinfoId) { + SET_BIT(MatchingFontConfigs, cfg); + } else { + reset_bit(MatchingFontConfigs, cfg); + } + } + im_.Match(IClass, AllProtosOn, MatchingFontConfigs, NumFeatures, NumFeatures, IntFeatures, 0, &IntResult, classify_adapt_feature_threshold, NO_DEBUG, matcher_debug_separate_windows); + FreeBitVector(MatchingFontConfigs); SetAdaptiveThreshold(Threshold); @@ -931,10 +951,10 @@ void Classify::AdaptToChar(TBLOB *Blob, } NewTempConfigId = MakeNewTemporaryConfig(AdaptedTemplates, ClassId, + FontinfoId, NumFeatures, IntFeatures, FloatFeatures); - if (NewTempConfigId >= 0 && TempConfigReliable(ClassId, TempConfigFor(Class, NewTempConfigId))) { MakePermanent(AdaptedTemplates, ClassId, NewTempConfigId, Blob); @@ -988,6 +1008,7 @@ void Classify::DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class) { * @param Blob blob to add to templates for ClassId * @param LineStats statistics about text line blob is in * @param ClassId class to add blob to + * @param FontinfoId font information from pre-trained teamples * @param Threshold minimum match rating to existing template * * Globals: @@ -998,6 +1019,7 @@ void Classify::DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class) { */ void Classify::AdaptToPunc(TBLOB *Blob, CLASS_ID ClassId, + int FontinfoId, FLOAT32 Threshold) { ADAPT_RESULTS *Results = new ADAPT_RESULTS(); int i; @@ -1021,7 +1043,7 @@ void Classify::AdaptToPunc(TBLOB *Blob, cprintf ("Adapting to punc = %s, thr= %g\n", unicharset.id_to_unichar(ClassId), Threshold); #endif - AdaptToChar(Blob, ClassId, Threshold); + AdaptToChar(Blob, ClassId, FontinfoId, Threshold); } delete Results; } /* AdaptToPunc */ @@ -1045,8 
+1067,10 @@ void Classify::AdaptToPunc(TBLOB *Blob, * @param[out] results results to add new result to * @param class_id class of new result * @param rating rating of new result - * @param config_id config id of new result - * @param config2_id config id of 2nd choice result + * @param config config id of new result + * @param config2 config id of 2nd choice result + * @param fontinfo_id font information of the new result + * @param fontinfo_id2 font information of the 2nd choice result * * @note Exceptions: none * @note History: Tue Mar 12 18:19:29 1991, DSJ, Created. @@ -1054,10 +1078,13 @@ void Classify::AdaptToPunc(TBLOB *Blob, void Classify::AddNewResult(ADAPT_RESULTS *results, CLASS_ID class_id, FLOAT32 rating, - int config_id, - int config2_id) { + int config, + int config2, + int fontinfo_id, + int fontinfo_id2) { ScoredClass *old_match = FindScoredUnichar(results, class_id); - ScoredClass match = {class_id, rating, config_id, config2_id}; + ScoredClass match = + { class_id, rating, config, config2, fontinfo_id, fontinfo_id2 }; if (rating > results->best_match.rating + matcher_bad_match_pad || (old_match && rating >= old_match->rating)) @@ -1108,6 +1135,7 @@ void Classify::AddNewResult(ADAPT_RESULTS *results, */ void Classify::AmbigClassifier(TBLOB *Blob, INT_TEMPLATES Templates, + ADAPT_CLASS *Classes, UNICHAR_ID *Ambiguities, ADAPT_RESULTS *Results) { int NumFeatures; @@ -1142,9 +1170,11 @@ void Classify::AmbigClassifier(TBLOB *Blob, IntResult.Config, IntResult.Rating * 100.0); + assert(Classes != NULL); AddNewResult(Results, ClassId, IntResult.Rating, - IntResult.Config, IntResult.Config2); - + IntResult.Config, IntResult.Config2, + GetFontinfoId(Classes[ClassId], IntResult.Config), + GetFontinfoId(Classes[ClassId], IntResult.Config2)); Ambiguities++; NumAmbigClassesTried++; @@ -1213,8 +1243,18 @@ void Classify::MasterMatcher(INT_TEMPLATES templates, int_result.Rating += miss_penalty; if (int_result.Rating > WORST_POSSIBLE_RATING) int_result.Rating = 
WORST_POSSIBLE_RATING; - AddNewResult(final_results, class_id, int_result.Rating, - int_result.Config, int_result.Config2); + if (classes != NULL) { + AddNewResult(final_results, class_id, int_result.Rating, + int_result.Config, int_result.Config2, + GetFontinfoId(classes[class_id], int_result.Config), + GetFontinfoId(classes[class_id], int_result.Config2)); + } else { + AddNewResult(final_results, class_id, int_result.Rating, + int_result.Config, int_result.Config2, + kBlankFontinfoId, kBlankFontinfoId); + + } + // Add unichars ambiguous with class_id with the same rating as class_id. if (use_definite_ambigs_for_classifier) { const UnicharIdVector *definite_ambigs = @@ -1239,8 +1279,17 @@ void Classify::MasterMatcher(INT_TEMPLATES templates, ambig_match->rating = int_result.Rating; } } else { - AddNewResult(final_results, ambig_class_id, int_result.Rating, - int_result.Config, int_result.Config2); + if (classes != NULL) { + AddNewResult( + final_results, ambig_class_id, int_result.Rating, + int_result.Config, int_result.Config2, + GetFontinfoId(classes[class_id], int_result.Config), + GetFontinfoId(classes[class_id], int_result.Config2)); + } else { + AddNewResult(final_results, ambig_class_id, int_result.Rating, + int_result.Config, int_result.Config2, + kBlankFontinfoId, kBlankFontinfoId); + } } } } @@ -1302,7 +1351,8 @@ UNICHAR_ID *Classify::BaselineClassifier(TBLOB *Blob, return (NULL); /* this is a bug - maybe should return "" */ - return Templates->Class[ClassId]->Config[Results->best_match.config].Perm; + return Templates->Class[ClassId]-> + Config[Results->best_match.config].Perm->Ambigs; } /* BaselineClassifier */ @@ -1377,7 +1427,8 @@ void Classify::ClassifyAsNoise(ADAPT_RESULTS *Results) { Rating *= Rating; Rating /= 1.0 + Rating; - AddNewResult(Results, NO_CLASS, Rating, -1, -1); + AddNewResult(Results, NO_CLASS, Rating, -1, -1, + kBlankFontinfoId, kBlankFontinfoId); } /* ClassifyAsNoise */ } // namespace tesseract @@ -1395,7 +1446,8 @@ ScoredClass 
*FindScoredUnichar(ADAPT_RESULTS *results, UNICHAR_ID id) { // Retrieve the current rating for a unichar id if we have rated it, defaulting // to WORST_POSSIBLE_RATING. ScoredClass ScoredUnichar(ADAPT_RESULTS *results, UNICHAR_ID id) { - ScoredClass poor_result = {id, WORST_POSSIBLE_RATING, -1, -1}; + ScoredClass poor_result = + {id, WORST_POSSIBLE_RATING, -1, -1, kBlankFontinfoId, kBlankFontinfoId}; ScoredClass *entry = FindScoredUnichar(results, id); return (entry == NULL) ? poor_result : *entry; } @@ -1439,6 +1491,22 @@ void Classify::ConvertMatchesToChoices(ADAPT_RESULTS *Results, for (int i = 0; i < Results->NumMatches; i++) { ScoredClass next = Results->match[i]; + int fontinfo_id = next.fontinfo_id; + int fontinfo_id2 = next.fontinfo_id2; + if (fontinfo_id == kBlankFontinfoId) { + // ScoredClass next must have come from pre-trained templates, + // so we infer its font information from fontset_table. + int font_set_id = PreTrainedTemplates->Class[next.id]->font_set_id; + if (font_set_id >= 0) { + const FontSet &fs = fontset_table_.get(font_set_id); + if (next.config >= 0 && next.config < fs.size) { + fontinfo_id = fs.configs[next.config]; + } + if (next.config2 >= 0 && next.config2 < fs.size) { + fontinfo_id2 = fs.configs[next.config2]; + } + } + } bool current_is_frag = (unicharset.get_fragment(next.id) != NULL); if (temp_it.length()+1 == MAX_MATCHES && !contains_nonfrag && current_is_frag) { @@ -1458,8 +1526,8 @@ void Classify::ConvertMatchesToChoices(ADAPT_RESULTS *Results, Certainty *= -(getDict().certainty_scale); } temp_it.add_to_end(new BLOB_CHOICE(next.id, Rating, Certainty, - next.config, next.config2, - unicharset.get_script(next.id))); + fontinfo_id, fontinfo_id2, + unicharset.get_script(next.id))); contains_nonfrag |= !current_is_frag; // update contains_nonfrag choices_length++; if (choices_length >= MAX_MATCHES) break; @@ -1559,6 +1627,7 @@ void Classify::DoAdaptiveMatch(TBLOB *Blob, } else if (Ambiguities && *Ambiguities >= 0 && 
!tess_bn_matching) { AmbigClassifier(Blob, PreTrainedTemplates, + AdaptedTemplates->Class, Ambiguities, Results); } @@ -1567,7 +1636,7 @@ void Classify::DoAdaptiveMatch(TBLOB *Blob, // Force the blob to be classified as noise // if the results contain only fragments. // TODO(daria): verify that this is better than - // just adding a NULL classificaiton. + // just adding a NULL classification. if (!Results->HasNonfragment) { Results->NumMatches = 0; } @@ -2006,6 +2075,7 @@ int Classify::GetIntCharNormFeatures(TBLOB *Blob, * * @param Templates adapted templates to add new config to * @param ClassId class id to associate with new config + * @param FontinfoId font information inferred from pre-trained templates * @param NumFeatures number of features in IntFeatures * @param Features features describing model for new config * @param FloatFeatures floating-pt representation of features @@ -2017,6 +2087,7 @@ int Classify::GetIntCharNormFeatures(TBLOB *Blob, */ int Classify::MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, + int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures) { @@ -2077,13 +2148,15 @@ int Classify::MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, ConfigId = AddIntConfig(IClass); ConvertConfig(TempProtoMask, ConfigId, IClass); - Config = NewTempConfig(MaxProtoId); + Config = NewTempConfig(MaxProtoId, FontinfoId); TempConfigFor(Class, ConfigId) = Config; copy_all_bits(TempProtoMask, Config->Protos, Config->ProtoVectorSize); if (classify_learning_debug_level >= 1) - cprintf("Making new temp config %d using %d old and %d new protos.\n", - ConfigId, NumOldProtos, MaxProtoId - OldMaxProtoId); + cprintf("Making new temp config %d fontinfo id %d" + " using %d old and %d new protos.\n", + ConfigId, Config->FontinfoId, + NumOldProtos, MaxProtoId - OldMaxProtoId); return ConfigId; } /* MakeNewTemporaryConfig */ @@ -2215,23 +2288,33 @@ void Classify::MakePermanent(ADAPT_TEMPLATES Templates, 
Templates->NumPermClasses++; Class->NumPermConfigs++; + // Initialize permanent config. + Ambigs = GetAmbiguities(Blob, ClassId); + PERM_CONFIG Perm = (PERM_CONFIG) alloc_struct(sizeof(PERM_CONFIG_STRUCT), + "PERM_CONFIG_STRUCT"); + Perm->Ambigs = Ambigs; + Perm->FontinfoId = Config->FontinfoId; + + // Free memory associated with temporary config (since ADAPTED_CONFIG + // is a union we need to clean up before we record permanent config). ProtoKey.Templates = Templates; ProtoKey.ClassId = ClassId; ProtoKey.ConfigId = ConfigId; - Class->TempProtos = delete_d(Class->TempProtos, &ProtoKey, - MakeTempProtoPerm); + Class->TempProtos = delete_d(Class->TempProtos, &ProtoKey, MakeTempProtoPerm); FreeTempConfig(Config); - Ambigs = GetAmbiguities(Blob, ClassId); - PermConfigFor(Class, ConfigId) = Ambigs; + // Record permanent config. + PermConfigFor(Class, ConfigId) = Perm; if (classify_learning_debug_level >= 1) { - cprintf("Making config %d permanent with ambiguities '", - ConfigId, Ambigs); + tprintf("Making config %d for %s (ClassId %d) permanent:" + " fontinfo id %d, ambiguities '", + ConfigId, getDict().getUnicharset().debug_str(ClassId).string(), + ClassId, PermConfigFor(Class, ConfigId)->FontinfoId); for (UNICHAR_ID *AmbigsPointer = Ambigs; - *AmbigsPointer >= 0; ++AmbigsPointer) - cprintf("%s", unicharset.id_to_unichar(*AmbigsPointer)); - cprintf("'.\n"); + *AmbigsPointer >= 0; ++AmbigsPointer) + tprintf("%s", unicharset.id_to_unichar(*AmbigsPointer)); + tprintf("'.\n"); } } /* MakePermanent */ } // namespace tesseract diff --git a/classify/classify.cpp b/classify/classify.cpp index f000948ac..eb1e12ced 100644 --- a/classify/classify.cpp +++ b/classify/classify.cpp @@ -45,6 +45,10 @@ bool compare_font_set(const FontSet& fs1, const FontSet& fs2) { } void delete_callback(FontInfo f) { + if (f.spacing_vec != NULL) { + f.spacing_vec->delete_data_pointers(); + delete f.spacing_vec; + } delete[] f.name; } void delete_callback_fs(FontSet fs) { diff --git 
a/classify/classify.h b/classify/classify.h index 65638efea..b76d16a37 100644 --- a/classify/classify.h +++ b/classify/classify.h @@ -37,6 +37,9 @@ class WERD_RES; struct ADAPT_RESULTS; struct NORM_PROTOS; +static const int kUnknownFontinfoId = -1; +static const int kBlankFontinfoId = -2; + namespace tesseract { // How segmented is a blob. In this enum, character refers to a classifiable @@ -63,6 +66,7 @@ class Classify : public CCStruct { /* adaptive.cpp ************************************************************/ ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset); + int GetFontinfoId(ADAPT_CLASS Class, uinT8 ConfigId); int ClassPruner(INT_TEMPLATES IntTemplates, inT16 NumFeatures, INT_FEATURE_ARRAY Features, @@ -108,13 +112,16 @@ class Classify : public CCStruct { void InitAdaptiveClassifier(bool load_pre_trained_templates); void InitAdaptedClass(TBLOB *Blob, CLASS_ID ClassId, + int FontinfoId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates); void AdaptToPunc(TBLOB *Blob, CLASS_ID ClassId, + int FontinfoId, FLOAT32 Threshold); void AmbigClassifier(TBLOB *Blob, INT_TEMPLATES Templates, + ADAPT_CLASS *Classes, UNICHAR_ID *Ambiguities, ADAPT_RESULTS *Results); void MasterMatcher(INT_TEMPLATES templates, @@ -129,11 +136,13 @@ class Classify : public CCStruct { ADAPT_RESULTS* final_results); void ConvertMatchesToChoices(ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices); - void AddNewResult(ADAPT_RESULTS *Results, - CLASS_ID ClassId, - FLOAT32 Rating, - int ConfigId, - int config2); + void AddNewResult(ADAPT_RESULTS *results, + CLASS_ID class_dd, + FLOAT32 rating, + int config, + int config2, + int fontinfo_id, + int fontinfo_id2); int GetAdaptiveFeatures(TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures); @@ -155,6 +164,7 @@ class Classify : public CCStruct { BIT_VECTOR TempProtoMask); int MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, + int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET 
FloatFeatures); @@ -182,6 +192,7 @@ class Classify : public CCStruct { ADAPT_RESULTS *Results); void AdaptToChar(TBLOB *Blob, CLASS_ID ClassId, + int FontinfoId, FLOAT32 Threshold); void DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class); int AdaptableWord(TWERD *Word, diff --git a/classify/intfx.cpp b/classify/intfx.cpp index 0d67b8662..240fdfc27 100644 --- a/classify/intfx.cpp +++ b/classify/intfx.cpp @@ -374,7 +374,7 @@ int ExtractIntFeat(TBLOB *Blob, // made by vector as measured counterclockwise from <-1, 0> // The order of the arguments follows the convention of atan2(3) uinT8 BinaryAnglePlusPi(inT32 Y, inT32 X) { - inT16 Angle; + inT16 Angle, Atan; uinT16 Ratio; uinT32 AbsX, AbsY; @@ -393,26 +393,26 @@ uinT8 BinaryAnglePlusPi(inT32 Y, inT32 X) { Ratio = AbsX * ATAN_TABLE_SIZE / AbsY; if (Ratio >= ATAN_TABLE_SIZE) Ratio = ATAN_TABLE_SIZE - 1; - Angle = AtanTable[Ratio]; + Atan = AtanTable[Ratio]; if (X >= 0) if (Y >= 0) if (AbsX > AbsY) - Angle = Angle; + Angle = Atan; else - Angle = 64 - Angle; + Angle = 64 - Atan; else if (AbsX > AbsY) - Angle = 256 - Angle; + Angle = 256 - Atan; else - Angle = 192 + Angle; + Angle = 192 + Atan; else if (Y >= 0) if (AbsX > AbsY) - Angle = 128 - Angle; + Angle = 128 - Atan; else - Angle = 64 + Angle; + Angle = 64 + Atan; else if (AbsX > AbsY) - Angle = 128 + Angle; + Angle = 128 + Atan; else - Angle = 192 - Angle; + Angle = 192 - Atan; /* reverse angles to match old feature extractor: Angle += PI */ Angle += 128; diff --git a/classify/intmatcher.cpp b/classify/intmatcher.cpp index ae753eb1d..50a5495fb 100644 --- a/classify/intmatcher.cpp +++ b/classify/intmatcher.cpp @@ -1162,6 +1162,9 @@ int IntegerMatcher::FindBestMatch( */ int BestMatch = 0; int Best2Match = 0; + assert(ClassTemplate->NumConfigs > 0); + Result->Config = 0; + Result->Config2 = 0; /* Find best match */ for (int ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) { diff --git a/classify/intproto.cpp b/classify/intproto.cpp index 
d89897ab3..28f52f227 100644 --- a/classify/intproto.cpp +++ b/classify/intproto.cpp @@ -45,23 +45,21 @@ #endif /* match debug display constants*/ -#define DISPLAY_OFFSET (0.5 * INT_CHAR_NORM_RANGE) #define PROTO_PRUNER_SCALE (4.0) -#define INT_DESCENDER (0.0 * INT_CHAR_NORM_RANGE - DISPLAY_OFFSET) -#define INT_BASELINE (0.25 * INT_CHAR_NORM_RANGE - DISPLAY_OFFSET) -#define INT_XHEIGHT (0.75 * INT_CHAR_NORM_RANGE - DISPLAY_OFFSET) -#define INT_CAPHEIGHT (1.0 * INT_CHAR_NORM_RANGE - DISPLAY_OFFSET) +#define INT_DESCENDER (0.0 * INT_CHAR_NORM_RANGE) +#define INT_BASELINE (0.25 * INT_CHAR_NORM_RANGE) +#define INT_XHEIGHT (0.75 * INT_CHAR_NORM_RANGE) +#define INT_CAPHEIGHT (1.0 * INT_CHAR_NORM_RANGE) -#define INT_XCENTER (0.5 * INT_CHAR_NORM_RANGE - DISPLAY_OFFSET) -#define INT_YCENTER (0.5 * INT_CHAR_NORM_RANGE - DISPLAY_OFFSET) +#define INT_XCENTER (0.5 * INT_CHAR_NORM_RANGE) +#define INT_YCENTER (0.5 * INT_CHAR_NORM_RANGE) #define INT_XRADIUS (0.2 * INT_CHAR_NORM_RANGE) #define INT_YRADIUS (0.2 * INT_CHAR_NORM_RANGE) -#define INT_MIN_X (- DISPLAY_OFFSET) -#define INT_MIN_Y (- DISPLAY_OFFSET) -#define INT_MAX_X ( DISPLAY_OFFSET) -#define INT_MAX_Y ( DISPLAY_OFFSET) -#define DOUBLE_OFFSET 0.095 +#define INT_MIN_X 0 +#define INT_MIN_Y 0 +#define INT_MAX_X INT_CHAR_NORM_RANGE +#define INT_MAX_Y INT_CHAR_NORM_RANGE /** define pad used to snap near horiz/vertical protos to horiz/vertical */ #define HV_TOLERANCE (0.0025) /* approx 0.9 degrees */ @@ -147,7 +145,7 @@ void GetCPPadsForLevel(int Level, FLOAT32 *SidePad, FLOAT32 *AnglePad); -C_COL GetMatchColorFor(FLOAT32 Evidence); +ScrollView::Color GetMatchColorFor(FLOAT32 Evidence); void GetNextFill(TABLE_FILLER *Filler, FILL_SPEC *Fill); @@ -158,12 +156,13 @@ void InitTableFiller(FLOAT32 EndPad, TABLE_FILLER *Filler); #ifndef GRAPHICS_DISABLED -void RenderIntFeature(void *window, INT_FEATURE Feature, C_COL Color); +void RenderIntFeature(ScrollView *window, const INT_FEATURE_STRUCT* Feature, + ScrollView::Color color); -void 
RenderIntProto(void *window, +void RenderIntProto(ScrollView *window, INT_CLASS Class, PROTO_ID ProtoId, - C_COL Color); + ScrollView::Color color); #endif int TruncateParam(FLOAT32 Param, int Min, int Max, char *Id); @@ -488,7 +487,7 @@ void UpdateMatchDisplay() { ** History: Thu Mar 21 15:40:19 1991, DSJ, Created. */ if (IntMatchWindow != NULL) - c_make_current(IntMatchWindow); + IntMatchWindow->Update(); } /* ClearMatchDisplay */ #endif @@ -650,12 +649,10 @@ void DisplayIntFeature(INT_FEATURE Feature, FLOAT32 Evidence) { ** Exceptions: none ** History: Thu Mar 21 14:45:04 1991, DSJ, Created. */ - C_COL Color; - - Color = GetMatchColorFor(Evidence); - RenderIntFeature(IntMatchWindow, Feature, Color); + ScrollView::Color color = GetMatchColorFor(Evidence); + RenderIntFeature(IntMatchWindow, Feature, color); if (FeatureDisplayWindow) { - RenderIntFeature(FeatureDisplayWindow, Feature, Color); + RenderIntFeature(FeatureDisplayWindow, Feature, color); } } /* DisplayIntFeature */ @@ -675,12 +672,10 @@ void DisplayIntProto(INT_CLASS Class, PROTO_ID ProtoId, FLOAT32 Evidence) { ** Exceptions: none ** History: Thu Mar 21 14:45:04 1991, DSJ, Created. 
*/ - C_COL Color; - - Color = GetMatchColorFor(Evidence); - RenderIntProto(IntMatchWindow, Class, ProtoId, Color); + ScrollView::Color color = GetMatchColorFor(Evidence); + RenderIntProto(IntMatchWindow, Class, ProtoId, color); if (ProtoDisplayWindow) { - RenderIntProto(ProtoDisplayWindow, Class, ProtoId, Color); + RenderIntProto(ProtoDisplayWindow, Class, ProtoId, color); } } /* DisplayIntProto */ #endif @@ -812,6 +807,66 @@ bool write_info(FILE* f, const FontInfo& fi) { return true; } +bool read_spacing_info(FILE *f, FontInfo* fi, bool swap) { + inT32 vec_size, kern_size; + if (fread(&vec_size, sizeof(vec_size), 1, f) != 1) return false; + if (swap) Reverse32(&vec_size); + ASSERT_HOST(vec_size >= 0); + if (vec_size == 0) return true; + fi->init_spacing(vec_size); + for (int i = 0; i < vec_size; ++i) { + FontSpacingInfo *fs = new FontSpacingInfo(); + if (fread(&fs->x_gap_before, sizeof(fs->x_gap_before), 1, f) != 1 || + fread(&fs->x_gap_after, sizeof(fs->x_gap_after), 1, f) != 1 || + fread(&kern_size, sizeof(kern_size), 1, f) != 1) { + return false; + } + if (swap) { + ReverseN(&(fs->x_gap_before), sizeof(fs->x_gap_before)); + ReverseN(&(fs->x_gap_after), sizeof(fs->x_gap_after)); + Reverse32(&kern_size); + } + if (kern_size < 0) { // indication of a NULL entry in fi->spacing_vec + delete fs; + continue; + } + if (kern_size > 0 && (!fs->kerned_unichar_ids.DeSerialize(swap, f) || + !fs->kerned_x_gaps.DeSerialize(swap, f))) { + return false; + } + fi->add_spacing(i, fs); + } + return true; +} + +bool write_spacing_info(FILE* f, const FontInfo& fi) { + inT32 vec_size = (fi.spacing_vec == NULL) ? 0 : fi.spacing_vec->size(); + if (fwrite(&vec_size, sizeof(vec_size), 1, f) != 1) return false; + inT16 x_gap_invalid = -1; + for (int i = 0; i < vec_size; ++i) { + FontSpacingInfo *fs = fi.spacing_vec->get(i); + inT32 kern_size = (fs == NULL) ? 
-1 : fs->kerned_x_gaps.size(); + if (fs == NULL) { + if (fwrite(&(x_gap_invalid), sizeof(x_gap_invalid), 1, f) != 1 || + fwrite(&(x_gap_invalid), sizeof(x_gap_invalid), 1, f) != 1 || + fwrite(&kern_size, sizeof(kern_size), 1, f) != 1) { + return false; + } + } else { + if (fwrite(&(fs->x_gap_before), sizeof(fs->x_gap_before), 1, f) != 1 || + fwrite(&(fs->x_gap_after), sizeof(fs->x_gap_after), 1, f) != 1 || + fwrite(&kern_size, sizeof(kern_size), 1, f) != 1) { + return false; + } + } + if (kern_size > 0 && (!fs->kerned_unichar_ids.Serialize(f) || + !fs->kerned_x_gaps.Serialize(f))) { + return false; + } + } + return true; +} + bool read_set(FILE* f, FontSet* fs, bool swap) { if (fread(&fs->size, sizeof(fs->size), 1, f) != 1) return false; if (swap) @@ -1130,6 +1185,11 @@ INT_TEMPLATES Classify::ReadIntTemplates(FILE *File) { } if (version_id >= 4) { this->fontinfo_table_.read(File, NewPermanentTessCallback(read_info), swap); + if (version_id >= 5) { + this->fontinfo_table_.read(File, + NewPermanentTessCallback(read_spacing_info), + swap); + } this->fontset_table_.read(File, NewPermanentTessCallback(read_set), swap); } @@ -1156,52 +1216,18 @@ void Classify::ShowMatchDisplay() { ** Exceptions: none ** History: Thu Mar 21 15:47:33 1991, DSJ, Created. 
*/ - void *window; - /* Size of drawable */ InitIntMatchWindowIfReqd(); c_clear_window(IntMatchWindow); if (ProtoDisplayWindow) { - c_clear_window(ProtoDisplayWindow); + ProtoDisplayWindow->Clear(); } if (FeatureDisplayWindow) { - c_clear_window(FeatureDisplayWindow); + FeatureDisplayWindow->Clear(); } + ClearFeatureSpaceWindow( + static_cast(static_cast(classify_norm_method)), + IntMatchWindow); - window = IntMatchWindow; - c_line_color_index(window, Grey); - /* Default size of drawing */ - if (classify_norm_method == baseline) { - c_move (window, -1000.0, INT_BASELINE); - c_draw (window, 1000.0, INT_BASELINE); - c_move (window, -1000.0, INT_DESCENDER); - c_draw (window, 1000.0, INT_DESCENDER); - c_move (window, -1000.0, INT_XHEIGHT); - c_draw (window, 1000.0, INT_XHEIGHT); - c_move (window, -1000.0, INT_CAPHEIGHT); - c_draw (window, 1000.0, INT_CAPHEIGHT); - c_move (window, INT_MIN_X, -1000.0); - c_draw (window, INT_MIN_X, 1000.0); - c_move (window, INT_MAX_X, -1000.0); - c_draw (window, INT_MAX_X, 1000.0); - } - else { - c_move (window, INT_XCENTER - INT_XRADIUS, INT_YCENTER - INT_YRADIUS); - c_draw (window, INT_XCENTER + INT_XRADIUS, INT_YCENTER - INT_YRADIUS); - c_move (window, INT_XCENTER - INT_XRADIUS, INT_YCENTER + INT_YRADIUS); - c_draw (window, INT_XCENTER + INT_XRADIUS, INT_YCENTER + INT_YRADIUS); - c_move (window, INT_XCENTER - INT_XRADIUS, INT_YCENTER - INT_YRADIUS); - c_draw (window, INT_XCENTER - INT_XRADIUS, INT_YCENTER + INT_YRADIUS); - c_move (window, INT_XCENTER + INT_XRADIUS, INT_YCENTER - INT_YRADIUS); - c_draw (window, INT_XCENTER + INT_XRADIUS, INT_YCENTER + INT_YRADIUS); - c_move(window, INT_MIN_X, INT_MIN_Y); - c_draw(window, INT_MIN_X, INT_MAX_Y); - c_move(window, INT_MIN_X, INT_MIN_Y); - c_draw(window, INT_MAX_X, INT_MIN_Y); - c_move(window, INT_MAX_X, INT_MAX_Y); - c_draw(window, INT_MIN_X, INT_MAX_Y); - c_move(window, INT_MAX_X, INT_MAX_Y); - c_draw(window, INT_MAX_X, INT_MIN_Y); - } IntMatchWindow->ZoomToRectangle(INT_MIN_X, INT_MIN_Y, 
INT_MAX_X, INT_MAX_Y); if (ProtoDisplayWindow) { @@ -1213,6 +1239,29 @@ void Classify::ShowMatchDisplay() { INT_MAX_X, INT_MAX_Y); } } /* ShowMatchDisplay */ + +// Clears the given window and draws the featurespace guides for the +// appropriate normalization method. +void ClearFeatureSpaceWindow(NORM_METHOD norm_method, ScrollView* window) { + window->Clear(); + + window->Pen(ScrollView::GREY); + // Draw the feature space limit rectangle. + window->Rectangle(0, 0, INT_MAX_X, INT_MAX_Y); + if (norm_method == baseline) { + window->SetCursor(0, INT_DESCENDER); + window->DrawTo(INT_MAX_X, INT_DESCENDER); + window->SetCursor(0, INT_BASELINE); + window->DrawTo(INT_MAX_X, INT_BASELINE); + window->SetCursor(0, INT_XHEIGHT); + window->DrawTo(INT_MAX_X, INT_XHEIGHT); + window->SetCursor(0, INT_CAPHEIGHT); + window->DrawTo(INT_MAX_X, INT_CAPHEIGHT); + } else { + window->Rectangle(INT_XCENTER - INT_XRADIUS, INT_YCENTER - INT_YRADIUS, + INT_XCENTER + INT_XRADIUS, INT_YCENTER + INT_YRADIUS); + } +} #endif /*---------------------------------------------------------------------------*/ @@ -1233,7 +1282,7 @@ void Classify::WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, int i, j; INT_CLASS Class; int unicharset_size = target_unicharset.size(); - int version_id = -4; // When negated by the reader -1 becomes +1 etc. + int version_id = -5; // When negated by the reader -1 becomes +1 etc. 
if (Templates->NumClasses != unicharset_size) { cprintf("Warning: executing WriteIntTemplates() with %d classes in" @@ -1283,6 +1332,8 @@ void Classify::WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, /* Write the fonts info tables */ this->fontinfo_table_.write(File, NewPermanentTessCallback(write_info)); + this->fontinfo_table_.write(File, + NewPermanentTessCallback(write_spacing_info)); this->fontset_table_.write(File, NewPermanentTessCallback(write_set)); } /* WriteIntTemplates */ } // namespace tesseract @@ -1588,7 +1639,7 @@ void GetCPPadsForLevel(int Level, /*---------------------------------------------------------------------------*/ -C_COL GetMatchColorFor(FLOAT32 Evidence) { +ScrollView::Color GetMatchColorFor(FLOAT32 Evidence) { /* ** Parameters: ** Evidence evidence value to return color for @@ -1603,13 +1654,13 @@ C_COL GetMatchColorFor(FLOAT32 Evidence) { assert (Evidence <= 1.0); if (Evidence >= 0.90) - return White; + return ScrollView::WHITE; else if (Evidence >= 0.75) - return Green; + return ScrollView::GREEN; else if (Evidence >= 0.50) - return Red; + return ScrollView::RED; else - return Blue; + return ScrollView::BLUE; } /* GetMatchColorFor */ @@ -1839,7 +1890,8 @@ void InitTableFiller (FLOAT32 EndPad, FLOAT32 SidePad, /*---------------------------------------------------------------------------*/ #ifndef GRAPHICS_DISABLED -void RenderIntFeature(void *window, INT_FEATURE Feature, C_COL Color) { +void RenderIntFeature(ScrollView *window, const INT_FEATURE_STRUCT* Feature, + ScrollView::Color color) { /* ** Parameters: ** ShapeList shape list to add feature rendering to @@ -1853,20 +1905,27 @@ void RenderIntFeature(void *window, INT_FEATURE Feature, C_COL Color) { */ FLOAT32 X, Y, Dx, Dy, Length; - c_line_color_index(window, Color); + window->Pen(color); assert(Feature != NULL); - assert(Color != 0); + assert(color != 0); - X = Feature->X - DISPLAY_OFFSET; - Y = Feature->Y - DISPLAY_OFFSET; + X = Feature->X; + Y = Feature->Y; Length = 
GetPicoFeatureLength() * 0.7 * INT_CHAR_NORM_RANGE; - Dx = (Length / 2.0) * cos((Feature->Theta / 256.0) * 2.0 * PI); - Dy = (Length / 2.0) * sin((Feature->Theta / 256.0) * 2.0 * PI); + // The -PI has no significant effect here, but the value of Theta is computed + // using BinaryAnglePlusPi in intfx.cpp. + Dx = (Length / 2.0) * cos((Feature->Theta / 256.0) * 2.0 * PI - PI); + Dy = (Length / 2.0) * sin((Feature->Theta / 256.0) * 2.0 * PI - PI); + float x_offset = Dy / 4.0; + float y_offset = -Dx / 4.0; - c_move(window, X - Dx, Y - Dy); - c_draw(window, X + Dx, Y + Dy); - c_move(window, X - Dx - Dy * DOUBLE_OFFSET, Y - Dy + Dx * DOUBLE_OFFSET); - c_draw(window, X + Dx - Dy * DOUBLE_OFFSET, Y + Dy + Dx * DOUBLE_OFFSET); + window->SetCursor(X - Dx, Y - Dy); + window->DrawTo(X + Dx, Y + Dy); + // Draw another copy of the feature offset perpendicular to its direction. + X += x_offset; + Y += y_offset; + window->SetCursor(X - Dx, Y - Dy); + window->DrawTo(X + Dx, Y + Dy); } /* RenderIntFeature */ @@ -1887,10 +1946,10 @@ void RenderIntFeature(void *window, INT_FEATURE Feature, C_COL Color) { * @note Exceptions: none * @note History: Thu Mar 21 10:21:09 1991, DSJ, Created.
*/ -void RenderIntProto(void *window, +void RenderIntProto(ScrollView *window, INT_CLASS Class, PROTO_ID ProtoId, - C_COL Color) { + ScrollView::Color color) { PROTO_SET ProtoSet; INT_PROTO Proto; int ProtoSetIndex; @@ -1904,8 +1963,8 @@ void RenderIntProto(void *window, assert(ProtoId >= 0); assert(Class != NULL); assert(ProtoId < Class->NumProtos); - assert(Color != 0); - c_line_color_index(window, Color); + assert(color != 0); + window->Pen(color); ProtoSet = Class->ProtoSets[SetForProto(ProtoId)]; ProtoSetIndex = IndexForProto(ProtoId); @@ -1927,13 +1986,15 @@ void RenderIntProto(void *window, UpdateRange(Bucket, &Ymin, &Ymax); } } - X = (Xmin + Xmax + 1) / 2.0 * PROTO_PRUNER_SCALE - DISPLAY_OFFSET; - Y = (Ymin + Ymax + 1) / 2.0 * PROTO_PRUNER_SCALE - DISPLAY_OFFSET; - Dx = (Length / 2.0) * cos((Proto->Angle / 256.0) * 2.0 * PI); - Dy = (Length / 2.0) * sin((Proto->Angle / 256.0) * 2.0 * PI); + X = (Xmin + Xmax + 1) / 2.0 * PROTO_PRUNER_SCALE; + Y = (Ymin + Ymax + 1) / 2.0 * PROTO_PRUNER_SCALE; + // The -PI has no significant effect here, but the value of Theta is computed + // using BinaryAnglePlusPi in intfx.cpp. 
+ Dx = (Length / 2.0) * cos((Proto->Angle / 256.0) * 2.0 * PI - PI); + Dy = (Length / 2.0) * sin((Proto->Angle / 256.0) * 2.0 * PI - PI); - c_move(window, X - Dx, Y - Dy); - c_draw(window, X + Dx, Y + Dy); + window->SetCursor(X - Dx, Y - Dy); + window->DrawTo(X + Dx, Y + Dy); } /* RenderIntProto */ #endif @@ -1977,9 +2038,7 @@ int TruncateParam(FLOAT32 Param, int Min, int Max, char *Id) { */ void InitIntMatchWindowIfReqd() { if (IntMatchWindow == NULL) { - IntMatchWindow = c_create_window("IntMatchWindow", 50, 200, - 520, 520, - -130.0, 130.0, -130.0, 130.0); + IntMatchWindow = CreateFeatureSpaceWindow("IntMatchWindow", 50, 200); SVMenuNode* popup_menu = new SVMenuNode(); popup_menu->AddChild("Debug Adapted classes", IDA_ADAPTIVE, @@ -1998,10 +2057,9 @@ void InitIntMatchWindowIfReqd() { */ void InitProtoDisplayWindowIfReqd() { if (ProtoDisplayWindow == NULL) { - ProtoDisplayWindow = c_create_window("ProtoDisplayWindow", 50, 200, - 520, 520, - -130.0, 130.0, -130.0, 130.0); - } + ProtoDisplayWindow = CreateFeatureSpaceWindow("ProtoDisplayWindow", + 550, 200); + } } /** @@ -2010,8 +2068,13 @@ void InitProtoDisplayWindowIfReqd() { */ void InitFeatureDisplayWindowIfReqd() { if (FeatureDisplayWindow == NULL) { - FeatureDisplayWindow = c_create_window("FeatureDisplayWindow", 50, 200, - 520, 520, - -130.0, 130.0, -130.0, 130.0); + FeatureDisplayWindow = CreateFeatureSpaceWindow("FeatureDisplayWindow", + 50, 700); } } + +// Creates a window of the appropriate size for displaying elements +// in feature space. 
+ScrollView* CreateFeatureSpaceWindow(const char* name, int xpos, int ypos) { + return new ScrollView(name, xpos, ypos, 520, 520, 260, 260, true); +} diff --git a/classify/intproto.h b/classify/intproto.h index be1b2b0bd..854045799 100644 --- a/classify/intproto.h +++ b/classify/intproto.h @@ -21,9 +21,12 @@ /**---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------**/ +#include "genericvector.h" #include "matchdefs.h" +#include "mfoutline.h" #include "protos.h" #include "callcpp.h" +#include "scrollview.h" #include "unicharset.h" /* define order of params in pruners */ @@ -100,18 +103,70 @@ PROTO_SET_STRUCT, *PROTO_SET; typedef uinT32 CONFIG_PRUNER[NUM_PP_PARAMS][NUM_PP_BUCKETS][4]; +// Struct for information about spacing between characters in a particular font. +struct FontSpacingInfo { + inT16 x_gap_before; + inT16 x_gap_after; + GenericVector kerned_unichar_ids; + GenericVector kerned_x_gaps; +}; + /* * font_properties contains properties about boldness, italicness, fixed pitch, * serif, fraktur */ struct FontInfo { - char* name; - uinT32 properties; - bool is_italic() { return properties & 1; } - bool is_bold() { return (properties & 2) != 0; } - bool is_fixed_pitch() { return (properties & 4) != 0; } - bool is_serif() { return (properties & 8) != 0; } - bool is_fraktur() { return (properties & 16) != 0; } + FontInfo() : name(NULL), spacing_vec(NULL) {} + ~FontInfo() {} + // Reserves unicharset_size spots in spacing_vec. + void init_spacing(int unicharset_size) { + spacing_vec = new GenericVector(); + spacing_vec->init_to_size(unicharset_size, NULL); + } + // Adds the given pointer to FontSpacingInfo to spacing_vec member + // (FontInfo class takes ownership of the pointer). + // Note: init_spacing should be called before calling this function. 
+ void add_spacing(UNICHAR_ID uch_id, FontSpacingInfo *spacing_info) { + ASSERT_HOST(spacing_vec != NULL && spacing_vec->size() > uch_id); + (*spacing_vec)[uch_id] = spacing_info; + } + + // Returns the pointer to FontSpacingInfo for the given UNICHAR_ID. + const FontSpacingInfo *get_spacing(UNICHAR_ID uch_id) const { + return (spacing_vec == NULL || spacing_vec->size() <= uch_id) ? + NULL : (*spacing_vec)[uch_id]; + } + + // Fills spacing with the value of the x gap expected between the two given + // UNICHAR_IDs. Returns true on success. + bool get_spacing(UNICHAR_ID prev_uch_id, + UNICHAR_ID uch_id, + int *spacing) const { + const FontSpacingInfo *prev_fsi = this->get_spacing(prev_uch_id); + const FontSpacingInfo *fsi = this->get_spacing(uch_id); + if (prev_fsi == NULL || fsi == NULL) return false; + int i = 0; + for (; i < prev_fsi->kerned_unichar_ids.size(); ++i) { + if (prev_fsi->kerned_unichar_ids[i] == uch_id) break; + } + if (i < prev_fsi->kerned_unichar_ids.size()) { + *spacing = prev_fsi->kerned_x_gaps[i]; + } else { + *spacing = prev_fsi->x_gap_after + fsi->x_gap_before; + } + return true; + } + + bool is_italic() const { return properties & 1; } + bool is_bold() const { return (properties & 2) != 0; } + bool is_fixed_pitch() const { return (properties & 4) != 0; } + bool is_serif() const { return (properties & 8) != 0; } + bool is_fraktur() const { return (properties & 16) != 0; } + + char* name; + uinT32 properties; + // Horizontal spacing between characters (indexed by UNICHAR_ID). 
+ GenericVector *spacing_vec; }; // Every class (character) owns a FontSet that represents all the fonts that can @@ -164,6 +219,10 @@ struct INT_FEATURE_STRUCT uinT8 Y; uinT8 Theta; inT8 CP_misses; + + void print() const { + tprintf("(%d,%d):%d\n", X, Y, Theta); + } }; typedef INT_FEATURE_STRUCT *INT_FEATURE; @@ -253,7 +312,17 @@ void free_int_templates(INT_TEMPLATES templates); void ShowMatchDisplay(); +namespace tesseract { + +// Clears the given window and draws the featurespace guides for the +// appropriate normalization method. +void ClearFeatureSpaceWindow(NORM_METHOD norm_method, ScrollView* window); + +} // namespace tesseract. + /*----------------------------------------------------------------------------*/ +void RenderIntFeature(ScrollView *window, const INT_FEATURE_STRUCT* Feature, + ScrollView::Color color); void InitIntMatchWindowIfReqd(); @@ -261,4 +330,8 @@ void InitProtoDisplayWindowIfReqd(); void InitFeatureDisplayWindowIfReqd(); +// Creates a window of the appropriate size for displaying elements +// in feature space. +ScrollView* CreateFeatureSpaceWindow(const char* name, int xpos, int ypos); + #endif