Added a backup adaptive classifier to take over from the primary when it fills up on a large document

Ray Smith 2015-06-12 11:10:53 -07:00
parent 78b5e1a77d
commit b1d99dfe23
5 changed files with 101 additions and 55 deletions
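The mechanism, as implemented across the hunks below: each Classify instance keeps a second set of adapted templates (BackupAdaptedTemplates) that is started empty at the beginning of a page when the primary set is non-empty, is adapted in parallel with the primary set during learning, and is promoted to primary when the primary set fills up, so recognition never falls back to a completely empty adaptive classifier mid-document. A stand-alone sketch of the page-start decision (illustrative names only, not Tesseract API):

// Models the three-way page-start check added in recog_all_words below.
enum class PageStartAction { kNone, kSwitchToBackup, kStartBackup };

PageStartAction DecidePageStart(bool classifier_full, bool classifier_empty) {
  if (classifier_full) return PageStartAction::kSwitchToBackup;  // promote backup
  if (!classifier_empty) return PageStartAction::kStartBackup;   // prepare backup
  return PageStartAction::kNone;  // first page: nothing adapted yet
}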

View File

@@ -2380,7 +2380,8 @@ void TessBaseAPI::AdaptToCharacter(const char *unichar_repr,
   threshold = tesseract_->matcher_good_threshold;
   if (blob->outlines)
-    tesseract_->AdaptToChar(blob, id, kUnknownFontinfoId, threshold);
+    tesseract_->AdaptToChar(blob, id, kUnknownFontinfoId, threshold,
+                            tesseract_->AdaptedTemplates);
   delete blob;
 }

View File

@@ -306,17 +306,22 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
   page_res_it.restart_page();
   // ****************** Pass 1 *******************
-  // Clear adaptive classifier at the beginning of the page if it is full.
-  // This is done only at the beginning of the page to ensure that the
-  // classifier is not reset at an arbitrary point while processing the page,
-  // which would cripple Passes 2+ if the reset happens towards the end of
-  // Pass 1 on a page with very difficult text.
-  // TODO(daria): preemptively clear the classifier if it is almost full.
-  if (AdaptiveClassifierIsFull()) ResetAdaptiveClassifierInternal();
+  // If the adaptive classifier is full switch to one we prepared earlier,
+  // ie on the previous page. If the current adaptive classifier is non-empty,
+  // prepare a backup starting at this page, in case it fills up. Do all this
+  // independently for each language.
+  if (AdaptiveClassifierIsFull()) {
+    SwitchAdaptiveClassifier();
+  } else if (!AdaptiveClassifierIsEmpty()) {
+    StartBackupAdaptiveClassifier();
+  }
   // Now check the sub-langs as well.
   for (int i = 0; i < sub_langs_.size(); ++i) {
-    if (sub_langs_[i]->AdaptiveClassifierIsFull())
-      sub_langs_[i]->ResetAdaptiveClassifierInternal();
+    if (sub_langs_[i]->AdaptiveClassifierIsFull()) {
+      sub_langs_[i]->SwitchAdaptiveClassifier();
+    } else if (!sub_langs_[i]->AdaptiveClassifierIsEmpty()) {
+      sub_langs_[i]->StartBackupAdaptiveClassifier();
+    }
   }
   // Set up all words ready for recognition, so that if parallelism is on
   // all the input and output classes are ready to run the classifier.
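To see how this policy plays out over a multi-page document, here is a small self-contained model. FakeAdaptiveLang and the character counts are purely illustrative; the real "full" test is NumAdaptationsFailed > 0 and "empty" means no permanent classes, as the classify.h hunk further down shows.

#include <cstdio>

struct FakeAdaptiveLang {
  int primary_chars = 0;   // stands in for AdaptedTemplates
  int backup_chars = -1;   // -1 means no BackupAdaptedTemplates yet
  bool IsFull() const { return primary_chars > 100; }  // stand-in condition
  bool IsEmpty() const { return primary_chars == 0; }
  void PageStart() {
    if (IsFull()) {
      // SwitchAdaptiveClassifier: backup (if any) becomes the primary.
      primary_chars = backup_chars >= 0 ? backup_chars : 0;
      backup_chars = -1;
    } else if (!IsEmpty()) {
      backup_chars = 0;    // StartBackupAdaptiveClassifier
    }
  }
  void LearnChar() {       // LearnPieces/AdaptToChar: adapt both sets
    ++primary_chars;
    if (backup_chars >= 0) ++backup_chars;
  }
};

int main() {
  FakeAdaptiveLang lang;
  for (int page = 1; page <= 4; ++page) {
    lang.PageStart();
    for (int c = 0; c < 60; ++c) lang.LearnChar();
    std::printf("page %d: primary=%d backup=%d\n",
                page, lang.primary_chars, lang.backup_chars);
  }
  return 0;
}

On the page where the primary fills, the language switches to a backup that has already been adapting since the previous page, instead of dropping back to an empty classifier as the old ResetAdaptiveClassifierInternal call did.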

View File

@@ -24,36 +24,36 @@
 #endif
 #include <ctype.h>
-#include "shapeclassifier.h"
 #include "ambigs.h"
 #include "blobclass.h"
 #include "blobs.h"
-#include "helpers.h"
-#include "normfeat.h"
-#include "mfoutline.h"
-#include "picofeat.h"
-#include "float2int.h"
-#include "outfeat.h"
-#include "emalloc.h"
-#include "intfx.h"
-#include "efio.h"
-#include "normmatch.h"
-#include "ndminx.h"
-#include "intproto.h"
-#include "const.h"
-#include "globals.h"
-#include "werd.h"
 #include "callcpp.h"
+#include "classify.h"
+#include "const.h"
+#include "dict.h"
+#include "efio.h"
+#include "emalloc.h"
+#include "featdefs.h"
+#include "float2int.h"
+#include "genericvector.h"
+#include "globals.h"
+#include "helpers.h"
+#include "intfx.h"
+#include "intproto.h"
+#include "mfoutline.h"
+#include "ndminx.h"
+#include "normfeat.h"
+#include "normmatch.h"
+#include "outfeat.h"
 #include "pageres.h"
 #include "params.h"
-#include "classify.h"
+#include "picofeat.h"
+#include "shapeclassifier.h"
 #include "shapetable.h"
 #include "tessclassifier.h"
 #include "trainingsample.h"
 #include "unicharset.h"
-#include "dict.h"
-#include "featdefs.h"
-#include "genericvector.h"
+#include "werd.h"
 #include <stdio.h>
 #include <string.h>
@@ -420,7 +420,13 @@ void Classify::LearnPieces(const char* fontname, int start, int length,
               unicharset.id_to_unichar(class_id), threshold, font_id);
     // If filename is not NULL we are doing recognition
     // (as opposed to training), so we must have already set word fonts.
-    AdaptToChar(rotated_blob, class_id, font_id, threshold);
+    AdaptToChar(rotated_blob, class_id, font_id, threshold, AdaptedTemplates);
+    if (BackupAdaptedTemplates != NULL) {
+      // Adapt the backup templates too. They will be used if the primary gets
+      // too full.
+      AdaptToChar(rotated_blob, class_id, font_id, threshold,
+                  BackupAdaptedTemplates);
+    }
   } else if (classify_debug_level >= 1) {
     tprintf("Can't adapt to %s not in unicharset\n", correct_text);
   }
@@ -470,6 +476,10 @@ void Classify::EndAdaptiveClassifier() {
     free_adapted_templates(AdaptedTemplates);
     AdaptedTemplates = NULL;
   }
+  if (BackupAdaptedTemplates != NULL) {
+    free_adapted_templates(BackupAdaptedTemplates);
+    BackupAdaptedTemplates = NULL;
+  }
   if (PreTrainedTemplates != NULL) {
     free_int_templates(PreTrainedTemplates);
@@ -607,10 +617,35 @@ void Classify::ResetAdaptiveClassifierInternal() {
   }
   free_adapted_templates(AdaptedTemplates);
   AdaptedTemplates = NewAdaptedTemplates(true);
+  if (BackupAdaptedTemplates != NULL)
+    free_adapted_templates(BackupAdaptedTemplates);
+  BackupAdaptedTemplates = NULL;
   NumAdaptationsFailed = 0;
 }
 
+// If there are backup adapted templates, switches to those, otherwise resets
+// the main adaptive classifier (because it is full.)
+void Classify::SwitchAdaptiveClassifier() {
+  if (BackupAdaptedTemplates == NULL) {
+    ResetAdaptiveClassifierInternal();
+    return;
+  }
+  if (classify_learning_debug_level > 0) {
+    tprintf("Switch to backup adaptive classifier (NumAdaptationsFailed=%d)\n",
+            NumAdaptationsFailed);
+  }
+  free_adapted_templates(AdaptedTemplates);
+  AdaptedTemplates = BackupAdaptedTemplates;
+  BackupAdaptedTemplates = NULL;
+  NumAdaptationsFailed = 0;
+}
+
+// Resets the backup adaptive classifier to empty.
+void Classify::StartBackupAdaptiveClassifier() {
+  if (BackupAdaptedTemplates != NULL)
+    free_adapted_templates(BackupAdaptedTemplates);
+  BackupAdaptedTemplates = NewAdaptedTemplates(true);
+}
+
 /*---------------------------------------------------------------------------*/
 /**
@@ -839,9 +874,9 @@ bool Classify::AdaptableWord(WERD_RES* word) {
  * @param ClassId class to add blob to
  * @param FontinfoId font information from pre-trained templates
  * @param Threshold minimum match rating to existing template
+ * @param adaptive_templates current set of adapted templates
  *
  * Globals:
- * - AdaptedTemplates current set of adapted templates
  * - AllProtosOn dummy mask to match against all protos
  * - AllConfigsOn dummy mask to match against all configs
  *
@@ -849,10 +884,9 @@ bool Classify::AdaptableWord(WERD_RES* word) {
  * @note Exceptions: none
  * @note History: Thu Mar 14 09:36:03 1991, DSJ, Created.
  */
-void Classify::AdaptToChar(TBLOB *Blob,
-                           CLASS_ID ClassId,
-                           int FontinfoId,
-                           FLOAT32 Threshold) {
+void Classify::AdaptToChar(TBLOB* Blob, CLASS_ID ClassId, int FontinfoId,
+                           FLOAT32 Threshold,
+                           ADAPT_TEMPLATES adaptive_templates) {
   int NumFeatures;
   INT_FEATURE_ARRAY IntFeatures;
   UnicharRating int_result;
@@ -866,12 +900,12 @@ void Classify::AdaptToChar(TBLOB *Blob,
     return;
 
   int_result.unichar_id = ClassId;
-  Class = AdaptedTemplates->Class[ClassId];
+  Class = adaptive_templates->Class[ClassId];
   assert(Class != NULL);
   if (IsEmptyAdaptedClass(Class)) {
-    InitAdaptedClass(Blob, ClassId, FontinfoId, Class, AdaptedTemplates);
+    InitAdaptedClass(Blob, ClassId, FontinfoId, Class, adaptive_templates);
   } else {
-    IClass = ClassForClassId(AdaptedTemplates->Templates, ClassId);
+    IClass = ClassForClassId(adaptive_templates->Templates, ClassId);
 
     NumFeatures = GetAdaptiveFeatures(Blob, IntFeatures, &FloatFeatures);
     if (NumFeatures <= 0)
@@ -913,7 +947,7 @@ void Classify::AdaptToChar(TBLOB *Blob,
                 int_result.config, TempConfig->NumTimesSeen);
       if (TempConfigReliable(ClassId, TempConfig)) {
-        MakePermanent(AdaptedTemplates, ClassId, int_result.config, Blob);
+        MakePermanent(adaptive_templates, ClassId, int_result.config, Blob);
         UpdateAmbigsGroup(ClassId, Blob);
       }
     } else {
@@ -923,15 +957,12 @@ void Classify::AdaptToChar(TBLOB *Blob,
       if (classify_learning_debug_level > 2)
         DisplayAdaptedChar(Blob, IClass);
     }
-    NewTempConfigId = MakeNewTemporaryConfig(AdaptedTemplates,
-                                             ClassId,
-                                             FontinfoId,
-                                             NumFeatures,
-                                             IntFeatures,
-                                             FloatFeatures);
+    NewTempConfigId =
+        MakeNewTemporaryConfig(adaptive_templates, ClassId, FontinfoId,
+                               NumFeatures, IntFeatures, FloatFeatures);
 
     if (NewTempConfigId >= 0 &&
         TempConfigReliable(ClassId, TempConfigFor(Class, NewTempConfigId))) {
-      MakePermanent(AdaptedTemplates, ClassId, NewTempConfigId, Blob);
+      MakePermanent(adaptive_templates, ClassId, NewTempConfigId, Blob);
       UpdateAmbigsGroup(ClassId, Blob);
     }
@@ -1547,7 +1578,7 @@ void Classify::DebugAdaptiveClassifier(TBLOB *blob,
  * Globals:
  * - PreTrainedTemplates built-in training templates
  * - AdaptedTemplates templates adapted for this page
- * - matcher_great_threshold rating limit for a great match
+ * - matcher_reliable_adaptive_result rating limit for a great match
  *
  * @note Exceptions: none
  * @note History: Tue Mar 12 08:50:11 1991, DSJ, Created.
@@ -1569,7 +1600,8 @@ void Classify::DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results) {
     Ambiguities = BaselineClassifier(Blob, bl_features, fx_info,
                                      AdaptedTemplates, Results);
     if ((!Results->match.empty() &&
-         MarginalMatch(Results->best_rating, matcher_great_threshold) &&
+         MarginalMatch(Results->best_rating,
+                       matcher_reliable_adaptive_result) &&
          !tess_bn_matching) ||
         Results->match.empty()) {
       CharNormClassifier(Blob, *sample, Results);

View File

@@ -171,6 +171,7 @@ Classify::Classify()
   fontset_table_.set_clear_callback(
       NewPermanentTessCallback(FontSetDeleteCallback));
   AdaptedTemplates = NULL;
+  BackupAdaptedTemplates = NULL;
   PreTrainedTemplates = NULL;
   AllProtosOn = NULL;
   AllConfigsOn = NULL;

View File

@@ -253,10 +253,8 @@ class Classify : public CCStruct {
                       GenericVector<UnicharRating>* results);
   UNICHAR_ID *GetAmbiguities(TBLOB *Blob, CLASS_ID CorrectClass);
   void DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results);
-  void AdaptToChar(TBLOB *Blob,
-                   CLASS_ID ClassId,
-                   int FontinfoId,
-                   FLOAT32 Threshold);
+  void AdaptToChar(TBLOB* Blob, CLASS_ID ClassId, int FontinfoId,
+                   FLOAT32 Threshold, ADAPT_TEMPLATES adaptive_templates);
   void DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class);
   bool AdaptableWord(WERD_RES* word);
   void EndAdaptiveClassifier();
@@ -265,6 +263,8 @@ class Classify : public CCStruct {
   void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices);
   void ClassifyAsNoise(ADAPT_RESULTS *Results);
   void ResetAdaptiveClassifierInternal();
+  void SwitchAdaptiveClassifier();
+  void StartBackupAdaptiveClassifier();
   int GetCharNormFeature(const INT_FX_RESULT_STRUCT& fx_info,
                          INT_TEMPLATES templates,
@@ -281,7 +281,10 @@ class Classify : public CCStruct {
   bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config);
   void UpdateAmbigsGroup(CLASS_ID class_id, TBLOB *Blob);
-  bool AdaptiveClassifierIsFull() { return NumAdaptationsFailed > 0; }
+  bool AdaptiveClassifierIsFull() const { return NumAdaptationsFailed > 0; }
+  bool AdaptiveClassifierIsEmpty() const {
+    return AdaptedTemplates->NumPermClasses == 0;
+  }
   bool LooksLikeGarbage(TBLOB *blob);
   void RefreshDebugWindow(ScrollView **win, const char *msg,
                           int y_offset, const TBOX &wbox);
@@ -415,7 +418,7 @@ class Classify : public CCStruct {
   INT_VAR_H(matcher_debug_flags, 0, "Matcher Debug Flags");
   INT_VAR_H(classify_learning_debug_level, 0, "Learning Debug Level: ");
   double_VAR_H(matcher_good_threshold, 0.125, "Good Match (0-1)");
-  double_VAR_H(matcher_great_threshold, 0.0, "Great Match (0-1)");
+  double_VAR_H(matcher_reliable_adaptive_result, 0.0, "Great Match (0-1)");
   double_VAR_H(matcher_perfect_threshold, 0.02, "Perfect Match (0-1)");
   double_VAR_H(matcher_bad_match_pad, 0.15, "Bad Match Pad (0-1)");
   double_VAR_H(matcher_rating_margin, 0.1, "New template margin (0-1)");
@@ -468,6 +471,10 @@ class Classify : public CCStruct {
   // Use class variables to hold onto built-in templates and adapted templates.
   INT_TEMPLATES PreTrainedTemplates;
   ADAPT_TEMPLATES AdaptedTemplates;
+  // The backup adapted templates are created from the previous page (only)
+  // so they are always ready and reasonably well trained if the primary
+  // adapted templates become full.
+  ADAPT_TEMPLATES BackupAdaptedTemplates;
   // Create dummy proto and config masks for use with the built-in templates.
   BIT_VECTOR AllProtosOn;
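The new BackupAdaptedTemplates member follows the same manual ownership discipline as AdaptedTemplates: it is freed in EndAdaptiveClassifier and ResetAdaptiveClassifierInternal, and ownership moves to the primary slot in SwitchAdaptiveClassifier. For readers more used to RAII, the hand-off is equivalent to this unique_ptr sketch (illustrative only; the real ADAPT_TEMPLATES is a C-style structure released with free_adapted_templates):

#include <memory>

struct AdaptedTemplatesSet {};  // stand-in for ADAPT_TEMPLATES

struct ClassifySketch {
  std::unique_ptr<AdaptedTemplatesSet> primary;  // AdaptedTemplates
  std::unique_ptr<AdaptedTemplatesSet> backup;   // BackupAdaptedTemplates

  void SwitchAdaptiveClassifier() {
    if (backup == nullptr) {
      // Nothing prepared yet: fall back to a fresh, empty primary.
      primary = std::make_unique<AdaptedTemplatesSet>();
      return;
    }
    // Old primary is released, the backup is promoted, and the backup slot is
    // left empty (NULL in the diff).
    primary = std::move(backup);
  }

  void StartBackupAdaptiveClassifier() {
    backup = std::make_unique<AdaptedTemplatesSet>();  // reset backup to empty
  }
};  // destructor releases both sets, as EndAdaptiveClassifier does by hand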