Fixed slow-down that was caused by upping MAX_NUM_CLASSES

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@1013 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
theraysmith@gmail.com 2014-01-24 21:12:35 +00:00
parent cd15c5e2c2
commit 1a487252f4
10 changed files with 57 additions and 287 deletions

View File

@ -31,15 +31,15 @@
#include "mathfix.h" #include "mathfix.h"
#elif MINGW #elif MINGW
// workaround for stdlib.h with -std=c++11 for _splitpath and _MAX_FNAME // workaround for stdlib.h with -std=c++11 for _splitpath and _MAX_FNAME
#undef __STRICT_ANSI__ #undef __STRICT_ANSI__
#endif // _MSC_VER #endif // _MSC_VER
#include <stdlib.h> #include <stdlib.h>
#include <windows.h> #include <windows.h>
#else #else
#include <dirent.h> #include <dirent.h>
#include <libgen.h> #include <libgen.h>
#include <string.h> #include <string.h>
#endif // _WIN32 #endif // _WIN32
#if !defined(VERSION) #if !defined(VERSION)
#include "version.h" #include "version.h"
@ -2288,7 +2288,7 @@ void TessBaseAPI::AdaptToCharacter(const char *unichar_repr,
// Classify to get a raw choice. // Classify to get a raw choice.
BLOB_CHOICE_LIST choices; BLOB_CHOICE_LIST choices;
tesseract_->AdaptiveClassifier(blob, &choices, NULL); tesseract_->AdaptiveClassifier(blob, &choices);
BLOB_CHOICE_IT choice_it; BLOB_CHOICE_IT choice_it;
choice_it.set_to_list(&choices); choice_it.set_to_list(&choices);
for (choice_it.mark_cycle_pt(); !choice_it.cycled_list(); for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
@ -2520,7 +2520,7 @@ void TessBaseAPI::RunAdaptiveClassifier(TBLOB* blob,
float* ratings, float* ratings,
int* num_matches_returned) { int* num_matches_returned) {
BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST; BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST;
tesseract_->AdaptiveClassifier(blob, choices, NULL); tesseract_->AdaptiveClassifier(blob, choices);
BLOB_CHOICE_IT choices_it(choices); BLOB_CHOICE_IT choices_it(choices);
int& index = *num_matches_returned; int& index = *num_matches_returned;
index = 0; index = 0;

View File

@ -186,8 +186,8 @@ void EquationDetect::IdentifySpecialText(
normed_blob->Normalize(NULL, NULL, NULL, x_orig, y_orig, scaling, scaling, normed_blob->Normalize(NULL, NULL, NULL, x_orig, y_orig, scaling, scaling,
0.0f, static_cast<float>(kBlnBaselineOffset), 0.0f, static_cast<float>(kBlnBaselineOffset),
false, NULL); false, NULL);
equ_tesseract_->AdaptiveClassifier(normed_blob, &ratings_equ, NULL); equ_tesseract_->AdaptiveClassifier(normed_blob, &ratings_equ);
lang_tesseract_->AdaptiveClassifier(normed_blob, &ratings_lang, NULL); lang_tesseract_->AdaptiveClassifier(normed_blob, &ratings_lang);
delete normed_blob; delete normed_blob;
delete tblob; delete tblob;

View File

@ -352,7 +352,7 @@ bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o,
x_origin, y_origin, scaling, scaling, x_origin, y_origin, scaling, scaling,
0.0f, static_cast<float>(kBlnBaselineOffset), 0.0f, static_cast<float>(kBlnBaselineOffset),
false, NULL); false, NULL);
tess->AdaptiveClassifier(rotated_blob, ratings + i, NULL); tess->AdaptiveClassifier(rotated_blob, ratings + i);
delete rotated_blob; delete rotated_blob;
current_rotation.rotate(rotation90); current_rotation.rotate(rotation90);
} }

View File

@ -95,7 +95,7 @@ void FreeTempProto(void *arg) {
void FreePermConfig(PERM_CONFIG Config) { void FreePermConfig(PERM_CONFIG Config) {
assert(Config != NULL); assert(Config != NULL);
Efree(Config->Ambigs); delete [] Config->Ambigs;
free_struct(Config, sizeof(PERM_CONFIG_STRUCT), "PERM_CONFIG_STRUCT"); free_struct(Config, sizeof(PERM_CONFIG_STRUCT), "PERM_CONFIG_STRUCT");
} }
@ -406,7 +406,7 @@ PERM_CONFIG ReadPermConfig(FILE *File) {
"PERM_CONFIG_STRUCT"); "PERM_CONFIG_STRUCT");
uinT8 NumAmbigs; uinT8 NumAmbigs;
fread ((char *) &NumAmbigs, sizeof(uinT8), 1, File); fread ((char *) &NumAmbigs, sizeof(uinT8), 1, File);
Config->Ambigs = (UNICHAR_ID *)Emalloc(sizeof(UNICHAR_ID) * (NumAmbigs + 1)); Config->Ambigs = new UNICHAR_ID[NumAmbigs + 1];
fread(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File); fread(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File);
Config->Ambigs[NumAmbigs] = -1; Config->Ambigs[NumAmbigs] = -1;
fread(&(Config->FontinfoId), sizeof(int), 1, File); fread(&(Config->FontinfoId), sizeof(int), 1, File);

View File

@ -87,17 +87,15 @@ struct ScoredClass {
struct ADAPT_RESULTS { struct ADAPT_RESULTS {
inT32 BlobLength; inT32 BlobLength;
int NumMatches;
bool HasNonfragment; bool HasNonfragment;
ScoredClass match[MAX_NUM_CLASSES]; GenericVector<ScoredClass> match;
ScoredClass best_match; ScoredClass best_match;
CLASS_PRUNER_RESULTS CPResults; GenericVector<CP_RESULT_STRUCT> CPResults;
/// Initializes data members to the default values. Sets the initial /// Initializes data members to the default values. Sets the initial
/// rating of each class to be the worst possible rating (1.0). /// rating of each class to be the worst possible rating (1.0).
inline void Initialize() { inline void Initialize() {
BlobLength = MAX_INT32; BlobLength = MAX_INT32;
NumMatches = 0;
HasNonfragment = false; HasNonfragment = false;
best_match.unichar_id = NO_CLASS; best_match.unichar_id = NO_CLASS;
best_match.shape_id = -1; best_match.shape_id = -1;
@ -163,29 +161,22 @@ namespace tesseract {
* *
* @param Blob blob to be classified * @param Blob blob to be classified
* @param[out] Choices List of choices found by adaptive matcher. * @param[out] Choices List of choices found by adaptive matcher.
* @param[out] CPResults Array of CPResultStruct of size MAX_NUM_CLASSES is
* filled on return with the choices found by the * filled on return with the choices found by the
* class pruner and the ratings therefrom. Also * class pruner and the ratings therefrom. Also
* contains the detailed results of the integer matcher. * contains the detailed results of the integer matcher.
* *
*/ */
void Classify::AdaptiveClassifier(TBLOB *Blob, void Classify::AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices) {
BLOB_CHOICE_LIST *Choices,
CLASS_PRUNER_RESULTS CPResults) {
assert(Choices != NULL); assert(Choices != NULL);
ADAPT_RESULTS *Results = new ADAPT_RESULTS(); ADAPT_RESULTS *Results = new ADAPT_RESULTS;
Results->Initialize(); Results->Initialize();
ASSERT_HOST(AdaptedTemplates != NULL); ASSERT_HOST(AdaptedTemplates != NULL);
DoAdaptiveMatch(Blob, Results); DoAdaptiveMatch(Blob, Results);
if (CPResults != NULL)
memcpy(CPResults, Results->CPResults,
sizeof(CPResults[0]) * Results->NumMatches);
RemoveBadMatches(Results); RemoveBadMatches(Results);
qsort((void *)Results->match, Results->NumMatches, Results->match.sort(CompareByRating);
sizeof(ScoredClass), CompareByRating);
RemoveExtraPuncs(Results); RemoveExtraPuncs(Results);
ConvertMatchesToChoices(Blob->denorm(), Blob->bounding_box(), Results, ConvertMatchesToChoices(Blob->denorm(), Blob->bounding_box(), Results,
Choices); Choices);
@ -1029,7 +1020,7 @@ void Classify::AddNewResult(ADAPT_RESULTS *results,
if (old_match) if (old_match)
old_match->rating = rating; old_match->rating = rating;
else else
results->match[results->NumMatches++] = match; results->match.push_back(match);
if (rating < results->best_match.rating && if (rating < results->best_match.rating &&
// Ensure that fragments do not affect best rating, class and config. // Ensure that fragments do not affect best rating, class and config.
@ -1111,14 +1102,13 @@ void Classify::MasterMatcher(INT_TEMPLATES templates,
const uinT8* norm_factors, const uinT8* norm_factors,
ADAPT_CLASS* classes, ADAPT_CLASS* classes,
int debug, int debug,
int num_classes,
int matcher_multiplier, int matcher_multiplier,
const TBOX& blob_box, const TBOX& blob_box,
CLASS_PRUNER_RESULTS results, const GenericVector<CP_RESULT_STRUCT>& results,
ADAPT_RESULTS* final_results) { ADAPT_RESULTS* final_results) {
int top = blob_box.top(); int top = blob_box.top();
int bottom = blob_box.bottom(); int bottom = blob_box.bottom();
for (int c = 0; c < num_classes; c++) { for (int c = 0; c < results.size(); c++) {
CLASS_ID class_id = results[c].Class; CLASS_ID class_id = results[c].Class;
INT_RESULT_STRUCT& int_result = results[c].IMResult; INT_RESULT_STRUCT& int_result = results[c].IMResult;
BIT_VECTOR protos = classes != NULL ? classes[class_id]->PermProtos BIT_VECTOR protos = classes != NULL ? classes[class_id]->PermProtos
@ -1279,21 +1269,19 @@ UNICHAR_ID *Classify::BaselineClassifier(
const INT_FX_RESULT_STRUCT& fx_info, const INT_FX_RESULT_STRUCT& fx_info,
ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results) { ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results) {
if (int_features.empty()) return NULL; if (int_features.empty()) return NULL;
int NumClasses;
uinT8* CharNormArray = new uinT8[unicharset.size()]; uinT8* CharNormArray = new uinT8[unicharset.size()];
ClearCharNormArray(CharNormArray); ClearCharNormArray(CharNormArray);
Results->BlobLength = IntCastRounded(fx_info.Length / kStandardFeatureLength); Results->BlobLength = IntCastRounded(fx_info.Length / kStandardFeatureLength);
NumClasses = PruneClasses(Templates->Templates, int_features.size(), PruneClasses(Templates->Templates, int_features.size(), &int_features[0],
&int_features[0], CharNormArray, BaselineCutoffs, &Results->CPResults);
CharNormArray, BaselineCutoffs, Results->CPResults);
if (matcher_debug_level >= 2 || classify_debug_level > 1) if (matcher_debug_level >= 2 || classify_debug_level > 1)
cprintf ("BL Matches = "); cprintf ("BL Matches = ");
MasterMatcher(Templates->Templates, int_features.size(), &int_features[0], MasterMatcher(Templates->Templates, int_features.size(), &int_features[0],
CharNormArray, CharNormArray,
Templates->Class, matcher_debug_flags, NumClasses, 0, Templates->Class, matcher_debug_flags, 0,
Blob->bounding_box(), Results->CPResults, Results); Blob->bounding_box(), Results->CPResults, Results);
delete [] CharNormArray; delete [] CharNormArray;
@ -1375,20 +1363,18 @@ int Classify::CharNormTrainingSample(bool pruner_only,
ComputeCharNormArrays(norm_feature, PreTrainedTemplates, char_norm_array, ComputeCharNormArrays(norm_feature, PreTrainedTemplates, char_norm_array,
pruner_norm_array); pruner_norm_array);
int num_classes = PruneClasses(PreTrainedTemplates, num_features, PruneClasses(PreTrainedTemplates, num_features, sample.features(),
sample.features(), pruner_norm_array,
pruner_norm_array, shape_table_ != NULL ? &shapetable_cutoffs_[0] : CharNormCutoffs,
shape_table_ != NULL ? &shapetable_cutoffs_[0] &adapt_results->CPResults);
: CharNormCutoffs,
adapt_results->CPResults);
delete [] pruner_norm_array; delete [] pruner_norm_array;
if (keep_this >= 0) { if (keep_this >= 0) {
num_classes = 1;
adapt_results->CPResults[0].Class = keep_this; adapt_results->CPResults[0].Class = keep_this;
adapt_results->CPResults.truncate(1);
} }
if (pruner_only) { if (pruner_only) {
// Convert pruner results to output format. // Convert pruner results to output format.
for (int i = 0; i < num_classes; ++i) { for (int i = 0; i < adapt_results->CPResults.size(); ++i) {
int class_id = adapt_results->CPResults[i].Class; int class_id = adapt_results->CPResults[i].Class;
results->push_back( results->push_back(
UnicharRating(class_id, 1.0f - adapt_results->CPResults[i].Rating)); UnicharRating(class_id, 1.0f - adapt_results->CPResults[i].Rating));
@ -1396,11 +1382,11 @@ int Classify::CharNormTrainingSample(bool pruner_only,
} else { } else {
MasterMatcher(PreTrainedTemplates, num_features, sample.features(), MasterMatcher(PreTrainedTemplates, num_features, sample.features(),
char_norm_array, char_norm_array,
NULL, matcher_debug_flags, num_classes, NULL, matcher_debug_flags,
classify_integer_matcher_multiplier, classify_integer_matcher_multiplier,
blob_box, adapt_results->CPResults, adapt_results); blob_box, adapt_results->CPResults, adapt_results);
// Convert master matcher results to output format. // Convert master matcher results to output format.
for (int i = 0; i < adapt_results->NumMatches; i++) { for (int i = 0; i < adapt_results->match.size(); i++) {
ScoredClass next = adapt_results->match[i]; ScoredClass next = adapt_results->match[i];
UnicharRating rating(next.unichar_id, 1.0f - next.rating); UnicharRating rating(next.unichar_id, 1.0f - next.rating);
if (next.fontinfo_id >= 0) { if (next.fontinfo_id >= 0) {
@ -1449,7 +1435,7 @@ void Classify::ClassifyAsNoise(ADAPT_RESULTS *Results) {
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
// Return a pointer to the scored unichar in results, or NULL if not present. // Return a pointer to the scored unichar in results, or NULL if not present.
ScoredClass *FindScoredUnichar(ADAPT_RESULTS *results, UNICHAR_ID id) { ScoredClass *FindScoredUnichar(ADAPT_RESULTS *results, UNICHAR_ID id) {
for (int i = 0; i < results->NumMatches; i++) { for (int i = 0; i < results->match.size(); i++) {
if (results->match[i].unichar_id == id) if (results->match[i].unichar_id == id)
return &results->match[i]; return &results->match[i];
} }
@ -1516,7 +1502,7 @@ void Classify::ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box,
} }
float best_certainty = -MAX_FLOAT32; float best_certainty = -MAX_FLOAT32;
for (int i = 0; i < Results->NumMatches; i++) { for (int i = 0; i < Results->match.size(); i++) {
ScoredClass next = Results->match[i]; ScoredClass next = Results->match[i];
int fontinfo_id = next.fontinfo_id; int fontinfo_id = next.fontinfo_id;
int fontinfo_id2 = next.fontinfo_id2; int fontinfo_id2 = next.fontinfo_id2;
@ -1564,7 +1550,7 @@ void Classify::ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box,
choices_length++; choices_length++;
if (choices_length >= max_matches) break; if (choices_length >= max_matches) break;
} }
Results->NumMatches = choices_length; Results->match.truncate(choices_length);
} // ConvertMatchesToChoices } // ConvertMatchesToChoices
@ -1583,7 +1569,7 @@ void Classify::ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box,
void Classify::DebugAdaptiveClassifier(TBLOB *blob, void Classify::DebugAdaptiveClassifier(TBLOB *blob,
ADAPT_RESULTS *Results) { ADAPT_RESULTS *Results) {
if (static_classifier_ == NULL) return; if (static_classifier_ == NULL) return;
for (int i = 0; i < Results->NumMatches; i++) { for (int i = 0; i < Results->match.size(); i++) {
if (i == 0 || Results->match[i].rating < Results->best_match.rating) if (i == 0 || Results->match[i].rating < Results->best_match.rating)
Results->best_match = Results->match[i]; Results->best_match = Results->match[i];
} }
@ -1636,10 +1622,9 @@ void Classify::DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results) {
} else { } else {
Ambiguities = BaselineClassifier(Blob, bl_features, fx_info, Ambiguities = BaselineClassifier(Blob, bl_features, fx_info,
AdaptedTemplates, Results); AdaptedTemplates, Results);
if ((Results->NumMatches > 0 && if ((!Results->match.empty() && MarginalMatch(Results->best_match.rating) &&
MarginalMatch (Results->best_match.rating) &&
!tess_bn_matching) || !tess_bn_matching) ||
Results->NumMatches == 0) { Results->match.empty()) {
CharNormClassifier(Blob, *sample, Results); CharNormClassifier(Blob, *sample, Results);
} else if (Ambiguities && *Ambiguities >= 0 && !tess_bn_matching) { } else if (Ambiguities && *Ambiguities >= 0 && !tess_bn_matching) {
AmbigClassifier(bl_features, fx_info, Blob, AmbigClassifier(bl_features, fx_info, Blob,
@ -1654,7 +1639,7 @@ void Classify::DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results) {
// if the results contain only fragments. // if the results contain only fragments.
// TODO(daria): verify that this is better than // TODO(daria): verify that this is better than
// just adding a NULL classification. // just adding a NULL classification.
if (!Results->HasNonfragment || Results->NumMatches == 0) if (!Results->HasNonfragment || Results->match.empty())
ClassifyAsNoise(Results); ClassifyAsNoise(Results);
delete sample; delete sample;
} /* DoAdaptiveMatch */ } /* DoAdaptiveMatch */
@ -1696,17 +1681,15 @@ UNICHAR_ID *Classify::GetAmbiguities(TBLOB *Blob,
CharNormClassifier(Blob, *sample, Results); CharNormClassifier(Blob, *sample, Results);
delete sample; delete sample;
RemoveBadMatches(Results); RemoveBadMatches(Results);
qsort((void *)Results->match, Results->NumMatches, Results->match.sort(CompareByRating);
sizeof(ScoredClass), CompareByRating);
/* copy the class id's into an string of ambiguities - don't copy if /* copy the class id's into an string of ambiguities - don't copy if
the correct class is the only class id matched */ the correct class is the only class id matched */
Ambiguities = (UNICHAR_ID *) Emalloc (sizeof (UNICHAR_ID) * Ambiguities = new UNICHAR_ID[Results->match.size() + 1];
(Results->NumMatches + 1)); if (Results->match.size() > 1 ||
if (Results->NumMatches > 1 || (Results->match.size() == 1 &&
(Results->NumMatches == 1 &&
Results->match[0].unichar_id != CorrectClass)) { Results->match[0].unichar_id != CorrectClass)) {
for (i = 0; i < Results->NumMatches; i++) for (i = 0; i < Results->match.size(); i++)
Ambiguities[i] = Results->match[i].unichar_id; Ambiguities[i] = Results->match[i].unichar_id;
Ambiguities[i] = -1; Ambiguities[i] = -1;
} else { } else {
@ -1721,7 +1704,7 @@ UNICHAR_ID *Classify::GetAmbiguities(TBLOB *Blob,
// present in the classifier templates. // present in the classifier templates.
bool Classify::LooksLikeGarbage(TBLOB *blob) { bool Classify::LooksLikeGarbage(TBLOB *blob) {
BLOB_CHOICE_LIST *ratings = new BLOB_CHOICE_LIST(); BLOB_CHOICE_LIST *ratings = new BLOB_CHOICE_LIST();
AdaptiveClassifier(blob, ratings, NULL); AdaptiveClassifier(blob, ratings);
BLOB_CHOICE_IT ratings_it(ratings); BLOB_CHOICE_IT ratings_it(ratings);
const UNICHARSET &unicharset = getDict().getUnicharset(); const UNICHARSET &unicharset = getDict().getUnicharset();
if (classify_debug_character_fragments) { if (classify_debug_character_fragments) {
@ -2119,7 +2102,7 @@ namespace tesseract {
* @note History: Mon Mar 18 09:24:53 1991, DSJ, Created. * @note History: Mon Mar 18 09:24:53 1991, DSJ, Created.
*/ */
void Classify::PrintAdaptiveMatchResults(FILE *File, ADAPT_RESULTS *Results) { void Classify::PrintAdaptiveMatchResults(FILE *File, ADAPT_RESULTS *Results) {
for (int i = 0; i < Results->NumMatches; ++i) { for (int i = 0; i < Results->match.size(); ++i) {
tprintf("%s(%d), shape %d, %.2f ", tprintf("%s(%d), shape %d, %.2f ",
unicharset.debug_str(Results->match[i].unichar_id).string(), unicharset.debug_str(Results->match[i].unichar_id).string(),
Results->match[i].unichar_id, Results->match[i].shape_id, Results->match[i].unichar_id, Results->match[i].shape_id,
@ -2158,7 +2141,7 @@ void Classify::RemoveBadMatches(ADAPT_RESULTS *Results) {
ScoredClass scored_one = ScoredUnichar(Results, unichar_id_one); ScoredClass scored_one = ScoredUnichar(Results, unichar_id_one);
ScoredClass scored_zero = ScoredUnichar(Results, unichar_id_zero); ScoredClass scored_zero = ScoredUnichar(Results, unichar_id_zero);
for (Next = NextGood = 0; Next < Results->NumMatches; Next++) { for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
if (Results->match[Next].rating <= BadMatchThreshold) { if (Results->match[Next].rating <= BadMatchThreshold) {
ScoredClass match = Results->match[Next]; ScoredClass match = Results->match[Next];
if (!unicharset.get_isalpha(match.unichar_id) || if (!unicharset.get_isalpha(match.unichar_id) ||
@ -2179,12 +2162,12 @@ void Classify::RemoveBadMatches(ADAPT_RESULTS *Results) {
} }
} }
} else { } else {
for (Next = NextGood = 0; Next < Results->NumMatches; Next++) { for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
if (Results->match[Next].rating <= BadMatchThreshold) if (Results->match[Next].rating <= BadMatchThreshold)
Results->match[NextGood++] = Results->match[Next]; Results->match[NextGood++] = Results->match[Next];
} }
} }
Results->NumMatches = NextGood; Results->match.truncate(NextGood);
} /* RemoveBadMatches */ } /* RemoveBadMatches */
/*----------------------------------------------------------------------------*/ /*----------------------------------------------------------------------------*/
@ -2207,7 +2190,7 @@ void Classify::RemoveExtraPuncs(ADAPT_RESULTS *Results) {
punc_count = 0; punc_count = 0;
digit_count = 0; digit_count = 0;
for (Next = NextGood = 0; Next < Results->NumMatches; Next++) { for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
ScoredClass match = Results->match[Next]; ScoredClass match = Results->match[Next];
if (strstr(punc_chars, if (strstr(punc_chars,
unicharset.id_to_unichar(match.unichar_id)) != NULL) { unicharset.id_to_unichar(match.unichar_id)) != NULL) {
@ -2225,7 +2208,7 @@ void Classify::RemoveExtraPuncs(ADAPT_RESULTS *Results) {
} }
} }
} }
Results->NumMatches = NextGood; Results->match.truncate(NextGood);
} /* RemoveExtraPuncs */ } /* RemoveExtraPuncs */
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/

View File

@ -102,7 +102,7 @@ class Classify : public CCStruct {
const INT_FEATURE_STRUCT* features, const INT_FEATURE_STRUCT* features,
const uinT8* normalization_factors, const uinT8* normalization_factors,
const uinT16* expected_num_features, const uinT16* expected_num_features,
CP_RESULT_STRUCT* results); GenericVector<CP_RESULT_STRUCT>* results);
void ReadNewCutoffs(FILE *CutoffFile, bool swap, inT64 end_offset, void ReadNewCutoffs(FILE *CutoffFile, bool swap, inT64 end_offset,
CLASS_CUTOFF_ARRAY Cutoffs); CLASS_CUTOFF_ARRAY Cutoffs);
void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates); void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates);
@ -114,7 +114,6 @@ class Classify : public CCStruct {
void FreeNormProtos(); void FreeNormProtos();
NORM_PROTOS *ReadNormProtos(FILE *File, inT64 end_offset); NORM_PROTOS *ReadNormProtos(FILE *File, inT64 end_offset);
/* protos.cpp ***************************************************************/ /* protos.cpp ***************************************************************/
void ReadClassFile();
void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class); void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class);
INT_TEMPLATES CreateIntTemplates(CLASSES FloatProtos, INT_TEMPLATES CreateIntTemplates(CLASSES FloatProtos,
const UNICHARSET& target_unicharset); const UNICHARSET& target_unicharset);
@ -158,10 +157,9 @@ class Classify : public CCStruct {
const uinT8* norm_factors, const uinT8* norm_factors,
ADAPT_CLASS* classes, ADAPT_CLASS* classes,
int debug, int debug,
int num_classes,
int matcher_multiplier, int matcher_multiplier,
const TBOX& blob_box, const TBOX& blob_box,
CLASS_PRUNER_RESULTS results, const GenericVector<CP_RESULT_STRUCT>& results,
ADAPT_RESULTS* final_results); ADAPT_RESULTS* final_results);
// Converts configs to fonts, and if the result is not adapted, and a // Converts configs to fonts, and if the result is not adapted, and a
// shape_table_ is present, the shape is expanded to include all // shape_table_ is present, the shape is expanded to include all
@ -271,9 +269,7 @@ class Classify : public CCStruct {
void EndAdaptiveClassifier(); void EndAdaptiveClassifier();
void SettupPass1(); void SettupPass1();
void SettupPass2(); void SettupPass2();
void AdaptiveClassifier(TBLOB *Blob, void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices);
BLOB_CHOICE_LIST *Choices,
CLASS_PRUNER_RESULTS cp_results);
void ClassifyAsNoise(ADAPT_RESULTS *Results); void ClassifyAsNoise(ADAPT_RESULTS *Results);
void ResetAdaptiveClassifierInternal(); void ResetAdaptiveClassifierInternal();

View File

@ -357,10 +357,12 @@ class ClassPruner {
// Copies the pruned, sorted classes into the output results and returns // Copies the pruned, sorted classes into the output results and returns
// the number of classes. // the number of classes.
int SetupResults(CP_RESULT_STRUCT* results) const { int SetupResults(GenericVector<CP_RESULT_STRUCT>* results) const {
CP_RESULT_STRUCT empty;
results->init_to_size(num_classes_, empty);
for (int c = 0; c < num_classes_; ++c) { for (int c = 0; c < num_classes_; ++c) {
results[c].Class = sort_index_[num_classes_ - c]; (*results)[c].Class = sort_index_[num_classes_ - c];
results[c].Rating = 1.0 - sort_key_[num_classes_ - c] / (*results)[c].Rating = 1.0 - sort_key_[num_classes_ - c] /
(static_cast<float>(CLASS_PRUNER_CLASS_MASK) * num_features_); (static_cast<float>(CLASS_PRUNER_CLASS_MASK) * num_features_);
} }
return num_classes_; return num_classes_;
@ -408,7 +410,7 @@ int Classify::PruneClasses(const INT_TEMPLATES_STRUCT* int_templates,
const INT_FEATURE_STRUCT* features, const INT_FEATURE_STRUCT* features,
const uinT8* normalization_factors, const uinT8* normalization_factors,
const uinT16* expected_num_features, const uinT16* expected_num_features,
CP_RESULT_STRUCT* results) { GenericVector<CP_RESULT_STRUCT>* results) {
/* /*
** Operation: ** Operation:
** Prunes the classes using a modified fast match table. ** Prunes the classes using a modified fast match table.

View File

@ -54,8 +54,6 @@ struct CP_RESULT_STRUCT {
CLASS_ID Class; CLASS_ID Class;
}; };
typedef CP_RESULT_STRUCT CLASS_PRUNER_RESULTS[MAX_NUM_CLASSES];
/*---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Variables Variables
-----------------------------------------------------------------------------*/ -----------------------------------------------------------------------------*/

View File

@ -281,200 +281,3 @@ void PrintProtos(CLASS_TYPE Class) {
new_line(); new_line();
} }
} }
namespace tesseract {
/**
 * @name ReadClassFile
 *
 * Read in the training data from the file named by classify_training_file.
 * The file is consumed one class at a time: the first whitespace-delimited
 * token of a header line is taken as the unichar naming the class, the class
 * body is loaded by ReadClassFromFile(), and the two lines that follow the
 * body are read and discarded.  The results are stored in the global
 * variable, 'TrainingData'.
 *
 * A header line that contains no token (for example a blank line) is
 * skipped instead of being looked up with an unset or stale buffer.
 */
void Classify::ReadClassFile() {
  FILE *File;
  char TextLine[CHARS_PER_LINE];
  char unichar[CHARS_PER_LINE];

  cprintf ("Reading training data from '%s' ...",
           static_cast<STRING>(classify_training_file).string());
  fflush(stdout);

  File = open_file(static_cast<STRING>(classify_training_file).string(), "r");
  while (fgets (TextLine, CHARS_PER_LINE, File) != NULL) {
    // unichar has the same capacity as TextLine, so "%s" cannot overrun it.
    // Only proceed when a token was actually extracted.
    if (sscanf(TextLine, "%s", unichar) != 1)
      continue;
    ReadClassFromFile (File, unicharset.unichar_to_id(unichar));
    // Discard the two lines following the class body; stop as soon as the
    // file runs out instead of issuing further reads.
    if (fgets(TextLine, CHARS_PER_LINE, File) == NULL ||
        fgets(TextLine, CHARS_PER_LINE, File) == NULL)
      break;
  }
  fclose(File);
  new_line();
}
} // namespace tesseract
/**
* ReadClassFromFile
*
* Read in a class description (protos and configs) from a file. Update
* the class structure record.
*/
void ReadClassFromFile(FILE *File, UNICHAR_ID unichar_id) {
CLASS_TYPE Class;
Class = &TrainingData[unichar_id];
ReadProtos(File, Class);
ReadConfigs(File, Class);
}
/**
* ReadConfigs
*
* Read the prototype configurations for this class from a file. Read
* the requested number of lines.
*/
void ReadConfigs(register FILE *File, CLASS_TYPE Class) {
inT16 Cid;
register inT16 Wid;
register BIT_VECTOR ThisConfig;
int NumWords;
int NumConfigs;
fscanf (File, "%d %d\n", &NumConfigs, &NumWords);
Class->NumConfigs = NumConfigs;
Class->MaxNumConfigs = NumConfigs;
Class->Configurations =
(CONFIGS) Emalloc (sizeof (BIT_VECTOR) * NumConfigs);
NumWords = WordsInVectorOfSize (Class->NumProtos);
for (Cid = 0; Cid < NumConfigs; Cid++) {
ThisConfig = NewBitVector (Class->NumProtos);
for (Wid = 0; Wid < NumWords; Wid++)
fscanf (File, "%x", &ThisConfig[Wid]);
Class->Configurations[Cid] = ThisConfig;
}
}
/**
* ReadProtos
*
* Read in all the prototype information from a file. Read the number
* of lines requested.
*/
void ReadProtos(register FILE *File, CLASS_TYPE Class) {
register inT16 Pid;
register PROTO Proto;
int NumProtos;
fscanf (File, "%d\n", &NumProtos);
Class->NumProtos = NumProtos;
Class->MaxNumProtos = NumProtos;
Class->Prototypes = (PROTO) Emalloc (sizeof (PROTO_STRUCT) * NumProtos);
for (Pid = 0; Pid < NumProtos; Pid++) {
Proto = ProtoIn (Class, Pid);
fscanf (File, "%f %f %f %f %f %f %f\n",
&Proto->X,
&Proto->Y,
&Proto->Length,
&Proto->Angle,
&Proto->A,
&Proto->B, &Proto->C);
}
}
/**
 * @name SplitProto
 *
 * Add a new proto to this class and return its id.  The id is obtained from
 * AddProtoToClass(); afterwards every configuration of the class that has
 * the bit for OldPid set also gets the bit for the new proto set.  The
 * caller is responsible for actually filling in the new proto's parameters.
 *
 * @param Class   class to add the proto to
 * @param OldPid  id of the existing proto whose config membership is copied
 * @return id of the newly added proto
 */
int SplitProto(CLASS_TYPE Class, int OldPid) {
  int fresh_pid = AddProtoToClass (Class);

  for (int cfg = 0; cfg < Class->NumConfigs; cfg++) {
    BIT_VECTOR cfg_bits = Class->Configurations[cfg];
    if (test_bit (cfg_bits, OldPid))
      SET_BIT(cfg_bits, fresh_pid);
  }
  return fresh_pid;
}
/**
* @deprecated
* @name WriteOldConfigFile
*
* Write the configs in the given class to the specified file in the
* old config format.
*
* @param File The file to write to
* @param Class The class to write
*/
void WriteOldConfigFile(FILE *File, CLASS_TYPE Class) {
int Cid, Pid;
BIT_VECTOR Config;
fprintf (File, "%d %d\n", Class->NumConfigs, Class->NumProtos);
for (Cid = 0; Cid < Class->NumConfigs; Cid++) {
fprintf (File, "1 ");
Config = Class->Configurations[Cid];
for (Pid = 0; Pid < Class->NumProtos; Pid++) {
if (test_bit (Config, Pid))
fprintf (File, "1");
else
fprintf (File, "0");
}
fprintf (File, "\n");
}
}
/**
* @deprecated
* @name WriteOldProtoFile
*
* Write the protos in the given class to the specified file in the
* old proto format.
*
* @param File The file to write to
* @param Class The class to write
*/
void WriteOldProtoFile(FILE *File, CLASS_TYPE Class) {
int Pid;
PROTO Proto;
/* print old header */
fprintf (File, "6\n");
fprintf (File, "linear essential -0.500000 0.500000\n");
fprintf (File, "linear essential -0.250000 0.750000\n");
fprintf (File, "linear essential 0.000000 1.000000\n");
fprintf (File, "circular essential 0.000000 1.000000\n");
fprintf (File, "linear non-essential -0.500000 0.500000\n");
fprintf (File, "linear non-essential -0.500000 0.500000\n");
for (Pid = 0; Pid < Class->NumProtos; Pid++) {
Proto = ProtoIn (Class, Pid);
fprintf (File, "significant elliptical 1\n");
fprintf (File, " %9.6f %9.6f %9.6f %9.6f %9.6f %9.6f\n",
Proto->X, Proto->Y,
Proto->Length, Proto->Angle, 0.0, 0.0);
fprintf (File, " %9.6f %9.6f %9.6f %9.6f %9.6f %9.6f\n",
0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001);
}
}

View File

@ -176,16 +176,4 @@ CLASS_TYPE NewClass(int NumProtos, int NumConfigs);
void PrintProtos(CLASS_TYPE Class); void PrintProtos(CLASS_TYPE Class);
void ReadClassFromFile(FILE *File, UNICHAR_ID unichar_id);
void ReadConfigs(register FILE *File, CLASS_TYPE Class);
void ReadProtos(register FILE *File, CLASS_TYPE Class);
int SplitProto(CLASS_TYPE Class, int OldPid);
void WriteOldConfigFile(FILE *File, CLASS_TYPE Class);
void WriteOldProtoFile(FILE *File, CLASS_TYPE Class);
#endif #endif