Fixed slow-down that was caused by upping MAX_NUM_CLASSES
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@1013 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
Parent: cd15c5e2c2
Commit: 1a487252f4
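The slow-down came from ADAPT_RESULTS and the class pruner carrying fixed-size arrays: ScoredClass match[MAX_NUM_CLASSES] and CLASS_PRUNER_RESULTS, a typedef for CP_RESULT_STRUCT[MAX_NUM_CLASSES]. Every results object paid for MAX_NUM_CLASSES slots no matter how few classes survived pruning, so raising MAX_NUM_CLASSES made every allocation, initialization and memcpy of those arrays more expensive. The diff below replaces them with GenericVector members that hold only the matches actually produced, drops the separate NumMatches/num_classes bookkeeping in favour of size()/push_back()/truncate(), and removes the now-unused CPResults output parameter from Classify::AdaptiveClassifier. The sketch below illustrates the pattern only; it uses std::vector as a stand-in for Tesseract's GenericVector and trimmed, hypothetical structs, so it is not the project's code.

// Sketch only: std::vector stands in for GenericVector, and the structs are
// trimmed-down, hypothetical versions of the ones touched by this commit.
#include <cstdio>
#include <vector>

static const int MAX_NUM_CLASSES = 8192;  // hypothetical post-bump value

struct ScoredClass { int unichar_id; float rating; };

// Before: every ADAPT_RESULTS carried MAX_NUM_CLASSES slots, so creating,
// zeroing or memcpy-ing one cost O(MAX_NUM_CLASSES) even for a few matches.
struct AdaptResultsOld {
  int NumMatches = 0;
  ScoredClass match[MAX_NUM_CLASSES];
};

// After: the vector holds only the matches actually produced, and the
// NumMatches bookkeeping becomes size()/push_back()/truncate().
struct AdaptResultsNew {
  std::vector<ScoredClass> match;
};

int main() {
  AdaptResultsNew results;
  results.match.push_back({'a', 0.12f});  // was: match[NumMatches++] = ...
  results.match.push_back({'o', 0.37f});
  std::printf("old struct: %zu bytes; new struct holds %zu matches\n",
              sizeof(AdaptResultsOld), results.match.size());
  return 0;
}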
@@ -32,14 +32,14 @@
 #elif MINGW
 // workaround for stdlib.h with -std=c++11 for _splitpath and _MAX_FNAME
 #undef __STRICT_ANSI__
 #endif // _MSC_VER
 #include <stdlib.h>
 #include <windows.h>
 #else
 #include <dirent.h>
 #include <libgen.h>
 #include <string.h>
 #endif // _WIN32

 #if !defined(VERSION)
 #include "version.h"
@@ -2288,7 +2288,7 @@ void TessBaseAPI::AdaptToCharacter(const char *unichar_repr,

 // Classify to get a raw choice.
 BLOB_CHOICE_LIST choices;
-tesseract_->AdaptiveClassifier(blob, &choices, NULL);
+tesseract_->AdaptiveClassifier(blob, &choices);
 BLOB_CHOICE_IT choice_it;
 choice_it.set_to_list(&choices);
 for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
@@ -2520,7 +2520,7 @@ void TessBaseAPI::RunAdaptiveClassifier(TBLOB* blob,
 float* ratings,
 int* num_matches_returned) {
 BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST;
-tesseract_->AdaptiveClassifier(blob, choices, NULL);
+tesseract_->AdaptiveClassifier(blob, choices);
 BLOB_CHOICE_IT choices_it(choices);
 int& index = *num_matches_returned;
 index = 0;
@@ -186,8 +186,8 @@ void EquationDetect::IdentifySpecialText(
 normed_blob->Normalize(NULL, NULL, NULL, x_orig, y_orig, scaling, scaling,
 0.0f, static_cast<float>(kBlnBaselineOffset),
 false, NULL);
-equ_tesseract_->AdaptiveClassifier(normed_blob, &ratings_equ, NULL);
-lang_tesseract_->AdaptiveClassifier(normed_blob, &ratings_lang, NULL);
+equ_tesseract_->AdaptiveClassifier(normed_blob, &ratings_equ);
+lang_tesseract_->AdaptiveClassifier(normed_blob, &ratings_lang);
 delete normed_blob;
 delete tblob;

@@ -352,7 +352,7 @@ bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o,
 x_origin, y_origin, scaling, scaling,
 0.0f, static_cast<float>(kBlnBaselineOffset),
 false, NULL);
-tess->AdaptiveClassifier(rotated_blob, ratings + i, NULL);
+tess->AdaptiveClassifier(rotated_blob, ratings + i);
 delete rotated_blob;
 current_rotation.rotate(rotation90);
 }
@@ -95,7 +95,7 @@ void FreeTempProto(void *arg) {

 void FreePermConfig(PERM_CONFIG Config) {
 assert(Config != NULL);
-Efree(Config->Ambigs);
+delete [] Config->Ambigs;
 free_struct(Config, sizeof(PERM_CONFIG_STRUCT), "PERM_CONFIG_STRUCT");
 }

@@ -406,7 +406,7 @@ PERM_CONFIG ReadPermConfig(FILE *File) {
 "PERM_CONFIG_STRUCT");
 uinT8 NumAmbigs;
 fread ((char *) &NumAmbigs, sizeof(uinT8), 1, File);
-Config->Ambigs = (UNICHAR_ID *)Emalloc(sizeof(UNICHAR_ID) * (NumAmbigs + 1));
+Config->Ambigs = new UNICHAR_ID[NumAmbigs + 1];
 fread(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File);
 Config->Ambigs[NumAmbigs] = -1;
 fread(&(Config->FontinfoId), sizeof(int), 1, File);
@@ -87,17 +87,15 @@ struct ScoredClass {

 struct ADAPT_RESULTS {
 inT32 BlobLength;
-int NumMatches;
 bool HasNonfragment;
-ScoredClass match[MAX_NUM_CLASSES];
+GenericVector<ScoredClass> match;
 ScoredClass best_match;
-CLASS_PRUNER_RESULTS CPResults;
+GenericVector<CP_RESULT_STRUCT> CPResults;

 /// Initializes data members to the default values. Sets the initial
 /// rating of each class to be the worst possible rating (1.0).
 inline void Initialize() {
 BlobLength = MAX_INT32;
-NumMatches = 0;
 HasNonfragment = false;
 best_match.unichar_id = NO_CLASS;
 best_match.shape_id = -1;
@@ -163,29 +161,22 @@ namespace tesseract {
 *
 * @param Blob blob to be classified
 * @param[out] Choices List of choices found by adaptive matcher.
-* @param[out] CPResults Array of CPResultStruct of size MAX_NUM_CLASSES is
 * filled on return with the choices found by the
 * class pruner and the ratings therefrom. Also
 * contains the detailed results of the integer matcher.
 *
 */
-void Classify::AdaptiveClassifier(TBLOB *Blob,
-BLOB_CHOICE_LIST *Choices,
-CLASS_PRUNER_RESULTS CPResults) {
+void Classify::AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices) {
 assert(Choices != NULL);
-ADAPT_RESULTS *Results = new ADAPT_RESULTS();
+ADAPT_RESULTS *Results = new ADAPT_RESULTS;
 Results->Initialize();

 ASSERT_HOST(AdaptedTemplates != NULL);

 DoAdaptiveMatch(Blob, Results);
-if (CPResults != NULL)
-memcpy(CPResults, Results->CPResults,
-sizeof(CPResults[0]) * Results->NumMatches);

 RemoveBadMatches(Results);
-qsort((void *)Results->match, Results->NumMatches,
-sizeof(ScoredClass), CompareByRating);
+Results->match.sort(CompareByRating);
 RemoveExtraPuncs(Results);
 ConvertMatchesToChoices(Blob->denorm(), Blob->bounding_box(), Results,
 Choices);
@@ -1029,7 +1020,7 @@ void Classify::AddNewResult(ADAPT_RESULTS *results,
 if (old_match)
 old_match->rating = rating;
 else
-results->match[results->NumMatches++] = match;
+results->match.push_back(match);

 if (rating < results->best_match.rating &&
 // Ensure that fragments do not affect best rating, class and config.
@@ -1111,14 +1102,13 @@ void Classify::MasterMatcher(INT_TEMPLATES templates,
 const uinT8* norm_factors,
 ADAPT_CLASS* classes,
 int debug,
-int num_classes,
 int matcher_multiplier,
 const TBOX& blob_box,
-CLASS_PRUNER_RESULTS results,
+const GenericVector<CP_RESULT_STRUCT>& results,
 ADAPT_RESULTS* final_results) {
 int top = blob_box.top();
 int bottom = blob_box.bottom();
-for (int c = 0; c < num_classes; c++) {
+for (int c = 0; c < results.size(); c++) {
 CLASS_ID class_id = results[c].Class;
 INT_RESULT_STRUCT& int_result = results[c].IMResult;
 BIT_VECTOR protos = classes != NULL ? classes[class_id]->PermProtos
@@ -1279,21 +1269,19 @@ UNICHAR_ID *Classify::BaselineClassifier(
 const INT_FX_RESULT_STRUCT& fx_info,
 ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results) {
 if (int_features.empty()) return NULL;
-int NumClasses;
 uinT8* CharNormArray = new uinT8[unicharset.size()];
 ClearCharNormArray(CharNormArray);

 Results->BlobLength = IntCastRounded(fx_info.Length / kStandardFeatureLength);
-NumClasses = PruneClasses(Templates->Templates, int_features.size(),
-&int_features[0],
-CharNormArray, BaselineCutoffs, Results->CPResults);
+PruneClasses(Templates->Templates, int_features.size(), &int_features[0],
+CharNormArray, BaselineCutoffs, &Results->CPResults);

 if (matcher_debug_level >= 2 || classify_debug_level > 1)
 cprintf ("BL Matches = ");

 MasterMatcher(Templates->Templates, int_features.size(), &int_features[0],
 CharNormArray,
-Templates->Class, matcher_debug_flags, NumClasses, 0,
+Templates->Class, matcher_debug_flags, 0,
 Blob->bounding_box(), Results->CPResults, Results);

 delete [] CharNormArray;
@@ -1375,20 +1363,18 @@ int Classify::CharNormTrainingSample(bool pruner_only,
 ComputeCharNormArrays(norm_feature, PreTrainedTemplates, char_norm_array,
 pruner_norm_array);

-int num_classes = PruneClasses(PreTrainedTemplates, num_features,
-sample.features(),
-pruner_norm_array,
-shape_table_ != NULL ? &shapetable_cutoffs_[0]
-: CharNormCutoffs,
-adapt_results->CPResults);
+PruneClasses(PreTrainedTemplates, num_features, sample.features(),
+pruner_norm_array,
+shape_table_ != NULL ? &shapetable_cutoffs_[0] : CharNormCutoffs,
+&adapt_results->CPResults);
 delete [] pruner_norm_array;
 if (keep_this >= 0) {
-num_classes = 1;
 adapt_results->CPResults[0].Class = keep_this;
+adapt_results->CPResults.truncate(1);
 }
 if (pruner_only) {
 // Convert pruner results to output format.
-for (int i = 0; i < num_classes; ++i) {
+for (int i = 0; i < adapt_results->CPResults.size(); ++i) {
 int class_id = adapt_results->CPResults[i].Class;
 results->push_back(
 UnicharRating(class_id, 1.0f - adapt_results->CPResults[i].Rating));
@@ -1396,11 +1382,11 @@ int Classify::CharNormTrainingSample(bool pruner_only,
 } else {
 MasterMatcher(PreTrainedTemplates, num_features, sample.features(),
 char_norm_array,
-NULL, matcher_debug_flags, num_classes,
+NULL, matcher_debug_flags,
 classify_integer_matcher_multiplier,
 blob_box, adapt_results->CPResults, adapt_results);
 // Convert master matcher results to output format.
-for (int i = 0; i < adapt_results->NumMatches; i++) {
+for (int i = 0; i < adapt_results->match.size(); i++) {
 ScoredClass next = adapt_results->match[i];
 UnicharRating rating(next.unichar_id, 1.0f - next.rating);
 if (next.fontinfo_id >= 0) {
@@ -1449,7 +1435,7 @@ void Classify::ClassifyAsNoise(ADAPT_RESULTS *Results) {
 /*---------------------------------------------------------------------------*/
 // Return a pointer to the scored unichar in results, or NULL if not present.
 ScoredClass *FindScoredUnichar(ADAPT_RESULTS *results, UNICHAR_ID id) {
-for (int i = 0; i < results->NumMatches; i++) {
+for (int i = 0; i < results->match.size(); i++) {
 if (results->match[i].unichar_id == id)
 return &results->match[i];
 }
@@ -1516,7 +1502,7 @@ void Classify::ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box,
 }

 float best_certainty = -MAX_FLOAT32;
-for (int i = 0; i < Results->NumMatches; i++) {
+for (int i = 0; i < Results->match.size(); i++) {
 ScoredClass next = Results->match[i];
 int fontinfo_id = next.fontinfo_id;
 int fontinfo_id2 = next.fontinfo_id2;
@@ -1564,7 +1550,7 @@ void Classify::ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box,
 choices_length++;
 if (choices_length >= max_matches) break;
 }
-Results->NumMatches = choices_length;
+Results->match.truncate(choices_length);
 } // ConvertMatchesToChoices


@@ -1583,7 +1569,7 @@ void Classify::ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box,
 void Classify::DebugAdaptiveClassifier(TBLOB *blob,
 ADAPT_RESULTS *Results) {
 if (static_classifier_ == NULL) return;
-for (int i = 0; i < Results->NumMatches; i++) {
+for (int i = 0; i < Results->match.size(); i++) {
 if (i == 0 || Results->match[i].rating < Results->best_match.rating)
 Results->best_match = Results->match[i];
 }
@@ -1636,10 +1622,9 @@ void Classify::DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results) {
 } else {
 Ambiguities = BaselineClassifier(Blob, bl_features, fx_info,
 AdaptedTemplates, Results);
-if ((Results->NumMatches > 0 &&
-MarginalMatch (Results->best_match.rating) &&
+if ((!Results->match.empty() && MarginalMatch(Results->best_match.rating) &&
 !tess_bn_matching) ||
-Results->NumMatches == 0) {
+Results->match.empty()) {
 CharNormClassifier(Blob, *sample, Results);
 } else if (Ambiguities && *Ambiguities >= 0 && !tess_bn_matching) {
 AmbigClassifier(bl_features, fx_info, Blob,
@@ -1654,7 +1639,7 @@ void Classify::DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results) {
 // if the results contain only fragments.
 // TODO(daria): verify that this is better than
 // just adding a NULL classification.
-if (!Results->HasNonfragment || Results->NumMatches == 0)
+if (!Results->HasNonfragment || Results->match.empty())
 ClassifyAsNoise(Results);
 delete sample;
 } /* DoAdaptiveMatch */
@@ -1696,17 +1681,15 @@ UNICHAR_ID *Classify::GetAmbiguities(TBLOB *Blob,
 CharNormClassifier(Blob, *sample, Results);
 delete sample;
 RemoveBadMatches(Results);
-qsort((void *)Results->match, Results->NumMatches,
-sizeof(ScoredClass), CompareByRating);
+Results->match.sort(CompareByRating);

 /* copy the class id's into an string of ambiguities - don't copy if
 the correct class is the only class id matched */
-Ambiguities = (UNICHAR_ID *) Emalloc (sizeof (UNICHAR_ID) *
-(Results->NumMatches + 1));
-if (Results->NumMatches > 1 ||
-(Results->NumMatches == 1 &&
+Ambiguities = new UNICHAR_ID[Results->match.size() + 1];
+if (Results->match.size() > 1 ||
+(Results->match.size() == 1 &&
 Results->match[0].unichar_id != CorrectClass)) {
-for (i = 0; i < Results->NumMatches; i++)
+for (i = 0; i < Results->match.size(); i++)
 Ambiguities[i] = Results->match[i].unichar_id;
 Ambiguities[i] = -1;
 } else {
@@ -1721,7 +1704,7 @@ UNICHAR_ID *Classify::GetAmbiguities(TBLOB *Blob,
 // present in the classifier templates.
 bool Classify::LooksLikeGarbage(TBLOB *blob) {
 BLOB_CHOICE_LIST *ratings = new BLOB_CHOICE_LIST();
-AdaptiveClassifier(blob, ratings, NULL);
+AdaptiveClassifier(blob, ratings);
 BLOB_CHOICE_IT ratings_it(ratings);
 const UNICHARSET &unicharset = getDict().getUnicharset();
 if (classify_debug_character_fragments) {
@@ -2119,7 +2102,7 @@ namespace tesseract {
 * @note History: Mon Mar 18 09:24:53 1991, DSJ, Created.
 */
 void Classify::PrintAdaptiveMatchResults(FILE *File, ADAPT_RESULTS *Results) {
-for (int i = 0; i < Results->NumMatches; ++i) {
+for (int i = 0; i < Results->match.size(); ++i) {
 tprintf("%s(%d), shape %d, %.2f ",
 unicharset.debug_str(Results->match[i].unichar_id).string(),
 Results->match[i].unichar_id, Results->match[i].shape_id,
@@ -2158,7 +2141,7 @@ void Classify::RemoveBadMatches(ADAPT_RESULTS *Results) {
 ScoredClass scored_one = ScoredUnichar(Results, unichar_id_one);
 ScoredClass scored_zero = ScoredUnichar(Results, unichar_id_zero);

-for (Next = NextGood = 0; Next < Results->NumMatches; Next++) {
+for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
 if (Results->match[Next].rating <= BadMatchThreshold) {
 ScoredClass match = Results->match[Next];
 if (!unicharset.get_isalpha(match.unichar_id) ||
@@ -2179,12 +2162,12 @@ void Classify::RemoveBadMatches(ADAPT_RESULTS *Results) {
 }
 }
 } else {
-for (Next = NextGood = 0; Next < Results->NumMatches; Next++) {
+for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
 if (Results->match[Next].rating <= BadMatchThreshold)
 Results->match[NextGood++] = Results->match[Next];
 }
 }
-Results->NumMatches = NextGood;
+Results->match.truncate(NextGood);
 } /* RemoveBadMatches */

 /*----------------------------------------------------------------------------*/
@@ -2207,7 +2190,7 @@ void Classify::RemoveExtraPuncs(ADAPT_RESULTS *Results) {

 punc_count = 0;
 digit_count = 0;
-for (Next = NextGood = 0; Next < Results->NumMatches; Next++) {
+for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
 ScoredClass match = Results->match[Next];
 if (strstr(punc_chars,
 unicharset.id_to_unichar(match.unichar_id)) != NULL) {
@@ -2225,7 +2208,7 @@ void Classify::RemoveExtraPuncs(ADAPT_RESULTS *Results) {
 }
 }
 }
-Results->NumMatches = NextGood;
+Results->match.truncate(NextGood);
 } /* RemoveExtraPuncs */

 /*---------------------------------------------------------------------------*/
@@ -102,7 +102,7 @@ class Classify : public CCStruct {
 const INT_FEATURE_STRUCT* features,
 const uinT8* normalization_factors,
 const uinT16* expected_num_features,
-CP_RESULT_STRUCT* results);
+GenericVector<CP_RESULT_STRUCT>* results);
 void ReadNewCutoffs(FILE *CutoffFile, bool swap, inT64 end_offset,
 CLASS_CUTOFF_ARRAY Cutoffs);
 void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates);
@@ -114,7 +114,6 @@ class Classify : public CCStruct {
 void FreeNormProtos();
 NORM_PROTOS *ReadNormProtos(FILE *File, inT64 end_offset);
 /* protos.cpp ***************************************************************/
-void ReadClassFile();
 void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class);
 INT_TEMPLATES CreateIntTemplates(CLASSES FloatProtos,
 const UNICHARSET& target_unicharset);
@@ -158,10 +157,9 @@ class Classify : public CCStruct {
 const uinT8* norm_factors,
 ADAPT_CLASS* classes,
 int debug,
-int num_classes,
 int matcher_multiplier,
 const TBOX& blob_box,
-CLASS_PRUNER_RESULTS results,
+const GenericVector<CP_RESULT_STRUCT>& results,
 ADAPT_RESULTS* final_results);
 // Converts configs to fonts, and if the result is not adapted, and a
 // shape_table_ is present, the shape is expanded to include all
@@ -271,9 +269,7 @@ class Classify : public CCStruct {
 void EndAdaptiveClassifier();
 void SettupPass1();
 void SettupPass2();
-void AdaptiveClassifier(TBLOB *Blob,
-BLOB_CHOICE_LIST *Choices,
-CLASS_PRUNER_RESULTS cp_results);
+void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices);
 void ClassifyAsNoise(ADAPT_RESULTS *Results);
 void ResetAdaptiveClassifierInternal();

@@ -357,10 +357,12 @@ class ClassPruner {

 // Copies the pruned, sorted classes into the output results and returns
 // the number of classes.
-int SetupResults(CP_RESULT_STRUCT* results) const {
+int SetupResults(GenericVector<CP_RESULT_STRUCT>* results) const {
+CP_RESULT_STRUCT empty;
+results->init_to_size(num_classes_, empty);
 for (int c = 0; c < num_classes_; ++c) {
-results[c].Class = sort_index_[num_classes_ - c];
-results[c].Rating = 1.0 - sort_key_[num_classes_ - c] /
+(*results)[c].Class = sort_index_[num_classes_ - c];
+(*results)[c].Rating = 1.0 - sort_key_[num_classes_ - c] /
 (static_cast<float>(CLASS_PRUNER_CLASS_MASK) * num_features_);
 }
 return num_classes_;
@@ -408,7 +410,7 @@ int Classify::PruneClasses(const INT_TEMPLATES_STRUCT* int_templates,
 const INT_FEATURE_STRUCT* features,
 const uinT8* normalization_factors,
 const uinT16* expected_num_features,
-CP_RESULT_STRUCT* results) {
+GenericVector<CP_RESULT_STRUCT>* results) {
 /*
 ** Operation:
 ** Prunes the classes using a modified fast match table.
@@ -54,8 +54,6 @@ struct CP_RESULT_STRUCT {
 CLASS_ID Class;
 };

-typedef CP_RESULT_STRUCT CLASS_PRUNER_RESULTS[MAX_NUM_CLASSES];
-
 /*----------------------------------------------------------------------------
 Variables
 -----------------------------------------------------------------------------*/
@@ -281,200 +281,3 @@ void PrintProtos(CLASS_TYPE Class) {
 new_line();
 }
 }
-
-
-namespace tesseract {
-/**
- * @name ReadClassFile
- *
- * Read in the training data from a file. All of the classes are read
- * in. The results are stored in the global variable, 'TrainingData'.
- */
-void Classify::ReadClassFile() {
-FILE *File;
-char TextLine[CHARS_PER_LINE];
-char unichar[CHARS_PER_LINE];
-
-cprintf ("Reading training data from '%s' ...",
-static_cast<STRING>(classify_training_file).string());
-fflush(stdout);
-
-File = open_file(static_cast<STRING>(classify_training_file).string(), "r");
-while (fgets (TextLine, CHARS_PER_LINE, File) != NULL) {
-
-sscanf(TextLine, "%s", unichar);
-ReadClassFromFile (File, unicharset.unichar_to_id(unichar));
-fgets(TextLine, CHARS_PER_LINE, File);
-fgets(TextLine, CHARS_PER_LINE, File);
-}
-fclose(File);
-new_line();
-}
-} // namespace tesseract
-
-/**
- * ReadClassFromFile
- *
- * Read in a class description (protos and configs) from a file. Update
- * the class structure record.
- */
-void ReadClassFromFile(FILE *File, UNICHAR_ID unichar_id) {
-CLASS_TYPE Class;
-
-Class = &TrainingData[unichar_id];
-
-ReadProtos(File, Class);
-
-ReadConfigs(File, Class);
-}
-
-/**
- * ReadConfigs
- *
- * Read the prototype configurations for this class from a file. Read
- * the requested number of lines.
- */
-void ReadConfigs(register FILE *File, CLASS_TYPE Class) {
-inT16 Cid;
-register inT16 Wid;
-register BIT_VECTOR ThisConfig;
-int NumWords;
-int NumConfigs;
-
-fscanf (File, "%d %d\n", &NumConfigs, &NumWords);
-Class->NumConfigs = NumConfigs;
-Class->MaxNumConfigs = NumConfigs;
-Class->Configurations =
-(CONFIGS) Emalloc (sizeof (BIT_VECTOR) * NumConfigs);
-NumWords = WordsInVectorOfSize (Class->NumProtos);
-
-for (Cid = 0; Cid < NumConfigs; Cid++) {
-
-ThisConfig = NewBitVector (Class->NumProtos);
-for (Wid = 0; Wid < NumWords; Wid++)
-fscanf (File, "%x", &ThisConfig[Wid]);
-Class->Configurations[Cid] = ThisConfig;
-}
-}
-
-
-/**
- * ReadProtos
- *
- * Read in all the prototype information from a file. Read the number
- * of lines requested.
- */
-void ReadProtos(register FILE *File, CLASS_TYPE Class) {
-register inT16 Pid;
-register PROTO Proto;
-int NumProtos;
-
-fscanf (File, "%d\n", &NumProtos);
-Class->NumProtos = NumProtos;
-Class->MaxNumProtos = NumProtos;
-Class->Prototypes = (PROTO) Emalloc (sizeof (PROTO_STRUCT) * NumProtos);
-
-for (Pid = 0; Pid < NumProtos; Pid++) {
-Proto = ProtoIn (Class, Pid);
-fscanf (File, "%f %f %f %f %f %f %f\n",
-&Proto->X,
-&Proto->Y,
-&Proto->Length,
-&Proto->Angle,
-&Proto->A,
-&Proto->B, &Proto->C);
-}
-}
-
-
-/**
- * @name SplitProto
- *
- * Add a new proto to this class. Malloc new space and copy the
- * old protos if necessary. Return the proto id for the new proto.
- * Update all configurations so that each config which contained the
- * specified old proto will also contain the new proto. The caller
- * is responsible for actually filling in the appropriate proto params.
- */
-int SplitProto(CLASS_TYPE Class, int OldPid) {
-int i;
-int NewPid;
-BIT_VECTOR Config;
-
-NewPid = AddProtoToClass (Class);
-
-for (i = 0; i < Class->NumConfigs; i++) {
-Config = Class->Configurations[i];
-if (test_bit (Config, OldPid))
-SET_BIT(Config, NewPid);
-}
-return (NewPid);
-}
-
-
-/**
- * @deprecated
- * @name WriteOldConfigFile
- *
- * Write the configs in the given class to the specified file in the
- * old config format.
- *
- * @param File The file to write to
- * @param Class The class to write
- */
-void WriteOldConfigFile(FILE *File, CLASS_TYPE Class) {
-int Cid, Pid;
-BIT_VECTOR Config;
-
-fprintf (File, "%d %d\n", Class->NumConfigs, Class->NumProtos);
-
-for (Cid = 0; Cid < Class->NumConfigs; Cid++) {
-fprintf (File, "1 ");
-
-Config = Class->Configurations[Cid];
-
-for (Pid = 0; Pid < Class->NumProtos; Pid++) {
-if (test_bit (Config, Pid))
-fprintf (File, "1");
-else
-fprintf (File, "0");
-}
-fprintf (File, "\n");
-}
-}
-
-
-/**
- * @deprecated
- * @name WriteOldProtoFile
- *
- * Write the protos in the given class to the specified file in the
- * old proto format.
- *
- * @param File The file to write to
- * @param Class The class to write
- */
-void WriteOldProtoFile(FILE *File, CLASS_TYPE Class) {
-int Pid;
-PROTO Proto;
-
-/* print old header */
-fprintf (File, "6\n");
-fprintf (File, "linear essential -0.500000 0.500000\n");
-fprintf (File, "linear essential -0.250000 0.750000\n");
-fprintf (File, "linear essential 0.000000 1.000000\n");
-fprintf (File, "circular essential 0.000000 1.000000\n");
-fprintf (File, "linear non-essential -0.500000 0.500000\n");
-fprintf (File, "linear non-essential -0.500000 0.500000\n");
-
-for (Pid = 0; Pid < Class->NumProtos; Pid++) {
-Proto = ProtoIn (Class, Pid);
-
-fprintf (File, "significant elliptical 1\n");
-fprintf (File, " %9.6f %9.6f %9.6f %9.6f %9.6f %9.6f\n",
-Proto->X, Proto->Y,
-Proto->Length, Proto->Angle, 0.0, 0.0);
-fprintf (File, " %9.6f %9.6f %9.6f %9.6f %9.6f %9.6f\n",
-0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001);
-}
-}
@@ -176,16 +176,4 @@ CLASS_TYPE NewClass(int NumProtos, int NumConfigs);

 void PrintProtos(CLASS_TYPE Class);

-void ReadClassFromFile(FILE *File, UNICHAR_ID unichar_id);
-
-void ReadConfigs(register FILE *File, CLASS_TYPE Class);
-
-void ReadProtos(register FILE *File, CLASS_TYPE Class);
-
-int SplitProto(CLASS_TYPE Class, int OldPid);
-
-void WriteOldConfigFile(FILE *File, CLASS_TYPE Class);
-
-void WriteOldProtoFile(FILE *File, CLASS_TYPE Class);
-
 #endif
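Beyond the data-structure change, PruneClasses and ClassPruner::SetupResults now take a GenericVector<CP_RESULT_STRUCT>* that the callee sizes itself (init_to_size) and fills through (*results)[c], instead of writing into a caller-allocated CLASS_PRUNER_RESULTS array. The sketch below mirrors that fill-through-pointer shape only; std::vector stands in for GenericVector, CpResult is a trimmed stand-in for CP_RESULT_STRUCT, and the class ids and rating formula are placeholders, not the project's code.

// Sketch of the new SetupResults-style fill: the callee resizes the caller's
// vector and writes through (*results)[c] instead of assuming a fixed array.
#include <cstdio>
#include <vector>

struct CpResult { int Class; float Rating; };  // trimmed CP_RESULT_STRUCT stand-in

// Stand-in for ClassPruner::SetupResults(GenericVector<CP_RESULT_STRUCT>*).
static int SetupResults(const std::vector<float>& sort_key,
                        std::vector<CpResult>* results) {
  const int num_classes = static_cast<int>(sort_key.size());
  results->assign(num_classes, CpResult());     // ~ init_to_size(num_classes_, empty)
  for (int c = 0; c < num_classes; ++c) {
    (*results)[c].Class = c;                    // placeholder class id
    (*results)[c].Rating = 1.0f - sort_key[c];  // placeholder rating formula
  }
  return num_classes;
}

int main() {
  std::vector<CpResult> results;                // was a fixed CLASS_PRUNER_RESULTS array
  const int n = SetupResults({0.9f, 0.4f, 0.1f}, &results);
  for (int c = 0; c < n; ++c)
    std::printf("class %d rating %.2f\n", results[c].Class, results[c].Rating);
  return 0;
}

In the diff itself, SetupResults still returns num_classes_, while callers such as MasterMatcher now iterate over results.size() instead of taking a separate num_classes argument.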