mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-06-07 01:42:41 +08:00
Fixed slow-down that was caused by upping MAX_NUM_CLASSES
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@1013 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
parent
cd15c5e2c2
commit
1a487252f4
@ -31,15 +31,15 @@
|
||||
#include "mathfix.h"
|
||||
#elif MINGW
|
||||
// workaround for stdlib.h with -std=c++11 for _splitpath and _MAX_FNAME
|
||||
#undef __STRICT_ANSI__
|
||||
#endif // _MSC_VER
|
||||
#undef __STRICT_ANSI__
|
||||
#endif // _MSC_VER
|
||||
#include <stdlib.h>
|
||||
#include <windows.h>
|
||||
#else
|
||||
#include <dirent.h>
|
||||
#include <libgen.h>
|
||||
#include <string.h>
|
||||
#endif // _WIN32
|
||||
#endif // _WIN32
|
||||
|
||||
#if !defined(VERSION)
|
||||
#include "version.h"
|
||||
@ -2288,7 +2288,7 @@ void TessBaseAPI::AdaptToCharacter(const char *unichar_repr,
|
||||
|
||||
// Classify to get a raw choice.
|
||||
BLOB_CHOICE_LIST choices;
|
||||
tesseract_->AdaptiveClassifier(blob, &choices, NULL);
|
||||
tesseract_->AdaptiveClassifier(blob, &choices);
|
||||
BLOB_CHOICE_IT choice_it;
|
||||
choice_it.set_to_list(&choices);
|
||||
for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
|
||||
@ -2520,7 +2520,7 @@ void TessBaseAPI::RunAdaptiveClassifier(TBLOB* blob,
|
||||
float* ratings,
|
||||
int* num_matches_returned) {
|
||||
BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST;
|
||||
tesseract_->AdaptiveClassifier(blob, choices, NULL);
|
||||
tesseract_->AdaptiveClassifier(blob, choices);
|
||||
BLOB_CHOICE_IT choices_it(choices);
|
||||
int& index = *num_matches_returned;
|
||||
index = 0;
|
||||
|
@ -186,8 +186,8 @@ void EquationDetect::IdentifySpecialText(
|
||||
normed_blob->Normalize(NULL, NULL, NULL, x_orig, y_orig, scaling, scaling,
|
||||
0.0f, static_cast<float>(kBlnBaselineOffset),
|
||||
false, NULL);
|
||||
equ_tesseract_->AdaptiveClassifier(normed_blob, &ratings_equ, NULL);
|
||||
lang_tesseract_->AdaptiveClassifier(normed_blob, &ratings_lang, NULL);
|
||||
equ_tesseract_->AdaptiveClassifier(normed_blob, &ratings_equ);
|
||||
lang_tesseract_->AdaptiveClassifier(normed_blob, &ratings_lang);
|
||||
delete normed_blob;
|
||||
delete tblob;
|
||||
|
||||
|
@ -352,7 +352,7 @@ bool os_detect_blob(BLOBNBOX* bbox, OrientationDetector* o,
|
||||
x_origin, y_origin, scaling, scaling,
|
||||
0.0f, static_cast<float>(kBlnBaselineOffset),
|
||||
false, NULL);
|
||||
tess->AdaptiveClassifier(rotated_blob, ratings + i, NULL);
|
||||
tess->AdaptiveClassifier(rotated_blob, ratings + i);
|
||||
delete rotated_blob;
|
||||
current_rotation.rotate(rotation90);
|
||||
}
|
||||
|
@ -95,7 +95,7 @@ void FreeTempProto(void *arg) {
|
||||
|
||||
/**
 * Releases a permanent config: first the Ambigs array it owns, then the
 * PERM_CONFIG_STRUCT record itself.
 *
 * Ambigs is released exactly once, with delete[]. Releasing it a second
 * time through Efree (as the pre-change code did) would be a double free,
 * and delete[] is the form that pairs with an array allocated via
 * new UNICHAR_ID[...].
 * NOTE(review): confirm every site that creates Ambigs uses new[].
 *
 * @param Config  config to destroy; must not be NULL (asserted).
 */
void FreePermConfig(PERM_CONFIG Config) {
  assert(Config != NULL);
  delete [] Config->Ambigs;
  free_struct(Config, sizeof(PERM_CONFIG_STRUCT), "PERM_CONFIG_STRUCT");
}
|
||||
|
||||
@ -406,7 +406,7 @@ PERM_CONFIG ReadPermConfig(FILE *File) {
|
||||
"PERM_CONFIG_STRUCT");
|
||||
uinT8 NumAmbigs;
|
||||
fread ((char *) &NumAmbigs, sizeof(uinT8), 1, File);
|
||||
Config->Ambigs = (UNICHAR_ID *)Emalloc(sizeof(UNICHAR_ID) * (NumAmbigs + 1));
|
||||
Config->Ambigs = new UNICHAR_ID[NumAmbigs + 1];
|
||||
fread(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File);
|
||||
Config->Ambigs[NumAmbigs] = -1;
|
||||
fread(&(Config->FontinfoId), sizeof(int), 1, File);
|
||||
|
@ -87,17 +87,15 @@ struct ScoredClass {
|
||||
|
||||
struct ADAPT_RESULTS {
|
||||
inT32 BlobLength;
|
||||
int NumMatches;
|
||||
bool HasNonfragment;
|
||||
ScoredClass match[MAX_NUM_CLASSES];
|
||||
GenericVector<ScoredClass> match;
|
||||
ScoredClass best_match;
|
||||
CLASS_PRUNER_RESULTS CPResults;
|
||||
GenericVector<CP_RESULT_STRUCT> CPResults;
|
||||
|
||||
/// Initializes data members to the default values. Sets the initial
|
||||
/// rating of each class to be the worst possible rating (1.0).
|
||||
inline void Initialize() {
|
||||
BlobLength = MAX_INT32;
|
||||
NumMatches = 0;
|
||||
HasNonfragment = false;
|
||||
best_match.unichar_id = NO_CLASS;
|
||||
best_match.shape_id = -1;
|
||||
@ -163,29 +161,22 @@ namespace tesseract {
|
||||
*
|
||||
* @param Blob blob to be classified
|
||||
* @param[out] Choices List of choices found by adaptive matcher.
|
||||
* @param[out] CPResults Array of CPResultStruct of size MAX_NUM_CLASSES is
|
||||
* filled on return with the choices found by the
|
||||
* class pruner and the ratings therefrom. Also
|
||||
* contains the detailed results of the integer matcher.
|
||||
*
|
||||
*/
|
||||
void Classify::AdaptiveClassifier(TBLOB *Blob,
|
||||
BLOB_CHOICE_LIST *Choices,
|
||||
CLASS_PRUNER_RESULTS CPResults) {
|
||||
void Classify::AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices) {
|
||||
assert(Choices != NULL);
|
||||
ADAPT_RESULTS *Results = new ADAPT_RESULTS();
|
||||
ADAPT_RESULTS *Results = new ADAPT_RESULTS;
|
||||
Results->Initialize();
|
||||
|
||||
ASSERT_HOST(AdaptedTemplates != NULL);
|
||||
|
||||
DoAdaptiveMatch(Blob, Results);
|
||||
if (CPResults != NULL)
|
||||
memcpy(CPResults, Results->CPResults,
|
||||
sizeof(CPResults[0]) * Results->NumMatches);
|
||||
|
||||
RemoveBadMatches(Results);
|
||||
qsort((void *)Results->match, Results->NumMatches,
|
||||
sizeof(ScoredClass), CompareByRating);
|
||||
Results->match.sort(CompareByRating);
|
||||
RemoveExtraPuncs(Results);
|
||||
ConvertMatchesToChoices(Blob->denorm(), Blob->bounding_box(), Results,
|
||||
Choices);
|
||||
@ -1029,7 +1020,7 @@ void Classify::AddNewResult(ADAPT_RESULTS *results,
|
||||
if (old_match)
|
||||
old_match->rating = rating;
|
||||
else
|
||||
results->match[results->NumMatches++] = match;
|
||||
results->match.push_back(match);
|
||||
|
||||
if (rating < results->best_match.rating &&
|
||||
// Ensure that fragments do not affect best rating, class and config.
|
||||
@ -1111,14 +1102,13 @@ void Classify::MasterMatcher(INT_TEMPLATES templates,
|
||||
const uinT8* norm_factors,
|
||||
ADAPT_CLASS* classes,
|
||||
int debug,
|
||||
int num_classes,
|
||||
int matcher_multiplier,
|
||||
const TBOX& blob_box,
|
||||
CLASS_PRUNER_RESULTS results,
|
||||
const GenericVector<CP_RESULT_STRUCT>& results,
|
||||
ADAPT_RESULTS* final_results) {
|
||||
int top = blob_box.top();
|
||||
int bottom = blob_box.bottom();
|
||||
for (int c = 0; c < num_classes; c++) {
|
||||
for (int c = 0; c < results.size(); c++) {
|
||||
CLASS_ID class_id = results[c].Class;
|
||||
INT_RESULT_STRUCT& int_result = results[c].IMResult;
|
||||
BIT_VECTOR protos = classes != NULL ? classes[class_id]->PermProtos
|
||||
@ -1279,21 +1269,19 @@ UNICHAR_ID *Classify::BaselineClassifier(
|
||||
const INT_FX_RESULT_STRUCT& fx_info,
|
||||
ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results) {
|
||||
if (int_features.empty()) return NULL;
|
||||
int NumClasses;
|
||||
uinT8* CharNormArray = new uinT8[unicharset.size()];
|
||||
ClearCharNormArray(CharNormArray);
|
||||
|
||||
Results->BlobLength = IntCastRounded(fx_info.Length / kStandardFeatureLength);
|
||||
NumClasses = PruneClasses(Templates->Templates, int_features.size(),
|
||||
&int_features[0],
|
||||
CharNormArray, BaselineCutoffs, Results->CPResults);
|
||||
PruneClasses(Templates->Templates, int_features.size(), &int_features[0],
|
||||
CharNormArray, BaselineCutoffs, &Results->CPResults);
|
||||
|
||||
if (matcher_debug_level >= 2 || classify_debug_level > 1)
|
||||
cprintf ("BL Matches = ");
|
||||
|
||||
MasterMatcher(Templates->Templates, int_features.size(), &int_features[0],
|
||||
CharNormArray,
|
||||
Templates->Class, matcher_debug_flags, NumClasses, 0,
|
||||
Templates->Class, matcher_debug_flags, 0,
|
||||
Blob->bounding_box(), Results->CPResults, Results);
|
||||
|
||||
delete [] CharNormArray;
|
||||
@ -1375,20 +1363,18 @@ int Classify::CharNormTrainingSample(bool pruner_only,
|
||||
ComputeCharNormArrays(norm_feature, PreTrainedTemplates, char_norm_array,
|
||||
pruner_norm_array);
|
||||
|
||||
int num_classes = PruneClasses(PreTrainedTemplates, num_features,
|
||||
sample.features(),
|
||||
pruner_norm_array,
|
||||
shape_table_ != NULL ? &shapetable_cutoffs_[0]
|
||||
: CharNormCutoffs,
|
||||
adapt_results->CPResults);
|
||||
PruneClasses(PreTrainedTemplates, num_features, sample.features(),
|
||||
pruner_norm_array,
|
||||
shape_table_ != NULL ? &shapetable_cutoffs_[0] : CharNormCutoffs,
|
||||
&adapt_results->CPResults);
|
||||
delete [] pruner_norm_array;
|
||||
if (keep_this >= 0) {
|
||||
num_classes = 1;
|
||||
adapt_results->CPResults[0].Class = keep_this;
|
||||
adapt_results->CPResults.truncate(1);
|
||||
}
|
||||
if (pruner_only) {
|
||||
// Convert pruner results to output format.
|
||||
for (int i = 0; i < num_classes; ++i) {
|
||||
for (int i = 0; i < adapt_results->CPResults.size(); ++i) {
|
||||
int class_id = adapt_results->CPResults[i].Class;
|
||||
results->push_back(
|
||||
UnicharRating(class_id, 1.0f - adapt_results->CPResults[i].Rating));
|
||||
@ -1396,11 +1382,11 @@ int Classify::CharNormTrainingSample(bool pruner_only,
|
||||
} else {
|
||||
MasterMatcher(PreTrainedTemplates, num_features, sample.features(),
|
||||
char_norm_array,
|
||||
NULL, matcher_debug_flags, num_classes,
|
||||
NULL, matcher_debug_flags,
|
||||
classify_integer_matcher_multiplier,
|
||||
blob_box, adapt_results->CPResults, adapt_results);
|
||||
// Convert master matcher results to output format.
|
||||
for (int i = 0; i < adapt_results->NumMatches; i++) {
|
||||
for (int i = 0; i < adapt_results->match.size(); i++) {
|
||||
ScoredClass next = adapt_results->match[i];
|
||||
UnicharRating rating(next.unichar_id, 1.0f - next.rating);
|
||||
if (next.fontinfo_id >= 0) {
|
||||
@ -1449,7 +1435,7 @@ void Classify::ClassifyAsNoise(ADAPT_RESULTS *Results) {
|
||||
/*---------------------------------------------------------------------------*/
|
||||
// Return a pointer to the scored unichar in results, or NULL if not present.
|
||||
ScoredClass *FindScoredUnichar(ADAPT_RESULTS *results, UNICHAR_ID id) {
|
||||
for (int i = 0; i < results->NumMatches; i++) {
|
||||
for (int i = 0; i < results->match.size(); i++) {
|
||||
if (results->match[i].unichar_id == id)
|
||||
return &results->match[i];
|
||||
}
|
||||
@ -1516,7 +1502,7 @@ void Classify::ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box,
|
||||
}
|
||||
|
||||
float best_certainty = -MAX_FLOAT32;
|
||||
for (int i = 0; i < Results->NumMatches; i++) {
|
||||
for (int i = 0; i < Results->match.size(); i++) {
|
||||
ScoredClass next = Results->match[i];
|
||||
int fontinfo_id = next.fontinfo_id;
|
||||
int fontinfo_id2 = next.fontinfo_id2;
|
||||
@ -1564,7 +1550,7 @@ void Classify::ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box,
|
||||
choices_length++;
|
||||
if (choices_length >= max_matches) break;
|
||||
}
|
||||
Results->NumMatches = choices_length;
|
||||
Results->match.truncate(choices_length);
|
||||
} // ConvertMatchesToChoices
|
||||
|
||||
|
||||
@ -1583,7 +1569,7 @@ void Classify::ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box,
|
||||
void Classify::DebugAdaptiveClassifier(TBLOB *blob,
|
||||
ADAPT_RESULTS *Results) {
|
||||
if (static_classifier_ == NULL) return;
|
||||
for (int i = 0; i < Results->NumMatches; i++) {
|
||||
for (int i = 0; i < Results->match.size(); i++) {
|
||||
if (i == 0 || Results->match[i].rating < Results->best_match.rating)
|
||||
Results->best_match = Results->match[i];
|
||||
}
|
||||
@ -1636,10 +1622,9 @@ void Classify::DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results) {
|
||||
} else {
|
||||
Ambiguities = BaselineClassifier(Blob, bl_features, fx_info,
|
||||
AdaptedTemplates, Results);
|
||||
if ((Results->NumMatches > 0 &&
|
||||
MarginalMatch (Results->best_match.rating) &&
|
||||
if ((!Results->match.empty() && MarginalMatch(Results->best_match.rating) &&
|
||||
!tess_bn_matching) ||
|
||||
Results->NumMatches == 0) {
|
||||
Results->match.empty()) {
|
||||
CharNormClassifier(Blob, *sample, Results);
|
||||
} else if (Ambiguities && *Ambiguities >= 0 && !tess_bn_matching) {
|
||||
AmbigClassifier(bl_features, fx_info, Blob,
|
||||
@ -1654,7 +1639,7 @@ void Classify::DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results) {
|
||||
// if the results contain only fragments.
|
||||
// TODO(daria): verify that this is better than
|
||||
// just adding a NULL classification.
|
||||
if (!Results->HasNonfragment || Results->NumMatches == 0)
|
||||
if (!Results->HasNonfragment || Results->match.empty())
|
||||
ClassifyAsNoise(Results);
|
||||
delete sample;
|
||||
} /* DoAdaptiveMatch */
|
||||
@ -1696,17 +1681,15 @@ UNICHAR_ID *Classify::GetAmbiguities(TBLOB *Blob,
|
||||
CharNormClassifier(Blob, *sample, Results);
|
||||
delete sample;
|
||||
RemoveBadMatches(Results);
|
||||
qsort((void *)Results->match, Results->NumMatches,
|
||||
sizeof(ScoredClass), CompareByRating);
|
||||
Results->match.sort(CompareByRating);
|
||||
|
||||
/* copy the class id's into an string of ambiguities - don't copy if
|
||||
the correct class is the only class id matched */
|
||||
Ambiguities = (UNICHAR_ID *) Emalloc (sizeof (UNICHAR_ID) *
|
||||
(Results->NumMatches + 1));
|
||||
if (Results->NumMatches > 1 ||
|
||||
(Results->NumMatches == 1 &&
|
||||
Ambiguities = new UNICHAR_ID[Results->match.size() + 1];
|
||||
if (Results->match.size() > 1 ||
|
||||
(Results->match.size() == 1 &&
|
||||
Results->match[0].unichar_id != CorrectClass)) {
|
||||
for (i = 0; i < Results->NumMatches; i++)
|
||||
for (i = 0; i < Results->match.size(); i++)
|
||||
Ambiguities[i] = Results->match[i].unichar_id;
|
||||
Ambiguities[i] = -1;
|
||||
} else {
|
||||
@ -1721,7 +1704,7 @@ UNICHAR_ID *Classify::GetAmbiguities(TBLOB *Blob,
|
||||
// present in the classifier templates.
|
||||
bool Classify::LooksLikeGarbage(TBLOB *blob) {
|
||||
BLOB_CHOICE_LIST *ratings = new BLOB_CHOICE_LIST();
|
||||
AdaptiveClassifier(blob, ratings, NULL);
|
||||
AdaptiveClassifier(blob, ratings);
|
||||
BLOB_CHOICE_IT ratings_it(ratings);
|
||||
const UNICHARSET &unicharset = getDict().getUnicharset();
|
||||
if (classify_debug_character_fragments) {
|
||||
@ -2119,7 +2102,7 @@ namespace tesseract {
|
||||
* @note History: Mon Mar 18 09:24:53 1991, DSJ, Created.
|
||||
*/
|
||||
void Classify::PrintAdaptiveMatchResults(FILE *File, ADAPT_RESULTS *Results) {
|
||||
for (int i = 0; i < Results->NumMatches; ++i) {
|
||||
for (int i = 0; i < Results->match.size(); ++i) {
|
||||
tprintf("%s(%d), shape %d, %.2f ",
|
||||
unicharset.debug_str(Results->match[i].unichar_id).string(),
|
||||
Results->match[i].unichar_id, Results->match[i].shape_id,
|
||||
@ -2158,7 +2141,7 @@ void Classify::RemoveBadMatches(ADAPT_RESULTS *Results) {
|
||||
ScoredClass scored_one = ScoredUnichar(Results, unichar_id_one);
|
||||
ScoredClass scored_zero = ScoredUnichar(Results, unichar_id_zero);
|
||||
|
||||
for (Next = NextGood = 0; Next < Results->NumMatches; Next++) {
|
||||
for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
|
||||
if (Results->match[Next].rating <= BadMatchThreshold) {
|
||||
ScoredClass match = Results->match[Next];
|
||||
if (!unicharset.get_isalpha(match.unichar_id) ||
|
||||
@ -2179,12 +2162,12 @@ void Classify::RemoveBadMatches(ADAPT_RESULTS *Results) {
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (Next = NextGood = 0; Next < Results->NumMatches; Next++) {
|
||||
for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
|
||||
if (Results->match[Next].rating <= BadMatchThreshold)
|
||||
Results->match[NextGood++] = Results->match[Next];
|
||||
}
|
||||
}
|
||||
Results->NumMatches = NextGood;
|
||||
Results->match.truncate(NextGood);
|
||||
} /* RemoveBadMatches */
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
@ -2207,7 +2190,7 @@ void Classify::RemoveExtraPuncs(ADAPT_RESULTS *Results) {
|
||||
|
||||
punc_count = 0;
|
||||
digit_count = 0;
|
||||
for (Next = NextGood = 0; Next < Results->NumMatches; Next++) {
|
||||
for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
|
||||
ScoredClass match = Results->match[Next];
|
||||
if (strstr(punc_chars,
|
||||
unicharset.id_to_unichar(match.unichar_id)) != NULL) {
|
||||
@ -2225,7 +2208,7 @@ void Classify::RemoveExtraPuncs(ADAPT_RESULTS *Results) {
|
||||
}
|
||||
}
|
||||
}
|
||||
Results->NumMatches = NextGood;
|
||||
Results->match.truncate(NextGood);
|
||||
} /* RemoveExtraPuncs */
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
|
@ -102,7 +102,7 @@ class Classify : public CCStruct {
|
||||
const INT_FEATURE_STRUCT* features,
|
||||
const uinT8* normalization_factors,
|
||||
const uinT16* expected_num_features,
|
||||
CP_RESULT_STRUCT* results);
|
||||
GenericVector<CP_RESULT_STRUCT>* results);
|
||||
void ReadNewCutoffs(FILE *CutoffFile, bool swap, inT64 end_offset,
|
||||
CLASS_CUTOFF_ARRAY Cutoffs);
|
||||
void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates);
|
||||
@ -114,7 +114,6 @@ class Classify : public CCStruct {
|
||||
void FreeNormProtos();
|
||||
NORM_PROTOS *ReadNormProtos(FILE *File, inT64 end_offset);
|
||||
/* protos.cpp ***************************************************************/
|
||||
void ReadClassFile();
|
||||
void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class);
|
||||
INT_TEMPLATES CreateIntTemplates(CLASSES FloatProtos,
|
||||
const UNICHARSET& target_unicharset);
|
||||
@ -158,10 +157,9 @@ class Classify : public CCStruct {
|
||||
const uinT8* norm_factors,
|
||||
ADAPT_CLASS* classes,
|
||||
int debug,
|
||||
int num_classes,
|
||||
int matcher_multiplier,
|
||||
const TBOX& blob_box,
|
||||
CLASS_PRUNER_RESULTS results,
|
||||
const GenericVector<CP_RESULT_STRUCT>& results,
|
||||
ADAPT_RESULTS* final_results);
|
||||
// Converts configs to fonts, and if the result is not adapted, and a
|
||||
// shape_table_ is present, the shape is expanded to include all
|
||||
@ -271,9 +269,7 @@ class Classify : public CCStruct {
|
||||
void EndAdaptiveClassifier();
|
||||
void SettupPass1();
|
||||
void SettupPass2();
|
||||
void AdaptiveClassifier(TBLOB *Blob,
|
||||
BLOB_CHOICE_LIST *Choices,
|
||||
CLASS_PRUNER_RESULTS cp_results);
|
||||
void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices);
|
||||
void ClassifyAsNoise(ADAPT_RESULTS *Results);
|
||||
void ResetAdaptiveClassifierInternal();
|
||||
|
||||
|
@ -357,10 +357,12 @@ class ClassPruner {
|
||||
|
||||
// Copies the pruned, sorted classes into the output results and returns
|
||||
// the number of classes.
|
||||
int SetupResults(CP_RESULT_STRUCT* results) const {
|
||||
int SetupResults(GenericVector<CP_RESULT_STRUCT>* results) const {
|
||||
CP_RESULT_STRUCT empty;
|
||||
results->init_to_size(num_classes_, empty);
|
||||
for (int c = 0; c < num_classes_; ++c) {
|
||||
results[c].Class = sort_index_[num_classes_ - c];
|
||||
results[c].Rating = 1.0 - sort_key_[num_classes_ - c] /
|
||||
(*results)[c].Class = sort_index_[num_classes_ - c];
|
||||
(*results)[c].Rating = 1.0 - sort_key_[num_classes_ - c] /
|
||||
(static_cast<float>(CLASS_PRUNER_CLASS_MASK) * num_features_);
|
||||
}
|
||||
return num_classes_;
|
||||
@ -408,7 +410,7 @@ int Classify::PruneClasses(const INT_TEMPLATES_STRUCT* int_templates,
|
||||
const INT_FEATURE_STRUCT* features,
|
||||
const uinT8* normalization_factors,
|
||||
const uinT16* expected_num_features,
|
||||
CP_RESULT_STRUCT* results) {
|
||||
GenericVector<CP_RESULT_STRUCT>* results) {
|
||||
/*
|
||||
** Operation:
|
||||
** Prunes the classes using a modified fast match table.
|
||||
|
@ -54,8 +54,6 @@ struct CP_RESULT_STRUCT {
|
||||
CLASS_ID Class;
|
||||
};
|
||||
|
||||
typedef CP_RESULT_STRUCT CLASS_PRUNER_RESULTS[MAX_NUM_CLASSES];
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
Variables
|
||||
-----------------------------------------------------------------------------*/
|
||||
|
@ -281,200 +281,3 @@ void PrintProtos(CLASS_TYPE Class) {
|
||||
new_line();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
namespace tesseract {
|
||||
/**
|
||||
* @name ReadClassFile
|
||||
*
|
||||
* Read in the training data from a file. All of the classes are read
|
||||
* in. The results are stored in the global variable, 'TrainingData'.
|
||||
*/
|
||||
void Classify::ReadClassFile() {
|
||||
FILE *File;
|
||||
char TextLine[CHARS_PER_LINE];
|
||||
char unichar[CHARS_PER_LINE];
|
||||
|
||||
cprintf ("Reading training data from '%s' ...",
|
||||
static_cast<STRING>(classify_training_file).string());
|
||||
fflush(stdout);
|
||||
|
||||
File = open_file(static_cast<STRING>(classify_training_file).string(), "r");
|
||||
while (fgets (TextLine, CHARS_PER_LINE, File) != NULL) {
|
||||
|
||||
sscanf(TextLine, "%s", unichar);
|
||||
ReadClassFromFile (File, unicharset.unichar_to_id(unichar));
|
||||
fgets(TextLine, CHARS_PER_LINE, File);
|
||||
fgets(TextLine, CHARS_PER_LINE, File);
|
||||
}
|
||||
fclose(File);
|
||||
new_line();
|
||||
}
|
||||
} // namespace tesseract
|
||||
|
||||
/**
|
||||
* ReadClassFromFile
|
||||
*
|
||||
* Read in a class description (protos and configs) from a file. Update
|
||||
* the class structure record.
|
||||
*/
|
||||
void ReadClassFromFile(FILE *File, UNICHAR_ID unichar_id) {
|
||||
CLASS_TYPE Class;
|
||||
|
||||
Class = &TrainingData[unichar_id];
|
||||
|
||||
ReadProtos(File, Class);
|
||||
|
||||
ReadConfigs(File, Class);
|
||||
}
|
||||
|
||||
/**
|
||||
* ReadConfigs
|
||||
*
|
||||
* Read the prototype configurations for this class from a file. Read
|
||||
* the requested number of lines.
|
||||
*/
|
||||
void ReadConfigs(register FILE *File, CLASS_TYPE Class) {
|
||||
inT16 Cid;
|
||||
register inT16 Wid;
|
||||
register BIT_VECTOR ThisConfig;
|
||||
int NumWords;
|
||||
int NumConfigs;
|
||||
|
||||
fscanf (File, "%d %d\n", &NumConfigs, &NumWords);
|
||||
Class->NumConfigs = NumConfigs;
|
||||
Class->MaxNumConfigs = NumConfigs;
|
||||
Class->Configurations =
|
||||
(CONFIGS) Emalloc (sizeof (BIT_VECTOR) * NumConfigs);
|
||||
NumWords = WordsInVectorOfSize (Class->NumProtos);
|
||||
|
||||
for (Cid = 0; Cid < NumConfigs; Cid++) {
|
||||
|
||||
ThisConfig = NewBitVector (Class->NumProtos);
|
||||
for (Wid = 0; Wid < NumWords; Wid++)
|
||||
fscanf (File, "%x", &ThisConfig[Wid]);
|
||||
Class->Configurations[Cid] = ThisConfig;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* ReadProtos
|
||||
*
|
||||
* Read in all the prototype information from a file. Read the number
|
||||
* of lines requested.
|
||||
*/
|
||||
void ReadProtos(register FILE *File, CLASS_TYPE Class) {
|
||||
register inT16 Pid;
|
||||
register PROTO Proto;
|
||||
int NumProtos;
|
||||
|
||||
fscanf (File, "%d\n", &NumProtos);
|
||||
Class->NumProtos = NumProtos;
|
||||
Class->MaxNumProtos = NumProtos;
|
||||
Class->Prototypes = (PROTO) Emalloc (sizeof (PROTO_STRUCT) * NumProtos);
|
||||
|
||||
for (Pid = 0; Pid < NumProtos; Pid++) {
|
||||
Proto = ProtoIn (Class, Pid);
|
||||
fscanf (File, "%f %f %f %f %f %f %f\n",
|
||||
&Proto->X,
|
||||
&Proto->Y,
|
||||
&Proto->Length,
|
||||
&Proto->Angle,
|
||||
&Proto->A,
|
||||
&Proto->B, &Proto->C);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @name SplitProto
|
||||
*
|
||||
* Add a new proto to this class. Malloc new space and copy the
|
||||
* old protos if necessary. Return the proto id for the new proto.
|
||||
* Update all configurations so that each config which contained the
|
||||
* specified old proto will also contain the new proto. The caller
|
||||
* is responsible for actually filling in the appropriate proto params.
|
||||
*/
|
||||
int SplitProto(CLASS_TYPE Class, int OldPid) {
|
||||
int i;
|
||||
int NewPid;
|
||||
BIT_VECTOR Config;
|
||||
|
||||
NewPid = AddProtoToClass (Class);
|
||||
|
||||
for (i = 0; i < Class->NumConfigs; i++) {
|
||||
Config = Class->Configurations[i];
|
||||
if (test_bit (Config, OldPid))
|
||||
SET_BIT(Config, NewPid);
|
||||
}
|
||||
return (NewPid);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @deprecated
|
||||
* @name WriteOldConfigFile
|
||||
*
|
||||
* Write the configs in the given class to the specified file in the
|
||||
* old config format.
|
||||
*
|
||||
* @param File The file to write to
|
||||
* @param Class The class to write
|
||||
*/
|
||||
void WriteOldConfigFile(FILE *File, CLASS_TYPE Class) {
|
||||
int Cid, Pid;
|
||||
BIT_VECTOR Config;
|
||||
|
||||
fprintf (File, "%d %d\n", Class->NumConfigs, Class->NumProtos);
|
||||
|
||||
for (Cid = 0; Cid < Class->NumConfigs; Cid++) {
|
||||
fprintf (File, "1 ");
|
||||
|
||||
Config = Class->Configurations[Cid];
|
||||
|
||||
for (Pid = 0; Pid < Class->NumProtos; Pid++) {
|
||||
if (test_bit (Config, Pid))
|
||||
fprintf (File, "1");
|
||||
else
|
||||
fprintf (File, "0");
|
||||
}
|
||||
fprintf (File, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @deprecated
|
||||
* @name WriteOldProtoFile
|
||||
*
|
||||
* Write the protos in the given class to the specified file in the
|
||||
* old proto format.
|
||||
*
|
||||
* @param File The file to write to
|
||||
* @param Class The class to write
|
||||
*/
|
||||
void WriteOldProtoFile(FILE *File, CLASS_TYPE Class) {
|
||||
int Pid;
|
||||
PROTO Proto;
|
||||
|
||||
/* print old header */
|
||||
fprintf (File, "6\n");
|
||||
fprintf (File, "linear essential -0.500000 0.500000\n");
|
||||
fprintf (File, "linear essential -0.250000 0.750000\n");
|
||||
fprintf (File, "linear essential 0.000000 1.000000\n");
|
||||
fprintf (File, "circular essential 0.000000 1.000000\n");
|
||||
fprintf (File, "linear non-essential -0.500000 0.500000\n");
|
||||
fprintf (File, "linear non-essential -0.500000 0.500000\n");
|
||||
|
||||
for (Pid = 0; Pid < Class->NumProtos; Pid++) {
|
||||
Proto = ProtoIn (Class, Pid);
|
||||
|
||||
fprintf (File, "significant elliptical 1\n");
|
||||
fprintf (File, " %9.6f %9.6f %9.6f %9.6f %9.6f %9.6f\n",
|
||||
Proto->X, Proto->Y,
|
||||
Proto->Length, Proto->Angle, 0.0, 0.0);
|
||||
fprintf (File, " %9.6f %9.6f %9.6f %9.6f %9.6f %9.6f\n",
|
||||
0.0001, 0.0001, 0.0001, 0.0001, 0.0001, 0.0001);
|
||||
}
|
||||
}
|
||||
|
@ -176,16 +176,4 @@ CLASS_TYPE NewClass(int NumProtos, int NumConfigs);
|
||||
|
||||
void PrintProtos(CLASS_TYPE Class);
|
||||
|
||||
void ReadClassFromFile(FILE *File, UNICHAR_ID unichar_id);
|
||||
|
||||
void ReadConfigs(register FILE *File, CLASS_TYPE Class);
|
||||
|
||||
void ReadProtos(register FILE *File, CLASS_TYPE Class);
|
||||
|
||||
int SplitProto(CLASS_TYPE Class, int OldPid);
|
||||
|
||||
void WriteOldConfigFile(FILE *File, CLASS_TYPE Class);
|
||||
|
||||
void WriteOldProtoFile(FILE *File, CLASS_TYPE Class);
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user