mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-18 14:41:36 +08:00
Improved debug of class pruner
This commit is contained in:
parent
1e3b671298
commit
5bb0d89291
@ -1283,7 +1283,7 @@ UNICHAR_ID *Classify::BaselineClassifier(
|
||||
ClearCharNormArray(CharNormArray);
|
||||
|
||||
Results->BlobLength = IntCastRounded(fx_info.Length / kStandardFeatureLength);
|
||||
PruneClasses(Templates->Templates, int_features.size(), &int_features[0],
|
||||
PruneClasses(Templates->Templates, int_features.size(), -1, &int_features[0],
|
||||
CharNormArray, BaselineCutoffs, &Results->CPResults);
|
||||
|
||||
if (matcher_debug_level >= 2 || classify_debug_level > 1)
|
||||
@ -1365,7 +1365,7 @@ int Classify::CharNormTrainingSample(bool pruner_only,
|
||||
ComputeCharNormArrays(norm_feature, PreTrainedTemplates, char_norm_array,
|
||||
pruner_norm_array);
|
||||
|
||||
PruneClasses(PreTrainedTemplates, num_features, sample.features(),
|
||||
PruneClasses(PreTrainedTemplates, num_features, keep_this, sample.features(),
|
||||
pruner_norm_array,
|
||||
shape_table_ != NULL ? &shapetable_cutoffs_[0] : CharNormCutoffs,
|
||||
&adapt_results->CPResults);
|
||||
|
@ -98,9 +98,8 @@ class Classify : public CCStruct {
|
||||
// results (output) Sorted Array of pruned classes.
|
||||
// Array must be sized to take the maximum possible
|
||||
// number of outputs : int_templates->NumClasses.
|
||||
int PruneClasses(const INT_TEMPLATES_STRUCT* int_templates,
|
||||
int num_features,
|
||||
const INT_FEATURE_STRUCT* features,
|
||||
int PruneClasses(const INT_TEMPLATES_STRUCT* int_templates, int num_features,
|
||||
int keep_this, const INT_FEATURE_STRUCT* features,
|
||||
const uinT8* normalization_factors,
|
||||
const uinT16* expected_num_features,
|
||||
GenericVector<CP_RESULT_STRUCT>* results);
|
||||
|
@ -261,8 +261,8 @@ class ClassPruner {
|
||||
// Prunes the classes using <the maximum count> * pruning_factor/256 as a
|
||||
// threshold for keeping classes. If max_of_non_fragments, then ignore
|
||||
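// fragments in computing the maximum count. The class whose id equals
// keep_this is always kept, even if its count is below the threshold
// (pass -1 to force-keep no class).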
|
||||
void PruneAndSort(int pruning_factor, bool max_of_non_fragments,
|
||||
const UNICHARSET& unicharset) {
|
||||
void PruneAndSort(int pruning_factor, int keep_this,
|
||||
bool max_of_non_fragments, const UNICHARSET& unicharset) {
|
||||
int max_count = 0;
|
||||
for (int c = 0; c < max_classes_; ++c) {
|
||||
if (norm_count_[c] > max_count &&
|
||||
@ -282,7 +282,8 @@ class ClassPruner {
|
||||
pruning_threshold_ = 1;
|
||||
num_classes_ = 0;
|
||||
for (int class_id = 0; class_id < max_classes_; class_id++) {
|
||||
if (norm_count_[class_id] >= pruning_threshold_) {
|
||||
if (norm_count_[class_id] >= pruning_threshold_ ||
|
||||
class_id == keep_this) {
|
||||
++num_classes_;
|
||||
sort_index_[num_classes_] = class_id;
|
||||
sort_key_[num_classes_] = norm_count_[class_id];
|
||||
@ -404,7 +405,7 @@ class ClassPruner {
|
||||
// results Sorted Array of pruned classes. Must be an array
|
||||
// of size at least int_templates->NumClasses.
|
||||
int Classify::PruneClasses(const INT_TEMPLATES_STRUCT* int_templates,
|
||||
int num_features,
|
||||
int num_features, int keep_this,
|
||||
const INT_FEATURE_STRUCT* features,
|
||||
const uinT8* normalization_factors,
|
||||
const uinT16* expected_num_features,
|
||||
@ -439,7 +440,7 @@ int Classify::PruneClasses(const INT_TEMPLATES_STRUCT* int_templates,
|
||||
pruner.NoNormalization();
|
||||
}
|
||||
// Do the actual pruning and sort the short-list.
|
||||
pruner.PruneAndSort(classify_class_pruner_threshold,
|
||||
pruner.PruneAndSort(classify_class_pruner_threshold, keep_this,
|
||||
shape_table_ == NULL, unicharset);
|
||||
|
||||
if (classify_debug_level > 2) {
|
||||
|
Loading…
Reference in New Issue
Block a user