tesseract/classify/intmatcher.cpp

1274 lines
46 KiB
C++
Raw Normal View History

/******************************************************************************
** Filename: intmatcher.c
** Purpose: Generic high level classification routines.
** Author: Robert Moss
** History: Wed Feb 13 17:35:28 MST 1991, RWM, Created.
** Mon Mar 11 16:33:02 MST 1991, RWM, Modified to add
** support for adaptive matching.
** (c) Copyright Hewlett-Packard Company, 1988.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
******************************************************************************/
// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
#include "config_auto.h"
#endif
/*----------------------------------------------------------------------------
Include Files and Type Defines
----------------------------------------------------------------------------*/
#include "intmatcher.h"
#include "fontinfo.h"
#include "intproto.h"
#include "callcpp.h"
#include "scrollview.h"
#include "float2int.h"
#include "globals.h"
#include "helpers.h"
#include "classify.h"
#include "shapetable.h"
#include <math.h>
using tesseract::ScoredFont;
using tesseract::UnicharRating;
/*----------------------------------------------------------------------------
Global Data Definitions and Declarations
----------------------------------------------------------------------------*/
// Parameters of the sigmoid used to convert similarity to evidence in the
// similarity_evidence_table_ that is used to convert distance metric to an
// 8 bit evidence value in the secondary matcher. (See IntMatcher::Init).
// These two lines are the out-of-class definitions of static float members
// of IntegerMatcher.
// NOTE(review): kSEExponentialMultiplier is 0.0 here; how the sigmoid code
// treats a zero multiplier is not visible in this part of the file -- confirm
// against the code that builds similarity_evidence_table_.
const float IntegerMatcher::kSEExponentialMultiplier = 0.0;
const float IntegerMatcher::kSimilarityCenter = 0.0075;
// offset_table_entries: 256 values, indexed by a byte value. Reading the
// entries, entry i is the bit position of the lowest set bit of i
// (1 -> 0, 2 -> 1, 4 -> 2, 8 -> 3, ...); entry 0, which has no set bit,
// holds 255.
#define offset_table_entries \
255, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, \
0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, \
0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, \
0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, \
0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, \
0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, \
0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, \
0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, \
0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, \
0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, \
0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
// Number of entries in offset_table_entries. It is also used below as the
// index in data_table at which the next_table entries start.
#define INTMATCHER_OFFSET_TABLE_SIZE 256
// next_table_entries: values indexed by a byte value. Reading the entries,
// entry i is i with its lowest set bit cleared (3 -> 0x2, 5 -> 0x4,
// 6 -> 0x4, 12 -> 0x08, ...); the entries for 0 and for single-bit values
// are therefore 0.
#define next_table_entries \
0, 0, 0, 0x2, 0, 0x4, 0x4, 0x6, 0, 0x8, 0x8, 0x0a, 0x08, 0x0c, 0x0c, 0x0e, \
0, 0x10, 0x10, 0x12, 0x10, 0x14, 0x14, 0x16, 0x10, 0x18, 0x18, 0x1a, \
0x18, 0x1c, 0x1c, 0x1e, 0, 0x20, 0x20, 0x22, 0x20, 0x24, 0x24, 0x26, \
0x20, 0x28, 0x28, 0x2a, 0x28, 0x2c, 0x2c, 0x2e, 0x20, 0x30, 0x30, 0x32, \
0x30, 0x34, 0x34, 0x36, 0x30, 0x38, 0x38, 0x3a, 0x38, 0x3c, 0x3c, 0x3e, \
0, 0x40, 0x40, 0x42, 0x40, 0x44, 0x44, 0x46, 0x40, 0x48, 0x48, 0x4a, \
0x48, 0x4c, 0x4c, 0x4e, 0x40, 0x50, 0x50, 0x52, 0x50, 0x54, 0x54, 0x56, \
0x50, 0x58, 0x58, 0x5a, 0x58, 0x5c, 0x5c, 0x5e, 0x40, 0x60, 0x60, 0x62, \
0x60, 0x64, 0x64, 0x66, 0x60, 0x68, 0x68, 0x6a, 0x68, 0x6c, 0x6c, 0x6e, \
0x60, 0x70, 0x70, 0x72, 0x70, 0x74, 0x74, 0x76, 0x70, 0x78, 0x78, 0x7a, \
0x78, 0x7c, 0x7c, 0x7e, 0, 0x80, 0x80, 0x82, 0x80, 0x84, 0x84, 0x86, \
0x80, 0x88, 0x88, 0x8a, 0x88, 0x8c, 0x8c, 0x8e, 0x80, 0x90, 0x90, 0x92, \
0x90, 0x94, 0x94, 0x96, 0x90, 0x98, 0x98, 0x9a, 0x98, 0x9c, 0x9c, 0x9e, \
0x80, 0xa0, 0xa0, 0xa2, 0xa0, 0xa4, 0xa4, 0xa6, 0xa0, 0xa8, 0xa8, 0xaa, \
0xa8, 0xac, 0xac, 0xae, 0xa0, 0xb0, 0xb0, 0xb2, 0xb0, 0xb4, 0xb4, 0xb6, \
0xb0, 0xb8, 0xb8, 0xba, 0xb8, 0xbc, 0xbc, 0xbe, 0x80, 0xc0, 0xc0, 0xc2, \
0xc0, 0xc4, 0xc4, 0xc6, 0xc0, 0xc8, 0xc8, 0xca, 0xc8, 0xcc, 0xcc, 0xce, \
0xc0, 0xd0, 0xd0, 0xd2, 0xd0, 0xd4, 0xd4, 0xd6, 0xd0, 0xd8, 0xd8, 0xda, \
0xd8, 0xdc, 0xdc, 0xde, 0xc0, 0xe0, 0xe0, 0xe2, 0xe0, 0xe4, 0xe4, 0xe6, \
0xe0, 0xe8, 0xe8, 0xea, 0xe8, 0xec, 0xec, 0xee, 0xe0, 0xf0, 0xf0, 0xf2, \
0xf0, 0xf4, 0xf4, 0xf6, 0xf0, 0xf8, 0xf8, 0xfa, 0xf8, 0xfc, 0xfc, 0xfe
// The offset and next lookup tables are stored back to back in one array;
// see http://b/19318793 (#6) for the complete discussion. Keeping the two
// merged helps improve performance of PIE code.
static const uinT8 data_table[512] = {offset_table_entries, next_table_entries};
// The offset entries start at the beginning of data_table.
static const uinT8* const offset_table = data_table;
// The next entries start right after the offset entries.
static const uinT8* const next_table =
    data_table + INTMATCHER_OFFSET_TABLE_SIZE;
namespace tesseract {
// Encapsulation of the intermediate data and computations made by the class
// pruner. The class pruner implements a simple linear classifier on binary
// features by heavily quantizing the feature space, and applying
// NUM_BITS_PER_CLASS (2)-bit weights to the features. Lack of resolution in
// weights is compensated by a non-constant bias that is dependent on the
// number of features present.
class ClassPruner {
public:
ClassPruner(int max_classes) {
  // ComputeScores writes its counts in unrolled groups, one group per pruner
  // word. Each pruner word is of size BITS_PER_WERD and each entry takes
  // NUM_BITS_PER_CLASS bits, so a word holds
  // BITS_PER_WERD / NUM_BITS_PER_CLASS entries. The arrays are sized from the
  // rounded-up class count so that the extra entries touched by the
  // unrolling still fall inside the arrays. See ComputeScores.
  max_classes_ = max_classes;
  rounded_classes_ = RoundUp(
      max_classes, WERDS_PER_CP_VECTOR * BITS_PER_WERD / NUM_BITS_PER_CLASS);
  // The trailing () value-initializes the array, so every score starts at 0.
  class_count_ = new int[rounded_classes_]();
  norm_count_ = new int[rounded_classes_];
  // One extra slot each: PruneAndSort stores its entries starting at index 1.
  sort_key_ = new int[rounded_classes_ + 1];
  sort_index_ = new int[rounded_classes_ + 1];
  pruning_threshold_ = 0;
  num_features_ = 0;
  num_classes_ = 0;
}
~ClassPruner() {
  // Release the four arrays allocated with new[] in the constructor.
  delete[] sort_index_;
  delete[] sort_key_;
  delete[] norm_count_;
  delete[] class_count_;
}
Doxygen Squashed commit from https://github.com/tesseract-ocr/tesseract/tree/more-doxygen closes #14 Commits: 6317305 doxygen 9f42f69 doxygen 0fc4d52 doxygen 37b4b55 fix typo bded8f1 some more doxy 020eb00 slight tweak 524666d doxygenify 2a36a3e doxygenify 229d218 doxygenify 7fd28ae doxygenify a8c64bc doxygenify f5d21b6 fix 5d8ede8 doxygenify a58a4e0 language_model.cpp fa85709 lm_pain_points.cpp lm_state.cpp 6418da3 merge 06190ba Merge branch 'old_doxygen_merge' into more-doxygen 84acf08 Merge branch 'master' into more-doxygen 50fe1ff pagewalk.cpp cube_reco_context.cpp 2982583 change to relative 192a24a applybox.cpp, take one 8eeb053 delete docs for obsolete params 52e4c77 modernise classify/ocrfeatures.cpp 2a1cba6 modernise cutil/emalloc.cpp 773e006 silence doxygen warning aeb1731 silence doxygen warning f18387f silence doxygen; new params are unused? 15ad6bd doxygenify cutil/efio.cpp c8b5dad doxygenify cutil/danerror.cpp 784450f the globals and exceptions parts are obsolete; remove 8bca324 doxygen classify/normfeat.cpp 9bcbe16 doxygen classify/normmatch.cpp aa9a971 doxygen ccmain/cube_control.cpp c083ff2 doxygen ccmain/cube_reco_context.cpp f842850 params changed 5c94f12 doxygen ccmain/cubeclassifier.cpp 15ba750 case sensitive f5c71d4 case sensitive f85655b doxygen classify/intproto.cpp 4bbc7aa partial doxygen classify/mfx.cpp dbb6041 partial doxygen classify/intproto.cpp 2aa72db finish doxygen classify/intproto.cpp 0b8de99 doxygen training/mftraining.cpp 0b5b35c partial doxygen ccstruct/coutln.cpp b81c766 partial doxygen ccstruct/coutln.cpp 40fc415 finished? 
doxygen ccstruct/coutln.cpp 6e4165c doxygen classify/clusttool.cpp 0267dec doxygen classify/cutoffs.cpp 7f0c70c doxygen classify/fpoint.cpp 512f3bd ignore ~ files 5668a52 doxygen classify/intmatcher.cpp 84788d4 doxygen classify/kdtree.cpp 29f36ca doxygen classify/mfoutline.cpp 40b94b1 silence doxygen warnings 6c511b9 doxygen classify/mfx.cpp f9b4080 doxygen classify/outfeat.cpp aa1df05 doxygen classify/picofeat.cpp cc5f466 doxygen training/cntraining.cpp cce044f doxygen training/commontraining.cpp 167e216 missing param 9498383 renamed params 37eeac2 renamed param d87b5dd case c8ee174 renamed params b858db8 typo 4c2a838 h2 context? 81a2c0c fix some param names; add some missing params, no docs bcf8a4c add some missing params, no docs af77f86 add some missing params, no docs; fix some param names 01df24e fix some params 6161056 fix some params 68508b6 fix some params 285aeb6 doxygen complains here no matter what 529bcfa rm some missing params, typos cd21226 rm some missing params, add some new ones 48a4bc2 fix params c844628 missing param 312ce37 missing param; rename one ec2fdec missing param 05e15e0 missing params d515858 change "<" to &lt; to make doxygen happy b476a28 wrong place
2014-09-13 04:41:19 +08:00
/// Computes the scores for every class in the character set, by summing the
/// weights for each feature and stores the sums internally in class_count_.
void ComputeScores(const INT_TEMPLATES_STRUCT* int_templates,
int num_features, const INT_FEATURE_STRUCT* features) {
num_features_ = num_features;
int num_pruners = int_templates->NumClassPruners;
for (int f = 0; f < num_features; ++f) {
const INT_FEATURE_STRUCT* feature = &features[f];
// Quantize the feature to NUM_CP_BUCKETS*NUM_CP_BUCKETS*NUM_CP_BUCKETS.
int x = feature->X * NUM_CP_BUCKETS >> 8;
int y = feature->Y * NUM_CP_BUCKETS >> 8;
int theta = feature->Theta * NUM_CP_BUCKETS >> 8;
int class_id = 0;
// Each CLASS_PRUNER_STRUCT only covers CLASSES_PER_CP(32) classes, so
// we need a collection of them, indexed by pruner_set.
for (int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
// Look up quantized feature in a 3-D array, an array of weights for
// each class.
const uinT32* pruner_word_ptr =
int_templates->ClassPruners[pruner_set]->p[x][y][theta];
for (int word = 0; word < WERDS_PER_CP_VECTOR; ++word) {
uinT32 pruner_word = *pruner_word_ptr++;
// This inner loop is unrolled to speed up the ClassPruner.
// Currently gcc would not unroll it unless it is set to O3
// level of optimization or -funroll-loops is specified.
/*
uinT32 class_mask = (1 << NUM_BITS_PER_CLASS) - 1;
for (int bit = 0; bit < BITS_PER_WERD/NUM_BITS_PER_CLASS; bit++) {
class_count_[class_id++] += pruner_word & class_mask;
pruner_word >>= NUM_BITS_PER_CLASS;
}
*/
class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
pruner_word >>= NUM_BITS_PER_CLASS;
class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
pruner_word >>= NUM_BITS_PER_CLASS;
class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
pruner_word >>= NUM_BITS_PER_CLASS;
class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
pruner_word >>= NUM_BITS_PER_CLASS;
class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
pruner_word >>= NUM_BITS_PER_CLASS;
class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
pruner_word >>= NUM_BITS_PER_CLASS;
class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
pruner_word >>= NUM_BITS_PER_CLASS;
class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
pruner_word >>= NUM_BITS_PER_CLASS;
class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
pruner_word >>= NUM_BITS_PER_CLASS;
class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
pruner_word >>= NUM_BITS_PER_CLASS;
class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
pruner_word >>= NUM_BITS_PER_CLASS;
class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
pruner_word >>= NUM_BITS_PER_CLASS;
class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
pruner_word >>= NUM_BITS_PER_CLASS;
class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
pruner_word >>= NUM_BITS_PER_CLASS;
class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
pruner_word >>= NUM_BITS_PER_CLASS;
class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
}
}
}
}
Doxygen Squashed commit from https://github.com/tesseract-ocr/tesseract/tree/more-doxygen closes #14 Commits: 6317305 doxygen 9f42f69 doxygen 0fc4d52 doxygen 37b4b55 fix typo bded8f1 some more doxy 020eb00 slight tweak 524666d doxygenify 2a36a3e doxygenify 229d218 doxygenify 7fd28ae doxygenify a8c64bc doxygenify f5d21b6 fix 5d8ede8 doxygenify a58a4e0 language_model.cpp fa85709 lm_pain_points.cpp lm_state.cpp 6418da3 merge 06190ba Merge branch 'old_doxygen_merge' into more-doxygen 84acf08 Merge branch 'master' into more-doxygen 50fe1ff pagewalk.cpp cube_reco_context.cpp 2982583 change to relative 192a24a applybox.cpp, take one 8eeb053 delete docs for obsolete params 52e4c77 modernise classify/ocrfeatures.cpp 2a1cba6 modernise cutil/emalloc.cpp 773e006 silence doxygen warning aeb1731 silence doxygen warning f18387f silence doxygen; new params are unused? 15ad6bd doxygenify cutil/efio.cpp c8b5dad doxygenify cutil/danerror.cpp 784450f the globals and exceptions parts are obsolete; remove 8bca324 doxygen classify/normfeat.cpp 9bcbe16 doxygen classify/normmatch.cpp aa9a971 doxygen ccmain/cube_control.cpp c083ff2 doxygen ccmain/cube_reco_context.cpp f842850 params changed 5c94f12 doxygen ccmain/cubeclassifier.cpp 15ba750 case sensitive f5c71d4 case sensitive f85655b doxygen classify/intproto.cpp 4bbc7aa partial doxygen classify/mfx.cpp dbb6041 partial doxygen classify/intproto.cpp 2aa72db finish doxygen classify/intproto.cpp 0b8de99 doxygen training/mftraining.cpp 0b5b35c partial doxygen ccstruct/coutln.cpp b81c766 partial doxygen ccstruct/coutln.cpp 40fc415 finished? 
doxygen ccstruct/coutln.cpp 6e4165c doxygen classify/clusttool.cpp 0267dec doxygen classify/cutoffs.cpp 7f0c70c doxygen classify/fpoint.cpp 512f3bd ignore ~ files 5668a52 doxygen classify/intmatcher.cpp 84788d4 doxygen classify/kdtree.cpp 29f36ca doxygen classify/mfoutline.cpp 40b94b1 silence doxygen warnings 6c511b9 doxygen classify/mfx.cpp f9b4080 doxygen classify/outfeat.cpp aa1df05 doxygen classify/picofeat.cpp cc5f466 doxygen training/cntraining.cpp cce044f doxygen training/commontraining.cpp 167e216 missing param 9498383 renamed params 37eeac2 renamed param d87b5dd case c8ee174 renamed params b858db8 typo 4c2a838 h2 context? 81a2c0c fix some param names; add some missing params, no docs bcf8a4c add some missing params, no docs af77f86 add some missing params, no docs; fix some param names 01df24e fix some params 6161056 fix some params 68508b6 fix some params 285aeb6 doxygen complains here no matter what 529bcfa rm some missing params, typos cd21226 rm some missing params, add some new ones 48a4bc2 fix params c844628 missing param 312ce37 missing param; rename one ec2fdec missing param 05e15e0 missing params d515858 change "<" to &lt; to make doxygen happy b476a28 wrong place
2014-09-13 04:41:19 +08:00
/// Scales down the score of every class that expects more features than are
/// present. This takes the place of a constant bias: an actual c then scores
/// higher for c than for e, even though almost all the features match e as
/// well as c, because e expects more features to be present.
void AdjustForExpectedNumFeatures(const uinT16* expected_num_features,
                                  int cutoff_strength) {
  for (int id = 0; id < max_classes_; ++id) {
    const int expected = expected_num_features[id];
    // Classes that expect no more features than were seen are not penalized.
    if (expected <= num_features_) continue;
    const int deficit = expected - num_features_;
    const int score = class_count_[id];
    // Integer arithmetic: the product is formed first, then divided.
    class_count_[id] -=
        score * deficit / (num_features_ * cutoff_strength + deficit);
  }
}
Doxygen Squashed commit from https://github.com/tesseract-ocr/tesseract/tree/more-doxygen closes #14 Commits: 6317305 doxygen 9f42f69 doxygen 0fc4d52 doxygen 37b4b55 fix typo bded8f1 some more doxy 020eb00 slight tweak 524666d doxygenify 2a36a3e doxygenify 229d218 doxygenify 7fd28ae doxygenify a8c64bc doxygenify f5d21b6 fix 5d8ede8 doxygenify a58a4e0 language_model.cpp fa85709 lm_pain_points.cpp lm_state.cpp 6418da3 merge 06190ba Merge branch 'old_doxygen_merge' into more-doxygen 84acf08 Merge branch 'master' into more-doxygen 50fe1ff pagewalk.cpp cube_reco_context.cpp 2982583 change to relative 192a24a applybox.cpp, take one 8eeb053 delete docs for obsolete params 52e4c77 modernise classify/ocrfeatures.cpp 2a1cba6 modernise cutil/emalloc.cpp 773e006 silence doxygen warning aeb1731 silence doxygen warning f18387f silence doxygen; new params are unused? 15ad6bd doxygenify cutil/efio.cpp c8b5dad doxygenify cutil/danerror.cpp 784450f the globals and exceptions parts are obsolete; remove 8bca324 doxygen classify/normfeat.cpp 9bcbe16 doxygen classify/normmatch.cpp aa9a971 doxygen ccmain/cube_control.cpp c083ff2 doxygen ccmain/cube_reco_context.cpp f842850 params changed 5c94f12 doxygen ccmain/cubeclassifier.cpp 15ba750 case sensitive f5c71d4 case sensitive f85655b doxygen classify/intproto.cpp 4bbc7aa partial doxygen classify/mfx.cpp dbb6041 partial doxygen classify/intproto.cpp 2aa72db finish doxygen classify/intproto.cpp 0b8de99 doxygen training/mftraining.cpp 0b5b35c partial doxygen ccstruct/coutln.cpp b81c766 partial doxygen ccstruct/coutln.cpp 40fc415 finished? 
doxygen ccstruct/coutln.cpp 6e4165c doxygen classify/clusttool.cpp 0267dec doxygen classify/cutoffs.cpp 7f0c70c doxygen classify/fpoint.cpp 512f3bd ignore ~ files 5668a52 doxygen classify/intmatcher.cpp 84788d4 doxygen classify/kdtree.cpp 29f36ca doxygen classify/mfoutline.cpp 40b94b1 silence doxygen warnings 6c511b9 doxygen classify/mfx.cpp f9b4080 doxygen classify/outfeat.cpp aa1df05 doxygen classify/picofeat.cpp cc5f466 doxygen training/cntraining.cpp cce044f doxygen training/commontraining.cpp 167e216 missing param 9498383 renamed params 37eeac2 renamed param d87b5dd case c8ee174 renamed params b858db8 typo 4c2a838 h2 context? 81a2c0c fix some param names; add some missing params, no docs bcf8a4c add some missing params, no docs af77f86 add some missing params, no docs; fix some param names 01df24e fix some params 6161056 fix some params 68508b6 fix some params 285aeb6 doxygen complains here no matter what 529bcfa rm some missing params, typos cd21226 rm some missing params, add some new ones 48a4bc2 fix params c844628 missing param 312ce37 missing param; rename one ec2fdec missing param 05e15e0 missing params d515858 change "<" to &lt; to make doxygen happy b476a28 wrong place
2014-09-13 04:41:19 +08:00
/// Sets to zero the score of every class that is not enabled in the
/// unicharset. This implements the black-list used to recognize only a
/// subset of the character set.
void DisableDisabledClasses(const UNICHARSET& unicharset) {
  for (int id = 0; id < max_classes_; ++id) {
    if (unicharset.get_enabled(id)) continue;
    // This char is disabled!
    class_count_[id] = 0;
  }
}
Doxygen Squashed commit from https://github.com/tesseract-ocr/tesseract/tree/more-doxygen closes #14 Commits: 6317305 doxygen 9f42f69 doxygen 0fc4d52 doxygen 37b4b55 fix typo bded8f1 some more doxy 020eb00 slight tweak 524666d doxygenify 2a36a3e doxygenify 229d218 doxygenify 7fd28ae doxygenify a8c64bc doxygenify f5d21b6 fix 5d8ede8 doxygenify a58a4e0 language_model.cpp fa85709 lm_pain_points.cpp lm_state.cpp 6418da3 merge 06190ba Merge branch 'old_doxygen_merge' into more-doxygen 84acf08 Merge branch 'master' into more-doxygen 50fe1ff pagewalk.cpp cube_reco_context.cpp 2982583 change to relative 192a24a applybox.cpp, take one 8eeb053 delete docs for obsolete params 52e4c77 modernise classify/ocrfeatures.cpp 2a1cba6 modernise cutil/emalloc.cpp 773e006 silence doxygen warning aeb1731 silence doxygen warning f18387f silence doxygen; new params are unused? 15ad6bd doxygenify cutil/efio.cpp c8b5dad doxygenify cutil/danerror.cpp 784450f the globals and exceptions parts are obsolete; remove 8bca324 doxygen classify/normfeat.cpp 9bcbe16 doxygen classify/normmatch.cpp aa9a971 doxygen ccmain/cube_control.cpp c083ff2 doxygen ccmain/cube_reco_context.cpp f842850 params changed 5c94f12 doxygen ccmain/cubeclassifier.cpp 15ba750 case sensitive f5c71d4 case sensitive f85655b doxygen classify/intproto.cpp 4bbc7aa partial doxygen classify/mfx.cpp dbb6041 partial doxygen classify/intproto.cpp 2aa72db finish doxygen classify/intproto.cpp 0b8de99 doxygen training/mftraining.cpp 0b5b35c partial doxygen ccstruct/coutln.cpp b81c766 partial doxygen ccstruct/coutln.cpp 40fc415 finished? 
doxygen ccstruct/coutln.cpp 6e4165c doxygen classify/clusttool.cpp 0267dec doxygen classify/cutoffs.cpp 7f0c70c doxygen classify/fpoint.cpp 512f3bd ignore ~ files 5668a52 doxygen classify/intmatcher.cpp 84788d4 doxygen classify/kdtree.cpp 29f36ca doxygen classify/mfoutline.cpp 40b94b1 silence doxygen warnings 6c511b9 doxygen classify/mfx.cpp f9b4080 doxygen classify/outfeat.cpp aa1df05 doxygen classify/picofeat.cpp cc5f466 doxygen training/cntraining.cpp cce044f doxygen training/commontraining.cpp 167e216 missing param 9498383 renamed params 37eeac2 renamed param d87b5dd case c8ee174 renamed params b858db8 typo 4c2a838 h2 context? 81a2c0c fix some param names; add some missing params, no docs bcf8a4c add some missing params, no docs af77f86 add some missing params, no docs; fix some param names 01df24e fix some params 6161056 fix some params 68508b6 fix some params 285aeb6 doxygen complains here no matter what 529bcfa rm some missing params, typos cd21226 rm some missing params, add some new ones 48a4bc2 fix params c844628 missing param 312ce37 missing param; rename one ec2fdec missing param 05e15e0 missing params d515858 change "<" to &lt; to make doxygen happy b476a28 wrong place
2014-09-13 04:41:19 +08:00
/** Sets to zero the score of every class that is a character fragment. */
void DisableFragments(const UNICHARSET& unicharset) {
  for (int id = 0; id < max_classes_; ++id) {
    // Only fragments are touched; every other class keeps its score.
    if (!unicharset.get_fragment(id)) continue;
    // Do not include character fragments in the class pruner
    // results if disable_character_fragments is true.
    class_count_[id] = 0;
  }
}
Doxygen Squashed commit from https://github.com/tesseract-ocr/tesseract/tree/more-doxygen closes #14 Commits: 6317305 doxygen 9f42f69 doxygen 0fc4d52 doxygen 37b4b55 fix typo bded8f1 some more doxy 020eb00 slight tweak 524666d doxygenify 2a36a3e doxygenify 229d218 doxygenify 7fd28ae doxygenify a8c64bc doxygenify f5d21b6 fix 5d8ede8 doxygenify a58a4e0 language_model.cpp fa85709 lm_pain_points.cpp lm_state.cpp 6418da3 merge 06190ba Merge branch 'old_doxygen_merge' into more-doxygen 84acf08 Merge branch 'master' into more-doxygen 50fe1ff pagewalk.cpp cube_reco_context.cpp 2982583 change to relative 192a24a applybox.cpp, take one 8eeb053 delete docs for obsolete params 52e4c77 modernise classify/ocrfeatures.cpp 2a1cba6 modernise cutil/emalloc.cpp 773e006 silence doxygen warning aeb1731 silence doxygen warning f18387f silence doxygen; new params are unused? 15ad6bd doxygenify cutil/efio.cpp c8b5dad doxygenify cutil/danerror.cpp 784450f the globals and exceptions parts are obsolete; remove 8bca324 doxygen classify/normfeat.cpp 9bcbe16 doxygen classify/normmatch.cpp aa9a971 doxygen ccmain/cube_control.cpp c083ff2 doxygen ccmain/cube_reco_context.cpp f842850 params changed 5c94f12 doxygen ccmain/cubeclassifier.cpp 15ba750 case sensitive f5c71d4 case sensitive f85655b doxygen classify/intproto.cpp 4bbc7aa partial doxygen classify/mfx.cpp dbb6041 partial doxygen classify/intproto.cpp 2aa72db finish doxygen classify/intproto.cpp 0b8de99 doxygen training/mftraining.cpp 0b5b35c partial doxygen ccstruct/coutln.cpp b81c766 partial doxygen ccstruct/coutln.cpp 40fc415 finished? 
doxygen ccstruct/coutln.cpp 6e4165c doxygen classify/clusttool.cpp 0267dec doxygen classify/cutoffs.cpp 7f0c70c doxygen classify/fpoint.cpp 512f3bd ignore ~ files 5668a52 doxygen classify/intmatcher.cpp 84788d4 doxygen classify/kdtree.cpp 29f36ca doxygen classify/mfoutline.cpp 40b94b1 silence doxygen warnings 6c511b9 doxygen classify/mfx.cpp f9b4080 doxygen classify/outfeat.cpp aa1df05 doxygen classify/picofeat.cpp cc5f466 doxygen training/cntraining.cpp cce044f doxygen training/commontraining.cpp 167e216 missing param 9498383 renamed params 37eeac2 renamed param d87b5dd case c8ee174 renamed params b858db8 typo 4c2a838 h2 context? 81a2c0c fix some param names; add some missing params, no docs bcf8a4c add some missing params, no docs af77f86 add some missing params, no docs; fix some param names 01df24e fix some params 6161056 fix some params 68508b6 fix some params 285aeb6 doxygen complains here no matter what 529bcfa rm some missing params, typos cd21226 rm some missing params, add some new ones 48a4bc2 fix params c844628 missing param 312ce37 missing param; rename one ec2fdec missing param 05e15e0 missing params d515858 change "<" to &lt; to make doxygen happy b476a28 wrong place
2014-09-13 04:41:19 +08:00
/// Fills norm_count_ with the xheight-normalized counts. Each class gets a
/// simple subtractive penalty for incorrect vertical position: its entry in
/// normalization_factors (indexed by character class) scaled by
/// norm_multiplier and shifted right by 8 bits.
void NormalizeForXheight(int norm_multiplier,
                         const uinT8* normalization_factors) {
  for (int id = 0; id < max_classes_; ++id) {
    const int penalty = (norm_multiplier * normalization_factors[id]) >> 8;
    norm_count_[id] = class_count_[id] - penalty;
  }
}
Doxygen Squashed commit from https://github.com/tesseract-ocr/tesseract/tree/more-doxygen closes #14 Commits: 6317305 doxygen 9f42f69 doxygen 0fc4d52 doxygen 37b4b55 fix typo bded8f1 some more doxy 020eb00 slight tweak 524666d doxygenify 2a36a3e doxygenify 229d218 doxygenify 7fd28ae doxygenify a8c64bc doxygenify f5d21b6 fix 5d8ede8 doxygenify a58a4e0 language_model.cpp fa85709 lm_pain_points.cpp lm_state.cpp 6418da3 merge 06190ba Merge branch 'old_doxygen_merge' into more-doxygen 84acf08 Merge branch 'master' into more-doxygen 50fe1ff pagewalk.cpp cube_reco_context.cpp 2982583 change to relative 192a24a applybox.cpp, take one 8eeb053 delete docs for obsolete params 52e4c77 modernise classify/ocrfeatures.cpp 2a1cba6 modernise cutil/emalloc.cpp 773e006 silence doxygen warning aeb1731 silence doxygen warning f18387f silence doxygen; new params are unused? 15ad6bd doxygenify cutil/efio.cpp c8b5dad doxygenify cutil/danerror.cpp 784450f the globals and exceptions parts are obsolete; remove 8bca324 doxygen classify/normfeat.cpp 9bcbe16 doxygen classify/normmatch.cpp aa9a971 doxygen ccmain/cube_control.cpp c083ff2 doxygen ccmain/cube_reco_context.cpp f842850 params changed 5c94f12 doxygen ccmain/cubeclassifier.cpp 15ba750 case sensitive f5c71d4 case sensitive f85655b doxygen classify/intproto.cpp 4bbc7aa partial doxygen classify/mfx.cpp dbb6041 partial doxygen classify/intproto.cpp 2aa72db finish doxygen classify/intproto.cpp 0b8de99 doxygen training/mftraining.cpp 0b5b35c partial doxygen ccstruct/coutln.cpp b81c766 partial doxygen ccstruct/coutln.cpp 40fc415 finished? 
doxygen ccstruct/coutln.cpp 6e4165c doxygen classify/clusttool.cpp 0267dec doxygen classify/cutoffs.cpp 7f0c70c doxygen classify/fpoint.cpp 512f3bd ignore ~ files 5668a52 doxygen classify/intmatcher.cpp 84788d4 doxygen classify/kdtree.cpp 29f36ca doxygen classify/mfoutline.cpp 40b94b1 silence doxygen warnings 6c511b9 doxygen classify/mfx.cpp f9b4080 doxygen classify/outfeat.cpp aa1df05 doxygen classify/picofeat.cpp cc5f466 doxygen training/cntraining.cpp cce044f doxygen training/commontraining.cpp 167e216 missing param 9498383 renamed params 37eeac2 renamed param d87b5dd case c8ee174 renamed params b858db8 typo 4c2a838 h2 context? 81a2c0c fix some param names; add some missing params, no docs bcf8a4c add some missing params, no docs af77f86 add some missing params, no docs; fix some param names 01df24e fix some params 6161056 fix some params 68508b6 fix some params 285aeb6 doxygen complains here no matter what 529bcfa rm some missing params, typos cd21226 rm some missing params, add some new ones 48a4bc2 fix params c844628 missing param 312ce37 missing param; rename one ec2fdec missing param 05e15e0 missing params d515858 change "<" to &lt; to make doxygen happy b476a28 wrong place
2014-09-13 04:41:19 +08:00
/** No-op normalization: norm_count_ becomes a plain copy of class_count_
 * for the first max_classes_ entries. */
void NoNormalization() {
  const int* source = class_count_;
  int* target = norm_count_;
  for (int remaining = max_classes_; remaining > 0; --remaining) {
    *target++ = *source++;
  }
}
Doxygen Squashed commit from https://github.com/tesseract-ocr/tesseract/tree/more-doxygen closes #14 Commits: 6317305 doxygen 9f42f69 doxygen 0fc4d52 doxygen 37b4b55 fix typo bded8f1 some more doxy 020eb00 slight tweak 524666d doxygenify 2a36a3e doxygenify 229d218 doxygenify 7fd28ae doxygenify a8c64bc doxygenify f5d21b6 fix 5d8ede8 doxygenify a58a4e0 language_model.cpp fa85709 lm_pain_points.cpp lm_state.cpp 6418da3 merge 06190ba Merge branch 'old_doxygen_merge' into more-doxygen 84acf08 Merge branch 'master' into more-doxygen 50fe1ff pagewalk.cpp cube_reco_context.cpp 2982583 change to relative 192a24a applybox.cpp, take one 8eeb053 delete docs for obsolete params 52e4c77 modernise classify/ocrfeatures.cpp 2a1cba6 modernise cutil/emalloc.cpp 773e006 silence doxygen warning aeb1731 silence doxygen warning f18387f silence doxygen; new params are unused? 15ad6bd doxygenify cutil/efio.cpp c8b5dad doxygenify cutil/danerror.cpp 784450f the globals and exceptions parts are obsolete; remove 8bca324 doxygen classify/normfeat.cpp 9bcbe16 doxygen classify/normmatch.cpp aa9a971 doxygen ccmain/cube_control.cpp c083ff2 doxygen ccmain/cube_reco_context.cpp f842850 params changed 5c94f12 doxygen ccmain/cubeclassifier.cpp 15ba750 case sensitive f5c71d4 case sensitive f85655b doxygen classify/intproto.cpp 4bbc7aa partial doxygen classify/mfx.cpp dbb6041 partial doxygen classify/intproto.cpp 2aa72db finish doxygen classify/intproto.cpp 0b8de99 doxygen training/mftraining.cpp 0b5b35c partial doxygen ccstruct/coutln.cpp b81c766 partial doxygen ccstruct/coutln.cpp 40fc415 finished? 
doxygen ccstruct/coutln.cpp 6e4165c doxygen classify/clusttool.cpp 0267dec doxygen classify/cutoffs.cpp 7f0c70c doxygen classify/fpoint.cpp 512f3bd ignore ~ files 5668a52 doxygen classify/intmatcher.cpp 84788d4 doxygen classify/kdtree.cpp 29f36ca doxygen classify/mfoutline.cpp 40b94b1 silence doxygen warnings 6c511b9 doxygen classify/mfx.cpp f9b4080 doxygen classify/outfeat.cpp aa1df05 doxygen classify/picofeat.cpp cc5f466 doxygen training/cntraining.cpp cce044f doxygen training/commontraining.cpp 167e216 missing param 9498383 renamed params 37eeac2 renamed param d87b5dd case c8ee174 renamed params b858db8 typo 4c2a838 h2 context? 81a2c0c fix some param names; add some missing params, no docs bcf8a4c add some missing params, no docs af77f86 add some missing params, no docs; fix some param names 01df24e fix some params 6161056 fix some params 68508b6 fix some params 285aeb6 doxygen complains here no matter what 529bcfa rm some missing params, typos cd21226 rm some missing params, add some new ones 48a4bc2 fix params c844628 missing param 312ce37 missing param; rename one ec2fdec missing param 05e15e0 missing params d515858 change "<" to &lt; to make doxygen happy b476a28 wrong place
2014-09-13 04:41:19 +08:00
/// Prunes the classes using &lt;the maximum count> * pruning_factor/256 as a
/// threshold for keeping classes. If max_of_non_fragments, then ignore
/// fragments in computing the maximum count.
2015-05-14 08:07:11 +08:00
void PruneAndSort(int pruning_factor, int keep_this,
                  bool max_of_non_fragments, const UNICHARSET& unicharset) {
  // pruning_factor: the threshold is (best count * pruning_factor) >> 8,
  //   i.e. the factor is expressed in 1/256ths of the best count.
  // keep_this: class id that is always selected, whatever its count.
  // max_of_non_fragments: when true, classes for which
  //   unicharset.get_fragment(c) evaluates as true are skipped while
  //   searching for the best count (they can still be selected below).
  int max_count = 0;
  for (int c = 0; c < max_classes_; ++c) {
    if (norm_count_[c] <= max_count)
      continue;
    // This additional check is added in order to ensure that
    // the classifier will return at least one non-fragmented
    // character match.
    // TODO(daria): verify that this helps accuracy and does not
    // hurt performance.
    if (max_of_non_fragments && unicharset.get_fragment(c))
      continue;
    max_count = norm_count_[c];
  }

  // Prune Classes: derive the threshold, never letting it drop below 1 so
  // that classes with a zero count are not selected on count alone.
  int threshold = (max_count * pruning_factor) >> 8;
  if (threshold < 1)
    threshold = 1;
  pruning_threshold_ = threshold;

  // Select Classes. The sort arrays are filled starting at slot 1 (the
  // counter is incremented before it is used as an index), so after the loop
  // the survivors occupy slots 1..num_classes_.
  num_classes_ = 0;
  for (int class_id = 0; class_id < max_classes_; ++class_id) {
    const bool selected = norm_count_[class_id] >= pruning_threshold_ ||
                          class_id == keep_this;
    if (!selected)
      continue;
    ++num_classes_;
    sort_index_[num_classes_] = class_id;
    sort_key_[num_classes_] = norm_count_[class_id];
  }

  // Sort Classes using Heapsort Algorithm. Nothing to do for 0 or 1 entries.
  // NOTE(review): HeapSort's ordering is defined elsewhere; the readers in
  // this class walk the arrays from slot num_classes_ downwards.
  if (num_classes_ > 1)
    HeapSort(num_classes_, sort_key_, sort_index_);
}
2016-11-08 02:46:33 +08:00
/** Prints debug info on the class pruner matches for the pruned classes only.
*/
void DebugMatch(const Classify& classify,
const INT_TEMPLATES_STRUCT* int_templates,
const INT_FEATURE_STRUCT* features) const {
int num_pruners = int_templates->NumClassPruners;
int max_num_classes = int_templates->NumClasses;
for (int f = 0; f < num_features_; ++f) {
const INT_FEATURE_STRUCT* feature = &features[f];
tprintf("F=%3d(%d,%d,%d),", f, feature->X, feature->Y, feature->Theta);
// Quantize the feature to NUM_CP_BUCKETS*NUM_CP_BUCKETS*NUM_CP_BUCKETS.
int x = feature->X * NUM_CP_BUCKETS >> 8;
int y = feature->Y * NUM_CP_BUCKETS >> 8;
int theta = feature->Theta * NUM_CP_BUCKETS >> 8;
int class_id = 0;
for (int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
// Look up quantized feature in a 3-D array, an array of weights for
// each class.
const uinT32* pruner_word_ptr =
int_templates->ClassPruners[pruner_set]->p[x][y][theta];
for (int word = 0; word < WERDS_PER_CP_VECTOR; ++word) {
uinT32 pruner_word = *pruner_word_ptr++;
for (int word_class = 0; word_class < 16 &&
class_id < max_num_classes; ++word_class, ++class_id) {
if (norm_count_[class_id] >= pruning_threshold_) {
tprintf(" %s=%d,",
classify.ClassIDToDebugStr(int_templates,
class_id, 0).string(),
pruner_word & CLASS_PRUNER_CLASS_MASK);
}
pruner_word >>= NUM_BITS_PER_CLASS;
}
}
tprintf("\n");
}
}
}
Doxygen Squashed commit from https://github.com/tesseract-ocr/tesseract/tree/more-doxygen closes #14 Commits: 6317305 doxygen 9f42f69 doxygen 0fc4d52 doxygen 37b4b55 fix typo bded8f1 some more doxy 020eb00 slight tweak 524666d doxygenify 2a36a3e doxygenify 229d218 doxygenify 7fd28ae doxygenify a8c64bc doxygenify f5d21b6 fix 5d8ede8 doxygenify a58a4e0 language_model.cpp fa85709 lm_pain_points.cpp lm_state.cpp 6418da3 merge 06190ba Merge branch 'old_doxygen_merge' into more-doxygen 84acf08 Merge branch 'master' into more-doxygen 50fe1ff pagewalk.cpp cube_reco_context.cpp 2982583 change to relative 192a24a applybox.cpp, take one 8eeb053 delete docs for obsolete params 52e4c77 modernise classify/ocrfeatures.cpp 2a1cba6 modernise cutil/emalloc.cpp 773e006 silence doxygen warning aeb1731 silence doxygen warning f18387f silence doxygen; new params are unused? 15ad6bd doxygenify cutil/efio.cpp c8b5dad doxygenify cutil/danerror.cpp 784450f the globals and exceptions parts are obsolete; remove 8bca324 doxygen classify/normfeat.cpp 9bcbe16 doxygen classify/normmatch.cpp aa9a971 doxygen ccmain/cube_control.cpp c083ff2 doxygen ccmain/cube_reco_context.cpp f842850 params changed 5c94f12 doxygen ccmain/cubeclassifier.cpp 15ba750 case sensitive f5c71d4 case sensitive f85655b doxygen classify/intproto.cpp 4bbc7aa partial doxygen classify/mfx.cpp dbb6041 partial doxygen classify/intproto.cpp 2aa72db finish doxygen classify/intproto.cpp 0b8de99 doxygen training/mftraining.cpp 0b5b35c partial doxygen ccstruct/coutln.cpp b81c766 partial doxygen ccstruct/coutln.cpp 40fc415 finished? 
doxygen ccstruct/coutln.cpp 6e4165c doxygen classify/clusttool.cpp 0267dec doxygen classify/cutoffs.cpp 7f0c70c doxygen classify/fpoint.cpp 512f3bd ignore ~ files 5668a52 doxygen classify/intmatcher.cpp 84788d4 doxygen classify/kdtree.cpp 29f36ca doxygen classify/mfoutline.cpp 40b94b1 silence doxygen warnings 6c511b9 doxygen classify/mfx.cpp f9b4080 doxygen classify/outfeat.cpp aa1df05 doxygen classify/picofeat.cpp cc5f466 doxygen training/cntraining.cpp cce044f doxygen training/commontraining.cpp 167e216 missing param 9498383 renamed params 37eeac2 renamed param d87b5dd case c8ee174 renamed params b858db8 typo 4c2a838 h2 context? 81a2c0c fix some param names; add some missing params, no docs bcf8a4c add some missing params, no docs af77f86 add some missing params, no docs; fix some param names 01df24e fix some params 6161056 fix some params 68508b6 fix some params 285aeb6 doxygen complains here no matter what 529bcfa rm some missing params, typos cd21226 rm some missing params, add some new ones 48a4bc2 fix params c844628 missing param 312ce37 missing param; rename one ec2fdec missing param 05e15e0 missing params d515858 change "<" to &lt; to make doxygen happy b476a28 wrong place
2014-09-13 04:41:19 +08:00
/** Prints a summary of the pruner result. */
void SummarizeResult(const Classify& classify,
                     const INT_TEMPLATES_STRUCT* int_templates,
                     const uinT16* expected_num_features,
                     int norm_multiplier,
                     const uinT8* normalization_factors) const {
  // Prints a header with the class and feature totals, then one line per
  // selected class. expected_num_features and normalization_factors are
  // indexed by class id.
  tprintf("CP:%d classes, %d features:\n", num_classes_, num_features_);
  // The sorted arrays are read from slot num_classes_ down to slot 1.
  for (int slot = num_classes_; slot >= 1; --slot) {
    const int class_id = sort_index_[slot];
    const int sorted_count = sort_key_[slot];
    STRING class_string = classify.ClassIDToDebugStr(int_templates,
                                                     class_id, 0);
    const int scaled_norm =
        (norm_multiplier * normalization_factors[class_id]) >> 8;
    tprintf("%s:Initial=%d, E=%d, Xht-adj=%d, N=%d, Rat=%.2f\n",
            class_string.string(),
            class_count_[class_id],
            expected_num_features[class_id],
            scaled_norm,
            sorted_count,
            100.0 - 100.0 * sorted_count /
                (CLASS_PRUNER_CLASS_MASK * num_features_));
  }
}
Doxygen Squashed commit from https://github.com/tesseract-ocr/tesseract/tree/more-doxygen closes #14 Commits: 6317305 doxygen 9f42f69 doxygen 0fc4d52 doxygen 37b4b55 fix typo bded8f1 some more doxy 020eb00 slight tweak 524666d doxygenify 2a36a3e doxygenify 229d218 doxygenify 7fd28ae doxygenify a8c64bc doxygenify f5d21b6 fix 5d8ede8 doxygenify a58a4e0 language_model.cpp fa85709 lm_pain_points.cpp lm_state.cpp 6418da3 merge 06190ba Merge branch 'old_doxygen_merge' into more-doxygen 84acf08 Merge branch 'master' into more-doxygen 50fe1ff pagewalk.cpp cube_reco_context.cpp 2982583 change to relative 192a24a applybox.cpp, take one 8eeb053 delete docs for obsolete params 52e4c77 modernise classify/ocrfeatures.cpp 2a1cba6 modernise cutil/emalloc.cpp 773e006 silence doxygen warning aeb1731 silence doxygen warning f18387f silence doxygen; new params are unused? 15ad6bd doxygenify cutil/efio.cpp c8b5dad doxygenify cutil/danerror.cpp 784450f the globals and exceptions parts are obsolete; remove 8bca324 doxygen classify/normfeat.cpp 9bcbe16 doxygen classify/normmatch.cpp aa9a971 doxygen ccmain/cube_control.cpp c083ff2 doxygen ccmain/cube_reco_context.cpp f842850 params changed 5c94f12 doxygen ccmain/cubeclassifier.cpp 15ba750 case sensitive f5c71d4 case sensitive f85655b doxygen classify/intproto.cpp 4bbc7aa partial doxygen classify/mfx.cpp dbb6041 partial doxygen classify/intproto.cpp 2aa72db finish doxygen classify/intproto.cpp 0b8de99 doxygen training/mftraining.cpp 0b5b35c partial doxygen ccstruct/coutln.cpp b81c766 partial doxygen ccstruct/coutln.cpp 40fc415 finished? 
doxygen ccstruct/coutln.cpp 6e4165c doxygen classify/clusttool.cpp 0267dec doxygen classify/cutoffs.cpp 7f0c70c doxygen classify/fpoint.cpp 512f3bd ignore ~ files 5668a52 doxygen classify/intmatcher.cpp 84788d4 doxygen classify/kdtree.cpp 29f36ca doxygen classify/mfoutline.cpp 40b94b1 silence doxygen warnings 6c511b9 doxygen classify/mfx.cpp f9b4080 doxygen classify/outfeat.cpp aa1df05 doxygen classify/picofeat.cpp cc5f466 doxygen training/cntraining.cpp cce044f doxygen training/commontraining.cpp 167e216 missing param 9498383 renamed params 37eeac2 renamed param d87b5dd case c8ee174 renamed params b858db8 typo 4c2a838 h2 context? 81a2c0c fix some param names; add some missing params, no docs bcf8a4c add some missing params, no docs af77f86 add some missing params, no docs; fix some param names 01df24e fix some params 6161056 fix some params 68508b6 fix some params 285aeb6 doxygen complains here no matter what 529bcfa rm some missing params, typos cd21226 rm some missing params, add some new ones 48a4bc2 fix params c844628 missing param 312ce37 missing param; rename one ec2fdec missing param 05e15e0 missing params d515858 change "<" to &lt; to make doxygen happy b476a28 wrong place
2014-09-13 04:41:19 +08:00
/// Copies the pruned, sorted classes into the output results and returns
/// the number of classes.
int SetupResults(GenericVector<CP_RESULT_STRUCT>* results) const {
  // Resize the output to exactly num_classes_ default-constructed entries.
  CP_RESULT_STRUCT blank;
  results->init_to_size(num_classes_, blank);
  // Output position `out` receives sorted slot (num_classes_ - out), i.e. the
  // sorted arrays are copied out in reverse, from slot num_classes_ to 1.
  int out = 0;
  for (int slot = num_classes_; slot > 0; --slot, ++out) {
    CP_RESULT_STRUCT& entry = (*results)[out];
    entry.Class = sort_index_[slot];
    // Rating is 1 minus the count divided by
    // CLASS_PRUNER_CLASS_MASK * num_features_.
    entry.Rating = 1.0 - sort_key_[slot] /
        (static_cast<float>(CLASS_PRUNER_CLASS_MASK) * num_features_);
  }
  return num_classes_;
}
private:
Doxygen Squashed commit from https://github.com/tesseract-ocr/tesseract/tree/more-doxygen closes #14 Commits: 6317305 doxygen 9f42f69 doxygen 0fc4d52 doxygen 37b4b55 fix typo bded8f1 some more doxy 020eb00 slight tweak 524666d doxygenify 2a36a3e doxygenify 229d218 doxygenify 7fd28ae doxygenify a8c64bc doxygenify f5d21b6 fix 5d8ede8 doxygenify a58a4e0 language_model.cpp fa85709 lm_pain_points.cpp lm_state.cpp 6418da3 merge 06190ba Merge branch 'old_doxygen_merge' into more-doxygen 84acf08 Merge branch 'master' into more-doxygen 50fe1ff pagewalk.cpp cube_reco_context.cpp 2982583 change to relative 192a24a applybox.cpp, take one 8eeb053 delete docs for obsolete params 52e4c77 modernise classify/ocrfeatures.cpp 2a1cba6 modernise cutil/emalloc.cpp 773e006 silence doxygen warning aeb1731 silence doxygen warning f18387f silence doxygen; new params are unused? 15ad6bd doxygenify cutil/efio.cpp c8b5dad doxygenify cutil/danerror.cpp 784450f the globals and exceptions parts are obsolete; remove 8bca324 doxygen classify/normfeat.cpp 9bcbe16 doxygen classify/normmatch.cpp aa9a971 doxygen ccmain/cube_control.cpp c083ff2 doxygen ccmain/cube_reco_context.cpp f842850 params changed 5c94f12 doxygen ccmain/cubeclassifier.cpp 15ba750 case sensitive f5c71d4 case sensitive f85655b doxygen classify/intproto.cpp 4bbc7aa partial doxygen classify/mfx.cpp dbb6041 partial doxygen classify/intproto.cpp 2aa72db finish doxygen classify/intproto.cpp 0b8de99 doxygen training/mftraining.cpp 0b5b35c partial doxygen ccstruct/coutln.cpp b81c766 partial doxygen ccstruct/coutln.cpp 40fc415 finished? 
doxygen ccstruct/coutln.cpp 6e4165c doxygen classify/clusttool.cpp 0267dec doxygen classify/cutoffs.cpp 7f0c70c doxygen classify/fpoint.cpp 512f3bd ignore ~ files 5668a52 doxygen classify/intmatcher.cpp 84788d4 doxygen classify/kdtree.cpp 29f36ca doxygen classify/mfoutline.cpp 40b94b1 silence doxygen warnings 6c511b9 doxygen classify/mfx.cpp f9b4080 doxygen classify/outfeat.cpp aa1df05 doxygen classify/picofeat.cpp cc5f466 doxygen training/cntraining.cpp cce044f doxygen training/commontraining.cpp 167e216 missing param 9498383 renamed params 37eeac2 renamed param d87b5dd case c8ee174 renamed params b858db8 typo 4c2a838 h2 context? 81a2c0c fix some param names; add some missing params, no docs bcf8a4c add some missing params, no docs af77f86 add some missing params, no docs; fix some param names 01df24e fix some params 6161056 fix some params 68508b6 fix some params 285aeb6 doxygen complains here no matter what 529bcfa rm some missing params, typos cd21226 rm some missing params, add some new ones 48a4bc2 fix params c844628 missing param 312ce37 missing param; rename one ec2fdec missing param 05e15e0 missing params d515858 change "<" to &lt; to make doxygen happy b476a28 wrong place
2014-09-13 04:41:19 +08:00
/**
 * Array[rounded_classes_] of initial counts for each class.
 * Indexed by class id; SummarizeResult() prints these as "Initial".
 */
int *class_count_;
2016-11-08 02:46:33 +08:00
/// Array[rounded_classes_] of modified counts for each class after
/// normalizing for expected number of features, disabled classes, fragments,
/// and xheights.
/// Indexed by class id. PruneAndSort() compares these against
/// pruning_threshold_ and copies the selected ones into sort_key_.
int *norm_count_;
Doxygen Squashed commit from https://github.com/tesseract-ocr/tesseract/tree/more-doxygen closes #14 Commits: 6317305 doxygen 9f42f69 doxygen 0fc4d52 doxygen 37b4b55 fix typo bded8f1 some more doxy 020eb00 slight tweak 524666d doxygenify 2a36a3e doxygenify 229d218 doxygenify 7fd28ae doxygenify a8c64bc doxygenify f5d21b6 fix 5d8ede8 doxygenify a58a4e0 language_model.cpp fa85709 lm_pain_points.cpp lm_state.cpp 6418da3 merge 06190ba Merge branch 'old_doxygen_merge' into more-doxygen 84acf08 Merge branch 'master' into more-doxygen 50fe1ff pagewalk.cpp cube_reco_context.cpp 2982583 change to relative 192a24a applybox.cpp, take one 8eeb053 delete docs for obsolete params 52e4c77 modernise classify/ocrfeatures.cpp 2a1cba6 modernise cutil/emalloc.cpp 773e006 silence doxygen warning aeb1731 silence doxygen warning f18387f silence doxygen; new params are unused? 15ad6bd doxygenify cutil/efio.cpp c8b5dad doxygenify cutil/danerror.cpp 784450f the globals and exceptions parts are obsolete; remove 8bca324 doxygen classify/normfeat.cpp 9bcbe16 doxygen classify/normmatch.cpp aa9a971 doxygen ccmain/cube_control.cpp c083ff2 doxygen ccmain/cube_reco_context.cpp f842850 params changed 5c94f12 doxygen ccmain/cubeclassifier.cpp 15ba750 case sensitive f5c71d4 case sensitive f85655b doxygen classify/intproto.cpp 4bbc7aa partial doxygen classify/mfx.cpp dbb6041 partial doxygen classify/intproto.cpp 2aa72db finish doxygen classify/intproto.cpp 0b8de99 doxygen training/mftraining.cpp 0b5b35c partial doxygen ccstruct/coutln.cpp b81c766 partial doxygen ccstruct/coutln.cpp 40fc415 finished? 
doxygen ccstruct/coutln.cpp 6e4165c doxygen classify/clusttool.cpp 0267dec doxygen classify/cutoffs.cpp 7f0c70c doxygen classify/fpoint.cpp 512f3bd ignore ~ files 5668a52 doxygen classify/intmatcher.cpp 84788d4 doxygen classify/kdtree.cpp 29f36ca doxygen classify/mfoutline.cpp 40b94b1 silence doxygen warnings 6c511b9 doxygen classify/mfx.cpp f9b4080 doxygen classify/outfeat.cpp aa1df05 doxygen classify/picofeat.cpp cc5f466 doxygen training/cntraining.cpp cce044f doxygen training/commontraining.cpp 167e216 missing param 9498383 renamed params 37eeac2 renamed param d87b5dd case c8ee174 renamed params b858db8 typo 4c2a838 h2 context? 81a2c0c fix some param names; add some missing params, no docs bcf8a4c add some missing params, no docs af77f86 add some missing params, no docs; fix some param names 01df24e fix some params 6161056 fix some params 68508b6 fix some params 285aeb6 doxygen complains here no matter what 529bcfa rm some missing params, typos cd21226 rm some missing params, add some new ones 48a4bc2 fix params c844628 missing param 312ce37 missing param; rename one ec2fdec missing param 05e15e0 missing params d515858 change "<" to &lt; to make doxygen happy b476a28 wrong place
2014-09-13 04:41:19 +08:00
/**
 * Array[rounded_classes_ +1] of pruned counts that gets sorted.
 * PruneAndSort() fills slots 1..num_classes_ (slot 0 is not written there),
 * which is presumably why one extra element is allocated.
 */
int *sort_key_;
Doxygen Squashed commit from https://github.com/tesseract-ocr/tesseract/tree/more-doxygen closes #14 Commits: 6317305 doxygen 9f42f69 doxygen 0fc4d52 doxygen 37b4b55 fix typo bded8f1 some more doxy 020eb00 slight tweak 524666d doxygenify 2a36a3e doxygenify 229d218 doxygenify 7fd28ae doxygenify a8c64bc doxygenify f5d21b6 fix 5d8ede8 doxygenify a58a4e0 language_model.cpp fa85709 lm_pain_points.cpp lm_state.cpp 6418da3 merge 06190ba Merge branch 'old_doxygen_merge' into more-doxygen 84acf08 Merge branch 'master' into more-doxygen 50fe1ff pagewalk.cpp cube_reco_context.cpp 2982583 change to relative 192a24a applybox.cpp, take one 8eeb053 delete docs for obsolete params 52e4c77 modernise classify/ocrfeatures.cpp 2a1cba6 modernise cutil/emalloc.cpp 773e006 silence doxygen warning aeb1731 silence doxygen warning f18387f silence doxygen; new params are unused? 15ad6bd doxygenify cutil/efio.cpp c8b5dad doxygenify cutil/danerror.cpp 784450f the globals and exceptions parts are obsolete; remove 8bca324 doxygen classify/normfeat.cpp 9bcbe16 doxygen classify/normmatch.cpp aa9a971 doxygen ccmain/cube_control.cpp c083ff2 doxygen ccmain/cube_reco_context.cpp f842850 params changed 5c94f12 doxygen ccmain/cubeclassifier.cpp 15ba750 case sensitive f5c71d4 case sensitive f85655b doxygen classify/intproto.cpp 4bbc7aa partial doxygen classify/mfx.cpp dbb6041 partial doxygen classify/intproto.cpp 2aa72db finish doxygen classify/intproto.cpp 0b8de99 doxygen training/mftraining.cpp 0b5b35c partial doxygen ccstruct/coutln.cpp b81c766 partial doxygen ccstruct/coutln.cpp 40fc415 finished? 
doxygen ccstruct/coutln.cpp 6e4165c doxygen classify/clusttool.cpp 0267dec doxygen classify/cutoffs.cpp 7f0c70c doxygen classify/fpoint.cpp 512f3bd ignore ~ files 5668a52 doxygen classify/intmatcher.cpp 84788d4 doxygen classify/kdtree.cpp 29f36ca doxygen classify/mfoutline.cpp 40b94b1 silence doxygen warnings 6c511b9 doxygen classify/mfx.cpp f9b4080 doxygen classify/outfeat.cpp aa1df05 doxygen classify/picofeat.cpp cc5f466 doxygen training/cntraining.cpp cce044f doxygen training/commontraining.cpp 167e216 missing param 9498383 renamed params 37eeac2 renamed param d87b5dd case c8ee174 renamed params b858db8 typo 4c2a838 h2 context? 81a2c0c fix some param names; add some missing params, no docs bcf8a4c add some missing params, no docs af77f86 add some missing params, no docs; fix some param names 01df24e fix some params 6161056 fix some params 68508b6 fix some params 285aeb6 doxygen complains here no matter what 529bcfa rm some missing params, typos cd21226 rm some missing params, add some new ones 48a4bc2 fix params c844628 missing param 312ce37 missing param; rename one ec2fdec missing param 05e15e0 missing params d515858 change "<" to &lt; to make doxygen happy b476a28 wrong place
2014-09-13 04:41:19 +08:00
/**
 * Array[rounded_classes_ +1] of classes corresponding to sort_key_.
 * Slot i holds the class id whose count is stored in sort_key_[i]; like
 * sort_key_, it is filled from slot 1 by PruneAndSort().
 */
int *sort_index_;
Doxygen Squashed commit from https://github.com/tesseract-ocr/tesseract/tree/more-doxygen closes #14 Commits: 6317305 doxygen 9f42f69 doxygen 0fc4d52 doxygen 37b4b55 fix typo bded8f1 some more doxy 020eb00 slight tweak 524666d doxygenify 2a36a3e doxygenify 229d218 doxygenify 7fd28ae doxygenify a8c64bc doxygenify f5d21b6 fix 5d8ede8 doxygenify a58a4e0 language_model.cpp fa85709 lm_pain_points.cpp lm_state.cpp 6418da3 merge 06190ba Merge branch 'old_doxygen_merge' into more-doxygen 84acf08 Merge branch 'master' into more-doxygen 50fe1ff pagewalk.cpp cube_reco_context.cpp 2982583 change to relative 192a24a applybox.cpp, take one 8eeb053 delete docs for obsolete params 52e4c77 modernise classify/ocrfeatures.cpp 2a1cba6 modernise cutil/emalloc.cpp 773e006 silence doxygen warning aeb1731 silence doxygen warning f18387f silence doxygen; new params are unused? 15ad6bd doxygenify cutil/efio.cpp c8b5dad doxygenify cutil/danerror.cpp 784450f the globals and exceptions parts are obsolete; remove 8bca324 doxygen classify/normfeat.cpp 9bcbe16 doxygen classify/normmatch.cpp aa9a971 doxygen ccmain/cube_control.cpp c083ff2 doxygen ccmain/cube_reco_context.cpp f842850 params changed 5c94f12 doxygen ccmain/cubeclassifier.cpp 15ba750 case sensitive f5c71d4 case sensitive f85655b doxygen classify/intproto.cpp 4bbc7aa partial doxygen classify/mfx.cpp dbb6041 partial doxygen classify/intproto.cpp 2aa72db finish doxygen classify/intproto.cpp 0b8de99 doxygen training/mftraining.cpp 0b5b35c partial doxygen ccstruct/coutln.cpp b81c766 partial doxygen ccstruct/coutln.cpp 40fc415 finished? 
doxygen ccstruct/coutln.cpp 6e4165c doxygen classify/clusttool.cpp 0267dec doxygen classify/cutoffs.cpp 7f0c70c doxygen classify/fpoint.cpp 512f3bd ignore ~ files 5668a52 doxygen classify/intmatcher.cpp 84788d4 doxygen classify/kdtree.cpp 29f36ca doxygen classify/mfoutline.cpp 40b94b1 silence doxygen warnings 6c511b9 doxygen classify/mfx.cpp f9b4080 doxygen classify/outfeat.cpp aa1df05 doxygen classify/picofeat.cpp cc5f466 doxygen training/cntraining.cpp cce044f doxygen training/commontraining.cpp 167e216 missing param 9498383 renamed params 37eeac2 renamed param d87b5dd case c8ee174 renamed params b858db8 typo 4c2a838 h2 context? 81a2c0c fix some param names; add some missing params, no docs bcf8a4c add some missing params, no docs af77f86 add some missing params, no docs; fix some param names 01df24e fix some params 6161056 fix some params 68508b6 fix some params 285aeb6 doxygen complains here no matter what 529bcfa rm some missing params, typos cd21226 rm some missing params, add some new ones 48a4bc2 fix params c844628 missing param 312ce37 missing param; rename one ec2fdec missing param 05e15e0 missing params d515858 change "<" to &lt; to make doxygen happy b476a28 wrong place
2014-09-13 04:41:19 +08:00
/**
 * Number of classes in this class pruner.
 * Upper bound of the class-id loops in PruneAndSort().
 */
int max_classes_;
Doxygen Squashed commit from https://github.com/tesseract-ocr/tesseract/tree/more-doxygen closes #14 Commits: 6317305 doxygen 9f42f69 doxygen 0fc4d52 doxygen 37b4b55 fix typo bded8f1 some more doxy 020eb00 slight tweak 524666d doxygenify 2a36a3e doxygenify 229d218 doxygenify 7fd28ae doxygenify a8c64bc doxygenify f5d21b6 fix 5d8ede8 doxygenify a58a4e0 language_model.cpp fa85709 lm_pain_points.cpp lm_state.cpp 6418da3 merge 06190ba Merge branch 'old_doxygen_merge' into more-doxygen 84acf08 Merge branch 'master' into more-doxygen 50fe1ff pagewalk.cpp cube_reco_context.cpp 2982583 change to relative 192a24a applybox.cpp, take one 8eeb053 delete docs for obsolete params 52e4c77 modernise classify/ocrfeatures.cpp 2a1cba6 modernise cutil/emalloc.cpp 773e006 silence doxygen warning aeb1731 silence doxygen warning f18387f silence doxygen; new params are unused? 15ad6bd doxygenify cutil/efio.cpp c8b5dad doxygenify cutil/danerror.cpp 784450f the globals and exceptions parts are obsolete; remove 8bca324 doxygen classify/normfeat.cpp 9bcbe16 doxygen classify/normmatch.cpp aa9a971 doxygen ccmain/cube_control.cpp c083ff2 doxygen ccmain/cube_reco_context.cpp f842850 params changed 5c94f12 doxygen ccmain/cubeclassifier.cpp 15ba750 case sensitive f5c71d4 case sensitive f85655b doxygen classify/intproto.cpp 4bbc7aa partial doxygen classify/mfx.cpp dbb6041 partial doxygen classify/intproto.cpp 2aa72db finish doxygen classify/intproto.cpp 0b8de99 doxygen training/mftraining.cpp 0b5b35c partial doxygen ccstruct/coutln.cpp b81c766 partial doxygen ccstruct/coutln.cpp 40fc415 finished? 
doxygen ccstruct/coutln.cpp 6e4165c doxygen classify/clusttool.cpp 0267dec doxygen classify/cutoffs.cpp 7f0c70c doxygen classify/fpoint.cpp 512f3bd ignore ~ files 5668a52 doxygen classify/intmatcher.cpp 84788d4 doxygen classify/kdtree.cpp 29f36ca doxygen classify/mfoutline.cpp 40b94b1 silence doxygen warnings 6c511b9 doxygen classify/mfx.cpp f9b4080 doxygen classify/outfeat.cpp aa1df05 doxygen classify/picofeat.cpp cc5f466 doxygen training/cntraining.cpp cce044f doxygen training/commontraining.cpp 167e216 missing param 9498383 renamed params 37eeac2 renamed param d87b5dd case c8ee174 renamed params b858db8 typo 4c2a838 h2 context? 81a2c0c fix some param names; add some missing params, no docs bcf8a4c add some missing params, no docs af77f86 add some missing params, no docs; fix some param names 01df24e fix some params 6161056 fix some params 68508b6 fix some params 285aeb6 doxygen complains here no matter what 529bcfa rm some missing params, typos cd21226 rm some missing params, add some new ones 48a4bc2 fix params c844628 missing param 312ce37 missing param; rename one ec2fdec missing param 05e15e0 missing params d515858 change "<" to &lt; to make doxygen happy b476a28 wrong place
2014-09-13 04:41:19 +08:00
/**
 * Rounded up number of classes used for array sizes.
 * NOTE(review): the rounding rule is applied where this member is set,
 * outside this section of the file; confirm there before relying on it.
 */
int rounded_classes_;
Doxygen Squashed commit from https://github.com/tesseract-ocr/tesseract/tree/more-doxygen closes #14 Commits: 6317305 doxygen 9f42f69 doxygen 0fc4d52 doxygen 37b4b55 fix typo bded8f1 some more doxy 020eb00 slight tweak 524666d doxygenify 2a36a3e doxygenify 229d218 doxygenify 7fd28ae doxygenify a8c64bc doxygenify f5d21b6 fix 5d8ede8 doxygenify a58a4e0 language_model.cpp fa85709 lm_pain_points.cpp lm_state.cpp 6418da3 merge 06190ba Merge branch 'old_doxygen_merge' into more-doxygen 84acf08 Merge branch 'master' into more-doxygen 50fe1ff pagewalk.cpp cube_reco_context.cpp 2982583 change to relative 192a24a applybox.cpp, take one 8eeb053 delete docs for obsolete params 52e4c77 modernise classify/ocrfeatures.cpp 2a1cba6 modernise cutil/emalloc.cpp 773e006 silence doxygen warning aeb1731 silence doxygen warning f18387f silence doxygen; new params are unused? 15ad6bd doxygenify cutil/efio.cpp c8b5dad doxygenify cutil/danerror.cpp 784450f the globals and exceptions parts are obsolete; remove 8bca324 doxygen classify/normfeat.cpp 9bcbe16 doxygen classify/normmatch.cpp aa9a971 doxygen ccmain/cube_control.cpp c083ff2 doxygen ccmain/cube_reco_context.cpp f842850 params changed 5c94f12 doxygen ccmain/cubeclassifier.cpp 15ba750 case sensitive f5c71d4 case sensitive f85655b doxygen classify/intproto.cpp 4bbc7aa partial doxygen classify/mfx.cpp dbb6041 partial doxygen classify/intproto.cpp 2aa72db finish doxygen classify/intproto.cpp 0b8de99 doxygen training/mftraining.cpp 0b5b35c partial doxygen ccstruct/coutln.cpp b81c766 partial doxygen ccstruct/coutln.cpp 40fc415 finished? 
doxygen ccstruct/coutln.cpp 6e4165c doxygen classify/clusttool.cpp 0267dec doxygen classify/cutoffs.cpp 7f0c70c doxygen classify/fpoint.cpp 512f3bd ignore ~ files 5668a52 doxygen classify/intmatcher.cpp 84788d4 doxygen classify/kdtree.cpp 29f36ca doxygen classify/mfoutline.cpp 40b94b1 silence doxygen warnings 6c511b9 doxygen classify/mfx.cpp f9b4080 doxygen classify/outfeat.cpp aa1df05 doxygen classify/picofeat.cpp cc5f466 doxygen training/cntraining.cpp cce044f doxygen training/commontraining.cpp 167e216 missing param 9498383 renamed params 37eeac2 renamed param d87b5dd case c8ee174 renamed params b858db8 typo 4c2a838 h2 context? 81a2c0c fix some param names; add some missing params, no docs bcf8a4c add some missing params, no docs af77f86 add some missing params, no docs; fix some param names 01df24e fix some params 6161056 fix some params 68508b6 fix some params 285aeb6 doxygen complains here no matter what 529bcfa rm some missing params, typos cd21226 rm some missing params, add some new ones 48a4bc2 fix params c844628 missing param 312ce37 missing param; rename one ec2fdec missing param 05e15e0 missing params d515858 change "<" to &lt; to make doxygen happy b476a28 wrong place
2014-09-13 04:41:19 +08:00
/**
 * Threshold count applied to prune classes.
 * Set by PruneAndSort() to (best count * pruning_factor) >> 8, raised to 1
 * if it would be smaller. DebugMatch() uses it to pick the classes to print.
 */
int pruning_threshold_;
Doxygen Squashed commit from https://github.com/tesseract-ocr/tesseract/tree/more-doxygen closes #14 Commits: 6317305 doxygen 9f42f69 doxygen 0fc4d52 doxygen 37b4b55 fix typo bded8f1 some more doxy 020eb00 slight tweak 524666d doxygenify 2a36a3e doxygenify 229d218 doxygenify 7fd28ae doxygenify a8c64bc doxygenify f5d21b6 fix 5d8ede8 doxygenify a58a4e0 language_model.cpp fa85709 lm_pain_points.cpp lm_state.cpp 6418da3 merge 06190ba Merge branch 'old_doxygen_merge' into more-doxygen 84acf08 Merge branch 'master' into more-doxygen 50fe1ff pagewalk.cpp cube_reco_context.cpp 2982583 change to relative 192a24a applybox.cpp, take one 8eeb053 delete docs for obsolete params 52e4c77 modernise classify/ocrfeatures.cpp 2a1cba6 modernise cutil/emalloc.cpp 773e006 silence doxygen warning aeb1731 silence doxygen warning f18387f silence doxygen; new params are unused? 15ad6bd doxygenify cutil/efio.cpp c8b5dad doxygenify cutil/danerror.cpp 784450f the globals and exceptions parts are obsolete; remove 8bca324 doxygen classify/normfeat.cpp 9bcbe16 doxygen classify/normmatch.cpp aa9a971 doxygen ccmain/cube_control.cpp c083ff2 doxygen ccmain/cube_reco_context.cpp f842850 params changed 5c94f12 doxygen ccmain/cubeclassifier.cpp 15ba750 case sensitive f5c71d4 case sensitive f85655b doxygen classify/intproto.cpp 4bbc7aa partial doxygen classify/mfx.cpp dbb6041 partial doxygen classify/intproto.cpp 2aa72db finish doxygen classify/intproto.cpp 0b8de99 doxygen training/mftraining.cpp 0b5b35c partial doxygen ccstruct/coutln.cpp b81c766 partial doxygen ccstruct/coutln.cpp 40fc415 finished? 
doxygen ccstruct/coutln.cpp 6e4165c doxygen classify/clusttool.cpp 0267dec doxygen classify/cutoffs.cpp 7f0c70c doxygen classify/fpoint.cpp 512f3bd ignore ~ files 5668a52 doxygen classify/intmatcher.cpp 84788d4 doxygen classify/kdtree.cpp 29f36ca doxygen classify/mfoutline.cpp 40b94b1 silence doxygen warnings 6c511b9 doxygen classify/mfx.cpp f9b4080 doxygen classify/outfeat.cpp aa1df05 doxygen classify/picofeat.cpp cc5f466 doxygen training/cntraining.cpp cce044f doxygen training/commontraining.cpp 167e216 missing param 9498383 renamed params 37eeac2 renamed param d87b5dd case c8ee174 renamed params b858db8 typo 4c2a838 h2 context? 81a2c0c fix some param names; add some missing params, no docs bcf8a4c add some missing params, no docs af77f86 add some missing params, no docs; fix some param names 01df24e fix some params 6161056 fix some params 68508b6 fix some params 285aeb6 doxygen complains here no matter what 529bcfa rm some missing params, typos cd21226 rm some missing params, add some new ones 48a4bc2 fix params c844628 missing param 312ce37 missing param; rename one ec2fdec missing param 05e15e0 missing params d515858 change "<" to &lt; to make doxygen happy b476a28 wrong place
2014-09-13 04:41:19 +08:00
/**
 * The number of features used to compute the scores.
 * Also the feature loop bound in DebugMatch() and a factor of the divisor
 * used by SummarizeResult() and SetupResults() when scaling counts.
 */
int num_features_;
Doxygen Squashed commit from https://github.com/tesseract-ocr/tesseract/tree/more-doxygen closes #14 Commits: 6317305 doxygen 9f42f69 doxygen 0fc4d52 doxygen 37b4b55 fix typo bded8f1 some more doxy 020eb00 slight tweak 524666d doxygenify 2a36a3e doxygenify 229d218 doxygenify 7fd28ae doxygenify a8c64bc doxygenify f5d21b6 fix 5d8ede8 doxygenify a58a4e0 language_model.cpp fa85709 lm_pain_points.cpp lm_state.cpp 6418da3 merge 06190ba Merge branch 'old_doxygen_merge' into more-doxygen 84acf08 Merge branch 'master' into more-doxygen 50fe1ff pagewalk.cpp cube_reco_context.cpp 2982583 change to relative 192a24a applybox.cpp, take one 8eeb053 delete docs for obsolete params 52e4c77 modernise classify/ocrfeatures.cpp 2a1cba6 modernise cutil/emalloc.cpp 773e006 silence doxygen warning aeb1731 silence doxygen warning f18387f silence doxygen; new params are unused? 15ad6bd doxygenify cutil/efio.cpp c8b5dad doxygenify cutil/danerror.cpp 784450f the globals and exceptions parts are obsolete; remove 8bca324 doxygen classify/normfeat.cpp 9bcbe16 doxygen classify/normmatch.cpp aa9a971 doxygen ccmain/cube_control.cpp c083ff2 doxygen ccmain/cube_reco_context.cpp f842850 params changed 5c94f12 doxygen ccmain/cubeclassifier.cpp 15ba750 case sensitive f5c71d4 case sensitive f85655b doxygen classify/intproto.cpp 4bbc7aa partial doxygen classify/mfx.cpp dbb6041 partial doxygen classify/intproto.cpp 2aa72db finish doxygen classify/intproto.cpp 0b8de99 doxygen training/mftraining.cpp 0b5b35c partial doxygen ccstruct/coutln.cpp b81c766 partial doxygen ccstruct/coutln.cpp 40fc415 finished? 
doxygen ccstruct/coutln.cpp 6e4165c doxygen classify/clusttool.cpp 0267dec doxygen classify/cutoffs.cpp 7f0c70c doxygen classify/fpoint.cpp 512f3bd ignore ~ files 5668a52 doxygen classify/intmatcher.cpp 84788d4 doxygen classify/kdtree.cpp 29f36ca doxygen classify/mfoutline.cpp 40b94b1 silence doxygen warnings 6c511b9 doxygen classify/mfx.cpp f9b4080 doxygen classify/outfeat.cpp aa1df05 doxygen classify/picofeat.cpp cc5f466 doxygen training/cntraining.cpp cce044f doxygen training/commontraining.cpp 167e216 missing param 9498383 renamed params 37eeac2 renamed param d87b5dd case c8ee174 renamed params b858db8 typo 4c2a838 h2 context? 81a2c0c fix some param names; add some missing params, no docs bcf8a4c add some missing params, no docs af77f86 add some missing params, no docs; fix some param names 01df24e fix some params 6161056 fix some params 68508b6 fix some params 285aeb6 doxygen complains here no matter what 529bcfa rm some missing params, typos cd21226 rm some missing params, add some new ones 48a4bc2 fix params c844628 missing param 312ce37 missing param; rename one ec2fdec missing param 05e15e0 missing params d515858 change "<" to &lt; to make doxygen happy b476a28 wrong place
2014-09-13 04:41:19 +08:00
/**
 * Final number of pruned classes.
 * Recomputed on every PruneAndSort() call; it is also the highest used slot
 * of sort_key_/sort_index_ and the value returned by SetupResults().
 */
int num_classes_;
};
/*----------------------------------------------------------------------------
Public Code
----------------------------------------------------------------------------*/
Doxygen Squashed commit from https://github.com/tesseract-ocr/tesseract/tree/more-doxygen closes #14 Commits: 6317305 doxygen 9f42f69 doxygen 0fc4d52 doxygen 37b4b55 fix typo bded8f1 some more doxy 020eb00 slight tweak 524666d doxygenify 2a36a3e doxygenify 229d218 doxygenify 7fd28ae doxygenify a8c64bc doxygenify f5d21b6 fix 5d8ede8 doxygenify a58a4e0 language_model.cpp fa85709 lm_pain_points.cpp lm_state.cpp 6418da3 merge 06190ba Merge branch 'old_doxygen_merge' into more-doxygen 84acf08 Merge branch 'master' into more-doxygen 50fe1ff pagewalk.cpp cube_reco_context.cpp 2982583 change to relative 192a24a applybox.cpp, take one 8eeb053 delete docs for obsolete params 52e4c77 modernise classify/ocrfeatures.cpp 2a1cba6 modernise cutil/emalloc.cpp 773e006 silence doxygen warning aeb1731 silence doxygen warning f18387f silence doxygen; new params are unused? 15ad6bd doxygenify cutil/efio.cpp c8b5dad doxygenify cutil/danerror.cpp 784450f the globals and exceptions parts are obsolete; remove 8bca324 doxygen classify/normfeat.cpp 9bcbe16 doxygen classify/normmatch.cpp aa9a971 doxygen ccmain/cube_control.cpp c083ff2 doxygen ccmain/cube_reco_context.cpp f842850 params changed 5c94f12 doxygen ccmain/cubeclassifier.cpp 15ba750 case sensitive f5c71d4 case sensitive f85655b doxygen classify/intproto.cpp 4bbc7aa partial doxygen classify/mfx.cpp dbb6041 partial doxygen classify/intproto.cpp 2aa72db finish doxygen classify/intproto.cpp 0b8de99 doxygen training/mftraining.cpp 0b5b35c partial doxygen ccstruct/coutln.cpp b81c766 partial doxygen ccstruct/coutln.cpp 40fc415 finished? 
doxygen ccstruct/coutln.cpp 6e4165c doxygen classify/clusttool.cpp 0267dec doxygen classify/cutoffs.cpp 7f0c70c doxygen classify/fpoint.cpp 512f3bd ignore ~ files 5668a52 doxygen classify/intmatcher.cpp 84788d4 doxygen classify/kdtree.cpp 29f36ca doxygen classify/mfoutline.cpp 40b94b1 silence doxygen warnings 6c511b9 doxygen classify/mfx.cpp f9b4080 doxygen classify/outfeat.cpp aa1df05 doxygen classify/picofeat.cpp cc5f466 doxygen training/cntraining.cpp cce044f doxygen training/commontraining.cpp 167e216 missing param 9498383 renamed params 37eeac2 renamed param d87b5dd case c8ee174 renamed params b858db8 typo 4c2a838 h2 context? 81a2c0c fix some param names; add some missing params, no docs bcf8a4c add some missing params, no docs af77f86 add some missing params, no docs; fix some param names 01df24e fix some params 6161056 fix some params 68508b6 fix some params 285aeb6 doxygen complains here no matter what 529bcfa rm some missing params, typos cd21226 rm some missing params, add some new ones 48a4bc2 fix params c844628 missing param 312ce37 missing param; rename one ec2fdec missing param 05e15e0 missing params d515858 change "<" to &lt; to make doxygen happy b476a28 wrong place
2014-09-13 04:41:19 +08:00
/**
* Runs the class pruner from int_templates on the given features, returning
* the number of classes output in results.
* @param int_templates Class pruner tables
* @param num_features Number of features in blob
* @param features Array of features
* @param normalization_factors Array of fudge factors from blob
* normalization process (by CLASS_INDEX)
* @param expected_num_features Array of expected number of features
* for each class (by CLASS_INDEX)
2016-11-08 02:46:33 +08:00
* @param results Sorted Array of pruned classes. Must be an
* array of size at least
* int_templates->NumClasses.
Doxygen Squashed commit from https://github.com/tesseract-ocr/tesseract/tree/more-doxygen closes #14 Commits: 6317305 doxygen 9f42f69 doxygen 0fc4d52 doxygen 37b4b55 fix typo bded8f1 some more doxy 020eb00 slight tweak 524666d doxygenify 2a36a3e doxygenify 229d218 doxygenify 7fd28ae doxygenify a8c64bc doxygenify f5d21b6 fix 5d8ede8 doxygenify a58a4e0 language_model.cpp fa85709 lm_pain_points.cpp lm_state.cpp 6418da3 merge 06190ba Merge branch 'old_doxygen_merge' into more-doxygen 84acf08 Merge branch 'master' into more-doxygen 50fe1ff pagewalk.cpp cube_reco_context.cpp 2982583 change to relative 192a24a applybox.cpp, take one 8eeb053 delete docs for obsolete params 52e4c77 modernise classify/ocrfeatures.cpp 2a1cba6 modernise cutil/emalloc.cpp 773e006 silence doxygen warning aeb1731 silence doxygen warning f18387f silence doxygen; new params are unused? 15ad6bd doxygenify cutil/efio.cpp c8b5dad doxygenify cutil/danerror.cpp 784450f the globals and exceptions parts are obsolete; remove 8bca324 doxygen classify/normfeat.cpp 9bcbe16 doxygen classify/normmatch.cpp aa9a971 doxygen ccmain/cube_control.cpp c083ff2 doxygen ccmain/cube_reco_context.cpp f842850 params changed 5c94f12 doxygen ccmain/cubeclassifier.cpp 15ba750 case sensitive f5c71d4 case sensitive f85655b doxygen classify/intproto.cpp 4bbc7aa partial doxygen classify/mfx.cpp dbb6041 partial doxygen classify/intproto.cpp 2aa72db finish doxygen classify/intproto.cpp 0b8de99 doxygen training/mftraining.cpp 0b5b35c partial doxygen ccstruct/coutln.cpp b81c766 partial doxygen ccstruct/coutln.cpp 40fc415 finished? 
doxygen ccstruct/coutln.cpp 6e4165c doxygen classify/clusttool.cpp 0267dec doxygen classify/cutoffs.cpp 7f0c70c doxygen classify/fpoint.cpp 512f3bd ignore ~ files 5668a52 doxygen classify/intmatcher.cpp 84788d4 doxygen classify/kdtree.cpp 29f36ca doxygen classify/mfoutline.cpp 40b94b1 silence doxygen warnings 6c511b9 doxygen classify/mfx.cpp f9b4080 doxygen classify/outfeat.cpp aa1df05 doxygen classify/picofeat.cpp cc5f466 doxygen training/cntraining.cpp cce044f doxygen training/commontraining.cpp 167e216 missing param 9498383 renamed params 37eeac2 renamed param d87b5dd case c8ee174 renamed params b858db8 typo 4c2a838 h2 context? 81a2c0c fix some param names; add some missing params, no docs bcf8a4c add some missing params, no docs af77f86 add some missing params, no docs; fix some param names 01df24e fix some params 6161056 fix some params 68508b6 fix some params 285aeb6 doxygen complains here no matter what 529bcfa rm some missing params, typos cd21226 rm some missing params, add some new ones 48a4bc2 fix params c844628 missing param 312ce37 missing param; rename one ec2fdec missing param 05e15e0 missing params d515858 change "<" to &lt; to make doxygen happy b476a28 wrong place
2014-09-13 04:41:19 +08:00
* @param keep_this
*/
int Classify::PruneClasses(const INT_TEMPLATES_STRUCT* int_templates,
2015-05-14 08:07:11 +08:00
int num_features, int keep_this,
const INT_FEATURE_STRUCT* features,
const uinT8* normalization_factors,
const uinT16* expected_num_features,
GenericVector<CP_RESULT_STRUCT>* results) {
ClassPruner pruner(int_templates->NumClasses);
// Compute initial match scores for all classes.
pruner.ComputeScores(int_templates, num_features, features);
// Adjust match scores for number of expected features.
pruner.AdjustForExpectedNumFeatures(expected_num_features,
classify_cp_cutoff_strength);
// Apply disabled classes in unicharset - only works without a shape_table.
if (shape_table_ == NULL)
pruner.DisableDisabledClasses(unicharset);
// If fragments are disabled, remove them, also only without a shape table.
if (disable_character_fragments && shape_table_ == NULL)
pruner.DisableFragments(unicharset);
// If we have good x-heights, apply the given normalization factors.
if (normalization_factors != NULL) {
pruner.NormalizeForXheight(classify_class_pruner_multiplier,
normalization_factors);
} else {
pruner.NoNormalization();
}
// Do the actual pruning and sort the short-list.
2015-05-14 08:07:11 +08:00
pruner.PruneAndSort(classify_class_pruner_threshold, keep_this,
shape_table_ == NULL, unicharset);
if (classify_debug_level > 2) {
pruner.DebugMatch(*this, int_templates, features);
}
if (classify_debug_level > 1) {
pruner.SummarizeResult(*this, int_templates, expected_num_features,
classify_class_pruner_multiplier,
normalization_factors);
}
// Convert to the expected output format.
return pruner.SetupResults(results);
}
} // namespace tesseract
Doxygen Squashed commit from https://github.com/tesseract-ocr/tesseract/tree/more-doxygen closes #14 Commits: 6317305 doxygen 9f42f69 doxygen 0fc4d52 doxygen 37b4b55 fix typo bded8f1 some more doxy 020eb00 slight tweak 524666d doxygenify 2a36a3e doxygenify 229d218 doxygenify 7fd28ae doxygenify a8c64bc doxygenify f5d21b6 fix 5d8ede8 doxygenify a58a4e0 language_model.cpp fa85709 lm_pain_points.cpp lm_state.cpp 6418da3 merge 06190ba Merge branch 'old_doxygen_merge' into more-doxygen 84acf08 Merge branch 'master' into more-doxygen 50fe1ff pagewalk.cpp cube_reco_context.cpp 2982583 change to relative 192a24a applybox.cpp, take one 8eeb053 delete docs for obsolete params 52e4c77 modernise classify/ocrfeatures.cpp 2a1cba6 modernise cutil/emalloc.cpp 773e006 silence doxygen warning aeb1731 silence doxygen warning f18387f silence doxygen; new params are unused? 15ad6bd doxygenify cutil/efio.cpp c8b5dad doxygenify cutil/danerror.cpp 784450f the globals and exceptions parts are obsolete; remove 8bca324 doxygen classify/normfeat.cpp 9bcbe16 doxygen classify/normmatch.cpp aa9a971 doxygen ccmain/cube_control.cpp c083ff2 doxygen ccmain/cube_reco_context.cpp f842850 params changed 5c94f12 doxygen ccmain/cubeclassifier.cpp 15ba750 case sensitive f5c71d4 case sensitive f85655b doxygen classify/intproto.cpp 4bbc7aa partial doxygen classify/mfx.cpp dbb6041 partial doxygen classify/intproto.cpp 2aa72db finish doxygen classify/intproto.cpp 0b8de99 doxygen training/mftraining.cpp 0b5b35c partial doxygen ccstruct/coutln.cpp b81c766 partial doxygen ccstruct/coutln.cpp 40fc415 finished? 
doxygen ccstruct/coutln.cpp 6e4165c doxygen classify/clusttool.cpp 0267dec doxygen classify/cutoffs.cpp 7f0c70c doxygen classify/fpoint.cpp 512f3bd ignore ~ files 5668a52 doxygen classify/intmatcher.cpp 84788d4 doxygen classify/kdtree.cpp 29f36ca doxygen classify/mfoutline.cpp 40b94b1 silence doxygen warnings 6c511b9 doxygen classify/mfx.cpp f9b4080 doxygen classify/outfeat.cpp aa1df05 doxygen classify/picofeat.cpp cc5f466 doxygen training/cntraining.cpp cce044f doxygen training/commontraining.cpp 167e216 missing param 9498383 renamed params 37eeac2 renamed param d87b5dd case c8ee174 renamed params b858db8 typo 4c2a838 h2 context? 81a2c0c fix some param names; add some missing params, no docs bcf8a4c add some missing params, no docs af77f86 add some missing params, no docs; fix some param names 01df24e fix some params 6161056 fix some params 68508b6 fix some params 285aeb6 doxygen complains here no matter what 529bcfa rm some missing params, typos cd21226 rm some missing params, add some new ones 48a4bc2 fix params c844628 missing param 312ce37 missing param; rename one ec2fdec missing param 05e15e0 missing params d515858 change "<" to &lt; to make doxygen happy b476a28 wrong place
2014-09-13 04:41:19 +08:00
/**
 * Matches the features of a blob against a single class and reports the
 * best configuration and its rating through Result.
 *
 * Evidence is accumulated one feature at a time into a scratch table, the
 * per-proto evidence is summed and the sums normalized, and FindBestMatch()
 * then fills in Result. Every feature for which UpdateTablesForFeature()
 * returns 0 is counted in Result->feature_misses.
 *
 * @param ClassTemplate          Prototypes & tables for a class
 * @param ProtoMask              AND Mask for proto word
 * @param ConfigMask             AND Mask for config word
 * @param NumFeatures            Number of features in blob
 * @param Features               Array of features
 * @param Result                 Class rating & configuration:
 *                               (0.0 -> 1.0), 0=bad, 1=good
 * @param AdaptFeatureThreshold  Only forwarded to DisplayFeatureDebugInfo()
 * @param Debug                  Debugger flag: 1=debugger on
 * @param SeparateDebugWindows   Only forwarded to the Display*DebugInfo()
 *                               helpers
 * @note History: Tue Feb 19 16:36:23 MST 1991, RWM, Created.
 */
void IntegerMatcher::Match(INT_CLASS ClassTemplate,
                           BIT_VECTOR ProtoMask,
                           BIT_VECTOR ConfigMask,
                           inT16 NumFeatures,
                           const INT_FEATURE_STRUCT* Features,
                           UnicharRating* Result,
                           int AdaptFeatureThreshold,
                           int Debug,
                           bool SeparateDebugWindows) {
  ScratchEvidence* const evidence = new ScratchEvidence();

  if (MatchDebuggingOn(Debug)) {
    cprintf("Integer Matcher -------------------------------------------\n");
  }

  evidence->Clear(ClassTemplate);
  Result->feature_misses = 0;

  for (int index = 0; index < NumFeatures; ++index) {
    const int config_sum =
        UpdateTablesForFeature(ClassTemplate, ProtoMask, ConfigMask, index,
                               &Features[index], evidence, Debug);
    // Count features that were missed over all configs.
    if (config_sum == 0) {
      ++Result->feature_misses;
    }
  }

#ifndef GRAPHICS_DISABLED
  if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug)) {
    DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *evidence,
                           NumFeatures, Debug);
  }

  if (DisplayProtoMatchesOn(Debug)) {
    DisplayProtoDebugInfo(ClassTemplate, ProtoMask, ConfigMask,
                          *evidence, SeparateDebugWindows);
  }

  if (DisplayFeatureMatchesOn(Debug)) {
    DisplayFeatureDebugInfo(ClassTemplate, ProtoMask, ConfigMask, NumFeatures,
                            Features, AdaptFeatureThreshold, Debug,
                            SeparateDebugWindows);
  }
#endif

  // Turn the accumulated evidence into a rating for the best config.
  evidence->UpdateSumOfProtoEvidences(ClassTemplate, ConfigMask, NumFeatures);
  evidence->NormalizeSums(ClassTemplate, NumFeatures, NumFeatures);
  FindBestMatch(ClassTemplate, *evidence, Result);

#ifndef GRAPHICS_DISABLED
  if (PrintMatchSummaryOn(Debug)) {
    Result->Print();
  }

  if (MatchDebuggingOn(Debug)) {
    cprintf("Match Complete --------------------------------------------\n");
  }
#endif

  delete evidence;
}
Doxygen Squashed commit from https://github.com/tesseract-ocr/tesseract/tree/more-doxygen closes #14 Commits: 6317305 doxygen 9f42f69 doxygen 0fc4d52 doxygen 37b4b55 fix typo bded8f1 some more doxy 020eb00 slight tweak 524666d doxygenify 2a36a3e doxygenify 229d218 doxygenify 7fd28ae doxygenify a8c64bc doxygenify f5d21b6 fix 5d8ede8 doxygenify a58a4e0 language_model.cpp fa85709 lm_pain_points.cpp lm_state.cpp 6418da3 merge 06190ba Merge branch 'old_doxygen_merge' into more-doxygen 84acf08 Merge branch 'master' into more-doxygen 50fe1ff pagewalk.cpp cube_reco_context.cpp 2982583 change to relative 192a24a applybox.cpp, take one 8eeb053 delete docs for obsolete params 52e4c77 modernise classify/ocrfeatures.cpp 2a1cba6 modernise cutil/emalloc.cpp 773e006 silence doxygen warning aeb1731 silence doxygen warning f18387f silence doxygen; new params are unused? 15ad6bd doxygenify cutil/efio.cpp c8b5dad doxygenify cutil/danerror.cpp 784450f the globals and exceptions parts are obsolete; remove 8bca324 doxygen classify/normfeat.cpp 9bcbe16 doxygen classify/normmatch.cpp aa9a971 doxygen ccmain/cube_control.cpp c083ff2 doxygen ccmain/cube_reco_context.cpp f842850 params changed 5c94f12 doxygen ccmain/cubeclassifier.cpp 15ba750 case sensitive f5c71d4 case sensitive f85655b doxygen classify/intproto.cpp 4bbc7aa partial doxygen classify/mfx.cpp dbb6041 partial doxygen classify/intproto.cpp 2aa72db finish doxygen classify/intproto.cpp 0b8de99 doxygen training/mftraining.cpp 0b5b35c partial doxygen ccstruct/coutln.cpp b81c766 partial doxygen ccstruct/coutln.cpp 40fc415 finished? 
doxygen ccstruct/coutln.cpp 6e4165c doxygen classify/clusttool.cpp 0267dec doxygen classify/cutoffs.cpp 7f0c70c doxygen classify/fpoint.cpp 512f3bd ignore ~ files 5668a52 doxygen classify/intmatcher.cpp 84788d4 doxygen classify/kdtree.cpp 29f36ca doxygen classify/mfoutline.cpp 40b94b1 silence doxygen warnings 6c511b9 doxygen classify/mfx.cpp f9b4080 doxygen classify/outfeat.cpp aa1df05 doxygen classify/picofeat.cpp cc5f466 doxygen training/cntraining.cpp cce044f doxygen training/commontraining.cpp 167e216 missing param 9498383 renamed params 37eeac2 renamed param d87b5dd case c8ee174 renamed params b858db8 typo 4c2a838 h2 context? 81a2c0c fix some param names; add some missing params, no docs bcf8a4c add some missing params, no docs af77f86 add some missing params, no docs; fix some param names 01df24e fix some params 6161056 fix some params 68508b6 fix some params 285aeb6 doxygen complains here no matter what 529bcfa rm some missing params, typos cd21226 rm some missing params, add some new ones 48a4bc2 fix params c844628 missing param 312ce37 missing param; rename one ec2fdec missing param 05e15e0 missing params d515858 change "<" to &lt; to make doxygen happy b476a28 wrong place
2014-09-13 04:41:19 +08:00
/**
 * FindGoodProtos finds all protos whose average proto-evidence is at least
 * AdaptProtoThreshold. The list is ordered by increasing proto id number.
 *
 * A proto's average is the sum of its proto_evidence_ entries divided by
 * its length (ClassTemplate->ProtoLengths[proto]). Protos with a length of
 * zero are skipped: they have no evidence entries, and averaging them would
 * divide by zero.
 *
 * @param ClassTemplate        Prototypes & tables for a class
 * @param ProtoMask            AND Mask for proto word
 * @param ConfigMask           AND Mask for config word
 * @param BlobLength           Length of unormalized blob (not used by this
 *                             function)
 * @param NumFeatures          Number of features in blob
 * @param Features             Array of features
 * @param ProtoArray           Array of good protos (output); one entry is
 *                             written per good proto, so it must have room
 *                             for up to ClassTemplate->NumProtos ids
 * @param AdaptProtoThreshold  Threshold for good protos
 * @param Debug                Debugger flag: 1=debugger on
 * @return Number of good protos in ProtoArray.
 * @note Exceptions: none
 * @note History: Tue Mar 12 17:09:26 MST 1991, RWM, Created
 */
int IntegerMatcher::FindGoodProtos(
    INT_CLASS ClassTemplate,
    BIT_VECTOR ProtoMask,
    BIT_VECTOR ConfigMask,
    uinT16 BlobLength,
    inT16 NumFeatures,
    INT_FEATURE_ARRAY Features,
    PROTO_ID *ProtoArray,
    int AdaptProtoThreshold,
    int Debug) {
  ScratchEvidence *tables = new ScratchEvidence();
  int NumGoodProtos = 0;

  /* DEBUG opening heading */
  if (MatchDebuggingOn (Debug))
    cprintf
      ("Find Good Protos -------------------------------------------\n");

  tables->Clear(ClassTemplate);

  /* Accumulate evidence from every feature of the blob */
  for (int Feature = 0; Feature < NumFeatures; Feature++)
    UpdateTablesForFeature(
        ClassTemplate, ProtoMask, ConfigMask, Feature, &(Features[Feature]),
        tables, Debug);

#ifndef GRAPHICS_DISABLED
  if (PrintProtoMatchesOn (Debug) || PrintMatchSummaryOn (Debug))
    DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
                           NumFeatures, Debug);
#endif

  /* Average Proto Evidences & Find Good Protos */
  for (int proto = 0; proto < ClassTemplate->NumProtos; proto++) {
    const int ProtoLength = ClassTemplate->ProtoLengths[proto];
    /* A zero-length proto has nothing to average; dividing by its length
       would be undefined behaviour, so it can never be a good proto. */
    if (ProtoLength <= 0)
      continue;

    /* Compute Average for Actual Proto */
    int Temp = 0;
    for (int i = 0; i < ProtoLength; i++)
      Temp += tables->proto_evidence_[proto][i];

    Temp /= ProtoLength;

    /* Find Good Protos */
    if (Temp >= AdaptProtoThreshold) {
      *ProtoArray = proto;
      ProtoArray++;
      NumGoodProtos++;
    }
  }

  if (MatchDebuggingOn (Debug))
    cprintf ("Match Complete --------------------------------------------\n");

  delete tables;
  return NumGoodProtos;
}
Doxygen Squashed commit from https://github.com/tesseract-ocr/tesseract/tree/more-doxygen closes #14 Commits: 6317305 doxygen 9f42f69 doxygen 0fc4d52 doxygen 37b4b55 fix typo bded8f1 some more doxy 020eb00 slight tweak 524666d doxygenify 2a36a3e doxygenify 229d218 doxygenify 7fd28ae doxygenify a8c64bc doxygenify f5d21b6 fix 5d8ede8 doxygenify a58a4e0 language_model.cpp fa85709 lm_pain_points.cpp lm_state.cpp 6418da3 merge 06190ba Merge branch 'old_doxygen_merge' into more-doxygen 84acf08 Merge branch 'master' into more-doxygen 50fe1ff pagewalk.cpp cube_reco_context.cpp 2982583 change to relative 192a24a applybox.cpp, take one 8eeb053 delete docs for obsolete params 52e4c77 modernise classify/ocrfeatures.cpp 2a1cba6 modernise cutil/emalloc.cpp 773e006 silence doxygen warning aeb1731 silence doxygen warning f18387f silence doxygen; new params are unused? 15ad6bd doxygenify cutil/efio.cpp c8b5dad doxygenify cutil/danerror.cpp 784450f the globals and exceptions parts are obsolete; remove 8bca324 doxygen classify/normfeat.cpp 9bcbe16 doxygen classify/normmatch.cpp aa9a971 doxygen ccmain/cube_control.cpp c083ff2 doxygen ccmain/cube_reco_context.cpp f842850 params changed 5c94f12 doxygen ccmain/cubeclassifier.cpp 15ba750 case sensitive f5c71d4 case sensitive f85655b doxygen classify/intproto.cpp 4bbc7aa partial doxygen classify/mfx.cpp dbb6041 partial doxygen classify/intproto.cpp 2aa72db finish doxygen classify/intproto.cpp 0b8de99 doxygen training/mftraining.cpp 0b5b35c partial doxygen ccstruct/coutln.cpp b81c766 partial doxygen ccstruct/coutln.cpp 40fc415 finished? 
doxygen ccstruct/coutln.cpp 6e4165c doxygen classify/clusttool.cpp 0267dec doxygen classify/cutoffs.cpp 7f0c70c doxygen classify/fpoint.cpp 512f3bd ignore ~ files 5668a52 doxygen classify/intmatcher.cpp 84788d4 doxygen classify/kdtree.cpp 29f36ca doxygen classify/mfoutline.cpp 40b94b1 silence doxygen warnings 6c511b9 doxygen classify/mfx.cpp f9b4080 doxygen classify/outfeat.cpp aa1df05 doxygen classify/picofeat.cpp cc5f466 doxygen training/cntraining.cpp cce044f doxygen training/commontraining.cpp 167e216 missing param 9498383 renamed params 37eeac2 renamed param d87b5dd case c8ee174 renamed params b858db8 typo 4c2a838 h2 context? 81a2c0c fix some param names; add some missing params, no docs bcf8a4c add some missing params, no docs af77f86 add some missing params, no docs; fix some param names 01df24e fix some params 6161056 fix some params 68508b6 fix some params 285aeb6 doxygen complains here no matter what 529bcfa rm some missing params, typos cd21226 rm some missing params, add some new ones 48a4bc2 fix params c844628 missing param 312ce37 missing param; rename one ec2fdec missing param 05e15e0 missing params d515858 change "<" to &lt; to make doxygen happy b476a28 wrong place
2014-09-13 04:41:19 +08:00
/**
 * FindBadFeatures collects every feature whose best feature-evidence, taken
 * over all configs of the class, is below AdaptFeatureThreshold. The list
 * is ordered by increasing feature number.
 *
 * @param ClassTemplate          Prototypes & tables for a class
 * @param ProtoMask              AND Mask for proto word
 * @param ConfigMask             AND Mask for config word
 * @param BlobLength             Length of unormalized blob (not used by
 *                               this function)
 * @param NumFeatures            Number of features in blob
 * @param Features               Array of features
 * @param FeatureArray           Array of bad features (output); one entry
 *                               is written per bad feature
 * @param AdaptFeatureThreshold  Threshold for bad features
 * @param Debug                  Debugger flag: 1=debugger on
 * @return Number of bad features in FeatureArray.
 * @note History: Tue Mar 12 17:09:26 MST 1991, RWM, Created
 */
int IntegerMatcher::FindBadFeatures(
    INT_CLASS ClassTemplate,
    BIT_VECTOR ProtoMask,
    BIT_VECTOR ConfigMask,
    uinT16 BlobLength,
    inT16 NumFeatures,
    INT_FEATURE_ARRAY Features,
    FEATURE_ID *FeatureArray,
    int AdaptFeatureThreshold,
    int Debug) {
  ScratchEvidence* const evidence = new ScratchEvidence();
  int bad_count = 0;

  /* DEBUG opening heading */
  if (MatchDebuggingOn(Debug)) {
    cprintf("Find Bad Features -------------------------------------------\n");
  }

  evidence->Clear(ClassTemplate);

  for (int feature_id = 0; feature_id < NumFeatures; ++feature_id) {
    UpdateTablesForFeature(
        ClassTemplate, ProtoMask, ConfigMask, feature_id,
        &Features[feature_id], evidence, Debug);

    /* Strongest evidence any config holds for the current feature */
    int strongest = 0;
    for (int config = 0; config < ClassTemplate->NumConfigs; ++config) {
      if (evidence->feature_evidence_[config] > strongest) {
        strongest = evidence->feature_evidence_[config];
      }
    }

    /* Good enough: not a bad feature */
    if (strongest >= AdaptFeatureThreshold) {
      continue;
    }

    /* Record the bad feature in the next free output slot */
    FeatureArray[bad_count] = feature_id;
    ++bad_count;
  }

#ifndef GRAPHICS_DISABLED
  if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug)) {
    DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *evidence,
                           NumFeatures, Debug);
  }
#endif

  if (MatchDebuggingOn(Debug)) {
    cprintf("Match Complete --------------------------------------------\n");
  }

  delete evidence;
  return bad_count;
}
/**
 * Stores the debug-level parameter and precomputes the
 * similarity_evidence_table_ lookup plus the masks and shift counts used by
 * the evidence computation.
 *
 * Each table entry is 255 / (r * r + 1), where r is the entry's similarity
 * divided by kSimilarityCenter. When kSEExponentialMultiplier is positive
 * the value is additionally scaled by a factor clipped to [0, 1]. The result
 * is rounded to the nearest uinT8.
 *
 * @param classify_debug_level  Debug-level parameter; the pointer itself is
 *                              kept in classify_debug_level_.
 */
void IntegerMatcher::Init(tesseract::IntParam *classify_debug_level) {
  classify_debug_level_ = classify_debug_level;

  /* Initialize table for evidence to similarity lookup */
  for (int entry = 0; entry < SE_TABLE_SIZE; ++entry) {
    const uinT32 int_similarity = entry << (27 - SE_TABLE_BITS);
    const double similarity =
        static_cast<double>(int_similarity) / 65536.0 / 65536.0;
    const double ratio = similarity / kSimilarityCenter;
    double evidence = 255.0 / (ratio * ratio + 1.0);

    if (kSEExponentialMultiplier > 0.0) {
      const double position = static_cast<double>(entry) / SE_TABLE_SIZE;
      const double scale = 1.0 - exp(-kSEExponentialMultiplier) *
          exp(kSEExponentialMultiplier * position);
      evidence *= ClipToRange(scale, 0.0, 1.0);
    }

    /* Adding 0.5 before truncation rounds to the nearest integer */
    similarity_evidence_table_[entry] = static_cast<uinT8>(evidence + 0.5);
  }

  /* Initialize evidence computation variables */
  evidence_table_mask_ =
      ((1 << kEvidenceTableBits) - 1) << (9 - kEvidenceTableBits);
  mult_trunc_shift_bits_ = (14 - kIntEvidenceTruncBits);
  /* Depends on mult_trunc_shift_bits_ set on the previous line */
  table_trunc_shift_bits_ =
      (27 - SE_TABLE_BITS - (mult_trunc_shift_bits_ << 1));
  evidence_mult_mask_ = ((1 << kIntEvidenceTruncBits) - 1);
}
Doxygen Squashed commit from https://github.com/tesseract-ocr/tesseract/tree/more-doxygen closes #14 Commits: 6317305 doxygen 9f42f69 doxygen 0fc4d52 doxygen 37b4b55 fix typo bded8f1 some more doxy 020eb00 slight tweak 524666d doxygenify 2a36a3e doxygenify 229d218 doxygenify 7fd28ae doxygenify a8c64bc doxygenify f5d21b6 fix 5d8ede8 doxygenify a58a4e0 language_model.cpp fa85709 lm_pain_points.cpp lm_state.cpp 6418da3 merge 06190ba Merge branch 'old_doxygen_merge' into more-doxygen 84acf08 Merge branch 'master' into more-doxygen 50fe1ff pagewalk.cpp cube_reco_context.cpp 2982583 change to relative 192a24a applybox.cpp, take one 8eeb053 delete docs for obsolete params 52e4c77 modernise classify/ocrfeatures.cpp 2a1cba6 modernise cutil/emalloc.cpp 773e006 silence doxygen warning aeb1731 silence doxygen warning f18387f silence doxygen; new params are unused? 15ad6bd doxygenify cutil/efio.cpp c8b5dad doxygenify cutil/danerror.cpp 784450f the globals and exceptions parts are obsolete; remove 8bca324 doxygen classify/normfeat.cpp 9bcbe16 doxygen classify/normmatch.cpp aa9a971 doxygen ccmain/cube_control.cpp c083ff2 doxygen ccmain/cube_reco_context.cpp f842850 params changed 5c94f12 doxygen ccmain/cubeclassifier.cpp 15ba750 case sensitive f5c71d4 case sensitive f85655b doxygen classify/intproto.cpp 4bbc7aa partial doxygen classify/mfx.cpp dbb6041 partial doxygen classify/intproto.cpp 2aa72db finish doxygen classify/intproto.cpp 0b8de99 doxygen training/mftraining.cpp 0b5b35c partial doxygen ccstruct/coutln.cpp b81c766 partial doxygen ccstruct/coutln.cpp 40fc415 finished? 
doxygen ccstruct/coutln.cpp 6e4165c doxygen classify/clusttool.cpp 0267dec doxygen classify/cutoffs.cpp 7f0c70c doxygen classify/fpoint.cpp 512f3bd ignore ~ files 5668a52 doxygen classify/intmatcher.cpp 84788d4 doxygen classify/kdtree.cpp 29f36ca doxygen classify/mfoutline.cpp 40b94b1 silence doxygen warnings 6c511b9 doxygen classify/mfx.cpp f9b4080 doxygen classify/outfeat.cpp aa1df05 doxygen classify/picofeat.cpp cc5f466 doxygen training/cntraining.cpp cce044f doxygen training/commontraining.cpp 167e216 missing param 9498383 renamed params 37eeac2 renamed param d87b5dd case c8ee174 renamed params b858db8 typo 4c2a838 h2 context? 81a2c0c fix some param names; add some missing params, no docs bcf8a4c add some missing params, no docs af77f86 add some missing params, no docs; fix some param names 01df24e fix some params 6161056 fix some params 68508b6 fix some params 285aeb6 doxygen complains here no matter what 529bcfa rm some missing params, typos cd21226 rm some missing params, add some new ones 48a4bc2 fix params c844628 missing param 312ce37 missing param; rename one ec2fdec missing param 05e15e0 missing params d515858 change "<" to &lt; to make doxygen happy b476a28 wrong place
2014-09-13 04:41:19 +08:00
/*----------------------------------------------------------------------------
              Private Code
----------------------------------------------------------------------------*/
/**
 * Zeroes the accumulated evidence for one class: the first NumConfigs
 * entries of sum_feature_evidence_ and the first NumProtos entries of
 * proto_evidence_. Entries beyond those counts are left untouched.
 * @param class_template  Class whose NumConfigs / NumProtos bound the
 *                        regions that are cleared
 */
void ScratchEvidence::Clear(const INT_CLASS class_template) {
  const size_t config_bytes =
      class_template->NumConfigs * sizeof(sum_feature_evidence_[0]);
  const size_t proto_bytes =
      class_template->NumProtos * sizeof(proto_evidence_[0]);

  memset(sum_feature_evidence_, 0, config_bytes);
  memset(proto_evidence_, 0, proto_bytes);
}
/**
 * Zeroes the per-config evidence of the feature currently being matched.
 * @param class_template Class whose NumConfigs sizes the clear
 */
void ScratchEvidence::ClearFeatureEvidence(const INT_CLASS class_template) {
  memset(feature_evidence_,
         0,
         class_template->NumConfigs * sizeof(*feature_evidence_));
}
Doxygen Squashed commit from https://github.com/tesseract-ocr/tesseract/tree/more-doxygen closes #14 Commits: 6317305 doxygen 9f42f69 doxygen 0fc4d52 doxygen 37b4b55 fix typo bded8f1 some more doxy 020eb00 slight tweak 524666d doxygenify 2a36a3e doxygenify 229d218 doxygenify 7fd28ae doxygenify a8c64bc doxygenify f5d21b6 fix 5d8ede8 doxygenify a58a4e0 language_model.cpp fa85709 lm_pain_points.cpp lm_state.cpp 6418da3 merge 06190ba Merge branch 'old_doxygen_merge' into more-doxygen 84acf08 Merge branch 'master' into more-doxygen 50fe1ff pagewalk.cpp cube_reco_context.cpp 2982583 change to relative 192a24a applybox.cpp, take one 8eeb053 delete docs for obsolete params 52e4c77 modernise classify/ocrfeatures.cpp 2a1cba6 modernise cutil/emalloc.cpp 773e006 silence doxygen warning aeb1731 silence doxygen warning f18387f silence doxygen; new params are unused? 15ad6bd doxygenify cutil/efio.cpp c8b5dad doxygenify cutil/danerror.cpp 784450f the globals and exceptions parts are obsolete; remove 8bca324 doxygen classify/normfeat.cpp 9bcbe16 doxygen classify/normmatch.cpp aa9a971 doxygen ccmain/cube_control.cpp c083ff2 doxygen ccmain/cube_reco_context.cpp f842850 params changed 5c94f12 doxygen ccmain/cubeclassifier.cpp 15ba750 case sensitive f5c71d4 case sensitive f85655b doxygen classify/intproto.cpp 4bbc7aa partial doxygen classify/mfx.cpp dbb6041 partial doxygen classify/intproto.cpp 2aa72db finish doxygen classify/intproto.cpp 0b8de99 doxygen training/mftraining.cpp 0b5b35c partial doxygen ccstruct/coutln.cpp b81c766 partial doxygen ccstruct/coutln.cpp 40fc415 finished? 
doxygen ccstruct/coutln.cpp 6e4165c doxygen classify/clusttool.cpp 0267dec doxygen classify/cutoffs.cpp 7f0c70c doxygen classify/fpoint.cpp 512f3bd ignore ~ files 5668a52 doxygen classify/intmatcher.cpp 84788d4 doxygen classify/kdtree.cpp 29f36ca doxygen classify/mfoutline.cpp 40b94b1 silence doxygen warnings 6c511b9 doxygen classify/mfx.cpp f9b4080 doxygen classify/outfeat.cpp aa1df05 doxygen classify/picofeat.cpp cc5f466 doxygen training/cntraining.cpp cce044f doxygen training/commontraining.cpp 167e216 missing param 9498383 renamed params 37eeac2 renamed param d87b5dd case c8ee174 renamed params b858db8 typo 4c2a838 h2 context? 81a2c0c fix some param names; add some missing params, no docs bcf8a4c add some missing params, no docs af77f86 add some missing params, no docs; fix some param names 01df24e fix some params 6161056 fix some params 68508b6 fix some params 285aeb6 doxygen complains here no matter what 529bcfa rm some missing params, typos cd21226 rm some missing params, add some new ones 48a4bc2 fix params c844628 missing param 312ce37 missing param; rename one ec2fdec missing param 05e15e0 missing params d515858 change "<" to &lt; to make doxygen happy b476a28 wrong place
2014-09-13 04:41:19 +08:00
/**
* Print debugging information for Configuations
* @return none
* @note Exceptions: none
* @note History: Wed Feb 27 14:12:28 MST 1991, RWM, Created.
*/
void IMDebugConfiguration(int FeatureNum,
uinT16 ActualProtoNum,
uinT8 Evidence,
BIT_VECTOR ConfigMask,
uinT32 ConfigWord) {
cprintf ("F = %3d, P = %3d, E = %3d, Configs = ",
FeatureNum, (int) ActualProtoNum, (int) Evidence);
while (ConfigWord) {
if (ConfigWord & 1)
cprintf ("1");
else
cprintf ("0");
ConfigWord >>= 1;
}
cprintf ("\n");
}
Doxygen Squashed commit from https://github.com/tesseract-ocr/tesseract/tree/more-doxygen closes #14 Commits: 6317305 doxygen 9f42f69 doxygen 0fc4d52 doxygen 37b4b55 fix typo bded8f1 some more doxy 020eb00 slight tweak 524666d doxygenify 2a36a3e doxygenify 229d218 doxygenify 7fd28ae doxygenify a8c64bc doxygenify f5d21b6 fix 5d8ede8 doxygenify a58a4e0 language_model.cpp fa85709 lm_pain_points.cpp lm_state.cpp 6418da3 merge 06190ba Merge branch 'old_doxygen_merge' into more-doxygen 84acf08 Merge branch 'master' into more-doxygen 50fe1ff pagewalk.cpp cube_reco_context.cpp 2982583 change to relative 192a24a applybox.cpp, take one 8eeb053 delete docs for obsolete params 52e4c77 modernise classify/ocrfeatures.cpp 2a1cba6 modernise cutil/emalloc.cpp 773e006 silence doxygen warning aeb1731 silence doxygen warning f18387f silence doxygen; new params are unused? 15ad6bd doxygenify cutil/efio.cpp c8b5dad doxygenify cutil/danerror.cpp 784450f the globals and exceptions parts are obsolete; remove 8bca324 doxygen classify/normfeat.cpp 9bcbe16 doxygen classify/normmatch.cpp aa9a971 doxygen ccmain/cube_control.cpp c083ff2 doxygen ccmain/cube_reco_context.cpp f842850 params changed 5c94f12 doxygen ccmain/cubeclassifier.cpp 15ba750 case sensitive f5c71d4 case sensitive f85655b doxygen classify/intproto.cpp 4bbc7aa partial doxygen classify/mfx.cpp dbb6041 partial doxygen classify/intproto.cpp 2aa72db finish doxygen classify/intproto.cpp 0b8de99 doxygen training/mftraining.cpp 0b5b35c partial doxygen ccstruct/coutln.cpp b81c766 partial doxygen ccstruct/coutln.cpp 40fc415 finished? 
doxygen ccstruct/coutln.cpp 6e4165c doxygen classify/clusttool.cpp 0267dec doxygen classify/cutoffs.cpp 7f0c70c doxygen classify/fpoint.cpp 512f3bd ignore ~ files 5668a52 doxygen classify/intmatcher.cpp 84788d4 doxygen classify/kdtree.cpp 29f36ca doxygen classify/mfoutline.cpp 40b94b1 silence doxygen warnings 6c511b9 doxygen classify/mfx.cpp f9b4080 doxygen classify/outfeat.cpp aa1df05 doxygen classify/picofeat.cpp cc5f466 doxygen training/cntraining.cpp cce044f doxygen training/commontraining.cpp 167e216 missing param 9498383 renamed params 37eeac2 renamed param d87b5dd case c8ee174 renamed params b858db8 typo 4c2a838 h2 context? 81a2c0c fix some param names; add some missing params, no docs bcf8a4c add some missing params, no docs af77f86 add some missing params, no docs; fix some param names 01df24e fix some params 6161056 fix some params 68508b6 fix some params 285aeb6 doxygen complains here no matter what 529bcfa rm some missing params, typos cd21226 rm some missing params, add some new ones 48a4bc2 fix params c844628 missing param 312ce37 missing param; rename one ec2fdec missing param 05e15e0 missing params d515858 change "<" to &lt; to make doxygen happy b476a28 wrong place
2014-09-13 04:41:19 +08:00
/**
* Print debugging information for Configuations
* @return none
* @note Exceptions: none
* @note History: Wed Feb 27 14:12:28 MST 1991, RWM, Created.
*/
void IMDebugConfigurationSum(int FeatureNum,
uinT8 *FeatureEvidence,
inT32 ConfigCount) {
cprintf("F=%3d, C=", FeatureNum);
for (int ConfigNum = 0; ConfigNum < ConfigCount; ConfigNum++) {
cprintf("%4d", FeatureEvidence[ConfigNum]);
}
cprintf("\n");
}
Doxygen Squashed commit from https://github.com/tesseract-ocr/tesseract/tree/more-doxygen closes #14 Commits: 6317305 doxygen 9f42f69 doxygen 0fc4d52 doxygen 37b4b55 fix typo bded8f1 some more doxy 020eb00 slight tweak 524666d doxygenify 2a36a3e doxygenify 229d218 doxygenify 7fd28ae doxygenify a8c64bc doxygenify f5d21b6 fix 5d8ede8 doxygenify a58a4e0 language_model.cpp fa85709 lm_pain_points.cpp lm_state.cpp 6418da3 merge 06190ba Merge branch 'old_doxygen_merge' into more-doxygen 84acf08 Merge branch 'master' into more-doxygen 50fe1ff pagewalk.cpp cube_reco_context.cpp 2982583 change to relative 192a24a applybox.cpp, take one 8eeb053 delete docs for obsolete params 52e4c77 modernise classify/ocrfeatures.cpp 2a1cba6 modernise cutil/emalloc.cpp 773e006 silence doxygen warning aeb1731 silence doxygen warning f18387f silence doxygen; new params are unused? 15ad6bd doxygenify cutil/efio.cpp c8b5dad doxygenify cutil/danerror.cpp 784450f the globals and exceptions parts are obsolete; remove 8bca324 doxygen classify/normfeat.cpp 9bcbe16 doxygen classify/normmatch.cpp aa9a971 doxygen ccmain/cube_control.cpp c083ff2 doxygen ccmain/cube_reco_context.cpp f842850 params changed 5c94f12 doxygen ccmain/cubeclassifier.cpp 15ba750 case sensitive f5c71d4 case sensitive f85655b doxygen classify/intproto.cpp 4bbc7aa partial doxygen classify/mfx.cpp dbb6041 partial doxygen classify/intproto.cpp 2aa72db finish doxygen classify/intproto.cpp 0b8de99 doxygen training/mftraining.cpp 0b5b35c partial doxygen ccstruct/coutln.cpp b81c766 partial doxygen ccstruct/coutln.cpp 40fc415 finished? 
doxygen ccstruct/coutln.cpp 6e4165c doxygen classify/clusttool.cpp 0267dec doxygen classify/cutoffs.cpp 7f0c70c doxygen classify/fpoint.cpp 512f3bd ignore ~ files 5668a52 doxygen classify/intmatcher.cpp 84788d4 doxygen classify/kdtree.cpp 29f36ca doxygen classify/mfoutline.cpp 40b94b1 silence doxygen warnings 6c511b9 doxygen classify/mfx.cpp f9b4080 doxygen classify/outfeat.cpp aa1df05 doxygen classify/picofeat.cpp cc5f466 doxygen training/cntraining.cpp cce044f doxygen training/commontraining.cpp 167e216 missing param 9498383 renamed params 37eeac2 renamed param d87b5dd case c8ee174 renamed params b858db8 typo 4c2a838 h2 context? 81a2c0c fix some param names; add some missing params, no docs bcf8a4c add some missing params, no docs af77f86 add some missing params, no docs; fix some param names 01df24e fix some params 6161056 fix some params 68508b6 fix some params 285aeb6 doxygen complains here no matter what 529bcfa rm some missing params, typos cd21226 rm some missing params, add some new ones 48a4bc2 fix params c844628 missing param 312ce37 missing param; rename one ec2fdec missing param 05e15e0 missing params d515858 change "<" to &lt; to make doxygen happy b476a28 wrong place
2014-09-13 04:41:19 +08:00
/**
 * For the given feature: prune protos, compute evidence,
 * update Feature Evidence, Proto Evidence, and Sum of Feature
 * Evidence tables.
 * @param ClassTemplate Prototypes & tables for a class
 * @param ProtoMask Bit vector ANDed word-by-word with the proto pruner
 * result; only protos whose bit is set are evaluated
 * @param ConfigMask Bit vector whose first word is ANDed with each proto's
 * config word; only configs whose bit is set receive evidence
 * @param FeatureNum Current feature number (for DEBUG only)
 * @param Feature Pointer to a feature struct
 * @param tables Evidence tables
 * @param Debug Debug flags; feature-match printing is enabled when
 * PrintFeatureMatchesOn(Debug) is true
 * @return Sum over all configs of the evidence found for this feature
 */
int IntegerMatcher::UpdateTablesForFeature(
    INT_CLASS ClassTemplate,
    BIT_VECTOR ProtoMask,
    BIT_VECTOR ConfigMask,
    int FeatureNum,
    const INT_FEATURE_STRUCT* Feature,
    ScratchEvidence *tables,
    int Debug) {
  uinT32 ConfigWord;
  uinT32 ProtoWord;
  uinT32 ProtoNum;
  uinT32 ActualProtoNum;
  uinT8 proto_byte;
  inT32 proto_word_offset;
  inT32 proto_offset;
  uinT8 config_byte;
  inT32 config_word_offset;
  inT32 config_offset;
  PROTO_SET ProtoSet;
  uinT32 *ProtoPrunerPtr;
  INT_PROTO Proto;
  int ProtoSetIndex;
  uinT8 Evidence;
  uinT32 XFeatureAddress;
  uinT32 YFeatureAddress;
  uinT32 ThetaFeatureAddress;
  uinT8* UINT8Pointer;
  uinT8* FeatureEvidence;
  int ProtoIndex;
  uinT8 Temp;
  int* IntPointer;
  int ConfigNum;
  inT32 M3;
  inT32 A3;
  uinT32 A4;

  tables->ClearFeatureEvidence(ClassTemplate);

  /* Precompute Feature Address offset for Proto Pruning */
  XFeatureAddress = ((Feature->X >> 2) << 1);
  YFeatureAddress = (NUM_PP_BUCKETS << 1) + ((Feature->Y >> 2) << 1);
  ThetaFeatureAddress = (NUM_PP_BUCKETS << 2) + ((Feature->Theta >> 2) << 1);

  for (ProtoSetIndex = 0, ActualProtoNum = 0;
       ProtoSetIndex < ClassTemplate->NumProtoSets; ProtoSetIndex++) {
    ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
    ProtoPrunerPtr = (uinT32 *) ((*ProtoSet).ProtoPruner);
    /* Each pass handles one pruner/mask word, i.e. half of a proto set. */
    for (ProtoNum = 0; ProtoNum < PROTOS_PER_PROTO_SET;
         ProtoNum += (PROTOS_PER_PROTO_SET >> 1), ActualProtoNum +=
         (PROTOS_PER_PROTO_SET >> 1), ProtoMask++, ProtoPrunerPtr++) {
      /* Prune Protos of current Proto Set */
      ProtoWord = *(ProtoPrunerPtr + XFeatureAddress);
      ProtoWord &= *(ProtoPrunerPtr + YFeatureAddress);
      ProtoWord &= *(ProtoPrunerPtr + ThetaFeatureAddress);
      ProtoWord &= *ProtoMask;

      if (ProtoWord != 0) {
        /* Walk the surviving proto bits one byte at a time, using
           offset_table/next_table to step through the bits of a byte. */
        proto_byte = ProtoWord & 0xff;
        ProtoWord >>= 8;
        proto_word_offset = 0;
        while (ProtoWord != 0 || proto_byte != 0) {
          while (proto_byte == 0) {
            proto_byte = ProtoWord & 0xff;
            ProtoWord >>= 8;
            proto_word_offset += 8;
          }
          proto_offset = offset_table[proto_byte] + proto_word_offset;
          proto_byte = next_table[proto_byte];
          Proto = &(ProtoSet->Protos[ProtoNum + proto_offset]);
          ConfigWord = Proto->Configs[0];
          /* Multiply instead of shifting left: the operands may be
             negative, and left-shifting a negative value is undefined
             behaviour before C++20. */
          A3 = (((Proto->A * (Feature->X - 128)) * 2)
                - (Proto->B * (Feature->Y - 128)) + (Proto->C * 512));
          M3 =
            (((inT8) (Feature->Theta - Proto->Angle)) * kIntThetaFudge) * 2;

          /* For negative x, ~x == -x - 1: a cheap magnitude. */
          if (A3 < 0)
            A3 = ~A3;
          if (M3 < 0)
            M3 = ~M3;
          A3 >>= mult_trunc_shift_bits_;
          M3 >>= mult_trunc_shift_bits_;
          if (static_cast<uint32_t>(A3) > evidence_mult_mask_)
            A3 = evidence_mult_mask_;
          if (static_cast<uint32_t>(M3) > evidence_mult_mask_)
            M3 = evidence_mult_mask_;

          A4 = (A3 * A3) + (M3 * M3);
          A4 >>= table_trunc_shift_bits_;
          if (A4 > evidence_table_mask_)
            Evidence = 0;
          else
            Evidence = similarity_evidence_table_[A4];

          if (PrintFeatureMatchesOn (Debug))
            IMDebugConfiguration (FeatureNum,
                                  ActualProtoNum + proto_offset,
                                  Evidence, ConfigMask, ConfigWord);

          ConfigWord &= *ConfigMask;

          /* Record the best evidence seen so far for every config this
             proto belongs to. The byte offset starts at -8 and is bumped
             to 0 before first use; an integer offset is used so that no
             pointer before the start of the array is ever formed. */
          FeatureEvidence = tables->feature_evidence_;
          config_word_offset = -8;
          config_byte = 0;
          while (ConfigWord != 0 || config_byte != 0) {
            while (config_byte == 0) {
              config_byte = ConfigWord & 0xff;
              ConfigWord >>= 8;
              config_word_offset += 8;
            }
            config_offset = offset_table[config_byte] + config_word_offset;
            config_byte = next_table[config_byte];
            if (Evidence > FeatureEvidence[config_offset])
              FeatureEvidence[config_offset] = Evidence;
          }

          /* Insert Evidence into this proto's evidence list: whenever it
             beats the stored value the two are swapped and the displaced
             value is carried on to the next slot. */
          UINT8Pointer =
            &(tables->proto_evidence_[ActualProtoNum + proto_offset][0]);
          for (ProtoIndex =
               ClassTemplate->ProtoLengths[ActualProtoNum + proto_offset];
               ProtoIndex > 0; ProtoIndex--, UINT8Pointer++) {
            if (Evidence > *UINT8Pointer) {
              Temp = *UINT8Pointer;
              *UINT8Pointer = Evidence;
              Evidence = Temp;
            }
            else if (Evidence == 0)
              break;
          }
        }
      }
    }
  }

  if (PrintFeatureMatchesOn(Debug)) {
    IMDebugConfigurationSum(FeatureNum, tables->feature_evidence_,
                            ClassTemplate->NumConfigs);
  }

  /* Add this feature's evidence to the running per-config sums and
     total it for the return value. */
  IntPointer = tables->sum_feature_evidence_;
  UINT8Pointer = tables->feature_evidence_;
  int SumOverConfigs = 0;
  for (ConfigNum = ClassTemplate->NumConfigs; ConfigNum > 0; ConfigNum--) {
    int evidence = *UINT8Pointer++;
    SumOverConfigs += evidence;
    *IntPointer++ += evidence;
  }
  return SumOverConfigs;
}
Doxygen Squashed commit from https://github.com/tesseract-ocr/tesseract/tree/more-doxygen closes #14 Commits: 6317305 doxygen 9f42f69 doxygen 0fc4d52 doxygen 37b4b55 fix typo bded8f1 some more doxy 020eb00 slight tweak 524666d doxygenify 2a36a3e doxygenify 229d218 doxygenify 7fd28ae doxygenify a8c64bc doxygenify f5d21b6 fix 5d8ede8 doxygenify a58a4e0 language_model.cpp fa85709 lm_pain_points.cpp lm_state.cpp 6418da3 merge 06190ba Merge branch 'old_doxygen_merge' into more-doxygen 84acf08 Merge branch 'master' into more-doxygen 50fe1ff pagewalk.cpp cube_reco_context.cpp 2982583 change to relative 192a24a applybox.cpp, take one 8eeb053 delete docs for obsolete params 52e4c77 modernise classify/ocrfeatures.cpp 2a1cba6 modernise cutil/emalloc.cpp 773e006 silence doxygen warning aeb1731 silence doxygen warning f18387f silence doxygen; new params are unused? 15ad6bd doxygenify cutil/efio.cpp c8b5dad doxygenify cutil/danerror.cpp 784450f the globals and exceptions parts are obsolete; remove 8bca324 doxygen classify/normfeat.cpp 9bcbe16 doxygen classify/normmatch.cpp aa9a971 doxygen ccmain/cube_control.cpp c083ff2 doxygen ccmain/cube_reco_context.cpp f842850 params changed 5c94f12 doxygen ccmain/cubeclassifier.cpp 15ba750 case sensitive f5c71d4 case sensitive f85655b doxygen classify/intproto.cpp 4bbc7aa partial doxygen classify/mfx.cpp dbb6041 partial doxygen classify/intproto.cpp 2aa72db finish doxygen classify/intproto.cpp 0b8de99 doxygen training/mftraining.cpp 0b5b35c partial doxygen ccstruct/coutln.cpp b81c766 partial doxygen ccstruct/coutln.cpp 40fc415 finished? 
doxygen ccstruct/coutln.cpp 6e4165c doxygen classify/clusttool.cpp 0267dec doxygen classify/cutoffs.cpp 7f0c70c doxygen classify/fpoint.cpp 512f3bd ignore ~ files 5668a52 doxygen classify/intmatcher.cpp 84788d4 doxygen classify/kdtree.cpp 29f36ca doxygen classify/mfoutline.cpp 40b94b1 silence doxygen warnings 6c511b9 doxygen classify/mfx.cpp f9b4080 doxygen classify/outfeat.cpp aa1df05 doxygen classify/picofeat.cpp cc5f466 doxygen training/cntraining.cpp cce044f doxygen training/commontraining.cpp 167e216 missing param 9498383 renamed params 37eeac2 renamed param d87b5dd case c8ee174 renamed params b858db8 typo 4c2a838 h2 context? 81a2c0c fix some param names; add some missing params, no docs bcf8a4c add some missing params, no docs af77f86 add some missing params, no docs; fix some param names 01df24e fix some params 6161056 fix some params 68508b6 fix some params 285aeb6 doxygen complains here no matter what 529bcfa rm some missing params, typos cd21226 rm some missing params, add some new ones 48a4bc2 fix params c844628 missing param 312ce37 missing param; rename one ec2fdec missing param 05e15e0 missing params d515858 change "<" to &lt; to make doxygen happy b476a28 wrong place
2014-09-13 04:41:19 +08:00
/**
 * Print debugging information for Configurations
 * @return none
 * @note Exceptions: none
 * @note History: Wed Feb 27 14:12:28 MST 1991, RWM, Created.
 */
#ifndef GRAPHICS_DISABLED
// Prints the debug output selected by the Debug flags.
// With PrintMatchSummaryOn(Debug): the config mask, the feature error per
// config, the proto mask and the proto error per config.
// With PrintProtoMatchesOn(Debug): every proto's evidence values, and the
// proto evidence sum and config length per config.
//   ClassTemplate  class whose protos and configs are reported
//   ProtoMask      proto bit vector, two words per proto set
//   ConfigMask     config bit vector; only its first word is printed
//   tables         evidence tables to report on
//   NumFeatures    divisor used for the per-config feature error
//   Debug          debug flags
void IntegerMatcher::DebugFeatureProtoError(
    INT_CLASS ClassTemplate,
    BIT_VECTOR ProtoMask,
    BIT_VECTOR ConfigMask,
    const ScratchEvidence& tables,
    inT16 NumFeatures,
    int Debug) {
  FLOAT32 ProtoConfigs[MAX_NUM_CONFIGS];
  int ConfigNum;
  uinT32 ConfigWord;
  int ProtoSetIndex;
  uinT16 ProtoNum;
  uinT8 ProtoWordNum;
  PROTO_SET ProtoSet;
  uinT16 ActualProtoNum;

  if (PrintMatchSummaryOn(Debug)) {
    cprintf("Configuration Mask:\n");
    for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
      cprintf("%1d", (((*ConfigMask) >> ConfigNum) & 1));
    cprintf("\n");

    cprintf("Feature Error for Configurations:\n");
    for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) {
      cprintf(
          " %5.1f",
          100.0 * (1.0 -
          (FLOAT32) tables.sum_feature_evidence_[ConfigNum]
          / NumFeatures / 256.0));
    }
    cprintf("\n\n\n");
  }

  if (PrintMatchSummaryOn (Debug)) {
    cprintf ("Proto Mask:\n");
    for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
         ProtoSetIndex++) {
      for (ProtoWordNum = 0; ProtoWordNum < 2;
           ProtoWordNum++, ProtoMask++) {
        // Each mask word covers half of a proto set, so the second word
        // starts half a set further on. Starting both words at the set
        // base would test the NumProtos bound against the wrong proto and
        // print bits for protos that do not exist.
        ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET) +
                         (ProtoWordNum * (PROTOS_PER_PROTO_SET >> 1));
        for (ProtoNum = 0;
             ((ProtoNum < (PROTOS_PER_PROTO_SET >> 1))
              && (ActualProtoNum < ClassTemplate->NumProtos));
             ProtoNum++, ActualProtoNum++)
          cprintf ("%1d", (((*ProtoMask) >> ProtoNum) & 1));
        cprintf ("\n");
      }
    }
    cprintf ("\n");
  }

  for (int i = 0; i < ClassTemplate->NumConfigs; i++)
    ProtoConfigs[i] = 0;

  if (PrintProtoMatchesOn (Debug)) {
    cprintf ("Proto Evidence:\n");
    for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
         ProtoSetIndex++) {
      ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
      ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
      for (ProtoNum = 0;
           ((ProtoNum < PROTOS_PER_PROTO_SET) &&
            (ActualProtoNum < ClassTemplate->NumProtos));
           ProtoNum++, ActualProtoNum++) {
        cprintf ("P %3d =", ActualProtoNum);
        // Total evidence stored for this proto.
        int temp = 0;
        for (int j = 0; j < ClassTemplate->ProtoLengths[ActualProtoNum]; j++) {
          uinT8 data = tables.proto_evidence_[ActualProtoNum][j];
          cprintf(" %d", data);
          temp += data;
        }

        cprintf(" = %6.4f%%\n",
                temp / 256.0 / ClassTemplate->ProtoLengths[ActualProtoNum]);

        // Credit the proto's total to every config it belongs to.
        ConfigWord = ProtoSet->Protos[ProtoNum].Configs[0];
        ConfigNum = 0;
        while (ConfigWord) {
          cprintf ("%5d", ConfigWord & 1 ? temp : 0);
          if (ConfigWord & 1)
            ProtoConfigs[ConfigNum] += temp;
          ConfigNum++;
          ConfigWord >>= 1;
        }
        cprintf("\n");
      }
    }
  }

  if (PrintMatchSummaryOn (Debug)) {
    cprintf ("Proto Error for Configurations:\n");
    for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
      cprintf (" %5.1f",
               100.0 * (1.0 -
               ProtoConfigs[ConfigNum] /
               ClassTemplate->ConfigLengths[ConfigNum] / 256.0));
    cprintf ("\n\n");
  }

  if (PrintProtoMatchesOn (Debug)) {
    cprintf ("Proto Sum for Configurations:\n");
    for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
      cprintf (" %4.1f", ProtoConfigs[ConfigNum] / 256.0);
    cprintf ("\n\n");

    cprintf ("Proto Length for Configurations:\n");
    for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
      cprintf (" %4.1f",
               (float) ClassTemplate->ConfigLengths[ConfigNum]);
    cprintf ("\n\n");
  }
}
// Displays every proto of the class that belongs to at least one config
// selected by the first word of ConfigMask, passing DisplayIntProto the
// proto's average stored evidence divided by 255.
//   ClassTemplate         class whose protos are displayed
//   ProtoMask             not used by this function
//   ConfigMask            config bit vector; only its first word is read
//   tables                evidence tables holding the proto evidence
//   SeparateDebugWindows  also initialise the feature and proto windows
void IntegerMatcher::DisplayProtoDebugInfo(
    INT_CLASS ClassTemplate,
    BIT_VECTOR ProtoMask,
    BIT_VECTOR ConfigMask,
    const ScratchEvidence& tables,
    bool SeparateDebugWindows) {
  InitIntMatchWindowIfReqd();
  if (SeparateDebugWindows) {
    InitFeatureDisplayWindowIfReqd();
    InitProtoDisplayWindowIfReqd();
  }

  for (int set_index = 0; set_index < ClassTemplate->NumProtoSets;
       ++set_index) {
    PROTO_SET proto_set = ClassTemplate->ProtoSets[set_index];
    uinT16 proto_id = set_index * PROTOS_PER_PROTO_SET;
    for (uinT16 slot = 0; slot < PROTOS_PER_PROTO_SET; ++slot, ++proto_id) {
      // The last set may be only partly filled.
      if (!(proto_id < ClassTemplate->NumProtos))
        break;

      /* Average the evidence stored for this proto (integer division). */
      const uinT8* evidence_row = tables.proto_evidence_[proto_id];
      int average = 0;
      for (int k = 0; k < ClassTemplate->ProtoLengths[proto_id]; ++k) {
        average += evidence_row[k];
      }
      average /= ClassTemplate->ProtoLengths[proto_id];

      const bool in_selected_config =
          ((proto_set->Protos[slot]).Configs[0] & (*ConfigMask)) != 0;
      if (in_selected_config) {
        DisplayIntProto(ClassTemplate, proto_id, average / 255.0);
      }
    }
  }
}
// Re-matches every feature against the class using a scratch evidence
// table and displays each feature with a value derived from its best
// per-config evidence.
// When ClipMatchEvidenceOn(Debug) is true the value is 0.0 below
// AdaptFeatureThreshold and 1.0 otherwise; if not, it is best / 255.
//   ClassTemplate          class to match against
//   ProtoMask, ConfigMask  forwarded to UpdateTablesForFeature
//   NumFeatures            number of entries in Features
//   Features               features to match and display
//   AdaptFeatureThreshold  clipping threshold on the best evidence
//   Debug                  debug flags
//   SeparateDebugWindows   also initialise the feature and proto windows
void IntegerMatcher::DisplayFeatureDebugInfo(
    INT_CLASS ClassTemplate,
    BIT_VECTOR ProtoMask,
    BIT_VECTOR ConfigMask,
    inT16 NumFeatures,
    const INT_FEATURE_STRUCT* Features,
    int AdaptFeatureThreshold,
    int Debug,
    bool SeparateDebugWindows) {
  ScratchEvidence *scratch = new ScratchEvidence();
  scratch->Clear(ClassTemplate);

  InitIntMatchWindowIfReqd();
  if (SeparateDebugWindows) {
    InitFeatureDisplayWindowIfReqd();
    InitProtoDisplayWindowIfReqd();
  }

  for (int index = 0; index < NumFeatures; ++index) {
    const INT_FEATURE_STRUCT* current = &Features[index];
    // Debug output of the matcher itself is suppressed (Debug = 0).
    UpdateTablesForFeature(
        ClassTemplate, ProtoMask, ConfigMask, index, current, scratch, 0);

    /* Find Best Evidence for Current Feature */
    int best = 0;
    for (int config = 0; config < ClassTemplate->NumConfigs; ++config) {
      if (scratch->feature_evidence_[config] > best) {
        best = scratch->feature_evidence_[config];
      }
    }

    /* Update display for current feature */
    double shade;
    if (!ClipMatchEvidenceOn(Debug)) {
      shade = best / 255.0;
    } else if (best < AdaptFeatureThreshold) {
      shade = 0.0;
    } else {
      shade = 1.0;
    }
    DisplayIntFeature(current, shade);
  }

  delete scratch;
}
#endif
Doxygen Squashed commit from https://github.com/tesseract-ocr/tesseract/tree/more-doxygen closes #14 Commits: 6317305 doxygen 9f42f69 doxygen 0fc4d52 doxygen 37b4b55 fix typo bded8f1 some more doxy 020eb00 slight tweak 524666d doxygenify 2a36a3e doxygenify 229d218 doxygenify 7fd28ae doxygenify a8c64bc doxygenify f5d21b6 fix 5d8ede8 doxygenify a58a4e0 language_model.cpp fa85709 lm_pain_points.cpp lm_state.cpp 6418da3 merge 06190ba Merge branch 'old_doxygen_merge' into more-doxygen 84acf08 Merge branch 'master' into more-doxygen 50fe1ff pagewalk.cpp cube_reco_context.cpp 2982583 change to relative 192a24a applybox.cpp, take one 8eeb053 delete docs for obsolete params 52e4c77 modernise classify/ocrfeatures.cpp 2a1cba6 modernise cutil/emalloc.cpp 773e006 silence doxygen warning aeb1731 silence doxygen warning f18387f silence doxygen; new params are unused? 15ad6bd doxygenify cutil/efio.cpp c8b5dad doxygenify cutil/danerror.cpp 784450f the globals and exceptions parts are obsolete; remove 8bca324 doxygen classify/normfeat.cpp 9bcbe16 doxygen classify/normmatch.cpp aa9a971 doxygen ccmain/cube_control.cpp c083ff2 doxygen ccmain/cube_reco_context.cpp f842850 params changed 5c94f12 doxygen ccmain/cubeclassifier.cpp 15ba750 case sensitive f5c71d4 case sensitive f85655b doxygen classify/intproto.cpp 4bbc7aa partial doxygen classify/mfx.cpp dbb6041 partial doxygen classify/intproto.cpp 2aa72db finish doxygen classify/intproto.cpp 0b8de99 doxygen training/mftraining.cpp 0b5b35c partial doxygen ccstruct/coutln.cpp b81c766 partial doxygen ccstruct/coutln.cpp 40fc415 finished? 
doxygen ccstruct/coutln.cpp 6e4165c doxygen classify/clusttool.cpp 0267dec doxygen classify/cutoffs.cpp 7f0c70c doxygen classify/fpoint.cpp 512f3bd ignore ~ files 5668a52 doxygen classify/intmatcher.cpp 84788d4 doxygen classify/kdtree.cpp 29f36ca doxygen classify/mfoutline.cpp 40b94b1 silence doxygen warnings 6c511b9 doxygen classify/mfx.cpp f9b4080 doxygen classify/outfeat.cpp aa1df05 doxygen classify/picofeat.cpp cc5f466 doxygen training/cntraining.cpp cce044f doxygen training/commontraining.cpp 167e216 missing param 9498383 renamed params 37eeac2 renamed param d87b5dd case c8ee174 renamed params b858db8 typo 4c2a838 h2 context? 81a2c0c fix some param names; add some missing params, no docs bcf8a4c add some missing params, no docs af77f86 add some missing params, no docs; fix some param names 01df24e fix some params 6161056 fix some params 68508b6 fix some params 285aeb6 doxygen complains here no matter what 529bcfa rm some missing params, typos cd21226 rm some missing params, add some new ones 48a4bc2 fix params c844628 missing param 312ce37 missing param; rename one ec2fdec missing param 05e15e0 missing params d515858 change "<" to &lt; to make doxygen happy b476a28 wrong place
2014-09-13 04:41:19 +08:00
/**
 * Add sum of Proto Evidences into Sum Of Feature Evidence Array.
 * For every proto of the class, the total of its stored evidence is added
 * to the sum of each config the proto belongs to, restricted to the configs
 * selected by the first word of ConfigMask.
 * @param ClassTemplate Class whose protos are summed
 * @param ConfigMask Config bit vector; only its first word is read
 * @param NumFeatures Not used by this function
 */
void ScratchEvidence::UpdateSumOfProtoEvidences(
    INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, inT16 NumFeatures) {
  const int total_protos = ClassTemplate->NumProtos;

  for (int set_index = 0; set_index < ClassTemplate->NumProtoSets;
       ++set_index) {
    PROTO_SET proto_set = ClassTemplate->ProtoSets[set_index];
    uinT16 proto_id = (set_index * PROTOS_PER_PROTO_SET);
    for (uinT16 slot = 0;
         (slot < PROTOS_PER_PROTO_SET) && (proto_id < total_protos);
         ++slot, ++proto_id) {
      // Total evidence stored for this proto.
      int proto_total = 0;
      for (int k = 0; k < ClassTemplate->ProtoLengths[proto_id]; ++k) {
        proto_total += proto_evidence_[proto_id][k];
      }

      // Walk the selected config bits, lowest first; bit n maps to sum n.
      uinT32 config_bits = proto_set->Protos[slot].Configs[0];
      config_bits &= *ConfigMask;
      for (int *config_sum = sum_feature_evidence_; config_bits != 0;
           ++config_sum, config_bits >>= 1) {
        if ((config_bits & 1) != 0) {
          *config_sum += proto_total;
        }
      }
    }
  }
}
Doxygen Squashed commit from https://github.com/tesseract-ocr/tesseract/tree/more-doxygen closes #14 Commits: 6317305 doxygen 9f42f69 doxygen 0fc4d52 doxygen 37b4b55 fix typo bded8f1 some more doxy 020eb00 slight tweak 524666d doxygenify 2a36a3e doxygenify 229d218 doxygenify 7fd28ae doxygenify a8c64bc doxygenify f5d21b6 fix 5d8ede8 doxygenify a58a4e0 language_model.cpp fa85709 lm_pain_points.cpp lm_state.cpp 6418da3 merge 06190ba Merge branch 'old_doxygen_merge' into more-doxygen 84acf08 Merge branch 'master' into more-doxygen 50fe1ff pagewalk.cpp cube_reco_context.cpp 2982583 change to relative 192a24a applybox.cpp, take one 8eeb053 delete docs for obsolete params 52e4c77 modernise classify/ocrfeatures.cpp 2a1cba6 modernise cutil/emalloc.cpp 773e006 silence doxygen warning aeb1731 silence doxygen warning f18387f silence doxygen; new params are unused? 15ad6bd doxygenify cutil/efio.cpp c8b5dad doxygenify cutil/danerror.cpp 784450f the globals and exceptions parts are obsolete; remove 8bca324 doxygen classify/normfeat.cpp 9bcbe16 doxygen classify/normmatch.cpp aa9a971 doxygen ccmain/cube_control.cpp c083ff2 doxygen ccmain/cube_reco_context.cpp f842850 params changed 5c94f12 doxygen ccmain/cubeclassifier.cpp 15ba750 case sensitive f5c71d4 case sensitive f85655b doxygen classify/intproto.cpp 4bbc7aa partial doxygen classify/mfx.cpp dbb6041 partial doxygen classify/intproto.cpp 2aa72db finish doxygen classify/intproto.cpp 0b8de99 doxygen training/mftraining.cpp 0b5b35c partial doxygen ccstruct/coutln.cpp b81c766 partial doxygen ccstruct/coutln.cpp 40fc415 finished? 
doxygen ccstruct/coutln.cpp 6e4165c doxygen classify/clusttool.cpp 0267dec doxygen classify/cutoffs.cpp 7f0c70c doxygen classify/fpoint.cpp 512f3bd ignore ~ files 5668a52 doxygen classify/intmatcher.cpp 84788d4 doxygen classify/kdtree.cpp 29f36ca doxygen classify/mfoutline.cpp 40b94b1 silence doxygen warnings 6c511b9 doxygen classify/mfx.cpp f9b4080 doxygen classify/outfeat.cpp aa1df05 doxygen classify/picofeat.cpp cc5f466 doxygen training/cntraining.cpp cce044f doxygen training/commontraining.cpp 167e216 missing param 9498383 renamed params 37eeac2 renamed param d87b5dd case c8ee174 renamed params b858db8 typo 4c2a838 h2 context? 81a2c0c fix some param names; add some missing params, no docs bcf8a4c add some missing params, no docs af77f86 add some missing params, no docs; fix some param names 01df24e fix some params 6161056 fix some params 68508b6 fix some params 285aeb6 doxygen complains here no matter what 529bcfa rm some missing params, typos cd21226 rm some missing params, add some new ones 48a4bc2 fix params c844628 missing param 312ce37 missing param; rename one ec2fdec missing param 05e15e0 missing params d515858 change "<" to &lt; to make doxygen happy b476a28 wrong place
2014-09-13 04:41:19 +08:00
/**
 * Normalize Sum of Proto and Feature Evidence: each config's sum is scaled
 * by 256 and divided by the number of features plus that config's length.
 * @param ClassTemplate Class supplying NumConfigs and ConfigLengths
 * @param NumFeatures Number of features added to each config length
 * @param used_features Not used by this function
 */
void ScratchEvidence::NormalizeSums(
    INT_CLASS ClassTemplate, inT16 NumFeatures, inT32 used_features) {
  int *config_sum = sum_feature_evidence_;
  for (int config = 0; config < ClassTemplate->NumConfigs;
       ++config, ++config_sum) {
    *config_sum = (*config_sum << 8) /
        (NumFeatures + ClassTemplate->ConfigLengths[config]);
  }
}
Doxygen Squashed commit from https://github.com/tesseract-ocr/tesseract/tree/more-doxygen closes #14 Commits: 6317305 doxygen 9f42f69 doxygen 0fc4d52 doxygen 37b4b55 fix typo bded8f1 some more doxy 020eb00 slight tweak 524666d doxygenify 2a36a3e doxygenify 229d218 doxygenify 7fd28ae doxygenify a8c64bc doxygenify f5d21b6 fix 5d8ede8 doxygenify a58a4e0 language_model.cpp fa85709 lm_pain_points.cpp lm_state.cpp 6418da3 merge 06190ba Merge branch 'old_doxygen_merge' into more-doxygen 84acf08 Merge branch 'master' into more-doxygen 50fe1ff pagewalk.cpp cube_reco_context.cpp 2982583 change to relative 192a24a applybox.cpp, take one 8eeb053 delete docs for obsolete params 52e4c77 modernise classify/ocrfeatures.cpp 2a1cba6 modernise cutil/emalloc.cpp 773e006 silence doxygen warning aeb1731 silence doxygen warning f18387f silence doxygen; new params are unused? 15ad6bd doxygenify cutil/efio.cpp c8b5dad doxygenify cutil/danerror.cpp 784450f the globals and exceptions parts are obsolete; remove 8bca324 doxygen classify/normfeat.cpp 9bcbe16 doxygen classify/normmatch.cpp aa9a971 doxygen ccmain/cube_control.cpp c083ff2 doxygen ccmain/cube_reco_context.cpp f842850 params changed 5c94f12 doxygen ccmain/cubeclassifier.cpp 15ba750 case sensitive f5c71d4 case sensitive f85655b doxygen classify/intproto.cpp 4bbc7aa partial doxygen classify/mfx.cpp dbb6041 partial doxygen classify/intproto.cpp 2aa72db finish doxygen classify/intproto.cpp 0b8de99 doxygen training/mftraining.cpp 0b5b35c partial doxygen ccstruct/coutln.cpp b81c766 partial doxygen ccstruct/coutln.cpp 40fc415 finished? 
doxygen ccstruct/coutln.cpp 6e4165c doxygen classify/clusttool.cpp 0267dec doxygen classify/cutoffs.cpp 7f0c70c doxygen classify/fpoint.cpp 512f3bd ignore ~ files 5668a52 doxygen classify/intmatcher.cpp 84788d4 doxygen classify/kdtree.cpp 29f36ca doxygen classify/mfoutline.cpp 40b94b1 silence doxygen warnings 6c511b9 doxygen classify/mfx.cpp f9b4080 doxygen classify/outfeat.cpp aa1df05 doxygen classify/picofeat.cpp cc5f466 doxygen training/cntraining.cpp cce044f doxygen training/commontraining.cpp 167e216 missing param 9498383 renamed params 37eeac2 renamed param d87b5dd case c8ee174 renamed params b858db8 typo 4c2a838 h2 context? 81a2c0c fix some param names; add some missing params, no docs bcf8a4c add some missing params, no docs af77f86 add some missing params, no docs; fix some param names 01df24e fix some params 6161056 fix some params 68508b6 fix some params 285aeb6 doxygen complains here no matter what 529bcfa rm some missing params, typos cd21226 rm some missing params, add some new ones 48a4bc2 fix params c844628 missing param 312ce37 missing param; rename one ec2fdec missing param 05e15e0 missing params d515858 change "<" to &lt; to make doxygen happy b476a28 wrong place
2014-09-13 04:41:19 +08:00
/**
 * Find the best match for the current class and update the Result
 * with the configuration and match rating.
 * Every config's rating is also appended to result->fonts, and the
 * result's rating is set to the best rating divided by 65536.
 * @param class_template Class whose configs are scanned
 * @param tables Evidence tables holding the per-config sums
 * @param result Receives the best config, the rating and the font scores
 * @return The best normalized sum of evidences
 * @note Exceptions: none
 * @note History: Wed Feb 27 14:12:28 MST 1991, RWM, Created.
 */
int IntegerMatcher::FindBestMatch(
    INT_CLASS class_template,
    const ScratchEvidence &tables,
    UnicharRating* result) {
  result->config = 0;
  result->fonts.truncate(0);
  result->fonts.reserve(class_template->NumConfigs);

  /* Find best match */
  int top_rating = 0;
  for (int config = 0; config < class_template->NumConfigs; ++config) {
    const int rating = tables.sum_feature_evidence_[config];
    if (*classify_debug_level_ > 2) {
      tprintf("Config %d, rating=%d\n", config, rating);
    }
    result->fonts.push_back(ScoredFont(config, rating));
    // Strict comparison: the earliest config wins ties.
    if (rating > top_rating) {
      top_rating = rating;
      result->config = config;
    }
  }

  // Compute confidence on a Probability scale.
  result->rating = top_rating / 65536.0f;

  return top_rating;
}
Doxygen Squashed commit from https://github.com/tesseract-ocr/tesseract/tree/more-doxygen closes #14 Commits: 6317305 doxygen 9f42f69 doxygen 0fc4d52 doxygen 37b4b55 fix typo bded8f1 some more doxy 020eb00 slight tweak 524666d doxygenify 2a36a3e doxygenify 229d218 doxygenify 7fd28ae doxygenify a8c64bc doxygenify f5d21b6 fix 5d8ede8 doxygenify a58a4e0 language_model.cpp fa85709 lm_pain_points.cpp lm_state.cpp 6418da3 merge 06190ba Merge branch 'old_doxygen_merge' into more-doxygen 84acf08 Merge branch 'master' into more-doxygen 50fe1ff pagewalk.cpp cube_reco_context.cpp 2982583 change to relative 192a24a applybox.cpp, take one 8eeb053 delete docs for obsolete params 52e4c77 modernise classify/ocrfeatures.cpp 2a1cba6 modernise cutil/emalloc.cpp 773e006 silence doxygen warning aeb1731 silence doxygen warning f18387f silence doxygen; new params are unused? 15ad6bd doxygenify cutil/efio.cpp c8b5dad doxygenify cutil/danerror.cpp 784450f the globals and exceptions parts are obsolete; remove 8bca324 doxygen classify/normfeat.cpp 9bcbe16 doxygen classify/normmatch.cpp aa9a971 doxygen ccmain/cube_control.cpp c083ff2 doxygen ccmain/cube_reco_context.cpp f842850 params changed 5c94f12 doxygen ccmain/cubeclassifier.cpp 15ba750 case sensitive f5c71d4 case sensitive f85655b doxygen classify/intproto.cpp 4bbc7aa partial doxygen classify/mfx.cpp dbb6041 partial doxygen classify/intproto.cpp 2aa72db finish doxygen classify/intproto.cpp 0b8de99 doxygen training/mftraining.cpp 0b5b35c partial doxygen ccstruct/coutln.cpp b81c766 partial doxygen ccstruct/coutln.cpp 40fc415 finished? 
doxygen ccstruct/coutln.cpp 6e4165c doxygen classify/clusttool.cpp 0267dec doxygen classify/cutoffs.cpp 7f0c70c doxygen classify/fpoint.cpp 512f3bd ignore ~ files 5668a52 doxygen classify/intmatcher.cpp 84788d4 doxygen classify/kdtree.cpp 29f36ca doxygen classify/mfoutline.cpp 40b94b1 silence doxygen warnings 6c511b9 doxygen classify/mfx.cpp f9b4080 doxygen classify/outfeat.cpp aa1df05 doxygen classify/picofeat.cpp cc5f466 doxygen training/cntraining.cpp cce044f doxygen training/commontraining.cpp 167e216 missing param 9498383 renamed params 37eeac2 renamed param d87b5dd case c8ee174 renamed params b858db8 typo 4c2a838 h2 context? 81a2c0c fix some param names; add some missing params, no docs bcf8a4c add some missing params, no docs af77f86 add some missing params, no docs; fix some param names 01df24e fix some params 6161056 fix some params 68508b6 fix some params 285aeb6 doxygen complains here no matter what 529bcfa rm some missing params, typos cd21226 rm some missing params, add some new ones 48a4bc2 fix params c844628 missing param 312ce37 missing param; rename one ec2fdec missing param 05e15e0 missing params d515858 change "<" to &lt; to make doxygen happy b476a28 wrong place
2014-09-13 04:41:19 +08:00
/**
 * Applies the CN normalization factor to the given rating and returns
 * the modified rating.
 *
 * The result is the weighted average
 *   (rating * blob_length + matcher_multiplier * normalization_factor / 256)
 *       / (blob_length + matcher_multiplier)
 * @param rating Rating to be corrected (weighted by blob_length)
 * @param blob_length Weight given to the incoming rating
 * @param normalization_factor Normalization term; scaled down by 256
 * @param matcher_multiplier Weight given to the normalization term
 * @return The corrected rating, or 1.0f when
 *         blob_length + matcher_multiplier is zero
 */
float IntegerMatcher::ApplyCNCorrection(float rating, int blob_length,
                                        int normalization_factor,
                                        int matcher_multiplier) {
  const int divisor = blob_length + matcher_multiplier;
  if (divisor == 0) {
    // Both weights cancel out, so the weighted average is undefined and the
    // division below would produce NaN or infinity. Return a finite value.
    // NOTE(review): 1.0f is presumably the worst value on the scale callers
    // pass in - confirm against the call sites.
    return 1.0f;
  }
  // 256.0 keeps the arithmetic in double precision until the final
  // conversion to float on return.
  return (rating * blob_length +
          matcher_multiplier * normalization_factor / 256.0) / divisor;
}
Doxygen Squashed commit from https://github.com/tesseract-ocr/tesseract/tree/more-doxygen closes #14 Commits: 6317305 doxygen 9f42f69 doxygen 0fc4d52 doxygen 37b4b55 fix typo bded8f1 some more doxy 020eb00 slight tweak 524666d doxygenify 2a36a3e doxygenify 229d218 doxygenify 7fd28ae doxygenify a8c64bc doxygenify f5d21b6 fix 5d8ede8 doxygenify a58a4e0 language_model.cpp fa85709 lm_pain_points.cpp lm_state.cpp 6418da3 merge 06190ba Merge branch 'old_doxygen_merge' into more-doxygen 84acf08 Merge branch 'master' into more-doxygen 50fe1ff pagewalk.cpp cube_reco_context.cpp 2982583 change to relative 192a24a applybox.cpp, take one 8eeb053 delete docs for obsolete params 52e4c77 modernise classify/ocrfeatures.cpp 2a1cba6 modernise cutil/emalloc.cpp 773e006 silence doxygen warning aeb1731 silence doxygen warning f18387f silence doxygen; new params are unused? 15ad6bd doxygenify cutil/efio.cpp c8b5dad doxygenify cutil/danerror.cpp 784450f the globals and exceptions parts are obsolete; remove 8bca324 doxygen classify/normfeat.cpp 9bcbe16 doxygen classify/normmatch.cpp aa9a971 doxygen ccmain/cube_control.cpp c083ff2 doxygen ccmain/cube_reco_context.cpp f842850 params changed 5c94f12 doxygen ccmain/cubeclassifier.cpp 15ba750 case sensitive f5c71d4 case sensitive f85655b doxygen classify/intproto.cpp 4bbc7aa partial doxygen classify/mfx.cpp dbb6041 partial doxygen classify/intproto.cpp 2aa72db finish doxygen classify/intproto.cpp 0b8de99 doxygen training/mftraining.cpp 0b5b35c partial doxygen ccstruct/coutln.cpp b81c766 partial doxygen ccstruct/coutln.cpp 40fc415 finished? 
doxygen ccstruct/coutln.cpp 6e4165c doxygen classify/clusttool.cpp 0267dec doxygen classify/cutoffs.cpp 7f0c70c doxygen classify/fpoint.cpp 512f3bd ignore ~ files 5668a52 doxygen classify/intmatcher.cpp 84788d4 doxygen classify/kdtree.cpp 29f36ca doxygen classify/mfoutline.cpp 40b94b1 silence doxygen warnings 6c511b9 doxygen classify/mfx.cpp f9b4080 doxygen classify/outfeat.cpp aa1df05 doxygen classify/picofeat.cpp cc5f466 doxygen training/cntraining.cpp cce044f doxygen training/commontraining.cpp 167e216 missing param 9498383 renamed params 37eeac2 renamed param d87b5dd case c8ee174 renamed params b858db8 typo 4c2a838 h2 context? 81a2c0c fix some param names; add some missing params, no docs bcf8a4c add some missing params, no docs af77f86 add some missing params, no docs; fix some param names 01df24e fix some params 6161056 fix some params 68508b6 fix some params 285aeb6 doxygen complains here no matter what 529bcfa rm some missing params, typos cd21226 rm some missing params, add some new ones 48a4bc2 fix params c844628 missing param 312ce37 missing param; rename one ec2fdec missing param 05e15e0 missing params d515858 change "<" to &lt; to make doxygen happy b476a28 wrong place
2014-09-13 04:41:19 +08:00
/**
 * Sort Key array in ascending order using heap sort
 * algorithm. Also sort Index array that is tied to
 * the key array.
 *
 * Both arrays are addressed with 1-based indices: elements [1..n] are
 * sorted and element 0 is never touched. When n is less than 2 there is
 * nothing to sort and the arrays are left unchanged.
 * @param n Number of elements to sort
 * @param ra Key array [1..n]
 * @param rb Index array [1..n]
 * @return none
 * @note Exceptions: none
 * @note History: Tue Feb 19 10:24:24 MST 1991, RWM, Created.
 */
void
HeapSort (int n, int ra[], int rb[]) {
  // Zero or one element is already sorted. Without this guard the loop
  // below never hits its "--ir == 1" exit and indexes below element 1.
  if (n < 2)
    return;

  int l = (n >> 1) + 1;  // While l > 1 the heap is still being built.
  int ir = n;            // Last index of the region that is still a heap.

  for (;;) {
    int rra, rrb;  // Key/index pair currently being sifted down.
    if (l > 1) {
      // Heap construction: take the next parent node, moving towards the root.
      --l;
      rra = ra[l];
      rrb = rb[l];
    } else {
      // Heap extraction: move the current maximum to the end of the heap
      // region and sift the displaced last element down from the root.
      rra = ra[ir];
      rrb = rb[ir];
      ra[ir] = ra[1];
      rb[ir] = rb[1];
      if (--ir == 1) {
        // Only one slot remains; the displaced element is the minimum.
        ra[1] = rra;
        rb[1] = rrb;
        return;
      }
    }

    // Sift (rra, rrb) down from position l to its place in the heap.
    int i = l;
    int j = l << 1;
    while (j <= ir) {
      // Pick the larger of the two children.
      if (j < ir && ra[j] < ra[j + 1])
        ++j;
      if (!(rra < ra[j]))
        break;  // Heap property holds here; the slot at i is final.
      // Promote the child and descend one level.
      ra[i] = ra[j];
      rb[i] = rb[j];
      i = j;
      j <<= 1;
    }
    ra[i] = rra;
    rb[i] = rrb;
  }
}