Refactored classifier to make it easier to add new ones and generalized feature extractor to allow fx from grey

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@873 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
theraysmith@gmail.com 2013-09-23 15:15:06 +00:00
parent 2aafc9df24
commit 99edf4ccbd
48 changed files with 2192 additions and 1797 deletions

View File

@ -9,7 +9,7 @@ AM_CPPFLAGS += -DTESS_EXPORTS \
endif
noinst_HEADERS = \
adaptive.h baseline.h blobclass.h chartoname.h \
adaptive.h blobclass.h chartoname.h \
classify.h cluster.h clusttool.h cutoffs.h \
errorcounter.h extern.h extract.h \
featdefs.h flexfx.h float2int.h fpoint.h fxdefs.h \
@ -19,7 +19,7 @@ noinst_HEADERS = \
normfeat.h normmatch.h \
ocrfeatures.h outfeat.h picofeat.h protos.h \
sampleiterator.h shapeclassifier.h shapetable.h \
speckle.h tessclassifier.h trainingsample.h trainingsampleset.h xform2d.h
tessclassifier.h trainingsample.h trainingsampleset.h xform2d.h
if !USING_MULTIPLELIBS
noinst_LTLIBRARIES = libtesseract_classify.la
@ -45,7 +45,7 @@ libtesseract_classify_la_SOURCES = \
mastertrainer.cpp mf.cpp mfdefs.cpp mfoutline.cpp mfx.cpp \
normfeat.cpp normmatch.cpp \
ocrfeatures.cpp outfeat.cpp picofeat.cpp protos.cpp \
sampleiterator.cpp shapetable.cpp speckle.cpp \
sampleiterator.cpp shapeclassifier.cpp shapetable.cpp \
tessclassifier.cpp trainingsample.cpp trainingsampleset.cpp xform2d.cpp

File diff suppressed because it is too large Load Diff

View File

@ -1,41 +0,0 @@
/* -*-C-*-
********************************************************************************
*
* File: baseline.h (Formerly baseline.h)
* Description:
* Author: Mark Seaman, SW Productivity
* Created: Fri Oct 16 14:37:00 1987
* Modified: Wed Feb 27 13:39:35 1991 (Mark Seaman) marks@hpgrlt
* Language: C
* Package: N/A
* Status: Reusable Software Component
*
* (c) Copyright 1987, Hewlett-Packard Company.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
*************************************************************************/
#ifndef BASELINE_H
#define BASELINE_H
/*----------------------------------------------------------------------
I n c l u d e s
----------------------------------------------------------------------*/
#include "host.h"
#include "blobs.h"
#include "params.h"
/*----------------------------------------------------------------------
T y p e s
----------------------------------------------------------------------*/
#define BASELINE_OFFSET 64
#define BASELINE_SCALE 128
#endif

View File

@ -49,8 +49,11 @@ extern char imagefile[];
----------------------------------------------------------------------------**/
/*---------------------------------------------------------------------------*/
// As all TBLOBs, Blob is in baseline normalized coords.
// See SetupBLCNDenorms in intfx.cpp for other args.
void LearnBlob(const FEATURE_DEFS_STRUCT &FeatureDefs, const STRING& filename,
TBLOB * Blob, const DENORM& denorm, const char* BlobText) {
TBLOB * Blob, const DENORM& bl_denorm, const DENORM& cn_denorm,
const INT_FX_RESULT_STRUCT& fx_info, const char* BlobText) {
/*
** Parameters:
** Blob blob whose micro-features are to be learned
@ -95,18 +98,20 @@ void LearnBlob(const FEATURE_DEFS_STRUCT &FeatureDefs, const STRING& filename,
cprintf("TRAINING ... Font name = %s\n", CurrFontName.string());
}
LearnBlob(FeatureDefs, FeatureFile, Blob, denorm, BlobText,
CurrFontName.string());
LearnBlob(FeatureDefs, FeatureFile, Blob, bl_denorm, cn_denorm, fx_info,
BlobText, CurrFontName.string());
} // LearnBlob
void LearnBlob(const FEATURE_DEFS_STRUCT &FeatureDefs, FILE* FeatureFile,
TBLOB* Blob, const DENORM& denorm,
TBLOB* Blob, const DENORM& bl_denorm, const DENORM& cn_denorm,
const INT_FX_RESULT_STRUCT& fx_info,
const char* BlobText, const char* FontName) {
CHAR_DESC CharDesc;
ASSERT_HOST(FeatureFile != NULL);
CharDesc = ExtractBlobFeatures(FeatureDefs, denorm, Blob);
CharDesc = ExtractBlobFeatures(FeatureDefs, bl_denorm, cn_denorm, fx_info,
Blob);
if (CharDesc == NULL) {
cprintf("LearnBLob: CharDesc was NULL. Aborting.\n");
return;

View File

@ -40,11 +40,14 @@
Public Function Prototypes
----------------------------------------------------------------------------**/
void LearnBlob(const FEATURE_DEFS_STRUCT &FeatureDefs, const STRING& filename,
TBLOB * Blob, const DENORM& denorm, const char* BlobText);
TBLOB * Blob, const DENORM& bl_denorm, const DENORM& cn_denorm,
const INT_FX_RESULT_STRUCT& fx_info,
const char* BlobText);
void LearnBlob(const FEATURE_DEFS_STRUCT &FeatureDefs, FILE* File, TBLOB* Blob,
const DENORM& denorm, const char* BlobText,
const char* FontName);
const DENORM& bl_denorm, const DENORM& cn_denorm,
const INT_FX_RESULT_STRUCT& fx_info,
const char* BlobText, const char* FontName);
/**----------------------------------------------------------------------------
Global Data Definitions and Declarations

View File

@ -26,6 +26,7 @@
#include "intproto.h"
#include "mfoutline.h"
#include "scrollview.h"
#include "shapeclassifier.h"
#include "shapetable.h"
#include "unicity_table.h"
#include <string.h>
@ -52,6 +53,11 @@ Classify::Classify()
this->params()), /* PREV DEFAULT 0.1 */
double_MEMBER(classify_max_norm_scale_y, 0.325, "Max char y-norm scale ...",
this->params()), /* PREV DEFAULT 0.3 */
double_MEMBER(classify_max_rating_ratio, 1.5,
"Veto ratio between classifier ratings", this->params()),
double_MEMBER(classify_max_certainty_margin, 5.5,
"Veto difference between classifier certainties",
this->params()),
BOOL_MEMBER(tess_cn_matching, 0, "Character Normalized Matching",
this->params()),
BOOL_MEMBER(tess_bn_matching, 0, "Baseline Normalized Matching",
@ -65,6 +71,8 @@ Classify::Classify()
"Save adapted templates to a file", this->params()),
BOOL_MEMBER(classify_enable_adaptive_debugger, 0, "Enable match debugger",
this->params()),
BOOL_MEMBER(classify_nonlinear_norm, 0,
"Non-linear stroke-density normalization", this->params()),
INT_MEMBER(matcher_debug_level, 0, "Matcher Debug Level", this->params()),
INT_MEMBER(matcher_debug_flags, 0, "Matcher Debug Flags", this->params()),
INT_MEMBER(classify_learning_debug_level, 0, "Learning Debug Level: ",
@ -100,6 +108,12 @@ Classify::Classify()
this->params()),
double_MEMBER(tessedit_class_miss_scale, 0.00390625,
"Scale factor for features not used", this->params()),
double_MEMBER(classify_adapted_pruning_factor, 2.5,
"Prune poor adapted results this much worse than best result",
this->params()),
double_MEMBER(classify_adapted_pruning_threshold, -1.0,
"Threshold at which classify_adapted_pruning_factor starts",
this->params()),
INT_MEMBER(classify_adapt_proto_threshold, 230,
"Threshold for good protos during adaptive 0-255",
this->params()),
@ -122,19 +136,24 @@ Classify::Classify()
this->params()),
INT_MEMBER(classify_class_pruner_threshold, 229,
"Class Pruner Threshold 0-255", this->params()),
INT_MEMBER(classify_class_pruner_multiplier, 30,
INT_MEMBER(classify_class_pruner_multiplier, 15,
"Class Pruner Multiplier 0-255: ", this->params()),
INT_MEMBER(classify_cp_cutoff_strength, 7,
"Class Pruner CutoffStrength: ", this->params()),
INT_MEMBER(classify_integer_matcher_multiplier, 14,
INT_MEMBER(classify_integer_matcher_multiplier, 10,
"Integer Matcher Multiplier 0-255: ", this->params()),
EnableLearning(true),
INT_MEMBER(il1_adaption_test, 0, "Dont adapt to i/I at beginning of word",
this->params()),
BOOL_MEMBER(classify_bln_numeric_mode, 0,
"Assume the input is numbers [0-9].", this->params()),
double_MEMBER(speckle_large_max_size, 0.30, "Max large speckle size",
this->params()),
double_MEMBER(speckle_rating_penalty, 10.0,
"Penalty to add to worst rating for noise", this->params()),
shape_table_(NULL),
dict_(&image_) {
dict_(&image_),
static_classifier_(NULL) {
fontinfo_table_.set_compare_callback(
NewPermanentTessCallback(CompareFontInfo));
fontinfo_table_.set_clear_callback(
@ -184,4 +203,45 @@ Classify::~Classify() {
delete[] BaselineCutoffs;
}
// Takes ownership of the given classifier, and uses it for future calls
// to CharNormClassifier. Any classifier that was previously owned is deleted.
// Passing the classifier that is already owned is a no-op: without this
// check the object would be deleted and then kept as a dangling pointer.
void Classify::SetStaticClassifier(ShapeClassifier* static_classifier) {
  if (static_classifier == static_classifier_) return;
  delete static_classifier_;
  static_classifier_ = static_classifier;
}
// Moved from speckle.cpp
// Adds a noise classification result that is a bit worse than the worst
// current result, or the worst possible result if no current results.
void Classify::AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST *choices) {
BLOB_CHOICE_IT bc_it(choices);
// If there is no classifier result, we will use the worst possible certainty
// and corresponding rating.
float certainty = -getDict().certainty_scale;
float rating = rating_scale * blob_length;
if (!choices->empty() && blob_length > 0) {
bc_it.move_to_last();
BLOB_CHOICE* worst_choice = bc_it.data();
// Add speckle_rating_penalty to worst rating, matching old value.
rating = worst_choice->rating() + speckle_rating_penalty;
// Compute the rating to correspond to the certainty. (Used to be kept
// the same, but that messes up the language model search.)
certainty = -rating * getDict().certainty_scale /
(rating_scale * blob_length);
}
BLOB_CHOICE* blob_choice = new BLOB_CHOICE(UNICHAR_SPACE, rating, certainty,
-1, -1, 0, 0, MAX_FLOAT32, 0,
BCC_SPECKLE_CLASSIFIER);
bc_it.add_to_end(blob_choice);
}
// Returns true if the blob is small enough to be a large speckle.
bool Classify::LargeSpeckle(const TBLOB &blob) {
double speckle_size = kBlnXHeight * speckle_large_max_size;
TBOX bbox = blob.bounding_box();
return bbox.width() < speckle_size && bbox.height() < speckle_size;
}
} // namespace tesseract

View File

@ -43,8 +43,10 @@ static const int kBlankFontinfoId = -2;
namespace tesseract {
class ShapeClassifier;
struct ShapeRating;
class ShapeTable;
struct UnicharRating;
// How segmented is a blob. In this enum, character refers to a classifiable
// unit, but that is too long and character is usually easier to understand.
@ -67,6 +69,17 @@ class Classify : public CCStruct {
return shape_table_;
}
// Takes ownership of the given classifier, and uses it for future calls
// to CharNormClassifier.
void SetStaticClassifier(ShapeClassifier* static_classifier);
// Adds a noise classification result that is a bit worse than the worst
// current result, or the worst possible result if no current results.
void AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST *choices);
// Returns true if the blob is small enough to be a large speckle.
bool LargeSpeckle(const TBLOB &blob);
/* adaptive.cpp ************************************************************/
ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset);
int GetFontinfoId(ADAPT_CLASS Class, uinT8 ConfigId);
@ -112,9 +125,7 @@ class Classify : public CCStruct {
// incorrectly segmented blobs. If filename is not NULL, then LearnBlob
// is called and the data will be written to a file for static training.
// Otherwise AdaptToBlob is called for adaption within a document.
// If rejmap is not NULL, then only chars with a rejmap entry of '1' will
// be learned, otherwise all chars with good correct_text are learned.
void LearnWord(const char* filename, const char *rejmap, WERD_RES *word);
void LearnWord(const char* filename, WERD_RES *word);
// Builds a blob of length fragments, from the word, starting at start,
// and then learn it, as having the given correct_text.
@ -130,18 +141,15 @@ class Classify : public CCStruct {
const char* correct_text, WERD_RES *word);
void InitAdaptiveClassifier(bool load_pre_trained_templates);
void InitAdaptedClass(TBLOB *Blob,
const DENORM& denorm,
CLASS_ID ClassId,
int FontinfoId,
ADAPT_CLASS Class,
ADAPT_TEMPLATES Templates);
void AdaptToPunc(TBLOB *Blob,
const DENORM& denorm,
CLASS_ID ClassId,
int FontinfoId,
FLOAT32 Threshold);
void AmbigClassifier(TBLOB *Blob,
const DENORM& denorm,
INT_TEMPLATES Templates,
ADAPT_CLASS *Classes,
UNICHAR_ID *Ambiguities,
@ -194,15 +202,8 @@ class Classify : public CCStruct {
#ifndef GRAPHICS_DISABLED
void DebugAdaptiveClassifier(TBLOB *Blob,
const DENORM& denorm,
ADAPT_RESULTS *Results);
#endif
void GetAdaptThresholds (TWERD * Word,
const DENORM& denorm,
const WERD_CHOICE& BestChoice,
const WERD_CHOICE& BestRawChoice,
FLOAT32 Thresholds[]);
PROTO_ID MakeNewTempProtos(FEATURE_SET Features,
int NumBadFeat,
FEATURE_ID BadFeat[],
@ -218,19 +219,14 @@ class Classify : public CCStruct {
void MakePermanent(ADAPT_TEMPLATES Templates,
CLASS_ID ClassId,
int ConfigId,
const DENORM& denorm,
TBLOB *Blob);
void PrintAdaptiveMatchResults(FILE *File, ADAPT_RESULTS *Results);
void RemoveExtraPuncs(ADAPT_RESULTS *Results);
void RemoveBadMatches(ADAPT_RESULTS *Results);
void SetAdaptiveThreshold(FLOAT32 Threshold);
void ShowBestMatchFor(TBLOB *Blob,
const DENORM& denorm,
CLASS_ID ClassId,
int shape_id,
BOOL8 AdaptiveOn,
BOOL8 PreTrainedOn,
ADAPT_RESULTS *Results);
void ShowBestMatchFor(int shape_id,
const INT_FEATURE_STRUCT* features,
int num_features);
// Returns a string for the classifier class_id: either the corresponding
// unicharset debug_str or the shape_table_ debug str.
STRING ClassIDToDebugStr(const INT_TEMPLATES_STRUCT* templates,
@ -251,59 +247,46 @@ class Classify : public CCStruct {
// unichar-id!). Uses a search, so not fast.
int ShapeIDToClassID(int shape_id) const;
UNICHAR_ID *BaselineClassifier(TBLOB *Blob,
const DENORM& denorm,
ADAPT_TEMPLATES Templates,
ADAPT_RESULTS *Results);
int CharNormClassifier(TBLOB *Blob,
const DENORM& denorm,
INT_TEMPLATES Templates,
ADAPT_RESULTS *Results);
// As CharNormClassifier, but operates on a TrainingSample and outputs to
// a GenericVector of ShapeRating without conversion to classes.
int CharNormTrainingSample(bool pruner_only, const TrainingSample& sample,
GenericVector<ShapeRating>* results);
UNICHAR_ID *GetAmbiguities(TBLOB *Blob,
const DENORM& denorm,
CLASS_ID CorrectClass);
void DoAdaptiveMatch(TBLOB *Blob,
const DENORM& denorm,
ADAPT_RESULTS *Results);
int CharNormTrainingSample(bool pruner_only, int keep_this,
const TrainingSample& sample,
GenericVector<UnicharRating>* results);
UNICHAR_ID *GetAmbiguities(TBLOB *Blob, CLASS_ID CorrectClass);
void DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results);
void AdaptToChar(TBLOB *Blob,
const DENORM& denorm,
CLASS_ID ClassId,
int FontinfoId,
FLOAT32 Threshold);
void DisplayAdaptedChar(TBLOB* blob, const DENORM& denorm,
INT_CLASS_STRUCT* int_class);
int AdaptableWord(TWERD *Word,
const WERD_CHOICE &BestChoiceWord,
const WERD_CHOICE &RawChoiceWord);
void DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class);
bool AdaptableWord(WERD_RES* word);
void EndAdaptiveClassifier();
void PrintAdaptiveStatistics(FILE *File);
void SettupPass1();
void SettupPass2();
void AdaptiveClassifier(TBLOB *Blob,
const DENORM& denorm,
BLOB_CHOICE_LIST *Choices,
CLASS_PRUNER_RESULTS cp_results);
void ClassifyAsNoise(ADAPT_RESULTS *Results);
void ResetAdaptiveClassifierInternal();
int GetBaselineFeatures(TBLOB *Blob,
const DENORM& denorm,
INT_TEMPLATES Templates,
INT_FEATURE_ARRAY IntFeatures,
uinT8* CharNormArray,
inT32 *BlobLength);
int GetCharNormFeatures(TBLOB *Blob,
const DENORM& denorm,
INT_TEMPLATES Templates,
INT_FEATURE_ARRAY IntFeatures,
uinT8* PrunerNormArray,
uinT8* CharNormArray,
inT32 *BlobLength,
inT32 *FeatureOutlineIndex);
inT32 *BlobLength);
// Computes the char_norm_array for the unicharset and, if not NULL, the
// pruner_array as appropriate according to the existence of the shape_table.
// The norm_feature is deleted as it is almost certainly no longer needed.
@ -313,13 +296,54 @@ class Classify : public CCStruct {
uinT8* pruner_array);
bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config);
void UpdateAmbigsGroup(CLASS_ID class_id, const DENORM& denorm, TBLOB *Blob);
void UpdateAmbigsGroup(CLASS_ID class_id, TBLOB *Blob);
void ResetFeaturesHaveBeenExtracted();
bool AdaptiveClassifierIsFull() { return NumAdaptationsFailed > 0; }
bool LooksLikeGarbage(const DENORM& denorm, TBLOB *blob);
bool LooksLikeGarbage(TBLOB *blob);
void RefreshDebugWindow(ScrollView **win, const char *msg,
int y_offset, const TBOX &wbox);
// intfx.cpp
// Computes the DENORMS for bl(baseline) and cn(character) normalization
// during feature extraction. The input denorm describes the current state
// of the blob, which is usually a baseline-normalized word.
// The transforms are set up as follows:
// Baseline Normalized (bl) Output:
// We center the grapheme by aligning the x-coordinate of its centroid with
// x=128 and leaving the already-baseline-normalized y as-is.
//
// Character Normalized (cn) Output:
// We align the grapheme's centroid at the origin and scale it
// asymmetrically in x and y so that the 2nd moments are a standard value
// (51.2) ie the result is vaguely square.
// If classify_nonlinear_norm is true:
// A non-linear normalization is set up that attempts to evenly distribute
// edges across x and y.
//
// Some of the fields of fx_info are also set up:
// Length: Total length of outline.
// Rx: Rounded y second moment. (Reversed by convention.)
// Ry: Rounded x second moment.
// Xmean: Rounded x center of mass of the blob.
// Ymean: Rounded y center of mass of the blob.
static void SetupBLCNDenorms(const TBLOB& blob, bool nonlinear_norm,
DENORM* bl_denorm, DENORM* cn_denorm,
INT_FX_RESULT_STRUCT* fx_info);
// Extracts sets of 3-D features of length kStandardFeatureLength (=12.8), as
// (x,y) position and angle as measured counterclockwise from the vector
// <-1, 0>, from blob using two normalizations defined by bl_denorm and
// cn_denorm. See SetupBLCNDenorms for definitions.
// If outline_cn_counts is not NULL, on return it contains the cumulative
// number of cn features generated for each outline in the blob (in order).
// Thus after the first outline, there were (*outline_cn_counts)[0] features,
// after the second outline, there were (*outline_cn_counts)[1] features etc.
static void ExtractFeatures(const TBLOB& blob,
bool nonlinear_norm,
GenericVector<INT_FEATURE_STRUCT>* bl_features,
GenericVector<INT_FEATURE_STRUCT>* cn_features,
INT_FX_RESULT_STRUCT* results,
GenericVector<int>* outline_cn_counts);
/* float2int.cpp ************************************************************/
void ClearCharNormArray(uinT8* char_norm_array);
void ComputeIntCharNormArray(const FEATURE_STRUCT& norm_feature,
@ -336,6 +360,9 @@ class Classify : public CCStruct {
UnicityTable<FontInfo>& get_fontinfo_table() {
return fontinfo_table_;
}
const UnicityTable<FontInfo>& get_fontinfo_table() const {
return fontinfo_table_;
}
UnicityTable<FontSet>& get_fontset_table() {
return fontset_table_;
}
@ -365,6 +392,10 @@ class Classify : public CCStruct {
double_VAR_H(classify_max_norm_scale_x, 0.325, "Max char x-norm scale ...");
double_VAR_H(classify_min_norm_scale_y, 0.0, "Min char y-norm scale ...");
double_VAR_H(classify_max_norm_scale_y, 0.325, "Max char y-norm scale ...");
double_VAR_H(classify_max_rating_ratio, 1.5,
"Veto ratio between classifier ratings");
double_VAR_H(classify_max_certainty_margin, 5.5,
"Veto difference between classifier certainties");
/* adaptmatch.cpp ***********************************************************/
BOOL_VAR_H(tess_cn_matching, 0, "Character Normalized Matching");
@ -375,6 +406,8 @@ class Classify : public CCStruct {
BOOL_VAR_H(classify_save_adapted_templates, 0,
"Save adapted templates to a file");
BOOL_VAR_H(classify_enable_adaptive_debugger, 0, "Enable match debugger");
BOOL_VAR_H(classify_nonlinear_norm, 0,
"Non-linear stroke-density normalization");
INT_VAR_H(matcher_debug_level, 0, "Matcher Debug Level");
INT_VAR_H(matcher_debug_flags, 0, "Matcher Debug Flags");
INT_VAR_H(classify_learning_debug_level, 0, "Learning Debug Level: ");
@ -398,6 +431,10 @@ class Classify : public CCStruct {
double_VAR_H(certainty_scale, 20.0, "Certainty scaling factor");
double_VAR_H(tessedit_class_miss_scale, 0.00390625,
"Scale factor for features not used");
double_VAR_H(classify_adapted_pruning_factor, 2.5,
"Prune poor adapted results this much worse than best result");
double_VAR_H(classify_adapted_pruning_threshold, -1.0,
"Threshold at which classify_adapted_pruning_factor starts");
INT_VAR_H(classify_adapt_proto_threshold, 230,
"Threshold for good protos during adaptive 0-255");
INT_VAR_H(classify_adapt_feature_threshold, 230,
@ -418,11 +455,11 @@ class Classify : public CCStruct {
/* intmatcher.cpp **********************************************************/
INT_VAR_H(classify_class_pruner_threshold, 229,
"Class Pruner Threshold 0-255");
INT_VAR_H(classify_class_pruner_multiplier, 30,
INT_VAR_H(classify_class_pruner_multiplier, 15,
"Class Pruner Multiplier 0-255: ");
INT_VAR_H(classify_cp_cutoff_strength, 7,
"Class Pruner CutoffStrength: ");
INT_VAR_H(classify_integer_matcher_multiplier, 14,
INT_VAR_H(classify_integer_matcher_multiplier, 10,
"Integer Matcher Multiplier 0-255: ");
// Use class variables to hold onto built-in templates and adapted templates.
@ -453,6 +490,9 @@ class Classify : public CCStruct {
INT_VAR_H(il1_adaption_test, 0, "Dont adapt to i/I at beginning of word");
BOOL_VAR_H(classify_bln_numeric_mode, 0,
"Assume the input is numbers [0-9].");
double_VAR_H(speckle_large_max_size, 0.30, "Max large speckle size");
double_VAR_H(speckle_rating_penalty, 10.0,
"Penalty to add to worst rating for noise");
protected:
IntegerMatcher im_;
@ -466,6 +506,8 @@ class Classify : public CCStruct {
private:
Dict dict_;
// The currently active static classifier.
ShapeClassifier* static_classifier_;
/* variables used to hold performance statistics */
int AdaptiveMatcherCalls;

View File

@ -15,11 +15,12 @@
** See the License for the specific language governing permissions and
** limitations under the License.
******************************************************************************/
#include "oldheap.h"
#include "const.h"
#include "cluster.h"
#include "emalloc.h"
#include "genericheap.h"
#include "helpers.h"
#include "kdpair.h"
#include "matrix.h"
#include "tprintf.h"
#include "danerror.h"
@ -164,6 +165,9 @@ struct TEMPCLUSTER {
CLUSTER *Neighbor;
};
typedef tesseract::KDPairInc<float, TEMPCLUSTER*> ClusterPair;
typedef tesseract::GenericHeap<ClusterPair> ClusterHeap;
struct STATISTICS {
FLOAT32 AvgVariance;
FLOAT32 *CoVariance;
@ -190,7 +194,7 @@ struct CHISTRUCT{
// For use with KDWalk / MakePotentialClusters
struct ClusteringContext {
HEAP *heap; // heap used to hold temp clusters, "best" on top
ClusterHeap *heap; // heap used to hold temp clusters, "best" on top
TEMPCLUSTER *candidates; // array of potential clusters
KDTREE *tree; // kd-tree to be searched for neighbors
inT32 next; // next candidate to be used
@ -693,7 +697,7 @@ History: 5/29/89, DSJ, Created.
******************************************************************************/
void CreateClusterTree(CLUSTERER *Clusterer) {
ClusteringContext context;
HEAPENTRY HeapEntry;
ClusterPair HeapEntry;
TEMPCLUSTER *PotentialCluster;
// each sample and its nearest neighbor form a "potential" cluster
@ -702,12 +706,12 @@ void CreateClusterTree(CLUSTERER *Clusterer) {
context.candidates = (TEMPCLUSTER *)
Emalloc(Clusterer->NumberOfSamples * sizeof(TEMPCLUSTER));
context.next = 0;
context.heap = MakeHeap(Clusterer->NumberOfSamples);
context.heap = new ClusterHeap(Clusterer->NumberOfSamples);
KDWalk(context.tree, (void_proc)MakePotentialClusters, &context);
// form potential clusters into actual clusters - always do "best" first
while (GetTopOfHeap(context.heap, &HeapEntry) != EMPTY) {
PotentialCluster = (TEMPCLUSTER *)HeapEntry.Data;
while (context.heap->Pop(&HeapEntry)) {
PotentialCluster = HeapEntry.data;
// if main cluster of potential cluster is already in another cluster
// then we don't need to worry about it
@ -720,9 +724,9 @@ void CreateClusterTree(CLUSTERER *Clusterer) {
else if (PotentialCluster->Neighbor->Clustered) {
PotentialCluster->Neighbor =
FindNearestNeighbor(context.tree, PotentialCluster->Cluster,
&HeapEntry.Key);
&HeapEntry.key);
if (PotentialCluster->Neighbor != NULL) {
HeapStore(context.heap, &HeapEntry);
context.heap->Push(&HeapEntry);
}
}
@ -732,9 +736,9 @@ void CreateClusterTree(CLUSTERER *Clusterer) {
MakeNewCluster(Clusterer, PotentialCluster);
PotentialCluster->Neighbor =
FindNearestNeighbor(context.tree, PotentialCluster->Cluster,
&HeapEntry.Key);
&HeapEntry.key);
if (PotentialCluster->Neighbor != NULL) {
HeapStore(context.heap, &HeapEntry);
context.heap->Push(&HeapEntry);
}
}
}
@ -745,7 +749,7 @@ void CreateClusterTree(CLUSTERER *Clusterer) {
// free up the memory used by the K-D tree, heap, and temp clusters
FreeKDTree(context.tree);
Clusterer->KDTree = NULL;
FreeHeap(context.heap);
delete context.heap;
memfree(context.candidates);
} // CreateClusterTree
@ -763,16 +767,16 @@ void CreateClusterTree(CLUSTERER *Clusterer) {
******************************************************************************/
void MakePotentialClusters(ClusteringContext *context,
CLUSTER *Cluster, inT32 Level) {
HEAPENTRY HeapEntry;
ClusterPair HeapEntry;
int next = context->next;
context->candidates[next].Cluster = Cluster;
HeapEntry.Data = (char *) &(context->candidates[next]);
HeapEntry.data = &(context->candidates[next]);
context->candidates[next].Neighbor =
FindNearestNeighbor(context->tree,
context->candidates[next].Cluster,
&HeapEntry.Key);
&HeapEntry.key);
if (context->candidates[next].Neighbor != NULL) {
HeapStore(context->heap, &HeapEntry);
context->heap->Push(&HeapEntry);
context->next++;
}
} // MakePotentialClusters

View File

@ -27,6 +27,9 @@
namespace tesseract {
// Difference in result rating to be thought of as an "equal" choice.
const double kRatingEpsilon = 1.0 / 32;
// Tests a classifier, computing its error rate.
// See errorcounter.h for description of arguments.
// Iterates over the samples, calling the classifier in normal/silent mode.
@ -35,14 +38,12 @@ namespace tesseract {
// with a debug flag and a keep_this argument to find out what is going on.
double ErrorCounter::ComputeErrorRate(ShapeClassifier* classifier,
int report_level, CountTypes boosting_mode,
const UnicityTable<FontInfo>& fontinfo_table,
const FontInfoTable& fontinfo_table,
const GenericVector<Pix*>& page_images, SampleIterator* it,
double* unichar_error, double* scaled_error, STRING* fonts_report) {
int charsetsize = it->shape_table()->unicharset().size();
int shapesize = it->CompactCharsetSize();
int fontsize = it->sample_set()->NumFonts();
ErrorCounter counter(charsetsize, shapesize, fontsize);
GenericVector<ShapeRating> results;
ErrorCounter counter(classifier->GetUnicharset(), fontsize);
GenericVector<UnicharRating> results;
clock_t start = clock();
int total_samples = 0;
@ -56,21 +57,28 @@ double ErrorCounter::ComputeErrorRate(ShapeClassifier* classifier,
Pix* page_pix = 0 <= page_index && page_index < page_images.size()
? page_images[page_index] : NULL;
// No debug, no keep this.
classifier->ClassifySample(*mutable_sample, page_pix, 0, INVALID_UNICHAR_ID,
&results);
if (mutable_sample->class_id() == 0) {
classifier->UnicharClassifySample(*mutable_sample, page_pix, 0,
INVALID_UNICHAR_ID, &results);
bool debug_it = false;
int correct_id = mutable_sample->class_id();
if (counter.unicharset_.has_special_codes() &&
(correct_id == UNICHAR_SPACE || correct_id == UNICHAR_JOINED ||
correct_id == UNICHAR_BROKEN)) {
// This is junk so use the special counter.
counter.AccumulateJunk(*it->shape_table(), results, mutable_sample);
} else if (counter.AccumulateErrors(report_level > 3, boosting_mode,
fontinfo_table, *it->shape_table(),
results, mutable_sample) &&
error_samples > 0) {
debug_it = counter.AccumulateJunk(report_level > 3,
results,
mutable_sample);
} else {
debug_it = counter.AccumulateErrors(report_level > 3, boosting_mode,
fontinfo_table,
results, mutable_sample);
}
if (debug_it && error_samples > 0) {
// Running debug, keep the correct answer, and debug the classifier.
tprintf("Error on sample %d: Classifier debug output:\n",
it->GlobalSampleIndex());
int keep_this = it->GetSparseClassID();
classifier->ClassifySample(*mutable_sample, page_pix, 1, keep_this,
&results);
tprintf("Error on sample %d: %s Classifier debug output:\n",
it->GlobalSampleIndex(),
it->sample_set()->SampleToString(*mutable_sample).string());
classifier->DebugDisplay(*mutable_sample, page_pix, correct_id);
--error_samples;
}
++total_samples;
@ -89,12 +97,70 @@ double ErrorCounter::ComputeErrorRate(ShapeClassifier* classifier,
return unscaled_error;
}
// Tests a pair of classifiers, debugging errors of the new against the old.
// See errorcounter.h for description of arguments.
// Every sample is first classified silently by old_classifier. Samples with
// class_id 0, and samples for which the old classifier's result is counted
// as an error, are skipped. The remaining samples are classified silently by
// new_classifier; if that result is counted as an error, a message is
// printed, the sample is re-classified by new_classifier with the debug
// argument set to 1 and the correct id passed in, and (for at most 25
// samples that produce results) DebugDisplay is called.
// The total number of new errors is printed at the end.
void ErrorCounter::DebugNewErrors(
    ShapeClassifier* new_classifier, ShapeClassifier* old_classifier,
    CountTypes boosting_mode,
    const FontInfoTable& fontinfo_table,
    const GenericVector<Pix*>& page_images, SampleIterator* it) {
  int num_fonts = it->sample_set()->NumFonts();
  ErrorCounter old_counter(old_classifier->GetUnicharset(), num_fonts);
  ErrorCounter new_counter(new_classifier->GetUnicharset(), num_fonts);
  GenericVector<UnicharRating> results;

  // Tally of samples visited (not currently reported).
  int samples_seen = 0;
  // Remaining number of errors for which DebugDisplay may still be called.
  int displays_left = 25;
  int new_error_count = 0;

  for (it->Begin(); !it->AtEnd(); it->Next(), ++samples_seen) {
    TrainingSample* sample = it->MutableSample();
    int page_index = sample->page_num();
    Pix* page_pix = NULL;
    if (page_index >= 0 && page_index < page_images.size())
      page_pix = page_images[page_index];

    // Silent run of the old classifier: no debug, no keep this.
    old_classifier->UnicharClassifySample(*sample, page_pix, 0,
                                          INVALID_UNICHAR_ID, &results);
    int correct_id = sample->class_id();
    if (correct_id == 0)
      continue;
    if (old_counter.AccumulateErrors(true, boosting_mode, fontinfo_table,
                                     results, sample))
      continue;  // The old classifier was wrong too: not a new error.

    // The old classifier was correct, so check the new one.
    new_classifier->UnicharClassifySample(*sample, page_pix, 0,
                                          INVALID_UNICHAR_ID, &results);
    if (!new_counter.AccumulateErrors(true, boosting_mode, fontinfo_table,
                                      results, sample))
      continue;  // The new classifier was also correct.

    tprintf("New Error on sample %d: Classifier debug output:\n",
            it->GlobalSampleIndex());
    ++new_error_count;
    // Re-run with debug on, keeping the correct answer.
    new_classifier->UnicharClassifySample(*sample, page_pix, 1,
                                          correct_id, &results);
    if (results.size() > 0 && displays_left > 0) {
      new_classifier->DebugDisplay(*sample, page_pix, correct_id);
      --displays_left;
    }
  }
  tprintf("Total new errors = %d\n", new_error_count);
}
// Constructor is private. Only anticipated use of ErrorCounter is via
// the static ComputeErrorRate.
ErrorCounter::ErrorCounter(int charsetsize, int shapesize, int fontsize)
: scaled_error_(0.0), unichar_counts_(charsetsize, shapesize, 0) {
ErrorCounter::ErrorCounter(const UNICHARSET& unicharset, int fontsize)
: scaled_error_(0.0), rating_epsilon_(kRatingEpsilon),
unichar_counts_(unicharset.size(), unicharset.size(), 0),
ok_score_hist_(0, 101), bad_score_hist_(0, 101),
unicharset_(unicharset) {
Counts empty_counts;
font_counts_.init_to_size(fontsize, empty_counts);
multi_unichar_counts_.init_to_size(unicharset.size(), 0);
}
ErrorCounter::~ErrorCounter() {
}
@ -107,13 +173,11 @@ ErrorCounter::~ErrorCounter() {
// for error counting and shape_table is used to understand the relationship
// between unichar_ids and shape_ids in the results
bool ErrorCounter::AccumulateErrors(bool debug, CountTypes boosting_mode,
const UnicityTable<FontInfo>& font_table,
const ShapeTable& shape_table,
const GenericVector<ShapeRating>& results,
const FontInfoTable& font_table,
const GenericVector<UnicharRating>& results,
TrainingSample* sample) {
int num_results = results.size();
int res_index = 0;
bool debug_it = false;
int answer_actual_rank = -1;
int font_id = sample->font_id();
int unichar_id = sample->class_id();
sample->set_is_error(false);
@ -123,107 +187,143 @@ bool ErrorCounter::AccumulateErrors(bool debug, CountTypes boosting_mode,
// improve the classifier.
sample->set_is_error(true);
++font_counts_[font_id].n[CT_REJECT];
} else if (shape_table.GetShape(results[0].shape_id).
ContainsUnicharAndFont(unichar_id, font_id)) {
++font_counts_[font_id].n[CT_SHAPE_TOP_CORRECT];
// Unichar and font OK, but count if multiple unichars.
if (shape_table.GetShape(results[0].shape_id).size() > 1)
++font_counts_[font_id].n[CT_OK_MULTI_UNICHAR];
} else {
// This is a top shape error.
++font_counts_[font_id].n[CT_SHAPE_TOP_ERR];
// Check to see if any font in the top choice has attributes that match.
bool attributes_match = false;
uinT32 font_props = font_table.get(font_id).properties;
const Shape& shape = shape_table.GetShape(results[0].shape_id);
for (int c = 0; c < shape.size() && !attributes_match; ++c) {
for (int f = 0; f < shape[c].font_ids.size(); ++f) {
if (font_table.get(shape[c].font_ids[f]).properties == font_props) {
attributes_match = true;
break;
}
// Find rank of correct unichar answer, using rating_epsilon_ to allow
// different answers to score as equal. (Ignoring the font.)
int epsilon_rank = 0;
int answer_epsilon_rank = -1;
int num_top_answers = 0;
double prev_rating = results[0].rating;
bool joined = false;
bool broken = false;
int res_index = 0;
while (res_index < num_results) {
if (results[res_index].rating < prev_rating - rating_epsilon_) {
++epsilon_rank;
prev_rating = results[res_index].rating;
}
}
// TODO(rays) It is easy to add counters for individual font attributes
// here if we want them.
if (!attributes_match)
++font_counts_[font_id].n[CT_FONT_ATTR_ERR];
if (boosting_mode == CT_SHAPE_TOP_ERR) sample->set_is_error(true);
// Find rank of correct unichar answer. (Ignoring the font.)
while (res_index < num_results &&
!shape_table.GetShape(results[res_index].shape_id).
ContainsUnichar(unichar_id)) {
if (results[res_index].unichar_id == unichar_id &&
answer_epsilon_rank < 0) {
answer_epsilon_rank = epsilon_rank;
answer_actual_rank = res_index;
}
if (results[res_index].unichar_id == UNICHAR_JOINED &&
unicharset_.has_special_codes())
joined = true;
else if (results[res_index].unichar_id == UNICHAR_BROKEN &&
unicharset_.has_special_codes())
broken = true;
else if (epsilon_rank == 0)
++num_top_answers;
++res_index;
}
if (res_index == 0) {
if (answer_actual_rank != 0) {
// Correct result is not absolute top.
++font_counts_[font_id].n[CT_UNICHAR_TOPTOP_ERR];
if (boosting_mode == CT_UNICHAR_TOPTOP_ERR) sample->set_is_error(true);
}
if (answer_epsilon_rank == 0) {
++font_counts_[font_id].n[CT_UNICHAR_TOP_OK];
// Unichar OK, but count if multiple unichars.
if (shape_table.GetShape(results[res_index].shape_id).size() > 1) {
if (num_top_answers > 1) {
++font_counts_[font_id].n[CT_OK_MULTI_UNICHAR];
++multi_unichar_counts_[unichar_id];
}
// Check to see if any font in the top choice has attributes that match.
// TODO(rays) It is easy to add counters for individual font attributes
// here if we want them.
if (font_table.SetContainsFontProperties(
font_id, results[answer_actual_rank].fonts)) {
// Font attributes were matched.
// Check for multiple properties.
if (font_table.SetContainsMultipleFontProperties(
results[answer_actual_rank].fonts))
++font_counts_[font_id].n[CT_OK_MULTI_FONT];
} else {
// Font attributes weren't matched.
++font_counts_[font_id].n[CT_FONT_ATTR_ERR];
}
} else {
// Count maps from unichar id to shape id.
if (num_results > 0)
++unichar_counts_(unichar_id, results[0].shape_id);
// This is a unichar error.
// This is a top unichar error.
++font_counts_[font_id].n[CT_UNICHAR_TOP1_ERR];
if (boosting_mode == CT_UNICHAR_TOP1_ERR) sample->set_is_error(true);
if (res_index >= MIN(2, num_results)) {
// Count maps from unichar id to wrong unichar id.
++unichar_counts_(unichar_id, results[0].unichar_id);
if (answer_epsilon_rank < 0 || answer_epsilon_rank >= 2) {
// It is also a 2nd choice unichar error.
++font_counts_[font_id].n[CT_UNICHAR_TOP2_ERR];
if (boosting_mode == CT_UNICHAR_TOP2_ERR) sample->set_is_error(true);
}
if (res_index >= num_results) {
if (answer_epsilon_rank < 0) {
// It is also a top-n choice unichar error.
++font_counts_[font_id].n[CT_UNICHAR_TOPN_ERR];
if (boosting_mode == CT_UNICHAR_TOPN_ERR) sample->set_is_error(true);
debug_it = debug;
answer_epsilon_rank = epsilon_rank;
}
}
// Compute mean number of return values and mean rank of correct answer.
font_counts_[font_id].n[CT_NUM_RESULTS] += num_results;
font_counts_[font_id].n[CT_RANK] += answer_epsilon_rank;
if (joined)
++font_counts_[font_id].n[CT_OK_JOINED];
if (broken)
++font_counts_[font_id].n[CT_OK_BROKEN];
}
// Compute mean number of return values and mean rank of correct answer.
font_counts_[font_id].n[CT_NUM_RESULTS] += num_results;
font_counts_[font_id].n[CT_RANK] += res_index;
// If it was an error for boosting then sum the weight.
if (sample->is_error()) {
scaled_error_ += sample->weight();
}
if (debug_it) {
tprintf("%d results for char %s font %d :",
num_results, shape_table.unicharset().id_to_unichar(unichar_id),
font_id);
for (int i = 0; i < num_results; ++i) {
tprintf(" %.3f/%.3f:%s",
results[i].rating, results[i].font,
shape_table.DebugStr(results[i].shape_id).string());
if (debug) {
tprintf("%d results for char %s font %d :",
num_results, unicharset_.id_to_unichar(unichar_id),
font_id);
for (int i = 0; i < num_results; ++i) {
tprintf(" %.3f : %s\n",
results[i].rating,
unicharset_.id_to_unichar(results[i].unichar_id));
}
return true;
}
tprintf("\n");
return true;
int percent = 0;
if (num_results > 0)
percent = IntCastRounded(results[0].rating * 100);
bad_score_hist_.add(percent, 1);
} else {
int percent = 0;
if (answer_actual_rank >= 0)
percent = IntCastRounded(results[answer_actual_rank].rating * 100);
ok_score_hist_.add(percent, 1);
}
return false;
}
// Accumulates counts for junk. Counts only whether the junk was correctly
// rejected or not.
void ErrorCounter::AccumulateJunk(const ShapeTable& shape_table,
const GenericVector<ShapeRating>& results,
bool ErrorCounter::AccumulateJunk(bool debug,
const GenericVector<UnicharRating>& results,
TrainingSample* sample) {
// For junk we accept no answer, or an explicit shape answer matching the
// class id of the sample.
int num_results = results.size();
int font_id = sample->font_id();
int unichar_id = sample->class_id();
if (num_results > 0 &&
!shape_table.GetShape(results[0].shape_id).ContainsUnichar(unichar_id)) {
int percent = 0;
if (num_results > 0)
percent = IntCastRounded(results[0].rating * 100);
if (num_results > 0 && results[0].unichar_id != unichar_id) {
// This is a junk error.
++font_counts_[font_id].n[CT_ACCEPTED_JUNK];
sample->set_is_error(true);
// It counts as an error for boosting too so sum the weight.
scaled_error_ += sample->weight();
bad_score_hist_.add(percent, 1);
return debug;
} else {
// Correctly rejected.
++font_counts_[font_id].n[CT_REJECTED_JUNK];
sample->set_is_error(false);
ok_score_hist_.add(percent, 1);
}
return false;
}
// Creates a report of the error rate. The report_level controls the detail
@ -239,7 +339,7 @@ void ErrorCounter::AccumulateJunk(const ShapeTable& shape_table,
// If not NULL, the report string is saved in fonts_report.
// (Ignoring report_level).
double ErrorCounter::ReportErrors(int report_level, CountTypes boosting_mode,
const UnicityTable<FontInfo>& fontinfo_table,
const FontInfoTable& fontinfo_table,
const SampleIterator& it,
double* unichar_error,
STRING* fonts_report) {
@ -251,7 +351,7 @@ double ErrorCounter::ReportErrors(int report_level, CountTypes boosting_mode,
// Accumulate counts over fonts.
totals += font_counts_[f];
STRING font_report;
if (ReportString(font_counts_[f], &font_report)) {
if (ReportString(false, font_counts_[f], &font_report)) {
if (fonts_report != NULL) {
*fonts_report += fontinfo_table.get(f).name;
*fonts_report += ": ";
@ -264,39 +364,59 @@ double ErrorCounter::ReportErrors(int report_level, CountTypes boosting_mode,
}
}
}
// Report the totals.
STRING total_report;
bool any_results = ReportString(true, totals, &total_report);
if (fonts_report != NULL && fonts_report->length() == 0) {
// Make sure we return something even if there were no samples.
*fonts_report = "NoSamplesFound: ";
*fonts_report += total_report;
*fonts_report += "\n";
}
if (report_level > 0) {
// Report the totals.
STRING total_report;
if (ReportString(totals, &total_report)) {
if (any_results) {
tprintf("TOTAL Scaled Err=%.4g%%, %s\n",
scaled_error_ * 100.0, total_report.string());
}
// Report the worst substitution error only for now.
if (totals.n[CT_UNICHAR_TOP1_ERR] > 0) {
const UNICHARSET& unicharset = it.shape_table()->unicharset();
int charsetsize = unicharset.size();
int shapesize = it.CompactCharsetSize();
int charsetsize = unicharset_.size();
int worst_uni_id = 0;
int worst_shape_id = 0;
int worst_result_id = 0;
int worst_err = 0;
for (int u = 0; u < charsetsize; ++u) {
for (int s = 0; s < shapesize; ++s) {
if (unichar_counts_(u, s) > worst_err) {
worst_err = unichar_counts_(u, s);
for (int v = 0; v < charsetsize; ++v) {
if (unichar_counts_(u, v) > worst_err) {
worst_err = unichar_counts_(u, v);
worst_uni_id = u;
worst_shape_id = s;
worst_result_id = v;
}
}
}
if (worst_err > 0) {
tprintf("Worst error = %d:%s -> %s with %d/%d=%.2f%% errors\n",
worst_uni_id, unicharset.id_to_unichar(worst_uni_id),
it.shape_table()->DebugStr(worst_shape_id).string(),
worst_uni_id, unicharset_.id_to_unichar(worst_uni_id),
unicharset_.id_to_unichar(worst_result_id),
worst_err, totals.n[CT_UNICHAR_TOP1_ERR],
100.0 * worst_err / totals.n[CT_UNICHAR_TOP1_ERR]);
}
}
tprintf("Multi-unichar shape use:\n");
for (int u = 0; u < multi_unichar_counts_.size(); ++u) {
if (multi_unichar_counts_[u] > 0) {
tprintf("%d multiple answers for unichar: %s\n",
multi_unichar_counts_[u],
unicharset_.id_to_unichar(u));
}
}
tprintf("OK Score histogram:\n");
ok_score_hist_.print();
tprintf("ERROR Score histogram:\n");
bad_score_hist_.print();
}
double rates[CT_SIZE];
if (!ComputeRates(totals, rates))
return 0.0;
@ -308,32 +428,37 @@ double ErrorCounter::ReportErrors(int report_level, CountTypes boosting_mode,
// Sets the report string to a combined human and machine-readable report
// string of the error rates.
// Returns false if there is no data, leaving report unchanged.
bool ErrorCounter::ReportString(const Counts& counts, STRING* report) {
// Returns false if there is no data, leaving report unchanged, unless
// even_if_empty is true.
bool ErrorCounter::ReportString(bool even_if_empty, const Counts& counts,
STRING* report) {
// Compute the error rates.
double rates[CT_SIZE];
if (!ComputeRates(counts, rates))
if (!ComputeRates(counts, rates) && !even_if_empty)
return false;
// Using %.4g%%, the length of the output string should exactly match the
// length of the format string, but in case of overflow, allow for +eddd
// on each number.
const int kMaxExtraLength = 5; // Length of +eddd.
// Keep this format string and the snprintf in sync with the CountTypes enum.
const char* format_str = "ShapeErr=%.4g%%, FontAttr=%.4g%%, "
"Unichar=%.4g%%[1], %.4g%%[2], %.4g%%[n], "
"Multi=%.4g%%, Rej=%.4g%%, "
const char* format_str = "Unichar=%.4g%%[1], %.4g%%[2], %.4g%%[n], %.4g%%[T] "
"Mult=%.4g%%, Jn=%.4g%%, Brk=%.4g%%, Rej=%.4g%%, "
"FontAttr=%.4g%%, Multi=%.4g%%, "
"Answers=%.3g, Rank=%.3g, "
"OKjunk=%.4g%%, Badjunk=%.4g%%";
int max_str_len = strlen(format_str) + kMaxExtraLength * (CT_SIZE - 1) + 1;
char* formatted_str = new char[max_str_len];
snprintf(formatted_str, max_str_len, format_str,
rates[CT_SHAPE_TOP_ERR] * 100.0,
rates[CT_FONT_ATTR_ERR] * 100.0,
rates[CT_UNICHAR_TOP1_ERR] * 100.0,
rates[CT_UNICHAR_TOP2_ERR] * 100.0,
rates[CT_UNICHAR_TOPN_ERR] * 100.0,
rates[CT_UNICHAR_TOPTOP_ERR] * 100.0,
rates[CT_OK_MULTI_UNICHAR] * 100.0,
rates[CT_OK_JOINED] * 100.0,
rates[CT_OK_BROKEN] * 100.0,
rates[CT_REJECT] * 100.0,
rates[CT_FONT_ATTR_ERR] * 100.0,
rates[CT_OK_MULTI_FONT] * 100.0,
rates[CT_NUM_RESULTS],
rates[CT_RANK],
100.0 * rates[CT_REJECTED_JUNK],
@ -350,13 +475,9 @@ bool ErrorCounter::ReportString(const Counts& counts, STRING* report) {
// Computes the error rates and returns them in rates, which is an array of
// size CT_SIZE. Returns false if there is no data; rates is still filled in
// in that case, using a denominator of 1.
bool ErrorCounter::ComputeRates(const Counts& counts, double rates[CT_SIZE]) {
int ok_samples = counts.n[CT_SHAPE_TOP_CORRECT] + counts.n[CT_SHAPE_TOP_ERR] +
int ok_samples = counts.n[CT_UNICHAR_TOP_OK] + counts.n[CT_UNICHAR_TOP1_ERR] +
counts.n[CT_REJECT];
int junk_samples = counts.n[CT_REJECTED_JUNK] + counts.n[CT_ACCEPTED_JUNK];
if (ok_samples == 0 && junk_samples == 0) {
// There is no data.
return false;
}
// Compute rates for normal chars.
double denominator = static_cast<double>(MAX(ok_samples, 1));
for (int ct = 0; ct <= CT_RANK; ++ct)
@ -365,7 +486,7 @@ bool ErrorCounter::ComputeRates(const Counts& counts, double rates[CT_SIZE]) {
denominator = static_cast<double>(MAX(junk_samples, 1));
for (int ct = CT_REJECTED_JUNK; ct <= CT_ACCEPTED_JUNK; ++ct)
rates[ct] = counts.n[ct] / denominator;
return true;
return ok_samples != 0 || junk_samples != 0;
}
ErrorCounter::Counts::Counts() {

View File

@ -18,6 +18,7 @@
#include "genericvector.h"
#include "matrix.h"
#include "statistc.h"
struct Pix;
template <typename T> class UnicityTable;
@ -25,11 +26,11 @@ template <typename T> class UnicityTable;
namespace tesseract {
struct FontInfo;
class FontInfoTable;
class SampleIterator;
class ShapeClassifier;
class ShapeRating;
class ShapeTable;
class TrainingSample;
class UnicharRating;
// Enumeration of the different types of error count.
// Error counts work as follows:
@ -37,22 +38,21 @@ class TrainingSample;
// Ground truth is a valid unichar-id / font-id pair:
// Number of classifier answers?
// 0 >0
// CT_REJECT BOTH unichar-id and font-id match top shape?
// __________ yes! no
// CT_SHAPE_TOP_CORRECT CT_SHAPE_TOP_ERR
// | Font attributes match?
// | yes! no
// | | CT_FONT_ATTR_ERROR
// | Top unichar-id matches?
// | yes! no
// Top shape-id has multiple unichars? CT_UNICHAR_TOP1_ERR
// yes! no 2nd shape unichar id matches?
// CT_OK_MULTI_UNICHAR ________ yes! no
// ___________________ _____ CT_UNICHAR_TOP2_ERR
// Any unichar-id matches?
// yes! no
// ______ CT_UNICHAR_TOPN_ERR
// _________________
// CT_REJECT unichar-id matches top shape?
// __________ yes! no
// CT_UNICHAR_TOP_OK CT_UNICHAR_TOP1_ERR
// Top shape-id has multiple unichars? 2nd shape unichar id matches?
// yes! no yes! no
// CT_OK_MULTI_UNICHAR | _____ CT_UNICHAR_TOP2_ERR
// Font attributes match? Any unichar-id matches?
// yes! no yes! no
// CT_FONT_ATTR_OK CT_FONT_ATTR_ERR ______ CT_UNICHAR_TOPN_ERR
// | __________________ _________________
// Top shape-id has multiple font attrs?
// yes! no
// CT_OK_MULTI_FONT
// _____________________________
//
// Note that multiple counts may be activated for a single sample!
//
// Ground truth is for a fragment/n-gram that is NOT in the unicharset.
@ -67,14 +67,20 @@ class TrainingSample;
//
// Keep in sync with the ReportString function.
enum CountTypes {
CT_SHAPE_TOP_CORRECT, // Top shape id is actually correct.
CT_SHAPE_TOP_ERR, // Top shape id is not correct.
CT_FONT_ATTR_ERR, // Font attributes incorrect, ignoring unichar.
CT_UNICHAR_TOP_OK, // Top shape contains correct unichar id.
// The rank of the results in TOP1, TOP2, TOPN is determined by a gap of
// kRatingEpsilon from the first result in each group. The real top choice
// is measured using TOPTOP.
CT_UNICHAR_TOP1_ERR, // Top shape does not contain correct unichar id.
CT_UNICHAR_TOP2_ERR, // Top 2 shapes don't contain correct unichar id.
CT_UNICHAR_TOPN_ERR, // No output shape contains correct unichar id.
CT_UNICHAR_TOPTOP_ERR, // Very top choice not correct.
CT_OK_MULTI_UNICHAR, // Top shape id has correct unichar id, and others.
CT_OK_JOINED, // Top shape id is correct but marked joined.
CT_OK_BROKEN, // Top shape id is correct but marked broken.
CT_REJECT, // Classifier hates this.
CT_FONT_ATTR_ERR, // Top unichar OK, but font attributes incorrect.
CT_OK_MULTI_FONT, // CT_FONT_ATTR_OK but there are multiple font attrs.
CT_NUM_RESULTS, // Number of answers produced.
CT_RANK, // Rank of correct answer.
CT_REJECTED_JUNK, // Junk that was correctly rejected.
@ -115,12 +121,24 @@ class ErrorCounter {
// * The return value is the un-weighted version of the scaled_error.
static double ComputeErrorRate(ShapeClassifier* classifier,
int report_level, CountTypes boosting_mode,
const UnicityTable<FontInfo>& fontinfo_table,
const FontInfoTable& fontinfo_table,
const GenericVector<Pix*>& page_images,
SampleIterator* it,
double* unichar_error,
double* scaled_error,
STRING* fonts_report);
// Tests a pair of classifiers, debugging errors of the new against the old.
// Arguments have the same meaning as for ComputeErrorRate above.
// Iterates over the samples, calling the classifiers in normal/silent mode.
// If the new_classifier makes a boosting_mode error that the old_classifier
// does not, it will then call the new_classifier again
// with a debug flag and a keep_this argument to find out what is going on.
static void DebugNewErrors(ShapeClassifier* new_classifier,
ShapeClassifier* old_classifier,
CountTypes boosting_mode,
const FontInfoTable& fontinfo_table,
const GenericVector<Pix*>& page_images,
SampleIterator* it);
private:
// Simple struct to hold an array of counts.
@ -134,7 +152,7 @@ class ErrorCounter {
// Constructor is private. Only anticipated use of ErrorCounter is via
// the static ComputeErrorRate.
ErrorCounter(int charsetsize, int shapesize, int fontsize);
ErrorCounter(const UNICHARSET& unicharset, int fontsize);
~ErrorCounter();
// Accumulates the errors from the classifier results on a single sample.
@ -145,15 +163,13 @@ class ErrorCounter {
// for error counting and shape_table is used to understand the relationship
// between unichar_ids and shape_ids in the results
bool AccumulateErrors(bool debug, CountTypes boosting_mode,
const UnicityTable<FontInfo>& font_table,
const ShapeTable& shape_table,
const GenericVector<ShapeRating>& results,
const FontInfoTable& font_table,
const GenericVector<UnicharRating>& results,
TrainingSample* sample);
// Accumulates counts for junk. Counts only whether the junk was correctly
// rejected or not.
void AccumulateJunk(const ShapeTable& shape_table,
const GenericVector<ShapeRating>& results,
bool AccumulateJunk(bool debug, const GenericVector<UnicharRating>& results,
TrainingSample* sample);
// Creates a report of the error rate. The report_level controls the detail
@ -169,15 +185,17 @@ class ErrorCounter {
// If not NULL, the report string is saved in fonts_report.
// (Ignoring report_level).
double ReportErrors(int report_level, CountTypes boosting_mode,
const UnicityTable<FontInfo>& fontinfo_table,
const FontInfoTable& fontinfo_table,
const SampleIterator& it,
double* unichar_error,
STRING* fonts_report);
// Sets the report string to a combined human and machine-readable report
// string of the error rates.
// Returns false if there is no data, leaving report unchanged.
static bool ReportString(const Counts& counts, STRING* report);
// Returns false if there is no data, leaving report unchanged, unless
// even_if_empty is true.
static bool ReportString(bool even_if_empty, const Counts& counts,
STRING* report);
// Computes the error rates and returns them in rates, which is an array of
// size CT_SIZE. Returns false if there is no data; rates is still filled in
// in that case, using a denominator of 1.
@ -186,11 +204,22 @@ class ErrorCounter {
// Total scaled error used by boosting algorithms.
double scaled_error_;
// Difference in result rating to be thought of as an "equal" choice.
double rating_epsilon_;
// Vector indexed by font_id from the samples of error accumulators.
GenericVector<Counts> font_counts_;
// Counts of the results that map each unichar_id (from samples) to an
// incorrect top-result unichar_id.
GENERIC_2D_ARRAY<int> unichar_counts_;
// Count, indexed by the sample's unichar_id, of the number of times the
// correct unichar was in the top rank but shared it with other answers.
GenericVector<int> multi_unichar_counts_;
// Histogram of scores (as percent) for correct answers.
STATS ok_score_hist_;
// Histogram of scores (as percent) for incorrect answers.
STATS bad_score_hist_;
// Unicharset for printing character ids in results.
const UNICHARSET& unicharset_;
};
} // namespace tesseract.

View File

@ -49,8 +49,10 @@ void ExtractorStub();
* @note History: Sun Jan 21 10:07:28 1990, DSJ, Created.
*/
CHAR_DESC ExtractBlobFeatures(const FEATURE_DEFS_STRUCT &FeatureDefs,
const DENORM& denorm, TBLOB *Blob) {
return (ExtractFlexFeatures(FeatureDefs, Blob, denorm));
const DENORM& bl_denorm, const DENORM& cn_denorm,
const INT_FX_RESULT_STRUCT& fx_info,
TBLOB *Blob) {
return ExtractFlexFeatures(FeatureDefs, Blob, bl_denorm, cn_denorm, fx_info);
} /* ExtractBlobFeatures */
/*-----------------------------------------------------------------------------

View File

@ -26,8 +26,12 @@ class DENORM;
/*-----------------------------------------------------------------------------
Public Function Prototypes
-----------------------------------------------------------------------------*/
// Deprecated! Will be deleted soon!
// In the meantime, as all TBLOBs, Blob is in baseline normalized coords.
// See SetupBLCNDenorms in intfx.cpp for other args.
CHAR_DESC ExtractBlobFeatures(const FEATURE_DEFS_STRUCT &FeatureDefs,
const DENORM& denorm, TBLOB *Blob);
const DENORM& bl_denorm, const DENORM& cn_denorm,
const INT_FX_RESULT_STRUCT& fx_info, TBLOB *Blob);
/*---------------------------------------------------------------------------
Private Function Prototypes

View File

@ -19,7 +19,7 @@
Include Files and Type Defines
-----------------------------------------------------------------------------*/
#ifdef _MSC_VER
#include "mathfix.h"
#include <mathfix.h>
#endif
#include "featdefs.h"

View File

@ -28,8 +28,13 @@
Public Code
----------------------------------------------------------------------------**/
/*---------------------------------------------------------------------------*/
// Deprecated! Will be deleted soon!
// In the meantime, as all TBLOBs, Blob is in baseline normalized coords.
// See SetupBLCNDenorms in intfx.cpp for other args.
CHAR_DESC ExtractFlexFeatures(const FEATURE_DEFS_STRUCT &FeatureDefs,
TBLOB *Blob, const DENORM& denorm) {
TBLOB *Blob, const DENORM& bl_denorm,
const DENORM& cn_denorm,
const INT_FX_RESULT_STRUCT& fx_info) {
/*
** Parameters:
** Blob blob to extract features from
@ -50,8 +55,13 @@ CHAR_DESC ExtractFlexFeatures(const FEATURE_DEFS_STRUCT &FeatureDefs,
if (FeatureDefs.FeatureExtractors[Type] != NULL &&
FeatureDefs.FeatureExtractors[Type]->Extractor != NULL) {
CharDesc->FeatureSets[Type] =
(FeatureDefs.FeatureExtractors[Type])->Extractor(Blob, denorm);
(FeatureDefs.FeatureExtractors[Type])->Extractor(Blob,
bl_denorm,
cn_denorm,
fx_info);
if (CharDesc->FeatureSets[Type] == NULL) {
tprintf("Feature extractor for type %d = %s returned NULL!\n",
Type, FeatureDefs.FeatureDesc[Type]->ShortName);
FreeCharDescription(CharDesc);
return NULL;
}

View File

@ -27,7 +27,10 @@
/**----------------------------------------------------------------------------
Public Function Prototypes
----------------------------------------------------------------------------**/
// As with all TBLOBs this one is also baseline normalized.
CHAR_DESC ExtractFlexFeatures(const FEATURE_DEFS_STRUCT &FeatureDefs,
TBLOB *Blob, const DENORM& denorm);
TBLOB *Blob, const DENORM& bl_denorm,
const DENORM& cn_denorm,
const INT_FX_RESULT_STRUCT& fx_info);
#endif

View File

@ -90,8 +90,7 @@ void IntFeatureSpace::IndexAndSortFeatures(
// window, or -1 if the feature is a miss.
int IntFeatureSpace::XYToFeatureIndex(int x, int y) const {
// Round the x,y position to a feature. Search for a valid theta.
INT_FEATURE_STRUCT feature = {static_cast<uinT8>(x), static_cast<uinT8>(y),
0, 0};
INT_FEATURE_STRUCT feature(x, y, 0);
int index = -1;
for (int theta = 0; theta <= MAX_UINT8 && index < 0; ++theta) {
feature.Theta = theta;
@ -127,16 +126,10 @@ int IntFeatureSpace::XYToFeatureIndex(int x, int y) const {
INT_FEATURE_STRUCT IntFeatureSpace::PositionFromBuckets(int x,
int y,
int theta) const {
INT_FEATURE_STRUCT pos = {
static_cast<uinT8>(ClipToRange(
(x * kIntFeatureExtent + kIntFeatureExtent / 2) / x_buckets_,
0, MAX_UINT8)),
static_cast<uinT8>(ClipToRange(
(y * kIntFeatureExtent + kIntFeatureExtent / 2) / y_buckets_,
0, MAX_UINT8)),
static_cast<uinT8>(ClipToRange(
DivRounded(theta * kIntFeatureExtent, theta_buckets_),
0, MAX_UINT8))};
INT_FEATURE_STRUCT pos(
(x * kIntFeatureExtent + kIntFeatureExtent / 2) / x_buckets_,
(y * kIntFeatureExtent + kIntFeatureExtent / 2) / y_buckets_,
DivRounded(theta * kIntFeatureExtent, theta_buckets_));
return pos;
}

File diff suppressed because it is too large Load Diff

View File

@ -1,10 +1,10 @@
/******************************************************************************
** Filename: intfx.h
** Purpose: Interface to high level integer feature extractor.
** Author: Robert Moss
** History: Tue May 21 15:51:57 MDT 1991, RWM, Created.
** Filename: intfx.h
** Purpose: Interface to high level integer feature extractor.
** Author: Robert Moss
** History: Tue May 21 15:51:57 MDT 1991, RWM, Created.
**
** (c) Copyright Hewlett-Packard Company, 1988.
** (c) Copyright Hewlett-Packard Company, 1988.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
@ -42,6 +42,9 @@ struct INT_FX_RESULT_STRUCT {
uinT8 YTop; // Top of blob in BLN coords.
};
// The standard feature length
const double kStandardFeatureLength = 64.0 / 5;
/**----------------------------------------------------------------------------
Public Function Prototypes
----------------------------------------------------------------------------**/
@ -51,28 +54,22 @@ void InitIntegerFX();
// theta direction in an INT_FEATURE_STRUCT.
FCOORD FeatureDirection(uinT8 theta);
tesseract::TrainingSample* GetIntFeatures(
tesseract::NormalizationMode mode, TBLOB *blob,
const DENORM& denorm);
namespace tesseract {
// Generates a TrainingSample from a TBLOB. Extracts features and sets
// the bounding box, so classifiers that operate on the image can work.
// TODO(rays) BlobToTrainingSample must remain a global function until
// the FlexFx and FeatureDescription code can be removed and LearnBlob
// made a member of Classify.
TrainingSample* BlobToTrainingSample(const TBLOB& blob,
tesseract::NormalizationMode mode,
bool nonlinear_norm);
}
int ExtractIntFeat(TBLOB *Blob,
const DENORM& denorm,
INT_FEATURE_ARRAY BLFeat,
INT_FEATURE_ARRAY CNFeat,
INT_FX_RESULT_STRUCT* Results,
inT32 *FeatureOutlineArray = 0);
// Deprecated! Prefer tesseract::Classify::ExtractFeatures instead.
bool ExtractIntFeat(const TBLOB& blob,
bool nonlinear_norm,
INT_FEATURE_ARRAY BLFeat,
INT_FEATURE_ARRAY CNFeat,
INT_FX_RESULT_STRUCT* Results);
uinT8 BinaryAnglePlusPi(inT32 Y, inT32 X);
int SaveFeature(INT_FEATURE_ARRAY FeatureArray,
uinT16 FeatureNum,
inT16 X,
inT16 Y,
uinT8 Theta);
uinT16 MySqrt(inT32 X, inT32 Y);
uinT8 MySqrt2(uinT16 N, uinT32 I, uinT8 *Exp);
void ClipRadius(uinT8 *RxInv, uinT8 *RxExp, uinT8 *RyInv, uinT8 *RyExp);
#endif

View File

@ -28,7 +28,7 @@ extern BOOL_VAR_H(disable_character_fragments, FALSE,
"Do not include character fragments in the"
" results of the classifier");
extern INT_VAR_H(classify_integer_matcher_multiplier, 14,
extern INT_VAR_H(classify_integer_matcher_multiplier, 10,
"Integer Matcher Multiplier 0-255: ");

View File

@ -37,6 +37,7 @@
#include "mfoutline.h"
#include "ndminx.h"
#include "picofeat.h"
#include "points.h"
#include "shapetable.h"
#include "svmnode.h"
@ -206,6 +207,22 @@ double_VAR(classify_pp_side_pad, 2.5, "Proto Pruner Side Pad");
/*-----------------------------------------------------------------------------
Public Code
-----------------------------------------------------------------------------*/
// Builds a feature from an FCOORD position and an already-quantized direction.
// Each coordinate is rounded by adding 0.5 and truncating, and is saturated
// to the range [0, 255] that X and Y can hold.
// The clipping is deliberately done in floating point BEFORE narrowing to an
// integer type: converting a floating-point value whose truncated result
// does not fit the destination integer type is undefined behavior, so a
// far-out-of-range position must be clamped first rather than after the cast.
// theta is stored as given and CP_misses starts at zero.
INT_FEATURE_STRUCT::INT_FEATURE_STRUCT(const FCOORD& pos, uinT8 theta)
  : X(static_cast<uinT8>(ClipToRange<double>(pos.x() + 0.5, 0.0, 255.0))),
    Y(static_cast<uinT8>(ClipToRange<double>(pos.y() + 0.5, 0.0, 255.0))),
    Theta(theta),
    CP_misses(0) {
}
// Builds a feature from ints with all the necessary clipping and casting.
INT_FEATURE_STRUCT::INT_FEATURE_STRUCT(int x, int y, int theta)
: X(static_cast<uinT8>(ClipToRange(x, 0, MAX_UINT8))),
Y(static_cast<uinT8>(ClipToRange(y, 0, MAX_UINT8))),
Theta(static_cast<uinT8>(ClipToRange(theta, 0, MAX_UINT8))),
CP_misses(0) {
}
/*---------------------------------------------------------------------------*/
/**
* This routine adds a new class structure to a set of

View File

@ -28,6 +28,8 @@
#include "scrollview.h"
#include "unicharset.h"
class FCOORD;
/* define order of params in pruners */
#define PRUNER_X 0
#define PRUNER_Y 1
@ -130,8 +132,14 @@ INT_TEMPLATES_STRUCT, *INT_TEMPLATES;
#define MAX_NUM_INT_FEATURES 512
#define INT_CHAR_NORM_RANGE 256
struct INT_FEATURE_STRUCT
{
struct INT_FEATURE_STRUCT {
INT_FEATURE_STRUCT() : X(0), Y(0), Theta(0), CP_misses(0) { }
// Builds a feature from an FCOORD for position with all the necessary
// clipping and rounding.
INT_FEATURE_STRUCT(const FCOORD& pos, uinT8 theta);
// Builds a feature from ints with all the necessary clipping and casting.
INT_FEATURE_STRUCT(int x, int y, int theta);
uinT8 X;
uinT8 Y;
uinT8 Theta;

View File

@ -30,6 +30,7 @@
#include "allheaders.h"
#include "boxread.h"
#include "classify.h"
#include "efio.h"
#include "errorcounter.h"
#include "featdefs.h"
#include "sampleiterator.h"
@ -58,10 +59,6 @@ MasterTrainer::MasterTrainer(NormalizationMode norm_mode,
enable_shape_anaylsis_(shape_analysis),
enable_replication_(replicate_samples),
fragments_(NULL), prev_unichar_id_(-1), debug_level_(debug_level) {
fontinfo_table_.set_compare_callback(
NewPermanentTessCallback(CompareFontInfo));
fontinfo_table_.set_clear_callback(
NewPermanentTessCallback(FontInfoDeleteCallback));
}
MasterTrainer::~MasterTrainer() {
@ -82,10 +79,7 @@ bool MasterTrainer::Serialize(FILE* fp) const {
if (!verify_samples_.Serialize(fp)) return false;
if (!master_shapes_.Serialize(fp)) return false;
if (!flat_shapes_.Serialize(fp)) return false;
if (!fontinfo_table_.write(fp, NewPermanentTessCallback(write_info)))
return false;
if (!fontinfo_table_.write(fp, NewPermanentTessCallback(write_spacing_info)))
return false;
if (!fontinfo_table_.Serialize(fp)) return false;
if (!xheights_.Serialize(fp)) return false;
return true;
}
@ -106,11 +100,7 @@ bool MasterTrainer::DeSerialize(bool swap, FILE* fp) {
if (!verify_samples_.DeSerialize(swap, fp)) return false;
if (!master_shapes_.DeSerialize(swap, fp)) return false;
if (!flat_shapes_.DeSerialize(swap, fp)) return false;
if (!fontinfo_table_.read(fp, NewPermanentTessCallback(read_info), swap))
return false;
if (!fontinfo_table_.read(fp, NewPermanentTessCallback(read_spacing_info),
swap))
return false;
if (!fontinfo_table_.DeSerialize(swap, fp)) return false;
if (!xheights_.DeSerialize(swap, fp)) return false;
return true;
}
@ -122,8 +112,10 @@ void MasterTrainer::LoadUnicharset(const char* filename) {
"Building unicharset for training from scratch...\n",
filename);
unicharset_.clear();
// Space character needed to represent NIL_LIST classification.
unicharset_.unichar_insert(" ");
UNICHARSET initialized;
// Add special characters, as they were removed by the clear, but the
// default constructor puts them in.
unicharset_.AppendOtherUnicharset(initialized);
}
charsetsize_ = unicharset_.size();
delete [] fragments_;
@ -138,7 +130,7 @@ void MasterTrainer::LoadUnicharset(const char* filename) {
// adding them to the trainer with the font_id from the content of the file.
// See mftraining.cpp for a description of the file format.
// If verification, then these are verification samples, not training.
void MasterTrainer::ReadTrainingSamples(FILE *fp,
void MasterTrainer::ReadTrainingSamples(const char* page_name,
const FEATURE_DEFS_STRUCT& feature_defs,
bool verification) {
char buffer[2048];
@ -148,6 +140,12 @@ void MasterTrainer::ReadTrainingSamples(FILE *fp,
int cn_feature_type = ShortNameToFeatureType(feature_defs, kCNFeatureType);
int geo_feature_type = ShortNameToFeatureType(feature_defs, kGeoFeatureType);
FILE* fp = Efopen(page_name, "rb");
if (fp == NULL) {
tprintf("Failed to open tr file: %s\n", page_name);
return;
}
tr_filenames_.push_back(STRING(page_name));
while (fgets(buffer, sizeof(buffer), fp) != NULL) {
if (buffer[0] == '\n')
continue;
@ -159,6 +157,7 @@ void MasterTrainer::ReadTrainingSamples(FILE *fp,
}
*space++ = '\0';
int font_id = GetFontInfoId(buffer);
if (font_id < 0) font_id = 0;
int page_number;
STRING unichar;
TBOX bounding_box;
@ -177,6 +176,7 @@ void MasterTrainer::ReadTrainingSamples(FILE *fp,
FreeCharDescription(char_desc);
}
charsetsize_ = unicharset_.size();
fclose(fp);
}
// Adds the given single sample to the trainer, setting the classid
@ -278,23 +278,23 @@ void MasterTrainer::SetupMasterShapes() {
const CHAR_FRAGMENT *fragment = samples_.unicharset().get_fragment(c);
if (fragment == NULL)
char_shapes.AppendMasterShapes(shapes);
char_shapes.AppendMasterShapes(shapes, NULL);
else if (fragment->is_beginning())
char_shapes_begin_fragment.AppendMasterShapes(shapes);
char_shapes_begin_fragment.AppendMasterShapes(shapes, NULL);
else if (fragment->is_ending())
char_shapes_end_fragment.AppendMasterShapes(shapes);
char_shapes_end_fragment.AppendMasterShapes(shapes, NULL);
else
char_shapes.AppendMasterShapes(shapes);
char_shapes.AppendMasterShapes(shapes, NULL);
}
ClusterShapes(kMinClusteredShapes, kMaxUnicharsPerCluster,
kFontMergeDistance, &char_shapes_begin_fragment);
char_shapes.AppendMasterShapes(char_shapes_begin_fragment);
char_shapes.AppendMasterShapes(char_shapes_begin_fragment, NULL);
ClusterShapes(kMinClusteredShapes, kMaxUnicharsPerCluster,
kFontMergeDistance, &char_shapes_end_fragment);
char_shapes.AppendMasterShapes(char_shapes_end_fragment);
char_shapes.AppendMasterShapes(char_shapes_end_fragment, NULL);
ClusterShapes(kMinClusteredShapes, kMaxUnicharsPerCluster,
kFontMergeDistance, &char_shapes);
master_shapes_.AppendMasterShapes(char_shapes);
master_shapes_.AppendMasterShapes(char_shapes, NULL);
tprintf("Master shape_table:%s\n", master_shapes_.SummaryStr().string());
}
@ -401,7 +401,7 @@ bool MasterTrainer::LoadXHeights(const char* filename) {
continue;
fontinfo.name = buffer;
if (!fontinfo_table_.contains(fontinfo)) continue;
int fontinfo_id = fontinfo_table_.get_id(fontinfo);
int fontinfo_id = fontinfo_table_.get_index(fontinfo);
xheights_[fontinfo_id] = xht;
total_xheight += xht;
++xheight_count;
@ -439,7 +439,7 @@ bool MasterTrainer::AddSpacingInfo(const char *filename) {
char kerned_uch[UNICHAR_LEN];
int x_gap, x_gap_before, x_gap_after, num_kerned;
ASSERT_HOST(fscanf(fontinfo_file, "%d\n", &num_unichars) == 1);
FontInfo *fi = fontinfo_table_.get_mutable(fontinfo_id);
FontInfo *fi = &fontinfo_table_.get(fontinfo_id);
fi->init_spacing(unicharset_.size());
FontSpacingInfo *spacing = NULL;
for (int l = 0; l < num_unichars; ++l) {
@ -480,11 +480,7 @@ int MasterTrainer::GetFontInfoId(const char* font_name) {
fontinfo.name = const_cast<char*>(font_name);
fontinfo.properties = 0; // Not used to lookup in the table
fontinfo.universal_id = 0;
if (!fontinfo_table_.contains(fontinfo)) {
return -1;
} else {
return fontinfo_table_.get_id(fontinfo);
}
return fontinfo_table_.get_index(fontinfo);
}
// Returns the font_id of the closest matching font name to the given
// filename. It is assumed that a substring of the filename will match
@ -585,7 +581,7 @@ void MasterTrainer::WriteInttempAndPFFMTable(const UNICHARSET& unicharset,
const char* pffmtable_file) {
tesseract::Classify *classify = new tesseract::Classify();
// Move the fontinfo table to classify.
classify->get_fontinfo_table().move(&fontinfo_table_);
fontinfo_table_.MoveTo(&classify->get_fontinfo_table());
INT_TEMPLATES int_templates = classify->CreateIntTemplates(float_classes,
shape_set);
FILE* fp = fopen(inttemp_file, "wb");
@ -750,17 +746,29 @@ void MasterTrainer::DisplaySamples(const char* unichar_str1, int cloud_font,
}
#endif // GRAPHICS_DISABLED
// Runs test_classifier and old_classifier over the internal samples_ by
// handing both to ErrorCounter::DebugNewErrors with the CT_UNICHAR_TOPN_ERR
// error mode, together with the font table and the page images.
// replicate_samples is passed straight through to the SampleIterator.
// NOTE(review): presumably this reports errors that test_classifier makes
// and old_classifier does not - confirm against ErrorCounter::DebugNewErrors.
void MasterTrainer::TestClassifierVOld(bool replicate_samples,
ShapeClassifier* test_classifier,
ShapeClassifier* old_classifier) {
SampleIterator sample_it;
// Both leading Init arguments are left NULL, so the iterator is set up from
// samples_ and the replication flag alone.
sample_it.Init(NULL, NULL, replicate_samples, &samples_);
ErrorCounter::DebugNewErrors(test_classifier, old_classifier,
CT_UNICHAR_TOPN_ERR, fontinfo_table_,
page_images_, &sample_it);
}
// Tests the given test_classifier on the internal samples.
// See TestClassifier for details.
void MasterTrainer::TestClassifierOnSamples(int report_level,
void MasterTrainer::TestClassifierOnSamples(CountTypes error_mode,
int report_level,
bool replicate_samples,
ShapeClassifier* test_classifier,
STRING* report_string) {
TestClassifier(report_level, replicate_samples, &samples_,
TestClassifier(error_mode, report_level, replicate_samples, &samples_,
test_classifier, report_string);
}
// Tests the given test_classifier on the given samples
// Tests the given test_classifier on the given samples.
// error_mode indicates what counts as an error.
// report_levels:
// 0 = no output.
// 1 = bottom-line error rate.
@ -772,14 +780,14 @@ void MasterTrainer::TestClassifierOnSamples(int report_level,
// sample including replicated and systematically perturbed samples.
// If report_string is non-NULL, a summary of the results for each font
// is appended to the report_string.
double MasterTrainer::TestClassifier(int report_level,
double MasterTrainer::TestClassifier(CountTypes error_mode,
int report_level,
bool replicate_samples,
TrainingSampleSet* samples,
ShapeClassifier* test_classifier,
STRING* report_string) {
SampleIterator sample_it;
sample_it.Init(NULL, test_classifier->GetShapeTable(), replicate_samples,
samples);
sample_it.Init(NULL, NULL, replicate_samples, samples);
if (report_level > 0) {
int num_samples = 0;
for (sample_it.Begin(); !sample_it.AtEnd(); sample_it.Next())
@ -791,7 +799,7 @@ double MasterTrainer::TestClassifier(int report_level,
}
double unichar_error = 0.0;
ErrorCounter::ComputeErrorRate(test_classifier, report_level,
CT_SHAPE_TOP_ERR, fontinfo_table_,
error_mode, fontinfo_table_,
page_images_, &sample_it, &unichar_error,
NULL, report_string);
return unichar_error;

View File

@ -29,6 +29,7 @@
#include "cluster.h"
#include "intfx.h"
#include "elst.h"
#include "errorcounter.h"
#include "featdefs.h"
#include "fontinfo.h"
#include "indexmapbidi.h"
@ -89,7 +90,7 @@ class MasterTrainer {
// Reads the samples and their features from the given file,
// adding them to the trainer with the font_id from the content of the file.
// If verification, then these are verification samples, not training.
void ReadTrainingSamples(FILE *fp,
void ReadTrainingSamples(const char* page_name,
const FEATURE_DEFS_STRUCT& feature_defs,
bool verification);
@ -159,6 +160,12 @@ class MasterTrainer {
// one of the fonts. If more than one is matched, the longest is returned.
int GetBestMatchingFontInfoId(const char* filename);
// Returns the filename of the tr file corresponding to the command-line
// argument with the given index.
// NOTE(review): index is used unchecked to subscript tr_filenames_; whether
// an out-of-range index is caught depends on GenericVector - confirm.
// NOTE(review): ReadTrainingSamples appends to tr_filenames_ only after the
// tr file has been opened successfully, so if an earlier file failed to open
// the indices may not line up with command-line positions - confirm with
// the callers.
const STRING& GetTRFileName(int index) const {
return tr_filenames_[index];
}
// Sets up a flat shapetable with one shape per class/font combination.
void SetupFlatShapeTable(ShapeTable* shape_table);
@ -207,13 +214,19 @@ class MasterTrainer {
const char* unichar_str2, int canonical_font);
#endif // GRAPHICS_DISABLED
void TestClassifierVOld(bool replicate_samples,
ShapeClassifier* test_classifier,
ShapeClassifier* old_classifier);
// Tests the given test_classifier on the internal samples.
// See TestClassifier for details.
void TestClassifierOnSamples(int report_level,
void TestClassifierOnSamples(CountTypes error_mode,
int report_level,
bool replicate_samples,
ShapeClassifier* test_classifier,
STRING* report_string);
// Tests the given test_classifier on the given samples.
// error_mode indicates what counts as an error.
// report_levels:
// 0 = no output.
// 1 = bottom-line error rate.
@ -225,7 +238,8 @@ class MasterTrainer {
// sample including replicated and systematically perturbed samples.
// If report_string is non-NULL, a summary of the results for each font
// is appended to the report_string.
double TestClassifier(int report_level,
double TestClassifier(CountTypes error_mode,
int report_level,
bool replicate_samples,
TrainingSampleSet* samples,
ShapeClassifier* test_classifier,
@ -263,9 +277,9 @@ class MasterTrainer {
// Flat shape table has each unichar/font id pair in a separate shape.
ShapeTable flat_shapes_;
// Font metrics gathered from multiple files.
UnicityTable<FontInfo> fontinfo_table_;
FontInfoTable fontinfo_table_;
// Array of xheights indexed by font ids in fontinfo_table_;
GenericVector<int> xheights_;
GenericVector<inT32> xheights_;
// Non-serialized data initialized by other means or used temporarily
// during loading of training samples.
@ -291,6 +305,8 @@ class MasterTrainer {
// Indexed by page_num_ in the samples.
// These images are owned by the trainer and need to be pixDestroyed.
GenericVector<Pix*> page_images_;
// Vector of filenames of loaded tr files.
GenericVector<STRING> tr_filenames_;
};
} // namespace tesseract.

View File

@ -33,7 +33,9 @@
Private Code
----------------------------------------------------------------------------**/
/*---------------------------------------------------------------------------*/
FEATURE_SET ExtractMicros(TBLOB *Blob, const DENORM& denorm) {
FEATURE_SET ExtractMicros(TBLOB *Blob, const DENORM& bl_denorm,
const DENORM& cn_denorm,
const INT_FX_RESULT_STRUCT& fx_info) {
/*
** Parameters:
** Blob blob to extract micro-features from
@ -52,7 +54,8 @@ FEATURE_SET ExtractMicros(TBLOB *Blob, const DENORM& denorm) {
FEATURE Feature;
MICROFEATURE OldFeature;
OldFeatures = (MICROFEATURES)BlobMicroFeatures(Blob, denorm);
OldFeatures = (MICROFEATURES)BlobMicroFeatures(Blob, bl_denorm, cn_denorm,
fx_info);
if (OldFeatures == NULL)
return NULL;
NumFeatures = count (OldFeatures);

View File

@ -34,6 +34,8 @@ typedef float MicroFeature[MFCount];
/*----------------------------------------------------------------------------
Private Function Prototypes
-----------------------------------------------------------------------------*/
FEATURE_SET ExtractMicros(TBLOB *Blob, const DENORM& denorm);
FEATURE_SET ExtractMicros(TBLOB *Blob, const DENORM& bl_denorm,
const DENORM& cn_denorm,
const INT_FX_RESULT_STRUCT& fx_info);
#endif

View File

@ -103,56 +103,6 @@ LIST ConvertOutlines(TESSLINE *outline,
return mf_outlines;
}
/*---------------------------------------------------------------------------*/
void ComputeOutlineStats(LIST Outlines, OUTLINE_STATS *OutlineStats) {
/*
** Parameters:
** Outlines list of outlines to compute stats for
** OutlineStats place to put results
** Globals: none
** Operation: This routine computes several statistics about the outlines
** in Outlines. These statistics are usually used to perform
** anistropic normalization of all of the outlines. The
** statistics generated are:
** first moments about x and y axes
** total length of all outlines
** center of mass of all outlines
** second moments about center of mass axes
** radius of gyration about center of mass axes
** Return: none (results are returned in OutlineStats)
** Exceptions: none
** History: Fri Dec 14 08:32:03 1990, DSJ, Created.
*/
MFOUTLINE Outline;
MFOUTLINE EdgePoint;
MFEDGEPT *Current;
MFEDGEPT *Last;
InitOutlineStats(OutlineStats);
iterate(Outlines) {
Outline = (MFOUTLINE) first_node (Outlines);
Last = PointAt (Outline);
Outline = NextPointAfter (Outline);
EdgePoint = Outline;
do {
Current = PointAt (EdgePoint);
UpdateOutlineStats (OutlineStats,
Last->Point.x, Last->Point.y,
Current->Point.x, Current->Point.y);
Last = Current;
EdgePoint = NextPointAfter (EdgePoint);
}
while (EdgePoint != Outline);
}
FinishOutlineStats(OutlineStats);
} /* ComputeOutlineStats */
/*---------------------------------------------------------------------------*/
void FindDirectionChanges(MFOUTLINE Outline,
FLOAT32 MinSlope,
@ -334,7 +284,8 @@ void NormalizeOutline(MFOUTLINE Outline,
MFOUTLINE EdgePoint = Outline;
do {
MFEDGEPT *Current = PointAt(EdgePoint);
Current->Point.y = MF_SCALE_FACTOR * (Current->Point.y - BASELINE_OFFSET);
Current->Point.y = MF_SCALE_FACTOR *
(Current->Point.y - kBlnBaselineOffset);
Current->Point.x = MF_SCALE_FACTOR * (Current->Point.x - XOrigin);
EdgePoint = NextPointAfter(EdgePoint);
} while (EdgePoint != Outline);
@ -365,34 +316,10 @@ void Classify::NormalizeOutlines(LIST Outlines,
** History: Fri Dec 14 08:14:55 1990, DSJ, Created.
*/
MFOUTLINE Outline;
OUTLINE_STATS OutlineStats;
FLOAT32 BaselineScale;
switch (classify_norm_method) {
case character:
ComputeOutlineStats(Outlines, &OutlineStats);
/* limit scale factor to avoid overscaling small blobs (.,`'),
thin blobs (l1ift), and merged blobs */
*XScale = *YScale = BaselineScale = MF_SCALE_FACTOR;
*XScale *= OutlineStats.Ry;
*YScale *= OutlineStats.Rx;
if (*XScale < classify_min_norm_scale_x)
*XScale = classify_min_norm_scale_x;
if (*YScale < classify_min_norm_scale_y)
*YScale = classify_min_norm_scale_y;
if (*XScale > classify_max_norm_scale_x &&
*YScale <= classify_max_norm_scale_y)
*XScale = classify_max_norm_scale_x;
*XScale = classify_char_norm_range * BaselineScale / *XScale;
*YScale = classify_char_norm_range * BaselineScale / *YScale;
iterate(Outlines) {
Outline = (MFOUTLINE) first_node (Outlines);
CharNormalizeOutline (Outline,
OutlineStats.x, OutlineStats.y,
*XScale, *YScale);
}
ASSERT_HOST(!"How did NormalizeOutlines get called in character mode?");
break;
case baseline:
@ -436,11 +363,7 @@ void ChangeDirection(MFOUTLINE Start, MFOUTLINE End, DIRECTION Direction) {
/*---------------------------------------------------------------------------*/
void CharNormalizeOutline(MFOUTLINE Outline,
FLOAT32 XCenter,
FLOAT32 YCenter,
FLOAT32 XScale,
FLOAT32 YScale) {
void CharNormalizeOutline(MFOUTLINE Outline, const DENORM& cn_denorm) {
/*
** Parameters:
** Outline outline to be character normalized
@ -463,13 +386,13 @@ void CharNormalizeOutline(MFOUTLINE Outline,
First = Outline;
Current = First;
do {
CurrentPoint = PointAt (Current);
CurrentPoint->Point.x =
(CurrentPoint->Point.x - XCenter) * XScale;
CurrentPoint->Point.y =
(CurrentPoint->Point.y - YCenter) * YScale;
CurrentPoint = PointAt(Current);
FCOORD pos(CurrentPoint->Point.x, CurrentPoint->Point.y);
cn_denorm.LocalNormTransform(pos, &pos);
CurrentPoint->Point.x = (pos.x() - MAX_UINT8 / 2) * MF_SCALE_FACTOR;
CurrentPoint->Point.y = (pos.y() - MAX_UINT8 / 2) * MF_SCALE_FACTOR;
Current = NextPointAfter (Current);
Current = NextPointAfter(Current);
}
while (Current != First);

View File

@ -21,10 +21,10 @@
/**----------------------------------------------------------------------------
Include Files and Type Defines
----------------------------------------------------------------------------**/
#include "blobs.h"
#include "host.h"
#include "oldlist.h"
#include "fpoint.h"
#include "baseline.h"
#include "params.h"
#define NORMAL_X_HEIGHT (0.5)
@ -68,7 +68,7 @@ typedef enum {
#define AverageOf(A,B) (((A) + (B)) / 2)
/* macro for computing the scale factor to use to normalize characters */
#define MF_SCALE_FACTOR (NORMAL_X_HEIGHT / BASELINE_SCALE)
#define MF_SCALE_FACTOR (NORMAL_X_HEIGHT / kBlnXHeight)
/* macros for manipulating micro-feature outlines */
#define DegenerateOutline(O) (((O) == NIL_LIST) || ((O) == list_rest(O)))
@ -93,8 +93,6 @@ LIST ConvertOutlines(TESSLINE *Outline,
LIST ConvertedOutlines,
OUTLINETYPE OutlineType);
void ComputeOutlineStats(LIST Outlines, OUTLINE_STATS *OutlineStats);
void FilterEdgeNoise(MFOUTLINE Outline, FLOAT32 NoiseSegmentLength);
void FindDirectionChanges(MFOUTLINE Outline,
@ -119,11 +117,10 @@ void NormalizeOutline(MFOUTLINE Outline,
-----------------------------------------------------------------------------*/
void ChangeDirection(MFOUTLINE Start, MFOUTLINE End, DIRECTION Direction);
void CharNormalizeOutline(MFOUTLINE Outline,
FLOAT32 XCenter,
FLOAT32 YCenter,
FLOAT32 XScale,
FLOAT32 YScale);
// Normalizes the Outline in-place using cn_denorm's local transformation,
// then converts from the integer feature range [0,255] to the clusterer
// feature range of [-0.5, 0.5].
void CharNormalizeOutline(MFOUTLINE Outline, const DENORM& cn_denorm);
void ComputeDirection(MFEDGEPT *Start,
MFEDGEPT *Finish,

View File

@ -59,7 +59,9 @@ MICROFEATURE ExtractMicroFeature(MFOUTLINE Start, MFOUTLINE End);
----------------------------------------------------------------------------**/
/*---------------------------------------------------------------------------*/
CHAR_FEATURES BlobMicroFeatures(TBLOB *Blob, const DENORM& denorm) {
CHAR_FEATURES BlobMicroFeatures(TBLOB *Blob, const DENORM& bl_denorm,
const DENORM& cn_denorm,
const INT_FX_RESULT_STRUCT& fx_info) {
/*
** Parameters:
** Blob blob to extract micro-features from
@ -74,35 +76,25 @@ CHAR_FEATURES BlobMicroFeatures(TBLOB *Blob, const DENORM& denorm) {
** History: 7/21/89, DSJ, Created.
*/
MICROFEATURES MicroFeatures = NIL_LIST;
FLOAT32 XScale, YScale;
LIST Outlines;
LIST RemainingOutlines;
MFOUTLINE Outline;
INT_FEATURE_ARRAY blfeatures;
INT_FEATURE_ARRAY cnfeatures;
INT_FX_RESULT_STRUCT results;
if (Blob != NULL) {
Outlines = ConvertBlob (Blob);
if (!ExtractIntFeat(Blob, denorm, blfeatures, cnfeatures, &results))
return NULL;
XScale = 0.2f / results.Ry;
YScale = 0.2f / results.Rx;
Outlines = ConvertBlob(Blob);
RemainingOutlines = Outlines;
iterate(RemainingOutlines) {
Outline = (MFOUTLINE) first_node (RemainingOutlines);
CharNormalizeOutline (Outline,
results.Xmean, results.Ymean,
XScale, YScale);
CharNormalizeOutline(Outline, cn_denorm);
}
RemainingOutlines = Outlines;
iterate(RemainingOutlines) {
Outline = (MFOUTLINE) first_node (RemainingOutlines);
Outline = (MFOUTLINE) first_node(RemainingOutlines);
FindDirectionChanges(Outline, classify_min_slope, classify_max_slope);
MarkDirectionChanges(Outline);
MicroFeatures = ConvertToMicroFeatures (Outline, MicroFeatures);
MicroFeatures = ConvertToMicroFeatures(Outline, MicroFeatures);
}
FreeOutlines(Outlines);
}

View File

@ -35,6 +35,8 @@ extern double_VAR_H(classify_max_slope, 2.414213562,
/**----------------------------------------------------------------------------
Public Function Prototypes
----------------------------------------------------------------------------**/
CHAR_FEATURES BlobMicroFeatures(TBLOB *Blob, const DENORM& denorm);
CHAR_FEATURES BlobMicroFeatures(TBLOB *Blob, const DENORM& bl_denorm,
const DENORM& cn_denorm,
const INT_FX_RESULT_STRUCT& fx_info);
#endif

View File

@ -59,22 +59,18 @@ FLOAT32 ActualOutlineLength(FEATURE Feature) {
// the x center of the grapheme's bounding box.
// English: [0.011, 0.31]
//
FEATURE_SET ExtractCharNormFeatures(TBLOB *blob, const DENORM& denorm) {
FEATURE_SET ExtractCharNormFeatures(TBLOB *blob, const DENORM& bl_denorm,
const DENORM& cn_denorm,
const INT_FX_RESULT_STRUCT& fx_info) {
FEATURE_SET feature_set = NewFeatureSet(1);
FEATURE feature = NewFeature(&CharNormDesc);
INT_FEATURE_ARRAY blfeatures;
INT_FEATURE_ARRAY cnfeatures;
INT_FX_RESULT_STRUCT FXInfo;
ExtractIntFeat(blob, denorm, blfeatures, cnfeatures, &FXInfo);
feature->Params[CharNormY] =
MF_SCALE_FACTOR * (FXInfo.Ymean - BASELINE_OFFSET);
MF_SCALE_FACTOR * (fx_info.Ymean - kBlnBaselineOffset);
feature->Params[CharNormLength] =
MF_SCALE_FACTOR * FXInfo.Length / LENGTH_COMPRESSION;
feature->Params[CharNormRx] = MF_SCALE_FACTOR * FXInfo.Rx;
feature->Params[CharNormRy] = MF_SCALE_FACTOR * FXInfo.Ry;
MF_SCALE_FACTOR * fx_info.Length / LENGTH_COMPRESSION;
feature->Params[CharNormRx] = MF_SCALE_FACTOR * fx_info.Rx;
feature->Params[CharNormRy] = MF_SCALE_FACTOR * fx_info.Ry;
AddFeature(feature_set, feature);

View File

@ -34,6 +34,8 @@ typedef enum {
----------------------------------------------------------------------------**/
FLOAT32 ActualOutlineLength(FEATURE Feature);
FEATURE_SET ExtractCharNormFeatures(TBLOB *Blob, const DENORM& denorm);
FEATURE_SET ExtractCharNormFeatures(TBLOB *Blob, const DENORM& bl_denorm,
const DENORM& cn_denorm,
const INT_FX_RESULT_STRUCT& fx_info);
#endif

View File

@ -94,7 +94,7 @@ FLOAT32 Classify::ComputeNormMatch(CLASS_ID ClassId,
PROTOTYPE *Proto;
int ProtoId;
if(ClassId > NormProtos->NumProtos) {
if (ClassId > NormProtos->NumProtos) {
ClassId = NO_CLASS;
}

View File

@ -230,7 +230,7 @@ void WriteFeature(FILE *File, FEATURE Feature) {
int i;
for (i = 0; i < Feature->Type->NumParams; i++) {
#ifndef _WIN32
#ifndef WIN32
assert(!isnan(Feature->Params[i]));
#endif
fprintf(File, " %g", Feature->Params[i]);

View File

@ -26,6 +26,7 @@
#include <stdio.h>
class DENORM;
struct INT_FX_RESULT_STRUCT;
#undef Min
#undef Max
@ -78,7 +79,8 @@ typedef FEATURE_SET_STRUCT *FEATURE_SET;
// classifier does not need to know the details of this data structure.
typedef char *CHAR_FEATURES;
typedef FEATURE_SET (*FX_FUNC) (TBLOB *, const DENORM&);
typedef FEATURE_SET (*FX_FUNC)(TBLOB *, const DENORM&, const DENORM&,
const INT_FX_RESULT_STRUCT&);
struct FEATURE_EXT_STRUCT {
FX_FUNC Extractor; // func to extract features

View File

@ -224,7 +224,9 @@ void NormalizePicoX(FEATURE_SET FeatureSet) {
} /* NormalizePicoX */
/*---------------------------------------------------------------------------*/
FEATURE_SET ExtractIntCNFeatures(TBLOB *blob, const DENORM& denorm) {
FEATURE_SET ExtractIntCNFeatures(TBLOB *blob, const DENORM& bl_denorm,
const DENORM& cn_denorm,
const INT_FX_RESULT_STRUCT& fx_info) {
/*
** Parameters:
** blob blob to extract features from
@ -233,8 +235,8 @@ FEATURE_SET ExtractIntCNFeatures(TBLOB *blob, const DENORM& denorm) {
** Exceptions: none
** History: 8/8/2011, rays, Created.
*/
tesseract::TrainingSample* sample = GetIntFeatures(
tesseract::NM_CHAR_ANISOTROPIC, blob, denorm);
tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample(
*blob, tesseract::NM_CHAR_ANISOTROPIC, false);
if (sample == NULL) return NULL;
int num_features = sample->num_features();
@ -254,7 +256,9 @@ FEATURE_SET ExtractIntCNFeatures(TBLOB *blob, const DENORM& denorm) {
} /* ExtractIntCNFeatures */
/*---------------------------------------------------------------------------*/
FEATURE_SET ExtractIntGeoFeatures(TBLOB *blob, const DENORM& denorm) {
FEATURE_SET ExtractIntGeoFeatures(TBLOB *blob, const DENORM& bl_denorm,
const DENORM& cn_denorm,
const INT_FX_RESULT_STRUCT& fx_info) {
/*
** Parameters:
** blob blob to extract features from
@ -263,8 +267,8 @@ FEATURE_SET ExtractIntGeoFeatures(TBLOB *blob, const DENORM& denorm) {
** Exceptions: none
** History: 8/8/2011, rays, Created.
*/
tesseract::TrainingSample* sample = GetIntFeatures(
tesseract::NM_CHAR_ANISOTROPIC, blob, denorm);
tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample(
*blob, tesseract::NM_CHAR_ANISOTROPIC, false);
if (sample == NULL) return NULL;
FEATURE_SET feature_set = NewFeatureSet(1);

View File

@ -58,8 +58,12 @@ extern double_VAR_H(classify_pico_feature_length, 0.05, "Pico Feature Length");
----------------------------------------------------------------------------**/
#define GetPicoFeatureLength() (PicoFeatureLength)
FEATURE_SET ExtractIntCNFeatures(TBLOB *Blob, const DENORM& denorm);
FEATURE_SET ExtractIntGeoFeatures(TBLOB *Blob, const DENORM& denorm);
FEATURE_SET ExtractIntCNFeatures(TBLOB *Blob, const DENORM& bl_denorm,
const DENORM& cn_denorm,
const INT_FX_RESULT_STRUCT& fx_info);
FEATURE_SET ExtractIntGeoFeatures(TBLOB *Blob, const DENORM& bl_denorm,
const DENORM& cn_denorm,
const INT_FX_RESULT_STRUCT& fx_info);
/**----------------------------------------------------------------------------
Global Data Definitions and Declarations

View File

@ -0,0 +1,230 @@
// Copyright 2011 Google Inc. All Rights Reserved.
// Author: rays@google.com (Ray Smith)
///////////////////////////////////////////////////////////////////////
// File: shapeclassifier.cpp
// Description: Base interface class for classifiers that return a
// shape index.
// Author: Ray Smith
// Created: Thu Dec 15 15:24:27 PST 2011
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#include "shapeclassifier.h"
#include "genericvector.h"
#include "scrollview.h"
#include "shapetable.h"
#include "svmnode.h"
#include "trainingsample.h"
#include "tprintf.h"
namespace tesseract {
// Classifies the given [training] sample, writing to results.
// See shapeclassifier.h for a full description.
// Default implementation calls the ShapeRating version.
int ShapeClassifier::UnicharClassifySample(
const TrainingSample& sample, Pix* page_pix, int debug,
UNICHAR_ID keep_this, GenericVector<UnicharRating>* results) {
results->truncate(0);
GenericVector<ShapeRating> shape_results;
int num_shape_results = ClassifySample(sample, page_pix, debug, keep_this,
&shape_results);
const ShapeTable* shapes = GetShapeTable();
GenericVector<int> unichar_map;
unichar_map.init_to_size(shapes->unicharset().size(), -1);
for (int r = 0; r < num_shape_results; ++r) {
shapes->AddShapeToResults(shape_results[r], &unichar_map, results);
}
return results->size();
}
// Classifies the given [training] sample, writing to results.
// See shapeclassifier.h for a full description.
// Default implementation aborts: it exists only so that a subclass may
// override either this method or UnicharClassifySample. The assertion below
// compares a string literal with NULL, which is never true, so reaching this
// body always fails ASSERT_HOST.
// None of the parameters are used here.
int ShapeClassifier::ClassifySample(const TrainingSample& sample, Pix* page_pix,
int debug, int keep_this,
GenericVector<ShapeRating>* results) {
// NOTE(review): the literal is presumably echoed in the assertion failure
// message, so keep its text as is - confirm against the ASSERT_HOST macro.
ASSERT_HOST("Must implement ClassifySample!" == NULL);
// Keeps the function well-formed if ASSERT_HOST ever returns control.
return 0;
}
// Returns the shape that contains unichar_id that has the best result.
// If result is not NULL, it is set with the shape_id and rating.
// Does not need to be overridden if ClassifySample respects the keep_this
// rule.
int ShapeClassifier::BestShapeForUnichar(const TrainingSample& sample,
Pix* page_pix, UNICHAR_ID unichar_id,
ShapeRating* result) {
GenericVector<ShapeRating> results;
const ShapeTable* shapes = GetShapeTable();
int num_results = ClassifySample(sample, page_pix, 0, unichar_id, &results);
for (int r = 0; r < num_results; ++r) {
if (shapes->GetShape(results[r].shape_id).ContainsUnichar(unichar_id)) {
if (result != NULL)
*result = results[r];
return results[r].shape_id;
}
}
return -1;
}
// Provides access to the UNICHARSET that this classifier works with, taken
// from the shape table returned by GetShapeTable().
// The shape table pointer is dereferenced unconditionally, so this only
// needs to be overridden if GetShapeTable() can return NULL.
const UNICHARSET& ShapeClassifier::GetUnicharset() const {
  const ShapeTable* shape_table = GetShapeTable();
  return shape_table->unicharset();
}
// Visual debugger classifies the given sample, displays the results and
// solicits user input to display other classifications. Returns when
// the user has finished with debugging the sample.
// Probably doesn't need to be overridden if the subclass provides
// DisplayClassifyAs.
void ShapeClassifier::DebugDisplay(const TrainingSample& sample,
Pix* page_pix,
UNICHAR_ID unichar_id) {
static ScrollView* terminator = NULL;
if (terminator == NULL) {
terminator = new ScrollView("XIT", 0, 0, 50, 50, 50, 50, true);
}
ScrollView* debug_win = CreateFeatureSpaceWindow("ClassifierDebug", 0, 0);
// Provide a right-click menu to choose the class.
SVMenuNode* popup_menu = new SVMenuNode();
popup_menu->AddChild("Choose class to debug", 0, "x", "Class to debug");
popup_menu->BuildMenu(debug_win, false);
// Display the features in green.
const INT_FEATURE_STRUCT* features = sample.features();
int num_features = sample.num_features();
for (int f = 0; f < num_features; ++f) {
RenderIntFeature(debug_win, &features[f], ScrollView::GREEN);
}
debug_win->Update();
GenericVector<UnicharRating> results;
// Debug classification until the user quits.
const UNICHARSET& unicharset = GetUnicharset();
SVEvent* ev;
SVEventType ev_type;
do {
PointerVector<ScrollView> windows;
if (unichar_id >= 0) {
tprintf("Debugging class %d = %s\n",
unichar_id, unicharset.id_to_unichar(unichar_id));
UnicharClassifySample(sample, page_pix, 1, unichar_id, &results);
DisplayClassifyAs(sample, page_pix, unichar_id, 1, &windows);
} else {
tprintf("Invalid unichar_id: %d\n", unichar_id);
UnicharClassifySample(sample, page_pix, 1, -1, &results);
}
if (unichar_id >= 0) {
tprintf("Debugged class %d = %s\n",
unichar_id, unicharset.id_to_unichar(unichar_id));
}
tprintf("Right-click in ClassifierDebug window to choose debug class,");
tprintf(" Left-click or close window to quit...\n");
UNICHAR_ID old_unichar_id;
do {
old_unichar_id = unichar_id;
ev = debug_win->AwaitEvent(SVET_ANY);
ev_type = ev->type;
if (ev_type == SVET_POPUP) {
if (unicharset.contains_unichar(ev->parameter)) {
unichar_id = unicharset.unichar_to_id(ev->parameter);
} else {
tprintf("Char class '%s' not found in unicharset", ev->parameter);
}
}
delete ev;
} while (unichar_id == old_unichar_id &&
ev_type != SVET_CLICK && ev_type != SVET_DESTROY);
} while (ev_type != SVET_CLICK && ev_type != SVET_DESTROY);
delete debug_win;
}
// Displays classification as the given unichar_id. Creates as many windows
// as it feels fit, using index as a guide for placement. Adds any created
// windows to the windows output and returns a new index that may be used
// by any subsequent classifiers. Caller waits for the user to view and
// then destroys the windows by clearing the vector.
// This default implementation creates no windows, ignores sample, page_pix,
// unichar_id and windows, and returns index unchanged.
int ShapeClassifier::DisplayClassifyAs(
const TrainingSample& sample, Pix* page_pix,
UNICHAR_ID unichar_id, int index,
PointerVector<ScrollView>* windows) {
// Does nothing in the default implementation.
return index;
}
// Prints debug information on the results.
void ShapeClassifier::UnicharPrintResults(
const char* context, const GenericVector<UnicharRating>& results) const {
tprintf("%s\n", context);
for (int i = 0; i < results.size(); ++i) {
tprintf("%g: c_id=%d=%s", results[i].rating, results[i].unichar_id,
GetUnicharset().id_to_unichar(results[i].unichar_id));
if (results[i].fonts.size() != 0) {
tprintf(" Font Vector:");
for (int f = 0; f < results[i].fonts.size(); ++f) {
tprintf(" %d", results[i].fonts[f]);
}
}
tprintf("\n");
}
}
void ShapeClassifier::PrintResults(
const char* context, const GenericVector<ShapeRating>& results) const {
tprintf("%s\n", context);
for (int i = 0; i < results.size(); ++i) {
tprintf("%g:", results[i].rating);
if (results[i].joined)
tprintf("[J]");
if (results[i].broken)
tprintf("[B]");
tprintf(" %s\n", GetShapeTable()->DebugStr(results[i].shape_id).string());
}
}
// Removes any result that has all its unichars covered by a better choice,
// regardless of font.
// "Better" means earlier in results: a result is dropped when every unichar
// of its shape also occurs in the shape of at least one earlier result.
// The first result is always kept. A later result whose shape has no
// unichars at all is dropped, since it has nothing left uncovered.
// results is overwritten with the surviving entries in their original order.
void ShapeClassifier::FilterDuplicateUnichars(
    GenericVector<ShapeRating>* results) const {
  GenericVector<ShapeRating> kept;
  const ShapeTable* shape_table = GetShapeTable();
  for (int r = 0; r < results->size(); ++r) {
    // Only results after the first can be covered by something better.
    bool fully_covered = r > 0;
    if (r > 0) {
      const Shape& candidate = shape_table->GetShape((*results)[r].shape_id);
      // Stop at the first unichar that no earlier result accounts for.
      for (int c = 0; c < candidate.size() && fully_covered; ++c) {
        const int unichar_id = candidate[c].unichar_id;
        bool seen_earlier = false;
        for (int s = 0; s < r && !seen_earlier; ++s) {
          const Shape& earlier = shape_table->GetShape((*results)[s].shape_id);
          seen_earlier = earlier.ContainsUnichar(unichar_id);
        }
        fully_covered = seen_earlier;
      }
    }
    if (!fully_covered) {
      kept.push_back((*results)[r]);
    }
  }
  *results = kept;
}
} // namespace tesseract.

View File

@ -23,44 +23,21 @@
#ifndef TESSERACT_CLASSIFY_SHAPECLASSIFIER_H_
#define TESSERACT_CLASSIFY_SHAPECLASSIFIER_H_
#include "unichar.h"
template <typename T> class GenericVector;
struct Pix;
class ScrollView;
class UNICHARSET;
namespace tesseract {
template <typename T> class PointerVector;
struct ShapeRating;
class ShapeTable;
class TrainingSample;
// Result of a low-level classification: the index of the matched shape in
// some ShapeTable together with a rating for the match.
struct ShapeRating {
  // Default result: shape 0 with all scores zero.
  ShapeRating() : shape_id(0), rating(0.0f), raw(0.0f), font(0.0f) {}
  // Result for shape s with rating r; raw starts at 1 and font at 0.
  ShapeRating(int s, float r)
      : shape_id(s), rating(r), raw(1.0f), font(0.0f) {}

  // qsort-style comparator that orders ShapeRatings by descending rating.
  // Equal ratings are ordered by ascending shape_id (the return value is then
  // the difference of the two shape_ids).
  static int SortDescendingRating(const void* t1, const void* t2) {
    const ShapeRating& lhs = *static_cast<const ShapeRating*>(t1);
    const ShapeRating& rhs = *static_cast<const ShapeRating*>(t2);
    if (lhs.rating > rhs.rating) return -1;
    if (lhs.rating < rhs.rating) return 1;
    return lhs.shape_id - rhs.shape_id;
  }

  // Index into some shape table indicates the class of the answer.
  int shape_id;
  // Rating from classifier with 1.0 perfect and 0.0 impossible.
  // Call it a probability if you must.
  float rating;
  // Subsidiary rating that a classifier may use internally.
  float raw;
  // Subsidiary rating that a classifier may use internally.
  float font;
};
class TrainingSampleSet;
struct UnicharRating;
// Interface base class for classifiers that produce ShapeRating results.
class ShapeClassifier {
@ -76,18 +53,70 @@ class ShapeClassifier {
// to get the appropriate tesseract features.
// If debug is non-zero, then various degrees of classifier dependent debug
// information is provided.
// If keep_this (a shape index) is >= 0, then the results should always
// If keep_this (a UNICHAR_ID) is >= 0, then the results should always
// contain keep_this, and (if possible) anything of intermediate confidence.
// (Used for answering "Why didn't it get that right?" questions.)
// (Used for answering "Why didn't it get that right?" questions.) It must
// be a UNICHAR_ID as the callers have no clue how to choose the best shape
// that may contain a desired answer.
// The return value is the number of classes saved in results.
// NOTE that overriding functions MUST clear results unless the classifier
// is working with a team of such classifiers.
// NOTE that overriding functions MUST clear and sort the results by
// descending rating unless the classifier is working with a team of such
// classifiers.
// NOTE: Neither overload of ClassifySample is pure, but at least one must
// be overridden by a classifier in order for it to do anything.
virtual int UnicharClassifySample(const TrainingSample& sample, Pix* page_pix,
int debug, UNICHAR_ID keep_this,
GenericVector<UnicharRating>* results);
protected:
virtual int ClassifySample(const TrainingSample& sample, Pix* page_pix,
int debug, int keep_this,
GenericVector<ShapeRating>* results) = 0;
int debug, UNICHAR_ID keep_this,
GenericVector<ShapeRating>* results);
public:
// Returns the shape that contains unichar_id that has the best result.
// If result is not NULL, it is set with the shape_id and rating.
// Returns -1 if ClassifySample fails to provide any result containing
// unichar_id. BestShapeForUnichar does not need to be overridden if
// ClassifySample respects the keep_this rule.
virtual int BestShapeForUnichar(const TrainingSample& sample, Pix* page_pix,
UNICHAR_ID unichar_id, ShapeRating* result);
// Provides access to the ShapeTable that this classifier works with.
virtual const ShapeTable* GetShapeTable() const = 0;
// Provides access to the UNICHARSET that this classifier works with.
// Must be overridden IFF GetShapeTable() returns NULL.
virtual const UNICHARSET& GetUnicharset() const;
// Visual debugger classifies the given sample, displays the results and
// solicits user input to display other classifications. Returns when
// the user has finished with debugging the sample.
// Probably doesn't need to be overridden if the subclass provides
// DisplayClassifyAs.
virtual void DebugDisplay(const TrainingSample& sample, Pix* page_pix,
UNICHAR_ID unichar_id);
// Displays classification as the given unichar_id. Creates as many windows
// as it feels fit, using index as a guide for placement. Adds any created
// windows to the windows output and returns a new index that may be used
// by any subsequent classifiers. Caller waits for the user to view and
// then destroys the windows by clearing the vector.
virtual int DisplayClassifyAs(const TrainingSample& sample, Pix* page_pix,
UNICHAR_ID unichar_id, int index,
PointerVector<ScrollView>* windows);
// Prints debug information on the results. context is some introductory/title
// message.
virtual void UnicharPrintResults(
const char* context, const GenericVector<UnicharRating>& results) const;
virtual void PrintResults(const char* context,
const GenericVector<ShapeRating>& results) const;
protected:
// Removes any result that has all its unichars covered by a better choice,
// regardless of font.
void FilterDuplicateUnichars(GenericVector<ShapeRating>* results) const;
};
} // namespace tesseract.

View File

@ -22,12 +22,47 @@
#include "shapetable.h"
#include "bitvector.h"
#include "fontinfo.h"
#include "intfeaturespace.h"
#include "strngs.h"
#include "unicharset.h"
#include "unicity_table.h"
namespace tesseract {
// Helper function to get the index of the first result with the required
// unichar_id. If the results are sorted by rating, this will also be the
// best result with the required unichar_id.
// Returns -1 if the unichar_id is not found
int ShapeRating::FirstResultWithUnichar(
const GenericVector<ShapeRating>& results,
const ShapeTable& shape_table,
UNICHAR_ID unichar_id) {
for (int r = 0; r < results.size(); ++r) {
int shape_id = results[r].shape_id;
const Shape& shape = shape_table.GetShape(shape_id);
if (shape.ContainsUnichar(unichar_id)) {
return r;
}
}
return -1;
}
// Helper function to get the index of the first result with the required
// unichar_id. If the results are sorted by rating, this will also be the
// best result with the required unichar_id.
// Returns -1 if the unichar_id is not found
int UnicharRating::FirstResultWithUnichar(
const GenericVector<UnicharRating>& results,
UNICHAR_ID unichar_id) {
for (int r = 0; r < results.size(); ++r) {
if (results[r].unichar_id == unichar_id)
return r;
}
return -1;
}
// Writes to the given file. Returns false in case of error.
bool UnicharAndFonts::Serialize(FILE* fp) const {
if (fwrite(&unichar_id, sizeof(unichar_id), 1, fp) != 1) return false;
@ -138,6 +173,39 @@ bool Shape::ContainsFont(int font_id) const {
}
return false;
}
// Returns true if the shape contains the given font properties, ignoring
// unichar_id.
bool Shape::ContainsFontProperties(const FontInfoTable& font_table,
uinT32 properties) const {
for (int c = 0; c < unichars_.size(); ++c) {
GenericVector<int>& font_list = unichars_[c].font_ids;
for (int f = 0; f < font_list.size(); ++f) {
if (font_table.get(font_list[f]).properties == properties)
return true;
}
}
return false;
}
// Returns true if the shape contains multiple different font properties,
// ignoring unichar_id.
bool Shape::ContainsMultipleFontProperties(
const FontInfoTable& font_table) const {
uinT32 properties = font_table.get(unichars_[0].font_ids[0]).properties;
for (int c = 0; c < unichars_.size(); ++c) {
GenericVector<int>& font_list = unichars_[c].font_ids;
for (int f = 0; f < font_list.size(); ++f) {
if (font_table.get(font_list[f]).properties != properties)
return true;
}
}
return false;
}
// Two shapes compare equal when each is a subset of the other, so the order
// in which unichars and fonts are stored does not matter.
bool Shape::operator==(const Shape& other) const {
  if (!IsSubsetOf(other)) {
    return false;
  }
  return other.IsSubsetOf(*this);
}
// Returns true if this is a subset (including equal) of other.
bool Shape::IsSubsetOf(const Shape& other) const {
@ -172,10 +240,10 @@ void Shape::SortUnichars() {
unichars_sorted_ = true;
}
ShapeTable::ShapeTable() : unicharset_(NULL) {
ShapeTable::ShapeTable() : unicharset_(NULL), num_fonts_(0) {
}
ShapeTable::ShapeTable(const UNICHARSET& unicharset)
: unicharset_(&unicharset) {
: unicharset_(&unicharset), num_fonts_(0) {
}
// Writes to the given file. Returns false in case of error.
@ -187,9 +255,38 @@ bool ShapeTable::Serialize(FILE* fp) const {
// If swap is true, assumes a big/little-endian swap is needed.
bool ShapeTable::DeSerialize(bool swap, FILE* fp) {
  if (shape_table_.DeSerialize(swap, fp)) {
    // The cached font count no longer describes the table; a value of 0 makes
    // NumFonts() recompute it on the next call.
    num_fonts_ = 0;
    return true;
  }
  return false;
}
// Returns the number of fonts used in this ShapeTable, computing it if
// necessary.
int ShapeTable::NumFonts() const {
if (num_fonts_ <= 0) {
for (int shape_id = 0; shape_id < shape_table_.size(); ++shape_id) {
const Shape& shape = *shape_table_[shape_id];
for (int c = 0; c < shape.size(); ++c) {
for (int f = 0; f < shape[c].font_ids.size(); ++f) {
if (shape[c].font_ids[f] >= num_fonts_)
num_fonts_ = shape[c].font_ids[f] + 1;
}
}
}
}
return num_fonts_;
}
// Re-indexes the class_ids in the shapetable according to the given map.
// Useful in conjunction with set_unicharset.
void ShapeTable::ReMapClassIds(const GenericVector<int>& unicharset_map) {
for (int shape_id = 0; shape_id < shape_table_.size(); ++shape_id) {
Shape* shape = shape_table_[shape_id];
for (int c = 0; c < shape->size(); ++c) {
shape->SetUnicharId(c, unicharset_map[(*shape)[c].unichar_id]);
}
}
}
// Returns a string listing the classes/fonts in a shape.
STRING ShapeTable::DebugStr(int shape_id) const {
if (shape_id < 0 || shape_id >= shape_table_.size())
@ -251,15 +348,22 @@ int ShapeTable::AddShape(int unichar_id, int font_id) {
Shape* shape = new Shape;
shape->AddToShape(unichar_id, font_id);
shape_table_.push_back(shape);
num_fonts_ = MAX(num_fonts_, font_id + 1);
return index;
}
// Adds a copy of the given shape.
// Returns the assigned index.
// Adds a copy of the given shape unless it is already present.
// Returns the assigned index or index of existing shape if already present.
int ShapeTable::AddShape(const Shape& other) {
int index = shape_table_.size();
Shape* shape = new Shape(other);
shape_table_.push_back(shape);
int index;
for (index = 0; index < shape_table_.size() &&
!(other == *shape_table_[index]); ++index)
continue;
if (index == shape_table_.size()) {
Shape* shape = new Shape(other);
shape_table_.push_back(shape);
}
num_fonts_ = 0;
return index;
}
@ -275,12 +379,14 @@ void ShapeTable::DeleteShape(int shape_id) {
// Adds the (unichar_id, font_id) pair to the existing shape with index
// shape_id and keeps the cached font count consistent.
// NumFonts() treats num_fonts_ <= 0 as "not computed yet", and several
// mutators reset the cache to 0 to force a rescan. Raising the cache from 0
// here would mark it as valid while reflecting only this one font, so the
// cache is only bumped when it already holds a computed (positive) value;
// otherwise it is left for NumFonts() to recompute from the whole table.
void ShapeTable::AddToShape(int shape_id, int unichar_id, int font_id) {
  Shape& shape = *shape_table_[shape_id];
  shape.AddToShape(unichar_id, font_id);
  if (num_fonts_ > 0) {
    num_fonts_ = MAX(num_fonts_, font_id + 1);
  }
}
// Merges the contents of other into the existing shape with index shape_id.
void ShapeTable::AddShapeToShape(int shape_id, const Shape& other) {
  shape_table_[shape_id]->AddShape(other);
  // Reset the cached font count so that NumFonts() recomputes it.
  num_fonts_ = 0;
}
// Returns the id of the shape that contains the given unichar and font.
@ -316,25 +422,26 @@ void ShapeTable::GetFirstUnicharAndFont(int shape_id,
// a ShapeTable.
int ShapeTable::BuildFromShape(const Shape& shape,
const ShapeTable& master_shapes) {
int num_masters = 0;
BitVector shape_map(master_shapes.NumShapes());
for (int u_ind = 0; u_ind < shape.size(); ++u_ind) {
for (int f_ind = 0; f_ind < shape[u_ind].font_ids.size(); ++f_ind) {
int c = shape[u_ind].unichar_id;
int f = shape[u_ind].font_ids[f_ind];
if (FindShape(c, f) < 0) {
int shape_id = AddShape(c, f);
int master_id = master_shapes.FindShape(c, f);
if (master_id >= 0 && shape.size() > 1) {
const Shape& master = master_shapes.GetShape(master_id);
if (master.IsSubsetOf(shape) && !shape.IsSubsetOf(master)) {
// Add everything else from the master shape.
shape_table_[shape_id]->AddShape(master);
++num_masters;
}
}
int master_id = master_shapes.FindShape(c, f);
if (master_id >= 0) {
shape_map.SetBit(master_id);
} else if (FindShape(c, f) < 0) {
AddShape(c, f);
}
}
}
int num_masters = 0;
for (int s = 0; s < master_shapes.NumShapes(); ++s) {
if (shape_map[s]) {
AddShape(master_shapes.GetShape(s));
++num_masters;
}
}
return num_masters;
}
@ -381,7 +488,7 @@ void ShapeTable::ForceFontMerges(int start, int end) {
}
}
ShapeTable compacted(*unicharset_);
compacted.AppendMasterShapes(*this);
compacted.AppendMasterShapes(*this, NULL);
*this = compacted;
}
@ -422,6 +529,13 @@ void ShapeTable::MergeShapes(int shape_id1, int shape_id2) {
shape_table_[master_id1]->AddShape(*shape_table_[master_id2]);
}
// Swaps two shape_ids.
void ShapeTable::SwapShapes(int shape_id1, int shape_id2) {
Shape* tmp = shape_table_[shape_id1];
shape_table_[shape_id1] = shape_table_[shape_id2];
shape_table_[shape_id2] = tmp;
}
// Returns the destination of this shape, (if merged), taking into account
// the fact that the destination may itself have been merged.
int ShapeTable::MasterDestinationIndex(int shape_id) const {
@ -435,11 +549,129 @@ int ShapeTable::MasterDestinationIndex(int shape_id) const {
return master_id;
}
// Returns true if the unichars of one of the two shapes are a subset of the
// unichars of the other (in either direction), and false if neither is a
// subset of the other. Fonts are ignored.
bool ShapeTable::SubsetUnichar(int shape_id1, int shape_id2) const {
  const Shape& shape1 = GetShape(shape_id1);
  const Shape& shape2 = GetShape(shape_id2);
  // Is every unichar of shape1 present in shape2?
  bool first_in_second = true;
  for (int c = 0; first_in_second && c < shape1.size(); ++c) {
    first_in_second = shape2.ContainsUnichar(shape1[c].unichar_id);
  }
  // Is every unichar of shape2 present in shape1?
  bool second_in_first = true;
  for (int c = 0; second_in_first && c < shape2.size(); ++c) {
    second_in_first = shape1.ContainsUnichar(shape2[c].unichar_id);
  }
  return first_in_second || second_in_first;
}
// Subset test between shape_id and the combination of merge_id1 and merge_id2.
// Returns true if every unichar of shape appears in merge1 or merge2, or if
// every unichar of both merge1 and merge2 appears in shape. Returns false if
// neither holds. Fonts are ignored.
bool ShapeTable::MergeSubsetUnichar(int merge_id1, int merge_id2,
                                    int shape_id) const {
  const Shape& merge1 = GetShape(merge_id1);
  const Shape& merge2 = GetShape(merge_id2);
  const Shape& shape = GetShape(shape_id);
  // Is shape a subset of the merge?
  bool shape_in_merge = true;
  for (int i = 0; shape_in_merge && i < shape.size(); ++i) {
    const int unichar_id = shape[i].unichar_id;
    shape_in_merge = merge1.ContainsUnichar(unichar_id) ||
                     merge2.ContainsUnichar(unichar_id);
  }
  // Is the merge a subset of shape? Both halves must be.
  bool merge1_in_shape = true;
  for (int i = 0; merge1_in_shape && i < merge1.size(); ++i) {
    merge1_in_shape = shape.ContainsUnichar(merge1[i].unichar_id);
  }
  bool merge2_in_shape = true;
  for (int i = 0; merge2_in_shape && i < merge2.size(); ++i) {
    merge2_in_shape = shape.ContainsUnichar(merge2[i].unichar_id);
  }
  return shape_in_merge || (merge1_in_shape && merge2_in_shape);
}
// Returns true if the unichar sets are equal between the shapes.
bool ShapeTable::EqualUnichars(int shape_id1, int shape_id2) const {
const Shape& shape1 = GetShape(shape_id1);
const Shape& shape2 = GetShape(shape_id2);
for (int c1 = 0; c1 < shape1.size(); ++c1) {
int unichar_id1 = shape1[c1].unichar_id;
if (!shape2.ContainsUnichar(unichar_id1))
return false;
}
for (int c2 = 0; c2 < shape2.size(); ++c2) {
int unichar_id2 = shape2[c2].unichar_id;
if (!shape1.ContainsUnichar(unichar_id2))
return false;
}
return true;
}
// Returns true if the unichar sets are equal between the shapes.
bool ShapeTable::MergeEqualUnichars(int merge_id1, int merge_id2,
int shape_id) const {
const Shape& merge1 = GetShape(merge_id1);
const Shape& merge2 = GetShape(merge_id2);
const Shape& shape = GetShape(shape_id);
for (int cs = 0; cs < shape.size(); ++cs) {
int unichar_id = shape[cs].unichar_id;
if (!merge1.ContainsUnichar(unichar_id) &&
!merge2.ContainsUnichar(unichar_id))
return false; // Shape has a unichar that appears in neither merge.
}
for (int cm1 = 0; cm1 < merge1.size(); ++cm1) {
int unichar_id1 = merge1[cm1].unichar_id;
if (!shape.ContainsUnichar(unichar_id1))
return false; // Merge has a unichar that is not in shape.
}
for (int cm2 = 0; cm2 < merge2.size(); ++cm2) {
int unichar_id2 = merge2[cm2].unichar_id;
if (!shape.ContainsUnichar(unichar_id2))
return false; // Merge has a unichar that is not in shape.
}
return true;
}
// Returns true if there is a common unichar between the shapes.
bool ShapeTable::CommonUnichars(int shape_id1, int shape_id2) const {
const Shape& shape1 = GetShape(shape_id1);
const Shape& shape2 = GetShape(shape_id2);
for (int c1 = 0; c1 < shape1.size(); ++c1) {
int unichar_id1 = shape1[c1].unichar_id;
if (shape2.ContainsUnichar(unichar_id1))
return true;
}
return false;
}
// Returns true if there is a common font id between the shapes.
bool ShapeTable::CommonFont(int shape_id1, int shape_id2) const {
const Shape& shape1 = GetShape(shape_id1);
const Shape& shape2 = GetShape(shape_id2);
for (int c1 = 0; c1 < shape1.size(); ++c1) {
const GenericVector<int>& font_list1 = shape1[c1].font_ids;
for (int f = 0; f < font_list1.size(); ++f) {
if (shape2.ContainsFont(font_list1[f]))
return true;
}
}
return false;
}
// Appends the master shapes from other to this.
void ShapeTable::AppendMasterShapes(const ShapeTable& other) {
// If not NULL, shape_map is set to map other shape_ids to this's shape_ids.
void ShapeTable::AppendMasterShapes(const ShapeTable& other,
GenericVector<int>* shape_map) {
if (shape_map != NULL)
shape_map->init_to_size(other.NumShapes(), -1);
for (int s = 0; s < other.shape_table_.size(); ++s) {
if (other.shape_table_[s]->destination_index() < 0) {
AddShape(*other.shape_table_[s]);
int index = AddShape(*other.shape_table_[s]);
if (shape_map != NULL)
(*shape_map)[s] = index;
}
}
}
@ -455,6 +687,46 @@ int ShapeTable::NumMasterShapes() const {
}
// Adds the unichars of the given shape_id to the vector of results. Any
// unichar_id that is already present just has the fonts added to the
// font set for that result without adding a new entry in the vector.
// NOTE: it is assumed that the results are given to this function in order
// of decreasing rating.
// The unichar_map vector indicates the index of the results entry containing
// each unichar, or -1 if the unichar is not yet included in results.
void ShapeTable::AddShapeToResults(const ShapeRating& shape_rating,
GenericVector<int>* unichar_map,
GenericVector<UnicharRating>* results)const {
if (shape_rating.joined) {
AddUnicharToResults(UNICHAR_JOINED, shape_rating.rating, unichar_map,
results);
}
if (shape_rating.broken) {
AddUnicharToResults(UNICHAR_BROKEN, shape_rating.rating, unichar_map,
results);
}
const Shape& shape = GetShape(shape_rating.shape_id);
for (int u = 0; u < shape.size(); ++u) {
int result_index = AddUnicharToResults(shape[u].unichar_id,
shape_rating.rating,
unichar_map, results);
(*results)[result_index].fonts += shape[u].font_ids;
}
}
// Adds the given unichar_id to the results if needed, updating unichar_map
// and returning the index of unichar in results.
int ShapeTable::AddUnicharToResults(
int unichar_id, float rating, GenericVector<int>* unichar_map,
GenericVector<UnicharRating>* results) const {
int result_index = unichar_map->get(unichar_id);
if (result_index < 0) {
UnicharRating result(unichar_id, rating);
result_index = results->push_back(result);
(*unichar_map)[unichar_id] = result_index;
}
return result_index;
}
} // namespace tesseract

View File

@ -23,6 +23,8 @@
#ifndef TESSERACT_CLASSIFY_SHAPETABLE_H_
#define TESSERACT_CLASSIFY_SHAPETABLE_H_
#include "bitvector.h"
#include "genericheap.h"
#include "genericvector.h"
#include "intmatcher.h"
@ -31,6 +33,113 @@ class UNICHARSET;
namespace tesseract {
struct FontInfo;
class FontInfoTable;
class ShapeTable;
// One unichar-level answer from a classifier: the chosen unichar, its rating
// and the fonts that go with it.
struct UnicharRating {
  // Default result: unichar 0 with rating 0 and no fonts.
  UnicharRating() : unichar_id(0), rating(0.0f) {}
  // Result for unichar u with rating r and no fonts.
  UnicharRating(int u, float r)
      : unichar_id(u), rating(r) {}

  // qsort-style comparator that orders UnicharRatings by descending rating.
  // Equal ratings are ordered by ascending unichar_id (the return value is
  // then the difference of the two unichar_ids).
  static int SortDescendingRating(const void* t1, const void* t2) {
    const UnicharRating& lhs = *static_cast<const UnicharRating*>(t1);
    const UnicharRating& rhs = *static_cast<const UnicharRating*>(t2);
    if (lhs.rating > rhs.rating) return -1;
    if (lhs.rating < rhs.rating) return 1;
    return lhs.unichar_id - rhs.unichar_id;
  }

  // Returns the index of the first entry of results that has the required
  // unichar_id, or -1 if the unichar_id is not found. If the results are
  // sorted by rating, this is also the best result with that unichar_id.
  static int FirstResultWithUnichar(const GenericVector<UnicharRating>& results,
                                    UNICHAR_ID unichar_id);

  // Index into some UNICHARSET table indicates the class of the answer.
  UNICHAR_ID unichar_id;
  // Rating from classifier with 1.0 perfect and 0.0 impossible.
  // Call it a probability if you must.
  float rating;
  // Set of fonts for this shape in order of decreasing preference.
  // (There is no mechanism for storing scores for fonts as yet.)
  GenericVector<int> fonts;
};
// Result of a low-level classification: the index of the matched shape in
// some ShapeTable, a rating for the match, and some subsidiary information.
struct ShapeRating {
  // Default result: shape 0, all scores zero, neither joined nor broken.
  ShapeRating()
      : shape_id(0), rating(0.0f), raw(0.0f), font(0.0f),
        joined(false), broken(false) {}
  // Result for shape s with rating r; raw starts at 1, font at 0 and both
  // flags are clear.
  ShapeRating(int s, float r)
      : shape_id(s), rating(r), raw(1.0f), font(0.0f),
        joined(false), broken(false) {}

  // qsort-style comparator that orders ShapeRatings by descending rating.
  // Equal ratings are ordered by ascending shape_id (the return value is then
  // the difference of the two shape_ids).
  static int SortDescendingRating(const void* t1, const void* t2) {
    const ShapeRating& lhs = *static_cast<const ShapeRating*>(t1);
    const ShapeRating& rhs = *static_cast<const ShapeRating*>(t2);
    if (lhs.rating > rhs.rating) return -1;
    if (lhs.rating < rhs.rating) return 1;
    return lhs.shape_id - rhs.shape_id;
  }

  // Returns the index of the first entry of results whose shape (looked up in
  // shape_table) contains the required unichar_id, or -1 if the unichar_id is
  // not found. If the results are sorted by rating, this is also the best
  // result with that unichar_id.
  static int FirstResultWithUnichar(const GenericVector<ShapeRating>& results,
                                    const ShapeTable& shape_table,
                                    UNICHAR_ID unichar_id);

  // Index into some shape table indicates the class of the answer.
  int shape_id;
  // Rating from classifier with 1.0 perfect and 0.0 impossible.
  // Call it a probability if you must.
  float rating;
  // Subsidiary rating that a classifier may use internally.
  float raw;
  // Subsidiary rating that a classifier may use internally.
  float font;
  // Flag indicating that the input may be joined.
  bool joined;
  // Flag indicating that the input may be broken (a fragment).
  bool broken;
};
// Entry type for a heap-based priority queue of ShapeRating: a classifier
// result paired with the tree level that produced it.
struct ShapeQueueEntry {
  // Default entry: shape 0 with rating 0, from level 0.
  ShapeQueueEntry() : result(0, 0.0f), level(0) {}
  ShapeQueueEntry(const ShapeRating& rating, int level0)
      : result(rating), level(level0) {}

  // Ordering used by the queue: an entry is "less" than another if it has the
  // higher rating, or, for equal ratings, the higher level.
  bool operator<(const ShapeQueueEntry& other) const {
    if (result.rating != other.result.rating) {
      return result.rating > other.result.rating;
    }
    return level > other.level;
  }

  // Output from classifier.
  ShapeRating result;
  // Which level in the tree did this come from?
  int level;
};
typedef GenericHeap<ShapeQueueEntry> ShapeQueue;
// Simple struct to hold a set of fonts associated with a single unichar-id.
// A vector of UnicharAndFonts makes a shape.
struct UnicharAndFonts {
@ -83,6 +192,10 @@ class Shape {
const UnicharAndFonts& operator[](int index) const {
return unichars_[index];
}
// Sets the unichar_id of the given index to the new unichar_id.
// Only the unichar_id field of that entry is overwritten; its font list and
// the order of the entries are left as they are.
// NOTE(review): if the class tracks whether its unichars are sorted, this
// call does not update that state - confirm callers do not rely on it.
void SetUnicharId(int index, int unichar_id) {
unichars_[index].unichar_id = unichar_id;
}
// Adds a font_id for the given unichar_id. If the unichar_id is not
// in the shape, it is added.
void AddToShape(int unichar_id, int font_id);
@ -94,6 +207,16 @@ class Shape {
bool ContainsUnichar(int unichar_id) const;
// Returns true if the shape contains the given font, ignoring unichar_id.
bool ContainsFont(int font_id) const;
// Returns true if the shape contains the given font properties, ignoring
// unichar_id.
bool ContainsFontProperties(const FontInfoTable& font_table,
uinT32 properties) const;
// Returns true if the shape contains multiple different font properties,
// ignoring unichar_id.
bool ContainsMultipleFontProperties(const FontInfoTable& font_table) const;
// Returns true if this shape is equal to other (ignoring order of unichars
// and fonts).
bool operator==(const Shape& other) const;
// Returns true if this is a subset (including equal) of other.
bool IsSubsetOf(const Shape& other) const;
// Returns true if the lists of unichar ids are the same in this and other,
@ -143,11 +266,17 @@ class ShapeTable {
const UNICHARSET& unicharset() const {
return *unicharset_;
}
// Returns the number of fonts used in this ShapeTable, computing it if
// necessary.
int NumFonts() const;
// Shapetable takes a pointer to the UNICHARSET, so it must persist for the
// entire life of the ShapeTable.
void set_unicharset(const UNICHARSET& unicharset) {
unicharset_ = &unicharset;
}
// Re-indexes the class_ids in the shapetable according to the given map.
// Useful in conjunction with set_unicharset.
void ReMapClassIds(const GenericVector<int>& unicharset_map);
// Returns a string listing the classes/fonts in a shape.
STRING DebugStr(int shape_id) const;
// Returns a debug string summarizing the table.
@ -156,8 +285,8 @@ class ShapeTable {
// Adds a new shape starting with the given unichar_id and font_id.
// Returns the assigned index.
int AddShape(int unichar_id, int font_id);
// Adds a copy of the given shape.
// Returns the assigned index.
// Adds a copy of the given shape unless it is already present.
// Returns the assigned index or index of existing shape if already present.
int AddShape(const Shape& other);
// Removes the shape given by the shape index. All indices above are changed!
void DeleteShape(int shape_id);
@ -204,10 +333,14 @@ class ShapeTable {
int MergedUnicharCount(int shape_id1, int shape_id2) const;
// Merges two shape_ids, leaving shape_id2 marked as merged.
void MergeShapes(int shape_id1, int shape_id2);
// Swaps two shape_ids.
void SwapShapes(int shape_id1, int shape_id2);
// Appends the master shapes from other to this.
// Used to create a clean ShapeTable from a merged one, or to create a
// copy of a ShapeTable.
void AppendMasterShapes(const ShapeTable& other);
// If not NULL, shape_map is set to map other shape_ids to this's shape_ids.
void AppendMasterShapes(const ShapeTable& other,
GenericVector<int>* shape_map);
// Returns the number of master shapes remaining after merging.
int NumMasterShapes() const;
// Returns the destination of this shape, (if merged), taking into account
@ -215,11 +348,43 @@ class ShapeTable {
// For a non-merged shape, returns the input shape_id.
int MasterDestinationIndex(int shape_id) const;
// Returns false if the unichars of neither shape are a subset of the other's.
bool SubsetUnichar(int shape_id1, int shape_id2) const;
// Returns true if the unichars of shape_id are a subset of the combined
// unichars of merge_id1 and merge_id2, or vice versa; false otherwise.
bool MergeSubsetUnichar(int merge_id1, int merge_id2, int shape_id) const;
// Returns true if the unichar sets are equal between the shapes.
bool EqualUnichars(int shape_id1, int shape_id2) const;
bool MergeEqualUnichars(int merge_id1, int merge_id2, int shape_id) const;
// Returns true if there is a common unichar between the shapes.
bool CommonUnichars(int shape_id1, int shape_id2) const;
// Returns true if there is a common font id between the shapes.
bool CommonFont(int shape_id1, int shape_id2) const;
// Adds the unichars of the given shape_id to the vector of results. Any
// unichar_id that is already present just has the fonts added to the
// font set for that result without adding a new entry in the vector.
// NOTE: it is assumed that the results are given to this function in order
// of decreasing rating.
// The unichar_map vector indicates the index of the results entry containing
// each unichar, or -1 if the unichar is not yet included in results.
void AddShapeToResults(const ShapeRating& shape_rating,
GenericVector<int>* unichar_map,
GenericVector<UnicharRating>* results) const;
private:
// Adds the given unichar_id to the results if needed, updating unichar_map
// and returning the index of unichar in results.
int AddUnicharToResults(int unichar_id, float rating,
GenericVector<int>* unichar_map,
GenericVector<UnicharRating>* results) const;
// Pointer to a provided unicharset used only by the DebugStr member.
const UNICHARSET* unicharset_;
// Vector of pointers to the Shapes in this ShapeTable.
PointerVector<Shape> shape_table_;
// Cached data calculated on demand.
mutable int num_fonts_;
};
} // namespace tesseract.

View File

@ -1,107 +0,0 @@
/******************************************************************************
** Filename: speckle.c
** Purpose: Routines used by classifier to filter out speckle.
** Author: Dan Johnson
** History: Mon Mar 11 10:06:14 1991, DSJ, Created.
**
** (c) Copyright Hewlett-Packard Company, 1988.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
******************************************************************************/
/*-----------------------------------------------------------------------------
Include Files and Type Defines
-----------------------------------------------------------------------------*/
#include "speckle.h"
#include "blobs.h"
#include "ratngs.h"
#include "params.h"
/*-----------------------------------------------------------------------------
Global Data Definitions and Declarations
-----------------------------------------------------------------------------*/
/** define control knobs for adjusting definition of speckle*/
double_VAR(speckle_large_max_size, 0.30, "Max large speckle size");
double_VAR(speckle_small_penalty, 10.0, "Small speckle penalty");
double_VAR(speckle_large_penalty, 10.0, "Large speckle penalty");
double_VAR(speckle_small_certainty, -1.0, "Small speckle certainty");
/*-----------------------------------------------------------------------------
Public Code
-----------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
/**
* This routine adds a null choice to Choices with a
* rating equal to the worst rating in Choices plus a pad.
* The certainty of the new choice is the same as the
* certainty of the worst choice in Choices. The new choice
* is added to the end of Choices.
*
* Globals:
* - #speckle_small_penalty rating for a small speckle
* - #speckle_large_penalty rating penalty for a large speckle
* - #speckle_small_certainty certainty for a small speckle
*
* @param Choices choices to add a speckle choice to
*
* @return New Choices list with null choice added to end.
*
* Exceptions: none
* History: Mon Mar 11 11:08:11 1991, DSJ, Created.
*/
void AddLargeSpeckleTo(BLOB_CHOICE_LIST *Choices) {
  assert(Choices != NULL);
  BLOB_CHOICE_IT choice_it;
  choice_it.set_to_list(Choices);

  // Work out the rating and certainty of the null choice first, then append
  // it in a single place.
  double null_rating;
  double null_certainty;
  if (Choices->length() == 0) {
    // Nothing to compare against: fall back on the speckle parameters.
    // NOTE(review): the rating here is built from speckle_small_certainty,
    // whereas the routine's header comment talks about the small speckle
    // *penalty* (speckle_small_penalty) - confirm which one is intended.
    null_rating = speckle_small_certainty + speckle_large_penalty;
    null_certainty = speckle_small_certainty;
  } else {
    // The last entry of the list is taken as the worst choice so far; the
    // null choice is rated speckle_large_penalty worse, with equal certainty.
    choice_it.move_to_last();
    BLOB_CHOICE *worst_choice = choice_it.data();
    null_rating = worst_choice->rating() + speckle_large_penalty;
    null_certainty = worst_choice->certainty();
  }
  choice_it.add_to_end(new BLOB_CHOICE(0, null_rating, null_certainty,
                                       -1, -1, NULL, 0, 0, false));
}  /* AddLargeSpeckleTo */
/*---------------------------------------------------------------------------*/
/**
* This routine returns TRUE if both the width and height of the
* blob's bounding box are less than BASELINE_SCALE * speckle_large_max_size.
*
* Globals:
* - #speckle_large_max_size largest allowed speckle
*
* Exceptions: none
* History: Mon Mar 11 10:06:49 1991, DSJ, Created.
*
* @param blob blob to test against speckle criteria
*
* @return TRUE if blob is speckle, FALSE otherwise.
*/
BOOL8 LargeSpeckle(TBLOB *blob) {
  // Bounding box of the blob under test.
  TBOX blob_box = blob->bounding_box();
  // Upper limit (exclusive) on either dimension for the blob to count as
  // a large speckle.
  double max_extent = BASELINE_SCALE * speckle_large_max_size;
  bool narrow_enough = blob_box.width() < max_extent;
  bool short_enough = blob_box.height() < max_extent;
  return narrow_enough && short_enough;
}  /* LargeSpeckle */

View File

@ -1,35 +0,0 @@
/******************************************************************************
** Filename: speckle.h
** Purpose: Interface to classifier speckle filtering routines.
** Author: Dan Johnson
** History: Mon Mar 11 10:14:16 1991, DSJ, Created.
**
** (c) Copyright Hewlett-Packard Company, 1988.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
******************************************************************************/
#ifndef SPECKLE_H
#define SPECKLE_H
/*-----------------------------------------------------------------------------
Include Files and Type Defines
-----------------------------------------------------------------------------*/
#include "baseline.h"
#include "ratngs.h"
/*-----------------------------------------------------------------------------
Public Function Prototypes
-----------------------------------------------------------------------------*/
/** Appends one extra BLOB_CHOICE to the end of Choices. When Choices is
 * non-empty the new entry copies the certainty of the last entry and has
 * that entry's rating plus speckle_large_penalty; when Choices is empty the
 * entry is built from speckle_small_certainty and speckle_large_penalty.
 * Choices must not be NULL (asserted in the definition). */
void AddLargeSpeckleTo(BLOB_CHOICE_LIST *Choices);
/** Returns non-zero when both the width and the height of Blob's bounding
 * box are strictly less than BASELINE_SCALE * speckle_large_max_size. */
BOOL8 LargeSpeckle(TBLOB *Blob);
#endif

View File

@ -28,17 +28,25 @@ namespace tesseract {
// Classifies the given [training] sample, writing to results.
// See ShapeClassifier for a full description.
int TessClassifier::ClassifySample(const TrainingSample& sample,
Pix* page_pix, int debug, int keep_this,
GenericVector<ShapeRating>* results) {
int TessClassifier::UnicharClassifySample(
const TrainingSample& sample, Pix* page_pix, int debug,
UNICHAR_ID keep_this, GenericVector<UnicharRating>* results) {
int old_matcher_level = classify_->matcher_debug_level;
int old_matcher_flags = classify_->matcher_debug_flags;
int old_classify_level = classify_->classify_debug_level;
if (debug) {
classify_->matcher_debug_level.set_value(debug ? 2 : 0);
classify_->matcher_debug_flags.set_value(debug ? 25 : 0);
classify_->classify_debug_level.set_value(debug ? 3 : 0);
} else {
classify_->classify_debug_level.set_value(debug ? 2 : 0);
// Explicitly set values of various control parameters to generate debug
// output if required, restoring the old values after classifying.
classify_->matcher_debug_level.set_value(2);
classify_->matcher_debug_flags.set_value(25);
classify_->classify_debug_level.set_value(3);
}
classify_->CharNormTrainingSample(pruner_only_, keep_this, sample, results);
if (debug) {
classify_->matcher_debug_level.set_value(old_matcher_level);
classify_->matcher_debug_flags.set_value(old_matcher_flags);
classify_->classify_debug_level.set_value(old_classify_level);
}
classify_->CharNormTrainingSample(pruner_only_, sample, results);
return results->size();
}
@ -46,6 +54,32 @@ int TessClassifier::ClassifySample(const TrainingSample& sample,
const ShapeTable* TessClassifier::GetShapeTable() const {
// Forwards to classify_; DisplayClassifyAs in this file treats a NULL
// result as "no shape table available".
return classify_->shape_table();
}
// Provides access to the UNICHARSET that this classifier works with.
// Only needs to be overridden if GetShapeTable() can return NULL.
// This override returns a reference to the unicharset member of classify_,
// so no copy is made.
const UNICHARSET& TessClassifier::GetUnicharset() const {
return classify_->unicharset;
}
// Shows how the given sample matches against the class/shape selected by
// unichar_id. When a shape table is available, unichar_id is first mapped to
// a shape via BestShapeForUnichar; otherwise unichar_id is used directly as
// the class id. Nothing is shown if that mapping yields a negative id, or if
// the pre-trained templates have no entry for the id (a message is printed
// in the latter case).
// In this implementation the windows vector is left untouched and index is
// always returned unchanged.
int TessClassifier::DisplayClassifyAs(
    const TrainingSample& sample, Pix* page_pix, int unichar_id, int index,
    PointerVector<ScrollView>* windows) {
  const int shape_id =
      (GetShapeTable() != NULL)
          ? BestShapeForUnichar(sample, page_pix, unichar_id, NULL)
          : unichar_id;
  if (shape_id >= 0) {
    if (UnusedClassIdIn(classify_->PreTrainedTemplates, shape_id)) {
      tprintf("No built-in templates for class/shape %d\n", shape_id);
    } else {
      classify_->ShowBestMatchFor(shape_id, sample.features(),
                                  sample.num_features());
    }
  }
  return index;
}
} // namespace tesseract

View File

@ -41,11 +41,23 @@ class TessClassifier : public ShapeClassifier {
// Classifies the given [training] sample, writing to results.
// See ShapeClassifier for a full description.
virtual int ClassifySample(const TrainingSample& sample, Pix* page_pix,
int debug, int keep_this,
GenericVector<ShapeRating>* results);
virtual int UnicharClassifySample(const TrainingSample& sample, Pix* page_pix,
int debug, UNICHAR_ID keep_this,
GenericVector<UnicharRating>* results);
// Provides access to the ShapeTable that this classifier works with.
virtual const ShapeTable* GetShapeTable() const;
// Provides access to the UNICHARSET that this classifier works with.
// Only needs to be overridden if GetShapeTable() can return NULL.
virtual const UNICHARSET& GetUnicharset() const;
// Displays classification as the given unichar_id. Creates as many windows
// as it feels fit, using index as a guide for placement. Adds any created
// windows to the windows output and returns a new index that may be used
// by any subsequent classifiers. Caller waits for the user to view and
// then destroys the windows by clearing the vector.
virtual int DisplayClassifyAs(const TrainingSample& sample, Pix* page_pix,
int unichar_id, int index,
PointerVector<ScrollView>* windows);
private:
// Indicates that this classifier is to use just the ClassPruner, or the

View File

@ -59,6 +59,8 @@ bool TrainingSample::Serialize(FILE* fp) const {
if (fwrite(&num_features_, sizeof(num_features_), 1, fp) != 1) return false;
if (fwrite(&num_micro_features_, sizeof(num_micro_features_), 1, fp) != 1)
return false;
if (fwrite(&outline_length_, sizeof(outline_length_), 1, fp) != 1)
return false;
if (fwrite(features_, sizeof(*features_), num_features_, fp) != num_features_)
return false;
if (fwrite(micro_features_, sizeof(*micro_features_), num_micro_features_,
@ -90,10 +92,13 @@ bool TrainingSample::DeSerialize(bool swap, FILE* fp) {
if (fread(&num_features_, sizeof(num_features_), 1, fp) != 1) return false;
if (fread(&num_micro_features_, sizeof(num_micro_features_), 1, fp) != 1)
return false;
if (fread(&outline_length_, sizeof(outline_length_), 1, fp) != 1)
return false;
if (swap) {
ReverseN(&class_id_, sizeof(class_id_));
ReverseN(&num_features_, sizeof(num_features_));
ReverseN(&num_micro_features_, sizeof(num_micro_features_));
ReverseN(&outline_length_, sizeof(outline_length_));
}
delete [] features_;
features_ = new INT_FEATURE_STRUCT[num_features_];
@ -113,20 +118,40 @@ bool TrainingSample::DeSerialize(bool swap, FILE* fp) {
// Saves the given features into a TrainingSample.
TrainingSample* TrainingSample::CopyFromFeatures(
const INT_FX_RESULT_STRUCT& fx_info, const INT_FEATURE_STRUCT* features,
const INT_FX_RESULT_STRUCT& fx_info,
const TBOX& bounding_box,
const INT_FEATURE_STRUCT* features,
int num_features) {
TrainingSample* sample = new TrainingSample;
sample->num_features_ = num_features;
sample->features_ = new INT_FEATURE_STRUCT[num_features];
sample->outline_length_ = fx_info.Length;
memcpy(sample->features_, features, num_features * sizeof(features[0]));
sample->geo_feature_[GeoBottom] = fx_info.YBottom;
sample->geo_feature_[GeoTop] = fx_info.YTop;
sample->geo_feature_[GeoWidth] = fx_info.Width;
sample->geo_feature_[GeoBottom] = bounding_box.bottom();
sample->geo_feature_[GeoTop] = bounding_box.top();
sample->geo_feature_[GeoWidth] = bounding_box.width();
// Generate the cn_feature_ from the fx_info.
sample->cn_feature_[CharNormY] =
MF_SCALE_FACTOR * (fx_info.Ymean - kBlnBaselineOffset);
sample->cn_feature_[CharNormLength] =
MF_SCALE_FACTOR * fx_info.Length / LENGTH_COMPRESSION;
sample->cn_feature_[CharNormRx] = MF_SCALE_FACTOR * fx_info.Rx;
sample->cn_feature_[CharNormRy] = MF_SCALE_FACTOR * fx_info.Ry;
sample->features_are_indexed_ = false;
sample->features_are_mapped_ = false;
return sample;
}
// Packs the kNumCNParams values of cn_feature_ into a feature created by
// NewFeature(&CharNormDesc), in the form needed by cntraining.
// NOTE(review): the result presumably has to be released by the caller -
// confirm against NewFeature's contract.
FEATURE_STRUCT* TrainingSample::GetCNFeature() const {
  FEATURE cn_result = NewFeature(&CharNormDesc);
  int param = 0;
  while (param < kNumCNParams) {
    cn_result->Params[param] = cn_feature_[param];
    ++param;
  }
  return cn_result;
}
// Constructs and returns a copy randomized by the method given by
// the randomizer index. If index is out of [0, kSampleRandomSize) then
// an exact copy is returned.

View File

@ -54,7 +54,7 @@ class TrainingSample : public ELIST_LINK {
public:
TrainingSample()
: class_id_(INVALID_UNICHAR_ID), font_id_(0), page_num_(0),
num_features_(0), num_micro_features_(0),
num_features_(0), num_micro_features_(0), outline_length_(0),
features_(NULL), micro_features_(NULL), weight_(1.0),
max_dist_(0.0), sample_index_(0),
features_are_indexed_(false), features_are_mapped_(false),
@ -65,8 +65,11 @@ class TrainingSample : public ELIST_LINK {
// Saves the given features into a TrainingSample. The features are copied,
// so may be deleted afterwards. Delete the return value after use.
static TrainingSample* CopyFromFeatures(const INT_FX_RESULT_STRUCT& fx_info,
const TBOX& bounding_box,
const INT_FEATURE_STRUCT* features,
int num_features);
// Returns the cn_feature as a FEATURE_STRUCT* needed by cntraining.
FEATURE_STRUCT* GetCNFeature() const;
// Constructs and returns a copy "randomized" by the method given by
// the randomizer index. If index is out of [0, kSampleRandomSize) then
// an exact copy is returned.
@ -146,6 +149,9 @@ class TrainingSample : public ELIST_LINK {
const MicroFeature* micro_features() const {
return micro_features_;
}
// Returns the stored outline length. CopyFromFeatures fills it from
// fx_info.Length, and it is written/read by Serialize/DeSerialize.
int outline_length() const {
return outline_length_;
}
float cn_feature(int index) const {
return cn_feature_[index];
}
@ -203,6 +209,10 @@ class TrainingSample : public ELIST_LINK {
int num_features_;
// Number of MicroFeature in micro_features_ array.
int num_micro_features_;
// Total length of outline in the baseline normalized coordinate space.
// See comment in WERD_RES class definition for a discussion of coordinate
// spaces.
int outline_length_;
// Array of features.
INT_FEATURE_STRUCT* features_;
// Array of features.

View File

@ -67,7 +67,7 @@ bool TrainingSampleSet::FontClassInfo::DeSerialize(bool swap, FILE* fp) {
return true;
}
TrainingSampleSet::TrainingSampleSet(const UnicityTable<FontInfo>& font_table)
TrainingSampleSet::TrainingSampleSet(const FontInfoTable& font_table)
: num_raw_samples_(0), unicharset_size_(0),
font_class_array_(NULL), fontinfo_table_(font_table) {
}
@ -115,11 +115,12 @@ bool TrainingSampleSet::DeSerialize(bool swap, FILE* fp) {
void TrainingSampleSet::LoadUnicharset(const char* filename) {
if (!unicharset_.load_from_file(filename)) {
tprintf("Failed to load unicharset from file %s\n"
"Building unicharset for boosting from scratch...\n",
"Building unicharset from scratch...\n",
filename);
unicharset_.clear();
// Space character needed to represent NIL_LIST classification.
unicharset_.unichar_insert(" ");
// Add special characters as they were removed by the clear.
UNICHARSET empty;
unicharset_.AppendOtherUnicharset(empty);
}
unicharset_size_ = unicharset_.size();
}
@ -708,14 +709,6 @@ void TrainingSampleSet::ComputeCanonicalSamples(const IntFeatureMap& map,
continue;
GenericVector<int> features2 = samples_[s2]->indexed_features();
double dist = f_table.FeatureDistance(features2);
int height = samples_[s2]->geo_feature(GeoTop) -
samples_[s2]->geo_feature(GeoBottom);
if (dist == 1.0 && height > 64) {
// TODO(rays) rethink this when the polygonal approximation goes.
// Currently it is possible for dots and other small characters
// to be completely different, even within the same class.
f_table.DebugFeatureDistance(features2);
}
if (dist > max_dist) {
max_dist = dist;
if (dist > max_max_dist) {

View File

@ -24,11 +24,11 @@
#include "trainingsample.h"
class UNICHARSET;
template <typename T> class UnicityTable;
namespace tesseract {
struct FontInfo;
class FontInfoTable;
class IntFeatureMap;
class IntFeatureSpace;
class TrainingSample;
@ -42,7 +42,7 @@ class UnicharAndFonts;
// metrics.
class TrainingSampleSet {
public:
explicit TrainingSampleSet(const UnicityTable<FontInfo>& fontinfo_table);
explicit TrainingSampleSet(const FontInfoTable& fontinfo_table);
~TrainingSampleSet();
// Writes to the given file. Returns false in case of error.
@ -67,6 +67,9 @@ class TrainingSampleSet {
int charsetsize() const {
return unicharset_size_;
}
// Returns the FontInfoTable reference that was handed to the constructor.
// The table is held by reference, not copied.
const FontInfoTable& fontinfo_table() const {
return fontinfo_table_;
}
// Loads an initial unicharset, or sets one up if the file cannot be read.
void LoadUnicharset(const char* filename);
@ -281,7 +284,7 @@ class TrainingSampleSet {
// Reference to the fontinfo_table_ in MasterTrainer. Provides names
// for font_ids in the samples. Not serialized!
const UnicityTable<FontInfo>& fontinfo_table_;
const FontInfoTable& fontinfo_table_;
};
} // namespace tesseract.