mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-27 20:59:36 +08:00
Made some major classifier and clustering improvements
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@130 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
parent
166c867d84
commit
6b5e0c4046
@ -62,7 +62,7 @@ float compare_tess_blobs(TBLOB *blob1,
|
||||
SetBaseLineMatch();
|
||||
IntegerMatcher (ClassForClassId (ad_templates->Templates, CMP_CLASS),
|
||||
AllProtosOn, AllConfigsOn, fcount, fcount,
|
||||
int_features, 0, 0, &int_result, testedit_match_debug);
|
||||
int_features, 0, &int_result, testedit_match_debug);
|
||||
FreeFeatureSet(float_features);
|
||||
if (int_result.Rating < 0)
|
||||
int_result.Rating = MAX_FLOAT32;
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -19,6 +19,7 @@
|
||||
#include "const.h"
|
||||
#include "cluster.h"
|
||||
#include "emalloc.h"
|
||||
#include "tprintf.h"
|
||||
#include "danerror.h"
|
||||
#include "freelist.h"
|
||||
#include <math.h>
|
||||
@ -281,6 +282,7 @@ PROTOTYPE *MakeDegenerateProto(UINT16 N,
|
||||
INT32 MinSamples);
|
||||
|
||||
PROTOTYPE *TestEllipticalProto(CLUSTERER *Clusterer,
|
||||
CLUSTERCONFIG *Config,
|
||||
CLUSTER *Cluster,
|
||||
STATISTICS *Statistics);
|
||||
|
||||
@ -1037,7 +1039,7 @@ PROTOTYPE *MakePrototype(CLUSTERER *Clusterer,
|
||||
}
|
||||
|
||||
if (HOTELLING && Config->ProtoStyle == elliptical) {
|
||||
Proto = TestEllipticalProto(Clusterer, Cluster, Statistics);
|
||||
Proto = TestEllipticalProto(Clusterer, Config, Cluster, Statistics);
|
||||
if (Proto != NULL) {
|
||||
FreeStatistics(Statistics);
|
||||
return Proto;
|
||||
@ -1129,6 +1131,7 @@ PROTOTYPE *MakeDegenerateProto( //this was MinSample
|
||||
|
||||
/** TestEllipticalProto ****************************************************
|
||||
Parameters: Clusterer data struct containing samples being clustered
|
||||
Config provides the magic number of samples that make a good cluster
|
||||
Cluster cluster to be made into an elliptical prototype
|
||||
Statistics statistical info about cluster
|
||||
Globals: None
|
||||
@ -1141,24 +1144,60 @@ Operation: This routine tests the specified cluster to see if **
|
||||
Return: Pointer to new elliptical prototype or NULL.
|
||||
****************************************************************************/
|
||||
PROTOTYPE *TestEllipticalProto(CLUSTERER *Clusterer,
|
||||
CLUSTERCONFIG *Config,
|
||||
CLUSTER *Cluster,
|
||||
STATISTICS *Statistics) {
|
||||
// Fraction of the number of samples used as a range around 1 within
|
||||
// which a cluster has the magic size that allows a boost to the
|
||||
// FTable by kFTableBoostMargin, thus allowing clusters near the
|
||||
// magic size (equal to the number of sample characters) to be more
|
||||
// likely to stay together.
|
||||
const double kMagicSampleMargin = 0.0625;
|
||||
const double kFTableBoostMargin = 2.0;
|
||||
|
||||
int N = Clusterer->SampleSize;
|
||||
CLUSTER* Left = Cluster->Left;
|
||||
CLUSTER* Right = Cluster->Right;
|
||||
if (Left == NULL || Right == NULL)
|
||||
return NULL;
|
||||
int TotalDims = Left->SampleCount + Right->SampleCount;
|
||||
if (TotalDims < N + 1)
|
||||
if (TotalDims < N + 1 || TotalDims < 2)
|
||||
return NULL;
|
||||
FLOAT32* Inverse = (FLOAT32 *) Emalloc(N * N * sizeof(FLOAT32));
|
||||
FLOAT32* Delta = (FLOAT32*) Emalloc(N * sizeof(FLOAT32));
|
||||
double err = InvertMatrix(Statistics->CoVariance, N, Inverse);
|
||||
if (err > 1) {
|
||||
cprintf("Clustering error: Matrix inverse failed with error %g\n", err);
|
||||
const int kMatrixSize = N * N * sizeof(FLOAT32);
|
||||
FLOAT32* Covariance = reinterpret_cast<FLOAT32 *>(Emalloc(kMatrixSize));
|
||||
FLOAT32* Inverse = reinterpret_cast<FLOAT32 *>(Emalloc(kMatrixSize));
|
||||
FLOAT32* Delta = reinterpret_cast<FLOAT32*>(Emalloc(N * sizeof(FLOAT32)));
|
||||
// Compute a new covariance matrix that only uses essential features.
|
||||
for (int i = 0; i < N; ++i) {
|
||||
int row_offset = i * N;
|
||||
if (!Clusterer->ParamDesc[i].NonEssential) {
|
||||
for (int j = 0; j < N; ++j) {
|
||||
if (!Clusterer->ParamDesc[j].NonEssential)
|
||||
Covariance[j + row_offset] = Statistics->CoVariance[j + row_offset];
|
||||
else
|
||||
Covariance[j + row_offset] = 0.0f;
|
||||
}
|
||||
} else {
|
||||
for (int j = 0; j < N; ++j) {
|
||||
if (i == j)
|
||||
Covariance[j + row_offset] = 1.0f;
|
||||
else
|
||||
Covariance[j + row_offset] = 0.0f;
|
||||
}
|
||||
}
|
||||
}
|
||||
double err = InvertMatrix(Covariance, N, Inverse);
|
||||
if (err > 1) {
|
||||
tprintf("Clustering error: Matrix inverse failed with error %g\n", err);
|
||||
}
|
||||
int EssentialN = 0;
|
||||
for (int dim = 0; dim < N; ++dim) {
|
||||
Delta[dim] = Left->Mean[dim] - Right->Mean[dim];
|
||||
if (!Clusterer->ParamDesc[dim].NonEssential) {
|
||||
Delta[dim] = Left->Mean[dim] - Right->Mean[dim];
|
||||
++EssentialN;
|
||||
} else {
|
||||
Delta[dim] = 0.0f;
|
||||
}
|
||||
}
|
||||
// Compute Hotelling's T-squared.
|
||||
double Tsq = 0.0;
|
||||
@ -1169,19 +1208,30 @@ PROTOTYPE *TestEllipticalProto(CLUSTERER *Clusterer,
|
||||
}
|
||||
Tsq += Delta[x] * temp;
|
||||
}
|
||||
memfree(Covariance);
|
||||
memfree(Inverse);
|
||||
memfree(Delta);
|
||||
Tsq *= Left->SampleCount * Right->SampleCount / TotalDims;
|
||||
double F = Tsq * (TotalDims - N - 1) / ((TotalDims - N) * 2);
|
||||
int Fx = N;
|
||||
// Changed this function to match the formula in
|
||||
// Statistical Methods in Medical Research p 473
|
||||
// By Peter Armitage, Geoffrey Berry, J. N. S. Matthews.
|
||||
// Tsq *= Left->SampleCount * Right->SampleCount / TotalDims;
|
||||
double F = Tsq * (TotalDims - EssentialN - 1) / ((TotalDims - 2)*EssentialN);
|
||||
int Fx = EssentialN;
|
||||
if (Fx > FTABLE_X)
|
||||
Fx = FTABLE_X;
|
||||
--Fx;
|
||||
int Fy = TotalDims - N - 1;
|
||||
int Fy = TotalDims - EssentialN - 1;
|
||||
if (Fy > FTABLE_Y)
|
||||
Fy = FTABLE_Y;
|
||||
--Fy;
|
||||
if (F < FTable[Fy][Fx]) {
|
||||
double FTarget = FTable[Fy][Fx];
|
||||
if (Config->MagicSamples > 0 &&
|
||||
TotalDims >= Config->MagicSamples * (1.0 - kMagicSampleMargin) &&
|
||||
TotalDims <= Config->MagicSamples * (1.0 + kMagicSampleMargin)) {
|
||||
// Give magic-sized clusters a magic FTable boost.
|
||||
FTarget += kFTableBoostMargin;
|
||||
}
|
||||
if (F < FTarget) {
|
||||
return NewEllipticalProto (Clusterer->SampleSize, Cluster, Statistics);
|
||||
}
|
||||
return NULL;
|
||||
|
@ -55,6 +55,7 @@ typedef struct // parameters to control clustering
|
||||
// more than 1 feature in that cluster
|
||||
FLOAT32 Independence; // desired independence between dimensions
|
||||
FLOAT64 Confidence; // desired confidence in prototypes created
|
||||
int MagicSamples; // Ideal number of samples in a cluster.
|
||||
}
|
||||
|
||||
|
||||
@ -80,8 +81,13 @@ FLOATUNION;
|
||||
typedef struct proto
|
||||
{
|
||||
unsigned Significant:1; // TRUE if prototype is significant
|
||||
unsigned Merged:1; // Merged after clustering so do not output
|
||||
// but kept for display purposes. If it has no
|
||||
// samples then it was actually merged.
|
||||
// Otherwise it matched an already significant
|
||||
// cluster.
|
||||
unsigned Style:2; // spherical, elliptical, or mixed
|
||||
unsigned NumSamples:29; // number of samples in the cluster
|
||||
unsigned NumSamples:28; // number of samples in the cluster
|
||||
CLUSTER *Cluster; // ptr to cluster which made prototype
|
||||
DISTRIBUTION *Distrib; // different distribution for each dimension
|
||||
FLOAT32 *Mean; // prototype mean
|
||||
@ -129,19 +135,22 @@ CLUSTERER *MakeClusterer (INT16 SampleSize, PARAM_DESC ParamDesc[]);
|
||||
|
||||
SAMPLE *MakeSample (CLUSTERER * Clusterer, FLOAT32 Feature[], INT32 CharID);
|
||||
|
||||
LIST ClusterSamples(CLUSTERER *Clusterer, CLUSTERCONFIG *Config);
|
||||
LIST ClusterSamples(CLUSTERER *Clusterer, CLUSTERCONFIG *Config);
|
||||
|
||||
void FreeClusterer(CLUSTERER *Clusterer);
|
||||
void FreeClusterer(CLUSTERER *Clusterer);
|
||||
|
||||
void FreeProtoList(LIST *ProtoList);
|
||||
void FreeProtoList(LIST *ProtoList);
|
||||
|
||||
void FreePrototype(void *arg); //PROTOTYPE *Prototype);
|
||||
|
||||
CLUSTER *NextSample(LIST *SearchState);
|
||||
CLUSTER *NextSample(LIST *SearchState);
|
||||
|
||||
FLOAT32 Mean(PROTOTYPE *Proto, UINT16 Dimension);
|
||||
FLOAT32 Mean(PROTOTYPE *Proto, UINT16 Dimension);
|
||||
|
||||
FLOAT32 StandardDeviation(PROTOTYPE *Proto, UINT16 Dimension);
|
||||
FLOAT32 StandardDeviation(PROTOTYPE *Proto, UINT16 Dimension);
|
||||
|
||||
INT32 MergeClusters(INT16 N, PARAM_DESC ParamDesc[], INT32 n1, INT32 n2,
|
||||
FLOAT32 m[], FLOAT32 m1[], FLOAT32 m2[]);
|
||||
|
||||
//--------------Global Data Definitions and Declarations---------------------------
|
||||
// define errors that can be trapped
|
||||
|
@ -41,7 +41,7 @@
|
||||
StartParamDesc (MicroFeatureParams)
|
||||
DefineParam (0, 0, -0.5, 0.5)
|
||||
DefineParam (0, 0, -0.25, 0.75)
|
||||
DefineParam (0, 0, 0.0, 1.0)
|
||||
DefineParam (0, 1, 0.0, 1.0)
|
||||
DefineParam (1, 0, 0.0, 1.0)
|
||||
DefineParam (0, 1, -0.5, 0.5)
|
||||
DefineParam (0, 1, -0.5, 0.5)
|
||||
@ -65,9 +65,9 @@ DefineFeature (PicoFeatDesc, 2, 1, 1, MAX_UINT8, "Pico", "pf", PicoFeatParams)
|
||||
/* define all of the parameters for the NormFeat type*/
|
||||
StartParamDesc (CharNormParams)
|
||||
DefineParam (0, 0, -0.25, 0.75)
|
||||
DefineParam (0, 0, 0.0, 1.0)
|
||||
DefineParam (0, 0, 0.0, 1.0)
|
||||
DefineParam (0, 0, 0.0, 1.0)
|
||||
DefineParam (0, 1, 0.0, 1.0)
|
||||
DefineParam (0, 1, 0.0, 1.0)
|
||||
DefineParam (0, 1, 0.0, 1.0)
|
||||
EndParamDesc
|
||||
/* now define the feature type itself (see features.h for info about each
|
||||
parameter).*/
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -30,6 +30,7 @@ typedef struct
|
||||
FLOAT32 Rating;
|
||||
UINT8 Config;
|
||||
UINT8 Config2;
|
||||
UINT16 FeatureMisses;
|
||||
}
|
||||
|
||||
|
||||
@ -38,8 +39,7 @@ INT_RESULT_STRUCT, *INT_RESULT;
|
||||
typedef struct
|
||||
{
|
||||
FLOAT32 Rating;
|
||||
FLOAT32 Rating2;
|
||||
UINT32 config_mask;
|
||||
INT_RESULT_STRUCT IMResult;
|
||||
CLASS_ID Class;
|
||||
}
|
||||
|
||||
@ -68,42 +68,12 @@ int ClassPruner(INT_TEMPLATES IntTemplates,
|
||||
CLASS_PRUNER_RESULTS Results,
|
||||
int Debug);
|
||||
|
||||
int feature_pruner(INT_TEMPLATES IntTemplates,
|
||||
INT16 NumFeatures,
|
||||
INT_FEATURE_ARRAY Features,
|
||||
INT32 NumClasses,
|
||||
CLASS_PRUNER_RESULTS Results);
|
||||
|
||||
int prune_configs(INT_TEMPLATES IntTemplates,
|
||||
INT32 min_misses,
|
||||
INT16 NumFeatures,
|
||||
INT_FEATURE_ARRAY Features,
|
||||
CLASS_NORMALIZATION_ARRAY NormalizationFactors,
|
||||
INT32 class_count,
|
||||
UINT16 BlobLength,
|
||||
CLASS_PRUNER_RESULTS Results,
|
||||
int Debug);
|
||||
|
||||
void PruningMatcher(INT_CLASS ClassTemplate,
|
||||
UINT16 BlobLength,
|
||||
INT16 NumFeatures,
|
||||
INT_FEATURE_ARRAY Features,
|
||||
INT32 min_misses,
|
||||
UINT8 NormalizationFactor,
|
||||
INT_RESULT Result,
|
||||
int Debug);
|
||||
|
||||
void config_mask_to_proto_mask(INT_CLASS ClassTemplate,
|
||||
BIT_VECTOR config_mask,
|
||||
BIT_VECTOR proto_mask);
|
||||
|
||||
void IntegerMatcher(INT_CLASS ClassTemplate,
|
||||
BIT_VECTOR ProtoMask,
|
||||
BIT_VECTOR ConfigMask,
|
||||
UINT16 BlobLength,
|
||||
INT16 NumFeatures,
|
||||
INT_FEATURE_ARRAY Features,
|
||||
INT32 min_misses,
|
||||
UINT8 NormalizationFactor,
|
||||
INT_RESULT Result,
|
||||
int Debug);
|
||||
@ -126,19 +96,19 @@ int FindBadFeatures(INT_CLASS ClassTemplate,
|
||||
FEATURE_ID *FeatureArray,
|
||||
int Debug);
|
||||
|
||||
void InitIntegerMatcher();
|
||||
void InitIntegerMatcher();
|
||||
|
||||
void InitIntegerMatcherVars();
|
||||
void InitIntegerMatcherVars();
|
||||
|
||||
void PrintIntMatcherStats(FILE *f);
|
||||
void PrintIntMatcherStats(FILE *f);
|
||||
|
||||
void SetProtoThresh(FLOAT32 Threshold);
|
||||
void SetProtoThresh(FLOAT32 Threshold);
|
||||
|
||||
void SetFeatureThresh(FLOAT32 Threshold);
|
||||
void SetFeatureThresh(FLOAT32 Threshold);
|
||||
|
||||
void SetBaseLineMatch();
|
||||
void SetBaseLineMatch();
|
||||
|
||||
void SetCharNormMatch();
|
||||
void SetCharNormMatch();
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
Private Function Prototypes
|
||||
@ -160,14 +130,7 @@ void IMDebugConfigurationSum(INT_FEATURE FeatureNum,
|
||||
UINT8 *FeatureEvidence,
|
||||
INT32 ConfigCount);
|
||||
|
||||
void PMUpdateTablesForFeature (INT_CLASS ClassTemplate,
|
||||
int FeatureNum,
|
||||
INT_FEATURE Feature,
|
||||
UINT8 FeatureEvidence[MAX_NUM_CONFIGS],
|
||||
int SumOfFeatureEvidence[MAX_NUM_CONFIGS],
|
||||
int Debug);
|
||||
|
||||
void IMUpdateTablesForFeature (INT_CLASS ClassTemplate,
|
||||
int IMUpdateTablesForFeature (INT_CLASS ClassTemplate,
|
||||
BIT_VECTOR ProtoMask,
|
||||
BIT_VECTOR ConfigMask,
|
||||
int FeatureNum,
|
||||
@ -209,10 +172,6 @@ UINT8
|
||||
ProtoEvidence[MAX_NUM_PROTOS]
|
||||
[MAX_PROTO_INDEX], INT16 NumFeatures);
|
||||
|
||||
void PMNormalizeSumOfEvidences (INT_CLASS ClassTemplate,
|
||||
int SumOfFeatureEvidence[MAX_NUM_CONFIGS],
|
||||
INT16 NumFeatures, INT32 used_features);
|
||||
|
||||
void IMNormalizeSumOfEvidences (INT_CLASS ClassTemplate,
|
||||
int SumOfFeatureEvidence[MAX_NUM_CONFIGS],
|
||||
INT16 NumFeatures, INT32 used_features);
|
||||
@ -229,7 +188,7 @@ void IMDebugBestMatch(int BestMatch,
|
||||
UINT8 NormalizationFactor);
|
||||
#endif
|
||||
|
||||
void HeapSort (int n, register INT16 ra[], register UINT8 rb[]);
|
||||
void HeapSort (int n, register int ra[], register int rb[]);
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
Global Data Definitions and Declarations
|
||||
|
@ -61,6 +61,26 @@ static jmp_buf QuickExit;
|
||||
|
||||
static void_proc WalkAction;
|
||||
|
||||
// Helper function to find the next essential dimension in a cycle.
|
||||
static int NextLevel(int level) {
|
||||
do {
|
||||
++level;
|
||||
if (level >= N)
|
||||
level = 0;
|
||||
} while (KeyDesc[level].NonEssential);
|
||||
return level;
|
||||
}
|
||||
|
||||
// Helper function to find the previous essential dimension in a cycle.
|
||||
static int PrevLevel(int level) {
|
||||
do {
|
||||
--level;
|
||||
if (level < 0)
|
||||
level = N - 1;
|
||||
} while (KeyDesc[level].NonEssential);
|
||||
return level;
|
||||
}
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
Public Code
|
||||
----------------------------------------------------------------------------**/
|
||||
@ -136,7 +156,7 @@ MakeKDTree (INT16 KeySize, PARAM_DESC KeyDesc[]) {
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void KDStore(KDTREE *Tree, FLOAT32 *Key, void *Data) {
|
||||
void KDStore(KDTREE *Tree, FLOAT32 *Key, void *Data) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Tree K-D tree in which data is to be stored
|
||||
@ -164,7 +184,7 @@ void KDStore(KDTREE *Tree, FLOAT32 *Key, void *Data) {
|
||||
KeyDesc = &(Tree->KeyDesc[0]);
|
||||
PtrToNode = &(Tree->Root.Left);
|
||||
Node = *PtrToNode;
|
||||
Level = 0;
|
||||
Level = NextLevel(-1);
|
||||
while (Node != NULL) {
|
||||
if (Key[Level] < Node->BranchPoint) {
|
||||
PtrToNode = &(Node->Left);
|
||||
@ -176,9 +196,7 @@ void KDStore(KDTREE *Tree, FLOAT32 *Key, void *Data) {
|
||||
if (Key[Level] < Node->RightBranch)
|
||||
Node->RightBranch = Key[Level];
|
||||
}
|
||||
Level++;
|
||||
if (Level >= N)
|
||||
Level = 0;
|
||||
Level = NextLevel(Level);
|
||||
Node = *PtrToNode;
|
||||
}
|
||||
|
||||
@ -239,7 +257,7 @@ KDDelete (KDTREE * Tree, FLOAT32 Key[], void *Data) {
|
||||
KeyDesc = &(Tree->KeyDesc[0]);
|
||||
Father = &(Tree->Root);
|
||||
Current = Father->Left;
|
||||
Level = 0;
|
||||
Level = NextLevel(-1);
|
||||
|
||||
/* search tree for node to be deleted */
|
||||
while ((Current != NULL) && (!NodeFound (Current, Key, Data))) {
|
||||
@ -249,9 +267,7 @@ KDDelete (KDTREE * Tree, FLOAT32 Key[], void *Data) {
|
||||
else
|
||||
Current = Current->Right;
|
||||
|
||||
Level++;
|
||||
if (Level >= N)
|
||||
Level = 0;
|
||||
Level = NextLevel(Level);
|
||||
}
|
||||
|
||||
if (Current != NULL) { /* if node to be deleted was found */
|
||||
@ -271,15 +287,11 @@ KDDelete (KDTREE * Tree, FLOAT32 Key[], void *Data) {
|
||||
else
|
||||
break;
|
||||
|
||||
Level++;
|
||||
if (Level >= N)
|
||||
Level = 0;
|
||||
Level = NextLevel(Level);
|
||||
}
|
||||
|
||||
/* compute level of replacement node's father */
|
||||
Level--;
|
||||
if (Level < 0)
|
||||
Level = N - 1;
|
||||
Level = PrevLevel(Level);
|
||||
|
||||
/* disconnect replacement node from its father */
|
||||
if (FatherReplacement->Left == Replacement) {
|
||||
@ -304,7 +316,7 @@ KDDelete (KDTREE * Tree, FLOAT32 Key[], void *Data) {
|
||||
else
|
||||
Father->Right = Replacement;
|
||||
}
|
||||
FreeKDNode(Current);
|
||||
FreeKDNode(Current);
|
||||
}
|
||||
} /* KDDelete */
|
||||
|
||||
@ -381,7 +393,7 @@ void *NBuffer, FLOAT32 DBuffer[]) {
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void KDWalk(KDTREE *Tree, void_proc Action) {
|
||||
void KDWalk(KDTREE *Tree, void_proc Action) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Tree ptr to K-D tree to be walked
|
||||
@ -401,12 +413,12 @@ void KDWalk(KDTREE *Tree, void_proc Action) {
|
||||
*/
|
||||
WalkAction = Action;
|
||||
if (Tree->Root.Left != NULL)
|
||||
Walk (Tree->Root.Left, 0);
|
||||
Walk (Tree->Root.Left, NextLevel(-1));
|
||||
} /* KDWalk */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void FreeKDTree(KDTREE *Tree) {
|
||||
void FreeKDTree(KDTREE *Tree) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Tree tree data structure to be released
|
||||
@ -424,7 +436,7 @@ void FreeKDTree(KDTREE *Tree) {
|
||||
** 5/26/89, DSJ, Created.
|
||||
*/
|
||||
FreeSubTree (Tree->Root.Left);
|
||||
memfree(Tree);
|
||||
memfree(Tree);
|
||||
} /* FreeKDTree */
|
||||
|
||||
|
||||
@ -496,7 +508,7 @@ MakeKDNode (FLOAT32 Key[], char *Data, int Index) {
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void FreeKDNode(KDNODE *Node) {
|
||||
void FreeKDNode(KDNODE *Node) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Node ptr to node data structure to be freed
|
||||
@ -516,7 +528,7 @@ void FreeKDNode(KDNODE *Node) {
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void Search(int Level, KDNODE *SubTree) {
|
||||
void Search(int Level, KDNODE *SubTree) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Level level in tree of sub-tree to be searched
|
||||
@ -561,12 +573,12 @@ void Search(int Level, KDNODE *SubTree) {
|
||||
Distance[NumberOfNeighbors] = d;
|
||||
NumberOfNeighbors++;
|
||||
if (NumberOfNeighbors == MaxNeighbors)
|
||||
FindMaxDistance();
|
||||
FindMaxDistance();
|
||||
}
|
||||
else {
|
||||
Neighbor[Furthest] = SubTree->Data;
|
||||
Distance[Furthest] = d;
|
||||
FindMaxDistance();
|
||||
FindMaxDistance();
|
||||
}
|
||||
}
|
||||
if (QueryPoint[Level] < SubTree->BranchPoint) {
|
||||
@ -575,7 +587,7 @@ void Search(int Level, KDNODE *SubTree) {
|
||||
OldLBoxEdge = LBMax[Level];
|
||||
LBMax[Level] = SubTree->RightBranch;
|
||||
if (SubTree->Left != NULL)
|
||||
Search (Level + 1, SubTree->Left);
|
||||
Search (NextLevel(Level), SubTree->Left);
|
||||
SBMax[Level] = OldSBoxEdge;
|
||||
LBMax[Level] = OldLBoxEdge;
|
||||
OldSBoxEdge = SBMin[Level];
|
||||
@ -583,7 +595,7 @@ void Search(int Level, KDNODE *SubTree) {
|
||||
OldLBoxEdge = LBMin[Level];
|
||||
LBMin[Level] = SubTree->LeftBranch;
|
||||
if ((SubTree->Right != NULL) && QueryIntersectsSearch ())
|
||||
Search (Level + 1, SubTree->Right);
|
||||
Search (NextLevel(Level), SubTree->Right);
|
||||
SBMin[Level] = OldSBoxEdge;
|
||||
LBMin[Level] = OldLBoxEdge;
|
||||
}
|
||||
@ -593,7 +605,7 @@ void Search(int Level, KDNODE *SubTree) {
|
||||
OldLBoxEdge = LBMin[Level];
|
||||
LBMin[Level] = SubTree->LeftBranch;
|
||||
if (SubTree->Right != NULL)
|
||||
Search (Level + 1, SubTree->Right);
|
||||
Search (NextLevel(Level), SubTree->Right);
|
||||
SBMin[Level] = OldSBoxEdge;
|
||||
LBMin[Level] = OldLBoxEdge;
|
||||
OldSBoxEdge = SBMax[Level];
|
||||
@ -601,7 +613,7 @@ void Search(int Level, KDNODE *SubTree) {
|
||||
OldLBoxEdge = LBMax[Level];
|
||||
LBMax[Level] = SubTree->RightBranch;
|
||||
if ((SubTree->Left != NULL) && QueryIntersectsSearch ())
|
||||
Search (Level + 1, SubTree->Left);
|
||||
Search (NextLevel(Level), SubTree->Left);
|
||||
SBMax[Level] = OldSBoxEdge;
|
||||
LBMax[Level] = OldLBoxEdge;
|
||||
}
|
||||
@ -657,7 +669,7 @@ register FLOAT32 p1[], register FLOAT32 p2[]) {
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void FindMaxDistance() {
|
||||
void FindMaxDistance() {
|
||||
/*
|
||||
** Parameters:
|
||||
** None
|
||||
@ -690,7 +702,7 @@ void FindMaxDistance() {
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
int QueryIntersectsSearch() {
|
||||
int QueryIntersectsSearch() {
|
||||
/*
|
||||
** Parameters:
|
||||
** None
|
||||
@ -765,7 +777,7 @@ int QueryIntersectsSearch() {
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
int QueryInSearch() {
|
||||
int QueryInSearch() {
|
||||
/*
|
||||
** Parameters:
|
||||
** None
|
||||
@ -813,7 +825,7 @@ int QueryInSearch() {
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void Walk(KDNODE *SubTree, INT32 Level) {
|
||||
void Walk(KDNODE *SubTree, INT32 Level) {
|
||||
/*
|
||||
** Parameters:
|
||||
** SubTree ptr to root of subtree to be walked
|
||||
@ -842,17 +854,17 @@ void Walk(KDNODE *SubTree, INT32 Level) {
|
||||
else {
|
||||
(*WalkAction) (SubTree->Data, preorder, Level);
|
||||
if (SubTree->Left != NULL)
|
||||
Walk (SubTree->Left, Level + 1);
|
||||
Walk (SubTree->Left, NextLevel(Level));
|
||||
(*WalkAction) (SubTree->Data, postorder, Level);
|
||||
if (SubTree->Right != NULL)
|
||||
Walk (SubTree->Right, Level + 1);
|
||||
Walk (SubTree->Right, NextLevel(Level));
|
||||
(*WalkAction) (SubTree->Data, endorder, Level);
|
||||
}
|
||||
} /* Walk */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void FreeSubTree(KDNODE *SubTree) {
|
||||
void FreeSubTree(KDNODE *SubTree) {
|
||||
/*
|
||||
** Parameters:
|
||||
** SubTree ptr to root node of sub-tree to be freed
|
||||
@ -867,6 +879,6 @@ void FreeSubTree(KDNODE *SubTree) {
|
||||
if (SubTree != NULL) {
|
||||
FreeSubTree (SubTree->Left);
|
||||
FreeSubTree (SubTree->Right);
|
||||
memfree(SubTree);
|
||||
memfree(SubTree);
|
||||
}
|
||||
} /* FreeSubTree */
|
||||
|
@ -49,6 +49,7 @@ int row_number; /* cjn: fixes link problem */
|
||||
typedef struct
|
||||
{
|
||||
char *Label;
|
||||
int SampleCount;
|
||||
LIST List;
|
||||
}
|
||||
LABELEDLISTNODE, *LABELEDLIST;
|
||||
@ -143,7 +144,7 @@ static BOOL8 ShowInsignificantProtos = FALSE;
|
||||
//-M 0.025 -B 0.05 -I 0.8 -C 1e-3
|
||||
static CLUSTERCONFIG Config =
|
||||
{
|
||||
elliptical, 0.025, 0.05, 0.8, 1e-3
|
||||
elliptical, 0.025, 0.05, 0.8, 1e-3, 0
|
||||
};
|
||||
|
||||
static FLOAT32 RoundingAccuracy = 0.0;
|
||||
@ -235,6 +236,7 @@ int main (
|
||||
//printf ("\nClustering %s ...", CharSample->Label);
|
||||
Clusterer = SetUpForClustering(CharSample);
|
||||
float SavedMinSamples = Config.MinSamples;
|
||||
Config.MagicSamples = CharSample->SampleCount;
|
||||
while (Config.MinSamples > 0.001) {
|
||||
ProtoList = ClusterSamples(Clusterer, &Config);
|
||||
if (NumberOfProtos(ProtoList, 1, 0) > 0)
|
||||
@ -451,6 +453,7 @@ void ReadTrainingSamples (
|
||||
f->Params[dim] += UniformRandomNumber(-MINSD, MINSD);
|
||||
}
|
||||
CharSample->List = push (CharSample->List, FeatureSamples);
|
||||
CharSample->SampleCount++;
|
||||
for (i = 0; i < NumFeatureSetsIn (CharDesc); i++)
|
||||
if (Type != i)
|
||||
FreeFeatureSet (FeaturesOfType (CharDesc, i));
|
||||
@ -513,6 +516,7 @@ LABELEDLIST NewLabeledList (
|
||||
LabeledList->Label = (char*)Emalloc (strlen (Label)+1);
|
||||
strcpy (LabeledList->Label, Label);
|
||||
LabeledList->List = NIL;
|
||||
LabeledList->SampleCount = 0;
|
||||
return (LabeledList);
|
||||
|
||||
} /* NewLabeledList */
|
||||
|
@ -32,12 +32,14 @@
|
||||
#include "featdefs.h"
|
||||
#include "tessopt.h"
|
||||
#include "ocrfeatures.h"
|
||||
#include "mf.h"
|
||||
#include "general.h"
|
||||
#include "clusttool.h"
|
||||
#include "cluster.h"
|
||||
#include "protos.h"
|
||||
#include "minmax.h"
|
||||
#include "debug.h"
|
||||
#include "tprintf.h"
|
||||
#include "const.h"
|
||||
#include "mergenf.h"
|
||||
#include "name2char.h"
|
||||
@ -50,18 +52,21 @@
|
||||
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#define _USE_MATH_DEFINES
|
||||
#include <math.h>
|
||||
|
||||
#define MAXNAMESIZE 80
|
||||
#define MAX_NUM_SAMPLES 10000
|
||||
#define PROGRAM_FEATURE_TYPE "mf"
|
||||
#define MINSD (1.0f / 128.0f)
|
||||
#define MINSD_ANGLE (1.0f / 64.0f)
|
||||
|
||||
int row_number; /* cjn: fixes link problem */
|
||||
|
||||
typedef struct
|
||||
{
|
||||
char *Label;
|
||||
int SampleCount;
|
||||
LIST List;
|
||||
}
|
||||
LABELEDLISTNODE, *LABELEDLIST;
|
||||
@ -151,6 +156,9 @@ PARAMDESC *ConvertToPARAMDESC(
|
||||
PARAM_DESC* Param_Desc,
|
||||
int N);
|
||||
*/
|
||||
void MergeInsignificantProtos(LIST ProtoList, const char* label,
|
||||
CLUSTERER *Clusterer, CLUSTERCONFIG *Config);
|
||||
|
||||
LIST RemoveInsignificantProtos(
|
||||
LIST ProtoList,
|
||||
BOOL8 KeepSigProtos,
|
||||
@ -184,21 +192,51 @@ static BOOL8 ShowInsignificantProtos = FALSE;
|
||||
// global variable to hold configuration parameters to control clustering
|
||||
// -M 0.40 -B 0.05 -I 1.0 -C 1e-6.
|
||||
static CLUSTERCONFIG Config =
|
||||
{ elliptical, 0.40, 0.05, 1.0, 1e-6 };
|
||||
{ elliptical, 0.625, 0.05, 1.0, 1e-6, 0 };
|
||||
|
||||
static FLOAT32 RoundingAccuracy = 0.0;
|
||||
static FLOAT32 RoundingAccuracy = 0.0f;
|
||||
|
||||
// The unicharset used during mftraining
|
||||
static UNICHARSET unicharset_mftraining;
|
||||
|
||||
const char* test_ch = "";
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
Public Code
|
||||
-----------------------------------------------------------------------------*/
|
||||
/*---------------------------------------------------------------------------*/
|
||||
int main (
|
||||
int argc,
|
||||
char **argv)
|
||||
void DisplayProtoList(const char* ch, LIST protolist) {
|
||||
void* window = c_create_window("Char samples", 50, 200,
|
||||
520, 520, -130.0, 130.0, -130.0, 130.0);
|
||||
LIST proto = protolist;
|
||||
iterate(proto) {
|
||||
PROTOTYPE* prototype = reinterpret_cast<PROTOTYPE *>(first_node(proto));
|
||||
if (prototype->Significant)
|
||||
c_line_color_index(window, Green);
|
||||
else if (prototype->NumSamples == 0)
|
||||
c_line_color_index(window, Blue);
|
||||
else if (prototype->Merged)
|
||||
c_line_color_index(window, Magenta);
|
||||
else
|
||||
c_line_color_index(window, Red);
|
||||
float x = CenterX(prototype->Mean);
|
||||
float y = CenterY(prototype->Mean);
|
||||
double angle = OrientationOf(prototype->Mean) * 2 * M_PI;
|
||||
float dx = static_cast<float>(LengthOf(prototype->Mean) * cos(angle) / 2);
|
||||
float dy = static_cast<float>(LengthOf(prototype->Mean) * sin(angle) / 2);
|
||||
c_move(window, (x - dx) * 256, (y - dy) * 256);
|
||||
c_draw(window, (x + dx) * 256, (y + dy) * 256);
|
||||
if (prototype->Significant)
|
||||
tprintf("Green proto at (%g,%g)+(%g,%g) %d samples\n",
|
||||
x, y, dx, dy, prototype->NumSamples);
|
||||
else if (prototype->NumSamples > 0 && !prototype->Merged)
|
||||
tprintf("Red proto at (%g,%g)+(%g,%g) %d samples\n",
|
||||
x, y, dx, dy, prototype->NumSamples);
|
||||
}
|
||||
c_make_current(window);
|
||||
}
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
int main (int argc, char **argv) {
|
||||
/*
|
||||
** Parameters:
|
||||
** argc number of command line arguments
|
||||
@ -231,123 +269,119 @@ int main (
|
||||
** History: Fri Aug 18 08:56:17 1989, DSJ, Created.
|
||||
** Mon May 18 1998, Christy Russson, Revision started.
|
||||
*/
|
||||
char *PageName;
|
||||
FILE *TrainingPage;
|
||||
FILE *OutFile;
|
||||
LIST CharList;
|
||||
CLUSTERER *Clusterer = NULL;
|
||||
LIST ProtoList = NIL;
|
||||
LABELEDLIST CharSample;
|
||||
PROTOTYPE *Prototype;
|
||||
LIST ClassList = NIL;
|
||||
int Cid, Pid;
|
||||
PROTO Proto;
|
||||
PROTO_STRUCT DummyProto;
|
||||
BIT_VECTOR Config2;
|
||||
MERGE_CLASS MergeClass;
|
||||
INT_TEMPLATES IntTemplates;
|
||||
LIST pCharList, pProtoList;
|
||||
char Filename[MAXNAMESIZE];
|
||||
|
||||
{
|
||||
char *PageName;
|
||||
FILE *TrainingPage;
|
||||
FILE *OutFile;
|
||||
LIST CharList;
|
||||
CLUSTERER *Clusterer = NULL;
|
||||
LIST ProtoList = NIL;
|
||||
LABELEDLIST CharSample;
|
||||
PROTOTYPE *Prototype;
|
||||
LIST ClassList = NIL;
|
||||
int Cid, Pid;
|
||||
PROTO Proto;
|
||||
PROTO_STRUCT DummyProto;
|
||||
BIT_VECTOR Config2;
|
||||
MERGE_CLASS MergeClass;
|
||||
INT_TEMPLATES IntTemplates;
|
||||
LIST pCharList, pProtoList;
|
||||
char Filename[MAXNAMESIZE];
|
||||
// Clean the unichar set
|
||||
unicharset_mftraining.clear();
|
||||
// Space character needed to represent NIL classification
|
||||
unicharset_mftraining.unichar_insert(" ");
|
||||
|
||||
// Clean the unichar set
|
||||
unicharset_mftraining.clear();
|
||||
// Space character needed to represent NIL classification
|
||||
unicharset_mftraining.unichar_insert(" ");
|
||||
ParseArguments (argc, argv);
|
||||
InitFastTrainerVars ();
|
||||
InitSubfeatureVars ();
|
||||
while ((PageName = GetNextFilename()) != NULL) {
|
||||
printf ("Reading %s ...\n", PageName);
|
||||
TrainingPage = Efopen (PageName, "r");
|
||||
CharList = ReadTrainingSamples (TrainingPage);
|
||||
fclose (TrainingPage);
|
||||
//WriteTrainingSamples (Directory, CharList);
|
||||
pCharList = CharList;
|
||||
iterate(pCharList) {
|
||||
//Cluster
|
||||
CharSample = (LABELEDLIST) first_node (pCharList);
|
||||
// printf ("\nClustering %s ...", CharSample->Label);
|
||||
Clusterer = SetUpForClustering(CharSample);
|
||||
Config.MagicSamples = CharSample->SampleCount;
|
||||
ProtoList = ClusterSamples(Clusterer, &Config);
|
||||
CleanUpUnusedData(ProtoList);
|
||||
|
||||
ParseArguments (argc, argv);
|
||||
InitFastTrainerVars ();
|
||||
InitSubfeatureVars ();
|
||||
while ((PageName = GetNextFilename()) != NULL)
|
||||
{
|
||||
printf ("Reading %s ...\n", PageName);
|
||||
TrainingPage = Efopen (PageName, "r");
|
||||
CharList = ReadTrainingSamples (TrainingPage);
|
||||
fclose (TrainingPage);
|
||||
//WriteTrainingSamples (Directory, CharList);
|
||||
pCharList = CharList;
|
||||
iterate(pCharList)
|
||||
{
|
||||
//Cluster
|
||||
CharSample = (LABELEDLIST) first_node (pCharList);
|
||||
// printf ("\nClustering %s ...", CharSample->Label);
|
||||
Clusterer = SetUpForClustering(CharSample);
|
||||
ProtoList = ClusterSamples(Clusterer, &Config);
|
||||
//WriteClusteredTrainingSamples (Directory, ProtoList, Clusterer, CharSample);
|
||||
CleanUpUnusedData(ProtoList);
|
||||
//Merge
|
||||
MergeInsignificantProtos(ProtoList, CharSample->Label,
|
||||
Clusterer, &Config);
|
||||
if (strcmp(test_ch, CharSample->Label) == 0)
|
||||
DisplayProtoList(test_ch, ProtoList);
|
||||
ProtoList = RemoveInsignificantProtos(ProtoList, ShowSignificantProtos,
|
||||
ShowInsignificantProtos,
|
||||
Clusterer->SampleSize);
|
||||
FreeClusterer(Clusterer);
|
||||
MergeClass = FindClass (ClassList, CharSample->Label);
|
||||
if (MergeClass == NULL) {
|
||||
MergeClass = NewLabeledClass (CharSample->Label);
|
||||
ClassList = push (ClassList, MergeClass);
|
||||
}
|
||||
Cid = AddConfigToClass(MergeClass->Class);
|
||||
pProtoList = ProtoList;
|
||||
iterate (pProtoList) {
|
||||
Prototype = (PROTOTYPE *) first_node (pProtoList);
|
||||
|
||||
//Merge
|
||||
ProtoList = RemoveInsignificantProtos(ProtoList, ShowSignificantProtos,
|
||||
ShowInsignificantProtos, Clusterer->SampleSize);
|
||||
FreeClusterer(Clusterer);
|
||||
MergeClass = FindClass (ClassList, CharSample->Label);
|
||||
if (MergeClass == NULL)
|
||||
{
|
||||
MergeClass = NewLabeledClass (CharSample->Label);
|
||||
ClassList = push (ClassList, MergeClass);
|
||||
}
|
||||
Cid = AddConfigToClass(MergeClass->Class);
|
||||
pProtoList = ProtoList;
|
||||
iterate (pProtoList)
|
||||
{
|
||||
Prototype = (PROTOTYPE *) first_node (pProtoList);
|
||||
|
||||
// see if proto can be approximated by existing proto
|
||||
Pid = FindClosestExistingProto (MergeClass->Class, MergeClass->NumMerged, Prototype);
|
||||
if (Pid == NO_PROTO)
|
||||
{
|
||||
Pid = AddProtoToClass (MergeClass->Class);
|
||||
Proto = ProtoIn (MergeClass->Class, Pid);
|
||||
MakeNewFromOld (Proto, Prototype);
|
||||
MergeClass->NumMerged[Pid] = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
MakeNewFromOld (&DummyProto, Prototype);
|
||||
ComputeMergedProto (ProtoIn (MergeClass->Class, Pid), &DummyProto,
|
||||
(FLOAT32) MergeClass->NumMerged[Pid], 1.0,
|
||||
ProtoIn (MergeClass->Class, Pid));
|
||||
MergeClass->NumMerged[Pid] ++;
|
||||
}
|
||||
Config2 = ConfigIn (MergeClass->Class, Cid);
|
||||
AddProtoToConfig (Pid, Config2);
|
||||
}
|
||||
FreeProtoList (&ProtoList);
|
||||
}
|
||||
FreeTrainingSamples (CharList);
|
||||
}
|
||||
//WriteMergedTrainingSamples(Directory,ClassList);
|
||||
WriteMicrofeat(Directory, ClassList);
|
||||
InitIntProtoVars ();
|
||||
InitPrototypes ();
|
||||
SetUpForFloat2Int(ClassList);
|
||||
IntTemplates = CreateIntTemplates(TrainingData, unicharset_mftraining);
|
||||
strcpy (Filename, "");
|
||||
if (Directory != NULL)
|
||||
{
|
||||
strcat (Filename, Directory);
|
||||
strcat (Filename, "/");
|
||||
}
|
||||
strcat (Filename, "inttemp");
|
||||
// see if proto can be approximated by existing proto
|
||||
Pid = FindClosestExistingProto(MergeClass->Class,
|
||||
MergeClass->NumMerged, Prototype);
|
||||
if (Pid == NO_PROTO) {
|
||||
Pid = AddProtoToClass (MergeClass->Class);
|
||||
Proto = ProtoIn (MergeClass->Class, Pid);
|
||||
MakeNewFromOld (Proto, Prototype);
|
||||
MergeClass->NumMerged[Pid] = 1;
|
||||
}
|
||||
else {
|
||||
MakeNewFromOld (&DummyProto, Prototype);
|
||||
ComputeMergedProto (ProtoIn (MergeClass->Class, Pid), &DummyProto,
|
||||
(FLOAT32) MergeClass->NumMerged[Pid], 1.0,
|
||||
ProtoIn (MergeClass->Class, Pid));
|
||||
MergeClass->NumMerged[Pid] ++;
|
||||
}
|
||||
Config2 = ConfigIn (MergeClass->Class, Cid);
|
||||
AddProtoToConfig (Pid, Config2);
|
||||
}
|
||||
FreeProtoList (&ProtoList);
|
||||
}
|
||||
FreeTrainingSamples (CharList);
|
||||
}
|
||||
//WriteMergedTrainingSamples(Directory,ClassList);
|
||||
WriteMicrofeat(Directory, ClassList);
|
||||
InitIntProtoVars ();
|
||||
InitPrototypes ();
|
||||
SetUpForFloat2Int(ClassList);
|
||||
IntTemplates = CreateIntTemplates(TrainingData, unicharset_mftraining);
|
||||
strcpy (Filename, "");
|
||||
if (Directory != NULL) {
|
||||
strcat (Filename, Directory);
|
||||
strcat (Filename, "/");
|
||||
}
|
||||
strcat (Filename, "inttemp");
|
||||
#ifdef __UNIX__
|
||||
OutFile = Efopen (Filename, "w");
|
||||
OutFile = Efopen (Filename, "w");
|
||||
#else
|
||||
OutFile = Efopen (Filename, "wb");
|
||||
OutFile = Efopen (Filename, "wb");
|
||||
#endif
|
||||
WriteIntTemplates(OutFile, IntTemplates, unicharset_mftraining);
|
||||
fclose (OutFile);
|
||||
strcpy (Filename, "");
|
||||
if (Directory != NULL)
|
||||
{
|
||||
strcat (Filename, Directory);
|
||||
strcat (Filename, "/");
|
||||
}
|
||||
strcat (Filename, "pffmtable");
|
||||
// Now create pffmtable.
|
||||
WritePFFMTable(IntTemplates, Filename);
|
||||
printf ("Done!\n"); /**/
|
||||
FreeLabeledClassList (ClassList);
|
||||
WriteIntTemplates(OutFile, IntTemplates, unicharset_mftraining);
|
||||
fclose (OutFile);
|
||||
strcpy (Filename, "");
|
||||
if (Directory != NULL) {
|
||||
strcat (Filename, Directory);
|
||||
strcat (Filename, "/");
|
||||
}
|
||||
strcat (Filename, "pffmtable");
|
||||
// Now create pffmtable.
|
||||
WritePFFMTable(IntTemplates, Filename);
|
||||
printf ("Done!\n"); /**/
|
||||
FreeLabeledClassList (ClassList);
|
||||
return 0;
|
||||
} /* main */
|
||||
|
||||
@ -438,8 +472,8 @@ char **argv)
|
||||
case 'R':
|
||||
ParametersRead = sscanf( tessoptarg, "%f", &RoundingAccuracy );
|
||||
if ( ParametersRead != 1 ) Error = TRUE;
|
||||
else if ( RoundingAccuracy > 0.01 ) RoundingAccuracy = 0.01;
|
||||
else if ( RoundingAccuracy < 0.0 ) RoundingAccuracy = 0.0;
|
||||
else if ( RoundingAccuracy > 0.01f ) RoundingAccuracy = 0.01f;
|
||||
else if ( RoundingAccuracy < 0.0f ) RoundingAccuracy = 0.0f;
|
||||
break;
|
||||
case 'S':
|
||||
switch ( tessoptarg[0] )
|
||||
@ -547,9 +581,12 @@ LIST ReadTrainingSamples (
|
||||
for (int feature = 0; feature < FeatureSamples->NumFeatures; ++feature) {
|
||||
FEATURE f = FeatureSamples->Features[feature];
|
||||
for (int dim =0; dim < f->Type->NumParams; ++dim)
|
||||
f->Params[dim] += UniformRandomNumber(-MINSD, MINSD);
|
||||
f->Params[dim] += dim == MFDirection ?
|
||||
UniformRandomNumber(-MINSD_ANGLE, MINSD_ANGLE) :
|
||||
UniformRandomNumber(-MINSD, MINSD);
|
||||
}
|
||||
CharSample->List = push (CharSample->List, FeatureSamples);
|
||||
CharSample->SampleCount++;
|
||||
for (i = 0; i < NumFeatureSetsIn (CharDesc); i++)
|
||||
if (Type != i)
|
||||
FreeFeatureSet (FeaturesOfType (CharDesc, i));
|
||||
@ -631,6 +668,7 @@ LABELEDLIST NewLabeledList (
|
||||
LabeledList->Label = (char*)Emalloc (strlen (Label)+1);
|
||||
strcpy (LabeledList->Label, Label);
|
||||
LabeledList->List = NIL;
|
||||
LabeledList->SampleCount = 0;
|
||||
return (LabeledList);
|
||||
|
||||
} /* NewLabeledList */
|
||||
@ -1030,7 +1068,7 @@ CLUSTERER *SetUpForClustering(
|
||||
if (Sample == NULL)
|
||||
Sample = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
|
||||
for (j=0; j < N; j++)
|
||||
if (RoundingAccuracy != 0.0)
|
||||
if (RoundingAccuracy != 0.0f)
|
||||
Sample[j] = round(FeatureSet->Features[i]->Params[j], RoundingAccuracy);
|
||||
else
|
||||
Sample[j] = FeatureSet->Features[i]->Params[j];
|
||||
@ -1043,6 +1081,71 @@ CLUSTERER *SetUpForClustering(
|
||||
|
||||
} /* SetUpForClustering */
|
||||
|
||||
/*------------------------------------------------------------------------*/
|
||||
/*
 * MergeInsignificantProtos
 *
 * Walks ProtoList twice.
 *
 * Pass 1: for every prototype that is neither Significant nor already
 * Merged, look for the closest other un-merged prototype whose distance
 * (as returned by ComputeDistance on the two Mean vectors) is strictly
 * below 0.125.
 *   - If the closest one is itself insignificant, the two are combined via
 *     MergeClusters: the neighbour receives the returned sample count, and
 *     the current prototype is left with NumSamples == 0 and Merged set.
 *     (The neighbour's Mean is passed as the first mean argument as well as
 *     an input, so presumably MergeClusters writes the merged mean there -
 *     confirm against MergeClusters.)
 *   - If the closest one is significant, the current prototype is only
 *     flagged as Merged; the significant prototype is left untouched.
 *   - If nothing is close enough, the prototype is left as it was.
 *
 * Pass 2: any prototype that is still insignificant, has not been merged
 * away, and holds at least Config->MinSamples * Clusterer->NumChar samples
 * (truncated to an integer) is promoted to Significant.
 *
 * Parameters:
 *   ProtoList  list of PROTOTYPE* to process; entries are modified in place
 *   label      character label; when it equals test_ch, trace messages are
 *              printed with tprintf (they call insignificant protos "red"
 *              and significant ones "green")
 *   Clusterer  supplies SampleSize, ParamDesc and NumChar
 *   Config     supplies MinSamples
 */
void MergeInsignificantProtos(LIST ProtoList, const char* label,
                              CLUSTERER *Clusterer, CLUSTERCONFIG *Config) {
  const bool trace = (strcmp(test_ch, label) == 0);

  // Pass 1: absorb each insignificant proto into its nearest live neighbour.
  LIST outer = ProtoList;
  iterate(outer) {
    PROTOTYPE *candidate = (PROTOTYPE *) first_node (outer);
    if (!candidate->Significant && !candidate->Merged) {
      // Only neighbours strictly closer than this bound are considered.
      FLOAT32 nearest_dist = 0.125;
      PROTOTYPE *nearest = NULL;

      LIST inner = ProtoList;
      iterate(inner) {
        PROTOTYPE *other = (PROTOTYPE *) first_node (inner);
        // Never match a proto against itself or against one already merged.
        if (other == candidate || other->Merged)
          continue;
        FLOAT32 separation = ComputeDistance(Clusterer->SampleSize,
                                             Clusterer->ParamDesc,
                                             candidate->Mean, other->Mean);
        if (separation < nearest_dist) {
          nearest_dist = separation;
          nearest = other;
        }
      }

      if (nearest != NULL) {
        if (nearest->Significant) {
          // Close to a significant proto: just retire the candidate.
          if (trace)
            tprintf("Red proto at %g,%g matched a green one at %g,%g\n",
                    candidate->Mean[0], candidate->Mean[1],
                    nearest->Mean[0], nearest->Mean[1]);
        } else {
          // Two insignificant protos: pool their samples in the neighbour.
          if (trace)
            tprintf("Merging red clusters (%d+%d) at %g,%g and %g,%g\n",
                    nearest->NumSamples, candidate->NumSamples,
                    nearest->Mean[0], nearest->Mean[1],
                    candidate->Mean[0], candidate->Mean[1]);
          nearest->NumSamples = MergeClusters(Clusterer->SampleSize,
                                              Clusterer->ParamDesc,
                                              nearest->NumSamples,
                                              candidate->NumSamples,
                                              nearest->Mean,
                                              nearest->Mean, candidate->Mean);
          candidate->NumSamples = 0;
        }
        candidate->Merged = 1;
      }
    }
  }

  // Pass 2: promote survivors that have accumulated enough samples.
  const int promote_threshold =
      (INT32) (Config->MinSamples * Clusterer->NumChar);
  LIST scan = ProtoList;
  iterate(scan) {
    PROTOTYPE *proto = (PROTOTYPE *) first_node (scan);
    if (proto->Significant || proto->Merged ||
        proto->NumSamples < promote_threshold)
      continue;
    if (trace)
      tprintf("Red proto at %g,%g becoming green\n",
              proto->Mean[0], proto->Mean[1]);
    proto->Significant = true;
  }
}	/* MergeInsignificantProtos */
|
||||
|
||||
/*------------------------------------------------------------------------*/
|
||||
LIST RemoveInsignificantProtos(
|
||||
LIST ProtoList,
|
||||
|
Loading…
Reference in New Issue
Block a user