Improved consistency of results from floating point calculations

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@79 d0cd1f9f-072b-0410-8dd7-cf729c803f20
theraysmith 2007-07-18 00:55:02 +00:00
parent d33938c084
commit 2f4a43b419
6 changed files with 273 additions and 217 deletions
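The recurring change in the hunks below is that the blob length handed to the integer matcher becomes an INT32 feature count instead of a FLOAT32 pico-feature length that was later divided back into a count. A small standalone illustration (not code from this commit; the 0.05 scale factor is an assumed value) of why the float round trip is a consistency risk:

#include <stdio.h>

int main(void) {
  /* Old style: a feature count is scaled to a float "length" and later
     divided back into a count, truncating toward zero. The recovered
     count can differ by one depending on compiler, FPU mode and
     optimization level, which is the kind of inconsistency this commit
     removes. */
  const float pico_length = 0.05f;        /* assumed scale factor */
  int num_features = 93;
  float blob_length = num_features * pico_length;
  int recovered = (int) (blob_length / pico_length);

  /* New style: carry the count itself, no float round trip. */
  int blob_length_int = num_features;

  printf("stored=%d recovered=%d carried=%d\n",
         num_features, recovered, blob_length_int);
  return 0;
}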

View File

@ -60,6 +60,7 @@
#define UNLIKELY_NUM_FEAT 200
#define NO_DEBUG 0
#define MAX_ADAPTABLE_WERD_SIZE 40
#define ADAPTABLE_WERD (GOOD_NUMBER + 0.05)
#define Y_DIM_OFFSET (Y_SHIFT - BASELINE_Y_SHIFT)
@ -68,7 +69,7 @@
typedef struct
{
FLOAT32 BlobLength;
INT32 BlobLength;
int NumMatches;
CLASS_ID Classes[MAX_NUM_CLASSES];
FLOAT32 Ratings[MAX_CLASS_ID + 1];
@ -123,13 +124,13 @@ void AddNewResult(ADAPT_RESULTS *Results,
void AmbigClassifier(TBLOB *Blob,
LINE_STATS *LineStats,
INT_TEMPLATES Templates,
char *Ambiguities,
UNICHAR_ID *Ambiguities,
ADAPT_RESULTS *Results);
char *BaselineClassifier(TBLOB *Blob,
LINE_STATS *LineStats,
ADAPT_TEMPLATES Templates,
ADAPT_RESULTS *Results);
UNICHAR_ID *BaselineClassifier(TBLOB *Blob,
LINE_STATS *LineStats,
ADAPT_TEMPLATES Templates,
ADAPT_RESULTS *Results);
void make_config_pruner(INT_TEMPLATES templates, CONFIG_PRUNER *config_pruner);
@ -158,19 +159,19 @@ void DoAdaptiveMatch(TBLOB *Blob,
void GetAdaptThresholds (TWERD * Word,
LINE_STATS * LineStats,
const char *BestChoice,
const char *BestRawChoice, FLOAT32 Thresholds[]);
const WERD_CHOICE& BestChoice,
const WERD_CHOICE& BestRawChoice, FLOAT32 Thresholds[]);
char *GetAmbiguities(TBLOB *Blob,
LINE_STATS *LineStats,
CLASS_ID CorrectClass);
UNICHAR_ID *GetAmbiguities(TBLOB *Blob,
LINE_STATS *LineStats,
CLASS_ID CorrectClass);
int GetBaselineFeatures(TBLOB *Blob,
LINE_STATS *LineStats,
INT_TEMPLATES Templates,
INT_FEATURE_ARRAY IntFeatures,
CLASS_NORMALIZATION_ARRAY CharNormArray,
FLOAT32 *BlobLength);
INT32 *BlobLength);
FLOAT32 GetBestRatingFor(TBLOB *Blob, LINE_STATS *LineStats, CLASS_ID ClassId);
@ -179,21 +180,21 @@ int GetCharNormFeatures(TBLOB *Blob,
INT_TEMPLATES Templates,
INT_FEATURE_ARRAY IntFeatures,
CLASS_NORMALIZATION_ARRAY CharNormArray,
FLOAT32 *BlobLength);
INT32 *BlobLength);
int GetIntBaselineFeatures(TBLOB *Blob,
LINE_STATS *LineStats,
INT_TEMPLATES Templates,
INT_FEATURE_ARRAY IntFeatures,
CLASS_NORMALIZATION_ARRAY CharNormArray,
FLOAT32 *BlobLength);
INT32 *BlobLength);
int GetIntCharNormFeatures(TBLOB *Blob,
LINE_STATS *LineStats,
INT_TEMPLATES Templates,
INT_FEATURE_ARRAY IntFeatures,
CLASS_NORMALIZATION_ARRAY CharNormArray,
FLOAT32 *BlobLength);
INT32 *BlobLength);
void InitMatcherRatings(register FLOAT32 *Rating);
@ -526,7 +527,7 @@ make_float_var (BadMatchPad, 0.15, MakeBadMatchPad,
make_float_var (RatingMargin, 0.1, MakeRatingMargin,
18, 10, SetRatingMargin, "New template margin (0-1): ");
make_float_var (NoiseBlobLength, 0.6, MakeNoiseBlobLength,
make_float_var (NoiseBlobLength, 12.0, MakeNoiseBlobLength,
18, 11, SetNoiseBlobLength, "Avg. noise blob length: ");
make_int_var (MinNumPermClasses, 1, MakeMinNumPermClasses,
@ -550,7 +551,7 @@ make_toggle_var (EnableNewAdaptRules, 1, MakeEnableNewAdaptRules,
"Enable new adaptation rules");
/* PREV DEFAULT 0 */
make_float_var (RatingScale, 30.0, MakeRatingScale,
make_float_var (RatingScale, 1.5, MakeRatingScale,
18, 17, SetRatingScale, "Rating scale: ");
make_float_var (CertaintyScale, 20.0, MakeCertaintyScale,
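If GetPicoFeatureLength() is on the order of 0.05 (an assumption; its value is not shown in this diff), the retuned defaults above track the unit change from pico-feature length to feature count rather than a behavioural retune:

  old rating:  Rating * 30.0 * (NumFeatures * 0.05)  =  Rating * 1.5 * NumFeatures
  new rating:  Rating * 1.5  *  NumFeatures           (BlobLength is now the count)
  old noise cutoff:  0.6 length units / 0.05 per feature = 12 features
                     = the new NoiseBlobLength default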
@ -602,7 +603,7 @@ LIST AdaptiveClassifier(TBLOB *Blob, TBLOB *DotBlob, TEXTROW *Row) {
AdaptedTemplates = NewAdaptedTemplates ();
EnterClassifyMode;
Results.BlobLength = MAX_FLOAT32;
Results.BlobLength = MAX_INT32;
Results.NumMatches = 0;
Results.BestRating = WORST_POSSIBLE_RATING;
Results.BestClass = NO_CLASS;
@ -635,9 +636,10 @@ LIST AdaptiveClassifier(TBLOB *Blob, TBLOB *DotBlob, TEXTROW *Row) {
NumClassesOutput += count (Choices);
if (Choices == NIL) {
char empty_lengths[] = {0};
if (!bln_numericmode)
tprintf ("Nil classification!\n"); // Should never normally happen.
return (append_choice (NIL, "", 50.0f, -20.0f, -1));
return (append_choice (NIL, "", empty_lengths, 50.0f, -20.0f, -1));
}
return (Choices);
@ -648,8 +650,8 @@ LIST AdaptiveClassifier(TBLOB *Blob, TBLOB *DotBlob, TEXTROW *Row) {
/*---------------------------------------------------------------------------*/
void AdaptToWord(TWERD *Word,
TEXTROW *Row,
const char *BestChoice,
const char *BestRawChoice,
const WERD_CHOICE& BestChoice,
const WERD_CHOICE& BestRawChoice,
const char *rejmap) {
/*
** Parameters:
@ -680,8 +682,10 @@ void AdaptToWord(TWERD *Word,
FLOAT32 *Threshold;
const char *map = rejmap;
char map_char = '1';
const char* BestChoice_string = BestChoice.string().string();
const char* BestChoice_lengths = BestChoice.lengths().string();
if (strlen(BestChoice) > MAX_ADAPTABLE_WERD_SIZE)
if (strlen(BestChoice_lengths) > MAX_ADAPTABLE_WERD_SIZE)
return;
if (EnableLearning) {
@ -689,7 +693,7 @@ void AdaptToWord(TWERD *Word,
#ifndef SECURE_NAMES
if (LearningDebugLevel >= 1)
cprintf ("\n\nAdapting to word = %s\n", BestChoice);
cprintf ("\n\nAdapting to word = %s\n", BestChoice.string().string());
#endif
GetLineStatsFromRow(Row, &LineStats);
@ -699,8 +703,9 @@ void AdaptToWord(TWERD *Word,
BestRawChoice,
Thresholds);
for (Blob = Word->blobs, Threshold = Thresholds;
Blob != NULL; Blob = Blob->next, BestChoice++, Threshold++) {
for (Blob = Word->blobs, Threshold = Thresholds; Blob != NULL;
Blob = Blob->next, BestChoice_string += *(BestChoice_lengths++),
Threshold++) {
InitIntFX();
if (rejmap != NULL)
@ -710,7 +715,8 @@ void AdaptToWord(TWERD *Word,
if (map_char == '1') {
if (isalnum (*BestChoice)) {
// if (unicharset.get_isalpha (BestChoice_string, *BestChoice_lengths) ||
// unicharset.get_isdigit (BestChoice_string, *BestChoice_lengths)) {
/* SPECIAL RULE: don't adapt to an 'i' which is the first char
in a word because they are too ambiguous with 'I'.
The new adaptation rules should account for this
@ -719,30 +725,46 @@ void AdaptToWord(TWERD *Word,
Also, don't adapt to i's that have only 1 blob in them
because this creates too much ambiguity for broken
characters. */
if ((*BestChoice == 'i'
|| il1_adaption_test && *BestChoice == 'I'
&& islower (BestChoice[1])) && (Blob == Word->blobs
||
ispunct (*
(BestChoice -
1))
|| !il1_adaption_test
&&
NumOutlinesInBlob
(Blob) != 2)) {
if (*BestChoice_lengths == 1 &&
(*BestChoice_string == 'i'
|| il1_adaption_test && *BestChoice_string == 'I' &&
(Blob->next == NULL ||
unicharset.get_islower (BestChoice_string + *BestChoice_lengths,
*(BestChoice_lengths + 1))))
&& (Blob == Word->blobs
|| (!(unicharset.get_isalpha (BestChoice_string -
*(BestChoice_lengths - 1),
*(BestChoice_lengths - 1)) ||
unicharset.get_isdigit (BestChoice_string -
*(BestChoice_lengths - 1),
*(BestChoice_lengths - 1))))
|| !il1_adaption_test && NumOutlinesInBlob(Blob) != 2)) {
if (LearningDebugLevel >= 1)
cprintf ("Rejecting char = %c\n", *BestChoice);
cprintf ("Rejecting char = %s\n", unicharset.id_to_unichar(
unicharset.unichar_to_id(BestChoice_string,
*BestChoice_lengths)));
}
else {
#ifndef SECURE_NAMES
if (LearningDebugLevel >= 1)
cprintf ("Adapting to char = %c, thr= %g\n", *BestChoice, *Threshold);
cprintf ("Adapting to char = %s, thr= %g\n",
unicharset.id_to_unichar(
unicharset.unichar_to_id(BestChoice_string,
*BestChoice_lengths)),
*Threshold);
#endif
AdaptToChar(Blob, &LineStats, *BestChoice, *Threshold);
AdaptToChar(Blob, &LineStats,
unicharset.unichar_to_id(BestChoice_string,
*BestChoice_lengths),
*Threshold);
}
}
else
AdaptToPunc(Blob, &LineStats, *BestChoice, *Threshold);
// }
// else
// AdaptToPunc(Blob, &LineStats,
// unicharset.unichar_to_id(BestChoice_string,
// *BestChoice_lengths),
// *Threshold);
}
}
if (LearningDebugLevel >= 1)
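A hedged restatement of the i/I adaptation rule implemented by the long condition above (illustrative only, not code from this commit; the real test also requires the unichar to be a single byte and uses unicharset lookups for the neighbouring characters):

static int RejectAmbiguousI(char ch, int il1_adaption_test,
                            int is_first_blob, int prev_is_alnum,
                            int next_is_lower_or_word_end,
                            int num_outlines) {
  /* 'i' is always suspect; 'I' only under il1_adaption_test and when it
     ends the word or is followed by a lowercase letter. */
  int ambiguous_i = (ch == 'i') ||
      (il1_adaption_test && ch == 'I' && next_is_lower_or_word_end);
  /* Risky context: first blob of the word, non-alphanumeric predecessor,
     or (without il1_adaption_test) a blob without exactly 2 outlines. */
  int risky_context = is_first_blob || !prev_is_alnum ||
      (!il1_adaption_test && num_outlines != 2);
  return ambiguous_i && risky_context;   /* nonzero: do not adapt */
}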
@ -1149,10 +1171,9 @@ void MakeNewAdaptedClass(TBLOB *Blob,
ConvertConfig (AllProtosOn, 0, IClass);
if (LearningDebugLevel >= 1) {
cprintf ("Added new class '%c' with index %d and %d protos.\n",
ClassId, ClassIndex, NumFeatures);
cprintf ("Added new class '%s' with index %d and %d protos.\n",
unicharset.id_to_unichar(ClassId), ClassIndex, NumFeatures);
}
} /* MakeNewAdaptedClass */
@ -1207,7 +1228,9 @@ int GetAdaptiveFeatures(TBLOB *Blob,
/*---------------------------------------------------------------------------*/
int AdaptableWord(TWERD *Word,
const char *BestChoice,
const char *BestRawChoice) {
const char *BestChoice_lengths,
const char *BestRawChoice,
const char *BestRawChoice_lengths) {
/*
** Parameters:
** Word
@ -1228,7 +1251,10 @@ int AdaptableWord(TWERD *Word,
return ( /* rules that apply in general - simplest to compute first */
/* EnableLearning && */
/* new rules */
BestChoice != NULL && BestRawChoice != NULL && Word != NULL && (BestChoiceLength = strlen (BestChoice)) > 0 && BestChoiceLength == NumBlobsIn (Word) && BestChoiceLength <= MAX_ADAPTABLE_WERD_SIZE && (
BestChoice != NULL && BestRawChoice != NULL && Word != NULL &&
(BestChoiceLength = strlen (BestChoice_lengths)) > 0 &&
BestChoiceLength == NumBlobsIn (Word) &&
BestChoiceLength <= MAX_ADAPTABLE_WERD_SIZE && (
EnableNewAdaptRules
&&
CurrentBestChoiceAdjustFactor
@ -1240,7 +1266,7 @@ int AdaptableWord(TWERD *Word,
(ADAPTABLE_WERD)
&&
CurrentBestChoiceIs
(BestChoice)
(BestChoice, BestChoice_lengths)
||
/* old rules */
!EnableNewAdaptRules
@ -1248,9 +1274,9 @@ int AdaptableWord(TWERD *Word,
BestChoiceLength
==
strlen
(BestRawChoice)
(BestRawChoice_lengths)
&&
((valid_word (BestChoice) && case_ok (BestChoice)) || (valid_number (BestChoice) && pure_number (BestChoice))) && punctuation_ok (BestChoice) != -1 && punctuation_ok (BestChoice) <= 1));
((valid_word (BestChoice) && case_ok (BestChoice, BestChoice_lengths)) || (valid_number (BestChoice, BestChoice_lengths) && pure_number (BestChoice, BestChoice_lengths))) && punctuation_ok (BestChoice, BestChoice_lengths) != -1 && punctuation_ok (BestChoice, BestChoice_lengths) <= 1));
} /* AdaptableWord */
@ -1399,7 +1425,7 @@ void AdaptToPunc(TBLOB *Blob,
ADAPT_RESULTS Results;
int i;
Results.BlobLength = MAX_FLOAT32;
Results.BlobLength = MAX_INT32;
Results.NumMatches = 0;
Results.BestRating = WORST_POSSIBLE_RATING;
Results.BestClass = NO_CLASS;
@ -1410,10 +1436,11 @@ void AdaptToPunc(TBLOB *Blob,
if (Results.NumMatches != 1) {
if (LearningDebugLevel >= 1) {
cprintf ("Rejecting punc = %c (Alternatives = ", ClassId);
cprintf ("Rejecting punc = %s (Alternatives = ",
unicharset.id_to_unichar(ClassId));
for (i = 0; i < Results.NumMatches; i++)
cprintf ("%c", Results.Classes[i]);
cprintf ("%s", unicharset.id_to_unichar(Results.Classes[i]));
cprintf (")\n");
}
return;
@ -1421,7 +1448,8 @@ void AdaptToPunc(TBLOB *Blob,
#ifndef SECURE_NAMES
if (LearningDebugLevel >= 1)
cprintf ("Adapting to punc = %c\n", ClassId);
cprintf ("Adapting to punc = %s, thr= %g\n",
unicharset.id_to_unichar(ClassId), Threshold);
#endif
AdaptToChar(Blob, LineStats, ClassId, Threshold);
@ -1489,7 +1517,7 @@ void AddNewResult(ADAPT_RESULTS *Results,
void AmbigClassifier(TBLOB *Blob,
LINE_STATS *LineStats,
INT_TEMPLATES Templates,
char *Ambiguities,
UNICHAR_ID *Ambiguities,
ADAPT_RESULTS *Results) {
/*
** Parameters:
@ -1500,7 +1528,7 @@ void AmbigClassifier(TBLOB *Blob,
** Templates
built-in templates to classify against
** Ambiguities
string of class id's to match against
array of class id's to match against
** Results
place to put match results
** Globals:
@ -1516,7 +1544,6 @@ void AmbigClassifier(TBLOB *Blob,
** Exceptions: none
** History: Tue Mar 12 19:40:36 1991, DSJ, Created.
*/
int IntOutlineLength;
int NumFeatures;
INT_FEATURE_ARRAY IntFeatures;
CLASS_NORMALIZATION_ARRAY CharNormArray;
@ -1533,24 +1560,23 @@ void AmbigClassifier(TBLOB *Blob,
if (NumFeatures <= 0)
return;
IntOutlineLength = (int) (Results->BlobLength / GetPicoFeatureLength ());
if (MatcherDebugLevel >= 2)
cprintf ("AM Matches = ");
while (*Ambiguities) {
while (*Ambiguities >= 0) {
ClassId = *Ambiguities;
ClassIndex = IndexForClassId (Templates, ClassId);
SetCharNormMatch();
IntegerMatcher (ClassForClassId (Templates, ClassId),
AllProtosOn, AllConfigsOn,
IntOutlineLength, NumFeatures, IntFeatures, 0,
Results->BlobLength, NumFeatures, IntFeatures, 0,
CharNormArray[ClassIndex], &IntResult, NO_DEBUG);
if (MatcherDebugLevel >= 2)
cprintf ("%c-%-2d %2.0f ", ClassId, IntResult.Config,
IntResult.Rating * 100.0);
cprintf ("%s-%-2d %2.0f ", unicharset.id_to_unichar(ClassId),
IntResult.Config,
IntResult.Rating * 100.0);
AddNewResult (Results, ClassId, IntResult.Rating, IntResult.Config);
@ -1565,10 +1591,10 @@ void AmbigClassifier(TBLOB *Blob,
/*---------------------------------------------------------------------------*/
char *BaselineClassifier(TBLOB *Blob,
LINE_STATS *LineStats,
ADAPT_TEMPLATES Templates,
ADAPT_RESULTS *Results) {
UNICHAR_ID *BaselineClassifier(TBLOB *Blob,
LINE_STATS *LineStats,
ADAPT_TEMPLATES Templates,
ADAPT_RESULTS *Results) {
/*
** Parameters:
** Blob
@ -1586,11 +1612,10 @@ char *BaselineClassifier(TBLOB *Blob,
** from the unknown character and matches them against the
** specified set of templates. The classes which match
** are added to Results.
** Return: String of possible ambiguous chars that should be checked.
** Return: Array of possible ambiguous chars that should be checked.
** Exceptions: none
** History: Tue Mar 12 19:38:03 1991, DSJ, Created.
*/
int IntOutlineLength;
int NumFeatures;
int NumClasses;
int i;
@ -1613,8 +1638,6 @@ char *BaselineClassifier(TBLOB *Blob,
if (NumFeatures <= 0)
return NULL;
IntOutlineLength = (int) (Results->BlobLength / GetPicoFeatureLength ());
NumClasses = ClassPruner (Templates->Templates, NumFeatures,
IntFeatures, CharNormArray,
BaselineCutoffs, ClassPrunerResults,
@ -1639,11 +1662,12 @@ char *BaselineClassifier(TBLOB *Blob,
IntegerMatcher (ClassForClassId (Templates->Templates, ClassId),
Templates->Class[ClassIndex]->PermProtos,
Templates->Class[ClassIndex]->PermConfigs,
IntOutlineLength, NumFeatures, IntFeatures, 0,
Results->BlobLength, NumFeatures, IntFeatures, 0,
CharNormArray[ClassIndex], &IntResult, MatchDebugFlags);
if (MatcherDebugLevel >= 2 || display_ratings > 1) {
cprintf ("%c-%-2d %2.1f(%2.1f/%2.1f) ", ClassId, IntResult.Config,
cprintf ("%s-%-2d %2.1f(%2.1f/%2.1f) ",
unicharset.id_to_unichar(ClassId), IntResult.Config,
IntResult.Rating * 100.0,
ClassPrunerResults[i].Rating * 100.0,
ClassPrunerResults[i].Rating2 * 100.0);
@ -1665,7 +1689,8 @@ char *BaselineClassifier(TBLOB *Blob,
config--);
if (MatcherDebugLevel >= 2 || display_ratings > 1) {
cprintf ("%c(%d) %2.1f(%2.1f) ", ClassId, config,
cprintf ("%s(%d) %2.1f(%2.1f) ",
unicharset.id_to_unichar(ClassId), config,
ClassPrunerResults[i].Rating * 200.0,
ClassPrunerResults[i].Rating2 * 100.0);
if (i % 4 == 3)
@ -1685,8 +1710,8 @@ char *BaselineClassifier(TBLOB *Blob,
/* this is a bug - maybe should return "" */
ClassIndex = IndexForClassId (Templates->Templates, ClassId);
return ((char *) (Templates->Class[ClassIndex]->
Config[Results->BestConfig].Perm));
return (Templates->Class[ClassIndex]->
Config[Results->BestConfig].Perm);
} /* BaselineClassifier */
@ -1825,7 +1850,6 @@ void CharNormClassifier(TBLOB *Blob,
** Exceptions: none
** History: Tue Mar 12 16:02:52 1991, DSJ, Created.
*/
int IntOutlineLength;
int NumFeatures;
int NumClasses;
int i;
@ -1847,8 +1871,6 @@ void CharNormClassifier(TBLOB *Blob,
if (NumFeatures <= 0)
return;
IntOutlineLength = (int) (Results->BlobLength / GetPicoFeatureLength ());
NumClasses = ClassPruner (Templates, NumFeatures,
IntFeatures, CharNormArray,
CharNormCutoffs, ClassPrunerResults,
@ -1865,7 +1887,7 @@ void CharNormClassifier(TBLOB *Blob,
IntFeatures,
CharNormArray,
NumClasses,
IntOutlineLength,
Results->BlobLength,
ClassPrunerResults,
MatchDebugFlags);
}
@ -1896,11 +1918,12 @@ void CharNormClassifier(TBLOB *Blob,
PrunedProtos);
//xiaofan
IntegerMatcher (ClassForClassId (Templates, ClassId), PrunedProtos, (BIT_VECTOR) & ClassPrunerResults[i].config_mask,
IntOutlineLength, NumFeatures, IntFeatures, 0,
Results->BlobLength, NumFeatures, IntFeatures, 0,
CharNormArray[ClassIndex], &IntResult, MatchDebugFlags);
if (MatcherDebugLevel >= 2 || display_ratings > 1) {
cprintf ("%c-%-2d %2.1f(%2.1f/%2.1f) ", ClassId, IntResult.Config,
cprintf ("%s-%-2d %2.1f(%2.1f/%2.1f) ",
unicharset.id_to_unichar(ClassId), IntResult.Config,
IntResult.Rating * 100.0,
ClassPrunerResults[i].Rating * 100.0,
ClassPrunerResults[i].Rating2 * 100.0);
@ -1917,7 +1940,7 @@ void CharNormClassifier(TBLOB *Blob,
ClassIndex = IndexForClassId (Templates, ClassId);
if (MatcherDebugLevel >= 2 || display_ratings > 1) {
cprintf ("%c %2.1f(%2.1f) ", ClassId,
cprintf ("%s %2.1f(%2.1f) ", unicharset.id_to_unichar(ClassId),
ClassPrunerResults[i].Rating * 200.0,
ClassPrunerResults[i].Rating2 * 100.0);
if (i % 4 == 3)
@ -1963,7 +1986,6 @@ void ClassifyAsNoise(TBLOB *Blob,
Rating /= 1.0 + Rating;
AddNewResult (Results, NO_CLASS, Rating, 0);
} /* ClassifyAsNoise */
@ -2020,25 +2042,32 @@ LIST ConvertMatchesToChoices(ADAPT_RESULTS *Results) {
** Exceptions: none
** History: Tue Mar 12 08:55:37 1991, DSJ, Created.
*/
char ChoiceString[2];
int i;
LIST Choices;
CLASS_ID NextMatch;
FLOAT32 Rating;
FLOAT32 Certainty;
const char *NextMatch_unichar;
char choice_lengths[2] = {0, 0};
ChoiceString[1] = '\0';
if (Results->NumMatches > MAX_MATCHES)
Results->NumMatches = MAX_MATCHES;
for (Choices = NIL, i = 0; i < Results->NumMatches; i++) {
NextMatch = Results->Classes[i];
ChoiceString[0] = NextMatch;
Rating = Certainty = Results->Ratings[NextMatch];
Rating *= RatingScale * Results->BlobLength;
Certainty *= -CertaintyScale;
Choices = append_choice (Choices, ChoiceString, Rating, Certainty,
Results->Configs[NextMatch]);
if (NextMatch != NO_CLASS)
NextMatch_unichar = unicharset.id_to_unichar(NextMatch);
else
NextMatch_unichar = "";
choice_lengths[0] = strlen(NextMatch_unichar);
Choices = append_choice (Choices,
NextMatch_unichar,
choice_lengths,
Rating, Certainty,
Results->Configs[NextMatch]);
}
return (Choices);
@ -2073,11 +2102,13 @@ void DebugAdaptiveClassifier(TBLOB *Blob,
BOOL8 PreTrainedOn = TRUE;
ShowMatchDisplay();
cprintf ("\nDebugging class = %c (%s) ...\n", LastClass, DebugMode);
cprintf ("\nDebugging class = %s (%s) ...\n",
unicharset.id_to_unichar(LastClass), DebugMode);
ShowBestMatchFor(Blob, LineStats, LastClass, AdaptiveOn, PreTrainedOn);
UpdateMatchDisplay();
while ((ClassId = GetClassToDebug (Prompt)) != 0) {
#if 0
switch (ClassId) {
case 'b':
AdaptiveOn = TRUE;
@ -2101,9 +2132,12 @@ void DebugAdaptiveClassifier(TBLOB *Blob,
LastClass = ClassId;
break;
}
#endif
LastClass = ClassId;
ShowMatchDisplay();
cprintf ("\nDebugging class = %c (%s) ...\n", LastClass, DebugMode);
cprintf ("\nDebugging class = %s (%s) ...\n",
unicharset.id_to_unichar(LastClass), DebugMode);
ShowBestMatchFor(Blob, LineStats, LastClass, AdaptiveOn, PreTrainedOn);
UpdateMatchDisplay();
}
@ -2142,7 +2176,7 @@ void DoAdaptiveMatch(TBLOB *Blob,
** Exceptions: none
** History: Tue Mar 12 08:50:11 1991, DSJ, Created.
*/
char *Ambiguities;
UNICHAR_ID *Ambiguities;
AdaptiveMatcherCalls++;
InitIntFX();
@ -2159,7 +2193,7 @@ void DoAdaptiveMatch(TBLOB *Blob,
&& !tess_bn_matching) {
CharNormClassifier(Blob, LineStats, PreTrainedTemplates, Results);
}
else if (Ambiguities && *Ambiguities) {
else if (Ambiguities && *Ambiguities >= 0) {
AmbigClassifier(Blob,
LineStats,
PreTrainedTemplates,
@ -2176,8 +2210,8 @@ void DoAdaptiveMatch(TBLOB *Blob,
void
GetAdaptThresholds (TWERD * Word,
LINE_STATS * LineStats,
const char *BestChoice,
const char *BestRawChoice, FLOAT32 Thresholds[]) {
const WERD_CHOICE& BestChoice,
const WERD_CHOICE& BestRawChoice, FLOAT32 Thresholds[]) {
/*
** Parameters:
** Word
@ -2208,9 +2242,13 @@ void DoAdaptiveMatch(TBLOB *Blob,
** History: Fri May 31 09:22:08 1991, DSJ, Created.
*/
TBLOB *Blob;
const char* BestChoice_string = BestChoice.string().string();
const char* BestChoice_lengths = BestChoice.lengths().string();
const char* BestRawChoice_string = BestRawChoice.string().string();
const char* BestRawChoice_lengths = BestRawChoice.lengths().string();
if (EnableNewAdaptRules && /* new rules */
CurrentBestChoiceIs (BestChoice)) {
CurrentBestChoiceIs (BestChoice_string, BestChoice_lengths)) {
FindClassifierErrors(PerfectRating,
GoodAdaptiveMatch,
RatingMargin,
@ -2219,15 +2257,21 @@ void DoAdaptiveMatch(TBLOB *Blob,
else { /* old rules */
for (Blob = Word->blobs;
Blob != NULL;
Blob = Blob->next, BestChoice++, BestRawChoice++, Thresholds++)
if (*BestChoice == *BestRawChoice)
Blob = Blob->next, BestChoice_string += *(BestChoice_lengths++),
BestRawChoice_string += *(BestRawChoice_lengths++), Thresholds++)
if (*(BestChoice_lengths) == *(BestRawChoice_lengths) &&
strncmp(BestChoice_string, BestRawChoice_string,
*(BestChoice_lengths)) == 0)
*Thresholds = GoodAdaptiveMatch;
else {
/* the blob was incorrectly classified - find the rating threshold
needed to create a template which will correct the error with
some margin. However, don't waste time trying to make
templates which are too tight. */
*Thresholds = GetBestRatingFor (Blob, LineStats, *BestChoice);
*Thresholds = GetBestRatingFor (Blob, LineStats,
unicharset.unichar_to_id(
BestChoice_string,
*BestChoice_lengths));
*Thresholds *= (1.0 - RatingMargin);
if (*Thresholds > GoodAdaptiveMatch)
*Thresholds = GoodAdaptiveMatch;
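The old-rules branch above amounts to the following per-blob threshold for a misclassified blob (a restatement, not code from the commit):

  Thresholds[i] = min(GoodAdaptiveMatch,
                      GetBestRatingFor(Blob, LineStats, correct_id)
                        * (1.0 - RatingMargin))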
@ -2238,9 +2282,9 @@ void DoAdaptiveMatch(TBLOB *Blob,
} /* GetAdaptThresholds */
/*---------------------------------------------------------------------------*/
char *GetAmbiguities(TBLOB *Blob,
LINE_STATS *LineStats,
CLASS_ID CorrectClass) {
UNICHAR_ID *GetAmbiguities(TBLOB *Blob,
LINE_STATS *LineStats,
CLASS_ID CorrectClass) {
/*
** Parameters:
** Blob
@ -2262,7 +2306,7 @@ void DoAdaptiveMatch(TBLOB *Blob,
** History: Fri Mar 15 08:08:22 1991, DSJ, Created.
*/
ADAPT_RESULTS Results;
char *Ambiguities;
UNICHAR_ID *Ambiguities;
int i;
EnterClassifyMode;
@ -2283,15 +2327,16 @@ void DoAdaptiveMatch(TBLOB *Blob,
/* copy the class id's into a string of ambiguities - don't copy if
the correct class is the only class id matched */
Ambiguities = (char *) Emalloc (sizeof (char) * (Results.NumMatches + 1));
Ambiguities = (UNICHAR_ID *) Emalloc (sizeof (UNICHAR_ID) *
(Results.NumMatches + 1));
if (Results.NumMatches > 1 ||
Results.NumMatches == 1 && Results.Classes[0] != CorrectClass) {
for (i = 0; i < Results.NumMatches; i++)
Ambiguities[i] = Results.Classes[i];
Ambiguities[i] = '\0';
Ambiguities[i] = -1;
}
else
Ambiguities[0] = '\0';
Ambiguities[0] = -1;
return (Ambiguities);
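Ambiguity lists are now arrays of UNICHAR_ID terminated by -1 rather than NUL-terminated char strings. A minimal sketch of producing and consuming such a list (illustrative only; it mirrors the allocation above and the loop in AmbigClassifier):

  UNICHAR_ID *list = (UNICHAR_ID *) Emalloc (sizeof (UNICHAR_ID) * (n + 1));
  for (i = 0; i < n; i++)
    list[i] = Results.Classes[i];
  list[n] = -1;                       /* sentinel replaces '\0' */

  for (UNICHAR_ID *p = list; *p >= 0; ++p)
    cprintf ("%s", unicharset.id_to_unichar(*p));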
@ -2303,7 +2348,7 @@ void DoAdaptiveMatch(TBLOB *Blob,
INT_TEMPLATES Templates,
INT_FEATURE_ARRAY IntFeatures,
CLASS_NORMALIZATION_ARRAY CharNormArray,
FLOAT32 *BlobLength) {
INT32 *BlobLength) {
/*
** Parameters:
** Blob
@ -2342,7 +2387,7 @@ void DoAdaptiveMatch(TBLOB *Blob,
Features = ExtractPicoFeatures (Blob, LineStats);
NumFeatures = NumFeaturesIn (Features);
*BlobLength = NumFeatures * GetPicoFeatureLength ();
*BlobLength = NumFeatures;
if (NumFeatures > UNLIKELY_NUM_FEAT) {
FreeFeatureSet(Features);
return (0);
@ -2384,13 +2429,12 @@ void DoAdaptiveMatch(TBLOB *Blob,
** Exceptions: none
** History: Tue Apr 9 09:01:24 1991, DSJ, Created.
*/
int CNOutlineLength, BLOutlineLength;
int NumCNFeatures, NumBLFeatures;
INT_FEATURE_ARRAY CNFeatures, BLFeatures;
INT_RESULT_STRUCT CNResult, BLResult;
CLASS_NORMALIZATION_ARRAY CNAdjust, BLAdjust;
CLASS_INDEX ClassIndex;
FLOAT32 BlobLength;
INT32 BlobLength;
CNResult.Rating = BLResult.Rating = 1.0;
@ -2402,13 +2446,12 @@ void DoAdaptiveMatch(TBLOB *Blob,
PreTrainedTemplates,
CNFeatures, CNAdjust, &BlobLength);
if (NumCNFeatures > 0) {
CNOutlineLength = (int) (BlobLength / GetPicoFeatureLength ());
ClassIndex = IndexForClassId (PreTrainedTemplates, ClassId);
SetCharNormMatch();
IntegerMatcher (ClassForClassId (PreTrainedTemplates, ClassId),
AllProtosOn, AllConfigsOn,
CNOutlineLength, NumCNFeatures, CNFeatures, 0,
BlobLength, NumCNFeatures, CNFeatures, 0,
CNAdjust[ClassIndex], &CNResult, NO_DEBUG);
}
}
@ -2418,7 +2461,6 @@ void DoAdaptiveMatch(TBLOB *Blob,
AdaptedTemplates->Templates,
BLFeatures, BLAdjust, &BlobLength);
if (NumBLFeatures > 0) {
BLOutlineLength = (int) (BlobLength / GetPicoFeatureLength ());
ClassIndex = IndexForClassId (AdaptedTemplates->Templates, ClassId);
SetBaseLineMatch();
@ -2426,7 +2468,7 @@ void DoAdaptiveMatch(TBLOB *Blob,
(AdaptedTemplates->Templates, ClassId),
AdaptedTemplates->Class[ClassIndex]->PermProtos,
AdaptedTemplates->Class[ClassIndex]->PermConfigs,
BLOutlineLength, NumBLFeatures, BLFeatures, 0,
BlobLength, NumBLFeatures, BLFeatures, 0,
BLAdjust[ClassIndex], &BLResult, NO_DEBUG);
}
}
@ -2441,7 +2483,7 @@ void DoAdaptiveMatch(TBLOB *Blob,
INT_TEMPLATES Templates,
INT_FEATURE_ARRAY IntFeatures,
CLASS_NORMALIZATION_ARRAY CharNormArray,
FLOAT32 *BlobLength) {
INT32 *BlobLength) {
/*
** Parameters:
** Blob
@ -2479,7 +2521,7 @@ void DoAdaptiveMatch(TBLOB *Blob,
INT_TEMPLATES Templates,
INT_FEATURE_ARRAY IntFeatures,
CLASS_NORMALIZATION_ARRAY CharNormArray,
FLOAT32 *BlobLength) {
INT32 *BlobLength) {
/*
** Parameters:
** Blob
@ -2523,7 +2565,7 @@ void DoAdaptiveMatch(TBLOB *Blob,
}
if (!FeaturesOK) {
*BlobLength = FXInfo.Length * ComputeScaleFactor (LineStats);
*BlobLength = FXInfo.NumBL;
return (0);
}
@ -2531,7 +2573,7 @@ void DoAdaptiveMatch(TBLOB *Blob,
Src < End; *Dest++ = *Src++);
ClearCharNormArray(Templates, CharNormArray);
*BlobLength = FXInfo.Length * ComputeScaleFactor (LineStats);
*BlobLength = FXInfo.NumBL;
return (FXInfo.NumBL);
} /* GetIntBaselineFeatures */
@ -2542,7 +2584,7 @@ void DoAdaptiveMatch(TBLOB *Blob,
INT_TEMPLATES Templates,
INT_FEATURE_ARRAY IntFeatures,
CLASS_NORMALIZATION_ARRAY CharNormArray,
FLOAT32 *BlobLength) {
INT32 *BlobLength) {
/*
** Parameters:
** Blob
@ -2588,7 +2630,7 @@ void DoAdaptiveMatch(TBLOB *Blob,
}
if (!FeaturesOK) {
*BlobLength = FXInfo.Length * ComputeScaleFactor (LineStats);
*BlobLength = FXInfo.NumBL;
return (0);
}
@ -2606,7 +2648,7 @@ void DoAdaptiveMatch(TBLOB *Blob,
ComputeIntCharNormArray(NormFeature, Templates, CharNormArray);
FreeFeature(NormFeature);
*BlobLength = FXInfo.Length * Scale;
*BlobLength = FXInfo.NumBL;
return (FXInfo.NumCN);
} /* GetIntCharNormFeatures */
@ -2856,7 +2898,7 @@ void DoAdaptiveMatch(TBLOB *Blob,
** Exceptions: none
** History: Thu Mar 14 15:54:08 1991, DSJ, Created.
*/
char *Ambigs;
UNICHAR_ID *Ambigs;
TEMP_CONFIG Config;
CLASS_INDEX ClassIndex;
ADAPT_CLASS Class;
@ -2881,9 +2923,14 @@ void DoAdaptiveMatch(TBLOB *Blob,
Ambigs = GetAmbiguities (Blob, LineStats, ClassId);
PermConfigFor (Class, ConfigId) = Ambigs;
if (LearningDebugLevel >= 1)
cprintf ("Making config %d permanent with ambiguities '%s'.\n",
ConfigId, Ambigs);
if (LearningDebugLevel >= 1) {
cprintf ("Making config %d permanent with ambiguities '",
ConfigId, Ambigs);
for (UNICHAR_ID *AmbigsPointer = Ambigs;
*AmbigsPointer >= 0; ++AmbigsPointer)
cprintf("%s", unicharset.id_to_unichar(*AmbigsPointer));
cprintf("'.\n");
}
} /* MakePermanent */
@ -2999,13 +3046,16 @@ void DoAdaptiveMatch(TBLOB *Blob,
int i;
if (Results->NumMatches > 0) {
cprintf ("%c(%d) %4.1f ", Results->Classes[0], Results->Classes[0],
Results->Ratings[Results->Classes[0]] * 100.0);
cprintf ("%s(%d) %4.1f ",
unicharset.id_to_unichar(Results->Classes[0]),
Results->Classes[0],
Results->Ratings[Results->Classes[0]] * 100.0);
for (i = 1; i < Results->NumMatches; i++) {
cprintf ("%c(%d) %4.1f ", Results->Classes[i],
Results->Classes[i],
Results->Ratings[Results->Classes[i]] * 100.0);
cprintf ("%s(%d) %4.1f ",
unicharset.id_to_unichar(Results->Classes[i]),
Results->Classes[i],
Results->Ratings[Results->Classes[i]] * 100.0);
}
}
} /* PrintAdaptiveMatchResults */
@ -3032,20 +3082,27 @@ void DoAdaptiveMatch(TBLOB *Blob,
FLOAT32 *Rating = Results->Ratings;
CLASS_ID *Match = Results->Classes;
FLOAT32 BadMatchThreshold;
static const char* romans = "ivxIVX";
static const char* romans = "i v x I V X";
BadMatchThreshold = Results->BestRating + BadMatchPad;
if (bln_numericmode) {
UNICHAR_ID unichar_id_one = unicharset.contains_unichar("1") ?
unicharset.unichar_to_id("1") : -1;
UNICHAR_ID unichar_id_zero = unicharset.contains_unichar("0") ?
unicharset.unichar_to_id("0") : -1;
for (Next = NextGood = 0; Next < Results->NumMatches; Next++) {
if (Rating[Match[Next]] <= BadMatchThreshold) {
if (!isalpha(Match[Next]) || strchr(romans, Match[Next]) != NULL) {
if (!unicharset.get_isalpha(Match[Next]) ||
strstr(romans, unicharset.id_to_unichar(Match[Next])) != NULL) {
Match[NextGood++] = Match[Next];
} else if (Match[Next] == 'l' && Rating['1'] >= BadMatchThreshold) {
Match[NextGood++] = '1';
Rating['1'] = Rating['l'];
} else if (Match[Next] == 'O' && Rating['0'] >= BadMatchThreshold) {
Match[NextGood++] = '0';
Rating['0'] = Rating['O'];
} else if (unichar_id_one >= 0 && unicharset.eq(Match[Next], "l") &&
Rating[unichar_id_one] >= BadMatchThreshold) {
Match[NextGood++] = unichar_id_one;
Rating[unichar_id_one] = Rating[unicharset.unichar_to_id("l")];
} else if (unichar_id_zero >= 0 && unicharset.eq(Match[Next], "O") &&
Rating[unichar_id_zero] >= BadMatchThreshold) {
Match[NextGood++] = unichar_id_zero;
Rating[unichar_id_zero] = Rating[unicharset.unichar_to_id("O")];
}
}
}
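The class-id lists above (and the punctuation/digit lists in the next hunk) change from packed character strings tested with strchr to space-separated unichar strings tested with strstr, since a class id may now map to a multi-byte unichar. A minimal sketch of the membership test (illustrative only):

  const char *romans = "i v x I V X";
  int is_roman = strstr (romans, unicharset.id_to_unichar(Match[Next])) != NULL;

The separating spaces presumably keep two adjacent single-character entries from accidentally matching a multi-character unichar.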
@ -3084,14 +3141,16 @@ void DoAdaptiveMatch(TBLOB *Blob,
int digit_count;
CLASS_ID *Match = Results->Classes;
/*garbage characters */
static char punc_chars[] = ".,;:/`~'-=\\|\"!_^";
static char digit_chars[] = "0123456789";
static char punc_chars[] = ". , ; : / ` ~ ' - = \\ | \" ! _ ^";
static char digit_chars[] = "0 1 2 3 4 5 6 7 8 9";
punc_count = 0;
digit_count = 0;
for (Next = NextGood = 0; Next < Results->NumMatches; Next++) {
if (strchr (punc_chars, Match[Next]) == NULL) {
if (strchr (digit_chars, Match[Next]) == NULL) {
if (strstr (punc_chars,
unicharset.id_to_unichar(Match[Next])) == NULL) {
if (strstr (digit_chars,
unicharset.id_to_unichar(Match[Next])) == NULL) {
Match[NextGood++] = Match[Next];
}
else {
@ -3172,13 +3231,12 @@ void DoAdaptiveMatch(TBLOB *Blob,
** Exceptions: none
** History: Fri Mar 22 08:43:52 1991, DSJ, Created.
*/
int CNOutlineLength = 0, BLOutlineLength = 0;
int NumCNFeatures = 0, NumBLFeatures = 0;
INT_FEATURE_ARRAY CNFeatures, BLFeatures;
INT_RESULT_STRUCT CNResult, BLResult;
CLASS_NORMALIZATION_ARRAY CNAdjust, BLAdjust;
CLASS_INDEX ClassIndex;
FLOAT32 BlobLength;
INT32 BlobLength;
UINT32 ConfigMask;
static int next_config = -1;
@ -3202,13 +3260,12 @@ void DoAdaptiveMatch(TBLOB *Blob,
if (NumCNFeatures <= 0)
cprintf ("Illegal blob (char norm features)!\n");
else {
CNOutlineLength = (int) (BlobLength / GetPicoFeatureLength ());
ClassIndex = IndexForClassId (PreTrainedTemplates, ClassId);
SetCharNormMatch();
IntegerMatcher (ClassForClassId (PreTrainedTemplates, ClassId),
AllProtosOn, AllConfigsOn,
CNOutlineLength, NumCNFeatures, CNFeatures, 0,
BlobLength, NumCNFeatures, CNFeatures, 0,
CNAdjust[ClassIndex], &CNResult, NO_DEBUG);
cprintf ("Best built-in template match is config %2d (%4.1f) (cn=%d)\n",
@ -3227,7 +3284,6 @@ void DoAdaptiveMatch(TBLOB *Blob,
if (NumBLFeatures <= 0)
cprintf ("Illegal blob (baseline features)!\n");
else {
BLOutlineLength = (int) (BlobLength / GetPicoFeatureLength ());
ClassIndex =
IndexForClassId (AdaptedTemplates->Templates, ClassId);
@ -3237,12 +3293,15 @@ void DoAdaptiveMatch(TBLOB *Blob,
AllProtosOn, AllConfigsOn,
// AdaptedTemplates->Class[ClassIndex]->PermProtos,
// AdaptedTemplates->Class[ClassIndex]->PermConfigs,
BLOutlineLength, NumBLFeatures, BLFeatures, 0,
BlobLength, NumBLFeatures, BLFeatures, 0,
BLAdjust[ClassIndex], &BLResult, NO_DEBUG);
#ifndef SECURE_NAMES
cprintf ("Best adaptive template match is config %2d (%4.1f)\n",
BLResult.Config, BLResult.Rating * 100.0);
int ClassIndex = IndexForClassId (AdaptedTemplates->Templates, ClassId);
ADAPT_CLASS Class = AdaptedTemplates->Class[ClassIndex];
cprintf ("Best adaptive template match is config %2d (%4.1f) %s\n",
BLResult.Config, BLResult.Rating * 100.0,
ConfigIsPermanent(Class, BLResult.Config) ? "Perm" : "Temp");
#endif
}
}
@ -3264,7 +3323,7 @@ void DoAdaptiveMatch(TBLOB *Blob,
AllProtosOn,
// AdaptedTemplates->Class[ClassIndex]->PermProtos,
(BIT_VECTOR) & ConfigMask,
BLOutlineLength, NumBLFeatures, BLFeatures, 0,
BlobLength, NumBLFeatures, BLFeatures, 0,
BLAdjust[ClassIndex], &BLResult, MatchDebugFlags);
cprintf ("Adaptive template match for config %2d is %4.1f\n",
BLResult.Config, BLResult.Rating * 100.0);
@ -3277,7 +3336,7 @@ void DoAdaptiveMatch(TBLOB *Blob,
SetCharNormMatch();
//xiaofan
IntegerMatcher (ClassForClassId (PreTrainedTemplates, ClassId), AllProtosOn, (BIT_VECTOR) & ConfigMask,
CNOutlineLength, NumCNFeatures, CNFeatures, 0,
BlobLength, NumCNFeatures, CNFeatures, 0,
CNAdjust[ClassIndex], &CNResult, MatchDebugFlags);
}
} /* ShowBestMatchFor */

View File

@ -22,6 +22,7 @@
#include "intmatcher.h"
#include "tordvars.h"
#include "callcpp.h"
#include "globals.h"
#include <math.h>
#define CLASS_MASK_SIZE ((MAX_NUM_CLASSES*NUM_BITS_PER_CLASS \
@ -335,7 +336,7 @@ make_int_var (IntThetaFudge, 128, MakeIntThetaFudge,
16, 23, SetIntThetaFudge,
"Integer Matcher Theta Fudge 0-255: ");
make_float_var (CPCutoffStrength, 0.15, MakeCPCutoffStrength,
make_int_var (CPCutoffStrength, 7, MakeCPCutoffStrength,
16, 24, SetCPCutoffStrength,
"Class Pruner CutoffStrength: ");
@ -422,10 +423,10 @@ int ClassPruner(INT_TEMPLATES IntTemplates,
int NumPruners;
INT32 feature_index; //current feature
static INT32 ClassCount[MAX_NUM_CLASSES - 1];
static INT16 NormCount[MAX_NUM_CLASSES - 1];
static INT16 SortKey[MAX_NUM_CLASSES];
static UINT8 SortIndex[MAX_NUM_CLASSES];
static INT32 ClassCount[MAX_NUM_CLASSES];
static INT16 NormCount[MAX_NUM_CLASSES];
static INT16 SortKey[MAX_NUM_CLASSES + 1];
static UINT8 SortIndex[MAX_NUM_CLASSES + 1];
CLASS_INDEX Class;
int out_class;
int MaxNumClasses;
@ -433,7 +434,7 @@ int ClassPruner(INT_TEMPLATES IntTemplates,
int NumClasses;
FLOAT32 max_rating; //max allowed rating
INT32 *ClassCountPtr;
INT8 classch;
CLASS_ID classch;
MaxNumClasses = NumClassesIn (IntTemplates);
@ -497,12 +498,11 @@ int ClassPruner(INT_TEMPLATES IntTemplates,
/* Adjust Class Counts for Number of Expected Features */
for (Class = 0; Class < MaxNumClasses; Class++)
if (NumFeatures < ExpectedNumFeatures[Class])
ClassCount[Class] =
(int) (((FLOAT32) (ClassCount[Class] * NumFeatures)) /
(NumFeatures +
CPCutoffStrength * (ExpectedNumFeatures[Class] -
NumFeatures)));
if (NumFeatures < ExpectedNumFeatures[Class]) {
int deficit = ExpectedNumFeatures[Class] - NumFeatures;
ClassCount[Class] -= ClassCount[Class] * deficit /
(NumFeatures*CPCutoffStrength + deficit);
}
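A reading of the ClassPruner cutoff rewrite above, where CPCutoffStrength flips from the float 0.15 to the integer 7: with N = NumFeatures and d = ExpectedNumFeatures[Class] - N,

  old:  Count * N / (N + 0.15 * d)
  new:  Count - Count * d / (7*N + d)
      = Count * 7*N / (7*N + d)
      = Count * N / (N + d/7),   and 1/7 is about 0.14, close to the old 0.15

so the cutoff keeps roughly the same shape while moving to integer arithmetic, removing another source of floating point variation.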
/* Adjust Class Counts for Normalization Factors */
MaxCount = 0;
@ -535,17 +535,14 @@ int ClassPruner(INT_TEMPLATES IntTemplates,
if (display_ratings > 1) {
cprintf ("CP:%d classes, %d features:\n", NumClasses, NumFeatures);
for (Class = 0; Class < NumClasses; Class++) {
classch =
ClassIdForIndex (IntTemplates, SortIndex[NumClasses - Class]);
cprintf ("%c:C=%d, E=%d, N=%d, Rat=%d\n", classch,
ClassCount[SortIndex[NumClasses - Class]],
ExpectedNumFeatures[SortIndex[NumClasses - Class]],
SortKey[NumClasses - Class],
(int) (10 +
1000 * (1.0f -
SortKey[NumClasses -
Class] / ((float) cp_maps[3] *
NumFeatures))));
classch = ClassIdForIndex (IntTemplates, SortIndex[NumClasses - Class]);
cprintf ("%s:C=%d, E=%d, N=%d, Rat=%d\n",
unicharset.id_to_unichar(classch),
ClassCount[SortIndex[NumClasses - Class]],
ExpectedNumFeatures[SortIndex[NumClasses - Class]],
SortKey[NumClasses - Class],
1010 - 1000 * SortKey[NumClasses - Class] /
(cp_maps[3] * NumFeatures));
}
if (display_ratings > 2) {
NumPruners = NumClassPrunersIn (IntTemplates);
@ -569,9 +566,9 @@ int ClassPruner(INT_TEMPLATES IntTemplates,
PrunerWord = *BasePrunerAddress++;
for (Class = 0; Class < 16; Class++, class_index++) {
if (NormCount[class_index] >= MaxCount)
cprintf (" %c=%d,",
ClassIdForIndex (IntTemplates,
class_index),
cprintf (" %s=%d,",
unicharset.id_to_unichar(ClassIdForIndex (IntTemplates,
class_index)),
PrunerWord & 3);
PrunerWord >>= 2;
}
@ -582,8 +579,8 @@ int ClassPruner(INT_TEMPLATES IntTemplates,
cprintf ("Adjustments:");
for (Class = 0; Class < MaxNumClasses; Class++) {
if (NormCount[Class] > MaxCount)
cprintf (" %c=%d,",
ClassIdForIndex (IntTemplates, Class),
cprintf (" %s=%d,",
unicharset.id_to_unichar(ClassIdForIndex (IntTemplates, Class)),
-((ClassPrunerMultiplier *
NormalizationFactors[Class]) >> 8) * cp_maps[3] /
3);
@ -640,7 +637,7 @@ int ClassPruner(INT_TEMPLATES IntTemplates,
cp_bestconf = -1;
for (Class = 0; Class < NumClasses; Class++) {
classch = Results[Class].Class;
if (classch == blob_answer) {
if (strcmp(unicharset.id_to_unichar(classch), blob_answer) == 0) {
cp_bestindex = Class;
cp_bestrating = (int) (1000 * Results[Class].Rating + 10);
cp_bestconf = (int) (1000 * Results[Class].Rating2 + 10);
@ -1191,7 +1188,6 @@ int FindGoodProtos(INT_CLASS ClassTemplate,
if (MatchDebuggingOn (Debug))
cprintf ("Match Complete --------------------------------------------\n");
return NumGoodProtos;
}

View File

@ -134,7 +134,7 @@ void add_point_to_list(POINT_GROUP point_list, EDGEPT *point) {
}
#ifndef GRAPHICS_DISABLED
if (chop_debug)
if (chop_debug > 2)
mark_outline(point);
#endif
}
@ -162,7 +162,8 @@ int angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3) {
length = length_product (vector1, vector2);
if ((int) length == 0)
return (0);
angle = (int) (asin (CROSS (vector1, vector2) / length) / PI * 180.0);
angle = static_cast<int>(floor(asin(CROSS (vector1, vector2) /
length) / PI * 180.0 + 0.5));
/* Use dot product */
if (SCALAR (vector1, vector2) < 0)
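The change above replaces truncation toward zero with round-to-nearest (floor(x + 0.5)), for example:

  x =  29.7:  old (int) cast -> 29    new floor(30.2)  -> 30
  x = -29.7:  old (int) cast -> -29   new floor(-29.2) -> -30

Exact halves round upward; the point is that the rounding rule no longer depends on the sign-dependent behaviour of the cast.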

View File

@ -73,7 +73,8 @@ double_VAR (tessedit_certainty_threshold, -2.25, "Good blob limit");
* Set the fields in this choice to be defaulted bad initial values.
**********************************************************************/
#define set_null_choice(choice) \
(class_string (choice) = NULL, \
(class_string (choice) = NULL, \
class_lengths (choice) = NULL, \
class_probability (choice) = MAX_FLOAT32, \
class_certainty (choice) = -MAX_FLOAT32) \
@ -225,7 +226,8 @@ SEAM *attempt_blob_chop(TWERD *word, INT32 blob_number, SEAMS seam_list) {
delete_seam(seam);
#ifndef GRAPHICS_DISABLED
if (chop_debug) {
display_blob(blob, Red);
if (chop_debug >2)
display_blob(blob, Red);
cprintf ("\n** seam being removed ** \n");
}
#endif
@ -437,7 +439,6 @@ CHOICES_LIST chop_word_main(register TWERD *word,
}
bit_count = index - 1;
permute_characters(char_choices, rating_limit, best_choice, raw_choice);
set_n_ones (&state, array_count (char_choices) - 1);
if (matcher_fp != NULL) {
if (matcher_pass == 0) {
@ -474,7 +475,6 @@ CHOICES_LIST chop_word_main(register TWERD *word,
if (chop_debug)
print_seams ("Final seam list:", seam_list);
if (enable_assoc &&
!AcceptableChoice (char_choices, best_choice, raw_choice, NULL)
|| (tester || trainer)

View File

@ -370,7 +370,7 @@ SEAM *pick_good_seam(TBLOB *blob) {
INT16 num_points = 0;
#ifndef GRAPHICS_DISABLED
if (chop_debug)
if (chop_debug > 2)
display_splits = TRUE;
draw_blob_edges(blob);
@ -417,7 +417,7 @@ SEAM *pick_good_seam(TBLOB *blob) {
mark_split (seam->split2);
if (seam->split3)
mark_split (seam->split3);
if (chop_debug > 1) {
if (chop_debug > 2) {
update_edge_window();
edge_window_wait();
}

View File

@ -42,22 +42,22 @@
* Split this blob into two blobs by applying the splits included in
* the seam description.
**********************************************************************/
void apply_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
check_outline_mem();
void apply_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
check_outline_mem();
if (seam->split1 == NULL) {
divide_blobs (blob, other_blob, seam->location);
}
else if (seam->split2 == NULL) {
make_split_blobs(blob, other_blob, seam);
make_split_blobs(blob, other_blob, seam);
}
else if (seam->split3 == NULL) {
make_double_split(blob, other_blob, seam);
make_double_split(blob, other_blob, seam);
}
else {
make_triple_split(blob, other_blob, seam);
make_triple_split(blob, other_blob, seam);
}
check_outline_mem();
check_outline_mem();
}
@ -69,7 +69,7 @@ void apply_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
* other blob. The ones whose x location is less than that point are
* retained in the original blob.
**********************************************************************/
void divide_blobs(TBLOB *blob, TBLOB *other_blob, INT32 location) {
void divide_blobs(TBLOB *blob, TBLOB *other_blob, INT32 location) {
TESSLINE *outline;
TESSLINE *outline1 = NULL;
TESSLINE *outline2 = NULL;
@ -115,23 +115,23 @@ void divide_blobs(TBLOB *blob, TBLOB *other_blob, INT32 location) {
* Group the outlines from the first blob into both of them. Do so
* according to the information about the split.
**********************************************************************/
void form_two_blobs(TBLOB *blob, TBLOB *other_blob, INT32 location) {
setup_blob_outlines(blob);
void form_two_blobs(TBLOB *blob, TBLOB *other_blob, INT32 location) {
setup_blob_outlines(blob);
divide_blobs(blob, other_blob, location);
divide_blobs(blob, other_blob, location);
eliminate_duplicate_outlines(blob);
eliminate_duplicate_outlines(other_blob);
eliminate_duplicate_outlines(blob);
eliminate_duplicate_outlines(other_blob);
correct_blob_order(blob, other_blob);
correct_blob_order(blob, other_blob);
#ifndef GRAPHICS_DISABLED
if (chop_debug) {
display_blob(blob, Red);
if (chop_debug > 2) {
display_blob(blob, Red);
#ifdef __UNIX__
sleep (1);
#endif
display_blob(other_blob, Cyan);
display_blob(other_blob, Cyan);
}
#endif
}
@ -143,7 +143,7 @@ void form_two_blobs(TBLOB *blob, TBLOB *other_blob, INT32 location) {
* Create two blobs out of one by splitting the original one in half.
* Return the resultant blobs for classification.
**********************************************************************/
void make_double_split(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
void make_double_split(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
make_single_split (blob->outlines, seam->split1);
make_single_split (blob->outlines, seam->split2);
form_two_blobs (blob, other_blob, seam->location);
@ -156,7 +156,7 @@ void make_double_split(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
* Create two outlines out of one by splitting the original one in half.
* Return the resultant outlines.
**********************************************************************/
void make_single_split(TESSLINE *outlines, SPLIT *split) {
void make_single_split(TESSLINE *outlines, SPLIT *split) {
assert (outlines != NULL);
split_outline (split->point1, split->point2);
@ -186,7 +186,7 @@ void make_single_split(TESSLINE *outlines, SPLIT *split) {
* Create two blobs out of one by splitting the original one in half.
* Return the resultant blobs for classification.
**********************************************************************/
void make_split_blobs(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
void make_split_blobs(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
make_single_split (blob->outlines, seam->split1);
form_two_blobs (blob, other_blob, seam->location);
@ -201,7 +201,7 @@ void make_split_blobs(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
* the outlines. Three of the starting outlines will produce two ending
* outlines. Return the resultant blobs for classification.
**********************************************************************/
void make_triple_split(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
void make_triple_split(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
make_single_split (blob->outlines, seam->split1);
make_single_split (blob->outlines, seam->split2);
make_single_split (blob->outlines, seam->split3);
@ -217,7 +217,7 @@ void make_triple_split(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
* result. The seam may consist of one, two, or three splits. Each
* of these split must be removed from the outlines.
**********************************************************************/
void undo_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
void undo_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
TESSLINE *outline;
if (!seam)
@ -231,7 +231,7 @@ void undo_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
while (outline->next)
outline = outline->next;
outline->next = other_blob->outlines;
oldblob(other_blob);
oldblob(other_blob);
if (seam->split1 == NULL) {
}
@ -248,10 +248,10 @@ void undo_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
undo_single_split (blob, seam->split1);
}
setup_blob_outlines(blob);
eliminate_duplicate_outlines(blob);
setup_blob_outlines(blob);
eliminate_duplicate_outlines(blob);
check_outline_mem();
check_outline_mem();
}
@ -261,7 +261,7 @@ void undo_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
* Undo a seam that is made by a single split. Perform the correct
* magic to reconstruct the appropriate set of outline data structures.
**********************************************************************/
void undo_single_split(TBLOB *blob, SPLIT *split) {
void undo_single_split(TBLOB *blob, SPLIT *split) {
TESSLINE *outline1;
TESSLINE *outline2;
/* Modify edge points */