mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-06-07 01:42:41 +08:00
Various fixes, including memory leak in fixspace, font labels on output, removed some annoying debug output, fixes to initialization of parameters, general cleanup, and added Hindi
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@570 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
parent
ba9f73f04b
commit
c86a0f6892
@ -86,7 +86,6 @@ void FreeTempConfig(TEMP_CONFIG Config) {
|
||||
|
||||
} /* FreeTempConfig */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void FreeTempProto(void *arg) {
|
||||
PROTO proto = (PROTO) arg;
|
||||
@ -94,6 +93,11 @@ void FreeTempProto(void *arg) {
|
||||
free_struct (proto, sizeof (TEMP_PROTO_STRUCT), "TEMP_PROTO_STRUCT");
|
||||
}
|
||||
|
||||
/**
 * Releases a permanent config: first the ambiguity array it points to,
 * then the config struct itself.
 *
 * @param Config permanent config to free; must not be NULL
 */
void FreePermConfig(PERM_CONFIG Config) {
  assert(Config != NULL);
  /* Free the ambiguity array while Config is still valid to dereference. */
  Efree(Config->Ambigs);
  free_struct(Config, sizeof(*Config), "PERM_CONFIG_STRUCT");
}
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/**
|
||||
@ -135,7 +139,7 @@ void free_adapted_class(ADAPT_CLASS adapt_class) {
|
||||
for (i = 0; i < MAX_NUM_CONFIGS; i++) {
|
||||
if (ConfigIsPermanent (adapt_class, i)
|
||||
&& PermConfigFor (adapt_class, i) != NULL)
|
||||
Efree (PermConfigFor (adapt_class, i));
|
||||
FreePermConfig (PermConfigFor (adapt_class, i));
|
||||
else if (!ConfigIsPermanent (adapt_class, i)
|
||||
&& TempConfigFor (adapt_class, i) != NULL)
|
||||
FreeTempConfig (TempConfigFor (adapt_class, i));
|
||||
@ -181,6 +185,14 @@ ADAPT_TEMPLATES Classify::NewAdaptedTemplates(bool InitFromUnicharset) {
|
||||
return (Templates);
|
||||
|
||||
} /* NewAdaptedTemplates */
|
||||
|
||||
// Returns FontinfoId of the given config of the given adapted class.
|
||||
int Classify::GetFontinfoId(ADAPT_CLASS Class, uinT8 ConfigId) {
|
||||
return (ConfigIsPermanent(Class, ConfigId) ?
|
||||
PermConfigFor(Class, ConfigId)->FontinfoId :
|
||||
TempConfigFor(Class, ConfigId)->FontinfoId);
|
||||
}
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
@ -201,13 +213,14 @@ void free_adapted_templates(ADAPT_TEMPLATES templates) {
|
||||
* This routine allocates and returns a new temporary config.
|
||||
*
|
||||
* @param MaxProtoId max id of any proto in new config
|
||||
* @param FontinfoId font information from pre-trained templates
|
||||
* @return Ptr to new temp config.
|
||||
*
|
||||
* @note Globals: none
|
||||
* @note Exceptions: none
|
||||
* @note History: Thu Mar 14 13:28:21 1991, DSJ, Created.
|
||||
*/
|
||||
TEMP_CONFIG NewTempConfig(int MaxProtoId) {
|
||||
TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId) {
|
||||
TEMP_CONFIG Config;
|
||||
int NumProtos = MaxProtoId + 1;
|
||||
|
||||
@ -221,6 +234,7 @@ TEMP_CONFIG NewTempConfig(int MaxProtoId) {
|
||||
Config->ProtoVectorSize = WordsInVectorOfSize (NumProtos);
|
||||
Config->ContextsSeen = NIL_LIST;
|
||||
zero_all_bits (Config->Protos, Config->ProtoVectorSize);
|
||||
Config->FontinfoId = FontinfoId;
|
||||
|
||||
return (Config);
|
||||
|
||||
@ -388,13 +402,14 @@ ADAPT_TEMPLATES Classify::ReadAdaptedTemplates(FILE *File) {
|
||||
* @note History: Tue Mar 19 14:25:26 1991, DSJ, Created.
|
||||
*/
|
||||
PERM_CONFIG ReadPermConfig(FILE *File) {
|
||||
PERM_CONFIG Config;
|
||||
PERM_CONFIG Config = (PERM_CONFIG) alloc_struct(sizeof(PERM_CONFIG_STRUCT),
|
||||
"PERM_CONFIG_STRUCT");
|
||||
uinT8 NumAmbigs;
|
||||
|
||||
fread ((char *) &NumAmbigs, sizeof (uinT8), 1, File);
|
||||
Config = (PERM_CONFIG) Emalloc (sizeof (UNICHAR_ID) * (NumAmbigs + 1));
|
||||
fread (Config, sizeof (UNICHAR_ID), NumAmbigs, File);
|
||||
Config[NumAmbigs] = -1;
|
||||
fread ((char *) &NumAmbigs, sizeof(uinT8), 1, File);
|
||||
Config->Ambigs = (UNICHAR_ID *)Emalloc(sizeof(UNICHAR_ID) * (NumAmbigs + 1));
|
||||
fread(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File);
|
||||
Config->Ambigs[NumAmbigs] = -1;
|
||||
fread(&(Config->FontinfoId), sizeof(int), 1, File);
|
||||
|
||||
return (Config);
|
||||
|
||||
@ -523,12 +538,11 @@ void WritePermConfig(FILE *File, PERM_CONFIG Config) {
|
||||
uinT8 NumAmbigs = 0;
|
||||
|
||||
assert (Config != NULL);
|
||||
while (Config[NumAmbigs] > 0)
|
||||
++NumAmbigs;
|
||||
|
||||
fwrite ((char *) &NumAmbigs, sizeof (uinT8), 1, File);
|
||||
fwrite (Config, sizeof (UNICHAR_ID), NumAmbigs, File);
|
||||
while (Config->Ambigs[NumAmbigs] > 0) ++NumAmbigs;
|
||||
|
||||
fwrite((char *) &NumAmbigs, sizeof(uinT8), 1, File);
|
||||
fwrite(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File);
|
||||
fwrite(&(Config->FontinfoId), sizeof(int), 1, File);
|
||||
} /* WritePermConfig */
|
||||
|
||||
|
||||
|
@ -43,10 +43,16 @@ typedef struct
|
||||
PROTO_ID MaxProtoId;
|
||||
LIST ContextsSeen;
|
||||
BIT_VECTOR Protos;
|
||||
int FontinfoId; // font information inferred from pre-trained templates
|
||||
} TEMP_CONFIG_STRUCT;
|
||||
typedef TEMP_CONFIG_STRUCT *TEMP_CONFIG;
|
||||
|
||||
typedef UNICHAR_ID *PERM_CONFIG;
|
||||
// Permanent adapted config: the list of ambiguous unichar ids plus the font
// the config was learned with.
typedef struct
|
||||
{
|
||||
UNICHAR_ID *Ambigs; // array of unichar ids terminated by -1; released with Efree in FreePermConfig
|
||||
int FontinfoId; // font information inferred from pre-trained templates
|
||||
} PERM_CONFIG_STRUCT;
|
||||
typedef PERM_CONFIG_STRUCT *PERM_CONFIG;
|
||||
|
||||
typedef union
|
||||
{
|
||||
@ -116,7 +122,7 @@ void free_adapted_class(ADAPT_CLASS adapt_class);
|
||||
|
||||
void free_adapted_templates(ADAPT_TEMPLATES templates);
|
||||
|
||||
TEMP_CONFIG NewTempConfig(int MaxProtoId);
|
||||
TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId);
|
||||
|
||||
TEMP_PROTO NewTempProto();
|
||||
|
||||
|
@ -80,6 +80,8 @@ struct ScoredClass {
|
||||
FLOAT32 rating;
|
||||
inT16 config;
|
||||
inT16 config2;
|
||||
inT16 fontinfo_id;
|
||||
inT16 fontinfo_id2;
|
||||
};
|
||||
|
||||
struct ADAPT_RESULTS {
|
||||
@ -100,6 +102,8 @@ struct ADAPT_RESULTS {
|
||||
best_match.rating = WORST_POSSIBLE_RATING;
|
||||
best_match.config = 0;
|
||||
best_match.config2 = 0;
|
||||
best_match.fontinfo_id = kBlankFontinfoId;
|
||||
best_match.fontinfo_id2 = kBlankFontinfoId;
|
||||
}
|
||||
};
|
||||
|
||||
@ -403,9 +407,11 @@ void Classify::LearnPieces(const char* filename, int start, int length,
|
||||
}
|
||||
UNICHAR_ID class_id = unicharset.unichar_to_id(correct_text);
|
||||
if (classify_learning_debug_level >= 1)
|
||||
tprintf("Adapting to char = %s, thr= %g\n",
|
||||
unicharset.id_to_unichar(class_id), threshold);
|
||||
AdaptToChar(blob, class_id, threshold);
|
||||
tprintf("Adapting to char = %s, thr= %g font_id= %d\n",
|
||||
unicharset.id_to_unichar(class_id), threshold, word->fontinfo_id);
|
||||
// If filename is not NULL we are doing recognition
|
||||
// (as opposed to training), so we must have already set word fonts.
|
||||
AdaptToChar(blob, class_id, word->fontinfo_id, threshold);
|
||||
}
|
||||
|
||||
break_pieces(blob, word->seam_array, start, start + length - 1);
|
||||
@ -680,6 +686,7 @@ void Classify::SettupPass2() {
|
||||
*
|
||||
* @param Blob blob to model new class after
|
||||
* @param ClassId id of the class to be initialized
|
||||
* @param FontinfoId font information inferred from pre-trained templates
|
||||
* @param Class adapted class to be initialized
|
||||
* @param Templates adapted templates to add new class to
|
||||
*
|
||||
@ -693,6 +700,7 @@ void Classify::SettupPass2() {
|
||||
*/
|
||||
void Classify::InitAdaptedClass(TBLOB *Blob,
|
||||
CLASS_ID ClassId,
|
||||
int FontinfoId,
|
||||
ADAPT_CLASS Class,
|
||||
ADAPT_TEMPLATES Templates) {
|
||||
FEATURE_SET Features;
|
||||
@ -712,8 +720,8 @@ void Classify::InitAdaptedClass(TBLOB *Blob,
|
||||
return;
|
||||
}
|
||||
|
||||
Config = NewTempConfig (NumFeatures - 1);
|
||||
TempConfigFor (Class, 0) = Config;
|
||||
Config = NewTempConfig(NumFeatures - 1, FontinfoId);
|
||||
TempConfigFor(Class, 0) = Config;
|
||||
|
||||
/* this is a kludge to construct cutoffs for adapted templates */
|
||||
if (Templates == AdaptedTemplates)
|
||||
@ -851,6 +859,7 @@ int Classify::AdaptableWord(TWERD *Word,
|
||||
* @param Blob blob to add to templates for ClassId
|
||||
* @param LineStats statistics about text line blob is in
|
||||
* @param ClassId class to add blob to
|
||||
* @param FontinfoId font information from pre-trained templates
|
||||
* @param Threshold minimum match rating to existing template
|
||||
*
|
||||
* Globals:
|
||||
@ -864,6 +873,7 @@ int Classify::AdaptableWord(TWERD *Word,
|
||||
*/
|
||||
void Classify::AdaptToChar(TBLOB *Blob,
|
||||
CLASS_ID ClassId,
|
||||
int FontinfoId,
|
||||
FLOAT32 Threshold) {
|
||||
int NumFeatures;
|
||||
INT_FEATURE_ARRAY IntFeatures;
|
||||
@ -882,7 +892,7 @@ void Classify::AdaptToChar(TBLOB *Blob,
|
||||
Class = AdaptedTemplates->Class[ClassId];
|
||||
assert(Class != NULL);
|
||||
if (IsEmptyAdaptedClass(Class)) {
|
||||
InitAdaptedClass(Blob, ClassId, Class, AdaptedTemplates);
|
||||
InitAdaptedClass(Blob, ClassId, FontinfoId, Class, AdaptedTemplates);
|
||||
}
|
||||
else {
|
||||
IClass = ClassForClassId (AdaptedTemplates->Templates, ClassId);
|
||||
@ -892,10 +902,20 @@ void Classify::AdaptToChar(TBLOB *Blob,
|
||||
return;
|
||||
|
||||
im_.SetBaseLineMatch();
|
||||
im_.Match(IClass, AllProtosOn, AllConfigsOn,
|
||||
// Only match configs with the matching font.
|
||||
BIT_VECTOR MatchingFontConfigs = NewBitVector(MAX_NUM_PROTOS);
|
||||
for (int cfg = 0; cfg < IClass->NumConfigs; ++cfg) {
|
||||
if (GetFontinfoId(Class, cfg) == FontinfoId) {
|
||||
SET_BIT(MatchingFontConfigs, cfg);
|
||||
} else {
|
||||
reset_bit(MatchingFontConfigs, cfg);
|
||||
}
|
||||
}
|
||||
im_.Match(IClass, AllProtosOn, MatchingFontConfigs,
|
||||
NumFeatures, NumFeatures, IntFeatures, 0,
|
||||
&IntResult, classify_adapt_feature_threshold,
|
||||
NO_DEBUG, matcher_debug_separate_windows);
|
||||
FreeBitVector(MatchingFontConfigs);
|
||||
|
||||
SetAdaptiveThreshold(Threshold);
|
||||
|
||||
@ -931,10 +951,10 @@ void Classify::AdaptToChar(TBLOB *Blob,
|
||||
}
|
||||
NewTempConfigId = MakeNewTemporaryConfig(AdaptedTemplates,
|
||||
ClassId,
|
||||
FontinfoId,
|
||||
NumFeatures,
|
||||
IntFeatures,
|
||||
FloatFeatures);
|
||||
|
||||
if (NewTempConfigId >= 0 &&
|
||||
TempConfigReliable(ClassId, TempConfigFor(Class, NewTempConfigId))) {
|
||||
MakePermanent(AdaptedTemplates, ClassId, NewTempConfigId, Blob);
|
||||
@ -988,6 +1008,7 @@ void Classify::DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class) {
|
||||
* @param Blob blob to add to templates for ClassId
|
||||
* @param LineStats statistics about text line blob is in
|
||||
* @param ClassId class to add blob to
|
||||
* @param FontinfoId font information from pre-trained templates
|
||||
* @param Threshold minimum match rating to existing template
|
||||
*
|
||||
* Globals:
|
||||
@ -998,6 +1019,7 @@ void Classify::DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class) {
|
||||
*/
|
||||
void Classify::AdaptToPunc(TBLOB *Blob,
|
||||
CLASS_ID ClassId,
|
||||
int FontinfoId,
|
||||
FLOAT32 Threshold) {
|
||||
ADAPT_RESULTS *Results = new ADAPT_RESULTS();
|
||||
int i;
|
||||
@ -1021,7 +1043,7 @@ void Classify::AdaptToPunc(TBLOB *Blob,
|
||||
cprintf ("Adapting to punc = %s, thr= %g\n",
|
||||
unicharset.id_to_unichar(ClassId), Threshold);
|
||||
#endif
|
||||
AdaptToChar(Blob, ClassId, Threshold);
|
||||
AdaptToChar(Blob, ClassId, FontinfoId, Threshold);
|
||||
}
|
||||
delete Results;
|
||||
} /* AdaptToPunc */
|
||||
@ -1045,8 +1067,10 @@ void Classify::AdaptToPunc(TBLOB *Blob,
|
||||
* @param[out] results results to add new result to
|
||||
* @param class_id class of new result
|
||||
* @param rating rating of new result
|
||||
* @param config_id config id of new result
|
||||
* @param config2_id config id of 2nd choice result
|
||||
* @param config config id of new result
|
||||
* @param config2 config id of 2nd choice result
|
||||
* @param fontinfo_id font information of the new result
|
||||
* @param fontinfo_id2 font information of the 2nd choice result
|
||||
*
|
||||
* @note Exceptions: none
|
||||
* @note History: Tue Mar 12 18:19:29 1991, DSJ, Created.
|
||||
@ -1054,10 +1078,13 @@ void Classify::AdaptToPunc(TBLOB *Blob,
|
||||
void Classify::AddNewResult(ADAPT_RESULTS *results,
|
||||
CLASS_ID class_id,
|
||||
FLOAT32 rating,
|
||||
int config_id,
|
||||
int config2_id) {
|
||||
int config,
|
||||
int config2,
|
||||
int fontinfo_id,
|
||||
int fontinfo_id2) {
|
||||
ScoredClass *old_match = FindScoredUnichar(results, class_id);
|
||||
ScoredClass match = {class_id, rating, config_id, config2_id};
|
||||
ScoredClass match =
|
||||
{ class_id, rating, config, config2, fontinfo_id, fontinfo_id2 };
|
||||
|
||||
if (rating > results->best_match.rating + matcher_bad_match_pad ||
|
||||
(old_match && rating >= old_match->rating))
|
||||
@ -1108,6 +1135,7 @@ void Classify::AddNewResult(ADAPT_RESULTS *results,
|
||||
*/
|
||||
void Classify::AmbigClassifier(TBLOB *Blob,
|
||||
INT_TEMPLATES Templates,
|
||||
ADAPT_CLASS *Classes,
|
||||
UNICHAR_ID *Ambiguities,
|
||||
ADAPT_RESULTS *Results) {
|
||||
int NumFeatures;
|
||||
@ -1142,9 +1170,11 @@ void Classify::AmbigClassifier(TBLOB *Blob,
|
||||
IntResult.Config,
|
||||
IntResult.Rating * 100.0);
|
||||
|
||||
assert(Classes != NULL);
|
||||
AddNewResult(Results, ClassId, IntResult.Rating,
|
||||
IntResult.Config, IntResult.Config2);
|
||||
|
||||
IntResult.Config, IntResult.Config2,
|
||||
GetFontinfoId(Classes[ClassId], IntResult.Config),
|
||||
GetFontinfoId(Classes[ClassId], IntResult.Config2));
|
||||
Ambiguities++;
|
||||
|
||||
NumAmbigClassesTried++;
|
||||
@ -1213,8 +1243,18 @@ void Classify::MasterMatcher(INT_TEMPLATES templates,
|
||||
int_result.Rating += miss_penalty;
|
||||
if (int_result.Rating > WORST_POSSIBLE_RATING)
|
||||
int_result.Rating = WORST_POSSIBLE_RATING;
|
||||
AddNewResult(final_results, class_id, int_result.Rating,
|
||||
int_result.Config, int_result.Config2);
|
||||
if (classes != NULL) {
|
||||
AddNewResult(final_results, class_id, int_result.Rating,
|
||||
int_result.Config, int_result.Config2,
|
||||
GetFontinfoId(classes[class_id], int_result.Config),
|
||||
GetFontinfoId(classes[class_id], int_result.Config2));
|
||||
} else {
|
||||
AddNewResult(final_results, class_id, int_result.Rating,
|
||||
int_result.Config, int_result.Config2,
|
||||
kBlankFontinfoId, kBlankFontinfoId);
|
||||
|
||||
}
|
||||
|
||||
// Add unichars ambiguous with class_id with the same rating as class_id.
|
||||
if (use_definite_ambigs_for_classifier) {
|
||||
const UnicharIdVector *definite_ambigs =
|
||||
@ -1239,8 +1279,17 @@ void Classify::MasterMatcher(INT_TEMPLATES templates,
|
||||
ambig_match->rating = int_result.Rating;
|
||||
}
|
||||
} else {
|
||||
AddNewResult(final_results, ambig_class_id, int_result.Rating,
|
||||
int_result.Config, int_result.Config2);
|
||||
if (classes != NULL) {
|
||||
AddNewResult(
|
||||
final_results, ambig_class_id, int_result.Rating,
|
||||
int_result.Config, int_result.Config2,
|
||||
GetFontinfoId(classes[class_id], int_result.Config),
|
||||
GetFontinfoId(classes[class_id], int_result.Config2));
|
||||
} else {
|
||||
AddNewResult(final_results, ambig_class_id, int_result.Rating,
|
||||
int_result.Config, int_result.Config2,
|
||||
kBlankFontinfoId, kBlankFontinfoId);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1302,7 +1351,8 @@ UNICHAR_ID *Classify::BaselineClassifier(TBLOB *Blob,
|
||||
return (NULL);
|
||||
/* this is a bug - maybe should return "" */
|
||||
|
||||
return Templates->Class[ClassId]->Config[Results->best_match.config].Perm;
|
||||
return Templates->Class[ClassId]->
|
||||
Config[Results->best_match.config].Perm->Ambigs;
|
||||
} /* BaselineClassifier */
|
||||
|
||||
|
||||
@ -1377,7 +1427,8 @@ void Classify::ClassifyAsNoise(ADAPT_RESULTS *Results) {
|
||||
Rating *= Rating;
|
||||
Rating /= 1.0 + Rating;
|
||||
|
||||
AddNewResult(Results, NO_CLASS, Rating, -1, -1);
|
||||
AddNewResult(Results, NO_CLASS, Rating, -1, -1,
|
||||
kBlankFontinfoId, kBlankFontinfoId);
|
||||
} /* ClassifyAsNoise */
|
||||
} // namespace tesseract
|
||||
|
||||
@ -1395,7 +1446,8 @@ ScoredClass *FindScoredUnichar(ADAPT_RESULTS *results, UNICHAR_ID id) {
|
||||
// Retrieve the current rating for a unichar id if we have rated it, defaulting
|
||||
// to WORST_POSSIBLE_RATING.
|
||||
ScoredClass ScoredUnichar(ADAPT_RESULTS *results, UNICHAR_ID id) {
|
||||
ScoredClass poor_result = {id, WORST_POSSIBLE_RATING, -1, -1};
|
||||
ScoredClass poor_result =
|
||||
{id, WORST_POSSIBLE_RATING, -1, -1, kBlankFontinfoId, kBlankFontinfoId};
|
||||
ScoredClass *entry = FindScoredUnichar(results, id);
|
||||
return (entry == NULL) ? poor_result : *entry;
|
||||
}
|
||||
@ -1439,6 +1491,22 @@ void Classify::ConvertMatchesToChoices(ADAPT_RESULTS *Results,
|
||||
|
||||
for (int i = 0; i < Results->NumMatches; i++) {
|
||||
ScoredClass next = Results->match[i];
|
||||
int fontinfo_id = next.fontinfo_id;
|
||||
int fontinfo_id2 = next.fontinfo_id2;
|
||||
if (fontinfo_id == kBlankFontinfoId) {
|
||||
// ScoredClass next must have come from pre-trained templates,
|
||||
// so we infer its font information from fontset_table.
|
||||
int font_set_id = PreTrainedTemplates->Class[next.id]->font_set_id;
|
||||
if (font_set_id >= 0) {
|
||||
const FontSet &fs = fontset_table_.get(font_set_id);
|
||||
if (next.config >= 0 && next.config < fs.size) {
|
||||
fontinfo_id = fs.configs[next.config];
|
||||
}
|
||||
if (next.config2 >= 0 && next.config2 < fs.size) {
|
||||
fontinfo_id2 = fs.configs[next.config2];
|
||||
}
|
||||
}
|
||||
}
|
||||
bool current_is_frag = (unicharset.get_fragment(next.id) != NULL);
|
||||
if (temp_it.length()+1 == MAX_MATCHES &&
|
||||
!contains_nonfrag && current_is_frag) {
|
||||
@ -1458,8 +1526,8 @@ void Classify::ConvertMatchesToChoices(ADAPT_RESULTS *Results,
|
||||
Certainty *= -(getDict().certainty_scale);
|
||||
}
|
||||
temp_it.add_to_end(new BLOB_CHOICE(next.id, Rating, Certainty,
|
||||
next.config, next.config2,
|
||||
unicharset.get_script(next.id)));
|
||||
fontinfo_id, fontinfo_id2,
|
||||
unicharset.get_script(next.id)));
|
||||
contains_nonfrag |= !current_is_frag; // update contains_nonfrag
|
||||
choices_length++;
|
||||
if (choices_length >= MAX_MATCHES) break;
|
||||
@ -1559,6 +1627,7 @@ void Classify::DoAdaptiveMatch(TBLOB *Blob,
|
||||
} else if (Ambiguities && *Ambiguities >= 0 && !tess_bn_matching) {
|
||||
AmbigClassifier(Blob,
|
||||
PreTrainedTemplates,
|
||||
AdaptedTemplates->Class,
|
||||
Ambiguities,
|
||||
Results);
|
||||
}
|
||||
@ -1567,7 +1636,7 @@ void Classify::DoAdaptiveMatch(TBLOB *Blob,
|
||||
// Force the blob to be classified as noise
|
||||
// if the results contain only fragments.
|
||||
// TODO(daria): verify that this is better than
|
||||
// just adding a NULL classificaiton.
|
||||
// just adding a NULL classification.
|
||||
if (!Results->HasNonfragment) {
|
||||
Results->NumMatches = 0;
|
||||
}
|
||||
@ -2006,6 +2075,7 @@ int Classify::GetIntCharNormFeatures(TBLOB *Blob,
|
||||
*
|
||||
* @param Templates adapted templates to add new config to
|
||||
* @param ClassId class id to associate with new config
|
||||
* @param FontinfoId font information inferred from pre-trained templates
|
||||
* @param NumFeatures number of features in IntFeatures
|
||||
* @param Features features describing model for new config
|
||||
* @param FloatFeatures floating-pt representation of features
|
||||
@ -2017,6 +2087,7 @@ int Classify::GetIntCharNormFeatures(TBLOB *Blob,
|
||||
*/
|
||||
int Classify::MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates,
|
||||
CLASS_ID ClassId,
|
||||
int FontinfoId,
|
||||
int NumFeatures,
|
||||
INT_FEATURE_ARRAY Features,
|
||||
FEATURE_SET FloatFeatures) {
|
||||
@ -2077,13 +2148,15 @@ int Classify::MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates,
|
||||
|
||||
ConfigId = AddIntConfig(IClass);
|
||||
ConvertConfig(TempProtoMask, ConfigId, IClass);
|
||||
Config = NewTempConfig(MaxProtoId);
|
||||
Config = NewTempConfig(MaxProtoId, FontinfoId);
|
||||
TempConfigFor(Class, ConfigId) = Config;
|
||||
copy_all_bits(TempProtoMask, Config->Protos, Config->ProtoVectorSize);
|
||||
|
||||
if (classify_learning_debug_level >= 1)
|
||||
cprintf("Making new temp config %d using %d old and %d new protos.\n",
|
||||
ConfigId, NumOldProtos, MaxProtoId - OldMaxProtoId);
|
||||
cprintf("Making new temp config %d fontinfo id %d"
|
||||
" using %d old and %d new protos.\n",
|
||||
ConfigId, Config->FontinfoId,
|
||||
NumOldProtos, MaxProtoId - OldMaxProtoId);
|
||||
|
||||
return ConfigId;
|
||||
} /* MakeNewTemporaryConfig */
|
||||
@ -2215,23 +2288,33 @@ void Classify::MakePermanent(ADAPT_TEMPLATES Templates,
|
||||
Templates->NumPermClasses++;
|
||||
Class->NumPermConfigs++;
|
||||
|
||||
// Initialize permanent config.
|
||||
Ambigs = GetAmbiguities(Blob, ClassId);
|
||||
PERM_CONFIG Perm = (PERM_CONFIG) alloc_struct(sizeof(PERM_CONFIG_STRUCT),
|
||||
"PERM_CONFIG_STRUCT");
|
||||
Perm->Ambigs = Ambigs;
|
||||
Perm->FontinfoId = Config->FontinfoId;
|
||||
|
||||
// Free memory associated with temporary config (since ADAPTED_CONFIG
|
||||
// is a union we need to clean up before we record permanent config).
|
||||
ProtoKey.Templates = Templates;
|
||||
ProtoKey.ClassId = ClassId;
|
||||
ProtoKey.ConfigId = ConfigId;
|
||||
Class->TempProtos = delete_d(Class->TempProtos, &ProtoKey,
|
||||
MakeTempProtoPerm);
|
||||
Class->TempProtos = delete_d(Class->TempProtos, &ProtoKey, MakeTempProtoPerm);
|
||||
FreeTempConfig(Config);
|
||||
|
||||
Ambigs = GetAmbiguities(Blob, ClassId);
|
||||
PermConfigFor(Class, ConfigId) = Ambigs;
|
||||
// Record permanent config.
|
||||
PermConfigFor(Class, ConfigId) = Perm;
|
||||
|
||||
if (classify_learning_debug_level >= 1) {
|
||||
cprintf("Making config %d permanent with ambiguities '",
|
||||
ConfigId, Ambigs);
|
||||
tprintf("Making config %d for %s (ClassId %d) permanent:"
|
||||
" fontinfo id %d, ambiguities '",
|
||||
ConfigId, getDict().getUnicharset().debug_str(ClassId).string(),
|
||||
ClassId, PermConfigFor(Class, ConfigId)->FontinfoId);
|
||||
for (UNICHAR_ID *AmbigsPointer = Ambigs;
|
||||
*AmbigsPointer >= 0; ++AmbigsPointer)
|
||||
cprintf("%s", unicharset.id_to_unichar(*AmbigsPointer));
|
||||
cprintf("'.\n");
|
||||
*AmbigsPointer >= 0; ++AmbigsPointer)
|
||||
tprintf("%s", unicharset.id_to_unichar(*AmbigsPointer));
|
||||
tprintf("'.\n");
|
||||
}
|
||||
} /* MakePermanent */
|
||||
} // namespace tesseract
|
||||
|
@ -45,6 +45,10 @@ bool compare_font_set(const FontSet& fs1, const FontSet& fs2) {
|
||||
}
|
||||
|
||||
// Frees the heap data referenced by a FontInfo entry: its name array and,
// when present, the spacing vector together with the elements it points to.
void delete_callback(FontInfo f) {
  delete[] f.name;
  if (f.spacing_vec == NULL) return;
  // Free the pointed-to entries before the vector that holds the pointers.
  f.spacing_vec->delete_data_pointers();
  delete f.spacing_vec;
}
|
||||
void delete_callback_fs(FontSet fs) {
|
||||
|
@ -37,6 +37,9 @@ class WERD_RES;
|
||||
struct ADAPT_RESULTS;
|
||||
struct NORM_PROTOS;
|
||||
|
||||
static const int kUnknownFontinfoId = -1;
|
||||
static const int kBlankFontinfoId = -2;
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// How segmented is a blob. In this enum, character refers to a classifiable
|
||||
@ -63,6 +66,7 @@ class Classify : public CCStruct {
|
||||
|
||||
/* adaptive.cpp ************************************************************/
|
||||
ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset);
|
||||
int GetFontinfoId(ADAPT_CLASS Class, uinT8 ConfigId);
|
||||
int ClassPruner(INT_TEMPLATES IntTemplates,
|
||||
inT16 NumFeatures,
|
||||
INT_FEATURE_ARRAY Features,
|
||||
@ -108,13 +112,16 @@ class Classify : public CCStruct {
|
||||
void InitAdaptiveClassifier(bool load_pre_trained_templates);
|
||||
void InitAdaptedClass(TBLOB *Blob,
|
||||
CLASS_ID ClassId,
|
||||
int FontinfoId,
|
||||
ADAPT_CLASS Class,
|
||||
ADAPT_TEMPLATES Templates);
|
||||
void AdaptToPunc(TBLOB *Blob,
|
||||
CLASS_ID ClassId,
|
||||
int FontinfoId,
|
||||
FLOAT32 Threshold);
|
||||
void AmbigClassifier(TBLOB *Blob,
|
||||
INT_TEMPLATES Templates,
|
||||
ADAPT_CLASS *Classes,
|
||||
UNICHAR_ID *Ambiguities,
|
||||
ADAPT_RESULTS *Results);
|
||||
void MasterMatcher(INT_TEMPLATES templates,
|
||||
@ -129,11 +136,13 @@ class Classify : public CCStruct {
|
||||
ADAPT_RESULTS* final_results);
|
||||
void ConvertMatchesToChoices(ADAPT_RESULTS *Results,
|
||||
BLOB_CHOICE_LIST *Choices);
|
||||
void AddNewResult(ADAPT_RESULTS *Results,
|
||||
CLASS_ID ClassId,
|
||||
FLOAT32 Rating,
|
||||
int ConfigId,
|
||||
int config2);
|
||||
void AddNewResult(ADAPT_RESULTS *results,
|
||||
CLASS_ID class_dd,
|
||||
FLOAT32 rating,
|
||||
int config,
|
||||
int config2,
|
||||
int fontinfo_id,
|
||||
int fontinfo_id2);
|
||||
int GetAdaptiveFeatures(TBLOB *Blob,
|
||||
INT_FEATURE_ARRAY IntFeatures,
|
||||
FEATURE_SET *FloatFeatures);
|
||||
@ -155,6 +164,7 @@ class Classify : public CCStruct {
|
||||
BIT_VECTOR TempProtoMask);
|
||||
int MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates,
|
||||
CLASS_ID ClassId,
|
||||
int FontinfoId,
|
||||
int NumFeatures,
|
||||
INT_FEATURE_ARRAY Features,
|
||||
FEATURE_SET FloatFeatures);
|
||||
@ -182,6 +192,7 @@ class Classify : public CCStruct {
|
||||
ADAPT_RESULTS *Results);
|
||||
void AdaptToChar(TBLOB *Blob,
|
||||
CLASS_ID ClassId,
|
||||
int FontinfoId,
|
||||
FLOAT32 Threshold);
|
||||
void DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class);
|
||||
int AdaptableWord(TWERD *Word,
|
||||
|
@ -374,7 +374,7 @@ int ExtractIntFeat(TBLOB *Blob,
|
||||
// made by vector <X, Y> as measured counterclockwise from <-1, 0>
|
||||
// The order of the arguments follows the convention of atan2(3)
|
||||
uinT8 BinaryAnglePlusPi(inT32 Y, inT32 X) {
|
||||
inT16 Angle;
|
||||
inT16 Angle, Atan;
|
||||
uinT16 Ratio;
|
||||
uinT32 AbsX, AbsY;
|
||||
|
||||
@ -393,26 +393,26 @@ uinT8 BinaryAnglePlusPi(inT32 Y, inT32 X) {
|
||||
Ratio = AbsX * ATAN_TABLE_SIZE / AbsY;
|
||||
if (Ratio >= ATAN_TABLE_SIZE)
|
||||
Ratio = ATAN_TABLE_SIZE - 1;
|
||||
Angle = AtanTable[Ratio];
|
||||
Atan = AtanTable[Ratio];
|
||||
if (X >= 0)
|
||||
if (Y >= 0)
|
||||
if (AbsX > AbsY)
|
||||
Angle = Angle;
|
||||
Angle = Atan;
|
||||
else
|
||||
Angle = 64 - Angle;
|
||||
Angle = 64 - Atan;
|
||||
else if (AbsX > AbsY)
|
||||
Angle = 256 - Angle;
|
||||
Angle = 256 - Atan;
|
||||
else
|
||||
Angle = 192 + Angle;
|
||||
Angle = 192 + Atan;
|
||||
else if (Y >= 0)
|
||||
if (AbsX > AbsY)
|
||||
Angle = 128 - Angle;
|
||||
Angle = 128 - Atan;
|
||||
else
|
||||
Angle = 64 + Angle;
|
||||
Angle = 64 + Atan;
|
||||
else if (AbsX > AbsY)
|
||||
Angle = 128 + Angle;
|
||||
Angle = 128 + Atan;
|
||||
else
|
||||
Angle = 192 - Angle;
|
||||
Angle = 192 - Atan;
|
||||
|
||||
/* reverse angles to match old feature extractor: Angle += PI */
|
||||
Angle += 128;
|
||||
|
@ -1162,6 +1162,9 @@ int IntegerMatcher::FindBestMatch(
|
||||
*/
|
||||
int BestMatch = 0;
|
||||
int Best2Match = 0;
|
||||
assert(ClassTemplate->NumConfigs > 0);
|
||||
Result->Config = 0;
|
||||
Result->Config2 = 0;
|
||||
|
||||
/* Find best match */
|
||||
for (int ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) {
|
||||
|
@ -45,23 +45,21 @@
|
||||
#endif
|
||||
|
||||
/* match debug display constants*/
|
||||
#define DISPLAY_OFFSET (0.5 * INT_CHAR_NORM_RANGE)
|
||||
#define PROTO_PRUNER_SCALE (4.0)
|
||||
|
||||
#define INT_DESCENDER (0.0 * INT_CHAR_NORM_RANGE - DISPLAY_OFFSET)
|
||||
#define INT_BASELINE (0.25 * INT_CHAR_NORM_RANGE - DISPLAY_OFFSET)
|
||||
#define INT_XHEIGHT (0.75 * INT_CHAR_NORM_RANGE - DISPLAY_OFFSET)
|
||||
#define INT_CAPHEIGHT (1.0 * INT_CHAR_NORM_RANGE - DISPLAY_OFFSET)
|
||||
#define INT_DESCENDER (0.0 * INT_CHAR_NORM_RANGE)
|
||||
#define INT_BASELINE (0.25 * INT_CHAR_NORM_RANGE)
|
||||
#define INT_XHEIGHT (0.75 * INT_CHAR_NORM_RANGE)
|
||||
#define INT_CAPHEIGHT (1.0 * INT_CHAR_NORM_RANGE)
|
||||
|
||||
#define INT_XCENTER (0.5 * INT_CHAR_NORM_RANGE - DISPLAY_OFFSET)
|
||||
#define INT_YCENTER (0.5 * INT_CHAR_NORM_RANGE - DISPLAY_OFFSET)
|
||||
#define INT_XCENTER (0.5 * INT_CHAR_NORM_RANGE)
|
||||
#define INT_YCENTER (0.5 * INT_CHAR_NORM_RANGE)
|
||||
#define INT_XRADIUS (0.2 * INT_CHAR_NORM_RANGE)
|
||||
#define INT_YRADIUS (0.2 * INT_CHAR_NORM_RANGE)
|
||||
#define INT_MIN_X (- DISPLAY_OFFSET)
|
||||
#define INT_MIN_Y (- DISPLAY_OFFSET)
|
||||
#define INT_MAX_X ( DISPLAY_OFFSET)
|
||||
#define INT_MAX_Y ( DISPLAY_OFFSET)
|
||||
#define DOUBLE_OFFSET 0.095
|
||||
#define INT_MIN_X 0
|
||||
#define INT_MIN_Y 0
|
||||
#define INT_MAX_X INT_CHAR_NORM_RANGE
|
||||
#define INT_MAX_Y INT_CHAR_NORM_RANGE
|
||||
|
||||
/** define pad used to snap near horiz/vertical protos to horiz/vertical */
|
||||
#define HV_TOLERANCE (0.0025) /* approx 0.9 degrees */
|
||||
@ -147,7 +145,7 @@ void GetCPPadsForLevel(int Level,
|
||||
FLOAT32 *SidePad,
|
||||
FLOAT32 *AnglePad);
|
||||
|
||||
C_COL GetMatchColorFor(FLOAT32 Evidence);
|
||||
ScrollView::Color GetMatchColorFor(FLOAT32 Evidence);
|
||||
|
||||
void GetNextFill(TABLE_FILLER *Filler, FILL_SPEC *Fill);
|
||||
|
||||
@ -158,12 +156,13 @@ void InitTableFiller(FLOAT32 EndPad,
|
||||
TABLE_FILLER *Filler);
|
||||
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
void RenderIntFeature(void *window, INT_FEATURE Feature, C_COL Color);
|
||||
void RenderIntFeature(ScrollView *window, const INT_FEATURE_STRUCT* Feature,
|
||||
ScrollView::Color color);
|
||||
|
||||
void RenderIntProto(void *window,
|
||||
void RenderIntProto(ScrollView *window,
|
||||
INT_CLASS Class,
|
||||
PROTO_ID ProtoId,
|
||||
C_COL Color);
|
||||
ScrollView::Color color);
|
||||
#endif
|
||||
|
||||
int TruncateParam(FLOAT32 Param, int Min, int Max, char *Id);
|
||||
@ -488,7 +487,7 @@ void UpdateMatchDisplay() {
|
||||
** History: Thu Mar 21 15:40:19 1991, DSJ, Created.
|
||||
*/
|
||||
if (IntMatchWindow != NULL)
|
||||
c_make_current(IntMatchWindow);
|
||||
IntMatchWindow->Update();
|
||||
} /* ClearMatchDisplay */
|
||||
#endif
|
||||
|
||||
@ -650,12 +649,10 @@ void DisplayIntFeature(INT_FEATURE Feature, FLOAT32 Evidence) {
|
||||
** Exceptions: none
|
||||
** History: Thu Mar 21 14:45:04 1991, DSJ, Created.
|
||||
*/
|
||||
C_COL Color;
|
||||
|
||||
Color = GetMatchColorFor(Evidence);
|
||||
RenderIntFeature(IntMatchWindow, Feature, Color);
|
||||
ScrollView::Color color = GetMatchColorFor(Evidence);
|
||||
RenderIntFeature(IntMatchWindow, Feature, color);
|
||||
if (FeatureDisplayWindow) {
|
||||
RenderIntFeature(FeatureDisplayWindow, Feature, Color);
|
||||
RenderIntFeature(FeatureDisplayWindow, Feature, color);
|
||||
}
|
||||
} /* DisplayIntFeature */
|
||||
|
||||
@ -675,12 +672,10 @@ void DisplayIntProto(INT_CLASS Class, PROTO_ID ProtoId, FLOAT32 Evidence) {
|
||||
** Exceptions: none
|
||||
** History: Thu Mar 21 14:45:04 1991, DSJ, Created.
|
||||
*/
|
||||
C_COL Color;
|
||||
|
||||
Color = GetMatchColorFor(Evidence);
|
||||
RenderIntProto(IntMatchWindow, Class, ProtoId, Color);
|
||||
ScrollView::Color color = GetMatchColorFor(Evidence);
|
||||
RenderIntProto(IntMatchWindow, Class, ProtoId, color);
|
||||
if (ProtoDisplayWindow) {
|
||||
RenderIntProto(ProtoDisplayWindow, Class, ProtoId, Color);
|
||||
RenderIntProto(ProtoDisplayWindow, Class, ProtoId, color);
|
||||
}
|
||||
} /* DisplayIntProto */
|
||||
#endif
|
||||
@ -812,6 +807,66 @@ bool write_info(FILE* f, const FontInfo& fi) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool read_spacing_info(FILE *f, FontInfo* fi, bool swap) {
|
||||
inT32 vec_size, kern_size;
|
||||
if (fread(&vec_size, sizeof(vec_size), 1, f) != 1) return false;
|
||||
if (swap) Reverse32(&vec_size);
|
||||
ASSERT_HOST(vec_size >= 0);
|
||||
if (vec_size == 0) return true;
|
||||
fi->init_spacing(vec_size);
|
||||
for (int i = 0; i < vec_size; ++i) {
|
||||
FontSpacingInfo *fs = new FontSpacingInfo();
|
||||
if (fread(&fs->x_gap_before, sizeof(fs->x_gap_before), 1, f) != 1 ||
|
||||
fread(&fs->x_gap_after, sizeof(fs->x_gap_after), 1, f) != 1 ||
|
||||
fread(&kern_size, sizeof(kern_size), 1, f) != 1) {
|
||||
return false;
|
||||
}
|
||||
if (swap) {
|
||||
ReverseN(&(fs->x_gap_before), sizeof(fs->x_gap_before));
|
||||
ReverseN(&(fs->x_gap_after), sizeof(fs->x_gap_after));
|
||||
Reverse32(&kern_size);
|
||||
}
|
||||
if (kern_size < 0) { // indication of a NULL entry in fi->spacing_vec
|
||||
delete fs;
|
||||
continue;
|
||||
}
|
||||
if (kern_size > 0 && (!fs->kerned_unichar_ids.DeSerialize(swap, f) ||
|
||||
!fs->kerned_x_gaps.DeSerialize(swap, f))) {
|
||||
return false;
|
||||
}
|
||||
fi->add_spacing(i, fs);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool write_spacing_info(FILE* f, const FontInfo& fi) {
|
||||
inT32 vec_size = (fi.spacing_vec == NULL) ? 0 : fi.spacing_vec->size();
|
||||
if (fwrite(&vec_size, sizeof(vec_size), 1, f) != 1) return false;
|
||||
inT16 x_gap_invalid = -1;
|
||||
for (int i = 0; i < vec_size; ++i) {
|
||||
FontSpacingInfo *fs = fi.spacing_vec->get(i);
|
||||
inT32 kern_size = (fs == NULL) ? -1 : fs->kerned_x_gaps.size();
|
||||
if (fs == NULL) {
|
||||
if (fwrite(&(x_gap_invalid), sizeof(x_gap_invalid), 1, f) != 1 ||
|
||||
fwrite(&(x_gap_invalid), sizeof(x_gap_invalid), 1, f) != 1 ||
|
||||
fwrite(&kern_size, sizeof(kern_size), 1, f) != 1) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
if (fwrite(&(fs->x_gap_before), sizeof(fs->x_gap_before), 1, f) != 1 ||
|
||||
fwrite(&(fs->x_gap_after), sizeof(fs->x_gap_after), 1, f) != 1 ||
|
||||
fwrite(&kern_size, sizeof(kern_size), 1, f) != 1) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (kern_size > 0 && (!fs->kerned_unichar_ids.Serialize(f) ||
|
||||
!fs->kerned_x_gaps.Serialize(f))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool read_set(FILE* f, FontSet* fs, bool swap) {
|
||||
if (fread(&fs->size, sizeof(fs->size), 1, f) != 1) return false;
|
||||
if (swap)
|
||||
@ -1130,6 +1185,11 @@ INT_TEMPLATES Classify::ReadIntTemplates(FILE *File) {
|
||||
}
|
||||
if (version_id >= 4) {
|
||||
this->fontinfo_table_.read(File, NewPermanentTessCallback(read_info), swap);
|
||||
if (version_id >= 5) {
|
||||
this->fontinfo_table_.read(File,
|
||||
NewPermanentTessCallback(read_spacing_info),
|
||||
swap);
|
||||
}
|
||||
this->fontset_table_.read(File, NewPermanentTessCallback(read_set), swap);
|
||||
}
|
||||
|
||||
@ -1156,52 +1216,18 @@ void Classify::ShowMatchDisplay() {
|
||||
** Exceptions: none
|
||||
** History: Thu Mar 21 15:47:33 1991, DSJ, Created.
|
||||
*/
|
||||
void *window;
|
||||
/* Size of drawable */
|
||||
InitIntMatchWindowIfReqd();
|
||||
c_clear_window(IntMatchWindow);
|
||||
if (ProtoDisplayWindow) {
|
||||
c_clear_window(ProtoDisplayWindow);
|
||||
ProtoDisplayWindow->Clear();
|
||||
}
|
||||
if (FeatureDisplayWindow) {
|
||||
c_clear_window(FeatureDisplayWindow);
|
||||
FeatureDisplayWindow->Clear();
|
||||
}
|
||||
ClearFeatureSpaceWindow(
|
||||
static_cast<NORM_METHOD>(static_cast<int>(classify_norm_method)),
|
||||
IntMatchWindow);
|
||||
|
||||
window = IntMatchWindow;
|
||||
c_line_color_index(window, Grey);
|
||||
/* Default size of drawing */
|
||||
if (classify_norm_method == baseline) {
|
||||
c_move (window, -1000.0, INT_BASELINE);
|
||||
c_draw (window, 1000.0, INT_BASELINE);
|
||||
c_move (window, -1000.0, INT_DESCENDER);
|
||||
c_draw (window, 1000.0, INT_DESCENDER);
|
||||
c_move (window, -1000.0, INT_XHEIGHT);
|
||||
c_draw (window, 1000.0, INT_XHEIGHT);
|
||||
c_move (window, -1000.0, INT_CAPHEIGHT);
|
||||
c_draw (window, 1000.0, INT_CAPHEIGHT);
|
||||
c_move (window, INT_MIN_X, -1000.0);
|
||||
c_draw (window, INT_MIN_X, 1000.0);
|
||||
c_move (window, INT_MAX_X, -1000.0);
|
||||
c_draw (window, INT_MAX_X, 1000.0);
|
||||
}
|
||||
else {
|
||||
c_move (window, INT_XCENTER - INT_XRADIUS, INT_YCENTER - INT_YRADIUS);
|
||||
c_draw (window, INT_XCENTER + INT_XRADIUS, INT_YCENTER - INT_YRADIUS);
|
||||
c_move (window, INT_XCENTER - INT_XRADIUS, INT_YCENTER + INT_YRADIUS);
|
||||
c_draw (window, INT_XCENTER + INT_XRADIUS, INT_YCENTER + INT_YRADIUS);
|
||||
c_move (window, INT_XCENTER - INT_XRADIUS, INT_YCENTER - INT_YRADIUS);
|
||||
c_draw (window, INT_XCENTER - INT_XRADIUS, INT_YCENTER + INT_YRADIUS);
|
||||
c_move (window, INT_XCENTER + INT_XRADIUS, INT_YCENTER - INT_YRADIUS);
|
||||
c_draw (window, INT_XCENTER + INT_XRADIUS, INT_YCENTER + INT_YRADIUS);
|
||||
c_move(window, INT_MIN_X, INT_MIN_Y);
|
||||
c_draw(window, INT_MIN_X, INT_MAX_Y);
|
||||
c_move(window, INT_MIN_X, INT_MIN_Y);
|
||||
c_draw(window, INT_MAX_X, INT_MIN_Y);
|
||||
c_move(window, INT_MAX_X, INT_MAX_Y);
|
||||
c_draw(window, INT_MIN_X, INT_MAX_Y);
|
||||
c_move(window, INT_MAX_X, INT_MAX_Y);
|
||||
c_draw(window, INT_MAX_X, INT_MIN_Y);
|
||||
}
|
||||
IntMatchWindow->ZoomToRectangle(INT_MIN_X, INT_MIN_Y,
|
||||
INT_MAX_X, INT_MAX_Y);
|
||||
if (ProtoDisplayWindow) {
|
||||
@ -1213,6 +1239,29 @@ void Classify::ShowMatchDisplay() {
|
||||
INT_MAX_X, INT_MAX_Y);
|
||||
}
|
||||
} /* ShowMatchDisplay */
|
||||
|
||||
// Clears the given window and draws the featurespace guides for the
|
||||
// appropriate normalization method.
|
||||
void ClearFeatureSpaceWindow(NORM_METHOD norm_method, ScrollView* window) {
|
||||
window->Clear();
|
||||
|
||||
window->Pen(ScrollView::GREY);
|
||||
// Draw the feature space limit rectangle.
|
||||
window->Rectangle(0, 0, INT_MAX_X, INT_MAX_Y);
|
||||
if (norm_method == baseline) {
|
||||
window->SetCursor(0, INT_DESCENDER);
|
||||
window->DrawTo(INT_MAX_X, INT_DESCENDER);
|
||||
window->SetCursor(0, INT_BASELINE);
|
||||
window->DrawTo(INT_MAX_X, INT_BASELINE);
|
||||
window->SetCursor(0, INT_XHEIGHT);
|
||||
window->DrawTo(INT_MAX_X, INT_XHEIGHT);
|
||||
window->SetCursor(0, INT_CAPHEIGHT);
|
||||
window->DrawTo(INT_MAX_X, INT_CAPHEIGHT);
|
||||
} else {
|
||||
window->Rectangle(INT_XCENTER - INT_XRADIUS, INT_YCENTER - INT_YRADIUS,
|
||||
INT_XCENTER + INT_XRADIUS, INT_YCENTER + INT_YRADIUS);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
@ -1233,7 +1282,7 @@ void Classify::WriteIntTemplates(FILE *File, INT_TEMPLATES Templates,
|
||||
int i, j;
|
||||
INT_CLASS Class;
|
||||
int unicharset_size = target_unicharset.size();
|
||||
int version_id = -4; // When negated by the reader -1 becomes +1 etc.
|
||||
int version_id = -5; // When negated by the reader -1 becomes +1 etc.
|
||||
|
||||
if (Templates->NumClasses != unicharset_size) {
|
||||
cprintf("Warning: executing WriteIntTemplates() with %d classes in"
|
||||
@ -1283,6 +1332,8 @@ void Classify::WriteIntTemplates(FILE *File, INT_TEMPLATES Templates,
|
||||
|
||||
/* Write the fonts info tables */
|
||||
this->fontinfo_table_.write(File, NewPermanentTessCallback(write_info));
|
||||
this->fontinfo_table_.write(File,
|
||||
NewPermanentTessCallback(write_spacing_info));
|
||||
this->fontset_table_.write(File, NewPermanentTessCallback(write_set));
|
||||
} /* WriteIntTemplates */
|
||||
} // namespace tesseract
|
||||
@ -1588,7 +1639,7 @@ void GetCPPadsForLevel(int Level,
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
C_COL GetMatchColorFor(FLOAT32 Evidence) {
|
||||
ScrollView::Color GetMatchColorFor(FLOAT32 Evidence) {
|
||||
/*
|
||||
** Parameters:
|
||||
** Evidence evidence value to return color for
|
||||
@ -1603,13 +1654,13 @@ C_COL GetMatchColorFor(FLOAT32 Evidence) {
|
||||
assert (Evidence <= 1.0);
|
||||
|
||||
if (Evidence >= 0.90)
|
||||
return White;
|
||||
return ScrollView::WHITE;
|
||||
else if (Evidence >= 0.75)
|
||||
return Green;
|
||||
return ScrollView::GREEN;
|
||||
else if (Evidence >= 0.50)
|
||||
return Red;
|
||||
return ScrollView::RED;
|
||||
else
|
||||
return Blue;
|
||||
return ScrollView::BLUE;
|
||||
} /* GetMatchColorFor */
|
||||
|
||||
|
||||
@ -1839,7 +1890,8 @@ void InitTableFiller (FLOAT32 EndPad, FLOAT32 SidePad,
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
void RenderIntFeature(void *window, INT_FEATURE Feature, C_COL Color) {
|
||||
void RenderIntFeature(ScrollView *window, const INT_FEATURE_STRUCT* Feature,
|
||||
ScrollView::Color color) {
|
||||
/*
|
||||
** Parameters:
|
||||
** ShapeList shape list to add feature rendering to
|
||||
@ -1853,20 +1905,27 @@ void RenderIntFeature(void *window, INT_FEATURE Feature, C_COL Color) {
|
||||
*/
|
||||
FLOAT32 X, Y, Dx, Dy, Length;
|
||||
|
||||
c_line_color_index(window, Color);
|
||||
window->Pen(color);
|
||||
assert(Feature != NULL);
|
||||
assert(Color != 0);
|
||||
assert(color != 0);
|
||||
|
||||
X = Feature->X - DISPLAY_OFFSET;
|
||||
Y = Feature->Y - DISPLAY_OFFSET;
|
||||
X = Feature->X;
|
||||
Y = Feature->Y;
|
||||
Length = GetPicoFeatureLength() * 0.7 * INT_CHAR_NORM_RANGE;
|
||||
Dx = (Length / 2.0) * cos((Feature->Theta / 256.0) * 2.0 * PI);
|
||||
Dy = (Length / 2.0) * sin((Feature->Theta / 256.0) * 2.0 * PI);
|
||||
// The -PI has no significant effect here, but the value of Theta is computed
|
||||
// using BinaryAnglePlusPi in intfx.cpp.
|
||||
Dx = (Length / 2.0) * cos((Feature->Theta / 256.0) * 2.0 * PI - PI);
|
||||
Dy = (Length / 2.0) * sin((Feature->Theta / 256.0) * 2.0 * PI - PI);
|
||||
float x_offset = Dy / 4.0;
|
||||
float y_offset = -Dx / 4.0;
|
||||
|
||||
c_move(window, X - Dx, Y - Dy);
|
||||
c_draw(window, X + Dx, Y + Dy);
|
||||
c_move(window, X - Dx - Dy * DOUBLE_OFFSET, Y - Dy + Dx * DOUBLE_OFFSET);
|
||||
c_draw(window, X + Dx - Dy * DOUBLE_OFFSET, Y + Dy + Dx * DOUBLE_OFFSET);
|
||||
window->SetCursor(X - Dx, Y - Dy);
|
||||
window->DrawTo(X + Dx, Y + Dy);
|
||||
// Draw another copy of the feature offset perpendicular to its direction.
|
||||
X += x_offset;
|
||||
Y += y_offset;
|
||||
window->SetCursor(X - Dx, Y - Dy);
|
||||
window->DrawTo(X + Dx, Y + Dy);
|
||||
} /* RenderIntFeature */
|
||||
|
||||
|
||||
@ -1887,10 +1946,10 @@ void RenderIntFeature(void *window, INT_FEATURE Feature, C_COL Color) {
|
||||
* @note Exceptions: none
|
||||
* @note History: Thu Mar 21 10:21:09 1991, DSJ, Created.
|
||||
*/
|
||||
void RenderIntProto(void *window,
|
||||
void RenderIntProto(ScrollView *window,
|
||||
INT_CLASS Class,
|
||||
PROTO_ID ProtoId,
|
||||
C_COL Color) {
|
||||
ScrollView::Color color) {
|
||||
PROTO_SET ProtoSet;
|
||||
INT_PROTO Proto;
|
||||
int ProtoSetIndex;
|
||||
@ -1904,8 +1963,8 @@ void RenderIntProto(void *window,
|
||||
assert(ProtoId >= 0);
|
||||
assert(Class != NULL);
|
||||
assert(ProtoId < Class->NumProtos);
|
||||
assert(Color != 0);
|
||||
c_line_color_index(window, Color);
|
||||
assert(color != 0);
|
||||
window->Pen(color);
|
||||
|
||||
ProtoSet = Class->ProtoSets[SetForProto(ProtoId)];
|
||||
ProtoSetIndex = IndexForProto(ProtoId);
|
||||
@ -1927,13 +1986,15 @@ void RenderIntProto(void *window,
|
||||
UpdateRange(Bucket, &Ymin, &Ymax);
|
||||
}
|
||||
}
|
||||
X = (Xmin + Xmax + 1) / 2.0 * PROTO_PRUNER_SCALE - DISPLAY_OFFSET;
|
||||
Y = (Ymin + Ymax + 1) / 2.0 * PROTO_PRUNER_SCALE - DISPLAY_OFFSET;
|
||||
Dx = (Length / 2.0) * cos((Proto->Angle / 256.0) * 2.0 * PI);
|
||||
Dy = (Length / 2.0) * sin((Proto->Angle / 256.0) * 2.0 * PI);
|
||||
X = (Xmin + Xmax + 1) / 2.0 * PROTO_PRUNER_SCALE;
|
||||
Y = (Ymin + Ymax + 1) / 2.0 * PROTO_PRUNER_SCALE;
|
||||
// The -PI has no significant effect here, but the value of Theta is computed
|
||||
// using BinaryAnglePlusPi in intfx.cpp.
|
||||
Dx = (Length / 2.0) * cos((Proto->Angle / 256.0) * 2.0 * PI - PI);
|
||||
Dy = (Length / 2.0) * sin((Proto->Angle / 256.0) * 2.0 * PI - PI);
|
||||
|
||||
c_move(window, X - Dx, Y - Dy);
|
||||
c_draw(window, X + Dx, Y + Dy);
|
||||
window->SetCursor(X - Dx, Y - Dy);
|
||||
window->DrawTo(X + Dx, Y + Dy);
|
||||
} /* RenderIntProto */
|
||||
#endif
|
||||
|
||||
@ -1977,9 +2038,7 @@ int TruncateParam(FLOAT32 Param, int Min, int Max, char *Id) {
|
||||
*/
|
||||
void InitIntMatchWindowIfReqd() {
|
||||
if (IntMatchWindow == NULL) {
|
||||
IntMatchWindow = c_create_window("IntMatchWindow", 50, 200,
|
||||
520, 520,
|
||||
-130.0, 130.0, -130.0, 130.0);
|
||||
IntMatchWindow = CreateFeatureSpaceWindow("IntMatchWindow", 50, 200);
|
||||
SVMenuNode* popup_menu = new SVMenuNode();
|
||||
|
||||
popup_menu->AddChild("Debug Adapted classes", IDA_ADAPTIVE,
|
||||
@ -1998,10 +2057,9 @@ void InitIntMatchWindowIfReqd() {
|
||||
*/
|
||||
void InitProtoDisplayWindowIfReqd() {
|
||||
if (ProtoDisplayWindow == NULL) {
|
||||
ProtoDisplayWindow = c_create_window("ProtoDisplayWindow", 50, 200,
|
||||
520, 520,
|
||||
-130.0, 130.0, -130.0, 130.0);
|
||||
}
|
||||
ProtoDisplayWindow = CreateFeatureSpaceWindow("ProtoDisplayWindow",
|
||||
550, 200);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -2010,8 +2068,13 @@ void InitProtoDisplayWindowIfReqd() {
|
||||
*/
|
||||
void InitFeatureDisplayWindowIfReqd() {
|
||||
if (FeatureDisplayWindow == NULL) {
|
||||
FeatureDisplayWindow = c_create_window("FeatureDisplayWindow", 50, 200,
|
||||
520, 520,
|
||||
-130.0, 130.0, -130.0, 130.0);
|
||||
FeatureDisplayWindow = CreateFeatureSpaceWindow("FeatureDisplayWindow",
|
||||
50, 700);
|
||||
}
|
||||
}
|
||||
|
||||
// Creates a window of the appropriate size for displaying elements
|
||||
// in feature space.
|
||||
ScrollView* CreateFeatureSpaceWindow(const char* name, int xpos, int ypos) {
|
||||
return new ScrollView(name, xpos, ypos, 520, 520, 260, 260, true);
|
||||
}
|
||||
|
@ -21,9 +21,12 @@
|
||||
/**----------------------------------------------------------------------------
|
||||
Include Files and Type Defines
|
||||
----------------------------------------------------------------------------**/
|
||||
#include "genericvector.h"
|
||||
#include "matchdefs.h"
|
||||
#include "mfoutline.h"
|
||||
#include "protos.h"
|
||||
#include "callcpp.h"
|
||||
#include "scrollview.h"
|
||||
#include "unicharset.h"
|
||||
|
||||
/* define order of params in pruners */
|
||||
@ -100,18 +103,70 @@ PROTO_SET_STRUCT, *PROTO_SET;
|
||||
|
||||
typedef uinT32 CONFIG_PRUNER[NUM_PP_PARAMS][NUM_PP_BUCKETS][4];
|
||||
|
||||
// Struct for information about spacing between characters in a particular font.
// One instance describes a single character; FontInfo keeps them in
// spacing_vec and combines two of them in FontInfo::get_spacing().
|
||||
struct FontSpacingInfo {
|
||||
// Gap term used when this character is the second of a pair: it is added to
// the first character's x_gap_after when no kerned entry matches.
inT16 x_gap_before;
|
||||
// Gap term used when this character is the first of a pair: it is added to
// the second character's x_gap_before when no kerned entry matches.
inT16 x_gap_after;
|
||||
// IDs of following characters that have an explicit gap stored for them.
// Searched linearly by FontInfo::get_spacing().
GenericVector<UNICHAR_ID> kerned_unichar_ids;
|
||||
// kerned_x_gaps[i] is the gap used when the following character is
// kerned_unichar_ids[i]; it replaces the x_gap_after + x_gap_before sum.
GenericVector<inT16> kerned_x_gaps;
|
||||
};
|
||||
|
||||
/*
|
||||
* font_properties contains properties about boldness, italicness, fixed pitch,
|
||||
* serif, fraktur
|
||||
*/
|
||||
struct FontInfo {
|
||||
char* name;
|
||||
uinT32 properties;
|
||||
bool is_italic() { return properties & 1; }
|
||||
bool is_bold() { return (properties & 2) != 0; }
|
||||
bool is_fixed_pitch() { return (properties & 4) != 0; }
|
||||
bool is_serif() { return (properties & 8) != 0; }
|
||||
bool is_fraktur() { return (properties & 16) != 0; }
|
||||
// Creates an empty FontInfo: no name, no property bits set and no spacing
// table. properties is zeroed so the is_*() accessors never read an
// indeterminate value on a default-constructed object.
FontInfo() : name(NULL), properties(0), spacing_vec(NULL) {}
|
||||
~FontInfo() {}
|
||||
// Reserves unicharset_size spots in spacing_vec.
|
||||
void init_spacing(int unicharset_size) {
|
||||
spacing_vec = new GenericVector<FontSpacingInfo *>();
|
||||
spacing_vec->init_to_size(unicharset_size, NULL);
|
||||
}
|
||||
// Adds the given pointer to FontSpacingInfo to spacing_vec member
|
||||
// (FontInfo class takes ownership of the pointer).
|
||||
// Note: init_spacing should be called before calling this function.
|
||||
void add_spacing(UNICHAR_ID uch_id, FontSpacingInfo *spacing_info) {
|
||||
ASSERT_HOST(spacing_vec != NULL && spacing_vec->size() > uch_id);
|
||||
(*spacing_vec)[uch_id] = spacing_info;
|
||||
}
|
||||
|
||||
// Returns the pointer to FontSpacingInfo for the given UNICHAR_ID.
|
||||
const FontSpacingInfo *get_spacing(UNICHAR_ID uch_id) const {
|
||||
return (spacing_vec == NULL || spacing_vec->size() <= uch_id) ?
|
||||
NULL : (*spacing_vec)[uch_id];
|
||||
}
|
||||
|
||||
// Fills spacing with the value of the x gap expected between the two given
|
||||
// UNICHAR_IDs. Returns true on success.
|
||||
bool get_spacing(UNICHAR_ID prev_uch_id,
|
||||
UNICHAR_ID uch_id,
|
||||
int *spacing) const {
|
||||
const FontSpacingInfo *prev_fsi = this->get_spacing(prev_uch_id);
|
||||
const FontSpacingInfo *fsi = this->get_spacing(uch_id);
|
||||
if (prev_fsi == NULL || fsi == NULL) return false;
|
||||
int i = 0;
|
||||
for (; i < prev_fsi->kerned_unichar_ids.size(); ++i) {
|
||||
if (prev_fsi->kerned_unichar_ids[i] == uch_id) break;
|
||||
}
|
||||
if (i < prev_fsi->kerned_unichar_ids.size()) {
|
||||
*spacing = prev_fsi->kerned_x_gaps[i];
|
||||
} else {
|
||||
*spacing = prev_fsi->x_gap_after + fsi->x_gap_before;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool is_italic() const { return properties & 1; }
|
||||
bool is_bold() const { return (properties & 2) != 0; }
|
||||
bool is_fixed_pitch() const { return (properties & 4) != 0; }
|
||||
bool is_serif() const { return (properties & 8) != 0; }
|
||||
bool is_fraktur() const { return (properties & 16) != 0; }
|
||||
|
||||
char* name;
|
||||
uinT32 properties;
|
||||
// Horizontal spacing between characters (indexed by UNICHAR_ID).
|
||||
GenericVector<FontSpacingInfo *> *spacing_vec;
|
||||
};
|
||||
|
||||
// Every class (character) owns a FontSet that represents all the fonts that can
|
||||
@ -164,6 +219,10 @@ struct INT_FEATURE_STRUCT
|
||||
uinT8 Y;
|
||||
uinT8 Theta;
|
||||
inT8 CP_misses;
|
||||
|
||||
// Debug helper: prints this feature through tprintf in the form
// "(X,Y):Theta" followed by a newline.
void print() const {
|
||||
tprintf("(%d,%d):%d\n", X, Y, Theta);
|
||||
}
|
||||
};
|
||||
|
||||
typedef INT_FEATURE_STRUCT *INT_FEATURE;
|
||||
@ -253,7 +312,17 @@ void free_int_templates(INT_TEMPLATES templates);
|
||||
|
||||
void ShowMatchDisplay();
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Clears the given window and draws the featurespace guides for the
|
||||
// appropriate normalization method.
|
||||
void ClearFeatureSpaceWindow(NORM_METHOD norm_method, ScrollView* window);
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
void RenderIntFeature(ScrollView *window, const INT_FEATURE_STRUCT* Feature,
|
||||
ScrollView::Color color);
|
||||
|
||||
void InitIntMatchWindowIfReqd();
|
||||
|
||||
@ -261,4 +330,8 @@ void InitProtoDisplayWindowIfReqd();
|
||||
|
||||
void InitFeatureDisplayWindowIfReqd();
|
||||
|
||||
// Creates a window of the appropriate size for displaying elements
|
||||
// in feature space.
|
||||
ScrollView* CreateFeatureSpaceWindow(const char* name, int xpos, int ypos);
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user