mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-06-08 02:12:40 +08:00
doxygen training/commontraining.cpp
This commit is contained in:
parent
cc5f466ac6
commit
cce044fa66
@ -73,18 +73,17 @@ DOUBLE_PARAM_FLAG(clusterconfig_independence, Config.Independence,
|
|||||||
DOUBLE_PARAM_FLAG(clusterconfig_confidence, Config.Confidence,
|
DOUBLE_PARAM_FLAG(clusterconfig_confidence, Config.Confidence,
|
||||||
"Desired confidence in prototypes created");
|
"Desired confidence in prototypes created");
|
||||||
|
|
||||||
/*
|
/**
|
||||||
** Parameters:
|
* This routine parses the command line arguments that were
|
||||||
** argc number of command line arguments to parse
|
* passed to the program and uses them to set relevant
|
||||||
** argv command line arguments
|
* training-related global parameters
|
||||||
** Globals:
|
*
|
||||||
** Config current clustering parameters
|
* Globals:
|
||||||
** Operation:
|
* - Config current clustering parameters
|
||||||
** This routine parses the command line arguments that were
|
* @param argc number of command line arguments to parse
|
||||||
** passed to the program and ses them to set relevant
|
* @param argv command line arguments
|
||||||
** training-related global parameters
|
* @return none
|
||||||
** Return: none
|
* @note Exceptions: Illegal options terminate the program.
|
||||||
** Exceptions: Illegal options terminate the program.
|
|
||||||
*/
|
*/
|
||||||
void ParseArguments(int* argc, char ***argv) {
|
void ParseArguments(int* argc, char ***argv) {
|
||||||
STRING usage;
|
STRING usage;
|
||||||
@ -158,19 +157,21 @@ void WriteShapeTable(const STRING& file_prefix, const ShapeTable& shape_table) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Creates a MasterTraininer and loads the training data into it:
|
/**
|
||||||
// Initializes feature_defs and IntegerFX.
|
* Creates a MasterTrainer and loads the training data into it:
|
||||||
// Loads the shape_table if shape_table != NULL.
|
* Initializes feature_defs and IntegerFX.
|
||||||
// Loads initial unicharset from -U command-line option.
|
* Loads the shape_table if shape_table != NULL.
|
||||||
// If FLAGS_T is set, loads the majority of data from there, else:
|
* Loads initial unicharset from -U command-line option.
|
||||||
// Loads font info from -F option.
|
* If FLAGS_T is set, loads the majority of data from there, else:
|
||||||
// Loads xheights from -X option.
|
* - Loads font info from -F option.
|
||||||
// Loads samples from .tr files in remaining command-line args.
|
* - Loads xheights from -X option.
|
||||||
// Deletes outliers and computes canonical samples.
|
* - Loads samples from .tr files in remaining command-line args.
|
||||||
// If FLAGS_output_trainer is set, saves the trainer for future use.
|
* - Deletes outliers and computes canonical samples.
|
||||||
// Computes canonical and cloud features.
|
* - If FLAGS_output_trainer is set, saves the trainer for future use.
|
||||||
// If shape_table is not NULL, but failed to load, make a fake flat one,
|
* Computes canonical and cloud features.
|
||||||
// as shape clustering was not run.
|
* If shape_table is not NULL, but failed to load, make a fake flat one,
|
||||||
|
* as shape clustering was not run.
|
||||||
|
*/
|
||||||
MasterTrainer* LoadTrainingData(int argc, const char* const * argv,
|
MasterTrainer* LoadTrainingData(int argc, const char* const * argv,
|
||||||
bool replication,
|
bool replication,
|
||||||
ShapeTable** shape_table,
|
ShapeTable** shape_table,
|
||||||
@ -294,20 +295,19 @@ MasterTrainer* LoadTrainingData(int argc, const char* const * argv,
|
|||||||
} // namespace tesseract.
|
} // namespace tesseract.
|
||||||
|
|
||||||
/*---------------------------------------------------------------------------*/
|
/*---------------------------------------------------------------------------*/
|
||||||
const char *GetNextFilename(int argc, const char* const * argv) {
|
/**
|
||||||
/*
|
* This routine returns the next command line argument. If
|
||||||
** Parameters: none
|
* there are no remaining command line arguments, it returns
|
||||||
** Globals:
|
* NULL. This routine should only be called after all option
|
||||||
** tessoptind defined by tessopt sys call
|
* arguments have been parsed and removed with ParseArguments.
|
||||||
** Operation:
|
*
|
||||||
** This routine returns the next command line argument. If
|
* Globals:
|
||||||
** there are no remaining command line arguments, it returns
|
* - tessoptind defined by tessopt sys call
|
||||||
** NULL. This routine should only be called after all option
|
* @return Next command line argument or NULL.
|
||||||
** arguments have been parsed and removed with ParseArguments.
|
* @note Exceptions: none
|
||||||
** Return: Next command line argument or NULL.
|
* @note History: Fri Aug 18 09:34:12 1989, DSJ, Created.
|
||||||
** Exceptions: none
|
|
||||||
** History: Fri Aug 18 09:34:12 1989, DSJ, Created.
|
|
||||||
*/
|
*/
|
||||||
|
const char *GetNextFilename(int argc, const char* const * argv) {
|
||||||
if (tessoptind < argc)
|
if (tessoptind < argc)
|
||||||
return argv[tessoptind++];
|
return argv[tessoptind++];
|
||||||
else
|
else
|
||||||
@ -317,24 +317,20 @@ const char *GetNextFilename(int argc, const char* const * argv) {
|
|||||||
|
|
||||||
|
|
||||||
/*---------------------------------------------------------------------------*/
|
/*---------------------------------------------------------------------------*/
|
||||||
|
/**
|
||||||
|
* This routine searches through a list of labeled lists to find
|
||||||
|
* a list with the specified label. If a matching labeled list
|
||||||
|
* cannot be found, NULL is returned.
|
||||||
|
* @param List list to search
|
||||||
|
* @param Label label to search for
|
||||||
|
* @return Labeled list with the specified Label or NULL.
|
||||||
|
* @note Globals: none
|
||||||
|
* @note Exceptions: none
|
||||||
|
* @note History: Fri Aug 18 15:57:41 1989, DSJ, Created.
|
||||||
|
*/
|
||||||
LABELEDLIST FindList (
|
LABELEDLIST FindList (
|
||||||
LIST List,
|
LIST List,
|
||||||
char *Label)
|
char *Label)
|
||||||
|
|
||||||
/*
|
|
||||||
** Parameters:
|
|
||||||
** List list to search
|
|
||||||
** Label label to search for
|
|
||||||
** Globals: none
|
|
||||||
** Operation:
|
|
||||||
** This routine searches thru a list of labeled lists to find
|
|
||||||
** a list with the specified label. If a matching labeled list
|
|
||||||
** cannot be found, NULL is returned.
|
|
||||||
** Return: Labeled list with the specified Label or NULL.
|
|
||||||
** Exceptions: none
|
|
||||||
** History: Fri Aug 18 15:57:41 1989, DSJ, Created.
|
|
||||||
*/
|
|
||||||
|
|
||||||
{
|
{
|
||||||
LABELEDLIST LabeledList;
|
LABELEDLIST LabeledList;
|
||||||
|
|
||||||
@ -349,21 +345,17 @@ LABELEDLIST FindList (
|
|||||||
} /* FindList */
|
} /* FindList */
|
||||||
|
|
||||||
/*---------------------------------------------------------------------------*/
|
/*---------------------------------------------------------------------------*/
|
||||||
|
/**
|
||||||
|
* This routine allocates a new, empty labeled list and gives
|
||||||
|
* it the specified label.
|
||||||
|
* @param Label label for new list
|
||||||
|
* @return New, empty labeled list.
|
||||||
|
* @note Globals: none
|
||||||
|
* @note Exceptions: none
|
||||||
|
* @note History: Fri Aug 18 16:08:46 1989, DSJ, Created.
|
||||||
|
*/
|
||||||
LABELEDLIST NewLabeledList (
|
LABELEDLIST NewLabeledList (
|
||||||
const char *Label)
|
const char *Label)
|
||||||
|
|
||||||
/*
|
|
||||||
** Parameters:
|
|
||||||
** Label label for new list
|
|
||||||
** Globals: none
|
|
||||||
** Operation:
|
|
||||||
** This routine allocates a new, empty labeled list and gives
|
|
||||||
** it the specified label.
|
|
||||||
** Return: New, empty labeled list.
|
|
||||||
** Exceptions: none
|
|
||||||
** History: Fri Aug 18 16:08:46 1989, DSJ, Created.
|
|
||||||
*/
|
|
||||||
|
|
||||||
{
|
{
|
||||||
LABELEDLIST LabeledList;
|
LABELEDLIST LabeledList;
|
||||||
|
|
||||||
@ -380,25 +372,24 @@ LABELEDLIST NewLabeledList (
|
|||||||
/*---------------------------------------------------------------------------*/
|
/*---------------------------------------------------------------------------*/
|
||||||
// TODO(rays) This is now used only by cntraining. Convert cntraining to use
|
// TODO(rays) This is now used only by cntraining. Convert cntraining to use
|
||||||
// the new method or get rid of it entirely.
|
// the new method or get rid of it entirely.
|
||||||
|
/**
|
||||||
|
* This routine reads training samples from a file and
|
||||||
|
* places them into a data structure which organizes the
|
||||||
|
* samples by FontName and CharName. It then returns this
|
||||||
|
* data structure.
|
||||||
|
* @param file open text file to read samples from
|
||||||
|
* @return none
|
||||||
|
* @note Globals: none
|
||||||
|
* @note Exceptions: none
|
||||||
|
* @note History:
|
||||||
|
* - Fri Aug 18 13:11:39 1989, DSJ, Created.
|
||||||
|
* - Tue May 17 1998 simplifications to structure, eliminated
|
||||||
|
* font, and feature specification levels of structure.
|
||||||
|
*/
|
||||||
void ReadTrainingSamples(const FEATURE_DEFS_STRUCT& feature_defs,
|
void ReadTrainingSamples(const FEATURE_DEFS_STRUCT& feature_defs,
|
||||||
const char *feature_name, int max_samples,
|
const char *feature_name, int max_samples,
|
||||||
UNICHARSET* unicharset,
|
UNICHARSET* unicharset,
|
||||||
FILE* file, LIST* training_samples) {
|
FILE* file, LIST* training_samples) {
|
||||||
/*
|
|
||||||
** Parameters:
|
|
||||||
** file open text file to read samples from
|
|
||||||
** Globals: none
|
|
||||||
** Operation:
|
|
||||||
** This routine reads training samples from a file and
|
|
||||||
** places them into a data structure which organizes the
|
|
||||||
** samples by FontName and CharName. It then returns this
|
|
||||||
** data structure.
|
|
||||||
** Return: none
|
|
||||||
** Exceptions: none
|
|
||||||
** History: Fri Aug 18 13:11:39 1989, DSJ, Created.
|
|
||||||
** Tue May 17 1998 simplifications to structure, illiminated
|
|
||||||
** font, and feature specification levels of structure.
|
|
||||||
*/
|
|
||||||
char buffer[2048];
|
char buffer[2048];
|
||||||
char unichar[UNICHAR_LEN + 1];
|
char unichar[UNICHAR_LEN + 1];
|
||||||
LABELEDLIST char_sample;
|
LABELEDLIST char_sample;
|
||||||
@ -450,18 +441,16 @@ void ReadTrainingSamples(const FEATURE_DEFS_STRUCT& feature_defs,
|
|||||||
|
|
||||||
|
|
||||||
/*---------------------------------------------------------------------------*/
|
/*---------------------------------------------------------------------------*/
|
||||||
void FreeTrainingSamples(LIST CharList) {
|
/**
|
||||||
/*
|
* This routine deallocates all of the space allocated to
|
||||||
** Parameters:
|
* the specified list of training samples.
|
||||||
** FontList list of all fonts in document
|
* @param CharList list of training samples to deallocate
|
||||||
** Globals: none
|
* @return none
|
||||||
** Operation:
|
* @note Globals: none
|
||||||
** This routine deallocates all of the space allocated to
|
* @note Exceptions: none
|
||||||
** the specified list of training samples.
|
* @note History: Fri Aug 18 17:44:27 1989, DSJ, Created.
|
||||||
** Return: none
|
|
||||||
** Exceptions: none
|
|
||||||
** History: Fri Aug 18 17:44:27 1989, DSJ, Created.
|
|
||||||
*/
|
*/
|
||||||
|
void FreeTrainingSamples(LIST CharList) {
|
||||||
LABELEDLIST char_sample;
|
LABELEDLIST char_sample;
|
||||||
FEATURE_SET FeatureSet;
|
FEATURE_SET FeatureSet;
|
||||||
LIST FeatureList;
|
LIST FeatureList;
|
||||||
@ -480,45 +469,37 @@ void FreeTrainingSamples(LIST CharList) {
|
|||||||
} /* FreeTrainingSamples */
|
} /* FreeTrainingSamples */
|
||||||
|
|
||||||
/*---------------------------------------------------------------------------*/
|
/*---------------------------------------------------------------------------*/
|
||||||
void FreeLabeledList(LABELEDLIST LabeledList) {
|
/**
|
||||||
/*
|
* This routine deallocates all of the memory consumed by
|
||||||
** Parameters:
|
* a labeled list. It does not free any memory which may be
|
||||||
** LabeledList labeled list to be freed
|
* consumed by the items in the list.
|
||||||
** Globals: none
|
* @param LabeledList labeled list to be freed
|
||||||
** Operation:
|
* @note Globals: none
|
||||||
** This routine deallocates all of the memory consumed by
|
* @return none
|
||||||
** a labeled list. It does not free any memory which may be
|
* @note Exceptions: none
|
||||||
** consumed by the items in the list.
|
* @note History: Fri Aug 18 17:52:45 1989, DSJ, Created.
|
||||||
** Return: none
|
|
||||||
** Exceptions: none
|
|
||||||
** History: Fri Aug 18 17:52:45 1989, DSJ, Created.
|
|
||||||
*/
|
*/
|
||||||
|
void FreeLabeledList(LABELEDLIST LabeledList) {
|
||||||
destroy(LabeledList->List);
|
destroy(LabeledList->List);
|
||||||
free(LabeledList->Label);
|
free(LabeledList->Label);
|
||||||
free(LabeledList);
|
free(LabeledList);
|
||||||
} /* FreeLabeledList */
|
} /* FreeLabeledList */
|
||||||
|
|
||||||
/*---------------------------------------------------------------------------*/
|
/*---------------------------------------------------------------------------*/
|
||||||
|
/**
|
||||||
|
* This routine reads samples from a LABELEDLIST and enters
|
||||||
|
* those samples into a clusterer data structure. This
|
||||||
|
* data structure is then returned to the caller.
|
||||||
|
* @param char_sample LABELEDLIST that holds all the feature information for a
|
||||||
|
* given character.
|
||||||
|
* @return Pointer to new clusterer data structure.
|
||||||
|
* @note Globals: None
|
||||||
|
* @note Exceptions: None
|
||||||
|
* @note History: 8/16/89, DSJ, Created.
|
||||||
|
*/
|
||||||
CLUSTERER *SetUpForClustering(const FEATURE_DEFS_STRUCT &FeatureDefs,
|
CLUSTERER *SetUpForClustering(const FEATURE_DEFS_STRUCT &FeatureDefs,
|
||||||
LABELEDLIST char_sample,
|
LABELEDLIST char_sample,
|
||||||
const char* program_feature_type) {
|
const char* program_feature_type) {
|
||||||
/*
|
|
||||||
** Parameters:
|
|
||||||
** char_sample: LABELEDLIST that holds all the feature information for a
|
|
||||||
** given character.
|
|
||||||
** Globals:
|
|
||||||
** None
|
|
||||||
** Operation:
|
|
||||||
** This routine reads samples from a LABELEDLIST and enters
|
|
||||||
** those samples into a clusterer data structure. This
|
|
||||||
** data structure is then returned to the caller.
|
|
||||||
** Return:
|
|
||||||
** Pointer to new clusterer data structure.
|
|
||||||
** Exceptions:
|
|
||||||
** None
|
|
||||||
** History:
|
|
||||||
** 8/16/89, DSJ, Created.
|
|
||||||
*/
|
|
||||||
uinT16 N;
|
uinT16 N;
|
||||||
int i, j;
|
int i, j;
|
||||||
FLOAT32 *Sample = NULL;
|
FLOAT32 *Sample = NULL;
|
||||||
@ -741,21 +722,17 @@ MERGE_CLASS NewLabeledClass (
|
|||||||
} /* NewLabeledClass */
|
} /* NewLabeledClass */
|
||||||
|
|
||||||
/*-----------------------------------------------------------------------------*/
|
/*-----------------------------------------------------------------------------*/
|
||||||
|
/**
|
||||||
|
* This routine deallocates all of the space allocated to
|
||||||
|
* the specified list of training samples.
|
||||||
|
* @param ClassList list of labeled classes to deallocate
|
||||||
|
* @return none
|
||||||
|
* @note Globals: none
|
||||||
|
* @note Exceptions: none
|
||||||
|
* @note History: Fri Aug 18 17:44:27 1989, DSJ, Created.
|
||||||
|
*/
|
||||||
void FreeLabeledClassList (
|
void FreeLabeledClassList (
|
||||||
LIST ClassList)
|
LIST ClassList)
|
||||||
|
|
||||||
/*
|
|
||||||
** Parameters:
|
|
||||||
** FontList list of all fonts in document
|
|
||||||
** Globals: none
|
|
||||||
** Operation:
|
|
||||||
** This routine deallocates all of the space allocated to
|
|
||||||
** the specified list of training samples.
|
|
||||||
** Return: none
|
|
||||||
** Exceptions: none
|
|
||||||
** History: Fri Aug 18 17:44:27 1989, DSJ, Created.
|
|
||||||
*/
|
|
||||||
|
|
||||||
{
|
{
|
||||||
MERGE_CLASS MergeClass;
|
MERGE_CLASS MergeClass;
|
||||||
|
|
||||||
@ -770,7 +747,7 @@ void FreeLabeledClassList (
|
|||||||
|
|
||||||
} /* FreeLabeledClassList */
|
} /* FreeLabeledClassList */
|
||||||
|
|
||||||
/** SetUpForFloat2Int **************************************************/
|
/* SetUpForFloat2Int */
|
||||||
CLASS_STRUCT* SetUpForFloat2Int(const UNICHARSET& unicharset,
|
CLASS_STRUCT* SetUpForFloat2Int(const UNICHARSET& unicharset,
|
||||||
LIST LabeledClassList) {
|
LIST LabeledClassList) {
|
||||||
MERGE_CLASS MergeClass;
|
MERGE_CLASS MergeClass;
|
||||||
|
Loading…
Reference in New Issue
Block a user