Simplify function LoadTrainingData and fix mastertrainer_test

Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
Stefan Weil 2021-09-17 08:24:50 +02:00
parent 2b4c3599de
commit 638045133f
7 changed files with 10 additions and 37 deletions

View File

@ -102,7 +102,7 @@ int main(int argc, char **argv) {
tesseract::CheckSharedLibraryVersion();
ParseArguments(&argc, &argv);
std::string file_prefix;
auto trainer = tesseract::LoadTrainingData(argc, argv, false, nullptr, file_prefix);
auto trainer = tesseract::LoadTrainingData(argv + 1, false, nullptr, file_prefix);
tesseract::TessBaseAPI *api;
// Decode the classifier string.
tesseract::ShapeClassifier *shape_classifier =

View File

@ -106,7 +106,6 @@ int main(int argc, char *argv[]) {
// Set the global Config parameters before parsing the command line.
Config = CNConfig;
const char *PageName;
LIST CharList = NIL_LIST;
CLUSTERER *Clusterer = nullptr;
LIST ProtoList = NIL_LIST;
@ -118,8 +117,7 @@ int main(int argc, char *argv[]) {
ParseArguments(&argc, &argv);
int num_fonts = 0;
int tessoptind = 1;
while ((PageName = GetNextFilename(argc, argv, tessoptind)) != nullptr) {
for (const char *PageName = *++argv; PageName != nullptr; PageName = *++argv) {
printf("Reading %s ...\n", PageName);
FILE *TrainingPage = fopen(PageName, "rb");
ASSERT_HOST(TrainingPage);

View File

@ -197,7 +197,7 @@ void WriteShapeTable(const std::string &file_prefix, const ShapeTable &shape_tab
* If shape_table is not nullptr, but failed to load, make a fake flat one,
* as shape clustering was not run.
*/
std::unique_ptr<MasterTrainer> LoadTrainingData(int argc, const char *const *argv, bool replication,
std::unique_ptr<MasterTrainer> LoadTrainingData(const char *const *filelist, bool replication,
ShapeTable **shape_table, std::string &file_prefix) {
InitFeatureDefs(&feature_defs);
InitIntegerFX();
@ -236,10 +236,8 @@ std::unique_ptr<MasterTrainer> LoadTrainingData(int argc, const char *const *arg
}
}
trainer->SetFeatureSpace(fs);
const char *page_name;
// Load training data from .tr files on the command line.
int tessoptind = 1;
while ((page_name = GetNextFilename(argc, argv, tessoptind)) != nullptr) {
// Load training data from .tr files in filelist (terminated by nullptr).
for (const char *page_name = *filelist++; page_name != nullptr; page_name = *filelist++) {
tprintf("Reading %s ...\n", page_name);
trainer->ReadTrainingSamples(page_name, feature_defs, false);
@ -291,25 +289,6 @@ std::unique_ptr<MasterTrainer> LoadTrainingData(int argc, const char *const *arg
return trainer;
}
/*---------------------------------------------------------------------------*/
/**
 * Hands out the entries of argv one at a time, using tessoptind as the
 * cursor. Intended to be called only after ParseArguments has parsed and
 * removed all option arguments, so that the remaining entries are the
 * non-option arguments.
 *
 * @param argc number of entries in argv
 * @param argv argument vector to read from
 * @param tessoptind index of the entry to return; advanced by one when an
 * entry is returned, left untouched otherwise
 * @return the entry at the incoming tessoptind, or nullptr when
 * tessoptind is not less than argc
 */
const char *GetNextFilename(int argc, const char *const *argv, int &tessoptind) {
  // Nothing left to hand out: do not touch the cursor.
  if (tessoptind >= argc) {
    return nullptr;
  }
  const char *next_name = argv[tessoptind];
  ++tessoptind;
  return next_name;
} /* GetNextFilename */
/*---------------------------------------------------------------------------*/
/**
* This routine searches through a list of labeled lists to find

View File

@ -121,12 +121,9 @@ void WriteShapeTable(const std::string &file_prefix, const ShapeTable &shape_tab
// If shape_table is not nullptr, but failed to load, make a fake flat one,
// as shape clustering was not run.
TESS_COMMON_TRAINING_API
std::unique_ptr<MasterTrainer> LoadTrainingData(int argc, const char *const *argv, bool replication,
std::unique_ptr<MasterTrainer> LoadTrainingData(const char *const *filelist, bool replication,
ShapeTable **shape_table, std::string &file_prefix);
TESS_COMMON_TRAINING_API
const char *GetNextFilename(int argc, const char *const *argv, int &tessoptind);
LABELEDLIST FindList(tesseract::LIST List, const std::string &Label);
TESS_COMMON_TRAINING_API

View File

@ -198,7 +198,7 @@ int main(int argc, char **argv) {
ShapeTable *shape_table = nullptr;
std::string file_prefix;
// Load the training data.
auto trainer = tesseract::LoadTrainingData(argc, argv, false, &shape_table, file_prefix);
auto trainer = tesseract::LoadTrainingData(argv + 1, false, &shape_table, file_prefix);
if (trainer == nullptr) {
return 1; // Failed.
}

View File

@ -47,7 +47,7 @@ int main(int argc, char **argv) {
ParseArguments(&argc, &argv);
std::string file_prefix;
auto trainer = tesseract::LoadTrainingData(argc, argv, false, nullptr, file_prefix);
auto trainer = tesseract::LoadTrainingData(argv + 1, false, nullptr, file_prefix);
if (!trainer) {
return 1;

View File

@ -173,12 +173,11 @@ protected:
FLAGS_X = TestDataNameToPath("eng.xheights").c_str();
FLAGS_U = TestDataNameToPath("eng.unicharset").c_str();
std::string tr_file_name(TestDataNameToPath("eng.Arial.exp0.tr"));
const char *argv[] = {tr_file_name.c_str()};
int argc = 1;
const char *filelist[] = {tr_file_name.c_str(), nullptr};
std::string file_prefix;
delete shape_table_;
shape_table_ = nullptr;
master_trainer_ = LoadTrainingData(argc, argv, false, &shape_table_, file_prefix);
master_trainer_ = LoadTrainingData(filelist, false, &shape_table_, file_prefix);
EXPECT_TRUE(master_trainer_ != nullptr);
EXPECT_TRUE(shape_table_ != nullptr);
}