Merge pull request #3256 from stweil/mastertrainer_test

Fix memory leak in mastertrainer_test (fixes issue #3215)
This commit is contained in:
Egor Pugin 2021-01-23 17:27:29 +03:00 committed by GitHub
commit f8173fb8d8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 48 additions and 30 deletions

View File

@@ -110,7 +110,8 @@ int main(int argc, char **argv) {
 tesseract::CheckSharedLibraryVersion();
 ParseArguments(&argc, &argv);
 STRING file_prefix;
-auto [trainer,_] = tesseract::LoadTrainingData(argc, argv, false, false, &file_prefix);
+auto trainer =
+tesseract::LoadTrainingData(argc, argv, false, nullptr, &file_prefix);
 tesseract::TessBaseAPI* api;
 // Decode the classifier string.
 tesseract::ShapeClassifier* shape_classifier = InitializeClassifier(

View File

@@ -147,14 +147,16 @@ void ParseArguments(int* argc, char ***argv) {
 namespace tesseract {
 // Helper loads shape table from the given file.
-std::unique_ptr<ShapeTable> LoadShapeTable(const STRING& file_prefix) {
-std::unique_ptr<ShapeTable> shape_table;
+ShapeTable* LoadShapeTable(const STRING& file_prefix) {
+ShapeTable* shape_table = nullptr;
 STRING shape_table_file = file_prefix;
 shape_table_file += kShapeTableFileSuffix;
 TFile shape_fp;
 if (shape_fp.Open(shape_table_file.c_str(), nullptr)) {
-shape_table = std::make_unique<ShapeTable>();
+shape_table = new ShapeTable;
 if (!shape_table->DeSerialize(&shape_fp)) {
+delete shape_table;
+shape_table = nullptr;
 tprintf("Error: Failed to read shape table %s\n",
 shape_table_file.c_str());
 } else {
@@ -202,10 +204,9 @@ void WriteShapeTable(const STRING& file_prefix, const ShapeTable& shape_table) {
 * If shape_table is not nullptr, but failed to load, make a fake flat one,
 * as shape clustering was not run.
 */
-std::pair<std::unique_ptr<MasterTrainer>, std::unique_ptr<ShapeTable>>
-LoadTrainingData(int argc, const char* const * argv,
+std::unique_ptr<MasterTrainer> LoadTrainingData(int argc, const char* const * argv,
 bool replication,
-bool shape_analysis,
+ShapeTable** shape_table,
 STRING* file_prefix) {
 InitFeatureDefs(&feature_defs);
 InitIntegerFX();
@@ -218,9 +219,12 @@ LoadTrainingData(int argc, const char* const * argv,
 // a shape_table written by a previous shape clustering, then
 // shape_analysis will be true, meaning that the MasterTrainer will replace
 // some members of the unicharset with their fragments.
-std::unique_ptr<ShapeTable> shape_table;
-if (shape_analysis) {
-shape_table = LoadShapeTable(*file_prefix);
+bool shape_analysis = false;
+if (shape_table != nullptr) {
+*shape_table = LoadShapeTable(*file_prefix);
+if (*shape_table != nullptr) shape_analysis = true;
+} else {
+shape_analysis = true;
 }
 auto trainer = std::make_unique<MasterTrainer>(NM_CHAR_ANISOTROPIC,
 shape_analysis,
@@ -283,18 +287,19 @@ LoadTrainingData(int argc, const char* const * argv,
 fprintf(stderr, "Failed to save unicharset to file %s\n", FLAGS_O.c_str());
 return {};
 }
-if (shape_analysis) {
+if (shape_table != nullptr) {
 // If we previously failed to load a shapetable, then shape clustering
 // wasn't run so make a flat one now.
-if (!shape_table) {
-shape_table = std::make_unique<ShapeTable>();
-trainer->SetupFlatShapeTable(shape_table.get());
+if (*shape_table == nullptr) {
+*shape_table = new ShapeTable;
+trainer->SetupFlatShapeTable(*shape_table);
 tprintf("Flat shape table summary: %s\n",
-shape_table->SummaryStr().c_str());
+(*shape_table)->SummaryStr().c_str());
 }
-shape_table->set_unicharset(trainer->unicharset());
+(*shape_table)->set_unicharset(trainer->unicharset());
 }
-return { std::move(trainer), std::move(shape_table) };
+return trainer;
 }
 } // namespace tesseract.

View File

@@ -100,7 +100,7 @@ using MERGE_CLASS = MERGE_CLASS_NODE*;
 namespace tesseract {
 // Helper loads shape table from the given file.
-std::unique_ptr<ShapeTable> LoadShapeTable(const STRING& file_prefix);
+ShapeTable* LoadShapeTable(const STRING& file_prefix);
 // Helper to write the shape_table.
 TESS_COMMON_TRAINING_API
 void WriteShapeTable(const STRING& file_prefix, const ShapeTable& shape_table);
@@ -119,10 +119,9 @@ void WriteShapeTable(const STRING& file_prefix, const ShapeTable& shape_table);
 // If shape_table is not nullptr, but failed to load, make a fake flat one,
 // as shape clustering was not run.
 TESS_COMMON_TRAINING_API
-std::pair<std::unique_ptr<MasterTrainer>, std::unique_ptr<ShapeTable>>
-LoadTrainingData(int argc, const char* const * argv,
+std::unique_ptr<MasterTrainer> LoadTrainingData(int argc, const char* const * argv,
 bool replication,
-bool shape_analysis,
+ShapeTable** shape_table,
 STRING* file_prefix);
 } // namespace tesseract.

View File

@@ -202,11 +202,12 @@ int main (int argc, char **argv) {
 ParseArguments(&argc, &argv);
+ShapeTable* shape_table = nullptr;
 STRING file_prefix;
 // Load the training data.
-auto [trainer,shape_table] = tesseract::LoadTrainingData(argc, argv,
+auto trainer = tesseract::LoadTrainingData(argc, argv,
 false,
-true,
+&shape_table,
 &file_prefix);
 if (trainer == nullptr) return 1; // Failed.
@@ -215,7 +216,7 @@ int main (int argc, char **argv) {
 // with the same list of unichars becomes a different class and the configs
 // represent the different combinations of fonts.
 IndexMapBiDi config_map;
-SetupConfigMap(shape_table.get(), &config_map);
+SetupConfigMap(shape_table, &config_map);
 WriteShapeTable(file_prefix, *shape_table);
 // If the shape_table is flat, then either we didn't run shape clustering, or
@@ -269,6 +270,7 @@ int main (int argc, char **argv) {
 }
 delete [] float_classes;
 FreeLabeledClassList(mf_classes);
+delete shape_table;
 printf("Done!\n");
 if (!FLAGS_test_ch.empty()) {
 // If we are displaying debug window(s), wait for the user to look at them.

View File

@@ -49,7 +49,9 @@ int main(int argc, char **argv) {
 ParseArguments(&argc, &argv);
 STRING file_prefix;
-auto [trainer,_] = tesseract::LoadTrainingData(argc, argv, false, false, &file_prefix);
+auto trainer =
+tesseract::LoadTrainingData(argc, argv, false, nullptr, &file_prefix);
 if (!trainer)
 return 1;

View File

@@ -159,6 +159,14 @@ class MasterTrainerTest : public testing::Test {
 return file::JoinPath(FLAGS_test_tmpdir, name);
 }
+MasterTrainerTest() {
+shape_table_ = nullptr;
+master_trainer_ = nullptr;
+}
+~MasterTrainerTest() {
+delete shape_table_;
+}
 // Initializes the master_trainer_ and shape_table_.
 // if load_from_tmp, then reloads a master trainer that was saved by a
 // previous call in which it was false.
@@ -171,9 +179,10 @@ class MasterTrainerTest : public testing::Test {
 const char* argv[] = {tr_file_name.c_str()};
 int argc = 1;
 STRING file_prefix;
-auto [m,s] = LoadTrainingData(argc, argv, false, true, &file_prefix);
-master_trainer_ = std::move(m);
-shape_table_ = std::move(s);
+delete shape_table_;
+shape_table_ = nullptr;
+master_trainer_ =
+LoadTrainingData(argc, argv, false, &shape_table_, &file_prefix);
 EXPECT_TRUE(master_trainer_ != nullptr);
 EXPECT_TRUE(shape_table_ != nullptr);
 }
@@ -226,7 +235,7 @@ class MasterTrainerTest : public testing::Test {
 }
 // Objects declared here can be used by all tests in the test case for Foo.
-std::unique_ptr<ShapeTable> shape_table_;
+ShapeTable* shape_table_;
 std::unique_ptr<MasterTrainer> master_trainer_;
 #endif
 };
@@ -257,7 +266,7 @@ TEST_F(MasterTrainerTest, ErrorCounterTest) {
 // count junk.
 if (shape_table_->FindShape(0, -1) < 0) shape_table_->AddShape(0, 0);
 // Make a mock classifier.
-auto shape_classifier = std::make_unique<MockClassifier>(shape_table_.get());
+auto shape_classifier = std::make_unique<MockClassifier>(shape_table_);
 // Get the accuracy report.
 STRING accuracy_report;
 master_trainer_->TestClassifierOnSamples(tesseract::CT_UNICHAR_TOP1_ERR, 0,