Merge pull request #1951 from stweil/checkdir

combine_tessdata, lstmtraining: Check for write failures
This commit is contained in:
Egor Pugin 2018-10-05 23:38:01 +03:00 committed by GitHub
commit 0e43ae5cf4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 45 additions and 20 deletions

View File

@ -72,7 +72,7 @@ int main(int argc, char **argv) {
tesseract::TessdataManager tm;
if (argc > 1 && (!strcmp(argv[1], "-v") || !strcmp(argv[1], "--version"))) {
printf("%s\n", tesseract::TessBaseAPI::Version());
return 0;
return EXIT_SUCCESS;
} else if (argc == 2) {
printf("Combining tessdata files\n");
STRING lang = argv[1];
@ -92,16 +92,22 @@ int main(int argc, char **argv) {
// Initialize TessdataManager with the data in the given traineddata file.
if (!tm.Init(argv[2])) {
tprintf("Failed to read %s\n", argv[2]);
exit(1);
return EXIT_FAILURE;
}
printf("Extracting tessdata components from %s\n", argv[2]);
if (strcmp(argv[1], "-e") == 0) {
for (i = 3; i < argc; ++i) {
errno = 0;
if (tm.ExtractToFile(argv[i])) {
printf("Wrote %s\n", argv[i]);
} else {
} else if (errno == 0) {
printf("Not extracting %s, since this component"
" is not present\n", argv[i]);
return EXIT_FAILURE;
} else {
printf("Error, could not extract %s: %s\n",
argv[i], strerror(errno));
return EXIT_FAILURE;
}
}
} else { // extract all the components
@ -111,8 +117,13 @@ int main(int argc, char **argv) {
if (*last != '.')
filename += '.';
filename += tesseract::kTessdataFileSuffixes[i];
errno = 0;
if (tm.ExtractToFile(filename.string())) {
printf("Wrote %s\n", filename.string());
} else if (errno != 0) {
printf("Error, could not extract %s: %s\n",
filename.string(), strerror(errno));
return EXIT_FAILURE;
}
}
}
@ -124,7 +135,7 @@ int main(int argc, char **argv) {
if (rename(new_traineddata_filename, traineddata_filename.string()) != 0) {
tprintf("Failed to create a temporary file %s\n",
traineddata_filename.string());
exit(1);
return EXIT_FAILURE;
}
// Initialize TessdataManager with the data in the given traineddata file.
@ -135,17 +146,17 @@ int main(int argc, char **argv) {
} else if (argc == 3 && strcmp(argv[1], "-c") == 0) {
if (!tm.Init(argv[2])) {
tprintf("Failed to read %s\n", argv[2]);
exit(1);
return EXIT_FAILURE;
}
tesseract::TFile fp;
if (!tm.GetComponent(tesseract::TESSDATA_LSTM, &fp)) {
tprintf("No LSTM Component found in %s!\n", argv[2]);
exit(1);
return EXIT_FAILURE;
}
tesseract::LSTMRecognizer recognizer;
if (!recognizer.DeSerialize(&tm, &fp)) {
tprintf("Failed to deserialize LSTM in %s!\n", argv[2]);
exit(1);
return EXIT_FAILURE;
}
recognizer.ConvertToInt();
GenericVector<char> lstm_data;
@ -155,7 +166,7 @@ int main(int argc, char **argv) {
lstm_data.size());
if (!tm.SaveFile(argv[2], nullptr)) {
tprintf("Failed to write modified traineddata:%s!\n", argv[2]);
exit(1);
return EXIT_FAILURE;
}
} else if (argc == 3 && strcmp(argv[1], "-d") == 0) {
// Initialize TessdataManager with the data in the given traineddata file.
@ -186,4 +197,5 @@ int main(int argc, char **argv) {
return 1;
}
tm.Directory();
return EXIT_SUCCESS;
}

View File

@ -76,11 +76,11 @@ int main(int argc, char **argv) {
// Purify the model name in case it is based on the network string.
if (FLAGS_model_output.empty()) {
tprintf("Must provide a --model_output!\n");
return 1;
return EXIT_FAILURE;
}
if (FLAGS_traineddata.empty()) {
tprintf("Must provide a --traineddata see training wiki\n");
return 1;
return EXIT_FAILURE;
}
STRING model_output = FLAGS_model_output.c_str();
for (int i = 0; i < model_output.length(); ++i) {
@ -89,6 +89,19 @@ int main(int argc, char **argv) {
if (model_output[i] == '(' || model_output[i] == ')')
model_output[i] = '_';
}
// Check write permissions.
STRING test_file = FLAGS_model_output.c_str();
test_file += "_wtest";
FILE* f = fopen(test_file.c_str(), "wb");
if (f != nullptr) {
fclose(f);
remove(test_file.c_str());
} else {
tprintf("Error, model output cannot be written: %s\n", strerror(errno));
return EXIT_FAILURE;
}
// Setup the trainer.
STRING checkpoint_file = FLAGS_model_output.c_str();
checkpoint_file += "_checkpoint";
@ -105,7 +118,7 @@ int main(int argc, char **argv) {
if (!trainer.TryLoadingCheckpoint(FLAGS_continue_from.c_str(), nullptr)) {
tprintf("Failed to read continue from: %s\n",
FLAGS_continue_from.c_str());
return 1;
return EXIT_FAILURE;
}
if (FLAGS_debug_network) {
trainer.DebugNetwork();
@ -116,20 +129,20 @@ int main(int argc, char **argv) {
FLAGS_model_output.c_str());
}
}
return 0;
return EXIT_SUCCESS;
}
// Get the list of files to process.
if (FLAGS_train_listfile.empty()) {
tprintf("Must supply a list of training filenames! --train_listfile\n");
return 1;
return EXIT_FAILURE;
}
GenericVector<STRING> filenames;
if (!tesseract::LoadFileLinesToStrings(FLAGS_train_listfile.c_str(),
&filenames)) {
tprintf("Failed to load list of training filenames from %s\n",
FLAGS_train_listfile.c_str());
return 1;
return EXIT_FAILURE;
}
// Checkpoints always take priority if they are available.
@ -145,7 +158,7 @@ int main(int argc, char **argv) {
? FLAGS_continue_from.c_str()
: FLAGS_old_traineddata.c_str())) {
tprintf("Failed to continue from: %s\n", FLAGS_continue_from.c_str());
return 1;
return EXIT_FAILURE;
}
tprintf("Continuing from %s\n", FLAGS_continue_from.c_str());
trainer.InitIterations();
@ -155,7 +168,7 @@ int main(int argc, char **argv) {
tprintf("Appending a new network to an old one!!");
if (FLAGS_continue_from.empty()) {
tprintf("Must set --continue_from for appending!\n");
return 1;
return EXIT_FAILURE;
}
}
// We are initializing from scratch.
@ -165,7 +178,7 @@ int main(int argc, char **argv) {
FLAGS_adam_beta)) {
tprintf("Failed to create network from spec: %s\n",
FLAGS_net_spec.c_str());
return 1;
return EXIT_FAILURE;
}
trainer.set_perfect_delay(FLAGS_perfect_sample_delay);
}
@ -176,7 +189,7 @@ int main(int argc, char **argv) {
: tesseract::CS_ROUND_ROBIN,
FLAGS_randomly_rotate)) {
tprintf("Load of images failed!!\n");
return 1;
return EXIT_FAILURE;
}
tesseract::LSTMTester tester(static_cast<int64_t>(FLAGS_max_image_MB) *
@ -186,7 +199,7 @@ int main(int argc, char **argv) {
if (!tester.LoadAllEvalData(FLAGS_eval_listfile.c_str())) {
tprintf("Failed to load eval data from: %s\n",
FLAGS_eval_listfile.c_str());
return 1;
return EXIT_FAILURE;
}
tester_callback =
NewPermanentTessCallback(&tester, &tesseract::LSTMTester::RunEvalAsync);
@ -208,5 +221,5 @@ int main(int argc, char **argv) {
FLAGS_max_iterations == 0));
delete tester_callback;
tprintf("Finished! Error rate = %g\n", trainer.best_error_rate());
return 0;
return EXIT_SUCCESS;
} /* main */