From 58122ea313fb5e0eb4930b05a33e7ff138b6cfdb Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Wed, 2 Oct 2019 19:10:23 +0200 Subject: [PATCH] Don't create OCR result files when training data is created The configuration file lstm.train causes Tesseract to generate training data for training of an LSTM line recognizer. In this mode, no other files with OCR results should be written. Without this patch, Tesseract writes a small text file. Signed-off-by: Stefan Weil --- src/api/tesseractmain.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/api/tesseractmain.cpp b/src/api/tesseractmain.cpp index 5999ae4e..945cafa5 100644 --- a/src/api/tesseractmain.cpp +++ b/src/api/tesseractmain.cpp @@ -565,6 +565,9 @@ static void PreloadRenderers( api->GetBoolVariable("tessedit_create_txt", &b); if (b || (!error && renderers->empty())) { + // Create text output if no other output was requested + // even if text output was not explicitly requested unless + // there was an error. auto* renderer = new tesseract::TessTextRenderer(outputbase); if (renderer->happy()) { @@ -716,13 +719,15 @@ int main(int argc, char** argv) { return ret_val; } - // set in_training_mode to true when using one of these configs: - // ambigs.train, box.train, box.train.stderr, linebox, rebox + // Set in_training_mode to true when using one of these configs: + // ambigs.train, box.train, box.train.stderr, linebox, rebox, lstm.train. + // In this mode no other OCR result files are written. bool b = false; bool in_training_mode = (api.GetBoolVariable("tessedit_ambigs_training", &b) && b) || (api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) || - (api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b); + (api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b) || + (api.GetBoolVariable("tessedit_train_line_recognizer", &b) && b); #ifdef DISABLED_LEGACY_ENGINE auto cur_psm = api.GetPageSegMode();