mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-12-13 16:09:04 +08:00
Don't create OCR result files when training data is created
The configuration file lstm.train causes Tesseract to generate training data for training an LSTM line recognizer. In this mode, no other files with OCR results should be written. Without this patch, Tesseract still writes a small text file.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
parent
3dfd72721b
commit
58122ea313
@ -565,6 +565,9 @@ static void PreloadRenderers(
|
||||
|
||||
api->GetBoolVariable("tessedit_create_txt", &b);
|
||||
if (b || (!error && renderers->empty())) {
|
||||
// Create text output if no other output was requested
|
||||
// even if text output was not explicitly requested unless
|
||||
// there was an error.
|
||||
auto* renderer =
|
||||
new tesseract::TessTextRenderer(outputbase);
|
||||
if (renderer->happy()) {
|
||||
@ -716,13 +719,15 @@ int main(int argc, char** argv) {
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
// set in_training_mode to true when using one of these configs:
|
||||
// ambigs.train, box.train, box.train.stderr, linebox, rebox
|
||||
// Set in_training_mode to true when using one of these configs:
|
||||
// ambigs.train, box.train, box.train.stderr, linebox, rebox, lstm.train.
|
||||
// In this mode no other OCR result files are written.
|
||||
bool b = false;
|
||||
bool in_training_mode =
|
||||
(api.GetBoolVariable("tessedit_ambigs_training", &b) && b) ||
|
||||
(api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) ||
|
||||
(api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b);
|
||||
(api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b) ||
|
||||
(api.GetBoolVariable("tessedit_train_line_recognizer", &b) && b);
|
||||
|
||||
#ifdef DISABLED_LEGACY_ENGINE
|
||||
auto cur_psm = api.GetPageSegMode();
|
||||
|
Loading…
Reference in New Issue
Block a user