diff --git a/api/tesseractmain.cpp b/api/tesseractmain.cpp index 1246bcbb..16f03c6e 100644 --- a/api/tesseractmain.cpp +++ b/api/tesseractmain.cpp @@ -286,7 +286,8 @@ void ParseArgs(const int argc, char** argv, void PreloadRenderers(tesseract::TessBaseAPI* api, tesseract::PointerVector* renderers, tesseract::PageSegMode pagesegmode, - const char* outputbase) { + const char* outputbase, + bool in_training_mode) { if (pagesegmode == tesseract::PSM_OSD_ONLY) { renderers->push_back(new tesseract::TessOsdRenderer(outputbase)); } else { @@ -315,15 +316,8 @@ void PreloadRenderers(tesseract::TessBaseAPI* api, renderers->push_back(new tesseract::TessBoxTextRenderer(outputbase)); } - // disable text renderer when using one of these configs: - // ambigs.train, box.train, box.train.stderr, linebox, rebox - bool disable_text_renderer = - (api->GetBoolVariable("tessedit_ambigs_training", &b) && b) || - (api->GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) || - (api->GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b); - api->GetBoolVariable("tessedit_create_txt", &b); - if (b || (renderers->empty() && !disable_text_renderer)) { + if (b || (renderers->empty() && !in_training_mode)) { renderers->push_back(new tesseract::TessTextRenderer(outputbase)); } } @@ -419,9 +413,19 @@ int main(int argc, char **argv) { exit(ret_val); } + // set in_training_mode to true when using one of these configs: + // ambigs.train, box.train, box.train.stderr, linebox, rebox + bool b = false; + bool in_training_mode = + (api.GetBoolVariable("tessedit_ambigs_training", &b) && b) || + (api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) || + (api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b); + tesseract::PointerVector renderers; - PreloadRenderers(&api, &renderers, pagesegmode, outputbase); - if (!renderers.empty()) { + PreloadRenderers(&api, &renderers, pagesegmode, outputbase, + in_training_mode); + + if (!renderers.empty() || in_training_mode) { bool succeed = api.ProcessPages(image, NULL, 0, renderers[0]); if (!succeed) { fprintf(stderr, "Error during processing.\n"); diff --git a/training/language-specific.sh b/training/language-specific.sh index 384536d0..a62f1e3c 100755 --- a/training/language-specific.sh +++ b/training/language-specific.sh @@ -69,6 +69,39 @@ LATIN_FONTS=( "DejaVu Sans Ultra-Light" \ ) +# List of fonts for printed/neo-Latin ('lat' language code, different from Latin script) +NEOLATIN_FONTS=( + "GFS Bodoni" \ + "GFS Bodoni Bold" \ + "GFS Bodoni Italic" \ + "GFS Bodoni Bold Italic" \ + "GFS Didot" \ + "GFS Didot Bold" \ + "GFS Didot Italic" \ + "GFS Didot Bold Italic" \ + "Cardo" \ + "Cardo Bold" \ + "Cardo Italic" \ + "Wyld" \ + "Wyld Italic" \ + "EB Garamond" \ + "EB Garamond Italic" \ + "Junicode" \ + "Junicode Bold" \ + "Junicode Italic" \ + "Junicode Bold Italic" \ + "IM FELL DW Pica PRO" \ + "IM FELL English PRO" \ + "IM FELL Double Pica PRO" \ + "IM FELL French Canon PRO" \ + "IM FELL Great Primer PRO" \ + "IM FELL DW Pica PRO Italic" \ + "IM FELL English PRO Italic" \ + "IM FELL Double Pica PRO Italic" \ + "IM FELL French Canon PRO Italic" \ + "IM FELL Great Primer PRO Italic" \ +) + EARLY_LATIN_FONTS=( "${FRAKTUR_FONTS[@]}" \ "${LATIN_FONTS[@]}" \ @@ -853,6 +886,9 @@ set_lang_specific_parameters() { FILTER_ARGUMENTS="--make_early_language_variant=ita" TEXT2IMAGE_EXTRA_ARGS=" --ligatures" # Add ligatures when supported. test -z "$FONTS" && FONTS=( "${EARLY_LATIN_FONTS[@]}" );; + lat ) + test -z "$EXPOSURES" && EXPOSURES="-3 -2 -1 0 1 2 3" + test -z "$FONTS" && FONTS=( "${NEOLATIN_FONTS[@]}" ) ;; spa_old ) TEXT_CORPUS="${FLAGS_webtext_prefix}/spa.corpus.txt" # Make long-s substitutions for Early Spanish text @@ -893,7 +929,6 @@ set_lang_specific_parameters() { isl ) ;; ita ) ;; jav ) ;; - lat ) ;; lav ) ;; lit ) ;; mlt ) ;;