Merge branch 'master' of github.com:tesseract-ocr/tesseract

This commit is contained in:
Egor Pugin 2016-01-26 13:47:16 +03:00
commit d855a9d611
2 changed files with 51 additions and 12 deletions

View File

@ -286,7 +286,8 @@ void ParseArgs(const int argc, char** argv,
void PreloadRenderers(tesseract::TessBaseAPI* api,
tesseract::PointerVector<tesseract::TessResultRenderer>* renderers,
tesseract::PageSegMode pagesegmode,
const char* outputbase) {
const char* outputbase,
bool in_training_mode) {
if (pagesegmode == tesseract::PSM_OSD_ONLY) {
renderers->push_back(new tesseract::TessOsdRenderer(outputbase));
} else {
@ -315,15 +316,8 @@ void PreloadRenderers(tesseract::TessBaseAPI* api,
renderers->push_back(new tesseract::TessBoxTextRenderer(outputbase));
}
// disable text renderer when using one of these configs:
// ambigs.train, box.train, box.train.stderr, linebox, rebox
bool disable_text_renderer =
(api->GetBoolVariable("tessedit_ambigs_training", &b) && b) ||
(api->GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) ||
(api->GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b);
api->GetBoolVariable("tessedit_create_txt", &b);
if (b || (renderers->empty() && !disable_text_renderer)) {
if (b || (renderers->empty() && !in_training_mode)) {
renderers->push_back(new tesseract::TessTextRenderer(outputbase));
}
}
@ -419,9 +413,19 @@ int main(int argc, char **argv) {
exit(ret_val);
}
// set in_training_mode to true when using one of these configs:
// ambigs.train, box.train, box.train.stderr, linebox, rebox
bool b = false;
bool in_training_mode =
(api.GetBoolVariable("tessedit_ambigs_training", &b) && b) ||
(api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) ||
(api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b);
tesseract::PointerVector<tesseract::TessResultRenderer> renderers;
PreloadRenderers(&api, &renderers, pagesegmode, outputbase);
if (!renderers.empty()) {
PreloadRenderers(&api, &renderers, pagesegmode, outputbase,
in_training_mode);
if (!renderers.empty() || in_training_mode) {
bool succeed = api.ProcessPages(image, NULL, 0, renderers[0]);
if (!succeed) {
fprintf(stderr, "Error during processing.\n");

View File

@ -69,6 +69,39 @@ LATIN_FONTS=(
"DejaVu Sans Ultra-Light" \
)
# List of fonts for printed/neo-Latin ('lat' language code, different from Latin script)
#
# This array holds the font names used for the 'lat' language: the 'lat' case
# in set_lang_specific_parameters assigns it to FONTS when FONTS is empty.
# Entries are grouped by family (GFS Bodoni, GFS Didot, Cardo, Wyld,
# EB Garamond, Junicode, IM FELL), with each family's style variants listed
# as separate entries.
NEOLATIN_FONTS=(
"GFS Bodoni" \
"GFS Bodoni Bold" \
"GFS Bodoni Italic" \
"GFS Bodoni Bold Italic" \
"GFS Didot" \
"GFS Didot Bold" \
"GFS Didot Italic" \
"GFS Didot Bold Italic" \
"Cardo" \
"Cardo Bold" \
"Cardo Italic" \
"Wyld" \
"Wyld Italic" \
"EB Garamond" \
"EB Garamond Italic" \
"Junicode" \
"Junicode Bold" \
"Junicode Italic" \
"Junicode Bold Italic" \
"IM FELL DW Pica PRO" \
"IM FELL English PRO" \
"IM FELL Double Pica PRO" \
"IM FELL French Canon PRO" \
"IM FELL Great Primer PRO" \
"IM FELL DW Pica PRO Italic" \
"IM FELL English PRO Italic" \
"IM FELL Double Pica PRO Italic" \
"IM FELL French Canon PRO Italic" \
"IM FELL Great Primer PRO Italic" \
)
EARLY_LATIN_FONTS=(
"${FRAKTUR_FONTS[@]}" \
"${LATIN_FONTS[@]}" \
@ -853,6 +886,9 @@ set_lang_specific_parameters() {
FILTER_ARGUMENTS="--make_early_language_variant=ita"
TEXT2IMAGE_EXTRA_ARGS=" --ligatures" # Add ligatures when supported.
test -z "$FONTS" && FONTS=( "${EARLY_LATIN_FONTS[@]}" );;
lat )
test -z "$EXPOSURES" && EXPOSURES="-3 -2 -1 0 1 2 3"
test -z "$FONTS" && FONTS=( "${NEOLATIN_FONTS[@]}" ) ;;
spa_old )
TEXT_CORPUS="${FLAGS_webtext_prefix}/spa.corpus.txt"
# Make long-s substitutions for Early Spanish text
@ -893,7 +929,6 @@ set_lang_specific_parameters() {
isl ) ;;
ita ) ;;
jav ) ;;
lat ) ;;
lav ) ;;
lit ) ;;
mlt ) ;;