mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-28 22:00:09 +08:00
Merge branch 'master' of github.com:tesseract-ocr/tesseract
This commit is contained in:
commit
d855a9d611
@ -286,7 +286,8 @@ void ParseArgs(const int argc, char** argv,
|
||||
void PreloadRenderers(tesseract::TessBaseAPI* api,
|
||||
tesseract::PointerVector<tesseract::TessResultRenderer>* renderers,
|
||||
tesseract::PageSegMode pagesegmode,
|
||||
const char* outputbase) {
|
||||
const char* outputbase,
|
||||
bool in_training_mode) {
|
||||
if (pagesegmode == tesseract::PSM_OSD_ONLY) {
|
||||
renderers->push_back(new tesseract::TessOsdRenderer(outputbase));
|
||||
} else {
|
||||
@ -315,15 +316,8 @@ void PreloadRenderers(tesseract::TessBaseAPI* api,
|
||||
renderers->push_back(new tesseract::TessBoxTextRenderer(outputbase));
|
||||
}
|
||||
|
||||
// disable text renderer when using one of these configs:
|
||||
// ambigs.train, box.train, box.train.stderr, linebox, rebox
|
||||
bool disable_text_renderer =
|
||||
(api->GetBoolVariable("tessedit_ambigs_training", &b) && b) ||
|
||||
(api->GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) ||
|
||||
(api->GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b);
|
||||
|
||||
api->GetBoolVariable("tessedit_create_txt", &b);
|
||||
if (b || (renderers->empty() && !disable_text_renderer)) {
|
||||
if (b || (renderers->empty() && !in_training_mode)) {
|
||||
renderers->push_back(new tesseract::TessTextRenderer(outputbase));
|
||||
}
|
||||
}
|
||||
@ -419,9 +413,19 @@ int main(int argc, char **argv) {
|
||||
exit(ret_val);
|
||||
}
|
||||
|
||||
// set in_training_mode to true when using one of these configs:
|
||||
// ambigs.train, box.train, box.train.stderr, linebox, rebox
|
||||
bool b = false;
|
||||
bool in_training_mode =
|
||||
(api.GetBoolVariable("tessedit_ambigs_training", &b) && b) ||
|
||||
(api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) ||
|
||||
(api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b);
|
||||
|
||||
tesseract::PointerVector<tesseract::TessResultRenderer> renderers;
|
||||
PreloadRenderers(&api, &renderers, pagesegmode, outputbase);
|
||||
if (!renderers.empty()) {
|
||||
PreloadRenderers(&api, &renderers, pagesegmode, outputbase,
|
||||
in_training_mode);
|
||||
|
||||
if (!renderers.empty() || in_training_mode) {
|
||||
bool succeed = api.ProcessPages(image, NULL, 0, renderers[0]);
|
||||
if (!succeed) {
|
||||
fprintf(stderr, "Error during processing.\n");
|
||||
|
@ -69,6 +69,39 @@ LATIN_FONTS=(
|
||||
"DejaVu Sans Ultra-Light" \
|
||||
)
|
||||
|
||||
# List of fonts for printed/neo-Latin ('lat' language code, different from Latin script)
|
||||
NEOLATIN_FONTS=(
|
||||
"GFS Bodoni" \
|
||||
"GFS Bodoni Bold" \
|
||||
"GFS Bodoni Italic" \
|
||||
"GFS Bodoni Bold Italic" \
|
||||
"GFS Didot" \
|
||||
"GFS Didot Bold" \
|
||||
"GFS Didot Italic" \
|
||||
"GFS Didot Bold Italic" \
|
||||
"Cardo" \
|
||||
"Cardo Bold" \
|
||||
"Cardo Italic" \
|
||||
"Wyld" \
|
||||
"Wyld Italic" \
|
||||
"EB Garamond" \
|
||||
"EB Garamond Italic" \
|
||||
"Junicode" \
|
||||
"Junicode Bold" \
|
||||
"Junicode Italic" \
|
||||
"Junicode Bold Italic" \
|
||||
"IM FELL DW Pica PRO" \
|
||||
"IM FELL English PRO" \
|
||||
"IM FELL Double Pica PRO" \
|
||||
"IM FELL French Canon PRO" \
|
||||
"IM FELL Great Primer PRO" \
|
||||
"IM FELL DW Pica PRO Italic" \
|
||||
"IM FELL English PRO Italic" \
|
||||
"IM FELL Double Pica PRO Italic" \
|
||||
"IM FELL French Canon PRO Italic" \
|
||||
"IM FELL Great Primer PRO Italic" \
|
||||
)
|
||||
|
||||
EARLY_LATIN_FONTS=(
|
||||
"${FRAKTUR_FONTS[@]}" \
|
||||
"${LATIN_FONTS[@]}" \
|
||||
@ -853,6 +886,9 @@ set_lang_specific_parameters() {
|
||||
FILTER_ARGUMENTS="--make_early_language_variant=ita"
|
||||
TEXT2IMAGE_EXTRA_ARGS=" --ligatures" # Add ligatures when supported.
|
||||
test -z "$FONTS" && FONTS=( "${EARLY_LATIN_FONTS[@]}" );;
|
||||
lat )
|
||||
test -z "$EXPOSURES" && EXPOSURES="-3 -2 -1 0 1 2 3"
|
||||
test -z "$FONTS" && FONTS=( "${NEOLATIN_FONTS[@]}" ) ;;
|
||||
spa_old )
|
||||
TEXT_CORPUS="${FLAGS_webtext_prefix}/spa.corpus.txt"
|
||||
# Make long-s substitutions for Early Spanish text
|
||||
@ -893,7 +929,6 @@ set_lang_specific_parameters() {
|
||||
isl ) ;;
|
||||
ita ) ;;
|
||||
jav ) ;;
|
||||
lat ) ;;
|
||||
lav ) ;;
|
||||
lit ) ;;
|
||||
mlt ) ;;
|
||||
|
Loading…
Reference in New Issue
Block a user