If no renderer is explicitly requested, default to TessTextRenderer

Revert fd429c32, 43834da7, 05de195e.

See #49, #59.

The code in this commit solves the issue in a more elegant way, IMHO.

Now you can use:
  * `tesseract eurotext.tif eurotext txt pdf`
  * `tesseract eurotext.tif eurotext txt hocr`
  * `tesseract eurotext.tif eurotext txt hocr pdf`

NOTE:
  With `tesseract eurotext.tif eurotext`
  or `tesseract eurotext.tif eurotext txt`
  the page segmentation mode (psm) will be set to '3', but
  with `tesseract eurotext.tif eurotext txt pdf`
  or `tesseract eurotext.tif eurotext txt hocr`
  the psm will be set to '1', because the pdf and hocr configs set tessedit_pageseg_mode to 1.
This commit is contained in:
amitdo 2015-12-11 19:06:49 +02:00
parent d4e0c6459a
commit c2f5e9b849
8 changed files with 38 additions and 21 deletions

View File

@ -295,19 +295,37 @@ void PreloadRenderers(tesseract::TessBaseAPI* api,
if (b) {
bool font_info;
api->GetBoolVariable("hocr_font_info", &font_info);
renderers->push_back(new tesseract::TessHOcrRenderer(outputbase, font_info));
renderers->push_back(
new tesseract::TessHOcrRenderer(outputbase, font_info));
}
api->GetBoolVariable("tessedit_create_pdf", &b);
if (b) {
renderers->push_back(new tesseract::TessPDFRenderer(outputbase,
api->GetDatapath()));
}
api->GetBoolVariable("tessedit_write_unlv", &b);
if (b) renderers->push_back(new tesseract::TessUnlvRenderer(outputbase));
if (b) {
renderers->push_back(new tesseract::TessUnlvRenderer(outputbase));
}
api->GetBoolVariable("tessedit_create_boxfile", &b);
if (b) renderers->push_back(new tesseract::TessBoxTextRenderer(outputbase));
if (b) {
renderers->push_back(new tesseract::TessBoxTextRenderer(outputbase));
}
// disable text renderer when using one of these configs:
// ambigs.train, box.train, box.train.stderr, linebox, rebox
bool disable_text_renderer =
(api->GetBoolVariable("tessedit_ambigs_training", &b) && b) ||
(api->GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) ||
(api->GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b);
api->GetBoolVariable("tessedit_create_txt", &b);
if (b) renderers->push_back(new tesseract::TessTextRenderer(outputbase));
// Add the plain-text renderer when it was explicitly requested via
// tessedit_create_txt, or as the default when no other renderer has been
// queued and disable_text_renderer is not set (training/box configs).
if (b || (renderers->empty() && !disable_text_renderer)) {
  renderers->push_back(new tesseract::TessTextRenderer(outputbase));
}
}
if (!renderers->empty()) {

View File

@ -381,7 +381,7 @@ Tesseract::Tesseract()
this->params()),
BOOL_MEMBER(tessedit_write_unlv, false, "Write .unlv output file",
this->params()),
BOOL_MEMBER(tessedit_create_txt, true, "Write .txt output file",
BOOL_MEMBER(tessedit_create_txt, false, "Write .txt output file",
this->params()),
BOOL_MEMBER(tessedit_create_hocr, false, "Write .html hOCR output file",
this->params()),

View File

@ -1001,7 +1001,7 @@ class Tesseract : public Wordrec {
BOOL_VAR_H(tessedit_write_rep_codes, false,
"Write repetition char code");
BOOL_VAR_H(tessedit_write_unlv, false, "Write .unlv output file");
BOOL_VAR_H(tessedit_create_txt, true, "Write .txt output file");
BOOL_VAR_H(tessedit_create_txt, false, "Write .txt output file");
BOOL_VAR_H(tessedit_create_hocr, false, "Write .html hOCR output file");
BOOL_VAR_H(tessedit_create_pdf, false, "Write .pdf output file");
STRING_VAR_H(unrecognised_char, "|",

View File

@ -1,3 +1,2 @@
tessedit_create_txt 0
tessedit_create_hocr 1
tessedit_pageseg_mode 1

View File

@ -1,2 +1 @@
tessedit_create_txt 0
tessedit_create_boxfile 1

View File

@ -1,3 +1,2 @@
tessedit_create_txt 0
tessedit_create_pdf 1
tessedit_pageseg_mode 1

3
tessdata/configs/txt Normal file
View File

@ -0,0 +1,3 @@
# This config file should be used with other config files that create renderers.
# usage example: tesseract eurotext.tif eurotext txt hocr pdf
tessedit_create_txt 1

View File

@ -1,3 +1,2 @@
tessedit_create_txt 0
tessedit_write_unlv 1
tessedit_pageseg_mode 6