mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-27 20:59:36 +08:00
If there is no explicit renderer(s), default to TessTextRenderer
Revert fd429c32, 43834da7, 05de195e. See #49, #59.

The code in this commit solves the issue in a more elegant way, IMHO.

Now you can use:
* `tesseract eurotext.tif eurotext txt pdf`
* `tesseract eurotext.tif eurotext txt hocr`
* `tesseract eurotext.tif eurotext txt hocr pdf`

NOTE: With `tesseract eurotext.tif eurotext` or `tesseract eurotext.tif eurotext txt` the psm will be set to '3', but with `tesseract eurotext.tif eurotext txt pdf` or `tesseract eurotext.tif eurotext txt hocr` the psm will be set to '1'.
This commit is contained in:
parent
d4e0c6459a
commit
c2f5e9b849
@ -295,19 +295,37 @@ void PreloadRenderers(tesseract::TessBaseAPI* api,
|
||||
if (b) {
|
||||
bool font_info;
|
||||
api->GetBoolVariable("hocr_font_info", &font_info);
|
||||
renderers->push_back(new tesseract::TessHOcrRenderer(outputbase, font_info));
|
||||
renderers->push_back(
|
||||
new tesseract::TessHOcrRenderer(outputbase, font_info));
|
||||
}
|
||||
|
||||
api->GetBoolVariable("tessedit_create_pdf", &b);
|
||||
if (b) {
|
||||
renderers->push_back(new tesseract::TessPDFRenderer(outputbase,
|
||||
api->GetDatapath()));
|
||||
}
|
||||
|
||||
api->GetBoolVariable("tessedit_write_unlv", &b);
|
||||
if (b) renderers->push_back(new tesseract::TessUnlvRenderer(outputbase));
|
||||
if (b) {
|
||||
renderers->push_back(new tesseract::TessUnlvRenderer(outputbase));
|
||||
}
|
||||
|
||||
api->GetBoolVariable("tessedit_create_boxfile", &b);
|
||||
if (b) renderers->push_back(new tesseract::TessBoxTextRenderer(outputbase));
|
||||
if (b) {
|
||||
renderers->push_back(new tesseract::TessBoxTextRenderer(outputbase));
|
||||
}
|
||||
|
||||
// disable text renderer when using one of these configs:
|
||||
// ambigs.train, box.train, box.train.stderr, linebox, rebox
|
||||
bool disable_text_renderer =
|
||||
(api->GetBoolVariable("tessedit_ambigs_training", &b) && b) ||
|
||||
(api->GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) ||
|
||||
(api->GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b);
|
||||
|
||||
api->GetBoolVariable("tessedit_create_txt", &b);
|
||||
if (b) renderers->push_back(new tesseract::TessTextRenderer(outputbase));
|
||||
if (b || (renderers->empty() && !disable_text_renderer)) {
|
||||
renderers->push_back(new tesseract::TessTextRenderer(outputbase));
|
||||
}
|
||||
}
|
||||
|
||||
if (!renderers->empty()) {
|
||||
|
@ -381,7 +381,7 @@ Tesseract::Tesseract()
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_write_unlv, false, "Write .unlv output file",
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_create_txt, true, "Write .txt output file",
|
||||
BOOL_MEMBER(tessedit_create_txt, false, "Write .txt output file",
|
||||
this->params()),
|
||||
BOOL_MEMBER(tessedit_create_hocr, false, "Write .html hOCR output file",
|
||||
this->params()),
|
||||
|
@ -1001,7 +1001,7 @@ class Tesseract : public Wordrec {
|
||||
BOOL_VAR_H(tessedit_write_rep_codes, false,
|
||||
"Write repetition char code");
|
||||
BOOL_VAR_H(tessedit_write_unlv, false, "Write .unlv output file");
|
||||
BOOL_VAR_H(tessedit_create_txt, true, "Write .txt output file");
|
||||
BOOL_VAR_H(tessedit_create_txt, false, "Write .txt output file");
|
||||
BOOL_VAR_H(tessedit_create_hocr, false, "Write .html hOCR output file");
|
||||
BOOL_VAR_H(tessedit_create_pdf, false, "Write .pdf output file");
|
||||
STRING_VAR_H(unrecognised_char, "|",
|
||||
|
@ -1,3 +1,2 @@
|
||||
tessedit_create_txt 0
|
||||
tessedit_create_hocr 1
|
||||
tessedit_pageseg_mode 1
|
||||
|
@ -1,2 +1 @@
|
||||
tessedit_create_txt 0
|
||||
tessedit_create_boxfile 1
|
||||
|
@ -1,3 +1,2 @@
|
||||
tessedit_create_txt 0
|
||||
tessedit_create_pdf 1
|
||||
tessedit_pageseg_mode 1
|
||||
|
3
tessdata/configs/txt
Normal file
3
tessdata/configs/txt
Normal file
@ -0,0 +1,3 @@
|
||||
# This config file should be used with other config files that create renderers.
|
||||
# usage example: tesseract eurotext.tif eurotext txt hocr pdf
|
||||
tessedit_create_txt 1
|
@ -1,3 +1,2 @@
|
||||
tessedit_create_txt 0
|
||||
tessedit_write_unlv 1
|
||||
tessedit_pageseg_mode 6
|
||||
|
Loading…
Reference in New Issue
Block a user