Allow arbitrary configuration options to be set from the command line (fix issue 893)

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@837 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
zdenop@gmail.com 2013-04-29 20:43:14 +00:00
parent 1032cb1692
commit 7dcfd02c22
2 changed files with 31 additions and 6 deletions

View File

@ -90,6 +90,9 @@ int main(int argc, char **argv) {
} else if (strcmp(argv[arg], "--print-parameters") == 0) {
noocr = true;
print_parameters = true;
} else if (strcmp(argv[arg], "-o") == 0 && arg + 1 < argc) {
// handled properly after api init
++arg;
} else if (image == NULL) {
image = argv[arg];
} else if (output == NULL) {
@ -105,7 +108,8 @@ int main(int argc, char **argv) {
if (output == NULL && noocr == false) {
fprintf(stderr, _("Usage:%s imagename outputbase|stdout [-l lang] "
"[-psm pagesegmode] [configfile...]\n\n"), argv[0]);
"[-psm pagesegmode] [-o configvar=value] "
"[configfile...]\n\n"), argv[0]);
fprintf(stderr,
_("pagesegmode values are:\n"
"0 = Orientation and script detection (OSD) only.\n"
@ -119,8 +123,9 @@ int main(int argc, char **argv) {
"8 = Treat the image as a single word.\n"
"9 = Treat the image as a single word in a circle.\n"
"10 = Treat the image as a single character.\n"));
fprintf(stderr, _("-l lang and/or -psm pagesegmode must occur before any"
"configfile.\n\n"));
fprintf(stderr, _("multiple -o arguments are allowed.\n"));
// Adjacent string literals are concatenated with nothing in between, so the
// first piece has to end with a space to keep "occur" and "before" apart.
fprintf(stderr, _("-l lang, -psm pagesegmode and any -o options must occur "
                  "before any configfile.\n\n"));
fprintf(stderr, _("Single options:\n"));
fprintf(stderr, _(" -v --version: version info\n"));
fprintf(stderr, _(" --list-langs: list available languages for tesseract "
@ -143,6 +148,21 @@ int main(int argc, char **argv) {
exit(1);
}
// Second pass over argv: apply every "-o configvar=value" pair through
// api.SetVariable(). opt1 receives the variable name (text before the first
// '='), opt2 the value (text after it, truncated to fit the buffer).
char opt1[255], opt2[255];
for (arg = 0; arg < argc; arg++) {
  if (strcmp(argv[arg], "-o") == 0 && arg + 1 < argc) {
    const char *assignment = argv[arg + 1];
    ++arg;  // the operand is consumed whether or not it is well formed

    // Locate the separator in the original argument. Without this check a
    // missing '=' made strchr() return NULL, which was then dereferenced.
    const char *equals = strchr(assignment, '=');
    if (equals == NULL ||
        static_cast<size_t>(equals - assignment) >= sizeof(opt1)) {
      fprintf(stderr,
              _("Invalid -o argument (expected configvar=value): %s\n"),
              assignment);
      continue;
    }

    // Copy the name and terminate it explicitly; strncpy() alone does not
    // add a terminator when the source is at least as long as the count.
    const size_t name_len = static_cast<size_t>(equals - assignment);
    strncpy(opt1, assignment, name_len);
    opt1[name_len] = 0;

    // Copy the value, keeping at most sizeof(opt2) - 1 characters.
    strncpy(opt2, equals + 1, sizeof(opt2) - 1);
    opt2[sizeof(opt2) - 1] = 0;

    if (!api.SetVariable(opt1, opt2)) {
      fprintf(stderr, _("Could not set option: %s=%s\n"), opt1, opt2);
    }
  }
}
if (list_langs) {
GenericVector<STRING> languages;
api.GetAvailableLanguagesAsVector(&languages);

View File

@ -31,7 +31,7 @@
tesseract \- command\-line OCR engine
.SH "SYNOPSIS"
.sp
\fBtesseract\fR \fIimagename\fR \fIoutbase\fR [\fI\-l lang\fR] [\fI\-psm N\fR] [\fIconfigfile\fR \&...]
\fBtesseract\fR \fIimagename\fR \fIoutbase\fR|\fIstdout\fR [\fI\-l lang\fR] [\fI\-psm N\fR] [\fI\-o configvar=value\fR] [\fIconfigfile\fR \&...]
.SH "DESCRIPTION"
.sp
tesseract(1) is a commercial quality OCR engine originally developed at HP between 1985 and 1995\&. In 1995, this engine was among the top 3 evaluated by UNLV\&. It was open\-sourced by HP and UNLV in 2005, and has been developed at Google since then\&.
@ -45,7 +45,7 @@ The name of the input image\&. Most image file formats (anything readable by Lep
\fIoutbase\fR
.RS 4
The basename of the output file (to which the appropriate extension will be appended)\&. By default the output will be named
\fIoutbase\&.txt\fR\&.
\fIoutbase\&.txt\fR\&. When stdout is used as outbase, output will be sent to stdout\&.
.RE
.PP
\fI\-l lang\fR
@ -80,6 +80,11 @@ are:
.\}
.RE
.PP
\fI\-o configvar=value\fR
.RS 4
Sets a configuration variable\&. Multiple options can be set by using \fI\-o\fR multiple times, once for each option\&.
.RE
.PP
\fI\-v\fR
.RS 4
Returns the current version of the tesseract(1) executable\&.
@ -102,7 +107,7 @@ hocr \- Output in hOCR format instead of as a text file\&.
.RE
.RE
.sp
\fBNota Bene:\fR The options \fI\-l lang\fR and \fI\-psm N\fR must occur before any \fIconfigfile\fR\&.
\fBNota Bene:\fR The options \fI\-l lang\fR, \fI\-psm N\fR and \fI\-o configvar=value\fR must occur before any \fIconfigfile\fR\&.
.SH "LANGUAGES"
.sp
There are currently language packs available for the following languages: