mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-30 23:49:05 +08:00
Change tesseract parameter -psm to --psm
For compatibility reasons the old variant is still supported. Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
parent
d2f9264383
commit
92d981b93a
@ -63,7 +63,7 @@ You can either [Install Tesseract via pre-built binary package](https://github.c
|
|||||||
|
|
||||||
Basic command line usage:
|
Basic command line usage:
|
||||||
|
|
||||||
tesseract imagename outputbase [-l lang] [-psm pagesegmode] [configfiles...]
|
tesseract imagename outputbase [-l lang] [--psm pagesegmode] [configfiles...]
|
||||||
|
|
||||||
For more information about the various command line options use `tesseract --help` or `man tesseract`.
|
For more information about the various command line options use `tesseract --help` or `man tesseract`.
|
||||||
|
|
||||||
|
@ -142,7 +142,7 @@ void PrintHelpMessage(const char* program) {
|
|||||||
" -l LANG[+LANG] Specify language(s) used for OCR.\n"
|
" -l LANG[+LANG] Specify language(s) used for OCR.\n"
|
||||||
" -c VAR=VALUE Set value for config variables.\n"
|
" -c VAR=VALUE Set value for config variables.\n"
|
||||||
" Multiple -c arguments are allowed.\n"
|
" Multiple -c arguments are allowed.\n"
|
||||||
" -psm NUM Specify page segmentation mode.\n"
|
" --psm NUM Specify page segmentation mode.\n"
|
||||||
" --oem NUM Specify OCR Engine mode.\n"
|
" --oem NUM Specify OCR Engine mode.\n"
|
||||||
"NOTE: These options must occur before any configfile.\n";
|
"NOTE: These options must occur before any configfile.\n";
|
||||||
|
|
||||||
@ -275,6 +275,11 @@ void ParseArgs(const int argc, char** argv, const char** lang,
|
|||||||
noocr = true;
|
noocr = true;
|
||||||
*list_langs = true;
|
*list_langs = true;
|
||||||
} else if (strcmp(argv[i], "-psm") == 0 && i + 1 < argc) {
|
} else if (strcmp(argv[i], "-psm") == 0 && i + 1 < argc) {
|
||||||
|
// The parameter -psm is deprecated and was replaced by --psm.
|
||||||
|
// It is still supported for compatibility reasons.
|
||||||
|
*pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[i + 1]));
|
||||||
|
++i;
|
||||||
|
} else if (strcmp(argv[i], "--psm") == 0 && i + 1 < argc) {
|
||||||
*pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[i + 1]));
|
*pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[i + 1]));
|
||||||
++i;
|
++i;
|
||||||
} else if (strcmp(argv[i], "--oem") == 0 && i + 1 < argc) {
|
} else if (strcmp(argv[i], "--oem") == 0 && i + 1 < argc) {
|
||||||
|
@ -84,7 +84,7 @@ Set value for control parameter\&. Multiple \-c arguments are allowed\&.
|
|||||||
The language to use\&. If none is specified, English is assumed\&. Multiple languages may be specified, separated by plus characters\&. Tesseract uses 3\-character ISO 639\-2 language codes\&. (See LANGUAGES)
|
The language to use\&. If none is specified, English is assumed\&. Multiple languages may be specified, separated by plus characters\&. Tesseract uses 3\-character ISO 639\-2 language codes\&. (See LANGUAGES)
|
||||||
.RE
|
.RE
|
||||||
.PP
|
.PP
|
||||||
\fI\-psm N\fR
|
\fI\-\-psm N\fR
|
||||||
.RS 4
|
.RS 4
|
||||||
Set Tesseract to only run a subset of layout analysis and assume a certain form of image\&. The options for
|
Set Tesseract to only run a subset of layout analysis and assume a certain form of image\&. The options for
|
||||||
\fBN\fR
|
\fBN\fR
|
||||||
@ -139,7 +139,7 @@ pdf \- Output in pdf instead of a text file\&.
|
|||||||
.RE
|
.RE
|
||||||
.RE
|
.RE
|
||||||
.sp
|
.sp
|
||||||
\fBNota Bene:\fR The options \fI\-l lang\fR and \fI\-psm N\fR must occur before any \fIconfigfile\fR\&.
|
\fBNota Bene:\fR The options \fI\-l lang\fR and \fI\-\-psm N\fR must occur before any \fIconfigfile\fR\&.
|
||||||
.SH "SINGLE OPTIONS"
|
.SH "SINGLE OPTIONS"
|
||||||
.PP
|
.PP
|
||||||
\fI\-v\fR
|
\fI\-v\fR
|
||||||
|
@ -54,7 +54,7 @@ OPTIONS
|
|||||||
Multiple languages may be specified, separated by plus characters.
|
Multiple languages may be specified, separated by plus characters.
|
||||||
Tesseract uses 3-character ISO 639-2 language codes. (See LANGUAGES)
|
Tesseract uses 3-character ISO 639-2 language codes. (See LANGUAGES)
|
||||||
|
|
||||||
'-psm N'::
|
'--psm N'::
|
||||||
Set Tesseract to only run a subset of layout analysis and assume
|
Set Tesseract to only run a subset of layout analysis and assume
|
||||||
a certain form of image. The options for *N* are:
|
a certain form of image. The options for *N* are:
|
||||||
|
|
||||||
@ -78,7 +78,7 @@ OPTIONS
|
|||||||
* hocr - Output in hOCR format instead of as a text file.
|
* hocr - Output in hOCR format instead of as a text file.
|
||||||
* pdf - Output in pdf instead of a text file.
|
* pdf - Output in pdf instead of a text file.
|
||||||
|
|
||||||
*Nota Bene:* The options '-l lang' and '-psm N' must occur
|
*Nota Bene:* The options '-l lang' and '--psm N' must occur
|
||||||
before any 'configfile'.
|
before any 'configfile'.
|
||||||
|
|
||||||
|
|
||||||
|
@ -847,7 +847,7 @@ at Google since then.</p></div>
|
|||||||
</p>
|
</p>
|
||||||
</dd>
|
</dd>
|
||||||
<dt class="hdlist1">
|
<dt class="hdlist1">
|
||||||
<em>-psm N</em>
|
<em>--psm N</em>
|
||||||
</dt>
|
</dt>
|
||||||
<dd>
|
<dd>
|
||||||
<p>
|
<p>
|
||||||
@ -893,7 +893,7 @@ pdf - Output in pdf instead of a text file.
|
|||||||
</ul></div>
|
</ul></div>
|
||||||
</dd>
|
</dd>
|
||||||
</dl></div>
|
</dl></div>
|
||||||
<div class="paragraph"><p><strong>Nota Bene:</strong> The options <em>-l lang</em> and <em>-psm N</em> must occur
|
<div class="paragraph"><p><strong>Nota Bene:</strong> The options <em>-l lang</em> and <em>--psm N</em> must occur
|
||||||
before any <em>configfile</em>.</p></div>
|
before any <em>configfile</em>.</p></div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
@ -130,7 +130,7 @@ at Google since then.</simpara>
|
|||||||
</varlistentry>
|
</varlistentry>
|
||||||
<varlistentry>
|
<varlistentry>
|
||||||
<term>
|
<term>
|
||||||
<emphasis>-psm N</emphasis>
|
<emphasis>--psm N</emphasis>
|
||||||
</term>
|
</term>
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
@ -176,7 +176,7 @@ pdf - Output in pdf instead of a text file.
|
|||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
</variablelist>
|
</variablelist>
|
||||||
<simpara><emphasis role="strong">Nota Bene:</emphasis> The options <emphasis>-l lang</emphasis> and <emphasis>-psm N</emphasis> must occur
|
<simpara><emphasis role="strong">Nota Bene:</emphasis> The options <emphasis>-l lang</emphasis> and <emphasis>--psm N</emphasis> must occur
|
||||||
before any <emphasis>configfile</emphasis>.</simpara>
|
before any <emphasis>configfile</emphasis>.</simpara>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
<refsect1 id="_single_options">
|
<refsect1 id="_single_options">
|
||||||
|
@ -64,7 +64,7 @@ do
|
|||||||
srcdir="$imdir"
|
srcdir="$imdir"
|
||||||
fi
|
fi
|
||||||
# echo "$srcdir/$page.tif"
|
# echo "$srcdir/$page.tif"
|
||||||
$tess $srcdir/$page.tif $resdir/$page -psm 6 $config 2>&1 |grep -v "OCR Engine"
|
$tess $srcdir/$page.tif $resdir/$page --psm 6 $config 2>&1 |grep -v "OCR Engine"
|
||||||
if [ -r times.txt ]
|
if [ -r times.txt ]
|
||||||
then
|
then
|
||||||
read t <times.txt
|
read t <times.txt
|
||||||
|
Loading…
Reference in New Issue
Block a user