mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-18 06:30:14 +08:00
Merge pull request #1947 from stweil/doc
Update tesseract man page and add Makefile rule to build HTML manpages
This commit is contained in:
commit
e03ee932d2
@ -6,7 +6,7 @@ asciidoc=asciidoc -d manpage
|
||||
|
||||
|
||||
man_MANS = \
|
||||
combine_lang_model.1 \
|
||||
combine_lang_model.1 \
|
||||
combine_tessdata.1 \
|
||||
dawg2wordlist.1 \
|
||||
lstmeval.1 \
|
||||
@ -31,9 +31,16 @@ endif
|
||||
|
||||
EXTRA_DIST = $(man_MANS) Doxyfile
|
||||
|
||||
.PHONY: html
|
||||
|
||||
html: $(patsubst %,%.html,$(man_MANS))
|
||||
|
||||
%: %.asc
|
||||
$(asciidoc) -o $@ $<
|
||||
|
||||
%.html: %.asc
|
||||
asciidoc -b html5 -o $@ $<
|
||||
|
||||
MAINTAINERCLEANFILES = $(man_MANS) Doxyfile
|
||||
|
||||
endif
|
||||
|
@ -17,12 +17,6 @@ between 1985 and 1995. In 1995, this engine was among the top 3 evaluated by
|
||||
UNLV. It was open-sourced by HP and UNLV in 2005, and has been developed
|
||||
at Google since then.
|
||||
|
||||
Tesseract 4 adds a new neural net (LSTM) based OCR engine which is focused
|
||||
on line recognition, but also still supports the legacy Tesseract OCR engine of
|
||||
Tesseract 3 which works by recognizing character patterns. Compatibility with
|
||||
Tesseract 3 is enabled by --oem 0. It also needs traineddata files which support
|
||||
the legacy engine, for example those from the tessdata repository.
|
||||
|
||||
|
||||
IN/OUT ARGUMENTS
|
||||
----------------
|
||||
@ -97,7 +91,7 @@ OPTIONS
|
||||
* hocr - Output in hOCR format instead of as a text file.
|
||||
* pdf - Output in PDF format instead of as a text file.
|
||||
|
||||
*Nota Bene:* The options '-l lang' and '--psm N' must occur
|
||||
*Nota Bene:* The options `-l lang` and `--psm N` must occur
|
||||
before any 'configfile'.
|
||||
|
||||
|
||||
@ -116,7 +110,7 @@ SINGLE OPTIONS
|
||||
Returns the current version of the tesseract(1) executable.
|
||||
|
||||
'--list-langs'::
|
||||
List available languages for tesseract engine. Can be used with --tessdata-dir.
|
||||
List available languages for the tesseract engine. Can be used with `--tessdata-dir`.
|
||||
|
||||
'--print-parameters'::
|
||||
Print tesseract parameters.
|
||||
@ -251,7 +245,7 @@ for the following languages are in
|
||||
To use a non-standard language pack named *foo.traineddata*, set the
|
||||
*TESSDATA_PREFIX* environment variable so the file can be found at
|
||||
*TESSDATA_PREFIX*/tessdata/*foo*.traineddata and give Tesseract the
|
||||
argument '-l foo'.
|
||||
argument `-l foo`.
|
||||
|
||||
SCRIPTS
|
||||
-------
|
||||
@ -377,7 +371,15 @@ language data.
|
||||
Tesseract 3.02 adds BiDirectional text support, the ability to recognize
|
||||
multiple languages in a single image, and improved layout analysis.
|
||||
|
||||
For further details, see the file ReleaseNotes included with the distribution.
|
||||
Tesseract 4 adds a new neural net (LSTM) based OCR engine which is focused
|
||||
on line recognition, but also still supports the legacy Tesseract OCR engine of
|
||||
Tesseract 3 which works by recognizing character patterns. Compatibility with
|
||||
Tesseract 3 is enabled by `--oem 0`. It also needs traineddata files which
|
||||
support the legacy engine, for example those from the tessdata repository.
|
||||
|
||||
For further details, see the ReleaseNotes page in the Tesseract wiki
|
||||
(<https://github.com/tesseract-ocr/tesseract/wiki/ReleaseNotes>).
|
||||
|
||||
|
||||
RESOURCES
|
||||
---------
|
||||
@ -402,6 +404,9 @@ Pingping Xiu, Pong Eksombatchai (Chantat), Ranjith Unnikrishnan, Raquel
|
||||
Romano, Ray Smith, Rika Antonova, Robert Moss, Samuel Charron, Sheelagh
|
||||
Lloyd, Shobhit Saxena, and Thomas Kielbus.
|
||||
|
||||
For a list of contributors see
|
||||
<https://github.com/tesseract-ocr/tesseract/blob/master/AUTHORS>.
|
||||
|
||||
COPYING
|
||||
-------
|
||||
Licensed under the Apache License, Version 2.0
|
||||
|
Loading…
Reference in New Issue
Block a user