From 3e9b0acc5cc4f0367ab618e90ddebae7ca42f5e1 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Thu, 4 Oct 2018 22:10:22 +0200 Subject: [PATCH 1/2] Update tesseract man page - move Tesseract 4 release note to other release notes - format command line options in text - add link to release notes (wiki) - add link to contributors (GitHub) Signed-off-by: Stefan Weil --- doc/tesseract.1.asc | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/doc/tesseract.1.asc b/doc/tesseract.1.asc index 8d9ae27c4..22a9ee7a1 100644 --- a/doc/tesseract.1.asc +++ b/doc/tesseract.1.asc @@ -17,12 +17,6 @@ between 1985 and 1995. In 1995, this engine was among the top 3 evaluated by UNLV. It was open-sourced by HP and UNLV in 2005, and has been developed at Google since then. -Tesseract 4 adds a new neural net (LSTM) based OCR engine which is focused -on line recognition, but also still supports the legacy Tesseract OCR engine of -Tesseract 3 which works by recognizing character patterns. Compatibility with -Tesseract 3 is enabled by --oem 0. It also needs traineddata files which support -the legacy engine, for example those from the tessdata repository. - IN/OUT ARGUMENTS ---------------- @@ -97,7 +91,7 @@ OPTIONS * hocr - Output in hOCR format instead of as a text file. * pdf - Output in pdf instead of a text file. -*Nota Bene:* The options '-l lang' and '--psm N' must occur +*Nota Bene:* The options `-l lang` and `--psm N` must occur before any 'configfile'. @@ -116,7 +110,7 @@ SINGLE OPTIONS Returns the current version of the tesseract(1) executable. '--list-langs':: - List available languages for tesseract engine. Can be used with --tessdata-dir. + List available languages for tesseract engine. Can be used with `--tessdata-dir`. '--print-parameters':: Print tesseract parameters. 
@@ -251,7 +245,7 @@ for the following languages are in To use a non-standard language pack named *foo.traineddata*, set the *TESSDATA_PREFIX* environment variable so the file can be found at *TESSDATA_PREFIX*/tessdata/*foo*.traineddata and give Tesseract the -argument '-l foo'. +argument `-l foo`. SCRIPTS ------- @@ -377,7 +371,15 @@ language data. Tesseract 3.02 adds BiDirectional text support, the ability to recognize multiple languages in a single image, and improved layout analysis. -For further details, see the file ReleaseNotes included with the distribution. +Tesseract 4 adds a new neural net (LSTM) based OCR engine which is focused +on line recognition, but also still supports the legacy Tesseract OCR engine of +Tesseract 3 which works by recognizing character patterns. Compatibility with +Tesseract 3 is enabled by `--oem 0`. It also needs traineddata files which +support the legacy engine, for example those from the tessdata repository. + +For further details, see the file ReleaseNotes in the Tesseract wiki +(<https://github.com/tesseract-ocr/tesseract/wiki/ReleaseNotes>). + RESOURCES --------- @@ -402,6 +404,9 @@ Pingping Xiu, Pong Eksombatchai (Chantat), Ranjith Unnikrishnan, Raquel Romano, Ray Smith, Rika Antonova, Robert Moss, Samuel Charron, Sheelagh Lloyd, Shobhit Saxena, and Thomas Kielbus. +For a list of contributors see +<https://github.com/tesseract-ocr/tesseract/graphs/contributors>. + COPYING ------- Licensed under the Apache License, Version 2.0 From b70a45678820c7347e73c530cea5b447459acb0d Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Thu, 4 Oct 2018 22:26:35 +0200 Subject: [PATCH 2/2] Add Makefile rule to build HTML manpages They can be built optionally by `make html` (only for automake builds). 
Signed-off-by: Stefan Weil --- doc/Makefile.am | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/doc/Makefile.am b/doc/Makefile.am index 52c6898e2..a218aca3b 100644 --- a/doc/Makefile.am +++ b/doc/Makefile.am @@ -6,7 +6,7 @@ asciidoc=asciidoc -d manpage man_MANS = \ - combine_lang_model.1 \ + combine_lang_model.1 \ combine_tessdata.1 \ dawg2wordlist.1 \ lstmeval.1 \ @@ -31,9 +31,16 @@ endif EXTRA_DIST = $(man_MANS) Doxyfile +.PHONY: html + +html: $(patsubst %,%.html,$(man_MANS)) + %: %.asc $(asciidoc) -o $@ $< +%.html: %.asc + asciidoc -b html5 -o $@ $< + MAINTAINERCLEANFILES = $(man_MANS) Doxyfile endif