From 6b3c81c9093763a48ade814a03e63618771d9786 Mon Sep 17 00:00:00 2001
From: Stefan Weil <sw@weilnetz.de>
Date: Fri, 15 Mar 2019 20:56:53 +0100
Subject: [PATCH 1/5] Add rule for PDF documentation

Signed-off-by: Stefan Weil <sw@weilnetz.de>
---
 doc/Makefile.am | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/doc/Makefile.am b/doc/Makefile.am
index f86923e1..b8cf07ac 100644
--- a/doc/Makefile.am
+++ b/doc/Makefile.am
@@ -33,8 +33,9 @@ EXTRA_DIST = $(man_MANS) Doxyfile
 .PHONY: html
 
 html: ${man_MANS:%=%.html}
+pdf: ${man_MANS:%=%.pdf}
 
-SUFFIXES = .asc .html
+SUFFIXES = .asc .html .pdf
 
 .asc:
 	-asciidoc -b docbook -d manpage -o - $< | \
@@ -43,6 +44,10 @@ SUFFIXES = .asc .html
 .asc.html:
 	asciidoc -b html5 -o $@ $<
 
+.asc.pdf:
+	asciidoc -b docbook -d manpage -o $*.dbk $<
+	docbook2pdf $*.dbk
+
 MAINTAINERCLEANFILES = $(man_MANS) Doxyfile
 
 endif

From a6981ae54884f3461211471ed99b25a7f1b62302 Mon Sep 17 00:00:00 2001
From: Stefan Weil <sw@weilnetz.de>
Date: Fri, 15 Mar 2019 21:53:27 +0100
Subject: [PATCH 2/5] Improve man page for tesseract

Format it like the example
https://github.com/asciidoc/asciidoc/blob/master/doc/asciidoc.1.txt.

Replace tab characters by blanks.

Add also a chapter on environment variables.

Signed-off-by: Stefan Weil <sw@weilnetz.de>
---
 doc/tesseract.1.asc | 329 +++++++++++++++++++++++---------------------
 1 file changed, 174 insertions(+), 155 deletions(-)

diff --git a/doc/tesseract.1.asc b/doc/tesseract.1.asc
index ad9abfc4..b741b7a1 100644
--- a/doc/tesseract.1.asc
+++ b/doc/tesseract.1.asc
@@ -8,7 +8,7 @@ tesseract - command-line OCR engine
 
 SYNOPSIS
 --------
-*tesseract* 'imagename'|'listname'|'stdin' 'outputbase'|'stdout' [options...] [configfile...]
+*tesseract* 'FILE' 'OUTPUTBASE' ['OPTIONS']... ['CONFIGFILE']...
 
 DESCRIPTION
 -----------
@@ -20,128 +20,129 @@ at Google since then.
 
 IN/OUT ARGUMENTS
 ----------------
-'imagename'::
-	The name of the input image.  Most image file formats (anything
-	readable by Leptonica) are supported.
+'FILE'::
+  The name of the input file.
+  This can either be an image file or a text file. +
+  Most image file formats (anything readable by Leptonica) are supported. +
+  A text file lists the names of all input images (one image name per line).
+  The results will be combined in a single file for each output file format
+  (txt, pdf, hocr, xml). +
+  If 'FILE' is `stdin` or `-` then the standard input is used.
 
-'listname'::
-	The name of a text file which lists the names of all input images
-	(one image name per line). The results will be combined in a
-	single file for each output file format (txt, pdf, hocr).
-
-'stdin'::
-	Instruction to read data from standard input.
-
-'outputbase'::
-	The basename of the output file (to which the appropriate extension
-	will be appended).  By default the output will be a text file
-	with `.txt` added to the basename unless there are one or more
-	parameters set which explicitly specify the desired output.
-
-'stdout'::
-	Instruction to send output data to standard output.
+'OUTPUTBASE'::
+  The basename of the output file (to which the appropriate extension
+  will be appended).  By default the output will be a text file
+  with `.txt` added to the basename unless there are one or more
+  parameters set which explicitly specify the desired output. +
+  If 'OUTPUTBASE' is `stdout` or `-` then the standard output is used.
 
 
 OPTIONS
 -------
-'--tessdata-dir /path'::
-	Specify the location of tessdata path.
+[[TESSDATADIR]]
+*--tessdata-dir* 'PATH'::
+  Specify the location of tessdata path.
 
-'--user-words /path/to/file'::
-	Specify the location of user words file.
+*--user-words* 'FILE'::
+  Specify the location of user words file.
 
-'--user-patterns /path/to/file'::
-	Specify the location of user patterns file.
+*--user-patterns* 'FILE'::
+  Specify the location of user patterns file.
 
-'-c configvar=value'::
-	Set value for parameter 'configvar'. Multiple -c arguments are allowed.
+*-c* 'CONFIGVAR=VALUE'::
+  Set value for parameter 'CONFIGVAR' to VALUE. Multiple *-c* arguments are allowed.
 
-'-l lang'::
-	The language to use. If none is specified, English is assumed.
-	Multiple languages may be specified, separated by plus characters.
-	Tesseract uses 3-character ISO 639-2 language codes. (See LANGUAGES)
+*-l* 'LANG'::
+*-l* 'SCRIPT'::
+  The language or script to use.
+  If none is specified, `eng` (English) is assumed.
+  Multiple languages may be specified, separated by plus characters.
+  Tesseract uses 3-character ISO 639-2 language codes
+  (see <<LANGUAGES,*LANGUAGES AND SCRIPTS*>>).
 
-'--psm N'::
-	Set Tesseract to only run a subset of layout analysis and assume
-	a certain form of image. The options for *N* are:
+*--psm* 'N'::
+  Set Tesseract to only run a subset of layout analysis and assume
+  a certain form of image. The options for 'N' are:
 
-	0 = Orientation and script detection (OSD) only.
-	1 = Automatic page segmentation with OSD.
-	2 = Automatic page segmentation, but no OSD, or OCR. (not implemented)
-	3 = Fully automatic page segmentation, but no OSD. (Default)
-	4 = Assume a single column of text of variable sizes.
-	5 = Assume a single uniform block of vertically aligned text.
-	6 = Assume a single uniform block of text.
-	7 = Treat the image as a single text line.
-	8 = Treat the image as a single word.
-	9 = Treat the image as a single word in a circle.
-	10 = Treat the image as a single character.
+  0 = Orientation and script detection (OSD) only.
+  1 = Automatic page segmentation with OSD.
+  2 = Automatic page segmentation, but no OSD, or OCR. (not implemented)
+  3 = Fully automatic page segmentation, but no OSD. (Default)
+  4 = Assume a single column of text of variable sizes.
+  5 = Assume a single uniform block of vertically aligned text.
+  6 = Assume a single uniform block of text.
+  7 = Treat the image as a single text line.
+  8 = Treat the image as a single word.
+  9 = Treat the image as a single word in a circle.
+  10 = Treat the image as a single character.
 
-'--oem N'::
-	Specify OCR Engine mode. The options for *N* are:
+*--oem* 'N'::
+  Specify OCR Engine mode. The options for 'N' are:
 
-	0 = Original Tesseract only.
-	1 = Neural nets LSTM only.
-	2 = Tesseract + LSTM.
-	3 = Default, based on what is available.
+  0 = Original Tesseract only.
+  1 = Neural nets LSTM only.
+  2 = Tesseract + LSTM.
+  3 = Default, based on what is available.
 
-'configfile'::
-	The name of a config to use. The name can be a file in tessdata/configs
-	or tessdata/tessconfigs, or an absolute or relative file path.
-	A config is a plain text file which contains a list of parameters and
-	their values, one per line, with a space separating parameter from value. +
-	Interesting config files include:
+[[CONFIGFILE]]
+'CONFIGFILE'::
+  The name of a config to use. The name can be a file in `tessdata/configs`
+  or `tessdata/tessconfigs`, or an absolute or relative file path.
+  A config is a plain text file which contains a list of parameters and
+  their values, one per line, with a space separating parameter from value. +
+  Interesting config files include:
 
-	* `alto` - Output in ALTO format ('outputbase'`.xml`).
-	* `hocr` - Output in hOCR format ('outputbase'`.hocr`).
-	* `pdf` - Output PDF ('outputbase'`.pdf`).
-	* `tsv` - Output TSV ('outputbase'`.tsv`).
-	* `txt` - Output plain text ('outputbase'`.txt`).
-	* `get.images` - Write processed input images to file (`tessinput.tif`).
-	* `logfile` - Redirect debug messages to file (`tesseract.log`).
-	* `lstm.train` - Output files used by LSTM training ('outputbase'`.lstmf`).
-	* `makebox` - Write box file ('outputbase'`.box`).
-	* `quiet` - Redirect debug messages to /dev/null.
+  * *alto* -- Output in ALTO format ('OUTPUTBASE'`.xml`).
+  * *hocr* -- Output in hOCR format ('OUTPUTBASE'`.hocr`).
+  * *pdf* -- Output PDF ('OUTPUTBASE'`.pdf`).
+  * *tsv* -- Output TSV ('OUTPUTBASE'`.tsv`).
+  * *txt* -- Output plain text ('OUTPUTBASE'`.txt`).
+  * *get.images* -- Write processed input images to file (`tessinput.tif`).
+  * *logfile* -- Redirect debug messages to file (`tesseract.log`).
+  * *lstm.train* -- Output files used by LSTM training ('OUTPUTBASE'`.lstmf`).
+  * *makebox* -- Write box file ('OUTPUTBASE'`.box`).
+  * *quiet* -- Redirect debug messages to '/dev/null'.
 
 It is possible to select several config files, for example
-`tesseract image.png demo hocr pdf txt` will create three output files
-`demo.hocr`, `demo.pdf` and `demo.txt` with the OCR results.
+`tesseract image.png demo alto hocr pdf txt` will create four output files
+`demo.xml`, `demo.hocr`, `demo.pdf` and `demo.txt` with the OCR results.
 
-*Nota Bene:*   The options `-l lang` and `--psm N` must occur
-before any 'configfile'.
+*Nota bene:*   The options *-l* 'LANG', *-l* 'SCRIPT' and *--psm* 'N'
+must occur before any 'CONFIGFILE'.
 
 
 SINGLE OPTIONS
 --------------
-'-h, --help'::
-	Show help message.
+*-h, --help*::
+  Show help message.
 
-'--help-extra'::
-	Show extra help for advanced users.
+*--help-extra*::
+  Show extra help for advanced users.
 
-'--help-psm'::
-	Show page segmentation modes.
+*--help-psm*::
+  Show page segmentation modes.
 
-'--help-oem'::
-	Show OCR Engine modes.
+*--help-oem*::
+  Show OCR Engine modes.
 
-'-v, --version'::
-	Returns the current version of the tesseract(1) executable.
+*-v, --version*::
+  Returns the current version of the tesseract(1) executable.
 
-'--list-langs'::
-	List available languages for tesseract engine. Can be used with `--tessdata-dir`.
-
-'--print-parameters'::
-	Print tesseract parameters.
+*--list-langs*::
+  List available languages for tesseract engine.
+  Can be used with <<TESSDATADIR,*--tessdata-dir* 'PATH'>>.
 
+*--print-parameters*::
+  Print tesseract parameters.
 
 
+[[LANGUAGES]]
 LANGUAGES AND SCRIPTS
 ---------------------
 
 To recognize some text with Tesseract, it is normally necessary to specify
-the language(s) or script of the text (unless it is English text which is
-supported by default) using `-l lang`.
+the language(s) or script(s) of the text (unless it is English text which is
+supported by default) using *-l* 'LANG' or *-l* 'SCRIPT'.
 
 Selecting a language automatically also selects the language specific
 character set and dictionary (word list).
@@ -153,6 +154,9 @@ In most cases, a script also supports English.
 So it is possible to recognize a language that has not been specifically
 trained for by using traineddata for the script it is written in.
 
+More than one language or script may be specified by using `+`.
+Example: `tesseract myimage.png myimage -l eng+deu+fra`.
+
 https://github.com/tesseract-ocr/tessdata_fast provides fast language and
 script models which are also part of Linux distributions.
 
@@ -174,16 +178,16 @@ following languages:
 *cat* (Catalan; Valencian),
 *ceb* (Cebuano),
 *ces* (Czech),
-*chi_sim* (Chinese - Simplified),
-*chi_tra* (Chinese - Traditional),
+*chi_sim* (Chinese simplified),
+*chi_tra* (Chinese traditional),
 *chr* (Cherokee),
 *cym* (Welsh),
 *dan* (Danish),
 *deu* (German),
 *dzo* (Dzongkha),
-*ell* (Greek, Modern (1453-)),
+*ell* (Greek, Modern, 1453-),
 *eng* (English),
-*enm* (English, Middle (1100-1500)),
+*enm* (English, Middle, 1100-1500),
 *epo* (Esperanto),
 *equ* (Math / equation detection module),
 *est* (Estonian),
@@ -192,10 +196,10 @@ following languages:
 *fin* (Finnish),
 *fra* (French),
 *frk* (Frankish),
-*frm* (French, Middle (ca.1400-1600)),
+*frm* (French, Middle, ca.1400-1600),
 *gle* (Irish),
 *glg* (Galician),
-*grc* (Greek, Ancient (to 1453)),
+*grc* (Greek, Ancient, to 1453),
 *guj* (Gujarati),
 *hat* (Haitian; Haitian Creole),
 *heb* (Hebrew),
@@ -215,9 +219,9 @@ following languages:
 *kaz* (Kazakh),
 *khm* (Central Khmer),
 *kir* (Kirghiz; Kyrgyz),
+*kmr* (Kurdish Kurmanji),
 *kor* (Korean),
-*kor_vert* (Korean (vertical)),
-*kmr* (Kurdish (Kurmanji)),
+*kor_vert* (Korean vertical),
 *kur* (Kurdish),
 *lao* (Lao),
 *lat* (Latin),
@@ -235,7 +239,7 @@ following languages:
 *nep* (Nepali),
 *nld* (Dutch; Flemish),
 *nor* (Norwegian),
-*oci* (Occitan (post 1500)),
+*oci* (Occitan post 1500),
 *ori* (Oriya),
 *osd* (Orientation and script detection module),
 *pan* (Panjabi; Punjabi),
@@ -277,51 +281,51 @@ following languages:
 *yid* (Yiddish),
 *yor* (Yoruba)
 
-To use a non-standard language pack named *foo.traineddata*, set the
-*TESSDATA_PREFIX* environment variable so the file can be found at
-*TESSDATA_PREFIX*/tessdata/*foo*.traineddata and give Tesseract the
-argument `-l foo`.
+To use a non-standard language pack named `foo.traineddata`, set the
+`TESSDATA_PREFIX` environment variable so the file can be found at
+`TESSDATA_PREFIX/tessdata/foo.traineddata` and give Tesseract the
+argument *-l* `foo`.
 
 For Tesseract 4, `tessdata_fast` includes traineddata files for the
 following scripts:
 
-Arabic,
-Armenian,
-Bengali,
-Canadian Aboriginal,
-Cherokee,
-Cyrillic,
-Devanagari,
-Ethiopic,
-Fraktur,
-Georgian,
-Greek,
-Gujarati,
-Gurmukhi,
-Han - Simplified,
-Han - Simplified (vertical),
-Han - Traditional,
-Han - Traditional (vertical),
-Hangul,
-Hangul (vertical),
-Hebrew,
-Japanese,
-Japanese (vertical),
-Kannada,
-Khmer,
-Lao,
-Latin,
-Malayalam,
-Myanmar,
-Oriya (Odia),
-Sinhala,
-Syriac,
-Tamil,
-Telugu,
-Thaana,
-Thai,
-Tibetan,
-Vietnamese.
+*Arabic*,
+*Armenian*,
+*Bengali*,
+*Canadian_Aboriginal*,
+*Cherokee*,
+*Cyrillic*,
+*Devanagari*,
+*Ethiopic*,
+*Fraktur*,
+*Georgian*,
+*Greek*,
+*Gujarati*,
+*Gurmukhi*,
+*HanS* (Han simplified),
+*HanS_vert* (Han simplified, vertical),
+*HanT* (Han traditional),
+*HanT_vert* (Han traditional, vertical),
+*Hangul*,
+*Hangul_vert* (Hangul vertical),
+*Hebrew*,
+*Japanese*,
+*Japanese_vert* (Japanese vertical),
+*Kannada*,
+*Khmer*,
+*Lao*,
+*Latin*,
+*Malayalam*,
+*Myanmar*,
+*Oriya* (Odia),
+*Sinhala*,
+*Syriac*,
+*Tamil*,
+*Telugu*,
+*Thaana*,
+*Thai*,
+*Tibetan*,
+*Vietnamese*.
 
 The same languages and scripts are available from
 https://github.com/tesseract-ocr/tessdata_best.
@@ -343,8 +347,8 @@ Tesseract config files consist of lines with parameter-value pairs (space
 separated).  The parameters are documented as flags in the source code like
 the following one in tesseractclass.h:
 
-STRING_VAR_H(tessedit_char_blacklist, "",
-             "Blacklist of chars not to recognize");
+`STRING_VAR_H(tessedit_char_blacklist, "",
+             "Blacklist of chars not to recognize");`
 
 These parameters may enable or disable various features of the engine, and
 may cause it to load (or not load) various data.  For instance, let's suppose
@@ -352,10 +356,10 @@ you want to OCR in English, but suppress the normal dictionary and load an
 alternative word list and an alternative list of patterns -- these two files
 are the most commonly used extra data files.
 
-If your language pack is in /path/to/eng.traineddata  and the hocr config
-is in /path/to/configs/hocr then create three new files:
+If your language pack is in '/path/to/eng.traineddata' and the hocr config
+is in '/path/to/configs/hocr' then create three new files:
 
-/path/to/eng.user-words:
+'/path/to/eng.user-words':
 [verse]
 the
 quick
@@ -363,25 +367,39 @@ brown
 fox
 jumped
 
-
-/path/to/eng.user-patterns:
+'/path/to/eng.user-patterns':
 [verse]
 1-\d\d\d-GOOG-411
 www.\n\\\*.com
 
-/path/to/configs/bazaar:
+'/path/to/configs/bazaar':
 [verse]
 load_system_dawg     F
 load_freq_dawg       F
 user_words_suffix    user-words
 user_patterns_suffix user-patterns
 
-Now, if you pass the word 'bazaar' as a 'configfile' to Tesseract,
-Tesseract will not bother loading the system dictionary nor
-the dictionary of frequent words and will load and use the eng.user-words
-and eng.user-patterns files you provided.  The former is a simple word list,
-one per line.  The format of the latter is documented in dict/trie.h
-on read_pattern_list().
+Now, if you pass the word 'bazaar' as a <<CONFIGFILE,'CONFIGFILE'>> to
+Tesseract, Tesseract will not bother loading the system dictionary nor
+the dictionary of frequent words and will load and use the 'eng.user-words'
+and 'eng.user-patterns' files you provided.  The former is a simple word list,
+one per line.  The format of the latter is documented in 'dict/trie.h'
+on 'read_pattern_list()'.
+
+
+ENVIRONMENT VARIABLES
+---------------------
+*`TESSDATA_PREFIX`*::
+  If `TESSDATA_PREFIX` is set to a path, then that path is used to
+  find the `tessdata` directory with language and script recognition
+  models and config files.
+  Using <<TESSDATADIR,*--tessdata-dir* 'PATH'>> is the recommended alternative.
+*`OMP_THREAD_LIMIT`*::
+  If the `tesseract` executable was built with multithreading support,
+  it will normally use four CPU cores for the OCR process. While this
+  can be faster for a single image, it gives bad performance if the host
+  computer provides fewer than four CPU cores or if OCR is run on many images.
+  Only a single CPU core is used with `OMP_THREAD_LIMIT=1`.
 
 
 HISTORY
@@ -391,7 +409,7 @@ Hewlett Packard Co, Greeley Colorado between 1985 and 1994, with some more
 changes made in 1996 to port to Windows, and some $$C++$$izing in 1998. A
 lot of the code was written in C, and then some more was written in $$C++$$.
 The $$C++$$ code makes heavy use of a list system using macros. This predates
-stl, was portable before stl, and is more efficient than stl lists, but has
+STL, was portable before STL, and is more efficient than STL lists, but has
 the big negative that if you do get a segmentation violation, it is hard to
 debug.
 
@@ -399,7 +417,8 @@ Version 2.00 brought Unicode (UTF-8) support, six languages, and the ability
 to train Tesseract.
 
 Tesseract was included in UNLV's Fourth Annual Test of OCR Accuracy.
-See <https://github.com/tesseract-ocr/docs/blob/master/AT-1995.pdf>. With Tesseract 2.00,
+See <https://github.com/tesseract-ocr/docs/blob/master/AT-1995.pdf>.
+Since Tesseract 2.00,
 scripts are now included to allow anyone to reproduce some of these tests.
 See <https://github.com/tesseract-ocr/tesseract/wiki/TestingTesseract> for more
 details.

From 26b4457b868b1571986b3279ca8b571ad6bbe363 Mon Sep 17 00:00:00 2001
From: Stefan Weil <sw@weilnetz.de>
Date: Sat, 16 Mar 2019 09:24:40 +0100
Subject: [PATCH 3/5] Add description for new --psm values in tesseract man
 page

Signed-off-by: Stefan Weil <sw@weilnetz.de>
---
 doc/tesseract.1.asc | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/doc/tesseract.1.asc b/doc/tesseract.1.asc
index b741b7a1..decdd074 100644
--- a/doc/tesseract.1.asc
+++ b/doc/tesseract.1.asc
@@ -75,6 +75,10 @@ OPTIONS
   8 = Treat the image as a single word.
   9 = Treat the image as a single word in a circle.
   10 = Treat the image as a single character.
+  11 = Sparse text. Find as much text as possible in no particular order.
+  12 = Sparse text with OSD.
+  13 = Raw line. Treat the image as a single text line,
+       bypassing hacks that are Tesseract-specific.
 
 *--oem* 'N'::
   Specify OCR Engine mode. The options for 'N' are:

From b55984fb88d3a6eb89929ca4394b0b4e03fccab5 Mon Sep 17 00:00:00 2001
From: Stefan Weil <sw@weilnetz.de>
Date: Sat, 16 Mar 2019 09:33:41 +0100
Subject: [PATCH 4/5] Add description for new --dpi option in tesseract man
 page

Signed-off-by: Stefan Weil <sw@weilnetz.de>
---
 doc/tesseract.1.asc | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/doc/tesseract.1.asc b/doc/tesseract.1.asc
index decdd074..e54c0d3f 100644
--- a/doc/tesseract.1.asc
+++ b/doc/tesseract.1.asc
@@ -52,6 +52,12 @@ OPTIONS
 *-c* 'CONFIGVAR=VALUE'::
   Set value for parameter 'CONFIGVAR' to VALUE. Multiple *-c* arguments are allowed.
 
+*--dpi* 'N'::
+  Specify the resolution 'N' in DPI for the input image(s).
+  A typical value for 'N' is `300`. Without this option,
+  the resolution is read from the metadata included in the image.
+  If an image does not include that information, Tesseract tries to guess it.
+
 *-l* 'LANG'::
 *-l* 'SCRIPT'::
   The language or script to use.

From 5f76a8495b2c38e10ee1cf8fcee637ab16edb073 Mon Sep 17 00:00:00 2001
From: Stefan Weil <sw@weilnetz.de>
Date: Sat, 16 Mar 2019 10:19:00 +0100
Subject: [PATCH 5/5] Sort options alphabetically in tesseract man page

Signed-off-by: Stefan Weil <sw@weilnetz.de>
---
 doc/tesseract.1.asc | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/doc/tesseract.1.asc b/doc/tesseract.1.asc
index e54c0d3f..d8dc479d 100644
--- a/doc/tesseract.1.asc
+++ b/doc/tesseract.1.asc
@@ -37,18 +37,9 @@ IN/OUT ARGUMENTS
   If 'OUTPUTBASE' is `stdout` or `-` then the standard output is used.
 
 
+[[TESSDATADIR]]
 OPTIONS
 -------
-[[TESSDATADIR]]
-*--tessdata-dir* 'PATH'::
-  Specify the location of tessdata path.
-
-*--user-words* 'FILE'::
-  Specify the location of user words file.
-
-*--user-patterns* 'FILE'::
-  Specify the location of user patterns file.
-
 *-c* 'CONFIGVAR=VALUE'::
   Set value for parameter 'CONFIGVAR' to VALUE. Multiple *-c* arguments are allowed.
 
@@ -94,6 +85,15 @@ OPTIONS
   2 = Tesseract + LSTM.
   3 = Default, based on what is available.
 
+*--tessdata-dir* 'PATH'::
+  Specify the location of tessdata path.
+
+*--user-patterns* 'FILE'::
+  Specify the location of user patterns file.
+
+*--user-words* 'FILE'::
+  Specify the location of user words file.
+
 [[CONFIGFILE]]
 'CONFIGFILE'::
   The name of a config to use. The name can be a file in `tessdata/configs`
@@ -140,7 +140,7 @@ SINGLE OPTIONS
 
 *--list-langs*::
   List available languages for tesseract engine.
-  Can be used with <<TESSDATADIR,*--tessdata-dir* 'PATH'>>.
+  Can be used with *--tessdata-dir* 'PATH'.
 
 *--print-parameters*::
   Print tesseract parameters.