Add --x_size as a parameter for tesstrain (forwarded as --xsize when generating font images)

This commit is contained in:
Shree Devi Kumar 2019-01-27 07:00:25 +00:00
parent 1ac76d8825
commit 4d9bc11fd3
2 changed files with 12 additions and 8 deletions

View File

@ -17,28 +17,28 @@
display_usage() {
echo -e "USAGE: tesstrain.sh
--exposures EXPOSURES # A list of exposure levels to use (e.g. "-1 0 1").
--fontlist FONTS # A list of fontnames to train on.
--fonts_dir FONTS_PATH # Path to font files.
--lang LANG_CODE # ISO 639 code.
--langdata_dir DATADIR # Path to tesseract/training/langdata directory.
--output_dir OUTPUTDIR # Location of output traineddata file.
--save_box_tiff # Save box/tiff pairs along with lstmf files.
--overwrite # Safe to overwrite files in output_dir.
--linedata_only # Only generate training data for lstmtraining.
--output_dir OUTPUTDIR # Location of output traineddata file.
--overwrite # Safe to overwrite files in output_dir.
--run_shape_clustering # Run shape clustering (use for Indic langs).
--exposures EXPOSURES # A list of exposure levels to use (e.g. "-1 0 1").
--maxpages # Specify maximum pages to output (default:0=all)
--save_box_tiff # Save box/tiff pairs along with lstmf files.
--x_size # Specify width of output image (default:3600)
OPTIONAL flags for input data. If unspecified we will look for them in
the langdata_dir directory.
--training_text TEXTFILE # Text to render and use for training.
--wordlist WORDFILE # Word list for the language ordered by
# decreasing frequency.
OPTIONAL flag to specify location of existing traineddata files, required
during feature extraction. If unspecified will use TESSDATA_PREFIX defined in
the current environment.
--tessdata_dir TESSDATADIR # Path to tesseract/tessdata directory.
NOTE:
The font names specified in --fontlist need to be recognizable by Pango using
fontconfig. An easy way to list the canonical names of all fonts available on

View File

@ -33,6 +33,7 @@ else
FONTS_DIR="/usr/share/fonts/"
fi
# Default values for the command-line flags; the corresponding options are
# described in the usage text.

# Width of the output image (--x_size); forwarded as --xsize when font images
# are generated.
X_SIZE=3600
# Maximum number of pages to output (--maxpages); 0 means all pages.
MAX_PAGES=0
# Set via --save_box_tiff to keep box/tiff pairs along with lstmf files.
# NOTE(review): 0 presumably means "do not save" -- confirm against the flag parser.
SAVE_BOX_TIFF=0
# Location of the output traineddata file (--output_dir).
OUTPUT_DIR="/tmp/tesstrain/tessdata"
@ -185,6 +186,9 @@ parse_flags() {
FONT_CONFIG_CACHE=$WORKSPACE_DIR/fc-cache
mkdir -p $FONT_CONFIG_CACHE
i=$j ;;
--x_size)
parse_value "X_SIZE" ${ARGV[$j]:-}
i=$j ;;
*)
err_exit "Unrecognized argument ${ARGV[$i]}" ;;
esac
@ -246,7 +250,7 @@ generate_font_image() {
local common_args="--fontconfig_tmpdir=${FONT_CONFIG_CACHE}"
common_args+=" --fonts_dir=${FONTS_DIR} --strip_unrenderable_words"
common_args+=" --leading=${LEADING}"
common_args+=" --leading=${LEADING} --xsize=${X_SIZE}"
common_args+=" --char_spacing=${CHAR_SPACING} --exposure=${EXPOSURE}"
common_args+=" --outputbase=${outbase} --max_pages=${MAX_PAGES}"