mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-24 11:09:06 +08:00
add --x_size as parameter for tesstrain (forwarded as --xsize when generating font images)
This commit is contained in:
parent
1ac76d8825
commit
4d9bc11fd3
@ -17,28 +17,28 @@
|
||||
|
||||
display_usage() {
|
||||
echo -e "USAGE: tesstrain.sh
|
||||
--exposures EXPOSURES # A list of exposure levels to use (e.g. "-1 0 1").
|
||||
--fontlist FONTS # A list of fontnames to train on.
|
||||
--fonts_dir FONTS_PATH # Path to font files.
|
||||
--lang LANG_CODE # ISO 639 code.
|
||||
--langdata_dir DATADIR # Path to tesseract/training/langdata directory.
|
||||
--output_dir OUTPUTDIR # Location of output traineddata file.
|
||||
--save_box_tiff # Save box/tiff pairs along with lstmf files.
|
||||
--overwrite # Safe to overwrite files in output_dir.
|
||||
--linedata_only # Only generate training data for lstmtraining.
|
||||
--output_dir OUTPUTDIR # Location of output traineddata file.
|
||||
--overwrite # Safe to overwrite files in output_dir.
|
||||
--run_shape_clustering # Run shape clustering (use for Indic langs).
|
||||
--exposures EXPOSURES # A list of exposure levels to use (e.g. "-1 0 1").
|
||||
|
||||
--maxpages # Specify maximum pages to output (default:0=all)
|
||||
--save_box_tiff # Save box/tiff pairs along with lstmf files.
|
||||
--x_size # Specify width of output image (default:3600)
|
||||
|
||||
OPTIONAL flags for input data. If unspecified we will look for them in
|
||||
the langdata_dir directory.
|
||||
--training_text TEXTFILE # Text to render and use for training.
|
||||
--wordlist WORDFILE # Word list for the language ordered by
|
||||
# decreasing frequency.
|
||||
|
||||
OPTIONAL flag to specify location of existing traineddata files, required
|
||||
during feature extraction. If unspecified will use TESSDATA_PREFIX defined in
|
||||
the current environment.
|
||||
--tessdata_dir TESSDATADIR # Path to tesseract/tessdata directory.
|
||||
|
||||
NOTE:
|
||||
The font names specified in --fontlist need to be recognizable by Pango using
|
||||
fontconfig. An easy way to list the canonical names of all fonts available on
|
||||
|
@ -33,6 +33,7 @@ else
|
||||
FONTS_DIR="/usr/share/fonts/"
|
||||
fi
|
||||
|
||||
X_SIZE=3600
|
||||
MAX_PAGES=0
|
||||
SAVE_BOX_TIFF=0
|
||||
OUTPUT_DIR="/tmp/tesstrain/tessdata"
|
||||
@ -185,6 +186,9 @@ parse_flags() {
|
||||
FONT_CONFIG_CACHE=$WORKSPACE_DIR/fc-cache
|
||||
mkdir -p $FONT_CONFIG_CACHE
|
||||
i=$j ;;
|
||||
--x_size)
|
||||
parse_value "X_SIZE" ${ARGV[$j]:-}
|
||||
i=$j ;;
|
||||
*)
|
||||
err_exit "Unrecognized argument ${ARGV[$i]}" ;;
|
||||
esac
|
||||
@ -246,7 +250,7 @@ generate_font_image() {
|
||||
|
||||
local common_args="--fontconfig_tmpdir=${FONT_CONFIG_CACHE}"
|
||||
common_args+=" --fonts_dir=${FONTS_DIR} --strip_unrenderable_words"
|
||||
common_args+=" --leading=${LEADING}"
|
||||
common_args+=" --leading=${LEADING} --xsize=${X_SIZE}"
|
||||
common_args+=" --char_spacing=${CHAR_SPACING} --exposure=${EXPOSURE}"
|
||||
common_args+=" --outputbase=${outbase} --max_pages=${MAX_PAGES}"
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user