mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-28 05:13:49 +08:00
allow user specified box/tiff pairs with tesstrain.sh
This commit is contained in:
parent
31c48a04d4
commit
323361b902
@ -28,7 +28,15 @@ echo -e "USAGE: tesstrain.sh
|
||||
--run_shape_clustering # Run shape clustering (use for Indic langs).
|
||||
--maxpages # Specify maximum pages to output (default:0=all)
|
||||
--save_box_tiff # Save box/tiff pairs along with lstmf files.
|
||||
<<<<<<< HEAD
|
||||
--x_size # Specify width of output image (default:3600)
|
||||
=======
|
||||
--xsize # Specify width of output image (default:3600)
|
||||
|
||||
OPTIONAL flag for specifying directory with user specified box/tiff pairs.
|
||||
Files should be named similar to ${LANG_CODE}.${fontname}.exp${EXPOSURE}.box/tif
|
||||
--my_boxtiff_dir MY_BOXTIFF_DIR # Location of user specified box/tiff files.
|
||||
>>>>>>> c7cd112... allow box/tiff pairs for LSTM training
|
||||
|
||||
OPTIONAL flags for input data. If unspecified we will look for them in
|
||||
the langdata_dir directory.
|
||||
@ -60,6 +68,14 @@ ARGV=("$@")
|
||||
parse_flags
|
||||
|
||||
mkdir -p ${TRAINING_DIR}
|
||||
|
||||
# Optionally seed the training directory with user-supplied box/tiff pairs.
# Runs only when MY_BOXTIFF_DIR is non-empty.
if [[ -n "${MY_BOXTIFF_DIR}" ]]; then
    tlog "\n=== Copy existing box/tiff pairs from '${MY_BOXTIFF_DIR}'"
    # Best effort: a missing *.box or *.tif set must not abort the run, so the
    # exit status of each cp is discarded (its stderr output is still shown).
    # '|| true' is used instead of piping into 'true', which would only mask
    # the failure by accident and not at all when 'pipefail' is enabled.
    cp "${MY_BOXTIFF_DIR}"/*.box "${TRAINING_DIR}" || true
    cp "${MY_BOXTIFF_DIR}"/*.tif "${TRAINING_DIR}" || true
    # Show what ended up in the training directory after the copy.
    ls -l "${TRAINING_DIR}"
fi
|
||||
|
||||
tlog "\n=== Starting training for language '${LANG_CODE}'"
|
||||
|
||||
source "$(dirname $0)/language-specific.sh"
|
||||
@ -72,8 +88,8 @@ phase_UP_generate_unicharset
|
||||
if ((LINEDATA)); then
|
||||
phase_E_extract_features " --psm 6 lstm.train " 8 "lstmf"
|
||||
make__lstmdata
|
||||
tlog "\nCreated starter traineddata for language '${LANG_CODE}'\n"
|
||||
tlog "\nRun lstmtraining to do the LSTM training for language '${LANG_CODE}'\n"
|
||||
tlog "\nCreated starter traineddata for LSTM training of language '${LANG_CODE}'\n"
|
||||
# Tell the user which tool to run next to continue LSTM training.
tlog "\nRun 'lstmtraining' command next to continue LSTM training for language '${LANG_CODE}'\n"
|
||||
else
|
||||
phase_D_generate_dawg
|
||||
phase_E_extract_features "box.train" 8 "tr"
|
||||
|
@ -36,6 +36,7 @@ fi
|
||||
X_SIZE=3600
|
||||
MAX_PAGES=0
|
||||
SAVE_BOX_TIFF=0
|
||||
MY_BOXTIFF_DIR=""
|
||||
OUTPUT_DIR="/tmp/tesstrain/tessdata"
|
||||
OVERWRITE=0
|
||||
LINEDATA=0
|
||||
@ -157,6 +158,9 @@ parse_flags() {
|
||||
--maxpages)
|
||||
parse_value "MAX_PAGES" ${ARGV[$j]:-}
|
||||
i=$j ;;
|
||||
--my_boxtiff_dir)
|
||||
parse_value "MY_BOXTIFF_DIR" ${ARGV[$j]:-}
|
||||
i=$j ;;
|
||||
--output_dir)
|
||||
parse_value "OUTPUT_DIR" ${ARGV[$j]:-}
|
||||
i=$j ;;
|
||||
|
Loading…
Reference in New Issue
Block a user