mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-19 06:53:36 +08:00
Fix some unbound variables and other small issues in training shell scripts
Also fix the logging helper functions so that they work without a log file. Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
parent
a4b03fbb27
commit
acca4fb999
@ -603,7 +603,7 @@ BURMESE_FONTS=( \
|
||||
"Padauk" \
|
||||
"TharLon" \
|
||||
)
|
||||
|
||||
|
||||
JAVANESE_FONTS=( \
|
||||
"Prada" \
|
||||
)
|
||||
@ -909,7 +909,8 @@ set_lang_specific_parameters() {
|
||||
# Language to mix with the language for maximum accuracy. Defaults to eng.
|
||||
# If no language is good, set to the base language.
|
||||
MIX_LANG="eng"
|
||||
FONTS=
|
||||
EXPOSURES=${EXPOSURES:-}
|
||||
FONTS=${FONTS:-}
|
||||
|
||||
case ${lang} in
|
||||
# Latin languages.
|
||||
@ -1176,7 +1177,7 @@ set_lang_specific_parameters() {
|
||||
test -z "$FONTS" && FONTS=( "${LATIN_FONTS[@]}" )
|
||||
|
||||
# Default to 0 exposure if it hasn't been set
|
||||
test -z "${EXPOSURES:-}" && EXPOSURES=0
|
||||
test -z "$EXPOSURES" && EXPOSURES=0
|
||||
# Set right-to-left and normalization mode.
|
||||
case "${LANG_CODE}" in
|
||||
ara | div| fas | pus | snd | syr | uig | urd | kur_ara | heb | yid )
|
||||
|
@ -47,11 +47,19 @@ TESSDATA_PREFIX=${TESSDATA_PREFIX:-}
|
||||
|
||||
# Logging helper functions.
|
||||
# tlog: print all arguments with `echo -e` (backslash escapes such as \n are
# interpreted) and, when a log file is configured, append the same text to it.
# Globals:   LOG_FILE (read) - path handed to `tee -a`.
# Arguments: $* - message words; the expansion is unquoted, so the words are
#            re-split and re-joined with single spaces by echo.
# NOTE(review): this span is a diff hunk without +/- markers. It shows both a
# one-line body and an if/else body; going by the commit message, the if/else
# form is presumably the replacement for the one-liner - confirm against the
# repository.
tlog() {
|
||||
# One-line form: `2>&1 1>&2` points both stderr and stdout of echo at the
# pipe, and tee always receives ${LOG_FILE} unguarded (it expands to nothing
# when LOG_FILE is unset or empty).
echo -e $* 2>&1 1>&2 | tee -a ${LOG_FILE}
|
||||
# Guarded form: `${LOG_FILE:-}` expands to an empty string when LOG_FILE is
# unset, so the test itself never references an unbound variable.
if test -z "${LOG_FILE:-}"; then
|
||||
echo -e $*
|
||||
else
|
||||
# tee copies the message to stdout and appends (-a) it to LOG_FILE.
echo -e $* | tee -a ${LOG_FILE}
|
||||
fi
|
||||
}
|
||||
|
||||
# err_exit: print the arguments prefixed with "ERROR: " using `echo -e`,
# append the same text to the log file when one is configured, then terminate
# the shell with exit status 1.
# Globals:   LOG_FILE (read) - path handed to `tee -a`.
# Arguments: $* - message words; the expansion is unquoted, so the words are
#            re-split and re-joined with single spaces by echo.
# NOTE(review): this span is a diff hunk without +/- markers. It shows both a
# one-line body and an if/else body; going by the commit message, the if/else
# form is presumably the replacement for the one-liner - confirm against the
# repository.
err_exit() {
|
||||
# One-line form: `2>&1 1>&2` points both stderr and stdout of echo at the
# pipe, and tee always receives ${LOG_FILE} unguarded (it expands to nothing
# when LOG_FILE is unset or empty).
echo -e "ERROR: "$* 2>&1 1>&2 | tee -a ${LOG_FILE}
|
||||
# Guarded form: `${LOG_FILE:-}` expands to an empty string when LOG_FILE is
# unset, so the test itself never references an unbound variable.
if test -z "${LOG_FILE:-}"; then
|
||||
echo -e "ERROR: "$*
|
||||
else
|
||||
# tee copies the message to stdout and appends (-a) it to LOG_FILE.
echo -e "ERROR: "$* | tee -a ${LOG_FILE}
|
||||
fi
|
||||
# Reached after either branch: the function never returns to its caller.
exit 1
|
||||
}
|
||||
|
||||
@ -95,8 +103,8 @@ check_file_readable() {
|
||||
# if it looks like a flag.
|
||||
# Usage: parse_value VAR_NAME VALUE
|
||||
parse_value() {
|
||||
local val="$2"
|
||||
if [[ -z $val ]]; then
|
||||
local val="${2:-}"
|
||||
if [[ -z "$val" ]]; then
|
||||
err_exit "Missing value for variable $1"
|
||||
exit
|
||||
fi
|
||||
@ -137,19 +145,19 @@ parse_flags() {
|
||||
parse_value "EXPOSURES" "$exp"
|
||||
i=$((j-1)) ;;
|
||||
--fonts_dir)
|
||||
parse_value "FONTS_DIR" ${ARGV[$j]}
|
||||
parse_value "FONTS_DIR" ${ARGV[$j]:-}
|
||||
i=$j ;;
|
||||
--lang)
|
||||
parse_value "LANG_CODE" ${ARGV[$j]}
|
||||
parse_value "LANG_CODE" ${ARGV[$j]:-}
|
||||
i=$j ;;
|
||||
--langdata_dir)
|
||||
parse_value "LANGDATA_ROOT" ${ARGV[$j]}
|
||||
parse_value "LANGDATA_ROOT" ${ARGV[$j]:-}
|
||||
i=$j ;;
|
||||
--maxpages)
|
||||
parse_value "MAX_PAGES" ${ARGV[$j]}
|
||||
parse_value "MAX_PAGES" ${ARGV[$j]:-}
|
||||
i=$j ;;
|
||||
--output_dir)
|
||||
parse_value "OUTPUT_DIR" ${ARGV[$j]}
|
||||
parse_value "OUTPUT_DIR" ${ARGV[$j]:-}
|
||||
i=$j ;;
|
||||
--overwrite)
|
||||
OVERWRITE=1 ;;
|
||||
@ -162,18 +170,18 @@ parse_flags() {
|
||||
--noextract_font_properties)
|
||||
EXTRACT_FONT_PROPERTIES=0 ;;
|
||||
--tessdata_dir)
|
||||
parse_value "TESSDATA_DIR" ${ARGV[$j]}
|
||||
parse_value "TESSDATA_DIR" ${ARGV[$j]:-}
|
||||
i=$j ;;
|
||||
--training_text)
|
||||
parse_value "TRAINING_TEXT" "${ARGV[$j]}"
|
||||
parse_value "TRAINING_TEXT" "${ARGV[$j]:-}"
|
||||
i=$j ;;
|
||||
--wordlist)
|
||||
parse_value "WORDLIST_FILE" ${ARGV[$j]}
|
||||
parse_value "WORDLIST_FILE" ${ARGV[$j]:-}
|
||||
i=$j ;;
|
||||
--workspace_dir)
|
||||
rmdir "$FONT_CONFIG_CACHE"
|
||||
rmdir "$WORKSPACE_DIR"
|
||||
parse_value "WORKSPACE_DIR" ${ARGV[$j]}
|
||||
parse_value "WORKSPACE_DIR" ${ARGV[$j]:-}
|
||||
FONT_CONFIG_CACHE=$WORKSPACE_DIR/fc-cache
|
||||
mkdir -p $FONT_CONFIG_CACHE
|
||||
i=$j ;;
|
||||
@ -182,13 +190,13 @@ parse_flags() {
|
||||
esac
|
||||
i=$((i+1))
|
||||
done
|
||||
if [[ -z ${LANG_CODE} ]]; then
|
||||
if [[ -z ${LANG_CODE:-} ]]; then
|
||||
err_exit "Need to specify a language --lang"
|
||||
fi
|
||||
if [[ -z ${LANGDATA_ROOT} ]]; then
|
||||
if [[ -z ${LANGDATA_ROOT:-} ]]; then
|
||||
err_exit "Need to specify path to language files --langdata_dir"
|
||||
fi
|
||||
if [[ -z ${TESSDATA_DIR} ]]; then
|
||||
if [[ -z ${TESSDATA_DIR:-} ]]; then
|
||||
if [[ -z ${TESSDATA_PREFIX} ]]; then
|
||||
err_exit "Need to specify a --tessdata_dir or have a "\
|
||||
"TESSDATA_PREFIX variable defined in your environment"
|
||||
@ -267,13 +275,13 @@ generate_font_image() {
|
||||
|
||||
# Phase I : Generate (I)mages from training text for each font.
|
||||
phase_I_generate_image() {
|
||||
local par_factor=$1
|
||||
local par_factor=${1:-}
|
||||
if [[ -z ${par_factor} || ${par_factor} -le 0 ]]; then
|
||||
par_factor=1
|
||||
fi
|
||||
tlog "\n=== Phase I: Generating training images ==="
|
||||
if [[ -z ${TRAINING_TEXT} ]] || [[ ! -r ${TRAINING_TEXT} ]]; then
|
||||
err_exit "Could not find training text file ${TRAINING_TEXT}"
|
||||
if [[ -z ${TRAINING_TEXT:-} ]] || test ! -r "${TRAINING_TEXT}"; then
|
||||
err_exit "Could not find training text file ${TRAINING_TEXT:-}"
|
||||
fi
|
||||
CHAR_SPACING="0.0"
|
||||
|
||||
@ -545,7 +553,7 @@ make__lstmdata() {
|
||||
--puncs "${lang_prefix}.punc" \
|
||||
--output_dir "${OUTPUT_DIR}" --lang "${LANG_CODE}" \
|
||||
"${pass_through}" "${lang_is_rtl}"
|
||||
|
||||
|
||||
if ((SAVE_BOX_TIFF)); then
|
||||
tlog "\n=== Saving box/tiff pairs for training data ==="
|
||||
for f in "${TRAINING_DIR}/${LANG_CODE}".*.box; do
|
||||
|
Loading…
Reference in New Issue
Block a user