Merge pull request #3327 from Shreeshrii/patch-1

Create unicharset from training text to avoid normalization errors
This commit is contained in:
Egor Pugin 2021-03-14 22:09:47 +03:00 committed by GitHub
commit 2f4f7446b6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -353,8 +353,13 @@ phase_UP_generate_unicharset() {
local box_files=$(ls ${TRAINING_DIR}/*.box)
UNICHARSET_FILE="${TRAINING_DIR}/${LANG_CODE}.unicharset"
-  run_command unicharset_extractor --output_unicharset "${UNICHARSET_FILE}" \
-    --norm_mode "${NORM_MODE}" ${box_files}
+  if [[ "${NORM_MODE}" == "2" ]] && [[ "${LANG_IS_RTL}" == "0" ]] ; then
+    run_command unicharset_extractor --output_unicharset "${UNICHARSET_FILE}" \
+      --norm_mode "${NORM_MODE}" ${TRAINING_TEXT}
+  else
+    run_command unicharset_extractor --output_unicharset "${UNICHARSET_FILE}" \
+      --norm_mode "${NORM_MODE}" ${box_files}
+  fi
check_file_readable ${UNICHARSET_FILE}
XHEIGHTS_FILE="${TRAINING_DIR}/${LANG_CODE}.xheights"