Merge pull request #2329 from Shreeshrii/kur_train

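Training script changes: replace the `kur` language code with `kmr` (Latin fonts) and `kur_ara` (Kurdish fonts), and add a `--distort_image` option that is passed through when generating font images.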
This commit is contained in:
zdenop 2019-03-16 10:27:35 +01:00 committed by GitHub
commit 1b40cae0f2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 13 additions and 4 deletions

View File

@@ -22,7 +22,7 @@ VALID_LANGUAGE_CODES="afr amh ara asm aze aze_cyrl bel ben bih bod bos bul cat
ceb ces chi_sim chi_tra chr cym cyr_lid dan deu div dzo ceb ces chi_sim chi_tra chr cym cyr_lid dan deu div dzo
ell eng enm epo est eus fas fil fin fra frk frm gle glg ell eng enm epo est eus fas fil fin fra frk frm gle glg
grc guj hat heb hin hrv hun hye iast iku ind isl ita ita_old grc guj hat heb hin hrv hun hye iast iku ind isl ita ita_old
jav jav_java jpn kan kat kat_old kaz khm kir kor kur lao lat jav jav_java jpn kan kat kat_old kaz khm kir kmr kor kur_ara lao lat
lat_lid lav lit mal mar mkd mlt msa mya nep nld nor ori lat_lid lav lit mal mar mkd mlt msa mya nep nld nor ori
pan pol por pus ron rus san sin slk slv snd spa spa_old pan pol por pus ron rus san sin slk slv snd spa spa_old
sqi srp srp_latn swa swe syr tam tel tgk tgl tha tir tur sqi srp srp_latn swa swe syr tam tel tgk tgl tha tir tur
@@ -1164,7 +1164,8 @@ set_lang_specific_parameters() {
test -z "$FONTS" && FONTS=( "${OLD_GEORGIAN_FONTS[@]}" ) ;; test -z "$FONTS" && FONTS=( "${OLD_GEORGIAN_FONTS[@]}" ) ;;
kir ) test -z "$FONTS" && FONTS=( "${KYRGYZ_FONTS[@]}" ) kir ) test -z "$FONTS" && FONTS=( "${KYRGYZ_FONTS[@]}" )
TRAINING_DATA_ARGUMENTS=" --infrequent_ratio=100" ;; TRAINING_DATA_ARGUMENTS=" --infrequent_ratio=100" ;;
kur ) test -z "$FONTS" && FONTS=( "${KURDISH_FONTS[@]}" ) ;; kmr ) test -z "$FONTS" && FONTS=( "${LATIN_FONTS[@]}" ) ;;
kur_ara ) test -z "$FONTS" && FONTS=( "${KURDISH_FONTS[@]}" ) ;;
*) err_exit "Error: ${lang} is not a valid language code" *) err_exit "Error: ${lang} is not a valid language code"
esac esac

View File

@@ -30,7 +30,7 @@ VALID_LANGUAGE_CODES = (
"ceb ces chi_sim chi_tra chr cym cyr_lid dan deu div dzo " "ceb ces chi_sim chi_tra chr cym cyr_lid dan deu div dzo "
"ell eng enm epo est eus fas fil fin fra frk frm gle glg " "ell eng enm epo est eus fas fil fin fra frk frm gle glg "
"grc guj hat heb hin hrv hun hye iast iku ind isl ita ita_old " "grc guj hat heb hin hrv hun hye iast iku ind isl ita ita_old "
"jav jav_java jpn kan kat kat_old kaz khm kir kor kur lao lat " "jav jav_java jpn kan kat kat_old kaz khm kir kmr kor kur_ara lao lat "
"lat_lid lav lit mal mar mkd mlt msa mya nep nld nor ori " "lat_lid lav lit mal mar mkd mlt msa mya nep nld nor ori "
"pan pol por pus ron rus san sin slk slv snd spa spa_old " "pan pol por pus ron rus san sin slk slv snd spa spa_old "
"sqi srp srp_latn swa swe syr tam tel tgk tgl tha tir tur " "sqi srp srp_latn swa swe syr tam tel tgk tgl tha tir tur "
@@ -1302,7 +1302,10 @@ def set_lang_specific_parameters(ctx, lang):
if not FONTS: if not FONTS:
FONTS = KYRGYZ_FONTS FONTS = KYRGYZ_FONTS
TRAINING_DATA_ARGUMENTS += ["--infrequent_ratio=100"] TRAINING_DATA_ARGUMENTS += ["--infrequent_ratio=100"]
elif lang == "kur": elif lang == "kmr":
if not FONTS:
FONTS = LATIN_FONTS
elif lang == "kur_ara":
if not FONTS: if not FONTS:
FONTS = KURDISH_FONTS FONTS = KURDISH_FONTS
else: else:

View File

@@ -172,6 +172,10 @@ parser.add_argument(
"--noextract_font_properties", dest="extract_font_properties", action="store_false" "--noextract_font_properties", dest="extract_font_properties", action="store_false"
) )
parser.add_argument(
"--distort_image", dest="distort_image", help="--distort_image=true."
)
tessdata_group = parser.add_argument_group( tessdata_group = parser.add_argument_group(
"tessdata", "tessdata",
"OPTIONAL flag to specify location of existing traineddata files, required during feature extraction. If unspecified will use TESSDATA_PREFIX defined in the current environment.", "OPTIONAL flag to specify location of existing traineddata files, required during feature extraction. If unspecified will use TESSDATA_PREFIX defined in the current environment.",
@@ -310,6 +314,7 @@ def generate_font_image(ctx, font, exposure, char_spacing):
f"--exposure={exposure}", f"--exposure={exposure}",
f"--outputbase={outbase}", f"--outputbase={outbase}",
f"--max_pages={ctx.max_pages}", f"--max_pages={ctx.max_pages}",
f"--distort_image={ctx.distort_image}",
] ]
# add --writing_mode=vertical-upright to common_args if the font is # add --writing_mode=vertical-upright to common_args if the font is