mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-19 06:53:36 +08:00
Merge pull request #2329 from Shreeshrii/kur_train
training script changes
This commit is contained in:
commit
1b40cae0f2
@ -22,7 +22,7 @@ VALID_LANGUAGE_CODES="afr amh ara asm aze aze_cyrl bel ben bih bod bos bul cat
|
||||
ceb ces chi_sim chi_tra chr cym cyr_lid dan deu div dzo
|
||||
ell eng enm epo est eus fas fil fin fra frk frm gle glg
|
||||
grc guj hat heb hin hrv hun hye iast iku ind isl ita ita_old
|
||||
jav jav_java jpn kan kat kat_old kaz khm kir kor kur lao lat
|
||||
jav jav_java jpn kan kat kat_old kaz khm kir kmr kor kur_ara lao lat
|
||||
lat_lid lav lit mal mar mkd mlt msa mya nep nld nor ori
|
||||
pan pol por pus ron rus san sin slk slv snd spa spa_old
|
||||
sqi srp srp_latn swa swe syr tam tel tgk tgl tha tir tur
|
||||
@ -1164,7 +1164,8 @@ set_lang_specific_parameters() {
|
||||
test -z "$FONTS" && FONTS=( "${OLD_GEORGIAN_FONTS[@]}" ) ;;
|
||||
kir ) test -z "$FONTS" && FONTS=( "${KYRGYZ_FONTS[@]}" )
|
||||
TRAINING_DATA_ARGUMENTS=" --infrequent_ratio=100" ;;
|
||||
kur ) test -z "$FONTS" && FONTS=( "${KURDISH_FONTS[@]}" ) ;;
|
||||
kmr ) test -z "$FONTS" && FONTS=( "${LATIN_FONTS[@]}" ) ;;
|
||||
kur_ara ) test -z "$FONTS" && FONTS=( "${KURDISH_FONTS[@]}" ) ;;
|
||||
|
||||
*) err_exit "Error: ${lang} is not a valid language code"
|
||||
esac
|
||||
|
@ -30,7 +30,7 @@ VALID_LANGUAGE_CODES = (
|
||||
"ceb ces chi_sim chi_tra chr cym cyr_lid dan deu div dzo "
|
||||
"ell eng enm epo est eus fas fil fin fra frk frm gle glg "
|
||||
"grc guj hat heb hin hrv hun hye iast iku ind isl ita ita_old "
|
||||
"jav jav_java jpn kan kat kat_old kaz khm kir kor kur lao lat "
|
||||
"jav jav_java jpn kan kat kat_old kaz khm kir kmr kor kur_ara lao lat "
|
||||
"lat_lid lav lit mal mar mkd mlt msa mya nep nld nor ori "
|
||||
"pan pol por pus ron rus san sin slk slv snd spa spa_old "
|
||||
"sqi srp srp_latn swa swe syr tam tel tgk tgl tha tir tur "
|
||||
@ -1302,7 +1302,10 @@ def set_lang_specific_parameters(ctx, lang):
|
||||
if not FONTS:
|
||||
FONTS = KYRGYZ_FONTS
|
||||
TRAINING_DATA_ARGUMENTS += ["--infrequent_ratio=100"]
|
||||
elif lang == "kur":
|
||||
elif lang == "kmr":
|
||||
if not FONTS:
|
||||
FONTS = LATIN_FONTS
|
||||
elif lang == "kur_ara":
|
||||
if not FONTS:
|
||||
FONTS = KURDISH_FONTS
|
||||
else:
|
||||
|
@ -172,6 +172,10 @@ parser.add_argument(
|
||||
"--noextract_font_properties", dest="extract_font_properties", action="store_false"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--distort_image", dest="distort_image", help="--distort_image=true."
|
||||
)
|
||||
|
||||
tessdata_group = parser.add_argument_group(
|
||||
"tessdata",
|
||||
"OPTIONAL flag to specify location of existing traineddata files, required during feature extraction. If unspecified will use TESSDATA_PREFIX defined in the current environment.",
|
||||
@ -310,6 +314,7 @@ def generate_font_image(ctx, font, exposure, char_spacing):
|
||||
f"--exposure={exposure}",
|
||||
f"--outputbase={outbase}",
|
||||
f"--max_pages={ctx.max_pages}",
|
||||
f"--distort_image={ctx.distort_image}",
|
||||
]
|
||||
|
||||
# add --writing_mode=vertical-upright to common_args if the font is
|
||||
|
Loading…
Reference in New Issue
Block a user