Mirror of https://github.com/tesseract-ocr/tesseract.git (synced 2024-12-14 00:31:47 +08:00)
Merge pull request #2329 from Shreeshrii/kur_train
training script changes
This commit is contained in:
commit
1b40cae0f2
@@ -22,7 +22,7 @@ VALID_LANGUAGE_CODES="afr amh ara asm aze aze_cyrl bel ben bih bod bos bul cat
|
|||||||
ceb ces chi_sim chi_tra chr cym cyr_lid dan deu div dzo
|
ceb ces chi_sim chi_tra chr cym cyr_lid dan deu div dzo
|
||||||
ell eng enm epo est eus fas fil fin fra frk frm gle glg
|
ell eng enm epo est eus fas fil fin fra frk frm gle glg
|
||||||
grc guj hat heb hin hrv hun hye iast iku ind isl ita ita_old
|
grc guj hat heb hin hrv hun hye iast iku ind isl ita ita_old
|
||||||
jav jav_java jpn kan kat kat_old kaz khm kir kor kur lao lat
|
jav jav_java jpn kan kat kat_old kaz khm kir kmr kor kur_ara lao lat
|
||||||
lat_lid lav lit mal mar mkd mlt msa mya nep nld nor ori
|
lat_lid lav lit mal mar mkd mlt msa mya nep nld nor ori
|
||||||
pan pol por pus ron rus san sin slk slv snd spa spa_old
|
pan pol por pus ron rus san sin slk slv snd spa spa_old
|
||||||
sqi srp srp_latn swa swe syr tam tel tgk tgl tha tir tur
|
sqi srp srp_latn swa swe syr tam tel tgk tgl tha tir tur
|
||||||
@@ -1164,7 +1164,8 @@ set_lang_specific_parameters() {
|
|||||||
test -z "$FONTS" && FONTS=( "${OLD_GEORGIAN_FONTS[@]}" ) ;;
|
test -z "$FONTS" && FONTS=( "${OLD_GEORGIAN_FONTS[@]}" ) ;;
|
||||||
kir ) test -z "$FONTS" && FONTS=( "${KYRGYZ_FONTS[@]}" )
|
kir ) test -z "$FONTS" && FONTS=( "${KYRGYZ_FONTS[@]}" )
|
||||||
TRAINING_DATA_ARGUMENTS=" --infrequent_ratio=100" ;;
|
TRAINING_DATA_ARGUMENTS=" --infrequent_ratio=100" ;;
|
||||||
kur ) test -z "$FONTS" && FONTS=( "${KURDISH_FONTS[@]}" ) ;;
|
kmr ) test -z "$FONTS" && FONTS=( "${LATIN_FONTS[@]}" ) ;;
|
||||||
|
kur_ara ) test -z "$FONTS" && FONTS=( "${KURDISH_FONTS[@]}" ) ;;
|
||||||
|
|
||||||
*) err_exit "Error: ${lang} is not a valid language code"
|
*) err_exit "Error: ${lang} is not a valid language code"
|
||||||
esac
|
esac
|
||||||
|
@@ -30,7 +30,7 @@ VALID_LANGUAGE_CODES = (
|
|||||||
"ceb ces chi_sim chi_tra chr cym cyr_lid dan deu div dzo "
|
"ceb ces chi_sim chi_tra chr cym cyr_lid dan deu div dzo "
|
||||||
"ell eng enm epo est eus fas fil fin fra frk frm gle glg "
|
"ell eng enm epo est eus fas fil fin fra frk frm gle glg "
|
||||||
"grc guj hat heb hin hrv hun hye iast iku ind isl ita ita_old "
|
"grc guj hat heb hin hrv hun hye iast iku ind isl ita ita_old "
|
||||||
"jav jav_java jpn kan kat kat_old kaz khm kir kor kur lao lat "
|
"jav jav_java jpn kan kat kat_old kaz khm kir kmr kor kur_ara lao lat "
|
||||||
"lat_lid lav lit mal mar mkd mlt msa mya nep nld nor ori "
|
"lat_lid lav lit mal mar mkd mlt msa mya nep nld nor ori "
|
||||||
"pan pol por pus ron rus san sin slk slv snd spa spa_old "
|
"pan pol por pus ron rus san sin slk slv snd spa spa_old "
|
||||||
"sqi srp srp_latn swa swe syr tam tel tgk tgl tha tir tur "
|
"sqi srp srp_latn swa swe syr tam tel tgk tgl tha tir tur "
|
||||||
@@ -1302,7 +1302,10 @@ def set_lang_specific_parameters(ctx, lang):
|
|||||||
if not FONTS:
|
if not FONTS:
|
||||||
FONTS = KYRGYZ_FONTS
|
FONTS = KYRGYZ_FONTS
|
||||||
TRAINING_DATA_ARGUMENTS += ["--infrequent_ratio=100"]
|
TRAINING_DATA_ARGUMENTS += ["--infrequent_ratio=100"]
|
||||||
elif lang == "kur":
|
elif lang == "kmr":
|
||||||
|
if not FONTS:
|
||||||
|
FONTS = LATIN_FONTS
|
||||||
|
elif lang == "kur_ara":
|
||||||
if not FONTS:
|
if not FONTS:
|
||||||
FONTS = KURDISH_FONTS
|
FONTS = KURDISH_FONTS
|
||||||
else:
|
else:
|
||||||
|
@@ -172,6 +172,10 @@ parser.add_argument(
|
|||||||
"--noextract_font_properties", dest="extract_font_properties", action="store_false"
|
"--noextract_font_properties", dest="extract_font_properties", action="store_false"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"--distort_image", dest="distort_image", help="--distort_image=true."
|
||||||
|
)
|
||||||
|
|
||||||
tessdata_group = parser.add_argument_group(
|
tessdata_group = parser.add_argument_group(
|
||||||
"tessdata",
|
"tessdata",
|
||||||
"OPTIONAL flag to specify location of existing traineddata files, required during feature extraction. If unspecified will use TESSDATA_PREFIX defined in the current environment.",
|
"OPTIONAL flag to specify location of existing traineddata files, required during feature extraction. If unspecified will use TESSDATA_PREFIX defined in the current environment.",
|
||||||
@@ -310,6 +314,7 @@ def generate_font_image(ctx, font, exposure, char_spacing):
|
|||||||
f"--exposure={exposure}",
|
f"--exposure={exposure}",
|
||||||
f"--outputbase={outbase}",
|
f"--outputbase={outbase}",
|
||||||
f"--max_pages={ctx.max_pages}",
|
f"--max_pages={ctx.max_pages}",
|
||||||
|
f"--distort_image={ctx.distort_image}",
|
||||||
]
|
]
|
||||||
|
|
||||||
# add --writing_mode=vertical-upright to common_args if the font is
|
# add --writing_mode=vertical-upright to common_args if the font is
|
||||||
|
Loading…
Reference in New Issue
Block a user