Created Data Files in tessdata_fast (markdown)

Shreeshrii 2018-02-19 19:52:40 +05:30
parent 9bcc35c707
commit 21f3bcbcad

@ -0,0 +1,178 @@
## Data Files for Version 4.00
We have three sets of .traineddata files for tesseract on GitHub in three separate repositories.
* https://github.com/tesseract-ocr/tessdata_fast (September 15, 2017)
* https://github.com/tesseract-ocr/tessdata_best (September 15, 2017)
* https://github.com/tesseract-ocr/tessdata (November 2016)
When using the models in the **`tessdata_best`** and **`tessdata_fast`** repositories, only the new LSTM-based OCR engine is supported. The legacy tesseract engine is NOT supported with these files, so Tesseract's oem modes '0' and '2' won't work with them.
## Information specific to tessdata_fast
Most users will use **`tessdata_fast`** as that is what will be shipped as part of Linux distributions.
### Version string : 4.00.00alpha : [Network specification]
```
afr : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys64Lfx96Lrx96Lfx512O1c1]
amh : synth20170629
Arabic : synth20170629 : [1,48,0,1Ct3,3,16Mp3,3Lfys64Lfx96Lrx96Lfx128O1c1]
ara : synth20170629
Armenian : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys64Lfx96Lrx96Lfx384O1c1]
asm : synth20170629
aze_cyrl : synth20170629
aze : synth20170629
bel : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys48Lfx96Lrx96Lfx128O1c1]
Bengali : synth20170629 : [1,48,0,1Ct3,3,16Mp3,3Lfys64Lfx96Lrx96Lfx384O1c1]
ben : synth20170629
bod : synth20170629
bos : synth20170629
bre : synth20170629
bul : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys48Lfx96Lrx96Lfx128O1c1]
Canadian_Aboriginal : synth20170629
cat : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys48Lfx96Lrx96Lfx192O1c1]
ceb : synth20170629
ces : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys64Lfx96Lrx96Lfx192O1c1]
Cherokee : synth20170629
chi_sim : synth20170629 : [1,48,0,1Ct3,3,16Mp3,3Lfys64Lfx96Lrx96Lfx512O1c1]
chi_sim_vert : synth20170629
chi_tra : synth20170629 : [1,48,0,1Ct3,3,16Mp3,3Lfys64Lfx96Lrx96Lfx512O1c1]
chi_tra_vert : synth20170629
chr : synth20170629
cos : synth20170629
cym : synth20170629
Cyrillic : synth20170629
dan : synth20170629
deu : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys64Lfx96Lrx96Lfx192O1c1]
Devanagari : synth20170629
div : synth20170629
dzo : synth20170629
ell : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys48Lfx96Lrx96Lfx128O1c1]
eng : synth20170629
enm : synth20170629
epo : synth20170629
est : synth20170629
Ethiopic : synth20170629
eus : synth20170629
fao : synth20170629
fas : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys48Lfx96Lrx96Lfx128O1c1]
fil : synth20170629
fin : synth20170629
Fraktur : synth20170629
fra : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys48Lfx96Lrx96Lfx128O1c1]
frk : synth20170629
frm : synth20170629
fry : synth20170629
Georgian : synth20170629
gla : synth20170629
gle : synth20170629
glg : synth20170629
grc : synth20170629
Greek : synth20170629
Gujarati : synth20170629
guj : synth20170629
Gurmukhi : synth20170629
Hangul : synth20170629
Hangul_vert : synth20170629
HanS : synth20170629 : [1,48,0,1Ct3,3,16Mp3,3Lfys64Lfx96Lrx96Lfx512O1c1]
HanS_vert : synth20170629
HanT : synth20170629
HanT_vert : synth20170629
hat : synth20170629
Hebrew : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys48Lfx96Lrx96Lfx128O1c1]
heb : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys48Lfx96Lrx96Lfx128O1c1]
hin : synth20170629
hrv : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys48Lfx96Lrx96Lfx192O1c1]
hun : synth20170629 : [1,48,0,1Ct3,3,16Mp3,3Lfys64Lfx96Lrx96Lfx192O1c1]
hye : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys48Lfx96Lrx96Lfx128O1c1]
iku : synth20170629
ind : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys64Lfx96Lrx96Lfx128O1c1]
isl : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys64Lfx96Lrx96Lfx128O1c1]
ita_old : synth20170629
ita : synth20170629
Japanese : synth20170629
Japanese_vert : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys64Lfx96Lrx96Lfx512O1c1]
jav : synth20170629
jpn : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys64Lfx96Lrx96Lfx384O1c1]
jpn_vert : synth20170629 : [1,48,0,1Ct3,3,16Mp3,3Lfys64Lfx96Lrx96Lfx512O1c1]
Kannada : synth20170629
kan : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys64Lfx96Lrx96Lfx384O1c1]
kat_old : synth20170629
kat : synth20170629
kaz : synth20170629
Khmer : synth20170629
khm : synth20170629 : [1,48,0,1Ct3,3,16Mp3,3Lfys64Lfx96Lrx96Lfx384O1c1]
kir : synth20170629
kor : synth20170629 : [1,48,0,1Ct3,3,16Mp3,3Lfys64Lfx96Lrx96Lfx384O1c1]
kor_vert : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys48Lfx96Lrx96Lfx192O1c1]
kur_ara : synth20170629
lao : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys48Lfx96Lrx96Lfx128O1c1]
Lao : synth20170629
Latin : synth20170629
lat : synth20170629
lav : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys64Lfx96Lrx96Lfx128O1c1]
lit : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys48Lfx96Lrx96Lfx192O1c1]
ltz : synth20170629
Malayalam : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys64Lfx96Lrx96Lfx128O1c1]
mal : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys48Lfx96Lrx96Lfx128O1c1]
mar : synth20170629
mkd : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys64Lfx96Lrx96Lfx192O1c1]
mlt : synth20170629
mon : synth20170629
mri : synth20170629
msa : synth20170629 : [1,48,0,1Ct3,3,16Mp3,3Lfys64Lfx96Lrx96Lfx384O1c1]
Myanmar : synth20170629
mya : synth20170629
nep : synth20170629 : [1,48,0,1Ct3,3,16Mp3,3Lfys64Lfx96Lrx96Lfx128O1c1]
nld : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys48Lfx96Lrx96Lfx192O1c1]
nor : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys64Lfx96Lrx96Lfx384O1c1]
oci : synth20170629
ori : synth20170629
Oriya : synth20170629
pan : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys48Lfx96Lrx96Lfx128O1c1]
pol : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys64Lfx96Lrx96Lfx128O1c1]
por : synth20170629
pus : synth20170629
que : synth20170629
ron : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys48Lfx96Lrx96Lfx128O1c1]
rus : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys64Lfx96Lrx96Lfx128O1c1]
san : synth20170629
Sinhala : synth20170629
sin : synth20170629
slk : synth20170629
slv : synth20170629
snd : synth20170629
spa_old : synth20170629
spa : synth20170629
sqi : synth20170629
srp_latn : synth20170629
srp : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys48Lfx96Lrx96Lfx128O1c1]
sun : synth20170629
swa : synth20170629
swe : synth20170629 : [1,48,0,1Ct3,3,16Mp3,3Lfys64Lfx96Lrx96Lfx512O1c1]
Syriac : synth20170629
syr : synth20170629
Tamil : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys64Lfx96Lrx96Lfx192O1c1]
tam : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys48Lfx96Lrx96Lfx128O1c1]
tat : synth20170629
tel : synth20170629 : [1,48,0,1Ct3,3,16Mp3,3Lfys64Lfx96Lrx96Lfx128O1c1]
Telugu : synth20170629 : [1,48,0,1Ct3,3,16Mp3,3Lfys64Lfx96Lrx96Lfx128O1c1]
tgk : synth20170629
Thaana : synth20170629
Thai : synth20170629
tha : synth20170629
Tibetan : synth20170629
tir : synth20170629
ton : synth20170629
tur : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys48Lfx96Lrx96Lfx128O1c1]
uig : synth20170629
ukr : synth20170629
urd : synth20170629
uzb_cyrl : synth20170629
uzb : synth20170629
Vietnamese : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys64Lfx96Lrx96Lfx512O1c1]
vie : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys48Lfx96Lrx96Lfx192O1c1]
yid : synth20170629 : [1,36,0,1Ct3,3,16Mp3,3Lfys64Lfx96Lrx96Lfx128O1c1]
yor : synth20170629
```