move testing and testdata to test, add unlvtests
@ -24,7 +24,7 @@ SUBDIRS += src/ccmain src/api . tessdata doc unittest
|
||||
|
||||
EXTRA_DIST = README.md\
|
||||
aclocal.m4 config configure.ac autogen.sh contrib \
|
||||
tesseract.pc.in $(TRAINING_SUBDIR) java doc testing
|
||||
tesseract.pc.in $(TRAINING_SUBDIR) java doc unlvtests
|
||||
|
||||
DIST_SUBDIRS = $(SUBDIRS) $(TRAINING_SUBDIR)
|
||||
|
||||
|
@ -493,7 +493,7 @@ AC_CONFIG_FILES([src/wordrec/Makefile])
|
||||
AC_CONFIG_FILES([tessdata/Makefile])
|
||||
AC_CONFIG_FILES([tessdata/configs/Makefile])
|
||||
AC_CONFIG_FILES([tessdata/tessconfigs/Makefile])
|
||||
AC_CONFIG_FILES([testing/Makefile])
|
||||
AC_CONFIG_FILES([unlvtests/Makefile])
|
||||
AC_CONFIG_FILES([unittest/Makefile])
|
||||
AC_CONFIG_FILES([java/Makefile])
|
||||
AC_CONFIG_FILES([java/com/Makefile])
|
||||
|
2
test
@ -1 +1 @@
|
||||
Subproject commit 44f9e247303e4e33a1f51940e1a32885959c4dd4
|
||||
Subproject commit 5fc323c76c409cb21962a4182cb893d3da3040ba
|
5075
testdata/chi_sim.unicharset
vendored
6220
testdata/chi_tra.unicharset
vendored
112
testdata/eng.unicharset
vendored
@ -1,112 +0,0 @@
|
||||
111
|
||||
NULL 0 NULL 0
|
||||
I 5 59,68,216,255,10,155,0,50,29,173 Latin 7 0 1 I # I [49 ]A
|
||||
' 10 148,225,216,255,11,51,0,97,36,173 Common 2 10 2 ' # ' [27 ]p
|
||||
v 3 59,68,187,197,84,173,0,32,84,218 Latin 61 0 3 v # v [76 ]a
|
||||
e 3 58,64,189,200,87,154,0,32,98,188 Latin 88 0 4 e # e [65 ]a
|
||||
J 5 0,64,216,255,39,242,0,30,62,234 Latin 79 0 5 J # J [4a ]A
|
||||
o 3 58,66,188,200,87,151,0,32,98,185 Latin 83 0 6 o # o [6f ]a
|
||||
i 3 59,69,216,255,11,141,0,54,27,173 Latin 1 0 7 i # i [69 ]a
|
||||
n 3 59,68,188,202,87,187,0,25,101,208 Latin 45 0 8 n # n [6e ]a
|
||||
| 0 0,67,216,255,8,73,0,80,50,173 Common 9 10 9 | # | [7c ]
|
||||
- 10 105,161,122,175,49,176,0,43,56,215 Common 10 3 10 - # - [2d ]p
|
||||
S 5 57,64,219,255,87,174,0,30,100,200 Latin 26 0 11 S # S [53 ]A
|
||||
z 3 46,68,186,199,65,151,0,32,68,173 Latin 95 0 12 z # z [7a ]a
|
||||
: 10 58,85,141,221,11,69,0,67,38,173 Common 13 6 13 : # : [3a ]p
|
||||
# 10 37,84,200,255,99,221,0,41,109,266 Common 14 4 14 # # # [23 ]p
|
||||
6 8 58,66,219,255,87,156,0,54,104,173 Common 15 2 15 6 # 6 [36 ]0
|
||||
% 10 27,67,205,255,105,257,0,49,117,288 Common 16 4 16 % # % [25 ]p
|
||||
5 8 12,66,199,255,82,160,0,36,103,173 Common 17 2 17 5 # 5 [35 ]0
|
||||
0 8 58,66,187,255,88,164,0,45,103,180 Common 18 2 18 0 # 0 [30 ]0
|
||||
@ 10 0,65,211,255,99,286,0,39,117,291 Common 19 10 19 @ # @ [40 ]p
|
||||
p 3 0,47,192,226,87,180,0,25,100,200 Latin 68 0 20 p # p [70 ]a
|
||||
a 3 58,65,186,200,85,164,0,26,97,185 Latin 67 0 21 a # a [61 ]a
|
||||
r 3 59,68,186,202,58,173,0,40,69,180 Latin 40 0 22 r # r [72 ]a
|
||||
m 3 56,68,189,202,108,280,0,25,117,306 Latin 38 0 23 m # m [6d ]a
|
||||
F 5 57,68,216,255,68,210,0,31,77,209 Latin 29 0 24 F # F [46 ]A
|
||||
u 3 57,65,187,202,85,184,0,39,100,208 Latin 85 0 25 u # u [75 ]a
|
||||
s 3 58,65,192,200,78,147,0,30,91,173 Latin 11 0 26 s # s [73 ]a
|
||||
B 5 62,68,216,255,91,227,0,27,106,227 Latin 46 0 27 B # B [42 ]A
|
||||
» 10 0,133,146,235,63,284,0,32,71,294 Common 28 10 49 » # » [bb ]p
|
||||
f 3 0,68,216,255,54,175,0,42,55,193 Latin 24 0 29 f # f [66 ]a
|
||||
d 3 57,65,216,255,88,174,0,28,100,200 Latin 59 0 30 d # d [64 ]a
|
||||
c 3 58,64,192,200,80,153,0,36,88,178 Latin 33 0 31 c # c [63 ]a
|
||||
h 3 59,68,216,255,87,187,0,25,101,208 Latin 55 0 32 h # h [68 ]a
|
||||
C 5 58,65,219,255,87,192,0,32,107,209 Latin 31 0 33 C # C [43 ]A
|
||||
t 3 58,66,206,254,57,167,0,47,59,180 Latin 37 0 34 t # t [74 ]a
|
||||
L 5 59,68,216,255,64,193,0,31,74,206 Latin 41 0 35 L # L [4c ]A
|
||||
? 10 40,67,219,255,59,144,0,65,77,188 Common 36 10 36 ? # ? [3f ]p
|
||||
T 5 59,68,216,255,85,227,0,47,88,236 Latin 34 0 37 T # T [54 ]A
|
||||
M 5 57,68,216,255,99,301,0,35,117,286 Latin 23 0 38 M # M [4d ]A
|
||||
y 3 0,47,187,202,87,199,0,25,87,230 Latin 100 0 39 y # y [79 ]a
|
||||
R 5 57,68,216,255,88,227,0,27,104,232 Latin 22 0 40 R # R [52 ]A
|
||||
l 3 59,68,216,255,11,147,0,56,27,173 Latin 35 0 41 l # l [6c ]a
|
||||
~ 0 91,229,135,255,73,174,0,41,0,200 Common 42 10 42 ~ # ~ [7e ]
|
||||
< 0 29,102,173,255,69,184,0,50,90,256 Common 43 10 76 < # < [3c ]
|
||||
® 0 28,163,209,255,83,223,0,48,92,257 Common 44 10 44 ® # ® [ae ]
|
||||
N 5 59,68,216,255,87,262,0,27,104,249 Latin 8 0 45 N # N [4e ]A
|
||||
b 3 58,64,216,255,87,180,0,25,100,200 Latin 27 0 46 b # b [62 ]a
|
||||
k 3 57,68,216,255,85,177,0,35,93,198 Latin 101 0 47 k # k [6b ]a
|
||||
[ 10 8,64,216,255,39,136,0,80,55,173 Common 48 10 70 [ # [ [5b ]p
|
||||
« 10 26,133,148,235,63,279,0,35,71,281 Common 49 10 28 « # « [ab ]p
|
||||
1 8 49,69,192,255,45,128,0,66,74,173 Common 50 2 50 1 # 1 [31 ]0
|
||||
, 10 14,46,79,115,17,78,0,58,30,173 Common 51 6 51 , # , [2c ]p
|
||||
. 10 26,67,73,112,13,51,0,67,30,173 Common 52 6 52 . # . [2e ]p
|
||||
” 10 141,233,216,255,59,141,0,87,66,298 Common 53 10 53 " # ” [201d ]p
|
||||
g 3 0,43,188,212,88,176,0,32,100,210 Latin 93 0 54 g # g [67 ]a
|
||||
H 5 59,68,216,255,91,258,0,27,107,244 Latin 32 0 55 H # H [48 ]A
|
||||
$ 0 24,63,229,255,85,174,0,36,106,174 Common 56 4 56 $ # $ [24 ]
|
||||
( 10 0,64,216,255,42,118,0,97,61,173 Common 57 10 94 ( # ( [28 ]p
|
||||
+ 0 54,102,171,253,90,176,0,37,103,213 Common 58 3 58 + # + [2b ]
|
||||
D 5 59,68,216,255,93,230,0,27,107,236 Latin 30 0 59 D # D [44 ]A
|
||||
w 3 59,68,187,195,108,235,0,32,117,286 Latin 103 0 60 w # w [77 ]a
|
||||
V 5 59,68,216,255,103,207,0,41,101,245 Latin 3 0 61 V # V [56 ]A
|
||||
£ 0 0,135,219,255,64,201,0,55,61,298 Common 62 4 62 £ # £ [a3 ]
|
||||
4 8 0,68,198,255,93,161,0,41,96,173 Common 63 2 63 4 # 4 [34 ]0
|
||||
9 8 0,66,200,255,89,156,0,39,104,173 Common 64 2 64 9 # 9 [39 ]0
|
||||
Q 5 7,64,219,255,91,205,0,30,106,227 Latin 96 0 65 Q # Q [51 ]A
|
||||
& 10 53,64,194,255,108,232,0,47,112,239 Common 66 10 66 & # & [26 ]p
|
||||
A 5 52,68,216,255,100,216,0,17,98,231 Latin 21 0 67 A # A [41 ]A
|
||||
P 5 57,68,216,255,87,225,0,32,97,230 Latin 20 0 68 P # P [50 ]A
|
||||
¢ 0 14,158,190,255,56,144,0,72,61,270 Common 69 4 69 ¢ # ¢ [a2 ]
|
||||
] 10 8,64,216,255,39,129,0,44,55,173 Common 70 10 48 ] # ] [5d ]p
|
||||
3 8 0,66,196,255,84,158,0,32,103,173 Common 71 2 71 3 # 3 [33 ]0
|
||||
2 8 30,69,194,255,80,160,0,27,97,173 Common 72 2 72 2 # 2 [32 ]0
|
||||
© 0 28,125,209,255,118,232,0,32,119,257 Common 73 10 73 © # © [a9 ]
|
||||
8 8 57,66,219,255,88,162,0,41,103,174 Common 74 2 74 8 # 8 [38 ]0
|
||||
/ 10 0,65,219,255,59,228,0,36,62,238 Common 75 6 75 / # / [2f ]p
|
||||
> 0 29,102,173,255,78,184,0,50,90,256 Common 76 10 43 > # > [3e ]
|
||||
X 5 59,68,216,255,94,275,0,25,93,256 Latin 86 0 77 X # X [58 ]A
|
||||
é 3 0,64,222,255,87,384,0,32,98,391 Latin 78 0 78 é # é [e9 ]a
|
||||
j 3 0,47,216,255,36,145,0,49,50,173 Latin 5 0 79 j # j [6a ]a
|
||||
; 10 14,56,131,221,17,93,0,58,38,173 Common 80 10 80 ; # ; [3b ]p
|
||||
7 8 12,68,196,255,72,160,0,60,75,173 Common 81 2 81 7 # 7 [37 ]0
|
||||
€ 0 32,68,209,255,97,238,0,49,103,293 Common 82 4 82 € # € [20ac ]
|
||||
O 5 57,64,219,255,91,209,0,34,106,233 Latin 6 0 83 O # O [4f ]A
|
||||
¥ 0 59,75,209,255,91,238,0,52,91,270 Common 84 4 84 ¥ # ¥ [a5 ]
|
||||
U 5 58,64,216,255,91,214,0,39,106,220 Latin 25 0 85 U # U [55 ]A
|
||||
x 3 59,68,187,201,85,189,0,25,84,218 Latin 77 0 86 x # x [78 ]a
|
||||
} 10 0,44,216,255,54,148,0,56,59,173 Common 87 10 97 } # } [7d ]p
|
||||
E 5 59,68,216,255,68,210,0,31,80,219 Latin 4 0 88 E # E [45 ]A
|
||||
§ 0 9,66,219,255,82,207,0,86,93,293 Common 89 10 89 § # § [a7 ]
|
||||
= 0 74,139,144,199,90,186,0,32,103,224 Common 90 10 90 = # = [3d ]
|
||||
! 10 41,67,216,255,11,87,0,71,50,173 Common 91 10 91 ! # ! [21 ]p
|
||||
’ 10 141,233,212,255,17,78,0,109,30,298 Common 92 10 92 ' # ’ [2019 ]p
|
||||
G 5 58,64,219,255,91,230,0,30,106,230 Latin 54 0 93 G # G [47 ]A
|
||||
) 10 0,64,216,255,42,119,0,53,61,173 Common 94 10 57 ) # ) [29 ]p
|
||||
Z 5 64,68,216,255,72,218,0,30,77,236 Latin 12 0 95 Z # Z [5a ]A
|
||||
q 3 0,47,192,202,88,196,0,30,100,200 Latin 65 0 96 q # q [71 ]a
|
||||
{ 10 0,44,216,255,54,148,0,71,59,173 Common 97 10 87 { # { [7b ]p
|
||||
“ 10 141,233,216,255,56,133,0,172,66,298 Common 98 10 98 " # “ [201c ]p
|
||||
— 10 110,155,132,167,126,297,0,23,136,298 Common 99 10 99 - # — [2014 ]p
|
||||
Y 5 59,68,216,255,91,205,0,47,91,223 Latin 39 0 100 Y # Y [59 ]A
|
||||
K 5 57,68,216,255,92,225,0,37,103,216 Latin 47 0 101 K # K [4b ]A
|
||||
* 10 78,183,188,255,49,134,0,60,53,173 Common 102 10 102 * # * [2a ]p
|
||||
W 5 54,68,216,255,106,314,0,41,117,318 Latin 60 0 103 W # W [57 ]A
|
||||
" 10 151,225,216,255,52,115,0,71,71,173 Common 104 10 104 " # " [22 ]p
|
||||
\ 10 0,67,219,255,28,250,0,71,62,261 Common 105 10 105 \ # \ [5c ]p
|
||||
° 0 66,247,209,255,22,399,0,98,66,409 Common 106 4 106 ° # ° [b0 ]
|
||||
fi 3 0,71,216,255,87,202,0,28,105,199 Latin 107 0 107 fi # fi [fb01 ]a
|
||||
‘ 10 141,233,210,255,17,64,0,216,30,298 Common 108 10 108 ' # ‘ [2018 ]p
|
||||
_ 10 0,50,0,64,73,248,0,29,75,259 Common 109 10 109 _ # _ [5f ]p
|
||||
fl 3 0,71,216,255,87,219,0,28,105,236 Latin 110 0 110 fl # fl [fb02 ]a
|
3707
testdata/jpn.unicharset
vendored
2841
testdata/kan.unicharset
vendored
1651
testdata/kor.unicharset
vendored
2198
testdata/mar.unicharset
vendored
227
testdata/por.unicharset
vendored
@ -1,227 +0,0 @@
|
||||
226
|
||||
NULL 0 NULL 0
|
||||
Joined 7 0,69,188,255,486,1218,0,30,486,1188 Latin 22 0 64 Joined # Joined [4a 6f 69 6e 65 64 ]a
|
||||
|Broken|0|1 f 0,69,186,255,892,2138,0,80,892,2058 Common 101 10 101 |Broken|0|1 # Broken
|
||||
T 5 59,68,216,255,85,227,0,47,88,236 Latin 46 0 3 T # T [54 ]A
|
||||
u 3 57,65,187,202,85,184,0,39,100,208 Latin 47 0 4 u # u [75 ]a
|
||||
d 3 57,65,216,255,88,174,0,28,100,200 Latin 23 0 5 d # d [64 ]a
|
||||
o 3 58,66,188,200,87,151,0,32,98,185 Latin 28 0 6 o # o [6f ]a
|
||||
S 5 57,64,219,255,87,174,0,30,100,200 Latin 15 0 7 S # S [53 ]A
|
||||
C 5 58,65,219,255,87,192,0,32,107,209 Latin 31 0 8 C # C [43 ]A
|
||||
R 5 57,68,216,255,88,227,0,27,104,232 Latin 20 0 9 R # R [52 ]A
|
||||
A 5 52,68,216,255,100,216,0,17,98,231 Latin 14 0 10 A # A [41 ]A
|
||||
P 5 57,68,216,255,87,225,0,32,97,230 Latin 37 0 11 P # P [50 ]A
|
||||
g 3 0,43,188,212,88,176,0,32,100,210 Latin 76 0 12 g # g [67 ]a
|
||||
i 3 59,69,216,255,11,141,0,54,27,173 Latin 27 0 13 i # i [69 ]a
|
||||
a 3 58,65,186,200,85,164,0,26,97,185 Latin 10 0 14 a # a [61 ]a
|
||||
s 3 58,65,192,200,78,147,0,30,91,173 Latin 7 0 15 s # s [73 ]a
|
||||
2 8 30,69,194,255,80,160,0,27,97,173 Common 16 2 16 2 # 2 [32 ]0
|
||||
0 8 58,66,187,255,88,164,0,45,103,180 Common 17 2 17 0 # 0 [30 ]0
|
||||
8 8 57,66,219,255,88,162,0,41,103,174 Common 18 2 18 8 # 8 [38 ]0
|
||||
M 5 57,68,216,255,99,301,0,35,117,286 Latin 35 0 19 M # M [4d ]A
|
||||
r 3 59,68,186,202,58,173,0,40,69,180 Latin 9 0 20 r # r [72 ]a
|
||||
y 3 0,47,187,202,87,199,0,25,87,230 Latin 109 0 21 y # y [79 ]a
|
||||
j 3 0,47,216,255,36,145,0,49,50,173 Latin 64 0 22 j # j [6a ]a
|
||||
D 5 59,68,216,255,93,230,0,27,107,236 Latin 5 0 23 D # D [44 ]A
|
||||
w 3 59,68,187,195,108,235,0,32,117,286 Latin 108 0 24 w # w [77 ]a
|
||||
n 3 59,68,188,202,87,187,0,25,101,208 Latin 49 0 25 n # n [6e ]a
|
||||
É 5 59,68,232,255,68,314,0,31,80,325 Latin 59 0 26 É # É [c9 ]A
|
||||
I 5 59,68,216,255,10,155,0,50,29,173 Latin 13 0 27 I # I [49 ]A
|
||||
O 5 57,64,219,255,91,209,0,34,106,233 Latin 6 0 28 O # O [4f ]A
|
||||
“ 10 141,233,216,255,56,133,0,172,66,298 Common 29 10 29 " # “ [201c ]p
|
||||
l 3 59,68,216,255,11,147,0,56,27,173 Latin 48 0 30 l # l [6c ]a
|
||||
c 3 58,64,192,200,80,153,0,36,88,178 Latin 8 0 31 c # c [63 ]a
|
||||
e 3 58,64,189,200,87,154,0,32,98,188 Latin 33 0 32 e # e [65 ]a
|
||||
E 5 59,68,216,255,68,210,0,31,80,219 Latin 32 0 33 E # E [45 ]A
|
||||
â 3 0,64,222,255,85,256,0,26,97,256 Latin 34 0 34 â # â [e2 ]a
|
||||
m 3 56,68,189,202,108,280,0,25,117,306 Latin 19 0 35 m # m [6d ]a
|
||||
h 3 59,68,216,255,87,187,0,25,101,208 Latin 97 0 36 h # h [68 ]a
|
||||
p 3 0,47,192,226,87,180,0,25,100,200 Latin 11 0 37 p # p [70 ]a
|
||||
B 5 62,68,216,255,91,227,0,27,106,227 Latin 45 0 38 B # B [42 ]A
|
||||
. 10 26,67,73,112,13,51,0,67,30,173 Common 39 6 39 . # . [2e ]p
|
||||
£ 0 0,135,219,255,64,201,0,55,61,298 Common 40 4 40 £ # £ [a3 ]
|
||||
ó 3 0,64,222,255,87,192,0,32,98,197 Latin 73 0 41 ó # ó [f3 ]a
|
||||
1 8 49,69,192,255,45,128,0,66,74,173 Common 42 2 42 1 # 1 [31 ]0
|
||||
6 8 58,66,219,255,87,156,0,54,104,173 Common 43 2 43 6 # 6 [36 ]0
|
||||
4 8 0,68,198,255,93,161,0,41,96,173 Common 44 2 44 4 # 4 [34 ]0
|
||||
b 3 58,64,216,255,87,180,0,25,100,200 Latin 38 0 45 b # b [62 ]a
|
||||
t 3 58,66,206,254,57,167,0,47,59,180 Latin 3 0 46 t # t [74 ]a
|
||||
U 5 58,64,216,255,91,214,0,39,106,220 Latin 4 0 47 U # U [55 ]A
|
||||
L 5 59,68,216,255,64,193,0,31,74,206 Latin 30 0 48 L # L [4c ]A
|
||||
N 5 59,68,216,255,87,262,0,27,104,249 Latin 25 0 49 N # N [4e ]A
|
||||
V 5 59,68,216,255,103,207,0,41,101,245 Latin 56 0 50 V # V [56 ]A
|
||||
” 10 141,233,216,255,59,141,0,87,66,298 Common 51 10 51 " # ” [201d ]p
|
||||
) 10 0,64,216,255,42,119,0,53,61,173 Common 52 10 86 ) # ) [29 ]p
|
||||
, 10 14,46,79,115,17,78,0,58,30,173 Common 53 6 53 , # , [2c ]p
|
||||
k 3 57,68,216,255,85,177,0,35,93,198 Latin 57 0 54 k # k [6b ]a
|
||||
: 10 58,85,141,221,11,69,0,67,38,173 Common 55 6 55 : # : [3a ]p
|
||||
v 3 59,68,187,197,84,173,0,32,84,218 Latin 50 0 56 v # v [76 ]a
|
||||
K 5 57,68,216,255,92,225,0,37,103,216 Latin 54 0 57 K # K [4b ]A
|
||||
í 3 62,69,222,255,40,279,0,54,47,286 Latin 92 0 58 í # í [ed ]a
|
||||
é 3 0,64,222,255,87,384,0,32,98,391 Latin 26 0 59 é # é [e9 ]a
|
||||
ê 3 0,64,222,255,87,409,0,32,98,407 Latin 99 0 60 ê # ê [ea ]a
|
||||
Ç 5 0,64,217,255,87,286,0,32,107,296 Latin 82 0 61 Ç # Ç [c7 ]A
|
||||
@ 10 0,65,211,255,99,286,0,39,117,291 Common 62 10 62 @ # @ [40 ]p
|
||||
Ú 5 0,64,232,255,91,294,0,39,106,291 Latin 107 0 63 Ú # Ú [da ]A
|
||||
J 5 0,64,216,255,39,242,0,30,62,234 Latin 22 0 64 J # J [4a ]A
|
||||
€ 0 32,68,209,255,97,238,0,49,103,293 Common 65 4 65 € # € [20ac ]
|
||||
9 8 0,66,200,255,89,156,0,39,104,173 Common 66 2 66 9 # 9 [39 ]0
|
||||
5 8 12,66,199,255,82,160,0,36,103,173 Common 67 2 67 5 # 5 [35 ]0
|
||||
& 10 53,64,194,255,108,232,0,47,112,239 Common 68 10 68 & # & [26 ]p
|
||||
x 3 59,68,187,201,85,189,0,25,84,218 Latin 89 0 69 x # x [78 ]a
|
||||
/ 10 0,65,219,255,59,228,0,36,62,238 Common 70 6 70 / # / [2f ]p
|
||||
² 0 3,192,209,255,50,248,0,105,0,293 Common 71 2 71 2 # ² [b2 ]
|
||||
F 5 57,68,216,255,68,210,0,31,77,209 Latin 84 0 72 F # F [46 ]A
|
||||
Ó 5 0,64,232,255,91,276,0,34,106,286 Latin 41 0 73 Ó # Ó [d3 ]A
|
||||
3 8 0,66,196,255,84,158,0,32,103,173 Common 74 2 74 3 # 3 [33 ]0
|
||||
z 3 46,68,186,199,65,151,0,32,68,173 Latin 112 0 75 z # z [7a ]a
|
||||
G 5 58,64,219,255,91,230,0,30,106,230 Latin 12 0 76 G # G [47 ]A
|
||||
á 3 0,64,222,255,85,414,0,26,97,412 Latin 100 0 77 á # á [e1 ]a
|
||||
- 10 105,161,122,175,49,176,0,43,56,215 Common 78 3 78 - # - [2d ]p
|
||||
? 10 40,67,219,255,59,144,0,65,77,188 Common 79 10 79 ? # ? [3f ]p
|
||||
! 10 41,67,216,255,11,87,0,71,50,173 Common 80 10 80 ! # ! [21 ]p
|
||||
q 3 0,47,192,202,88,196,0,30,100,200 Latin 116 0 81 q # q [71 ]a
|
||||
ç 3 0,31,182,232,80,299,0,36,96,309 Latin 61 0 82 ç # ç [e7 ]a
|
||||
ã 3 0,64,224,255,85,279,0,26,97,289 Latin 96 0 83 ã # ã [e3 ]a
|
||||
f 3 0,68,216,255,54,175,0,42,55,193 Latin 72 0 84 f # f [66 ]a
|
||||
+ 0 54,102,171,253,90,176,0,37,103,213 Common 85 3 85 + # + [2b ]
|
||||
( 10 0,64,216,255,42,118,0,97,61,173 Common 86 10 52 ( # ( [28 ]p
|
||||
' 10 148,225,216,255,11,51,0,97,36,173 Common 87 10 87 ' # ' [27 ]p
|
||||
; 10 14,56,131,221,17,93,0,58,38,173 Common 88 10 88 ; # ; [3b ]p
|
||||
X 5 59,68,216,255,94,275,0,25,93,256 Latin 69 0 89 X # X [58 ]A
|
||||
* 10 78,183,188,255,49,134,0,60,53,173 Common 90 10 90 * # * [2a ]p
|
||||
º 3 64,187,188,255,51,189,0,81,64,293 Latin 91 0 91 o # º [ba ]a
|
||||
Í 5 64,68,232,255,35,197,0,48,55,207 Latin 58 0 92 Í # Í [cd ]A
|
||||
³ 0 0,192,209,255,48,268,0,99,0,293 Common 93 2 93 3 # ³ [b3 ]
|
||||
› 10 64,101,142,215,32,100,0,84,37,173 Common 94 10 94 › # › [203a ]p
|
||||
ª 3 64,187,207,255,51,286,0,71,62,296 Latin 95 0 95 a # ª [aa ]a
|
||||
à 5 6,68,232,255,100,204,0,25,98,245 Latin 83 0 96 à # à [c3 ]A
|
||||
H 5 59,68,216,255,91,258,0,27,107,244 Latin 36 0 97 H # H [48 ]A
|
||||
# 10 37,84,200,255,99,221,0,41,109,266 Common 98 4 98 # # # [23 ]p
|
||||
Ê 5 0,68,232,255,72,266,0,31,80,276 Latin 60 0 99 Ê # Ê [ca ]A
|
||||
Á 5 64,68,232,255,100,203,0,29,98,245 Latin 77 0 100 Á # Á [c1 ]A
|
||||
| 0 0,67,216,255,8,73,0,80,50,173 Common 101 10 101 | # | [7c ]
|
||||
" 10 151,225,216,255,52,115,0,71,71,173 Common 102 10 102 " # " [22 ]p
|
||||
> 0 29,102,173,255,78,184,0,50,90,256 Common 103 10 111 > # > [3e ]
|
||||
à 3 0,64,222,255,85,407,0,26,97,407 Latin 104 0 104 à # à [e0 ]a
|
||||
õ 3 58,66,224,255,87,194,0,32,98,204 Latin 105 0 105 õ # õ [f5 ]a
|
||||
« 10 26,133,148,235,63,279,0,35,71,281 Common 106 10 126 « # « [ab ]p
|
||||
ú 3 0,65,222,255,85,212,0,39,100,212 Latin 63 0 107 ú # ú [fa ]a
|
||||
W 5 54,68,216,255,106,314,0,41,117,318 Latin 24 0 108 W # W [57 ]A
|
||||
Y 5 59,68,216,255,91,205,0,47,91,223 Latin 21 0 109 Y # Y [59 ]A
|
||||
7 8 12,68,196,255,72,160,0,60,75,173 Common 110 2 110 7 # 7 [37 ]0
|
||||
< 0 29,102,173,255,69,184,0,50,90,256 Common 111 10 103 < # < [3c ]
|
||||
Z 5 64,68,216,255,72,218,0,30,77,236 Latin 75 0 112 Z # Z [5a ]A
|
||||
¹ 0 64,192,209,255,24,279,1,119,0,293 Common 113 2 113 1 # ¹ [b9 ]
|
||||
ü 3 0,65,219,255,85,220,0,39,100,225 Latin 114 0 114 ü # ü [fc ]a
|
||||
_ 10 0,50,0,64,73,248,0,29,75,259 Common 115 10 115 _ # _ [5f ]p
|
||||
Q 5 7,64,219,255,91,205,0,30,106,227 Latin 81 0 116 Q # Q [51 ]A
|
||||
… 10 60,143,79,232,101,332,0,45,107,337 Common 117 10 117 ... # … [2026 ]p
|
||||
¡ 10 0,66,185,255,11,176,0,125,49,293 Common 118 10 118 ¡ # ¡ [a1 ]p
|
||||
$ 0 24,63,229,255,85,174,0,36,106,174 Common 119 4 119 $ # $ [24 ]
|
||||
© 0 28,125,209,255,118,232,0,32,119,257 Common 120 10 120 © # © [a9 ]
|
||||
[ 10 8,64,216,255,39,136,0,80,55,173 Common 121 10 123 [ # [ [5b ]p
|
||||
% 10 27,67,205,255,105,257,0,49,117,288 Common 122 4 122 % # % [25 ]p
|
||||
] 10 8,64,216,255,39,129,0,44,55,173 Common 123 10 121 ] # ] [5d ]p
|
||||
= 0 74,139,144,199,90,186,0,32,103,224 Common 124 10 124 = # = [3d ]
|
||||
₂ 0 10,67,113,172,50,118,0,105,77,293 Common 125 2 125 2 # ₂ [2082 ]
|
||||
» 10 0,133,146,235,63,284,0,32,71,294 Common 126 10 106 » # » [bb ]p
|
||||
⁴ 0 115,163,227,255,63,131,0,101,77,293 Common 127 2 127 4 # ⁴ [2074 ]
|
||||
ô 3 58,66,222,255,87,192,0,32,98,202 Latin 128 0 128 ô # ô [f4 ]a
|
||||
° 0 66,247,209,255,22,399,0,98,66,409 Common 129 4 129 ° # ° [b0 ]
|
||||
₄ 0 12,67,115,170,62,131,0,97,77,293 Common 130 2 130 4 # ₄ [2084 ]
|
||||
₃ 0 8,67,113,172,52,106,0,103,77,293 Common 131 2 131 3 # ₃ [2083 ]
|
||||
₁ 0 10,67,113,172,36,78,0,108,77,293 Common 132 2 132 1 # ₁ [2081 ]
|
||||
fl 3 0,68,216,255,82,408,0,42,82,366 Latin 72 0 84 fl # fl [66 6c ]a
|
||||
fi 3 0,69,216,255,82,408,0,42,82,366 Latin 72 0 84 fi # fi [66 69 ]a
|
||||
... 10 26,67,73,112,90,586,0,67,90,519 Common 39 6 39 ... # ... [2e 2e 2e ]p
|
||||
ff 3 0,68,216,255,110,428,0,42,110,386 Latin 72 0 84 ff # ff [66 66 ]a
|
||||
⁸ 0 124,151,229,255,56,102,0,53,75,173 Common 137 2 137 8 # ⁸ [2078 ]
|
||||
⁶ 0 124,151,229,255,56,99,0,56,77,173 Common 138 2 138 6 # ⁶ [2076 ]
|
||||
⁹ 0 126,153,230,255,56,104,0,57,77,173 Common 139 2 139 9 # ⁹ [2079 ]
|
||||
⁵ 0 124,153,227,255,50,104,0,51,75,173 Common 140 2 140 5 # ⁵ [2075 ]
|
||||
⁷ 0 128,153,227,255,52,106,0,58,77,173 Common 141 2 141 7 # ⁷ [2077 ]
|
||||
⁰ 0 124,151,229,255,56,102,0,53,77,173 Common 142 2 142 0 # ⁰ [2070 ]
|
||||
₆ 0 10,65,118,172,56,99,0,56,77,173 Common 143 2 143 6 # ₆ [2086 ]
|
||||
₉ 0 10,65,118,172,56,104,0,57,77,173 Common 144 2 144 9 # ₉ [2089 ]
|
||||
₀ 0 8,65,119,172,56,102,0,53,77,173 Common 145 2 145 0 # ₀ [2080 ]
|
||||
₅ 0 8,65,113,170,50,98,0,51,77,173 Common 146 2 146 5 # ₅ [2085 ]
|
||||
₈ 0 8,65,118,172,59,102,0,53,75,173 Common 147 2 147 8 # ₈ [2088 ]
|
||||
ffi 3 0,69,216,255,137,601,0,42,137,559 Latin 72 0 84 ffi # ffi [66 66 69 ]a
|
||||
₇ 0 10,67,115,170,52,92,0,60,77,173 Common 149 2 149 7 # ₇ [2087 ]
|
||||
Th 7 59,68,216,255,189,491,0,47,189,444 Latin 46 0 3 Th # Th [54 68 ]a
|
||||
ft 3 0,68,206,255,114,415,0,42,114,373 Latin 72 0 84 ft # ft [66 74 ]a
|
||||
ffl 3 0,68,216,255,137,601,0,42,137,559 Latin 72 0 84 ffl # ffl [66 66 6c ]a
|
||||
NJ 5 0,68,216,255,166,510,0,27,166,483 Latin 25 0 49 NJ # NJ [4e 4a ]A
|
||||
ij 3 0,69,216,255,77,400,0,54,77,346 Latin 27 0 13 ij # ij [69 6a ]a
|
||||
tt 3 58,66,206,254,118,407,0,47,118,360 Latin 3 0 46 tt # tt [74 74 ]a
|
||||
ti 3 58,69,206,255,86,400,0,47,86,353 Latin 3 0 46 ti # ti [74 69 ]a
|
||||
it 3 58,69,206,255,86,407,0,54,86,353 Latin 27 0 13 it # it [69 74 ]a
|
||||
sc 3 58,65,192,200,179,381,0,30,179,351 Latin 7 0 15 sc # sc [73 63 ]a
|
||||
rt 3 58,68,186,254,128,400,0,40,128,360 Latin 9 0 20 rt # rt [72 74 ]a
|
||||
es 3 58,65,189,200,189,393,0,32,189,361 Latin 33 0 32 es # es [65 73 ]a
|
||||
ee 3 58,64,189,200,196,408,0,32,196,376 Latin 33 0 32 ee # ee [65 65 ]a
|
||||
th 3 58,68,206,255,160,435,0,47,160,388 Latin 3 0 46 th # th [74 68 ]a
|
||||
st 3 58,66,192,254,150,383,0,30,150,353 Latin 7 0 15 st # st [73 74 ]a
|
||||
ch 3 58,68,192,255,189,422,0,36,189,386 Latin 8 0 31 ch # ch [63 68 ]a
|
||||
et 3 58,66,189,254,157,400,0,32,157,368 Latin 33 0 32 et # et [65 74 ]a
|
||||
sh 3 58,68,192,255,192,411,0,30,192,381 Latin 7 0 15 sh # sh [73 68 ]a
|
||||
il 3 59,69,216,255,54,400,0,54,54,346 Latin 27 0 13 il # il [69 6c ]a
|
||||
ot 3 58,66,188,254,157,397,0,32,157,365 Latin 28 0 6 ot # ot [6f 74 ]a
|
||||
ge 3 0,64,188,212,198,430,0,32,198,398 Latin 76 0 12 ge # ge [67 65 ]a
|
||||
sp 3 0,65,192,226,191,403,0,30,191,373 Latin 7 0 15 sp # sp [73 70 ]a
|
||||
di 3 57,69,216,255,127,401,0,28,127,373 Latin 23 0 5 di # di [64 69 ]a
|
||||
fü 3 0,68,216,255,155,460,0,42,155,418 Latin 72 0 84 fü # fü [66 fc ]a
|
||||
ss 3 58,65,192,200,182,376,0,30,182,346 Latin 7 0 15 ss # ss [73 73 ]a
|
||||
pp 3 0,47,192,226,200,425,0,25,200,400 Latin 11 0 37 pp # pp [70 70 ]a
|
||||
pt 3 0,66,192,254,159,405,0,25,159,380 Latin 11 0 37 pt # pt [70 74 ]a
|
||||
sl 3 58,68,192,255,118,376,0,30,118,346 Latin 7 0 15 sl # sl [73 6c ]a
|
||||
sf 3 0,68,192,255,146,396,0,30,146,366 Latin 7 0 15 sf # sf [73 66 ]a
|
||||
cc 3 58,64,192,200,176,392,0,36,176,356 Latin 8 0 31 cc # cc [63 63 ]a
|
||||
ll 3 59,68,216,255,54,402,0,56,54,346 Latin 48 0 30 ll # ll [6c 6c ]a
|
||||
ct 3 58,66,192,254,147,394,0,36,147,358 Latin 8 0 31 ct # ct [63 74 ]a
|
||||
rr 3 59,68,186,202,138,400,0,40,138,360 Latin 9 0 20 rr # rr [72 72 ]a
|
||||
aa 3 58,65,186,200,194,396,0,26,194,370 Latin 10 0 14 aa # aa [61 61 ]a
|
||||
fu 3 0,68,187,255,155,443,0,42,155,401 Latin 72 0 84 fu # fu [66 75 ]a
|
||||
ii 3 59,69,216,255,54,400,0,54,54,346 Latin 27 0 13 ii # ii [69 69 ]a
|
||||
ph 3 0,68,192,255,201,433,0,25,201,408 Latin 11 0 37 ph # ph [70 68 ]a
|
||||
gy 3 0,47,187,212,187,472,0,32,187,440 Latin 76 0 12 gy # gy [67 79 ]a
|
||||
fr 3 0,68,186,255,124,415,0,42,124,373 Latin 72 0 84 fr # fr [66 72 ]a
|
||||
dt 3 57,66,206,255,159,408,0,28,159,380 Latin 23 0 5 dt # dt [64 74 ]a
|
||||
cti 3 58,69,192,255,174,567,0,36,174,531 Latin 8 0 31 cti # cti [63 74 69 ]a
|
||||
oo 3 58,66,188,200,196,402,0,32,196,370 Latin 28 0 6 oo # oo [6f 6f ]a
|
||||
sti 3 58,69,192,255,177,556,0,30,177,526 Latin 7 0 15 sti # sti [73 74 69 ]a
|
||||
sk 3 57,68,192,255,184,401,0,30,184,371 Latin 7 0 15 sk # sk [73 6b ]a
|
||||
cs 3 58,65,192,200,179,387,0,36,179,351 Latin 8 0 31 cs # cs [63 73 ]a
|
||||
ooo 3 58,66,188,200,294,587,0,32,294,555 Latin 28 0 6 ooo # ooo [6f 6f 6f ]a
|
||||
ty 3 0,66,187,254,146,457,0,47,146,410 Latin 3 0 46 ty # ty [74 79 ]a
|
||||
tz 3 46,68,186,254,127,400,0,47,127,353 Latin 3 0 46 tz # tz [74 7a ]a
|
||||
fk 3 0,68,216,255,148,433,0,42,148,391 Latin 72 0 84 fk # fk [66 6b ]a
|
||||
ck 3 57,68,192,255,181,412,0,36,181,376 Latin 8 0 31 ck # ck [63 6b ]a
|
||||
gg 3 0,43,188,212,200,452,0,32,200,420 Latin 76 0 12 gg # gg [67 67 ]a
|
||||
°C 5 58,247,209,255,173,716,0,98,173,618 Common 129 4 129 °C # °C [b0 43 ]A
|
||||
!? 10 40,67,216,255,127,432,0,71,127,361 Common 80 10 80 !? # !? [21 3f ]p
|
||||
!! 10 41,67,216,255,100,417,0,71,100,346 Common 80 10 80 !! # !! [21 21 ]p
|
||||
Qu 7 7,65,187,255,206,465,0,30,206,435 Latin 81 0 116 Qu # Qu [51 75 ]a
|
||||
ry 3 0,68,186,202,156,450,0,40,156,410 Latin 9 0 20 ry # ry [72 79 ]a
|
||||
gj 3 0,47,188,255,150,415,0,32,150,383 Latin 76 0 12 gj # gj [67 6a ]a
|
||||
bt 3 58,66,206,255,159,405,0,25,159,380 Latin 38 0 45 bt # bt [62 74 ]a
|
||||
sch 3 58,68,192,255,280,589,0,30,280,559 Latin 7 0 15 sch # sch [73 63 68 ]a
|
||||
SS 5 57,64,219,255,200,430,0,30,200,400 Latin 15 0 7 SS # SS [53 53 ]A
|
||||
AND 5 52,68,216,255,309,733,0,17,309,716 Latin 14 0 10 AND # AND [41 4e 44 ]A
|
||||
ET 5 59,68,216,255,168,486,0,31,168,455 Latin 32 0 33 ET # ET [45 54 ]A
|
||||
UND 5 58,68,216,255,317,744,0,39,317,705 Latin 4 0 47 UND # UND [55 4e 44 ]A
|
||||
fb 3 0,68,216,255,155,435,0,42,155,393 Latin 72 0 84 fb # fb [66 62 ]a
|
||||
fj 3 0,68,216,255,105,408,0,42,105,366 Latin 72 0 84 fj # fj [66 6a ]a
|
||||
nj 3 0,68,188,255,151,406,0,25,151,381 Latin 49 0 25 nj # nj [6e 6a ]a
|
||||
ffb 3 0,68,216,255,210,628,0,42,210,586 Latin 72 0 84 ffb # ffb [66 66 62 ]a
|
||||
fh 3 0,68,216,255,156,443,0,42,156,401 Latin 72 0 84 fh # fh [66 68 ]a
|
||||
or 3 58,68,186,202,167,397,0,32,167,365 Latin 28 0 6 or # or [6f 72 ]a
|
||||
on 3 58,68,188,202,199,425,0,32,199,393 Latin 28 0 6 on # on [6f 6e ]a
|
||||
of 3 0,68,188,255,153,410,0,32,153,378 Latin 28 0 6 of # of [6f 66 ]a
|
||||
om 3 56,68,188,202,215,523,0,32,215,491 Latin 28 0 6 om # om [6f 6d ]a
|
||||
op 3 0,66,188,226,198,417,0,32,198,385 Latin 28 0 6 op # op [6f 70 ]a
|
||||
ou 3 57,66,187,202,198,425,0,32,198,393 Latin 28 0 6 ou # ou [6f 75 ]a
|
||||
fft 3 0,68,206,255,169,608,0,42,169,566 Latin 72 0 84 fft # fft [66 66 74 ]a
|
||||
sb 3 58,65,192,255,191,403,0,30,191,373 Latin 7 0 15 sb # sb [73 62 ]a
|
||||
the 3 58,68,189,255,258,623,0,47,258,576 Latin 3 0 46 the # the [74 68 65 ]a
|
5
testdata/trivial.unicharset
vendored
@ -1,5 +0,0 @@
|
||||
4
|
||||
NULL 0 NULL 0
|
||||
i 3 59,69,216,255,11,141,0,54,27,173 Latin 1 0 1 i # i [69 ]a
|
||||
f 3 0,68,216,255,54,175,0,42,55,193 Latin 2 0 2 f # f [66 ]a
|
||||
fi 3 0,71,216,255,87,202,0,28,105,199 Latin 3 0 3 fi # fi [fb01 ]a
|
Before Width: | Height: | Size: 692 KiB |
@ -1,18 +0,0 @@
|
||||
This file documents the original source of various examples used for testing.
|
||||
|
||||
hebrew.png - Sample from Hebrew OCR with Nikud project by Adi Oz and Vered Shani
|
||||
project URL - http://www.cs.bgu.ac.il/~nlpproj/hocr/
|
||||
direct link to image - http://www.cs.bgu.ac.il/~nlpproj/hocr/images/image00.png
|
||||
|
||||
hebtypo.jpg - Sample from OCR and Hebrew on the Web project at Universiteit van Amsterdam
|
||||
project URL - http://cf.uba.uva.nl/en/collections/rosenthaliana/menasseh/hebtypo.html
|
||||
direct link to image - http://cf.uba.uva.nl/en/collections/rosenthaliana/menasseh/gif/hebtypo.jpg
|
||||
|
||||
DuTillet1004Pg2LG.jpg - Sample from Hebrew Matthew Project with parallel texts in Hebrew & Greek
|
||||
as well as English page/chapter labels with Arabic numerals - test with -l heb+grc+eng
|
||||
project URL - http://www.torahresource.com/Dutillet.html
|
||||
direct link to image - http://www.torahresource.com/DuTillet/DuTillet1004Pg2LG.jpg
|
||||
|
||||
hebrew-nikud-genesis-1-2.png - Genesis 1-2 Hebrew example from OCR forum
|
||||
forum post - https://community.logos.com/forums/p/16124/277997.aspx
|
||||
direct link to image - https://community.logos.com/cfs-filesystemfile.ashx/__key/CommunityServer.Discussions.Components.Files/77/4578.Gen.png
|
@ -1,42 +0,0 @@
|
||||
How to run UNLV tests.
|
||||
|
||||
The scripts in this directory make it possible to duplicate the tests
|
||||
published in the Fourth Annual Test of OCR Accuracy.
|
||||
See http://www.isri.unlv.edu/downloads/AT-1995.pdf
|
||||
but first you have to get the tools and data from UNLV:
|
||||
|
||||
Step 1: to download the images go to
|
||||
http://www.isri.unlv.edu/ISRI/OCRtk
|
||||
and get 3b.tgz, Bb.tgz, Mb.tgz and Nb.tgz.
|
||||
|
||||
Step 2: extract the files. It doesn't really matter where
|
||||
in your filesystem you put them, but they must go under a common
|
||||
root so you have directories 3, B, M and N in, for example,
|
||||
/users/me/ISRI-OCRtk.
|
||||
|
||||
Step 3: Reorg the files
|
||||
The lack of tif extensions on the images is inconvenient, so there
|
||||
is a script to reorganize the data to match the rest of the test
|
||||
scripts.
|
||||
cd to /users/me/ISRI-OCRtk or wherever 3, B, M and N ended up and run
|
||||
/blah/blah/tesseract-ocr/testing/reorgdata.sh 3B
|
||||
This makes directories doe3.3B, bus.3B, mag.3B and news.3B.
|
||||
You can now get rid of 3, B, M, and N unless you want to get some of the
|
||||
other scanning resolutions out of them.
|
||||
|
||||
Step 4: Download the ISRI toolkit from:
|
||||
http://www.isri.unlv.edu/downloads/ftk-1.0.tgz
|
||||
|
||||
Step 5: If they work for you, use the binaries directly from the bin
|
||||
directory and put them in tesseract-ocr/testing/unlv
|
||||
otherwise build the tools for yourself and put them there.
|
||||
|
||||
Step 6: cd back to your main tesseract-ocr dir and build tesseract.
|
||||
|
||||
Step 7: run testing/runalltests.sh with the root data dir and testname:
|
||||
testing/runalltests.sh /users/me/ISRI-OCRtk tess2.0
|
||||
and go to the gym, have lunch etc.
|
||||
|
||||
Step 8: There should be a file
|
||||
testing/reports/tess2.0.summary that contains the final summarized accuracy
|
||||
report and comparison with the 1995 results.
|
Before Width: | Height: | Size: 598 KiB |
Before Width: | Height: | Size: 522 KiB |
@ -1,12 +0,0 @@
|
||||
The (quick) [brown] {fox} jumps!
|
||||
Over the $43,456.78 <lazy> #90 dog
|
||||
& duck/goose, as 12.5% of E-mail
|
||||
from aspammer@website.com is spam.
|
||||
Der „schnelle” braune Fuchs springt
|
||||
über den faulen Hund. Le renard brun
|
||||
«rapide» saute par-dessus le chien
|
||||
paresseux. La volpe marrone rapida
|
||||
salta sopra il cane pigro. El zorro
|
||||
marrón rápido salta sobre el perro
|
||||
perezoso. A raposa marrom rápida
|
||||
salta sobre o cão preguiçoso.
|
Before Width: | Height: | Size: 390 KiB |
Before Width: | Height: | Size: 189 KiB |
Before Width: | Height: | Size: 434 KiB |
Before Width: | Height: | Size: 5.2 KiB |
Before Width: | Height: | Size: 6.6 KiB |
Before Width: | Height: | Size: 6.7 KiB |
@ -1,9 +0,0 @@
|
||||
This is a lot of 12 point text to test the
|
||||
ocr code and see if it works on all types
|
||||
of file format.
|
||||
|
||||
The quick brown dog jumped over the
|
||||
lazy fox. The quick brown dog jumped
|
||||
over the lazy fox. The quick brown dog
|
||||
jumped over the lazy fox. The quick
|
||||
brown dog jumped over the lazy fox.
|
@ -3,8 +3,8 @@
|
||||
TESSDATA_DIR=$(shell cd $(top_srcdir) && cd .. && pwd)/tessdata
|
||||
|
||||
# Absolute path of directory 'testing' with test images and ground truth texts
|
||||
# (must be directly below top source directory).
|
||||
TESTING_DIR=$(shell cd $(top_srcdir) && pwd)/testing
|
||||
# (using submodule test).
|
||||
TESTING_DIR=$(shell cd $(top_srcdir) && pwd)/test/testing
|
||||
|
||||
AM_CPPFLAGS += -DTESSDATA_DIR="\"$(TESSDATA_DIR)\""
|
||||
AM_CPPFLAGS += -DTESTING_DIR="\"$(TESTING_DIR)\""
|
||||
@ -92,11 +92,11 @@ osd_test_LDADD += -lws2_32
|
||||
tesseracttests_LDADD += -lws2_32
|
||||
endif
|
||||
|
||||
EXTRA_apiexample_test_DEPENDENCIES = $(abs_top_builddir)/testing/phototest.tif
|
||||
EXTRA_apiexample_test_DEPENDENCIES += $(abs_top_builddir)/testing/phototest.txt
|
||||
EXTRA_apiexample_test_DEPENDENCIES = $(abs_top_builddir)/test/testing/phototest.tif
|
||||
EXTRA_apiexample_test_DEPENDENCIES += $(abs_top_builddir)/test/testing/phototest.txt
|
||||
|
||||
$(abs_top_builddir)/testing/phototest.tif:
|
||||
ln -s $(top_srcdir)/testing/phototest.tif $(top_builddir)/testing/phototest.tif
|
||||
$(abs_top_builddir)/test/testing/phototest.tif:
|
||||
ln -s $(top_srcdir)/test/testing/phototest.tif $(top_builddir)/test/testing/phototest.tif
|
||||
|
||||
$(abs_top_builddir)/testing/phototest.txt:
|
||||
ln -s $(top_srcdir)/testing/phototest.txt $(top_builddir)/testing/phototest.txt
|
||||
$(abs_top_builddir)/test/testing/phototest.txt:
|
||||
ln -s $(top_srcdir)/test/testing/phototest.txt $(top_builddir)/test/testing/phototest.txt
|
||||
|
3
unlvtests/.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
# Ignore ocreval tool and generated files.
|
||||
ocreval*
|
||||
results*
|
45
unlvtests/README
Normal file
@ -0,0 +1,45 @@
|
||||
How to run UNLV tests.
|
||||
|
||||
The scripts in this directory make it possible to duplicate the tests
|
||||
published in the Fourth Annual Test of OCR Accuracy.
|
||||
See http://www.isri.unlv.edu/downloads/AT-1995.pdf
|
||||
but first you have to get the tools and data used by UNLV:
|
||||
|
||||
Step 1: To download the images, go to
|
||||
https://sourceforge.net/projects/isri-ocr-evaluation-tools-alt/files/
|
||||
and get doe3.3B.tar.gz, bus.3B.tar.gz, mag.3B.tar.gz and news.3B.tar.gz
|
||||
|
||||
mkdir -p ~/isri-downloads
|
||||
cd ~/isri-downloads
|
||||
curl -L https://sourceforge.net/projects/isri-ocr-evaluation-tools-alt/files/bus.3B.tar.gz > bus.3B.tar.gz
|
||||
curl -L https://sourceforge.net/projects/isri-ocr-evaluation-tools-alt/files/doe3.3B.tar.gz > doe3.3B.tar.gz
|
||||
curl -L https://sourceforge.net/projects/isri-ocr-evaluation-tools-alt/files/mag.3B.tar.gz > mag.3B.tar.gz
|
||||
curl -L https://sourceforge.net/projects/isri-ocr-evaluation-tools-alt/files/news.3B.tar.gz > news.3B.tar.gz
|
||||
|
||||
Step 2: extract the files. It doesn't really matter where
|
||||
in your filesystem you put them, but they must go under a common
|
||||
root so you have directories doe3.3B, bus.3B, mag.3B and news.3B in, for example,
|
||||
~/ISRI-OCRtk.
|
||||
|
||||
mkdir -p ~/ISRI-OCRtk
|
||||
cd ~/ISRI-OCRtk
|
||||
tar xzvf ~/isri-downloads/bus.3B.tar.gz
|
||||
tar xzvf ~/isri-downloads/doe3.3B.tar.gz
|
||||
tar xzvf ~/isri-downloads/mag.3B.tar.gz
|
||||
tar xzvf ~/isri-downloads/news.3B.tar.gz
|
||||
|
||||
Step 4: Download the modified ISRI toolkit from:
|
||||
https://ancientgreekocr.org/ocr-evaluation-tools.git
|
||||
|
||||
Build and install the tools in unlvtests/ocreval/bin by running
|
||||
`make PREFIX=~/tesseract/unlvtests/ocreval install`
|
||||
|
||||
Step 6: cd back to your main tesseract-ocr dir and build tesseract.
|
||||
|
||||
Step 7: Run unlvtests/runalltests.sh with the root ISRI data dir and test name:
|
||||
unlvtests/runalltests.sh ~/ISRI-OCRtk tess4.0.0-beta.1
|
||||
and go to the gym, have lunch etc.
|
||||
|
||||
Step 8: There should be a file
|
||||
unlvtests/reports/tess4.0.0-beta.1.summary that contains the final summarized accuracy
|
||||
report and comparison with the 1995 results.
|
28
testing/counttestset.sh → unlvtests/counttestset.sh
Normal file → Executable file
@ -20,22 +20,18 @@ then
|
||||
echo "Usage:$0 pagesfile"
|
||||
exit 1
|
||||
fi
|
||||
if [ ! -d api ]
|
||||
if [ ! -d src/api ]
|
||||
then
|
||||
echo "Run $0 from the tesseract-ocr root directory!"
|
||||
exit 1
|
||||
fi
|
||||
if [ ! -r testing/unlv/accuracy ]
|
||||
then
|
||||
echo "Please download the UNLV accuracy tools (and build) to testing/unlv"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
pages=$1
|
||||
|
||||
imdir=${pages%/pages}
|
||||
setname=${imdir##*/}
|
||||
resdir=testing/results/$setname
|
||||
mkdir -p testing/reports
|
||||
resdir=unlvtests/results/$setname
|
||||
mkdir -p unlvtests/reports
|
||||
echo "Counting on set $setname in directory $imdir to $resdir"
|
||||
accfiles=""
|
||||
wafiles=""
|
||||
@ -47,13 +43,17 @@ do
|
||||
else
|
||||
srcdir="$imdir"
|
||||
fi
|
||||
# echo "$srcdir/$page.tif"
|
||||
echo "$srcdir/$page.tif"
|
||||
# Count character errors.
|
||||
testing/unlv/accuracy "$srcdir/$page.txt" "$resdir/$page.txt" "$resdir/$page.acc"
|
||||
accfiles="$accfiles $resdir/$page.acc"
|
||||
unlvtests/ocreval/bin/ocrevalutf8 unlvtests/ocreval/bin/accuracy "$srcdir/$page.txt" "$resdir/$page.unlv" "$resdir/$page.acc"
|
||||
accfiles="$accfiles $resdir/$page.acc"
|
||||
# Count word errors.
|
||||
testing/unlv/wordacc "$srcdir/$page.txt" "$resdir/$page.txt" "$resdir/$page.wa"
|
||||
unlvtests/ocreval/bin/ocrevalutf8 unlvtests/ocreval/bin/wordacc "$srcdir/$page.txt" "$resdir/$page.unlv" "$resdir/$page.wa"
|
||||
wafiles="$wafiles $resdir/$page.wa"
|
||||
done <"$pages"
|
||||
testing/unlv/accsum "$accfiles" >"testing/reports/$setname.characc"
|
||||
testing/unlv/wordaccsum "$wafiles" >"testing/reports/$setname.wordacc"
|
||||
|
||||
echo "$accfiles"
|
||||
echo "$wafiles"
|
||||
|
||||
unlvtests/ocreval/bin/accsum "$accfiles" >"unlvtests/reports/$setname.characc"
|
||||
unlvtests/ocreval/bin/ocrevalutf8 unlvtests/ocreval/bin/wordaccsum "$wafiles" >"unlvtests/reports/$setname.wordacc"
|
0
unlvtests/reports/bus.3B.characc
Normal file
200
unlvtests/reports/bus.3B.times
Normal file
@ -0,0 +1,200 @@
|
||||
8500_001.3B 21.18
|
||||
8501_001.3B 11.72
|
||||
8502_001.3B 27.12
|
||||
8503_001.3B 24.01
|
||||
8504_001.3B 12.42
|
||||
8505_001.3B 18.75
|
||||
8506_001.3B 18.29
|
||||
8507_001.3B 14.15
|
||||
8508_001.3B 23.30
|
||||
8509_001.3B 35.00
|
||||
8510_001.3B 17.38
|
||||
8512_001.3B 4.64
|
||||
8513_001.3B 15.98
|
||||
8514_001.3B 19.79
|
||||
8515_001.3B 13.37
|
||||
8516_001.3B 19.37
|
||||
8517_001.3B 10.41
|
||||
8518_001.3B 5.99
|
||||
8519_001.3B 20.46
|
||||
8520_001.3B 32.38
|
||||
8520_002.3B 39.26
|
||||
8520_003.3B 25.79
|
||||
8521_001.3B 19.02
|
||||
8522_001.3B 28.00
|
||||
8523_001.3B 32.00
|
||||
8524_001.3B 20.81
|
||||
8525_001.3B 29.17
|
||||
8526_001.3B 11.92
|
||||
8527_001.3B 14.06
|
||||
8528_001.3B 7.94
|
||||
8529_001.3B 26.18
|
||||
8529_002.3B 5.02
|
||||
8530_001.3B 9.51
|
||||
8531_001.3B 13.77
|
||||
8533_001.3B 5.82
|
||||
8534_001.3B 25.04
|
||||
8535_001.3B 18.50
|
||||
8536_001.3B 17.06
|
||||
8537_001.3B 8.46
|
||||
8538_001.3B 14.22
|
||||
8539_001.3B 22.86
|
||||
8540_001.3B 13.65
|
||||
8541_001.3B 28.38
|
||||
8541_002.3B 27.92
|
||||
8542_001.3B 17.16
|
||||
8544_001.3B 27.17
|
||||
8545_001.3B 8.38
|
||||
8546_001.3B 16.02
|
||||
8547_001.3B 6.45
|
||||
8548_001.3B 12.00
|
||||
8549_001.3B 8.05
|
||||
8550_001.3B 12.76
|
||||
8551_001.3B 9.19
|
||||
8552_001.3B 21.46
|
||||
8553_001.3B 16.93
|
||||
8554_001.3B 6.96
|
||||
8555_001.3B 20.78
|
||||
8556_001.3B 11.21
|
||||
8557_001.3B 20.81
|
||||
8558_001.3B 14.93
|
||||
8559_001.3B 13.74
|
||||
8560_001.3B 20.18
|
||||
8561_001.3B 27.58
|
||||
8562_001.3B 9.98
|
||||
8563_001.3B 19.63
|
||||
8564_001.3B 19.25
|
||||
8565_001.3B 8.89
|
||||
8566_001.3B 10.63
|
||||
8567_001.3B 32.30
|
||||
8568_001.3B 10.89
|
||||
8569_001.3B 10.54
|
||||
8570_001.3B 6.47
|
||||
8571_001.3B 27.37
|
||||
8572_001.3B 12.59
|
||||
8573_001.3B 12.75
|
||||
8574_001.3B 25.88
|
||||
8574_002.3B 10.32
|
||||
8575_001.3B 7.23
|
||||
8576_001.3B 9.30
|
||||
8577_001.3B 13.52
|
||||
8578_001.3B 14.99
|
||||
8579_001.3B 14.34
|
||||
8580_001.3B 9.18
|
||||
8581_001.3B 13.65
|
||||
8582_001.3B 15.56
|
||||
8583_001.3B 10.98
|
||||
8584_001.3B 19.10
|
||||
8585_001.3B 10.71
|
||||
8586_001.3B 7.56
|
||||
8587_001.3B 11.33
|
||||
8588_001.3B 12.31
|
||||
8589_001.3B 7.01
|
||||
8590_001.3B 31.73
|
||||
8591_001.3B 28.57
|
||||
8591_002.3B 1.59
|
||||
8592_001.3B 14.69
|
||||
8593_001.3B 17.47
|
||||
8594_001.3B 5.87
|
||||
8595_001.3B 11.65
|
||||
8596_001.3B 11.78
|
||||
8597_001.3B 11.55
|
||||
8598_001.3B 10.73
|
||||
8599_001.3B 16.48
|
||||
8600_001.3B 9.28
|
||||
8601_001.3B 29.24
|
||||
8700_001.3B 27.68
|
||||
8701_001.3B 16.43
|
||||
8702_001.3B 23.29
|
||||
8703_001.3B 19.90
|
||||
8704_001.3B 10.42
|
||||
8705_001.3B 12.84
|
||||
8706_001.3B 19.20
|
||||
8707_001.3B 21.43
|
||||
8708_001.3B 16.98
|
||||
8709_001.3B 17.46
|
||||
8710_001.3B 14.46
|
||||
8711_001.3B 14.90
|
||||
8712_001.3B 23.02
|
||||
8713_001.3B 21.30
|
||||
8714_001.3B 16.53
|
||||
8715_001.3B 25.23
|
||||
8716_001.3B 21.95
|
||||
8717_001.3B 18.83
|
||||
8718_001.3B 20.00
|
||||
8719_001.3B 19.54
|
||||
8720_001.3B 36.70
|
||||
8721_001.3B 29.99
|
||||
8721_002.3B 15.29
|
||||
8721_003.3B 20.94
|
||||
8722_001.3B 23.94
|
||||
8723_001.3B 22.84
|
||||
8723_002.3B 25.78
|
||||
8724_001.3B 24.02
|
||||
8724_002.3B 26.69
|
||||
8725_001.3B 31.03
|
||||
8726_001.3B 14.05
|
||||
8727_001.3B 22.38
|
||||
8728_001.3B 18.95
|
||||
8729_001.3B 28.13
|
||||
8730_001.3B 34.85
|
||||
8731_001.3B 15.46
|
||||
8733_001.3B 21.63
|
||||
8733_002.3B 11.17
|
||||
8734_001.3B 31.24
|
||||
8734_002.3B 31.30
|
||||
8735_001.3B 23.96
|
||||
8735_002.3B 10.60
|
||||
8736_001.3B 12.80
|
||||
8736_002.3B 7.17
|
||||
8737_001.3B 28.60
|
||||
8737_002.3B 28.37
|
||||
8738_001.3B 31.20
|
||||
8739_001.3B 28.83
|
||||
8739_002.3B 25.54
|
||||
8740_001.3B 11.76
|
||||
8740_002.3B 17.64
|
||||
8741_001.3B 21.74
|
||||
8741_002.3B 9.04
|
||||
8742_001.3B 16.40
|
||||
8742_002.3B 6.36
|
||||
8743_001.3B 23.47
|
||||
8743_002.3B 18.55
|
||||
8744_001.3B 32.44
|
||||
8744_002.3B 37.44
|
||||
8745_001.3B 26.77
|
||||
8745_002.3B 8.20
|
||||
8746_001.3B 23.62
|
||||
8747_001.3B 33.47
|
||||
8747_002.3B 36.95
|
||||
8748_001.3B 41.88
|
||||
8748_002.3B 27.64
|
||||
8749_001.3B 18.71
|
||||
8749_002.3B 34.45
|
||||
8750_001.3B 22.16
|
||||
8750_002.3B 35.29
|
||||
8750_003.3B 43.09
|
||||
8750_004.3B 10.43
|
||||
8751_001.3B 26.53
|
||||
8751_002.3B 70.68
|
||||
8752_001.3B 88.23
|
||||
8752_002.3B 77.29
|
||||
8752_003.3B 18.16
|
||||
8753_001.3B 98.12
|
||||
8753_002.3B 133.03
|
||||
8754_001.3B 68.60
|
||||
8754_002.3B 45.30
|
||||
8755_001.3B 61.55
|
||||
8755_002.3B 114.88
|
||||
8756_001.3B 59.22
|
||||
8758_001.3B 82.09
|
||||
8758_002.3B 52.46
|
||||
8758_003.3B 85.34
|
||||
8758_004.3B 78.35
|
||||
8759_001.3B 64.63
|
||||
8759_002.3B 72.59
|
||||
8759_003.3B 83.72
|
||||
8759_004.3B 68.94
|
||||
8760_001.3B 34.73
|
||||
8760_002.3B 14.60
|
||||
8761_001.3B 13.27
|
1
unlvtests/reports/bus.3B.wordacc
Normal file
@ -0,0 +1 @@
|
||||
unlvtests/ocreval/bin/ocrevalutf8 toolname [option1] [option2] [option...] ground.txt ocr.txt
|
1
unlvtests/reports/tess4.0.0-beta.1.bus.3B.sum
Normal file
@ -0,0 +1 @@
|
||||
tess4.0.0-beta.1 bus.3B -100.00% unlvtests unlvtests -100.00% -100.00% 4851.37s
|
6
unlvtests/reports/tess4.0.0-beta.1.summary
Normal file
@ -0,0 +1,6 @@
|
||||
1995 bus.3B 5959 98.14% 0.00% 1631 96.83% 0.00% 1293 95.73% 0.00%
|
||||
1995 doe3.3B 36349 97.52% 0.00% 7826 96.34% 0.00% 7042 94.87% 0.00%
|
||||
1995 mag.3B 15043 97.74% 0.00% 4566 96.01% 0.00% 3379 94.99% 0.00%
|
||||
1995 news.3B 6432 98.69% 0.00% 1946 97.68% 0.00% 1502 96.94% 0.00%
|
||||
tess4.0.0-beta.1 bus.3B -100.00% unlvtests unlvtests -100.00% -100.00% 4851.37s
|
||||
tess4.0.0-beta.1 Total 0 - -100.00% 0 - -100.00% 0 - -100.00%
|
1
unlvtests/reports/tess4.0.0-beta.1.total.sum
Normal file
@ -0,0 +1 @@
|
||||
tess4.0.0-beta.1 Total 0 - -100.00% 0 - -100.00% 0 - -100.00%
|
@ -20,21 +20,17 @@ then
|
||||
echo "Usage:$0 unlv-data-dir version-id"
|
||||
exit 1
|
||||
fi
|
||||
if [ ! -d api ]
|
||||
if [ ! -d src/api ]
|
||||
then
|
||||
echo "Run $0 from the tesseract-ocr root directory!"
|
||||
exit 1
|
||||
fi
|
||||
if [ ! -r api/tesseract ] && [ ! -r tesseract.exe ]
|
||||
if [ ! -r src/api/tesseract ] && [ ! -r tesseract.exe ]
|
||||
then
|
||||
echo "Please build tesseract before running $0"
|
||||
exit 1
|
||||
fi
|
||||
if [ ! -r testing/unlv/accuracy ] && [ ! -r testing/unlv/accuracy.exe ]
|
||||
then
|
||||
echo "Please download the UNLV accuracy tools (and build) to testing/unlv"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
||||
#deltapc new old calculates the %change from old to new
|
||||
deltapc() {
|
||||
@ -63,8 +59,9 @@ if [ "$bindir" = "$0" ]
|
||||
then
|
||||
bindir="./"
|
||||
fi
|
||||
rdir=testing/reports
|
||||
testsets="bus.3B doe3.3B mag.3B news.3B"
|
||||
rdir=unlvtests/reports
|
||||
#testsets="bus.3B doe3.3B mag.3B news.3B"
|
||||
testsets="bus.3B"
|
||||
|
||||
totalerrs=0
|
||||
totalwerrs=0
|
||||
@ -81,34 +78,34 @@ do
|
||||
# Count the errors on all the pages.
|
||||
$bindir/counttestset.sh "$imdir/$set/pages"
|
||||
# Get the old character word and nonstop word errors.
|
||||
olderrs=$(cut -f3 "testing/reports/1995.$set.sum")
|
||||
oldwerrs=$(cut -f6 "testing/reports/1995.$set.sum")
|
||||
oldnswerrs=$(cut -f9 "testing/reports/1995.$set.sum")
|
||||
olderrs=$(cut -f3 "unlvtests/reports/1995.$set.sum")
|
||||
oldwerrs=$(cut -f6 "unlvtests/reports/1995.$set.sum")
|
||||
oldnswerrs=$(cut -f9 "unlvtests/reports/1995.$set.sum")
|
||||
# Get the new character word and nonstop word errors and accuracy.
|
||||
cherrs=$(head -4 "testing/reports/$set.characc" |tail -1 |cut -c1-9 |
|
||||
cherrs=$(head -4 "unlvtests/reports/$set.characc" |tail -1 |cut -c1-9 |
|
||||
tr -d '[:blank:]')
|
||||
chacc=$(head -5 "testing/reports/$set.characc" |tail -1 |cut -c1-9 |
|
||||
chacc=$(head -5 "unlvtests/reports/$set.characc" |tail -1 |cut -c1-9 |
|
||||
tr -d '[:blank:]')
|
||||
wderrs=$(head -4 "testing/reports/$set.wordacc" |tail -1 |cut -c1-9 |
|
||||
wderrs=$(head -4 "unlvtests/reports/$set.wordacc" |tail -1 |cut -c1-9 |
|
||||
tr -d '[:blank:]')
|
||||
wdacc=$(head -5 "testing/reports/$set.wordacc" |tail -1 |cut -c1-9 |
|
||||
wdacc=$(head -5 "unlvtests/reports/$set.wordacc" |tail -1 |cut -c1-9 |
|
||||
tr -d '[:blank:]')
|
||||
nswderrs=$(grep Total "testing/reports/$set.wordacc" |head -2 |tail -1 |
|
||||
nswderrs=$(grep Total "unlvtests/reports/$set.wordacc" |head -2 |tail -1 |
|
||||
cut -c10-17 |tr -d '[:blank:]')
|
||||
nswdacc=$(grep Total "testing/reports/$set.wordacc" |head -2 |tail -1 |
|
||||
nswdacc=$(grep Total "unlvtests/reports/$set.wordacc" |head -2 |tail -1 |
|
||||
cut -c19-26 |tr -d '[:blank:]')
|
||||
# Compute the percent change.
|
||||
chdelta=$(deltapc "$cherrs" "$olderrs")
|
||||
wdelta=$(deltapc "$wderrs" "$oldwerrs")
|
||||
nswdelta=$(deltapc "$nswderrs" "$oldnswerrs")
|
||||
sumfile=$rdir/$vid.$set.sum
|
||||
if [ -r "testing/reports/$set.times" ]
|
||||
if [ -r "unlvtests/reports/$set.times" ]
|
||||
then
|
||||
total_time=$(timesum "testing/reports/$set.times")
|
||||
if [ -r "testing/reports/prev/$set.times" ]
|
||||
total_time=$(timesum "unlvtests/reports/$set.times")
|
||||
if [ -r "unlvtests/reports/prev/$set.times" ]
|
||||
then
|
||||
paste "testing/reports/prev/$set.times" "testing/reports/$set.times" |
|
||||
awk '{ printf("%s %.2f\n", $1, $4-$2); }' |sort -k2n >"testing/reports/$set.timedelta"
|
||||
paste "unlvtests/reports/prev/$set.times" "unlvtests/reports/$set.times" |
|
||||
awk '{ printf("%s %.2f\n", $1, $4-$2); }' |sort -k2n >"unlvtests/reports/$set.timedelta"
|
||||
fi
|
||||
else
|
||||
total_time='0.0'
|
@ -20,12 +20,12 @@ then
|
||||
echo "Usage:$0 pagesfile [-zoning]"
|
||||
exit 1
|
||||
fi
|
||||
if [ ! -d api ]
|
||||
if [ ! -d src/api ]
|
||||
then
|
||||
echo "Run $0 from the tesseract-ocr root directory!"
|
||||
exit 1
|
||||
fi
|
||||
if [ ! -r api/tesseract ]
|
||||
if [ ! -r src/api/tesseract ]
|
||||
then
|
||||
if [ ! -r tesseract.exe ]
|
||||
then
|
||||
@ -35,7 +35,7 @@ then
|
||||
tess="./tesseract.exe"
|
||||
fi
|
||||
else
|
||||
tess="time -f %U -o times.txt api/tesseract"
|
||||
tess="time -f %U -o times.txt src/api/tesseract"
|
||||
export TESSDATA_PREFIX=$PWD/
|
||||
fi
|
||||
|
||||
@ -45,14 +45,14 @@ setname=${imdir##*/}
|
||||
if [ $# -eq 2 ] && [ "$2" = "-zoning" ]
|
||||
then
|
||||
config=unlv.auto
|
||||
resdir=testing/results/zoning.$setname
|
||||
resdir=unlvtests/results/zoning.$setname
|
||||
else
|
||||
config=unlv
|
||||
resdir=testing/results/$setname
|
||||
resdir=unlvtests/results/$setname
|
||||
fi
|
||||
echo -e "Testing on set $setname in directory $imdir to $resdir\n"
|
||||
mkdir -p "$resdir"
|
||||
rm -f "testing/reports/$setname.times"
|
||||
rm -f "unlvtests/reports/$setname.times"
|
||||
while read page dir
|
||||
do
|
||||
# A pages file may be a list of files with subdirs or maybe just
|
||||
@ -64,11 +64,11 @@ do
|
||||
srcdir="$imdir"
|
||||
fi
|
||||
# echo "$srcdir/$page.tif"
|
||||
$tess "$srcdir/$page.tif" "$resdir/$page" --psm 6 $config 2>&1 |grep -v "OCR Engine"
|
||||
$tess "$srcdir/$page.tif" "$resdir/$page" --tessdata-dir ../tessdata_fast --oem 1 -l eng --psm 6 $config 2>&1 |grep -v "OCR Engine"
|
||||
if [ -r times.txt ]
|
||||
then
|
||||
read t <times.txt
|
||||
echo "$page $t" >>"testing/reports/$setname.times"
|
||||
echo "$page $t" >>"unlvtests/reports/$setname.times"
|
||||
echo -e "\033M$page $t"
|
||||
if [ "$t" = "Command terminated by signal 2" ]
|
||||
then
|