move testing and testdata to test, add unlvtests

This commit is contained in:
Shree Devi Kumar 2018-06-06 12:20:14 +00:00
parent f8b689f85f
commit 2563380d51
47 changed files with 310 additions and 22173 deletions

View File

@ -24,7 +24,7 @@ SUBDIRS += src/ccmain src/api . tessdata doc unittest
EXTRA_DIST = README.md\
aclocal.m4 config configure.ac autogen.sh contrib \
tesseract.pc.in $(TRAINING_SUBDIR) java doc testing
tesseract.pc.in $(TRAINING_SUBDIR) java doc unlvtests
DIST_SUBDIRS = $(SUBDIRS) $(TRAINING_SUBDIR)

View File

@ -493,7 +493,7 @@ AC_CONFIG_FILES([src/wordrec/Makefile])
AC_CONFIG_FILES([tessdata/Makefile])
AC_CONFIG_FILES([tessdata/configs/Makefile])
AC_CONFIG_FILES([tessdata/tessconfigs/Makefile])
AC_CONFIG_FILES([testing/Makefile])
AC_CONFIG_FILES([unlvtests/Makefile])
AC_CONFIG_FILES([unittest/Makefile])
AC_CONFIG_FILES([java/Makefile])
AC_CONFIG_FILES([java/com/Makefile])

2
test

@ -1 +1 @@
Subproject commit 44f9e247303e4e33a1f51940e1a32885959c4dd4
Subproject commit 5fc323c76c409cb21962a4182cb893d3da3040ba

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,112 +0,0 @@
111
NULL 0 NULL 0
I 5 59,68,216,255,10,155,0,50,29,173 Latin 7 0 1 I # I [49 ]A
' 10 148,225,216,255,11,51,0,97,36,173 Common 2 10 2 ' # ' [27 ]p
v 3 59,68,187,197,84,173,0,32,84,218 Latin 61 0 3 v # v [76 ]a
e 3 58,64,189,200,87,154,0,32,98,188 Latin 88 0 4 e # e [65 ]a
J 5 0,64,216,255,39,242,0,30,62,234 Latin 79 0 5 J # J [4a ]A
o 3 58,66,188,200,87,151,0,32,98,185 Latin 83 0 6 o # o [6f ]a
i 3 59,69,216,255,11,141,0,54,27,173 Latin 1 0 7 i # i [69 ]a
n 3 59,68,188,202,87,187,0,25,101,208 Latin 45 0 8 n # n [6e ]a
| 0 0,67,216,255,8,73,0,80,50,173 Common 9 10 9 | # | [7c ]
- 10 105,161,122,175,49,176,0,43,56,215 Common 10 3 10 - # - [2d ]p
S 5 57,64,219,255,87,174,0,30,100,200 Latin 26 0 11 S # S [53 ]A
z 3 46,68,186,199,65,151,0,32,68,173 Latin 95 0 12 z # z [7a ]a
: 10 58,85,141,221,11,69,0,67,38,173 Common 13 6 13 : # : [3a ]p
# 10 37,84,200,255,99,221,0,41,109,266 Common 14 4 14 # # # [23 ]p
6 8 58,66,219,255,87,156,0,54,104,173 Common 15 2 15 6 # 6 [36 ]0
% 10 27,67,205,255,105,257,0,49,117,288 Common 16 4 16 % # % [25 ]p
5 8 12,66,199,255,82,160,0,36,103,173 Common 17 2 17 5 # 5 [35 ]0
0 8 58,66,187,255,88,164,0,45,103,180 Common 18 2 18 0 # 0 [30 ]0
@ 10 0,65,211,255,99,286,0,39,117,291 Common 19 10 19 @ # @ [40 ]p
p 3 0,47,192,226,87,180,0,25,100,200 Latin 68 0 20 p # p [70 ]a
a 3 58,65,186,200,85,164,0,26,97,185 Latin 67 0 21 a # a [61 ]a
r 3 59,68,186,202,58,173,0,40,69,180 Latin 40 0 22 r # r [72 ]a
m 3 56,68,189,202,108,280,0,25,117,306 Latin 38 0 23 m # m [6d ]a
F 5 57,68,216,255,68,210,0,31,77,209 Latin 29 0 24 F # F [46 ]A
u 3 57,65,187,202,85,184,0,39,100,208 Latin 85 0 25 u # u [75 ]a
s 3 58,65,192,200,78,147,0,30,91,173 Latin 11 0 26 s # s [73 ]a
B 5 62,68,216,255,91,227,0,27,106,227 Latin 46 0 27 B # B [42 ]A
» 10 0,133,146,235,63,284,0,32,71,294 Common 28 10 49 » # » [bb ]p
f 3 0,68,216,255,54,175,0,42,55,193 Latin 24 0 29 f # f [66 ]a
d 3 57,65,216,255,88,174,0,28,100,200 Latin 59 0 30 d # d [64 ]a
c 3 58,64,192,200,80,153,0,36,88,178 Latin 33 0 31 c # c [63 ]a
h 3 59,68,216,255,87,187,0,25,101,208 Latin 55 0 32 h # h [68 ]a
C 5 58,65,219,255,87,192,0,32,107,209 Latin 31 0 33 C # C [43 ]A
t 3 58,66,206,254,57,167,0,47,59,180 Latin 37 0 34 t # t [74 ]a
L 5 59,68,216,255,64,193,0,31,74,206 Latin 41 0 35 L # L [4c ]A
? 10 40,67,219,255,59,144,0,65,77,188 Common 36 10 36 ? # ? [3f ]p
T 5 59,68,216,255,85,227,0,47,88,236 Latin 34 0 37 T # T [54 ]A
M 5 57,68,216,255,99,301,0,35,117,286 Latin 23 0 38 M # M [4d ]A
y 3 0,47,187,202,87,199,0,25,87,230 Latin 100 0 39 y # y [79 ]a
R 5 57,68,216,255,88,227,0,27,104,232 Latin 22 0 40 R # R [52 ]A
l 3 59,68,216,255,11,147,0,56,27,173 Latin 35 0 41 l # l [6c ]a
~ 0 91,229,135,255,73,174,0,41,0,200 Common 42 10 42 ~ # ~ [7e ]
< 0 29,102,173,255,69,184,0,50,90,256 Common 43 10 76 < # < [3c ]
® 0 28,163,209,255,83,223,0,48,92,257 Common 44 10 44 ® # ® [ae ]
N 5 59,68,216,255,87,262,0,27,104,249 Latin 8 0 45 N # N [4e ]A
b 3 58,64,216,255,87,180,0,25,100,200 Latin 27 0 46 b # b [62 ]a
k 3 57,68,216,255,85,177,0,35,93,198 Latin 101 0 47 k # k [6b ]a
[ 10 8,64,216,255,39,136,0,80,55,173 Common 48 10 70 [ # [ [5b ]p
« 10 26,133,148,235,63,279,0,35,71,281 Common 49 10 28 « # « [ab ]p
1 8 49,69,192,255,45,128,0,66,74,173 Common 50 2 50 1 # 1 [31 ]0
, 10 14,46,79,115,17,78,0,58,30,173 Common 51 6 51 , # , [2c ]p
. 10 26,67,73,112,13,51,0,67,30,173 Common 52 6 52 . # . [2e ]p
” 10 141,233,216,255,59,141,0,87,66,298 Common 53 10 53 " # ” [201d ]p
g 3 0,43,188,212,88,176,0,32,100,210 Latin 93 0 54 g # g [67 ]a
H 5 59,68,216,255,91,258,0,27,107,244 Latin 32 0 55 H # H [48 ]A
$ 0 24,63,229,255,85,174,0,36,106,174 Common 56 4 56 $ # $ [24 ]
( 10 0,64,216,255,42,118,0,97,61,173 Common 57 10 94 ( # ( [28 ]p
+ 0 54,102,171,253,90,176,0,37,103,213 Common 58 3 58 + # + [2b ]
D 5 59,68,216,255,93,230,0,27,107,236 Latin 30 0 59 D # D [44 ]A
w 3 59,68,187,195,108,235,0,32,117,286 Latin 103 0 60 w # w [77 ]a
V 5 59,68,216,255,103,207,0,41,101,245 Latin 3 0 61 V # V [56 ]A
£ 0 0,135,219,255,64,201,0,55,61,298 Common 62 4 62 £ # £ [a3 ]
4 8 0,68,198,255,93,161,0,41,96,173 Common 63 2 63 4 # 4 [34 ]0
9 8 0,66,200,255,89,156,0,39,104,173 Common 64 2 64 9 # 9 [39 ]0
Q 5 7,64,219,255,91,205,0,30,106,227 Latin 96 0 65 Q # Q [51 ]A
& 10 53,64,194,255,108,232,0,47,112,239 Common 66 10 66 & # & [26 ]p
A 5 52,68,216,255,100,216,0,17,98,231 Latin 21 0 67 A # A [41 ]A
P 5 57,68,216,255,87,225,0,32,97,230 Latin 20 0 68 P # P [50 ]A
¢ 0 14,158,190,255,56,144,0,72,61,270 Common 69 4 69 ¢ # ¢ [a2 ]
] 10 8,64,216,255,39,129,0,44,55,173 Common 70 10 48 ] # ] [5d ]p
3 8 0,66,196,255,84,158,0,32,103,173 Common 71 2 71 3 # 3 [33 ]0
2 8 30,69,194,255,80,160,0,27,97,173 Common 72 2 72 2 # 2 [32 ]0
© 0 28,125,209,255,118,232,0,32,119,257 Common 73 10 73 © # © [a9 ]
8 8 57,66,219,255,88,162,0,41,103,174 Common 74 2 74 8 # 8 [38 ]0
/ 10 0,65,219,255,59,228,0,36,62,238 Common 75 6 75 / # / [2f ]p
> 0 29,102,173,255,78,184,0,50,90,256 Common 76 10 43 > # > [3e ]
X 5 59,68,216,255,94,275,0,25,93,256 Latin 86 0 77 X # X [58 ]A
é 3 0,64,222,255,87,384,0,32,98,391 Latin 78 0 78 é # é [e9 ]a
j 3 0,47,216,255,36,145,0,49,50,173 Latin 5 0 79 j # j [6a ]a
; 10 14,56,131,221,17,93,0,58,38,173 Common 80 10 80 ; # ; [3b ]p
7 8 12,68,196,255,72,160,0,60,75,173 Common 81 2 81 7 # 7 [37 ]0
€ 0 32,68,209,255,97,238,0,49,103,293 Common 82 4 82 € # € [20ac ]
O 5 57,64,219,255,91,209,0,34,106,233 Latin 6 0 83 O # O [4f ]A
¥ 0 59,75,209,255,91,238,0,52,91,270 Common 84 4 84 ¥ # ¥ [a5 ]
U 5 58,64,216,255,91,214,0,39,106,220 Latin 25 0 85 U # U [55 ]A
x 3 59,68,187,201,85,189,0,25,84,218 Latin 77 0 86 x # x [78 ]a
} 10 0,44,216,255,54,148,0,56,59,173 Common 87 10 97 } # } [7d ]p
E 5 59,68,216,255,68,210,0,31,80,219 Latin 4 0 88 E # E [45 ]A
§ 0 9,66,219,255,82,207,0,86,93,293 Common 89 10 89 § # § [a7 ]
= 0 74,139,144,199,90,186,0,32,103,224 Common 90 10 90 = # = [3d ]
! 10 41,67,216,255,11,87,0,71,50,173 Common 91 10 91 ! # ! [21 ]p
10 141,233,212,255,17,78,0,109,30,298 Common 92 10 92 ' # [2019 ]p
G 5 58,64,219,255,91,230,0,30,106,230 Latin 54 0 93 G # G [47 ]A
) 10 0,64,216,255,42,119,0,53,61,173 Common 94 10 57 ) # ) [29 ]p
Z 5 64,68,216,255,72,218,0,30,77,236 Latin 12 0 95 Z # Z [5a ]A
q 3 0,47,192,202,88,196,0,30,100,200 Latin 65 0 96 q # q [71 ]a
{ 10 0,44,216,255,54,148,0,71,59,173 Common 97 10 87 { # { [7b ]p
“ 10 141,233,216,255,56,133,0,172,66,298 Common 98 10 98 " # “ [201c ]p
— 10 110,155,132,167,126,297,0,23,136,298 Common 99 10 99 - # — [2014 ]p
Y 5 59,68,216,255,91,205,0,47,91,223 Latin 39 0 100 Y # Y [59 ]A
K 5 57,68,216,255,92,225,0,37,103,216 Latin 47 0 101 K # K [4b ]A
* 10 78,183,188,255,49,134,0,60,53,173 Common 102 10 102 * # * [2a ]p
W 5 54,68,216,255,106,314,0,41,117,318 Latin 60 0 103 W # W [57 ]A
" 10 151,225,216,255,52,115,0,71,71,173 Common 104 10 104 " # " [22 ]p
\ 10 0,67,219,255,28,250,0,71,62,261 Common 105 10 105 \ # \ [5c ]p
° 0 66,247,209,255,22,399,0,98,66,409 Common 106 4 106 ° # ° [b0 ]
fi 3 0,71,216,255,87,202,0,28,105,199 Latin 107 0 107 fi # fi [fb01 ]a
10 141,233,210,255,17,64,0,216,30,298 Common 108 10 108 ' # [2018 ]p
_ 10 0,50,0,64,73,248,0,29,75,259 Common 109 10 109 _ # _ [5f ]p
fl 3 0,71,216,255,87,219,0,28,105,236 Latin 110 0 110 fl # fl [fb02 ]a

3707
testdata/jpn.unicharset vendored

File diff suppressed because it is too large Load Diff

2841
testdata/kan.unicharset vendored

File diff suppressed because it is too large Load Diff

1651
testdata/kor.unicharset vendored

File diff suppressed because it is too large Load Diff

2198
testdata/mar.unicharset vendored

File diff suppressed because it is too large Load Diff

View File

@ -1,227 +0,0 @@
226
NULL 0 NULL 0
Joined 7 0,69,188,255,486,1218,0,30,486,1188 Latin 22 0 64 Joined # Joined [4a 6f 69 6e 65 64 ]a
|Broken|0|1 f 0,69,186,255,892,2138,0,80,892,2058 Common 101 10 101 |Broken|0|1 # Broken
T 5 59,68,216,255,85,227,0,47,88,236 Latin 46 0 3 T # T [54 ]A
u 3 57,65,187,202,85,184,0,39,100,208 Latin 47 0 4 u # u [75 ]a
d 3 57,65,216,255,88,174,0,28,100,200 Latin 23 0 5 d # d [64 ]a
o 3 58,66,188,200,87,151,0,32,98,185 Latin 28 0 6 o # o [6f ]a
S 5 57,64,219,255,87,174,0,30,100,200 Latin 15 0 7 S # S [53 ]A
C 5 58,65,219,255,87,192,0,32,107,209 Latin 31 0 8 C # C [43 ]A
R 5 57,68,216,255,88,227,0,27,104,232 Latin 20 0 9 R # R [52 ]A
A 5 52,68,216,255,100,216,0,17,98,231 Latin 14 0 10 A # A [41 ]A
P 5 57,68,216,255,87,225,0,32,97,230 Latin 37 0 11 P # P [50 ]A
g 3 0,43,188,212,88,176,0,32,100,210 Latin 76 0 12 g # g [67 ]a
i 3 59,69,216,255,11,141,0,54,27,173 Latin 27 0 13 i # i [69 ]a
a 3 58,65,186,200,85,164,0,26,97,185 Latin 10 0 14 a # a [61 ]a
s 3 58,65,192,200,78,147,0,30,91,173 Latin 7 0 15 s # s [73 ]a
2 8 30,69,194,255,80,160,0,27,97,173 Common 16 2 16 2 # 2 [32 ]0
0 8 58,66,187,255,88,164,0,45,103,180 Common 17 2 17 0 # 0 [30 ]0
8 8 57,66,219,255,88,162,0,41,103,174 Common 18 2 18 8 # 8 [38 ]0
M 5 57,68,216,255,99,301,0,35,117,286 Latin 35 0 19 M # M [4d ]A
r 3 59,68,186,202,58,173,0,40,69,180 Latin 9 0 20 r # r [72 ]a
y 3 0,47,187,202,87,199,0,25,87,230 Latin 109 0 21 y # y [79 ]a
j 3 0,47,216,255,36,145,0,49,50,173 Latin 64 0 22 j # j [6a ]a
D 5 59,68,216,255,93,230,0,27,107,236 Latin 5 0 23 D # D [44 ]A
w 3 59,68,187,195,108,235,0,32,117,286 Latin 108 0 24 w # w [77 ]a
n 3 59,68,188,202,87,187,0,25,101,208 Latin 49 0 25 n # n [6e ]a
É 5 59,68,232,255,68,314,0,31,80,325 Latin 59 0 26 É # É [c9 ]A
I 5 59,68,216,255,10,155,0,50,29,173 Latin 13 0 27 I # I [49 ]A
O 5 57,64,219,255,91,209,0,34,106,233 Latin 6 0 28 O # O [4f ]A
“ 10 141,233,216,255,56,133,0,172,66,298 Common 29 10 29 " # “ [201c ]p
l 3 59,68,216,255,11,147,0,56,27,173 Latin 48 0 30 l # l [6c ]a
c 3 58,64,192,200,80,153,0,36,88,178 Latin 8 0 31 c # c [63 ]a
e 3 58,64,189,200,87,154,0,32,98,188 Latin 33 0 32 e # e [65 ]a
E 5 59,68,216,255,68,210,0,31,80,219 Latin 32 0 33 E # E [45 ]A
â 3 0,64,222,255,85,256,0,26,97,256 Latin 34 0 34 â # â [e2 ]a
m 3 56,68,189,202,108,280,0,25,117,306 Latin 19 0 35 m # m [6d ]a
h 3 59,68,216,255,87,187,0,25,101,208 Latin 97 0 36 h # h [68 ]a
p 3 0,47,192,226,87,180,0,25,100,200 Latin 11 0 37 p # p [70 ]a
B 5 62,68,216,255,91,227,0,27,106,227 Latin 45 0 38 B # B [42 ]A
. 10 26,67,73,112,13,51,0,67,30,173 Common 39 6 39 . # . [2e ]p
£ 0 0,135,219,255,64,201,0,55,61,298 Common 40 4 40 £ # £ [a3 ]
ó 3 0,64,222,255,87,192,0,32,98,197 Latin 73 0 41 ó # ó [f3 ]a
1 8 49,69,192,255,45,128,0,66,74,173 Common 42 2 42 1 # 1 [31 ]0
6 8 58,66,219,255,87,156,0,54,104,173 Common 43 2 43 6 # 6 [36 ]0
4 8 0,68,198,255,93,161,0,41,96,173 Common 44 2 44 4 # 4 [34 ]0
b 3 58,64,216,255,87,180,0,25,100,200 Latin 38 0 45 b # b [62 ]a
t 3 58,66,206,254,57,167,0,47,59,180 Latin 3 0 46 t # t [74 ]a
U 5 58,64,216,255,91,214,0,39,106,220 Latin 4 0 47 U # U [55 ]A
L 5 59,68,216,255,64,193,0,31,74,206 Latin 30 0 48 L # L [4c ]A
N 5 59,68,216,255,87,262,0,27,104,249 Latin 25 0 49 N # N [4e ]A
V 5 59,68,216,255,103,207,0,41,101,245 Latin 56 0 50 V # V [56 ]A
” 10 141,233,216,255,59,141,0,87,66,298 Common 51 10 51 " # ” [201d ]p
) 10 0,64,216,255,42,119,0,53,61,173 Common 52 10 86 ) # ) [29 ]p
, 10 14,46,79,115,17,78,0,58,30,173 Common 53 6 53 , # , [2c ]p
k 3 57,68,216,255,85,177,0,35,93,198 Latin 57 0 54 k # k [6b ]a
: 10 58,85,141,221,11,69,0,67,38,173 Common 55 6 55 : # : [3a ]p
v 3 59,68,187,197,84,173,0,32,84,218 Latin 50 0 56 v # v [76 ]a
K 5 57,68,216,255,92,225,0,37,103,216 Latin 54 0 57 K # K [4b ]A
í 3 62,69,222,255,40,279,0,54,47,286 Latin 92 0 58 í # í [ed ]a
é 3 0,64,222,255,87,384,0,32,98,391 Latin 26 0 59 é # é [e9 ]a
ê 3 0,64,222,255,87,409,0,32,98,407 Latin 99 0 60 ê # ê [ea ]a
Ç 5 0,64,217,255,87,286,0,32,107,296 Latin 82 0 61 Ç # Ç [c7 ]A
@ 10 0,65,211,255,99,286,0,39,117,291 Common 62 10 62 @ # @ [40 ]p
Ú 5 0,64,232,255,91,294,0,39,106,291 Latin 107 0 63 Ú # Ú [da ]A
J 5 0,64,216,255,39,242,0,30,62,234 Latin 22 0 64 J # J [4a ]A
€ 0 32,68,209,255,97,238,0,49,103,293 Common 65 4 65 € # € [20ac ]
9 8 0,66,200,255,89,156,0,39,104,173 Common 66 2 66 9 # 9 [39 ]0
5 8 12,66,199,255,82,160,0,36,103,173 Common 67 2 67 5 # 5 [35 ]0
& 10 53,64,194,255,108,232,0,47,112,239 Common 68 10 68 & # & [26 ]p
x 3 59,68,187,201,85,189,0,25,84,218 Latin 89 0 69 x # x [78 ]a
/ 10 0,65,219,255,59,228,0,36,62,238 Common 70 6 70 / # / [2f ]p
² 0 3,192,209,255,50,248,0,105,0,293 Common 71 2 71 2 # ² [b2 ]
F 5 57,68,216,255,68,210,0,31,77,209 Latin 84 0 72 F # F [46 ]A
Ó 5 0,64,232,255,91,276,0,34,106,286 Latin 41 0 73 Ó # Ó [d3 ]A
3 8 0,66,196,255,84,158,0,32,103,173 Common 74 2 74 3 # 3 [33 ]0
z 3 46,68,186,199,65,151,0,32,68,173 Latin 112 0 75 z # z [7a ]a
G 5 58,64,219,255,91,230,0,30,106,230 Latin 12 0 76 G # G [47 ]A
á 3 0,64,222,255,85,414,0,26,97,412 Latin 100 0 77 á # á [e1 ]a
- 10 105,161,122,175,49,176,0,43,56,215 Common 78 3 78 - # - [2d ]p
? 10 40,67,219,255,59,144,0,65,77,188 Common 79 10 79 ? # ? [3f ]p
! 10 41,67,216,255,11,87,0,71,50,173 Common 80 10 80 ! # ! [21 ]p
q 3 0,47,192,202,88,196,0,30,100,200 Latin 116 0 81 q # q [71 ]a
ç 3 0,31,182,232,80,299,0,36,96,309 Latin 61 0 82 ç # ç [e7 ]a
ã 3 0,64,224,255,85,279,0,26,97,289 Latin 96 0 83 ã # ã [e3 ]a
f 3 0,68,216,255,54,175,0,42,55,193 Latin 72 0 84 f # f [66 ]a
+ 0 54,102,171,253,90,176,0,37,103,213 Common 85 3 85 + # + [2b ]
( 10 0,64,216,255,42,118,0,97,61,173 Common 86 10 52 ( # ( [28 ]p
' 10 148,225,216,255,11,51,0,97,36,173 Common 87 10 87 ' # ' [27 ]p
; 10 14,56,131,221,17,93,0,58,38,173 Common 88 10 88 ; # ; [3b ]p
X 5 59,68,216,255,94,275,0,25,93,256 Latin 69 0 89 X # X [58 ]A
* 10 78,183,188,255,49,134,0,60,53,173 Common 90 10 90 * # * [2a ]p
º 3 64,187,188,255,51,189,0,81,64,293 Latin 91 0 91 o # º [ba ]a
Í 5 64,68,232,255,35,197,0,48,55,207 Latin 58 0 92 Í # Í [cd ]A
³ 0 0,192,209,255,48,268,0,99,0,293 Common 93 2 93 3 # ³ [b3 ]
10 64,101,142,215,32,100,0,84,37,173 Common 94 10 94 # [203a ]p
ª 3 64,187,207,255,51,286,0,71,62,296 Latin 95 0 95 a # ª [aa ]a
à 5 6,68,232,255,100,204,0,25,98,245 Latin 83 0 96 à # à [c3 ]A
H 5 59,68,216,255,91,258,0,27,107,244 Latin 36 0 97 H # H [48 ]A
# 10 37,84,200,255,99,221,0,41,109,266 Common 98 4 98 # # # [23 ]p
Ê 5 0,68,232,255,72,266,0,31,80,276 Latin 60 0 99 Ê # Ê [ca ]A
Á 5 64,68,232,255,100,203,0,29,98,245 Latin 77 0 100 Á # Á [c1 ]A
| 0 0,67,216,255,8,73,0,80,50,173 Common 101 10 101 | # | [7c ]
" 10 151,225,216,255,52,115,0,71,71,173 Common 102 10 102 " # " [22 ]p
> 0 29,102,173,255,78,184,0,50,90,256 Common 103 10 111 > # > [3e ]
à 3 0,64,222,255,85,407,0,26,97,407 Latin 104 0 104 à # à [e0 ]a
õ 3 58,66,224,255,87,194,0,32,98,204 Latin 105 0 105 õ # õ [f5 ]a
« 10 26,133,148,235,63,279,0,35,71,281 Common 106 10 126 « # « [ab ]p
ú 3 0,65,222,255,85,212,0,39,100,212 Latin 63 0 107 ú # ú [fa ]a
W 5 54,68,216,255,106,314,0,41,117,318 Latin 24 0 108 W # W [57 ]A
Y 5 59,68,216,255,91,205,0,47,91,223 Latin 21 0 109 Y # Y [59 ]A
7 8 12,68,196,255,72,160,0,60,75,173 Common 110 2 110 7 # 7 [37 ]0
< 0 29,102,173,255,69,184,0,50,90,256 Common 111 10 103 < # < [3c ]
Z 5 64,68,216,255,72,218,0,30,77,236 Latin 75 0 112 Z # Z [5a ]A
¹ 0 64,192,209,255,24,279,1,119,0,293 Common 113 2 113 1 # ¹ [b9 ]
ü 3 0,65,219,255,85,220,0,39,100,225 Latin 114 0 114 ü # ü [fc ]a
_ 10 0,50,0,64,73,248,0,29,75,259 Common 115 10 115 _ # _ [5f ]p
Q 5 7,64,219,255,91,205,0,30,106,227 Latin 81 0 116 Q # Q [51 ]A
… 10 60,143,79,232,101,332,0,45,107,337 Common 117 10 117 ... # … [2026 ]p
¡ 10 0,66,185,255,11,176,0,125,49,293 Common 118 10 118 ¡ # ¡ [a1 ]p
$ 0 24,63,229,255,85,174,0,36,106,174 Common 119 4 119 $ # $ [24 ]
© 0 28,125,209,255,118,232,0,32,119,257 Common 120 10 120 © # © [a9 ]
[ 10 8,64,216,255,39,136,0,80,55,173 Common 121 10 123 [ # [ [5b ]p
% 10 27,67,205,255,105,257,0,49,117,288 Common 122 4 122 % # % [25 ]p
] 10 8,64,216,255,39,129,0,44,55,173 Common 123 10 121 ] # ] [5d ]p
= 0 74,139,144,199,90,186,0,32,103,224 Common 124 10 124 = # = [3d ]
₂ 0 10,67,113,172,50,118,0,105,77,293 Common 125 2 125 2 # ₂ [2082 ]
» 10 0,133,146,235,63,284,0,32,71,294 Common 126 10 106 » # » [bb ]p
⁴ 0 115,163,227,255,63,131,0,101,77,293 Common 127 2 127 4 # ⁴ [2074 ]
ô 3 58,66,222,255,87,192,0,32,98,202 Latin 128 0 128 ô # ô [f4 ]a
° 0 66,247,209,255,22,399,0,98,66,409 Common 129 4 129 ° # ° [b0 ]
₄ 0 12,67,115,170,62,131,0,97,77,293 Common 130 2 130 4 # ₄ [2084 ]
₃ 0 8,67,113,172,52,106,0,103,77,293 Common 131 2 131 3 # ₃ [2083 ]
₁ 0 10,67,113,172,36,78,0,108,77,293 Common 132 2 132 1 # ₁ [2081 ]
fl 3 0,68,216,255,82,408,0,42,82,366 Latin 72 0 84 fl # fl [66 6c ]a
fi 3 0,69,216,255,82,408,0,42,82,366 Latin 72 0 84 fi # fi [66 69 ]a
... 10 26,67,73,112,90,586,0,67,90,519 Common 39 6 39 ... # ... [2e 2e 2e ]p
ff 3 0,68,216,255,110,428,0,42,110,386 Latin 72 0 84 ff # ff [66 66 ]a
⁸ 0 124,151,229,255,56,102,0,53,75,173 Common 137 2 137 8 # ⁸ [2078 ]
⁶ 0 124,151,229,255,56,99,0,56,77,173 Common 138 2 138 6 # ⁶ [2076 ]
⁹ 0 126,153,230,255,56,104,0,57,77,173 Common 139 2 139 9 # ⁹ [2079 ]
⁵ 0 124,153,227,255,50,104,0,51,75,173 Common 140 2 140 5 # ⁵ [2075 ]
⁷ 0 128,153,227,255,52,106,0,58,77,173 Common 141 2 141 7 # ⁷ [2077 ]
⁰ 0 124,151,229,255,56,102,0,53,77,173 Common 142 2 142 0 # ⁰ [2070 ]
₆ 0 10,65,118,172,56,99,0,56,77,173 Common 143 2 143 6 # ₆ [2086 ]
₉ 0 10,65,118,172,56,104,0,57,77,173 Common 144 2 144 9 # ₉ [2089 ]
₀ 0 8,65,119,172,56,102,0,53,77,173 Common 145 2 145 0 # ₀ [2080 ]
₅ 0 8,65,113,170,50,98,0,51,77,173 Common 146 2 146 5 # ₅ [2085 ]
₈ 0 8,65,118,172,59,102,0,53,75,173 Common 147 2 147 8 # ₈ [2088 ]
ffi 3 0,69,216,255,137,601,0,42,137,559 Latin 72 0 84 ffi # ffi [66 66 69 ]a
₇ 0 10,67,115,170,52,92,0,60,77,173 Common 149 2 149 7 # ₇ [2087 ]
Th 7 59,68,216,255,189,491,0,47,189,444 Latin 46 0 3 Th # Th [54 68 ]a
ft 3 0,68,206,255,114,415,0,42,114,373 Latin 72 0 84 ft # ft [66 74 ]a
ffl 3 0,68,216,255,137,601,0,42,137,559 Latin 72 0 84 ffl # ffl [66 66 6c ]a
NJ 5 0,68,216,255,166,510,0,27,166,483 Latin 25 0 49 NJ # NJ [4e 4a ]A
ij 3 0,69,216,255,77,400,0,54,77,346 Latin 27 0 13 ij # ij [69 6a ]a
tt 3 58,66,206,254,118,407,0,47,118,360 Latin 3 0 46 tt # tt [74 74 ]a
ti 3 58,69,206,255,86,400,0,47,86,353 Latin 3 0 46 ti # ti [74 69 ]a
it 3 58,69,206,255,86,407,0,54,86,353 Latin 27 0 13 it # it [69 74 ]a
sc 3 58,65,192,200,179,381,0,30,179,351 Latin 7 0 15 sc # sc [73 63 ]a
rt 3 58,68,186,254,128,400,0,40,128,360 Latin 9 0 20 rt # rt [72 74 ]a
es 3 58,65,189,200,189,393,0,32,189,361 Latin 33 0 32 es # es [65 73 ]a
ee 3 58,64,189,200,196,408,0,32,196,376 Latin 33 0 32 ee # ee [65 65 ]a
th 3 58,68,206,255,160,435,0,47,160,388 Latin 3 0 46 th # th [74 68 ]a
st 3 58,66,192,254,150,383,0,30,150,353 Latin 7 0 15 st # st [73 74 ]a
ch 3 58,68,192,255,189,422,0,36,189,386 Latin 8 0 31 ch # ch [63 68 ]a
et 3 58,66,189,254,157,400,0,32,157,368 Latin 33 0 32 et # et [65 74 ]a
sh 3 58,68,192,255,192,411,0,30,192,381 Latin 7 0 15 sh # sh [73 68 ]a
il 3 59,69,216,255,54,400,0,54,54,346 Latin 27 0 13 il # il [69 6c ]a
ot 3 58,66,188,254,157,397,0,32,157,365 Latin 28 0 6 ot # ot [6f 74 ]a
ge 3 0,64,188,212,198,430,0,32,198,398 Latin 76 0 12 ge # ge [67 65 ]a
sp 3 0,65,192,226,191,403,0,30,191,373 Latin 7 0 15 sp # sp [73 70 ]a
di 3 57,69,216,255,127,401,0,28,127,373 Latin 23 0 5 di # di [64 69 ]a
fü 3 0,68,216,255,155,460,0,42,155,418 Latin 72 0 84 fü # fü [66 fc ]a
ss 3 58,65,192,200,182,376,0,30,182,346 Latin 7 0 15 ss # ss [73 73 ]a
pp 3 0,47,192,226,200,425,0,25,200,400 Latin 11 0 37 pp # pp [70 70 ]a
pt 3 0,66,192,254,159,405,0,25,159,380 Latin 11 0 37 pt # pt [70 74 ]a
sl 3 58,68,192,255,118,376,0,30,118,346 Latin 7 0 15 sl # sl [73 6c ]a
sf 3 0,68,192,255,146,396,0,30,146,366 Latin 7 0 15 sf # sf [73 66 ]a
cc 3 58,64,192,200,176,392,0,36,176,356 Latin 8 0 31 cc # cc [63 63 ]a
ll 3 59,68,216,255,54,402,0,56,54,346 Latin 48 0 30 ll # ll [6c 6c ]a
ct 3 58,66,192,254,147,394,0,36,147,358 Latin 8 0 31 ct # ct [63 74 ]a
rr 3 59,68,186,202,138,400,0,40,138,360 Latin 9 0 20 rr # rr [72 72 ]a
aa 3 58,65,186,200,194,396,0,26,194,370 Latin 10 0 14 aa # aa [61 61 ]a
fu 3 0,68,187,255,155,443,0,42,155,401 Latin 72 0 84 fu # fu [66 75 ]a
ii 3 59,69,216,255,54,400,0,54,54,346 Latin 27 0 13 ii # ii [69 69 ]a
ph 3 0,68,192,255,201,433,0,25,201,408 Latin 11 0 37 ph # ph [70 68 ]a
gy 3 0,47,187,212,187,472,0,32,187,440 Latin 76 0 12 gy # gy [67 79 ]a
fr 3 0,68,186,255,124,415,0,42,124,373 Latin 72 0 84 fr # fr [66 72 ]a
dt 3 57,66,206,255,159,408,0,28,159,380 Latin 23 0 5 dt # dt [64 74 ]a
cti 3 58,69,192,255,174,567,0,36,174,531 Latin 8 0 31 cti # cti [63 74 69 ]a
oo 3 58,66,188,200,196,402,0,32,196,370 Latin 28 0 6 oo # oo [6f 6f ]a
sti 3 58,69,192,255,177,556,0,30,177,526 Latin 7 0 15 sti # sti [73 74 69 ]a
sk 3 57,68,192,255,184,401,0,30,184,371 Latin 7 0 15 sk # sk [73 6b ]a
cs 3 58,65,192,200,179,387,0,36,179,351 Latin 8 0 31 cs # cs [63 73 ]a
ooo 3 58,66,188,200,294,587,0,32,294,555 Latin 28 0 6 ooo # ooo [6f 6f 6f ]a
ty 3 0,66,187,254,146,457,0,47,146,410 Latin 3 0 46 ty # ty [74 79 ]a
tz 3 46,68,186,254,127,400,0,47,127,353 Latin 3 0 46 tz # tz [74 7a ]a
fk 3 0,68,216,255,148,433,0,42,148,391 Latin 72 0 84 fk # fk [66 6b ]a
ck 3 57,68,192,255,181,412,0,36,181,376 Latin 8 0 31 ck # ck [63 6b ]a
gg 3 0,43,188,212,200,452,0,32,200,420 Latin 76 0 12 gg # gg [67 67 ]a
°C 5 58,247,209,255,173,716,0,98,173,618 Common 129 4 129 °C # °C [b0 43 ]A
!? 10 40,67,216,255,127,432,0,71,127,361 Common 80 10 80 !? # !? [21 3f ]p
!! 10 41,67,216,255,100,417,0,71,100,346 Common 80 10 80 !! # !! [21 21 ]p
Qu 7 7,65,187,255,206,465,0,30,206,435 Latin 81 0 116 Qu # Qu [51 75 ]a
ry 3 0,68,186,202,156,450,0,40,156,410 Latin 9 0 20 ry # ry [72 79 ]a
gj 3 0,47,188,255,150,415,0,32,150,383 Latin 76 0 12 gj # gj [67 6a ]a
bt 3 58,66,206,255,159,405,0,25,159,380 Latin 38 0 45 bt # bt [62 74 ]a
sch 3 58,68,192,255,280,589,0,30,280,559 Latin 7 0 15 sch # sch [73 63 68 ]a
SS 5 57,64,219,255,200,430,0,30,200,400 Latin 15 0 7 SS # SS [53 53 ]A
AND 5 52,68,216,255,309,733,0,17,309,716 Latin 14 0 10 AND # AND [41 4e 44 ]A
ET 5 59,68,216,255,168,486,0,31,168,455 Latin 32 0 33 ET # ET [45 54 ]A
UND 5 58,68,216,255,317,744,0,39,317,705 Latin 4 0 47 UND # UND [55 4e 44 ]A
fb 3 0,68,216,255,155,435,0,42,155,393 Latin 72 0 84 fb # fb [66 62 ]a
fj 3 0,68,216,255,105,408,0,42,105,366 Latin 72 0 84 fj # fj [66 6a ]a
nj 3 0,68,188,255,151,406,0,25,151,381 Latin 49 0 25 nj # nj [6e 6a ]a
ffb 3 0,68,216,255,210,628,0,42,210,586 Latin 72 0 84 ffb # ffb [66 66 62 ]a
fh 3 0,68,216,255,156,443,0,42,156,401 Latin 72 0 84 fh # fh [66 68 ]a
or 3 58,68,186,202,167,397,0,32,167,365 Latin 28 0 6 or # or [6f 72 ]a
on 3 58,68,188,202,199,425,0,32,199,393 Latin 28 0 6 on # on [6f 6e ]a
of 3 0,68,188,255,153,410,0,32,153,378 Latin 28 0 6 of # of [6f 66 ]a
om 3 56,68,188,202,215,523,0,32,215,491 Latin 28 0 6 om # om [6f 6d ]a
op 3 0,66,188,226,198,417,0,32,198,385 Latin 28 0 6 op # op [6f 70 ]a
ou 3 57,66,187,202,198,425,0,32,198,393 Latin 28 0 6 ou # ou [6f 75 ]a
fft 3 0,68,206,255,169,608,0,42,169,566 Latin 72 0 84 fft # fft [66 66 74 ]a
sb 3 58,65,192,255,191,403,0,30,191,373 Latin 7 0 15 sb # sb [73 62 ]a
the 3 58,68,189,255,258,623,0,47,258,576 Latin 3 0 46 the # the [74 68 65 ]a

View File

@ -1,5 +0,0 @@
4
NULL 0 NULL 0
i 3 59,69,216,255,11,141,0,54,27,173 Latin 1 0 1 i # i [69 ]a
f 3 0,68,216,255,54,175,0,42,55,193 Latin 2 0 2 f # f [66 ]a
fi 3 0,71,216,255,87,202,0,28,105,199 Latin 3 0 3 fi # fi [fb01 ]a

Binary file not shown.

Before

Width:  |  Height:  |  Size: 692 KiB

View File

@ -1,18 +0,0 @@
This file documents the original source of various examples used for testing.
hebrew.png - Sample from Hebrew OCR with Nikud project by Adi Oz and Vered Shani
project URL - http://www.cs.bgu.ac.il/~nlpproj/hocr/
direct link to image - http://www.cs.bgu.ac.il/~nlpproj/hocr/images/image00.png
hebtypo.jpg - Sample from OCR and Hebrew on the Web project at Universiteit van Amsterdam
project URL - http://cf.uba.uva.nl/en/collections/rosenthaliana/menasseh/hebtypo.html
direct link to image - http://cf.uba.uva.nl/en/collections/rosenthaliana/menasseh/gif/hebtypo.jpg
DuTillet1004Pg2LG.jpg - Sample from Hebrew Matthew Project with parallel texts in Hebrew & Greek
as well as English page/chapter labels with Arabic numerals - test with -l heb+grc+eng
project URL - http://www.torahresource.com/Dutillet.html
direct link to image - http://www.torahresource.com/DuTillet/DuTillet1004Pg2LG.jpg
hebrew-nikud-genesis-1-2.png - Genesis 1-2 Hebrew example from OCR forum
forum post - https://community.logos.com/forums/p/16124/277997.aspx
direct link to image - https://community.logos.com/cfs-filesystemfile.ashx/__key/CommunityServer.Discussions.Components.Files/77/4578.Gen.png

View File

@ -1,42 +0,0 @@
How to run UNLV tests.
The scripts in this directory make it possible to duplicate the tests
published in the Fourth Annual Test of OCR Accuracy.
See http://www.isri.unlv.edu/downloads/AT-1995.pdf
but first you have to get the tools and data from UNLV:
Step 1: To download the images, go to
http://www.isri.unlv.edu/ISRI/OCRtk
and get 3b.tgz, Bb.tgz, Mb.tgz and Nb.tgz.
Step 2: extract the files. It doesn't really matter where
in your filesystem you put them, but they must go under a common
root so you have directories 3, B, M and N in, for example,
/users/me/ISRI-OCRtk.
Step 3: Reorg the files
The lack of tif extensions on the images is inconvenient, so there
is a script to reorganize the data to match the rest of the test
scripts.
cd to /users/me/ISRI-OCRtk or wherever 3, B, M and N ended up and run
/blah/blah/tesseract-ocr/testing/reorgdata.sh 3B
This makes directories doe3.3B, bus.3B, mag.3B and news.3B.
You can now get rid of 3, B, M, and N unless you want to get some of the
other scanning resolutions out of them.
Step 4: Download the ISRI toolkit from:
http://www.isri.unlv.edu/downloads/ftk-1.0.tgz
Step 5: If they work for you, use the binaries directly from the bin
directory and put them in tesseract-ocr/testing/unlv
otherwise build the tools for yourself and put them there.
Step 6: cd back to your main tesseract-ocr dir and Build tesseract.
Step 7: run testing/runalltests.sh with the root data dir and testname:
testing/runalltests.sh /users/me/ISRI-OCRtk tess2.0
and go to the gym, have lunch etc.
Step 8: There should be a file
testing/reports/tess2.0.summary that contains the final summarized accuracy
report and comparison with the 1995 results.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 598 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 522 KiB

Binary file not shown.

View File

@ -1,12 +0,0 @@
The (quick) [brown] {fox} jumps!
Over the $43,456.78 <lazy> #90 dog
& duck/goose, as 12.5% of E-mail
from aspammer@website.com is spam.
Der „schnelle” braune Fuchs springt
über den faulen Hund. Le renard brun
«rapide» saute par-dessus le chien
paresseux. La volpe marrone rapida
salta sopra il cane pigro. El zorro
marrón rápido salta sobre el perro
perezoso. A raposa marrom rápida
salta sobre o cão preguiçoso.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 390 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 189 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 434 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.6 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.7 KiB

Binary file not shown.

View File

@ -1,9 +0,0 @@
This is a lot of 12 point text to test the
ocr code and see if it works on all types
of file format.
The quick brown dog jumped over the
lazy fox. The quick brown dog jumped
over the lazy fox. The quick brown dog
jumped over the lazy fox. The quick
brown dog jumped over the lazy fox.

View File

@ -3,8 +3,8 @@
TESSDATA_DIR=$(shell cd $(top_srcdir) && cd .. && pwd)/tessdata
# Absolute path of directory 'testing' with test images and ground truth texts
# (must be directly below top source directory).
TESTING_DIR=$(shell cd $(top_srcdir) && pwd)/testing
# (using submodule test).
TESTING_DIR=$(shell cd $(top_srcdir) && pwd)/test/testing
AM_CPPFLAGS += -DTESSDATA_DIR="\"$(TESSDATA_DIR)\""
AM_CPPFLAGS += -DTESTING_DIR="\"$(TESTING_DIR)\""
@ -92,11 +92,11 @@ osd_test_LDADD += -lws2_32
tesseracttests_LDADD += -lws2_32
endif
EXTRA_apiexample_test_DEPENDENCIES = $(abs_top_builddir)/testing/phototest.tif
EXTRA_apiexample_test_DEPENDENCIES += $(abs_top_builddir)/testing/phototest.txt
EXTRA_apiexample_test_DEPENDENCIES = $(abs_top_builddir)/test/testing/phototest.tif
EXTRA_apiexample_test_DEPENDENCIES += $(abs_top_builddir)/test/testing/phototest.txt
$(abs_top_builddir)/testing/phototest.tif:
ln -s $(top_srcdir)/testing/phototest.tif $(top_builddir)/testing/phototest.tif
$(abs_top_builddir)/test/testing/phototest.tif:
ln -s $(top_srcdir)/test/testing/phototest.tif $(top_builddir)/test/testing/phototest.tif
$(abs_top_builddir)/testing/phototest.txt:
ln -s $(top_srcdir)/testing/phototest.txt $(top_builddir)/testing/phototest.txt
$(abs_top_builddir)/test/testing/phototest.txt:
ln -s $(top_srcdir)/test/testing/phototest.txt $(top_builddir)/test/testing/phototest.txt

3
unlvtests/.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
# Ignore ocreval tool and generated files.
ocreval*
results*

45
unlvtests/README Normal file
View File

@ -0,0 +1,45 @@
How to run UNLV tests.
The scripts in this directory make it possible to duplicate the tests
published in the Fourth Annual Test of OCR Accuracy.
See http://www.isri.unlv.edu/downloads/AT-1995.pdf
but first you have to get the tools and data used by UNLV:
Step 1: To download the images, go to
https://sourceforge.net/projects/isri-ocr-evaluation-tools-alt/files/
and get doe3.3B.tar.gz, bus.3B.tar.gz, mag.3B.tar.gz and news.3B.tar.gz
mkdir -p ~/isri-downloads
cd ~/isri-downloads
curl -L https://sourceforge.net/projects/isri-ocr-evaluation-tools-alt/files/bus.3B.tar.gz > bus.3B.tar.gz
curl -L https://sourceforge.net/projects/isri-ocr-evaluation-tools-alt/files/doe3.3B.tar.gz > doe3.3B.tar.gz
curl -L https://sourceforge.net/projects/isri-ocr-evaluation-tools-alt/files/mag.3B.tar.gz > mag.3B.tar.gz
curl -L https://sourceforge.net/projects/isri-ocr-evaluation-tools-alt/files/news.3B.tar.gz > news.3B.tar.gz
Step 2: extract the files. It doesn't really matter where
in your filesystem you put them, but they must go under a common
root so you have directories doe3.3B, bus.3B, mag.3B and news.3B in, for example,
~/ISRI-OCRtk.
mkdir -p ~/ISRI-OCRtk
cd ~/ISRI-OCRtk
tar xzvf ~/isri-downloads/bus.3B.tar.gz
tar xzvf ~/isri-downloads/doe3.3B.tar.gz
tar xzvf ~/isri-downloads/mag.3B.tar.gz
tar xzvf ~/isri-downloads/news.3B.tar.gz
Step 3: Download the modified ISRI toolkit from:
https://ancientgreekocr.org/ocr-evaluation-tools.git
make and install the tools in unlvtests/ocreval/bin by
`make PREFIX=~/tesseract/unlvtests/ocreval install`
Step 4: cd back to your main tesseract-ocr dir and build tesseract.
Step 5: Run unlvtests/runalltests.sh with the root ISRI data dir and testname:
unlvtests/runalltests.sh ~/ISRI-OCRtk tess4.0.0-beta.1
and go to the gym, have lunch etc.
Step 6: There should be a file
unlvtests/reports/tess4.0.0-beta.1.summary that contains the final summarized accuracy
report and comparison with the 1995 results.

28
testing/counttestset.sh → unlvtests/counttestset.sh Normal file → Executable file
View File

@ -20,22 +20,18 @@ then
echo "Usage:$0 pagesfile"
exit 1
fi
if [ ! -d api ]
if [ ! -d src/api ]
then
echo "Run $0 from the tesseract-ocr root directory!"
exit 1
fi
if [ ! -r testing/unlv/accuracy ]
then
echo "Please download the UNLV accuracy tools (and build) to testing/unlv"
exit 1
fi
pages=$1
imdir=${pages%/pages}
setname=${imdir##*/}
resdir=testing/results/$setname
mkdir -p testing/reports
resdir=unlvtests/results/$setname
mkdir -p unlvtests/reports
echo "Counting on set $setname in directory $imdir to $resdir"
accfiles=""
wafiles=""
@ -47,13 +43,17 @@ do
else
srcdir="$imdir"
fi
# echo "$srcdir/$page.tif"
echo "$srcdir/$page.tif"
# Count character errors.
testing/unlv/accuracy "$srcdir/$page.txt" "$resdir/$page.txt" "$resdir/$page.acc"
accfiles="$accfiles $resdir/$page.acc"
unlvtests/ocreval/bin/ocrevalutf8 unlvtests/ocreval/bin/accuracy "$srcdir/$page.txt" "$resdir/$page.unlv" "$resdir/$page.acc"
accfiles="$accfiles $resdir/$page.acc"
# Count word errors.
testing/unlv/wordacc "$srcdir/$page.txt" "$resdir/$page.txt" "$resdir/$page.wa"
unlvtests/ocreval/bin/ocrevalutf8 unlvtests/ocreval/bin/wordacc "$srcdir/$page.txt" "$resdir/$page.unlv" "$resdir/$page.wa"
wafiles="$wafiles $resdir/$page.wa"
done <"$pages"
testing/unlv/accsum "$accfiles" >"testing/reports/$setname.characc"
testing/unlv/wordaccsum "$wafiles" >"testing/reports/$setname.wordacc"
echo "$accfiles"
echo "$wafiles"
unlvtests/ocreval/bin/accsum "$accfiles" >"unlvtests/reports/$setname.characc"
unlvtests/ocreval/bin/ocrevalutf8 unlvtests/ocreval/bin/wordaccsum "$wafiles" >"unlvtests/reports/$setname.wordacc"

View File

View File

@ -0,0 +1,200 @@
8500_001.3B 21.18
8501_001.3B 11.72
8502_001.3B 27.12
8503_001.3B 24.01
8504_001.3B 12.42
8505_001.3B 18.75
8506_001.3B 18.29
8507_001.3B 14.15
8508_001.3B 23.30
8509_001.3B 35.00
8510_001.3B 17.38
8512_001.3B 4.64
8513_001.3B 15.98
8514_001.3B 19.79
8515_001.3B 13.37
8516_001.3B 19.37
8517_001.3B 10.41
8518_001.3B 5.99
8519_001.3B 20.46
8520_001.3B 32.38
8520_002.3B 39.26
8520_003.3B 25.79
8521_001.3B 19.02
8522_001.3B 28.00
8523_001.3B 32.00
8524_001.3B 20.81
8525_001.3B 29.17
8526_001.3B 11.92
8527_001.3B 14.06
8528_001.3B 7.94
8529_001.3B 26.18
8529_002.3B 5.02
8530_001.3B 9.51
8531_001.3B 13.77
8533_001.3B 5.82
8534_001.3B 25.04
8535_001.3B 18.50
8536_001.3B 17.06
8537_001.3B 8.46
8538_001.3B 14.22
8539_001.3B 22.86
8540_001.3B 13.65
8541_001.3B 28.38
8541_002.3B 27.92
8542_001.3B 17.16
8544_001.3B 27.17
8545_001.3B 8.38
8546_001.3B 16.02
8547_001.3B 6.45
8548_001.3B 12.00
8549_001.3B 8.05
8550_001.3B 12.76
8551_001.3B 9.19
8552_001.3B 21.46
8553_001.3B 16.93
8554_001.3B 6.96
8555_001.3B 20.78
8556_001.3B 11.21
8557_001.3B 20.81
8558_001.3B 14.93
8559_001.3B 13.74
8560_001.3B 20.18
8561_001.3B 27.58
8562_001.3B 9.98
8563_001.3B 19.63
8564_001.3B 19.25
8565_001.3B 8.89
8566_001.3B 10.63
8567_001.3B 32.30
8568_001.3B 10.89
8569_001.3B 10.54
8570_001.3B 6.47
8571_001.3B 27.37
8572_001.3B 12.59
8573_001.3B 12.75
8574_001.3B 25.88
8574_002.3B 10.32
8575_001.3B 7.23
8576_001.3B 9.30
8577_001.3B 13.52
8578_001.3B 14.99
8579_001.3B 14.34
8580_001.3B 9.18
8581_001.3B 13.65
8582_001.3B 15.56
8583_001.3B 10.98
8584_001.3B 19.10
8585_001.3B 10.71
8586_001.3B 7.56
8587_001.3B 11.33
8588_001.3B 12.31
8589_001.3B 7.01
8590_001.3B 31.73
8591_001.3B 28.57
8591_002.3B 1.59
8592_001.3B 14.69
8593_001.3B 17.47
8594_001.3B 5.87
8595_001.3B 11.65
8596_001.3B 11.78
8597_001.3B 11.55
8598_001.3B 10.73
8599_001.3B 16.48
8600_001.3B 9.28
8601_001.3B 29.24
8700_001.3B 27.68
8701_001.3B 16.43
8702_001.3B 23.29
8703_001.3B 19.90
8704_001.3B 10.42
8705_001.3B 12.84
8706_001.3B 19.20
8707_001.3B 21.43
8708_001.3B 16.98
8709_001.3B 17.46
8710_001.3B 14.46
8711_001.3B 14.90
8712_001.3B 23.02
8713_001.3B 21.30
8714_001.3B 16.53
8715_001.3B 25.23
8716_001.3B 21.95
8717_001.3B 18.83
8718_001.3B 20.00
8719_001.3B 19.54
8720_001.3B 36.70
8721_001.3B 29.99
8721_002.3B 15.29
8721_003.3B 20.94
8722_001.3B 23.94
8723_001.3B 22.84
8723_002.3B 25.78
8724_001.3B 24.02
8724_002.3B 26.69
8725_001.3B 31.03
8726_001.3B 14.05
8727_001.3B 22.38
8728_001.3B 18.95
8729_001.3B 28.13
8730_001.3B 34.85
8731_001.3B 15.46
8733_001.3B 21.63
8733_002.3B 11.17
8734_001.3B 31.24
8734_002.3B 31.30
8735_001.3B 23.96
8735_002.3B 10.60
8736_001.3B 12.80
8736_002.3B 7.17
8737_001.3B 28.60
8737_002.3B 28.37
8738_001.3B 31.20
8739_001.3B 28.83
8739_002.3B 25.54
8740_001.3B 11.76
8740_002.3B 17.64
8741_001.3B 21.74
8741_002.3B 9.04
8742_001.3B 16.40
8742_002.3B 6.36
8743_001.3B 23.47
8743_002.3B 18.55
8744_001.3B 32.44
8744_002.3B 37.44
8745_001.3B 26.77
8745_002.3B 8.20
8746_001.3B 23.62
8747_001.3B 33.47
8747_002.3B 36.95
8748_001.3B 41.88
8748_002.3B 27.64
8749_001.3B 18.71
8749_002.3B 34.45
8750_001.3B 22.16
8750_002.3B 35.29
8750_003.3B 43.09
8750_004.3B 10.43
8751_001.3B 26.53
8751_002.3B 70.68
8752_001.3B 88.23
8752_002.3B 77.29
8752_003.3B 18.16
8753_001.3B 98.12
8753_002.3B 133.03
8754_001.3B 68.60
8754_002.3B 45.30
8755_001.3B 61.55
8755_002.3B 114.88
8756_001.3B 59.22
8758_001.3B 82.09
8758_002.3B 52.46
8758_003.3B 85.34
8758_004.3B 78.35
8759_001.3B 64.63
8759_002.3B 72.59
8759_003.3B 83.72
8759_004.3B 68.94
8760_001.3B 34.73
8760_002.3B 14.60
8761_001.3B 13.27

View File

@ -0,0 +1 @@
unlvtests/ocreval/bin/ocrevalutf8 toolname [option1] [option2] [option...] ground.txt ocr.txt

View File

@ -0,0 +1 @@
tess4.0.0-beta.1 bus.3B -100.00% unlvtests unlvtests -100.00% -100.00% 4851.37s

View File

@ -0,0 +1,6 @@
1995 bus.3B 5959 98.14% 0.00% 1631 96.83% 0.00% 1293 95.73% 0.00%
1995 doe3.3B 36349 97.52% 0.00% 7826 96.34% 0.00% 7042 94.87% 0.00%
1995 mag.3B 15043 97.74% 0.00% 4566 96.01% 0.00% 3379 94.99% 0.00%
1995 news.3B 6432 98.69% 0.00% 1946 97.68% 0.00% 1502 96.94% 0.00%
tess4.0.0-beta.1 bus.3B -100.00% unlvtests unlvtests -100.00% -100.00% 4851.37s
tess4.0.0-beta.1 Total 0 - -100.00% 0 - -100.00% 0 - -100.00%

View File

@ -0,0 +1 @@
tess4.0.0-beta.1 Total 0 - -100.00% 0 - -100.00% 0 - -100.00%

View File

@ -20,21 +20,17 @@ then
echo "Usage:$0 unlv-data-dir version-id"
exit 1
fi
if [ ! -d api ]
if [ ! -d src/api ]
then
echo "Run $0 from the tesseract-ocr root directory!"
exit 1
fi
if [ ! -r api/tesseract ] && [ ! -r tesseract.exe ]
if [ ! -r src/api/tesseract ] && [ ! -r tesseract.exe ]
then
echo "Please build tesseract before running $0"
exit 1
fi
if [ ! -r testing/unlv/accuracy ] && [ ! -r testing/unlv/accuracy.exe ]
then
echo "Please download the UNLV accuracy tools (and build) to testing/unlv"
exit 1
fi
#deltapc new old calculates the %change from old to new
deltapc() {
@ -63,8 +59,9 @@ if [ "$bindir" = "$0" ]
then
bindir="./"
fi
rdir=testing/reports
testsets="bus.3B doe3.3B mag.3B news.3B"
rdir=unlvtests/reports
#testsets="bus.3B doe3.3B mag.3B news.3B"
testsets="bus.3B"
totalerrs=0
totalwerrs=0
@ -81,34 +78,34 @@ do
# Count the errors on all the pages.
$bindir/counttestset.sh "$imdir/$set/pages"
# Get the old character word and nonstop word errors.
olderrs=$(cut -f3 "testing/reports/1995.$set.sum")
oldwerrs=$(cut -f6 "testing/reports/1995.$set.sum")
oldnswerrs=$(cut -f9 "testing/reports/1995.$set.sum")
olderrs=$(cut -f3 "unlvtests/reports/1995.$set.sum")
oldwerrs=$(cut -f6 "unlvtests/reports/1995.$set.sum")
oldnswerrs=$(cut -f9 "unlvtests/reports/1995.$set.sum")
# Get the new character word and nonstop word errors and accuracy.
cherrs=$(head -4 "testing/reports/$set.characc" |tail -1 |cut -c1-9 |
cherrs=$(head -4 "unlvtests/reports/$set.characc" |tail -1 |cut -c1-9 |
tr -d '[:blank:]')
chacc=$(head -5 "testing/reports/$set.characc" |tail -1 |cut -c1-9 |
chacc=$(head -5 "unlvtests/reports/$set.characc" |tail -1 |cut -c1-9 |
tr -d '[:blank:]')
wderrs=$(head -4 "testing/reports/$set.wordacc" |tail -1 |cut -c1-9 |
wderrs=$(head -4 "unlvtests/reports/$set.wordacc" |tail -1 |cut -c1-9 |
tr -d '[:blank:]')
wdacc=$(head -5 "testing/reports/$set.wordacc" |tail -1 |cut -c1-9 |
wdacc=$(head -5 "unlvtests/reports/$set.wordacc" |tail -1 |cut -c1-9 |
tr -d '[:blank:]')
nswderrs=$(grep Total "testing/reports/$set.wordacc" |head -2 |tail -1 |
nswderrs=$(grep Total "unlvtests/reports/$set.wordacc" |head -2 |tail -1 |
cut -c10-17 |tr -d '[:blank:]')
nswdacc=$(grep Total "testing/reports/$set.wordacc" |head -2 |tail -1 |
nswdacc=$(grep Total "unlvtests/reports/$set.wordacc" |head -2 |tail -1 |
cut -c19-26 |tr -d '[:blank:]')
# Compute the percent change.
chdelta=$(deltapc "$cherrs" "$olderrs")
wdelta=$(deltapc "$wderrs" "$oldwerrs")
nswdelta=$(deltapc "$nswderrs" "$oldnswerrs")
sumfile=$rdir/$vid.$set.sum
if [ -r "testing/reports/$set.times" ]
if [ -r "unlvtests/reports/$set.times" ]
then
total_time=$(timesum "testing/reports/$set.times")
if [ -r "testing/reports/prev/$set.times" ]
total_time=$(timesum "unlvtests/reports/$set.times")
if [ -r "unlvtests/reports/prev/$set.times" ]
then
paste "testing/reports/prev/$set.times" "testing/reports/$set.times" |
awk '{ printf("%s %.2f\n", $1, $4-$2); }' |sort -k2n >"testing/reports/$set.timedelta"
paste "unlvtests/reports/prev/$set.times" "unlvtests/reports/$set.times" |
awk '{ printf("%s %.2f\n", $1, $4-$2); }' |sort -k2n >"unlvtests/reports/$set.timedelta"
fi
else
total_time='0.0'

View File

@ -20,12 +20,12 @@ then
echo "Usage:$0 pagesfile [-zoning]"
exit 1
fi
if [ ! -d api ]
if [ ! -d src/api ]
then
echo "Run $0 from the tesseract-ocr root directory!"
exit 1
fi
if [ ! -r api/tesseract ]
if [ ! -r src/api/tesseract ]
then
if [ ! -r tesseract.exe ]
then
@ -35,7 +35,7 @@ then
tess="./tesseract.exe"
fi
else
tess="time -f %U -o times.txt api/tesseract"
tess="time -f %U -o times.txt src/api/tesseract"
export TESSDATA_PREFIX=$PWD/
fi
@ -45,14 +45,14 @@ setname=${imdir##*/}
if [ $# -eq 2 ] && [ "$2" = "-zoning" ]
then
config=unlv.auto
resdir=testing/results/zoning.$setname
resdir=unlvtests/results/zoning.$setname
else
config=unlv
resdir=testing/results/$setname
resdir=unlvtests/results/$setname
fi
echo -e "Testing on set $setname in directory $imdir to $resdir\n"
mkdir -p "$resdir"
rm -f "testing/reports/$setname.times"
rm -f "unlvtests/reports/$setname.times"
while read page dir
do
# A pages file may be a list of files with subdirs or maybe just
@ -64,11 +64,11 @@ do
srcdir="$imdir"
fi
# echo "$srcdir/$page.tif"
$tess "$srcdir/$page.tif" "$resdir/$page" --psm 6 $config 2>&1 |grep -v "OCR Engine"
$tess "$srcdir/$page.tif" "$resdir/$page" --tessdata-dir ../tessdata_fast --oem 1 -l eng --psm 6 $config 2>&1 |grep -v "OCR Engine"
if [ -r times.txt ]
then
read t <times.txt
echo "$page $t" >>"testing/reports/$setname.times"
echo "$page $t" >>"unlvtests/reports/$setname.times"
echo -e "\033M$page $t"
if [ "$t" = "Command terminated by signal 2" ]
then