mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-18 14:41:36 +08:00
Merge pull request #1658 from Shreeshrii/master
correct script for eng, remove new reports from distribution
This commit is contained in:
commit
e5d11b5297
10
.gitignore
vendored
10
.gitignore
vendored
@ -64,6 +64,8 @@ build/*
|
|||||||
/bin
|
/bin
|
||||||
*/.deps/*
|
*/.deps/*
|
||||||
*/.libs/*
|
*/.libs/*
|
||||||
|
*/*/.deps/*
|
||||||
|
*/*/.libs/*
|
||||||
*.lo
|
*.lo
|
||||||
*.la
|
*.la
|
||||||
*.o
|
*.o
|
||||||
@ -102,9 +104,7 @@ kernel*.bin
|
|||||||
/unittest/osd_test
|
/unittest/osd_test
|
||||||
/unittest/tesseracttests
|
/unittest/tesseracttests
|
||||||
|
|
||||||
# ocreval tool and generated files from unlvtests
|
# generated files from unlvtests
|
||||||
/unlvtests/ocreval*
|
times.txt
|
||||||
/unlvtests/results*
|
/unlvtests/results*
|
||||||
/unlvtests/reports/*.characc
|
|
||||||
/unlvtests/reports/*.times
|
|
||||||
/unlvtests/reports/*.wordacc
|
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
|
|
||||||
EXTRA_DIST = README.md
|
EXTRA_DIST = README.md
|
||||||
EXTRA_DIST += counttestset.sh
|
EXTRA_DIST += counttestset.sh
|
||||||
EXTRA_DIST += reorgdata.sh
|
|
||||||
EXTRA_DIST += runalltests.sh
|
EXTRA_DIST += runalltests.sh
|
||||||
|
EXTRA_DIST += runalltests_spa.sh
|
||||||
EXTRA_DIST += runtestset.sh
|
EXTRA_DIST += runtestset.sh
|
||||||
EXTRA_DIST += reports/1995.bus.3B.sum
|
EXTRA_DIST += reports/1995.bus.3B.sum
|
||||||
EXTRA_DIST += reports/1995.doe3.3B.sum
|
EXTRA_DIST += reports/1995.doe3.3B.sum
|
||||||
@ -10,6 +10,3 @@ EXTRA_DIST += reports/1995.mag.3B.sum
|
|||||||
EXTRA_DIST += reports/1995.news.3B.sum
|
EXTRA_DIST += reports/1995.news.3B.sum
|
||||||
EXTRA_DIST += reports/2.03.summary
|
EXTRA_DIST += reports/2.03.summary
|
||||||
EXTRA_DIST += reports/2.04.summary
|
EXTRA_DIST += reports/2.04.summary
|
||||||
EXTRA_DIST += reports/4_best_spa.summary
|
|
||||||
EXTRA_DIST += reports/4_best_int_spa.summary
|
|
||||||
EXTRA_DIST += reports/4_fast_spa.summary
|
|
||||||
|
@ -45,9 +45,10 @@ do
|
|||||||
srcdir="$imdir"
|
srcdir="$imdir"
|
||||||
fi
|
fi
|
||||||
#echo "$srcdir/$page.tif"
|
#echo "$srcdir/$page.tif"
|
||||||
# Count character errors.
|
# Convert groundtruth and recognized text to UTF-8 to correctly treat accented letters.
|
||||||
iconv -f ISO8859-1 -t UTF-8 "$resdir/$page.unlv" >"$resdir/$page.text"
|
|
||||||
iconv -f ISO8859-1 -t UTF-8 "$srcdir/$page.txt" >"$srcdir/$page.text"
|
iconv -f ISO8859-1 -t UTF-8 "$srcdir/$page.txt" >"$srcdir/$page.text"
|
||||||
|
iconv -f ISO8859-1 -t UTF-8 "$resdir/$page.unlv" >"$resdir/$page.text"
|
||||||
|
# Count character errors.
|
||||||
ocrevalutf8 accuracy "$srcdir/$page.text" "$resdir/$page.text" > "$resdir/$page.acc"
|
ocrevalutf8 accuracy "$srcdir/$page.text" "$resdir/$page.text" > "$resdir/$page.acc"
|
||||||
accfiles="$accfiles $resdir/$page.acc"
|
accfiles="$accfiles $resdir/$page.acc"
|
||||||
# Count word errors.
|
# Count word errors.
|
||||||
|
9
unlvtests/reports/4_fast_eng.summary
Normal file
9
unlvtests/reports/4_fast_eng.summary
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
1995 bus.3B 5959 98.14% 0.00% 1631 96.83% 0.00% 1293 95.73% 0.00%
|
||||||
|
1995 doe3.3B 36349 97.52% 0.00% 7826 96.34% 0.00% 7042 94.87% 0.00%
|
||||||
|
1995 mag.3B 15043 97.74% 0.00% 4566 96.01% 0.00% 3379 94.99% 0.00%
|
||||||
|
1995 news.3B 6432 98.69% 0.00% 1946 97.68% 0.00% 1502 96.94% 0.00%
|
||||||
|
4_fast_eng bus.3B 6124 98.11% 2.77% 1138 97.88% -30.23% 963 97.05 -25.52% 3935.26s
|
||||||
|
4_fast_eng doe3.3B 30029 97.96% -17.39% 13781 94.45% 76.09% 13178 92.38 87.13% 18847.36s
|
||||||
|
4_fast_eng mag.3B 10934 98.37% -27.32% 3343 97.15% -26.78% 2813 96.06 -16.75% 6867.14s
|
||||||
|
4_fast_eng news.3B 5734 98.84% -10.85% 1322 98.45% -32.07% 1040 97.94 -30.76% 5527.38s
|
||||||
|
4_fast_eng Total 52821 - -17.19% 19584 - 22.64% 17994 - 36.15%
|
@ -77,7 +77,7 @@ do
|
|||||||
# Run tesseract on all the pages.
|
# Run tesseract on all the pages.
|
||||||
$bindir/runtestset.sh "$imdir/$set/pages" "$tessdata" "eng"
|
$bindir/runtestset.sh "$imdir/$set/pages" "$tessdata" "eng"
|
||||||
# Count the errors on all the pages.
|
# Count the errors on all the pages.
|
||||||
$bindir/counttestset.sh "$imdir/$set/pages"
|
$bindir/counttestset.sh "$imdir/$set/pages" "eng"
|
||||||
# Get the old character word and nonstop word errors.
|
# Get the old character word and nonstop word errors.
|
||||||
olderrs=$(cut -f3 "unlvtests/reports/1995.$set.sum")
|
olderrs=$(cut -f3 "unlvtests/reports/1995.$set.sum")
|
||||||
oldwerrs=$(cut -f6 "unlvtests/reports/1995.$set.sum")
|
oldwerrs=$(cut -f6 "unlvtests/reports/1995.$set.sum")
|
||||||
|
Loading…
Reference in New Issue
Block a user