backport doc and testing changes from master (4.0)

This commit is contained in:
Zdenko Podobný 2017-05-05 20:40:35 +02:00
parent 5fe49627bf
commit 10f96d1e80
5 changed files with 64 additions and 58 deletions

View File

@@ -84,14 +84,20 @@ before any 'configfile'.
SINGLE OPTIONS
--------------
'-v'::
'-h, --help'::
Show help message.
'--help-psm'::
Show page segmentation modes.
'-v, --version'::
Returns the current version of the tesseract(1) executable.
'--list-langs'::
list available languages for tesseract engine. Can be used with --tessdata-dir.
List available languages for tesseract engine. Can be used with --tessdata-dir.
'--print-parameters'::
print tesseract parameters to the stdout.
Print tesseract parameters.

View File

@@ -49,13 +49,13 @@ do
fi
# echo "$srcdir/$page.tif"
# Count character errors.
testing/unlv/accuracy $srcdir/$page.txt $resdir/$page.txt $resdir/$page.acc
testing/unlv/accuracy "$srcdir/$page.txt" "$resdir/$page.txt" "$resdir/$page.acc"
accfiles="$accfiles $resdir/$page.acc"
# Count word errors.
testing/unlv/wordacc $srcdir/$page.txt $resdir/$page.txt $resdir/$page.wa
testing/unlv/wordacc "$srcdir/$page.txt" "$resdir/$page.txt" "$resdir/$page.wa"
wafiles="$wafiles $resdir/$page.wa"
done <$pages
testing/unlv/accsum $accfiles >testing/reports/$setname.characc
testing/unlv/wordaccsum $wafiles >testing/reports/$setname.wordacc
done <"$pages"
# $accfiles and $wafiles are space-separated lists of per-page report files
# accumulated in the loop above. They must be expanded UNQUOTED so that each
# file reaches accsum/wordaccsum as its own argument; quoting them would pass
# the whole list as one (non-existent) file name.
# shellcheck disable=SC2086
testing/unlv/accsum $accfiles >"testing/reports/$setname.characc"
# shellcheck disable=SC2086
testing/unlv/wordaccsum $wafiles >"testing/reports/$setname.wordacc"

View File

@@ -33,21 +33,21 @@ do
if [ -r "$old/PAGES" ]
then
new=${s#*/}.$ext
mkdir -p $new
mkdir -p "$new"
echo "Set $old -> $new"
#The pages file had - instead of _ so fix it and add the extension.
for page in `cat $old/PAGES`
for page in $(cat $old/PAGES)
do
echo "${page%-*}_${page#*-}.$ext"
done >$new/pages
for f in `cat $new/pages`
done >"$new/pages"
for f in $(cat "$new/pages")
do
#Put a tif extension on the tif files.
cp $old/${old}_B/$f $new/$f.tif
cp "$old/${old}_B/$f" "$new/$f.tif"
#Put a uzn extension on the zone files.
cp $old/${old}_B/${f}Z $new/$f.uzn
cp "$old/${old}_B/${f}Z" "$new/$f.uzn"
#Cat all the truth files together and put into a single txt file.
cat $old/${old}_GT/${f%.$ext}.Z* >$new/$f.txt
cat "$old/${old}_GT/${f%.$ext}".Z* >"$new/$f.txt"
done
fi
done

View File

@@ -25,12 +25,12 @@ then
echo "Run $0 from the tesseract-ocr root directory!"
exit 1
fi
if [ ! -r api/tesseract -a ! -r tesseract.exe ]
if [ ! -r api/tesseract ] && [ ! -r tesseract.exe ]
then
echo "Please build tesseract before running $0"
exit 1
fi
if [ ! -r testing/unlv/accuracy -a ! -r testing/unlv/accuracy.exe ]
if [ ! -r testing/unlv/accuracy ] && [ ! -r testing/unlv/accuracy.exe ]
then
echo "Please download the UNLV accuracy tools (and build) to testing/unlv"
exit 1
@@ -39,7 +39,7 @@ fi
#deltapc new old calculates the %change from old to new
deltapc() {
awk ' BEGIN {
printf("%.2f", 100.0*('$1'-'$2')/'$2');
printf("%.2f", 100.0*('"$1"'-'"$2"')/'"$2"');
}'
}
@@ -53,7 +53,7 @@ total = 0.0;
}
END {
printf("%.2f\n", total);
}' $1
}' "$1"
}
imdir="$1"
@@ -74,47 +74,47 @@ totaloldwerrs=0
totaloldnswerrs=0
for set in $testsets
do
if [ -r $imdir/$set/pages ]
if [ -r "$imdir/$set/pages" ]
then
# Run tesseract on all the pages.
$bindir/runtestset.sh $imdir/$set/pages
$bindir/runtestset.sh "$imdir/$set/pages"
# Count the errors on all the pages.
$bindir/counttestset.sh $imdir/$set/pages
$bindir/counttestset.sh "$imdir/$set/pages"
# Get the old character word and nonstop word errors.
olderrs=`cat testing/reports/1995.$set.sum | cut -f3`
oldwerrs=`cat testing/reports/1995.$set.sum | cut -f6`
oldnswerrs=`cat testing/reports/1995.$set.sum | cut -f9`
olderrs=$(cut -f3 "testing/reports/1995.$set.sum")
oldwerrs=$(cut -f6 "testing/reports/1995.$set.sum")
oldnswerrs=$(cut -f9 "testing/reports/1995.$set.sum")
# Get the new character word and nonstop word errors and accuracy.
cherrs=`head -4 testing/reports/$set.characc |tail -1 |cut -c1-9 |
tr -d '[:blank:]'`
chacc=`head -5 testing/reports/$set.characc |tail -1 |cut -c1-9 |
tr -d '[:blank:]'`
wderrs=`head -4 testing/reports/$set.wordacc |tail -1 |cut -c1-9 |
tr -d '[:blank:]'`
wdacc=`head -5 testing/reports/$set.wordacc |tail -1 |cut -c1-9 |
tr -d '[:blank:]'`
nswderrs=`grep Total testing/reports/$set.wordacc |head -2 |tail -1 |
cut -c10-17 |tr -d '[:blank:]'`
nswdacc=`grep Total testing/reports/$set.wordacc |head -2 |tail -1 |
cut -c19-26 |tr -d '[:blank:]'`
cherrs=$(head -4 "testing/reports/$set.characc" |tail -1 |cut -c1-9 |
tr -d '[:blank:]')
chacc=$(head -5 "testing/reports/$set.characc" |tail -1 |cut -c1-9 |
tr -d '[:blank:]')
wderrs=$(head -4 "testing/reports/$set.wordacc" |tail -1 |cut -c1-9 |
tr -d '[:blank:]')
wdacc=$(head -5 "testing/reports/$set.wordacc" |tail -1 |cut -c1-9 |
tr -d '[:blank:]')
nswderrs=$(grep Total "testing/reports/$set.wordacc" |head -2 |tail -1 |
cut -c10-17 |tr -d '[:blank:]')
nswdacc=$(grep Total "testing/reports/$set.wordacc" |head -2 |tail -1 |
cut -c19-26 |tr -d '[:blank:]')
# Compute the percent change.
chdelta=`deltapc $cherrs $olderrs`
wdelta=`deltapc $wderrs $oldwerrs`
nswdelta=`deltapc $nswderrs $oldnswerrs`
chdelta=$(deltapc "$cherrs" "$olderrs")
wdelta=$(deltapc "$wderrs" "$oldwerrs")
nswdelta=$(deltapc "$nswderrs" "$oldnswerrs")
sumfile=$rdir/$vid.$set.sum
if [ -r testing/reports/$set.times ]
if [ -r "testing/reports/$set.times" ]
then
total_time=`timesum testing/reports/$set.times`
if [ -r testing/reports/prev/$set.times ]
total_time=$(timesum "testing/reports/$set.times")
if [ -r "testing/reports/prev/$set.times" ]
then
paste testing/reports/prev/$set.times testing/reports/$set.times |
awk '{ printf("%s %.2f\n", $1, $4-$2); }' |sort -k2n >testing/reports/$set.timedelta
paste "testing/reports/prev/$set.times" "testing/reports/$set.times" |
awk '{ printf("%s %.2f\n", $1, $4-$2); }' |sort -k2n >"testing/reports/$set.timedelta"
fi
else
total_time='0.0'
fi
echo "$vid $set $cherrs $chacc $chdelta% $wderrs $wdacc\
$wdelta% $nswderrs $nswdacc $nswdelta% ${total_time}s" >$sumfile
$wdelta% $nswderrs $nswdacc $nswdelta% ${total_time}s" >"$sumfile"
# Sum totals over all the testsets.
let totalerrs=totalerrs+cherrs
let totalwerrs=totalwerrs+wderrs
@@ -125,10 +125,10 @@ do
fi
done
# Compute grand total percent change.
chdelta=`deltapc $totalerrs $totalolderrs`
wdelta=`deltapc $totalwerrs $totaloldwerrs`
nswdelta=`deltapc $totalnswerrs $totaloldnswerrs `
chdelta=$(deltapc $totalerrs $totalolderrs)
wdelta=$(deltapc $totalwerrs $totaloldwerrs)
nswdelta=$(deltapc $totalnswerrs $totaloldnswerrs)
tfile=$rdir/$vid.total.sum
echo "$vid Total $totalerrs - $chdelta% $totalwerrs\
- $wdelta% $totalnswerrs - $nswdelta%" >$tfile
cat $rdir/1995.*.sum $rdir/$vid.*.sum >$rdir/$vid.summary
- $wdelta% $totalnswerrs - $nswdelta%" >"$tfile"
cat $rdir/1995.*.sum "$rdir/$vid".*.sum >"$rdir/$vid".summary

View File

@@ -15,7 +15,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
if [ $# -ne 1 -a $# -ne 2 ]
if [ $# -ne 1 ] && [ $# -ne 2 ]
then
echo "Usage:$0 pagesfile [-zoning]"
exit 1
@@ -42,7 +42,7 @@ fi
pages=$1
imdir=${pages%/pages}
setname=${imdir##*/}
if [ $# -eq 2 -a "$2" = "-zoning" ]
if [ $# -eq 2 ] && [ "$2" = "-zoning" ]
then
config=unlv.auto
resdir=testing/results/zoning.$setname
@@ -51,8 +51,8 @@ else
resdir=testing/results/$setname
fi
echo -e "Testing on set $setname in directory $imdir to $resdir\n"
mkdir -p $resdir
rm -f testing/reports/$setname.times
mkdir -p "$resdir"
rm -f "testing/reports/$setname.times"
while read page dir
do
# A pages file may be a list of files with subdirs or maybe just
@@ -64,15 +64,15 @@ do
srcdir="$imdir"
fi
# echo "$srcdir/$page.tif"
$tess $srcdir/$page.tif $resdir/$page --psm 6 $config 2>&1 |grep -v "OCR Engine"
$tess "$srcdir/$page.tif" "$resdir/$page" --psm 6 $config 2>&1 |grep -v "OCR Engine"
if [ -r times.txt ]
then
read t <times.txt
echo "$page $t" >>testing/reports/$setname.times
echo "$page $t" >>"testing/reports/$setname.times"
echo -e "\033M$page $t"
if [ "$t" = "Command terminated by signal 2" ]
then
exit 0
fi
fi
done <$pages
done <"$pages"