tesseract/testing/runalltests.sh

135 lines
4.1 KiB
Bash
Raw Normal View History

#!/bin/bash
# File: runalltests.sh
# Description: Script to run a set of UNLV test sets.
# Author: Ray Smith
# Created: Thu Jun 14 08:21:01 PDT 2007
#
# (C) Copyright 2007, Google Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
if [ $# -ne 2 ]
then
echo "Usage:$0 unlv-data-dir version-id"
exit 1
fi
if [ ! -d api ]
then
echo "Run $0 from the tesseract-ocr root directory!"
exit 1
fi
if [ ! -r api/tesseract -a ! -r tesseract.exe ]
then
echo "Please build tesseract before running $0"
exit 1
fi
if [ ! -r testing/unlv/accuracy -a ! -r testing/unlv/accuracy.exe ]
then
echo "Please download the UNLV accuracy tools (and build) to testing/unlv"
exit 1
fi
#deltapc new old calculates the %change from old to new
deltapc() {
awk ' BEGIN {
printf("%.2f", 100.0*('$1'-'$2')/'$2');
}'
}
#timesum computes the total cpu time
timesum() {
awk ' BEGIN {
total = 0.0;
}
{
total += $2;
}
END {
printf("%.2f\n", total);
}' $1
}
imdir="$1"
vid="$2"
bindir=${0%/*}
if [ "$bindir" = "$0" ]
then
bindir="./"
fi
rdir=testing/reports
testsets="bus.3B doe3.3B mag.3B news.3B"
totalerrs=0
totalwerrs=0
totalnswerrs=0
totalolderrs=0
totaloldwerrs=0
totaloldnswerrs=0
for set in $testsets
do
if [ -r $imdir/$set/pages ]
then
# Run tesseract on all the pages.
$bindir/runtestset.sh $imdir/$set/pages
# Count the errors on all the pages.
$bindir/counttestset.sh $imdir/$set/pages
# Get the old character word and nonstop word errors.
olderrs=$(cat testing/reports/1995.$set.sum | cut -f3)
oldwerrs=$(cat testing/reports/1995.$set.sum | cut -f6)
oldnswerrs=$(cat testing/reports/1995.$set.sum | cut -f9)
# Get the new character word and nonstop word errors and accuracy.
cherrs=$(head -4 testing/reports/$set.characc |tail -1 |cut -c1-9 |
tr -d '[:blank:]')
chacc=$(head -5 testing/reports/$set.characc |tail -1 |cut -c1-9 |
tr -d '[:blank:]')
wderrs=$(head -4 testing/reports/$set.wordacc |tail -1 |cut -c1-9 |
tr -d '[:blank:]')
wdacc=$(head -5 testing/reports/$set.wordacc |tail -1 |cut -c1-9 |
tr -d '[:blank:]')
nswderrs=$(grep Total testing/reports/$set.wordacc |head -2 |tail -1 |
cut -c10-17 |tr -d '[:blank:]')
nswdacc=$(grep Total testing/reports/$set.wordacc |head -2 |tail -1 |
cut -c19-26 |tr -d '[:blank:]')
# Compute the percent change.
chdelta=$(deltapc $cherrs $olderrs)
wdelta=$(deltapc $wderrs $oldwerrs)
nswdelta=$(deltapc $nswderrs $oldnswerrs)
sumfile=$rdir/$vid.$set.sum
if [ -r testing/reports/$set.times ]
then
total_time=$(timesum testing/reports/$set.times)
if [ -r testing/reports/prev/$set.times ]
then
paste testing/reports/prev/$set.times testing/reports/$set.times |
awk '{ printf("%s %.2f\n", $1, $4-$2); }' |sort -k2n >testing/reports/$set.timedelta
fi
else
total_time='0.0'
fi
echo "$vid $set $cherrs $chacc $chdelta% $wderrs $wdacc\
$wdelta% $nswderrs $nswdacc $nswdelta% ${total_time}s" >$sumfile
# Sum totals over all the testsets.
let totalerrs=totalerrs+cherrs
let totalwerrs=totalwerrs+wderrs
let totalnswerrs=totalnswerrs+nswderrs
let totalolderrs=totalolderrs+olderrs
let totaloldwerrs=totaloldwerrs+oldwerrs
let totaloldnswerrs=totaloldnswerrs+oldnswerrs
fi
done
# Compute grand total percent change.
chdelta=$(deltapc $totalerrs $totalolderrs)
wdelta=$(deltapc $totalwerrs $totaloldwerrs)
nswdelta=$(deltapc $totalnswerrs $totaloldnswerrs)
tfile=$rdir/$vid.total.sum
echo "$vid Total $totalerrs - $chdelta% $totalwerrs\
- $wdelta% $totalnswerrs - $nswdelta%" >$tfile
cat $rdir/1995.*.sum $rdir/$vid.*.sum >$rdir/$vid.summary