#!/bin/bash
# File: runalltests.sh
# Description: Script to run a set of UNLV test sets for English.
# Author: Ray Smith
# Created: Thu Jun 14 08:21:01 PDT 2007
#
# (C) Copyright 2007, Google Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Validate the command line and the working directory before doing any work.
# Arguments:
#   $1 - UNLV data directory
#   $2 - version id
#   $3 - tessdata directory
# Every failure prints a diagnostic on stderr and exits with status 1.
if [ $# -ne 3 ]; then
  echo "Usage:$0 unlv-data-dir version-id tessdata-dir" >&2
  exit 1
fi

# The script uses paths relative to the source tree root (src/api here),
# so refuse to run from anywhere else.
if [ ! -d src/api ]; then
  echo "Run $0 from the tesseract-ocr root directory!" >&2
  exit 1
fi

# A readable src/api/tesseract or tesseract.exe counts as a finished build.
if [ ! -r src/api/tesseract ] && [ ! -r tesseract.exe ]; then
  echo "Please build tesseract before running $0" >&2
  exit 1
fi

tessdata="$3"

# deltapc new old
# Prints the percent change from old to new, formatted with two decimals and
# no trailing newline.
# Arguments:
#   $1 - new value
#   $2 - old (baseline) value
# Outputs:
#   The percentage on stdout, or "nan" when the baseline is zero or empty,
#   because the relative change is undefined in that case.
deltapc() {
  # The values are handed to awk with -v instead of being spliced into the
  # program text, so an empty or malformed argument cannot break the awk
  # syntax or inject code.
  awk -v cur="$1" -v base="$2" 'BEGIN {
    if (base + 0 == 0) {
      printf("nan");
    } else {
      printf("%.2f", 100.0 * (cur - base) / base);
    }
  }'
}

# timesum file
# Computes the total cpu time: sums the second whitespace-separated column
# of every line of the given file.
# Arguments:
#   $1 - path of the file to sum
# Outputs:
#   The total on stdout with two decimals and a trailing newline
#   ("0.00" for an empty file).
timesum() {
  # "total + 0" forces numeric context so an empty input still prints 0.00.
  awk '{ total += $2 } END { printf("%.2f\n", total + 0) }' "$1"
}

# Positional arguments: the UNLV data directory and the version id that
# prefixes every report file written by this run.
imdir="$1"
vid="$2"

# Directory part of the path this script was invoked with; when $0 contains
# no slash the pattern removes nothing, so fall back to the current directory.
bindir="${0%/*}"
if [ "$bindir" = "$0" ]; then
  bindir="./"
fi

# Directory that holds the 1995 baseline *.sum files and receives the new ones.
rdir=unlvtests/reports

# Test sets to process, space separated.
testsets="bus.3B doe3.3B mag.3B news.3B"
#testsets="bus.3B"

# Running totals over all test sets: new errors, then the baseline errors
# (characters, words, non-stopwords).
totalerrs=0
totalwerrs=0
totalnswerrs=0
totalolderrs=0
totaloldwerrs=0
totaloldnswerrs=0

# report_line_field file line cols
# Prints the characters in column range cols (cut -c syntax) of the given
# line number of file, with all blanks removed.
report_line_field() {
  head -n "$2" "$1" | tail -n 1 | cut -c"$3" | tr -d '[:blank:]'
}

# nonstop_total_field file cols
# Prints the characters in column range cols of the second line of file that
# contains "Total", with all blanks removed.
nonstop_total_field() {
  grep Total "$1" | head -n 2 | tail -n 1 | cut -c"$2" | tr -d '[:blank:]'
}

# Process every test set whose pages file is readable: run and score it,
# compare against the 1995 baseline, write a per-set .sum file and add the
# error counts to the running totals.
for testset in $testsets; do
  if [ -r "$imdir/$testset/pages" ]; then
    # Run tesseract on all the pages.
    "$bindir/runtestset.sh" "$imdir/$testset/pages" "$tessdata" "eng"
    # Count the errors on all the pages.
    "$bindir/counttestset.sh" "$imdir/$testset/pages"

    # Get the old character word and nonstop word errors (tab-separated
    # fields 3, 6 and 9 of the baseline summary).
    olderrs=$(cut -f3 "unlvtests/reports/1995.$testset.sum")
    oldwerrs=$(cut -f6 "unlvtests/reports/1995.$testset.sum")
    oldnswerrs=$(cut -f9 "unlvtests/reports/1995.$testset.sum")

    # Get the new character word and nonstop word errors and accuracy.
    # Line 4 of each accuracy report carries the error count and line 5 the
    # accuracy, both within the first nine columns.
    cherrs=$(report_line_field "unlvtests/results/$testset.characc" 4 1-9)
    chacc=$(report_line_field "unlvtests/results/$testset.characc" 5 1-9)
    wderrs=$(report_line_field "unlvtests/results/$testset.wordacc" 4 1-9)
    wdacc=$(report_line_field "unlvtests/results/$testset.wordacc" 5 1-9)
    nswderrs=$(nonstop_total_field "unlvtests/results/$testset.wordacc" 10-17)
    nswdacc=$(nonstop_total_field "unlvtests/results/$testset.wordacc" 19-26)

    # Compute the percent change.
    chdelta=$(deltapc "$cherrs" "$olderrs")
    wdelta=$(deltapc "$wderrs" "$oldwerrs")
    nswdelta=$(deltapc "$nswderrs" "$oldnswerrs")

    sumfile="$rdir/$vid.$testset.sum"

    # Total the cpu time when timings exist, and record per-page deltas
    # against the previous run when that is available too.
    if [ -r "unlvtests/results/$testset.times" ]; then
      total_time=$(timesum "unlvtests/results/$testset.times")
      if [ -r "unlvtests/results/prev/$testset.times" ]; then
        paste "unlvtests/results/prev/$testset.times" \
          "unlvtests/results/$testset.times" \
          | awk '{ printf("%s %.2f\n", $1, $4-$2); }' \
          | sort -k2n >"unlvtests/results/$testset.timedelta"
      fi
    else
      total_time='0.0'
    fi

    # One tab-separated record, so the error counts land in fields 3, 6 and 9
    # exactly like the baseline files read with cut -f above. printf keeps
    # every field separated; a backslash-continued echo would glue the word
    # accuracy and the word delta together.
    printf '%s\t%s\t%s\t%s\t%s%%\t%s\t%s\t%s%%\t%s\t%s\t%s%%\t%ss\n' \
      "$vid" "$testset" "$cherrs" "$chacc" "$chdelta" "$wderrs" "$wdacc" \
      "$wdelta" "$nswderrs" "$nswdacc" "$nswdelta" "$total_time" >"$sumfile"

    # Sum totals over all the testsets. An empty count (missing report or
    # baseline) is treated as 0 instead of aborting the arithmetic.
    totalerrs=$((totalerrs + ${cherrs:-0}))
    totalwerrs=$((totalwerrs + ${wderrs:-0}))
    totalnswerrs=$((totalnswerrs + ${nswderrs:-0}))
    totalolderrs=$((totalolderrs + ${olderrs:-0}))
    totaloldwerrs=$((totaloldwerrs + ${oldwerrs:-0}))
    totaloldnswerrs=$((totaloldnswerrs + ${oldnswerrs:-0}))
  fi
done

# Compute grand total percent change.
chdelta=$(deltapc "$totalerrs" "$totalolderrs")
wdelta=$(deltapc "$totalwerrs" "$totaloldwerrs")
nswdelta=$(deltapc "$totalnswerrs" "$totaloldnswerrs")

# Write the totals record. It is tab-separated with "-" in the accuracy
# columns, so the error counts sit in fields 3, 6 and 9 like the 1995 files.
# printf keeps "$totalwerrs" and the following "-" as separate fields.
tfile="$rdir/$vid.total.sum"
printf '%s\tTotal\t%s\t-\t%s%%\t%s\t-\t%s%%\t%s\t-\t%s%%\n' \
  "$vid" "$totalerrs" "$chdelta" "$totalwerrs" "$wdelta" \
  "$totalnswerrs" "$nswdelta" >"$tfile"

# Build the summary from the 1995 baseline records followed by this run's.
cat "$rdir"/1995.*.sum "$rdir/$vid".*.sum >"$rdir/$vid".summary

# Move this run's .sum files into the results directory (the summary stays
# in the reports directory) and show the summary.
mv "$rdir/$vid".*.sum unlvtests/results/
cat "$rdir/$vid".summary