mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-23 18:49:08 +08:00
backport doc and testing changes from master (4.0)
This commit is contained in:
parent
5fe49627bf
commit
10f96d1e80
@ -84,14 +84,20 @@ before any 'configfile'.
|
||||
|
||||
SINGLE OPTIONS
|
||||
--------------
|
||||
'-v'::
|
||||
'-h, --help'::
|
||||
Show help message.
|
||||
|
||||
'--help-psm'::
|
||||
Show page segmentation modes.
|
||||
|
||||
'-v, --version'::
|
||||
Returns the current version of the tesseract(1) executable.
|
||||
|
||||
'--list-langs'::
|
||||
list available languages for tesseract engine. Can be used with --tessdata-dir.
|
||||
List available languages for tesseract engine. Can be used with --tessdata-dir.
|
||||
|
||||
'--print-parameters'::
|
||||
print tesseract parameters to the stdout.
|
||||
Print tesseract parameters.
|
||||
|
||||
|
||||
|
||||
|
@ -49,13 +49,13 @@ do
|
||||
fi
|
||||
# echo "$srcdir/$page.tif"
|
||||
# Count character errors.
|
||||
testing/unlv/accuracy $srcdir/$page.txt $resdir/$page.txt $resdir/$page.acc
|
||||
testing/unlv/accuracy "$srcdir/$page.txt" "$resdir/$page.txt" "$resdir/$page.acc"
|
||||
accfiles="$accfiles $resdir/$page.acc"
|
||||
# Count word errors.
|
||||
testing/unlv/wordacc $srcdir/$page.txt $resdir/$page.txt $resdir/$page.wa
|
||||
testing/unlv/wordacc "$srcdir/$page.txt" "$resdir/$page.txt" "$resdir/$page.wa"
|
||||
wafiles="$wafiles $resdir/$page.wa"
|
||||
done <$pages
|
||||
testing/unlv/accsum $accfiles >testing/reports/$setname.characc
|
||||
testing/unlv/wordaccsum $wafiles >testing/reports/$setname.wordacc
|
||||
done <"$pages"
|
||||
# $accfiles is a space-separated list of per-page .acc reports built in the
# loop above. It is deliberately left unquoted so that word splitting hands
# each report to accsum as a separate argument; quoting it would pass the
# whole list as one (non-existent) file name.
# shellcheck disable=SC2086
testing/unlv/accsum $accfiles >"testing/reports/$setname.characc"
|
||||
# $wafiles is a space-separated list of per-page .wa reports built in the
# loop above. It is deliberately left unquoted so that word splitting hands
# each report to wordaccsum as a separate argument; quoting it would pass the
# whole list as one (non-existent) file name.
# shellcheck disable=SC2086
testing/unlv/wordaccsum $wafiles >"testing/reports/$setname.wordacc"
|
||||
|
||||
|
||||
|
@ -33,21 +33,21 @@ do
|
||||
if [ -r "$old/PAGES" ]
|
||||
then
|
||||
new=${s#*/}.$ext
|
||||
mkdir -p $new
|
||||
mkdir -p "$new"
|
||||
echo "Set $old -> $new"
|
||||
#The pages file had - instead of _ so fix it and add the extension.
|
||||
for page in `cat $old/PAGES`
|
||||
for page in $(cat $old/PAGES)
|
||||
do
|
||||
echo "${page%-*}_${page#*-}.$ext"
|
||||
done >$new/pages
|
||||
for f in `cat $new/pages`
|
||||
done >"$new/pages"
|
||||
for f in $(cat "$new/pages")
|
||||
do
|
||||
#Put a tif extension on the tif files.
|
||||
cp $old/${old}_B/$f $new/$f.tif
|
||||
cp "$old/${old}_B/$f" "$new/$f.tif"
|
||||
#Put a uzn extension on the zone files.
|
||||
cp $old/${old}_B/${f}Z $new/$f.uzn
|
||||
cp "$old/${old}_B/${f}Z" "$new/$f.uzn"
|
||||
#Cat all the truth files together and put into a single txt file.
|
||||
cat $old/${old}_GT/${f%.$ext}.Z* >$new/$f.txt
|
||||
cat "$old/${old}_GT/${f%.$ext}".Z* >"$new/$f.txt"
|
||||
done
|
||||
fi
|
||||
done
|
||||
|
@ -25,12 +25,12 @@ then
|
||||
echo "Run $0 from the tesseract-ocr root directory!"
|
||||
exit 1
|
||||
fi
|
||||
if [ ! -r api/tesseract -a ! -r tesseract.exe ]
|
||||
if [ ! -r api/tesseract ] && [ ! -r tesseract.exe ]
|
||||
then
|
||||
echo "Please build tesseract before running $0"
|
||||
exit 1
|
||||
fi
|
||||
if [ ! -r testing/unlv/accuracy -a ! -r testing/unlv/accuracy.exe ]
|
||||
if [ ! -r testing/unlv/accuracy ] && [ ! -r testing/unlv/accuracy.exe ]
|
||||
then
|
||||
echo "Please download the UNLV accuracy tools (and build) to testing/unlv"
|
||||
exit 1
|
||||
@ -39,7 +39,7 @@ fi
|
||||
#deltapc new old calculates the %change from old to new
|
||||
deltapc() {
  # Usage: deltapc new old
  # Prints the percent change from old ($2) to new ($1), formatted with two
  # decimals and no trailing newline.
  # The operands are handed to awk through -v rather than being spliced into
  # the program text, so they are always treated as data and can never alter
  # the awk program itself.
  awk -v cur="$1" -v ref="$2" 'BEGIN {
    printf("%.2f", 100.0 * (cur - ref) / ref);
  }'
}
|
||||
|
||||
@ -53,7 +53,7 @@ total = 0.0;
|
||||
}
|
||||
END {
|
||||
printf("%.2f\n", total);
|
||||
}' $1
|
||||
}' "$1"
|
||||
}
|
||||
|
||||
imdir="$1"
|
||||
@ -74,47 +74,47 @@ totaloldwerrs=0
|
||||
totaloldnswerrs=0
|
||||
for set in $testsets
|
||||
do
|
||||
if [ -r $imdir/$set/pages ]
|
||||
if [ -r "$imdir/$set/pages" ]
|
||||
then
|
||||
# Run tesseract on all the pages.
|
||||
$bindir/runtestset.sh $imdir/$set/pages
|
||||
$bindir/runtestset.sh "$imdir/$set/pages"
|
||||
# Count the errors on all the pages.
|
||||
$bindir/counttestset.sh $imdir/$set/pages
|
||||
$bindir/counttestset.sh "$imdir/$set/pages"
|
||||
# Get the old character word and nonstop word errors.
|
||||
olderrs=`cat testing/reports/1995.$set.sum | cut -f3`
|
||||
oldwerrs=`cat testing/reports/1995.$set.sum | cut -f6`
|
||||
oldnswerrs=`cat testing/reports/1995.$set.sum | cut -f9`
|
||||
olderrs=$(cut -f3 "testing/reports/1995.$set.sum")
|
||||
oldwerrs=$(cut -f6 "testing/reports/1995.$set.sum")
|
||||
oldnswerrs=$(cut -f9 "testing/reports/1995.$set.sum")
|
||||
# Get the new character word and nonstop word errors and accuracy.
|
||||
cherrs=`head -4 testing/reports/$set.characc |tail -1 |cut -c1-9 |
|
||||
tr -d '[:blank:]'`
|
||||
chacc=`head -5 testing/reports/$set.characc |tail -1 |cut -c1-9 |
|
||||
tr -d '[:blank:]'`
|
||||
wderrs=`head -4 testing/reports/$set.wordacc |tail -1 |cut -c1-9 |
|
||||
tr -d '[:blank:]'`
|
||||
wdacc=`head -5 testing/reports/$set.wordacc |tail -1 |cut -c1-9 |
|
||||
tr -d '[:blank:]'`
|
||||
nswderrs=`grep Total testing/reports/$set.wordacc |head -2 |tail -1 |
|
||||
cut -c10-17 |tr -d '[:blank:]'`
|
||||
nswdacc=`grep Total testing/reports/$set.wordacc |head -2 |tail -1 |
|
||||
cut -c19-26 |tr -d '[:blank:]'`
|
||||
cherrs=$(head -4 "testing/reports/$set.characc" |tail -1 |cut -c1-9 |
|
||||
tr -d '[:blank:]')
|
||||
chacc=$(head -5 "testing/reports/$set.characc" |tail -1 |cut -c1-9 |
|
||||
tr -d '[:blank:]')
|
||||
wderrs=$(head -4 "testing/reports/$set.wordacc" |tail -1 |cut -c1-9 |
|
||||
tr -d '[:blank:]')
|
||||
wdacc=$(head -5 "testing/reports/$set.wordacc" |tail -1 |cut -c1-9 |
|
||||
tr -d '[:blank:]')
|
||||
nswderrs=$(grep Total "testing/reports/$set.wordacc" |head -2 |tail -1 |
|
||||
cut -c10-17 |tr -d '[:blank:]')
|
||||
nswdacc=$(grep Total "testing/reports/$set.wordacc" |head -2 |tail -1 |
|
||||
cut -c19-26 |tr -d '[:blank:]')
|
||||
# Compute the percent change.
|
||||
chdelta=`deltapc $cherrs $olderrs`
|
||||
wdelta=`deltapc $wderrs $oldwerrs`
|
||||
nswdelta=`deltapc $nswderrs $oldnswerrs`
|
||||
chdelta=$(deltapc "$cherrs" "$olderrs")
|
||||
wdelta=$(deltapc "$wderrs" "$oldwerrs")
|
||||
nswdelta=$(deltapc "$nswderrs" "$oldnswerrs")
|
||||
sumfile=$rdir/$vid.$set.sum
|
||||
if [ -r testing/reports/$set.times ]
|
||||
if [ -r "testing/reports/$set.times" ]
|
||||
then
|
||||
total_time=`timesum testing/reports/$set.times`
|
||||
if [ -r testing/reports/prev/$set.times ]
|
||||
total_time=$(timesum "testing/reports/$set.times")
|
||||
if [ -r "testing/reports/prev/$set.times" ]
|
||||
then
|
||||
paste testing/reports/prev/$set.times testing/reports/$set.times |
|
||||
awk '{ printf("%s %.2f\n", $1, $4-$2); }' |sort -k2n >testing/reports/$set.timedelta
|
||||
paste "testing/reports/prev/$set.times" "testing/reports/$set.times" |
|
||||
awk '{ printf("%s %.2f\n", $1, $4-$2); }' |sort -k2n >"testing/reports/$set.timedelta"
|
||||
fi
|
||||
else
|
||||
total_time='0.0'
|
||||
fi
|
||||
echo "$vid $set $cherrs $chacc $chdelta% $wderrs $wdacc\
|
||||
$wdelta% $nswderrs $nswdacc $nswdelta% ${total_time}s" >$sumfile
|
||||
$wdelta% $nswderrs $nswdacc $nswdelta% ${total_time}s" >"$sumfile"
|
||||
# Sum totals over all the testsets.
|
||||
let totalerrs=totalerrs+cherrs
|
||||
let totalwerrs=totalwerrs+wderrs
|
||||
@ -125,10 +125,10 @@ do
|
||||
fi
|
||||
done
|
||||
# Compute grand total percent change.
|
||||
chdelta=`deltapc $totalerrs $totalolderrs`
|
||||
wdelta=`deltapc $totalwerrs $totaloldwerrs`
|
||||
nswdelta=`deltapc $totalnswerrs $totaloldnswerrs `
|
||||
# Quote both operands (as the per-set calls do) so an empty total cannot
# shift the remaining argument into the wrong position.
chdelta=$(deltapc "$totalerrs" "$totalolderrs")
|
||||
# Quote both operands (as the per-set calls do) so an empty total cannot
# shift the remaining argument into the wrong position.
wdelta=$(deltapc "$totalwerrs" "$totaloldwerrs")
|
||||
# Quote both operands (as the per-set calls do) so an empty total cannot
# shift the remaining argument into the wrong position.
nswdelta=$(deltapc "$totalnswerrs" "$totaloldnswerrs")
|
||||
tfile=$rdir/$vid.total.sum
|
||||
echo "$vid Total $totalerrs - $chdelta% $totalwerrs\
|
||||
- $wdelta% $totalnswerrs - $nswdelta%" >$tfile
|
||||
cat $rdir/1995.*.sum $rdir/$vid.*.sum >$rdir/$vid.summary
|
||||
- $wdelta% $totalnswerrs - $nswdelta%" >"$tfile"
|
||||
# Concatenate the 1995 baseline summaries and this version's summaries into
# one report. The directory and version parts are quoted; only the .*.sum
# globs are left outside the quotes so the shell still expands them.
cat "$rdir"/1995.*.sum "$rdir/$vid".*.sum >"$rdir/$vid.summary"
|
||||
|
@ -15,7 +15,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
if [ $# -ne 1 -a $# -ne 2 ]
|
||||
if [ $# -ne 1 ] && [ $# -ne 2 ]
|
||||
then
|
||||
echo "Usage:$0 pagesfile [-zoning]"
|
||||
exit 1
|
||||
@ -42,7 +42,7 @@ fi
|
||||
pages=$1
|
||||
imdir=${pages%/pages}
|
||||
setname=${imdir##*/}
|
||||
if [ $# -eq 2 -a "$2" = "-zoning" ]
|
||||
if [ $# -eq 2 ] && [ "$2" = "-zoning" ]
|
||||
then
|
||||
config=unlv.auto
|
||||
resdir=testing/results/zoning.$setname
|
||||
@ -51,8 +51,8 @@ else
|
||||
resdir=testing/results/$setname
|
||||
fi
|
||||
echo -e "Testing on set $setname in directory $imdir to $resdir\n"
|
||||
mkdir -p $resdir
|
||||
rm -f testing/reports/$setname.times
|
||||
mkdir -p "$resdir"
|
||||
rm -f "testing/reports/$setname.times"
|
||||
while read page dir
|
||||
do
|
||||
# A pages file may be a list of files with subdirs or maybe just
|
||||
@ -64,15 +64,15 @@ do
|
||||
srcdir="$imdir"
|
||||
fi
|
||||
# echo "$srcdir/$page.tif"
|
||||
$tess $srcdir/$page.tif $resdir/$page --psm 6 $config 2>&1 |grep -v "OCR Engine"
|
||||
$tess "$srcdir/$page.tif" "$resdir/$page" --psm 6 $config 2>&1 |grep -v "OCR Engine"
|
||||
if [ -r times.txt ]
|
||||
then
|
||||
read t <times.txt
|
||||
echo "$page $t" >>testing/reports/$setname.times
|
||||
echo "$page $t" >>"testing/reports/$setname.times"
|
||||
echo -e "\033M$page $t"
|
||||
if [ "$t" = "Command terminated by signal 2" ]
|
||||
then
|
||||
exit 0
|
||||
fi
|
||||
fi
|
||||
done <$pages
|
||||
done <"$pages"
|
||||
|
Loading…
Reference in New Issue
Block a user