mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-24 02:59:07 +08:00
backport doc and testing changes from master (4.0)
This commit is contained in:
parent
5fe49627bf
commit
10f96d1e80
@ -84,14 +84,20 @@ before any 'configfile'.
|
|||||||
|
|
||||||
SINGLE OPTIONS
|
SINGLE OPTIONS
|
||||||
--------------
|
--------------
|
||||||
'-v'::
|
'-h, --help'::
|
||||||
|
Show help message.
|
||||||
|
|
||||||
|
'--help-psm'::
|
||||||
|
Show page segmentation modes.
|
||||||
|
|
||||||
|
'-v, --version'::
|
||||||
Returns the current version of the tesseract(1) executable.
|
Returns the current version of the tesseract(1) executable.
|
||||||
|
|
||||||
'--list-langs'::
|
'--list-langs'::
|
||||||
list available languages for tesseract engine. Can be used with --tessdata-dir.
|
List available languages for the tesseract engine. Can be used with --tessdata-dir.
|
||||||
|
|
||||||
'--print-parameters'::
|
'--print-parameters'::
|
||||||
print tesseract parameters to the stdout.
|
Print tesseract parameters.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -49,13 +49,13 @@ do
|
|||||||
fi
|
fi
|
||||||
# echo "$srcdir/$page.tif"
|
# echo "$srcdir/$page.tif"
|
||||||
# Count character errors.
|
# Count character errors.
|
||||||
testing/unlv/accuracy $srcdir/$page.txt $resdir/$page.txt $resdir/$page.acc
|
testing/unlv/accuracy "$srcdir/$page.txt" "$resdir/$page.txt" "$resdir/$page.acc"
|
||||||
accfiles="$accfiles $resdir/$page.acc"
|
accfiles="$accfiles $resdir/$page.acc"
|
||||||
# Count word errors.
|
# Count word errors.
|
||||||
testing/unlv/wordacc $srcdir/$page.txt $resdir/$page.txt $resdir/$page.wa
|
testing/unlv/wordacc "$srcdir/$page.txt" "$resdir/$page.txt" "$resdir/$page.wa"
|
||||||
wafiles="$wafiles $resdir/$page.wa"
|
wafiles="$wafiles $resdir/$page.wa"
|
||||||
done <$pages
|
done <"$pages"
|
||||||
testing/unlv/accsum $accfiles >testing/reports/$setname.characc
|
# $accfiles is a space-separated list of per-page .acc report paths built up
# in the loop above. It must stay unquoted so that word splitting hands each
# path to accsum as a separate argument; quoting it would pass the whole list
# (with its leading space) as one nonexistent file name.
# shellcheck disable=SC2086
testing/unlv/accsum $accfiles >"testing/reports/$setname.characc"
|
||||||
testing/unlv/wordaccsum $wafiles >testing/reports/$setname.wordacc
|
# $wafiles is a space-separated list of per-page .wa report paths built up
# in the loop above. It must stay unquoted so that word splitting hands each
# path to wordaccsum as a separate argument; quoting it would pass the whole
# list (with its leading space) as one nonexistent file name.
# shellcheck disable=SC2086
testing/unlv/wordaccsum $wafiles >"testing/reports/$setname.wordacc"
|
||||||
|
|
||||||
|
|
||||||
|
@ -33,21 +33,21 @@ do
|
|||||||
if [ -r "$old/PAGES" ]
|
if [ -r "$old/PAGES" ]
|
||||||
then
|
then
|
||||||
new=${s#*/}.$ext
|
new=${s#*/}.$ext
|
||||||
mkdir -p $new
|
mkdir -p "$new"
|
||||||
echo "Set $old -> $new"
|
echo "Set $old -> $new"
|
||||||
#The pages file had - instead of _ so fix it and add the extension.
|
#The pages file had - instead of _ so fix it and add the extension.
|
||||||
for page in `cat $old/PAGES`
|
# Iterate over the whitespace-separated page names listed in $old/PAGES.
# The path is quoted so a directory name containing spaces or glob characters
# is passed to cat intact; the command substitution itself stays unquoted
# because splitting its output into one word per page is intended.
for page in $(cat "$old/PAGES")
|
||||||
do
|
do
|
||||||
echo "${page%-*}_${page#*-}.$ext"
|
echo "${page%-*}_${page#*-}.$ext"
|
||||||
done >$new/pages
|
done >"$new/pages"
|
||||||
for f in `cat $new/pages`
|
for f in $(cat "$new/pages")
|
||||||
do
|
do
|
||||||
#Put a tif extension on the tif files.
|
#Put a tif extension on the tif files.
|
||||||
cp $old/${old}_B/$f $new/$f.tif
|
cp "$old/${old}_B/$f" "$new/$f.tif"
|
||||||
#Put a uzn extension on the zone files.
|
#Put a uzn extension on the zone files.
|
||||||
cp $old/${old}_B/${f}Z $new/$f.uzn
|
cp "$old/${old}_B/${f}Z" "$new/$f.uzn"
|
||||||
#Cat all the truth files together and put into a single txt file.
|
#Cat all the truth files together and put into a single txt file.
|
||||||
cat $old/${old}_GT/${f%.$ext}.Z* >$new/$f.txt
|
cat "$old/${old}_GT/${f%.$ext}".Z* >"$new/$f.txt"
|
||||||
done
|
done
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
@ -25,12 +25,12 @@ then
|
|||||||
echo "Run $0 from the tesseract-ocr root directory!"
|
echo "Run $0 from the tesseract-ocr root directory!"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
if [ ! -r api/tesseract -a ! -r tesseract.exe ]
|
if [ ! -r api/tesseract ] && [ ! -r tesseract.exe ]
|
||||||
then
|
then
|
||||||
echo "Please build tesseract before running $0"
|
echo "Please build tesseract before running $0"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
if [ ! -r testing/unlv/accuracy -a ! -r testing/unlv/accuracy.exe ]
|
if [ ! -r testing/unlv/accuracy ] && [ ! -r testing/unlv/accuracy.exe ]
|
||||||
then
|
then
|
||||||
echo "Please download the UNLV accuracy tools (and build) to testing/unlv"
|
echo "Please download the UNLV accuracy tools (and build) to testing/unlv"
|
||||||
exit 1
|
exit 1
|
||||||
@ -39,7 +39,7 @@ fi
|
|||||||
#deltapc new old calculates the %change from old to new
|
#deltapc new old calculates the %change from old to new
|
||||||
deltapc() {
|
deltapc() {
|
||||||
awk ' BEGIN {
|
awk ' BEGIN {
|
||||||
printf("%.2f", 100.0*('$1'-'$2')/'$2');
|
printf("%.2f", 100.0*('"$1"'-'"$2"')/'"$2"');
|
||||||
}'
|
}'
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -53,7 +53,7 @@ total = 0.0;
|
|||||||
}
|
}
|
||||||
END {
|
END {
|
||||||
printf("%.2f\n", total);
|
printf("%.2f\n", total);
|
||||||
}' $1
|
}' "$1"
|
||||||
}
|
}
|
||||||
|
|
||||||
imdir="$1"
|
imdir="$1"
|
||||||
@ -74,47 +74,47 @@ totaloldwerrs=0
|
|||||||
totaloldnswerrs=0
|
totaloldnswerrs=0
|
||||||
for set in $testsets
|
for set in $testsets
|
||||||
do
|
do
|
||||||
if [ -r $imdir/$set/pages ]
|
if [ -r "$imdir/$set/pages" ]
|
||||||
then
|
then
|
||||||
# Run tesseract on all the pages.
|
# Run tesseract on all the pages.
|
||||||
$bindir/runtestset.sh $imdir/$set/pages
|
$bindir/runtestset.sh "$imdir/$set/pages"
|
||||||
# Count the errors on all the pages.
|
# Count the errors on all the pages.
|
||||||
$bindir/counttestset.sh $imdir/$set/pages
|
$bindir/counttestset.sh "$imdir/$set/pages"
|
||||||
# Get the old character word and nonstop word errors.
|
# Get the old character word and nonstop word errors.
|
||||||
olderrs=`cat testing/reports/1995.$set.sum | cut -f3`
|
olderrs=$(cut -f3 "testing/reports/1995.$set.sum")
|
||||||
oldwerrs=`cat testing/reports/1995.$set.sum | cut -f6`
|
oldwerrs=$(cut -f6 "testing/reports/1995.$set.sum")
|
||||||
oldnswerrs=`cat testing/reports/1995.$set.sum | cut -f9`
|
oldnswerrs=$(cut -f9 "testing/reports/1995.$set.sum")
|
||||||
# Get the new character word and nonstop word errors and accuracy.
|
# Get the new character word and nonstop word errors and accuracy.
|
||||||
cherrs=`head -4 testing/reports/$set.characc |tail -1 |cut -c1-9 |
|
cherrs=$(head -4 "testing/reports/$set.characc" |tail -1 |cut -c1-9 |
|
||||||
tr -d '[:blank:]'`
|
tr -d '[:blank:]')
|
||||||
chacc=`head -5 testing/reports/$set.characc |tail -1 |cut -c1-9 |
|
chacc=$(head -5 "testing/reports/$set.characc" |tail -1 |cut -c1-9 |
|
||||||
tr -d '[:blank:]'`
|
tr -d '[:blank:]')
|
||||||
wderrs=`head -4 testing/reports/$set.wordacc |tail -1 |cut -c1-9 |
|
wderrs=$(head -4 "testing/reports/$set.wordacc" |tail -1 |cut -c1-9 |
|
||||||
tr -d '[:blank:]'`
|
tr -d '[:blank:]')
|
||||||
wdacc=`head -5 testing/reports/$set.wordacc |tail -1 |cut -c1-9 |
|
wdacc=$(head -5 "testing/reports/$set.wordacc" |tail -1 |cut -c1-9 |
|
||||||
tr -d '[:blank:]'`
|
tr -d '[:blank:]')
|
||||||
nswderrs=`grep Total testing/reports/$set.wordacc |head -2 |tail -1 |
|
nswderrs=$(grep Total "testing/reports/$set.wordacc" |head -2 |tail -1 |
|
||||||
cut -c10-17 |tr -d '[:blank:]'`
|
cut -c10-17 |tr -d '[:blank:]')
|
||||||
nswdacc=`grep Total testing/reports/$set.wordacc |head -2 |tail -1 |
|
nswdacc=$(grep Total "testing/reports/$set.wordacc" |head -2 |tail -1 |
|
||||||
cut -c19-26 |tr -d '[:blank:]'`
|
cut -c19-26 |tr -d '[:blank:]')
|
||||||
# Compute the percent change.
|
# Compute the percent change.
|
||||||
chdelta=`deltapc $cherrs $olderrs`
|
chdelta=$(deltapc "$cherrs" "$olderrs")
|
||||||
wdelta=`deltapc $wderrs $oldwerrs`
|
wdelta=$(deltapc "$wderrs" "$oldwerrs")
|
||||||
nswdelta=`deltapc $nswderrs $oldnswerrs`
|
nswdelta=$(deltapc "$nswderrs" "$oldnswerrs")
|
||||||
sumfile=$rdir/$vid.$set.sum
|
sumfile=$rdir/$vid.$set.sum
|
||||||
if [ -r testing/reports/$set.times ]
|
if [ -r "testing/reports/$set.times" ]
|
||||||
then
|
then
|
||||||
total_time=`timesum testing/reports/$set.times`
|
total_time=$(timesum "testing/reports/$set.times")
|
||||||
if [ -r testing/reports/prev/$set.times ]
|
if [ -r "testing/reports/prev/$set.times" ]
|
||||||
then
|
then
|
||||||
paste testing/reports/prev/$set.times testing/reports/$set.times |
|
paste "testing/reports/prev/$set.times" "testing/reports/$set.times" |
|
||||||
awk '{ printf("%s %.2f\n", $1, $4-$2); }' |sort -k2n >testing/reports/$set.timedelta
|
awk '{ printf("%s %.2f\n", $1, $4-$2); }' |sort -k2n >"testing/reports/$set.timedelta"
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
total_time='0.0'
|
total_time='0.0'
|
||||||
fi
|
fi
|
||||||
echo "$vid $set $cherrs $chacc $chdelta% $wderrs $wdacc\
|
echo "$vid $set $cherrs $chacc $chdelta% $wderrs $wdacc\
|
||||||
$wdelta% $nswderrs $nswdacc $nswdelta% ${total_time}s" >$sumfile
|
$wdelta% $nswderrs $nswdacc $nswdelta% ${total_time}s" >"$sumfile"
|
||||||
# Sum totals over all the testsets.
|
# Sum totals over all the testsets.
|
||||||
let totalerrs=totalerrs+cherrs
|
let totalerrs=totalerrs+cherrs
|
||||||
let totalwerrs=totalwerrs+wderrs
|
let totalwerrs=totalwerrs+wderrs
|
||||||
@ -125,10 +125,10 @@ do
|
|||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
# Compute grand total percent change.
|
# Compute grand total percent change.
|
||||||
chdelta=`deltapc $totalerrs $totalolderrs`
|
chdelta=$(deltapc $totalerrs $totalolderrs)
|
||||||
wdelta=`deltapc $totalwerrs $totaloldwerrs`
|
wdelta=$(deltapc $totalwerrs $totaloldwerrs)
|
||||||
nswdelta=`deltapc $totalnswerrs $totaloldnswerrs `
|
nswdelta=$(deltapc $totalnswerrs $totaloldnswerrs)
|
||||||
tfile=$rdir/$vid.total.sum
|
tfile=$rdir/$vid.total.sum
|
||||||
echo "$vid Total $totalerrs - $chdelta% $totalwerrs\
|
echo "$vid Total $totalerrs - $chdelta% $totalwerrs\
|
||||||
- $wdelta% $totalnswerrs - $nswdelta%" >$tfile
|
- $wdelta% $totalnswerrs - $nswdelta%" >"$tfile"
|
||||||
cat $rdir/1995.*.sum $rdir/$vid.*.sum >$rdir/$vid.summary
|
cat $rdir/1995.*.sum "$rdir/$vid".*.sum >"$rdir/$vid".summary
|
||||||
|
@ -15,7 +15,7 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
if [ $# -ne 1 -a $# -ne 2 ]
|
if [ $# -ne 1 ] && [ $# -ne 2 ]
|
||||||
then
|
then
|
||||||
echo "Usage:$0 pagesfile [-zoning]"
|
echo "Usage:$0 pagesfile [-zoning]"
|
||||||
exit 1
|
exit 1
|
||||||
@ -42,7 +42,7 @@ fi
|
|||||||
pages=$1
|
pages=$1
|
||||||
imdir=${pages%/pages}
|
imdir=${pages%/pages}
|
||||||
setname=${imdir##*/}
|
setname=${imdir##*/}
|
||||||
if [ $# -eq 2 -a "$2" = "-zoning" ]
|
if [ $# -eq 2 ] && [ "$2" = "-zoning" ]
|
||||||
then
|
then
|
||||||
config=unlv.auto
|
config=unlv.auto
|
||||||
resdir=testing/results/zoning.$setname
|
resdir=testing/results/zoning.$setname
|
||||||
@ -51,8 +51,8 @@ else
|
|||||||
resdir=testing/results/$setname
|
resdir=testing/results/$setname
|
||||||
fi
|
fi
|
||||||
echo -e "Testing on set $setname in directory $imdir to $resdir\n"
|
echo -e "Testing on set $setname in directory $imdir to $resdir\n"
|
||||||
mkdir -p $resdir
|
mkdir -p "$resdir"
|
||||||
rm -f testing/reports/$setname.times
|
rm -f "testing/reports/$setname.times"
|
||||||
while read page dir
|
while read page dir
|
||||||
do
|
do
|
||||||
# A pages file may be a list of files with subdirs or maybe just
|
# A pages file may be a list of files with subdirs or maybe just
|
||||||
@ -64,15 +64,15 @@ do
|
|||||||
srcdir="$imdir"
|
srcdir="$imdir"
|
||||||
fi
|
fi
|
||||||
# echo "$srcdir/$page.tif"
|
# echo "$srcdir/$page.tif"
|
||||||
$tess $srcdir/$page.tif $resdir/$page --psm 6 $config 2>&1 |grep -v "OCR Engine"
|
$tess "$srcdir/$page.tif" "$resdir/$page" --psm 6 $config 2>&1 |grep -v "OCR Engine"
|
||||||
if [ -r times.txt ]
|
if [ -r times.txt ]
|
||||||
then
|
then
|
||||||
read t <times.txt
|
read t <times.txt
|
||||||
echo "$page $t" >>testing/reports/$setname.times
|
echo "$page $t" >>"testing/reports/$setname.times"
|
||||||
echo -e "\033M$page $t"
|
echo -e "\033M$page $t"
|
||||||
if [ "$t" = "Command terminated by signal 2" ]
|
if [ "$t" = "Command terminated by signal 2" ]
|
||||||
then
|
then
|
||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
done <$pages
|
done <"$pages"
|
||||||
|
Loading…
Reference in New Issue
Block a user