mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-08-06 13:56:47 +08:00
Automake changes for version 2.00.
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@84 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
parent
0d9fa6a040
commit
4df1016692
@ -22,3 +22,11 @@ May 15 2007 - V1.04
|
|||||||
Fixed name collisions with stl etc.
|
Fixed name collisions with stl etc.
|
||||||
Made some preliminary changes ready for unicodeization.
|
Made some preliminary changes ready for unicodeization.
|
||||||
Several bug fixes discovered during unicodeization.
|
Several bug fixes discovered during unicodeization.
|
||||||
|
July 02 2007 - V2.00
|
||||||
|
Converted internal character handling to UTF8.
|
||||||
|
Trained with 6 languages.
|
||||||
|
Added unicharset_extractor, wordlist2dawg.
|
||||||
|
Added boxfile creation mode.
|
||||||
|
Added UNLV regression test capability.
|
||||||
|
Fixed problems with copyright and registered symbols.
|
||||||
|
Fixed extern "C" declarations problem.
|
||||||
|
@ -1,7 +1,9 @@
|
|||||||
# TODO(luc) Add 'doc' to this list when ready
|
# TODO(luc) Add 'doc' to this list when ready
|
||||||
SUBDIRS = ccstruct ccutil classify cutil dict display image textord viewer wordrec ccmain training tessdata dlltest
|
SUBDIRS = ccstruct ccutil classify cutil dict display image textord viewer wordrec ccmain training tessdata testing dlltest
|
||||||
|
|
||||||
EXTRA_DIST = tessdata phototest.tif tesseract.dsp tesseract.dsw StdAfx.cpp StdAfx.h tessdll.cpp tessdll.h tessdll.dsp tessdll.dll tessdll.lib tesseract.exe dlltest.exe ReleaseNotes
|
EXTRA_DIST = eurotext.tif phototest.tif ReleaseNotes tesseract.spec config \
|
||||||
|
tesseract.dsp tesseract.dsw tesseract.vcproj tesseract.sln \
|
||||||
|
StdAfx.cpp StdAfx.h tessdll.cpp tessdll.h tessdll.dsp tessdll.vcproj
|
||||||
|
|
||||||
#EXTRA_DIST = doc/html doc/@PACKAGE_NAME@_@PACKAGE_VERSION@.pdf doc/@PACKAGE_NAME@_@PACKAGE_VERSION@.ps.gz
|
#EXTRA_DIST = doc/html doc/@PACKAGE_NAME@_@PACKAGE_VERSION@.pdf doc/@PACKAGE_NAME@_@PACKAGE_VERSION@.ps.gz
|
||||||
|
|
||||||
|
16
README
16
README
@ -66,10 +66,11 @@ This code is a raw OCR engine. It has NO PAGE LAYOUT ANALYSIS, NO OUTPUT
|
|||||||
FORMATTING, and NO UI. It can only process an image of a single column
|
FORMATTING, and NO UI. It can only process an image of a single column
|
||||||
and create text from it. It can detect fixed pitch vs proportional text.
|
and create text from it. It can detect fixed pitch vs proportional text.
|
||||||
Having said that, in 1995, this engine was in the top 3 in terms of character
|
Having said that, in 1995, this engine was in the top 3 in terms of character
|
||||||
accuracy, and it compiles and runs on both Linux and Windows. Another current
|
accuracy, and it compiles and runs on both Linux and Windows.
|
||||||
limitation is that it only recognizes English and its character set is only
|
As of 2.0, Tesseract is fully unicode (UTF-8) enabled, and can recognize 6
|
||||||
US-ASCII. Training code IS included in the open source release however, and
|
languages "out of the box." Code and documentation are provided for the brave
|
||||||
will be included in a future release.
|
to train in other languages. See code.google.com/p/tesseract-ocr for more
|
||||||
|
information on training.
|
||||||
|
|
||||||
|
|
||||||
Using the Engine
|
Using the Engine
|
||||||
@ -77,7 +78,7 @@ Using the Engine
|
|||||||
Windows:
|
Windows:
|
||||||
The executable must reside in the same directory as the tessdata directory
|
The executable must reside in the same directory as the tessdata directory
|
||||||
The command line is:
|
The command line is:
|
||||||
tesseract <image.tif> <output>
|
tesseract <image.tif> <output> [-l langid]
|
||||||
A windows executable (tesseract.exe) is included in the distribution, but
|
A windows executable (tesseract.exe) is included in the distribution, but
|
||||||
may not work for you unless you also have the correct mfc and crt dlls.
|
may not work for you unless you also have the correct mfc and crt dlls.
|
||||||
There is also a tessdll.dll, which you can use to run tesseract from your
|
There is also a tessdll.dll, which you can use to run tesseract from your
|
||||||
@ -95,11 +96,12 @@ export TESSDATA_PREFIX="directory in which your tessdata resides/"
|
|||||||
variable. Note that the directory must end in a /
|
variable. Note that the directory must end in a /
|
||||||
HAVING tesseract and tessdata IN THE SAME DIRECTORY DOES NOT WORK ANY MORE.
|
HAVING tesseract and tessdata IN THE SAME DIRECTORY DOES NOT WORK ANY MORE.
|
||||||
The command line is:
|
The command line is:
|
||||||
tesseract <image.tif> <output>
|
tesseract <image.tif> <output> [-l langid]
|
||||||
|
|
||||||
All Systems:
|
All Systems:
|
||||||
The image file requires a .tif extension for its type to be recognized
|
The image file requires a .tif extension for its type to be recognized
|
||||||
correctly. If a file exists with the .tif extension replaced by .uzn, then it
|
correctly. If a file exists with the .tif extension replaced by .uzn, then it
|
||||||
will be interpreted as a UNLV-style zone file. (See www.isri.unlv.edu for
|
will be interpreted as a UNLV-style zone file. (See www.isri.unlv.edu for
|
||||||
details of the zone files.)
|
details of the zone files.)
|
||||||
|
langid may be one of the codes defined in ISO 639-2, and you must download
|
||||||
|
the corresponding data files into your tessdata directory.
|
||||||
|
49
ReleaseNotes
49
ReleaseNotes
@ -1,3 +1,52 @@
|
|||||||
|
Tesseract release notes July 17, 2007 - V2.00
|
||||||
|
|
||||||
|
First release of the International version.
|
||||||
|
This version recognizes the following languages:
|
||||||
|
English - eng
|
||||||
|
French - fra
|
||||||
|
Italian - ita
|
||||||
|
German - deu
|
||||||
|
Spanish - spa
|
||||||
|
Dutch - nld
|
||||||
|
The language codes follow ISO 639-2. The default language is English.
|
||||||
|
To recognize another language:
|
||||||
|
tesseract inputimage outputbase -l langcode
|
||||||
|
|
||||||
|
To train on a new language, see separate documentation.
|
||||||
|
More languages will be appearing over time.
|
||||||
|
|
||||||
|
List of changes in this release:
|
||||||
|
Converted internal character handling to UTF8.
|
||||||
|
Trained with 6 languages.
|
||||||
|
Added unicharset_extractor, wordlist2dawg.
|
||||||
|
Added boxfile creation mode.
|
||||||
|
Added UNLV regression test capability.
|
||||||
|
Fixed problems with copyright and registered symbols.
|
||||||
|
Fixed extern "C" declarations problem.
|
||||||
|
Made some improvements to consistency of accuracy across platforms.
|
||||||
|
Added vc++ express support.
|
||||||
|
|
||||||
|
Instructions for downloading and building version 2.00.
|
||||||
|
Things have changed quite a bit since the previous versions so please read carefully.
|
||||||
|
*All users*
|
||||||
|
The tarballs are split into pieces.
|
||||||
|
tesseract-2.00.tar.gz contains all the source code.
|
||||||
|
tesseract-2.00.<lang>.tar.gz contains the data files for <lang>. You need at least one of these or tesseract will not work.
|
||||||
|
tesseract-2.00.exe.tar.gz is not for the 'exe' language. It contains the Windows executables. They are built with VC++ express and come with absolutely no warranty. If they work for you then great, otherwise get visual C++ express (and the platform sdk) and build from the source.
|
||||||
|
|
||||||
|
*Non-windows users*
|
||||||
|
As with 1.04, this version works with make install.
|
||||||
|
*New* there is a tesseract.spec for making rpms. (Thanks to Andrew Ziem for the help.)
|
||||||
|
It might work with your OS if you know how to do that sort of thing.
|
||||||
|
If you are linking to the libraries, as with Ocropus, there is now a single master
|
||||||
|
library called libtesseract_full.a.
|
||||||
|
|
||||||
|
*Windows users*
|
||||||
|
If you are building from the sources, there are still dsw and dsp files for vc++6 and also
|
||||||
|
sln and vcproj files for vc++ express.
|
||||||
|
The dll has been updated to allow input of non-binary images. (Thanks to Glen of Jetsoft.)
|
||||||
|
|
||||||
|
|
||||||
Tesseract release notes May 15, 2007 - V1.04.
|
Tesseract release notes May 15, 2007 - V1.04.
|
||||||
|
|
||||||
=== Windows users only ===
|
=== Windows users only ===
|
||||||
|
@ -15,7 +15,7 @@ include_HEADERS = \
|
|||||||
tessbox.h tessedit.h tesseractmain.h tessvars.h tfacep.h \
|
tessbox.h tessedit.h tesseractmain.h tessvars.h tfacep.h \
|
||||||
tessembedded.h tfacepp.h tstruct.h werdit.h
|
tessembedded.h tfacepp.h tstruct.h werdit.h
|
||||||
|
|
||||||
lib_LIBRARIES = libtesseract_main.a
|
lib_LIBRARIES = libtesseract_main.a libtesseract_full.a
|
||||||
libtesseract_main_a_SOURCES = \
|
libtesseract_main_a_SOURCES = \
|
||||||
tessedit.cpp adaptions.cpp applybox.cpp \
|
tessedit.cpp adaptions.cpp applybox.cpp \
|
||||||
baseapi.cpp blobcmp.cpp \
|
baseapi.cpp blobcmp.cpp \
|
||||||
@ -24,10 +24,8 @@ libtesseract_main_a_SOURCES = \
|
|||||||
imgscale.cpp matmatch.cpp output.cpp paircmp.cpp \
|
imgscale.cpp matmatch.cpp output.cpp paircmp.cpp \
|
||||||
reject.cpp scaleimg.cpp tessbox.cpp tessvars.cpp \
|
reject.cpp scaleimg.cpp tessbox.cpp tessvars.cpp \
|
||||||
tfacepp.cpp tstruct.cpp werdit.cpp
|
tfacepp.cpp tstruct.cpp werdit.cpp
|
||||||
|
libtesseract_full_a_SOURCES = tesseractfull.cc
|
||||||
bin_PROGRAMS = tesseract
|
libtesseract_full.o: tesseractfull.o \
|
||||||
tesseract_SOURCES = tesseractmain.cpp
|
|
||||||
tesseract_LDADD = \
|
|
||||||
libtesseract_main.a \
|
libtesseract_main.a \
|
||||||
../display/libtesseract_display.a \
|
../display/libtesseract_display.a \
|
||||||
../textord/libtesseract_textord.a \
|
../textord/libtesseract_textord.a \
|
||||||
@ -39,3 +37,23 @@ tesseract_LDADD = \
|
|||||||
../cutil/libtesseract_cutil.a \
|
../cutil/libtesseract_cutil.a \
|
||||||
../ccstruct/libtesseract_ccstruct.a \
|
../ccstruct/libtesseract_ccstruct.a \
|
||||||
../ccutil/libtesseract_ccutil.a
|
../ccutil/libtesseract_ccutil.a
|
||||||
|
ld -r -o libtesseract_full.o tesseractfull.o \
|
||||||
|
libtesseract_main.a \
|
||||||
|
../display/libtesseract_display.a \
|
||||||
|
../textord/libtesseract_textord.a \
|
||||||
|
../wordrec/libtesseract_wordrec.a \
|
||||||
|
../classify/libtesseract_classify.a \
|
||||||
|
../dict/libtesseract_dict.a \
|
||||||
|
../viewer/libtesseract_viewer.a \
|
||||||
|
../image/libtesseract_image.a \
|
||||||
|
../cutil/libtesseract_cutil.a \
|
||||||
|
../ccstruct/libtesseract_ccstruct.a \
|
||||||
|
../ccutil/libtesseract_ccutil.a
|
||||||
|
|
||||||
|
libtesseract_full.a : libtesseract_full.o
|
||||||
|
ar cru libtesseract_full.a libtesseract_full.o ; ranlib libtesseract_full.a
|
||||||
|
|
||||||
|
bin_PROGRAMS = tesseract
|
||||||
|
tesseract_SOURCES = tesseractmain.cpp
|
||||||
|
tesseract_LDADD = \
|
||||||
|
libtesseract_full.a
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
# ----------------------------------------
|
# ----------------------------------------
|
||||||
|
|
||||||
AC_PREREQ(2.50)
|
AC_PREREQ(2.50)
|
||||||
AC_INIT(Tesseract, 1.04, theraysmith@gmail.com)
|
AC_INIT(tesseract, 2.00, theraysmith@gmail.com)
|
||||||
AC_REVISION($Id: configure.ac,v 1.4 2007/02/02 22:38:17 theraysmith Exp $)
|
AC_REVISION($Id: configure.ac,v 1.4 2007/02/02 22:38:17 theraysmith Exp $)
|
||||||
AC_CONFIG_AUX_DIR(config)
|
AC_CONFIG_AUX_DIR(config)
|
||||||
AC_CONFIG_SRCDIR(ccmain/tesseractmain.cpp)
|
AC_CONFIG_SRCDIR(ccmain/tesseractmain.cpp)
|
||||||
@ -18,7 +18,7 @@ AC_CANONICAL_HOST
|
|||||||
# documentation.
|
# documentation.
|
||||||
# TODO(luc) Generate good documentation using doxygen or equivalent
|
# TODO(luc) Generate good documentation using doxygen or equivalent
|
||||||
PACKAGE_YEAR=2007
|
PACKAGE_YEAR=2007
|
||||||
PACKAGE_DATE="05/2007"
|
PACKAGE_DATE="07/2007"
|
||||||
|
|
||||||
AC_DEFINE_UNQUOTED(PACKAGE_NAME,["${PACKAGE_NAME}"],[Name of package])
|
AC_DEFINE_UNQUOTED(PACKAGE_NAME,["${PACKAGE_NAME}"],[Name of package])
|
||||||
AC_DEFINE_UNQUOTED(PACKAGE_VERSION,["${PACKAGE_VERSION}"],[Version number])
|
AC_DEFINE_UNQUOTED(PACKAGE_VERSION,["${PACKAGE_VERSION}"],[Version number])
|
||||||
@ -296,6 +296,9 @@ AC_CONFIG_FILES(viewer/Makefile)
|
|||||||
AC_CONFIG_FILES(wordrec/Makefile)
|
AC_CONFIG_FILES(wordrec/Makefile)
|
||||||
AC_CONFIG_FILES(training/Makefile)
|
AC_CONFIG_FILES(training/Makefile)
|
||||||
AC_CONFIG_FILES(tessdata/Makefile)
|
AC_CONFIG_FILES(tessdata/Makefile)
|
||||||
|
AC_CONFIG_FILES(tessdata/configs/Makefile)
|
||||||
|
AC_CONFIG_FILES(tessdata/tessconfigs/Makefile)
|
||||||
|
AC_CONFIG_FILES(testing/Makefile)
|
||||||
AC_CONFIG_FILES(dlltest/Makefile)
|
AC_CONFIG_FILES(dlltest/Makefile)
|
||||||
# AC_CONFIG_FILES(doc/Doxyfile)
|
# AC_CONFIG_FILES(doc/Doxyfile)
|
||||||
# AC_CONFIG_FILES(doc/header.html)
|
# AC_CONFIG_FILES(doc/header.html)
|
||||||
|
@ -3,9 +3,11 @@ AM_CPPFLAGS = -I$(top_srcdir)/cutil -I$(top_srcdir)/ccutil
|
|||||||
|
|
||||||
include_HEADERS = \
|
include_HEADERS = \
|
||||||
choicearr.h choices.h context.h dawg.h hyphen.h matchdefs.h \
|
choicearr.h choices.h context.h dawg.h hyphen.h matchdefs.h \
|
||||||
permdawg.h permnum.h permute.h states.h stopper.h trie.h
|
permdawg.h permnum.h permute.h states.h stopper.h trie.h \
|
||||||
|
lookdawg.h makedawg.h reduce.h
|
||||||
|
|
||||||
lib_LIBRARIES = libtesseract_dict.a
|
lib_LIBRARIES = libtesseract_dict.a
|
||||||
libtesseract_dict_a_SOURCES = \
|
libtesseract_dict_a_SOURCES = \
|
||||||
choices.cpp context.cpp dawg.cpp hyphen.cpp permdawg.cpp \
|
choices.cpp context.cpp dawg.cpp hyphen.cpp permdawg.cpp \
|
||||||
permnum.cpp permute.cpp states.cpp stopper.cpp trie.cpp
|
permnum.cpp permute.cpp states.cpp stopper.cpp trie.cpp \
|
||||||
|
lookdawg.cpp makedawg.cpp reduce.cpp
|
||||||
|
@ -1,2 +1,2 @@
|
|||||||
include_HEADERS = dlltest.cpp dlltest.dsp
|
include_HEADERS = dlltest.cpp dlltest.dsp dlltest.vcproj
|
||||||
|
|
||||||
|
BIN
eurotext.tif
Normal file
BIN
eurotext.tif
Normal file
Binary file not shown.
9
makemoredists
Executable file
9
makemoredists
Executable file
@ -0,0 +1,9 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
winlist="tessdll.dll tessdll.lib tesseract.exe dlltest.exe training/cnTraining.exe training/mfTraining.exe training/unicharset_extractor.exe training/wordlist2dawg.exe"
|
||||||
|
ver=`ls -1rt *[0-9].tar.gz |tail -1`
|
||||||
|
ver=${ver%.tar.gz}
|
||||||
|
for l in eng deu fra ita spa nld
|
||||||
|
do
|
||||||
|
tar chozf $ver.$l.tar.gz tessdata/$l.*
|
||||||
|
done
|
||||||
|
tar chozf $ver.exe.tar.gz $winlist
|
@ -1,2 +1,31 @@
|
|||||||
datadir = @datadir@/tessdata
|
datadir = @datadir@/tessdata
|
||||||
data_DATA = confsets eng.DangAmbigs eng.freq-dawg eng.inttemp eng.normproto eng.pffmtable eng.user-words eng.word-dawg eng.unicharset
|
data_DATA = confsets \
|
||||||
|
fra.DangAmbigs fra.freq-dawg fra.inttemp fra.normproto \
|
||||||
|
fra.pffmtable fra.user-words fra.word-dawg fra.unicharset \
|
||||||
|
ita.DangAmbigs ita.freq-dawg ita.inttemp ita.normproto \
|
||||||
|
ita.pffmtable ita.user-words ita.word-dawg ita.unicharset \
|
||||||
|
deu.DangAmbigs deu.freq-dawg deu.inttemp deu.normproto \
|
||||||
|
deu.pffmtable deu.user-words deu.word-dawg deu.unicharset \
|
||||||
|
spa.DangAmbigs spa.freq-dawg spa.inttemp spa.normproto \
|
||||||
|
spa.pffmtable spa.user-words spa.word-dawg spa.unicharset \
|
||||||
|
nld.DangAmbigs nld.freq-dawg nld.inttemp nld.normproto \
|
||||||
|
nld.pffmtable nld.user-words nld.word-dawg nld.unicharset \
|
||||||
|
eng.DangAmbigs eng.freq-dawg eng.inttemp eng.normproto \
|
||||||
|
eng.pffmtable eng.user-words eng.word-dawg eng.unicharset
|
||||||
|
|
||||||
|
SUBDIRS = configs tessconfigs
|
||||||
|
|
||||||
|
EXTRA_DIST = confsets makedummies
|
||||||
|
|
||||||
|
eng.DangAmbigs eng.freq-dawg eng.inttemp eng.normproto eng.pffmtable eng.user-words eng.word-dawg eng.unicharset : makedummies
|
||||||
|
./makedummies eng
|
||||||
|
fra.DangAmbigs fra.freq-dawg fra.inttemp fra.normproto fra.pffmtable fra.user-words fra.word-dawg fra.unicharset : makedummies
|
||||||
|
./makedummies fra
|
||||||
|
ita.DangAmbigs ita.freq-dawg ita.inttemp ita.normproto ita.pffmtable ita.user-words ita.word-dawg ita.unicharset : makedummies
|
||||||
|
./makedummies ita
|
||||||
|
deu.DangAmbigs deu.freq-dawg deu.inttemp deu.normproto deu.pffmtable deu.user-words deu.word-dawg deu.unicharset : makedummies
|
||||||
|
./makedummies deu
|
||||||
|
spa.DangAmbigs spa.freq-dawg spa.inttemp spa.normproto spa.pffmtable spa.user-words spa.word-dawg spa.unicharset : makedummies
|
||||||
|
./makedummies spa
|
||||||
|
nld.DangAmbigs nld.freq-dawg nld.inttemp nld.normproto nld.pffmtable nld.user-words nld.word-dawg nld.unicharset : makedummies
|
||||||
|
./makedummies nld
|
||||||
|
3
tessdata/configs/Makefile.am
Normal file
3
tessdata/configs/Makefile.am
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
datadir = @datadir@/tessdata/configs
|
||||||
|
data_DATA = inter makebox box.train unlv
|
||||||
|
EXTRA_DIST = inter makebox box.train unlv
|
8
tessdata/makedummies
Executable file
8
tessdata/makedummies
Executable file
@ -0,0 +1,8 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
for f in DangAmbigs freq-dawg inttemp normproto pffmtable unicharset user-words word-dawg
|
||||||
|
do
|
||||||
|
if [ ! -r $1.$f ]
|
||||||
|
then
|
||||||
|
touch $1.$f
|
||||||
|
fi
|
||||||
|
done
|
3
tessdata/tessconfigs/Makefile.am
Normal file
3
tessdata/tessconfigs/Makefile.am
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
datadir = @datadir@/tessdata/tessconfigs
|
||||||
|
data_DATA = batch batch.nochop nobatch matdemo segdemo msdemo
|
||||||
|
EXTRA_DIST = batch batch.nochop nobatch matdemo segdemo msdemo
|
188
tesseract.spec
Normal file
188
tesseract.spec
Normal file
@ -0,0 +1,188 @@
|
|||||||
|
# This is a rough draft that may only work on Fedora Core 6.
|
||||||
|
# Andrew Ziem, 25 May 2007
|
||||||
|
# Hacked to add the new languages as separate language packs.
|
||||||
|
# Ray Smith, 16 July 2007
|
||||||
|
|
||||||
|
|
||||||
|
Name: tesseract
|
||||||
|
Version: 2.00
|
||||||
|
Release: 1%{?dist}
|
||||||
|
Summary: Open source OCR Engine developed by HP Labs - now improved by Google
|
||||||
|
|
||||||
|
Group: Applications/Multimedia
|
||||||
|
License: Apache License
|
||||||
|
URL: http://code.google.com/p/tesseract-ocr/
|
||||||
|
Source0: http://tesseract-ocr.googlecode.com/files/tesseract-%{version}.tar.gz
|
||||||
|
Source1: http://tesseract-ocr.googlecode.com/files/tesseract-%{version}.eng.tar.gz
|
||||||
|
Source2: http://tesseract-ocr.googlecode.com/files/tesseract-%{version}.fra.tar.gz
|
||||||
|
Source3: http://tesseract-ocr.googlecode.com/files/tesseract-%{version}.ita.tar.gz
|
||||||
|
Source4: http://tesseract-ocr.googlecode.com/files/tesseract-%{version}.deu.tar.gz
|
||||||
|
Source5: http://tesseract-ocr.googlecode.com/files/tesseract-%{version}.spa.tar.gz
|
||||||
|
Source6: http://tesseract-ocr.googlecode.com/files/tesseract-%{version}.nld.tar.gz
|
||||||
|
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
|
||||||
|
|
||||||
|
#BuildRequires: compat-gcc-34-c++
|
||||||
|
BuildRequires: libtiff-devel
|
||||||
|
|
||||||
|
%package devel
|
||||||
|
Summary: Development files for tesseract
|
||||||
|
Group: Development/Libraries
|
||||||
|
Requires: %name = %{version}
|
||||||
|
|
||||||
|
|
||||||
|
%description
|
||||||
|
The Tesseract OCR engine was one of the top 3 engines in the 1995 UNLV
|
||||||
|
Accuracy test. Since then it has had little work done on it, but it is
|
||||||
|
probably one of the most accurate open source OCR engines available. The
|
||||||
|
source code will read a binary, grey or color image and output text. A tiff
|
||||||
|
reader is built in that will read uncompressed TIFF images, or libtiff can
|
||||||
|
be added to read compressed images.
|
||||||
|
|
||||||
|
%description devel
|
||||||
|
tesseract libraries and includes
|
||||||
|
|
||||||
|
%prep
|
||||||
|
%setup -q
|
||||||
|
|
||||||
|
tar xzvf %{_sourcedir}/tesseract-%{version}.eng.tar.gz
|
||||||
|
tar xzvf %{_sourcedir}/tesseract-%{version}.fra.tar.gz
|
||||||
|
tar xzvf %{_sourcedir}/tesseract-%{version}.ita.tar.gz
|
||||||
|
tar xzvf %{_sourcedir}/tesseract-%{version}.deu.tar.gz
|
||||||
|
tar xzvf %{_sourcedir}/tesseract-%{version}.spa.tar.gz
|
||||||
|
tar xzvf %{_sourcedir}/tesseract-%{version}.nld.tar.gz
|
||||||
|
|
||||||
|
|
||||||
|
%build
|
||||||
|
export CFLAGS=
|
||||||
|
export CXXFLAGS=
|
||||||
|
# Should build with gcc4.1 now...
|
||||||
|
#export CC=gcc34
|
||||||
|
#export CXX=g++34
|
||||||
|
# % configure
|
||||||
|
./configure --bindir=%{_bindir} --datadir=%{_datadir} --libdir=%{_libdir} --includedir=%{_includedir}
|
||||||
|
make %{?_smp_mflags}
|
||||||
|
|
||||||
|
|
||||||
|
%install
|
||||||
|
rm -rf $RPM_BUILD_ROOT
|
||||||
|
make install DESTDIR=$RPM_BUILD_ROOT
|
||||||
|
|
||||||
|
|
||||||
|
%clean
|
||||||
|
rm -rf $RPM_BUILD_ROOT
|
||||||
|
|
||||||
|
|
||||||
|
%files
|
||||||
|
%defattr(-,root,root,-)
|
||||||
|
%doc AUTHORS COPYING ChangeLog README
|
||||||
|
%{_bindir}/cntraining
|
||||||
|
%{_bindir}/mftraining
|
||||||
|
%{_bindir}/tesseract
|
||||||
|
%{_bindir}/unicharset_extractor
|
||||||
|
%{_bindir}/wordlist2dawg
|
||||||
|
%{_datadir}/tessdata/configs
|
||||||
|
%{_datadir}/tessdata/confsets
|
||||||
|
%{_datadir}/tessdata/tessconfigs
|
||||||
|
|
||||||
|
%files devel
|
||||||
|
%{_includedir}/tesseract/
|
||||||
|
%{_libdir}/libtesseract*
|
||||||
|
|
||||||
|
%package eng
|
||||||
|
Group: Applications/Multimedia
|
||||||
|
Summary: English language pack for tesseract
|
||||||
|
%description eng
|
||||||
|
The %{name}-%{version}.eng package contains the data files required to recognize English
|
||||||
|
|
||||||
|
%files eng
|
||||||
|
%{_datadir}/tessdata/eng.DangAmbigs
|
||||||
|
%{_datadir}/tessdata/eng.freq-dawg
|
||||||
|
%{_datadir}/tessdata/eng.inttemp
|
||||||
|
%{_datadir}/tessdata/eng.normproto
|
||||||
|
%{_datadir}/tessdata/eng.pffmtable
|
||||||
|
%{_datadir}/tessdata/eng.unicharset
|
||||||
|
%{_datadir}/tessdata/eng.user-words
|
||||||
|
%{_datadir}/tessdata/eng.word-dawg
|
||||||
|
|
||||||
|
%package fra
|
||||||
|
Group: Applications/Multimedia
|
||||||
|
Summary: French language pack for tesseract
|
||||||
|
%description fra
|
||||||
|
The %{name}-%{version}.fra package contains the data files required to recognize French
|
||||||
|
|
||||||
|
%files fra
|
||||||
|
%{_datadir}/tessdata/fra.DangAmbigs
|
||||||
|
%{_datadir}/tessdata/fra.freq-dawg
|
||||||
|
%{_datadir}/tessdata/fra.inttemp
|
||||||
|
%{_datadir}/tessdata/fra.normproto
|
||||||
|
%{_datadir}/tessdata/fra.pffmtable
|
||||||
|
%{_datadir}/tessdata/fra.unicharset
|
||||||
|
%{_datadir}/tessdata/fra.user-words
|
||||||
|
%{_datadir}/tessdata/fra.word-dawg
|
||||||
|
|
||||||
|
%package ita
|
||||||
|
Group: Applications/Multimedia
|
||||||
|
Summary: Italian language pack for tesseract
|
||||||
|
%description ita
|
||||||
|
The %{name}-%{version}.ita package contains the data files required to recognize Italian
|
||||||
|
|
||||||
|
%files ita
|
||||||
|
%{_datadir}/tessdata/ita.DangAmbigs
|
||||||
|
%{_datadir}/tessdata/ita.freq-dawg
|
||||||
|
%{_datadir}/tessdata/ita.inttemp
|
||||||
|
%{_datadir}/tessdata/ita.normproto
|
||||||
|
%{_datadir}/tessdata/ita.pffmtable
|
||||||
|
%{_datadir}/tessdata/ita.unicharset
|
||||||
|
%{_datadir}/tessdata/ita.user-words
|
||||||
|
%{_datadir}/tessdata/ita.word-dawg
|
||||||
|
|
||||||
|
%package deu
|
||||||
|
Group: Applications/Multimedia
|
||||||
|
Summary: German language pack for tesseract
|
||||||
|
%description deu
|
||||||
|
The %{name}-%{version}.deu package contains the data files required to recognize German
|
||||||
|
|
||||||
|
%files deu
|
||||||
|
%{_datadir}/tessdata/deu.DangAmbigs
|
||||||
|
%{_datadir}/tessdata/deu.freq-dawg
|
||||||
|
%{_datadir}/tessdata/deu.inttemp
|
||||||
|
%{_datadir}/tessdata/deu.normproto
|
||||||
|
%{_datadir}/tessdata/deu.pffmtable
|
||||||
|
%{_datadir}/tessdata/deu.unicharset
|
||||||
|
%{_datadir}/tessdata/deu.user-words
|
||||||
|
%{_datadir}/tessdata/deu.word-dawg
|
||||||
|
|
||||||
|
%package spa
|
||||||
|
Group: Applications/Multimedia
|
||||||
|
Summary: Spanish language pack for tesseract
|
||||||
|
%description spa
|
||||||
|
The %{name}-%{version}.spa package contains the data files required to recognize Spanish
|
||||||
|
|
||||||
|
%files spa
|
||||||
|
%{_datadir}/tessdata/spa.DangAmbigs
|
||||||
|
%{_datadir}/tessdata/spa.freq-dawg
|
||||||
|
%{_datadir}/tessdata/spa.inttemp
|
||||||
|
%{_datadir}/tessdata/spa.normproto
|
||||||
|
%{_datadir}/tessdata/spa.pffmtable
|
||||||
|
%{_datadir}/tessdata/spa.unicharset
|
||||||
|
%{_datadir}/tessdata/spa.user-words
|
||||||
|
%{_datadir}/tessdata/spa.word-dawg
|
||||||
|
|
||||||
|
%package nld
|
||||||
|
Group: Applications/Multimedia
|
||||||
|
Summary: Dutch language pack for tesseract
|
||||||
|
%description nld
|
||||||
|
The %{name}-%{version}.nld package contains the data files required to recognize Dutch
|
||||||
|
|
||||||
|
%files nld
|
||||||
|
%{_datadir}/tessdata/nld.DangAmbigs
|
||||||
|
%{_datadir}/tessdata/nld.freq-dawg
|
||||||
|
%{_datadir}/tessdata/nld.inttemp
|
||||||
|
%{_datadir}/tessdata/nld.normproto
|
||||||
|
%{_datadir}/tessdata/nld.pffmtable
|
||||||
|
%{_datadir}/tessdata/nld.unicharset
|
||||||
|
%{_datadir}/tessdata/nld.user-words
|
||||||
|
%{_datadir}/tessdata/nld.word-dawg
|
||||||
|
|
||||||
|
|
||||||
|
%changelog
|
@ -7,15 +7,19 @@ AM_CPPFLAGS = \
|
|||||||
-I$(top_srcdir)/wordrec -I$(top_srcdir)/cutil \
|
-I$(top_srcdir)/wordrec -I$(top_srcdir)/cutil \
|
||||||
-I$(top_srcdir)/textord
|
-I$(top_srcdir)/textord
|
||||||
|
|
||||||
include_HEADERS = \
|
EXTRA_DIST = training.cpp unicharset_extractor.dsp wordlist2dawg.dsp \
|
||||||
cnTraining.dsp mfTraining.dsp \
|
cnTraining.dsp mfTraining.dsp \
|
||||||
|
unicharset_extractor.vcproj wordlist2dawg.vcproj \
|
||||||
|
cnTraining.vcproj mfTraining.vcproj
|
||||||
|
|
||||||
|
include_HEADERS = \
|
||||||
mergenf.h name2char.h training.h
|
mergenf.h name2char.h training.h
|
||||||
|
|
||||||
lib_LIBRARIES = libtesseract_training.a
|
lib_LIBRARIES = libtesseract_training.a
|
||||||
libtesseract_training_a_SOURCES = \
|
libtesseract_training_a_SOURCES = \
|
||||||
name2char.cpp training.cpp
|
name2char.cpp
|
||||||
|
|
||||||
bin_PROGRAMS = cntraining mftraining
|
bin_PROGRAMS = cntraining mftraining unicharset_extractor wordlist2dawg
|
||||||
cntraining_SOURCES = cnTraining.cpp
|
cntraining_SOURCES = cnTraining.cpp
|
||||||
cntraining_LDADD = \
|
cntraining_LDADD = \
|
||||||
libtesseract_training.a \
|
libtesseract_training.a \
|
||||||
@ -38,3 +42,13 @@ mftraining_LDADD = \
|
|||||||
../ccstruct/libtesseract_ccstruct.a \
|
../ccstruct/libtesseract_ccstruct.a \
|
||||||
../viewer/libtesseract_viewer.a \
|
../viewer/libtesseract_viewer.a \
|
||||||
../ccutil/libtesseract_ccutil.a
|
../ccutil/libtesseract_ccutil.a
|
||||||
|
unicharset_extractor_SOURCES = unicharset_extractor.cpp
|
||||||
|
unicharset_extractor_LDADD = \
|
||||||
|
../ccutil/libtesseract_ccutil.a
|
||||||
|
wordlist2dawg_SOURCES = wordlist2dawg.cpp
|
||||||
|
wordlist2dawg_LDADD = \
|
||||||
|
../dict/libtesseract_dict.a \
|
||||||
|
../cutil/libtesseract_cutil.a \
|
||||||
|
../ccstruct/libtesseract_ccstruct.a \
|
||||||
|
../viewer/libtesseract_viewer.a \
|
||||||
|
../ccutil/libtesseract_ccutil.a
|
||||||
|
Loading…
Reference in New Issue
Block a user