mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-18 14:41:36 +08:00
man pages included to install script, improved windows installer script (issue 425), output format for "tesseract -v" changed to "3.00 version", README cleanup.
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@601 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
parent
411e074b4d
commit
9b9efa8e4c
@ -1,13 +1,12 @@
|
||||
# TODO(luc) Add 'doc' to this list when ready
|
||||
ACLOCAL_AMFLAGS = -I m4
|
||||
SUBDIRS = ccstruct ccutil classify cube cutil dict image neural_networks/runtime textord viewer wordrec ccmain training tessdata testing java api
|
||||
SUBDIRS = ccstruct ccutil classify cube cutil dict image neural_networks/runtime textord viewer wordrec ccmain training tessdata testing java api doc
|
||||
#if USING_GETTEXT
|
||||
#SUBDIRS += po
|
||||
#AM_CPPFLAGS = -DLOCALEDIR=\"$(localedir)\"
|
||||
#endif
|
||||
|
||||
EXTRA_DIST = config/config.rpath eurotext.tif phototest.tif ReleaseNotes \
|
||||
acinclude.m4 config configure.ac runautoconf tesseract.spec doc contrib
|
||||
acinclude.m4 config configure.ac runautoconf tesseract.spec contrib
|
||||
|
||||
#EXTRA_DIST = doc/html doc/@PACKAGE_NAME@_@PACKAGE_VERSION@.pdf doc/@PACKAGE_NAME@_@PACKAGE_VERSION@.ps.gz
|
||||
|
||||
@ -19,6 +18,6 @@ dist-hook:
|
||||
rm -rf `find $(distdir) -name .svn`
|
||||
rm -rf `find $(distdir) -name .deps`
|
||||
# Also remove extra files not needed in a distribution
|
||||
rm -rf `find $(distdir) -name configure.ac`
|
||||
# rm -rf `find $(distdir) -name configure.ac`
|
||||
rm -rf `find $(distdir) -name acinclude.m4`
|
||||
rm -rf `find $(distdir) -name aclocal.m4`
|
||||
# rm -rf `find $(distdir) -name aclocal.m4`
|
||||
|
110
README
110
README
@ -4,8 +4,9 @@ wiki ReadMe, which is located at:
|
||||
|
||||
Introduction
|
||||
============
|
||||
|
||||
This package contains the Tesseract Open Source OCR Engine.
|
||||
Orignally developed at Hewlett Packard Laboratories Bristol and
|
||||
Originally developed at Hewlett Packard Laboratories Bristol and
|
||||
at Hewlett Packard Co, Greeley Colorado, all the code
|
||||
in this distribution is now licensed under the Apache License:
|
||||
|
||||
@ -20,52 +21,73 @@ in this distribution is now licensed under the Apache License:
|
||||
** limitations under the License.
|
||||
|
||||
|
||||
Other Dependencies and Licenses:
|
||||
================================
|
||||
The Aspirin/MIGRAINES system is no longer required.
|
||||
Dependencies and Licenses:
|
||||
==========================
|
||||
|
||||
Leptonica is required. (www.leptonica.com). Tesseract no longer compiles
|
||||
without Leptonica.
|
||||
Libtiff is no longer required as a direct dependency.
|
||||
Instead, Leptonica is required. (www.leptonica.com)
|
||||
See http://code.google.com/p/tesseract-ocr/wiki/FAQ for details.
|
||||
As of 3.01, Tesseract no longer compiles without Leptonica.
|
||||
|
||||
|
||||
Installing and Running Tesseract
|
||||
All Users Do NOT Ignore!
|
||||
The tarballs are split into pieces.
|
||||
|
||||
tesseract-2.04.tar.gz contains all the source code.
|
||||
tesseract-x.xx.tar.gz contains all the source code.
|
||||
|
||||
tesseract-2.00.<lang>.tar.gz contains the language data files for <lang>. You need at least one of these or tesseract will not work.
|
||||
tesseract-x.xx.<lang>.tar.gz contains the language data files for <lang>.
|
||||
You need at least one of these or Tesseract will not work.
|
||||
|
||||
Note that tesseract-2.04.tar.gz unpacks to the tesseract-2.04 directory. tesseract-2.00.<lang>.tar.gz unpacks to the tessdata directory which belongs inside your tesseract-2.04 directory. It is therefore best to download them into your tesseract-2.04 directory, so you can use unpack here or equivalent. You can unpack as many of the language packs as you care to, as they all contain different files. Note that if you are using make install you should unpack your language data to your source tree before you run make install. If you unpack them as root to the destination directory of make install, then the user ids and access permissions might be messed up.
|
||||
Note that tesseract-x.xx.tar.gz unpacks to the tesseract-ocr directory.
|
||||
tesseract-x.xx.<lang>.tar.gz unpacks to the tessdata directory which
|
||||
belongs inside your tesseract-ocr directory. It is therefore best to
|
||||
download them into your tesseract-x.xx directory, so you can use unpack
|
||||
here or equivalent. You can unpack as many of the language packs as you
|
||||
care to, as they all contain different files. Note that if you are using
|
||||
make install you should unpack your language data to your source tree
|
||||
before you run make install. If you unpack them as root to the
|
||||
destination directory of make install, then the user ids and access
|
||||
permissions might be messed up.
|
||||
|
||||
boxtiff-2.01.<lang>.tar.gz contains data that was used in training for those that want to do their own training. Most users should NOT download these files.
|
||||
boxtiff-2.xx.<lang>.tar.gz contains data that was used in training for
|
||||
those that want to do their own training. Most users should NOT download
|
||||
these files.
|
||||
|
||||
Instructions for using the training tools are documented separately at TrainingTesseract and for testing at TestingTesseract.
|
||||
Instructions for using the training tools are documented separately on the
|
||||
Tesseract wiki: http://code.google.com/p/tesseract-ocr/w/list
|
||||
|
||||
Without Additional Libraries, Image format support is limited!
|
||||
|
||||
Without additional libraries, Tesseract can only read uncompressed TIFF. (And some versions of BMP) Upto version 2.04, you can add libtiff-dev. See the FAQ question on compressed TIFF for installation instructions. Version 3.00 will support additional formats via Leptonica, but requires more libraries to be added.
|
||||
Windows:
|
||||
--------
|
||||
|
||||
There is no windows installer! (Still looking for volunteers to create one.) There are windows executables: tesseract-2.04.exe.tar.gz (It is not for the 'exe' language.) They are built with VC++ express 2008 and come with absolutely no warranty. If they work for you then great, otherwise get Visual C++ Express 2008 with service pack 1 and build from the source. You can also try tesseract-2.01.exe.tar.gz, which is built with VC++6, and may work better if your windows is old, but note that this is an older version of Tesseract.
|
||||
Please use the installer (for 3.00 and above). Tesseract is a library with
|
||||
a command line interface. If you need a GUI, please check the AddOns wiki page:
|
||||
http://code.google.com/p/tesseract-ocr/wiki/AddOns#GUI
|
||||
|
||||
If you are building from the sources, there are still (up to v2.04) .dsw and .dsp files for vc++6, but the recommended build platform is now VC++ Express 2008. There are also .sln and .vcproj files for VC++ Express 2008, but these files are not backward compatible with any previous version - not even VC++ Express 2005. Note that the executables produced with the newer compiler are smaller, faster, and, believe it or not, more accurate. (See TestingTesseract.)
|
||||
If you are building from the sources, the recommended build platform is
|
||||
VC++ Express 2008 (optionally 2010).
|
||||
|
||||
New with 2.04: the executables are built with static linking, so they stand more chance of working out of the box on more windows systems.
|
||||
The executables are built with static linking, so they stand more chance
|
||||
of working out of the box on more windows systems.
|
||||
|
||||
The executable must reside in the same directory as the tessdata directory. (The Visual Studio projects build the release executable directly to the correct place!)
|
||||
The executable must reside in the same directory as the tessdata
|
||||
directory, or you need to set the environment variable TESSDATA_PREFIX.
|
||||
The installer will set it up for you.
|
||||
|
||||
The command line is:
|
||||
|
||||
tesseract <image.tif> <output> [-l <langid>]
|
||||
tesseract imagename outputbase [-l lang] [-psm pagesegmode] [configfiles...]
|
||||
|
||||
For interfacing to other applications, there is a DLL included with the executables, but you may be better off building it yourself. The DLL is NOT built for static C-Runtime, so you will probably need VC++ Express 2008 to run it.
|
||||
If you need an interface to other applications, please check the wrapper section
|
||||
on the AddOns wiki page:
|
||||
http://code.google.com/p/tesseract-ocr/wiki/AddOns#Tesseract_3.0x
|
||||
|
||||
The dll has been updated to allow input of non-binary images. (Thanks to Glen of Jetsoft.)
|
||||
|
||||
Non-Windows (or Cygwin):
|
||||
------------------------
|
||||
|
||||
You have to tell Tesseract through a standard unix mechanism where to find its data directory. You must either:
|
||||
You have to tell Tesseract through a standard unix mechanism where to
|
||||
find its data directory. You must either:
|
||||
|
||||
./configure
|
||||
make
|
||||
@ -77,13 +99,15 @@ export TESSDATA_PREFIX="directory in which your tessdata resides/"
|
||||
|
||||
In either case the command line is:
|
||||
|
||||
tesseract <image.tif> <output> [-l <langid>]
|
||||
tesseract imagename outputbase [-l lang] [-psm pagesegmode] [configfiles...]
|
||||
|
||||
New there is a tesseract.spec for making rpms. (Thanks to Andrew Ziem for the help.) It might work with your OS if you know how to do that.
|
||||
There is now a tesseract.spec for making rpms. (Thanks to Andrew Ziem for
|
||||
the help.) It might work with your OS if you know how to do that.
|
||||
|
||||
If you are linking to the libraries, as Ocropus does, please link to
|
||||
libtesseract_api.
|
||||
|
||||
If you are linking to the libraries, as Ocropus does, there is now a single master library called libtesseract_full.a.
|
||||
|
||||
Libtiff support should now be properly working via configure, but note that you need libtiff-dev, as that contains the header files required to compile the code that uses it.
|
||||
|
||||
History:
|
||||
========
|
||||
@ -94,38 +118,24 @@ A lot of the code was written in C, and then some more was written in C++.
|
||||
Since then all the code has been converted to at least compile with a C++
|
||||
compiler. Currently it builds under Linux with gcc4.4.3 and under Windows
|
||||
with VC++2008. The C++ code makes heavy use of a list system using macros.
|
||||
This predates stl, was portable before stl, and is more efficent than stl
|
||||
This predates stl, was portable before stl, and is more efficient than stl
|
||||
lists, but has the big negative that if you do get a segmentation violation,
|
||||
it is hard to debug.
|
||||
|
||||
The most recent change is that Tesseract can now recognize 6 languages, is fully UTF8 capable, and is fully trainable. See TrainingTesseract for more information on training.
|
||||
The most recent change is that Tesseract can now recognize 6 languages,
|
||||
is fully UTF8 capable, and is fully trainable. See TrainingTesseract for
|
||||
more information on training.
|
||||
|
||||
Tesseract was included in UNLV's Fourth Annual Test of OCR Accuracy. See http://www.isri.unlv.edu/downloads/AT-1995.pdf. With Tesseract 2.00, scripts are now included to allow anyone to reproduce some of these tests. See TestingTesseract for more details.
|
||||
|
||||
|
||||
Directory Structure (ordered by dependency):
|
||||
============================================
|
||||
ccmain Top-level code. The main program resides in tesseractmain.cpp.
|
||||
display An "editor" to view and operate on the internal structures.
|
||||
(Requires a working viewer - batteries not included.)
|
||||
wordrec The word-level recognizer.
|
||||
textord The module that organizes(orders) text into lines and words.
|
||||
classify The low-level character classifiers.
|
||||
ccstruct Classes to hold information about a page as it is being processed.
|
||||
viewer The client side of a client server viewing system.
|
||||
Unfortunately, at this time, the server side is not available.
|
||||
image Image class and processing functions.
|
||||
dict Language model code.
|
||||
cutil Code for file I/O, lists, heaps etc, from the old C code.
|
||||
ccutil Somewhat newer code for lists, memory allocation etc from the
|
||||
old C++ code.
|
||||
Tesseract was included in UNLV's Fourth Annual Test of OCR Accuracy.
|
||||
Results were available at http://www.isri.unlv.edu/downloads/AT-1995.pdf.
|
||||
With Tesseract 2.00, scripts were included to allow anyone to reproduce
|
||||
some of these tests. See TestingTesseract for more details.
|
||||
|
||||
|
||||
About the Engine
|
||||
================
|
||||
This code is a raw OCR engine. It has NO PAGE LAYOUT ANALYSIS, NO OUTPUT
|
||||
FORMATTING, and NO UI. It can only process an image of a single column
|
||||
and create text from it. It can detect fixed pitch vs proportional text.
|
||||
This code is a raw OCR engine. It has limited PAGE LAYOUT ANALYSIS, simple
|
||||
OUTPUT FORMATTING (txt, hocr/html), and NO UI.
|
||||
Having said that, in 1995, this engine was in the top 3 in terms of character
|
||||
accuracy, and it compiles and runs on both Linux and Windows.
|
||||
As of 2.0, Tesseract is fully unicode (UTF-8) enabled, and can recognize 6
|
||||
|
@ -55,7 +55,7 @@ int main(int argc, char **argv) {
|
||||
textdomain (PACKAGE);
|
||||
#endif
|
||||
if ((argc == 2 && strcmp(argv[1], "-v") == 0) || (argc == 2 && strcmp(argv[1], "--version") == 0)) {
|
||||
fprintf(stderr, "tesseract-%s\n", tesseract::TessBaseAPI::Version());
|
||||
fprintf(stderr, "tesseract %s\n", tesseract::TessBaseAPI::Version());
|
||||
exit(0);
|
||||
}
|
||||
// Make the order of args a bit more forgiving than it used to be.
|
||||
|
@ -392,6 +392,7 @@ AC_CONFIG_FILES(java/com/google/Makefile)
|
||||
AC_CONFIG_FILES(java/com/google/scrollview/Makefile)
|
||||
AC_CONFIG_FILES(java/com/google/scrollview/events/Makefile)
|
||||
AC_CONFIG_FILES(java/com/google/scrollview/ui/Makefile)
|
||||
AC_CONFIG_FILES(doc/Makefile)
|
||||
fi
|
||||
# AC_CONFIG_FILES(doc/Doxyfile)
|
||||
# AC_CONFIG_FILES(doc/header.html)
|
||||
|
5
doc/Makefile.am
Normal file
5
doc/Makefile.am
Normal file
@ -0,0 +1,5 @@
|
||||
man_MANS = cntraining.1 combine_tessdata.1 mftraining.1 tesseract.1 \
|
||||
unicharset_extractor.1 wordlist2dawg.1 unicharambigs.5 \
|
||||
unicharset.5
|
||||
EXTRA_DIST = $(man_MANS) MOCRadaptingtesseract2.pdf \
|
||||
PageLayoutAnalysisICDAR2.pdf tesseracticdar2007.pdf
|
@ -20,15 +20,18 @@
|
||||
; - replace hardcoded program name to variables (NAME and LONGNAME)
|
||||
; - place shortcuts in program files for all users
|
||||
|
||||
!define VERSION 3.00
|
||||
!define VERSION 3.01
|
||||
!define PRODUCT_NAME "Tesseract-OCR"
|
||||
!define PRODUCT_VERSION "${VERSION}"
|
||||
!define PRODUCT_PUBLISHER ""
|
||||
!define PRODUCT_PUBLISHER "Tesseract-OCR community"
|
||||
!define PRODUCT_WEB_SITE "http://code.google.com/p/tesseract-ocr"
|
||||
;!define PRODUCT_DIR_REGKEY "Software/Microsoft"
|
||||
;!define PRODUCT_UNINST_KEY "Software/Microsoft"
|
||||
;!define PRODUCT_UNINST_ROOT_KEY "Software/Microsoft"
|
||||
SetCompressor lzma
|
||||
SetCompressor /FINAL /SOLID lzma
|
||||
;SetCompressor lzma
|
||||
SetCompressorDictSize 32
|
||||
|
||||
Name "Tesseract-OCR ${VERSION}"
|
||||
Caption "Tesseract-OCR ${VERSION}"
|
||||
;Icon "icon_1.ico"
|
||||
@ -59,9 +62,9 @@
|
||||
Pop $0 # return value = exit code, "OK" if OK
|
||||
StrCmp $0 "OK" dlok
|
||||
MessageBox MB_OK|MB_ICONEXCLAMATION "http download error. Download Status of $1: $0. Click OK to continue." /SD IDOK
|
||||
Goto error
|
||||
Goto error
|
||||
dlok:
|
||||
ExecWait '"$INSTDIR\gzip.exe" -d "$2"'
|
||||
ExecWait '"$INSTDIR\gzip.exe" -d "$2"'
|
||||
error:
|
||||
!macroend
|
||||
|
||||
@ -86,7 +89,7 @@
|
||||
!define MUI_LICENSEPAGE_CHECKBOX
|
||||
;!define MUI_LICENSEPAGE_TEXT "$(License)"
|
||||
;!insertmacro MUI_PAGE_LICENSE "${MUI_LICENSEPAGE_TEXT}"
|
||||
!insertmacro MUI_PAGE_LICENSE "doc/COPYING"
|
||||
!insertmacro MUI_PAGE_LICENSE "..\COPYING"
|
||||
!ifdef VERSION
|
||||
Page custom PageReinstall PageLeaveReinstall
|
||||
!endif
|
||||
@ -152,45 +155,47 @@ Section "Tesseract-OCR" SecDummy
|
||||
SectionIn RO
|
||||
SetOutPath "$INSTDIR"
|
||||
;files included in distribution
|
||||
File leptonlib.dll
|
||||
File tesseract.exe
|
||||
;File leptonlib.dll
|
||||
File bin\tesseract.exe
|
||||
File bin\cntraining.exe
|
||||
File bin\combine_tessdata.exe
|
||||
File bin\mftraining.exe
|
||||
File bin\unicharset_extractor.exe
|
||||
File bin\wordlist2dawg.exe
|
||||
File gzip.exe # for exctracting language data
|
||||
CreateDirectory "$INSTDIR\tessdata"
|
||||
CreateDirectory "$INSTDIR\tessdata\configs"
|
||||
SetOutPath "$INSTDIR\tessdata\configs"
|
||||
File tessdata\configs\ambigs.train
|
||||
File tessdata\configs\api_config
|
||||
File tessdata\configs\box.train
|
||||
File tessdata\configs\box.train.stderr
|
||||
File tessdata\configs\digits
|
||||
File tessdata\configs\inter
|
||||
File tessdata\configs\kannada
|
||||
File tessdata\configs\logfile
|
||||
File tessdata\configs\makebox
|
||||
File tessdata\configs\unlv
|
||||
File ..\tessdata\configs\ambigs.train
|
||||
File ..\tessdata\configs\api_config
|
||||
File ..\tessdata\configs\box.train
|
||||
File ..\tessdata\configs\box.train.stderr
|
||||
File ..\tessdata\configs\digits
|
||||
File ..\tessdata\configs\hocr
|
||||
File ..\tessdata\configs\inter
|
||||
File ..\tessdata\configs\kannada
|
||||
File ..\tessdata\configs\linebox
|
||||
File ..\tessdata\configs\logfile
|
||||
File ..\tessdata\configs\makebox
|
||||
File ..\tessdata\configs\rebox
|
||||
File ..\tessdata\configs\strokewidth
|
||||
File ..\tessdata\configs\unlv
|
||||
CreateDirectory "$INSTDIR\tessdata\tessconfigs"
|
||||
SetOutPath "$INSTDIR\tessdata\tessconfigs"
|
||||
File tessdata\tessconfigs\batch
|
||||
File tessdata\tessconfigs\batch.nochop
|
||||
File tessdata\tessconfigs\matdemo
|
||||
File tessdata\tessconfigs\msdemo
|
||||
File tessdata\tessconfigs\nobatch
|
||||
File tessdata\tessconfigs\segdemo
|
||||
CreateDirectory "$INSTDIR\training"
|
||||
SetOutPath "$INSTDIR\training"
|
||||
File training\cntraining.exe
|
||||
File training\combine_tessdata.exe
|
||||
File training\mftraining.exe
|
||||
File training\unicharset_extractor.exe
|
||||
File training\wordlist2dawg.exe
|
||||
File ..\tessdata\tessconfigs\batch
|
||||
File ..\tessdata\tessconfigs\batch.nochop
|
||||
File ..\tessdata\tessconfigs\matdemo
|
||||
File ..\tessdata\tessconfigs\msdemo
|
||||
File ..\tessdata\tessconfigs\nobatch
|
||||
File ..\tessdata\tessconfigs\segdemo
|
||||
CreateDirectory "$INSTDIR\doc"
|
||||
SetOutPath "$INSTDIR\doc"
|
||||
File doc\AUTHORS
|
||||
File doc\COPYING
|
||||
File doc\eurotext.tif
|
||||
File doc\phototest.tif
|
||||
File doc\README
|
||||
File doc\ReleaseNotes
|
||||
File ..\AUTHORS
|
||||
File ..\COPYING
|
||||
File ..\eurotext.tif
|
||||
File ..\phototest.tif
|
||||
File ..\README
|
||||
File ..\ReleaseNotes
|
||||
;Store installation folder
|
||||
WriteRegStr HKCU "Software\Tesseract-OCR" "InstallDir" $INSTDIR
|
||||
WriteRegStr HKCU "Software\Tesseract-OCR" "CurrentVersion" "${VERSION}"
|
||||
@ -199,17 +204,16 @@ Section "Tesseract-OCR" SecDummy
|
||||
; include for some of the windows messages defines
|
||||
!include "winmessages.nsh"
|
||||
; HKLM (all users) vs HKCU (current user) defines
|
||||
!define env_hklm 'HKLM "SYSTEM\CurrentControlSet\Control\Session Manager\Environment"'
|
||||
!define env_hkcu 'HKCU "Environment"'
|
||||
|
||||
; set variable
|
||||
; append bin path to user PATH environment variable
|
||||
ReadRegStr $0 HKCU "Environment" "PATH"
|
||||
WriteRegExpandStr HKCU "Environment" "PATH" "$INSTDIR;$INSTDIR\training;$0"
|
||||
#${EnvVarUpdate} $0 "PATH" "A" "HKLM" "$0;$INSTDIR" # this command destroys long variables like path...
|
||||
${EnvVarUpdate} $0 "TESSDATA_PREFIX" "A" "HKCU" "$INSTDIR\"
|
||||
; make sure windows knows about the change
|
||||
SendMessage ${HWND_BROADCAST} ${WM_WININICHANGE} 0 "STR:Environment" /TIMEOUT=5000
|
||||
; set variable
|
||||
; append bin path to user PATH environment variable
|
||||
ReadRegStr $0 HKCU "Environment" "PATH"
|
||||
WriteRegExpandStr HKCU "Environment" "PATH" "$INSTDIR;$0"
|
||||
#${EnvVarUpdate} $0 "PATH" "A" "HKLM" "$0;$INSTDIR" # this command destroys long variables like path...
|
||||
${EnvVarUpdate} $0 "TESSDATA_PREFIX" "A" "HKCU" "$INSTDIR\"
|
||||
; make sure windows knows about the change
|
||||
SendMessage ${HWND_BROADCAST} ${WM_WININICHANGE} 0 "STR:Environment" /TIMEOUT=5000
|
||||
|
||||
;Create uninstaller
|
||||
WriteUninstaller "$INSTDIR\Uninstall.exe"
|
||||
@ -232,7 +236,8 @@ SectionEnd
|
||||
|
||||
Section "Shortcuts creation" SecCS
|
||||
CreateDirectory "$SMPROGRAMS\Tesseract-OCR"
|
||||
CreateShortCut "$SMPROGRAMS\Tesseract-OCR\Tesseract-OCR.lnk" "$INSTDIR\tesseract.exe" "" "$INSTDIR\tesseract.exe" 0
|
||||
;TODO create a simple GUI for 3.01 release
|
||||
;CreateShortCut "$SMPROGRAMS\Tesseract-OCR\Tesseract-OCR.lnk" "$INSTDIR\tesseract.exe" "" "$INSTDIR\tesseract.exe" 0
|
||||
CreateShortCut "$SMPROGRAMS\Tesseract-OCR\Uninstall.lnk" "$INSTDIR\uninstall.exe" "" "$INSTDIR\uninstall.exe" 0
|
||||
;CreateShortCut "$DESKTOP\Tesseract-OCR.lnk" "$INSTDIR\tesseract.exe" "" "$INSTDIR\tesseract.exe" 0
|
||||
;CreateShortCut "$QUICKLAUNCH\.lnk" "$INSTDIR\tesseract.exe" "" "$INSTDIR\tesseract.exe" 0
|
||||
@ -240,143 +245,143 @@ SectionEnd
|
||||
|
||||
; Download language files
|
||||
SectionGroup "Language data" SecGrp_LD
|
||||
Section "English language data" SecLang_eng
|
||||
SectionIn RO
|
||||
SetOutPath "$INSTDIR\tessdata"
|
||||
File tessdata\eng.traineddata
|
||||
SectionEnd
|
||||
|
||||
Section /o "Download and install Bulgarian language data" SecLang_bul
|
||||
!insertmacro Download_Lang_Data bul.traineddata.gz
|
||||
SectionEnd
|
||||
Section "English language data" SecLang_eng
|
||||
SectionIn RO
|
||||
SetOutPath "$INSTDIR\tessdata"
|
||||
File ..\tessdata\eng.*
|
||||
SectionEnd
|
||||
|
||||
Section /o "Download and install Bulgarian language data" SecLang_bul
|
||||
!insertmacro Download_Lang_Data bul.traineddata.gz
|
||||
SectionEnd
|
||||
|
||||
Section /o "Download and install Catalan language data" SecLang_cat
|
||||
!insertmacro Download_Lang_Data cat.traineddata.gz
|
||||
SectionEnd
|
||||
Section /o "Download and install Catalan language data" SecLang_cat
|
||||
!insertmacro Download_Lang_Data cat.traineddata.gz
|
||||
SectionEnd
|
||||
|
||||
Section /o "Download and install Czech language data" SecLang_ces
|
||||
!insertmacro Download_Lang_Data ces.traineddata.gz
|
||||
SectionEnd
|
||||
Section /o "Download and install Czech language data" SecLang_ces
|
||||
!insertmacro Download_Lang_Data ces.traineddata.gz
|
||||
SectionEnd
|
||||
|
||||
Section /o "Download and install Chinese (Traditional) language data" SecLang_chi_tra
|
||||
!insertmacro Download_Lang_Data chi_tra.traineddata.gz
|
||||
SectionEnd
|
||||
Section /o "Download and install Chinese (Traditional) language data" SecLang_chi_tra
|
||||
!insertmacro Download_Lang_Data chi_tra.traineddata.gz
|
||||
SectionEnd
|
||||
|
||||
Section /o "Download and install Chinese (Simplified) language data" SecLang_chi_sim
|
||||
!insertmacro Download_Lang_Data chi_sim.traineddata.gz
|
||||
SectionEnd
|
||||
Section /o "Download and install Chinese (Simplified) language data" SecLang_chi_sim
|
||||
!insertmacro Download_Lang_Data chi_sim.traineddata.gz
|
||||
SectionEnd
|
||||
|
||||
Section /o "Download and install Danish language data" SecLang_dan
|
||||
!insertmacro Download_Lang_Data dan.traineddata.gz
|
||||
SectionEnd
|
||||
Section /o "Download and install Danish language data" SecLang_dan
|
||||
!insertmacro Download_Lang_Data dan.traineddata.gz
|
||||
SectionEnd
|
||||
|
||||
Section /o "Download and install Danish (Fraktur) language data" SecLang_dan_frak
|
||||
!insertmacro Download_Lang_Data dan-frak.traineddata.gz
|
||||
SectionEnd
|
||||
Section /o "Download and install Danish (Fraktur) language data" SecLang_dan_frak
|
||||
!insertmacro Download_Lang_Data dan-frak.traineddata.gz
|
||||
SectionEnd
|
||||
|
||||
Section /o "Download and install Dutch language data" SecLang_nld
|
||||
!insertmacro Download_Lang_Data nld.traineddata.gz
|
||||
SectionEnd
|
||||
Section /o "Download and install Dutch language data" SecLang_nld
|
||||
!insertmacro Download_Lang_Data nld.traineddata.gz
|
||||
SectionEnd
|
||||
|
||||
Section /o "Download and install German language data" SecLang_deu
|
||||
!insertmacro Download_Lang_Data deu.traineddata.gz
|
||||
SectionEnd
|
||||
Section /o "Download and install German language data" SecLang_deu
|
||||
!insertmacro Download_Lang_Data deu.traineddata.gz
|
||||
SectionEnd
|
||||
|
||||
Section /o "Download and install Greek language data" SecLang_ell
|
||||
!insertmacro Download_Lang_Data ell.traineddata.gz
|
||||
SectionEnd
|
||||
Section /o "Download and install Greek language data" SecLang_ell
|
||||
!insertmacro Download_Lang_Data ell.traineddata.gz
|
||||
SectionEnd
|
||||
|
||||
Section /o "Download and install Finnish language data" SecLang_fin
|
||||
!insertmacro Download_Lang_Data fin.traineddata.gz
|
||||
SectionEnd
|
||||
Section /o "Download and install Finnish language data" SecLang_fin
|
||||
!insertmacro Download_Lang_Data fin.traineddata.gz
|
||||
SectionEnd
|
||||
|
||||
Section /o "Download and install French language data" SecLang_fra
|
||||
!insertmacro Download_Lang_Data fra.traineddata.gz
|
||||
SectionEnd
|
||||
Section /o "Download and install French language data" SecLang_fra
|
||||
!insertmacro Download_Lang_Data fra.traineddata.gz
|
||||
SectionEnd
|
||||
|
||||
Section /o "Download and install Hungarian language data" SecLang_hun
|
||||
!insertmacro Download_Lang_Data hun.traineddata.gz
|
||||
SectionEnd
|
||||
Section /o "Download and install Hungarian language data" SecLang_hun
|
||||
!insertmacro Download_Lang_Data hun.traineddata.gz
|
||||
SectionEnd
|
||||
|
||||
Section /o "Download and install Indonesian language data" SecLang_ind
|
||||
!insertmacro Download_Lang_Data ind.traineddata.gz
|
||||
SectionEnd
|
||||
Section /o "Download and install Indonesian language data" SecLang_ind
|
||||
!insertmacro Download_Lang_Data ind.traineddata.gz
|
||||
SectionEnd
|
||||
|
||||
Section /o "Download and install Italian language data" SecLang_ita
|
||||
!insertmacro Download_Lang_Data ita.traineddata.gz
|
||||
SectionEnd
|
||||
Section /o "Download and install Italian language data" SecLang_ita
|
||||
!insertmacro Download_Lang_Data ita.traineddata.gz
|
||||
SectionEnd
|
||||
|
||||
Section /o "Download and install Japanese language data" SecLang_jpn
|
||||
!insertmacro Download_Lang_Data jpn.traineddata.gz
|
||||
SectionEnd
|
||||
Section /o "Download and install Japanese language data" SecLang_jpn
|
||||
!insertmacro Download_Lang_Data jpn.traineddata.gz
|
||||
SectionEnd
|
||||
|
||||
Section /o "Download and install Korean language data" SecLang_kor
|
||||
!insertmacro Download_Lang_Data kor.traineddata.gz
|
||||
SectionEnd
|
||||
Section /o "Download and install Korean language data" SecLang_kor
|
||||
!insertmacro Download_Lang_Data kor.traineddata.gz
|
||||
SectionEnd
|
||||
|
||||
Section /o "Download and install Latvian language data" SecLang_lav
|
||||
!insertmacro Download_Lang_Data lav.traineddata.gz
|
||||
SectionEnd
|
||||
Section /o "Download and install Latvian language data" SecLang_lav
|
||||
!insertmacro Download_Lang_Data lav.traineddata.gz
|
||||
SectionEnd
|
||||
|
||||
Section /o "Download and install Lithuanian language data" SecLang_lit
|
||||
!insertmacro Download_Lang_Data lit.traineddata.gz
|
||||
SectionEnd
|
||||
Section /o "Download and install Lithuanian language data" SecLang_lit
|
||||
!insertmacro Download_Lang_Data lit.traineddata.gz
|
||||
SectionEnd
|
||||
|
||||
Section /o "Download and install Norwegian language data" SecLang_nor
|
||||
!insertmacro Download_Lang_Data nor.traineddata.gz
|
||||
SectionEnd
|
||||
Section /o "Download and install Norwegian language data" SecLang_nor
|
||||
!insertmacro Download_Lang_Data nor.traineddata.gz
|
||||
SectionEnd
|
||||
|
||||
Section /o "Download and install Polish language data" SecLang_pol
|
||||
!insertmacro Download_Lang_Data pol.traineddata.gz
|
||||
SectionEnd
|
||||
Section /o "Download and install Polish language data" SecLang_pol
|
||||
!insertmacro Download_Lang_Data pol.traineddata.gz
|
||||
SectionEnd
|
||||
|
||||
Section /o "Download and install Portuguese language data" SecLang_por
|
||||
!insertmacro Download_Lang_Data por.traineddata.gz
|
||||
SectionEnd
|
||||
Section /o "Download and install Portuguese language data" SecLang_por
|
||||
!insertmacro Download_Lang_Data por.traineddata.gz
|
||||
SectionEnd
|
||||
|
||||
Section /o "Download and install Romanian language data" SecLang_ron
|
||||
!insertmacro Download_Lang_Data ron.traineddata.gz
|
||||
SectionEnd
|
||||
Section /o "Download and install Romanian language data" SecLang_ron
|
||||
!insertmacro Download_Lang_Data ron.traineddata.gz
|
||||
SectionEnd
|
||||
|
||||
Section /o "Download and install Russian language data" SecLang_rus
|
||||
!insertmacro Download_Lang_Data rus.traineddata.gz
|
||||
SectionEnd
|
||||
Section /o "Download and install Russian language data" SecLang_rus
|
||||
!insertmacro Download_Lang_Data rus.traineddata.gz
|
||||
SectionEnd
|
||||
|
||||
Section /o "Download and install Slovak language data" SecLang_slk
|
||||
!insertmacro Download_Lang_Data slk.traineddata.gz
|
||||
SectionEnd
|
||||
Section /o "Download and install Slovak language data" SecLang_slk
|
||||
!insertmacro Download_Lang_Data slk.traineddata.gz
|
||||
SectionEnd
|
||||
|
||||
Section /o "Download and install Slovenian language data" SecLang_slv
|
||||
!insertmacro Download_Lang_Data slv.traineddata.gz
|
||||
SectionEnd
|
||||
Section /o "Download and install Slovenian language data" SecLang_slv
|
||||
!insertmacro Download_Lang_Data slv.traineddata.gz
|
||||
SectionEnd
|
||||
|
||||
Section /o "Download and install Spanish language data" SecLang_spa
|
||||
!insertmacro Download_Lang_Data spa.traineddata.gz
|
||||
SectionEnd
|
||||
Section /o "Download and install Spanish language data" SecLang_spa
|
||||
!insertmacro Download_Lang_Data spa.traineddata.gz
|
||||
SectionEnd
|
||||
|
||||
Section /o "Download and install Serbian language data" SecLang_srp
|
||||
!insertmacro Download_Lang_Data srp.traineddata.gz
|
||||
SectionEnd
|
||||
Section /o "Download and install Serbian language data" SecLang_srp
|
||||
!insertmacro Download_Lang_Data srp.traineddata.gz
|
||||
SectionEnd
|
||||
|
||||
Section /o "Download and install Swedish language data" SecLang_swe
|
||||
!insertmacro Download_Lang_Data swe.traineddata.gz
|
||||
SectionEnd
|
||||
Section /o "Download and install Swedish language data" SecLang_swe
|
||||
!insertmacro Download_Lang_Data swe.traineddata.gz
|
||||
SectionEnd
|
||||
|
||||
Section /o "Download and install Tagalog language data" SecLang_tgl
|
||||
!insertmacro Download_Lang_Data tgl.traineddata.gz
|
||||
SectionEnd
|
||||
Section /o "Download and install Tagalog language data" SecLang_tgl
|
||||
!insertmacro Download_Lang_Data tgl.traineddata.gz
|
||||
SectionEnd
|
||||
|
||||
Section /o "Download and install Turkish language data" SecLang_tur
|
||||
!insertmacro Download_Lang_Data tur.traineddata.gz
|
||||
SectionEnd
|
||||
Section /o "Download and install Turkish language data" SecLang_tur
|
||||
!insertmacro Download_Lang_Data tur.traineddata.gz
|
||||
SectionEnd
|
||||
|
||||
Section /o "Download and install Ukrainian language data" SecLang_ukr
|
||||
!insertmacro Download_Lang_Data ukr.traineddata.gz
|
||||
SectionEnd
|
||||
Section /o "Download and install Ukrainian language data" SecLang_ukr
|
||||
!insertmacro Download_Lang_Data ukr.traineddata.gz
|
||||
SectionEnd
|
||||
|
||||
Section /o "Download and install Vietnamese language data" SecLang_vie
|
||||
!insertmacro Download_Lang_Data vie.traineddata.gz
|
||||
SectionEnd
|
||||
Section /o "Download and install Vietnamese language data" SecLang_vie
|
||||
!insertmacro Download_Lang_Data vie.traineddata.gz
|
||||
SectionEnd
|
||||
SectionGroupEnd
|
||||
;--------------------------------
|
||||
;Descriptions
|
||||
@ -420,11 +425,11 @@ Section "Uninstall"
|
||||
!define MUI_FINISHPAGE_SHOWREADME_TEXT "Create desktop shortcut"
|
||||
!define MUI_FINISHPAGE_SHOWREADME_FUNCTION CreateDeskShortcut
|
||||
;DeleteRegKey HKCU "Software\Microsoft\Windows\CurrentVersion\Run\Tesseract-OCR"
|
||||
DeleteRegKey /ifempty HKCU "Software\Tesseract-OCR"
|
||||
;DeleteRegKey /ifempty HKCU "Software\Tesseract-OCR"
|
||||
DeleteRegKey HKCU "Software\Tesseract-OCR"
|
||||
; delete variable
|
||||
${un.EnvVarUpdate} $0 "PATH" "R" "HKCU" $INSTDIR
|
||||
${un.EnvVarUpdate} $0 "PATH" "R" "HKCU" "$INSTDIR\training"
|
||||
DeleteRegValue ${env_hklm} "TESSDATA_PREFIX"
|
||||
DeleteRegValue ${env_hkcu} "TESSDATA_PREFIX"
|
||||
; make sure windows knows about the change
|
||||
SendMessage ${HWND_BROADCAST} ${WM_WININICHANGE} 0 "STR:Environment" /TIMEOUT=5000
|
||||
|
||||
@ -442,12 +447,17 @@ Section "Uninstall"
|
||||
RMDir "$INSTDIR\tessdata"
|
||||
;Delete "$DESKTOP\Tesseract-OCR.lnk"
|
||||
;Delete "$QUICKLAUNCH\Tesseract-OCR.lnk"
|
||||
RMDir "$INSTDIR"
|
||||
RMDir "$SMPROGRAMS\Tesseract-OCR"
|
||||
|
||||
;MessageBox MB_YESNO "Delete all configuration data? This will save a lot of space, but at the expense of reconfiguring..." IDNO skip_cfg
|
||||
;RMDir /r $APPDATA\Tesseract-OCR
|
||||
;skip_cfg:
|
||||
|
||||
Delete "$INSTDIR\Uninstall.exe"
|
||||
RMDir "$INSTDIR"
|
||||
; remove the Add/Remove information
|
||||
DeleteRegKey HKCU "Software\Microsoft\Windows\CurrentVersion\Uninstall\${PRODUCT_NAME}"
|
||||
|
||||
|
||||
SectionEnd
|
||||
|
||||
Function PageReinstall
|
||||
@ -464,13 +474,13 @@ Function .onInit
|
||||
StrCmp $R0 "" SkipUnInstall
|
||||
|
||||
MessageBox MB_YESNO|MB_ICONEXCLAMATION "Tesseract-ocr version $R0 is installed! Do you want to uninstall it first?$\nUninstall will delete all files in '$INSTDIR'!" \
|
||||
/SD IDYES IDNO SkipUnInstall IDYES UnInstall
|
||||
/SD IDYES IDNO SkipUnInstall IDYES UnInstall
|
||||
UnInstall:
|
||||
readRegStr $R1 HKCU "Software\Microsoft\Windows\CurrentVersion\Uninstall\${PRODUCT_NAME}" "UninstallString"
|
||||
ClearErrors
|
||||
ExecWait '$R1 _?=$INSTDIR'
|
||||
readRegStr $R1 HKCU "Software\Microsoft\Windows\CurrentVersion\Uninstall\${PRODUCT_NAME}" "UninstallString"
|
||||
ClearErrors
|
||||
ExecWait '$R1 _?=$INSTDIR'
|
||||
SkipUnInstall:
|
||||
|
||||
|
||||
MessageBox MB_YESNO|MB_ICONQUESTION "Do you want to install ${PRODUCT_NAME} ${VERSION}?" \
|
||||
/SD IDYES IDNO no IDYES yes
|
||||
no:
|
||||
@ -504,7 +514,7 @@ Function .onInit
|
||||
StrCmp $0 "1057" Indonesian
|
||||
StrCmp $0 "1040" Italian
|
||||
StrCmp $0 "1041" Japanese
|
||||
StrCmp $0 "1042" Korean
|
||||
StrCmp $0 "1042" Korean
|
||||
StrCmp $0 "1062" Latvian
|
||||
StrCmp $0 "1063" Lithuanian
|
||||
StrCmp $0 "1044" Norwegian
|
||||
@ -556,7 +566,7 @@ Function .onInit
|
||||
Japanese: !insertmacro SelectSection ${SecLang_jpn}
|
||||
Goto lang_end
|
||||
Korean: !insertmacro SelectSection ${SecLang_kor}
|
||||
Goto lang_end
|
||||
Goto lang_end
|
||||
Latvian: !insertmacro SelectSection ${SecLang_lav}
|
||||
Goto lang_end
|
||||
Lithuanian: !insertmacro SelectSection ${SecLang_lit}
|
||||
|
Loading…
Reference in New Issue
Block a user