diff --git a/INSTALL.SVN b/INSTALL.SVN
index 26f93f15..bbfb6a9a 100644
--- a/INSTALL.SVN
+++ b/INSTALL.SVN
@@ -6,8 +6,16 @@ before new build.
 
 So, the steps for making Tesseract are:
 
- * ./autogen.sh
- * ./configure
- * make
- * sudo make install
- * sudo make install-langs
+ $ ./autogen.sh
+ $ ./configure
+ $ make
+ $ sudo make install
+ $ sudo make install-langs
+
+'sudo make install-langs' or 'sudo make install LANGS=' will install all
+available language data files in tessdata directory.
+
+If you want to install just a few of them, then run:
+ $ make install LANGS="eng ara deu"
+It will install only English, Arabic and German language datafiles (if
+they are present in tessdata directory)
diff --git a/Makefile.am b/Makefile.am
index 76c3f183..e2680661 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,4 +1,4 @@
-ACLOCAL_AMFLAGS = -I m4
+ACLOCAL_AMFLAGS = -I m4
 SUBDIRS = ccutil viewer cutil image ccstruct dict classify wordrec neural_networks/runtime textord cube ccmain api . java tessdata testing doc training
 
 #if USING_GETTEXT
@@ -7,7 +7,7 @@ SUBDIRS = ccutil viewer cutil image ccstruct dict classify wordrec neural_networ
 #endif
 
 EXTRA_DIST = eurotext.tif phototest.tif ReleaseNotes \
-	aclocal.m4 config configure.ac autogen.sh tesseract.spec contrib
+    aclocal.m4 config configure.ac autogen.sh tesseract.spec contrib
 
 #EXTRA_DIST = doc/html doc/@PACKAGE_NAME@_@PACKAGE_VERSION@.pdf doc/@PACKAGE_NAME@_@PACKAGE_VERSION@.ps.gz
 
@@ -22,6 +22,28 @@ dist-hook:
 	rm -rf `find $(distdir) -name .deps`
 	rm -rf `find $(distdir) -name Makefile.in`
 
+# Language data handling during 'make install' is controlled by LANGS:
+# 'make install' will install only libraries and programs (no language
+# data files)
+# 'make install LANGS=' will install libraries, programs and all
+# language datafiles in tessdata/
+# 'make install LANGS="eng ara deu"' will install only English, Arabic
+# and German language datafiles if they are present in tessdata/
+# The hook tells "LANGS unset" from "LANGS empty" via the recipe shell's
+# environment, and forwards the selection to tessdata/Makefile as LANGS
+# (not LANG, which is the locale variable).
+install-data-hook:
+	@if test $${LANGS+defined}; then \
+	  if test -z "$${LANGS}"; then \
+	    echo ____All language files will be installed; \
+	  else \
+	    echo ___Folowing language files will be installed: "$$LANGS"; \
+	  fi; \
+	  cd "$(top_builddir)/tessdata" && $(MAKE) install-langs LANGS="$${LANGS}"; \
+	else \
+	  echo No language file is installed.; \
+	fi;
+
 .PHONY: install-langs
 install-langs:
 	@cd "$(top_builddir)/tessdata" && $(MAKE) $@
diff --git a/ccmain/Makefile.am b/ccmain/Makefile.am
index e93d8684..bd23c26a 100644
--- a/ccmain/Makefile.am
+++ b/ccmain/Makefile.am
@@ -12,16 +12,14 @@ AM_CPPFLAGS += -DTESS_EXPORTS \
     -fvisibility=hidden -fvisibility-inlines-hidden
 endif
 
-include_HEADERS = thresholder.h
+include_HEADERS = \
+    thresholder.h ltrresultiterator.h pageiterator.h resultiterator.h
 noinst_HEADERS = \
-	control.h cube_reco_context.h cubeclassifier.h \
-	docqual.h equationdetect.h fixspace.h \
-	imgscale.h ltrresultiterator.h mutableiterator.h osdetect.h output.h \
-	pageiterator.h paragraphs.h paragraphs_internal.h paramsd.h pgedit.h \
-	reject.h resultiterator.h scaleimg.h \
-	tessbox.h tessedit.h tesseractclass.h \
-	tesseract_cube_combiner.h \
-	tessvars.h tfacep.h tfacepp.h werdit.h
+    control.h cube_reco_context.h cubeclassifier.h docqual.h \
+    equationdetect.h fixspace.h imgscale.h mutableiterator.h osdetect.h \
+    output.h paragraphs.h paragraphs_internal.h paramsd.h pgedit.h \
+    reject.h scaleimg.h tessbox.h tessedit.h tesseractclass.h \
+    tesseract_cube_combiner.h tessvars.h tfacep.h tfacepp.h werdit.h
 
 if !USING_MULTIPLELIBS
 noinst_LTLIBRARIES = libtesseract_main.la
diff --git a/ccutil/Makefile.am b/ccutil/Makefile.am
index 2e6e59c7..403a41d4 100644
--- a/ccutil/Makefile.am
+++ b/ccutil/Makefile.am
@@ -8,15 +8,17 @@ endif
 
 EXTRA_DIST = mfcpch.cpp
 
-include_HEADERS = errcode.h fileerr.h host.h memry.h \
-	platform.h serialis.h strngs.h tesscallback.h unichar.h
+include_HEADERS = \
+    errcode.h fileerr.h genericvector.h helpers.h host.h memry.h \
+    ndminx.h params.h platform.h serialis.h strngs.h tesscallback.h \
+    unichar.h unicharmap.h unicharset.h
+
 noinst_HEADERS = \
-	ambigs.h basedir.h bits16.h bitvector.h ccutil.h clst.h \
-	elst2.h elst.h genericvector.h globaloc.h hashfn.h helpers.h \
-	hosthplb.h indexmapbidi.h lsterr.h mfcpch.h ndminx.h notdll.h \
-	nwmain.h ocrclass.h qrsequence.h secname.h sorthelper.h stderr.h \
-	tessdatamanager.h tprintf.h unicharmap.h unicharset.h \
-	unicity_table.h unicodes.h params.h
+    ambigs.h basedir.h bits16.h bitvector.h ccutil.h clst.h elst2.h \
+    elst.h globaloc.h hashfn.h hosthplb.h indexmapbidi.h lsterr.h \
+    mfcpch.h notdll.h nwmain.h ocrclass.h qrsequence.h secname.h \
+    sorthelper.h stderr.h tessdatamanager.h tprintf.h unicity_table.h \
+    unicodes.h
 
 if !USING_MULTIPLELIBS
 noinst_LTLIBRARIES = libtesseract_ccutil.la
diff --git a/ccutil/ccutil.h b/ccutil/ccutil.h
index ccb24b10..03b084f4 100644
--- a/ccutil/ccutil.h
+++ b/ccutil/ccutil.h
@@ -83,7 +83,11 @@ class CCUtil {
   // params_ should be initialized before parameters are added to it.
   STRING_VAR_H(m_data_sub_dir, "tessdata/", "Directory for data files");
 #ifdef _WIN32
-  STRING_VAR_H(tessedit_module_name, "tessdll.dll",
+  // Two-level stringification: the outer macro expands WINDLLNAME first, the
+  // inner one applies '#'. A single level would yield the text "WINDLLNAME".
+  #define makestring_(x) #x
+  #define makestring(x) makestring_(x)
+  STRING_VAR_H(tessedit_module_name, makestring(WINDLLNAME),
                "Module colocated with tessdata dir");
 #endif
   INT_VAR_H(ambigs_debug_level, 0, "Debug level for unichar ambiguities");
diff --git a/tessdata/Makefile.am b/tessdata/Makefile.am
index 5d62a4d5..3a6d6d37 100644
--- a/tessdata/Makefile.am
+++ b/tessdata/Makefile.am
@@ -43,17 +43,28 @@ langdata = bul.traineddata mlt.traineddata chr.traineddata \
 
+# install-langs copies language data from this directory into $(datadir):
+# with LANGS="eng deu" only files named CODE.* for each listed code, and
+# with LANGS empty or unset the files of every *.traineddata found here.
 .PHONY: install-langs
 install-langs:
-	for l in ./*.traineddata ; do \
-	filename=`basename $$l`;\
-	lang=$${filename%.*} ;\
-	if test "$$lang" == "*" ; then \
-	echo "No lang present." ; \
-	break ; \
-	fi ; \
-	echo "installing data for $$lang" ; \
-	$(INSTALL) -m 644 *$$lang* $(datadir) ; \
-	done;
-	
+	@if [ ! -d "$(datadir)" ]; then mkdir -p "$(datadir)"; fi;
+	@if test "${LANGS}" != ""; then \
+	  for lang_code in ${LANGS}; do \
+	    echo "installing data for $$lang_code"; \
+	    $(INSTALL) -m 644 $$lang_code.* "$(datadir)"; \
+	  done; \
+	else \
+	  for l in ./*.traineddata; do \
+	    filename=`basename $$l`; \
+	    lang_code=$${filename%.*}; \
+	    if test "$$lang_code" = "*"; then \
+	      echo "No lang present."; \
+	      break; \
+	    fi; \
+	    echo "installing data for $$lang_code"; \
+	    $(INSTALL) -m 644 $$lang_code.* "$(datadir)"; \
+	  done; \
+	fi;
+
 uninstall-local:
 	cd $(datadir); \
 	rm --force $(langdata)
diff --git a/training/Makefile.am b/training/Makefile.am
index 6633bc3f..29f6df38 100644
--- a/training/Makefile.am
+++ b/training/Makefile.am
@@ -10,7 +10,7 @@ AM_CPPFLAGS = \
 # TODO: training programs can not be linked to shared library created
 # with -fvisibility
 if VISIBILITY
-AM_LDFLAGS = -all-static
+# AM_LDFLAGS = -all-static
 endif
 
 noinst_HEADERS = \
@@ -20,10 +20,10 @@ noinst_LTLIBRARIES = libtesseract_training.la libtesseract_tessopt.la
 
 libtesseract_training_la_SOURCES = \
     commontraining.cpp
-libtesseract_training_la_LDFLAGS = -static
+#libtesseract_training_la_LDFLAGS = -static
 libtesseract_tessopt_la_SOURCES = \
     tessopt.cpp
-libtesseract_tessopt_la_LDFLAGS = -static
+#libtesseract_tessopt_la_LDFLAGS = -static
 
 bin_PROGRAMS = ambiguous_words classifier_tester cntraining combine_tessdata dawg2wordlist mftraining shapeclustering unicharset_extractor wordlist2dawg
 
@@ -52,6 +52,7 @@ ambiguous_words_LDADD += \
 endif
 
 classifier_tester_SOURCES = classifier_tester.cpp
+#classifier_tester_LDFLAGS = -static
 classifier_tester_LDADD = \
     libtesseract_training.la \
     libtesseract_tessopt.la
@@ -76,6 +77,7 @@ classifier_tester_LDADD += \
 endif
 
 combine_tessdata_SOURCES = combine_tessdata.cpp
+#combine_tessdata_LDFLAGS = -static
 if USING_MULTIPLELIBS
 combine_tessdata_LDADD = \
     $(top_srcdir)/ccutil/libtesseract_ccutil.la
@@ -85,6 +87,7 @@ combine_tessdata_LDADD = \
 endif
 
 cntraining_SOURCES = cntraining.cpp
+#cntraining_LDFLAGS = -static
 cntraining_LDADD = \
     libtesseract_training.la \
     libtesseract_tessopt.la
@@ -108,6 +111,7 @@ cntraining_LDADD += \
 endif
 
 dawg2wordlist_SOURCES = dawg2wordlist.cpp
+#dawg2wordlist_LDFLAGS = -static
 dawg2wordlist_LDADD = \
     libtesseract_tessopt.la
 if USING_MULTIPLELIBS
@@ -130,6 +134,7 @@ dawg2wordlist_LDADD += \
 endif
 
 mftraining_SOURCES = mftraining.cpp mergenf.cpp
+#mftraining_LDFLAGS = -static
 mftraining_LDADD = \
     libtesseract_training.la \
     libtesseract_tessopt.la
@@ -153,6 +158,7 @@ mftraining_LDADD += \
 endif
 
 shapeclustering_SOURCES = shapeclustering.cpp
+#shapeclustering_LDFLAGS = -static
 shapeclustering_LDADD = \
     libtesseract_training.la \
     libtesseract_tessopt.la
@@ -176,6 +182,7 @@ shapeclustering_LDADD += \
 endif
 
 unicharset_extractor_SOURCES = unicharset_extractor.cpp
+#unicharset_extractor_LDFLAGS = -static
 unicharset_extractor_LDADD = \
     libtesseract_tessopt.la
 if USING_MULTIPLELIBS
@@ -188,6 +195,7 @@ unicharset_extractor_LDADD += \
 endif
 
 wordlist2dawg_SOURCES = wordlist2dawg.cpp
+#wordlist2dawg_LDFLAGS = -static
 wordlist2dawg_LDADD = \
     libtesseract_tessopt.la
 if USING_MULTIPLELIBS
diff --git a/vs2008/libtesseract/libtesseract.vcproj b/vs2008/libtesseract/libtesseract.vcproj
index 7a36c266..96a1ea12 100644
--- a/vs2008/libtesseract/libtesseract.vcproj
+++ b/vs2008/libtesseract/libtesseract.vcproj
@@ -46,7 +46,7 @@
 				Name="VCCLCompilerTool"
 				Optimization="0"
 				AdditionalIncludeDirectories="..\..\api;..\..\ccmain;..\..\ccutil;..\..\ccstruct;..\..\classify;..\..\cube;..\..\cutil;..\..\dict;..\..\image;..\..\neural_networks\runtime;..\..\textord;..\..\viewer;..\..\wordrec;.;..\..\..\include;..\..\..\include\leptonica;..\port"
-				PreprocessorDefinitions="WIN32;_WINDOWS;_DEBUG;_LIB;USE_STD_NAMESPACE"
+				PreprocessorDefinitions="WIN32;_WINDOWS;_DEBUG;_LIB;USE_STD_NAMESPACE;WINDLLNAME=$(TargetFileName)"
 				MinimalRebuild="false"
 				BasicRuntimeChecks="3"
 				RuntimeLibrary="3"
@@ -120,7 +120,7 @@
 				Name="VCCLCompilerTool"
 				Optimization="2"
 				AdditionalIncludeDirectories="..\..\api;..\..\ccmain;..\..\ccutil;..\..\ccstruct;..\..\classify;..\..\cube;..\..\cutil;..\..\dict;..\..\image;..\..\neural_networks\runtime;..\..\textord;..\..\viewer;..\..\wordrec;.;..\..\..\include;..\..\..\include\leptonica;..\port"
-				PreprocessorDefinitions="WIN32;_WINDOWS;NDEBUG;_LIB;USE_STD_NAMESPACE"
+				PreprocessorDefinitions="WIN32;_WINDOWS;NDEBUG;_LIB;USE_STD_NAMESPACE;WINDLLNAME=$(TargetFileName)"
 				RuntimeLibrary="2"
 				UsePrecompiledHeader="0"
 				WarningLevel="3"
@@ -191,7 +191,7 @@
 				Name="VCCLCompilerTool"
 				Optimization="2"
 				AdditionalIncludeDirectories="..\..\api;..\..\ccmain;..\..\ccutil;..\..\ccstruct;..\..\classify;..\..\cube;..\..\cutil;..\..\dict;..\..\image;..\..\neural_networks\runtime;..\..\textord;..\..\viewer;..\..\wordrec;.;..\..\..\include;..\..\..\include\leptonica;..\port"
-				PreprocessorDefinitions="WIN32;_WINDOWS;NDEBUG;_USRDLL;_WINDLL;USE_STD_NAMESPACE;TESS_EXPORTS;LIBLEPT_IMPORTS"
+				PreprocessorDefinitions="WIN32;_WINDOWS;NDEBUG;_USRDLL;_WINDLL;USE_STD_NAMESPACE;TESS_EXPORTS;LIBLEPT_IMPORTS;WINDLLNAME=$(TargetFileName)"
 				RuntimeLibrary="2"
 				UsePrecompiledHeader="0"
 				WarningLevel="3"
@@ -273,7 +273,7 @@
 				Name="VCCLCompilerTool"
 				Optimization="0"
 				AdditionalIncludeDirectories="..\..\api;..\..\ccmain;..\..\ccutil;..\..\ccstruct;..\..\classify;..\..\cube;..\..\cutil;..\..\dict;..\..\image;..\..\neural_networks\runtime;..\..\textord;..\..\viewer;..\..\wordrec;.;..\..\..\include;..\..\..\include\leptonica;..\port"
-				PreprocessorDefinitions="WIN32;_WINDOWS;_DEBUG;_USRDLL;_WINDLL;USE_STD_NAMESPACE;TESS_EXPORTS;LIBLEPT_IMPORTS"
+				PreprocessorDefinitions="WIN32;_WINDOWS;_DEBUG;_USRDLL;_WINDLL;USE_STD_NAMESPACE;TESS_EXPORTS;LIBLEPT_IMPORTS;WINDLLNAME=$(TargetFileName)"
 				MinimalRebuild="false"
 				BasicRuntimeChecks="3"
 				RuntimeLibrary="3"