From 71ad8c9bff102eb8184e4ed0a9bc7f4860fb416a Mon Sep 17 00:00:00 2001 From: Ian Blenke Date: Fri, 18 Mar 2016 00:32:35 -0400 Subject: [PATCH 01/30] Dockerifying using travis build script --- Dockerfile | 14 ++++++++++++++ docker-compose.yml | 2 ++ 2 files changed, 16 insertions(+) create mode 100755 Dockerfile create mode 100644 docker-compose.yml diff --git a/Dockerfile b/Dockerfile new file mode 100755 index 000000000..b7645cc9b --- /dev/null +++ b/Dockerfile @@ -0,0 +1,14 @@ +FROM ubuntu +MAINTAINER Ian Blenke + +RUN apt-get update +RUN DEBIAN_FRONTEND=noninteractive apt-get install -y git ruby bundler wget unzip +RUN gem install travis --no-ri --no-rdoc +RUN git clone https://github.com/travis-ci/travis-build ~/.travis/travis-build +RUN bundle install --gemfile ~/.travis/travis-build/Gemfile + +ADD . /tesseract +WORKDIR /tesseract + +RUN travis compile | sed -e "s/--branch\\\=\\\'\\\'/--branch=master/g" | bash + diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 000000000..22d4501b7 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,2 @@ +tesseract: + build: . From 9100adcbde39feef6c6367cf040b9c6f8cd2679b Mon Sep 17 00:00:00 2001 From: Nick White Date: Mon, 21 Mar 2016 11:41:36 +0000 Subject: [PATCH 02/30] Enable all ligatures available in a font for text2image rendering This enables all OpenType ligatures for a specific font, where available. Specifically, it explicitly enables the OpenType features liga (standard ligatures), hlig (historical ligatures), clig (contextual ligatures), and dlig (discretionary ligatures). This feature requires Pango 1.38 or newer. 
--- training/stringrenderer.cpp | 8 ++++++++ training/stringrenderer.h | 5 +++++ 2 files changed, 13 insertions(+) diff --git a/training/stringrenderer.cpp b/training/stringrenderer.cpp index a4f1994b4..c83e11eb6 100644 --- a/training/stringrenderer.cpp +++ b/training/stringrenderer.cpp @@ -120,6 +120,7 @@ StringRenderer::StringRenderer(const string& font_desc, int page_width, box_padding_(0), total_chars_(0), font_index_(0), + features_(NULL), last_offset_(0) { pen_color_[0] = 0.0; pen_color_[1] = 0.0; @@ -149,6 +150,7 @@ void StringRenderer::set_underline_continuation_prob(const double frac) { } StringRenderer::~StringRenderer() { + free(features_); ClearBoxes(); FreePangoCairo(); } @@ -204,6 +206,12 @@ void StringRenderer::SetLayoutProperties() { spacing_attr->end_index = static_cast(-1); pango_attr_list_change(attr_list, spacing_attr); } + if (add_ligatures_) { + set_features("liga, clig, dlig, hlig"); + PangoAttribute* feature_attr = + pango_attr_font_features_new(features_); + pango_attr_list_change(attr_list, feature_attr); + } pango_layout_set_attributes(layout_, attr_list); pango_attr_list_unref(attr_list); // Adjust line spacing diff --git a/training/stringrenderer.h b/training/stringrenderer.h index 7c50db262..975e1e44f 100644 --- a/training/stringrenderer.h +++ b/training/stringrenderer.h @@ -90,6 +90,10 @@ class StringRenderer { void set_underline_style(const PangoUnderline style) { underline_style_ = style; } + void set_features(char *features) { + free(features_); + features_ = strdup(features); + } void set_page(int page) { page_ = page; } @@ -185,6 +189,7 @@ class StringRenderer { double underline_start_prob_; double underline_continuation_prob_; PangoUnderline underline_style_; + char *features_; // Text filtering options bool drop_uncovered_chars_; bool strip_unrenderable_words_; From 76ed9decb348d1a48df68e6f901dd16fa8164bcd Mon Sep 17 00:00:00 2001 From: Nick White Date: Mon, 21 Mar 2016 13:03:03 +0000 Subject: [PATCH 03/30] Only enable extra 
ligatures with recent Pango versions Pango's opentype feature selection functions are only available from version 1.38+, which is still quite new, so ensure it's just ignored if using an older version. --- training/stringrenderer.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/training/stringrenderer.cpp b/training/stringrenderer.cpp index c83e11eb6..46da77b76 100644 --- a/training/stringrenderer.cpp +++ b/training/stringrenderer.cpp @@ -206,12 +206,14 @@ void StringRenderer::SetLayoutProperties() { spacing_attr->end_index = static_cast(-1); pango_attr_list_change(attr_list, spacing_attr); } +#if (PANGO_VERSION_MAJOR == 1 && PANGO_VERSION_MINOR >= 38) if (add_ligatures_) { set_features("liga, clig, dlig, hlig"); PangoAttribute* feature_attr = pango_attr_font_features_new(features_); pango_attr_list_change(attr_list, feature_attr); } +#endif pango_layout_set_attributes(layout_, attr_list); pango_attr_list_unref(attr_list); // Adjust line spacing From a02ef80f2747214dda04372a86e900b263e7ceda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Wed, 23 Mar 2016 17:19:58 +0100 Subject: [PATCH 04/30] allow OpenMP in VS2010 LIB_Release --- vs2010/libtesseract/libtesseract.vcxproj | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vs2010/libtesseract/libtesseract.vcxproj b/vs2010/libtesseract/libtesseract.vcxproj index 9b734d5d5..4b4d6dd65 100644 --- a/vs2010/libtesseract/libtesseract.vcxproj +++ b/vs2010/libtesseract/libtesseract.vcxproj @@ -195,7 +195,7 @@ copy "$(TargetPath)" ..\..\..\lib MaxSpeed ..\..\api;..\..\ccmain;..\..\ccutil;..\..\ccstruct;..\..\classify;..\..\cube;..\..\cutil;..\..\dict;..\..\neural_networks\runtime;..\..\textord;..\..\viewer;..\..\wordrec;.;..\..\..\include;..\..\..\include\leptonica;..\port;..\..\opencl;%(AdditionalIncludeDirectories) - WIN32;_WINDOWS;NDEBUG;_LIB;USE_STD_NAMESPACE;WINDLLNAME="$(TargetFileName)";%(PreprocessorDefinitions) + 
OPENMP;WIN32;_WINDOWS;NDEBUG;_LIB;USE_STD_NAMESPACE;WINDLLNAME="$(TargetFileName)";%(PreprocessorDefinitions) MultiThreadedDLL @@ -204,6 +204,7 @@ copy "$(TargetPath)" ..\..\..\lib Default 4244;4305;4018;4267;4996;4800;4005;4355;4099;4566;%(DisableSpecificWarnings) + true $(OutDir)$(TargetName)$(TargetExt) From bd424f6201b66212f345329877f08742808cb6f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Wed, 23 Mar 2016 17:24:21 +0100 Subject: [PATCH 05/30] fix #289 mingw64 build --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 235f96cd4..04aadeed1 100644 --- a/configure.ac +++ b/configure.ac @@ -85,7 +85,7 @@ OPENCL_LIBS="-lOpenCL" ############################# AC_CANONICAL_HOST case "${host_os}" in - mingw32*) + mingw*) AC_DEFINE_UNQUOTED(MINGW,1,[This is a MinGW system]) AM_CONDITIONAL(T_WIN, true) AM_CONDITIONAL(MINGW, true) From a3ba11b030345d32829b1e8355afea5419978d82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Thu, 24 Mar 2016 17:32:03 +0100 Subject: [PATCH 06/30] Revert "allow OpenMP in VS2010 LIB_Release" This reverts commit a02ef80f2747214dda04372a86e900b263e7ceda. 
--- vs2010/libtesseract/libtesseract.vcxproj | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vs2010/libtesseract/libtesseract.vcxproj b/vs2010/libtesseract/libtesseract.vcxproj index 4b4d6dd65..9b734d5d5 100644 --- a/vs2010/libtesseract/libtesseract.vcxproj +++ b/vs2010/libtesseract/libtesseract.vcxproj @@ -195,7 +195,7 @@ copy "$(TargetPath)" ..\..\..\lib MaxSpeed ..\..\api;..\..\ccmain;..\..\ccutil;..\..\ccstruct;..\..\classify;..\..\cube;..\..\cutil;..\..\dict;..\..\neural_networks\runtime;..\..\textord;..\..\viewer;..\..\wordrec;.;..\..\..\include;..\..\..\include\leptonica;..\port;..\..\opencl;%(AdditionalIncludeDirectories) - OPENMP;WIN32;_WINDOWS;NDEBUG;_LIB;USE_STD_NAMESPACE;WINDLLNAME="$(TargetFileName)";%(PreprocessorDefinitions) + WIN32;_WINDOWS;NDEBUG;_LIB;USE_STD_NAMESPACE;WINDLLNAME="$(TargetFileName)";%(PreprocessorDefinitions) MultiThreadedDLL @@ -204,7 +204,6 @@ copy "$(TargetPath)" ..\..\..\lib Default 4244;4305;4018;4267;4996;4800;4005;4355;4099;4566;%(DisableSpecificWarnings) - true $(OutDir)$(TargetName)$(TargetExt) From 60176fc5ae5e7f6bdef60c926a4b5ea03de2bfa7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Fri, 25 Mar 2016 14:58:24 +0100 Subject: [PATCH 07/30] replace __CYGWIN32__ with __CYGWIN__ --- opencl/openclwrapper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opencl/openclwrapper.h b/opencl/openclwrapper.h index f339a21ab..e422a09e5 100644 --- a/opencl/openclwrapper.h +++ b/opencl/openclwrapper.h @@ -10,7 +10,7 @@ // including CL/cl.h doesn't occur until USE_OPENCL defined below // platform preprocessor commands -#if defined( WIN32 ) || defined( __WIN32__ ) || defined( _WIN32 ) || defined( __CYGWIN32__ ) || defined( __MINGW32__ ) +#if defined( WIN32 ) || defined( __WIN32__ ) || defined( _WIN32 ) || defined( __CYGWIN__ ) || defined( __MINGW32__ ) #define ON_WINDOWS 1 #define ON_LINUX 0 #define ON_APPLE 0 From eb00574c4a8355cb6ee902c7124b3fe4c493fe1f Mon Sep 17 00:00:00 
2001 From: Michael McConville Date: Wed, 13 Apr 2016 15:15:56 -0400 Subject: [PATCH 08/30] Remove conditional definition of off_t As pointed out by Stefan Weil, conditionally defining off_t using a macro isn't a valid approach. off_t does not have a fixed size and is used in ABI definitions (e.g. syscalls), so silently guessing its size risks breaking the build. Additionally, all sane and modern platforms will have off_t. --- ccutil/scanutils.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/ccutil/scanutils.cpp b/ccutil/scanutils.cpp index fca4f8192..cba7d5491 100644 --- a/ccutil/scanutils.cpp +++ b/ccutil/scanutils.cpp @@ -37,11 +37,6 @@ #include "scanutils.h" #include "tprintf.h" -// workaround for "'off_t' was not declared in this scope" with -std=c++11 -#if !defined(HAVE_OFF_T) -typedef long off_t; -#endif // off_t - enum Flags { FL_SPLAT = 0x01, // Drop the value, do not assign FL_INV = 0x02, // Character-set with inverse From 4919b276ebde72782997787f424fd44122e2974e Mon Sep 17 00:00:00 2001 From: Robbert Klarenbeek Date: Thu, 28 Apr 2016 22:34:44 +0200 Subject: [PATCH 09/30] Fix incompatibility with some C++11 implementations --- ccstruct/matrix.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ccstruct/matrix.h b/ccstruct/matrix.h index af240b07d..e13ef3189 100644 --- a/ccstruct/matrix.h +++ b/ccstruct/matrix.h @@ -30,7 +30,7 @@ class BLOB_CHOICE_LIST; -#define NOT_CLASSIFIED reinterpret_cast(NULL) +#define NOT_CLASSIFIED reinterpret_cast(0) // A generic class to hold a 2-D matrix with entries of type T, but can also // act as a base class for other implementations, such as a triangular or From ee5e1e972a91c6d303ef92b55f04c34074188088 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sun, 15 May 2016 19:05:53 +0200 Subject: [PATCH 10/30] configure: Fix cross compiler flags for cairo and pango Calling pkg-config directly is a bad idea because it returns the compiler flags for native builds. 
Signed-off-by: Stefan Weil --- configure.ac | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/configure.ac b/configure.ac index 04aadeed1..d2e05ce6c 100644 --- a/configure.ac +++ b/configure.ac @@ -467,24 +467,22 @@ fi AM_CONDITIONAL(ENABLE_TRAINING, $have_icu) # Check location of pango headers -have_pango=false -AC_CHECK_HEADERS(pango-1.0/pango/pango-features.h, have_pango=true, have_pango=false) +PKG_CHECK_MODULES(pango, pango, have_pango=true, have_pango=false) if !($have_pango); then AC_MSG_WARN(Training tools WILL NOT be built because of missing pango library.) AC_MSG_WARN(Try to install libpango1.0-dev package.) else - CPPFLAGS="$CPPFLAGS $(pkg-config --cflags pango)" + CPPFLAGS="$CPPFLAGS $pango_CFLAGS" fi AM_CONDITIONAL(ENABLE_TRAINING, $have_pango) # Check location of cairo headers -have_cairo=false -AC_CHECK_HEADERS(cairo/cairo-version.h, have_cairo=true, have_cairo=false) +PKG_CHECK_MODULES(cairo, cairo, have_cairo=true, have_cairo=false) if !($have_cairo); then AC_MSG_WARN(Training tools WILL NOT be built because of missing cairo library.) AC_MSG_WARN(Try to install libcairo-dev?? package.) else - CPPFLAGS="$CPPFLAGS $(pkg-config --cflags cairo)" + CPPFLAGS="$CPPFLAGS $cairo_CFLAGS" fi AM_CONDITIONAL(ENABLE_TRAINING, $have_cairo) From 7e98c3343291d0529c4f4201193b4f3aad538911 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Thu, 17 Mar 2016 12:10:02 +0100 Subject: [PATCH 11/30] Print help text to stdout instead to stderr It is common practice for command line programs to show help text on stdout. This seems to be reasonable for Tesseract, too. 
Signed-off-by: Stefan Weil --- api/tesseractmain.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/api/tesseractmain.cpp b/api/tesseractmain.cpp index 6851bd933..23fbc65e3 100644 --- a/api/tesseractmain.cpp +++ b/api/tesseractmain.cpp @@ -71,7 +71,7 @@ void PrintVersionInfo() { } void PrintUsage(const char* program) { - fprintf(stderr, + printf( "Usage:\n" " %s --help | --help-psm | --version\n" " %s --list-langs [--tessdata-dir PATH]\n" @@ -105,7 +105,7 @@ void PrintHelpForPSM() { #endif ; - fprintf(stderr, "%s", msg); + printf("%s", msg); } void PrintHelpMessage(const char* program) { @@ -123,7 +123,7 @@ void PrintHelpMessage(const char* program) { "NOTE: These options must occur before any configfile.\n" ; - fprintf(stderr, "\n%s\n", ocr_options); + printf("\n%s\n", ocr_options); PrintHelpForPSM(); const char *single_options = @@ -135,7 +135,7 @@ void PrintHelpMessage(const char* program) { " --print-parameters Print tesseract parameters to stdout.\n" ; - fprintf(stderr, "\n%s", single_options); + printf("\n%s", single_options); } void SetVariablesFromCLArgs(tesseract::TessBaseAPI* api, int argc, char** argv) { From e59be55bccea98bc312ddda09982951148185eea Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Thu, 17 Mar 2016 12:10:02 +0100 Subject: [PATCH 12/30] Print list of languages to stdout instead to stderr It is common practice for command line programs to print user requested information on stdout. This seems to be reasonable for Tesseract, too. 
Signed-off-by: Stefan Weil --- api/tesseractmain.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/api/tesseractmain.cpp b/api/tesseractmain.cpp index 23fbc65e3..169bb424f 100644 --- a/api/tesseractmain.cpp +++ b/api/tesseractmain.cpp @@ -164,11 +164,10 @@ void SetVariablesFromCLArgs(tesseract::TessBaseAPI* api, int argc, char** argv) void PrintLangsList(tesseract::TessBaseAPI* api) { GenericVector languages; api->GetAvailableLanguagesAsVector(&languages); - fprintf(stderr, "List of available languages (%d):\n", - languages.size()); + printf("List of available languages (%d):\n", languages.size()); for (int index = 0; index < languages.size(); ++index) { STRING& string = languages[index]; - fprintf(stderr, "%s\n", string.string()); + printf("%s\n", string.string()); } api->End(); } From cab6de17400f8147fbbd2a56125ba8c23a730e7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Fri, 20 May 2016 21:19:00 +0200 Subject: [PATCH 13/30] remove unused GlyphLessFont files --- training/GlyphLessFont.c | 631 --------------------------------------- training/GlyphLessFont.h | 228 -------------- 2 files changed, 859 deletions(-) delete mode 100644 training/GlyphLessFont.c delete mode 100644 training/GlyphLessFont.h diff --git a/training/GlyphLessFont.c b/training/GlyphLessFont.c deleted file mode 100644 index 6aba1f795..000000000 --- a/training/GlyphLessFont.c +++ /dev/null @@ -1,631 +0,0 @@ -/* I don't expect anyone to run this program, ever again. It is - * included primarily as documentation for how the GlyphLessFont was - * created. 
- */ - -#include -#include -#include -#include "GlyphLessFont.h" - -#define LITTLE_ENDIAN - -Offset_Table Offsets = { -#ifdef LITTLE_ENDIAN - 0x00000100, /* sfnt_version */ - 0x0A00, /* numTables (10) */ - 0x8000, /* searchRange = Max power of 2 <= numTables*16 (128) */ - 0x0300, /* entrySelector Log2(searchRange) (3) */ - 0x2000, /* rangeShift = numTables*16 - searchRange (160 - 128 = 32) */ -#else - 0x00010000, /* sfnt_version */ - 0x000A, /* numTables (10) */ - 0x0080, /* searchRange = Max power of 2 <= numTables*16 (128) */ - 0x0003, /* entrySelector Log2(searchRange) (3) */ - 0x0020, /* rangeShift = numTables*16 - searchRange (160 - 128 = 32) */ -#endif -}; - -head_table head = { -#ifdef LITTLE_ENDIAN - 0x00000100, /* sfnt_version */ - 0x00000100, /* font_version */ - 0, /* checksum adjustment */ - 0xF53C0F5F, /* Magic number */ - 0x0704, /* flags: - * Bit 0 - 1 - baseline of font at y = 0 - * Bit 1 - 1 - Left sidebearing at x = 0 - * Bit 2 - 0 - instructions not dependent on font size - * Bit 3 - 1 - force integer ppem - * Bit 4 - 0 - instructions may not alter advance width - * Bit 5 - 0 - Not laid out vertically - * Bit 6 - 0 - required to be 0 - * Bit 7 - 0 - Does not require layout for rendering - * Bit 8 - 0 - Not an AAT font with metamorphosis - * Bit 9 - 0 - Not strongly right to left - * Bit 10 - 0 - Does not require indic-style rearrangements - * Bit 11 - 0 - Font data is not 'lossless' - * Bit 12 - 0 - Font not 'covnerted' - * Bit 13 - 0 - Not optimised for ClearType - * Bit 14 - 1 - This is a 'last resort' font - * Bit 15 - 0 - Reserved, must be 0 - */ - 0x0001, /* 16 units per em */ - 0x0,0x6EFC9ACF,/* Creation time */ - 0x0,0x6EFC9ACF,/* Modified time */ - 0, /* xMin */ - 0x0080, /* yMin */ - 0, /* xMax */ - 0x0100, /* yMax */ - 0x0000, /* macStyle (none) */ - 0x1000, /* Lowest readable size (16 pixels) */ - 0x0200, /* font direction (deprecated, should be 2) */ - 0, /* index to LOCA format (shorts) */ - 0 /* glyph data format (must be 0) */ 
-#else - 0x00010000, /* afnt version */ - 0x00010000, /* font version */ - 0, /* checksum adjustment */ - 0x5F0F3CF5, /* Magic number */ - 0x0407, /* flags: - * Bit 0 - 1 - baseline of font at y = 0 - * Bit 1 - 1 - Left sidebearing at x = 0 - * Bit 2 - 0 - instructions not dependent on font size - * Bit 3 - 1 - force integer ppem - * Bit 4 - 0 - instructions may not alter advance width - * Bit 5 - 0 - Not laid out vertically - * Bit 6 - 0 - required to be 0 - * Bit 7 - 0 - Does not require layout for rendering - * Bit 8 - 0 - Not an AAT font with metamorphosis - * Bit 9 - 0 - Not strongly right to left - * Bit 10 - 0 - Does not require indic-style rearrangements - * Bit 11 - 0 - Font data is not 'lossless' - * Bit 12 - 0 - Font not 'covnerted' - * Bit 13 - 0 - Not optimised for ClearType - * Bit 14 - 1 - This is a 'last resort' font - * Bit 15 - 0 - Reserved, must be 0 - */ - 0x0100, /* 16 units per em */ - 0x0,0xCF9AFC6E,/* Creation time */ - 0x0,0xCF9AFC6E,/* Modified time */ - 0, /* xMin */ - 0xFFFF, /* yMin */ - 0, /* xMax */ - 0x001, /* yMax */ - 0, /* macStyle (none) */ - 0x0010, /* Lowest readable size (16 pixels) */ - 0x0002, /* font direction (deprecated, should be 2) */ - 0, /* index to LOCA format (shorts) */ - 0 /* glyph data format (must be 0) */ -#endif -}; - -hhea_table hhea = { -#ifdef LITTLE_ENDIAN - 0x00000100, /* table version */ - 0x0100, /* Ascender */ -#else - 0x00001000, /* table version */ - 0x0001, /* Ascender */ -#endif - 0xFFFF, /* Descender */ - 0x0000, /* LineGap */ - 0x0000, /* AdvanceWidthMax */ - 0x0000, /* MinLeftSideBearing */ - 0x0000, /* MinRightSideBearing */ - 0x0000, /* xMaxExtent */ -#ifdef LITTLE_ENDIAN - 0x0100, /* caretSlopeRise (1 = vertical) */ -#else - 0x0001, /* caretSlopeRise (1 = vertical) */ -#endif - 0x0000, /* caretslopeRun (0 = vertical) */ - 0x0000, /* caretOffset */ - 0x0000, /* Reserved1 */ - 0x0000, /* Reserved2 */ - 0x0000, /* Reserved3 */ - 0x0000, /* Reserved4 */ - 0x0000, /* merticDataFormat (must be 0) 
*/ -#ifdef LITTLE_ENDIAN - 0x0200, /* number of hMetric entries in hmtx */ -#else - 0x0002, /* number of hMetric entries in hmtx */ -#endif -}; - -maxp_table maxp = { -#ifdef LITTLE_ENDIAN - 0x00000100, /* table version */ - 0x0200, /* numGlyphs */ - 0x00000000, /* maxPoints */ - 0x00000000, /* maxContours */ - 0x00000000, /* maxCompositePoints */ - 0x00000000, /* maxCompositeContours */ - 0x00000100, /* maxZones */ - 0x00000000, /* maxTwilightPoints */ - 0x00000000, /* maxStorage */ - 0x00000000, /* maxFunctionDefs */ - 0x00000000, /* maxInstructionDefs */ - 0x00000000, /* maxStackElements */ - 0x00000000, /* maxSizeOfInstructions */ - 0x00000000, /* maxComponentElements */ - 0x00000000, /* maxComponentDepth */ -#else - 0x00001000, /* table version */ - 0x0002, /* numGlyphs */ - 0x00000000, /* maxPoints */ - 0x00000000, /* maxContours */ - 0x00000000, /* maxCompositePoints */ - 0x00000000, /* maxCompositeContours */ - 0x00000001, /* maxZones */ - 0x00000000, /* maxTwilightPoints */ - 0x00000000, /* maxStorage */ - 0x00000000, /* maxFunctionDefs */ - 0x00000000, /* maxInstructionDefs */ - 0x00000000, /* maxStackElements */ - 0x00000000, /* maxSizeOfInstructions */ - 0x00000000, /* maxComponentElements */ - 0x00000000, /* maxComponentDepth */ -#endif -}; - -OS2_table OS2 = { -#ifdef LITTLE_ENDIAN - 0x0300, /* table version */ - 0x0000, /* xAvgCharWidth */ - 0x9001, /* usWeight Class (400 = FW_NORMAL) */ - 0x0500, /* usWidthClass (5 = FWIDTH_NORMAL) */ - 0x0000, /* fsType (0 = no embedding restrictions) */ - 0x0000, /* ySubscriptXSize */ - 0x0000, /* ySubscriptYSize */ - 0x0000, /* ySubscriptXOffset */ - 0x0000, /* ySubscriptYOffset */ - 0x0000, /* ySuperscriptXSize */ - 0x0000, /* ySuperscriptYSize */ - 0x0000, /* ySuperscriptXOffset */ - 0x0000, /* ySuperscriptYOffset */ - 0x0000, /* yStikeoutPosition */ - 0x0000, /* sFamilyClass (0 = no classification) */ - 0,5,0,1,0,1,0,0,0,0,0, /* PANOSE */ - 0x00000000, /* ulUnicodeRanges1 */ - 0x00000000, /* ulUnicodeRanges2 
*/ - 0x00000000, /* ulUnicodeRanges3 */ - 0x00000000, /* ulUnicodeRanges4 */ - 'G', 'O', 'O', 'G', /* achVendID (GOOG = Google) */ - 0x4000, /* fsSelection (bit 6 set = regular font) */ - 0xFFFF, /* fsFirstCharIndex */ - 0x0000, /* fsLastCharIndex */ - 0x0100, /* sTypoAscender */ - 0xFFFF, /* StypoDescender */ - 0x0000, /* STypoLineGap */ - 0x0100, /* usWinAscent */ - 0x0100, /* usWinDescent */ - 0x00000080,/* ulCodePageRange1 */ - 0x00000000,/* ulCodePageRange2 */ - 0x0000, /* sxHeight */ - 0x0000, /* sCapHeight */ - 0x0000, /* usDefaultChar */ - 0x0100, /* usBreakChar */ - 0x0000, /* usMaxContent */ -#else - 0x0003, /* table version */ - 0x0000, /* xAvgCharWidth */ - 0x0190, /* usWeight Class (400 = FW_NORMAL) */ - 0x0005, /* usWidthClass (5 = FWIDTH_NORMAL) */ - 0x0000, /* fsType (0 = no embedding restrictions) */ - 0x0000, /* ySubscriptXSize */ - 0x0000, /* ySubscriptYSize */ - 0x0000, /* ySubscriptXOffset */ - 0x0000, /* ySubscriptYOffset */ - 0x0000, /* ySuperscriptXSize */ - 0x0000, /* ySuperscriptYSize */ - 0x0000, /* ySuperscriptXOffset */ - 0x0000, /* ySuperscriptYOffset */ - 0x0000, /* yStikeoutPosition */ - 0x0000, /* sFamilyClass (0 = no classification) */ - 0,5,0,1,0,1,0,0,0,0,0, /* PANOSE */ - 0x00000000,/* ulUnicodeRanges1 */ - 0x00000000,/* ulUnicodeRanges2 */ - 0x00000000,/* ulUnicodeRanges3 */ - 0x00000000,/* ulUnicodeRanges4 */ - 'G', 'O', 'O', 'G', /* achVendID (GOOG = Google) */ - 0x0040, /* fsSelection (bit 6 set = regular font) */ - 0xFFFF, /* fsFirstCharIndex */ - 0x0000, /* fsLastCharIndex */ - 0x0001, /* sTypoAscender */ - 0xFFFF, /* StypoDescender */ - 0x0000, /* STypoLineGap */ - 0x0001, /* usWinAscent */ - 0x0001, /* usWinDescent */ - 0x80000000,/* ulCodePageRange1 */ - 0x00000000,/* ulCodePageRange2 */ - 0x0000, /* sxHeight */ - 0x0000, /* sCapHeight */ - 0x0000, /* usDefaultChar */ - 0x0001, /* usBreakChar */ - 0x0000, /* usMaxContent */ -#endif -}; - -hmtx_table hmtx = { -0x0000, 0x0000, -0x0000, 0x0000 -}; - -cmap_table cmap = { - 
0x0000, /* Cmap version (0) */ -#ifdef LITTLE_ENDIAN - 0x0200, /* numTables (2) */ - 0x0100, /* Start of first subtable record, platformID = 1 */ - 0x0000, /* encodingID = 0 */ - 0x14000000, /* Offset of data */ - 0x0300, /* Start of second subtable record, platformID = 3 */ - 0x0000, /* encodingID = 0 */ - 0x20000000, /* Offset of data */ - 0x0600, /* STart of Apple table (format 6) */ - 0x0C00, /* length of table (12) */ - 0x0000, /* Language must be 0 for non-Apple or - non-specific language */ - 0x0000, /* firstCode = 0 */ - 0x0100, /* number of codes is 1 */ - 0x0000, /* GID is 0 */ - 0x0600, /* Start of MS Table (format 4) */ - 0x0C00, /* length of table (12) */ - 0x0000, /* Language must be 0 for non-Apple or - non-specific language */ - 0x0000, /* firstCode = 0 */ - 0x0100, /* number of codes is 1 */ - 0x0000, /* GID is 0 */ -#else - 0x0002, /* numTables (2) */ - 0x0001, - 0x0000, - 0x00000014, - 0x0003, - 0x0000, - 0x00000020, - 0x0006, - 0x000C, - 0x0000, - 0x0000, - 0x0001, - 0x0000, - 0x0006, - 0x000C, - 0x0000, - 0x0000, - 0x0001, - 0x0000, -#endif -}; - -/* Changing these strings requires you to change the offset and lengths - in the name table below */ -char Macnamestring[] = {'V', 'e', 'r', 's', 'i', 'o', 'n', ' ', '1', '.', '0'}; -char Unamestring[] = {0x00, 'V', 0x00, 'e', 0x00, 'r', 0x00, 's', 0x00, 'i', - 0x00, 'o', 0x00, 'n', 0x00, ' ', 0x00, '1', 0x00, '.', - 0x00, '0', 0x00, 0x00, 0x00}; -name_table name = { - 0x0000, /* format 0 */ -#ifdef LITTLE_ENDIAN - 0x0300, /* 3 records */ - 0x2A00, /* Offset of string storage */ - - 0x0000, /* Start of 1st name record, platform = 0 (Unicode) */ - 0x0300, /* Platform-specific ID = 0 */ - 0x0000, /* Language ID (0 = none) */ - 0x0500, /* name ID (5 = version string) */ - 0x1600, /* String length */ - 0x0B00, /* Offset from start of storage */ - - 0x0100, /* Start of 2nd name record, platform = 1 (Mac) */ - 0x0000, - 0x0000, - 0x0500, /* name ID (5 = version string) */ - 0x0B00, /* String length */ - 
0x0000, /* Offset from start of storage */ - - 0x0300, /* Start of 3rd name record, platform = 3 */ - 0x0100, /* Platform-specific ID = 1 */ - 0x0904, /* Language ID (0x409 = US English) */ - 0x0500, /* name ID (5 = version string) */ - 0x1600, /* String length */ - 0x0B00, /* Offset from start of storage */ -#else - 0x0003, /* 3 record2 */ - 0x002A, /* Offset of string storage */ - - 0x0000, /* Start of 1st name record, platform = 0 (Unicode) */ - 0x0003, /* Platform-specific ID = 0 */ - 0x0000, /* Language ID (0 = none) */ - 0x0005, /* name ID (5 = version string) */ - 0x0016, /* String length */ - 0x000B, /* Offset from start of storage */ - - 0x0001, /* Start of 2nd name record, platform = 1 (Mac) */ - 0x0000, - 0x0000, - 0x0500, /* name ID (5 = version string) */ - 0x000B, /* String length */ - 0x0000, /* Offset from start of storage */ - - 0x0003, /* Start of 3rd name record, platform = 3 */ - 0x0001, /* Platform-specific ID = 0 */ - 0x0409, /* Language ID (0 = none) */ - 0x0005, /* name ID (5 = version string) */ - 0x0016, /* String length */ - 0x000B, /* Offset from start of storage */ -#endif -}; - -post_table post = { -#ifdef LITTLE_ENDIAN - 0x0100, /* Version (2) */ -#else - 0x0001, /* Version (2) */ -#endif - 0x00000000, /* italicAngle */ - 0x0000, /* underlinePosition */ - 0x0000, /* underlineThickness */ -#ifdef LITTLE_ENDIAN - 0x01000000, /* isFixedPitch */ -#else - 0x00000001, /* isFixedPitch */ -#endif - 0x00000000, /* minMemType42 */ - 0x00000000, /* maxMemType42 */ - 0x00000000, /* minMemType1 */ - 0x00000000, /* maxMemType1 */ -}; - -int main (int argc, char **argv) -{ - FILE *OutputFile; - TableRecord Table[10]; - unsigned long offset = - sizeof(Offset_Table) + (sizeof(TableRecord) * 10), - length = 0, checksum = 0, HeadTableOffset, Working; - short fword = -1; - short loca = 0; - long glyf = 0; - unsigned int NameLength, i, FileLength; - - printf("Ken's Glyph-free font creator\n"); - if (argc != 2) { - fprintf (stderr, "Usage: GlyphLessFont 
\n"); - exit (1); - } - - OutputFile = fopen (argv[1], "wb+"); - if (OutputFile == 0) { - fprintf (stderr, "Couldn't open file %s for writing\n", argv[1]); - exit (1); - } - - fwrite (&Offsets, sizeof(Offset_Table), 1, OutputFile); - memset(&Table, 0x00, sizeof(TableRecord) + 10); - fwrite (&Table, sizeof (TableRecord), 10, OutputFile); - offset = ftell(OutputFile); - Table[3].offset = HeadTableOffset = offset; - - /* The whole business of writing a TrueType file is complicated by - * the way its laid out Firstly there is the fact that it wants - * the tables to be laid out in alphabetical order, but it wants - * the actual table data (which the table record points to) to be - * in quite a different order. Then there's the requirement to - * have all the table offsets be a multiple of 4 bytes. Finally - * we have to calculate a checksum for each table as well, which - * we cna't realistically do until we have written the table data, - * but which gets stored in the table record at the start of the - * file. - * - * So we start by writing a dumm set of table records, we'll fill - * in the array as we go and once we've written all the data and - * worked out the offsets and checksums of all the tables, we'll - * come back and write the table records into the area we left - * reserved. 
- */ - fwrite (&head, sizeof(head_table), 1, OutputFile); - offset = ftell(OutputFile); - Table[4].offset = offset; - - fwrite (&hhea, sizeof(hhea_table), 1, OutputFile); - offset = ftell(OutputFile); - Table[7].offset = offset; - - fwrite (&maxp, sizeof(maxp_table), 1, OutputFile); - offset = ftell(OutputFile); - Table[0].offset = offset; - - fwrite (&OS2, sizeof(OS2_table), 1, OutputFile); - offset = ftell(OutputFile); - Table[5].offset = offset; - - fwrite (&hmtx, sizeof(hmtx_table), 1, OutputFile); - offset = ftell(OutputFile); - Table[1].offset = offset; - - fwrite (&cmap, sizeof(cmap_table), 1, OutputFile); - offset = ftell(OutputFile); - Table[6].offset = offset; - - fwrite (&loca, sizeof(short), 1, OutputFile); - fwrite (&loca, sizeof(short), 1, OutputFile); - fwrite (&loca, sizeof(short), 1, OutputFile); - fwrite (&loca, sizeof(short), 1, OutputFile); - offset = ftell(OutputFile); - Table[2].offset = offset; - - fwrite (&glyf, sizeof(long), 1, OutputFile); - offset = ftell(OutputFile); - Table[8].offset = offset; - - length = (sizeof(name_table) + sizeof(Macnamestring) + - sizeof(Unamestring) + 3) / 4; - length *= 4; - NameLength = length; - fwrite (&name, sizeof(name_table), 1, OutputFile); - fwrite (&Macnamestring, sizeof(Macnamestring), 1, OutputFile); - fwrite (&Unamestring, NameLength - - (sizeof(name_table) + sizeof(Macnamestring)), 1, OutputFile); - offset = ftell(OutputFile); - Table[9].offset = offset; - - fwrite (&post, sizeof(post_table), 1, OutputFile); - FileLength = ftell(OutputFile); - - Table[3].tag[0] = 'h'; - Table[3].tag[1] = 'e'; - Table[3].tag[2] = 'a'; - Table[3].tag[3] = 'd'; - Table[3].checkSum = 0; - Table[3].length = sizeof(head_table) - 2; /* Don't count size - of padding bytes in table */ - - Table[4].tag[0] = 'h'; - Table[4].tag[1] = 'h'; - Table[4].tag[2] = 'e'; - Table[4].tag[3] = 'a'; - Table[4].checkSum = 0; - Table[4].length = sizeof(hhea_table); - - Table[7].tag[0] = 'm'; - Table[7].tag[1] = 'a'; - Table[7].tag[2] = 'x'; 
- Table[7].tag[3] = 'p'; - Table[7].checkSum = 0; - Table[7].length = sizeof(maxp_table); - - Table[0].tag[0] = 'O'; - Table[0].tag[1] = 'S'; - Table[0].tag[2] = '/'; - Table[0].tag[3] = '2'; - Table[0].checkSum = 0; - Table[0].length = sizeof(OS2_table); - - Table[5].tag[0] = 'h'; - Table[5].tag[1] = 'm'; - Table[5].tag[2] = 't'; - Table[5].tag[3] = 'x'; - Table[5].checkSum = 0; - Table[5].length = sizeof(hmtx_table); - - Table[1].tag[0] = 'c'; - Table[1].tag[1] = 'm'; - Table[1].tag[2] = 'a'; - Table[1].tag[3] = 'p'; - Table[1].checkSum = 0; - Table[1].length = sizeof(cmap_table); - - Table[6].tag[0] = 'l'; - Table[6].tag[1] = 'o'; - Table[6].tag[2] = 'c'; - Table[6].tag[3] = 'a'; - Table[6].checkSum = 0; - Table[6].length = sizeof(USHORT) * 3; - - Table[2].tag[0] = 'g'; - Table[2].tag[1] = 'l'; - Table[2].tag[2] = 'y'; - Table[2].tag[3] = 'f'; - Table[2].checkSum = 0; - Table[2].length = 1; - - Table[8].tag[0] = 'n'; - Table[8].tag[1] = 'a'; - Table[8].tag[2] = 'm'; - Table[8].tag[3] = 'e'; - Table[8].checkSum = 0; - Table[8].length = (sizeof(name_table) + - sizeof(Macnamestring) + - sizeof(Unamestring) + 3) / 4; - Table[8].length *= 4; - NameLength = Table[8].length; - - Table[9].tag[0] = 'p'; - Table[9].tag[1] = 'o'; - Table[9].tag[2] = 's'; - Table[9].tag[3] = 't'; - Table[9].checkSum = 0; - Table[9].length = sizeof(post_table); - - for (i=0;i<10;i++) { - ULONG LENGTH, Sum = 0L; - ULONG *EndPtr, *Data, *Current; - - offset = Table[i].offset; - length = Table[i].length; - LENGTH = (length + 3 & ~3); - Data = (ULONG *)malloc(LENGTH); - memset(Data, 0x00, LENGTH); - fseek(OutputFile, offset, SEEK_SET); - fread(Data, length, 1, OutputFile); - - Current = Data; - EndPtr = Data + (LENGTH / sizeof(ULONG)); - while(Current < EndPtr){ -#ifdef LITTLE_ENDIAN - Working = *Current++; - Sum += ((Working & 0xff) << 24) + - ((Working & 0xff00) << 8) + - ((Working & 0xff0000) >> 8) + - (Working >> 24); -#else - Sum += *Current++; -#endif - } - free(Data); - -#ifdef 
LITTLE_ENDIAN - Table[i].offset = - ((offset & 0xff) << 24) + - ((offset & 0xff00) << 8) + - ((offset & 0xff0000) >> 8) + - (offset >> 24); - Table[i].length = - ((length & 0xff) << 24) + - ((length & 0xff00) << 8) + - ((length & 0xff0000) >> 8) + - (length >> 24); - Table[i].checkSum = - ((Sum & 0xff) << 24) + - ((Sum & 0xff00) << 8) + - ((Sum & 0xff0000) >> 8) + - (Sum >> 24); -#else - Table[i].checkSum = Sum; -#endif - } - - fseek(OutputFile, sizeof(Offset_Table), SEEK_SET); - fwrite (&Table, sizeof(TableRecord), 10, OutputFile); - - fseek(OutputFile, 0, SEEK_SET); - - for (i=0;i < FileLength / sizeof(long);i++) { - fread(&Working, sizeof(long), 1, OutputFile); -#ifdef LITTLE_ENDIAN - checksum += ((Working & 0xff) << 24) + - ((Working & 0xff00) << 8) + - ((Working & 0xff0000) >> 8) + - (Working >> 24); -#else - checksum += Working; -#endif - } - checksum = 0xB1B0AFBA - checksum; -#ifdef LITTLE_ENDIAN - head.checkSumAdjustment = - ((checksum & 0xff) << 24) + - ((checksum & 0xff00) << 8) + - ((checksum & 0xff0000) >> 8) + - (checksum >> 24); -#else - head.checkSumAdjustment = checksum; -#endif - fseek(OutputFile, HeadTableOffset, SEEK_SET); - fwrite (&head, sizeof(head_table), 1, OutputFile); - fclose(OutputFile); - - return 0; -} diff --git a/training/GlyphLessFont.h b/training/GlyphLessFont.h deleted file mode 100644 index 97856a71c..000000000 --- a/training/GlyphLessFont.h +++ /dev/null @@ -1,228 +0,0 @@ -/* I don't expect anyone to run this program, ever again. It is - * included primarily as documentation for how the GlyphLessFont was - * created. - */ - -/* The OpenType data types, we'll duplicate the definitions so that - * the code shall be (as far as possible) self-documenting simply by - * referencing the OpenType specification. Note that the specification - * is soemwhat inconsistent with regards to usage, naming and capitalisation - * of the names for these data types. 
- */ -typedef char BYTE; -typedef char CHAR; -typedef unsigned short USHORT; -typedef short SHORT; -typedef struct _uint24 {char top8;unsigned short bottom16;} UINT24; -typedef unsigned long ULONG; -typedef long LONG; -typedef unsigned long Fixed; -typedef SHORT FWORD; -typedef USHORT UFWORD; -typedef unsigned short F2DOT14; -typedef struct _datetime {long upper;long lower;} LONGDATETIME; -typedef char Tag[4]; -typedef USHORT GlyphId; -typedef USHORT Offset; -typedef struct _longHorMetric {USHORT advanceWidth;SHORT lsb;} longHorMetric; - -/* And now definitions for each of the OpenType tables we will wish to use */ - -typedef struct { - Fixed sfnt_version; - USHORT numTables; - USHORT searchRange; - USHORT entrySelector; - USHORT rangeShift; -} Offset_Table; - -typedef struct { - Tag tag; /* The spec defines this as a ULONG, - but also as a 'Tag' in its own right */ - ULONG checkSum; - ULONG offset; - ULONG length; -} TableRecord; - -typedef struct { - USHORT version; - USHORT numTables; -} cmap_header; - -typedef struct { - USHORT platformID; - USHORT encodingID; - ULONG Offset; -} cmap_record; - -typedef struct { - USHORT format; - USHORT length; - USHORT language; - BYTE glyphIDArray[256]; -} format0_cmap_table; - -/* This structure only works for single segment format 4 tables, - for multiple segments it must be constructed */ -typedef struct { - USHORT format; - USHORT length; - USHORT language; - USHORT segCountx2; - USHORT searchRange; - USHORT entrySelector; - USHORT rangeShift; - USHORT endcount; - USHORT reservedPad; - USHORT startCount; - SHORT idDelta; - USHORT idRangeOffset; - USHORT glyphIdArray[2]; -} format4_cmap_table; - -typedef struct { - USHORT format; - USHORT length; - USHORT language; - USHORT firstCode; - USHORT entryCount; - USHORT glyphIDArray; -} format6_cmap_table; - -typedef struct { - cmap_header header; - cmap_record records[2]; - format6_cmap_table AppleTable; - format6_cmap_table MSTable; -} cmap_table; - -typedef struct { - Fixed 
version; - Fixed FontRevision; - ULONG checkSumAdjustment; - ULONG MagicNumber; - USHORT Flags; - USHORT unitsPerEm; - LONGDATETIME created; - LONGDATETIME modified; - SHORT xMin; - SHORT yMin; - SHORT xMax; - SHORT yMax; - USHORT macStyle; - USHORT lowestRecPPEM; - SHORT FontDirectionHint; - SHORT indexToLocFormat; - SHORT glyphDataFormat; - SHORT PAD; -} head_table; - -typedef struct { - Fixed version; - FWORD Ascender; - FWORD Descender; - FWORD LineGap; - UFWORD advanceWidthMax; - FWORD minLeftSideBearing; - FWORD minRightSideBearing; - FWORD xMaxExtent; - SHORT caretSlopeRise; - SHORT caretSlopeRun; - SHORT caretOffset; - SHORT reserved1; - SHORT reserved2; - SHORT reserved3; - SHORT reserved4; - SHORT metricDataFormat; - USHORT numberOfHMetrics; -} hhea_table; - -typedef struct { - longHorMetric hMetrics[2]; -} hmtx_table; - -typedef struct { - Fixed version; - USHORT numGlyphs; - USHORT maxPoints; - USHORT maxContours; - USHORT maxCompositePoints; - USHORT maxCompositeContours; - USHORT maxZones; - USHORT maxTwilightPoints; - USHORT maxStorage; - USHORT maxFunctionDefs; - USHORT maxInstructionDefs; - USHORT maxStackElements; - USHORT maxSizeOfInstructions; - USHORT maxComponentElements; - USHORT maxComponentDepth; -} maxp_table; - -typedef struct { - USHORT platformID; - USHORT encodingID; - USHORT languageID; - USHORT nameID; - USHORT length; - USHORT offset; -} NameRecord; - -typedef struct { - USHORT format; - USHORT count; - USHORT stringOffset; - NameRecord nameRecord[3]; -} name_table; - -typedef struct { - USHORT version; - SHORT xAvgCharWidth; - USHORT usWeightClass; - USHORT usWidthClass; - USHORT fsType; - SHORT ySubscriptXSize; - SHORT ySubscriptYSize; - SHORT ySubscriptXOffset; - SHORT ySubscriptYOffset; - SHORT ySuperscriptXSize; - SHORT ySuperscriptYSize; - SHORT ySuperscriptXOffset; - SHORT ySuperscriptYOffset; - SHORT yStrikeoutSize; - SHORT yStrikeoutPosition; - SHORT sFamilyClass; - BYTE panose[10]; - ULONG ulUnicodeRange1; - ULONG 
ulUnicodeRange2; - ULONG ulUnicodeRange3; - ULONG ulUnicodeRange4; - CHAR achVendID[4]; - USHORT fsSelection; - USHORT usFirstCharIndex; - USHORT usLastCharIndex; - SHORT sTypoAscender; - SHORT sTypoDescender; - SHORT sTypoLineGap; - USHORT usWinAscent; - USHORT usWinDescent; - ULONG ulCodePageRange1; - ULONG ulCodePageRange2; - SHORT sxHeight; - SHORT sCapHeight; - USHORT usDefaultChar; - USHORT usBreakChar; - USHORT usMaxContent; -} OS2_table; - -typedef struct { - Fixed version; - Fixed italicAngle; - FWORD underlinePosition; - FWORD underlineThickness; - ULONG isFixedPitch; - ULONG minMemType42; - ULONG maxMemType42; - ULONG minMemType1; - ULONG maxMemType1; -} post_table; From c4d273d33cc36ec7fee890315f8dfe2b59cd6f55 Mon Sep 17 00:00:00 2001 From: Amin Cheloh Date: Sat, 21 May 2016 17:51:04 +0700 Subject: [PATCH 14/30] fix invalid release year for V3.04.01 --- ChangeLog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 3fea2af8a..492d6984c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -2015-02-17 - V3.04.01 +2016-02-17 - V3.04.01 * Added OSD renderer for psm 0. Works for single page and multi-page images. * Improve tesstrain.sh script. * Simplify build and run of ScrollView. 
From cd1a14450c052a5c140c8b2784a2b68c3cb7b9bf Mon Sep 17 00:00:00 2001 From: amitdo Date: Sun, 22 May 2016 11:16:42 +0300 Subject: [PATCH 15/30] Training tools: Print help message when (argv == 1) --- training/commandlineflags.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/training/commandlineflags.cpp b/training/commandlineflags.cpp index 4b82bcdc3..1ea68fe0d 100644 --- a/training/commandlineflags.cpp +++ b/training/commandlineflags.cpp @@ -144,6 +144,12 @@ void PrintCommandLineFlags() { void ParseCommandLineFlags(const char* usage, int* argc, char*** argv, const bool remove_flags) { + if (*argc == 1) { + tprintf("USAGE: %s\n", usage); + PrintCommandLineFlags(); + exit(0); + } + unsigned int i = 1; for (i = 1; i < *argc; ++i) { const char* current_arg = (*argv)[i]; From 1b9d0688fa07e8c692ef56b59644450f331a00ed Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Mon, 16 May 2016 08:59:11 +0200 Subject: [PATCH 16/30] configure: Fix check for dependencies needed for training The different checks had set ENABLE_TRAINING unconditionally, thus overwriting the value from the preceding checks. So if pango and cairo were available, but icu was missing, users would still be offered to build the training tools. The changes for icu and has_cpp11 are not strictly necessary, but are made here to have uniform code patterns. Signed-off-by: Stefan Weil --- configure.ac | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/configure.ac b/configure.ac index d2e05ce6c..be9211d70 100644 --- a/configure.ac +++ b/configure.ac @@ -457,34 +457,36 @@ int i = 0; [AC_MSG_RESULT(yes)], [AC_MSG_FAILURE([leptonica 1.71 or higher is required])]) +AM_CONDITIONAL(ENABLE_TRAINING, true) + # Check location of icu headers have_icu=false AC_CHECK_HEADERS(unicode/uchar.h, have_icu=true, have_icu=false) if !($have_icu); then AC_MSG_WARN(Training tools WILL NOT be built because of missing icu library.) AC_MSG_WARN(Try to install libicu-devel package.) 
+ AM_CONDITIONAL(ENABLE_TRAINING, false) fi -AM_CONDITIONAL(ENABLE_TRAINING, $have_icu) # Check location of pango headers PKG_CHECK_MODULES(pango, pango, have_pango=true, have_pango=false) if !($have_pango); then AC_MSG_WARN(Training tools WILL NOT be built because of missing pango library.) AC_MSG_WARN(Try to install libpango1.0-dev package.) + AM_CONDITIONAL(ENABLE_TRAINING, false) else CPPFLAGS="$CPPFLAGS $pango_CFLAGS" fi -AM_CONDITIONAL(ENABLE_TRAINING, $have_pango) # Check location of cairo headers PKG_CHECK_MODULES(cairo, cairo, have_cairo=true, have_cairo=false) if !($have_cairo); then AC_MSG_WARN(Training tools WILL NOT be built because of missing cairo library.) AC_MSG_WARN(Try to install libcairo-dev?? package.) + AM_CONDITIONAL(ENABLE_TRAINING, false) else CPPFLAGS="$CPPFLAGS $cairo_CFLAGS" fi -AM_CONDITIONAL(ENABLE_TRAINING, $have_cairo) # set c++11 support based on platform/compiler if test "x$has_cpp11" = "xyes"; then @@ -507,7 +509,7 @@ if test "x$has_cpp11" = "xyes"; then esac else AC_MSG_WARN(Training tools WILL NOT be built because of missing c++11 support.) - AM_CONDITIONAL(ENABLE_TRAINING, [test "x$has_cpp11" = "xyes"]) + AM_CONDITIONAL(ENABLE_TRAINING, false) fi # ---------------------------------------- From dec38db7ced623a73efd9c30de81ba36891fa717 Mon Sep 17 00:00:00 2001 From: Heiko Oberdiek Date: Wed, 25 May 2016 16:26:41 +0200 Subject: [PATCH 17/30] Fix for constant kMaxDoubleSize (from 15 to 16), which is used by method STRING::add_str_double. --- ccutil/strngs.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ccutil/strngs.cpp b/ccutil/strngs.cpp index b44c54124..ff3bbac28 100644 --- a/ccutil/strngs.cpp +++ b/ccutil/strngs.cpp @@ -32,8 +32,8 @@ using tesseract::TFile; // possible length of an int (in 64 bits), being -<20 digits>. const int kMaxIntSize = 22; // Size of buffer needed to host the decimal representation of the maximum -// possible length of a %.8g being -0.12345678e+999 = 15. 
-const int kMaxDoubleSize = 15; +// possible length of a %.8g being -1.2345678e+999 = 16. +const int kMaxDoubleSize = 16; /********************************************************************** * STRING_HEADER provides metadata about the allocated buffer, From 4cbe9622d14572a460d27f5b1dbbce53d2e267fe Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Fri, 27 May 2016 15:30:17 +0200 Subject: [PATCH 18/30] configure: Enclose most macro arguments in [] This is not strictly necessary, but recommended in the GNU autoconf manual. No [] was added to arguments like true or false. Signed-off-by: Stefan Weil --- configure.ac | 297 ++++++++++++++++++++++++++------------------------- 1 file changed, 149 insertions(+), 148 deletions(-) diff --git a/configure.ac b/configure.ac index be9211d70..536ac3ca1 100644 --- a/configure.ac +++ b/configure.ac @@ -5,16 +5,16 @@ # ---------------------------------------- # Initialization # ---------------------------------------- -AC_PREREQ(2.50) +AC_PREREQ([2.50]) AC_INIT([tesseract], [3.05.00dev], [https://github.com/tesseract-ocr/tesseract/issues]) -AC_PROG_CXX(g++ clang++) +AC_PROG_CXX([g++ clang++]) AC_LANG([C++]) AC_LANG_COMPILER_REQUIRE CXXFLAGS=${CXXFLAGS:-""} AC_CONFIG_MACRO_DIR([m4]) -AC_CONFIG_AUX_DIR(config) -AC_CONFIG_SRCDIR(api/tesseractmain.cpp) -AC_PREFIX_DEFAULT(/usr/local) +AC_CONFIG_AUX_DIR([config]) +AC_CONFIG_SRCDIR([api/tesseractmain.cpp]) +AC_PREFIX_DEFAULT([/usr/local]) # Define date of package, etc. Could be useful in auto-generated # documentation. 
@@ -24,20 +24,20 @@ PACKAGE_DATE="07/11" abs_top_srcdir=`AS_DIRNAME([$0])` gitrev="`git --git-dir=${abs_top_srcdir}/.git --work-tree=${abs_top_srcdir} describe --always --tags`" if test -n "${gitrev}" ; then - AC_REVISION("${gitrev}") - AC_DEFINE_UNQUOTED(GIT_REV,"${gitrev}", [Define to be the git revision]) + AC_REVISION(["${gitrev}"]) + AC_DEFINE_UNQUOTED([GIT_REV], ["${gitrev}"], [Define to be the git revision]) echo "Using git revision: ${gitrev}" fi -AC_DEFINE_UNQUOTED(PACKAGE_NAME,["${PACKAGE_NAME}"],[Name of package]) -AC_DEFINE_UNQUOTED(PACKAGE_VERSION,["${PACKAGE_VERSION}"],[Version number]) -AC_DEFINE_UNQUOTED(PACKAGE_YEAR,"$PACKAGE_YEAR",[Official year for this release]) -AC_DEFINE_UNQUOTED(PACKAGE_DATE,"$PACKAGE_DATE",[Official date of release]) +AC_DEFINE_UNQUOTED([PACKAGE_NAME], ["${PACKAGE_NAME}"], [Name of package]) +AC_DEFINE_UNQUOTED([PACKAGE_VERSION], ["${PACKAGE_VERSION}"], [Version number]) +AC_DEFINE_UNQUOTED([PACKAGE_YEAR], ["$PACKAGE_YEAR"], [Official year for this release]) +AC_DEFINE_UNQUOTED([PACKAGE_DATE], ["$PACKAGE_DATE"], [Official date of release]) -AC_SUBST(PACKAGE_NAME) -AC_SUBST(PACKAGE_VERSION) -AC_SUBST(PACKAGE_YEAR) -AC_SUBST(PACKAGE_DATE) +AC_SUBST([PACKAGE_NAME]) +AC_SUBST([PACKAGE_VERSION]) +AC_SUBST([PACKAGE_YEAR]) +AC_SUBST([PACKAGE_DATE]) GENERIC_LIBRARY_NAME=tesseract @@ -49,17 +49,17 @@ GENERIC_MICRO_VERSION=0 # API version (often = GENERIC_MAJOR_VERSION.GENERIC_MINOR_VERSION) GENERIC_API_VERSION=$GENERIC_MAJOR_VERSION.$GENERIC_MINOR_VERSION GENERIC_LIBRARY_VERSION=$GENERIC_MAJOR_VERSION:$GENERIC_MINOR_VERSION -AC_SUBST(GENERIC_API_VERSION) -AC_SUBST(GENERIC_MAJOR_VERSION) +AC_SUBST([GENERIC_API_VERSION]) +AC_SUBST([GENERIC_MAJOR_VERSION]) -AC_SUBST(GENERIC_LIBRARY_VERSION) +AC_SUBST([GENERIC_LIBRARY_VERSION]) PACKAGE=$GENERIC_LIBRARY_NAME -AC_SUBST(GENERIC_LIBRARY_NAME) +AC_SUBST([GENERIC_LIBRARY_NAME]) GENERIC_VERSION=$GENERIC_MAJOR_VERSION.$GENERIC_MINOR_VERSION.$GENERIC_MICRO_VERSION 
GENERIC_RELEASE=$GENERIC_MAJOR_VERSION.$GENERIC_MINOR_VERSION -AC_SUBST(GENERIC_RELEASE) -AC_SUBST(GENERIC_VERSION) +AC_SUBST([GENERIC_RELEASE]) +AC_SUBST([GENERIC_VERSION]) # ---------------------------------------- # Automake configuration @@ -67,14 +67,14 @@ AC_SUBST(GENERIC_VERSION) # Do not require README file (we use README.md) AM_INIT_AUTOMAKE([foreign]) -AC_CONFIG_HEADERS(config_auto.h:config/config.h.in) +AC_CONFIG_HEADERS([config_auto.h:config/config.h.in]) AM_MAINTAINER_MODE # default conditional -AM_CONDITIONAL(T_WIN, false) -AM_CONDITIONAL(MINGW, false) -AM_CONDITIONAL(OSX, false) -AM_CONDITIONAL(GRAPHICS_DISABLED, false) +AM_CONDITIONAL([T_WIN], false) +AM_CONDITIONAL([MINGW], false) +AM_CONDITIONAL([OSX], false) +AM_CONDITIONAL([GRAPHICS_DISABLED], false) OPENCL_INC="/opt/AMDAPP/include" OPENCL_LIBS="-lOpenCL" @@ -86,115 +86,115 @@ OPENCL_LIBS="-lOpenCL" AC_CANONICAL_HOST case "${host_os}" in mingw*) - AC_DEFINE_UNQUOTED(MINGW,1,[This is a MinGW system]) - AM_CONDITIONAL(T_WIN, true) - AM_CONDITIONAL(MINGW, true) - AM_CONDITIONAL(ADD_RT, false) + AC_DEFINE_UNQUOTED([MINGW], 1, [This is a MinGW system]) + AM_CONDITIONAL([T_WIN], true) + AM_CONDITIONAL([MINGW], true) + AM_CONDITIONAL([ADD_RT], false) AC_SUBST([AM_LDFLAGS], ['-Wl,-no-undefined -Wl,--as-needed']) ;; cygwin*) - AM_CONDITIONAL(ADD_RT, false) - AM_CONDITIONAL(T_WIN, true) + AM_CONDITIONAL([ADD_RT], false) + AM_CONDITIONAL([T_WIN], true) AC_SUBST([AM_LDFLAGS], ['-Wl,-no-undefined -Wl,--as-needed']) ;; solaris*) LIBS="-lsocket -lnsl -lrt -lxnet" - AM_CONDITIONAL(ADD_RT, true) + AM_CONDITIONAL([ADD_RT], true) ;; *darwin*) OPENCL_LIBS="" OPENCL_INC="" - AM_CONDITIONAL(ADD_RT, false) + AM_CONDITIONAL([ADD_RT], false) ;; powerpc-*-darwin*) OPENCL_LIBS="" ;; *) # default - AM_CONDITIONAL(ADD_RT, true) + AM_CONDITIONAL([ADD_RT], true) ;; esac includedir="${includedir}/tesseract" -AC_ARG_WITH(extra-includes, - AC_HELP_STRING([--with-extra-includes=DIR], - [Define an additional directory for include 
files]), - [ if test -d "$withval" ; then - CFLAGS="$CFLAGS -I$withval" - else - AC_MSG_ERROR([Cannot stat directory $withval]) - fi ] ) +AC_ARG_WITH([extra-includes], + [AC_HELP_STRING([--with-extra-includes=DIR], + [Define an additional directory for include files])], + [if test -d "$withval" ; then + CFLAGS="$CFLAGS -I$withval" + else + AC_MSG_ERROR([Cannot stat directory $withval]) + fi]) -AC_ARG_WITH(extra-libraries, - AC_HELP_STRING([--with-extra-libraries=DIR], - [Define an additional directory for library files]), - [ if test -d "$withval" ; then - LDFLAGS="$LDFLAGS -L$withval" - else - AC_MSG_ERROR([Cannot stat directory $withval]) - fi ] ) +AC_ARG_WITH([extra-libraries], + [AC_HELP_STRING([--with-extra-libraries=DIR], + [Define an additional directory for library files])], + [if test -d "$withval" ; then + LDFLAGS="$LDFLAGS -L$withval" + else + AC_MSG_ERROR([Cannot stat directory $withval]) + fi]) -AC_MSG_CHECKING(--enable-graphics argument) +AC_MSG_CHECKING([--enable-graphics argument]) AC_ARG_ENABLE([graphics], [AC_HELP_STRING([--enable-graphics],[enable graphics (ScrollView) (default)]) AC_HELP_STRING([--disable-graphics],[disable graphics (ScrollView)])], [enable_graphics=$enableval], [enable_graphics="yes"]) -AC_MSG_RESULT($enable_graphics) +AC_MSG_RESULT([$enable_graphics]) if test "$enable_graphics" = "no"; then AC_DEFINE([GRAPHICS_DISABLED], [], [Disable graphics]) - AM_CONDITIONAL(GRAPHICS_DISABLED, true) + AM_CONDITIONAL([GRAPHICS_DISABLED], true) fi # Check if cube should be disabled -AC_MSG_CHECKING(whether to disable cube) +AC_MSG_CHECKING([whether to disable cube]) AC_ARG_ENABLE([cube], [AC_HELP_STRING([--disable-cube], [don't build cube support (experimental)])], [disable_cube="yes"], [disable_cube="no"]) -AC_MSG_RESULT($disable_cube) +AC_MSG_RESULT([$disable_cube]) AM_CONDITIONAL([NO_CUBE_BUILD], [test "$disable_cube" = "yes"]) if test "$disable_cube" = "yes"; then AC_SUBST([AM_CPPFLAGS], [-DNO_CUBE_BUILD]) fi # check whether to build 
embedded version -AC_MSG_CHECKING(--enable-embedded argument) +AC_MSG_CHECKING([--enable-embedded argument]) AC_ARG_ENABLE([embedded], [ --enable-embedded enable embedded build (default=no)], [enable_embedded=$enableval], [enable_embedded="no"]) -AC_MSG_RESULT($enable_embedded) +AC_MSG_RESULT([$enable_embedded]) AM_CONDITIONAL([EMBEDDED], [test "$enable_embedded" = "yes"]) if test "$enable_embedded" = "yes"; then AC_SUBST([AM_CPPFLAGS], [-DEMBEDDED]) fi # check whether to build OpenMP support -AM_CONDITIONAL(OPENMP, false) +AM_CONDITIONAL([OPENMP], false) AC_OPENMP AS_IF([test "x$OPENMP_CFLAGS" != "x"], - AM_CONDITIONAL([OPENMP], true) - AC_SUBST(AM_CPPFLAGS,"$OPENMP_CXXFLAGS") - AC_DEFINE([OPENMP], [], [Defined when compiled with OpenMP support]) + [AM_CONDITIONAL([OPENMP], true) + AC_SUBST([AM_CPPFLAGS], ["$OPENMP_CXXFLAGS"]) + AC_DEFINE([OPENMP], [], [Defined when compiled with OpenMP support])] ) # check whether to build opencl version -AC_MSG_CHECKING(--enable-opencl argument) +AC_MSG_CHECKING([--enable-opencl argument]) AC_ARG_ENABLE([opencl], [ --enable-opencl enable opencl build (default=no)], [enable_opencl=$enableval], [enable_opencl="no"]) -AC_MSG_RESULT($enable_opencl) +AC_MSG_RESULT([$enable_opencl]) # check for opencl header have_opencl=false -AC_CHECK_HEADERS(CL/cl.h, have_opencl=true, [ +AC_CHECK_HEADERS([CL/cl.h], [have_opencl=true], [ AC_CHECK_HEADERS(OpenCL/cl.h, have_opencl=true, have_opencl=false) ]) have_tiff=false -AC_CHECK_HEADERS(tiffio.h, have_tiff=true, have_tiff=false) +AC_CHECK_HEADERS([tiffio.h], [have_tiff=true], [have_tiff=false]) # https://lists.apple.com/archives/unix-porting/2009/Jan/msg00026.html m4_define([MY_CHECK_FRAMEWORK], @@ -207,10 +207,10 @@ m4_define([MY_CHECK_FRAMEWORK], LIBS="$save_LIBS" ]) if test "$my_cv_framework_$1"="yes"; then - AC_DEFINE(AS_TR_CPP([HAVE_FRAMEWORK_$1]),1, + AC_DEFINE(AS_TR_CPP([HAVE_FRAMEWORK_$1]), 1, [Define if you have the $1 framework]) - AS_TR_CPP([FRAMEWORK_$1])="-framework $1" - 
AC_SUBST(AS_TR_CPP([FRAMEWORK_$1])) + AS_TR_CPP([FRAMEWORK_$1])="-framework $1" + AC_SUBST(AS_TR_CPP([FRAMEWORK_$1])) fi] ) @@ -226,7 +226,7 @@ case "${host_os}" in fi if test "$enable_opencl" = "yes"; then if !($have_opencl_lib); then - AC_MSG_ERROR(Required OpenCL library not found!) + AC_MSG_ERROR([Required OpenCL library not found!]) fi AC_SUBST([AM_CPPFLAGS], [-DUSE_OPENCL]) OPENCL_CPPFLAGS="" @@ -235,16 +235,17 @@ case "${host_os}" in ;; *) # default - AC_CHECK_LIB(OpenCL, clGetPlatformIDs, have_opencl_lib=true, have_opencl_lib=false) + AC_CHECK_LIB([OpenCL], [clGetPlatformIDs], + [have_opencl_lib=true], [have_opencl_lib=false]) if test "$enable_opencl" = "yes"; then if !($have_opencl); then - AC_MSG_ERROR(Required OpenCL headers not found!) + AC_MSG_ERROR([Required OpenCL headers not found!]) fi if !($have_opencl_lib); then - AC_MSG_ERROR(Required OpenCL library not found!) + AC_MSG_ERROR([Required OpenCL library not found!]) fi if !($have_tiff); then - AC_MSG_ERROR(Required TIFF headers not found! Try to install libtiff-dev?? package.) + AC_MSG_ERROR([Required TIFF headers not found! Try to install libtiff-dev?? 
package.]) fi AC_SUBST([AM_CPPFLAGS], [-DUSE_OPENCL]) OPENCL_CPPFLAGS="-I${OPENCL_INC}" @@ -253,46 +254,46 @@ case "${host_os}" in ;; esac AM_CONDITIONAL([USE_OPENCL], [test "$enable_opencl" = "yes"]) -AC_SUBST(OPENCL_CPPFLAGS) -AC_SUBST(OPENCL_LDFLAGS) +AC_SUBST([OPENCL_CPPFLAGS]) +AC_SUBST([OPENCL_LDFLAGS]) # check whether to build tesseract with -fvisibility=hidden -fvisibility-inlines-hidden # http://gcc.gnu.org/wiki/Visibility # http://groups.google.com/group/tesseract-dev/browse_thread/thread/976645ae98189127 -AC_MSG_CHECKING(--enable-visibility argument) +AC_MSG_CHECKING([--enable-visibility argument]) AC_ARG_ENABLE([visibility], [AC_HELP_STRING([--enable-visibility],[enable experimental build with fvisibility (default=no)])], [enable_visibility=$enableval], [enable_visibility="no"]) -AC_MSG_RESULT($enable_visibility) +AC_MSG_RESULT([$enable_visibility]) AM_CONDITIONAL([VISIBILITY], [test "$enable_visibility" = "yes"]) # check whether to build multiple libraries -AC_MSG_CHECKING(--enable-multiple-libraries argument) +AC_MSG_CHECKING([--enable-multiple-libraries argument]) AC_ARG_ENABLE([multiple-libraries], [AC_HELP_STRING([--enable-multiple-libraries],[enable multiple libraries (default=no)])], [enable_mlibs=$enableval], [enable_mlibs="no"]) -AC_MSG_RESULT($enable_mlibs) +AC_MSG_RESULT([$enable_mlibs]) AM_CONDITIONAL([USING_MULTIPLELIBS], [test "$enable_mlibs" = "yes"]) # Check if tessdata-prefix is disabled -AC_MSG_CHECKING(whether to use tessdata-prefix) -AC_ARG_ENABLE(tessdata-prefix, +AC_MSG_CHECKING([whether to use tessdata-prefix]) +AC_ARG_ENABLE([tessdata-prefix], [AC_HELP_STRING([--disable-tessdata-prefix], [don't set TESSDATA-PREFIX during compile])], [tessdata_prefix="no"], [tessdata_prefix="yes"]) -AC_MSG_RESULT($tessdata_prefix) +AC_MSG_RESULT([$tessdata_prefix]) AM_CONDITIONAL([NO_TESSDATA_PREFIX], [test "$tessdata_prefix" = "no"]) # Check whether enable debuging -AC_MSG_CHECKING(whether to enable debugging) +AC_MSG_CHECKING([whether to enable 
debugging]) AC_ARG_ENABLE([debug], [AC_HELP_STRING([--enable-debug], [turn on debugging (default=no)])], [debug=$enableval], [debug="no"]) -AC_MSG_RESULT($debug) +AC_MSG_RESULT([$debug]) if test x"$debug" = x"yes"; then AM_CXXFLAGS="$AM_CXXFLAGS -g -Wall -Wno-uninitialized -O0 -DDEBUG" AM_CPPFLAGS="$AM_CPPFLAGS -g -Wall -Wno-uninitialized -O0 -DDEBUG" @@ -361,7 +362,7 @@ dnl turn on c++11 dnl ******************** OLD_CXXFLAGS=$CXXFLAGS -AC_MSG_CHECKING(whether compiler supports C++11) +AC_MSG_CHECKING([whether compiler supports C++11]) CXXFLAGS="$CXXFLAGS -std=c++11" snprintfworks=no AC_COMPILE_IFELSE( @@ -379,14 +380,14 @@ AC_COMPILE_IFELSE( AC_MSG_RESULT(no) has_cpp11=no ]) -AC_CHECK_FUNCS(snprintf,, [snprintfworks=yes]) +AC_CHECK_FUNCS([snprintf],, [snprintfworks=yes]) CXXFLAGS="$OLD_CXXFLAGS" # ---------------------------------------- # Check for libraries # ---------------------------------------- -AC_SEARCH_LIBS(sem_init,pthread rt) +AC_SEARCH_LIBS([sem_init], [pthread rt]) # ---------------------------------------- @@ -396,8 +397,8 @@ AC_SEARCH_LIBS(sem_init,pthread rt) AC_HEADER_STDC AC_HEADER_TIME AC_HEADER_SYS_WAIT -AC_CHECK_HEADERS(sys/ipc.h sys/shm.h) -AC_CHECK_HEADERS(limits.h malloc.h) +AC_CHECK_HEADERS([sys/ipc.h sys/shm.h]) +AC_CHECK_HEADERS([limits.h malloc.h]) # Enable use of system-defined bool type if available: AC_HEADER_STDBOOL @@ -411,18 +412,18 @@ AC_CHECK_FUNCS([getline]) # Checks for typedefs, structures, and compiler characteristics. 
# ---------------------------------------- -AC_CHECK_TYPES(wchar_t,,,[#include "wchar.h"]) -AC_CHECK_TYPES(long long int) -AC_CHECK_TYPES(off_t,,,[#include "sys/types.h"]) -AC_CHECK_TYPES(mbstate_t,,,[#include "wchar.h"]) +AC_CHECK_TYPES([wchar_t],,, [#include "wchar.h"]) +AC_CHECK_TYPES([long long int]) +AC_CHECK_TYPES([off_t],,, [#include "sys/types.h"]) +AC_CHECK_TYPES([mbstate_t],,, [#include "wchar.h"]) # ---------------------------------------- # Test auxiliary packages # ---------------------------------------- # Check location of leptonica/liblept headers. -AC_MSG_CHECKING(for leptonica) -AC_ARG_VAR(LIBLEPT_HEADERSDIR,[Leptonica headers directory]) +AC_MSG_CHECKING([for leptonica]) +AC_ARG_VAR([LIBLEPT_HEADERSDIR], [Leptonica headers directory]) have_lept=no if test "$LIBLEPT_HEADERSDIR" = "" ; then @@ -441,7 +442,8 @@ done if test "$have_lept" = yes ; then AC_MSG_RESULT(yes) - AC_CHECK_LIB(lept,l_generateCIDataForPdf,[], AC_MSG_ERROR([leptonica library with pdf support (>= 1.71) is missing])) + AC_CHECK_LIB([lept], [l_generateCIDataForPdf], [], + [AC_MSG_ERROR([leptonica library with pdf support (>= 1.71) is missing])]) else AC_MSG_ERROR([leptonica not found]) fi @@ -457,33 +459,33 @@ int i = 0; [AC_MSG_RESULT(yes)], [AC_MSG_FAILURE([leptonica 1.71 or higher is required])]) -AM_CONDITIONAL(ENABLE_TRAINING, true) +AM_CONDITIONAL([ENABLE_TRAINING], true) # Check location of icu headers have_icu=false -AC_CHECK_HEADERS(unicode/uchar.h, have_icu=true, have_icu=false) +AC_CHECK_HEADERS([unicode/uchar.h], [have_icu=true], [have_icu=false]) if !($have_icu); then - AC_MSG_WARN(Training tools WILL NOT be built because of missing icu library.) - AC_MSG_WARN(Try to install libicu-devel package.) 
- AM_CONDITIONAL(ENABLE_TRAINING, false) + AC_MSG_WARN([Training tools WILL NOT be built because of missing icu library.]) + AC_MSG_WARN([Try to install libicu-devel package.]) + AM_CONDITIONAL([ENABLE_TRAINING], false) fi # Check location of pango headers -PKG_CHECK_MODULES(pango, pango, have_pango=true, have_pango=false) +PKG_CHECK_MODULES([pango], [pango], [have_pango=true], [have_pango=false]) if !($have_pango); then - AC_MSG_WARN(Training tools WILL NOT be built because of missing pango library.) - AC_MSG_WARN(Try to install libpango1.0-dev package.) - AM_CONDITIONAL(ENABLE_TRAINING, false) + AC_MSG_WARN([Training tools WILL NOT be built because of missing pango library.]) + AC_MSG_WARN([Try to install libpango1.0-dev package.]) + AM_CONDITIONAL([ENABLE_TRAINING], false) else CPPFLAGS="$CPPFLAGS $pango_CFLAGS" fi # Check location of cairo headers -PKG_CHECK_MODULES(cairo, cairo, have_cairo=true, have_cairo=false) +PKG_CHECK_MODULES([cairo], [cairo], [have_cairo=true], [have_cairo=false]) if !($have_cairo); then - AC_MSG_WARN(Training tools WILL NOT be built because of missing cairo library.) - AC_MSG_WARN(Try to install libcairo-dev?? package.) - AM_CONDITIONAL(ENABLE_TRAINING, false) + AC_MSG_WARN([Training tools WILL NOT be built because of missing cairo library.]) + AC_MSG_WARN([Try to install libcairo-dev?? package.]) + AM_CONDITIONAL([ENABLE_TRAINING], false) else CPPFLAGS="$CPPFLAGS $cairo_CFLAGS" fi @@ -508,8 +510,8 @@ if test "x$has_cpp11" = "xyes"; then ;; esac else - AC_MSG_WARN(Training tools WILL NOT be built because of missing c++11 support.) 
- AM_CONDITIONAL(ENABLE_TRAINING, false) + AC_MSG_WARN([Training tools WILL NOT be built because of missing c++11 support.]) + AM_CONDITIONAL([ENABLE_TRAINING], false) fi # ---------------------------------------- @@ -518,31 +520,31 @@ fi # Output files AC_CONFIG_FILES([Makefile tesseract.pc]) -AC_CONFIG_FILES(api/Makefile) -AC_CONFIG_FILES(ccmain/Makefile) -AC_CONFIG_FILES(opencl/Makefile) -AC_CONFIG_FILES(ccstruct/Makefile) -AC_CONFIG_FILES(ccutil/Makefile) -AC_CONFIG_FILES(classify/Makefile) -AC_CONFIG_FILES(cube/Makefile) -AC_CONFIG_FILES(cutil/Makefile) -AC_CONFIG_FILES(dict/Makefile) -AC_CONFIG_FILES(neural_networks/runtime/Makefile) -AC_CONFIG_FILES(textord/Makefile) -AC_CONFIG_FILES(viewer/Makefile) -AC_CONFIG_FILES(wordrec/Makefile) -AC_CONFIG_FILES(tessdata/Makefile) -AC_CONFIG_FILES(tessdata/configs/Makefile) -AC_CONFIG_FILES(tessdata/tessconfigs/Makefile) -AC_CONFIG_FILES(testing/Makefile) -AC_CONFIG_FILES(java/Makefile) -AC_CONFIG_FILES(java/com/Makefile) -AC_CONFIG_FILES(java/com/google/Makefile) -AC_CONFIG_FILES(java/com/google/scrollview/Makefile) -AC_CONFIG_FILES(java/com/google/scrollview/events/Makefile) -AC_CONFIG_FILES(java/com/google/scrollview/ui/Makefile) -AC_CONFIG_FILES(doc/Makefile) -AM_COND_IF([ENABLE_TRAINING], AC_CONFIG_FILES(training/Makefile)) +AC_CONFIG_FILES([api/Makefile]) +AC_CONFIG_FILES([ccmain/Makefile]) +AC_CONFIG_FILES([opencl/Makefile]) +AC_CONFIG_FILES([ccstruct/Makefile]) +AC_CONFIG_FILES([ccutil/Makefile]) +AC_CONFIG_FILES([classify/Makefile]) +AC_CONFIG_FILES([cube/Makefile]) +AC_CONFIG_FILES([cutil/Makefile]) +AC_CONFIG_FILES([dict/Makefile]) +AC_CONFIG_FILES([neural_networks/runtime/Makefile]) +AC_CONFIG_FILES([textord/Makefile]) +AC_CONFIG_FILES([viewer/Makefile]) +AC_CONFIG_FILES([wordrec/Makefile]) +AC_CONFIG_FILES([tessdata/Makefile]) +AC_CONFIG_FILES([tessdata/configs/Makefile]) +AC_CONFIG_FILES([tessdata/tessconfigs/Makefile]) +AC_CONFIG_FILES([testing/Makefile]) +AC_CONFIG_FILES([java/Makefile]) 
+AC_CONFIG_FILES([java/com/Makefile]) +AC_CONFIG_FILES([java/com/google/Makefile]) +AC_CONFIG_FILES([java/com/google/scrollview/Makefile]) +AC_CONFIG_FILES([java/com/google/scrollview/events/Makefile]) +AC_CONFIG_FILES([java/com/google/scrollview/ui/Makefile]) +AC_CONFIG_FILES([doc/Makefile]) +AM_COND_IF([ENABLE_TRAINING], [AC_CONFIG_FILES(training/Makefile)]) AC_OUTPUT # Final message @@ -557,17 +559,16 @@ echo "$ sudo make install" # echo "$ sudo make install-langs" AM_COND_IF([ENABLE_TRAINING], - echo "" - echo "Training tools can be build and installed (after building of $PACKAGE_NAME) with:" - echo "" - echo "$ make training" - echo "$ sudo make training-install" - echo "" -, - echo "" - echo "You can not build training tools because of missing dependency." - echo "Check configure output for details." - echo "" + [echo "" + echo "Training tools can be build and installed (after building of $PACKAGE_NAME) with:" + echo "" + echo "$ make training" + echo "$ sudo make training-install" + echo ""], + [echo "" + echo "You can not build training tools because of missing dependency." + echo "Check configure output for details." + echo ""] ) # ---------------------------------------- From d19c522e0c33b5813aba0ca86acc4e6abda8cdb8 Mon Sep 17 00:00:00 2001 From: Amit Dovev Date: Sat, 28 May 2016 22:43:44 +0300 Subject: [PATCH 19/30] Create CONTRIBUTING.md --- CONTRIBUTING.md | 70 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 CONTRIBUTING.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 000000000..aaddd40c5 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,70 @@ +# Contributing + +**Please follow these rules and advice**. + +## Creating an Issue or Using the Forum + +If you think you found a bug in Tesseract, please create an issue. + +Use the [users mailing-list](https://groups.google.com/d/forum/tesseract-ocr) instead of creating an Issue if ... 
+* You have problems using Tesseract and need some help. +* You have problems installing the software. +* You are not satisfied with the accuracy of the OCR, and want to ask how you can improve it. Note: You should first read the [ImproveQuality](https://github.com/tesseract-ocr/tesseract/wiki/ImproveQuality) wiki page. +* You are trying to train Tesseract and you have a problem and/or want to ask a question about the traing process. Note: You should first read the **official** guides [[1]](https://github.com/tesseract-ocr/tesseract/wiki/tesstrain.sh) or [[2]](https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract) found in the project wiki. +* You have a general question. + +An issue should only be reported if the platform you are using is one of these: + * Linux (but not a version that is more than 4 years old) + * Windows (Windows 7 or newer version) + * Mac (last 3 releases) + +For older versions or other operating systems, use the Tesseract forum. + +When creating an issue, please report your operating system, including its specific version: "Ubuntu 16.04", "Windows 10", "OS X 10.11" etc. + +Search through open and closed issues to see if similar issue has been reported already (and sometimes also has been solved). + +Similary, before you post your question in the forum, search through past threads to see if similar question has been asked already. + +Read the [wiki](https://github.com/tesseract-ocr/tesseract/wiki) before you report your issue or ask a question in the forum. + +Only report an issue in the latest official release. Optionally, try to check if the issue is not already solved in the latest snapshot in the git repository. + +Make sure you are able to replicate the problem with Tesseract command line program. For external programs that use Tesseract (including wrappers and your own program, if you are developer), report the issue to the developers of that software if it's possible. You can also try to find help in the Tesseract forum. 
+ +Each version of Tesseract has its own language data you need to obtain. You **must** obtain and install trained data for English (eng) and osd. Verify that Tesseract knows about these two files (and other trained data you installed) with this command: +`tesseract --list-langs`. + +Post example files to demonstrate the problem. +BUT don't post files with private info (about yourself or others). + +When attaching a file to the issue report / forum ... + * Do not post a file larger than 20 MB. + * GitHub supports only few file name extensions like `.png` or `.txt`. If GitHub rejects your files, you can compress them using a program that can produce a zip archive and then load this zip file to GitHub. + +Do not attach programs or libraries to your issues/posts. + +For large files or for programs, add a link to a iocation where they can be downloaded (your site, Git repo, Google Drive, Dropbox etc.) + +Attaching a multi-page TIFF image is useful only if you have problem with multi-page functionality, otherwise attach only one or a few single page images. + +Copy the error message from the console instead of sending a screenshot of it. + +Use the toolbar above the comment edit area to format your comment. + +Add three backticks before and after a code sample or output of a command to format it (The 'Insert code' button can help you doing it). + +If your comment includes a code sample or output of a command that exceeds ~25 lines, post it as attached text file (filename.txt). + +Use 'Preview' before you send your issue. Read it again before sending. + +Note that most of the people that respond to issues and answer questions are either other 'regular' users or **volunteers** developers. Please be nice to them :-) + +The [tesseract developers](http://groups.google.com/group/tesseract-dev/) forum should be used to discuss Tesseract development: bug fixes, enhancements, add-ons for Tesseract. + +Sometimes you will not get a respond to your issue or question. 
We apologize in advance! Please don't take it personally. There can be many reasons for this, including: time limits, no one knows the answer (at least not the ones that are available at that time) or just that +your question has been asked (and has been answered) many times before... + +## For Developers: Creating a Pull Request + +TBD From 99832f306b66d3db314ad615bcca2b50398238cd Mon Sep 17 00:00:00 2001 From: Amit Dovev Date: Sun, 29 May 2016 13:27:33 +0300 Subject: [PATCH 20/30] CONTRIBUTING.md: Fix a typo --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index aaddd40c5..0d29fc458 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -10,7 +10,7 @@ Use the [users mailing-list](https://groups.google.com/d/forum/tesseract-ocr) in * You have problems using Tesseract and need some help. * You have problems installing the software. * You are not satisfied with the accuracy of the OCR, and want to ask how you can improve it. Note: You should first read the [ImproveQuality](https://github.com/tesseract-ocr/tesseract/wiki/ImproveQuality) wiki page. -* You are trying to train Tesseract and you have a problem and/or want to ask a question about the traing process. Note: You should first read the **official** guides [[1]](https://github.com/tesseract-ocr/tesseract/wiki/tesstrain.sh) or [[2]](https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract) found in the project wiki. +* You are trying to train Tesseract and you have a problem and/or want to ask a question about the training process. Note: You should first read the **official** guides [[1]](https://github.com/tesseract-ocr/tesseract/wiki/tesstrain.sh) or [[2]](https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract) found in the project wiki. * You have a general question. 
An issue should only be reported if the platform you are using is one of these: From 3dcb5c2488c81a06b25f88af4b9760aff45bfe61 Mon Sep 17 00:00:00 2001 From: scottb89 Date: Sun, 5 Jun 2016 17:38:43 +0300 Subject: [PATCH 21/30] Bypass Leptonica error message with pixGenHalftoneMask() Fixes #292 --- textord/imagefind.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/textord/imagefind.cpp b/textord/imagefind.cpp index 05047cae0..c119e69f9 100644 --- a/textord/imagefind.cpp +++ b/textord/imagefind.cpp @@ -67,11 +67,21 @@ Pix* ImageFind::FindImages(Pix* pix) { if (pixGetWidth(pix) < kMinImageFindSize || pixGetHeight(pix) < kMinImageFindSize) return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1); + // Reduce by factor 2. Pix *pixr = pixReduceRankBinaryCascade(pix, 1, 0, 0, 0); pixDisplayWrite(pixr, textord_tabfind_show_images); // Get the halftone mask directly from Leptonica. + // + // Leptonica will print an error message and return NULL if we call + // pixGenHalftoneMask(pixr, NULL, ...) with too small image, so we + // want to bypass that. + if (pixGetWidth(pixr) < kMinImageFindSize || + pixGetHeight(pixr) < kMinImageFindSize) { + pixDestroy(&pixr); + return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1); + } l_int32 ht_found = 0; Pix *pixht2 = pixGenHalftoneMask(pixr, NULL, &ht_found, textord_tabfind_show_images); From c3a7fab349597fdae48ec20e229700611225f711 Mon Sep 17 00:00:00 2001 From: Shreeshrii Date: Tue, 14 Jun 2016 14:35:05 +0300 Subject: [PATCH 22/30] Replace asserts with tprintf() and exit(1) Asserts should not be used for missing or invalid input in the command line! This leads to a bad UX. 
--- training/text2image.cpp | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/training/text2image.cpp b/training/text2image.cpp index 040ded681..8ab22ffc4 100644 --- a/training/text2image.cpp +++ b/training/text2image.cpp @@ -423,11 +423,20 @@ int main(int argc, char** argv) { } return EXIT_SUCCESS; } + // Check validity of input flags. - ASSERT_HOST_MSG(!FLAGS_text.empty(), "Text file missing!\n"); - ASSERT_HOST_MSG(!FLAGS_outputbase.empty(), "Output file missing!\n"); - ASSERT_HOST_MSG(FLAGS_render_ngrams || FLAGS_unicharset_file.empty(), - "Use --unicharset_file only if --render_ngrams is set.\n"); + if (FLAGS_text.empty()) { + tprintf("'--text' option is missing!\n"); + exit(1); + } + if (FLAGS_outputbase.empty()) { + tprintf("'--outputbase' option is missing!\n"); + exit(1); + } + if (!FLAGS_unicharset_file.empty() && FLAGS_render_ngrams) { + tprintf("Use '--unicharset_file' only if '--render_ngrams' is set.\n"); + exit(1); + } if (!FLAGS_find_fonts && !FontUtils::IsAvailableFont(FLAGS_font.c_str())) { string pango_name; From 034d666e7af5129cd98ed2c5b562a4449f659b21 Mon Sep 17 00:00:00 2001 From: Amit Dovev Date: Thu, 16 Jun 2016 12:10:53 +0300 Subject: [PATCH 23/30] Replace use of TLOG_FATAL() with tprintf() and exit(1) (#349) Asserts should not be used for missing or invalid input in the command line! This leads to a bad UX. --- training/text2image.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/training/text2image.cpp b/training/text2image.cpp index 8ab22ffc4..246ca7d07 100644 --- a/training/text2image.cpp +++ b/training/text2image.cpp @@ -441,9 +441,10 @@ int main(int argc, char** argv) { if (!FLAGS_find_fonts && !FontUtils::IsAvailableFont(FLAGS_font.c_str())) { string pango_name; if (!FontUtils::IsAvailableFont(FLAGS_font.c_str(), &pango_name)) { - tprintf("Could not find font named %s. Pango suggested font %s\n", + tprintf("Could not find font named %s. 
Pango suggested font %s\n" + "Please correct --font arg.\n", FLAGS_font.c_str(), pango_name.c_str()); - TLOG_FATAL("Please correct --font arg."); + exit(1); } } @@ -487,7 +488,8 @@ int main(int argc, char** argv) { render.set_gravity_hint_strong(true); render.set_render_fullwidth_latin(true); } else { - TLOG_FATAL("Invalid writing mode : %s\n", FLAGS_writing_mode.c_str()); + tprintf("Invalid writing mode: %s\n", FLAGS_writing_mode.c_str()); + exit(1); } string src_utf8; @@ -512,8 +514,9 @@ int main(int argc, char** argv) { UNICHARSET unicharset; if (FLAGS_render_ngrams && !FLAGS_unicharset_file.empty() && !unicharset.load_from_file(FLAGS_unicharset_file.c_str())) { - TLOG_FATAL("Failed to load unicharset from file %s\n", + tprintf("Failed to load unicharset from file %s\n", FLAGS_unicharset_file.c_str()); + exit(1); } // If we are rendering ngrams that will be OCRed later, shuffle them so that From b1c921b59e6af1e68fd026cbc30540929b818552 Mon Sep 17 00:00:00 2001 From: Marco Atzeri Date: Fri, 17 Jun 2016 15:52:01 +0300 Subject: [PATCH 24/30] Fix Cygwin compatibility --- api/Makefile.am | 2 +- ccutil/ambigs.cpp | 4 ++-- configure.ac | 1 - 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/api/Makefile.am b/api/Makefile.am index cb9990df3..9d20919b2 100644 --- a/api/Makefile.am +++ b/api/Makefile.am @@ -67,7 +67,7 @@ libtesseract_la_LIBADD += ../cube/libtesseract_cube.la \ ../neural_networks/runtime/libtesseract_neural.la endif -libtesseract_la_LDFLAGS += -version-info $(GENERIC_LIBRARY_VERSION) +libtesseract_la_LDFLAGS += -version-info $(GENERIC_LIBRARY_VERSION) -no-undefined bin_PROGRAMS = tesseract tesseract_SOURCES = tesseractmain.cpp diff --git a/ccutil/ambigs.cpp b/ccutil/ambigs.cpp index 7620e958b..15a755de8 100644 --- a/ccutil/ambigs.cpp +++ b/ccutil/ambigs.cpp @@ -24,13 +24,13 @@ #include "helpers.h" #include "universalambigs.h" -#if defined _WIN32 || defined(__CYGWIN__) +#if defined _WIN32 #ifndef __GNUC__ #define strtok_r strtok_s #else #include 
"strtok_r.h" #endif /* __GNUC__ */ -#endif /* _WIN32 __CYGWIN__*/ +#endif /* _WIN32 */ namespace tesseract { diff --git a/configure.ac b/configure.ac index 536ac3ca1..4bda6c091 100644 --- a/configure.ac +++ b/configure.ac @@ -94,7 +94,6 @@ case "${host_os}" in ;; cygwin*) AM_CONDITIONAL([ADD_RT], false) - AM_CONDITIONAL([T_WIN], true) AC_SUBST([AM_LDFLAGS], ['-Wl,-no-undefined -Wl,--as-needed']) ;; solaris*) From c2574609e4987fafa68215ccf6dea3d244497faf Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sat, 11 Jun 2016 22:40:00 +0200 Subject: [PATCH 25/30] Makefile: Fix phony training target This fixes wrong behaviour of "make training" when dependencies for training were incomplete. Signed-off-by: Stefan Weil --- Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.am b/Makefile.am index e328c58dc..a4aa1dd91 100644 --- a/Makefile.am +++ b/Makefile.am @@ -14,7 +14,7 @@ training: @echo "Need to reconfigure project, so there are no errors" endif -.PHONY: install-langs ScrollView.jar install-jars $(TRAINING_SUBDIR) +.PHONY: install-langs ScrollView.jar install-jars training SUBDIRS = ccutil viewer cutil opencl ccstruct dict classify wordrec textord if !NO_CUBE_BUILD From 65504c8cd2300b0dd7c9352e66d0a69a5918f340 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sun, 19 Jun 2016 11:59:58 +0300 Subject: [PATCH 26/30] Fix Cygwin compatibility - Part II --- training/pango_font_info.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/training/pango_font_info.cpp b/training/pango_font_info.cpp index b3a3d7bf7..b2178b192 100644 --- a/training/pango_font_info.cpp +++ b/training/pango_font_info.cpp @@ -22,11 +22,18 @@ #include "config_auto.h" #endif -#if (defined MINGW) || (defined __CYGWIN__) +#if (defined __MINGW32__) || (defined __CYGWIN__) // workaround for stdlib.h and putenv #undef __STRICT_ANSI__ +#endif + +#if (defined __MINGW32__) #include "strcasestr.h" -#endif // MINGW/Cygwin +#else +// needed for 
strcasestr in string.h +#define _GNU_SOURCE +#endif + #include #include #include From 724fb894ac51c8092ccbd3137584fc5cdd665bb3 Mon Sep 17 00:00:00 2001 From: amitdo Date: Sun, 19 Jun 2016 13:40:17 +0300 Subject: [PATCH 27/30] Check that pango's suggested font name is not an empty string On msys2 pango seems to always return an empty string for the suggested font. It's a good idea to check that the string is not empty before printing it - on all platforms. --- training/text2image.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/training/text2image.cpp b/training/text2image.cpp index 246ca7d07..a8bb21ab4 100644 --- a/training/text2image.cpp +++ b/training/text2image.cpp @@ -441,9 +441,11 @@ int main(int argc, char** argv) { if (!FLAGS_find_fonts && !FontUtils::IsAvailableFont(FLAGS_font.c_str())) { string pango_name; if (!FontUtils::IsAvailableFont(FLAGS_font.c_str(), &pango_name)) { - tprintf("Could not find font named %s. Pango suggested font %s\n" - "Please correct --font arg.\n", - FLAGS_font.c_str(), pango_name.c_str()); + tprintf("Could not find font named %s.", FLAGS_font.c_str()); + if (!pango_name.empty()) { + tprintf("Pango suggested font %s.\n", pango_name.c_str()); + } + tprintf("Please correct --font arg.\n"); exit(1); } } From ed053aab943619eb2e8ad1d29cc57684488da82f Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sun, 19 Jun 2016 22:38:03 +0200 Subject: [PATCH 28/30] =?UTF-8?q?Fix=20Cygwin=20compatibility=20=E2=80=93?= =?UTF-8?q?=20part=20III?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 65504c8cd2300b0dd7c9352e66d0a69a5918f340 misplaced the #endif. The definition of _GNU_SOURCE is only needed for Cygwin. Defining _GNU_SOURCE on Linux results in compiler warnings because this macro is already defined by the compiler. Fix this by moving the #endif to the right place.
In addition the code for Cygwin is made more robust: If a future Cygwin compiler defines _GNU_SOURCE, too, the code will still work. Signed-off-by: Stefan Weil --- training/pango_font_info.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/training/pango_font_info.cpp b/training/pango_font_info.cpp index b2178b192..641d537d0 100644 --- a/training/pango_font_info.cpp +++ b/training/pango_font_info.cpp @@ -25,14 +25,14 @@ #if (defined __MINGW32__) || (defined __CYGWIN__) // workaround for stdlib.h and putenv #undef __STRICT_ANSI__ -#endif #if (defined __MINGW32__) #include "strcasestr.h" -#else +#elif !defined(_GNU_SOURCE) // needed for strcasestr in string.h #define _GNU_SOURCE #endif +#endif #include #include From 29d971eb0c2fa96aebe7e1ef8f90c72fade7f43d Mon Sep 17 00:00:00 2001 From: Steffen Rehberg Date: Sat, 25 Jun 2016 12:40:28 +0200 Subject: [PATCH 29/30] Fix text box width/height calculation In Tesseract's coordinate system, width is just right - left, cf. slide #2 of github.com/tesseract-ocr/docs/blob/master/das_tutorial2016/2ArchitectureAndDataStructures.pdf --- api/baseapi.cpp | 4 ++-- api/renderer.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/api/baseapi.cpp b/api/baseapi.cpp index 1bf1b43d0..94a772c73 100644 --- a/api/baseapi.cpp +++ b/api/baseapi.cpp @@ -1431,8 +1431,8 @@ static void AddBoxToTSV(const PageIterator *it, it->BoundingBox(level, &left, &top, &right, &bottom); hocr_str->add_str_int("\t", left); hocr_str->add_str_int("\t", top); - hocr_str->add_str_int("\t", right - left + 1); - hocr_str->add_str_int("\t", bottom - top + 1); + hocr_str->add_str_int("\t", right - left); + hocr_str->add_str_int("\t", bottom - top); } diff --git a/api/renderer.cpp b/api/renderer.cpp index 2d0dc6710..4a88a2460 100644 --- a/api/renderer.cpp +++ b/api/renderer.cpp @@ -196,7 +196,7 @@ bool TessHOcrRenderer::AddImageHandler(TessBaseAPI* api) { } /********************************************************************** -
* HOcr Text Renderer interface implementation + * TSV Text Renderer interface implementation **********************************************************************/ TessTsvRenderer::TessTsvRenderer(const char *outputbase) : TessResultRenderer(outputbase, "tsv") { From c0fcce2f8f1308a808f521b1f501103e153061df Mon Sep 17 00:00:00 2001 From: Steffen Rehberg Date: Mon, 27 Jun 2016 21:58:29 +0200 Subject: [PATCH 30/30] Fix text box width/height calculation (addition) This occurrence should have been included in commit 29d971e but was overlooked by mistake. --- api/baseapi.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/baseapi.cpp b/api/baseapi.cpp index 94a772c73..11aeb9166 100644 --- a/api/baseapi.cpp +++ b/api/baseapi.cpp @@ -1704,8 +1704,8 @@ char* TessBaseAPI::GetTSVText(int page_number) { tsv_str.add_str_int("\t", word_num); tsv_str.add_str_int("\t", left); tsv_str.add_str_int("\t", top); - tsv_str.add_str_int("\t", right - left + 1); - tsv_str.add_str_int("\t", bottom - top + 1); + tsv_str.add_str_int("\t", right - left); + tsv_str.add_str_int("\t", bottom - top); tsv_str.add_str_int("\t", res_it->Confidence(RIL_WORD)); tsv_str += "\t";