Applied patch to refix issue 331

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@1064 d0cd1f9f-072b-0410-8dd7-cf729c803f20
2025-06-07 18:02:40 +08:00 · 2014-04-23 23:12:53 +00:00 · 2014-04-23 23:12:53 +00:00 · 3a5f699013
commit 3a5f699013
parent f3176c2eb5
11 changed files with 88 additions and 45 deletions
--- a/ccstruct/blread.cpp
+++ b/ccstruct/blread.cpp
@ -50,9 +50,8 @@ bool read_unlv_file(                    //print list of sides
  name += UNLV_EXT;              //add extension
  if ((pdfp = fopen (name.string (), "rb")) == NULL) {
    return false;                //didn't read one
-  }
+  } else {
-  else {
+    while (tfscanf(pdfp, "%d %d %d %d %*s", &x, &y, &width, &height) >= 4) {
    while (fscanf (pdfp, "%d %d %d %d %*s", &x, &y, &width, &height) >= 4) {
                                 //make rect block
      block = new BLOCK (name.string (), TRUE, 0, 0,
                         (inT16) x, (inT16) (ysize - y - height),
--- a/ccutil/Makefile.am
+++ b/ccutil/Makefile.am
@ -18,8 +18,9 @@ include_HEADERS = \
 noinst_HEADERS = \
    ambigs.h bits16.h bitvector.h ccutil.h clst.h doubleptr.h elst2.h \
    elst.h genericheap.h globaloc.h hashfn.h indexmapbidi.h kdpair.h lsterr.h \
-    nwmain.h object_cache.h qrsequence.h secname.h sorthelper.h stderr.h tessdatamanager.h \
+    nwmain.h object_cache.h qrsequence.h secname.h sorthelper.h stderr.h \
-     tprintf.h unicity_table.h unicodes.h universalambigs.h
+    scanutils.h tessdatamanager.h tprintf.h unicity_table.h unicodes.h \
    universalambigs.h
 if !USING_MULTIPLELIBS
 noinst_LTLIBRARIES = libtesseract_ccutil.la
@ -34,15 +35,11 @@ libtesseract_ccutil_la_SOURCES = \
    elst2.cpp elst.cpp errcode.cpp \
    globaloc.cpp indexmapbidi.cpp \
    mainblk.cpp memry.cpp \
-    serialis.cpp strngs.cpp \
+    serialis.cpp strngs.cpp scanutils.cpp \
    tessdatamanager.cpp tprintf.cpp \
    unichar.cpp unicharmap.cpp unicharset.cpp unicodes.cpp \
    params.cpp universalambigs.cpp
 if EMBEDDED
 include_HEADERS += scanutils.h
 libtesseract_ccutil_la_SOURCES += scanutils.cpp
 endif
 if MINGW
 AM_CPPFLAGS += -I$(top_srcdir)/vs2008/port -DWINDLLNAME=\"lib@GENERIC_LIBRARY_NAME@\"
--- a/ccutil/scanutils.cpp
+++ b/ccutil/scanutils.cpp
@ -19,12 +19,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #ifdef EMBEDDED
 #include <ctype.h>
 #include <stdarg.h>
 #include <stddef.h>
 #include <inttypes.h>
 #include <string.h>
 #include <limits.h>
 #include <stdio.h>
@ -35,6 +32,11 @@
 #include "scanutils.h"
 #include "tprintf.h"
 // workaround for "'off_t' was not declared in this scope" with -std=c++11
 #ifndef off_t
 typedef long off_t;
 #endif  // off_t
 enum Flags {
  FL_SPLAT  = 0x01,   // Drop the value, do not assign
  FL_INV    = 0x02,   // Character-set with inverse
@ -215,19 +217,48 @@ double strtofloat(const char* s)
  return minus ? -f : f;
 }
 static int tvfscanf(FILE* stream, const char *format, va_list ap);
 // Variadic entry point: gathers the arguments that follow `format` into a
 // va_list, hands them to tvfscanf(), and returns tvfscanf()'s result unchanged.
 int tfscanf(FILE* stream, const char *format, ...)
 {
  va_list ap;
  int rv;
  // Start walking the variadic arguments that come after `format`.
  va_start(ap, format);
  rv = tvfscanf(stream, format, ap);
  // Release the argument list before returning the forwarded result.
  va_end(ap);
  return rv;
 }
 #ifdef EMBEDDED
 // Variadic wrapper carrying the C library name fscanf; it sits inside the
 // `#ifdef EMBEDDED` region. The changed line below switches the delegate from
 // vfscanf() to tvfscanf(); the delegate's return value is passed through.
 int fscanf(FILE* stream, const char *format, ...)
 {
  va_list ap;
  int rv;
  va_start(ap, format);
-  rv = vfscanf(stream, format, ap);
+  rv = tvfscanf(stream, format, ap);
  va_end(ap);
  return rv;
 }
-int vfscanf(FILE* stream, const char *format, va_list ap)
+int vfscanf(FILE* stream, const char *format, ...)
 {
  // Wrapper that forwards to tvfscanf() and returns its result.
  // NOTE(review): the added signature is variadic (`...`) where the removed
  // one took a `va_list`, and the body builds its own `ap` with va_start.
  // The standard C vfscanf takes a va_list, so a caller passing one here would
  // have it read as the first variadic argument -- confirm this is intended.
  va_list ap;
  int rv;
  va_start(ap, format);
  rv = tvfscanf(stream, format, ap);
  va_end(ap);
  return rv;
 }
 #endif
 static int tvfscanf(FILE* stream, const char *format, va_list ap)
 {
  const char *p = format;
  char ch;
@ -250,7 +281,7 @@ int vfscanf(FILE* stream, const char *format, va_list ap)
  enum Bail bail = BAIL_NONE;
  int sign;
  int converted = 0;    // Successful conversions
-  unsigned long matchmap[((1 << CHAR_BIT)+(LongBit()-1))/LongBit()];
+  unsigned long matchmap[((1 << CHAR_BIT)+(CHAR_BIT * sizeof(long) - 1))/ (CHAR_BIT * sizeof(long))];
  int matchinv = 0;   // Is match map inverted?
  unsigned char range_start = 0;
  off_t start_off = ftell(stream);
@ -278,7 +309,8 @@ int vfscanf(FILE* stream, const char *format, va_list ap)
            flags |= FL_SPLAT;
          break;
-          case '0' ... '9':
+          case 0: case 1: case 2: case 3: case 4:
          case 5: case 6: case 7: case 8: case 9:
            width = (ch-'0');
            state = ST_WIDTH;
            flags |= FL_WIDTH;
@ -542,6 +574,7 @@ int vfscanf(FILE* stream, const char *format, va_list ap)
  return converted;
 }
 #ifdef EMBEDDED
 int creat(const char *pathname, mode_t mode)
 {
  return open(pathname, O_CREAT | O_TRUNC | O_WRONLY, mode);
--- a/ccutil/scanutils.h
+++ b/ccutil/scanutils.h
@ -19,14 +19,25 @@
 #ifndef TESSERACT_CCUTIL_SCANUTILS_H_
 #define TESSERACT_CCUTIL_SCANUTILS_H_
 #ifdef EMBEDDED
 #include <stdint.h>
 #include <stddef.h>
 #include <stdio.h>
 //#include <klibc/extern.h>
 #include <sys/stat.h>
 /**
 * fscanf variant to ensure correct reading regardless of locale.
 *
 * tfscanf parses a file stream according to the given format. See the fscanf
 * manpage for more information, as this function attempts to mimic its
 * behavior.
 *
 * @note Scientific floating-point notation is not supported.
 *
 */
 int tfscanf(FILE* stream, const char *format, ...);
 #ifdef EMBEDDED
 // Attempts to parse the given file stream s as an integer of the base
 // 'base'. Returns the first successfully parsed integer as a uintmax_t, or
 // 0, if none was found.
--- a/classify/clusttool.cpp
+++ b/classify/clusttool.cpp
@ -46,7 +46,7 @@ History:	6/6/89, DSJ, Created.
 uinT16 ReadSampleSize(FILE *File) {
  int SampleSize;
-  if ((fscanf (File, "%d", &SampleSize) != 1) ||
+  if ((tfscanf(File, "%d", &SampleSize) != 1) ||
    (SampleSize < 0) || (SampleSize > MAXSAMPLESIZE))
    DoError (ILLEGALSAMPLESIZE, "Illegal sample size");
  return (SampleSize);
@ -72,7 +72,7 @@ PARAM_DESC *ReadParamDesc(FILE *File, uinT16 N) {
  ParamDesc = (PARAM_DESC *) Emalloc (N * sizeof (PARAM_DESC));
  for (i = 0; i < N; i++) {
-    if (fscanf (File, "%s", Token) != 1)
+    if (tfscanf(File, "%s", Token) != 1)
      DoError (ILLEGALCIRCULARSPEC,
        "Illegal circular/linear specification");
    if (Token[0] == 'c')
@ -80,14 +80,14 @@ PARAM_DESC *ReadParamDesc(FILE *File, uinT16 N) {
    else
      ParamDesc[i].Circular = FALSE;
-    if (fscanf (File, "%s", Token) != 1)
+    if (tfscanf(File, "%s", Token) != 1)
      DoError (ILLEGALESSENTIALSPEC,
        "Illegal essential/non-essential spec");
    if (Token[0] == 'e')
      ParamDesc[i].NonEssential = FALSE;
    else
      ParamDesc[i].NonEssential = TRUE;
-    if (fscanf (File, "%f%f", &(ParamDesc[i].Min), &(ParamDesc[i].Max)) !=
+    if (tfscanf(File, "%f%f", &(ParamDesc[i].Min), &(ParamDesc[i].Max)) !=
      2)
      DoError (ILLEGALMINMAXSPEC, "Illegal min or max specification");
    ParamDesc[i].Range = ParamDesc[i].Max - ParamDesc[i].Min;
@ -119,7 +119,7 @@ PROTOTYPE *ReadPrototype(FILE *File, uinT16 N) {
  int SampleCount;
  int i;
-  if ((Status = fscanf (File, "%s", Token)) == 1) {
+  if ((Status = tfscanf(File, "%s", Token)) == 1) {
    Proto = (PROTOTYPE *) Emalloc (sizeof (PROTOTYPE));
    Proto->Cluster = NULL;
    if (Token[0] == 's')
@ -129,7 +129,7 @@ PROTOTYPE *ReadPrototype(FILE *File, uinT16 N) {
    Proto->Style = ReadProtoStyle (File);
-    if ((fscanf (File, "%d", &SampleCount) != 1) || (SampleCount < 0))
+    if ((tfscanf(File, "%d", &SampleCount) != 1) || (SampleCount < 0))
      DoError (ILLEGALSAMPLECOUNT, "Illegal sample count");
    Proto->NumSamples = SampleCount;
@ -173,7 +173,7 @@ PROTOTYPE *ReadPrototype(FILE *File, uinT16 N) {
        Proto->Distrib =
          (DISTRIBUTION *) Emalloc (N * sizeof (DISTRIBUTION));
        for (i = 0; i < N; i++) {
-          if (fscanf (File, "%s", Token) != 1)
+          if (tfscanf(File, "%s", Token) != 1)
            DoError (ILLEGALDISTRIBUTION,
              "Illegal prototype distribution");
          switch (Token[0]) {
@ -245,7 +245,7 @@ PROTOSTYLE ReadProtoStyle(FILE *File) {
  char Token[TOKENSIZE];
  PROTOSTYLE Style;
-  if (fscanf (File, "%s", Token) != 1)
+  if (tfscanf(File, "%s", Token) != 1)
    DoError (ILLEGALSTYLESPEC, "Illegal prototype style specification");
  switch (Token[0]) {
    case 's':
@ -290,7 +290,7 @@ FLOAT32* ReadNFloats(FILE * File, uinT16 N, FLOAT32 Buffer[]) {
    Buffer = reinterpret_cast<FLOAT32*>(Emalloc(N * sizeof(FLOAT32)));
  for (i = 0; i < N; i++) {
-    NumFloatsRead = fscanf(File, "%f", &(Buffer[i]));
+    NumFloatsRead = tfscanf(File, "%f", &(Buffer[i]));
    if (NumFloatsRead != 1) {
      if ((NumFloatsRead == EOF) && (i == 0)) {
        Efree(Buffer);
--- a/classify/cutoffs.cpp
+++ b/classify/cutoffs.cpp
@ -68,7 +68,7 @@ void Classify::ReadNewCutoffs(FILE *CutoffFile, bool swap, inT64 end_offset,
    Cutoffs[i] = MAX_CUTOFF;
  while ((end_offset < 0 || ftell(CutoffFile) < end_offset) &&
-         fscanf(CutoffFile, "%" REALLY_QUOTE_IT(UNICHAR_LEN) "s %d",
+         tfscanf(CutoffFile, "%" REALLY_QUOTE_IT(UNICHAR_LEN) "s %d",
                Class, &Cutoff) == 2) {
    if (strcmp(Class, "NULL") == 0) {
      ClassId = unicharset.unichar_to_id(" ");
--- a/classify/featdefs.cpp
+++ b/classify/featdefs.cpp
@ -265,13 +265,13 @@ CHAR_DESC ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs,
  CHAR_DESC CharDesc;
  int Type;
-  if (fscanf (File, "%d", &NumSetsToRead) != 1 ||
+  if (tfscanf(File, "%d", &NumSetsToRead) != 1 ||
    NumSetsToRead < 0 || NumSetsToRead > FeatureDefs.NumFeatureTypes)
    DoError (ILLEGAL_NUM_SETS, "Illegal number of feature sets");
  CharDesc = NewCharDescription(FeatureDefs);
  for (; NumSetsToRead > 0; NumSetsToRead--) {
-    fscanf (File, "%s", ShortName);
+    tfscanf(File, "%s", ShortName);
    Type = ShortNameToFeatureType(FeatureDefs, ShortName);
    CharDesc->FeatureSets[Type] =
      ReadFeatureSet (File, FeatureDefs.FeatureDesc[Type]);
--- a/classify/mastertrainer.cpp
+++ b/classify/mastertrainer.cpp
@ -38,6 +38,8 @@
 #include "shapetable.h"
 #include "svmnode.h"
 #include "scanutils.h"
 namespace tesseract {
 // Constants controlling clustering. With a low kMinClusteredShapes and a high
@ -360,8 +362,8 @@ bool MasterTrainer::LoadFontInfo(const char* filename) {
    fontinfo.name = font_name;
    fontinfo.properties = 0;
    fontinfo.universal_id = 0;
-    if (fscanf(fp, "%1024s %i %i %i %i %i\n", font_name,
+    if (tfscanf(fp, "%1024s %i %i %i %i %i\n", font_name,
-               &italic, &bold, &fixed, &serif, &fraktur) != 6)
+                &italic, &bold, &fixed, &serif, &fraktur) != 6)
      continue;
    fontinfo.properties =
        (italic << 0) +
@ -397,7 +399,7 @@ bool MasterTrainer::LoadXHeights(const char* filename) {
  int total_xheight = 0;
  int xheight_count = 0;
  while (!feof(f)) {
-    if (fscanf(f, "%1023s %d\n", buffer, &xht) != 2)
+    if (tfscanf(f, "%1023s %d\n", buffer, &xht) != 2)
      continue;
    buffer[1023] = '\0';
    fontinfo.name = buffer;
@ -441,13 +443,13 @@ bool MasterTrainer::AddSpacingInfo(const char *filename) {
  char uch[UNICHAR_LEN];
  char kerned_uch[UNICHAR_LEN];
  int x_gap, x_gap_before, x_gap_after, num_kerned;
-  ASSERT_HOST(fscanf(fontinfo_file, "%d\n", &num_unichars) == 1);
+  ASSERT_HOST(tfscanf(fontinfo_file, "%d\n", &num_unichars) == 1);
  FontInfo *fi = &fontinfo_table_.get(fontinfo_id);
  fi->init_spacing(unicharset_.size());
  FontSpacingInfo *spacing = NULL;
  for (int l = 0; l < num_unichars; ++l) {
-    if (fscanf(fontinfo_file, "%s %d %d %d",
+    if (tfscanf(fontinfo_file, "%s %d %d %d",
-               uch, &x_gap_before, &x_gap_after, &num_kerned) != 4) {
+                uch, &x_gap_before, &x_gap_after, &num_kerned) != 4) {
      tprintf("Bad format of font spacing file %s\n", filename);
      fclose(fontinfo_file);
      return false;
@ -459,7 +461,7 @@ bool MasterTrainer::AddSpacingInfo(const char *filename) {
      spacing->x_gap_after = static_cast<inT16>(x_gap_after * scale);
    }
    for (int k = 0; k < num_kerned; ++k) {
-      if (fscanf(fontinfo_file, "%s %d", kerned_uch, &x_gap) != 2) {
+      if (tfscanf(fontinfo_file, "%s %d", kerned_uch, &x_gap) != 2) {
        tprintf("Bad format of font spacing file %s\n", filename);
        fclose(fontinfo_file);
        delete spacing;
--- a/classify/normmatch.cpp
+++ b/classify/normmatch.cpp
@ -263,7 +263,7 @@ NORM_PROTOS *Classify::ReadNormProtos(FILE *File, inT64 end_offset) {
  /* read protos for each class into a separate list */
  while ((end_offset < 0 || ftell(File) < end_offset) &&
-         fscanf(File, "%s %d", unichar, &NumProtos) == 2) {
+         tfscanf(File, "%s %d", unichar, &NumProtos) == 2) {
    if (unicharset.contains_unichar(unichar)) {
      unichar_id = unicharset.unichar_to_id(unichar);
      Protos = NormProtos->Protos[unichar_id];
--- a/classify/ocrfeatures.cpp
+++ b/classify/ocrfeatures.cpp
@ -167,7 +167,7 @@ FEATURE ReadFeature(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) {
  Feature = NewFeature (FeatureDesc);
  for (i = 0; i < Feature->Type->NumParams; i++) {
-    if (fscanf (File, "%f", &(Feature->Params[i])) != 1)
+    if (tfscanf (File, "%f", &(Feature->Params[i])) != 1)
      DoError (ILLEGAL_FEATURE_PARAM, "Illegal feature parameter spec");
 #ifndef _WIN32
    assert (!isnan(Feature->Params[i]));
@ -198,12 +198,12 @@ FEATURE_SET ReadFeatureSet(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) {
  int NumFeatures;
  int i;
-  if (fscanf (File, "%d", &NumFeatures) != 1 || NumFeatures < 0)
+  if (tfscanf(File, "%d", &NumFeatures) != 1 || NumFeatures < 0)
-    DoError (ILLEGAL_NUM_FEATURES, "Illegal number of features in set");
+    DoError(ILLEGAL_NUM_FEATURES, "Illegal number of features in set");
-  FeatureSet = NewFeatureSet (NumFeatures);
+  FeatureSet = NewFeatureSet(NumFeatures);
  for (i = 0; i < NumFeatures; i++)
-    AddFeature (FeatureSet, ReadFeature (File, FeatureDesc));
+    AddFeature(FeatureSet, ReadFeature (File, FeatureDesc));
  return (FeatureSet);
--- a/viewer/scrollview.cpp
+++ b/viewer/scrollview.cpp
@ -775,6 +775,7 @@ void ScrollView::Image(struct Pix* image, int x_pos, int y_pos) {
  size_t size;
  pixWriteMem(&data, &size, image, IFF_PNG);
  int base64_len = (size + 2) / 3 * 4;
  y_pos = TranslateYCoordinate(y_pos);
  SendMsg("readImage(%d,%d,%d)", x_pos, y_pos, base64_len);
  // Base64 encode the data.
  const char kBase64Table[64] = {