Applied patch to re-fix issue 331

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@1064 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
theraysmith@gmail.com 2014-04-23 23:12:53 +00:00
parent f3176c2eb5
commit 3a5f699013
11 changed files with 88 additions and 45 deletions

View File

@ -50,9 +50,8 @@ bool read_unlv_file( //print list of sides
name += UNLV_EXT; //add extension name += UNLV_EXT; //add extension
if ((pdfp = fopen (name.string (), "rb")) == NULL) { if ((pdfp = fopen (name.string (), "rb")) == NULL) {
return false; //didn't read one return false; //didn't read one
} } else {
else { while (tfscanf(pdfp, "%d %d %d %d %*s", &x, &y, &width, &height) >= 4) {
while (fscanf (pdfp, "%d %d %d %d %*s", &x, &y, &width, &height) >= 4) {
//make rect block //make rect block
block = new BLOCK (name.string (), TRUE, 0, 0, block = new BLOCK (name.string (), TRUE, 0, 0,
(inT16) x, (inT16) (ysize - y - height), (inT16) x, (inT16) (ysize - y - height),

View File

@ -18,8 +18,9 @@ include_HEADERS = \
noinst_HEADERS = \ noinst_HEADERS = \
ambigs.h bits16.h bitvector.h ccutil.h clst.h doubleptr.h elst2.h \ ambigs.h bits16.h bitvector.h ccutil.h clst.h doubleptr.h elst2.h \
elst.h genericheap.h globaloc.h hashfn.h indexmapbidi.h kdpair.h lsterr.h \ elst.h genericheap.h globaloc.h hashfn.h indexmapbidi.h kdpair.h lsterr.h \
nwmain.h object_cache.h qrsequence.h secname.h sorthelper.h stderr.h tessdatamanager.h \ nwmain.h object_cache.h qrsequence.h secname.h sorthelper.h stderr.h \
tprintf.h unicity_table.h unicodes.h universalambigs.h scanutils.h tessdatamanager.h tprintf.h unicity_table.h unicodes.h \
universalambigs.h
if !USING_MULTIPLELIBS if !USING_MULTIPLELIBS
noinst_LTLIBRARIES = libtesseract_ccutil.la noinst_LTLIBRARIES = libtesseract_ccutil.la
@ -34,15 +35,11 @@ libtesseract_ccutil_la_SOURCES = \
elst2.cpp elst.cpp errcode.cpp \ elst2.cpp elst.cpp errcode.cpp \
globaloc.cpp indexmapbidi.cpp \ globaloc.cpp indexmapbidi.cpp \
mainblk.cpp memry.cpp \ mainblk.cpp memry.cpp \
serialis.cpp strngs.cpp \ serialis.cpp strngs.cpp scanutils.cpp \
tessdatamanager.cpp tprintf.cpp \ tessdatamanager.cpp tprintf.cpp \
unichar.cpp unicharmap.cpp unicharset.cpp unicodes.cpp \ unichar.cpp unicharmap.cpp unicharset.cpp unicodes.cpp \
params.cpp universalambigs.cpp params.cpp universalambigs.cpp
if EMBEDDED
include_HEADERS += scanutils.h
libtesseract_ccutil_la_SOURCES += scanutils.cpp
endif
if MINGW if MINGW
AM_CPPFLAGS += -I$(top_srcdir)/vs2008/port -DWINDLLNAME=\"lib@GENERIC_LIBRARY_NAME@\" AM_CPPFLAGS += -I$(top_srcdir)/vs2008/port -DWINDLLNAME=\"lib@GENERIC_LIBRARY_NAME@\"

View File

@ -19,12 +19,9 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifdef EMBEDDED
#include <ctype.h> #include <ctype.h>
#include <stdarg.h> #include <stdarg.h>
#include <stddef.h> #include <stddef.h>
#include <inttypes.h>
#include <string.h> #include <string.h>
#include <limits.h> #include <limits.h>
#include <stdio.h> #include <stdio.h>
@ -35,6 +32,11 @@
#include "scanutils.h" #include "scanutils.h"
#include "tprintf.h" #include "tprintf.h"
// workaround for "'off_t' was not declared in this scope" with -std=c++11
// NOTE(review): #ifndef only tests whether a preprocessor macro named off_t
// exists. Where a system header supplies off_t as a typedef rather than a
// macro, this guard does not see it and the typedef below is still emitted.
#ifndef off_t
typedef long off_t;
#endif // off_t
enum Flags { enum Flags {
FL_SPLAT = 0x01, // Drop the value, do not assign FL_SPLAT = 0x01, // Drop the value, do not assign
FL_INV = 0x02, // Character-set with inverse FL_INV = 0x02, // Character-set with inverse
@ -215,19 +217,48 @@ double strtofloat(const char* s)
return minus ? -f : f; return minus ? -f : f;
} }
static int tvfscanf(FILE* stream, const char *format, va_list ap);
// fscanf-style entry point: gathers the variadic arguments into a va_list,
// forwards them to tvfscanf, and returns whatever tvfscanf reports.
int tfscanf(FILE* stream, const char *format, ...) {
  va_list args;
  va_start(args, format);
  const int result = tvfscanf(stream, format, args);
  va_end(args);  // every va_start must be paired with va_end before returning
  return result;
}
#ifdef EMBEDDED
int fscanf(FILE* stream, const char *format, ...) int fscanf(FILE* stream, const char *format, ...)
{ {
va_list ap; va_list ap;
int rv; int rv;
va_start(ap, format); va_start(ap, format);
rv = vfscanf(stream, format, ap); rv = tvfscanf(stream, format, ap);
va_end(ap); va_end(ap);
return rv; return rv;
} }
int vfscanf(FILE* stream, const char *format, va_list ap) int vfscanf(FILE* stream, const char *format, ...)
{
va_list ap;
int rv;
va_start(ap, format);
rv = tvfscanf(stream, format, ap);
va_end(ap);
return rv;
}
#endif
static int tvfscanf(FILE* stream, const char *format, va_list ap)
{ {
const char *p = format; const char *p = format;
char ch; char ch;
@ -250,7 +281,7 @@ int vfscanf(FILE* stream, const char *format, va_list ap)
enum Bail bail = BAIL_NONE; enum Bail bail = BAIL_NONE;
int sign; int sign;
int converted = 0; // Successful conversions int converted = 0; // Successful conversions
unsigned long matchmap[((1 << CHAR_BIT)+(LongBit()-1))/LongBit()]; unsigned long matchmap[((1 << CHAR_BIT)+(CHAR_BIT * sizeof(long) - 1))/ (CHAR_BIT * sizeof(long))];
int matchinv = 0; // Is match map inverted? int matchinv = 0; // Is match map inverted?
unsigned char range_start = 0; unsigned char range_start = 0;
off_t start_off = ftell(stream); off_t start_off = ftell(stream);
@ -278,7 +309,8 @@ int vfscanf(FILE* stream, const char *format, va_list ap)
flags |= FL_SPLAT; flags |= FL_SPLAT;
break; break;
case '0' ... '9': case 0: case 1: case 2: case 3: case 4:
case 5: case 6: case 7: case 8: case 9:
width = (ch-'0'); width = (ch-'0');
state = ST_WIDTH; state = ST_WIDTH;
flags |= FL_WIDTH; flags |= FL_WIDTH;
@ -542,6 +574,7 @@ int vfscanf(FILE* stream, const char *format, va_list ap)
return converted; return converted;
} }
#ifdef EMBEDDED
int creat(const char *pathname, mode_t mode) int creat(const char *pathname, mode_t mode)
{ {
return open(pathname, O_CREAT | O_TRUNC | O_WRONLY, mode); return open(pathname, O_CREAT | O_TRUNC | O_WRONLY, mode);

View File

@ -19,14 +19,25 @@
#ifndef TESSERACT_CCUTIL_SCANUTILS_H_ #ifndef TESSERACT_CCUTIL_SCANUTILS_H_
#define TESSERACT_CCUTIL_SCANUTILS_H_ #define TESSERACT_CCUTIL_SCANUTILS_H_
#ifdef EMBEDDED
#include <stdint.h> #include <stdint.h>
#include <stddef.h> #include <stddef.h>
#include <stdio.h> #include <stdio.h>
//#include <klibc/extern.h>
#include <sys/stat.h> #include <sys/stat.h>
/**
 * fscanf variant to ensure correct reading regardless of locale.
 *
 * tfscanf parses a file stream according to the given format. See the fscanf
 * manpage for more information, as this function attempts to mimic its
 * behavior.
 *
 * @param stream  File stream to read from.
 * @param format  fscanf-style format string describing the expected input.
 * @param ...     Pointers that receive the converted values.
 *
 * @note Scientific floating-point notation is not supported.
 *
 */
int tfscanf(FILE* stream, const char *format, ...);
#ifdef EMBEDDED
// Attempts to parse the given file stream s as an integer of the base // Attempts to parse the given file stream s as an integer of the base
// 'base'. Returns the first successfully parsed integer as a uintmax_t, or // 'base'. Returns the first successfully parsed integer as a uintmax_t, or
// 0, if none was found. // 0, if none was found.

View File

@ -46,7 +46,7 @@ History: 6/6/89, DSJ, Created.
uinT16 ReadSampleSize(FILE *File) { uinT16 ReadSampleSize(FILE *File) {
int SampleSize; int SampleSize;
if ((fscanf (File, "%d", &SampleSize) != 1) || if ((tfscanf(File, "%d", &SampleSize) != 1) ||
(SampleSize < 0) || (SampleSize > MAXSAMPLESIZE)) (SampleSize < 0) || (SampleSize > MAXSAMPLESIZE))
DoError (ILLEGALSAMPLESIZE, "Illegal sample size"); DoError (ILLEGALSAMPLESIZE, "Illegal sample size");
return (SampleSize); return (SampleSize);
@ -72,7 +72,7 @@ PARAM_DESC *ReadParamDesc(FILE *File, uinT16 N) {
ParamDesc = (PARAM_DESC *) Emalloc (N * sizeof (PARAM_DESC)); ParamDesc = (PARAM_DESC *) Emalloc (N * sizeof (PARAM_DESC));
for (i = 0; i < N; i++) { for (i = 0; i < N; i++) {
if (fscanf (File, "%s", Token) != 1) if (tfscanf(File, "%s", Token) != 1)
DoError (ILLEGALCIRCULARSPEC, DoError (ILLEGALCIRCULARSPEC,
"Illegal circular/linear specification"); "Illegal circular/linear specification");
if (Token[0] == 'c') if (Token[0] == 'c')
@ -80,14 +80,14 @@ PARAM_DESC *ReadParamDesc(FILE *File, uinT16 N) {
else else
ParamDesc[i].Circular = FALSE; ParamDesc[i].Circular = FALSE;
if (fscanf (File, "%s", Token) != 1) if (tfscanf(File, "%s", Token) != 1)
DoError (ILLEGALESSENTIALSPEC, DoError (ILLEGALESSENTIALSPEC,
"Illegal essential/non-essential spec"); "Illegal essential/non-essential spec");
if (Token[0] == 'e') if (Token[0] == 'e')
ParamDesc[i].NonEssential = FALSE; ParamDesc[i].NonEssential = FALSE;
else else
ParamDesc[i].NonEssential = TRUE; ParamDesc[i].NonEssential = TRUE;
if (fscanf (File, "%f%f", &(ParamDesc[i].Min), &(ParamDesc[i].Max)) != if (tfscanf(File, "%f%f", &(ParamDesc[i].Min), &(ParamDesc[i].Max)) !=
2) 2)
DoError (ILLEGALMINMAXSPEC, "Illegal min or max specification"); DoError (ILLEGALMINMAXSPEC, "Illegal min or max specification");
ParamDesc[i].Range = ParamDesc[i].Max - ParamDesc[i].Min; ParamDesc[i].Range = ParamDesc[i].Max - ParamDesc[i].Min;
@ -119,7 +119,7 @@ PROTOTYPE *ReadPrototype(FILE *File, uinT16 N) {
int SampleCount; int SampleCount;
int i; int i;
if ((Status = fscanf (File, "%s", Token)) == 1) { if ((Status = tfscanf(File, "%s", Token)) == 1) {
Proto = (PROTOTYPE *) Emalloc (sizeof (PROTOTYPE)); Proto = (PROTOTYPE *) Emalloc (sizeof (PROTOTYPE));
Proto->Cluster = NULL; Proto->Cluster = NULL;
if (Token[0] == 's') if (Token[0] == 's')
@ -129,7 +129,7 @@ PROTOTYPE *ReadPrototype(FILE *File, uinT16 N) {
Proto->Style = ReadProtoStyle (File); Proto->Style = ReadProtoStyle (File);
if ((fscanf (File, "%d", &SampleCount) != 1) || (SampleCount < 0)) if ((tfscanf(File, "%d", &SampleCount) != 1) || (SampleCount < 0))
DoError (ILLEGALSAMPLECOUNT, "Illegal sample count"); DoError (ILLEGALSAMPLECOUNT, "Illegal sample count");
Proto->NumSamples = SampleCount; Proto->NumSamples = SampleCount;
@ -173,7 +173,7 @@ PROTOTYPE *ReadPrototype(FILE *File, uinT16 N) {
Proto->Distrib = Proto->Distrib =
(DISTRIBUTION *) Emalloc (N * sizeof (DISTRIBUTION)); (DISTRIBUTION *) Emalloc (N * sizeof (DISTRIBUTION));
for (i = 0; i < N; i++) { for (i = 0; i < N; i++) {
if (fscanf (File, "%s", Token) != 1) if (tfscanf(File, "%s", Token) != 1)
DoError (ILLEGALDISTRIBUTION, DoError (ILLEGALDISTRIBUTION,
"Illegal prototype distribution"); "Illegal prototype distribution");
switch (Token[0]) { switch (Token[0]) {
@ -245,7 +245,7 @@ PROTOSTYLE ReadProtoStyle(FILE *File) {
char Token[TOKENSIZE]; char Token[TOKENSIZE];
PROTOSTYLE Style; PROTOSTYLE Style;
if (fscanf (File, "%s", Token) != 1) if (tfscanf(File, "%s", Token) != 1)
DoError (ILLEGALSTYLESPEC, "Illegal prototype style specification"); DoError (ILLEGALSTYLESPEC, "Illegal prototype style specification");
switch (Token[0]) { switch (Token[0]) {
case 's': case 's':
@ -290,7 +290,7 @@ FLOAT32* ReadNFloats(FILE * File, uinT16 N, FLOAT32 Buffer[]) {
Buffer = reinterpret_cast<FLOAT32*>(Emalloc(N * sizeof(FLOAT32))); Buffer = reinterpret_cast<FLOAT32*>(Emalloc(N * sizeof(FLOAT32)));
for (i = 0; i < N; i++) { for (i = 0; i < N; i++) {
NumFloatsRead = fscanf(File, "%f", &(Buffer[i])); NumFloatsRead = tfscanf(File, "%f", &(Buffer[i]));
if (NumFloatsRead != 1) { if (NumFloatsRead != 1) {
if ((NumFloatsRead == EOF) && (i == 0)) { if ((NumFloatsRead == EOF) && (i == 0)) {
Efree(Buffer); Efree(Buffer);

View File

@ -68,7 +68,7 @@ void Classify::ReadNewCutoffs(FILE *CutoffFile, bool swap, inT64 end_offset,
Cutoffs[i] = MAX_CUTOFF; Cutoffs[i] = MAX_CUTOFF;
while ((end_offset < 0 || ftell(CutoffFile) < end_offset) && while ((end_offset < 0 || ftell(CutoffFile) < end_offset) &&
fscanf(CutoffFile, "%" REALLY_QUOTE_IT(UNICHAR_LEN) "s %d", tfscanf(CutoffFile, "%" REALLY_QUOTE_IT(UNICHAR_LEN) "s %d",
Class, &Cutoff) == 2) { Class, &Cutoff) == 2) {
if (strcmp(Class, "NULL") == 0) { if (strcmp(Class, "NULL") == 0) {
ClassId = unicharset.unichar_to_id(" "); ClassId = unicharset.unichar_to_id(" ");

View File

@ -265,13 +265,13 @@ CHAR_DESC ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs,
CHAR_DESC CharDesc; CHAR_DESC CharDesc;
int Type; int Type;
if (fscanf (File, "%d", &NumSetsToRead) != 1 || if (tfscanf(File, "%d", &NumSetsToRead) != 1 ||
NumSetsToRead < 0 || NumSetsToRead > FeatureDefs.NumFeatureTypes) NumSetsToRead < 0 || NumSetsToRead > FeatureDefs.NumFeatureTypes)
DoError (ILLEGAL_NUM_SETS, "Illegal number of feature sets"); DoError (ILLEGAL_NUM_SETS, "Illegal number of feature sets");
CharDesc = NewCharDescription(FeatureDefs); CharDesc = NewCharDescription(FeatureDefs);
for (; NumSetsToRead > 0; NumSetsToRead--) { for (; NumSetsToRead > 0; NumSetsToRead--) {
fscanf (File, "%s", ShortName); tfscanf(File, "%s", ShortName);
Type = ShortNameToFeatureType(FeatureDefs, ShortName); Type = ShortNameToFeatureType(FeatureDefs, ShortName);
CharDesc->FeatureSets[Type] = CharDesc->FeatureSets[Type] =
ReadFeatureSet (File, FeatureDefs.FeatureDesc[Type]); ReadFeatureSet (File, FeatureDefs.FeatureDesc[Type]);

View File

@ -38,6 +38,8 @@
#include "shapetable.h" #include "shapetable.h"
#include "svmnode.h" #include "svmnode.h"
#include "scanutils.h"
namespace tesseract { namespace tesseract {
// Constants controlling clustering. With a low kMinClusteredShapes and a high // Constants controlling clustering. With a low kMinClusteredShapes and a high
@ -360,8 +362,8 @@ bool MasterTrainer::LoadFontInfo(const char* filename) {
fontinfo.name = font_name; fontinfo.name = font_name;
fontinfo.properties = 0; fontinfo.properties = 0;
fontinfo.universal_id = 0; fontinfo.universal_id = 0;
if (fscanf(fp, "%1024s %i %i %i %i %i\n", font_name, if (tfscanf(fp, "%1024s %i %i %i %i %i\n", font_name,
&italic, &bold, &fixed, &serif, &fraktur) != 6) &italic, &bold, &fixed, &serif, &fraktur) != 6)
continue; continue;
fontinfo.properties = fontinfo.properties =
(italic << 0) + (italic << 0) +
@ -397,7 +399,7 @@ bool MasterTrainer::LoadXHeights(const char* filename) {
int total_xheight = 0; int total_xheight = 0;
int xheight_count = 0; int xheight_count = 0;
while (!feof(f)) { while (!feof(f)) {
if (fscanf(f, "%1023s %d\n", buffer, &xht) != 2) if (tfscanf(f, "%1023s %d\n", buffer, &xht) != 2)
continue; continue;
buffer[1023] = '\0'; buffer[1023] = '\0';
fontinfo.name = buffer; fontinfo.name = buffer;
@ -441,13 +443,13 @@ bool MasterTrainer::AddSpacingInfo(const char *filename) {
char uch[UNICHAR_LEN]; char uch[UNICHAR_LEN];
char kerned_uch[UNICHAR_LEN]; char kerned_uch[UNICHAR_LEN];
int x_gap, x_gap_before, x_gap_after, num_kerned; int x_gap, x_gap_before, x_gap_after, num_kerned;
ASSERT_HOST(fscanf(fontinfo_file, "%d\n", &num_unichars) == 1); ASSERT_HOST(tfscanf(fontinfo_file, "%d\n", &num_unichars) == 1);
FontInfo *fi = &fontinfo_table_.get(fontinfo_id); FontInfo *fi = &fontinfo_table_.get(fontinfo_id);
fi->init_spacing(unicharset_.size()); fi->init_spacing(unicharset_.size());
FontSpacingInfo *spacing = NULL; FontSpacingInfo *spacing = NULL;
for (int l = 0; l < num_unichars; ++l) { for (int l = 0; l < num_unichars; ++l) {
if (fscanf(fontinfo_file, "%s %d %d %d", if (tfscanf(fontinfo_file, "%s %d %d %d",
uch, &x_gap_before, &x_gap_after, &num_kerned) != 4) { uch, &x_gap_before, &x_gap_after, &num_kerned) != 4) {
tprintf("Bad format of font spacing file %s\n", filename); tprintf("Bad format of font spacing file %s\n", filename);
fclose(fontinfo_file); fclose(fontinfo_file);
return false; return false;
@ -459,7 +461,7 @@ bool MasterTrainer::AddSpacingInfo(const char *filename) {
spacing->x_gap_after = static_cast<inT16>(x_gap_after * scale); spacing->x_gap_after = static_cast<inT16>(x_gap_after * scale);
} }
for (int k = 0; k < num_kerned; ++k) { for (int k = 0; k < num_kerned; ++k) {
if (fscanf(fontinfo_file, "%s %d", kerned_uch, &x_gap) != 2) { if (tfscanf(fontinfo_file, "%s %d", kerned_uch, &x_gap) != 2) {
tprintf("Bad format of font spacing file %s\n", filename); tprintf("Bad format of font spacing file %s\n", filename);
fclose(fontinfo_file); fclose(fontinfo_file);
delete spacing; delete spacing;

View File

@ -263,7 +263,7 @@ NORM_PROTOS *Classify::ReadNormProtos(FILE *File, inT64 end_offset) {
/* read protos for each class into a separate list */ /* read protos for each class into a separate list */
while ((end_offset < 0 || ftell(File) < end_offset) && while ((end_offset < 0 || ftell(File) < end_offset) &&
fscanf(File, "%s %d", unichar, &NumProtos) == 2) { tfscanf(File, "%s %d", unichar, &NumProtos) == 2) {
if (unicharset.contains_unichar(unichar)) { if (unicharset.contains_unichar(unichar)) {
unichar_id = unicharset.unichar_to_id(unichar); unichar_id = unicharset.unichar_to_id(unichar);
Protos = NormProtos->Protos[unichar_id]; Protos = NormProtos->Protos[unichar_id];

View File

@ -167,7 +167,7 @@ FEATURE ReadFeature(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) {
Feature = NewFeature (FeatureDesc); Feature = NewFeature (FeatureDesc);
for (i = 0; i < Feature->Type->NumParams; i++) { for (i = 0; i < Feature->Type->NumParams; i++) {
if (fscanf (File, "%f", &(Feature->Params[i])) != 1) if (tfscanf (File, "%f", &(Feature->Params[i])) != 1)
DoError (ILLEGAL_FEATURE_PARAM, "Illegal feature parameter spec"); DoError (ILLEGAL_FEATURE_PARAM, "Illegal feature parameter spec");
#ifndef _WIN32 #ifndef _WIN32
assert (!isnan(Feature->Params[i])); assert (!isnan(Feature->Params[i]));
@ -198,12 +198,12 @@ FEATURE_SET ReadFeatureSet(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) {
int NumFeatures; int NumFeatures;
int i; int i;
if (fscanf (File, "%d", &NumFeatures) != 1 || NumFeatures < 0) if (tfscanf(File, "%d", &NumFeatures) != 1 || NumFeatures < 0)
DoError (ILLEGAL_NUM_FEATURES, "Illegal number of features in set"); DoError(ILLEGAL_NUM_FEATURES, "Illegal number of features in set");
FeatureSet = NewFeatureSet (NumFeatures); FeatureSet = NewFeatureSet(NumFeatures);
for (i = 0; i < NumFeatures; i++) for (i = 0; i < NumFeatures; i++)
AddFeature (FeatureSet, ReadFeature (File, FeatureDesc)); AddFeature(FeatureSet, ReadFeature (File, FeatureDesc));
return (FeatureSet); return (FeatureSet);

View File

@ -775,6 +775,7 @@ void ScrollView::Image(struct Pix* image, int x_pos, int y_pos) {
size_t size; size_t size;
pixWriteMem(&data, &size, image, IFF_PNG); pixWriteMem(&data, &size, image, IFF_PNG);
int base64_len = (size + 2) / 3 * 4; int base64_len = (size + 2) / 3 * 4;
y_pos = TranslateYCoordinate(y_pos);
SendMsg("readImage(%d,%d,%d)", x_pos, y_pos, base64_len); SendMsg("readImage(%d,%d,%d)", x_pos, y_pos, base64_len);
// Base64 encode the data. // Base64 encode the data.
const char kBase64Table[64] = { const char kBase64Table[64] = {