2007-03-08 04:03:40 +08:00
|
|
|
// Copyright 2006 Google Inc.
|
|
|
|
// All Rights Reserved.
|
|
|
|
// Author: renn
|
|
|
|
//
|
|
|
|
// The fscanf, vfscanf and creat functions are implemented so that their
|
|
|
|
// functionality is mostly like their stdio counterparts. However, currently
|
2007-05-16 09:42:08 +08:00
|
|
|
// these functions do not use any buffering, making them rather slow.
|
2007-03-08 04:03:40 +08:00
|
|
|
// File streams are thus processed one character at a time.
|
2007-05-16 09:42:08 +08:00
|
|
|
// Although the implementations of the scanf functions do lack a few minor
|
|
|
|
// features, they should be sufficient for their use in tesseract.
|
2007-03-08 04:03:40 +08:00
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
2015-10-31 18:54:37 +08:00
|
|
|
#ifdef HAVE_CONFIG_H
|
|
|
|
#include "config_auto.h"
|
|
|
|
#endif
|
|
|
|
|
2018-05-20 05:52:04 +08:00
|
|
|
#include <cctype>
|
|
|
|
#include <cmath>
|
|
|
|
#include <cstdarg>
|
|
|
|
#include <cstddef>
|
|
|
|
#include <cstring>
|
|
|
|
#include <climits>
|
|
|
|
#include <cstdio>
|
2018-05-20 19:49:35 +08:00
|
|
|
#include <limits>
|
2007-03-08 04:03:40 +08:00
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/stat.h>
|
|
|
|
#include <fcntl.h>
|
|
|
|
|
|
|
|
#include "scanutils.h"
|
|
|
|
|
|
|
|
// Bit flags describing the conversion specification currently being parsed.
// Each enumerator occupies its own bit so that they can be OR-ed together.
enum Flags {
  FL_SPLAT = 1 << 0,  // Drop the value, do not assign
  FL_INV   = 1 << 1,  // Character-set with inverse
  FL_WIDTH = 1 << 2,  // Field width specified
  FL_MINUS = 1 << 3,  // Negative number
};
|
|
|
|
|
|
|
|
// Size classes ("ranks") of the object an integer conversion stores into.
// The ordinary ranks are consecutive so that the 'h' and 'l' length modifiers
// can simply decrement or increment the current rank.
enum Ranks {
  RANK_CHAR     = -2,
  RANK_SHORT    = -1,
  RANK_INT      = 0,
  RANK_LONG     = 1,
  RANK_LONGLONG = 2,
  RANK_PTR      = std::numeric_limits<int>::max()  // Special value used for pointers
};

// Smallest and largest rank a length modifier sequence may produce.
const Ranks kMinRank = RANK_CHAR;
const Ranks kMaxRank = RANK_LONGLONG;

// Ranks selected by the 'j', 'z' and 't' length modifiers respectively.
const Ranks kIntMaxRank = RANK_LONGLONG;
const Ranks kSizeTRank = RANK_LONG;
const Ranks kPtrDiffRank = RANK_LONG;
|
|
|
|
|
|
|
|
// Reason why format processing stopped early.
enum Bail {
  BAIL_NONE = 0,  // No error condition
  BAIL_EOF  = 1,  // Hit EOF
  BAIL_ERR  = 2   // Conversion mismatch
};
|
|
|
|
|
|
|
|
// Helper functions ------------------------------------------------------------
|
|
|
|
// Returns the number of bits in an (unsigned) long, i.e. the number of bits
// held by one word of the bitmaps used by SetBit() and TestBit().
inline size_t LongBit() {
  const size_t bytes_per_long = sizeof(long);
  return bytes_per_long * CHAR_BIT;
}
|
|
|
|
|
|
|
|
// Consumes ASCII whitespace from the stream.
// The first character that is not whitespace is pushed back so that it is
// the next one read, and is also returned. At end of input the value
// returned is whatever fgetc() reported (EOF).
static inline int SkipSpace(FILE *s) {
  int next;
  do {
    next = fgetc(s);
  } while (isascii(next) && isspace(next));
  ungetc(next, s);  // Make sure next char is available for reading
  return next;
}
|
|
|
|
|
|
|
|
static inline void
|
2014-04-25 09:25:42 +08:00
|
|
|
SetBit(unsigned long *bitmap, unsigned int bit) {
|
2007-03-08 04:03:40 +08:00
|
|
|
bitmap[bit/LongBit()] |= 1UL << (bit%LongBit());
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int
|
2014-04-25 09:25:42 +08:00
|
|
|
TestBit(unsigned long *bitmap, unsigned int bit) {
|
2007-03-08 04:03:40 +08:00
|
|
|
return static_cast<int>(bitmap[bit/LongBit()] >> (bit%LongBit())) & 1;
|
|
|
|
}
|
|
|
|
|
2014-04-29 07:06:41 +08:00
|
|
|
// Returns the numeric value of the digit character `ch`, or -1 if `ch` is not
// a digit that is accepted for `base`.
//  - '0'..'9' are accepted when base >= 10; for smaller bases only '0'..'7'.
//  - 'A'..'F' and 'a'..'f' (values 10..15) are accepted only when base == 16.
// Callers treat every non-negative result as a valid digit, so letters beyond
// 'F'/'f' must be rejected here; otherwise "1G" would be read as the
// hexadecimal number 1*16+16.
static inline int DigitValue(int ch, int base) {
  if (ch >= '0' && ch <= '9') {
    if (base >= 10 || ch <= '7')
      return ch - '0';
  } else if (base == 16) {
    if (ch >= 'A' && ch <= 'F')
      return ch - 'A' + 10;
    if (ch >= 'a' && ch <= 'f')
      return ch - 'a' + 10;
  }
  return -1;
}
|
|
|
|
|
|
|
|
// IO (re-)implementations -----------------------------------------------------
|
2018-07-05 20:44:33 +08:00
|
|
|
// Reads an integer from the stream, in the manner of strtoumax().
//
// Leading ASCII whitespace is skipped and a single optional '+' or '-' is
// accepted. `base` selects the radix:
//   0  - auto-detect: "0x"/"0X" prefix means 16, a leading "0" means 8,
//        anything else means 10;
//   16 - an optional "0x"/"0X" prefix is skipped;
//   other values are used as given.
// Digits are consumed for as long as DigitValue() accepts them. The first
// character that does not belong to the number is pushed back onto the stream.
//
// Returns the parsed value, negated (modulo 2^N) if a '-' was present.
// If no digits are found the result is 0; overflow is not detected.
static uintmax_t streamtoumax(FILE* s, int base) {
  int minus = 0;
  uintmax_t v = 0;
  int d, c = 0;

  for (c = fgetc(s); isascii(c) && isspace(c); c = fgetc(s));

  // Single optional + or -
  if (c == '-' || c == '+') {
    minus = (c == '-');
    c = fgetc(s);
  }

  // Assign correct base
  if (base == 0) {
    if (c == '0') {
      c = fgetc(s);
      if (c == 'x' || c == 'X') {
        base = 16;
        c = fgetc(s);
      } else {
        base = 8;
      }
    } else {
      // No prefix: plain decimal. Without this the base would stay 0 and
      // every digit would overwrite the value accumulated so far.
      base = 10;
    }
  } else if (base == 16) {
    if (c == '0') {
      c = fgetc(s);
      if (c == 'x' || c == 'X') c = fgetc(s);
    }
  }

  // Actual number parsing
  for (; (c != EOF) && (d = DigitValue(c, base)) >= 0; c = fgetc(s))
    v = v * base + d;

  ungetc(c, s);
  return minus ? static_cast<uintmax_t>(0) - v : v;
}
|
|
|
|
|
2018-07-05 20:44:33 +08:00
|
|
|
// Reads a decimal floating point number from the stream.
//
// Accepted syntax: optional ASCII whitespace, an optional '+' or '-',
// decimal digits, an optional '.' followed by decimal digits, and an optional
// exponent ('e' or 'E', optional sign, decimal digits). The first character
// that does not belong to the number is pushed back onto the stream.
//
// The integral and fractional digits are accumulated in 64 bit integers.
// Digits that would overflow those accumulators are still consumed but no
// longer accumulated: surplus integral digits only scale the result by powers
// of ten and surplus fractional digits are ignored. Inputs with at most 19
// integral and 19 fractional digits are converted exactly as before.
//
// Returns the parsed value; 0 if no digits were found.
static double streamtofloat(FILE* s) {
  // Largest accumulator value that can still take one more decimal digit.
  const uint64_t kMaxBeforeDigit =
      (std::numeric_limits<uint64_t>::max() - 9) / 10;
  // Largest fraction scale that can still be multiplied by ten (10^18).
  const uint64_t kMaxFractionScale = 1000000000000000000ULL;
  // Decimal exponents beyond this saturate a double anyway; capping keeps
  // the int counters from overflowing.
  const int kMaxExponent = 9999;

  bool minus = false;
  uint64_t v = 0;   // Integral digits
  int dropped = 0;  // Integral digits that did not fit into v
  int d, c;
  uint64_t k = 1;   // 10^(number of fractional digits held in w)
  uint64_t w = 0;   // Fractional digits

  for (c = fgetc(s); isascii(c) && isspace(c); c = fgetc(s));

  // Single optional + or -
  if (c == '-' || c == '+') {
    minus = (c == '-');
    c = fgetc(s);
  }

  // Actual number parsing
  for (; c != EOF && (d = DigitValue(c, 10)) >= 0; c = fgetc(s)) {
    if (v <= kMaxBeforeDigit) {
      v = v * 10 + d;
    } else if (dropped < kMaxExponent) {
      dropped++;
    }
  }
  if (c == '.') {
    for (c = fgetc(s); c != EOF && (d = DigitValue(c, 10)) >= 0; c = fgetc(s)) {
      if (k <= kMaxFractionScale) {
        w = w * 10 + d;
        k *= 10;
      }
    }
  }
  double f = v + static_cast<double>(w) / k;
  if (dropped > 0) {
    f *= pow(10.0, static_cast<double>(dropped));
  }
  if (c == 'e' || c == 'E') {
    c = fgetc(s);
    int expsign = 1;
    if (c == '-' || c == '+') {
      expsign = (c == '-') ? -1 : 1;
      c = fgetc(s);
    }
    int exponent = 0;
    for (; (c != EOF) && (d = DigitValue(c, 10)) >= 0; c = fgetc(s)) {
      if (exponent <= kMaxExponent) {
        exponent = exponent * 10 + d;
      }
    }
    exponent *= expsign;
    f *= pow(10.0, static_cast<double>(exponent));
  }
  ungetc(c, s);

  return minus ? -f : f;
}
|
|
|
|
|
2014-04-24 07:12:53 +08:00
|
|
|
// Worker shared by all the scanf-style entry points in this file.
static int tvfscanf(FILE* stream, const char *format, va_list ap);

// fscanf-like entry point: collects the variable arguments into a va_list,
// hands them to tvfscanf() and returns its result unchanged.
int tfscanf(FILE* stream, const char *format, ...) {
  va_list args;
  va_start(args, format);
  const int result = tvfscanf(stream, format, args);
  va_end(args);
  return result;
}
|
|
|
|
|
|
|
|
#ifdef EMBEDDED
|
|
|
|
|
2014-04-25 09:25:42 +08:00
|
|
|
int fscanf(FILE* stream, const char *format, ...) {
|
2007-03-08 04:03:40 +08:00
|
|
|
va_list ap;
|
|
|
|
int rv;
|
|
|
|
|
|
|
|
va_start(ap, format);
|
2014-04-24 07:12:53 +08:00
|
|
|
rv = tvfscanf(stream, format, ap);
|
2010-09-30 07:43:31 +08:00
|
|
|
va_end(ap);
|
|
|
|
|
|
|
|
return rv;
|
|
|
|
}
|
|
|
|
|
2014-04-25 09:25:42 +08:00
|
|
|
// Replacement named after the stdio vfscanf (only compiled when EMBEDDED is
// defined): forwards the variable arguments to tvfscanf() and returns its
// result.
// NOTE(review): unlike the standard vfscanf this takes `...` rather than a
// va_list - confirm that this is what the EMBEDDED callers expect.
int vfscanf(FILE* stream, const char *format, ...) {
  va_list arg_list;
  va_start(arg_list, format);
  const int count = tvfscanf(stream, format, arg_list);
  va_end(arg_list);
  return count;
}
|
|
|
|
#endif
|
|
|
|
|
2014-04-25 09:25:42 +08:00
|
|
|
static int tvfscanf(FILE* stream, const char *format, va_list ap) {
|
2007-03-08 04:03:40 +08:00
|
|
|
const char *p = format;
|
|
|
|
char ch;
|
|
|
|
int q = 0;
|
|
|
|
uintmax_t val = 0;
|
|
|
|
int rank = RANK_INT; // Default rank
|
2010-11-24 02:34:14 +08:00
|
|
|
unsigned int width = UINT_MAX;
|
2007-03-08 04:03:40 +08:00
|
|
|
int base;
|
|
|
|
int flags = 0;
|
|
|
|
enum {
|
|
|
|
ST_NORMAL, // Ground state
|
|
|
|
ST_FLAGS, // Special flags
|
|
|
|
ST_WIDTH, // Field width
|
|
|
|
ST_MODIFIERS, // Length or conversion modifiers
|
|
|
|
ST_MATCH_INIT, // Initial state of %[ sequence
|
|
|
|
ST_MATCH, // Main state of %[ sequence
|
|
|
|
ST_MATCH_RANGE, // After - in a %[ sequence
|
|
|
|
} state = ST_NORMAL;
|
2016-12-13 00:20:28 +08:00
|
|
|
char *sarg = nullptr; // %s %c or %[ string argument
|
2007-03-08 04:03:40 +08:00
|
|
|
enum Bail bail = BAIL_NONE;
|
|
|
|
int converted = 0; // Successful conversions
|
2014-04-25 09:25:42 +08:00
|
|
|
unsigned long matchmap[((1 << CHAR_BIT)+(CHAR_BIT * sizeof(long) - 1)) /
|
|
|
|
(CHAR_BIT * sizeof(long))];
|
2007-03-08 04:03:40 +08:00
|
|
|
int matchinv = 0; // Is match map inverted?
|
|
|
|
unsigned char range_start = 0;
|
|
|
|
off_t start_off = ftell(stream);
|
|
|
|
|
|
|
|
// Skip leading spaces
|
|
|
|
SkipSpace(stream);
|
2007-05-16 09:42:08 +08:00
|
|
|
|
2007-03-08 04:03:40 +08:00
|
|
|
while ((ch = *p++) && !bail) {
|
|
|
|
switch (state) {
|
|
|
|
case ST_NORMAL:
|
|
|
|
if (ch == '%') {
|
|
|
|
state = ST_FLAGS;
|
2010-11-24 02:34:14 +08:00
|
|
|
flags = 0; rank = RANK_INT; width = UINT_MAX;
|
2018-10-08 23:24:14 +08:00
|
|
|
} else if (isascii(ch) && isspace(ch)) {
|
2007-03-08 04:03:40 +08:00
|
|
|
SkipSpace(stream);
|
|
|
|
} else {
|
2007-05-16 09:42:08 +08:00
|
|
|
if (fgetc(stream) != ch)
|
|
|
|
bail = BAIL_ERR; // Match failure
|
2007-03-08 04:03:40 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case ST_FLAGS:
|
2014-04-29 07:06:41 +08:00
|
|
|
if (ch == '*') {
|
|
|
|
flags |= FL_SPLAT;
|
|
|
|
} else if ('0' <= ch && ch <= '9') {
|
|
|
|
width = (ch-'0');
|
|
|
|
state = ST_WIDTH;
|
|
|
|
flags |= FL_WIDTH;
|
|
|
|
} else {
|
|
|
|
state = ST_MODIFIERS;
|
|
|
|
p--; // Process this character again
|
2007-03-08 04:03:40 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case ST_WIDTH:
|
|
|
|
if (ch >= '0' && ch <= '9') {
|
|
|
|
width = width*10+(ch-'0');
|
|
|
|
} else {
|
|
|
|
state = ST_MODIFIERS;
|
|
|
|
p--; // Process this character again
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case ST_MODIFIERS:
|
|
|
|
switch (ch) {
|
|
|
|
// Length modifiers - nonterminal sequences
|
|
|
|
case 'h':
|
|
|
|
rank--; // Shorter rank
|
|
|
|
break;
|
|
|
|
case 'l':
|
|
|
|
rank++; // Longer rank
|
|
|
|
break;
|
|
|
|
case 'j':
|
|
|
|
rank = kIntMaxRank;
|
|
|
|
break;
|
|
|
|
case 'z':
|
|
|
|
rank = kSizeTRank;
|
|
|
|
break;
|
|
|
|
case 't':
|
|
|
|
rank = kPtrDiffRank;
|
|
|
|
break;
|
|
|
|
case 'L':
|
|
|
|
case 'q':
|
|
|
|
rank = RANK_LONGLONG; // long double/long long
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
// Output modifiers - terminal sequences
|
|
|
|
state = ST_NORMAL; // Next state will be normal
|
|
|
|
if (rank < kMinRank) // Canonicalize rank
|
|
|
|
rank = kMinRank;
|
|
|
|
else if (rank > kMaxRank)
|
|
|
|
rank = kMaxRank;
|
|
|
|
|
|
|
|
switch (ch) {
|
|
|
|
case 'P': // Upper case pointer
|
|
|
|
case 'p': // Pointer
|
|
|
|
rank = RANK_PTR;
|
2017-03-08 14:38:59 +08:00
|
|
|
base = 0;
|
2017-04-29 04:38:32 +08:00
|
|
|
goto scan_int;
|
2007-05-16 09:42:08 +08:00
|
|
|
|
2007-03-08 04:03:40 +08:00
|
|
|
case 'i': // Base-independent integer
|
2017-03-08 14:38:59 +08:00
|
|
|
base = 0;
|
2017-04-29 04:38:32 +08:00
|
|
|
goto scan_int;
|
2007-05-16 09:42:08 +08:00
|
|
|
|
2007-03-08 04:03:40 +08:00
|
|
|
case 'd': // Decimal integer
|
2017-03-08 14:38:59 +08:00
|
|
|
base = 10;
|
2017-04-29 04:38:32 +08:00
|
|
|
goto scan_int;
|
2007-05-16 09:42:08 +08:00
|
|
|
|
2007-03-08 04:03:40 +08:00
|
|
|
case 'o': // Octal integer
|
2017-03-08 14:38:59 +08:00
|
|
|
base = 8;
|
2017-04-29 04:38:32 +08:00
|
|
|
goto scan_int;
|
2007-05-16 09:42:08 +08:00
|
|
|
|
2007-03-08 04:03:40 +08:00
|
|
|
case 'u': // Unsigned decimal integer
|
2017-03-08 14:38:59 +08:00
|
|
|
base = 10;
|
2017-04-29 04:38:32 +08:00
|
|
|
goto scan_int;
|
2007-05-16 09:42:08 +08:00
|
|
|
|
2007-03-08 04:03:40 +08:00
|
|
|
case 'x': // Hexadecimal integer
|
|
|
|
case 'X':
|
2017-03-08 14:38:59 +08:00
|
|
|
base = 16;
|
2017-04-29 04:38:32 +08:00
|
|
|
goto scan_int;
|
2007-05-16 09:42:08 +08:00
|
|
|
|
2007-03-08 04:03:40 +08:00
|
|
|
case 'n': // Number of characters consumed
|
|
|
|
val = ftell(stream) - start_off;
|
|
|
|
goto set_integer;
|
2007-05-16 09:42:08 +08:00
|
|
|
|
2007-03-08 04:03:40 +08:00
|
|
|
scan_int:
|
|
|
|
q = SkipSpace(stream);
|
2018-09-29 14:12:33 +08:00
|
|
|
if (q <= 0) {
|
2007-03-08 04:03:40 +08:00
|
|
|
bail = BAIL_EOF;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
val = streamtoumax(stream, base);
|
|
|
|
// fall through
|
|
|
|
|
|
|
|
set_integer:
|
|
|
|
if (!(flags & FL_SPLAT)) {
|
2014-04-29 12:30:47 +08:00
|
|
|
converted++;
|
2007-03-08 04:03:40 +08:00
|
|
|
switch(rank) {
|
|
|
|
case RANK_CHAR:
|
2007-05-16 09:42:08 +08:00
|
|
|
*va_arg(ap, unsigned char *)
|
2007-03-08 04:03:40 +08:00
|
|
|
= static_cast<unsigned char>(val);
|
|
|
|
break;
|
|
|
|
case RANK_SHORT:
|
2007-05-16 09:42:08 +08:00
|
|
|
*va_arg(ap, unsigned short *)
|
2007-03-08 04:03:40 +08:00
|
|
|
= static_cast<unsigned short>(val);
|
|
|
|
break;
|
|
|
|
case RANK_INT:
|
2007-05-16 09:42:08 +08:00
|
|
|
*va_arg(ap, unsigned int *)
|
2007-03-08 04:03:40 +08:00
|
|
|
= static_cast<unsigned int>(val);
|
|
|
|
break;
|
|
|
|
case RANK_LONG:
|
2007-05-16 09:42:08 +08:00
|
|
|
*va_arg(ap, unsigned long *)
|
2007-03-08 04:03:40 +08:00
|
|
|
= static_cast<unsigned long>(val);
|
|
|
|
break;
|
|
|
|
case RANK_LONGLONG:
|
2007-05-16 09:42:08 +08:00
|
|
|
*va_arg(ap, unsigned long long *)
|
2007-03-08 04:03:40 +08:00
|
|
|
= static_cast<unsigned long long>(val);
|
|
|
|
break;
|
|
|
|
case RANK_PTR:
|
2007-05-16 09:42:08 +08:00
|
|
|
*va_arg(ap, void **)
|
2007-03-08 04:03:40 +08:00
|
|
|
= reinterpret_cast<void *>(static_cast<uintptr_t>(val));
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'f': // Preliminary float value parsing
|
|
|
|
case 'g':
|
|
|
|
case 'G':
|
|
|
|
case 'e':
|
|
|
|
case 'E':
|
|
|
|
q = SkipSpace(stream);
|
|
|
|
if (q <= 0) {
|
|
|
|
bail = BAIL_EOF;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2010-11-24 02:34:14 +08:00
|
|
|
{
|
|
|
|
double fval = streamtofloat(stream);
|
2014-04-29 12:03:41 +08:00
|
|
|
if (!(flags & FL_SPLAT)) {
|
|
|
|
if (rank == RANK_INT)
|
2007-03-08 04:03:40 +08:00
|
|
|
*va_arg(ap, float *) = static_cast<float>(fval);
|
2014-04-29 12:03:41 +08:00
|
|
|
else if (rank == RANK_LONG)
|
2007-03-08 04:03:40 +08:00
|
|
|
*va_arg(ap, double *) = static_cast<double>(fval);
|
2014-04-29 12:30:47 +08:00
|
|
|
converted++;
|
2007-03-08 04:03:40 +08:00
|
|
|
}
|
2010-11-24 02:34:14 +08:00
|
|
|
}
|
2007-03-08 04:03:40 +08:00
|
|
|
break;
|
|
|
|
|
|
|
|
case 'c': // Character
|
|
|
|
width = (flags & FL_WIDTH) ? width : 1; // Default width == 1
|
|
|
|
sarg = va_arg(ap, char *);
|
|
|
|
while (width--) {
|
|
|
|
if ((q = fgetc(stream)) <= 0) {
|
|
|
|
bail = BAIL_EOF;
|
|
|
|
break;
|
|
|
|
}
|
2014-04-29 12:30:47 +08:00
|
|
|
if (!(flags & FL_SPLAT)) {
|
|
|
|
*sarg++ = q;
|
|
|
|
converted++;
|
|
|
|
}
|
2007-03-08 04:03:40 +08:00
|
|
|
}
|
|
|
|
break;
|
2007-05-16 09:42:08 +08:00
|
|
|
|
2007-03-08 04:03:40 +08:00
|
|
|
case 's': // String
|
|
|
|
{
|
scanutils: Fix illegal memory access
Format strings which contain "%*s" show this error in Valgrind:
==32503== Conditional jump or move depends on uninitialised value(s)
==32503== at 0x2B8BB0: tvfscanf(_IO_FILE*, char const*, __va_list_tag*) (scanutils.cpp:486)
==32503== by 0x2B825A: tfscanf(_IO_FILE*, char const*, ...) (scanutils.cpp:234)
==32503== by 0x272B01: read_unlv_file(STRING, int, int, BLOCK_LIST*) (blread.cpp:54)
==32503== by 0x1753CD: tesseract::Tesseract::SegmentPage(STRING const*, BLOCK_LIST*, tesseract::Tesseract*, OSResults*) (pagesegmain.cpp:115)
==32503== by 0x1363CD: tesseract::TessBaseAPI::FindLines() (baseapi.cpp:2291)
==32503== by 0x130CF1: tesseract::TessBaseAPI::Recognize(ETEXT_DESC*) (baseapi.cpp:802)
==32503== by 0x1322D3: tesseract::TessBaseAPI::ProcessPage(Pix*, int, char const*, char const*, int, tesseract::TessResultRenderer*) (baseapi.cpp:1176)
==32503== by 0x131A84: tesseract::TessBaseAPI::ProcessPagesMultipageTiff(unsigned char const*, unsigned long, char const*, char const*, int, tesseract::TessResultRenderer*, int) (baseapi.cpp:1013)
==32503== by 0x132052: tesseract::TessBaseAPI::ProcessPagesInternal(char const*, char const*, int, tesseract::TessResultRenderer*) (baseapi.cpp:1129)
==32503== by 0x131B1E: tesseract::TessBaseAPI::ProcessPages(char const*, char const*, int, tesseract::TessResultRenderer*) (baseapi.cpp:1032)
==32503== by 0x12E00C: main (tesseractmain.cpp:537)
==32503== Uninitialised value was created by a stack allocation
==32503== at 0x272A60: read_unlv_file(STRING, int, int, BLOCK_LIST*) (blread.cpp:41)
Signed-off-by: Stefan Weil <sw@weilnetz.de>
2018-06-08 21:27:36 +08:00
|
|
|
if (!(flags & FL_SPLAT)) {
|
|
|
|
sarg = va_arg(ap, char *);
|
|
|
|
}
|
2018-06-20 23:32:02 +08:00
|
|
|
unsigned length = 0;
|
2007-03-08 04:03:40 +08:00
|
|
|
while (width--) {
|
|
|
|
q = fgetc(stream);
|
2018-10-22 23:46:25 +08:00
|
|
|
if ((isascii(q) && isspace(q)) || (q <= 0)) {
|
2007-03-08 04:03:40 +08:00
|
|
|
ungetc(q, stream);
|
|
|
|
break;
|
|
|
|
}
|
2018-06-20 23:32:02 +08:00
|
|
|
if (!(flags & FL_SPLAT)) {
|
|
|
|
sarg[length] = q;
|
|
|
|
}
|
|
|
|
length++;
|
2007-03-08 04:03:40 +08:00
|
|
|
}
|
2018-06-20 23:32:02 +08:00
|
|
|
if (length == 0) {
|
2014-04-29 12:03:41 +08:00
|
|
|
bail = BAIL_EOF;
|
|
|
|
} else if (!(flags & FL_SPLAT)) {
|
2018-06-20 23:32:02 +08:00
|
|
|
sarg[length] = '\0'; // Terminate output
|
2007-03-08 04:03:40 +08:00
|
|
|
converted++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
2007-05-16 09:42:08 +08:00
|
|
|
|
2007-03-08 04:03:40 +08:00
|
|
|
case '[': // Character range
|
|
|
|
sarg = va_arg(ap, char *);
|
|
|
|
state = ST_MATCH_INIT;
|
|
|
|
matchinv = 0;
|
|
|
|
memset(matchmap, 0, sizeof matchmap);
|
|
|
|
break;
|
2007-05-16 09:42:08 +08:00
|
|
|
|
2007-03-08 04:03:40 +08:00
|
|
|
case '%': // %% sequence
|
2018-09-29 14:12:33 +08:00
|
|
|
if (fgetc(stream) != '%')
|
2007-03-08 04:03:40 +08:00
|
|
|
bail = BAIL_ERR;
|
|
|
|
break;
|
2007-05-16 09:42:08 +08:00
|
|
|
|
2007-03-08 04:03:40 +08:00
|
|
|
default: // Anything else
|
2007-05-16 09:42:08 +08:00
|
|
|
bail = BAIL_ERR; // Unknown sequence
|
2007-03-08 04:03:40 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case ST_MATCH_INIT: // Initial state for %[ match
|
|
|
|
if (ch == '^' && !(flags & FL_INV)) {
|
|
|
|
matchinv = 1;
|
|
|
|
} else {
|
|
|
|
SetBit(matchmap, static_cast<unsigned char>(ch));
|
|
|
|
state = ST_MATCH;
|
|
|
|
}
|
|
|
|
break;
|
2007-05-16 09:42:08 +08:00
|
|
|
|
2007-03-08 04:03:40 +08:00
|
|
|
case ST_MATCH: // Main state for %[ match
|
|
|
|
if (ch == ']') {
|
|
|
|
goto match_run;
|
|
|
|
} else if (ch == '-') {
|
|
|
|
range_start = static_cast<unsigned char>(ch);
|
|
|
|
state = ST_MATCH_RANGE;
|
|
|
|
} else {
|
|
|
|
SetBit(matchmap, static_cast<unsigned char>(ch));
|
|
|
|
}
|
|
|
|
break;
|
2007-05-16 09:42:08 +08:00
|
|
|
|
2007-03-08 04:03:40 +08:00
|
|
|
case ST_MATCH_RANGE: // %[ match after -
|
|
|
|
if (ch == ']') {
|
|
|
|
SetBit(matchmap, static_cast<unsigned char>('-'));
|
|
|
|
goto match_run;
|
|
|
|
} else {
|
|
|
|
int i;
|
|
|
|
for (i = range_start ; i < (static_cast<unsigned char>(ch)) ; i++)
|
|
|
|
SetBit(matchmap, i);
|
|
|
|
state = ST_MATCH;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
match_run: // Match expression finished
|
|
|
|
char* oarg = sarg;
|
|
|
|
while (width) {
|
|
|
|
q = fgetc(stream);
|
2019-03-26 14:55:08 +08:00
|
|
|
auto qc = static_cast<unsigned char>(q);
|
2007-03-08 04:03:40 +08:00
|
|
|
if (q <= 0 || !(TestBit(matchmap, qc)^matchinv)) {
|
|
|
|
ungetc(q, stream);
|
|
|
|
break;
|
|
|
|
}
|
2014-04-29 12:03:41 +08:00
|
|
|
if (!(flags & FL_SPLAT)) *sarg = q;
|
|
|
|
sarg++;
|
2007-03-08 04:03:40 +08:00
|
|
|
}
|
2014-04-29 12:03:41 +08:00
|
|
|
if (oarg == sarg) {
|
|
|
|
bail = (q <= 0) ? BAIL_EOF : BAIL_ERR;
|
|
|
|
} else if (!(flags & FL_SPLAT)) {
|
2007-03-08 04:03:40 +08:00
|
|
|
*sarg = '\0';
|
|
|
|
converted++;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (bail == BAIL_EOF && !converted)
|
|
|
|
converted = -1; // Return EOF (-1)
|
|
|
|
|
|
|
|
return converted;
|
|
|
|
}
|
|
|
|
|
2014-04-24 07:12:53 +08:00
|
|
|
#ifdef EMBEDDED
|
2014-04-25 09:25:42 +08:00
|
|
|
// Replacement for creat() (only compiled when EMBEDDED is defined): opens
// `pathname` write-only, creating it if necessary and truncating it, and
// returns whatever open() returns.
int creat(const char *pathname, mode_t mode) {
  const int open_flags = O_CREAT | O_TRUNC | O_WRONLY;
  return open(pathname, open_flags, mode);
}
|
2012-03-01 20:40:04 +08:00
|
|
|
|
2012-03-02 07:19:35 +08:00
|
|
|
#endif // EMBEDDED
|