Always use isascii() with isspace()

isspace() must only be used with an unsigned char or EOF argument,
and even then its result can depend on the current locale settings.

While this is not a problem for C/C++ executables which use the default
"C" locale, it becomes a problem when the Tesseract API is called from
languages like Python or Java which don't use the "C" locale.

By calling isascii() before calling isspace() this uncertainty can be
avoided, because any locale will hopefully give identical results for
the basic ASCII character set.

Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
Stefan Weil 2018-10-08 17:24:14 +02:00
parent 59ebd58fcc
commit dcd0377bf0
3 changed files with 10 additions and 11 deletions

View File

@ -2455,7 +2455,7 @@ static void InitializeRowInfo(bool after_recognition,
int trailing_ws_idx = strlen(text.get()); // strip trailing space int trailing_ws_idx = strlen(text.get()); // strip trailing space
while (trailing_ws_idx > 0 && while (trailing_ws_idx > 0 &&
// isspace() only takes ASCII // isspace() only takes ASCII
((text[trailing_ws_idx - 1] & 0x80) == 0) && isascii(text[trailing_ws_idx - 1]) &&
isspace(text[trailing_ws_idx - 1])) isspace(text[trailing_ws_idx - 1]))
trailing_ws_idx--; trailing_ws_idx--;
if (trailing_ws_idx > 0) { if (trailing_ws_idx > 0) {

View File

@ -75,7 +75,7 @@ inline size_t LongBit() {
static inline int static inline int
SkipSpace(FILE *s) { SkipSpace(FILE *s) {
int p; int p;
while (isspace(p = fgetc(s))); while (isascii(p = fgetc(s)) && isspace(p));
ungetc(p, s); // Make sure next char is available for reading ungetc(p, s); // Make sure next char is available for reading
return p; return p;
} }
@ -108,9 +108,7 @@ static uintmax_t streamtoumax(FILE* s, int base) {
uintmax_t v = 0; uintmax_t v = 0;
int d, c = 0; int d, c = 0;
for (c = fgetc(s); for (c = fgetc(s); isascii(c) && isspace(c); c = fgetc(s));
isspace(static_cast<unsigned char>(c)) && (c != EOF);
c = fgetc(s)) {}
// Single optional + or - // Single optional + or -
if (c == '-' || c == '+') { if (c == '-' || c == '+') {
@ -151,9 +149,7 @@ static double streamtofloat(FILE* s) {
int k = 1; int k = 1;
int w = 0; int w = 0;
for (c = fgetc(s); for (c = fgetc(s); isascii(c) && isspace(c); c = fgetc(s));
isspace(static_cast<unsigned char>(c)) && (c != EOF);
c = fgetc(s));
// Single optional + or - // Single optional + or -
if (c == '-' || c == '+') { if (c == '-' || c == '+') {
@ -265,7 +261,7 @@ static int tvfscanf(FILE* stream, const char *format, va_list ap) {
if (ch == '%') { if (ch == '%') {
state = ST_FLAGS; state = ST_FLAGS;
flags = 0; rank = RANK_INT; width = UINT_MAX; flags = 0; rank = RANK_INT; width = UINT_MAX;
} else if (isspace(static_cast<unsigned char>(ch))) { } else if (isascii(ch) && isspace(ch)) {
SkipSpace(stream); SkipSpace(stream);
} else { } else {
if (fgetc(stream) != ch) if (fgetc(stream) != ch)
@ -445,7 +441,7 @@ static int tvfscanf(FILE* stream, const char *format, va_list ap) {
unsigned length = 0; unsigned length = 0;
while (width--) { while (width--) {
q = fgetc(stream); q = fgetc(stream);
if (isspace(static_cast<unsigned char>(q)) || q <= 0) { if (isascii(q) && isspace(q) || q <= 0) {
ungetc(q, stream); ungetc(q, stream);
break; break;
} }

View File

@ -58,7 +58,10 @@ bool ParamsModel::ParseLine(char *line, char** key, float *val) {
if (line[0] == '#') if (line[0] == '#')
return false; return false;
int end_of_key = 0; int end_of_key = 0;
while (line[end_of_key] && !isspace(line[end_of_key])) end_of_key++; while (line[end_of_key] &&
!(isascii(line[end_of_key]) && isspace(line[end_of_key]))) {
end_of_key++;
}
if (!line[end_of_key]) { if (!line[end_of_key]) {
tprintf("ParamsModel::Incomplete line %s\n", line); tprintf("ParamsModel::Incomplete line %s\n", line);
return false; return false;