mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-18 06:30:14 +08:00
Always use isascii() with isspace()
isspace() must only be used with an unsigned char or EOF argument, and even then its result can depend on the current locale settings. While this is not a problem for C/C++ executables which use the default "C" locale, it becomes a problem when the Tesseract API is called from languages like Python or Java which don't use the "C" locale. By calling isascii() before calling isspace() this uncertainty can be avoided, because any locale will hopefully give identical results for the basic ASCII character set. Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
parent
59ebd58fcc
commit
dcd0377bf0
@ -2455,7 +2455,7 @@ static void InitializeRowInfo(bool after_recognition,
|
||||
int trailing_ws_idx = strlen(text.get()); // strip trailing space
|
||||
while (trailing_ws_idx > 0 &&
|
||||
// isspace() only takes ASCII
|
||||
((text[trailing_ws_idx - 1] & 0x80) == 0) &&
|
||||
isascii(text[trailing_ws_idx - 1]) &&
|
||||
isspace(text[trailing_ws_idx - 1]))
|
||||
trailing_ws_idx--;
|
||||
if (trailing_ws_idx > 0) {
|
||||
|
@ -75,7 +75,7 @@ inline size_t LongBit() {
|
||||
static inline int
|
||||
SkipSpace(FILE *s) {
|
||||
int p;
|
||||
while (isspace(p = fgetc(s)));
|
||||
while (isascii(p = fgetc(s)) && isspace(p));
|
||||
ungetc(p, s); // Make sure next char is available for reading
|
||||
return p;
|
||||
}
|
||||
@ -108,9 +108,7 @@ static uintmax_t streamtoumax(FILE* s, int base) {
|
||||
uintmax_t v = 0;
|
||||
int d, c = 0;
|
||||
|
||||
for (c = fgetc(s);
|
||||
isspace(static_cast<unsigned char>(c)) && (c != EOF);
|
||||
c = fgetc(s)) {}
|
||||
for (c = fgetc(s); isascii(c) && isspace(c); c = fgetc(s));
|
||||
|
||||
// Single optional + or -
|
||||
if (c == '-' || c == '+') {
|
||||
@ -151,9 +149,7 @@ static double streamtofloat(FILE* s) {
|
||||
int k = 1;
|
||||
int w = 0;
|
||||
|
||||
for (c = fgetc(s);
|
||||
isspace(static_cast<unsigned char>(c)) && (c != EOF);
|
||||
c = fgetc(s));
|
||||
for (c = fgetc(s); isascii(c) && isspace(c); c = fgetc(s));
|
||||
|
||||
// Single optional + or -
|
||||
if (c == '-' || c == '+') {
|
||||
@ -265,7 +261,7 @@ static int tvfscanf(FILE* stream, const char *format, va_list ap) {
|
||||
if (ch == '%') {
|
||||
state = ST_FLAGS;
|
||||
flags = 0; rank = RANK_INT; width = UINT_MAX;
|
||||
} else if (isspace(static_cast<unsigned char>(ch))) {
|
||||
} else if (isascii(ch) && isspace(ch)) {
|
||||
SkipSpace(stream);
|
||||
} else {
|
||||
if (fgetc(stream) != ch)
|
||||
@ -445,7 +441,7 @@ static int tvfscanf(FILE* stream, const char *format, va_list ap) {
|
||||
unsigned length = 0;
|
||||
while (width--) {
|
||||
q = fgetc(stream);
|
||||
if (isspace(static_cast<unsigned char>(q)) || q <= 0) {
|
||||
if (isascii(q) && isspace(q) || q <= 0) {
|
||||
ungetc(q, stream);
|
||||
break;
|
||||
}
|
||||
|
@ -58,7 +58,10 @@ bool ParamsModel::ParseLine(char *line, char** key, float *val) {
|
||||
if (line[0] == '#')
|
||||
return false;
|
||||
int end_of_key = 0;
|
||||
while (line[end_of_key] && !isspace(line[end_of_key])) end_of_key++;
|
||||
while (line[end_of_key] &&
|
||||
!(isascii(line[end_of_key]) && isspace(line[end_of_key]))) {
|
||||
end_of_key++;
|
||||
}
|
||||
if (!line[end_of_key]) {
|
||||
tprintf("ParamsModel::Incomplete line %s\n", line);
|
||||
return false;
|
||||
|
Loading…
Reference in New Issue
Block a user