mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-12-04 18:29:06 +08:00
A better fix for reading unichars. Always imbue the C locale, since the default locale gives different results on different systems.
This commit is contained in:
parent
0364832ab8
commit
0c7139ce09
@ -53,14 +53,10 @@ void Classify::ReadNewCutoffs(TFile* fp, uint16_t* Cutoffs) {
|
|||||||
char line[kMaxLineSize];
|
char line[kMaxLineSize];
|
||||||
while (fp->FGets(line, kMaxLineSize) != nullptr) {
|
while (fp->FGets(line, kMaxLineSize) != nullptr) {
|
||||||
std::string Class;
|
std::string Class;
|
||||||
auto p = line;
|
|
||||||
while (*p != ' ' && p - line < kMaxLineSize)
|
|
||||||
Class.push_back(*p++);
|
|
||||||
CLASS_ID ClassId;
|
CLASS_ID ClassId;
|
||||||
// do not use stream to extract Class as it may contain unicode spaces (0xA0)
|
std::istringstream stream(line);
|
||||||
// they are eaten by stream, but they are a part of Class
|
stream.imbue(std::locale::classic());
|
||||||
std::istringstream stream(p);
|
stream >> Class >> Cutoff;
|
||||||
stream >> Cutoff;
|
|
||||||
if (stream.fail()) {
|
if (stream.fail()) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -208,6 +208,7 @@ NORM_PROTOS *Classify::ReadNormProtos(TFile *fp) {
|
|||||||
char line[kMaxLineSize];
|
char line[kMaxLineSize];
|
||||||
while (fp->FGets(line, kMaxLineSize) != nullptr) {
|
while (fp->FGets(line, kMaxLineSize) != nullptr) {
|
||||||
std::istringstream stream(line);
|
std::istringstream stream(line);
|
||||||
|
stream.imbue(std::locale::classic());
|
||||||
stream >> unichar >> NumProtos;
|
stream >> unichar >> NumProtos;
|
||||||
if (stream.fail()) {
|
if (stream.fail()) {
|
||||||
continue;
|
continue;
|
||||||
|
Loading…
Reference in New Issue
Block a user