A better fix for reading unichars: always imbue the stream with the classic "C" locale, because the default locale differs between systems and produces different parsing results.

2024-12-04 01:39:16 +08:00 · 2021-01-04 20:36:21 +03:00 · 2021-01-04 20:36:21 +03:00 · 0c7139ce09
commit 0c7139ce09
parent 0364832ab8
2 changed files with 4 additions and 7 deletions
--- a/src/classify/cutoffs.cpp
+++ b/src/classify/cutoffs.cpp
@@ -53,14 +53,10 @@ void Classify::ReadNewCutoffs(TFile* fp, uint16_t* Cutoffs) {
  char line[kMaxLineSize];
  while (fp->FGets(line, kMaxLineSize) != nullptr) {
    std::string Class;
-    auto p = line;
-    while (*p != ' ' && p - line < kMaxLineSize)
-        Class.push_back(*p++);
    CLASS_ID ClassId;
-    // do not use stream to extract Class as it may contain unicode spaces (0xA0)
-    // they are eaten by stream, but they are a part of Class
-    std::istringstream stream(p);
-    stream >> Cutoff;
+    std::istringstream stream(line);
+    stream.imbue(std::locale::classic());
+    stream >> Class >> Cutoff;
    if (stream.fail()) {
      break;
    }
--- a/src/classify/normmatch.cpp
+++ b/src/classify/normmatch.cpp
@@ -208,6 +208,7 @@ NORM_PROTOS *Classify::ReadNormProtos(TFile *fp) {
  char line[kMaxLineSize];
  while (fp->FGets(line, kMaxLineSize) != nullptr) {
    std::istringstream stream(line);
+    stream.imbue(std::locale::classic());
    stream >> unichar >> NumProtos;
    if (stream.fail()) {
      continue;