mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-30 23:49:05 +08:00
Merge pull request #2437 from stweil/locale-fix
Fix some unittests with locale de_DE.UTF-8
This commit is contained in:
commit
b124a5f6ca
@ -41,7 +41,6 @@
|
||||
#include <unistd.h>
|
||||
#endif // _WIN32
|
||||
|
||||
#include <clocale> // for LC_ALL, LC_CTYPE, LC_NUMERIC
|
||||
#include <cmath> // for round, M_PI
|
||||
#include <cstdint> // for int32_t
|
||||
#include <cstring> // for strcmp, strcpy
|
||||
@ -209,13 +208,16 @@ TessBaseAPI::TessBaseAPI()
|
||||
rect_height_(0),
|
||||
image_width_(0),
|
||||
image_height_(0) {
|
||||
const char *locale;
|
||||
locale = std::setlocale(LC_ALL, nullptr);
|
||||
ASSERT_HOST(!strcmp(locale, "C") || !strcmp(locale, "C.UTF-8"));
|
||||
locale = std::setlocale(LC_CTYPE, nullptr);
|
||||
ASSERT_HOST(!strcmp(locale, "C") || !strcmp(locale, "C.UTF-8"));
|
||||
locale = std::setlocale(LC_NUMERIC, nullptr);
|
||||
ASSERT_HOST(!strcmp(locale, "C") || !strcmp(locale, "C.UTF-8"));
|
||||
#if defined(DEBUG)
|
||||
// The Tesseract executables would use the "C" locale by default,
|
||||
// but other software which is linked against the Tesseract library
|
||||
// typically uses the locale from the user's environment.
|
||||
// Here the default is overridden to allow debugging of potential
|
||||
// problems caused by the locale settings.
|
||||
|
||||
// Use the current locale if building debug code.
|
||||
std::locale::global(std::locale(""));
|
||||
#endif
|
||||
}
|
||||
|
||||
TessBaseAPI::~TessBaseAPI() {
|
||||
|
@ -22,6 +22,9 @@
|
||||
#include <cassert>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <iomanip> // for std::setw
|
||||
#include <locale> // for std::locale::classic
|
||||
#include <sstream> // for std::istringstream, std::ostringstream
|
||||
|
||||
#include "params.h"
|
||||
#include "serialis.h"
|
||||
@ -705,18 +708,24 @@ bool UNICHARSET::save_to_string(STRING *str) const {
|
||||
snprintf(buffer, kFileBufSize, "%s %x %s %d\n", "NULL", properties,
|
||||
this->get_script_from_script_id(this->get_script(id)),
|
||||
this->get_other_case(id));
|
||||
*str += buffer;
|
||||
} else {
|
||||
snprintf(buffer, kFileBufSize,
|
||||
"%s %x %d,%d,%d,%d,%g,%g,%g,%g,%g,%g %s %d %d %d %s\t# %s\n",
|
||||
this->id_to_unichar(id), properties,
|
||||
min_bottom, max_bottom, min_top, max_top, width, width_sd,
|
||||
bearing, bearing_sd, advance, advance_sd,
|
||||
this->get_script_from_script_id(this->get_script(id)),
|
||||
this->get_other_case(id), this->get_direction(id),
|
||||
this->get_mirror(id), this->get_normed_unichar(id),
|
||||
this->debug_str(id).string());
|
||||
std::ostringstream stream;
|
||||
stream.imbue(std::locale::classic());
|
||||
stream << this->id_to_unichar(id) << ' ' << properties << ' ' <<
|
||||
min_bottom << ',' << max_bottom << ',' <<
|
||||
min_top << ',' << max_top << ',' <<
|
||||
width << ',' << width_sd << ',' <<
|
||||
bearing << ',' << bearing_sd << ',' <<
|
||||
advance << ',' << advance_sd << ' ' <<
|
||||
this->get_script_from_script_id(this->get_script(id)) << ' ' <<
|
||||
this->get_other_case(id) << ' ' <<
|
||||
this->get_direction(id) << ' ' <<
|
||||
this->get_mirror(id) << ' ' <<
|
||||
this->get_normed_unichar(id) << "\t# " <<
|
||||
this->debug_str(id).string() << '\n';
|
||||
*str += stream.str().c_str();
|
||||
}
|
||||
*str += buffer;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -815,41 +824,64 @@ bool UNICHARSET::load_via_fgets(
|
||||
float advance = 0.0f;
|
||||
float advance_sd = 0.0f;
|
||||
// TODO(eger): check that this default is ok
|
||||
// after enabling BiDi iterator for Arabic+Cube.
|
||||
// after enabling BiDi iterator for Arabic.
|
||||
int direction = UNICHARSET::U_LEFT_TO_RIGHT;
|
||||
UNICHAR_ID other_case = id;
|
||||
UNICHAR_ID mirror = id;
|
||||
char normed[64];
|
||||
int v = -1;
|
||||
if (fgets_cb->Run(buffer, sizeof (buffer)) == nullptr ||
|
||||
((v = sscanf(buffer,
|
||||
"%s %x %d,%d,%d,%d,%g,%g,%g,%g,%g,%g %63s %d %d %d %63s",
|
||||
unichar, &properties,
|
||||
&min_bottom, &max_bottom, &min_top, &max_top,
|
||||
&width, &width_sd, &bearing, &bearing_sd,
|
||||
&advance, &advance_sd, script, &other_case,
|
||||
&direction, &mirror, normed)) != 17 &&
|
||||
(v = sscanf(buffer,
|
||||
"%s %x %d,%d,%d,%d,%g,%g,%g,%g,%g,%g %63s %d %d %d",
|
||||
unichar, &properties,
|
||||
&min_bottom, &max_bottom, &min_top, &max_top,
|
||||
&width, &width_sd, &bearing, &bearing_sd,
|
||||
&advance, &advance_sd, script, &other_case,
|
||||
&direction, &mirror)) != 16 &&
|
||||
(v = sscanf(buffer, "%s %x %d,%d,%d,%d %63s %d %d %d",
|
||||
unichar, &properties,
|
||||
&min_bottom, &max_bottom, &min_top, &max_top,
|
||||
script, &other_case, &direction, &mirror)) != 10 &&
|
||||
(v = sscanf(buffer, "%s %x %d,%d,%d,%d %63s %d", unichar, &properties,
|
||||
&min_bottom, &max_bottom, &min_top, &max_top,
|
||||
script, &other_case)) != 8 &&
|
||||
(v = sscanf(buffer, "%s %x %63s %d", unichar, &properties,
|
||||
script, &other_case)) != 4 &&
|
||||
(v = sscanf(buffer, "%s %x %63s",
|
||||
unichar, &properties, script)) != 3 &&
|
||||
(v = sscanf(buffer, "%s %x", unichar, &properties)) != 2)) {
|
||||
UNICHAR_ID other_case = unicharset_size;
|
||||
UNICHAR_ID mirror = unicharset_size;
|
||||
if (fgets_cb->Run(buffer, sizeof (buffer)) == nullptr) {
|
||||
return false;
|
||||
}
|
||||
char normed[64];
|
||||
normed[0] = '\0';
|
||||
std::istringstream stream(buffer);
|
||||
stream.imbue(std::locale::classic());
|
||||
// 标 1 0,255,0,255,0,0,0,0,0,0 Han 68 0 68 标 # 标 [6807 ]x
|
||||
//stream.flags(std::ios::hex);
|
||||
stream >> std::setw(255) >> unichar >> std::hex >> properties >> std::dec;
|
||||
//stream.flags(std::ios::dec);
|
||||
if (stream.fail()) {
|
||||
fprintf(stderr, "%s:%u failed\n", __FILE__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
auto position = stream.tellg();
|
||||
stream.seekg(position);
|
||||
char c1, c2, c3, c4, c5, c6, c7, c8, c9;
|
||||
stream >> min_bottom >> c1 >> max_bottom >> c2 >> min_top >> c3 >> max_top >> c4 >>
|
||||
width >> c5 >>width_sd >> c6 >> bearing >> c7 >> bearing_sd >> c8 >>
|
||||
advance >> c9 >> advance_sd >> std::setw(63) >> script >>
|
||||
other_case >> direction >> mirror >> std::setw(63) >> normed;
|
||||
if (stream.fail() || c1 != ',' || c2 != ',' || c3 != ',' || c4 != ',' ||
|
||||
c5 != ',' || c6 != ',' || c7 != ',' || c8 != ',' || c9 != ',') {
|
||||
stream.clear();
|
||||
stream.seekg(position);
|
||||
stream >> min_bottom >> c1 >> max_bottom >> c2 >> min_top >> c3 >> max_top >> c4 >>
|
||||
width >> c5 >>width_sd >> c6 >> bearing >> c7 >> bearing_sd >> c8 >>
|
||||
advance >> c9 >> advance_sd >> std::setw(63) >> script >>
|
||||
other_case >> direction >> mirror;
|
||||
if (stream.fail() || c1 != ',' || c2 != ',' || c3 != ',' || c4 != ',' ||
|
||||
c5 != ',' || c6 != ',' || c7 != ',' || c8 != ',' || c9 != ',') {
|
||||
stream.clear();
|
||||
stream.seekg(position);
|
||||
stream >> min_bottom >> c1 >> max_bottom >> c2 >> min_top >> c3 >> max_top >>
|
||||
std::setw(63) >> script >> other_case >> direction >> mirror;
|
||||
if (stream.fail() || c1 != ',' || c2 != ',' || c3 != ',') {
|
||||
stream.clear();
|
||||
stream.seekg(position);
|
||||
stream >> min_bottom >> c1 >> max_bottom >> c2 >> min_top >> c3 >> max_top >>
|
||||
std::setw(63) >> script >> other_case;
|
||||
if (stream.fail() || c1 != ',' || c2 != ',' || c3 != ',') {
|
||||
stream.clear();
|
||||
stream.seekg(position);
|
||||
stream >> std::setw(63) >> script >> other_case;
|
||||
if (stream.fail()) {
|
||||
stream.clear();
|
||||
stream.seekg(position);
|
||||
stream >> std::setw(63) >> script;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Skip fragments if needed.
|
||||
CHAR_FRAGMENT *frag = nullptr;
|
||||
@ -880,9 +912,9 @@ bool UNICHARSET::load_via_fgets(
|
||||
this->set_advance_stats(id, advance, advance_sd);
|
||||
this->set_direction(id, static_cast<UNICHARSET::Direction>(direction));
|
||||
this->set_other_case(
|
||||
id, (v > 3 && other_case < unicharset_size) ? other_case : id);
|
||||
this->set_mirror(id, (v > 8 && mirror < unicharset_size) ? mirror : id);
|
||||
this->set_normed(id, (v>16) ? normed : unichar);
|
||||
id, (other_case < unicharset_size) ? other_case : id);
|
||||
this->set_mirror(id, (mirror < unicharset_size) ? mirror : id);
|
||||
this->set_normed(id, normed[0] != '\0' ? normed : unichar);
|
||||
}
|
||||
post_load_setup();
|
||||
return true;
|
||||
|
@ -21,6 +21,7 @@
|
||||
|
||||
#include <cstdio>
|
||||
#include <cmath>
|
||||
#include <sstream> // for std::istringstream
|
||||
|
||||
#include "classify.h"
|
||||
#include "clusttool.h"
|
||||
@ -113,7 +114,7 @@ float Classify::ComputeNormMatch(CLASS_ID ClassId,
|
||||
feature.Params[CharNormRx] * 8000.0 +
|
||||
feature.Params[CharNormRy] *
|
||||
feature.Params[CharNormRy] * 8000.0);
|
||||
return (1.0 - NormEvidenceOf (Match));
|
||||
return (1.0 - NormEvidenceOf(Match));
|
||||
}
|
||||
|
||||
BestMatch = FLT_MAX;
|
||||
@ -209,7 +210,11 @@ NORM_PROTOS *Classify::ReadNormProtos(TFile *fp) {
|
||||
const int kMaxLineSize = 100;
|
||||
char line[kMaxLineSize];
|
||||
while (fp->FGets(line, kMaxLineSize) != nullptr) {
|
||||
if (sscanf(line, "%s %d", unichar, &NumProtos) != 2) continue;
|
||||
std::istringstream stream(line);
|
||||
stream >> unichar >> NumProtos;
|
||||
if (stream.fail()) {
|
||||
continue;
|
||||
}
|
||||
if (unicharset.contains_unichar(unichar)) {
|
||||
unichar_id = unicharset.unichar_to_id(unichar);
|
||||
Protos = NormProtos->Protos[unichar_id];
|
||||
|
Loading…
Reference in New Issue
Block a user