mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-18 06:30:14 +08:00
Fixes #388 by writing the word joiner (U+2060) as raw UTF-8 bytes
This commit is contained in:
parent
5610738be9
commit
941e1c4c84
@ -52,7 +52,7 @@ static const int kDefaultOutputResolution = 300;
|
||||
// Word joiner (U+2060) inserted after letters in ngram mode, as per
|
||||
// recommendation in http://unicode.org/reports/tr14/ to avoid line-breaks at
|
||||
// hyphens and other non-alpha characters.
|
||||
static const char* kWordJoinerUTF8 = "\u2060";
|
||||
static const char* kWordJoinerUTF8 = "\xE2\x81\xA0"; //u8"\u2060";
|
||||
static const char32 kWordJoiner = 0x2060;
|
||||
|
||||
static bool IsCombiner(int ch) {
|
||||
|
Loading…
Reference in New Issue
Block a user