Fixes #388 by writing the word joiner (U+2060) as raw UTF-8 bytes

This commit is contained in:
jm 2016-08-15 18:11:01 +02:00
parent 5610738be9
commit 941e1c4c84

View File

@@ -52,7 +52,7 @@ static const int kDefaultOutputResolution = 300;
// Word joiner (U+2060) inserted after letters in ngram mode, as per
// recommendation in http://unicode.org/reports/tr14/ to avoid line-breaks at
// hyphens and other non-alpha characters.
// Pre-commit form (the line this change replaces). The bytes a compiler emits
// for a universal character name in a plain narrow string literal depend on
// its execution character set, so this is not guaranteed to be UTF-8.
// NOTE(review): presumably that is the failure reported in #388 - confirm.
static const char* kWordJoinerUTF8 = "\u2060";
// Post-commit form: the UTF-8 encoding of U+2060 written out byte by byte
// (E2 81 A0), so the result no longer depends on compiler settings. The
// trailing comment records the equivalent u8 literal.
static const char* kWordJoinerUTF8 = "\xE2\x81\xA0"; //u8"\u2060";
// The same character expressed as a single code point value.
static const char32 kWordJoiner = 0x2060;
static bool IsCombiner(int ch) {