mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-27 12:49:35 +08:00
Support for Sgaw and W Pwo Karen languages in the Myanmar validator. (#4065)
1. Added 0x102c and 0x1062 to the tone mark section; in Karen these can be tones too. 2. Added the optional 0x103a, 0x1037, and 0x1038 after the tones. Asat (0x103a) is part of the Sgaw tone mark, and dot below (0x1037) and visarga (0x1038) are used as nasal marks following the Pwo tones.
This commit is contained in:
parent
9422915eb7
commit
ed69e574a9
@ -140,13 +140,21 @@ bool ValidateMyanmar::ConsumeOptionalSignsIfPresent() {
|
||||
}
|
||||
// Tone mark extensions.
|
||||
ch = codes_[codes_used_].second;
|
||||
if (ch == 0x1038 || ch == kMyanmarAsat || ch == 0x1063 || ch == 0x1064 ||
|
||||
if (ch == 0x102c || ch == 0x1038 || ch == kMyanmarAsat || (0x1062 <= ch && ch <= 0x1064) ||
|
||||
(0x1069 <= ch && ch <= 0x106d) || (0x1087 <= ch && ch <= 0x108d) || ch == 0x108f ||
|
||||
ch == 0x109a || ch == 0x109b || (0xaa7b <= ch && ch <= 0xaa7d)) {
|
||||
if (UseMultiCode(1)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
// Sgaw tones 0x1062, 0x1063 must be followed by asat.
|
||||
// W Pwo tones 0x1069, 0x106a, and 0x106b may be followed by dot below or visarga (nasal).
|
||||
ch = codes_[codes_used_].second;
|
||||
if (ch == 0x103a || ch == 0x1037 || ch == 0x1038) {
|
||||
if (UseMultiCode(1)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user