mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-06-07 09:52:40 +08:00
fix bug in UTF-16BE conversion
This commit is contained in:
parent
41918a452a
commit
85f8a98c93
@ -419,7 +419,7 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api,
|
||||
for (int i = 0; i < unicodes.length(); i++) {
|
||||
int code = unicodes[i];
|
||||
// Convert to UTF-16BE https://en.wikipedia.org/wiki/UTF-16
|
||||
- if ((code > 0xD7FF && code < 0xE0000) || code > 0x10FFFF) {
|
||||
+ if ((code > 0xD7FF && code < 0xE000) || code > 0x10FFFF) {
|
||||
tprintf("Dropping invalid codepoint %d\n", code);
|
||||
continue;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user