mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-06-11 12:43:17 +08:00
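Fix bug in UTF-16BE conversion: the surrogate-range check compared against 0xE0000 instead of 0xE000, so valid codepoints from U+E000 through U+DFFFF were dropped as invalid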
This commit is contained in:
parent
41918a452a
commit
85f8a98c93
@@ -419,7 +419,7 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api,
|
|||||||
for (int i = 0; i < unicodes.length(); i++) {
|
for (int i = 0; i < unicodes.length(); i++) {
|
||||||
int code = unicodes[i];
|
int code = unicodes[i];
|
||||||
// Convert to UTF-16BE https://en.wikipedia.org/wiki/UTF-16
|
// Convert to UTF-16BE https://en.wikipedia.org/wiki/UTF-16
|
||||||
if ((code > 0xD7FF && code < 0xE0000) || code > 0x10FFFF) {
|
if ((code > 0xD7FF && code < 0xE000) || code > 0x10FFFF) {
|
||||||
tprintf("Dropping invalid codepoint %d\n", code);
|
tprintf("Dropping invalid codepoint %d\n", code);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user