From f1c6564cd70ba74db8e25155c26b6614e9be8e70 Mon Sep 17 00:00:00 2001
From: Stefan Weil
Date: Thu, 4 Jul 2019 15:42:48 +0200
Subject: [PATCH 1/2] Revert "fix read wrong tiff page."

This reverts commit 75d230a7acc74a656b9d610d40fdd0649f55de36.

That commit introduced new problems (memory leak, potential endless loop)
and style issues.

Signed-off-by: Stefan Weil
---
 src/api/baseapi.cpp | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp
index 8f10e987..d5a44b2c 100644
--- a/src/api/baseapi.cpp
+++ b/src/api/baseapi.cpp
@@ -1034,22 +1034,11 @@ bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data,
   int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
   size_t offset = 0;
   for (; ; ++page) {
-    if (tessedit_page_number >= 0) {
+    if (tessedit_page_number >= 0)
       page = tessedit_page_number;
-      int pages_read = 0;
-      do {
-        pix = (data) ? pixReadMemFromMultipageTiff(data, size, &offset)
-                     : pixReadFromMultipageTiff(filename, &offset);
-        pages_read++;
-      } while (pix != nullptr && pages_read < (page + 1));
-    }
-    else {
-      pix = (data) ? pixReadMemFromMultipageTiff(data, size, &offset)
-                   : pixReadFromMultipageTiff(filename, &offset);
-    }
-    if (pix == nullptr) {
-      break;
-    }
+    pix = (data) ? pixReadMemFromMultipageTiff(data, size, &offset)
+                 : pixReadFromMultipageTiff(filename, &offset);
+    if (pix == nullptr) break;
     tprintf("Page %d\n", page + 1);
     char page_str[kMaxIntSize];
     snprintf(page_str, kMaxIntSize - 1, "%d", page);

From 201ba0dd530f180ba430dbd63680c04d9eacddd9 Mon Sep 17 00:00:00 2001
From: Stefan Weil
Date: Thu, 4 Jul 2019 15:56:57 +0200
Subject: [PATCH 2/2] Fix handling of single pages from multipage TIFF files (issue #2537)

That case now uses Leptonica to deliver the desired image
instead of using an inefficient loop in the Tesseract code.

See commit 54fafc4e2e9e2941b643f6cef67a7ec7e0b8bb49
which used similar code in the past.

Signed-off-by: Stefan Weil
---
 src/api/baseapi.cpp | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp
index d5a44b2c..344bb6f1 100644
--- a/src/api/baseapi.cpp
+++ b/src/api/baseapi.cpp
@@ -1034,10 +1034,14 @@ bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data,
   int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
   size_t offset = 0;
   for (; ; ++page) {
-    if (tessedit_page_number >= 0)
+    if (tessedit_page_number >= 0) {
       page = tessedit_page_number;
-    pix = (data) ? pixReadMemFromMultipageTiff(data, size, &offset)
-                 : pixReadFromMultipageTiff(filename, &offset);
+      pix = (data) ? pixReadMemTiff(data, size, page)
+                   : pixReadTiff(filename, page);
+    } else {
+      pix = (data) ? pixReadMemFromMultipageTiff(data, size, &offset)
+                   : pixReadFromMultipageTiff(filename, &offset);
+    }
     if (pix == nullptr) break;
     tprintf("Page %d\n", page + 1);
     char page_str[kMaxIntSize];