RAII: own the result of ResultIterator::GetUTF8Text() with std::unique_ptr; it was leaked inside TessBaseAPI::GetTSVText()

2025-01-18 06:30:14 +08:00 · 2017-05-10 11:06:29 +02:00 · 2017-05-10 11:06:29 +02:00 · 4840c65bf0
commit 4840c65bf0
parent 3454061334
3 changed files with 12 additions and 13 deletions
--- a/api/baseapi.cpp
+++ b/api/baseapi.cpp
@@ -46,6 +46,7 @@
 #include <string>
 #include <iterator>
 #include <fstream>
+#include <memory> // std::unique_ptr

 #include "allheaders.h"

@@ -1267,9 +1268,8 @@ char* TessBaseAPI::GetUTF8Text() {
  ResultIterator *it = GetIterator();
  do {
    if (it->Empty(RIL_PARA)) continue;
-    char *para_text = it->GetUTF8Text(RIL_PARA);
-    text += para_text;
-    delete []para_text;
+    const std::unique_ptr<const char[]> para_text(it->GetUTF8Text(RIL_PARA));
+    text += para_text.get();
  } while (it->Next(RIL_PARA));
  char* result = new char[text.length() + 1];
  strncpy(result, text.string(), text.length() + 1);
@@ -1539,11 +1539,10 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
    if (bold) hocr_str += "<strong>";
    if (italic) hocr_str += "<em>";
    do {
-      const char *grapheme = res_it->GetUTF8Text(RIL_SYMBOL);
+      const std::unique_ptr<const char[]> grapheme(res_it->GetUTF8Text(RIL_SYMBOL));
      if (grapheme && grapheme[0] != 0) {
-        hocr_str += HOcrEscape(grapheme);
+        hocr_str += HOcrEscape(grapheme.get());
      }
-      delete []grapheme;
      res_it->Next(RIL_SYMBOL);
    } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
    if (italic) hocr_str += "</em>";
@@ -1661,7 +1660,7 @@ char* TessBaseAPI::GetTSVText(int page_number) {
    if (res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD)) bcnt++;

    do {
-      tsv_str += res_it->GetUTF8Text(RIL_SYMBOL);
+      tsv_str += std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_SYMBOL)).get();
      res_it->Next(RIL_SYMBOL);
    } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
    tsv_str += "\n";  // end of row
--- a/api/pdfrenderer.cpp
+++ b/api/pdfrenderer.cpp
@@ -20,6 +20,7 @@
 #include "config_auto.h"
 #endif

+#include <memory> // std::unique_ptr
 #include "allheaders.h"
 #include "baseapi.h"
 #include "math.h"
@@ -460,10 +461,10 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api,
    STRING pdf_word("");
    int pdf_word_len = 0;
    do {
-      const char *grapheme = res_it->GetUTF8Text(RIL_SYMBOL);
+      const std::unique_ptr<const char[]> grapheme(res_it->GetUTF8Text(RIL_SYMBOL));
      if (grapheme && grapheme[0] != '\0') {
        GenericVector<int> unicodes;
-        UNICHAR::UTF8ToUnicode(grapheme, &unicodes);
+        UNICHAR::UTF8ToUnicode(grapheme.get(), &unicodes);
        char utf16[kMaxBytesPerCodepoint];
        for (int i = 0; i < unicodes.length(); i++) {
          int code = unicodes[i];
@@ -473,7 +474,6 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api,
          }
        }
      }
-      delete []grapheme;
      res_it->Next(RIL_SYMBOL);
    } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
    if (word_length > 0 && pdf_word_len > 0 && fontsize > 0) {
--- a/ccmain/paragraphs.cpp
+++ b/ccmain/paragraphs.cpp
@@ -21,6 +21,7 @@
 #endif

 #include <ctype.h>
+#include <memory> // std::unique_ptr

 #include "genericvector.h"
 #include "helpers.h"
@@ -2446,8 +2447,8 @@ void InitializeRowInfo(bool after_recognition,
    return;
  }
  info->text = "";
-  char *text = it.GetUTF8Text(RIL_TEXTLINE);
-  int trailing_ws_idx = strlen(text);  // strip trailing space
+  const std::unique_ptr<const char[]> text(it.GetUTF8Text(RIL_TEXTLINE));
+  int trailing_ws_idx = strlen(text.get());  // strip trailing space
  while (trailing_ws_idx > 0 &&
         // isspace() only takes ASCII
         ((text[trailing_ws_idx - 1] & 0x80) == 0) &&
@@ -2460,7 +2461,6 @@ void InitializeRowInfo(bool after_recognition,
    for (int i = 0; i < trailing_ws_idx; i++)
      info->text += text[i];
  }
-  delete []text;

  if (info->text.size() == 0) {
    return;