RAII: the string returned by ResultIterator::GetUTF8Text() was leaked inside TessBaseAPI::GetUTF8Text()

This commit is contained in:
Raf Schietekat 2017-05-10 11:06:29 +02:00
parent 3454061334
commit 4840c65bf0
3 changed files with 12 additions and 13 deletions

View File

@ -46,6 +46,7 @@
#include <string>
#include <iterator>
#include <fstream>
#include <memory> // std::unique_ptr
#include "allheaders.h"
@ -1267,9 +1268,8 @@ char* TessBaseAPI::GetUTF8Text() {
ResultIterator *it = GetIterator();
do {
if (it->Empty(RIL_PARA)) continue;
char *para_text = it->GetUTF8Text(RIL_PARA);
text += para_text;
delete []para_text;
const std::unique_ptr<const char[]> para_text(it->GetUTF8Text(RIL_PARA));
text += para_text.get();
} while (it->Next(RIL_PARA));
char* result = new char[text.length() + 1];
strncpy(result, text.string(), text.length() + 1);
@ -1539,11 +1539,10 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
if (bold) hocr_str += "<strong>";
if (italic) hocr_str += "<em>";
do {
const char *grapheme = res_it->GetUTF8Text(RIL_SYMBOL);
const std::unique_ptr<const char[]> grapheme(res_it->GetUTF8Text(RIL_SYMBOL));
if (grapheme && grapheme[0] != 0) {
hocr_str += HOcrEscape(grapheme);
hocr_str += HOcrEscape(grapheme.get());
}
delete []grapheme;
res_it->Next(RIL_SYMBOL);
} while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
if (italic) hocr_str += "</em>";
@ -1661,7 +1660,7 @@ char* TessBaseAPI::GetTSVText(int page_number) {
if (res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD)) bcnt++;
do {
tsv_str += res_it->GetUTF8Text(RIL_SYMBOL);
tsv_str += std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_SYMBOL)).get();
res_it->Next(RIL_SYMBOL);
} while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
tsv_str += "\n"; // end of row

View File

@ -20,6 +20,7 @@
#include "config_auto.h"
#endif
#include <memory> // std::unique_ptr
#include "allheaders.h"
#include "baseapi.h"
#include "math.h"
@ -460,10 +461,10 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api,
STRING pdf_word("");
int pdf_word_len = 0;
do {
const char *grapheme = res_it->GetUTF8Text(RIL_SYMBOL);
const std::unique_ptr<const char[]> grapheme(res_it->GetUTF8Text(RIL_SYMBOL));
if (grapheme && grapheme[0] != '\0') {
GenericVector<int> unicodes;
UNICHAR::UTF8ToUnicode(grapheme, &unicodes);
UNICHAR::UTF8ToUnicode(grapheme.get(), &unicodes);
char utf16[kMaxBytesPerCodepoint];
for (int i = 0; i < unicodes.length(); i++) {
int code = unicodes[i];
@ -473,7 +474,6 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api,
}
}
}
delete []grapheme;
res_it->Next(RIL_SYMBOL);
} while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
if (word_length > 0 && pdf_word_len > 0 && fontsize > 0) {

View File

@ -21,6 +21,7 @@
#endif
#include <ctype.h>
#include <memory> // std::unique_ptr
#include "genericvector.h"
#include "helpers.h"
@ -2446,8 +2447,8 @@ void InitializeRowInfo(bool after_recognition,
return;
}
info->text = "";
char *text = it.GetUTF8Text(RIL_TEXTLINE);
int trailing_ws_idx = strlen(text); // strip trailing space
const std::unique_ptr<const char[]> text(it.GetUTF8Text(RIL_TEXTLINE));
int trailing_ws_idx = strlen(text.get()); // strip trailing space
while (trailing_ws_idx > 0 &&
// isspace() only takes ASCII
((text[trailing_ws_idx - 1] & 0x80) == 0) &&
@ -2460,7 +2461,6 @@ void InitializeRowInfo(bool after_recognition,
for (int i = 0; i < trailing_ws_idx; i++)
info->text += text[i];
}
delete []text;
if (info->text.size() == 0) {
return;