mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-18 06:30:14 +08:00
RAII: the buffer returned by ResultIterator::GetUTF8Text() was leaked inside TessBaseAPI::GetUTF8Text(); hold it in a std::unique_ptr<const char[]>
This commit is contained in:
parent
3454061334
commit
4840c65bf0
@ -46,6 +46,7 @@
|
||||
#include <string>
|
||||
#include <iterator>
|
||||
#include <fstream>
|
||||
#include <memory> // std::unique_ptr
|
||||
|
||||
#include "allheaders.h"
|
||||
|
||||
@ -1267,9 +1268,8 @@ char* TessBaseAPI::GetUTF8Text() {
|
||||
ResultIterator *it = GetIterator();
|
||||
do {
|
||||
if (it->Empty(RIL_PARA)) continue;
|
||||
char *para_text = it->GetUTF8Text(RIL_PARA);
|
||||
text += para_text;
|
||||
delete []para_text;
|
||||
const std::unique_ptr<const char[]> para_text(it->GetUTF8Text(RIL_PARA));
|
||||
text += para_text.get();
|
||||
} while (it->Next(RIL_PARA));
|
||||
char* result = new char[text.length() + 1];
|
||||
strncpy(result, text.string(), text.length() + 1);
|
||||
@ -1539,11 +1539,10 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
|
||||
if (bold) hocr_str += "<strong>";
|
||||
if (italic) hocr_str += "<em>";
|
||||
do {
|
||||
const char *grapheme = res_it->GetUTF8Text(RIL_SYMBOL);
|
||||
const std::unique_ptr<const char[]> grapheme(res_it->GetUTF8Text(RIL_SYMBOL));
|
||||
if (grapheme && grapheme[0] != 0) {
|
||||
hocr_str += HOcrEscape(grapheme);
|
||||
hocr_str += HOcrEscape(grapheme.get());
|
||||
}
|
||||
delete []grapheme;
|
||||
res_it->Next(RIL_SYMBOL);
|
||||
} while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
|
||||
if (italic) hocr_str += "</em>";
|
||||
@ -1661,7 +1660,7 @@ char* TessBaseAPI::GetTSVText(int page_number) {
|
||||
if (res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD)) bcnt++;
|
||||
|
||||
do {
|
||||
tsv_str += res_it->GetUTF8Text(RIL_SYMBOL);
|
||||
tsv_str += std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_SYMBOL)).get();
|
||||
res_it->Next(RIL_SYMBOL);
|
||||
} while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
|
||||
tsv_str += "\n"; // end of row
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "config_auto.h"
|
||||
#endif
|
||||
|
||||
#include <memory> // std::unique_ptr
|
||||
#include "allheaders.h"
|
||||
#include "baseapi.h"
|
||||
#include "math.h"
|
||||
@ -460,10 +461,10 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api,
|
||||
STRING pdf_word("");
|
||||
int pdf_word_len = 0;
|
||||
do {
|
||||
const char *grapheme = res_it->GetUTF8Text(RIL_SYMBOL);
|
||||
const std::unique_ptr<const char[]> grapheme(res_it->GetUTF8Text(RIL_SYMBOL));
|
||||
if (grapheme && grapheme[0] != '\0') {
|
||||
GenericVector<int> unicodes;
|
||||
UNICHAR::UTF8ToUnicode(grapheme, &unicodes);
|
||||
UNICHAR::UTF8ToUnicode(grapheme.get(), &unicodes);
|
||||
char utf16[kMaxBytesPerCodepoint];
|
||||
for (int i = 0; i < unicodes.length(); i++) {
|
||||
int code = unicodes[i];
|
||||
@ -473,7 +474,6 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api,
|
||||
}
|
||||
}
|
||||
}
|
||||
delete []grapheme;
|
||||
res_it->Next(RIL_SYMBOL);
|
||||
} while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
|
||||
if (word_length > 0 && pdf_word_len > 0 && fontsize > 0) {
|
||||
|
@ -21,6 +21,7 @@
|
||||
#endif
|
||||
|
||||
#include <ctype.h>
|
||||
#include <memory> // std::unique_ptr
|
||||
|
||||
#include "genericvector.h"
|
||||
#include "helpers.h"
|
||||
@ -2446,8 +2447,8 @@ void InitializeRowInfo(bool after_recognition,
|
||||
return;
|
||||
}
|
||||
info->text = "";
|
||||
char *text = it.GetUTF8Text(RIL_TEXTLINE);
|
||||
int trailing_ws_idx = strlen(text); // strip trailing space
|
||||
const std::unique_ptr<const char[]> text(it.GetUTF8Text(RIL_TEXTLINE));
|
||||
int trailing_ws_idx = strlen(text.get()); // strip trailing space
|
||||
while (trailing_ws_idx > 0 &&
|
||||
// isspace() only takes ASCII
|
||||
((text[trailing_ws_idx - 1] & 0x80) == 0) &&
|
||||
@ -2460,7 +2461,6 @@ void InitializeRowInfo(bool after_recognition,
|
||||
for (int i = 0; i < trailing_ws_idx; i++)
|
||||
info->text += text[i];
|
||||
}
|
||||
delete []text;
|
||||
|
||||
if (info->text.size() == 0) {
|
||||
return;
|
||||
|
Loading…
Reference in New Issue
Block a user