Merge pull request #897 from rfschtkt/RAII

RAII
This commit is contained in:
zdenop 2017-05-11 16:06:44 +02:00 committed by GitHub
commit 9b998a7e0d
9 changed files with 64 additions and 72 deletions

View File

@ -46,6 +46,7 @@
#include <string>
#include <iterator>
#include <fstream>
#include <memory> // std::unique_ptr
#include "allheaders.h"
@ -1267,9 +1268,8 @@ char* TessBaseAPI::GetUTF8Text() {
ResultIterator *it = GetIterator();
do {
if (it->Empty(RIL_PARA)) continue;
char *para_text = it->GetUTF8Text(RIL_PARA);
text += para_text;
delete []para_text;
const std::unique_ptr<const char[]> para_text(it->GetUTF8Text(RIL_PARA));
text += para_text.get();
} while (it->Next(RIL_PARA));
char* result = new char[text.length() + 1];
strncpy(result, text.string(), text.length() + 1);
@ -1393,6 +1393,7 @@ static void AddBoxToTSV(const PageIterator* it, PageIteratorLevel level,
* Image name/input_file_ can be set by SetInputName before calling
* GetHOCRText
* STL removed from original patch submission and refactored by rays.
* Returned string must be freed with the delete [] operator.
*/
char* TessBaseAPI::GetHOCRText(int page_number) {
return GetHOCRText(NULL, page_number);
@ -1405,6 +1406,7 @@ char* TessBaseAPI::GetHOCRText(int page_number) {
* Image name/input_file_ can be set by SetInputName before calling
* GetHOCRText
* STL removed from original patch submission and refactored by rays.
* Returned string must be freed with the delete [] operator.
*/
char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
if (tesseract_ == NULL || (page_res_ == NULL && Recognize(monitor) < 0))
@ -1539,11 +1541,10 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
if (bold) hocr_str += "<strong>";
if (italic) hocr_str += "<em>";
do {
const char *grapheme = res_it->GetUTF8Text(RIL_SYMBOL);
const std::unique_ptr<const char[]> grapheme(res_it->GetUTF8Text(RIL_SYMBOL));
if (grapheme && grapheme[0] != 0) {
hocr_str += HOcrEscape(grapheme);
hocr_str += HOcrEscape(grapheme.get());
}
delete []grapheme;
res_it->Next(RIL_SYMBOL);
} while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
if (italic) hocr_str += "</em>";
@ -1576,6 +1577,7 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
/**
* Make a TSV-formatted string from the internal data structures.
* page_number is 0-based but will appear in the output as 1-based.
* Returned string must be freed with the delete [] operator.
*/
char* TessBaseAPI::GetTSVText(int page_number) {
if (tesseract_ == NULL || (page_res_ == NULL && Recognize(NULL) < 0))
@ -1661,7 +1663,7 @@ char* TessBaseAPI::GetTSVText(int page_number) {
if (res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD)) bcnt++;
do {
tsv_str += res_it->GetUTF8Text(RIL_SYMBOL);
tsv_str += std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_SYMBOL)).get();
res_it->Next(RIL_SYMBOL);
} while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
tsv_str += "\n"; // end of row
@ -1700,8 +1702,9 @@ const int kMaxBytesPerLine = kNumbersPerBlob * (kBytesPer64BitNumber + 1) + 1 +
/**
* The recognized text is returned as a char* which is coded
* as a UTF8 box file and must be freed with the delete [] operator.
* as a UTF8 box file.
* page_number is a 0-based page index that will appear in the box file.
* Returned string must be freed with the delete [] operator.
*/
char* TessBaseAPI::GetBoxText(int page_number) {
if (tesseract_ == NULL ||
@ -1718,7 +1721,7 @@ char* TessBaseAPI::GetBoxText(int page_number) {
do {
int left, top, right, bottom;
if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) {
char* text = it->GetUTF8Text(RIL_SYMBOL);
const std::unique_ptr</*non-const*/ char[]> text(it->GetUTF8Text(RIL_SYMBOL));
// Tesseract uses space for recognition failure. Fix to a reject
// character, kTesseractReject so we don't create illegal box files.
for (int i = 0; text[i] != '\0'; ++i) {
@ -1727,10 +1730,9 @@ char* TessBaseAPI::GetBoxText(int page_number) {
}
snprintf(result + output_length, total_length - output_length,
"%s %d %d %d %d %d\n",
text, left, image_height_ - bottom,
text.get(), left, image_height_ - bottom,
right, image_height_ - top, page_number);
output_length += strlen(result + output_length);
delete [] text;
// Just in case...
if (output_length + kMaxBytesPerLine > total_length)
break;
@ -1755,8 +1757,8 @@ const int kLatinChs[] = {
/**
* The recognized text is returned as a char* which is coded
* as UNLV format Latin-1 with specific reject and suspect codes
* and must be freed with the delete [] operator.
* as UNLV format Latin-1 with specific reject and suspect codes.
* Returned string must be freed with the delete [] operator.
*/
char* TessBaseAPI::GetUNLVText() {
if (tesseract_ == NULL ||
@ -1981,9 +1983,9 @@ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) {
PageSegMode current_psm = GetPageSegMode();
SetPageSegMode(mode);
SetVariable("classify_enable_learning", "0");
char* text = GetUTF8Text();
const std::unique_ptr<const char[]> text(GetUTF8Text());
if (debug) {
tprintf("Trying to adapt \"%s\" to \"%s\"\n", text, wordstr);
tprintf("Trying to adapt \"%s\" to \"%s\"\n", text.get(), wordstr);
}
if (text != NULL) {
PAGE_RES_IT it(page_res_);
@ -2023,7 +2025,6 @@ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) {
tesseract_->EnableLearning = true;
tesseract_->LearnWord(NULL, word_res);
}
delete [] text;
} else {
success = false;
}

View File

@ -591,6 +591,7 @@ class TESS_API TessBaseAPI {
* monitor can be used to
* cancel the recognition
* receive progress callbacks
* Returned string must be freed with the delete [] operator.
*/
char* GetHOCRText(ETEXT_DESC* monitor, int page_number);
@ -598,28 +599,30 @@ class TESS_API TessBaseAPI {
* Make a HTML-formatted string with hOCR markup from the internal
* data structures.
* page_number is 0-based but will appear in the output as 1-based.
* Returned string must be freed with the delete [] operator.
*/
char* GetHOCRText(int page_number);
/**
* Make a TSV-formatted string from the internal data structures.
* page_number is 0-based but will appear in the output as 1-based.
* Returned string must be freed with the delete [] operator.
*/
char* GetTSVText(int page_number);
/**
* The recognized text is returned as a char* which is coded in the same
* format as a box file used in training. Returned string must be freed with
* the delete [] operator.
* format as a box file used in training.
* Constructs coordinates in the original image - not just the rectangle.
* page_number is a 0-based page index that will appear in the box file.
* Returned string must be freed with the delete [] operator.
*/
char* GetBoxText(int page_number);
/**
* The recognized text is returned as a char* which is coded
* as UNLV format Latin-1 with specific reject and suspect codes
* and must be freed with the delete [] operator.
* as UNLV format Latin-1 with specific reject and suspect codes.
* Returned string must be freed with the delete [] operator.
*/
char* GetUNLVText();

View File

@ -20,6 +20,7 @@
#include "config_auto.h"
#endif
#include <memory> // std::unique_ptr
#include "allheaders.h"
#include "baseapi.h"
#include "math.h"
@ -460,10 +461,10 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api,
STRING pdf_word("");
int pdf_word_len = 0;
do {
const char *grapheme = res_it->GetUTF8Text(RIL_SYMBOL);
const std::unique_ptr<const char[]> grapheme(res_it->GetUTF8Text(RIL_SYMBOL));
if (grapheme && grapheme[0] != '\0') {
GenericVector<int> unicodes;
UNICHAR::UTF8ToUnicode(grapheme, &unicodes);
UNICHAR::UTF8ToUnicode(grapheme.get(), &unicodes);
char utf16[kMaxBytesPerCodepoint];
for (int i = 0; i < unicodes.length(); i++) {
int code = unicodes[i];
@ -473,7 +474,6 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api,
}
}
}
delete []grapheme;
res_it->Next(RIL_SYMBOL);
} while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
if (word_length > 0 && pdf_word_len > 0 && fontsize > 0) {
@ -570,14 +570,13 @@ bool TessPDFRenderer::BeginDocumentHandler() {
// CIDTOGIDMAP
const int kCIDToGIDMapSize = 2 * (1 << 16);
unsigned char *cidtogidmap = new unsigned char[kCIDToGIDMapSize];
const std::unique_ptr</*non-const*/ unsigned char[]> cidtogidmap(new unsigned char[kCIDToGIDMapSize]);
for (int i = 0; i < kCIDToGIDMapSize; i++) {
cidtogidmap[i] = (i % 2) ? 1 : 0;
}
size_t len;
unsigned char *comp =
zlibCompress(cidtogidmap, kCIDToGIDMapSize, &len);
delete[] cidtogidmap;
zlibCompress(cidtogidmap.get(), kCIDToGIDMapSize, &len);
n = snprintf(buf, sizeof(buf),
"5 0 obj\n"
"<<\n"
@ -670,10 +669,9 @@ bool TessPDFRenderer::BeginDocumentHandler() {
fseek(fp, 0, SEEK_END);
long int size = ftell(fp);
fseek(fp, 0, SEEK_SET);
char *buffer = new char[size];
if (fread(buffer, 1, size, fp) != size) {
const std::unique_ptr</*non-const*/ char[]> buffer(new char[size]);
if (fread(buffer.get(), 1, size, fp) != size) {
fclose(fp);
delete[] buffer;
return false;
}
fclose(fp);
@ -686,13 +684,11 @@ bool TessPDFRenderer::BeginDocumentHandler() {
">>\n"
"stream\n", size, size);
if (n >= sizeof(buf)) {
delete[] buffer;
return false;
}
AppendString(buf);
objsize = strlen(buf);
AppendData(buffer, size);
delete[] buffer;
AppendData(buffer.get(), size);
objsize += size;
AppendString(endstream_endobj);
objsize += strlen(endstream_endobj);
@ -887,12 +883,11 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI* api) {
AppendPDFObject(buf);
// CONTENTS
char* pdftext = GetPDFTextObjects(api, width, height);
long pdftext_len = strlen(pdftext);
unsigned char *pdftext_casted = reinterpret_cast<unsigned char *>(pdftext);
const std::unique_ptr</*non-const*/ char[]> pdftext(GetPDFTextObjects(api, width, height));
const long pdftext_len = strlen(pdftext.get());
size_t len;
unsigned char *comp_pdftext =
zlibCompress(pdftext_casted, pdftext_len, &len);
zlibCompress(reinterpret_cast<unsigned char *>(pdftext.get()), pdftext_len, &len);
long comp_pdftext_len = len;
n = snprintf(buf, sizeof(buf),
"%ld 0 obj\n"
@ -901,7 +896,6 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI* api) {
">>\n"
"stream\n", obj_, comp_pdftext_len);
if (n >= sizeof(buf)) {
delete[] pdftext;
lept_free(comp_pdftext);
return false;
}
@ -910,7 +904,6 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI* api) {
AppendData(reinterpret_cast<char *>(comp_pdftext), comp_pdftext_len);
objsize += comp_pdftext_len;
lept_free(comp_pdftext);
delete[] pdftext;
const char *b2 =
"endstream\n"
"endobj\n";

View File

@ -19,6 +19,7 @@
#include "config_auto.h"
#endif
#include <memory> // std::unique_ptr
#include <string.h>
#include "baseapi.h"
#include "genericvector.h"
@ -122,13 +123,12 @@ TessTextRenderer::TessTextRenderer(const char *outputbase)
}
bool TessTextRenderer::AddImageHandler(TessBaseAPI* api) {
char* utf8 = api->GetUTF8Text();
const std::unique_ptr<const char[]> utf8(api->GetUTF8Text());
if (utf8 == NULL) {
return false;
}
AppendString(utf8);
delete[] utf8;
AppendString(utf8.get());
bool pageBreak = false;
api->GetBoolVariable("include_page_breaks", &pageBreak);
@ -186,11 +186,10 @@ bool TessHOcrRenderer::EndDocumentHandler() {
}
bool TessHOcrRenderer::AddImageHandler(TessBaseAPI* api) {
char* hocr = api->GetHOCRText(imagenum());
const std::unique_ptr<const char[]> hocr(api->GetHOCRText(imagenum()));
if (hocr == NULL) return false;
AppendString(hocr);
delete[] hocr;
AppendString(hocr.get());
return true;
}
@ -219,11 +218,10 @@ bool TessTsvRenderer::BeginDocumentHandler() {
bool TessTsvRenderer::EndDocumentHandler() { return true; }
bool TessTsvRenderer::AddImageHandler(TessBaseAPI* api) {
char* tsv = api->GetTSVText(imagenum());
const std::unique_ptr<const char[]> tsv(api->GetTSVText(imagenum()));
if (tsv == NULL) return false;
AppendString(tsv);
delete[] tsv;
AppendString(tsv.get());
return true;
}
@ -236,11 +234,10 @@ TessUnlvRenderer::TessUnlvRenderer(const char *outputbase)
}
bool TessUnlvRenderer::AddImageHandler(TessBaseAPI* api) {
char* unlv = api->GetUNLVText();
const std::unique_ptr<const char[]> unlv(api->GetUNLVText());
if (unlv == NULL) return false;
AppendString(unlv);
delete[] unlv;
AppendString(unlv.get());
return true;
}
@ -253,11 +250,10 @@ TessBoxTextRenderer::TessBoxTextRenderer(const char *outputbase)
}
bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI* api) {
char* text = api->GetBoxText(imagenum());
const std::unique_ptr<const char[]> text(api->GetBoxText(imagenum()));
if (text == NULL) return false;
AppendString(text);
delete[] text;
AppendString(text.get());
return true;
}

View File

@ -21,6 +21,7 @@
#endif
#include <ctype.h>
#include <memory> // std::unique_ptr
#include "genericvector.h"
#include "helpers.h"
@ -2446,8 +2447,8 @@ void InitializeRowInfo(bool after_recognition,
return;
}
info->text = "";
char *text = it.GetUTF8Text(RIL_TEXTLINE);
int trailing_ws_idx = strlen(text); // strip trailing space
const std::unique_ptr<const char[]> text(it.GetUTF8Text(RIL_TEXTLINE));
int trailing_ws_idx = strlen(text.get()); // strip trailing space
while (trailing_ws_idx > 0 &&
// isspace() only takes ASCII
((text[trailing_ws_idx - 1] & 0x80) == 0) &&
@ -2460,7 +2461,6 @@ void InitializeRowInfo(bool after_recognition,
for (int i = 0; i < trailing_ws_idx; i++)
info->text += text[i];
}
delete []text;
if (info->text.size() == 0) {
return;

View File

@ -18,6 +18,7 @@
**********************************************************************/
#include <stdlib.h>
#include <memory> // std::unique_ptr
#include "blckerr.h"
#include "ocrblock.h"
#include "stepblob.h"
@ -380,9 +381,8 @@ void BLOCK::compute_row_margins() {
TBOX row_box = row->bounding_box();
int left_y = row->base_line(row_box.left()) + row->x_height();
int left_margin;
ICOORDELT_LIST *segments = lines.get_line(left_y);
LeftMargin(segments, row_box.left(), &left_margin);
delete segments;
const std::unique_ptr</*non-const*/ ICOORDELT_LIST> segments_left(lines.get_line(left_y));
LeftMargin(segments_left.get(), row_box.left(), &left_margin);
if (row_box.top() >= drop_cap_bottom) {
int drop_cap_distance = row_box.left() - row->space() - drop_cap_right;
@ -394,9 +394,8 @@ void BLOCK::compute_row_margins() {
int right_y = row->base_line(row_box.right()) + row->x_height();
int right_margin;
segments = lines.get_line(right_y);
RightMargin(segments, row_box.right(), &right_margin);
delete segments;
const std::unique_ptr</*non-const*/ ICOORDELT_LIST> segments_right(lines.get_line(right_y));
RightMargin(segments_right.get(), row_box.right(), &right_margin);
row->set_lmargin(left_margin);
row->set_rmargin(right_margin);
}

View File

@ -18,6 +18,7 @@
**********************************************************************/
#include <stdlib.h>
#include <memory> // std::unique_ptr
#include "allheaders.h"
#include "blckerr.h"
#include "pdblock.h"
@ -140,9 +141,9 @@ Pix* PDBLK::render_mask(const FCOORD& rerotation, TBOX* mask_box) {
// rasterized interior. (Runs of interior pixels on a line.)
PB_LINE_IT *lines = new PB_LINE_IT(&image_block);
for (int y = box.bottom(); y < box.top(); ++y) {
ICOORDELT_LIST* segments = lines->get_line(y);
const std::unique_ptr</*non-const*/ ICOORDELT_LIST> segments(lines->get_line(y));
if (!segments->empty()) {
ICOORDELT_IT s_it(segments);
ICOORDELT_IT s_it(segments.get());
// Each element of segments is a start x and x size of the
// run of interior pixels.
for (s_it.mark_cycle_pt(); !s_it.cycled_list(); s_it.forward()) {
@ -154,7 +155,6 @@ Pix* PDBLK::render_mask(const FCOORD& rerotation, TBOX* mask_box) {
xext, 1, PIX_SET, NULL, 0, 0);
}
}
delete segments;
}
delete lines;
} else {

View File

@ -20,6 +20,7 @@
#include <ctype.h>
#include <math.h>
#include <stdio.h>
#include <memory> // std::unique_ptr
#include "elst.h"
#include "polyblk.h"
@ -273,7 +274,6 @@ void POLY_BLOCK::fill(ScrollView* window, ScrollView::Color colour) {
inT16 y;
inT16 width;
PB_LINE_IT *lines;
ICOORDELT_LIST *segments;
ICOORDELT_IT s_it;
lines = new PB_LINE_IT (this);
@ -281,9 +281,9 @@ void POLY_BLOCK::fill(ScrollView* window, ScrollView::Color colour) {
for (y = this->bounding_box ()->bottom ();
y <= this->bounding_box ()->top (); y++) {
segments = lines->get_line (y);
const std::unique_ptr</*non-const*/ ICOORDELT_LIST> segments(lines->get_line (y));
if (!segments->empty ()) {
s_it.set_to_list (segments);
s_it.set_to_list (segments.get());
for (s_it.mark_cycle_pt (); !s_it.cycled_list (); s_it.forward ()) {
// Note different use of ICOORDELT, x coord is x coord of pixel
// at the start of line segment, y coord is length of line segment

View File

@ -19,6 +19,8 @@
#include "scanedg.h"
#include <memory> // std::unique_ptr
#include "allheaders.h"
#include "edgloop.h"
@ -93,7 +95,6 @@ void make_margins( //get a line
inT16 y //line coord
) {
PB_LINE_IT *lines;
ICOORDELT_LIST *segments; //bits of a line
ICOORDELT_IT seg_it;
inT32 start; //of segment
inT16 xext; //of segment
@ -101,9 +102,9 @@ void make_margins( //get a line
if (block->poly_block () != NULL) {
lines = new PB_LINE_IT (block->poly_block ());
segments = lines->get_line (y);
const std::unique_ptr</*non-const*/ ICOORDELT_LIST> segments(lines->get_line (y));
if (!segments->empty ()) {
seg_it.set_to_list (segments);
seg_it.set_to_list (segments.get());
seg_it.mark_cycle_pt ();
start = seg_it.data ()->x ();
xext = seg_it.data ()->y ();
@ -122,7 +123,6 @@ void make_margins( //get a line
for (xindex = left; xindex < right; xindex++)
pixels[xindex - left] = margin;
}
delete segments;
delete lines;
}
else {