Replace strcpy and strncpy with a new inline helper function

Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
Stefan Weil 2024-05-23 23:27:50 +02:00
parent ea82f919a6
commit c5b0c2f421
10 changed files with 33 additions and 55 deletions

View File

@ -14,6 +14,7 @@
// limitations under the License.
#include "errcode.h" // for ASSERT_HOST
#include "helpers.h" // for copy_string
#ifdef _WIN32
# include "host.h" // windows.h for MultiByteToWideChar, ...
#endif
@ -270,12 +271,9 @@ char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) {
alto_str << "\t\t\t</PrintSpace>\n"
<< "\t\t</Page>\n";
const std::string &text = alto_str.str();
char *result = new char[text.length() + 1];
strcpy(result, text.c_str());
delete res_it;
return result;
return copy_string(alto_str.str());
}
} // namespace tesseract

View File

@ -33,7 +33,7 @@
#include "equationdetect.h" // for EquationDetect, destructor of equ_detect_
#endif // ndef DISABLED_LEGACY_ENGINE
#include "errcode.h" // for ASSERT_HOST
#include "helpers.h" // for IntCastRounded, chomp_string
#include "helpers.h" // for IntCastRounded, chomp_string, copy_string
#include "host.h" // for MAX_PATH
#include "imageio.h" // for IFF_TIFF_G4, IFF_TIFF, IFF_TIFF_G3, ...
#ifndef DISABLED_LEGACY_ENGINE
@ -1378,9 +1378,7 @@ char *TessBaseAPI::GetUTF8Text() {
const std::unique_ptr<const char[]> para_text(it->GetUTF8Text(RIL_PARA));
text += para_text.get();
} while (it->Next(RIL_PARA));
char *result = new char[text.length() + 1];
strncpy(result, text.c_str(), text.length() + 1);
return result;
return copy_string(text);
}
static void AddBoxToTSV(const PageIterator *it, PageIteratorLevel level, std::string &text) {
@ -1509,9 +1507,7 @@ char *TessBaseAPI::GetTSVText(int page_number) {
#endif
}
char *ret = new char[tsv_str.length() + 1];
strcpy(ret, tsv_str.c_str());
return ret;
return copy_string(tsv_str);
}
/** The 5 numbers output for each box (the usual 4 and a page number.) */
@ -1759,10 +1755,7 @@ char *TessBaseAPI::GetOsdText(int page_number) {
<< "Orientation confidence: " << orient_conf << "\n"
<< "Script: " << script_name << "\n"
<< "Script confidence: " << script_conf << "\n";
const std::string &text = stream.str();
char *result = new char[text.length() + 1];
strcpy(result, text.c_str());
return result;
return copy_string(stream.str());
}
#endif // ndef DISABLED_LEGACY_ENGINE

View File

@ -25,6 +25,7 @@
# include "host.h" // windows.h for MultiByteToWideChar, ...
#endif
#include <tesseract/renderer.h>
#include "helpers.h" // for copy_string
#include "tesseractclass.h" // for Tesseract
namespace tesseract {
@ -480,10 +481,7 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
}
hocr_str << " </div>\n";
const std::string &text = hocr_str.str();
char *result = new char[text.length() + 1];
strcpy(result, text.c_str());
return result;
return copy_string(hocr_str.str());
}
/**********************************************************************

View File

@ -18,6 +18,7 @@
#include <tesseract/baseapi.h> // for TessBaseAPI
#include <tesseract/renderer.h>
#include "helpers.h" // for copy_string
#include "tesseractclass.h" // for Tesseract
namespace tesseract {
@ -81,10 +82,8 @@ char *TessBaseAPI::GetLSTMBoxText(int page_number = 0) {
AddBoxToLSTM(right, bottom, top, image_height_, page_number, lstm_box_str);
lstm_box_str += "\n"; // end of PAGE
}
char *ret = new char[lstm_box_str.length() + 1];
strcpy(ret, lstm_box_str.c_str());
delete res_it;
return ret;
return copy_string(lstm_box_str);
}
/**********************************************************************

View File

@ -14,6 +14,7 @@
// limitations under the License.
#include "errcode.h" // for ASSERT_HOST
#include "helpers.h" // for copy_string
#ifdef _WIN32
# include "host.h" // windows.h for MultiByteToWideChar, ...
#endif
@ -1143,15 +1144,8 @@ char *TessBaseAPI::GetPAGEText(ETEXT_DESC *monitor, int page_number) {
const std::string &text = reading_order_str.str();
reading_order_str.str("");
// Allocate memory for result to hold text.length() characters plus a null
// terminator. Safely copy the string into result, ensuring no overflow.
// strncpy does not necessarily null-terminate the destination, so do it
// manually.
char *result = new char[text.length() + 1];
strncpy(result, text.c_str(), text.length());
result[text.length()] = '\0';
delete res_it;
return result;
return copy_string(text);
}
} // namespace tesseract

View File

@ -22,7 +22,7 @@
#include "pdf_ttf.h"
#include "tprintf.h"
#include "helpers.h" // for Swap
#include "helpers.h" // for Swap, copy_string
#include <allheaders.h>
#include <tesseract/baseapi.h>
@ -497,10 +497,7 @@ char *TessPDFRenderer::GetPDFTextObjects(TessBaseAPI *api, double width, double
pdf_str << "ET\n"; // end the text object
}
}
const std::string &text = pdf_str.str();
char *result = new char[text.length() + 1];
strcpy(result, text.c_str());
return result;
return copy_string(pdf_str.str());
}
bool TessPDFRenderer::BeginDocumentHandler() {

View File

@ -18,6 +18,7 @@
#include <tesseract/baseapi.h> // for TessBaseAPI
#include <tesseract/renderer.h>
#include "helpers.h" // for copy_string
#include "tesseractclass.h" // for Tesseract
namespace tesseract {
@ -80,10 +81,8 @@ char *TessBaseAPI::GetWordStrBoxText(int page_number = 0) {
wordstr_box_str += " " + std::to_string(page_number); // row for tab for EOL
wordstr_box_str += "\n";
}
char *ret = new char[wordstr_box_str.length() + 1];
strcpy(ret, wordstr_box_str.c_str());
delete res_it;
return ret;
return copy_string(wordstr_box_str);
}
/**********************************************************************

View File

@ -19,6 +19,7 @@
#include <tesseract/ltrresultiterator.h>
#include "helpers.h" // for copy_string
#include "pageres.h"
#include "tesseractclass.h"
@ -76,10 +77,7 @@ char *LTRResultIterator::GetUTF8Text(PageIteratorLevel level) const {
}
} while (level == RIL_BLOCK && res_it.block() == res_it.prev_block());
}
int length = text.length() + 1;
char *result = new char[length];
strncpy(result, text.c_str(), length);
return result;
return copy_string(text);
}
// Set the string inserted at the end of each text line. "\n" by default.
@ -310,11 +308,7 @@ char *LTRResultIterator::WordTruthUTF8Text() const {
if (!HasTruthString()) {
return nullptr;
}
std::string truth_text = it_->word()->blamer_bundle->TruthString();
int length = truth_text.length() + 1;
char *result = new char[length];
strncpy(result, truth_text.c_str(), length);
return result;
return copy_string(it_->word()->blamer_bundle->TruthString());
}
// Returns the null terminated UTF-8 encoded normalized OCR string for the
@ -330,10 +324,7 @@ char *LTRResultIterator::WordNormedUTF8Text() const {
for (unsigned i = 0; i < best_choice->length(); ++i) {
ocr_text += unicharset->get_normed_unichar(best_choice->unichar_id(i));
}
auto length = ocr_text.length() + 1;
char *result = new char[length];
strncpy(result, ocr_text.c_str(), length);
return result;
return copy_string(ocr_text);
}
// Returns a pointer to serialized choice lattice.

View File

@ -20,6 +20,7 @@
#include <tesseract/resultiterator.h>
#include "helpers.h" // for copy_string
#include "pageres.h"
#include "tesseractclass.h"
#include "unicharset.h"
@ -681,10 +682,7 @@ char *ResultIterator::GetUTF8Text(PageIteratorLevel level) const {
}
} break;
}
int length = text.length() + 1;
char *result = new char[length];
strncpy(result, text.c_str(), length);
return result;
return copy_string(text);
}
std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
*ResultIterator::GetRawLSTMTimesteps() const {

View File

@ -35,6 +35,17 @@
namespace tesseract {
// Duplicate the contents of a std::string into a null-terminated char
// array allocated with new[]. Nothing inside this function frees the
// array, so the caller is responsible for releasing it with delete[].
// All from.length() characters are copied (including any embedded '\0'),
// followed by one terminating '\0'.
// TODO: Remove this function once the related code has been converted
// to use std::string.
inline char *copy_string(const std::string &from) {
  const std::string::size_type size = from.length();
  char *const buffer = new char[size + 1];
  // std::string::copy() does not append a terminator; it returns the number
  // of characters written, which is exactly where the terminator belongs.
  const std::string::size_type copied = from.copy(buffer, size);
  buffer[copied] = '\0';
  return buffer;
}
template <class T>
inline bool contains(const std::vector<T> &data, const T &value) {
return std::find(data.begin(), data.end(), value) != data.end();