mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-27 12:49:35 +08:00
Replace strcpy and strncpy by new inline helper function
Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
parent
ea82f919a6
commit
c5b0c2f421
@ -14,6 +14,7 @@
|
||||
// limitations under the License.
|
||||
|
||||
#include "errcode.h" // for ASSERT_HOST
|
||||
#include "helpers.h" // for copy_string
|
||||
#ifdef _WIN32
|
||||
# include "host.h" // windows.h for MultiByteToWideChar, ...
|
||||
#endif
|
||||
@ -270,12 +271,9 @@ char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) {
|
||||
|
||||
alto_str << "\t\t\t</PrintSpace>\n"
|
||||
<< "\t\t</Page>\n";
|
||||
const std::string &text = alto_str.str();
|
||||
|
||||
char *result = new char[text.length() + 1];
|
||||
strcpy(result, text.c_str());
|
||||
delete res_it;
|
||||
return result;
|
||||
return copy_string(alto_str.str());
|
||||
}
|
||||
|
||||
} // namespace tesseract
|
||||
|
@ -33,7 +33,7 @@
|
||||
#include "equationdetect.h" // for EquationDetect, destructor of equ_detect_
|
||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
||||
#include "errcode.h" // for ASSERT_HOST
|
||||
#include "helpers.h" // for IntCastRounded, chomp_string
|
||||
#include "helpers.h" // for IntCastRounded, chomp_string, copy_string
|
||||
#include "host.h" // for MAX_PATH
|
||||
#include "imageio.h" // for IFF_TIFF_G4, IFF_TIFF, IFF_TIFF_G3, ...
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
@ -1378,9 +1378,7 @@ char *TessBaseAPI::GetUTF8Text() {
|
||||
const std::unique_ptr<const char[]> para_text(it->GetUTF8Text(RIL_PARA));
|
||||
text += para_text.get();
|
||||
} while (it->Next(RIL_PARA));
|
||||
char *result = new char[text.length() + 1];
|
||||
strncpy(result, text.c_str(), text.length() + 1);
|
||||
return result;
|
||||
return copy_string(text);
|
||||
}
|
||||
|
||||
static void AddBoxToTSV(const PageIterator *it, PageIteratorLevel level, std::string &text) {
|
||||
@ -1509,9 +1507,7 @@ char *TessBaseAPI::GetTSVText(int page_number) {
|
||||
#endif
|
||||
}
|
||||
|
||||
char *ret = new char[tsv_str.length() + 1];
|
||||
strcpy(ret, tsv_str.c_str());
|
||||
return ret;
|
||||
return copy_string(tsv_str);
|
||||
}
|
||||
|
||||
/** The 5 numbers output for each box (the usual 4 and a page number.) */
|
||||
@ -1759,10 +1755,7 @@ char *TessBaseAPI::GetOsdText(int page_number) {
|
||||
<< "Orientation confidence: " << orient_conf << "\n"
|
||||
<< "Script: " << script_name << "\n"
|
||||
<< "Script confidence: " << script_conf << "\n";
|
||||
const std::string &text = stream.str();
|
||||
char *result = new char[text.length() + 1];
|
||||
strcpy(result, text.c_str());
|
||||
return result;
|
||||
return copy_string(stream.str());
|
||||
}
|
||||
|
||||
#endif // ndef DISABLED_LEGACY_ENGINE
|
||||
|
@ -25,6 +25,7 @@
|
||||
# include "host.h" // windows.h for MultiByteToWideChar, ...
|
||||
#endif
|
||||
#include <tesseract/renderer.h>
|
||||
#include "helpers.h" // for copy_string
|
||||
#include "tesseractclass.h" // for Tesseract
|
||||
|
||||
namespace tesseract {
|
||||
@ -480,10 +481,7 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
|
||||
}
|
||||
hocr_str << " </div>\n";
|
||||
|
||||
const std::string &text = hocr_str.str();
|
||||
char *result = new char[text.length() + 1];
|
||||
strcpy(result, text.c_str());
|
||||
return result;
|
||||
return copy_string(hocr_str.str());
|
||||
}
|
||||
|
||||
/**********************************************************************
|
||||
|
@ -18,6 +18,7 @@
|
||||
|
||||
#include <tesseract/baseapi.h> // for TessBaseAPI
|
||||
#include <tesseract/renderer.h>
|
||||
#include "helpers.h" // for copy_string
|
||||
#include "tesseractclass.h" // for Tesseract
|
||||
|
||||
namespace tesseract {
|
||||
@ -81,10 +82,8 @@ char *TessBaseAPI::GetLSTMBoxText(int page_number = 0) {
|
||||
AddBoxToLSTM(right, bottom, top, image_height_, page_number, lstm_box_str);
|
||||
lstm_box_str += "\n"; // end of PAGE
|
||||
}
|
||||
char *ret = new char[lstm_box_str.length() + 1];
|
||||
strcpy(ret, lstm_box_str.c_str());
|
||||
delete res_it;
|
||||
return ret;
|
||||
return copy_string(lstm_box_str);
|
||||
}
|
||||
|
||||
/**********************************************************************
|
||||
|
@ -14,6 +14,7 @@
|
||||
// limitations under the License.
|
||||
|
||||
#include "errcode.h" // for ASSERT_HOST
|
||||
#include "helpers.h" // for copy_string
|
||||
#ifdef _WIN32
|
||||
# include "host.h" // windows.h for MultiByteToWideChar, ...
|
||||
#endif
|
||||
@ -1143,15 +1144,8 @@ char *TessBaseAPI::GetPAGEText(ETEXT_DESC *monitor, int page_number) {
|
||||
const std::string &text = reading_order_str.str();
|
||||
reading_order_str.str("");
|
||||
|
||||
// Allocate memory for result to hold text.length() characters plus a null
|
||||
// terminator. Safely copy the string into result, ensuring no overflow. strncpy
|
||||
// does not necessarily null-terminate the destination, so do it manually
|
||||
char *result = new char[text.length() + 1];
|
||||
strncpy(result, text.c_str(), text.length());
|
||||
result[text.length()] = '\0';
|
||||
|
||||
delete res_it;
|
||||
return result;
|
||||
return copy_string(text);
|
||||
}
|
||||
|
||||
} // namespace tesseract
|
||||
|
@ -22,7 +22,7 @@
|
||||
|
||||
#include "pdf_ttf.h"
|
||||
#include "tprintf.h"
|
||||
#include "helpers.h" // for Swap
|
||||
#include "helpers.h" // for Swap, copy_string
|
||||
|
||||
#include <allheaders.h>
|
||||
#include <tesseract/baseapi.h>
|
||||
@ -497,10 +497,7 @@ char *TessPDFRenderer::GetPDFTextObjects(TessBaseAPI *api, double width, double
|
||||
pdf_str << "ET\n"; // end the text object
|
||||
}
|
||||
}
|
||||
const std::string &text = pdf_str.str();
|
||||
char *result = new char[text.length() + 1];
|
||||
strcpy(result, text.c_str());
|
||||
return result;
|
||||
return copy_string(pdf_str.str());
|
||||
}
|
||||
|
||||
bool TessPDFRenderer::BeginDocumentHandler() {
|
||||
|
@ -18,6 +18,7 @@
|
||||
|
||||
#include <tesseract/baseapi.h> // for TessBaseAPI
|
||||
#include <tesseract/renderer.h>
|
||||
#include "helpers.h" // for copy_string
|
||||
#include "tesseractclass.h" // for Tesseract
|
||||
|
||||
namespace tesseract {
|
||||
@ -80,10 +81,8 @@ char *TessBaseAPI::GetWordStrBoxText(int page_number = 0) {
|
||||
wordstr_box_str += " " + std::to_string(page_number); // row for tab for EOL
|
||||
wordstr_box_str += "\n";
|
||||
}
|
||||
char *ret = new char[wordstr_box_str.length() + 1];
|
||||
strcpy(ret, wordstr_box_str.c_str());
|
||||
delete res_it;
|
||||
return ret;
|
||||
return copy_string(wordstr_box_str);
|
||||
}
|
||||
|
||||
/**********************************************************************
|
||||
|
@ -19,6 +19,7 @@
|
||||
|
||||
#include <tesseract/ltrresultiterator.h>
|
||||
|
||||
#include "helpers.h" // for copy_string
|
||||
#include "pageres.h"
|
||||
#include "tesseractclass.h"
|
||||
|
||||
@ -76,10 +77,7 @@ char *LTRResultIterator::GetUTF8Text(PageIteratorLevel level) const {
|
||||
}
|
||||
} while (level == RIL_BLOCK && res_it.block() == res_it.prev_block());
|
||||
}
|
||||
int length = text.length() + 1;
|
||||
char *result = new char[length];
|
||||
strncpy(result, text.c_str(), length);
|
||||
return result;
|
||||
return copy_string(text);
|
||||
}
|
||||
|
||||
// Set the string inserted at the end of each text line. "\n" by default.
|
||||
@ -310,11 +308,7 @@ char *LTRResultIterator::WordTruthUTF8Text() const {
|
||||
if (!HasTruthString()) {
|
||||
return nullptr;
|
||||
}
|
||||
std::string truth_text = it_->word()->blamer_bundle->TruthString();
|
||||
int length = truth_text.length() + 1;
|
||||
char *result = new char[length];
|
||||
strncpy(result, truth_text.c_str(), length);
|
||||
return result;
|
||||
return copy_string(it_->word()->blamer_bundle->TruthString());
|
||||
}
|
||||
|
||||
// Returns the null terminated UTF-8 encoded normalized OCR string for the
|
||||
@ -330,10 +324,7 @@ char *LTRResultIterator::WordNormedUTF8Text() const {
|
||||
for (unsigned i = 0; i < best_choice->length(); ++i) {
|
||||
ocr_text += unicharset->get_normed_unichar(best_choice->unichar_id(i));
|
||||
}
|
||||
auto length = ocr_text.length() + 1;
|
||||
char *result = new char[length];
|
||||
strncpy(result, ocr_text.c_str(), length);
|
||||
return result;
|
||||
return copy_string(ocr_text);
|
||||
}
|
||||
|
||||
// Returns a pointer to serialized choice lattice.
|
||||
|
@ -20,6 +20,7 @@
|
||||
|
||||
#include <tesseract/resultiterator.h>
|
||||
|
||||
#include "helpers.h" // for copy_string
|
||||
#include "pageres.h"
|
||||
#include "tesseractclass.h"
|
||||
#include "unicharset.h"
|
||||
@ -681,10 +682,7 @@ char *ResultIterator::GetUTF8Text(PageIteratorLevel level) const {
|
||||
}
|
||||
} break;
|
||||
}
|
||||
int length = text.length() + 1;
|
||||
char *result = new char[length];
|
||||
strncpy(result, text.c_str(), length);
|
||||
return result;
|
||||
return copy_string(text);
|
||||
}
|
||||
std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
|
||||
*ResultIterator::GetRawLSTMTimesteps() const {
|
||||
|
@ -35,6 +35,17 @@
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Copy a std::string to a newly allocated char *.
|
||||
// TODO: Remove this function once the related code has been converted
|
||||
// to use std::string.
|
||||
inline char *copy_string(const std::string &from) {
  // The result is obtained from new[]; one extra slot is reserved for the
  // terminating NUL character.
  const std::string::size_type num_chars = from.size();
  char *buffer = new char[num_chars + 1];
  // Copy exactly num_chars characters (embedded NULs included), then
  // terminate the buffer explicitly.
  std::char_traits<char>::copy(buffer, from.data(), num_chars);
  buffer[num_chars] = '\0';
  return buffer;
}
|
||||
|
||||
template <class T>
|
||||
inline bool contains(const std::vector<T> &data, const T &value) {
|
||||
return std::find(data.begin(), data.end(), value) != data.end();
|
||||
|
Loading…
Reference in New Issue
Block a user