Replace more STRING by std::string

Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
Stefan Weil 2021-03-13 14:56:05 +01:00
parent 51909d5a2e
commit 9cf5b9870d
5 changed files with 32 additions and 16 deletions

View File

@ -32,6 +32,23 @@ namespace tesseract {
class TFile;
// Splits s at every occurrence of the delimiter c and returns the non-empty
// pieces in their original order. Leading, trailing and consecutive
// delimiters never produce empty entries, so the result is empty when s is
// empty or consists only of delimiters.
//
// Marked inline because the definition sits alongside class declarations and
// is called from other source files; a non-inline definition would be emitted
// once per including translation unit and clash at link time.
// NOTE(review): assumes this file is a header - confirm.
inline const std::vector<std::string> split(const std::string &s, char c) {
  std::string buff;
  std::vector<std::string> v;
  for (auto n : s) {
    if (n != c) {
      buff += n;
    } else if (!buff.empty()) {
      // Delimiter reached: flush the token collected so far. An empty buffer
      // means the delimiter was leading or repeated and is simply skipped.
      v.push_back(buff);
      buff.clear();
    }
  }
  // Flush the final token when s does not end with a delimiter.
  if (!buff.empty()) {
    v.push_back(buff);
  }
  return v;
}
class STRING : public std::string {
public:
using std::string::string;

View File

@ -348,8 +348,8 @@ bool LSTMRecognizer::RecognizeLine(const ImageData &image_data, bool invert, boo
// Converts an array of labels to utf-8, whether or not the labels are
// augmented with character boundaries.
STRING LSTMRecognizer::DecodeLabels(const std::vector<int> &labels) {
STRING result;
std::string LSTMRecognizer::DecodeLabels(const std::vector<int> &labels) {
std::string result;
int end = 1;
for (int start = 0; start < labels.size(); start = end) {
if (labels[start] == null_char_) {

View File

@ -248,7 +248,7 @@ public:
// Converts an array of labels to utf-8, whether or not the labels are
// augmented with character boundaries.
STRING DecodeLabels(const std::vector<int> &labels);
std::string DecodeLabels(const std::vector<int> &labels);
// Displays the forward results in a window with the characters and
// boundaries as determined by the labels and label_coords.

View File

@ -224,8 +224,8 @@ Trainability LSTMTrainer::GridSearchDictParams(const ImageData *trainingdata, in
RecodeBeamSearch base_search(recoder_, null_char_, SimpleTextOutput(), nullptr);
base_search.Decode(fwd_outputs, 1.0, 0.0, RecodeBeamSearch::kMinCertainty, nullptr);
base_search.ExtractBestPathAsLabels(&ocr_labels, &xcoords);
STRING truth_text = DecodeLabels(truth_labels);
STRING ocr_text = DecodeLabels(ocr_labels);
std::string truth_text = DecodeLabels(truth_labels);
std::string ocr_text = DecodeLabels(ocr_labels);
double baseline_error = ComputeWordError(&truth_text, &ocr_text);
results->add_str_double("0,0=", baseline_error);
@ -239,8 +239,8 @@ Trainability LSTMTrainer::GridSearchDictParams(const ImageData *trainingdata, in
// This is destructive on both strings.
double word_error = ComputeWordError(&truth_text, &ocr_text);
if ((r == min_dict_ratio && c == min_cert_offset) || !std::isfinite(word_error)) {
STRING t = DecodeLabels(truth_labels);
STRING o = DecodeLabels(ocr_labels);
std::string t = DecodeLabels(truth_labels);
std::string o = DecodeLabels(ocr_labels);
tprintf("r=%g, c=%g, truth=%s, ocr=%s, wderr=%g, truth[0]=%d\n", r, c, t.c_str(), o.c_str(),
word_error, truth_labels[0]);
}
@ -870,8 +870,8 @@ Trainability LSTMTrainer::PrepareForBackward(const ImageData *trainingdata, Netw
tprintf("Input width was %d\n", inputs.Width());
return UNENCODABLE;
}
STRING ocr_text = DecodeLabels(ocr_labels);
STRING truth_text = DecodeLabels(truth_labels);
std::string ocr_text = DecodeLabels(ocr_labels);
std::string truth_text = DecodeLabels(truth_labels);
targets->SubtractAllFromFloat(*fwd_outputs);
if (debug_interval_ != 0) {
if (truth_text != ocr_text) {
@ -1029,7 +1029,7 @@ bool LSTMTrainer::DebugLSTMTraining(const NetworkIO &inputs, const ImageData &tr
const NetworkIO &fwd_outputs,
const std::vector<int> &truth_labels,
const NetworkIO &outputs) {
const STRING &truth_text = DecodeLabels(truth_labels);
const std::string &truth_text = DecodeLabels(truth_labels);
if (truth_text.c_str() == nullptr || truth_text.length() <= 0) {
tprintf("Empty truth string at decode time!\n");
return false;
@ -1039,7 +1039,7 @@ bool LSTMTrainer::DebugLSTMTraining(const NetworkIO &inputs, const ImageData &tr
std::vector<int> labels;
std::vector<int> xcoords;
LabelsFromOutputs(outputs, &labels, &xcoords);
STRING text = DecodeLabels(labels);
std::string text = DecodeLabels(labels);
tprintf("Iteration %d: GROUND TRUTH : %s\n", training_iteration(), truth_text.c_str());
if (truth_text != text) {
tprintf("Iteration %d: ALIGNED TRUTH : %s\n", training_iteration(), text.c_str());
@ -1214,13 +1214,12 @@ double LSTMTrainer::ComputeCharError(const std::vector<int> &truth_str,
// Computes word recall error rate using a very simple bag of words algorithm.
// NOTE that this is destructive on both input strings.
double LSTMTrainer::ComputeWordError(STRING *truth_str, STRING *ocr_str) {
double LSTMTrainer::ComputeWordError(std::string *truth_str, std::string *ocr_str) {
using StrMap = std::unordered_map<std::string, int, std::hash<std::string>>;
std::vector<STRING> truth_words, ocr_words;
truth_str->split(' ', &truth_words);
std::vector<std::string> truth_words = split(*truth_str, ' ');
if (truth_words.empty())
return 0.0;
ocr_str->split(' ', &ocr_words);
std::vector<std::string> ocr_words = split(*ocr_str, ' ');
StrMap word_counts;
for (auto truth_word : truth_words) {
std::string truth_word_string(truth_word.c_str());

View File

@ -365,7 +365,7 @@ protected:
double ComputeCharError(const std::vector<int> &truth_str, const std::vector<int> &ocr_str);
// Computes a very simple bag of words word recall error rate.
// NOTE that this is destructive on both input strings.
double ComputeWordError(STRING *truth_str, STRING *ocr_str);
double ComputeWordError(std::string *truth_str, std::string *ocr_str);
// Updates the error buffer and corresponding mean of the given type with
// the new_error.