mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-21 08:43:03 +08:00
Replace more STRING by std::string
Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
parent
51909d5a2e
commit
9cf5b9870d
@ -32,6 +32,23 @@ namespace tesseract {
|
||||
|
||||
class TFile;
|
||||
|
||||
// Splits s into the tokens separated by the delimiter character c.
//
// Empty tokens are dropped: leading, trailing and consecutive delimiters
// never produce empty strings in the result, so an empty or all-delimiter
// input yields an empty vector.
//
// Declared inline because the definition sits at namespace scope alongside
// class declarations; without it, including this code from more than one
// translation unit would produce multiple-definition link errors.
// The result is returned as a non-const value so that callers can move from
// it instead of being forced to copy.
inline std::vector<std::string> split(const std::string &s, char c) {
  std::vector<std::string> tokens;
  std::string::size_type start = 0;
  while (start < s.size()) {
    // Locate the end of the current token (next delimiter or end of input).
    const std::string::size_type found = s.find(c, start);
    const std::string::size_type stop = (found == std::string::npos) ? s.size() : found;
    if (stop > start) {
      // Construct the token in place directly from the source range.
      tokens.emplace_back(s, start, stop - start);
    }
    // Step past the delimiter (or past the end, which terminates the loop).
    start = stop + 1;
  }
  return tokens;
}
|
||||
|
||||
class STRING : public std::string {
|
||||
public:
|
||||
using std::string::string;
|
||||
|
@ -348,8 +348,8 @@ bool LSTMRecognizer::RecognizeLine(const ImageData &image_data, bool invert, boo
|
||||
|
||||
// Converts an array of labels to utf-8, whether or not the labels are
|
||||
// augmented with character boundaries.
|
||||
STRING LSTMRecognizer::DecodeLabels(const std::vector<int> &labels) {
|
||||
STRING result;
|
||||
std::string LSTMRecognizer::DecodeLabels(const std::vector<int> &labels) {
|
||||
std::string result;
|
||||
int end = 1;
|
||||
for (int start = 0; start < labels.size(); start = end) {
|
||||
if (labels[start] == null_char_) {
|
||||
|
@ -248,7 +248,7 @@ public:
|
||||
|
||||
// Converts an array of labels to utf-8, whether or not the labels are
|
||||
// augmented with character boundaries.
|
||||
STRING DecodeLabels(const std::vector<int> &labels);
|
||||
std::string DecodeLabels(const std::vector<int> &labels);
|
||||
|
||||
// Displays the forward results in a window with the characters and
|
||||
// boundaries as determined by the labels and label_coords.
|
||||
|
@ -224,8 +224,8 @@ Trainability LSTMTrainer::GridSearchDictParams(const ImageData *trainingdata, in
|
||||
RecodeBeamSearch base_search(recoder_, null_char_, SimpleTextOutput(), nullptr);
|
||||
base_search.Decode(fwd_outputs, 1.0, 0.0, RecodeBeamSearch::kMinCertainty, nullptr);
|
||||
base_search.ExtractBestPathAsLabels(&ocr_labels, &xcoords);
|
||||
STRING truth_text = DecodeLabels(truth_labels);
|
||||
STRING ocr_text = DecodeLabels(ocr_labels);
|
||||
std::string truth_text = DecodeLabels(truth_labels);
|
||||
std::string ocr_text = DecodeLabels(ocr_labels);
|
||||
double baseline_error = ComputeWordError(&truth_text, &ocr_text);
|
||||
results->add_str_double("0,0=", baseline_error);
|
||||
|
||||
@ -239,8 +239,8 @@ Trainability LSTMTrainer::GridSearchDictParams(const ImageData *trainingdata, in
|
||||
// This is destructive on both strings.
|
||||
double word_error = ComputeWordError(&truth_text, &ocr_text);
|
||||
if ((r == min_dict_ratio && c == min_cert_offset) || !std::isfinite(word_error)) {
|
||||
STRING t = DecodeLabels(truth_labels);
|
||||
STRING o = DecodeLabels(ocr_labels);
|
||||
std::string t = DecodeLabels(truth_labels);
|
||||
std::string o = DecodeLabels(ocr_labels);
|
||||
tprintf("r=%g, c=%g, truth=%s, ocr=%s, wderr=%g, truth[0]=%d\n", r, c, t.c_str(), o.c_str(),
|
||||
word_error, truth_labels[0]);
|
||||
}
|
||||
@ -870,8 +870,8 @@ Trainability LSTMTrainer::PrepareForBackward(const ImageData *trainingdata, Netw
|
||||
tprintf("Input width was %d\n", inputs.Width());
|
||||
return UNENCODABLE;
|
||||
}
|
||||
STRING ocr_text = DecodeLabels(ocr_labels);
|
||||
STRING truth_text = DecodeLabels(truth_labels);
|
||||
std::string ocr_text = DecodeLabels(ocr_labels);
|
||||
std::string truth_text = DecodeLabels(truth_labels);
|
||||
targets->SubtractAllFromFloat(*fwd_outputs);
|
||||
if (debug_interval_ != 0) {
|
||||
if (truth_text != ocr_text) {
|
||||
@ -1029,7 +1029,7 @@ bool LSTMTrainer::DebugLSTMTraining(const NetworkIO &inputs, const ImageData &tr
|
||||
const NetworkIO &fwd_outputs,
|
||||
const std::vector<int> &truth_labels,
|
||||
const NetworkIO &outputs) {
|
||||
const STRING &truth_text = DecodeLabels(truth_labels);
|
||||
const std::string &truth_text = DecodeLabels(truth_labels);
|
||||
if (truth_text.c_str() == nullptr || truth_text.length() <= 0) {
|
||||
tprintf("Empty truth string at decode time!\n");
|
||||
return false;
|
||||
@ -1039,7 +1039,7 @@ bool LSTMTrainer::DebugLSTMTraining(const NetworkIO &inputs, const ImageData &tr
|
||||
std::vector<int> labels;
|
||||
std::vector<int> xcoords;
|
||||
LabelsFromOutputs(outputs, &labels, &xcoords);
|
||||
STRING text = DecodeLabels(labels);
|
||||
std::string text = DecodeLabels(labels);
|
||||
tprintf("Iteration %d: GROUND TRUTH : %s\n", training_iteration(), truth_text.c_str());
|
||||
if (truth_text != text) {
|
||||
tprintf("Iteration %d: ALIGNED TRUTH : %s\n", training_iteration(), text.c_str());
|
||||
@ -1214,13 +1214,12 @@ double LSTMTrainer::ComputeCharError(const std::vector<int> &truth_str,
|
||||
|
||||
// Computes word recall error rate using a very simple bag of words algorithm.
|
||||
// NOTE that this is destructive on both input strings.
|
||||
double LSTMTrainer::ComputeWordError(STRING *truth_str, STRING *ocr_str) {
|
||||
double LSTMTrainer::ComputeWordError(std::string *truth_str, std::string *ocr_str) {
|
||||
using StrMap = std::unordered_map<std::string, int, std::hash<std::string>>;
|
||||
std::vector<STRING> truth_words, ocr_words;
|
||||
truth_str->split(' ', &truth_words);
|
||||
std::vector<std::string> truth_words = split(*truth_str, ' ');
|
||||
if (truth_words.empty())
|
||||
return 0.0;
|
||||
ocr_str->split(' ', &ocr_words);
|
||||
std::vector<std::string> ocr_words = split(*ocr_str, ' ');
|
||||
StrMap word_counts;
|
||||
for (auto truth_word : truth_words) {
|
||||
std::string truth_word_string(truth_word.c_str());
|
||||
|
@ -365,7 +365,7 @@ protected:
|
||||
double ComputeCharError(const std::vector<int> &truth_str, const std::vector<int> &ocr_str);
|
||||
// Computes a very simple bag of words word recall error rate.
|
||||
// NOTE that this is destructive on both input strings.
|
||||
double ComputeWordError(STRING *truth_str, STRING *ocr_str);
|
||||
double ComputeWordError(std::string *truth_str, std::string *ocr_str);
|
||||
|
||||
// Updates the error buffer and corresponding mean of the given type with
|
||||
// the new_error.
|
||||
|
Loading…
Reference in New Issue
Block a user