Merge pull request #3325 from stweil/string

Replace more STRING by std::string
2025-01-21 17:13:09 +08:00 · 2021-03-14 12:10:05 +03:00 · 2021-03-14 12:10:05 +03:00 · c2f3d565e8
commit c2f3d565e8
parent 59894099bc 7178ebd799
64 changed files with 493 additions and 521 deletions
--- a/src/api/baseapi.cpp
+++ b/src/api/baseapi.cpp
@ -67,7 +67,6 @@
 #include <tesseract/resultiterator.h> // for ResultIterator
 #include <tesseract/thresholder.h>    // for ImageThresholder
 #include "helpers.h"                  // for IntCastRounded, chomp_string
-#include "strngs.h"                   // for STRING

 #include <cmath>    // for round, M_PI
 #include <cstdint>  // for int32_t
@ -1295,13 +1294,13 @@ char *TessBaseAPI::GetUTF8Text() {
  return result;
 }

-static void AddBoxToTSV(const PageIterator *it, PageIteratorLevel level, STRING *text) {
+static void AddBoxToTSV(const PageIterator *it, PageIteratorLevel level, std::string &text) {
  int left, top, right, bottom;
  it->BoundingBox(level, &left, &top, &right, &bottom);
-  text->add_str_int("\t", left);
-  text->add_str_int("\t", top);
-  text->add_str_int("\t", right - left);
-  text->add_str_int("\t", bottom - top);
+  text += "\t" + std::to_string(left);
+  text += "\t" + std::to_string(top);
+  text += "\t" + std::to_string(right - left);
+  text += "\t" + std::to_string(bottom - top);
 }

 /**
@ -1316,23 +1315,22 @@ char *TessBaseAPI::GetTSVText(int page_number) {
  int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
  int page_id = page_number + 1; // we use 1-based page numbers.

-  STRING tsv_str("");
-
  int page_num = page_id;
  int block_num = 0;
  int par_num = 0;
  int line_num = 0;
  int word_num = 0;

-  tsv_str.add_str_int("1\t", page_num); // level 1 - page
-  tsv_str.add_str_int("\t", block_num);
-  tsv_str.add_str_int("\t", par_num);
-  tsv_str.add_str_int("\t", line_num);
-  tsv_str.add_str_int("\t", word_num);
-  tsv_str.add_str_int("\t", rect_left_);
-  tsv_str.add_str_int("\t", rect_top_);
-  tsv_str.add_str_int("\t", rect_width_);
-  tsv_str.add_str_int("\t", rect_height_);
+  std::string tsv_str;
+  tsv_str += "1\t" + std::to_string(page_num); // level 1 - page
+  tsv_str += "\t" + std::to_string(block_num);
+  tsv_str += "\t" + std::to_string(par_num);
+  tsv_str += "\t" + std::to_string(line_num);
+  tsv_str += "\t" + std::to_string(word_num);
+  tsv_str += "\t" + std::to_string(rect_left_);
+  tsv_str += "\t" + std::to_string(rect_top_);
+  tsv_str += "\t" + std::to_string(rect_width_);
+  tsv_str += "\t" + std::to_string(rect_height_);
  tsv_str += "\t-1\t\n";

  ResultIterator *res_it = GetIterator();
@ -1348,35 +1346,35 @@ char *TessBaseAPI::GetTSVText(int page_number) {
      par_num = 0;
      line_num = 0;
      word_num = 0;
-      tsv_str.add_str_int("2\t", page_num); // level 2 - block
-      tsv_str.add_str_int("\t", block_num);
-      tsv_str.add_str_int("\t", par_num);
-      tsv_str.add_str_int("\t", line_num);
-      tsv_str.add_str_int("\t", word_num);
-      AddBoxToTSV(res_it, RIL_BLOCK, &tsv_str);
+      tsv_str += "2\t" + std::to_string(page_num); // level 2 - block
+      tsv_str += "\t" + std::to_string(block_num);
+      tsv_str += "\t" + std::to_string(par_num);
+      tsv_str += "\t" + std::to_string(line_num);
+      tsv_str += "\t" + std::to_string(word_num);
+      AddBoxToTSV(res_it, RIL_BLOCK, tsv_str);
      tsv_str += "\t-1\t\n"; // end of row for block
    }
    if (res_it->IsAtBeginningOf(RIL_PARA)) {
      par_num++;
      line_num = 0;
      word_num = 0;
-      tsv_str.add_str_int("3\t", page_num); // level 3 - paragraph
-      tsv_str.add_str_int("\t", block_num);
-      tsv_str.add_str_int("\t", par_num);
-      tsv_str.add_str_int("\t", line_num);
-      tsv_str.add_str_int("\t", word_num);
-      AddBoxToTSV(res_it, RIL_PARA, &tsv_str);
+      tsv_str += "3\t" + std::to_string(page_num); // level 3 - paragraph
+      tsv_str += "\t" + std::to_string(block_num);
+      tsv_str += "\t" + std::to_string(par_num);
+      tsv_str += "\t" + std::to_string(line_num);
+      tsv_str += "\t" + std::to_string(word_num);
+      AddBoxToTSV(res_it, RIL_PARA, tsv_str);
      tsv_str += "\t-1\t\n"; // end of row for para
    }
    if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
      line_num++;
      word_num = 0;
-      tsv_str.add_str_int("4\t", page_num); // level 4 - line
-      tsv_str.add_str_int("\t", block_num);
-      tsv_str.add_str_int("\t", par_num);
-      tsv_str.add_str_int("\t", line_num);
-      tsv_str.add_str_int("\t", word_num);
-      AddBoxToTSV(res_it, RIL_TEXTLINE, &tsv_str);
+      tsv_str += "4\t" + std::to_string(page_num); // level 4 - line
+      tsv_str += "\t" + std::to_string(block_num);
+      tsv_str += "\t" + std::to_string(par_num);
+      tsv_str += "\t" + std::to_string(line_num);
+      tsv_str += "\t" + std::to_string(word_num);
+      AddBoxToTSV(res_it, RIL_TEXTLINE, tsv_str);
      tsv_str += "\t-1\t\n"; // end of row for line
    }

@ -1384,16 +1382,16 @@ char *TessBaseAPI::GetTSVText(int page_number) {
    int left, top, right, bottom;
    res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
    word_num++;
-    tsv_str.add_str_int("5\t", page_num); // level 5 - word
-    tsv_str.add_str_int("\t", block_num);
-    tsv_str.add_str_int("\t", par_num);
-    tsv_str.add_str_int("\t", line_num);
-    tsv_str.add_str_int("\t", word_num);
-    tsv_str.add_str_int("\t", left);
-    tsv_str.add_str_int("\t", top);
-    tsv_str.add_str_int("\t", right - left);
-    tsv_str.add_str_int("\t", bottom - top);
-    tsv_str.add_str_int("\t", res_it->Confidence(RIL_WORD));
+    tsv_str += "5\t" + std::to_string(page_num); // level 5 - word
+    tsv_str += "\t" + std::to_string(block_num);
+    tsv_str += "\t" + std::to_string(par_num);
+    tsv_str += "\t" + std::to_string(line_num);
+    tsv_str += "\t" + std::to_string(word_num);
+    tsv_str += "\t" + std::to_string(left);
+    tsv_str += "\t" + std::to_string(top);
+    tsv_str += "\t" + std::to_string(right - left);
+    tsv_str += "\t" + std::to_string(bottom - top);
+    tsv_str += "\t" + std::to_string(res_it->Confidence(RIL_WORD));
    tsv_str += "\t";

    // Increment counts if at end of block/paragraph/textline.
--- a/src/api/lstmboxrenderer.cpp
+++ b/src/api/lstmboxrenderer.cpp
@ -28,18 +28,18 @@ namespace tesseract {
 * Returned string must be freed with the delete [] operator.
 */
 static void AddBoxToLSTM(int right, int bottom, int top, int image_height, int page_num,
-                         STRING *text) {
-  text->add_str_int(" ", image_height - bottom);
-  text->add_str_int(" ", right + 5);
-  text->add_str_int(" ", image_height - top);
-  text->add_str_int(" ", page_num);
+                         std::string &text) {
+  text += " " + std::to_string(image_height - bottom);
+  text += " " + std::to_string(right + 5);
+  text += " " + std::to_string(image_height - top);
+  text += " " + std::to_string(page_num);
 }

 char *TessBaseAPI::GetLSTMBoxText(int page_number = 0) {
  if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0))
    return nullptr;

-  STRING lstm_box_str("");
+  std::string lstm_box_str;
  bool first_word = true;
  int left = 0, top = 0, right = 0, bottom = 0;

@ -52,14 +52,14 @@ char *TessBaseAPI::GetLSTMBoxText(int page_number = 0) {
    if (!first_word) {
      if (!(res_it->IsAtBeginningOf(RIL_TEXTLINE))) {
        if (res_it->IsAtBeginningOf(RIL_WORD)) {
-          lstm_box_str.add_str_int("  ", left);
-          AddBoxToLSTM(right, bottom, top, image_height_, page_number, &lstm_box_str);
+          lstm_box_str += "  " + std::to_string(left);
+          AddBoxToLSTM(right, bottom, top, image_height_, page_number, lstm_box_str);
          lstm_box_str += "\n"; // end of row for word
        }                       // word
      } else {
        if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
-          lstm_box_str.add_str_int("\t ", left);
-          AddBoxToLSTM(right, bottom, top, image_height_, page_number, &lstm_box_str);
+          lstm_box_str += "\t " + std::to_string(left);
+          AddBoxToLSTM(right, bottom, top, image_height_, page_number, lstm_box_str);
          lstm_box_str += "\n"; // end of row for line
        }                       // line
      }
@ -71,13 +71,13 @@ char *TessBaseAPI::GetLSTMBoxText(int page_number = 0) {
      lstm_box_str += std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_SYMBOL)).get();
      res_it->Next(RIL_SYMBOL);
    } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_SYMBOL));
-    lstm_box_str.add_str_int(" ", left);
-    AddBoxToLSTM(right, bottom, top, image_height_, page_number, &lstm_box_str);
+    lstm_box_str += " " + std::to_string(left);
+    AddBoxToLSTM(right, bottom, top, image_height_, page_number, lstm_box_str);
    lstm_box_str += "\n"; // end of row for symbol
  }
  if (!first_word) { // if first_word is true  => empty page
-    lstm_box_str.add_str_int("\t ", left);
-    AddBoxToLSTM(right, bottom, top, image_height_, page_number, &lstm_box_str);
+    lstm_box_str += "\t " + std::to_string(left);
+    AddBoxToLSTM(right, bottom, top, image_height_, page_number, lstm_box_str);
    lstm_box_str += "\n"; // end of PAGE
  }
  char *ret = new char[lstm_box_str.length() + 1];
--- a/src/api/wordstrboxrenderer.cpp
+++ b/src/api/wordstrboxrenderer.cpp
@ -32,7 +32,7 @@ char *TessBaseAPI::GetWordStrBoxText(int page_number = 0) {
  if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0))
    return nullptr;

-  STRING wordstr_box_str("");
+  std::string wordstr_box_str;
  int left = 0, top = 0, right = 0, bottom = 0;

  bool first_line = true;
@ -46,22 +46,22 @@ char *TessBaseAPI::GetWordStrBoxText(int page_number = 0) {

    if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
      if (!first_line) {
-        wordstr_box_str.add_str_int("\n\t ", right + 1);
-        wordstr_box_str.add_str_int(" ", image_height_ - bottom);
-        wordstr_box_str.add_str_int(" ", right + 5);
-        wordstr_box_str.add_str_int(" ", image_height_ - top);
-        wordstr_box_str.add_str_int(" ", page_number); // row for tab for EOL
+        wordstr_box_str += "\n\t " + std::to_string(right + 1);
+        wordstr_box_str += " " + std::to_string(image_height_ - bottom);
+        wordstr_box_str += " " + std::to_string(right + 5);
+        wordstr_box_str += " " + std::to_string(image_height_ - top);
+        wordstr_box_str += " " + std::to_string(page_number); // row for tab for EOL
        wordstr_box_str += "\n";
      } else {
        first_line = false;
      }
      // Use bounding box for whole line for WordStr
      res_it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom);
-      wordstr_box_str.add_str_int("WordStr ", left);
-      wordstr_box_str.add_str_int(" ", image_height_ - bottom);
-      wordstr_box_str.add_str_int(" ", right);
-      wordstr_box_str.add_str_int(" ", image_height_ - top);
-      wordstr_box_str.add_str_int(" ", page_number); // word
+      wordstr_box_str += "WordStr " + std::to_string(left);
+      wordstr_box_str += " " + std::to_string(image_height_ - bottom);
+      wordstr_box_str += " " + std::to_string(right);
+      wordstr_box_str += " " + std::to_string(image_height_ - top);
+      wordstr_box_str += " " + std::to_string(page_number); // word
      wordstr_box_str += " #";
    }
    do {
@ -72,11 +72,11 @@ char *TessBaseAPI::GetWordStrBoxText(int page_number = 0) {
  }

  if (left != 0 && top != 0 && right != 0 && bottom != 0) {
-    wordstr_box_str.add_str_int("\n\t ", right + 1);
-    wordstr_box_str.add_str_int(" ", image_height_ - bottom);
-    wordstr_box_str.add_str_int(" ", right + 5);
-    wordstr_box_str.add_str_int(" ", image_height_ - top);
-    wordstr_box_str.add_str_int(" ", page_number); // row for tab for EOL
+    wordstr_box_str += "\n\t " + std::to_string(right + 1);
+    wordstr_box_str += " " + std::to_string(image_height_ - bottom);
+    wordstr_box_str += " " + std::to_string(right + 5);
+    wordstr_box_str += " " + std::to_string(image_height_ - top);
+    wordstr_box_str += " " + std::to_string(page_number); // row for tab for EOL
    wordstr_box_str += "\n";
  }
  char *ret = new char[wordstr_box_str.length() + 1];
--- a/src/ccmain/applybox.cpp
+++ b/src/ccmain/applybox.cpp
@ -111,7 +111,7 @@ static void clear_any_old_text(BLOCK_LIST *block_list) {
 PAGE_RES *Tesseract::ApplyBoxes(const char *filename, bool find_segmentation,
                                BLOCK_LIST *block_list) {
  std::vector<TBOX> boxes;
-  std::vector<STRING> texts, full_texts;
+  std::vector<std::string> texts, full_texts;
  if (!ReadAllBoxes(applybox_page, true, filename, &boxes, &texts, &full_texts, nullptr)) {
    return nullptr; // Can't do it.
  }
@ -576,7 +576,7 @@ bool Tesseract::FindSegmentation(const GenericVector<UNICHAR_ID> &target_text, W
  }
  word_res->correct_text.clear();
  for (int i = 0; i < target_text.size(); ++i) {
-    word_res->correct_text.push_back(STRING(unicharset.id_to_unichar(target_text[i])));
+    word_res->correct_text.push_back(unicharset.id_to_unichar(target_text[i]));
  }
  return true;
 }
@ -727,7 +727,7 @@ void Tesseract::ReportFailedBox(int boxfile_lineno, TBOX box, const char *box_ch

 /// Calls #LearnWord to extract features for labelled blobs within each word.
 /// Features are stored in an internal buffer.
-void Tesseract::ApplyBoxTraining(const STRING &fontname, PAGE_RES *page_res) {
+void Tesseract::ApplyBoxTraining(const std::string &fontname, PAGE_RES *page_res) {
  PAGE_RES_IT pr_it(page_res);
  int word_count = 0;
  for (WERD_RES *word_res = pr_it.word(); word_res != nullptr; word_res = pr_it.forward()) {
@ -747,8 +747,7 @@ void Tesseract::CorrectClassifyWords(PAGE_RES *page_res) {
    for (int i = 0; i < word_res->correct_text.size(); ++i) {
      // The part before the first space is the real ground truth, and the
      // rest is the bounding box location and page number.
-      std::vector<STRING> tokens;
-      word_res->correct_text[i].split(' ', &tokens);
+      std::vector<std::string> tokens = split(word_res->correct_text[i], ' ');
      UNICHAR_ID char_id = unicharset.unichar_to_id(tokens[0].c_str());
      choice->append_unichar_id_space_allocated(char_id, word_res->best_state[i], 0.0f, 0.0f);
    }
--- a/src/ccmain/control.cpp
+++ b/src/ccmain/control.cpp
@ -464,8 +464,8 @@ void Tesseract::bigram_correction_pass(PAGE_RES *page_res) {
    GenericVector<WERD_CHOICE *> overrides_word1;
    GenericVector<WERD_CHOICE *> overrides_word2;

-    const STRING orig_w1_str = w_prev->best_choice->unichar_string();
-    const STRING orig_w2_str = w->best_choice->unichar_string();
+    const auto orig_w1_str = w_prev->best_choice->unichar_string();
+    const auto orig_w2_str = w->best_choice->unichar_string();
    WERD_CHOICE prev_best(w->uch_set);
    {
      int w1start, w1end;
@ -539,8 +539,8 @@ void Tesseract::bigram_correction_pass(PAGE_RES *page_res) {
        }
        continue;
      }
-      const STRING new_w1_str = overrides_word1[best_idx]->unichar_string();
-      const STRING new_w2_str = overrides_word2[best_idx]->unichar_string();
+      const auto new_w1_str = overrides_word1[best_idx]->unichar_string();
+      const auto new_w2_str = overrides_word2[best_idx]->unichar_string();
      if (new_w1_str != orig_w1_str) {
        w_prev->ReplaceBestChoice(overrides_word1[best_idx]);
      }
@ -548,13 +548,13 @@ void Tesseract::bigram_correction_pass(PAGE_RES *page_res) {
        w->ReplaceBestChoice(overrides_word2[best_idx]);
      }
      if (tessedit_bigram_debug > 0) {
-        STRING choices_description;
+        std::string choices_description;
        int num_bigram_choices = overrides_word1.size() * overrides_word2.size();
        if (num_bigram_choices == 1) {
          choices_description = "This was the unique bigram choice.";
        } else {
          if (tessedit_bigram_debug > 1) {
-            STRING bigrams_list;
+            std::string bigrams_list;
            const int kMaxChoicesToPrint = 20;
            for (int i = 0; i < overrides_word1.size() && i < kMaxChoicesToPrint; i++) {
              if (i > 0) {
@ -568,7 +568,7 @@ void Tesseract::bigram_correction_pass(PAGE_RES *page_res) {
            choices_description += bigrams_list;
            choices_description += "}";
          } else {
-            choices_description.add_str_int("There were ", num_bigram_choices);
+            choices_description += "There were " + std::to_string(num_bigram_choices);
            choices_description += " compatible bigrams.";
          }
        }
@ -1079,11 +1079,11 @@ bool Tesseract::SelectGoodDiacriticOutlines(int pass, float certainty_threshold,
                                            C_BLOB *blob,
                                            const GenericVector<C_OUTLINE *> &outlines,
                                            int num_outlines, std::vector<bool> *ok_outlines) {
-  STRING best_str;
+  std::string best_str;
  float target_cert = certainty_threshold;
  if (blob != nullptr) {
    float target_c2;
-    target_cert = ClassifyBlobAsWord(pass, pr_it, blob, &best_str, &target_c2);
+    target_cert = ClassifyBlobAsWord(pass, pr_it, blob, best_str, &target_c2);
    if (debug_noise_removal) {
      tprintf("No Noise blob classified as %s=%g(%g) at:", best_str.c_str(), target_cert,
              target_c2);
@ -1093,9 +1093,9 @@ bool Tesseract::SelectGoodDiacriticOutlines(int pass, float certainty_threshold,
  }
  std::vector<bool> test_outlines = *ok_outlines;
  // Start with all the outlines in.
-  STRING all_str;
+  std::string all_str;
  std::vector<bool> best_outlines = *ok_outlines;
-  float best_cert = ClassifyBlobPlusOutlines(test_outlines, outlines, pass, pr_it, blob, &all_str);
+  float best_cert = ClassifyBlobPlusOutlines(test_outlines, outlines, pass, pr_it, blob, all_str);
  if (debug_noise_removal) {
    TBOX ol_box;
    for (int i = 0; i < test_outlines.size(); ++i) {
@ -1116,8 +1116,8 @@ bool Tesseract::SelectGoodDiacriticOutlines(int pass, float certainty_threshold,
    for (int i = 0; i < outlines.size(); ++i) {
      if (test_outlines[i]) {
        test_outlines[i] = false;
-        STRING str;
-        float cert = ClassifyBlobPlusOutlines(test_outlines, outlines, pass, pr_it, blob, &str);
+        std::string str;
+        float cert = ClassifyBlobPlusOutlines(test_outlines, outlines, pass, pr_it, blob, str);
        if (debug_noise_removal) {
          TBOX ol_box;
          for (int j = 0; j < outlines.size(); ++j) {
@ -1162,7 +1162,7 @@ bool Tesseract::SelectGoodDiacriticOutlines(int pass, float certainty_threshold,
 // the inclusion of the outlines, and returns the certainty of the raw choice.
 float Tesseract::ClassifyBlobPlusOutlines(const std::vector<bool> &ok_outlines,
                                          const GenericVector<C_OUTLINE *> &outlines, int pass_n,
-                                          PAGE_RES_IT *pr_it, C_BLOB *blob, STRING *best_str) {
+                                          PAGE_RES_IT *pr_it, C_BLOB *blob, std::string &best_str) {
  C_OUTLINE_IT ol_it;
  C_OUTLINE *first_to_keep = nullptr;
  C_BLOB *local_blob = nullptr;
@ -1204,7 +1204,7 @@ float Tesseract::ClassifyBlobPlusOutlines(const std::vector<bool> &ok_outlines,
 // Classifies the given blob (part of word_data->word->word) as an individual
 // word, using languages, chopper etc, returning only the certainty of the
 // best raw choice, and undoing all the work done to fake out the word.
-float Tesseract::ClassifyBlobAsWord(int pass_n, PAGE_RES_IT *pr_it, C_BLOB *blob, STRING *best_str,
+float Tesseract::ClassifyBlobAsWord(int pass_n, PAGE_RES_IT *pr_it, C_BLOB *blob, std::string &best_str,
                                    float *c2) {
  WERD *real_word = pr_it->word()->word;
  WERD *word = real_word->ConstructFromSingleBlob(real_word->flag(W_BOL), real_word->flag(W_EOL),
@ -1233,10 +1233,10 @@ float Tesseract::ClassifyBlobAsWord(int pass_n, PAGE_RES_IT *pr_it, C_BLOB *blob
    cert = wd.word->raw_choice->certainty();
    float rat = wd.word->raw_choice->rating();
    *c2 = rat > 0.0f ? cert * cert / rat : 0.0f;
-    *best_str = wd.word->raw_choice->unichar_string();
+    best_str = wd.word->raw_choice->unichar_string();
  } else {
    *c2 = 0.0f;
-    *best_str = "";
+    best_str.clear();
  }
  it.DeleteCurrentWord();
  pr_it->ResetWordIterator();
--- a/src/ccmain/linerec.cpp
+++ b/src/ccmain/linerec.cpp
@ -38,9 +38,9 @@ const float kWorstDictCertainty = -25.0f;
 // Breaks the page into lines, according to the boxes, and writes them to a
 // serialized DocumentData based on output_basename.
 // Return true if successful, false if an error occurred.
-bool Tesseract::TrainLineRecognizer(const char *input_imagename, const STRING &output_basename,
+bool Tesseract::TrainLineRecognizer(const char *input_imagename, const std::string &output_basename,
                                    BLOCK_LIST *block_list) {
-  STRING lstmf_name = output_basename + ".lstmf";
+  std::string lstmf_name = output_basename + ".lstmf";
  DocumentData images(lstmf_name);
  if (applybox_page > 0) {
    // Load existing document for the previous pages.
@ -50,7 +50,7 @@ bool Tesseract::TrainLineRecognizer(const char *input_imagename, const STRING &o
    }
  }
  std::vector<TBOX> boxes;
-  std::vector<STRING> texts;
+  std::vector<std::string> texts;
  // Get the boxes for this page, if there are any.
  if (!ReadAllBoxes(applybox_page, false, input_imagename, &boxes, &texts, nullptr, nullptr) ||
      boxes.empty()) {
@ -73,7 +73,7 @@ bool Tesseract::TrainLineRecognizer(const char *input_imagename, const STRING &o
 // Generates training data for training a line recognizer, eg LSTM.
 // Breaks the boxes into lines, normalizes them, converts to ImageData and
 // appends them to the given training_data.
-void Tesseract::TrainFromBoxes(const std::vector<TBOX> &boxes, const std::vector<STRING> &texts,
+void Tesseract::TrainFromBoxes(const std::vector<TBOX> &boxes, const std::vector<std::string> &texts,
                               BLOCK_LIST *block_list, DocumentData *training_data) {
  int box_count = boxes.size();
  // Process all the text lines in this page, as defined by the boxes.
@ -85,7 +85,7 @@ void Tesseract::TrainFromBoxes(const std::vector<TBOX> &boxes, const std::vector
  for (int start_box = end_box; start_box < box_count; start_box = end_box) {
    // Find the textline of boxes starting at start and their bounding box.
    TBOX line_box = boxes[start_box];
-    STRING line_str = texts[start_box];
+    std::string line_str = texts[start_box];
    for (end_box = start_box + 1; end_box < box_count && texts[end_box] != "\t"; ++end_box) {
      line_box += boxes[end_box];
      line_str += texts[end_box];
@ -127,7 +127,7 @@ void Tesseract::TrainFromBoxes(const std::vector<TBOX> &boxes, const std::vector
 // and ground truth boxes/truth text if available in the input.
 // The image is not normalized in any way.
 ImageData *Tesseract::GetLineData(const TBOX &line_box, const std::vector<TBOX> &boxes,
-                                  const std::vector<STRING> &texts, int start_box, int end_box,
+                                  const std::vector<std::string> &texts, int start_box, int end_box,
                                  const BLOCK &block) {
  TBOX revised_box;
  ImageData *image_data = GetRectImage(line_box, block, kImagePadding, &revised_box);
@ -138,7 +138,7 @@ ImageData *Tesseract::GetLineData(const TBOX &line_box, const std::vector<TBOX>
  FCOORD block_rotation(block.re_rotation().x(), -block.re_rotation().y());
  ICOORD shift = -revised_box.botleft();
  std::vector<TBOX> line_boxes;
-  std::vector<STRING> line_texts;
+  std::vector<std::string> line_texts;
  for (int b = start_box; b < end_box; ++b) {
    TBOX box = boxes[b];
    box.rotate(block_rotation);
--- a/src/ccmain/paramsd.cpp
+++ b/src/ccmain/paramsd.cpp
@ -142,14 +142,14 @@ const char *ParamContent::GetDescription() const {
 }

 // Getter for the value.
-STRING ParamContent::GetValue() const {
-  STRING result;
+std::string ParamContent::GetValue() const {
+  std::string result;
  if (param_type_ == VT_INTEGER) {
-    result.add_str_int("", *iIt);
+    result += std::to_string(*iIt);
  } else if (param_type_ == VT_BOOLEAN) {
-    result.add_str_int("", *bIt);
+    result += std::to_string(*bIt);
  } else if (param_type_ == VT_DOUBLE) {
-    result.add_str_double("", *dIt);
+    result += std::to_string(*dIt);
  } else if (param_type_ == VT_STRING) {
    if (STRING(*(sIt)).c_str() != nullptr) {
      result = sIt->c_str();
--- a/src/ccmain/paramsd.h
+++ b/src/ccmain/paramsd.h
@ -62,7 +62,7 @@ public:

  // Getters and Setters.
  void SetValue(const char *val);
-  STRING GetValue() const;
+  std::string GetValue() const;
  const char *GetName() const;
  const char *GetDescription() const;

--- a/src/ccmain/tesseractclass.h
+++ b/src/ccmain/tesseractclass.h
@ -46,7 +46,6 @@
 #include <tesseract/publictypes.h> // for OcrEngineMode, PageSegMode, OEM_L...
 #include <tesseract/unichar.h>     // for UNICHAR_ID
 #include "genericvector.h"         // for GenericVector, PointerVector
-#include "strngs.h"                // for STRING

 #include <allheaders.h> // for pixDestroy, pixGetWidth, pixGetHe...

@ -138,7 +137,7 @@ struct TesseractStats {
  int16_t doc_good_char_quality;
  int32_t word_count;    // count of words in the document
  int32_t dict_words;    // number of dictionary words in the document
-  STRING dump_words_str; // accumulator used by dump_words()
+  std::string dump_words_str; // accumulator used by dump_words()
  // Flags used by write_results()
  bool tilde_crunch_written;
  bool last_char_was_newline;
@ -336,19 +335,19 @@ public:
  // Breaks the page into lines, according to the boxes, and writes them to a
  // serialized DocumentData based on output_basename.
  // Return true if successful, false if an error occurred.
-  bool TrainLineRecognizer(const char *input_imagename, const STRING &output_basename,
+  bool TrainLineRecognizer(const char *input_imagename, const std::string &output_basename,
                           BLOCK_LIST *block_list);
  // Generates training data for training a line recognizer, eg LSTM.
  // Breaks the boxes into lines, normalizes them, converts to ImageData and
  // appends them to the given training_data.
-  void TrainFromBoxes(const std::vector<TBOX> &boxes, const std::vector<STRING> &texts,
+  void TrainFromBoxes(const std::vector<TBOX> &boxes, const std::vector<std::string> &texts,
                      BLOCK_LIST *block_list, DocumentData *training_data);

  // Returns an Imagedata containing the image of the given textline,
  // and ground truth boxes/truth text if available in the input.
  // The image is not normalized in any way.
  ImageData *GetLineData(const TBOX &line_box, const std::vector<TBOX> &boxes,
-                         const std::vector<STRING> &texts, int start_box, int end_box,
+                         const std::vector<std::string> &texts, int start_box, int end_box,
                         const BLOCK &block);
  // Helper gets the image of a rectangle, using the block.re_rotation() if
  // needed to get to the image, and rotating the result back to horizontal
@ -420,11 +419,11 @@ public:
  // the inclusion of the outlines, and returns the certainty of the raw choice.
  float ClassifyBlobPlusOutlines(const std::vector<bool> &ok_outlines,
                                 const GenericVector<C_OUTLINE *> &outlines, int pass_n,
-                                 PAGE_RES_IT *pr_it, C_BLOB *blob, STRING *best_str);
+                                 PAGE_RES_IT *pr_it, C_BLOB *blob, std::string &best_str);
  // Classifies the given blob (part of word_data->word->word) as an individual
  // word, using languages, chopper etc, returning only the certainty of the
  // best raw choice, and undoing all the work done to fake out the word.
-  float ClassifyBlobAsWord(int pass_n, PAGE_RES_IT *pr_it, C_BLOB *blob, STRING *best_str,
+  float ClassifyBlobAsWord(int pass_n, PAGE_RES_IT *pr_it, C_BLOB *blob, std::string &best_str,
                           float *c2);
  void classify_word_and_language(int pass_n, PAGE_RES_IT *pr_it, WordData *word_data);
  void classify_word_pass1(const WordData &word_data, WERD_RES **in_word,
@ -524,7 +523,7 @@ public:
  int init_tesseract_lm(const std::string &arg0, const std::string &textbase,
                        const std::string &language, TessdataManager *mgr);

-  void recognize_page(STRING &image_name);
+  void recognize_page(std::string &image_name);
  void end_tesseract();

  bool init_tesseract_lang_data(const std::string &arg0, const std::string &textbase,
@ -733,7 +732,7 @@ public:
  void CorrectClassifyWords(PAGE_RES *page_res);
  // Call LearnWord to extract features for labelled blobs within each word.
  // Features are stored in an internal buffer.
-  void ApplyBoxTraining(const STRING &fontname, PAGE_RES *page_res);
+  void ApplyBoxTraining(const std::string &fontname, PAGE_RES *page_res);

  //// fixxht.cpp ///////////////////////////////////////////////////////
  // Returns the number of misfit blob tops in this word.
@ -1029,7 +1028,7 @@ private:
  // will be loaded, and set to null when debug is complete.
  const char *backup_config_file_;
  // The filename of a config file to read when processing a debug word.
-  STRING word_config_;
+  std::string word_config_;
  // Image used for input to layout analysis and tesseract recognition.
  // May be modified by the ShiroRekhaSplitter to eliminate the top-line.
  Pix *pix_binary_;
--- a/src/ccstruct/blamer.cpp
+++ b/src/ccstruct/blamer.cpp
@ -168,19 +168,19 @@ void BlamerBundle::SetupNormTruthWord(const DENORM &denorm) {
 // and the left edge of the right-hand word is word2_left.
 void BlamerBundle::SplitBundle(int word1_right, int word2_left, bool debug, BlamerBundle *bundle1,
                               BlamerBundle *bundle2) const {
-  STRING debug_str;
+  std::string debug_str;
  // Find truth boxes that correspond to the split in the blobs.
  int b;
  int begin2_truth_index = -1;
  if (incorrect_result_reason_ != IRR_NO_TRUTH && truth_has_char_boxes_) {
    debug_str = "Looking for truth split at";
-    debug_str.add_str_int(" end1_x ", word1_right);
-    debug_str.add_str_int(" begin2_x ", word2_left);
+    debug_str += " end1_x " + std::to_string(word1_right);
+    debug_str += " begin2_x " + std::to_string(word2_left);
    debug_str += "\nnorm_truth_word boxes:\n";
    if (norm_truth_word_.length() > 1) {
-      norm_truth_word_.BlobBox(0).print_to_str(&debug_str);
+      norm_truth_word_.BlobBox(0).print_to_str(debug_str);
      for (b = 1; b < norm_truth_word_.length(); ++b) {
-        norm_truth_word_.BlobBox(b).print_to_str(&debug_str);
+        norm_truth_word_.BlobBox(b).print_to_str(debug_str);
        if ((abs(word1_right - norm_truth_word_.BlobBox(b - 1).right()) < norm_box_tolerance_) &&
            (abs(word2_left - norm_truth_word_.BlobBox(b).left()) < norm_box_tolerance_)) {
          begin2_truth_index = b;
@ -325,24 +325,24 @@ void BlamerBundle::SetChopperBlame(const WERD_RES *word, bool debug) {
  if (missing_chop || box_index < norm_truth_word_.length()) {
    STRING debug_str;
    if (missing_chop) {
-      debug_str.add_str_int("Detected missing chop (tolerance=", norm_box_tolerance_);
+      debug_str += "Detected missing chop (tolerance=" + std::to_string(norm_box_tolerance_);
      debug_str += ") at Bounding Box=";
      TBLOB *curr_blob = word->chopped_word->blobs[blob_index];
-      curr_blob->bounding_box().print_to_str(&debug_str);
-      debug_str.add_str_int("\nNo chop for truth at x=", truth_x);
+      curr_blob->bounding_box().print_to_str(debug_str);
+      debug_str += "\nNo chop for truth at x=" + std::to_string(truth_x);
    } else {
-      debug_str.add_str_int("Missing chops for last ", norm_truth_word_.length() - box_index);
+      debug_str += "Missing chops for last " + std::to_string(norm_truth_word_.length() - box_index);
      debug_str += " truth box(es)";
    }
    debug_str += "\nMaximally chopped word boxes:\n";
    for (blob_index = 0; blob_index < num_blobs; ++blob_index) {
      TBLOB *curr_blob = word->chopped_word->blobs[blob_index];
-      curr_blob->bounding_box().print_to_str(&debug_str);
+      curr_blob->bounding_box().print_to_str(debug_str);
      debug_str += '\n';
    }
    debug_str += "Truth  bounding  boxes:\n";
    for (box_index = 0; box_index < norm_truth_word_.length(); ++box_index) {
-      norm_truth_word_.BlobBox(box_index).print_to_str(&debug_str);
+      norm_truth_word_.BlobBox(box_index).print_to_str(debug_str);
      debug_str += '\n';
    }
    SetBlame(IRR_CHOPPER, debug_str, word->best_choice, debug);
@ -413,8 +413,8 @@ void BlamerBundle::SetupCorrectSegmentation(const TWERD *word, bool debug) {
    if (blob_index + 1 < num_blobs)
      next_box_x = word->blobs[blob_index + 1]->bounding_box().right();
    int16_t truth_x = norm_truth_word_.BlobBox(truth_idx).right();
-    debug_str.add_str_int("Box x coord vs. truth: ", curr_box_x);
-    debug_str.add_str_int(" ", truth_x);
+    debug_str += "Box x coord vs. truth: " + std::to_string(curr_box_x);
+    debug_str += " " + std::to_string(truth_x);
    debug_str += "\n";
    if (curr_box_x > (truth_x + norm_box_tolerance_)) {
      break;                                                  // failed to find a matching box
@ -424,23 +424,23 @@ void BlamerBundle::SetupCorrectSegmentation(const TWERD *word, bool debug) {
      correct_segmentation_cols_.push_back(curr_box_col);
      correct_segmentation_rows_.push_back(next_box_col - 1);
      ++truth_idx;
-      debug_str.add_str_int("col=", curr_box_col);
-      debug_str.add_str_int(" row=", next_box_col - 1);
+      debug_str += "col=" + std::to_string(curr_box_col);
+      debug_str += " row=" + std::to_string(next_box_col - 1);
      debug_str += "\n";
      curr_box_col = next_box_col;
    }
  }
  if (blob_index < num_blobs || // trailing blobs
      correct_segmentation_cols_.size() != norm_truth_word_.length()) {
-    debug_str.add_str_int(
+    debug_str += 
        "Blamer failed to find correct segmentation"
-        " (tolerance=",
-        norm_box_tolerance_);
+        " (tolerance=" +
+        std::to_string(norm_box_tolerance_);
    if (blob_index >= num_blobs)
      debug_str += " blob == nullptr";
    debug_str += ")\n";
-    debug_str.add_str_int(" path length ", correct_segmentation_cols_.size());
-    debug_str.add_str_int(" vs. truth ", norm_truth_word_.length());
+    debug_str += " path length " + std::to_string(correct_segmentation_cols_.size());
+    debug_str += " vs. truth " + std::to_string(norm_truth_word_.length());
    debug_str += "\n";
    SetBlame(IRR_UNKNOWN, debug_str, nullptr, debug);
    correct_segmentation_cols_.clear();
@ -457,7 +457,7 @@ bool BlamerBundle::GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const {
 #if !defined(DISABLED_LEGACY_ENGINE)
 // Setup ready to guide the segmentation search to the correct segmentation.
 void BlamerBundle::InitForSegSearch(const WERD_CHOICE *best_choice, MATRIX *ratings,
-                                    UNICHAR_ID wildcard_id, bool debug, STRING *debug_str,
+                                    UNICHAR_ID wildcard_id, bool debug, std::string &debug_str,
                                    tesseract::LMPainPoints *pain_points, double max_char_wh_ratio,
                                    WERD_RES *word_res) {
  segsearch_is_looking_for_blame_ = true;
@ -466,19 +466,19 @@ void BlamerBundle::InitForSegSearch(const WERD_CHOICE *best_choice, MATRIX *rati
  }
  // Fill pain points for any unclassifed blob corresponding to the
  // correct segmentation state.
-  *debug_str += "Correct segmentation:\n";
+  debug_str += "Correct segmentation:\n";
  for (int idx = 0; idx < correct_segmentation_cols_.size(); ++idx) {
-    debug_str->add_str_int("col=", correct_segmentation_cols_[idx]);
-    debug_str->add_str_int(" row=", correct_segmentation_rows_[idx]);
-    *debug_str += "\n";
+    debug_str += "col=" + std::to_string(correct_segmentation_cols_[idx]);
+    debug_str += " row=" + std::to_string(correct_segmentation_rows_[idx]);
+    debug_str += "\n";
    if (!ratings->Classified(correct_segmentation_cols_[idx], correct_segmentation_rows_[idx],
                             wildcard_id) &&
        !pain_points->GeneratePainPoint(
            correct_segmentation_cols_[idx], correct_segmentation_rows_[idx],
            tesseract::LM_PPTYPE_BLAMER, 0.0, false, max_char_wh_ratio, word_res)) {
      segsearch_is_looking_for_blame_ = false;
-      *debug_str += "\nFailed to insert pain point\n";
-      SetBlame(IRR_SEGSEARCH_HEUR, *debug_str, best_choice, debug);
+      debug_str += "\nFailed to insert pain point\n";
+      SetBlame(IRR_SEGSEARCH_HEUR, debug_str, best_choice, debug);
      break;
    }
  } // end for blamer_bundle->correct_segmentation_cols/rows
@ -491,7 +491,7 @@ bool BlamerBundle::GuidedSegsearchStillGoing() const {
 }

 // The segmentation search has ended. Sets the blame appropriately.
-void BlamerBundle::FinishSegSearch(const WERD_CHOICE *best_choice, bool debug, STRING *debug_str) {
+void BlamerBundle::FinishSegSearch(const WERD_CHOICE *best_choice, bool debug, std::string &debug_str) {
  // If we are still looking for blame (i.e. best_choice is incorrect, but a
  // path representing the correct segmentation could be constructed), we can
  // blame segmentation search pain point prioritization if the rating of the
@ -506,22 +506,22 @@ void BlamerBundle::FinishSegSearch(const WERD_CHOICE *best_choice, bool debug, S
  if (segsearch_is_looking_for_blame_) {
    segsearch_is_looking_for_blame_ = false;
    if (best_choice_is_dict_and_top_choice_) {
-      *debug_str = "Best choice is: incorrect, top choice, dictionary word";
-      *debug_str += " with permuter ";
-      *debug_str += best_choice->permuter_name();
-      SetBlame(IRR_CLASSIFIER, *debug_str, best_choice, debug);
+      debug_str = "Best choice is: incorrect, top choice, dictionary word";
+      debug_str += " with permuter ";
+      debug_str += best_choice->permuter_name();
+      SetBlame(IRR_CLASSIFIER, debug_str, best_choice, debug);
    } else if (best_correctly_segmented_rating_ < best_choice->rating()) {
-      *debug_str += "Correct segmentation state was not explored";
-      SetBlame(IRR_SEGSEARCH_PP, *debug_str, best_choice, debug);
+      debug_str += "Correct segmentation state was not explored";
+      SetBlame(IRR_SEGSEARCH_PP, debug_str, best_choice, debug);
    } else {
      if (best_correctly_segmented_rating_ >= WERD_CHOICE::kBadRating) {
-        *debug_str += "Correct segmentation paths were pruned by LM\n";
+        debug_str += "Correct segmentation paths were pruned by LM\n";
      } else {
-        debug_str->add_str_double("Best correct segmentation rating ",
-                                  best_correctly_segmented_rating_);
-        debug_str->add_str_double(" vs. best choice rating ", best_choice->rating());
+        debug_str += "Best correct segmentation rating " +
+                                  std::to_string(best_correctly_segmented_rating_);
+        debug_str += " vs. best choice rating " + std::to_string(best_choice->rating());
      }
-      SetBlame(IRR_CLASS_LM_TRADEOFF, *debug_str, best_choice, debug);
+      SetBlame(IRR_CLASS_LM_TRADEOFF, debug_str, best_choice, debug);
    }
  }
 }
--- a/src/ccstruct/blamer.h
+++ b/src/ccstruct/blamer.h
@ -273,12 +273,12 @@ struct BlamerBundle {
  bool GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const;
  // Setup ready to guide the segmentation search to the correct segmentation.
  void InitForSegSearch(const WERD_CHOICE *best_choice, MATRIX *ratings, UNICHAR_ID wildcard_id,
-                        bool debug, STRING *debug_str, tesseract::LMPainPoints *pain_points,
+                        bool debug, std::string &debug_str, tesseract::LMPainPoints *pain_points,
                        double max_char_wh_ratio, WERD_RES *word_res);
  // Returns true if the guided segsearch is in progress.
  bool GuidedSegsearchStillGoing() const;
  // The segmentation search has ended. Sets the blame appropriately.
-  void FinishSegSearch(const WERD_CHOICE *best_choice, bool debug, STRING *debug_str);
+  void FinishSegSearch(const WERD_CHOICE *best_choice, bool debug, std::string &debug_str);

  // If the bundle is null or still does not indicate the correct result,
  // fix it and use some backup reason for the blame.
--- a/src/ccstruct/boxread.cpp
+++ b/src/ccstruct/boxread.cpp
@ -74,7 +74,7 @@ FILE *OpenBoxFile(const char *fname) {
 // Each of the output vectors is optional (may be nullptr).
 // Returns false if no boxes are found.
 bool ReadAllBoxes(int target_page, bool skip_blanks, const char *filename, std::vector<TBOX> *boxes,
-                  std::vector<STRING> *texts, std::vector<STRING> *box_texts,
+                  std::vector<std::string> *texts, std::vector<std::string> *box_texts,
                  std::vector<int> *pages) {
  std::ifstream input(BoxFileName(filename).c_str(), std::ios::in | std::ios::binary);
  std::vector<char> box_data(std::istreambuf_iterator<char>(input), {});
@ -88,8 +88,8 @@ bool ReadAllBoxes(int target_page, bool skip_blanks, const char *filename, std::

 // Reads all boxes from the string. Otherwise, as ReadAllBoxes.
 bool ReadMemBoxes(int target_page, bool skip_blanks, const char *box_data, bool continue_on_failure,
-                  std::vector<TBOX> *boxes, std::vector<STRING> *texts,
-                  std::vector<STRING> *box_texts, std::vector<int> *pages) {
+                  std::vector<TBOX> *boxes, std::vector<std::string> *texts,
+                  std::vector<std::string> *box_texts, std::vector<int> *pages) {
  STRING box_str(box_data);
  std::vector<STRING> lines;
  box_str.split('\n', &lines);
@ -115,8 +115,8 @@ bool ReadMemBoxes(int target_page, bool skip_blanks, const char *box_data, bool
    if (texts != nullptr)
      texts->push_back(utf8_str);
    if (box_texts != nullptr) {
-      STRING full_text;
-      MakeBoxFileStr(utf8_str.c_str(), box, target_page, &full_text);
+      std::string full_text;
+      MakeBoxFileStr(utf8_str.c_str(), box, target_page, full_text);
      box_texts->push_back(full_text);
    }
    if (pages != nullptr)
@ -255,13 +255,13 @@ bool ParseBoxFileStr(const char *boxfile_str, int *page_number, STRING *utf8_str
 }

 // Creates a box file string from a unichar string, TBOX and page number.
-void MakeBoxFileStr(const char *unichar_str, const TBOX &box, int page_num, STRING *box_str) {
-  *box_str = unichar_str;
-  box_str->add_str_int(" ", box.left());
-  box_str->add_str_int(" ", box.bottom());
-  box_str->add_str_int(" ", box.right());
-  box_str->add_str_int(" ", box.top());
-  box_str->add_str_int(" ", page_num);
+void MakeBoxFileStr(const char *unichar_str, const TBOX &box, int page_num, std::string &box_str) {
+  box_str = unichar_str;
+  box_str += " " + std::to_string(box.left());
+  box_str += " " + std::to_string(box.bottom());
+  box_str += " " + std::to_string(box.right());
+  box_str += " " + std::to_string(box.top());
+  box_str += " " + std::to_string(page_num);
 }

 } // namespace tesseract
--- a/src/ccstruct/boxread.h
+++ b/src/ccstruct/boxread.h
@ -43,7 +43,7 @@ FILE *OpenBoxFile(const char *filename);
 // Each of the output vectors is optional (may be nullptr).
 // Returns false if no boxes are found.
 bool ReadAllBoxes(int target_page, bool skip_blanks, const char *filename, std::vector<TBOX> *boxes,
-                  std::vector<STRING> *texts, std::vector<STRING> *box_texts,
+                  std::vector<std::string> *texts, std::vector<std::string> *box_texts,
                  std::vector<int> *pages);

 // Reads all boxes from the string. Otherwise, as ReadAllBoxes.
@ -52,8 +52,8 @@ bool ReadAllBoxes(int target_page, bool skip_blanks, const char *filename, std::
 // It otherwise gives up and returns false on encountering an invalid box.
 TESS_API
 bool ReadMemBoxes(int target_page, bool skip_blanks, const char *box_data, bool continue_on_failure,
-                  std::vector<TBOX> *boxes, std::vector<STRING> *texts,
-                  std::vector<STRING> *box_texts, std::vector<int> *pages);
+                  std::vector<TBOX> *boxes, std::vector<std::string> *texts,
+                  std::vector<std::string> *box_texts, std::vector<int> *pages);

 // ReadNextBox factors out the code to interpret a line of a box
 // file so that applybox and unicharset_extractor interpret the same way.
@ -80,7 +80,7 @@ bool ParseBoxFileStr(const char *boxfile_str, int *page_number, STRING *utf8_str

 // Creates a box file string from a unichar string, TBOX and page number.
 TESS_API
-void MakeBoxFileStr(const char *unichar_str, const TBOX &box, int page_num, STRING *box_str);
+void MakeBoxFileStr(const char *unichar_str, const TBOX &box, int page_num, std::string &box_str);

 } // namespace tesseract

--- a/src/ccstruct/imagedata.cpp
+++ b/src/ccstruct/imagedata.cpp
@ -166,15 +166,15 @@ ImageData *ImageData::Build(const char *name, int page_number, const char *lang,

 // Writes to the given file. Returns false in case of error.
 bool ImageData::Serialize(TFile *fp) const {
-  if (!imagefilename_.Serialize(fp))
+  if (!fp->Serialize(imagefilename_))
    return false;
  if (!fp->Serialize(&page_number_))
    return false;
  if (!fp->Serialize(image_data_))
    return false;
-  if (!language_.Serialize(fp))
+  if (!fp->Serialize(language_))
    return false;
-  if (!transcription_.Serialize(fp))
+  if (!fp->Serialize(transcription_))
    return false;
  if (!fp->Serialize(boxes_))
    return false;
@ -186,15 +186,15 @@ bool ImageData::Serialize(TFile *fp) const {

 // Reads from the given file. Returns false in case of error.
 bool ImageData::DeSerialize(TFile *fp) {
-  if (!imagefilename_.DeSerialize(fp))
+  if (!fp->DeSerialize(imagefilename_))
    return false;
  if (!fp->DeSerialize(&page_number_))
    return false;
  if (!fp->DeSerialize(image_data_))
    return false;
-  if (!language_.DeSerialize(fp))
+  if (!fp->DeSerialize(language_))
    return false;
-  if (!transcription_.DeSerialize(fp))
+  if (!fp->DeSerialize(transcription_))
    return false;
  if (!fp->DeSerialize(boxes_))
    return false;
@ -355,7 +355,7 @@ void ImageData::Display() const {

 // Adds the supplied boxes and transcriptions that correspond to the correct
 // page number.
-void ImageData::AddBoxes(const std::vector<TBOX> &boxes, const std::vector<STRING> &texts,
+void ImageData::AddBoxes(const std::vector<TBOX> &boxes, const std::vector<std::string> &texts,
                         const std::vector<int> &box_pages) {
  // Copy the boxes and make the transcription.
  for (int i = 0; i < box_pages.size(); ++i) {
@ -403,7 +403,7 @@ Pix *ImageData::GetPixInternal(const std::vector<char> &image_data) {
 bool ImageData::AddBoxes(const char *box_text) {
  if (box_text != nullptr && box_text[0] != '\0') {
    std::vector<TBOX> boxes;
-    std::vector<STRING> texts;
+    std::vector<std::string> texts;
    std::vector<int> box_pages;
    if (ReadMemBoxes(page_number_, /*skip_blanks*/ false, box_text,
                     /*continue_on_failure*/ true, &boxes, &texts, nullptr, &box_pages)) {
@ -416,7 +416,7 @@ bool ImageData::AddBoxes(const char *box_text) {
  return false;
 }

-DocumentData::DocumentData(const STRING &name)
+DocumentData::DocumentData(const std::string &name)
    : document_name_(name)
    , pages_offset_(-1)
    , total_pages_(-1)
@ -613,7 +613,7 @@ DocumentCache::~DocumentCache() {

 // Adds all the documents in the list of filenames, counting memory.
 // The reader is used to read the files.
-bool DocumentCache::LoadDocuments(const std::vector<STRING> &filenames,
+bool DocumentCache::LoadDocuments(const std::vector<std::string> &filenames,
                                  CachingStrategy cache_strategy, FileReader reader) {
  cache_strategy_ = cache_strategy;
  int64_t fair_share_memory = 0;
@ -644,7 +644,7 @@ bool DocumentCache::AddToCache(DocumentData *data) {
 }

 // Finds and returns a document by name.
-DocumentData *DocumentCache::FindDocument(const STRING &document_name) const {
+DocumentData *DocumentCache::FindDocument(const std::string &document_name) const {
  for (auto *document : documents_) {
    if (document->document_name() == document_name) {
      return document;
--- a/src/ccstruct/imagedata.h
+++ b/src/ccstruct/imagedata.h
@ -22,7 +22,6 @@
 #include "points.h" // for FCOORD

 #include "genericvector.h" // for GenericVector, PointerVector, FileReader
-#include "strngs.h"        // for STRING

 #include <mutex>  // for std::mutex
 #include <thread> // for std::thread
@ -129,10 +128,10 @@ public:
  static bool SkipDeSerialize(TFile *fp);

  // Other accessors.
-  const STRING &imagefilename() const {
+  const std::string &imagefilename() const {
    return imagefilename_;
  }
-  void set_imagefilename(const STRING &name) {
+  void set_imagefilename(const std::string &name) {
    imagefilename_ = name;
  }
  int page_number() const {
@ -144,22 +143,22 @@ public:
  const std::vector<char> &image_data() const {
    return image_data_;
  }
-  const STRING &language() const {
+  const std::string &language() const {
    return language_;
  }
-  void set_language(const STRING &lang) {
+  void set_language(const std::string &lang) {
    language_ = lang;
  }
-  const STRING &transcription() const {
+  const std::string &transcription() const {
    return transcription_;
  }
  const std::vector<TBOX> &boxes() const {
    return boxes_;
  }
-  const std::vector<STRING> &box_texts() const {
+  const std::vector<std::string> &box_texts() const {
    return box_texts_;
  }
-  const STRING &box_text(int index) const {
+  const std::string &box_text(int index) const {
    return box_texts_[index];
  }
  // Saves the given Pix as a PNG-encoded string and destroys it.
@ -184,7 +183,7 @@ public:

  // Adds the supplied boxes and transcriptions that correspond to the correct
  // page number.
-  void AddBoxes(const std::vector<TBOX> &boxes, const std::vector<STRING> &texts,
+  void AddBoxes(const std::vector<TBOX> &boxes, const std::vector<std::string> &texts,
                const std::vector<int> &box_pages);

 private:
@ -199,16 +198,16 @@ private:
  bool AddBoxes(const char *box_text);

 private:
-  STRING imagefilename_; // File to read image from.
+  std::string imagefilename_; // File to read image from.
  int32_t page_number_;  // Page number if multi-page tif or -1.
 #ifdef TESSERACT_IMAGEDATA_AS_PIX
  Pix *internal_pix_;
 #endif
  std::vector<char> image_data_;  // PNG/PNM file data.
-  STRING language_;               // Language code for image.
-  STRING transcription_;          // UTF-8 ground truth of image.
+  std::string language_;          // Language code for image.
+  std::string transcription_;     // UTF-8 ground truth of image.
  std::vector<TBOX> boxes_;       // If non-empty boxes of the image.
-  std::vector<STRING> box_texts_; // String for text in each box.
+  std::vector<std::string> box_texts_; // String for text in each box.
  bool vertical_text_;            // Image has been rotated from vertical.
 };

@ -216,7 +215,7 @@ private:
 class DocumentData {
 public:
  TESS_API
-  explicit DocumentData(const STRING &name);
+  explicit DocumentData(const std::string &name);
  TESS_API
  ~DocumentData();

@ -234,7 +233,7 @@ public:
  TESS_API
  void AddPageToDocument(ImageData *page);

-  const STRING &document_name() const {
+  const std::string &document_name() const {
    std::lock_guard<std::mutex> lock(general_mutex_);
    return document_name_;
  }
@ -303,7 +302,7 @@ private:

 private:
  // A name for this document.
-  STRING document_name_;
+  std::string document_name_;
  // A group of pages that corresponds in some loose way to a document.
  PointerVector<ImageData> pages_;
  // Page number of the first index in pages_.
@ -350,14 +349,14 @@ public:
  // Adds all the documents in the list of filenames, counting memory.
  // The reader is used to read the files.
  TESS_API
-  bool LoadDocuments(const std::vector<STRING> &filenames, CachingStrategy cache_strategy,
+  bool LoadDocuments(const std::vector<std::string> &filenames, CachingStrategy cache_strategy,
                     FileReader reader);

  // Adds document to the cache.
  bool AddToCache(DocumentData *data);

  // Finds and returns a document by name.
-  DocumentData *FindDocument(const STRING &document_name) const;
+  DocumentData *FindDocument(const std::string &document_name) const;

  // Returns a page by serial number using the current cache_strategy_ to
  // determine the mapping from serial number to page.
--- a/src/ccstruct/pageres.cpp
+++ b/src/ccstruct/pageres.cpp
@ -479,8 +479,8 @@ void WERD_RES::DebugWordChoices(bool debug, const char *word_to_debug) {
    int index = 0;
    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++index) {
      WERD_CHOICE *choice = it.data();
-      STRING label;
-      label.add_str_int("\nCooked Choice #", index);
+      std::string label;
+      label += "\nCooked Choice #" + std::to_string(index);
      choice->print(label.c_str());
    }
  }
--- a/src/ccstruct/rect.cpp
+++ b/src/ccstruct/rect.cpp
@ -167,13 +167,13 @@ void TBOX::plot(                    // paint box
 #endif

-// Appends the bounding box as (%d,%d)->(%d,%d) to a STRING.
+// Appends the bounding box as (%d,%d)->(%d,%d) to a string.
-void TBOX::print_to_str(STRING *str) const {
+void TBOX::print_to_str(std::string &str) const {
  // "(%d,%d)->(%d,%d)", left(), bottom(), right(), top()
-  str->add_str_int("(", left());
-  str->add_str_int(",", bottom());
-  str->add_str_int(")->(", right());
-  str->add_str_int(",", top());
-  *str += ')';
+  str += "(" + std::to_string(left());
+  str += "," + std::to_string(bottom());
+  str += ")->(" + std::to_string(right());
+  str += "," + std::to_string(top());
+  str += ')';
 }

 // Writes to the given file. Returns false in case of error.
--- a/src/ccstruct/rect.h
+++ b/src/ccstruct/rect.h
@ -2,7 +2,6 @@
 * File:        rect.h  (Formerly box.h)
 * Description: Bounding box class definition.
 * Author:      Phil Cheatle
- * Created:     Wed Oct 16 15:18:45 BST 1991
 *
 * (C) Copyright 1991, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
@ -30,11 +29,10 @@
 #include <cmath>     // for std::ceil, std::floor
 #include <cstdint>   // for INT16_MAX
 #include <cstdio>    // for FILE
+#include <string>    // for std::string

 namespace tesseract {

-class STRING;
-
 class TESS_API TBOX { // bounding box
 public:
  TBOX()
@ -287,8 +285,8 @@ public:
  void print() const { // print
    tprintf("Bounding box=(%d,%d)->(%d,%d)\n", left(), bottom(), right(), top());
  }
-  // Appends the bounding box as (%d,%d)->(%d,%d) to a STRING.
-  void print_to_str(STRING *str) const;
+  // Appends the bounding box as (%d,%d)->(%d,%d) to a string.
+  void print_to_str(std::string &str) const;

 #ifndef GRAPHICS_DISABLED
  void plot(                  // use current settings
--- a/src/ccutil/serialis.h
+++ b/src/ccutil/serialis.h
@ -88,6 +88,7 @@ public:
  bool DeSerializeSize(int32_t *data);
  bool DeSerialize(std::string &data);
  bool DeSerialize(std::vector<char> &data);
+  //bool DeSerialize(std::vector<std::string> &data);
  template <typename T>
  bool DeSerialize(T *data, size_t count = 1) {
    return FReadEndian(data, sizeof(T), count) == static_cast<int>(count);
@ -102,8 +103,17 @@ public:
    } else if (size > 50000000) {
      // Arbitrarily limit the number of elements to protect against bad data.
      return false;
+    } else if constexpr (std::is_same_v<T, std::string>) {
+      // Deserialize a string.
+      // TODO: optimize.
+      data.resize(size);
+      for (auto &item : data) {
+        if (!DeSerialize(item)) {
+          return false;
+        }
+      }
    } else if constexpr (std::is_class_v<T>) {
-      // Deserialize a class.
+      // Deserialize a tesseract class.
      // TODO: optimize.
      data.resize(size);
      for (auto &item : data) {
@ -133,8 +143,15 @@ public:
    uint32_t size = data.size();
    if (!Serialize(&size)) {
      return false;
+    } else if constexpr (std::is_same_v<T, std::string>) {
+      // Serialize strings by const reference (no per-element copy).
+      for (const auto &string : data) {
+        if (!Serialize(string)) {
+          return false;
+        }
+      }
    } else if constexpr (std::is_class_v<T>) {
-      // Serialize a class.
+      // Serialize a tesseract class.
      for (auto &item : data) {
        if (!item.Serialize(this)) {
          return false;
--- a/src/ccutil/strngs.cpp
+++ b/src/ccutil/strngs.cpp
@ -33,6 +33,23 @@ namespace tesseract {
 // possible length of an int (in 64 bits), being -<20 digits>.
 const int kMaxIntSize = 22;

+// Splits s at c; empty pieces (adjacent, leading or trailing delimiters) are dropped.
+const std::vector<std::string> split(const std::string &s, char c) {
+  std::vector<std::string> pieces;
+  std::string current;
+  for (const char ch : s) {
+    if (ch != c) {
+      current += ch;
+    } else if (!current.empty()) {
+      pieces.emplace_back().swap(current); // hands the buffer over; current is empty again
+    }
+  }
+  if (!current.empty())
+    pieces.push_back(current);
+  return pieces;
+}
+
 // TODO(rays) Change all callers to use TFile and remove the old functions.
 // Writes to the given file. Returns false in case of error.
 bool STRING::Serialize(FILE *fp) const {
@ -102,27 +119,4 @@ void STRING::split(const char c, std::vector<STRING> *splited) {
  }
 }

-void STRING::add_str_int(const char *str, int number) {
-  if (str != nullptr)
-    *this += str;
-  // Allow space for the maximum possible length of int64_t.
-  char num_buffer[kMaxIntSize];
-  snprintf(num_buffer, kMaxIntSize - 1, "%d", number);
-  num_buffer[kMaxIntSize - 1] = '\0';
-  *this += num_buffer;
-}
-
-// Appends the given string and double (as a %.8g) to this.
-void STRING::add_str_double(const char *str, double number) {
-  if (str != nullptr)
-    *this += str;
-  std::stringstream stream;
-  // Use "C" locale (needed for double value).
-  stream.imbue(std::locale::classic());
-  // Use 8 digits for double value.
-  stream.precision(8);
-  stream << number;
-  *this += stream.str().c_str();
-}
-
 } // namespace tesseract
--- a/src/ccutil/strngs.h
+++ b/src/ccutil/strngs.h
@ -32,6 +32,9 @@ namespace tesseract {

 class TFile;

+TESS_API
+const std::vector<std::string> split(const std::string &s, char c);
+
 class STRING : public std::string {
 public:
  using std::string::string;
@ -61,16 +64,6 @@ public:

  TESS_API
  void split(char c, std::vector<STRING> *splited);
-
-  // Appends the given string and int (as a %d) to this.
-  // += cannot be used for ints as there as a char += operator that would
-  // be ambiguous, and ints usually need a string before or between them
-  // anyway.
-  TESS_API
-  void add_str_int(const char *str, int number);
-  // Appends the given string and double (as a %.8g) to this.
-  TESS_API
-  void add_str_double(const char *str, double number);
 };

 } // namespace tesseract.
--- a/src/ccutil/unicharcompress.cpp
+++ b/src/ccutil/unicharcompress.cpp
@ -325,17 +325,17 @@ bool UnicharCompress::DeSerialize(TFile *fp) {
 // will encode a single index to a UTF8-string, but Chinese, Japanese, Korean
 // and the Indic scripts will contain a many-to-many mapping.
 // See the class comment above for details.
-STRING UnicharCompress::GetEncodingAsString(const UNICHARSET &unicharset) const {
-  STRING encoding;
+std::string UnicharCompress::GetEncodingAsString(const UNICHARSET &unicharset) const {
+  std::string encoding;
  for (int c = 0; c < encoder_.size(); ++c) {
    const RecodedCharID &code = encoder_[c];
    if (0 < c && c < SPECIAL_UNICHAR_CODES_COUNT && code == encoder_[c - 1]) {
      // Don't show the duplicate entry.
      continue;
    }
-    encoding.add_str_int("", code(0));
+    encoding += std::to_string(code(0));
    for (int i = 1; i < code.length(); ++i) {
-      encoding.add_str_int(",", code(i));
+      encoding += "," + std::to_string(code(i));
    }
    encoding += "\t";
    if (c >= unicharset.size() ||
--- a/src/ccutil/unicharcompress.h
+++ b/src/ccutil/unicharcompress.h
@ -196,14 +196,14 @@ public:

  bool DeSerialize(TFile *fp);

-  // Returns a STRING containing a text file that describes the encoding thus:
+  // Returns a string containing a text file that describes the encoding thus:
  // <index>[,<index>]*<tab><UTF8-str><newline>
  // In words, a comma-separated list of one or more indices, followed by a tab
  // and the UTF-8 string that the code represents per line. Most simple scripts
  // will encode a single index to a UTF8-string, but Chinese, Japanese, Korean
  // and the Indic scripts will contain a many-to-many mapping.
  // See the class comment above for details.
-  STRING GetEncodingAsString(const UNICHARSET &unicharset) const;
+  std::string GetEncodingAsString(const UNICHARSET &unicharset) const;

  // Helper decomposes a Hangul unicode to 3 parts, leading, vowel, trailing.
  // Note that the returned values are 0-based indices, NOT unicode Jamo.
--- a/src/classify/blobclass.cpp
+++ b/src/classify/blobclass.cpp
@ -80,7 +80,7 @@ void Classify::LearnBlob(const STRING &fontname, TBLOB *blob, const DENORM &cn_d
    tr_file_data_ += "\n";

    // write micro-features to file and clean up
-    WriteCharDescription(feature_defs_, CharDesc, &tr_file_data_);
+    WriteCharDescription(feature_defs_, CharDesc, tr_file_data_);
  } else {
    tprintf("Blob learned was invalid!\n");
  }
--- a/src/classify/blobclass.h
+++ b/src/classify/blobclass.h
@ -18,14 +18,8 @@
 #ifndef BLOBCLASS_H
 #define BLOBCLASS_H

-/**----------------------------------------------------------------------------
-          Include Files and Type Defines
----------------------------------------------------------------------------**/
-#include "strngs.h"
+#include <string>

-/**----------------------------------------------------------------------------
-          Public Function Prototypes
----------------------------------------------------------------------------**/
 namespace tesseract {
 // Finds the name of the training font and returns it in fontname, by cutting
 // it out based on the expectation that the filename is of the form:
--- a/src/classify/featdefs.cpp
+++ b/src/classify/featdefs.cpp
@ -145,19 +145,19 @@ CHAR_DESC NewCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs) {
 * @param str            string to append CharDesc to
 * @param CharDesc       character description to write to File
 */
-void WriteCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, CHAR_DESC CharDesc, STRING *str) {
+void WriteCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, CHAR_DESC CharDesc, std::string &str) {
  int NumSetsToWrite = 0;

  for (size_t Type = 0; Type < CharDesc->NumFeatureSets; Type++)
    if (CharDesc->FeatureSets[Type])
      NumSetsToWrite++;

-  str->add_str_int(" ", NumSetsToWrite);
-  *str += "\n";
+  str += " " + std::to_string(NumSetsToWrite);
+  str += "\n";
  for (size_t Type = 0; Type < CharDesc->NumFeatureSets; Type++) {
    if (CharDesc->FeatureSets[Type]) {
-      *str += FeatureDefs.FeatureDesc[Type]->ShortName;
-      *str += " ";
+      str += FeatureDefs.FeatureDesc[Type]->ShortName;
+      str += " ";
      WriteFeatureSet(CharDesc->FeatureSets[Type], str);
    }
  }
--- a/src/classify/featdefs.h
+++ b/src/classify/featdefs.h
@ -20,6 +20,8 @@

 #include "ocrfeatures.h"

+#include <string>
+
 namespace tesseract {

 /* Enumerate the different types of features currently defined. */
@ -60,7 +62,7 @@ CHAR_DESC NewCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs);

 bool ValidCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, CHAR_DESC CharDesc);

-void WriteCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, CHAR_DESC CharDesc, STRING *str);
+void WriteCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, CHAR_DESC CharDesc, std::string &str);

 TESS_API
 CHAR_DESC ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, FILE *File);
--- a/src/classify/ocrfeatures.cpp
+++ b/src/classify/ocrfeatures.cpp
@ -18,7 +18,8 @@
 #include "ocrfeatures.h"

 #include "scanutils.h"
-#include "strngs.h" // for STRING

 #include <cassert>
 #include <cmath>
+#include <locale>  // for std::locale::classic
+#include <sstream> // for std::stringstream
@ -161,14 +162,20 @@ FEATURE_SET ReadFeatureSet(FILE *File, const FEATURE_DESC_STRUCT *FeatureDesc) {
 * @param Feature feature to write out to str
 * @param str string to write Feature to
 */
-static void WriteFeature(FEATURE Feature, STRING *str) {
+static void WriteFeature(FEATURE Feature, std::string &str) {
  for (int i = 0; i < Feature->Type->NumParams; i++) {
 #ifndef WIN32
    assert(!std::isnan(Feature->Params[i]));
 #endif
-    str->add_str_double(" ", Feature->Params[i]);
+    // std::to_string() formats with "%f" (6 decimals, current C locale), which
+    // truncates small parameters; stream the value in the classic locale.
+    std::stringstream stream;
+    stream.imbue(std::locale::classic()); // always '.' as decimal point
+    stream.precision(8);                  // 8 significant digits
+    stream << Feature->Params[i];
+    str += " " + stream.str();
  }
-  *str += "\n";
+  str += "\n";
 } /* WriteFeature */

 /**
@ -179,10 +186,10 @@ static void WriteFeature(FEATURE Feature, STRING *str) {
 * @param FeatureSet feature set to write to File
 * @param str string to write Feature to
 */
-void WriteFeatureSet(FEATURE_SET FeatureSet, STRING *str) {
+void WriteFeatureSet(FEATURE_SET FeatureSet, std::string &str) {
  if (FeatureSet) {
-    str->add_str_int("", FeatureSet->NumFeatures);
-    *str += "\n";
+    str += std::to_string(FeatureSet->NumFeatures);
+    str += "\n";
    for (int i = 0; i < FeatureSet->NumFeatures; i++) {
      WriteFeature(FeatureSet->Features[i], str);
    }
--- a/src/classify/ocrfeatures.h
+++ b/src/classify/ocrfeatures.h
@ -21,6 +21,7 @@
 #include "blobs.h"

 #include <cstdio>
+#include <string> // for std::string

 namespace tesseract {

@ -117,7 +118,7 @@ FEATURE_SET NewFeatureSet(int NumFeatures);

 FEATURE_SET ReadFeatureSet(FILE *File, const FEATURE_DESC_STRUCT *FeatureDesc);

-void WriteFeatureSet(FEATURE_SET FeatureSet, STRING *str);
+void WriteFeatureSet(FEATURE_SET FeatureSet, std::string &str);

 } // namespace tesseract

--- a/src/classify/shapetable.cpp
+++ b/src/classify/shapetable.cpp
@ -5,7 +5,6 @@
 // Description: Class to map a classifier shape index to unicharset
 //              indices and font indices.
 // Author:      Ray Smith
-// Created:     Tue Nov 02 15:31:32 PDT 2010
 //
 // (C) Copyright 2010, Google Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
@ -274,30 +273,30 @@ void ShapeTable::ReMapClassIds(const GenericVector<int> &unicharset_map) {
 }

 // Returns a string listing the classes/fonts in a shape.
-STRING ShapeTable::DebugStr(int shape_id) const {
+std::string ShapeTable::DebugStr(int shape_id) const {
  if (shape_id < 0 || shape_id >= shape_table_.size())
-    return STRING("INVALID_UNICHAR_ID");
+    return "INVALID_UNICHAR_ID";
  const Shape &shape = GetShape(shape_id);
-  STRING result;
-  result.add_str_int("Shape", shape_id);
+  std::string result;
+  result += "Shape" + std::to_string(shape_id);
  if (shape.size() > 100) {
-    result.add_str_int(" Num unichars=", shape.size());
+    result += " Num unichars=" + std::to_string(shape.size());
    return result;
  }
  for (int c = 0; c < shape.size(); ++c) {
-    result.add_str_int(" c_id=", shape[c].unichar_id);
+    result += " c_id=" + std::to_string(shape[c].unichar_id);
    result += "=";
    result += unicharset_->id_to_unichar(shape[c].unichar_id);
    if (shape.size() < 10) {
-      result.add_str_int(", ", shape[c].font_ids.size());
+      result += ", " + std::to_string(shape[c].font_ids.size());
      result += " fonts =";
      int num_fonts = shape[c].font_ids.size();
      if (num_fonts > 10) {
-        result.add_str_int(" ", shape[c].font_ids[0]);
-        result.add_str_int(" ... ", shape[c].font_ids[num_fonts - 1]);
+        result += " " + std::to_string(shape[c].font_ids[0]);
+        result += " ... " + std::to_string(shape[c].font_ids[num_fonts - 1]);
      } else {
        for (int f = 0; f < num_fonts; ++f) {
-          result.add_str_int(" ", shape[c].font_ids[f]);
+          result += " " + std::to_string(shape[c].font_ids[f]);
        }
      }
    }
@ -306,7 +305,7 @@ STRING ShapeTable::DebugStr(int shape_id) const {
 }

 // Returns a debug string summarizing the table.
-STRING ShapeTable::SummaryStr() const {
+std::string ShapeTable::SummaryStr() const {
  int max_unichars = 0;
  int num_multi_shapes = 0;
  int num_master_shapes = 0;
@ -320,10 +319,10 @@ STRING ShapeTable::SummaryStr() const {
    if (shape_size > max_unichars)
      max_unichars = shape_size;
  }
-  STRING result;
-  result.add_str_int("Number of shapes = ", num_master_shapes);
-  result.add_str_int(" max unichars = ", max_unichars);
-  result.add_str_int(" number with multiple unichars = ", num_multi_shapes);
+  std::string result;
+  result += "Number of shapes = " + std::to_string(num_master_shapes);
+  result += " max unichars = " + std::to_string(max_unichars);
+  result += " number with multiple unichars = " + std::to_string(num_multi_shapes);
  return result;
 }

--- a/src/classify/shapetable.h
+++ b/src/classify/shapetable.h
@ -258,9 +258,9 @@ public:
  // Useful in conjunction with set_unicharset.
  void ReMapClassIds(const GenericVector<int> &unicharset_map);
  // Returns a string listing the classes/fonts in a shape.
-  STRING DebugStr(int shape_id) const;
+  std::string DebugStr(int shape_id) const;
  // Returns a debug string summarizing the table.
-  STRING SummaryStr() const;
+  std::string SummaryStr() const;

  // Adds a new shape starting with the given unichar_id and font_id.
  // Returns the assigned index.
--- a/src/lstm/convolve.h
+++ b/src/lstm/convolve.h
@ -36,11 +36,8 @@ public:
  Convolve(const std::string &name, int ni, int half_x, int half_y);
  ~Convolve() override = default;

-  STRING spec() const override {
-    STRING spec;
-    spec.add_str_int("C", half_x_ * 2 + 1);
-    spec.add_str_int(",", half_y_ * 2 + 1);
-    return spec;
+  std::string spec() const override {
+    return "C" + std::to_string(half_x_ * 2 + 1) + "," + std::to_string(half_y_ * 2 + 1);
  }

  // Writes to the given file. Returns false in case of error.
--- a/src/lstm/fullyconnected.h
+++ b/src/lstm/fullyconnected.h
@ -34,24 +34,24 @@ public:
  // be partially unknown ie zero).
  StaticShape OutputShape(const StaticShape &input_shape) const override;

-  STRING spec() const override {
-    STRING spec;
+  std::string spec() const override {
+    std::string spec;
    if (type_ == NT_TANH)
-      spec.add_str_int("Ft", no_);
+      spec += "Ft" + std::to_string(no_);
    else if (type_ == NT_LOGISTIC)
-      spec.add_str_int("Fs", no_);
+      spec += "Fs" + std::to_string(no_);
    else if (type_ == NT_RELU)
-      spec.add_str_int("Fr", no_);
+      spec += "Fr" + std::to_string(no_);
    else if (type_ == NT_LINEAR)
-      spec.add_str_int("Fl", no_);
+      spec += "Fl" + std::to_string(no_);
    else if (type_ == NT_POSCLIP)
-      spec.add_str_int("Fp", no_);
+      spec += "Fp" + std::to_string(no_);
    else if (type_ == NT_SYMCLIP)
-      spec.add_str_int("Fn", no_);
+      spec += "Fn" + std::to_string(no_);
    else if (type_ == NT_SOFTMAX)
-      spec.add_str_int("Fc", no_);
+      spec += "Fc" + std::to_string(no_);
    else
-      spec.add_str_int("Fm", no_);
+      spec += "Fm" + std::to_string(no_);
    return spec;
  }

--- a/src/lstm/input.h
+++ b/src/lstm/input.h
@ -32,13 +32,9 @@ public:
  Input(const std::string &name, const StaticShape &shape);
  ~Input() override = default;

-  STRING spec() const override {
-    STRING spec;
-    spec.add_str_int("", shape_.batch());
-    spec.add_str_int(",", shape_.height());
-    spec.add_str_int(",", shape_.width());
-    spec.add_str_int(",", shape_.depth());
-    return spec;
+  std::string spec() const override {
+    return std::to_string(shape_.batch()) + "," + std::to_string(shape_.height()) + "," +
+           std::to_string(shape_.width()) + "," + std::to_string(shape_.depth());
  }

  // Returns the required shape input to the network.
--- a/src/lstm/lstm.h
+++ b/src/lstm/lstm.h
@ -55,16 +55,16 @@ public:
  // be partially unknown ie zero).
  StaticShape OutputShape(const StaticShape &input_shape) const override;

-  STRING spec() const override {
-    STRING spec;
+  std::string spec() const override {
+    std::string spec;
    if (type_ == NT_LSTM)
-      spec.add_str_int("Lfx", ns_);
+      spec += "Lfx" + std::to_string(ns_);
    else if (type_ == NT_LSTM_SUMMARY)
-      spec.add_str_int("Lfxs", ns_);
+      spec += "Lfxs" + std::to_string(ns_);
    else if (type_ == NT_LSTM_SOFTMAX)
-      spec.add_str_int("LS", ns_);
+      spec += "LS" + std::to_string(ns_);
    else if (type_ == NT_LSTM_SOFTMAX_ENCODED)
-      spec.add_str_int("LE", ns_);
+      spec += "LE" + std::to_string(ns_);
    if (softmax_ != nullptr)
      spec += softmax_->spec();
    return spec;
--- a/src/lstm/lstmrecognizer.cpp
+++ b/src/lstm/lstmrecognizer.cpp
@ -348,8 +348,8 @@ bool LSTMRecognizer::RecognizeLine(const ImageData &image_data, bool invert, boo

 // Converts an array of labels to utf-8, whether or not the labels are
 // augmented with character boundaries.
-STRING LSTMRecognizer::DecodeLabels(const std::vector<int> &labels) {
-  STRING result;
+std::string LSTMRecognizer::DecodeLabels(const std::vector<int> &labels) {
+  std::string result;
  int end = 1;
  for (int start = 0; start < labels.size(); start = end) {
    if (labels[start] == null_char_) {
--- a/src/lstm/lstmrecognizer.h
+++ b/src/lstm/lstmrecognizer.h
@ -26,7 +26,6 @@
 #include "params.h"
 #include "recodebeam.h"
 #include "series.h"
-#include "strngs.h"
 #include "unicharcompress.h"

 class BLOB_CHOICE_IT;
@ -97,22 +96,22 @@ public:
  }
  // Returns a vector of layer ids that can be passed to other layer functions
  // to access a specific layer.
-  std::vector<STRING> EnumerateLayers() const {
+  std::vector<std::string> EnumerateLayers() const {
    ASSERT_HOST(network_ != nullptr && network_->type() == NT_SERIES);
    auto *series = static_cast<Series *>(network_);
-    std::vector<STRING> layers;
-    series->EnumerateLayers(nullptr, &layers);
+    std::vector<std::string> layers;
+    series->EnumerateLayers(nullptr, layers);
    return layers;
  }
  // Returns a specific layer from its id (from EnumerateLayers).
-  Network *GetLayer(const STRING &id) const {
+  Network *GetLayer(const std::string &id) const {
    ASSERT_HOST(network_ != nullptr && network_->type() == NT_SERIES);
    ASSERT_HOST(id.length() > 1 && id[0] == ':');
    auto *series = static_cast<Series *>(network_);
    return series->GetLayer(&id[1]);
  }
  // Returns the learning rate of the layer from its id.
-  float GetLayerLearningRate(const STRING &id) const {
+  float GetLayerLearningRate(const std::string &id) const {
    ASSERT_HOST(network_ != nullptr && network_->type() == NT_SERIES);
    if (network_->TestFlag(NF_LAYER_SPECIFIC_LR)) {
      ASSERT_HOST(id.length() > 1 && id[0] == ':');
@ -143,14 +142,14 @@ public:
    ASSERT_HOST(network_ != nullptr && network_->type() == NT_SERIES);
    learning_rate_ *= factor;
    if (network_->TestFlag(NF_LAYER_SPECIFIC_LR)) {
-      std::vector<STRING> layers = EnumerateLayers();
+      std::vector<std::string> layers = EnumerateLayers();
      for (int i = 0; i < layers.size(); ++i) {
        ScaleLayerLearningRate(layers[i], factor);
      }
    }
  }
  // Multiplies the learning rate of the layer with id, by the given factor.
-  void ScaleLayerLearningRate(const STRING &id, double factor) {
+  void ScaleLayerLearningRate(const std::string &id, double factor) {
    ASSERT_HOST(network_ != nullptr && network_->type() == NT_SERIES);
    ASSERT_HOST(id.length() > 1 && id[0] == ':');
    auto *series = static_cast<Series *>(network_);
@ -248,7 +247,7 @@ public:

  // Converts an array of labels to utf-8, whether or not the labels are
  // augmented with character boundaries.
-  STRING DecodeLabels(const std::vector<int> &labels);
+  std::string DecodeLabels(const std::vector<int> &labels);

  // Displays the forward results in a window with the characters and
  // boundaries as determined by the labels and label_coords.
--- a/src/lstm/maxpool.h
+++ b/src/lstm/maxpool.h
@ -33,11 +33,8 @@ public:
  ~Maxpool() override = default;

  // Accessors.
-  STRING spec() const override {
-    STRING spec;
-    spec.add_str_int("Mp", y_scale_);
-    spec.add_str_int(",", x_scale_);
-    return spec;
+  std::string spec() const override {
+    return "Mp" + std::to_string(y_scale_) + "," + std::to_string(x_scale_);
  }

  // Reads from the given file. Returns false in case of error.
--- a/src/lstm/network.cpp
+++ b/src/lstm/network.cpp
@ -153,8 +153,8 @@ bool Network::Serialize(TFile *fp) const {
  int8_t data = NT_NONE;
  if (!fp->Serialize(&data))
    return false;
-  STRING type_name = kTypeNames[type_];
-  if (!type_name.Serialize(fp))
+  std::string type_name = kTypeNames[type_];
+  if (!fp->Serialize(type_name))
    return false;
  data = training_;
  if (!fp->Serialize(&data))
@ -181,8 +181,8 @@ static NetworkType getNetworkType(TFile *fp) {
  if (!fp->DeSerialize(&data))
    return NT_NONE;
  if (data == NT_NONE) {
-    STRING type_name;
-    if (!type_name.DeSerialize(fp))
+    std::string type_name;
+    if (!fp->DeSerialize(type_name))
      return NT_NONE;
    for (data = 0; data < NT_COUNT && type_name != kTypeNames[data]; ++data) {
    }
@ -205,7 +205,7 @@ Network *Network::CreateFromFile(TFile *fp) {
  int32_t ni;             // Number of input values.
  int32_t no;             // Number of output values.
  int32_t num_weights;    // Number of weights in this and sub-network.
-  STRING name;            // A unique name for this layer.
+  std::string name;       // A unique name for this layer.
  int8_t data;
  Network *network = nullptr;
  type = getNetworkType(fp);
@ -223,7 +223,7 @@ Network *Network::CreateFromFile(TFile *fp) {
    return nullptr;
  if (!fp->DeSerialize(&num_weights))
    return nullptr;
-  if (!name.DeSerialize(fp))
+  if (!fp->DeSerialize(name))
    return nullptr;

  switch (type) {
--- a/src/lstm/network.h
+++ b/src/lstm/network.h
@ -23,7 +23,6 @@
 #include "networkio.h"
 #include "serialis.h"
 #include "static_shape.h"
-#include "strngs.h" // for STRING
 #include "tprintf.h"

 #include <cmath>
@ -141,7 +140,7 @@ public:
  const std::string &name() const {
    return name_;
  }
-  virtual STRING spec() const {
+  virtual std::string spec() const {
    return "?";
  }
  bool TestFlag(NetworkFlags flag) const {
--- a/src/lstm/parallel.h
+++ b/src/lstm/parallel.h
@ -35,26 +35,24 @@ public:
  // be partially unknown ie zero).
  StaticShape OutputShape(const StaticShape &input_shape) const override;

-  STRING spec() const override {
-    STRING spec;
+  std::string spec() const override {
+    std::string spec;
    if (type_ == NT_PAR_2D_LSTM) {
      // We have 4 LSTMs operating in parallel here, so the size of each is
      // the number of outputs/4.
-      spec.add_str_int("L2xy", no_ / 4);
+      spec += "L2xy" + std::to_string(no_ / 4);
    } else if (type_ == NT_PAR_RL_LSTM) {
      // We have 2 LSTMs operating in parallel here, so the size of each is
      // the number of outputs/2.
      if (stack_[0]->type() == NT_LSTM_SUMMARY)
-        spec.add_str_int("Lbxs", no_ / 2);
+        spec += "Lbxs" + std::to_string(no_ / 2);
      else
-        spec.add_str_int("Lbx", no_ / 2);
+        spec += "Lbx" + std::to_string(no_ / 2);
    } else {
      if (type_ == NT_REPLICATED) {
-        spec.add_str_int("R", stack_.size());
-        spec += "(";
-        spec += stack_[0]->spec();
+        spec += "R" + std::to_string(stack_.size()) + "(" + stack_[0]->spec();
      } else {
        spec = "(";
        for (int i = 0; i < stack_.size(); ++i)
          spec += stack_[i]->spec();
      }
--- a/src/lstm/plumbing.cpp
+++ b/src/lstm/plumbing.cpp
@ -3,7 +3,6 @@
 // Description: Base class for networks that organize other networks
 //              eg series or parallel.
 // Author:      Ray Smith
-// Created:     Mon May 12 08:17:34 PST 2014
 //
 // (C) Copyright 2014, Google Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
@ -135,17 +134,17 @@ void Plumbing::DebugWeights() {
 }

 // Returns a set of strings representing the layer-ids of all layers below.
-void Plumbing::EnumerateLayers(const STRING *prefix, std::vector<STRING> *layers) const {
+void Plumbing::EnumerateLayers(const std::string *prefix, std::vector<std::string> &layers) const {
  for (int i = 0; i < stack_.size(); ++i) {
-    STRING layer_name;
+    std::string layer_name;
    if (prefix)
      layer_name = *prefix;
-    layer_name.add_str_int(":", i);
+    layer_name += ":" + std::to_string(i);
    if (stack_[i]->IsPlumbingType()) {
      auto *plumbing = static_cast<Plumbing *>(stack_[i]);
      plumbing->EnumerateLayers(&layer_name, layers);
    } else {
-      layers->push_back(layer_name);
+      layers.push_back(layer_name);
    }
  }
 }
--- a/src/lstm/plumbing.h
+++ b/src/lstm/plumbing.h
@ -36,7 +36,7 @@ public:
  StaticShape InputShape() const override {
    return stack_[0]->InputShape();
  }
-  STRING spec() const override {
+  std::string spec() const override {
    return "Sub-classes of Plumbing must implement spec()!";
  }

@ -101,7 +101,7 @@ public:
  }
  // Returns a set of strings representing the layer-ids of all layers below.
  TESS_API
-  void EnumerateLayers(const STRING *prefix, std::vector<STRING> *layers) const;
+  void EnumerateLayers(const std::string *prefix, std::vector<std::string> &layers) const;
  // Returns a pointer to the network layer corresponding to the given id.
  TESS_API
  Network *GetLayer(const char *id) const;
--- a/src/lstm/reconfig.h
+++ b/src/lstm/reconfig.h
@ -38,11 +38,8 @@ public:
  // be partially unknown ie zero).
  StaticShape OutputShape(const StaticShape &input_shape) const override;

-  STRING spec() const override {
-    STRING spec;
-    spec.add_str_int("S", y_scale_);
-    spec.add_str_int(",", x_scale_);
-    return spec;
+  std::string spec() const override {
+    return "S" + std::to_string(y_scale_) + "," + std::to_string(x_scale_);
  }

  // Returns an integer reduction factor that the network applies to the
--- a/src/lstm/reversed.h
+++ b/src/lstm/reversed.h
@ -2,7 +2,6 @@
 // File:        reversed.h
 // Description: Runs a single network on time-reversed input, reversing output.
 // Author:      Ray Smith
-// Created:     Thu May 02 08:38:06 PST 2013
 //
 // (C) Copyright 2013, Google Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
@ -35,15 +34,15 @@ public:
  // be partially unknown ie zero).
  StaticShape OutputShape(const StaticShape &input_shape) const override;

-  STRING spec() const override {
-    STRING spec(type_ == NT_XREVERSED ? "Rx" : (type_ == NT_YREVERSED ? "Ry" : "Txy"));
+  std::string spec() const override {
+    std::string spec(type_ == NT_XREVERSED ? "Rx" : (type_ == NT_YREVERSED ? "Ry" : "Txy"));
    // For most simple cases, we will output Rx<net> or Ry<net> where <net> is
    // the network in stack_[0], but in the special case that <net> is an
    // LSTM, we will just output the LSTM's spec modified to take the reversal
    // into account. This is because when the user specified Lfy64, we actually
    // generated TxyLfx64, and if the user specified Lrx64 we actually
    // generated RxLfx64, and we want to display what the user asked for.
-    STRING net_spec = stack_[0]->spec();
+    std::string net_spec(stack_[0]->spec());
    if (net_spec[0] == 'L') {
      // Setup a from and to character according to the type of the reversal
      // such that the LSTM spec gets modified to the spec that the user
@ -59,7 +58,9 @@ public:
        if (net_spec[i] == from)
          net_spec[i] = to;
      }
+      // Return only the adjusted LSTM spec (no Rx/Ry/Txy prefix): as explained
+      // at the top of this function, that is the spec the user asked for.
      return net_spec;
    }
    spec += net_spec;
    return spec;
--- a/src/lstm/series.h
+++ b/src/lstm/series.h
@ -2,7 +2,6 @@
 // File:        series.h
 // Description: Runs networks in series on the same input.
 // Author:      Ray Smith
-// Created:     Thu May 02 08:20:06 PST 2013
 //
 // (C) Copyright 2013, Google Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
@ -35,8 +34,8 @@ public:
  // be partially unknown ie zero).
  StaticShape OutputShape(const StaticShape &input_shape) const override;

-  STRING spec() const override {
-    STRING spec("[");
+  std::string spec() const override {
+    std::string spec("[");
    for (int i = 0; i < stack_.size(); ++i)
      spec += stack_[i]->spec();
    spec += "]";
--- a/src/lstm/tfnetwork.h
+++ b/src/lstm/tfnetwork.h
@ -47,8 +47,8 @@ public:
    return output_shape_;
  }

-  STRING spec() const override {
-    return spec_.c_str();
+  std::string spec() const override {
+    return spec_;
  }

  // Deserializes *this from a serialized TFNetwork proto. Returns 0 if failed,
--- a/src/training/common/errorcounter.cpp
+++ b/src/training/common/errorcounter.cpp
@ -334,7 +334,7 @@ bool ErrorCounter::AccumulateJunk(bool debug, const std::vector<UnicharRating> &
 // (Ignoring report_level).
 double ErrorCounter::ReportErrors(int report_level, CountTypes boosting_mode,
                                  const FontInfoTable &fontinfo_table, const SampleIterator &it,
-                                  double *unichar_error, STRING *fonts_report) {
+                                  double *unichar_error, std::string *fonts_report) {
  // Compute totals over all the fonts and report individual font results
  // when required.
  Counts totals;
@ -342,8 +342,8 @@ double ErrorCounter::ReportErrors(int report_level, CountTypes boosting_mode,
  for (int f = 0; f < fontsize; ++f) {
    // Accumulate counts over fonts.
    totals += font_counts_[f];
-    STRING font_report;
-    if (ReportString(false, font_counts_[f], &font_report)) {
+    std::string font_report;
+    if (ReportString(false, font_counts_[f], font_report)) {
      if (fonts_report != nullptr) {
        *fonts_report += fontinfo_table.get(f).name;
        *fonts_report += ": ";
@ -357,8 +357,8 @@ double ErrorCounter::ReportErrors(int report_level, CountTypes boosting_mode,
    }
  }
  // Report the totals.
-  STRING total_report;
-  bool any_results = ReportString(true, totals, &total_report);
+  std::string total_report;
+  bool any_results = ReportString(true, totals, total_report);
  if (fonts_report != nullptr && fonts_report->length() == 0) {
    // Make sure we return something even if there were no samples.
    *fonts_report = "NoSamplesFound: ";
@ -419,7 +419,7 @@ double ErrorCounter::ReportErrors(int report_level, CountTypes boosting_mode,
 // string of the error rates.
 // Returns false if there is no data, leaving report unchanged, unless
 // even_if_empty is true.
-bool ErrorCounter::ReportString(bool even_if_empty, const Counts &counts, STRING *report) {
+bool ErrorCounter::ReportString(bool even_if_empty, const Counts &counts, std::string &report) {
  // Compute the error rates.
  double rates[CT_SIZE];
  if (!ComputeRates(counts, rates) && !even_if_empty)
@ -443,12 +443,12 @@ bool ErrorCounter::ReportString(bool even_if_empty, const Counts &counts, STRING
           rates[CT_OK_JOINED] * 100.0, rates[CT_OK_BROKEN] * 100.0, rates[CT_REJECT] * 100.0,
           rates[CT_FONT_ATTR_ERR] * 100.0, rates[CT_OK_MULTI_FONT] * 100.0, rates[CT_NUM_RESULTS],
           rates[CT_RANK], 100.0 * rates[CT_REJECTED_JUNK], 100.0 * rates[CT_ACCEPTED_JUNK]);
-  *report = formatted_str;
+  report = formatted_str;
  delete[] formatted_str;
  // Now append each field of counts with a tab in front so the result can
  // be loaded into a spreadsheet.
  for (int ct : counts.n)
-    report->add_str_int("\t", ct);
+    report += "\t" + std::to_string(ct);
  return true;
 }

--- a/src/training/common/errorcounter.h
+++ b/src/training/common/errorcounter.h
@ -178,13 +178,13 @@ private:
  // (Ignoring report_level).
  double ReportErrors(int report_level, CountTypes boosting_mode,
                      const FontInfoTable &fontinfo_table, const SampleIterator &it,
-                      double *unichar_error, STRING *fonts_report);
+                      double *unichar_error, std::string *fonts_report);

  // Sets the report string to a combined human and machine-readable report
  // string of the error rates.
  // Returns false if there is no data, leaving report unchanged, unless
  // even_if_empty is true.
-  static bool ReportString(bool even_if_empty, const Counts &counts, STRING *report);
+  static bool ReportString(bool even_if_empty, const Counts &counts, std::string &report);

  // Computes the error rates and returns in rates which is an array of size
  // CT_SIZE. Returns false if there is no data, leaving rates unchanged.
--- a/src/training/common/trainingsampleset.cpp
+++ b/src/training/common/trainingsampleset.cpp
@ -218,11 +218,11 @@ TrainingSample *TrainingSampleSet::MutableSample(int font_id, int class_id, int

 // Returns a string debug representation of the given sample:
 // font, unichar_str, bounding box, page.
-STRING TrainingSampleSet::SampleToString(const TrainingSample &sample) const {
-  STRING boxfile_str;
+std::string TrainingSampleSet::SampleToString(const TrainingSample &sample) const {
+  std::string boxfile_str;
  MakeBoxFileStr(unicharset_.id_to_unichar(sample.class_id()), sample.bounding_box(),
-                 sample.page_num(), &boxfile_str);
-  return STRING(fontinfo_table_.get(sample.font_id()).name) + " " + boxfile_str;
+                 sample.page_num(), boxfile_str);
+  return std::string(fontinfo_table_.get(sample.font_id()).name) + " " + boxfile_str;
 }

 // Gets the combined set of features used by all the samples of the given
--- a/src/training/common/trainingsampleset.h
+++ b/src/training/common/trainingsampleset.h
@ -101,7 +101,7 @@ public:

  // Returns a string debug representation of the given sample:
  // font, unichar_str, bounding box, page.
-  STRING SampleToString(const TrainingSample &sample) const;
+  std::string SampleToString(const TrainingSample &sample) const;

  // Gets the combined set of features used by all the samples of the given
  // font/class combination.
--- a/src/training/lstmtraining.cpp
+++ b/src/training/lstmtraining.cpp
@ -137,7 +137,7 @@ int main(int argc, char **argv) {
    tprintf("Must supply a list of training filenames! --train_listfile\n");
    return EXIT_FAILURE;
  }
-  std::vector<STRING> filenames;
+  std::vector<std::string> filenames;
  if (!tesseract::LoadFileLinesToStrings(FLAGS_train_listfile.c_str(), &filenames)) {
    tprintf("Failed to load list of training filenames from %s\n", FLAGS_train_listfile.c_str());
    return EXIT_FAILURE;
@ -213,8 +213,8 @@ int main(int argc, char **argv) {
         iteration = trainer.training_iteration()) {
      trainer.TrainOnLine(&trainer, false);
    }
-    STRING log_str;
-    trainer.MaintainCheckpoints(tester_callback, &log_str);
+    std::string log_str;
+    trainer.MaintainCheckpoints(tester_callback, log_str);
    tprintf("%s\n", log_str.c_str());
  } while (trainer.best_error_rate() > FLAGS_target_error_rate &&
           (trainer.training_iteration() < max_iterations));
--- a/src/training/unicharset/fileio.h
+++ b/src/training/unicharset/fileio.h
@ -28,15 +28,16 @@

 namespace tesseract {

-// Reads a file as a vector of STRING.
-// TODO: Use std::vector and std::string for LoadFileLinesToStrings.
-inline bool LoadFileLinesToStrings(const char *filename, std::vector<STRING> *lines) {
+// Reads a file and splits it at '\n' into a vector of std::string.
+inline bool LoadFileLinesToStrings(const char *filename, std::vector<std::string> *lines) {
  std::vector<char> data;
  if (!LoadDataFromFile(filename, &data)) {
    return false;
  }
-  STRING lines_str(&data[0], data.size());
-  lines_str.split('\n', lines);
+  // TODO: optimize.
+  // data.data() is valid even when the file is empty, unlike &data[0].
+  std::string lines_str(data.data(), data.size());
+  *lines = split(lines_str, '\n');
  return true;
 }

--- a/src/training/unicharset/lang_model_helpers.cpp
+++ b/src/training/unicharset/lang_model_helpers.cpp
@ -119,8 +119,8 @@ bool WriteRecoder(const UNICHARSET &unicharset, bool pass_through, const std::st
  STRING encoding = recoder.GetEncodingAsString(unicharset);
  recoder_data.resize(encoding.length(), 0);
  memcpy(&recoder_data[0], &encoding[0], encoding.length());
-  STRING suffix;
-  suffix.add_str_int(".charset_size=", recoder.code_range());
+  std::string suffix;
+  suffix += ".charset_size=" + std::to_string(recoder.code_range());
  suffix += ".txt";
  return WriteFile(output_dir, lang, suffix.c_str(), recoder_data, writer);
 }
--- a/src/training/unicharset/lstmtester.cpp
+++ b/src/training/unicharset/lstmtester.cpp
@ -27,7 +27,7 @@ LSTMTester::LSTMTester(int64_t max_memory) : test_data_(max_memory) {}
 // tesseract into memory ready for testing. Returns false if nothing was
 // loaded. The arg is a filename of a file that lists the filenames.
 bool LSTMTester::LoadAllEvalData(const char *filenames_file) {
-  std::vector<STRING> filenames;
+  std::vector<std::string> filenames;
  if (!LoadFileLinesToStrings(filenames_file, &filenames)) {
    tprintf("Failed to load list of eval filenames from %s\n", filenames_file);
    return false;
@ -38,7 +38,7 @@ bool LSTMTester::LoadAllEvalData(const char *filenames_file) {
 // Loads a set of lstmf files that were created using the lstm.train config to
 // tesseract into memory ready for testing. Returns false if nothing was
 // loaded.
-bool LSTMTester::LoadAllEvalData(const std::vector<STRING> &filenames) {
+bool LSTMTester::LoadAllEvalData(const std::vector<std::string> &filenames) {
  test_data_.Clear();
  bool result = test_data_.LoadDocuments(filenames, CS_SEQUENTIAL, nullptr);
  total_pages_ = test_data_.TotalPages();
@ -47,19 +47,19 @@ bool LSTMTester::LoadAllEvalData(const std::vector<STRING> &filenames) {

 // Runs an evaluation asynchronously on the stored data and returns a string
 // describing the results of the previous test.
-STRING LSTMTester::RunEvalAsync(int iteration, const double *training_errors,
-                                const TessdataManager &model_mgr, int training_stage) {
-  STRING result;
+std::string LSTMTester::RunEvalAsync(int iteration, const double *training_errors,
+                                     const TessdataManager &model_mgr, int training_stage) {
+  std::string result;
  if (total_pages_ == 0) {
-    result.add_str_int("No test data at iteration ", iteration);
+    result += "No test data at iteration " + std::to_string(iteration);
    return result;
  }
  if (!LockIfNotRunning()) {
-    result.add_str_int("Previous test incomplete, skipping test at iteration ", iteration);
+    result += "Previous test incomplete, skipping test at iteration " + std::to_string(iteration);
    return result;
  }
  // Save the args.
-  STRING prev_result = test_result_;
+  std::string prev_result = test_result_;
  test_result_ = "";
  if (training_errors != nullptr) {
    test_iteration_ = iteration;
@ -76,9 +76,9 @@ STRING LSTMTester::RunEvalAsync(int iteration, const double *training_errors,

 // Runs an evaluation synchronously on the stored data and returns a string
 // describing the results.
-STRING LSTMTester::RunEvalSync(int iteration, const double *training_errors,
-                               const TessdataManager &model_mgr, int training_stage,
-                               int verbosity) {
+std::string LSTMTester::RunEvalSync(int iteration, const double *training_errors,
+                                    const TessdataManager &model_mgr, int training_stage,
+                                    int verbosity) {
  LSTMTrainer trainer;
  trainer.InitCharSet(model_mgr);
  TFile fp;
@ -110,11 +110,11 @@ STRING LSTMTester::RunEvalSync(int iteration, const double *training_errors,
  }
  char_error *= 100.0 / total_pages_;
  word_error *= 100.0 / total_pages_;
-  STRING result;
-  result.add_str_int("At iteration ", iteration);
-  result.add_str_int(", stage ", training_stage);
-  result.add_str_double(", Eval Char error rate=", char_error);
-  result.add_str_double(", Word error rate=", word_error);
+  std::string result;
+  result += "At iteration " + std::to_string(iteration);
+  result += ", stage " + std::to_string(training_stage);
+  result += ", Eval Char error rate=" + std::to_string(char_error);
+  result += ", Word error rate=" + std::to_string(word_error);
  return result;
 }

--- a/src/training/unicharset/lstmtester.h
+++ b/src/training/unicharset/lstmtester.h
@ -21,9 +21,9 @@
 #include "export.h"

 #include "lstmtrainer.h"
-#include "strngs.h"

 #include <mutex>
+#include <string>
 #include <vector>

 namespace tesseract {
@ -41,7 +41,7 @@ public:
  // Loads a set of lstmf files that were created using the lstm.train config to
  // tesseract into memory ready for testing. Returns false if nothing was
  // loaded.
-  bool LoadAllEvalData(const std::vector<STRING> &filenames);
+  bool LoadAllEvalData(const std::vector<std::string> &filenames);

  // Runs an evaluation asynchronously on the stored eval data and returns a
  // string describing the results of the previous test. Args match TestCallback
@ -54,13 +54,13 @@ public:
  // model_data: is the model to evaluate, which should be a serialized
  //   LSTMTrainer.
  // training_stage: an arbitrary number on the progress of training.
-  STRING RunEvalAsync(int iteration, const double *training_errors,
-                      const TessdataManager &model_mgr, int training_stage);
+  std::string RunEvalAsync(int iteration, const double *training_errors,
+                           const TessdataManager &model_mgr, int training_stage);
  // Runs an evaluation synchronously on the stored eval data and returns a
  // string describing the results. Args as RunEvalAsync, except verbosity,
  // which outputs errors, if 1, or all results if 2.
-  STRING RunEvalSync(int iteration, const double *training_errors, const TessdataManager &model_mgr,
-                     int training_stage, int verbosity);
+  std::string RunEvalSync(int iteration, const double *training_errors, const TessdataManager &model_mgr,
+                          int training_stage, int verbosity);

 private:
  // Helper thread function for RunEvalAsync.
@ -85,7 +85,7 @@ private:
  const double *test_training_errors_ = nullptr;
  TessdataManager test_model_mgr_;
  int test_training_stage_ = 0;
-  STRING test_result_;
+  std::string test_result_;
 };

 } // namespace tesseract
--- a/src/training/unicharset/lstmtrainer.cpp
+++ b/src/training/unicharset/lstmtrainer.cpp
@ -152,7 +152,8 @@ bool LSTMTrainer::InitNetwork(const char *network_spec, int append_index, int ne
    return false;
  }
  network_str_ += network_spec;
-  tprintf("Built network:%s from request %s\n", network_->spec().c_str(), network_spec);
+  tprintf("Built network:%s from request %s\n",
+          network_->spec().c_str(), network_spec);
  tprintf(
      "Training parameters:\n  Debug interval = %d,"
      " weights = %g, learning rate = %g, momentum=%g\n",
@ -209,7 +210,7 @@ Trainability LSTMTrainer::GridSearchDictParams(const ImageData *trainingdata, in
                                               double min_dict_ratio, double dict_ratio_step,
                                               double max_dict_ratio, double min_cert_offset,
                                               double cert_offset_step, double max_cert_offset,
-                                               STRING *results) {
+                                               std::string &results) {
  sample_iteration_ = iteration;
  NetworkIO fwd_outputs, targets;
  Trainability result = PrepareForBackward(trainingdata, &fwd_outputs, &targets);
@ -223,10 +224,10 @@ Trainability LSTMTrainer::GridSearchDictParams(const ImageData *trainingdata, in
  RecodeBeamSearch base_search(recoder_, null_char_, SimpleTextOutput(), nullptr);
  base_search.Decode(fwd_outputs, 1.0, 0.0, RecodeBeamSearch::kMinCertainty, nullptr);
  base_search.ExtractBestPathAsLabels(&ocr_labels, &xcoords);
-  STRING truth_text = DecodeLabels(truth_labels);
-  STRING ocr_text = DecodeLabels(ocr_labels);
+  std::string truth_text = DecodeLabels(truth_labels);
+  std::string ocr_text = DecodeLabels(ocr_labels);
  double baseline_error = ComputeWordError(&truth_text, &ocr_text);
-  results->add_str_double("0,0=", baseline_error);
+  results += "0,0=" + std::to_string(baseline_error);

  RecodeBeamSearch search(recoder_, null_char_, SimpleTextOutput(), dict_);
  for (double r = min_dict_ratio; r < max_dict_ratio; r += dict_ratio_step) {
@ -238,14 +239,14 @@ Trainability LSTMTrainer::GridSearchDictParams(const ImageData *trainingdata, in
      // This is destructive on both strings.
      double word_error = ComputeWordError(&truth_text, &ocr_text);
      if ((r == min_dict_ratio && c == min_cert_offset) || !std::isfinite(word_error)) {
-        STRING t = DecodeLabels(truth_labels);
-        STRING o = DecodeLabels(ocr_labels);
+        std::string t = DecodeLabels(truth_labels);
+	std::string o = DecodeLabels(ocr_labels);
        tprintf("r=%g, c=%g, truth=%s, ocr=%s, wderr=%g, truth[0]=%d\n", r, c, t.c_str(), o.c_str(),
                word_error, truth_labels[0]);
      }
-      results->add_str_double(" ", r);
-      results->add_str_double(",", c);
-      results->add_str_double("=", word_error);
+      results += " " + std::to_string(r);
+      results += "," + std::to_string(c);
+      results += "=" + std::to_string(word_error);
    }
  }
  return result;
@ -259,7 +260,7 @@ void LSTMTrainer::DebugNetwork() {
 // Loads a set of lstmf files that were created using the lstm.train config to
 // tesseract into memory ready for training. Returns false if nothing was
 // loaded.
-bool LSTMTrainer::LoadAllTrainingData(const std::vector<STRING> &filenames,
+bool LSTMTrainer::LoadAllTrainingData(const std::vector<std::string> &filenames,
                                      CachingStrategy cache_strategy, bool randomly_rotate) {
  randomly_rotate_ = randomly_rotate;
  training_data_.Clear();
@ -270,7 +271,7 @@ bool LSTMTrainer::LoadAllTrainingData(const std::vector<STRING> &filenames,
 // using tester, when a new min or max is reached.
 // Writes checkpoints at appropriate times and builds and returns a log message
 // to indicate progress. Returns false if nothing interesting happened.
-bool LSTMTrainer::MaintainCheckpoints(TestCallback tester, STRING *log_msg) {
+bool LSTMTrainer::MaintainCheckpoints(TestCallback tester, std::string &log_msg) {
  PrepareLogMsg(log_msg);
  double error_rate = CharError();
  int iteration = learning_iteration();
@ -295,35 +296,35 @@ bool LSTMTrainer::MaintainCheckpoints(TestCallback tester, STRING *log_msg) {
  std::vector<char> rec_model_data;
  if (error_rate < best_error_rate_) {
    SaveRecognitionDump(&rec_model_data);
-    log_msg->add_str_double(" New best char error = ", error_rate);
-    *log_msg += UpdateErrorGraph(iteration, error_rate, rec_model_data, tester);
+    log_msg += " New best char error = " + std::to_string(error_rate);
+    log_msg += UpdateErrorGraph(iteration, error_rate, rec_model_data, tester);
    // If sub_trainer_ is not nullptr, either *this beat it to a new best, or it
    // just overwrote *this. In either case, we have finished with it.
    delete sub_trainer_;
    sub_trainer_ = nullptr;
    stall_iteration_ = learning_iteration() + kMinStallIterations;
    if (TransitionTrainingStage(kStageTransitionThreshold)) {
-      log_msg->add_str_int(" Transitioned to stage ", CurrentTrainingStage());
+      log_msg += " Transitioned to stage " + std::to_string(CurrentTrainingStage());
    }
    SaveTrainingDump(NO_BEST_TRAINER, this, &best_trainer_);
    if (error_rate < error_rate_of_last_saved_best_ * kBestCheckpointFraction) {
      STRING best_model_name = DumpFilename();
      if (!SaveDataToFile(best_trainer_, best_model_name.c_str())) {
-        *log_msg += " failed to write best model:";
+        log_msg += " failed to write best model:";
      } else {
-        *log_msg += " wrote best model:";
+        log_msg += " wrote best model:";
        error_rate_of_last_saved_best_ = best_error_rate_;
      }
-      *log_msg += best_model_name;
+      log_msg += best_model_name;
    }
  } else if (error_rate > worst_error_rate_) {
    SaveRecognitionDump(&rec_model_data);
-    log_msg->add_str_double(" New worst char error = ", error_rate);
-    *log_msg += UpdateErrorGraph(iteration, error_rate, rec_model_data, tester);
+    log_msg += " New worst char error = " + std::to_string(error_rate);
+    log_msg += UpdateErrorGraph(iteration, error_rate, rec_model_data, tester);
    if (worst_error_rate_ > best_error_rate_ + kMinDivergenceRate &&
        best_error_rate_ < kMinStartedErrorRate && !best_trainer_.empty()) {
      // Error rate has ballooned. Go back to the best model.
-      *log_msg += "\nDivergence! ";
+      log_msg += "\nDivergence! ";
      // Copy best_trainer_ before reading it, as it will get overwritten.
      std::vector<char> revert_data(best_trainer_);
      if (ReadTrainingDump(revert_data, this)) {
@ -347,33 +348,33 @@ bool LSTMTrainer::MaintainCheckpoints(TestCallback tester, STRING *log_msg) {
    std::vector<char> checkpoint;
    if (!SaveTrainingDump(FULL, this, &checkpoint) ||
        !SaveDataToFile(checkpoint, checkpoint_name_.c_str())) {
-      *log_msg += " failed to write checkpoint.";
+      log_msg += " failed to write checkpoint.";
    } else {
-      *log_msg += " wrote checkpoint.";
+      log_msg += " wrote checkpoint.";
    }
  }
-  *log_msg += "\n";
+  log_msg += "\n";
  return result;
 }

 // Builds a string containing a progress message with current error rates.
-void LSTMTrainer::PrepareLogMsg(STRING *log_msg) const {
+void LSTMTrainer::PrepareLogMsg(std::string &log_msg) const {
  LogIterations("At", log_msg);
-  log_msg->add_str_double(", Mean rms=", error_rates_[ET_RMS]);
-  log_msg->add_str_double("%, delta=", error_rates_[ET_DELTA]);
-  log_msg->add_str_double("%, char train=", error_rates_[ET_CHAR_ERROR]);
-  log_msg->add_str_double("%, word train=", error_rates_[ET_WORD_RECERR]);
-  log_msg->add_str_double("%, skip ratio=", error_rates_[ET_SKIP_RATIO]);
-  *log_msg += "%, ";
+  log_msg += ", Mean rms=" + std::to_string(error_rates_[ET_RMS]);
+  log_msg += "%, delta=" + std::to_string(error_rates_[ET_DELTA]);
+  log_msg += "%, char train=" + std::to_string(error_rates_[ET_CHAR_ERROR]);
+  log_msg += "%, word train=" + std::to_string(error_rates_[ET_WORD_RECERR]);
+  log_msg += "%, skip ratio=" + std::to_string(error_rates_[ET_SKIP_RATIO]);
+  log_msg += "%, ";
 }

 // Appends <intro_str> iteration learning_iteration()/training_iteration()/
 // sample_iteration() to the log_msg.
-void LSTMTrainer::LogIterations(const char *intro_str, STRING *log_msg) const {
-  *log_msg += intro_str;
-  log_msg->add_str_int(" iteration ", learning_iteration());
-  log_msg->add_str_int("/", training_iteration());
-  log_msg->add_str_int("/", sample_iteration());
+void LSTMTrainer::LogIterations(const char *intro_str, std::string &log_msg) const {
+  log_msg += intro_str;
+  log_msg += " iteration " + std::to_string(learning_iteration());
+  log_msg += "/" + std::to_string(training_iteration());
+  log_msg += "/" + std::to_string(sample_iteration());
 }

 // Returns true and increments the training_stage_ if the error rate has just
@ -518,15 +519,15 @@ bool LSTMTrainer::DeSerialize(const TessdataManager *mgr, TFile *fp) {
 // De-serializes the saved best_trainer_ into sub_trainer_, and adjusts the
 // learning rates (by scaling reduction, or layer specific, according to
 // NF_LAYER_SPECIFIC_LR).
-void LSTMTrainer::StartSubtrainer(STRING *log_msg) {
+void LSTMTrainer::StartSubtrainer(std::string &log_msg) {
  delete sub_trainer_;
  sub_trainer_ = new LSTMTrainer();
  if (!ReadTrainingDump(best_trainer_, sub_trainer_)) {
-    *log_msg += " Failed to revert to previous best for trial!";
+    log_msg += " Failed to revert to previous best for trial!";
    delete sub_trainer_;
    sub_trainer_ = nullptr;
  } else {
-    log_msg->add_str_int(" Trial sub_trainer_ from iteration ", sub_trainer_->training_iteration());
+    log_msg += " Trial sub_trainer_ from iteration " + std::to_string(sub_trainer_->training_iteration());
    // Reduce learning rate so it doesn't diverge this time.
    sub_trainer_->ReduceLearningRates(this, log_msg);
    // If it fails again, we will wait twice as long before reverting again.
@ -546,14 +547,14 @@ void LSTMTrainer::StartSubtrainer(STRING *log_msg) {
 // trainer in *this is replaced with sub_trainer_, and STR_REPLACED is
 // returned. STR_NONE is returned if the subtrainer wasn't good enough to
 // receive any training iterations.
-SubTrainerResult LSTMTrainer::UpdateSubtrainer(STRING *log_msg) {
+SubTrainerResult LSTMTrainer::UpdateSubtrainer(std::string &log_msg) {
  double training_error = CharError();
  double sub_error = sub_trainer_->CharError();
  double sub_margin = (training_error - sub_error) / sub_error;
  if (sub_margin >= kSubTrainerMarginFraction) {
-    log_msg->add_str_double(" sub_trainer=", sub_error);
-    log_msg->add_str_double(" margin=", 100.0 * sub_margin);
-    *log_msg += "\n";
+    log_msg += " sub_trainer=" + std::to_string(sub_error);
+    log_msg += " margin=" + std::to_string(100.0 * sub_margin);
+    log_msg += "\n";
    // Catch up to current iteration.
    int end_iteration = training_iteration();
    while (sub_trainer_->training_iteration() < end_iteration &&
@ -562,11 +563,11 @@ SubTrainerResult LSTMTrainer::UpdateSubtrainer(STRING *log_msg) {
      while (sub_trainer_->training_iteration() < target_iteration) {
        sub_trainer_->TrainOnLine(this, false);
      }
-      STRING batch_log = "Sub:";
-      sub_trainer_->PrepareLogMsg(&batch_log);
+      std::string batch_log = "Sub:";
+      sub_trainer_->PrepareLogMsg(batch_log);
      batch_log += "\n";
      tprintf("UpdateSubtrainer:%s", batch_log.c_str());
-      *log_msg += batch_log;
+      log_msg += batch_log;
      sub_error = sub_trainer_->CharError();
      sub_margin = (training_error - sub_error) / sub_error;
    }
@ -575,8 +576,8 @@ SubTrainerResult LSTMTrainer::UpdateSubtrainer(STRING *log_msg) {
      std::vector<char> updated_trainer;
      SaveTrainingDump(LIGHT, sub_trainer_, &updated_trainer);
      ReadTrainingDump(updated_trainer, this);
-      log_msg->add_str_int(" Sub trainer wins at iteration ", training_iteration());
-      *log_msg += "\n";
+      log_msg += " Sub trainer wins at iteration " + std::to_string(training_iteration());
+      log_msg += "\n";
      return STR_REPLACED;
    }
    return STR_UPDATED;
@ -586,16 +587,16 @@ SubTrainerResult LSTMTrainer::UpdateSubtrainer(STRING *log_msg) {

 // Reduces network learning rates, either for everything, or for layers
 // independently, according to NF_LAYER_SPECIFIC_LR.
-void LSTMTrainer::ReduceLearningRates(LSTMTrainer *samples_trainer, STRING *log_msg) {
+void LSTMTrainer::ReduceLearningRates(LSTMTrainer *samples_trainer, std::string &log_msg) {
  if (network_->TestFlag(NF_LAYER_SPECIFIC_LR)) {
    int num_reduced =
        ReduceLayerLearningRates(kLearningRateDecay, kNumAdjustmentIterations, samples_trainer);
-    log_msg->add_str_int("\nReduced learning rate on layers: ", num_reduced);
+    log_msg += "\nReduced learning rate on layers: " + std::to_string(num_reduced);
  } else {
    ScaleLearningRate(kLearningRateDecay);
-    log_msg->add_str_double("\nReduced learning rate to :", learning_rate_);
+    log_msg += "\nReduced learning rate to :" + std::to_string(learning_rate_);
  }
-  *log_msg += "\n";
+  log_msg += "\n";
 }

 // Considers reducing the learning rate independently for each layer down by
@ -611,7 +612,7 @@ int LSTMTrainer::ReduceLayerLearningRates(double factor, int num_samples,
    LR_SAME, // Learning rate will stay the same.
    LR_COUNT // Size of arrays.
  };
-  std::vector<STRING> layers = EnumerateLayers();
+  std::vector<std::string> layers = EnumerateLayers();
  int num_layers = layers.size();
  std::vector<int> num_weights;
  num_weights.resize(num_layers, 0);
@ -869,8 +870,8 @@ Trainability LSTMTrainer::PrepareForBackward(const ImageData *trainingdata, Netw
    tprintf("Input width was %d\n", inputs.Width());
    return UNENCODABLE;
  }
-  STRING ocr_text = DecodeLabels(ocr_labels);
-  STRING truth_text = DecodeLabels(truth_labels);
+  std::string ocr_text = DecodeLabels(ocr_labels);
+  std::string truth_text = DecodeLabels(truth_labels);
  targets->SubtractAllFromFloat(*fwd_outputs);
  if (debug_interval_ != 0) {
    if (truth_text != ocr_text) {
@ -932,12 +933,12 @@ void LSTMTrainer::SaveRecognitionDump(std::vector<char> *data) const {

 // Returns a suitable filename for a training dump, based on the model_base_,
 // best_error_rate_, best_iteration_ and training_iteration_.
-STRING LSTMTrainer::DumpFilename() const {
-  STRING filename;
+std::string LSTMTrainer::DumpFilename() const {
+  std::string filename;
  filename += model_base_.c_str();
-  filename.add_str_double("_", best_error_rate_);
-  filename.add_str_int("_", best_iteration_);
-  filename.add_str_int("_", training_iteration_);
+  filename += "_" + std::to_string(best_error_rate_);
+  filename += "_" + std::to_string(best_iteration_);
+  filename += "_" + std::to_string(training_iteration_);
  filename += ".checkpoint";
  return filename;
 }
@ -1028,7 +1029,7 @@ bool LSTMTrainer::DebugLSTMTraining(const NetworkIO &inputs, const ImageData &tr
                                    const NetworkIO &fwd_outputs,
                                    const std::vector<int> &truth_labels,
                                    const NetworkIO &outputs) {
-  const STRING &truth_text = DecodeLabels(truth_labels);
+  const std::string &truth_text = DecodeLabels(truth_labels);
  if (truth_text.c_str() == nullptr || truth_text.length() <= 0) {
    tprintf("Empty truth string at decode time!\n");
    return false;
@ -1038,7 +1039,7 @@ bool LSTMTrainer::DebugLSTMTraining(const NetworkIO &inputs, const ImageData &tr
    std::vector<int> labels;
    std::vector<int> xcoords;
    LabelsFromOutputs(outputs, &labels, &xcoords);
-    STRING text = DecodeLabels(labels);
+    std::string text = DecodeLabels(labels);
    tprintf("Iteration %d: GROUND  TRUTH : %s\n", training_iteration(), truth_text.c_str());
    if (truth_text != text) {
      tprintf("Iteration %d: ALIGNED TRUTH : %s\n", training_iteration(), text.c_str());
@ -1213,13 +1214,12 @@ double LSTMTrainer::ComputeCharError(const std::vector<int> &truth_str,

 // Computes word recall error rate using a very simple bag of words algorithm.
 // NOTE that this is destructive on both input strings.
-double LSTMTrainer::ComputeWordError(STRING *truth_str, STRING *ocr_str) {
+double LSTMTrainer::ComputeWordError(std::string *truth_str, std::string *ocr_str) {
  using StrMap = std::unordered_map<std::string, int, std::hash<std::string>>;
-  std::vector<STRING> truth_words, ocr_words;
-  truth_str->split(' ', &truth_words);
+  std::vector<std::string> truth_words = split(*truth_str, ' ');
  if (truth_words.empty())
    return 0.0;
-  ocr_str->split(' ', &ocr_words);
+  std::vector<std::string> ocr_words = split(*ocr_str, ' ');
  StrMap word_counts;
  for (auto truth_word : truth_words) {
    std::string truth_word_string(truth_word.c_str());
--- a/src/training/unicharset/lstmtrainer.h
+++ b/src/training/unicharset/lstmtrainer.h
@ -173,7 +173,7 @@ public:
                                    double min_dict_ratio, double dict_ratio_step,
                                    double max_dict_ratio, double min_cert_offset,
                                    double cert_offset_step, double max_cert_offset,
-                                    STRING *results);
+                                    std::string &results);

  // Provides output on the distribution of weight values.
  void DebugNetwork();
@ -181,12 +181,12 @@ public:
  // Loads a set of lstmf files that were created using the lstm.train config to
  // tesseract into memory ready for training. Returns false if nothing was
  // loaded.
-  bool LoadAllTrainingData(const std::vector<STRING> &filenames, CachingStrategy cache_strategy,
+  bool LoadAllTrainingData(const std::vector<std::string> &filenames, CachingStrategy cache_strategy,
                           bool randomly_rotate);

  // Keeps track of best and locally worst error rate, using internally computed
  // values. See MaintainCheckpointsSpecific for more detail.
-  bool MaintainCheckpoints(TestCallback tester, STRING *log_msg);
+  bool MaintainCheckpoints(TestCallback tester, std::string &log_msg);
  // Keeps track of best and locally worst error_rate (whatever it is) and
  // launches tests using rec_model, when a new min or max is reached.
  // Writes checkpoints using train_model at appropriate times and builds and
@ -194,12 +194,12 @@ public:
  // interesting happened.
  bool MaintainCheckpointsSpecific(int iteration, const std::vector<char> *train_model,
                                   const std::vector<char> *rec_model, TestCallback tester,
-                                   STRING *log_msg);
+                                   std::string &log_msg);
  // Builds a string containing a progress message with current error rates.
-  void PrepareLogMsg(STRING *log_msg) const;
+  void PrepareLogMsg(std::string &log_msg) const;
  // Appends <intro_str> iteration learning_iteration()/training_iteration()/
  // sample_iteration() to the log_msg.
-  void LogIterations(const char *intro_str, STRING *log_msg) const;
+  void LogIterations(const char *intro_str, std::string &log_msg) const;

  // TODO(rays) Add curriculum learning.
  // Returns true and increments the training_stage_ if the error rate has just
@ -218,7 +218,7 @@ public:
  // De-serializes the saved best_trainer_ into sub_trainer_, and adjusts the
  // learning rates (by scaling reduction, or layer specific, according to
  // NF_LAYER_SPECIFIC_LR).
-  void StartSubtrainer(STRING *log_msg);
+  void StartSubtrainer(std::string &log_msg);
  // While the sub_trainer_ is behind the current training iteration and its
  // training error is at least kSubTrainerMarginFraction better than the
  // current training error, trains the sub_trainer_, and returns STR_UPDATED if
@ -227,10 +227,10 @@ public:
  // trainer in *this is replaced with sub_trainer_, and STR_REPLACED is
  // returned. STR_NONE is returned if the subtrainer wasn't good enough to
  // receive any training iterations.
-  SubTrainerResult UpdateSubtrainer(STRING *log_msg);
+  SubTrainerResult UpdateSubtrainer(std::string &log_msg);
  // Reduces network learning rates, either for everything, or for layers
  // independently, according to NF_LAYER_SPECIFIC_LR.
-  void ReduceLearningRates(LSTMTrainer *samples_trainer, STRING *log_msg);
+  void ReduceLearningRates(LSTMTrainer *samples_trainer, std::string &log_msg);
  // Considers reducing the learning rate independently for each layer down by
  // factor(<1), or leaving it the same, by double-training the given number of
  // samples and minimizing the amount of changing of sign of weight updates.
@ -306,7 +306,7 @@ public:

  // Returns a suitable filename for a training dump, based on the model_base_,
  // the iteration and the error rates.
-  STRING DumpFilename() const;
+  std::string DumpFilename() const;

  // Fills the whole error buffer of the given type with the given value.
  void FillErrorBuffer(double new_error, ErrorTypes type);
@ -365,7 +365,7 @@ protected:
  double ComputeCharError(const std::vector<int> &truth_str, const std::vector<int> &ocr_str);
  // Computes a very simple bag of words word recall error rate.
  // NOTE that this is destructive on both input strings.
-  double ComputeWordError(STRING *truth_str, STRING *ocr_str);
+  double ComputeWordError(std::string *truth_str, std::string *ocr_str);

  // Updates the error buffer and corresponding mean of the given type with
  // the new_error.
--- a/src/training/unicharset_extractor.cpp
+++ b/src/training/unicharset_extractor.cpp
@ -41,7 +41,7 @@ namespace tesseract {

 // Helper normalizes and segments the given strings according to norm_mode, and
 // adds the segmented parts to unicharset.
-static void AddStringsToUnicharset(const std::vector<STRING> &strings, int norm_mode,
+static void AddStringsToUnicharset(const std::vector<std::string> &strings, int norm_mode,
                                   UNICHARSET *unicharset) {
  for (int i = 0; i < strings.size(); ++i) {
    std::vector<std::string> normalized;
@ -64,10 +64,10 @@ static int Main(int argc, char **argv) {
  UNICHARSET unicharset;
  // Load input files
  for (int arg = 1; arg < argc; ++arg) {
-    STRING file_data = tesseract::ReadFile(argv[arg], /*reader*/ nullptr);
+    std::string file_data = tesseract::ReadFile(argv[arg], /*reader*/ nullptr);
    if (file_data.length() == 0)
      continue;
-    std::vector<STRING> texts;
+    std::vector<std::string> texts;
    if (ReadMemBoxes(-1, /*skip_blanks*/ true, &file_data[0],
                     /*continue_on_failure*/ false, /*boxes*/ nullptr, &texts,
                     /*box_texts*/ nullptr, /*pages*/ nullptr)) {
@ -75,7 +75,7 @@ static int Main(int argc, char **argv) {
    } else {
      tprintf("Extracting unicharset from plain text file %s\n", argv[arg]);
      texts.clear();
-      file_data.split('\n', &texts);
+      texts = split(file_data, '\n');
    }
    AddStringsToUnicharset(texts, FLAGS_norm_mode, &unicharset);
  }
--- a/src/wordrec/segsearch.cpp
+++ b/src/wordrec/segsearch.cpp
@ -106,11 +106,11 @@ void Wordrec::SegSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle
    // search for the true path to find the blame for the incorrect best_choice.
    if (SegSearchDone(num_futile_classifications) && blamer_bundle != nullptr &&
        blamer_bundle->GuidedSegsearchNeeded(word_res->best_choice)) {
-      InitBlamerForSegSearch(word_res, &pain_points, blamer_bundle, &blamer_debug);
+      InitBlamerForSegSearch(word_res, &pain_points, blamer_bundle, blamer_debug);
    }
  } // end while loop exploring alternative paths
  if (blamer_bundle != nullptr) {
-    blamer_bundle->FinishSegSearch(word_res->best_choice, wordrec_debug_blamer, &blamer_debug);
+    blamer_bundle->FinishSegSearch(word_res->best_choice, wordrec_debug_blamer, blamer_debug);
  }

  if (segsearch_debug_level > 0) {
@ -296,7 +296,7 @@ void Wordrec::ResetNGramSearch(WERD_RES *word_res, BestChoiceBundle *best_choice
 }

 void Wordrec::InitBlamerForSegSearch(WERD_RES *word_res, LMPainPoints *pain_points,
-                                     BlamerBundle *blamer_bundle, STRING *blamer_debug) {
+                                     BlamerBundle *blamer_bundle, std::string &blamer_debug) {
  pain_points->Clear(); // Clear pain points heap.
  blamer_bundle->InitForSegSearch(word_res->best_choice, word_res->ratings, getDict().WildcardID(),
                                  wordrec_debug_blamer, blamer_debug, pain_points,
--- a/src/wordrec/wordrec.h
+++ b/src/wordrec/wordrec.h
@ -487,7 +487,7 @@ protected:
  // (so that we can evaluate correct segmentation path and discover the reason
  // for incorrect result).
  void InitBlamerForSegSearch(WERD_RES *word_res, LMPainPoints *pain_points,
-                              BlamerBundle *blamer_bundle, STRING *blamer_debug);
+                              BlamerBundle *blamer_bundle, std::string &blamer_debug);
 };

 } // namespace tesseract
--- a/unittest/imagedata_test.cc
+++ b/unittest/imagedata_test.cc
@ -93,11 +93,11 @@ TEST_F(ImagedataTest, CachesMultiDocs) {
  // Number of pages in each document.
  const std::vector<int> kNumPages = {6, 5, 7};
  std::vector<std::vector<std::string>> page_texts;
-  std::vector<STRING> filenames;
+  std::vector<std::string> filenames;
  for (size_t d = 0; d < kNumPages.size(); ++d) {
    page_texts.emplace_back(std::vector<std::string>());
    std::string filename = MakeFakeDoc(kNumPages[d], d, &page_texts.back());
-    filenames.push_back(STRING(filename.c_str()));
+    filenames.push_back(filename);
  }
  // Now try getting them back with different cache strategies and check that
  // the pages come out in the right order.
--- a/unittest/lstm_test.h
+++ b/unittest/lstm_test.h
@ -91,7 +91,7 @@ protected:
      net_mode |= NF_LAYER_SPECIFIC_LR;
    EXPECT_TRUE(
        trainer_->InitNetwork(network_spec.c_str(), -1, net_mode, 0.1, learning_rate, 0.9, 0.999));
-    std::vector<STRING> filenames;
+    std::vector<std::string> filenames;
    filenames.push_back(STRING(TestDataNameToPath(lstmf_file).c_str()));
    EXPECT_TRUE(trainer_->LoadAllTrainingData(filenames, CS_SEQUENTIAL, false));
    LOG(INFO) << "Setup network:" << model_name << "\n";
@ -102,7 +102,7 @@ protected:
    int iteration_limit = iteration + max_iterations;
    double best_error = 100.0;
    do {
-      STRING log_str;
+      std::string log_str;
      int target_iteration = iteration + kBatchIterations;
      // Train a few.
      double mean_error = 0.0;
@ -111,7 +111,7 @@ protected:
        iteration = trainer_->training_iteration();
        mean_error += trainer_->LastSingleError(ET_CHAR_ERROR);
      }
-      trainer_->MaintainCheckpoints(nullptr, &log_str);
+      trainer_->MaintainCheckpoints(nullptr, log_str);
      iteration = trainer_->training_iteration();
      mean_error *= 100.0 / kBatchIterations;
      if (mean_error < best_error)
--- a/unittest/stringrenderer_test.cc
+++ b/unittest/stringrenderer_test.cc
@ -211,7 +211,7 @@ TEST_F(StringRendererTest, ArabicBoxcharsInLTROrder) {
  std::string boxes_str = renderer_->GetBoxesStr();
  // Decode to get the box text strings.
  EXPECT_FALSE(boxes_str.empty());
-  std::vector<STRING> texts;
+  std::vector<std::string> texts;
  EXPECT_TRUE(ReadMemBoxes(0, false, boxes_str.c_str(), false, nullptr, &texts, nullptr, nullptr));
  std::string ltr_str;
  for (size_t i = 0; i < texts.size(); ++i) {