Replace remaining STRING with std::string in src/ccmain

Signed-off-by: Stefan Weil <sw@weilnetz.de>
2025-01-21 17:13:09 +08:00 · 2021-03-14 10:48:06 +01:00 · 2021-03-14 10:48:06 +01:00 · db9f963411
commit db9f963411
parent d7823a71c2
21 changed files with 112 additions and 135 deletions
--- a/src/ccmain/control.cpp
+++ b/src/ccmain/control.cpp
@ -1658,7 +1658,7 @@ ACCEPTABLE_WERD_TYPE Tesseract::acceptable_word_string(const UNICHARSET &char_se

  /* Single Leading punctuation char*/

-  if (s[offset] != '\0' && STRING(chs_leading_punct).contains(s[offset]))
+  if (s[offset] != '\0' && chs_leading_punct.contains(s[offset]))
    offset += lengths[i++];
  leading_punct_count = i;

@ -1705,10 +1705,10 @@ Allow a single hyphen in a lower case word
  }

  /* Up to two different, constrained trailing punctuation chars */
-  if (lengths[i] == 1 && s[offset] != '\0' && STRING(chs_trailing_punct1).contains(s[offset]))
+  if (lengths[i] == 1 && s[offset] != '\0' && chs_trailing_punct1.contains(s[offset]))
    offset += lengths[i++];
  if (lengths[i] == 1 && s[offset] != '\0' && i > 0 && s[offset - lengths[i - 1]] != s[offset] &&
-      STRING(chs_trailing_punct2).contains(s[offset]))
+      chs_trailing_punct2.contains(s[offset]))
    offset += lengths[i++];

  if (s[offset] != '\0')
--- a/src/ccmain/docqual.cpp
+++ b/src/ccmain/docqual.cpp
@ -107,9 +107,9 @@ void Tesseract::unrej_good_chs(WERD_RES *word) {
 int16_t Tesseract::count_outline_errs(char c, int16_t outline_count) {
  int expected_outline_count;

-  if (STRING(outlines_odd).contains(c))
+  if (outlines_odd.contains(c))
    return 0; // Don't use this char
-  else if (STRING(outlines_2).contains(c))
+  else if (outlines_2.contains(c))
    expected_outline_count = 2;
  else
    expected_outline_count = 1;
--- a/src/ccmain/equationdetect.cpp
+++ b/src/ccmain/equationdetect.cpp
@ -216,7 +216,7 @@ void EquationDetect::IdentifySpecialText(BLOBNBOX *blobnbox, const int height_th

 BlobSpecialTextType EquationDetect::EstimateTypeForUnichar(const UNICHARSET &unicharset,
                                                           const UNICHAR_ID id) const {
-  const STRING s = unicharset.id_to_unichar(id);
+  const std::string s = unicharset.id_to_unichar(id);
  if (unicharset.get_isalpha(id)) {
    return BSTT_NONE;
  }
@ -237,8 +237,8 @@ BlobSpecialTextType EquationDetect::EstimateTypeForUnichar(const UNICHARSET &uni

  // Check if it is digit. In addition to the isdigit attribute, we also check
  // if this character belongs to those likely to be confused with a digit.
-  static const STRING kDigitsChars = "|";
-  if (unicharset.get_isdigit(id) || (s.length() == 1 && kDigitsChars.contains(s[0]))) {
+  static const char kDigitsChars[] = "|";
+  if (unicharset.get_isdigit(id) || (s.length() == 1 && strchr(kDigitsChars, s[0]) != nullptr)) {
    return BSTT_DIGIT;
  } else {
    return BSTT_MATH;
@ -286,8 +286,8 @@ void EquationDetect::IdentifySpecialText() {
  lang_tesseract_->classify_integer_matcher_multiplier.set_value(classify_integer_matcher);

  if (equationdetect_save_spt_image) { // For debug.
-    STRING outfile;
-    GetOutputTiffName("_spt", &outfile);
+    std::string outfile;
+    GetOutputTiffName("_spt", outfile);
    PaintSpecialTexts(outfile);
  }
 }
@ -351,11 +351,11 @@ int EquationDetect::FindEquationParts(ColPartitionGrid *part_grid, ColPartitionS
  part_grid_ = part_grid;
  best_columns_ = best_columns;
  resolution_ = lang_tesseract_->source_resolution();
-  STRING outfile;
+  std::string outfile;
  page_count_++;

  if (equationdetect_save_bi_image) {
-    GetOutputTiffName("_bi", &outfile);
+    GetOutputTiffName("_bi", outfile);
    pixWrite(outfile.c_str(), lang_tesseract_->pix_binary(), IFF_TIFF_G4);
  }

@ -371,7 +371,7 @@ int EquationDetect::FindEquationParts(ColPartitionGrid *part_grid, ColPartitionS
  IdentifyInlineParts();

  if (equationdetect_save_seed_image) {
-    GetOutputTiffName("_seed", &outfile);
+    GetOutputTiffName("_seed", outfile);
    PaintColParts(outfile);
  }

@ -396,7 +396,7 @@ int EquationDetect::FindEquationParts(ColPartitionGrid *part_grid, ColPartitionS
  ProcessMathBlockSatelliteParts();

  if (equationdetect_save_merged_image) { // For debug.
-    GetOutputTiffName("_merged", &outfile);
+    GetOutputTiffName("_merged", outfile);
    PaintColParts(outfile);
  }

@ -1383,14 +1383,14 @@ bool EquationDetect::IsNearMathNeighbor(const int y_gap, const ColPartition *nei
  return neighbor->type() == PT_EQUATION && y_gap <= kYGapTh;
 }

-void EquationDetect::GetOutputTiffName(const char *name, STRING *image_name) const {
-  ASSERT_HOST(image_name && name);
+void EquationDetect::GetOutputTiffName(const char *name, std::string &image_name) const {
+  ASSERT_HOST(name);
  char page[50];
  snprintf(page, sizeof(page), "%04d", page_count_);
-  *image_name = STRING(lang_tesseract_->imagebasename) + page + name + ".tif";
+  image_name = (lang_tesseract_->imagebasename) + page + name + ".tif";
 }

-void EquationDetect::PaintSpecialTexts(const STRING &outfile) const {
+void EquationDetect::PaintSpecialTexts(const std::string &outfile) const {
  Pix *pix = nullptr, *pixBi = lang_tesseract_->pix_binary();
  pix = pixConvertTo32(pixBi);
  ColPartitionGridSearch gsearch(part_grid_);
@ -1407,7 +1407,7 @@ void EquationDetect::PaintSpecialTexts(const STRING &outfile) const {
  pixDestroy(&pix);
 }

-void EquationDetect::PaintColParts(const STRING &outfile) const {
+void EquationDetect::PaintColParts(const std::string &outfile) const {
  Pix *pix = pixConvertTo32(lang_tesseract_->BestPix());
  ColPartitionGridSearch gsearch(part_grid_);
  gsearch.StartFullSearch();
--- a/src/ccmain/equationdetect.h
+++ b/src/ccmain/equationdetect.h
@ -202,16 +202,16 @@ protected:
  bool IsNearMathNeighbor(const int y_gap, const ColPartition *neighbor) const;

  // Generate the tiff file name for output/debug file.
-  void GetOutputTiffName(const char *name, STRING *image_name) const;
+  void GetOutputTiffName(const char *name, std::string &image_name) const;

  // Debugger function that renders ColPartitions on the input image, where:
  // parts labeled as PT_EQUATION will be painted in red, PT_INLINE_EQUATION
  // will be painted in green, and other parts will be painted in blue.
-  void PaintColParts(const STRING &outfile) const;
+  void PaintColParts(const std::string &outfile) const;

  // Debugger function that renders the blobs in part_grid_ over the input
  // image.
-  void PaintSpecialTexts(const STRING &outfile) const;
+  void PaintSpecialTexts(const std::string &outfile) const;

  // Debugger function that print the math blobs density values for a
  // ColPartition object.
--- a/src/ccmain/fixspace.cpp
+++ b/src/ccmain/fixspace.cpp
@ -37,7 +37,6 @@

 #include <tesseract/ocrclass.h> // for ETEXT_DESC
 #include <tesseract/unichar.h>  // for UNICHAR_ID
-#include "strngs.h"             // for STRING

 #include <cstdint> // for INT16_MAX, int16_t, int32_t

@ -288,7 +287,7 @@ int16_t Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) {
             ((word_done && word->best_choice->unichar_lengths().c_str()[0] == 1 &&
               word->best_choice->unichar_string()[0] == '1') ||
              (!word_done &&
-               STRING(conflict_set_I_l_1).contains(word->best_choice->unichar_string()[0])))))) {
+               conflict_set_I_l_1.contains(word->best_choice->unichar_string()[0])))))) {
        total_score += prev_word_score;
        if (prev_word_done)
          done_word_count++;
@ -330,7 +329,7 @@ int16_t Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) {
      prev_char_1 =
          ((word_done && (word->best_choice->unichar_string()[offset] == '1')) ||
           (!word_done &&
-            STRING(conflict_set_I_l_1).contains(word->best_choice->unichar_string()[offset])));
+            conflict_set_I_l_1.contains(word->best_choice->unichar_string()[offset])));
    }
    /* Find next word */
    do {
@ -356,7 +355,7 @@ bool Tesseract::digit_or_numeric_punct(WERD_RES *word, int char_position) {
      word->uch_set->get_isdigit(word->best_choice->unichar_string().c_str() + offset,
                                 word->best_choice->unichar_lengths()[i]) ||
      (word->best_choice->permuter() == NUMBER_PERM &&
-       STRING(numeric_punctuation).contains(word->best_choice->unichar_string().c_str()[offset])));
+       numeric_punctuation.contains(word->best_choice->unichar_string().c_str()[offset])));
 }

 /**
--- a/src/ccmain/ltrresultiterator.cpp
+++ b/src/ccmain/ltrresultiterator.cpp
@ -24,8 +24,6 @@

 #include <allheaders.h>

-#include "strngs.h"
-
 namespace tesseract {

 LTRResultIterator::LTRResultIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
@ -46,7 +44,7 @@ LTRResultIterator::~LTRResultIterator() = default;
 char *LTRResultIterator::GetUTF8Text(PageIteratorLevel level) const {
  if (it_->word() == nullptr)
    return nullptr; // Already at the end!
-  STRING text;
+  std::string text;
  PAGE_RES_IT res_it(*it_);
  WERD_CHOICE *best_choice = res_it.word()->best_choice;
  ASSERT_HOST(best_choice != nullptr);
@ -306,7 +304,7 @@ bool LTRResultIterator::EquivalentToTruth(const char *str) const {
 char *LTRResultIterator::WordTruthUTF8Text() const {
  if (!HasTruthString())
    return nullptr;
-  STRING truth_text = it_->word()->blamer_bundle->TruthString();
+  std::string truth_text = it_->word()->blamer_bundle->TruthString();
  int length = truth_text.length() + 1;
  char *result = new char[length];
  strncpy(result, truth_text.c_str(), length);
@ -318,7 +316,7 @@ char *LTRResultIterator::WordTruthUTF8Text() const {
 char *LTRResultIterator::WordNormedUTF8Text() const {
  if (it_->word() == nullptr)
    return nullptr; // Already at the end!
-  STRING ocr_text;
+  std::string ocr_text;
  WERD_CHOICE *best_choice = it_->word()->best_choice;
  const UNICHARSET *unicharset = it_->word()->uch_set;
  ASSERT_HOST(best_choice != nullptr);
--- a/src/ccmain/paragraphs.cpp
+++ b/src/ccmain/paragraphs.cpp
@ -32,7 +32,6 @@
 #include "ratngs.h"              // for WERD_CHOICE
 #include "rect.h"                // for TBOX
 #include "statistc.h"            // for STATS
-#include "strngs.h"              // for STRING
 #include "tprintf.h"             // for tprintf
 #include "unicharset.h"          // for UNICHARSET
 #include "werd.h"                // for WERD, W_REP_CHAR
@ -91,16 +90,9 @@ static bool AcceptableRowArgs(int debug_level, int min_num_rows, const char *fun

 // =============================== Debug Code ================================

-// Convert an integer to a decimal string.
-static STRING StrOf(int num) {
-  char buffer[30];
-  snprintf(buffer, sizeof(buffer), "%d", num);
-  return STRING(buffer);
-}
-
 // Given a row-major matrix of unicode text and a column separator, print
 // a formatted table.  For ASCII, we get good column alignment.
-static void PrintTable(const std::vector<std::vector<STRING>> &rows, const STRING &colsep) {
+static void PrintTable(const std::vector<std::vector<std::string>> &rows, const char *colsep) {
  std::vector<int> max_col_widths;
  for (const auto &row : rows) {
    int num_columns = row.size();
@ -119,56 +111,56 @@ static void PrintTable(const std::vector<std::vector<STRING>> &rows, const STRIN
    }
  }

-  std::vector<STRING> col_width_patterns;
+  std::vector<std::string> col_width_patterns;
  for (int c = 0; c < max_col_widths.size(); c++) {
-    col_width_patterns.push_back(STRING("%-") + StrOf(max_col_widths[c]) + "s");
+    col_width_patterns.push_back(std::string("%-") + std::to_string(max_col_widths[c]) + "s");
  }

  for (int r = 0; r < rows.size(); r++) {
    for (int c = 0; c < rows[r].size(); c++) {
      if (c > 0)
-        tprintf("%s", colsep.c_str());
+        tprintf("%s", colsep);
      tprintf(col_width_patterns[c].c_str(), rows[r][c].c_str());
    }
    tprintf("\n");
  }
 }

-static STRING RtlEmbed(const STRING &word, bool rtlify) {
+static std::string RtlEmbed(const std::string &word, bool rtlify) {
  if (rtlify)
-    return STRING(kRLE) + word + STRING(kPDF);
+    return std::string(kRLE) + word + std::string(kPDF);
  return word;
 }

 // Print the current thoughts of the paragraph detector.
 static void PrintDetectorState(const ParagraphTheory &theory,
                               const GenericVector<RowScratchRegisters> &rows) {
-  std::vector<std::vector<STRING>> output;
-  output.push_back(std::vector<STRING>());
+  std::vector<std::vector<std::string>> output;
+  output.push_back(std::vector<std::string>());
  output.back().push_back("#row");
  output.back().push_back("space");
  output.back().push_back("..");
  output.back().push_back("lword[widthSEL]");
  output.back().push_back("rword[widthSEL]");
-  RowScratchRegisters::AppendDebugHeaderFields(&output.back());
+  RowScratchRegisters::AppendDebugHeaderFields(output.back());
  output.back().push_back("text");

  for (int i = 0; i < rows.size(); i++) {
-    output.push_back(std::vector<STRING>());
-    std::vector<STRING> &row = output.back();
+    output.push_back(std::vector<std::string>());
+    std::vector<std::string> &row = output.back();
    const RowInfo &ri = *rows[i].ri_;
-    row.push_back(StrOf(i));
-    row.push_back(StrOf(ri.average_interword_space));
+    row.push_back(std::to_string(i));
+    row.push_back(std::to_string(ri.average_interword_space));
    row.push_back(ri.has_leaders ? ".." : " ");
-    row.push_back(RtlEmbed(ri.lword_text, !ri.ltr) + "[" + StrOf(ri.lword_box.width()) +
+    row.push_back(RtlEmbed(ri.lword_text, !ri.ltr) + "[" + std::to_string(ri.lword_box.width()) +
                  (ri.lword_likely_starts_idea ? "S" : "s") +
                  (ri.lword_likely_ends_idea ? "E" : "e") +
                  (ri.lword_indicates_list_item ? "L" : "l") + "]");
-    row.push_back(RtlEmbed(ri.rword_text, !ri.ltr) + "[" + StrOf(ri.rword_box.width()) +
+    row.push_back(RtlEmbed(ri.rword_text, !ri.ltr) + "[" + std::to_string(ri.rword_box.width()) +
                  (ri.rword_likely_starts_idea ? "S" : "s") +
                  (ri.rword_likely_ends_idea ? "E" : "e") +
                  (ri.rword_indicates_list_item ? "L" : "l") + "]");
-    rows[i].AppendDebugInfo(theory, &row);
+    rows[i].AppendDebugInfo(theory, row);
    row.push_back(RtlEmbed(ri.text, !ri.ltr));
  }
  PrintTable(output, " ");
@ -180,11 +172,11 @@ static void PrintDetectorState(const ParagraphTheory &theory,
  }
 }

-static void DebugDump(bool should_print, const STRING &phase, const ParagraphTheory &theory,
+static void DebugDump(bool should_print, const char *phase, const ParagraphTheory &theory,
                      const GenericVector<RowScratchRegisters> &rows) {
  if (!should_print)
    return;
-  tprintf("# %s\n", phase.c_str());
+  tprintf("# %s\n", phase);
  PrintDetectorState(theory, rows);
 }

@ -240,7 +232,7 @@ static const char *SkipOne(const char *str, const char *toskip) {
 // Return whether it is very likely that this is a numeral marker that could
 // start a list item.  Some examples include:
 //   A   I   iii.   VI   (2)   3.5.   [C-4]
-static bool LikelyListNumeral(const STRING &word) {
+static bool LikelyListNumeral(const std::string &word) {
  const char *kRomans = "ivxlmdIVXLMD";
  const char *kDigits = "012345789";
  const char *kOpen = "[{(";
@ -274,12 +266,12 @@ static bool LikelyListNumeral(const STRING &word) {
  return *pos == '\0';
 }

-static bool LikelyListMark(const STRING &word) {
+static bool LikelyListMark(const std::string &word) {
  const char *kListMarks = "0Oo*.,+.";
  return word.size() == 1 && strchr(kListMarks, word[0]) != nullptr;
 }

-bool AsciiLikelyListItem(const STRING &word) {
+bool AsciiLikelyListItem(const std::string &word) {
  return LikelyListMark(word) || LikelyListNumeral(word);
 }

@ -348,7 +340,7 @@ int UnicodeSpanSkipper::SkipAlpha(int pos) {

 static bool LikelyListMarkUnicode(int ch) {
  if (ch < 0x80) {
-    STRING single_ch;
+    std::string single_ch;
    single_ch += ch;
    return LikelyListMark(single_ch);
  }
@ -413,7 +405,7 @@ static bool UniLikelyListItem(const UNICHARSET *u, const WERD_CHOICE *werd) {
 //   is_list -      this word might be a list number or bullet.
 //   starts_idea -  this word is likely to start a sentence.
 //   ends_idea -    this word is likely to end a sentence.
-void LeftWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd, const STRING &utf8,
+void LeftWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd, const std::string &utf8,
                        bool *is_list, bool *starts_idea, bool *ends_idea) {
  *is_list = false;
  *starts_idea = false;
@ -459,7 +451,7 @@ void LeftWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd, c
 //   is_list -      this word might be a list number or bullet.
 //   starts_idea -  this word is likely to start a sentence.
 //   ends_idea -    this word is likely to end a sentence.
-void RightWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd, const STRING &utf8,
+void RightWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd, const std::string &utf8,
                         bool *is_list, bool *starts_idea, bool *ends_idea) {
  *is_list = false;
  *starts_idea = false;
@ -492,17 +484,17 @@ void RightWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd,

 // =============== Implementation of RowScratchRegisters =====================
 /* static */
-void RowScratchRegisters::AppendDebugHeaderFields(std::vector<STRING> *header) {
-  header->push_back("[lmarg,lind;rind,rmarg]");
-  header->push_back("model");
+void RowScratchRegisters::AppendDebugHeaderFields(std::vector<std::string> &header) {
+  header.push_back("[lmarg,lind;rind,rmarg]");
+  header.push_back("model");
 }

 void RowScratchRegisters::AppendDebugInfo(const ParagraphTheory &theory,
-                                          std::vector<STRING> *dbg) const {
+                                          std::vector<std::string> &dbg) const {
  char s[30];
  snprintf(s, sizeof(s), "[%3d,%3d;%3d,%3d]", lmargin_, lindent_, rindent_, rmargin_);
-  dbg->push_back(s);
-  STRING model_string;
+  dbg.push_back(s);
+  std::string model_string;
  model_string += static_cast<char>(GetLineType());
  model_string += ":";

@ -513,7 +505,7 @@ void RowScratchRegisters::AppendDebugInfo(const ParagraphTheory &theory,
    if (model_numbers > 0)
      model_string += ",";
    if (StrongModel(hypotheses_[h].model)) {
-      model_string += StrOf(1 + theory.IndexOf(hypotheses_[h].model));
+      model_string += std::to_string(1 + theory.IndexOf(hypotheses_[h].model));
    } else if (hypotheses_[h].model == kCrownLeft) {
      model_string += "CrL";
    } else if (hypotheses_[h].model == kCrownRight) {
@ -524,7 +516,7 @@ void RowScratchRegisters::AppendDebugInfo(const ParagraphTheory &theory,
  if (model_numbers == 0)
    model_string += "0";

-  dbg->push_back(model_string);
+  dbg.push_back(model_string);
 }

 void RowScratchRegisters::Init(const RowInfo &row) {
@ -2323,7 +2315,7 @@ void DetectParagraphs(int debug_level, std::vector<RowInfo> *row_infos,

 static void InitializeTextAndBoxesPreRecognition(const MutableIterator &it, RowInfo *info) {
  // Set up text, lword_text, and rword_text (mostly for debug printing).
-  STRING fake_text;
+  std::string fake_text;
  PageIterator pit(static_cast<const PageIterator &>(it));
  bool first_word = true;
  if (!pit.Empty(RIL_WORD)) {
--- a/src/ccmain/paragraphs.h
+++ b/src/ccmain/paragraphs.h
@ -21,8 +21,8 @@
 #define TESSERACT_CCMAIN_PARAGRAPHS_H_

 #include <list>
+#include <string>
 #include "rect.h"   // for TBOX
-#include "strngs.h" // for STRING

 namespace tesseract {

@ -41,7 +41,7 @@ class GenericVector;
 class RowInfo {
 public:
  // Constant data derived from Tesseract output.
-  STRING text; // the full UTF-8 text of the line.
+  std::string text; // the full UTF-8 text of the line.
  bool ltr;    // whether the majority of the text is left-to-right
               // TODO(eger) make this more fine-grained.

@ -56,8 +56,8 @@ public:
  TBOX lword_box; // in normalized (horiz text rows) space
  TBOX rword_box; // in normalized (horiz text rows) space

-  STRING lword_text; // the UTF-8 text of the leftmost werd
-  STRING rword_text; // the UTF-8 text of the rightmost werd
+  std::string lword_text; // the UTF-8 text of the leftmost werd
+  std::string rword_text; // the UTF-8 text of the rightmost werd

  //   The text of a paragraph typically starts with the start of an idea and
  // ends with the end of an idea.  Here we define paragraph as something that
--- a/src/ccmain/paragraphs_internal.h
+++ b/src/ccmain/paragraphs_internal.h
@ -32,7 +32,7 @@ class WERD_CHOICE;

 // Return whether the given word is likely to be a list item start word.
 TESS_API
-bool AsciiLikelyListItem(const STRING &word);
+bool AsciiLikelyListItem(const std::string &word);

 // Return the first Unicode Codepoint from werd[pos].
 int UnicodeFor(const UNICHARSET *u, const WERD_CHOICE *werd, int pos);
@ -40,12 +40,12 @@ int UnicodeFor(const UNICHARSET *u, const WERD_CHOICE *werd, int pos);
 // Set right word attributes given either a unicharset and werd or a utf8
 // string.
 TESS_API
-void RightWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd, const STRING &utf8,
+void RightWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd, const std::string &utf8,
                         bool *is_list, bool *starts_idea, bool *ends_idea);

 // Set left word attributes given either a unicharset and werd or a utf8 string.
 TESS_API
-void LeftWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd, const STRING &utf8,
+void LeftWordAttributes(const UNICHARSET *unicharset, const WERD_CHOICE *werd, const std::string &utf8,
                        bool *is_list, bool *starts_idea, bool *ends_idea);

 enum LineType {
@ -171,10 +171,10 @@ public:
  }

  // Append header fields to a vector of row headings.
-  static void AppendDebugHeaderFields(std::vector<STRING> *header);
+  static void AppendDebugHeaderFields(std::vector<std::string> &header);

  // Append data for this row to a vector of debug strings.
-  void AppendDebugInfo(const ParagraphTheory &theory, std::vector<STRING> *dbg) const;
+  void AppendDebugInfo(const ParagraphTheory &theory, std::vector<std::string> &dbg) const;

  const RowInfo *ri_;

--- a/src/ccmain/paramsd.cpp
+++ b/src/ccmain/paramsd.cpp
@ -151,11 +151,7 @@ std::string ParamContent::GetValue() const {
  } else if (param_type_ == VT_DOUBLE) {
    result += std::to_string(*dIt);
  } else if (param_type_ == VT_STRING) {
-    if (STRING(*(sIt)).c_str() != nullptr) {
-      result = sIt->c_str();
-    } else {
-      result = "Null";
-    }
+    result = sIt->c_str();
  }
  return result;
 }
@ -183,8 +179,8 @@ void ParamContent::SetValue(const char *val) {

 // Gets the up to the first 3 prefixes from s (split by _).
 // For example, tesseract_foo_bar will be split into tesseract,foo and bar.
-void ParamsEditor::GetPrefixes(const char *s, STRING *level_one, STRING *level_two,
-                               STRING *level_three) {
+void ParamsEditor::GetPrefixes(const char *s, std::string *level_one, std::string *level_two,
+                               std::string *level_three) {
  std::unique_ptr<char[]> p(new char[1024]);
  GetFirstWords(s, 1, p.get());
  *level_one = p.get();
@ -234,9 +230,9 @@ SVMenuNode *ParamsEditor::BuildListOfAllLeaves(tesseract::Tesseract *tess) {
  // Count the # of entries starting with a specific prefix.
  for (vc_it.mark_cycle_pt(); !vc_it.cycled_list(); vc_it.forward()) {
    ParamContent *vc = vc_it.data();
-    STRING tag;
-    STRING tag2;
-    STRING tag3;
+    std::string tag;
+    std::string tag2;
+    std::string tag3;

    GetPrefixes(vc->GetName(), &tag, &tag2, &tag3);
    amount[tag.c_str()]++;
@ -252,9 +248,9 @@ SVMenuNode *ParamsEditor::BuildListOfAllLeaves(tesseract::Tesseract *tess) {
  vc_it.move_to_first();
  for (vc_it.mark_cycle_pt(); !vc_it.cycled_list(); vc_it.forward()) {
    ParamContent *vc = vc_it.data();
-    STRING tag;
-    STRING tag2;
-    STRING tag3;
+    std::string tag;
+    std::string tag2;
+    std::string tag3;
    GetPrefixes(vc->GetName(), &tag, &tag2, &tag3);

    if (amount[tag.c_str()] == 1) {
@ -304,7 +300,7 @@ ParamsEditor::ParamsEditor(tesseract::Tesseract *tess, ScrollView *sv) {

  SVMenuNode *svMenuRoot = BuildListOfAllLeaves(tess);

-  STRING paramfile;
+  std::string paramfile;
  paramfile = tess->datadir;
  paramfile += VARDIR;   // parameters dir
  paramfile += "edited"; // actual name
--- a/src/ccmain/paramsd.h
+++ b/src/ccmain/paramsd.h
@ -25,7 +25,6 @@

 #  include "elst.h"       // for ELIST_ITERATOR, ELISTIZEH, ELIST_LINK
 #  include "scrollview.h" // for ScrollView (ptr only), SVEvent (ptr only)
-#  include "strngs.h"     // for STRING

 namespace tesseract {

@ -107,7 +106,7 @@ public:
 private:
  // Gets the up to the first 3 prefixes from s (split by _).
  // For example, tesseract_foo_bar will be split into tesseract,foo and bar.
-  void GetPrefixes(const char *s, STRING *level_one, STRING *level_two, STRING *level_three);
+  void GetPrefixes(const char *s, std::string *level_one, std::string *level_two, std::string *level_three);

  // Gets the first n words (split by _) and puts them in t.
  // For example, tesseract_foo_bar with N=2 will yield tesseract_foo_.
--- a/src/ccmain/pgedit.cpp
+++ b/src/ccmain/pgedit.cpp
@ -794,8 +794,8 @@ bool Tesseract::word_display(PAGE_RES_IT *pr_it) {
  }

  // Display correct text and blamer information.
-  STRING text;
-  STRING blame;
+  std::string text;
+  std::string blame;
  if (word->display_flag(DF_TEXT) && word->text() != nullptr) {
    text = word->text();
  }
@ -810,7 +810,7 @@ bool Tesseract::word_display(PAGE_RES_IT *pr_it) {
      text = blamer_bundle->TruthString();
    }
    text += " -> ";
-    STRING best_choice_str;
+    std::string best_choice_str;
    if (word_res->best_choice == nullptr) {
      best_choice_str = "NULL";
    } else {
--- a/src/ccmain/recogtraining.cpp
+++ b/src/ccmain/recogtraining.cpp
@ -98,7 +98,7 @@ void Tesseract::recog_training_segmented(const char *filename, PAGE_RES *page_re
  PAGE_RES_IT page_res_it;
  page_res_it.page_res = page_res;
  page_res_it.restart_page();
-  STRING label;
+  std::string label;

  // Process all the words on this page.
  TBOX tbox; // tesseract-identified box
@ -108,14 +108,14 @@ void Tesseract::recog_training_segmented(const char *filename, PAGE_RES *page_re
  int examined_words = 0;
  do {
    keep_going = read_t(&page_res_it, &tbox);
-    keep_going &= ReadNextBox(applybox_page, &line_number, box_file, &label, &bbox);
+    keep_going &= ReadNextBox(applybox_page, &line_number, box_file, label, &bbox);
    // Align bottom left points of the TBOXes.
    while (keep_going && !NearlyEqual<int>(tbox.bottom(), bbox.bottom(), kMaxBoxEdgeDiff)) {
      if (bbox.bottom() < tbox.bottom()) {
        page_res_it.forward();
        keep_going = read_t(&page_res_it, &tbox);
      } else {
-        keep_going = ReadNextBox(applybox_page, &line_number, box_file, &label, &bbox);
+        keep_going = ReadNextBox(applybox_page, &line_number, box_file, label, &bbox);
      }
    }
    while (keep_going && !NearlyEqual<int>(tbox.left(), bbox.left(), kMaxBoxEdgeDiff)) {
@ -123,7 +123,7 @@ void Tesseract::recog_training_segmented(const char *filename, PAGE_RES *page_re
        page_res_it.forward();
        keep_going = read_t(&page_res_it, &tbox);
      } else {
-        keep_going = ReadNextBox(applybox_page, &line_number, box_file, &label, &bbox);
+        keep_going = ReadNextBox(applybox_page, &line_number, box_file, label, &bbox);
      }
    }
    // OCR the word if top right points of the TBOXes are similar.
--- a/src/ccmain/reject.cpp
+++ b/src/ccmain/reject.cpp
@ -53,9 +53,6 @@ int16_t Tesseract::safe_dict_word(const WERD_RES *werd_res) {

 namespace tesseract {

-CLISTIZEH(STRING)
-CLISTIZE(STRING)
-
 /*************************************************************************
 * set_done()
 *
@ -196,7 +193,7 @@ void Tesseract::reject_I_1_L(WERD_RES *word) {

  for (i = 0, offset = 0; word->best_choice->unichar_string()[offset] != '\0';
       offset += word->best_choice->unichar_lengths()[i], i += 1) {
-    if (STRING(conflict_set_I_l_1).contains(word->best_choice->unichar_string()[offset])) {
+    if (conflict_set_I_l_1.contains(word->best_choice->unichar_string()[offset])) {
      // rej 1Il conflict
      word->reject_map[i].setrej_1Il_conflict();
    }
@ -316,7 +313,7 @@ bool Tesseract::one_ell_conflict(WERD_RES *word_res, bool update_map) {
       offset += lengths[i++])
    non_conflict_set_char = (word_res->uch_set->get_isalpha(word + offset, lengths[i]) ||
                             word_res->uch_set->get_isdigit(word + offset, lengths[i])) &&
-                            !STRING(conflict_set_I_l_1).contains(word[offset]);
+                            !conflict_set_I_l_1.contains(word[offset]);
  if (!non_conflict_set_char) {
    if (update_map)
      reject_I_1_L(word_res);
@ -409,7 +406,7 @@ bool Tesseract::one_ell_conflict(WERD_RES *word_res, bool update_map) {
    for (i = 0, offset = 0; word[offset] != '\0';
         offset += word_res->best_choice->unichar_lengths()[i++]) {
      if ((!allow_1s || (word[offset] != '1')) &&
-          STRING(conflict_set_I_l_1).contains(word[offset])) {
+          conflict_set_I_l_1.contains(word[offset])) {
        if (update_map)
          word_res->reject_map[i].setrej_1Il_conflict();
        conflict = true;
@ -425,7 +422,7 @@ bool Tesseract::one_ell_conflict(WERD_RES *word_res, bool update_map) {
  if ((word_type == AC_LOWER_CASE) || (word_type == AC_INITIAL_CAP)) {
    first_alphanum_index_ = first_alphanum_index(word, lengths);
    first_alphanum_offset_ = first_alphanum_offset(word, lengths);
-    if (STRING(conflict_set_I_l_1).contains(word[first_alphanum_offset_])) {
+    if (conflict_set_I_l_1.contains(word[first_alphanum_offset_])) {
      if (update_map)
        word_res->reject_map[first_alphanum_index_].setrej_1Il_conflict();
      return true;
@ -502,7 +499,7 @@ void Tesseract::dont_allow_1Il(WERD_RES *word) {

  for (i = 0, offset = 0; i < word_len; offset += word->best_choice->unichar_lengths()[i++]) {
    if (word->reject_map[i].accepted()) {
-      if (STRING(conflict_set_I_l_1).contains(s[offset])) {
+      if (conflict_set_I_l_1.contains(s[offset])) {
        accepted_1Il = true;
      } else {
        if (word->uch_set->get_isalpha(s + offset, lengths[i]) ||
@ -515,7 +512,7 @@ void Tesseract::dont_allow_1Il(WERD_RES *word) {
    return; // Nothing to worry about

  for (i = 0, offset = 0; i < word_len; offset += word->best_choice->unichar_lengths()[i++]) {
-    if (STRING(conflict_set_I_l_1).contains(s[offset]) && word->reject_map[i].accepted())
+    if (conflict_set_I_l_1.contains(s[offset]) && word->reject_map[i].accepted())
      word->reject_map[i].setrej_postNN_1Il();
  }
 }
@ -549,7 +546,7 @@ bool Tesseract::repeated_nonalphanum_wd(WERD_RES *word, ROW *row) {
  if (word->best_choice->unichar_lengths().length() <= 1)
    return false;

-  if (!STRING(ok_repeated_ch_non_alphanum_wds).contains(word->best_choice->unichar_string()[0]))
+  if (!ok_repeated_ch_non_alphanum_wds.contains(word->best_choice->unichar_string()[0]))
    return false;

  UNICHAR_ID uch_id = word->best_choice->unichar_id(0);
--- a/src/ccmain/tesseractclass.cpp
+++ b/src/ccmain/tesseractclass.cpp
@ -440,7 +440,7 @@ Dict &Tesseract::getDict() {
 }

 void Tesseract::Clear() {
-  STRING debug_name = imagebasename + "_debug.pdf";
+  std::string debug_name = imagebasename + "_debug.pdf";
  pixa_debug_.WritePDF(debug_name.c_str());
  pixDestroy(&pix_binary_);
  pixDestroy(&pix_grey_);
--- a/src/ccstruct/boxread.cpp
+++ b/src/ccstruct/boxread.cpp
@ -98,9 +98,9 @@ bool ReadMemBoxes(int target_page, bool skip_blanks, const char *box_data, bool
  int num_boxes = 0;
  for (int i = 0; i < lines.size(); ++i) {
    int page = 0;
-    STRING utf8_str;
+    std::string utf8_str;
    TBOX box;
-    if (!ParseBoxFileStr(lines[i].c_str(), &page, &utf8_str, &box)) {
+    if (!ParseBoxFileStr(lines[i].c_str(), &page, utf8_str, &box)) {
      if (continue_on_failure)
        continue;
      else
@ -137,14 +137,14 @@ bool ReadMemBoxes(int target_page, bool skip_blanks, const char *box_data, bool
 // for valid utf-8 and allows space or tab between fields.
 // utf8_str is set with the unichar string, and bounding box with the box.
 // If there are page numbers in the file, it reads them all.
-bool ReadNextBox(int *line_number, FILE *box_file, STRING *utf8_str, TBOX *bounding_box) {
+bool ReadNextBox(int *line_number, FILE *box_file, std::string &utf8_str, TBOX *bounding_box) {
  return ReadNextBox(-1, line_number, box_file, utf8_str, bounding_box);
 }

 // As ReadNextBox above, but get a specific page number. (0-based)
 // Use -1 to read any page number. Files without page number all
 // read as if they are page 0.
-bool ReadNextBox(int target_page, int *line_number, FILE *box_file, STRING *utf8_str,
+bool ReadNextBox(int target_page, int *line_number, FILE *box_file, std::string &utf8_str,
                 TBOX *bounding_box) {
  int page = 0;
  char buff[kBoxReadBufSize]; // boxfile read buffer
@ -185,10 +185,10 @@ bool ReadNextBox(int target_page, int *line_number, FILE *box_file, STRING *utf8
 // and for word/line-level boxes:
 //   WordStr <left> <bottom> <right> <top> <page id> #<space-delimited word str>
 // See applyybox.cpp for more information.
-bool ParseBoxFileStr(const char *boxfile_str, int *page_number, STRING *utf8_str,
+bool ParseBoxFileStr(const char *boxfile_str, int *page_number, std::string &utf8_str,
                     TBOX *bounding_box) {
  *bounding_box = TBOX(); // Initialize it to empty.
-  *utf8_str = "";
+  utf8_str = "";
  char uch[kBoxReadBufSize];
  const char *buffptr = boxfile_str;
  // Read the unichar without messing up on Tibetan.
@ -245,7 +245,7 @@ bool ParseBoxFileStr(const char *boxfile_str, int *page_number, STRING *utf8_str
    }
    used += new_used;
  }
-  *utf8_str = uch;
+  utf8_str = uch;
  if (x_min > x_max)
    std::swap(x_min, x_max);
  if (y_min > y_max)
--- a/src/ccstruct/boxread.h
+++ b/src/ccstruct/boxread.h
@ -64,18 +64,18 @@ bool ReadMemBoxes(int target_page, bool skip_blanks, const char *box_data, bool
 // utf8_str is set with the unichar string, and bounding box with the box.
 // If there are page numbers in the file, it reads them all.
 TESS_API
-bool ReadNextBox(int *line_number, FILE *box_file, STRING *utf8_str, TBOX *bounding_box);
+bool ReadNextBox(int *line_number, FILE *box_file, std::string &utf8_str, TBOX *bounding_box);
 // As ReadNextBox above, but get a specific page number. (0-based)
 // Use -1 to read any page number. Files without page number all
 // read as if they are page 0.
 TESS_API
-bool ReadNextBox(int target_page, int *line_number, FILE *box_file, STRING *utf8_str,
+bool ReadNextBox(int target_page, int *line_number, FILE *box_file, std::string &utf8_str,
                 TBOX *bounding_box);

 // Parses the given box file string into a page_number, utf8_str, and
 // bounding_box. Returns true on a successful parse.
 TESS_API
-bool ParseBoxFileStr(const char *boxfile_str, int *page_number, STRING *utf8_str,
+bool ParseBoxFileStr(const char *boxfile_str, int *page_number, std::string &utf8_str,
                     TBOX *bounding_box);

 // Creates a box file string from a unichar string, TBOX and page number.
--- a/src/ccutil/params.h
+++ b/src/ccutil/params.h
@ -91,7 +91,7 @@ public:
      }
    }
  }
-  // Fetches the value of the named param as a STRING. Returns false if not
+  // Fetches the value of the named param as a string. Returns false if not
  // found.
  static bool GetParamAsString(const char *name, const ParamsVectors *member_params,
                               std::string *value);
@ -242,6 +242,9 @@ public:
  const char *c_str() const {
    return value_.c_str();
  }
+  bool contains(char c) const { // Read-only membership test; const like c_str().
+    return value_.find(c) != std::string::npos; // npos means c is not in value_.
+  }
  bool empty() {
    return value_.length() <= 0;
  }
--- a/src/ccutil/strngs.cpp
+++ b/src/ccutil/strngs.cpp
@ -96,10 +96,6 @@ bool STRING::SkipDeSerialize(TFile *fp) {
  return fp->Skip(len);
 }

-bool STRING::contains(const char c) const {
-  return (c != '\0') && (strchr(c_str(), c) != nullptr);
-}
-
 void STRING::split(const char c, std::vector<STRING> *splited) {
  int start_index = 0;
  const int len = length();
--- a/src/ccutil/strngs.h
+++ b/src/ccutil/strngs.h
@ -59,9 +59,6 @@ public:
  TESS_API
  static bool SkipDeSerialize(tesseract::TFile *fp);

-  TESS_API
-  bool contains(char c) const;
-
  TESS_API
  void split(char c, std::vector<STRING> *splited);
 };
--- a/src/training/common/mastertrainer.cpp
+++ b/src/training/common/mastertrainer.cpp
@ -149,9 +149,9 @@ void MasterTrainer::ReadTrainingSamples(const char *page_name,
    if (font_id < 0)
      font_id = 0;
    int page_number;
-    STRING unichar;
+    std::string unichar;
    TBOX bounding_box;
-    if (!ParseBoxFileStr(space, &page_number, &unichar, &bounding_box)) {
+    if (!ParseBoxFileStr(space, &page_number, unichar, &bounding_box)) {
      tprintf("Bad format in tr file, reading box coords\n");
      continue;
    }