Use const char* for filename parameters

This replaces the proprietary STRING data type with const char* in filename parameters, reducing the number of lines that still use STRING from 838 to 801. It also removes the STRING forward declaration from osdetect.h and serialis.h. Signed-off-by: Stefan Weil <sw@weilnetz.de>
2024-11-27 12:49:35 +08:00 · 2019-07-05 11:54:18 +02:00 · 2019-07-05 11:54:18 +02:00 · 8137cf35a6
commit 8137cf35a6
parent d01b2e43b8
25 changed files with 72 additions and 70 deletions
--- a/include/tesseract/osdetect.h
+++ b/include/tesseract/osdetect.h
@ -25,7 +25,6 @@
 class BLOBNBOX;
 class BLOBNBOX_CLIST;
 class BLOB_CHOICE_LIST;
-class STRING;
 class TO_BLOCK_LIST;
 class UNICHARSET;
 template <typename T>
@ -117,7 +116,7 @@ class ScriptDetector {
  const GenericVector<int>* allowed_scripts_;
 };

-int orientation_and_script_detection(STRING& filename, OSResults*,
+int orientation_and_script_detection(const char* filename, OSResults*,
                                     tesseract::Tesseract*);

 int os_detect(TO_BLOCK_LIST* port_blocks, OSResults* osr,
--- a/include/tesseract/serialis.h
+++ b/include/tesseract/serialis.h
@ -26,7 +26,6 @@

 template <typename T>
 class GenericVector;
-class STRING;

 /***********************************************************************
  QUOTE_IT   MACRO DEFINITION
@ -82,7 +81,7 @@ class TFile {
  // All the Open methods load the whole file into memory for reading.
  // Opens a file with a supplied reader, or nullptr to use the default.
  // Note that mixed read/write is not supported.
-  bool Open(const STRING& filename, FileReader reader);
+  bool Open(const char* filename, FileReader reader);
  // From an existing memory buffer.
  bool Open(const char* data, int size);
  // From an open file and an end offset.
@ -139,7 +138,7 @@ class TFile {
  // calling FWrite, (no close required), or supply a nullptr data to OpenWrite
  // and call CloseWrite to write to a file after the FWrites.
  void OpenWrite(GenericVector<char>* data);
-  bool CloseWrite(const STRING& filename, FileWriter writer);
+  bool CloseWrite(const char* filename, FileWriter writer);

  // Replicates fwrite, returning the number of items written.
  // To use fprintf, use snprintf and FWrite.
--- a/src/api/baseapi.cpp
+++ b/src/api/baseapi.cpp
@ -843,9 +843,9 @@ int TessBaseAPI::Recognize(ETEXT_DESC* monitor) {
  recognition_done_ = true;
 #ifndef DISABLED_LEGACY_ENGINE
  if (tesseract_->tessedit_resegment_from_line_boxes) {
-    page_res_ = tesseract_->ApplyBoxes(*input_file_, true, block_list_);
+    page_res_ = tesseract_->ApplyBoxes(input_file_->c_str(), true, block_list_);
  } else if (tesseract_->tessedit_resegment_from_boxes) {
-    page_res_ = tesseract_->ApplyBoxes(*input_file_, false, block_list_);
+    page_res_ = tesseract_->ApplyBoxes(input_file_->c_str(), false, block_list_);
  } else
 #endif  // ndef DISABLED_LEGACY_ENGINE
  {
@ -858,7 +858,7 @@ int TessBaseAPI::Recognize(ETEXT_DESC* monitor) {
  }

  if (tesseract_->tessedit_train_line_recognizer) {
-    if (!tesseract_->TrainLineRecognizer(*input_file_, *output_file_, block_list_)) {
+    if (!tesseract_->TrainLineRecognizer(input_file_->c_str(), *output_file_, block_list_)) {
      return -1;
    }
    tesseract_->CorrectClassifyWords(page_res_);
@ -895,13 +895,14 @@ int TessBaseAPI::Recognize(ETEXT_DESC* monitor) {
  #ifndef DISABLED_LEGACY_ENGINE
  } else if (tesseract_->tessedit_train_from_boxes) {
    STRING fontname;
-    ExtractFontName(*output_file_, &fontname);
+    ExtractFontName(output_file_->c_str(), &fontname);
    tesseract_->ApplyBoxTraining(fontname, page_res_);
  } else if (tesseract_->tessedit_ambigs_training) {
-    FILE *training_output_file = tesseract_->init_recog_training(*input_file_);
+    FILE* training_output_file =
+      tesseract_->init_recog_training(input_file_->c_str());
    // OCR the page segmented into words by tesseract.
    tesseract_->recog_training_segmented(
-        *input_file_, page_res_, monitor, training_output_file);
+        input_file_->c_str(), page_res_, monitor, training_output_file);
    fclose(training_output_file);
  #endif  // ndef DISABLED_LEGACY_ENGINE
  } else {
@ -1081,7 +1082,7 @@ bool TessBaseAPI::ProcessPages(const char* filename, const char* retry_config,
  #ifndef DISABLED_LEGACY_ENGINE
  if (result) {
    if (tesseract_->tessedit_train_from_boxes &&
-        !tesseract_->WriteTRFile(*output_file_)) {
+        !tesseract_->WriteTRFile(output_file_->c_str())) {
      tprintf("Write of TR file failed: %s\n", output_file_->c_str());
      return false;
    }
@ -2215,7 +2216,7 @@ bool TessBaseAPI::DetectOS(OSResults* osr) {

  if (input_file_ == nullptr)
    input_file_ = new STRING(kInputFile);
-  return orientation_and_script_detection(*input_file_, osr, tesseract_) > 0;
+  return orientation_and_script_detection(input_file_->c_str(), osr, tesseract_) > 0;
 }
 #endif  // ndef DISABLED_LEGACY_ENGINE

--- a/src/ccmain/applybox.cpp
+++ b/src/ccmain/applybox.cpp
@ -90,7 +90,7 @@ static void clear_any_old_text(BLOCK_LIST *block_list) {
  }
 }

-// Applies the box file based on the image name fname, and resegments
+// Applies the box file based on the image name filename, and resegments
 // the words in the block_list (page), with:
 // blob-mode: one blob per line in the box file, words as input.
 // word/line-mode: one blob per space-delimited unit after the #, and one word
@ -110,12 +110,12 @@ static void clear_any_old_text(BLOCK_LIST *block_list) {
 // Instead, the correct_text member of WERD_RES is set, and this may be later
 // converted to a best_choice using CorrectClassifyWords. CorrectClassifyWords
 // is not required before calling ApplyBoxTraining.
-PAGE_RES* Tesseract::ApplyBoxes(const STRING& fname,
+PAGE_RES* Tesseract::ApplyBoxes(const char* filename,
                                bool find_segmentation,
                                BLOCK_LIST *block_list) {
  GenericVector<TBOX> boxes;
  GenericVector<STRING> texts, full_texts;
-  if (!ReadAllBoxes(applybox_page, true, fname, &boxes, &texts, &full_texts,
+  if (!ReadAllBoxes(applybox_page, true, filename, &boxes, &texts, &full_texts,
                    nullptr)) {
    return nullptr;  // Can't do it.
  }
--- a/src/ccmain/linerec.cpp
+++ b/src/ccmain/linerec.cpp
@ -40,7 +40,7 @@ const float kWorstDictCertainty = -25.0f;
 // Breaks the page into lines, according to the boxes, and writes them to a
 // serialized DocumentData based on output_basename.
 // Return true if successful, false if an error occurred.
-bool Tesseract::TrainLineRecognizer(const STRING& input_imagename,
+bool Tesseract::TrainLineRecognizer(const char* input_imagename,
                                    const STRING& output_basename,
                                    BLOCK_LIST *block_list) {
  STRING lstmf_name = output_basename + ".lstmf";
@ -58,12 +58,12 @@ bool Tesseract::TrainLineRecognizer(const STRING& input_imagename,
  if (!ReadAllBoxes(applybox_page, false, input_imagename, &boxes, &texts, nullptr,
                    nullptr) ||
      boxes.empty()) {
-    tprintf("Failed to read boxes from %s\n", input_imagename.c_str());
+    tprintf("Failed to read boxes from %s\n", input_imagename);
    return false;
  }
  TrainFromBoxes(boxes, texts, block_list, &images);
  if (images.PagesSize() == 0) {
-    tprintf("Failed to read pages from %s\n", input_imagename.c_str());
+    tprintf("Failed to read pages from %s\n", input_imagename);
    return false;
  }
  images.Shuffle();
--- a/src/ccmain/osdetect.cpp
+++ b/src/ccmain/osdetect.cpp
@ -187,7 +187,7 @@ static void remove_nontext_regions(tesseract::Tesseract *tess,
 // Find connected components in the page and process a subset until finished or
 // a stopping criterion is met.
 // Returns the number of blobs used in making the estimate. 0 implies failure.
-int orientation_and_script_detection(STRING& filename,
+int orientation_and_script_detection(const char* filename,
                                     OSResults* osr,
                                     tesseract::Tesseract* tess) {
  STRING name = filename;        //truncated name
--- a/src/ccmain/recogtraining.cpp
+++ b/src/ccmain/recogtraining.cpp
@ -33,7 +33,7 @@ const int16_t kMaxBoxEdgeDiff = 2;

 // Sets flags necessary for recognition in the training mode.
 // Opens and returns the pointer to the output file.
-FILE* Tesseract::init_recog_training(const STRING& fname) {
+FILE* Tesseract::init_recog_training(const char* filename) {
  if (tessedit_ambigs_training) {
    tessedit_tess_adaption_mode.set_value(0);  // turn off adaption
    tessedit_enable_doc_dict.set_value(0);     // turn off document dictionary
@ -41,7 +41,7 @@ FILE* Tesseract::init_recog_training(const STRING& fname) {
    getDict().stopper_no_acceptable_choices.set_value(1);
  }

-  STRING output_fname = fname;
+  STRING output_fname = filename;
  const char* lastdot = strrchr(output_fname.c_str(), '.');
  if (lastdot != nullptr)
    output_fname[lastdot - output_fname.c_str()] = '\0';
@ -81,11 +81,11 @@ static bool read_t(PAGE_RES_IT* page_res_it, TBOX* tbox) {
 // match to those specified by the input box file. For each word (ngram in a
 // single bounding box from the input box file) it outputs the ocred result,
 // the correct label, rating and certainty.
-void Tesseract::recog_training_segmented(const STRING& fname,
+void Tesseract::recog_training_segmented(const char* filename,
                                         PAGE_RES* page_res,
                                         volatile ETEXT_DESC* monitor,
                                         FILE* output_file) {
-  STRING box_fname = fname;
+  std::string box_fname = filename;
  const char* lastdot = strrchr(box_fname.c_str(), '.');
  if (lastdot != nullptr)
    box_fname[lastdot - box_fname.c_str()] = '\0';
--- a/src/ccmain/tesseractclass.h
+++ b/src/ccmain/tesseractclass.h
@ -340,7 +340,7 @@ class Tesseract : public Wordrec {
  // Breaks the page into lines, according to the boxes, and writes them to a
  // serialized DocumentData based on output_basename.
  // Return true if successful, false if an error occurred.
-  bool TrainLineRecognizer(const STRING& input_imagename,
+  bool TrainLineRecognizer(const char* input_imagename,
                           const STRING& output_basename,
                           BLOCK_LIST* block_list);
  // Generates training data for training a line recognizer, eg LSTM.
@ -678,7 +678,7 @@ class Tesseract : public Wordrec {
  bool tess_acceptable_word(WERD_RES* word);

  //// applybox.cpp //////////////////////////////////////////////////////
-  // Applies the box file based on the image name fname, and resegments
+  // Applies the box file based on the image name filename, and resegments
  // the words in the block_list (page), with:
  // blob-mode: one blob per line in the box file, words as input.
  // word/line-mode: one blob per space-delimited unit after the #, and one word
@ -698,7 +698,7 @@ class Tesseract : public Wordrec {
  // Instead, the correct_text member of WERD_RES is set, and this may be later
  // converted to a best_choice using CorrectClassifyWords. CorrectClassifyWords
  // is not required before calling ApplyBoxTraining.
-  PAGE_RES* ApplyBoxes(const STRING& fname, bool find_segmentation,
+  PAGE_RES* ApplyBoxes(const char* filename, bool find_segmentation,
                       BLOCK_LIST* block_list);

  // Any row xheight that is significantly different from the median is set
@ -1102,8 +1102,8 @@ class Tesseract : public Wordrec {
             "Detect music staff and remove intersecting components");

  //// ambigsrecog.cpp /////////////////////////////////////////////////////////
-  FILE* init_recog_training(const STRING& fname);
-  void recog_training_segmented(const STRING& fname, PAGE_RES* page_res,
+  FILE* init_recog_training(const char* filename);
+  void recog_training_segmented(const char* filename, PAGE_RES* page_res,
                                volatile ETEXT_DESC* monitor,
                                FILE* output_file);
  void ambigs_classify_and_output(const char* label, PAGE_RES_IT* pr_it,
--- a/src/ccstruct/boxread.cpp
+++ b/src/ccstruct/boxread.cpp
@ -51,8 +51,8 @@ static std::string BoxFileName(const char* image_filename) {
 }

 // Open the boxfile based on the given image filename.
-FILE* OpenBoxFile(const STRING& fname) {
-  std::string filename = BoxFileName(fname.c_str());
+FILE* OpenBoxFile(const char* fname) {
+  std::string filename = BoxFileName(fname);
  FILE* box_file = nullptr;
  if (!(box_file = fopen(filename.c_str(), "rb"))) {
    CANTOPENFILE.error("read_next_box", TESSEXIT, "Can't open box file %s",
@ -68,13 +68,13 @@ FILE* OpenBoxFile(const STRING& fname) {
 // a string is put in box_texts, with the corresponding page number in pages.
 // Each of the output vectors is optional (may be nullptr).
 // Returns false if no boxes are found.
-bool ReadAllBoxes(int target_page, bool skip_blanks, const STRING& filename,
+bool ReadAllBoxes(int target_page, bool skip_blanks, const char* filename,
                  GenericVector<TBOX>* boxes,
                  GenericVector<STRING>* texts,
                  GenericVector<STRING>* box_texts,
                  GenericVector<int>* pages) {
  GenericVector<char> box_data;
-  if (!tesseract::LoadDataFromFile(BoxFileName(filename.c_str()).c_str(), &box_data))
+  if (!tesseract::LoadDataFromFile(BoxFileName(filename).c_str(), &box_data))
    return false;
  // Convert the array of bytes to a string, so it can be used by the parser.
  box_data.push_back('\0');
--- a/src/ccstruct/boxread.h
+++ b/src/ccstruct/boxread.h
@ -32,7 +32,7 @@ const int kBoxReadBufSize = 1024;

 // Open the boxfile based on the given image filename.
 // Returns nullptr if the box file cannot be opened.
-FILE* OpenBoxFile(const STRING& fname);
+FILE* OpenBoxFile(const char* filename);

 // Reads all boxes from the given filename.
 // Reads a specific target_page number if >= 0, or all pages otherwise.
@ -41,7 +41,7 @@ FILE* OpenBoxFile(const STRING& fname);
 // a string is put in box_texts, with the corresponding page number in pages.
 // Each of the output vectors is optional (may be nullptr).
 // Returns false if no boxes are found.
-bool ReadAllBoxes(int target_page, bool skip_blanks, const STRING& filename,
+bool ReadAllBoxes(int target_page, bool skip_blanks, const char* filename,
                  GenericVector<TBOX>* boxes,
                  GenericVector<STRING>* texts,
                  GenericVector<STRING>* box_texts,
--- a/src/ccstruct/imagedata.cpp
+++ b/src/ccstruct/imagedata.cpp
@ -539,7 +539,7 @@ bool DocumentData::ReCachePages() {
  int loaded_pages = 0;
  pages_.truncate(0);
  TFile fp;
-  if (!fp.Open(document_name_, reader_) ||
+  if (!fp.Open(document_name_.c_str(), reader_) ||
      !PointerVector<ImageData>::DeSerializeSize(&fp, &loaded_pages) ||
      loaded_pages <= 0) {
    tprintf("Deserialize header failed: %s\n", document_name_.c_str());
--- a/src/ccutil/object_cache.h
+++ b/src/ccutil/object_cache.h
@ -21,6 +21,7 @@

 #include <mutex>                // for std::mutex
 #include <functional>           // for std::function
+#include <string>
 #include "ccutil.h"
 #include "errcode.h"
 #include <tesseract/genericvector.h>
@ -56,7 +57,7 @@ class ObjectCache {
  // and return nullptr -- further attempts to load will fail (even
  // with a different loader) until DeleteUnusedObjects() is called.
  // We delete the given loader.
-  T* Get(STRING id, std::function<T*()> loader) {
+  T* Get(const std::string& id, std::function<T*()> loader) {
    T *retval = nullptr;
    std::lock_guard<std::mutex> guard(mu_);
    for (int i = 0; i < cache_.size(); i++) {
@ -102,7 +103,7 @@ class ObjectCache {

 private:
  struct ReferenceCount {
-    STRING id;  // A unique ID to identify the object (think path on disk)
+    std::string id; // A unique ID to identify the object (think path on disk)
    T *object;  // A copy of the object in memory.  Can be delete'd.
    int count;  // A count of the number of active users of this object.
  };
--- a/src/ccutil/serialis.cpp
+++ b/src/ccutil/serialis.cpp
@ -20,7 +20,6 @@
 #include <cstdio>
 #include "errcode.h"
 #include <tesseract/genericvector.h>
-#include <tesseract/strngs.h>             // for STRING

 namespace tesseract {

@ -89,8 +88,8 @@ bool Serialize(FILE* fp, const uint32_t* data, size_t n) {
 }

 TFile::TFile()
-    : offset_(0),
-      data_(nullptr),
+    : data_(nullptr),
+      offset_(0),
      data_is_owned_(false),
      is_writing_(false),
      swap_(false) {}
@ -193,7 +192,7 @@ bool TFile::Skip(size_t count) {
  return true;
 }

-bool TFile::Open(const STRING& filename, FileReader reader) {
+bool TFile::Open(const char* filename, FileReader reader) {
  if (!data_is_owned_) {
    data_ = new GenericVector<char>;
    data_is_owned_ = true;
@ -202,9 +201,9 @@ bool TFile::Open(const STRING& filename, FileReader reader) {
  is_writing_ = false;
  swap_ = false;
  if (reader == nullptr)
-    return LoadDataFromFile(filename.c_str(), data_);
+    return LoadDataFromFile(filename, data_);
  else
-    return (*reader)(filename.c_str(), data_);
+    return (*reader)(filename, data_);
 }

 bool TFile::Open(const char* data, int size) {
@ -307,12 +306,12 @@ void TFile::OpenWrite(GenericVector<char>* data) {
  data_->truncate(0);
 }

-bool TFile::CloseWrite(const STRING& filename, FileWriter writer) {
+bool TFile::CloseWrite(const char* filename, FileWriter writer) {
  ASSERT_HOST(is_writing_);
  if (writer == nullptr)
-    return SaveDataToFile(*data_, filename.c_str());
+    return SaveDataToFile(*data_, filename);
  else
-    return (*writer)(*data_, filename.c_str());
+    return (*writer)(*data_, filename);
 }

 int TFile::FWrite(const void* buffer, size_t size, int count) {
--- a/src/ccutil/tessdatamanager.cpp
+++ b/src/ccutil/tessdatamanager.cpp
@ -151,16 +151,16 @@ void TessdataManager::OverwriteEntry(TessdataType type, const char *data,
 }

 // Saves to the given filename.
-bool TessdataManager::SaveFile(const STRING &filename,
+bool TessdataManager::SaveFile(const char* filename,
                               FileWriter writer) const {
  // TODO: This method supports only the proprietary file format.
  ASSERT_HOST(is_loaded_);
  GenericVector<char> data;
  Serialize(&data);
  if (writer == nullptr)
-    return SaveDataToFile(data, filename.c_str());
+    return SaveDataToFile(data, filename);
  else
-    return (*writer)(data, filename.c_str());
+    return (*writer)(data, filename);
 }

 // Serializes to the given vector.
@ -249,7 +249,7 @@ bool TessdataManager::CombineDataFiles(
  for (auto filesuffix : kTessdataFileSuffixes) {
    TessdataType type;
    ASSERT_HOST(TessdataTypeFromFileSuffix(filesuffix, &type));
-    STRING filename = language_data_path_prefix;
+    std::string filename = language_data_path_prefix;
    filename += filesuffix;
    FILE *fp = fopen(filename.c_str(), "rb");
    if (fp != nullptr) {
--- a/src/ccutil/tessdatamanager.h
+++ b/src/ccutil/tessdatamanager.h
@ -19,6 +19,7 @@
 #ifndef TESSERACT_CCUTIL_TESSDATAMANAGER_H_
 #define TESSERACT_CCUTIL_TESSDATAMANAGER_H_

+#include <string>
 #include <tesseract/genericvector.h>
 #include <tesseract/strngs.h>             // for STRING

@ -148,7 +149,7 @@ class TessdataManager {
  void OverwriteEntry(TessdataType type, const char *data, int size);

  // Saves to the given filename.
-  bool SaveFile(const STRING &filename, FileWriter writer) const;
+  bool SaveFile(const char* filename, FileWriter writer) const;
  // Serializes to the given vector.
  void Serialize(GenericVector<char> *data) const;
  // Resets to the initial state, keeping the reader.
@ -183,7 +184,7 @@ class TessdataManager {
  bool IsLSTMAvailable() const { return !entries_[TESSDATA_LSTM].empty(); }

  // Return the name of the underlying data file.
-  const STRING &GetDataFileName() const { return data_file_name_; }
+  const std::string& GetDataFileName() const { return data_file_name_; }

  /**
   * Reads all the standard tesseract config and data files for a language
@ -236,7 +237,7 @@ class TessdataManager {
                                       TessdataType *type);

  // Name of file it came from.
-  STRING data_file_name_;
+  std::string data_file_name_;
  // Function to load the file when we need it.
  FileReader reader_;
  // True if the file has been loaded.
--- a/src/classify/blobclass.cpp
+++ b/src/classify/blobclass.cpp
@ -42,14 +42,14 @@ namespace tesseract {
 // /path/to/dir/[lang].[fontname].exp[num]
 // The [lang], [fontname] and [num] fields should not have '.' characters.
 // If the global parameter classify_font_name is set, its value is used instead.
-void ExtractFontName(const STRING& filename, STRING* fontname) {
+void ExtractFontName(const char* filename, STRING* fontname) {
  *fontname = classify_font_name;
  if (*fontname == kUnknownFontName) {
    // filename is expected to be of the form [lang].[fontname].exp[num]
    // The [lang], [fontname] and [num] fields should not have '.' characters.
-    const char *basename = strrchr(filename.c_str(), '/');
-    const char *firstdot = strchr(basename ? basename : filename.c_str(), '.');
-    const char *lastdot  = strrchr(filename.c_str(), '.');
+    const char *basename = strrchr(filename, '/');
+    const char *firstdot = strchr(basename ? basename : filename, '.');
+    const char *lastdot  = strrchr(filename, '.');
    if (firstdot != lastdot && firstdot != nullptr && lastdot != nullptr) {
      ++firstdot;
      *fontname = firstdot;
@ -95,9 +95,10 @@ void Classify::LearnBlob(const STRING& fontname, TBLOB* blob,

 // Writes stored training data to a .tr file based on the given filename.
 // Returns false on error.
-bool Classify::WriteTRFile(const STRING& filename) {
+bool Classify::WriteTRFile(const char* filename) {
  bool result = false;
-  STRING tr_filename = filename + ".tr";
+  std::string tr_filename = filename;
+  tr_filename += ".tr";
  FILE* fp = fopen(tr_filename.c_str(), "wb");
  if (fp) {
    result =
--- a/src/classify/blobclass.h
+++ b/src/classify/blobclass.h
@ -32,7 +32,7 @@ namespace tesseract {
 // /path/to/dir/[lang].[fontname].exp[num]
 // The [lang], [fontname] and [num] fields should not have '.' characters.
 // If the global parameter classify_font_name is set, its value is used instead.
-void ExtractFontName(const STRING& filename, STRING* fontname);
+void ExtractFontName(const char* filename, STRING* fontname);

 }  // namespace tesseract.

--- a/src/classify/classify.h
+++ b/src/classify/classify.h
@ -412,7 +412,7 @@ class Classify : public CCStruct {
                 const INT_FX_RESULT_STRUCT& fx_info, const char* blob_text);
  // Writes stored training data to a .tr file based on the given filename.
  // Returns false on error.
-  bool WriteTRFile(const STRING& filename);
+  bool WriteTRFile(const char* filename);

  // Member variables.

--- a/src/dict/dawg_cache.cpp
+++ b/src/dict/dawg_cache.cpp
@ -44,7 +44,7 @@ struct DawgLoader {
 Dawg *DawgCache::GetSquishedDawg(const STRING &lang,
                                 TessdataType tessdata_dawg_type,
                                 int debug_level, TessdataManager *data_file) {
-  STRING data_id = data_file->GetDataFileName();
+  std::string data_id = data_file->GetDataFileName();
  data_id += kTessdataFileSuffixes[tessdata_dawg_type];
  DawgLoader loader(lang, tessdata_dawg_type, debug_level, data_file);
  return dawgs_.Get(data_id, std::bind(&DawgLoader::Load, &loader));
--- a/src/training/errorcounter.h
+++ b/src/training/errorcounter.h
@ -20,6 +20,7 @@
 #include "matrix.h"
 #include "statistc.h"

+class STRING;
 struct Pix;
 template <typename T> class UnicityTable;

--- a/src/training/lstmtester.cpp
+++ b/src/training/lstmtester.cpp
@ -28,11 +28,11 @@ LSTMTester::LSTMTester(int64_t max_memory)
 // Loads a set of lstmf files that were created using the lstm.train config to
 // tesseract into memory ready for testing. Returns false if nothing was
 // loaded. The arg is a filename of a file that lists the filenames.
-bool LSTMTester::LoadAllEvalData(const STRING& filenames_file) {
+bool LSTMTester::LoadAllEvalData(const char* filenames_file) {
  GenericVector<STRING> filenames;
-  if (!LoadFileLinesToStrings(filenames_file.c_str(), &filenames)) {
+  if (!LoadFileLinesToStrings(filenames_file, &filenames)) {
    tprintf("Failed to load list of eval filenames from %s\n",
-            filenames_file.c_str());
+            filenames_file);
    return false;
  }
  return LoadAllEvalData(filenames);
--- a/src/training/lstmtester.h
+++ b/src/training/lstmtester.h
@ -34,7 +34,7 @@ class LSTMTester {
  // loaded. The arg is a filename of a file that lists the filenames, with one
  // name per line. Conveniently, tesstrain.sh generates such a file, along
  // with the files themselves.
-  bool LoadAllEvalData(const STRING& filenames_file);
+  bool LoadAllEvalData(const char* filenames_file);
  // Loads a set of lstmf files that were created using the lstm.train config to
  // tesseract into memory ready for testing. Returns false if nothing was
  // loaded.
--- a/src/training/lstmtrainer.cpp
+++ b/src/training/lstmtrainer.cpp
@ -890,7 +890,7 @@ bool LSTMTrainer::ReadLocalTrainingDump(const TessdataManager* mgr,
 }

 // Writes the full recognition traineddata to the given filename.
-bool LSTMTrainer::SaveTraineddata(const STRING& filename) {
+bool LSTMTrainer::SaveTraineddata(const char* filename) {
  GenericVector<char> recognizer_data;
  SaveRecognitionDump(&recognizer_data);
  mgr_.OverwriteEntry(TESSDATA_LSTM, &recognizer_data[0],
--- a/src/training/lstmtrainer.h
+++ b/src/training/lstmtrainer.h
@ -291,7 +291,7 @@ class LSTMTrainer : public LSTMRecognizer {
  void SetupCheckpointInfo();

  // Writes the full recognition traineddata to the given filename.
-  bool SaveTraineddata(const STRING& filename);
+  bool SaveTraineddata(const char* filename);

  // Writes the recognizer to memory, so that it can be used for testing later.
  void SaveRecognitionDump(GenericVector<char>* data) const;
--- a/unittest/applybox_test.cc
+++ b/unittest/applybox_test.cc
@ -73,7 +73,7 @@ class ApplyBoxTest : public testing::Test {
    // Test the boxes by reading the target box file in parallel with the
    // bounding boxes in the ocr output.
    std::string box_filename = TestDataNameToPath(target_box_file);
-    FILE* box_file = OpenBoxFile(STRING(box_filename.c_str()));
+    FILE* box_file = OpenBoxFile(box_filename.c_str());
    ASSERT_TRUE(box_file != nullptr);
    int height = pixGetHeight(src_pix_);
    ResultIterator* it = api_.GetIterator();