Added an extra Init overload that takes a memory buffer or a FileReader function pointer, enabling traineddata to be read from memory or from foreign file systems. Updated the existing readers to use the TFile API instead of FILE. This does not yet add big-endian capability to LSTM, but that is now easy to add from here.

2025-01-18 06:30:14 +08:00 · 2017-04-27 15:48:23 -07:00 · 2017-04-27 15:48:23 -07:00 · 1cc511188d
commit 1cc511188d
parent 10e04ffe99
48 changed files with 833 additions and 1199 deletions
--- a/api/baseapi.cpp
+++ b/api/baseapi.cpp
@ -108,26 +108,30 @@ const int kMinCredibleResolution = 70;
 const int kMaxCredibleResolution = 2400;

 TessBaseAPI::TessBaseAPI()
-  : tesseract_(NULL),
-    osd_tesseract_(NULL),
-    equ_detect_(NULL),
-    // Thresholder is initialized to NULL here, but will be set before use by:
-    // A constructor of a derived API,  SetThresholder(), or
-    // created implicitly when used in InternalSetImage.
-    thresholder_(NULL),
-    paragraph_models_(NULL),
-    block_list_(NULL),
-    page_res_(NULL),
-    input_file_(NULL),
-    output_file_(NULL),
-    datapath_(NULL),
-    language_(NULL),
-    last_oem_requested_(OEM_DEFAULT),
-    recognition_done_(false),
-    truth_cb_(NULL),
-    rect_left_(0), rect_top_(0), rect_width_(0), rect_height_(0),
-    image_width_(0), image_height_(0) {
-}
+    : tesseract_(nullptr),
+      osd_tesseract_(nullptr),
+      equ_detect_(nullptr),
+      reader_(nullptr),
+      // Thresholder is initialized to NULL here, but will be set before use by:
+      // A constructor of a derived API,  SetThresholder(), or
+      // created implicitly when used in InternalSetImage.
+      thresholder_(nullptr),
+      paragraph_models_(nullptr),
+      block_list_(nullptr),
+      page_res_(nullptr),
+      input_file_(nullptr),
+      output_file_(nullptr),
+      datapath_(nullptr),
+      language_(nullptr),
+      last_oem_requested_(OEM_DEFAULT),
+      recognition_done_(false),
+      truth_cb_(NULL),
+      rect_left_(0),
+      rect_top_(0),
+      rect_width_(0),
+      rect_height_(0),
+      image_width_(0),
+      image_height_(0) {}

 TessBaseAPI::~TessBaseAPI() {
  End();
@ -275,20 +279,33 @@ int TessBaseAPI::Init(const char* datapath, const char* language,
                      const GenericVector<STRING> *vars_vec,
                      const GenericVector<STRING> *vars_values,
                      bool set_only_non_debug_params) {
+  return Init(datapath, 0, language, oem, configs, configs_size, vars_vec,
+              vars_values, set_only_non_debug_params, nullptr);
+}
+
+// In-memory version reads the traineddata file directly from the given
+// data[data_size] array. Also implements the version with a datapath in data,
+// flagged by data_size = 0.
+int TessBaseAPI::Init(const char* data, int data_size, const char* language,
+                      OcrEngineMode oem, char** configs, int configs_size,
+                      const GenericVector<STRING>* vars_vec,
+                      const GenericVector<STRING>* vars_values,
+                      bool set_only_non_debug_params, FileReader reader) {
  PERF_COUNT_START("TessBaseAPI::Init")
  // Default language is "eng".
-  if (language == NULL) language = "eng";
+  if (language == nullptr) language = "eng";
+  STRING datapath = data_size == 0 ? data : language;
  // If the datapath, OcrEngineMode or the language have changed - start again.
  // Note that the language_ field stores the last requested language that was
  // initialized successfully, while tesseract_->lang stores the language
  // actually used. They differ only if the requested language was NULL, in
  // which case tesseract_->lang is set to the Tesseract default ("eng").
-  if (tesseract_ != NULL &&
-      (datapath_ == NULL || language_ == NULL ||
-       *datapath_ != datapath || last_oem_requested_ != oem ||
+  if (tesseract_ != nullptr &&
+      (datapath_ == nullptr || language_ == nullptr || *datapath_ != datapath ||
+       last_oem_requested_ != oem ||
       (*language_ != language && tesseract_->lang != language))) {
    delete tesseract_;
-    tesseract_ = NULL;
+    tesseract_ = nullptr;
  }
  // PERF_COUNT_SUB("delete tesseract_")
 #ifdef USE_OPENCL
@ -297,19 +314,25 @@ int TessBaseAPI::Init(const char* datapath, const char* language,
 #endif
  PERF_COUNT_SUB("OD::InitEnv()")
  bool reset_classifier = true;
-  if (tesseract_ == NULL) {
+  if (tesseract_ == nullptr) {
    reset_classifier = false;
    tesseract_ = new Tesseract;
+    if (reader != nullptr) reader_ = reader;
+    TessdataManager mgr(reader_);
+    if (data_size != 0) {
+      mgr.LoadMemBuffer(language, data, data_size);
+    }
    if (tesseract_->init_tesseract(
-        datapath, output_file_ != NULL ? output_file_->string() : NULL,
-        language, oem, configs, configs_size, vars_vec, vars_values,
-        set_only_non_debug_params) != 0) {
+            datapath.string(),
+            output_file_ != nullptr ? output_file_->string() : nullptr,
+            language, oem, configs, configs_size, vars_vec, vars_values,
+            set_only_non_debug_params, &mgr) != 0) {
      return -1;
    }
  }
  PERF_COUNT_SUB("update tesseract_")
  // Update datapath and language requested for the last valid initialization.
-  if (datapath_ == NULL)
+  if (datapath_ == nullptr)
    datapath_ = new STRING(datapath);
  else
    *datapath_ = datapath;
@ -317,7 +340,7 @@ int TessBaseAPI::Init(const char* datapath, const char* language,
      (strcmp(tesseract_->datadir.string(), "") != 0))
     *datapath_ = tesseract_->datadir;

-  if (language_ == NULL)
+  if (language_ == nullptr)
    language_ = new STRING(language);
  else
    *language_ = language;
@ -421,7 +444,8 @@ int TessBaseAPI::InitLangMod(const char* datapath, const char* language) {
    tesseract_ = new Tesseract;
  else
    ParamUtils::ResetToDefaults(tesseract_->params());
-  return tesseract_->init_tesseract_lm(datapath, NULL, language);
+  TessdataManager mgr;
+  return tesseract_->init_tesseract_lm(datapath, NULL, language, &mgr);
 }

 /**
@ -431,7 +455,7 @@ int TessBaseAPI::InitLangMod(const char* datapath, const char* language) {
 void TessBaseAPI::InitForAnalysePage() {
  if (tesseract_ == NULL) {
    tesseract_ = new Tesseract;
-    tesseract_->InitAdaptiveClassifier(false);
+    tesseract_->InitAdaptiveClassifier(nullptr);
  }
 }

@ -2239,7 +2263,7 @@ int TessBaseAPI::FindLines() {
  }
  if (tesseract_ == NULL) {
    tesseract_ = new Tesseract;
-    tesseract_->InitAdaptiveClassifier(false);
+    tesseract_->InitAdaptiveClassifier(nullptr);
  }
  if (tesseract_->pix_binary() == NULL)
    Threshold(tesseract_->mutable_pix_binary());
@ -2261,14 +2285,16 @@ int TessBaseAPI::FindLines() {

  Tesseract* osd_tess = osd_tesseract_;
  OSResults osr;
-  if (PSM_OSD_ENABLED(tesseract_->tessedit_pageseg_mode) && osd_tess == NULL) {
+  if (PSM_OSD_ENABLED(tesseract_->tessedit_pageseg_mode) &&
+      osd_tess == nullptr) {
    if (strcmp(language_->string(), "osd") == 0) {
      osd_tess = tesseract_;
    } else {
      osd_tesseract_ = new Tesseract;
-      if (osd_tesseract_->init_tesseract(
-          datapath_->string(), NULL, "osd", OEM_TESSERACT_ONLY,
-          NULL, 0, NULL, NULL, false) == 0) {
+      TessdataManager mgr(reader_);
+      if (osd_tesseract_->init_tesseract(datapath_->string(), nullptr, "osd",
+                                         OEM_TESSERACT_ONLY, nullptr, 0,
+                                         nullptr, nullptr, false, &mgr) == 0) {
        osd_tess = osd_tesseract_;
        osd_tesseract_->set_source_resolution(
            thresholder_->GetSourceYResolution());
@ -2276,7 +2302,7 @@ int TessBaseAPI::FindLines() {
        tprintf("Warning: Auto orientation and script detection requested,"
                " but osd language failed to load\n");
        delete osd_tesseract_;
-        osd_tesseract_ = NULL;
+        osd_tesseract_ = nullptr;
      }
    }
  }
--- a/api/baseapi.h
+++ b/api/baseapi.h
@ -29,14 +29,15 @@
 // To avoid collision with other typenames include the ABSOLUTE MINIMUM
 // complexity of includes here. Use forward declarations wherever possible
 // and hide includes of complex types in baseapi.cpp.
-#include "platform.h"
 #include "apitypes.h"
+#include "pageiterator.h"
+#include "platform.h"
+#include "publictypes.h"
+#include "resultiterator.h"
+#include "serialis.h"
+#include "tesscallback.h"
 #include "thresholder.h"
 #include "unichar.h"
-#include "tesscallback.h"
-#include "publictypes.h"
-#include "pageiterator.h"
-#include "resultiterator.h"

 template <typename T> class GenericVector;
 class PAGE_RES;
@ -237,6 +238,13 @@ class TESS_API TessBaseAPI {
  int Init(const char* datapath, const char* language) {
    return Init(datapath, language, OEM_DEFAULT, NULL, 0, NULL, NULL, false);
  }
+  // In-memory version reads the traineddata file directly from the given
+  // data[data_size] array, and/or reads data via a FileReader.
+  int Init(const char* data, int data_size, const char* language,
+           OcrEngineMode mode, char** configs, int configs_size,
+           const GenericVector<STRING>* vars_vec,
+           const GenericVector<STRING>* vars_values,
+           bool set_only_non_debug_params, FileReader reader);

  /**
   * Returns the languages string used in the last valid initialization.
@ -859,6 +867,7 @@ class TESS_API TessBaseAPI {
  Tesseract*        tesseract_;       ///< The underlying data object.
  Tesseract*        osd_tesseract_;   ///< For orientation & script detection.
  EquationDetect*   equ_detect_;      ///<The equation detector.
+  FileReader reader_;                 ///< Reads files from any filesystem.
  ImageThresholder* thresholder_;     ///< Image thresholding module.
  GenericVector<ParagraphModel *>* paragraph_models_;
  BLOCK_LIST*       block_list_;      ///< The page layout.
--- a/ccmain/tessedit.cpp
+++ b/ccmain/tessedit.cpp
@ -92,8 +92,8 @@ bool Tesseract::init_tesseract_lang_data(
    const char *arg0, const char *textbase, const char *language,
    OcrEngineMode oem, char **configs, int configs_size,
    const GenericVector<STRING> *vars_vec,
-    const GenericVector<STRING> *vars_values,
-    bool set_only_non_debug_params) {
+    const GenericVector<STRING> *vars_values, bool set_only_non_debug_params,
+    TessdataManager *mgr) {
  // Set the basename, compute the data directory.
  main_setup(arg0, textbase);

@ -105,16 +105,28 @@ bool Tesseract::init_tesseract_lang_data(

  // Initialize TessdataManager.
  STRING tessdata_path = language_data_path_prefix + kTrainedDataSuffix;
-  if (!tessdata_manager.Init(tessdata_path.string(),
-                             tessdata_manager_debug_level)) {
-    return false;
+  if (!mgr->is_loaded() && !mgr->Init(tessdata_path.string())) {
+    // Try without tessdata.
+    m_data_sub_dir.set_value("");
+    main_setup(arg0, textbase);
+    language_data_path_prefix = datadir;
+    language_data_path_prefix += lang;
+    language_data_path_prefix += ".";
+    tessdata_path = language_data_path_prefix + kTrainedDataSuffix;
+    if (!mgr->Init(tessdata_path.string())) {
+      tprintf("Error opening data file %s\n", tessdata_path.string());
+      tprintf(
+          "Please make sure the TESSDATA_PREFIX environment variable is set"
+          " to your \"tessdata\" directory.\n");
+      return false;
+    }
  }
  if (oem == OEM_DEFAULT) {
    // Set the engine mode from availability, which can then be overidden by
    // the config file when we read it below.
-    if (!tessdata_manager.IsLSTMAvailable()) {
+    if (!mgr->IsLSTMAvailable()) {
      tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_ONLY);
-    } else if (!tessdata_manager.IsBaseAvailable()) {
+    } else if (!mgr->IsBaseAvailable()) {
      tessedit_ocr_engine_mode.set_value(OEM_LSTM_ONLY);
    } else {
      tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_LSTM_COMBINED);
@ -122,14 +134,10 @@ bool Tesseract::init_tesseract_lang_data(
  }

  // If a language specific config file (lang.config) exists, load it in.
-  if (tessdata_manager.SeekToStart(TESSDATA_LANG_CONFIG)) {
-    ParamUtils::ReadParamsFromFp(
-        tessdata_manager.GetDataFilePtr(),
-        tessdata_manager.GetEndOffset(TESSDATA_LANG_CONFIG),
-        SET_PARAM_CONSTRAINT_NONE, this->params());
-    if (tessdata_manager_debug_level) {
-      tprintf("Loaded language config file\n");
-    }
+  TFile fp;
+  if (mgr->GetComponent(TESSDATA_LANG_CONFIG, &fp)) {
+    ParamUtils::ReadParamsFromFp(SET_PARAM_CONSTRAINT_NONE, &fp,
+                                 this->params());
  }

  SetParamConstraint set_params_constraint = set_only_non_debug_params ?
@ -159,10 +167,6 @@ bool Tesseract::init_tesseract_lang_data(
    if (params_file != NULL) {
      ParamUtils::PrintParams(params_file, this->params());
      fclose(params_file);
-      if (tessdata_manager_debug_level > 0) {
-        tprintf("Wrote parameters to %s\n",
-                tessedit_write_params_to_file.string());
-      }
    } else {
      tprintf("Failed to open %s for writing params.\n",
              tessedit_write_params_to_file.string());
@ -171,17 +175,10 @@ bool Tesseract::init_tesseract_lang_data(

  // Determine which ocr engine(s) should be loaded and used for recognition.
  if (oem != OEM_DEFAULT) tessedit_ocr_engine_mode.set_value(oem);
-  if (tessdata_manager_debug_level) {
-    tprintf("Loading Tesseract/LSTM with tessedit_ocr_engine_mode %d\n",
-            static_cast<int>(tessedit_ocr_engine_mode));
-  }

  // If we are only loading the config file (and so not planning on doing any
  // recognition) then there's nothing else do here.
  if (tessedit_init_config_only) {
-    if (tessdata_manager_debug_level) {
-      tprintf("Returning after loading config file\n");
-    }
    return true;
  }

@ -191,17 +188,14 @@ bool Tesseract::init_tesseract_lang_data(
 #ifndef ANDROID_BUILD
  if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY ||
      tessedit_ocr_engine_mode == OEM_TESSERACT_LSTM_COMBINED) {
-    if (tessdata_manager.swap()) {
+    if (mgr->swap()) {
      tprintf("Error: LSTM requested on big-endian hardware!!\n");
      tprintf("Big-endian not yet supported! Loading tesseract.\n");
      tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_ONLY);
-    } else if (tessdata_manager.SeekToStart(TESSDATA_LSTM)) {
+    } else if (mgr->GetComponent(TESSDATA_LSTM, &fp)) {
      lstm_recognizer_ = new LSTMRecognizer;
-      TFile fp;
-      fp.Open(tessdata_manager.GetDataFilePtr(), -1);
-      ASSERT_HOST(lstm_recognizer_->DeSerialize(tessdata_manager.swap(), &fp));
-      if (lstm_use_matrix)
-        lstm_recognizer_->LoadDictionary(tessdata_path.string(), language);
+      ASSERT_HOST(lstm_recognizer_->DeSerialize(mgr->swap(), &fp));
+      if (lstm_use_matrix) lstm_recognizer_->LoadDictionary(language, mgr);
    } else {
      tprintf("Error: LSTM requested, but not present!! Loading tesseract.\n");
      tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_ONLY);
@ -215,15 +209,14 @@ bool Tesseract::init_tesseract_lang_data(
 #ifndef ANDROID_BUILD
    unicharset.CopyFrom(lstm_recognizer_->GetUnicharset());
 #endif
-  } else if (!tessdata_manager.SeekToStart(TESSDATA_UNICHARSET) ||
-             !unicharset.load_from_file(tessdata_manager.GetDataFilePtr())) {
+  } else if (!mgr->GetComponent(TESSDATA_UNICHARSET, &fp) ||
+             !unicharset.load_from_file(&fp, false)) {
    return false;
  }
  if (unicharset.size() > MAX_NUM_CLASSES) {
    tprintf("Error: Size of unicharset is greater than MAX_NUM_CLASSES\n");
    return false;
  }
-  if (tessdata_manager_debug_level) tprintf("Loaded unicharset\n");
  right_to_left_ = unicharset.major_right_to_left();

  // Setup initial unichar ambigs table and read universal ambigs.
@ -232,16 +225,10 @@ bool Tesseract::init_tesseract_lang_data(
  unichar_ambigs.InitUnicharAmbigs(unicharset, use_ambigs_for_adaption);
  unichar_ambigs.LoadUniversal(encoder_unicharset, &unicharset);

-  if (!tessedit_ambigs_training &&
-      tessdata_manager.SeekToStart(TESSDATA_AMBIGS)) {
-    TFile ambigs_file;
-    ambigs_file.Open(tessdata_manager.GetDataFilePtr(),
-                     tessdata_manager.GetEndOffset(TESSDATA_AMBIGS) + 1);
-    unichar_ambigs.LoadUnicharAmbigs(
-        encoder_unicharset,
-        &ambigs_file,
-        ambigs_debug_level, use_ambigs_for_adaption, &unicharset);
-    if (tessdata_manager_debug_level) tprintf("Loaded ambigs\n");
+  if (!tessedit_ambigs_training && mgr->GetComponent(TESSDATA_AMBIGS, &fp)) {
+    unichar_ambigs.LoadUnicharAmbigs(encoder_unicharset, &fp,
+                                     ambigs_debug_level,
+                                     use_ambigs_for_adaption, &unicharset);
  }
  // Init ParamsModel.
  // Load pass1 and pass2 weights (for now these two sets are the same, but in
@ -250,15 +237,12 @@ bool Tesseract::init_tesseract_lang_data(
      p < ParamsModel::PTRAIN_NUM_PASSES; ++p) {
    language_model_->getParamsModel().SetPass(
        static_cast<ParamsModel::PassEnum>(p));
-    if (tessdata_manager.SeekToStart(TESSDATA_PARAMS_MODEL)) {
-      if (!language_model_->getParamsModel().LoadFromFp(
-          lang.string(), tessdata_manager.GetDataFilePtr(),
-          tessdata_manager.GetEndOffset(TESSDATA_PARAMS_MODEL))) {
+    if (mgr->GetComponent(TESSDATA_PARAMS_MODEL, &fp)) {
+      if (!language_model_->getParamsModel().LoadFromFp(lang.string(), &fp)) {
        return false;
      }
    }
  }
-  if (tessdata_manager_debug_level) language_model_->getParamsModel().Print();

  return true;
 }
@ -303,8 +287,6 @@ void Tesseract::ParseLanguageString(const char* lang_str,
    remains = next;
    // Check whether lang_code is already in the target vector and add.
    if (!IsStrInList(lang_code, *target)) {
-      if (tessdata_manager_debug_level)
-        tprintf("Adding language '%s' to list\n", lang_code.string());
      target->push_back(lang_code);
    }
  }
@ -314,12 +296,13 @@ void Tesseract::ParseLanguageString(const char* lang_str,
 // string and recursively any additional languages required by any language
 // traineddata file (via tessedit_load_sublangs in its config) that is loaded.
 // See init_tesseract_internal for args.
-int Tesseract::init_tesseract(
-    const char *arg0, const char *textbase, const char *language,
-    OcrEngineMode oem, char **configs, int configs_size,
-    const GenericVector<STRING> *vars_vec,
-    const GenericVector<STRING> *vars_values,
-    bool set_only_non_debug_params) {
+int Tesseract::init_tesseract(const char *arg0, const char *textbase,
+                              const char *language, OcrEngineMode oem,
+                              char **configs, int configs_size,
+                              const GenericVector<STRING> *vars_vec,
+                              const GenericVector<STRING> *vars_values,
+                              bool set_only_non_debug_params,
+                              TessdataManager *mgr) {
  GenericVector<STRING> langs_to_load;
  GenericVector<STRING> langs_not_to_load;
  ParseLanguageString(language, &langs_to_load, &langs_not_to_load);
@ -341,15 +324,15 @@ int Tesseract::init_tesseract(
      }

      int result = tess_to_init->init_tesseract_internal(
-          arg0, textbase, lang_str, oem, configs, configs_size,
-          vars_vec, vars_values, set_only_non_debug_params);
+          arg0, textbase, lang_str, oem, configs, configs_size, vars_vec,
+          vars_values, set_only_non_debug_params, mgr);
+      // Forget that language, but keep any reader we were given.
+      mgr->Clear();

      if (!loaded_primary) {
        if (result < 0) {
          tprintf("Failed loading language '%s'\n", lang_str);
        } else {
-          if (tessdata_manager_debug_level)
-            tprintf("Loaded language '%s' as main language\n", lang_str);
          ParseLanguageString(tess_to_init->tessedit_load_sublangs.string(),
                              &langs_to_load, &langs_not_to_load);
          loaded_primary = true;
@ -359,8 +342,6 @@ int Tesseract::init_tesseract(
          tprintf("Failed loading language '%s'\n", lang_str);
          delete tess_to_init;
        } else {
-          if (tessdata_manager_debug_level)
-            tprintf("Loaded language '%s' as secondary language\n", lang_str);
          sub_langs_.push_back(tess_to_init);
          // Add any languages that this language requires
          ParseLanguageString(tess_to_init->tessedit_load_sublangs.string(),
@ -385,16 +366,11 @@ int Tesseract::init_tesseract(
            this->language_model_->getParamsModel());
      }
      tprintf("Using params model of the primary language\n");
-      if (tessdata_manager_debug_level)  {
-        this->language_model_->getParamsModel().Print();
-      }
    } else {
      this->language_model_->getParamsModel().Clear();
      for (int s = 0; s < sub_langs_.size(); ++s) {
        sub_langs_[s]->language_model_->getParamsModel().Clear();
      }
-      if (tessdata_manager_debug_level)
-        tprintf("Using default language params\n");
    }
  }

@ -418,26 +394,26 @@ int Tesseract::init_tesseract(
 // in vars_vec.
 // If set_only_init_params is true, then only the initialization variables
 // will be set.
-int Tesseract::init_tesseract_internal(
-    const char *arg0, const char *textbase, const char *language,
-    OcrEngineMode oem, char **configs, int configs_size,
-    const GenericVector<STRING> *vars_vec,
-    const GenericVector<STRING> *vars_values,
-    bool set_only_non_debug_params) {
+int Tesseract::init_tesseract_internal(const char *arg0, const char *textbase,
+                                       const char *language, OcrEngineMode oem,
+                                       char **configs, int configs_size,
+                                       const GenericVector<STRING> *vars_vec,
+                                       const GenericVector<STRING> *vars_values,
+                                       bool set_only_non_debug_params,
+                                       TessdataManager *mgr) {
  if (!init_tesseract_lang_data(arg0, textbase, language, oem, configs,
                                configs_size, vars_vec, vars_values,
-                                set_only_non_debug_params)) {
+                                set_only_non_debug_params, mgr)) {
    return -1;
  }
  if (tessedit_init_config_only) {
-    tessdata_manager.End();
    return 0;
  }
  // If only LSTM will be used, skip loading Tesseract classifier's
  // pre-trained templates and dictionary.
  bool init_tesseract = tessedit_ocr_engine_mode != OEM_LSTM_ONLY;
-  program_editup(textbase, init_tesseract, init_tesseract);
-  tessdata_manager.End();
+  program_editup(textbase, init_tesseract ? mgr : nullptr,
+                 init_tesseract ? mgr : nullptr);
  return 0;                      //Normal exit
 }

@ -482,16 +458,14 @@ void Tesseract::SetupUniversalFontIds() {
 }

 // init the LM component
-int Tesseract::init_tesseract_lm(const char *arg0,
-                   const char *textbase,
-                   const char *language) {
+int Tesseract::init_tesseract_lm(const char *arg0, const char *textbase,
+                                 const char *language, TessdataManager *mgr) {
  if (!init_tesseract_lang_data(arg0, textbase, language, OEM_TESSERACT_ONLY,
-                                NULL, 0, NULL, NULL, false))
+                                NULL, 0, NULL, NULL, false, mgr))
    return -1;
  getDict().SetupForLoad(Dict::GlobalDawgCache());
-  getDict().Load(tessdata_manager.GetDataFileName().string(), lang);
+  getDict().Load(lang, mgr);
  getDict().FinishLoad();
-  tessdata_manager.End();
  return 0;
 }

--- a/ccmain/tesseractclass.cpp
+++ b/ccmain/tesseractclass.cpp
@ -466,10 +466,6 @@ Tesseract::Tesseract()
      STRING_MEMBER(file_type, ".tif", "Filename extension", this->params()),
      BOOL_MEMBER(tessedit_override_permuter, true, "According to dict_word",
                  this->params()),
-      INT_MEMBER(tessdata_manager_debug_level, 0,
-                 "Debug level for"
-                 " TessdataManager functions.",
-                 this->params()),
      STRING_MEMBER(tessedit_load_sublangs, "",
                    "List of languages to load with this one", this->params()),
      BOOL_MEMBER(tessedit_use_primary_params_model, false,
--- a/ccmain/tesseractclass.h
+++ b/ccmain/tesseractclass.h
@ -496,20 +496,17 @@ class Tesseract : public Wordrec {
  // string and recursively any additional languages required by any language
  // traineddata file (via tessedit_load_sublangs in its config) that is loaded.
  // See init_tesseract_internal for args.
-  int init_tesseract(const char *arg0,
-                     const char *textbase,
-                     const char *language,
-                     OcrEngineMode oem,
-                     char **configs,
-                     int configs_size,
-                     const GenericVector<STRING> *vars_vec,
-                     const GenericVector<STRING> *vars_values,
-                     bool set_only_init_params);
+  int init_tesseract(const char* arg0, const char* textbase,
+                     const char* language, OcrEngineMode oem, char** configs,
+                     int configs_size, const GenericVector<STRING>* vars_vec,
+                     const GenericVector<STRING>* vars_values,
+                     bool set_only_init_params, TessdataManager* mgr);
  int init_tesseract(const char *datapath,
                     const char *language,
                     OcrEngineMode oem) {
-    return init_tesseract(datapath, NULL, language, oem,
-                          NULL, 0, NULL, NULL, false);
+    TessdataManager mgr;
+    return init_tesseract(datapath, NULL, language, oem, NULL, 0, NULL, NULL,
+                          false, &mgr);
  }
  // Common initialization for a single language.
  // arg0 is the datapath for the tessdata directory, which could be the
@ -527,36 +524,30 @@ class Tesseract : public Wordrec {
  // in vars_vec.
  // If set_only_init_params is true, then only the initialization variables
  // will be set.
-  int init_tesseract_internal(const char *arg0,
-                              const char *textbase,
-                              const char *language,
-                              OcrEngineMode oem,
-                              char **configs,
-                              int configs_size,
-                              const GenericVector<STRING> *vars_vec,
-                              const GenericVector<STRING> *vars_values,
-                              bool set_only_init_params);
+  int init_tesseract_internal(const char* arg0, const char* textbase,
+                              const char* language, OcrEngineMode oem,
+                              char** configs, int configs_size,
+                              const GenericVector<STRING>* vars_vec,
+                              const GenericVector<STRING>* vars_values,
+                              bool set_only_init_params, TessdataManager* mgr);

  // Set the universal_id member of each font to be unique among all
  // instances of the same font loaded.
  void SetupUniversalFontIds();

-  int init_tesseract_lm(const char *arg0,
-                        const char *textbase,
-                        const char *language);
+  int init_tesseract_lm(const char* arg0, const char* textbase,
+                        const char* language, TessdataManager* mgr);

  void recognize_page(STRING& image_name);
  void end_tesseract();

-  bool init_tesseract_lang_data(const char *arg0,
-                                const char *textbase,
-                                const char *language,
-                                OcrEngineMode oem,
-                                char **configs,
-                                int configs_size,
-                                const GenericVector<STRING> *vars_vec,
-                                const GenericVector<STRING> *vars_values,
-                                bool set_only_init_params);
+  bool init_tesseract_lang_data(const char* arg0, const char* textbase,
+                                const char* language, OcrEngineMode oem,
+                                char** configs, int configs_size,
+                                const GenericVector<STRING>* vars_vec,
+                                const GenericVector<STRING>* vars_values,
+                                bool set_only_init_params,
+                                TessdataManager* mgr);

  void ParseLanguageString(const char* lang_str,
                           GenericVector<STRING>* to_load,
@ -1074,8 +1065,6 @@ class Tesseract : public Wordrec {
  BOOL_VAR_H(interactive_display_mode, false, "Run interactively?");
  STRING_VAR_H(file_type, ".tif", "Filename extension");
  BOOL_VAR_H(tessedit_override_permuter, true, "According to dict_word");
-  INT_VAR_H(tessdata_manager_debug_level, 0,
-            "Debug level for TessdataManager functions.");
  STRING_VAR_H(tessedit_load_sublangs, "",
               "List of languages to load with this one");
  BOOL_VAR_H(tessedit_use_primary_params_model, false,
--- a/ccstruct/fontinfo.cpp
+++ b/ccstruct/fontinfo.cpp
@ -31,7 +31,7 @@ bool FontInfo::Serialize(FILE* fp) const {
 }
 // Reads from the given file. Returns false in case of error.
 // If swap is true, assumes a big/little-endian swap is needed.
-bool FontInfo::DeSerialize(bool swap, FILE* fp) {
+bool FontInfo::DeSerialize(bool swap, TFile* fp) {
  if (!read_info(fp, this, swap)) return false;
  if (!read_spacing_info(fp, this, swap)) return false;
  return true;
@ -51,7 +51,7 @@ bool FontInfoTable::Serialize(FILE* fp) const {
 }
 // Reads from the given file. Returns false in case of error.
 // If swap is true, assumes a big/little-endian swap is needed.
-bool FontInfoTable::DeSerialize(bool swap, FILE* fp) {
+bool FontInfoTable::DeSerialize(bool swap, TFile* fp) {
  truncate(0);
  return this->DeSerializeClasses(swap, fp);
 }
@ -149,19 +149,15 @@ void FontSetDeleteCallback(FontSet fs) {

 /*---------------------------------------------------------------------------*/
 // Callbacks used by UnicityTable to read/write FontInfo/FontSet structures.
-bool read_info(FILE* f, FontInfo* fi, bool swap) {
+bool read_info(TFile* f, FontInfo* fi, bool swap) {
  inT32 size;
-  if (fread(&size, sizeof(size), 1, f) != 1) return false;
-  if (swap)
-    Reverse32(&size);
+  if (f->FReadEndian(&size, sizeof(size), 1, swap) != 1) return false;
  char* font_name = new char[size + 1];
  fi->name = font_name;
-  if (static_cast<int>(fread(font_name, sizeof(*font_name), size, f)) != size)
-    return false;
+  if (f->FRead(font_name, sizeof(*font_name), size) != size) return false;
  font_name[size] = '\0';
-  if (fread(&fi->properties, sizeof(fi->properties), 1, f) != 1) return false;
-  if (swap)
-    Reverse32(&fi->properties);
+  if (f->FReadEndian(&fi->properties, sizeof(fi->properties), 1, swap) != 1)
+    return false;
  return true;
 }

@ -174,26 +170,22 @@ bool write_info(FILE* f, const FontInfo& fi) {
  return true;
 }

-bool read_spacing_info(FILE *f, FontInfo* fi, bool swap) {
+bool read_spacing_info(TFile* f, FontInfo* fi, bool swap) {
  inT32 vec_size, kern_size;
-  if (fread(&vec_size, sizeof(vec_size), 1, f) != 1) return false;
-  if (swap) Reverse32(&vec_size);
+  if (f->FReadEndian(&vec_size, sizeof(vec_size), 1, swap) != 1) return false;
  ASSERT_HOST(vec_size >= 0);
  if (vec_size == 0) return true;
  fi->init_spacing(vec_size);
  for (int i = 0; i < vec_size; ++i) {
    FontSpacingInfo *fs = new FontSpacingInfo();
-    if (fread(&fs->x_gap_before, sizeof(fs->x_gap_before), 1, f) != 1 ||
-        fread(&fs->x_gap_after, sizeof(fs->x_gap_after), 1, f) != 1 ||
-        fread(&kern_size, sizeof(kern_size), 1, f) != 1) {
+    if (f->FReadEndian(&fs->x_gap_before, sizeof(fs->x_gap_before), 1, swap) !=
+            1 ||
+        f->FReadEndian(&fs->x_gap_after, sizeof(fs->x_gap_after), 1, swap) !=
+            1 ||
+        f->FReadEndian(&kern_size, sizeof(kern_size), 1, swap) != 1) {
      delete fs;
      return false;
    }
-    if (swap) {
-      ReverseN(&(fs->x_gap_before), sizeof(fs->x_gap_before));
-      ReverseN(&(fs->x_gap_after), sizeof(fs->x_gap_after));
-      Reverse32(&kern_size);
-    }
    if (kern_size < 0) {  // indication of a NULL entry in fi->spacing_vec
      delete fs;
      continue;
@ -237,16 +229,12 @@ bool write_spacing_info(FILE* f, const FontInfo& fi) {
  return true;
 }

-bool read_set(FILE* f, FontSet* fs, bool swap) {
-  if (fread(&fs->size, sizeof(fs->size), 1, f) != 1) return false;
-  if (swap)
-    Reverse32(&fs->size);
+bool read_set(TFile* f, FontSet* fs, bool swap) {
+  if (f->FReadEndian(&fs->size, sizeof(fs->size), 1, swap) != 1) return false;
  fs->configs = new int[fs->size];
-  for (int i = 0; i < fs->size; ++i) {
-    if (fread(&fs->configs[i], sizeof(fs->configs[i]), 1, f) != 1) return false;
-    if (swap)
-      Reverse32(&fs->configs[i]);
-  }
+  if (f->FReadEndian(fs->configs, sizeof(fs->configs[0]), fs->size, swap) !=
+      fs->size)
+    return false;
  return true;
 }

--- a/ccstruct/fontinfo.h
+++ b/ccstruct/fontinfo.h
@ -67,7 +67,7 @@ struct FontInfo {
  bool Serialize(FILE* fp) const;
  // Reads from the given file. Returns false in case of error.
  // If swap is true, assumes a big/little-endian swap is needed.
-  bool DeSerialize(bool swap, FILE* fp);
+  bool DeSerialize(bool swap, TFile* fp);

  // Reserves unicharset_size spots in spacing_vec.
  void init_spacing(int unicharset_size) {
@ -152,7 +152,7 @@ class FontInfoTable : public GenericVector<FontInfo> {
  bool Serialize(FILE* fp) const;
  // Reads from the given file. Returns false in case of error.
  // If swap is true, assumes a big/little-endian swap is needed.
-  bool DeSerialize(bool swap, FILE* fp);
+  bool DeSerialize(bool swap, TFile* fp);

  // Returns true if the given set of fonts includes one with the same
  // properties as font_id.
@ -177,11 +177,11 @@ void FontInfoDeleteCallback(FontInfo f);
 void FontSetDeleteCallback(FontSet fs);

 // Callbacks used by UnicityTable to read/write FontInfo/FontSet structures.
-bool read_info(FILE* f, FontInfo* fi, bool swap);
+bool read_info(TFile* f, FontInfo* fi, bool swap);
 bool write_info(FILE* f, const FontInfo& fi);
-bool read_spacing_info(FILE *f, FontInfo* fi, bool swap);
+bool read_spacing_info(TFile* f, FontInfo* fi, bool swap);
 bool write_spacing_info(FILE* f, const FontInfo& fi);
-bool read_set(FILE* f, FontSet* fs, bool swap);
+bool read_set(TFile* f, FontSet* fs, bool swap);
 bool write_set(FILE* f, const FontSet& fs);

 }  // namespace tesseract.
--- a/ccutil/ccutil.h
+++ b/ccutil/ccutil.h
@ -66,7 +66,6 @@ class CCUtil {
  STRING imagebasename;  // name of image
  STRING lang;
  STRING language_data_path_prefix;
-  TessdataManager tessdata_manager;
  UNICHARSET unicharset;
  UnicharAmbigs unichar_ambigs;
  STRING imagefile;  // image file name
--- a/ccutil/genericvector.h
+++ b/ccutil/genericvector.h
@ -162,7 +162,9 @@ class GenericVector {
  // Returns false on error or if the callback returns false.
  // DEPRECATED. Use [De]Serialize[Classes] instead.
  bool write(FILE* f, TessResultCallback2<bool, FILE*, T const &>* cb) const;
-  bool read(FILE* f, TessResultCallback3<bool, FILE*, T*, bool>* cb, bool swap);
+  bool read(tesseract::TFile* f,
+            TessResultCallback3<bool, tesseract::TFile*, T*, bool>* cb,
+            bool swap);
  // Writes a vector of simple types to the given file. Assumes that bitwise
  // read/write of T will work. Returns false in case of error.
  // TODO(rays) Change all callers to use TFile and remove deprecated methods.
@ -885,15 +887,14 @@ bool GenericVector<T>::write(
 }

 template <typename T>
-bool GenericVector<T>::read(FILE* f,
-                            TessResultCallback3<bool, FILE*, T*, bool>* cb,
-                            bool swap) {
+bool GenericVector<T>::read(
+    tesseract::TFile* f,
+    TessResultCallback3<bool, tesseract::TFile*, T*, bool>* cb, bool swap) {
  inT32 reserved;
-  if (fread(&reserved, sizeof(reserved), 1, f) != 1) return false;
-  if (swap) Reverse32(&reserved);
+  if (f->FReadEndian(&reserved, sizeof(reserved), 1, swap) != 1) return false;
  reserve(reserved);
-  if (fread(&size_used_, sizeof(size_used_), 1, f) != 1) return false;
-  if (swap) Reverse32(&size_used_);
+  if (f->FReadEndian(&size_used_, sizeof(size_used_), 1, swap) != 1)
+    return false;
  if (cb != NULL) {
    for (int i = 0; i < size_used_; ++i) {
      if (!cb->Run(f, data_ + i, swap)) {
@ -903,11 +904,8 @@ bool GenericVector<T>::read(FILE* f,
    }
    delete cb;
  } else {
-    if (fread(data_, sizeof(T), size_used_, f) != size_used_) return false;
-    if (swap) {
-      for (int i = 0; i < size_used_; ++i)
-        ReverseN(&data_[i], sizeof(T));
-    }
+    if (f->FReadEndian(data_, sizeof(T), size_used_, swap) != size_used_)
+      return false;
  }
  return true;
 }
--- a/ccutil/mainblk.cpp
+++ b/ccutil/mainblk.cpp
@ -55,7 +55,7 @@ void CCUtil::main_setup(const char *argv0, const char *basename) {

  char *tessdata_prefix = getenv("TESSDATA_PREFIX");

-  if (argv0 != NULL) {
+  if (argv0 != NULL && *argv0 != '\0') {
    /* Use tessdata prefix from the command line. */
    datadir = argv0;
  } else if (tessdata_prefix) {
--- a/ccutil/params.cpp
+++ b/ccutil/params.cpp
@ -41,8 +41,6 @@ bool ParamUtils::ReadParamsFile(const char *file,
                                SetParamConstraint constraint,
                                ParamsVectors *member_params) {
  inT16 nameoffset;              // offset for real name
-  FILE *fp;                      // file pointer
-                                 // iterators

  if (*file == PLUS) {
    nameoffset = 1;
@ -52,26 +50,22 @@ bool ParamUtils::ReadParamsFile(const char *file,
    nameoffset = 0;
  }

-  fp = fopen(file + nameoffset, "rb");
-  if (fp == NULL) {
+  TFile fp;
+  if (!fp.Open(file + nameoffset, nullptr)) {
    tprintf("read_params_file: Can't open %s\n", file + nameoffset);
    return true;
  }
-  const bool anyerr = ReadParamsFromFp(fp, -1, constraint, member_params);
-  fclose(fp);
-  return anyerr;
+  return ReadParamsFromFp(constraint, &fp, member_params);
 }

-bool ParamUtils::ReadParamsFromFp(FILE *fp, inT64 end_offset,
-                                  SetParamConstraint constraint,
+bool ParamUtils::ReadParamsFromFp(SetParamConstraint constraint, TFile *fp,
                                  ParamsVectors *member_params) {
  char line[MAX_PATH];           // input line
  bool anyerr = false;           // true if any error
  bool foundit;                  // found parameter
  char *valptr;                  // value field

-  while ((end_offset < 0 || ftell(fp) < end_offset) &&
-         fgets(line, MAX_PATH, fp)) {
+  while (fp->FGets(line, MAX_PATH) != nullptr) {
    if (line[0] != '\r' && line[0] != '\n' && line[0] != '#') {
      chomp_string(line);  // remove newline
      for (valptr = line; *valptr && *valptr != ' ' && *valptr != '\t';
--- a/ccutil/params.h
+++ b/ccutil/params.h
@ -60,9 +60,8 @@ class ParamUtils {
      SetParamConstraint constraint,
      ParamsVectors *member_params);

-  // Read parameters from the given file pointer (stop at end_offset).
-  static bool ReadParamsFromFp(FILE *fp, inT64 end_offset,
-                               SetParamConstraint constraint,
+  // Read parameters from the given file pointer.
+  static bool ReadParamsFromFp(SetParamConstraint constraint, TFile *fp,
                               ParamsVectors *member_params);

  // Set a parameters to have the given value.
--- a/ccutil/serialis.cpp
+++ b/ccutil/serialis.cpp
@ -88,6 +88,17 @@ char* TFile::FGets(char* buffer, int buffer_size) {
  return size > 0 ? buffer : NULL;
 }

+int TFile::FReadEndian(void* buffer, int size, int count, bool swap) {
+  int num_read = FRead(buffer, size, count);
+  if (swap) {
+    char* char_buffer = reinterpret_cast<char*>(buffer);
+    for (int i = 0; i < num_read; ++i, char_buffer += size) {
+      ReverseN(char_buffer, size);
+    }
+  }
+  return num_read;
+}
+
 int TFile::FRead(void* buffer, int size, int count) {
  ASSERT_HOST(!is_writing_);
  int required_size = size * count;
--- a/ccutil/serialis.h
+++ b/ccutil/serialis.h
@ -67,6 +67,10 @@ class TFile {
  // the line is longer. Does nothing if buffer_size <= 0.
  // To use fscanf use FGets and sscanf.
  char* FGets(char* buffer, int buffer_size);
+  // Replicates fread, followed by a swap of the bytes if needed, returning the
+  // number of items read. If swap is true then the count items will each have
+  // size bytes reversed.
+  int FReadEndian(void* buffer, int size, int count, bool swap);
  // Replicates fread, returning the number of items read.
  int FRead(void* buffer, int size, int count);
  // Resets the TFile as if it has been Opened, but nothing read.
--- a/ccutil/tessdatamanager.cpp
+++ b/ccutil/tessdatamanager.cpp
@ -33,206 +33,192 @@

 namespace tesseract {

-bool TessdataManager::Init(const char *data_file_name, int debug_level) {
-  int i;
-  debug_level_ = debug_level;
+// Lazily loads from the given filename. Won't actually read the file
+// until it needs it.
+void TessdataManager::LoadFileLater(const char *data_file_name) {
+  Clear();
  data_file_name_ = data_file_name;
-  data_file_ = fopen(data_file_name, "rb");
-  if (data_file_ == NULL) {
-    tprintf("Error opening data file %s\n", data_file_name);
-    tprintf("Please make sure the TESSDATA_PREFIX environment variable is set "
-            "to the parent directory of your \"tessdata\" directory.\n");
+}
+
+// Reads the whole of data_file_name right now, using reader_ if one was
+// supplied and LoadDataFromFile otherwise, then parses it with LoadMemBuffer.
+// Returns false if the read fails, the file is empty, or parsing fails.
+bool TessdataManager::Init(const char *data_file_name) {
+  GenericVector<char> data;
+  if (reader_ == nullptr) {
+    if (!LoadDataFromFile(data_file_name, &data)) return false;
+  } else {
+    if (!(*reader_)(data_file_name, &data)) return false;
+  }
+  // An empty buffer cannot hold even the entry count, and &data[0] must not
+  // be evaluated on an empty vector.
+  if (data.empty()) return false;
+  return LoadMemBuffer(data_file_name, &data[0], data.size());
+}
+
+// Loads from the given memory buffer as if a file.
+// name is remembered as the data file name; data/size describe the complete
+// traineddata image. Sets swap_ when the entry count only makes sense after
+// a byte swap. Returns false if the header is not plausible or the buffer is
+// too short for the entries it declares.
+bool TessdataManager::LoadMemBuffer(const char *name, const char *data,
+                                    int size) {
+  data_file_name_ = name;
+  TFile fp;
+  fp.Open(data, size);
+  inT32 num_entries = TESSDATA_NUM_ENTRIES;
+  if (fp.FRead(&num_entries, sizeof(num_entries), 1) != 1) return false;
+  swap_ = num_entries > kMaxNumTessdataEntries || num_entries < 0;
+  if (swap_) ReverseN(&num_entries, sizeof(num_entries));
+  // Still out of range after the byte swap: this is not a traineddata header,
+  // so do not size the offset table from it.
+  if (num_entries > kMaxNumTessdataEntries || num_entries < 0) return false;
+  GenericVector<inT64> offset_table;
+  offset_table.init_to_size(num_entries, -1);
+  if (fp.FReadEndian(&offset_table[0], sizeof(offset_table[0]), num_entries,
+                     swap_) != num_entries)
    return false;
-  }
-  fread(&actual_tessdata_num_entries_, sizeof(inT32), 1, data_file_);
-  swap_ = (actual_tessdata_num_entries_ > kMaxNumTessdataEntries);
-  if (swap_) {
-    ReverseN(&actual_tessdata_num_entries_,
-             sizeof(actual_tessdata_num_entries_));
-  }
-  if (actual_tessdata_num_entries_ > TESSDATA_NUM_ENTRIES) {
-    // For forward compatibility, truncate to the number we can handle.
-    actual_tessdata_num_entries_ = TESSDATA_NUM_ENTRIES;
-  }
-  fread(offset_table_, sizeof(inT64),
-        actual_tessdata_num_entries_, data_file_);
-  if (swap_) {
-    for (i = 0 ; i < actual_tessdata_num_entries_; ++i) {
-      ReverseN(&offset_table_[i], sizeof(offset_table_[i]));
-    }
-  }
-  if (debug_level_) {
-    tprintf("TessdataManager loaded %d types of tesseract data files.\n",
-            actual_tessdata_num_entries_);
-    for (i = 0; i < actual_tessdata_num_entries_; ++i) {
-      tprintf("Offset for type %d is %lld\n", i, offset_table_[i]);
+  for (int i = 0; i < num_entries && i < TESSDATA_NUM_ENTRIES; ++i) {
+    if (offset_table[i] >= 0) {
+      // An entry runs to the start of the next present entry, or to the end
+      // of the buffer if it is the last one.
+      inT64 entry_size = size - offset_table[i];
+      int j = i + 1;
+      while (j < num_entries && offset_table[j] == -1) ++j;
+      if (j < num_entries) entry_size = offset_table[j] - offset_table[i];
+      // Corrupt offsets (out of order or past the end) would give a size that
+      // is negative or larger than the whole buffer.
+      if (entry_size < 0 || entry_size > size) return false;
+      entries_[i].init_to_size(entry_size, 0);
+      if (fp.FRead(&entries_[i][0], 1, entry_size) != entry_size) return false;
    }
  }
+  is_loaded_ = true;
  return true;
 }

-void TessdataManager::CopyFile(FILE *input_file, FILE *output_file,
-                               bool newline_end, inT64 num_bytes_to_copy) {
-  if (num_bytes_to_copy == 0) return;
-  int buffer_size = 1024;
-  if (num_bytes_to_copy > 0 && buffer_size > num_bytes_to_copy) {
-    buffer_size = num_bytes_to_copy;
-  }
-  inT64 num_bytes_copied = 0;
-  char *chunk = new char[buffer_size];
-  int bytes_read;
-  char last_char = 0x0;
-  while ((bytes_read = fread(chunk, sizeof(char),
-                             buffer_size, input_file))) {
-    fwrite(chunk, sizeof(char), bytes_read, output_file);
-    last_char = chunk[bytes_read-1];
-    if (num_bytes_to_copy > 0) {
-      num_bytes_copied += bytes_read;
-      if (num_bytes_copied == num_bytes_to_copy) break;
-      if (num_bytes_copied + buffer_size > num_bytes_to_copy) {
-        buffer_size = num_bytes_to_copy - num_bytes_copied;
-      }
-    }
-  }
-  if (newline_end) ASSERT_HOST(last_char == '\n');
-  delete[] chunk;
+// Overwrites a single entry of the given type.
+void TessdataManager::OverwriteEntry(TessdataType type, const char *data,
+                                     int size) {
+  is_loaded_ = true;
+  entries_[type].init_to_size(size, 0);
+  memcpy(&entries_[type][0], data, size);
 }

-bool TessdataManager::WriteMetadata(inT64 *offset_table,
-                                    const char * language_data_path_prefix,
-                                    FILE *output_file) {
-  inT32 num_entries = TESSDATA_NUM_ENTRIES;
-  bool result = true;
-  if (fseek(output_file, 0, SEEK_SET) != 0 ||
-      fwrite(&num_entries, sizeof(inT32), 1, output_file) != 1 ||
-      fwrite(offset_table, sizeof(inT64), TESSDATA_NUM_ENTRIES,
-             output_file) != TESSDATA_NUM_ENTRIES) {
-    fclose(output_file);
-    result = false;
-    tprintf("WriteMetadata failed in TessdataManager!\n");
-  } else if (fclose(output_file)) {
-    result = false;
-    tprintf("WriteMetadata failed to close file!\n");
-  } else {
-    tprintf("TessdataManager combined tesseract data files.\n");
-    for (int i = 0; i < TESSDATA_NUM_ENTRIES; ++i) {
-      tprintf("Offset for type %2d (%s%-22s) is %lld\n", i,
-              language_data_path_prefix, kTessdataFileSuffixes[i],
-              offset_table[i]);
+// Saves to the given filename.
+bool TessdataManager::SaveFile(const STRING &filename,
+                               FileWriter writer) const {
+  ASSERT_HOST(is_loaded_);
+  GenericVector<char> data;
+  Serialize(&data);
+  if (writer == nullptr)
+    return SaveDataToFile(data, filename);
+  else
+    return (*writer)(data, filename);
+}
+
+// Serializes to the given vector.
+void TessdataManager::Serialize(GenericVector<char> *data) const {
+  ASSERT_HOST(is_loaded_);
+  // Compute the offset_table and total size.
+  inT64 offset_table[TESSDATA_NUM_ENTRIES];
+  inT64 offset = sizeof(inT32) + sizeof(offset_table);
+  for (int i = 0; i < TESSDATA_NUM_ENTRIES; ++i) {
+    if (entries_[i].empty()) {
+      offset_table[i] = -1;
+    } else {
+      offset_table[i] = offset;
+      offset += entries_[i].size();
    }
  }
-  return result;
+  data->init_to_size(offset, 0);
+  inT32 num_entries = TESSDATA_NUM_ENTRIES;
+  TFile fp;
+  fp.OpenWrite(data);
+  fp.FWrite(&num_entries, sizeof(num_entries), 1);
+  fp.FWrite(offset_table, sizeof(offset_table), 1);
+  for (int i = 0; i < TESSDATA_NUM_ENTRIES; ++i) {
+    if (!entries_[i].empty()) {
+      fp.FWrite(&entries_[i][0], entries_[i].size(), 1);
+    }
+  }
+}
+
+// Resets to the initial state, keeping the reader.
+void TessdataManager::Clear() {
+  for (int i = 0; i < TESSDATA_NUM_ENTRIES; ++i) {
+    entries_[i].clear();
+  }
+  is_loaded_ = false;
+}
+
+// Prints a directory of contents: one line per non-empty entry giving its
+// index, file suffix, size in bytes and byte offset.
+void TessdataManager::Directory() const {
+  // Entries follow the inT32 entry count and the inT64 offset table, the
+  // same layout that Serialize() writes.
+  int offset = sizeof(inT32) + TESSDATA_NUM_ENTRIES * sizeof(inT64);
+  for (int i = 0; i < TESSDATA_NUM_ENTRIES; ++i) {
+    if (!entries_[i].empty()) {
+      tprintf("%d:%s:size=%d, offset=%d\n", i, kTessdataFileSuffixes[i],
+              entries_[i].size(), offset);
+      offset += entries_[i].size();
+    }
+  }
+}
+
+// Opens the given TFile pointer to the given component type.
+// Returns false in case of failure.
+bool TessdataManager::GetComponent(TessdataType type, TFile *fp) {
+  if (!is_loaded_ && !Init(data_file_name_.string())) return false;
+  if (entries_[type].empty()) return false;
+  fp->Open(&entries_[type][0], entries_[type].size());
+  return true;
 }

 bool TessdataManager::CombineDataFiles(
    const char *language_data_path_prefix,
    const char *output_filename) {
-  int i;
-  inT64 offset_table[TESSDATA_NUM_ENTRIES];
-  for (i = 0; i < TESSDATA_NUM_ENTRIES; ++i) offset_table[i] = -1;
-  FILE *output_file = fopen(output_filename, "wb");
-  if (output_file == NULL) {
-    tprintf("Error opening %s for writing\n", output_filename);
-    return false;
-  }
-  // Leave some space for recording the offset_table.
-  if (fseek(output_file,
-            sizeof(inT32) + sizeof(inT64) * TESSDATA_NUM_ENTRIES, SEEK_SET)) {
-    tprintf("Error seeking %s\n", output_filename);
-    fclose(output_file);
-    return false;
-  }
-
-  TessdataType type = TESSDATA_NUM_ENTRIES;
-  bool text_file = false;
-  FILE *file_ptr[TESSDATA_NUM_ENTRIES];
-
  // Load individual tessdata components from files.
-  for (i = 0; i < TESSDATA_NUM_ENTRIES; ++i) {
-    ASSERT_HOST(TessdataTypeFromFileSuffix(
-        kTessdataFileSuffixes[i], &type, &text_file));
+  for (int i = 0; i < TESSDATA_NUM_ENTRIES; ++i) {
+    TessdataType type;
+    ASSERT_HOST(TessdataTypeFromFileSuffix(kTessdataFileSuffixes[i], &type));
    STRING filename = language_data_path_prefix;
    filename += kTessdataFileSuffixes[i];
-    file_ptr[i] =  fopen(filename.string(), "rb");
-    if (file_ptr[i] != NULL) {
-      offset_table[type] = ftell(output_file);
-      CopyFile(file_ptr[i], output_file, text_file, -1);
-      fclose(file_ptr[i]);
+    FILE *fp = fopen(filename.string(), "rb");
+    if (fp != nullptr) {
+      fclose(fp);
+      if (!LoadDataFromFile(filename, &entries_[type])) {
+        tprintf("Load of file %s failed!\n", filename.string());
+        return false;
+      }
    }
  }
+  is_loaded_ = true;

  // Make sure that the required components are present.
-  if (!IncludesBaseComponents(offset_table) &&
-      !IncludesLSTMComponents(offset_table)) {
+  if (!IsBaseAvailable() && !IsLSTMAvailable()) {
    tprintf(
        "Error: traineddata file must contain at least (a unicharset file"
        "and inttemp) OR an lstm file.\n");
-    fclose(output_file);
    return false;
  }
-  return WriteMetadata(offset_table, language_data_path_prefix, output_file);
+  // Write updated data to the output traineddata file.
+  return SaveFile(output_filename, nullptr);
 }

 bool TessdataManager::OverwriteComponents(
    const char *new_traineddata_filename,
    char **component_filenames,
    int num_new_components) {
-  int i;
-  inT64 offset_table[TESSDATA_NUM_ENTRIES];
-  TessdataType type = TESSDATA_NUM_ENTRIES;
-  bool text_file = false;
-  FILE *file_ptr[TESSDATA_NUM_ENTRIES];
-  for (i = 0; i < TESSDATA_NUM_ENTRIES; ++i) {
-    offset_table[i] = -1;
-    file_ptr[i] = NULL;
-  }
-  FILE *output_file = fopen(new_traineddata_filename, "wb");
-  if (output_file == NULL) {
-    tprintf("Error opening %s for writing\n", new_traineddata_filename);
-    return false;
-  }
-
-  // Leave some space for recording the offset_table.
-  if (fseek(output_file,
-            sizeof(inT32) + sizeof(inT64) * TESSDATA_NUM_ENTRIES, SEEK_SET)) {
-    fclose(output_file);
-    tprintf("Error seeking %s\n", new_traineddata_filename);
-    return false;
-  }
-
  // Open the files with the new components.
-  for (i = 0; i < num_new_components; ++i) {
-    if (TessdataTypeFromFileName(component_filenames[i], &type, &text_file))
-      file_ptr[type] = fopen(component_filenames[i], "rb");
-  }
-
-  // Write updated data to the output traineddata file.
-  for (i = 0; i < TESSDATA_NUM_ENTRIES; ++i) {
-    if (file_ptr[i] != NULL) {
-      // Get the data from the opened component file.
-      offset_table[i] = ftell(output_file);
-      CopyFile(file_ptr[i], output_file, kTessdataFileIsText[i], -1);
-      fclose(file_ptr[i]);
-    } else {
-      // Get this data component from the loaded data file.
-      if (SeekToStart(static_cast<TessdataType>(i))) {
-        offset_table[i] = ftell(output_file);
-        CopyFile(data_file_, output_file, kTessdataFileIsText[i],
-                 GetEndOffset(static_cast<TessdataType>(i)) -
-                 ftell(data_file_) + 1);
+  for (int i = 0; i < num_new_components; ++i) {
+    TessdataType type;
+    if (TessdataTypeFromFileName(component_filenames[i], &type)) {
+      if (!LoadDataFromFile(component_filenames[i], &entries_[type])) {
+        tprintf("Failed to read component file:%s\n", component_filenames[i]);
+        return false;
      }
    }
  }
-  const char *language_data_path_prefix = strchr(new_traineddata_filename, '.');
-  return WriteMetadata(offset_table, language_data_path_prefix, output_file);
+
+  // Write updated data to the output traineddata file.
+  return SaveFile(new_traineddata_filename, nullptr);
 }

-bool TessdataManager::TessdataTypeFromFileSuffix(
-    const char *suffix, TessdataType *type, bool *text_file) {
+bool TessdataManager::ExtractToFile(const char *filename) {
+  TessdataType type = TESSDATA_NUM_ENTRIES;
+  ASSERT_HOST(
+      tesseract::TessdataManager::TessdataTypeFromFileName(filename, &type));
+  if (entries_[type].empty()) return false;
+  return SaveDataToFile(entries_[type], filename);
+}
+
+bool TessdataManager::TessdataTypeFromFileSuffix(const char *suffix,
+                                                 TessdataType *type) {
  for (int i = 0; i < TESSDATA_NUM_ENTRIES; ++i) {
    if (strcmp(kTessdataFileSuffixes[i], suffix) == 0) {
      *type = static_cast<TessdataType>(i);
-      *text_file = kTessdataFileIsText[i];
      return true;
    }
  }
@ -241,46 +227,12 @@ bool TessdataManager::TessdataTypeFromFileSuffix(
  return false;
 }

-bool TessdataManager::TessdataTypeFromFileName(
-    const char *filename, TessdataType *type, bool *text_file) {
+bool TessdataManager::TessdataTypeFromFileName(const char *filename,
+                                               TessdataType *type) {
  // Get the file suffix (extension)
  const char *suffix = strrchr(filename, '.');
-  if (suffix == NULL || *(++suffix) == '\0') return false;
-  return TessdataTypeFromFileSuffix(suffix, type, text_file);
-}
-
-// Returns true if the base Tesseract components are present.
-/* static */
-bool TessdataManager::IncludesBaseComponents(const inT64 *offset_table) {
-  return offset_table[TESSDATA_UNICHARSET] >= 0 &&
-         offset_table[TESSDATA_INTTEMP] >= 0;
-}
-
-// Returns true if the LSTM components are present.
-/* static */
-bool TessdataManager::IncludesLSTMComponents(const inT64 *offset_table) {
-  return offset_table[TESSDATA_LSTM] >= 0;
-}
-
-bool TessdataManager::ExtractToFile(const char *filename) {
-  TessdataType type = TESSDATA_NUM_ENTRIES;
-  bool text_file = false;
-  ASSERT_HOST(tesseract::TessdataManager::TessdataTypeFromFileName(
-      filename, &type, &text_file));
-  if (!SeekToStart(type)) return false;
-
-  FILE *output_file = fopen(filename, "wb");
-  if (output_file == NULL) {
-    tprintf("Error opening %s\n", filename);
-    exit(1);
-  }
-  inT64 begin_offset = ftell(GetDataFilePtr());
-  inT64 end_offset = GetEndOffset(type);
-  tesseract::TessdataManager::CopyFile(
-      GetDataFilePtr(), output_file, text_file,
-      end_offset - begin_offset + 1);
-  fclose(output_file);
-  return true;
+  if (suffix == nullptr || *(++suffix) == '\0') return false;
+  return TessdataTypeFromFileSuffix(suffix, type);
 }

 }  // namespace tesseract
--- a/ccutil/tessdatamanager.h
+++ b/ccutil/tessdatamanager.h
@ -108,34 +108,6 @@ static const char *const kTessdataFileSuffixes[] = {
    kLSTMNumberDawgFileSuffix,    // 20
 };

-/**
- * If kTessdataFileIsText[i] is true - the tessdata component
- * of type i (from TessdataType enum) is text, and is binary otherwise.
- */
-static const bool kTessdataFileIsText[] = {
-    true,   // 0
-    true,   // 1
-    true,   // 2
-    false,  // 3
-    true,   // 4
-    true,   // 5
-    false,  // 6
-    false,  // 7
-    false,  // 8
-    false,  // 9
-    false,  // 10  // deprecated
-    true,   // 11  // deprecated
-    false,  // 12  // deprecated
-    false,  // 13
-    false,  // 14
-    false,  // 15
-    true,   // 16
-    false,  // 17
-    false,  // 18
-    false,  // 19
-    false,  // 20
-};
-
 /**
 * TessdataType could be updated to contain more entries, however
 * we do not expect that number to be astronomically high.
@ -148,93 +120,61 @@ static const int kMaxNumTessdataEntries = 1000;

 class TessdataManager {
 public:
-  TessdataManager() {
-    data_file_ = NULL;
-    actual_tessdata_num_entries_ = 0;
-    for (int i = 0; i < TESSDATA_NUM_ENTRIES; ++i) {
-      offset_table_[i] = -1;
-    }
-  }
+  TessdataManager() : reader_(nullptr), is_loaded_(false), swap_(false) {}
+  explicit TessdataManager(FileReader reader)
+      : reader_(reader), is_loaded_(false), swap_(false) {}
  ~TessdataManager() {}
-  int DebugLevel() { return debug_level_; }

+  bool swap() const { return swap_; }
+  bool is_loaded() const { return is_loaded_; }
+
+  // Lazily loads from the given filename. Won't actually read the file
+  // until it needs it.
+  void LoadFileLater(const char *data_file_name);
  /**
-   * Opens the given data file and reads the offset table.
+   * Opens and reads the given data file right now.
   * @return true on success.
   */
-  bool Init(const char *data_file_name, int debug_level);
+  bool Init(const char *data_file_name);
+  // Loads from the given memory buffer as if a file, remembering name as some
+  // arbitrary source id for caching.
+  bool LoadMemBuffer(const char *name, const char *data, int size);
+  // Overwrites a single entry of the given type.
+  void OverwriteEntry(TessdataType type, const char *data, int size);
+
+  // Saves to the given filename.
+  bool SaveFile(const STRING &filename, FileWriter writer) const;
+  // Serializes to the given vector.
+  void Serialize(GenericVector<char> *data) const;
+  // Resets to the initial state, keeping the reader.
+  void Clear();
+
+  // Prints a directory of contents.
+  void Directory() const;
+
+  // Opens the given TFile pointer to the given component type.
+  // Returns false in case of failure.
+  bool GetComponent(TessdataType type, TFile *fp);

  // Returns true if the base Tesseract components are present.
-  bool IsBaseAvailable() const { return IncludesBaseComponents(offset_table_); }
+  bool IsBaseAvailable() const {
+    return !entries_[TESSDATA_UNICHARSET].empty() &&
+           !entries_[TESSDATA_INTTEMP].empty();
+  }

  // Returns true if the LSTM components are present.
-  bool IsLSTMAvailable() const { return IncludesLSTMComponents(offset_table_); }
+  bool IsLSTMAvailable() const { return !entries_[TESSDATA_LSTM].empty(); }

  // Return the name of the underlying data file.
  const STRING &GetDataFileName() const { return data_file_name_; }

-  /** Returns data file pointer. */
-  inline FILE *GetDataFilePtr() const { return data_file_; }
-
-  /**
-   * Returns false if there is no data of the given type.
-   * Otherwise does a seek on the data_file_ to position the pointer
-   * at the start of the data of the given type.
-   */
-  inline bool SeekToStart(TessdataType tessdata_type) {
-    if (debug_level_) {
-      tprintf("TessdataManager: seek to offset %lld - start of tessdata"
-              "type %d (%s))\n", offset_table_[tessdata_type],
-              tessdata_type, kTessdataFileSuffixes[tessdata_type]);
-    }
-    if (offset_table_[tessdata_type] < 0) {
-      return false;
-    } else {
-      ASSERT_HOST(fseek(data_file_,
-                        static_cast<size_t>(offset_table_[tessdata_type]),
-                        SEEK_SET) == 0);
-      return true;
-    }
-  }
-  /** Returns the end offset for the given tesseract data file type. */
-  inline inT64 GetEndOffset(TessdataType tessdata_type) const {
-    int index = tessdata_type + 1;
-    while (index < actual_tessdata_num_entries_ && offset_table_[index] == -1) {
-      ++index;  // skip tessdata types not present in the combined file
-    }
-    if (debug_level_) {
-      tprintf("TessdataManager: end offset for type %d is %lld\n",
-              tessdata_type,
-              (index == actual_tessdata_num_entries_) ? -1
-              : offset_table_[index]);
-    }
-    return (index == actual_tessdata_num_entries_) ? -1 : offset_table_[index] - 1;
-  }
-  /** Closes data_file_ (if it was opened by Init()). */
-  inline void End() {
-    if (data_file_ != NULL) {
-      fclose(data_file_);
-      data_file_ = NULL;
-    }
-  }
-  bool swap() const {
-    return swap_;
-  }
-
-  /** Writes the number of entries and the given offset table to output_file.
-   * Returns false on error.
-   */
-  static bool WriteMetadata(inT64 *offset_table,
-                            const char *language_data_path_prefix,
-                            FILE *output_file);
-
  /**
   * Reads all the standard tesseract config and data files for a language
   * at the given path and bundles them up into one binary data file.
   * Returns true if the combined traineddata file was successfully written.
   */
-  static bool CombineDataFiles(const char *language_data_path_prefix,
-                               const char *output_filename);
+  bool CombineDataFiles(const char *language_data_path_prefix,
+                        const char *output_filename);

  /**
   * Gets the individual components from the data_file_ with which the class was
@ -257,69 +197,35 @@ class TessdataManager {
   */
  bool ExtractToFile(const char *filename);

-  /**
-   * Copies data from the given input file to the output_file provided.
-   * If num_bytes_to_copy is >= 0, only num_bytes_to_copy is copied from
-   * the input file, otherwise all the data in the input file is copied.
-   */
-  static void CopyFile(FILE *input_file, FILE *output_file,
-                       bool newline_end, inT64 num_bytes_to_copy);
-
  /**
   * Fills type with TessdataType of the tessdata component represented by the
   * given file name. E.g. tessdata/eng.unicharset -> TESSDATA_UNICHARSET.
-   * Sets *text_file to true if the component is in text format (e.g.
-   * unicharset, unichar ambigs, config, etc).
   * @return true if the tessdata component type could be determined
   * from the given file name.
   */
  static bool TessdataTypeFromFileSuffix(const char *suffix,
-                                         TessdataType *type,
-                                         bool *text_file);
+                                         TessdataType *type);

  /**
   * Tries to determine tessdata component file suffix from filename,
   * returns true on success.
   */
  static bool TessdataTypeFromFileName(const char *filename,
-                                       TessdataType *type,
-                                       bool *text_file);
+                                       TessdataType *type);

 private:
-  // Returns true if the base Tesseract components are present.
-  static bool IncludesBaseComponents(const inT64 *offset_table);
-  // Returns true if the LSTM components are present.
-  static bool IncludesLSTMComponents(const inT64 *offset_table);
-
-  /**
-   * Opens the file whose name is a concatenation of language_data_path_prefix
-   * and file_suffix. Returns a file pointer to the opened file.
-   */
-  static FILE *GetFilePtr(const char *language_data_path_prefix,
-                          const char *file_suffix, bool text_file);
-
-  /**
-   * Each offset_table_[i] contains a file offset in the combined data file
-   * where the data of TessdataFileType i is stored.
-   */
-  inT64 offset_table_[TESSDATA_NUM_ENTRIES];
-  /**
-   * Actual number of entries in the tessdata table. This value can only be
-   * same or smaller than TESSDATA_NUM_ENTRIES, but can never be larger,
-   * since then it would be impossible to interpret the type of tessdata at
-   * indices same and higher than TESSDATA_NUM_ENTRIES.
-   * This parameter is used to allow for backward compatibility
-   * when new tessdata types are introduced.
-   */
-  inT32 actual_tessdata_num_entries_;
-  STRING data_file_name_;  // name of the data file.
-  FILE *data_file_;  ///< pointer to the data file.
-  int debug_level_;
+  // Name of file it came from.
+  STRING data_file_name_;
+  // Function to load the file when we need it.
+  FileReader reader_;
+  // True if the file has been loaded.
+  bool is_loaded_;
  // True if the bytes need swapping.
  bool swap_;
+  // Contents of each element of the traineddata file.
+  GenericVector<char> entries_[TESSDATA_NUM_ENTRIES];
 };

-
 }  // namespace tesseract

 #endif  // TESSERACT_CCUTIL_TESSDATAMANAGER_H_
--- a/ccutil/unicity_table.h
+++ b/ccutil/unicity_table.h
@ -87,7 +87,9 @@ class UnicityTable {
  /// Returns false on read/write error.
  bool write(FILE* f, TessResultCallback2<bool, FILE*, T const &>* cb) const;
  /// swap is used to switch the endianness.
-  bool read(FILE* f, TessResultCallback3<bool, FILE*, T*, bool>* cb, bool swap);
+  bool read(tesseract::TFile* f,
+            TessResultCallback3<bool, tesseract::TFile*, T*, bool>* cb,
+            bool swap);

 private:
  GenericVector<T> table_;
@ -194,7 +196,8 @@ bool UnicityTable<T>::write(

 template <typename T>
 bool UnicityTable<T>::read(
-    FILE* f, TessResultCallback3<bool, FILE*, T*, bool>* cb, bool swap) {
+    tesseract::TFile* f,  // input stream; replaces the old FILE* argument
+    TessResultCallback3<bool, tesseract::TFile*, T*, bool>* cb, bool swap) {
  return table_.read(f, cb, swap);
 }

--- a/classify/adaptive.cpp
+++ b/classify/adaptive.cpp
@ -30,6 +30,8 @@
 #endif
 #include <stdio.h>

+using tesseract::TFile;
+
 /*----------------------------------------------------------------------------
              Public Code
 ----------------------------------------------------------------------------*/
@ -310,7 +312,7 @@ void Classify::PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates) {
 * @note Exceptions: none
 * @note History: Tue Mar 19 14:11:01 1991, DSJ, Created.
 */
-ADAPT_CLASS ReadAdaptedClass(FILE *File) {
+ADAPT_CLASS ReadAdaptedClass(TFile *fp) {
  int NumTempProtos;
  int NumConfigs;
  int i;
@ -319,34 +321,34 @@ ADAPT_CLASS ReadAdaptedClass(FILE *File) {

  /* first read high level adapted class structure */
  Class = (ADAPT_CLASS) Emalloc (sizeof (ADAPT_CLASS_STRUCT));
-  fread ((char *) Class, sizeof (ADAPT_CLASS_STRUCT), 1, File);
+  fp->FRead(Class, sizeof(ADAPT_CLASS_STRUCT), 1);

  /* then read in the definitions of the permanent protos and configs */
  Class->PermProtos = NewBitVector (MAX_NUM_PROTOS);
  Class->PermConfigs = NewBitVector (MAX_NUM_CONFIGS);
-  fread ((char *) Class->PermProtos, sizeof (uinT32),
-    WordsInVectorOfSize (MAX_NUM_PROTOS), File);
-  fread ((char *) Class->PermConfigs, sizeof (uinT32),
-    WordsInVectorOfSize (MAX_NUM_CONFIGS), File);
+  fp->FRead(Class->PermProtos, sizeof(uinT32),
+            WordsInVectorOfSize(MAX_NUM_PROTOS));
+  fp->FRead(Class->PermConfigs, sizeof(uinT32),
+            WordsInVectorOfSize(MAX_NUM_CONFIGS));

  /* then read in the list of temporary protos */
-  fread ((char *) &NumTempProtos, sizeof (int), 1, File);
+  fp->FRead(&NumTempProtos, sizeof(int), 1);
  Class->TempProtos = NIL_LIST;
  for (i = 0; i < NumTempProtos; i++) {
    TempProto =
      (TEMP_PROTO) alloc_struct (sizeof (TEMP_PROTO_STRUCT),
      "TEMP_PROTO_STRUCT");
-    fread ((char *) TempProto, sizeof (TEMP_PROTO_STRUCT), 1, File);
+    fp->FRead(TempProto, sizeof(TEMP_PROTO_STRUCT), 1);
    Class->TempProtos = push_last (Class->TempProtos, TempProto);
  }

  /* then read in the adapted configs */
-  fread ((char *) &NumConfigs, sizeof (int), 1, File);
+  fp->FRead(&NumConfigs, sizeof(int), 1);
  for (i = 0; i < NumConfigs; i++)
    if (test_bit (Class->PermConfigs, i))
-      Class->Config[i].Perm = ReadPermConfig (File);
+      Class->Config[i].Perm = ReadPermConfig(fp);
    else
-      Class->Config[i].Temp = ReadTempConfig (File);
+      Class->Config[i].Temp = ReadTempConfig(fp);

  return (Class);

@ -366,20 +368,20 @@ namespace tesseract {
 * @note Exceptions: none
 * @note History: Mon Mar 18 15:18:10 1991, DSJ, Created.
 */
-ADAPT_TEMPLATES Classify::ReadAdaptedTemplates(FILE *File) {
+ADAPT_TEMPLATES Classify::ReadAdaptedTemplates(TFile *fp) {
  int i;
  ADAPT_TEMPLATES Templates;

  /* first read the high level adaptive template struct */
  Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT));
-  fread ((char *) Templates, sizeof (ADAPT_TEMPLATES_STRUCT), 1, File);
+  fp->FRead(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1);

  /* then read in the basic integer templates */
-  Templates->Templates = ReadIntTemplates (File);
+  Templates->Templates = ReadIntTemplates(false, fp);

  /* then read in the adaptive info for each class */
  for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
-    Templates->Class[i] = ReadAdaptedClass (File);
+    Templates->Class[i] = ReadAdaptedClass(fp);
  }
  return (Templates);

@ -399,15 +401,15 @@ ADAPT_TEMPLATES Classify::ReadAdaptedTemplates(FILE *File) {
 * @note Exceptions: none
 * @note History: Tue Mar 19 14:25:26 1991, DSJ, Created.
 */
-PERM_CONFIG ReadPermConfig(FILE *File) {
+PERM_CONFIG ReadPermConfig(TFile *fp) {
  PERM_CONFIG Config = (PERM_CONFIG) alloc_struct(sizeof(PERM_CONFIG_STRUCT),
                                                  "PERM_CONFIG_STRUCT");
  uinT8 NumAmbigs;
-  fread ((char *) &NumAmbigs, sizeof(uinT8), 1, File);
+  fp->FRead(&NumAmbigs, sizeof(uinT8), 1);
  Config->Ambigs = new UNICHAR_ID[NumAmbigs + 1];
-  fread(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File);
+  fp->FRead(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs);
  Config->Ambigs[NumAmbigs] = -1;
-  fread(&(Config->FontinfoId), sizeof(int), 1, File);
+  fp->FRead(&(Config->FontinfoId), sizeof(int), 1);

  return (Config);

@ -426,17 +428,16 @@ PERM_CONFIG ReadPermConfig(FILE *File) {
 * @note Exceptions: none
 * @note History: Tue Mar 19 14:29:59 1991, DSJ, Created.
 */
-TEMP_CONFIG ReadTempConfig(FILE *File) {
+TEMP_CONFIG ReadTempConfig(TFile *fp) {
  TEMP_CONFIG Config;

  Config =
    (TEMP_CONFIG) alloc_struct (sizeof (TEMP_CONFIG_STRUCT),
    "TEMP_CONFIG_STRUCT");
-  fread ((char *) Config, sizeof (TEMP_CONFIG_STRUCT), 1, File);
+  fp->FRead(Config, sizeof(TEMP_CONFIG_STRUCT), 1);

  Config->Protos = NewBitVector (Config->ProtoVectorSize * BITSINLONG);
-  fread ((char *) Config->Protos, sizeof (uinT32),
-    Config->ProtoVectorSize, File);
+  fp->FRead(Config->Protos, sizeof(uinT32), Config->ProtoVectorSize);

  return (Config);

--- a/classify/adaptive.h
+++ b/classify/adaptive.h
@ -126,11 +126,11 @@ TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId);

 TEMP_PROTO NewTempProto();

-ADAPT_CLASS ReadAdaptedClass(FILE *File);
+ADAPT_CLASS ReadAdaptedClass(tesseract::TFile *File);

-PERM_CONFIG ReadPermConfig(FILE *File);
+PERM_CONFIG ReadPermConfig(tesseract::TFile *File);

-TEMP_CONFIG ReadTempConfig(FILE *File);
+TEMP_CONFIG ReadTempConfig(tesseract::TFile *File);

 void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs);

--- a/classify/adaptmatch.cpp
+++ b/classify/adaptmatch.cpp
@ -524,7 +524,7 @@ void Classify::EndAdaptiveClassifier() {
 *                            enables use of pre-adapted templates
 *  @note History: Mon Mar 11 12:49:34 1991, DSJ, Created.
 */
-void Classify::InitAdaptiveClassifier(bool load_pre_trained_templates) {
+void Classify::InitAdaptiveClassifier(TessdataManager* mgr) {
  if (!classify_enable_adaptive_matcher)
    return;
  if (AllProtosOn != NULL)
@ -532,37 +532,25 @@ void Classify::InitAdaptiveClassifier(bool load_pre_trained_templates) {

  // If there is no language_data_path_prefix, the classifier will be
  // adaptive only.
-  if (language_data_path_prefix.length() > 0 &&
-      load_pre_trained_templates) {
-    ASSERT_HOST(tessdata_manager.SeekToStart(TESSDATA_INTTEMP));
-    PreTrainedTemplates =
-      ReadIntTemplates(tessdata_manager.GetDataFilePtr());
-    if (tessdata_manager.DebugLevel() > 0) tprintf("Loaded inttemp\n");
+  if (language_data_path_prefix.length() > 0 && mgr != nullptr) {
+    TFile fp;
+    ASSERT_HOST(mgr->GetComponent(TESSDATA_INTTEMP, &fp));
+    PreTrainedTemplates = ReadIntTemplates(mgr->swap(), &fp);

-    if (tessdata_manager.SeekToStart(TESSDATA_SHAPE_TABLE)) {
+    if (mgr->GetComponent(TESSDATA_SHAPE_TABLE, &fp)) {
      shape_table_ = new ShapeTable(unicharset);
-      if (!shape_table_->DeSerialize(tessdata_manager.swap(),
-                                     tessdata_manager.GetDataFilePtr())) {
+      if (!shape_table_->DeSerialize(mgr->swap(), &fp)) {
        tprintf("Error loading shape table!\n");
        delete shape_table_;
        shape_table_ = NULL;
-      } else if (tessdata_manager.DebugLevel() > 0) {
-        tprintf("Successfully loaded shape table!\n");
      }
    }

-    ASSERT_HOST(tessdata_manager.SeekToStart(TESSDATA_PFFMTABLE));
-    ReadNewCutoffs(tessdata_manager.GetDataFilePtr(),
-                   tessdata_manager.swap(),
-                   tessdata_manager.GetEndOffset(TESSDATA_PFFMTABLE),
-                   CharNormCutoffs);
-    if (tessdata_manager.DebugLevel() > 0) tprintf("Loaded pffmtable\n");
+    ASSERT_HOST(mgr->GetComponent(TESSDATA_PFFMTABLE, &fp));
+    ReadNewCutoffs(&fp, mgr->swap(), CharNormCutoffs);

-    ASSERT_HOST(tessdata_manager.SeekToStart(TESSDATA_NORMPROTO));
-    NormProtos =
-      ReadNormProtos(tessdata_manager.GetDataFilePtr(),
-                     tessdata_manager.GetEndOffset(TESSDATA_NORMPROTO));
-    if (tessdata_manager.DebugLevel() > 0) tprintf("Loaded normproto\n");
+    ASSERT_HOST(mgr->GetComponent(TESSDATA_NORMPROTO, &fp));
+    NormProtos = ReadNormProtos(&fp);
    static_classifier_ = new TessClassifier(false, this);
  }

@ -582,21 +570,19 @@ void Classify::InitAdaptiveClassifier(bool load_pre_trained_templates) {
  }

  if (classify_use_pre_adapted_templates) {
-    FILE *File;
+    TFile fp;
    STRING Filename;

    Filename = imagefile;
    Filename += ADAPT_TEMPLATE_SUFFIX;
-    File = fopen(Filename.string(), "rb");
-    if (File == NULL) {
+    if (!fp.Open(Filename.string(), nullptr)) {
      AdaptedTemplates = NewAdaptedTemplates(true);
    } else {
      cprintf("\nReading pre-adapted templates from %s ...\n",
              Filename.string());
      fflush(stdout);
-      AdaptedTemplates = ReadAdaptedTemplates(File);
+      AdaptedTemplates = ReadAdaptedTemplates(&fp);
      cprintf("\n");
-      fclose(File);
      PrintAdaptedTemplates(stdout, AdaptedTemplates);

      for (int i = 0; i < AdaptedTemplates->Templates->NumClasses; i++) {
--- a/classify/classify.h
+++ b/classify/classify.h
@ -103,16 +103,15 @@ class Classify : public CCStruct {
                   const uinT8* normalization_factors,
                   const uinT16* expected_num_features,
                   GenericVector<CP_RESULT_STRUCT>* results);
-  void ReadNewCutoffs(FILE *CutoffFile, bool swap, inT64 end_offset,
-                      CLASS_CUTOFF_ARRAY Cutoffs);
+  void ReadNewCutoffs(TFile* fp, bool swap, CLASS_CUTOFF_ARRAY Cutoffs);
  void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates);
  void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates);
-  ADAPT_TEMPLATES ReadAdaptedTemplates(FILE *File);
+  ADAPT_TEMPLATES ReadAdaptedTemplates(TFile* File);
  /* normmatch.cpp ************************************************************/
  FLOAT32 ComputeNormMatch(CLASS_ID ClassId,
                           const FEATURE_STRUCT& feature, BOOL8 DebugMatch);
  void FreeNormProtos();
-  NORM_PROTOS *ReadNormProtos(FILE *File, inT64 end_offset);
+  NORM_PROTOS* ReadNormProtos(TFile* fp);
  /* protos.cpp ***************************************************************/
  void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class);
  INT_TEMPLATES CreateIntTemplates(CLASSES FloatProtos,
@ -138,7 +137,7 @@ class Classify : public CCStruct {
  void LearnPieces(const char* fontname, int start, int length, float threshold,
                   CharSegmentationType segmentation, const char* correct_text,
                   WERD_RES* word);
-  void InitAdaptiveClassifier(bool load_pre_trained_templates);
+  void InitAdaptiveClassifier(TessdataManager* mgr);
  void InitAdaptedClass(TBLOB *Blob,
                        CLASS_ID ClassId,
                        int FontinfoId,
@ -335,7 +334,7 @@ class Classify : public CCStruct {
                               uinT8* char_norm_array);
  void ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures);
  /* intproto.cpp *************************************************************/
-  INT_TEMPLATES ReadIntTemplates(FILE *File);
+  INT_TEMPLATES ReadIntTemplates(bool swap, TFile* fp);
  void WriteIntTemplates(FILE *File, INT_TEMPLATES Templates,
                         const UNICHARSET& target_unicharset);
  CLASS_ID GetClassToDebug(const char *Prompt, bool* adaptive_on,
--- a/classify/clusttool.cpp
+++ b/classify/clusttool.cpp
@ -25,8 +25,11 @@
 #include <stdio.h>
 #include <math.h>

+using tesseract::TFile;
+
 //---------------Global Data Definitions and Declarations--------------------
 #define TOKENSIZE 80         //< max size of tokens read from an input file
+#define QUOTED_TOKENSIZE "79"
 #define MAXSAMPLESIZE 65535  //< max num of dimensions in feature space
 //#define MAXBLOCKSIZE  65535   //< max num of samples in a character (block
 // size)
@ -41,11 +44,14 @@
 * @note Exceptions: ILLEGALSAMPLESIZE  illegal format or range
 * @note History: 6/6/89, DSJ, Created.
 */
-uinT16 ReadSampleSize(FILE *File) {
-  int SampleSize;
+uinT16 ReadSampleSize(TFile *fp) {
+  int SampleSize = 0;  // keeps a defined value if the read or parse fails

-  if ((tfscanf(File, "%d", &SampleSize) != 1) ||
-    (SampleSize < 0) || (SampleSize > MAXSAMPLESIZE))
+  const int kMaxLineSize = 100;  // capacity of the line buffer below
+  char line[kMaxLineSize];
+  if (fp->FGets(line, kMaxLineSize) == nullptr ||  // no line could be read
+      sscanf(line, "%d", &SampleSize) != 1 || (SampleSize < 0) ||
+      (SampleSize > MAXSAMPLESIZE))  // not an int, or outside the valid range
    DoError (ILLEGALSAMPLESIZE, "Illegal sample size");
  return (SampleSize);
 }
@ -64,30 +70,28 @@ uinT16 ReadSampleSize(FILE *File) {
 * @note Globals: None
 * @note History: 6/6/89, DSJ, Created.
 */
-PARAM_DESC *ReadParamDesc(FILE *File, uinT16 N) {
-  int i;
+PARAM_DESC *ReadParamDesc(TFile *fp, uinT16 N) {  // one text line per param
  PARAM_DESC *ParamDesc;
-  char Token[TOKENSIZE];
+  char linear_token[TOKENSIZE], essential_token[TOKENSIZE];

  ParamDesc = (PARAM_DESC *) Emalloc (N * sizeof (PARAM_DESC));
-  for (i = 0; i < N; i++) {
-    if (tfscanf(File, "%s", Token) != 1)
-      DoError (ILLEGALCIRCULARSPEC,
-        "Illegal circular/linear specification");
-    if (Token[0] == 'c')
+  for (int i = 0; i < N; i++) {
+    const int kMaxLineSize = TOKENSIZE * 4;  // room for two tokens + two floats
+    char line[kMaxLineSize];
+    if (fp->FGets(line, kMaxLineSize) == nullptr ||
+        sscanf(line, "%" QUOTED_TOKENSIZE "s %" QUOTED_TOKENSIZE "s %f %f",
+               linear_token, essential_token, &ParamDesc[i].Min,
+               &ParamDesc[i].Max) != 4)  // all four fields are required
+      DoError(ILLEGALCIRCULARSPEC, "Illegal Parameter specification");
+    if (linear_token[0] == 'c')  // first token: circular vs. linear
      ParamDesc[i].Circular = TRUE;
    else
      ParamDesc[i].Circular = FALSE;

-    if (tfscanf(File, "%s", Token) != 1)
-      DoError (ILLEGALESSENTIALSPEC,
-        "Illegal essential/non-essential spec");
-    if (Token[0] == 'e')
+    if (essential_token[0] == 'e')  // second token: essential flag
      ParamDesc[i].NonEssential = FALSE;
    else
      ParamDesc[i].NonEssential = TRUE;
-    if (tfscanf(File, "%f%f", &(ParamDesc[i].Min), &(ParamDesc[i].Max)) != 2)
-      DoError (ILLEGALMINMAXSPEC, "Illegal min or max specification");
    ParamDesc[i].Range = ParamDesc[i].Max - ParamDesc[i].Min;
    ParamDesc[i].HalfRange = ParamDesc[i].Range / 2;
    ParamDesc[i].MidRange = (ParamDesc[i].Max + ParamDesc[i].Min) / 2;
@ -111,123 +115,68 @@ PARAM_DESC *ReadParamDesc(FILE *File, uinT16 N) {
 * @note Globals: None
 * @note History: 6/6/89, DSJ, Created.
 */
-PROTOTYPE *ReadPrototype(FILE *File, uinT16 N) {
-  char Token[TOKENSIZE];
-  int Status;
+PROTOTYPE *ReadPrototype(TFile *fp, uinT16 N) {  // nullptr on bad input
+  char sig_token[TOKENSIZE], shape_token[TOKENSIZE];
  PROTOTYPE *Proto;
  int SampleCount;
  int i;

-  if ((Status = tfscanf(File, "%s", Token)) == 1) {
-    Proto = (PROTOTYPE *) Emalloc (sizeof (PROTOTYPE));
-    Proto->Cluster = NULL;
-    if (Token[0] == 's')
-      Proto->Significant = TRUE;
-    else
-      Proto->Significant = FALSE;
-
-    Proto->Style = ReadProtoStyle (File);
-
-    if ((tfscanf(File, "%d", &SampleCount) != 1) || (SampleCount < 0))
-      DoError (ILLEGALSAMPLECOUNT, "Illegal sample count");
-    Proto->NumSamples = SampleCount;
-
-    Proto->Mean = ReadNFloats (File, N, NULL);
-    if (Proto->Mean == NULL)
-      DoError (ILLEGALMEANSPEC, "Illegal prototype mean");
-
-    switch (Proto->Style) {
-      case spherical:
-        if (ReadNFloats (File, 1, &(Proto->Variance.Spherical)) == NULL)
-          DoError (ILLEGALVARIANCESPEC, "Illegal prototype variance");
-        Proto->Magnitude.Spherical =
-          1.0 / sqrt ((double) (2.0 * PI * Proto->Variance.Spherical));
-        Proto->TotalMagnitude =
-          pow (Proto->Magnitude.Spherical, (float) N);
-        Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);
-        Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical;
-        Proto->Distrib = NULL;
-        break;
-      case elliptical:
-        Proto->Variance.Elliptical = ReadNFloats (File, N, NULL);
-        if (Proto->Variance.Elliptical == NULL)
-          DoError (ILLEGALVARIANCESPEC, "Illegal prototype variance");
-        Proto->Magnitude.Elliptical =
-          (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
-        Proto->Weight.Elliptical =
-          (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
-        Proto->TotalMagnitude = 1.0;
-        for (i = 0; i < N; i++) {
-          Proto->Magnitude.Elliptical[i] =
-            1.0 /
-            sqrt ((double) (2.0 * PI * Proto->Variance.Elliptical[i]));
-          Proto->Weight.Elliptical[i] =
-            1.0 / Proto->Variance.Elliptical[i];
-          Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];
-        }
-        Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);
-        Proto->Distrib = NULL;
-        break;
-      case mixed:
-        Proto->Distrib =
-          (DISTRIBUTION *) Emalloc (N * sizeof (DISTRIBUTION));
-        for (i = 0; i < N; i++) {
-          if (tfscanf(File, "%s", Token) != 1)
-            DoError (ILLEGALDISTRIBUTION,
-              "Illegal prototype distribution");
-          switch (Token[0]) {
-            case 'n':
-              Proto->Distrib[i] = normal;
-              break;
-            case 'u':
-              Proto->Distrib[i] = uniform;
-              break;
-            case 'r':
-              Proto->Distrib[i] = D_random;
-              break;
-            default:
-              DoError (ILLEGALDISTRIBUTION,
-                "Illegal prototype distribution");
-          }
-        }
-        Proto->Variance.Elliptical = ReadNFloats (File, N, NULL);
-        if (Proto->Variance.Elliptical == NULL)
-          DoError (ILLEGALVARIANCESPEC, "Illegal prototype variance");
-        Proto->Magnitude.Elliptical =
-          (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
-        Proto->Weight.Elliptical =
-          (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
-        Proto->TotalMagnitude = 1.0;
-        for (i = 0; i < N; i++) {
-          switch (Proto->Distrib[i]) {
-            case normal:
-              Proto->Magnitude.Elliptical[i] = 1.0 /
-                sqrt ((double)
-                (2.0 * PI * Proto->Variance.Elliptical[i]));
-              Proto->Weight.Elliptical[i] =
-                1.0 / Proto->Variance.Elliptical[i];
-              break;
-            case uniform:
-            case D_random:
-              Proto->Magnitude.Elliptical[i] = 1.0 /
-                (2.0 * Proto->Variance.Elliptical[i]);
-              break;
-            case DISTRIBUTION_COUNT:
-              ASSERT_HOST(!"Distribution count not allowed!");
-          }
-          Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];
-        }
-        Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);
-        break;
-    }
-    return (Proto);
+  const int kMaxLineSize = TOKENSIZE * 4;
+  char line[kMaxLineSize] = "";  // empty by default: printed below on failure
+  if (fp->FGets(line, kMaxLineSize) == nullptr ||
+      sscanf(line, "%" QUOTED_TOKENSIZE "s %" QUOTED_TOKENSIZE "s %d",
+             sig_token, shape_token, &SampleCount) != 3) {  // 3 header fields
+    tprintf("Invalid prototype: %s\n", line);
+    return nullptr;
  }
-  else if (Status == EOF)
-    return (NULL);
-  else {
-    DoError (ILLEGALSIGNIFICANCESPEC, "Illegal significance specification");
-    return (NULL);
+  Proto = (PROTOTYPE *)Emalloc(sizeof(PROTOTYPE));
+  Proto->Cluster = NULL;
+  // Any token that does not start with 's' marks the prototype insignificant.
+  Proto->Significant = (sig_token[0] == 's') ? TRUE : FALSE;
+
+  Proto->Style = ReadProtoStyle(shape_token);
+
+  if (SampleCount < 0) DoError(ILLEGALSAMPLECOUNT, "Illegal sample count");
+  Proto->NumSamples = SampleCount;
+
+  Proto->Mean = ReadNFloats(fp, N, NULL);  // NULL buffer: ReadNFloats allocates
+  if (Proto->Mean == NULL) DoError(ILLEGALMEANSPEC, "Illegal prototype mean");
+
+  switch (Proto->Style) {
+    case spherical:
+      if (ReadNFloats(fp, 1, &(Proto->Variance.Spherical)) == NULL)
+        DoError(ILLEGALVARIANCESPEC, "Illegal prototype variance");
+      Proto->Magnitude.Spherical =
+          1.0 / sqrt((double)(2.0 * PI * Proto->Variance.Spherical));
+      Proto->TotalMagnitude = pow(Proto->Magnitude.Spherical, (float)N);
+      Proto->LogMagnitude = log((double)Proto->TotalMagnitude);
+      Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical;
+      Proto->Distrib = NULL;
+      break;
+    case elliptical:
+      Proto->Variance.Elliptical = ReadNFloats(fp, N, NULL);
+      if (Proto->Variance.Elliptical == NULL)
+        DoError(ILLEGALVARIANCESPEC, "Illegal prototype variance");
+      Proto->Magnitude.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
+      Proto->Weight.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
+      Proto->TotalMagnitude = 1.0;
+      for (i = 0; i < N; i++) {
+        Proto->Magnitude.Elliptical[i] =
+            1.0 / sqrt((double)(2.0 * PI * Proto->Variance.Elliptical[i]));
+        Proto->Weight.Elliptical[i] = 1.0 / Proto->Variance.Elliptical[i];
+        Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];
+      }
+      Proto->LogMagnitude = log((double)Proto->TotalMagnitude);
+      Proto->Distrib = NULL;
+      break;
+    default:
+      // Unsupported style: release Mean as well so the reject path leaks nothing.
+      if (Proto->Mean != NULL) Efree(Proto->Mean);
+      Efree(Proto);
+      tprintf("Invalid prototype style\n");
+      return nullptr;
  }
+  return Proto;
 }

 /**
@ -239,30 +188,19 @@ PROTOTYPE *ReadPrototype(FILE *File, uinT16 N) {
 * @note Exceptions: ILLEGALSTYLESPEC illegal prototype style specification
 * @note History: 6/8/89, DSJ, Created.
 */
-PROTOSTYLE ReadProtoStyle(FILE *File) {
-  char Token[TOKENSIZE];
-  PROTOSTYLE Style;
-
-  if (tfscanf(File, "%s", Token) != 1)
-    DoError (ILLEGALSTYLESPEC, "Illegal prototype style specification");
-  switch (Token[0]) {
+PROTOSTYLE ReadProtoStyle(const char *shape) {  // shape: style token text
+  switch (shape[0]) {  // only the first character selects the style
    case 's':
-      Style = spherical;
-      break;
+      return spherical;
    case 'e':
-      Style = elliptical;
-      break;
-    case 'm':
-      Style = mixed;
-      break;
+      return elliptical;
    case 'a':
-      Style = automatic;
-      break;
+      return automatic;
    default:
-      Style = elliptical;
-      DoError (ILLEGALSTYLESPEC, "Illegal prototype style specification");
+      break;  // unrecognised (now including 'm'): warn below and fall back
  }
-  return (Style);
+  tprintf("Invalid prototype style specification:%s\n", shape);
+  return elliptical;  // fallback after the warning; no DoError is raised here
 }

 /**
@ -279,28 +217,30 @@ PROTOSTYLE ReadProtoStyle(FILE *File) {
 * @note Exceptions: ILLEGALFLOAT
 * @note History: 6/6/89, DSJ, Created.
 */
-FLOAT32* ReadNFloats(FILE * File, uinT16 N, FLOAT32 Buffer[]) {
+FLOAT32 *ReadNFloats(TFile *fp, uinT16 N, FLOAT32 Buffer[]) {
+  const int kMaxLineSize = 1024;  // all N floats are parsed from one such line
+  char line[kMaxLineSize];
+  if (fp->FGets(line, kMaxLineSize) == nullptr) {  // no line could be read
+    tprintf("Hit EOF in ReadNFloats!\n");
+    return nullptr;
+  }
  bool needs_free = false;
-  int i;
-  int NumFloatsRead;

  if (Buffer == NULL) {
    Buffer = reinterpret_cast<FLOAT32*>(Emalloc(N * sizeof(FLOAT32)));
    needs_free = true;
  }

-  for (i = 0; i < N; i++) {
-    NumFloatsRead = tfscanf(File, "%f", &(Buffer[i]));
-    if (NumFloatsRead != 1) {
-      if ((NumFloatsRead == EOF) && (i == 0)) {
-        if (needs_free) {
-          Efree(Buffer);
-        }
-        return NULL;
-      } else {
-        DoError(ILLEGALFLOAT, "Illegal float specification");
-      }
+  char *startptr = line;  // parse cursor into the line just read
+  for (int i = 0; i < N; i++) {
+    char *endptr;
+    Buffer[i] = strtof(startptr, &endptr);
+    if (endptr == startptr) {  // nothing consumed: fewer than N floats on line
+      tprintf("Read of %d floats failed!\n", N);
+      if (needs_free) Efree(Buffer);  // free only a buffer allocated here
+      return nullptr;
    }
+    startptr = endptr;  // resume right after the number just parsed
  }
  return Buffer;
 }
--- a/classify/clusttool.h
+++ b/classify/clusttool.h
@ -20,22 +20,23 @@
 #define TESSERACT_CLASSIFY_CLUSTTOOL_H_

 //--------------------------Include Files---------------------------------------
-#include "host.h"
-#include "cluster.h"
 #include <stdio.h>
+#include "cluster.h"
+#include "host.h"
+#include "serialis.h"

 /*-------------------------------------------------------------------------
        Public Function Prototype
 --------------------------------------------------------------------------*/
-uinT16 ReadSampleSize(FILE *File);
+uinT16 ReadSampleSize(tesseract::TFile *fp);

-PARAM_DESC *ReadParamDesc(FILE *File, uinT16 N);
+PARAM_DESC *ReadParamDesc(tesseract::TFile *fp, uinT16 N);

-PROTOTYPE *ReadPrototype(FILE *File, uinT16 N);
+PROTOTYPE *ReadPrototype(tesseract::TFile *fp, uinT16 N);

-PROTOSTYLE ReadProtoStyle(FILE *File);
+PROTOSTYLE ReadProtoStyle(const char *style);

-FLOAT32 *ReadNFloats (FILE * File, uinT16 N, FLOAT32 Buffer[]);
+FLOAT32 *ReadNFloats(tesseract::TFile *fp, uinT16 N, FLOAT32 Buffer[]);

 void WriteParamDesc(FILE *File, uinT16 N, const PARAM_DESC ParamDesc[]);

--- a/classify/cutoffs.cpp
+++ b/classify/cutoffs.cpp
@ -49,7 +49,7 @@ namespace tesseract {
 * @note Exceptions: none
 * @note History: Wed Feb 20 09:38:26 1991, DSJ, Created.
 */
-void Classify::ReadNewCutoffs(FILE *CutoffFile, bool swap, inT64 end_offset,
+void Classify::ReadNewCutoffs(TFile* fp, bool swap,
                              CLASS_CUTOFF_ARRAY Cutoffs) {
  char Class[UNICHAR_LEN + 1];
  CLASS_ID ClassId;
@ -57,23 +57,24 @@ void Classify::ReadNewCutoffs(FILE *CutoffFile, bool swap, inT64 end_offset,
  int i;

  if (shape_table_ != NULL) {
-    if (!shapetable_cutoffs_.DeSerialize(swap, CutoffFile)) {
+    if (!shapetable_cutoffs_.DeSerialize(swap, fp)) {
      tprintf("Error during read of shapetable pffmtable!\n");
    }
  }
  for (i = 0; i < MAX_NUM_CLASSES; i++)
    Cutoffs[i] = MAX_CUTOFF;

-  while ((end_offset < 0 || ftell(CutoffFile) < end_offset) &&
-         tfscanf(CutoffFile, "%" REALLY_QUOTE_IT(UNICHAR_LEN) "s %d",
-                Class, &Cutoff) == 2) {
+  const int kMaxLineSize = 100;
+  char line[kMaxLineSize];
+  while (fp->FGets(line, kMaxLineSize) != nullptr &&
+         sscanf(line, "%" REALLY_QUOTE_IT(UNICHAR_LEN) "s %d", Class,
+                &Cutoff) == 2) {
    if (strcmp(Class, "NULL") == 0) {
      ClassId = unicharset.unichar_to_id(" ");
    } else {
      ClassId = unicharset.unichar_to_id(Class);
    }
    Cutoffs[ClassId] = Cutoff;
-    SkipNewline(CutoffFile);
  }
 }

--- a/classify/intproto.cpp
+++ b/classify/intproto.cpp
@ -758,9 +758,8 @@ namespace tesseract {
 * @note Exceptions: none
 * @note History: Wed Feb 27 11:48:46 1991, DSJ, Created.
 */
-INT_TEMPLATES Classify::ReadIntTemplates(FILE *File) {
+INT_TEMPLATES Classify::ReadIntTemplates(bool swap, TFile *fp) {
  int i, j, w, x, y, z;
-  BOOL8 swap;
  int nread;
  int unicharset_size;
  int version_id = 0;
@ -786,29 +785,19 @@ INT_TEMPLATES Classify::ReadIntTemplates(FILE *File) {
  /* first read the high level template struct */
  Templates = NewIntTemplates();
  // Read Templates in parts for 64 bit compatibility.
-  if (fread(&unicharset_size, sizeof(int), 1, File) != 1)
-    cprintf("Bad read of inttemp!\n");
-  if (fread(&Templates->NumClasses,
-            sizeof(Templates->NumClasses), 1, File) != 1 ||
-      fread(&Templates->NumClassPruners,
-            sizeof(Templates->NumClassPruners), 1, File) != 1)
-    cprintf("Bad read of inttemp!\n");
-  // Swap status is determined automatically.
-  swap = Templates->NumClassPruners < 0 ||
-    Templates->NumClassPruners > MAX_NUM_CLASS_PRUNERS;
-  if (swap) {
-    Reverse32(&Templates->NumClassPruners);
-    Reverse32(&Templates->NumClasses);
-    Reverse32(&unicharset_size);
-  }
+  if (fp->FReadEndian(&unicharset_size, sizeof(unicharset_size), 1, swap) != 1)
+    tprintf("Bad read of inttemp!\n");
+  if (fp->FReadEndian(&Templates->NumClasses, sizeof(Templates->NumClasses), 1,
+                      swap) != 1 ||
+      fp->FReadEndian(&Templates->NumClassPruners,
+                      sizeof(Templates->NumClassPruners), 1, swap) != 1)
+    tprintf("Bad read of inttemp!\n");
  if (Templates->NumClasses < 0) {
    // This file has a version id!
    version_id = -Templates->NumClasses;
-    if (fread(&Templates->NumClasses, sizeof(Templates->NumClasses),
-              1, File) != 1)
-      cprintf("Bad read of inttemp!\n");
-    if (swap)
-      Reverse32(&Templates->NumClasses);
+    if (fp->FReadEndian(&Templates->NumClasses, sizeof(Templates->NumClasses),
+                        1, swap) != 1)
+      tprintf("Bad read of inttemp!\n");
  }

  if (version_id < 3) {
@ -817,39 +806,24 @@ INT_TEMPLATES Classify::ReadIntTemplates(FILE *File) {
  }

  if (version_id < 2) {
-    for (i = 0; i < unicharset_size; ++i) {
-      if (fread(&IndexFor[i], sizeof(inT16), 1, File) != 1)
-        cprintf("Bad read of inttemp!\n");
+    if (fp->FReadEndian(IndexFor, sizeof(IndexFor[0]), unicharset_size, swap) !=
+        unicharset_size) {
+      tprintf("Bad read of inttemp!\n");
    }
-    for (i = 0; i < Templates->NumClasses; ++i) {
-      if (fread(&ClassIdFor[i], sizeof(CLASS_ID), 1, File) != 1)
-        cprintf("Bad read of inttemp!\n");
-    }
-    if (swap) {
-      for (i = 0; i < Templates->NumClasses; i++)
-        Reverse16(&IndexFor[i]);
-      for (i = 0; i < Templates->NumClasses; i++)
-        Reverse32(&ClassIdFor[i]);
+    if (fp->FReadEndian(ClassIdFor, sizeof(ClassIdFor[0]),
+                        Templates->NumClasses, swap) != Templates->NumClasses) {
+      tprintf("Bad read of inttemp!\n");
    }
  }

  /* then read in the class pruners */
+  const int kNumBuckets =
+      NUM_CP_BUCKETS * NUM_CP_BUCKETS * NUM_CP_BUCKETS * WERDS_PER_CP_VECTOR;
  for (i = 0; i < Templates->NumClassPruners; i++) {
    Pruner = new CLASS_PRUNER_STRUCT;
-    if ((nread =
-         fread(Pruner, 1, sizeof(CLASS_PRUNER_STRUCT),
-                File)) != sizeof(CLASS_PRUNER_STRUCT))
-      cprintf("Bad read of inttemp!\n");
-    if (swap) {
-      for (x = 0; x < NUM_CP_BUCKETS; x++) {
-        for (y = 0; y < NUM_CP_BUCKETS; y++) {
-          for (z = 0; z < NUM_CP_BUCKETS; z++) {
-            for (w = 0; w < WERDS_PER_CP_VECTOR; w++) {
-              Reverse32(&Pruner->p[x][y][z][w]);
-            }
-          }
-        }
-      }
+    if (fp->FReadEndian(Pruner, sizeof(Pruner->p[0][0][0][0]), kNumBuckets,
+                        swap) != kNumBuckets) {
+      tprintf("Bad read of inttemp!\n");
    }
    if (version_id < 2) {
      TempClassPruner[i] = Pruner;
@ -914,39 +888,24 @@ INT_TEMPLATES Classify::ReadIntTemplates(FILE *File) {
  for (i = 0; i < Templates->NumClasses; i++) {
    /* first read in the high level struct for the class */
    Class = (INT_CLASS) Emalloc (sizeof (INT_CLASS_STRUCT));
-    if (fread(&Class->NumProtos, sizeof(Class->NumProtos), 1, File) != 1 ||
-        fread(&Class->NumProtoSets, sizeof(Class->NumProtoSets), 1, File) != 1 ||
-        fread(&Class->NumConfigs, sizeof(Class->NumConfigs), 1, File) != 1)
-      cprintf ("Bad read of inttemp!\n");
+    if (fp->FReadEndian(&Class->NumProtos, sizeof(Class->NumProtos), 1, swap) !=
+            1 ||
+        fp->FRead(&Class->NumProtoSets, sizeof(Class->NumProtoSets), 1) != 1 ||
+        fp->FRead(&Class->NumConfigs, sizeof(Class->NumConfigs), 1) != 1)
+      tprintf("Bad read of inttemp!\n");
    if (version_id == 0) {
      // Only version 0 writes 5 pointless pointers to the file.
      for (j = 0; j < 5; ++j) {
-        int junk;
-        if (fread(&junk, sizeof(junk), 1, File) != 1)
-          cprintf ("Bad read of inttemp!\n");
+        inT32 junk;
+        if (fp->FRead(&junk, sizeof(junk), 1) != 1)
+          tprintf("Bad read of inttemp!\n");
      }
    }
-    if (version_id < 4) {
-      for (j = 0; j < MaxNumConfigs; ++j) {
-        if (fread(&Class->ConfigLengths[j], sizeof(uinT16), 1, File) != 1)
-          cprintf ("Bad read of inttemp!\n");
-      }
-      if (swap) {
-        Reverse16(&Class->NumProtos);
-        for (j = 0; j < MaxNumConfigs; j++)
-          Reverse16(&Class->ConfigLengths[j]);
-      }
-    } else {
-      ASSERT_HOST(Class->NumConfigs < MaxNumConfigs);
-      for (j = 0; j < Class->NumConfigs; ++j) {
-        if (fread(&Class->ConfigLengths[j], sizeof(uinT16), 1, File) != 1)
-          cprintf ("Bad read of inttemp!\n");
-      }
-      if (swap) {
-        Reverse16(&Class->NumProtos);
-        for (j = 0; j < MaxNumConfigs; j++)
-          Reverse16(&Class->ConfigLengths[j]);
-      }
+    int num_configs = version_id < 4 ? MaxNumConfigs : Class->NumConfigs;
+    ASSERT_HOST(num_configs <= MaxNumConfigs);
+    if (fp->FReadEndian(Class->ConfigLengths, sizeof(uinT16), num_configs,
+                        swap) != num_configs) {
+      tprintf("Bad read of inttemp!\n");
    }
    if (version_id < 2) {
      ClassForClassId (Templates, ClassIdFor[i]) = Class;
@ -958,59 +917,41 @@ INT_TEMPLATES Classify::ReadIntTemplates(FILE *File) {
    Lengths = NULL;
    if (MaxNumIntProtosIn (Class) > 0) {
      Lengths = (uinT8 *)Emalloc(sizeof(uinT8) * MaxNumIntProtosIn(Class));
-      if ((nread =
-           fread((char *)Lengths, sizeof(uinT8),
-                 MaxNumIntProtosIn(Class), File)) != MaxNumIntProtosIn (Class))
-        cprintf ("Bad read of inttemp!\n");
+      if (fp->FRead(Lengths, sizeof(uinT8), MaxNumIntProtosIn(Class)) !=
+          MaxNumIntProtosIn(Class))
+        tprintf("Bad read of inttemp!\n");
    }
    Class->ProtoLengths = Lengths;

    /* then read in the proto sets */
    for (j = 0; j < Class->NumProtoSets; j++) {
      ProtoSet = (PROTO_SET)Emalloc(sizeof(PROTO_SET_STRUCT));
-      if (version_id < 3) {
-        if ((nread =
-             fread((char *) &ProtoSet->ProtoPruner, 1,
-                    sizeof(PROTO_PRUNER), File)) != sizeof(PROTO_PRUNER))
+      int num_buckets = NUM_PP_PARAMS * NUM_PP_BUCKETS * WERDS_PER_PP_VECTOR;
+      if (fp->FReadEndian(&ProtoSet->ProtoPruner,
+                          sizeof(ProtoSet->ProtoPruner[0][0][0]), num_buckets,
+                          swap) != num_buckets)
+        tprintf("Bad read of inttemp!\n");
+      for (x = 0; x < PROTOS_PER_PROTO_SET; x++) {
+        if (fp->FRead(&ProtoSet->Protos[x].A, sizeof(ProtoSet->Protos[x].A),
+                      1) != 1 ||
+            fp->FRead(&ProtoSet->Protos[x].B, sizeof(ProtoSet->Protos[x].B),
+                      1) != 1 ||
+            fp->FRead(&ProtoSet->Protos[x].C, sizeof(ProtoSet->Protos[x].C),
+                      1) != 1 ||
+            fp->FRead(&ProtoSet->Protos[x].Angle,
+                      sizeof(ProtoSet->Protos[x].Angle), 1) != 1)
+          tprintf("Bad read of inttemp!\n");
+        if (fp->FReadEndian(&ProtoSet->Protos[x].Configs,
+                            sizeof(ProtoSet->Protos[x].Configs[0]),
+                            WerdsPerConfigVec, swap) != WerdsPerConfigVec)
          cprintf("Bad read of inttemp!\n");
-        for (x = 0; x < PROTOS_PER_PROTO_SET; x++) {
-          if ((nread = fread((char *) &ProtoSet->Protos[x].A, 1,
-                             sizeof(inT8), File)) != sizeof(inT8) ||
-              (nread = fread((char *) &ProtoSet->Protos[x].B, 1,
-                             sizeof(uinT8), File)) != sizeof(uinT8) ||
-              (nread = fread((char *) &ProtoSet->Protos[x].C, 1,
-                             sizeof(inT8), File)) != sizeof(inT8) ||
-              (nread = fread((char *) &ProtoSet->Protos[x].Angle, 1,
-                             sizeof(uinT8), File)) != sizeof(uinT8))
-            cprintf("Bad read of inttemp!\n");
-          for (y = 0; y < WerdsPerConfigVec; y++)
-            if ((nread = fread((char *) &ProtoSet->Protos[x].Configs[y], 1,
-                               sizeof(uinT32), File)) != sizeof(uinT32))
-              cprintf("Bad read of inttemp!\n");
-        }
-      } else {
-        if ((nread =
-             fread((char *) ProtoSet, 1, sizeof(PROTO_SET_STRUCT),
-                   File)) != sizeof(PROTO_SET_STRUCT))
-          cprintf("Bad read of inttemp!\n");
-      }
-      if (swap) {
-        for (x = 0; x < NUM_PP_PARAMS; x++)
-          for (y = 0; y < NUM_PP_BUCKETS; y++)
-            for (z = 0; z < WERDS_PER_PP_VECTOR; z++)
-              Reverse32(&ProtoSet->ProtoPruner[x][y][z]);
-        for (x = 0; x < PROTOS_PER_PROTO_SET; x++)
-          for (y = 0; y < WerdsPerConfigVec; y++)
-            Reverse32(&ProtoSet->Protos[x].Configs[y]);
      }
      Class->ProtoSets[j] = ProtoSet;
    }
-    if (version_id < 4)
+    if (version_id < 4) {
      Class->font_set_id = -1;
-    else {
-      fread(&Class->font_set_id, sizeof(int), 1, File);
-      if (swap)
-        Reverse32(&Class->font_set_id);
+    } else {
+      fp->FReadEndian(&Class->font_set_id, sizeof(Class->font_set_id), 1, swap);
    }
  }

@ -1037,13 +978,12 @@ INT_TEMPLATES Classify::ReadIntTemplates(FILE *File) {
    }
  }
  if (version_id >= 4) {
-    this->fontinfo_table_.read(File, NewPermanentTessCallback(read_info), swap);
+    this->fontinfo_table_.read(fp, NewPermanentTessCallback(read_info), swap);
    if (version_id >= 5) {
-      this->fontinfo_table_.read(File,
-                                 NewPermanentTessCallback(read_spacing_info),
-                                 swap);
+      this->fontinfo_table_.read(
+          fp, NewPermanentTessCallback(read_spacing_info), swap);
    }
-    this->fontset_table_.read(File, NewPermanentTessCallback(read_set), swap);
+    this->fontset_table_.read(fp, NewPermanentTessCallback(read_set), swap);
  }

  // Clean up.
--- a/classify/mastertrainer.cpp
+++ b/classify/mastertrainer.cpp
@ -86,27 +86,6 @@ bool MasterTrainer::Serialize(FILE* fp) const {
  return true;
 }

-// Reads from the given file. Returns false in case of error.
-// If swap is true, assumes a big/little-endian swap is needed.
-bool MasterTrainer::DeSerialize(bool swap, FILE* fp) {
-  if (fread(&norm_mode_, sizeof(norm_mode_), 1, fp) != 1) return false;
-  if (swap) {
-    ReverseN(&norm_mode_, sizeof(norm_mode_));
-  }
-  if (!unicharset_.load_from_file(fp)) return false;
-  charsetsize_ = unicharset_.size();
-  if (!feature_space_.DeSerialize(swap, fp)) return false;
-  feature_map_.Init(feature_space_);
-  if (!samples_.DeSerialize(swap, fp)) return false;
-  if (!junk_samples_.DeSerialize(swap, fp)) return false;
-  if (!verify_samples_.DeSerialize(swap, fp)) return false;
-  if (!master_shapes_.DeSerialize(swap, fp)) return false;
-  if (!flat_shapes_.DeSerialize(swap, fp)) return false;
-  if (!fontinfo_table_.DeSerialize(swap, fp)) return false;
-  if (!xheights_.DeSerialize(swap, fp)) return false;
-  return true;
-}
-
 // Load an initial unicharset, or set one up if the file cannot be read.
 void MasterTrainer::LoadUnicharset(const char* filename) {
  if (!unicharset_.load_from_file(filename)) {
--- a/classify/mastertrainer.h
+++ b/classify/mastertrainer.h
@ -74,9 +74,6 @@ class MasterTrainer {

  // Writes to the given file. Returns false in case of error.
  bool Serialize(FILE* fp) const;
-  // Reads from the given file. Returns false in case of error.
-  // If swap is true, assumes a big/little-endian swap is needed.
-  bool DeSerialize(bool swap, FILE* fp);

  // Loads an initial unicharset, or sets one up if the file cannot be read.
  void LoadUnicharset(const char* filename);
--- a/classify/normmatch.cpp
+++ b/classify/normmatch.cpp
@ -242,7 +242,7 @@ namespace tesseract {
 * @note Exceptions: none
 * @note History: Wed Dec 19 16:38:49 1990, DSJ, Created.
 */
-NORM_PROTOS *Classify::ReadNormProtos(FILE *File, inT64 end_offset) {
+NORM_PROTOS *Classify::ReadNormProtos(TFile *fp) {
  NORM_PROTOS *NormProtos;
  int i;
  char unichar[2 * UNICHAR_LEN + 1];
@ -258,26 +258,26 @@ NORM_PROTOS *Classify::ReadNormProtos(FILE *File, inT64 end_offset) {
    NormProtos->Protos[i] = NIL_LIST;

  /* read file header and save in data structure */
-  NormProtos->NumParams = ReadSampleSize (File);
-  NormProtos->ParamDesc = ReadParamDesc (File, NormProtos->NumParams);
+  NormProtos->NumParams = ReadSampleSize(fp);
+  NormProtos->ParamDesc = ReadParamDesc(fp, NormProtos->NumParams);

  /* read protos for each class into a separate list */
-  while ((end_offset < 0 || ftell(File) < end_offset) &&
-         tfscanf(File, "%s %d", unichar, &NumProtos) == 2) {
+  const int kMaxLineSize = 100;
+  char line[kMaxLineSize];
+  while (fp->FGets(line, kMaxLineSize) != nullptr) {
+    if (sscanf(line, "%s %d", unichar, &NumProtos) != 2) continue;
    if (unicharset.contains_unichar(unichar)) {
      unichar_id = unicharset.unichar_to_id(unichar);
      Protos = NormProtos->Protos[unichar_id];
      for (i = 0; i < NumProtos; i++)
-        Protos =
-            push_last (Protos, ReadPrototype (File, NormProtos->NumParams));
+        Protos = push_last(Protos, ReadPrototype(fp, NormProtos->NumParams));
      NormProtos->Protos[unichar_id] = Protos;
    } else {
-      cprintf("Error: unichar %s in normproto file is not in unichar set.\n",
+      tprintf("Error: unichar %s in normproto file is not in unichar set.\n",
              unichar);
      for (i = 0; i < NumProtos; i++)
-        FreePrototype(ReadPrototype (File, NormProtos->NumParams));
+        FreePrototype(ReadPrototype(fp, NormProtos->NumParams));
    }
-    SkipNewline(File);
  }
  return (NormProtos);
 }                                /* ReadNormProtos */
--- a/classify/shapetable.cpp
+++ b/classify/shapetable.cpp
@ -71,10 +71,9 @@ bool UnicharAndFonts::Serialize(FILE* fp) const {
 }
 // Reads this UnicharAndFonts from the given file: first unichar_id, then
 // font_ids. Returns false as soon as either read fails.
 // If swap is true, assumes a big/little-endian swap is needed; the flag is
 // passed on both to the read of unichar_id and to font_ids.DeSerialize.
-bool UnicharAndFonts::DeSerialize(bool swap, FILE* fp) {
-  if (fread(&unichar_id, sizeof(unichar_id), 1, fp) != 1) return false;
-  if (swap)
-    ReverseN(&unichar_id, sizeof(unichar_id));
+bool UnicharAndFonts::DeSerialize(bool swap, TFile* fp) {
+  if (fp->FReadEndian(&unichar_id, sizeof(unichar_id), 1, swap) != 1)
+    return false;
  if (!font_ids.DeSerialize(swap, fp)) return false;
  return true;
 }
@ -96,10 +95,9 @@ bool Shape::Serialize(FILE* fp) const {
 }
 // Reads this Shape from the given file: a one-element "sorted" flag followed
 // by the unichars_ list. Returns false in case of error.
 // If swap is true, assumes a big/little-endian swap is needed. The flag is
 // read with a plain FRead (swap is not applied to it) and is only forwarded
 // to unichars_.DeSerializeClasses.
-bool Shape::DeSerialize(bool swap, FILE* fp) {
+bool Shape::DeSerialize(bool swap, TFile* fp) {
  uinT8 sorted;
-  if (fread(&sorted, sizeof(sorted), 1, fp) != 1)
-    return false;
+  if (fp->FRead(&sorted, sizeof(sorted), 1) != 1) return false;
  // Any non-zero stored value means the unichars were sorted when written.
  unichars_sorted_ = sorted != 0;
  if (!unichars_.DeSerializeClasses(swap, fp)) return false;
  return true;
 }
 }
 // Reads from the given file. Returns false in case of error.
 // If swap is true, assumes a big/little-endian swap is needed.
-bool ShapeTable::DeSerialize(bool swap, FILE* fp) {
+bool ShapeTable::DeSerialize(bool swap, TFile* fp) {
  if (!shape_table_.DeSerialize(swap, fp)) return false;
  num_fonts_ = 0;
  return true;
--- a/classify/shapetable.h
+++ b/classify/shapetable.h
@ -168,7 +168,7 @@ struct UnicharAndFonts {
  bool Serialize(FILE* fp) const;
  // Reads from the given file. Returns false in case of error.
  // If swap is true, assumes a big/little-endian swap is needed.
-  bool DeSerialize(bool swap, FILE* fp);
+  bool DeSerialize(bool swap, TFile* fp);

  // Sort function to sort a pair of UnicharAndFonts by unichar_id.
  static int SortByUnicharId(const void* v1, const void* v2);
@ -191,7 +191,7 @@ class Shape {
  bool Serialize(FILE* fp) const;
  // Reads from the given file. Returns false in case of error.
  // If swap is true, assumes a big/little-endian swap is needed.
-  bool DeSerialize(bool swap, FILE* fp);
+  bool DeSerialize(bool swap, TFile* fp);

  int destination_index() const {
    return destination_index_;
@ -272,7 +272,7 @@ class ShapeTable {
  bool Serialize(FILE* fp) const;
  // Reads from the given file. Returns false in case of error.
  // If swap is true, assumes a big/little-endian swap is needed.
-  bool DeSerialize(bool swap, FILE* fp);
+  bool DeSerialize(bool swap, TFile* fp);

  // Accessors.
  int NumShapes() const {
--- a/dict/dawg.cpp
+++ b/dict/dawg.cpp
@ -174,11 +174,7 @@ bool Dawg::match_words(WERD_CHOICE *word, inT32 index,
  return false;
 }

-void Dawg::init(DawgType type, const STRING &lang,
-                PermuterType perm, int unicharset_size, int debug_level) {
-  type_ = type;
-  lang_ = lang;
-  perm_ = perm;
+void Dawg::init(int unicharset_size) {
  ASSERT_HOST(unicharset_size > 0);
  unicharset_size_ = unicharset_size;
  // Set bit masks. We will use the value unicharset_size_ as a null char, so
@ -188,8 +184,6 @@ void Dawg::init(DawgType type, const STRING &lang,
  letter_mask_ = ~(~0ull << flag_start_bit_);
  next_node_mask_ = ~0ull << (flag_start_bit_ + NUM_FLAG_BITS);
  flags_mask_ = ~(letter_mask_ | next_node_mask_);
-
-  debug_level_ = debug_level;
 }


@ -315,44 +309,34 @@ void SquishedDawg::print_edge(EDGE_REF edge) const {
  }
 }

-void SquishedDawg::read_squished_dawg(FILE *file,
-                                      DawgType type,
-                                      const STRING &lang,
-                                      PermuterType perm,
-                                      int debug_level) {
-  if (debug_level) tprintf("Reading squished dawg\n");
 // Reads the dawg from the given file in this order: magic number,
 // unicharset size, number of edges, then the edge records themselves.
 // Returns false as soon as any of those reads fails, true otherwise.
 // type_, lang_, perm_ and debug_level_ are not read here; they are only
 // used for the debug output below.
+bool SquishedDawg::read_squished_dawg(TFile *file) {
+  if (debug_level_) tprintf("Reading squished dawg\n");

  // Read the magic number and if it does not match kDawgMagicNumber
  // set swap to true to indicate that we need to switch endianness.
  // The magic number itself is read without swapping, since comparing its
  // raw value is how the need for a swap is detected.
  inT16 magic;
-  fread(&magic, sizeof(inT16), 1, file);
+  if (file->FRead(&magic, sizeof(inT16), 1) != 1) return false;
  bool swap = (magic != kDawgMagicNumber);

-  int unicharset_size;
-  fread(&unicharset_size, sizeof(inT32), 1, file);
-  fread(&num_edges_, sizeof(inT32), 1, file);
-
-  if (swap) {
-    ReverseN(&unicharset_size, sizeof(unicharset_size));
-    ReverseN(&num_edges_, sizeof(num_edges_));
-  }
+  inT32 unicharset_size;
+  if (file->FReadEndian(&unicharset_size, sizeof(unicharset_size), 1, swap) !=
+      1)
+    return false;
+  if (file->FReadEndian(&num_edges_, sizeof(num_edges_), 1, swap) != 1)
+    return false;
  // Note that an empty or negative edge count trips this assert instead of
  // being reported through the false return value.
  ASSERT_HOST(num_edges_ > 0);  // DAWG should not be empty
-  Dawg::init(type, lang, perm, unicharset_size, debug_level);
+  Dawg::init(unicharset_size);

  // Allocate room for num_edges_ records and read them all in one call.
  // If that read fails, false is returned with edges_ still allocated.
  // NOTE(review): presumably the destructor releases edges_ - confirm.
  edges_ = (EDGE_ARRAY) memalloc(sizeof(EDGE_RECORD) * num_edges_);
-  fread(&edges_[0], sizeof(EDGE_RECORD), num_edges_, file);
-  EDGE_REF edge;
-  if (swap) {
-    for (edge = 0; edge < num_edges_; ++edge) {
-      ReverseN(&edges_[edge], sizeof(edges_[edge]));
-    }
-  }
-  if (debug_level > 2) {
+  if (file->FReadEndian(&edges_[0], sizeof(edges_[0]), num_edges_, swap) !=
+      num_edges_)
+    return false;
+  if (debug_level_ > 2) {
    tprintf("type: %d lang: %s perm: %d unicharset_size: %d num_edges: %d\n",
            type_, lang_.string(), perm_, unicharset_size_, num_edges_);
-    for (edge = 0; edge < num_edges_; ++edge)
-      print_edge(edge);
+    for (EDGE_REF edge = 0; edge < num_edges_; ++edge) print_edge(edge);
  }
+  return true;
 }

 NODE_MAP SquishedDawg::build_node_map(inT32 *num_nodes) const {
--- a/dict/dawg.h
+++ b/dict/dawg.h
@ -201,7 +201,12 @@ class Dawg {
  }

 protected:
-  Dawg() {}
  /// Stores the dawg type, language, permuter type and debug level.
  /// unicharset_size_ starts at 0; it is set later by init(), which also
  /// derives the bit masks from it.
+  Dawg(DawgType type, const STRING &lang, PermuterType perm, int debug_level)
+      : type_(type),
+        lang_(lang),
+        perm_(perm),
+        unicharset_size_(0),
+        debug_level_(debug_level) {}

  /// Returns the next node visited by following this edge.
  inline NODE_REF next_node_from_edge_rec(const EDGE_RECORD &edge_rec) const {
@ -274,10 +279,9 @@ class Dawg {
            (!word_end || (word_end == other_word_end)));
  }

-  /// Sets type_, lang_, perm_, unicharset_size_.
+  /// Sets unicharset_size_.
  /// Initializes the values of various masks from unicharset_size_.
-  void init(DawgType type, const STRING &lang,
-            PermuterType perm, int unicharset_size, int debug_level);
+  void init(int unicharset_size);

  /// Matches all of the words that are represented by this string.
  /// If wildcard is set to something other than INVALID_UNICHAR_ID,
@ -407,32 +411,36 @@ class DawgPositionVector : public GenericVector<DawgPosition> {
 //
 class SquishedDawg : public Dawg {
 public:
-  SquishedDawg(FILE *file, DawgType type, const STRING &lang,
-               PermuterType perm, int debug_level) {
-    read_squished_dawg(file, type, lang, perm, debug_level);
+  // Constructs an empty SquishedDawg that is expected to be filled in later
+  // by Load(). edges_, num_edges_ and num_forward_edges_in_node0 are given
+  // defined values here so that an object whose Load() is never called, or
+  // fails before edges_ is allocated, does not hold indeterminate members
+  // when it is destroyed.
+  SquishedDawg(DawgType type, const STRING &lang, PermuterType perm,
+               int debug_level)
+      : Dawg(type, lang, perm, debug_level),
+        edges_(nullptr),
+        num_edges_(0),
+        num_forward_edges_in_node0(0) {}
+  // Constructs a SquishedDawg by reading it from the named file.
+  // Opening the file and reading the dawg are both wrapped in ASSERT_HOST, so
+  // a failure trips the assert instead of being reported to the caller.
+  // Members start from the same defined values as in the empty constructor.
+  SquishedDawg(const char *filename, DawgType type, const STRING &lang,
+               PermuterType perm, int debug_level)
+      : Dawg(type, lang, perm, debug_level),
+        edges_(nullptr),
+        num_edges_(0),
+        num_forward_edges_in_node0(0) {
+    TFile file;
+    ASSERT_HOST(file.Open(filename, nullptr));
+    ASSERT_HOST(read_squished_dawg(&file));
    num_forward_edges_in_node0 = num_forward_edges(0);
  }
-  SquishedDawg(const char* filename, DawgType type,
-               const STRING &lang, PermuterType perm, int debug_level) {
-    FILE *file = fopen(filename, "rb");
-    if (file == NULL) {
-      tprintf("Failed to open dawg file %s\n", filename);
-      exit(1);
-    }
-    read_squished_dawg(file, type, lang, perm, debug_level);
-    num_forward_edges_in_node0 = num_forward_edges(0);
-    fclose(file);
-  }
  // Builds a SquishedDawg around an already populated edge array.
  // The edges pointer and num_edges are stored as given (no copy is made
  // here), the masks are set up from unicharset_size via init(), and the
  // forward-edge count of node 0 is cached.
  // NOTE(review): whether this object takes ownership of edges depends on
  // the destructor, which is not visible here - confirm.
  SquishedDawg(EDGE_ARRAY edges, int num_edges, DawgType type,
-               const STRING &lang, PermuterType perm,
-               int unicharset_size, int debug_level) :
-    edges_(edges), num_edges_(num_edges) {
-    init(type, lang, perm, unicharset_size, debug_level);
+               const STRING &lang, PermuterType perm, int unicharset_size,
+               int debug_level)
+      : Dawg(type, lang, perm, debug_level),
+        edges_(edges),
+        num_edges_(num_edges) {
+    init(unicharset_size);
    num_forward_edges_in_node0 = num_forward_edges(0);
    // Dump the whole dawg only at the most verbose debug levels.
    if (debug_level > 3) print_all("SquishedDawg:");
  }
  ~SquishedDawg();

+  // Loads using the given TFile. Returns false on failure.
  // On success the forward-edge count of node 0 is cached in
  // num_forward_edges_in_node0; on failure that member is left untouched.
+  bool Load(TFile *fp) {
+    if (!read_squished_dawg(fp)) return false;
+    num_forward_edges_in_node0 = num_forward_edges(0);
+    return true;
+  }
+
  int NumEdges() { return num_edges_; }

  /// Returns the edge that corresponds to the letter out of this node.
@ -529,8 +537,7 @@ class SquishedDawg : public Dawg {
  inT32 num_forward_edges(NODE_REF node) const;

  /// Reads SquishedDawg from a file.
-  void read_squished_dawg(FILE *file, DawgType type, const STRING &lang,
-                          PermuterType perm, int debug_level);
+  bool read_squished_dawg(TFile *file);

  /// Prints the contents of an edge indicated by the given EDGE_REF.
  void print_edge(EDGE_REF edge) const;
@ -547,7 +554,7 @@ class SquishedDawg : public Dawg {

  // Member variables.
  EDGE_ARRAY edges_;
-  int num_edges_;
+  inT32 num_edges_;
  int num_forward_edges_in_node0;
 };

--- a/dict/dawg_cache.cpp
+++ b/dict/dawg_cache.cpp
@ -27,44 +27,33 @@
 namespace tesseract {

 // Bundles everything needed to load one dawg (language, component type,
 // debug level and the data file to read from) so that Load() can be called
 // with no arguments, e.g. through a callback.
 struct DawgLoader {
-  DawgLoader(const STRING &lang,
-             const char *data_file_name,
-             TessdataType tessdata_dawg_type,
-             int dawg_debug_level)
+  DawgLoader(const STRING &lang, TessdataType tessdata_dawg_type,
+             int dawg_debug_level, TessdataManager *data_file)
      : lang_(lang),
-        data_file_name_(data_file_name),
+        data_file_(data_file),
        tessdata_dawg_type_(tessdata_dawg_type),
        dawg_debug_level_(dawg_debug_level) {}

  // Loads the dawg described by the members below. Defined out of line.
  Dawg *Load();

  STRING lang_;
-  const char *data_file_name_;
  // Stored as a raw pointer; this struct defines no destructor and so never
  // frees it.
+  TessdataManager *data_file_;
  TessdataType tessdata_dawg_type_;
  int dawg_debug_level_;
 };

-Dawg *DawgCache::GetSquishedDawg(
-    const STRING &lang,
-    const char *data_file_name,
-    TessdataType tessdata_dawg_type,
-    int debug_level) {
-  STRING data_id = data_file_name;
 // Returns the dawg of the given component type from data_file, going
 // through the dawgs_ cache. The cache key is the data file name followed by
 // the file suffix of the requested component type.
 // A DawgLoader::Load callback bound to a stack-allocated loader is handed to
 // dawgs_.Get().
 // NOTE(review): presumably Get() runs the callback only on a cache miss and
 // before returning, since loader does not outlive this call - confirm.
+Dawg *DawgCache::GetSquishedDawg(const STRING &lang,
+                                 TessdataType tessdata_dawg_type,
+                                 int debug_level, TessdataManager *data_file) {
+  STRING data_id = data_file->GetDataFileName();
  data_id += kTessdataFileSuffixes[tessdata_dawg_type];
-  DawgLoader loader(lang, data_file_name, tessdata_dawg_type, debug_level);
+  DawgLoader loader(lang, tessdata_dawg_type, debug_level, data_file);
  return dawgs_.Get(data_id, NewTessCallback(&loader, &DawgLoader::Load));
 }

 Dawg *DawgLoader::Load() {
-  TessdataManager data_loader;
-  if (!data_loader.Init(data_file_name_, dawg_debug_level_)) {
-    return NULL;
-  }
-  if (!data_loader.SeekToStart(tessdata_dawg_type_)) {
-    data_loader.End();
-    return NULL;
-  }
-  FILE *fp = data_loader.GetDataFilePtr();
+  TFile fp;
+  if (!data_file_->GetComponent(tessdata_dawg_type_, &fp)) return nullptr;
  DawgType dawg_type;
  PermuterType perm_type;
  switch (tessdata_dawg_type_) {
@ -96,13 +85,13 @@ Dawg *DawgLoader::Load() {
      perm_type = FREQ_DAWG_PERM;
      break;
    default:
-      data_loader.End();
-      return NULL;
+      return nullptr;
  }
  SquishedDawg *retval =
-      new SquishedDawg(fp, dawg_type, lang_, perm_type, dawg_debug_level_);
-  data_loader.End();
-  return retval;
+      new SquishedDawg(dawg_type, lang_, perm_type, dawg_debug_level_);
+  if (retval->Load(&fp)) return retval;
+  delete retval;
+  return nullptr;
 }

 }  // namespace tesseract
--- a/dict/dawg_cache.h
+++ b/dict/dawg_cache.h
@ -29,11 +29,8 @@ namespace tesseract {

 class DawgCache {
 public:
-  Dawg *GetSquishedDawg(
-      const STRING &lang,
-      const char *data_file_name,
-      TessdataType tessdata_dawg_type,
-      int debug_level);
+  Dawg *GetSquishedDawg(const STRING &lang, TessdataType tessdata_dawg_type,
+                        int debug_level, TessdataManager *data_file);

  // If we manage the given dawg, decrement its count,
  // and possibly delete it if the count reaches zero.
--- a/dict/dict.cpp
+++ b/dict/dict.cpp
@ -221,35 +221,35 @@ void Dict::SetupForLoad(DawgCache *dawg_cache) {
 }

 // Loads the dawgs needed by Tesseract. Call FinishLoad() after.
-void Dict::Load(const char *data_file_name, const STRING &lang) {
+void Dict::Load(const STRING &lang, TessdataManager *data_file) {
  // Load dawgs_.
  if (load_punc_dawg) {
-    punc_dawg_ = dawg_cache_->GetSquishedDawg(
-        lang, data_file_name, TESSDATA_PUNC_DAWG, dawg_debug_level);
+    punc_dawg_ = dawg_cache_->GetSquishedDawg(lang, TESSDATA_PUNC_DAWG,
+                                              dawg_debug_level, data_file);
    if (punc_dawg_) dawgs_ += punc_dawg_;
  }
  if (load_system_dawg) {
    Dawg *system_dawg = dawg_cache_->GetSquishedDawg(
-        lang, data_file_name, TESSDATA_SYSTEM_DAWG, dawg_debug_level);
+        lang, TESSDATA_SYSTEM_DAWG, dawg_debug_level, data_file);
    if (system_dawg) dawgs_ += system_dawg;
  }
  if (load_number_dawg) {
    Dawg *number_dawg = dawg_cache_->GetSquishedDawg(
-        lang, data_file_name, TESSDATA_NUMBER_DAWG, dawg_debug_level);
+        lang, TESSDATA_NUMBER_DAWG, dawg_debug_level, data_file);
    if (number_dawg) dawgs_ += number_dawg;
  }
  if (load_bigram_dawg) {
-    bigram_dawg_ = dawg_cache_->GetSquishedDawg(
-        lang, data_file_name, TESSDATA_BIGRAM_DAWG, dawg_debug_level);
+    bigram_dawg_ = dawg_cache_->GetSquishedDawg(lang, TESSDATA_BIGRAM_DAWG,
+                                                dawg_debug_level, data_file);
  }
  if (load_freq_dawg) {
-    freq_dawg_ = dawg_cache_->GetSquishedDawg(
-        lang, data_file_name, TESSDATA_FREQ_DAWG, dawg_debug_level);
+    freq_dawg_ = dawg_cache_->GetSquishedDawg(lang, TESSDATA_FREQ_DAWG,
+                                              dawg_debug_level, data_file);
    if (freq_dawg_) { dawgs_ += freq_dawg_; }
  }
  if (load_unambig_dawg) {
-    unambig_dawg_ = dawg_cache_->GetSquishedDawg(
-        lang, data_file_name, TESSDATA_UNAMBIG_DAWG, dawg_debug_level);
+    unambig_dawg_ = dawg_cache_->GetSquishedDawg(lang, TESSDATA_UNAMBIG_DAWG,
+                                                 dawg_debug_level, data_file);
    if (unambig_dawg_) dawgs_ += unambig_dawg_;
  }

@ -302,21 +302,21 @@ void Dict::Load(const char *data_file_name, const STRING &lang) {
 }

 // Loads the dawgs needed by the LSTM model. Call FinishLoad() after.
 // Each of the punctuation, system and number dawgs is requested from
 // dawg_cache_ only if its load_* flag is set, and is appended to dawgs_
 // only if the cache returned a non-null dawg. A dawg that could not be
 // obtained is skipped without reporting an error here.
-void Dict::LoadLSTM(const char *data_file_name, const STRING &lang) {
+void Dict::LoadLSTM(const STRING &lang, TessdataManager *data_file) {
  // Load dawgs_.
  if (load_punc_dawg) {
-    punc_dawg_ = dawg_cache_->GetSquishedDawg(
-        lang, data_file_name, TESSDATA_LSTM_PUNC_DAWG, dawg_debug_level);
+    punc_dawg_ = dawg_cache_->GetSquishedDawg(lang, TESSDATA_LSTM_PUNC_DAWG,
+                                              dawg_debug_level, data_file);
    if (punc_dawg_) dawgs_ += punc_dawg_;
  }
  if (load_system_dawg) {
    Dawg *system_dawg = dawg_cache_->GetSquishedDawg(
-        lang, data_file_name, TESSDATA_LSTM_SYSTEM_DAWG, dawg_debug_level);
+        lang, TESSDATA_LSTM_SYSTEM_DAWG, dawg_debug_level, data_file);
    if (system_dawg) dawgs_ += system_dawg;
  }
  if (load_number_dawg) {
    Dawg *number_dawg = dawg_cache_->GetSquishedDawg(
-        lang, data_file_name, TESSDATA_LSTM_NUMBER_DAWG, dawg_debug_level);
+        lang, TESSDATA_LSTM_NUMBER_DAWG, dawg_debug_level, data_file);
    if (number_dawg) dawgs_ += number_dawg;
  }
 }
--- a/dict/dict.h
+++ b/dict/dict.h
@ -298,9 +298,9 @@ class Dict {
  // Sets up ready for a Load or LoadLSTM.
  void SetupForLoad(DawgCache *dawg_cache);
  // Loads the dawgs needed by Tesseract. Call FinishLoad() after.
-  void Load(const char *data_file_name, const STRING &lang);
+  void Load(const STRING &lang, TessdataManager *data_file);
  // Loads the dawgs needed by the LSTM model. Call FinishLoad() after.
-  void LoadLSTM(const char *data_file_name, const STRING &lang);
+  void LoadLSTM(const STRING &lang, TessdataManager *data_file);
  // Completes the loading process after Load() and/or LoadLSTM().
  // Returns false if no dictionaries were loaded.
  bool FinishLoad();
--- a/dict/trie.h
+++ b/dict/trie.h
@ -87,8 +87,9 @@ class Trie : public Dawg {
  // contain more edges than max_num_edges, all the edges are cleared
  // so that new inserts can proceed).
  Trie(DawgType type, const STRING &lang, PermuterType perm,
-       int unicharset_size, int debug_level) {
-    init(type, lang, perm, unicharset_size, debug_level);
+       int unicharset_size, int debug_level)
+      : Dawg(type, lang, perm, debug_level) {
+    init(unicharset_size);
    num_edges_ = 0;
    deref_node_index_mask_ = ~letter_mask_;
    new_dawg_node();  // need to allocate node 0
--- a/lstm/lstmrecognizer.cpp
+++ b/lstm/lstmrecognizer.cpp
@ -127,12 +127,11 @@ bool LSTMRecognizer::DeSerialize(bool swap, TFile* fp) {
 // on the unicharset matching. This enables training to deserialize a model
 // from checkpoint or restore without having to go back and reload the
 // dictionary.
-bool LSTMRecognizer::LoadDictionary(const char* data_file_name,
-                                    const char* lang) {
+bool LSTMRecognizer::LoadDictionary(const char* lang, TessdataManager* mgr) {
  delete dict_;
  dict_ = new Dict(&ccutil_);
  dict_->SetupForLoad(Dict::GlobalDawgCache());
-  dict_->LoadLSTM(data_file_name, lang);
+  dict_->LoadLSTM(lang, mgr);
  if (dict_->FinishLoad()) return true;  // Success.
  tprintf("Failed to load any lstm-specific dictionaries for lang %s!!\n",
          lang);
--- a/lstm/lstmrecognizer.h
+++ b/lstm/lstmrecognizer.h
@ -167,7 +167,7 @@ class LSTMRecognizer {
  // on the unicharset matching. This enables training to deserialize a model
  // from checkpoint or restore without having to go back and reload the
  // dictionary.
-  bool LoadDictionary(const char* data_file_name, const char* lang);
+  bool LoadDictionary(const char* lang, TessdataManager* mgr);

  // Recognizes the line image, contained within image_data, returning the
  // ratings matrix and matching box_word for each WERD_RES in the output.
--- a/lstm/lstmtrainer.cpp
+++ b/lstm/lstmtrainer.cpp
@ -1223,7 +1223,7 @@ double LSTMTrainer::ComputeWordError(STRING* truth_str, STRING* ocr_str) {
    std::string truth_word(truth_words[i].string());
    StrMap::iterator it = word_counts.find(truth_word);
    if (it == word_counts.end())
-      word_counts.insert(make_pair(truth_word, 1));
+      word_counts.insert(std::make_pair(truth_word, 1));
    else
      ++it->second;
  }
@ -1231,7 +1231,7 @@ double LSTMTrainer::ComputeWordError(STRING* truth_str, STRING* ocr_str) {
    std::string ocr_word(ocr_words[i].string());
    StrMap::iterator it = word_counts.find(ocr_word);
    if (it == word_counts.end())
-      word_counts.insert(make_pair(ocr_word, -1));
+      word_counts.insert(std::make_pair(ocr_word, -1));
    else
      --it->second;
  }
--- a/training/classifier_tester.cpp
+++ b/training/classifier_tester.cpp
@ -31,7 +31,6 @@ STRING_PARAM_FLAG(classifier, "", "Classifier to test");
 STRING_PARAM_FLAG(lang, "eng", "Language to test");
 STRING_PARAM_FLAG(tessdata_dir, "", "Directory of traineddata files");
 DECLARE_INT_PARAM_FLAG(debug_level);
-DECLARE_STRING_PARAM_FLAG(T);

 enum ClassifierName {
  CN_PRUNER,
@ -79,13 +78,6 @@ static tesseract::ShapeClassifier* InitializeClassifier(
  }
  tesseract::ShapeClassifier* shape_classifier = nullptr;

-  if (!FLAGS_T.empty()) {
-    const char* config_name;
-    while ((config_name = GetNextFilename(argc, argv)) != nullptr) {
-      tprintf("Reading config file %s ...\n", config_name);
-      (*api)->ReadConfigFile(config_name);
-    }
-  }
  if (classifier == CN_PRUNER) {
    shape_classifier = new tesseract::TessClassifier(true, classify);
  } else if (classifier == CN_FULL) {
--- a/training/combine_tessdata.cpp
+++ b/training/combine_tessdata.cpp
@ -65,6 +65,7 @@
 //
 int main(int argc, char **argv) {
  int i;
+  tesseract::TessdataManager tm;
  if (argc == 2) {
    printf("Combining tessdata files\n");
    STRING lang = argv[1];
@ -73,8 +74,7 @@ int main(int argc, char **argv) {
      lang += '.';
    STRING output_file = lang;
    output_file += kTrainedDataSuffix;
-    if (!tesseract::TessdataManager::CombineDataFiles(
-        lang.string(), output_file.string())) {
+    if (!tm.CombineDataFiles(lang.string(), output_file.string())) {
      printf("Error combining tessdata files into %s\n",
             output_file.string());
    } else {
@ -83,8 +83,7 @@ int main(int argc, char **argv) {
  } else if (argc >= 4 && (strcmp(argv[1], "-e") == 0 ||
                           strcmp(argv[1], "-u") == 0)) {
    // Initialize TessdataManager with the data in the given traineddata file.
-    tesseract::TessdataManager tm;
-    tm.Init(argv[2], 0);
+    tm.Init(argv[2]);
    printf("Extracting tessdata components from %s\n", argv[2]);
    if (strcmp(argv[1], "-e") == 0) {
      for (i = 3; i < argc; ++i) {
@ -107,7 +106,6 @@ int main(int argc, char **argv) {
        }
      }
    }
-    tm.End();
  } else if (argc >= 4 && strcmp(argv[1], "-o") == 0) {
    // Rename the current traineddata file to a temporary name.
    const char *new_traineddata_filename = argv[2];
@ -120,12 +118,10 @@ int main(int argc, char **argv) {
    }

    // Initialize TessdataManager with the data in the given traineddata file.
-    tesseract::TessdataManager tm;
-    tm.Init(traineddata_filename.string(), 0);
+    tm.Init(traineddata_filename.string());

    // Write the updated traineddata file.
    tm.OverwriteComponents(new_traineddata_filename, argv+3, argc-3);
-    tm.End();
  } else {
    printf("Usage for combining tessdata components:\n"
           "  %s language_data_path_prefix\n"
@ -143,4 +139,5 @@ int main(int argc, char **argv) {
           "  (e.g. %s -u eng.traineddata tmp/eng.)\n", argv[0], argv[0]);
    return 1;
  }
+  tm.Directory();
 }
--- a/training/commontraining.cpp
+++ b/training/commontraining.cpp
@ -59,7 +59,6 @@ STRING_PARAM_FLAG(F, "font_properties", "File listing font properties");
 STRING_PARAM_FLAG(X, "", "File listing font xheights");
 STRING_PARAM_FLAG(U, "unicharset", "File to load unicharset from");
 STRING_PARAM_FLAG(O, "", "File to write unicharset to");
-STRING_PARAM_FLAG(T, "", "File to load trainer from");
 STRING_PARAM_FLAG(output_trainer, "", "File to write trainer to");
 STRING_PARAM_FLAG(test_ch, "", "UTF8 test character string");
 DOUBLE_PARAM_FLAG(clusterconfig_min_samples_fraction, Config.MinSamples,
@ -118,10 +117,10 @@ ShapeTable* LoadShapeTable(const STRING& file_prefix) {
  ShapeTable* shape_table = nullptr;
  STRING shape_table_file = file_prefix;
  shape_table_file += kShapeTableFileSuffix;
-  FILE* shape_fp = fopen(shape_table_file.string(), "rb");
-  if (shape_fp != nullptr) {
+  TFile shape_fp;
+  if (shape_fp.Open(shape_table_file.string(), nullptr)) {
    shape_table = new ShapeTable;
-    if (!shape_table->DeSerialize(false, shape_fp)) {
+    if (!shape_table->DeSerialize(false, &shape_fp)) {
      delete shape_table;
      shape_table = nullptr;
      tprintf("Error: Failed to read shape table %s\n",
@ -131,7 +130,6 @@ ShapeTable* LoadShapeTable(const STRING& file_prefix) {
      tprintf("Read shape table %s of %d shapes\n",
              shape_table_file.string(), num_shapes);
    }
-    fclose(shape_fp);
  } else {
    tprintf("Warning: No shape table file present: %s\n",
            shape_table_file.string());
@ -199,75 +197,55 @@ MasterTrainer* LoadTrainingData(int argc, const char* const * argv,
                                             FLAGS_debug_level);
  IntFeatureSpace fs;
  fs.Init(kBoostXYBuckets, kBoostXYBuckets, kBoostDirBuckets);
-  if (FLAGS_T.empty()) {
-    trainer->LoadUnicharset(FLAGS_U.c_str());
-    // Get basic font information from font_properties.
-    if (!FLAGS_F.empty()) {
-      if (!trainer->LoadFontInfo(FLAGS_F.c_str())) {
-        delete trainer;
-        return nullptr;
-      }
-    }
-    if (!FLAGS_X.empty()) {
-      if (!trainer->LoadXHeights(FLAGS_X.c_str())) {
-        delete trainer;
-        return nullptr;
-      }
-    }
-    trainer->SetFeatureSpace(fs);
-    const char* page_name;
-    // Load training data from .tr files on the command line.
-    while ((page_name = GetNextFilename(argc, argv)) != nullptr) {
-      tprintf("Reading %s ...\n", page_name);
-      trainer->ReadTrainingSamples(page_name, feature_defs, false);
-
-      // If there is a file with [lang].[fontname].exp[num].fontinfo present,
-      // read font spacing information in to fontinfo_table.
-      int pagename_len = strlen(page_name);
-      char *fontinfo_file_name = new char[pagename_len + 7];
-      strncpy(fontinfo_file_name, page_name, pagename_len - 2);  // remove "tr"
-      strcpy(fontinfo_file_name + pagename_len - 2, "fontinfo");  // +"fontinfo"
-      trainer->AddSpacingInfo(fontinfo_file_name);
-      delete[] fontinfo_file_name;
-
-      // Load the images into memory if required by the classifier.
-      if (FLAGS_load_images) {
-        STRING image_name = page_name;
-        // Chop off the tr and replace with tif. Extension must be tif!
-        image_name.truncate_at(image_name.length() - 2);
-        image_name += "tif";
-        trainer->LoadPageImages(image_name.string());
-      }
-    }
-    trainer->PostLoadCleanup();
-    // Write the master trainer if required.
-    if (!FLAGS_output_trainer.empty()) {
-      FILE* fp = fopen(FLAGS_output_trainer.c_str(), "wb");
-      if (fp == nullptr) {
-        tprintf("Can't create saved trainer data!\n");
-      } else {
-        trainer->Serialize(fp);
-        fclose(fp);
-      }
-    }
-  } else {
-    bool success = false;
-    tprintf("Loading master trainer from file:%s\n",
-            FLAGS_T.c_str());
-    FILE* fp = fopen(FLAGS_T.c_str(), "rb");
-    if (fp == nullptr) {
-      tprintf("Can't read file %s to initialize master trainer\n",
-              FLAGS_T.c_str());
-    } else {
-      success = trainer->DeSerialize(false, fp);
-      fclose(fp);
-    }
-    if (!success) {
-      tprintf("Deserialize of master trainer failed!\n");
+  trainer->LoadUnicharset(FLAGS_U.c_str());
+  // Get basic font information from font_properties.
+  if (!FLAGS_F.empty()) {
+    if (!trainer->LoadFontInfo(FLAGS_F.c_str())) {
      delete trainer;
      return nullptr;
    }
-    trainer->SetFeatureSpace(fs);
+  }
+  if (!FLAGS_X.empty()) {
+    if (!trainer->LoadXHeights(FLAGS_X.c_str())) {
+      delete trainer;
+      return nullptr;
+    }
+  }
+  trainer->SetFeatureSpace(fs);
+  const char* page_name;
+  // Load training data from .tr files on the command line.
+  while ((page_name = GetNextFilename(argc, argv)) != nullptr) {
+    tprintf("Reading %s ...\n", page_name);
+    trainer->ReadTrainingSamples(page_name, feature_defs, false);
+
+    // If there is a file with [lang].[fontname].exp[num].fontinfo present,
+    // read font spacing information in to fontinfo_table.
+    int pagename_len = strlen(page_name);
+    char* fontinfo_file_name = new char[pagename_len + 7];
+    strncpy(fontinfo_file_name, page_name, pagename_len - 2);   // remove "tr"
+    strcpy(fontinfo_file_name + pagename_len - 2, "fontinfo");  // +"fontinfo"
+    trainer->AddSpacingInfo(fontinfo_file_name);
+    delete[] fontinfo_file_name;
+
+    // Load the images into memory if required by the classifier.
+    if (FLAGS_load_images) {
+      STRING image_name = page_name;
+      // Chop off the tr and replace with tif. Extension must be tif!
+      image_name.truncate_at(image_name.length() - 2);
+      image_name += "tif";
+      trainer->LoadPageImages(image_name.string());
+    }
+  }
+  trainer->PostLoadCleanup();
+  // Write the master trainer if required.
+  if (!FLAGS_output_trainer.empty()) {
+    FILE* fp = fopen(FLAGS_output_trainer.c_str(), "wb");
+    if (fp == nullptr) {
+      tprintf("Can't create saved trainer data!\n");
+    } else {
+      trainer->Serialize(fp);
+      fclose(fp);
+    }
  }
  trainer->PreTrainingSetup();
  if (!FLAGS_O.empty() &&
--- a/training/dawg2wordlist.cpp
+++ b/training/dawg2wordlist.cpp
@ -19,6 +19,7 @@

 #include "dawg.h"
 #include "host.h"
+#include "serialis.h"
 #include "tesscallback.h"
 #include "trie.h"
 #include "unicharset.h"
@ -28,17 +29,20 @@ const int kDictDebugLevel = 1;
 tesseract::Dawg *LoadSquishedDawg(const UNICHARSET &unicharset,
                                  const char *filename) {
  const int kDictDebugLevel = 1;
-  FILE *dawg_file = fopen(filename, "rb");
-  if (dawg_file == nullptr) {
+  tesseract::TFile dawg_file;
+  if (!dawg_file.Open(filename, nullptr)) {
    tprintf("Could not open %s for reading.\n", filename);
    return nullptr;
  }
  tprintf("Loading word list from %s\n", filename);
-  tesseract::Dawg *retval = new tesseract::SquishedDawg(
-      dawg_file, tesseract::DAWG_TYPE_WORD, "eng", SYSTEM_DAWG_PERM,
-      kDictDebugLevel);
+  tesseract::SquishedDawg *retval = new tesseract::SquishedDawg(
+      tesseract::DAWG_TYPE_WORD, "eng", SYSTEM_DAWG_PERM, kDictDebugLevel);
+  if (!retval->Load(&dawg_file)) {
+    tprintf("Could not read %s\n", filename);
+    delete retval;
+    return nullptr;
+  }
  tprintf("Word list loaded.\n");
-  fclose(dawg_file);
  return retval;
 }

--- a/wordrec/params_model.cpp
+++ b/wordrec/params_model.cpp
@ -100,17 +100,15 @@ bool ParamsModel::Equivalent(const ParamsModel &that) const {
 bool ParamsModel::LoadFromFile(
    const char *lang,
    const char *full_path) {
-  FILE *fp = fopen(full_path, "rb");
-  if (!fp) {
+  TFile fp;
+  if (!fp.Open(full_path, nullptr)) {
    tprintf("Error opening file %s\n", full_path);
    return false;
  }
-  bool result = LoadFromFp(lang, fp, -1);
-  fclose(fp);
-  return result;
+  return LoadFromFp(lang, &fp);
 }

-bool ParamsModel::LoadFromFp(const char *lang, FILE *fp, inT64 end_offset) {
+bool ParamsModel::LoadFromFp(const char *lang, TFile *fp) {
  const int kMaxLineSize = 100;
  char line[kMaxLineSize];
  BitVector present;
@ -120,9 +118,8 @@ bool ParamsModel::LoadFromFp(const char *lang, FILE *fp, inT64 end_offset) {
  GenericVector<float> &weights = weights_vec_[pass_];
  weights.init_to_size(PTRAIN_NUM_FEATURE_TYPES, 0.0);

-  while ((end_offset < 0 || ftell(fp) < end_offset) &&
-      fgets(line, kMaxLineSize, fp)) {
-    char *key = NULL;
+  while (fp->FGets(line, kMaxLineSize) != nullptr) {
+    char *key = nullptr;
    float value;
    if (!ParseLine(line, &key, &value))
      continue;
--- a/wordrec/params_model.h
+++ b/wordrec/params_model.h
@ -61,7 +61,7 @@ class ParamsModel {

  // Returns true on success.
  bool LoadFromFile(const char *lang, const char *full_path);
-  bool LoadFromFp(const char *lang, FILE *fp, inT64 end_offset);
+  bool LoadFromFp(const char *lang, TFile *fp);

  const GenericVector<float>& weights() const {
    return weights_vec_[pass_];
--- a/wordrec/tface.cpp
+++ b/wordrec/tface.cpp
@ -44,14 +44,14 @@ namespace tesseract {
 * and Dawg models.
 */
 void Wordrec::program_editup(const char *textbase,
-                             bool init_classifier,
-                             bool init_dict) {
+                             TessdataManager *init_classifier,
+                             TessdataManager *init_dict) {
  if (textbase != NULL) imagefile = textbase;
  InitFeatureDefs(&feature_defs_);
  InitAdaptiveClassifier(init_classifier);
  if (init_dict) {
    getDict().SetupForLoad(Dict::GlobalDawgCache());
-    getDict().Load(tessdata_manager.GetDataFileName().string(), lang);
+    getDict().Load(lang, init_dict);
    getDict().FinishLoad();
  }
  pass2_ok_split = chop_ok_split;
--- a/wordrec/wordrec.h
+++ b/wordrec/wordrec.h
@ -200,9 +200,8 @@ class Wordrec : public Classify {
  }

  // tface.cpp
-  void program_editup(const char *textbase,
-                      bool init_classifier,
-                      bool init_permute);
+  void program_editup(const char *textbase, TessdataManager *init_classifier,
+                      TessdataManager *init_dict);
  void cc_recog(WERD_RES *word);
  void program_editdown(inT32 elasped_time);
  void set_pass1();