mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-18 14:41:36 +08:00
Delete cube code
This commit is contained in:
parent
432684dd6e
commit
5c3839bdb4
@ -1,440 +0,0 @@
|
||||
/******************************************************************
|
||||
* File: cube_control.cpp
|
||||
* Description: Tesseract class methods for invoking cube convolutional
|
||||
* neural network word recognizer.
|
||||
* Author: Raquel Romano
|
||||
* Created: September 2009
|
||||
*
|
||||
* (C) Copyright 2009, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
**********************************************************************/
|
||||
|
||||
// Include automatically generated configuration file if running autoconf.
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config_auto.h"
|
||||
#endif
|
||||
|
||||
#include "allheaders.h"
|
||||
|
||||
#include "cube_object.h"
|
||||
#include "cube_reco_context.h"
|
||||
#include "tesseractclass.h"
|
||||
#include "tesseract_cube_combiner.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
/**
|
||||
* @name convert_prob_to_tess_certainty
|
||||
*
|
||||
* Normalize a probability in the range [0.0, 1.0] to a tesseract
|
||||
* certainty in the range [-20.0, 0.0]
|
||||
*/
|
||||
static float convert_prob_to_tess_certainty(float prob) {
|
||||
return (prob - 1.0) * 20.0;
|
||||
}
|
||||
|
||||
/**
|
||||
* @name char_box_to_tbox
|
||||
*
|
||||
* Create a TBOX from a character bounding box. If nonzero, the
|
||||
* x_offset accounts for any additional padding of the word box that
|
||||
* should be taken into account.
|
||||
*
|
||||
*/
|
||||
TBOX char_box_to_tbox(Box* char_box, TBOX word_box, int x_offset) {
|
||||
l_int32 left;
|
||||
l_int32 top;
|
||||
l_int32 width;
|
||||
l_int32 height;
|
||||
l_int32 right;
|
||||
l_int32 bottom;
|
||||
|
||||
boxGetGeometry(char_box, &left, &top, &width, &height);
|
||||
left += word_box.left() - x_offset;
|
||||
right = left + width;
|
||||
top = word_box.bottom() + word_box.height() - top;
|
||||
bottom = top - height;
|
||||
return TBOX(left, bottom, right, top);
|
||||
}
|
||||
|
||||
/**
|
||||
* @name extract_cube_state
|
||||
*
|
||||
* Extract CharSamp objects and character bounding boxes from the
|
||||
* CubeObject's state. The caller should free both structres.
|
||||
*
|
||||
*/
|
||||
bool Tesseract::extract_cube_state(CubeObject* cube_obj,
|
||||
int* num_chars,
|
||||
Boxa** char_boxes,
|
||||
CharSamp*** char_samples) {
|
||||
if (!cube_obj) {
|
||||
if (cube_debug_level > 0) {
|
||||
tprintf("Cube WARNING (extract_cube_state): Invalid cube object "
|
||||
"passed to extract_cube_state\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Note that the CubeObject accessors return either the deslanted or
|
||||
// regular objects search object or beam search object, whichever
|
||||
// was used in the last call to Recognize()
|
||||
CubeSearchObject* cube_search_obj = cube_obj->SrchObj();
|
||||
if (!cube_search_obj) {
|
||||
if (cube_debug_level > 0) {
|
||||
tprintf("Cube WARNING (Extract_cube_state): Could not retrieve "
|
||||
"cube's search object in extract_cube_state.\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
BeamSearch *beam_search_obj = cube_obj->BeamObj();
|
||||
if (!beam_search_obj) {
|
||||
if (cube_debug_level > 0) {
|
||||
tprintf("Cube WARNING (Extract_cube_state): Could not retrieve "
|
||||
"cube's beam search object in extract_cube_state.\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get the character samples and bounding boxes by backtracking
|
||||
// through the beam search path
|
||||
int best_node_index = beam_search_obj->BestPresortedNodeIndex();
|
||||
*char_samples = beam_search_obj->BackTrack(
|
||||
cube_search_obj, best_node_index, num_chars, NULL, char_boxes);
|
||||
if (!*char_samples)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @name create_cube_box_word
|
||||
*
|
||||
* Fill the given BoxWord with boxes from character bounding
|
||||
* boxes. The char_boxes have local coordinates w.r.t. the
|
||||
* word bounding box, i.e., the left-most character bbox of each word
|
||||
* has (0,0) left-top coord, but the BoxWord must be defined in page
|
||||
* coordinates.
|
||||
*/
|
||||
bool Tesseract::create_cube_box_word(Boxa *char_boxes,
|
||||
int num_chars,
|
||||
TBOX word_box,
|
||||
BoxWord* box_word) {
|
||||
if (!box_word) {
|
||||
if (cube_debug_level > 0) {
|
||||
tprintf("Cube WARNING (create_cube_box_word): Invalid box_word.\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Find the x-coordinate of left-most char_box, which could be
|
||||
// nonzero if the word image was padded before recognition took place.
|
||||
int x_offset = -1;
|
||||
for (int i = 0; i < num_chars; ++i) {
|
||||
Box* char_box = boxaGetBox(char_boxes, i, L_CLONE);
|
||||
if (x_offset < 0 || char_box->x < x_offset) {
|
||||
x_offset = char_box->x;
|
||||
}
|
||||
boxDestroy(&char_box);
|
||||
}
|
||||
|
||||
for (int i = 0; i < num_chars; ++i) {
|
||||
Box* char_box = boxaGetBox(char_boxes, i, L_CLONE);
|
||||
TBOX tbox = char_box_to_tbox(char_box, word_box, x_offset);
|
||||
boxDestroy(&char_box);
|
||||
box_word->InsertBox(i, tbox);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @name init_cube_objects
|
||||
*
|
||||
* Instantiates Tesseract object's CubeRecoContext and TesseractCubeCombiner.
|
||||
* Returns false if cube context could not be created or if load_combiner is
|
||||
* true, but the combiner could not be loaded.
|
||||
*/
|
||||
bool Tesseract::init_cube_objects(bool load_combiner,
|
||||
TessdataManager *tessdata_manager) {
|
||||
ASSERT_HOST(cube_cntxt_ == NULL);
|
||||
ASSERT_HOST(tess_cube_combiner_ == NULL);
|
||||
|
||||
// Create the cube context object
|
||||
cube_cntxt_ = CubeRecoContext::Create(this, tessdata_manager, &unicharset);
|
||||
if (cube_cntxt_ == NULL) {
|
||||
if (cube_debug_level > 0) {
|
||||
tprintf("Cube WARNING (Tesseract::init_cube_objects()): Failed to "
|
||||
"instantiate CubeRecoContext\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Create the combiner object and load the combiner net for target languages.
|
||||
if (load_combiner) {
|
||||
tess_cube_combiner_ = new tesseract::TesseractCubeCombiner(cube_cntxt_);
|
||||
if (!tess_cube_combiner_->LoadCombinerNet()) {
|
||||
delete cube_cntxt_;
|
||||
cube_cntxt_ = NULL;
|
||||
delete tess_cube_combiner_;
|
||||
tess_cube_combiner_ = NULL;
|
||||
if (cube_debug_level > 0)
|
||||
tprintf("Cube ERROR (Failed to instantiate TesseractCubeCombiner\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @name run_cube_combiner
|
||||
*
|
||||
* Iterates through tesseract's results and calls cube on each word,
|
||||
* combining the results with the existing tesseract result.
|
||||
*/
|
||||
void Tesseract::run_cube_combiner(PAGE_RES *page_res) {
|
||||
if (page_res == NULL || tess_cube_combiner_ == NULL)
|
||||
return;
|
||||
PAGE_RES_IT page_res_it(page_res);
|
||||
// Iterate through the word results and call cube on each word.
|
||||
for (page_res_it.restart_page(); page_res_it.word () != NULL;
|
||||
page_res_it.forward()) {
|
||||
BLOCK* block = page_res_it.block()->block;
|
||||
if (block->poly_block() != NULL && !block->poly_block()->IsText())
|
||||
continue; // Don't deal with non-text blocks.
|
||||
WERD_RES* word = page_res_it.word();
|
||||
// Skip cube entirely if tesseract's certainty is greater than threshold.
|
||||
int combiner_run_thresh = convert_prob_to_tess_certainty(
|
||||
cube_cntxt_->Params()->CombinerRunThresh());
|
||||
if (word->best_choice->certainty() >= combiner_run_thresh) {
|
||||
continue;
|
||||
}
|
||||
// Use the same language as Tesseract used for the word.
|
||||
Tesseract* lang_tess = word->tesseract;
|
||||
|
||||
// Setup a trial WERD_RES in which to classify with cube.
|
||||
WERD_RES cube_word;
|
||||
cube_word.InitForRetryRecognition(*word);
|
||||
cube_word.SetupForRecognition(lang_tess->unicharset, this, BestPix(),
|
||||
OEM_CUBE_ONLY,
|
||||
NULL, false, false, false,
|
||||
page_res_it.row()->row,
|
||||
page_res_it.block()->block);
|
||||
CubeObject *cube_obj = lang_tess->cube_recognize_word(
|
||||
page_res_it.block()->block, &cube_word);
|
||||
if (cube_obj != NULL)
|
||||
lang_tess->cube_combine_word(cube_obj, &cube_word, word);
|
||||
delete cube_obj;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @name cube_word_pass1
|
||||
*
|
||||
* Recognizes a single word using (only) cube. Compatible with
|
||||
* Tesseract's classify_word_pass1/classify_word_pass2.
|
||||
*/
|
||||
void Tesseract::cube_word_pass1(BLOCK* block, ROW *row, WERD_RES *word) {
|
||||
CubeObject *cube_obj = cube_recognize_word(block, word);
|
||||
delete cube_obj;
|
||||
}
|
||||
|
||||
/**
|
||||
* @name cube_recognize_word
|
||||
*
|
||||
* Cube recognizer to recognize a single word as with classify_word_pass1
|
||||
* but also returns the cube object in case the combiner is needed.
|
||||
*/
|
||||
CubeObject* Tesseract::cube_recognize_word(BLOCK* block, WERD_RES* word) {
|
||||
if (!cube_binary_ || !cube_cntxt_) {
|
||||
if (cube_debug_level > 0 && !cube_binary_)
|
||||
tprintf("Tesseract::run_cube(): NULL binary image.\n");
|
||||
word->SetupFake(unicharset);
|
||||
return NULL;
|
||||
}
|
||||
TBOX word_box = word->word->bounding_box();
|
||||
if (block != NULL && (block->re_rotation().x() != 1.0f ||
|
||||
block->re_rotation().y() != 0.0f)) {
|
||||
// TODO(rays) We have to rotate the bounding box to get the true coords.
|
||||
// This will be achieved in the future via DENORM.
|
||||
// In the mean time, cube can't process this word.
|
||||
if (cube_debug_level > 0) {
|
||||
tprintf("Cube can't process rotated word at:");
|
||||
word_box.print();
|
||||
}
|
||||
word->SetupFake(unicharset);
|
||||
return NULL;
|
||||
}
|
||||
CubeObject* cube_obj = new tesseract::CubeObject(
|
||||
cube_cntxt_, cube_binary_, word_box.left(),
|
||||
pixGetHeight(cube_binary_) - word_box.top(),
|
||||
word_box.width(), word_box.height());
|
||||
if (!cube_recognize(cube_obj, block, word)) {
|
||||
delete cube_obj;
|
||||
return NULL;
|
||||
}
|
||||
return cube_obj;
|
||||
}
|
||||
|
||||
/**
|
||||
* @name cube_combine_word
|
||||
*
|
||||
* Combines the cube and tesseract results for a single word, leaving the
|
||||
* result in tess_word.
|
||||
*/
|
||||
void Tesseract::cube_combine_word(CubeObject* cube_obj, WERD_RES* cube_word,
|
||||
WERD_RES* tess_word) {
|
||||
float combiner_prob = tess_cube_combiner_->CombineResults(tess_word,
|
||||
cube_obj);
|
||||
// If combiner probability is greater than tess/cube combiner
|
||||
// classifier threshold, i.e. tesseract wins, then just return the
|
||||
// tesseract result unchanged, as the combiner knows nothing about how
|
||||
// correct the answer is. If cube and tesseract agree, then improve the
|
||||
// scores before returning.
|
||||
WERD_CHOICE* tess_best = tess_word->best_choice;
|
||||
WERD_CHOICE* cube_best = cube_word->best_choice;
|
||||
if (cube_debug_level || classify_debug_level) {
|
||||
tprintf("Combiner prob = %g vs threshold %g\n",
|
||||
combiner_prob, cube_cntxt_->Params()->CombinerClassifierThresh());
|
||||
}
|
||||
if (combiner_prob >=
|
||||
cube_cntxt_->Params()->CombinerClassifierThresh()) {
|
||||
if (tess_best->unichar_string() == cube_best->unichar_string()) {
|
||||
// Cube and tess agree, so improve the scores.
|
||||
tess_best->set_rating(tess_best->rating() / 2);
|
||||
tess_best->set_certainty(tess_best->certainty() / 2);
|
||||
}
|
||||
return;
|
||||
}
|
||||
// Cube wins.
|
||||
// It is better for the language combiner to have all tesseract scores,
|
||||
// so put them in the cube result.
|
||||
cube_best->set_rating(tess_best->rating());
|
||||
cube_best->set_certainty(tess_best->certainty());
|
||||
if (cube_debug_level || classify_debug_level) {
|
||||
tprintf("Cube INFO: tesseract result replaced by cube: %s -> %s\n",
|
||||
tess_best->unichar_string().string(),
|
||||
cube_best->unichar_string().string());
|
||||
}
|
||||
tess_word->ConsumeWordResults(cube_word);
|
||||
}
|
||||
|
||||
/**
|
||||
* @name cube_recognize
|
||||
*
|
||||
* Call cube on the current word, and write the result to word.
|
||||
* Sets up a fake result and returns false if something goes wrong.
|
||||
*/
|
||||
bool Tesseract::cube_recognize(CubeObject *cube_obj, BLOCK* block,
|
||||
WERD_RES *word) {
|
||||
// Run cube
|
||||
WordAltList *cube_alt_list = cube_obj->RecognizeWord();
|
||||
if (!cube_alt_list || cube_alt_list->AltCount() <= 0) {
|
||||
if (cube_debug_level > 0) {
|
||||
tprintf("Cube returned nothing for word at:");
|
||||
word->word->bounding_box().print();
|
||||
}
|
||||
word->SetupFake(unicharset);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get cube's best result and its probability, mapped to tesseract's
|
||||
// certainty range
|
||||
char_32 *cube_best_32 = cube_alt_list->Alt(0);
|
||||
double cube_prob = CubeUtils::Cost2Prob(cube_alt_list->AltCost(0));
|
||||
float cube_certainty = convert_prob_to_tess_certainty(cube_prob);
|
||||
string cube_best_str;
|
||||
CubeUtils::UTF32ToUTF8(cube_best_32, &cube_best_str);
|
||||
|
||||
// Retrieve Cube's character bounding boxes and CharSamples,
|
||||
// corresponding to the most recent call to RecognizeWord().
|
||||
Boxa *char_boxes = NULL;
|
||||
CharSamp **char_samples = NULL;;
|
||||
int num_chars;
|
||||
if (!extract_cube_state(cube_obj, &num_chars, &char_boxes, &char_samples)
|
||||
&& cube_debug_level > 0) {
|
||||
tprintf("Cube WARNING (Tesseract::cube_recognize): Cannot extract "
|
||||
"cube state.\n");
|
||||
word->SetupFake(unicharset);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Convert cube's character bounding boxes to a BoxWord.
|
||||
BoxWord cube_box_word;
|
||||
TBOX tess_word_box = word->word->bounding_box();
|
||||
if (word->denorm.block() != NULL)
|
||||
tess_word_box.rotate(word->denorm.block()->re_rotation());
|
||||
bool box_word_success = create_cube_box_word(char_boxes, num_chars,
|
||||
tess_word_box,
|
||||
&cube_box_word);
|
||||
boxaDestroy(&char_boxes);
|
||||
if (!box_word_success) {
|
||||
if (cube_debug_level > 0) {
|
||||
tprintf("Cube WARNING (Tesseract::cube_recognize): Could not "
|
||||
"create cube BoxWord\n");
|
||||
}
|
||||
word->SetupFake(unicharset);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Fill tesseract result's fields with cube results
|
||||
fill_werd_res(cube_box_word, cube_best_str.c_str(), word);
|
||||
|
||||
// Create cube's best choice.
|
||||
BLOB_CHOICE** choices = new BLOB_CHOICE*[num_chars];
|
||||
for (int i = 0; i < num_chars; ++i) {
|
||||
UNICHAR_ID uch_id =
|
||||
cube_cntxt_->CharacterSet()->UnicharID(char_samples[i]->StrLabel());
|
||||
choices[i] = new BLOB_CHOICE(uch_id, -cube_certainty, cube_certainty,
|
||||
-1, 0.0f, 0.0f, 0.0f, BCC_STATIC_CLASSIFIER);
|
||||
}
|
||||
word->FakeClassifyWord(num_chars, choices);
|
||||
// within a word, cube recognizes the word in reading order.
|
||||
word->best_choice->set_unichars_in_script_order(true);
|
||||
delete [] choices;
|
||||
delete [] char_samples;
|
||||
|
||||
// Some sanity checks
|
||||
ASSERT_HOST(word->best_choice->length() == word->reject_map.length());
|
||||
|
||||
if (cube_debug_level || classify_debug_level) {
|
||||
tprintf("Cube result: %s r=%g, c=%g\n",
|
||||
word->best_choice->unichar_string().string(),
|
||||
word->best_choice->rating(),
|
||||
word->best_choice->certainty());
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @name fill_werd_res
|
||||
*
|
||||
* Fill Tesseract's word result fields with cube's.
|
||||
*
|
||||
*/
|
||||
void Tesseract::fill_werd_res(const BoxWord& cube_box_word,
|
||||
const char* cube_best_str,
|
||||
WERD_RES* tess_werd_res) {
|
||||
delete tess_werd_res->box_word;
|
||||
tess_werd_res->box_word = new BoxWord(cube_box_word);
|
||||
tess_werd_res->box_word->ClipToOriginalWord(tess_werd_res->denorm.block(),
|
||||
tess_werd_res->word);
|
||||
// Fill text and remaining fields
|
||||
tess_werd_res->word->set_text(cube_best_str);
|
||||
tess_werd_res->tess_failed = FALSE;
|
||||
tess_werd_res->tess_accepted = tess_acceptable_word(tess_werd_res);
|
||||
// There is no output word, so we can' call AdaptableWord, but then I don't
|
||||
// think we need to. Fudge the result with accepted.
|
||||
tess_werd_res->tess_would_adapt = tess_werd_res->tess_accepted;
|
||||
|
||||
// Set word to done, i.e., ignore all of tesseract's tests for rejection
|
||||
tess_werd_res->done = tess_werd_res->tess_accepted;
|
||||
}
|
||||
|
||||
} // namespace tesseract
|
@ -1,184 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: cube_reco_context.cpp
|
||||
* Description: Implementation of the Cube Recognition Context Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <string>
|
||||
#include <limits.h>
|
||||
|
||||
#include "cube_reco_context.h"
|
||||
|
||||
#include "classifier_factory.h"
|
||||
#include "cube_tuning_params.h"
|
||||
#include "dict.h"
|
||||
#include "feature_bmp.h"
|
||||
#include "tessdatamanager.h"
|
||||
#include "tesseractclass.h"
|
||||
#include "tess_lang_model.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
/**
|
||||
* Instantiate a CubeRecoContext object using a Tesseract object.
|
||||
* CubeRecoContext will not take ownership of tess_obj, but will
|
||||
* record the pointer to it and will make use of various Tesseract
|
||||
* components (language model, flags, etc). Thus the caller should
|
||||
* keep tess_obj alive so long as the instantiated CubeRecoContext is used.
|
||||
*/
|
||||
CubeRecoContext::CubeRecoContext(Tesseract *tess_obj) {
|
||||
tess_obj_ = tess_obj;
|
||||
lang_ = "";
|
||||
loaded_ = false;
|
||||
lang_mod_ = NULL;
|
||||
params_ = NULL;
|
||||
char_classifier_ = NULL;
|
||||
char_set_ = NULL;
|
||||
word_size_model_ = NULL;
|
||||
char_bigrams_ = NULL;
|
||||
word_unigrams_ = NULL;
|
||||
noisy_input_ = false;
|
||||
size_normalization_ = false;
|
||||
}
|
||||
|
||||
CubeRecoContext::~CubeRecoContext() {
|
||||
delete char_classifier_;
|
||||
char_classifier_ = NULL;
|
||||
|
||||
delete word_size_model_;
|
||||
word_size_model_ = NULL;
|
||||
|
||||
delete char_set_;
|
||||
char_set_ = NULL;
|
||||
|
||||
delete char_bigrams_;
|
||||
char_bigrams_ = NULL;
|
||||
|
||||
delete word_unigrams_;
|
||||
word_unigrams_ = NULL;
|
||||
|
||||
delete lang_mod_;
|
||||
lang_mod_ = NULL;
|
||||
|
||||
delete params_;
|
||||
params_ = NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the path of the data files by looking up the TESSDATA_PREFIX
|
||||
* environment variable and appending a "tessdata" directory to it
|
||||
*/
|
||||
bool CubeRecoContext::GetDataFilePath(string *path) const {
|
||||
*path = tess_obj_->datadir.string();
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* The object initialization function that loads all the necessary
|
||||
* components of a RecoContext. TessdataManager is used to load the
|
||||
* data from [lang].traineddata file. If TESSDATA_CUBE_UNICHARSET
|
||||
* component is present, Cube will be instantiated with the unicharset
|
||||
* specified in this component and the corresponding dictionary
|
||||
* (TESSDATA_CUBE_SYSTEM_DAWG), and will map Cube's unicharset to
|
||||
* Tesseract's. Otherwise, TessdataManager will assume that Cube will
|
||||
* be using Tesseract's unicharset and dawgs, and will load the
|
||||
* unicharset from the TESSDATA_UNICHARSET component and will load the
|
||||
* dawgs from TESSDATA_*_DAWG components.
|
||||
*/
|
||||
bool CubeRecoContext::Load(TessdataManager *tessdata_manager,
|
||||
UNICHARSET *tess_unicharset) {
|
||||
ASSERT_HOST(tess_obj_ != NULL);
|
||||
tess_unicharset_ = tess_unicharset;
|
||||
string data_file_path;
|
||||
|
||||
// Get the data file path.
|
||||
if (GetDataFilePath(&data_file_path) == false) {
|
||||
fprintf(stderr, "Unable to get data file path\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get the language from the Tesseract object.
|
||||
lang_ = tess_obj_->lang.string();
|
||||
|
||||
// Create the char set.
|
||||
if ((char_set_ =
|
||||
CharSet::Create(tessdata_manager, tess_unicharset)) == NULL) {
|
||||
fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to load "
|
||||
"CharSet\n");
|
||||
return false;
|
||||
}
|
||||
// Create the language model.
|
||||
string lm_file_name = data_file_path + lang_ + ".cube.lm";
|
||||
string lm_params;
|
||||
if (!CubeUtils::ReadFileToString(lm_file_name, &lm_params)) {
|
||||
fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to read cube "
|
||||
"language model params from %s\n", lm_file_name.c_str());
|
||||
return false;
|
||||
}
|
||||
lang_mod_ = new TessLangModel(lm_params, data_file_path,
|
||||
tess_obj_->getDict().load_system_dawg,
|
||||
tessdata_manager, this);
|
||||
|
||||
// Create the optional char bigrams object.
|
||||
char_bigrams_ = CharBigrams::Create(data_file_path, lang_);
|
||||
|
||||
// Create the optional word unigrams object.
|
||||
word_unigrams_ = WordUnigrams::Create(data_file_path, lang_);
|
||||
|
||||
// Create the optional size model.
|
||||
word_size_model_ = WordSizeModel::Create(data_file_path, lang_,
|
||||
char_set_, Contextual());
|
||||
|
||||
// Load tuning params.
|
||||
params_ = CubeTuningParams::Create(data_file_path, lang_);
|
||||
if (params_ == NULL) {
|
||||
fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to read "
|
||||
"CubeTuningParams from %s\n", data_file_path.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
// Create the char classifier.
|
||||
char_classifier_ = CharClassifierFactory::Create(data_file_path, lang_,
|
||||
lang_mod_, char_set_,
|
||||
params_);
|
||||
if (char_classifier_ == NULL) {
|
||||
fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to load "
|
||||
"CharClassifierFactory object from %s\n", data_file_path.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
loaded_ = true;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Creates a CubeRecoContext object using a tesseract object */
|
||||
CubeRecoContext * CubeRecoContext::Create(Tesseract *tess_obj,
|
||||
TessdataManager *tessdata_manager,
|
||||
UNICHARSET *tess_unicharset) {
|
||||
// create the object
|
||||
CubeRecoContext *cntxt = new CubeRecoContext(tess_obj);
|
||||
// load the necessary components
|
||||
if (cntxt->Load(tessdata_manager, tess_unicharset) == false) {
|
||||
fprintf(stderr, "Cube ERROR (CubeRecoContext::Create): unable to init "
|
||||
"CubeRecoContext object\n");
|
||||
delete cntxt;
|
||||
return NULL;
|
||||
}
|
||||
// success
|
||||
return cntxt;
|
||||
}
|
||||
} // tesseract}
|
@ -1,157 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: cube_reco_context.h
|
||||
* Description: Declaration of the Cube Recognition Context Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The CubeRecoContext class abstracts the Cube OCR Engine. Typically a process
|
||||
// (or a thread) would create one CubeRecoContext object per language.
|
||||
// The CubeRecoContext object also provides methods to get and set the
|
||||
// different attribues of the Cube OCR Engine.
|
||||
|
||||
#ifndef CUBE_RECO_CONTEXT_H
|
||||
#define CUBE_RECO_CONTEXT_H
|
||||
|
||||
#include <string>
|
||||
#include "neural_net.h"
|
||||
#include "lang_model.h"
|
||||
#include "classifier_base.h"
|
||||
#include "feature_base.h"
|
||||
#include "char_set.h"
|
||||
#include "word_size_model.h"
|
||||
#include "char_bigrams.h"
|
||||
#include "word_unigrams.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class Tesseract;
|
||||
class TessdataManager;
|
||||
|
||||
class CubeRecoContext {
|
||||
public:
|
||||
// Reading order enum type
|
||||
enum ReadOrder {
|
||||
L2R,
|
||||
R2L
|
||||
};
|
||||
|
||||
// Instantiate using a Tesseract object
|
||||
CubeRecoContext(Tesseract *tess_obj);
|
||||
|
||||
~CubeRecoContext();
|
||||
|
||||
// accessor functions
|
||||
inline const string & Lang() const { return lang_; }
|
||||
inline CharSet *CharacterSet() const { return char_set_; }
|
||||
const UNICHARSET *TessUnicharset() const { return tess_unicharset_; }
|
||||
inline CharClassifier *Classifier() const { return char_classifier_; }
|
||||
inline WordSizeModel *SizeModel() const { return word_size_model_; }
|
||||
inline CharBigrams *Bigrams() const { return char_bigrams_; }
|
||||
inline WordUnigrams *WordUnigramsObj() const { return word_unigrams_; }
|
||||
inline TuningParams *Params() const { return params_; }
|
||||
inline LangModel *LangMod() const { return lang_mod_; }
|
||||
|
||||
// the reading order of the language
|
||||
inline ReadOrder ReadingOrder() const {
|
||||
return ((lang_ == "ara") ? R2L : L2R);
|
||||
}
|
||||
|
||||
// does the language support case
|
||||
inline bool HasCase() const {
|
||||
return (lang_ != "ara" && lang_ != "hin");
|
||||
}
|
||||
|
||||
inline bool Cursive() const {
|
||||
return (lang_ == "ara");
|
||||
}
|
||||
|
||||
inline bool HasItalics() const {
|
||||
return (lang_ != "ara" && lang_ != "hin");
|
||||
}
|
||||
|
||||
inline bool Contextual() const {
|
||||
return (lang_ == "ara");
|
||||
}
|
||||
|
||||
// RecoContext runtime flags accessor functions
|
||||
inline bool SizeNormalization() const { return size_normalization_; }
|
||||
inline bool NoisyInput() const { return noisy_input_; }
|
||||
inline bool OOD() const { return lang_mod_->OOD(); }
|
||||
inline bool Numeric() const { return lang_mod_->Numeric(); }
|
||||
inline bool WordList() const { return lang_mod_->WordList(); }
|
||||
inline bool Punc() const { return lang_mod_->Punc(); }
|
||||
inline bool CaseSensitive() const {
|
||||
return char_classifier_->CaseSensitive();
|
||||
}
|
||||
|
||||
inline void SetSizeNormalization(bool size_normalization) {
|
||||
size_normalization_ = size_normalization;
|
||||
}
|
||||
inline void SetNoisyInput(bool noisy_input) {
|
||||
noisy_input_ = noisy_input;
|
||||
}
|
||||
inline void SetOOD(bool ood_enabled) {
|
||||
lang_mod_->SetOOD(ood_enabled);
|
||||
}
|
||||
inline void SetNumeric(bool numeric_enabled) {
|
||||
lang_mod_->SetNumeric(numeric_enabled);
|
||||
}
|
||||
inline void SetWordList(bool word_list_enabled) {
|
||||
lang_mod_->SetWordList(word_list_enabled);
|
||||
}
|
||||
inline void SetPunc(bool punc_enabled) {
|
||||
lang_mod_->SetPunc(punc_enabled);
|
||||
}
|
||||
inline void SetCaseSensitive(bool case_sensitive) {
|
||||
char_classifier_->SetCaseSensitive(case_sensitive);
|
||||
}
|
||||
inline tesseract::Tesseract *TesseractObject() const {
|
||||
return tess_obj_;
|
||||
}
|
||||
|
||||
// Returns the path of the data files
|
||||
bool GetDataFilePath(string *path) const;
|
||||
// Creates a CubeRecoContext object using a tesseract object. Data
|
||||
// files are loaded via the tessdata_manager, and the tesseract
|
||||
// unicharset is provided in order to map Cube's unicharset to
|
||||
// Tesseract's in the case where the two unicharsets differ.
|
||||
static CubeRecoContext *Create(Tesseract *tess_obj,
|
||||
TessdataManager *tessdata_manager,
|
||||
UNICHARSET *tess_unicharset);
|
||||
|
||||
private:
|
||||
bool loaded_;
|
||||
string lang_;
|
||||
CharSet *char_set_;
|
||||
UNICHARSET *tess_unicharset_;
|
||||
WordSizeModel *word_size_model_;
|
||||
CharClassifier *char_classifier_;
|
||||
CharBigrams *char_bigrams_;
|
||||
WordUnigrams *word_unigrams_;
|
||||
TuningParams *params_;
|
||||
LangModel *lang_mod_;
|
||||
Tesseract *tess_obj_; // CubeRecoContext does not own this pointer
|
||||
bool size_normalization_;
|
||||
bool noisy_input_;
|
||||
|
||||
// Loads and initialized all the necessary components of a
|
||||
// CubeRecoContext. See .cpp for more details.
|
||||
bool Load(TessdataManager *tessdata_manager,
|
||||
UNICHARSET *tess_unicharset);
|
||||
};
|
||||
}
|
||||
|
||||
#endif // CUBE_RECO_CONTEXT_H
|
@ -1,134 +0,0 @@
|
||||
// Copyright 2011 Google Inc. All Rights Reserved.
|
||||
// Author: rays@google.com (Ray Smith)
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: cubeclassifier.cpp
|
||||
// Description: Cube implementation of a ShapeClassifier.
|
||||
// Author: Ray Smith
|
||||
// Created: Wed Nov 23 10:39:45 PST 2011
|
||||
//
|
||||
// (C) Copyright 2011, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "cubeclassifier.h"
|
||||
|
||||
#include "char_altlist.h"
|
||||
#include "char_set.h"
|
||||
#include "cube_object.h"
|
||||
#include "cube_reco_context.h"
|
||||
#include "tessclassifier.h"
|
||||
#include "tesseractclass.h"
|
||||
#include "trainingsample.h"
|
||||
#include "unicharset.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
CubeClassifier::CubeClassifier(tesseract::Tesseract* tesseract)
|
||||
: cube_cntxt_(tesseract->GetCubeRecoContext()),
|
||||
shape_table_(*tesseract->shape_table()) {
|
||||
}
|
||||
CubeClassifier::~CubeClassifier() {
|
||||
}
|
||||
|
||||
/// Classifies the given [training] sample, writing to results.
|
||||
/// See ShapeClassifier for a full description.
|
||||
int CubeClassifier::UnicharClassifySample(
|
||||
const TrainingSample& sample, Pix* page_pix, int debug,
|
||||
UNICHAR_ID keep_this, GenericVector<UnicharRating>* results) {
|
||||
results->clear();
|
||||
if (page_pix == NULL) return 0;
|
||||
|
||||
ASSERT_HOST(cube_cntxt_ != NULL);
|
||||
const TBOX& char_box = sample.bounding_box();
|
||||
CubeObject* cube_obj = new tesseract::CubeObject(
|
||||
cube_cntxt_, page_pix, char_box.left(),
|
||||
pixGetHeight(page_pix) - char_box.top(),
|
||||
char_box.width(), char_box.height());
|
||||
CharAltList* alt_list = cube_obj->RecognizeChar();
|
||||
if (alt_list != NULL) {
|
||||
alt_list->Sort();
|
||||
CharSet* char_set = cube_cntxt_->CharacterSet();
|
||||
for (int i = 0; i < alt_list->AltCount(); ++i) {
|
||||
// Convert cube representation to a shape_id.
|
||||
int alt_id = alt_list->Alt(i);
|
||||
int unichar_id = char_set->UnicharID(char_set->ClassString(alt_id));
|
||||
if (unichar_id >= 0)
|
||||
results->push_back(UnicharRating(unichar_id, alt_list->AltProb(i)));
|
||||
}
|
||||
delete alt_list;
|
||||
}
|
||||
delete cube_obj;
|
||||
return results->size();
|
||||
}
|
||||
|
||||
/** Provides access to the ShapeTable that this classifier works with. */
|
||||
const ShapeTable* CubeClassifier::GetShapeTable() const {
|
||||
return &shape_table_;
|
||||
}
|
||||
|
||||
CubeTessClassifier::CubeTessClassifier(tesseract::Tesseract* tesseract)
|
||||
: cube_cntxt_(tesseract->GetCubeRecoContext()),
|
||||
shape_table_(*tesseract->shape_table()),
|
||||
pruner_(new TessClassifier(true, tesseract)) {
|
||||
}
|
||||
CubeTessClassifier::~CubeTessClassifier() {
|
||||
delete pruner_;
|
||||
}
|
||||
|
||||
/// Classifies the given [training] sample, writing to results.
|
||||
/// See ShapeClassifier for a full description.
|
||||
int CubeTessClassifier::UnicharClassifySample(
|
||||
const TrainingSample& sample, Pix* page_pix, int debug,
|
||||
UNICHAR_ID keep_this, GenericVector<UnicharRating>* results) {
|
||||
int num_results = pruner_->UnicharClassifySample(sample, page_pix, debug,
|
||||
keep_this, results);
|
||||
if (page_pix == NULL) return num_results;
|
||||
|
||||
ASSERT_HOST(cube_cntxt_ != NULL);
|
||||
const TBOX& char_box = sample.bounding_box();
|
||||
CubeObject* cube_obj = new tesseract::CubeObject(
|
||||
cube_cntxt_, page_pix, char_box.left(),
|
||||
pixGetHeight(page_pix) - char_box.top(),
|
||||
char_box.width(), char_box.height());
|
||||
CharAltList* alt_list = cube_obj->RecognizeChar();
|
||||
CharSet* char_set = cube_cntxt_->CharacterSet();
|
||||
if (alt_list != NULL) {
|
||||
for (int r = 0; r < num_results; ++r) {
|
||||
// Get the best cube probability of the unichar in the result.
|
||||
double best_prob = 0.0;
|
||||
for (int i = 0; i < alt_list->AltCount(); ++i) {
|
||||
int alt_id = alt_list->Alt(i);
|
||||
int unichar_id = char_set->UnicharID(char_set->ClassString(alt_id));
|
||||
if (unichar_id == (*results)[r].unichar_id &&
|
||||
alt_list->AltProb(i) > best_prob) {
|
||||
best_prob = alt_list->AltProb(i);
|
||||
}
|
||||
}
|
||||
(*results)[r].rating = best_prob;
|
||||
}
|
||||
delete alt_list;
|
||||
// Re-sort by rating.
|
||||
results->sort(&UnicharRating::SortDescendingRating);
|
||||
}
|
||||
delete cube_obj;
|
||||
return results->size();
|
||||
}
|
||||
|
||||
/** Provides access to the ShapeTable that this classifier works with. */
|
||||
const ShapeTable* CubeTessClassifier::GetShapeTable() const {
|
||||
return &shape_table_;
|
||||
}
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
|
||||
|
@ -1,80 +0,0 @@
|
||||
// Copyright 2011 Google Inc. All Rights Reserved.
|
||||
// Author: rays@google.com (Ray Smith)
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: cubeclassifier.h
|
||||
// Description: Cube implementation of a ShapeClassifier.
|
||||
// Author: Ray Smith
|
||||
// Created: Wed Nov 23 10:36:32 PST 2011
|
||||
//
|
||||
// (C) Copyright 2011, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef THIRD_PARTY_TESSERACT_CCMAIN_CUBECLASSIFIER_H_
|
||||
#define THIRD_PARTY_TESSERACT_CCMAIN_CUBECLASSIFIER_H_
|
||||
|
||||
#include "shapeclassifier.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class Classify;
|
||||
class CubeRecoContext;
|
||||
class ShapeTable;
|
||||
class TessClassifier;
|
||||
class Tesseract;
|
||||
class TrainingSample;
|
||||
struct UnicharRating;
|
||||
|
||||
// Cube implementation of a ShapeClassifier.
|
||||
class CubeClassifier : public ShapeClassifier {
|
||||
public:
|
||||
explicit CubeClassifier(Tesseract* tesseract);
|
||||
virtual ~CubeClassifier();
|
||||
|
||||
// Classifies the given [training] sample, writing to results.
|
||||
// See ShapeClassifier for a full description.
|
||||
virtual int UnicharClassifySample(const TrainingSample& sample, Pix* page_pix,
|
||||
int debug, UNICHAR_ID keep_this,
|
||||
GenericVector<UnicharRating>* results);
|
||||
// Provides access to the ShapeTable that this classifier works with.
|
||||
virtual const ShapeTable* GetShapeTable() const;
|
||||
|
||||
private:
|
||||
// Cube objects.
|
||||
CubeRecoContext* cube_cntxt_;
|
||||
const ShapeTable& shape_table_;
|
||||
};
|
||||
|
||||
// Combination of Tesseract class pruner with scoring by cube.
|
||||
class CubeTessClassifier : public ShapeClassifier {
|
||||
public:
|
||||
explicit CubeTessClassifier(Tesseract* tesseract);
|
||||
virtual ~CubeTessClassifier();
|
||||
|
||||
// Classifies the given [training] sample, writing to results.
|
||||
// See ShapeClassifier for a full description.
|
||||
virtual int UnicharClassifySample(const TrainingSample& sample, Pix* page_pix,
|
||||
int debug, UNICHAR_ID keep_this,
|
||||
GenericVector<UnicharRating>* results);
|
||||
// Provides access to the ShapeTable that this classifier works with.
|
||||
virtual const ShapeTable* GetShapeTable() const;
|
||||
|
||||
private:
|
||||
// Cube objects.
|
||||
CubeRecoContext* cube_cntxt_;
|
||||
const ShapeTable& shape_table_;
|
||||
TessClassifier* pruner_;
|
||||
};
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif /* THIRD_PARTY_TESSERACT_CCMAIN_CUBECLASSIFIER_H_ */
|
@ -1,55 +0,0 @@
|
||||
AM_CPPFLAGS += \
|
||||
-DUSE_STD_NAMESPACE \
|
||||
-I$(top_srcdir)/cutil -I$(top_srcdir)/ccutil \
|
||||
-I$(top_srcdir)/ccstruct -I$(top_srcdir)/dict \
|
||||
-I$(top_srcdir)/ccmain -I$(top_srcdir)/classify \
|
||||
-I$(top_srcdir)/textord -I$(top_srcdir)/wordrec \
|
||||
-I$(top_srcdir)/neural_networks/runtime \
|
||||
-I$(top_srcdir)/viewer
|
||||
|
||||
if VISIBILITY
|
||||
AM_CPPFLAGS += -DTESS_EXPORTS \
|
||||
-fvisibility=hidden -fvisibility-inlines-hidden
|
||||
endif
|
||||
|
||||
noinst_HEADERS = \
|
||||
altlist.h beam_search.h bmp_8.h cached_file.h \
|
||||
char_altlist.h char_bigrams.h char_samp.h char_samp_enum.h \
|
||||
char_samp_set.h char_set.h classifier_base.h classifier_factory.h \
|
||||
con_comp.h cube_const.h conv_net_classifier.h cube_line_object.h \
|
||||
cube_line_segmenter.h cube_object.h cube_search_object.h \
|
||||
cube_tuning_params.h cube_utils.h feature_base.h feature_bmp.h \
|
||||
feature_chebyshev.h feature_hybrid.h hybrid_neural_net_classifier.h \
|
||||
lang_mod_edge.h lang_model.h search_column.h search_node.h \
|
||||
search_object.h string_32.h tess_lang_mod_edge.h tess_lang_model.h \
|
||||
tuning_params.h word_altlist.h word_list_lang_model.h word_size_model.h \
|
||||
word_unigrams.h
|
||||
|
||||
if !USING_MULTIPLELIBS
|
||||
noinst_LTLIBRARIES = libtesseract_cube.la
|
||||
else
|
||||
lib_LTLIBRARIES = libtesseract_cube.la
|
||||
libtesseract_cube_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION)
|
||||
libtesseract_cube_la_LIBADD = \
|
||||
../ccstruct/libtesseract_ccstruct.la \
|
||||
../ccutil/libtesseract_ccutil.la \
|
||||
../neural_networks/runtime/libtesseract_neural.la \
|
||||
../viewer/libtesseract_viewer.la \
|
||||
../wordrec/libtesseract_wordrec.la \
|
||||
../cutil/libtesseract_cutil.la \
|
||||
../classify/libtesseract_classify.la \
|
||||
../dict/libtesseract_dict.la
|
||||
endif
|
||||
|
||||
libtesseract_cube_la_SOURCES = \
|
||||
altlist.cpp beam_search.cpp bmp_8.cpp cached_file.cpp \
|
||||
char_altlist.cpp char_bigrams.cpp char_samp.cpp char_samp_enum.cpp \
|
||||
char_samp_set.cpp char_set.cpp classifier_factory.cpp \
|
||||
con_comp.cpp conv_net_classifier.cpp cube_line_object.cpp \
|
||||
cube_line_segmenter.cpp cube_object.cpp cube_search_object.cpp \
|
||||
cube_tuning_params.cpp cube_utils.cpp feature_bmp.cpp \
|
||||
feature_chebyshev.cpp feature_hybrid.cpp hybrid_neural_net_classifier.cpp \
|
||||
search_column.cpp search_node.cpp \
|
||||
tess_lang_mod_edge.cpp tess_lang_model.cpp \
|
||||
word_altlist.cpp word_list_lang_model.cpp word_size_model.cpp \
|
||||
word_unigrams.cpp
|
@ -1,60 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: alt_list.cpp
|
||||
* Description: Class to abstarct a list of alternate results
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include "altlist.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
AltList::AltList(int max_alt) {
|
||||
max_alt_ = max_alt;
|
||||
alt_cnt_ = 0;
|
||||
alt_cost_ = NULL;
|
||||
alt_tag_ = NULL;
|
||||
}
|
||||
|
||||
AltList::~AltList() {
|
||||
if (alt_cost_ != NULL) {
|
||||
delete []alt_cost_;
|
||||
alt_cost_ = NULL;
|
||||
}
|
||||
|
||||
if (alt_tag_ != NULL) {
|
||||
delete []alt_tag_;
|
||||
alt_tag_ = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// return the best possible cost and index of corresponding alternate
|
||||
int AltList::BestCost(int *best_alt) const {
|
||||
if (alt_cnt_ <= 0) {
|
||||
(*best_alt) = -1;
|
||||
return -1;
|
||||
}
|
||||
|
||||
int best_alt_idx = 0;
|
||||
for (int alt_idx = 1; alt_idx < alt_cnt_; alt_idx++) {
|
||||
if (alt_cost_[alt_idx] < alt_cost_[best_alt_idx]) {
|
||||
best_alt_idx = alt_idx;
|
||||
}
|
||||
}
|
||||
(*best_alt) = best_alt_idx;
|
||||
return alt_cost_[best_alt_idx];
|
||||
}
|
||||
}
|
@ -1,61 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: alt_list.h
|
||||
* Description: Class to abstarct a list of alternate results
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The AltList class is the base class for the list of alternate recognition
|
||||
// results. Each alternate has a cost an an optional tag associated with it
|
||||
|
||||
#ifndef ALT_LIST_H
|
||||
#define ALT_LIST_H
|
||||
|
||||
#include <math.h>
|
||||
#include "cube_utils.h"
|
||||
|
||||
namespace tesseract {
|
||||
class AltList {
|
||||
public:
|
||||
explicit AltList(int max_alt);
|
||||
virtual ~AltList();
|
||||
// sort the list of alternates based
|
||||
virtual void Sort() = 0;
|
||||
// return the best possible cost and index of corresponding alternate
|
||||
int BestCost (int *best_alt) const;
|
||||
// return the count of alternates
|
||||
inline int AltCount() const { return alt_cnt_; }
|
||||
// returns the cost (-ve log prob) of an alternate
|
||||
inline int AltCost(int alt_idx) const { return alt_cost_[alt_idx]; }
|
||||
// returns the prob of an alternate
|
||||
inline double AltProb(int alt_idx) const {
|
||||
return CubeUtils::Cost2Prob(AltCost(alt_idx));
|
||||
}
|
||||
// returns the alternate tag
|
||||
inline void *AltTag(int alt_idx) const { return alt_tag_[alt_idx]; }
|
||||
|
||||
protected:
|
||||
// max number of alternates the list can hold
|
||||
int max_alt_;
|
||||
// actual alternate count
|
||||
int alt_cnt_;
|
||||
// array of alternate costs
|
||||
int *alt_cost_;
|
||||
// array of alternate tags
|
||||
void **alt_tag_;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // ALT_LIST_H
|
@ -1,470 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: beam_search.cpp
|
||||
* Description: Class to implement Beam Word Search Algorithm
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "beam_search.h"
|
||||
#include "tesseractclass.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
BeamSearch::BeamSearch(CubeRecoContext *cntxt, bool word_mode) {
|
||||
cntxt_ = cntxt;
|
||||
seg_pt_cnt_ = 0;
|
||||
col_cnt_ = 1;
|
||||
col_ = NULL;
|
||||
word_mode_ = word_mode;
|
||||
}
|
||||
|
||||
// Cleanup the lattice corresponding to the last search
|
||||
void BeamSearch::Cleanup() {
|
||||
if (col_ != NULL) {
|
||||
for (int col = 0; col < col_cnt_; col++) {
|
||||
delete col_[col];
|
||||
}
|
||||
delete []col_;
|
||||
}
|
||||
col_ = NULL;
|
||||
}
|
||||
|
||||
BeamSearch::~BeamSearch() {
|
||||
Cleanup();
|
||||
}
|
||||
|
||||
// Creates a set of children nodes emerging from a parent node based on
|
||||
// the character alternate list and the language model.
|
||||
void BeamSearch::CreateChildren(SearchColumn *out_col, LangModel *lang_mod,
|
||||
SearchNode *parent_node,
|
||||
LangModEdge *lm_parent_edge,
|
||||
CharAltList *char_alt_list, int extra_cost) {
|
||||
// get all the edges from this parent
|
||||
int edge_cnt;
|
||||
LangModEdge **lm_edges = lang_mod->GetEdges(char_alt_list,
|
||||
lm_parent_edge, &edge_cnt);
|
||||
if (lm_edges) {
|
||||
// add them to the ending column with the appropriate parent
|
||||
for (int edge = 0; edge < edge_cnt; edge++) {
|
||||
// add a node to the column if the current column is not the
|
||||
// last one, or if the lang model edge indicates it is valid EOW
|
||||
if (!cntxt_->NoisyInput() && out_col->ColIdx() >= seg_pt_cnt_ &&
|
||||
!lm_edges[edge]->IsEOW()) {
|
||||
// free edge since no object is going to own it
|
||||
delete lm_edges[edge];
|
||||
continue;
|
||||
}
|
||||
|
||||
// compute the recognition cost of this node
|
||||
int recognition_cost = MIN_PROB_COST;
|
||||
if (char_alt_list && char_alt_list->AltCount() > 0) {
|
||||
recognition_cost = MAX(0, char_alt_list->ClassCost(
|
||||
lm_edges[edge]->ClassID()));
|
||||
// Add the no space cost. This should zero in word mode
|
||||
recognition_cost += extra_cost;
|
||||
}
|
||||
|
||||
// Note that the edge will be freed inside the column if
|
||||
// AddNode is called
|
||||
if (recognition_cost >= 0) {
|
||||
out_col->AddNode(lm_edges[edge], recognition_cost, parent_node,
|
||||
cntxt_);
|
||||
} else {
|
||||
delete lm_edges[edge];
|
||||
}
|
||||
} // edge
|
||||
// free edge array
|
||||
delete []lm_edges;
|
||||
} // lm_edges
|
||||
}
|
||||
|
||||
// Performs a beam search in the specified search using the specified
|
||||
// language model; returns an alternate list of possible words as a result.
|
||||
WordAltList * BeamSearch::Search(SearchObject *srch_obj, LangModel *lang_mod) {
|
||||
// verifications
|
||||
if (!lang_mod)
|
||||
lang_mod = cntxt_->LangMod();
|
||||
if (!lang_mod) {
|
||||
fprintf(stderr, "Cube ERROR (BeamSearch::Search): could not construct "
|
||||
"LangModel\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// free existing state
|
||||
Cleanup();
|
||||
|
||||
// get seg pt count
|
||||
seg_pt_cnt_ = srch_obj->SegPtCnt();
|
||||
if (seg_pt_cnt_ < 0) {
|
||||
return NULL;
|
||||
}
|
||||
col_cnt_ = seg_pt_cnt_ + 1;
|
||||
|
||||
// disregard suspicious cases
|
||||
if (seg_pt_cnt_ > 128) {
|
||||
fprintf(stderr, "Cube ERROR (BeamSearch::Search): segment point count is "
|
||||
"suspiciously high; bailing out\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// alloc memory for columns
|
||||
col_ = new SearchColumn *[col_cnt_];
|
||||
memset(col_, 0, col_cnt_ * sizeof(*col_));
|
||||
|
||||
// for all possible segments
|
||||
for (int end_seg = 1; end_seg <= (seg_pt_cnt_ + 1); end_seg++) {
|
||||
// create a search column
|
||||
col_[end_seg - 1] = new SearchColumn(end_seg - 1,
|
||||
cntxt_->Params()->BeamWidth());
|
||||
|
||||
// for all possible start segments
|
||||
int init_seg = MAX(0, end_seg - cntxt_->Params()->MaxSegPerChar());
|
||||
for (int strt_seg = init_seg; strt_seg < end_seg; strt_seg++) {
|
||||
int parent_nodes_cnt;
|
||||
SearchNode **parent_nodes;
|
||||
|
||||
// for the root segment, we do not have a parent
|
||||
if (strt_seg == 0) {
|
||||
parent_nodes_cnt = 1;
|
||||
parent_nodes = NULL;
|
||||
} else {
|
||||
// for all the existing nodes in the starting column
|
||||
parent_nodes_cnt = col_[strt_seg - 1]->NodeCount();
|
||||
parent_nodes = col_[strt_seg - 1]->Nodes();
|
||||
}
|
||||
|
||||
// run the shape recognizer
|
||||
CharAltList *char_alt_list = srch_obj->RecognizeSegment(strt_seg - 1,
|
||||
end_seg - 1);
|
||||
// for all the possible parents
|
||||
for (int parent_idx = 0; parent_idx < parent_nodes_cnt; parent_idx++) {
|
||||
// point to the parent node
|
||||
SearchNode *parent_node = !parent_nodes ? NULL
|
||||
: parent_nodes[parent_idx];
|
||||
LangModEdge *lm_parent_edge = !parent_node ? lang_mod->Root()
|
||||
: parent_node->LangModelEdge();
|
||||
|
||||
// compute the cost of not having spaces within the segment range
|
||||
int contig_cost = srch_obj->NoSpaceCost(strt_seg - 1, end_seg - 1);
|
||||
|
||||
// In phrase mode, compute the cost of not having a space before
|
||||
// this character
|
||||
int no_space_cost = 0;
|
||||
if (!word_mode_ && strt_seg > 0) {
|
||||
no_space_cost = srch_obj->NoSpaceCost(strt_seg - 1);
|
||||
}
|
||||
|
||||
// if the no space cost is low enough
|
||||
if ((contig_cost + no_space_cost) < MIN_PROB_COST) {
|
||||
// Add the children nodes
|
||||
CreateChildren(col_[end_seg - 1], lang_mod, parent_node,
|
||||
lm_parent_edge, char_alt_list,
|
||||
contig_cost + no_space_cost);
|
||||
}
|
||||
|
||||
// In phrase mode and if not starting at the root
|
||||
if (!word_mode_ && strt_seg > 0) { // parent_node must be non-NULL
|
||||
// consider starting a new word for nodes that are valid EOW
|
||||
if (parent_node->LangModelEdge()->IsEOW()) {
|
||||
// get the space cost
|
||||
int space_cost = srch_obj->SpaceCost(strt_seg - 1);
|
||||
// if the space cost is low enough
|
||||
if ((contig_cost + space_cost) < MIN_PROB_COST) {
|
||||
// Restart the language model and add nodes as children to the
|
||||
// space node.
|
||||
CreateChildren(col_[end_seg - 1], lang_mod, parent_node, NULL,
|
||||
char_alt_list, contig_cost + space_cost);
|
||||
}
|
||||
}
|
||||
}
|
||||
} // parent
|
||||
} // strt_seg
|
||||
|
||||
// prune the column nodes
|
||||
col_[end_seg - 1]->Prune();
|
||||
|
||||
// Free the column hash table. No longer needed
|
||||
col_[end_seg - 1]->FreeHashTable();
|
||||
} // end_seg
|
||||
|
||||
WordAltList *alt_list = CreateWordAltList(srch_obj);
|
||||
return alt_list;
|
||||
}
|
||||
|
||||
// Creates a Word alternate list from the results in the lattice.
|
||||
WordAltList *BeamSearch::CreateWordAltList(SearchObject *srch_obj) {
|
||||
// create an alternate list of all the nodes in the last column
|
||||
int node_cnt = col_[col_cnt_ - 1]->NodeCount();
|
||||
SearchNode **srch_nodes = col_[col_cnt_ - 1]->Nodes();
|
||||
CharBigrams *bigrams = cntxt_->Bigrams();
|
||||
WordUnigrams *word_unigrams = cntxt_->WordUnigramsObj();
|
||||
|
||||
// Save the index of the best-cost node before the alt list is
|
||||
// sorted, so that we can retrieve it from the node list when backtracking.
|
||||
best_presorted_node_idx_ = 0;
|
||||
int best_cost = -1;
|
||||
|
||||
if (node_cnt <= 0)
|
||||
return NULL;
|
||||
|
||||
// start creating the word alternate list
|
||||
WordAltList *alt_list = new WordAltList(node_cnt + 1);
|
||||
for (int node_idx = 0; node_idx < node_cnt; node_idx++) {
|
||||
// recognition cost
|
||||
int recognition_cost = srch_nodes[node_idx]->BestCost();
|
||||
// compute the size cost of the alternate
|
||||
char_32 *ch_buff = NULL;
|
||||
int size_cost = SizeCost(srch_obj, srch_nodes[node_idx], &ch_buff);
|
||||
// accumulate other costs
|
||||
if (ch_buff) {
|
||||
int cost = 0;
|
||||
// char bigram cost
|
||||
int bigram_cost = !bigrams ? 0 :
|
||||
bigrams->Cost(ch_buff, cntxt_->CharacterSet());
|
||||
// word unigram cost
|
||||
int unigram_cost = !word_unigrams ? 0 :
|
||||
word_unigrams->Cost(ch_buff, cntxt_->LangMod(),
|
||||
cntxt_->CharacterSet());
|
||||
// overall cost
|
||||
cost = static_cast<int>(
|
||||
(size_cost * cntxt_->Params()->SizeWgt()) +
|
||||
(bigram_cost * cntxt_->Params()->CharBigramWgt()) +
|
||||
(unigram_cost * cntxt_->Params()->WordUnigramWgt()) +
|
||||
(recognition_cost * cntxt_->Params()->RecoWgt()));
|
||||
|
||||
// insert into word alt list
|
||||
alt_list->Insert(ch_buff, cost,
|
||||
static_cast<void *>(srch_nodes[node_idx]));
|
||||
// Note that strict < is necessary because WordAltList::Sort()
|
||||
// uses it in a bubble sort to swap entries.
|
||||
if (best_cost < 0 || cost < best_cost) {
|
||||
best_presorted_node_idx_ = node_idx;
|
||||
best_cost = cost;
|
||||
}
|
||||
delete []ch_buff;
|
||||
}
|
||||
}
|
||||
|
||||
// sort the alternates based on cost
|
||||
alt_list->Sort();
|
||||
return alt_list;
|
||||
}
|
||||
|
||||
// Returns the lattice column corresponding to the specified column index.
|
||||
SearchColumn *BeamSearch::Column(int col) const {
|
||||
if (col < 0 || col >= col_cnt_ || !col_)
|
||||
return NULL;
|
||||
return col_[col];
|
||||
}
|
||||
|
||||
// Returns the best node in the last column of last performed search.
|
||||
SearchNode *BeamSearch::BestNode() const {
|
||||
if (col_cnt_ < 1 || !col_ || !col_[col_cnt_ - 1])
|
||||
return NULL;
|
||||
|
||||
int node_cnt = col_[col_cnt_ - 1]->NodeCount();
|
||||
SearchNode **srch_nodes = col_[col_cnt_ - 1]->Nodes();
|
||||
if (node_cnt < 1 || !srch_nodes || !srch_nodes[0])
|
||||
return NULL;
|
||||
return srch_nodes[0];
|
||||
}
|
||||
|
||||
// Returns the string corresponding to the specified alt.
|
||||
char_32 *BeamSearch::Alt(int alt) const {
|
||||
// get the last column of the lattice
|
||||
if (col_cnt_ <= 0)
|
||||
return NULL;
|
||||
|
||||
SearchColumn *srch_col = col_[col_cnt_ - 1];
|
||||
if (!srch_col)
|
||||
return NULL;
|
||||
|
||||
// point to the last node in the selected path
|
||||
if (alt >= srch_col->NodeCount() || srch_col->Nodes() == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
SearchNode *srch_node = srch_col->Nodes()[alt];
|
||||
if (!srch_node)
|
||||
return NULL;
|
||||
|
||||
// get string
|
||||
char_32 *str32 = srch_node->PathString();
|
||||
if (!str32)
|
||||
return NULL;
|
||||
|
||||
return str32;
|
||||
}
|
||||
|
||||
// Backtracks from the specified node index and returns the corresponding
|
||||
// character mapped segments and character count. Optional return
|
||||
// arguments are the char_32 result string and character bounding
|
||||
// boxes, if non-NULL values are passed in.
|
||||
CharSamp **BeamSearch::BackTrack(SearchObject *srch_obj, int node_index,
|
||||
int *char_cnt, char_32 **str32,
|
||||
Boxa **char_boxes) const {
|
||||
// get the last column of the lattice
|
||||
if (col_cnt_ <= 0)
|
||||
return NULL;
|
||||
SearchColumn *srch_col = col_[col_cnt_ - 1];
|
||||
if (!srch_col)
|
||||
return NULL;
|
||||
|
||||
// point to the last node in the selected path
|
||||
if (node_index >= srch_col->NodeCount() || !srch_col->Nodes())
|
||||
return NULL;
|
||||
|
||||
SearchNode *srch_node = srch_col->Nodes()[node_index];
|
||||
if (!srch_node)
|
||||
return NULL;
|
||||
return BackTrack(srch_obj, srch_node, char_cnt, str32, char_boxes);
|
||||
}
|
||||
|
||||
// Backtracks from the specified node index and returns the corresponding
|
||||
// character mapped segments and character count. Optional return
|
||||
// arguments are the char_32 result string and character bounding
|
||||
// boxes, if non-NULL values are passed in.
|
||||
CharSamp **BeamSearch::BackTrack(SearchObject *srch_obj, SearchNode *srch_node,
|
||||
int *char_cnt, char_32 **str32,
|
||||
Boxa **char_boxes) const {
|
||||
if (!srch_node)
|
||||
return NULL;
|
||||
|
||||
if (str32) {
|
||||
delete [](*str32); // clear existing value
|
||||
*str32 = srch_node->PathString();
|
||||
if (!*str32)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (char_boxes && *char_boxes) {
|
||||
boxaDestroy(char_boxes); // clear existing value
|
||||
}
|
||||
|
||||
CharSamp **chars;
|
||||
chars = SplitByNode(srch_obj, srch_node, char_cnt, char_boxes);
|
||||
if (!chars && str32)
|
||||
delete []*str32;
|
||||
return chars;
|
||||
}
|
||||
|
||||
// Backtracks from the given lattice node and return the corresponding
|
||||
// char mapped segments and character count. The character bounding
|
||||
// boxes are optional return arguments, if non-NULL values are passed in.
|
||||
CharSamp **BeamSearch::SplitByNode(SearchObject *srch_obj,
|
||||
SearchNode *srch_node,
|
||||
int *char_cnt,
|
||||
Boxa **char_boxes) const {
|
||||
// Count the characters (could be less than the path length when in
|
||||
// phrase mode)
|
||||
*char_cnt = 0;
|
||||
SearchNode *node = srch_node;
|
||||
while (node) {
|
||||
node = node->ParentNode();
|
||||
(*char_cnt)++;
|
||||
}
|
||||
|
||||
if (*char_cnt == 0)
|
||||
return NULL;
|
||||
|
||||
// Allocate box array
|
||||
if (char_boxes) {
|
||||
if (*char_boxes)
|
||||
boxaDestroy(char_boxes); // clear existing value
|
||||
*char_boxes = boxaCreate(*char_cnt);
|
||||
if (*char_boxes == NULL)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Allocate memory for CharSamp array.
|
||||
CharSamp **chars = new CharSamp *[*char_cnt];
|
||||
|
||||
int ch_idx = *char_cnt - 1;
|
||||
int seg_pt_cnt = srch_obj->SegPtCnt();
|
||||
bool success=true;
|
||||
while (srch_node && ch_idx >= 0) {
|
||||
// Parent node (could be null)
|
||||
SearchNode *parent_node = srch_node->ParentNode();
|
||||
|
||||
// Get the seg pts corresponding to the search node
|
||||
int st_col = !parent_node ? 0 : parent_node->ColIdx() + 1;
|
||||
int st_seg_pt = st_col <= 0 ? -1 : st_col - 1;
|
||||
int end_col = srch_node->ColIdx();
|
||||
int end_seg_pt = end_col >= seg_pt_cnt ? seg_pt_cnt : end_col;
|
||||
|
||||
// Get a char sample corresponding to the segmentation points
|
||||
CharSamp *samp = srch_obj->CharSample(st_seg_pt, end_seg_pt);
|
||||
if (!samp) {
|
||||
success = false;
|
||||
break;
|
||||
}
|
||||
samp->SetLabel(srch_node->NodeString());
|
||||
chars[ch_idx] = samp;
|
||||
if (char_boxes) {
|
||||
// Create the corresponding character bounding box
|
||||
Box *char_box = boxCreate(samp->Left(), samp->Top(),
|
||||
samp->Width(), samp->Height());
|
||||
if (!char_box) {
|
||||
success = false;
|
||||
break;
|
||||
}
|
||||
boxaAddBox(*char_boxes, char_box, L_INSERT);
|
||||
}
|
||||
srch_node = parent_node;
|
||||
ch_idx--;
|
||||
}
|
||||
if (!success) {
|
||||
delete []chars;
|
||||
if (char_boxes)
|
||||
boxaDestroy(char_boxes);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Reverse the order of boxes.
|
||||
if (char_boxes) {
|
||||
int char_boxa_size = boxaGetCount(*char_boxes);
|
||||
int limit = char_boxa_size / 2;
|
||||
for (int i = 0; i < limit; ++i) {
|
||||
int box1_idx = i;
|
||||
int box2_idx = char_boxa_size - 1 - i;
|
||||
Box *box1 = boxaGetBox(*char_boxes, box1_idx, L_CLONE);
|
||||
Box *box2 = boxaGetBox(*char_boxes, box2_idx, L_CLONE);
|
||||
boxaReplaceBox(*char_boxes, box2_idx, box1);
|
||||
boxaReplaceBox(*char_boxes, box1_idx, box2);
|
||||
}
|
||||
}
|
||||
return chars;
|
||||
}
|
||||
|
||||
// Returns the size cost of a string for a lattice path that
|
||||
// ends at the specified lattice node.
|
||||
int BeamSearch::SizeCost(SearchObject *srch_obj, SearchNode *node,
|
||||
char_32 **str32) const {
|
||||
CharSamp **chars = NULL;
|
||||
int char_cnt = 0;
|
||||
if (!node)
|
||||
return 0;
|
||||
// Backtrack to get string and character segmentation
|
||||
chars = BackTrack(srch_obj, node, &char_cnt, str32, NULL);
|
||||
if (!chars)
|
||||
return WORST_COST;
|
||||
int size_cost = (cntxt_->SizeModel() == NULL) ? 0 :
|
||||
cntxt_->SizeModel()->Cost(chars, char_cnt);
|
||||
delete []chars;
|
||||
return size_cost;
|
||||
}
|
||||
} // namespace tesesract
|
@ -1,126 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: beam_search.h
|
||||
* Description: Declaration of Beam Word Search Algorithm Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The Beam Search class implements a Beam Search algorithm for the
|
||||
// N-best paths through the lattice of a search object using a language model
|
||||
// The search object is a segmented bitmap of a word image. The language model
|
||||
// is a state machine that defines valid sequences of characters
|
||||
// The cost of each path is the combined (product) probabilities of the
|
||||
// characters along the path. The character probabilities are computed using
|
||||
// the character classifier member of the RecoContext
|
||||
// The BeamSearch class itself holds the state of the last search it performed
|
||||
// using its "Search" method. Subsequent class to the Search method erase the
|
||||
// states of previously done searches
|
||||
|
||||
#ifndef BEAM_SEARCH_H
|
||||
#define BEAM_SEARCH_H
|
||||
|
||||
#include "search_column.h"
|
||||
#include "word_altlist.h"
|
||||
#include "search_object.h"
|
||||
#include "lang_model.h"
|
||||
#include "cube_utils.h"
|
||||
#include "cube_reco_context.h"
|
||||
#include "allheaders.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class BeamSearch {
|
||||
public:
|
||||
explicit BeamSearch(CubeRecoContext *cntxt, bool word_mode = true);
|
||||
~BeamSearch();
|
||||
// Performs a beam search in the specified search using the specified
|
||||
// language model; returns an alternate list of possible words as a result.
|
||||
WordAltList *Search(SearchObject *srch_obj, LangModel *lang_mod = NULL);
|
||||
// Returns the best node in the last column of last performed search.
|
||||
SearchNode *BestNode() const;
|
||||
// Returns the string corresponding to the specified alt.
|
||||
char_32 *Alt(int alt) const;
|
||||
// Backtracks from the specified lattice node and returns the corresponding
|
||||
// character-mapped segments, character count, char_32 result string, and
|
||||
// character bounding boxes (if char_boxes is not NULL). If the segments
|
||||
// cannot be constructed, returns NULL, and all result arguments
|
||||
// will be NULL.
|
||||
CharSamp **BackTrack(SearchObject *srch_obj, int node_index,
|
||||
int *char_cnt, char_32 **str32, Boxa **char_boxes) const;
|
||||
// Same as above, except it takes a pointer to a search node object
|
||||
// instead of node index.
|
||||
CharSamp **BackTrack(SearchObject *srch_obj, SearchNode *node,
|
||||
int *char_cnt, char_32 **str32, Boxa **char_boxes) const;
|
||||
// Returns the size cost of a specified string of a lattice
|
||||
// path that ends at the specified lattice node.
|
||||
int SizeCost(SearchObject *srch_obj, SearchNode *node,
|
||||
char_32 **str32 = NULL) const;
|
||||
// Returns the word unigram cost of the given string, possibly
|
||||
// stripping out a single trailing punctuation character.
|
||||
int WordUnigramCost(char_32 *str32, WordUnigrams* word_unigrams) const;
|
||||
|
||||
// Supplementary functions needed for visualization
|
||||
// Return column count of the lattice.
|
||||
inline int ColCnt() const { return col_cnt_; }
|
||||
// Returns the lattice column corresponding to the specified column index.
|
||||
SearchColumn *Column(int col_idx) const;
|
||||
// Return the index of the best node in the last column of the
|
||||
// best-cost path before the alternates list is sorted.
|
||||
inline int BestPresortedNodeIndex() const {
|
||||
return best_presorted_node_idx_;
|
||||
}
|
||||
|
||||
private:
|
||||
// Maximum reasonable segmentation point count
|
||||
static const int kMaxSegPointCnt = 128;
|
||||
// Recognition context object; the context holds the character classifier
|
||||
// and the tuning parameters object
|
||||
CubeRecoContext *cntxt_;
|
||||
// Count of segmentation pts
|
||||
int seg_pt_cnt_;
|
||||
// Lattice column count; currently redundant with respect to seg_pt_cnt_
|
||||
// but that might change in the future
|
||||
int col_cnt_;
|
||||
// Array of lattice columns
|
||||
SearchColumn **col_;
|
||||
// Run in word or phrase mode
|
||||
bool word_mode_;
|
||||
// Node index of best-cost node, before alternates are merged and sorted
|
||||
int best_presorted_node_idx_;
|
||||
// Cleans up beam search state
|
||||
void Cleanup();
|
||||
// Creates a Word alternate list from the results in the lattice.
|
||||
// This function computes a cost for each node in the final column
|
||||
// of the lattice, which is a weighted average of several costs:
|
||||
// size cost, character bigram cost, word unigram cost, and
|
||||
// recognition cost from the beam search. The weights are the
|
||||
// CubeTuningParams, which are learned together with the character
|
||||
// classifiers.
|
||||
WordAltList *CreateWordAltList(SearchObject *srch_obj);
|
||||
// Creates a set of children nodes emerging from a parent node based on
|
||||
// the character alternate list and the language model.
|
||||
void CreateChildren(SearchColumn *out_col, LangModel *lang_mod,
|
||||
SearchNode *parent_node, LangModEdge *lm_parent_edge,
|
||||
CharAltList *char_alt_list, int extra_cost);
|
||||
// Backtracks from the given lattice node and returns the corresponding
|
||||
// char mapped segments, character count, and character bounding boxes (if
|
||||
// char_boxes is not NULL). If the segments cannot be constructed,
|
||||
// returns NULL, and all result arguments will be NULL.
|
||||
CharSamp **SplitByNode(SearchObject *srch_obj, SearchNode *srch_node,
|
||||
int* char_cnt, Boxa **char_boxes) const;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // BEAM_SEARCH_H
|
1090
cube/bmp_8.cpp
1090
cube/bmp_8.cpp
File diff suppressed because it is too large
Load Diff
122
cube/bmp_8.h
122
cube/bmp_8.h
@ -1,122 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: bmp_8.h
|
||||
* Description: Declaration of an 8-bit Bitmap class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef BMP8_H
|
||||
#define BMP8_H
|
||||
|
||||
// The Bmp8 class is an 8-bit bitmap that represents images of
|
||||
// words, characters and segments throughout Cube
|
||||
// It is meant to provide fast access to the bitmap bits and provide
|
||||
// fast scaling, cropping, deslanting, connected components detection,
|
||||
// loading and saving functionality
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include "con_comp.h"
|
||||
#include "cached_file.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Non-integral deslanting parameters.
|
||||
static const float kMinDeslantAngle = -30.0f;
|
||||
static const float kMaxDeslantAngle = 30.0f;
|
||||
static const float kDeslantAngleDelta = 0.5f;
|
||||
|
||||
class Bmp8 {
|
||||
public:
|
||||
Bmp8(unsigned short wid, unsigned short hgt);
|
||||
~Bmp8();
|
||||
// Clears the bitmap
|
||||
bool Clear();
|
||||
// accessors to bitmap dimensions
|
||||
inline unsigned short Width() const { return wid_; }
|
||||
inline unsigned short Stride() const { return stride_; }
|
||||
inline unsigned short Height() const { return hgt_; }
|
||||
inline unsigned char *RawData() const {
|
||||
return (line_buff_ == NULL ? NULL : line_buff_[0]);
|
||||
}
|
||||
// creates a scaled version of the specified bitmap
|
||||
// Optionally, scaling can be isotropic (preserving aspect ratio) or not
|
||||
bool ScaleFrom(Bmp8 *bmp, bool isotropic = true);
|
||||
// Deslant the bitmap vertically
|
||||
bool Deslant();
|
||||
// Deslant the bitmap horizontally
|
||||
bool HorizontalDeslant(double *deslant_angle);
|
||||
// Create a bitmap object from a file
|
||||
static Bmp8 *FromCharDumpFile(CachedFile *fp);
|
||||
static Bmp8 *FromCharDumpFile(FILE *fp);
|
||||
// are two bitmaps identical
|
||||
bool IsIdentical(Bmp8 *pBmp) const;
|
||||
// Detect connected components
|
||||
ConComp ** FindConComps(int *concomp_cnt, int min_size) const;
|
||||
// compute the foreground ratio
|
||||
float ForegroundRatio() const;
|
||||
// returns the mean horizontal histogram entropy of the bitmap
|
||||
float MeanHorizontalHistogramEntropy() const;
|
||||
// returns the horizontal histogram of the bitmap
|
||||
int *HorizontalHistogram() const;
|
||||
|
||||
private:
|
||||
// Compute a look up tan table that will be used for fast slant computation
|
||||
static bool ComputeTanTable();
|
||||
// create a bitmap buffer (two flavors char & int) and init contents
|
||||
unsigned char ** CreateBmpBuffer(unsigned char init_val = 0xff);
|
||||
static unsigned int ** CreateBmpBuffer(int wid, int hgt,
|
||||
unsigned char init_val = 0xff);
|
||||
// Free a bitmap buffer
|
||||
static void FreeBmpBuffer(unsigned char **buff);
|
||||
static void FreeBmpBuffer(unsigned int **buff);
|
||||
|
||||
// a static array that holds the tan lookup table
|
||||
static float *tan_table_;
|
||||
// bitmap 32-bit-aligned stride
|
||||
unsigned short stride_;
|
||||
// Bmp8 magic number used to validate saved bitmaps
|
||||
static const unsigned int kMagicNumber = 0xdeadbeef;
|
||||
|
||||
protected:
|
||||
// bitmap dimensions
|
||||
unsigned short wid_;
|
||||
unsigned short hgt_;
|
||||
// bitmap contents
|
||||
unsigned char **line_buff_;
|
||||
// deslanting parameters
|
||||
static const int kConCompAllocChunk = 16;
|
||||
static const int kDeslantAngleCount;
|
||||
|
||||
// Load dimensions & contents of bitmap from file
|
||||
bool LoadFromCharDumpFile(CachedFile *fp);
|
||||
bool LoadFromCharDumpFile(FILE *fp);
|
||||
// Load dimensions & contents of bitmap from raw data
|
||||
bool LoadFromCharDumpFile(unsigned char **raw_data);
|
||||
// Load contents of bitmap from raw data
|
||||
bool LoadFromRawData(unsigned char *data);
|
||||
// save bitmap to a file
|
||||
bool SaveBmp2CharDumpFile(FILE *fp) const;
|
||||
// checks if a row or a column are entirely blank
|
||||
bool IsBlankColumn(int x) const;
|
||||
bool IsBlankRow(int y) const;
|
||||
// crop the bitmap returning new dimensions
|
||||
void Crop(int *xst_src, int *yst_src, int *wid, int *hgt);
|
||||
// copy part of the specified bitmap
|
||||
void Copy(int x, int y, int wid, int hgt, Bmp8 *bmp_dest) const;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // BMP8_H
|
@ -1,147 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: cached_file.pp
|
||||
* Description: Implementation of an Cached File Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <string>
|
||||
#include <stdlib.h>
|
||||
#include <cstring>
|
||||
#include "cached_file.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
CachedFile::CachedFile(string file_name) {
|
||||
file_name_ = file_name;
|
||||
buff_ = NULL;
|
||||
buff_pos_ = 0;
|
||||
buff_size_ = 0;
|
||||
file_pos_ = 0;
|
||||
file_size_ = 0;
|
||||
fp_ = NULL;
|
||||
}
|
||||
|
||||
CachedFile::~CachedFile() {
|
||||
if (fp_ != NULL) {
|
||||
fclose(fp_);
|
||||
fp_ = NULL;
|
||||
}
|
||||
|
||||
if (buff_ != NULL) {
|
||||
delete []buff_;
|
||||
buff_ = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// free buffers and init vars
|
||||
bool CachedFile::Open() {
|
||||
if (fp_ != NULL) {
|
||||
return true;
|
||||
}
|
||||
|
||||
fp_ = fopen(file_name_.c_str(), "rb");
|
||||
if (fp_ == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// seek to the end
|
||||
fseek(fp_, 0, SEEK_END);
|
||||
// get file size
|
||||
file_size_ = ftell(fp_);
|
||||
if (file_size_ < 1) {
|
||||
return false;
|
||||
}
|
||||
// rewind again
|
||||
rewind(fp_);
|
||||
// alloc memory for buffer
|
||||
buff_ = new unsigned char[kCacheSize];
|
||||
// init counters
|
||||
buff_size_ = 0;
|
||||
buff_pos_ = 0;
|
||||
file_pos_ = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
// add a new sample
|
||||
int CachedFile::Read(void *read_buff, int bytes) {
|
||||
int read_bytes = 0;
|
||||
unsigned char *buff = (unsigned char *)read_buff;
|
||||
|
||||
// do we need to read beyond the buffer
|
||||
if ((buff_pos_ + bytes) > buff_size_) {
|
||||
// copy as much bytes from the current buffer if any
|
||||
int copy_bytes = buff_size_ - buff_pos_;
|
||||
|
||||
if (copy_bytes > 0) {
|
||||
memcpy(buff, buff_ + buff_pos_, copy_bytes);
|
||||
buff += copy_bytes;
|
||||
bytes -= copy_bytes;
|
||||
read_bytes += copy_bytes;
|
||||
}
|
||||
|
||||
// determine how much to read
|
||||
buff_size_ = kCacheSize;
|
||||
|
||||
if ((file_pos_ + buff_size_) > file_size_) {
|
||||
buff_size_ = static_cast<int>(file_size_ - file_pos_);
|
||||
}
|
||||
|
||||
// EOF ?
|
||||
if (buff_size_ <= 0 || bytes > buff_size_) {
|
||||
return read_bytes;
|
||||
}
|
||||
|
||||
// read the first chunck
|
||||
if (fread(buff_, 1, buff_size_, fp_) != buff_size_) {
|
||||
return read_bytes;
|
||||
}
|
||||
|
||||
buff_pos_ = 0;
|
||||
file_pos_ += buff_size_;
|
||||
}
|
||||
|
||||
memcpy(buff, buff_ + buff_pos_, bytes);
|
||||
read_bytes += bytes;
|
||||
buff_pos_ += bytes;
|
||||
|
||||
return read_bytes;
|
||||
}
|
||||
|
||||
long CachedFile::Size() {
|
||||
if (fp_ == NULL && Open() == false) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return file_size_;
|
||||
}
|
||||
|
||||
long CachedFile::Tell() {
|
||||
if (fp_ == NULL && Open() == false) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return file_pos_ - buff_size_ + buff_pos_;
|
||||
}
|
||||
|
||||
bool CachedFile::eof() {
|
||||
if (fp_ == NULL && Open() == false) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return (file_pos_ - buff_size_ + buff_pos_) >= file_size_;
|
||||
}
|
||||
|
||||
} // namespace tesseract
|
@ -1,69 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: cached_file.h
|
||||
* Description: Declaration of a Cached File class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef CACHED_FILE_H
|
||||
#define CACHED_FILE_H
|
||||
|
||||
// The CachedFile class provides a large-cache read access to a file
|
||||
// It is mainly designed for loading large word dump files
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string>
|
||||
#ifdef USE_STD_NAMESPACE
|
||||
using std::string;
|
||||
#endif
|
||||
|
||||
namespace tesseract {
|
||||
class CachedFile {
|
||||
public:
|
||||
explicit CachedFile(string file_name);
|
||||
~CachedFile();
|
||||
|
||||
// reads a specified number of bytes to the specified buffer and
|
||||
// returns the actual number of bytes read
|
||||
int Read(void *read_buff, int bytes);
|
||||
// Returns the file size
|
||||
long Size();
|
||||
// returns the current position in the file
|
||||
long Tell();
|
||||
// End of file flag
|
||||
bool eof();
|
||||
|
||||
private:
|
||||
static const unsigned int kCacheSize = 0x8000000;
|
||||
// file name
|
||||
string file_name_;
|
||||
// internal file buffer
|
||||
unsigned char *buff_;
|
||||
// file position
|
||||
long file_pos_;
|
||||
// file size
|
||||
long file_size_;
|
||||
// position of file within buffer
|
||||
int buff_pos_;
|
||||
// buffer size
|
||||
int buff_size_;
|
||||
// file handle
|
||||
FILE *fp_;
|
||||
// Opens the file
|
||||
bool Open();
|
||||
};
|
||||
}
|
||||
|
||||
#endif // CACHED_FILE_H
|
@ -1,108 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: char_altlist.cpp
|
||||
* Description: Implementation of a Character Alternate List Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include "char_altlist.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// The CharSet is not class owned and must exist for
|
||||
// the life time of this class
|
||||
CharAltList::CharAltList(const CharSet *char_set, int max_alt)
|
||||
: AltList(max_alt) {
|
||||
char_set_ = char_set;
|
||||
max_alt_ = max_alt;
|
||||
class_id_alt_ = NULL;
|
||||
class_id_cost_ = NULL;
|
||||
}
|
||||
|
||||
CharAltList::~CharAltList() {
|
||||
if (class_id_alt_ != NULL) {
|
||||
delete []class_id_alt_;
|
||||
class_id_alt_ = NULL;
|
||||
}
|
||||
|
||||
if (class_id_cost_ != NULL) {
|
||||
delete []class_id_cost_;
|
||||
class_id_cost_ = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// Insert a new char alternate
|
||||
bool CharAltList::Insert(int class_id, int cost, void *tag) {
|
||||
// validate class ID
|
||||
if (class_id < 0 || class_id >= char_set_->ClassCount()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// allocate buffers if nedded
|
||||
if (class_id_alt_ == NULL || alt_cost_ == NULL) {
|
||||
class_id_alt_ = new int[max_alt_];
|
||||
alt_cost_ = new int[max_alt_];
|
||||
alt_tag_ = new void *[max_alt_];
|
||||
|
||||
memset(alt_tag_, 0, max_alt_ * sizeof(*alt_tag_));
|
||||
}
|
||||
|
||||
if (class_id_cost_ == NULL) {
|
||||
int class_cnt = char_set_->ClassCount();
|
||||
|
||||
class_id_cost_ = new int[class_cnt];
|
||||
|
||||
for (int ich = 0; ich < class_cnt; ich++) {
|
||||
class_id_cost_[ich] = WORST_COST;
|
||||
}
|
||||
}
|
||||
|
||||
if (class_id < 0 || class_id >= char_set_->ClassCount()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// insert the alternate
|
||||
class_id_alt_[alt_cnt_] = class_id;
|
||||
alt_cost_[alt_cnt_] = cost;
|
||||
alt_tag_[alt_cnt_] = tag;
|
||||
|
||||
alt_cnt_++;
|
||||
|
||||
class_id_cost_[class_id] = cost;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// sort the alternate Desc. based on prob
|
||||
void CharAltList::Sort() {
|
||||
for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
|
||||
for (int alt = alt_idx + 1; alt < alt_cnt_; alt++) {
|
||||
if (alt_cost_[alt_idx] > alt_cost_[alt]) {
|
||||
int temp = class_id_alt_[alt_idx];
|
||||
class_id_alt_[alt_idx] = class_id_alt_[alt];
|
||||
class_id_alt_[alt] = temp;
|
||||
|
||||
temp = alt_cost_[alt_idx];
|
||||
alt_cost_[alt_idx] = alt_cost_[alt];
|
||||
alt_cost_[alt] = temp;
|
||||
|
||||
void *tag = alt_tag_[alt_idx];
|
||||
alt_tag_[alt_idx] = alt_tag_[alt];
|
||||
alt_tag_[alt] = tag;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -1,70 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: char_altlist.h
|
||||
* Description: Declaration of a Character Alternate List Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef CHAR_ALT_LIST_H
|
||||
#define CHAR_ALT_LIST_H
|
||||
|
||||
// The CharAltList class holds the list of class alternates returned from
|
||||
// a character classifier. Each alternate represents a class ID.
|
||||
// It inherits from the AltList class.
|
||||
// The CharAltList owns a CharSet object that maps a class-id to a string.
|
||||
|
||||
#include "altlist.h"
|
||||
#include "char_set.h"
|
||||
|
||||
namespace tesseract {
|
||||
class CharAltList : public AltList {
|
||||
public:
|
||||
CharAltList(const CharSet *char_set, int max_alt = kMaxCharAlt);
|
||||
~CharAltList();
|
||||
|
||||
// Sort the alternate list based on cost
|
||||
void Sort();
|
||||
// insert a new alternate with the specified class-id, cost and tag
|
||||
bool Insert(int class_id, int cost, void *tag = NULL);
|
||||
// returns the cost of a specific class ID
|
||||
inline int ClassCost(int class_id) const {
|
||||
if (class_id_cost_ == NULL ||
|
||||
class_id < 0 ||
|
||||
class_id >= char_set_->ClassCount()) {
|
||||
return WORST_COST;
|
||||
}
|
||||
return class_id_cost_[class_id];
|
||||
}
|
||||
// returns the alternate class-id corresponding to an alternate index
|
||||
inline int Alt(int alt_idx) const { return class_id_alt_[alt_idx]; }
|
||||
// set the cost of a certain alternate
|
||||
void SetAltCost(int alt_idx, int cost) {
|
||||
alt_cost_[alt_idx] = cost;
|
||||
class_id_cost_[class_id_alt_[alt_idx]] = cost;
|
||||
}
|
||||
|
||||
private:
|
||||
// character set object. Passed at construction time
|
||||
const CharSet *char_set_;
|
||||
// array of alternate class-ids
|
||||
int *class_id_alt_;
|
||||
// array of alternate costs
|
||||
int *class_id_cost_;
|
||||
// default max count of alternates
|
||||
static const int kMaxCharAlt = 256;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // CHAR_ALT_LIST_H
|
@ -1,191 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: char_bigrams.cpp
|
||||
* Description: Implementation of a Character Bigrams Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <algorithm>
|
||||
#include <math.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "char_bigrams.h"
|
||||
#include "cube_utils.h"
|
||||
#include "ndminx.h"
|
||||
#include "cube_const.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
CharBigrams::CharBigrams() {
|
||||
memset(&bigram_table_, 0, sizeof(bigram_table_));
|
||||
}
|
||||
|
||||
CharBigrams::~CharBigrams() {
|
||||
if (bigram_table_.char_bigram != NULL) {
|
||||
for (int ch1 = 0; ch1 <= bigram_table_.max_char; ch1++) {
|
||||
CharBigram *char_bigram = bigram_table_.char_bigram + ch1;
|
||||
|
||||
if (char_bigram->bigram != NULL) {
|
||||
delete []char_bigram->bigram;
|
||||
}
|
||||
}
|
||||
delete []bigram_table_.char_bigram;
|
||||
}
|
||||
}
|
||||
|
||||
CharBigrams *CharBigrams::Create(const string &data_file_path,
|
||||
const string &lang) {
|
||||
string file_name;
|
||||
string str;
|
||||
|
||||
file_name = data_file_path + lang;
|
||||
file_name += ".cube.bigrams";
|
||||
|
||||
// load the string into memory
|
||||
if (!CubeUtils::ReadFileToString(file_name, &str)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// construct a new object
|
||||
CharBigrams *char_bigrams_obj = new CharBigrams();
|
||||
CharBigramTable *table = &char_bigrams_obj->bigram_table_;
|
||||
|
||||
table->total_cnt = 0;
|
||||
table->max_char = -1;
|
||||
table->char_bigram = NULL;
|
||||
|
||||
// split into lines
|
||||
vector<string> str_vec;
|
||||
CubeUtils::SplitStringUsing(str, "\r\n", &str_vec);
|
||||
|
||||
for (int big = 0; big < str_vec.size(); big++) {
|
||||
char_32 ch1;
|
||||
char_32 ch2;
|
||||
int cnt;
|
||||
if (sscanf(str_vec[big].c_str(), "%d %x %x", &cnt, &ch1, &ch2) != 3) {
|
||||
fprintf(stderr, "Cube ERROR (CharBigrams::Create): invalid format "
|
||||
"reading line: %s\n", str_vec[big].c_str());
|
||||
delete char_bigrams_obj;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// expand the bigram table
|
||||
if (ch1 > table->max_char) {
|
||||
CharBigram *char_bigram = new CharBigram[ch1 + 1];
|
||||
|
||||
if (table->char_bigram != NULL && table->max_char >= 0) {
|
||||
memcpy(char_bigram, table->char_bigram,
|
||||
(table->max_char + 1) * sizeof(*char_bigram));
|
||||
|
||||
delete []table->char_bigram;
|
||||
}
|
||||
table->char_bigram = char_bigram;
|
||||
|
||||
// init
|
||||
for (int new_big = table->max_char + 1; new_big <= ch1; new_big++) {
|
||||
table->char_bigram[new_big].total_cnt = 0;
|
||||
table->char_bigram[new_big].max_char = -1;
|
||||
table->char_bigram[new_big].bigram = NULL;
|
||||
}
|
||||
table->max_char = ch1;
|
||||
}
|
||||
|
||||
if (ch2 > table->char_bigram[ch1].max_char) {
|
||||
Bigram *bigram = new Bigram[ch2 + 1];
|
||||
|
||||
if (table->char_bigram[ch1].bigram != NULL &&
|
||||
table->char_bigram[ch1].max_char >= 0) {
|
||||
memcpy(bigram, table->char_bigram[ch1].bigram,
|
||||
(table->char_bigram[ch1].max_char + 1) * sizeof(*bigram));
|
||||
delete []table->char_bigram[ch1].bigram;
|
||||
}
|
||||
table->char_bigram[ch1].bigram = bigram;
|
||||
|
||||
// init
|
||||
for (int new_big = table->char_bigram[ch1].max_char + 1;
|
||||
new_big <= ch2; new_big++) {
|
||||
table->char_bigram[ch1].bigram[new_big].cnt = 0;
|
||||
}
|
||||
table->char_bigram[ch1].max_char = ch2;
|
||||
}
|
||||
|
||||
table->char_bigram[ch1].bigram[ch2].cnt = cnt;
|
||||
table->char_bigram[ch1].total_cnt += cnt;
|
||||
table->total_cnt += cnt;
|
||||
}
|
||||
|
||||
// compute costs (-log probs)
|
||||
table->worst_cost = static_cast<int>(
|
||||
-PROB2COST_SCALE * log(0.5 / table->total_cnt));
|
||||
for (char_32 ch1 = 0; ch1 <= table->max_char; ch1++) {
|
||||
for (char_32 ch2 = 0; ch2 <= table->char_bigram[ch1].max_char; ch2++) {
|
||||
int cnt = table->char_bigram[ch1].bigram[ch2].cnt;
|
||||
table->char_bigram[ch1].bigram[ch2].cost =
|
||||
static_cast<int>(-PROB2COST_SCALE *
|
||||
log(MAX(0.5, static_cast<double>(cnt)) /
|
||||
table->total_cnt));
|
||||
}
|
||||
}
|
||||
return char_bigrams_obj;
|
||||
}
|
||||
|
||||
int CharBigrams::PairCost(char_32 ch1, char_32 ch2) const {
|
||||
if (ch1 > bigram_table_.max_char) {
|
||||
return bigram_table_.worst_cost;
|
||||
}
|
||||
if (ch2 > bigram_table_.char_bigram[ch1].max_char) {
|
||||
return bigram_table_.worst_cost;
|
||||
}
|
||||
return bigram_table_.char_bigram[ch1].bigram[ch2].cost;
|
||||
}
|
||||
|
||||
int CharBigrams::Cost(const char_32 *char_32_ptr, CharSet *char_set) const {
|
||||
if (!char_32_ptr || char_32_ptr[0] == 0) {
|
||||
return bigram_table_.worst_cost;
|
||||
}
|
||||
int cost = MeanCostWithSpaces(char_32_ptr);
|
||||
if (CubeUtils::StrLen(char_32_ptr) >= kMinLengthCaseInvariant &&
|
||||
CubeUtils::IsCaseInvariant(char_32_ptr, char_set)) {
|
||||
char_32 *lower_32 = CubeUtils::ToLower(char_32_ptr, char_set);
|
||||
if (lower_32 && lower_32[0] != 0) {
|
||||
int cost_lower = MeanCostWithSpaces(lower_32);
|
||||
cost = MIN(cost, cost_lower);
|
||||
}
|
||||
delete [] lower_32;
|
||||
char_32 *upper_32 = CubeUtils::ToUpper(char_32_ptr, char_set);
|
||||
if (upper_32 && upper_32[0] != 0) {
|
||||
int cost_upper = MeanCostWithSpaces(upper_32);
|
||||
cost = MIN(cost, cost_upper);
|
||||
}
|
||||
delete [] upper_32;
|
||||
}
|
||||
return cost;
|
||||
}
|
||||
|
||||
int CharBigrams::MeanCostWithSpaces(const char_32 *char_32_ptr) const {
|
||||
if (!char_32_ptr)
|
||||
return bigram_table_.worst_cost;
|
||||
int len = CubeUtils::StrLen(char_32_ptr);
|
||||
int cost = 0;
|
||||
int c = 0;
|
||||
cost = PairCost(' ', char_32_ptr[0]);
|
||||
for (c = 1; c < len; c++) {
|
||||
cost += PairCost(char_32_ptr[c - 1], char_32_ptr[c]);
|
||||
}
|
||||
cost += PairCost(char_32_ptr[len - 1], ' ');
|
||||
return static_cast<int>(cost / static_cast<double>(len + 1));
|
||||
}
|
||||
} // namespace tesseract
|
@ -1,89 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: char_bigrams.h
|
||||
* Description: Declaration of a Character Bigrams Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The CharBigram class represents the interface to the character bigram
|
||||
// table used by Cube
|
||||
// A CharBigram object can be constructed from the Char Bigrams file
|
||||
// Given a sequence of characters, the "Cost" method returns the Char Bigram
|
||||
// cost of the string according to the table
|
||||
|
||||
#ifndef CHAR_BIGRAMS_H
|
||||
#define CHAR_BIGRAMS_H
|
||||
|
||||
#include <string>
|
||||
#include "char_set.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// structure representing a single bigram value
|
||||
struct Bigram {
|
||||
int cnt;
|
||||
int cost;
|
||||
};
|
||||
|
||||
// structure representing the char bigram array of characters
|
||||
// following a specific character
|
||||
struct CharBigram {
|
||||
int total_cnt;
|
||||
char_32 max_char;
|
||||
Bigram *bigram;
|
||||
};
|
||||
|
||||
// structure representing the whole bigram table
|
||||
struct CharBigramTable {
|
||||
int total_cnt;
|
||||
int worst_cost;
|
||||
char_32 max_char;
|
||||
CharBigram *char_bigram;
|
||||
};
|
||||
|
||||
class CharBigrams {
|
||||
public:
|
||||
CharBigrams();
|
||||
~CharBigrams();
|
||||
// Construct the CharBigrams class from a file
|
||||
static CharBigrams *Create(const string &data_file_path,
|
||||
const string &lang);
|
||||
// Top-level function to return the mean character bigram cost of a
|
||||
// sequence of characters. If char_set is not NULL, use
|
||||
// tesseract functions to return a case-invariant cost.
|
||||
// This avoids unnecessarily penalizing all-one-case words or
|
||||
// capitalized words (first-letter upper-case and remaining letters
|
||||
// lower-case).
|
||||
int Cost(const char_32 *str, CharSet *char_set) const;
|
||||
|
||||
protected:
|
||||
// Returns the character bigram cost of two characters.
|
||||
int PairCost(char_32 ch1, char_32 ch2) const;
|
||||
// Returns the mean character bigram cost of a sequence of
|
||||
// characters. Adds a space at the beginning and end to account for
|
||||
// cost of starting and ending characters.
|
||||
int MeanCostWithSpaces(const char_32 *char_32_ptr) const;
|
||||
|
||||
private:
|
||||
// Only words this length or greater qualify for case-invariant character
|
||||
// bigram cost.
|
||||
static const int kMinLengthCaseInvariant = 4;
|
||||
|
||||
|
||||
CharBigramTable bigram_table_;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // CHAR_BIGRAMS_H
|
@ -1,640 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: char_samp.cpp
|
||||
* Description: Implementation of a Character Bitmap Sample Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <string.h>
|
||||
#include <string>
|
||||
#include "char_samp.h"
|
||||
#include "cube_utils.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
#define MAX_LINE_LEN 1024
|
||||
|
||||
CharSamp::CharSamp()
|
||||
: Bmp8(0, 0) {
|
||||
left_ = 0;
|
||||
top_ = 0;
|
||||
label32_ = NULL;
|
||||
page_ = -1;
|
||||
}
|
||||
|
||||
CharSamp::CharSamp(int wid, int hgt)
|
||||
: Bmp8(wid, hgt) {
|
||||
left_ = 0;
|
||||
top_ = 0;
|
||||
label32_ = NULL;
|
||||
page_ = -1;
|
||||
}
|
||||
|
||||
CharSamp::CharSamp(int left, int top, int wid, int hgt)
|
||||
: Bmp8(wid, hgt)
|
||||
, left_(left)
|
||||
, top_(top) {
|
||||
label32_ = NULL;
|
||||
page_ = -1;
|
||||
}
|
||||
|
||||
CharSamp::~CharSamp() {
|
||||
if (label32_ != NULL) {
|
||||
delete []label32_;
|
||||
label32_ = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// returns a UTF-8 version of the string label
|
||||
string CharSamp::stringLabel() const {
|
||||
string str = "";
|
||||
if (label32_ != NULL) {
|
||||
string_32 str32(label32_);
|
||||
CubeUtils::UTF32ToUTF8(str32.c_str(), &str);
|
||||
}
|
||||
return str;
|
||||
}
|
||||
|
||||
// set a the string label using a UTF encoded string
|
||||
void CharSamp::SetLabel(string str) {
|
||||
if (label32_ != NULL) {
|
||||
delete []label32_;
|
||||
label32_ = NULL;
|
||||
}
|
||||
string_32 str32;
|
||||
CubeUtils::UTF8ToUTF32(str.c_str(), &str32);
|
||||
SetLabel(reinterpret_cast<const char_32 *>(str32.c_str()));
|
||||
}
|
||||
|
||||
// creates a CharSamp object from file
|
||||
CharSamp *CharSamp::FromCharDumpFile(CachedFile *fp) {
|
||||
unsigned short left;
|
||||
unsigned short top;
|
||||
unsigned short page;
|
||||
unsigned short first_char;
|
||||
unsigned short last_char;
|
||||
unsigned short norm_top;
|
||||
unsigned short norm_bottom;
|
||||
unsigned short norm_aspect_ratio;
|
||||
unsigned int val32;
|
||||
|
||||
char_32 *label32;
|
||||
|
||||
// read and check 32 bit marker
|
||||
if (fp->Read(&val32, sizeof(val32)) != sizeof(val32)) {
|
||||
return NULL;
|
||||
}
|
||||
if (val32 != 0xabd0fefe) {
|
||||
return NULL;
|
||||
}
|
||||
// read label length,
|
||||
if (fp->Read(&val32, sizeof(val32)) != sizeof(val32)) {
|
||||
return NULL;
|
||||
}
|
||||
// the label is not null terminated in the file
|
||||
if (val32 > 0 && val32 < MAX_UINT32) {
|
||||
label32 = new char_32[val32 + 1];
|
||||
// read label
|
||||
if (fp->Read(label32, val32 * sizeof(*label32)) !=
|
||||
(val32 * sizeof(*label32))) {
|
||||
delete [] label32;
|
||||
return NULL;
|
||||
}
|
||||
// null terminate
|
||||
label32[val32] = 0;
|
||||
} else {
|
||||
label32 = NULL;
|
||||
}
|
||||
// read coordinates
|
||||
if (fp->Read(&page, sizeof(page)) != sizeof(page)) {
|
||||
delete [] label32;
|
||||
return NULL;
|
||||
}
|
||||
if (fp->Read(&left, sizeof(left)) != sizeof(left)) {
|
||||
delete [] label32;
|
||||
return NULL;
|
||||
}
|
||||
if (fp->Read(&top, sizeof(top)) != sizeof(top)) {
|
||||
delete [] label32;
|
||||
return NULL;
|
||||
}
|
||||
if (fp->Read(&first_char, sizeof(first_char)) != sizeof(first_char)) {
|
||||
delete [] label32;
|
||||
return NULL;
|
||||
}
|
||||
if (fp->Read(&last_char, sizeof(last_char)) != sizeof(last_char)) {
|
||||
delete [] label32;
|
||||
return NULL;
|
||||
}
|
||||
if (fp->Read(&norm_top, sizeof(norm_top)) != sizeof(norm_top)) {
|
||||
delete [] label32;
|
||||
return NULL;
|
||||
}
|
||||
if (fp->Read(&norm_bottom, sizeof(norm_bottom)) != sizeof(norm_bottom)) {
|
||||
delete [] label32;
|
||||
return NULL;
|
||||
}
|
||||
if (fp->Read(&norm_aspect_ratio, sizeof(norm_aspect_ratio)) !=
|
||||
sizeof(norm_aspect_ratio)) {
|
||||
delete [] label32;
|
||||
return NULL;
|
||||
}
|
||||
// create the object
|
||||
CharSamp *char_samp = new CharSamp();
|
||||
// init
|
||||
char_samp->label32_ = label32;
|
||||
char_samp->page_ = page;
|
||||
char_samp->left_ = left;
|
||||
char_samp->top_ = top;
|
||||
char_samp->first_char_ = first_char;
|
||||
char_samp->last_char_ = last_char;
|
||||
char_samp->norm_top_ = norm_top;
|
||||
char_samp->norm_bottom_ = norm_bottom;
|
||||
char_samp->norm_aspect_ratio_ = norm_aspect_ratio;
|
||||
// load the Bmp8 part
|
||||
if (char_samp->LoadFromCharDumpFile(fp) == false) {
|
||||
delete char_samp;
|
||||
return NULL;
|
||||
}
|
||||
return char_samp;
|
||||
}
|
||||
|
||||
// Load a Char Samp from a dump file
|
||||
CharSamp *CharSamp::FromCharDumpFile(FILE *fp) {
|
||||
unsigned short left;
|
||||
unsigned short top;
|
||||
unsigned short page;
|
||||
unsigned short first_char;
|
||||
unsigned short last_char;
|
||||
unsigned short norm_top;
|
||||
unsigned short norm_bottom;
|
||||
unsigned short norm_aspect_ratio;
|
||||
unsigned int val32;
|
||||
char_32 *label32;
|
||||
|
||||
// read and check 32 bit marker
|
||||
if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
|
||||
return NULL;
|
||||
}
|
||||
if (val32 != 0xabd0fefe) {
|
||||
return NULL;
|
||||
}
|
||||
// read label length,
|
||||
if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
|
||||
return NULL;
|
||||
}
|
||||
// the label is not null terminated in the file
|
||||
if (val32 > 0 && val32 < MAX_UINT32) {
|
||||
label32 = new char_32[val32 + 1];
|
||||
// read label
|
||||
if (fread(label32, 1, val32 * sizeof(*label32), fp) !=
|
||||
(val32 * sizeof(*label32))) {
|
||||
delete [] label32;
|
||||
return NULL;
|
||||
}
|
||||
// null terminate
|
||||
label32[val32] = 0;
|
||||
} else {
|
||||
label32 = NULL;
|
||||
}
|
||||
// read coordinates
|
||||
if (fread(&page, 1, sizeof(page), fp) != sizeof(page) ||
|
||||
fread(&left, 1, sizeof(left), fp) != sizeof(left) ||
|
||||
fread(&top, 1, sizeof(top), fp) != sizeof(top) ||
|
||||
fread(&first_char, 1, sizeof(first_char), fp) != sizeof(first_char) ||
|
||||
fread(&last_char, 1, sizeof(last_char), fp) != sizeof(last_char) ||
|
||||
fread(&norm_top, 1, sizeof(norm_top), fp) != sizeof(norm_top) ||
|
||||
fread(&norm_bottom, 1, sizeof(norm_bottom), fp) != sizeof(norm_bottom) ||
|
||||
fread(&norm_aspect_ratio, 1, sizeof(norm_aspect_ratio), fp) !=
|
||||
sizeof(norm_aspect_ratio)) {
|
||||
delete [] label32;
|
||||
return NULL;
|
||||
}
|
||||
// create the object
|
||||
CharSamp *char_samp = new CharSamp();
|
||||
// init
|
||||
char_samp->label32_ = label32;
|
||||
char_samp->page_ = page;
|
||||
char_samp->left_ = left;
|
||||
char_samp->top_ = top;
|
||||
char_samp->first_char_ = first_char;
|
||||
char_samp->last_char_ = last_char;
|
||||
char_samp->norm_top_ = norm_top;
|
||||
char_samp->norm_bottom_ = norm_bottom;
|
||||
char_samp->norm_aspect_ratio_ = norm_aspect_ratio;
|
||||
// load the Bmp8 part
|
||||
if (char_samp->LoadFromCharDumpFile(fp) == false) {
|
||||
delete char_samp; // It owns label32.
|
||||
return NULL;
|
||||
}
|
||||
return char_samp;
|
||||
}
|
||||
|
||||
// returns a copy of the charsamp that is scaled to the
|
||||
// specified width and height
|
||||
CharSamp *CharSamp::Scale(int wid, int hgt, bool isotropic) {
|
||||
CharSamp *scaled_samp = new CharSamp(wid, hgt);
|
||||
if (scaled_samp->ScaleFrom(this, isotropic) == false) {
|
||||
delete scaled_samp;
|
||||
return NULL;
|
||||
}
|
||||
scaled_samp->left_ = left_;
|
||||
scaled_samp->top_ = top_;
|
||||
scaled_samp->page_ = page_;
|
||||
scaled_samp->SetLabel(label32_);
|
||||
scaled_samp->first_char_ = first_char_;
|
||||
scaled_samp->last_char_ = last_char_;
|
||||
scaled_samp->norm_top_ = norm_top_;
|
||||
scaled_samp->norm_bottom_ = norm_bottom_;
|
||||
scaled_samp->norm_aspect_ratio_ = norm_aspect_ratio_;
|
||||
return scaled_samp;
|
||||
}
|
||||
|
||||
// Load a Char Samp from a dump file
|
||||
CharSamp *CharSamp::FromRawData(int left, int top, int wid, int hgt,
|
||||
unsigned char *data) {
|
||||
// create the object
|
||||
CharSamp *char_samp = new CharSamp(left, top, wid, hgt);
|
||||
if (char_samp->LoadFromRawData(data) == false) {
|
||||
delete char_samp;
|
||||
return NULL;
|
||||
}
|
||||
return char_samp;
|
||||
}
|
||||
|
||||
// Saves the charsamp to a dump file
|
||||
bool CharSamp::Save2CharDumpFile(FILE *fp) const {
|
||||
unsigned int val32;
|
||||
// write and check 32 bit marker
|
||||
val32 = 0xabd0fefe;
|
||||
if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
|
||||
return false;
|
||||
}
|
||||
// write label length
|
||||
val32 = (label32_ == NULL) ? 0 : LabelLen(label32_);
|
||||
if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
|
||||
return false;
|
||||
}
|
||||
// write label
|
||||
if (label32_ != NULL) {
|
||||
if (fwrite(label32_, 1, val32 * sizeof(*label32_), fp) !=
|
||||
(val32 * sizeof(*label32_))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// write coordinates
|
||||
if (fwrite(&page_, 1, sizeof(page_), fp) != sizeof(page_)) {
|
||||
return false;
|
||||
}
|
||||
if (fwrite(&left_, 1, sizeof(left_), fp) != sizeof(left_)) {
|
||||
return false;
|
||||
}
|
||||
if (fwrite(&top_, 1, sizeof(top_), fp) != sizeof(top_)) {
|
||||
return false;
|
||||
}
|
||||
if (fwrite(&first_char_, 1, sizeof(first_char_), fp) !=
|
||||
sizeof(first_char_)) {
|
||||
return false;
|
||||
}
|
||||
if (fwrite(&last_char_, 1, sizeof(last_char_), fp) != sizeof(last_char_)) {
|
||||
return false;
|
||||
}
|
||||
if (fwrite(&norm_top_, 1, sizeof(norm_top_), fp) != sizeof(norm_top_)) {
|
||||
return false;
|
||||
}
|
||||
if (fwrite(&norm_bottom_, 1, sizeof(norm_bottom_), fp) !=
|
||||
sizeof(norm_bottom_)) {
|
||||
return false;
|
||||
}
|
||||
if (fwrite(&norm_aspect_ratio_, 1, sizeof(norm_aspect_ratio_), fp) !=
|
||||
sizeof(norm_aspect_ratio_)) {
|
||||
return false;
|
||||
}
|
||||
if (SaveBmp2CharDumpFile(fp) == false) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Crop the char samp such that there are no white spaces on any side.
|
||||
// The norm_top_ and norm_bottom_ fields are the character top/bottom
|
||||
// with respect to whatever context the character is being recognized
|
||||
// in (e.g. word bounding box) normalized to a standard size of
|
||||
// 255. Here they default to 0 and 255 (word box boundaries), but
|
||||
// since they are context dependent, they may need to be reset by the
|
||||
// calling function.
|
||||
CharSamp *CharSamp::Crop() {
|
||||
// get the dimesions of the cropped img
|
||||
int cropped_left = 0;
|
||||
int cropped_top = 0;
|
||||
int cropped_wid = wid_;
|
||||
int cropped_hgt = hgt_;
|
||||
Bmp8::Crop(&cropped_left, &cropped_top,
|
||||
&cropped_wid, &cropped_hgt);
|
||||
|
||||
if (cropped_wid == 0 || cropped_hgt == 0) {
|
||||
return NULL;
|
||||
}
|
||||
// create the cropped char samp
|
||||
CharSamp *cropped_samp = new CharSamp(left_ + cropped_left,
|
||||
top_ + cropped_top,
|
||||
cropped_wid, cropped_hgt);
|
||||
cropped_samp->SetLabel(label32_);
|
||||
cropped_samp->SetFirstChar(first_char_);
|
||||
cropped_samp->SetLastChar(last_char_);
|
||||
// the following 3 fields may/should be reset by the calling function
|
||||
// using context information, i.e., location of character box
|
||||
// w.r.t. the word bounding box
|
||||
cropped_samp->SetNormAspectRatio(255 *
|
||||
cropped_wid / (cropped_wid + cropped_hgt));
|
||||
cropped_samp->SetNormTop(0);
|
||||
cropped_samp->SetNormBottom(255);
|
||||
|
||||
// copy the bitmap to the cropped img
|
||||
Copy(cropped_left, cropped_top, cropped_wid, cropped_hgt, cropped_samp);
|
||||
return cropped_samp;
|
||||
}
|
||||
|
||||
// segment the char samp to connected components
|
||||
// based on contiguity and vertical pixel density histogram
|
||||
ConComp **CharSamp::Segment(int *segment_cnt, bool right_2_left,
|
||||
int max_hist_wnd, int min_con_comp_size) const {
|
||||
// init
|
||||
(*segment_cnt) = 0;
|
||||
int concomp_cnt = 0;
|
||||
int seg_cnt = 0;
|
||||
// find the concomps of the image
|
||||
ConComp **concomp_array = FindConComps(&concomp_cnt, min_con_comp_size);
|
||||
if (concomp_cnt <= 0 || !concomp_array) {
|
||||
if (concomp_array)
|
||||
delete []concomp_array;
|
||||
return NULL;
|
||||
}
|
||||
ConComp **seg_array = NULL;
|
||||
// segment each concomp further using vertical histogram
|
||||
for (int concomp = 0; concomp < concomp_cnt; concomp++) {
|
||||
int concomp_seg_cnt = 0;
|
||||
// segment the concomp
|
||||
ConComp **concomp_seg_array = NULL;
|
||||
ConComp **concomp_alloc_seg =
|
||||
concomp_array[concomp]->Segment(max_hist_wnd, &concomp_seg_cnt);
|
||||
// no segments, add the whole concomp
|
||||
if (concomp_alloc_seg == NULL) {
|
||||
concomp_seg_cnt = 1;
|
||||
concomp_seg_array = concomp_array + concomp;
|
||||
} else {
|
||||
// delete the original concomp, we no longer need it
|
||||
concomp_seg_array = concomp_alloc_seg;
|
||||
delete concomp_array[concomp];
|
||||
}
|
||||
// add the resulting segments
|
||||
for (int seg_idx = 0; seg_idx < concomp_seg_cnt; seg_idx++) {
|
||||
// too small of a segment: ignore
|
||||
if (concomp_seg_array[seg_idx]->Width() < 2 &&
|
||||
concomp_seg_array[seg_idx]->Height() < 2) {
|
||||
delete concomp_seg_array[seg_idx];
|
||||
} else {
|
||||
// add the new segment
|
||||
// extend the segment array
|
||||
if ((seg_cnt % kConCompAllocChunk) == 0) {
|
||||
ConComp **temp_segm_array =
|
||||
new ConComp *[seg_cnt + kConCompAllocChunk];
|
||||
if (seg_cnt > 0) {
|
||||
memcpy(temp_segm_array, seg_array, seg_cnt * sizeof(*seg_array));
|
||||
delete []seg_array;
|
||||
}
|
||||
seg_array = temp_segm_array;
|
||||
}
|
||||
seg_array[seg_cnt++] = concomp_seg_array[seg_idx];
|
||||
}
|
||||
} // segment
|
||||
if (concomp_alloc_seg != NULL) {
|
||||
delete []concomp_alloc_seg;
|
||||
}
|
||||
} // concomp
|
||||
delete []concomp_array;
|
||||
|
||||
// sort the concomps from Left2Right or Right2Left, based on the reading order
|
||||
if (seg_cnt > 0 && seg_array != NULL) {
|
||||
qsort(seg_array, seg_cnt, sizeof(*seg_array), right_2_left ?
|
||||
ConComp::Right2LeftComparer : ConComp::Left2RightComparer);
|
||||
}
|
||||
(*segment_cnt) = seg_cnt;
|
||||
return seg_array;
|
||||
}
|
||||
|
||||
// builds a char samp from a set of connected components
|
||||
CharSamp *CharSamp::FromConComps(ConComp **concomp_array, int strt_concomp,
|
||||
int seg_flags_size, int *seg_flags,
|
||||
bool *left_most, bool *right_most,
|
||||
int word_hgt) {
|
||||
int concomp;
|
||||
int end_concomp;
|
||||
int concomp_cnt = 0;
|
||||
end_concomp = strt_concomp + seg_flags_size;
|
||||
// determine ID range
|
||||
bool once = false;
|
||||
int min_id = -1;
|
||||
int max_id = -1;
|
||||
for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
|
||||
if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) {
|
||||
if (!once) {
|
||||
min_id = concomp_array[concomp]->ID();
|
||||
max_id = concomp_array[concomp]->ID();
|
||||
once = true;
|
||||
} else {
|
||||
UpdateRange(concomp_array[concomp]->ID(), &min_id, &max_id);
|
||||
}
|
||||
concomp_cnt++;
|
||||
}
|
||||
}
|
||||
if (concomp_cnt < 1 || !once || min_id == -1 || max_id == -1) {
|
||||
return NULL;
|
||||
}
|
||||
// alloc memo for computing leftmost and right most attributes
|
||||
int id_cnt = max_id - min_id + 1;
|
||||
bool *id_exist = new bool[id_cnt];
|
||||
bool *left_most_exist = new bool[id_cnt];
|
||||
bool *right_most_exist = new bool[id_cnt];
|
||||
memset(id_exist, 0, id_cnt * sizeof(*id_exist));
|
||||
memset(left_most_exist, 0, id_cnt * sizeof(*left_most_exist));
|
||||
memset(right_most_exist, 0, id_cnt * sizeof(*right_most_exist));
|
||||
// find the dimensions of the charsamp
|
||||
once = false;
|
||||
int left = -1;
|
||||
int right = -1;
|
||||
int top = -1;
|
||||
int bottom = -1;
|
||||
int unq_ids = 0;
|
||||
int unq_left_most = 0;
|
||||
int unq_right_most = 0;
|
||||
for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
|
||||
if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) {
|
||||
if (!once) {
|
||||
left = concomp_array[concomp]->Left();
|
||||
right = concomp_array[concomp]->Right();
|
||||
top = concomp_array[concomp]->Top();
|
||||
bottom = concomp_array[concomp]->Bottom();
|
||||
once = true;
|
||||
} else {
|
||||
UpdateRange(concomp_array[concomp]->Left(),
|
||||
concomp_array[concomp]->Right(), &left, &right);
|
||||
UpdateRange(concomp_array[concomp]->Top(),
|
||||
concomp_array[concomp]->Bottom(), &top, &bottom);
|
||||
}
|
||||
// count unq ids, unq left most and right mosts ids
|
||||
int concomp_id = concomp_array[concomp]->ID() - min_id;
|
||||
if (!id_exist[concomp_id]) {
|
||||
id_exist[concomp_id] = true;
|
||||
unq_ids++;
|
||||
}
|
||||
if (concomp_array[concomp]->LeftMost()) {
|
||||
if (left_most_exist[concomp_id] == false) {
|
||||
left_most_exist[concomp_id] = true;
|
||||
unq_left_most++;
|
||||
}
|
||||
}
|
||||
if (concomp_array[concomp]->RightMost()) {
|
||||
if (right_most_exist[concomp_id] == false) {
|
||||
right_most_exist[concomp_id] = true;
|
||||
unq_right_most++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
delete []id_exist;
|
||||
delete []left_most_exist;
|
||||
delete []right_most_exist;
|
||||
if (!once || left == -1 || top == -1 || right == -1 || bottom == -1) {
|
||||
return NULL;
|
||||
}
|
||||
(*left_most) = (unq_left_most >= unq_ids);
|
||||
(*right_most) = (unq_right_most >= unq_ids);
|
||||
// create the char sample object
|
||||
CharSamp *samp = new CharSamp(left, top, right - left + 1, bottom - top + 1);
|
||||
|
||||
// set the foreground pixels
|
||||
for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
|
||||
if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) {
|
||||
ConCompPt *pt_ptr = concomp_array[concomp]->Head();
|
||||
while (pt_ptr) {
|
||||
samp->line_buff_[pt_ptr->y() - top][pt_ptr->x() - left] = 0;
|
||||
pt_ptr = pt_ptr->Next();
|
||||
}
|
||||
}
|
||||
}
|
||||
return samp;
|
||||
}
|
||||
|
||||
// clones the object
|
||||
CharSamp *CharSamp::Clone() const {
|
||||
// create the cropped char samp
|
||||
CharSamp *samp = new CharSamp(left_, top_, wid_, hgt_);
|
||||
samp->SetLabel(label32_);
|
||||
samp->SetFirstChar(first_char_);
|
||||
samp->SetLastChar(last_char_);
|
||||
samp->SetNormTop(norm_top_);
|
||||
samp->SetNormBottom(norm_bottom_);
|
||||
samp->SetNormAspectRatio(norm_aspect_ratio_);
|
||||
// copy the bitmap to the cropped img
|
||||
Copy(0, 0, wid_, hgt_, samp);
|
||||
return samp;
|
||||
}
|
||||
|
||||
// Load a Char Samp from a dump file
|
||||
CharSamp *CharSamp::FromCharDumpFile(unsigned char **raw_data_ptr) {
|
||||
unsigned int val32;
|
||||
char_32 *label32;
|
||||
unsigned char *raw_data = *raw_data_ptr;
|
||||
|
||||
// read and check 32 bit marker
|
||||
memcpy(&val32, raw_data, sizeof(val32));
|
||||
raw_data += sizeof(val32);
|
||||
if (val32 != 0xabd0fefe) {
|
||||
return NULL;
|
||||
}
|
||||
// read label length,
|
||||
memcpy(&val32, raw_data, sizeof(val32));
|
||||
raw_data += sizeof(val32);
|
||||
// the label is not null terminated in the file
|
||||
if (val32 > 0 && val32 < MAX_UINT32) {
|
||||
label32 = new char_32[val32 + 1];
|
||||
// read label
|
||||
memcpy(label32, raw_data, val32 * sizeof(*label32));
|
||||
raw_data += (val32 * sizeof(*label32));
|
||||
// null terminate
|
||||
label32[val32] = 0;
|
||||
} else {
|
||||
label32 = NULL;
|
||||
}
|
||||
|
||||
// create the object
|
||||
CharSamp *char_samp = new CharSamp();
|
||||
|
||||
// read coordinates
|
||||
char_samp->label32_ = label32;
|
||||
memcpy(&char_samp->page_, raw_data, sizeof(char_samp->page_));
|
||||
raw_data += sizeof(char_samp->page_);
|
||||
memcpy(&char_samp->left_, raw_data, sizeof(char_samp->left_));
|
||||
raw_data += sizeof(char_samp->left_);
|
||||
memcpy(&char_samp->top_, raw_data, sizeof(char_samp->top_));
|
||||
raw_data += sizeof(char_samp->top_);
|
||||
memcpy(&char_samp->first_char_, raw_data, sizeof(char_samp->first_char_));
|
||||
raw_data += sizeof(char_samp->first_char_);
|
||||
memcpy(&char_samp->last_char_, raw_data, sizeof(char_samp->last_char_));
|
||||
raw_data += sizeof(char_samp->last_char_);
|
||||
memcpy(&char_samp->norm_top_, raw_data, sizeof(char_samp->norm_top_));
|
||||
raw_data += sizeof(char_samp->norm_top_);
|
||||
memcpy(&char_samp->norm_bottom_, raw_data, sizeof(char_samp->norm_bottom_));
|
||||
raw_data += sizeof(char_samp->norm_bottom_);
|
||||
memcpy(&char_samp->norm_aspect_ratio_, raw_data,
|
||||
sizeof(char_samp->norm_aspect_ratio_));
|
||||
raw_data += sizeof(char_samp->norm_aspect_ratio_);
|
||||
|
||||
// load the Bmp8 part
|
||||
if (char_samp->LoadFromCharDumpFile(&raw_data) == false) {
|
||||
delete char_samp;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
(*raw_data_ptr) = raw_data;
|
||||
return char_samp;
|
||||
}
|
||||
|
||||
// computes the features corresponding to the char sample
|
||||
bool CharSamp::ComputeFeatures(int conv_grid_size, float *features) {
|
||||
// Create a scaled BMP
|
||||
CharSamp *scaled_bmp = Scale(conv_grid_size, conv_grid_size);
|
||||
if (!scaled_bmp) {
|
||||
return false;
|
||||
}
|
||||
// prepare input
|
||||
unsigned char *buff = scaled_bmp->RawData();
|
||||
// bitmap features
|
||||
int input;
|
||||
int bmp_size = conv_grid_size * conv_grid_size;
|
||||
for (input = 0; input < bmp_size; input++) {
|
||||
features[input] = 255.0f - (1.0f * buff[input]);
|
||||
}
|
||||
// word context features
|
||||
features[input++] = FirstChar();
|
||||
features[input++] = LastChar();
|
||||
features[input++] = NormTop();
|
||||
features[input++] = NormBottom();
|
||||
features[input++] = NormAspectRatio();
|
||||
delete scaled_bmp;
|
||||
return true;
|
||||
}
|
||||
} // namespace tesseract
|
158
cube/char_samp.h
158
cube/char_samp.h
@ -1,158 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: char_samp.h
|
||||
* Description: Declaration of a Character Bitmap Sample Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The CharSamp inherits the Bmp8 class that represents images of
|
||||
// words, characters and segments throughout Cube
|
||||
// CharSamp adds more data members to hold the physical location of the image
|
||||
// in a page, page number in a book if available.
|
||||
// It also holds the label (GT) of the image that might correspond to a single
|
||||
// character or a word
|
||||
// It also provides methods for segmenting, scaling and cropping of the sample
|
||||
|
||||
#ifndef CHAR_SAMP_H
|
||||
#define CHAR_SAMP_H
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string>
|
||||
#include "bmp_8.h"
|
||||
#include "string_32.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class CharSamp : public Bmp8 {
|
||||
public:
|
||||
CharSamp();
|
||||
CharSamp(int wid, int hgt);
|
||||
CharSamp(int left, int top, int wid, int hgt);
|
||||
~CharSamp();
|
||||
// accessor methods
|
||||
unsigned short Left() const { return left_; }
|
||||
unsigned short Right() const { return left_ + wid_; }
|
||||
unsigned short Top() const { return top_; }
|
||||
unsigned short Bottom() const { return top_ + hgt_; }
|
||||
unsigned short Page() const { return page_; }
|
||||
unsigned short NormTop() const { return norm_top_; }
|
||||
unsigned short NormBottom() const { return norm_bottom_; }
|
||||
unsigned short NormAspectRatio() const { return norm_aspect_ratio_; }
|
||||
unsigned short FirstChar() const { return first_char_; }
|
||||
unsigned short LastChar() const { return last_char_; }
|
||||
char_32 Label() const {
|
||||
if (label32_ == NULL || LabelLen() != 1) {
|
||||
return 0;
|
||||
}
|
||||
return label32_[0];
|
||||
}
|
||||
char_32 * StrLabel() const { return label32_; }
|
||||
string stringLabel() const;
|
||||
|
||||
void SetLeft(unsigned short left) { left_ = left; }
|
||||
void SetTop(unsigned short top) { top_ = top; }
|
||||
void SetPage(unsigned short page) { page_ = page; }
|
||||
void SetLabel(char_32 label) {
|
||||
delete []label32_;
|
||||
label32_ = new char_32[2];
|
||||
label32_[0] = label;
|
||||
label32_[1] = 0;
|
||||
}
|
||||
void SetLabel(const char_32 *label32) {
|
||||
delete []label32_;
|
||||
label32_ = NULL;
|
||||
if (label32 != NULL) {
|
||||
// remove any byte order marks if any
|
||||
if (label32[0] == 0xfeff) {
|
||||
label32++;
|
||||
}
|
||||
int len = LabelLen(label32);
|
||||
label32_ = new char_32[len + 1];
|
||||
memcpy(label32_, label32, len * sizeof(*label32));
|
||||
label32_[len] = 0;
|
||||
}
|
||||
}
|
||||
void SetLabel(string str);
|
||||
void SetNormTop(unsigned short norm_top) { norm_top_ = norm_top; }
|
||||
void SetNormBottom(unsigned short norm_bottom) {
|
||||
norm_bottom_ = norm_bottom;
|
||||
}
|
||||
void SetNormAspectRatio(unsigned short norm_aspect_ratio) {
|
||||
norm_aspect_ratio_ = norm_aspect_ratio;
|
||||
}
|
||||
void SetFirstChar(unsigned short first_char) {
|
||||
first_char_ = first_char;
|
||||
}
|
||||
void SetLastChar(unsigned short last_char) {
|
||||
last_char_ = last_char;
|
||||
}
|
||||
|
||||
// Saves the charsamp to a dump file
|
||||
bool Save2CharDumpFile(FILE *fp) const;
|
||||
// Crops the underlying image and returns a new CharSamp with the
|
||||
// same character information but new dimensions. Warning: does not
|
||||
// necessarily set the normalized top and bottom correctly since
|
||||
// those depend on its location within the word (or CubeSearchObject).
|
||||
CharSamp *Crop();
|
||||
// Computes the connected components of the char sample
|
||||
ConComp **Segment(int *seg_cnt, bool right_2_left, int max_hist_wnd,
|
||||
int min_con_comp_size) const;
|
||||
// returns a copy of the charsamp that is scaled to the
|
||||
// specified width and height
|
||||
CharSamp *Scale(int wid, int hgt, bool isotropic = true);
|
||||
// returns a Clone of the charsample
|
||||
CharSamp *Clone() const;
|
||||
// computes the features corresponding to the char sample
|
||||
bool ComputeFeatures(int conv_grid_size, float *features);
|
||||
// Load a Char Samp from a dump file
|
||||
static CharSamp *FromCharDumpFile(CachedFile *fp);
|
||||
static CharSamp *FromCharDumpFile(FILE *fp);
|
||||
static CharSamp *FromCharDumpFile(unsigned char **raw_data);
|
||||
static CharSamp *FromRawData(int left, int top, int wid, int hgt,
|
||||
unsigned char *data);
|
||||
static CharSamp *FromConComps(ConComp **concomp_array,
|
||||
int strt_concomp, int seg_flags_size,
|
||||
int *seg_flags, bool *left_most,
|
||||
bool *right_most, int word_hgt);
|
||||
static int AuxFeatureCnt() { return (5); }
|
||||
// Return the length of the label string
|
||||
int LabelLen() const { return LabelLen(label32_); }
|
||||
static int LabelLen(const char_32 *label32) {
|
||||
if (label32 == NULL) {
|
||||
return 0;
|
||||
}
|
||||
int len = 0;
|
||||
while (label32[++len] != 0);
|
||||
return len;
|
||||
}
|
||||
private:
|
||||
char_32 * label32_;
|
||||
unsigned short page_;
|
||||
unsigned short left_;
|
||||
unsigned short top_;
|
||||
// top of sample normalized to a word height of 255
|
||||
unsigned short norm_top_;
|
||||
// bottom of sample normalized to a word height of 255
|
||||
unsigned short norm_bottom_;
|
||||
// 255 * ratio of character width to (width + height)
|
||||
unsigned short norm_aspect_ratio_;
|
||||
unsigned short first_char_;
|
||||
unsigned short last_char_;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // CHAR_SAMP_H
|
@ -1,30 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: char_samp_enum.cpp
|
||||
* Description: Implementation of a Character Sample Enumerator Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include "char_samp_enum.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
CharSampEnum::CharSampEnum() {
|
||||
}
|
||||
|
||||
CharSampEnum::~CharSampEnum() {
|
||||
}
|
||||
|
||||
} // namespace ocrlib
|
@ -1,38 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: char_samp_enum.h
|
||||
* Description: Declaration of a Character Sample Enumerator Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The CharSampEnum class provides the base class for CharSamp class
|
||||
// Enumerators. This is typically used to implement dump file readers
|
||||
|
||||
#ifndef CHARSAMP_ENUM_H
|
||||
#define CHARSAMP_ENUM_H
|
||||
|
||||
#include "char_samp.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class CharSampEnum {
|
||||
public:
|
||||
CharSampEnum();
|
||||
virtual ~CharSampEnum();
|
||||
virtual bool EnumCharSamp(CharSamp *char_samp, float progress) = 0;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // CHARSAMP_ENUM_H
|
@ -1,170 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: char_samp_enum.cpp
|
||||
* Description: Implementation of a Character Sample Set Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string>
|
||||
#include "char_samp_set.h"
|
||||
#include "cached_file.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
CharSampSet::CharSampSet() {
|
||||
cnt_ = 0;
|
||||
samp_buff_ = NULL;
|
||||
own_samples_ = false;
|
||||
}
|
||||
|
||||
CharSampSet::~CharSampSet() {
|
||||
Cleanup();
|
||||
}
|
||||
|
||||
// free buffers and init vars
|
||||
void CharSampSet::Cleanup() {
|
||||
if (samp_buff_ != NULL) {
|
||||
// only free samples if owned by class
|
||||
if (own_samples_ == true) {
|
||||
for (int samp_idx = 0; samp_idx < cnt_; samp_idx++) {
|
||||
delete samp_buff_[samp_idx];
|
||||
}
|
||||
}
|
||||
delete []samp_buff_;
|
||||
}
|
||||
cnt_ = 0;
|
||||
samp_buff_ = NULL;
|
||||
}
|
||||
|
||||
// add a new sample
|
||||
bool CharSampSet::Add(CharSamp *char_samp) {
|
||||
if ((cnt_ % SAMP_ALLOC_BLOCK) == 0) {
|
||||
// create an extended buffer
|
||||
CharSamp **new_samp_buff =
|
||||
reinterpret_cast<CharSamp **>(new CharSamp *[cnt_ + SAMP_ALLOC_BLOCK]);
|
||||
// copy old contents
|
||||
if (cnt_ > 0) {
|
||||
memcpy(new_samp_buff, samp_buff_, cnt_ * sizeof(*samp_buff_));
|
||||
delete []samp_buff_;
|
||||
}
|
||||
samp_buff_ = new_samp_buff;
|
||||
}
|
||||
samp_buff_[cnt_++] = char_samp;
|
||||
return true;
|
||||
}
|
||||
|
||||
// load char samples from file
|
||||
bool CharSampSet::LoadCharSamples(FILE *fp) {
|
||||
// free existing
|
||||
Cleanup();
|
||||
// samples are created here and owned by the class
|
||||
own_samples_ = true;
|
||||
// start loading char samples
|
||||
while (feof(fp) == 0) {
|
||||
CharSamp *new_samp = CharSamp::FromCharDumpFile(fp);
|
||||
if (new_samp != NULL) {
|
||||
if (Add(new_samp) == false) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// creates a CharSampSet object from file
|
||||
CharSampSet * CharSampSet::FromCharDumpFile(string file_name) {
|
||||
FILE *fp;
|
||||
unsigned int val32;
|
||||
// open the file
|
||||
fp = fopen(file_name.c_str(), "rb");
|
||||
if (fp == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
// read and verify marker
|
||||
if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
|
||||
fclose(fp);
|
||||
return NULL;
|
||||
}
|
||||
if (val32 != 0xfefeabd0) {
|
||||
fclose(fp);
|
||||
return NULL;
|
||||
}
|
||||
// create an object
|
||||
CharSampSet *samp_set = new CharSampSet();
|
||||
if (samp_set->LoadCharSamples(fp) == false) {
|
||||
delete samp_set;
|
||||
samp_set = NULL;
|
||||
}
|
||||
fclose(fp);
|
||||
return samp_set;
|
||||
}
|
||||
|
||||
// Create a new Char Dump file
|
||||
FILE *CharSampSet::CreateCharDumpFile(string file_name) {
|
||||
FILE *fp;
|
||||
unsigned int val32;
|
||||
// create the file
|
||||
fp = fopen(file_name.c_str(), "wb");
|
||||
if (!fp) {
|
||||
return NULL;
|
||||
}
|
||||
// read and verify marker
|
||||
val32 = 0xfefeabd0;
|
||||
if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
|
||||
fclose(fp);
|
||||
return NULL;
|
||||
}
|
||||
return fp;
|
||||
}
|
||||
|
||||
// Enumerate the Samples in the set one-by-one calling the enumertor's
|
||||
// EnumCharSamp method for each sample
|
||||
bool CharSampSet::EnumSamples(string file_name, CharSampEnum *enum_obj) {
|
||||
CachedFile *fp_in;
|
||||
unsigned int val32;
|
||||
long i64_size,
|
||||
i64_pos;
|
||||
// open the file
|
||||
fp_in = new CachedFile(file_name);
|
||||
i64_size = fp_in->Size();
|
||||
if (i64_size < 1) {
|
||||
return false;
|
||||
}
|
||||
// read and verify marker
|
||||
if (fp_in->Read(&val32, sizeof(val32)) != sizeof(val32)) {
|
||||
return false;
|
||||
}
|
||||
if (val32 != 0xfefeabd0) {
|
||||
return false;
|
||||
}
|
||||
// start loading char samples
|
||||
while (fp_in->eof() == false) {
|
||||
CharSamp *new_samp = CharSamp::FromCharDumpFile(fp_in);
|
||||
i64_pos = fp_in->Tell();
|
||||
if (new_samp != NULL) {
|
||||
bool ret_flag = (enum_obj)->EnumCharSamp(new_samp,
|
||||
(100.0f * i64_pos / i64_size));
|
||||
delete new_samp;
|
||||
if (ret_flag == false) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
delete fp_in;
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace ocrlib
|
@ -1,73 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: char_samp_set.h
|
||||
* Description: Declaration of a Character Sample Set Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The CharSampSet set encapsulates a set of CharSet objects typically
|
||||
// but not necessarily loaded from a file
|
||||
// It provides methods to load samples from File, Create a new file and
|
||||
// Add new char samples to the set
|
||||
|
||||
#ifndef CHAR_SAMP_SET_H
|
||||
#define CHAR_SAMP_SET_H
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string>
|
||||
#include "char_samp.h"
|
||||
#include "char_samp_enum.h"
|
||||
#include "char_set.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// chunks of samp pointers to allocate
|
||||
#define SAMP_ALLOC_BLOCK 10000
|
||||
|
||||
class CharSampSet {
|
||||
public:
|
||||
CharSampSet();
|
||||
~CharSampSet();
|
||||
// return sample count
|
||||
int SampleCount() const { return cnt_; }
|
||||
// returns samples buffer
|
||||
CharSamp ** Samples() const { return samp_buff_; }
|
||||
// Create a CharSampSet set object from a file
|
||||
static CharSampSet *FromCharDumpFile(string file_name);
|
||||
// Enumerate the Samples in the set one-by-one calling the enumertor's
|
||||
// EnumCharSamp method for each sample
|
||||
static bool EnumSamples(string file_name, CharSampEnum *enumerator);
|
||||
// Create a new Char Dump file
|
||||
static FILE *CreateCharDumpFile(string file_name);
|
||||
// Add a new sample to the set
|
||||
bool Add(CharSamp *char_samp);
|
||||
|
||||
private:
|
||||
// sample count
|
||||
int cnt_;
|
||||
// the char samp array
|
||||
CharSamp **samp_buff_;
|
||||
// Are the samples owned by the set or not.
|
||||
// Determines whether we should cleanup in the end
|
||||
bool own_samples_;
|
||||
// Cleanup
|
||||
void Cleanup();
|
||||
// Load character samples from a file
|
||||
bool LoadCharSamples(FILE *fp);
|
||||
};
|
||||
}
|
||||
|
||||
#endif // CHAR_SAMP_SET_H
|
@ -1,168 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: char_samp_enum.cpp
|
||||
* Description: Implementation of a Character Set Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "char_set.h"
|
||||
#include "cube_utils.h"
|
||||
#include "tessdatamanager.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
CharSet::CharSet() {
|
||||
class_cnt_ = 0;
|
||||
class_strings_ = NULL;
|
||||
unicharset_map_ = NULL;
|
||||
init_ = false;
|
||||
|
||||
// init hash table
|
||||
memset(hash_bin_size_, 0, sizeof(hash_bin_size_));
|
||||
}
|
||||
|
||||
CharSet::~CharSet() {
|
||||
if (class_strings_ != NULL) {
|
||||
for (int cls = 0; cls < class_cnt_; cls++) {
|
||||
if (class_strings_[cls] != NULL) {
|
||||
delete class_strings_[cls];
|
||||
}
|
||||
}
|
||||
delete []class_strings_;
|
||||
class_strings_ = NULL;
|
||||
}
|
||||
delete []unicharset_map_;
|
||||
}
|
||||
|
||||
// Creates CharSet object by reading the unicharset from the
|
||||
// TessDatamanager, and mapping Cube's unicharset to Tesseract's if
|
||||
// they differ.
|
||||
CharSet *CharSet::Create(TessdataManager *tessdata_manager,
|
||||
UNICHARSET *tess_unicharset) {
|
||||
CharSet *char_set = new CharSet();
|
||||
|
||||
// First look for Cube's unicharset; if not there, use tesseract's
|
||||
bool cube_unicharset_exists;
|
||||
if (!(cube_unicharset_exists =
|
||||
tessdata_manager->SeekToStart(TESSDATA_CUBE_UNICHARSET)) &&
|
||||
!tessdata_manager->SeekToStart(TESSDATA_UNICHARSET)) {
|
||||
fprintf(stderr, "Cube ERROR (CharSet::Create): could not find "
|
||||
"either cube or tesseract unicharset\n");
|
||||
return NULL;
|
||||
}
|
||||
FILE *charset_fp = tessdata_manager->GetDataFilePtr();
|
||||
if (!charset_fp) {
|
||||
fprintf(stderr, "Cube ERROR (CharSet::Create): could not load "
|
||||
"a unicharset\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// If we found a cube unicharset separate from tesseract's, load it and
|
||||
// map its unichars to tesseract's; if only one unicharset exists,
|
||||
// just load it.
|
||||
bool loaded;
|
||||
if (cube_unicharset_exists) {
|
||||
char_set->cube_unicharset_.load_from_file(charset_fp);
|
||||
loaded = tessdata_manager->SeekToStart(TESSDATA_CUBE_UNICHARSET);
|
||||
loaded = loaded && char_set->LoadSupportedCharList(
|
||||
tessdata_manager->GetDataFilePtr(), tess_unicharset);
|
||||
char_set->unicharset_ = &char_set->cube_unicharset_;
|
||||
} else {
|
||||
loaded = char_set->LoadSupportedCharList(charset_fp, NULL);
|
||||
char_set->unicharset_ = tess_unicharset;
|
||||
}
|
||||
if (!loaded) {
|
||||
delete char_set;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
char_set->init_ = true;
|
||||
return char_set;
|
||||
}
|
||||
|
||||
// Load the list of supported chars from the given data file pointer.
|
||||
bool CharSet::LoadSupportedCharList(FILE *fp, UNICHARSET *tess_unicharset) {
|
||||
if (init_)
|
||||
return true;
|
||||
|
||||
char str_line[256];
|
||||
// init hash table
|
||||
memset(hash_bin_size_, 0, sizeof(hash_bin_size_));
|
||||
// read the char count
|
||||
if (fgets(str_line, sizeof(str_line), fp) == NULL) {
|
||||
fprintf(stderr, "Cube ERROR (CharSet::InitMemory): could not "
|
||||
"read char count.\n");
|
||||
return false;
|
||||
}
|
||||
class_cnt_ = atoi(str_line);
|
||||
if (class_cnt_ < 2) {
|
||||
fprintf(stderr, "Cube ERROR (CharSet::InitMemory): invalid "
|
||||
"class count: %d\n", class_cnt_);
|
||||
return false;
|
||||
}
|
||||
// memory for class strings
|
||||
class_strings_ = new string_32*[class_cnt_];
|
||||
// memory for unicharset map
|
||||
if (tess_unicharset) {
|
||||
unicharset_map_ = new int[class_cnt_];
|
||||
}
|
||||
|
||||
// Read in character strings and add to hash table
|
||||
for (int class_id = 0; class_id < class_cnt_; class_id++) {
|
||||
// Read the class string
|
||||
if (fgets(str_line, sizeof(str_line), fp) == NULL) {
|
||||
fprintf(stderr, "Cube ERROR (CharSet::ReadAndHashStrings): "
|
||||
"could not read class string with class_id=%d.\n", class_id);
|
||||
return false;
|
||||
}
|
||||
// Terminate at space if any
|
||||
char *p = strchr(str_line, ' ');
|
||||
if (p != NULL)
|
||||
*p = '\0';
|
||||
// Convert to UTF32 and store
|
||||
string_32 str32;
|
||||
// Convert NULL to a space
|
||||
if (strcmp(str_line, "NULL") == 0) {
|
||||
strcpy(str_line, " ");
|
||||
}
|
||||
CubeUtils::UTF8ToUTF32(str_line, &str32);
|
||||
class_strings_[class_id] = new string_32(str32);
|
||||
|
||||
// Add to hash-table
|
||||
int hash_val = Hash(reinterpret_cast<const char_32 *>(str32.c_str()));
|
||||
if (hash_bin_size_[hash_val] >= kMaxHashSize) {
|
||||
fprintf(stderr, "Cube ERROR (CharSet::LoadSupportedCharList): hash "
|
||||
"table is full.\n");
|
||||
return false;
|
||||
}
|
||||
hash_bins_[hash_val][hash_bin_size_[hash_val]++] = class_id;
|
||||
|
||||
if (tess_unicharset != NULL) {
|
||||
// Add class id to unicharset map
|
||||
UNICHAR_ID tess_id = tess_unicharset->unichar_to_id(str_line);
|
||||
if (tess_id == INVALID_UNICHAR_ID) {
|
||||
tess_unicharset->unichar_insert(str_line);
|
||||
tess_id = tess_unicharset->unichar_to_id(str_line);
|
||||
}
|
||||
ASSERT_HOST(tess_id != INVALID_UNICHAR_ID);
|
||||
unicharset_map_[class_id] = tess_id;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // tesseract
|
174
cube/char_set.h
174
cube/char_set.h
@ -1,174 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: char_samp_enum.h
|
||||
* Description: Declaration of a Character Set Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The CharSet class encapsulates the list of 32-bit strings/characters that
|
||||
// Cube supports for a specific language. The char set is loaded from the
|
||||
// .unicharset file corresponding to a specific language
|
||||
// Each string has a corresponding int class-id that gets used throughout Cube
|
||||
// The class provides pass back and forth conversion between the class-id
|
||||
// and its corresponding 32-bit string. This is done using a hash table that
|
||||
// maps the string to the class id.
|
||||
|
||||
#ifndef CHAR_SET_H
|
||||
#define CHAR_SET_H
|
||||
|
||||
#include <string.h>
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
|
||||
#include "string_32.h"
|
||||
#include "tessdatamanager.h"
|
||||
#include "unicharset.h"
|
||||
#include "cube_const.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class CharSet {
|
||||
public:
|
||||
CharSet();
|
||||
~CharSet();
|
||||
|
||||
// Returns true if Cube is sharing Tesseract's unicharset.
|
||||
inline bool SharedUnicharset() { return (unicharset_map_ == NULL); }
|
||||
|
||||
// Returns the class id corresponding to a 32-bit string. Returns -1
|
||||
// if the string is not supported. This is done by hashing the
|
||||
// string and then looking up the string in the hash-bin if there
|
||||
// are collisions.
|
||||
inline int ClassID(const char_32 *str) const {
|
||||
int hash_val = Hash(str);
|
||||
if (hash_bin_size_[hash_val] == 0)
|
||||
return -1;
|
||||
for (int bin = 0; bin < hash_bin_size_[hash_val]; bin++) {
|
||||
if (class_strings_[hash_bins_[hash_val][bin]]->compare(str) == 0)
|
||||
return hash_bins_[hash_val][bin];
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
// Same as above but using a 32-bit char instead of a string
|
||||
inline int ClassID(char_32 ch) const {
|
||||
int hash_val = Hash(ch);
|
||||
if (hash_bin_size_[hash_val] == 0)
|
||||
return -1;
|
||||
for (int bin = 0; bin < hash_bin_size_[hash_val]; bin++) {
|
||||
if ((*class_strings_[hash_bins_[hash_val][bin]])[0] == ch &&
|
||||
class_strings_[hash_bins_[hash_val][bin]]->length() == 1) {
|
||||
return hash_bins_[hash_val][bin];
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
// Retrieve the unicharid in Tesseract's unicharset corresponding
|
||||
// to a 32-bit string. When Tesseract and Cube share the same
|
||||
// unicharset, this will just be the class id.
|
||||
inline int UnicharID(const char_32 *str) const {
|
||||
int class_id = ClassID(str);
|
||||
if (class_id == INVALID_UNICHAR_ID)
|
||||
return INVALID_UNICHAR_ID;
|
||||
int unichar_id;
|
||||
if (unicharset_map_)
|
||||
unichar_id = unicharset_map_[class_id];
|
||||
else
|
||||
unichar_id = class_id;
|
||||
return unichar_id;
|
||||
}
|
||||
// Same as above but using a 32-bit char instead of a string
|
||||
inline int UnicharID(char_32 ch) const {
|
||||
int class_id = ClassID(ch);
|
||||
if (class_id == INVALID_UNICHAR_ID)
|
||||
return INVALID_UNICHAR_ID;
|
||||
int unichar_id;
|
||||
if (unicharset_map_)
|
||||
unichar_id = unicharset_map_[class_id];
|
||||
else
|
||||
unichar_id = class_id;
|
||||
return unichar_id;
|
||||
}
|
||||
// Returns the 32-bit string corresponding to a class id
|
||||
inline const char_32 * ClassString(int class_id) const {
|
||||
if (class_id < 0 || class_id >= class_cnt_) {
|
||||
return NULL;
|
||||
}
|
||||
return reinterpret_cast<const char_32 *>(class_strings_[class_id]->c_str());
|
||||
}
|
||||
// Returns the count of supported strings
|
||||
inline int ClassCount() const { return class_cnt_; }
|
||||
|
||||
// Creates CharSet object by reading the unicharset from the
|
||||
// TessDatamanager, and mapping Cube's unicharset to Tesseract's if
|
||||
// they differ.
|
||||
static CharSet *Create(TessdataManager *tessdata_manager,
|
||||
UNICHARSET *tess_unicharset);
|
||||
|
||||
// Return the UNICHARSET cube is using for recognition internally --
|
||||
// ClassId() returns unichar_id's in this unicharset.
|
||||
UNICHARSET *InternalUnicharset() { return unicharset_; }
|
||||
|
||||
private:
|
||||
// Hash table configuration params. Determined emperically on
|
||||
// the supported languages so far (Eng, Ara, Hin). Might need to be
|
||||
// tuned for speed when more languages are supported
|
||||
static const int kHashBins = 3001;
|
||||
static const int kMaxHashSize = 16;
|
||||
|
||||
// Using djb2 hashing function to hash a 32-bit string
|
||||
// introduced in http://www.cse.yorku.ca/~oz/hash.html
|
||||
static inline int Hash(const char_32 *str) {
|
||||
unsigned long hash = 5381;
|
||||
int c;
|
||||
while ((c = *str++))
|
||||
hash = ((hash << 5) + hash) + c;
|
||||
return (hash%kHashBins);
|
||||
}
|
||||
// Same as above but for a single char
|
||||
static inline int Hash(char_32 ch) {
|
||||
char_32 b[2];
|
||||
b[0] = ch;
|
||||
b[1] = 0;
|
||||
return Hash(b);
|
||||
}
|
||||
|
||||
// Load the list of supported chars from the given data file
|
||||
// pointer. If tess_unicharset is non-NULL, mapping each Cube class
|
||||
// id to a tesseract unicharid.
|
||||
bool LoadSupportedCharList(FILE *fp, UNICHARSET *tess_unicharset);
|
||||
|
||||
// class count
|
||||
int class_cnt_;
|
||||
// hash-bin sizes array
|
||||
int hash_bin_size_[kHashBins];
|
||||
// hash bins
|
||||
int hash_bins_[kHashBins][kMaxHashSize];
|
||||
// supported strings array
|
||||
string_32 **class_strings_;
|
||||
// map from class id to secondary (tesseract's) unicharset's ids
|
||||
int *unicharset_map_;
|
||||
// A unicharset which is filled in with a Tesseract-style UNICHARSET for
|
||||
// cube's data if our unicharset is different from tesseract's.
|
||||
UNICHARSET cube_unicharset_;
|
||||
// This points to either the tess_unicharset we're passed or cube_unicharset_,
|
||||
// depending upon whether we just have one unicharset or one for each
|
||||
// tesseract and cube, respectively.
|
||||
UNICHARSET *unicharset_;
|
||||
// has the char set been initialized flag
|
||||
bool init_;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // CHAR_SET_H
|
@ -1,94 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: classifier_base.h
|
||||
* Description: Declaration of the Base Character Classifier
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The CharClassifier class is the abstract class for any character/grapheme
|
||||
// classifier.
|
||||
|
||||
#ifndef CHAR_CLASSIFIER_BASE_H
|
||||
#define CHAR_CLASSIFIER_BASE_H
|
||||
|
||||
#include <string>
|
||||
#include "char_samp.h"
|
||||
#include "char_altlist.h"
|
||||
#include "char_set.h"
|
||||
#include "feature_base.h"
|
||||
#include "lang_model.h"
|
||||
#include "tuning_params.h"
|
||||
|
||||
namespace tesseract {
|
||||
class CharClassifier {
|
||||
public:
|
||||
CharClassifier(CharSet *char_set, TuningParams *params,
|
||||
FeatureBase *feat_extract) {
|
||||
char_set_ = char_set;
|
||||
params_ = params;
|
||||
feat_extract_ = feat_extract;
|
||||
fold_sets_ = NULL;
|
||||
fold_set_cnt_ = 0;
|
||||
fold_set_len_ = NULL;
|
||||
init_ = false;
|
||||
case_sensitive_ = true;
|
||||
}
|
||||
|
||||
virtual ~CharClassifier() {
|
||||
if (fold_sets_ != NULL) {
|
||||
for (int fold_set = 0; fold_set < fold_set_cnt_; fold_set++) {
|
||||
delete []fold_sets_[fold_set];
|
||||
}
|
||||
delete []fold_sets_;
|
||||
fold_sets_ = NULL;
|
||||
}
|
||||
delete []fold_set_len_;
|
||||
fold_set_len_ = NULL;
|
||||
delete feat_extract_;
|
||||
feat_extract_ = NULL;
|
||||
}
|
||||
|
||||
// pure virtual functions that need to be implemented by any inheriting class
|
||||
virtual CharAltList * Classify(CharSamp *char_samp) = 0;
|
||||
virtual int CharCost(CharSamp *char_samp) = 0;
|
||||
virtual bool Train(CharSamp *char_samp, int ClassID) = 0;
|
||||
virtual bool SetLearnParam(char *var_name, float val) = 0;
|
||||
virtual bool Init(const string &data_file_path, const string &lang,
|
||||
LangModel *lang_mod) = 0;
|
||||
|
||||
// accessors
|
||||
FeatureBase *FeatureExtractor() {return feat_extract_;}
|
||||
inline bool CaseSensitive() const { return case_sensitive_; }
|
||||
inline void SetCaseSensitive(bool case_sensitive) {
|
||||
case_sensitive_ = case_sensitive;
|
||||
}
|
||||
|
||||
protected:
|
||||
virtual void Fold() = 0;
|
||||
virtual bool LoadFoldingSets(const string &data_file_path,
|
||||
const string &lang,
|
||||
LangModel *lang_mod) = 0;
|
||||
FeatureBase *feat_extract_;
|
||||
CharSet *char_set_;
|
||||
TuningParams *params_;
|
||||
int **fold_sets_;
|
||||
int *fold_set_len_;
|
||||
int fold_set_cnt_;
|
||||
bool init_;
|
||||
bool case_sensitive_;
|
||||
};
|
||||
} // tesseract
|
||||
|
||||
#endif // CHAR_CLASSIFIER_BASE_H
|
@ -1,85 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: classifier_factory.cpp
|
||||
* Description: Implementation of the Base Character Classifier
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string>
|
||||
#include "classifier_factory.h"
|
||||
#include "conv_net_classifier.h"
|
||||
#include "feature_base.h"
|
||||
#include "feature_bmp.h"
|
||||
#include "feature_chebyshev.h"
|
||||
#include "feature_hybrid.h"
|
||||
#include "hybrid_neural_net_classifier.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Creates a CharClassifier object of the appropriate type depending on the
|
||||
// classifier type in the settings file
|
||||
CharClassifier *CharClassifierFactory::Create(const string &data_file_path,
|
||||
const string &lang,
|
||||
LangModel *lang_mod,
|
||||
CharSet *char_set,
|
||||
TuningParams *params) {
|
||||
// create the feature extraction object
|
||||
FeatureBase *feat_extract;
|
||||
|
||||
switch (params->TypeFeature()) {
|
||||
case TuningParams::BMP:
|
||||
feat_extract = new FeatureBmp(params);
|
||||
break;
|
||||
case TuningParams::CHEBYSHEV:
|
||||
feat_extract = new FeatureChebyshev(params);
|
||||
break;
|
||||
case TuningParams::HYBRID:
|
||||
feat_extract = new FeatureHybrid(params);
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "Cube ERROR (CharClassifierFactory::Create): invalid "
|
||||
"feature type.\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// create the classifier object
|
||||
CharClassifier *classifier_obj;
|
||||
switch (params->TypeClassifier()) {
|
||||
case TuningParams::NN:
|
||||
classifier_obj = new ConvNetCharClassifier(char_set, params,
|
||||
feat_extract);
|
||||
break;
|
||||
case TuningParams::HYBRID_NN:
|
||||
classifier_obj = new HybridNeuralNetCharClassifier(char_set, params,
|
||||
feat_extract);
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "Cube ERROR (CharClassifierFactory::Create): invalid "
|
||||
"classifier type.\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Init the classifier
|
||||
if (!classifier_obj->Init(data_file_path, lang, lang_mod)) {
|
||||
delete classifier_obj;
|
||||
fprintf(stderr, "Cube ERROR (CharClassifierFactory::Create): unable "
|
||||
"to Init() character classifier object.\n");
|
||||
return NULL;
|
||||
}
|
||||
return classifier_obj;
|
||||
}
|
||||
}
|
@ -1,43 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: classifier_factory.h
|
||||
* Description: Declaration of the Base Character Classifier
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The CharClassifierFactory provides a single static method to create an
|
||||
// instance of the desired classifier
|
||||
|
||||
#ifndef CHAR_CLASSIFIER_FACTORY_H
|
||||
#define CHAR_CLASSIFIER_FACTORY_H
|
||||
|
||||
#include <string>
|
||||
#include "classifier_base.h"
|
||||
#include "lang_model.h"
|
||||
|
||||
namespace tesseract {
|
||||
class CharClassifierFactory {
|
||||
public:
|
||||
// Creates a CharClassifier object of the appropriate type depending on the
|
||||
// classifier type in the settings file
|
||||
static CharClassifier *Create(const string &data_file_path,
|
||||
const string &lang,
|
||||
LangModel *lang_mod,
|
||||
CharSet *char_set,
|
||||
TuningParams *params);
|
||||
};
|
||||
} // tesseract
|
||||
|
||||
#endif // CHAR_CLASSIFIER_FACTORY_H
|
@ -1,268 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: con_comp.cpp
|
||||
* Description: Implementation of a Connected Component class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "con_comp.h"
|
||||
#include "cube_const.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
ConComp::ConComp() {
|
||||
head_ = NULL;
|
||||
tail_ = NULL;
|
||||
left_ = 0;
|
||||
top_ = 0;
|
||||
right_ = 0;
|
||||
bottom_ = 0;
|
||||
left_most_ = false;
|
||||
right_most_ = false;
|
||||
id_ = -1;
|
||||
pt_cnt_ = 0;
|
||||
}
|
||||
|
||||
ConComp::~ConComp() {
|
||||
if (head_ != NULL) {
|
||||
ConCompPt *pt_ptr = head_;
|
||||
while (pt_ptr != NULL) {
|
||||
ConCompPt *pptNext = pt_ptr->Next();
|
||||
delete pt_ptr;
|
||||
pt_ptr = pptNext;
|
||||
}
|
||||
head_ = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// adds a pt to the conn comp and updates its boundaries
|
||||
bool ConComp::Add(int x, int y) {
|
||||
ConCompPt *pt_ptr = new ConCompPt(x, y);
|
||||
|
||||
if (head_ == NULL) {
|
||||
left_ = x;
|
||||
right_ = x;
|
||||
top_ = y;
|
||||
bottom_ = y;
|
||||
|
||||
head_ = pt_ptr;
|
||||
} else {
|
||||
left_ = left_ <= x ? left_ : x;
|
||||
top_ = top_ <= y ? top_ : y;
|
||||
right_ = right_ >= x ? right_ : x;
|
||||
bottom_ = bottom_ >= y ? bottom_ : y;
|
||||
}
|
||||
|
||||
if (tail_ != NULL) {
|
||||
tail_->SetNext(pt_ptr);
|
||||
}
|
||||
|
||||
tail_ = pt_ptr;
|
||||
pt_cnt_++;
|
||||
return true;
|
||||
}
|
||||
|
||||
// merges two connected components
|
||||
bool ConComp::Merge(ConComp *concomp) {
|
||||
if (head_ == NULL || tail_ == NULL ||
|
||||
concomp->head_ == NULL || concomp->tail_ == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
tail_->SetNext(concomp->head_);
|
||||
tail_ = concomp->tail_;
|
||||
left_ = left_ <= concomp->left_ ? left_ : concomp->left_;
|
||||
top_ = top_ <= concomp->top_ ? top_ : concomp->top_;
|
||||
right_ = right_ >= concomp->right_ ? right_ : concomp->right_;
|
||||
bottom_ = bottom_ >= concomp->bottom_ ? bottom_ : concomp->bottom_;
|
||||
pt_cnt_ += concomp->pt_cnt_;
|
||||
|
||||
concomp->head_ = NULL;
|
||||
concomp->tail_ = NULL;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Creates the x-coord density histogram after spreading
|
||||
// each x-coord position by the HIST_WND_RATIO fraction of the
|
||||
// height of the ConComp, but limited to max_hist_wnd
|
||||
int *ConComp::CreateHistogram(int max_hist_wnd) {
|
||||
int wid = right_ - left_ + 1,
|
||||
hgt = bottom_ - top_ + 1,
|
||||
hist_wnd = static_cast<int>(hgt * HIST_WND_RATIO);
|
||||
|
||||
if (hist_wnd > max_hist_wnd) {
|
||||
hist_wnd = max_hist_wnd;
|
||||
}
|
||||
|
||||
// alloc memo for histogram
|
||||
int *hist_array = new int[wid];
|
||||
|
||||
memset(hist_array, 0, wid * sizeof(*hist_array));
|
||||
|
||||
// compute windowed histogram
|
||||
ConCompPt *pt_ptr = head_;
|
||||
|
||||
while (pt_ptr != NULL) {
|
||||
int x = pt_ptr->x() - left_,
|
||||
xw = x - hist_wnd;
|
||||
|
||||
for (int xdel = -hist_wnd; xdel <= hist_wnd; xdel++, xw++) {
|
||||
if (xw >= 0 && xw < wid) {
|
||||
hist_array[xw]++;
|
||||
}
|
||||
}
|
||||
|
||||
pt_ptr = pt_ptr->Next();
|
||||
}
|
||||
|
||||
return hist_array;
|
||||
}
|
||||
|
||||
// find out the seg pts by looking for local minima in the histogram
|
||||
int *ConComp::SegmentHistogram(int *hist_array, int *seg_pt_cnt) {
|
||||
// init
|
||||
(*seg_pt_cnt) = 0;
|
||||
|
||||
int wid = right_ - left_ + 1,
|
||||
hgt = bottom_ - top_ + 1;
|
||||
|
||||
int *x_seg_pt = new int[wid];
|
||||
|
||||
int seg_pt_wnd = static_cast<int>(hgt * SEG_PT_WND_RATIO);
|
||||
|
||||
if (seg_pt_wnd > 1) {
|
||||
seg_pt_wnd = 1;
|
||||
}
|
||||
|
||||
for (int x = 2; x < (wid - 2); x++) {
|
||||
if (hist_array[x] < hist_array[x - 1] &&
|
||||
hist_array[x] < hist_array[x - 2] &&
|
||||
hist_array[x] <= hist_array[x + 1] &&
|
||||
hist_array[x] <= hist_array[x + 2]) {
|
||||
x_seg_pt[(*seg_pt_cnt)++] = x;
|
||||
x += seg_pt_wnd;
|
||||
} else if (hist_array[x] <= hist_array[x - 1] &&
|
||||
hist_array[x] <= hist_array[x - 2] &&
|
||||
hist_array[x] < hist_array[x + 1] &&
|
||||
hist_array[x] < hist_array[x + 2]) {
|
||||
x_seg_pt[(*seg_pt_cnt)++] = x;
|
||||
x += seg_pt_wnd;
|
||||
}
|
||||
}
|
||||
|
||||
// no segments, nothing to do
|
||||
if ((*seg_pt_cnt) == 0) {
|
||||
delete []x_seg_pt;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return x_seg_pt;
|
||||
}
|
||||
|
||||
// segments a concomp based on pixel density histogram local minima
|
||||
// if there were none found, it returns NULL
|
||||
// this is more useful than creating a clone of itself
|
||||
ConComp **ConComp::Segment(int max_hist_wnd, int *concomp_cnt) {
|
||||
// init
|
||||
(*concomp_cnt) = 0;
|
||||
|
||||
// No pts
|
||||
if (head_ == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int seg_pt_cnt = 0;
|
||||
|
||||
// create the histogram
|
||||
int *hist_array = CreateHistogram(max_hist_wnd);
|
||||
if (hist_array == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int *x_seg_pt = SegmentHistogram(hist_array, &seg_pt_cnt);
|
||||
|
||||
// free histogram
|
||||
delete []hist_array;
|
||||
|
||||
// no segments, nothing to do
|
||||
if (seg_pt_cnt == 0) {
|
||||
delete []x_seg_pt;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// create concomp array
|
||||
ConComp **concomp_array = new ConComp *[seg_pt_cnt + 1];
|
||||
|
||||
for (int concomp = 0; concomp <= seg_pt_cnt; concomp++) {
|
||||
concomp_array[concomp] = new ConComp();
|
||||
|
||||
// split concomps inherit the ID this concomp
|
||||
concomp_array[concomp]->SetID(id_);
|
||||
}
|
||||
|
||||
// set the left and right most attributes of the
|
||||
// appropriate concomps
|
||||
concomp_array[0]->left_most_ = true;
|
||||
concomp_array[seg_pt_cnt]->right_most_ = true;
|
||||
|
||||
// assign pts to concomps
|
||||
ConCompPt *pt_ptr = head_;
|
||||
while (pt_ptr != NULL) {
|
||||
int seg_pt;
|
||||
|
||||
// find the first seg-pt that exceeds the x value
|
||||
// of the pt
|
||||
for (seg_pt = 0; seg_pt < seg_pt_cnt; seg_pt++) {
|
||||
if ((x_seg_pt[seg_pt] + left_) > pt_ptr->x()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// add the pt to the proper concomp
|
||||
if (concomp_array[seg_pt]->Add(pt_ptr->x(), pt_ptr->y()) == false) {
|
||||
delete []x_seg_pt;
|
||||
delete []concomp_array;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
pt_ptr = pt_ptr->Next();
|
||||
}
|
||||
|
||||
delete []x_seg_pt;
|
||||
|
||||
(*concomp_cnt) = (seg_pt_cnt + 1);
|
||||
|
||||
return concomp_array;
|
||||
}
|
||||
|
||||
// Shifts the co-ordinates of all points by the specified x & y deltas
|
||||
void ConComp::Shift(int dx, int dy) {
|
||||
ConCompPt *pt_ptr = head_;
|
||||
|
||||
while (pt_ptr != NULL) {
|
||||
pt_ptr->Shift(dx, dy);
|
||||
pt_ptr = pt_ptr->Next();
|
||||
}
|
||||
|
||||
left_ += dx;
|
||||
right_ += dx;
|
||||
top_ += dy;
|
||||
bottom_ += dy;
|
||||
}
|
||||
|
||||
} // namespace tesseract
|
124
cube/con_comp.h
124
cube/con_comp.h
@ -1,124 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: con_comp.h
|
||||
* Description: Declaration of a Connected Component class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef CONCOMP_H
|
||||
#define CONCOMP_H
|
||||
|
||||
// The ConComp class implements the functionality needed for a
|
||||
// Connected Component object and Connected Component (ConComp) points.
|
||||
// The points consituting a connected component are kept in a linked-list
|
||||
// The Concomp class provided methods to:
|
||||
// 1- Compare components in L2R and R2L reading orders.
|
||||
// 2- Merge ConComps
|
||||
// 3- Compute the windowed vertical pixel density histogram for a specific
|
||||
// windows size
|
||||
// 4- Segment a ConComp based on the local windowed vertical pixel
|
||||
// density histogram local minima
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Implments a ConComp point in a linked list of points
|
||||
class ConCompPt {
|
||||
public:
|
||||
ConCompPt(int x, int y) {
|
||||
x_ = x;
|
||||
y_ = y;
|
||||
next_pt_ = NULL;
|
||||
}
|
||||
inline int x() { return x_; }
|
||||
inline int y() { return y_; }
|
||||
inline void Shift(int dx, int dy) {
|
||||
x_ += dx;
|
||||
y_ += dy;
|
||||
}
|
||||
inline ConCompPt * Next() { return next_pt_; }
|
||||
inline void SetNext(ConCompPt *pt) { next_pt_ = pt; }
|
||||
|
||||
private:
|
||||
int x_;
|
||||
int y_;
|
||||
ConCompPt *next_pt_;
|
||||
};
|
||||
|
||||
class ConComp {
|
||||
public:
|
||||
ConComp();
|
||||
virtual ~ConComp();
|
||||
// accessors
|
||||
inline ConCompPt *Head() { return head_; }
|
||||
inline int Left() const { return left_; }
|
||||
inline int Top() const { return top_; }
|
||||
inline int Right() const { return right_; }
|
||||
inline int Bottom() const { return bottom_; }
|
||||
inline int Width() const { return right_ - left_ + 1; }
|
||||
inline int Height() const { return bottom_ - top_ + 1; }
|
||||
|
||||
// Comparer used for sorting L2R reading order
|
||||
inline static int Left2RightComparer(const void *comp1,
|
||||
const void *comp2) {
|
||||
return (*(reinterpret_cast<ConComp * const *>(comp1)))->left_ +
|
||||
(*(reinterpret_cast<ConComp * const *>(comp1)))->right_ -
|
||||
(*(reinterpret_cast<ConComp * const *>(comp2)))->left_ -
|
||||
(*(reinterpret_cast<ConComp * const *>(comp2)))->right_;
|
||||
}
|
||||
|
||||
// Comparer used for sorting R2L reading order
|
||||
inline static int Right2LeftComparer(const void *comp1,
|
||||
const void *comp2) {
|
||||
return (*(reinterpret_cast<ConComp * const *>(comp2)))->right_ -
|
||||
(*(reinterpret_cast<ConComp * const *>(comp1)))->right_;
|
||||
}
|
||||
|
||||
// accessors for attribues of a ConComp
|
||||
inline bool LeftMost() const { return left_most_; }
|
||||
inline bool RightMost() const { return right_most_; }
|
||||
inline void SetLeftMost(bool left_most) { left_most_ = left_most; }
|
||||
inline void SetRightMost(bool right_most) { right_most_ = right_most;
|
||||
}
|
||||
inline int ID () const { return id_; }
|
||||
inline void SetID(int id) { id_ = id; }
|
||||
inline int PtCnt () const { return pt_cnt_; }
|
||||
// Add a new pt
|
||||
bool Add(int x, int y);
|
||||
// Merge two connected components in-place
|
||||
bool Merge(ConComp *con_comp);
|
||||
// Shifts the co-ordinates of all points by the specified x & y deltas
|
||||
void Shift(int dx, int dy);
|
||||
// segments a concomp based on pixel density histogram local minima
|
||||
ConComp **Segment(int max_hist_wnd, int *concomp_cnt);
|
||||
// creates the vertical pixel density histogram of the concomp
|
||||
int *CreateHistogram(int max_hist_wnd);
|
||||
// find out the seg pts by looking for local minima in the histogram
|
||||
int *SegmentHistogram(int *hist_array, int *seg_pt_cnt);
|
||||
|
||||
private:
|
||||
int id_;
|
||||
bool left_most_;
|
||||
bool right_most_;
|
||||
int left_;
|
||||
int top_;
|
||||
int right_;
|
||||
int bottom_;
|
||||
ConCompPt *head_;
|
||||
ConCompPt *tail_;
|
||||
int pt_cnt_;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // CONCOMP_H
|
@ -1,354 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: charclassifier.cpp
|
||||
* Description: Implementation of Convolutional-NeuralNet Character Classifier
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <algorithm>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <wctype.h>
|
||||
|
||||
#include "char_set.h"
|
||||
#include "classifier_base.h"
|
||||
#include "const.h"
|
||||
#include "conv_net_classifier.h"
|
||||
#include "cube_utils.h"
|
||||
#include "feature_base.h"
|
||||
#include "feature_bmp.h"
|
||||
#include "tess_lang_model.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
ConvNetCharClassifier::ConvNetCharClassifier(CharSet *char_set,
|
||||
TuningParams *params,
|
||||
FeatureBase *feat_extract)
|
||||
: CharClassifier(char_set, params, feat_extract) {
|
||||
char_net_ = NULL;
|
||||
net_input_ = NULL;
|
||||
net_output_ = NULL;
|
||||
}
|
||||
|
||||
ConvNetCharClassifier::~ConvNetCharClassifier() {
|
||||
if (char_net_ != NULL) {
|
||||
delete char_net_;
|
||||
char_net_ = NULL;
|
||||
}
|
||||
|
||||
if (net_input_ != NULL) {
|
||||
delete []net_input_;
|
||||
net_input_ = NULL;
|
||||
}
|
||||
|
||||
if (net_output_ != NULL) {
|
||||
delete []net_output_;
|
||||
net_output_ = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The main training function. Given a sample and a class ID the classifier
|
||||
* updates its parameters according to its learning algorithm. This function
|
||||
* is currently not implemented. TODO(ahmadab): implement end-2-end training
|
||||
*/
|
||||
bool ConvNetCharClassifier::Train(CharSamp *char_samp, int ClassID) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* A secondary function needed for training. Allows the trainer to set the
|
||||
* value of any train-time parameter. This function is currently not
|
||||
* implemented. TODO(ahmadab): implement end-2-end training
|
||||
*/
|
||||
bool ConvNetCharClassifier::SetLearnParam(char *var_name, float val) {
|
||||
// TODO(ahmadab): implementation of parameter initializing.
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Folds the output of the NeuralNet using the loaded folding sets
|
||||
*/
|
||||
void ConvNetCharClassifier::Fold() {
|
||||
// in case insensitive mode
|
||||
if (case_sensitive_ == false) {
|
||||
int class_cnt = char_set_->ClassCount();
|
||||
// fold case
|
||||
for (int class_id = 0; class_id < class_cnt; class_id++) {
|
||||
// get class string
|
||||
const char_32 *str32 = char_set_->ClassString(class_id);
|
||||
// get the upper case form of the string
|
||||
string_32 upper_form32 = str32;
|
||||
for (int ch = 0; ch < upper_form32.length(); ch++) {
|
||||
if (iswalpha(static_cast<int>(upper_form32[ch])) != 0) {
|
||||
upper_form32[ch] = towupper(upper_form32[ch]);
|
||||
}
|
||||
}
|
||||
|
||||
// find out the upperform class-id if any
|
||||
int upper_class_id =
|
||||
char_set_->ClassID(reinterpret_cast<const char_32 *>(
|
||||
upper_form32.c_str()));
|
||||
if (upper_class_id != -1 && class_id != upper_class_id) {
|
||||
float max_out = MAX(net_output_[class_id], net_output_[upper_class_id]);
|
||||
net_output_[class_id] = max_out;
|
||||
net_output_[upper_class_id] = max_out;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The folding sets specify how groups of classes should be folded
|
||||
// Folding involved assigning a min-activation to all the members
|
||||
// of the folding set. The min-activation is a fraction of the max-activation
|
||||
// of the members of the folding set
|
||||
for (int fold_set = 0; fold_set < fold_set_cnt_; fold_set++) {
|
||||
if (fold_set_len_[fold_set] == 0)
|
||||
continue;
|
||||
float max_prob = net_output_[fold_sets_[fold_set][0]];
|
||||
for (int ch = 1; ch < fold_set_len_[fold_set]; ch++) {
|
||||
if (net_output_[fold_sets_[fold_set][ch]] > max_prob) {
|
||||
max_prob = net_output_[fold_sets_[fold_set][ch]];
|
||||
}
|
||||
}
|
||||
for (int ch = 0; ch < fold_set_len_[fold_set]; ch++) {
|
||||
net_output_[fold_sets_[fold_set][ch]] = MAX(max_prob * kFoldingRatio,
|
||||
net_output_[fold_sets_[fold_set][ch]]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the features of specified charsamp and feedforward the
|
||||
* specified nets
|
||||
*/
|
||||
bool ConvNetCharClassifier::RunNets(CharSamp *char_samp) {
|
||||
if (char_net_ == NULL) {
|
||||
fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::RunNets): "
|
||||
"NeuralNet is NULL\n");
|
||||
return false;
|
||||
}
|
||||
int feat_cnt = char_net_->in_cnt();
|
||||
int class_cnt = char_set_->ClassCount();
|
||||
|
||||
// allocate i/p and o/p buffers if needed
|
||||
if (net_input_ == NULL) {
|
||||
net_input_ = new float[feat_cnt];
|
||||
net_output_ = new float[class_cnt];
|
||||
}
|
||||
|
||||
// compute input features
|
||||
if (feat_extract_->ComputeFeatures(char_samp, net_input_) == false) {
|
||||
fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::RunNets): "
|
||||
"unable to compute features\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (char_net_ != NULL) {
|
||||
if (char_net_->FeedForward(net_input_, net_output_) == false) {
|
||||
fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::RunNets): "
|
||||
"unable to run feed-forward\n");
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
Fold();
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* return the cost of being a char
|
||||
*/
|
||||
int ConvNetCharClassifier::CharCost(CharSamp *char_samp) {
|
||||
if (RunNets(char_samp) == false) {
|
||||
return 0;
|
||||
}
|
||||
return CubeUtils::Prob2Cost(1.0f - net_output_[0]);
|
||||
}
|
||||
|
||||
/**
|
||||
* classifies a charsamp and returns an alternate list
|
||||
* of chars sorted by char costs
|
||||
*/
|
||||
CharAltList *ConvNetCharClassifier::Classify(CharSamp *char_samp) {
|
||||
// run the needed nets
|
||||
if (RunNets(char_samp) == false) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int class_cnt = char_set_->ClassCount();
|
||||
|
||||
// create an altlist
|
||||
CharAltList *alt_list = new CharAltList(char_set_, class_cnt);
|
||||
|
||||
for (int out = 1; out < class_cnt; out++) {
|
||||
int cost = CubeUtils::Prob2Cost(net_output_[out]);
|
||||
alt_list->Insert(out, cost);
|
||||
}
|
||||
|
||||
return alt_list;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set an external net (for training purposes)
|
||||
*/
|
||||
void ConvNetCharClassifier::SetNet(tesseract::NeuralNet *char_net) {
|
||||
if (char_net_ != NULL) {
|
||||
delete char_net_;
|
||||
char_net_ = NULL;
|
||||
}
|
||||
char_net_ = char_net;
|
||||
}
|
||||
|
||||
/**
|
||||
* This function will return true if the file does not exist.
|
||||
* But will fail if the it did not pass the sanity checks
|
||||
*/
|
||||
bool ConvNetCharClassifier::LoadFoldingSets(const string &data_file_path,
|
||||
const string &lang,
|
||||
LangModel *lang_mod) {
|
||||
fold_set_cnt_ = 0;
|
||||
string fold_file_name;
|
||||
fold_file_name = data_file_path + lang;
|
||||
fold_file_name += ".cube.fold";
|
||||
|
||||
// folding sets are optional
|
||||
FILE *fp = fopen(fold_file_name.c_str(), "rb");
|
||||
if (fp == NULL) {
|
||||
return true;
|
||||
}
|
||||
fclose(fp);
|
||||
|
||||
string fold_sets_str;
|
||||
if (!CubeUtils::ReadFileToString(fold_file_name,
|
||||
&fold_sets_str)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// split into lines
|
||||
vector<string> str_vec;
|
||||
CubeUtils::SplitStringUsing(fold_sets_str, "\r\n", &str_vec);
|
||||
fold_set_cnt_ = str_vec.size();
|
||||
|
||||
fold_sets_ = new int *[fold_set_cnt_];
|
||||
fold_set_len_ = new int[fold_set_cnt_];
|
||||
|
||||
for (int fold_set = 0; fold_set < fold_set_cnt_; fold_set++) {
|
||||
reinterpret_cast<TessLangModel *>(lang_mod)->RemoveInvalidCharacters(
|
||||
&str_vec[fold_set]);
|
||||
|
||||
// if all or all but one character are invalid, invalidate this set
|
||||
if (str_vec[fold_set].length() <= 1) {
|
||||
fprintf(stderr, "Cube WARNING (ConvNetCharClassifier::LoadFoldingSets): "
|
||||
"invalidating folding set %d\n", fold_set);
|
||||
fold_set_len_[fold_set] = 0;
|
||||
fold_sets_[fold_set] = NULL;
|
||||
continue;
|
||||
}
|
||||
|
||||
string_32 str32;
|
||||
CubeUtils::UTF8ToUTF32(str_vec[fold_set].c_str(), &str32);
|
||||
fold_set_len_[fold_set] = str32.length();
|
||||
fold_sets_[fold_set] = new int[fold_set_len_[fold_set]];
|
||||
for (int ch = 0; ch < fold_set_len_[fold_set]; ch++) {
|
||||
fold_sets_[fold_set][ch] = char_set_->ClassID(str32[ch]);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Init the classifier provided a data-path and a language string
|
||||
*/
|
||||
bool ConvNetCharClassifier::Init(const string &data_file_path,
|
||||
const string &lang,
|
||||
LangModel *lang_mod) {
|
||||
if (init_) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// load the nets if any. This function will return true if the net file
|
||||
// does not exist. But will fail if the net did not pass the sanity checks
|
||||
if (!LoadNets(data_file_path, lang)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// load the folding sets if any. This function will return true if the
|
||||
// file does not exist. But will fail if the it did not pass the sanity checks
|
||||
if (!LoadFoldingSets(data_file_path, lang, lang_mod)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
init_ = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Load the classifier's Neural Nets
|
||||
* This function will return true if the net file does not exist.
|
||||
* But will fail if the net did not pass the sanity checks
|
||||
*/
|
||||
bool ConvNetCharClassifier::LoadNets(const string &data_file_path,
|
||||
const string &lang) {
|
||||
string char_net_file;
|
||||
|
||||
// add the lang identifier
|
||||
char_net_file = data_file_path + lang;
|
||||
char_net_file += ".cube.nn";
|
||||
|
||||
// neural network is optional
|
||||
FILE *fp = fopen(char_net_file.c_str(), "rb");
|
||||
if (fp == NULL) {
|
||||
return true;
|
||||
}
|
||||
fclose(fp);
|
||||
|
||||
// load main net
|
||||
char_net_ = tesseract::NeuralNet::FromFile(char_net_file);
|
||||
if (char_net_ == NULL) {
|
||||
fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::LoadNets): "
|
||||
"could not load %s\n", char_net_file.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
// validate net
|
||||
if (char_net_->in_cnt()!= feat_extract_->FeatureCnt()) {
|
||||
fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::LoadNets): "
|
||||
"could not validate net %s\n", char_net_file.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
// alloc net i/o buffers
|
||||
int feat_cnt = char_net_->in_cnt();
|
||||
int class_cnt = char_set_->ClassCount();
|
||||
|
||||
if (char_net_->out_cnt() != class_cnt) {
|
||||
fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::LoadNets): "
|
||||
"output count (%d) and class count (%d) are not equal\n",
|
||||
char_net_->out_cnt(), class_cnt);
|
||||
return false;
|
||||
}
|
||||
|
||||
// allocate i/p and o/p buffers if needed
|
||||
if (net_input_ == NULL) {
|
||||
net_input_ = new float[feat_cnt];
|
||||
net_output_ = new float[class_cnt];
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
} // tesseract
|
@ -1,94 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: conv_net_classifier.h
|
||||
* Description: Declaration of Convolutional-NeuralNet Character Classifier
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The ConvNetCharClassifier inherits from the base classifier class:
|
||||
// "CharClassifierBase". It implements a Convolutional Neural Net classifier
|
||||
// instance of the base classifier. It uses the Tesseract Neural Net library
|
||||
// The Neural Net takes a scaled version of a bitmap and feeds it to a
|
||||
// Convolutional Neural Net as input and performs a FeedForward. Each output
|
||||
// of the net corresponds to class_id in the CharSet passed at construction
|
||||
// time.
|
||||
// Afterwards, the outputs of the Net are "folded" using the folding set
|
||||
// (if any)
|
||||
#ifndef CONV_NET_CLASSIFIER_H
|
||||
#define CONV_NET_CLASSIFIER_H
|
||||
|
||||
#include <string>
|
||||
#include "char_samp.h"
|
||||
#include "char_altlist.h"
|
||||
#include "char_set.h"
|
||||
#include "feature_base.h"
|
||||
#include "classifier_base.h"
|
||||
#include "neural_net.h"
|
||||
#include "lang_model.h"
|
||||
#include "tuning_params.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Folding Ratio is the ratio of the max-activation of members of a folding
|
||||
// set that is used to compute the min-activation of the rest of the set
|
||||
static const float kFoldingRatio = 0.75;
|
||||
|
||||
class ConvNetCharClassifier : public CharClassifier {
|
||||
public:
|
||||
ConvNetCharClassifier(CharSet *char_set, TuningParams *params,
|
||||
FeatureBase *feat_extract);
|
||||
virtual ~ConvNetCharClassifier();
|
||||
// The main training function. Given a sample and a class ID the classifier
|
||||
// updates its parameters according to its learning algorithm. This function
|
||||
// is currently not implemented. TODO(ahmadab): implement end-2-end training
|
||||
virtual bool Train(CharSamp *char_samp, int ClassID);
|
||||
// A secondary function needed for training. Allows the trainer to set the
|
||||
// value of any train-time parameter. This function is currently not
|
||||
// implemented. TODO(ahmadab): implement end-2-end training
|
||||
virtual bool SetLearnParam(char *var_name, float val);
|
||||
// Externally sets the Neural Net used by the classifier. Used for training
|
||||
void SetNet(tesseract::NeuralNet *net);
|
||||
|
||||
// Classifies an input charsamp and return a CharAltList object containing
|
||||
// the possible candidates and corresponding scores
|
||||
virtual CharAltList * Classify(CharSamp *char_samp);
|
||||
// Computes the cost of a specific charsamp being a character (versus a
|
||||
// non-character: part-of-a-character OR more-than-one-character)
|
||||
virtual int CharCost(CharSamp *char_samp);
|
||||
|
||||
|
||||
private:
|
||||
// Neural Net object used for classification
|
||||
tesseract::NeuralNet *char_net_;
|
||||
// data buffers used to hold Neural Net inputs and outputs
|
||||
float *net_input_;
|
||||
float *net_output_;
|
||||
|
||||
// Init the classifier provided a data-path and a language string
|
||||
virtual bool Init(const string &data_file_path, const string &lang,
|
||||
LangModel *lang_mod);
|
||||
// Loads the NeuralNets needed for the classifier
|
||||
bool LoadNets(const string &data_file_path, const string &lang);
|
||||
// Loads the folding sets provided a data-path and a language string
|
||||
virtual bool LoadFoldingSets(const string &data_file_path,
|
||||
const string &lang,
|
||||
LangModel *lang_mod);
|
||||
// Folds the output of the NeuralNet using the loaded folding sets
|
||||
virtual void Fold();
|
||||
// Scales the input char_samp and feeds it to the NeuralNet as input
|
||||
bool RunNets(CharSamp *char_samp);
|
||||
};
|
||||
}
|
||||
#endif // CONV_NET_CLASSIFIER_H
|
@ -1,41 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: const.h
|
||||
* Description: Defintions of constants used by Cube
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef CUBE_CONST_H
|
||||
#define CUBE_CONST_H
|
||||
|
||||
// Scale used to normalize a log-prob to a cost
|
||||
#define PROB2COST_SCALE 4096.0
|
||||
// Maximum possible cost (-log prob of MIN_PROB)
|
||||
#define MIN_PROB_COST 65536
|
||||
// Probability corresponding to the max cost MIN_PROB_COST
|
||||
#define MIN_PROB 0.000000113
|
||||
// Worst possible cost (returned on failure)
|
||||
#define WORST_COST 0x40000
|
||||
// Oversegmentation hysteresis thresholds
|
||||
#define HIST_WND_RATIO 0.1f
|
||||
#define SEG_PT_WND_RATIO 0.1f
|
||||
|
||||
#ifdef _WIN32
|
||||
#ifdef __GNUC__
|
||||
#include <climits>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif // CUBE_CONST_H
|
@ -1,249 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: cube_line_object.cpp
|
||||
* Description: Implementation of the Cube Line Object Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <algorithm>
|
||||
#include "cube_line_object.h"
|
||||
|
||||
namespace tesseract {
|
||||
CubeLineObject::CubeLineObject(CubeRecoContext *cntxt, Pix *pix) {
|
||||
line_pix_ = pix;
|
||||
own_pix_ = false;
|
||||
processed_ = false;
|
||||
cntxt_ = cntxt;
|
||||
phrase_cnt_ = 0;
|
||||
phrases_ = NULL;
|
||||
}
|
||||
|
||||
CubeLineObject::~CubeLineObject() {
|
||||
if (line_pix_ != NULL && own_pix_ == true) {
|
||||
pixDestroy(&line_pix_);
|
||||
line_pix_ = NULL;
|
||||
}
|
||||
|
||||
if (phrases_ != NULL) {
|
||||
for (int phrase_idx = 0; phrase_idx < phrase_cnt_; phrase_idx++) {
|
||||
if (phrases_[phrase_idx] != NULL) {
|
||||
delete phrases_[phrase_idx];
|
||||
}
|
||||
}
|
||||
|
||||
delete []phrases_;
|
||||
phrases_ = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// Recognize the specified pix as one line returning the recognized
|
||||
bool CubeLineObject::Process() {
|
||||
// do nothing if pix had already been processed
|
||||
if (processed_) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// validate data
|
||||
if (line_pix_ == NULL || cntxt_ == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// create a CharSamp
|
||||
CharSamp *char_samp = CubeUtils::CharSampleFromPix(line_pix_, 0, 0,
|
||||
line_pix_->w,
|
||||
line_pix_->h);
|
||||
if (char_samp == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// compute connected components.
|
||||
int con_comp_cnt = 0;
|
||||
ConComp **con_comps = char_samp->FindConComps(&con_comp_cnt,
|
||||
cntxt_->Params()->MinConCompSize());
|
||||
// no longer need char_samp, delete it
|
||||
delete char_samp;
|
||||
// no connected components, bail out
|
||||
if (con_comp_cnt <= 0 || con_comps == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// sort connected components based on reading order
|
||||
bool rtl = (cntxt_->ReadingOrder() == tesseract::CubeRecoContext::R2L);
|
||||
qsort(con_comps, con_comp_cnt, sizeof(*con_comps), rtl ?
|
||||
ConComp::Right2LeftComparer : ConComp::Left2RightComparer);
|
||||
|
||||
// compute work breaking threshold as a ratio of line height
|
||||
bool ret_val = false;
|
||||
int word_break_threshold = ComputeWordBreakThreshold(con_comp_cnt, con_comps,
|
||||
rtl);
|
||||
if (word_break_threshold > 0) {
|
||||
// over-allocate phrases object buffer
|
||||
phrases_ = new CubeObject *[con_comp_cnt];
|
||||
// create a phrase if the horizontal distance between two consecutive
|
||||
// concomps is higher than threshold
|
||||
int start_con_idx = 0;
|
||||
int current_phrase_limit = rtl ? con_comps[0]->Left() :
|
||||
con_comps[0]->Right();
|
||||
|
||||
for (int con_idx = 1; con_idx <= con_comp_cnt; con_idx++) {
|
||||
bool create_new_phrase = true;
|
||||
// if not at the end, compute the distance between two consecutive
|
||||
// concomps
|
||||
if (con_idx < con_comp_cnt) {
|
||||
int dist = 0;
|
||||
if (cntxt_->ReadingOrder() == tesseract::CubeRecoContext::R2L) {
|
||||
dist = current_phrase_limit - con_comps[con_idx]->Right();
|
||||
} else {
|
||||
dist = con_comps[con_idx]->Left() - current_phrase_limit;
|
||||
}
|
||||
create_new_phrase = (dist > word_break_threshold);
|
||||
}
|
||||
|
||||
// create a new phrase
|
||||
if (create_new_phrase) {
|
||||
// create a phrase corresponding to a range on components
|
||||
bool left_most;
|
||||
bool right_most;
|
||||
CharSamp *phrase_char_samp =
|
||||
CharSamp::FromConComps(con_comps, start_con_idx,
|
||||
con_idx - start_con_idx, NULL,
|
||||
&left_most, &right_most,
|
||||
line_pix_->h);
|
||||
if (phrase_char_samp == NULL) {
|
||||
break;
|
||||
}
|
||||
phrases_[phrase_cnt_] = new CubeObject(cntxt_, phrase_char_samp);
|
||||
// set the ownership of the charsamp to the cube object
|
||||
phrases_[phrase_cnt_]->SetCharSampOwnership(true);
|
||||
phrase_cnt_++;
|
||||
// advance the starting index to the current index
|
||||
start_con_idx = con_idx;
|
||||
// set the limit of the newly starting phrase (if any)
|
||||
if (con_idx < con_comp_cnt) {
|
||||
current_phrase_limit = rtl ? con_comps[con_idx]->Left() :
|
||||
con_comps[con_idx]->Right();
|
||||
}
|
||||
} else {
|
||||
// update the limit of the current phrase
|
||||
if (cntxt_->ReadingOrder() == tesseract::CubeRecoContext::R2L) {
|
||||
current_phrase_limit = MIN(current_phrase_limit,
|
||||
con_comps[con_idx]->Left());
|
||||
} else {
|
||||
current_phrase_limit = MAX(current_phrase_limit,
|
||||
con_comps[con_idx]->Right());
|
||||
}
|
||||
}
|
||||
}
|
||||
ret_val = true;
|
||||
}
|
||||
|
||||
// clean-up connected comps
|
||||
for (int con_idx = 0; con_idx < con_comp_cnt; con_idx++) {
|
||||
delete con_comps[con_idx];
|
||||
}
|
||||
delete []con_comps;
|
||||
|
||||
// success
|
||||
processed_ = true;
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
// Compute the least word breaking threshold that is required to produce a
|
||||
// valid set of phrases. Phrases are validated using the Aspect ratio
|
||||
// constraints specified in the language specific Params object
|
||||
int CubeLineObject::ComputeWordBreakThreshold(int con_comp_cnt,
|
||||
ConComp **con_comps, bool rtl) {
|
||||
// initial estimate of word breaking threshold
|
||||
int word_break_threshold =
|
||||
static_cast<int>(line_pix_->h * cntxt_->Params()->MaxSpaceHeightRatio());
|
||||
bool valid = false;
|
||||
|
||||
// compute the resulting words and validate each's aspect ratio
|
||||
do {
|
||||
// group connected components into words based on breaking threshold
|
||||
int start_con_idx = 0;
|
||||
int current_phrase_limit = (rtl ? con_comps[0]->Left() :
|
||||
con_comps[0]->Right());
|
||||
int min_x = con_comps[0]->Left();
|
||||
int max_x = con_comps[0]->Right();
|
||||
int min_y = con_comps[0]->Top();
|
||||
int max_y = con_comps[0]->Bottom();
|
||||
valid = true;
|
||||
for (int con_idx = 1; con_idx <= con_comp_cnt; con_idx++) {
|
||||
bool create_new_phrase = true;
|
||||
// if not at the end, compute the distance between two consecutive
|
||||
// concomps
|
||||
if (con_idx < con_comp_cnt) {
|
||||
int dist = 0;
|
||||
if (rtl) {
|
||||
dist = current_phrase_limit - con_comps[con_idx]->Right();
|
||||
} else {
|
||||
dist = con_comps[con_idx]->Left() - current_phrase_limit;
|
||||
}
|
||||
create_new_phrase = (dist > word_break_threshold);
|
||||
}
|
||||
|
||||
// create a new phrase
|
||||
if (create_new_phrase) {
|
||||
// check aspect ratio. Break if invalid
|
||||
if ((max_x - min_x + 1) >
|
||||
(cntxt_->Params()->MaxWordAspectRatio() * (max_y - min_y + 1))) {
|
||||
valid = false;
|
||||
break;
|
||||
}
|
||||
// advance the starting index to the current index
|
||||
start_con_idx = con_idx;
|
||||
// set the limit of the newly starting phrase (if any)
|
||||
if (con_idx < con_comp_cnt) {
|
||||
current_phrase_limit = rtl ? con_comps[con_idx]->Left() :
|
||||
con_comps[con_idx]->Right();
|
||||
// re-init bounding box
|
||||
min_x = con_comps[con_idx]->Left();
|
||||
max_x = con_comps[con_idx]->Right();
|
||||
min_y = con_comps[con_idx]->Top();
|
||||
max_y = con_comps[con_idx]->Bottom();
|
||||
}
|
||||
} else {
|
||||
// update the limit of the current phrase
|
||||
if (rtl) {
|
||||
current_phrase_limit = MIN(current_phrase_limit,
|
||||
con_comps[con_idx]->Left());
|
||||
} else {
|
||||
current_phrase_limit = MAX(current_phrase_limit,
|
||||
con_comps[con_idx]->Right());
|
||||
}
|
||||
// update bounding box
|
||||
UpdateRange(con_comps[con_idx]->Left(),
|
||||
con_comps[con_idx]->Right(), &min_x, &max_x);
|
||||
UpdateRange(con_comps[con_idx]->Top(),
|
||||
con_comps[con_idx]->Bottom(), &min_y, &max_y);
|
||||
}
|
||||
}
|
||||
|
||||
// return the breaking threshold if all broken word dimensions are valid
|
||||
if (valid) {
|
||||
return word_break_threshold;
|
||||
}
|
||||
|
||||
// decrease the threshold and try again
|
||||
word_break_threshold--;
|
||||
} while (!valid && word_break_threshold > 0);
|
||||
|
||||
// failed to find a threshold that achieves the target aspect ratio.
|
||||
// Just use the default threshold
|
||||
return static_cast<int>(line_pix_->h *
|
||||
cntxt_->Params()->MaxSpaceHeightRatio());
|
||||
}
|
||||
}
|
@ -1,67 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: cube_line_object.h
|
||||
* Description: Declaration of the Cube Line Object Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The CubeLineObject implements an objects that holds a line of text
|
||||
// Each line is broken into phrases. Phrases are blocks within the line that
|
||||
// are unambiguously separate collections of words
|
||||
|
||||
#ifndef CUBE_LINE_OBJECT_H
|
||||
#define CUBE_LINE_OBJECT_H
|
||||
|
||||
#include "cube_reco_context.h"
|
||||
#include "cube_object.h"
|
||||
#include "allheaders.h"
|
||||
|
||||
namespace tesseract {
|
||||
class CubeLineObject {
|
||||
public:
|
||||
CubeLineObject(CubeRecoContext *cntxt, Pix *pix);
|
||||
~CubeLineObject();
|
||||
|
||||
// accessors
|
||||
inline int PhraseCount() {
|
||||
if (!processed_ && !Process()) {
|
||||
return 0;
|
||||
}
|
||||
return phrase_cnt_;
|
||||
}
|
||||
inline CubeObject **Phrases() {
|
||||
if (!processed_ && !Process()) {
|
||||
return NULL;
|
||||
}
|
||||
return phrases_;
|
||||
}
|
||||
|
||||
private:
|
||||
CubeRecoContext *cntxt_;
|
||||
bool own_pix_;
|
||||
bool processed_;
|
||||
Pix *line_pix_;
|
||||
CubeObject **phrases_;
|
||||
int phrase_cnt_;
|
||||
bool Process();
|
||||
// Compute the least word breaking threshold that is required to produce a
|
||||
// valid set of phrases. Phrases are validated using the Aspect ratio
|
||||
// constraints specified in the language specific Params object
|
||||
int ComputeWordBreakThreshold(int con_comp_cnt, ConComp **con_comps,
|
||||
bool rtl);
|
||||
};
|
||||
}
|
||||
|
||||
#endif // CUBE_LINE_OBJECT_H
|
@ -1,949 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: cube_page_segmenter.cpp
|
||||
* Description: Implementation of the Cube Page Segmenter Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include "cube_line_segmenter.h"
|
||||
#include "ndminx.h"
|
||||
|
||||
namespace tesseract {
|
||||
// constants that worked for Arabic page segmenter
|
||||
const int CubeLineSegmenter::kLineSepMorphMinHgt = 20;
|
||||
const int CubeLineSegmenter::kHgtBins = 20;
|
||||
const double CubeLineSegmenter::kMaxValidLineRatio = 3.2;
|
||||
const int CubeLineSegmenter::kMaxConnCompHgt = 150;
|
||||
const int CubeLineSegmenter::kMaxConnCompWid = 500;
|
||||
const int CubeLineSegmenter::kMaxHorzAspectRatio = 50;
|
||||
const int CubeLineSegmenter::kMaxVertAspectRatio = 20;
|
||||
const int CubeLineSegmenter::kMinWid = 2;
|
||||
const int CubeLineSegmenter::kMinHgt = 2;
|
||||
const float CubeLineSegmenter::kMinValidLineHgtRatio = 2.5;
|
||||
|
||||
CubeLineSegmenter::CubeLineSegmenter(CubeRecoContext *cntxt, Pix *img) {
|
||||
cntxt_ = cntxt;
|
||||
orig_img_ = img;
|
||||
img_ = NULL;
|
||||
lines_pixa_ = NULL;
|
||||
init_ = false;
|
||||
line_cnt_ = 0;
|
||||
columns_ = NULL;
|
||||
con_comps_ = NULL;
|
||||
est_alef_hgt_ = 0.0;
|
||||
est_dot_hgt_ = 0.0;
|
||||
}
|
||||
|
||||
CubeLineSegmenter::~CubeLineSegmenter() {
|
||||
if (img_ != NULL) {
|
||||
pixDestroy(&img_);
|
||||
img_ = NULL;
|
||||
}
|
||||
|
||||
if (lines_pixa_ != NULL) {
|
||||
pixaDestroy(&lines_pixa_);
|
||||
lines_pixa_ = NULL;
|
||||
}
|
||||
|
||||
if (con_comps_ != NULL) {
|
||||
pixaDestroy(&con_comps_);
|
||||
con_comps_ = NULL;
|
||||
}
|
||||
|
||||
if (columns_ != NULL) {
|
||||
pixaaDestroy(&columns_);
|
||||
columns_ = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// compute validity ratio for a line
|
||||
double CubeLineSegmenter::ValidityRatio(Pix *line_mask_pix, Box *line_box) {
|
||||
return line_box->h / est_alef_hgt_;
|
||||
}
|
||||
|
||||
// validate line
|
||||
bool CubeLineSegmenter::ValidLine(Pix *line_mask_pix, Box *line_box) {
|
||||
double validity_ratio = ValidityRatio(line_mask_pix, line_box);
|
||||
|
||||
return validity_ratio < kMaxValidLineRatio;
|
||||
}
|
||||
|
||||
// perform a vertical Closing with the specified threshold
|
||||
// returning the resulting conn comps as a pixa
|
||||
Pixa *CubeLineSegmenter::VerticalClosing(Pix *pix,
|
||||
int threshold, Boxa **boxa) {
|
||||
char sequence_str[16];
|
||||
|
||||
// do the morphology
|
||||
sprintf(sequence_str, "c100.%d", threshold);
|
||||
Pix *morphed_pix = pixMorphCompSequence(pix, sequence_str, 0);
|
||||
if (morphed_pix == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// get the resulting lines by computing concomps
|
||||
Pixa *pixac;
|
||||
(*boxa) = pixConnComp(morphed_pix, &pixac, 8);
|
||||
|
||||
pixDestroy(&morphed_pix);
|
||||
|
||||
if ((*boxa) == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return pixac;
|
||||
}
|
||||
|
||||
// Helper cleans up after CrackLine.
|
||||
static void CleanupCrackLine(int line_cnt, Pixa **lines_pixa,
|
||||
Boxa **line_con_comps,
|
||||
Pixa **line_con_comps_pix) {
|
||||
for (int line = 0; line < line_cnt; line++) {
|
||||
if (lines_pixa[line] != NULL) {
|
||||
pixaDestroy(&lines_pixa[line]);
|
||||
}
|
||||
}
|
||||
|
||||
delete []lines_pixa;
|
||||
boxaDestroy(line_con_comps);
|
||||
pixaDestroy(line_con_comps_pix);
|
||||
}
|
||||
|
||||
// do a desperate attempt at cracking lines
|
||||
Pixa *CubeLineSegmenter::CrackLine(Pix *cracked_line_pix,
|
||||
Box *cracked_line_box, int line_cnt) {
|
||||
// create lines pixa array
|
||||
Pixa **lines_pixa = new Pixa*[line_cnt];
|
||||
|
||||
memset(lines_pixa, 0, line_cnt * sizeof(*lines_pixa));
|
||||
|
||||
// compute line conn comps
|
||||
Pixa *line_con_comps_pix;
|
||||
Boxa *line_con_comps = ComputeLineConComps(cracked_line_pix,
|
||||
cracked_line_box, &line_con_comps_pix);
|
||||
|
||||
if (line_con_comps == NULL) {
|
||||
delete []lines_pixa;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// assign each conn comp to the a line based on its centroid
|
||||
for (int con = 0; con < line_con_comps->n; con++) {
|
||||
Box *con_box = line_con_comps->box[con];
|
||||
Pix *con_pix = line_con_comps_pix->pix[con];
|
||||
int mid_y = (con_box->y - cracked_line_box->y) + (con_box->h / 2),
|
||||
line_idx = MIN(line_cnt - 1,
|
||||
(mid_y * line_cnt / cracked_line_box->h));
|
||||
|
||||
// create the line if it has not been created?
|
||||
if (lines_pixa[line_idx] == NULL) {
|
||||
lines_pixa[line_idx] = pixaCreate(line_con_comps->n);
|
||||
if (lines_pixa[line_idx] == NULL) {
|
||||
CleanupCrackLine(line_cnt, lines_pixa, &line_con_comps,
|
||||
&line_con_comps_pix);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// add the concomp to the line
|
||||
if (pixaAddPix(lines_pixa[line_idx], con_pix, L_CLONE) != 0 ||
|
||||
pixaAddBox(lines_pixa[line_idx], con_box, L_CLONE)) {
|
||||
CleanupCrackLine(line_cnt, lines_pixa, &line_con_comps,
|
||||
&line_con_comps_pix);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// create the lines pixa
|
||||
Pixa *lines = pixaCreate(line_cnt);
|
||||
bool success = true;
|
||||
|
||||
// create and check the validity of the lines
|
||||
for (int line = 0; line < line_cnt; line++) {
|
||||
Pixa *line_pixa = lines_pixa[line];
|
||||
|
||||
// skip invalid lines
|
||||
if (line_pixa == NULL) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// merge the pix, check the validity of the line
|
||||
// and add it to the lines pixa
|
||||
Box *line_box;
|
||||
Pix *line_pix = Pixa2Pix(line_pixa, &line_box);
|
||||
if (line_pix == NULL ||
|
||||
line_box == NULL ||
|
||||
ValidLine(line_pix, line_box) == false ||
|
||||
pixaAddPix(lines, line_pix, L_INSERT) != 0 ||
|
||||
pixaAddBox(lines, line_box, L_INSERT) != 0) {
|
||||
if (line_pix != NULL) {
|
||||
pixDestroy(&line_pix);
|
||||
}
|
||||
|
||||
if (line_box != NULL) {
|
||||
boxDestroy(&line_box);
|
||||
}
|
||||
|
||||
success = false;
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// cleanup
|
||||
CleanupCrackLine(line_cnt, lines_pixa, &line_con_comps,
|
||||
&line_con_comps_pix);
|
||||
|
||||
if (success == false) {
|
||||
pixaDestroy(&lines);
|
||||
lines = NULL;
|
||||
}
|
||||
|
||||
return lines;
|
||||
}
|
||||
|
||||
// do a desperate attempt at cracking lines
|
||||
Pixa *CubeLineSegmenter::CrackLine(Pix *cracked_line_pix,
|
||||
Box *cracked_line_box) {
|
||||
// estimate max line count
|
||||
int max_line_cnt = static_cast<int>((cracked_line_box->h /
|
||||
est_alef_hgt_) + 0.5);
|
||||
if (max_line_cnt < 2) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for (int line_cnt = 2; line_cnt < max_line_cnt; line_cnt++) {
|
||||
Pixa *lines = CrackLine(cracked_line_pix, cracked_line_box, line_cnt);
|
||||
if (lines != NULL) {
|
||||
return lines;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// split a line continuously until valid or fail
|
||||
Pixa *CubeLineSegmenter::SplitLine(Pix *line_mask_pix, Box *line_box) {
|
||||
// clone the line mask
|
||||
Pix *line_pix = pixClone(line_mask_pix);
|
||||
|
||||
if (line_pix == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// AND with the image to get the actual line
|
||||
pixRasterop(line_pix, 0, 0, line_pix->w, line_pix->h,
|
||||
PIX_SRC & PIX_DST, img_, line_box->x, line_box->y);
|
||||
|
||||
// continue to do rasterop morphology on the line until
|
||||
// it splits to valid lines or we fail
|
||||
int morph_hgt = kLineSepMorphMinHgt - 1,
|
||||
best_threshold = kLineSepMorphMinHgt - 1,
|
||||
max_valid_portion = 0;
|
||||
|
||||
Boxa *boxa;
|
||||
Pixa *pixac;
|
||||
|
||||
do {
|
||||
pixac = VerticalClosing(line_pix, morph_hgt, &boxa);
|
||||
|
||||
// add the box offset to all the lines
|
||||
// and check for the validity of each
|
||||
int line,
|
||||
valid_line_cnt = 0,
|
||||
valid_portion = 0;
|
||||
|
||||
for (line = 0; line < pixac->n; line++) {
|
||||
boxa->box[line]->x += line_box->x;
|
||||
boxa->box[line]->y += line_box->y;
|
||||
|
||||
if (ValidLine(pixac->pix[line], boxa->box[line]) == true) {
|
||||
// count valid lines
|
||||
valid_line_cnt++;
|
||||
|
||||
// and the valid portions
|
||||
valid_portion += boxa->box[line]->h;
|
||||
}
|
||||
}
|
||||
|
||||
// all the lines are valid
|
||||
if (valid_line_cnt == pixac->n) {
|
||||
boxaDestroy(&boxa);
|
||||
pixDestroy(&line_pix);
|
||||
return pixac;
|
||||
}
|
||||
|
||||
// a larger valid portion
|
||||
if (valid_portion > max_valid_portion) {
|
||||
max_valid_portion = valid_portion;
|
||||
best_threshold = morph_hgt;
|
||||
}
|
||||
|
||||
boxaDestroy(&boxa);
|
||||
pixaDestroy(&pixac);
|
||||
|
||||
morph_hgt--;
|
||||
}
|
||||
while (morph_hgt > 0);
|
||||
|
||||
// failed to break into valid lines
|
||||
// attempt to crack the line
|
||||
pixac = CrackLine(line_pix, line_box);
|
||||
if (pixac != NULL) {
|
||||
pixDestroy(&line_pix);
|
||||
return pixac;
|
||||
}
|
||||
|
||||
// try to leverage any of the lines
|
||||
// did the best threshold yield a non zero valid portion
|
||||
if (max_valid_portion > 0) {
|
||||
// use this threshold to break lines
|
||||
pixac = VerticalClosing(line_pix, best_threshold, &boxa);
|
||||
|
||||
// add the box offset to all the lines
|
||||
// and check for the validity of each
|
||||
for (int line = 0; line < pixac->n; line++) {
|
||||
boxa->box[line]->x += line_box->x;
|
||||
boxa->box[line]->y += line_box->y;
|
||||
|
||||
// remove invalid lines from the pixa
|
||||
if (ValidLine(pixac->pix[line], boxa->box[line]) == false) {
|
||||
pixaRemovePix(pixac, line);
|
||||
line--;
|
||||
}
|
||||
}
|
||||
|
||||
boxaDestroy(&boxa);
|
||||
pixDestroy(&line_pix);
|
||||
return pixac;
|
||||
}
|
||||
|
||||
// last resort: attempt to crack the line
|
||||
pixDestroy(&line_pix);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Checks of a line is too small
|
||||
bool CubeLineSegmenter::SmallLine(Box *line_box) {
|
||||
return line_box->h <= (kMinValidLineHgtRatio * est_dot_hgt_);
|
||||
}
|
||||
|
||||
// Compute the connected components in a line
|
||||
Boxa * CubeLineSegmenter::ComputeLineConComps(Pix *line_mask_pix,
|
||||
Box *line_box,
|
||||
Pixa **con_comps_pixa) {
|
||||
// clone the line mask
|
||||
Pix *line_pix = pixClone(line_mask_pix);
|
||||
|
||||
if (line_pix == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// AND with the image to get the actual line
|
||||
pixRasterop(line_pix, 0, 0, line_pix->w, line_pix->h,
|
||||
PIX_SRC & PIX_DST, img_, line_box->x, line_box->y);
|
||||
|
||||
// compute the connected components of the line to be merged
|
||||
Boxa *line_con_comps = pixConnComp(line_pix, con_comps_pixa, 8);
|
||||
|
||||
pixDestroy(&line_pix);
|
||||
|
||||
// offset boxes by the bbox of the line
|
||||
for (int con = 0; con < line_con_comps->n; con++) {
|
||||
line_con_comps->box[con]->x += line_box->x;
|
||||
line_con_comps->box[con]->y += line_box->y;
|
||||
}
|
||||
|
||||
return line_con_comps;
|
||||
}
|
||||
|
||||
// create a union of two arbitrary pix
|
||||
Pix *CubeLineSegmenter::PixUnion(Pix *dest_pix, Box *dest_box,
|
||||
Pix *src_pix, Box *src_box) {
|
||||
// compute dimensions of union rect
|
||||
BOX *union_box = boxBoundingRegion(src_box, dest_box);
|
||||
|
||||
// create the union pix
|
||||
Pix *union_pix = pixCreate(union_box->w, union_box->h, src_pix->d);
|
||||
if (union_pix == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// blt the src and dest pix
|
||||
pixRasterop(union_pix,
|
||||
src_box->x - union_box->x, src_box->y - union_box->y,
|
||||
src_box->w, src_box->h, PIX_SRC | PIX_DST, src_pix, 0, 0);
|
||||
|
||||
pixRasterop(union_pix,
|
||||
dest_box->x - union_box->x, dest_box->y - union_box->y,
|
||||
dest_box->w, dest_box->h, PIX_SRC | PIX_DST, dest_pix, 0, 0);
|
||||
|
||||
// replace the dest_box
|
||||
*dest_box = *union_box;
|
||||
|
||||
boxDestroy(&union_box);
|
||||
|
||||
return union_pix;
|
||||
}
|
||||
|
||||
// create a union of a number of arbitrary pix
|
||||
Pix *CubeLineSegmenter::Pixa2Pix(Pixa *pixa, Box **dest_box,
|
||||
int start_pix, int pix_cnt) {
|
||||
// compute union_box
|
||||
int min_x = INT_MAX,
|
||||
max_x = INT_MIN,
|
||||
min_y = INT_MAX,
|
||||
max_y = INT_MIN;
|
||||
|
||||
for (int pix_idx = start_pix; pix_idx < (start_pix + pix_cnt); pix_idx++) {
|
||||
Box *pix_box = pixa->boxa->box[pix_idx];
|
||||
|
||||
UpdateRange(pix_box->x, pix_box->x + pix_box->w, &min_x, &max_x);
|
||||
UpdateRange(pix_box->y, pix_box->y + pix_box->h, &min_y, &max_y);
|
||||
}
|
||||
|
||||
(*dest_box) = boxCreate(min_x, min_y, max_x - min_x, max_y - min_y);
|
||||
if ((*dest_box) == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// create the union pix
|
||||
Pix *union_pix = pixCreate((*dest_box)->w, (*dest_box)->h, img_->d);
|
||||
if (union_pix == NULL) {
|
||||
boxDestroy(dest_box);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// create a pix corresponding to the union of all pixs
|
||||
// blt the src and dest pix
|
||||
for (int pix_idx = start_pix; pix_idx < (start_pix + pix_cnt); pix_idx++) {
|
||||
Box *pix_box = pixa->boxa->box[pix_idx];
|
||||
Pix *con_pix = pixa->pix[pix_idx];
|
||||
|
||||
pixRasterop(union_pix,
|
||||
pix_box->x - (*dest_box)->x, pix_box->y - (*dest_box)->y,
|
||||
pix_box->w, pix_box->h, PIX_SRC | PIX_DST, con_pix, 0, 0);
|
||||
}
|
||||
|
||||
return union_pix;
|
||||
}
|
||||
|
||||
// create a union of a number of arbitrary pix
|
||||
Pix *CubeLineSegmenter::Pixa2Pix(Pixa *pixa, Box **dest_box) {
|
||||
return Pixa2Pix(pixa, dest_box, 0, pixa->n);
|
||||
}
|
||||
|
||||
// merges a number of lines into one line given a bounding box and a mask
|
||||
bool CubeLineSegmenter::MergeLine(Pix *line_mask_pix, Box *line_box,
|
||||
Pixa *lines, Boxaa *lines_con_comps) {
|
||||
// compute the connected components of the lines to be merged
|
||||
Pixa *small_con_comps_pix;
|
||||
Boxa *small_line_con_comps = ComputeLineConComps(line_mask_pix,
|
||||
line_box, &small_con_comps_pix);
|
||||
|
||||
if (small_line_con_comps == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// for each connected component
|
||||
for (int con = 0; con < small_line_con_comps->n; con++) {
|
||||
Box *small_con_comp_box = small_line_con_comps->box[con];
|
||||
int best_line = -1,
|
||||
best_dist = INT_MAX,
|
||||
small_box_right = small_con_comp_box->x + small_con_comp_box->w,
|
||||
small_box_bottom = small_con_comp_box->y + small_con_comp_box->h;
|
||||
|
||||
// for each valid line
|
||||
for (int line = 0; line < lines->n; line++) {
|
||||
if (SmallLine(lines->boxa->box[line]) == true) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// for all the connected components in the line
|
||||
Boxa *line_con_comps = lines_con_comps->boxa[line];
|
||||
|
||||
for (int lcon = 0; lcon < line_con_comps->n; lcon++) {
|
||||
Box *con_comp_box = line_con_comps->box[lcon];
|
||||
int xdist,
|
||||
ydist,
|
||||
box_right = con_comp_box->x + con_comp_box->w,
|
||||
box_bottom = con_comp_box->y + con_comp_box->h;
|
||||
|
||||
xdist = MAX(small_con_comp_box->x, con_comp_box->x) -
|
||||
MIN(small_box_right, box_right);
|
||||
|
||||
ydist = MAX(small_con_comp_box->y, con_comp_box->y) -
|
||||
MIN(small_box_bottom, box_bottom);
|
||||
|
||||
// if there is an overlap in x-direction
|
||||
if (xdist <= 0) {
|
||||
if (best_line == -1 || ydist < best_dist) {
|
||||
best_dist = ydist;
|
||||
best_line = line;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// if the distance is too big, do not merged
|
||||
if (best_line != -1 && best_dist < est_alef_hgt_) {
|
||||
// add the pix to the best line
|
||||
Pix *new_line = PixUnion(lines->pix[best_line],
|
||||
lines->boxa->box[best_line],
|
||||
small_con_comps_pix->pix[con], small_con_comp_box);
|
||||
|
||||
if (new_line == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
pixDestroy(&lines->pix[best_line]);
|
||||
lines->pix[best_line] = new_line;
|
||||
}
|
||||
}
|
||||
|
||||
pixaDestroy(&small_con_comps_pix);
|
||||
boxaDestroy(&small_line_con_comps);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Creates new set of lines from the computed columns
|
||||
bool CubeLineSegmenter::AddLines(Pixa *lines) {
|
||||
// create an array that will hold the bounding boxes
|
||||
// of the concomps belonging to each line
|
||||
Boxaa *lines_con_comps = boxaaCreate(lines->n);
|
||||
if (lines_con_comps == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (int line = 0; line < lines->n; line++) {
|
||||
// if the line is not valid
|
||||
if (ValidLine(lines->pix[line], lines->boxa->box[line]) == false) {
|
||||
// split it
|
||||
Pixa *split_lines = SplitLine(lines->pix[line],
|
||||
lines->boxa->box[line]);
|
||||
|
||||
// remove the old line
|
||||
if (pixaRemovePix(lines, line) != 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
line--;
|
||||
|
||||
if (split_lines == NULL) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// add the split lines instead and move the pointer
|
||||
for (int s_line = 0; s_line < split_lines->n; s_line++) {
|
||||
Pix *sp_line = pixaGetPix(split_lines, s_line, L_CLONE);
|
||||
Box *sp_box = boxaGetBox(split_lines->boxa, s_line, L_CLONE);
|
||||
|
||||
if (sp_line == NULL || sp_box == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// insert the new line
|
||||
if (pixaInsertPix(lines, ++line, sp_line, sp_box) != 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// remove the split lines
|
||||
pixaDestroy(&split_lines);
|
||||
}
|
||||
}
|
||||
|
||||
// compute the concomps bboxes of each line
|
||||
for (int line = 0; line < lines->n; line++) {
|
||||
Boxa *line_con_comps = ComputeLineConComps(lines->pix[line],
|
||||
lines->boxa->box[line], NULL);
|
||||
|
||||
if (line_con_comps == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// insert it into the boxaa array
|
||||
if (boxaaAddBoxa(lines_con_comps, line_con_comps, L_INSERT) != 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// post process the lines:
|
||||
// merge the contents of "small" lines info legitimate lines
|
||||
for (int line = 0; line < lines->n; line++) {
|
||||
// a small line detected
|
||||
if (SmallLine(lines->boxa->box[line]) == true) {
|
||||
// merge its components to one of the valid lines
|
||||
if (MergeLine(lines->pix[line], lines->boxa->box[line],
|
||||
lines, lines_con_comps) == true) {
|
||||
// remove the small line
|
||||
if (pixaRemovePix(lines, line) != 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (boxaaRemoveBoxa(lines_con_comps, line) != 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
line--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
boxaaDestroy(&lines_con_comps);
|
||||
|
||||
// add the pix masks
|
||||
if (pixaaAddPixa(columns_, lines, L_INSERT) != 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Index the specific pixa using RTL reading order
|
||||
int *CubeLineSegmenter::IndexRTL(Pixa *pixa) {
|
||||
int *pix_index = new int[pixa->n];
|
||||
|
||||
for (int pix = 0; pix < pixa->n; pix++) {
|
||||
pix_index[pix] = pix;
|
||||
}
|
||||
|
||||
for (int ipix = 0; ipix < pixa->n; ipix++) {
|
||||
for (int jpix = ipix + 1; jpix < pixa->n; jpix++) {
|
||||
Box *ipix_box = pixa->boxa->box[pix_index[ipix]],
|
||||
*jpix_box = pixa->boxa->box[pix_index[jpix]];
|
||||
|
||||
// swap?
|
||||
if ((ipix_box->x + ipix_box->w) < (jpix_box->x + jpix_box->w)) {
|
||||
int temp = pix_index[ipix];
|
||||
pix_index[ipix] = pix_index[jpix];
|
||||
pix_index[jpix] = temp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return pix_index;
|
||||
}
|
||||
|
||||
// Performs line segmentation
|
||||
bool CubeLineSegmenter::LineSegment() {
|
||||
// Use full image morphology to find columns
|
||||
// This only works for simple layouts where each column
|
||||
// of text extends the full height of the input image.
|
||||
Pix *pix_temp1 = pixMorphCompSequence(img_, "c5.500", 0);
|
||||
if (pix_temp1 == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Mask with a single component over each column
|
||||
Pixa *pixam;
|
||||
Boxa *boxa = pixConnComp(pix_temp1, &pixam, 8);
|
||||
|
||||
if (boxa == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
int init_morph_min_hgt = kLineSepMorphMinHgt;
|
||||
char sequence_str[16];
|
||||
sprintf(sequence_str, "c100.%d", init_morph_min_hgt);
|
||||
|
||||
// Use selective region-based morphology to get the textline mask.
|
||||
Pixa *pixad = pixaMorphSequenceByRegion(img_, pixam, sequence_str, 0, 0);
|
||||
if (pixad == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// for all columns
|
||||
int col_cnt = boxaGetCount(boxa);
|
||||
|
||||
// create columns
|
||||
columns_ = pixaaCreate(col_cnt);
|
||||
if (columns_ == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// index columns based on readind order (RTL)
|
||||
int *col_order = IndexRTL(pixad);
|
||||
if (col_order == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
line_cnt_ = 0;
|
||||
|
||||
for (int col_idx = 0; col_idx < col_cnt; col_idx++) {
|
||||
int col = col_order[col_idx];
|
||||
|
||||
// get the pix and box corresponding to the column
|
||||
Pix *pixt3 = pixaGetPix(pixad, col, L_CLONE);
|
||||
if (pixt3 == NULL) {
|
||||
delete []col_order;
|
||||
return false;
|
||||
}
|
||||
|
||||
Box *col_box = pixad->boxa->box[col];
|
||||
|
||||
Pixa *pixac;
|
||||
Boxa *boxa2 = pixConnComp(pixt3, &pixac, 8);
|
||||
if (boxa2 == NULL) {
|
||||
delete []col_order;
|
||||
return false;
|
||||
}
|
||||
|
||||
// offset the boxes by the column box
|
||||
for (int line = 0; line < pixac->n; line++) {
|
||||
pixac->boxa->box[line]->x += col_box->x;
|
||||
pixac->boxa->box[line]->y += col_box->y;
|
||||
}
|
||||
|
||||
// add the lines
|
||||
if (AddLines(pixac) == true) {
|
||||
if (pixaaAddBox(columns_, col_box, L_CLONE) != 0) {
|
||||
delete []col_order;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
pixDestroy(&pixt3);
|
||||
boxaDestroy(&boxa2);
|
||||
|
||||
line_cnt_ += columns_->pixa[col_idx]->n;
|
||||
}
|
||||
|
||||
pixaDestroy(&pixam);
|
||||
pixaDestroy(&pixad);
|
||||
boxaDestroy(&boxa);
|
||||
|
||||
delete []col_order;
|
||||
pixDestroy(&pix_temp1);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Estimate the parameters of the font(s) used in the page
|
||||
bool CubeLineSegmenter::EstimateFontParams() {
|
||||
int hgt_hist[kHgtBins];
|
||||
int max_hgt;
|
||||
double mean_hgt;
|
||||
|
||||
// init hgt histogram of concomps
|
||||
memset(hgt_hist, 0, sizeof(hgt_hist));
|
||||
|
||||
// compute max hgt
|
||||
max_hgt = 0;
|
||||
|
||||
for (int con = 0; con < con_comps_->n; con++) {
|
||||
// skip conn comps that are too long or too wide
|
||||
if (con_comps_->boxa->box[con]->h > kMaxConnCompHgt ||
|
||||
con_comps_->boxa->box[con]->w > kMaxConnCompWid) {
|
||||
continue;
|
||||
}
|
||||
|
||||
max_hgt = MAX(max_hgt, con_comps_->boxa->box[con]->h);
|
||||
}
|
||||
|
||||
if (max_hgt <= 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// init hgt histogram of concomps
|
||||
memset(hgt_hist, 0, sizeof(hgt_hist));
|
||||
|
||||
// compute histogram
|
||||
mean_hgt = 0.0;
|
||||
for (int con = 0; con < con_comps_->n; con++) {
|
||||
// skip conn comps that are too long or too wide
|
||||
if (con_comps_->boxa->box[con]->h > kMaxConnCompHgt ||
|
||||
con_comps_->boxa->box[con]->w > kMaxConnCompWid) {
|
||||
continue;
|
||||
}
|
||||
|
||||
int bin = static_cast<int>(kHgtBins * con_comps_->boxa->box[con]->h /
|
||||
max_hgt);
|
||||
bin = MIN(bin, kHgtBins - 1);
|
||||
hgt_hist[bin]++;
|
||||
mean_hgt += con_comps_->boxa->box[con]->h;
|
||||
}
|
||||
|
||||
mean_hgt /= con_comps_->n;
|
||||
|
||||
// find the top 2 bins
|
||||
int idx[kHgtBins];
|
||||
|
||||
for (int bin = 0; bin < kHgtBins; bin++) {
|
||||
idx[bin] = bin;
|
||||
}
|
||||
|
||||
for (int ibin = 0; ibin < 2; ibin++) {
|
||||
for (int jbin = ibin + 1; jbin < kHgtBins; jbin++) {
|
||||
if (hgt_hist[idx[ibin]] < hgt_hist[idx[jbin]]) {
|
||||
int swap = idx[ibin];
|
||||
idx[ibin] = idx[jbin];
|
||||
idx[jbin] = swap;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// emperically, we found out that the 2 highest freq bins correspond
|
||||
// respectively to the dot and alef
|
||||
est_dot_hgt_ = (1.0 * (idx[0] + 1) * max_hgt / kHgtBins);
|
||||
est_alef_hgt_ = (1.0 * (idx[1] + 1) * max_hgt / kHgtBins);
|
||||
|
||||
// as a sanity check the dot hgt must be significanly lower than alef
|
||||
if (est_alef_hgt_ < (est_dot_hgt_ * 2)) {
|
||||
// use max_hgt to estimate instead
|
||||
est_alef_hgt_ = mean_hgt * 1.5;
|
||||
est_dot_hgt_ = est_alef_hgt_ / 5.0;
|
||||
}
|
||||
|
||||
est_alef_hgt_ = MAX(est_alef_hgt_, est_dot_hgt_ * 4.0);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// clean up the image
|
||||
Pix *CubeLineSegmenter::CleanUp(Pix *orig_img) {
|
||||
// get rid of long horizontal lines
|
||||
Pix *pix_temp0 = pixMorphCompSequence(orig_img, "o300.2", 0);
|
||||
pixXor(pix_temp0, pix_temp0, orig_img);
|
||||
|
||||
// get rid of long vertical lines
|
||||
Pix *pix_temp1 = pixMorphCompSequence(pix_temp0, "o2.300", 0);
|
||||
pixXor(pix_temp1, pix_temp1, pix_temp0);
|
||||
|
||||
pixDestroy(&pix_temp0);
|
||||
|
||||
// detect connected components
|
||||
Pixa *con_comps;
|
||||
Boxa *boxa = pixConnComp(pix_temp1, &con_comps, 8);
|
||||
if (boxa == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// detect and remove suspicious conn comps
|
||||
for (int con = 0; con < con_comps->n; con++) {
|
||||
Box *box = boxa->box[con];
|
||||
|
||||
// remove if suspc. conn comp
|
||||
if ((box->w > (box->h * kMaxHorzAspectRatio)) ||
|
||||
(box->h > (box->w * kMaxVertAspectRatio)) ||
|
||||
(box->w < kMinWid && box->h < kMinHgt)) {
|
||||
pixRasterop(pix_temp1, box->x, box->y, box->w, box->h,
|
||||
PIX_SRC ^ PIX_DST, con_comps->pix[con], 0, 0);
|
||||
}
|
||||
}
|
||||
|
||||
pixaDestroy(&con_comps);
|
||||
boxaDestroy(&boxa);
|
||||
|
||||
return pix_temp1;
|
||||
}
|
||||
|
||||
// Init the page segmenter
|
||||
bool CubeLineSegmenter::Init() {
|
||||
if (init_ == true) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (orig_img_ == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// call the internal line segmentation
|
||||
return FindLines();
|
||||
}
|
||||
|
||||
// return the pix mask and box of a specific line
|
||||
Pix *CubeLineSegmenter::Line(int line, Box **line_box) {
|
||||
if (init_ == false && Init() == false) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (line < 0 || line >= line_cnt_) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
(*line_box) = lines_pixa_->boxa->box[line];
|
||||
return lines_pixa_->pix[line];
|
||||
}
|
||||
|
||||
// Implements a basic rudimentary layout analysis based on Leptonica
|
||||
// works OK for Arabic. For other languages, the function TesseractPageAnalysis
|
||||
// should be called instead.
|
||||
bool CubeLineSegmenter::FindLines() {
|
||||
// convert the image to gray scale if necessary
|
||||
Pix *gray_scale_img = NULL;
|
||||
if (orig_img_->d != 2 && orig_img_->d != 8) {
|
||||
gray_scale_img = pixConvertTo8(orig_img_, false);
|
||||
if (gray_scale_img == NULL) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
gray_scale_img = orig_img_;
|
||||
}
|
||||
|
||||
// threshold image
|
||||
Pix *thresholded_img;
|
||||
thresholded_img = pixThresholdToBinary(gray_scale_img, 128);
|
||||
// free the gray scale image if necessary
|
||||
if (gray_scale_img != orig_img_) {
|
||||
pixDestroy(&gray_scale_img);
|
||||
}
|
||||
// bail-out if thresholding failed
|
||||
if (thresholded_img == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// deskew
|
||||
Pix *deskew_img = pixDeskew(thresholded_img, 2);
|
||||
if (deskew_img == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
pixDestroy(&thresholded_img);
|
||||
|
||||
img_ = CleanUp(deskew_img);
|
||||
pixDestroy(&deskew_img);
|
||||
if (img_ == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
pixDestroy(&deskew_img);
|
||||
|
||||
// compute connected components
|
||||
Boxa *boxa = pixConnComp(img_, &con_comps_, 8);
|
||||
if (boxa == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
boxaDestroy(&boxa);
|
||||
|
||||
// estimate dot and alef hgts
|
||||
if (EstimateFontParams() == false) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// perform line segmentation
|
||||
if (LineSegment() == false) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// success
|
||||
init_ = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
@ -1,156 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: cube_page_segmenter.h
|
||||
* Description: Declaration of the Cube Page Segmenter Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// TODO(ahmadab)
|
||||
// This is really a makeshift line segmenter that works well for Arabic
|
||||
// This should eventually be replaced by Ray Smith's Page segmenter
|
||||
// There are lots of magic numbers below that were determined empirically
|
||||
// but not thoroughly tested
|
||||
|
||||
#ifndef CUBE_LINE_SEGMENTER_H
|
||||
#define CUBE_LINE_SEGMENTER_H
|
||||
|
||||
#include "cube_reco_context.h"
|
||||
#include "allheaders.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class CubeLineSegmenter {
|
||||
public:
|
||||
CubeLineSegmenter(CubeRecoContext *cntxt, Pix *img);
|
||||
~CubeLineSegmenter();
|
||||
|
||||
// Accessor functions
|
||||
Pix *PostProcessedImage() {
|
||||
if (init_ == false && Init() == false) {
|
||||
return NULL;
|
||||
}
|
||||
return img_;
|
||||
}
|
||||
int ColumnCnt() {
|
||||
if (init_ == false && Init() == false) {
|
||||
return 0;
|
||||
}
|
||||
return columns_->n;
|
||||
}
|
||||
Box *Column(int col) {
|
||||
if (init_ == false && Init() == false) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return columns_->boxa->box[col];
|
||||
}
|
||||
int LineCnt() {
|
||||
if (init_ == false && Init() == false) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return line_cnt_;
|
||||
}
|
||||
Pixa *ConComps() {
|
||||
if (init_ == false && Init() == false) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return con_comps_;
|
||||
}
|
||||
Pixaa *Columns() {
|
||||
if (init_ == false && Init() == false) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return columns_;
|
||||
}
|
||||
inline double AlefHgtEst() { return est_alef_hgt_; }
|
||||
inline double DotHgtEst() { return est_dot_hgt_; }
|
||||
Pix *Line(int line, Box **line_box);
|
||||
|
||||
private:
|
||||
static const float kMinValidLineHgtRatio;
|
||||
static const int kLineSepMorphMinHgt;
|
||||
static const int kHgtBins;
|
||||
static const int kMaxConnCompHgt;
|
||||
static const int kMaxConnCompWid;
|
||||
static const int kMaxHorzAspectRatio;
|
||||
static const int kMaxVertAspectRatio;
|
||||
static const int kMinWid;
|
||||
static const int kMinHgt;
|
||||
static const double kMaxValidLineRatio;
|
||||
|
||||
// Cube Reco context
|
||||
CubeRecoContext *cntxt_;
|
||||
// Original image
|
||||
Pix *orig_img_;
|
||||
// Post processed image
|
||||
Pix *img_;
|
||||
// Init flag
|
||||
bool init_;
|
||||
// Output Line and column info
|
||||
int line_cnt_;
|
||||
Pixaa *columns_;
|
||||
Pixa *con_comps_;
|
||||
Pixa *lines_pixa_;
|
||||
// Estimates for sizes of ALEF and DOT needed for Arabic analysis
|
||||
double est_alef_hgt_;
|
||||
double est_dot_hgt_;
|
||||
|
||||
// Init the page analysis
|
||||
bool Init();
|
||||
// Performs line segmentation
|
||||
bool LineSegment();
|
||||
// Cleanup function
|
||||
Pix *CleanUp(Pix *pix);
|
||||
// compute validity ratio for a line
|
||||
double ValidityRatio(Pix *line_mask_pix, Box *line_box);
|
||||
// validate line
|
||||
bool ValidLine(Pix *line_mask_pix, Box *line_box);
|
||||
// split a line continuously until valid or fail
|
||||
Pixa *SplitLine(Pix *line_mask_pix, Box *line_box);
|
||||
// do a desperate attempt at cracking lines
|
||||
Pixa *CrackLine(Pix *line_mask_pix, Box *line_box);
|
||||
Pixa *CrackLine(Pix *line_mask_pix, Box *line_box, int line_cnt);
|
||||
// Checks of a line is too small
|
||||
bool SmallLine(Box *line_box);
|
||||
// Compute the connected components in a line
|
||||
Boxa * ComputeLineConComps(Pix *line_mask_pix, Box *line_box,
|
||||
Pixa **con_comps_pixa);
|
||||
// create a union of two arbitrary pix
|
||||
Pix *PixUnion(Pix *dest_pix, Box *dest_box, Pix *src_pix, Box *src_box);
|
||||
// create a union of a pixa subset
|
||||
Pix *Pixa2Pix(Pixa *pixa, Box **dest_box, int start_pix, int pix_cnt);
|
||||
// create a union of a pixa
|
||||
Pix *Pixa2Pix(Pixa *pixa, Box **dest_box);
|
||||
// merges a number of lines into one line given a bounding box and a mask
|
||||
bool MergeLine(Pix *line_mask_pix, Box *line_box,
|
||||
Pixa *lines, Boxaa *lines_con_comps);
|
||||
// Creates new set of lines from the computed columns
|
||||
bool AddLines(Pixa *lines);
|
||||
// Estimate the parameters of the font(s) used in the page
|
||||
bool EstimateFontParams();
|
||||
// perform a vertical Closing with the specified threshold
|
||||
// returning the resulting conn comps as a pixa
|
||||
Pixa *VerticalClosing(Pix *pix, int thresold, Boxa **boxa);
|
||||
// Index the specific pixa using RTL reading order
|
||||
int *IndexRTL(Pixa *pixa);
|
||||
// Implements a rudimentary page & line segmenter
|
||||
bool FindLines();
|
||||
};
|
||||
}
|
||||
|
||||
#endif // CUBE_LINE_SEGMENTER_H
|
@ -1,257 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: cube_object.cpp
|
||||
* Description: Implementation of the Cube Object Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <math.h>
|
||||
#include "cube_object.h"
|
||||
#include "cube_utils.h"
|
||||
#include "word_list_lang_model.h"
|
||||
|
||||
namespace tesseract {
|
||||
CubeObject::CubeObject(CubeRecoContext *cntxt, CharSamp *char_samp) {
|
||||
Init();
|
||||
char_samp_ = char_samp;
|
||||
cntxt_ = cntxt;
|
||||
}
|
||||
|
||||
CubeObject::CubeObject(CubeRecoContext *cntxt, Pix *pix,
|
||||
int left, int top, int wid, int hgt) {
|
||||
Init();
|
||||
char_samp_ = CubeUtils::CharSampleFromPix(pix, left, top, wid, hgt);
|
||||
own_char_samp_ = true;
|
||||
cntxt_ = cntxt;
|
||||
}
|
||||
|
||||
// Data member initialization function
|
||||
void CubeObject::Init() {
|
||||
char_samp_ = NULL;
|
||||
own_char_samp_ = false;
|
||||
alt_list_ = NULL;
|
||||
srch_obj_ = NULL;
|
||||
deslanted_alt_list_ = NULL;
|
||||
deslanted_srch_obj_ = NULL;
|
||||
deslanted_ = false;
|
||||
deslanted_char_samp_ = NULL;
|
||||
beam_obj_ = NULL;
|
||||
deslanted_beam_obj_ = NULL;
|
||||
cntxt_ = NULL;
|
||||
}
|
||||
|
||||
// Cleanup function
|
||||
void CubeObject::Cleanup() {
|
||||
delete alt_list_;
|
||||
alt_list_ = NULL;
|
||||
|
||||
delete deslanted_alt_list_;
|
||||
deslanted_alt_list_ = NULL;
|
||||
}
|
||||
|
||||
CubeObject::~CubeObject() {
|
||||
if (own_char_samp_ == true) {
|
||||
delete char_samp_;
|
||||
char_samp_ = NULL;
|
||||
}
|
||||
|
||||
delete srch_obj_;
|
||||
srch_obj_ = NULL;
|
||||
|
||||
delete deslanted_srch_obj_;
|
||||
deslanted_srch_obj_ = NULL;
|
||||
|
||||
delete beam_obj_;
|
||||
beam_obj_ = NULL;
|
||||
|
||||
delete deslanted_beam_obj_;
|
||||
deslanted_beam_obj_ = NULL;
|
||||
|
||||
delete deslanted_char_samp_;
|
||||
deslanted_char_samp_ = NULL;
|
||||
|
||||
Cleanup();
|
||||
}
|
||||
|
||||
/**
|
||||
* Actually do the recognition using the specified language mode. If none
|
||||
* is specified, the default language model in the CubeRecoContext is used.
|
||||
* @return the sorted list of alternate answers
|
||||
* @param word_mode determines whether recognition is done as a word or a phrase
|
||||
*/
|
||||
WordAltList *CubeObject::Recognize(LangModel *lang_mod, bool word_mode) {
|
||||
if (char_samp_ == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// clear alt lists
|
||||
Cleanup();
|
||||
|
||||
// no specified language model, use the one in the reco context
|
||||
if (lang_mod == NULL) {
|
||||
lang_mod = cntxt_->LangMod();
|
||||
}
|
||||
|
||||
// normalize if necessary
|
||||
if (cntxt_->SizeNormalization()) {
|
||||
Normalize();
|
||||
}
|
||||
|
||||
// assume not de-slanted by default
|
||||
deslanted_ = false;
|
||||
|
||||
// create a beam search object
|
||||
if (beam_obj_ == NULL) {
|
||||
beam_obj_ = new BeamSearch(cntxt_, word_mode);
|
||||
}
|
||||
|
||||
// create a cube search object
|
||||
if (srch_obj_ == NULL) {
|
||||
srch_obj_ = new CubeSearchObject(cntxt_, char_samp_);
|
||||
}
|
||||
|
||||
// run a beam search against the tesslang model
|
||||
alt_list_ = beam_obj_->Search(srch_obj_, lang_mod);
|
||||
|
||||
// deslant (if supported by language) and re-reco if probability is low enough
|
||||
if (cntxt_->HasItalics() == true &&
|
||||
(alt_list_ == NULL || alt_list_->AltCount() < 1 ||
|
||||
alt_list_->AltCost(0) > CubeUtils::Prob2Cost(kMinProbSkipDeslanted))) {
|
||||
|
||||
if (deslanted_beam_obj_ == NULL) {
|
||||
deslanted_beam_obj_ = new BeamSearch(cntxt_);
|
||||
}
|
||||
|
||||
if (deslanted_srch_obj_ == NULL) {
|
||||
deslanted_char_samp_ = char_samp_->Clone();
|
||||
if (deslanted_char_samp_ == NULL) {
|
||||
fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not "
|
||||
"construct deslanted CharSamp\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (deslanted_char_samp_->Deslant() == false) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
deslanted_srch_obj_ = new CubeSearchObject(cntxt_, deslanted_char_samp_);
|
||||
}
|
||||
|
||||
// run a beam search against the tesslang model
|
||||
deslanted_alt_list_ = deslanted_beam_obj_->Search(deslanted_srch_obj_,
|
||||
lang_mod);
|
||||
// should we use de-slanted altlist?
|
||||
if (deslanted_alt_list_ != NULL && deslanted_alt_list_->AltCount() > 0) {
|
||||
if (alt_list_ == NULL || alt_list_->AltCount() < 1 ||
|
||||
deslanted_alt_list_->AltCost(0) < alt_list_->AltCost(0)) {
|
||||
deslanted_ = true;
|
||||
return deslanted_alt_list_;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return alt_list_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Recognize the member char sample as a word
|
||||
*/
|
||||
WordAltList *CubeObject::RecognizeWord(LangModel *lang_mod) {
|
||||
return Recognize(lang_mod, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Recognize the member char sample as a phrase
|
||||
*/
|
||||
WordAltList *CubeObject::RecognizePhrase(LangModel *lang_mod) {
|
||||
return Recognize(lang_mod, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes the cost of a specific string. This is done by performing
|
||||
* recognition of a language model that allows only the specified word
|
||||
*/
|
||||
int CubeObject::WordCost(const char *str) {
|
||||
WordListLangModel *lang_mod = new WordListLangModel(cntxt_);
|
||||
|
||||
if (lang_mod->AddString(str) == false) {
|
||||
delete lang_mod;
|
||||
return WORST_COST;
|
||||
}
|
||||
|
||||
// run a beam search against the single string wordlist model
|
||||
WordAltList *alt_list = RecognizeWord(lang_mod);
|
||||
delete lang_mod;
|
||||
|
||||
int cost = WORST_COST;
|
||||
if (alt_list != NULL) {
|
||||
if (alt_list->AltCount() > 0) {
|
||||
cost = alt_list->AltCost(0);
|
||||
}
|
||||
}
|
||||
|
||||
return cost;
|
||||
}
|
||||
|
||||
// Recognizes a single character and returns the list of results.
|
||||
CharAltList *CubeObject::RecognizeChar() {
|
||||
if (char_samp_ == NULL) return NULL;
|
||||
CharAltList* alt_list = NULL;
|
||||
CharClassifier *char_classifier = cntxt_->Classifier();
|
||||
ASSERT_HOST(char_classifier != NULL);
|
||||
alt_list = char_classifier->Classify(char_samp_);
|
||||
return alt_list;
|
||||
}
|
||||
|
||||
// Normalize the input word bitmap to have a minimum aspect ratio
|
||||
bool CubeObject::Normalize() {
|
||||
// create a cube search object
|
||||
CubeSearchObject *srch_obj = new CubeSearchObject(cntxt_, char_samp_);
|
||||
// Perform over-segmentation
|
||||
int seg_cnt = srch_obj->SegPtCnt();
|
||||
// Only perform normalization if segment count is large enough
|
||||
if (seg_cnt < kMinNormalizationSegmentCnt) {
|
||||
delete srch_obj;
|
||||
return true;
|
||||
}
|
||||
// compute the mean AR of the segments
|
||||
double ar_mean = 0.0;
|
||||
for (int seg_idx = 0; seg_idx <= seg_cnt; seg_idx++) {
|
||||
CharSamp *seg_samp = srch_obj->CharSample(seg_idx - 1, seg_idx);
|
||||
if (seg_samp != NULL && seg_samp->Width() > 0) {
|
||||
ar_mean += (1.0 * seg_samp->Height() / seg_samp->Width());
|
||||
}
|
||||
}
|
||||
ar_mean /= (seg_cnt + 1);
|
||||
// perform normalization if segment AR is too high
|
||||
if (ar_mean > kMinNormalizationAspectRatio) {
|
||||
// scale down the image in the y-direction to attain AR
|
||||
CharSamp *new_samp = char_samp_->Scale(char_samp_->Width(),
|
||||
2.0 * char_samp_->Height() / ar_mean,
|
||||
false);
|
||||
if (new_samp != NULL) {
|
||||
// free existing char samp if owned
|
||||
if (own_char_samp_) {
|
||||
delete char_samp_;
|
||||
}
|
||||
// update with new scaled charsamp and set ownership flag
|
||||
char_samp_ = new_samp;
|
||||
own_char_samp_ = true;
|
||||
}
|
||||
}
|
||||
delete srch_obj;
|
||||
return true;
|
||||
}
|
||||
}
|
@ -1,171 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: cube_object.h
|
||||
* Description: Declaration of the Cube Object Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The CubeObject class is the main class used to perform recognition of
|
||||
// a specific char_samp as a single word.
|
||||
// To recognize a word, a CubeObject is constructed for this word.
|
||||
// A Call to RecognizeWord is then issued specifying the language model that
|
||||
// will be used during recognition. If none is specified, the default language
|
||||
// model in the CubeRecoContext is used. The CubeRecoContext is passed at
|
||||
// construction time
|
||||
//
|
||||
// The typical usage pattern for Cube is shown below:
|
||||
//
|
||||
// // Create and initialize Tesseract object and get its
|
||||
// // CubeRecoContext object (note that Tesseract object owns it,
|
||||
// // so it will be freed when the Tesseract object is freed).
|
||||
// tesseract::Tesseract *tess_obj = new tesseract::Tesseract();
|
||||
// tess_obj->init_tesseract(data_path, lang, tesseract::OEM_CUBE_ONLY);
|
||||
// CubeRecoContext *cntxt = tess_obj->GetCubeRecoContext();
|
||||
// CHECK(cntxt != NULL) << "Unable to create a Cube reco context";
|
||||
// .
|
||||
// .
|
||||
// .
|
||||
// // Do this to recognize a word in pix whose co-ordinates are
|
||||
// // (left,top,width,height)
|
||||
// tesseract::CubeObject *cube_obj;
|
||||
// cube_obj = new tesseract::CubeObject(cntxt, pix,
|
||||
// left, top, width, height);
|
||||
//
|
||||
// // Get back Cube's list of answers
|
||||
// tesseract::WordAltList *alt_list = cube_obj->RecognizeWord();
|
||||
// CHECK(alt_list != NULL && alt_list->AltCount() > 0);
|
||||
//
|
||||
// // Get the string and cost of every alternate
|
||||
// for (int alt = 0; alt < alt_list->AltCount(); alt++) {
|
||||
// // Return the result as a UTF-32 string
|
||||
// string_32 res_str32 = alt_list->Alt(alt);
|
||||
// // Convert to UTF8 if need-be
|
||||
// string res_str;
|
||||
// CubeUtils::UTF32ToUTF8(res_str32.c_str(), &res_str);
|
||||
// // Get the string cost. This should get bigger as you go deeper
|
||||
// // in the list
|
||||
// int cost = alt_list->AltCost(alt);
|
||||
// }
|
||||
//
|
||||
// // Call this once you are done recognizing this word
|
||||
// delete cube_obj;
|
||||
//
|
||||
// // Call this once you are done recognizing all words with
|
||||
// // for the current language
|
||||
// delete tess_obj;
|
||||
//
|
||||
// Note that if the language supports "Italics" (see the CubeRecoContext), the
|
||||
// RecognizeWord function attempts to de-slant the word.
|
||||
|
||||
#ifndef CUBE_OBJECT_H
|
||||
#define CUBE_OBJECT_H
|
||||
|
||||
#include "char_samp.h"
|
||||
#include "word_altlist.h"
|
||||
#include "beam_search.h"
|
||||
#include "cube_search_object.h"
|
||||
#include "tess_lang_model.h"
|
||||
#include "cube_reco_context.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// minimum aspect ratio needed to normalize a char_samp before recognition
|
||||
static const float kMinNormalizationAspectRatio = 3.5;
|
||||
// minimum probability a top alt choice must meet before having
|
||||
// deslanted processing applied to it
|
||||
static const float kMinProbSkipDeslanted = 0.25;
|
||||
|
||||
class CubeObject {
|
||||
public:
|
||||
// Different flavors of constructor. They just differ in the way the
|
||||
// word image is specified
|
||||
CubeObject(CubeRecoContext *cntxt, CharSamp *char_samp);
|
||||
CubeObject(CubeRecoContext *cntxt, Pix *pix,
|
||||
int left, int top, int wid, int hgt);
|
||||
~CubeObject();
|
||||
|
||||
// Perform the word recognition using the specified language mode. If none
|
||||
// is specified, the default language model in the CubeRecoContext is used.
|
||||
// Returns the sorted list of alternate word answers
|
||||
WordAltList *RecognizeWord(LangModel *lang_mod = NULL);
|
||||
// Same as RecognizeWord but recognizes as a phrase
|
||||
WordAltList *RecognizePhrase(LangModel *lang_mod = NULL);
|
||||
// Computes the cost of a specific string. This is done by performing
|
||||
// recognition of a language model that allows only the specified word.
|
||||
// The alternate list(s) will be permanently modified.
|
||||
int WordCost(const char *str);
|
||||
// Recognizes a single character and returns the list of results.
|
||||
CharAltList *RecognizeChar();
|
||||
|
||||
// Returns the BeamSearch object that resulted from the last call to
|
||||
// RecognizeWord
|
||||
inline BeamSearch *BeamObj() const {
|
||||
return (deslanted_ == true ? deslanted_beam_obj_ : beam_obj_);
|
||||
}
|
||||
// Returns the WordAltList object that resulted from the last call to
|
||||
// RecognizeWord
|
||||
inline WordAltList *AlternateList() const {
|
||||
return (deslanted_ == true ? deslanted_alt_list_ : alt_list_);
|
||||
}
|
||||
// Returns the CubeSearchObject object that resulted from the last call to
|
||||
// RecognizeWord
|
||||
inline CubeSearchObject *SrchObj() const {
|
||||
return (deslanted_ == true ? deslanted_srch_obj_ : srch_obj_);
|
||||
}
|
||||
// Returns the CharSamp object that resulted from the last call to
|
||||
// RecognizeWord. Note that this object is not necessarily identical to the
|
||||
// one passed at construction time as normalization might have occurred
|
||||
inline CharSamp *CharSample() const {
|
||||
return (deslanted_ == true ? deslanted_char_samp_ : char_samp_);
|
||||
}
|
||||
|
||||
// Set the ownership of the CharSamp
|
||||
inline void SetCharSampOwnership(bool own_char_samp) {
|
||||
own_char_samp_ = own_char_samp;
|
||||
}
|
||||
|
||||
protected:
|
||||
// Normalize the CharSamp if its aspect ratio exceeds the below constant.
|
||||
bool Normalize();
|
||||
|
||||
private:
|
||||
// minimum segment count needed to normalize a char_samp before recognition
|
||||
static const int kMinNormalizationSegmentCnt = 4;
|
||||
|
||||
// Data member initialization function
|
||||
void Init();
|
||||
// Free alternate lists.
|
||||
void Cleanup();
|
||||
// Perform the actual recognition using the specified language mode. If none
|
||||
// is specified, the default language model in the CubeRecoContext is used.
|
||||
// Returns the sorted list of alternate answers. Called by both
|
||||
// RecognizerWord (word_mode is true) or RecognizePhrase (word mode is false)
|
||||
WordAltList *Recognize(LangModel *lang_mod, bool word_mode);
|
||||
|
||||
CubeRecoContext *cntxt_;
|
||||
BeamSearch *beam_obj_;
|
||||
BeamSearch *deslanted_beam_obj_;
|
||||
bool own_char_samp_;
|
||||
bool deslanted_;
|
||||
CharSamp *char_samp_;
|
||||
CharSamp *deslanted_char_samp_;
|
||||
CubeSearchObject *srch_obj_;
|
||||
CubeSearchObject *deslanted_srch_obj_;
|
||||
WordAltList *alt_list_;
|
||||
WordAltList *deslanted_alt_list_;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // CUBE_OBJECT_H
|
@ -1,421 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: cube_search_object.cpp
|
||||
* Description: Implementation of the Cube Search Object Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include "cube_search_object.h"
|
||||
#include "cube_utils.h"
|
||||
#include "ndminx.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
const bool CubeSearchObject::kUseCroppedChars = true;
|
||||
|
||||
CubeSearchObject::CubeSearchObject(CubeRecoContext *cntxt, CharSamp *samp)
|
||||
: SearchObject(cntxt) {
|
||||
init_ = false;
|
||||
reco_cache_ = NULL;
|
||||
samp_cache_ = NULL;
|
||||
segments_ = NULL;
|
||||
segment_cnt_ = 0;
|
||||
samp_ = samp;
|
||||
left_ = 0;
|
||||
itop_ = 0;
|
||||
space_cost_ = NULL;
|
||||
no_space_cost_ = NULL;
|
||||
wid_ = samp_->Width();
|
||||
hgt_ = samp_->Height();
|
||||
max_seg_per_char_ = cntxt_->Params()->MaxSegPerChar();
|
||||
rtl_ = (cntxt_->ReadingOrder() == CubeRecoContext::R2L);
|
||||
min_spc_gap_ =
|
||||
static_cast<int>(hgt_ * cntxt_->Params()->MinSpaceHeightRatio());
|
||||
max_spc_gap_ =
|
||||
static_cast<int>(hgt_ * cntxt_->Params()->MaxSpaceHeightRatio());
|
||||
}
|
||||
|
||||
CubeSearchObject::~CubeSearchObject() {
|
||||
Cleanup();
|
||||
}
|
||||
|
||||
// Cleanup
|
||||
void CubeSearchObject::Cleanup() {
|
||||
// delete Recognition Cache
|
||||
if (reco_cache_) {
|
||||
for (int strt_seg = 0; strt_seg < segment_cnt_; strt_seg++) {
|
||||
if (reco_cache_[strt_seg]) {
|
||||
for (int end_seg = 0; end_seg < segment_cnt_; end_seg++) {
|
||||
if (reco_cache_[strt_seg][end_seg]) {
|
||||
delete reco_cache_[strt_seg][end_seg];
|
||||
}
|
||||
}
|
||||
delete []reco_cache_[strt_seg];
|
||||
}
|
||||
}
|
||||
delete []reco_cache_;
|
||||
reco_cache_ = NULL;
|
||||
}
|
||||
|
||||
// delete CharSamp Cache
|
||||
if (samp_cache_) {
|
||||
for (int strt_seg = 0; strt_seg < segment_cnt_; strt_seg++) {
|
||||
if (samp_cache_[strt_seg]) {
|
||||
for (int end_seg = 0; end_seg < segment_cnt_; end_seg++) {
|
||||
if (samp_cache_[strt_seg][end_seg]) {
|
||||
delete samp_cache_[strt_seg][end_seg];
|
||||
}
|
||||
}
|
||||
delete []samp_cache_[strt_seg];
|
||||
}
|
||||
}
|
||||
delete []samp_cache_;
|
||||
samp_cache_ = NULL;
|
||||
}
|
||||
|
||||
// delete segment list
|
||||
if (segments_) {
|
||||
for (int seg = 0; seg < segment_cnt_; seg++) {
|
||||
if (segments_[seg]) {
|
||||
delete segments_[seg];
|
||||
}
|
||||
}
|
||||
delete []segments_;
|
||||
segments_ = NULL;
|
||||
}
|
||||
|
||||
if (space_cost_) {
|
||||
delete []space_cost_;
|
||||
space_cost_ = NULL;
|
||||
}
|
||||
|
||||
if (no_space_cost_) {
|
||||
delete []no_space_cost_;
|
||||
no_space_cost_ = NULL;
|
||||
}
|
||||
|
||||
segment_cnt_ = 0;
|
||||
init_ = false;
|
||||
}
|
||||
|
||||
// # of segmentation points. One less than the count of segments
|
||||
int CubeSearchObject::SegPtCnt() {
|
||||
if (!init_ && !Init())
|
||||
return -1;
|
||||
return segment_cnt_ - 1;
|
||||
}
|
||||
|
||||
// init and allocate variables, perform segmentation
|
||||
bool CubeSearchObject::Init() {
|
||||
if (init_)
|
||||
return true;
|
||||
if (!Segment()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// init cache
|
||||
reco_cache_ = new CharAltList **[segment_cnt_];
|
||||
|
||||
samp_cache_ = new CharSamp **[segment_cnt_];
|
||||
|
||||
for (int seg = 0; seg < segment_cnt_; seg++) {
|
||||
reco_cache_[seg] = new CharAltList *[segment_cnt_];
|
||||
memset(reco_cache_[seg], 0, segment_cnt_ * sizeof(*reco_cache_[seg]));
|
||||
|
||||
samp_cache_[seg] = new CharSamp *[segment_cnt_];
|
||||
memset(samp_cache_[seg], 0, segment_cnt_ * sizeof(*samp_cache_[seg]));
|
||||
}
|
||||
|
||||
init_ = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
// returns a char sample corresponding to the bitmap between 2 seg pts
|
||||
CharSamp *CubeSearchObject::CharSample(int start_pt, int end_pt) {
|
||||
// init if necessary
|
||||
if (!init_ && !Init())
|
||||
return NULL;
|
||||
// validate segment range
|
||||
if (!IsValidSegmentRange(start_pt, end_pt))
|
||||
return NULL;
|
||||
|
||||
// look for the samp in the cache
|
||||
if (samp_cache_ && samp_cache_[start_pt + 1] &&
|
||||
samp_cache_[start_pt + 1][end_pt]) {
|
||||
return samp_cache_[start_pt + 1][end_pt];
|
||||
}
|
||||
// create a char samp object from the specified range of segments
|
||||
bool left_most;
|
||||
bool right_most;
|
||||
CharSamp *samp = CharSamp::FromConComps(segments_, start_pt + 1,
|
||||
end_pt - start_pt, NULL,
|
||||
&left_most, &right_most, hgt_);
|
||||
if (!samp)
|
||||
return NULL;
|
||||
|
||||
if (kUseCroppedChars) {
|
||||
CharSamp *cropped_samp = samp->Crop();
|
||||
// we no longer need the orig sample
|
||||
delete samp;
|
||||
if (!cropped_samp)
|
||||
return NULL;
|
||||
samp = cropped_samp;
|
||||
}
|
||||
|
||||
// get the dimensions of the new cropped sample
|
||||
int char_top = samp->Top();
|
||||
int char_wid = samp->Width();
|
||||
int char_hgt = samp->Height();
|
||||
|
||||
// for cursive languages, these features correspond to whether
|
||||
// the charsamp is at the beginning or end of conncomp
|
||||
if (cntxt_->Cursive() == true) {
|
||||
// first and last char flags depend on reading order
|
||||
bool first_char = rtl_ ? right_most : left_most;
|
||||
bool last_char = rtl_ ? left_most : right_most;
|
||||
|
||||
samp->SetFirstChar(first_char ? 255 : 0);
|
||||
samp->SetLastChar(last_char ? 255 : 0);
|
||||
} else {
|
||||
// for non cursive languages, these features correspond
|
||||
// to whether the charsamp is at the beginning or end of the word
|
||||
samp->SetFirstChar((start_pt == -1) ? 255 : 0);
|
||||
samp->SetLastChar((end_pt == (segment_cnt_ - 1)) ? 255 : 0);
|
||||
}
|
||||
samp->SetNormTop(255 * char_top / hgt_);
|
||||
samp->SetNormBottom(255 * (char_top + char_hgt) / hgt_);
|
||||
samp->SetNormAspectRatio(255 * char_wid / (char_wid + char_hgt));
|
||||
|
||||
// add to cache & return
|
||||
samp_cache_[start_pt + 1][end_pt] = samp;
|
||||
return samp;
|
||||
}
|
||||
|
||||
Box *CubeSearchObject::CharBox(int start_pt, int end_pt) {
|
||||
if (!init_ && !Init())
|
||||
return NULL;
|
||||
if (!IsValidSegmentRange(start_pt, end_pt)) {
|
||||
fprintf(stderr, "Cube ERROR (CubeSearchObject::CharBox): invalid "
|
||||
"segment range (%d, %d)\n", start_pt, end_pt);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// create a char samp object from the specified range of segments,
|
||||
// extract its dimensions into a leptonica box, and delete it
|
||||
bool left_most;
|
||||
bool right_most;
|
||||
CharSamp *samp = CharSamp::FromConComps(segments_, start_pt + 1,
|
||||
end_pt - start_pt, NULL,
|
||||
&left_most, &right_most, hgt_);
|
||||
if (!samp)
|
||||
return NULL;
|
||||
if (kUseCroppedChars) {
|
||||
CharSamp *cropped_samp = samp->Crop();
|
||||
delete samp;
|
||||
if (!cropped_samp) {
|
||||
return NULL;
|
||||
}
|
||||
samp = cropped_samp;
|
||||
}
|
||||
Box *box = boxCreate(samp->Left(), samp->Top(),
|
||||
samp->Width(), samp->Height());
|
||||
delete samp;
|
||||
return box;
|
||||
}
|
||||
|
||||
// call from Beam Search to return the alt list corresponding to
|
||||
// recognizing the bitmap between two segmentation pts
|
||||
CharAltList * CubeSearchObject::RecognizeSegment(int start_pt, int end_pt) {
|
||||
// init if necessary
|
||||
if (!init_ && !Init()) {
|
||||
fprintf(stderr, "Cube ERROR (CubeSearchObject::RecognizeSegment): could "
|
||||
"not initialize CubeSearchObject\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// validate segment range
|
||||
if (!IsValidSegmentRange(start_pt, end_pt)) {
|
||||
fprintf(stderr, "Cube ERROR (CubeSearchObject::RecognizeSegment): invalid "
|
||||
"segment range (%d, %d)\n", start_pt, end_pt);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// look for the recognition results in cache in the cache
|
||||
if (reco_cache_ && reco_cache_[start_pt + 1] &&
|
||||
reco_cache_[start_pt + 1][end_pt]) {
|
||||
return reco_cache_[start_pt + 1][end_pt];
|
||||
}
|
||||
|
||||
// create the char sample corresponding to the blob
|
||||
CharSamp *samp = CharSample(start_pt, end_pt);
|
||||
if (!samp) {
|
||||
fprintf(stderr, "Cube ERROR (CubeSearchObject::RecognizeSegment): could "
|
||||
"not construct CharSamp\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// recognize the char sample
|
||||
CharClassifier *char_classifier = cntxt_->Classifier();
|
||||
if (char_classifier) {
|
||||
reco_cache_[start_pt + 1][end_pt] = char_classifier->Classify(samp);
|
||||
} else {
|
||||
// no classifer: all characters are equally probable; add a penalty
|
||||
// that favors 2-segment characters and aspect ratios (w/h) > 1
|
||||
fprintf(stderr, "Cube WARNING (CubeSearchObject::RecognizeSegment): cube "
|
||||
"context has no character classifier!! Inventing a probability "
|
||||
"distribution.\n");
|
||||
int class_cnt = cntxt_->CharacterSet()->ClassCount();
|
||||
CharAltList *alt_list = new CharAltList(cntxt_->CharacterSet(), class_cnt);
|
||||
int seg_cnt = end_pt - start_pt;
|
||||
double prob_val = (1.0 / class_cnt) *
|
||||
exp(-fabs(seg_cnt - 2.0)) *
|
||||
exp(-samp->Width() / static_cast<double>(samp->Height()));
|
||||
|
||||
for (int class_idx = 0; class_idx < class_cnt; class_idx++) {
|
||||
alt_list->Insert(class_idx, CubeUtils::Prob2Cost(prob_val));
|
||||
}
|
||||
reco_cache_[start_pt + 1][end_pt] = alt_list;
|
||||
}
|
||||
|
||||
return reco_cache_[start_pt + 1][end_pt];
|
||||
}
|
||||
|
||||
// Perform segmentation of the bitmap by detecting connected components,
|
||||
// segmenting each connected component using windowed vertical pixel density
|
||||
// histogram and sorting the resulting segments in reading order
|
||||
bool CubeSearchObject::Segment() {
|
||||
if (!samp_)
|
||||
return false;
|
||||
segment_cnt_ = 0;
|
||||
segments_ = samp_->Segment(&segment_cnt_, rtl_,
|
||||
cntxt_->Params()->HistWindWid(),
|
||||
cntxt_->Params()->MinConCompSize());
|
||||
if (!segments_ || segment_cnt_ <= 0) {
|
||||
return false;
|
||||
}
|
||||
if (segment_cnt_ >= kMaxSegmentCnt) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// computes the space and no space costs at gaps between segments
|
||||
bool CubeSearchObject::ComputeSpaceCosts() {
|
||||
// init if necessary
|
||||
if (!init_ && !Init())
|
||||
return false;
|
||||
|
||||
// Already computed
|
||||
if (space_cost_)
|
||||
return true;
|
||||
|
||||
// No segmentation points
|
||||
if (segment_cnt_ < 2)
|
||||
return false;
|
||||
|
||||
// Compute the maximum x to the left of and minimum x to the right of each
|
||||
// segmentation point
|
||||
int *max_left_x = new int[segment_cnt_ - 1];
|
||||
int *min_right_x = new int[segment_cnt_ - 1];
|
||||
if (rtl_) {
|
||||
min_right_x[0] = segments_[0]->Left();
|
||||
max_left_x[segment_cnt_ - 2] = segments_[segment_cnt_ - 1]->Right();
|
||||
for (int pt_idx = 1; pt_idx < (segment_cnt_ - 1); pt_idx++) {
|
||||
min_right_x[pt_idx] =
|
||||
MIN(min_right_x[pt_idx - 1], segments_[pt_idx]->Left());
|
||||
max_left_x[segment_cnt_ - pt_idx - 2] =
|
||||
MAX(max_left_x[segment_cnt_ - pt_idx - 1],
|
||||
segments_[segment_cnt_ - pt_idx - 1]->Right());
|
||||
}
|
||||
} else {
|
||||
min_right_x[segment_cnt_ - 2] = segments_[segment_cnt_ - 1]->Left();
|
||||
max_left_x[0] = segments_[0]->Right();
|
||||
for (int pt_idx = 1; pt_idx < (segment_cnt_ - 1); pt_idx++) {
|
||||
min_right_x[segment_cnt_ - pt_idx - 2] =
|
||||
MIN(min_right_x[segment_cnt_ - pt_idx - 1],
|
||||
segments_[segment_cnt_ - pt_idx - 1]->Left());
|
||||
max_left_x[pt_idx] =
|
||||
MAX(max_left_x[pt_idx - 1], segments_[pt_idx]->Right());
|
||||
}
|
||||
}
|
||||
|
||||
// Allocate memory for space and no space costs
|
||||
// trivial cases
|
||||
space_cost_ = new int[segment_cnt_ - 1];
|
||||
no_space_cost_ = new int[segment_cnt_ - 1];
|
||||
|
||||
// go through all segmentation points determining the horizontal gap between
|
||||
// the images on both sides of each break points. Use the gap to estimate
|
||||
// the probability of a space. The probability is modeled a linear function
|
||||
// of the gap width
|
||||
for (int pt_idx = 0; pt_idx < (segment_cnt_ - 1); pt_idx++) {
|
||||
// determine the gap at the segmentation point
|
||||
int gap = min_right_x[pt_idx] - max_left_x[pt_idx];
|
||||
float prob = 0.0;
|
||||
|
||||
// gap is too small => no space
|
||||
if (gap < min_spc_gap_ || max_spc_gap_ == min_spc_gap_) {
|
||||
prob = 0.0;
|
||||
} else if (gap > max_spc_gap_) {
|
||||
// gap is too big => definite space
|
||||
prob = 1.0;
|
||||
} else {
|
||||
// gap is somewhere in between, compute probability
|
||||
prob = (gap - min_spc_gap_) /
|
||||
static_cast<double>(max_spc_gap_ - min_spc_gap_);
|
||||
}
|
||||
|
||||
// compute cost of space and non-space
|
||||
space_cost_[pt_idx] = CubeUtils::Prob2Cost(prob) +
|
||||
CubeUtils::Prob2Cost(0.1);
|
||||
no_space_cost_[pt_idx] = CubeUtils::Prob2Cost(1.0 - prob);
|
||||
}
|
||||
|
||||
delete []min_right_x;
|
||||
delete []max_left_x;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns the cost of having a space before the specified segmentation point
|
||||
int CubeSearchObject::SpaceCost(int pt_idx) {
|
||||
if (!space_cost_ && !ComputeSpaceCosts()) {
|
||||
// Failed to compute costs return a zero prob
|
||||
return CubeUtils::Prob2Cost(0.0);
|
||||
}
|
||||
return space_cost_[pt_idx];
|
||||
}
|
||||
|
||||
// Returns the cost of not having a space before the specified
|
||||
// segmentation point
|
||||
int CubeSearchObject::NoSpaceCost(int pt_idx) {
|
||||
// If failed to compute costs, return a 1.0 prob
|
||||
if (!space_cost_ && !ComputeSpaceCosts())
|
||||
return CubeUtils::Prob2Cost(0.0);
|
||||
return no_space_cost_[pt_idx];
|
||||
}
|
||||
|
||||
// Returns the cost of not having any spaces within the specified range
|
||||
// of segmentation points
|
||||
int CubeSearchObject::NoSpaceCost(int st_pt, int end_pt) {
|
||||
// If fail to compute costs, return a 1.0 prob
|
||||
if (!space_cost_ && !ComputeSpaceCosts())
|
||||
return CubeUtils::Prob2Cost(1.0);
|
||||
int no_spc_cost = 0;
|
||||
for (int pt_idx = st_pt + 1; pt_idx < end_pt; pt_idx++)
|
||||
no_spc_cost += NoSpaceCost(pt_idx);
|
||||
return no_spc_cost;
|
||||
}
|
||||
}
|
@ -1,122 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: cube_search_object.h
|
||||
* Description: Declaration of the Cube Search Object Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The CubeSearchObject class represents a char_samp (a word bitmap) that is
|
||||
// being searched for characters (or recognizeable entities).
|
||||
// The Class detects the connected components and peforms an oversegmentation
|
||||
// on each ConComp. The result of which is a list of segments that are ordered
|
||||
// in reading order.
|
||||
// The class provided methods that inquire about the number of segments, the
|
||||
// CharSamp corresponding to any segment range and the recognition results
|
||||
// of any segment range
|
||||
// An object of Class CubeSearchObject is used by the BeamSearch algorithm
|
||||
// to recognize a CharSamp into a list of word alternates
|
||||
|
||||
#ifndef CUBE_SEARCH_OBJECT_H
|
||||
#define CUBE_SEARCH_OBJECT_H
|
||||
|
||||
#include "search_object.h"
|
||||
#include "char_samp.h"
|
||||
#include "conv_net_classifier.h"
|
||||
#include "cube_reco_context.h"
|
||||
#include "allheaders.h"
|
||||
|
||||
namespace tesseract {
|
||||
class CubeSearchObject : public SearchObject {
|
||||
public:
|
||||
CubeSearchObject(CubeRecoContext *cntxt, CharSamp *samp);
|
||||
~CubeSearchObject();
|
||||
|
||||
// returns the Segmentation Point count of the CharSamp owned by the class
|
||||
int SegPtCnt();
|
||||
// Recognize the set of segments given by the specified range and return
|
||||
// a list of possible alternate answers
|
||||
CharAltList * RecognizeSegment(int start_pt, int end_pt);
|
||||
// Returns the CharSamp corresponding to the specified segment range
|
||||
CharSamp *CharSample(int start_pt, int end_pt);
|
||||
// Returns a leptonica box corresponding to the specified segment range
|
||||
Box *CharBox(int start_pt, int end_pt);
|
||||
// Returns the cost of having a space before the specified segmentation pt
|
||||
int SpaceCost(int seg_pt);
|
||||
// Returns the cost of not having a space before the specified
|
||||
// segmentation pt
|
||||
int NoSpaceCost(int seg_pt);
|
||||
// Returns the cost of not having any spaces within the specified range
|
||||
// of segmentation points
|
||||
int NoSpaceCost(int seg_pt, int end_pt);
|
||||
|
||||
private:
|
||||
// Maximum reasonable segment count
|
||||
static const int kMaxSegmentCnt = 128;
|
||||
// Use cropped samples
|
||||
static const bool kUseCroppedChars;
|
||||
|
||||
// reading order flag
|
||||
bool rtl_;
|
||||
// cached dimensions of char samp
|
||||
int left_;
|
||||
int itop_;
|
||||
int wid_;
|
||||
int hgt_;
|
||||
// minimum and maximum and possible inter-segment gaps for spaces
|
||||
int min_spc_gap_;
|
||||
int max_spc_gap_;
|
||||
// initialization flag
|
||||
bool init_;
|
||||
// maximum segments per character: Cached from tuning parameters object
|
||||
int max_seg_per_char_;
|
||||
// char sample to be processed
|
||||
CharSamp *samp_;
|
||||
// segment count
|
||||
int segment_cnt_;
|
||||
// segments of the processed char samp
|
||||
ConComp **segments_;
|
||||
// Cache data members:
|
||||
// There are two caches kept; a CharSamp cache and a CharAltList cache
|
||||
// Each is a 2-D array of CharSamp and CharAltList pointers respectively
|
||||
// hence the triple pointer.
|
||||
CharAltList ***reco_cache_;
|
||||
CharSamp ***samp_cache_;
|
||||
// Cached costs of space and no-space after every segment. Computed only
|
||||
// in phrase mode
|
||||
int *space_cost_;
|
||||
int *no_space_cost_;
|
||||
|
||||
// init and allocate variables, perform segmentation
|
||||
bool Init();
|
||||
// Cleanup
|
||||
void Cleanup();
|
||||
// Perform segmentation of the bitmap by detecting connected components,
|
||||
// segmenting each connected component using windowed vertical pixel density
|
||||
// histogram and sorting the resulting segments in reading order
|
||||
// Returns true on success
|
||||
bool Segment();
|
||||
// validate the segment ranges.
|
||||
inline bool IsValidSegmentRange(int start_pt, int end_pt) {
|
||||
return (end_pt > start_pt && start_pt >= -1 && start_pt < segment_cnt_ &&
|
||||
end_pt >= 0 && end_pt <= segment_cnt_ &&
|
||||
end_pt <= (start_pt + max_seg_per_char_));
|
||||
}
|
||||
// computes the space and no space costs at gaps between segments
|
||||
// return true on success
|
||||
bool ComputeSpaceCosts();
|
||||
};
|
||||
}
|
||||
|
||||
#endif // CUBE_SEARCH_OBJECT_H
|
@ -1,213 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: cube_tuning_params.cpp
|
||||
* Description: Implementation of the CubeTuningParameters Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "cube_tuning_params.h"
|
||||
#include "tuning_params.h"
|
||||
#include "cube_utils.h"
|
||||
|
||||
namespace tesseract {
|
||||
CubeTuningParams::CubeTuningParams() {
|
||||
reco_wgt_ = 1.0;
|
||||
size_wgt_ = 1.0;
|
||||
char_bigrams_wgt_ = 1.0;
|
||||
word_unigrams_wgt_ = 0.0;
|
||||
max_seg_per_char_ = 8;
|
||||
beam_width_ = 32;
|
||||
tp_classifier_ = NN;
|
||||
tp_feat_ = BMP;
|
||||
conv_grid_size_ = 32;
|
||||
hist_wind_wid_ = 0;
|
||||
max_word_aspect_ratio_ = 10.0;
|
||||
min_space_height_ratio_ = 0.2;
|
||||
max_space_height_ratio_ = 0.3;
|
||||
min_con_comp_size_ = 0;
|
||||
combiner_run_thresh_ = 1.0;
|
||||
combiner_classifier_thresh_ = 0.5;
|
||||
ood_wgt_ = 1.0;
|
||||
num_wgt_ = 1.0;
|
||||
|
||||
}
|
||||
|
||||
CubeTuningParams::~CubeTuningParams() {
|
||||
}
|
||||
|
||||
// Create an Object given the data file path and the language by loading
|
||||
// the approporiate file
|
||||
CubeTuningParams *CubeTuningParams::Create(const string &data_file_path,
|
||||
const string &lang) {
|
||||
CubeTuningParams *obj = new CubeTuningParams();
|
||||
|
||||
string tuning_params_file;
|
||||
tuning_params_file = data_file_path + lang;
|
||||
tuning_params_file += ".cube.params";
|
||||
|
||||
if (!obj->Load(tuning_params_file)) {
|
||||
fprintf(stderr, "Cube ERROR (CubeTuningParams::Create): unable to "
|
||||
"load tuning parameters from %s\n", tuning_params_file.c_str());
|
||||
delete obj;
|
||||
obj = NULL;
|
||||
}
|
||||
|
||||
return obj;
|
||||
}
|
||||
|
||||
// Loads the params file
|
||||
bool CubeTuningParams::Load(string tuning_params_file) {
|
||||
// load the string into memory
|
||||
string param_str;
|
||||
|
||||
if (CubeUtils::ReadFileToString(tuning_params_file, ¶m_str) == false) {
|
||||
fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): unable to read "
|
||||
"file %s\n", tuning_params_file.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
// split into lines
|
||||
vector<string> str_vec;
|
||||
CubeUtils::SplitStringUsing(param_str, "\r\n", &str_vec);
|
||||
if (str_vec.size() < 8) {
|
||||
fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): number of rows "
|
||||
"in parameter file is too low\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// for all entries
|
||||
for (int entry = 0; entry < str_vec.size(); entry++) {
|
||||
// tokenize
|
||||
vector<string> str_tok;
|
||||
|
||||
// should be only two tokens
|
||||
CubeUtils::SplitStringUsing(str_vec[entry], "=", &str_tok);
|
||||
if (str_tok.size() != 2) {
|
||||
fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): invalid format in "
|
||||
"line: %s.\n", str_vec[entry].c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
double val = 0;
|
||||
char peekchar = (str_tok[1].c_str())[0];
|
||||
if ((peekchar >= '0' && peekchar <= '9') ||
|
||||
peekchar == '-' || peekchar == '+' ||
|
||||
peekchar == '.') {
|
||||
// read the value
|
||||
if (sscanf(str_tok[1].c_str(), "%lf", &val) != 1) {
|
||||
fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): invalid format "
|
||||
"in line: %s.\n", str_vec[entry].c_str());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// token type
|
||||
if (str_tok[0] == "RecoWgt") {
|
||||
reco_wgt_ = val;
|
||||
} else if (str_tok[0] == "SizeWgt") {
|
||||
size_wgt_ = val;
|
||||
} else if (str_tok[0] == "CharBigramsWgt") {
|
||||
char_bigrams_wgt_ = val;
|
||||
} else if (str_tok[0] == "WordUnigramsWgt") {
|
||||
word_unigrams_wgt_ = val;
|
||||
} else if (str_tok[0] == "MaxSegPerChar") {
|
||||
max_seg_per_char_ = static_cast<int>(val);
|
||||
} else if (str_tok[0] == "BeamWidth") {
|
||||
beam_width_ = static_cast<int>(val);
|
||||
} else if (str_tok[0] == "Classifier") {
|
||||
if (str_tok[1] == "NN") {
|
||||
tp_classifier_ = TuningParams::NN;
|
||||
} else if (str_tok[1] == "HYBRID_NN") {
|
||||
tp_classifier_ = TuningParams::HYBRID_NN;
|
||||
} else {
|
||||
fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): invalid "
|
||||
"classifier type in line: %s.\n", str_vec[entry].c_str());
|
||||
return false;
|
||||
}
|
||||
} else if (str_tok[0] == "FeatureType") {
|
||||
if (str_tok[1] == "BMP") {
|
||||
tp_feat_ = TuningParams::BMP;
|
||||
} else if (str_tok[1] == "CHEBYSHEV") {
|
||||
tp_feat_ = TuningParams::CHEBYSHEV;
|
||||
} else if (str_tok[1] == "HYBRID") {
|
||||
tp_feat_ = TuningParams::HYBRID;
|
||||
} else {
|
||||
fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): invalid feature "
|
||||
"type in line: %s.\n", str_vec[entry].c_str());
|
||||
return false;
|
||||
}
|
||||
} else if (str_tok[0] == "ConvGridSize") {
|
||||
conv_grid_size_ = static_cast<int>(val);
|
||||
} else if (str_tok[0] == "HistWindWid") {
|
||||
hist_wind_wid_ = val;
|
||||
} else if (str_tok[0] == "MinConCompSize") {
|
||||
min_con_comp_size_ = val;
|
||||
} else if (str_tok[0] == "MaxWordAspectRatio") {
|
||||
max_word_aspect_ratio_ = val;
|
||||
} else if (str_tok[0] == "MinSpaceHeightRatio") {
|
||||
min_space_height_ratio_ = val;
|
||||
} else if (str_tok[0] == "MaxSpaceHeightRatio") {
|
||||
max_space_height_ratio_ = val;
|
||||
} else if (str_tok[0] == "CombinerRunThresh") {
|
||||
combiner_run_thresh_ = val;
|
||||
} else if (str_tok[0] == "CombinerClassifierThresh") {
|
||||
combiner_classifier_thresh_ = val;
|
||||
} else if (str_tok[0] == "OODWgt") {
|
||||
ood_wgt_ = val;
|
||||
} else if (str_tok[0] == "NumWgt") {
|
||||
num_wgt_ = val;
|
||||
} else {
|
||||
fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): unknown parameter "
|
||||
"in line: %s.\n", str_vec[entry].c_str());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Save the parameters to a file
|
||||
bool CubeTuningParams::Save(string file_name) {
|
||||
FILE *params_file = fopen(file_name.c_str(), "wb");
|
||||
if (params_file == NULL) {
|
||||
fprintf(stderr, "Cube ERROR (CubeTuningParams::Save): error opening file "
|
||||
"%s for write.\n", file_name.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
fprintf(params_file, "RecoWgt=%.4f\n", reco_wgt_);
|
||||
fprintf(params_file, "SizeWgt=%.4f\n", size_wgt_);
|
||||
fprintf(params_file, "CharBigramsWgt=%.4f\n", char_bigrams_wgt_);
|
||||
fprintf(params_file, "WordUnigramsWgt=%.4f\n", word_unigrams_wgt_);
|
||||
fprintf(params_file, "MaxSegPerChar=%d\n", max_seg_per_char_);
|
||||
fprintf(params_file, "BeamWidth=%d\n", beam_width_);
|
||||
fprintf(params_file, "ConvGridSize=%d\n", conv_grid_size_);
|
||||
fprintf(params_file, "HistWindWid=%d\n", hist_wind_wid_);
|
||||
fprintf(params_file, "MinConCompSize=%d\n", min_con_comp_size_);
|
||||
fprintf(params_file, "MaxWordAspectRatio=%.4f\n", max_word_aspect_ratio_);
|
||||
fprintf(params_file, "MinSpaceHeightRatio=%.4f\n", min_space_height_ratio_);
|
||||
fprintf(params_file, "MaxSpaceHeightRatio=%.4f\n", max_space_height_ratio_);
|
||||
fprintf(params_file, "CombinerRunThresh=%.4f\n", combiner_run_thresh_);
|
||||
fprintf(params_file, "CombinerClassifierThresh=%.4f\n",
|
||||
combiner_classifier_thresh_);
|
||||
fprintf(params_file, "OODWgt=%.4f\n", ood_wgt_);
|
||||
fprintf(params_file, "NumWgt=%.4f\n", num_wgt_);
|
||||
|
||||
fclose(params_file);
|
||||
return true;
|
||||
}
|
||||
}
|
@ -1,57 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: cube_tuning_params.h
|
||||
* Description: Declaration of the CubeTuningParameters Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The CubeTuningParams class abstracts all the parameters that are used
|
||||
// in Cube and are tuned/learned during the training process. Inherits
|
||||
// from the TuningParams class.
|
||||
|
||||
#ifndef CUBE_TUNING_PARAMS_H
|
||||
#define CUBE_TUNING_PARAMS_H
|
||||
|
||||
#include <string>
|
||||
#include "tuning_params.h"
|
||||
|
||||
namespace tesseract {
|
||||
class CubeTuningParams : public TuningParams {
|
||||
public:
|
||||
CubeTuningParams();
|
||||
~CubeTuningParams();
|
||||
|
||||
// Accessor functions
|
||||
inline double OODWgt() { return ood_wgt_; }
|
||||
inline double NumWgt() { return num_wgt_; }
|
||||
|
||||
inline void SetOODWgt(double wgt) { ood_wgt_ = wgt; }
|
||||
inline void SetNumWgt(double wgt) { num_wgt_ = wgt; }
|
||||
|
||||
// Create an object given the data file path and the language by loading
|
||||
// the approporiate file
|
||||
static CubeTuningParams * Create(const string &data_file,
|
||||
const string &lang);
|
||||
// Save and load the tuning parameters to a specified file
|
||||
bool Save(string file_name);
|
||||
bool Load(string file_name);
|
||||
|
||||
private:
|
||||
double ood_wgt_;
|
||||
double num_wgt_;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // CUBE_TUNING_PARAMS_H
|
@ -1,399 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: cube_utils.cpp
|
||||
* Description: Implementation of the Cube Utilities Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <math.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "cube_utils.h"
|
||||
#include "char_set.h"
|
||||
#include "unichar.h"
|
||||
|
||||
namespace tesseract {
|
||||
CubeUtils::CubeUtils() {
|
||||
}
|
||||
|
||||
CubeUtils::~CubeUtils() {
|
||||
}
|
||||
|
||||
/**
|
||||
* convert a prob to a cost (-ve log prob)
|
||||
*/
|
||||
int CubeUtils::Prob2Cost(double prob_val) {
|
||||
if (prob_val < MIN_PROB) {
|
||||
return MIN_PROB_COST;
|
||||
}
|
||||
return static_cast<int>(-log(prob_val) * PROB2COST_SCALE);
|
||||
}
|
||||
|
||||
/**
|
||||
* converts a cost to probability
|
||||
*/
|
||||
double CubeUtils::Cost2Prob(int cost) {
|
||||
return exp(-cost / PROB2COST_SCALE);
|
||||
}
|
||||
|
||||
/**
|
||||
* computes the length of a NULL terminated char_32 string
|
||||
*/
|
||||
int CubeUtils::StrLen(const char_32 *char_32_ptr) {
|
||||
if (char_32_ptr == NULL) {
|
||||
return 0;
|
||||
}
|
||||
int len = -1;
|
||||
while (char_32_ptr[++len]);
|
||||
return len;
|
||||
}
|
||||
|
||||
/**
|
||||
* compares two char_32 strings
|
||||
*/
|
||||
int CubeUtils::StrCmp(const char_32 *str1, const char_32 *str2) {
|
||||
const char_32 *pch1 = str1;
|
||||
const char_32 *pch2 = str2;
|
||||
|
||||
for (; (*pch1) != 0 && (*pch2) != 0; pch1++, pch2++) {
|
||||
if ((*pch1) != (*pch2)) {
|
||||
return (*pch1) - (*pch2);
|
||||
}
|
||||
}
|
||||
|
||||
if ((*pch1) == 0) {
|
||||
if ((*pch2) == 0) {
|
||||
return 0;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Duplicates a 32-bit char buffer
|
||||
*/
|
||||
char_32 *CubeUtils::StrDup(const char_32 *str32) {
|
||||
int len = StrLen(str32);
|
||||
char_32 *new_str = new char_32[len + 1];
|
||||
memcpy(new_str, str32, len * sizeof(*str32));
|
||||
new_str[len] = 0;
|
||||
return new_str;
|
||||
}
|
||||
|
||||
/**
|
||||
* creates a char samp from a specified portion of the image
|
||||
*/
|
||||
CharSamp *CubeUtils::CharSampleFromPix(Pix *pix, int left, int top,
|
||||
int wid, int hgt) {
|
||||
// get the raw img data from the image
|
||||
unsigned char *temp_buff = GetImageData(pix, left, top, wid, hgt);
|
||||
if (temp_buff == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// create a char samp from temp buffer
|
||||
CharSamp *char_samp = CharSamp::FromRawData(left, top, wid, hgt, temp_buff);
|
||||
|
||||
// clean up temp buffer
|
||||
delete []temp_buff;
|
||||
return char_samp;
|
||||
}
|
||||
|
||||
/**
|
||||
* create a B/W image from a char_sample
|
||||
*/
|
||||
Pix *CubeUtils::PixFromCharSample(CharSamp *char_samp) {
|
||||
// parameter check
|
||||
if (char_samp == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// get the raw data
|
||||
int stride = char_samp->Stride();
|
||||
int wid = char_samp->Width();
|
||||
int hgt = char_samp->Height();
|
||||
|
||||
Pix *pix = pixCreate(wid, hgt, 1);
|
||||
if (pix == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// copy the contents
|
||||
unsigned char *line = char_samp->RawData();
|
||||
for (int y = 0; y < hgt ; y++, line += stride) {
|
||||
for (int x = 0; x < wid; x++) {
|
||||
if (line[x] != 0) {
|
||||
pixSetPixel(pix, x, y, 0);
|
||||
} else {
|
||||
pixSetPixel(pix, x, y, 255);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return pix;
|
||||
}
|
||||
|
||||
/**
|
||||
* creates a raw buffer from the specified location of the pix
|
||||
*/
|
||||
unsigned char *CubeUtils::GetImageData(Pix *pix, int left, int top,
|
||||
int wid, int hgt) {
|
||||
// skip invalid dimensions
|
||||
if (left < 0 || top < 0 || wid < 0 || hgt < 0 ||
|
||||
(left + wid) > pix->w || (top + hgt) > pix->h ||
|
||||
pix->d != 1) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// copy the char img to a temp buffer
|
||||
unsigned char *temp_buff = new unsigned char[wid * hgt];
|
||||
l_int32 w;
|
||||
l_int32 h;
|
||||
l_int32 d;
|
||||
l_int32 wpl;
|
||||
l_uint32 *line;
|
||||
l_uint32 *data;
|
||||
|
||||
pixGetDimensions(pix, &w, &h, &d);
|
||||
wpl = pixGetWpl(pix);
|
||||
data = pixGetData(pix);
|
||||
line = data + (top * wpl);
|
||||
|
||||
for (int y = 0, off = 0; y < hgt ; y++) {
|
||||
for (int x = 0; x < wid; x++, off++) {
|
||||
temp_buff[off] = GET_DATA_BIT(line, x + left) ? 0 : 255;
|
||||
}
|
||||
line += wpl;
|
||||
}
|
||||
return temp_buff;
|
||||
}
|
||||
|
||||
/**
|
||||
* read file contents to a string
|
||||
*/
|
||||
bool CubeUtils::ReadFileToString(const string &file_name, string *str) {
|
||||
str->clear();
|
||||
FILE *fp = fopen(file_name.c_str(), "rb");
|
||||
if (fp == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// get the size of the size
|
||||
fseek(fp, 0, SEEK_END);
|
||||
int file_size = ftell(fp);
|
||||
if (file_size < 1) {
|
||||
fclose(fp);
|
||||
return false;
|
||||
}
|
||||
// adjust string size
|
||||
str->reserve(file_size);
|
||||
// read the contents
|
||||
rewind(fp);
|
||||
char *buff = new char[file_size];
|
||||
int read_bytes = fread(buff, 1, static_cast<int>(file_size), fp);
|
||||
if (read_bytes == file_size) {
|
||||
str->append(buff, file_size);
|
||||
}
|
||||
delete []buff;
|
||||
fclose(fp);
|
||||
return (read_bytes == file_size);
|
||||
}
|
||||
|
||||
/**
|
||||
* splits a string into vectors based on specified delimiters
|
||||
*/
|
||||
void CubeUtils::SplitStringUsing(const string &str,
|
||||
const string &delims,
|
||||
vector<string> *str_vec) {
|
||||
// Optimize the common case where delims is a single character.
|
||||
if (delims[0] != '\0' && delims[1] == '\0') {
|
||||
char c = delims[0];
|
||||
const char* p = str.data();
|
||||
const char* end = p + str.size();
|
||||
while (p != end) {
|
||||
if (*p == c) {
|
||||
++p;
|
||||
} else {
|
||||
const char* start = p;
|
||||
while (++p != end && *p != c);
|
||||
str_vec->push_back(string(start, p - start));
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
string::size_type begin_index, end_index;
|
||||
begin_index = str.find_first_not_of(delims);
|
||||
while (begin_index != string::npos) {
|
||||
end_index = str.find_first_of(delims, begin_index);
|
||||
if (end_index == string::npos) {
|
||||
str_vec->push_back(str.substr(begin_index));
|
||||
return;
|
||||
}
|
||||
str_vec->push_back(str.substr(begin_index, (end_index - begin_index)));
|
||||
begin_index = str.find_first_not_of(delims, end_index);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* UTF-8 to UTF-32 conversion functions
|
||||
*/
|
||||
void CubeUtils::UTF8ToUTF32(const char *utf8_str, string_32 *str32) {
|
||||
str32->clear();
|
||||
int len = strlen(utf8_str);
|
||||
int step = 0;
|
||||
for (int ch = 0; ch < len; ch += step) {
|
||||
step = UNICHAR::utf8_step(utf8_str + ch);
|
||||
if (step > 0) {
|
||||
UNICHAR uni_ch(utf8_str + ch, step);
|
||||
(*str32) += uni_ch.first_uni();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* UTF-32 to UTF-8 conversion functions
|
||||
*/
|
||||
void CubeUtils::UTF32ToUTF8(const char_32 *utf32_str, string *str) {
|
||||
str->clear();
|
||||
for (const char_32 *ch_32 = utf32_str; (*ch_32) != 0; ch_32++) {
|
||||
UNICHAR uni_ch((*ch_32));
|
||||
char *utf8 = uni_ch.utf8_str();
|
||||
if (utf8 != NULL) {
|
||||
(*str) += utf8;
|
||||
delete []utf8;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool CubeUtils::IsCaseInvariant(const char_32 *str32, CharSet *char_set) {
|
||||
bool all_one_case = true;
|
||||
bool capitalized;
|
||||
bool prev_upper;
|
||||
bool prev_lower;
|
||||
bool first_upper;
|
||||
bool first_lower;
|
||||
bool cur_upper;
|
||||
bool cur_lower;
|
||||
|
||||
string str8;
|
||||
if (!char_set) {
|
||||
// If cube char_set is missing, use C-locale-dependent functions
|
||||
// on UTF8 characters to determine case properties.
|
||||
first_upper = isupper(str32[0]);
|
||||
first_lower = islower(str32[0]);
|
||||
if (first_upper)
|
||||
capitalized = true;
|
||||
prev_upper = first_upper;
|
||||
prev_lower = first_lower;
|
||||
for (int c = 1; str32[c] != 0; ++c) {
|
||||
cur_upper = isupper(str32[c]);
|
||||
cur_lower = islower(str32[c]);
|
||||
if ((prev_upper && cur_lower) || (prev_lower && cur_upper))
|
||||
all_one_case = false;
|
||||
if (cur_upper)
|
||||
capitalized = false;
|
||||
prev_upper = cur_upper;
|
||||
prev_lower = cur_lower;
|
||||
}
|
||||
} else {
|
||||
UNICHARSET *unicharset = char_set->InternalUnicharset();
|
||||
// Use UNICHARSET functions to determine case properties
|
||||
first_upper = unicharset->get_isupper(char_set->ClassID(str32[0]));
|
||||
first_lower = unicharset->get_islower(char_set->ClassID(str32[0]));
|
||||
if (first_upper)
|
||||
capitalized = true;
|
||||
prev_upper = first_upper;
|
||||
prev_lower = first_lower;
|
||||
|
||||
for (int c = 1; c < StrLen(str32); ++c) {
|
||||
cur_upper = unicharset->get_isupper(char_set->ClassID(str32[c]));
|
||||
cur_lower = unicharset->get_islower(char_set->ClassID(str32[c]));
|
||||
if ((prev_upper && cur_lower) || (prev_lower && cur_upper))
|
||||
all_one_case = false;
|
||||
if (cur_upper)
|
||||
capitalized = false;
|
||||
prev_upper = cur_upper;
|
||||
prev_lower = cur_lower;
|
||||
}
|
||||
}
|
||||
return all_one_case || capitalized;
|
||||
}
|
||||
|
||||
// Returns a newly allocated lower-case version of str32, or NULL on
// error (NULL char_set, invalid character, or a lower-case mapping that
// is not exactly one character). Caller owns the returned array.
char_32 *CubeUtils::ToLower(const char_32 *str32, CharSet *char_set) {
  if (!char_set) {
    return NULL;
  }
  UNICHARSET *unicharset = char_set->InternalUnicharset();
  const int len = StrLen(str32);
  char_32 *lower = new char_32[len + 1];
  for (int i = 0; i < len; ++i) {
    const char_32 ch = str32[i];
    if (ch == INVALID_UNICHAR_ID) {
      delete [] lower;
      return NULL;
    }
    // non-upper-case characters pass through unchanged
    if (!unicharset->get_isupper(char_set->ClassID(ch))) {
      lower[i] = ch;
      continue;
    }
    // map upper-case characters through the unicharset's other-case table
    UNICHAR_ID uid_lower = unicharset->get_other_case(char_set->ClassID(ch));
    const char_32 *str32_lower = char_set->ClassString(uid_lower);
    // expect the lower-case form to be exactly one character
    if (!str32_lower || StrLen(str32_lower) != 1) {
      delete [] lower;
      return NULL;
    }
    lower[i] = str32_lower[0];
  }
  lower[len] = 0;
  return lower;
}
|
||||
|
||||
// Returns a newly allocated upper-case version of str32, or NULL on
// error (NULL char_set, invalid character, or an upper-case mapping that
// is not exactly one character). Caller owns the returned array.
char_32 *CubeUtils::ToUpper(const char_32 *str32, CharSet *char_set) {
  if (!char_set) {
    return NULL;
  }
  UNICHARSET *unicharset = char_set->InternalUnicharset();
  const int len = StrLen(str32);
  char_32 *upper = new char_32[len + 1];
  for (int i = 0; i < len; ++i) {
    const char_32 ch = str32[i];
    if (ch == INVALID_UNICHAR_ID) {
      delete [] upper;
      return NULL;
    }
    // non-lower-case characters pass through unchanged
    if (!unicharset->get_islower(char_set->ClassID(ch))) {
      upper[i] = ch;
      continue;
    }
    // map lower-case characters through the unicharset's other-case table
    UNICHAR_ID uid_upper = unicharset->get_other_case(char_set->ClassID(ch));
    const char_32 *str32_upper = char_set->ClassString(uid_upper);
    // expect the upper-case form to be exactly one character
    if (!str32_upper || StrLen(str32_upper) != 1) {
      delete [] upper;
      return NULL;
    }
    upper[i] = str32_upper[0];
  }
  upper[len] = 0;
  return upper;
}
|
||||
} // namespace tesseract
|
@ -1,83 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: cube_utils.h
|
||||
* Description: Declaration of the Cube Utilities Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
*(C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0(the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The CubeUtils class provides miscellaneous utility and helper functions
|
||||
// to the rest of the Cube Engine
|
||||
|
||||
#ifndef CUBE_UTILS_H
|
||||
#define CUBE_UTILS_H
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#include "allheaders.h"
|
||||
#include "const.h"
|
||||
#include "char_set.h"
|
||||
#include "char_samp.h"
|
||||
|
||||
namespace tesseract {
|
||||
// The CubeUtils class provides miscellaneous static utility and helper
// functions to the rest of the Cube Engine. All methods are static.
class CubeUtils {
 public:
  CubeUtils();
  ~CubeUtils();

  // Converts a probability value to a cost by getting the -log() of the
  // probability value to a known base
  static int Prob2Cost(double prob_val);
  // Converts a cost to probability by getting the exp(-normalized cost)
  static double Cost2Prob(int cost);
  // Computes the length of a 32-bit char buffer
  static int StrLen(const char_32 *str);
  // Compares two 32-bit char buffers (strcmp-style result)
  static int StrCmp(const char_32 *str1, const char_32 *str2);
  // Duplicates a 32-bit char buffer; caller owns the returned array
  static char_32 *StrDup(const char_32 *str);
  // Creates a CharSamp from a Pix and a bounding box
  static CharSamp *CharSampleFromPix(Pix *pix,
                                     int left, int top, int wid, int hgt);
  // Creates a Pix from a CharSamp
  static Pix *PixFromCharSample(CharSamp *char_samp);
  // read the contents of a file to a string; returns false on failure
  static bool ReadFileToString(const string &file_name, string *str);
  // split a string into vectors using any of the specified delimiters
  static void SplitStringUsing(const string &str, const string &delims,
                               vector<string> *str_vec);
  // UTF-8 to UTF-32 conversion functions
  static void UTF8ToUTF32(const char *utf8_str, string_32 *str32);
  static void UTF32ToUTF8(const char_32 *utf32_str, string *str);
  // Returns true if input word has either 1) all-one-case, or 2)
  // first character upper-case, and remaining characters lower-case.
  // If char_set is not NULL, uses tesseract's unicharset functions
  // to determine case properties. Otherwise, uses C-locale-dependent
  // functions, which may be unreliable on non-ASCII characters.
  static bool IsCaseInvariant(const char_32 *str32, CharSet *char_set);
  // Returns char_32 pointer to the lower-case-transformed version of
  // the input string or NULL on error. If char_set is NULL returns NULL.
  // Return array must be freed by caller.
  static char_32 *ToLower(const char_32 *str32, CharSet *char_set);
  // Returns char_32 pointer to the upper-case-transformed version of
  // the input string or NULL on error. If char_set is NULL returns NULL.
  // Return array must be freed by caller.
  static char_32 *ToUpper(const char_32 *str32, CharSet *char_set);
 private:
  // Unpacks a rectangular region of a 1-bpp Pix into a byte-per-pixel
  // buffer (0 = foreground, 255 = background); caller owns the result.
  static unsigned char *GetImageData(Pix *pix,
                                     int left, int top, int wid, int hgt);
};
|
||||
} // namespace tesseract
|
||||
#endif // CUBE_UTILS_H
|
@ -1,55 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: feature_base.h
|
||||
* Description: Declaration of the Feature Base Class
|
||||
* Author: Ping Ping (xiupingping), Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The FeatureBase class is the base class for any Feature Extraction class
|
||||
// It provided 3 pure virtual functions (to inherit):
|
||||
// 1- FeatureCnt: A method to returns the count of features
|
||||
// 2- ComputeFeatures: A method to compute the features for a given CharSamp
|
||||
// 3- ComputeFeatureBitmap: A method to render a visualization of the features
|
||||
// to a CharSamp. This is mainly used by visual-debuggers
|
||||
|
||||
#ifndef FEATURE_BASE_H
|
||||
#define FEATURE_BASE_H
|
||||
|
||||
#include "char_samp.h"
|
||||
#include "tuning_params.h"
|
||||
|
||||
namespace tesseract {
|
||||
// The FeatureBase class is the abstract base class for any feature
// extraction class. Subclasses implement FeatureCnt(), ComputeFeatures()
// and ComputeFeatureBitmap().
class FeatureBase {
 public:
  // params is stored but never deleted by this class; it must outlive
  // the feature extractor.
  explicit FeatureBase(TuningParams *params)
      : params_(params) {
  }
  virtual ~FeatureBase() {}

  // Compute the features for a given CharSamp. features must point to
  // an array of at least FeatureCnt() floats.
  virtual bool ComputeFeatures(CharSamp *char_samp, float *features) = 0;
  // Render a visualization of the features to a CharSamp.
  // This is mainly used by visual-debuggers
  virtual CharSamp *ComputeFeatureBitmap(CharSamp *char_samp) = 0;
  // Returns the count of features
  virtual int FeatureCnt() = 0;

 protected:
  // tuning parameters shared with the caller (not owned)
  TuningParams *params_;
};
|
||||
}
|
||||
|
||||
#endif // FEATURE_BASE_H
|
||||
|
@ -1,50 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: feature_bmp.cpp
|
||||
* Description: Implementation of the Bitmap Feature Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string>
|
||||
#include "feature_base.h"
|
||||
#include "feature_bmp.h"
|
||||
#include "cube_utils.h"
|
||||
#include "const.h"
|
||||
#include "char_samp.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Caches the convolution grid size from the tuning parameters so that
// later feature computations need not consult params.
FeatureBmp::FeatureBmp(TuningParams *params)
    : FeatureBase(params) {
  conv_grid_size_ = params->ConvGridSize();
}
|
||||
|
||||
// Nothing to release: this extractor owns no resources.
FeatureBmp::~FeatureBmp() {}
|
||||
|
||||
// Render a visualization of the features to a CharSamp.
|
||||
// This is mainly used by visual-debuggers
|
||||
CharSamp *FeatureBmp::ComputeFeatureBitmap(CharSamp *char_samp) {
|
||||
return char_samp->Scale(conv_grid_size_, conv_grid_size_);
|
||||
}
|
||||
|
||||
// Compute the features for a given CharSamp
|
||||
bool FeatureBmp::ComputeFeatures(CharSamp *char_samp, float *features) {
|
||||
return char_samp->ComputeFeatures(conv_grid_size_, features);
|
||||
}
|
||||
}
|
||||
|
@ -1,53 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: feature_bmp.h
|
||||
* Description: Declaration of the Bitmap Feature Class
|
||||
* Author: PingPing xiu (xiupingping) & Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The FeatureBmp class implements a Bitmap feature extractor class. It
|
||||
// inherits from the FeatureBase class
|
||||
// The Bitmap feature vectors is the the bitmap of the specified CharSamp
|
||||
// scaled to a fixed grid size and then augmented by a 5 aux features that
|
||||
// describe the size, aspect ration and placement within a word
|
||||
|
||||
#ifndef FEATURE_BMP_H
|
||||
#define FEATURE_BMP_H
|
||||
|
||||
#include "char_samp.h"
|
||||
#include "feature_base.h"
|
||||
|
||||
namespace tesseract {
|
||||
// The FeatureBmp class implements a Bitmap feature extractor class. It
// inherits from the FeatureBase class.
// The bitmap feature vector is the bitmap of the specified CharSamp
// scaled to a fixed grid size and then augmented by 5 aux features that
// describe the size, aspect ratio and placement within a word.
class FeatureBmp : public FeatureBase {
 public:
  explicit FeatureBmp(TuningParams *params);
  virtual ~FeatureBmp();
  // Render a visualization of the features to a CharSamp.
  // This is mainly used by visual-debuggers
  virtual CharSamp *ComputeFeatureBitmap(CharSamp *samp);
  // Compute the features for a given CharSamp
  virtual bool ComputeFeatures(CharSamp *samp, float *features);
  // Returns the count of features: the scaled-bitmap pixels plus the
  // 5 auxiliary features
  virtual int FeatureCnt() {
    return 5 + (conv_grid_size_ * conv_grid_size_);
  }

 protected:
  // grid size, cached from the TuningParams object
  int conv_grid_size_;
};
|
||||
}
|
||||
|
||||
#endif // FEATURE_BMP_H
|
@ -1,138 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: feature_chebyshev.cpp
|
||||
* Description: Implementation of the Chebyshev coefficients Feature Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include "feature_base.h"
|
||||
#include "feature_chebyshev.h"
|
||||
#include "cube_utils.h"
|
||||
#include "const.h"
|
||||
#include "char_samp.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// All state lives in the FeatureBase part; nothing extra to initialize.
FeatureChebyshev::FeatureChebyshev(TuningParams *params)
    : FeatureBase(params) {}
|
||||
|
||||
// Nothing to release: this extractor owns no resources.
FeatureChebyshev::~FeatureChebyshev() {}
|
||||
|
||||
// Render a visualization of the features to a CharSamp.
|
||||
// This is mainly used by visual-debuggers
|
||||
CharSamp *FeatureChebyshev::ComputeFeatureBitmap(CharSamp *char_samp) {
|
||||
return char_samp;
|
||||
}
|
||||
|
||||
// Compute Chebyshev coefficients for the specified vector.
// The input is first re-sampled at coeff_cnt Chebyshev-node positions
// mapped onto [0, input.size()-1], then a DCT-style cosine sum turns the
// re-sampled values into coeff_cnt coefficients written to coeff.
// coeff must point to at least coeff_cnt floats.
void FeatureChebyshev::ChebyshevCoefficients(const vector<float> &input,
                                             int coeff_cnt, float *coeff) {
  // re-sample function at the Chebyshev nodes
  int input_range = (input.size() - 1);
  vector<float> resamp(coeff_cnt);
  for (int samp_idx = 0; samp_idx < coeff_cnt; samp_idx++) {
    // compute sampling position in [0, input_range]
    float samp_pos = input_range *
        (1 + cos(M_PI * (samp_idx + 0.5) / coeff_cnt)) / 2;
    // linearly interpolate between the two neighboring input samples
    // (samp_end is at most input_range, so indexing stays in bounds)
    int samp_start = static_cast<int>(samp_pos);
    int samp_end = static_cast<int>(samp_pos + 0.5);
    float func_delta = input[samp_end] - input[samp_start];
    resamp[samp_idx] = input[samp_start] +
        ((samp_pos - samp_start) * func_delta);
  }
  // compute the coefficients: cosine-transform sum over the re-sampled
  // values, scaled by 2/coeff_cnt
  float normalizer = 2.0 / coeff_cnt;
  for (int coeff_idx = 0; coeff_idx < coeff_cnt; coeff_idx++, coeff++) {
    double sum = 0.0;
    for (int samp_idx = 0; samp_idx < coeff_cnt; samp_idx++) {
      sum += resamp[samp_idx] * cos(M_PI * coeff_idx * (samp_idx + 0.5) /
                                    coeff_cnt);
    }
    (*coeff) = (normalizer * sum);
  }
}
|
||||
|
||||
// Compute the features of a given CharSamp
|
||||
bool FeatureChebyshev::ComputeFeatures(CharSamp *char_samp, float *features) {
|
||||
return ComputeChebyshevCoefficients(char_samp, features);
|
||||
}
|
||||
|
||||
// Compute the Chebyshev coefficients of a given CharSamp.
// Builds four profiles of the sample's foreground pixels (left, top,
// right, bottom edge distances, normalized to [0, 1]) and writes the
// kChebychevCoefficientCnt coefficients of each, in that order, into
// features (4 * kChebychevCoefficientCnt floats total).
// Returns false if the sample has no positive normalized bottom.
bool FeatureChebyshev::ComputeChebyshevCoefficients(CharSamp *char_samp,
                                                    float *features) {
  if (char_samp->NormBottom() <= 0) {
    return false;
  }
  unsigned char *raw_data = char_samp->RawData();
  int stride = char_samp->Stride();
  // compute the height of the word; scales (Top + Height) by the
  // 255/NormBottom normalization factor
  int word_hgt = (255 * (char_samp->Top() + char_samp->Height()) /
                  char_samp->NormBottom());
  // compute left & right profiles: per row, the normalized distance from
  // each side to the first foreground (zero) pixel
  vector<float> left_profile(word_hgt, 0.0);
  vector<float> right_profile(word_hgt, 0.0);
  unsigned char *line_data = raw_data;
  for (int y = 0; y < char_samp->Height(); y++, line_data += stride) {
    int min_x = char_samp->Width();
    int max_x = -1;
    for (int x = 0; x < char_samp->Width(); x++) {
      if (line_data[x] == 0) {
        UpdateRange(x, &min_x, &max_x);
      }
    }
    // rows with no foreground pixels contribute 0
    left_profile[char_samp->Top() + y] =
        1.0 * (min_x == char_samp->Width() ? 0 : (min_x + 1)) /
        char_samp->Width();
    right_profile[char_samp->Top() + y] =
        1.0 * (max_x == -1 ? 0 : char_samp->Width() - max_x) /
        char_samp->Width();
  }

  // compute top and bottom profiles: per column, the normalized distance
  // from top/bottom of the word to the first foreground pixel
  vector<float> top_profile(char_samp->Width(), 0);
  vector<float> bottom_profile(char_samp->Width(), 0);
  for (int x = 0; x < char_samp->Width(); x++) {
    int min_y = word_hgt;
    int max_y = -1;
    line_data = raw_data;
    for (int y = 0; y < char_samp->Height(); y++, line_data += stride) {
      if (line_data[x] == 0) {
        UpdateRange(y + char_samp->Top(), &min_y, &max_y);
      }
    }
    top_profile[x] = 1.0 * (min_y == word_hgt ? 0 : (min_y + 1)) / word_hgt;
    bottom_profile[x] = 1.0 * (max_y == -1 ? 0 : (word_hgt - max_y)) / word_hgt;
  }

  // compute the chebyshev coefficients of each profile, packed in the
  // order: left, top, right, bottom
  ChebyshevCoefficients(left_profile, kChebychevCoefficientCnt, features);
  ChebyshevCoefficients(top_profile, kChebychevCoefficientCnt,
                        features + kChebychevCoefficientCnt);
  ChebyshevCoefficients(right_profile, kChebychevCoefficientCnt,
                        features + (2 * kChebychevCoefficientCnt));
  ChebyshevCoefficients(bottom_profile, kChebychevCoefficientCnt,
                        features + (3 * kChebychevCoefficientCnt));
  return true;
}
|
||||
} // namespace tesseract
|
@ -1,57 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: feature_chebyshev.h
|
||||
* Description: Declaration of the Chebyshev coefficients Feature Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The FeatureChebyshev class implements a Bitmap feature extractor class. It
|
||||
// inherits from the FeatureBase class
|
||||
// The feature vector is the composed of the chebyshev coefficients of 4 time
|
||||
// sequences. The time sequences are the left, top, right & bottom
|
||||
// bitmap profiles of the input samples
|
||||
|
||||
#ifndef FEATURE_CHEBYSHEV_H
|
||||
#define FEATURE_CHEBYSHEV_H
|
||||
|
||||
#include "char_samp.h"
|
||||
#include "feature_base.h"
|
||||
|
||||
namespace tesseract {
|
||||
// The FeatureChebyshev class implements a feature extractor class. It
// inherits from the FeatureBase class.
// The feature vector is composed of the Chebyshev coefficients of 4
// sequences: the left, top, right & bottom bitmap profiles of the
// input sample (kChebychevCoefficientCnt coefficients per profile).
class FeatureChebyshev : public FeatureBase {
 public:
  explicit FeatureChebyshev(TuningParams *params);
  virtual ~FeatureChebyshev();
  // Render a visualization of the features to a CharSamp.
  // This is mainly used by visual-debuggers
  virtual CharSamp *ComputeFeatureBitmap(CharSamp *samp);
  // Compute the features for a given CharSamp
  virtual bool ComputeFeatures(CharSamp *samp, float *features);
  // Returns the count of features (one block of coefficients per
  // profile: left, top, right, bottom)
  virtual int FeatureCnt() {
    return (4 * kChebychevCoefficientCnt);
  }

 protected:
  // number of Chebyshev coefficients computed per profile
  static const int kChebychevCoefficientCnt = 40;
  // Compute Chebychev coefficients for the specified vector
  void ChebyshevCoefficients(const vector<float> &input,
                             int coeff_cnt, float *coeff);
  // Compute the features for a given CharSamp
  bool ComputeChebyshevCoefficients(CharSamp *samp, float *features);
};
|
||||
}
|
||||
|
||||
#endif // FEATURE_CHEBYSHEV_H
|
@ -1,64 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: feature_chebyshev.cpp
|
||||
* Description: Implementation of the Chebyshev coefficients Feature Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include "feature_base.h"
|
||||
#include "feature_hybrid.h"
|
||||
#include "cube_utils.h"
|
||||
#include "const.h"
|
||||
#include "char_samp.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Builds the two underlying extractors whose outputs this hybrid
// extractor concatenates.
FeatureHybrid::FeatureHybrid(TuningParams *params)
    : FeatureBase(params) {
  feature_bmp_ = new FeatureBmp(params);
  feature_chebyshev_ = new FeatureChebyshev(params);
}
|
||||
|
||||
// Releases the owned sub-extractors.
FeatureHybrid::~FeatureHybrid() {
  delete feature_bmp_;
  delete feature_chebyshev_;
}
|
||||
|
||||
// Render a visualization of the features to a CharSamp.
|
||||
// This is mainly used by visual-debuggers
|
||||
CharSamp *FeatureHybrid::ComputeFeatureBitmap(CharSamp *char_samp) {
|
||||
return char_samp;
|
||||
}
|
||||
|
||||
|
||||
// Compute the features of a given CharSamp
|
||||
bool FeatureHybrid::ComputeFeatures(CharSamp *char_samp, float *features) {
|
||||
if (feature_bmp_ == NULL || feature_chebyshev_ == NULL) {
|
||||
return false;
|
||||
}
|
||||
if (!feature_bmp_->ComputeFeatures(char_samp, features)) {
|
||||
return false;
|
||||
}
|
||||
return feature_chebyshev_->ComputeFeatures(char_samp,
|
||||
features + feature_bmp_->FeatureCnt());
|
||||
}
|
||||
|
||||
} // namespace tesseract
|
@ -1,56 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: feature_chebyshev.h
|
||||
* Description: Declaration of the Chebyshev coefficients Feature Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The FeatureHybrid class implements a Bitmap feature extractor class. It
|
||||
// inherits from the FeatureBase class
|
||||
// This class describes the a hybrid feature vector composed by combining
|
||||
// the bitmap and the chebyshev feature vectors
|
||||
|
||||
#ifndef FEATURE_HYBRID_H
|
||||
#define FEATURE_HYBRID_H
|
||||
|
||||
#include "char_samp.h"
|
||||
#include "feature_bmp.h"
|
||||
#include "feature_chebyshev.h"
|
||||
|
||||
namespace tesseract {
|
||||
// The FeatureHybrid class implements a feature extractor class. It
// inherits from the FeatureBase class.
// This class describes a hybrid feature vector composed by combining
// the bitmap and the chebyshev feature vectors.
class FeatureHybrid : public FeatureBase {
 public:
  explicit FeatureHybrid(TuningParams *params);
  virtual ~FeatureHybrid();
  // Render a visualization of the features to a CharSamp.
  // This is mainly used by visual-debuggers
  virtual CharSamp *ComputeFeatureBitmap(CharSamp *samp);
  // Compute the features for a given CharSamp
  virtual bool ComputeFeatures(CharSamp *samp, float *features);
  // Returns the count of features: the sum of the two sub-extractors'
  // feature counts, or 0 if either is missing
  virtual int FeatureCnt() {
    if (feature_bmp_ == NULL || feature_chebyshev_ == NULL) {
      return 0;
    }
    return feature_bmp_->FeatureCnt() + feature_chebyshev_->FeatureCnt();
  }

 protected:
  // owned sub-extractors, created in the constructor
  FeatureBmp *feature_bmp_;
  FeatureChebyshev *feature_chebyshev_;
};
|
||||
}
|
||||
|
||||
#endif // FEATURE_HYBRID_H
|
@ -1,346 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: charclassifier.cpp
|
||||
* Description: Implementation of Convolutional-NeuralNet Character Classifier
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <algorithm>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <wctype.h>
|
||||
|
||||
#include "classifier_base.h"
|
||||
#include "char_set.h"
|
||||
#include "const.h"
|
||||
#include "conv_net_classifier.h"
|
||||
#include "cube_utils.h"
|
||||
#include "feature_base.h"
|
||||
#include "feature_bmp.h"
|
||||
#include "hybrid_neural_net_classifier.h"
|
||||
#include "tess_lang_model.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Constructs the classifier; the net input/output buffers are allocated
// lazily by RunNets() on first use.
HybridNeuralNetCharClassifier::HybridNeuralNetCharClassifier(
    CharSet *char_set,
    TuningParams *params,
    FeatureBase *feat_extract)
    : CharClassifier(char_set, params, feat_extract),
      net_input_(NULL),
      net_output_(NULL) {
}
|
||||
|
||||
// Releases the component nets and the lazily allocated i/o buffers.
HybridNeuralNetCharClassifier::~HybridNeuralNetCharClassifier() {
  // delete on a NULL pointer is a no-op, so no per-element check is needed
  for (int idx = 0; idx < static_cast<int>(nets_.size()); ++idx) {
    delete nets_[idx];
  }
  nets_.clear();

  delete []net_input_;
  net_input_ = NULL;

  delete []net_output_;
  net_output_ = NULL;
}
|
||||
|
||||
// The main training function. Given a sample and a class ID the classifier
|
||||
// updates its parameters according to its learning algorithm. This function
|
||||
// is currently not implemented. TODO(ahmadab): implement end-2-end training
|
||||
// The main training function. Given a sample and a class ID the classifier
// updates its parameters according to its learning algorithm. This function
// is currently not implemented and always returns false.
// TODO(ahmadab): implement end-2-end training
bool HybridNeuralNetCharClassifier::Train(CharSamp *char_samp, int ClassID) {
  return false;
}
|
||||
|
||||
// A secondary function needed for training. Allows the trainer to set the
|
||||
// value of any train-time parameter. This function is currently not
|
||||
// implemented. TODO(ahmadab): implement end-2-end training
|
||||
// A secondary function needed for training. Allows the trainer to set the
// value of any train-time parameter. This function is currently not
// implemented and always returns false.
// TODO(ahmadab): implement end-2-end training
bool HybridNeuralNetCharClassifier::SetLearnParam(char *var_name, float val) {
  // TODO(ahmadab): implementation of parameter initializing.
  return false;
}
|
||||
|
||||
// Folds the output of the NeuralNet using the loaded folding sets
|
||||
void HybridNeuralNetCharClassifier::Fold() {
|
||||
// in case insensitive mode
|
||||
if (case_sensitive_ == false) {
|
||||
int class_cnt = char_set_->ClassCount();
|
||||
// fold case
|
||||
for (int class_id = 0; class_id < class_cnt; class_id++) {
|
||||
// get class string
|
||||
const char_32 *str32 = char_set_->ClassString(class_id);
|
||||
// get the upper case form of the string
|
||||
string_32 upper_form32 = str32;
|
||||
for (int ch = 0; ch < upper_form32.length(); ch++) {
|
||||
if (iswalpha(static_cast<int>(upper_form32[ch])) != 0) {
|
||||
upper_form32[ch] = towupper(upper_form32[ch]);
|
||||
}
|
||||
}
|
||||
|
||||
// find out the upperform class-id if any
|
||||
int upper_class_id =
|
||||
char_set_->ClassID(reinterpret_cast<const char_32 *>(
|
||||
upper_form32.c_str()));
|
||||
if (upper_class_id != -1 && class_id != upper_class_id) {
|
||||
float max_out = MAX(net_output_[class_id], net_output_[upper_class_id]);
|
||||
net_output_[class_id] = max_out;
|
||||
net_output_[upper_class_id] = max_out;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The folding sets specify how groups of classes should be folded
|
||||
// Folding involved assigning a min-activation to all the members
|
||||
// of the folding set. The min-activation is a fraction of the max-activation
|
||||
// of the members of the folding set
|
||||
for (int fold_set = 0; fold_set < fold_set_cnt_; fold_set++) {
|
||||
float max_prob = net_output_[fold_sets_[fold_set][0]];
|
||||
|
||||
for (int ch = 1; ch < fold_set_len_[fold_set]; ch++) {
|
||||
if (net_output_[fold_sets_[fold_set][ch]] > max_prob) {
|
||||
max_prob = net_output_[fold_sets_[fold_set][ch]];
|
||||
}
|
||||
}
|
||||
for (int ch = 0; ch < fold_set_len_[fold_set]; ch++) {
|
||||
net_output_[fold_sets_[fold_set][ch]] = MAX(max_prob * kFoldingRatio,
|
||||
net_output_[fold_sets_[fold_set][ch]]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// compute the features of specified charsamp and
// feedforward the specified nets
// Returns false if feature extraction or any net's FeedForward fails.
bool HybridNeuralNetCharClassifier::RunNets(CharSamp *char_samp) {
  int feat_cnt = feat_extract_->FeatureCnt();
  int class_cnt = char_set_->ClassCount();

  // allocate i/p and o/p buffers if needed (lazily, on first call;
  // both are allocated together so checking one suffices)
  if (net_input_ == NULL) {
    net_input_ = new float[feat_cnt];
    net_output_ = new float[class_cnt];
  }

  // compute input features
  if (feat_extract_->ComputeFeatures(char_samp, net_input_) == false) {
    return false;
  }

  // go through all the nets, accumulating their weighted outputs
  memset(net_output_, 0, class_cnt * sizeof(*net_output_));
  float *inputs = net_input_;
  for (int net_idx = 0; net_idx < nets_.size(); net_idx++) {
    // run each net
    vector<float> net_out(class_cnt, 0.0);
    if (!nets_[net_idx]->FeedForward(inputs, &net_out[0])) {
      return false;
    }
    // add the output values, scaled by this net's combination weight
    for (int class_idx = 0; class_idx < class_cnt; class_idx++) {
      net_output_[class_idx] += (net_out[class_idx] * net_wgts_[net_idx]);
    }
    // increment inputs pointer: each net consumes its own slice of the
    // concatenated feature vector
    inputs += nets_[net_idx]->in_cnt();
  }

  // apply case and folding-set folding to the accumulated outputs
  Fold();

  return true;
}
|
||||
|
||||
// return the cost of being a char
|
||||
int HybridNeuralNetCharClassifier::CharCost(CharSamp *char_samp) {
|
||||
// it is by design that a character cost is equal to zero
|
||||
// when no nets are present. This is the case during training.
|
||||
if (RunNets(char_samp) == false) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return CubeUtils::Prob2Cost(1.0f - net_output_[0]);
|
||||
}
|
||||
|
||||
// classifies a charsamp and returns an alternate list
|
||||
// of chars sorted by char costs
|
||||
CharAltList *HybridNeuralNetCharClassifier::Classify(CharSamp *char_samp) {
|
||||
// run the needed nets
|
||||
if (RunNets(char_samp) == false) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int class_cnt = char_set_->ClassCount();
|
||||
|
||||
// create an altlist
|
||||
CharAltList *alt_list = new CharAltList(char_set_, class_cnt);
|
||||
|
||||
for (int out = 1; out < class_cnt; out++) {
|
||||
int cost = CubeUtils::Prob2Cost(net_output_[out]);
|
||||
alt_list->Insert(out, cost);
|
||||
}
|
||||
|
||||
return alt_list;
|
||||
}
|
||||
|
||||
// set an external net (for training purposes)
// Intentionally a no-op in this classifier; the nets are loaded from file
// by LoadNets().
void HybridNeuralNetCharClassifier::SetNet(tesseract::NeuralNet *char_net) {
}
|
||||
|
||||
// Load folding sets
|
||||
// This function returns true on success or if the file can't be read,
|
||||
// returns false if an error is encountered.
|
||||
bool HybridNeuralNetCharClassifier::LoadFoldingSets(
|
||||
const string &data_file_path, const string &lang, LangModel *lang_mod) {
|
||||
fold_set_cnt_ = 0;
|
||||
string fold_file_name;
|
||||
fold_file_name = data_file_path + lang;
|
||||
fold_file_name += ".cube.fold";
|
||||
|
||||
// folding sets are optional
|
||||
FILE *fp = fopen(fold_file_name.c_str(), "rb");
|
||||
if (fp == NULL) {
|
||||
return true;
|
||||
}
|
||||
fclose(fp);
|
||||
|
||||
string fold_sets_str;
|
||||
if (!CubeUtils::ReadFileToString(fold_file_name,
|
||||
&fold_sets_str)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// split into lines
|
||||
vector<string> str_vec;
|
||||
CubeUtils::SplitStringUsing(fold_sets_str, "\r\n", &str_vec);
|
||||
fold_set_cnt_ = str_vec.size();
|
||||
fold_sets_ = new int *[fold_set_cnt_];
|
||||
fold_set_len_ = new int[fold_set_cnt_];
|
||||
|
||||
for (int fold_set = 0; fold_set < fold_set_cnt_; fold_set++) {
|
||||
reinterpret_cast<TessLangModel *>(lang_mod)->RemoveInvalidCharacters(
|
||||
&str_vec[fold_set]);
|
||||
|
||||
// if all or all but one character are invalid, invalidate this set
|
||||
if (str_vec[fold_set].length() <= 1) {
|
||||
fprintf(stderr, "Cube WARNING (ConvNetCharClassifier::LoadFoldingSets): "
|
||||
"invalidating folding set %d\n", fold_set);
|
||||
fold_set_len_[fold_set] = 0;
|
||||
fold_sets_[fold_set] = NULL;
|
||||
continue;
|
||||
}
|
||||
|
||||
string_32 str32;
|
||||
CubeUtils::UTF8ToUTF32(str_vec[fold_set].c_str(), &str32);
|
||||
fold_set_len_[fold_set] = str32.length();
|
||||
fold_sets_[fold_set] = new int[fold_set_len_[fold_set]];
|
||||
for (int ch = 0; ch < fold_set_len_[fold_set]; ch++) {
|
||||
fold_sets_[fold_set][ch] = char_set_->ClassID(str32[ch]);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Init the classifier provided a data-path and a language string.
// Idempotent: returns immediately once initialized.
bool HybridNeuralNetCharClassifier::Init(const string &data_file_path,
                                         const string &lang,
                                         LangModel *lang_mod) {
  if (init_ == true) {
    return true;
  }

  // load the nets if any. This function will return true if the net file
  // does not exist. But will fail if the net did not pass the sanity checks
  if (!LoadNets(data_file_path, lang)) {
    return false;
  }

  // load the folding sets if any. This function will return true if the
  // file does not exist. But will fail if the it did not pass the sanity checks
  if (!LoadFoldingSets(data_file_path, lang, lang_mod)) {
    return false;
  }

  init_ = true;
  return true;
}
|
||||
|
||||
// Load the classifier's Neural Nets
// This function will return true if the net file does not exist.
// But will fail if the net did not pass the sanity checks
bool HybridNeuralNetCharClassifier::LoadNets(const string &data_file_path,
                                             const string &lang) {
  string hybrid_net_file;
  string junk_net_file;

  // add the lang identifier
  hybrid_net_file = data_file_path + lang;
  hybrid_net_file += ".cube.hybrid";

  // neural network is optional; absence of the file is not an error
  FILE *fp = fopen(hybrid_net_file.c_str(), "rb");
  if (fp == NULL) {
    return true;
  }
  fclose(fp);

  string str;
  if (!CubeUtils::ReadFileToString(hybrid_net_file, &str)) {
    return false;
  }

  // split into lines; one net spec per line
  vector<string> str_vec;
  CubeUtils::SplitStringUsing(str, "\r\n", &str_vec);
  if (str_vec.empty()) {
    return false;
  }

  // create and add the nets
  nets_.resize(str_vec.size(), NULL);
  net_wgts_.resize(str_vec.size(), 0);
  int total_input_size = 0;
  for (int net_idx = 0; net_idx < str_vec.size(); net_idx++) {
    // parse the string
    vector<string> tokens_vec;
    CubeUtils::SplitStringUsing(str_vec[net_idx], " \t", &tokens_vec);
    // has to be 2 tokens: net file name and combination weight
    if (tokens_vec.size() != 2) {
      return false;
    }
    // load the net
    string net_file_name = data_file_path + tokens_vec[0];
    nets_[net_idx] = tesseract::NeuralNet::FromFile(net_file_name);
    if (nets_[net_idx] == NULL) {
      return false;
    }
    // parse the net's combination weight and validate it is non-negative
    // (NOTE: an earlier comment said "input size"; the second token is the
    // weight used by RunNets to scale this net's outputs)
    net_wgts_[net_idx] = atof(tokens_vec[1].c_str());
    if (net_wgts_[net_idx] < 0.0) {
      return false;
    }
    total_input_size += nets_[net_idx]->in_cnt();
  }
  // validate total input count against the feature extractor's output size
  if (total_input_size != feat_extract_->FeatureCnt()) {
    return false;
  }
  // success
  return true;
}
|
||||
} // tesseract
|
@ -1,90 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: conv_net_classifier.h
|
||||
* Description: Declaration of Convolutional-NeuralNet Character Classifier
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef HYBRID_NEURAL_NET_CLASSIFIER_H
|
||||
#define HYBRID_NEURAL_NET_CLASSIFIER_H
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "char_samp.h"
|
||||
#include "char_altlist.h"
|
||||
#include "char_set.h"
|
||||
#include "classifier_base.h"
|
||||
#include "feature_base.h"
|
||||
#include "lang_model.h"
|
||||
#include "neural_net.h"
|
||||
#include "tuning_params.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Folding Ratio is the ratio of the max-activation of members of a folding
|
||||
// set that is used to compute the min-activation of the rest of the set
|
||||
// static const float kFoldingRatio = 0.75; // see conv_net_classifier.h
|
||||
|
||||
// Character classifier that combines the weighted outputs of several
// NeuralNets, each fed a distinct slice of a hybrid feature vector.
class HybridNeuralNetCharClassifier : public CharClassifier {
 public:
  HybridNeuralNetCharClassifier(CharSet *char_set, TuningParams *params,
                                FeatureBase *feat_extract);
  virtual ~HybridNeuralNetCharClassifier();
  // The main training function. Given a sample and a class ID the classifier
  // updates its parameters according to its learning algorithm. This function
  // is currently not implemented. TODO(ahmadab): implement end-2-end training
  virtual bool Train(CharSamp *char_samp, int ClassID);
  // A secondary function needed for training. Allows the trainer to set the
  // value of any train-time parameter. This function is currently not
  // implemented. TODO(ahmadab): implement end-2-end training
  virtual bool SetLearnParam(char *var_name, float val);
  // Externally sets the Neural Net used by the classifier. Used for training
  void SetNet(tesseract::NeuralNet *net);

  // Classifies an input charsamp and return a CharAltList object containing
  // the possible candidates and corresponding scores
  virtual CharAltList *Classify(CharSamp *char_samp);
  // Computes the cost of a specific charsamp being a character (versus a
  // non-character: part-of-a-character OR more-than-one-character)
  virtual int CharCost(CharSamp *char_samp);

 private:
  // Component Neural Nets and their combination weights (parallel vectors)
  vector<tesseract::NeuralNet *> nets_;
  vector<float> net_wgts_;

  // data buffers used to hold Neural Net inputs and outputs;
  // allocated lazily by RunNets()
  float *net_input_;
  float *net_output_;

  // Init the classifier provided a data-path and a language string
  virtual bool Init(const string &data_file_path, const string &lang,
                    LangModel *lang_mod);
  // Loads the NeuralNets needed for the classifier
  bool LoadNets(const string &data_file_path, const string &lang);
  // Load folding sets
  // This function returns true on success or if the file can't be read,
  // returns false if an error is encountered.
  virtual bool LoadFoldingSets(const string &data_file_path,
                               const string &lang,
                               LangModel *lang_mod);
  // Folds the output of the NeuralNet using the loaded folding sets
  virtual void Fold();
  // Scales the input char_samp and feeds it to the NeuralNet as input
  bool RunNets(CharSamp *char_samp);
};
|
||||
}
|
||||
#endif // HYBRID_NEURAL_NET_CLASSIFIER_H
|
@ -1,73 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: lang_mod_edge.h
|
||||
* Description: Declaration of the Language Model Edge Base Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The LangModEdge abstracts an Edge in the language model trie
|
||||
// This is an abstract class that any Language Model Edge should inherit from
|
||||
// It provides methods for:
|
||||
// 1- Returns the class ID corresponding to the edge
|
||||
// 2- If the edge is a valid EndOfWord (EOW)
|
||||
// 3- If the edge is coming from a OutOfDictionary (OOF) state machine
|
||||
// 4- If the edge is a Terminal (has no children)
|
||||
// 5- A Hash of the edge that will be used to retrieve the edge
|
||||
// quickly from the BeamSearch lattice
|
||||
// 6- If two edges are identcial
|
||||
// 7- Returns a verbal description of the edge (use by debuggers)
|
||||
// 8- the language model cost of the edge (if any)
|
||||
// 9- The string corresponding to this edge
|
||||
// 10- Getting and setting the "Root" status of the edge
|
||||
|
||||
#ifndef LANG_MOD_EDGE_H
|
||||
#define LANG_MOD_EDGE_H
|
||||
|
||||
#include "cube_tuning_params.h"
|
||||
#include "char_set.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Abstract base class for an edge in the language-model trie.
// Concrete language models implement these hooks; the BeamSearch uses
// Hash()/IsIdentical() to merge equivalent lattice paths.
class LangModEdge {
 public:
  LangModEdge() {}
  virtual ~LangModEdge() {}

  // The string corresponding to this edge
  virtual const char_32 * EdgeString() const = 0;
  // Returns the class ID corresponding to the edge
  virtual int ClassID() const = 0;
  // If the edge is the root edge
  virtual bool IsRoot() const = 0;
  // Set the Root flag
  virtual void SetRoot(bool flag) = 0;
  // If the edge is a valid EndOfWord (EOW)
  virtual bool IsEOW() const = 0;
  // is the edge coming from a OutOfDictionary (OOD) state machine
  virtual bool IsOOD() const = 0;
  // Is the edge a Terminal (has no children)
  virtual bool IsTerminal() const = 0;
  // Returns a hash of the edge that will be used to retrieve the edge
  // quickly from the BeamSearch lattice
  virtual unsigned int Hash() const = 0;
  // Are the two edges identical?
  virtual bool IsIdentical(LangModEdge *edge) const = 0;
  // a verbal description of the edge (used by debuggers)
  virtual char *Description() const = 0;
  // the language model cost of the edge (if any)
  virtual int PathCost() const = 0;
};
|
||||
}
|
||||
|
||||
#endif // LANG_MOD_EDGE_H
|
@ -1,78 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: lang_model.h
|
||||
* Description: Declaration of the Language Model Edge Base Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The LanguageModel class abstracts a State machine that is modeled as a Trie
|
||||
// structure. The state machine models the language being recognized by the OCR
|
||||
// Engine
|
||||
// This is an abstract class that is to be inherited by any language model
|
||||
|
||||
#ifndef LANG_MODEL_H
|
||||
#define LANG_MODEL_H
|
||||
|
||||
#include "lang_mod_edge.h"
|
||||
#include "char_altlist.h"
|
||||
#include "char_set.h"
|
||||
#include "tuning_params.h"
|
||||
|
||||
namespace tesseract {
|
||||
// Abstract language model: a trie-shaped state machine describing the
// language being recognized. All four feature toggles default to enabled.
class LangModel {
 public:
  LangModel() {
    ood_enabled_ = true;
    numeric_enabled_ = true;
    word_list_enabled_ = true;
    punc_enabled_ = true;
  }
  virtual ~LangModel() {}

  // Returns an edge pointer to the Root
  virtual LangModEdge *Root() = 0;
  // Returns the edges that fan-out of the specified edge and their count
  virtual LangModEdge **GetEdges(CharAltList *alt_list,
                                 LangModEdge *parent_edge,
                                 int *edge_cnt) = 0;
  // Returns whether a sequence of 32-bit characters is valid within this
  // language model or not. An EndOfWord flag is specified. If true, the
  // sequence has to end on a valid word. The function also optionally
  // returns the list of language model edges traversed to parse the string
  virtual bool IsValidSequence(const char_32 *str, bool eow_flag,
                               LangModEdge **edge_array = NULL) = 0;
  virtual bool IsLeadingPunc(char_32 ch) = 0;
  virtual bool IsTrailingPunc(char_32 ch) = 0;
  virtual bool IsDigit(char_32 ch) = 0;

  // accessor functions for the feature toggles
  inline bool OOD() { return ood_enabled_; }
  inline bool Numeric() { return numeric_enabled_; }
  inline bool WordList() { return word_list_enabled_; }
  inline bool Punc() { return punc_enabled_; }
  inline void SetOOD(bool ood) { ood_enabled_ = ood; }
  inline void SetNumeric(bool numeric) { numeric_enabled_ = numeric; }
  inline void SetWordList(bool word_list) { word_list_enabled_ = word_list; }
  inline void SetPunc(bool punc_enabled) { punc_enabled_ = punc_enabled; }

 protected:
  bool ood_enabled_;
  bool numeric_enabled_;
  bool word_list_enabled_;
  bool punc_enabled_;
};
|
||||
}
|
||||
|
||||
#endif // LANG_MODEL_H
|
@ -1,217 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: search_column.cpp
|
||||
* Description: Implementation of the Beam Search Column Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include "search_column.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Constructs an empty column; the node array and hash table are created
// lazily by Init()/AddNode(). Initializer order follows the member
// declaration order in search_column.h.
SearchColumn::SearchColumn(int col_idx, int max_node)
    : init_(false),
      min_cost_(INT_MAX),
      max_cost_(0),
      max_node_cnt_(max_node),
      node_cnt_(0),
      col_idx_(col_idx),
      node_array_(NULL),
      node_hash_table_(NULL) {
}
|
||||
|
||||
// Cleanup data
|
||||
void SearchColumn::Cleanup() {
|
||||
if (node_array_ != NULL) {
|
||||
for (int node_idx = 0; node_idx < node_cnt_; node_idx++) {
|
||||
if (node_array_[node_idx] != NULL) {
|
||||
delete node_array_[node_idx];
|
||||
}
|
||||
}
|
||||
|
||||
delete []node_array_;
|
||||
node_array_ = NULL;
|
||||
}
|
||||
FreeHashTable();
|
||||
init_ = false;
|
||||
}
|
||||
|
||||
// Destructor: releases all nodes and the hash table via Cleanup().
SearchColumn::~SearchColumn() {
  Cleanup();
}
|
||||
|
||||
// Initializations
|
||||
bool SearchColumn::Init() {
|
||||
if (init_ == true) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// create hash table
|
||||
if (node_hash_table_ == NULL) {
|
||||
node_hash_table_ = new SearchNodeHashTable();
|
||||
}
|
||||
|
||||
init_ = true;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Prune the nodes if necessary. Pruning is done such that a max
// number of nodes is kept, i.e., the beam width. Uses a histogram of
// node costs to pick a cutoff cost, then compacts survivors in place.
void SearchColumn::Prune() {
  // no need to prune
  if (node_cnt_ <= max_node_cnt_) {
    return;
  }

  // compute the cost histogram over [min_cost_, max_cost_]
  memset(score_bins_, 0, sizeof(score_bins_));
  int cost_range = max_cost_ - min_cost_ + 1;
  for (int node_idx = 0; node_idx < node_cnt_; node_idx++) {
    int cost_bin = static_cast<int>(
        ((node_array_[node_idx]->BestCost() - min_cost_) *
         kScoreBins) / static_cast<double>(cost_range));
    // clamp into the last bin (guards rounding at the upper edge)
    if (cost_bin >= kScoreBins) {
      cost_bin = kScoreBins - 1;
    }
    score_bins_[cost_bin]++;
  }

  // determine the pruning cost by scanning the cost histogram from
  // least to greatest cost bins and finding the cost at which the
  // max number of nodes is exceeded
  int pruning_cost = 0;
  int new_node_cnt = 0;
  for (int cost_bin = 0; cost_bin < kScoreBins; cost_bin++) {
    if (new_node_cnt > 0 &&
        (new_node_cnt + score_bins_[cost_bin]) > max_node_cnt_) {
      pruning_cost = min_cost_ + ((cost_bin * cost_range) / kScoreBins);
      break;
    }
    new_node_cnt += score_bins_[cost_bin];
  }

  // prune out all the nodes above this cost, compacting kept nodes to the
  // front of the array (new_node_cnt is reused as the write index)
  for (int node_idx = new_node_cnt = 0; node_idx < node_cnt_; node_idx++) {
    // prune this node out
    if (node_array_[node_idx]->BestCost() > pruning_cost ||
        new_node_cnt > max_node_cnt_) {
      delete node_array_[node_idx];
    } else {
      // keep it
      node_array_[new_node_cnt++] = node_array_[node_idx];
    }
  }
  node_cnt_ = new_node_cnt;
}
|
||||
|
||||
// sort all nodes in place using SearchNode's comparison function
// (needed for visualization)
void SearchColumn::Sort() {
  if (node_cnt_ > 0 && node_array_ != NULL) {
    qsort(node_array_, node_cnt_, sizeof(*node_array_),
          SearchNode::SearchNodeComparer);
  }
}
|
||||
|
||||
// add a new node for the given language-model edge, or update the matching
// existing node. Takes ownership of `edge` (deletes it on the merge path).
// Returns the affected node, or NULL if the node was rejected/pruned or no
// update occurred.
SearchNode *SearchColumn::AddNode(LangModEdge *edge, int reco_cost,
                                  SearchNode *parent_node,
                                  CubeRecoContext *cntxt) {
  // init if necessary
  if (init_ == false && Init() == false) {
    return NULL;
  }

  // find out if we have an node with the same edge
  // look in the hash table
  SearchNode *new_node = node_hash_table_->Lookup(edge, parent_node);
  // node does not exist
  if (new_node == NULL) {
    new_node = new SearchNode(cntxt, parent_node, reco_cost, edge, col_idx_);

    // if the max node count has already been reached, check if the cost of
    // the new node exceeds the max cost. This indicates that it will be pruned
    // and so there is no point adding it
    if (node_cnt_ >= max_node_cnt_ && new_node->BestCost() > max_cost_) {
      delete new_node;
      return NULL;
    }

    // expand the node buffer if necessary (grows in fixed-size chunks)
    if ((node_cnt_ % kNodeAllocChunk) == 0) {
      // alloc a new buff
      SearchNode **new_node_buff =
          new SearchNode *[node_cnt_ + kNodeAllocChunk];

      // free existing after copying contents
      if (node_array_ != NULL) {
        memcpy(new_node_buff, node_array_, node_cnt_ * sizeof(*new_node_buff));
        delete []node_array_;
      }

      node_array_ = new_node_buff;
    }

    // add the node to the hash table only if it is non-OOD edge
    // because the langmod state is not unique
    if (edge->IsOOD() == false) {
      if (!node_hash_table_->Insert(edge, new_node)) {
        tprintf("Hash table full!!!");
        delete new_node;
        return NULL;
      }
    }

    node_array_[node_cnt_++] = new_node;

  } else {
    // node exists before
    // if no update occurred, return NULL
    if (new_node->UpdateParent(parent_node, reco_cost, edge) == false) {
      new_node = NULL;
    }

    // free the edge (the existing node keeps its own)
    delete edge;
  }

  // update Min and Max Costs so Prune() has an accurate cost range
  if (new_node != NULL) {
    if (min_cost_ > new_node->BestCost()) {
      min_cost_ = new_node->BestCost();
    }

    if (max_cost_ < new_node->BestCost()) {
      max_cost_ = new_node->BestCost();
    }
  }

  return new_node;
}
|
||||
|
||||
// Returns the node with the least cost in this column, or NULL when the
// column is empty.
SearchNode *SearchColumn::BestNode() {
  SearchNode *best = NULL;
  for (int idx = 0; idx < node_cnt_; ++idx) {
    SearchNode *candidate = node_array_[idx];
    if (best == NULL || candidate->BestCost() < best->BestCost()) {
      best = candidate;
    }
  }
  return best;
}
|
||||
} // namespace tesseract
|
@ -1,84 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: search_column.h
|
||||
* Description: Declaration of the Beam Search Column Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The SearchColumn class abstracts a column in the lattice that is created
|
||||
// by the BeamSearch during the recognition process
|
||||
// The class holds the lattice nodes. New nodes are added by calls to AddNode
|
||||
// made from the BeamSearch
|
||||
// The class maintains a hash table of the nodes to be able to lookup nodes
|
||||
// quickly using their lang_mod_edge. This is needed to merge similar paths
|
||||
// in the lattice
|
||||
|
||||
#ifndef SEARCH_COLUMN_H
|
||||
#define SEARCH_COLUMN_H
|
||||
|
||||
#include "search_node.h"
|
||||
#include "lang_mod_edge.h"
|
||||
#include "cube_reco_context.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class SearchColumn {
|
||||
public:
|
||||
SearchColumn(int col_idx, int max_node_cnt);
|
||||
~SearchColumn();
|
||||
// Accessor functions
|
||||
inline int ColIdx() const { return col_idx_; }
|
||||
inline int NodeCount() const { return node_cnt_; }
|
||||
inline SearchNode **Nodes() const { return node_array_; }
|
||||
|
||||
// Prune the nodes if necessary. Pruning is done such that a max
|
||||
// number of nodes is kept, i.e., the beam width
|
||||
void Prune();
|
||||
SearchNode *AddNode(LangModEdge *edge, int score,
|
||||
SearchNode *parent, CubeRecoContext *cntxt);
|
||||
// Returns the node with the least cost
|
||||
SearchNode *BestNode();
|
||||
// Sort the lattice nodes. Needed for visualization
|
||||
void Sort();
|
||||
// Free up the Hash Table. Added to be called by the Beam Search after
|
||||
// a column is pruned to reduce memory foot print
|
||||
void FreeHashTable() {
|
||||
if (node_hash_table_ != NULL) {
|
||||
delete node_hash_table_;
|
||||
node_hash_table_ = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
static const int kNodeAllocChunk = 1024;
|
||||
static const int kScoreBins = 1024;
|
||||
bool init_;
|
||||
int min_cost_;
|
||||
int max_cost_;
|
||||
int max_node_cnt_;
|
||||
int node_cnt_;
|
||||
int col_idx_;
|
||||
int score_bins_[kScoreBins];
|
||||
SearchNode **node_array_;
|
||||
SearchNodeHashTable *node_hash_table_;
|
||||
|
||||
// Free node array and hash table
|
||||
void Cleanup();
|
||||
// Create hash table
|
||||
bool Init();
|
||||
};
|
||||
}
|
||||
|
||||
#endif // SEARCH_COLUMN_H
|
@ -1,229 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: search_node.cpp
|
||||
* Description: Implementation of the Beam Search Node Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include "search_node.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// The constructor updates the best paths and costs:
|
||||
// mean_char_reco_cost_ (returned by BestRecoCost()) is the mean
|
||||
// char_reco cost of the best_path, including this node.
|
||||
// best_path_reco_cost is the total char_reco_cost of the best_path,
|
||||
// but excludes the char_reco_cost of this node.
|
||||
// best_cost is the mean mixed cost, i.e., mean_char_reco_cost_ +
|
||||
// current language model cost, all weighted by the cube context's
|
||||
// RecoWgt parameter
|
||||
SearchNode::SearchNode(CubeRecoContext *cntxt, SearchNode *parent_node,
|
||||
int char_reco_cost, LangModEdge *edge, int col_idx) {
|
||||
// copy data members
|
||||
cntxt_ = cntxt;
|
||||
lang_mod_edge_ = edge;
|
||||
col_idx_ = col_idx;
|
||||
parent_node_ = parent_node;
|
||||
char_reco_cost_ = char_reco_cost;
|
||||
|
||||
// the string of this node is the same as that of the language model edge
|
||||
str_ = (edge == NULL ? NULL : edge->EdgeString());
|
||||
|
||||
// compute best path total reco cost
|
||||
best_path_reco_cost_ = (parent_node_ == NULL) ? 0 :
|
||||
parent_node_->CharRecoCost() + parent_node_->BestPathRecoCost();
|
||||
|
||||
// update best path length
|
||||
best_path_len_ = (parent_node_ == NULL) ?
|
||||
1 : parent_node_->BestPathLength() + 1;
|
||||
if (edge != NULL && edge->IsRoot() && parent_node_ != NULL) {
|
||||
best_path_len_++;
|
||||
}
|
||||
|
||||
// compute best reco cost mean cost
|
||||
mean_char_reco_cost_ = static_cast<int>(
|
||||
(best_path_reco_cost_ + char_reco_cost_) /
|
||||
static_cast<double>(best_path_len_));
|
||||
|
||||
// get language model cost
|
||||
int lm_cost = LangModCost(lang_mod_edge_, parent_node_);
|
||||
|
||||
// compute aggregate best cost
|
||||
best_cost_ = static_cast<int>(cntxt_->Params()->RecoWgt() *
|
||||
(best_path_reco_cost_ + char_reco_cost_) /
|
||||
static_cast<double>(best_path_len_)
|
||||
) + lm_cost;
|
||||
}
|
||||
|
||||
SearchNode::~SearchNode() {
|
||||
if (lang_mod_edge_ != NULL) {
|
||||
delete lang_mod_edge_;
|
||||
}
|
||||
}
|
||||
|
||||
// update the parent_node node if provides a better (less) cost
|
||||
bool SearchNode::UpdateParent(SearchNode *new_parent, int new_reco_cost,
|
||||
LangModEdge *new_edge) {
|
||||
if (lang_mod_edge_ == NULL) {
|
||||
if (new_edge != NULL) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
// to update the parent_node, we have to have the same target
|
||||
// state and char
|
||||
if (new_edge == NULL || !lang_mod_edge_->IsIdentical(new_edge) ||
|
||||
!SearchNode::IdenticalPath(parent_node_, new_parent)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// compute the path cost and combined cost of the new path
|
||||
int new_best_path_reco_cost;
|
||||
int new_cost;
|
||||
int new_best_path_len;
|
||||
|
||||
new_best_path_reco_cost = (new_parent == NULL) ?
|
||||
0 : new_parent->BestPathRecoCost() + new_parent->CharRecoCost();
|
||||
|
||||
new_best_path_len =
|
||||
(new_parent == NULL) ? 1 : new_parent->BestPathLength() + 1;
|
||||
|
||||
// compute the new language model cost
|
||||
int new_lm_cost = LangModCost(new_edge, new_parent);
|
||||
|
||||
new_cost = static_cast<int>(cntxt_->Params()->RecoWgt() *
|
||||
(new_best_path_reco_cost + new_reco_cost) /
|
||||
static_cast<double>(new_best_path_len)
|
||||
) + new_lm_cost;
|
||||
|
||||
// update if it is better (less) than the current one
|
||||
if (best_cost_ > new_cost) {
|
||||
parent_node_ = new_parent;
|
||||
char_reco_cost_ = new_reco_cost;
|
||||
best_path_reco_cost_ = new_best_path_reco_cost;
|
||||
best_path_len_ = new_best_path_len;
|
||||
mean_char_reco_cost_ = static_cast<int>(
|
||||
(best_path_reco_cost_ + char_reco_cost_) /
|
||||
static_cast<double>(best_path_len_));
|
||||
best_cost_ = static_cast<int>(cntxt_->Params()->RecoWgt() *
|
||||
(best_path_reco_cost_ + char_reco_cost_) /
|
||||
static_cast<double>(best_path_len_)
|
||||
) + new_lm_cost;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
char_32 *SearchNode::PathString() {
|
||||
SearchNode *node = this;
|
||||
|
||||
// compute string length
|
||||
int len = 0;
|
||||
|
||||
while (node != NULL) {
|
||||
if (node->str_ != NULL) {
|
||||
len += CubeUtils::StrLen(node->str_);
|
||||
}
|
||||
|
||||
// if the edge is a root and does not have a NULL parent, account for space
|
||||
LangModEdge *lm_edge = node->LangModelEdge();
|
||||
if (lm_edge != NULL && lm_edge->IsRoot() && node->ParentNode() != NULL) {
|
||||
len++;
|
||||
}
|
||||
|
||||
node = node->parent_node_;
|
||||
}
|
||||
|
||||
char_32 *char_ptr = new char_32[len + 1];
|
||||
|
||||
int ch_idx = len;
|
||||
|
||||
node = this;
|
||||
char_ptr[ch_idx--] = 0;
|
||||
|
||||
while (node != NULL) {
|
||||
int str_len = ((node->str_ == NULL) ? 0 : CubeUtils::StrLen(node->str_));
|
||||
while (str_len > 0) {
|
||||
char_ptr[ch_idx--] = node->str_[--str_len];
|
||||
}
|
||||
|
||||
// if the edge is a root and does not have a NULL parent, insert a space
|
||||
LangModEdge *lm_edge = node->LangModelEdge();
|
||||
if (lm_edge != NULL && lm_edge->IsRoot() && node->ParentNode() != NULL) {
|
||||
char_ptr[ch_idx--] = (char_32)' ';
|
||||
}
|
||||
|
||||
node = node->parent_node_;
|
||||
}
|
||||
|
||||
return char_ptr;
|
||||
}
|
||||
|
||||
// compares the path of two nodes and checks if its identical
|
||||
bool SearchNode::IdenticalPath(SearchNode *node1, SearchNode *node2) {
|
||||
if (node1 != NULL && node2 != NULL &&
|
||||
node1->best_path_len_ != node2->best_path_len_) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// backtrack until either a root or a NULL edge is reached
|
||||
while (node1 != NULL && node2 != NULL) {
|
||||
if (node1->str_ != node2->str_) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// stop if either nodes is a root
|
||||
if (node1->LangModelEdge()->IsRoot() || node2->LangModelEdge()->IsRoot()) {
|
||||
break;
|
||||
}
|
||||
|
||||
node1 = node1->parent_node_;
|
||||
node2 = node2->parent_node_;
|
||||
}
|
||||
|
||||
return ((node1 == NULL && node2 == NULL) ||
|
||||
(node1 != NULL && node1->LangModelEdge()->IsRoot() &&
|
||||
node2 != NULL && node2->LangModelEdge()->IsRoot()));
|
||||
}
|
||||
|
||||
// Computes the language model cost of a path
|
||||
int SearchNode::LangModCost(LangModEdge *current_lm_edge,
|
||||
SearchNode *parent_node) {
|
||||
int lm_cost = 0;
|
||||
int node_cnt = 0;
|
||||
|
||||
do {
|
||||
// check if root
|
||||
bool is_root = ((current_lm_edge != NULL && current_lm_edge->IsRoot()) ||
|
||||
parent_node == NULL);
|
||||
if (is_root) {
|
||||
node_cnt++;
|
||||
lm_cost += (current_lm_edge == NULL ? 0 : current_lm_edge->PathCost());
|
||||
}
|
||||
|
||||
// continue until we hit a null parent
|
||||
if (parent_node == NULL) {
|
||||
break;
|
||||
}
|
||||
|
||||
// get the previous language model edge
|
||||
current_lm_edge = parent_node->LangModelEdge();
|
||||
// back track
|
||||
parent_node = parent_node->ParentNode();
|
||||
} while (true);
|
||||
|
||||
return static_cast<int>(lm_cost / static_cast<double>(node_cnt));
|
||||
}
|
||||
} // namespace tesseract
|
@ -1,168 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: search_node.h
|
||||
* Description: Declaration of the Beam Search Node Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The SearchNode class abstracts the search lattice node in the lattice
|
||||
// generated by the BeamSearch class
|
||||
// The SearchNode class holds the lang_mod_edge associated with the lattice
|
||||
// node. It also holds a pointer to the parent SearchNode in the search path
|
||||
// In addition it holds the recognition and the language model costs of the
|
||||
// node and the path leading to this node
|
||||
|
||||
#ifndef SEARCH_NODE_H
|
||||
#define SEARCH_NODE_H
|
||||
|
||||
#include "lang_mod_edge.h"
|
||||
#include "cube_reco_context.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class SearchNode {
|
||||
public:
|
||||
SearchNode(CubeRecoContext *cntxt, SearchNode *parent_node,
|
||||
int char_reco_cost, LangModEdge *edge, int col_idx);
|
||||
|
||||
~SearchNode();
|
||||
|
||||
// Updates the parent of the current node if the specified path yields
|
||||
// a better path cost
|
||||
bool UpdateParent(SearchNode *new_parent, int new_reco_cost,
|
||||
LangModEdge *new_edge);
|
||||
// returns the 32-bit string corresponding to the path leading to this node
|
||||
char_32 *PathString();
|
||||
// True if the two input nodes correspond to the same path
|
||||
static bool IdenticalPath(SearchNode *node1, SearchNode *node2);
|
||||
|
||||
inline const char_32 *NodeString() { return str_; }
|
||||
inline void SetString(char_32 *str) { str_ = str; }
|
||||
|
||||
// This node's character recognition cost.
|
||||
inline int CharRecoCost() { return char_reco_cost_; }
|
||||
// Total character recognition cost of the nodes in the best path,
|
||||
// excluding this node.
|
||||
inline int BestPathRecoCost() { return best_path_reco_cost_; }
|
||||
// Number of nodes in best path.
|
||||
inline int BestPathLength() { return best_path_len_; }
|
||||
// Mean mixed cost, i.e., mean character recognition cost +
|
||||
// current language model cost, all weighted by the RecoWgt parameter
|
||||
inline int BestCost() { return best_cost_; }
|
||||
// Mean character recognition cost of the nodes on the best path,
|
||||
// including this node.
|
||||
inline int BestRecoCost() { return mean_char_reco_cost_ ; }
|
||||
|
||||
inline int ColIdx() { return col_idx_; }
|
||||
inline SearchNode *ParentNode() { return parent_node_; }
|
||||
inline LangModEdge *LangModelEdge() { return lang_mod_edge_;}
|
||||
inline int LangModCost() { return LangModCost(lang_mod_edge_, parent_node_); }
|
||||
|
||||
// A comparer function that allows the SearchColumn class to sort the
|
||||
// nodes based on the path cost
|
||||
inline static int SearchNodeComparer(const void *node1, const void *node2) {
|
||||
return (*(reinterpret_cast<SearchNode * const *>(node1)))->best_cost_ -
|
||||
(*(reinterpret_cast<SearchNode * const *>(node2)))->best_cost_;
|
||||
}
|
||||
|
||||
private:
|
||||
CubeRecoContext *cntxt_;
|
||||
// Character code
|
||||
const char_32 *str_;
|
||||
// Recognition cost of most recent character
|
||||
int char_reco_cost_;
|
||||
// Mean mixed cost, i.e., mean character recognition cost +
|
||||
// current language model cost, all weighted by the RecoWgt parameter
|
||||
int best_cost_;
|
||||
// Mean character recognition cost of the nodes on the best path,
|
||||
// including this node.
|
||||
int mean_char_reco_cost_ ;
|
||||
// Total character recognition cost of the nodes in the best path,
|
||||
// excluding this node.
|
||||
int best_path_reco_cost_;
|
||||
// Number of nodes in best path.
|
||||
int best_path_len_;
|
||||
// Column index
|
||||
int col_idx_;
|
||||
// Parent Node
|
||||
SearchNode *parent_node_;
|
||||
// Language model edge
|
||||
LangModEdge *lang_mod_edge_;
|
||||
static int LangModCost(LangModEdge *lang_mod_edge, SearchNode *parent_node);
|
||||
};
|
||||
|
||||
// Implments a SearchNode hash table used to detect if a Search Node exists
|
||||
// or not. This is needed to make sure that identical paths in the BeamSearch
|
||||
// converge
|
||||
class SearchNodeHashTable {
|
||||
public:
|
||||
SearchNodeHashTable() {
|
||||
memset(bin_size_array_, 0, sizeof(bin_size_array_));
|
||||
}
|
||||
|
||||
~SearchNodeHashTable() {
|
||||
}
|
||||
|
||||
// inserts an entry in the hash table
|
||||
inline bool Insert(LangModEdge *lang_mod_edge, SearchNode *srch_node) {
|
||||
// compute hash based on the edge and its parent node edge
|
||||
unsigned int edge_hash = lang_mod_edge->Hash();
|
||||
unsigned int parent_hash = (srch_node->ParentNode() == NULL ?
|
||||
0 : srch_node->ParentNode()->LangModelEdge()->Hash());
|
||||
unsigned int hash_bin = (edge_hash + parent_hash) % kSearchNodeHashBins;
|
||||
|
||||
// already maxed out, just fail
|
||||
if (bin_size_array_[hash_bin] >= kMaxSearchNodePerBin) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bin_array_[hash_bin][bin_size_array_[hash_bin]++] = srch_node;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Looks up an entry in the hash table
|
||||
inline SearchNode *Lookup(LangModEdge *lang_mod_edge,
|
||||
SearchNode *parent_node) {
|
||||
// compute hash based on the edge and its parent node edge
|
||||
unsigned int edge_hash = lang_mod_edge->Hash();
|
||||
unsigned int parent_hash = (parent_node == NULL ?
|
||||
0 : parent_node->LangModelEdge()->Hash());
|
||||
unsigned int hash_bin = (edge_hash + parent_hash) % kSearchNodeHashBins;
|
||||
|
||||
// lookup the entries in the hash bin
|
||||
for (int node_idx = 0; node_idx < bin_size_array_[hash_bin]; node_idx++) {
|
||||
if (lang_mod_edge->IsIdentical(
|
||||
bin_array_[hash_bin][node_idx]->LangModelEdge()) == true &&
|
||||
SearchNode::IdenticalPath(
|
||||
bin_array_[hash_bin][node_idx]->ParentNode(), parent_node) == true) {
|
||||
return bin_array_[hash_bin][node_idx];
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
private:
|
||||
// Hash bin size parameters. These were determined emperically. These affect
|
||||
// the speed of the beam search but have no impact on accuracy
|
||||
static const int kSearchNodeHashBins = 4096;
|
||||
static const int kMaxSearchNodePerBin = 512;
|
||||
int bin_size_array_[kSearchNodeHashBins];
|
||||
SearchNode *bin_array_[kSearchNodeHashBins][kMaxSearchNodePerBin];
|
||||
};
|
||||
}
|
||||
|
||||
#endif // SEARCH_NODE_H
|
@ -1,55 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: search_object.h
|
||||
* Description: Declaration of the Beam Search Object Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The SearchObject class represents a char_samp (a word bitmap) that is
|
||||
// being searched for characters (or recognizeable entities).
|
||||
// This is an abstract class that all SearchObjects should inherit from
|
||||
// A SearchObject class provides methods to:
|
||||
// 1- Returns the count of segments
|
||||
// 2- Recognize a segment range
|
||||
// 3- Creates a CharSamp for a segment range
|
||||
|
||||
#ifndef SEARCH_OBJECT_H
|
||||
#define SEARCH_OBJECT_H
|
||||
|
||||
#include "char_altlist.h"
|
||||
#include "char_samp.h"
|
||||
#include "cube_reco_context.h"
|
||||
|
||||
namespace tesseract {
|
||||
class SearchObject {
|
||||
public:
|
||||
explicit SearchObject(CubeRecoContext *cntxt) { cntxt_ = cntxt; }
|
||||
virtual ~SearchObject() {}
|
||||
|
||||
virtual int SegPtCnt() = 0;
|
||||
virtual CharAltList *RecognizeSegment(int start_pt, int end_pt) = 0;
|
||||
virtual CharSamp *CharSample(int start_pt, int end_pt) = 0;
|
||||
virtual Box* CharBox(int start_pt, int end_pt) = 0;
|
||||
|
||||
virtual int SpaceCost(int seg_pt) = 0;
|
||||
virtual int NoSpaceCost(int seg_pt) = 0;
|
||||
virtual int NoSpaceCost(int start_pt, int end_pt) = 0;
|
||||
|
||||
protected:
|
||||
CubeRecoContext *cntxt_;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // SEARCH_OBJECT_H
|
@ -1,44 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: string_32.h
|
||||
* Description: Declaration of a 32 Bit string class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2007
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// the string_32 class provides the functionality needed
|
||||
// for a 32-bit string class
|
||||
|
||||
#ifndef STRING_32_H
|
||||
#define STRING_32_H
|
||||
|
||||
#include <string.h>
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
#ifdef USE_STD_NAMESPACE
|
||||
using std::basic_string;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
#endif
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// basic definitions
|
||||
typedef signed int char_32;
|
||||
typedef basic_string<char_32> string_32;
|
||||
}
|
||||
|
||||
#endif // STRING_32_H
|
@ -1,120 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: tess_lang_mod_edge.cpp
|
||||
* Description: Implementation of the Tesseract Language Model Edge Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include "tess_lang_mod_edge.h"
|
||||
#include "const.h"
|
||||
#include "unichar.h"
|
||||
|
||||
|
||||
|
||||
namespace tesseract {
|
||||
// OOD constructor
|
||||
TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt, int class_id) {
|
||||
root_ = false;
|
||||
cntxt_ = cntxt;
|
||||
dawg_ = NULL;
|
||||
start_edge_ = 0;
|
||||
end_edge_ = 0;
|
||||
edge_mask_ = 0;
|
||||
class_id_ = class_id;
|
||||
str_ = cntxt_->CharacterSet()->ClassString(class_id);
|
||||
path_cost_ = Cost();
|
||||
}
|
||||
|
||||
/**
|
||||
* leading, trailing punc constructor and single byte UTF char
|
||||
*/
|
||||
TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt,
|
||||
const Dawg *dawg, EDGE_REF edge_idx, int class_id) {
|
||||
root_ = false;
|
||||
cntxt_ = cntxt;
|
||||
dawg_ = dawg;
|
||||
start_edge_ = edge_idx;
|
||||
end_edge_ = edge_idx;
|
||||
edge_mask_ = 0;
|
||||
class_id_ = class_id;
|
||||
str_ = cntxt_->CharacterSet()->ClassString(class_id);
|
||||
path_cost_ = Cost();
|
||||
}
|
||||
|
||||
/**
|
||||
* dict constructor: multi byte UTF char
|
||||
*/
|
||||
TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt, const Dawg *dawg,
|
||||
EDGE_REF start_edge_idx, EDGE_REF end_edge_idx,
|
||||
int class_id) {
|
||||
root_ = false;
|
||||
cntxt_ = cntxt;
|
||||
dawg_ = dawg;
|
||||
start_edge_ = start_edge_idx;
|
||||
end_edge_ = end_edge_idx;
|
||||
edge_mask_ = 0;
|
||||
class_id_ = class_id;
|
||||
str_ = cntxt_->CharacterSet()->ClassString(class_id);
|
||||
path_cost_ = Cost();
|
||||
}
|
||||
|
||||
char *TessLangModEdge::Description() const {
|
||||
char *char_ptr = new char[256];
|
||||
|
||||
char dawg_str[256];
|
||||
char edge_str[32];
|
||||
if (dawg_ == (Dawg *)DAWG_OOD) {
|
||||
strcpy(dawg_str, "OOD");
|
||||
} else if (dawg_ == (Dawg *)DAWG_NUMBER) {
|
||||
strcpy(dawg_str, "NUM");
|
||||
} else if (dawg_->permuter() == SYSTEM_DAWG_PERM) {
|
||||
strcpy(dawg_str, "Main");
|
||||
} else if (dawg_->permuter() == USER_DAWG_PERM) {
|
||||
strcpy(dawg_str, "User");
|
||||
} else if (dawg_->permuter() == DOC_DAWG_PERM) {
|
||||
strcpy(dawg_str, "Doc");
|
||||
} else {
|
||||
strcpy(dawg_str, "N/A");
|
||||
}
|
||||
|
||||
sprintf(edge_str, "%d", static_cast<int>(start_edge_));
|
||||
if (IsLeadingPuncEdge(edge_mask_)) {
|
||||
strcat(edge_str, "-LP");
|
||||
}
|
||||
if (IsTrailingPuncEdge(edge_mask_)) {
|
||||
strcat(edge_str, "-TP");
|
||||
}
|
||||
sprintf(char_ptr, "%s(%s)%s, Wtd Dawg Cost=%d",
|
||||
dawg_str, edge_str, IsEOW() ? "-EOW-" : "", path_cost_);
|
||||
|
||||
return char_ptr;
|
||||
}
|
||||
|
||||
int TessLangModEdge::CreateChildren(CubeRecoContext *cntxt,
|
||||
const Dawg *dawg,
|
||||
NODE_REF parent_node,
|
||||
LangModEdge **edge_array) {
|
||||
int edge_cnt = 0;
|
||||
NodeChildVector vec;
|
||||
dawg->unichar_ids_of(parent_node, &vec, false); // find all children
|
||||
for (int i = 0; i < vec.size(); ++i) {
|
||||
const NodeChild &child = vec[i];
|
||||
if (child.unichar_id == INVALID_UNICHAR_ID) continue;
|
||||
edge_array[edge_cnt++] =
|
||||
new TessLangModEdge(cntxt, dawg, child.edge_ref, child.unichar_id);
|
||||
}
|
||||
return edge_cnt;
|
||||
}
|
||||
}
|
@ -1,233 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: tess_lang_mod_edge.h
|
||||
* Description: Declaration of the Tesseract Language Model Edge Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The TessLangModEdge models an edge in the Tesseract language models
|
||||
// It inherits from the LangModEdge class
|
||||
|
||||
#ifndef TESS_LANG_MOD_EDGE_H
|
||||
#define TESS_LANG_MOD_EDGE_H
|
||||
|
||||
#include "dawg.h"
|
||||
#include "char_set.h"
|
||||
|
||||
#include "lang_mod_edge.h"
|
||||
#include "cube_reco_context.h"
|
||||
#include "cube_utils.h"
|
||||
|
||||
// Macros needed to identify punctuation in the langmodel state
|
||||
#ifdef _HMSW32_H
|
||||
#define LEAD_PUNC_EDGE_REF_MASK (inT64) 0x0000000100000000i64
|
||||
#define TRAIL_PUNC_EDGE_REF_MASK (inT64) 0x0000000200000000i64
|
||||
#define TRAIL_PUNC_REPEAT_MASK (inT64) 0xffff000000000000i64
|
||||
#else
|
||||
#define LEAD_PUNC_EDGE_REF_MASK (inT64) 0x0000000100000000ll
|
||||
#define TRAIL_PUNC_EDGE_REF_MASK (inT64) 0x0000000200000000ll
|
||||
#define TRAIL_PUNC_REPEAT_MASK (inT64) 0xffff000000000000ll
|
||||
#endif
|
||||
|
||||
// Number state machine macros
|
||||
#define NUMBER_STATE_SHIFT 0
|
||||
#define NUMBER_STATE_MASK 0x0000000fl
|
||||
#define NUMBER_LITERAL_SHIFT 4
|
||||
#define NUMBER_LITERAL_MASK 0x000000f0l
|
||||
#define NUMBER_REPEAT_SHIFT 8
|
||||
#define NUMBER_REPEAT_MASK 0x00000f00l
|
||||
#define NUM_TRM -99
|
||||
#define TRAIL_PUNC_REPEAT_SHIFT 48
|
||||
|
||||
#define IsLeadingPuncEdge(edge_mask) \
|
||||
((edge_mask & LEAD_PUNC_EDGE_REF_MASK) != 0)
|
||||
#define IsTrailingPuncEdge(edge_mask) \
|
||||
((edge_mask & TRAIL_PUNC_EDGE_REF_MASK) != 0)
|
||||
#define TrailingPuncCount(edge_mask) \
|
||||
((edge_mask & TRAIL_PUNC_REPEAT_MASK) >> TRAIL_PUNC_REPEAT_SHIFT)
|
||||
#define TrailingPuncEdgeMask(Cnt) \
|
||||
(TRAIL_PUNC_EDGE_REF_MASK | ((Cnt) << TRAIL_PUNC_REPEAT_SHIFT))
|
||||
|
||||
// State machine IDs
|
||||
#define DAWG_OOD 0
|
||||
#define DAWG_NUMBER 1
|
||||
|
||||
namespace tesseract {
|
||||
class TessLangModEdge : public LangModEdge {
|
||||
public:
|
||||
// Different ways of constructing a TessLangModEdge
|
||||
TessLangModEdge(CubeRecoContext *cntxt, const Dawg *edge_array,
|
||||
EDGE_REF edge, int class_id);
|
||||
TessLangModEdge(CubeRecoContext *cntxt, const Dawg *edge_array,
|
||||
EDGE_REF start_edge_idx, EDGE_REF end_edge_idx,
|
||||
int class_id);
|
||||
TessLangModEdge(CubeRecoContext *cntxt, int class_id);
|
||||
~TessLangModEdge() {}
|
||||
|
||||
// Accessors
|
||||
inline bool IsRoot() const {
|
||||
return root_;
|
||||
}
|
||||
inline void SetRoot(bool flag) { root_ = flag; }
|
||||
|
||||
inline bool IsOOD() const {
|
||||
return (dawg_ == (Dawg *)DAWG_OOD);
|
||||
}
|
||||
|
||||
inline bool IsNumber() const {
|
||||
return (dawg_ == (Dawg *)DAWG_NUMBER);
|
||||
}
|
||||
|
||||
inline bool IsEOW() const {
|
||||
return (IsTerminal() || (dawg_->end_of_word(end_edge_) != 0));
|
||||
}
|
||||
|
||||
inline const Dawg *GetDawg() const { return dawg_; }
|
||||
inline EDGE_REF StartEdge() const { return start_edge_; }
|
||||
inline EDGE_REF EndEdge() const { return end_edge_; }
|
||||
inline EDGE_REF EdgeMask() const { return edge_mask_; }
|
||||
inline const char_32 * EdgeString() const { return str_; }
|
||||
inline int ClassID () const { return class_id_; }
|
||||
inline int PathCost() const { return path_cost_; }
|
||||
inline void SetEdgeMask(EDGE_REF edge_mask) { edge_mask_ = edge_mask; }
|
||||
inline void SetDawg(Dawg *dawg) { dawg_ = dawg; }
|
||||
inline void SetStartEdge(EDGE_REF edge_idx) { start_edge_ = edge_idx; }
|
||||
inline void SetEndEdge(EDGE_REF edge_idx) { end_edge_ = edge_idx; }
|
||||
|
||||
// is this a terminal node:
|
||||
// we can terminate at any OOD char, trailing punc or
|
||||
// when the dawg terminates
|
||||
inline bool IsTerminal() const {
|
||||
return (IsOOD() || IsNumber() || IsTrailingPuncEdge(start_edge_) ||
|
||||
dawg_->next_node(end_edge_) == 0);
|
||||
}
|
||||
|
||||
// How many signals does the LM provide for tuning. These are flags like:
|
||||
// OOD or not, Number of not that are used by the training to compute
|
||||
// extra costs for each word.
|
||||
inline int SignalCnt() const {
|
||||
return 2;
|
||||
}
|
||||
|
||||
// returns the weight assigned to a specified signal
|
||||
inline double SignalWgt(int signal) const {
|
||||
CubeTuningParams *params =
|
||||
reinterpret_cast<CubeTuningParams *>(cntxt_->Params());
|
||||
if (params != NULL) {
|
||||
switch (signal) {
|
||||
case 0:
|
||||
return params->OODWgt();
|
||||
break;
|
||||
|
||||
case 1:
|
||||
return params->NumWgt();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
// sets the weight assigned to a specified signal: Used in training
|
||||
void SetSignalWgt(int signal, double wgt) {
|
||||
CubeTuningParams *params =
|
||||
reinterpret_cast<CubeTuningParams *>(cntxt_->Params());
|
||||
if (params != NULL) {
|
||||
switch (signal) {
|
||||
case 0:
|
||||
params->SetOODWgt(wgt);
|
||||
break;
|
||||
|
||||
case 1:
|
||||
params->SetNumWgt(wgt);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// returns the actual value of a specified signal
|
||||
int Signal(int signal) {
|
||||
switch (signal) {
|
||||
case 0:
|
||||
return IsOOD() ? MIN_PROB_COST : 0;
|
||||
break;
|
||||
|
||||
case 1:
|
||||
return IsNumber() ? MIN_PROB_COST : 0;
|
||||
break;
|
||||
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// returns the Hash value of the edge. Used by the SearchNode hash table
|
||||
// to quickly lookup exisiting edges to converge during search
|
||||
inline unsigned int Hash() const {
|
||||
return static_cast<unsigned int>(
|
||||
((start_edge_ | end_edge_) ^ ((reinterpret_cast<uintptr_t>(dawg_)))) ^
|
||||
((unsigned int)edge_mask_) ^ class_id_);
|
||||
}
|
||||
|
||||
// A verbal description of the edge: Used by visualizers
|
||||
char *Description() const;
|
||||
|
||||
// Is this edge identical to the specified edge
|
||||
inline bool IsIdentical(LangModEdge *lang_mod_edge) const {
|
||||
return (class_id_ ==
|
||||
reinterpret_cast<TessLangModEdge *>(lang_mod_edge)->class_id_ &&
|
||||
str_ == reinterpret_cast<TessLangModEdge *>(lang_mod_edge)->str_ &&
|
||||
dawg_ == reinterpret_cast<TessLangModEdge *>(lang_mod_edge)->dawg_ &&
|
||||
start_edge_ ==
|
||||
reinterpret_cast<TessLangModEdge *>(lang_mod_edge)->start_edge_ &&
|
||||
end_edge_ ==
|
||||
reinterpret_cast<TessLangModEdge *>(lang_mod_edge)->end_edge_ &&
|
||||
edge_mask_ ==
|
||||
reinterpret_cast<TessLangModEdge *>(lang_mod_edge)->edge_mask_);
|
||||
}
|
||||
|
||||
// Creates a set of fan-out edges for the specified edge
|
||||
static int CreateChildren(CubeRecoContext *cntxt,
|
||||
const Dawg *edges,
|
||||
NODE_REF edge_reg,
|
||||
LangModEdge **lm_edges);
|
||||
|
||||
private:
|
||||
bool root_;
|
||||
CubeRecoContext *cntxt_;
|
||||
const Dawg *dawg_;
|
||||
EDGE_REF start_edge_;
|
||||
EDGE_REF end_edge_;
|
||||
EDGE_REF edge_mask_;
|
||||
int path_cost_;
|
||||
int class_id_;
|
||||
const char_32 * str_;
|
||||
// returns the cost of the lang_mod_edge
|
||||
inline int Cost() const {
|
||||
if (cntxt_ != NULL) {
|
||||
CubeTuningParams *params =
|
||||
reinterpret_cast<CubeTuningParams *>(cntxt_->Params());
|
||||
if (dawg_ == (Dawg *)DAWG_OOD) {
|
||||
return static_cast<int>(params->OODWgt() * MIN_PROB_COST);
|
||||
} else if (dawg_ == (Dawg *)DAWG_NUMBER) {
|
||||
return static_cast<int>(params->NumWgt() * MIN_PROB_COST);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // TESS_LANG_MOD_EDGE_H
|
@ -1,506 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: tess_lang_model.cpp
|
||||
* Description: Implementation of the Tesseract Language Model Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The TessLangModel class abstracts the Tesseract language model. It inherits
|
||||
// from the LangModel class. The Tesseract language model encompasses several
|
||||
// Dawgs (words from training data, punctuation, numbers, document words).
|
||||
// On top of this Cube adds an OOD state machine
|
||||
// The class provides methods to traverse the language model in a generative
|
||||
// fashion. Given any node in the DAWG, the language model can generate a list
|
||||
// of children (or fan-out) edges
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "char_samp.h"
|
||||
#include "cube_utils.h"
|
||||
#include "dict.h"
|
||||
#include "tesseractclass.h"
|
||||
#include "tess_lang_model.h"
|
||||
#include "tessdatamanager.h"
|
||||
#include "unicharset.h"
|
||||
|
||||
namespace tesseract {
|
||||
// max fan-out (used for preallocation). Initialized here, but modified by
|
||||
// constructor
|
||||
int TessLangModel::max_edge_ = 4096;
|
||||
|
||||
// Language model extra State machines
|
||||
const Dawg *TessLangModel::ood_dawg_ = reinterpret_cast<Dawg *>(DAWG_OOD);
|
||||
const Dawg *TessLangModel::number_dawg_ = reinterpret_cast<Dawg *>(DAWG_NUMBER);
|
||||
|
||||
// number state machine
|
||||
const int TessLangModel::num_state_machine_[kStateCnt][kNumLiteralCnt] = {
|
||||
{0, 1, 1, NUM_TRM, NUM_TRM},
|
||||
{NUM_TRM, 1, 1, 3, 2},
|
||||
{NUM_TRM, NUM_TRM, 1, NUM_TRM, 2},
|
||||
{NUM_TRM, NUM_TRM, 3, NUM_TRM, 2},
|
||||
};
|
||||
const int TessLangModel::num_max_repeat_[kStateCnt] = {3, 32, 8, 3};
|
||||
|
||||
// thresholds and penalties
|
||||
int TessLangModel::max_ood_shape_cost_ = CubeUtils::Prob2Cost(1e-4);
|
||||
|
||||
TessLangModel::TessLangModel(const string &lm_params,
|
||||
const string &data_file_path,
|
||||
bool load_system_dawg,
|
||||
TessdataManager *tessdata_manager,
|
||||
CubeRecoContext *cntxt) {
|
||||
cntxt_ = cntxt;
|
||||
has_case_ = cntxt_->HasCase();
|
||||
// Load the rest of the language model elements from file
|
||||
LoadLangModelElements(lm_params);
|
||||
// Load word_dawgs_ if needed.
|
||||
if (tessdata_manager->SeekToStart(TESSDATA_CUBE_UNICHARSET)) {
|
||||
word_dawgs_ = new DawgVector();
|
||||
if (load_system_dawg &&
|
||||
tessdata_manager->SeekToStart(TESSDATA_CUBE_SYSTEM_DAWG)) {
|
||||
// The last parameter to the Dawg constructor (the debug level) is set to
|
||||
// false, until Cube has a way to express its preferred debug level.
|
||||
*word_dawgs_ += new SquishedDawg(tessdata_manager->GetDataFilePtr(),
|
||||
DAWG_TYPE_WORD,
|
||||
cntxt_->Lang().c_str(),
|
||||
SYSTEM_DAWG_PERM, false);
|
||||
}
|
||||
} else {
|
||||
word_dawgs_ = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// Cleanup an edge array
|
||||
void TessLangModel::FreeEdges(int edge_cnt, LangModEdge **edge_array) {
|
||||
if (edge_array != NULL) {
|
||||
for (int edge_idx = 0; edge_idx < edge_cnt; edge_idx++) {
|
||||
if (edge_array[edge_idx] != NULL) {
|
||||
delete edge_array[edge_idx];
|
||||
}
|
||||
}
|
||||
delete []edge_array;
|
||||
}
|
||||
}
|
||||
|
||||
// Determines if a sequence of 32-bit chars is valid in this language model
|
||||
// starting from the specified edge. If the eow_flag is ON, also checks for
|
||||
// a valid EndOfWord. If final_edge is not NULL, returns a pointer to the last
|
||||
// edge
|
||||
bool TessLangModel::IsValidSequence(LangModEdge *edge,
|
||||
const char_32 *sequence,
|
||||
bool eow_flag,
|
||||
LangModEdge **final_edge) {
|
||||
// get the edges emerging from this edge
|
||||
int edge_cnt = 0;
|
||||
LangModEdge **edge_array = GetEdges(NULL, edge, &edge_cnt);
|
||||
|
||||
// find the 1st char in the sequence in the children
|
||||
for (int edge_idx = 0; edge_idx < edge_cnt; edge_idx++) {
|
||||
// found a match
|
||||
if (sequence[0] == edge_array[edge_idx]->EdgeString()[0]) {
|
||||
// if this is the last char
|
||||
if (sequence[1] == 0) {
|
||||
// succeed if we are in prefix mode or this is a terminal edge
|
||||
if (eow_flag == false || edge_array[edge_idx]->IsEOW()) {
|
||||
if (final_edge != NULL) {
|
||||
(*final_edge) = edge_array[edge_idx];
|
||||
edge_array[edge_idx] = NULL;
|
||||
}
|
||||
|
||||
FreeEdges(edge_cnt, edge_array);
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
// not the last char continue checking
|
||||
if (IsValidSequence(edge_array[edge_idx], sequence + 1, eow_flag,
|
||||
final_edge) == true) {
|
||||
FreeEdges(edge_cnt, edge_array);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
FreeEdges(edge_cnt, edge_array);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Determines if a sequence of 32-bit chars is valid in this language model
|
||||
// starting from the root. If the eow_flag is ON, also checks for
|
||||
// a valid EndOfWord. If final_edge is not NULL, returns a pointer to the last
|
||||
// edge
|
||||
bool TessLangModel::IsValidSequence(const char_32 *sequence, bool eow_flag,
|
||||
LangModEdge **final_edge) {
|
||||
if (final_edge != NULL) {
|
||||
(*final_edge) = NULL;
|
||||
}
|
||||
|
||||
return IsValidSequence(NULL, sequence, eow_flag, final_edge);
|
||||
}
|
||||
|
||||
bool TessLangModel::IsLeadingPunc(const char_32 ch) {
|
||||
return lead_punc_.find(ch) != string::npos;
|
||||
}
|
||||
|
||||
bool TessLangModel::IsTrailingPunc(const char_32 ch) {
|
||||
return trail_punc_.find(ch) != string::npos;
|
||||
}
|
||||
|
||||
bool TessLangModel::IsDigit(const char_32 ch) {
|
||||
return digits_.find(ch) != string::npos;
|
||||
}
|
||||
|
||||
// The general fan-out generation function. Returns the list of edges
|
||||
// fanning-out of the specified edge and their count. If an AltList is
|
||||
// specified, only the class-ids with a minimum cost are considered
|
||||
LangModEdge ** TessLangModel::GetEdges(CharAltList *alt_list,
|
||||
LangModEdge *lang_mod_edge,
|
||||
int *edge_cnt) {
|
||||
TessLangModEdge *tess_lm_edge =
|
||||
reinterpret_cast<TessLangModEdge *>(lang_mod_edge);
|
||||
LangModEdge **edge_array = NULL;
|
||||
(*edge_cnt) = 0;
|
||||
|
||||
// if we are starting from the root, we'll instantiate every DAWG
|
||||
// and get the all the edges that emerge from the root
|
||||
if (tess_lm_edge == NULL) {
|
||||
// get DAWG count from Tesseract
|
||||
int dawg_cnt = NumDawgs();
|
||||
// preallocate the edge buffer
|
||||
(*edge_cnt) = dawg_cnt * max_edge_;
|
||||
edge_array = new LangModEdge *[(*edge_cnt)];
|
||||
|
||||
for (int dawg_idx = (*edge_cnt) = 0; dawg_idx < dawg_cnt; dawg_idx++) {
|
||||
const Dawg *curr_dawg = GetDawg(dawg_idx);
|
||||
// Only look through word Dawgs (since there is a special way of
|
||||
// handling numbers and punctuation).
|
||||
if (curr_dawg->type() == DAWG_TYPE_WORD) {
|
||||
(*edge_cnt) += FanOut(alt_list, curr_dawg, 0, 0, NULL, true,
|
||||
edge_array + (*edge_cnt));
|
||||
}
|
||||
} // dawg
|
||||
|
||||
(*edge_cnt) += FanOut(alt_list, number_dawg_, 0, 0, NULL, true,
|
||||
edge_array + (*edge_cnt));
|
||||
|
||||
// OOD: it is intentionally not added to the list to make sure it comes
|
||||
// at the end
|
||||
(*edge_cnt) += FanOut(alt_list, ood_dawg_, 0, 0, NULL, true,
|
||||
edge_array + (*edge_cnt));
|
||||
|
||||
// set the root flag for all root edges
|
||||
for (int edge_idx = 0; edge_idx < (*edge_cnt); edge_idx++) {
|
||||
edge_array[edge_idx]->SetRoot(true);
|
||||
}
|
||||
} else { // not starting at the root
|
||||
// preallocate the edge buffer
|
||||
(*edge_cnt) = max_edge_;
|
||||
// allocate memory for edges
|
||||
edge_array = new LangModEdge *[(*edge_cnt)];
|
||||
|
||||
// get the FanOut edges from the root of each dawg
|
||||
(*edge_cnt) = FanOut(alt_list,
|
||||
tess_lm_edge->GetDawg(),
|
||||
tess_lm_edge->EndEdge(), tess_lm_edge->EdgeMask(),
|
||||
tess_lm_edge->EdgeString(), false, edge_array);
|
||||
}
|
||||
return edge_array;
|
||||
}
|
||||
|
||||
// generate edges from an NULL terminated string
|
||||
// (used for punctuation, operators and digits)
|
||||
int TessLangModel::Edges(const char *strng, const Dawg *dawg,
|
||||
EDGE_REF edge_ref, EDGE_REF edge_mask,
|
||||
LangModEdge **edge_array) {
|
||||
int edge_idx,
|
||||
edge_cnt = 0;
|
||||
|
||||
for (edge_idx = 0; strng[edge_idx] != 0; edge_idx++) {
|
||||
int class_id = cntxt_->CharacterSet()->ClassID((char_32)strng[edge_idx]);
|
||||
if (class_id != INVALID_UNICHAR_ID) {
|
||||
// create an edge object
|
||||
edge_array[edge_cnt] = new TessLangModEdge(cntxt_, dawg, edge_ref,
|
||||
class_id);
|
||||
|
||||
reinterpret_cast<TessLangModEdge *>(edge_array[edge_cnt])->
|
||||
SetEdgeMask(edge_mask);
|
||||
edge_cnt++;
|
||||
}
|
||||
}
|
||||
|
||||
return edge_cnt;
|
||||
}
|
||||
|
||||
// generate OOD edges
|
||||
int TessLangModel::OODEdges(CharAltList *alt_list, EDGE_REF edge_ref,
|
||||
EDGE_REF edge_ref_mask, LangModEdge **edge_array) {
|
||||
int class_cnt = cntxt_->CharacterSet()->ClassCount();
|
||||
int edge_cnt = 0;
|
||||
for (int class_id = 0; class_id < class_cnt; class_id++) {
|
||||
// produce an OOD edge only if the cost of the char is low enough
|
||||
if ((alt_list == NULL ||
|
||||
alt_list->ClassCost(class_id) <= max_ood_shape_cost_)) {
|
||||
// create an edge object
|
||||
edge_array[edge_cnt] = new TessLangModEdge(cntxt_, class_id);
|
||||
edge_cnt++;
|
||||
}
|
||||
}
|
||||
|
||||
return edge_cnt;
|
||||
}
|
||||
|
||||
// computes and returns the edges that fan out of an edge ref
|
||||
int TessLangModel::FanOut(CharAltList *alt_list, const Dawg *dawg,
|
||||
EDGE_REF edge_ref, EDGE_REF edge_mask,
|
||||
const char_32 *str, bool root_flag,
|
||||
LangModEdge **edge_array) {
|
||||
int edge_cnt = 0;
|
||||
NODE_REF next_node = NO_EDGE;
|
||||
|
||||
// OOD
|
||||
if (dawg == reinterpret_cast<Dawg *>(DAWG_OOD)) {
|
||||
if (ood_enabled_ == true) {
|
||||
return OODEdges(alt_list, edge_ref, edge_mask, edge_array);
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
} else if (dawg == reinterpret_cast<Dawg *>(DAWG_NUMBER)) {
|
||||
// Number
|
||||
if (numeric_enabled_ == true) {
|
||||
return NumberEdges(edge_ref, edge_array);
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
} else if (IsTrailingPuncEdge(edge_mask)) {
|
||||
// a TRAILING PUNC MASK, generate more trailing punctuation and return
|
||||
if (punc_enabled_ == true) {
|
||||
EDGE_REF trail_cnt = TrailingPuncCount(edge_mask);
|
||||
return Edges(trail_punc_.c_str(), dawg, edge_ref,
|
||||
TrailingPuncEdgeMask(trail_cnt + 1), edge_array);
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
} else if (root_flag == true || edge_ref == 0) {
|
||||
// Root, generate leading punctuation and continue
|
||||
if (root_flag) {
|
||||
if (punc_enabled_ == true) {
|
||||
edge_cnt += Edges(lead_punc_.c_str(), dawg, 0, LEAD_PUNC_EDGE_REF_MASK,
|
||||
edge_array);
|
||||
}
|
||||
}
|
||||
next_node = 0;
|
||||
} else {
|
||||
// a node in the main trie
|
||||
bool eow_flag = (dawg->end_of_word(edge_ref) != 0);
|
||||
|
||||
// for EOW
|
||||
if (eow_flag == true) {
|
||||
// generate trailing punctuation
|
||||
if (punc_enabled_ == true) {
|
||||
edge_cnt += Edges(trail_punc_.c_str(), dawg, edge_ref,
|
||||
TrailingPuncEdgeMask((EDGE_REF)1), edge_array);
|
||||
// generate a hyphen and go back to the root
|
||||
edge_cnt += Edges("-/", dawg, 0, 0, edge_array + edge_cnt);
|
||||
}
|
||||
}
|
||||
|
||||
// advance node
|
||||
next_node = dawg->next_node(edge_ref);
|
||||
if (next_node == 0 || next_node == NO_EDGE) {
|
||||
return edge_cnt;
|
||||
}
|
||||
}
|
||||
|
||||
// now get all the emerging edges if word list is enabled
|
||||
if (word_list_enabled_ == true && next_node != NO_EDGE) {
|
||||
// create child edges
|
||||
int child_edge_cnt =
|
||||
TessLangModEdge::CreateChildren(cntxt_, dawg, next_node,
|
||||
edge_array + edge_cnt);
|
||||
int strt_cnt = edge_cnt;
|
||||
|
||||
// set the edge mask
|
||||
for (int child = 0; child < child_edge_cnt; child++) {
|
||||
reinterpret_cast<TessLangModEdge *>(edge_array[edge_cnt++])->
|
||||
SetEdgeMask(edge_mask);
|
||||
}
|
||||
|
||||
// if we are at the root, create upper case forms of these edges if possible
|
||||
if (root_flag == true) {
|
||||
for (int child = 0; child < child_edge_cnt; child++) {
|
||||
TessLangModEdge *child_edge =
|
||||
reinterpret_cast<TessLangModEdge *>(edge_array[strt_cnt + child]);
|
||||
|
||||
if (has_case_ == true) {
|
||||
const char_32 *edge_str = child_edge->EdgeString();
|
||||
if (edge_str != NULL && islower(edge_str[0]) != 0 &&
|
||||
edge_str[1] == 0) {
|
||||
int class_id =
|
||||
cntxt_->CharacterSet()->ClassID(toupper(edge_str[0]));
|
||||
if (class_id != INVALID_UNICHAR_ID) {
|
||||
// generate an upper case edge for lower case chars
|
||||
edge_array[edge_cnt] = new TessLangModEdge(cntxt_, dawg,
|
||||
child_edge->StartEdge(), child_edge->EndEdge(), class_id);
|
||||
|
||||
reinterpret_cast<TessLangModEdge *>(edge_array[edge_cnt])->
|
||||
SetEdgeMask(edge_mask);
|
||||
edge_cnt++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return edge_cnt;
|
||||
}
|
||||
|
||||
// Generate the edges fanning-out from an edge in the number state machine
|
||||
int TessLangModel::NumberEdges(EDGE_REF edge_ref, LangModEdge **edge_array) {
|
||||
EDGE_REF new_state,
|
||||
state;
|
||||
|
||||
inT64 repeat_cnt,
|
||||
new_repeat_cnt;
|
||||
|
||||
state = ((edge_ref & NUMBER_STATE_MASK) >> NUMBER_STATE_SHIFT);
|
||||
repeat_cnt = ((edge_ref & NUMBER_REPEAT_MASK) >> NUMBER_REPEAT_SHIFT);
|
||||
|
||||
if (state < 0 || state >= kStateCnt) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// go through all valid transitions from the state
|
||||
int edge_cnt = 0;
|
||||
|
||||
EDGE_REF new_edge_ref;
|
||||
|
||||
for (int lit = 0; lit < kNumLiteralCnt; lit++) {
|
||||
// move to the new state
|
||||
new_state = num_state_machine_[state][lit];
|
||||
if (new_state == NUM_TRM) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (new_state == state) {
|
||||
new_repeat_cnt = repeat_cnt + 1;
|
||||
} else {
|
||||
new_repeat_cnt = 1;
|
||||
}
|
||||
|
||||
// not allowed to repeat beyond this
|
||||
if (new_repeat_cnt > num_max_repeat_[state]) {
|
||||
continue;
|
||||
}
|
||||
|
||||
new_edge_ref = (new_state << NUMBER_STATE_SHIFT) |
|
||||
(lit << NUMBER_LITERAL_SHIFT) |
|
||||
(new_repeat_cnt << NUMBER_REPEAT_SHIFT);
|
||||
|
||||
edge_cnt += Edges(literal_str_[lit]->c_str(), number_dawg_,
|
||||
new_edge_ref, 0, edge_array + edge_cnt);
|
||||
}
|
||||
|
||||
return edge_cnt;
|
||||
}
|
||||
|
||||
// Loads Language model elements from contents of the <lang>.cube.lm file
|
||||
bool TessLangModel::LoadLangModelElements(const string &lm_params) {
|
||||
bool success = true;
|
||||
// split into lines, each corresponding to a token type below
|
||||
vector<string> str_vec;
|
||||
CubeUtils::SplitStringUsing(lm_params, "\r\n", &str_vec);
|
||||
for (int entry = 0; entry < str_vec.size(); entry++) {
|
||||
vector<string> tokens;
|
||||
// should be only two tokens: type and value
|
||||
CubeUtils::SplitStringUsing(str_vec[entry], "=", &tokens);
|
||||
if (tokens.size() != 2)
|
||||
success = false;
|
||||
if (tokens[0] == "LeadPunc") {
|
||||
lead_punc_ = tokens[1];
|
||||
} else if (tokens[0] == "TrailPunc") {
|
||||
trail_punc_ = tokens[1];
|
||||
} else if (tokens[0] == "NumLeadPunc") {
|
||||
num_lead_punc_ = tokens[1];
|
||||
} else if (tokens[0] == "NumTrailPunc") {
|
||||
num_trail_punc_ = tokens[1];
|
||||
} else if (tokens[0] == "Operators") {
|
||||
operators_ = tokens[1];
|
||||
} else if (tokens[0] == "Digits") {
|
||||
digits_ = tokens[1];
|
||||
} else if (tokens[0] == "Alphas") {
|
||||
alphas_ = tokens[1];
|
||||
} else {
|
||||
success = false;
|
||||
}
|
||||
}
|
||||
|
||||
RemoveInvalidCharacters(&num_lead_punc_);
|
||||
RemoveInvalidCharacters(&num_trail_punc_);
|
||||
RemoveInvalidCharacters(&digits_);
|
||||
RemoveInvalidCharacters(&operators_);
|
||||
RemoveInvalidCharacters(&alphas_);
|
||||
|
||||
// form the array of literal strings needed for number state machine
|
||||
// It is essential that the literal strings go in the order below
|
||||
literal_str_[0] = &num_lead_punc_;
|
||||
literal_str_[1] = &num_trail_punc_;
|
||||
literal_str_[2] = &digits_;
|
||||
literal_str_[3] = &operators_;
|
||||
literal_str_[4] = &alphas_;
|
||||
|
||||
return success;
|
||||
}
|
||||
|
||||
void TessLangModel::RemoveInvalidCharacters(string *lm_str) {
|
||||
CharSet *char_set = cntxt_->CharacterSet();
|
||||
tesseract::string_32 lm_str32;
|
||||
CubeUtils::UTF8ToUTF32(lm_str->c_str(), &lm_str32);
|
||||
|
||||
int len = CubeUtils::StrLen(lm_str32.c_str());
|
||||
char_32 *clean_str32 = new char_32[len + 1];
|
||||
int clean_len = 0;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
int class_id = char_set->ClassID((char_32)lm_str32[i]);
|
||||
if (class_id != INVALID_UNICHAR_ID) {
|
||||
clean_str32[clean_len] = lm_str32[i];
|
||||
++clean_len;
|
||||
}
|
||||
}
|
||||
clean_str32[clean_len] = 0;
|
||||
if (clean_len < len) {
|
||||
lm_str->clear();
|
||||
CubeUtils::UTF32ToUTF8(clean_str32, lm_str);
|
||||
}
|
||||
delete [] clean_str32;
|
||||
}
|
||||
|
||||
int TessLangModel::NumDawgs() const {
|
||||
return (word_dawgs_ != NULL) ?
|
||||
word_dawgs_->size() : cntxt_->TesseractObject()->getDict().NumDawgs();
|
||||
}
|
||||
|
||||
// Returns the dawgs with the given index from either the dawgs
|
||||
// stored by the Tesseract object, or the word_dawgs_.
|
||||
const Dawg *TessLangModel::GetDawg(int index) const {
|
||||
if (word_dawgs_ != NULL) {
|
||||
ASSERT_HOST(index < word_dawgs_->size());
|
||||
return (*word_dawgs_)[index];
|
||||
} else {
|
||||
ASSERT_HOST(index < cntxt_->TesseractObject()->getDict().NumDawgs());
|
||||
return cntxt_->TesseractObject()->getDict().GetDawg(index);
|
||||
}
|
||||
}
|
||||
}
|
@ -1,142 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: tess_lang_model.h
|
||||
* Description: Declaration of the Tesseract Language Model Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef TESS_LANG_MODEL_H
|
||||
#define TESS_LANG_MODEL_H
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "char_altlist.h"
|
||||
#include "cube_reco_context.h"
|
||||
#include "cube_tuning_params.h"
|
||||
#include "dict.h"
|
||||
#include "lang_model.h"
|
||||
#include "tessdatamanager.h"
|
||||
#include "tess_lang_mod_edge.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
const int kStateCnt = 4;
|
||||
const int kNumLiteralCnt = 5;
|
||||
|
||||
class TessLangModel : public LangModel {
|
||||
public:
|
||||
TessLangModel(const string &lm_params,
|
||||
const string &data_file_path,
|
||||
bool load_system_dawg,
|
||||
TessdataManager *tessdata_manager,
|
||||
CubeRecoContext *cntxt);
|
||||
~TessLangModel() {
|
||||
if (word_dawgs_ != NULL) {
|
||||
word_dawgs_->delete_data_pointers();
|
||||
delete word_dawgs_;
|
||||
}
|
||||
}
|
||||
|
||||
// returns a pointer to the root of the language model
|
||||
inline TessLangModEdge *Root() {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// The general fan-out generation function. Returns the list of edges
|
||||
// fanning-out of the specified edge and their count. If an AltList is
|
||||
// specified, only the class-ids with a minimum cost are considered
|
||||
LangModEdge **GetEdges(CharAltList *alt_list,
|
||||
LangModEdge *edge,
|
||||
int *edge_cnt);
|
||||
// Determines if a sequence of 32-bit chars is valid in this language model
|
||||
// starting from the root. If the eow_flag is ON, also checks for
|
||||
// a valid EndOfWord. If final_edge is not NULL, returns a pointer to the last
|
||||
// edge
|
||||
bool IsValidSequence(const char_32 *sequence, bool eow_flag,
|
||||
LangModEdge **final_edge = NULL);
|
||||
bool IsLeadingPunc(char_32 ch);
|
||||
bool IsTrailingPunc(char_32 ch);
|
||||
bool IsDigit(char_32 ch);
|
||||
|
||||
void RemoveInvalidCharacters(string *lm_str);
|
||||
private:
|
||||
// static LM state machines
|
||||
static const Dawg *ood_dawg_;
|
||||
static const Dawg *number_dawg_;
|
||||
static const int num_state_machine_[kStateCnt][kNumLiteralCnt];
|
||||
static const int num_max_repeat_[kStateCnt];
|
||||
// word_dawgs_ should only be loaded if cube has its own version of the
|
||||
// unicharset (different from the one used by tesseract) and therefore
|
||||
// can not use the dawgs loaded for tesseract (since the unichar ids
|
||||
// encoded in the dawgs differ).
|
||||
DawgVector *word_dawgs_;
|
||||
|
||||
static int max_edge_;
|
||||
static int max_ood_shape_cost_;
|
||||
|
||||
// remaining language model elements needed by cube. These get loaded from
|
||||
// the .lm file
|
||||
string lead_punc_;
|
||||
string trail_punc_;
|
||||
string num_lead_punc_;
|
||||
string num_trail_punc_;
|
||||
string operators_;
|
||||
string digits_;
|
||||
string alphas_;
|
||||
// String of characters in RHS of each line of <lang>.cube.lm
|
||||
// Each element is hard-coded to correspond to a specific token type
|
||||
// (see LoadLangModelElements)
|
||||
string *literal_str_[kNumLiteralCnt];
|
||||
// Recognition context needed to access language properties
|
||||
// (case, cursive,..)
|
||||
CubeRecoContext *cntxt_;
|
||||
bool has_case_;
|
||||
|
||||
// computes and returns the edges that fan out of an edge ref
|
||||
int FanOut(CharAltList *alt_list,
|
||||
const Dawg *dawg, EDGE_REF edge_ref, EDGE_REF edge_ref_mask,
|
||||
const char_32 *str, bool root_flag, LangModEdge **edge_array);
|
||||
// generate edges from an NULL terminated string
|
||||
// (used for punctuation, operators and digits)
|
||||
int Edges(const char *strng, const Dawg *dawg,
|
||||
EDGE_REF edge_ref, EDGE_REF edge_ref_mask,
|
||||
LangModEdge **edge_array);
|
||||
// Generate the edges fanning-out from an edge in the number state machine
|
||||
int NumberEdges(EDGE_REF edge_ref, LangModEdge **edge_array);
|
||||
// Generate OOD edges
|
||||
int OODEdges(CharAltList *alt_list, EDGE_REF edge_ref,
|
||||
EDGE_REF edge_ref_mask, LangModEdge **edge_array);
|
||||
// Cleanup an edge array
|
||||
void FreeEdges(int edge_cnt, LangModEdge **edge_array);
|
||||
// Determines if a sequence of 32-bit chars is valid in this language model
|
||||
// starting from the specified edge. If the eow_flag is ON, also checks for
|
||||
// a valid EndOfWord. If final_edge is not NULL, returns a pointer to the last
|
||||
// edge
|
||||
bool IsValidSequence(LangModEdge *edge, const char_32 *sequence,
|
||||
bool eow_flag, LangModEdge **final_edge);
|
||||
// Parse language model elements from the given string, which should
|
||||
// have been loaded from <lang>.cube.lm file, e.g. in CubeRecoContext
|
||||
bool LoadLangModelElements(const string &lm_params);
|
||||
|
||||
// Returns the number of word Dawgs in the language model.
|
||||
int NumDawgs() const;
|
||||
|
||||
// Returns the dawgs with the given index from either the dawgs
|
||||
// stored by the Tesseract object, or the word_dawgs_.
|
||||
const Dawg *GetDawg(int index) const;
|
||||
};
|
||||
} // tesseract
|
||||
|
||||
#endif // TESS_LANG_MODEL_H
|
@ -1,129 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: tuning_params.h
|
||||
* Description: Declaration of the Tuning Parameters Base Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The TuningParams class abstracts all the parameters that can be learned or
|
||||
// tuned during the training process. It is a base class that all TuningParams
|
||||
// classes should inherit from.
|
||||
|
||||
#ifndef TUNING_PARAMS_H
|
||||
#define TUNING_PARAMS_H
|
||||
|
||||
#include <string>
|
||||
#ifdef USE_STD_NAMESPACE
|
||||
using std::string;
|
||||
#endif
|
||||
|
||||
namespace tesseract {
|
||||
class TuningParams {
|
||||
public:
|
||||
enum type_classifer {
|
||||
NN,
|
||||
HYBRID_NN
|
||||
};
|
||||
enum type_feature {
|
||||
BMP,
|
||||
CHEBYSHEV,
|
||||
HYBRID
|
||||
};
|
||||
|
||||
TuningParams() {}
|
||||
virtual ~TuningParams() {}
|
||||
// Accessor functions
|
||||
inline double RecoWgt() const { return reco_wgt_; }
|
||||
inline double SizeWgt() const { return size_wgt_; }
|
||||
inline double CharBigramWgt() const { return char_bigrams_wgt_; }
|
||||
inline double WordUnigramWgt() const { return word_unigrams_wgt_; }
|
||||
inline int MaxSegPerChar() const { return max_seg_per_char_; }
|
||||
inline int BeamWidth() const { return beam_width_; }
|
||||
inline int TypeClassifier() const { return tp_classifier_; }
|
||||
inline int TypeFeature() const { return tp_feat_; }
|
||||
inline int ConvGridSize() const { return conv_grid_size_; }
|
||||
inline int HistWindWid() const { return hist_wind_wid_; }
|
||||
inline int MinConCompSize() const { return min_con_comp_size_; }
|
||||
inline double MaxWordAspectRatio() const { return max_word_aspect_ratio_; }
|
||||
inline double MinSpaceHeightRatio() const { return min_space_height_ratio_; }
|
||||
inline double MaxSpaceHeightRatio() const { return max_space_height_ratio_; }
|
||||
inline double CombinerRunThresh() const { return combiner_run_thresh_; }
|
||||
inline double CombinerClassifierThresh() const {
|
||||
return combiner_classifier_thresh_; }
|
||||
|
||||
inline void SetRecoWgt(double wgt) { reco_wgt_ = wgt; }
|
||||
inline void SetSizeWgt(double wgt) { size_wgt_ = wgt; }
|
||||
inline void SetCharBigramWgt(double wgt) { char_bigrams_wgt_ = wgt; }
|
||||
inline void SetWordUnigramWgt(double wgt) { word_unigrams_wgt_ = wgt; }
|
||||
inline void SetMaxSegPerChar(int max_seg_per_char) {
|
||||
max_seg_per_char_ = max_seg_per_char;
|
||||
}
|
||||
inline void SetBeamWidth(int beam_width) { beam_width_ = beam_width; }
|
||||
inline void SetTypeClassifier(type_classifer tp_classifier) {
|
||||
tp_classifier_ = tp_classifier;
|
||||
}
|
||||
inline void SetTypeFeature(type_feature tp_feat) {tp_feat_ = tp_feat;}
|
||||
inline void SetHistWindWid(int hist_wind_wid) {
|
||||
hist_wind_wid_ = hist_wind_wid;
|
||||
}
|
||||
|
||||
virtual bool Save(string file_name) = 0;
|
||||
virtual bool Load(string file_name) = 0;
|
||||
|
||||
protected:
|
||||
// weight of recognition cost. This includes the language model cost
|
||||
double reco_wgt_;
|
||||
// weight of size cost
|
||||
double size_wgt_;
|
||||
// weight of character bigrams cost
|
||||
double char_bigrams_wgt_;
|
||||
// weight of word unigrams cost
|
||||
double word_unigrams_wgt_;
|
||||
// Maximum number of segments per character
|
||||
int max_seg_per_char_;
|
||||
// Beam width equal to the maximum number of nodes kept in the beam search
|
||||
// trellis column after pruning
|
||||
int beam_width_;
|
||||
// Classifier type: See enum type_classifer for classifier types
|
||||
type_classifer tp_classifier_;
|
||||
// Feature types: See enum type_feature for feature types
|
||||
type_feature tp_feat_;
|
||||
// Grid size to scale a grapheme bitmap used by the BMP feature type
|
||||
int conv_grid_size_;
|
||||
// Histogram window size as a ratio of the word height used in computing
|
||||
// the vertical pixel density histogram in the segmentation algorithm
|
||||
int hist_wind_wid_;
|
||||
// Minimum possible size of a connected component
|
||||
int min_con_comp_size_;
|
||||
// Maximum aspect ratio of a word (width / height)
|
||||
double max_word_aspect_ratio_;
|
||||
// Minimum ratio relative to the line height of a gap to be considered as
|
||||
// a word break
|
||||
double min_space_height_ratio_;
|
||||
// Maximum ratio relative to the line height of a gap to be considered as
|
||||
// a definite word break
|
||||
double max_space_height_ratio_;
|
||||
// When Cube and Tesseract are run in combined mode, only run
|
||||
// combiner classifier when tesseract confidence is below this
|
||||
// threshold. When Cube is run without Tesseract, this is ignored.
|
||||
double combiner_run_thresh_;
|
||||
// When Cube and tesseract are run in combined mode, threshold on
|
||||
// output of combiner binary classifier (chosen from ROC during
|
||||
// combiner training). When Cube is run without Tesseract, this is ignored.
|
||||
double combiner_classifier_thresh_;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // TUNING_PARAMS_H
|
@ -1,117 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: word_altlist.cpp
|
||||
* Description: Implementation of the Word Alternate List Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include "word_altlist.h"
|
||||
|
||||
namespace tesseract {
|
||||
WordAltList::WordAltList(int max_alt)
|
||||
: AltList(max_alt) {
|
||||
word_alt_ = NULL;
|
||||
}
|
||||
|
||||
WordAltList::~WordAltList() {
|
||||
if (word_alt_ != NULL) {
|
||||
for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
|
||||
if (word_alt_[alt_idx] != NULL) {
|
||||
delete []word_alt_[alt_idx];
|
||||
}
|
||||
}
|
||||
delete []word_alt_;
|
||||
word_alt_ = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* insert an alternate word with the specified cost and tag
|
||||
*/
|
||||
bool WordAltList::Insert(char_32 *word_str, int cost, void *tag) {
|
||||
if (word_alt_ == NULL || alt_cost_ == NULL) {
|
||||
word_alt_ = new char_32*[max_alt_];
|
||||
alt_cost_ = new int[max_alt_];
|
||||
alt_tag_ = new void *[max_alt_];
|
||||
memset(alt_tag_, 0, max_alt_ * sizeof(*alt_tag_));
|
||||
} else {
|
||||
// check if alt already exists
|
||||
for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
|
||||
if (CubeUtils::StrCmp(word_str, word_alt_[alt_idx]) == 0) {
|
||||
// update the cost if we have a lower one
|
||||
if (cost < alt_cost_[alt_idx]) {
|
||||
alt_cost_[alt_idx] = cost;
|
||||
alt_tag_[alt_idx] = tag;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// determine length of alternate
|
||||
int len = CubeUtils::StrLen(word_str);
|
||||
|
||||
word_alt_[alt_cnt_] = new char_32[len + 1];
|
||||
|
||||
if (len > 0) {
|
||||
memcpy(word_alt_[alt_cnt_], word_str, len * sizeof(*word_str));
|
||||
}
|
||||
|
||||
word_alt_[alt_cnt_][len] = 0;
|
||||
alt_cost_[alt_cnt_] = cost;
|
||||
alt_tag_[alt_cnt_] = tag;
|
||||
|
||||
alt_cnt_++;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* sort the alternate in descending order based on the cost
|
||||
*/
|
||||
void WordAltList::Sort() {
|
||||
for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
|
||||
for (int alt = alt_idx + 1; alt < alt_cnt_; alt++) {
|
||||
if (alt_cost_[alt_idx] > alt_cost_[alt]) {
|
||||
char_32 *pchTemp = word_alt_[alt_idx];
|
||||
word_alt_[alt_idx] = word_alt_[alt];
|
||||
word_alt_[alt] = pchTemp;
|
||||
|
||||
int temp = alt_cost_[alt_idx];
|
||||
alt_cost_[alt_idx] = alt_cost_[alt];
|
||||
alt_cost_[alt] = temp;
|
||||
|
||||
void *tag = alt_tag_[alt_idx];
|
||||
alt_tag_[alt_idx] = alt_tag_[alt];
|
||||
alt_tag_[alt] = tag;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void WordAltList::PrintDebug() {
|
||||
for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
|
||||
char_32 *word_32 = word_alt_[alt_idx];
|
||||
string word_str;
|
||||
CubeUtils::UTF32ToUTF8(word_32, &word_str);
|
||||
int num_unichars = CubeUtils::StrLen(word_32);
|
||||
fprintf(stderr, "Alt[%d]=%s (cost=%d, num_unichars=%d); unichars=", alt_idx,
|
||||
word_str.c_str(), alt_cost_[alt_idx], num_unichars);
|
||||
for (int i = 0; i < num_unichars; ++i)
|
||||
fprintf(stderr, "%d ", word_32[i]);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
}
|
||||
} // namespace tesseract
|
@ -1,50 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: word_altlist.h
|
||||
* Description: Declaration of the Word Alternate List Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The WordAltList abstracts a alternate list of words and their corresponding
|
||||
// costs that result from the word recognition process. The class inherits
|
||||
// from the AltList class
|
||||
// It provides methods to add a new word alternate, its corresponding score and
|
||||
// a tag.
|
||||
|
||||
#ifndef WORD_ALT_LIST_H
|
||||
#define WORD_ALT_LIST_H
|
||||
|
||||
#include "altlist.h"
|
||||
|
||||
namespace tesseract {
|
||||
class WordAltList : public AltList {
|
||||
public:
|
||||
explicit WordAltList(int max_alt);
|
||||
~WordAltList();
|
||||
// Sort the list of alternates based on cost
|
||||
void Sort();
|
||||
// insert an alternate word with the specified cost and tag
|
||||
bool Insert(char_32 *char_ptr, int cost, void *tag = NULL);
|
||||
// returns the alternate string at the specified position
|
||||
inline char_32 * Alt(int alt_idx) { return word_alt_[alt_idx]; }
|
||||
// print each entry of the altlist, both UTF8 and unichar ids, and
|
||||
// their costs, to stderr
|
||||
void PrintDebug();
|
||||
private:
|
||||
char_32 **word_alt_;
|
||||
};
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // WORD_ALT_LIST_H
|
@ -1,199 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: word_list_lang_model.cpp
|
||||
* Description: Implementation of the Word List Language Model Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "word_list_lang_model.h"
|
||||
#include "cube_utils.h"
|
||||
|
||||
#include "ratngs.h"
|
||||
#include "trie.h"
|
||||
|
||||
namespace tesseract {
|
||||
WordListLangModel::WordListLangModel(CubeRecoContext *cntxt) {
|
||||
cntxt_ = cntxt;
|
||||
dawg_ = NULL;
|
||||
init_ = false;
|
||||
}
|
||||
|
||||
WordListLangModel::~WordListLangModel() {
|
||||
Cleanup();
|
||||
}
|
||||
|
||||
// Cleanup
|
||||
void WordListLangModel::Cleanup() {
|
||||
if (dawg_ != NULL) {
|
||||
delete dawg_;
|
||||
dawg_ = NULL;
|
||||
}
|
||||
init_ = false;
|
||||
}
|
||||
|
||||
// Initialize the language model
|
||||
bool WordListLangModel::Init() {
|
||||
if (init_ == true) {
|
||||
return true;
|
||||
}
|
||||
// The last parameter to the Trie constructor (the debug level) is set to
|
||||
// false for now, until Cube has a way to express its preferred debug level.
|
||||
dawg_ = new Trie(DAWG_TYPE_WORD, "", NO_PERM,
|
||||
cntxt_->CharacterSet()->ClassCount(), false);
|
||||
init_ = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
// return a pointer to the root
|
||||
LangModEdge * WordListLangModel::Root() {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// return the edges emerging from the current state
|
||||
LangModEdge **WordListLangModel::GetEdges(CharAltList *alt_list,
|
||||
LangModEdge *edge,
|
||||
int *edge_cnt) {
|
||||
// initialize if necessary
|
||||
if (init_ == false) {
|
||||
if (Init() == false) {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
(*edge_cnt) = 0;
|
||||
|
||||
EDGE_REF edge_ref;
|
||||
|
||||
TessLangModEdge *tess_lm_edge = reinterpret_cast<TessLangModEdge *>(edge);
|
||||
|
||||
if (tess_lm_edge == NULL) {
|
||||
edge_ref = 0;
|
||||
} else {
|
||||
edge_ref = tess_lm_edge->EndEdge();
|
||||
|
||||
// advance node
|
||||
edge_ref = dawg_->next_node(edge_ref);
|
||||
if (edge_ref == 0) {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// allocate memory for edges
|
||||
LangModEdge **edge_array = new LangModEdge *[kMaxEdge];
|
||||
|
||||
// now get all the emerging edges
|
||||
(*edge_cnt) += TessLangModEdge::CreateChildren(cntxt_, dawg_, edge_ref,
|
||||
edge_array + (*edge_cnt));
|
||||
|
||||
return edge_array;
|
||||
}
|
||||
|
||||
// returns true if the char_32 is supported by the language model
|
||||
// TODO(ahmadab) currently not implemented
|
||||
bool WordListLangModel::IsValidSequence(const char_32 *sequence,
|
||||
bool terminal, LangModEdge **edges) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Recursive helper function for WordVariants().
|
||||
void WordListLangModel::WordVariants(const CharSet &char_set,
|
||||
string_32 prefix_str32,
|
||||
WERD_CHOICE *word_so_far,
|
||||
string_32 str32,
|
||||
vector<WERD_CHOICE *> *word_variants) {
|
||||
int str_len = str32.length();
|
||||
if (str_len == 0) {
|
||||
if (word_so_far->length() > 0) {
|
||||
word_variants->push_back(new WERD_CHOICE(*word_so_far));
|
||||
}
|
||||
} else {
|
||||
// Try out all the possible prefixes of the str32.
|
||||
for (int len = 1; len <= str_len; len++) {
|
||||
// Check if prefix is supported in character set.
|
||||
string_32 str_pref32 = str32.substr(0, len);
|
||||
int class_id = char_set.ClassID(reinterpret_cast<const char_32 *>(
|
||||
str_pref32.c_str()));
|
||||
if (class_id <= 0) {
|
||||
continue;
|
||||
} else {
|
||||
string_32 new_prefix_str32 = prefix_str32 + str_pref32;
|
||||
string_32 new_str32 = str32.substr(len);
|
||||
word_so_far->append_unichar_id(class_id, 1, 0.0, 0.0);
|
||||
WordVariants(char_set, new_prefix_str32, word_so_far, new_str32,
|
||||
word_variants);
|
||||
word_so_far->remove_last_unichar_id();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compute all the variants of a 32-bit string in terms of the class-ids
|
||||
// This is needed for languages that have ligatures. A word can then have more
|
||||
// than one spelling in terms of the class-ids
|
||||
void WordListLangModel::WordVariants(const CharSet &char_set,
|
||||
const UNICHARSET *uchset, string_32 str32,
|
||||
vector<WERD_CHOICE *> *word_variants) {
|
||||
for (int i = 0; i < word_variants->size(); i++) {
|
||||
delete (*word_variants)[i];
|
||||
}
|
||||
word_variants->clear();
|
||||
string_32 prefix_str32;
|
||||
WERD_CHOICE word_so_far(uchset);
|
||||
WordVariants(char_set, prefix_str32, &word_so_far, str32, word_variants);
|
||||
}
|
||||
|
||||
// add a new UTF-8 string to the lang model
|
||||
bool WordListLangModel::AddString(const char *char_ptr) {
|
||||
if (!init_ && !Init()) { // initialize if necessary
|
||||
return false;
|
||||
}
|
||||
|
||||
string_32 str32;
|
||||
CubeUtils::UTF8ToUTF32(char_ptr, &str32);
|
||||
if (str32.length() < 1) {
|
||||
return false;
|
||||
}
|
||||
return AddString32(str32.c_str());
|
||||
}
|
||||
|
||||
// add a new UTF-32 string to the lang model
|
||||
bool WordListLangModel::AddString32(const char_32 *char_32_ptr) {
|
||||
if (char_32_ptr == NULL) {
|
||||
return false;
|
||||
}
|
||||
// get all the word variants
|
||||
vector<WERD_CHOICE *> word_variants;
|
||||
WordVariants(*(cntxt_->CharacterSet()), cntxt_->TessUnicharset(),
|
||||
char_32_ptr, &word_variants);
|
||||
|
||||
if (word_variants.size() > 0) {
|
||||
// find the shortest variant
|
||||
int shortest_word = 0;
|
||||
for (int word = 1; word < word_variants.size(); word++) {
|
||||
if (word_variants[shortest_word]->length() >
|
||||
word_variants[word]->length()) {
|
||||
shortest_word = word;
|
||||
}
|
||||
}
|
||||
// only add the shortest grapheme interpretation of string to the word list
|
||||
dawg_->add_word_to_dawg(*word_variants[shortest_word]);
|
||||
}
|
||||
for (int i = 0; i < word_variants.size(); i++) { delete word_variants[i]; }
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
@ -1,89 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: word_list_lang_model.h
|
||||
* Description: Declaration of the Word List Language Model Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The WordListLangModel class abstracts a language model that is based on
|
||||
// a list of words. It inherits from the LangModel abstract class
|
||||
// Besides providing the methods inherited from the LangModel abstract class,
|
||||
// the class provided methods to add new strings to the Language Model:
|
||||
// AddString & AddString32
|
||||
|
||||
#ifndef WORD_LIST_LANG_MODEL_H
|
||||
#define WORD_LIST_LANG_MODEL_H
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "cube_reco_context.h"
|
||||
#include "lang_model.h"
|
||||
#include "tess_lang_mod_edge.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
class Trie;
|
||||
|
||||
class WordListLangModel : public LangModel {
|
||||
public:
|
||||
explicit WordListLangModel(CubeRecoContext *cntxt);
|
||||
~WordListLangModel();
|
||||
// Returns an edge pointer to the Root
|
||||
LangModEdge *Root();
|
||||
// Returns the edges that fan-out of the specified edge and their count
|
||||
LangModEdge **GetEdges(CharAltList *alt_list,
|
||||
LangModEdge *edge,
|
||||
int *edge_cnt);
|
||||
// Returns is a sequence of 32-bit characters are valid within this language
|
||||
// model or net. And EndOfWord flag is specified. If true, the sequence has
|
||||
// to end on a valid word. The function also optionally returns the list
|
||||
// of language model edges traversed to parse the string
|
||||
bool IsValidSequence(const char_32 *sequence,
|
||||
bool eow_flag,
|
||||
LangModEdge **edges);
|
||||
bool IsLeadingPunc(char_32 ch) { return false; } // not yet implemented
|
||||
bool IsTrailingPunc(char_32 ch) { return false; } // not yet implemented
|
||||
bool IsDigit(char_32 ch) { return false; } // not yet implemented
|
||||
// Adds a new UTF-8 string to the language model
|
||||
bool AddString(const char *char_ptr);
|
||||
// Adds a new UTF-32 string to the language model
|
||||
bool AddString32(const char_32 *char_32_ptr);
|
||||
// Compute all the variants of a 32-bit string in terms of the class-ids.
|
||||
// This is needed for languages that have ligatures. A word can then have
|
||||
// more than one spelling in terms of the class-ids.
|
||||
static void WordVariants(const CharSet &char_set, const UNICHARSET *uchset,
|
||||
string_32 str32,
|
||||
vector<WERD_CHOICE *> *word_variants);
|
||||
private:
|
||||
// constants needed to configure the language model
|
||||
static const int kMaxEdge = 512;
|
||||
|
||||
CubeRecoContext *cntxt_;
|
||||
Trie *dawg_;
|
||||
bool init_;
|
||||
// Initialize the language model
|
||||
bool Init();
|
||||
// Cleanup
|
||||
void Cleanup();
|
||||
// Recursive helper function for WordVariants().
|
||||
static void WordVariants(
|
||||
const CharSet &char_set,
|
||||
string_32 prefix_str32, WERD_CHOICE *word_so_far,
|
||||
string_32 str32,
|
||||
vector<WERD_CHOICE *> *word_variants);
|
||||
};
|
||||
} // tesseract
|
||||
|
||||
#endif // WORD_LIST_LANG_MODEL_H
|
@ -1,286 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: word_size_model.cpp
|
||||
* Description: Implementation of the Word Size Model Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <math.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "word_size_model.h"
|
||||
#include "cube_utils.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
WordSizeModel::WordSizeModel(CharSet * char_set, bool contextual) {
|
||||
char_set_ = char_set;
|
||||
contextual_ = contextual;
|
||||
}
|
||||
|
||||
WordSizeModel::~WordSizeModel() {
|
||||
for (int fnt = 0; fnt < font_pair_size_models_.size(); fnt++) {
|
||||
FontPairSizeInfo fnt_info = font_pair_size_models_[fnt];
|
||||
delete []fnt_info.pair_size_info[0];
|
||||
delete []fnt_info.pair_size_info;
|
||||
}
|
||||
}
|
||||
|
||||
WordSizeModel *WordSizeModel::Create(const string &data_file_path,
|
||||
const string &lang,
|
||||
CharSet *char_set,
|
||||
bool contextual) {
|
||||
WordSizeModel *obj = new WordSizeModel(char_set, contextual);
|
||||
|
||||
if (!obj->Init(data_file_path, lang)) {
|
||||
delete obj;
|
||||
return NULL;
|
||||
}
|
||||
return obj;
|
||||
}
|
||||
|
||||
bool WordSizeModel::Init(const string &data_file_path, const string &lang) {
|
||||
string stats_file_name;
|
||||
stats_file_name = data_file_path + lang;
|
||||
stats_file_name += ".cube.size";
|
||||
|
||||
// read file to memory
|
||||
string str_data;
|
||||
|
||||
if (!CubeUtils::ReadFileToString(stats_file_name, &str_data)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// split to words
|
||||
vector<string> tokens;
|
||||
CubeUtils::SplitStringUsing(str_data, "\t\r\n", &tokens);
|
||||
if (tokens.size() < 1) {
|
||||
fprintf(stderr, "Cube ERROR (WordSizeModel::Init): invalid "
|
||||
"file contents: %s\n", stats_file_name.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
font_pair_size_models_.clear();
|
||||
|
||||
// token count per line depends on whether the language is contextual or not
|
||||
int token_cnt = contextual_ ?
|
||||
(kExpectedTokenCount + 4) : kExpectedTokenCount;
|
||||
// the count of size classes depends on whether the language is contextual
|
||||
// or not. For non contextual languages (Ex: Eng), it is equal to the class
|
||||
// count. For contextual languages (Ex: Ara), it is equal to the class count
|
||||
// multiplied by the position count (4: start, middle, final, isolated)
|
||||
int size_class_cnt = contextual_ ?
|
||||
(char_set_->ClassCount() * 4) : char_set_->ClassCount();
|
||||
string fnt_name = "";
|
||||
|
||||
for (int tok = 0; tok < tokens.size(); tok += token_cnt) {
|
||||
// a new font, write the old font data and re-init
|
||||
if (tok == 0 || fnt_name != tokens[tok]) {
|
||||
FontPairSizeInfo fnt_info;
|
||||
|
||||
fnt_info.pair_size_info = new PairSizeInfo *[size_class_cnt];
|
||||
|
||||
fnt_info.pair_size_info[0] =
|
||||
new PairSizeInfo[size_class_cnt * size_class_cnt];
|
||||
|
||||
memset(fnt_info.pair_size_info[0], 0, size_class_cnt * size_class_cnt *
|
||||
sizeof(PairSizeInfo));
|
||||
|
||||
for (int cls = 1; cls < size_class_cnt; cls++) {
|
||||
fnt_info.pair_size_info[cls] =
|
||||
fnt_info.pair_size_info[cls - 1] + size_class_cnt;
|
||||
}
|
||||
|
||||
// strip out path and extension
|
||||
string stripped_font_name = tokens[tok].substr(0, tokens[tok].find('.'));
|
||||
string::size_type strt_pos = stripped_font_name.find_last_of("/\\");
|
||||
if (strt_pos != string::npos) {
|
||||
fnt_info.font_name = stripped_font_name.substr(strt_pos);
|
||||
} else {
|
||||
fnt_info.font_name = stripped_font_name;
|
||||
}
|
||||
font_pair_size_models_.push_back(fnt_info);
|
||||
}
|
||||
|
||||
// parse the data
|
||||
int cls_0;
|
||||
int cls_1;
|
||||
double delta_top;
|
||||
double wid_0;
|
||||
double hgt_0;
|
||||
double wid_1;
|
||||
double hgt_1;
|
||||
int size_code_0;
|
||||
int size_code_1;
|
||||
|
||||
// read and parse the tokens
|
||||
if (contextual_) {
|
||||
int start_0;
|
||||
int end_0;
|
||||
int start_1;
|
||||
int end_1;
|
||||
// The expected format for a character size bigram is as follows:
|
||||
// ClassId0<delim>Start-flag0<delim>End-flag0<delim>String0(ignored)
|
||||
// Width0<delim>Height0<delim>
|
||||
// ClassId1<delim>Start-flag1<delim>End-flag1<delim>String1(ignored)
|
||||
// HeightDelta<delim>Width1<delim>Height0<delim>
|
||||
// In case of non-contextual languages, the Start and End flags are
|
||||
// omitted
|
||||
if (sscanf(tokens[tok + 1].c_str(), "%d", &cls_0) != 1 ||
|
||||
sscanf(tokens[tok + 2].c_str(), "%d", &start_0) != 1 ||
|
||||
sscanf(tokens[tok + 3].c_str(), "%d", &end_0) != 1 ||
|
||||
sscanf(tokens[tok + 5].c_str(), "%lf", &wid_0) != 1 ||
|
||||
sscanf(tokens[tok + 6].c_str(), "%lf", &hgt_0) != 1 ||
|
||||
sscanf(tokens[tok + 7].c_str(), "%d", &cls_1) != 1 ||
|
||||
sscanf(tokens[tok + 8].c_str(), "%d", &start_1) != 1 ||
|
||||
sscanf(tokens[tok + 9].c_str(), "%d", &end_1) != 1 ||
|
||||
sscanf(tokens[tok + 11].c_str(), "%lf", &delta_top) != 1 ||
|
||||
sscanf(tokens[tok + 12].c_str(), "%lf", &wid_1) != 1 ||
|
||||
sscanf(tokens[tok + 13].c_str(), "%lf", &hgt_1) != 1 ||
|
||||
(start_0 != 0 && start_0 != 1) || (end_0 != 0 && end_0 != 1) ||
|
||||
(start_1 != 0 && start_1 != 1) || (end_1 != 0 && end_1 != 1)) {
|
||||
fprintf(stderr, "Cube ERROR (WordSizeModel::Init): bad format at "
|
||||
"line %d\n", 1 + (tok / token_cnt));
|
||||
return false;
|
||||
}
|
||||
size_code_0 = SizeCode(cls_0, start_0, end_0);
|
||||
size_code_1 = SizeCode(cls_1, start_1, end_1);
|
||||
} else {
|
||||
if (sscanf(tokens[tok + 1].c_str(), "%d", &cls_0) != 1 ||
|
||||
sscanf(tokens[tok + 3].c_str(), "%lf", &wid_0) != 1 ||
|
||||
sscanf(tokens[tok + 4].c_str(), "%lf", &hgt_0) != 1 ||
|
||||
sscanf(tokens[tok + 5].c_str(), "%d", &cls_1) != 1 ||
|
||||
sscanf(tokens[tok + 7].c_str(), "%lf", &delta_top) != 1 ||
|
||||
sscanf(tokens[tok + 8].c_str(), "%lf", &wid_1) != 1 ||
|
||||
sscanf(tokens[tok + 9].c_str(), "%lf", &hgt_1) != 1) {
|
||||
fprintf(stderr, "Cube ERROR (WordSizeModel::Init): bad format at "
|
||||
"line %d\n", 1 + (tok / token_cnt));
|
||||
return false;
|
||||
}
|
||||
size_code_0 = cls_0;
|
||||
size_code_1 = cls_1;
|
||||
}
|
||||
|
||||
// copy the data to the size tables
|
||||
FontPairSizeInfo fnt_info = font_pair_size_models_.back();
|
||||
fnt_info.pair_size_info[size_code_0][size_code_1].delta_top =
|
||||
static_cast<int>(delta_top * kShapeModelScale);
|
||||
fnt_info.pair_size_info[size_code_0][size_code_1].wid_0 =
|
||||
static_cast<int>(wid_0 * kShapeModelScale);
|
||||
fnt_info.pair_size_info[size_code_0][size_code_1].hgt_0 =
|
||||
static_cast<int>(hgt_0 * kShapeModelScale);
|
||||
fnt_info.pair_size_info[size_code_0][size_code_1].wid_1 =
|
||||
static_cast<int>(wid_1 * kShapeModelScale);
|
||||
fnt_info.pair_size_info[size_code_0][size_code_1].hgt_1 =
|
||||
static_cast<int>(hgt_1 * kShapeModelScale);
|
||||
|
||||
fnt_name = tokens[tok];
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int WordSizeModel::Cost(CharSamp **samp_array, int samp_cnt) const {
|
||||
if (samp_cnt < 2) {
|
||||
return 0;
|
||||
}
|
||||
double best_dist = static_cast<double>(WORST_COST);
|
||||
int best_fnt = -1;
|
||||
for (int fnt = 0; fnt < font_pair_size_models_.size(); fnt++) {
|
||||
const FontPairSizeInfo *fnt_info = &font_pair_size_models_[fnt];
|
||||
double mean_dist = 0;
|
||||
int pair_cnt = 0;
|
||||
|
||||
for (int smp_0 = 0; smp_0 < samp_cnt; smp_0++) {
|
||||
int cls_0 = char_set_->ClassID(samp_array[smp_0]->StrLabel());
|
||||
if (cls_0 < 1) {
|
||||
continue;
|
||||
}
|
||||
// compute size code for samp 0 based on class id and position
|
||||
int size_code_0;
|
||||
if (contextual_) {
|
||||
size_code_0 = SizeCode(cls_0,
|
||||
samp_array[smp_0]->FirstChar() == 0 ? 0 : 1,
|
||||
samp_array[smp_0]->LastChar() == 0 ? 0 : 1);
|
||||
} else {
|
||||
size_code_0 = cls_0;
|
||||
}
|
||||
|
||||
int char0_height = samp_array[smp_0]->Height();
|
||||
int char0_width = samp_array[smp_0]->Width();
|
||||
int char0_top = samp_array[smp_0]->Top();
|
||||
|
||||
for (int smp_1 = smp_0 + 1; smp_1 < samp_cnt; smp_1++) {
|
||||
int cls_1 = char_set_->ClassID(samp_array[smp_1]->StrLabel());
|
||||
if (cls_1 < 1) {
|
||||
continue;
|
||||
}
|
||||
// compute size code for samp 0 based on class id and position
|
||||
int size_code_1;
|
||||
if (contextual_) {
|
||||
size_code_1 = SizeCode(cls_1,
|
||||
samp_array[smp_1]->FirstChar() == 0 ? 0 : 1,
|
||||
samp_array[smp_1]->LastChar() == 0 ? 0 : 1);
|
||||
} else {
|
||||
size_code_1 = cls_1;
|
||||
}
|
||||
double dist = PairCost(
|
||||
char0_width, char0_height, char0_top, samp_array[smp_1]->Width(),
|
||||
samp_array[smp_1]->Height(), samp_array[smp_1]->Top(),
|
||||
fnt_info->pair_size_info[size_code_0][size_code_1]);
|
||||
if (dist > 0) {
|
||||
mean_dist += dist;
|
||||
pair_cnt++;
|
||||
}
|
||||
} // smp_1
|
||||
} // smp_0
|
||||
if (pair_cnt == 0) {
|
||||
continue;
|
||||
}
|
||||
mean_dist /= pair_cnt;
|
||||
if (best_fnt == -1 || mean_dist < best_dist) {
|
||||
best_dist = mean_dist;
|
||||
best_fnt = fnt;
|
||||
}
|
||||
}
|
||||
if (best_fnt == -1) {
|
||||
return static_cast<int>(WORST_COST);
|
||||
} else {
|
||||
return static_cast<int>(best_dist);
|
||||
}
|
||||
}
|
||||
|
||||
double WordSizeModel::PairCost(int width_0, int height_0, int top_0,
|
||||
int width_1, int height_1, int top_1,
|
||||
const PairSizeInfo& pair_info) {
|
||||
double scale_factor = static_cast<double>(pair_info.hgt_0) /
|
||||
static_cast<double>(height_0);
|
||||
double dist = 0.0;
|
||||
if (scale_factor > 0) {
|
||||
double norm_width_0 = width_0 * scale_factor;
|
||||
double norm_width_1 = width_1 * scale_factor;
|
||||
double norm_height_1 = height_1 * scale_factor;
|
||||
double norm_delta_top = (top_1 - top_0) * scale_factor;
|
||||
|
||||
// accumulate the distance between the model character and the
|
||||
// predicted one on all dimensions of the pair
|
||||
dist += fabs(pair_info.wid_0 - norm_width_0);
|
||||
dist += fabs(pair_info.wid_1 - norm_width_1);
|
||||
dist += fabs(pair_info.hgt_1 - norm_height_1);
|
||||
dist += fabs(pair_info.delta_top - norm_delta_top);
|
||||
}
|
||||
return dist;
|
||||
}
|
||||
} // namespace tesseract
|
@ -1,100 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: word_size_model.h
|
||||
* Description: Declaration of the Word Size Model Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The WordSizeModel class abstracts the geometrical relationships
|
||||
// between characters/shapes in the same word (presumeably of the same font)
|
||||
// A non-parametric bigram model describes the three geometrical properties of a
|
||||
// character pair:
|
||||
// 1- Normalized Width
|
||||
// 2- Normalized Top
|
||||
// 3- Normalized Height
|
||||
// These dimensions are computed for each character pair in a word. These are
|
||||
// then compared to the same information for each of the fonts that the size
|
||||
// model knows about. The WordSizeCost is the cost of the font that matches
|
||||
// best.
|
||||
|
||||
#ifndef WORD_SIZE_MODEL_H
|
||||
#define WORD_SIZE_MODEL_H
|
||||
|
||||
#include <string>
|
||||
#include "char_samp.h"
|
||||
#include "char_set.h"
|
||||
|
||||
namespace tesseract {
|
||||
struct PairSizeInfo {
|
||||
int delta_top;
|
||||
int wid_0;
|
||||
int hgt_0;
|
||||
int wid_1;
|
||||
int hgt_1;
|
||||
};
|
||||
|
||||
struct FontPairSizeInfo {
|
||||
string font_name;
|
||||
PairSizeInfo **pair_size_info;
|
||||
};
|
||||
|
||||
class WordSizeModel {
|
||||
public:
|
||||
WordSizeModel(CharSet *, bool contextual);
|
||||
virtual ~WordSizeModel();
|
||||
static WordSizeModel *Create(const string &data_file_path,
|
||||
const string &lang,
|
||||
CharSet *char_set,
|
||||
bool contextual);
|
||||
// Given a word and number of unichars, return the size cost,
|
||||
// minimized over all fonts in the size model.
|
||||
int Cost(CharSamp **samp_array, int samp_cnt) const;
|
||||
// Given dimensions of a pair of character samples and a font size
|
||||
// model for that character pair, return the pair's size cost for
|
||||
// the font.
|
||||
static double PairCost(int width_0, int height_0, int top_0,
|
||||
int width_1, int height_1, int top_1,
|
||||
const PairSizeInfo& pair_info);
|
||||
bool Save(string file_name);
|
||||
// Number of fonts in size model.
|
||||
inline int FontCount() const {
|
||||
return font_pair_size_models_.size();
|
||||
}
|
||||
inline const FontPairSizeInfo *FontInfo() const {
|
||||
return &font_pair_size_models_[0];
|
||||
}
|
||||
// Helper functions to convert between size codes, class id and position
|
||||
// codes
|
||||
static inline int SizeCode(int cls_id, int start, int end) {
|
||||
return (cls_id << 2) + (end << 1) + start;
|
||||
}
|
||||
|
||||
private:
|
||||
// Scaling constant used to convert floating point ratios in size table
|
||||
// to fixed point
|
||||
static const int kShapeModelScale = 1000;
|
||||
static const int kExpectedTokenCount = 10;
|
||||
|
||||
// Language properties
|
||||
bool contextual_;
|
||||
CharSet *char_set_;
|
||||
// Size ratios table
|
||||
vector<FontPairSizeInfo> font_pair_size_models_;
|
||||
|
||||
// Initialize the word size model object
|
||||
bool Init(const string &data_file_path, const string &lang);
|
||||
};
|
||||
}
|
||||
#endif // WORD_SIZE_MODEL_H
|
@ -1,252 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: word_unigrams.cpp
|
||||
* Description: Implementation of the Word Unigrams Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
#include <math.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
#include "const.h"
|
||||
#include "cube_utils.h"
|
||||
#include "ndminx.h"
|
||||
#include "word_unigrams.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
WordUnigrams::WordUnigrams() {
|
||||
costs_ = NULL;
|
||||
words_ = NULL;
|
||||
word_cnt_ = 0;
|
||||
}
|
||||
|
||||
WordUnigrams::~WordUnigrams() {
|
||||
if (words_ != NULL) {
|
||||
if (words_[0] != NULL) {
|
||||
delete []words_[0];
|
||||
}
|
||||
|
||||
delete []words_;
|
||||
words_ = NULL;
|
||||
}
|
||||
|
||||
if (costs_ != NULL) {
|
||||
delete []costs_;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Load the word-list and unigrams from file and create an object
|
||||
* The word list is assumed to be sorted in lexicographic order.
|
||||
*/
|
||||
WordUnigrams *WordUnigrams::Create(const string &data_file_path,
|
||||
const string &lang) {
|
||||
string file_name;
|
||||
string str;
|
||||
|
||||
file_name = data_file_path + lang;
|
||||
file_name += ".cube.word-freq";
|
||||
|
||||
// load the string into memory
|
||||
if (CubeUtils::ReadFileToString(file_name, &str) == false) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// split into lines
|
||||
vector<string> str_vec;
|
||||
CubeUtils::SplitStringUsing(str, "\r\n \t", &str_vec);
|
||||
if (str_vec.size() < 2) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// allocate memory
|
||||
WordUnigrams *word_unigrams_obj = new WordUnigrams();
|
||||
|
||||
int full_len = str.length();
|
||||
int word_cnt = str_vec.size() / 2;
|
||||
word_unigrams_obj->words_ = new char*[word_cnt];
|
||||
word_unigrams_obj->costs_ = new int[word_cnt];
|
||||
|
||||
word_unigrams_obj->words_[0] = new char[full_len];
|
||||
|
||||
// construct sorted list of words and costs
|
||||
word_unigrams_obj->word_cnt_ = 0;
|
||||
char *char_buff = word_unigrams_obj->words_[0];
|
||||
word_cnt = 0;
|
||||
int max_cost = 0;
|
||||
|
||||
for (int wrd = 0; wrd < str_vec.size(); wrd += 2) {
|
||||
word_unigrams_obj->words_[word_cnt] = char_buff;
|
||||
|
||||
strcpy(char_buff, str_vec[wrd].c_str());
|
||||
char_buff += (str_vec[wrd].length() + 1);
|
||||
|
||||
if (sscanf(str_vec[wrd + 1].c_str(), "%d",
|
||||
word_unigrams_obj->costs_ + word_cnt) != 1) {
|
||||
fprintf(stderr, "Cube ERROR (WordUnigrams::Create): error reading "
|
||||
"word unigram data.\n");
|
||||
delete word_unigrams_obj;
|
||||
return NULL;
|
||||
}
|
||||
// update max cost
|
||||
max_cost = MAX(max_cost, word_unigrams_obj->costs_[word_cnt]);
|
||||
word_cnt++;
|
||||
}
|
||||
word_unigrams_obj->word_cnt_ = word_cnt;
|
||||
|
||||
// compute the not-in-list-cost by assuming that a word not in the list
|
||||
// [ahmadab]: This can be computed as follows:
|
||||
// - Given that the distribution of words follow Zipf's law:
|
||||
// (F = K / (rank ^ S)), where s is slightly > 1.0
|
||||
// - Number of words in the list is N
|
||||
// - The mean frequency of a word that did not appear in the list is the
|
||||
// area under the rest of the Zipf's curve divided by 2 (the mean)
|
||||
// - The area would be the bound integral from N to infinity =
|
||||
// (K * S) / (N ^ (S + 1)) ~= K / (N ^ 2)
|
||||
// - Given that cost = -LOG(prob), the cost of an unlisted word would be
|
||||
// = max_cost + 2*LOG(N)
|
||||
word_unigrams_obj->not_in_list_cost_ = max_cost +
|
||||
(2 * CubeUtils::Prob2Cost(1.0 / word_cnt));
|
||||
// success
|
||||
return word_unigrams_obj;
|
||||
}
|
||||
|
||||
/**
|
||||
* Split input into space-separated tokens, strip trailing punctuation
|
||||
* from each, determine case properties, call UTF-8 flavor of cost
|
||||
* function on each word, and aggregate all into single mean word
|
||||
* cost.
|
||||
*/
|
||||
int WordUnigrams::Cost(const char_32 *key_str32,
|
||||
LangModel *lang_mod,
|
||||
CharSet *char_set) const {
|
||||
if (!key_str32)
|
||||
return 0;
|
||||
// convert string to UTF8 to split into space-separated words
|
||||
string key_str;
|
||||
CubeUtils::UTF32ToUTF8(key_str32, &key_str);
|
||||
vector<string> words;
|
||||
CubeUtils::SplitStringUsing(key_str, " \t", &words);
|
||||
|
||||
// no words => no cost
|
||||
if (words.empty()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// aggregate the costs of all the words
|
||||
int cost = 0;
|
||||
for (int word_idx = 0; word_idx < words.size(); word_idx++) {
|
||||
// convert each word back to UTF32 for analyzing case and punctuation
|
||||
string_32 str32;
|
||||
CubeUtils::UTF8ToUTF32(words[word_idx].c_str(), &str32);
|
||||
int len = CubeUtils::StrLen(str32.c_str());
|
||||
|
||||
// strip all trailing punctuation
|
||||
string clean_str;
|
||||
int clean_len = len;
|
||||
bool trunc = false;
|
||||
while (clean_len > 0 &&
|
||||
lang_mod->IsTrailingPunc(str32.c_str()[clean_len - 1])) {
|
||||
--clean_len;
|
||||
trunc = true;
|
||||
}
|
||||
|
||||
// If either the original string was not truncated (no trailing
|
||||
// punctuation) or the entire string was removed (all characters
|
||||
// are trailing punctuation), evaluate original word as is;
|
||||
// otherwise, copy all but the trailing punctuation characters
|
||||
char_32 *clean_str32 = NULL;
|
||||
if (clean_len == 0 || !trunc) {
|
||||
clean_str32 = CubeUtils::StrDup(str32.c_str());
|
||||
} else {
|
||||
clean_str32 = new char_32[clean_len + 1];
|
||||
for (int i = 0; i < clean_len; ++i) {
|
||||
clean_str32[i] = str32[i];
|
||||
}
|
||||
clean_str32[clean_len] = '\0';
|
||||
}
|
||||
ASSERT_HOST(clean_str32 != NULL);
|
||||
|
||||
string str8;
|
||||
CubeUtils::UTF32ToUTF8(clean_str32, &str8);
|
||||
int word_cost = CostInternal(str8.c_str());
|
||||
|
||||
// if case invariant, get costs of all-upper-case and all-lower-case
|
||||
// versions and return the min cost
|
||||
if (clean_len >= kMinLengthNumOrCaseInvariant &&
|
||||
CubeUtils::IsCaseInvariant(clean_str32, char_set)) {
|
||||
char_32 *lower_32 = CubeUtils::ToLower(clean_str32, char_set);
|
||||
if (lower_32) {
|
||||
string lower_8;
|
||||
CubeUtils::UTF32ToUTF8(lower_32, &lower_8);
|
||||
word_cost = MIN(word_cost, CostInternal(lower_8.c_str()));
|
||||
delete [] lower_32;
|
||||
}
|
||||
char_32 *upper_32 = CubeUtils::ToUpper(clean_str32, char_set);
|
||||
if (upper_32) {
|
||||
string upper_8;
|
||||
CubeUtils::UTF32ToUTF8(upper_32, &upper_8);
|
||||
word_cost = MIN(word_cost, CostInternal(upper_8.c_str()));
|
||||
delete [] upper_32;
|
||||
}
|
||||
}
|
||||
|
||||
if (clean_len >= kMinLengthNumOrCaseInvariant) {
|
||||
// if characters are all numeric, incur 0 word cost
|
||||
bool is_numeric = true;
|
||||
for (int i = 0; i < clean_len; ++i) {
|
||||
if (!lang_mod->IsDigit(clean_str32[i]))
|
||||
is_numeric = false;
|
||||
}
|
||||
if (is_numeric)
|
||||
word_cost = 0;
|
||||
}
|
||||
delete [] clean_str32;
|
||||
cost += word_cost;
|
||||
} // word_idx
|
||||
|
||||
// return the mean cost
|
||||
return static_cast<int>(cost / static_cast<double>(words.size()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Search for UTF-8 string using binary search of sorted words_ array.
|
||||
*/
|
||||
int WordUnigrams::CostInternal(const char *key_str) const {
|
||||
if (strlen(key_str) == 0)
|
||||
return not_in_list_cost_;
|
||||
int hi = word_cnt_ - 1;
|
||||
int lo = 0;
|
||||
while (lo <= hi) {
|
||||
int current = (hi + lo) / 2;
|
||||
int comp = strcmp(key_str, words_[current]);
|
||||
// a match
|
||||
if (comp == 0) {
|
||||
return costs_[current];
|
||||
}
|
||||
if (comp < 0) {
|
||||
// go lower
|
||||
hi = current - 1;
|
||||
} else {
|
||||
// go higher
|
||||
lo = current + 1;
|
||||
}
|
||||
}
|
||||
return not_in_list_cost_;
|
||||
}
|
||||
} // namespace tesseract
|
@ -1,69 +0,0 @@
|
||||
/**********************************************************************
|
||||
* File: word_unigrams.h
|
||||
* Description: Declaration of the Word Unigrams Class
|
||||
* Author: Ahmad Abdulkader
|
||||
* Created: 2008
|
||||
*
|
||||
* (C) Copyright 2008, Google Inc.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
// The WordUnigram class holds the unigrams of the most frequent set of words
|
||||
// in a language. It is an optional component of the Cube OCR engine. If
|
||||
// present, the unigram cost of a word is aggregated with the other costs
|
||||
// (Recognition, Language Model, Size) to compute a cost for a word.
|
||||
// The word list is assumed to be sorted in lexicographic order.
|
||||
|
||||
#ifndef WORD_UNIGRAMS_H
|
||||
#define WORD_UNIGRAMS_H
|
||||
|
||||
#include <string>
|
||||
#include "char_set.h"
|
||||
#include "lang_model.h"
|
||||
|
||||
namespace tesseract {
|
||||
class WordUnigrams {
|
||||
public:
|
||||
WordUnigrams();
|
||||
~WordUnigrams();
|
||||
// Load the word-list and unigrams from file and create an object
|
||||
// The word list is assumed to be sorted
|
||||
static WordUnigrams *Create(const string &data_file_path,
|
||||
const string &lang);
|
||||
// Compute the unigram cost of a UTF-32 string. Splits into
|
||||
// space-separated tokens, strips trailing punctuation from each
|
||||
// token, evaluates case properties, and calls internal Cost()
|
||||
// function on UTF-8 version. To avoid unnecessarily penalizing
|
||||
// all-one-case words or capitalized words (first-letter
|
||||
// upper-case and remaining letters lower-case) when not all
|
||||
// versions of the word appear in the <lang>.cube.word-freq file, a
|
||||
// case-invariant cost is computed in those cases, assuming the word
|
||||
// meets a minimum length.
|
||||
int Cost(const char_32 *str32, LangModel *lang_mod,
|
||||
CharSet *char_set) const;
|
||||
protected:
|
||||
// Compute the word unigram cost of a UTF-8 string with binary
|
||||
// search of sorted words_ array.
|
||||
int CostInternal(const char *str) const;
|
||||
private:
|
||||
// Only words this length or greater qualify for all-numeric or
|
||||
// case-invariant word unigram cost.
|
||||
static const int kMinLengthNumOrCaseInvariant = 4;
|
||||
|
||||
int word_cnt_;
|
||||
char **words_;
|
||||
int *costs_;
|
||||
int not_in_list_cost_;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // WORD_UNIGRAMS_H
|
@ -1,25 +0,0 @@
|
||||
AM_CPPFLAGS += \
|
||||
-DUSE_STD_NAMESPACE \
|
||||
-I$(top_srcdir)/cutil -I$(top_srcdir)/ccutil \
|
||||
-I$(top_srcdir)/ccstruct -I$(top_srcdir)/dict \
|
||||
-I$(top_srcdir)/image -I$(top_srcdir)/viewer
|
||||
|
||||
if VISIBILITY
|
||||
AM_CPPFLAGS += -DTESS_EXPORTS \
|
||||
-fvisibility=hidden -fvisibility-inlines-hidden
|
||||
endif
|
||||
|
||||
noinst_HEADERS = \
|
||||
input_file_buffer.h neural_net.h neuron.h
|
||||
|
||||
if !USING_MULTIPLELIBS
|
||||
noinst_LTLIBRARIES = libtesseract_neural.la
|
||||
else
|
||||
lib_LTLIBRARIES = libtesseract_neural.la
|
||||
libtesseract_neural_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION)
|
||||
endif
|
||||
|
||||
libtesseract_neural_la_SOURCES = \
|
||||
input_file_buffer.cpp neural_net.cpp neuron.cpp sigmoid_table.cpp
|
||||
|
||||
|
@ -1,45 +0,0 @@
|
||||
// Copyright 2008 Google Inc.
|
||||
// All Rights Reserved.
|
||||
// Author: ahmadab@google.com (Ahmad Abdulkader)
|
||||
//
|
||||
// input_file_buffer.h: Declarations of a class for an object that
|
||||
// represents an input file buffer.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <string>
|
||||
#include "input_file_buffer.h"
|
||||
|
||||
namespace tesseract {
|
||||
// default and only constructor
|
||||
InputFileBuffer::InputFileBuffer(const string &file_name)
|
||||
: file_name_(file_name) {
|
||||
fp_ = NULL;
|
||||
}
|
||||
|
||||
// virtual destructor
|
||||
InputFileBuffer::~InputFileBuffer() {
|
||||
if (fp_ != NULL) {
|
||||
fclose(fp_);
|
||||
}
|
||||
}
|
||||
|
||||
// Read the specified number of bytes to the specified input buffer
|
||||
int InputFileBuffer::Read(void *buffer, int bytes_to_read) {
|
||||
// open the file if necessary
|
||||
if (fp_ == NULL) {
|
||||
fp_ = fopen(file_name_.c_str(), "rb");
|
||||
if (fp_ == NULL) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return fread(buffer, 1, bytes_to_read, fp_);
|
||||
}
|
||||
}
|
@ -1,40 +0,0 @@
|
||||
// Copyright 2008 Google Inc.
|
||||
// All Rights Reserved.
|
||||
// Author: ahmadab@google.com (Ahmad Abdulkader)
|
||||
//
|
||||
// input_file_buffer.h: Declarations of a class for an object that
|
||||
// represents an input file buffer.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef INPUT_FILE_BUFFER_H
|
||||
#define INPUT_FILE_BUFFER_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string>
|
||||
#ifdef USE_STD_NAMESPACE
|
||||
using std::string;
|
||||
#endif
|
||||
|
||||
namespace tesseract {
|
||||
class InputFileBuffer {
|
||||
public:
|
||||
explicit InputFileBuffer(const string &file_name);
|
||||
virtual ~InputFileBuffer();
|
||||
int Read(void *buffer, int bytes_to_read);
|
||||
|
||||
protected:
|
||||
string file_name_;
|
||||
FILE *fp_;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // INPUT_FILE_BUFFER_H__
|
@ -1,308 +0,0 @@
|
||||
// Copyright 2008 Google Inc.
|
||||
// All Rights Reserved.
|
||||
// Author: ahmadab@google.com (Ahmad Abdulkader)
|
||||
//
|
||||
// neural_net.cpp: Declarations of a class for an object that
|
||||
// represents an arbitrary network of neurons
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include "neural_net.h"
|
||||
#include "input_file_buffer.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
NeuralNet::NeuralNet() {
|
||||
Init();
|
||||
}
|
||||
|
||||
NeuralNet::~NeuralNet() {
|
||||
// clean up the wts chunks vector
|
||||
for (int vec = 0; vec < static_cast<int>(wts_vec_.size()); vec++) {
|
||||
delete wts_vec_[vec];
|
||||
}
|
||||
// clean up neurons
|
||||
delete []neurons_;
|
||||
// clean up nodes
|
||||
for (int node_idx = 0; node_idx < neuron_cnt_; node_idx++) {
|
||||
delete []fast_nodes_[node_idx].inputs;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Initiaization function
|
||||
void NeuralNet::Init() {
|
||||
read_only_ = true;
|
||||
auto_encoder_ = false;
|
||||
alloc_wgt_cnt_ = 0;
|
||||
wts_cnt_ = 0;
|
||||
neuron_cnt_ = 0;
|
||||
in_cnt_ = 0;
|
||||
out_cnt_ = 0;
|
||||
wts_vec_.clear();
|
||||
neurons_ = NULL;
|
||||
inputs_mean_.clear();
|
||||
inputs_std_dev_.clear();
|
||||
inputs_min_.clear();
|
||||
inputs_max_.clear();
|
||||
}
|
||||
|
||||
// Does a fast feedforward for read_only nets
|
||||
// Templatized for float and double Types
|
||||
template <typename Type> bool NeuralNet::FastFeedForward(const Type *inputs,
|
||||
Type *outputs) {
|
||||
int node_idx = 0;
|
||||
Node *node = &fast_nodes_[0];
|
||||
// feed inputs in and offset them by the pre-computed bias
|
||||
for (node_idx = 0; node_idx < in_cnt_; node_idx++, node++) {
|
||||
node->out = inputs[node_idx] - node->bias;
|
||||
}
|
||||
// compute nodes activations and outputs
|
||||
for (;node_idx < neuron_cnt_; node_idx++, node++) {
|
||||
double activation = -node->bias;
|
||||
for (int fan_in_idx = 0; fan_in_idx < node->fan_in_cnt; fan_in_idx++) {
|
||||
activation += (node->inputs[fan_in_idx].input_weight *
|
||||
node->inputs[fan_in_idx].input_node->out);
|
||||
}
|
||||
node->out = Neuron::Sigmoid(activation);
|
||||
}
|
||||
// copy the outputs to the output buffers
|
||||
node = &fast_nodes_[neuron_cnt_ - out_cnt_];
|
||||
for (node_idx = 0; node_idx < out_cnt_; node_idx++, node++) {
|
||||
outputs[node_idx] = node->out;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Performs a feedforward for general nets. Used mainly in training mode
|
||||
// Templatized for float and double Types
|
||||
template <typename Type> bool NeuralNet::FeedForward(const Type *inputs,
|
||||
Type *outputs) {
|
||||
// call the fast version in case of readonly nets
|
||||
if (read_only_) {
|
||||
return FastFeedForward(inputs, outputs);
|
||||
}
|
||||
// clear all neurons
|
||||
Clear();
|
||||
// for auto encoders, apply no input normalization
|
||||
if (auto_encoder_) {
|
||||
for (int in = 0; in < in_cnt_; in++) {
|
||||
neurons_[in].set_output(inputs[in]);
|
||||
}
|
||||
} else {
|
||||
// Input normalization : subtract mean and divide by stddev
|
||||
for (int in = 0; in < in_cnt_; in++) {
|
||||
neurons_[in].set_output((inputs[in] - inputs_min_[in]) /
|
||||
(inputs_max_[in] - inputs_min_[in]));
|
||||
neurons_[in].set_output((neurons_[in].output() - inputs_mean_[in]) /
|
||||
inputs_std_dev_[in]);
|
||||
}
|
||||
}
|
||||
// compute the net outputs: follow a pull model each output pulls the
|
||||
// outputs of its input nodes and so on
|
||||
for (int out = neuron_cnt_ - out_cnt_; out < neuron_cnt_; out++) {
|
||||
neurons_[out].FeedForward();
|
||||
// copy the values to the output buffer
|
||||
outputs[out] = neurons_[out].output();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Sets a connection between two neurons
|
||||
bool NeuralNet::SetConnection(int from, int to) {
|
||||
// allocate the wgt
|
||||
float *wts = AllocWgt(1);
|
||||
if (wts == NULL) {
|
||||
return false;
|
||||
}
|
||||
// register the connection
|
||||
neurons_[to].AddFromConnection(neurons_ + from, wts, 1);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Create a fast readonly version of the net
|
||||
bool NeuralNet::CreateFastNet() {
|
||||
fast_nodes_.resize(neuron_cnt_);
|
||||
// build the node structures
|
||||
int wts_cnt = 0;
|
||||
for (int node_idx = 0; node_idx < neuron_cnt_; node_idx++) {
|
||||
Node *node = &fast_nodes_[node_idx];
|
||||
if (neurons_[node_idx].node_type() == Neuron::Input) {
|
||||
// Input neurons have no fan-in
|
||||
node->fan_in_cnt = 0;
|
||||
node->inputs = NULL;
|
||||
// Input bias is the normalization offset computed from
|
||||
// training input stats
|
||||
if (fabs(inputs_max_[node_idx] - inputs_min_[node_idx]) <
|
||||
kMinInputRange) {
|
||||
// if the range approaches zero, the stdev is not defined,
|
||||
// this indicates that this input does not change.
|
||||
// Set the bias to zero
|
||||
node->bias = 0.0f;
|
||||
} else {
|
||||
node->bias = inputs_min_[node_idx] + (inputs_mean_[node_idx] *
|
||||
(inputs_max_[node_idx] - inputs_min_[node_idx]));
|
||||
}
|
||||
} else {
|
||||
node->bias = neurons_[node_idx].bias();
|
||||
node->fan_in_cnt = neurons_[node_idx].fan_in_cnt();
|
||||
// allocate memory for fan-in nodes
|
||||
node->inputs = new WeightedNode[node->fan_in_cnt];
|
||||
for (int fan_in = 0; fan_in < node->fan_in_cnt; fan_in++) {
|
||||
// identify fan-in neuron
|
||||
const int id = neurons_[node_idx].fan_in(fan_in)->id();
|
||||
// Feedback connections are not allowed and should never happen
|
||||
if (id >= node_idx) {
|
||||
return false;
|
||||
}
|
||||
// add the the fan-in neuron and its wgt
|
||||
node->inputs[fan_in].input_node = &fast_nodes_[id];
|
||||
float wgt_val = neurons_[node_idx].fan_in_wts(fan_in);
|
||||
// for input neurons normalize the wgt by the input scaling
|
||||
// values to save time during feedforward
|
||||
if (neurons_[node_idx].fan_in(fan_in)->node_type() == Neuron::Input) {
|
||||
// if the range approaches zero, the stdev is not defined,
|
||||
// this indicates that this input does not change.
|
||||
// Set the weight to zero
|
||||
if (fabs(inputs_max_[id] - inputs_min_[id]) < kMinInputRange) {
|
||||
wgt_val = 0.0f;
|
||||
} else {
|
||||
wgt_val /= ((inputs_max_[id] - inputs_min_[id]) *
|
||||
inputs_std_dev_[id]);
|
||||
}
|
||||
}
|
||||
node->inputs[fan_in].input_weight = wgt_val;
|
||||
}
|
||||
// incr wgt count to validate against at the end
|
||||
wts_cnt += node->fan_in_cnt;
|
||||
}
|
||||
}
|
||||
// sanity check
|
||||
return wts_cnt_ == wts_cnt;
|
||||
}
|
||||
|
||||
// returns a pointer to the requested set of weights
|
||||
// Allocates in chunks
|
||||
float * NeuralNet::AllocWgt(int wgt_cnt) {
|
||||
// see if need to allocate a new chunk of wts
|
||||
if (wts_vec_.size() == 0 || (alloc_wgt_cnt_ + wgt_cnt) > kWgtChunkSize) {
|
||||
// add the new chunck to the wts_chunks vector
|
||||
wts_vec_.push_back(new vector<float> (kWgtChunkSize));
|
||||
alloc_wgt_cnt_ = 0;
|
||||
}
|
||||
float *ret_ptr = &((*wts_vec_.back())[alloc_wgt_cnt_]);
|
||||
// incr usage counts
|
||||
alloc_wgt_cnt_ += wgt_cnt;
|
||||
wts_cnt_ += wgt_cnt;
|
||||
return ret_ptr;
|
||||
}
|
||||
|
||||
// create a new net object using an input file as a source
|
||||
NeuralNet *NeuralNet::FromFile(const string file_name) {
|
||||
// open the file
|
||||
InputFileBuffer input_buff(file_name);
|
||||
// create a new net object using input buffer
|
||||
NeuralNet *net_obj = FromInputBuffer(&input_buff);
|
||||
return net_obj;
|
||||
}
|
||||
|
||||
// create a net object from an input buffer
|
||||
NeuralNet *NeuralNet::FromInputBuffer(InputFileBuffer *ib) {
|
||||
// create a new net object
|
||||
NeuralNet *net_obj = new NeuralNet();
|
||||
// load the net
|
||||
if (!net_obj->ReadBinary(ib)) {
|
||||
delete net_obj;
|
||||
net_obj = NULL;
|
||||
}
|
||||
return net_obj;
|
||||
}
|
||||
|
||||
// Compute the output of a specific output node.
|
||||
// This function is useful for application that are interested in a single
|
||||
// output of the net and do not want to waste time on the rest
|
||||
// This is the fast-read-only version of this function
|
||||
template <typename Type> bool NeuralNet::FastGetNetOutput(const Type *inputs,
|
||||
int output_id,
|
||||
Type *output) {
|
||||
// feed inputs in and offset them by the pre-computed bias
|
||||
int node_idx = 0;
|
||||
Node *node = &fast_nodes_[0];
|
||||
for (node_idx = 0; node_idx < in_cnt_; node_idx++, node++) {
|
||||
node->out = inputs[node_idx] - node->bias;
|
||||
}
|
||||
|
||||
// compute nodes' activations and outputs for hidden nodes if any
|
||||
int hidden_node_cnt = neuron_cnt_ - out_cnt_;
|
||||
for (;node_idx < hidden_node_cnt; node_idx++, node++) {
|
||||
double activation = -node->bias;
|
||||
for (int fan_in_idx = 0; fan_in_idx < node->fan_in_cnt; fan_in_idx++) {
|
||||
activation += (node->inputs[fan_in_idx].input_weight *
|
||||
node->inputs[fan_in_idx].input_node->out);
|
||||
}
|
||||
node->out = Neuron::Sigmoid(activation);
|
||||
}
|
||||
|
||||
// compute the output of the required output node
|
||||
node += output_id;
|
||||
double activation = -node->bias;
|
||||
for (int fan_in_idx = 0; fan_in_idx < node->fan_in_cnt; fan_in_idx++) {
|
||||
activation += (node->inputs[fan_in_idx].input_weight *
|
||||
node->inputs[fan_in_idx].input_node->out);
|
||||
}
|
||||
(*output) = Neuron::Sigmoid(activation);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Performs a feedforward for general nets. Used mainly in training mode
|
||||
// Templatized for float and double Types
|
||||
template <typename Type> bool NeuralNet::GetNetOutput(const Type *inputs,
|
||||
int output_id,
|
||||
Type *output) {
|
||||
// validate output id
|
||||
if (output_id < 0 || output_id >= out_cnt_) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// call the fast version in case of readonly nets
|
||||
if (read_only_) {
|
||||
return FastGetNetOutput(inputs, output_id, output);
|
||||
}
|
||||
|
||||
// For the slow version, we'll just call FeedForward and return the
|
||||
// appropriate output
|
||||
vector<Type> outputs(out_cnt_);
|
||||
if (!FeedForward(inputs, &outputs[0])) {
|
||||
return false;
|
||||
}
|
||||
(*output) = outputs[output_id];
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Instantiate all supported templates now that the functions have been defined.
|
||||
template bool NeuralNet::FeedForward(const float *inputs, float *outputs);
|
||||
template bool NeuralNet::FeedForward(const double *inputs, double *outputs);
|
||||
template bool NeuralNet::FastFeedForward(const float *inputs, float *outputs);
|
||||
template bool NeuralNet::FastFeedForward(const double *inputs,
|
||||
double *outputs);
|
||||
template bool NeuralNet::GetNetOutput(const float *inputs, int output_id,
|
||||
float *output);
|
||||
template bool NeuralNet::GetNetOutput(const double *inputs, int output_id,
|
||||
double *output);
|
||||
template bool NeuralNet::FastGetNetOutput(const float *inputs, int output_id,
|
||||
float *output);
|
||||
template bool NeuralNet::FastGetNetOutput(const double *inputs, int output_id,
|
||||
double *output);
|
||||
template bool NeuralNet::ReadBinary(InputFileBuffer *input_buffer);
|
||||
|
||||
}
|
@ -1,252 +0,0 @@
|
||||
// Copyright 2008 Google Inc.
|
||||
// All Rights Reserved.
|
||||
// Author: ahmadab@google.com (Ahmad Abdulkader)
|
||||
//
|
||||
// neural_net.h: Declarations of a class for an object that
|
||||
// represents an arbitrary network of neurons
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef NEURAL_NET_H
|
||||
#define NEURAL_NET_H
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "neuron.h"
|
||||
#include "input_file_buffer.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Minimum input range below which we set the input weight to zero
|
||||
static const float kMinInputRange = 1e-6f;
|
||||
|
||||
class NeuralNet {
|
||||
public:
|
||||
NeuralNet();
|
||||
virtual ~NeuralNet();
|
||||
// create a net object from a file. Uses stdio
|
||||
static NeuralNet *FromFile(const string file_name);
|
||||
// create a net object from an input buffer
|
||||
static NeuralNet *FromInputBuffer(InputFileBuffer *ib);
|
||||
// Different flavors of feed forward function
|
||||
template <typename Type> bool FeedForward(const Type *inputs,
|
||||
Type *outputs);
|
||||
// Compute the output of a specific output node.
|
||||
// This function is useful for application that are interested in a single
|
||||
// output of the net and do not want to waste time on the rest
|
||||
template <typename Type> bool GetNetOutput(const Type *inputs,
|
||||
int output_id,
|
||||
Type *output);
|
||||
// Accessor functions
|
||||
int in_cnt() const { return in_cnt_; }
|
||||
int out_cnt() const { return out_cnt_; }
|
||||
|
||||
protected:
|
||||
struct Node;
|
||||
// A node-weight pair
|
||||
struct WeightedNode {
|
||||
Node *input_node;
|
||||
float input_weight;
|
||||
};
|
||||
// node struct used for fast feedforward in
|
||||
// Read only nets
|
||||
struct Node {
|
||||
float out;
|
||||
float bias;
|
||||
int fan_in_cnt;
|
||||
WeightedNode *inputs;
|
||||
};
|
||||
// Read-Only flag (no training: On by default)
|
||||
// will presumeably be set to false by
|
||||
// the inherting TrainableNeuralNet class
|
||||
bool read_only_;
|
||||
// input count
|
||||
int in_cnt_;
|
||||
// output count
|
||||
int out_cnt_;
|
||||
// Total neuron count (including inputs)
|
||||
int neuron_cnt_;
|
||||
// count of unique weights
|
||||
int wts_cnt_;
|
||||
// Neuron vector
|
||||
Neuron *neurons_;
|
||||
// size of allocated weight chunk (in weights)
|
||||
// This is basically the size of the biggest network
|
||||
// that I have trained. However, the class will allow
|
||||
// a bigger sized net if desired
|
||||
static const int kWgtChunkSize = 0x10000;
|
||||
// Magic number expected at the beginning of the NN
|
||||
// binary file
|
||||
static const unsigned int kNetSignature = 0xFEFEABD0;
|
||||
// count of allocated wgts in the last chunk
|
||||
int alloc_wgt_cnt_;
|
||||
// vector of weights buffers
|
||||
vector<vector<float> *>wts_vec_;
|
||||
// Is the net an auto-encoder type
|
||||
bool auto_encoder_;
|
||||
// vector of input max values
|
||||
vector<float> inputs_max_;
|
||||
// vector of input min values
|
||||
vector<float> inputs_min_;
|
||||
// vector of input mean values
|
||||
vector<float> inputs_mean_;
|
||||
// vector of input standard deviation values
|
||||
vector<float> inputs_std_dev_;
|
||||
// vector of input offsets used by fast read-only
|
||||
// feedforward function
|
||||
vector<Node> fast_nodes_;
|
||||
// Network Initialization function
|
||||
void Init();
|
||||
// Clears all neurons
|
||||
void Clear() {
|
||||
for (int node = 0; node < neuron_cnt_; node++) {
|
||||
neurons_[node].Clear();
|
||||
}
|
||||
}
|
||||
// Reads the net from an input buffer
|
||||
template<class ReadBuffType> bool ReadBinary(ReadBuffType *input_buff) {
|
||||
// Init vars
|
||||
Init();
|
||||
// is this an autoencoder
|
||||
unsigned int read_val;
|
||||
unsigned int auto_encode;
|
||||
// read and verify signature
|
||||
if (input_buff->Read(&read_val, sizeof(read_val)) != sizeof(read_val)) {
|
||||
return false;
|
||||
}
|
||||
if (read_val != kNetSignature) {
|
||||
return false;
|
||||
}
|
||||
if (input_buff->Read(&auto_encode, sizeof(auto_encode)) !=
|
||||
sizeof(auto_encode)) {
|
||||
return false;
|
||||
}
|
||||
auto_encoder_ = auto_encode;
|
||||
// read and validate total # of nodes
|
||||
if (input_buff->Read(&read_val, sizeof(read_val)) != sizeof(read_val)) {
|
||||
return false;
|
||||
}
|
||||
neuron_cnt_ = read_val;
|
||||
if (neuron_cnt_ <= 0) {
|
||||
return false;
|
||||
}
|
||||
// set the size of the neurons vector
|
||||
neurons_ = new Neuron[neuron_cnt_];
|
||||
// read & validate inputs
|
||||
if (input_buff->Read(&read_val, sizeof(read_val)) != sizeof(read_val)) {
|
||||
return false;
|
||||
}
|
||||
in_cnt_ = read_val;
|
||||
if (in_cnt_ <= 0) {
|
||||
return false;
|
||||
}
|
||||
// read outputs
|
||||
if (input_buff->Read(&read_val, sizeof(read_val)) != sizeof(read_val)) {
|
||||
return false;
|
||||
}
|
||||
out_cnt_ = read_val;
|
||||
if (out_cnt_ <= 0) {
|
||||
return false;
|
||||
}
|
||||
// set neuron ids and types
|
||||
for (int idx = 0; idx < neuron_cnt_; idx++) {
|
||||
neurons_[idx].set_id(idx);
|
||||
// input type
|
||||
if (idx < in_cnt_) {
|
||||
neurons_[idx].set_node_type(Neuron::Input);
|
||||
} else if (idx >= (neuron_cnt_ - out_cnt_)) {
|
||||
neurons_[idx].set_node_type(Neuron::Output);
|
||||
} else {
|
||||
neurons_[idx].set_node_type(Neuron::Hidden);
|
||||
}
|
||||
}
|
||||
// read the connections
|
||||
for (int node_idx = 0; node_idx < neuron_cnt_; node_idx++) {
|
||||
// read fanout
|
||||
if (input_buff->Read(&read_val, sizeof(read_val)) != sizeof(read_val)) {
|
||||
return false;
|
||||
}
|
||||
// read the neuron's info
|
||||
int fan_out_cnt = read_val;
|
||||
for (int fan_out_idx = 0; fan_out_idx < fan_out_cnt; fan_out_idx++) {
|
||||
// read the neuron id
|
||||
if (input_buff->Read(&read_val, sizeof(read_val)) != sizeof(read_val)) {
|
||||
return false;
|
||||
}
|
||||
// create the connection
|
||||
if (!SetConnection(node_idx, read_val)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
// read all the neurons' fan-in connections
|
||||
for (int node_idx = 0; node_idx < neuron_cnt_; node_idx++) {
|
||||
// read
|
||||
if (!neurons_[node_idx].ReadBinary(input_buff)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// size input stats vector to expected input size
|
||||
inputs_mean_.resize(in_cnt_);
|
||||
inputs_std_dev_.resize(in_cnt_);
|
||||
inputs_min_.resize(in_cnt_);
|
||||
inputs_max_.resize(in_cnt_);
|
||||
// read stats
|
||||
if (input_buff->Read(&(inputs_mean_.front()),
|
||||
sizeof(inputs_mean_[0]) * in_cnt_) !=
|
||||
sizeof(inputs_mean_[0]) * in_cnt_) {
|
||||
return false;
|
||||
}
|
||||
if (input_buff->Read(&(inputs_std_dev_.front()),
|
||||
sizeof(inputs_std_dev_[0]) * in_cnt_) !=
|
||||
sizeof(inputs_std_dev_[0]) * in_cnt_) {
|
||||
return false;
|
||||
}
|
||||
if (input_buff->Read(&(inputs_min_.front()),
|
||||
sizeof(inputs_min_[0]) * in_cnt_) !=
|
||||
sizeof(inputs_min_[0]) * in_cnt_) {
|
||||
return false;
|
||||
}
|
||||
if (input_buff->Read(&(inputs_max_.front()),
|
||||
sizeof(inputs_max_[0]) * in_cnt_) !=
|
||||
sizeof(inputs_max_[0]) * in_cnt_) {
|
||||
return false;
|
||||
}
|
||||
// create a readonly version for fast feedforward
|
||||
if (read_only_) {
|
||||
return CreateFastNet();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// creates a connection between two nodes
|
||||
bool SetConnection(int from, int to);
|
||||
// Create a read only version of the net that
|
||||
// has faster feedforward performance
|
||||
bool CreateFastNet();
|
||||
// internal function to allocate a new set of weights
|
||||
// Centralized weight allocation attempts to increase
|
||||
// weights locality of reference making it more cache friendly
|
||||
float *AllocWgt(int wgt_cnt);
|
||||
// different flavors read-only feedforward function
|
||||
template <typename Type> bool FastFeedForward(const Type *inputs,
|
||||
Type *outputs);
|
||||
// Compute the output of a specific output node.
|
||||
// This function is useful for application that are interested in a single
|
||||
// output of the net and do not want to waste time on the rest
|
||||
// This is the fast-read-only version of this function
|
||||
template <typename Type> bool FastGetNetOutput(const Type *inputs,
|
||||
int output_id,
|
||||
Type *output);
|
||||
};
|
||||
}
|
||||
|
||||
#endif // NEURAL_NET_H__
|
@ -1,103 +0,0 @@
|
||||
// Copyright 2008 Google Inc.
|
||||
// All Rights Reserved.
|
||||
// Author: ahmadab@google.com (Ahmad Abdulkader)
|
||||
//
|
||||
// neuron.cpp: The implementation of a class for an object
|
||||
// that represents a single neuron in a neural network
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "neuron.h"
|
||||
#include "input_file_buffer.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Instantiate all supported templates
|
||||
template bool Neuron::ReadBinary(InputFileBuffer *input_buffer);
|
||||
|
||||
// default and only constructor
|
||||
Neuron::Neuron() {
|
||||
Init();
|
||||
}
|
||||
|
||||
// virtual destructor
|
||||
Neuron::~Neuron() {
|
||||
}
|
||||
|
||||
// Initializer
|
||||
void Neuron::Init() {
|
||||
id_ = -1;
|
||||
frwd_dirty_ = false;
|
||||
fan_in_.clear();
|
||||
fan_in_weights_.clear();
|
||||
activation_ = 0.0f;
|
||||
output_ = 0.0f;
|
||||
bias_ = 0.0f;
|
||||
node_type_ = Unknown;
|
||||
}
|
||||
|
||||
// Computes the activation and output of the neuron if not fresh
|
||||
// by pulling the outputs of all fan-in neurons
|
||||
void Neuron::FeedForward() {
|
||||
if (!frwd_dirty_ ) {
|
||||
return;
|
||||
}
|
||||
// nothing to do for input nodes: just pass the input to the o/p
|
||||
// otherwise, pull the output of all fan-in neurons
|
||||
if (node_type_ != Input) {
|
||||
int fan_in_cnt = fan_in_.size();
|
||||
// sum out the activation
|
||||
activation_ = -bias_;
|
||||
for (int in = 0; in < fan_in_cnt; in++) {
|
||||
if (fan_in_[in]->frwd_dirty_) {
|
||||
fan_in_[in]->FeedForward();
|
||||
}
|
||||
activation_ += ((*(fan_in_weights_[in])) * fan_in_[in]->output_);
|
||||
}
|
||||
// sigmoid it
|
||||
output_ = Sigmoid(activation_);
|
||||
}
|
||||
frwd_dirty_ = false;
|
||||
}
|
||||
|
||||
// set the type of the neuron
|
||||
void Neuron::set_node_type(NeuronTypes Type) {
|
||||
node_type_ = Type;
|
||||
}
|
||||
|
||||
// Adds new connections *to* this neuron *From*
|
||||
// a target neuron using specfied params
|
||||
// Note that what is actually copied in this function are pointers to the
|
||||
// specified Neurons and weights and not the actualt values. This is by
|
||||
// design to centralize the alloction of neurons and weights and so
|
||||
// increase the locality of reference and improve cache-hits resulting
|
||||
// in a faster net. This technique resulted in a 2X-10X speedup
|
||||
// (depending on network size and processor)
|
||||
void Neuron::AddFromConnection(Neuron *neurons,
|
||||
float *wts_offset,
|
||||
int from_cnt) {
|
||||
for (int in = 0; in < from_cnt; in++) {
|
||||
fan_in_.push_back(neurons + in);
|
||||
fan_in_weights_.push_back(wts_offset + in);
|
||||
}
|
||||
}
|
||||
|
||||
// fast computation of sigmoid function using a lookup table
|
||||
// defined in sigmoid_table.cpp
|
||||
float Neuron::Sigmoid(float activation) {
|
||||
if (activation <= -10.0f) {
|
||||
return 0.0f;
|
||||
} else if (activation >= 10.0f) {
|
||||
return 1.0f;
|
||||
} else {
|
||||
return kSigmoidTable[static_cast<int>(100 * (activation + 10.0))];
|
||||
}
|
||||
}
|
||||
}
|
@ -1,156 +0,0 @@
|
||||
// Copyright 2008 Google Inc.
|
||||
// All Rights Reserved.
|
||||
// Author: ahmadab@google.com (Ahmad Abdulkader)
|
||||
//
|
||||
// neuron.h: Declarations of a class for an object that
|
||||
// represents a single neuron in a neural network
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef NEURON_H
|
||||
#define NEURON_H
|
||||
|
||||
#include <math.h>
|
||||
#include <vector>
|
||||
|
||||
#ifdef USE_STD_NAMESPACE
|
||||
using std::vector;
|
||||
#endif
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Input Node bias values
|
||||
static const float kInputNodeBias = 0.0f;
|
||||
|
||||
class Neuron {
|
||||
public:
|
||||
// Types of nodes
|
||||
enum NeuronTypes {
|
||||
Unknown = 0,
|
||||
Input,
|
||||
Hidden,
|
||||
Output
|
||||
};
|
||||
Neuron();
|
||||
~Neuron();
|
||||
// set the forward dirty flag indicating that the
|
||||
// activation of the net is not fresh
|
||||
void Clear() {
|
||||
frwd_dirty_ = true;
|
||||
}
|
||||
// Read a binary representation of the neuron info from
|
||||
// an input buffer.
|
||||
template <class BuffType> bool ReadBinary(BuffType *input_buff) {
|
||||
float val;
|
||||
if (input_buff->Read(&val, sizeof(val)) != sizeof(val)) {
|
||||
return false;
|
||||
}
|
||||
// input nodes should have no biases
|
||||
if (node_type_ == Input) {
|
||||
bias_ = kInputNodeBias;
|
||||
} else {
|
||||
bias_ = val;
|
||||
}
|
||||
// read fanin count
|
||||
int fan_in_cnt;
|
||||
if (input_buff->Read(&fan_in_cnt, sizeof(fan_in_cnt)) !=
|
||||
sizeof(fan_in_cnt)) {
|
||||
return false;
|
||||
}
|
||||
// validate fan-in cnt
|
||||
if (fan_in_cnt != fan_in_.size()) {
|
||||
return false;
|
||||
}
|
||||
// read the weights
|
||||
for (int in = 0; in < fan_in_cnt; in++) {
|
||||
if (input_buff->Read(&val, sizeof(val)) != sizeof(val)) {
|
||||
return false;
|
||||
}
|
||||
*(fan_in_weights_[in]) = val;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Add a new connection from this neuron *From*
|
||||
// a target neuron using specfied params
|
||||
// Note that what is actually copied in this function are pointers to the
|
||||
// specified Neurons and weights and not the actualt values. This is by
|
||||
// design to centralize the alloction of neurons and weights and so
|
||||
// increase the locality of reference and improve cache-hits resulting
|
||||
// in a faster net. This technique resulted in a 2X-10X speedup
|
||||
// (depending on network size and processor)
|
||||
void AddFromConnection(Neuron *neuron_vec,
|
||||
float *wts_offset,
|
||||
int from_cnt);
|
||||
// Set the type of a neuron
|
||||
void set_node_type(NeuronTypes type);
|
||||
// Computes the output of the node by
|
||||
// "pulling" the output of the fan-in nodes
|
||||
void FeedForward();
|
||||
// fast computation of sigmoid function using a lookup table
|
||||
// defined in sigmoid_table.cpp
|
||||
static float Sigmoid(float activation);
|
||||
// Accessor functions
|
||||
float output() const {
|
||||
return output_;
|
||||
}
|
||||
void set_output(float out_val) {
|
||||
output_ = out_val;
|
||||
}
|
||||
int id() const {
|
||||
return id_;
|
||||
}
|
||||
int fan_in_cnt() const {
|
||||
return fan_in_.size();
|
||||
}
|
||||
Neuron * fan_in(int idx) const {
|
||||
return fan_in_[idx];
|
||||
}
|
||||
float fan_in_wts(int idx) const {
|
||||
return *(fan_in_weights_[idx]);
|
||||
}
|
||||
void set_id(int id) {
|
||||
id_ = id;
|
||||
}
|
||||
float bias() const {
|
||||
return bias_;
|
||||
}
|
||||
Neuron::NeuronTypes node_type() const {
|
||||
return node_type_;
|
||||
}
|
||||
|
||||
protected:
|
||||
// Type of Neuron
|
||||
NeuronTypes node_type_;
|
||||
// unqique id of the neuron
|
||||
int id_;
|
||||
// node bias
|
||||
float bias_;
|
||||
// node net activation
|
||||
float activation_;
|
||||
// node output
|
||||
float output_;
|
||||
// pointers to fanin nodes
|
||||
vector<Neuron *> fan_in_;
|
||||
// pointers to fanin weights
|
||||
vector<float *> fan_in_weights_;
|
||||
// Sigmoid function lookup table used for fast computation
|
||||
// of sigmoid function
|
||||
static const float kSigmoidTable[];
|
||||
// flag determining if the activation of the node
|
||||
// is fresh or not (dirty)
|
||||
bool frwd_dirty_;
|
||||
// Initializer
|
||||
void Init();
|
||||
};
|
||||
}
|
||||
|
||||
#endif // NEURON_H__
|
@ -1,523 +0,0 @@
|
||||
// Copyright 2007 Google Inc.
|
||||
// All Rights Reserved.
|
||||
// Author: ahmadab@google.com (Ahmad Abdulkader)
|
||||
//
|
||||
// sigmoid_table.cpp: Sigmoid function lookup table
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "neuron.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
const float Neuron::kSigmoidTable[] = {
|
||||
4.53979E-05f, 4.58541E-05f, 4.63149E-05f, 4.67804E-05f,
|
||||
4.72505E-05f, 4.77254E-05f, 4.8205E-05f, 4.86894E-05f,
|
||||
4.91787E-05f, 4.9673E-05f, 5.01722E-05f, 5.06764E-05f,
|
||||
5.11857E-05f, 5.17001E-05f, 5.22196E-05f, 5.27444E-05f,
|
||||
5.32745E-05f, 5.38099E-05f, 5.43506E-05f, 5.48968E-05f,
|
||||
5.54485E-05f, 5.60058E-05f, 5.65686E-05f, 5.71371E-05f,
|
||||
5.77113E-05f, 5.82913E-05f, 5.88771E-05f, 5.94688E-05f,
|
||||
6.00664E-05f, 6.067E-05f, 6.12797E-05f, 6.18956E-05f,
|
||||
6.25176E-05f, 6.31459E-05f, 6.37805E-05f, 6.44214E-05f,
|
||||
6.50688E-05f, 6.57227E-05f, 6.63832E-05f, 6.70503E-05f,
|
||||
6.77241E-05f, 6.84047E-05f, 6.90922E-05f, 6.97865E-05f,
|
||||
7.04878E-05f, 7.11962E-05f, 7.19117E-05f, 7.26343E-05f,
|
||||
7.33643E-05f, 7.41016E-05f, 7.48462E-05f, 7.55984E-05f,
|
||||
7.63581E-05f, 7.71255E-05f, 7.79005E-05f, 7.86834E-05f,
|
||||
7.94741E-05f, 8.02728E-05f, 8.10794E-05f, 8.18942E-05f,
|
||||
8.27172E-05f, 8.35485E-05f, 8.43881E-05f, 8.52361E-05f,
|
||||
8.60927E-05f, 8.69579E-05f, 8.78317E-05f, 8.87144E-05f,
|
||||
8.96059E-05f, 9.05064E-05f, 9.14159E-05f, 9.23345E-05f,
|
||||
9.32624E-05f, 9.41996E-05f, 9.51463E-05f, 9.61024E-05f,
|
||||
9.70682E-05f, 9.80436E-05f, 9.90289E-05f, 0.000100024f,
|
||||
0.000101029f, 0.000102044f, 0.00010307f, 0.000104106f,
|
||||
0.000105152f, 0.000106209f, 0.000107276f, 0.000108354f,
|
||||
0.000109443f, 0.000110542f, 0.000111653f, 0.000112775f,
|
||||
0.000113909f, 0.000115053f, 0.000116209f, 0.000117377f,
|
||||
0.000118557f, 0.000119748f, 0.000120951f, 0.000122167f,
|
||||
0.000123395f, 0.000124635f, 0.000125887f, 0.000127152f,
|
||||
0.00012843f, 0.00012972f, 0.000131024f, 0.000132341f,
|
||||
0.00013367f, 0.000135014f, 0.00013637f, 0.000137741f,
|
||||
0.000139125f, 0.000140523f, 0.000141935f, 0.000143361f,
|
||||
0.000144802f, 0.000146257f, 0.000147727f, 0.000149211f,
|
||||
0.00015071f, 0.000152225f, 0.000153754f, 0.000155299f,
|
||||
0.00015686f, 0.000158436f, 0.000160028f, 0.000161636f,
|
||||
0.000163261f, 0.000164901f, 0.000166558f, 0.000168232f,
|
||||
0.000169922f, 0.00017163f, 0.000173354f, 0.000175096f,
|
||||
0.000176856f, 0.000178633f, 0.000180428f, 0.000182241f,
|
||||
0.000184072f, 0.000185922f, 0.00018779f, 0.000189677f,
|
||||
0.000191583f, 0.000193508f, 0.000195452f, 0.000197416f,
|
||||
0.0001994f, 0.000201403f, 0.000203427f, 0.000205471f,
|
||||
0.000207536f, 0.000209621f, 0.000211727f, 0.000213855f,
|
||||
0.000216003f, 0.000218174f, 0.000220366f, 0.00022258f,
|
||||
0.000224817f, 0.000227076f, 0.000229357f, 0.000231662f,
|
||||
0.00023399f, 0.000236341f, 0.000238715f, 0.000241114f,
|
||||
0.000243537f, 0.000245984f, 0.000248455f, 0.000250951f,
|
||||
0.000253473f, 0.00025602f, 0.000258592f, 0.00026119f,
|
||||
0.000263815f, 0.000266465f, 0.000269143f, 0.000271847f,
|
||||
0.000274578f, 0.000277337f, 0.000280123f, 0.000282938f,
|
||||
0.000285781f, 0.000288652f, 0.000291552f, 0.000294481f,
|
||||
0.00029744f, 0.000300429f, 0.000303447f, 0.000306496f,
|
||||
0.000309575f, 0.000312685f, 0.000315827f, 0.000319f,
|
||||
0.000322205f, 0.000325442f, 0.000328712f, 0.000332014f,
|
||||
0.00033535f, 0.000338719f, 0.000342122f, 0.00034556f,
|
||||
0.000349031f, 0.000352538f, 0.00035608f, 0.000359657f,
|
||||
0.00036327f, 0.00036692f, 0.000370606f, 0.000374329f,
|
||||
0.00037809f, 0.000381888f, 0.000385725f, 0.0003896f,
|
||||
0.000393514f, 0.000397467f, 0.00040146f, 0.000405494f,
|
||||
0.000409567f, 0.000413682f, 0.000417838f, 0.000422035f,
|
||||
0.000426275f, 0.000430557f, 0.000434882f, 0.000439251f,
|
||||
0.000443664f, 0.000448121f, 0.000452622f, 0.000457169f,
|
||||
0.000461762f, 0.0004664f, 0.000471085f, 0.000475818f,
|
||||
0.000480597f, 0.000485425f, 0.000490301f, 0.000495226f,
|
||||
0.000500201f, 0.000505226f, 0.000510301f, 0.000515427f,
|
||||
0.000520604f, 0.000525833f, 0.000531115f, 0.00053645f,
|
||||
0.000541839f, 0.000547281f, 0.000552779f, 0.000558331f,
|
||||
0.000563939f, 0.000569604f, 0.000575325f, 0.000581104f,
|
||||
0.00058694f, 0.000592836f, 0.00059879f, 0.000604805f,
|
||||
0.000610879f, 0.000617015f, 0.000623212f, 0.000629472f,
|
||||
0.000635794f, 0.00064218f, 0.00064863f, 0.000655144f,
|
||||
0.000661724f, 0.00066837f, 0.000675083f, 0.000681863f,
|
||||
0.000688711f, 0.000695628f, 0.000702614f, 0.00070967f,
|
||||
0.000716798f, 0.000723996f, 0.000731267f, 0.000738611f,
|
||||
0.000746029f, 0.000753521f, 0.000761088f, 0.000768731f,
|
||||
0.000776451f, 0.000784249f, 0.000792124f, 0.000800079f,
|
||||
0.000808113f, 0.000816228f, 0.000824425f, 0.000832703f,
|
||||
0.000841065f, 0.000849511f, 0.000858041f, 0.000866657f,
|
||||
0.00087536f, 0.000884149f, 0.000893027f, 0.000901994f,
|
||||
0.000911051f, 0.000920199f, 0.000929439f, 0.000938771f,
|
||||
0.000948197f, 0.000957717f, 0.000967333f, 0.000977045f,
|
||||
0.000986855f, 0.000996763f, 0.001006771f, 0.001016879f,
|
||||
0.001027088f, 0.0010374f, 0.001047815f, 0.001058334f,
|
||||
0.00106896f, 0.001079691f, 0.00109053f, 0.001101478f,
|
||||
0.001112536f, 0.001123705f, 0.001134985f, 0.001146379f,
|
||||
0.001157887f, 0.00116951f, 0.00118125f, 0.001193108f,
|
||||
0.001205084f, 0.001217181f, 0.001229399f, 0.001241739f,
|
||||
0.001254203f, 0.001266792f, 0.001279507f, 0.00129235f,
|
||||
0.001305321f, 0.001318423f, 0.001331655f, 0.001345021f,
|
||||
0.00135852f, 0.001372155f, 0.001385926f, 0.001399835f,
|
||||
0.001413884f, 0.001428073f, 0.001442405f, 0.00145688f,
|
||||
0.001471501f, 0.001486267f, 0.001501182f, 0.001516247f,
|
||||
0.001531462f, 0.001546829f, 0.001562351f, 0.001578028f,
|
||||
0.001593862f, 0.001609855f, 0.001626008f, 0.001642323f,
|
||||
0.001658801f, 0.001675444f, 0.001692254f, 0.001709233f,
|
||||
0.001726381f, 0.001743701f, 0.001761195f, 0.001778864f,
|
||||
0.00179671f, 0.001814734f, 0.001832939f, 0.001851326f,
|
||||
0.001869898f, 0.001888655f, 0.0019076f, 0.001926735f,
|
||||
0.001946061f, 0.001965581f, 0.001985296f, 0.002005209f,
|
||||
0.00202532f, 0.002045634f, 0.00206615f, 0.002086872f,
|
||||
0.002107801f, 0.00212894f, 0.00215029f, 0.002171854f,
|
||||
0.002193633f, 0.002215631f, 0.002237849f, 0.002260288f,
|
||||
0.002282953f, 0.002305844f, 0.002328964f, 0.002352316f,
|
||||
0.002375901f, 0.002399721f, 0.002423781f, 0.00244808f,
|
||||
0.002472623f, 0.002497411f, 0.002522447f, 0.002547734f,
|
||||
0.002573273f, 0.002599068f, 0.00262512f, 0.002651433f,
|
||||
0.002678009f, 0.002704851f, 0.002731961f, 0.002759342f,
|
||||
0.002786996f, 0.002814927f, 0.002843137f, 0.002871629f,
|
||||
0.002900406f, 0.00292947f, 0.002958825f, 0.002988472f,
|
||||
0.003018416f, 0.003048659f, 0.003079205f, 0.003110055f,
|
||||
0.003141213f, 0.003172683f, 0.003204467f, 0.003236568f,
|
||||
0.00326899f, 0.003301735f, 0.003334807f, 0.00336821f,
|
||||
0.003401946f, 0.003436018f, 0.003470431f, 0.003505187f,
|
||||
0.00354029f, 0.003575744f, 0.003611551f, 0.003647715f,
|
||||
0.00368424f, 0.003721129f, 0.003758387f, 0.003796016f,
|
||||
0.00383402f, 0.003872403f, 0.00391117f, 0.003950322f,
|
||||
0.003989865f, 0.004029802f, 0.004070138f, 0.004110875f,
|
||||
0.004152019f, 0.004193572f, 0.00423554f, 0.004277925f,
|
||||
0.004320734f, 0.004363968f, 0.004407633f, 0.004451734f,
|
||||
0.004496273f, 0.004541256f, 0.004586687f, 0.004632571f,
|
||||
0.004678911f, 0.004725713f, 0.00477298f, 0.004820718f,
|
||||
0.004868931f, 0.004917624f, 0.004966802f, 0.005016468f,
|
||||
0.005066629f, 0.005117289f, 0.005168453f, 0.005220126f,
|
||||
0.005272312f, 0.005325018f, 0.005378247f, 0.005432006f,
|
||||
0.005486299f, 0.005541132f, 0.005596509f, 0.005652437f,
|
||||
0.005708921f, 0.005765966f, 0.005823577f, 0.005881761f,
|
||||
0.005940522f, 0.005999867f, 0.006059801f, 0.006120331f,
|
||||
0.006181461f, 0.006243198f, 0.006305547f, 0.006368516f,
|
||||
0.006432108f, 0.006496332f, 0.006561193f, 0.006626697f,
|
||||
0.006692851f, 0.006759661f, 0.006827132f, 0.006895273f,
|
||||
0.006964089f, 0.007033587f, 0.007103774f, 0.007174656f,
|
||||
0.00724624f, 0.007318533f, 0.007391541f, 0.007465273f,
|
||||
0.007539735f, 0.007614933f, 0.007690876f, 0.00776757f,
|
||||
0.007845023f, 0.007923242f, 0.008002235f, 0.008082009f,
|
||||
0.008162571f, 0.00824393f, 0.008326093f, 0.008409068f,
|
||||
0.008492863f, 0.008577485f, 0.008662944f, 0.008749246f,
|
||||
0.0088364f, 0.008924415f, 0.009013299f, 0.009103059f,
|
||||
0.009193705f, 0.009285246f, 0.009377689f, 0.009471044f,
|
||||
0.009565319f, 0.009660523f, 0.009756666f, 0.009853756f,
|
||||
0.009951802f, 0.010050814f, 0.010150801f, 0.010251772f,
|
||||
0.010353738f, 0.010456706f, 0.010560688f, 0.010665693f,
|
||||
0.01077173f, 0.01087881f, 0.010986943f, 0.011096138f,
|
||||
0.011206406f, 0.011317758f, 0.011430203f, 0.011543752f,
|
||||
0.011658417f, 0.011774206f, 0.011891132f, 0.012009204f,
|
||||
0.012128435f, 0.012248835f, 0.012370415f, 0.012493186f,
|
||||
0.012617161f, 0.012742349f, 0.012868764f, 0.012996417f,
|
||||
0.013125318f, 0.013255481f, 0.013386918f, 0.01351964f,
|
||||
0.013653659f, 0.013788989f, 0.01392564f, 0.014063627f,
|
||||
0.014202961f, 0.014343656f, 0.014485724f, 0.014629178f,
|
||||
0.014774032f, 0.014920298f, 0.01506799f, 0.015217121f,
|
||||
0.015367706f, 0.015519757f, 0.015673288f, 0.015828314f,
|
||||
0.015984848f, 0.016142905f, 0.016302499f, 0.016463645f,
|
||||
0.016626356f, 0.016790648f, 0.016956536f, 0.017124033f,
|
||||
0.017293157f, 0.01746392f, 0.01763634f, 0.017810432f,
|
||||
0.01798621f, 0.018163691f, 0.018342891f, 0.018523825f,
|
||||
0.01870651f, 0.018890962f, 0.019077197f, 0.019265233f,
|
||||
0.019455085f, 0.01964677f, 0.019840306f, 0.020035709f,
|
||||
0.020232997f, 0.020432187f, 0.020633297f, 0.020836345f,
|
||||
0.021041347f, 0.021248323f, 0.02145729f, 0.021668266f,
|
||||
0.021881271f, 0.022096322f, 0.022313439f, 0.022532639f,
|
||||
0.022753943f, 0.02297737f, 0.023202938f, 0.023430668f,
|
||||
0.023660578f, 0.023892689f, 0.024127021f, 0.024363594f,
|
||||
0.024602428f, 0.024843544f, 0.025086962f, 0.025332703f,
|
||||
0.025580788f, 0.025831239f, 0.026084075f, 0.02633932f,
|
||||
0.026596994f, 0.026857119f, 0.027119717f, 0.027384811f,
|
||||
0.027652422f, 0.027922574f, 0.028195288f, 0.028470588f,
|
||||
0.028748496f, 0.029029036f, 0.029312231f, 0.029598104f,
|
||||
0.02988668f, 0.030177981f, 0.030472033f, 0.030768859f,
|
||||
0.031068484f, 0.031370932f, 0.031676228f, 0.031984397f,
|
||||
0.032295465f, 0.032609455f, 0.032926395f, 0.033246309f,
|
||||
0.033569223f, 0.033895164f, 0.034224158f, 0.03455623f,
|
||||
0.034891409f, 0.035229719f, 0.035571189f, 0.035915846f,
|
||||
0.036263716f, 0.036614828f, 0.036969209f, 0.037326887f,
|
||||
0.037687891f, 0.038052247f, 0.038419986f, 0.038791134f,
|
||||
0.039165723f, 0.03954378f, 0.039925334f, 0.040310415f,
|
||||
0.040699054f, 0.041091278f, 0.041487119f, 0.041886607f,
|
||||
0.042289772f, 0.042696644f, 0.043107255f, 0.043521635f,
|
||||
0.043939815f, 0.044361828f, 0.044787703f, 0.045217473f,
|
||||
0.045651171f, 0.046088827f, 0.046530475f, 0.046976146f,
|
||||
0.047425873f, 0.04787969f, 0.048337629f, 0.048799723f,
|
||||
0.049266006f, 0.049736512f, 0.050211273f, 0.050690325f,
|
||||
0.051173701f, 0.051661435f, 0.052153563f, 0.052650118f,
|
||||
0.053151136f, 0.053656652f, 0.0541667f, 0.054681317f,
|
||||
0.055200538f, 0.055724398f, 0.056252934f, 0.056786181f,
|
||||
0.057324176f, 0.057866955f, 0.058414556f, 0.058967013f,
|
||||
0.059524366f, 0.06008665f, 0.060653903f, 0.061226163f,
|
||||
0.061803466f, 0.062385851f, 0.062973356f, 0.063566018f,
|
||||
0.064163876f, 0.064766969f, 0.065375333f, 0.065989009f,
|
||||
0.066608036f, 0.067232451f, 0.067862294f, 0.068497604f,
|
||||
0.06913842f, 0.069784783f, 0.070436731f, 0.071094304f,
|
||||
0.071757542f, 0.072426485f, 0.073101173f, 0.073781647f,
|
||||
0.074467945f, 0.075160109f, 0.07585818f, 0.076562197f,
|
||||
0.077272202f, 0.077988235f, 0.078710337f, 0.079438549f,
|
||||
0.080172912f, 0.080913467f, 0.081660255f, 0.082413318f,
|
||||
0.083172696f, 0.083938432f, 0.084710566f, 0.085489139f,
|
||||
0.086274194f, 0.087065772f, 0.087863915f, 0.088668663f,
|
||||
0.089480059f, 0.090298145f, 0.091122961f, 0.09195455f,
|
||||
0.092792953f, 0.093638212f, 0.094490369f, 0.095349465f,
|
||||
0.096215542f, 0.097088641f, 0.097968804f, 0.098856073f,
|
||||
0.099750489f, 0.100652094f, 0.101560928f, 0.102477033f,
|
||||
0.103400451f, 0.104331223f, 0.10526939f, 0.106214992f,
|
||||
0.10716807f, 0.108128667f, 0.109096821f, 0.110072574f,
|
||||
0.111055967f, 0.112047039f, 0.11304583f, 0.114052381f,
|
||||
0.115066732f, 0.116088922f, 0.117118991f, 0.118156978f,
|
||||
0.119202922f, 0.120256862f, 0.121318838f, 0.122388887f,
|
||||
0.123467048f, 0.124553358f, 0.125647857f, 0.12675058f,
|
||||
0.127861566f, 0.128980852f, 0.130108474f, 0.131244469f,
|
||||
0.132388874f, 0.133541723f, 0.134703052f, 0.135872897f,
|
||||
0.137051293f, 0.138238273f, 0.139433873f, 0.140638126f,
|
||||
0.141851065f, 0.143072723f, 0.144303134f, 0.145542329f,
|
||||
0.14679034f, 0.148047198f, 0.149312935f, 0.15058758f,
|
||||
0.151871164f, 0.153163716f, 0.154465265f, 0.15577584f,
|
||||
0.157095469f, 0.158424179f, 0.159761997f, 0.16110895f,
|
||||
0.162465063f, 0.163830361f, 0.16520487f, 0.166588614f,
|
||||
0.167981615f, 0.169383897f, 0.170795482f, 0.172216392f,
|
||||
0.173646647f, 0.175086268f, 0.176535275f, 0.177993686f,
|
||||
0.179461519f, 0.180938793f, 0.182425524f, 0.183921727f,
|
||||
0.185427419f, 0.186942614f, 0.188467325f, 0.190001566f,
|
||||
0.191545349f, 0.193098684f, 0.194661584f, 0.196234056f,
|
||||
0.197816111f, 0.199407757f, 0.201009f, 0.202619846f,
|
||||
0.204240302f, 0.205870372f, 0.207510059f, 0.209159365f,
|
||||
0.210818293f, 0.212486844f, 0.214165017f, 0.215852811f,
|
||||
0.217550224f, 0.219257252f, 0.220973892f, 0.222700139f,
|
||||
0.224435986f, 0.226181426f, 0.227936451f, 0.229701051f,
|
||||
0.231475217f, 0.233258936f, 0.235052196f, 0.236854984f,
|
||||
0.238667285f, 0.240489083f, 0.242320361f, 0.244161101f,
|
||||
0.246011284f, 0.247870889f, 0.249739894f, 0.251618278f,
|
||||
0.253506017f, 0.255403084f, 0.257309455f, 0.259225101f,
|
||||
0.261149994f, 0.263084104f, 0.265027401f, 0.266979851f,
|
||||
0.268941421f, 0.270912078f, 0.272891784f, 0.274880502f,
|
||||
0.276878195f, 0.278884822f, 0.280900343f, 0.282924715f,
|
||||
0.284957894f, 0.286999837f, 0.289050497f, 0.291109827f,
|
||||
0.293177779f, 0.295254302f, 0.297339346f, 0.299432858f,
|
||||
0.301534784f, 0.30364507f, 0.30576366f, 0.307890496f,
|
||||
0.310025519f, 0.312168669f, 0.314319886f, 0.316479106f,
|
||||
0.318646266f, 0.320821301f, 0.323004144f, 0.325194727f,
|
||||
0.327392983f, 0.32959884f, 0.331812228f, 0.334033073f,
|
||||
0.336261303f, 0.338496841f, 0.340739612f, 0.342989537f,
|
||||
0.345246539f, 0.347510538f, 0.349781451f, 0.352059198f,
|
||||
0.354343694f, 0.356634854f, 0.358932594f, 0.361236825f,
|
||||
0.36354746f, 0.365864409f, 0.368187582f, 0.370516888f,
|
||||
0.372852234f, 0.375193526f, 0.377540669f, 0.379893568f,
|
||||
0.382252125f, 0.384616244f, 0.386985824f, 0.389360766f,
|
||||
0.391740969f, 0.394126332f, 0.39651675f, 0.398912121f,
|
||||
0.40131234f, 0.403717301f, 0.406126897f, 0.408541022f,
|
||||
0.410959566f, 0.413382421f, 0.415809477f, 0.418240623f,
|
||||
0.420675748f, 0.423114739f, 0.425557483f, 0.428003867f,
|
||||
0.430453776f, 0.432907095f, 0.435363708f, 0.437823499f,
|
||||
0.440286351f, 0.442752145f, 0.445220765f, 0.44769209f,
|
||||
0.450166003f, 0.452642382f, 0.455121108f, 0.457602059f,
|
||||
0.460085115f, 0.462570155f, 0.465057055f, 0.467545694f,
|
||||
0.470035948f, 0.472527696f, 0.475020813f, 0.477515175f,
|
||||
0.48001066f, 0.482507142f, 0.485004498f, 0.487502604f,
|
||||
0.490001333f, 0.492500562f, 0.495000167f, 0.497500021f,
|
||||
0.5f, 0.502499979f, 0.504999833f, 0.507499438f,
|
||||
0.509998667f, 0.512497396f, 0.514995502f, 0.517492858f,
|
||||
0.51998934f, 0.522484825f, 0.524979187f, 0.527472304f,
|
||||
0.529964052f, 0.532454306f, 0.534942945f, 0.537429845f,
|
||||
0.539914885f, 0.542397941f, 0.544878892f, 0.547357618f,
|
||||
0.549833997f, 0.55230791f, 0.554779235f, 0.557247855f,
|
||||
0.559713649f, 0.562176501f, 0.564636292f, 0.567092905f,
|
||||
0.569546224f, 0.571996133f, 0.574442517f, 0.576885261f,
|
||||
0.579324252f, 0.581759377f, 0.584190523f, 0.586617579f,
|
||||
0.589040434f, 0.591458978f, 0.593873103f, 0.596282699f,
|
||||
0.59868766f, 0.601087879f, 0.60348325f, 0.605873668f,
|
||||
0.608259031f, 0.610639234f, 0.613014176f, 0.615383756f,
|
||||
0.617747875f, 0.620106432f, 0.622459331f, 0.624806474f,
|
||||
0.627147766f, 0.629483112f, 0.631812418f, 0.634135591f,
|
||||
0.63645254f, 0.638763175f, 0.641067406f, 0.643365146f,
|
||||
0.645656306f, 0.647940802f, 0.650218549f, 0.652489462f,
|
||||
0.654753461f, 0.657010463f, 0.659260388f, 0.661503159f,
|
||||
0.663738697f, 0.665966927f, 0.668187772f, 0.67040116f,
|
||||
0.672607017f, 0.674805273f, 0.676995856f, 0.679178699f,
|
||||
0.681353734f, 0.683520894f, 0.685680114f, 0.687831331f,
|
||||
0.689974481f, 0.692109504f, 0.69423634f, 0.69635493f,
|
||||
0.698465216f, 0.700567142f, 0.702660654f, 0.704745698f,
|
||||
0.706822221f, 0.708890173f, 0.710949503f, 0.713000163f,
|
||||
0.715042106f, 0.717075285f, 0.719099657f, 0.721115178f,
|
||||
0.723121805f, 0.725119498f, 0.727108216f, 0.729087922f,
|
||||
0.731058579f, 0.733020149f, 0.734972599f, 0.736915896f,
|
||||
0.738850006f, 0.740774899f, 0.742690545f, 0.744596916f,
|
||||
0.746493983f, 0.748381722f, 0.750260106f, 0.752129111f,
|
||||
0.753988716f, 0.755838899f, 0.757679639f, 0.759510917f,
|
||||
0.761332715f, 0.763145016f, 0.764947804f, 0.766741064f,
|
||||
0.768524783f, 0.770298949f, 0.772063549f, 0.773818574f,
|
||||
0.775564014f, 0.777299861f, 0.779026108f, 0.780742748f,
|
||||
0.782449776f, 0.784147189f, 0.785834983f, 0.787513156f,
|
||||
0.789181707f, 0.790840635f, 0.792489941f, 0.794129628f,
|
||||
0.795759698f, 0.797380154f, 0.798991f, 0.800592243f,
|
||||
0.802183889f, 0.803765944f, 0.805338416f, 0.806901316f,
|
||||
0.808454651f, 0.809998434f, 0.811532675f, 0.813057386f,
|
||||
0.814572581f, 0.816078273f, 0.817574476f, 0.819061207f,
|
||||
0.820538481f, 0.822006314f, 0.823464725f, 0.824913732f,
|
||||
0.826353353f, 0.827783608f, 0.829204518f, 0.830616103f,
|
||||
0.832018385f, 0.833411386f, 0.83479513f, 0.836169639f,
|
||||
0.837534937f, 0.83889105f, 0.840238003f, 0.841575821f,
|
||||
0.842904531f, 0.84422416f, 0.845534735f, 0.846836284f,
|
||||
0.848128836f, 0.84941242f, 0.850687065f, 0.851952802f,
|
||||
0.85320966f, 0.854457671f, 0.855696866f, 0.856927277f,
|
||||
0.858148935f, 0.859361874f, 0.860566127f, 0.861761727f,
|
||||
0.862948707f, 0.864127103f, 0.865296948f, 0.866458277f,
|
||||
0.867611126f, 0.868755531f, 0.869891526f, 0.871019148f,
|
||||
0.872138434f, 0.87324942f, 0.874352143f, 0.875446642f,
|
||||
0.876532952f, 0.877611113f, 0.878681162f, 0.879743138f,
|
||||
0.880797078f, 0.881843022f, 0.882881009f, 0.883911078f,
|
||||
0.884933268f, 0.885947619f, 0.88695417f, 0.887952961f,
|
||||
0.888944033f, 0.889927426f, 0.890903179f, 0.891871333f,
|
||||
0.89283193f, 0.893785008f, 0.89473061f, 0.895668777f,
|
||||
0.896599549f, 0.897522967f, 0.898439072f, 0.899347906f,
|
||||
0.900249511f, 0.901143927f, 0.902031196f, 0.902911359f,
|
||||
0.903784458f, 0.904650535f, 0.905509631f, 0.906361788f,
|
||||
0.907207047f, 0.90804545f, 0.908877039f, 0.909701855f,
|
||||
0.910519941f, 0.911331337f, 0.912136085f, 0.912934228f,
|
||||
0.913725806f, 0.914510861f, 0.915289434f, 0.916061568f,
|
||||
0.916827304f, 0.917586682f, 0.918339745f, 0.919086533f,
|
||||
0.919827088f, 0.920561451f, 0.921289663f, 0.922011765f,
|
||||
0.922727798f, 0.923437803f, 0.92414182f, 0.924839891f,
|
||||
0.925532055f, 0.926218353f, 0.926898827f, 0.927573515f,
|
||||
0.928242458f, 0.928905696f, 0.929563269f, 0.930215217f,
|
||||
0.93086158f, 0.931502396f, 0.932137706f, 0.932767549f,
|
||||
0.933391964f, 0.934010991f, 0.934624667f, 0.935233031f,
|
||||
0.935836124f, 0.936433982f, 0.937026644f, 0.937614149f,
|
||||
0.938196534f, 0.938773837f, 0.939346097f, 0.93991335f,
|
||||
0.940475634f, 0.941032987f, 0.941585444f, 0.942133045f,
|
||||
0.942675824f, 0.943213819f, 0.943747066f, 0.944275602f,
|
||||
0.944799462f, 0.945318683f, 0.9458333f, 0.946343348f,
|
||||
0.946848864f, 0.947349882f, 0.947846437f, 0.948338565f,
|
||||
0.948826299f, 0.949309675f, 0.949788727f, 0.950263488f,
|
||||
0.950733994f, 0.951200277f, 0.951662371f, 0.95212031f,
|
||||
0.952574127f, 0.953023854f, 0.953469525f, 0.953911173f,
|
||||
0.954348829f, 0.954782527f, 0.955212297f, 0.955638172f,
|
||||
0.956060185f, 0.956478365f, 0.956892745f, 0.957303356f,
|
||||
0.957710228f, 0.958113393f, 0.958512881f, 0.958908722f,
|
||||
0.959300946f, 0.959689585f, 0.960074666f, 0.96045622f,
|
||||
0.960834277f, 0.961208866f, 0.961580014f, 0.961947753f,
|
||||
0.962312109f, 0.962673113f, 0.963030791f, 0.963385172f,
|
||||
0.963736284f, 0.964084154f, 0.964428811f, 0.964770281f,
|
||||
0.965108591f, 0.96544377f, 0.965775842f, 0.966104836f,
|
||||
0.966430777f, 0.966753691f, 0.967073605f, 0.967390545f,
|
||||
0.967704535f, 0.968015603f, 0.968323772f, 0.968629068f,
|
||||
0.968931516f, 0.969231141f, 0.969527967f, 0.969822019f,
|
||||
0.97011332f, 0.970401896f, 0.970687769f, 0.970970964f,
|
||||
0.971251504f, 0.971529412f, 0.971804712f, 0.972077426f,
|
||||
0.972347578f, 0.972615189f, 0.972880283f, 0.973142881f,
|
||||
0.973403006f, 0.97366068f, 0.973915925f, 0.974168761f,
|
||||
0.974419212f, 0.974667297f, 0.974913038f, 0.975156456f,
|
||||
0.975397572f, 0.975636406f, 0.975872979f, 0.976107311f,
|
||||
0.976339422f, 0.976569332f, 0.976797062f, 0.97702263f,
|
||||
0.977246057f, 0.977467361f, 0.977686561f, 0.977903678f,
|
||||
0.978118729f, 0.978331734f, 0.97854271f, 0.978751677f,
|
||||
0.978958653f, 0.979163655f, 0.979366703f, 0.979567813f,
|
||||
0.979767003f, 0.979964291f, 0.980159694f, 0.98035323f,
|
||||
0.980544915f, 0.980734767f, 0.980922803f, 0.981109038f,
|
||||
0.98129349f, 0.981476175f, 0.981657109f, 0.981836309f,
|
||||
0.98201379f, 0.982189568f, 0.98236366f, 0.98253608f,
|
||||
0.982706843f, 0.982875967f, 0.983043464f, 0.983209352f,
|
||||
0.983373644f, 0.983536355f, 0.983697501f, 0.983857095f,
|
||||
0.984015152f, 0.984171686f, 0.984326712f, 0.984480243f,
|
||||
0.984632294f, 0.984782879f, 0.98493201f, 0.985079702f,
|
||||
0.985225968f, 0.985370822f, 0.985514276f, 0.985656344f,
|
||||
0.985797039f, 0.985936373f, 0.98607436f, 0.986211011f,
|
||||
0.986346341f, 0.98648036f, 0.986613082f, 0.986744519f,
|
||||
0.986874682f, 0.987003583f, 0.987131236f, 0.987257651f,
|
||||
0.987382839f, 0.987506814f, 0.987629585f, 0.987751165f,
|
||||
0.987871565f, 0.987990796f, 0.988108868f, 0.988225794f,
|
||||
0.988341583f, 0.988456248f, 0.988569797f, 0.988682242f,
|
||||
0.988793594f, 0.988903862f, 0.989013057f, 0.98912119f,
|
||||
0.98922827f, 0.989334307f, 0.989439312f, 0.989543294f,
|
||||
0.989646262f, 0.989748228f, 0.989849199f, 0.989949186f,
|
||||
0.990048198f, 0.990146244f, 0.990243334f, 0.990339477f,
|
||||
0.990434681f, 0.990528956f, 0.990622311f, 0.990714754f,
|
||||
0.990806295f, 0.990896941f, 0.990986701f, 0.991075585f,
|
||||
0.9911636f, 0.991250754f, 0.991337056f, 0.991422515f,
|
||||
0.991507137f, 0.991590932f, 0.991673907f, 0.99175607f,
|
||||
0.991837429f, 0.991917991f, 0.991997765f, 0.992076758f,
|
||||
0.992154977f, 0.99223243f, 0.992309124f, 0.992385067f,
|
||||
0.992460265f, 0.992534727f, 0.992608459f, 0.992681467f,
|
||||
0.99275376f, 0.992825344f, 0.992896226f, 0.992966413f,
|
||||
0.993035911f, 0.993104727f, 0.993172868f, 0.993240339f,
|
||||
0.993307149f, 0.993373303f, 0.993438807f, 0.993503668f,
|
||||
0.993567892f, 0.993631484f, 0.993694453f, 0.993756802f,
|
||||
0.993818539f, 0.993879669f, 0.993940199f, 0.994000133f,
|
||||
0.994059478f, 0.994118239f, 0.994176423f, 0.994234034f,
|
||||
0.994291079f, 0.994347563f, 0.994403491f, 0.994458868f,
|
||||
0.994513701f, 0.994567994f, 0.994621753f, 0.994674982f,
|
||||
0.994727688f, 0.994779874f, 0.994831547f, 0.994882711f,
|
||||
0.994933371f, 0.994983532f, 0.995033198f, 0.995082376f,
|
||||
0.995131069f, 0.995179282f, 0.99522702f, 0.995274287f,
|
||||
0.995321089f, 0.995367429f, 0.995413313f, 0.995458744f,
|
||||
0.995503727f, 0.995548266f, 0.995592367f, 0.995636032f,
|
||||
0.995679266f, 0.995722075f, 0.99576446f, 0.995806428f,
|
||||
0.995847981f, 0.995889125f, 0.995929862f, 0.995970198f,
|
||||
0.996010135f, 0.996049678f, 0.99608883f, 0.996127597f,
|
||||
0.99616598f, 0.996203984f, 0.996241613f, 0.996278871f,
|
||||
0.99631576f, 0.996352285f, 0.996388449f, 0.996424256f,
|
||||
0.99645971f, 0.996494813f, 0.996529569f, 0.996563982f,
|
||||
0.996598054f, 0.99663179f, 0.996665193f, 0.996698265f,
|
||||
0.99673101f, 0.996763432f, 0.996795533f, 0.996827317f,
|
||||
0.996858787f, 0.996889945f, 0.996920795f, 0.996951341f,
|
||||
0.996981584f, 0.997011528f, 0.997041175f, 0.99707053f,
|
||||
0.997099594f, 0.997128371f, 0.997156863f, 0.997185073f,
|
||||
0.997213004f, 0.997240658f, 0.997268039f, 0.997295149f,
|
||||
0.997321991f, 0.997348567f, 0.99737488f, 0.997400932f,
|
||||
0.997426727f, 0.997452266f, 0.997477553f, 0.997502589f,
|
||||
0.997527377f, 0.99755192f, 0.997576219f, 0.997600279f,
|
||||
0.997624099f, 0.997647684f, 0.997671036f, 0.997694156f,
|
||||
0.997717047f, 0.997739712f, 0.997762151f, 0.997784369f,
|
||||
0.997806367f, 0.997828146f, 0.99784971f, 0.99787106f,
|
||||
0.997892199f, 0.997913128f, 0.99793385f, 0.997954366f,
|
||||
0.99797468f, 0.997994791f, 0.998014704f, 0.998034419f,
|
||||
0.998053939f, 0.998073265f, 0.9980924f, 0.998111345f,
|
||||
0.998130102f, 0.998148674f, 0.998167061f, 0.998185266f,
|
||||
0.99820329f, 0.998221136f, 0.998238805f, 0.998256299f,
|
||||
0.998273619f, 0.998290767f, 0.998307746f, 0.998324556f,
|
||||
0.998341199f, 0.998357677f, 0.998373992f, 0.998390145f,
|
||||
0.998406138f, 0.998421972f, 0.998437649f, 0.998453171f,
|
||||
0.998468538f, 0.998483753f, 0.998498818f, 0.998513733f,
|
||||
0.998528499f, 0.99854312f, 0.998557595f, 0.998571927f,
|
||||
0.998586116f, 0.998600165f, 0.998614074f, 0.998627845f,
|
||||
0.99864148f, 0.998654979f, 0.998668345f, 0.998681577f,
|
||||
0.998694679f, 0.99870765f, 0.998720493f, 0.998733208f,
|
||||
0.998745797f, 0.998758261f, 0.998770601f, 0.998782819f,
|
||||
0.998794916f, 0.998806892f, 0.99881875f, 0.99883049f,
|
||||
0.998842113f, 0.998853621f, 0.998865015f, 0.998876295f,
|
||||
0.998887464f, 0.998898522f, 0.99890947f, 0.998920309f,
|
||||
0.99893104f, 0.998941666f, 0.998952185f, 0.9989626f,
|
||||
0.998972912f, 0.998983121f, 0.998993229f, 0.999003237f,
|
||||
0.999013145f, 0.999022955f, 0.999032667f, 0.999042283f,
|
||||
0.999051803f, 0.999061229f, 0.999070561f, 0.999079801f,
|
||||
0.999088949f, 0.999098006f, 0.999106973f, 0.999115851f,
|
||||
0.99912464f, 0.999133343f, 0.999141959f, 0.999150489f,
|
||||
0.999158935f, 0.999167297f, 0.999175575f, 0.999183772f,
|
||||
0.999191887f, 0.999199921f, 0.999207876f, 0.999215751f,
|
||||
0.999223549f, 0.999231269f, 0.999238912f, 0.999246479f,
|
||||
0.999253971f, 0.999261389f, 0.999268733f, 0.999276004f,
|
||||
0.999283202f, 0.99929033f, 0.999297386f, 0.999304372f,
|
||||
0.999311289f, 0.999318137f, 0.999324917f, 0.99933163f,
|
||||
0.999338276f, 0.999344856f, 0.99935137f, 0.99935782f,
|
||||
0.999364206f, 0.999370528f, 0.999376788f, 0.999382985f,
|
||||
0.999389121f, 0.999395195f, 0.99940121f, 0.999407164f,
|
||||
0.99941306f, 0.999418896f, 0.999424675f, 0.999430396f,
|
||||
0.999436061f, 0.999441669f, 0.999447221f, 0.999452719f,
|
||||
0.999458161f, 0.99946355f, 0.999468885f, 0.999474167f,
|
||||
0.999479396f, 0.999484573f, 0.999489699f, 0.999494774f,
|
||||
0.999499799f, 0.999504774f, 0.999509699f, 0.999514575f,
|
||||
0.999519403f, 0.999524182f, 0.999528915f, 0.9995336f,
|
||||
0.999538238f, 0.999542831f, 0.999547378f, 0.999551879f,
|
||||
0.999556336f, 0.999560749f, 0.999565118f, 0.999569443f,
|
||||
0.999573725f, 0.999577965f, 0.999582162f, 0.999586318f,
|
||||
0.999590433f, 0.999594506f, 0.99959854f, 0.999602533f,
|
||||
0.999606486f, 0.9996104f, 0.999614275f, 0.999618112f,
|
||||
0.99962191f, 0.999625671f, 0.999629394f, 0.99963308f,
|
||||
0.99963673f, 0.999640343f, 0.99964392f, 0.999647462f,
|
||||
0.999650969f, 0.99965444f, 0.999657878f, 0.999661281f,
|
||||
0.99966465f, 0.999667986f, 0.999671288f, 0.999674558f,
|
||||
0.999677795f, 0.999681f, 0.999684173f, 0.999687315f,
|
||||
0.999690425f, 0.999693504f, 0.999696553f, 0.999699571f,
|
||||
0.99970256f, 0.999705519f, 0.999708448f, 0.999711348f,
|
||||
0.999714219f, 0.999717062f, 0.999719877f, 0.999722663f,
|
||||
0.999725422f, 0.999728153f, 0.999730857f, 0.999733535f,
|
||||
0.999736185f, 0.99973881f, 0.999741408f, 0.99974398f,
|
||||
0.999746527f, 0.999749049f, 0.999751545f, 0.999754016f,
|
||||
0.999756463f, 0.999758886f, 0.999761285f, 0.999763659f,
|
||||
0.99976601f, 0.999768338f, 0.999770643f, 0.999772924f,
|
||||
0.999775183f, 0.99977742f, 0.999779634f, 0.999781826f,
|
||||
0.999783997f, 0.999786145f, 0.999788273f, 0.999790379f,
|
||||
0.999792464f, 0.999794529f, 0.999796573f, 0.999798597f,
|
||||
0.9998006f, 0.999802584f, 0.999804548f, 0.999806492f,
|
||||
0.999808417f, 0.999810323f, 0.99981221f, 0.999814078f,
|
||||
0.999815928f, 0.999817759f, 0.999819572f, 0.999821367f,
|
||||
0.999823144f, 0.999824904f, 0.999826646f, 0.99982837f,
|
||||
0.999830078f, 0.999831768f, 0.999833442f, 0.999835099f,
|
||||
0.999836739f, 0.999838364f, 0.999839972f, 0.999841564f,
|
||||
0.99984314f, 0.999844701f, 0.999846246f, 0.999847775f,
|
||||
0.99984929f, 0.999850789f, 0.999852273f, 0.999853743f,
|
||||
0.999855198f, 0.999856639f, 0.999858065f, 0.999859477f,
|
||||
0.999860875f, 0.999862259f, 0.99986363f, 0.999864986f,
|
||||
0.99986633f, 0.999867659f, 0.999868976f, 0.99987028f,
|
||||
0.99987157f, 0.999872848f, 0.999874113f, 0.999875365f,
|
||||
0.999876605f, 0.999877833f, 0.999879049f, 0.999880252f,
|
||||
0.999881443f, 0.999882623f, 0.999883791f, 0.999884947f,
|
||||
0.999886091f, 0.999887225f, 0.999888347f, 0.999889458f,
|
||||
0.999890557f, 0.999891646f, 0.999892724f, 0.999893791f,
|
||||
0.999894848f, 0.999895894f, 0.99989693f, 0.999897956f,
|
||||
0.999898971f, 0.999899976f, 0.999900971f, 0.999901956f,
|
||||
0.999902932f, 0.999903898f, 0.999904854f, 0.9999058f,
|
||||
0.999906738f, 0.999907665f, 0.999908584f, 0.999909494f,
|
||||
0.999910394f, 0.999911286f, 0.999912168f, 0.999913042f,
|
||||
0.999913907f, 0.999914764f, 0.999915612f, 0.999916452f,
|
||||
0.999917283f, 0.999918106f, 0.999918921f, 0.999919727f,
|
||||
0.999920526f, 0.999921317f, 0.999922099f, 0.999922875f,
|
||||
0.999923642f, 0.999924402f, 0.999925154f, 0.999925898f,
|
||||
0.999926636f, 0.999927366f, 0.999928088f, 0.999928804f,
|
||||
0.999929512f, 0.999930213f, 0.999930908f, 0.999931595f,
|
||||
0.999932276f, 0.99993295f, 0.999933617f, 0.999934277f,
|
||||
0.999934931f, 0.999935579f, 0.99993622f, 0.999936854f,
|
||||
0.999937482f, 0.999938104f, 0.99993872f, 0.99993933f,
|
||||
0.999939934f, 0.999940531f, 0.999941123f, 0.999941709f,
|
||||
0.999942289f, 0.999942863f, 0.999943431f, 0.999943994f,
|
||||
0.999944551f, 0.999945103f, 0.999945649f, 0.99994619f,
|
||||
0.999946726f, 0.999947256f, 0.99994778f, 0.9999483f,
|
||||
0.999948814f, 0.999949324f, 0.999949828f, 0.999950327f,
|
||||
0.999950821f, 0.999951311f, 0.999951795f, 0.999952275f,
|
||||
0.999952749f, 0.99995322f, 0.999953685f, 0.999954146f,
|
||||
0.999954602f
|
||||
};
|
||||
} // namespace tesseract
|
Loading…
Reference in New Issue
Block a user