From 7ec4fd7a561eb52454eb0dcb0acbfdf8d1a0e6bd Mon Sep 17 00:00:00 2001
From: "theraysmith@gmail.com"
 <theraysmith@gmail.com@d0cd1f9f-072b-0410-8dd7-cf729c803f20>
Date: Fri, 8 Nov 2013 20:30:56 +0000
Subject: [PATCH] Refactored control functions to enable parallel blob
 classification

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@904 d0cd1f9f-072b-0410-8dd7-cf729c803f20
---
 ccmain/Makefile.am        |   2 +-
 ccmain/applybox.cpp       |  10 +-
 ccmain/control.cpp        | 432 +++++++++++++++++++++++---------------
 ccmain/cube_control.cpp   |  12 +-
 ccmain/fixspace.cpp       |   5 +-
 ccmain/pgedit.cpp         |  20 +-
 ccmain/recogtraining.cpp  |   4 +-
 ccmain/tesseractclass.cpp |   3 +-
 ccmain/tesseractclass.h   |  48 ++++-
 ccstruct/blobs.cpp        |  36 +++-
 ccstruct/blobs.h          |   6 +-
 ccstruct/pageres.cpp      |  49 ++---
 ccstruct/pageres.h        |  24 +--
 ccstruct/ratngs.cpp       |   3 +-
 classify/adaptmatch.cpp   | 387 +++++++++-------------------------
 classify/classify.cpp     |  14 --
 classify/classify.h       |  74 ++-----
 classify/intfx.cpp        |  30 +--
 classify/intfx.h          |   6 +-
 classify/intmatcher.cpp   |  24 +--
 classify/intmatcher.h     |   9 +-
 classify/picofeat.cpp     |  14 +-
 dict/dict.cpp             |   5 +-
 dict/dict.h               |   2 +
 wordrec/chopper.cpp       |  34 ++-
 25 files changed, 580 insertions(+), 673 deletions(-)
diff --git a/ccmain/Makefile.am b/ccmain/Makefile.am
index d3f3a70ca..eb4358085 100644
--- a/ccmain/Makefile.am
+++ b/ccmain/Makefile.am
@@ -46,7 +46,7 @@ libtesseract_main_la_SOURCES = \
     docqual.cpp equationdetect.cpp fixspace.cpp fixxht.cpp \
     imgscale.cpp ltrresultiterator.cpp \
     osdetect.cpp output.cpp pageiterator.cpp pagesegmain.cpp \
-    pagewalk.cpp paragraphs.cpp paramsd.cpp pgedit.cpp recogtraining.cpp \
+    pagewalk.cpp par_control.cpp paragraphs.cpp paramsd.cpp pgedit.cpp recogtraining.cpp \
     reject.cpp resultiterator.cpp scaleimg.cpp superscript.cpp \
     tesseract_cube_combiner.cpp \
     tessbox.cpp tessedit.cpp tesseractclass.cpp tessvars.cpp \
diff --git a/ccmain/applybox.cpp b/ccmain/applybox.cpp
index ce46053a4..d8723854c 100644
--- a/ccmain/applybox.cpp
+++ b/ccmain/applybox.cpp
@@ -241,10 +241,12 @@ PAGE_RES* Tesseract::SetupApplyBoxes(const GenericVector<TBOX>& boxes,
 void Tesseract::MaximallyChopWord(const GenericVector<TBOX>& boxes,
                                   BLOCK* block, ROW* row,
                                   WERD_RES* word_res) {
-  if (!word_res->SetupForTessRecognition(unicharset, this, BestPix(), false,
-                                         textord_use_cjk_fp_model,
-                                         poly_allow_detailed_fx,
-                                         row, block)) {
+  if (!word_res->SetupForRecognition(unicharset, this, BestPix(),
+                                     tessedit_ocr_engine_mode, NULL,
+                                     classify_bln_numeric_mode,
+                                     textord_use_cjk_fp_model,
+                                     poly_allow_detailed_fx,
+                                     row, block)) {
     word_res->CloneChoppedToRebuild();
     return;
   }
diff --git a/ccmain/control.cpp b/ccmain/control.cpp
index 0110f2970..c60a3d118 100644
--- a/ccmain/control.cpp
+++ b/ccmain/control.cpp
@@ -97,8 +97,9 @@ BOOL8 Tesseract::recog_interactive(BLOCK* block, ROW* row, WERD_RES* word_res) {
   inT16 char_qual;
   inT16 good_char_qual;
 
-  classify_word_and_language(&Tesseract::classify_word_pass2,
-                             block, row, word_res);
+  WordData word_data(block, row, word_res);
+  SetupWordPassN(2, &word_data);
+  classify_word_and_language(&Tesseract::classify_word_pass2, &word_data);
   if (tessedit_debug_quality_metrics) {
     word_char_quality(word_res, row, &char_qual, &good_char_qual);
     tprintf
@@ -153,6 +154,111 @@ bool Tesseract::ProcessTargetWord(const TBOX& word_box,
   return true;
 }
 
+// Gathers the page's words into *words and sets each up for pass pass_n.
+void Tesseract::SetupAllWordsPassN(int pass_n,
+                                   const TBOX* target_word_box,
+                                   const char* word_config,
+                                   PAGE_RES* page_res,
+                                   GenericVector<WordData>* words) {
+  // NOTE(review): the filter below passes 1, not pass_n, on every pass.
+  PAGE_RES_IT page_res_it(page_res);
+  for (page_res_it.restart_page(); page_res_it.word() != NULL;
+       page_res_it.forward()) {
+    if (pass_n == 1)
+      page_res_it.word()->SetupFake(unicharset);  // Before the filter below.
+    if (target_word_box == NULL ||
+        ProcessTargetWord(page_res_it.word()->word->bounding_box(),
+                          *target_word_box, word_config, 1)) {
+      words->push_back(WordData(page_res_it));
+    }
+  }
+  // Setup all the words for recognition with polygonal approximation.
+  for (int w = 0; w < words->size(); ++w) {
+    SetupWordPassN(pass_n, &(*words)[w]);
+    if (w > 0) (*words)[w].prev_word = &(*words)[w - 1];  // Link to previous.
+  }
+}
+
+// Sets up word->word and one WERD_RES per sub-language for pass pass_n.
+void Tesseract::SetupWordPassN(int pass_n, WordData* word) {
+  if (pass_n == 1 || !word->word->done || tessedit_training_tess) {
+    if (pass_n == 2) {
+      // TODO(rays) Should we do this on pass1 too?
+      word->word->caps_height = 0.0;
+      if (word->word->x_height == 0.0f)
+        word->word->x_height = word->row->x_height();  // Use the row's.
+    }
+    // Skip the setup on pass 2 when the engine mode is OEM_CUBE_ONLY.
+    if (pass_n != 2 || tessedit_ocr_engine_mode != OEM_CUBE_ONLY) {
+      word->word->SetupForRecognition(
+            unicharset, this, BestPix(), tessedit_ocr_engine_mode, NULL,
+            classify_bln_numeric_mode, textord_use_cjk_fp_model,
+            poly_allow_detailed_fx, word->row, word->block);
+    }
+  }
+  if (!sub_langs_.empty()) {
+    if (word->lang_words.size() != sub_langs_.size()) {
+      // Resize lang_words to match sub_langs_, filling with an empty WERD_RES.
+      WERD_RES empty;
+      word->lang_words.init_to_size(sub_langs_.size(), empty);
+    }
+    for (int s = 0; s < sub_langs_.size(); ++s) {
+      Tesseract* lang_t = sub_langs_[s];  // Settings come from this language.
+      if (pass_n == 1 || (lang_t->tessedit_ocr_engine_mode != OEM_CUBE_ONLY &&
+          (!word->lang_words[s].done || lang_t->tessedit_training_tess))) {
+        word->lang_words[s].InitForRetryRecognition(*word->word);
+        word->lang_words[s].SetupForRecognition(
+              lang_t->unicharset, lang_t, BestPix(),
+              lang_t->tessedit_ocr_engine_mode, NULL,
+              lang_t->classify_bln_numeric_mode,
+              lang_t->textord_use_cjk_fp_model,
+              lang_t->poly_allow_detailed_fx, word->row, word->block);
+      }
+    }
+  }
+}
+
+
+// Runs pass_n recognition on all words; false on timeout or cancellation.
+bool Tesseract::RecogAllWordsPassN(int pass_n, ETEXT_DESC* monitor,
+                                   GenericVector<WordData>* words) {
+  // TODO(rays) Before this loop can be parallelized (it would yield a massive
+  // speed-up) all remaining member globals need to be converted to local/heap
+  // (eg set_pass1 and set_pass2) and an intermediate adaption pass needs to be
+  // added. The results will be significantly different with adaption on, and
+  // deterioration will need investigation.
+  for (int w = 0; w < words->size(); ++w) {
+    WordData* word = &(*words)[w];
+    if (monitor != NULL) {
+      monitor->ocr_alive = TRUE;
+      if (pass_n == 1)
+        monitor->progress = 30 + 50 * w / words->size();  // 30..79
+      else
+        monitor->progress = 80 + 10 * w / words->size();  // 80..89
+      if (monitor->deadline_exceeded() ||
+          (monitor->cancel != NULL && (*monitor->cancel)(monitor->cancel_this,
+                                                         words->size()))) {
+        // Timeout or cancel. Fake out this word and all the remaining ones.
+        for (; w < words->size(); ++w) {
+          (*words)[w].word->SetupFake(unicharset);
+        }
+        return false;
+      }
+    }
+    if (word->word->tess_failed) continue;  // Skip words marked as failed.
+    WordRecognizer recognizer = pass_n == 1 ? &Tesseract::classify_word_pass1
+                                            : &Tesseract::classify_word_pass2;
+    classify_word_and_language(recognizer, word);
+    if (tessedit_dump_choices) {
+      word_dumper(NULL, word->row, word->word);
+      tprintf("Pass%d: %s [%s]\n", pass_n,
+              word->word->best_choice->unichar_string().string(),
+              word->word->best_choice->debug_string().string());
+    }
+  }
+  return true;
+}
+
 /**
  * recog_all_words()
  *
@@ -179,27 +285,15 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
                                 const TBOX* target_word_box,
                                 const char* word_config,
                                 int dopasses) {
-  PAGE_RES_IT page_res_it;
-  inT32 word_index;              // current word
+  PAGE_RES_IT page_res_it(page_res);
 
   if (tessedit_minimal_rej_pass1) {
     tessedit_test_adaption.set_value (TRUE);
     tessedit_minimal_rejection.set_value (TRUE);
   }
 
-  // Before the main recognition loop below, walk through the whole page and set
-  // up fake words.  That way, if we run out of time a user will still get the
-  // expected best_choice and box_words out the end; they'll just be empty.
-  page_res_it.page_res = page_res;
-  for (page_res_it.restart_page(); page_res_it.word() != NULL;
-       page_res_it.forward()) {
-    page_res_it.word()->SetupFake(unicharset);
-  }
-
   if (dopasses==0 || dopasses==1) {
-    page_res_it.page_res=page_res;
     page_res_it.restart_page();
-
     // ****************** Pass 1 *******************
 
     // Clear adaptive classifier at the beginning of the page if it is full.
@@ -214,20 +308,15 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
       if (sub_langs_[i]->AdaptiveClassifierIsFull())
         sub_langs_[i]->ResetAdaptiveClassifierInternal();
     }
-
-    stats_.word_count = 0;
-    if (monitor != NULL) {
-      monitor->ocr_alive = TRUE;
-      while (page_res_it.word() != NULL) {
-        stats_.word_count++;
-        page_res_it.forward();
-      }
-      page_res_it.restart_page();
-    } else {
-      stats_.word_count = 1;
+    // Set up all words ready for recognition, so that if parallelism is on
+    // all the input and output classes are ready to run the classifier.
+    GenericVector<WordData> words;
+    SetupAllWordsPassN(1, target_word_box, word_config, page_res, &words);
+    if (tessedit_parallelize) {
+      PrerecAllWordsPar(words);
     }
 
-    word_index = 0;
+    stats_.word_count = words.size();
 
     stats_.dict_words = 0;
     stats_.doc_blob_quality = 0;
@@ -237,56 +326,15 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
     stats_.doc_good_char_quality = 0;
 
     most_recently_used_ = this;
+    // Run pass 1 word recognition.
+    if (!RecogAllWordsPassN(1, monitor, &words)) return false;
+    // Pass 1 post-processing.
     while (page_res_it.word() != NULL) {
-      set_global_loc_code(LOC_PASS1);
-      word_index++;
-      if (monitor != NULL) {
-        monitor->ocr_alive = TRUE;
-        monitor->progress = 30 + 50 * word_index / stats_.word_count;
-        if (monitor->deadline_exceeded() ||
-            (monitor->cancel != NULL && (*monitor->cancel)(monitor->cancel_this,
-                                                           stats_.dict_words)))
-          return false;
-      }
-      if (target_word_box &&
-          !ProcessTargetWord(page_res_it.word()->word->bounding_box(),
-                             *target_word_box, word_config, 1)) {
-        page_res_it.forward();
-        continue;
-      }
-      classify_word_and_language(&Tesseract::classify_word_pass1,
-                                 page_res_it.block()->block,
-                                 page_res_it.row()->row,
-                                 page_res_it.word());
       if (page_res_it.word()->word->flag(W_REP_CHAR)) {
         fix_rep_char(&page_res_it);
         page_res_it.forward();
         continue;
       }
-      if (tessedit_dump_choices) {
-        word_dumper(NULL, page_res_it.row()->row, page_res_it.word());
-        tprintf("Pass1: %s [%s]\n",
-                page_res_it.word()->best_choice->unichar_string().string(),
-                page_res_it.word()->best_choice->debug_string().string());
-      }
-
-      // tessedit_test_adaption enables testing of the accuracy of the
-      // input to the adaptive classifier.
-      if (tessedit_test_adaption && !tessedit_minimal_rejection) {
-        if (!word_adaptable (page_res_it.word(),
-          tessedit_test_adaption_mode)) {
-          page_res_it.word()->reject_map.rej_word_tess_failure();
-          // FAKE PERM REJ
-        } else {
-          // Override rejection mechanisms for this word.
-          UNICHAR_ID space = unicharset.unichar_to_id(" ");
-          for (int i = 0; i < page_res_it.word()->best_choice->length(); i++) {
-            if ((page_res_it.word()->best_choice->unichar_id(i) != space) &&
-                page_res_it.word()->reject_map[i].rejected())
-              page_res_it.word()->reject_map[i].setrej_minimal_rej_accept();
-          }
-        }
-      }
 
       // Count dict words.
       if (page_res_it.word()->best_choice->permuter() == USER_DAWG_PERM)
@@ -307,49 +355,26 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
   if (dopasses == 1) return true;
 
   // ****************** Pass 2 *******************
-  page_res_it.restart_page();
-  word_index = 0;
-  most_recently_used_ = this;
-  while (tessedit_tess_adaption_mode != 0x0 && !tessedit_test_adaption &&
-      page_res_it.word() != NULL) {
-    set_global_loc_code(LOC_PASS2);
-    word_index++;
-    if (monitor != NULL) {
-      monitor->ocr_alive = TRUE;
-      monitor->progress = 80 + 10 * word_index / stats_.word_count;
-      if (monitor->deadline_exceeded() ||
-          (monitor->cancel != NULL && (*monitor->cancel)(monitor->cancel_this,
-                                                         stats_.dict_words)))
-        return false;
+  if (tessedit_tess_adaption_mode != 0x0 && !tessedit_test_adaption) {
+    page_res_it.restart_page();
+    GenericVector<WordData> words;
+    SetupAllWordsPassN(2, target_word_box, word_config, page_res, &words);
+    if (tessedit_parallelize) {
+      PrerecAllWordsPar(words);
     }
-
-    // changed by jetsoft
-    // specific to its needs to extract one word when need
-    if (target_word_box &&
-        !ProcessTargetWord(page_res_it.word()->word->bounding_box(),
-                           *target_word_box, word_config, 2)) {
+    most_recently_used_ = this;
+    // Run pass 2 word recognition.
+    if (!RecogAllWordsPassN(2, monitor, &words)) return false;
+    // Pass 2 post-processing.
+    while (page_res_it.word() != NULL) {
+      WERD_RES* word = page_res_it.word();
+       if (word->word->flag(W_REP_CHAR) && !word->done) {
+        fix_rep_char(&page_res_it);
+        page_res_it.forward();
+        continue;
+      }
       page_res_it.forward();
-      continue;
     }
-    // end jetsoft
-
-    classify_word_and_language(&Tesseract::classify_word_pass2,
-                               page_res_it.block()->block,
-                               page_res_it.row()->row,
-                               page_res_it.word());
-    if (page_res_it.word()->word->flag(W_REP_CHAR) &&
-        !page_res_it.word()->done) {
-      fix_rep_char(&page_res_it);
-      page_res_it.forward();
-      continue;
-    }
-    if (tessedit_dump_choices) {
-      word_dumper(NULL, page_res_it.row()->row, page_res_it.word());
-      tprintf("Pass2: %s [%s]\n",
-              page_res_it.word()->best_choice->unichar_string().string(),
-              page_res_it.word()->best_choice->debug_string().string());
-    }
-    page_res_it.forward();
   }
 
   // The next passes can only be run if tesseract has been used, as cube
@@ -384,6 +409,7 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
     // Check the correctness of the final results.
     blamer_pass(page_res);
   }
+  script_pos_pass(page_res);
 
   // Write results pass.
   set_global_loc_code(LOC_WRITE_RESULTS);
@@ -672,6 +698,46 @@ void Tesseract::blamer_pass(PAGE_RES* page_res) {
   }
 }
 
+// Flags small caps and sets script positions on every non-W_REP_CHAR word.
+void Tesseract::script_pos_pass(PAGE_RES* page_res) {
+  PAGE_RES_IT page_res_it(page_res);
+  for (page_res_it.restart_page(); page_res_it.word() != NULL;
+       page_res_it.forward()) {
+    WERD_RES* word = page_res_it.word();
+    if (word->word->flag(W_REP_CHAR)) {
+      // The loop header advances; forwarding here too would skip a word.
+      continue;
+    }
+    float x_height = page_res_it.block()->block->x_height();
+    float word_x_height = word->x_height;
+    // Outside best_choice's x-height range: use the midpoint of that range.
+    if (word_x_height < word->best_choice->min_x_height() ||
+        word_x_height > word->best_choice->max_x_height()) {
+      word_x_height = (word->best_choice->min_x_height() +
+          word->best_choice->max_x_height()) / 2.0f;
+    }
+    // Test for small caps. Word capheight must be close to block xheight,
+    // and word must contain no lower case letters, and at least one upper case.
+    double small_cap_xheight = x_height * kXHeightCapRatio;
+    double small_cap_delta = (x_height - small_cap_xheight) / 2.0;
+    if (word->uch_set->script_has_xheight() &&
+        small_cap_xheight - small_cap_delta <= word_x_height &&
+        word_x_height <= small_cap_xheight + small_cap_delta) {
+      // Scan for upper/lower.
+      int num_upper = 0;
+      int num_lower = 0;
+      for (int i = 0; i < word->best_choice->length(); ++i) {
+        if (word->uch_set->get_isupper(word->best_choice->unichar_id(i)))
+          ++num_upper;
+        else if (word->uch_set->get_islower(word->best_choice->unichar_id(i)))
+          ++num_lower;
+      }
+      if (num_upper > 0 && num_lower == 0) word->small_caps = true;
+    }
+    word->SetScriptPositions();
+  }
+}
+
 // Helper returns true if the new_word is better than the word, using a
 // simple test of better certainty AND rating (to reduce false positives
 // from cube) or a dictionary vs non-dictionary word.
@@ -701,38 +767,33 @@ static bool NewWordBetter(const WERD_RES& word, const WERD_RES& new_word,
 
 // Helper to recognize the word using the given (language-specific) tesseract.
 // Returns true if the result was better than previously.
-bool Tesseract::RetryWithLanguage(WERD_RES *word, BLOCK* block, ROW *row,
+bool Tesseract::RetryWithLanguage(const WERD_RES& best_word,
+                                  WordData* word_data, WERD_RES* word,
                                   WordRecognizer recognizer) {
   if (classify_debug_level || cube_debug_level) {
     tprintf("Retrying word using lang %s, oem %d\n",
             lang.string(), static_cast<int>(tessedit_ocr_engine_mode));
   }
-  // Setup a trial WERD_RES in which to classify.
-  WERD_RES lang_word;
-  lang_word.InitForRetryRecognition(*word);
   // Run the recognizer on the word.
   // Initial version is a bit of a hack based on better certainty and rating
   // (to reduce false positives from cube) or a dictionary vs non-dictionary
   // word.
-  (this->*recognizer)(block, row, &lang_word);
-  bool new_is_better = NewWordBetter(*word, lang_word,
+  (this->*recognizer)(word_data, word);
+  bool new_is_better = NewWordBetter(best_word, *word,
                                      classify_max_rating_ratio,
                                      classify_max_certainty_margin);
   if (classify_debug_level || cube_debug_level) {
-    if (lang_word.best_choice == NULL) {
-      tprintf("New result %s better:%s\n",
+    if (word->best_choice == NULL) {
+      tprintf("NULL result %s better!\n",
               new_is_better ? "IS" : "NOT");
     } else {
       tprintf("New result %s better:%s, r=%g, c=%g\n",
               new_is_better ? "IS" : "NOT",
-              lang_word.best_choice->unichar_string().string(),
-              lang_word.best_choice->rating(),
-              lang_word.best_choice->certainty());
+              word->best_choice->unichar_string().string(),
+              word->best_choice->rating(),
+              word->best_choice->certainty());
     }
   }
-  if (new_is_better) {
-    word->ConsumeWordResults(&lang_word);
-  }
   return new_is_better;
 }
 
@@ -743,9 +804,9 @@ bool Tesseract::RetryWithLanguage(WERD_RES *word, BLOCK* block, ROW *row,
 // If recognition was not successful, tries all available languages until
 // it gets a successful result or runs out of languages. Keeps the best result.
 void Tesseract::classify_word_and_language(WordRecognizer recognizer,
-                                           BLOCK* block,
-                                           ROW *row,
-                                           WERD_RES *word) {
+                                           WordData* word_data) {
+  // Points to the best result. May be word or in lang_words.
+  WERD_RES* word = word_data->word;
   clock_t start_t = clock();
   if (classify_debug_level || cube_debug_level) {
     tprintf("Processing word with lang %s at:",
@@ -755,15 +816,23 @@ void Tesseract::classify_word_and_language(WordRecognizer recognizer,
   const char* result_type = "Initial";
   bool initially_done = !word->tess_failed && word->done;
   if (initially_done) {
-    // If done on pass1, we reuse the tesseract that did it, and don't try
-    // any more. The only need to call the classifier at all is for the
-    // cube combiner and xheight fixing (which may be bogus on a done word.)
+    // If done on pass1, leave it as-is.
     most_recently_used_ = word->tesseract;
     result_type = "Already done";
+  } else {
+    if (most_recently_used_ != this) {
+      // Point to the word for most_recently_used_.
+      for (int s = 0; s < sub_langs_.size(); ++s) {
+        if (most_recently_used_ == sub_langs_[s]) {
+          word = &word_data->lang_words[s];
+          break;
+        }
+      }
+    }
+    (most_recently_used_->*recognizer)(word_data, word);
+    if (!word->tess_failed && word->tess_accepted)
+      result_type = "Accepted";
   }
-  (most_recently_used_->*recognizer)(block, row, word);
-  if (!word->tess_failed && word->tess_accepted)
-    result_type = "Accepted";
   if (classify_debug_level || cube_debug_level) {
     tprintf("%s result: %s r=%.4g, c=%.4g, accepted=%d, adaptable=%d"
             " xht=[%g,%g]\n",
@@ -782,11 +851,31 @@ void Tesseract::classify_word_and_language(WordRecognizer recognizer,
       if (classify_debug_level) {
         tprintf("Retrying with main-Tesseract, lang: %s\n", lang.string());
       }
-      if (RetryWithLanguage(word, block, row, recognizer)) {
-        most_recently_used_ = this;
-        if (!word->tess_failed && word->tess_accepted)
-          return;  // No need to look at the others.
+      if (word_data->word->tesseract == this) {
+        // This is pass1, and we are trying the main language.
+        if (RetryWithLanguage(*word, word_data, word_data->word, recognizer)) {
+          most_recently_used_ = this;
+          word = word_data->word;
+        }
+      } else {
+        // This is pass2, and we are trying the main language again, but it
+        // has no word allocated to it, so we must re-initialize it.
+        WERD_RES main_word(*word_data->word);
+        main_word.InitForRetryRecognition(*word_data->word);
+        main_word.SetupForRecognition(unicharset, this, BestPix(),
+                                      tessedit_ocr_engine_mode, NULL,
+                                      classify_bln_numeric_mode,
+                                      textord_use_cjk_fp_model,
+                                      poly_allow_detailed_fx,
+                                      word_data->row, word_data->block);
+        if (RetryWithLanguage(*word, word_data, &main_word, recognizer)) {
+          most_recently_used_ = this;
+          word_data->word->ConsumeWordResults(&main_word);
+          word = word_data->word;
+        }
       }
+      if (!word->tess_failed && word->tess_accepted)
+        return;  // No need to look at the others.
     }
 
     for (int i = 0; i < sub_langs_.size(); ++i) {
@@ -795,14 +884,21 @@ void Tesseract::classify_word_and_language(WordRecognizer recognizer,
           tprintf("Retrying with sub-Tesseract[%d] lang: %s\n",
                   i, sub_langs_[i]->lang.string());
         }
-        if (sub_langs_[i]->RetryWithLanguage(word, block, row, recognizer)) {
+        if (sub_langs_[i]->RetryWithLanguage(*word, word_data,
+                                             &word_data->lang_words[i],
+                                             recognizer)) {
           most_recently_used_ = sub_langs_[i];
+          word = &word_data->lang_words[i];
           if (!word->tess_failed && word->tess_accepted)
-            return;  // No need to look at the others.
+            break;  // No need to look at the others.
         }
       }
     }
   }
+  if (word != word_data->word) {
+    // Move the result for the best language to the main word.
+    word_data->word->ConsumeWordResults(word);
+  }
   clock_t ocr_t = clock();
   if (tessedit_timing_debug) {
     tprintf("%s (ocr took %.2f sec)\n",
@@ -817,7 +913,11 @@ void Tesseract::classify_word_and_language(WordRecognizer recognizer,
  * Baseline normalize the word and pass it to Tess.
  */
 
-void Tesseract::classify_word_pass1(BLOCK* block, ROW *row, WERD_RES *word) {
+void Tesseract::classify_word_pass1(WordData* word_data, WERD_RES* word) {
+  ROW* row = word_data->row;
+  BLOCK* block = word_data->block;
+  prev_word_best_choice_ = word_data->prev_word != NULL
+      ? word_data->prev_word->word->best_choice : NULL;
   // If we only intend to run cube - run it and return.
   if (tessedit_ocr_engine_mode == OEM_CUBE_ONLY) {
     cube_word_pass1(block, row, word);
@@ -880,6 +980,10 @@ bool Tesseract::TrainedXheightFix(WERD_RES *word, BLOCK* block, ROW *row) {
     }
     new_x_ht_word.x_height = new_x_ht;
     new_x_ht_word.caps_height = 0.0;
+    new_x_ht_word.SetupForRecognition(
+          unicharset, this, BestPix(), tessedit_ocr_engine_mode, NULL,
+          classify_bln_numeric_mode, textord_use_cjk_fp_model,
+          poly_allow_detailed_fx, row, block);
     match_word_pass_n(2, &new_x_ht_word, row, block);
     if (!new_x_ht_word.tess_failed) {
       int new_misfits = CountMisfitTops(&new_x_ht_word);
@@ -916,11 +1020,15 @@ bool Tesseract::TrainedXheightFix(WERD_RES *word, BLOCK* block, ROW *row) {
  * Control what to do with the word in pass 2
  */
 
-void Tesseract::classify_word_pass2(BLOCK* block, ROW *row, WERD_RES *word) {
+void Tesseract::classify_word_pass2(WordData* word_data, WERD_RES* word) {
   // Return if we do not want to run Tesseract.
   if (tessedit_ocr_engine_mode != OEM_TESSERACT_ONLY &&
       tessedit_ocr_engine_mode != OEM_TESSERACT_CUBE_COMBINED)
     return;
+  ROW* row = word_data->row;
+  BLOCK* block = word_data->block;
+  prev_word_best_choice_ = word_data->prev_word != NULL
+      ? word_data->prev_word->word->best_choice : NULL;
 
   set_global_subloc_code(SUBLOC_NORM);
   check_debug_pt(word, 30);
@@ -940,26 +1048,6 @@ void Tesseract::classify_word_pass2(BLOCK* block, ROW *row, WERD_RES *word) {
       // Use the tops and bottoms since they are available.
       TrainedXheightFix(word, block, row);
     }
-    // Test for small caps. Word capheight must be close to block xheight,
-    // and word must contain no lower case letters, and at least one upper case.
-    double small_cap_xheight = block->x_height() * kXHeightCapRatio;
-    double small_cap_delta = (block->x_height() - small_cap_xheight) / 2.0;
-    if (unicharset.script_has_xheight() &&
-        small_cap_xheight - small_cap_delta <= word->x_height &&
-        word->x_height <= small_cap_xheight + small_cap_delta) {
-      // Scan for upper/lower.
-      int num_upper = 0;
-      int num_lower = 0;
-      for (int i = 0; i < word->best_choice->length(); ++i) {
-        if (unicharset.get_isupper(word->best_choice->unichar_id(i)))
-          ++num_upper;
-        else if (unicharset.get_islower(word->best_choice->unichar_id(i)))
-          ++num_lower;
-      }
-      if (num_upper > 0 && num_lower == 0)
-        word->small_caps = true;
-    }
-    word->SetScriptPositions();
 
     set_global_subloc_code(SUBLOC_NORM);
   }
@@ -988,12 +1076,8 @@ void Tesseract::classify_word_pass2(BLOCK* block, ROW *row, WERD_RES *word) {
 
 void Tesseract::match_word_pass_n(int pass_n, WERD_RES *word,
                                   ROW *row, BLOCK* block) {
-  if (word->SetupForTessRecognition(unicharset, this, BestPix(),
-                                    classify_bln_numeric_mode,
-                                    textord_use_cjk_fp_model,
-                                    poly_allow_detailed_fx,
-                                    row, block))
-    tess_segment_pass_n(pass_n, word);
+  if (word->tess_failed) return;
+  tess_segment_pass_n(pass_n, word);
 
   if (!word->tess_failed) {
     if (!word->word->flag (W_REP_CHAR)) {
@@ -1136,12 +1220,12 @@ void Tesseract::ExplodeRepeatedWord(BLOB_CHOICE* best_choice,
     WERD_RES* rep_word =
         page_res_it->InsertSimpleCloneWord(*word_res, blob_word);
     // Setup the single char WERD_RES
-    if (rep_word->SetupForTessRecognition(*word_res->uch_set, this, BestPix(),
-                                          false,
-                                          textord_use_cjk_fp_model,
-                                          poly_allow_detailed_fx,
-                                          page_res_it->row()->row,
-                                          page_res_it->block()->block)) {
+    if (rep_word->SetupForRecognition(*word_res->uch_set, this, BestPix(),
+                                      tessedit_ocr_engine_mode, NULL, false,
+                                      textord_use_cjk_fp_model,
+                                      poly_allow_detailed_fx,
+                                      page_res_it->row()->row,
+                                      page_res_it->block()->block)) {
       rep_word->CloneChoppedToRebuild();
       BLOB_CHOICE* blob_choice = new BLOB_CHOICE(*best_choice);
       rep_word->FakeClassifyWord(1, &blob_choice);
diff --git a/ccmain/cube_control.cpp b/ccmain/cube_control.cpp
index 411ea1a51..e0425e679 100644
--- a/ccmain/cube_control.cpp
+++ b/ccmain/cube_control.cpp
@@ -197,6 +197,9 @@ void Tesseract::run_cube_combiner(PAGE_RES *page_res) {
   // Iterate through the word results and call cube on each word.
   for (page_res_it.restart_page(); page_res_it.word () != NULL;
        page_res_it.forward()) {
+    BLOCK* block = page_res_it.block()->block;
+    if (block->poly_block() != NULL && !block->poly_block()->IsText())
+      continue;  // Don't deal with non-text blocks.
     WERD_RES* word = page_res_it.word();
     // Skip cube entirely if tesseract's certainty is greater than threshold.
     int combiner_run_thresh = convert_prob_to_tess_certainty(
@@ -210,6 +213,11 @@ void Tesseract::run_cube_combiner(PAGE_RES *page_res) {
     // Setup a trial WERD_RES in which to classify with cube.
     WERD_RES cube_word;
     cube_word.InitForRetryRecognition(*word);
+    cube_word.SetupForRecognition(lang_tess->unicharset, this, BestPix(),
+                                  OEM_CUBE_ONLY,
+                                  NULL, false, false, false,
+                                  page_res_it.row()->row,
+                                  page_res_it.block()->block);
     CubeObject *cube_obj = lang_tess->cube_recognize_word(
         page_res_it.block()->block, &cube_word);
     if (cube_obj != NULL)
@@ -317,10 +325,6 @@ void Tesseract::cube_combine_word(CubeObject* cube_obj, WERD_RES* cube_word,
  **********************************************************************/
 bool Tesseract::cube_recognize(CubeObject *cube_obj, BLOCK* block,
                                WERD_RES *word) {
-  if (!word->SetupForCubeRecognition(unicharset, this, block)) {
-    return false;  // Graphics block.
-  }
-
   // Run cube
   WordAltList *cube_alt_list = cube_obj->RecognizeWord();
   if (!cube_alt_list || cube_alt_list->AltCount() <= 0) {
diff --git a/ccmain/fixspace.cpp b/ccmain/fixspace.cpp
index ec568720c..24b37073c 100644
--- a/ccmain/fixspace.cpp
+++ b/ccmain/fixspace.cpp
@@ -204,8 +204,9 @@ void Tesseract::match_current_words(WERD_RES_LIST &words, ROW *row,
   for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
     word = word_it.data();
     if ((!word->part_of_combo) && (word->box_word == NULL)) {
-      classify_word_and_language(&Tesseract::classify_word_pass2,
-                                 block, row, word);
+      WordData word_data(block, row, word);
+      SetupWordPassN(2, &word_data);
+      classify_word_and_language(&Tesseract::classify_word_pass2, &word_data);
     }
     prev_word_best_choice_ = word->best_choice;
   }
diff --git a/ccmain/pgedit.cpp b/ccmain/pgedit.cpp
index faf7df9c3..8812808f4 100644
--- a/ccmain/pgedit.cpp
+++ b/ccmain/pgedit.cpp
@@ -731,10 +731,12 @@ BOOL8 Tesseract:: word_blank_and_set_display(BLOCK* block, ROW* row,
 BOOL8 Tesseract::word_bln_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
   TWERD *bln_word = word_res->chopped_word;
   if (bln_word == NULL) {
-    word_res->SetupForTessRecognition(unicharset, this, BestPix(), false,
-                                      textord_use_cjk_fp_model,
-                                      poly_allow_detailed_fx,
-                                      row, block);
+    word_res->SetupForRecognition(unicharset, this, BestPix(),
+                                  tessedit_ocr_engine_mode, NULL,
+                                  classify_bln_numeric_mode,
+                                  textord_use_cjk_fp_model,
+                                  poly_allow_detailed_fx,
+                                  row, block);
     bln_word = word_res->chopped_word;
   }
   bln_word_window_handle()->Clear();
@@ -963,10 +965,12 @@ void Tesseract::blob_feature_display(PAGE_RES* page_res,
   if (word != NULL) {
     WERD_RES word_res(word);
     word_res.x_height = row->x_height();
-    word_res.SetupForTessRecognition(unicharset, this, BestPix(), false,
-                                     textord_use_cjk_fp_model,
-                                     poly_allow_detailed_fx,
-                                     row, block);
+    word_res.SetupForRecognition(unicharset, this, BestPix(),
+                                 tessedit_ocr_engine_mode, NULL,
+                                 classify_bln_numeric_mode,
+                                 textord_use_cjk_fp_model,
+                                 poly_allow_detailed_fx,
+                                 row, block);
     TWERD* bln_word = word_res.chopped_word;
     TBLOB* bln_blob = bln_word->blobs[0];
     INT_FX_RESULT_STRUCT fx_info;
diff --git a/ccmain/recogtraining.cpp b/ccmain/recogtraining.cpp
index d9c27b9be..e4d6e5f3d 100644
--- a/ccmain/recogtraining.cpp
+++ b/ccmain/recogtraining.cpp
@@ -203,7 +203,9 @@ void Tesseract::ambigs_classify_and_output(WERD_RES *werd_res,
                                            FILE *output_file) {
   // Classify word.
   fflush(stdout);
-  classify_word_pass1(block_res->block, row_res->row, werd_res);
+  WordData word_data(block_res->block, row_res->row, werd_res);
+  SetupWordPassN(1, &word_data);
+  classify_word_pass1(&word_data, werd_res);
   WERD_CHOICE *best_choice = werd_res->best_choice;
   ASSERT_HOST(best_choice != NULL);
 
diff --git a/ccmain/tesseractclass.cpp b/ccmain/tesseractclass.cpp
index 40e388395..17ed433c3 100644
--- a/ccmain/tesseractclass.cpp
+++ b/ccmain/tesseractclass.cpp
@@ -402,6 +402,8 @@ Tesseract::Tesseract()
                      "for layout analysis.", this->params()),
     BOOL_MEMBER(textord_equation_detect, false, "Turn on equation detector",
                 this->params()),
+    INT_MEMBER(tessedit_parallelize, 0, "Run in parallel where possible",
+                this->params()),
 
     // The following parameters were deprecated and removed from their original
     // locations. The parameters are temporarily kept here to give Tesseract
@@ -528,7 +530,6 @@ void Tesseract::Clear() {
   reskew_ = FCOORD(1.0f, 0.0f);
   splitter_.Clear();
   scaled_factor_ = -1;
-  ResetFeaturesHaveBeenExtracted();
   for (int i = 0; i < sub_langs_.size(); ++i)
     sub_langs_[i]->Clear();
 }
diff --git a/ccmain/tesseractclass.h b/ccmain/tesseractclass.h
index 3c1d5fa44..311bb9460 100644
--- a/ccmain/tesseractclass.h
+++ b/ccmain/tesseractclass.h
@@ -100,10 +100,6 @@ class EquationDetect;
 class Tesseract;
 class TesseractCubeCombiner;
 
-typedef void (Tesseract::*WordRecognizer)(BLOCK* block,
-                                          ROW *row,
-                                          WERD_RES *word);
-
 // A collection of various variables for statistics and debugging.
 struct TesseractStats {
   TesseractStats()
@@ -136,6 +132,24 @@ struct TesseractStats {
   bool write_results_empty_block;
 };
 
+// Pointers to the data needed to process one word; none is deleted here.
+struct WordData {
+  WordData() : word(NULL), row(NULL), block(NULL), prev_word(NULL) {}
+  explicit WordData(const PAGE_RES_IT& page_res_it)  // Derefs row()/block().
+    : word(page_res_it.word()), row(page_res_it.row()->row),
+      block(page_res_it.block()->block), prev_word(NULL) {}
+  WordData(BLOCK* block_in, ROW* row_in, WERD_RES* word_res)  // From parts.
+    : word(word_res), row(row_in), block(block_in), prev_word(NULL) {}
+
+  WERD_RES* word;  // Word result being processed.
+  ROW* row;  // Row given at construction (NULL by default).
+  BLOCK* block;  // Block given at construction (NULL by default).
+  WordData* prev_word;  // NULL after construction; presumably set by caller.
+  GenericVector<WERD_RES> lang_words;  // Presumably per-language results.
+};
+
+typedef void (Tesseract::*WordRecognizer)(WordData* word_data, WERD_RES* word);
+
 class Tesseract : public Wordrec {
  public:
   Tesseract();
@@ -250,10 +264,23 @@ class Tesseract : public Wordrec {
       bool single_column, bool osd, bool only_osd,
       BLOCK_LIST* blocks, Tesseract* osd_tess, OSResults* osr,
       TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix, Pix** music_mask_pix);
+  // par_control.cpp
+  void PrerecAllWordsPar(const GenericVector<WordData>& words);
 
   //// control.h /////////////////////////////////////////////////////////
   bool ProcessTargetWord(const TBOX& word_box, const TBOX& target_word_box,
                          const char* word_config, int pass);
+  // Sets up the words ready for whichever engine is to be run.
+  void SetupAllWordsPassN(int pass_n,
+                          const TBOX* target_word_box,
+                          const char* word_config,
+                          PAGE_RES* page_res,
+                          GenericVector<WordData>* words);
+  // Sets up the single word ready for whichever engine is to be run.
+  void SetupWordPassN(int pass_n, WordData* word);
+  // Runs word recognition on all the words.
+  bool RecogAllWordsPassN(int pass_n, ETEXT_DESC* monitor,
+                          GenericVector<WordData>* words);
   bool recog_all_words(PAGE_RES* page_res,
                        ETEXT_DESC* monitor,
                        const TBOX* target_word_box,
@@ -265,13 +292,15 @@ class Tesseract : public Wordrec {
                         const char* word_config);
   void bigram_correction_pass(PAGE_RES *page_res);
   void blamer_pass(PAGE_RES* page_res);
+  // Sets script positions and detects smallcaps on all output words.
+  void script_pos_pass(PAGE_RES* page_res);
   // Helper to recognize the word using the given (language-specific) tesseract.
   // Returns true if the result was better than previously.
-  bool RetryWithLanguage(WERD_RES *word, BLOCK* block, ROW *row,
-                         WordRecognizer recognizer);
+  bool RetryWithLanguage(const WERD_RES& best_word, WordData* word_data,
+                         WERD_RES* word, WordRecognizer recognizer);
   void classify_word_and_language(WordRecognizer recognizer,
-                                  BLOCK* block, ROW *row, WERD_RES *word);
-  void classify_word_pass1(BLOCK* block, ROW *row, WERD_RES *word);
+                                  WordData* word_data);
+  void classify_word_pass1(WordData* word_data, WERD_RES* word);
   void recog_pseudo_word(PAGE_RES* page_res,  // blocks to check
                          TBOX &selection_box);
 
@@ -282,7 +311,7 @@ class Tesseract : public Wordrec {
                                               const char *s,
                                               const char *lengths);
   void match_word_pass_n(int pass_n, WERD_RES *word, ROW *row, BLOCK* block);
-  void classify_word_pass2(BLOCK* block, ROW *row, WERD_RES *word);
+  void classify_word_pass2(WordData* word_data, WERD_RES* word);
   void ReportXhtFixResult(bool accept_new_word, float new_x_ht,
                           WERD_RES* word, WERD_RES* new_word);
   bool RunOldFixXht(WERD_RES *word, BLOCK* block, ROW *row);
@@ -936,6 +965,7 @@ class Tesseract : public Wordrec {
              "Only initialize with the config file. Useful if the instance is "
              "not going to be used for OCR but say only for layout analysis.");
   BOOL_VAR_H(textord_equation_detect, false, "Turn on equation detector");
+  INT_VAR_H(tessedit_parallelize, 0, "Run in parallel where possible");
 
   // The following parameters were deprecated and removed from their original
   // locations. The parameters are temporarily kept here to give Tesseract
diff --git a/ccstruct/blobs.cpp b/ccstruct/blobs.cpp
index fe758b0d3..a8bac832a 100644
--- a/ccstruct/blobs.cpp
+++ b/ccstruct/blobs.cpp
@@ -741,19 +741,36 @@ TWERD* TWERD::PolygonalCopy(bool allow_detailed_fx, WERD* src) {
 // DENORMs in the blobs.
 void TWERD::BLNormalize(const BLOCK* block, const ROW* row, Pix* pix,
                         bool inverse, float x_height, bool numeric_mode,
+                        tesseract::OcrEngineMode hint,
+                        const TBOX* norm_box,
                         DENORM* word_denorm) {
   TBOX word_box = bounding_box();
+  if (norm_box != NULL) word_box = *norm_box;
   float word_middle = (word_box.left() + word_box.right()) / 2.0f;
+  float input_y_offset = 0.0f;
+  float final_y_offset = static_cast<float>(kBlnBaselineOffset);
+  float scale = kBlnXHeight / x_height;
+  if (hint == tesseract::OEM_CUBE_ONLY || row == NULL) {
+    word_middle = word_box.left();
+    input_y_offset = word_box.bottom();
+    final_y_offset = 0.0f;
+    if (hint == tesseract::OEM_CUBE_ONLY)
+      scale = 1.0f;
+  } else {
+    input_y_offset = row->base_line(word_middle);
+  }
   for (int b = 0; b < blobs.size(); ++b) {
     TBLOB* blob = blobs[b];
     TBOX blob_box = blob->bounding_box();
     float mid_x = (blob_box.left() + blob_box.right()) / 2.0f;
-    float baseline = row->base_line(mid_x);
-    float scale = kBlnXHeight / x_height;
+    float baseline = input_y_offset;
+    float blob_scale = scale;
     if (numeric_mode) {
       baseline = blob_box.bottom();
-      scale = ClipToRange(kBlnXHeight * 4.0f / (3 * blob_box.height()),
-                          scale, scale * 1.5f);
+      blob_scale = ClipToRange(kBlnXHeight * 4.0f / (3 * blob_box.height()),
+                               scale, scale * 1.5f);
+    } else if (row != NULL && hint != tesseract::OEM_CUBE_ONLY) {
+      baseline = row->base_line(mid_x);
     }
     // The image will be 8-bit grey if the input was grey or color. Note that in
     // a grey image 0 is black and 255 is white. If the input was binary, then
@@ -761,16 +778,13 @@ void TWERD::BLNormalize(const BLOCK* block, const ROW* row, Pix* pix,
     // To tell the difference pixGetDepth() will return 8 or 1.
     // The inverse flag will be true iff the word has been determined to be
     // white on black, and is independent of whether the pix is 8 bit or 1 bit.
-    blob->Normalize(block, NULL, NULL, word_middle, baseline, scale, scale,
-                    0.0f, static_cast<float>(kBlnBaselineOffset),
-                    inverse, pix);
+    blob->Normalize(block, NULL, NULL, word_middle, baseline, blob_scale,
+                    blob_scale, 0.0f, final_y_offset, inverse, pix);
   }
   if (word_denorm != NULL) {
-    float scale = kBlnXHeight / x_height;
     word_denorm->SetupNormalization(block, NULL, NULL, word_middle,
-                                    row->base_line(word_middle),
-                                    scale, scale, 0.0f,
-                                    static_cast<float>(kBlnBaselineOffset));
+                                    input_y_offset, scale, scale,
+                                    0.0f, final_y_offset);
     word_denorm->set_inverse(inverse);
     word_denorm->set_pix(pix);
   }
diff --git a/ccstruct/blobs.h b/ccstruct/blobs.h
index 24aae1134..e39761b17 100644
--- a/ccstruct/blobs.h
+++ b/ccstruct/blobs.h
@@ -31,6 +31,7 @@
 ----------------------------------------------------------------------*/
 #include "clst.h"
 #include "normalis.h"
+#include "publictypes.h"
 #include "rect.h"
 #include "vecfuncs.h"
 
@@ -316,7 +317,10 @@ struct TWERD {
   // Baseline normalizes the blobs in-place, recording the normalization in the
   // DENORMs in the blobs.
   void BLNormalize(const BLOCK* block, const ROW* row, Pix* pix, bool inverse,
-                   float x_height, bool numeric_mode, DENORM* word_denorm);
+                   float x_height, bool numeric_mode,
+                   tesseract::OcrEngineMode hint,
+                   const TBOX* norm_box,
+                   DENORM* word_denorm);
   // Copies the data and the blobs, but leaves next untouched.
   void CopyFrom(const TWERD& src);
   // Deletes owned data.
diff --git a/ccstruct/pageres.cpp b/ccstruct/pageres.cpp
index c710ee207..342a2a371 100644
--- a/ccstruct/pageres.cpp
+++ b/ccstruct/pageres.cpp
@@ -32,6 +32,8 @@ static const double kStopperAmbiguityThresholdGain = 8.0;
 // Constant offset for computing thresholds that determine the ambiguity of a
 // word.
 static const double kStopperAmbiguityThresholdOffset = 1.5;
+// Max number of broken pieces to associate.
+const int kWordrecMaxNumJoinChunks = 4;
 
 // Computes and returns a threshold of certainty difference used to determine
 // which words to keep, based on the adjustment factors of the two words.
@@ -245,16 +247,25 @@ void WERD_RES::InitForRetryRecognition(const WERD_RES& source) {
 // If allow_detailed_fx is true, the feature extractor will receive fine
 // precision outline information, allowing smoother features and better
 // features on low resolution images.
+// The norm_mode sets the default mode for normalization in absence
+// of any of the above flags.
+// norm_box is used to override the word bounding box to determine the
+// normalization scale and offset.
 // Returns false if the word is empty and sets up fake results.
-bool WERD_RES::SetupForTessRecognition(const UNICHARSET& unicharset_in,
+bool WERD_RES::SetupForRecognition(const UNICHARSET& unicharset_in,
                                    tesseract::Tesseract* tess, Pix* pix,
+                                   int norm_mode,
+                                   const TBOX* norm_box,
                                    bool numeric_mode,
                                    bool use_body_size,
                                    bool allow_detailed_fx,
-                                   ROW *row, BLOCK* block) {
+                                   ROW *row, const BLOCK* block) {
+  tesseract::OcrEngineMode norm_mode_hint =
+      static_cast<tesseract::OcrEngineMode>(norm_mode);
   tesseract = tess;
   POLY_BLOCK* pb = block != NULL ? block->poly_block() : NULL;
-  if (word->cblob_list()->empty() || (pb != NULL && !pb->IsText())) {
+  if ((norm_mode_hint != tesseract::OEM_CUBE_ONLY &&
+       word->cblob_list()->empty()) || (pb != NULL && !pb->IsText())) {
     // Empty words occur when all the blobs have been moved to the rej_blobs
     // list, which seems to occur frequently in junk.
     SetupFake(unicharset_in);
@@ -264,13 +275,17 @@ bool WERD_RES::SetupForTessRecognition(const UNICHARSET& unicharset_in,
   ClearResults();
   SetupWordScript(unicharset_in);
   chopped_word = TWERD::PolygonalCopy(allow_detailed_fx, word);
-  float word_xheight = use_body_size && row->body_size() > 0.0f
+  float word_xheight = use_body_size && row != NULL && row->body_size() > 0.0f
                      ? row->body_size() : x_height;
   chopped_word->BLNormalize(block, row, pix, word->flag(W_INVERSE),
-                            word_xheight, numeric_mode, &denorm);
+                            word_xheight, numeric_mode, norm_mode_hint,
+                            norm_box, &denorm);
   blob_row = row;
   SetupBasicsFromChoppedWord(unicharset_in);
   SetupBlamerBundle();
+  int num_blobs = chopped_word->NumBlobs();
+  ratings = new MATRIX(num_blobs, kWordrecMaxNumJoinChunks);
+  tess_failed = false;
   return true;
 }
 
@@ -284,30 +299,6 @@ void WERD_RES::SetupBasicsFromChoppedWord(const UNICHARSET &unicharset_in) {
   ClearWordChoices();
 }
 
-// Sets up the members used in recognition:
-// bln_boxes, chopped_word, seam_array, denorm, best_choice, raw_choice.
-// Returns false if the word is empty and sets up fake results.
-bool WERD_RES::SetupForCubeRecognition(const UNICHARSET& unicharset_in,
-                                       tesseract::Tesseract* tess,
-                                       const BLOCK* block) {
-  tesseract = tess;
-  POLY_BLOCK* pb = block != NULL ? block->poly_block() : NULL;
-  if (pb != NULL && !pb->IsText()) {
-    // Ignore words in graphic regions.
-    SetupFake(unicharset_in);
-    word->set_flag(W_REP_CHAR, false);
-    return false;
-  }
-  ClearResults();
-  SetupWordScript(unicharset_in);
-  TBOX word_box = word->bounding_box();
-  denorm.SetupNormalization(block, NULL, NULL,
-                            word_box.left(), word_box.bottom(),
-                            1.0f, 1.0f, 0.0f, 0.0f);
-  SetupBlamerBundle();
-  return true;
-}
-
 // Sets up the members used in recognition for an empty recognition result:
 // bln_boxes, chopped_word, seam_array, denorm, best_choice, raw_choice.
 void WERD_RES::SetupFake(const UNICHARSET& unicharset_in) {
diff --git a/ccstruct/pageres.h b/ccstruct/pageres.h
index d8044129a..4eb29ee4b 100644
--- a/ccstruct/pageres.h
+++ b/ccstruct/pageres.h
@@ -339,7 +339,8 @@ class WERD_RES : public ELIST_LINK {
   // characters purely based on their shape on the page, and by default produce
   // the corresponding unicode for a left-to-right context.
   const char* const BestUTF8(int blob_index, bool in_rtl_context) const {
-    if (blob_index < 0 || blob_index >= best_choice->length())
+    if (blob_index < 0 || best_choice == NULL ||
+        blob_index >= best_choice->length())
       return NULL;
     UNICHAR_ID id = best_choice->unichar_id(blob_index);
     if (id < 0 || id >= uch_set->size() || id == INVALID_UNICHAR_ID)
@@ -435,25 +436,22 @@ class WERD_RES : public ELIST_LINK {
   // If allow_detailed_fx is true, the feature extractor will receive fine
   // precision outline information, allowing smoother features and better
   // features on low resolution images.
+  // The norm_mode sets the default mode for normalization in absence
+  // of any of the above flags. It should really be a tesseract::OcrEngineMode
+  // but is declared as int for ease of use with tessedit_ocr_engine_mode.
   // Returns false if the word is empty and sets up fake results.
-  bool SetupForTessRecognition(const UNICHARSET& unicharset_in,
-                               tesseract::Tesseract* tesseract, Pix* pix,
-                               bool numeric_mode, bool use_body_size,
-                               bool allow_detailed_fx,
-                               ROW *row, BLOCK* block);
+  bool SetupForRecognition(const UNICHARSET& unicharset_in,
+                           tesseract::Tesseract* tesseract, Pix* pix,
+                           int norm_mode,
+                           const TBOX* norm_box, bool numeric_mode,
+                           bool use_body_size, bool allow_detailed_fx,
+                           ROW *row, const BLOCK* block);
 
   // Set up the seam array, bln_boxes, best_choice, and raw_choice to empty
   // accumulators from a made chopped word.  We presume the fields are already
   // empty.
   void SetupBasicsFromChoppedWord(const UNICHARSET &unicharset_in);
 
-  // Sets up the members used in recognition:
-  // bln_boxes, chopped_word, seam_array, denorm.
-  // Returns false if the word is empty and sets up fake results.
-  bool SetupForCubeRecognition(const UNICHARSET& unicharset_in,
-                               tesseract::Tesseract* tesseract,
-                               const BLOCK* block);
-
   // Sets up the members used in recognition for an empty recognition result:
   // bln_boxes, chopped_word, seam_array, denorm, best_choice, raw_choice.
   void SetupFake(const UNICHARSET& uch);
diff --git a/ccstruct/ratngs.cpp b/ccstruct/ratngs.cpp
index 30608bb46..9460262c5 100644
--- a/ccstruct/ratngs.cpp
+++ b/ccstruct/ratngs.cpp
@@ -530,8 +530,9 @@ void WERD_CHOICE::SetScriptPositions(bool small_caps, TWERD* word) {
   // Initialize to normal.
   for (int i = 0; i < length_; ++i)
     script_pos_[i] = tesseract::SP_NORMAL;
-  if (word->blobs.empty())
+  if (word->blobs.empty() || word->NumBlobs() != TotalOfStates()) {
     return;
+  }
 
   int position_counts[4];
   for (int i = 0; i < 4; i++) {
diff --git a/classify/adaptmatch.cpp b/classify/adaptmatch.cpp
index 1ef606e3b..6051a95a0 100644
--- a/classify/adaptmatch.cpp
+++ b/classify/adaptmatch.cpp
@@ -122,8 +122,6 @@ struct PROTO_KEY {
 #define MarginalMatch(Rating)       \
 ((Rating) > matcher_great_threshold)
 
-#define InitIntFX() (FeaturesHaveBeenExtracted = FALSE)
-
 /*-----------------------------------------------------------------------------
           Private Function Prototypes
 -----------------------------------------------------------------------------*/
@@ -179,8 +177,7 @@ void Classify::AdaptiveClassifier(TBLOB *Blob,
   ADAPT_RESULTS *Results = new ADAPT_RESULTS();
   Results->Initialize();
 
-  if (AdaptedTemplates == NULL)
-    AdaptedTemplates = NewAdaptedTemplates (true);
+  ASSERT_HOST(AdaptedTemplates != NULL);
 
   DoAdaptiveMatch(Blob, Results);
   if (CPResults != NULL)
@@ -207,7 +204,6 @@ void Classify::AdaptiveClassifier(TBLOB *Blob,
     DebugAdaptiveClassifier(Blob, Results);
 #endif
 
-  NumClassesOutput += Choices->length();
   delete Results;
 }                                /* AdaptiveClassifier */
 
@@ -249,7 +245,6 @@ void Classify::LearnWord(const char* filename, WERD_RES *word) {
     if (!EnableLearning || word->best_choice == NULL)
       return;  // Can't or won't adapt.
 
-    NumWordsAdaptedTo++;
     if (classify_learning_debug_level >= 1)
       tprintf("\n\nAdapting to word = %s\n",
               word->best_choice->debug_string().string());
@@ -480,15 +475,11 @@ void Classify::EndAdaptiveClassifier() {
   FreeNormProtos();
   if (AllProtosOn != NULL) {
     FreeBitVector(AllProtosOn);
-    FreeBitVector(PrunedProtos);
     FreeBitVector(AllConfigsOn);
-    FreeBitVector(AllProtosOff);
     FreeBitVector(AllConfigsOff);
     FreeBitVector(TempProtoMask);
     AllProtosOn = NULL;
-    PrunedProtos = NULL;
     AllConfigsOn = NULL;
-    AllProtosOff = NULL;
     AllConfigsOff = NULL;
     TempProtoMask = NULL;
   }
@@ -561,19 +552,15 @@ void Classify::InitAdaptiveClassifier(bool load_pre_trained_templates) {
     static_classifier_ = new TessClassifier(false, this);
   }
 
-  im_.Init(&classify_debug_level, classify_integer_matcher_multiplier);
+  im_.Init(&classify_debug_level);
   InitIntegerFX();
 
   AllProtosOn = NewBitVector(MAX_NUM_PROTOS);
-  PrunedProtos = NewBitVector(MAX_NUM_PROTOS);
   AllConfigsOn = NewBitVector(MAX_NUM_CONFIGS);
-  AllProtosOff = NewBitVector(MAX_NUM_PROTOS);
   AllConfigsOff = NewBitVector(MAX_NUM_CONFIGS);
   TempProtoMask = NewBitVector(MAX_NUM_PROTOS);
   set_all_bits(AllProtosOn, WordsInVectorOfSize(MAX_NUM_PROTOS));
-  set_all_bits(PrunedProtos, WordsInVectorOfSize(MAX_NUM_PROTOS));
   set_all_bits(AllConfigsOn, WordsInVectorOfSize(MAX_NUM_CONFIGS));
-  zero_all_bits(AllProtosOff, WordsInVectorOfSize(MAX_NUM_PROTOS));
   zero_all_bits(AllConfigsOff, WordsInVectorOfSize(MAX_NUM_CONFIGS));
 
   for (int i = 0; i < MAX_NUM_CLASSES; i++) {
@@ -617,53 +604,11 @@ void Classify::ResetAdaptiveClassifierInternal() {
             NumAdaptationsFailed);
   }
   free_adapted_templates(AdaptedTemplates);
-  AdaptedTemplates = NULL;
+  AdaptedTemplates = NewAdaptedTemplates(true);
   NumAdaptationsFailed = 0;
 }
 
 
-/*---------------------------------------------------------------------------*/
-/**
- * Print to File the statistics which have
- * been gathered for the adaptive matcher.
- *
- * @param File open text file to print adaptive statistics to
- *
- * Globals: none
- *
- * @note Exceptions: none
- * @note History: Thu Apr 18 14:37:37 1991, DSJ, Created.
- */
-void Classify::PrintAdaptiveStatistics(FILE *File) {
-  #ifndef SECURE_NAMES
-
-  fprintf (File, "\nADAPTIVE MATCHER STATISTICS:\n");
-  fprintf (File, "\tNum blobs classified = %d\n", AdaptiveMatcherCalls);
-  fprintf (File, "\tNum classes output   = %d (Avg = %4.2f)\n",
-    NumClassesOutput,
-    ((AdaptiveMatcherCalls == 0) ? (0.0) :
-  ((float) NumClassesOutput / AdaptiveMatcherCalls)));
-  fprintf (File, "\t\tBaseline Classifier: %4d calls (%4.2f classes/call)\n",
-    BaselineClassifierCalls,
-    ((BaselineClassifierCalls == 0) ? (0.0) :
-  ((float) NumBaselineClassesTried / BaselineClassifierCalls)));
-  fprintf (File, "\t\tCharNorm Classifier: %4d calls (%4.2f classes/call)\n",
-    CharNormClassifierCalls,
-    ((CharNormClassifierCalls == 0) ? (0.0) :
-  ((float) NumCharNormClassesTried / CharNormClassifierCalls)));
-  fprintf (File, "\t\tAmbig    Classifier: %4d calls (%4.2f classes/call)\n",
-    AmbigClassifierCalls,
-    ((AmbigClassifierCalls == 0) ? (0.0) :
-  ((float) NumAmbigClassesTried / AmbigClassifierCalls)));
-
-  fprintf (File, "\nADAPTIVE LEARNER STATISTICS:\n");
-  fprintf (File, "\tNumber of words adapted to: %d\n", NumWordsAdaptedTo);
-  fprintf (File, "\tNumber of chars adapted to: %d\n", NumCharsAdaptedTo);
-
-  PrintAdaptedTemplates(File, AdaptedTemplates);
-  #endif
-}                                /* PrintAdaptiveStatistics */
-
 
 /*---------------------------------------------------------------------------*/
 /**
@@ -915,8 +860,6 @@ void Classify::AdaptToChar(TBLOB *Blob,
   FEATURE_SET FloatFeatures;
   int NewTempConfigId;
 
-  ResetFeaturesHaveBeenExtracted();
-  NumCharsAdaptedTo++;
   if (!LegalClassId (ClassId))
     return;
 
@@ -932,7 +875,6 @@ void Classify::AdaptToChar(TBLOB *Blob,
     if (NumFeatures <= 0)
       return;
 
-    im_.SetBaseLineMatch();
     // Only match configs with the matching font.
     BIT_VECTOR MatchingFontConfigs = NewBitVector(MAX_NUM_PROTOS);
     for (int cfg = 0; cfg < IClass->NumConfigs; ++cfg) {
@@ -1004,17 +946,16 @@ void Classify::AdaptToChar(TBLOB *Blob,
 
 void Classify::DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class) {
 #ifndef GRAPHICS_DISABLED
-  int bloblength = 0;
-  INT_FEATURE_ARRAY features;
-  uinT8* norm_array = new uinT8[unicharset.size()];
-  int num_features = GetBaselineFeatures(blob, PreTrainedTemplates,
-                                         features,
-                                         norm_array, &bloblength);
-  delete [] norm_array;
-  INT_RESULT_STRUCT IntResult;
+  INT_FX_RESULT_STRUCT fx_info;
+  GenericVector<INT_FEATURE_STRUCT> bl_features;
+  TrainingSample* sample =
+      BlobToTrainingSample(*blob, classify_nonlinear_norm, &fx_info,
+                           &bl_features);
+  if (sample == NULL) return;
 
+  INT_RESULT_STRUCT IntResult;
   im_.Match(int_class, AllProtosOn, AllConfigsOn,
-            num_features, features,
+            bl_features.size(), &bl_features[0],
             &IntResult, classify_adapt_feature_threshold,
             NO_DEBUG, matcher_debug_separate_windows);
   cprintf ("Best match to temp config %d = %4.1f%%.\n",
@@ -1024,7 +965,7 @@ void Classify::DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class) {
     ConfigMask = 1 << IntResult.Config;
     ShowMatchDisplay();
     im_.Match(int_class, AllProtosOn, (BIT_VECTOR)&ConfigMask,
-              num_features, features,
+              bl_features.size(), &bl_features[0],
               &IntResult, classify_adapt_feature_threshold,
               6 | 0x19, matcher_debug_separate_windows);
     UpdateMatchDisplay();
@@ -1033,50 +974,6 @@ void Classify::DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class) {
 }
 
 
-/*---------------------------------------------------------------------------*/
-/**
- * @param Blob blob to add to templates for ClassId
- * @param ClassId class to add blob to
- * @param FontinfoId font information from pre-trained teamples
- * @param Threshold minimum match rating to existing template
- *
- * Globals:
- * - PreTrainedTemplates current set of built-in templates
- *
- * @note Exceptions: none
- * @note History: Thu Mar 14 09:36:03 1991, DSJ, Created.
- */
-void Classify::AdaptToPunc(TBLOB *Blob,
-                           CLASS_ID ClassId,
-                           int FontinfoId,
-                           FLOAT32 Threshold) {
-  ADAPT_RESULTS *Results = new ADAPT_RESULTS();
-  int i;
-
-  Results->Initialize();
-  CharNormClassifier(Blob, PreTrainedTemplates, Results);
-  RemoveBadMatches(Results);
-
-  if (Results->NumMatches != 1) {
-    if (classify_learning_debug_level >= 1) {
-      cprintf ("Rejecting punc = %s (Alternatives = ",
-               unicharset.id_to_unichar(ClassId));
-
-      for (i = 0; i < Results->NumMatches; i++)
-        tprintf("%s", unicharset.id_to_unichar(Results->match[i].unichar_id));
-      tprintf(")\n");
-    }
-  } else {
-    #ifndef SECURE_NAMES
-    if (classify_learning_debug_level >= 1)
-      cprintf ("Adapting to punc = %s, thr= %g\n",
-               unicharset.id_to_unichar(ClassId), Threshold);
-    #endif
-    AdaptToChar(Blob, ClassId, FontinfoId, Threshold);
-  }
-  delete Results;
-}                                /* AdaptToPunc */
-
 
 /*---------------------------------------------------------------------------*/
 /**
@@ -1167,50 +1064,41 @@ void Classify::AddNewResult(ADAPT_RESULTS *results,
  * @note Exceptions: none
  * @note History: Tue Mar 12 19:40:36 1991, DSJ, Created.
  */
-void Classify::AmbigClassifier(TBLOB *Blob,
-                               INT_TEMPLATES Templates,
-                               ADAPT_CLASS *Classes,
-                               UNICHAR_ID *Ambiguities,
-                               ADAPT_RESULTS *Results) {
-  int NumFeatures;
-  INT_FEATURE_ARRAY IntFeatures;
+void Classify::AmbigClassifier(
+    const GenericVector<INT_FEATURE_STRUCT>& int_features,
+    const INT_FX_RESULT_STRUCT& fx_info,
+    const TBLOB *blob,  // Only its bounding box is read below.
+    INT_TEMPLATES templates,
+    ADAPT_CLASS *classes,  // Not referenced in the body below.
+    UNICHAR_ID *ambiguities,  // Walked until the first negative id.
+    ADAPT_RESULTS *results) {
+  if (int_features.empty()) return;  // No features: results left untouched.
   uinT8* CharNormArray = new uinT8[unicharset.size()];
   INT_RESULT_STRUCT IntResult;
-  CLASS_ID ClassId;
-
-  AmbigClassifierCalls++;
-
-  NumFeatures = GetCharNormFeatures(Blob, Templates, IntFeatures,
-                                    NULL, CharNormArray,
-                                    &(Results->BlobLength));
-  if (NumFeatures <= 0) {
-    delete [] CharNormArray;
-    return;
-  }
 
+  results->BlobLength = GetCharNormFeature(fx_info, templates, NULL,
+                                           CharNormArray);
   bool debug = matcher_debug_level >= 2 || classify_debug_level > 1;
   if (debug)
     tprintf("AM Matches =  ");
 
-  int top = Blob->bounding_box().top();
-  int bottom = Blob->bounding_box().bottom();
-  while (*Ambiguities >= 0) {
-    ClassId = *Ambiguities;
+  int top = blob->bounding_box().top();
+  int bottom = blob->bounding_box().bottom();
+  while (*ambiguities >= 0) {  // Stops at the first negative id.
+    CLASS_ID class_id = *ambiguities;
 
-    im_.SetCharNormMatch(classify_integer_matcher_multiplier);
-    im_.Match(ClassForClassId(Templates, ClassId),
+    im_.Match(ClassForClassId(templates, class_id),
               AllProtosOn, AllConfigsOn,
-              NumFeatures, IntFeatures,
+              int_features.size(), &int_features[0],  // Non-empty, see top.
               &IntResult,
               classify_adapt_feature_threshold, NO_DEBUG,
               matcher_debug_separate_windows);
 
-    ExpandShapesAndApplyCorrections(NULL, debug, ClassId, bottom, top, 0,
-                                    Results->BlobLength, CharNormArray,
-                                    IntResult, Results);
-    Ambiguities++;
-
-    NumAmbigClassesTried++;
+    ExpandShapesAndApplyCorrections(NULL, debug, class_id, bottom, top, 0,
+                                    results->BlobLength,
+                                    classify_integer_matcher_multiplier,
+                                    CharNormArray, IntResult, results);
+    ambiguities++;  // Advance to the next candidate class id.
   }
   delete [] CharNormArray;
 }                                /* AmbigClassifier */
@@ -1225,6 +1113,7 @@ void Classify::MasterMatcher(INT_TEMPLATES templates,
                              ADAPT_CLASS* classes,
                              int debug,
                              int num_classes,
+                             int matcher_multiplier,
                              const TBOX& blob_box,
                              CLASS_PRUNER_RESULTS results,
                              ADAPT_RESULTS* final_results) {
@@ -1246,7 +1135,8 @@ void Classify::MasterMatcher(INT_TEMPLATES templates,
     bool debug = matcher_debug_level >= 2 || classify_debug_level > 1;
     ExpandShapesAndApplyCorrections(classes, debug, class_id, bottom, top,
                                     results[c].Rating,
-                                    final_results->BlobLength, norm_factors,
+                                    final_results->BlobLength,
+                                    matcher_multiplier, norm_factors,
                                     int_result, final_results);
   }
 }
@@ -1258,7 +1148,8 @@ void Classify::MasterMatcher(INT_TEMPLATES templates,
 // The results are added to the final_results output.
 void Classify::ExpandShapesAndApplyCorrections(
     ADAPT_CLASS* classes, bool debug, int class_id, int bottom, int top,
-    float cp_rating, int blob_length, const uinT8* cn_factors,
+    float cp_rating, int blob_length, int matcher_multiplier,
+    const uinT8* cn_factors,
     INT_RESULT_STRUCT& int_result, ADAPT_RESULTS* final_results) {
   // Compute the fontinfo_ids.
   int fontinfo_id = kBlankFontinfoId;
@@ -1292,7 +1183,7 @@ void Classify::ExpandShapesAndApplyCorrections(
                                                int_result.Rating,
                                                int_result.FeatureMisses,
                                                bottom, top, blob_length,
-                                               cn_factors);
+                                               matcher_multiplier, cn_factors);
         if (c == 0 || rating < min_rating)
           min_rating = rating;
         if (unicharset.get_enabled(unichar_id)) {
@@ -1309,7 +1200,7 @@ void Classify::ExpandShapesAndApplyCorrections(
                                          int_result.Rating,
                                          int_result.FeatureMisses,
                                          bottom, top, blob_length,
-                                         cn_factors);
+                                         matcher_multiplier, cn_factors);
   if (unicharset.get_enabled(class_id)) {
     AddNewResult(final_results, class_id, -1, rating,
                  classes != NULL, int_result.Config,
@@ -1325,11 +1216,12 @@ double Classify::ComputeCorrectedRating(bool debug, int unichar_id,
                                         double cp_rating, double im_rating,
                                         int feature_misses,
                                         int bottom, int top,
-                                        int blob_length,
+                                        int blob_length, int matcher_multiplier,
                                         const uinT8* cn_factors) {
   // Compute class feature corrections.
   double cn_corrected = im_.ApplyCNCorrection(im_rating, blob_length,
-                                              cn_factors[unichar_id]);
+                                              cn_factors[unichar_id],
+                                              matcher_multiplier);
   double miss_penalty = tessedit_class_miss_scale * feature_misses;
   double vertical_penalty = 0.0;
   // Penalize non-alnums for being vertical misfits.
@@ -1383,39 +1275,30 @@ double Classify::ComputeCorrectedRating(bool debug, int unichar_id,
  * @note Exceptions: none
  * @note History: Tue Mar 12 19:38:03 1991, DSJ, Created.
  */
-UNICHAR_ID *Classify::BaselineClassifier(TBLOB *Blob,
-                                         ADAPT_TEMPLATES Templates,
-                                         ADAPT_RESULTS *Results) {
-  int NumFeatures;
+UNICHAR_ID *Classify::BaselineClassifier(
+    TBLOB *Blob, const GenericVector<INT_FEATURE_STRUCT>& int_features,
+    const INT_FX_RESULT_STRUCT& fx_info,
+    ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results) {
+  if (int_features.empty()) return NULL;
   int NumClasses;
-  INT_FEATURE_ARRAY IntFeatures;
   uinT8* CharNormArray = new uinT8[unicharset.size()];
-  CLASS_ID ClassId;
+  ClearCharNormArray(CharNormArray);
 
-  BaselineClassifierCalls++;
-
-  NumFeatures = GetBaselineFeatures(Blob, Templates->Templates, IntFeatures,
-                                    CharNormArray, &Results->BlobLength);
-  if (NumFeatures <= 0) {
-    delete [] CharNormArray;
-    return NULL;
-  }
-
-  NumClasses = PruneClasses(Templates->Templates, NumFeatures, IntFeatures,
+  Results->BlobLength = IntCastRounded(fx_info.Length / kStandardFeatureLength);
+  NumClasses = PruneClasses(Templates->Templates, int_features.size(),
+                            &int_features[0],
                             CharNormArray, BaselineCutoffs, Results->CPResults);
 
-  NumBaselineClassesTried += NumClasses;
-
   if (matcher_debug_level >= 2 || classify_debug_level > 1)
     cprintf ("BL Matches =  ");
 
-  im_.SetBaseLineMatch();
-  MasterMatcher(Templates->Templates, NumFeatures, IntFeatures, CharNormArray,
-                Templates->Class, matcher_debug_flags, NumClasses,
+  MasterMatcher(Templates->Templates, int_features.size(), &int_features[0],
+                CharNormArray,
+                Templates->Class, matcher_debug_flags, NumClasses, 0,
                 Blob->bounding_box(), Results->CPResults, Results);
 
   delete [] CharNormArray;
-  ClassId = Results->best_match.unichar_id;
+  CLASS_ID ClassId = Results->best_match.unichar_id;
   if (ClassId == NO_CLASS)
     return (NULL);
   /* this is a bug - maybe should return "" */
@@ -1445,17 +1328,13 @@ UNICHAR_ID *Classify::BaselineClassifier(TBLOB *Blob,
  * @note History: Tue Mar 12 16:02:52 1991, DSJ, Created.
  */
 int Classify::CharNormClassifier(TBLOB *blob,
-                                 INT_TEMPLATES Templates,
+                                 const TrainingSample& sample,
                                  ADAPT_RESULTS *adapt_results) {
-  CharNormClassifierCalls++;
-  TrainingSample* sample = BlobToTrainingSample(*blob, NM_CHAR_ANISOTROPIC,
-                                                classify_nonlinear_norm);
-  if (sample == NULL) return 0;
   // This is the length that is used for scaling ratings vs certainty.
   adapt_results->BlobLength =
-      IntCastRounded(sample->outline_length() / kStandardFeatureLength);
+      IntCastRounded(sample.outline_length() / kStandardFeatureLength);
   GenericVector<UnicharRating> unichar_results;
-  static_classifier_->UnicharClassifySample(*sample, blob->denorm().pix(), 0,
+  static_classifier_->UnicharClassifySample(sample, blob->denorm().pix(), 0,
                                             -1, &unichar_results);
   // Convert results to the format used internally by AdaptiveClassifier.
   for (int r = 0; r < unichar_results.size(); ++r) {
@@ -1468,9 +1347,7 @@ int Classify::CharNormClassifier(TBLOB *blob,
     float rating = 1.0f - unichar_results[r].rating;
     AddNewResult(adapt_results, unichar_id, -1, rating, false, 0, font1, font2);
   }
-  int num_features = sample->num_features();
-  delete sample;
-  return num_features;
+  return sample.num_features();
 }                                /* CharNormClassifier */
 
 // As CharNormClassifier, but operates on a TrainingSample and outputs to
@@ -1518,10 +1395,10 @@ int Classify::CharNormTrainingSample(bool pruner_only,
           UnicharRating(class_id, 1.0f - adapt_results->CPResults[i].Rating));
     }
   } else {
-    im_.SetCharNormMatch(classify_integer_matcher_multiplier);
     MasterMatcher(PreTrainedTemplates, num_features, sample.features(),
                   char_norm_array,
                   NULL, matcher_debug_flags, num_classes,
+                  classify_integer_matcher_multiplier,
                   blob_box, adapt_results->CPResults, adapt_results);
     // Convert master matcher results to output format.
     for (int i = 0; i < adapt_results->NumMatches; i++) {
@@ -1711,8 +1588,10 @@ void Classify::DebugAdaptiveClassifier(TBLOB *blob,
     if (i == 0 || Results->match[i].rating < Results->best_match.rating)
       Results->best_match = Results->match[i];
   }
-  TrainingSample* sample = BlobToTrainingSample(*blob, NM_CHAR_ANISOTROPIC,
-                                                classify_nonlinear_norm);
+  INT_FX_RESULT_STRUCT fx_info;
+  GenericVector<INT_FEATURE_STRUCT> bl_features;
+  TrainingSample* sample =
+      BlobToTrainingSample(*blob, false, &fx_info, &bl_features);
   if (sample == NULL) return;
   static_classifier_->DebugDisplay(*sample, blob->denorm().pix(),
                                    Results->best_match.unichar_id);
@@ -1745,21 +1624,26 @@ void Classify::DebugAdaptiveClassifier(TBLOB *blob,
 void Classify::DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results) {
   UNICHAR_ID *Ambiguities;
 
-  AdaptiveMatcherCalls++;
-  InitIntFX();
+  INT_FX_RESULT_STRUCT fx_info;
+  GenericVector<INT_FEATURE_STRUCT> bl_features;
+  TrainingSample* sample =
+      BlobToTrainingSample(*Blob, classify_nonlinear_norm, &fx_info,
+                           &bl_features);
+  if (sample == NULL) return;
 
   if (AdaptedTemplates->NumPermClasses < matcher_permanent_classes_min ||
       tess_cn_matching) {
-    CharNormClassifier(Blob, PreTrainedTemplates, Results);
+    CharNormClassifier(Blob, *sample, Results);
   } else {
-    Ambiguities = BaselineClassifier(Blob, AdaptedTemplates, Results);
+    Ambiguities = BaselineClassifier(Blob, bl_features, fx_info,
+                                     AdaptedTemplates, Results);
     if ((Results->NumMatches > 0 &&
          MarginalMatch (Results->best_match.rating) &&
          !tess_bn_matching) ||
         Results->NumMatches == 0) {
-      CharNormClassifier(Blob, PreTrainedTemplates, Results);
+      CharNormClassifier(Blob, *sample, Results);
     } else if (Ambiguities && *Ambiguities >= 0 && !tess_bn_matching) {
-      AmbigClassifier(Blob,
+      AmbigClassifier(bl_features, fx_info, Blob,
                       PreTrainedTemplates,
                       AdaptedTemplates->Class,
                       Ambiguities,
@@ -1773,6 +1657,7 @@ void Classify::DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results) {
   // just adding a NULL classification.
   if (!Results->HasNonfragment || Results->NumMatches == 0)
     ClassifyAsNoise(Results);
+  delete sample;
 }   /* DoAdaptiveMatch */
 
 /*---------------------------------------------------------------------------*/
@@ -1799,8 +1684,15 @@ UNICHAR_ID *Classify::GetAmbiguities(TBLOB *Blob,
   int i;
 
   Results->Initialize();
+  INT_FX_RESULT_STRUCT fx_info;
+  GenericVector<INT_FEATURE_STRUCT> bl_features;
+  TrainingSample* sample =
+      BlobToTrainingSample(*Blob, classify_nonlinear_norm, &fx_info,
+                           &bl_features);
+  if (sample == NULL) return NULL;
 
-  CharNormClassifier(Blob, PreTrainedTemplates, Results);
+  CharNormClassifier(Blob, *sample, Results);
+  delete sample;
   RemoveBadMatches(Results);
   qsort((void *)Results->match, Results->NumMatches,
         sizeof(ScoredClass), CompareByRating);
@@ -1823,58 +1715,6 @@ UNICHAR_ID *Classify::GetAmbiguities(TBLOB *Blob,
   return Ambiguities;
 }                              /* GetAmbiguities */
 
-/*---------------------------------------------------------------------------*/
-/**
- * This routine calls the integer (Hardware) feature
- * extractor if it has not been called before for this blob.
- * The results from the feature extractor are placed into
- * globals so that they can be used in other routines without
- * re-extracting the features.
- * It then copies the baseline features into the IntFeatures
- * array provided by the caller.
- *
- * @param Blob blob to extract features from
- * @param Templates used to compute char norm adjustments
- * @param IntFeatures array to fill with integer features
- * @param CharNormArray array to fill with dummy char norm adjustments
- * @param BlobLength length of blob in baseline-normalized units
- *
- * Globals:
- * - FeaturesHaveBeenExtracted TRUE if fx has been done
- * - BaselineFeatures holds extracted baseline feat
- * - CharNormFeatures holds extracted char norm feat
- * - FXInfo holds misc. FX info
- *
- * @return Number of features extracted or 0 if an error occured.
- * @note Exceptions: none
- * @note History: Tue May 28 10:40:52 1991, DSJ, Created.
- */
-int Classify::GetBaselineFeatures(TBLOB *Blob,
-                                  INT_TEMPLATES Templates,
-                                  INT_FEATURE_ARRAY IntFeatures,
-                                  uinT8* CharNormArray,
-                                  inT32 *BlobLength) {
-  if (!FeaturesHaveBeenExtracted) {
-    FeaturesOK = ExtractIntFeat(*Blob, classify_nonlinear_norm,
-                                BaselineFeatures, CharNormFeatures, &FXInfo);
-    FeaturesHaveBeenExtracted = TRUE;
-  }
-
-  *BlobLength = IntCastRounded(FXInfo.Length / kStandardFeatureLength);
-  if (!FeaturesOK) {
-    return 0;
-  }
-
-  memcpy(IntFeatures, BaselineFeatures, FXInfo.NumBL * sizeof(IntFeatures[0]));
-
-  ClearCharNormArray(CharNormArray);
-  return FXInfo.NumBL;
-}                              /* GetBaselineFeatures */
-
-void Classify::ResetFeaturesHaveBeenExtracted() {
-  FeaturesHaveBeenExtracted = FALSE;
-}
-
 // Returns true if the given blob looks too dissimilar to any character
 // present in the classifier templates.
 bool Classify::LooksLikeGarbage(TBLOB *blob) {
@@ -1921,48 +1761,28 @@ bool Classify::LooksLikeGarbage(TBLOB *blob) {
  * @param BlobLength length of blob in baseline-normalized units
  *
  * Globals:
- * - FeaturesHaveBeenExtracted TRUE if fx has been done
- * - BaselineFeatures holds extracted baseline feat
- * - CharNormFeatures holds extracted char norm feat
- * - FXInfo holds misc. FX info
  *
  * @return Number of features extracted or 0 if an error occured.
  * @note Exceptions: none
  * @note History: Tue May 28 10:40:52 1991, DSJ, Created.
  */
-int Classify::GetCharNormFeatures(TBLOB *Blob,
-                                  INT_TEMPLATES Templates,
-                                  INT_FEATURE_ARRAY IntFeatures,
-                                  uinT8* PrunerNormArray,
-                                  uinT8* CharNormArray,
-                                  inT32 *BlobLength) {
-  FEATURE NormFeature;
-  FLOAT32 Baseline, Scale;
-
-  if (!FeaturesHaveBeenExtracted) {
-    FeaturesOK = ExtractIntFeat(*Blob, classify_nonlinear_norm,
-                                BaselineFeatures, CharNormFeatures, &FXInfo);
-    FeaturesHaveBeenExtracted = TRUE;
-  }
-
-  *BlobLength = IntCastRounded(FXInfo.Length / kStandardFeatureLength);
-  if (!FeaturesOK) {
-    return 0;
-  }
-
-  memcpy(IntFeatures, CharNormFeatures, FXInfo.NumCN * sizeof(IntFeatures[0]));
-
-  NormFeature = NewFeature(&CharNormDesc);
-  Baseline = kBlnBaselineOffset;
-  Scale = MF_SCALE_FACTOR;
-  NormFeature->Params[CharNormY] = (FXInfo.Ymean - Baseline) * Scale;
-  NormFeature->Params[CharNormLength] =
-    FXInfo.Length * Scale / LENGTH_COMPRESSION;
-  NormFeature->Params[CharNormRx] = FXInfo.Rx * Scale;
-  NormFeature->Params[CharNormRy] = FXInfo.Ry * Scale;
-  ComputeCharNormArrays(NormFeature, Templates, CharNormArray, PrunerNormArray);
-  return FXInfo.NumCN;
-}                              /* GetCharNormFeatures */
+int Classify::GetCharNormFeature(const INT_FX_RESULT_STRUCT& fx_info,
+                                 INT_TEMPLATES templates,
+                                 uinT8* pruner_norm_array,
+                                 uinT8* char_norm_array) {
+  FEATURE norm_feature = NewFeature(&CharNormDesc);
+  float baseline = kBlnBaselineOffset;
+  float scale = MF_SCALE_FACTOR;
+  norm_feature->Params[CharNormY] = (fx_info.Ymean - baseline) * scale;
+  norm_feature->Params[CharNormLength] =
+      fx_info.Length * scale / LENGTH_COMPRESSION;
+  norm_feature->Params[CharNormRx] = fx_info.Rx * scale;
+  norm_feature->Params[CharNormRy] = fx_info.Ry * scale;
+  // Deletes norm_feature.
+  ComputeCharNormArrays(norm_feature, templates, char_norm_array,
+                        pruner_norm_array);
+  return IntCastRounded(fx_info.Length / kStandardFeatureLength);
+}                              /* GetCharNormFeature */
 
 // Computes the char_norm_array for the unicharset and, if not NULL, the
 // pruner_array as appropriate according to the existence of the shape_table.
@@ -2454,7 +2274,6 @@ void Classify::ShowBestMatchFor(int shape_id,
   }
   INT_RESULT_STRUCT cn_result;
   classify_norm_method.set_value(character);
-  im_.SetCharNormMatch(classify_integer_matcher_multiplier);
   im_.Match(ClassForClassId(PreTrainedTemplates, shape_id),
             AllProtosOn, AllConfigsOn,
             num_features, features, &cn_result,
diff --git a/classify/classify.cpp b/classify/classify.cpp
index 1eca2e9c2..166680d6e 100644
--- a/classify/classify.cpp
+++ b/classify/classify.cpp
@@ -165,27 +165,13 @@ Classify::Classify()
   AdaptedTemplates = NULL;
   PreTrainedTemplates = NULL;
   AllProtosOn = NULL;
-  PrunedProtos = NULL;
   AllConfigsOn = NULL;
-  AllProtosOff = NULL;
   AllConfigsOff = NULL;
   TempProtoMask = NULL;
   NormProtos = NULL;
 
-  AdaptiveMatcherCalls = 0;
-  BaselineClassifierCalls = 0;
-  CharNormClassifierCalls = 0;
-  AmbigClassifierCalls = 0;
-  NumWordsAdaptedTo = 0;
-  NumCharsAdaptedTo = 0;
-  NumBaselineClassesTried = 0;
-  NumCharNormClassesTried = 0;
-  NumAmbigClassesTried = 0;
-  NumClassesOutput = 0;
   NumAdaptationsFailed = 0;
 
-  FeaturesHaveBeenExtracted = false;
-  FeaturesOK = true;
   learn_debug_win_ = NULL;
   learn_fragmented_word_debug_win_ = NULL;
   learn_fragments_debug_win_ = NULL;
diff --git a/classify/classify.h b/classify/classify.h
index 92629da71..54f8b7773 100644
--- a/classify/classify.h
+++ b/classify/classify.h
@@ -145,15 +145,13 @@ class Classify : public CCStruct {
                         int FontinfoId,
                         ADAPT_CLASS Class,
                         ADAPT_TEMPLATES Templates);
-  void AdaptToPunc(TBLOB *Blob,
-                   CLASS_ID ClassId,
-                   int FontinfoId,
-                   FLOAT32 Threshold);
-  void AmbigClassifier(TBLOB *Blob,
-                       INT_TEMPLATES Templates,
-                       ADAPT_CLASS *Classes,
-                       UNICHAR_ID *Ambiguities,
-                       ADAPT_RESULTS *Results);
+  void AmbigClassifier(const GenericVector<INT_FEATURE_STRUCT>& int_features,
+                       const INT_FX_RESULT_STRUCT& fx_info,
+                       const TBLOB *blob,
+                       INT_TEMPLATES templates,
+                       ADAPT_CLASS *classes,
+                       UNICHAR_ID *ambiguities,
+                       ADAPT_RESULTS *results);
   void MasterMatcher(INT_TEMPLATES templates,
                      inT16 num_features,
                      const INT_FEATURE_STRUCT* features,
@@ -161,6 +159,7 @@ class Classify : public CCStruct {
                      ADAPT_CLASS* classes,
                      int debug,
                      int num_classes,
+                     int matcher_multiplier,
                      const TBOX& blob_box,
                      CLASS_PRUNER_RESULTS results,
                      ADAPT_RESULTS* final_results);
@@ -175,6 +174,7 @@ class Classify : public CCStruct {
                                        int bottom, int top,
                                        float cp_rating,
                                        int blob_length,
+                                       int matcher_multiplier,
                                        const uinT8* cn_factors,
                                        INT_RESULT_STRUCT& int_result,
                                        ADAPT_RESULTS* final_results);
@@ -184,7 +184,8 @@ class Classify : public CCStruct {
   double ComputeCorrectedRating(bool debug, int unichar_id, double cp_rating,
                                 double im_rating, int feature_misses,
                                 int bottom, int top,
-                                int blob_length, const uinT8* cn_factors);
+                                int blob_length, int matcher_multiplier,
+                                const uinT8* cn_factors);
   void ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box,
                                ADAPT_RESULTS *Results,
                                BLOB_CHOICE_LIST *Choices);
@@ -246,12 +247,13 @@ class Classify : public CCStruct {
   // Converts a shape_table_ index to a classifier class_id index (not a
   // unichar-id!). Uses a search, so not fast.
   int ShapeIDToClassID(int shape_id) const;
-  UNICHAR_ID *BaselineClassifier(TBLOB *Blob,
-                                 ADAPT_TEMPLATES Templates,
-                                 ADAPT_RESULTS *Results);
-  int CharNormClassifier(TBLOB *Blob,
-                         INT_TEMPLATES Templates,
-                         ADAPT_RESULTS *Results);
+  UNICHAR_ID *BaselineClassifier(
+      TBLOB *Blob, const GenericVector<INT_FEATURE_STRUCT>& int_features,
+      const INT_FX_RESULT_STRUCT& fx_info,
+      ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results);
+  int CharNormClassifier(TBLOB *blob,
+                         const TrainingSample& sample,
+                         ADAPT_RESULTS *adapt_results);
 
   // As CharNormClassifier, but operates on a TrainingSample and outputs to
   // a GenericVector of ShapeRating without conversion to classes.
@@ -267,7 +269,6 @@ class Classify : public CCStruct {
   void DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class);
   bool AdaptableWord(WERD_RES* word);
   void EndAdaptiveClassifier();
-  void PrintAdaptiveStatistics(FILE *File);
   void SettupPass1();
   void SettupPass2();
   void AdaptiveClassifier(TBLOB *Blob,
@@ -276,17 +277,10 @@ class Classify : public CCStruct {
   void ClassifyAsNoise(ADAPT_RESULTS *Results);
   void ResetAdaptiveClassifierInternal();
 
-  int GetBaselineFeatures(TBLOB *Blob,
-                          INT_TEMPLATES Templates,
-                          INT_FEATURE_ARRAY IntFeatures,
-                          uinT8* CharNormArray,
-                          inT32 *BlobLength);
-  int GetCharNormFeatures(TBLOB *Blob,
-                          INT_TEMPLATES Templates,
-                          INT_FEATURE_ARRAY IntFeatures,
-                          uinT8* PrunerNormArray,
-                          uinT8* CharNormArray,
-                          inT32 *BlobLength);
+  int GetCharNormFeature(const INT_FX_RESULT_STRUCT& fx_info,
+                         INT_TEMPLATES templates,
+                         uinT8* pruner_norm_array,
+                         uinT8* char_norm_array);
   // Computes the char_norm_array for the unicharset and, if not NULL, the
   // pruner_array as appropriate according to the existence of the shape_table.
   // The norm_feature is deleted as it is almost certainly no longer needed.
@@ -298,7 +292,6 @@ class Classify : public CCStruct {
   bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config);
   void UpdateAmbigsGroup(CLASS_ID class_id, TBLOB *Blob);
 
-  void ResetFeaturesHaveBeenExtracted();
   bool AdaptiveClassifierIsFull() { return NumAdaptationsFailed > 0; }
   bool LooksLikeGarbage(TBLOB *blob);
   void RefreshDebugWindow(ScrollView **win, const char *msg,
@@ -468,9 +461,7 @@ class Classify : public CCStruct {
 
   // Create dummy proto and config masks for use with the built-in templates.
   BIT_VECTOR AllProtosOn;
-  BIT_VECTOR PrunedProtos;
   BIT_VECTOR AllConfigsOn;
-  BIT_VECTOR AllProtosOff;
   BIT_VECTOR AllConfigsOff;
   BIT_VECTOR TempProtoMask;
   bool EnableLearning;
@@ -504,34 +495,13 @@ class Classify : public CCStruct {
   ShapeTable* shape_table_;
 
  private:
-
   Dict dict_;
   // The currently active static classifier.
   ShapeClassifier* static_classifier_;
 
   /* variables used to hold performance statistics */
-  int AdaptiveMatcherCalls;
-  int BaselineClassifierCalls;
-  int CharNormClassifierCalls;
-  int AmbigClassifierCalls;
-  int NumWordsAdaptedTo;
-  int NumCharsAdaptedTo;
-  int NumBaselineClassesTried;
-  int NumCharNormClassesTried;
-  int NumAmbigClassesTried;
-  int NumClassesOutput;
   int NumAdaptationsFailed;
 
-  /* variables used to hold onto extracted features.  This is used
-  to map from the old scheme in which baseline features and char norm
-  features are extracted separately, to the new scheme in which they
-  are extracted at the same time. */
-  bool FeaturesHaveBeenExtracted;
-  bool FeaturesOK;
-  INT_FEATURE_ARRAY BaselineFeatures;
-  INT_FEATURE_ARRAY CharNormFeatures;
-  INT_FX_RESULT_STRUCT FXInfo;
-
   // Expected number of features in the class pruner, used to penalize
   // unknowns that have too few features (like a c being classified as e) so
   // it doesn't recognize everything as '@' or '#'.
diff --git a/classify/intfx.cpp b/classify/intfx.cpp
index 63d6ddb1e..496cdad2c 100644
--- a/classify/intfx.cpp
+++ b/classify/intfx.cpp
@@ -78,31 +78,19 @@ namespace tesseract {
 // TODO(rays) BlobToTrainingSample must remain a global function until
 // the FlexFx and FeatureDescription code can be removed and LearnBlob
 // made a member of Classify.
-TrainingSample* BlobToTrainingSample(const TBLOB& blob,
-                                     tesseract::NormalizationMode mode,
-                                     bool nonlinear_norm) {
-  INT_FX_RESULT_STRUCT fx_info;
-  GenericVector<INT_FEATURE_STRUCT> bl_features;
+TrainingSample* BlobToTrainingSample(
+    const TBLOB& blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT* fx_info,
+    GenericVector<INT_FEATURE_STRUCT>* bl_features) {
   GenericVector<INT_FEATURE_STRUCT> cn_features;
-  Classify::ExtractFeatures(blob, nonlinear_norm, &bl_features,
-                            &cn_features, &fx_info, NULL);
+  Classify::ExtractFeatures(blob, nonlinear_norm, bl_features,
+                            &cn_features, fx_info, NULL);
   // TODO(rays) Use blob->PreciseBoundingBox() instead.
   TBOX box = blob.bounding_box();
   TrainingSample* sample = NULL;
-  if (mode == tesseract::NM_CHAR_ANISOTROPIC) {
-    int num_features = fx_info.NumCN;
-    if (num_features > 0) {
-      sample = TrainingSample::CopyFromFeatures(fx_info, box, &cn_features[0],
-                                                num_features);
-    }
-  } else if (mode == tesseract::NM_BASELINE) {
-    int num_features = fx_info.NumBL;
-    if (num_features > 0) {
-      sample = TrainingSample::CopyFromFeatures(fx_info, box, &bl_features[0],
-                                                num_features);
-    }
-  } else {
-    ASSERT_HOST(!"Unsupported normalization mode!");
+  int num_features = fx_info->NumCN;
+  if (num_features > 0) {
+    sample = TrainingSample::CopyFromFeatures(*fx_info, box, &cn_features[0],
+                                              num_features);
   }
   if (sample != NULL) {
     // Set the bounding box (in original image coordinates) in the sample.
diff --git a/classify/intfx.h b/classify/intfx.h
index 11a68377c..26c435374 100644
--- a/classify/intfx.h
+++ b/classify/intfx.h
@@ -60,9 +60,9 @@ namespace tesseract {
   // TODO(rays) BlobToTrainingSample must remain a global function until
   // the FlexFx and FeatureDescription code can be removed and LearnBlob
   // made a member of Classify.
-  TrainingSample* BlobToTrainingSample(const TBLOB& blob,
-                                       tesseract::NormalizationMode mode,
-                                       bool nonlinear_norm);
+  TrainingSample* BlobToTrainingSample(
+      const TBLOB& blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT* fx_info,
+      GenericVector<INT_FEATURE_STRUCT>* bl_features);
 }
 
 // Deprecated! Prefer tesseract::Classify::ExtractFeatures instead.
diff --git a/classify/intmatcher.cpp b/classify/intmatcher.cpp
index ea7eea958..d03a14ace 100644
--- a/classify/intmatcher.cpp
+++ b/classify/intmatcher.cpp
@@ -693,13 +693,9 @@ int IntegerMatcher::FindBadFeatures(
 
 
 /*---------------------------------------------------------------------------*/
-void IntegerMatcher::Init(tesseract::IntParam *classify_debug_level,
-                          int classify_integer_matcher_multiplier) {
+void IntegerMatcher::Init(tesseract::IntParam *classify_debug_level) {
   classify_debug_level_ = classify_debug_level;
 
-  /* Set default mode of operation of IntegerMatcher */
-  SetCharNormMatch(classify_integer_matcher_multiplier);
-
   /* Initialize table for evidence to similarity lookup */
   for (int i = 0; i < SE_TABLE_SIZE; i++) {
     uinT32 IntSimilarity = i << (27 - SE_TABLE_BITS);
@@ -724,17 +720,6 @@ void IntegerMatcher::Init(tesseract::IntParam *classify_debug_level,
   evidence_mult_mask_ = ((1 << kIntEvidenceTruncBits) - 1);
 }
 
-/*--------------------------------------------------------------------------*/
-void IntegerMatcher::SetBaseLineMatch() {
-  local_matcher_multiplier_ = 0;
-}
-
-
-/*--------------------------------------------------------------------------*/
-void IntegerMatcher::SetCharNormMatch(int integer_matcher_multiplier) {
-  local_matcher_multiplier_ = integer_matcher_multiplier;
-}
-
 
 /**----------------------------------------------------------------------------
               Private Code
@@ -1283,10 +1268,11 @@ int IntegerMatcher::FindBestMatch(
 // Applies the CN normalization factor to the given rating and returns
 // the modified rating.
 float IntegerMatcher::ApplyCNCorrection(float rating, int blob_length,
-                                        int normalization_factor) {
+                                        int normalization_factor,
+                                        int matcher_multiplier) {
   return (rating * blob_length +
-    local_matcher_multiplier_ * normalization_factor / 256.0) /
-    (blob_length + local_matcher_multiplier_);
+          matcher_multiplier * normalization_factor / 256.0) /
+      (blob_length + matcher_multiplier);
 }
 
 /*---------------------------------------------------------------------------*/
diff --git a/classify/intmatcher.h b/classify/intmatcher.h
index 5598d273a..72003bacb 100644
--- a/classify/intmatcher.h
+++ b/classify/intmatcher.h
@@ -102,11 +102,7 @@ class IntegerMatcher {
 
   IntegerMatcher() : classify_debug_level_(0) {}
 
-  void Init(tesseract::IntParam *classify_debug_level,
-            int classify_integer_matcher_multiplier);
-
-  void SetBaseLineMatch();
-  void SetCharNormMatch(int integer_matcher_multiplier);
+  void Init(tesseract::IntParam *classify_debug_level);
 
   void Match(INT_CLASS ClassTemplate,
              BIT_VECTOR ProtoMask,
@@ -121,7 +117,7 @@ class IntegerMatcher {
   // Applies the CN normalization factor to the given rating and returns
   // the modified rating.
   float ApplyCNCorrection(float rating, int blob_length,
-                          int normalization_factor);
+                          int normalization_factor, int matcher_multiplier);
 
   int FindGoodProtos(INT_CLASS ClassTemplate,
                      BIT_VECTOR ProtoMask,
@@ -192,7 +188,6 @@ class IntegerMatcher {
   uinT32 evidence_table_mask_;
   uinT32 mult_trunc_shift_bits_;
   uinT32 table_trunc_shift_bits_;
-  inT16 local_matcher_multiplier_;
   tesseract::IntParam *classify_debug_level_;
   uinT32 evidence_mult_mask_;
 };
diff --git a/classify/picofeat.cpp b/classify/picofeat.cpp
index ba19fb1ca..aff0c24b1 100644
--- a/classify/picofeat.cpp
+++ b/classify/picofeat.cpp
@@ -235,8 +235,11 @@ FEATURE_SET ExtractIntCNFeatures(TBLOB *blob, const DENORM& bl_denorm,
  ** Exceptions: none
  ** History: 8/8/2011, rays, Created.
  */
-  tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample(
-      *blob, tesseract::NM_CHAR_ANISOTROPIC, false);
+  INT_FX_RESULT_STRUCT local_fx_info(fx_info);
+  GenericVector<INT_FEATURE_STRUCT> bl_features;
+  tesseract::TrainingSample* sample =
+      tesseract::BlobToTrainingSample(*blob, false, &local_fx_info,
+                                      &bl_features);
   if (sample == NULL) return NULL;
 
   int num_features = sample->num_features();
@@ -267,8 +270,11 @@ FEATURE_SET ExtractIntGeoFeatures(TBLOB *blob, const DENORM& bl_denorm,
  ** Exceptions: none
  ** History: 8/8/2011, rays, Created.
  */
-  tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample(
-      *blob, tesseract::NM_CHAR_ANISOTROPIC, false);
+  INT_FX_RESULT_STRUCT local_fx_info(fx_info);
+  GenericVector<INT_FEATURE_STRUCT> bl_features;
+  tesseract::TrainingSample* sample =
+      tesseract::BlobToTrainingSample(*blob, false, &local_fx_info,
+                                      &bl_features);
   if (sample == NULL) return NULL;
 
   FEATURE_SET feature_set = NewFeatureSet(1);
diff --git a/dict/dict.cpp b/dict/dict.cpp
index a21da7967..cccc2d076 100644
--- a/dict/dict.cpp
+++ b/dict/dict.cpp
@@ -119,6 +119,9 @@ Dict::Dict(Image* image_ptr)
                   "Make AcceptableChoice() always return false. Useful"
                   " when there is a need to explore all segmentations",
                   getImage()->getCCUtil()->params()),
+      BOOL_MEMBER(save_raw_choices, false,
+                  "Deprecated- backward compatibility only",
+                  getImage()->getCCUtil()->params()),
       INT_MEMBER(tessedit_truncate_wordchoice_log, 10,
                  "Max words to keep in list",
                  getImage()->getCCUtil()->params()),
@@ -689,7 +692,7 @@ void Dict::adjust_word(WERD_CHOICE *word,
 int Dict::valid_word(const WERD_CHOICE &word, bool numbers_ok) const {
   const WERD_CHOICE *word_ptr = &word;
   WERD_CHOICE temp_word(word.unicharset());
-  if (hyphenated()) {
+  if (hyphenated() && hyphen_word_->unicharset() == word.unicharset()) {
     copy_hyphen_info(&temp_word);
     temp_word += word;
     word_ptr = &temp_word;
diff --git a/dict/dict.h b/dict/dict.h
index 213b2cab2..9e067973a 100644
--- a/dict/dict.h
+++ b/dict/dict.h
@@ -613,6 +613,8 @@ class Dict {
   BOOL_VAR_H(stopper_no_acceptable_choices, false,
              "Make AcceptableChoice() always return false. Useful"
              " when there is a need to explore all segmentations");
+  BOOL_VAR_H(save_raw_choices, false,
+             "Deprecated- backward compatibility only");
   INT_VAR_H(tessedit_truncate_wordchoice_log, 10, "Max words to keep in list");
   STRING_VAR_H(word_to_debug, "", "Word for which stopper debug information"
                " should be printed to stdout");
diff --git a/wordrec/chopper.cpp b/wordrec/chopper.cpp
index f7603f6a0..624e79f74 100644
--- a/wordrec/chopper.cpp
+++ b/wordrec/chopper.cpp
@@ -440,16 +440,32 @@ namespace tesseract {
  * enough.  The results are returned in the WERD_RES.
  */
 void Wordrec::chop_word_main(WERD_RES *word) {
-  // Initial clean up.
-  word->ClearRatings();
   int num_blobs = word->chopped_word->NumBlobs();
-  word->ratings = new MATRIX(num_blobs, wordrec_max_join_chunks);
-  // Run initial classification.
-  for (int b = 0; b < num_blobs; ++b) {
-    BLOB_CHOICE_LIST* choices = classify_piece(word->seam_array, b, b,
-                                               "Initial:", word->chopped_word,
-                                               word->blamer_bundle);
-    word->ratings->put(b, b, choices);
+  if (word->ratings == NULL) {
+    word->ratings = new MATRIX(num_blobs, wordrec_max_join_chunks);
+  }
+  if (word->ratings->get(0, 0) == NULL) {
+    // Run initial classification.
+    for (int b = 0; b < num_blobs; ++b) {
+      BLOB_CHOICE_LIST* choices = classify_piece(word->seam_array, b, b,
+                                                 "Initial:", word->chopped_word,
+                                                 word->blamer_bundle);
+      word->ratings->put(b, b, choices);
+    }
+  } else {
+    // Blobs have been pre-classified. Set matrix cell for all blob choices.
+    for (int col = 0; col < word->ratings->dimension(); ++col) {
+      for (int row = col; row < word->ratings->dimension() &&
+           row < col + word->ratings->bandwidth(); ++row) {
+        BLOB_CHOICE_LIST* choices = word->ratings->get(col, row);
+        if (choices != NULL) {
+          BLOB_CHOICE_IT bc_it(choices);
+          for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) {
+            bc_it.data()->set_matrix_cell(col, row);
+          }
+        }
+      }
+    }
   }
 
   // Run Segmentation Search.