Refactored control functions to enable parallel blob classification

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@904 d0cd1f9f-072b-0410-8dd7-cf729c803f20
This commit is contained in:
theraysmith@gmail.com 2013-11-08 20:30:56 +00:00
parent 77c1b41e4e
commit 7ec4fd7a56
25 changed files with 580 additions and 673 deletions

View File

@ -46,7 +46,7 @@ libtesseract_main_la_SOURCES = \
docqual.cpp equationdetect.cpp fixspace.cpp fixxht.cpp \
imgscale.cpp ltrresultiterator.cpp \
osdetect.cpp output.cpp pageiterator.cpp pagesegmain.cpp \
pagewalk.cpp paragraphs.cpp paramsd.cpp pgedit.cpp recogtraining.cpp \
pagewalk.cpp par_control.cpp paragraphs.cpp paramsd.cpp pgedit.cpp recogtraining.cpp \
reject.cpp resultiterator.cpp scaleimg.cpp superscript.cpp \
tesseract_cube_combiner.cpp \
tessbox.cpp tessedit.cpp tesseractclass.cpp tessvars.cpp \

View File

@ -241,10 +241,12 @@ PAGE_RES* Tesseract::SetupApplyBoxes(const GenericVector<TBOX>& boxes,
void Tesseract::MaximallyChopWord(const GenericVector<TBOX>& boxes,
BLOCK* block, ROW* row,
WERD_RES* word_res) {
if (!word_res->SetupForTessRecognition(unicharset, this, BestPix(), false,
textord_use_cjk_fp_model,
poly_allow_detailed_fx,
row, block)) {
if (!word_res->SetupForRecognition(unicharset, this, BestPix(),
tessedit_ocr_engine_mode, NULL,
classify_bln_numeric_mode,
textord_use_cjk_fp_model,
poly_allow_detailed_fx,
row, block)) {
word_res->CloneChoppedToRebuild();
return;
}

View File

@ -97,8 +97,9 @@ BOOL8 Tesseract::recog_interactive(BLOCK* block, ROW* row, WERD_RES* word_res) {
inT16 char_qual;
inT16 good_char_qual;
classify_word_and_language(&Tesseract::classify_word_pass2,
block, row, word_res);
WordData word_data(block, row, word_res);
SetupWordPassN(2, &word_data);
classify_word_and_language(&Tesseract::classify_word_pass2, &word_data);
if (tessedit_debug_quality_metrics) {
word_char_quality(word_res, row, &char_qual, &good_char_qual);
tprintf
@ -153,6 +154,111 @@ bool Tesseract::ProcessTargetWord(const TBOX& word_box,
return true;
}
// If tesseract is to be run, sets the words up ready for it.
// Collects every word on the page (optionally filtered to a target box) into
// *words and prepares each one for recognition pass pass_n, so that a
// parallel pre-recognition step can operate on a flat vector of WordData.
// target_word_box/word_config restrict processing to one word when non-NULL.
void Tesseract::SetupAllWordsPassN(int pass_n,
const TBOX* target_word_box,
const char* word_config,
PAGE_RES* page_res,
GenericVector<WordData>* words) {
// Prepare all the words.
PAGE_RES_IT page_res_it(page_res);
for (page_res_it.restart_page(); page_res_it.word() != NULL;
page_res_it.forward()) {
// On pass 1, install fake results first so that a timeout mid-page still
// leaves every word with a usable (empty) best_choice/box_word.
if (pass_n == 1)
page_res_it.word()->SetupFake(unicharset);
if (target_word_box == NULL ||
ProcessTargetWord(page_res_it.word()->word->bounding_box(),
*target_word_box, word_config, 1)) {
words->push_back(WordData(page_res_it));
}
}
// Setup all the words for recognition with polygonal approximation.
for (int w = 0; w < words->size(); ++w) {
SetupWordPassN(pass_n, &(*words)[w]);
// Chain words so each one can see its predecessor's best_choice.
if (w > 0) (*words)[w].prev_word = &(*words)[w - 1];
}
}
// Sets up the single word ready for whichever engine is to be run.
// Sets up the single word ready for whichever engine is to be run.
// Prepares word->word for recognition on pass pass_n, and, when
// sub-languages are configured, prepares a per-language copy of the word in
// word->lang_words so each sub-language Tesseract can classify independently.
void Tesseract::SetupWordPassN(int pass_n, WordData* word) {
// On pass 2 a word already marked done (and not in training mode) needs no
// re-setup; pass 1 always sets up.
if (pass_n == 1 || !word->word->done || tessedit_training_tess) {
if (pass_n == 2) {
// TODO(rays) Should we do this on pass1 too?
word->word->caps_height = 0.0;
// Fall back to the row's x-height when the word has none of its own.
if (word->word->x_height == 0.0f)
word->word->x_height = word->row->x_height();
}
// Cube doesn't get setup for pass2.
if (pass_n != 2 || tessedit_ocr_engine_mode != OEM_CUBE_ONLY) {
word->word->SetupForRecognition(
unicharset, this, BestPix(), tessedit_ocr_engine_mode, NULL,
classify_bln_numeric_mode, textord_use_cjk_fp_model,
poly_allow_detailed_fx, word->row, word->block);
}
}
if (!sub_langs_.empty()) {
if (word->lang_words.size() != sub_langs_.size()) {
// Setup the words for all the sub-languages now.
WERD_RES empty;
word->lang_words.init_to_size(sub_langs_.size(), empty);
}
for (int s = 0; s < sub_langs_.size(); ++s) {
Tesseract* lang_t = sub_langs_[s];
// A sub-language copy is (re)initialized on pass 1 always; on pass 2
// only for non-cube engines when the copy is not done (or training).
if (pass_n == 1 || (lang_t->tessedit_ocr_engine_mode != OEM_CUBE_ONLY &&
(!word->lang_words[s].done || lang_t->tessedit_training_tess))) {
word->lang_words[s].InitForRetryRecognition(*word->word);
word->lang_words[s].SetupForRecognition(
lang_t->unicharset, lang_t, BestPix(),
lang_t->tessedit_ocr_engine_mode, NULL,
lang_t->classify_bln_numeric_mode,
lang_t->textord_use_cjk_fp_model,
lang_t->poly_allow_detailed_fx, word->row, word->block);
}
}
}
}
// Runs word recognition on all the words.
// Runs word recognition on all the words.
// Classifies every prepared word in *words for pass pass_n, updating the
// progress monitor as it goes. Returns false on deadline/cancel (after
// faking out the remaining words so downstream code still sees results),
// true on normal completion.
bool Tesseract::RecogAllWordsPassN(int pass_n, ETEXT_DESC* monitor,
GenericVector<WordData>* words) {
// TODO(rays) Before this loop can be parallelized (it would yield a massive
// speed-up) all remaining member globals need to be converted to local/heap
// (eg set_pass1 and set_pass2) and an intermediate adaption pass needs to be
// added. The results will be significantly different with adaption on, and
// deterioration will need investigation.
for (int w = 0; w < words->size(); ++w) {
WordData* word = &(*words)[w];
if (monitor != NULL) {
monitor->ocr_alive = TRUE;
// Progress mapping: pass 1 covers 30-80%, pass 2 covers 80-90%.
if (pass_n == 1)
monitor->progress = 30 + 50 * w / words->size();
else
monitor->progress = 80 + 10 * w / words->size();
if (monitor->deadline_exceeded() ||
(monitor->cancel != NULL && (*monitor->cancel)(monitor->cancel_this,
words->size()))) {
// Timeout. Fake out the rest of the words.
for (; w < words->size(); ++w) {
(*words)[w].word->SetupFake(unicharset);
}
return false;
}
}
// Words that already failed segmentation are not retried.
if (word->word->tess_failed) continue;
WordRecognizer recognizer = pass_n == 1 ? &Tesseract::classify_word_pass1
: &Tesseract::classify_word_pass2;
classify_word_and_language(recognizer, word);
if (tessedit_dump_choices) {
word_dumper(NULL, word->row, word->word);
tprintf("Pass%d: %s [%s]\n", pass_n,
word->word->best_choice->unichar_string().string(),
word->word->best_choice->debug_string().string());
}
}
return true;
}
/**
* recog_all_words()
*
@ -179,27 +285,15 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
const TBOX* target_word_box,
const char* word_config,
int dopasses) {
PAGE_RES_IT page_res_it;
inT32 word_index; // current word
PAGE_RES_IT page_res_it(page_res);
if (tessedit_minimal_rej_pass1) {
tessedit_test_adaption.set_value (TRUE);
tessedit_minimal_rejection.set_value (TRUE);
}
// Before the main recognition loop below, walk through the whole page and set
// up fake words. That way, if we run out of time a user will still get the
// expected best_choice and box_words out the end; they'll just be empty.
page_res_it.page_res = page_res;
for (page_res_it.restart_page(); page_res_it.word() != NULL;
page_res_it.forward()) {
page_res_it.word()->SetupFake(unicharset);
}
if (dopasses==0 || dopasses==1) {
page_res_it.page_res=page_res;
page_res_it.restart_page();
// ****************** Pass 1 *******************
// Clear adaptive classifier at the beginning of the page if it is full.
@ -214,20 +308,15 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
if (sub_langs_[i]->AdaptiveClassifierIsFull())
sub_langs_[i]->ResetAdaptiveClassifierInternal();
}
stats_.word_count = 0;
if (monitor != NULL) {
monitor->ocr_alive = TRUE;
while (page_res_it.word() != NULL) {
stats_.word_count++;
page_res_it.forward();
}
page_res_it.restart_page();
} else {
stats_.word_count = 1;
// Set up all words ready for recognition, so that if parallelism is on
// all the input and output classes are ready to run the classifier.
GenericVector<WordData> words;
SetupAllWordsPassN(1, target_word_box, word_config, page_res, &words);
if (tessedit_parallelize) {
PrerecAllWordsPar(words);
}
word_index = 0;
stats_.word_count = words.size();
stats_.dict_words = 0;
stats_.doc_blob_quality = 0;
@ -237,56 +326,15 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
stats_.doc_good_char_quality = 0;
most_recently_used_ = this;
// Run pass 1 word recognition.
if (!RecogAllWordsPassN(1, monitor, &words)) return false;
// Pass 1 post-processing.
while (page_res_it.word() != NULL) {
set_global_loc_code(LOC_PASS1);
word_index++;
if (monitor != NULL) {
monitor->ocr_alive = TRUE;
monitor->progress = 30 + 50 * word_index / stats_.word_count;
if (monitor->deadline_exceeded() ||
(monitor->cancel != NULL && (*monitor->cancel)(monitor->cancel_this,
stats_.dict_words)))
return false;
}
if (target_word_box &&
!ProcessTargetWord(page_res_it.word()->word->bounding_box(),
*target_word_box, word_config, 1)) {
page_res_it.forward();
continue;
}
classify_word_and_language(&Tesseract::classify_word_pass1,
page_res_it.block()->block,
page_res_it.row()->row,
page_res_it.word());
if (page_res_it.word()->word->flag(W_REP_CHAR)) {
fix_rep_char(&page_res_it);
page_res_it.forward();
continue;
}
if (tessedit_dump_choices) {
word_dumper(NULL, page_res_it.row()->row, page_res_it.word());
tprintf("Pass1: %s [%s]\n",
page_res_it.word()->best_choice->unichar_string().string(),
page_res_it.word()->best_choice->debug_string().string());
}
// tessedit_test_adaption enables testing of the accuracy of the
// input to the adaptive classifier.
if (tessedit_test_adaption && !tessedit_minimal_rejection) {
if (!word_adaptable (page_res_it.word(),
tessedit_test_adaption_mode)) {
page_res_it.word()->reject_map.rej_word_tess_failure();
// FAKE PERM REJ
} else {
// Override rejection mechanisms for this word.
UNICHAR_ID space = unicharset.unichar_to_id(" ");
for (int i = 0; i < page_res_it.word()->best_choice->length(); i++) {
if ((page_res_it.word()->best_choice->unichar_id(i) != space) &&
page_res_it.word()->reject_map[i].rejected())
page_res_it.word()->reject_map[i].setrej_minimal_rej_accept();
}
}
}
// Count dict words.
if (page_res_it.word()->best_choice->permuter() == USER_DAWG_PERM)
@ -307,49 +355,26 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
if (dopasses == 1) return true;
// ****************** Pass 2 *******************
page_res_it.restart_page();
word_index = 0;
most_recently_used_ = this;
while (tessedit_tess_adaption_mode != 0x0 && !tessedit_test_adaption &&
page_res_it.word() != NULL) {
set_global_loc_code(LOC_PASS2);
word_index++;
if (monitor != NULL) {
monitor->ocr_alive = TRUE;
monitor->progress = 80 + 10 * word_index / stats_.word_count;
if (monitor->deadline_exceeded() ||
(monitor->cancel != NULL && (*monitor->cancel)(monitor->cancel_this,
stats_.dict_words)))
return false;
if (tessedit_tess_adaption_mode != 0x0 && !tessedit_test_adaption) {
page_res_it.restart_page();
GenericVector<WordData> words;
SetupAllWordsPassN(2, target_word_box, word_config, page_res, &words);
if (tessedit_parallelize) {
PrerecAllWordsPar(words);
}
// changed by jetsoft
// specific to its needs to extract one word when need
if (target_word_box &&
!ProcessTargetWord(page_res_it.word()->word->bounding_box(),
*target_word_box, word_config, 2)) {
most_recently_used_ = this;
// Run pass 2 word recognition.
if (!RecogAllWordsPassN(2, monitor, &words)) return false;
// Pass 2 post-processing.
while (page_res_it.word() != NULL) {
WERD_RES* word = page_res_it.word();
if (word->word->flag(W_REP_CHAR) && !word->done) {
fix_rep_char(&page_res_it);
page_res_it.forward();
continue;
}
page_res_it.forward();
continue;
}
// end jetsoft
classify_word_and_language(&Tesseract::classify_word_pass2,
page_res_it.block()->block,
page_res_it.row()->row,
page_res_it.word());
if (page_res_it.word()->word->flag(W_REP_CHAR) &&
!page_res_it.word()->done) {
fix_rep_char(&page_res_it);
page_res_it.forward();
continue;
}
if (tessedit_dump_choices) {
word_dumper(NULL, page_res_it.row()->row, page_res_it.word());
tprintf("Pass2: %s [%s]\n",
page_res_it.word()->best_choice->unichar_string().string(),
page_res_it.word()->best_choice->debug_string().string());
}
page_res_it.forward();
}
// The next passes can only be run if tesseract has been used, as cube
@ -384,6 +409,7 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
// Check the correctness of the final results.
blamer_pass(page_res);
}
script_pos_pass(page_res);
// Write results pass.
set_global_loc_code(LOC_WRITE_RESULTS);
@ -672,6 +698,46 @@ void Tesseract::blamer_pass(PAGE_RES* page_res) {
}
}
// Sets script positions and detects smallcaps on all output words.
// Sets script positions and detects smallcaps on all output words.
// A word is flagged small_caps when its x-height sits in a band around
// kXHeightCapRatio * block x-height AND its best choice contains at least
// one upper-case and no lower-case characters (only meaningful for scripts
// that have an x-height).
void Tesseract::script_pos_pass(PAGE_RES* page_res) {
PAGE_RES_IT page_res_it(page_res);
for (page_res_it.restart_page(); page_res_it.word() != NULL;
page_res_it.forward()) {
WERD_RES* word = page_res_it.word();
if (word->word->flag(W_REP_CHAR)) {
// NOTE(review): this forward() plus the loop's own forward() advances
// two words past a rep-char word — confirm the extra skip is intended.
page_res_it.forward();
continue;
}
float x_height = page_res_it.block()->block->x_height();
float word_x_height = word->x_height;
// Clamp the word's x-height into the range the best choice allows,
// falling back to the midpoint when it is outside.
if (word_x_height < word->best_choice->min_x_height() ||
word_x_height > word->best_choice->max_x_height()) {
word_x_height = (word->best_choice->min_x_height() +
word->best_choice->max_x_height()) / 2.0f;
}
// Test for small caps. Word capheight must be close to block xheight,
// and word must contain no lower case letters, and at least one upper case.
double small_cap_xheight = x_height * kXHeightCapRatio;
double small_cap_delta = (x_height - small_cap_xheight) / 2.0;
if (word->uch_set->script_has_xheight() &&
small_cap_xheight - small_cap_delta <= word_x_height &&
word_x_height <= small_cap_xheight + small_cap_delta) {
// Scan for upper/lower.
int num_upper = 0;
int num_lower = 0;
for (int i = 0; i < word->best_choice->length(); ++i) {
if (word->uch_set->get_isupper(word->best_choice->unichar_id(i)))
++num_upper;
else if (word->uch_set->get_islower(word->best_choice->unichar_id(i)))
++num_lower;
}
if (num_upper > 0 && num_lower == 0)
word->small_caps = true;
}
word->SetScriptPositions();
}
}
// Helper returns true if the new_word is better than the word, using a
// simple test of better certainty AND rating (to reduce false positives
// from cube) or a dictionary vs non-dictionary word.
@ -701,38 +767,33 @@ static bool NewWordBetter(const WERD_RES& word, const WERD_RES& new_word,
// Helper to recognize the word using the given (language-specific) tesseract.
// Returns true if the result was better than previously.
bool Tesseract::RetryWithLanguage(WERD_RES *word, BLOCK* block, ROW *row,
bool Tesseract::RetryWithLanguage(const WERD_RES& best_word,
WordData* word_data, WERD_RES* word,
WordRecognizer recognizer) {
if (classify_debug_level || cube_debug_level) {
tprintf("Retrying word using lang %s, oem %d\n",
lang.string(), static_cast<int>(tessedit_ocr_engine_mode));
}
// Setup a trial WERD_RES in which to classify.
WERD_RES lang_word;
lang_word.InitForRetryRecognition(*word);
// Run the recognizer on the word.
// Initial version is a bit of a hack based on better certainty and rating
// (to reduce false positives from cube) or a dictionary vs non-dictionary
// word.
(this->*recognizer)(block, row, &lang_word);
bool new_is_better = NewWordBetter(*word, lang_word,
(this->*recognizer)(word_data, word);
bool new_is_better = NewWordBetter(best_word, *word,
classify_max_rating_ratio,
classify_max_certainty_margin);
if (classify_debug_level || cube_debug_level) {
if (lang_word.best_choice == NULL) {
tprintf("New result %s better:%s\n",
if (word->best_choice == NULL) {
tprintf("NULL result %s better!\n",
new_is_better ? "IS" : "NOT");
} else {
tprintf("New result %s better:%s, r=%g, c=%g\n",
new_is_better ? "IS" : "NOT",
lang_word.best_choice->unichar_string().string(),
lang_word.best_choice->rating(),
lang_word.best_choice->certainty());
word->best_choice->unichar_string().string(),
word->best_choice->rating(),
word->best_choice->certainty());
}
}
if (new_is_better) {
word->ConsumeWordResults(&lang_word);
}
return new_is_better;
}
@ -743,9 +804,9 @@ bool Tesseract::RetryWithLanguage(WERD_RES *word, BLOCK* block, ROW *row,
// If recognition was not successful, tries all available languages until
// it gets a successful result or runs out of languages. Keeps the best result.
void Tesseract::classify_word_and_language(WordRecognizer recognizer,
BLOCK* block,
ROW *row,
WERD_RES *word) {
WordData* word_data) {
// Points to the best result. May be word or in lang_words.
WERD_RES* word = word_data->word;
clock_t start_t = clock();
if (classify_debug_level || cube_debug_level) {
tprintf("Processing word with lang %s at:",
@ -755,15 +816,23 @@ void Tesseract::classify_word_and_language(WordRecognizer recognizer,
const char* result_type = "Initial";
bool initially_done = !word->tess_failed && word->done;
if (initially_done) {
// If done on pass1, we reuse the tesseract that did it, and don't try
// any more. The only need to call the classifier at all is for the
// cube combiner and xheight fixing (which may be bogus on a done word.)
// If done on pass1, leave it as-is.
most_recently_used_ = word->tesseract;
result_type = "Already done";
} else {
if (most_recently_used_ != this) {
// Point to the word for most_recently_used_.
for (int s = 0; s < sub_langs_.size(); ++s) {
if (most_recently_used_ == sub_langs_[s]) {
word = &word_data->lang_words[s];
break;
}
}
}
(most_recently_used_->*recognizer)(word_data, word);
if (!word->tess_failed && word->tess_accepted)
result_type = "Accepted";
}
(most_recently_used_->*recognizer)(block, row, word);
if (!word->tess_failed && word->tess_accepted)
result_type = "Accepted";
if (classify_debug_level || cube_debug_level) {
tprintf("%s result: %s r=%.4g, c=%.4g, accepted=%d, adaptable=%d"
" xht=[%g,%g]\n",
@ -782,11 +851,31 @@ void Tesseract::classify_word_and_language(WordRecognizer recognizer,
if (classify_debug_level) {
tprintf("Retrying with main-Tesseract, lang: %s\n", lang.string());
}
if (RetryWithLanguage(word, block, row, recognizer)) {
most_recently_used_ = this;
if (!word->tess_failed && word->tess_accepted)
return; // No need to look at the others.
if (word_data->word->tesseract == this) {
// This is pass1, and we are trying the main language.
if (RetryWithLanguage(*word, word_data, word_data->word, recognizer)) {
most_recently_used_ = this;
word = word_data->word;
}
} else {
// This is pass2, and we are trying the main language again, but it
// has no word allocated to it, so we must re-initialize it.
WERD_RES main_word(*word_data->word);
main_word.InitForRetryRecognition(*word_data->word);
main_word.SetupForRecognition(unicharset, this, BestPix(),
tessedit_ocr_engine_mode, NULL,
classify_bln_numeric_mode,
textord_use_cjk_fp_model,
poly_allow_detailed_fx,
word_data->row, word_data->block);
if (RetryWithLanguage(*word, word_data, &main_word, recognizer)) {
most_recently_used_ = this;
word_data->word->ConsumeWordResults(&main_word);
word = word_data->word;
}
}
if (!word->tess_failed && word->tess_accepted)
return; // No need to look at the others.
}
for (int i = 0; i < sub_langs_.size(); ++i) {
@ -795,14 +884,21 @@ void Tesseract::classify_word_and_language(WordRecognizer recognizer,
tprintf("Retrying with sub-Tesseract[%d] lang: %s\n",
i, sub_langs_[i]->lang.string());
}
if (sub_langs_[i]->RetryWithLanguage(word, block, row, recognizer)) {
if (sub_langs_[i]->RetryWithLanguage(*word, word_data,
&word_data->lang_words[i],
recognizer)) {
most_recently_used_ = sub_langs_[i];
word = &word_data->lang_words[i];
if (!word->tess_failed && word->tess_accepted)
return; // No need to look at the others.
break; // No need to look at the others.
}
}
}
}
if (word != word_data->word) {
// Move the result for the best language to the main word.
word_data->word->ConsumeWordResults(word);
}
clock_t ocr_t = clock();
if (tessedit_timing_debug) {
tprintf("%s (ocr took %.2f sec)\n",
@ -817,7 +913,11 @@ void Tesseract::classify_word_and_language(WordRecognizer recognizer,
* Baseline normalize the word and pass it to Tess.
*/
void Tesseract::classify_word_pass1(BLOCK* block, ROW *row, WERD_RES *word) {
void Tesseract::classify_word_pass1(WordData* word_data, WERD_RES* word) {
ROW* row = word_data->row;
BLOCK* block = word_data->block;
prev_word_best_choice_ = word_data->prev_word != NULL
? word_data->prev_word->word->best_choice : NULL;
// If we only intend to run cube - run it and return.
if (tessedit_ocr_engine_mode == OEM_CUBE_ONLY) {
cube_word_pass1(block, row, word);
@ -880,6 +980,10 @@ bool Tesseract::TrainedXheightFix(WERD_RES *word, BLOCK* block, ROW *row) {
}
new_x_ht_word.x_height = new_x_ht;
new_x_ht_word.caps_height = 0.0;
new_x_ht_word.SetupForRecognition(
unicharset, this, BestPix(), tessedit_ocr_engine_mode, NULL,
classify_bln_numeric_mode, textord_use_cjk_fp_model,
poly_allow_detailed_fx, row, block);
match_word_pass_n(2, &new_x_ht_word, row, block);
if (!new_x_ht_word.tess_failed) {
int new_misfits = CountMisfitTops(&new_x_ht_word);
@ -916,11 +1020,15 @@ bool Tesseract::TrainedXheightFix(WERD_RES *word, BLOCK* block, ROW *row) {
* Control what to do with the word in pass 2
*/
void Tesseract::classify_word_pass2(BLOCK* block, ROW *row, WERD_RES *word) {
void Tesseract::classify_word_pass2(WordData* word_data, WERD_RES* word) {
// Return if we do not want to run Tesseract.
if (tessedit_ocr_engine_mode != OEM_TESSERACT_ONLY &&
tessedit_ocr_engine_mode != OEM_TESSERACT_CUBE_COMBINED)
return;
ROW* row = word_data->row;
BLOCK* block = word_data->block;
prev_word_best_choice_ = word_data->prev_word != NULL
? word_data->prev_word->word->best_choice : NULL;
set_global_subloc_code(SUBLOC_NORM);
check_debug_pt(word, 30);
@ -940,26 +1048,6 @@ void Tesseract::classify_word_pass2(BLOCK* block, ROW *row, WERD_RES *word) {
// Use the tops and bottoms since they are available.
TrainedXheightFix(word, block, row);
}
// Test for small caps. Word capheight must be close to block xheight,
// and word must contain no lower case letters, and at least one upper case.
double small_cap_xheight = block->x_height() * kXHeightCapRatio;
double small_cap_delta = (block->x_height() - small_cap_xheight) / 2.0;
if (unicharset.script_has_xheight() &&
small_cap_xheight - small_cap_delta <= word->x_height &&
word->x_height <= small_cap_xheight + small_cap_delta) {
// Scan for upper/lower.
int num_upper = 0;
int num_lower = 0;
for (int i = 0; i < word->best_choice->length(); ++i) {
if (unicharset.get_isupper(word->best_choice->unichar_id(i)))
++num_upper;
else if (unicharset.get_islower(word->best_choice->unichar_id(i)))
++num_lower;
}
if (num_upper > 0 && num_lower == 0)
word->small_caps = true;
}
word->SetScriptPositions();
set_global_subloc_code(SUBLOC_NORM);
}
@ -988,12 +1076,8 @@ void Tesseract::classify_word_pass2(BLOCK* block, ROW *row, WERD_RES *word) {
void Tesseract::match_word_pass_n(int pass_n, WERD_RES *word,
ROW *row, BLOCK* block) {
if (word->SetupForTessRecognition(unicharset, this, BestPix(),
classify_bln_numeric_mode,
textord_use_cjk_fp_model,
poly_allow_detailed_fx,
row, block))
tess_segment_pass_n(pass_n, word);
if (word->tess_failed) return;
tess_segment_pass_n(pass_n, word);
if (!word->tess_failed) {
if (!word->word->flag (W_REP_CHAR)) {
@ -1136,12 +1220,12 @@ void Tesseract::ExplodeRepeatedWord(BLOB_CHOICE* best_choice,
WERD_RES* rep_word =
page_res_it->InsertSimpleCloneWord(*word_res, blob_word);
// Setup the single char WERD_RES
if (rep_word->SetupForTessRecognition(*word_res->uch_set, this, BestPix(),
false,
textord_use_cjk_fp_model,
poly_allow_detailed_fx,
page_res_it->row()->row,
page_res_it->block()->block)) {
if (rep_word->SetupForRecognition(*word_res->uch_set, this, BestPix(),
tessedit_ocr_engine_mode, NULL, false,
textord_use_cjk_fp_model,
poly_allow_detailed_fx,
page_res_it->row()->row,
page_res_it->block()->block)) {
rep_word->CloneChoppedToRebuild();
BLOB_CHOICE* blob_choice = new BLOB_CHOICE(*best_choice);
rep_word->FakeClassifyWord(1, &blob_choice);

View File

@ -197,6 +197,9 @@ void Tesseract::run_cube_combiner(PAGE_RES *page_res) {
// Iterate through the word results and call cube on each word.
for (page_res_it.restart_page(); page_res_it.word () != NULL;
page_res_it.forward()) {
BLOCK* block = page_res_it.block()->block;
if (block->poly_block() != NULL && !block->poly_block()->IsText())
continue; // Don't deal with non-text blocks.
WERD_RES* word = page_res_it.word();
// Skip cube entirely if tesseract's certainty is greater than threshold.
int combiner_run_thresh = convert_prob_to_tess_certainty(
@ -210,6 +213,11 @@ void Tesseract::run_cube_combiner(PAGE_RES *page_res) {
// Setup a trial WERD_RES in which to classify with cube.
WERD_RES cube_word;
cube_word.InitForRetryRecognition(*word);
cube_word.SetupForRecognition(lang_tess->unicharset, this, BestPix(),
OEM_CUBE_ONLY,
NULL, false, false, false,
page_res_it.row()->row,
page_res_it.block()->block);
CubeObject *cube_obj = lang_tess->cube_recognize_word(
page_res_it.block()->block, &cube_word);
if (cube_obj != NULL)
@ -317,10 +325,6 @@ void Tesseract::cube_combine_word(CubeObject* cube_obj, WERD_RES* cube_word,
**********************************************************************/
bool Tesseract::cube_recognize(CubeObject *cube_obj, BLOCK* block,
WERD_RES *word) {
if (!word->SetupForCubeRecognition(unicharset, this, block)) {
return false; // Graphics block.
}
// Run cube
WordAltList *cube_alt_list = cube_obj->RecognizeWord();
if (!cube_alt_list || cube_alt_list->AltCount() <= 0) {

View File

@ -204,8 +204,9 @@ void Tesseract::match_current_words(WERD_RES_LIST &words, ROW *row,
for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
word = word_it.data();
if ((!word->part_of_combo) && (word->box_word == NULL)) {
classify_word_and_language(&Tesseract::classify_word_pass2,
block, row, word);
WordData word_data(block, row, word);
SetupWordPassN(2, &word_data);
classify_word_and_language(&Tesseract::classify_word_pass2, &word_data);
}
prev_word_best_choice_ = word->best_choice;
}

View File

@ -731,10 +731,12 @@ BOOL8 Tesseract:: word_blank_and_set_display(BLOCK* block, ROW* row,
BOOL8 Tesseract::word_bln_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
TWERD *bln_word = word_res->chopped_word;
if (bln_word == NULL) {
word_res->SetupForTessRecognition(unicharset, this, BestPix(), false,
textord_use_cjk_fp_model,
poly_allow_detailed_fx,
row, block);
word_res->SetupForRecognition(unicharset, this, BestPix(),
tessedit_ocr_engine_mode, NULL,
classify_bln_numeric_mode,
textord_use_cjk_fp_model,
poly_allow_detailed_fx,
row, block);
bln_word = word_res->chopped_word;
}
bln_word_window_handle()->Clear();
@ -963,10 +965,12 @@ void Tesseract::blob_feature_display(PAGE_RES* page_res,
if (word != NULL) {
WERD_RES word_res(word);
word_res.x_height = row->x_height();
word_res.SetupForTessRecognition(unicharset, this, BestPix(), false,
textord_use_cjk_fp_model,
poly_allow_detailed_fx,
row, block);
word_res.SetupForRecognition(unicharset, this, BestPix(),
tessedit_ocr_engine_mode, NULL,
classify_bln_numeric_mode,
textord_use_cjk_fp_model,
poly_allow_detailed_fx,
row, block);
TWERD* bln_word = word_res.chopped_word;
TBLOB* bln_blob = bln_word->blobs[0];
INT_FX_RESULT_STRUCT fx_info;

View File

@ -203,7 +203,9 @@ void Tesseract::ambigs_classify_and_output(WERD_RES *werd_res,
FILE *output_file) {
// Classify word.
fflush(stdout);
classify_word_pass1(block_res->block, row_res->row, werd_res);
WordData word_data(block_res->block, row_res->row, werd_res);
SetupWordPassN(1, &word_data);
classify_word_pass1(&word_data, werd_res);
WERD_CHOICE *best_choice = werd_res->best_choice;
ASSERT_HOST(best_choice != NULL);

View File

@ -402,6 +402,8 @@ Tesseract::Tesseract()
"for layout analysis.", this->params()),
BOOL_MEMBER(textord_equation_detect, false, "Turn on equation detector",
this->params()),
INT_MEMBER(tessedit_parallelize, 0, "Run in parallel where possible",
this->params()),
// The following parameters were deprecated and removed from their original
// locations. The parameters are temporarily kept here to give Tesseract
@ -528,7 +530,6 @@ void Tesseract::Clear() {
reskew_ = FCOORD(1.0f, 0.0f);
splitter_.Clear();
scaled_factor_ = -1;
ResetFeaturesHaveBeenExtracted();
for (int i = 0; i < sub_langs_.size(); ++i)
sub_langs_[i]->Clear();
}

View File

@ -100,10 +100,6 @@ class EquationDetect;
class Tesseract;
class TesseractCubeCombiner;
typedef void (Tesseract::*WordRecognizer)(BLOCK* block,
ROW *row,
WERD_RES *word);
// A collection of various variables for statistics and debugging.
struct TesseractStats {
TesseractStats()
@ -136,6 +132,24 @@ struct TesseractStats {
bool write_results_empty_block;
};
// Struct to hold all the pointers to relevant data for processing a word.
// Bundles the pointers needed to classify one word independently of the
// page iterator, enabling the word loop to run over a flat vector.
struct WordData {
WordData() : word(NULL), row(NULL), block(NULL), prev_word(NULL) {}
// Captures the current word/row/block from a page iterator position.
explicit WordData(const PAGE_RES_IT& page_res_it)
: word(page_res_it.word()), row(page_res_it.row()->row),
block(page_res_it.block()->block), prev_word(NULL) {}
WordData(BLOCK* block_in, ROW* row_in, WERD_RES* word_res)
: word(word_res), row(row_in), block(block_in), prev_word(NULL) {}
WERD_RES* word;       // The word to recognize (not owned).
ROW* row;             // Enclosing text row (not owned).
BLOCK* block;         // Enclosing block (not owned).
WordData* prev_word;  // Previous word in reading order, NULL for the first.
// Per-sub-language trial results, parallel to Tesseract::sub_langs_.
GenericVector<WERD_RES> lang_words;
};
typedef void (Tesseract::*WordRecognizer)(WordData* word_data, WERD_RES* word);
class Tesseract : public Wordrec {
public:
Tesseract();
@ -250,10 +264,23 @@ class Tesseract : public Wordrec {
bool single_column, bool osd, bool only_osd,
BLOCK_LIST* blocks, Tesseract* osd_tess, OSResults* osr,
TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix, Pix** music_mask_pix);
// par_control.cpp
void PrerecAllWordsPar(const GenericVector<WordData>& words);
//// control.h /////////////////////////////////////////////////////////
bool ProcessTargetWord(const TBOX& word_box, const TBOX& target_word_box,
const char* word_config, int pass);
// Sets up the words ready for whichever engine is to be run
void SetupAllWordsPassN(int pass_n,
const TBOX* target_word_box,
const char* word_config,
PAGE_RES* page_res,
GenericVector<WordData>* words);
// Sets up the single word ready for whichever engine is to be run.
void SetupWordPassN(int pass_n, WordData* word);
// Runs word recognition on all the words.
bool RecogAllWordsPassN(int pass_n, ETEXT_DESC* monitor,
GenericVector<WordData>* words);
bool recog_all_words(PAGE_RES* page_res,
ETEXT_DESC* monitor,
const TBOX* target_word_box,
@ -265,13 +292,15 @@ class Tesseract : public Wordrec {
const char* word_config);
void bigram_correction_pass(PAGE_RES *page_res);
void blamer_pass(PAGE_RES* page_res);
// Sets script positions and detects smallcaps on all output words.
void script_pos_pass(PAGE_RES* page_res);
// Helper to recognize the word using the given (language-specific) tesseract.
// Returns true if the result was better than previously.
bool RetryWithLanguage(WERD_RES *word, BLOCK* block, ROW *row,
WordRecognizer recognizer);
bool RetryWithLanguage(const WERD_RES& best_word, WordData* word_data,
WERD_RES* word, WordRecognizer recognizer);
void classify_word_and_language(WordRecognizer recognizer,
BLOCK* block, ROW *row, WERD_RES *word);
void classify_word_pass1(BLOCK* block, ROW *row, WERD_RES *word);
WordData* word_data);
void classify_word_pass1(WordData* word_data, WERD_RES* word);
void recog_pseudo_word(PAGE_RES* page_res, // blocks to check
TBOX &selection_box);
@ -282,7 +311,7 @@ class Tesseract : public Wordrec {
const char *s,
const char *lengths);
void match_word_pass_n(int pass_n, WERD_RES *word, ROW *row, BLOCK* block);
void classify_word_pass2(BLOCK* block, ROW *row, WERD_RES *word);
void classify_word_pass2(WordData* word_data, WERD_RES* word);
void ReportXhtFixResult(bool accept_new_word, float new_x_ht,
WERD_RES* word, WERD_RES* new_word);
bool RunOldFixXht(WERD_RES *word, BLOCK* block, ROW *row);
@ -936,6 +965,7 @@ class Tesseract : public Wordrec {
"Only initialize with the config file. Useful if the instance is "
"not going to be used for OCR but say only for layout analysis.");
BOOL_VAR_H(textord_equation_detect, false, "Turn on equation detector");
INT_VAR_H(tessedit_parallelize, 0, "Run in parallel where possible");
// The following parameters were deprecated and removed from their original
// locations. The parameters are temporarily kept here to give Tesseract

View File

@ -741,19 +741,36 @@ TWERD* TWERD::PolygonalCopy(bool allow_detailed_fx, WERD* src) {
// DENORMs in the blobs.
void TWERD::BLNormalize(const BLOCK* block, const ROW* row, Pix* pix,
bool inverse, float x_height, bool numeric_mode,
tesseract::OcrEngineMode hint,
const TBOX* norm_box,
DENORM* word_denorm) {
TBOX word_box = bounding_box();
if (norm_box != NULL) word_box = *norm_box;
float word_middle = (word_box.left() + word_box.right()) / 2.0f;
float input_y_offset = 0.0f;
float final_y_offset = static_cast<float>(kBlnBaselineOffset);
float scale = kBlnXHeight / x_height;
if (hint == tesseract::OEM_CUBE_ONLY || row == NULL) {
word_middle = word_box.left();
input_y_offset = word_box.bottom();
final_y_offset = 0.0f;
if (hint == tesseract::OEM_CUBE_ONLY)
scale = 1.0f;
} else {
input_y_offset = row->base_line(word_middle);
}
for (int b = 0; b < blobs.size(); ++b) {
TBLOB* blob = blobs[b];
TBOX blob_box = blob->bounding_box();
float mid_x = (blob_box.left() + blob_box.right()) / 2.0f;
float baseline = row->base_line(mid_x);
float scale = kBlnXHeight / x_height;
float baseline = input_y_offset;
float blob_scale = scale;
if (numeric_mode) {
baseline = blob_box.bottom();
scale = ClipToRange(kBlnXHeight * 4.0f / (3 * blob_box.height()),
scale, scale * 1.5f);
blob_scale = ClipToRange(kBlnXHeight * 4.0f / (3 * blob_box.height()),
scale, scale * 1.5f);
} else if (row != NULL && hint != tesseract::OEM_CUBE_ONLY) {
baseline = row->base_line(mid_x);
}
// The image will be 8-bit grey if the input was grey or color. Note that in
// a grey image 0 is black and 255 is white. If the input was binary, then
@ -761,16 +778,13 @@ void TWERD::BLNormalize(const BLOCK* block, const ROW* row, Pix* pix,
// To tell the difference pixGetDepth() will return 8 or 1.
// The inverse flag will be true iff the word has been determined to be
// white on black, and is independent of whether the pix is 8 bit or 1 bit.
blob->Normalize(block, NULL, NULL, word_middle, baseline, scale, scale,
0.0f, static_cast<float>(kBlnBaselineOffset),
inverse, pix);
blob->Normalize(block, NULL, NULL, word_middle, baseline, blob_scale,
blob_scale, 0.0f, final_y_offset, inverse, pix);
}
if (word_denorm != NULL) {
float scale = kBlnXHeight / x_height;
word_denorm->SetupNormalization(block, NULL, NULL, word_middle,
row->base_line(word_middle),
scale, scale, 0.0f,
static_cast<float>(kBlnBaselineOffset));
input_y_offset, scale, scale,
0.0f, final_y_offset);
word_denorm->set_inverse(inverse);
word_denorm->set_pix(pix);
}

View File

@ -31,6 +31,7 @@
----------------------------------------------------------------------*/
#include "clst.h"
#include "normalis.h"
#include "publictypes.h"
#include "rect.h"
#include "vecfuncs.h"
@ -316,7 +317,10 @@ struct TWERD {
// Baseline normalizes the blobs in-place, recording the normalization in the
// DENORMs in the blobs.
void BLNormalize(const BLOCK* block, const ROW* row, Pix* pix, bool inverse,
float x_height, bool numeric_mode, DENORM* word_denorm);
float x_height, bool numeric_mode,
tesseract::OcrEngineMode hint,
const TBOX* norm_box,
DENORM* word_denorm);
// Copies the data and the blobs, but leaves next untouched.
void CopyFrom(const TWERD& src);
// Deletes owned data.

View File

@ -32,6 +32,8 @@ static const double kStopperAmbiguityThresholdGain = 8.0;
// Constant offset for computing thresholds that determine the ambiguity of a
// word.
static const double kStopperAmbiguityThresholdOffset = 1.5;
// Max number of broken pieces to associate.
const int kWordrecMaxNumJoinChunks = 4;
// Computes and returns a threshold of certainty difference used to determine
// which words to keep, based on the adjustment factors of the two words.
@ -245,16 +247,25 @@ void WERD_RES::InitForRetryRecognition(const WERD_RES& source) {
// If allow_detailed_fx is true, the feature extractor will receive fine
// precision outline information, allowing smoother features and better
// features on low resolution images.
// The norm_mode_hint sets the default mode for normalization in absence
// of any of the above flags.
// norm_box is used to override the word bounding box to determine the
// normalization scale and offset.
// Returns false if the word is empty and sets up fake results.
bool WERD_RES::SetupForTessRecognition(const UNICHARSET& unicharset_in,
bool WERD_RES::SetupForRecognition(const UNICHARSET& unicharset_in,
tesseract::Tesseract* tess, Pix* pix,
int norm_mode,
const TBOX* norm_box,
bool numeric_mode,
bool use_body_size,
bool allow_detailed_fx,
ROW *row, BLOCK* block) {
ROW *row, const BLOCK* block) {
tesseract::OcrEngineMode norm_mode_hint =
static_cast<tesseract::OcrEngineMode>(norm_mode);
tesseract = tess;
POLY_BLOCK* pb = block != NULL ? block->poly_block() : NULL;
if (word->cblob_list()->empty() || (pb != NULL && !pb->IsText())) {
if ((norm_mode_hint != tesseract::OEM_CUBE_ONLY &&
word->cblob_list()->empty()) || (pb != NULL && !pb->IsText())) {
// Empty words occur when all the blobs have been moved to the rej_blobs
// list, which seems to occur frequently in junk.
SetupFake(unicharset_in);
@ -264,13 +275,17 @@ bool WERD_RES::SetupForTessRecognition(const UNICHARSET& unicharset_in,
ClearResults();
SetupWordScript(unicharset_in);
chopped_word = TWERD::PolygonalCopy(allow_detailed_fx, word);
float word_xheight = use_body_size && row->body_size() > 0.0f
float word_xheight = use_body_size && row != NULL && row->body_size() > 0.0f
? row->body_size() : x_height;
chopped_word->BLNormalize(block, row, pix, word->flag(W_INVERSE),
word_xheight, numeric_mode, &denorm);
word_xheight, numeric_mode, norm_mode_hint,
norm_box, &denorm);
blob_row = row;
SetupBasicsFromChoppedWord(unicharset_in);
SetupBlamerBundle();
int num_blobs = chopped_word->NumBlobs();
ratings = new MATRIX(num_blobs, kWordrecMaxNumJoinChunks);
tess_failed = false;
return true;
}
@ -284,30 +299,6 @@ void WERD_RES::SetupBasicsFromChoppedWord(const UNICHARSET &unicharset_in) {
ClearWordChoices();
}
// Sets up the members used in recognition:
// bln_boxes, chopped_word, seam_array, denorm, best_choice, raw_choice.
// Returns false if the word is empty and sets up fake results.
bool WERD_RES::SetupForCubeRecognition(const UNICHARSET& unicharset_in,
tesseract::Tesseract* tess,
const BLOCK* block) {
tesseract = tess;
POLY_BLOCK* pb = block != NULL ? block->poly_block() : NULL;
if (pb != NULL && !pb->IsText()) {
// Ignore words in graphic regions.
SetupFake(unicharset_in);
word->set_flag(W_REP_CHAR, false);
return false;
}
ClearResults();
SetupWordScript(unicharset_in);
TBOX word_box = word->bounding_box();
denorm.SetupNormalization(block, NULL, NULL,
word_box.left(), word_box.bottom(),
1.0f, 1.0f, 0.0f, 0.0f);
SetupBlamerBundle();
return true;
}
// Sets up the members used in recognition for an empty recognition result:
// bln_boxes, chopped_word, seam_array, denorm, best_choice, raw_choice.
void WERD_RES::SetupFake(const UNICHARSET& unicharset_in) {

View File

@ -339,7 +339,8 @@ class WERD_RES : public ELIST_LINK {
// characters purely based on their shape on the page, and by default produce
// the corresponding unicode for a left-to-right context.
const char* const BestUTF8(int blob_index, bool in_rtl_context) const {
if (blob_index < 0 || blob_index >= best_choice->length())
if (blob_index < 0 || best_choice == NULL ||
blob_index >= best_choice->length())
return NULL;
UNICHAR_ID id = best_choice->unichar_id(blob_index);
if (id < 0 || id >= uch_set->size() || id == INVALID_UNICHAR_ID)
@ -435,25 +436,22 @@ class WERD_RES : public ELIST_LINK {
// If allow_detailed_fx is true, the feature extractor will receive fine
// precision outline information, allowing smoother features and better
// features on low resolution images.
// The norm_mode sets the default mode for normalization in absence
// of any of the above flags. It should really be a tesseract::OcrEngineMode
// but is declared as int for ease of use with tessedit_ocr_engine_mode.
// Returns false if the word is empty and sets up fake results.
bool SetupForTessRecognition(const UNICHARSET& unicharset_in,
tesseract::Tesseract* tesseract, Pix* pix,
bool numeric_mode, bool use_body_size,
bool allow_detailed_fx,
ROW *row, BLOCK* block);
bool SetupForRecognition(const UNICHARSET& unicharset_in,
tesseract::Tesseract* tesseract, Pix* pix,
int norm_mode,
const TBOX* norm_box, bool numeric_mode,
bool use_body_size, bool allow_detailed_fx,
ROW *row, const BLOCK* block);
// Set up the seam array, bln_boxes, best_choice, and raw_choice to empty
// accumulators from a made chopped word. We presume the fields are already
// empty.
void SetupBasicsFromChoppedWord(const UNICHARSET &unicharset_in);
// Sets up the members used in recognition:
// bln_boxes, chopped_word, seam_array, denorm.
// Returns false if the word is empty and sets up fake results.
bool SetupForCubeRecognition(const UNICHARSET& unicharset_in,
tesseract::Tesseract* tesseract,
const BLOCK* block);
// Sets up the members used in recognition for an empty recognition result:
// bln_boxes, chopped_word, seam_array, denorm, best_choice, raw_choice.
void SetupFake(const UNICHARSET& uch);

View File

@ -530,8 +530,9 @@ void WERD_CHOICE::SetScriptPositions(bool small_caps, TWERD* word) {
// Initialize to normal.
for (int i = 0; i < length_; ++i)
script_pos_[i] = tesseract::SP_NORMAL;
if (word->blobs.empty())
if (word->blobs.empty() || word->NumBlobs() != TotalOfStates()) {
return;
}
int position_counts[4];
for (int i = 0; i < 4; i++) {

View File

@ -122,8 +122,6 @@ struct PROTO_KEY {
#define MarginalMatch(Rating) \
((Rating) > matcher_great_threshold)
#define InitIntFX() (FeaturesHaveBeenExtracted = FALSE)
/*-----------------------------------------------------------------------------
Private Function Prototypes
-----------------------------------------------------------------------------*/
@ -179,8 +177,7 @@ void Classify::AdaptiveClassifier(TBLOB *Blob,
ADAPT_RESULTS *Results = new ADAPT_RESULTS();
Results->Initialize();
if (AdaptedTemplates == NULL)
AdaptedTemplates = NewAdaptedTemplates (true);
ASSERT_HOST(AdaptedTemplates != NULL);
DoAdaptiveMatch(Blob, Results);
if (CPResults != NULL)
@ -207,7 +204,6 @@ void Classify::AdaptiveClassifier(TBLOB *Blob,
DebugAdaptiveClassifier(Blob, Results);
#endif
NumClassesOutput += Choices->length();
delete Results;
} /* AdaptiveClassifier */
@ -249,7 +245,6 @@ void Classify::LearnWord(const char* filename, WERD_RES *word) {
if (!EnableLearning || word->best_choice == NULL)
return; // Can't or won't adapt.
NumWordsAdaptedTo++;
if (classify_learning_debug_level >= 1)
tprintf("\n\nAdapting to word = %s\n",
word->best_choice->debug_string().string());
@ -480,15 +475,11 @@ void Classify::EndAdaptiveClassifier() {
FreeNormProtos();
if (AllProtosOn != NULL) {
FreeBitVector(AllProtosOn);
FreeBitVector(PrunedProtos);
FreeBitVector(AllConfigsOn);
FreeBitVector(AllProtosOff);
FreeBitVector(AllConfigsOff);
FreeBitVector(TempProtoMask);
AllProtosOn = NULL;
PrunedProtos = NULL;
AllConfigsOn = NULL;
AllProtosOff = NULL;
AllConfigsOff = NULL;
TempProtoMask = NULL;
}
@ -561,19 +552,15 @@ void Classify::InitAdaptiveClassifier(bool load_pre_trained_templates) {
static_classifier_ = new TessClassifier(false, this);
}
im_.Init(&classify_debug_level, classify_integer_matcher_multiplier);
im_.Init(&classify_debug_level);
InitIntegerFX();
AllProtosOn = NewBitVector(MAX_NUM_PROTOS);
PrunedProtos = NewBitVector(MAX_NUM_PROTOS);
AllConfigsOn = NewBitVector(MAX_NUM_CONFIGS);
AllProtosOff = NewBitVector(MAX_NUM_PROTOS);
AllConfigsOff = NewBitVector(MAX_NUM_CONFIGS);
TempProtoMask = NewBitVector(MAX_NUM_PROTOS);
set_all_bits(AllProtosOn, WordsInVectorOfSize(MAX_NUM_PROTOS));
set_all_bits(PrunedProtos, WordsInVectorOfSize(MAX_NUM_PROTOS));
set_all_bits(AllConfigsOn, WordsInVectorOfSize(MAX_NUM_CONFIGS));
zero_all_bits(AllProtosOff, WordsInVectorOfSize(MAX_NUM_PROTOS));
zero_all_bits(AllConfigsOff, WordsInVectorOfSize(MAX_NUM_CONFIGS));
for (int i = 0; i < MAX_NUM_CLASSES; i++) {
@ -617,53 +604,11 @@ void Classify::ResetAdaptiveClassifierInternal() {
NumAdaptationsFailed);
}
free_adapted_templates(AdaptedTemplates);
AdaptedTemplates = NULL;
AdaptedTemplates = NewAdaptedTemplates(true);
NumAdaptationsFailed = 0;
}
/*---------------------------------------------------------------------------*/
/**
* Print to File the statistics which have
* been gathered for the adaptive matcher.
*
* @param File open text file to print adaptive statistics to
*
* Globals: none
*
* @note Exceptions: none
* @note History: Thu Apr 18 14:37:37 1991, DSJ, Created.
*/
void Classify::PrintAdaptiveStatistics(FILE *File) {
#ifndef SECURE_NAMES
fprintf (File, "\nADAPTIVE MATCHER STATISTICS:\n");
fprintf (File, "\tNum blobs classified = %d\n", AdaptiveMatcherCalls);
fprintf (File, "\tNum classes output = %d (Avg = %4.2f)\n",
NumClassesOutput,
((AdaptiveMatcherCalls == 0) ? (0.0) :
((float) NumClassesOutput / AdaptiveMatcherCalls)));
fprintf (File, "\t\tBaseline Classifier: %4d calls (%4.2f classes/call)\n",
BaselineClassifierCalls,
((BaselineClassifierCalls == 0) ? (0.0) :
((float) NumBaselineClassesTried / BaselineClassifierCalls)));
fprintf (File, "\t\tCharNorm Classifier: %4d calls (%4.2f classes/call)\n",
CharNormClassifierCalls,
((CharNormClassifierCalls == 0) ? (0.0) :
((float) NumCharNormClassesTried / CharNormClassifierCalls)));
fprintf (File, "\t\tAmbig Classifier: %4d calls (%4.2f classes/call)\n",
AmbigClassifierCalls,
((AmbigClassifierCalls == 0) ? (0.0) :
((float) NumAmbigClassesTried / AmbigClassifierCalls)));
fprintf (File, "\nADAPTIVE LEARNER STATISTICS:\n");
fprintf (File, "\tNumber of words adapted to: %d\n", NumWordsAdaptedTo);
fprintf (File, "\tNumber of chars adapted to: %d\n", NumCharsAdaptedTo);
PrintAdaptedTemplates(File, AdaptedTemplates);
#endif
} /* PrintAdaptiveStatistics */
/*---------------------------------------------------------------------------*/
/**
@ -915,8 +860,6 @@ void Classify::AdaptToChar(TBLOB *Blob,
FEATURE_SET FloatFeatures;
int NewTempConfigId;
ResetFeaturesHaveBeenExtracted();
NumCharsAdaptedTo++;
if (!LegalClassId (ClassId))
return;
@ -932,7 +875,6 @@ void Classify::AdaptToChar(TBLOB *Blob,
if (NumFeatures <= 0)
return;
im_.SetBaseLineMatch();
// Only match configs with the matching font.
BIT_VECTOR MatchingFontConfigs = NewBitVector(MAX_NUM_PROTOS);
for (int cfg = 0; cfg < IClass->NumConfigs; ++cfg) {
@ -1004,17 +946,16 @@ void Classify::AdaptToChar(TBLOB *Blob,
void Classify::DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class) {
#ifndef GRAPHICS_DISABLED
int bloblength = 0;
INT_FEATURE_ARRAY features;
uinT8* norm_array = new uinT8[unicharset.size()];
int num_features = GetBaselineFeatures(blob, PreTrainedTemplates,
features,
norm_array, &bloblength);
delete [] norm_array;
INT_RESULT_STRUCT IntResult;
INT_FX_RESULT_STRUCT fx_info;
GenericVector<INT_FEATURE_STRUCT> bl_features;
TrainingSample* sample =
BlobToTrainingSample(*blob, classify_nonlinear_norm, &fx_info,
&bl_features);
if (sample == NULL) return;
INT_RESULT_STRUCT IntResult;
im_.Match(int_class, AllProtosOn, AllConfigsOn,
num_features, features,
bl_features.size(), &bl_features[0],
&IntResult, classify_adapt_feature_threshold,
NO_DEBUG, matcher_debug_separate_windows);
cprintf ("Best match to temp config %d = %4.1f%%.\n",
@ -1024,7 +965,7 @@ void Classify::DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class) {
ConfigMask = 1 << IntResult.Config;
ShowMatchDisplay();
im_.Match(int_class, AllProtosOn, (BIT_VECTOR)&ConfigMask,
num_features, features,
bl_features.size(), &bl_features[0],
&IntResult, classify_adapt_feature_threshold,
6 | 0x19, matcher_debug_separate_windows);
UpdateMatchDisplay();
@ -1033,50 +974,6 @@ void Classify::DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class) {
}
/*---------------------------------------------------------------------------*/
/**
* @param Blob blob to add to templates for ClassId
* @param ClassId class to add blob to
* @param FontinfoId font information from pre-trained teamples
* @param Threshold minimum match rating to existing template
*
* Globals:
* - PreTrainedTemplates current set of built-in templates
*
* @note Exceptions: none
* @note History: Thu Mar 14 09:36:03 1991, DSJ, Created.
*/
void Classify::AdaptToPunc(TBLOB *Blob,
CLASS_ID ClassId,
int FontinfoId,
FLOAT32 Threshold) {
ADAPT_RESULTS *Results = new ADAPT_RESULTS();
int i;
Results->Initialize();
CharNormClassifier(Blob, PreTrainedTemplates, Results);
RemoveBadMatches(Results);
if (Results->NumMatches != 1) {
if (classify_learning_debug_level >= 1) {
cprintf ("Rejecting punc = %s (Alternatives = ",
unicharset.id_to_unichar(ClassId));
for (i = 0; i < Results->NumMatches; i++)
tprintf("%s", unicharset.id_to_unichar(Results->match[i].unichar_id));
tprintf(")\n");
}
} else {
#ifndef SECURE_NAMES
if (classify_learning_debug_level >= 1)
cprintf ("Adapting to punc = %s, thr= %g\n",
unicharset.id_to_unichar(ClassId), Threshold);
#endif
AdaptToChar(Blob, ClassId, FontinfoId, Threshold);
}
delete Results;
} /* AdaptToPunc */
/*---------------------------------------------------------------------------*/
/**
@ -1167,50 +1064,41 @@ void Classify::AddNewResult(ADAPT_RESULTS *results,
* @note Exceptions: none
* @note History: Tue Mar 12 19:40:36 1991, DSJ, Created.
*/
void Classify::AmbigClassifier(TBLOB *Blob,
INT_TEMPLATES Templates,
ADAPT_CLASS *Classes,
UNICHAR_ID *Ambiguities,
ADAPT_RESULTS *Results) {
int NumFeatures;
INT_FEATURE_ARRAY IntFeatures;
void Classify::AmbigClassifier(
const GenericVector<INT_FEATURE_STRUCT>& int_features,
const INT_FX_RESULT_STRUCT& fx_info,
const TBLOB *blob,
INT_TEMPLATES templates,
ADAPT_CLASS *classes,
UNICHAR_ID *ambiguities,
ADAPT_RESULTS *results) {
if (int_features.empty()) return;
uinT8* CharNormArray = new uinT8[unicharset.size()];
INT_RESULT_STRUCT IntResult;
CLASS_ID ClassId;
AmbigClassifierCalls++;
NumFeatures = GetCharNormFeatures(Blob, Templates, IntFeatures,
NULL, CharNormArray,
&(Results->BlobLength));
if (NumFeatures <= 0) {
delete [] CharNormArray;
return;
}
results->BlobLength = GetCharNormFeature(fx_info, templates, NULL,
CharNormArray);
bool debug = matcher_debug_level >= 2 || classify_debug_level > 1;
if (debug)
tprintf("AM Matches = ");
int top = Blob->bounding_box().top();
int bottom = Blob->bounding_box().bottom();
while (*Ambiguities >= 0) {
ClassId = *Ambiguities;
int top = blob->bounding_box().top();
int bottom = blob->bounding_box().bottom();
while (*ambiguities >= 0) {
CLASS_ID class_id = *ambiguities;
im_.SetCharNormMatch(classify_integer_matcher_multiplier);
im_.Match(ClassForClassId(Templates, ClassId),
im_.Match(ClassForClassId(templates, class_id),
AllProtosOn, AllConfigsOn,
NumFeatures, IntFeatures,
int_features.size(), &int_features[0],
&IntResult,
classify_adapt_feature_threshold, NO_DEBUG,
matcher_debug_separate_windows);
ExpandShapesAndApplyCorrections(NULL, debug, ClassId, bottom, top, 0,
Results->BlobLength, CharNormArray,
IntResult, Results);
Ambiguities++;
NumAmbigClassesTried++;
ExpandShapesAndApplyCorrections(NULL, debug, class_id, bottom, top, 0,
results->BlobLength,
classify_integer_matcher_multiplier,
CharNormArray, IntResult, results);
ambiguities++;
}
delete [] CharNormArray;
} /* AmbigClassifier */
@ -1225,6 +1113,7 @@ void Classify::MasterMatcher(INT_TEMPLATES templates,
ADAPT_CLASS* classes,
int debug,
int num_classes,
int matcher_multiplier,
const TBOX& blob_box,
CLASS_PRUNER_RESULTS results,
ADAPT_RESULTS* final_results) {
@ -1246,7 +1135,8 @@ void Classify::MasterMatcher(INT_TEMPLATES templates,
bool debug = matcher_debug_level >= 2 || classify_debug_level > 1;
ExpandShapesAndApplyCorrections(classes, debug, class_id, bottom, top,
results[c].Rating,
final_results->BlobLength, norm_factors,
final_results->BlobLength,
matcher_multiplier, norm_factors,
int_result, final_results);
}
}
@ -1258,7 +1148,8 @@ void Classify::MasterMatcher(INT_TEMPLATES templates,
// The results are added to the final_results output.
void Classify::ExpandShapesAndApplyCorrections(
ADAPT_CLASS* classes, bool debug, int class_id, int bottom, int top,
float cp_rating, int blob_length, const uinT8* cn_factors,
float cp_rating, int blob_length, int matcher_multiplier,
const uinT8* cn_factors,
INT_RESULT_STRUCT& int_result, ADAPT_RESULTS* final_results) {
// Compute the fontinfo_ids.
int fontinfo_id = kBlankFontinfoId;
@ -1292,7 +1183,7 @@ void Classify::ExpandShapesAndApplyCorrections(
int_result.Rating,
int_result.FeatureMisses,
bottom, top, blob_length,
cn_factors);
matcher_multiplier, cn_factors);
if (c == 0 || rating < min_rating)
min_rating = rating;
if (unicharset.get_enabled(unichar_id)) {
@ -1309,7 +1200,7 @@ void Classify::ExpandShapesAndApplyCorrections(
int_result.Rating,
int_result.FeatureMisses,
bottom, top, blob_length,
cn_factors);
matcher_multiplier, cn_factors);
if (unicharset.get_enabled(class_id)) {
AddNewResult(final_results, class_id, -1, rating,
classes != NULL, int_result.Config,
@ -1325,11 +1216,12 @@ double Classify::ComputeCorrectedRating(bool debug, int unichar_id,
double cp_rating, double im_rating,
int feature_misses,
int bottom, int top,
int blob_length,
int blob_length, int matcher_multiplier,
const uinT8* cn_factors) {
// Compute class feature corrections.
double cn_corrected = im_.ApplyCNCorrection(im_rating, blob_length,
cn_factors[unichar_id]);
cn_factors[unichar_id],
matcher_multiplier);
double miss_penalty = tessedit_class_miss_scale * feature_misses;
double vertical_penalty = 0.0;
// Penalize non-alnums for being vertical misfits.
@ -1383,39 +1275,30 @@ double Classify::ComputeCorrectedRating(bool debug, int unichar_id,
* @note Exceptions: none
* @note History: Tue Mar 12 19:38:03 1991, DSJ, Created.
*/
UNICHAR_ID *Classify::BaselineClassifier(TBLOB *Blob,
ADAPT_TEMPLATES Templates,
ADAPT_RESULTS *Results) {
int NumFeatures;
UNICHAR_ID *Classify::BaselineClassifier(
TBLOB *Blob, const GenericVector<INT_FEATURE_STRUCT>& int_features,
const INT_FX_RESULT_STRUCT& fx_info,
ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results) {
if (int_features.empty()) return NULL;
int NumClasses;
INT_FEATURE_ARRAY IntFeatures;
uinT8* CharNormArray = new uinT8[unicharset.size()];
CLASS_ID ClassId;
ClearCharNormArray(CharNormArray);
BaselineClassifierCalls++;
NumFeatures = GetBaselineFeatures(Blob, Templates->Templates, IntFeatures,
CharNormArray, &Results->BlobLength);
if (NumFeatures <= 0) {
delete [] CharNormArray;
return NULL;
}
NumClasses = PruneClasses(Templates->Templates, NumFeatures, IntFeatures,
Results->BlobLength = IntCastRounded(fx_info.Length / kStandardFeatureLength);
NumClasses = PruneClasses(Templates->Templates, int_features.size(),
&int_features[0],
CharNormArray, BaselineCutoffs, Results->CPResults);
NumBaselineClassesTried += NumClasses;
if (matcher_debug_level >= 2 || classify_debug_level > 1)
cprintf ("BL Matches = ");
im_.SetBaseLineMatch();
MasterMatcher(Templates->Templates, NumFeatures, IntFeatures, CharNormArray,
Templates->Class, matcher_debug_flags, NumClasses,
MasterMatcher(Templates->Templates, int_features.size(), &int_features[0],
CharNormArray,
Templates->Class, matcher_debug_flags, NumClasses, 0,
Blob->bounding_box(), Results->CPResults, Results);
delete [] CharNormArray;
ClassId = Results->best_match.unichar_id;
CLASS_ID ClassId = Results->best_match.unichar_id;
if (ClassId == NO_CLASS)
return (NULL);
/* this is a bug - maybe should return "" */
@ -1445,17 +1328,13 @@ UNICHAR_ID *Classify::BaselineClassifier(TBLOB *Blob,
* @note History: Tue Mar 12 16:02:52 1991, DSJ, Created.
*/
int Classify::CharNormClassifier(TBLOB *blob,
INT_TEMPLATES Templates,
const TrainingSample& sample,
ADAPT_RESULTS *adapt_results) {
CharNormClassifierCalls++;
TrainingSample* sample = BlobToTrainingSample(*blob, NM_CHAR_ANISOTROPIC,
classify_nonlinear_norm);
if (sample == NULL) return 0;
// This is the length that is used for scaling ratings vs certainty.
adapt_results->BlobLength =
IntCastRounded(sample->outline_length() / kStandardFeatureLength);
IntCastRounded(sample.outline_length() / kStandardFeatureLength);
GenericVector<UnicharRating> unichar_results;
static_classifier_->UnicharClassifySample(*sample, blob->denorm().pix(), 0,
static_classifier_->UnicharClassifySample(sample, blob->denorm().pix(), 0,
-1, &unichar_results);
// Convert results to the format used internally by AdaptiveClassifier.
for (int r = 0; r < unichar_results.size(); ++r) {
@ -1468,9 +1347,7 @@ int Classify::CharNormClassifier(TBLOB *blob,
float rating = 1.0f - unichar_results[r].rating;
AddNewResult(adapt_results, unichar_id, -1, rating, false, 0, font1, font2);
}
int num_features = sample->num_features();
delete sample;
return num_features;
return sample.num_features();
} /* CharNormClassifier */
// As CharNormClassifier, but operates on a TrainingSample and outputs to
@ -1518,10 +1395,10 @@ int Classify::CharNormTrainingSample(bool pruner_only,
UnicharRating(class_id, 1.0f - adapt_results->CPResults[i].Rating));
}
} else {
im_.SetCharNormMatch(classify_integer_matcher_multiplier);
MasterMatcher(PreTrainedTemplates, num_features, sample.features(),
char_norm_array,
NULL, matcher_debug_flags, num_classes,
classify_integer_matcher_multiplier,
blob_box, adapt_results->CPResults, adapt_results);
// Convert master matcher results to output format.
for (int i = 0; i < adapt_results->NumMatches; i++) {
@ -1711,8 +1588,10 @@ void Classify::DebugAdaptiveClassifier(TBLOB *blob,
if (i == 0 || Results->match[i].rating < Results->best_match.rating)
Results->best_match = Results->match[i];
}
TrainingSample* sample = BlobToTrainingSample(*blob, NM_CHAR_ANISOTROPIC,
classify_nonlinear_norm);
INT_FX_RESULT_STRUCT fx_info;
GenericVector<INT_FEATURE_STRUCT> bl_features;
TrainingSample* sample =
BlobToTrainingSample(*blob, false, &fx_info, &bl_features);
if (sample == NULL) return;
static_classifier_->DebugDisplay(*sample, blob->denorm().pix(),
Results->best_match.unichar_id);
@ -1745,21 +1624,26 @@ void Classify::DebugAdaptiveClassifier(TBLOB *blob,
void Classify::DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results) {
UNICHAR_ID *Ambiguities;
AdaptiveMatcherCalls++;
InitIntFX();
INT_FX_RESULT_STRUCT fx_info;
GenericVector<INT_FEATURE_STRUCT> bl_features;
TrainingSample* sample =
BlobToTrainingSample(*Blob, classify_nonlinear_norm, &fx_info,
&bl_features);
if (sample == NULL) return;
if (AdaptedTemplates->NumPermClasses < matcher_permanent_classes_min ||
tess_cn_matching) {
CharNormClassifier(Blob, PreTrainedTemplates, Results);
CharNormClassifier(Blob, *sample, Results);
} else {
Ambiguities = BaselineClassifier(Blob, AdaptedTemplates, Results);
Ambiguities = BaselineClassifier(Blob, bl_features, fx_info,
AdaptedTemplates, Results);
if ((Results->NumMatches > 0 &&
MarginalMatch (Results->best_match.rating) &&
!tess_bn_matching) ||
Results->NumMatches == 0) {
CharNormClassifier(Blob, PreTrainedTemplates, Results);
CharNormClassifier(Blob, *sample, Results);
} else if (Ambiguities && *Ambiguities >= 0 && !tess_bn_matching) {
AmbigClassifier(Blob,
AmbigClassifier(bl_features, fx_info, Blob,
PreTrainedTemplates,
AdaptedTemplates->Class,
Ambiguities,
@ -1773,6 +1657,7 @@ void Classify::DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results) {
// just adding a NULL classification.
if (!Results->HasNonfragment || Results->NumMatches == 0)
ClassifyAsNoise(Results);
delete sample;
} /* DoAdaptiveMatch */
/*---------------------------------------------------------------------------*/
@ -1799,8 +1684,15 @@ UNICHAR_ID *Classify::GetAmbiguities(TBLOB *Blob,
int i;
Results->Initialize();
INT_FX_RESULT_STRUCT fx_info;
GenericVector<INT_FEATURE_STRUCT> bl_features;
TrainingSample* sample =
BlobToTrainingSample(*Blob, classify_nonlinear_norm, &fx_info,
&bl_features);
if (sample == NULL) return NULL;
CharNormClassifier(Blob, PreTrainedTemplates, Results);
CharNormClassifier(Blob, *sample, Results);
delete sample;
RemoveBadMatches(Results);
qsort((void *)Results->match, Results->NumMatches,
sizeof(ScoredClass), CompareByRating);
@ -1823,58 +1715,6 @@ UNICHAR_ID *Classify::GetAmbiguities(TBLOB *Blob,
return Ambiguities;
} /* GetAmbiguities */
/*---------------------------------------------------------------------------*/
/**
* This routine calls the integer (Hardware) feature
* extractor if it has not been called before for this blob.
* The results from the feature extractor are placed into
* globals so that they can be used in other routines without
* re-extracting the features.
* It then copies the baseline features into the IntFeatures
* array provided by the caller.
*
* @param Blob blob to extract features from
* @param Templates used to compute char norm adjustments
* @param IntFeatures array to fill with integer features
* @param CharNormArray array to fill with dummy char norm adjustments
* @param BlobLength length of blob in baseline-normalized units
*
* Globals:
* - FeaturesHaveBeenExtracted TRUE if fx has been done
* - BaselineFeatures holds extracted baseline feat
* - CharNormFeatures holds extracted char norm feat
* - FXInfo holds misc. FX info
*
* @return Number of features extracted or 0 if an error occured.
* @note Exceptions: none
* @note History: Tue May 28 10:40:52 1991, DSJ, Created.
*/
int Classify::GetBaselineFeatures(TBLOB *Blob,
INT_TEMPLATES Templates,
INT_FEATURE_ARRAY IntFeatures,
uinT8* CharNormArray,
inT32 *BlobLength) {
if (!FeaturesHaveBeenExtracted) {
FeaturesOK = ExtractIntFeat(*Blob, classify_nonlinear_norm,
BaselineFeatures, CharNormFeatures, &FXInfo);
FeaturesHaveBeenExtracted = TRUE;
}
*BlobLength = IntCastRounded(FXInfo.Length / kStandardFeatureLength);
if (!FeaturesOK) {
return 0;
}
memcpy(IntFeatures, BaselineFeatures, FXInfo.NumBL * sizeof(IntFeatures[0]));
ClearCharNormArray(CharNormArray);
return FXInfo.NumBL;
} /* GetBaselineFeatures */
void Classify::ResetFeaturesHaveBeenExtracted() {
FeaturesHaveBeenExtracted = FALSE;
}
// Returns true if the given blob looks too dissimilar to any character
// present in the classifier templates.
bool Classify::LooksLikeGarbage(TBLOB *blob) {
@ -1921,48 +1761,28 @@ bool Classify::LooksLikeGarbage(TBLOB *blob) {
* @param BlobLength length of blob in baseline-normalized units
*
* Globals:
* - FeaturesHaveBeenExtracted TRUE if fx has been done
* - BaselineFeatures holds extracted baseline feat
* - CharNormFeatures holds extracted char norm feat
* - FXInfo holds misc. FX info
*
* @return Number of features extracted or 0 if an error occured.
* @note Exceptions: none
* @note History: Tue May 28 10:40:52 1991, DSJ, Created.
*/
int Classify::GetCharNormFeatures(TBLOB *Blob,
INT_TEMPLATES Templates,
INT_FEATURE_ARRAY IntFeatures,
uinT8* PrunerNormArray,
uinT8* CharNormArray,
inT32 *BlobLength) {
FEATURE NormFeature;
FLOAT32 Baseline, Scale;
if (!FeaturesHaveBeenExtracted) {
FeaturesOK = ExtractIntFeat(*Blob, classify_nonlinear_norm,
BaselineFeatures, CharNormFeatures, &FXInfo);
FeaturesHaveBeenExtracted = TRUE;
}
*BlobLength = IntCastRounded(FXInfo.Length / kStandardFeatureLength);
if (!FeaturesOK) {
return 0;
}
memcpy(IntFeatures, CharNormFeatures, FXInfo.NumCN * sizeof(IntFeatures[0]));
NormFeature = NewFeature(&CharNormDesc);
Baseline = kBlnBaselineOffset;
Scale = MF_SCALE_FACTOR;
NormFeature->Params[CharNormY] = (FXInfo.Ymean - Baseline) * Scale;
NormFeature->Params[CharNormLength] =
FXInfo.Length * Scale / LENGTH_COMPRESSION;
NormFeature->Params[CharNormRx] = FXInfo.Rx * Scale;
NormFeature->Params[CharNormRy] = FXInfo.Ry * Scale;
ComputeCharNormArrays(NormFeature, Templates, CharNormArray, PrunerNormArray);
return FXInfo.NumCN;
} /* GetCharNormFeatures */
int Classify::GetCharNormFeature(const INT_FX_RESULT_STRUCT& fx_info,
INT_TEMPLATES templates,
uinT8* pruner_norm_array,
uinT8* char_norm_array) {
FEATURE norm_feature = NewFeature(&CharNormDesc);
float baseline = kBlnBaselineOffset;
float scale = MF_SCALE_FACTOR;
norm_feature->Params[CharNormY] = (fx_info.Ymean - baseline) * scale;
norm_feature->Params[CharNormLength] =
fx_info.Length * scale / LENGTH_COMPRESSION;
norm_feature->Params[CharNormRx] = fx_info.Rx * scale;
norm_feature->Params[CharNormRy] = fx_info.Ry * scale;
// Deletes norm_feature.
ComputeCharNormArrays(norm_feature, templates, char_norm_array,
pruner_norm_array);
return IntCastRounded(fx_info.Length / kStandardFeatureLength);
} /* GetCharNormFeature */
// Computes the char_norm_array for the unicharset and, if not NULL, the
// pruner_array as appropriate according to the existence of the shape_table.
@ -2454,7 +2274,6 @@ void Classify::ShowBestMatchFor(int shape_id,
}
INT_RESULT_STRUCT cn_result;
classify_norm_method.set_value(character);
im_.SetCharNormMatch(classify_integer_matcher_multiplier);
im_.Match(ClassForClassId(PreTrainedTemplates, shape_id),
AllProtosOn, AllConfigsOn,
num_features, features, &cn_result,

View File

@ -165,27 +165,13 @@ Classify::Classify()
AdaptedTemplates = NULL;
PreTrainedTemplates = NULL;
AllProtosOn = NULL;
PrunedProtos = NULL;
AllConfigsOn = NULL;
AllProtosOff = NULL;
AllConfigsOff = NULL;
TempProtoMask = NULL;
NormProtos = NULL;
AdaptiveMatcherCalls = 0;
BaselineClassifierCalls = 0;
CharNormClassifierCalls = 0;
AmbigClassifierCalls = 0;
NumWordsAdaptedTo = 0;
NumCharsAdaptedTo = 0;
NumBaselineClassesTried = 0;
NumCharNormClassesTried = 0;
NumAmbigClassesTried = 0;
NumClassesOutput = 0;
NumAdaptationsFailed = 0;
FeaturesHaveBeenExtracted = false;
FeaturesOK = true;
learn_debug_win_ = NULL;
learn_fragmented_word_debug_win_ = NULL;
learn_fragments_debug_win_ = NULL;

View File

@ -145,15 +145,13 @@ class Classify : public CCStruct {
int FontinfoId,
ADAPT_CLASS Class,
ADAPT_TEMPLATES Templates);
void AdaptToPunc(TBLOB *Blob,
CLASS_ID ClassId,
int FontinfoId,
FLOAT32 Threshold);
void AmbigClassifier(TBLOB *Blob,
INT_TEMPLATES Templates,
ADAPT_CLASS *Classes,
UNICHAR_ID *Ambiguities,
ADAPT_RESULTS *Results);
void AmbigClassifier(const GenericVector<INT_FEATURE_STRUCT>& int_features,
const INT_FX_RESULT_STRUCT& fx_info,
const TBLOB *blob,
INT_TEMPLATES templates,
ADAPT_CLASS *classes,
UNICHAR_ID *ambiguities,
ADAPT_RESULTS *results);
void MasterMatcher(INT_TEMPLATES templates,
inT16 num_features,
const INT_FEATURE_STRUCT* features,
@ -161,6 +159,7 @@ class Classify : public CCStruct {
ADAPT_CLASS* classes,
int debug,
int num_classes,
int matcher_multiplier,
const TBOX& blob_box,
CLASS_PRUNER_RESULTS results,
ADAPT_RESULTS* final_results);
@ -175,6 +174,7 @@ class Classify : public CCStruct {
int bottom, int top,
float cp_rating,
int blob_length,
int matcher_multiplier,
const uinT8* cn_factors,
INT_RESULT_STRUCT& int_result,
ADAPT_RESULTS* final_results);
@ -184,7 +184,8 @@ class Classify : public CCStruct {
double ComputeCorrectedRating(bool debug, int unichar_id, double cp_rating,
double im_rating, int feature_misses,
int bottom, int top,
int blob_length, const uinT8* cn_factors);
int blob_length, int matcher_multiplier,
const uinT8* cn_factors);
void ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box,
ADAPT_RESULTS *Results,
BLOB_CHOICE_LIST *Choices);
@ -246,12 +247,13 @@ class Classify : public CCStruct {
// Converts a shape_table_ index to a classifier class_id index (not a
// unichar-id!). Uses a search, so not fast.
int ShapeIDToClassID(int shape_id) const;
UNICHAR_ID *BaselineClassifier(TBLOB *Blob,
ADAPT_TEMPLATES Templates,
ADAPT_RESULTS *Results);
int CharNormClassifier(TBLOB *Blob,
INT_TEMPLATES Templates,
ADAPT_RESULTS *Results);
UNICHAR_ID *BaselineClassifier(
TBLOB *Blob, const GenericVector<INT_FEATURE_STRUCT>& int_features,
const INT_FX_RESULT_STRUCT& fx_info,
ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results);
int CharNormClassifier(TBLOB *blob,
const TrainingSample& sample,
ADAPT_RESULTS *adapt_results);
// As CharNormClassifier, but operates on a TrainingSample and outputs to
// a GenericVector of ShapeRating without conversion to classes.
@ -267,7 +269,6 @@ class Classify : public CCStruct {
void DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class);
bool AdaptableWord(WERD_RES* word);
void EndAdaptiveClassifier();
void PrintAdaptiveStatistics(FILE *File);
void SettupPass1();
void SettupPass2();
void AdaptiveClassifier(TBLOB *Blob,
@ -276,17 +277,10 @@ class Classify : public CCStruct {
void ClassifyAsNoise(ADAPT_RESULTS *Results);
void ResetAdaptiveClassifierInternal();
int GetBaselineFeatures(TBLOB *Blob,
INT_TEMPLATES Templates,
INT_FEATURE_ARRAY IntFeatures,
uinT8* CharNormArray,
inT32 *BlobLength);
int GetCharNormFeatures(TBLOB *Blob,
INT_TEMPLATES Templates,
INT_FEATURE_ARRAY IntFeatures,
uinT8* PrunerNormArray,
uinT8* CharNormArray,
inT32 *BlobLength);
int GetCharNormFeature(const INT_FX_RESULT_STRUCT& fx_info,
INT_TEMPLATES templates,
uinT8* pruner_norm_array,
uinT8* char_norm_array);
// Computes the char_norm_array for the unicharset and, if not NULL, the
// pruner_array as appropriate according to the existence of the shape_table.
// The norm_feature is deleted as it is almost certainly no longer needed.
@ -298,7 +292,6 @@ class Classify : public CCStruct {
bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config);
void UpdateAmbigsGroup(CLASS_ID class_id, TBLOB *Blob);
void ResetFeaturesHaveBeenExtracted();
bool AdaptiveClassifierIsFull() { return NumAdaptationsFailed > 0; }
bool LooksLikeGarbage(TBLOB *blob);
void RefreshDebugWindow(ScrollView **win, const char *msg,
@ -468,9 +461,7 @@ class Classify : public CCStruct {
// Create dummy proto and config masks for use with the built-in templates.
BIT_VECTOR AllProtosOn;
BIT_VECTOR PrunedProtos;
BIT_VECTOR AllConfigsOn;
BIT_VECTOR AllProtosOff;
BIT_VECTOR AllConfigsOff;
BIT_VECTOR TempProtoMask;
bool EnableLearning;
@ -504,34 +495,13 @@ class Classify : public CCStruct {
ShapeTable* shape_table_;
private:
Dict dict_;
// The currently active static classifier.
ShapeClassifier* static_classifier_;
/* variables used to hold performance statistics */
int AdaptiveMatcherCalls;
int BaselineClassifierCalls;
int CharNormClassifierCalls;
int AmbigClassifierCalls;
int NumWordsAdaptedTo;
int NumCharsAdaptedTo;
int NumBaselineClassesTried;
int NumCharNormClassesTried;
int NumAmbigClassesTried;
int NumClassesOutput;
int NumAdaptationsFailed;
/* variables used to hold onto extracted features. This is used
to map from the old scheme in which baseline features and char norm
features are extracted separately, to the new scheme in which they
are extracted at the same time. */
bool FeaturesHaveBeenExtracted;
bool FeaturesOK;
INT_FEATURE_ARRAY BaselineFeatures;
INT_FEATURE_ARRAY CharNormFeatures;
INT_FX_RESULT_STRUCT FXInfo;
// Expected number of features in the class pruner, used to penalize
// unknowns that have too few features (like a c being classified as e) so
// it doesn't recognize everything as '@' or '#'.

View File

@ -78,31 +78,19 @@ namespace tesseract {
// TODO(rays) BlobToTrainingSample must remain a global function until
// the FlexFx and FeatureDescription code can be removed and LearnBlob
// made a member of Classify.
TrainingSample* BlobToTrainingSample(const TBLOB& blob,
tesseract::NormalizationMode mode,
bool nonlinear_norm) {
INT_FX_RESULT_STRUCT fx_info;
GenericVector<INT_FEATURE_STRUCT> bl_features;
TrainingSample* BlobToTrainingSample(
const TBLOB& blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT* fx_info,
GenericVector<INT_FEATURE_STRUCT>* bl_features) {
GenericVector<INT_FEATURE_STRUCT> cn_features;
Classify::ExtractFeatures(blob, nonlinear_norm, &bl_features,
&cn_features, &fx_info, NULL);
Classify::ExtractFeatures(blob, nonlinear_norm, bl_features,
&cn_features, fx_info, NULL);
// TODO(rays) Use blob->PreciseBoundingBox() instead.
TBOX box = blob.bounding_box();
TrainingSample* sample = NULL;
if (mode == tesseract::NM_CHAR_ANISOTROPIC) {
int num_features = fx_info.NumCN;
if (num_features > 0) {
sample = TrainingSample::CopyFromFeatures(fx_info, box, &cn_features[0],
num_features);
}
} else if (mode == tesseract::NM_BASELINE) {
int num_features = fx_info.NumBL;
if (num_features > 0) {
sample = TrainingSample::CopyFromFeatures(fx_info, box, &bl_features[0],
num_features);
}
} else {
ASSERT_HOST(!"Unsupported normalization mode!");
int num_features = fx_info->NumCN;
if (num_features > 0) {
sample = TrainingSample::CopyFromFeatures(*fx_info, box, &cn_features[0],
num_features);
}
if (sample != NULL) {
// Set the bounding box (in original image coordinates) in the sample.

View File

@ -60,9 +60,9 @@ namespace tesseract {
// TODO(rays) BlobToTrainingSample must remain a global function until
// the FlexFx and FeatureDescription code can be removed and LearnBlob
// made a member of Classify.
TrainingSample* BlobToTrainingSample(const TBLOB& blob,
tesseract::NormalizationMode mode,
bool nonlinear_norm);
TrainingSample* BlobToTrainingSample(
const TBLOB& blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT* fx_info,
GenericVector<INT_FEATURE_STRUCT>* bl_features);
}
// Deprecated! Prefer tesseract::Classify::ExtractFeatures instead.

View File

@ -693,13 +693,9 @@ int IntegerMatcher::FindBadFeatures(
/*---------------------------------------------------------------------------*/
void IntegerMatcher::Init(tesseract::IntParam *classify_debug_level,
int classify_integer_matcher_multiplier) {
void IntegerMatcher::Init(tesseract::IntParam *classify_debug_level) {
classify_debug_level_ = classify_debug_level;
/* Set default mode of operation of IntegerMatcher */
SetCharNormMatch(classify_integer_matcher_multiplier);
/* Initialize table for evidence to similarity lookup */
for (int i = 0; i < SE_TABLE_SIZE; i++) {
uinT32 IntSimilarity = i << (27 - SE_TABLE_BITS);
@ -724,17 +720,6 @@ void IntegerMatcher::Init(tesseract::IntParam *classify_debug_level,
evidence_mult_mask_ = ((1 << kIntEvidenceTruncBits) - 1);
}
/*--------------------------------------------------------------------------*/
// Puts the matcher into baseline mode: a zero multiplier makes the
// char-norm correction in ApplyCNCorrection a no-op.
void IntegerMatcher::SetBaseLineMatch() {
  local_matcher_multiplier_ = 0;
}
/*--------------------------------------------------------------------------*/
// Puts the matcher into character-normalized mode: the given multiplier
// weights the normalization_factor term in ApplyCNCorrection.
void IntegerMatcher::SetCharNormMatch(int integer_matcher_multiplier) {
  local_matcher_multiplier_ = integer_matcher_multiplier;
}
/**----------------------------------------------------------------------------
Private Code
@ -1283,10 +1268,11 @@ int IntegerMatcher::FindBestMatch(
// Applies the CN normalization factor to the given rating and returns
// the modified rating.
float IntegerMatcher::ApplyCNCorrection(float rating, int blob_length,
                                        int normalization_factor,
                                        int matcher_multiplier) {
  // Blend the raw rating with the char-norm penalty: matcher_multiplier
  // acts as a pseudo-length for normalization_factor (brought to [0,1]
  // by the /256.0), so a larger multiplier weights the CN term more.
  // Resolved from an interleaved old/new diff: the member
  // local_matcher_multiplier_ is replaced by the matcher_multiplier
  // parameter, matching the declaration that takes 4 arguments.
  return (rating * blob_length +
          matcher_multiplier * normalization_factor / 256.0) /
         (blob_length + matcher_multiplier);
}
/*---------------------------------------------------------------------------*/

View File

@ -102,11 +102,7 @@ class IntegerMatcher {
IntegerMatcher() : classify_debug_level_(0) {}
void Init(tesseract::IntParam *classify_debug_level,
int classify_integer_matcher_multiplier);
void SetBaseLineMatch();
void SetCharNormMatch(int integer_matcher_multiplier);
void Init(tesseract::IntParam *classify_debug_level);
void Match(INT_CLASS ClassTemplate,
BIT_VECTOR ProtoMask,
@ -121,7 +117,7 @@ class IntegerMatcher {
// Applies the CN normalization factor to the given rating and returns
// the modified rating.
float ApplyCNCorrection(float rating, int blob_length,
int normalization_factor);
int normalization_factor, int matcher_multiplier);
int FindGoodProtos(INT_CLASS ClassTemplate,
BIT_VECTOR ProtoMask,
@ -192,7 +188,6 @@ class IntegerMatcher {
uinT32 evidence_table_mask_;
uinT32 mult_trunc_shift_bits_;
uinT32 table_trunc_shift_bits_;
inT16 local_matcher_multiplier_;
tesseract::IntParam *classify_debug_level_;
uinT32 evidence_mult_mask_;
};

View File

@ -235,8 +235,11 @@ FEATURE_SET ExtractIntCNFeatures(TBLOB *blob, const DENORM& bl_denorm,
** Exceptions: none
** History: 8/8/2011, rays, Created.
*/
tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample(
*blob, tesseract::NM_CHAR_ANISOTROPIC, false);
INT_FX_RESULT_STRUCT local_fx_info(fx_info);
GenericVector<INT_FEATURE_STRUCT> bl_features;
tesseract::TrainingSample* sample =
tesseract::BlobToTrainingSample(*blob, false, &local_fx_info,
&bl_features);
if (sample == NULL) return NULL;
int num_features = sample->num_features();
@ -267,8 +270,11 @@ FEATURE_SET ExtractIntGeoFeatures(TBLOB *blob, const DENORM& bl_denorm,
** Exceptions: none
** History: 8/8/2011, rays, Created.
*/
tesseract::TrainingSample* sample = tesseract::BlobToTrainingSample(
*blob, tesseract::NM_CHAR_ANISOTROPIC, false);
INT_FX_RESULT_STRUCT local_fx_info(fx_info);
GenericVector<INT_FEATURE_STRUCT> bl_features;
tesseract::TrainingSample* sample =
tesseract::BlobToTrainingSample(*blob, false, &local_fx_info,
&bl_features);
if (sample == NULL) return NULL;
FEATURE_SET feature_set = NewFeatureSet(1);

View File

@ -119,6 +119,9 @@ Dict::Dict(Image* image_ptr)
"Make AcceptableChoice() always return false. Useful"
" when there is a need to explore all segmentations",
getImage()->getCCUtil()->params()),
BOOL_MEMBER(save_raw_choices, false,
"Deprecated- backward compatablity only",
getImage()->getCCUtil()->params()),
INT_MEMBER(tessedit_truncate_wordchoice_log, 10,
"Max words to keep in list",
getImage()->getCCUtil()->params()),
@ -689,7 +692,7 @@ void Dict::adjust_word(WERD_CHOICE *word,
int Dict::valid_word(const WERD_CHOICE &word, bool numbers_ok) const {
const WERD_CHOICE *word_ptr = &word;
WERD_CHOICE temp_word(word.unicharset());
if (hyphenated()) {
if (hyphenated() && hyphen_word_->unicharset() == word.unicharset()) {
copy_hyphen_info(&temp_word);
temp_word += word;
word_ptr = &temp_word;

View File

@ -613,6 +613,8 @@ class Dict {
BOOL_VAR_H(stopper_no_acceptable_choices, false,
"Make AcceptableChoice() always return false. Useful"
" when there is a need to explore all segmentations");
BOOL_VAR_H(save_raw_choices, false,
"Deprecated- backward compatability only");
INT_VAR_H(tessedit_truncate_wordchoice_log, 10, "Max words to keep in list");
STRING_VAR_H(word_to_debug, "", "Word for which stopper debug information"
" should be printed to stdout");

View File

@ -440,16 +440,32 @@ namespace tesseract {
* enough. The results are returned in the WERD_RES.
*/
void Wordrec::chop_word_main(WERD_RES *word) {
// Initial clean up.
word->ClearRatings();
int num_blobs = word->chopped_word->NumBlobs();
word->ratings = new MATRIX(num_blobs, wordrec_max_join_chunks);
// Run initial classification.
for (int b = 0; b < num_blobs; ++b) {
BLOB_CHOICE_LIST* choices = classify_piece(word->seam_array, b, b,
"Initial:", word->chopped_word,
word->blamer_bundle);
word->ratings->put(b, b, choices);
if (word->ratings == NULL) {
word->ratings = new MATRIX(num_blobs, wordrec_max_join_chunks);
}
if (word->ratings->get(0, 0) == NULL) {
// Run initial classification.
for (int b = 0; b < num_blobs; ++b) {
BLOB_CHOICE_LIST* choices = classify_piece(word->seam_array, b, b,
"Initial:", word->chopped_word,
word->blamer_bundle);
word->ratings->put(b, b, choices);
}
} else {
// Blobs have been pre-classified. Set matrix cell for all blob choices
for (int col = 0; col < word->ratings->dimension(); ++col) {
for (int row = col; row < word->ratings->dimension() &&
row < col + word->ratings->bandwidth(); ++row) {
BLOB_CHOICE_LIST* choices = word->ratings->get(col, row);
if (choices != NULL) {
BLOB_CHOICE_IT bc_it(choices);
for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) {
bc_it.data()->set_matrix_cell(col, row);
}
}
}
}
}
// Run Segmentation Search.