Merge branch 'master' of github.com:tesseract-ocr/tesseract

2025-01-18 06:30:14 +08:00 · 2015-10-02 12:02:04 +03:00 · 2015-10-02 12:02:04 +03:00 · f369585f56
commit f369585f56
parent 25136e40ea 2e7a633f06
78 changed files with 271 additions and 285 deletions
--- a/.gitignore
+++ b/.gitignore
@ -59,6 +59,8 @@ training/wordlist2dawg
 *.o
 *.Plo
 *.a
+*.class
+*.jar

 # tessdata
 *.cube.*
--- a/2
+++ b/2
@ -1,5 +1,5 @@
 This package contains the Tesseract Open Source OCR Engine.
-Orignally developed at Hewlett Packard Laboratories Bristol and
+Originally developed at Hewlett Packard Laboratories Bristol and
 at Hewlett Packard Co, Greeley Colorado, all the code
 in this distribution is now licensed under the Apache License:

--- a/README.md
+++ b/README.md
@ -100,7 +100,7 @@ find its data directory. You must either:
    ./autogen.sh
    ./configure
    make
-    make install
+    sudo make install
    sudo ldconfig

 to move the data files to the standard place, or:
--- a/api/baseapi.cpp
+++ b/api/baseapi.cpp
@ -1660,7 +1660,7 @@ char* TessBaseAPI::GetUNLVText() {
            word->word->space() > 0 &&
            !word->word->flag(W_FUZZY_NON) &&
            !word->word->flag(W_FUZZY_SP)) {
-          /* Write a space to separate from preceeding good text */
+          /* Write a space to separate from preceding good text */
          *ptr++ = ' ';
          last_char_was_tilde = false;
        }
--- a/api/pdfrenderer.cpp
+++ b/api/pdfrenderer.cpp
@ -178,7 +178,7 @@ void TessPDFRenderer::AppendPDFObject(const char *data) {
  AppendString((const char *)data);
 }

-// Helper function to prevent us from accidentaly writing
+// Helper function to prevent us from accidentally writing
 // scientific notation to an HOCR or PDF file. Besides, three
 // decimal points are all you really need.
 double prec(double x) {
--- a/api/tesseractmain.cpp
+++ b/api/tesseractmain.cpp
@ -227,7 +227,7 @@ int main(int argc, char **argv) {
  }

  // We have 2 possible sources of pagesegmode: a config file and
-  // the command line. For backwards compatability reasons, the
+  // the command line. For backwards compatibility reasons, the
  // default in tesseract is tesseract::PSM_SINGLE_BLOCK, but the
  // default for this program is tesseract::PSM_AUTO. We will let
  // the config file take priority, so the command-line default
--- a/ccmain/control.cpp
+++ b/ccmain/control.cpp
@ -1556,7 +1556,7 @@ void Tesseract::match_word_pass_n(int pass_n, WERD_RES *word,
       word->fix_quotes();
      if (tessedit_fix_hyphens)
        word->fix_hyphens();
-      /* Dont trust fix_quotes! - though I think I've fixed the bug */
+      /* Don't trust fix_quotes! - though I think I've fixed the bug */
      if (word->best_choice->length() != word->box_word->length()) {
        tprintf("POST FIX_QUOTES FAIL String:\"%s\"; Strlen=%d;"
                " #Blobs=%d\n",
@ -1694,7 +1694,7 @@ ACCEPTABLE_WERD_TYPE Tesseract::acceptable_word_string(
      goto not_a_word;
    /*
    Allow a single hyphen in a lower case word
-    - dont trust upper case - I've seen several cases of "H" -> "I-I"
+    - don't trust upper case - I've seen several cases of "H" -> "I-I"
    */
    if (lengths[i] == 1 && s[offset] == '-') {
      hyphen_pos = i;
--- a/ccmain/docqual.cpp
+++ b/ccmain/docqual.cpp
@ -129,7 +129,7 @@ inT16 Tesseract::count_outline_errs(char c, inT16 outline_count) {
  int expected_outline_count;

  if (STRING (outlines_odd).contains (c))
-    return 0;                    //Dont use this char
+    return 0;                    //Don't use this char
  else if (STRING (outlines_2).contains (c))
    expected_outline_count = 2;
  else
@ -157,7 +157,7 @@ void Tesseract::quality_based_rejection(PAGE_RES_IT &page_res_it,
 *    - Word segmentation is the same as the original image
 *		- All characters have the expected number of outlines
 * NOTE - the rejection counts are recalculated after unrejection
- *      - CANT do it in a single pass without a bit of fiddling
+ *      - CAN'T do it in a single pass without a bit of fiddling
 *		- keep it simple but inefficient
 *************************************************************************/
 void Tesseract::unrej_good_quality_words(  //unreject potential
@ -403,7 +403,7 @@ void Tesseract::doc_and_block_rejection(  //reject big chunks

 /*************************************************************************
 * reject_whole_page()
- * Dont believe any of it - set the reject map to 00..00 in all words
+ * Don't believe any of it - set the reject map to 00..00 in all words
 *
 *************************************************************************/

--- a/ccmain/fixspace.cpp
+++ b/ccmain/fixspace.cpp
@ -55,7 +55,7 @@ void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor,
  WERD_RES *word_res;
  WERD_RES_LIST fuzzy_space_words;
  inT16 new_length;
-  BOOL8 prevent_null_wd_fixsp;   // DONT process blobless wds
+  BOOL8 prevent_null_wd_fixsp;   // DON'T process blobless wds
  inT32 word_index;              // current word

  block_res_it.set_to_list(&page_res->block_res_list);
@ -222,7 +222,7 @@ void Tesseract::match_current_words(WERD_RES_LIST &words, ROW *row,
 * fuzzy spaces. The problem with the basic measure is that "561 63" would score
 * the same as "56163", though given our knowledge that the space is fuzzy, and
 * that there is a "1" next to the fuzzy space, we need to ensure that "56163"
- * is prefered.
+ * is preferred.
 *
 * The solution is to NOT COUNT the score of any word which has a digit at one
 * end and a "1Il" as the character the other side of the space.
@ -272,8 +272,8 @@ inT16 Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) {
    } else {
      /*
        Can we add the prev word score and potentially count this word?
-        Yes IF it didnt end in a 1 when the first char of this word is a digit
-          AND it didnt end in a digit when the first char of this word is a 1
+        Yes IF it didn't end in a 1 when the first char of this word is a digit
+          AND it didn't end in a digit when the first char of this word is a 1
      */
      word_len = word->reject_map.length();
      current_word_ok_so_far = FALSE;
@ -507,7 +507,7 @@ BOOL8 Tesseract::fixspace_thinks_word_done(WERD_RES *word) {

  /*
    Use all the standard pass 2 conditions for mode 5 in set_done() in
-    reject.c BUT DONT REJECT IF THE WERD IS AMBIGUOUS - FOR SPACING WE DONT
+    reject.c BUT DON'T REJECT IF THE WERD IS AMBIGUOUS - FOR SPACING WE DON'T
    CARE WHETHER WE HAVE of/at on/an etc.
  */
  if (fixsp_done_mode > 0 &&
--- a/ccmain/output.cpp
+++ b/ccmain/output.cpp
@ -297,7 +297,7 @@ UNICHAR_ID Tesseract::get_rep_char(WERD_RES *word) {  // what char is repeated?
 /*************************************************************************
 * SUSPECT LEVELS
 *
- * 0 - dont reject ANYTHING
+ * 0 - don't reject ANYTHING
 * 1,2 - partial rejection
 * 3 - BEST
 *
@ -337,7 +337,7 @@ void Tesseract::set_unlv_suspects(WERD_RES *word_res) {
  rating_per_ch = word.rating() / word_res->reject_map.length();

  if (rating_per_ch >= suspect_rating_per_ch)
-    return;                      //Dont touch bad ratings
+    return;                      //Don't touch bad ratings

  if ((word_res->tess_accepted) || (rating_per_ch < suspect_accept_rating)) {
    /* Unreject any Tess Acceptable word - but NOT tess reject chs*/
--- a/ccmain/paramsd.cpp
+++ b/ccmain/paramsd.cpp
@ -329,13 +329,13 @@ void ParamsEditor::WriteParams(char *filename,
    fclose(fp);
    sprintf (msg_str, "Overwrite file " "%s" "? (Y/N)", filename);
    int a = sv_window_->ShowYesNoDialog(msg_str);
-    if (a == 'n') { return; }  // dont write
+    if (a == 'n') { return; }  // don't write
  }


  fp = fopen (filename, "wb");  // can we write to it?
  if (fp == NULL) {
-    sv_window_->AddMessage("Cant write to file " "%s" "", filename);
+    sv_window_->AddMessage("Can't write to file " "%s" "", filename);
    return;
  }

--- a/ccmain/reject.cpp
+++ b/ccmain/reject.cpp
@ -521,7 +521,7 @@ BOOL8 Tesseract::word_contains_non_1_digit(const char *word,

 /*************************************************************************
 * dont_allow_1Il()
- * Dont unreject LONE accepted 1Il conflict set chars
+ * Don't unreject LONE accepted 1Il conflict set chars
 *************************************************************************/
 void Tesseract::dont_allow_1Il(WERD_RES *word) {
  int i = 0;
@ -633,7 +633,7 @@ void Tesseract::flip_hyphens(WERD_RES *word_res) {
      next_left = 9999;
    else
      next_left = word_res->rebuild_word->blobs[i + 1]->bounding_box().left();
-    // Dont touch small or touching blobs - it is too dangerous.
+    // Don't touch small or touching blobs - it is too dangerous.
    if ((out_box.width() > 8 * word_res->denorm.x_scale()) &&
        (out_box.left() > prev_right) && (out_box.right() < next_left)) {
      aspect_ratio = out_box.width() / (float) out_box.height();
--- a/ccmain/tesseractclass.cpp
+++ b/ccmain/tesseractclass.cpp
@ -136,7 +136,7 @@ Tesseract::Tesseract()
      BOOL_MEMBER(tessedit_fix_fuzzy_spaces, true,
                  "Try to improve fuzzy spaces", this->params()),
      BOOL_MEMBER(tessedit_unrej_any_wd, false,
-                  "Dont bother with word plausibility", this->params()),
+                  "Don't bother with word plausibility", this->params()),
      BOOL_MEMBER(tessedit_fix_hyphens, true, "Crunch double hyphens?",
                  this->params()),
      BOOL_MEMBER(tessedit_redo_xheight, true, "Check/Correct x-height",
@ -310,19 +310,19 @@ Tesseract::Tesseract()
                 this->params()),
      INT_MEMBER(crunch_pot_indicators, 1,
                 "How many potential indicators needed", this->params()),
-      BOOL_MEMBER(crunch_leave_ok_strings, true, "Dont touch sensible strings",
+      BOOL_MEMBER(crunch_leave_ok_strings, true, "Don't touch sensible strings",
                  this->params()),
      BOOL_MEMBER(crunch_accept_ok, true, "Use acceptability in okstring",
                  this->params()),
      BOOL_MEMBER(crunch_leave_accept_strings, false,
-                  "Dont pot crunch sensible strings", this->params()),
+                  "Don't pot crunch sensible strings", this->params()),
      BOOL_MEMBER(crunch_include_numerals, false, "Fiddle alpha figures",
                  this->params()),
      INT_MEMBER(crunch_leave_lc_strings, 4,
-                 "Dont crunch words with long lower case strings",
+                 "Don't crunch words with long lower case strings",
                 this->params()),
      INT_MEMBER(crunch_leave_uc_strings, 4,
-                 "Dont crunch words with long lower case strings",
+                 "Don't crunch words with long lower case strings",
                 this->params()),
      INT_MEMBER(crunch_long_repetitions, 3,
                 "Crunch words with long repetitions", this->params()),
@ -393,21 +393,21 @@ Tesseract::Tesseract()
      INT_MEMBER(suspect_space_level, 100,
                 "Min suspect level for rejecting spaces", this->params()),
      INT_MEMBER(suspect_short_words, 2,
-                 "Dont Suspect dict wds longer than this", this->params()),
+                 "Don't suspect dict wds longer than this", this->params()),
      BOOL_MEMBER(suspect_constrain_1Il, false, "UNLV keep 1Il chars rejected",
                  this->params()),
-      double_MEMBER(suspect_rating_per_ch, 999.9, "Dont touch bad rating limit",
+      double_MEMBER(suspect_rating_per_ch, 999.9, "Don't touch bad rating limit",
                    this->params()),
      double_MEMBER(suspect_accept_rating, -999.9, "Accept good rating limit",
                    this->params()),
      BOOL_MEMBER(tessedit_minimal_rejection, false,
                  "Only reject tess failures", this->params()),
-      BOOL_MEMBER(tessedit_zero_rejection, false, "Dont reject ANYTHING",
+      BOOL_MEMBER(tessedit_zero_rejection, false, "Don't reject ANYTHING",
                  this->params()),
      BOOL_MEMBER(tessedit_word_for_word, false,
                  "Make output have exactly one word per WERD", this->params()),
      BOOL_MEMBER(tessedit_zero_kelvin_rejection, false,
-                  "Dont reject ANYTHING AT ALL", this->params()),
+                  "Don't reject ANYTHING AT ALL", this->params()),
      BOOL_MEMBER(tessedit_consistent_reps, true,
                  "Force all rep chars the same", this->params()),
      INT_MEMBER(tessedit_reject_mode, 0, "Rejection algorithm",
@ -424,7 +424,7 @@ Tesseract::Tesseract()
                  "Use DOC dawg in 11l conf. detector", this->params()),
      BOOL_MEMBER(rej_1Il_use_dict_word, false, "Use dictword test",
                  this->params()),
-      BOOL_MEMBER(rej_1Il_trust_permuter_type, true, "Dont double check",
+      BOOL_MEMBER(rej_1Il_trust_permuter_type, true, "Don't double check",
                  this->params()),
      BOOL_MEMBER(rej_use_tess_accepted, true, "Individual rejection control",
                  this->params()),
--- a/ccmain/tesseractclass.h
+++ b/ccmain/tesseractclass.h
@ -733,7 +733,7 @@ class Tesseract : public Wordrec {
                               GenericVector<UNICHAR_ID>* class_ids);
  // Resegments the word to achieve the target_text from the classifier.
  // Returns false if the re-segmentation fails.
-  // Uses brute-force combination of upto kMaxGroupSize adjacent blobs, and
+  // Uses brute-force combination of up to kMaxGroupSize adjacent blobs, and
  // applies a full search on the classifier results to find the best classified
  // segmentation. As a compromise to obtain better recall, 1-1 ambigiguity
  // substitutions ARE used.
@ -833,7 +833,7 @@ class Tesseract : public Wordrec {
  BOOL_VAR_H(tessedit_fix_fuzzy_spaces, true,
             "Try to improve fuzzy spaces");
  BOOL_VAR_H(tessedit_unrej_any_wd, false,
-             "Dont bother with word plausibility");
+             "Don't bother with word plausibility");
  BOOL_VAR_H(tessedit_fix_hyphens, true, "Crunch double hyphens?");
  BOOL_VAR_H(tessedit_redo_xheight, true, "Check/Correct x-height");
  BOOL_VAR_H(tessedit_enable_doc_dict, true,
@ -954,15 +954,15 @@ class Tesseract : public Wordrec {
  double_VAR_H(crunch_small_outlines_size, 0.6, "Small if lt xht x this");
  INT_VAR_H(crunch_rating_max, 10, "For adj length in rating per ch");
  INT_VAR_H(crunch_pot_indicators, 1, "How many potential indicators needed");
-  BOOL_VAR_H(crunch_leave_ok_strings, true, "Dont touch sensible strings");
+  BOOL_VAR_H(crunch_leave_ok_strings, true, "Don't touch sensible strings");
  BOOL_VAR_H(crunch_accept_ok, true, "Use acceptability in okstring");
  BOOL_VAR_H(crunch_leave_accept_strings, false,
-             "Dont pot crunch sensible strings");
+             "Don't pot crunch sensible strings");
  BOOL_VAR_H(crunch_include_numerals, false, "Fiddle alpha figures");
  INT_VAR_H(crunch_leave_lc_strings, 4,
-            "Dont crunch words with long lower case strings");
+            "Don't crunch words with long lower case strings");
  INT_VAR_H(crunch_leave_uc_strings, 4,
-            "Dont crunch words with long lower case strings");
+            "Don't crunch words with long lower case strings");
  INT_VAR_H(crunch_long_repetitions, 3, "Crunch words with long repetitions");
  INT_VAR_H(crunch_debug, 0, "As it says");
  INT_VAR_H(fixsp_non_noise_limit, 1,
@ -1010,16 +1010,16 @@ class Tesseract : public Wordrec {
  INT_VAR_H(suspect_space_level, 100,
            "Min suspect level for rejecting spaces");
   INT_VAR_H(suspect_short_words, 2,
-            "Dont Suspect dict wds longer than this");
+            "Don't suspect dict wds longer than this");
  BOOL_VAR_H(suspect_constrain_1Il, false, "UNLV keep 1Il chars rejected");
-  double_VAR_H(suspect_rating_per_ch, 999.9, "Dont touch bad rating limit");
+  double_VAR_H(suspect_rating_per_ch, 999.9, "Don't touch bad rating limit");
  double_VAR_H(suspect_accept_rating, -999.9, "Accept good rating limit");
  BOOL_VAR_H(tessedit_minimal_rejection, false, "Only reject tess failures");
-  BOOL_VAR_H(tessedit_zero_rejection, false, "Dont reject ANYTHING");
+  BOOL_VAR_H(tessedit_zero_rejection, false, "Don't reject ANYTHING");
  BOOL_VAR_H(tessedit_word_for_word, false,
             "Make output have exactly one word per WERD");
  BOOL_VAR_H(tessedit_zero_kelvin_rejection, false,
-             "Dont reject ANYTHING AT ALL");
+             "Don't reject ANYTHING AT ALL");
  BOOL_VAR_H(tessedit_consistent_reps, true, "Force all rep chars the same");
  INT_VAR_H(tessedit_reject_mode, 0, "Rejection algorithm");
  BOOL_VAR_H(tessedit_rejection_debug, false, "Adaption debug");
@ -1030,7 +1030,7 @@ class Tesseract : public Wordrec {
               "Aspect ratio dot/hyphen test");
  BOOL_VAR_H(rej_trust_doc_dawg, false, "Use DOC dawg in 11l conf. detector");
  BOOL_VAR_H(rej_1Il_use_dict_word, false, "Use dictword test");
-  BOOL_VAR_H(rej_1Il_trust_permuter_type, true, "Dont double check");
+  BOOL_VAR_H(rej_1Il_trust_permuter_type, true, "Don't double check");
  BOOL_VAR_H(rej_use_tess_accepted, true, "Individual rejection control");
  BOOL_VAR_H(rej_use_tess_blanks, true, "Individual rejection control");
  BOOL_VAR_H(rej_use_good_perm, true, "Individual rejection control");
--- a/ccstruct/blobbox.cpp
+++ b/ccstruct/blobbox.cpp
@ -33,7 +33,7 @@

 ELISTIZE (BLOBNBOX) ELIST2IZE (TO_ROW) ELISTIZE (TO_BLOCK)

-// Upto 30 degrees is allowed for rotations of diacritic blobs.
+// Up to 30 degrees is allowed for rotations of diacritic blobs.
 const double kCosSmallAngle = 0.866;
 // Min aspect ratio for a joined word to indicate an obvious flow direction.
 const double kDefiniteAspectRatio = 2.0;
--- a/ccstruct/boxread.cpp
+++ b/ccstruct/boxread.cpp
@ -35,7 +35,7 @@ FILE* OpenBoxFile(const STRING& fname) {
  FILE* box_file = NULL;
  if (!(box_file = fopen(filename.string(), "rb"))) {
    CANTOPENFILE.error("read_next_box", TESSEXIT,
-                       "Cant open box file %s",
+                       "Can't open box file %s",
                       filename.string());
  }
  return box_file;
--- a/ccstruct/normalis.cpp
+++ b/ccstruct/normalis.cpp
@ -382,7 +382,7 @@ void DENORM::LocalDenormTransform(const FCOORD& pt, FCOORD* original) const {
 }

 // Transforms the given coords all the way back to source image space using
-// the full transformation sequence defined by this and its predecesors
+// the full transformation sequence defined by this and its predecessors
 // recursively, shallowest first, and finally any block re_rotation.
 // If last_denorm is not NULL, then the last transformation used will
 // be last_denorm, and the block re_rotation will never be executed.
--- a/ccstruct/normalis.h
+++ b/ccstruct/normalis.h
@ -218,7 +218,7 @@ class DENORM {
  void LocalDenormTransform(const TPOINT& pt, TPOINT* original) const;
  void LocalDenormTransform(const FCOORD& pt, FCOORD* original) const;
  // Transforms the given coords all the way back to source image space using
-  // the full transformation sequence defined by this and its predecesors
+  // the full transformation sequence defined by this and its predecessors
  // recursively, shallowest first, and finally any block re_rotation.
  // If last_denorm is not NULL, then the last transformation used will
  // be last_denorm, and the block re_rotation will never be executed.
--- a/ccstruct/pdblock.h
+++ b/ccstruct/pdblock.h
@ -108,7 +108,7 @@ class PDBLK
    PDBLK & operator= (const PDBLK & source);

  protected:
-    POLY_BLOCK *hand_poly;       //< wierd as well
+    POLY_BLOCK *hand_poly;       //< weird as well
    ICOORDELT_LIST leftside;     //< left side vertices
    ICOORDELT_LIST rightside;    //< right side vertices
    TBOX box;                    //< bounding box
--- a/ccstruct/rejctmap.h
+++ b/ccstruct/rejctmap.h
@ -16,7 +16,7 @@
 ** limitations under the License.
 *

-This module may look unneccessarily verbose, but here's the philosophy...
+This module may look unnecessarily verbose, but here's the philosophy...

 ALL processing of the reject map is done in this module. There are lots of
 separate calls to set reject/accept flags. These have DELIBERATELY been kept
@ -51,7 +51,7 @@ OF THIS IMPLIED TEMPORAL ORDERING OF THE FLAGS!!!!
 enum REJ_FLAGS
 {
  /* Reject modes which are NEVER overridden */
-  R_TESS_FAILURE,                // PERM Tess didnt classify
+  R_TESS_FAILURE,                // PERM Tess didn't classify
  R_SMALL_XHT,                   // PERM Xht too small
  R_EDGE_CHAR,                   // PERM Too close to edge of image
  R_1IL_CONFLICT,                // PERM 1Il confusion
@ -62,7 +62,7 @@ enum REJ_FLAGS

  /* Initial reject modes (pre NN_ACCEPT) */
  R_POOR_MATCH,                  // TEMP Ray's original heuristic (Not used)
-  R_NOT_TESS_ACCEPTED,           // TEMP Tess didnt accept WERD
+  R_NOT_TESS_ACCEPTED,           // TEMP Tess didn't accept WERD
  R_CONTAINS_BLANKS,             // TEMP Tess failed on other chs in WERD
  R_BAD_PERMUTER,                // POTENTIAL Bad permuter for WERD

@ -82,7 +82,7 @@ enum REJ_FLAGS
  R_ROW_REJ,                     // TEMP Row rejection
  R_UNLV_REJ,                    // TEMP ~ turned to - or ^ turned to space

-  /* Accept modes which occur inbetween the above rejection groups */
+  /* Accept modes which occur between the above rejection groups */
  R_NN_ACCEPT,                   //NN acceptance
  R_HYPHEN_ACCEPT,               //Hyphen acceptance
  R_MM_ACCEPT,                   //Matrix match acceptance
--- a/ccstruct/statistc.cpp
+++ b/ccstruct/statistc.cpp
@ -204,7 +204,7 @@ double STATS::ile(double frac) const {
 /**********************************************************************
 * STATS::min_bucket
 *
- * Find REAL minimum bucket - ile(0.0) isnt necessarily correct
+ * Find REAL minimum bucket - ile(0.0) isn't necessarily correct
 **********************************************************************/
 inT32 STATS::min_bucket() const {  // Find min
  if (buckets_ == NULL || total_count_ == 0) {
@ -219,7 +219,7 @@ inT32 STATS::min_bucket() const {  // Find min
 /**********************************************************************
 * STATS::max_bucket
 *
- * Find REAL maximum bucket - ile(1.0) isnt necessarily correct
+ * Find REAL maximum bucket - ile(1.0) isn't necessarily correct
 **********************************************************************/

 inT32 STATS::max_bucket() const {  // Find max
@ -249,7 +249,7 @@ double STATS::median() const {  //get median
  if ((total_count_ > 1) && (pile_count(median_pile) == 0)) {
    inT32 min_pile;
    inT32 max_pile;
-    /* Find preceeding non zero pile */
+    /* Find preceding non zero pile */
    for (min_pile = median_pile; pile_count(min_pile) == 0; min_pile--);
    /* Find following non zero pile */
    for (max_pile = median_pile; pile_count(max_pile) == 0; max_pile++);
--- a/ccstruct/vecfuncs.cpp
+++ b/ccstruct/vecfuncs.cpp
@ -23,7 +23,7 @@
 *
 ********************************************************************************
 * Revision 5.1  89/07/27  11:47:50  11:47:50  ray ()
- * Added ratings acces methods.
+ * Added ratings access methods.
 * This version ready for independent development.
 */
 /*----------------------------------------------------------------------
--- a/ccutil/clst.cpp
+++ b/ccutil/clst.cpp
@ -190,7 +190,7 @@ const void *, const void *)) {

 // Assuming list has been sorted already, insert new_data to
 // keep the list sorted according to the same comparison function.
-// Comparision function is the same as used by sort, i.e. uses double
+// Comparison function is the same as used by sort, i.e. uses double
 // indirection. Time is O(1) to add to beginning or end.
 // Time is linear to add pre-sorted items to an empty list.
 // If unique, then don't add duplicate entries.
@ -513,7 +513,7 @@ CLIST_LINK *CLIST_ITERATOR::extract_sublist(                             //from

  temp_it.mark_cycle_pt ();
  do {                           //walk sublist
-    if (temp_it.cycled_list ())  //cant find end pt
+    if (temp_it.cycled_list ())  //can't find end pt
      BAD_SUBLIST.error ("CLIST_ITERATOR.extract_sublist", ABORT, NULL);

    if (temp_it.at_last ()) {
--- a/ccutil/clst.h
+++ b/ccutil/clst.h
@ -51,11 +51,11 @@ class DLLSYM CLIST_LINK
    }

    CLIST_LINK(                       //copy constructor
-               const CLIST_LINK &) {  //dont copy link
+               const CLIST_LINK &) {  //don't copy link
      data = next = NULL;
    }

-    void operator= (             //dont copy links
+    void operator= (             //don't copy links
    const CLIST_LINK &) {
      data = next = NULL;
    }
@ -89,7 +89,7 @@ class DLLSYM CLIST
    void internal_deep_clear (   //destroy all links
      void (*zapper) (void *));  //ptr to zapper functn

-    void shallow_clear();  //clear list but dont
+    void shallow_clear();  //clear list but don't
    //delete data elements

    bool empty() const {  //is list empty?
@ -117,7 +117,7 @@ class DLLSYM CLIST

    // Assuming list has been sorted already, insert new_data to
    // keep the list sorted according to the same comparison function.
-    // Comparision function is the same as used by sort, i.e. uses double
+    // Comparison function is the same as used by sort, i.e. uses double
    // indirection. Time is O(1) to add to beginning or end.
    // Time is linear to add pre-sorted items to an empty list.
    // If unique, then don't add duplicate entries.
@ -232,7 +232,7 @@ class DLLSYM CLIST_ITERATOR
    BOOL8 cycled_list();  //Completed a cycle?

    void add_to_end(                  //add at end &
-                    void *new_data);  //dont move
+                    void *new_data);  //don't move

    void exchange(                            //positions of 2 links
                  CLIST_ITERATOR *other_it);  //other iterator
@ -437,7 +437,7 @@ inline void CLIST_ITERATOR::add_before_then_move(  // element to add
 /***********************************************************************
 *							CLIST_ITERATOR::add_before_stay_put
 *
- *  Add a new element to the list before the current element but dont move the
+ *  Add a new element to the list before the current element but don't move the
 *  iterator to the new element.
 **********************************************************************/

@ -485,7 +485,7 @@ inline void CLIST_ITERATOR::add_before_stay_put(  // element to add
 /***********************************************************************
 *							CLIST_ITERATOR::add_list_after
 *
- *  Insert another list to this list after the current element but dont move the
+ *  Insert another list to this list after the current element but don't move the
 *  iterator.
 **********************************************************************/

@ -836,7 +836,7 @@ Replace <parm> with "<parm>".  <parm> may be an arbitrary number of tokens

 CLASSNAME is assumed to be the name of a class to be used in a CONS list

-NOTE:  Because we dont use virtual functions in the list code, the list code
+NOTE:  Because we don't use virtual functions in the list code, the list code
 will NOT work correctly for classes derived from this.

 The macro generates:
@ -885,7 +885,7 @@ public:																			\
 							CLASSNAME##_CLIST():CLIST() {}						\
 														/* constructor */		\
 																				\
-							CLASSNAME##_CLIST(	/* dont construct */			\
+							CLASSNAME##_CLIST(	/* don't construct */			\
 	const CLASSNAME##_CLIST&)							/*by initial assign*/	\
 	{ DONT_CONSTRUCT_LIST_BY_COPY.error( QUOTE_IT( CLASSNAME##_CLIST ),			\
 														ABORT, NULL ); }		\
@ -963,7 +963,7 @@ CLISTIZEH_C( CLASSNAME )
 *  A function which can delete a CLASSNAME element.  This is passed to the		\
 *  generic deep_clear list member function so that when a list is cleared the	\
 *  elements on the list are properly destroyed from the base class, even		\
-*  though we dont use a virtual destructor function.							\
+*  though we don't use a virtual destructor function.							\
 **********************************************************************/			\
 																				\
 DLLSYM void					CLASSNAME##_c1_zapper(		/*delete a link*/		\
--- a/ccutil/elst.cpp
+++ b/ccutil/elst.cpp
@ -117,7 +117,7 @@ inT32 ELIST::length() const {  // count elements
 *							ELIST::sort
 *
 *  Sort elements on list
- *  NB If you dont like the const declarations in the comparator, coerce yours:
+ *  NB If you don't like the const declarations in the comparator, coerce yours:
 *   ( int (*)(const void *, const void *)
 **********************************************************************/

@ -161,7 +161,7 @@ const void *, const void *)) {

 // Assuming list has been sorted already, insert new_link to
 // keep the list sorted according to the same comparison function.
-// Comparision function is the same as used by sort, i.e. uses double
+// Comparison function is the same as used by sort, i.e. uses double
 // indirection. Time is O(1) to add to beginning or end.
 // Time is linear to add pre-sorted items to an empty list.
 // If unique is set to true and comparator() returns 0 (an entry with the
@ -455,7 +455,7 @@ ELIST_LINK *ELIST_ITERATOR::extract_sublist(                             //from

  temp_it.mark_cycle_pt ();
  do {                           //walk sublist
-    if (temp_it.cycled_list ())  //cant find end pt
+    if (temp_it.cycled_list ())  //can't find end pt
      BAD_SUBLIST.error ("ELIST_ITERATOR.extract_sublist", ABORT, NULL);

    if (temp_it.at_last ()) {
--- a/ccutil/elst.h
+++ b/ccutil/elst.h
@ -67,7 +67,7 @@ The implementation of lists is very careful about space and speed overheads.
 This is why many embedded lists are provided. The same concerns mean that
 in-line type coercion is done, rather than use virtual functions.  This is
 cumbersome in that each data type to be listed requires its own iterator and
-list class - though macros can gererate these.  It also prevents heterogenous
+list class - though macros can generate these.  It also prevents heterogeneous
 lists.
 **********************************************************************/

@ -98,7 +98,7 @@ class DLLSYM ELIST_LINK
      next = NULL;
    }

-    void operator= (             //dont copy links
+    void operator= (             //don't copy links
    const ELIST_LINK &) {
      next = NULL;
    }
@ -158,7 +158,7 @@ class DLLSYM ELIST

    // Assuming list has been sorted already, insert new_link to
    // keep the list sorted according to the same comparison function.
-    // Comparision function is the same as used by sort, i.e. uses double
+    // Comparison function is the same as used by sort, i.e. uses double
    // indirection. Time is O(1) to add to beginning or end.
    // Time is linear to add pre-sorted items to an empty list.
    // If unique is set to true and comparator() returns 0 (an entry with the
@ -274,7 +274,7 @@ class DLLSYM ELIST_ITERATOR
    bool cycled_list();  //Completed a cycle?

    void add_to_end(                        //add at end &
-                    ELIST_LINK *new_link);  //dont move
+                    ELIST_LINK *new_link);  //don't move

    void exchange(                            //positions of 2 links
                  ELIST_ITERATOR *other_it);  //other iterator
@ -470,7 +470,7 @@ inline void ELIST_ITERATOR::add_before_then_move(  // element to add
 /***********************************************************************
 *                          ELIST_ITERATOR::add_before_stay_put
 *
- *  Add a new element to the list before the current element but dont move the
+ *  Add a new element to the list before the current element but don't move the
 *  iterator to the new element.
 **********************************************************************/

@ -515,7 +515,7 @@ inline void ELIST_ITERATOR::add_before_stay_put(  // element to add
 /***********************************************************************
 *                          ELIST_ITERATOR::add_list_after
 *
- *  Insert another list to this list after the current element but dont move the
+ *  Insert another list to this list after the current element but don't move the
 *  iterator.
 **********************************************************************/

@ -868,7 +868,7 @@ Replace <parm> with "<parm>".  <parm> may be an arbitrary number of tokens
 CLASSNAME is assumed to be the name of a class which has a baseclass of
 ELIST_LINK.

-NOTE:  Because we dont use virtual functions in the list code, the list code
+NOTE:  Because we don't use virtual functions in the list code, the list code
 will NOT work correctly for classes derived from this.

 The macros generate:
@ -999,7 +999,7 @@ ELISTIZEH_C( CLASSNAME )
 *  A function which can delete a CLASSNAME element.  This is passed to the  \
 *  generic clear list member function so that when a list is cleared the    \
 *  elements on the list are properly destroyed from the base class, even    \
-*  though we dont use a virtual destructor function.                        \
+*  though we don't use a virtual destructor function.                       \
 **********************************************************************/     \
                                                                            \
 DLLSYM void CLASSNAME##_zapper(ELIST_LINK* link) {                          \
--- a/ccutil/elst2.cpp
+++ b/ccutil/elst2.cpp
@ -118,7 +118,7 @@ inT32 ELIST2::length() const {  // count elements
 *							ELIST2::sort
 *
 *  Sort elements on list
- *  NB If you dont like the const declarations in the comparator, coerce yours:
+ *  NB If you don't like the const declarations in the comparator, coerce yours:
 *   ( int (*)(const void *, const void *)
 **********************************************************************/

@ -162,7 +162,7 @@ const void *, const void *)) {

 // Assuming list has been sorted already, insert new_link to
 // keep the list sorted according to the same comparison function.
-// Comparision function is the same as used by sort, i.e. uses double
+// Comparison function is the same as used by sort, i.e. uses double
 // indirection. Time is O(1) to add to beginning or end.
 // Time is linear to add pre-sorted items to an empty list.
 void ELIST2::add_sorted(int comparator(const void*, const void*),
@ -475,7 +475,7 @@ ELIST2_LINK *ELIST2_ITERATOR::extract_sublist(                              //fr

  temp_it.mark_cycle_pt ();
  do {                           //walk sublist
-    if (temp_it.cycled_list ())  //cant find end pt
+    if (temp_it.cycled_list ())  //can't find end pt
      BAD_SUBLIST.error ("ELIST2_ITERATOR.extract_sublist", ABORT, NULL);

    if (temp_it.at_last ()) {
--- a/ccutil/elst2.h
+++ b/ccutil/elst2.h
@ -69,11 +69,11 @@ class DLLSYM ELIST2_LINK
    }

    ELIST2_LINK(                        //copy constructor
-                const ELIST2_LINK &) {  //dont copy link
+                const ELIST2_LINK &) {  //don't copy link
      prev = next = NULL;
    }

-    void operator= (             //dont copy links
+    void operator= (             //don't copy links
    const ELIST2_LINK &) {
      prev = next = NULL;
    }
@ -133,7 +133,7 @@ class DLLSYM ELIST2

    // Assuming list has been sorted already, insert new_link to
    // keep the list sorted according to the same comparison function.
-    // Comparision function is the same as used by sort, i.e. uses double
+    // Comparison function is the same as used by sort, i.e. uses double
    // indirection. Time is O(1) to add to beginning or end.
    // Time is linear to add pre-sorted items to an empty list.
    void add_sorted(int comparator(const void*, const void*),
@ -241,7 +241,7 @@ class DLLSYM ELIST2_ITERATOR
    BOOL8 cycled_list();  //Completed a cycle?

    void add_to_end(                         //add at end &
-                    ELIST2_LINK *new_link);  //dont move
+                    ELIST2_LINK *new_link);  //don't move

    void exchange(                             //positions of 2 links
                  ELIST2_ITERATOR *other_it);  //other iterator
@ -450,7 +450,7 @@ inline void ELIST2_ITERATOR::add_before_then_move(  // element to add
 /***********************************************************************
 *							ELIST2_ITERATOR::add_before_stay_put
 *
- *  Add a new element to the list before the current element but dont move the
+ *  Add a new element to the list before the current element but don't move the
 *  iterator to the new element.
 **********************************************************************/

@ -500,7 +500,7 @@ inline void ELIST2_ITERATOR::add_before_stay_put(  // element to add
 /***********************************************************************
 *							ELIST2_ITERATOR::add_list_after
 *
- *  Insert another list to this list after the current element but dont move the
+ *  Insert another list to this list after the current element but don't move the
 *  iterator.
 **********************************************************************/

@ -883,7 +883,7 @@ Replace <parm> with "<parm>".  <parm> may be an arbitrary number of tokens
 CLASSNAME is assumed to be the name of a class which has a baseclass of
 ELIST2_LINK.

-NOTE:  Because we dont use virtual functions in the list code, the list code
+NOTE:  Because we don't use virtual functions in the list code, the list code
 will NOT work correctly for classes derived from this.

 The macro generates:
@ -927,7 +927,7 @@ public:																								\
 							CLASSNAME##_LIST():ELIST2() {} \
 														/* constructor */		\
 																										\
-							CLASSNAME##_LIST(			/* dont construct */ \
+							CLASSNAME##_LIST(			/* don't construct */ \
 	const CLASSNAME##_LIST&)							/*by initial assign*/\
 	{ DONT_CONSTRUCT_LIST_BY_COPY.error( QUOTE_IT( CLASSNAME##_LIST ),      \
 														ABORT, NULL ); }							\
@ -1015,7 +1015,7 @@ ELIST2IZEH_C( CLASSNAME )
 *  A function which can delete a CLASSNAME element.  This is passed to the		\
 *  generic clear list member function so that when a list is cleared the		\
 *  elements on the list are properly destroyed from the base class, even		\
-*  though we dont use a virtual destructor function.									\
+*  though we don't use a virtual destructor function.									\
 **********************************************************************/			\
 																										\
 DLLSYM void					CLASSNAME##_zapper(			/*delete a link*/		\
--- a/ccutil/errcode.h
+++ b/ccutil/errcode.h
@ -53,7 +53,7 @@ enum TessErrorLogCode {
 #define LOC_DOC_BLK_REJ   22
 #define LOC_WRITE_RESULTS 23
 #define LOC_ADAPTIVE    24
-/* DONT DEFINE ANY LOCATION > 31 !!! */
+/* DON'T DEFINE ANY LOCATION > 31 !!! */

 /* Sub locatation determines whether pass2 was in normal mode or fix xht mode*/
 #define SUBLOC_NORM     0
--- a/ccutil/genericvector.h
+++ b/ccutil/genericvector.h
@ -949,7 +949,7 @@ bool GenericVector<T>::SerializeClasses(tesseract::TFile* fp) const {

 // Reads a vector of classes from the given file. Assumes the existence of
 // bool T::Deserialize(bool swap, FILE* fp) that returns false in case of
-// error. Alse needs T::T() and T::T(constT&), as init_to_size is used in
+// error. Also needs T::T() and T::T(const T&), as init_to_size is used in
 // this function. Returns false in case of error.
 // If swap is true, assumes a big/little-endian swap is needed.
 template <typename T>
--- a/ccutil/helpers.h
+++ b/ccutil/helpers.h
@ -61,8 +61,8 @@ class TRand {
 private:
  // Steps the generator to the next value.
  void Iterate() {
-    seed_ *= 6364136223846793005;
-    seed_ += 1442695040888963407;
+    seed_ *= 6364136223846793005ULL;
+    seed_ += 1442695040888963407ULL;
  }

  // The current value of the seed.
--- a/ccutil/lsterr.h
+++ b/ccutil/lsterr.h
@ -38,6 +38,6 @@ const ERRCODE NULL_PREV = "Previous element on the list is NULL";
 const ERRCODE EMPTY_LIST = "List is empty";
 const ERRCODE BAD_PARAMETER = "List parameter error";
 const ERRCODE STILL_LINKED =
-"Attemting to add an element with non NULL links, to a list";
+"Attempting to add an element with non NULL links, to a list";
 #endif
 #endif
--- a/ccutil/ocrclass.h
+++ b/ccutil/ocrclass.h
@ -21,7 +21,7 @@
 * the HP OCR interface.
 * The code is designed to be used with either a C or C++ compiler.
 * The structures are designed to allow them to be used with any
- * structure alignment upto 8.
+ * structure alignment up to 8.
 **********************************************************************/

 #ifndef            CCUTIL_OCRCLASS_H_
--- a/ccutil/strngs.cpp
+++ b/ccutil/strngs.cpp
@ -45,7 +45,7 @@ const int kMaxDoubleSize = 15;
 *
 * The collection of MACROS provide different implementations depending
 * on whether the string keeps track of its strlen or not so that this
- * feature can be added in later when consumers dont modifify the string
+ * feature can be added in later when consumers don't modify the string
 **********************************************************************/

 // Smallest string to allocate by default
@ -339,7 +339,7 @@ STRING& STRING::operator=(const STRING& str) {
  const STRING_HEADER* str_header = str.GetHeader();
  int   str_used = str_header->used_;

-  GetHeader()->used_ = 0;  // clear since ensure doesnt need to copy data
+  GetHeader()->used_ = 0;  // clear since ensure doesn't need to copy data
  char* this_cstr = ensure_cstr(str_used);
  STRING_HEADER* this_header = GetHeader();

@ -398,7 +398,7 @@ STRING & STRING::operator=(const char* cstr) {
  if (cstr) {
    int len = strlen(cstr) + 1;

-    this_header->used_ = 0;  // dont bother copying data if need to realloc
+    this_header->used_ = 0;  // don't bother copying data if need to realloc
    char* this_cstr = ensure_cstr(len);
    this_header = GetHeader();  // for realloc
    memcpy(this_cstr, cstr, len);
@ -416,7 +416,7 @@ STRING & STRING::operator=(const char* cstr) {

 void STRING::assign(const char *cstr, int len) {
  STRING_HEADER* this_header = GetHeader();
-  this_header->used_ = 0;  // dont bother copying data if need to realloc
+  this_header->used_ = 0;  // don't bother copying data if need to realloc
  char* this_cstr = ensure_cstr(len + 1);  // +1 for '\0'

  this_header = GetHeader();  // for realloc
--- a/ccutil/tessdatamanager.cpp
+++ b/ccutil/tessdatamanager.cpp
@ -51,7 +51,7 @@ bool TessdataManager::Init(const char *data_file_name, int debug_level) {
             sizeof(actual_tessdata_num_entries_));
  }
  if (actual_tessdata_num_entries_ > TESSDATA_NUM_ENTRIES) {
-    // For forward compatability, truncate to the number we can handle.
+    // For forward compatibility, truncate to the number we can handle.
    actual_tessdata_num_entries_ = TESSDATA_NUM_ENTRIES;
  }
  fread(offset_table_, sizeof(inT64),
--- a/ccutil/tessdatamanager.h
+++ b/ccutil/tessdatamanager.h
@ -282,7 +282,7 @@ class TessdataManager {
   * same or smaller than TESSDATA_NUM_ENTRIES, but can never be larger,
   * since then it would be impossible to interpret the type of tessdata at
   * indices same and higher than TESSDATA_NUM_ENTRIES.
-   * This parameter is used to allow for backward compatiblity
+   * This parameter is used to allow for backward compatibility
   * when new tessdata types are introduced.
   */
  inT32 actual_tessdata_num_entries_;
--- a/classify/adaptmatch.cpp
+++ b/classify/adaptmatch.cpp
@ -515,7 +515,7 @@ void Classify::EndAdaptiveClassifier() {
 *      load_pre_trained_templates  Indicates whether the pre-trained
 *                     templates (inttemp, normproto and pffmtable components)
 *                     should be lodaded. Should only be set to true if the
- *                     necesary classifier components are present in the
+ *                     necessary classifier components are present in the
 *                     [lang].traineddata file.
 *  Globals:
 *      BuiltInTemplatesFile  file to get built-in temps from
@ -1720,7 +1720,7 @@ bool Classify::LooksLikeGarbage(TBLOB *blob) {
 *
 * Globals:
 *
- * @return Number of features extracted or 0 if an error occured.
+ * @return Number of features extracted or 0 if an error occurred.
 * @note Exceptions: none
 * @note History: Tue May 28 10:40:52 1991, DSJ, Created.
 */
@ -2082,7 +2082,7 @@ void Classify::PrintAdaptiveMatchResults(const ADAPT_RESULTS& results) {

 /*---------------------------------------------------------------------------*/
 /**
- * This routine steps thru each matching class in Results
+ * This routine steps through each matching class in Results
 * and removes it from the match list if its rating
 * is worse than the BestRating plus a pad.  In other words,
 * all good matches get moved to the front of the classes
--- a/classify/classify.cpp
+++ b/classify/classify.cpp
@ -151,7 +151,7 @@ Classify::Classify()
      INT_MEMBER(classify_integer_matcher_multiplier, 10,
                 "Integer Matcher Multiplier  0-255:   ", this->params()),
      EnableLearning(true),
-      INT_MEMBER(il1_adaption_test, 0, "Dont adapt to i/I at beginning of word",
+      INT_MEMBER(il1_adaption_test, 0, "Don't adapt to i/I at beginning of word",
                 this->params()),
      BOOL_MEMBER(classify_bln_numeric_mode, 0,
                  "Assume the input is numbers [0-9].", this->params()),
--- a/classify/classify.h
+++ b/classify/classify.h
@ -495,7 +495,7 @@ class Classify : public CCStruct {
  // font combinations that the shape represents.
  UnicityTable<FontSet> fontset_table_;

-  INT_VAR_H(il1_adaption_test, 0, "Dont adapt to i/I at beginning of word");
+  INT_VAR_H(il1_adaption_test, 0, "Don't adapt to i/I at beginning of word");
  BOOL_VAR_H(classify_bln_numeric_mode, 0,
             "Assume the input is numbers [0-9].");
  double_VAR_H(speckle_large_max_size, 0.30, "Max large speckle size");
--- a/classify/cluster.cpp
+++ b/classify/cluster.cpp
@ -182,7 +182,7 @@ struct BUCKETS {
  FLOAT64 ChiSquared;            // test threshold
  uinT16 NumberOfBuckets;        // number of cells in histogram
  uinT16 Bucket[BUCKETTABLESIZE];// mapping to histogram buckets
-  uinT32 *Count;                 // frequency of occurence histogram
+  uinT32 *Count;                 // frequency of occurrence histogram
  FLOAT32 *ExpectedCount;        // expected histogram
 };

--- a/classify/clusttool.h
+++ b/classify/clusttool.h
@ -24,7 +24,7 @@
 #include <stdio.h>

 /*-------------------------------------------------------------------------
-        Public Funtion Prototype
+        Public Function Prototype
 --------------------------------------------------------------------------*/
 uinT16 ReadSampleSize(FILE *File);

--- a/classify/featdefs.cpp
+++ b/classify/featdefs.cpp
@ -285,7 +285,7 @@ CHAR_DESC ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs,

 /*---------------------------------------------------------------------------*/
 /**
- * Search thru all features currently defined and return
+ * Search through all features currently defined and return
 * the feature type for the feature with the specified short
 * name.  Trap an error if the specified name is not found.
 *
--- a/classify/intfx.cpp
+++ b/classify/intfx.cpp
@ -44,7 +44,7 @@ using tesseract::TrainingSample;
 // The entries are in binary degrees where a full circle is 256 binary degrees.
 static float cos_table[INT_CHAR_NORM_RANGE];
 static float sin_table[INT_CHAR_NORM_RANGE];
-// Guards write access to AtanTable so we dont create it more than once.
+// Guards write access to AtanTable so we don't create it more than once.
 tesseract::CCUtilMutex atan_table_mutex;


--- a/classify/kdtree.cpp
+++ b/classify/kdtree.cpp
@ -521,7 +521,7 @@ bool KDTreeSearch::BoxIntersectsSearch(FLOAT32 *lower, FLOAT32 *upper) {
 * Walk a tree, calling action once on each node.
 *
 * Operation:
- *   This routine walks thru the specified sub_tree and invokes action
+ *   This routine walks through the specified sub_tree and invokes
 *   action at each node as follows:
 *       action(context, data, level)
 *   data  the data contents of the node being visited,
--- a/classify/mfoutline.cpp
+++ b/classify/mfoutline.cpp
@ -104,7 +104,7 @@ LIST ConvertOutlines(TESSLINE *outline,

 /*---------------------------------------------------------------------------*/
 /**
- * This routine searches thru the specified outline, computes
+ * This routine searches through the specified outline, computes
 * a slope for each vector in the outline, and marks each
 * vector as having one of the following directions:
 *   N, S, E, W, NE, NW, SE, SW
@ -182,7 +182,7 @@ void FreeOutlines(LIST Outlines) {

 /*---------------------------------------------------------------------------*/
 /**
- * This routine searches thru the specified outline and finds
+ * This routine searches through the specified outline and finds
 * the points at which the outline changes direction.  These
 * points are then marked as "extremities".  This routine is
 * used as an alternative to FindExtremities().  It forces the
--- a/classify/picofeat.cpp
+++ b/classify/picofeat.cpp
@ -147,7 +147,7 @@ void ConvertSegmentToPicoFeat(FPOINT *Start,

 /*---------------------------------------------------------------------------*/
 /**
- * This routine steps thru the specified outline and cuts it
+ * This routine steps through the specified outline and cuts it
 * up into pieces of equal length.  These pieces become the
 * desired pico-features.  Each segment in the outline
 * is converted into an integral number of pico-features.
--- a/cube/beam_search.cpp
+++ b/cube/beam_search.cpp
@ -93,7 +93,7 @@ void BeamSearch::CreateChildren(SearchColumn *out_col, LangModel *lang_mod,
  }  // lm_edges
 }

-// Performs a beam seach in the specified search using the specified
+// Performs a beam search in the specified search using the specified
 // language model; returns an alternate list of possible words as a result.
 WordAltList * BeamSearch::Search(SearchObject *srch_obj, LangModel *lang_mod) {
  // verifications
--- a/cube/beam_search.h
+++ b/cube/beam_search.h
@ -45,7 +45,7 @@ class BeamSearch {
 public:
  explicit BeamSearch(CubeRecoContext *cntxt, bool word_mode = true);
  ~BeamSearch();
-  // Performs a beam seach in the specified search using the specified
+  // Performs a beam search in the specified search using the specified
  // language model; returns an alternate list of possible words as a result.
  WordAltList *Search(SearchObject *srch_obj, LangModel *lang_mod = NULL);
  // Returns the best node in the last column of last performed search.
--- a/cube/conv_net_classifier.cpp
+++ b/cube/conv_net_classifier.cpp
@ -72,7 +72,7 @@ bool ConvNetCharClassifier::Train(CharSamp *char_samp, int ClassID) {

 /**
 * A secondary function needed for training. Allows the trainer to set the
- * value of any train-time paramter. This function is currently not
+ * value of any train-time parameter. This function is currently not
 * implemented. TODO(ahmadab): implement end-2-end training
 */
 bool ConvNetCharClassifier::SetLearnParam(char *var_name, float val) {
--- a/cube/conv_net_classifier.h
+++ b/cube/conv_net_classifier.h
@ -55,7 +55,7 @@ class ConvNetCharClassifier : public CharClassifier {
  // is currently not implemented. TODO(ahmadab): implement end-2-end training
  virtual bool Train(CharSamp *char_samp, int ClassID);
  // A secondary function needed for training. Allows the trainer to set the
-  // value of any train-time paramter. This function is currently not
+  // value of any train-time parameter. This function is currently not
  // implemented. TODO(ahmadab): implement end-2-end training
  virtual bool SetLearnParam(char *var_name, float val);
  // Externally sets the Neural Net used by the classifier. Used for training
--- a/cube/cube_line_object.cpp
+++ b/cube/cube_line_object.cpp
@ -247,7 +247,7 @@ int CubeLineObject::ComputeWordBreakThreshold(int con_comp_cnt,
    word_break_threshold--;
  } while (!valid && word_break_threshold > 0);

-  // failed to find a threshold that acheives the target aspect ratio.
+  // failed to find a threshold that achieves the target aspect ratio.
  // Just use the default threshold
  return  static_cast<int>(line_pix_->h *
                           cntxt_->Params()->MaxSpaceHeightRatio());
--- a/cube/cube_line_segmenter.cpp
+++ b/cube/cube_line_segmenter.cpp
@ -237,7 +237,7 @@ Pixa *CubeLineSegmenter::CrackLine(Pix *cracked_line_pix,
  return NULL;
 }

-// split a line continously until valid or fail
+// split a line continuously until valid or fail
 Pixa *CubeLineSegmenter::SplitLine(Pix *line_mask_pix, Box *line_box) {
  // clone the line mask
  Pix *line_pix = pixClone(line_mask_pix);
@ -739,7 +739,7 @@ bool CubeLineSegmenter::LineSegment() {
  return true;
 }

-// Estimate the paramters of the font(s) used in the page
+// Estimate the parameters of the font(s) used in the page
 bool CubeLineSegmenter::EstimateFontParams() {
  int hgt_hist[kHgtBins];
  int max_hgt;
--- a/cube/cube_search_object.cpp
+++ b/cube/cube_search_object.cpp
@ -212,7 +212,7 @@ CharSamp *CubeSearchObject::CharSample(int start_pt, int end_pt) {
    samp->SetLastChar(last_char ? 255 : 0);
  } else {
    // for non cursive languages, these features correspond
-    // to whether the charsamp is at the begining or end of the word
+    // to whether the charsamp is at the beginning or end of the word
    samp->SetFirstChar((start_pt == -1) ? 255 : 0);
    samp->SetLastChar((end_pt == (segment_cnt_ - 1)) ? 255 : 0);
  }
--- a/cube/cube_search_object.h
+++ b/cube/cube_search_object.h
@ -114,7 +114,7 @@ class CubeSearchObject : public SearchObject {
            end_pt <= (start_pt + max_seg_per_char_));
  }
  // computes the space and no space costs at gaps between segments
-  // return true on sucess
+  // return true on success
  bool ComputeSpaceCosts();
 };
 }
--- a/cube/hybrid_neural_net_classifier.cpp
+++ b/cube/hybrid_neural_net_classifier.cpp
@ -72,7 +72,7 @@ bool HybridNeuralNetCharClassifier::Train(CharSamp *char_samp, int ClassID) {
 }

 // A secondary function needed for training. Allows the trainer to set the
-// value of any train-time paramter. This function is currently not
+// value of any train-time parameter. This function is currently not
 // implemented. TODO(ahmadab): implement end-2-end training
 bool HybridNeuralNetCharClassifier::SetLearnParam(char *var_name, float val) {
  // TODO(ahmadab): implementation of parameter initializing.
@ -151,7 +151,7 @@ bool HybridNeuralNetCharClassifier::RunNets(CharSamp *char_samp) {
    return false;
  }

-  // go thru all the nets
+  // go through all the nets
  memset(net_output_, 0, class_cnt * sizeof(*net_output_));
  float *inputs = net_input_;
  for (int net_idx = 0; net_idx < nets_.size(); net_idx++) {
--- a/cube/hybrid_neural_net_classifier.h
+++ b/cube/hybrid_neural_net_classifier.h
@ -48,7 +48,7 @@ class HybridNeuralNetCharClassifier : public CharClassifier {
  // is currently not implemented. TODO(ahmadab): implement end-2-end training
  virtual bool Train(CharSamp *char_samp, int ClassID);
  // A secondary function needed for training. Allows the trainer to set the
-  // value of any train-time paramter. This function is currently not
+  // value of any train-time parameter. This function is currently not
  // implemented. TODO(ahmadab): implement end-2-end training
  virtual bool SetLearnParam(char *var_name, float val);
  // Externally sets the Neural Net used by the classifier. Used for training
--- a/cube/tess_lang_model.cpp
+++ b/cube/tess_lang_model.cpp
@ -397,7 +397,7 @@ int TessLangModel::NumberEdges(EDGE_REF edge_ref, LangModEdge **edge_array) {
    return 0;
  }

-  // go thru all valid transitions from the state
+  // go through all valid transitions from the state
  int edge_cnt = 0;

  EDGE_REF new_edge_ref;
--- a/cutil/listio.h
+++ b/cutil/listio.h
@ -37,7 +37,7 @@
 #include "oldlist.h"

 /*----------------------------------------------------------------------------
-        Public Funtion Prototypes
+        Public Function Prototypes
 --------------------------------------------------------------------------*/
 LIST read_list(const char *filename);
 #endif
--- a/cutil/oldlist.cpp
+++ b/cutil/oldlist.cpp
@ -407,7 +407,7 @@ LIST s_adjoin(LIST var_list, void *variable, int_compare compare) {
 *
 *  Search list, return NIL_LIST if not found. Return the list starting from
 *  the item if found.  The compare routine "is_equal" is passed in as
- *  the third paramter to this routine.   If the value NULL is supplied
+ *  the third parameter to this routine.   If the value NULL is supplied
 *  for is_equal, the is_key routine will be used.
 **********************************************************************/
 LIST search(LIST list, void *key, int_compare is_equal) {
--- a/cutil/oldlist.h
+++ b/cutil/oldlist.h
@ -234,7 +234,7 @@ first_node (list_rest (l))
 first_node (list_rest (list_rest (l)))

 /*----------------------------------------------------------------------
-          Public Funtion Prototypes
+          Public Function Prototypes
 ----------------------------------------------------------------------*/
 int count(LIST var_list);

--- a/dict/context.cpp
+++ b/dict/context.cpp
@ -33,7 +33,7 @@ static const int kMinAbsoluteGarbageWordLength = 10;
 static const float kMinAbsoluteGarbageAlphanumFrac = 0.5f;

 const int case_state_table[6][4] = { {
-                                  /*  0. Begining of word        */
+                                  /*  0. Beginning of word       */
    /*    P   U   L   D                                          */
                                  /* -1. Error on case           */
      0, 1, 5, 4
--- a/dict/dawg.h
+++ b/dict/dawg.h
@ -447,7 +447,7 @@ class SquishedDawg : public Dawg {
    EDGE_REF edge = node;
    if (!edge_occupied(edge) || edge == NO_EDGE) return;
    assert(forward_edge(edge));  // we don't expect any backward edges to
-    do {                         // be present when this funciton is called
+    do {                         // be present when this function is called
      if (!word_end || end_of_word_from_edge_rec(edges_[edge])) {
        vec->push_back(NodeChild(unichar_id_from_edge_rec(edges_[edge]), edge));
      }
--- a/dict/dict.cpp
+++ b/dict/dict.cpp
@ -127,7 +127,7 @@ Dict::Dict(CCUtil* ccutil)
                  " when there is a need to explore all segmentations",
                  getCCUtil()->params()),
      BOOL_MEMBER(save_raw_choices, false,
-                  "Deprecated- backward compatablity only",
+                  "Deprecated- backward compatibility only",
                  getCCUtil()->params()),
      INT_MEMBER(tessedit_truncate_wordchoice_log, 10,
                 "Max words to keep in list",
--- a/dict/dict.h
+++ b/dict/dict.h
@ -614,7 +614,7 @@ class Dict {
             "Make AcceptableChoice() always return false. Useful"
             " when there is a need to explore all segmentations");
  BOOL_VAR_H(save_raw_choices, false,
-             "Deprecated- backward compatability only");
+             "Deprecated- backward compatibility only");
  INT_VAR_H(tessedit_truncate_wordchoice_log, 10, "Max words to keep in list");
  STRING_VAR_H(word_to_debug, "", "Word for which stopper debug information"
               " should be printed to stdout");
--- a/dict/permdawg.cpp
+++ b/dict/permdawg.cpp
@ -303,7 +303,7 @@ void Dict::append_choices(
 *
 * The given prev_char_frag_info contains:
 * - fragment: if not NULL contains information about immediately
- *   preceeding fragmented character choice
+ *   preceding fragmented character choice
 * - num_fragments: number of fragments that have been used so far
 *   to construct a character
 * - certainty: certainty of the current choice or minimum
--- a/doc/Doxyfile
+++ b/doc/Doxyfile
@ -1657,7 +1657,7 @@ EXTRA_PACKAGES         =
 # following commands have a special meaning inside the header: $title,
 # $datetime, $date, $doxygenversion, $projectname, $projectnumber,
 # $projectbrief, $projectlogo. Doxygen will replace $title with the empy string,
-# for the replacement values of the other commands the user is refered to
+# for the replacement values of the other commands the user is referred to
 # HTML_HEADER.
 # This tag requires that the tag GENERATE_LATEX is set to YES.

--- a/java/Makefile.am
+++ b/java/Makefile.am
@ -42,18 +42,22 @@ SCROLLVIEW_LIBS = \
 CLASSPATH = $(srcdir)/piccolo2d-core-3.0.jar:$(srcdir)/piccolo2d-extras-3.0.jar

 ScrollView.jar : $(SCROLLVIEW_CLASSES)
-	$(JAR) cf $@ com/google/scrollview/*.class \
+	$(JAR) cfm $@ Manifest.txt com/google/scrollview/*.class \
           com/google/scrollview/events/*.class com/google/scrollview/ui/*.class

 $(SCROLLVIEW_CLASSES) : $(SCROLLVIEW_FILES)
 	$(JAVAC) -encoding UTF8 -sourcepath $(srcdir) -classpath $(CLASSPATH) $(SCROLLVIEW_FILES) -d $(builddir)

+# Download the piccolo2d jars named in CLASSPATH into the current directory.
+# -f makes curl exit non-zero on an HTTP error instead of saving the error
+# page as a .jar; the URLs are quoted because '?' is a shell glob character.
+fetch-jars :
+	curl -fL -o piccolo2d-core-3.0.jar "http://search.maven.org/remotecontent?filepath=org/piccolo2d/piccolo2d-core/3.0/piccolo2d-core-3.0.jar"
+	curl -fL -o piccolo2d-extras-3.0.jar "http://search.maven.org/remotecontent?filepath=org/piccolo2d/piccolo2d-extras/3.0/piccolo2d-extras-3.0.jar"
+
 .PHONY: install-jars
 install-jars : ScrollView.jar
 	@if [ ! -d  $(scrollview_path) ]; then mkdir -p $(scrollview_path); fi;
 	$(INSTALL) -m 644 $(SCROLLVIEW_LIBS) $(scrollview_path);
 	$(INSTALL) -m 644 ScrollView.jar $(scrollview_path);
-	@echo "Don't forget to set eviroment variable SCROLLVIEW_PATH to $(scrollview_path)";
+	@echo "Don't forget to set environment variable SCROLLVIEW_PATH to $(scrollview_path)";

 uninstall:
 	rm -f $(scrollview_path)/*.jar
--- a/java/Manifest.txt
+++ b/java/Manifest.txt
@ -0,0 +1,2 @@
+Main-Class: com/google/scrollview/ScrollView
+Class-Path: ScrollView.jar piccolo2d-core-3.0.jar piccolo2d-extras-3.0.jar
--- a/java/com/google/scrollview/ui/SVMenuBar.java
+++ b/java/com/google/scrollview/ui/SVMenuBar.java
@ -50,7 +50,7 @@ public class SVMenuBar implements ActionListener {


  /**
-   * A click on one of the items in our menubar has occured. Forward it
+   * A click on one of the items in our menubar has occurred. Forward it
   * to the item itself to let it decide what happens.
   */
  public void actionPerformed(ActionEvent e) {
@ -111,7 +111,7 @@ public class SVMenuBar implements ActionListener {
   * @param name The caption of the new entry.
   * @param id The Id of the new entry. If it is -1, the entry will be treated
   *        as a menu.
-   * @param b Whether the entry is initally flagged.
+   * @param b Whether the entry is initially flagged.
   *
   */

--- a/java/com/google/scrollview/ui/SVPopupMenu.java
+++ b/java/com/google/scrollview/ui/SVPopupMenu.java
@ -123,7 +123,7 @@ public class SVPopupMenu implements ActionListener {


  /**
-   * A click on one of the items in our menubar has occured. Forward it
+   * A click on one of the items in our menubar has occurred. Forward it
   * to the item itself to let it decide what happens.
   */
  public void actionPerformed(ActionEvent e) {
--- a/java/com/google/scrollview/ui/SVWindow.java
+++ b/java/com/google/scrollview/ui/SVWindow.java
@ -298,7 +298,7 @@ public class SVWindow extends JFrame {
      ta.setEditable(false);
      getContentPane().add(ta, BorderLayout.SOUTH);
    }
-    // We need to make the window bigger to accomodate the message box.
+    // We need to make the window bigger to accommodate the message box.
    winSizeY += DEF_MESSAGEBOX_HEIGHT;
    setSize(winSizeX, winSizeY);
  }
--- a/training/language-specific.sh
+++ b/training/language-specific.sh
@ -780,7 +780,7 @@ VERTICAL_FONTS=( \
 #      holds the text corpus file for the language, used in phase F
 #   ${FONTS[@]}
 #      holds a sequence of applicable fonts for the language, used in
-#      phase F & I
+#      phase F & I. Only set if not already set, e.g. from the command line
 #   ${TRAINING_DATA_ARGUMENTS}
 #      non-default arguments to the training_data program used in phase T
 #   ${FILTER_ARGUMENTS} -
@ -794,7 +794,6 @@ set_lang_specific_parameters() {
  local lang=$1
  # The default text location is now given directly from the language code.
  TEXT_CORPUS="${FLAGS_webtext_prefix}/${lang}.corpus.txt"
-  FONTS=( "${LATIN_FONTS[@]}" )
  FILTER_ARGUMENTS=""
  WORDLIST2DAWG_ARGUMENTS=""
  # These dawg factors represent the fraction of the corpus not covered by the
@ -816,30 +815,30 @@ set_lang_specific_parameters() {
  case ${lang} in
    # Latin languages.
    enm ) TEXT2IMAGE_EXTRA_ARGS=" --ligatures"   # Add ligatures when supported
-          FONTS=( "${EARLY_LATIN_FONTS[@]}" );;
+          test -z "$FONTS" && FONTS=( "${EARLY_LATIN_FONTS[@]}" );;
    frm ) TEXT_CORPUS="${FLAGS_webtext_prefix}/fra.corpus.txt"
          # Make long-s substitutions for Middle French text
          FILTER_ARGUMENTS="--make_early_language_variant=fra"
          TEXT2IMAGE_EXTRA_ARGS=" --ligatures"   # Add ligatures when supported.
-          FONTS=( "${EARLY_LATIN_FONTS[@]}" );;
+          test -z "$FONTS" && FONTS=( "${EARLY_LATIN_FONTS[@]}" );;
    frk ) TEXT_CORPUS="${FLAGS_webtext_prefix}/deu.corpus.txt"
-          FONTS=( "${FRAKTUR_FONTS[@]}" );;
+          test -z "$FONTS" && FONTS=( "${FRAKTUR_FONTS[@]}" );;
    ita_old )
          TEXT_CORPUS="${FLAGS_webtext_prefix}/ita.corpus.txt"
          # Make long-s substitutions for Early Italian text
          FILTER_ARGUMENTS="--make_early_language_variant=ita"
          TEXT2IMAGE_EXTRA_ARGS=" --ligatures"   # Add ligatures when supported.
-          FONTS=( "${EARLY_LATIN_FONTS[@]}" );;
+          test -z "$FONTS" && FONTS=( "${EARLY_LATIN_FONTS[@]}" );;
    spa_old )
          TEXT_CORPUS="${FLAGS_webtext_prefix}/spa.corpus.txt"
          # Make long-s substitutions for Early Spanish text
          FILTER_ARGUMENTS="--make_early_language_variant=spa"
          TEXT2IMAGE_EXTRA_ARGS=" --ligatures"  # Add ligatures when supported.
-          FONTS=( "${EARLY_LATIN_FONTS[@]}" );;
+          test -z "$FONTS" && FONTS=( "${EARLY_LATIN_FONTS[@]}" );;
    srp_latn )
          TEXT_CORPUS=${FLAGS_webtext_prefix}/srp.corpus.txt ;;
    vie ) TRAINING_DATA_ARGUMENTS+=" --infrequent_ratio=10000"
-          FONTS=( "${VIETNAMESE_FONTS[@]}" ) ;;
+          test -z "$FONTS" && FONTS=( "${VIETNAMESE_FONTS[@]}" ) ;;
    # Highly inflective languages get a bigger dawg size.
    # TODO(rays) Add more here!
    hun ) WORD_DAWG_SIZE=1000000 ;;
@ -899,14 +898,14 @@ set_lang_specific_parameters() {
          # Strip unrenderable words as not all fonts will render the extended
          # latin symbols found in Vietnamese text.
          WORD_DAWG_SIZE=1000000
-          FONTS=( "${EARLY_LATIN_FONTS[@]}" );;
+          test -z "$FONTS" && FONTS=( "${EARLY_LATIN_FONTS[@]}" );;

    # Cyrillic script-based languages.
-    rus ) FONTS=( "${RUSSIAN_FONTS[@]}" )
+    rus ) test -z "$FONTS" && FONTS=( "${RUSSIAN_FONTS[@]}" )
          NUMBER_DAWG_FACTOR=0.05
          WORD_DAWG_SIZE=1000000 ;;
    aze_cyrl | bel | bul | kaz | mkd | srp | tgk | ukr | uzb_cyrl )
-          FONTS=( "${RUSSIAN_FONTS[@]}" ) ;;
+          test -z "$FONTS" && FONTS=( "${RUSSIAN_FONTS[@]}" ) ;;

    # Special code for performing Cyrillic language-id that is trained on
    # Russian, Serbian, Ukranian, Belarusian, Macedonian, Tajik and Mongolian
@ -916,70 +915,70 @@ set_lang_specific_parameters() {
          TRAINING_DATA_ARGUMENTS+=" --infrequent_ratio=10000"
          GENERATE_WORD_BIGRAMS=0
          WORD_DAWG_SIZE=1000000
-          FONTS=( "${RUSSIAN_FONTS[@]}" );;
+          test -z "$FONTS" && FONTS=( "${RUSSIAN_FONTS[@]}" );;

    # South Asian scripts mostly have a lot of different graphemes, so trim
    # down the MEAN_COUNT so as not to get a huge amount of text.
    asm | ben )
          MEAN_COUNT="15"
          WORD_DAWG_FACTOR=0.15
-          FONTS=( "${BENGALI_FONTS[@]}" ) ;;
+          test -z "$FONTS" && FONTS=( "${BENGALI_FONTS[@]}" ) ;;
    bih | hin | mar | nep | san )
          MEAN_COUNT="15"
          WORD_DAWG_FACTOR=0.15
-          FONTS=( "${DEVANAGARI_FONTS[@]}" ) ;;
+          test -z "$FONTS" && FONTS=( "${DEVANAGARI_FONTS[@]}" ) ;;
    bod ) MEAN_COUNT="15"
          WORD_DAWG_FACTOR=0.15
-          FONTS=( "${TIBETAN_FONTS[@]}" ) ;;
+          test -z "$FONTS" && FONTS=( "${TIBETAN_FONTS[@]}" ) ;;
    dzo )
          WORD_DAWG_FACTOR=0.01
-          FONTS=( "${TIBETAN_FONTS[@]}" ) ;;
+          test -z "$FONTS" && FONTS=( "${TIBETAN_FONTS[@]}" ) ;;
    guj ) MEAN_COUNT="15"
          WORD_DAWG_FACTOR=0.15
-          FONTS=( "${GUJARATI_FONTS[@]}" ) ;;
+          test -z "$FONTS" && FONTS=( "${GUJARATI_FONTS[@]}" ) ;;
    kan ) MEAN_COUNT="15"
          WORD_DAWG_FACTOR=0.15
          TRAINING_DATA_ARGUMENTS+=" --no_newline_in_output"
          TEXT2IMAGE_EXTRA_ARGS=" --char_spacing=0.5"
-          FONTS=( "${KANNADA_FONTS[@]}" ) ;;
+          test -z "$FONTS" && FONTS=( "${KANNADA_FONTS[@]}" ) ;;
    mal ) MEAN_COUNT="15"
          WORD_DAWG_FACTOR=0.15
          TRAINING_DATA_ARGUMENTS+=" --no_newline_in_output"
          TEXT2IMAGE_EXTRA_ARGS=" --char_spacing=0.5"
-          FONTS=( "${MALAYALAM_FONTS[@]}" ) ;;
+          test -z "$FONTS" && FONTS=( "${MALAYALAM_FONTS[@]}" ) ;;
    ori )
          WORD_DAWG_FACTOR=0.01
-          FONTS=( "${ORIYA_FONTS[@]}" ) ;;
+          test -z "$FONTS" && FONTS=( "${ORIYA_FONTS[@]}" ) ;;
    pan ) MEAN_COUNT="15"
          WORD_DAWG_FACTOR=0.01
-          FONTS=( "${PUNJABI_FONTS[@]}" ) ;;
+          test -z "$FONTS" && FONTS=( "${PUNJABI_FONTS[@]}" ) ;;
    sin ) MEAN_COUNT="15"
          WORD_DAWG_FACTOR=0.01
-          FONTS=( "${SINHALA_FONTS[@]}" ) ;;
+          test -z "$FONTS" && FONTS=( "${SINHALA_FONTS[@]}" ) ;;
    tam ) MEAN_COUNT="30"
          WORD_DAWG_FACTOR=0.15
          TRAINING_DATA_ARGUMENTS+=" --no_newline_in_output"
          TEXT2IMAGE_EXTRA_ARGS=" --char_spacing=0.5"
-          FONTS=( "${TAMIL_FONTS[@]}" ) ;;
+          test -z "$FONTS" && FONTS=( "${TAMIL_FONTS[@]}" ) ;;
    tel ) MEAN_COUNT="15"
          WORD_DAWG_FACTOR=0.15
          TRAINING_DATA_ARGUMENTS+=" --no_newline_in_output"
          TEXT2IMAGE_EXTRA_ARGS=" --char_spacing=0.5"
-          FONTS=( "${TELUGU_FONTS[@]}" ) ;;
+          test -z "$FONTS" && FONTS=( "${TELUGU_FONTS[@]}" ) ;;

    # SouthEast Asian scripts.
    khm ) MEAN_COUNT="15"
          WORD_DAWG_FACTOR=0.15
          TRAINING_DATA_ARGUMENTS+=" --infrequent_ratio=10000"
-          FONTS=( "${KHMER_FONTS[@]}" ) ;;
+          test -z "$FONTS" && FONTS=( "${KHMER_FONTS[@]}" ) ;;
    lao ) MEAN_COUNT="15"
          WORD_DAWG_FACTOR=0.15
          TRAINING_DATA_ARGUMENTS+=" --infrequent_ratio=10000"
-          FONTS=( "${LAOTHIAN_FONTS[@]}" ) ;;
+          test -z "$FONTS" && FONTS=( "${LAOTHIAN_FONTS[@]}" ) ;;
    mya ) MEAN_COUNT="12"
          WORD_DAWG_FACTOR=0.15
          TRAINING_DATA_ARGUMENTS+=" --infrequent_ratio=10000"
-          FONTS=( "${BURMESE_FONTS[@]}" ) ;;
+          test -z "$FONTS" && FONTS=( "${BURMESE_FONTS[@]}" ) ;;
    tha ) MEAN_COUNT="30"
          WORD_DAWG_FACTOR=0.01
          TRAINING_DATA_ARGUMENTS+=" --infrequent_ratio=10000"
@ -987,7 +986,7 @@ set_lang_specific_parameters() {
          TRAINING_DATA_ARGUMENTS+=" --no_space_in_output --desired_bigrams="
          AMBIGS_FILTER_DENOMINATOR="1000"
          LEADING=48
-          FONTS=( "${THAI_FONTS[@]}" ) ;;
+          test -z "$FONTS" && FONTS=( "${THAI_FONTS[@]}" ) ;;

    # CJK
    chi_sim )
@ -998,7 +997,7 @@ set_lang_specific_parameters() {
          TRAINING_DATA_ARGUMENTS+=" --infrequent_ratio=10000"
          TRAINING_DATA_ARGUMENTS+=" --no_space_in_output --desired_bigrams="
          FILTER_ARGUMENTS="--charset_filter=chi_sim --segmenter_lang=chi_sim"
-          FONTS=( "${CHI_SIM_FONTS[@]}" ) ;;
+          test -z "$FONTS" && FONTS=( "${CHI_SIM_FONTS[@]}" ) ;;
    chi_tra )
          MEAN_COUNT="15"
          WORD_DAWG_FACTOR=0.015
@ -1006,14 +1005,14 @@ set_lang_specific_parameters() {
          TRAINING_DATA_ARGUMENTS+=" --infrequent_ratio=10000"
          TRAINING_DATA_ARGUMENTS+=" --no_space_in_output --desired_bigrams="
          FILTER_ARGUMENTS="--charset_filter=chi_tra --segmenter_lang=chi_tra"
-          FONTS=( "${CHI_TRA_FONTS[@]}" ) ;;
+          test -z "$FONTS" && FONTS=( "${CHI_TRA_FONTS[@]}" ) ;;
    jpn ) MEAN_COUNT="15"
          WORD_DAWG_FACTOR=0.015
          GENERATE_WORD_BIGRAMS=0
          TRAINING_DATA_ARGUMENTS+=" --infrequent_ratio=10000"
          TRAINING_DATA_ARGUMENTS+=" --no_space_in_output --desired_bigrams="
          FILTER_ARGUMENTS="--charset_filter=jpn --segmenter_lang=jpn"
-          FONTS=( "${JPN_FONTS[@]}" ) ;;
+          test -z "$FONTS" && FONTS=( "${JPN_FONTS[@]}" ) ;;
    kor ) MEAN_COUNT="20"
          WORD_DAWG_FACTOR=0.015
          NUMBER_DAWG_FACTOR=0.05
@ -1021,38 +1020,38 @@ set_lang_specific_parameters() {
          TRAINING_DATA_ARGUMENTS+=" --desired_bigrams="
          GENERATE_WORD_BIGRAMS=0
          FILTER_ARGUMENTS="--charset_filter=kor --segmenter_lang=kor"
-          FONTS=( "${KOREAN_FONTS[@]}" ) ;;
+          test -z "$FONTS" && FONTS=( "${KOREAN_FONTS[@]}" ) ;;

    # Middle-Eastern scripts.
-    ara ) FONTS=( "${ARABIC_FONTS[@]}" ) ;;
-    div ) FONTS=( "${THAANA_FONTS[@]}" ) ;;
+    ara ) test -z "$FONTS" && FONTS=( "${ARABIC_FONTS[@]}" ) ;;
+    div ) test -z "$FONTS" && FONTS=( "${THAANA_FONTS[@]}" ) ;;
    fas | pus | snd | uig | urd )
-          FONTS=( "${PERSIAN_FONTS[@]}" ) ;;
+          test -z "$FONTS" && FONTS=( "${PERSIAN_FONTS[@]}" ) ;;
    heb | yid )
          NUMBER_DAWG_FACTOR=0.05
          WORD_DAWG_FACTOR=0.08
-          FONTS=( "${HEBREW_FONTS[@]}" ) ;;
-    syr ) FONTS=( "${SYRIAC_FONTS[@]}" ) ;;
+          test -z "$FONTS" && FONTS=( "${HEBREW_FONTS[@]}" ) ;;
+    syr ) test -z "$FONTS" && FONTS=( "${SYRIAC_FONTS[@]}" ) ;;

    # Other scripts.
    amh | tir)
-          FONTS=( "${AMHARIC_FONTS[@]}" ) ;;
-    chr ) FONTS=( "${NORTH_AMERICAN_ABORIGINAL_FONTS[@]}" \
+          test -z "$FONTS" && FONTS=( "${AMHARIC_FONTS[@]}" ) ;;
+    chr ) test -z "$FONTS" && FONTS=( "${NORTH_AMERICAN_ABORIGINAL_FONTS[@]}" \
                  "Noto Sans Cherokee" \
                ) ;;
    ell | grc )
          NUMBER_DAWG_FACTOR=0.05
          WORD_DAWG_FACTOR=0.08
-          FONTS=( "${GREEK_FONTS[@]}" ) ;;
-    hye ) FONTS=( "${ARMENIAN_FONTS[@]}" ) ;;
-    iku ) FONTS=( "${NORTH_AMERICAN_ABORIGINAL_FONTS[@]}" ) ;;
-    kat)  FONTS=( "${GEORGIAN_FONTS[@]}" ) ;;
+          test -z "$FONTS" && FONTS=( "${GREEK_FONTS[@]}" ) ;;
+    hye ) test -z "$FONTS" && FONTS=( "${ARMENIAN_FONTS[@]}" ) ;;
+    iku ) test -z "$FONTS" && FONTS=( "${NORTH_AMERICAN_ABORIGINAL_FONTS[@]}" ) ;;
+    kat)  test -z "$FONTS" && FONTS=( "${GEORGIAN_FONTS[@]}" ) ;;
    kat_old)
          TEXT_CORPUS="${FLAGS_webtext_prefix}/kat.corpus.txt"
-          FONTS=( "${OLD_GEORGIAN_FONTS[@]}" ) ;;
-    kir ) FONTS=( "${KYRGYZ_FONTS[@]}" )
+          test -z "$FONTS" && FONTS=( "${OLD_GEORGIAN_FONTS[@]}" ) ;;
+    kir ) test -z "$FONTS" && FONTS=( "${KYRGYZ_FONTS[@]}" )
          TRAINING_DATA_ARGUMENTS=" --infrequent_ratio=100" ;;
-    kur ) FONTS=( "${KURDISH_FONTS[@]}" ) ;;
+    kur ) test -z "$FONTS" && FONTS=( "${KURDISH_FONTS[@]}" ) ;;

    *) err "Error: ${lang} is not a valid language code"
  esac
@ -1061,6 +1060,8 @@ set_lang_specific_parameters() {
  elif [[ ! -z ${MEAN_COUNT} ]]; then
    TRAINING_DATA_ARGUMENTS+=" --mean_count=${MEAN_COUNT}"
  fi
+  # Default to Latin fonts if neither the caller nor a case arm above set any.
+  test -z "$FONTS" && FONTS=( "${LATIN_FONTS[@]}" )
 }

 #=============================================================================
--- a/training/tesstrain.sh
+++ b/training/tesstrain.sh
@ -17,7 +17,6 @@
 # USAGE:
 #
 # tesstrain.sh
-#    --bin_dir PATH             # Location of training program.
 #    --fontlist FONTS_STR       # A plus-separated list of fontnames to train on.
 #    --fonts_dir FONTS_PATH     # Path to font files.
 #    --lang LANG_CODE           # ISO 639 code.
@ -25,6 +24,7 @@
 #    --output_dir OUTPUTDIR     # Location of output traineddata file.
 #    --overwrite                # Safe to overwrite files in output_dir.
 #    --run_shape_clustering     # Run shape clustering (use for Indic langs).
+#    --exposures EXPOSURES      # A list of exposure levels to use (e.g. "-1 0 1").
 #
 # OPTIONAL flags for input data. If unspecified we will look for them in
 # the langdata_dir directory.
@ -49,11 +49,8 @@ source `dirname $0`/tesstrain_utils.sh
 ARGV=("$@")
 parse_flags

-tlog "\n=== Starting training for language '${LANG_CODE}'"
-
-tlog "Cleaning workspace directory ${TRAINING_DIR}..."
 mkdir -p ${TRAINING_DIR}
-rm -fr ${TRAINING_DIR}/*
+tlog "\n=== Starting training for language '${LANG_CODE}'"

 source `dirname $0`/language-specific.sh
 set_lang_specific_parameters ${LANG_CODE}
--- a/training/tesstrain_utils.sh
+++ b/training/tesstrain_utils.sh
@ -16,10 +16,6 @@
 #
 # USAGE: source tesstrain_utils.sh

-FONTS=(
-    "Arial" \
-    "Times New Roman," \
-)
 if [ "$(uname)" == "Darwin" ];then
    FONTS_DIR="/Library/Fonts/"
 else
@ -29,7 +25,8 @@ OUTPUT_DIR="/tmp/tesstrain/tessdata"
 OVERWRITE=0
 RUN_SHAPE_CLUSTERING=0
 EXTRACT_FONT_PROPERTIES=1
-WORKSPACE_DIR="/tmp/tesstrain"
+WORKSPACE_DIR=`mktemp -d`
+EXPOSURES=0

 # Logging helper functions.
 tlog() {
@ -45,11 +42,11 @@ err_exit() {
 # if the program file is not found.
 # Usage: run_command CMD ARG1 ARG2...
 run_command() {
-    local cmd=$1
-    shift
-    if [[ ! -x ${cmd} ]]; then
-        err_exit "File ${cmd} not found"
+    local cmd=`which $1`
+    if [[ -z ${cmd} ]]; then
+        err_exit "$1 not found"
    fi
+    shift
    tlog "[$(date)] ${cmd} $@"
    ${cmd} "$@" 2>&1 1>&2 | tee -a ${LOG_FILE}
    # check completion status
@ -69,22 +66,6 @@ check_file_readable() {
    done
 }

-# Set global path variables that are based on parsed flags.
-set_prog_paths() {
-    if [[ -z ${BINDIR} ]]; then
-        err_exit "Need to specify location of program files"
-    fi
-    CN_TRAINING_EXE=${BINDIR}/cntraining
-    COMBINE_TESSDATA_EXE=${BINDIR}/combine_tessdata
-    MF_TRAINING_EXE=${BINDIR}/mftraining
-    SET_UNICHARSET_PROPERTIES_EXE=${BINDIR}/set_unicharset_properties
-    SHAPE_TRAINING_EXE=${BINDIR}/shapeclustering
-    TESSERACT_EXE=${BINDIR}/tesseract
-    TEXT2IMAGE_EXE=${BINDIR}/text2image
-    UNICHARSET_EXTRACTOR_EXE=${BINDIR}/unicharset_extractor
-    WORDLIST2DAWG_EXE=${BINDIR}/wordlist2dawg
-}
-
 # Sets the named variable to given value. Aborts if the value is missing or
 # if it looks like a flag.
 # Usage: parse_value VAR_NAME VALUE
@ -109,9 +90,6 @@ parse_flags() {
        case ${ARGV[$i]} in
            --)
                break;;
-            --bin_dir)
-                parse_value "BINDIR" ${ARGV[$j]}
-                i=$j ;;
            --fontlist)   # Expect a plus-separated list of names
                if [[ -z ${ARGV[$j]} ]] || [[ ${ARGV[$j]:0:2} == "--" ]]; then
                    err_exit "Invalid value passed to --fontlist"
@ -121,6 +99,16 @@ parse_flags() {
                FONTS=( ${ARGV[$j]} )
                IFS=$ofs
                i=$j ;;
+            --exposures)   # Expect one or more space-separated exposure levels
+                exp=""
+                while [[ $j -lt ${#ARGV[@]} ]]; do
+                    # Stop at an empty argument or at the next "--flag".
+                    [[ -z ${ARGV[$j]} || ${ARGV[$j]:0:2} == "--" ]] && break
+                    exp="$exp ${ARGV[$j]}"
+                    j=$((j+1))
+                done
+                parse_value "EXPOSURES" "$exp"
+                i=$((j-1)) ;;   # Leave i on the last consumed argument.
            --fonts_dir)
                parse_value "FONTS_DIR" ${ARGV[$j]}
                i=$j ;;
@ -156,9 +144,6 @@ parse_flags() {
    if [[ -z ${LANG_CODE} ]]; then
        err_exit "Need to specify a language --lang"
    fi
-    if [[ -z ${BINDIR} ]]; then
-        err_exit "Need to specify path to built binaries --bin_dir"
-    fi
    if [[ -z ${LANGDATA_ROOT} ]]; then
        err_exit "Need to specify path to language files --langdata_dir"
    fi
@ -171,8 +156,6 @@ parse_flags() {
        fi
    fi

-    set_prog_paths
-
    # Location where intermediate files will be created.
    TRAINING_DIR=${WORKSPACE_DIR}/${LANG_CODE}
    # Location of log file for the whole run.
@ -200,8 +183,8 @@ initialize_fontconfig() {
    export FONT_CONFIG_CACHE=$(mktemp -d --tmpdir font_tmp.XXXXXXXXXX)
    local sample_path=${FONT_CONFIG_CACHE}/sample_text.txt
    echo "Text" >${sample_path}
-    run_command ${TEXT2IMAGE_EXE} --fonts_dir=${FONTS_DIR} \
-        --font="Arial" --outputbase=${sample_path} --text=${sample_path} \
+    run_command text2image --fonts_dir=${FONTS_DIR} \
+        --font="${FONTS[0]}" --outputbase=${sample_path} --text=${sample_path} \
        --fontconfig_tmpdir=${FONT_CONFIG_CACHE}
 }

@ -228,14 +211,14 @@ generate_font_image() {
      fi
    done

-    run_command ${TEXT2IMAGE_EXE} ${common_args} --font="${font}" \
+    run_command text2image ${common_args} --font="${font}" \
        --text=${TRAINING_TEXT} ${TEXT2IMAGE_EXTRA_ARGS}
    check_file_readable ${outbase}.box ${outbase}.tif

    if (( ${EXTRACT_FONT_PROPERTIES} )) &&
        [[ -r ${TRAIN_NGRAMS_FILE} ]]; then
        tlog "Extracting font properties of ${font}"
-        run_command ${TEXT2IMAGE_EXE} ${common_args} --font="${font}" \
+        run_command text2image ${common_args} --font="${font}" \
            --ligatures=false --text=${TRAIN_NGRAMS_FILE} \
            --only_extract_font_properties --ptsize=32
        check_file_readable ${outbase}.fontinfo
@ -254,35 +237,36 @@ phase_I_generate_image() {
        err_exit "Could not find training text file ${TRAINING_TEXT}"
    fi
    CHAR_SPACING="0.0"
-    EXPOSURE="0"

-    if (( ${EXTRACT_FONT_PROPERTIES} )) && [[ -r ${BIGRAM_FREQS_FILE} ]]; then
-        # Parse .bigram_freqs file and compose a .train_ngrams file with text
-        # for tesseract to recognize during training. Take only the ngrams whose
-        # combined weight accounts for 95% of all the bigrams in the language.
-        NGRAM_FRAC=$(cat ${BIGRAM_FREQS_FILE} \
-            | awk '{s=s+$2}; END {print (s/100)*p}' p=99)
-        cat ${BIGRAM_FREQS_FILE} | sort -rnk2 \
-            | awk '{s=s+$2; if (s <= x) {printf "%s ", $1; } }' \
-            x=${NGRAM_FRAC} > ${TRAIN_NGRAMS_FILE}
-        check_file_readable ${TRAIN_NGRAMS_FILE}
-    fi
-
-    local counter=0
-    for font in "${FONTS[@]}"; do
-        generate_font_image "${font}" &
-        let counter=counter+1
-        let rem=counter%par_factor
-        if [[ "${rem}" -eq 0 ]]; then
-          wait
+    for EXPOSURE in $EXPOSURES; do
+        if (( ${EXTRACT_FONT_PROPERTIES} )) && [[ -r ${BIGRAM_FREQS_FILE} ]]; then
+            # Parse .bigram_freqs file and compose a .train_ngrams file with text
+            # for tesseract to recognize during training. Take only the ngrams whose
+            # combined weight accounts for 99% of all the bigrams in the language.
+            NGRAM_FRAC=$(cat ${BIGRAM_FREQS_FILE} \
+                | awk '{s=s+$2}; END {print (s/100)*p}' p=99)
+            cat ${BIGRAM_FREQS_FILE} | sort -rnk2 \
+                | awk '{s=s+$2; if (s <= x) {printf "%s ", $1; } }' \
+                x=${NGRAM_FRAC} > ${TRAIN_NGRAMS_FILE}
+            check_file_readable ${TRAIN_NGRAMS_FILE}
        fi
-    done
-    wait
-    # Check that each process was successful.
-    for font in "${FONTS[@]}"; do
-        local fontname=$(echo ${font} | tr ' ' '_' | sed 's/,//g')
-        local outbase=${TRAINING_DIR}/${LANG_CODE}.${fontname}.exp${EXPOSURE}
-        check_file_readable ${outbase}.box ${outbase}.tif
+
+        local counter=0
+        for font in "${FONTS[@]}"; do
+            generate_font_image "${font}" &
+            let counter=counter+1
+            let rem=counter%par_factor
+            if [[ "${rem}" -eq 0 ]]; then
+              wait
+            fi
+        done
+        wait
+        # Check that each process was successful.
+        for font in "${FONTS[@]}"; do
+            local fontname=$(echo ${font} | tr ' ' '_' | sed 's/,//g')
+            local outbase=${TRAINING_DIR}/${LANG_CODE}.${fontname}.exp${EXPOSURE}
+            check_file_readable ${outbase}.box ${outbase}.tif
+        done
    done
 }

@ -291,7 +275,7 @@ phase_UP_generate_unicharset() {
    tlog "\n=== Phase UP: Generating unicharset and unichar properties files ==="

    local box_files=$(ls ${TRAINING_DIR}/*.box)
-    run_command ${UNICHARSET_EXTRACTOR_EXE} -D "${TRAINING_DIR}/" ${box_files}
+    run_command unicharset_extractor -D "${TRAINING_DIR}/" ${box_files}
    local outfile=${TRAINING_DIR}/unicharset
    UNICHARSET_FILE="${TRAINING_DIR}/${LANG_CODE}.unicharset"
    check_file_readable ${outfile}
@ -299,7 +283,7 @@ phase_UP_generate_unicharset() {

    XHEIGHTS_FILE="${TRAINING_DIR}/${LANG_CODE}.xheights"
    check_file_readable ${UNICHARSET_FILE}
-    run_command ${SET_UNICHARSET_PROPERTIES_EXE} \
+    run_command set_unicharset_properties \
        -U ${UNICHARSET_FILE} -O ${UNICHARSET_FILE} -X ${XHEIGHTS_FILE} \
        --script_dir=${LANGDATA_ROOT}
    check_file_readable ${XHEIGHTS_FILE}
@ -327,7 +311,7 @@ phase_D_generate_dawg() {
    if [[ -s ${WORDLIST_FILE} ]]; then
        tlog "Generating word Dawg"
        check_file_readable ${UNICHARSET_FILE}
-        run_command ${WORDLIST2DAWG_EXE} -r 1 ${WORDLIST_FILE} ${WORD_DAWG} \
+        run_command wordlist2dawg -r 1 ${WORDLIST_FILE} ${WORD_DAWG} \
            ${UNICHARSET_FILE}
        check_file_readable ${WORD_DAWG}

@ -339,13 +323,13 @@ phase_D_generate_dawg() {
    if [[ -s ${freq_wordlist_file} ]]; then
        check_file_readable ${UNICHARSET_FILE}
        tlog "Generating frequent-word Dawg"
-        run_command ${WORDLIST2DAWG_EXE}  -r 1 ${freq_wordlist_file} \
+        run_command wordlist2dawg  -r 1 ${freq_wordlist_file} \
            ${FREQ_DAWG} ${UNICHARSET_FILE}
        check_file_readable ${FREQ_DAWG}
    fi

    # Punctuation DAWG
-    # -r arguments to WORDLIST2DAWG_EXE denote RTL reverse policy
+    # -r arguments to wordlist2dawg denote RTL reverse policy
    # (see Trie::RTLReversePolicy enum in third_party/tesseract/dict/trie.h).
    # We specify 0/RRP_DO_NO_REVERSE when generating number DAWG,
    # 1/RRP_REVERSE_IF_HAS_RTL for freq and word DAWGS,
@ -360,20 +344,20 @@ phase_D_generate_dawg() {
        PUNC_FILE="${LANGDATA_ROOT}/common.punc"
    fi
    check_file_readable ${PUNC_FILE}
-    run_command ${WORDLIST2DAWG_EXE} -r ${punc_reverse_policy} \
+    run_command wordlist2dawg -r ${punc_reverse_policy} \
        ${PUNC_FILE} ${PUNC_DAWG} ${UNICHARSET_FILE}
    check_file_readable ${PUNC_DAWG}

    # Numbers DAWG
    if [[ -s ${NUMBERS_FILE} ]]; then
-        run_command ${WORDLIST2DAWG_EXE} -r 0 \
+        run_command wordlist2dawg -r 0 \
            ${NUMBERS_FILE} ${NUMBER_DAWG} ${UNICHARSET_FILE}
        check_file_readable ${NUMBER_DAWG}
    fi

    # Bigram dawg
    if [[ -s ${WORD_BIGRAMS_FILE} ]]; then
-        run_command ${WORDLIST2DAWG_EXE} -r 1 \
+        run_command wordlist2dawg -r 1 \
            ${WORD_BIGRAMS_FILE} ${BIGRAM_DAWG} ${UNICHARSET_FILE}
        check_file_readable ${BIGRAM_DAWG}
    fi
@ -387,10 +371,9 @@ phase_E_extract_features() {
        par_factor=1
    fi
    tlog "\n=== Phase E: Extracting features ==="
-    TRAIN_EXPOSURES='0'

    local img_files=""
-    for exposure in ${TRAIN_EXPOSURES}; do
+    for exposure in ${EXPOSURES}; do
        img_files=${img_files}' '$(ls ${TRAINING_DIR}/*.exp${exposure}.tif)
    done

@ -405,7 +388,7 @@ phase_E_extract_features() {
    tlog "Using TESSDATA_PREFIX=${TESSDATA_PREFIX}"
    local counter=0
    for img_file in ${img_files}; do
-        run_command ${TESSERACT_EXE} ${img_file} ${img_file%.*} \
+        run_command tesseract ${img_file} ${img_file%.*} \
            ${box_config} ${config} &
      let counter=counter+1
      let rem=counter%par_factor
@ -427,7 +410,7 @@ phase_C_cluster_prototypes() {
    tlog "\n=== Phase C: Clustering feature prototypes (cnTraining) ==="
    local out_normproto=$1

-    run_command ${CN_TRAINING_EXE} -D "${TRAINING_DIR}/" \
+    run_command cntraining -D "${TRAINING_DIR}/" \
        $(ls ${TRAINING_DIR}/*.tr)

    check_file_readable ${TRAINING_DIR}/normproto
@ -447,7 +430,7 @@ phase_S_cluster_shapes() {
        font_props=${font_props}" -X ${TRAINING_DIR}/${LANG_CODE}.xheights"
    fi

-    run_command ${SHAPE_TRAINING_EXE} \
+    run_command shapeclustering \
        -D "${TRAINING_DIR}/" \
        -U ${TRAINING_DIR}/${LANG_CODE}.unicharset \
        -O ${TRAINING_DIR}/${LANG_CODE}.mfunicharset \
@ -468,7 +451,7 @@ phase_M_cluster_microfeatures() {
        font_props=${font_props}" -X ${TRAINING_DIR}/${LANG_CODE}.xheights"
    fi

-    run_command ${MF_TRAINING_EXE} \
+    run_command mftraining \
        -D "${TRAINING_DIR}/" \
        -U ${TRAINING_DIR}/${LANG_CODE}.unicharset \
        -O ${TRAINING_DIR}/${LANG_CODE}.mfunicharset \
@ -528,7 +511,7 @@ make__traineddata() {
  fi

  # Compose the traineddata file.
-  run_command ${COMBINE_TESSDATA_EXE} ${TRAINING_DIR}/${LANG_CODE}.
+  run_command combine_tessdata ${TRAINING_DIR}/${LANG_CODE}.

  # Copy it to the output dir, overwriting only if allowed by the cmdline flag.
  if [[ ! -d ${OUTPUT_DIR} ]]; then
--- a/viewer/svutil.cpp
+++ b/viewer/svutil.cpp
@ -127,7 +127,7 @@ SVSemaphore::SVSemaphore() {
  semaphore_ = CreateSemaphore(0, 0, 10, 0);
 #elif defined(__APPLE__)
  char name[50];
-  snprintf(name, sizeof(name), "%d", random());
+  snprintf(name, sizeof(name), "%ld", random());
  sem_unlink(name);
  semaphore_ = sem_open(name, O_CREAT , S_IWUSR, 0);
  if (semaphore_ == SEM_FAILED) {
@ -296,14 +296,11 @@ static std::string ScrollViewCommand(std::string scrollview_path) {
  // this unnecessary.
  // Also the path has to be separated by ; on windows and : otherwise.
 #ifdef _WIN32
-  const char* cmd_template = "-Djava.library.path=%s -cp %s/ScrollView.jar;"
-      "%s/piccolo2d-core-3.0.jar:%s/piccolo2d-extras-3.0.jar"
-      " com.google.scrollview.ScrollView";
+  const char* cmd_template = "-Djava.library.path=%s -jar %s/ScrollView.jar";
+
 #else
  const char* cmd_template = "-c \"trap 'kill %%1' 0 1 2 ; java "
-      "-Xms1024m -Xmx2048m -Djava.library.path=%s -cp %s/ScrollView.jar:"
-      "%s/piccolo2d-core-3.0.jar:%s/piccolo2d-extras-3.0.jar"
-      " com.google.scrollview.ScrollView"
+      "-Xms1024m -Xmx2048m -jar %s/ScrollView.jar"
      " & wait\"";
 #endif
  int cmdlen = strlen(cmd_template) + 4*strlen(scrollview_path.c_str()) + 1;
@ -374,7 +371,7 @@ static int GetAddrInfo(const char* hostname, int port,
                       struct addrinfo** address) {
 #if defined(__linux__)
  char port_str[40];
-  snprintf(port_str, 40, "%d", port);
+  snprintf(port_str, 40, "%d", port);
  return getaddrinfo(hostname, port_str, NULL, address);
 #else
  return GetAddrInfoNonLinux(hostname, port, address);
--- a/wordrec/lm_state.h
+++ b/wordrec/lm_state.h
@ -177,11 +177,11 @@ struct ViterbiStateEntry : public ELIST_LINK {
  /// the smallest rating or lower/upper case letters).
  LanguageModelFlagsType top_choice_flags;

-  /// Extra information maintained by Dawg laguage model component
+  /// Extra information maintained by Dawg language model component
  /// (owned by ViterbiStateEntry).
  LanguageModelDawgInfo *dawg_info;

-  /// Extra information maintained by Ngram laguage model component
+  /// Extra information maintained by Ngram language model component
  /// (owned by ViterbiStateEntry).
  LanguageModelNgramInfo *ngram_info;

--- a/wordrec/pieces.cpp
+++ b/wordrec/pieces.cpp
@ -273,7 +273,7 @@ void Wordrec::merge_and_put_fragment_lists(inT16 row, inT16 column,
 *
 * Recursively go through the ratings matrix to find lists of fragments
 * to be merged in the function merge_and_put_fragment_lists.
- * current_frag is the postion of the piece we are looking for.
+ * current_frag is the position of the piece we are looking for.
 * current_row is the row in the rating matrix we are currently at.
 * start is the row we started initially, so that we can know where
 * to append the results to the matrix. num_frag_parts is the total
--- a/wordrec/wordrec.h
+++ b/wordrec/wordrec.h
@ -375,7 +375,7 @@ class Wordrec : public Classify {
                       inT16 num_blobs);
  // Recursively go through the ratings matrix to find lists of fragments
  // to be merged in the function merge_and_put_fragment_lists.
-  // current_frag is the postion of the piece we are looking for.
+  // current_frag is the position of the piece we are looking for.
  // current_row is the row in the rating matrix we are currently at.
  // start is the row we started initially, so that we can know where
  // to append the results to the matrix. num_frag_parts is the total