Fixed typos and improved comments

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@753 d0cd1f9f-072b-0410-8dd7-cf729c803f20
2025-06-07 09:52:40 +08:00 · 2012-09-21 15:31:20 +00:00 · 2012-09-21 15:31:20 +00:00 · da1047f020
commit da1047f020
parent 5e79160afb
5 changed files with 24 additions and 14 deletions
--- a/1
+++ b/1
@ -26,6 +26,7 @@ Dependencies and Licenses

 Leptonica is required. (www.leptonica.com). Tesseract no longer compiles
 without Leptonica.
+Libtiff is no longer required as a direct dependency.


 Installing and Running Tesseract
--- a/ccmain/paragraphs_internal.h
+++ b/ccmain/paragraphs_internal.h
@ -277,7 +277,7 @@ void RecomputeMarginsAndClearHypotheses(
    GenericVector<RowScratchRegisters> *rows, int start, int end,
    int percentile);

-// Return the minimum inter-word space in rows[row_start, row_end).
+// Return the median inter-word space in rows[row_start, row_end).
 int InterwordSpace(const GenericVector<RowScratchRegisters> &rows,
                   int row_start, int row_end);

--- a/ccstruct/ratngs.h
+++ b/ccstruct/ratngs.h
@ -138,7 +138,16 @@ class BLOB_CHOICE: public ELIST_LINK
  UNICHAR_ID unichar_id_;          // unichar id
  inT16 fontinfo_id_;              // char font information
  inT16 fontinfo_id2_;             // 2nd choice font information
+  // Rating is the classifier distance weighted by the length of the outline
+  // in the blob. In terms of probability, classifier distance is -k log p,
+  // with k chosen so that the resulting distance is in the range [0, 1], and
+  // rating = w (-k log p) where w is the weight for the length of the outline.
+  // Sums of ratings may be compared meaningfully for words of different
+  // segmentation.
  float rating_;                  // size related
+  // Certainty is a number in [-20, 0] indicating the classifier certainty
+  // of the choice. In terms of probability, certainty = 20 (k log p) where
+  // k is defined as above to normalize -k log p to the range [0, 1].
  float certainty_;               // absolute
  int script_id_;
  // Stores language model information about this BLOB_CHOICE. Used during
@ -408,7 +417,9 @@ class WERD_CHOICE {
  char *fragment_lengths_;   // number of fragments in each unichar
  int reserved_;             // size of the above arrays
  int length_;               // word length
+  // Rating is the sum of the ratings of the individual blobs in the word.
  float rating_;             // size related
+  // Certainty is the min (worst) certainty of the individual blobs in the word.
  float certainty_;          // absolute
  uinT8 permuter_;           // permuter code
  bool fragment_mark_;       // if true, indicates that this choice
--- a/classify/adaptmatch.cpp
+++ b/classify/adaptmatch.cpp
@ -181,12 +181,10 @@ void Classify::AdaptiveClassifier(TBLOB *Blob,
                                  CLASS_PRUNER_RESULTS CPResults) {
  assert(Choices != NULL);
  ADAPT_RESULTS *Results = new ADAPT_RESULTS();
+  Results->Initialize();

  if (AdaptedTemplates == NULL)
    AdaptedTemplates = NewAdaptedTemplates (true);
-
-  Results->Initialize();
-
  DoAdaptiveMatch(Blob, denorm, Results);
  if (CPResults != NULL)
    memcpy(CPResults, Results->CPResults,
@ -903,7 +901,14 @@ int Classify::AdaptableWord(TWERD *Word,
      BestChoiceLength > 0 &&
      BestChoiceLength == Word->NumBlobs() &&
      BestChoiceLength <= MAX_ADAPTABLE_WERD_SIZE &&
+      // This basically ensures that the word is at least a dictionary match
+      // (freq word, user word, system dawg word, etc.), since all the other
+      // adjustments will make the adjust factor higher than
+      // adaptable_score=1.1+0.05=1.15.
+      // Since there are other flags that ensure that the word is a dict word,
+      // this check could at times be redundant.
      getDict().CurrentBestChoiceAdjustFactor() <= adaptable_score &&
+      // Make sure that alternative choices are not dictionary words.
      getDict().AlternativeChoicesWorseThan(adaptable_score) &&
      getDict().CurrentBestChoiceIs(BestChoiceWord);
 }
@ -2487,18 +2492,12 @@ void Classify::RemoveBadMatches(ADAPT_RESULTS *Results) {

 /*----------------------------------------------------------------------------*/
 /**
- * This routine steps thru each matching class in Results
- * and removes it from the match list if its rating
- * is worse than the BestRating plus a pad.  In other words,
- * all good matches get moved to the front of the classes
- * array.
+ * This routine discards extra digits or punctuation from the results.
+ * We keep only the top 2 punctuation answers and the top 1 digit answer if
+ * present.
 *
 * @param Results contains matches to be filtered
 *
- * Globals:
- * - matcher_bad_match_pad defines a "bad match"
- *
- * @note Exceptions: none
 * @note History: Tue Mar 12 13:51:03 1991, DSJ, Created.
 */
 void Classify::RemoveExtraPuncs(ADAPT_RESULTS *Results) {
--- a/classify/intproto.cpp
+++ b/classify/intproto.cpp
@ -1846,7 +1846,6 @@ void RenderIntFeature(ScrollView *window, const INT_FEATURE_STRUCT* Feature,
 * proto from the class description and adds a rendering of
 * the proto onto the ShapeList.
 *
- * @param ShapeList   shape list to append proto rendering onto
 * @param Class   class that proto is contained in
 * @param ProtoId   id of proto to be rendered
 * @param color   color to render proto in