Fix some typos (most found by codespell)

Signed-off-by: Stefan Weil <sw@weilnetz.de>
2025-08-06 13:56:47 +08:00 · 2021-04-11 11:03:04 +02:00 · 2021-04-11 11:03:04 +02:00 · 0401b9470c
commit 0401b9470c
parent 14505484c1
22 changed files with 26 additions and 26 deletions
--- a/include/tesseract/baseapi.h
+++ b/include/tesseract/baseapi.h
@ -498,7 +498,7 @@ public:
   * metadata used by side-effect processes, such as reading a box
   * file or formatting as hOCR.
   *
-   * See ProcessPages for desciptions of other parameters.
+   * See ProcessPages for descriptions of other parameters.
   */
  bool ProcessPage(Pix *pix, int page_index, const char *filename,
                   const char *retry_config, int timeout_millisec,
--- a/java/com/google/scrollview/ScrollView.java
+++ b/java/com/google/scrollview/ScrollView.java
@ -85,7 +85,7 @@ public class ScrollView {
  }

  /**
-   * The main program loop. Basically loops trough receiving messages and
+   * The main program loop. Basically loops through receiving messages and
   * processing them and then sending messages (if there are any).
   */
  private static void IOLoop() {
--- a/src/ccmain/control.cpp
+++ b/src/ccmain/control.cpp
@ -235,7 +235,7 @@ bool Tesseract::RecogAllWordsPassN(int pass_n, ETEXT_DESC *monitor, PAGE_RES_IT
        continue;
      }
    }
-    // Sync pr_it with the wth WordData.
+    // Sync pr_it with the WordData.
    while (pr_it->word() != nullptr && pr_it->word() != word->word) {
      pr_it->forward();
    }
--- a/src/ccmain/fixspace.cpp
+++ b/src/ccmain/fixspace.cpp
@ -25,7 +25,7 @@
 #include "errcode.h"        // for ASSERT_HOST
 #include "normalis.h"       // for kBlnXHeight, kBlnBaselineOffset
 #include "pageres.h"        // for WERD_RES_IT, WERD_RES, WERD_RES_LIST
-#include "params.h"         // for IntParam, StringParam, BoolParam, Doub...
+#include "params.h"         // for IntParam, StringParam, BoolParam, DoubleParam, ...
 #include "ratngs.h"         // for WERD_CHOICE, FREQ_DAWG_PERM, NUMBER_PERM
 #include "rect.h"           // for TBOX
 #include "stepblob.h"       // for C_BLOB_IT, C_BLOB_LIST, C_BLOB
--- a/src/ccmain/tesseractclass.cpp
+++ b/src/ccmain/tesseractclass.cpp
@ -256,7 +256,7 @@ Tesseract::Tesseract()
    , INT_MEMBER(fixsp_non_noise_limit, 1, "How many non-noise blbs either side?", this->params())
    , double_MEMBER(fixsp_small_outlines_size, 0.28, "Small if lt xht x this", this->params())
    , BOOL_MEMBER(tessedit_prefer_joined_punct, false, "Reward punctuation joins", this->params())
-    , INT_MEMBER(fixsp_done_mode, 1, "What constitues done for spacing", this->params())
+    , INT_MEMBER(fixsp_done_mode, 1, "What constitutes done for spacing", this->params())
    , INT_MEMBER(debug_fix_space_level, 0, "Contextual fixspace debug", this->params())
    , STRING_MEMBER(numeric_punctuation, ".,", "Punct. chs expected WITHIN numbers", this->params())
    , INT_MEMBER(x_ht_acceptance_tolerance, 8,
--- a/src/ccmain/tesseractclass.h
+++ b/src/ccmain/tesseractclass.h
@ -902,7 +902,7 @@ public:
  INT_VAR_H(fixsp_non_noise_limit, 1, "How many non-noise blbs either side?");
  double_VAR_H(fixsp_small_outlines_size, 0.28, "Small if lt xht x this");
  BOOL_VAR_H(tessedit_prefer_joined_punct, false, "Reward punctuation joins");
-  INT_VAR_H(fixsp_done_mode, 1, "What constitues done for spacing");
+  INT_VAR_H(fixsp_done_mode, 1, "What constitutes done for spacing");
  INT_VAR_H(debug_fix_space_level, 0, "Contextual fixspace debug");
  STRING_VAR_H(numeric_punctuation, ".,", "Punct. chs expected WITHIN numbers");
  INT_VAR_H(x_ht_acceptance_tolerance, 8, "Max allowed deviation of blob top outside of font data");
--- a/src/ccstruct/blobbox.h
+++ b/src/ccstruct/blobbox.h
@ -93,7 +93,7 @@ enum BlobSpecialTextType {
  BSTT_NONE,    // No special.
  BSTT_ITALIC,  // Italic style.
  BSTT_DIGIT,   // Digit symbols.
-  BSTT_MATH,    // Mathmatical symobls (not including digit).
+  BSTT_MATH,    // Mathematical symbols (not including digit).
  BSTT_UNCLEAR, // Characters with low recognition rate.
  BSTT_SKIP,    // Characters that we skip labeling (usually too small).
  BSTT_COUNT
--- a/src/ccstruct/pageres.h
+++ b/src/ccstruct/pageres.h
@ -217,7 +217,7 @@ public:
  std::vector<std::vector<std::pair<const char *, float>>> timesteps;
  // Stores the lstm choices of every timestep segmented by character
  std::vector<std::vector<std::vector<std::pair<const char *, float>>>> segmented_timesteps;
-  // Symbolchoices aquired during CTC
+  // Symbolchoices acquired during CTC
  std::vector<std::vector<std::pair<const char *, float>>> CTC_symbol_choices;
  // Stores if the timestep vector starts with a space
  bool leading_space = false;
--- a/src/ccstruct/polyaprx.cpp
+++ b/src/ccstruct/polyaprx.cpp
@ -296,7 +296,7 @@ void fix2(         // polygonal approx
    /*single fixed step */
    if (edgept->flags[FLAGS] & FIXED &&
        edgept->flags[RUNLENGTH] == 1
-        /*and neighours free */
+        /*and neighbours free */
        && edgept->next->flags[FLAGS] & FIXED &&
        (edgept->prev->flags[FLAGS] & FIXED) == 0
        /*same pair of dirs */
--- a/src/ccutil/ambigs.cpp
+++ b/src/ccutil/ambigs.cpp
@ -333,7 +333,7 @@ bool UnicharAmbigs::ParseAmbiguityLine(int line_num, int version, int debug_leve
    return false;
  }
  if (version > 0) {
-    // The next field being true indicates that the abiguity should
+    // The next field being true indicates that the ambiguity should
    // always be substituted (e.g. '' should always be changed to ").
    // For such "certain" n -> m ambigs tesseract will insert character
    // fragments for the n pieces in the unicharset. AmbigsFound()
--- a/src/classify/adaptmatch.cpp
+++ b/src/classify/adaptmatch.cpp
@ -190,7 +190,7 @@ void SetAdaptiveThreshold(float Threshold);
 * @param Blob    blob to be classified
 * @param[out] Choices    List of choices found by adaptive matcher.
 * filled on return with the choices found by the
- * class pruner and the ratings therefrom. Also
+ * class pruner and the ratings from it. Also
 * contains the detailed results of the integer matcher.
 *
 */
--- a/src/dict/dict.h
+++ b/src/dict/dict.h
@ -322,7 +322,7 @@ public:
   * initialized to NO_EDGE.  Since the punctuation dawg includes the empty
   * pattern " " (meaning anything without surrounding punctuation), having a
   * single entry for the punctuation dawg will cover all dawgs reachable
-   * therefrom -- that includes all number and word dawgs. The only dawg
+   * from it -- that includes all number and word dawgs. The only dawg
   * non-reachable from the punctuation_dawg is the pattern dawg.
   * If hyphen state needs to be applied, initial dawg_args->active_dawgs can
   * be copied from the saved hyphen state (maintained by Dict).
--- a/src/lstm/recodebeam.h
+++ b/src/lstm/recodebeam.h
@ -335,7 +335,7 @@ private:
                              int xCoord);

  // Calculates more accurate character boundaries which can be used to
-  // provide more acurate alternative symbol choices.
+  // provide more accurate alternative symbol choices.
  static void calculateCharBoundaries(std::vector<int> *starts, std::vector<int> *ends,
                                      std::vector<int> *character_boundaries_, int maxWidth);

--- a/src/textord/tablerecog.cpp
+++ b/src/textord/tablerecog.cpp
@ -733,7 +733,7 @@ int StructuredTable::CountHorizontalIntersections(int y) {
 }

 // Counts how many text partitions are in this box.
-// This is used to count partitons in cells, as that can indicate
+// This is used to count partitions in cells, as that can indicate
 // how "strong" a potential table row/column (or even full table) actually is.
 int StructuredTable::CountPartitions(const TBOX &box) {
  ColPartitionGridSearch gsearch(text_grid_);
--- a/src/textord/tabvector.cpp
+++ b/src/textord/tabvector.cpp
@ -776,7 +776,7 @@ void TabVector::Evaluate(const ICOORD &vertical, TabFind *finder) {
 }

 // (Re)Fit a line to the stored points. Returns false if the line
-// is degenerate. Althougth the TabVector code mostly doesn't care about the
+// is degenerate. Although the TabVector code mostly doesn't care about the
 // direction of lines, XAtY would give silly results for a horizontal line.
 // The class is mostly aimed at use for vertical lines representing
 // horizontal tab stops.
--- a/src/textord/tabvector.h
+++ b/src/textord/tabvector.h
@ -363,7 +363,7 @@ public:
  void Evaluate(const ICOORD &vertical, TabFind *finder);

  // (Re)Fit a line to the stored points. Returns false if the line
-  // is degenerate. Althougth the TabVector code mostly doesn't care about the
+  // is degenerate. Although the TabVector code mostly doesn't care about the
  // direction of lines, XAtY would give silly results for a horizontal line.
  // The class is mostly aimed at use for vertical lines representing
  // horizontal tab stops.
--- a/src/textord/textlineprojection.cpp
+++ b/src/textord/textlineprojection.cpp
@ -35,7 +35,7 @@ const int kWrongWayPenalty = 4;
 // Ratio between parallel gap and perpendicular gap used to measure total
 // distance of a box from a target box in curved textline space.
 // parallel-gap is treated more favorably by this factor to allow catching
-// quotes and elipsis at the end of textlines.
+// quotes and ellipsis at the end of textlines.
 const int kParaPerpDistRatio = 4;
 // Multiple of scale_factor_ that the inter-line gap must be before we start
 // padding the increment box perpendicular to the text line.
--- a/src/textord/tordmain.cpp
+++ b/src/textord/tordmain.cpp
@ -502,14 +502,14 @@ bool Textord::clean_noise_from_row( // remove empties
          blob_box = outline->bounding_box();
          blob_size = blob_box.width() > blob_box.height() ? blob_box.width() : blob_box.height();
          if (blob_size < textord_noise_sizelimit * row->x_height()) {
-            dot_count++; // count smal outlines
+            dot_count++; // count small outlines
          }
          if (!outline->child()->empty() &&
              blob_box.height() < (1 + textord_noise_syfract) * row->x_height() &&
              blob_box.height() > (1 - textord_noise_syfract) * row->x_height() &&
              blob_box.width() < (1 + textord_noise_sxfract) * row->x_height() &&
              blob_box.width() > (1 - textord_noise_sxfract) * row->x_height()) {
-            super_norm_count++; // count smal outlines
+            super_norm_count++; // count x-height-sized outlines that have children
          }
        }
      } else {
@ -598,14 +598,14 @@ void Textord::clean_noise_from_words( // remove empties
          blob_box = outline->bounding_box();
          blob_size = blob_box.width() > blob_box.height() ? blob_box.width() : blob_box.height();
          if (blob_size < textord_noise_sizelimit * row->x_height()) {
-            dot_count++; // count smal outlines
+            dot_count++; // count small outlines
          }
          if (!outline->child()->empty() &&
              blob_box.height() < (1 + textord_noise_syfract) * row->x_height() &&
              blob_box.height() > (1 - textord_noise_syfract) * row->x_height() &&
              blob_box.width() < (1 + textord_noise_sxfract) * row->x_height() &&
              blob_box.width() > (1 - textord_noise_sxfract) * row->x_height()) {
-            norm_count++; // count smal outlines
+            norm_count++; // count x-height-sized outlines that have children
          }
        }
      } else {
--- a/src/textord/tospace.cpp
+++ b/src/textord/tospace.cpp
@ -64,7 +64,7 @@ void Textord::to_spacing(ICOORD page_tr,       // topright of page
                        block_non_space_gap_width);
    // Make sure relative values of block-level space and non-space gap
    // widths are reasonable. The ratio of 1:3 is also used in
-    // block_spacing_stats, to corrrect the block_space_gap_width
+    // block_spacing_stats, to correct the block_space_gap_width
    // Useful for arabic and hindi, when the non-space gap width is
    // often over-estimated and should not be trusted. A similar ratio
    // is found in block_spacing_stats.
@ -1695,7 +1695,7 @@ TBOX Textord::reduced_box_next(TO_ROW *row,    // current row
 * the xheight.
 *
 *
- * !!!!!!! WONT WORK WITH LARGE UPPER CASE CHARS - T F V W - look at examples on
+ * !!!!!!! WON'T WORK WITH LARGE UPPER CASE CHARS - T F V W - look at examples on
 *         "home".  Perhaps we need something which say if the width ABOVE the
 *         xht alone includes the whole of the reduced width, then use the full
 *         blob box - Might still fail on italic F
--- a/src/training/degradeimage.cpp
+++ b/src/training/degradeimage.cpp
@ -78,7 +78,7 @@ const int kMinRampSize = 1000;
 // With no dilation, after covolution, the images are so light that a heavy
 // constant offset is required to make the 0 image look reasonable. A simple
 // constant offset multiple of exposure to undo this value is enough to achieve
-// all the required lightening. This gives the advantage that exposure level 1
+// all the required lightening. This gives the advantage that exposure level 1
 // with a single dilation gives a good impression of the broken-yet-too-dark
 // problem that is often seen in scans.
 // A small random rotation gives some varying greyscale values on the edges,
--- a/unittest/README.md
+++ b/unittest/README.md
@ -69,7 +69,7 @@

 ### Fonts

-* Microsoft fonts: arialbi.ttf, times.ttf, verdana.ttf - [instalation guide](https://www.makeuseof.com/tag/how-to-install-microsoft-core-fonts-in-ubuntu-linux/)
+* Microsoft fonts: arialbi.ttf, times.ttf, verdana.ttf - [installation guide](https://www.makeuseof.com/tag/how-to-install-microsoft-core-fonts-in-ubuntu-linux/)
 * [ae_Arab.ttf](https://www.wfonts.com/download/data/2014/12/03/ae-arab/ae-arab.zip)
 * dejavu-fonts: [DejaVuSans-ExtraLight.ttf](https://dejavu-fonts.github.io/Download.html)
 * [Lohit-Hindi.ttf](https://raw.githubusercontent.com/pratul/packageofpractices/master/assets/fonts/Lohit-Hindi.ttf)
--- a/unittest/paragraphs_test.cc
+++ b/unittest/paragraphs_test.cc
@ -182,7 +182,7 @@ void EvaluateParagraphDetection(const TextAndModel *correct, int n,
      }
      dbg_lines.push_back(absl::StrCat(correct[i].ascii, annotation));
    }
-    LOG(INFO) << "Discrepency!\n" << absl::StrJoin(dbg_lines, "\n");
+    LOG(INFO) << "Discrepancy!\n" << absl::StrJoin(dbg_lines, "\n");
  }
 }