Reduce scope of some local variables (reported by Codacy)

Also apply some smaller optimizations and add TODO comments for local variables which require further examination.

Signed-off-by: Stefan Weil <sw@weilnetz.de>
2024-11-23 18:49:08 +08:00 · 2024-06-11 08:58:25 +02:00 · 2024-06-11 08:58:25 +02:00 · 96772c5761
commit 96772c5761
parent 560529332c
9 changed files with 38 additions and 54 deletions
--- a/src/api/pdfrenderer.cpp
+++ b/src/api/pdfrenderer.cpp
@ -242,13 +242,13 @@ static void GetWordBaseline(int writing_direction, int ppi, int height, int word
  double word_length;
  double x, y;
  {
-    int px = word_x1;
-    int py = word_y1;
    double l2 = dist2(line_x1, line_y1, line_x2, line_y2);
    if (l2 == 0) {
      x = line_x1;
      y = line_y1;
    } else {
+      int px = word_x1;
+      int py = word_y1;
      double t = ((px - line_x2) * (line_x2 - line_x1) + (py - line_y2) * (line_y2 - line_y1)) / l2;
      x = line_x2 + t * (line_x2 - line_x1);
      y = line_y2 + t * (line_y2 - line_y1);
--- a/src/ccmain/applybox.cpp
+++ b/src/ccmain/applybox.cpp
@ -258,10 +258,10 @@ void Tesseract::MaximallyChopWord(const std::vector<TBOX> &boxes, BLOCK *block,
  }
  const double e = exp(1.0); // The base of natural logs.
  unsigned blob_number;
-  int right_chop_index = 0;
  if (!assume_fixed_pitch_char_segment) {
    // We only chop if the language is not fixed pitch like CJK.
    SEAM *seam = nullptr;
+    int right_chop_index = 0;
    while ((seam = chop_one_blob(boxes, blob_choices, word_res, &blob_number)) != nullptr) {
      word_res->InsertSeam(blob_number, seam);
      BLOB_CHOICE *left_choice = blob_choices[blob_number];
@ -685,6 +685,7 @@ void Tesseract::SearchForText(const std::vector<BLOB_CHOICE_LIST *> *choices, in
 void Tesseract::TidyUp(PAGE_RES *page_res) {
  int ok_blob_count = 0;
  int bad_blob_count = 0;
+  // TODO: check usage of ok_word_count.
  int ok_word_count = 0;
  int unlabelled_words = 0;
  PAGE_RES_IT pr_it(page_res);
--- a/src/ccmain/control.cpp
+++ b/src/ccmain/control.cpp
@ -949,6 +949,7 @@ bool Tesseract::ReassignDiacritics(int pass, PAGE_RES_IT *pr_it, bool *make_next
  }
  real_word->AddSelectedOutlines(wanted, wanted_blobs, wanted_outlines, nullptr);
  AssignDiacriticsToNewBlobs(outlines, pass, real_word, pr_it, &word_wanted, &target_blobs);
+  // TODO: check code.
  int non_overlapped = 0;
  int non_overlapped_used = 0;
  for (unsigned i = 0; i < word_wanted.size(); ++i) {
@ -1121,9 +1122,9 @@ bool Tesseract::SelectGoodDiacriticOutlines(int pass, float certainty_threshold,
                                            C_BLOB *blob,
                                            const std::vector<C_OUTLINE *> &outlines,
                                            int num_outlines, std::vector<bool> *ok_outlines) {
-  std::string best_str;
  float target_cert = certainty_threshold;
  if (blob != nullptr) {
+    std::string best_str;
    float target_c2;
    target_cert = ClassifyBlobAsWord(pass, pr_it, blob, best_str, &target_c2);
    if (debug_noise_removal) {
@ -1797,9 +1798,6 @@ not_a_word:
 }

 bool Tesseract::check_debug_pt(WERD_RES *word, int location) {
-  bool show_map_detail = false;
-  int16_t i;
-
  if (!test_pt) {
    return false;
  }
@ -1811,6 +1809,7 @@ bool Tesseract::check_debug_pt(WERD_RES *word, int location) {
    if (location < 0) {
      return true; // For breakpoint use
    }
+    bool show_map_detail = false;
    tessedit_rejection_debug.set_value(true);
    debug_x_ht_level.set_value(2);
    tprintf("\n\nTESTWD::");
@ -1864,7 +1863,7 @@ bool Tesseract::check_debug_pt(WERD_RES *word, int location) {
      tprintf("\n");
      if (show_map_detail) {
        tprintf("\"%s\"\n", word->best_choice->unichar_string().c_str());
-        for (i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) {
+        for (unsigned i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) {
          tprintf("**** \"%c\" ****\n", word->best_choice->unichar_string()[i]);
          word->reject_map[i].full_print(debug_fp);
        }
@ -1891,13 +1890,12 @@ static void find_modal_font( // good chars in word
    int16_t *font_out,       // output font
    int8_t *font_count       // output count
 ) {
-  int16_t font;  // font index
-  int32_t count; // pile count
-
  if (fonts->get_total() > 0) {
-    font = static_cast<int16_t>(fonts->mode());
+    // font index
+    int16_t font = static_cast<int16_t>(fonts->mode());
    *font_out = font;
-    count = fonts->pile_count(font);
+    // pile count
+    int32_t count = fonts->pile_count(font);
    *font_count = count < INT8_MAX ? count : INT8_MAX;
    fonts->add(font, -*font_count);
  } else {
--- a/src/ccmain/docqual.cpp
+++ b/src/ccmain/docqual.cpp
@ -60,10 +60,10 @@ int16_t Tesseract::word_blob_quality(WERD_RES *word) {
 }

 int16_t Tesseract::word_outline_errs(WERD_RES *word) {
-  int16_t i = 0;
  int16_t err_count = 0;

  if (word->rebuild_word != nullptr) {
+    int16_t i = 0;
    for (unsigned b = 0; b < word->rebuild_word->NumBlobs(); ++b) {
      TBLOB *blob = word->rebuild_word->blobs[b];
      err_count += count_outline_errs(word->best_choice->unichar_string()[i], blob->NumOutlines());
@ -209,13 +209,8 @@ void Tesseract::unrej_good_quality_words( // unreject potential

 void Tesseract::doc_and_block_rejection( // reject big chunks
    PAGE_RES_IT &page_res_it, bool good_quality_doc) {
-  int16_t block_no = 0;
-  int16_t row_no = 0;
  BLOCK_RES *current_block;
-  ROW_RES *current_row;

-  bool rej_word;
-  bool prev_word_rejected;
  int16_t char_quality = 0;
  int16_t accepted_char_quality;

@ -238,7 +233,7 @@ void Tesseract::doc_and_block_rejection( // reject big chunks
    WERD_RES *word;
    while ((word = page_res_it.word()) != nullptr) {
      current_block = page_res_it.block();
-      block_no = current_block->block->pdblk.index();
+      int16_t block_no = current_block->block->pdblk.index();
      if (current_block->char_count > 0 &&
          (current_block->rej_count * 100.0 / current_block->char_count) >
              tessedit_reject_block_percent) {
@ -246,8 +241,9 @@ void Tesseract::doc_and_block_rejection( // reject big chunks
          tprintf("REJECTING BLOCK %d  #chars: %d;  #Rejects: %d\n", block_no,
                  current_block->char_count, current_block->rej_count);
        }
-        prev_word_rejected = false;
+        bool prev_word_rejected = false;
        while ((word = page_res_it.word()) != nullptr && (page_res_it.block() == current_block)) {
+          bool rej_word;
          if (tessedit_preserve_blk_rej_perfect_wds) {
            rej_word = word->reject_map.reject_count() > 0 ||
                       word->reject_map.length() < tessedit_preserve_min_wd_len;
@ -284,9 +280,9 @@ void Tesseract::doc_and_block_rejection( // reject big chunks
        }

        /* Walk rows in block testing for row rejection */
-        row_no = 0;
+        int16_t row_no = 0;
        while (page_res_it.word() != nullptr && page_res_it.block() == current_block) {
-          current_row = page_res_it.row();
+          ROW_RES *current_row = page_res_it.row();
          row_no++;
          /* Reject whole row if:
  fraction of chars on row which are rejected exceed a limit AND
@ -302,9 +298,10 @@ void Tesseract::doc_and_block_rejection( // reject big chunks
              tprintf("REJECTING ROW %d  #chars: %d;  #Rejects: %d\n", row_no,
                      current_row->char_count, current_row->rej_count);
            }
-            prev_word_rejected = false;
+            bool prev_word_rejected = false;
            while ((word = page_res_it.word()) != nullptr && page_res_it.row() == current_row) {
              /* Preserve words on good docs unless they are mostly rejected*/
+              bool rej_word;
              if (!tessedit_row_rej_good_docs && good_quality_doc) {
                rej_word = word->reject_map.reject_count() /
                               static_cast<float>(word->reject_map.length()) >
@ -448,8 +445,6 @@ void Tesseract::tilde_crunch(PAGE_RES_IT &page_res_it) {
 }

 bool Tesseract::terrible_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_level) {
-  float rating_per_ch;
-  int adjusted_len;
  int crunch_mode = 0;

  if (word->best_choice->unichar_string().empty() ||
@ -457,11 +452,11 @@ bool Tesseract::terrible_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_level
       word->best_choice->unichar_string().size())) {
    crunch_mode = 1;
  } else {
-    adjusted_len = word->reject_map.length();
+    int adjusted_len = word->reject_map.length();
    if (adjusted_len > crunch_rating_max) {
      adjusted_len = crunch_rating_max;
    }
-    rating_per_ch = word->best_choice->rating() / adjusted_len;
+    float rating_per_ch = word->best_choice->rating() / adjusted_len;

    if (rating_per_ch > crunch_terrible_rating) {
      crunch_mode = 2;
@ -528,7 +523,6 @@ bool Tesseract::potential_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_leve
 }

 void Tesseract::tilde_delete(PAGE_RES_IT &page_res_it) {
-  WERD_RES *word;
  PAGE_RES_IT copy_it;
  bool deleting_from_bol = false;
  bool marked_delete_point = false;
@ -539,7 +533,7 @@ void Tesseract::tilde_delete(PAGE_RES_IT &page_res_it) {

  page_res_it.restart_page();
  while (page_res_it.word() != nullptr) {
-    word = page_res_it.word();
+    WERD_RES *word = page_res_it.word();

    delete_mode = word_deletable(word, debug_delete_mode);
    if (delete_mode != CR_NONE) {
--- a/src/ccmain/fixspace.cpp
+++ b/src/ccmain/fixspace.cpp
@ -171,7 +171,6 @@ void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor, int32_t word_count, PAGE_R
 void Tesseract::fix_fuzzy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK *block) {
  int16_t best_score;
  WERD_RES_LIST current_perm;
-  int16_t current_score;
  bool improved = false;

  best_score = eval_word_spacing(best_perm); // default score
@ -183,7 +182,7 @@ void Tesseract::fix_fuzzy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK *

  while ((best_score != PERFECT_WERDS) && !current_perm.empty()) {
    match_current_words(current_perm, row, block);
-    current_score = eval_word_spacing(current_perm);
+    int16_t current_score = eval_word_spacing(current_perm);
    dump_words(current_perm, current_score, 2, improved);
    if (current_score > best_score) {
      best_perm.clear();
@ -201,11 +200,10 @@ void Tesseract::fix_fuzzy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK *
 void initialise_search(WERD_RES_LIST &src_list, WERD_RES_LIST &new_list) {
  WERD_RES_IT src_it(&src_list);
  WERD_RES_IT new_it(&new_list);
-  WERD_RES *src_wd;
  WERD_RES *new_wd;

  for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
-    src_wd = src_it.data();
+    WERD_RES *src_wd = src_it.data();
    if (!src_wd->combination) {
      new_wd = WERD_RES::deep_copy(src_wd);
      new_wd->combination = false;
@ -393,8 +391,6 @@ void transform_to_next_perm(WERD_RES_LIST &words) {
  WERD_RES_IT prev_word_it(&words);
  WERD_RES *word;
  WERD_RES *prev_word;
-  WERD_RES *combo;
-  WERD *copy_word;
  int16_t prev_right = -INT16_MAX;
  TBOX box;
  int16_t gap;
@ -425,12 +421,13 @@ void transform_to_next_perm(WERD_RES_LIST &words) {
          gap = box.left() - prev_right;
          if (gap <= min_gap) {
            prev_word = prev_word_it.data();
+            WERD_RES *combo;
            if (prev_word->combination) {
              combo = prev_word;
            } else {
              /* Make a new combination and insert before
               * the first word being joined. */
-              copy_word = new WERD;
+              auto *copy_word = new WERD;
              *copy_word = *(prev_word->word);
              // deep copy
              combo = new WERD_RES(copy_word);
@ -546,7 +543,6 @@ void Tesseract::fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, BLOCK *block)
  WERD_RES *word_res;
  WERD_RES_LIST sub_word_list;
  WERD_RES_IT sub_word_list_it(&sub_word_list);
-  int16_t blob_index;
  int16_t new_length;
  float junk;

@ -556,7 +552,7 @@ void Tesseract::fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, BLOCK *block)
    return;
  }

-  blob_index = worst_noise_blob(word_res, &junk);
+  auto blob_index = worst_noise_blob(word_res, &junk);
  if (blob_index < 0) {
    return;
  }
@ -623,7 +619,6 @@ void Tesseract::break_noisiest_blob_word(WERD_RES_LIST &words) {
  WERD_RES_IT worst_word_it;
  float worst_noise_score = 9999;
  int worst_blob_index = -1; // Noisiest blob of noisiest wd
-  int blob_index;            // of wds noisiest blob
  float noise_score;         // of wds noisiest blob
  WERD_RES *word_res;
  C_BLOB_IT blob_it;
@ -636,7 +631,7 @@ void Tesseract::break_noisiest_blob_word(WERD_RES_LIST &words) {
  int16_t i;

  for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
-    blob_index = worst_noise_blob(word_it.data(), &noise_score);
+    auto blob_index = worst_noise_blob(word_it.data(), &noise_score);
    if (blob_index > -1 && worst_noise_score > noise_score) {
      worst_noise_score = noise_score;
      worst_blob_index = blob_index;
@ -806,7 +801,6 @@ float Tesseract::blob_noise_score(TBLOB *blob) {
 void fixspace_dbg(WERD_RES *word) {
  TBOX box = word->word->bounding_box();
  const bool show_map_detail = false;
-  int16_t i;

  box.print();
  tprintf(" \"%s\" ", word->best_choice->unichar_string().c_str());
@ -816,7 +810,7 @@ void fixspace_dbg(WERD_RES *word) {
  tprintf("\n");
  if (show_map_detail) {
    tprintf("\"%s\"\n", word->best_choice->unichar_string().c_str());
-    for (i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) {
+    for (unsigned i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) {
      tprintf("**** \"%c\" ****\n", word->best_choice->unichar_string()[i]);
      word->reject_map[i].full_print(debug_fp);
    }
--- a/src/ccmain/output.cpp
+++ b/src/ccmain/output.cpp
@ -101,11 +101,11 @@ void Tesseract::write_results(PAGE_RES_IT &page_res_it,
                              bool force_eol) {  // override tilde crunch?
  WERD_RES *word = page_res_it.word();
  const UNICHARSET &uchset = *word->uch_set;
-  bool need_reject = false;
  UNICHAR_ID space = uchset.unichar_to_id(" ");

  if ((word->unlv_crunch_mode != CR_NONE || word->best_choice->empty()) &&
      !tessedit_zero_kelvin_rejection && !tessedit_word_for_word) {
+    bool need_reject = false;
    if ((word->unlv_crunch_mode != CR_DELETE) &&
        (!stats_.tilde_crunch_written ||
         ((word->unlv_crunch_mode == CR_KEEP_SPACE) && (word->word->space() > 0) &&
--- a/src/ccmain/paragraphs.cpp
+++ b/src/ccmain/paragraphs.cpp
@ -2407,8 +2407,8 @@ static void InitializeTextAndBoxesPreRecognition(const MutableIterator &it, RowI
  // Set up text, lword_text, and rword_text (mostly for debug printing).
  std::string fake_text;
  PageIterator pit(static_cast<const PageIterator &>(it));
-  bool first_word = true;
  if (!pit.Empty(RIL_WORD)) {
+    bool first_word = true;
    do {
      fake_text += "x";
      if (first_word) {
--- a/src/ccmain/pgedit.cpp
+++ b/src/ccmain/pgedit.cpp
@ -703,9 +703,7 @@ bool Tesseract::word_display(PAGE_RES_IT *pr_it) {
  WERD_RES *word_res = pr_it->word();
  WERD *word = word_res->word;
  TBOX word_bb;    // word bounding box
-  int word_height; // ht of word BB
  bool displayed_something = false;
-  float shift; // from bot left

  if (color_mode != CM_RAINBOW && word_res->box_word != nullptr) {
 #  ifndef DISABLED_LEGACY_ENGINE
@ -842,13 +840,14 @@ bool Tesseract::word_display(PAGE_RES_IT *pr_it) {
  if (text.length() > 0) {
    word_bb = word->bounding_box();
    image_win->Pen(ScrollView::RED);
-    word_height = word_bb.height();
-    int text_height = 0.50 * word_height;
+    auto word_height = word_bb.height();
+    int text_height = word_height / 2;
    if (text_height > 20) {
      text_height = 20;
    }
    image_win->TextAttributes("Arial", text_height, false, false, false);
-    shift = (word_height < word_bb.width()) ? 0.25 * word_height : 0.0f;
+    // from bot left
+    float shift = (word_height < word_bb.width()) ? 0.25f * word_height : 0.0f;
    image_win->Text(word_bb.left() + shift, word_bb.bottom() + 0.25 * word_height, text.c_str());
    if (blame.length() > 0) {
      image_win->Text(word_bb.left() + shift, word_bb.bottom() + 0.25 * word_height - text_height,
--- a/src/ccmain/reject.cpp
+++ b/src/ccmain/reject.cpp
@ -293,8 +293,6 @@ bool Tesseract::one_ell_conflict(WERD_RES *word_res, bool update_map) {
  int16_t i;
  int16_t offset;
  bool non_conflict_set_char; // non conf set a/n?
-  bool conflict = false;
-  bool allow_1s;
  ACCEPTABLE_WERD_TYPE word_type;
  bool dict_perm_type;
  bool dict_word_ok;
@ -411,11 +409,11 @@ bool Tesseract::one_ell_conflict(WERD_RES *word_res, bool update_map) {
    Else reject all conflict chs
 */
  if (word_contains_non_1_digit(word, lengths)) {
-    allow_1s =
+    bool allow_1s =
        (alpha_count(word, lengths) == 0) || (word_res->best_choice->permuter() == NUMBER_PERM);

    int16_t offset;
-    conflict = false;
+    bool conflict = false;
    for (i = 0, offset = 0; word[offset] != '\0';
         offset += word_res->best_choice->unichar_lengths()[i++]) {
      if ((!allow_1s || (word[offset] != '1')) &&