Reduce scope of some local variables (reported by Codacy)

Also apply some smaller optimizations and add TODO comments
for local variables which require further examination.

Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
Stefan Weil 2024-06-11 08:58:25 +02:00
parent 560529332c
commit 96772c5761
9 changed files with 38 additions and 54 deletions

View File

@ -242,13 +242,13 @@ static void GetWordBaseline(int writing_direction, int ppi, int height, int word
double word_length;
double x, y;
{
int px = word_x1;
int py = word_y1;
double l2 = dist2(line_x1, line_y1, line_x2, line_y2);
if (l2 == 0) {
x = line_x1;
y = line_y1;
} else {
int px = word_x1;
int py = word_y1;
double t = ((px - line_x2) * (line_x2 - line_x1) + (py - line_y2) * (line_y2 - line_y1)) / l2;
x = line_x2 + t * (line_x2 - line_x1);
y = line_y2 + t * (line_y2 - line_y1);

View File

@ -258,10 +258,10 @@ void Tesseract::MaximallyChopWord(const std::vector<TBOX> &boxes, BLOCK *block,
}
const double e = exp(1.0); // The base of natural logs.
unsigned blob_number;
int right_chop_index = 0;
if (!assume_fixed_pitch_char_segment) {
// We only chop if the language is not fixed pitch like CJK.
SEAM *seam = nullptr;
int right_chop_index = 0;
while ((seam = chop_one_blob(boxes, blob_choices, word_res, &blob_number)) != nullptr) {
word_res->InsertSeam(blob_number, seam);
BLOB_CHOICE *left_choice = blob_choices[blob_number];
@ -685,6 +685,7 @@ void Tesseract::SearchForText(const std::vector<BLOB_CHOICE_LIST *> *choices, in
void Tesseract::TidyUp(PAGE_RES *page_res) {
int ok_blob_count = 0;
int bad_blob_count = 0;
// TODO: check usage of ok_word_count.
int ok_word_count = 0;
int unlabelled_words = 0;
PAGE_RES_IT pr_it(page_res);

View File

@ -949,6 +949,7 @@ bool Tesseract::ReassignDiacritics(int pass, PAGE_RES_IT *pr_it, bool *make_next
}
real_word->AddSelectedOutlines(wanted, wanted_blobs, wanted_outlines, nullptr);
AssignDiacriticsToNewBlobs(outlines, pass, real_word, pr_it, &word_wanted, &target_blobs);
// TODO: check code (usage of non_overlapped and non_overlapped_used).
int non_overlapped = 0;
int non_overlapped_used = 0;
for (unsigned i = 0; i < word_wanted.size(); ++i) {
@ -1121,9 +1122,9 @@ bool Tesseract::SelectGoodDiacriticOutlines(int pass, float certainty_threshold,
C_BLOB *blob,
const std::vector<C_OUTLINE *> &outlines,
int num_outlines, std::vector<bool> *ok_outlines) {
std::string best_str;
float target_cert = certainty_threshold;
if (blob != nullptr) {
std::string best_str;
float target_c2;
target_cert = ClassifyBlobAsWord(pass, pr_it, blob, best_str, &target_c2);
if (debug_noise_removal) {
@ -1797,9 +1798,6 @@ not_a_word:
}
bool Tesseract::check_debug_pt(WERD_RES *word, int location) {
bool show_map_detail = false;
int16_t i;
if (!test_pt) {
return false;
}
@ -1811,6 +1809,7 @@ bool Tesseract::check_debug_pt(WERD_RES *word, int location) {
if (location < 0) {
return true; // For breakpoint use
}
bool show_map_detail = false;
tessedit_rejection_debug.set_value(true);
debug_x_ht_level.set_value(2);
tprintf("\n\nTESTWD::");
@ -1864,7 +1863,7 @@ bool Tesseract::check_debug_pt(WERD_RES *word, int location) {
tprintf("\n");
if (show_map_detail) {
tprintf("\"%s\"\n", word->best_choice->unichar_string().c_str());
for (i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) {
for (unsigned i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) {
tprintf("**** \"%c\" ****\n", word->best_choice->unichar_string()[i]);
word->reject_map[i].full_print(debug_fp);
}
@ -1891,13 +1890,12 @@ static void find_modal_font( // good chars in word
int16_t *font_out, // output font
int8_t *font_count // output count
) {
int16_t font; // font index
int32_t count; // pile count
if (fonts->get_total() > 0) {
font = static_cast<int16_t>(fonts->mode());
// font index
int16_t font = static_cast<int16_t>(fonts->mode());
*font_out = font;
count = fonts->pile_count(font);
// pile count
int32_t count = fonts->pile_count(font);
*font_count = count < INT8_MAX ? count : INT8_MAX;
fonts->add(font, -*font_count);
} else {

View File

@ -60,10 +60,10 @@ int16_t Tesseract::word_blob_quality(WERD_RES *word) {
}
int16_t Tesseract::word_outline_errs(WERD_RES *word) {
int16_t i = 0;
int16_t err_count = 0;
if (word->rebuild_word != nullptr) {
int16_t i = 0;
for (unsigned b = 0; b < word->rebuild_word->NumBlobs(); ++b) {
TBLOB *blob = word->rebuild_word->blobs[b];
err_count += count_outline_errs(word->best_choice->unichar_string()[i], blob->NumOutlines());
@ -209,13 +209,8 @@ void Tesseract::unrej_good_quality_words( // unreject potential
void Tesseract::doc_and_block_rejection( // reject big chunks
PAGE_RES_IT &page_res_it, bool good_quality_doc) {
int16_t block_no = 0;
int16_t row_no = 0;
BLOCK_RES *current_block;
ROW_RES *current_row;
bool rej_word;
bool prev_word_rejected;
int16_t char_quality = 0;
int16_t accepted_char_quality;
@ -238,7 +233,7 @@ void Tesseract::doc_and_block_rejection( // reject big chunks
WERD_RES *word;
while ((word = page_res_it.word()) != nullptr) {
current_block = page_res_it.block();
block_no = current_block->block->pdblk.index();
int16_t block_no = current_block->block->pdblk.index();
if (current_block->char_count > 0 &&
(current_block->rej_count * 100.0 / current_block->char_count) >
tessedit_reject_block_percent) {
@ -246,8 +241,9 @@ void Tesseract::doc_and_block_rejection( // reject big chunks
tprintf("REJECTING BLOCK %d #chars: %d; #Rejects: %d\n", block_no,
current_block->char_count, current_block->rej_count);
}
prev_word_rejected = false;
bool prev_word_rejected = false;
while ((word = page_res_it.word()) != nullptr && (page_res_it.block() == current_block)) {
bool rej_word;
if (tessedit_preserve_blk_rej_perfect_wds) {
rej_word = word->reject_map.reject_count() > 0 ||
word->reject_map.length() < tessedit_preserve_min_wd_len;
@ -284,9 +280,9 @@ void Tesseract::doc_and_block_rejection( // reject big chunks
}
/* Walk rows in block testing for row rejection */
row_no = 0;
int16_t row_no = 0;
while (page_res_it.word() != nullptr && page_res_it.block() == current_block) {
current_row = page_res_it.row();
ROW_RES *current_row = page_res_it.row();
row_no++;
/* Reject whole row if:
fraction of chars on row which are rejected exceed a limit AND
@ -302,9 +298,10 @@ void Tesseract::doc_and_block_rejection( // reject big chunks
tprintf("REJECTING ROW %d #chars: %d; #Rejects: %d\n", row_no,
current_row->char_count, current_row->rej_count);
}
prev_word_rejected = false;
bool prev_word_rejected = false;
while ((word = page_res_it.word()) != nullptr && page_res_it.row() == current_row) {
/* Preserve words on good docs unless they are mostly rejected*/
bool rej_word;
if (!tessedit_row_rej_good_docs && good_quality_doc) {
rej_word = word->reject_map.reject_count() /
static_cast<float>(word->reject_map.length()) >
@ -448,8 +445,6 @@ void Tesseract::tilde_crunch(PAGE_RES_IT &page_res_it) {
}
bool Tesseract::terrible_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_level) {
float rating_per_ch;
int adjusted_len;
int crunch_mode = 0;
if (word->best_choice->unichar_string().empty() ||
@ -457,11 +452,11 @@ bool Tesseract::terrible_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_level
word->best_choice->unichar_string().size())) {
crunch_mode = 1;
} else {
adjusted_len = word->reject_map.length();
int adjusted_len = word->reject_map.length();
if (adjusted_len > crunch_rating_max) {
adjusted_len = crunch_rating_max;
}
rating_per_ch = word->best_choice->rating() / adjusted_len;
float rating_per_ch = word->best_choice->rating() / adjusted_len;
if (rating_per_ch > crunch_terrible_rating) {
crunch_mode = 2;
@ -528,7 +523,6 @@ bool Tesseract::potential_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_leve
}
void Tesseract::tilde_delete(PAGE_RES_IT &page_res_it) {
WERD_RES *word;
PAGE_RES_IT copy_it;
bool deleting_from_bol = false;
bool marked_delete_point = false;
@ -539,7 +533,7 @@ void Tesseract::tilde_delete(PAGE_RES_IT &page_res_it) {
page_res_it.restart_page();
while (page_res_it.word() != nullptr) {
word = page_res_it.word();
WERD_RES *word = page_res_it.word();
delete_mode = word_deletable(word, debug_delete_mode);
if (delete_mode != CR_NONE) {

View File

@ -171,7 +171,6 @@ void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor, int32_t word_count, PAGE_R
void Tesseract::fix_fuzzy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK *block) {
int16_t best_score;
WERD_RES_LIST current_perm;
int16_t current_score;
bool improved = false;
best_score = eval_word_spacing(best_perm); // default score
@ -183,7 +182,7 @@ void Tesseract::fix_fuzzy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK *
while ((best_score != PERFECT_WERDS) && !current_perm.empty()) {
match_current_words(current_perm, row, block);
current_score = eval_word_spacing(current_perm);
int16_t current_score = eval_word_spacing(current_perm);
dump_words(current_perm, current_score, 2, improved);
if (current_score > best_score) {
best_perm.clear();
@ -201,11 +200,10 @@ void Tesseract::fix_fuzzy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK *
void initialise_search(WERD_RES_LIST &src_list, WERD_RES_LIST &new_list) {
WERD_RES_IT src_it(&src_list);
WERD_RES_IT new_it(&new_list);
WERD_RES *src_wd;
WERD_RES *new_wd;
for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
src_wd = src_it.data();
WERD_RES *src_wd = src_it.data();
if (!src_wd->combination) {
new_wd = WERD_RES::deep_copy(src_wd);
new_wd->combination = false;
@ -393,8 +391,6 @@ void transform_to_next_perm(WERD_RES_LIST &words) {
WERD_RES_IT prev_word_it(&words);
WERD_RES *word;
WERD_RES *prev_word;
WERD_RES *combo;
WERD *copy_word;
int16_t prev_right = -INT16_MAX;
TBOX box;
int16_t gap;
@ -425,12 +421,13 @@ void transform_to_next_perm(WERD_RES_LIST &words) {
gap = box.left() - prev_right;
if (gap <= min_gap) {
prev_word = prev_word_it.data();
WERD_RES *combo;
if (prev_word->combination) {
combo = prev_word;
} else {
/* Make a new combination and insert before
* the first word being joined. */
copy_word = new WERD;
auto *copy_word = new WERD;
*copy_word = *(prev_word->word);
// deep copy
combo = new WERD_RES(copy_word);
@ -546,7 +543,6 @@ void Tesseract::fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, BLOCK *block)
WERD_RES *word_res;
WERD_RES_LIST sub_word_list;
WERD_RES_IT sub_word_list_it(&sub_word_list);
int16_t blob_index;
int16_t new_length;
float junk;
@ -556,7 +552,7 @@ void Tesseract::fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, BLOCK *block)
return;
}
blob_index = worst_noise_blob(word_res, &junk);
auto blob_index = worst_noise_blob(word_res, &junk);
if (blob_index < 0) {
return;
}
@ -623,7 +619,6 @@ void Tesseract::break_noisiest_blob_word(WERD_RES_LIST &words) {
WERD_RES_IT worst_word_it;
float worst_noise_score = 9999;
int worst_blob_index = -1; // Noisiest blob of noisiest wd
int blob_index; // of wds noisiest blob
float noise_score; // of wds noisiest blob
WERD_RES *word_res;
C_BLOB_IT blob_it;
@ -636,7 +631,7 @@ void Tesseract::break_noisiest_blob_word(WERD_RES_LIST &words) {
int16_t i;
for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
blob_index = worst_noise_blob(word_it.data(), &noise_score);
auto blob_index = worst_noise_blob(word_it.data(), &noise_score);
if (blob_index > -1 && worst_noise_score > noise_score) {
worst_noise_score = noise_score;
worst_blob_index = blob_index;
@ -806,7 +801,6 @@ float Tesseract::blob_noise_score(TBLOB *blob) {
void fixspace_dbg(WERD_RES *word) {
TBOX box = word->word->bounding_box();
const bool show_map_detail = false;
int16_t i;
box.print();
tprintf(" \"%s\" ", word->best_choice->unichar_string().c_str());
@ -816,7 +810,7 @@ void fixspace_dbg(WERD_RES *word) {
tprintf("\n");
if (show_map_detail) {
tprintf("\"%s\"\n", word->best_choice->unichar_string().c_str());
for (i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) {
for (unsigned i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) {
tprintf("**** \"%c\" ****\n", word->best_choice->unichar_string()[i]);
word->reject_map[i].full_print(debug_fp);
}

View File

@ -101,11 +101,11 @@ void Tesseract::write_results(PAGE_RES_IT &page_res_it,
bool force_eol) { // override tilde crunch?
WERD_RES *word = page_res_it.word();
const UNICHARSET &uchset = *word->uch_set;
bool need_reject = false;
UNICHAR_ID space = uchset.unichar_to_id(" ");
if ((word->unlv_crunch_mode != CR_NONE || word->best_choice->empty()) &&
!tessedit_zero_kelvin_rejection && !tessedit_word_for_word) {
bool need_reject = false;
if ((word->unlv_crunch_mode != CR_DELETE) &&
(!stats_.tilde_crunch_written ||
((word->unlv_crunch_mode == CR_KEEP_SPACE) && (word->word->space() > 0) &&

View File

@ -2407,8 +2407,8 @@ static void InitializeTextAndBoxesPreRecognition(const MutableIterator &it, RowI
// Set up text, lword_text, and rword_text (mostly for debug printing).
std::string fake_text;
PageIterator pit(static_cast<const PageIterator &>(it));
bool first_word = true;
if (!pit.Empty(RIL_WORD)) {
bool first_word = true;
do {
fake_text += "x";
if (first_word) {

View File

@ -703,9 +703,7 @@ bool Tesseract::word_display(PAGE_RES_IT *pr_it) {
WERD_RES *word_res = pr_it->word();
WERD *word = word_res->word;
TBOX word_bb; // word bounding box
int word_height; // ht of word BB
bool displayed_something = false;
float shift; // from bot left
if (color_mode != CM_RAINBOW && word_res->box_word != nullptr) {
# ifndef DISABLED_LEGACY_ENGINE
@ -842,13 +840,14 @@ bool Tesseract::word_display(PAGE_RES_IT *pr_it) {
if (text.length() > 0) {
word_bb = word->bounding_box();
image_win->Pen(ScrollView::RED);
word_height = word_bb.height();
int text_height = 0.50 * word_height;
auto word_height = word_bb.height();
int text_height = word_height / 2;
if (text_height > 20) {
text_height = 20;
}
image_win->TextAttributes("Arial", text_height, false, false, false);
shift = (word_height < word_bb.width()) ? 0.25 * word_height : 0.0f;
// from bot left
float shift = (word_height < word_bb.width()) ? 0.25f * word_height : 0.0f;
image_win->Text(word_bb.left() + shift, word_bb.bottom() + 0.25 * word_height, text.c_str());
if (blame.length() > 0) {
image_win->Text(word_bb.left() + shift, word_bb.bottom() + 0.25 * word_height - text_height,

View File

@ -293,8 +293,6 @@ bool Tesseract::one_ell_conflict(WERD_RES *word_res, bool update_map) {
int16_t i;
int16_t offset;
bool non_conflict_set_char; // non conf set a/n?
bool conflict = false;
bool allow_1s;
ACCEPTABLE_WERD_TYPE word_type;
bool dict_perm_type;
bool dict_word_ok;
@ -411,11 +409,11 @@ bool Tesseract::one_ell_conflict(WERD_RES *word_res, bool update_map) {
Else reject all conflict chs
*/
if (word_contains_non_1_digit(word, lengths)) {
allow_1s =
bool allow_1s =
(alpha_count(word, lengths) == 0) || (word_res->best_choice->permuter() == NUMBER_PERM);
int16_t offset;
conflict = false;
bool conflict = false;
for (i = 0, offset = 0; word[offset] != '\0';
offset += word_res->best_choice->unichar_lengths()[i++]) {
if ((!allow_1s || (word[offset] != '1')) &&