mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-27 20:59:36 +08:00
Reduce scope of some local variables (reported by Codacy)
Also apply some smaller optimizations and add TODO comments for local variables which require further examination.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
parent
560529332c
commit
96772c5761
@ -242,13 +242,13 @@ static void GetWordBaseline(int writing_direction, int ppi, int height, int word
|
||||
double word_length;
|
||||
double x, y;
|
||||
{
|
||||
int px = word_x1;
|
||||
int py = word_y1;
|
||||
double l2 = dist2(line_x1, line_y1, line_x2, line_y2);
|
||||
if (l2 == 0) {
|
||||
x = line_x1;
|
||||
y = line_y1;
|
||||
} else {
|
||||
int px = word_x1;
|
||||
int py = word_y1;
|
||||
double t = ((px - line_x2) * (line_x2 - line_x1) + (py - line_y2) * (line_y2 - line_y1)) / l2;
|
||||
x = line_x2 + t * (line_x2 - line_x1);
|
||||
y = line_y2 + t * (line_y2 - line_y1);
|
||||
|
@ -258,10 +258,10 @@ void Tesseract::MaximallyChopWord(const std::vector<TBOX> &boxes, BLOCK *block,
|
||||
}
|
||||
const double e = exp(1.0); // The base of natural logs.
|
||||
unsigned blob_number;
|
||||
int right_chop_index = 0;
|
||||
if (!assume_fixed_pitch_char_segment) {
|
||||
// We only chop if the language is not fixed pitch like CJK.
|
||||
SEAM *seam = nullptr;
|
||||
int right_chop_index = 0;
|
||||
while ((seam = chop_one_blob(boxes, blob_choices, word_res, &blob_number)) != nullptr) {
|
||||
word_res->InsertSeam(blob_number, seam);
|
||||
BLOB_CHOICE *left_choice = blob_choices[blob_number];
|
||||
@ -685,6 +685,7 @@ void Tesseract::SearchForText(const std::vector<BLOB_CHOICE_LIST *> *choices, in
|
||||
void Tesseract::TidyUp(PAGE_RES *page_res) {
|
||||
int ok_blob_count = 0;
|
||||
int bad_blob_count = 0;
|
||||
// TODO: check usage of ok_word_count.
|
||||
int ok_word_count = 0;
|
||||
int unlabelled_words = 0;
|
||||
PAGE_RES_IT pr_it(page_res);
|
||||
|
@ -949,6 +949,7 @@ bool Tesseract::ReassignDiacritics(int pass, PAGE_RES_IT *pr_it, bool *make_next
|
||||
}
|
||||
real_word->AddSelectedOutlines(wanted, wanted_blobs, wanted_outlines, nullptr);
|
||||
AssignDiacriticsToNewBlobs(outlines, pass, real_word, pr_it, &word_wanted, &target_blobs);
|
||||
// TODO: check code.
|
||||
int non_overlapped = 0;
|
||||
int non_overlapped_used = 0;
|
||||
for (unsigned i = 0; i < word_wanted.size(); ++i) {
|
||||
@ -1121,9 +1122,9 @@ bool Tesseract::SelectGoodDiacriticOutlines(int pass, float certainty_threshold,
|
||||
C_BLOB *blob,
|
||||
const std::vector<C_OUTLINE *> &outlines,
|
||||
int num_outlines, std::vector<bool> *ok_outlines) {
|
||||
std::string best_str;
|
||||
float target_cert = certainty_threshold;
|
||||
if (blob != nullptr) {
|
||||
std::string best_str;
|
||||
float target_c2;
|
||||
target_cert = ClassifyBlobAsWord(pass, pr_it, blob, best_str, &target_c2);
|
||||
if (debug_noise_removal) {
|
||||
@ -1797,9 +1798,6 @@ not_a_word:
|
||||
}
|
||||
|
||||
bool Tesseract::check_debug_pt(WERD_RES *word, int location) {
|
||||
bool show_map_detail = false;
|
||||
int16_t i;
|
||||
|
||||
if (!test_pt) {
|
||||
return false;
|
||||
}
|
||||
@ -1811,6 +1809,7 @@ bool Tesseract::check_debug_pt(WERD_RES *word, int location) {
|
||||
if (location < 0) {
|
||||
return true; // For breakpoint use
|
||||
}
|
||||
bool show_map_detail = false;
|
||||
tessedit_rejection_debug.set_value(true);
|
||||
debug_x_ht_level.set_value(2);
|
||||
tprintf("\n\nTESTWD::");
|
||||
@ -1864,7 +1863,7 @@ bool Tesseract::check_debug_pt(WERD_RES *word, int location) {
|
||||
tprintf("\n");
|
||||
if (show_map_detail) {
|
||||
tprintf("\"%s\"\n", word->best_choice->unichar_string().c_str());
|
||||
for (i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) {
|
||||
for (unsigned i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) {
|
||||
tprintf("**** \"%c\" ****\n", word->best_choice->unichar_string()[i]);
|
||||
word->reject_map[i].full_print(debug_fp);
|
||||
}
|
||||
@ -1891,13 +1890,12 @@ static void find_modal_font( // good chars in word
|
||||
int16_t *font_out, // output font
|
||||
int8_t *font_count // output count
|
||||
) {
|
||||
int16_t font; // font index
|
||||
int32_t count; // pile count
|
||||
|
||||
if (fonts->get_total() > 0) {
|
||||
font = static_cast<int16_t>(fonts->mode());
|
||||
// font index
|
||||
int16_t font = static_cast<int16_t>(fonts->mode());
|
||||
*font_out = font;
|
||||
count = fonts->pile_count(font);
|
||||
// pile count
|
||||
int32_t count = fonts->pile_count(font);
|
||||
*font_count = count < INT8_MAX ? count : INT8_MAX;
|
||||
fonts->add(font, -*font_count);
|
||||
} else {
|
||||
|
@ -60,10 +60,10 @@ int16_t Tesseract::word_blob_quality(WERD_RES *word) {
|
||||
}
|
||||
|
||||
int16_t Tesseract::word_outline_errs(WERD_RES *word) {
|
||||
int16_t i = 0;
|
||||
int16_t err_count = 0;
|
||||
|
||||
if (word->rebuild_word != nullptr) {
|
||||
int16_t i = 0;
|
||||
for (unsigned b = 0; b < word->rebuild_word->NumBlobs(); ++b) {
|
||||
TBLOB *blob = word->rebuild_word->blobs[b];
|
||||
err_count += count_outline_errs(word->best_choice->unichar_string()[i], blob->NumOutlines());
|
||||
@ -209,13 +209,8 @@ void Tesseract::unrej_good_quality_words( // unreject potential
|
||||
|
||||
void Tesseract::doc_and_block_rejection( // reject big chunks
|
||||
PAGE_RES_IT &page_res_it, bool good_quality_doc) {
|
||||
int16_t block_no = 0;
|
||||
int16_t row_no = 0;
|
||||
BLOCK_RES *current_block;
|
||||
ROW_RES *current_row;
|
||||
|
||||
bool rej_word;
|
||||
bool prev_word_rejected;
|
||||
int16_t char_quality = 0;
|
||||
int16_t accepted_char_quality;
|
||||
|
||||
@ -238,7 +233,7 @@ void Tesseract::doc_and_block_rejection( // reject big chunks
|
||||
WERD_RES *word;
|
||||
while ((word = page_res_it.word()) != nullptr) {
|
||||
current_block = page_res_it.block();
|
||||
block_no = current_block->block->pdblk.index();
|
||||
int16_t block_no = current_block->block->pdblk.index();
|
||||
if (current_block->char_count > 0 &&
|
||||
(current_block->rej_count * 100.0 / current_block->char_count) >
|
||||
tessedit_reject_block_percent) {
|
||||
@ -246,8 +241,9 @@ void Tesseract::doc_and_block_rejection( // reject big chunks
|
||||
tprintf("REJECTING BLOCK %d #chars: %d; #Rejects: %d\n", block_no,
|
||||
current_block->char_count, current_block->rej_count);
|
||||
}
|
||||
prev_word_rejected = false;
|
||||
bool prev_word_rejected = false;
|
||||
while ((word = page_res_it.word()) != nullptr && (page_res_it.block() == current_block)) {
|
||||
bool rej_word;
|
||||
if (tessedit_preserve_blk_rej_perfect_wds) {
|
||||
rej_word = word->reject_map.reject_count() > 0 ||
|
||||
word->reject_map.length() < tessedit_preserve_min_wd_len;
|
||||
@ -284,9 +280,9 @@ void Tesseract::doc_and_block_rejection( // reject big chunks
|
||||
}
|
||||
|
||||
/* Walk rows in block testing for row rejection */
|
||||
row_no = 0;
|
||||
int16_t row_no = 0;
|
||||
while (page_res_it.word() != nullptr && page_res_it.block() == current_block) {
|
||||
current_row = page_res_it.row();
|
||||
ROW_RES *current_row = page_res_it.row();
|
||||
row_no++;
|
||||
/* Reject whole row if:
|
||||
fraction of chars on row which are rejected exceed a limit AND
|
||||
@ -302,9 +298,10 @@ void Tesseract::doc_and_block_rejection( // reject big chunks
|
||||
tprintf("REJECTING ROW %d #chars: %d; #Rejects: %d\n", row_no,
|
||||
current_row->char_count, current_row->rej_count);
|
||||
}
|
||||
prev_word_rejected = false;
|
||||
bool prev_word_rejected = false;
|
||||
while ((word = page_res_it.word()) != nullptr && page_res_it.row() == current_row) {
|
||||
/* Preserve words on good docs unless they are mostly rejected*/
|
||||
bool rej_word;
|
||||
if (!tessedit_row_rej_good_docs && good_quality_doc) {
|
||||
rej_word = word->reject_map.reject_count() /
|
||||
static_cast<float>(word->reject_map.length()) >
|
||||
@ -448,8 +445,6 @@ void Tesseract::tilde_crunch(PAGE_RES_IT &page_res_it) {
|
||||
}
|
||||
|
||||
bool Tesseract::terrible_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_level) {
|
||||
float rating_per_ch;
|
||||
int adjusted_len;
|
||||
int crunch_mode = 0;
|
||||
|
||||
if (word->best_choice->unichar_string().empty() ||
|
||||
@ -457,11 +452,11 @@ bool Tesseract::terrible_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_level
|
||||
word->best_choice->unichar_string().size())) {
|
||||
crunch_mode = 1;
|
||||
} else {
|
||||
adjusted_len = word->reject_map.length();
|
||||
int adjusted_len = word->reject_map.length();
|
||||
if (adjusted_len > crunch_rating_max) {
|
||||
adjusted_len = crunch_rating_max;
|
||||
}
|
||||
rating_per_ch = word->best_choice->rating() / adjusted_len;
|
||||
float rating_per_ch = word->best_choice->rating() / adjusted_len;
|
||||
|
||||
if (rating_per_ch > crunch_terrible_rating) {
|
||||
crunch_mode = 2;
|
||||
@ -528,7 +523,6 @@ bool Tesseract::potential_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_leve
|
||||
}
|
||||
|
||||
void Tesseract::tilde_delete(PAGE_RES_IT &page_res_it) {
|
||||
WERD_RES *word;
|
||||
PAGE_RES_IT copy_it;
|
||||
bool deleting_from_bol = false;
|
||||
bool marked_delete_point = false;
|
||||
@ -539,7 +533,7 @@ void Tesseract::tilde_delete(PAGE_RES_IT &page_res_it) {
|
||||
|
||||
page_res_it.restart_page();
|
||||
while (page_res_it.word() != nullptr) {
|
||||
word = page_res_it.word();
|
||||
WERD_RES *word = page_res_it.word();
|
||||
|
||||
delete_mode = word_deletable(word, debug_delete_mode);
|
||||
if (delete_mode != CR_NONE) {
|
||||
|
@ -171,7 +171,6 @@ void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor, int32_t word_count, PAGE_R
|
||||
void Tesseract::fix_fuzzy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK *block) {
|
||||
int16_t best_score;
|
||||
WERD_RES_LIST current_perm;
|
||||
int16_t current_score;
|
||||
bool improved = false;
|
||||
|
||||
best_score = eval_word_spacing(best_perm); // default score
|
||||
@ -183,7 +182,7 @@ void Tesseract::fix_fuzzy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK *
|
||||
|
||||
while ((best_score != PERFECT_WERDS) && !current_perm.empty()) {
|
||||
match_current_words(current_perm, row, block);
|
||||
current_score = eval_word_spacing(current_perm);
|
||||
int16_t current_score = eval_word_spacing(current_perm);
|
||||
dump_words(current_perm, current_score, 2, improved);
|
||||
if (current_score > best_score) {
|
||||
best_perm.clear();
|
||||
@ -201,11 +200,10 @@ void Tesseract::fix_fuzzy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK *
|
||||
void initialise_search(WERD_RES_LIST &src_list, WERD_RES_LIST &new_list) {
|
||||
WERD_RES_IT src_it(&src_list);
|
||||
WERD_RES_IT new_it(&new_list);
|
||||
WERD_RES *src_wd;
|
||||
WERD_RES *new_wd;
|
||||
|
||||
for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
|
||||
src_wd = src_it.data();
|
||||
WERD_RES *src_wd = src_it.data();
|
||||
if (!src_wd->combination) {
|
||||
new_wd = WERD_RES::deep_copy(src_wd);
|
||||
new_wd->combination = false;
|
||||
@ -393,8 +391,6 @@ void transform_to_next_perm(WERD_RES_LIST &words) {
|
||||
WERD_RES_IT prev_word_it(&words);
|
||||
WERD_RES *word;
|
||||
WERD_RES *prev_word;
|
||||
WERD_RES *combo;
|
||||
WERD *copy_word;
|
||||
int16_t prev_right = -INT16_MAX;
|
||||
TBOX box;
|
||||
int16_t gap;
|
||||
@ -425,12 +421,13 @@ void transform_to_next_perm(WERD_RES_LIST &words) {
|
||||
gap = box.left() - prev_right;
|
||||
if (gap <= min_gap) {
|
||||
prev_word = prev_word_it.data();
|
||||
WERD_RES *combo;
|
||||
if (prev_word->combination) {
|
||||
combo = prev_word;
|
||||
} else {
|
||||
/* Make a new combination and insert before
|
||||
* the first word being joined. */
|
||||
copy_word = new WERD;
|
||||
auto *copy_word = new WERD;
|
||||
*copy_word = *(prev_word->word);
|
||||
// deep copy
|
||||
combo = new WERD_RES(copy_word);
|
||||
@ -546,7 +543,6 @@ void Tesseract::fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, BLOCK *block)
|
||||
WERD_RES *word_res;
|
||||
WERD_RES_LIST sub_word_list;
|
||||
WERD_RES_IT sub_word_list_it(&sub_word_list);
|
||||
int16_t blob_index;
|
||||
int16_t new_length;
|
||||
float junk;
|
||||
|
||||
@ -556,7 +552,7 @@ void Tesseract::fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, BLOCK *block)
|
||||
return;
|
||||
}
|
||||
|
||||
blob_index = worst_noise_blob(word_res, &junk);
|
||||
auto blob_index = worst_noise_blob(word_res, &junk);
|
||||
if (blob_index < 0) {
|
||||
return;
|
||||
}
|
||||
@ -623,7 +619,6 @@ void Tesseract::break_noisiest_blob_word(WERD_RES_LIST &words) {
|
||||
WERD_RES_IT worst_word_it;
|
||||
float worst_noise_score = 9999;
|
||||
int worst_blob_index = -1; // Noisiest blob of noisiest wd
|
||||
int blob_index; // of wds noisiest blob
|
||||
float noise_score; // of wds noisiest blob
|
||||
WERD_RES *word_res;
|
||||
C_BLOB_IT blob_it;
|
||||
@ -636,7 +631,7 @@ void Tesseract::break_noisiest_blob_word(WERD_RES_LIST &words) {
|
||||
int16_t i;
|
||||
|
||||
for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
|
||||
blob_index = worst_noise_blob(word_it.data(), &noise_score);
|
||||
auto blob_index = worst_noise_blob(word_it.data(), &noise_score);
|
||||
if (blob_index > -1 && worst_noise_score > noise_score) {
|
||||
worst_noise_score = noise_score;
|
||||
worst_blob_index = blob_index;
|
||||
@ -806,7 +801,6 @@ float Tesseract::blob_noise_score(TBLOB *blob) {
|
||||
void fixspace_dbg(WERD_RES *word) {
|
||||
TBOX box = word->word->bounding_box();
|
||||
const bool show_map_detail = false;
|
||||
int16_t i;
|
||||
|
||||
box.print();
|
||||
tprintf(" \"%s\" ", word->best_choice->unichar_string().c_str());
|
||||
@ -816,7 +810,7 @@ void fixspace_dbg(WERD_RES *word) {
|
||||
tprintf("\n");
|
||||
if (show_map_detail) {
|
||||
tprintf("\"%s\"\n", word->best_choice->unichar_string().c_str());
|
||||
for (i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) {
|
||||
for (unsigned i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) {
|
||||
tprintf("**** \"%c\" ****\n", word->best_choice->unichar_string()[i]);
|
||||
word->reject_map[i].full_print(debug_fp);
|
||||
}
|
||||
|
@ -101,11 +101,11 @@ void Tesseract::write_results(PAGE_RES_IT &page_res_it,
|
||||
bool force_eol) { // override tilde crunch?
|
||||
WERD_RES *word = page_res_it.word();
|
||||
const UNICHARSET &uchset = *word->uch_set;
|
||||
bool need_reject = false;
|
||||
UNICHAR_ID space = uchset.unichar_to_id(" ");
|
||||
|
||||
if ((word->unlv_crunch_mode != CR_NONE || word->best_choice->empty()) &&
|
||||
!tessedit_zero_kelvin_rejection && !tessedit_word_for_word) {
|
||||
bool need_reject = false;
|
||||
if ((word->unlv_crunch_mode != CR_DELETE) &&
|
||||
(!stats_.tilde_crunch_written ||
|
||||
((word->unlv_crunch_mode == CR_KEEP_SPACE) && (word->word->space() > 0) &&
|
||||
|
@ -2407,8 +2407,8 @@ static void InitializeTextAndBoxesPreRecognition(const MutableIterator &it, RowI
|
||||
// Set up text, lword_text, and rword_text (mostly for debug printing).
|
||||
std::string fake_text;
|
||||
PageIterator pit(static_cast<const PageIterator &>(it));
|
||||
bool first_word = true;
|
||||
if (!pit.Empty(RIL_WORD)) {
|
||||
bool first_word = true;
|
||||
do {
|
||||
fake_text += "x";
|
||||
if (first_word) {
|
||||
|
@ -703,9 +703,7 @@ bool Tesseract::word_display(PAGE_RES_IT *pr_it) {
|
||||
WERD_RES *word_res = pr_it->word();
|
||||
WERD *word = word_res->word;
|
||||
TBOX word_bb; // word bounding box
|
||||
int word_height; // ht of word BB
|
||||
bool displayed_something = false;
|
||||
float shift; // from bot left
|
||||
|
||||
if (color_mode != CM_RAINBOW && word_res->box_word != nullptr) {
|
||||
# ifndef DISABLED_LEGACY_ENGINE
|
||||
@ -842,13 +840,14 @@ bool Tesseract::word_display(PAGE_RES_IT *pr_it) {
|
||||
if (text.length() > 0) {
|
||||
word_bb = word->bounding_box();
|
||||
image_win->Pen(ScrollView::RED);
|
||||
word_height = word_bb.height();
|
||||
int text_height = 0.50 * word_height;
|
||||
auto word_height = word_bb.height();
|
||||
int text_height = word_height / 2;
|
||||
if (text_height > 20) {
|
||||
text_height = 20;
|
||||
}
|
||||
image_win->TextAttributes("Arial", text_height, false, false, false);
|
||||
shift = (word_height < word_bb.width()) ? 0.25 * word_height : 0.0f;
|
||||
// from bot left
|
||||
float shift = (word_height < word_bb.width()) ? 0.25f * word_height : 0.0f;
|
||||
image_win->Text(word_bb.left() + shift, word_bb.bottom() + 0.25 * word_height, text.c_str());
|
||||
if (blame.length() > 0) {
|
||||
image_win->Text(word_bb.left() + shift, word_bb.bottom() + 0.25 * word_height - text_height,
|
||||
|
@ -293,8 +293,6 @@ bool Tesseract::one_ell_conflict(WERD_RES *word_res, bool update_map) {
|
||||
int16_t i;
|
||||
int16_t offset;
|
||||
bool non_conflict_set_char; // non conf set a/n?
|
||||
bool conflict = false;
|
||||
bool allow_1s;
|
||||
ACCEPTABLE_WERD_TYPE word_type;
|
||||
bool dict_perm_type;
|
||||
bool dict_word_ok;
|
||||
@ -411,11 +409,11 @@ bool Tesseract::one_ell_conflict(WERD_RES *word_res, bool update_map) {
|
||||
Else reject all conflict chs
|
||||
*/
|
||||
if (word_contains_non_1_digit(word, lengths)) {
|
||||
allow_1s =
|
||||
bool allow_1s =
|
||||
(alpha_count(word, lengths) == 0) || (word_res->best_choice->permuter() == NUMBER_PERM);
|
||||
|
||||
int16_t offset;
|
||||
conflict = false;
|
||||
bool conflict = false;
|
||||
for (i = 0, offset = 0; word[offset] != '\0';
|
||||
offset += word_res->best_choice->unichar_lengths()[i++]) {
|
||||
if ((!allow_1s || (word[offset] != '1')) &&
|
||||
|
Loading…
Reference in New Issue
Block a user