mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-18 06:30:14 +08:00
Added more const.
This commit is contained in:
parent
e7e8e20119
commit
7d08e117d8
@ -467,7 +467,7 @@ int main(int argc, char** argv) {
|
||||
|
||||
api.SetOutputName(outputbase);
|
||||
|
||||
int init_failed = api.Init(datapath, lang, enginemode, &(argv[arg_i]),
|
||||
const int init_failed = api.Init(datapath, lang, enginemode, &(argv[arg_i]),
|
||||
argc - arg_i, &vars_vec, &vars_values, false);
|
||||
|
||||
SetVariablesFromCLArgs(&api, argc, argv);
|
||||
@ -508,7 +508,7 @@ int main(int argc, char** argv) {
|
||||
tesseract::TextlineOrder order;
|
||||
float deskew_angle;
|
||||
|
||||
tesseract::PageIterator* it = api.AnalyseLayout();
|
||||
const tesseract::PageIterator* it = api.AnalyseLayout();
|
||||
if (it) {
|
||||
it->Orientation(&orientation, &direction, &order, &deskew_angle);
|
||||
tprintf(
|
||||
|
@ -120,7 +120,7 @@ PAGE_RES* Tesseract::ApplyBoxes(const STRING& fname,
|
||||
return nullptr; // Can't do it.
|
||||
}
|
||||
|
||||
int box_count = boxes.size();
|
||||
const int box_count = boxes.size();
|
||||
int box_failures = 0;
|
||||
// Add an empty everything to the end.
|
||||
boxes.push_back(TBOX());
|
||||
@ -187,8 +187,8 @@ static double MedianXHeight(BLOCK_LIST *block_list) {
|
||||
/// Any row xheight that is significantly different from the median is set
|
||||
/// to the median.
|
||||
void Tesseract::PreenXHeights(BLOCK_LIST *block_list) {
|
||||
double median_xheight = MedianXHeight(block_list);
|
||||
double max_deviation = kMaxXHeightDeviationFraction * median_xheight;
|
||||
const double median_xheight = MedianXHeight(block_list);
|
||||
const double max_deviation = kMaxXHeightDeviationFraction * median_xheight;
|
||||
// Strip all fuzzy space markers to simplify the PAGE_RES.
|
||||
BLOCK_IT b_it(block_list);
|
||||
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
|
||||
@ -196,7 +196,7 @@ void Tesseract::PreenXHeights(BLOCK_LIST *block_list) {
|
||||
ROW_IT r_it(block->row_list());
|
||||
for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward ()) {
|
||||
ROW* row = r_it.data();
|
||||
double diff = fabs(row->x_height() - median_xheight);
|
||||
const double diff = fabs(row->x_height() - median_xheight);
|
||||
if (diff > max_deviation) {
|
||||
if (applybox_debug) {
|
||||
tprintf("row xheight=%g, but median xheight = %g\n",
|
||||
@ -315,7 +315,7 @@ void Tesseract::MaximallyChopWord(const GenericVector<TBOX>& boxes,
|
||||
/// then the miss metric is (A-C)(B-C)/(AB) and the box with minimum
|
||||
/// miss metric gets the blob.
|
||||
static double BoxMissMetric(const TBOX& box1, const TBOX& box2) {
|
||||
int overlap_area = box1.intersection(box2).area();
|
||||
const int overlap_area = box1.intersection(box2).area();
|
||||
double miss_metric = box1.area()- overlap_area;
|
||||
miss_metric /= box1.area();
|
||||
miss_metric *= box2.area() - overlap_area;
|
||||
@ -359,8 +359,8 @@ bool Tesseract::ResegmentCharBox(PAGE_RES* page_res, const TBOX *prev_box,
|
||||
break;
|
||||
if (word_res->correct_text[i + blob_count].length() > 0)
|
||||
break; // Blob is claimed already.
|
||||
double current_box_miss_metric = BoxMissMetric(blob_box, box);
|
||||
double next_box_miss_metric = BoxMissMetric(blob_box, next_box);
|
||||
const double current_box_miss_metric = BoxMissMetric(blob_box, box);
|
||||
const double next_box_miss_metric = BoxMissMetric(blob_box, next_box);
|
||||
if (applybox_debug > 2) {
|
||||
tprintf("Checking blob:");
|
||||
blob_box.print();
|
||||
@ -466,8 +466,8 @@ bool Tesseract::ResegmentWordBox(BLOCK_LIST *block_list,
|
||||
TBOX blob_box = blob->bounding_box();
|
||||
if (!blob_box.major_overlap(box))
|
||||
continue;
|
||||
double current_box_miss_metric = BoxMissMetric(blob_box, box);
|
||||
double next_box_miss_metric = BoxMissMetric(blob_box, next_box);
|
||||
const double current_box_miss_metric = BoxMissMetric(blob_box, box);
|
||||
const double next_box_miss_metric = BoxMissMetric(blob_box, next_box);
|
||||
if (applybox_debug > 2) {
|
||||
tprintf("Checking blob:");
|
||||
blob_box.print();
|
||||
@ -506,7 +506,7 @@ void Tesseract::ReSegmentByClassification(PAGE_RES* page_res) {
|
||||
PAGE_RES_IT pr_it(page_res);
|
||||
WERD_RES* word_res;
|
||||
for (; (word_res = pr_it.word()) != nullptr; pr_it.forward()) {
|
||||
WERD* word = word_res->word;
|
||||
const WERD* word = word_res->word;
|
||||
if (word->text() == nullptr || word->text()[0] == '\0')
|
||||
continue; // Ignore words that have no text.
|
||||
// Convert the correct text to a vector of UNICHAR_ID
|
||||
@ -555,7 +555,7 @@ bool Tesseract::ConvertStringToUnichars(const char* utf8,
|
||||
bool Tesseract::FindSegmentation(const GenericVector<UNICHAR_ID>& target_text,
|
||||
WERD_RES* word_res) {
|
||||
// Classify all required combinations of blobs and save results in choices.
|
||||
int word_length = word_res->box_word->length();
|
||||
const int word_length = word_res->box_word->length();
|
||||
GenericVector<BLOB_CHOICE_LIST*>* choices =
|
||||
new GenericVector<BLOB_CHOICE_LIST*>[word_length];
|
||||
for (int i = 0; i < word_length; ++i) {
|
||||
@ -637,7 +637,7 @@ void Tesseract::SearchForText(const GenericVector<BLOB_CHOICE_LIST*>* choices,
|
||||
BLOB_CHOICE_IT choice_it(choices[choices_pos][length - 1]);
|
||||
for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
|
||||
choice_it.forward()) {
|
||||
BLOB_CHOICE* choice = choice_it.data();
|
||||
const BLOB_CHOICE* choice = choice_it.data();
|
||||
choice_rating = choice->rating();
|
||||
UNICHAR_ID class_id = choice->unichar_id();
|
||||
if (class_id == target_text[text_index]) {
|
||||
|
@ -424,15 +424,15 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
|
||||
if ((dopasses == 0 || dopasses == 2) && (monitor || tessedit_write_unlv))
|
||||
output_pass(page_res_it, target_word_box);
|
||||
// end jetsoft
|
||||
PageSegMode pageseg_mode = static_cast<PageSegMode>(
|
||||
const PageSegMode pageseg_mode = static_cast<PageSegMode>(
|
||||
static_cast<int>(tessedit_pageseg_mode));
|
||||
textord_.CleanupSingleRowResult(pageseg_mode, page_res);
|
||||
|
||||
// Remove empty words, as these mess up the result iterators.
|
||||
for (page_res_it.restart_page(); page_res_it.word() != nullptr;
|
||||
page_res_it.forward()) {
|
||||
WERD_RES* word = page_res_it.word();
|
||||
POLY_BLOCK* pb = page_res_it.block()->block != nullptr
|
||||
const WERD_RES* word = page_res_it.word();
|
||||
const POLY_BLOCK* pb = page_res_it.block()->block != nullptr
|
||||
? page_res_it.block()->block->pdblk.poly_block()
|
||||
: nullptr;
|
||||
if (word->best_choice == nullptr || word->best_choice->length() == 0 ||
|
||||
@ -452,7 +452,7 @@ void Tesseract::bigram_correction_pass(PAGE_RES *page_res) {
|
||||
|
||||
WERD_RES *w_prev = nullptr;
|
||||
WERD_RES *w = word_it.word();
|
||||
while (1) {
|
||||
while (true) {
|
||||
w_prev = w;
|
||||
while (word_it.forward() != nullptr &&
|
||||
(!word_it.word() || word_it.word()->part_of_combo)) {
|
||||
@ -473,8 +473,8 @@ void Tesseract::bigram_correction_pass(PAGE_RES *page_res) {
|
||||
GenericVector<WERD_CHOICE *> overrides_word1;
|
||||
GenericVector<WERD_CHOICE *> overrides_word2;
|
||||
|
||||
STRING orig_w1_str = w_prev->best_choice->unichar_string();
|
||||
STRING orig_w2_str = w->best_choice->unichar_string();
|
||||
const STRING orig_w1_str = w_prev->best_choice->unichar_string();
|
||||
const STRING orig_w2_str = w->best_choice->unichar_string();
|
||||
WERD_CHOICE prev_best(w->uch_set);
|
||||
{
|
||||
int w1start, w1end;
|
||||
@ -550,8 +550,8 @@ void Tesseract::bigram_correction_pass(PAGE_RES *page_res) {
|
||||
}
|
||||
continue;
|
||||
}
|
||||
STRING new_w1_str = overrides_word1[best_idx]->unichar_string();
|
||||
STRING new_w2_str = overrides_word2[best_idx]->unichar_string();
|
||||
const STRING new_w1_str = overrides_word1[best_idx]->unichar_string();
|
||||
const STRING new_w2_str = overrides_word2[best_idx]->unichar_string();
|
||||
if (new_w1_str != orig_w1_str) {
|
||||
w_prev->ReplaceBestChoice(overrides_word1[best_idx]);
|
||||
}
|
||||
@ -629,19 +629,19 @@ void Tesseract::rejection_passes(PAGE_RES* page_res,
|
||||
// end jetsoft
|
||||
|
||||
page_res_it.rej_stat_word();
|
||||
int chars_in_word = word->reject_map.length();
|
||||
int rejects_in_word = word->reject_map.reject_count();
|
||||
const int chars_in_word = word->reject_map.length();
|
||||
const int rejects_in_word = word->reject_map.reject_count();
|
||||
|
||||
int blob_quality = word_blob_quality(word, page_res_it.row()->row);
|
||||
const int blob_quality = word_blob_quality(word, page_res_it.row()->row);
|
||||
stats_.doc_blob_quality += blob_quality;
|
||||
int outline_errs = word_outline_errs(word);
|
||||
const int outline_errs = word_outline_errs(word);
|
||||
stats_.doc_outline_errs += outline_errs;
|
||||
int16_t all_char_quality;
|
||||
int16_t accepted_all_char_quality;
|
||||
word_char_quality(word, page_res_it.row()->row,
|
||||
&all_char_quality, &accepted_all_char_quality);
|
||||
stats_.doc_char_quality += all_char_quality;
|
||||
uint8_t permuter_type = word->best_choice->permuter();
|
||||
const uint8_t permuter_type = word->best_choice->permuter();
|
||||
if ((permuter_type == SYSTEM_DAWG_PERM) ||
|
||||
(permuter_type == FREQ_DAWG_PERM) ||
|
||||
(permuter_type == USER_DAWG_PERM)) {
|
||||
@ -724,7 +724,7 @@ void Tesseract::script_pos_pass(PAGE_RES* page_res) {
|
||||
page_res_it.forward();
|
||||
continue;
|
||||
}
|
||||
float x_height = page_res_it.block()->block->x_height();
|
||||
const float x_height = page_res_it.block()->block->x_height();
|
||||
float word_x_height = word->x_height;
|
||||
if (word_x_height < word->best_choice->min_x_height() ||
|
||||
word_x_height > word->best_choice->max_x_height()) {
|
||||
@ -733,8 +733,8 @@ void Tesseract::script_pos_pass(PAGE_RES* page_res) {
|
||||
}
|
||||
// Test for small caps. Word capheight must be close to block xheight,
|
||||
// and word must contain no lower case letters, and at least one upper case.
|
||||
double small_cap_xheight = x_height * kXHeightCapRatio;
|
||||
double small_cap_delta = (x_height - small_cap_xheight) / 2.0;
|
||||
const double small_cap_xheight = x_height * kXHeightCapRatio;
|
||||
const double small_cap_delta = (x_height - small_cap_xheight) / 2.0;
|
||||
if (word->uch_set->script_has_xheight() &&
|
||||
small_cap_xheight - small_cap_delta <= word_x_height &&
|
||||
word_x_height <= small_cap_xheight + small_cap_delta) {
|
||||
@ -837,8 +837,8 @@ static int SelectBestWords(double rating_ratio,
|
||||
bool b_bad = false, n_bad = false;
|
||||
// True if all words have a valid permuter.
|
||||
bool b_valid_permuter = true, n_valid_permuter = true;
|
||||
int end_b = b < best_words->size() ? b + 1 : b;
|
||||
int end_n = n < new_words->size() ? n + 1 : n;
|
||||
const int end_b = b < best_words->size() ? b + 1 : b;
|
||||
const int end_n = n < new_words->size() ? n + 1 : n;
|
||||
EvaluateWordSpan(*best_words, start_b, end_b, &b_rating, &b_certainty,
|
||||
&b_bad, &b_valid_permuter);
|
||||
EvaluateWordSpan(*new_words, start_n, end_n, &n_rating, &n_certainty,
|
||||
@ -1006,7 +1006,7 @@ void Tesseract::AssignDiacriticsToOverlappingBlobs(
|
||||
C_BLOB_IT blob_it(real_word->cblob_list());
|
||||
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
|
||||
C_BLOB* blob = blob_it.data();
|
||||
TBOX blob_box = blob->bounding_box();
|
||||
const TBOX blob_box = blob->bounding_box();
|
||||
blob_wanted.init_to_size(outlines.size(), false);
|
||||
int num_blob_outlines = 0;
|
||||
for (int i = 0; i < outlines.size(); ++i) {
|
||||
@ -1265,8 +1265,8 @@ float Tesseract::ClassifyBlobAsWord(int pass_n, PAGE_RES_IT* pr_it,
|
||||
wd.row->x_height(), wd.word->raw_choice->min_x_height(),
|
||||
wd.word->raw_choice->max_x_height());
|
||||
}
|
||||
float cert = wd.word->raw_choice->certainty();
|
||||
float rat = wd.word->raw_choice->rating();
|
||||
const float cert = wd.word->raw_choice->certainty();
|
||||
const float rat = wd.word->raw_choice->rating();
|
||||
*c2 = rat > 0.0f ? cert * cert / rat : 0.0f;
|
||||
*best_str = wd.word->raw_choice->unichar_string();
|
||||
it.DeleteCurrentWord();
|
||||
@ -1289,9 +1289,9 @@ void Tesseract::classify_word_and_language(int pass_n, PAGE_RES_IT* pr_it,
|
||||
// Best result so far.
|
||||
PointerVector<WERD_RES> best_words;
|
||||
// Points to the best result. May be word or in lang_words.
|
||||
WERD_RES* word = word_data->word;
|
||||
const WERD_RES* word = word_data->word;
|
||||
clock_t start_t = clock();
|
||||
bool debug = classify_debug_level > 0 || multilang_debug_level > 0;
|
||||
const bool debug = classify_debug_level > 0 || multilang_debug_level > 0;
|
||||
if (debug) {
|
||||
tprintf("%s word with lang %s at:",
|
||||
word->done ? "Already done" : "Processing",
|
||||
@ -1910,7 +1910,7 @@ void Tesseract::set_word_fonts(WERD_RES *word) {
|
||||
if (word->chopped_word == nullptr) return;
|
||||
ASSERT_HOST(word->best_choice != nullptr);
|
||||
|
||||
int fontinfo_size = get_fontinfo_table().size();
|
||||
const int fontinfo_size = get_fontinfo_table().size();
|
||||
if (fontinfo_size == 0) return;
|
||||
GenericVector<int> font_total_score;
|
||||
font_total_score.init_to_size(fontinfo_size, 0);
|
||||
@ -1923,11 +1923,11 @@ void Tesseract::set_word_fonts(WERD_RES *word) {
|
||||
word->best_choice->debug_string().string());
|
||||
}
|
||||
for (int b = 0; b < word->best_choice->length(); ++b) {
|
||||
BLOB_CHOICE* choice = word->GetBlobChoice(b);
|
||||
const BLOB_CHOICE* choice = word->GetBlobChoice(b);
|
||||
if (choice == nullptr) continue;
|
||||
const GenericVector<ScoredFont>& fonts = choice->fonts();
|
||||
for (int f = 0; f < fonts.size(); ++f) {
|
||||
int fontinfo_id = fonts[f].fontinfo_id;
|
||||
const int fontinfo_id = fonts[f].fontinfo_id;
|
||||
if (0 <= fontinfo_id && fontinfo_id < fontinfo_size) {
|
||||
font_total_score[fontinfo_id] += fonts[f].score;
|
||||
}
|
||||
@ -1958,7 +1958,7 @@ void Tesseract::set_word_fonts(WERD_RES *word) {
|
||||
word->fontinfo_id_count = ClipToRange<int>(score1 / UINT16_MAX, 1, INT8_MAX);
|
||||
word->fontinfo_id2_count = ClipToRange<int>(score2 / UINT16_MAX, 0, INT8_MAX);
|
||||
if (score1 > 0) {
|
||||
FontInfo fi = fontinfo_table_.get(font_id1);
|
||||
const FontInfo fi = fontinfo_table_.get(font_id1);
|
||||
if (tessedit_debug_fonts) {
|
||||
if (word->fontinfo_id2_count > 0) {
|
||||
tprintf("Word modal font=%s, score=%d, 2nd choice %s/%d\n",
|
||||
@ -2023,9 +2023,9 @@ void Tesseract::font_recognition_pass(PAGE_RES* page_res) {
|
||||
for (page_res_it.restart_page(); page_res_it.word() != nullptr;
|
||||
page_res_it.forward()) {
|
||||
word = page_res_it.word();
|
||||
int length = word->best_choice->length();
|
||||
const int length = word->best_choice->length();
|
||||
|
||||
int count = word->fontinfo_id_count;
|
||||
const int count = word->fontinfo_id_count;
|
||||
if (!(count == length || (length > 3 && count >= length * 3 / 4))) {
|
||||
word->fontinfo = modal_font;
|
||||
// Counts only get 1 as it came from the doc.
|
||||
@ -2046,7 +2046,7 @@ void Tesseract::dictionary_correction_pass(PAGE_RES *page_res) {
|
||||
if (word->best_choices.singleton())
|
||||
continue; // There are no alternates.
|
||||
|
||||
WERD_CHOICE* best = word->best_choice;
|
||||
const WERD_CHOICE* best = word->best_choice;
|
||||
if (word->tesseract->getDict().valid_word(*best) != 0)
|
||||
continue; // The best choice is in the dictionary.
|
||||
|
||||
|
@ -164,8 +164,8 @@ void EquationDetect::IdentifySpecialText(
|
||||
|
||||
// Normalize the blob. Set the origin to the place we want to be the
|
||||
// bottom-middle, and scaling is to make the height the x-height.
|
||||
float scaling = static_cast<float>(kBlnXHeight) / box.height();
|
||||
float x_orig = (box.left() + box.right()) / 2.0f, y_orig = box.bottom();
|
||||
const float scaling = static_cast<float>(kBlnXHeight) / box.height();
|
||||
const float x_orig = (box.left() + box.right()) / 2.0f, y_orig = box.bottom();
|
||||
TBLOB* normed_blob = new TBLOB(*tblob);
|
||||
normed_blob->Normalize(nullptr, nullptr, nullptr, x_orig, y_orig, scaling, scaling,
|
||||
0.0f, static_cast<float>(kBlnBaselineOffset),
|
||||
@ -188,13 +188,13 @@ void EquationDetect::IdentifySpecialText(
|
||||
equ_choice = choice_it.data();
|
||||
}
|
||||
|
||||
float lang_score = lang_choice ? lang_choice->certainty() : -FLT_MAX;
|
||||
float equ_score = equ_choice ? equ_choice->certainty() : -FLT_MAX;
|
||||
const float lang_score = lang_choice ? lang_choice->certainty() : -FLT_MAX;
|
||||
const float equ_score = equ_choice ? equ_choice->certainty() : -FLT_MAX;
|
||||
|
||||
const float kConfScoreTh = -5.0f, kConfDiffTh = 1.8;
|
||||
// The scores here are negative, so the max/min == fabs(min/max).
|
||||
// float ratio = fmax(lang_score, equ_score) / fmin(lang_score, equ_score);
|
||||
float diff = fabs(lang_score - equ_score);
|
||||
const float diff = fabs(lang_score - equ_score);
|
||||
BlobSpecialTextType type = BSTT_NONE;
|
||||
|
||||
// Classification.
|
||||
@ -222,7 +222,7 @@ void EquationDetect::IdentifySpecialText(
|
||||
|
||||
BlobSpecialTextType EquationDetect::EstimateTypeForUnichar(
|
||||
const UNICHARSET& unicharset, const UNICHAR_ID id) const {
|
||||
STRING s = unicharset.id_to_unichar(id);
|
||||
const STRING s = unicharset.id_to_unichar(id);
|
||||
if (unicharset.get_isalpha(id)) {
|
||||
return BSTT_NONE;
|
||||
}
|
||||
@ -260,8 +260,8 @@ void EquationDetect::IdentifySpecialText() {
|
||||
equ_tesseract_.tess_bn_matching.set_value(0);
|
||||
|
||||
// Set the multiplier to zero for lang_tesseract_ to improve the accuracy.
|
||||
int classify_class_pruner = lang_tesseract_->classify_class_pruner_multiplier;
|
||||
int classify_integer_matcher =
|
||||
const int classify_class_pruner = lang_tesseract_->classify_class_pruner_multiplier;
|
||||
const int classify_integer_matcher =
|
||||
lang_tesseract_->classify_integer_matcher_multiplier;
|
||||
lang_tesseract_->classify_class_pruner_multiplier.set_value(0);
|
||||
lang_tesseract_->classify_integer_matcher_multiplier.set_value(0);
|
||||
@ -284,7 +284,7 @@ void EquationDetect::IdentifySpecialText() {
|
||||
}
|
||||
}
|
||||
blob_heights.sort();
|
||||
int height_th = blob_heights[blob_heights.size() / 2] / 3 * 2;
|
||||
const int height_th = blob_heights[blob_heights.size() / 2] / 3 * 2;
|
||||
for (bbox_it.mark_cycle_pt (); !bbox_it.cycled_list();
|
||||
bbox_it.forward()) {
|
||||
if (bbox_it.data()->special_text_type() != BSTT_SKIP) {
|
||||
@ -332,12 +332,12 @@ void EquationDetect::IdentifyBlobsToSkip(ColPartition* part) {
|
||||
break;
|
||||
}
|
||||
const float kWidthR = 0.4, kHeightR = 0.3;
|
||||
bool xoverlap = blob_box.major_x_overlap(nextblob_box),
|
||||
const bool xoverlap = blob_box.major_x_overlap(nextblob_box),
|
||||
yoverlap = blob_box.y_overlap(nextblob_box);
|
||||
float widthR = static_cast<float>(
|
||||
const float widthR = static_cast<float>(
|
||||
MIN(nextblob_box.width(), blob_box.width())) /
|
||||
MAX(nextblob_box.width(), blob_box.width());
|
||||
float heightR = static_cast<float>(
|
||||
const float heightR = static_cast<float>(
|
||||
MIN(nextblob_box.height(), blob_box.height())) /
|
||||
MAX(nextblob_box.height(), blob_box.height());
|
||||
|
||||
@ -486,7 +486,7 @@ void EquationDetect::SearchByOverlap(
|
||||
const TBOX& part_box(part->bounding_box());
|
||||
bool merge = false;
|
||||
|
||||
float x_overlap_fraction = part_box.x_overlap_fraction(seed_box),
|
||||
const float x_overlap_fraction = part_box.x_overlap_fraction(seed_box),
|
||||
y_overlap_fraction = part_box.y_overlap_fraction(seed_box);
|
||||
|
||||
// If part is large overlapped with seed, then set merge to true.
|
||||
@ -550,7 +550,7 @@ void EquationDetect::IdentifySeedParts() {
|
||||
continue;
|
||||
}
|
||||
part->ComputeSpecialBlobsDensity();
|
||||
bool blobs_check = CheckSeedBlobsCount(part);
|
||||
const bool blobs_check = CheckSeedBlobsCount(part);
|
||||
const int kTextBlobsTh = 20;
|
||||
|
||||
if (CheckSeedDensity(kMathDigitDensityTh1, kMathDigitDensityTh2, part) &&
|
||||
@ -609,7 +609,7 @@ void EquationDetect::IdentifySeedParts() {
|
||||
|
||||
float EquationDetect::ComputeForegroundDensity(const TBOX& tbox) {
|
||||
Pix *pix_bi = lang_tesseract_->pix_binary();
|
||||
int pix_height = pixGetHeight(pix_bi);
|
||||
const int pix_height = pixGetHeight(pix_bi);
|
||||
Box* box = boxCreate(tbox.left(), pix_height - tbox.top(),
|
||||
tbox.width(), tbox.height());
|
||||
Pix *pix_sub = pixClipRectangle(pix_bi, box, nullptr);
|
||||
@ -630,7 +630,7 @@ bool EquationDetect::CheckSeedFgDensity(const float density_th,
|
||||
SplitCPHorLite(part, &sub_boxes);
|
||||
float parts_passed = 0.0;
|
||||
for (int i = 0; i < sub_boxes.size(); ++i) {
|
||||
float density = ComputeForegroundDensity(sub_boxes[i]);
|
||||
const float density = ComputeForegroundDensity(sub_boxes[i]);
|
||||
if (density < density_th) {
|
||||
parts_passed++;
|
||||
}
|
||||
@ -673,7 +673,7 @@ void EquationDetect::SplitCPHor(ColPartition* part,
|
||||
box.left() - previous_right > kThreshold) {
|
||||
// We have a split position. Split the partition in two pieces.
|
||||
// Insert the left piece in the grid and keep processing the right.
|
||||
int mid_x = (box.left() + previous_right) / 2;
|
||||
const int mid_x = (box.left() + previous_right) / 2;
|
||||
ColPartition* left_part = right_part;
|
||||
right_part = left_part->SplitAt(mid_x);
|
||||
|
||||
@ -761,7 +761,8 @@ int EquationDetect::CountAlignment(
|
||||
return 0;
|
||||
}
|
||||
const int kDistTh = static_cast<int>(roundf(0.03 * resolution_));
|
||||
int pos = sorted_vec.binary_search(val), count = 0;
|
||||
const int pos = sorted_vec.binary_search(val);
|
||||
int count = 0;
|
||||
|
||||
// Search left side.
|
||||
int index = pos;
|
||||
@ -781,7 +782,7 @@ int EquationDetect::CountAlignment(
|
||||
void EquationDetect::IdentifyInlineParts() {
|
||||
ComputeCPsSuperBBox();
|
||||
IdentifyInlinePartsHorizontal();
|
||||
int textparts_linespacing = EstimateTextPartLineSpacing();
|
||||
const int textparts_linespacing = EstimateTextPartLineSpacing();
|
||||
IdentifyInlinePartsVertical(true, textparts_linespacing);
|
||||
IdentifyInlinePartsVertical(false, textparts_linespacing);
|
||||
}
|
||||
@ -807,11 +808,11 @@ void EquationDetect::IdentifyInlinePartsHorizontal() {
|
||||
ColPartitionGridSearch search(part_grid_);
|
||||
search.SetUniqueMode(true);
|
||||
// The center x coordinate of the cp_super_bbox_.
|
||||
int cps_cx = cps_super_bbox_->left() + cps_super_bbox_->width() / 2;
|
||||
const int cps_cx = cps_super_bbox_->left() + cps_super_bbox_->width() / 2;
|
||||
for (int i = 0; i < cp_seeds_.size(); ++i) {
|
||||
ColPartition* part = cp_seeds_[i];
|
||||
const TBOX& part_box(part->bounding_box());
|
||||
int left_margin = part_box.left() - cps_super_bbox_->left(),
|
||||
const int left_margin = part_box.left() - cps_super_bbox_->left(),
|
||||
right_margin = cps_super_bbox_->right() - part_box.right();
|
||||
bool right_to_left;
|
||||
if (left_margin + kMarginDiffTh < right_margin &&
|
||||
@ -985,7 +986,7 @@ bool EquationDetect::CheckSeedBlobsCount(ColPartition* part) {
|
||||
const int kSeedMathBlobsCount = 2;
|
||||
const int kSeedMathDigitBlobsCount = 5;
|
||||
|
||||
int blobs = part->boxes_count(),
|
||||
const int blobs = part->boxes_count(),
|
||||
math_blobs = part->SpecialBlobsCount(BSTT_MATH),
|
||||
digit_blobs = part->SpecialBlobsCount(BSTT_DIGIT);
|
||||
if (blobs < kSeedBlobsCountTh || math_blobs <= kSeedMathBlobsCount ||
|
||||
@ -1056,8 +1057,8 @@ EquationDetect::IndentType EquationDetect::IsIndented(ColPartition* part) {
|
||||
}
|
||||
|
||||
if (part_box.y_gap(neighbor_box) < kYGapTh) {
|
||||
int left_gap = part_box.left() - neighbor_box.left();
|
||||
int right_gap = neighbor_box.right() - part_box.right();
|
||||
const int left_gap = part_box.left() - neighbor_box.left();
|
||||
const int right_gap = neighbor_box.right() - part_box.right();
|
||||
if (left_gap > kXGapTh) {
|
||||
left_indented = true;
|
||||
}
|
||||
@ -1132,7 +1133,7 @@ void EquationDetect::ExpandSeedHorizontal(
|
||||
|
||||
ColPartitionGridSearch search(part_grid_);
|
||||
const TBOX& seed_box(seed->bounding_box());
|
||||
int x = search_left ? seed_box.left() : seed_box.right();
|
||||
const int x = search_left ? seed_box.left() : seed_box.right();
|
||||
search.StartSideSearch(x, seed_box.bottom(), seed_box.top());
|
||||
search.SetUniqueMode(true);
|
||||
|
||||
@ -1189,7 +1190,7 @@ void EquationDetect::ExpandSeedVertical(
|
||||
|
||||
ColPartitionGridSearch search(part_grid_);
|
||||
const TBOX& seed_box(seed->bounding_box());
|
||||
int y = search_bottom ? seed_box.bottom() : seed_box.top();
|
||||
const int y = search_bottom ? seed_box.bottom() : seed_box.top();
|
||||
search.StartVerticalSearch(
|
||||
cps_super_bbox_->left(), cps_super_bbox_->right(), y);
|
||||
search.SetUniqueMode(true);
|
||||
|
@ -110,7 +110,7 @@ STRING::STRING() {
|
||||
STRING::STRING(const STRING& str) {
|
||||
str.FixHeader();
|
||||
const STRING_HEADER* str_header = str.GetHeader();
|
||||
int str_used = str_header->used_;
|
||||
const int str_used = str_header->used_;
|
||||
char *this_cstr = AllocData(str_used, str_used);
|
||||
memcpy(this_cstr, str.GetCStr(), str_used);
|
||||
assert(InvariantOk());
|
||||
@ -121,7 +121,7 @@ STRING::STRING(const char* cstr) {
|
||||
// Empty STRINGs contain just the "\0".
|
||||
memcpy(AllocData(1, kMinCapacity), "", 1);
|
||||
} else {
|
||||
int len = strlen(cstr) + 1;
|
||||
const int len = strlen(cstr) + 1;
|
||||
char* this_cstr = AllocData(len, len);
|
||||
memcpy(this_cstr, cstr, len);
|
||||
}
|
||||
@ -285,7 +285,7 @@ char& STRING::operator[](int32_t index) const {
|
||||
|
||||
void STRING::split(const char c, GenericVector<STRING> *splited) {
|
||||
int start_index = 0;
|
||||
int len = length();
|
||||
const int len = length();
|
||||
for (int i = 0; i < len; i++) {
|
||||
if ((*this)[i] == c) {
|
||||
if (i != start_index) {
|
||||
@ -307,8 +307,8 @@ BOOL8 STRING::operator==(const STRING& str) const {
|
||||
str.FixHeader();
|
||||
const STRING_HEADER* str_header = str.GetHeader();
|
||||
const STRING_HEADER* this_header = GetHeader();
|
||||
int this_used = this_header->used_;
|
||||
int str_used = str_header->used_;
|
||||
const int this_used = this_header->used_;
|
||||
const int str_used = str_header->used_;
|
||||
|
||||
return (this_used == str_used)
|
||||
&& (memcmp(GetCStr(), str.GetCStr(), this_used) == 0);
|
||||
@ -319,8 +319,8 @@ BOOL8 STRING::operator!=(const STRING& str) const {
|
||||
str.FixHeader();
|
||||
const STRING_HEADER* str_header = str.GetHeader();
|
||||
const STRING_HEADER* this_header = GetHeader();
|
||||
int this_used = this_header->used_;
|
||||
int str_used = str_header->used_;
|
||||
const int this_used = this_header->used_;
|
||||
const int str_used = str_header->used_;
|
||||
|
||||
return (this_used != str_used)
|
||||
|| (memcmp(GetCStr(), str.GetCStr(), this_used) != 0);
|
||||
@ -333,7 +333,7 @@ BOOL8 STRING::operator!=(const char* cstr) const {
|
||||
if (cstr == nullptr)
|
||||
return this_header->used_ > 1; // either '\0' or nullptr
|
||||
else {
|
||||
int32_t length = strlen(cstr) + 1;
|
||||
const int32_t length = strlen(cstr) + 1;
|
||||
return (this_header->used_ != length)
|
||||
|| (memcmp(GetCStr(), cstr, length) != 0);
|
||||
}
|
||||
@ -342,7 +342,7 @@ BOOL8 STRING::operator!=(const char* cstr) const {
|
||||
STRING& STRING::operator=(const STRING& str) {
|
||||
str.FixHeader();
|
||||
const STRING_HEADER* str_header = str.GetHeader();
|
||||
int str_used = str_header->used_;
|
||||
const int str_used = str_header->used_;
|
||||
|
||||
GetHeader()->used_ = 0; // clear since ensure doesn't need to copy data
|
||||
char* this_cstr = ensure_cstr(str_used);
|
||||
@ -360,8 +360,8 @@ STRING & STRING::operator+=(const STRING& str) {
|
||||
str.FixHeader();
|
||||
const STRING_HEADER* str_header = str.GetHeader();
|
||||
const char* str_cstr = str.GetCStr();
|
||||
int str_used = str_header->used_;
|
||||
int this_used = GetHeader()->used_;
|
||||
const int str_used = str_header->used_;
|
||||
const int this_used = GetHeader()->used_;
|
||||
char* this_cstr = ensure_cstr(this_used + str_used);
|
||||
|
||||
STRING_HEADER* this_header = GetHeader(); // after ensure for realloc
|
||||
@ -401,7 +401,7 @@ void STRING::add_str_double(const char* str, double number) {
|
||||
STRING & STRING::operator=(const char* cstr) {
|
||||
STRING_HEADER* this_header = GetHeader();
|
||||
if (cstr) {
|
||||
int len = strlen(cstr) + 1;
|
||||
const int len = strlen(cstr) + 1;
|
||||
|
||||
this_header->used_ = 0; // don't bother copying data if need to realloc
|
||||
char* this_cstr = ensure_cstr(len);
|
||||
@ -445,10 +445,10 @@ STRING STRING::operator+(const char ch) const {
|
||||
STRING result;
|
||||
FixHeader();
|
||||
const STRING_HEADER* this_header = GetHeader();
|
||||
int this_used = this_header->used_;
|
||||
const int this_used = this_header->used_;
|
||||
char* result_cstr = result.ensure_cstr(this_used + 1);
|
||||
STRING_HEADER* result_header = result.GetHeader();
|
||||
int result_used = result_header->used_;
|
||||
const int result_used = result_header->used_;
|
||||
|
||||
// copies '\0' but we'll overwrite that
|
||||
memcpy(result_cstr, GetCStr(), this_used);
|
||||
@ -466,8 +466,8 @@ STRING& STRING::operator+=(const char *str) {
|
||||
return *this;
|
||||
|
||||
FixHeader();
|
||||
int len = strlen(str) + 1;
|
||||
int this_used = GetHeader()->used_;
|
||||
const int len = strlen(str) + 1;
|
||||
const int this_used = GetHeader()->used_;
|
||||
char* this_cstr = ensure_cstr(this_used + len);
|
||||
STRING_HEADER* this_header = GetHeader(); // after ensure for realloc
|
||||
|
||||
|
@ -386,10 +386,10 @@ void LSTMRecognizer::DebugActivationRange(const NetworkIO& outputs,
|
||||
tprintf("%s=%d On [%d, %d), scores=", label, best_choice, x_start, x_end);
|
||||
double max_score = 0.0;
|
||||
double mean_score = 0.0;
|
||||
int width = x_end - x_start;
|
||||
const int width = x_end - x_start;
|
||||
for (int x = x_start; x < x_end; ++x) {
|
||||
const float* line = outputs.f(x);
|
||||
double score = line[best_choice] * 100.0;
|
||||
const double score = line[best_choice] * 100.0;
|
||||
if (score > max_score) max_score = score;
|
||||
mean_score += score / width;
|
||||
int best_c = 0;
|
||||
@ -452,10 +452,10 @@ void LSTMRecognizer::LabelsViaSimpleText(const NetworkIO& output,
|
||||
GenericVector<int>* xcoords) {
|
||||
labels->truncate(0);
|
||||
xcoords->truncate(0);
|
||||
int width = output.Width();
|
||||
const int width = output.Width();
|
||||
for (int t = 0; t < width; ++t) {
|
||||
float score = 0.0f;
|
||||
int label = output.BestLabel(t, &score);
|
||||
const int label = output.BestLabel(t, &score);
|
||||
if (label != null_char_) {
|
||||
labels->push_back(label);
|
||||
xcoords->push_back(t);
|
||||
|
Loading…
Reference in New Issue
Block a user