mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-19 06:53:36 +08:00
Fix compiler warnings (mostly -Wsign-compare)
Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
parent
c7653bf59f
commit
0c20d3f843
@ -935,7 +935,7 @@ bool TessBaseAPI::ProcessPagesFileList(FILE *flist, std::string *buf, const char
|
||||
int tessedit_page_number) {
|
||||
if (!flist && !buf)
|
||||
return false;
|
||||
int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
|
||||
unsigned page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
|
||||
char pagename[MAX_PATH];
|
||||
|
||||
std::vector<std::string> lines;
|
||||
@ -958,7 +958,7 @@ bool TessBaseAPI::ProcessPagesFileList(FILE *flist, std::string *buf, const char
|
||||
}
|
||||
|
||||
// Skip to the requested page number.
|
||||
for (int i = 0; i < page; i++) {
|
||||
for (unsigned i = 0; i < page; i++) {
|
||||
if (flist) {
|
||||
if (fgets(pagename, sizeof(pagename), flist) == nullptr)
|
||||
break;
|
||||
@ -986,7 +986,7 @@ bool TessBaseAPI::ProcessPagesFileList(FILE *flist, std::string *buf, const char
|
||||
tprintf("Image file %s cannot be read!\n", pagename);
|
||||
return false;
|
||||
}
|
||||
tprintf("Page %d : %s\n", page, pagename);
|
||||
tprintf("Page %u : %s\n", page, pagename);
|
||||
bool r = ProcessPage(pix, page, pagename, retry_config, timeout_millisec, renderer);
|
||||
pixDestroy(&pix);
|
||||
if (!r)
|
||||
|
@ -236,8 +236,8 @@ char **TessBaseAPIGetLoadedLanguagesAsVector(const TessBaseAPI *handle) {
|
||||
std::vector<std::string> languages;
|
||||
handle->GetLoadedLanguagesAsVector(&languages);
|
||||
char **arr = new char *[languages.size() + 1];
|
||||
for (int index = 0; index < languages.size(); ++index) {
|
||||
arr[index] = strdup(languages[index].c_str());
|
||||
for (auto &language : languages) {
|
||||
arr[&language - &languages[0]] = strdup(language.c_str());
|
||||
}
|
||||
arr[languages.size()] = nullptr;
|
||||
return arr;
|
||||
@ -247,8 +247,8 @@ char **TessBaseAPIGetAvailableLanguagesAsVector(const TessBaseAPI *handle) {
|
||||
std::vector<std::string> languages;
|
||||
handle->GetAvailableLanguagesAsVector(&languages);
|
||||
char **arr = new char *[languages.size() + 1];
|
||||
for (int index = 0; index < languages.size(); ++index) {
|
||||
arr[index] = strdup(languages[index].c_str());
|
||||
for (auto &language : languages) {
|
||||
arr[&language - &languages[0]] = strdup(language.c_str());
|
||||
}
|
||||
arr[languages.size()] = nullptr;
|
||||
return arr;
|
||||
|
@ -380,13 +380,13 @@ bool Tesseract::ResegmentCharBox(PAGE_RES *page_res, const TBOX *prev_box, const
|
||||
// this box.
|
||||
if (applybox_debug > 1) {
|
||||
tprintf("Best state = ");
|
||||
for (int j = 0; j < word_res->best_state.size(); ++j) {
|
||||
tprintf("%d ", word_res->best_state[j]);
|
||||
for (auto best_state : word_res->best_state) {
|
||||
tprintf("%d ", best_state);
|
||||
}
|
||||
tprintf("\n");
|
||||
tprintf("Correct text = [[ ");
|
||||
for (int j = 0; j < word_res->correct_text.size(); ++j) {
|
||||
tprintf("%s ", word_res->correct_text[j].c_str());
|
||||
for (auto &correct_text : word_res->correct_text) {
|
||||
tprintf("%s ", correct_text.c_str());
|
||||
}
|
||||
tprintf("]]\n");
|
||||
}
|
||||
@ -561,8 +561,8 @@ bool Tesseract::FindSegmentation(const std::vector<UNICHAR_ID> &target_text, WER
|
||||
// Build the original segmentation and if it is the same length as the
|
||||
// truth, assume it will do.
|
||||
int blob_count = 1;
|
||||
for (int s = 0; s < word_res->seam_array.size(); ++s) {
|
||||
SEAM *seam = word_res->seam_array[s];
|
||||
for (auto s : word_res->seam_array) {
|
||||
SEAM *seam = s;
|
||||
if (!seam->HasAnySplits()) {
|
||||
word_res->best_state.push_back(blob_count);
|
||||
blob_count = 1;
|
||||
@ -577,8 +577,8 @@ bool Tesseract::FindSegmentation(const std::vector<UNICHAR_ID> &target_text, WER
|
||||
}
|
||||
}
|
||||
word_res->correct_text.clear();
|
||||
for (int i = 0; i < target_text.size(); ++i) {
|
||||
word_res->correct_text.push_back(unicharset.id_to_unichar(target_text[i]));
|
||||
for (auto &text : target_text) {
|
||||
word_res->correct_text.push_back(unicharset.id_to_unichar(text));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -602,7 +602,7 @@ void Tesseract::SearchForText(const std::vector<BLOB_CHOICE_LIST *> *choices, in
|
||||
int text_index, float rating, std::vector<int> *segmentation,
|
||||
float *best_rating, std::vector<int> *best_segmentation) {
|
||||
const UnicharAmbigsVector &table = getDict().getUnicharAmbigs().dang_ambigs();
|
||||
for (int length = 1; length <= choices[choices_pos].size(); ++length) {
|
||||
for (unsigned length = 1; length <= choices[choices_pos].size(); ++length) {
|
||||
// Rating of matching choice or worst choice if no match.
|
||||
float choice_rating = 0.0f;
|
||||
// Find the corresponding best BLOB_CHOICE.
|
||||
@ -746,12 +746,12 @@ void Tesseract::CorrectClassifyWords(PAGE_RES *page_res) {
|
||||
PAGE_RES_IT pr_it(page_res);
|
||||
for (WERD_RES *word_res = pr_it.word(); word_res != nullptr; word_res = pr_it.forward()) {
|
||||
auto *choice = new WERD_CHOICE(word_res->uch_set, word_res->correct_text.size());
|
||||
for (int i = 0; i < word_res->correct_text.size(); ++i) {
|
||||
for (auto &correct_text : word_res->correct_text) {
|
||||
// The part before the first space is the real ground truth, and the
|
||||
// rest is the bounding box location and page number.
|
||||
std::vector<std::string> tokens = split(word_res->correct_text[i], ' ');
|
||||
std::vector<std::string> tokens = split(correct_text, ' ');
|
||||
UNICHAR_ID char_id = unicharset.unichar_to_id(tokens[0].c_str());
|
||||
choice->append_unichar_id_space_allocated(char_id, word_res->best_state[i], 0.0f, 0.0f);
|
||||
choice->append_unichar_id_space_allocated(char_id, word_res->best_state[&correct_text - &word_res->correct_text[0]], 0.0f, 0.0f);
|
||||
}
|
||||
word_res->ClearWordChoices();
|
||||
word_res->LogNewRawChoice(choice);
|
||||
|
@ -152,7 +152,7 @@ void Tesseract::SetupAllWordsPassN(int pass_n, const TBOX *target_word_box, cons
|
||||
}
|
||||
}
|
||||
// Setup all the words for recognition with polygonal approximation.
|
||||
for (int w = 0; w < words->size(); ++w) {
|
||||
for (unsigned w = 0; w < words->size(); ++w) {
|
||||
SetupWordPassN(pass_n, &(*words)[w]);
|
||||
if (w > 0)
|
||||
(*words)[w].prev_word = &(*words)[w - 1];
|
||||
@ -173,7 +173,7 @@ void Tesseract::SetupWordPassN(int pass_n, WordData *word) {
|
||||
word->word->x_height = word->row->x_height();
|
||||
}
|
||||
word->lang_words.truncate(0);
|
||||
for (int s = 0; s <= sub_langs_.size(); ++s) {
|
||||
for (unsigned s = 0; s <= sub_langs_.size(); ++s) {
|
||||
// The sub_langs_.size() entry is for the master language.
|
||||
Tesseract *lang_t = s < sub_langs_.size() ? sub_langs_[s] : this;
|
||||
auto *word_res = new WERD_RES;
|
||||
@ -199,7 +199,7 @@ bool Tesseract::RecogAllWordsPassN(int pass_n, ETEXT_DESC *monitor, PAGE_RES_IT
|
||||
// added. The results will be significantly different with adaption on, and
|
||||
// deterioration will need investigation.
|
||||
pr_it->restart_page();
|
||||
for (int w = 0; w < words->size(); ++w) {
|
||||
for (unsigned w = 0; w < words->size(); ++w) {
|
||||
WordData *word = &(*words)[w];
|
||||
if (w > 0)
|
||||
word->prev_word = &(*words)[w - 1];
|
||||
@ -302,11 +302,11 @@ bool Tesseract::recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor,
|
||||
StartBackupAdaptiveClassifier();
|
||||
}
|
||||
// Now check the sub-langs as well.
|
||||
for (int i = 0; i < sub_langs_.size(); ++i) {
|
||||
if (sub_langs_[i]->AdaptiveClassifierIsFull()) {
|
||||
sub_langs_[i]->SwitchAdaptiveClassifier();
|
||||
} else if (!sub_langs_[i]->AdaptiveClassifierIsEmpty()) {
|
||||
sub_langs_[i]->StartBackupAdaptiveClassifier();
|
||||
for (auto &lang : sub_langs_) {
|
||||
if (lang->AdaptiveClassifierIsFull()) {
|
||||
lang->SwitchAdaptiveClassifier();
|
||||
} else if (!lang->AdaptiveClassifierIsEmpty()) {
|
||||
lang->StartBackupAdaptiveClassifier();
|
||||
}
|
||||
}
|
||||
|
||||
@ -556,7 +556,7 @@ void Tesseract::bigram_correction_pass(PAGE_RES *page_res) {
|
||||
if (tessedit_bigram_debug > 1) {
|
||||
std::string bigrams_list;
|
||||
const int kMaxChoicesToPrint = 20;
|
||||
for (int i = 0; i < overrides_word1.size() && i < kMaxChoicesToPrint; i++) {
|
||||
for (unsigned i = 0; i < overrides_word1.size() && i < kMaxChoicesToPrint; i++) {
|
||||
if (i > 0) {
|
||||
bigrams_list += ", ";
|
||||
}
|
||||
@ -679,8 +679,8 @@ void Tesseract::blamer_pass(PAGE_RES *page_res) {
|
||||
}
|
||||
if (page_res->misadaption_log.size() > 0) {
|
||||
tprintf("Misadaption log:\n");
|
||||
for (int i = 0; i < page_res->misadaption_log.size(); ++i) {
|
||||
tprintf("%s\n", page_res->misadaption_log[i].c_str());
|
||||
for (auto &log : page_res->misadaption_log) {
|
||||
tprintf("%s\n", log.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -838,8 +838,8 @@ static int SelectBestWords(double rating_ratio, double certainty_margin, bool de
|
||||
}
|
||||
// Transfer from out_words to best_words.
|
||||
best_words->clear();
|
||||
for (int i = 0; i < out_words.size(); ++i)
|
||||
best_words->push_back(out_words[i]);
|
||||
for (auto &out_word : out_words)
|
||||
best_words->push_back(out_word);
|
||||
return num_new - num_best;
|
||||
}
|
||||
|
||||
@ -908,7 +908,7 @@ bool Tesseract::ReassignDiacritics(int pass, PAGE_RES_IT *pr_it, bool *make_next
|
||||
std::vector<C_OUTLINE *> wanted_outlines;
|
||||
int num_overlapped = 0;
|
||||
int num_overlapped_used = 0;
|
||||
for (int i = 0; i < overlapped_any_blob.size(); ++i) {
|
||||
for (unsigned i = 0; i < overlapped_any_blob.size(); ++i) {
|
||||
if (overlapped_any_blob[i]) {
|
||||
++num_overlapped;
|
||||
if (word_wanted[i])
|
||||
@ -923,7 +923,7 @@ bool Tesseract::ReassignDiacritics(int pass, PAGE_RES_IT *pr_it, bool *make_next
|
||||
AssignDiacriticsToNewBlobs(outlines, pass, real_word, pr_it, &word_wanted, &target_blobs);
|
||||
int non_overlapped = 0;
|
||||
int non_overlapped_used = 0;
|
||||
for (int i = 0; i < word_wanted.size(); ++i) {
|
||||
for (unsigned i = 0; i < word_wanted.size(); ++i) {
|
||||
if (word_wanted[i])
|
||||
++non_overlapped_used;
|
||||
if (outlines[i] != nullptr)
|
||||
@ -967,7 +967,7 @@ void Tesseract::AssignDiacriticsToOverlappingBlobs(const std::vector<C_OUTLINE *
|
||||
const TBOX blob_box = blob->bounding_box();
|
||||
blob_wanted.resize(outlines.size(), false);
|
||||
int num_blob_outlines = 0;
|
||||
for (int i = 0; i < outlines.size(); ++i) {
|
||||
for (unsigned i = 0; i < outlines.size(); ++i) {
|
||||
if (blob_box.major_x_overlap(outlines[i]->bounding_box()) && !(*word_wanted)[i]) {
|
||||
blob_wanted[i] = true;
|
||||
(*overlapped_any_blob)[i] = true;
|
||||
@ -985,7 +985,7 @@ void Tesseract::AssignDiacriticsToOverlappingBlobs(const std::vector<C_OUTLINE *
|
||||
if (0 < num_blob_outlines && num_blob_outlines < noise_maxperblob) {
|
||||
if (SelectGoodDiacriticOutlines(pass, noise_cert_basechar, pr_it, blob, outlines,
|
||||
num_blob_outlines, &blob_wanted)) {
|
||||
for (int i = 0; i < blob_wanted.size(); ++i) {
|
||||
for (unsigned i = 0; i < blob_wanted.size(); ++i) {
|
||||
if (blob_wanted[i]) {
|
||||
// Claim the outline and record where it is going.
|
||||
(*word_wanted)[i] = true;
|
||||
@ -1007,7 +1007,7 @@ void Tesseract::AssignDiacriticsToNewBlobs(const std::vector<C_OUTLINE *> &outli
|
||||
word_wanted->resize(outlines.size(), false);
|
||||
target_blobs->resize(outlines.size(), nullptr);
|
||||
// Check for outlines that need to be turned into stand-alone blobs.
|
||||
for (int i = 0; i < outlines.size(); ++i) {
|
||||
for (unsigned i = 0; i < outlines.size(); ++i) {
|
||||
if (outlines[i] == nullptr)
|
||||
continue;
|
||||
// Get a set of adjacent outlines that don't overlap any existing blob.
|
||||
@ -1039,7 +1039,7 @@ void Tesseract::AssignDiacriticsToNewBlobs(const std::vector<C_OUTLINE *> &outli
|
||||
num_blob_outlines, &blob_wanted)) {
|
||||
if (debug_noise_removal)
|
||||
tprintf("Added to left blob\n");
|
||||
for (int j = 0; j < blob_wanted.size(); ++j) {
|
||||
for (unsigned j = 0; j < blob_wanted.size(); ++j) {
|
||||
if (blob_wanted[j]) {
|
||||
(*word_wanted)[j] = true;
|
||||
(*target_blobs)[j] = left_blob;
|
||||
@ -1052,7 +1052,7 @@ void Tesseract::AssignDiacriticsToNewBlobs(const std::vector<C_OUTLINE *> &outli
|
||||
num_blob_outlines, &blob_wanted)) {
|
||||
if (debug_noise_removal)
|
||||
tprintf("Added to right blob\n");
|
||||
for (int j = 0; j < blob_wanted.size(); ++j) {
|
||||
for (unsigned j = 0; j < blob_wanted.size(); ++j) {
|
||||
if (blob_wanted[j]) {
|
||||
(*word_wanted)[j] = true;
|
||||
(*target_blobs)[j] = right_blob;
|
||||
@ -1062,7 +1062,7 @@ void Tesseract::AssignDiacriticsToNewBlobs(const std::vector<C_OUTLINE *> &outli
|
||||
num_blob_outlines, &blob_wanted)) {
|
||||
if (debug_noise_removal)
|
||||
tprintf("Fitted between blobs\n");
|
||||
for (int j = 0; j < blob_wanted.size(); ++j) {
|
||||
for (unsigned j = 0; j < blob_wanted.size(); ++j) {
|
||||
if (blob_wanted[j]) {
|
||||
(*word_wanted)[j] = true;
|
||||
(*target_blobs)[j] = nullptr;
|
||||
@ -1098,7 +1098,7 @@ bool Tesseract::SelectGoodDiacriticOutlines(int pass, float certainty_threshold,
|
||||
float best_cert = ClassifyBlobPlusOutlines(test_outlines, outlines, pass, pr_it, blob, all_str);
|
||||
if (debug_noise_removal) {
|
||||
TBOX ol_box;
|
||||
for (int i = 0; i < test_outlines.size(); ++i) {
|
||||
for (unsigned i = 0; i < test_outlines.size(); ++i) {
|
||||
if (test_outlines[i])
|
||||
ol_box += outlines[i]->bounding_box();
|
||||
}
|
||||
@ -1113,14 +1113,14 @@ bool Tesseract::SelectGoodDiacriticOutlines(int pass, float certainty_threshold,
|
||||
(blob == nullptr || best_cert < target_cert || blob != nullptr)) {
|
||||
// Find the best bit to zero out.
|
||||
best_index = -1;
|
||||
for (int i = 0; i < outlines.size(); ++i) {
|
||||
for (unsigned i = 0; i < outlines.size(); ++i) {
|
||||
if (test_outlines[i]) {
|
||||
test_outlines[i] = false;
|
||||
std::string str;
|
||||
float cert = ClassifyBlobPlusOutlines(test_outlines, outlines, pass, pr_it, blob, str);
|
||||
if (debug_noise_removal) {
|
||||
TBOX ol_box;
|
||||
for (int j = 0; j < outlines.size(); ++j) {
|
||||
for (unsigned j = 0; j < outlines.size(); ++j) {
|
||||
if (test_outlines[j])
|
||||
ol_box += outlines[j]->bounding_box();
|
||||
tprintf("%c", test_outlines[j] ? 'T' : 'F');
|
||||
@ -1147,8 +1147,8 @@ bool Tesseract::SelectGoodDiacriticOutlines(int pass, float certainty_threshold,
|
||||
*ok_outlines = best_outlines;
|
||||
if (debug_noise_removal) {
|
||||
tprintf("%s noise combination ", blob ? "Adding" : "New");
|
||||
for (int i = 0; i < best_outlines.size(); ++i) {
|
||||
tprintf("%c", best_outlines[i] ? 'T' : 'F');
|
||||
for (auto best_outline : best_outlines) {
|
||||
tprintf("%c", best_outline ? 'T' : 'F');
|
||||
}
|
||||
tprintf(" yields certainty %g, beating target of %g\n", best_cert, target_cert);
|
||||
}
|
||||
@ -1171,7 +1171,7 @@ float Tesseract::ClassifyBlobPlusOutlines(const std::vector<bool> &ok_outlines,
|
||||
ol_it.set_to_list(blob->out_list());
|
||||
first_to_keep = ol_it.data();
|
||||
}
|
||||
for (int i = 0; i < ok_outlines.size(); ++i) {
|
||||
for (unsigned i = 0; i < ok_outlines.size(); ++i) {
|
||||
if (ok_outlines[i]) {
|
||||
// This outline is to be added.
|
||||
if (blob == nullptr) {
|
||||
@ -1278,7 +1278,7 @@ void Tesseract::classify_word_and_language(int pass_n, PAGE_RES_IT *pr_it, WordD
|
||||
most_recently_used_ = word->tesseract;
|
||||
return;
|
||||
}
|
||||
int sub = sub_langs_.size();
|
||||
auto sub = sub_langs_.size();
|
||||
if (most_recently_used_ != this) {
|
||||
// Get the index of the most_recently_used_.
|
||||
for (sub = 0; sub < sub_langs_.size() && most_recently_used_ != sub_langs_[sub]; ++sub) {
|
||||
@ -1294,7 +1294,7 @@ void Tesseract::classify_word_and_language(int pass_n, PAGE_RES_IT *pr_it, WordD
|
||||
&word_data->lang_words[sub_langs_.size()], &best_words) > 0) {
|
||||
best_lang_tess = this;
|
||||
}
|
||||
for (int i = 0; !WordsAcceptable(best_words) && i < sub_langs_.size(); ++i) {
|
||||
for (unsigned i = 0; !WordsAcceptable(best_words) && i < sub_langs_.size(); ++i) {
|
||||
if (most_recently_used_ != sub_langs_[i] &&
|
||||
sub_langs_[i]->RetryWithLanguage(*word_data, recognizer, debug, &word_data->lang_words[i],
|
||||
&best_words) > 0) {
|
||||
@ -1876,10 +1876,10 @@ void Tesseract::set_word_fonts(WERD_RES *word) {
|
||||
if (choice == nullptr)
|
||||
continue;
|
||||
auto &fonts = choice->fonts();
|
||||
for (int f = 0; f < fonts.size(); ++f) {
|
||||
const int fontinfo_id = fonts[f].fontinfo_id;
|
||||
for (auto &f : fonts) {
|
||||
const int fontinfo_id = f.fontinfo_id;
|
||||
if (0 <= fontinfo_id && fontinfo_id < fontinfo_size) {
|
||||
font_total_score[fontinfo_id] += fonts[f].score;
|
||||
font_total_score[fontinfo_id] += f.score;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -75,14 +75,14 @@ bool Tesseract::TrainLineRecognizer(const char *input_imagename, const std::stri
|
||||
// appends them to the given training_data.
|
||||
void Tesseract::TrainFromBoxes(const std::vector<TBOX> &boxes, const std::vector<std::string> &texts,
|
||||
BLOCK_LIST *block_list, DocumentData *training_data) {
|
||||
int box_count = boxes.size();
|
||||
auto box_count = boxes.size();
|
||||
// Process all the text lines in this page, as defined by the boxes.
|
||||
int end_box = 0;
|
||||
unsigned end_box = 0;
|
||||
// Don't let \t, which marks newlines in the box file, get into the line
|
||||
// content, as that makes the line unusable in training.
|
||||
while (end_box < texts.size() && texts[end_box] == "\t")
|
||||
++end_box;
|
||||
for (int start_box = end_box; start_box < box_count; start_box = end_box) {
|
||||
for (auto start_box = end_box; start_box < box_count; start_box = end_box) {
|
||||
// Find the textline of boxes starting at start and their bounding box.
|
||||
TBOX line_box = boxes[start_box];
|
||||
std::string line_str = texts[start_box];
|
||||
|
@ -352,8 +352,8 @@ ColumnFinder *Tesseract::SetupPageSegAndDetectOrientation(PageSegMode pageseg_mo
|
||||
// We are running osd as part of layout analysis, so constrain the
|
||||
// scripts to those allowed by *this.
|
||||
AddAllScriptsConverted(unicharset, osd_tess->unicharset, &osd_scripts);
|
||||
for (int s = 0; s < sub_langs_.size(); ++s) {
|
||||
AddAllScriptsConverted(sub_langs_[s]->unicharset, osd_tess->unicharset, &osd_scripts);
|
||||
for (auto &lang : sub_langs_) {
|
||||
AddAllScriptsConverted(lang->unicharset, osd_tess->unicharset, &osd_scripts);
|
||||
}
|
||||
}
|
||||
os_detect_blobs(&osd_scripts, &osd_blobs, osr, osd_tess);
|
||||
|
@ -2259,26 +2259,26 @@ void DetectParagraphs(int debug_level, std::vector<RowInfo> *row_infos,
|
||||
|
||||
std::vector<Interval> leftovers;
|
||||
LeftoverSegments(rows, &leftovers, 0, rows.size());
|
||||
for (int i = 0; i < leftovers.size(); i++) {
|
||||
for (auto &leftover : leftovers) {
|
||||
// Pass 2a:
|
||||
// Find any strongly evidenced start-of-paragraph lines. If they're
|
||||
// followed by two lines that look like body lines, make a paragraph
|
||||
// model for that and see if that model applies throughout the text
|
||||
// (that is, "smear" it).
|
||||
StrongEvidenceClassify(debug_level, &rows, leftovers[i].begin, leftovers[i].end, &theory);
|
||||
StrongEvidenceClassify(debug_level, &rows, leftover.begin, leftover.end, &theory);
|
||||
|
||||
// Pass 2b:
|
||||
// If we had any luck in pass 2a, we got part of the page and didn't
|
||||
// know how to classify a few runs of rows. Take the segments that
|
||||
// didn't find a model and reprocess them individually.
|
||||
std::vector<Interval> leftovers2;
|
||||
LeftoverSegments(rows, &leftovers2, leftovers[i].begin, leftovers[i].end);
|
||||
LeftoverSegments(rows, &leftovers2, leftover.begin, leftover.end);
|
||||
bool pass2a_was_useful =
|
||||
leftovers2.size() > 1 ||
|
||||
(leftovers2.size() == 1 && (leftovers2[0].begin != 0 || leftovers2[0].end != rows.size()));
|
||||
if (pass2a_was_useful) {
|
||||
for (int j = 0; j < leftovers2.size(); j++) {
|
||||
StrongEvidenceClassify(debug_level, &rows, leftovers2[j].begin, leftovers2[j].end, &theory);
|
||||
for (auto &leftover2 : leftovers2) {
|
||||
StrongEvidenceClassify(debug_level, &rows, leftover2.begin, leftover2.end, &theory);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2290,8 +2290,8 @@ void DetectParagraphs(int debug_level, std::vector<RowInfo> *row_infos,
|
||||
// and geometric clues to form matching models for. Let's see if
|
||||
// the geometric clues are simple enough that we could just use those.
|
||||
LeftoverSegments(rows, &leftovers, 0, rows.size());
|
||||
for (int i = 0; i < leftovers.size(); i++) {
|
||||
GeometricClassify(debug_level, &rows, leftovers[i].begin, leftovers[i].end, &theory);
|
||||
for (auto &leftover : leftovers) {
|
||||
GeometricClassify(debug_level, &rows, leftover.begin, leftover.end, &theory);
|
||||
}
|
||||
|
||||
// Undo any flush models for which there's little evidence.
|
||||
@ -2302,8 +2302,8 @@ void DetectParagraphs(int debug_level, std::vector<RowInfo> *row_infos,
|
||||
// Pass 4:
|
||||
// Take everything that's still not marked up well and clear all markings.
|
||||
LeftoverSegments(rows, &leftovers, 0, rows.size());
|
||||
for (int i = 0; i < leftovers.size(); i++) {
|
||||
for (int j = leftovers[i].begin; j < leftovers[i].end; j++) {
|
||||
for (auto &leftover : leftovers) {
|
||||
for (int j = leftover.begin; j < leftover.end; j++) {
|
||||
rows[j].SetUnknown();
|
||||
}
|
||||
}
|
||||
@ -2498,16 +2498,16 @@ void DetectParagraphs(int debug_level, bool after_text_recognition,
|
||||
if (!row_infos.empty()) {
|
||||
int min_lmargin = row_infos[0].pix_ldistance;
|
||||
int min_rmargin = row_infos[0].pix_rdistance;
|
||||
for (int i = 1; i < row_infos.size(); i++) {
|
||||
for (unsigned i = 1; i < row_infos.size(); i++) {
|
||||
if (row_infos[i].pix_ldistance < min_lmargin)
|
||||
min_lmargin = row_infos[i].pix_ldistance;
|
||||
if (row_infos[i].pix_rdistance < min_rmargin)
|
||||
min_rmargin = row_infos[i].pix_rdistance;
|
||||
}
|
||||
if (min_lmargin > 0 || min_rmargin > 0) {
|
||||
for (int i = 0; i < row_infos.size(); i++) {
|
||||
row_infos[i].pix_ldistance -= min_lmargin;
|
||||
row_infos[i].pix_rdistance -= min_rmargin;
|
||||
for (auto &row_info : row_infos) {
|
||||
row_info.pix_ldistance -= min_lmargin;
|
||||
row_info.pix_rdistance -= min_rmargin;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2524,10 +2524,10 @@ void DetectParagraphs(int debug_level, bool after_text_recognition,
|
||||
|
||||
// Now stitch in the row_owners into the rows.
|
||||
row = *block_start;
|
||||
for (int i = 0; i < row_owners.size(); i++) {
|
||||
for (auto &row_owner : row_owners) {
|
||||
while (!row.PageResIt()->row())
|
||||
row.Next(RIL_TEXTLINE);
|
||||
row.PageResIt()->row()->row->set_para(row_owners[i]);
|
||||
row.PageResIt()->row()->row->set_para(row_owner);
|
||||
row.Next(RIL_TEXTLINE);
|
||||
}
|
||||
}
|
||||
|
@ -209,21 +209,20 @@ SVMenuNode *ParamsEditor::BuildListOfAllLeaves(tesseract::Tesseract *tess) {
|
||||
std::map<const char *, int> amount;
|
||||
|
||||
// Add all parameters to a list.
|
||||
int v, i;
|
||||
int num_iterations = (tess->params() == nullptr) ? 1 : 2;
|
||||
for (v = 0; v < num_iterations; ++v) {
|
||||
for (int v = 0; v < num_iterations; ++v) {
|
||||
tesseract::ParamsVectors *vec = (v == 0) ? GlobalParams() : tess->params();
|
||||
for (i = 0; i < vec->int_params.size(); ++i) {
|
||||
vc_it.add_after_then_move(new ParamContent(vec->int_params[i]));
|
||||
for (auto &param : vec->int_params) {
|
||||
vc_it.add_after_then_move(new ParamContent(param));
|
||||
}
|
||||
for (i = 0; i < vec->bool_params.size(); ++i) {
|
||||
vc_it.add_after_then_move(new ParamContent(vec->bool_params[i]));
|
||||
for (auto &param : vec->bool_params) {
|
||||
vc_it.add_after_then_move(new ParamContent(param));
|
||||
}
|
||||
for (i = 0; i < vec->string_params.size(); ++i) {
|
||||
vc_it.add_after_then_move(new ParamContent(vec->string_params[i]));
|
||||
for (auto &param : vec->string_params) {
|
||||
vc_it.add_after_then_move(new ParamContent(param));
|
||||
}
|
||||
for (i = 0; i < vec->double_params.size(); ++i) {
|
||||
vc_it.add_after_then_move(new ParamContent(vec->double_params[i]));
|
||||
for (auto &param : vec->double_params) {
|
||||
vc_it.add_after_then_move(new ParamContent(param));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -219,8 +219,8 @@ public:
|
||||
pixDestroy(&pix_original_);
|
||||
pix_original_ = original_pix;
|
||||
// Clone to sublangs as well.
|
||||
for (int i = 0; i < sub_langs_.size(); ++i) {
|
||||
sub_langs_[i]->set_pix_original(original_pix ? pixClone(original_pix) : nullptr);
|
||||
for (auto &lang : sub_langs_) {
|
||||
lang->set_pix_original(original_pix ? pixClone(original_pix) : nullptr);
|
||||
}
|
||||
}
|
||||
// Returns a pointer to a Pix representing the best available resolution image
|
||||
@ -286,8 +286,8 @@ public:
|
||||
bool AnyTessLang() const {
|
||||
if (tessedit_ocr_engine_mode != OEM_LSTM_ONLY)
|
||||
return true;
|
||||
for (int i = 0; i < sub_langs_.size(); ++i) {
|
||||
if (sub_langs_[i]->tessedit_ocr_engine_mode != OEM_LSTM_ONLY)
|
||||
for (auto &lang : sub_langs_) {
|
||||
if (lang->tessedit_ocr_engine_mode != OEM_LSTM_ONLY)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
@ -296,8 +296,8 @@ public:
|
||||
bool AnyLSTMLang() const {
|
||||
if (tessedit_ocr_engine_mode != OEM_TESSERACT_ONLY)
|
||||
return true;
|
||||
for (int i = 0; i < sub_langs_.size(); ++i) {
|
||||
if (sub_langs_[i]->tessedit_ocr_engine_mode != OEM_TESSERACT_ONLY) {
|
||||
for (auto &lang : sub_langs_) {
|
||||
if (lang->tessedit_ocr_engine_mode != OEM_TESSERACT_ONLY) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -83,13 +83,13 @@ struct FontInfo {
|
||||
// (FontInfo class takes ownership of the pointer).
|
||||
// Note: init_spacing should be called before calling this function.
|
||||
void add_spacing(UNICHAR_ID uch_id, FontSpacingInfo *spacing_info) {
|
||||
ASSERT_HOST(spacing_vec != nullptr && spacing_vec->size() > uch_id);
|
||||
ASSERT_HOST(static_cast<size_t>(uch_id) < spacing_vec->size());
|
||||
(*spacing_vec)[uch_id] = spacing_info;
|
||||
}
|
||||
|
||||
// Returns the pointer to FontSpacingInfo for the given UNICHAR_ID.
|
||||
const FontSpacingInfo *get_spacing(UNICHAR_ID uch_id) const {
|
||||
return (spacing_vec == nullptr || spacing_vec->size() <= uch_id) ? nullptr
|
||||
return (spacing_vec == nullptr || spacing_vec->size() <= static_cast<size_t>(uch_id)) ? nullptr
|
||||
: (*spacing_vec)[uch_id];
|
||||
}
|
||||
|
||||
@ -100,7 +100,7 @@ struct FontInfo {
|
||||
const FontSpacingInfo *fsi = this->get_spacing(uch_id);
|
||||
if (prev_fsi == nullptr || fsi == nullptr)
|
||||
return false;
|
||||
int i = 0;
|
||||
size_t i = 0;
|
||||
for (; i < prev_fsi->kerned_unichar_ids.size(); ++i) {
|
||||
if (prev_fsi->kerned_unichar_ids[i] == uch_id)
|
||||
break;
|
||||
|
@ -102,15 +102,15 @@ public:
|
||||
int score1 = 0, score2 = 0;
|
||||
fontinfo_id_ = -1;
|
||||
fontinfo_id2_ = -1;
|
||||
for (int f = 0; f < fonts_.size(); ++f) {
|
||||
if (fonts_[f].score > score1) {
|
||||
for (auto &f : fonts_) {
|
||||
if (f.score > score1) {
|
||||
score2 = score1;
|
||||
fontinfo_id2_ = fontinfo_id_;
|
||||
score1 = fonts_[f].score;
|
||||
fontinfo_id_ = fonts_[f].fontinfo_id;
|
||||
} else if (fonts_[f].score > score2) {
|
||||
score2 = fonts_[f].score;
|
||||
fontinfo_id2_ = fonts_[f].fontinfo_id;
|
||||
score1 = f.score;
|
||||
fontinfo_id_ = f.fontinfo_id;
|
||||
} else if (f.score > score2) {
|
||||
score2 = f.score;
|
||||
fontinfo_id2_ = f.fontinfo_id;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -234,7 +234,7 @@ bool DeSerialize(bool swap, FILE *fp, std::vector<T> &data) {
|
||||
return false;
|
||||
}
|
||||
if (swap) {
|
||||
for (int i = 0; i < size; ++i) {
|
||||
for (uint32_t i = 0; i < size; ++i) {
|
||||
ReverseN(&data[i], sizeof(T));
|
||||
}
|
||||
}
|
||||
|
@ -38,15 +38,15 @@ public:
|
||||
ObjectCache() = default;
|
||||
~ObjectCache() {
|
||||
std::lock_guard<std::mutex> guard(mu_);
|
||||
for (int i = 0; i < cache_.size(); i++) {
|
||||
if (cache_[i].count > 0) {
|
||||
for (auto &it : cache_) {
|
||||
if (it.count > 0) {
|
||||
tprintf(
|
||||
"ObjectCache(%p)::~ObjectCache(): WARNING! LEAK! object %p "
|
||||
"still has count %d (id %s)\n",
|
||||
this, cache_[i].object, cache_[i].count, cache_[i].id.c_str());
|
||||
this, it.object, it.count, it.id.c_str());
|
||||
} else {
|
||||
delete cache_[i].object;
|
||||
cache_[i].object = nullptr;
|
||||
delete it.object;
|
||||
it.object = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -60,11 +60,11 @@ public:
|
||||
T *Get(const std::string &id, std::function<T *()> loader) {
|
||||
T *retval = nullptr;
|
||||
std::lock_guard<std::mutex> guard(mu_);
|
||||
for (int i = 0; i < cache_.size(); i++) {
|
||||
if (id == cache_[i].id) {
|
||||
retval = cache_[i].object;
|
||||
if (cache_[i].object != nullptr) {
|
||||
cache_[i].count++;
|
||||
for (auto &it : cache_) {
|
||||
if (id == it.id) {
|
||||
retval = it.object;
|
||||
if (it.object != nullptr) {
|
||||
it.count++;
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
@ -83,9 +83,9 @@ public:
|
||||
if (t == nullptr)
|
||||
return false;
|
||||
std::lock_guard<std::mutex> guard(mu_);
|
||||
for (int i = 0; i < cache_.size(); i++) {
|
||||
if (cache_[i].object == t) {
|
||||
--cache_[i].count;
|
||||
for (auto &it : cache_) {
|
||||
if (it.object == t) {
|
||||
--it.count;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -68,9 +68,9 @@ public:
|
||||
// Uses a linear search.
|
||||
void Add(T value, int count) {
|
||||
// Linear search for value.
|
||||
for (int i = 0; i < counts_.size(); ++i) {
|
||||
if (counts_[i].value == value) {
|
||||
counts_[i].count += count;
|
||||
for (auto &it : counts_) {
|
||||
if (it.value == value) {
|
||||
it.count += count;
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -83,11 +83,11 @@ public:
|
||||
// If the array is empty, returns -INT32_MAX and max_value is unchanged.
|
||||
int MaxCount(T *max_value) const {
|
||||
int best_count = -INT32_MAX;
|
||||
for (int i = 0; i < counts_.size(); ++i) {
|
||||
if (counts_[i].count > best_count) {
|
||||
best_count = counts_[i].count;
|
||||
for (auto &it : counts_) {
|
||||
if (it.count > best_count) {
|
||||
best_count = it.count;
|
||||
if (max_value != nullptr)
|
||||
*max_value = counts_[i].value;
|
||||
*max_value = it.value;
|
||||
}
|
||||
}
|
||||
return best_count;
|
||||
|
@ -212,7 +212,7 @@ int UNICHARSET::step(const char *str) const {
|
||||
// Return whether the given UTF-8 string is encodable with this UNICHARSET.
|
||||
// If not encodable, write the first byte offset which cannot be converted
|
||||
// into the second (return) argument.
|
||||
bool UNICHARSET::encodable_string(const char *str, int *first_bad_position) const {
|
||||
bool UNICHARSET::encodable_string(const char *str, unsigned *first_bad_position) const {
|
||||
std::vector<UNICHAR_ID> encoding;
|
||||
return encode_string(str, true, &encoding, nullptr, first_bad_position);
|
||||
}
|
||||
@ -228,13 +228,13 @@ bool UNICHARSET::encodable_string(const char *str, int *first_bad_position) cons
|
||||
// Use CleanupString to perform the cleaning.
|
||||
bool UNICHARSET::encode_string(const char *str, bool give_up_on_failure,
|
||||
std::vector<UNICHAR_ID> *encoding, std::vector<char> *lengths,
|
||||
int *encoded_length) const {
|
||||
unsigned *encoded_length) const {
|
||||
std::vector<UNICHAR_ID> working_encoding;
|
||||
std::vector<char> working_lengths;
|
||||
std::vector<char> best_lengths;
|
||||
encoding->clear(); // Just in case str is empty.
|
||||
int str_length = strlen(str);
|
||||
int str_pos = 0;
|
||||
auto str_length = strlen(str);
|
||||
unsigned str_pos = 0;
|
||||
bool perfect = true;
|
||||
while (str_pos < str_length) {
|
||||
encode_string(str, str_pos, str_length, &working_encoding, &working_lengths, &str_pos, encoding,
|
||||
@ -366,8 +366,8 @@ bool UNICHARSET::get_isprivate(UNICHAR_ID unichar_id) const {
|
||||
|
||||
// Sets all ranges to empty, so they can be expanded to set the values.
|
||||
void UNICHARSET::set_ranges_empty() {
|
||||
for (int id = 0; id < unichars.size(); ++id) {
|
||||
unichars[id].properties.SetRangesEmpty();
|
||||
for (auto &uc : unichars) {
|
||||
uc.properties.SetRangesEmpty();
|
||||
}
|
||||
}
|
||||
|
||||
@ -404,7 +404,7 @@ void UNICHARSET::PartialSetPropertiesFromOther(int start_index, const UNICHARSET
|
||||
// src unicharset with ranges in it. The unicharsets don't have to be the
|
||||
// same, and graphemes are correctly accounted for.
|
||||
void UNICHARSET::ExpandRangesFromOther(const UNICHARSET &src) {
|
||||
for (int ch = 0; ch < unichars.size(); ++ch) {
|
||||
for (unsigned ch = 0; ch < unichars.size(); ++ch) {
|
||||
const char *utf8 = id_to_unichar(ch);
|
||||
UNICHAR_PROPERTIES properties;
|
||||
if (src.GetStrProperties(utf8, &properties)) {
|
||||
@ -418,7 +418,7 @@ void UNICHARSET::ExpandRangesFromOther(const UNICHARSET &src) {
|
||||
// ids will not be present in this if not in src. Does NOT reorder the set!
|
||||
void UNICHARSET::CopyFrom(const UNICHARSET &src) {
|
||||
clear();
|
||||
for (int ch = 0; ch < src.unichars.size(); ++ch) {
|
||||
for (unsigned ch = 0; ch < src.unichars.size(); ++ch) {
|
||||
const UNICHAR_PROPERTIES &src_props = src.unichars[ch].properties;
|
||||
const char *utf8 = src.id_to_unichar(ch);
|
||||
unichar_insert_backwards_compatible(utf8);
|
||||
@ -434,7 +434,7 @@ void UNICHARSET::CopyFrom(const UNICHARSET &src) {
|
||||
// ExpandRangesFromOther.
|
||||
void UNICHARSET::AppendOtherUnicharset(const UNICHARSET &src) {
|
||||
int initial_used = unichars.size();
|
||||
for (int ch = 0; ch < src.unichars.size(); ++ch) {
|
||||
for (unsigned ch = 0; ch < src.unichars.size(); ++ch) {
|
||||
const UNICHAR_PROPERTIES &src_props = src.unichars[ch].properties;
|
||||
const char *utf8 = src.id_to_unichar(ch);
|
||||
int id = unichars.size();
|
||||
@ -470,7 +470,7 @@ bool UNICHARSET::SizesDistinct(UNICHAR_ID id1, UNICHAR_ID id2) const {
|
||||
// See unicharset.h for definition of the args.
|
||||
void UNICHARSET::encode_string(const char *str, int str_index, int str_length,
|
||||
std::vector<UNICHAR_ID> *encoding, std::vector<char> *lengths,
|
||||
int *best_total_length, std::vector<UNICHAR_ID> *best_encoding,
|
||||
unsigned *best_total_length, std::vector<UNICHAR_ID> *best_encoding,
|
||||
std::vector<char> *best_lengths) const {
|
||||
if (str_index > *best_total_length) {
|
||||
// This is the best result so far.
|
||||
@ -519,8 +519,8 @@ bool UNICHARSET::GetStrProperties(const char *utf8_str, UNICHAR_PROPERTIES *prop
|
||||
std::vector<UNICHAR_ID> encoding;
|
||||
if (!encode_string(utf8_str, true, &encoding, nullptr, nullptr))
|
||||
return false; // Some part was invalid.
|
||||
for (int i = 0; i < encoding.size(); ++i) {
|
||||
int id = encoding[i];
|
||||
for (auto it : encoding) {
|
||||
int id = it;
|
||||
const UNICHAR_PROPERTIES &src_props = unichars[id].properties;
|
||||
// Logical OR all the bools.
|
||||
if (src_props.isalpha)
|
||||
@ -888,7 +888,7 @@ void UNICHARSET::post_load_setup() {
|
||||
// not the common script, as that still contains some "alphas".
|
||||
int *script_counts = new int[script_table_size_used];
|
||||
memset(script_counts, 0, sizeof(*script_counts) * script_table_size_used);
|
||||
for (int id = 0; id < unichars.size(); ++id) {
|
||||
for (unsigned id = 0; id < unichars.size(); ++id) {
|
||||
if (get_isalpha(id)) {
|
||||
++script_counts[get_script(id)];
|
||||
}
|
||||
@ -908,7 +908,7 @@ void UNICHARSET::post_load_setup() {
|
||||
bool UNICHARSET::major_right_to_left() const {
|
||||
int ltr_count = 0;
|
||||
int rtl_count = 0;
|
||||
for (int id = 0; id < unichars.size(); ++id) {
|
||||
for (unsigned id = 0; id < unichars.size(); ++id) {
|
||||
int dir = get_direction(id);
|
||||
if (dir == UNICHARSET::U_LEFT_TO_RIGHT)
|
||||
ltr_count++;
|
||||
@ -927,33 +927,33 @@ void UNICHARSET::set_black_and_whitelist(const char *blacklist, const char *whit
|
||||
const char *unblacklist) {
|
||||
bool def_enabled = whitelist == nullptr || whitelist[0] == '\0';
|
||||
// Set everything to default
|
||||
for (int ch = 0; ch < unichars.size(); ++ch)
|
||||
unichars[ch].properties.enabled = def_enabled;
|
||||
for (auto &uc : unichars)
|
||||
uc.properties.enabled = def_enabled;
|
||||
if (!def_enabled) {
|
||||
// Enable the whitelist.
|
||||
std::vector<UNICHAR_ID> encoding;
|
||||
encode_string(whitelist, false, &encoding, nullptr, nullptr);
|
||||
for (int i = 0; i < encoding.size(); ++i) {
|
||||
if (encoding[i] != INVALID_UNICHAR_ID)
|
||||
unichars[encoding[i]].properties.enabled = true;
|
||||
for (auto it : encoding) {
|
||||
if (it != INVALID_UNICHAR_ID)
|
||||
unichars[it].properties.enabled = true;
|
||||
}
|
||||
}
|
||||
if (blacklist != nullptr && blacklist[0] != '\0') {
|
||||
// Disable the blacklist.
|
||||
std::vector<UNICHAR_ID> encoding;
|
||||
encode_string(blacklist, false, &encoding, nullptr, nullptr);
|
||||
for (int i = 0; i < encoding.size(); ++i) {
|
||||
if (encoding[i] != INVALID_UNICHAR_ID)
|
||||
unichars[encoding[i]].properties.enabled = false;
|
||||
for (auto it : encoding) {
|
||||
if (it != INVALID_UNICHAR_ID)
|
||||
unichars[it].properties.enabled = false;
|
||||
}
|
||||
}
|
||||
if (unblacklist != nullptr && unblacklist[0] != '\0') {
|
||||
// Re-enable the unblacklist.
|
||||
std::vector<UNICHAR_ID> encoding;
|
||||
encode_string(unblacklist, false, &encoding, nullptr, nullptr);
|
||||
for (int i = 0; i < encoding.size(); ++i) {
|
||||
if (encoding[i] != INVALID_UNICHAR_ID)
|
||||
unichars[encoding[i]].properties.enabled = true;
|
||||
for (auto it : encoding) {
|
||||
if (it != INVALID_UNICHAR_ID)
|
||||
unichars[it].properties.enabled = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -220,7 +220,7 @@ public:
|
||||
// Returns true if the given UTF-8 string is encodable with this UNICHARSET.
|
||||
// If not encodable, write the first byte offset which cannot be converted
|
||||
// into the second (return) argument.
|
||||
bool encodable_string(const char *str, int *first_bad_position) const;
|
||||
bool encodable_string(const char *str, unsigned *first_bad_position) const;
|
||||
|
||||
// Encodes the given UTF-8 string with this UNICHARSET.
|
||||
// Any part of the string that cannot be encoded (because the utf8 can't
|
||||
@ -238,7 +238,7 @@ public:
|
||||
// that do not belong in the unicharset, or encoding may fail.
|
||||
// Use CleanupString to perform the cleaning.
|
||||
bool encode_string(const char *str, bool give_up_on_failure, std::vector<UNICHAR_ID> *encoding,
|
||||
std::vector<char> *lengths, int *encoded_length) const;
|
||||
std::vector<char> *lengths, unsigned *encoded_length) const;
|
||||
|
||||
// Return the unichar representation corresponding to the given UNICHAR_ID
|
||||
// within the UNICHARSET.
|
||||
@ -294,7 +294,7 @@ public:
|
||||
// Return true if the given unichar id exists within the set.
|
||||
// Relies on the fact that unichar ids are contiguous in the unicharset.
|
||||
bool contains_unichar_id(UNICHAR_ID unichar_id) const {
|
||||
return unichar_id != INVALID_UNICHAR_ID && unichar_id < unichars.size() && unichar_id >= 0;
|
||||
return static_cast<size_t>(unichar_id) < unichars.size();
|
||||
}
|
||||
|
||||
// Return true if the given unichar representation exists within the set.
|
||||
@ -1000,7 +1000,7 @@ private:
|
||||
// best_lengths (may be null) contains the lengths of best_encoding.
|
||||
void encode_string(const char *str, int str_index, int str_length,
|
||||
std::vector<UNICHAR_ID> *encoding, std::vector<char> *lengths,
|
||||
int *best_total_length, std::vector<UNICHAR_ID> *best_encoding,
|
||||
unsigned *best_total_length, std::vector<UNICHAR_ID> *best_encoding,
|
||||
std::vector<char> *best_lengths) const;
|
||||
|
||||
// Gets the properties for a grapheme string, combining properties for
|
||||
|
@ -107,7 +107,7 @@ struct ADAPT_RESULTS {
|
||||
best_unichar_id = INVALID_UNICHAR_ID;
|
||||
best_match_index = -1;
|
||||
best_rating = WORST_POSSIBLE_RATING;
|
||||
for (int i = 0; i < match.size(); ++i) {
|
||||
for (unsigned i = 0; i < match.size(); ++i) {
|
||||
if (match[i].rating > best_rating) {
|
||||
best_rating = match[i].rating;
|
||||
best_unichar_id = match[i].unichar_id;
|
||||
@ -145,7 +145,7 @@ inline bool MarginalMatch(float confidence, float matcher_great_threshold) {
|
||||
// Returns the index of the given id in results, if present, or the size of the
|
||||
// vector (index it will go at) if not present.
|
||||
static int FindScoredUnichar(UNICHAR_ID id, const ADAPT_RESULTS &results) {
|
||||
for (int i = 0; i < results.match.size(); i++) {
|
||||
for (unsigned i = 0; i < results.match.size(); i++) {
|
||||
if (results.match[i].unichar_id == id)
|
||||
return i;
|
||||
}
|
||||
@ -1112,7 +1112,7 @@ void Classify::ExpandShapesAndApplyCorrections(ADAPT_CLASS *classes, bool debug,
|
||||
if (!unicharset.get_enabled(unichar_id))
|
||||
continue;
|
||||
// Find the mapped_result for unichar_id.
|
||||
int r = 0;
|
||||
unsigned r = 0;
|
||||
for (r = 0; r < mapped_results.size() && mapped_results[r].unichar_id != unichar_id;
|
||||
++r) {
|
||||
}
|
||||
@ -1127,11 +1127,11 @@ void Classify::ExpandShapesAndApplyCorrections(ADAPT_CLASS *classes, bool debug,
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int m = 0; m < mapped_results.size(); ++m) {
|
||||
mapped_results[m].rating = ComputeCorrectedRating(
|
||||
debug, mapped_results[m].unichar_id, cp_rating, int_result->rating,
|
||||
for (auto &m : mapped_results) {
|
||||
m.rating = ComputeCorrectedRating(
|
||||
debug, m.unichar_id, cp_rating, int_result->rating,
|
||||
int_result->feature_misses, bottom, top, blob_length, matcher_multiplier, cn_factors);
|
||||
AddNewResult(mapped_results[m], final_results);
|
||||
AddNewResult(m, final_results);
|
||||
}
|
||||
return;
|
||||
}
|
||||
@ -1252,8 +1252,8 @@ int Classify::CharNormClassifier(TBLOB *blob, const TrainingSample &sample,
|
||||
std::vector<UnicharRating> unichar_results;
|
||||
static_classifier_->UnicharClassifySample(sample, blob->denorm().pix(), 0, -1, &unichar_results);
|
||||
// Convert results to the format used internally by AdaptiveClassifier.
|
||||
for (int r = 0; r < unichar_results.size(); ++r) {
|
||||
AddNewResult(unichar_results[r], adapt_results);
|
||||
for (auto &r : unichar_results) {
|
||||
AddNewResult(r, adapt_results);
|
||||
}
|
||||
return sample.num_features();
|
||||
} /* CharNormClassifier */
|
||||
@ -1289,16 +1289,16 @@ int Classify::CharNormTrainingSample(bool pruner_only, int keep_this, const Trai
|
||||
}
|
||||
if (pruner_only) {
|
||||
// Convert pruner results to output format.
|
||||
for (int i = 0; i < adapt_results->CPResults.size(); ++i) {
|
||||
int class_id = adapt_results->CPResults[i].Class;
|
||||
results->push_back(UnicharRating(class_id, 1.0f - adapt_results->CPResults[i].Rating));
|
||||
for (auto &it : adapt_results->CPResults) {
|
||||
int class_id = it.Class;
|
||||
results->push_back(UnicharRating(class_id, 1.0f - it.Rating));
|
||||
}
|
||||
} else {
|
||||
MasterMatcher(PreTrainedTemplates, num_features, sample.features(), char_norm_array, nullptr,
|
||||
matcher_debug_flags, classify_integer_matcher_multiplier, blob_box,
|
||||
adapt_results->CPResults, adapt_results);
|
||||
// Convert master matcher results to output format.
|
||||
for (int i = 0; i < adapt_results->match.size(); i++) {
|
||||
for (unsigned i = 0; i < adapt_results->match.size(); i++) {
|
||||
results->push_back(adapt_results->match[i]);
|
||||
}
|
||||
if (results->size() > 1) {
|
||||
@ -1358,8 +1358,8 @@ void Classify::ConvertMatchesToChoices(const DENORM &denorm, const TBOX &box,
|
||||
}
|
||||
|
||||
float best_certainty = -FLT_MAX;
|
||||
for (int i = 0; i < Results->match.size(); i++) {
|
||||
const UnicharRating &result = Results->match[i];
|
||||
for (auto &it : Results->match) {
|
||||
const UnicharRating &result = it;
|
||||
bool adapted = result.adapted;
|
||||
bool current_is_frag = (unicharset.get_fragment(result.unichar_id) != nullptr);
|
||||
if (temp_it.length() + 1 == max_matches && !contains_nonfrag && current_is_frag) {
|
||||
@ -1504,7 +1504,6 @@ void Classify::DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results) {
|
||||
UNICHAR_ID *Classify::GetAmbiguities(TBLOB *Blob, CLASS_ID CorrectClass) {
|
||||
auto *Results = new ADAPT_RESULTS();
|
||||
UNICHAR_ID *Ambiguities;
|
||||
int i;
|
||||
|
||||
Results->Initialize();
|
||||
INT_FX_RESULT_STRUCT fx_info;
|
||||
@ -1526,6 +1525,7 @@ UNICHAR_ID *Classify::GetAmbiguities(TBLOB *Blob, CLASS_ID CorrectClass) {
|
||||
Ambiguities = new UNICHAR_ID[Results->match.size() + 1];
|
||||
if (Results->match.size() > 1 ||
|
||||
(Results->match.size() == 1 && Results->match[0].unichar_id != CorrectClass)) {
|
||||
unsigned i;
|
||||
for (i = 0; i < Results->match.size(); i++)
|
||||
Ambiguities[i] = Results->match[i].unichar_id;
|
||||
Ambiguities[i] = -1;
|
||||
@ -1888,9 +1888,9 @@ int MakeTempProtoPerm(void *item1, void *item2) {
|
||||
* Globals: none
|
||||
*/
|
||||
void Classify::PrintAdaptiveMatchResults(const ADAPT_RESULTS &results) {
|
||||
for (int i = 0; i < results.match.size(); ++i) {
|
||||
tprintf("%s ", unicharset.debug_str(results.match[i].unichar_id).c_str());
|
||||
results.match[i].Print();
|
||||
for (auto &it : results.match) {
|
||||
tprintf("%s ", unicharset.debug_str(it.unichar_id).c_str());
|
||||
it.Print();
|
||||
}
|
||||
} /* PrintAdaptiveMatchResults */
|
||||
|
||||
@ -1908,7 +1908,7 @@ void Classify::PrintAdaptiveMatchResults(const ADAPT_RESULTS &results) {
|
||||
* - matcher_bad_match_pad defines a "bad match"
|
||||
*/
|
||||
void Classify::RemoveBadMatches(ADAPT_RESULTS *Results) {
|
||||
int Next, NextGood;
|
||||
unsigned Next, NextGood;
|
||||
float BadMatchThreshold;
|
||||
static const char *romans = "i v x I V X";
|
||||
BadMatchThreshold = Results->best_rating - matcher_bad_match_pad;
|
||||
@ -1965,7 +1965,7 @@ void Classify::RemoveBadMatches(ADAPT_RESULTS *Results) {
|
||||
* @param Results contains matches to be filtered
|
||||
*/
|
||||
void Classify::RemoveExtraPuncs(ADAPT_RESULTS *Results) {
|
||||
int Next, NextGood;
|
||||
unsigned Next, NextGood;
|
||||
int punc_count; /*no of garbage characters */
|
||||
int digit_count;
|
||||
/*garbage characters */
|
||||
|
@ -110,10 +110,10 @@ public:
|
||||
*/
|
||||
void unichar_ids_of(NODE_REF node, NodeChildVector *vec, bool word_end) const override {
|
||||
const EDGE_VECTOR &forward_edges = nodes_[static_cast<int>(node)]->forward_edges;
|
||||
for (int i = 0; i < forward_edges.size(); ++i) {
|
||||
if (!word_end || end_of_word_from_edge_rec(forward_edges[i])) {
|
||||
for (auto &edge : forward_edges) {
|
||||
if (!word_end || end_of_word_from_edge_rec(edge)) {
|
||||
vec->push_back(
|
||||
NodeChild(unichar_id_from_edge_rec(forward_edges[i]), make_edge_ref(node, i)));
|
||||
NodeChild(unichar_id_from_edge_rec(edge), make_edge_ref(node, &edge - &forward_edges[0])));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -320,7 +320,7 @@ protected:
|
||||
// At most max_num_edges will be printed for each node.
|
||||
void print_all(const char *msg, int max_num_edges) {
|
||||
tprintf("\n__________________________\n%s\n", msg);
|
||||
for (int i = 0; i < nodes_.size(); ++i)
|
||||
for (size_t i = 0; i < nodes_.size(); ++i)
|
||||
print_node(i, max_num_edges);
|
||||
tprintf("__________________________\n");
|
||||
}
|
||||
|
@ -143,8 +143,8 @@ public:
|
||||
learning_rate_ *= factor;
|
||||
if (network_->TestFlag(NF_LAYER_SPECIFIC_LR)) {
|
||||
std::vector<std::string> layers = EnumerateLayers();
|
||||
for (int i = 0; i < layers.size(); ++i) {
|
||||
ScaleLayerLearningRate(layers[i], factor);
|
||||
for (auto &layer : layers) {
|
||||
ScaleLayerLearningRate(layer, factor);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -214,7 +214,8 @@ public:
|
||||
template <typename T>
|
||||
class Stack {
|
||||
public:
|
||||
Stack() : stack_top_(0) {}
|
||||
Stack() {
|
||||
}
|
||||
|
||||
~Stack() {
|
||||
for (auto data : stack_) {
|
||||
@ -241,9 +242,9 @@ public:
|
||||
void Return(T *item) {
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
// Linear search will do.
|
||||
int index = stack_top_ - 1;
|
||||
while (index >= 0 && stack_[index] != item)
|
||||
--index;
|
||||
int index = stack_top_;
|
||||
while (--index >= 0 && stack_[index] != item) {
|
||||
}
|
||||
if (index >= 0)
|
||||
flags_[index] = false;
|
||||
while (stack_top_ > 0 && !flags_[stack_top_ - 1])
|
||||
@ -253,7 +254,7 @@ public:
|
||||
private:
|
||||
std::vector<T *> stack_;
|
||||
std::vector<bool> flags_;
|
||||
int stack_top_;
|
||||
unsigned stack_top_ = 0;
|
||||
std::mutex mutex_;
|
||||
}; // class Stack.
|
||||
|
||||
|
@ -2,7 +2,6 @@
|
||||
// File: parallel.h
|
||||
// Description: Runs networks in parallel on the same input.
|
||||
// Author: Ray Smith
|
||||
// Created: Thu May 02 08:02:06 PST 2013
|
||||
//
|
||||
// (C) Copyright 2013, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ -52,8 +51,8 @@ public:
|
||||
if (type_ == NT_REPLICATED) {
|
||||
spec += "R" + std::to_string(stack_.size()) + "(" + stack_[0]->spec();
|
||||
} else {
|
||||
for (int i = 0; i < stack_.size(); ++i)
|
||||
spec += stack_[i]->spec();
|
||||
for (auto &it : stack_)
|
||||
spec += it->spec();
|
||||
}
|
||||
spec += ")";
|
||||
}
|
||||
|
@ -427,10 +427,11 @@ void RecodeBeamSearch::extractSymbolChoices(const UNICHARSET *unicharset) {
|
||||
if (ratings[i] < ratings[bestPos])
|
||||
bestPos = i;
|
||||
}
|
||||
// TODO: bestCode is currently unused (see commit 2dd5d0d60).
|
||||
int bestCode = -10;
|
||||
for (int i = 0; i < best_nodes.size(); ++i) {
|
||||
if (best_nodes[i]->unichar_id == unichar_ids[bestPos]) {
|
||||
bestCode = best_nodes[i]->code;
|
||||
for (auto &node : best_nodes) {
|
||||
if (node->unichar_id == unichar_ids[bestPos]) {
|
||||
bestCode = node->code;
|
||||
}
|
||||
}
|
||||
// Exclude the best choice for the followup decoding.
|
||||
|
@ -54,9 +54,9 @@ public:
|
||||
to = 'y';
|
||||
}
|
||||
// Change the from char to the to char.
|
||||
for (int i = 0; i < net_spec.length(); ++i) {
|
||||
if (net_spec[i] == from)
|
||||
net_spec[i] = to;
|
||||
for (auto &it : net_spec) {
|
||||
if (it == from)
|
||||
it = to;
|
||||
}
|
||||
spec += net_spec;
|
||||
return spec;
|
||||
|
@ -36,8 +36,8 @@ public:
|
||||
|
||||
std::string spec() const override {
|
||||
std::string spec("[");
|
||||
for (int i = 0; i < stack_.size(); ++i)
|
||||
spec += stack_[i]->spec();
|
||||
for (auto &it : stack_)
|
||||
spec += it->spec();
|
||||
spec += "]";
|
||||
return spec;
|
||||
}
|
||||
|
@ -127,37 +127,37 @@ static bool SafeAtod(const char *str, double *val) {
|
||||
static void PrintCommandLineFlags() {
|
||||
const char *kFlagNamePrefix = "FLAGS_";
|
||||
const int kFlagNamePrefixLen = strlen(kFlagNamePrefix);
|
||||
for (int i = 0; i < GlobalParams()->int_params.size(); ++i) {
|
||||
if (!strncmp(GlobalParams()->int_params[i]->name_str(), kFlagNamePrefix, kFlagNamePrefixLen)) {
|
||||
for (auto &param : GlobalParams()->int_params) {
|
||||
if (!strncmp(param->name_str(), kFlagNamePrefix, kFlagNamePrefixLen)) {
|
||||
printf(" --%s %s (type:int default:%d)\n",
|
||||
GlobalParams()->int_params[i]->name_str() + kFlagNamePrefixLen,
|
||||
GlobalParams()->int_params[i]->info_str(), int32_t(*(GlobalParams()->int_params[i])));
|
||||
param->name_str() + kFlagNamePrefixLen,
|
||||
param->info_str(), int32_t(*param));
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < GlobalParams()->double_params.size(); ++i) {
|
||||
if (!strncmp(GlobalParams()->double_params[i]->name_str(), kFlagNamePrefix,
|
||||
for (auto &param : GlobalParams()->double_params) {
|
||||
if (!strncmp(param->name_str(), kFlagNamePrefix,
|
||||
kFlagNamePrefixLen)) {
|
||||
printf(" --%s %s (type:double default:%g)\n",
|
||||
GlobalParams()->double_params[i]->name_str() + kFlagNamePrefixLen,
|
||||
GlobalParams()->double_params[i]->info_str(),
|
||||
static_cast<double>(*(GlobalParams()->double_params[i])));
|
||||
param->name_str() + kFlagNamePrefixLen,
|
||||
param->info_str(),
|
||||
static_cast<double>(*param));
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < GlobalParams()->bool_params.size(); ++i) {
|
||||
if (!strncmp(GlobalParams()->bool_params[i]->name_str(), kFlagNamePrefix, kFlagNamePrefixLen)) {
|
||||
for (auto &param : GlobalParams()->bool_params) {
|
||||
if (!strncmp(param->name_str(), kFlagNamePrefix, kFlagNamePrefixLen)) {
|
||||
printf(" --%s %s (type:bool default:%s)\n",
|
||||
GlobalParams()->bool_params[i]->name_str() + kFlagNamePrefixLen,
|
||||
GlobalParams()->bool_params[i]->info_str(),
|
||||
bool(*(GlobalParams()->bool_params[i])) ? "true" : "false");
|
||||
param->name_str() + kFlagNamePrefixLen,
|
||||
param->info_str(),
|
||||
bool(*param) ? "true" : "false");
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < GlobalParams()->string_params.size(); ++i) {
|
||||
if (!strncmp(GlobalParams()->string_params[i]->name_str(), kFlagNamePrefix,
|
||||
for (auto &param : GlobalParams()->string_params) {
|
||||
if (!strncmp(param->name_str(), kFlagNamePrefix,
|
||||
kFlagNamePrefixLen)) {
|
||||
printf(" --%s %s (type:string default:%s)\n",
|
||||
GlobalParams()->string_params[i]->name_str() + kFlagNamePrefixLen,
|
||||
GlobalParams()->string_params[i]->info_str(),
|
||||
GlobalParams()->string_params[i]->c_str());
|
||||
param->name_str() + kFlagNamePrefixLen,
|
||||
param->info_str(),
|
||||
param->c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -229,8 +229,8 @@ float CTC::CalculateBiasFraction() {
|
||||
for (int l = 0; l < num_labels_; ++l) {
|
||||
++truth_counts[labels_[l]];
|
||||
}
|
||||
for (int l = 0; l < output_labels.size(); ++l) {
|
||||
++output_counts[output_labels[l]];
|
||||
for (auto l : output_labels) {
|
||||
++output_counts[l];
|
||||
}
|
||||
// Count the number of true and false positive non-nulls and truth labels.
|
||||
int true_pos = 0, false_pos = 0, total_labels = 0;
|
||||
|
@ -197,10 +197,10 @@ Pix *PrepareDistortedPix(const Pix *pix, bool perspective, bool invert, bool whi
|
||||
if (perspective)
|
||||
GeneratePerspectiveDistortion(0, 0, randomizer, &distorted, boxes);
|
||||
if (boxes != nullptr) {
|
||||
for (int b = 0; b < boxes->size(); ++b) {
|
||||
(*boxes)[b].scale(1.0f / box_reduction);
|
||||
if ((*boxes)[b].width() <= 0)
|
||||
(*boxes)[b].set_right((*boxes)[b].left() + 1);
|
||||
for (auto &b : *boxes) {
|
||||
b.scale(1.0f / box_reduction);
|
||||
if (b.width() <= 0)
|
||||
b.set_right(b.left() + 1);
|
||||
}
|
||||
}
|
||||
if (invert && randomizer->SignedRand(1.0) < -0)
|
||||
@ -232,16 +232,16 @@ void GeneratePerspectiveDistortion(int width, int height, TRand *randomizer, Pix
|
||||
}
|
||||
if (boxes != nullptr) {
|
||||
// Transform the boxes.
|
||||
for (int b = 0; b < boxes->size(); ++b) {
|
||||
for (auto &b : *boxes) {
|
||||
int x1, y1, x2, y2;
|
||||
const TBOX &box = (*boxes)[b];
|
||||
const TBOX &box = b;
|
||||
projectiveXformSampledPt(box_coeffs, box.left(), height - box.top(), &x1, &y1);
|
||||
projectiveXformSampledPt(box_coeffs, box.right(), height - box.bottom(), &x2, &y2);
|
||||
TBOX new_box1(x1, height - y2, x2, height - y1);
|
||||
projectiveXformSampledPt(box_coeffs, box.left(), height - box.bottom(), &x1, &y1);
|
||||
projectiveXformSampledPt(box_coeffs, box.right(), height - box.top(), &x2, &y2);
|
||||
TBOX new_box2(x1, height - y1, x2, height - y2);
|
||||
(*boxes)[b] = new_box1.bounding_union(new_box2);
|
||||
b = new_box1.bounding_union(new_box2);
|
||||
}
|
||||
}
|
||||
free(im_coeffs);
|
||||
|
@ -718,7 +718,7 @@ bool LSTMTrainer::EncodeString(const std::string &str, const UNICHARSET &unichar
|
||||
tprintf("Empty truth string!\n");
|
||||
return false;
|
||||
}
|
||||
int err_index;
|
||||
unsigned err_index;
|
||||
std::vector<int> internal_labels;
|
||||
labels->clear();
|
||||
if (!simple_text)
|
||||
@ -822,7 +822,7 @@ Trainability LSTMTrainer::PrepareForBackward(const ImageData *trainingdata, Netw
|
||||
std::reverse(truth_labels.begin(), truth_labels.end());
|
||||
}
|
||||
}
|
||||
int w = 0;
|
||||
unsigned w = 0;
|
||||
while (w < truth_labels.size() &&
|
||||
(truth_labels[w] == UNICHAR_SPACE || truth_labels[w] == null_char_))
|
||||
++w;
|
||||
|
@ -98,7 +98,7 @@ TEST(UnicharsetTest, Multibyte) {
|
||||
EXPECT_THAT(v, ElementsAreArray({3, 4, 4, 5, 8, 7}));
|
||||
// With the fi ligature the fi is picked out.
|
||||
std::vector<char> lengths;
|
||||
int encoded_length;
|
||||
unsigned encoded_length;
|
||||
std::string src_str = "\u0627\u062c\ufb01\u0635\u062b";
|
||||
// src_str has to be pre-cleaned for lengths to be correct.
|
||||
std::string cleaned = u.CleanupString(src_str.c_str());
|
||||
|
Loading…
Reference in New Issue
Block a user