mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-18 14:41:36 +08:00
Fewer g++ -Wsign-compare warnings (cont.)
This commit is contained in:
parent
c335508e84
commit
7f382df5ec
@ -51,7 +51,7 @@ void BoxChar::AddBox(int x, int y, int width, int height) {
|
||||
/* static */
|
||||
void BoxChar::TranslateBoxes(int xshift, int yshift,
|
||||
std::vector<BoxChar*>* boxes) {
|
||||
for (int i = 0; i < boxes->size(); ++i) {
|
||||
for (size_t i = 0; i < boxes->size(); ++i) {
|
||||
BOX* box = (*boxes)[i]->box_;
|
||||
if (box != nullptr) {
|
||||
box->x += xshift;
|
||||
@ -68,8 +68,8 @@ void BoxChar::PrepareToWrite(std::vector<BoxChar*>* boxes) {
|
||||
bool vertical_rules = MostlyVertical(*boxes);
|
||||
InsertNewlines(rtl_rules, vertical_rules, boxes);
|
||||
InsertSpaces(rtl_rules, vertical_rules, boxes);
|
||||
for (int i = 0; i < boxes->size(); ++i) {
|
||||
if ((*boxes)[i]->box_ == nullptr) tprintf("Null box at index %d\n", i);
|
||||
for (unsigned int i = 0; i < boxes->size(); ++i) {
|
||||
if ((*boxes)[i]->box_ == nullptr) tprintf("Null box at index %u\n", i);
|
||||
}
|
||||
if (rtl_rules) {
|
||||
ReorderRTLText(boxes);
|
||||
@ -82,16 +82,16 @@ void BoxChar::InsertNewlines(bool rtl_rules, bool vertical_rules,
|
||||
std::vector<BoxChar*>* boxes) {
|
||||
int prev_i = -1;
|
||||
int max_shift = 0;
|
||||
for (int i = 0; i < boxes->size(); ++i) {
|
||||
for (int i = 0; static_cast<unsigned int>(i) < boxes->size(); ++i) {
|
||||
Box* box = (*boxes)[i]->box_;
|
||||
if (box == nullptr) {
|
||||
if (prev_i < 0 || prev_i < i - 1 || i + 1 == boxes->size()) {
|
||||
if (prev_i < 0 || prev_i < i - 1 || static_cast<unsigned int>(i) + 1 == boxes->size()) {
|
||||
// Erase null boxes at the start of a line and after another null box.
|
||||
do {
|
||||
delete (*boxes)[i];
|
||||
boxes->erase(boxes->begin() + i);
|
||||
--i;
|
||||
} while (i >= 0 && i + 1 == boxes->size() &&
|
||||
} while (i >= 0 && static_cast<unsigned int>(i) + 1 == boxes->size() &&
|
||||
(*boxes)[i]->box_ == nullptr);
|
||||
}
|
||||
continue;
|
||||
@ -146,7 +146,7 @@ void BoxChar::InsertSpaces(bool rtl_rules, bool vertical_rules,
|
||||
std::vector<BoxChar*>* boxes) {
|
||||
// After InsertNewlines, any remaining null boxes are not newlines, and are
|
||||
// singletons, so add a box to each remaining null box.
|
||||
for (int i = 1; i + 1 < boxes->size(); ++i) {
|
||||
for (int i = 1; static_cast<unsigned int>(i) + 1 < boxes->size(); ++i) {
|
||||
Box* box = (*boxes)[i]->box_;
|
||||
if (box == nullptr) {
|
||||
Box* prev = (*boxes)[i - 1]->box_;
|
||||
@ -178,8 +178,8 @@ void BoxChar::InsertSpaces(bool rtl_rules, bool vertical_rules,
|
||||
}
|
||||
// Left becomes the max right of all next boxes forward to the first
|
||||
// space or newline.
|
||||
for (int j = i + 2; j < boxes->size() && (*boxes)[j]->box_ != nullptr &&
|
||||
(*boxes)[j]->ch_ != "\t";
|
||||
for (size_t j = i + 2; j < boxes->size() && (*boxes)[j]->box_ != nullptr &&
|
||||
(*boxes)[j]->ch_ != "\t";
|
||||
++j) {
|
||||
next = (*boxes)[j]->box_;
|
||||
if (next->x + next->w > left) {
|
||||
@ -203,8 +203,8 @@ void BoxChar::ReorderRTLText(std::vector<BoxChar*>* boxes) {
|
||||
// After adding newlines and spaces, this task is simply a matter of sorting
|
||||
// by left each group of boxes between newlines.
|
||||
BoxCharPtrSort sorter;
|
||||
int end = 0;
|
||||
for (int start = 0; start < boxes->size(); start = end + 1) {
|
||||
size_t end = 0;
|
||||
for (size_t start = 0; start < boxes->size(); start = end + 1) {
|
||||
end = start + 1;
|
||||
while (end < boxes->size() && (*boxes)[end]->ch_ != "\t") ++end;
|
||||
std::sort(boxes->begin() + start, boxes->begin() + end, sorter);
|
||||
@ -215,13 +215,13 @@ void BoxChar::ReorderRTLText(std::vector<BoxChar*>* boxes) {
|
||||
/* static */
|
||||
bool BoxChar::ContainsMostlyRTL(const std::vector<BoxChar*>& boxes) {
|
||||
int num_rtl = 0, num_ltr = 0;
|
||||
for (int i = 0; i < boxes.size(); ++i) {
|
||||
for (unsigned int i = 0; i < boxes.size(); ++i) {
|
||||
// Convert the unichar to UTF32 representation
|
||||
GenericVector<char32> uni_vector;
|
||||
if (!UNICHAR::UTF8ToUnicode(boxes[i]->ch_.c_str(), &uni_vector)) {
|
||||
tprintf("Illegal utf8 in boxchar %d string:%s = ", i,
|
||||
tprintf("Illegal utf8 in boxchar %u string:%s = ", i,
|
||||
boxes[i]->ch_.c_str());
|
||||
for (int c = 0; c < boxes[i]->ch_.size(); ++c) {
|
||||
for (size_t c = 0; c < boxes[i]->ch_.size(); ++c) {
|
||||
tprintf(" 0x%x", boxes[i]->ch_[c]);
|
||||
}
|
||||
tprintf("\n");
|
||||
@ -244,7 +244,7 @@ bool BoxChar::ContainsMostlyRTL(const std::vector<BoxChar*>& boxes) {
|
||||
/* static */
|
||||
bool BoxChar::MostlyVertical(const std::vector<BoxChar*>& boxes) {
|
||||
inT64 total_dx = 0, total_dy = 0;
|
||||
for (int i = 1; i < boxes.size(); ++i) {
|
||||
for (size_t i = 1; i < boxes.size(); ++i) {
|
||||
if (boxes[i - 1]->box_ != nullptr && boxes[i]->box_ != nullptr &&
|
||||
boxes[i - 1]->page_ == boxes[i]->page_) {
|
||||
int dx = boxes[i]->box_->x - boxes[i - 1]->box_->x;
|
||||
@ -263,7 +263,7 @@ bool BoxChar::MostlyVertical(const std::vector<BoxChar*>& boxes) {
|
||||
/* static */
|
||||
int BoxChar::TotalByteLength(const std::vector<BoxChar*>& boxes) {
|
||||
int total_length = 0;
|
||||
for (int i = 0; i < boxes.size(); ++i) total_length += boxes[i]->ch_.size();
|
||||
for (size_t i = 0; i < boxes.size(); ++i) total_length += boxes[i]->ch_.size();
|
||||
return total_length;
|
||||
}
|
||||
|
||||
@ -302,7 +302,7 @@ string BoxChar::GetTesseractBoxStr(int height,
|
||||
const std::vector<BoxChar*>& boxes) {
|
||||
string output;
|
||||
char buffer[kMaxLineLength];
|
||||
for (int i = 0; i < boxes.size(); ++i) {
|
||||
for (size_t i = 0; i < boxes.size(); ++i) {
|
||||
const Box* box = boxes[i]->box_;
|
||||
if (box == nullptr) {
|
||||
tprintf("Error: Call PrepareToWrite before WriteTesseractBoxFile!!\n");
|
||||
|
@ -35,6 +35,7 @@
|
||||
#include "tprintf.h"
|
||||
#include "unicity_table.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
|
||||
using tesseract::CCUtil;
|
||||
@ -368,7 +369,9 @@ void ReadTrainingSamples(const FEATURE_DEFS_STRUCT& feature_defs,
|
||||
LABELEDLIST char_sample;
|
||||
FEATURE_SET feature_samples;
|
||||
CHAR_DESC char_desc;
|
||||
int feature_type = ShortNameToFeatureType(feature_defs, feature_name);
|
||||
int ShortNameToFeatureType_res = ShortNameToFeatureType(feature_defs, feature_name);
|
||||
assert(0 <= ShortNameToFeatureType_res);
|
||||
unsigned int feature_type = static_cast<unsigned int>(ShortNameToFeatureType_res);
|
||||
// Zero out the font_sample_count for all the classes.
|
||||
LIST it = *training_samples;
|
||||
iterate(it) {
|
||||
|
@ -20,6 +20,7 @@
|
||||
|
||||
#include "normstrngs.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include "icuerrorcode.h"
|
||||
#include "unichar.h"
|
||||
#include "unicode/normalizer2.h" // From libicu
|
||||
@ -181,7 +182,13 @@ bool IsWhitespace(const char32 ch) {
|
||||
}
|
||||
|
||||
bool IsUTF8Whitespace(const char* text) {
|
||||
#if 0 // intent
|
||||
return SpanUTF8Whitespace(text) == strlen(text);
|
||||
#else // avoiding g++ -Wsign-compare warning
|
||||
const int res = SpanUTF8Whitespace(text);
|
||||
assert(0 <= res);
|
||||
return static_cast<unsigned int>(res) == strlen(text);
|
||||
#endif
|
||||
}
|
||||
|
||||
int SpanUTF8Whitespace(const char* text) {
|
||||
|
@ -20,6 +20,7 @@
|
||||
|
||||
#include "stringrenderer.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <algorithm>
|
||||
@ -241,7 +242,7 @@ void StringRenderer::SetWordUnderlineAttributes(const string& page_text) {
|
||||
PangoAttrList* attr_list = pango_layout_get_attributes(layout_);
|
||||
|
||||
const char* text = page_text.c_str();
|
||||
int offset = 0;
|
||||
size_t offset = 0;
|
||||
TRand rand;
|
||||
bool started_underline = false;
|
||||
PangoAttribute* und_attr = nullptr;
|
||||
@ -341,7 +342,7 @@ void StringRenderer::RotatePageBoxes(float rotation) {
|
||||
|
||||
|
||||
void StringRenderer::ClearBoxes() {
|
||||
for (int i = 0; i < boxchars_.size(); ++i)
|
||||
for (size_t i = 0; i < boxchars_.size(); ++i)
|
||||
delete boxchars_[i];
|
||||
boxchars_.clear();
|
||||
boxaDestroy(&page_boxes_);
|
||||
@ -416,7 +417,7 @@ bool StringRenderer::GetClusterStrings(std::vector<string>* cluster_text) {
|
||||
static void MergeBoxCharsToWords(std::vector<BoxChar*>* boxchars) {
|
||||
std::vector<BoxChar*> result;
|
||||
bool started_word = false;
|
||||
for (int i = 0; i < boxchars->size(); ++i) {
|
||||
for (size_t i = 0; i < boxchars->size(); ++i) {
|
||||
if (boxchars->at(i)->ch() == " " || boxchars->at(i)->box() == nullptr) {
|
||||
result.push_back(boxchars->at(i));
|
||||
boxchars->at(i) = nullptr;
|
||||
@ -480,7 +481,7 @@ void StringRenderer::ComputeClusterBoxes() {
|
||||
// Sort the indices and create a map from start to end indices.
|
||||
std::sort(cluster_start_indices.begin(), cluster_start_indices.end());
|
||||
std::map<int, int> cluster_start_to_end_index;
|
||||
for (int i = 0; i < cluster_start_indices.size() - 1; ++i) {
|
||||
for (size_t i = 0; i + 1 < cluster_start_indices.size(); ++i) {
|
||||
cluster_start_to_end_index[cluster_start_indices[i]]
|
||||
= cluster_start_indices[i + 1];
|
||||
}
|
||||
@ -592,7 +593,7 @@ void StringRenderer::ComputeClusterBoxes() {
|
||||
// Compute the page bounding box
|
||||
Box* page_box = nullptr;
|
||||
Boxa* all_boxes = nullptr;
|
||||
for (int i = 0; i < page_boxchars.size(); ++i) {
|
||||
for (size_t i = 0; i < page_boxchars.size(); ++i) {
|
||||
if (page_boxchars[i]->box() == nullptr) continue;
|
||||
if (all_boxes == nullptr) all_boxes = boxaCreate(0);
|
||||
boxaAddBox(all_boxes, page_boxchars[i]->mutable_box(), L_CLONE);
|
||||
@ -622,7 +623,7 @@ void StringRenderer::CorrectBoxPositionsToLayout(
|
||||
int StringRenderer::StripUnrenderableWords(string* utf8_text) const {
|
||||
string output_text;
|
||||
const char* text = utf8_text->c_str();
|
||||
int offset = 0;
|
||||
size_t offset = 0;
|
||||
int num_dropped = 0;
|
||||
while (offset < utf8_text->length()) {
|
||||
int space_len = SpanUTF8Whitespace(text + offset);
|
||||
@ -866,7 +867,8 @@ int StringRenderer::RenderAllFontsToImage(double min_coverage,
|
||||
tprintf("Total chars = %d\n", total_chars_);
|
||||
}
|
||||
const std::vector<string>& all_fonts = FontUtils::ListAvailableFonts();
|
||||
for (int i = font_index_; i < all_fonts.size(); ++i) {
|
||||
assert(0 <= font_index_);
|
||||
for (unsigned int i = static_cast<unsigned int>(font_index_); i < all_fonts.size(); ++i) {
|
||||
++font_index_;
|
||||
int raw_score = 0;
|
||||
int ok_chars =
|
||||
|
@ -190,14 +190,12 @@ static bool IsWhitespaceBox(const BoxChar* boxchar) {
|
||||
static string StringReplace(const string& in,
|
||||
const string& oldsub, const string& newsub) {
|
||||
string out;
|
||||
int start_pos = 0;
|
||||
do {
|
||||
int pos = in.find(oldsub, start_pos);
|
||||
if (pos == string::npos) break;
|
||||
size_t start_pos = 0, pos;
|
||||
while ((pos = in.find(oldsub, start_pos)) != string::npos) {
|
||||
out.append(in.data() + start_pos, pos - start_pos);
|
||||
out.append(newsub.data(), newsub.length());
|
||||
start_pos = pos + oldsub.length();
|
||||
} while (true);
|
||||
}
|
||||
out.append(in.data() + start_pos, in.length() - start_pos);
|
||||
return out;
|
||||
}
|
||||
@ -239,7 +237,7 @@ void ExtractFontProperties(const string &utf8_text,
|
||||
offset -= boxes[boxes.size() - 1]->ch().size();
|
||||
}
|
||||
|
||||
for (int b = 0; b < boxes.size(); b += 2) {
|
||||
for (size_t b = 0; b < boxes.size(); b += 2) {
|
||||
while (b < boxes.size() && IsWhitespaceBox(boxes[b])) ++b;
|
||||
if (b + 1 >= boxes.size()) break;
|
||||
const string &ch0 = boxes[b]->ch();
|
||||
@ -422,8 +420,8 @@ int main(int argc, char** argv) {
|
||||
|
||||
if (FLAGS_list_available_fonts) {
|
||||
const std::vector<string>& all_fonts = FontUtils::ListAvailableFonts();
|
||||
for (int i = 0; i < all_fonts.size(); ++i) {
|
||||
printf("%3d: %s\n", i, all_fonts[i].c_str());
|
||||
for (unsigned int i = 0; i < all_fonts.size(); ++i) {
|
||||
printf("%3u: %s\n", i, all_fonts[i].c_str());
|
||||
ASSERT_HOST_MSG(FontUtils::IsAvailableFont(all_fonts[i].c_str()),
|
||||
"Font %s is unrecognized.\n", all_fonts[i].c_str());
|
||||
}
|
||||
@ -517,10 +515,10 @@ int main(int argc, char** argv) {
|
||||
// Try to preserve behavior of old text2image by expanding inter-word
|
||||
// spaces by a factor of 4.
|
||||
const string kSeparator = FLAGS_render_ngrams ? " " : " ";
|
||||
// Also restrict the number of charactes per line to try and avoid
|
||||
// Also restrict the number of characters per line to try and avoid
|
||||
// line-breaking in the middle of words like "-A", "R$" etc. which are
|
||||
// otherwise allowed by the standard unicode line-breaking rules.
|
||||
const int kCharsPerLine = (FLAGS_ptsize > 20) ? 50 : 100;
|
||||
const unsigned int kCharsPerLine = (FLAGS_ptsize > 20) ? 50 : 100;
|
||||
string rand_utf8;
|
||||
UNICHARSET unicharset;
|
||||
if (FLAGS_render_ngrams && !FLAGS_unicharset_file.empty() &&
|
||||
@ -547,7 +545,7 @@ int main(int argc, char** argv) {
|
||||
if (FLAGS_render_ngrams)
|
||||
std::random_shuffle(offsets.begin(), offsets.end());
|
||||
|
||||
for (int i = 0, line = 1; i < offsets.size(); ++i) {
|
||||
for (size_t i = 0, line = 1; i < offsets.size(); ++i) {
|
||||
const char *curr_pos = str8 + offsets[i].first;
|
||||
int ngram_len = offsets[i].second;
|
||||
// Skip words that contain characters not found in unicharset.
|
||||
@ -588,7 +586,7 @@ int main(int argc, char** argv) {
|
||||
for (int pass = 0; pass < num_pass; ++pass) {
|
||||
int page_num = 0;
|
||||
string font_used;
|
||||
for (int offset = 0; offset < strlen(to_render_utf8); ++im, ++page_num) {
|
||||
for (size_t offset = 0; offset < strlen(to_render_utf8); ++im, ++page_num) {
|
||||
tlog(1, "Starting page %d\n", im);
|
||||
Pix* pix = nullptr;
|
||||
if (FLAGS_find_fonts) {
|
||||
@ -664,7 +662,7 @@ int main(int argc, char** argv) {
|
||||
if (fp == nullptr) {
|
||||
tprintf("Failed to create output font list %s\n", filename.c_str());
|
||||
} else {
|
||||
for (int i = 0; i < font_names.size(); ++i) {
|
||||
for (size_t i = 0; i < font_names.size(); ++i) {
|
||||
fprintf(fp, "%s\n", font_names[i].c_str());
|
||||
}
|
||||
fclose(fp);
|
||||
|
Loading…
Reference in New Issue
Block a user