mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-27 20:59:36 +08:00
dict: Fix some signed/unsigned compiler warnings
Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
parent
bcc71c675a
commit
a274f4a531
@ -44,9 +44,8 @@ const int case_state_table[6][4] = {
|
||||
|
||||
int Dict::case_ok(const WERD_CHOICE &word) const {
|
||||
int state = 0;
|
||||
int x;
|
||||
const UNICHARSET *unicharset = word.unicharset();
|
||||
for (x = 0; x < word.length(); ++x) {
|
||||
for (unsigned x = 0; x < word.length(); ++x) {
|
||||
UNICHAR_ID ch_id = word.unichar_id(x);
|
||||
if (unicharset->get_isupper(ch_id)) {
|
||||
state = case_state_table[state][1];
|
||||
@ -69,7 +68,7 @@ bool Dict::absolute_garbage(const WERD_CHOICE &word, const UNICHARSET &unicharse
|
||||
return false;
|
||||
}
|
||||
int num_alphanum = 0;
|
||||
for (int x = 0; x < word.length(); ++x) {
|
||||
for (unsigned x = 0; x < word.length(); ++x) {
|
||||
num_alphanum +=
|
||||
(unicharset.get_isalpha(word.unichar_id(x)) || unicharset.get_isdigit(word.unichar_id(x)));
|
||||
}
|
||||
|
@ -136,10 +136,7 @@ void Dawg::iterate_words_rec(const WERD_CHOICE &word_so_far, NODE_REF to_explore
|
||||
}
|
||||
}
|
||||
|
||||
bool Dawg::match_words(WERD_CHOICE *word, int32_t index, NODE_REF node, UNICHAR_ID wildcard) const {
|
||||
EDGE_REF edge;
|
||||
int32_t word_end;
|
||||
|
||||
bool Dawg::match_words(WERD_CHOICE *word, uint32_t index, NODE_REF node, UNICHAR_ID wildcard) const {
|
||||
if (wildcard != INVALID_UNICHAR_ID && word->unichar_id(index) == wildcard) {
|
||||
bool any_matched = false;
|
||||
NodeChildVector vec;
|
||||
@ -153,8 +150,8 @@ bool Dawg::match_words(WERD_CHOICE *word, int32_t index, NODE_REF node, UNICHAR_
|
||||
word->set_unichar_id(wildcard, index);
|
||||
return any_matched;
|
||||
} else {
|
||||
word_end = index == word->length() - 1;
|
||||
edge = edge_char_of(node, word->unichar_id(index), word_end);
|
||||
auto word_end = index == word->length() - 1;
|
||||
auto edge = edge_char_of(node, word->unichar_id(index), word_end);
|
||||
if (edge != NO_EDGE) { // normal edge in DAWG
|
||||
node = next_node(edge);
|
||||
if (word_end) {
|
||||
|
@ -277,7 +277,7 @@ protected:
|
||||
/// the *'s in this string are interpreted as wildcards.
|
||||
/// WERD_CHOICE param is not passed by const so that wildcard searches
|
||||
/// can modify it and work without having to copy WERD_CHOICEs.
|
||||
bool match_words(WERD_CHOICE *word, int32_t index, NODE_REF node, UNICHAR_ID wildcard) const;
|
||||
bool match_words(WERD_CHOICE *word, uint32_t index, NODE_REF node, UNICHAR_ID wildcard) const;
|
||||
|
||||
// Recursively iterate over all words in a dawg (see public iterate_words).
|
||||
void iterate_words_rec(const WERD_CHOICE &word_so_far, NODE_REF to_explore,
|
||||
|
@ -364,7 +364,7 @@ bool Dict::FinishLoad() {
|
||||
successors_.reserve(dawgs_.size());
|
||||
for (auto dawg : dawgs_) {
|
||||
auto *lst = new SuccessorList();
|
||||
for (int j = 0; j < dawgs_.size(); ++j) {
|
||||
for (unsigned j = 0; j < dawgs_.size(); ++j) {
|
||||
const Dawg *other = dawgs_[j];
|
||||
if (dawg != nullptr && other != nullptr && (dawg->lang() == other->lang()) &&
|
||||
kDawgSuccessors[dawg->type()][other->type()]) {
|
||||
@ -432,7 +432,7 @@ int Dict::def_letter_is_okay(void *void_dawg_args, const UNICHARSET &unicharset,
|
||||
// Go over the active_dawgs vector and insert DawgPosition records
|
||||
// with the updated ref (an edge with the corresponding unichar id) into
|
||||
// dawg_args->updated_pos.
|
||||
for (int a = 0; a < dawg_args->active_dawgs->size(); ++a) {
|
||||
for (unsigned a = 0; a < dawg_args->active_dawgs->size(); ++a) {
|
||||
const DawgPosition &pos = (*dawg_args->active_dawgs)[a];
|
||||
const Dawg *punc_dawg = pos.punc_index >= 0 ? dawgs_[pos.punc_index] : nullptr;
|
||||
const Dawg *dawg = pos.dawg_index >= 0 ? dawgs_[pos.dawg_index] : nullptr;
|
||||
@ -608,11 +608,10 @@ void Dict::ProcessPatternEdges(const Dawg *dawg, const DawgPosition &pos, UNICHA
|
||||
// beginning of the word. If hyphenated() returns true, copy the entries
|
||||
// from hyphen_active_dawgs_ instead.
|
||||
void Dict::init_active_dawgs(DawgPositionVector *active_dawgs, bool ambigs_mode) const {
|
||||
int i;
|
||||
if (hyphenated()) {
|
||||
*active_dawgs = hyphen_active_dawgs_;
|
||||
if (dawg_debug_level >= 3) {
|
||||
for (i = 0; i < hyphen_active_dawgs_.size(); ++i) {
|
||||
for (unsigned i = 0; i < hyphen_active_dawgs_.size(); ++i) {
|
||||
tprintf("Adding hyphen beginning dawg [%d, " REFFORMAT "]\n",
|
||||
hyphen_active_dawgs_[i].dawg_index, hyphen_active_dawgs_[i].dawg_ref);
|
||||
}
|
||||
@ -626,7 +625,7 @@ void Dict::default_dawgs(DawgPositionVector *dawg_pos_vec, bool suppress_pattern
|
||||
bool punc_dawg_available = (punc_dawg_ != nullptr) &&
|
||||
punc_dawg_->edge_char_of(0, Dawg::kPatternUnicharID, true) != NO_EDGE;
|
||||
|
||||
for (int i = 0; i < dawgs_.size(); i++) {
|
||||
for (unsigned i = 0; i < dawgs_.size(); i++) {
|
||||
if (dawgs_[i] != nullptr && !(suppress_patterns && (dawgs_[i])->type() == DAWG_TYPE_PATTERN)) {
|
||||
int dawg_ty = dawgs_[i]->type();
|
||||
bool subsumed_by_punc = kDawgSuccessors[DAWG_TYPE_PUNCTUATION][dawg_ty];
|
||||
@ -666,7 +665,7 @@ void Dict::add_document_word(const WERD_CHOICE &best_choice) {
|
||||
if (best_choice.length() >= kDocDictMaxRepChars) {
|
||||
int num_rep_chars = 1;
|
||||
UNICHAR_ID uch_id = best_choice.unichar_id(0);
|
||||
for (int i = 1; i < best_choice.length(); ++i) {
|
||||
for (unsigned i = 1; i < best_choice.length(); ++i) {
|
||||
if (best_choice.unichar_id(i) != uch_id) {
|
||||
num_rep_chars = 1;
|
||||
uch_id = best_choice.unichar_id(i);
|
||||
@ -841,7 +840,7 @@ bool Dict::valid_bigram(const WERD_CHOICE &word1, const WERD_CHOICE &word2) cons
|
||||
|
||||
// Extract the core word from the middle of each word with any digits
|
||||
// replaced with question marks.
|
||||
int w1start, w1end, w2start, w2end;
|
||||
unsigned w1start, w1end, w2start, w2end;
|
||||
word1.punct_stripped(&w1start, &w1end);
|
||||
word2.punct_stripped(&w2start, &w2end);
|
||||
|
||||
@ -857,7 +856,7 @@ bool Dict::valid_bigram(const WERD_CHOICE &word1, const WERD_CHOICE &word2) cons
|
||||
const UNICHARSET &uchset = getUnicharset();
|
||||
std::vector<UNICHAR_ID> bigram_string;
|
||||
bigram_string.reserve(w1end + w2end + 1);
|
||||
for (int i = w1start; i < w1end; i++) {
|
||||
for (auto i = w1start; i < w1end; i++) {
|
||||
const auto &normed_ids = getUnicharset().normed_ids(word1.unichar_id(i));
|
||||
if (normed_ids.size() == 1 && uchset.get_isdigit(normed_ids[0])) {
|
||||
bigram_string.push_back(question_unichar_id_);
|
||||
@ -866,7 +865,7 @@ bool Dict::valid_bigram(const WERD_CHOICE &word1, const WERD_CHOICE &word2) cons
|
||||
}
|
||||
}
|
||||
bigram_string.push_back(UNICHAR_SPACE);
|
||||
for (int i = w2start; i < w2end; i++) {
|
||||
for (auto i = w2start; i < w2end; i++) {
|
||||
const auto &normed_ids = getUnicharset().normed_ids(word2.unichar_id(i));
|
||||
if (normed_ids.size() == 1 && uchset.get_isdigit(normed_ids[0])) {
|
||||
bigram_string.push_back(question_unichar_id_);
|
||||
@ -885,11 +884,10 @@ bool Dict::valid_punctuation(const WERD_CHOICE &word) {
|
||||
if (word.empty()) {
|
||||
return NO_PERM;
|
||||
}
|
||||
int i;
|
||||
WERD_CHOICE new_word(word.unicharset());
|
||||
int last_index = word.length() - 1;
|
||||
auto last_index = word.length() - 1;
|
||||
int new_len = 0;
|
||||
for (i = 0; i <= last_index; ++i) {
|
||||
for (unsigned i = 0; i <= last_index; ++i) {
|
||||
UNICHAR_ID unichar_id = (word.unichar_id(i));
|
||||
if (getUnicharset().get_ispunctuation(unichar_id)) {
|
||||
new_word.append_unichar_id(unichar_id, 1, 0.0, 0.0);
|
||||
@ -901,7 +899,7 @@ bool Dict::valid_punctuation(const WERD_CHOICE &word) {
|
||||
new_word.append_unichar_id(Dawg::kPatternUnicharID, 1, 0.0, 0.0);
|
||||
}
|
||||
}
|
||||
for (i = 0; i < dawgs_.size(); ++i) {
|
||||
for (unsigned i = 0; i < dawgs_.size(); ++i) {
|
||||
if (dawgs_[i] != nullptr && dawgs_[i]->type() == DAWG_TYPE_PUNCTUATION &&
|
||||
dawgs_[i]->word_in_dawg(new_word)) {
|
||||
return true;
|
||||
|
@ -164,7 +164,6 @@ bool Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, DANGERR *fixpt, bool fix_r
|
||||
// Construct BLOB_CHOICE_LIST_VECTOR with ambiguities
|
||||
// for each unichar id in BestChoice.
|
||||
BLOB_CHOICE_LIST_VECTOR ambig_blob_choices;
|
||||
int i;
|
||||
bool ambigs_found = false;
|
||||
// For each position in best_choice:
|
||||
// -- choose AMBIG_SPEC_LIST that corresponds to unichar_id at best_choice[i]
|
||||
@ -190,7 +189,7 @@ bool Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, DANGERR *fixpt, bool fix_r
|
||||
// unichar id for the corresponding position in best_choice.
|
||||
// best_choice consisting of only the original letters will
|
||||
// have a rating of 0.0.
|
||||
for (i = 0; i < best_choice->length(); ++i) {
|
||||
for (unsigned i = 0; i < best_choice->length(); ++i) {
|
||||
auto *lst = new BLOB_CHOICE_LIST();
|
||||
BLOB_CHOICE_IT lst_it(lst);
|
||||
// TODO(rays/antonova) Put real xheights and y shifts here.
|
||||
@ -201,10 +200,9 @@ bool Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, DANGERR *fixpt, bool fix_r
|
||||
}
|
||||
UNICHAR_ID wrong_ngram[MAX_AMBIG_SIZE + 1];
|
||||
int wrong_ngram_index;
|
||||
int next_index;
|
||||
int blob_index = 0;
|
||||
for (i = 0; i < best_choice->length(); blob_index += best_choice->state(i), ++i) {
|
||||
UNICHAR_ID curr_unichar_id = best_choice->unichar_id(i);
|
||||
for (unsigned i = 0; i < best_choice->length(); blob_index += best_choice->state(i), ++i) {
|
||||
auto curr_unichar_id = best_choice->unichar_id(i);
|
||||
if (stopper_debug_level > 2) {
|
||||
tprintf("Looking for %s ngrams starting with %s:\n", replace ? "replaceable" : "ambiguous",
|
||||
getUnicharset().debug_str(curr_unichar_id).c_str());
|
||||
@ -212,7 +210,7 @@ bool Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, DANGERR *fixpt, bool fix_r
|
||||
int num_wrong_blobs = best_choice->state(i);
|
||||
wrong_ngram_index = 0;
|
||||
wrong_ngram[wrong_ngram_index] = curr_unichar_id;
|
||||
if (curr_unichar_id == INVALID_UNICHAR_ID || curr_unichar_id >= table.size() ||
|
||||
if (curr_unichar_id == INVALID_UNICHAR_ID || static_cast<size_t>(curr_unichar_id) >= table.size() ||
|
||||
table[curr_unichar_id] == nullptr) {
|
||||
continue; // there is no ambig spec for this unichar id
|
||||
}
|
||||
@ -272,6 +270,7 @@ bool Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, DANGERR *fixpt, bool fix_r
|
||||
}
|
||||
spec_it.forward();
|
||||
} else if (compare == -1) {
|
||||
unsigned next_index;
|
||||
if (wrong_ngram_index + 1 < ambig_spec->wrong_ngram_size &&
|
||||
((next_index = wrong_ngram_index + 1 + i) < best_choice->length())) {
|
||||
// Add the next unichar id to wrong_ngram and keep looking for
|
||||
@ -293,7 +292,7 @@ bool Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, DANGERR *fixpt, bool fix_r
|
||||
if (ambigs_found) {
|
||||
if (stopper_debug_level > 2) {
|
||||
tprintf("\nResulting ambig_blob_choices:\n");
|
||||
for (i = 0; i < ambig_blob_choices.size(); ++i) {
|
||||
for (unsigned i = 0; i < ambig_blob_choices.size(); ++i) {
|
||||
print_ratings_list("", ambig_blob_choices.at(i), getUnicharset());
|
||||
tprintf("\n");
|
||||
}
|
||||
@ -310,7 +309,7 @@ bool Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, DANGERR *fixpt, bool fix_r
|
||||
// the capability to produce classifications combined from character
|
||||
// fragments is added to other functions.
|
||||
int orig_i = 0;
|
||||
for (i = 0; i < alt_word->length(); ++i) {
|
||||
for (unsigned i = 0; i < alt_word->length(); ++i) {
|
||||
const UNICHARSET &uchset = getUnicharset();
|
||||
bool replacement_is_ngram = uchset.get_isngram(alt_word->unichar_id(i));
|
||||
UNICHAR_ID leftmost_id = alt_word->unichar_id(i);
|
||||
@ -444,7 +443,7 @@ void Dict::ReplaceAmbig(int wrong_ngram_begin_index, int wrong_ngram_size,
|
||||
int Dict::LengthOfShortestAlphaRun(const WERD_CHOICE &WordChoice) const {
|
||||
int shortest = INT32_MAX;
|
||||
int curr_len = 0;
|
||||
for (int w = 0; w < WordChoice.length(); ++w) {
|
||||
for (unsigned w = 0; w < WordChoice.length(); ++w) {
|
||||
if (WordChoice.unicharset()->get_isalpha(WordChoice.unichar_id(w))) {
|
||||
curr_len++;
|
||||
} else if (curr_len > 0) {
|
||||
|
@ -71,7 +71,7 @@ bool Trie::edge_char_of(NODE_REF node_ref, NODE_REF next_node, int direction, bo
|
||||
if (node_ref == NO_EDGE) {
|
||||
return false;
|
||||
}
|
||||
assert(node_ref < nodes_.size());
|
||||
assert(static_cast<size_t>(node_ref) < nodes_.size());
|
||||
EDGE_VECTOR &vec = (direction == FORWARD_EDGE) ? nodes_[node_ref]->forward_edges
|
||||
: nodes_[node_ref]->backward_edges;
|
||||
int vec_size = vec.size();
|
||||
@ -111,7 +111,7 @@ bool Trie::add_edge_linkage(NODE_REF node1, NODE_REF node2, bool marker_flag, in
|
||||
bool word_end, UNICHAR_ID unichar_id) {
|
||||
EDGE_VECTOR *vec = (direction == FORWARD_EDGE) ? &(nodes_[node1]->forward_edges)
|
||||
: &(nodes_[node1]->backward_edges);
|
||||
int search_index;
|
||||
unsigned search_index;
|
||||
if (node1 == 0 && direction == FORWARD_EDGE) {
|
||||
search_index = 0; // find the index to make the add sorted
|
||||
while (search_index < vec->size() &&
|
||||
@ -164,7 +164,7 @@ bool Trie::add_word_to_dawg(const WERD_CHOICE &word, const std::vector<bool> *re
|
||||
ASSERT_HOST(repetitions->size() == word.length());
|
||||
}
|
||||
// Make sure the word does not contain invalid unichar ids.
|
||||
for (int i = 0; i < word.length(); ++i) {
|
||||
for (unsigned i = 0; i < word.length(); ++i) {
|
||||
if (word.unichar_id(i) < 0 || word.unichar_id(i) >= unicharset_size_) {
|
||||
return false;
|
||||
}
|
||||
@ -175,7 +175,6 @@ bool Trie::add_word_to_dawg(const WERD_CHOICE &word, const std::vector<bool> *re
|
||||
NODE_REF the_next_node;
|
||||
bool marker_flag = false;
|
||||
EDGE_INDEX edge_index;
|
||||
int i;
|
||||
int32_t still_finding_chars = true;
|
||||
int32_t word_end = false;
|
||||
bool add_failed = false;
|
||||
@ -186,6 +185,7 @@ bool Trie::add_word_to_dawg(const WERD_CHOICE &word, const std::vector<bool> *re
|
||||
}
|
||||
|
||||
UNICHAR_ID unichar_id;
|
||||
unsigned i;
|
||||
for (i = 0; i < word.length() - 1; ++i) {
|
||||
unichar_id = word.unichar_id(i);
|
||||
marker_flag = (repetitions != nullptr) ? (*repetitions)[i] : false;
|
||||
@ -517,8 +517,6 @@ SquishedDawg *Trie::trie_to_dawg() {
|
||||
// Build a translation map from node indices in nodes_ vector to
|
||||
// their target indices in EDGE_ARRAY.
|
||||
std::vector<NODE_REF> node_ref_map(nodes_.size() + 1);
|
||||
int i, j;
|
||||
node_ref_map[0] = 0;
|
||||
for (i = 0; i < nodes_.size(); ++i) {
|
||||
node_ref_map[i + 1] = node_ref_map[i] + nodes_[i]->forward_edges.size();
|
||||
}
|
||||
@ -531,10 +529,10 @@ SquishedDawg *Trie::trie_to_dawg() {
|
||||
for (i = 0; i < nodes_.size(); ++i) {
|
||||
TRIE_NODE_RECORD *node_ptr = nodes_[i];
|
||||
int end = node_ptr->forward_edges.size();
|
||||
for (j = 0; j < end; ++j) {
|
||||
for (int j = 0; j < end; ++j) {
|
||||
EDGE_RECORD &edge_rec = node_ptr->forward_edges[j];
|
||||
NODE_REF node_ref = next_node_from_edge_rec(edge_rec);
|
||||
ASSERT_HOST(node_ref < nodes_.size());
|
||||
ASSERT_HOST(static_cast<size_t>(node_ref) < nodes_.size());
|
||||
UNICHAR_ID unichar_id = unichar_id_from_edge_rec(edge_rec);
|
||||
link_edge(edge_array_ptr, node_ref_map[node_ref], false, FORWARD_EDGE,
|
||||
end_of_word_from_edge_rec(edge_rec), unichar_id);
|
||||
@ -566,10 +564,9 @@ bool Trie::eliminate_redundant_edges(NODE_REF node, const EDGE_RECORD &edge1,
|
||||
// Translate all edges going to/from next_node2 to go to/from next_node1.
|
||||
EDGE_RECORD *edge_ptr = nullptr;
|
||||
EDGE_INDEX edge_index;
|
||||
int i;
|
||||
// The backward link in node to next_node2 will be zeroed out by the caller.
|
||||
// Copy all the backward links in next_node2 to node next_node1
|
||||
for (i = 0; i < next_node2_ptr->backward_edges.size(); ++i) {
|
||||
for (unsigned i = 0; i < next_node2_ptr->backward_edges.size(); ++i) {
|
||||
const EDGE_RECORD &bkw_edge = next_node2_ptr->backward_edges[i];
|
||||
NODE_REF curr_next_node = next_node_from_edge_rec(bkw_edge);
|
||||
UNICHAR_ID curr_unichar_id = unichar_id_from_edge_rec(bkw_edge);
|
||||
@ -600,7 +597,7 @@ bool Trie::reduce_lettered_edges(EDGE_INDEX edge_index, UNICHAR_ID unichar_id, N
|
||||
}
|
||||
// Compare each of the edge pairs with the given unichar_id.
|
||||
bool did_something = false;
|
||||
for (int i = edge_index; i < backward_edges->size() - 1; ++i) {
|
||||
for (unsigned i = edge_index; i < backward_edges->size() - 1; ++i) {
|
||||
// Find the first edge that can be eliminated.
|
||||
UNICHAR_ID curr_unichar_id = INVALID_UNICHAR_ID;
|
||||
while (i < backward_edges->size()) {
|
||||
@ -620,7 +617,7 @@ bool Trie::reduce_lettered_edges(EDGE_INDEX edge_index, UNICHAR_ID unichar_id, N
|
||||
}
|
||||
const EDGE_RECORD &edge_rec = (*backward_edges)[i];
|
||||
// Compare it to the rest of the edges with the given unichar_id.
|
||||
for (int j = i + 1; j < backward_edges->size(); ++j) {
|
||||
for (auto j = i + 1; j < backward_edges->size(); ++j) {
|
||||
const EDGE_RECORD &next_edge_rec = (*backward_edges)[j];
|
||||
if (DeadEdge(next_edge_rec)) {
|
||||
continue;
|
||||
@ -666,7 +663,7 @@ void Trie::reduce_node_input(NODE_REF node, std::vector<bool> &reduced_nodes) {
|
||||
}
|
||||
|
||||
EDGE_INDEX edge_index = 0;
|
||||
while (edge_index < backward_edges.size()) {
|
||||
while (static_cast<size_t>(edge_index) < backward_edges.size()) {
|
||||
if (DeadEdge(backward_edges[edge_index])) {
|
||||
continue;
|
||||
}
|
||||
@ -674,7 +671,7 @@ void Trie::reduce_node_input(NODE_REF node, std::vector<bool> &reduced_nodes) {
|
||||
while (reduce_lettered_edges(edge_index, unichar_id, node, &backward_edges, reduced_nodes)) {
|
||||
;
|
||||
}
|
||||
while (++edge_index < backward_edges.size()) {
|
||||
while (static_cast<size_t>(++edge_index) < backward_edges.size()) {
|
||||
UNICHAR_ID id = unichar_id_from_edge_rec(backward_edges[edge_index]);
|
||||
if (!DeadEdge(backward_edges[edge_index]) && id != unichar_id) {
|
||||
break;
|
||||
|
Loading…
Reference in New Issue
Block a user