dict: Fix some signed/unsigned compiler warnings

Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
Stefan Weil 2021-10-09 20:53:52 +02:00
parent bcc71c675a
commit a274f4a531
6 changed files with 36 additions and 46 deletions

View File

@ -44,9 +44,8 @@ const int case_state_table[6][4] = {
int Dict::case_ok(const WERD_CHOICE &word) const {
int state = 0;
int x;
const UNICHARSET *unicharset = word.unicharset();
for (x = 0; x < word.length(); ++x) {
for (unsigned x = 0; x < word.length(); ++x) {
UNICHAR_ID ch_id = word.unichar_id(x);
if (unicharset->get_isupper(ch_id)) {
state = case_state_table[state][1];
@ -69,7 +68,7 @@ bool Dict::absolute_garbage(const WERD_CHOICE &word, const UNICHARSET &unicharse
return false;
}
int num_alphanum = 0;
for (int x = 0; x < word.length(); ++x) {
for (unsigned x = 0; x < word.length(); ++x) {
num_alphanum +=
(unicharset.get_isalpha(word.unichar_id(x)) || unicharset.get_isdigit(word.unichar_id(x)));
}

View File

@ -136,10 +136,7 @@ void Dawg::iterate_words_rec(const WERD_CHOICE &word_so_far, NODE_REF to_explore
}
}
bool Dawg::match_words(WERD_CHOICE *word, int32_t index, NODE_REF node, UNICHAR_ID wildcard) const {
EDGE_REF edge;
int32_t word_end;
bool Dawg::match_words(WERD_CHOICE *word, uint32_t index, NODE_REF node, UNICHAR_ID wildcard) const {
if (wildcard != INVALID_UNICHAR_ID && word->unichar_id(index) == wildcard) {
bool any_matched = false;
NodeChildVector vec;
@ -153,8 +150,8 @@ bool Dawg::match_words(WERD_CHOICE *word, int32_t index, NODE_REF node, UNICHAR_
word->set_unichar_id(wildcard, index);
return any_matched;
} else {
word_end = index == word->length() - 1;
edge = edge_char_of(node, word->unichar_id(index), word_end);
auto word_end = index == word->length() - 1;
auto edge = edge_char_of(node, word->unichar_id(index), word_end);
if (edge != NO_EDGE) { // normal edge in DAWG
node = next_node(edge);
if (word_end) {

View File

@ -277,7 +277,7 @@ protected:
/// the *'s in this string are interpreted as wildcards.
/// WERD_CHOICE param is not passed by const so that wildcard searches
/// can modify it and work without having to copy WERD_CHOICEs.
bool match_words(WERD_CHOICE *word, int32_t index, NODE_REF node, UNICHAR_ID wildcard) const;
bool match_words(WERD_CHOICE *word, uint32_t index, NODE_REF node, UNICHAR_ID wildcard) const;
// Recursively iterate over all words in a dawg (see public iterate_words).
void iterate_words_rec(const WERD_CHOICE &word_so_far, NODE_REF to_explore,

View File

@ -364,7 +364,7 @@ bool Dict::FinishLoad() {
successors_.reserve(dawgs_.size());
for (auto dawg : dawgs_) {
auto *lst = new SuccessorList();
for (int j = 0; j < dawgs_.size(); ++j) {
for (unsigned j = 0; j < dawgs_.size(); ++j) {
const Dawg *other = dawgs_[j];
if (dawg != nullptr && other != nullptr && (dawg->lang() == other->lang()) &&
kDawgSuccessors[dawg->type()][other->type()]) {
@ -432,7 +432,7 @@ int Dict::def_letter_is_okay(void *void_dawg_args, const UNICHARSET &unicharset,
// Go over the active_dawgs vector and insert DawgPosition records
// with the updated ref (an edge with the corresponding unichar id) into
// dawg_args->updated_pos.
for (int a = 0; a < dawg_args->active_dawgs->size(); ++a) {
for (unsigned a = 0; a < dawg_args->active_dawgs->size(); ++a) {
const DawgPosition &pos = (*dawg_args->active_dawgs)[a];
const Dawg *punc_dawg = pos.punc_index >= 0 ? dawgs_[pos.punc_index] : nullptr;
const Dawg *dawg = pos.dawg_index >= 0 ? dawgs_[pos.dawg_index] : nullptr;
@ -608,11 +608,10 @@ void Dict::ProcessPatternEdges(const Dawg *dawg, const DawgPosition &pos, UNICHA
// beginning of the word. If hyphenated() returns true, copy the entries
// from hyphen_active_dawgs_ instead.
void Dict::init_active_dawgs(DawgPositionVector *active_dawgs, bool ambigs_mode) const {
int i;
if (hyphenated()) {
*active_dawgs = hyphen_active_dawgs_;
if (dawg_debug_level >= 3) {
for (i = 0; i < hyphen_active_dawgs_.size(); ++i) {
for (unsigned i = 0; i < hyphen_active_dawgs_.size(); ++i) {
tprintf("Adding hyphen beginning dawg [%d, " REFFORMAT "]\n",
hyphen_active_dawgs_[i].dawg_index, hyphen_active_dawgs_[i].dawg_ref);
}
@ -626,7 +625,7 @@ void Dict::default_dawgs(DawgPositionVector *dawg_pos_vec, bool suppress_pattern
bool punc_dawg_available = (punc_dawg_ != nullptr) &&
punc_dawg_->edge_char_of(0, Dawg::kPatternUnicharID, true) != NO_EDGE;
for (int i = 0; i < dawgs_.size(); i++) {
for (unsigned i = 0; i < dawgs_.size(); i++) {
if (dawgs_[i] != nullptr && !(suppress_patterns && (dawgs_[i])->type() == DAWG_TYPE_PATTERN)) {
int dawg_ty = dawgs_[i]->type();
bool subsumed_by_punc = kDawgSuccessors[DAWG_TYPE_PUNCTUATION][dawg_ty];
@ -666,7 +665,7 @@ void Dict::add_document_word(const WERD_CHOICE &best_choice) {
if (best_choice.length() >= kDocDictMaxRepChars) {
int num_rep_chars = 1;
UNICHAR_ID uch_id = best_choice.unichar_id(0);
for (int i = 1; i < best_choice.length(); ++i) {
for (unsigned i = 1; i < best_choice.length(); ++i) {
if (best_choice.unichar_id(i) != uch_id) {
num_rep_chars = 1;
uch_id = best_choice.unichar_id(i);
@ -841,7 +840,7 @@ bool Dict::valid_bigram(const WERD_CHOICE &word1, const WERD_CHOICE &word2) cons
// Extract the core word from the middle of each word with any digits
// replaced with question marks.
int w1start, w1end, w2start, w2end;
unsigned w1start, w1end, w2start, w2end;
word1.punct_stripped(&w1start, &w1end);
word2.punct_stripped(&w2start, &w2end);
@ -857,7 +856,7 @@ bool Dict::valid_bigram(const WERD_CHOICE &word1, const WERD_CHOICE &word2) cons
const UNICHARSET &uchset = getUnicharset();
std::vector<UNICHAR_ID> bigram_string;
bigram_string.reserve(w1end + w2end + 1);
for (int i = w1start; i < w1end; i++) {
for (auto i = w1start; i < w1end; i++) {
const auto &normed_ids = getUnicharset().normed_ids(word1.unichar_id(i));
if (normed_ids.size() == 1 && uchset.get_isdigit(normed_ids[0])) {
bigram_string.push_back(question_unichar_id_);
@ -866,7 +865,7 @@ bool Dict::valid_bigram(const WERD_CHOICE &word1, const WERD_CHOICE &word2) cons
}
}
bigram_string.push_back(UNICHAR_SPACE);
for (int i = w2start; i < w2end; i++) {
for (auto i = w2start; i < w2end; i++) {
const auto &normed_ids = getUnicharset().normed_ids(word2.unichar_id(i));
if (normed_ids.size() == 1 && uchset.get_isdigit(normed_ids[0])) {
bigram_string.push_back(question_unichar_id_);
@ -885,11 +884,10 @@ bool Dict::valid_punctuation(const WERD_CHOICE &word) {
if (word.empty()) {
return NO_PERM;
}
int i;
WERD_CHOICE new_word(word.unicharset());
int last_index = word.length() - 1;
auto last_index = word.length() - 1;
int new_len = 0;
for (i = 0; i <= last_index; ++i) {
for (unsigned i = 0; i <= last_index; ++i) {
UNICHAR_ID unichar_id = (word.unichar_id(i));
if (getUnicharset().get_ispunctuation(unichar_id)) {
new_word.append_unichar_id(unichar_id, 1, 0.0, 0.0);
@ -901,7 +899,7 @@ bool Dict::valid_punctuation(const WERD_CHOICE &word) {
new_word.append_unichar_id(Dawg::kPatternUnicharID, 1, 0.0, 0.0);
}
}
for (i = 0; i < dawgs_.size(); ++i) {
for (unsigned i = 0; i < dawgs_.size(); ++i) {
if (dawgs_[i] != nullptr && dawgs_[i]->type() == DAWG_TYPE_PUNCTUATION &&
dawgs_[i]->word_in_dawg(new_word)) {
return true;

View File

@ -164,7 +164,6 @@ bool Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, DANGERR *fixpt, bool fix_r
// Construct BLOB_CHOICE_LIST_VECTOR with ambiguities
// for each unichar id in BestChoice.
BLOB_CHOICE_LIST_VECTOR ambig_blob_choices;
int i;
bool ambigs_found = false;
// For each position in best_choice:
// -- choose AMBIG_SPEC_LIST that corresponds to unichar_id at best_choice[i]
@ -190,7 +189,7 @@ bool Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, DANGERR *fixpt, bool fix_r
// unichar id for the corresponding position in best_choice.
// best_choice consisting of only the original letters will
// have a rating of 0.0.
for (i = 0; i < best_choice->length(); ++i) {
for (unsigned i = 0; i < best_choice->length(); ++i) {
auto *lst = new BLOB_CHOICE_LIST();
BLOB_CHOICE_IT lst_it(lst);
// TODO(rays/antonova) Put real xheights and y shifts here.
@ -201,10 +200,9 @@ bool Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, DANGERR *fixpt, bool fix_r
}
UNICHAR_ID wrong_ngram[MAX_AMBIG_SIZE + 1];
int wrong_ngram_index;
int next_index;
int blob_index = 0;
for (i = 0; i < best_choice->length(); blob_index += best_choice->state(i), ++i) {
UNICHAR_ID curr_unichar_id = best_choice->unichar_id(i);
for (unsigned i = 0; i < best_choice->length(); blob_index += best_choice->state(i), ++i) {
auto curr_unichar_id = best_choice->unichar_id(i);
if (stopper_debug_level > 2) {
tprintf("Looking for %s ngrams starting with %s:\n", replace ? "replaceable" : "ambiguous",
getUnicharset().debug_str(curr_unichar_id).c_str());
@ -212,7 +210,7 @@ bool Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, DANGERR *fixpt, bool fix_r
int num_wrong_blobs = best_choice->state(i);
wrong_ngram_index = 0;
wrong_ngram[wrong_ngram_index] = curr_unichar_id;
if (curr_unichar_id == INVALID_UNICHAR_ID || curr_unichar_id >= table.size() ||
if (curr_unichar_id == INVALID_UNICHAR_ID || static_cast<size_t>(curr_unichar_id) >= table.size() ||
table[curr_unichar_id] == nullptr) {
continue; // there is no ambig spec for this unichar id
}
@ -272,6 +270,7 @@ bool Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, DANGERR *fixpt, bool fix_r
}
spec_it.forward();
} else if (compare == -1) {
unsigned next_index;
if (wrong_ngram_index + 1 < ambig_spec->wrong_ngram_size &&
((next_index = wrong_ngram_index + 1 + i) < best_choice->length())) {
// Add the next unichar id to wrong_ngram and keep looking for
@ -293,7 +292,7 @@ bool Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, DANGERR *fixpt, bool fix_r
if (ambigs_found) {
if (stopper_debug_level > 2) {
tprintf("\nResulting ambig_blob_choices:\n");
for (i = 0; i < ambig_blob_choices.size(); ++i) {
for (unsigned i = 0; i < ambig_blob_choices.size(); ++i) {
print_ratings_list("", ambig_blob_choices.at(i), getUnicharset());
tprintf("\n");
}
@ -310,7 +309,7 @@ bool Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, DANGERR *fixpt, bool fix_r
// the capability to produce classifications combined from character
// fragments is added to other functions.
int orig_i = 0;
for (i = 0; i < alt_word->length(); ++i) {
for (unsigned i = 0; i < alt_word->length(); ++i) {
const UNICHARSET &uchset = getUnicharset();
bool replacement_is_ngram = uchset.get_isngram(alt_word->unichar_id(i));
UNICHAR_ID leftmost_id = alt_word->unichar_id(i);
@ -444,7 +443,7 @@ void Dict::ReplaceAmbig(int wrong_ngram_begin_index, int wrong_ngram_size,
int Dict::LengthOfShortestAlphaRun(const WERD_CHOICE &WordChoice) const {
int shortest = INT32_MAX;
int curr_len = 0;
for (int w = 0; w < WordChoice.length(); ++w) {
for (unsigned w = 0; w < WordChoice.length(); ++w) {
if (WordChoice.unicharset()->get_isalpha(WordChoice.unichar_id(w))) {
curr_len++;
} else if (curr_len > 0) {

View File

@ -71,7 +71,7 @@ bool Trie::edge_char_of(NODE_REF node_ref, NODE_REF next_node, int direction, bo
if (node_ref == NO_EDGE) {
return false;
}
assert(node_ref < nodes_.size());
assert(static_cast<size_t>(node_ref) < nodes_.size());
EDGE_VECTOR &vec = (direction == FORWARD_EDGE) ? nodes_[node_ref]->forward_edges
: nodes_[node_ref]->backward_edges;
int vec_size = vec.size();
@ -111,7 +111,7 @@ bool Trie::add_edge_linkage(NODE_REF node1, NODE_REF node2, bool marker_flag, in
bool word_end, UNICHAR_ID unichar_id) {
EDGE_VECTOR *vec = (direction == FORWARD_EDGE) ? &(nodes_[node1]->forward_edges)
: &(nodes_[node1]->backward_edges);
int search_index;
unsigned search_index;
if (node1 == 0 && direction == FORWARD_EDGE) {
search_index = 0; // find the index to make the add sorted
while (search_index < vec->size() &&
@ -164,7 +164,7 @@ bool Trie::add_word_to_dawg(const WERD_CHOICE &word, const std::vector<bool> *re
ASSERT_HOST(repetitions->size() == word.length());
}
// Make sure the word does not contain invalid unichar ids.
for (int i = 0; i < word.length(); ++i) {
for (unsigned i = 0; i < word.length(); ++i) {
if (word.unichar_id(i) < 0 || word.unichar_id(i) >= unicharset_size_) {
return false;
}
@ -175,7 +175,6 @@ bool Trie::add_word_to_dawg(const WERD_CHOICE &word, const std::vector<bool> *re
NODE_REF the_next_node;
bool marker_flag = false;
EDGE_INDEX edge_index;
int i;
int32_t still_finding_chars = true;
int32_t word_end = false;
bool add_failed = false;
@ -186,6 +185,7 @@ bool Trie::add_word_to_dawg(const WERD_CHOICE &word, const std::vector<bool> *re
}
UNICHAR_ID unichar_id;
unsigned i;
for (i = 0; i < word.length() - 1; ++i) {
unichar_id = word.unichar_id(i);
marker_flag = (repetitions != nullptr) ? (*repetitions)[i] : false;
@ -517,8 +517,6 @@ SquishedDawg *Trie::trie_to_dawg() {
// Build a translation map from node indices in nodes_ vector to
// their target indices in EDGE_ARRAY.
std::vector<NODE_REF> node_ref_map(nodes_.size() + 1);
int i, j;
node_ref_map[0] = 0;
for (i = 0; i < nodes_.size(); ++i) {
node_ref_map[i + 1] = node_ref_map[i] + nodes_[i]->forward_edges.size();
}
@ -531,10 +529,10 @@ SquishedDawg *Trie::trie_to_dawg() {
for (i = 0; i < nodes_.size(); ++i) {
TRIE_NODE_RECORD *node_ptr = nodes_[i];
int end = node_ptr->forward_edges.size();
for (j = 0; j < end; ++j) {
for (int j = 0; j < end; ++j) {
EDGE_RECORD &edge_rec = node_ptr->forward_edges[j];
NODE_REF node_ref = next_node_from_edge_rec(edge_rec);
ASSERT_HOST(node_ref < nodes_.size());
ASSERT_HOST(static_cast<size_t>(node_ref) < nodes_.size());
UNICHAR_ID unichar_id = unichar_id_from_edge_rec(edge_rec);
link_edge(edge_array_ptr, node_ref_map[node_ref], false, FORWARD_EDGE,
end_of_word_from_edge_rec(edge_rec), unichar_id);
@ -566,10 +564,9 @@ bool Trie::eliminate_redundant_edges(NODE_REF node, const EDGE_RECORD &edge1,
// Translate all edges going to/from next_node2 to go to/from next_node1.
EDGE_RECORD *edge_ptr = nullptr;
EDGE_INDEX edge_index;
int i;
// The backward link in node to next_node2 will be zeroed out by the caller.
// Copy all the backward links in next_node2 to node next_node1
for (i = 0; i < next_node2_ptr->backward_edges.size(); ++i) {
for (unsigned i = 0; i < next_node2_ptr->backward_edges.size(); ++i) {
const EDGE_RECORD &bkw_edge = next_node2_ptr->backward_edges[i];
NODE_REF curr_next_node = next_node_from_edge_rec(bkw_edge);
UNICHAR_ID curr_unichar_id = unichar_id_from_edge_rec(bkw_edge);
@ -600,7 +597,7 @@ bool Trie::reduce_lettered_edges(EDGE_INDEX edge_index, UNICHAR_ID unichar_id, N
}
// Compare each of the edge pairs with the given unichar_id.
bool did_something = false;
for (int i = edge_index; i < backward_edges->size() - 1; ++i) {
for (unsigned i = edge_index; i < backward_edges->size() - 1; ++i) {
// Find the first edge that can be eliminated.
UNICHAR_ID curr_unichar_id = INVALID_UNICHAR_ID;
while (i < backward_edges->size()) {
@ -620,7 +617,7 @@ bool Trie::reduce_lettered_edges(EDGE_INDEX edge_index, UNICHAR_ID unichar_id, N
}
const EDGE_RECORD &edge_rec = (*backward_edges)[i];
// Compare it to the rest of the edges with the given unichar_id.
for (int j = i + 1; j < backward_edges->size(); ++j) {
for (auto j = i + 1; j < backward_edges->size(); ++j) {
const EDGE_RECORD &next_edge_rec = (*backward_edges)[j];
if (DeadEdge(next_edge_rec)) {
continue;
@ -666,7 +663,7 @@ void Trie::reduce_node_input(NODE_REF node, std::vector<bool> &reduced_nodes) {
}
EDGE_INDEX edge_index = 0;
while (edge_index < backward_edges.size()) {
while (static_cast<size_t>(edge_index) < backward_edges.size()) {
if (DeadEdge(backward_edges[edge_index])) {
continue;
}
@ -674,7 +671,7 @@ void Trie::reduce_node_input(NODE_REF node, std::vector<bool> &reduced_nodes) {
while (reduce_lettered_edges(edge_index, unichar_id, node, &backward_edges, reduced_nodes)) {
;
}
while (++edge_index < backward_edges.size()) {
while (static_cast<size_t>(++edge_index) < backward_edges.size()) {
UNICHAR_ID id = unichar_id_from_edge_rec(backward_edges[edge_index]);
if (!DeadEdge(backward_edges[edge_index]) && id != unichar_id) {
break;