mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-24 02:59:07 +08:00
Fix more signed/unsigned compiler warnings
Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
parent
86d981eee6
commit
842cca1d49
@ -2,7 +2,6 @@
|
||||
* File: mod128.h (Formerly dir128.h)
|
||||
* Description: Header for class which implements modulo arithmetic.
|
||||
* Author: Ray Smith
|
||||
* Created: Tue Mar 26 17:48:13 GMT 1991
|
||||
*
|
||||
* (C) Copyright 1991, Hewlett-Packard Ltd.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ -81,7 +80,6 @@ public:
|
||||
return dir;
|
||||
}
|
||||
|
||||
private:
|
||||
int8_t dir; // a direction
|
||||
};
|
||||
|
||||
|
@ -362,7 +362,7 @@ public:
|
||||
return nullptr;
|
||||
}
|
||||
UNICHAR_ID id = best_choice->unichar_id(blob_index);
|
||||
if (id < 0 || id >= uch_set->size()) {
|
||||
if (static_cast<unsigned>(id) >= uch_set->size()) {
|
||||
return nullptr;
|
||||
}
|
||||
UNICHAR_ID mirrored = uch_set->get_mirror(id);
|
||||
@ -377,7 +377,7 @@ public:
|
||||
return nullptr;
|
||||
}
|
||||
UNICHAR_ID id = raw_choice->unichar_id(blob_index);
|
||||
if (id < 0 || id >= uch_set->size()) {
|
||||
if (static_cast<unsigned>(id) >= uch_set->size()) {
|
||||
return nullptr;
|
||||
}
|
||||
return uch_set->id_to_unichar(id);
|
||||
@ -395,8 +395,8 @@ public:
|
||||
return false;
|
||||
}
|
||||
for (unsigned id = 0; id < best_choice->length(); id++) {
|
||||
int unichar_id = best_choice->unichar_id(id);
|
||||
if (unichar_id < 0 || unichar_id >= uch_set->size()) {
|
||||
unsigned unichar_id = best_choice->unichar_id(id);
|
||||
if (unichar_id >= uch_set->size()) {
|
||||
continue; // Ignore illegal chars.
|
||||
}
|
||||
UNICHARSET::Direction dir = uch_set->get_direction(unichar_id);
|
||||
@ -412,8 +412,8 @@ public:
|
||||
return false;
|
||||
}
|
||||
for (unsigned id = 0; id < best_choice->length(); id++) {
|
||||
int unichar_id = best_choice->unichar_id(id);
|
||||
if (unichar_id < 0 || unichar_id >= uch_set->size()) {
|
||||
unsigned unichar_id = best_choice->unichar_id(id);
|
||||
if (unichar_id >= uch_set->size()) {
|
||||
continue; // Ignore illegal chars.
|
||||
}
|
||||
UNICHARSET::Direction dir = uch_set->get_direction(unichar_id);
|
||||
|
@ -49,7 +49,7 @@ AmbigSpec::AmbigSpec() {
|
||||
|
||||
// Initializes the ambigs by adding a nullptr to each table.
|
||||
void UnicharAmbigs::InitUnicharAmbigs(const UNICHARSET &unicharset, bool use_ambigs_for_adaption) {
|
||||
for (int i = 0; i < unicharset.size(); ++i) {
|
||||
for (unsigned i = 0; i < unicharset.size(); ++i) {
|
||||
replace_ambigs_.push_back(nullptr);
|
||||
dang_ambigs_.push_back(nullptr);
|
||||
one_to_one_definite_ambigs_.push_back(nullptr);
|
||||
@ -72,7 +72,6 @@ void UnicharAmbigs::LoadUniversal(const UNICHARSET &encoder_set, UNICHARSET *uni
|
||||
void UnicharAmbigs::LoadUnicharAmbigs(const UNICHARSET &encoder_set, TFile *ambig_file,
|
||||
int debug_level, bool use_ambigs_for_adaption,
|
||||
UNICHARSET *unicharset) {
|
||||
int i, j;
|
||||
UnicharIdVector *adaption_ambigs_entry;
|
||||
if (debug_level) {
|
||||
tprintf("Reading ambiguities\n");
|
||||
@ -130,7 +129,7 @@ void UnicharAmbigs::LoadUnicharAmbigs(const UNICHARSET &encoder_set, TFile *ambi
|
||||
// Silently ignore invalid strings, as before, so it is safe to use a
|
||||
// universal ambigs file.
|
||||
if (unicharset->encode_string(replacement_string, true, &encoding, nullptr, nullptr)) {
|
||||
for (i = 0; i < test_ambig_part_size; ++i) {
|
||||
for (int i = 0; i < test_ambig_part_size; ++i) {
|
||||
if (ambigs_for_adaption_[test_unichar_ids[i]] == nullptr) {
|
||||
ambigs_for_adaption_[test_unichar_ids[i]] = new UnicharIdVector();
|
||||
}
|
||||
@ -139,6 +138,7 @@ void UnicharAmbigs::LoadUnicharAmbigs(const UNICHARSET &encoder_set, TFile *ambi
|
||||
ASSERT_HOST(id_to_insert != INVALID_UNICHAR_ID);
|
||||
// Add the new unichar id to adaption_ambigs_entry (only if the
|
||||
// vector does not already contain it) keeping it in sorted order.
|
||||
size_t j;
|
||||
for (j = 0;
|
||||
j < adaption_ambigs_entry->size() && (*adaption_ambigs_entry)[j] > id_to_insert;
|
||||
++j) {
|
||||
@ -160,12 +160,12 @@ void UnicharAmbigs::LoadUnicharAmbigs(const UNICHARSET &encoder_set, TFile *ambi
|
||||
|
||||
// Fill in reverse_ambigs_for_adaption from ambigs_for_adaption vector.
|
||||
if (use_ambigs_for_adaption) {
|
||||
for (i = 0; i < ambigs_for_adaption_.size(); ++i) {
|
||||
for (size_t i = 0; i < ambigs_for_adaption_.size(); ++i) {
|
||||
adaption_ambigs_entry = ambigs_for_adaption_[i];
|
||||
if (adaption_ambigs_entry == nullptr) {
|
||||
continue;
|
||||
}
|
||||
for (j = 0; j < adaption_ambigs_entry->size(); ++j) {
|
||||
for (size_t j = 0; j < adaption_ambigs_entry->size(); ++j) {
|
||||
UNICHAR_ID ambig_id = (*adaption_ambigs_entry)[j];
|
||||
if (reverse_ambigs_for_adaption_[ambig_id] == nullptr) {
|
||||
reverse_ambigs_for_adaption_[ambig_id] = new UnicharIdVector();
|
||||
@ -179,7 +179,7 @@ void UnicharAmbigs::LoadUnicharAmbigs(const UNICHARSET &encoder_set, TFile *ambi
|
||||
if (debug_level > 1) {
|
||||
for (int tbl = 0; tbl < 2; ++tbl) {
|
||||
const UnicharAmbigsVector &print_table = (tbl == 0) ? replace_ambigs_ : dang_ambigs_;
|
||||
for (i = 0; i < print_table.size(); ++i) {
|
||||
for (size_t i = 0; i < print_table.size(); ++i) {
|
||||
AmbigSpec_LIST *lst = print_table[i];
|
||||
if (lst == nullptr) {
|
||||
continue;
|
||||
@ -202,12 +202,12 @@ void UnicharAmbigs::LoadUnicharAmbigs(const UNICHARSET &encoder_set, TFile *ambi
|
||||
for (int vec_id = 0; vec_id < 2; ++vec_id) {
|
||||
const std::vector<UnicharIdVector *> &vec =
|
||||
(vec_id == 0) ? ambigs_for_adaption_ : reverse_ambigs_for_adaption_;
|
||||
for (i = 0; i < vec.size(); ++i) {
|
||||
for (size_t i = 0; i < vec.size(); ++i) {
|
||||
adaption_ambigs_entry = vec[i];
|
||||
if (adaption_ambigs_entry != nullptr) {
|
||||
tprintf("%sAmbigs for adaption for %s:\n", (vec_id == 0) ? "" : "Reverse ",
|
||||
unicharset->debug_str(i).c_str());
|
||||
for (j = 0; j < adaption_ambigs_entry->size(); ++j) {
|
||||
for (size_t j = 0; j < adaption_ambigs_entry->size(); ++j) {
|
||||
tprintf("%s ", unicharset->debug_str((*adaption_ambigs_entry)[j]).c_str());
|
||||
}
|
||||
tprintf("\n");
|
||||
@ -246,7 +246,7 @@ bool UnicharAmbigs::ParseAmbiguityLine(int line_num, int version, int debug_leve
|
||||
return false;
|
||||
}
|
||||
// Copy encoded string to output.
|
||||
for (int i = 0; i < unichars.size(); ++i) {
|
||||
for (size_t i = 0; i < unichars.size(); ++i) {
|
||||
test_unichar_ids[i] = unichars[i];
|
||||
}
|
||||
test_unichar_ids[unichars.size()] = INVALID_UNICHAR_ID;
|
||||
|
@ -222,8 +222,8 @@ bool IndexMapBiDi::Serialize(FILE *fp) const {
|
||||
// then each additional sparse entry needs to be stored.
|
||||
// Normally we store only the compact map to save space.
|
||||
std::vector<int32_t> remaining_pairs;
|
||||
for (size_t i = 0; i < sparse_map_.size(); ++i) {
|
||||
if (sparse_map_[i] >= 0 && compact_map_[sparse_map_[i]] != i) {
|
||||
for (unsigned i = 0; i < sparse_map_.size(); ++i) {
|
||||
if (sparse_map_[i] >= 0 && static_cast<unsigned>(compact_map_[sparse_map_[i]]) != i) {
|
||||
remaining_pairs.push_back(i);
|
||||
remaining_pairs.push_back(sparse_map_[i]);
|
||||
}
|
||||
@ -243,7 +243,7 @@ bool IndexMapBiDi::DeSerialize(bool swap, FILE *fp) {
|
||||
}
|
||||
sparse_map_.clear();
|
||||
sparse_map_.resize(sparse_size_, -1);
|
||||
for (size_t i = 0; i < compact_map_.size(); ++i) {
|
||||
for (unsigned i = 0; i < compact_map_.size(); ++i) {
|
||||
sparse_map_[compact_map_[i]] = i;
|
||||
}
|
||||
for (size_t i = 0; i < remaining_pairs.size(); ++i) {
|
||||
|
@ -61,7 +61,7 @@ static bool DecodeRadicalLine(std::string &radical_data_line, RSMap *radical_map
|
||||
return false;
|
||||
}
|
||||
std::unique_ptr<std::vector<int>> radicals(new std::vector<int>);
|
||||
for (int i = 1; i < entries.size(); ++i) {
|
||||
for (size_t i = 1; i < entries.size(); ++i) {
|
||||
int radical = strtol(&entries[i][0], &end, 10);
|
||||
if (*end != '\0') {
|
||||
return false;
|
||||
@ -132,10 +132,10 @@ bool UnicharCompress::ComputeEncoding(const UNICHARSET &unicharset, int null_id,
|
||||
// to measure the number of radicals and strokes, initially we use the same
|
||||
// code range for all 3 Han code positions, and fix them after.
|
||||
int han_offset = hangul_offset + kTotalJamos;
|
||||
for (int u = 0; u <= unicharset.size(); ++u) {
|
||||
for (unsigned u = 0; u <= unicharset.size(); ++u) {
|
||||
// We special-case allow null_id to be equal to unicharset.size() in case
|
||||
// there is no space in unicharset for it.
|
||||
if (u == unicharset.size() && u != null_id) {
|
||||
if (u == unicharset.size() && static_cast<int>(u) != null_id) {
|
||||
break; // Finished
|
||||
}
|
||||
RecodedCharID code;
|
||||
@ -173,7 +173,7 @@ bool UnicharCompress::ComputeEncoding(const UNICHARSET &unicharset, int null_id,
|
||||
// Special cases.
|
||||
if (u == UNICHAR_SPACE) {
|
||||
code.Set(0, 0); // Space.
|
||||
} else if (u == null_id ||
|
||||
} else if (static_cast<int>(u) == null_id ||
|
||||
(unicharset.has_special_codes() && u < SPECIAL_UNICHAR_CODES_COUNT)) {
|
||||
code.Set(0, direct_set.unichar_to_id(kNullChar));
|
||||
} else {
|
||||
@ -207,7 +207,7 @@ bool UnicharCompress::ComputeEncoding(const UNICHARSET &unicharset, int null_id,
|
||||
int code_offset = 0;
|
||||
for (int i = 0; i < RecodedCharID::kMaxCodeLen; ++i) {
|
||||
int max_offset = 0;
|
||||
for (int u = 0; u < unicharset.size(); ++u) {
|
||||
for (unsigned u = 0; u < unicharset.size(); ++u) {
|
||||
RecodedCharID *code = &encoder_[u];
|
||||
if (code->length() <= i) {
|
||||
continue;
|
||||
@ -229,7 +229,7 @@ bool UnicharCompress::ComputeEncoding(const UNICHARSET &unicharset, int null_id,
|
||||
// passes them through unchanged.
|
||||
void UnicharCompress::SetupPassThrough(const UNICHARSET &unicharset) {
|
||||
std::vector<RecodedCharID> codes;
|
||||
for (int u = 0; u < unicharset.size(); ++u) {
|
||||
for (unsigned u = 0; u < unicharset.size(); ++u) {
|
||||
RecodedCharID code;
|
||||
code.Set(0, u);
|
||||
codes.push_back(code);
|
||||
@ -268,7 +268,7 @@ void UnicharCompress::DefragmentCodeValues(int encoded_null) {
|
||||
for (unsigned i = 0; i < offsets.size(); ++i) {
|
||||
// If not used, decrement everything above here.
|
||||
// We are moving encoded_null to the end, so it is not "used".
|
||||
if (offsets[i] == 0 || i == encoded_null) {
|
||||
if (offsets[i] == 0 || i == static_cast<unsigned>(encoded_null)) {
|
||||
--offset;
|
||||
} else {
|
||||
offsets[i] = offset;
|
||||
@ -292,8 +292,8 @@ void UnicharCompress::DefragmentCodeValues(int encoded_null) {
|
||||
|
||||
// Encodes a single unichar_id. Returns the length of the code, or zero if
|
||||
// invalid input, and the encoding itself in code.
|
||||
int UnicharCompress::EncodeUnichar(int unichar_id, RecodedCharID *code) const {
|
||||
if (unichar_id < 0 || unichar_id >= encoder_.size()) {
|
||||
int UnicharCompress::EncodeUnichar(unsigned unichar_id, RecodedCharID *code) const {
|
||||
if (unichar_id >= encoder_.size()) {
|
||||
return 0;
|
||||
}
|
||||
*code = encoder_[unichar_id];
|
||||
@ -397,7 +397,7 @@ void UnicharCompress::SetupDecoder() {
|
||||
Cleanup();
|
||||
is_valid_start_.clear();
|
||||
is_valid_start_.resize(code_range_);
|
||||
for (int c = 0; c < encoder_.size(); ++c) {
|
||||
for (unsigned c = 0; c < encoder_.size(); ++c) {
|
||||
const RecodedCharID &code = encoder_[c];
|
||||
decoder_[code] = c;
|
||||
is_valid_start_[code(0)] = true;
|
||||
|
@ -174,7 +174,7 @@ public:
|
||||
|
||||
// Encodes a single unichar_id. Returns the length of the code, (or zero if
|
||||
// invalid input), and the encoding itself in code.
|
||||
int EncodeUnichar(int unichar_id, RecodedCharID *code) const;
|
||||
int EncodeUnichar(unsigned unichar_id, RecodedCharID *code) const;
|
||||
// Decodes code, returning the original unichar-id, or
|
||||
// INVALID_UNICHAR_ID if the input is invalid.
|
||||
int DecodeUnichar(const RecodedCharID &code) const;
|
||||
|
@ -272,7 +272,7 @@ const char *UNICHARSET::id_to_unichar(UNICHAR_ID id) const {
|
||||
if (id == INVALID_UNICHAR_ID) {
|
||||
return INVALID_UNICHAR;
|
||||
}
|
||||
ASSERT_HOST(id < this->size());
|
||||
ASSERT_HOST(static_cast<unsigned>(id) < this->size());
|
||||
return unichars[id].representation;
|
||||
}
|
||||
|
||||
@ -280,7 +280,7 @@ const char *UNICHARSET::id_to_unichar_ext(UNICHAR_ID id) const {
|
||||
if (id == INVALID_UNICHAR_ID) {
|
||||
return INVALID_UNICHAR;
|
||||
}
|
||||
ASSERT_HOST(id < this->size());
|
||||
ASSERT_HOST(static_cast<unsigned>(id) < this->size());
|
||||
// Resolve from the kCustomLigatures table if this is a private encoding.
|
||||
if (get_isprivate(id)) {
|
||||
const char *ch = id_to_unichar(id);
|
||||
@ -384,7 +384,7 @@ void UNICHARSET::set_ranges_empty() {
|
||||
// everything set. The unicharsets don't have to be the same, and graphemes
|
||||
// are correctly accounted for.
|
||||
void UNICHARSET::PartialSetPropertiesFromOther(int start_index, const UNICHARSET &src) {
|
||||
for (int ch = start_index; ch < unichars.size(); ++ch) {
|
||||
for (unsigned ch = start_index; ch < unichars.size(); ++ch) {
|
||||
const char *utf8 = id_to_unichar(ch);
|
||||
UNICHAR_PROPERTIES properties;
|
||||
if (src.GetStrProperties(utf8, &properties)) {
|
||||
@ -481,7 +481,7 @@ void UNICHARSET::encode_string(const char *str, int str_index, int str_length,
|
||||
std::vector<UNICHAR_ID> *encoding, std::vector<char> *lengths,
|
||||
unsigned *best_total_length, std::vector<UNICHAR_ID> *best_encoding,
|
||||
std::vector<char> *best_lengths) const {
|
||||
if (str_index > *best_total_length) {
|
||||
if (str_index > static_cast<int>(*best_total_length)) {
|
||||
// This is the best result so far.
|
||||
*best_total_length = str_index;
|
||||
*best_encoding = *encoding;
|
||||
@ -506,7 +506,7 @@ void UNICHARSET::encode_string(const char *str, int str_index, int str_length,
|
||||
lengths->push_back(length);
|
||||
encode_string(str, str_index + length, str_length, encoding, lengths, best_total_length,
|
||||
best_encoding, best_lengths);
|
||||
if (*best_total_length == str_length) {
|
||||
if (static_cast<int>(*best_total_length) == str_length) {
|
||||
return; // Tail recursion success!
|
||||
}
|
||||
// Failed with that length, truncate back and try again.
|
||||
@ -695,9 +695,9 @@ bool UNICHARSET::eq(UNICHAR_ID unichar_id, const char *const unichar_repr) const
|
||||
bool UNICHARSET::save_to_string(std::string &str) const {
|
||||
const int kFileBufSize = 1024;
|
||||
char buffer[kFileBufSize + 1];
|
||||
snprintf(buffer, kFileBufSize, "%d\n", this->size());
|
||||
snprintf(buffer, kFileBufSize, "%zu\n", this->size());
|
||||
str = buffer;
|
||||
for (UNICHAR_ID id = 0; id < this->size(); ++id) {
|
||||
for (unsigned id = 0; id < this->size(); ++id) {
|
||||
int min_bottom, max_bottom, min_top, max_top;
|
||||
get_top_bottom(id, &min_bottom, &max_bottom, &min_top, &max_top);
|
||||
float width, width_sd;
|
||||
|
@ -283,7 +283,7 @@ public:
|
||||
if (cleaned != unichar_repr) {
|
||||
unichar_insert(unichar_repr, OldUncleanUnichars::kTrue);
|
||||
} else {
|
||||
int old_size = size();
|
||||
auto old_size = size();
|
||||
unichar_insert(unichar_repr, OldUncleanUnichars::kFalse);
|
||||
if (size() == old_size) {
|
||||
unichar_insert(unichar_repr, OldUncleanUnichars::kTrue);
|
||||
@ -345,7 +345,7 @@ public:
|
||||
}
|
||||
|
||||
// Return the size of the set (the number of different UNICHAR it holds).
|
||||
// NOTE(review): this span reads as a rendered diff, not compilable source. The
// `int` line is the signature before the change, the `size_t` line is its
// replacement, and the `|` / `||||` lines are page separators.
int size() const {
|
||||
// Replacement signature: returns unichars.size() as a size_t, so callers that
// compared the result against signed ints need an unsigned index or a cast.
size_t size() const {
|
||||
return unichars.size();
|
||||
}
|
||||
|
||||
|
@ -99,7 +99,7 @@ ADAPT_TEMPLATES_STRUCT::ADAPT_TEMPLATES_STRUCT(UNICHARSET &unicharset) {
|
||||
NumNonEmptyClasses = 0;
|
||||
|
||||
/* Insert an empty class for each unichar id in unicharset */
|
||||
for (int i = 0; i < MAX_NUM_CLASSES; i++) {
|
||||
for (unsigned i = 0; i < MAX_NUM_CLASSES; i++) {
|
||||
Class[i] = nullptr;
|
||||
if (i < unicharset.size()) {
|
||||
AddAdaptedClass(this, new ADAPT_CLASS_STRUCT, i);
|
||||
@ -108,7 +108,7 @@ ADAPT_TEMPLATES_STRUCT::ADAPT_TEMPLATES_STRUCT(UNICHARSET &unicharset) {
|
||||
}
|
||||
|
||||
ADAPT_TEMPLATES_STRUCT::~ADAPT_TEMPLATES_STRUCT() {
|
||||
for (int i = 0; i < (Templates)->NumClasses; i++) {
|
||||
for (unsigned i = 0; i < (Templates)->NumClasses; i++) {
|
||||
delete Class[i];
|
||||
}
|
||||
delete Templates;
|
||||
@ -160,11 +160,11 @@ void Classify::PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES_STRUCT *Templat
|
||||
fprintf(File, " Id NC NPC NP NPP\n");
|
||||
fprintf(File, "------------------------\n");
|
||||
|
||||
for (int i = 0; i < (Templates->Templates)->NumClasses; i++) {
|
||||
for (unsigned i = 0; i < (Templates->Templates)->NumClasses; i++) {
|
||||
IClass = Templates->Templates->Class[i];
|
||||
AClass = Templates->Class[i];
|
||||
if (!IsEmptyAdaptedClass(AClass)) {
|
||||
fprintf(File, "%5d %s %3d %3d %3d %3zd\n", i, unicharset.id_to_unichar(i), IClass->NumConfigs,
|
||||
fprintf(File, "%5u %s %3d %3d %3d %3zd\n", i, unicharset.id_to_unichar(i), IClass->NumConfigs,
|
||||
AClass->NumPermConfigs, IClass->NumProtos,
|
||||
IClass->NumProtos - AClass->TempProtos->size());
|
||||
}
|
||||
@ -242,7 +242,7 @@ ADAPT_TEMPLATES_STRUCT *Classify::ReadAdaptedTemplates(TFile *fp) {
|
||||
Templates->Templates = ReadIntTemplates(fp);
|
||||
|
||||
/* then read in the adaptive info for each class */
|
||||
for (int i = 0; i < (Templates->Templates)->NumClasses; i++) {
|
||||
for (unsigned i = 0; i < (Templates->Templates)->NumClasses; i++) {
|
||||
Templates->Class[i] = ReadAdaptedClass(fp);
|
||||
}
|
||||
return (Templates);
|
||||
@ -343,8 +343,6 @@ void WriteAdaptedClass(FILE *File, ADAPT_CLASS_STRUCT *Class, int NumConfigs) {
|
||||
* @note Globals: none
|
||||
*/
|
||||
void Classify::WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES_STRUCT *Templates) {
|
||||
int i;
|
||||
|
||||
/* first write the high level adaptive template struct */
|
||||
fwrite(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1, File);
|
||||
|
||||
@ -352,7 +350,7 @@ void Classify::WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES_STRUCT *Templat
|
||||
WriteIntTemplates(File, Templates->Templates, unicharset);
|
||||
|
||||
/* then write out the adaptive info for each class */
|
||||
for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
|
||||
for (unsigned i = 0; i < (Templates->Templates)->NumClasses; i++) {
|
||||
WriteAdaptedClass(File, Templates->Class[i], Templates->Templates->Class[i]->NumConfigs);
|
||||
}
|
||||
} /* WriteAdaptedTemplates */
|
||||
|
@ -578,7 +578,7 @@ void Classify::InitAdaptiveClassifier(TessdataManager *mgr) {
|
||||
tprintf("\n");
|
||||
PrintAdaptedTemplates(stdout, AdaptedTemplates);
|
||||
|
||||
for (int i = 0; i < AdaptedTemplates->Templates->NumClasses; i++) {
|
||||
for (unsigned i = 0; i < AdaptedTemplates->Templates->NumClasses; i++) {
|
||||
BaselineCutoffs[i] = CharNormCutoffs[i];
|
||||
}
|
||||
}
|
||||
@ -1294,7 +1294,7 @@ int Classify::CharNormTrainingSample(bool pruner_only, int keep_this, const Trai
|
||||
// Compute the char_norm_array from the saved cn_feature.
|
||||
FEATURE norm_feature = sample.GetCNFeature();
|
||||
std::vector<uint8_t> char_norm_array(unicharset.size());
|
||||
int num_pruner_classes = std::max(unicharset.size(), PreTrainedTemplates->NumClasses);
|
||||
auto num_pruner_classes = std::max(static_cast<unsigned>(unicharset.size()), PreTrainedTemplates->NumClasses);
|
||||
std::vector<uint8_t> pruner_norm_array(num_pruner_classes);
|
||||
adapt_results->BlobLength = static_cast<int>(ActualOutlineLength(norm_feature) * 20 + 0.5);
|
||||
ComputeCharNormArrays(norm_feature, PreTrainedTemplates, &char_norm_array[0], &pruner_norm_array[0]);
|
||||
@ -1631,7 +1631,7 @@ void Classify::ComputeCharNormArrays(FEATURE_STRUCT *norm_feature, INT_TEMPLATES
|
||||
memset(&pruner_array[0], UINT8_MAX, templates->NumClasses * sizeof(pruner_array[0]));
|
||||
// Each entry in the pruner norm array is the MIN of all the entries of
|
||||
// the corresponding unichars in the CharNormArray.
|
||||
for (int id = 0; id < templates->NumClasses; ++id) {
|
||||
for (unsigned id = 0; id < templates->NumClasses; ++id) {
|
||||
int font_set_id = templates->Class[id]->font_set_id;
|
||||
const FontSet &fs = fontset_table_.at(font_set_id);
|
||||
for (unsigned config = 0; config < fs.size(); ++config) {
|
||||
@ -2114,7 +2114,7 @@ int Classify::ClassAndConfigIDToFontOrShapeID(int class_id, int int_result_confi
|
||||
// Converts a shape_table_ index to a classifier class_id index (not a
|
||||
// unichar-id!). Uses a search, so not fast.
|
||||
int Classify::ShapeIDToClassID(int shape_id) const {
|
||||
for (int id = 0; id < PreTrainedTemplates->NumClasses; ++id) {
|
||||
for (unsigned id = 0; id < PreTrainedTemplates->NumClasses; ++id) {
|
||||
int font_set_id = PreTrainedTemplates->Class[id]->font_set_id;
|
||||
ASSERT_HOST(font_set_id >= 0);
|
||||
const FontSet &fs = fontset_table_.at(font_set_id);
|
||||
|
@ -57,7 +57,7 @@ void Classify::ClearCharNormArray(uint8_t *char_norm_array) {
|
||||
*/
|
||||
void Classify::ComputeIntCharNormArray(const FEATURE_STRUCT &norm_feature,
|
||||
uint8_t *char_norm_array) {
|
||||
for (int i = 0; i < unicharset.size(); i++) {
|
||||
for (unsigned i = 0; i < unicharset.size(); i++) {
|
||||
if (i < PreTrainedTemplates->NumClasses) {
|
||||
int norm_adjust =
|
||||
static_cast<int>(INT_CHAR_NORM_RANGE * ComputeNormMatch(i, norm_feature, false));
|
||||
|
@ -165,7 +165,7 @@ public:
|
||||
void ComputeScores(const INT_TEMPLATES_STRUCT *int_templates, int num_features,
|
||||
const INT_FEATURE_STRUCT *features) {
|
||||
num_features_ = num_features;
|
||||
int num_pruners = int_templates->NumClassPruners;
|
||||
auto num_pruners = int_templates->NumClassPruners;
|
||||
for (int f = 0; f < num_features; ++f) {
|
||||
const INT_FEATURE_STRUCT *feature = &features[f];
|
||||
// Quantize the feature to NUM_CP_BUCKETS*NUM_CP_BUCKETS*NUM_CP_BUCKETS.
|
||||
@ -175,7 +175,7 @@ public:
|
||||
int class_id = 0;
|
||||
// Each CLASS_PRUNER_STRUCT only covers CLASSES_PER_CP(32) classes, so
|
||||
// we need a collection of them, indexed by pruner_set.
|
||||
for (int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
|
||||
for (unsigned pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
|
||||
// Look up quantized feature in a 3-D array, an array of weights for
|
||||
// each class.
|
||||
const uint32_t *pruner_word_ptr = int_templates->ClassPruners[pruner_set]->p[x][y][theta];
|
||||
|
@ -221,7 +221,7 @@ void AddIntClass(INT_TEMPLATES_STRUCT *Templates, CLASS_ID ClassId, INT_CLASS_ST
|
||||
int Pruner;
|
||||
|
||||
assert(LegalClassId(ClassId));
|
||||
if (ClassId != Templates->NumClasses) {
|
||||
if (static_cast<unsigned>(ClassId) != Templates->NumClasses) {
|
||||
fprintf(stderr,
|
||||
"Please make sure that classes are added to templates"
|
||||
" in increasing order of ClassIds\n");
|
||||
@ -491,13 +491,12 @@ INT_TEMPLATES_STRUCT *Classify::CreateIntTemplates(CLASSES FloatProtos,
|
||||
const UNICHARSET &target_unicharset) {
|
||||
CLASS_TYPE FClass;
|
||||
INT_CLASS_STRUCT *IClass;
|
||||
int ClassId;
|
||||
int ProtoId;
|
||||
int ConfigId;
|
||||
|
||||
auto IntTemplates = new INT_TEMPLATES_STRUCT;
|
||||
|
||||
for (ClassId = 0; ClassId < target_unicharset.size(); ClassId++) {
|
||||
for (unsigned ClassId = 0; ClassId < target_unicharset.size(); ClassId++) {
|
||||
FClass = &(FloatProtos[ClassId]);
|
||||
if (FClass->NumProtos == 0 && FClass->NumConfigs == 0 &&
|
||||
strcmp(target_unicharset.id_to_unichar(ClassId), " ") != 0) {
|
||||
@ -613,10 +612,10 @@ INT_TEMPLATES_STRUCT::INT_TEMPLATES_STRUCT() {
|
||||
}
|
||||
|
||||
// Destructor: deletes Class[0..NumClasses) and ClassPruners[0..NumClassPruners).
// NOTE(review): this span reads as a rendered diff. Each loop header appears
// twice: first the pre-change `int` form, then the `unsigned` form that
// replaces it. The `|` / `||||` lines are page separators, not code.
INT_TEMPLATES_STRUCT::~INT_TEMPLATES_STRUCT() {
|
||||
// Old loop header (signed index).
for (int i = 0; i < NumClasses; i++) {
|
||||
// New loop header: unsigned index, presumably to match an unsigned NumClasses.
for (unsigned i = 0; i < NumClasses; i++) {
|
||||
delete Class[i];
|
||||
}
|
||||
// Old loop header (signed index).
for (int i = 0; i < NumClassPruners; i++) {
|
||||
// New loop header: unsigned index, presumably to match an unsigned NumClassPruners.
for (unsigned i = 0; i < NumClassPruners; i++) {
|
||||
delete ClassPruners[i];
|
||||
}
|
||||
}
|
||||
@ -630,9 +629,7 @@ INT_TEMPLATES_STRUCT::~INT_TEMPLATES_STRUCT() {
|
||||
* @note Globals: none
|
||||
*/
|
||||
INT_TEMPLATES_STRUCT *Classify::ReadIntTemplates(TFile *fp) {
|
||||
int i, j, w, x, y, z;
|
||||
int unicharset_size;
|
||||
int version_id = 0;
|
||||
int j, w, x, y, z;
|
||||
INT_TEMPLATES_STRUCT *Templates;
|
||||
CLASS_PRUNER_STRUCT *Pruner;
|
||||
INT_CLASS_STRUCT *Class;
|
||||
@ -645,25 +642,29 @@ INT_TEMPLATES_STRUCT *Classify::ReadIntTemplates(TFile *fp) {
|
||||
uint32_t SetBitsForMask = // word with NUM_BITS_PER_CLASS
|
||||
(1 << NUM_BITS_PER_CLASS) - 1; // set starting at bit 0
|
||||
uint32_t Mask, NewMask, ClassBits;
|
||||
int MaxNumConfigs = MAX_NUM_CONFIGS;
|
||||
int WerdsPerConfigVec = WERDS_PER_CONFIG_VEC;
|
||||
unsigned MaxNumConfigs = MAX_NUM_CONFIGS;
|
||||
unsigned WerdsPerConfigVec = WERDS_PER_CONFIG_VEC;
|
||||
|
||||
/* first read the high level template struct */
|
||||
Templates = new INT_TEMPLATES_STRUCT;
|
||||
// Read Templates in parts for 64 bit compatibility.
|
||||
uint32_t unicharset_size;
|
||||
if (fp->FReadEndian(&unicharset_size, sizeof(unicharset_size), 1) != 1) {
|
||||
tprintf("Bad read of inttemp!\n");
|
||||
}
|
||||
if (fp->FReadEndian(&Templates->NumClasses, sizeof(Templates->NumClasses), 1) != 1 ||
|
||||
int32_t version_id = 0;
|
||||
if (fp->FReadEndian(&version_id, sizeof(version_id), 1) != 1 ||
|
||||
fp->FReadEndian(&Templates->NumClassPruners, sizeof(Templates->NumClassPruners), 1) != 1) {
|
||||
tprintf("Bad read of inttemp!\n");
|
||||
}
|
||||
if (Templates->NumClasses < 0) {
|
||||
if (version_id < 0) {
|
||||
// This file has a version id!
|
||||
version_id = -Templates->NumClasses;
|
||||
version_id = -version_id;
|
||||
if (fp->FReadEndian(&Templates->NumClasses, sizeof(Templates->NumClasses), 1) != 1) {
|
||||
tprintf("Bad read of inttemp!\n");
|
||||
}
|
||||
} else {
|
||||
Templates->NumClasses = version_id;
|
||||
}
|
||||
|
||||
if (version_id < 3) {
|
||||
@ -683,8 +684,8 @@ INT_TEMPLATES_STRUCT *Classify::ReadIntTemplates(TFile *fp) {
|
||||
}
|
||||
|
||||
/* then read in the class pruners */
|
||||
const int kNumBuckets = NUM_CP_BUCKETS * NUM_CP_BUCKETS * NUM_CP_BUCKETS * WERDS_PER_CP_VECTOR;
|
||||
for (i = 0; i < Templates->NumClassPruners; i++) {
|
||||
const unsigned kNumBuckets = NUM_CP_BUCKETS * NUM_CP_BUCKETS * NUM_CP_BUCKETS * WERDS_PER_CP_VECTOR;
|
||||
for (unsigned i = 0; i < Templates->NumClassPruners; i++) {
|
||||
Pruner = new CLASS_PRUNER_STRUCT;
|
||||
if (fp->FReadEndian(Pruner, sizeof(Pruner->p[0][0][0][0]), kNumBuckets) != kNumBuckets) {
|
||||
tprintf("Bad read of inttemp!\n");
|
||||
@ -700,19 +701,19 @@ INT_TEMPLATES_STRUCT *Classify::ReadIntTemplates(TFile *fp) {
|
||||
if (version_id < 2) {
|
||||
// Allocate enough class pruners to cover all the class ids.
|
||||
max_class_id = 0;
|
||||
for (i = 0; i < Templates->NumClasses; i++) {
|
||||
for (unsigned i = 0; i < Templates->NumClasses; i++) {
|
||||
if (ClassIdFor[i] > max_class_id) {
|
||||
max_class_id = ClassIdFor[i];
|
||||
}
|
||||
}
|
||||
for (i = 0; i <= CPrunerIdFor(max_class_id); i++) {
|
||||
for (int i = 0; i <= CPrunerIdFor(max_class_id); i++) {
|
||||
Templates->ClassPruners[i] = new CLASS_PRUNER_STRUCT;
|
||||
memset(Templates->ClassPruners[i], 0, sizeof(CLASS_PRUNER_STRUCT));
|
||||
}
|
||||
// Convert class pruners from the old format (indexed by class index)
|
||||
// to the new format (indexed by class id).
|
||||
last_cp_bit_number = NUM_BITS_PER_CLASS * Templates->NumClasses - 1;
|
||||
for (i = 0; i < Templates->NumClassPruners; i++) {
|
||||
for (unsigned i = 0; i < Templates->NumClassPruners; i++) {
|
||||
for (x = 0; x < NUM_CP_BUCKETS; x++) {
|
||||
for (y = 0; y < NUM_CP_BUCKETS; y++) {
|
||||
for (z = 0; z < NUM_CP_BUCKETS; z++) {
|
||||
@ -750,13 +751,13 @@ INT_TEMPLATES_STRUCT *Classify::ReadIntTemplates(TFile *fp) {
|
||||
}
|
||||
}
|
||||
}
|
||||
for (i = 0; i < Templates->NumClassPruners; i++) {
|
||||
for (unsigned i = 0; i < Templates->NumClassPruners; i++) {
|
||||
delete TempClassPruner[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* then read in each class */
|
||||
for (i = 0; i < Templates->NumClasses; i++) {
|
||||
for (unsigned i = 0; i < Templates->NumClasses; i++) {
|
||||
/* first read in the high level struct for the class */
|
||||
Class = new INT_CLASS_STRUCT;
|
||||
if (fp->FReadEndian(&Class->NumProtos, sizeof(Class->NumProtos), 1) != 1 ||
|
||||
@ -773,7 +774,7 @@ INT_TEMPLATES_STRUCT *Classify::ReadIntTemplates(TFile *fp) {
|
||||
}
|
||||
}
|
||||
}
|
||||
int num_configs = version_id < 4 ? MaxNumConfigs : Class->NumConfigs;
|
||||
unsigned num_configs = version_id < 4 ? MaxNumConfigs : Class->NumConfigs;
|
||||
ASSERT_HOST(num_configs <= MaxNumConfigs);
|
||||
if (fp->FReadEndian(Class->ConfigLengths, sizeof(uint16_t), num_configs) != num_configs) {
|
||||
tprintf("Bad read of inttemp!\n");
|
||||
@ -797,7 +798,7 @@ INT_TEMPLATES_STRUCT *Classify::ReadIntTemplates(TFile *fp) {
|
||||
/* then read in the proto sets */
|
||||
for (j = 0; j < Class->NumProtoSets; j++) {
|
||||
auto ProtoSet = new PROTO_SET_STRUCT;
|
||||
int num_buckets = NUM_PP_PARAMS * NUM_PP_BUCKETS * WERDS_PER_PP_VECTOR;
|
||||
unsigned num_buckets = NUM_PP_PARAMS * NUM_PP_BUCKETS * WERDS_PER_PP_VECTOR;
|
||||
if (fp->FReadEndian(&ProtoSet->ProtoPruner, sizeof(ProtoSet->ProtoPruner[0][0][0]),
|
||||
num_buckets) != num_buckets) {
|
||||
tprintf("Bad read of inttemp!\n");
|
||||
@ -830,7 +831,7 @@ INT_TEMPLATES_STRUCT *Classify::ReadIntTemplates(TFile *fp) {
|
||||
ClassForClassId(Templates, 0)->font_set_id = -1;
|
||||
Templates->NumClasses++;
|
||||
/* make sure the classes are contiguous */
|
||||
for (i = 0; i < MAX_NUM_CLASSES; i++) {
|
||||
for (unsigned i = 0; i < MAX_NUM_CLASSES; i++) {
|
||||
if (i < Templates->NumClasses) {
|
||||
if (ClassForClassId(Templates, i) == nullptr) {
|
||||
fprintf(stderr, "Non-contiguous class ids in inttemp\n");
|
||||
@ -838,7 +839,7 @@ INT_TEMPLATES_STRUCT *Classify::ReadIntTemplates(TFile *fp) {
|
||||
}
|
||||
} else {
|
||||
if (ClassForClassId(Templates, i) != nullptr) {
|
||||
fprintf(stderr, "Class id %d exceeds NumClassesIn (Templates) %d\n", i,
|
||||
fprintf(stderr, "Class id %u exceeds NumClassesIn (Templates) %u\n", i,
|
||||
Templates->NumClasses);
|
||||
exit(1);
|
||||
}
|
||||
@ -919,9 +920,8 @@ void ClearFeatureSpaceWindow(NORM_METHOD norm_method, ScrollView *window) {
|
||||
*/
|
||||
void Classify::WriteIntTemplates(FILE *File, INT_TEMPLATES_STRUCT *Templates,
|
||||
const UNICHARSET &target_unicharset) {
|
||||
int i, j;
|
||||
INT_CLASS_STRUCT *Class;
|
||||
int unicharset_size = target_unicharset.size();
|
||||
auto unicharset_size = target_unicharset.size();
|
||||
int version_id = -5; // When negated by the reader -1 becomes +1 etc.
|
||||
|
||||
if (Templates->NumClasses != unicharset_size) {
|
||||
@ -938,12 +938,12 @@ void Classify::WriteIntTemplates(FILE *File, INT_TEMPLATES_STRUCT *Templates,
|
||||
fwrite(&Templates->NumClasses, sizeof(Templates->NumClasses), 1, File);
|
||||
|
||||
/* then write out the class pruners */
|
||||
for (i = 0; i < Templates->NumClassPruners; i++) {
|
||||
for (unsigned i = 0; i < Templates->NumClassPruners; i++) {
|
||||
fwrite(Templates->ClassPruners[i], sizeof(CLASS_PRUNER_STRUCT), 1, File);
|
||||
}
|
||||
|
||||
/* then write out each class */
|
||||
for (i = 0; i < Templates->NumClasses; i++) {
|
||||
for (unsigned i = 0; i < Templates->NumClasses; i++) {
|
||||
Class = Templates->Class[i];
|
||||
|
||||
/* first write out the high level struct for the class */
|
||||
@ -951,7 +951,7 @@ void Classify::WriteIntTemplates(FILE *File, INT_TEMPLATES_STRUCT *Templates,
|
||||
fwrite(&Class->NumProtoSets, sizeof(Class->NumProtoSets), 1, File);
|
||||
ASSERT_HOST(Class->NumConfigs == this->fontset_table_.at(Class->font_set_id).size());
|
||||
fwrite(&Class->NumConfigs, sizeof(Class->NumConfigs), 1, File);
|
||||
for (j = 0; j < Class->NumConfigs; ++j) {
|
||||
for (int j = 0; j < Class->NumConfigs; ++j) {
|
||||
fwrite(&Class->ConfigLengths[j], sizeof(uint16_t), 1, File);
|
||||
}
|
||||
|
||||
@ -961,7 +961,7 @@ void Classify::WriteIntTemplates(FILE *File, INT_TEMPLATES_STRUCT *Templates,
|
||||
}
|
||||
|
||||
/* then write out the proto sets */
|
||||
for (j = 0; j < Class->NumProtoSets; j++) {
|
||||
for (int j = 0; j < Class->NumProtoSets; j++) {
|
||||
fwrite(Class->ProtoSets[j], sizeof(PROTO_SET_STRUCT), 1, File);
|
||||
}
|
||||
|
||||
@ -991,7 +991,7 @@ void Classify::WriteIntTemplates(FILE *File, INT_TEMPLATES_STRUCT *Templates,
|
||||
* @note Globals: none
|
||||
*/
|
||||
float BucketStart(int Bucket, float Offset, int NumBuckets) {
|
||||
return ((static_cast<float>(Bucket) / NumBuckets) - Offset);
|
||||
return static_cast<float>(Bucket) / NumBuckets - Offset;
|
||||
|
||||
} /* BucketStart */
|
||||
|
||||
@ -1007,7 +1007,7 @@ float BucketStart(int Bucket, float Offset, int NumBuckets) {
|
||||
* @note Globals: none
|
||||
*/
|
||||
float BucketEnd(int Bucket, float Offset, int NumBuckets) {
|
||||
return ((static_cast<float>(Bucket + 1) / NumBuckets) - Offset);
|
||||
return static_cast<float>(Bucket + 1) / NumBuckets - Offset;
|
||||
} /* BucketEnd */
|
||||
|
||||
/**
|
||||
@ -1180,7 +1180,7 @@ CLASS_ID Classify::GetClassToDebug(const char *Prompt, bool *adaptive_on, bool *
|
||||
*shape_id = atoi(ev->parameter);
|
||||
*adaptive_on = false;
|
||||
*pretrained_on = true;
|
||||
if (*shape_id >= 0 && *shape_id < shape_table_->NumShapes()) {
|
||||
if (*shape_id >= 0 && static_cast<unsigned>(*shape_id) < shape_table_->NumShapes()) {
|
||||
int font_id;
|
||||
shape_table_->GetFirstUnicharAndFont(*shape_id, &unichar_id, &font_id);
|
||||
tprintf("Shape %d, first unichar=%d, font=%d\n", *shape_id, unichar_id, font_id);
|
||||
@ -1208,7 +1208,7 @@ CLASS_ID Classify::GetClassToDebug(const char *Prompt, bool *adaptive_on, bool *
|
||||
*shape_id = -1;
|
||||
return unichar_id;
|
||||
}
|
||||
for (int s = 0; s < shape_table_->NumShapes(); ++s) {
|
||||
for (unsigned s = 0; s < shape_table_->NumShapes(); ++s) {
|
||||
if (shape_table_->GetShape(s).ContainsUnichar(unichar_id)) {
|
||||
tprintf("%s\n", shape_table_->DebugStr(s).c_str());
|
||||
}
|
||||
|
@ -106,8 +106,8 @@ struct INT_CLASS_STRUCT {
|
||||
struct TESS_API INT_TEMPLATES_STRUCT {
|
||||
INT_TEMPLATES_STRUCT();
|
||||
~INT_TEMPLATES_STRUCT();
|
||||
int NumClasses;
|
||||
int NumClassPruners;
|
||||
unsigned NumClasses;
|
||||
unsigned NumClassPruners;
|
||||
INT_CLASS_STRUCT *Class[MAX_NUM_CLASSES];
|
||||
CLASS_PRUNER_STRUCT *ClassPruners[MAX_NUM_CLASS_PRUNERS];
|
||||
};
|
||||
|
@ -38,7 +38,7 @@ namespace tesseract {
|
||||
int ShapeRating::FirstResultWithUnichar(const std::vector<ShapeRating> &results,
|
||||
const ShapeTable &shape_table, UNICHAR_ID unichar_id) {
|
||||
for (unsigned r = 0; r < results.size(); ++r) {
|
||||
const int shape_id = results[r].shape_id;
|
||||
const auto shape_id = results[r].shape_id;
|
||||
const Shape &shape = shape_table.GetShape(shape_id);
|
||||
if (shape.ContainsUnichar(unichar_id)) {
|
||||
return r;
|
||||
@ -289,8 +289,8 @@ void ShapeTable::ReMapClassIds(const std::vector<int> &unicharset_map) {
|
||||
}
|
||||
|
||||
// Returns a string listing the classes/fonts in a shape.
|
||||
std::string ShapeTable::DebugStr(int shape_id) const {
|
||||
if (shape_id < 0 || shape_id >= shape_table_.size()) {
|
||||
std::string ShapeTable::DebugStr(unsigned shape_id) const {
|
||||
if (shape_id >= shape_table_.size()) {
|
||||
return "INVALID_UNICHAR_ID";
|
||||
}
|
||||
const Shape &shape = GetShape(shape_id);
|
||||
@ -348,8 +348,8 @@ std::string ShapeTable::SummaryStr() const {
|
||||
|
||||
// Adds a new shape starting with the given unichar_id and font_id.
|
||||
// Returns the assigned index.
|
||||
int ShapeTable::AddShape(int unichar_id, int font_id) {
|
||||
int index = shape_table_.size();
|
||||
unsigned ShapeTable::AddShape(int unichar_id, int font_id) {
|
||||
auto index = shape_table_.size();
|
||||
auto *shape = new Shape;
|
||||
shape->AddToShape(unichar_id, font_id);
|
||||
shape_table_.push_back(shape);
|
||||
@ -359,8 +359,8 @@ int ShapeTable::AddShape(int unichar_id, int font_id) {
|
||||
|
||||
// Adds a copy of the given shape unless it is already present.
|
||||
// Returns the assigned index or index of existing shape if already present.
|
||||
int ShapeTable::AddShape(const Shape &other) {
|
||||
int index;
|
||||
unsigned ShapeTable::AddShape(const Shape &other) {
|
||||
unsigned index;
|
||||
for (index = 0; index < shape_table_.size() && !(other == *shape_table_[index]); ++index) {
|
||||
continue;
|
||||
}
|
||||
@ -373,21 +373,21 @@ int ShapeTable::AddShape(const Shape &other) {
|
||||
}
|
||||
|
||||
// Removes the shape given by the shape index.
|
||||
void ShapeTable::DeleteShape(int shape_id) {
|
||||
void ShapeTable::DeleteShape(unsigned shape_id) {
|
||||
delete shape_table_[shape_id];
|
||||
shape_table_.erase(shape_table_.begin() + shape_id);
|
||||
}
|
||||
|
||||
// Adds a font_id to the given existing shape index for the given
|
||||
// unichar_id. If the unichar_id is not in the shape, it is added.
|
||||
void ShapeTable::AddToShape(int shape_id, int unichar_id, int font_id) {
|
||||
void ShapeTable::AddToShape(unsigned shape_id, int unichar_id, int font_id) {
|
||||
Shape &shape = *shape_table_[shape_id];
|
||||
shape.AddToShape(unichar_id, font_id);
|
||||
num_fonts_ = std::max(num_fonts_, font_id + 1);
|
||||
}
|
||||
|
||||
// Adds the given shape to the existing shape with the given index.
|
||||
void ShapeTable::AddShapeToShape(int shape_id, const Shape &other) {
|
||||
void ShapeTable::AddShapeToShape(unsigned shape_id, const Shape &other) {
|
||||
Shape &shape = *shape_table_[shape_id];
|
||||
shape.AddShape(other);
|
||||
num_fonts_ = 0;
|
||||
@ -417,7 +417,7 @@ int ShapeTable::FindShape(int unichar_id, int font_id) const {
|
||||
}
|
||||
|
||||
// Returns the first unichar_id and font_id in the given shape.
|
||||
void ShapeTable::GetFirstUnicharAndFont(int shape_id, int *unichar_id, int *font_id) const {
|
||||
void ShapeTable::GetFirstUnicharAndFont(unsigned shape_id, int *unichar_id, int *font_id) const {
|
||||
const UnicharAndFonts &unichar_and_fonts = (*shape_table_[shape_id])[0];
|
||||
*unichar_id = unichar_and_fonts.unichar_id;
|
||||
*font_id = unichar_and_fonts.font_ids[0];
|
||||
@ -440,7 +440,7 @@ int ShapeTable::BuildFromShape(const Shape &shape, const ShapeTable &master_shap
|
||||
}
|
||||
}
|
||||
int num_masters = 0;
|
||||
for (int s = 0; s < master_shapes.NumShapes(); ++s) {
|
||||
for (unsigned s = 0; s < master_shapes.NumShapes(); ++s) {
|
||||
if (shape_map[s]) {
|
||||
AddShape(master_shapes.GetShape(s));
|
||||
++num_masters;
|
||||
@ -450,14 +450,14 @@ int ShapeTable::BuildFromShape(const Shape &shape, const ShapeTable &master_shap
|
||||
}
|
||||
|
||||
// Returns true if the shapes are already merged.
|
||||
bool ShapeTable::AlreadyMerged(int shape_id1, int shape_id2) const {
|
||||
bool ShapeTable::AlreadyMerged(unsigned shape_id1, unsigned shape_id2) const {
|
||||
return MasterDestinationIndex(shape_id1) == MasterDestinationIndex(shape_id2);
|
||||
}
|
||||
|
||||
// Returns true if any shape contains multiple unichars.
|
||||
bool ShapeTable::AnyMultipleUnichars() const {
|
||||
int num_shapes = NumShapes();
|
||||
for (int s1 = 0; s1 < num_shapes; ++s1) {
|
||||
auto num_shapes = NumShapes();
|
||||
for (unsigned s1 = 0; s1 < num_shapes; ++s1) {
|
||||
if (MasterDestinationIndex(s1) != s1) {
|
||||
continue;
|
||||
}
|
||||
@ -482,11 +482,11 @@ int ShapeTable::MaxNumUnichars() const {
|
||||
|
||||
// Merges shapes with a common unichar over the [start, end) interval.
|
||||
// Assumes single unichar per shape.
|
||||
void ShapeTable::ForceFontMerges(int start, int end) {
|
||||
for (int s1 = start; s1 < end; ++s1) {
|
||||
void ShapeTable::ForceFontMerges(unsigned start, unsigned end) {
|
||||
for (unsigned s1 = start; s1 < end; ++s1) {
|
||||
if (MasterDestinationIndex(s1) == s1 && GetShape(s1).size() == 1) {
|
||||
int unichar_id = GetShape(s1)[0].unichar_id;
|
||||
for (int s2 = s1 + 1; s2 < end; ++s2) {
|
||||
for (auto s2 = s1 + 1; s2 < end; ++s2) {
|
||||
if (MasterDestinationIndex(s2) == s2 && GetShape(s2).size() == 1 &&
|
||||
unichar_id == GetShape(s2)[0].unichar_id) {
|
||||
MergeShapes(s1, s2);
|
||||
@ -500,13 +500,13 @@ void ShapeTable::ForceFontMerges(int start, int end) {
|
||||
}
|
||||
|
||||
// Returns the number of unichars in the master shape.
|
||||
int ShapeTable::MasterUnicharCount(int shape_id) const {
|
||||
unsigned ShapeTable::MasterUnicharCount(unsigned shape_id) const {
|
||||
int master_id = MasterDestinationIndex(shape_id);
|
||||
return GetShape(master_id).size();
|
||||
}
|
||||
|
||||
// Returns the sum of the font counts in the master shape.
|
||||
int ShapeTable::MasterFontCount(int shape_id) const {
|
||||
int ShapeTable::MasterFontCount(unsigned shape_id) const {
|
||||
int master_id = MasterDestinationIndex(shape_id);
|
||||
const Shape &shape = GetShape(master_id);
|
||||
int font_count = 0;
|
||||
@ -517,7 +517,7 @@ int ShapeTable::MasterFontCount(int shape_id) const {
|
||||
}
|
||||
|
||||
// Returns the number of unichars that would result from merging the shapes.
|
||||
int ShapeTable::MergedUnicharCount(int shape_id1, int shape_id2) const {
|
||||
int ShapeTable::MergedUnicharCount(unsigned shape_id1, unsigned shape_id2) const {
|
||||
// Do it the easy way for now.
|
||||
int master_id1 = MasterDestinationIndex(shape_id1);
|
||||
int master_id2 = MasterDestinationIndex(shape_id2);
|
||||
@ -527,9 +527,9 @@ int ShapeTable::MergedUnicharCount(int shape_id1, int shape_id2) const {
|
||||
}
|
||||
|
||||
// Merges two shape_ids, leaving shape_id2 marked as merged.
|
||||
void ShapeTable::MergeShapes(int shape_id1, int shape_id2) {
|
||||
int master_id1 = MasterDestinationIndex(shape_id1);
|
||||
int master_id2 = MasterDestinationIndex(shape_id2);
|
||||
void ShapeTable::MergeShapes(unsigned shape_id1, unsigned shape_id2) {
|
||||
auto master_id1 = MasterDestinationIndex(shape_id1);
|
||||
auto master_id2 = MasterDestinationIndex(shape_id2);
|
||||
// Point master_id2 (and all merged shapes) to master_id1.
|
||||
shape_table_[master_id2]->set_destination_index(master_id1);
|
||||
// Add all the shapes of master_id2 to master_id1.
|
||||
@ -537,7 +537,7 @@ void ShapeTable::MergeShapes(int shape_id1, int shape_id2) {
|
||||
}
|
||||
|
||||
// Swaps two shape_ids.
|
||||
void ShapeTable::SwapShapes(int shape_id1, int shape_id2) {
|
||||
void ShapeTable::SwapShapes(unsigned shape_id1, unsigned shape_id2) {
|
||||
Shape *tmp = shape_table_[shape_id1];
|
||||
shape_table_[shape_id1] = shape_table_[shape_id2];
|
||||
shape_table_[shape_id2] = tmp;
|
||||
@ -545,12 +545,12 @@ void ShapeTable::SwapShapes(int shape_id1, int shape_id2) {
|
||||
|
||||
// Returns the destination of this shape, (if merged), taking into account
|
||||
// the fact that the destination may itself have been merged.
|
||||
int ShapeTable::MasterDestinationIndex(int shape_id) const {
|
||||
int dest_id = shape_table_[shape_id]->destination_index();
|
||||
if (dest_id == shape_id || dest_id < 0) {
|
||||
unsigned ShapeTable::MasterDestinationIndex(unsigned shape_id) const {
|
||||
auto dest_id = shape_table_[shape_id]->destination_index();
|
||||
if (static_cast<unsigned>(dest_id) == shape_id || dest_id < 0) {
|
||||
return shape_id; // Is master already.
|
||||
}
|
||||
int master_id = shape_table_[dest_id]->destination_index();
|
||||
auto master_id = shape_table_[dest_id]->destination_index();
|
||||
if (master_id == dest_id || master_id < 0) {
|
||||
return dest_id; // Dest is the master and shape_id points to it.
|
||||
}
|
||||
@ -559,7 +559,7 @@ int ShapeTable::MasterDestinationIndex(int shape_id) const {
|
||||
}
|
||||
|
||||
// Returns false if the unichars in neither shape is a subset of the other.
|
||||
bool ShapeTable::SubsetUnichar(int shape_id1, int shape_id2) const {
|
||||
bool ShapeTable::SubsetUnichar(unsigned shape_id1, unsigned shape_id2) const {
|
||||
const Shape &shape1 = GetShape(shape_id1);
|
||||
const Shape &shape2 = GetShape(shape_id2);
|
||||
int c1, c2;
|
||||
@ -579,7 +579,7 @@ bool ShapeTable::SubsetUnichar(int shape_id1, int shape_id2) const {
|
||||
}
|
||||
|
||||
// Returns false if the unichars in neither shape is a subset of the other.
|
||||
bool ShapeTable::MergeSubsetUnichar(int merge_id1, int merge_id2, int shape_id) const {
|
||||
bool ShapeTable::MergeSubsetUnichar(int merge_id1, int merge_id2, unsigned shape_id) const {
|
||||
const Shape &merge1 = GetShape(merge_id1);
|
||||
const Shape &merge2 = GetShape(merge_id2);
|
||||
const Shape &shape = GetShape(shape_id);
|
||||
@ -606,7 +606,7 @@ bool ShapeTable::MergeSubsetUnichar(int merge_id1, int merge_id2, int shape_id)
|
||||
}
|
||||
|
||||
// Returns true if the unichar sets are equal between the shapes.
|
||||
bool ShapeTable::EqualUnichars(int shape_id1, int shape_id2) const {
|
||||
bool ShapeTable::EqualUnichars(unsigned shape_id1, unsigned shape_id2) const {
|
||||
const Shape &shape1 = GetShape(shape_id1);
|
||||
const Shape &shape2 = GetShape(shape_id2);
|
||||
for (int c1 = 0; c1 < shape1.size(); ++c1) {
|
||||
@ -625,7 +625,7 @@ bool ShapeTable::EqualUnichars(int shape_id1, int shape_id2) const {
|
||||
}
|
||||
|
||||
// Returns true if the unichar sets are equal between the shapes.
|
||||
bool ShapeTable::MergeEqualUnichars(int merge_id1, int merge_id2, int shape_id) const {
|
||||
bool ShapeTable::MergeEqualUnichars(int merge_id1, int merge_id2, unsigned shape_id) const {
|
||||
const Shape &merge1 = GetShape(merge_id1);
|
||||
const Shape &merge2 = GetShape(merge_id2);
|
||||
const Shape &shape = GetShape(shape_id);
|
||||
@ -651,7 +651,7 @@ bool ShapeTable::MergeEqualUnichars(int merge_id1, int merge_id2, int shape_id)
|
||||
}
|
||||
|
||||
// Returns true if there is a common unichar between the shapes.
|
||||
bool ShapeTable::CommonUnichars(int shape_id1, int shape_id2) const {
|
||||
bool ShapeTable::CommonUnichars(unsigned shape_id1, unsigned shape_id2) const {
|
||||
const Shape &shape1 = GetShape(shape_id1);
|
||||
const Shape &shape2 = GetShape(shape_id2);
|
||||
for (int c1 = 0; c1 < shape1.size(); ++c1) {
|
||||
@ -664,7 +664,7 @@ bool ShapeTable::CommonUnichars(int shape_id1, int shape_id2) const {
|
||||
}
|
||||
|
||||
// Returns true if there is a common font id between the shapes.
|
||||
bool ShapeTable::CommonFont(int shape_id1, int shape_id2) const {
|
||||
bool ShapeTable::CommonFont(unsigned shape_id1, unsigned shape_id2) const {
|
||||
const Shape &shape1 = GetShape(shape_id1);
|
||||
const Shape &shape2 = GetShape(shape_id2);
|
||||
for (int c1 = 0; c1 < shape1.size(); ++c1) {
|
||||
|
@ -245,7 +245,7 @@ public:
|
||||
bool DeSerialize(TFile *fp);
|
||||
|
||||
// Accessors.
|
||||
int NumShapes() const {
|
||||
unsigned NumShapes() const {
|
||||
return shape_table_.size();
|
||||
}
|
||||
const UNICHARSET &unicharset() const {
|
||||
@ -263,36 +263,36 @@ public:
|
||||
// Useful in conjunction with set_unicharset.
|
||||
void ReMapClassIds(const std::vector<int> &unicharset_map);
|
||||
// Returns a string listing the classes/fonts in a shape.
|
||||
std::string DebugStr(int shape_id) const;
|
||||
std::string DebugStr(unsigned shape_id) const;
|
||||
// Returns a debug string summarizing the table.
|
||||
std::string SummaryStr() const;
|
||||
|
||||
// Adds a new shape starting with the given unichar_id and font_id.
|
||||
// Returns the assigned index.
|
||||
int AddShape(int unichar_id, int font_id);
|
||||
unsigned AddShape(int unichar_id, int font_id);
|
||||
// Adds a copy of the given shape unless it is already present.
|
||||
// Returns the assigned index or index of existing shape if already present.
|
||||
int AddShape(const Shape &other);
|
||||
unsigned AddShape(const Shape &other);
|
||||
// Removes the shape given by the shape index. All indices above are changed!
|
||||
void DeleteShape(int shape_id);
|
||||
void DeleteShape(unsigned shape_id);
|
||||
// Adds a font_id to the given existing shape index for the given
|
||||
// unichar_id. If the unichar_id is not in the shape, it is added.
|
||||
void AddToShape(int shape_id, int unichar_id, int font_id);
|
||||
void AddToShape(unsigned shape_id, int unichar_id, int font_id);
|
||||
// Adds the given shape to the existing shape with the given index.
|
||||
void AddShapeToShape(int shape_id, const Shape &other);
|
||||
void AddShapeToShape(unsigned shape_id, const Shape &other);
|
||||
// Returns the id of the shape that contains the given unichar and font.
|
||||
// If not found, returns -1.
|
||||
// If font_id < 0, the font_id is ignored and the first shape that matches
|
||||
// the unichar_id is returned.
|
||||
int FindShape(int unichar_id, int font_id) const;
|
||||
// Returns the first unichar_id and font_id in the given shape.
|
||||
void GetFirstUnicharAndFont(int shape_id, int *unichar_id, int *font_id) const;
|
||||
void GetFirstUnicharAndFont(unsigned shape_id, int *unichar_id, int *font_id) const;
|
||||
|
||||
// Accessors for the Shape with the given shape_id.
|
||||
const Shape &GetShape(int shape_id) const {
|
||||
const Shape &GetShape(unsigned shape_id) const {
|
||||
return *shape_table_[shape_id];
|
||||
}
|
||||
Shape *MutableShape(int shape_id) {
|
||||
Shape *MutableShape(unsigned shape_id) {
|
||||
return shape_table_[shape_id];
|
||||
}
|
||||
|
||||
@ -301,24 +301,24 @@ public:
|
||||
int BuildFromShape(const Shape &shape, const ShapeTable &master_shapes);
|
||||
|
||||
// Returns true if the shapes are already merged.
|
||||
bool AlreadyMerged(int shape_id1, int shape_id2) const;
|
||||
bool AlreadyMerged(unsigned shape_id1, unsigned shape_id2) const;
|
||||
// Returns true if any shape contains multiple unichars.
|
||||
bool AnyMultipleUnichars() const;
|
||||
// Returns the maximum number of unichars over all shapes.
|
||||
int MaxNumUnichars() const;
|
||||
// Merges shapes with a common unichar over the [start, end) interval.
|
||||
// Assumes single unichar per shape.
|
||||
void ForceFontMerges(int start, int end);
|
||||
void ForceFontMerges(unsigned start, unsigned end);
|
||||
// Returns the number of unichars in the master shape.
|
||||
int MasterUnicharCount(int shape_id) const;
|
||||
unsigned MasterUnicharCount(unsigned shape_id) const;
|
||||
// Returns the sum of the font counts in the master shape.
|
||||
int MasterFontCount(int shape_id) const;
|
||||
int MasterFontCount(unsigned shape_id) const;
|
||||
// Returns the number of unichars that would result from merging the shapes.
|
||||
int MergedUnicharCount(int shape_id1, int shape_id2) const;
|
||||
int MergedUnicharCount(unsigned shape_id1, unsigned shape_id2) const;
|
||||
// Merges two shape_ids, leaving shape_id2 marked as merged.
|
||||
void MergeShapes(int shape_id1, int shape_id2);
|
||||
void MergeShapes(unsigned shape_id1, unsigned shape_id2);
|
||||
// Swaps two shape_ids.
|
||||
void SwapShapes(int shape_id1, int shape_id2);
|
||||
void SwapShapes(unsigned shape_id1, unsigned shape_id2);
|
||||
// Appends the master shapes from other to this.
|
||||
// Used to create a clean ShapeTable from a merged one, or to create a
|
||||
// copy of a ShapeTable.
|
||||
@ -330,19 +330,19 @@ public:
|
||||
// Returns the destination of this shape, (if merged), taking into account
|
||||
// the fact that the destination may itself have been merged.
|
||||
// For a non-merged shape, returns the input shape_id.
|
||||
int MasterDestinationIndex(int shape_id) const;
|
||||
unsigned MasterDestinationIndex(unsigned shape_id) const;
|
||||
|
||||
// Returns false if the unichars in neither shape is a subset of the other.
|
||||
bool SubsetUnichar(int shape_id1, int shape_id2) const;
|
||||
bool SubsetUnichar(unsigned shape_id1, unsigned shape_id2) const;
|
||||
// Returns false if the unichars in neither shape is a subset of the other.
|
||||
bool MergeSubsetUnichar(int merge_id1, int merge_id2, int shape_id) const;
|
||||
bool MergeSubsetUnichar(int merge_id1, int merge_id2, unsigned shape_id) const;
|
||||
// Returns true if the unichar sets are equal between the shapes.
|
||||
bool EqualUnichars(int shape_id1, int shape_id2) const;
|
||||
bool MergeEqualUnichars(int merge_id1, int merge_id2, int shape_id) const;
|
||||
bool EqualUnichars(unsigned shape_id1, unsigned shape_id2) const;
|
||||
bool MergeEqualUnichars(int merge_id1, int merge_id2, unsigned shape_id) const;
|
||||
// Returns true if there is a common unichar between the shapes.
|
||||
bool CommonUnichars(int shape_id1, int shape_id2) const;
|
||||
bool CommonUnichars(unsigned shape_id1, unsigned shape_id2) const;
|
||||
// Returns true if there is a common font id between the shapes.
|
||||
bool CommonFont(int shape_id1, int shape_id2) const;
|
||||
bool CommonFont(unsigned shape_id1, unsigned shape_id2) const;
|
||||
|
||||
// Adds the unichars of the given shape_id to the vector of results. Any
|
||||
// unichar_id that is already present just has the fonts added to the
|
||||
|
@ -46,7 +46,7 @@ void Dict::go_deeper_dawg_fxn(const char *debug, const BLOB_CHOICE_LIST_VECTOR &
|
||||
float *limit, WERD_CHOICE *best_choice, int *attempts_left,
|
||||
void *void_more_args) {
|
||||
auto *more_args = static_cast<DawgArgs *>(void_more_args);
|
||||
word_ending = (char_choice_index == char_choices.size() - 1);
|
||||
word_ending = (static_cast<unsigned>(char_choice_index) == char_choices.size() - 1);
|
||||
int word_index = word->length() - 1;
|
||||
if (best_choice->rating() < *limit) {
|
||||
return;
|
||||
@ -73,7 +73,7 @@ void Dict::go_deeper_dawg_fxn(const char *debug, const BLOB_CHOICE_LIST_VECTOR &
|
||||
DawgPositionVector unigram_updated_dawgs;
|
||||
DawgArgs unigram_dawg_args(&unigram_active_dawgs, &unigram_updated_dawgs, more_args->permuter);
|
||||
// Check unigrams in the ngram with letter_is_okay().
|
||||
for (int i = 0; unigrams_ok && i < encoding.size(); ++i) {
|
||||
for (size_t i = 0; unigrams_ok && i < encoding.size(); ++i) {
|
||||
UNICHAR_ID uch_id = encoding[i];
|
||||
ASSERT_HOST(uch_id != INVALID_UNICHAR_ID);
|
||||
++num_unigrams;
|
||||
@ -195,7 +195,7 @@ void Dict::permute_choices(const char *debug, const BLOB_CHOICE_LIST_VECTOR &cha
|
||||
debug, char_choice_index, *limit, word->rating(), word->certainty(),
|
||||
word->debug_string().c_str());
|
||||
}
|
||||
if (char_choice_index < char_choices.size()) {
|
||||
if (static_cast<unsigned>(char_choice_index) < char_choices.size()) {
|
||||
BLOB_CHOICE_IT blob_choice_it;
|
||||
blob_choice_it.set_to_list(char_choices.at(char_choice_index));
|
||||
for (blob_choice_it.mark_cycle_pt(); !blob_choice_it.cycled_list(); blob_choice_it.forward()) {
|
||||
@ -226,7 +226,7 @@ void Dict::append_choices(const char *debug, const BLOB_CHOICE_LIST_VECTOR &char
|
||||
const CHAR_FRAGMENT_INFO *prev_char_frag_info, WERD_CHOICE *word,
|
||||
float certainties[], float *limit, WERD_CHOICE *best_choice,
|
||||
int *attempts_left, void *more_args) {
|
||||
int word_ending = (char_choice_index == char_choices.size() - 1);
|
||||
auto word_ending = (static_cast<unsigned>(char_choice_index) == char_choices.size() - 1);
|
||||
|
||||
// Deal with fragments.
|
||||
CHAR_FRAGMENT_INFO char_frag_info;
|
||||
|
@ -517,6 +517,7 @@ SquishedDawg *Trie::trie_to_dawg() {
|
||||
// Build a translation map from node indices in nodes_ vector to
|
||||
// their target indices in EDGE_ARRAY.
|
||||
std::vector<NODE_REF> node_ref_map(nodes_.size() + 1);
|
||||
unsigned i;
|
||||
for (i = 0; i < nodes_.size(); ++i) {
|
||||
node_ref_map[i + 1] = node_ref_map[i] + nodes_[i]->forward_edges.size();
|
||||
}
|
||||
|
@ -120,14 +120,14 @@ bool Parallel::Backward(bool debug, const NetworkIO &fwd_deltas, NetworkScratch
|
||||
#endif
|
||||
debug = false;
|
||||
}
|
||||
int stack_size = stack_.size();
|
||||
auto stack_size = stack_.size();
|
||||
if (type_ == NT_PAR_2D_LSTM) {
|
||||
// Special case, run parallel in parallel.
|
||||
std::vector<NetworkScratch::IO> in_deltas(stack_size);
|
||||
std::vector<NetworkScratch::IO> out_deltas(stack_size);
|
||||
// Split the forward deltas for each stack element.
|
||||
int feature_offset = 0;
|
||||
for (int i = 0; i < stack_.size(); ++i) {
|
||||
for (unsigned i = 0; i < stack_.size(); ++i) {
|
||||
int num_features = stack_[i]->NumOutputs();
|
||||
in_deltas[i].Resize(fwd_deltas, num_features, scratch);
|
||||
out_deltas[i].Resize(fwd_deltas, stack_[i]->NumInputs(), scratch);
|
||||
@ -137,11 +137,11 @@ bool Parallel::Backward(bool debug, const NetworkIO &fwd_deltas, NetworkScratch
|
||||
#ifdef _OPENMP
|
||||
# pragma omp parallel for num_threads(stack_size)
|
||||
#endif
|
||||
for (int i = 0; i < stack_size; ++i) {
|
||||
for (unsigned i = 0; i < stack_size; ++i) {
|
||||
stack_[i]->Backward(debug, *in_deltas[i], scratch, i == 0 ? back_deltas : out_deltas[i]);
|
||||
}
|
||||
if (needs_to_backprop_) {
|
||||
for (int i = 1; i < stack_size; ++i) {
|
||||
for (unsigned i = 1; i < stack_size; ++i) {
|
||||
back_deltas->AddAllToFloat(*out_deltas[i]);
|
||||
}
|
||||
}
|
||||
@ -152,7 +152,7 @@ bool Parallel::Backward(bool debug, const NetworkIO &fwd_deltas, NetworkScratch
|
||||
// back_deltas.
|
||||
NetworkScratch::IO out_deltas;
|
||||
int feature_offset = 0;
|
||||
for (int i = 0; i < stack_.size(); ++i) {
|
||||
for (unsigned i = 0; i < stack_.size(); ++i) {
|
||||
int num_features = stack_[i]->NumOutputs();
|
||||
in_deltas->CopyUnpacking(fwd_deltas, feature_offset, num_features);
|
||||
feature_offset += num_features;
|
||||
|
@ -161,7 +161,7 @@ void Plumbing::EnumerateLayers(const std::string *prefix, std::vector<std::strin
|
||||
Network *Plumbing::GetLayer(const char *id) const {
|
||||
char *next_id;
|
||||
int index = strtol(id, &next_id, 10);
|
||||
if (index < 0 || index >= stack_.size()) {
|
||||
if (index < 0 || static_cast<unsigned>(index) >= stack_.size()) {
|
||||
return nullptr;
|
||||
}
|
||||
if (stack_[index]->IsPlumbingType()) {
|
||||
@ -176,7 +176,7 @@ Network *Plumbing::GetLayer(const char *id) const {
|
||||
float *Plumbing::LayerLearningRatePtr(const char *id) {
|
||||
char *next_id;
|
||||
int index = strtol(id, &next_id, 10);
|
||||
if (index < 0 || index >= stack_.size()) {
|
||||
if (index < 0 || static_cast<unsigned>(index) >= stack_.size()) {
|
||||
return nullptr;
|
||||
}
|
||||
if (stack_[index]->IsPlumbingType()) {
|
||||
@ -184,7 +184,7 @@ float *Plumbing::LayerLearningRatePtr(const char *id) {
|
||||
ASSERT_HOST(*next_id == ':');
|
||||
return plumbing->LayerLearningRatePtr(next_id + 1);
|
||||
}
|
||||
if (index >= learning_rates_.size()) {
|
||||
if (static_cast<unsigned>(index) >= learning_rates_.size()) {
|
||||
return nullptr;
|
||||
}
|
||||
return &learning_rates_[index];
|
||||
|
@ -122,7 +122,7 @@ void RecodeBeamSearch::DecodeSecondaryBeams(const NetworkIO &output, double dict
|
||||
return;
|
||||
}
|
||||
int width = output.Width();
|
||||
int bucketNumber = 0;
|
||||
unsigned bucketNumber = 0;
|
||||
for (int t = 0; t < width; ++t) {
|
||||
while ((bucketNumber + 1) < character_boundaries_.size() &&
|
||||
t >= character_boundaries_[bucketNumber + 1]) {
|
||||
@ -160,7 +160,7 @@ void RecodeBeamSearch::SaveMostCertainChoices(const float *outputs, int num_outp
|
||||
}
|
||||
|
||||
void RecodeBeamSearch::segmentTimestepsByCharacters() {
|
||||
for (int i = 1; i < character_boundaries_.size(); ++i) {
|
||||
for (unsigned i = 1; i < character_boundaries_.size(); ++i) {
|
||||
std::vector<std::vector<std::pair<const char *, float>>> segment;
|
||||
for (int j = character_boundaries_[i - 1]; j < character_boundaries_[i]; ++j) {
|
||||
segment.push_back(timesteps[j]);
|
||||
@ -183,7 +183,7 @@ RecodeBeamSearch::combineSegmentedTimesteps(
|
||||
void RecodeBeamSearch::calculateCharBoundaries(std::vector<int> *starts, std::vector<int> *ends,
|
||||
std::vector<int> *char_bounds_, int maxWidth) {
|
||||
char_bounds_->push_back(0);
|
||||
for (int i = 0; i < ends->size(); ++i) {
|
||||
for (unsigned i = 0; i < ends->size(); ++i) {
|
||||
int middle = ((*starts)[i + 1] - (*ends)[i]) / 2;
|
||||
char_bounds_->push_back((*ends)[i] + middle);
|
||||
}
|
||||
@ -339,7 +339,7 @@ void RecodeBeamSearch::PrintBeam2(bool uids, int num_outputs, const UNICHARSET *
|
||||
}
|
||||
}
|
||||
int ct = 0;
|
||||
int cb = 1;
|
||||
unsigned cb = 1;
|
||||
for (std::vector<const RecodeNode *> layer : topology) {
|
||||
if (cb >= character_boundaries_.size()) {
|
||||
break;
|
||||
@ -399,7 +399,7 @@ void RecodeBeamSearch::extractSymbolChoices(const UNICHARSET *unicharset) {
|
||||
// new beam is calculated based on the results from the original beam.
|
||||
std::vector<RecodeBeam *> &currentBeam = secondary_beam_.empty() ? beam_ : secondary_beam_;
|
||||
character_boundaries_[0] = 0;
|
||||
for (int j = 1; j < character_boundaries_.size(); ++j) {
|
||||
for (unsigned j = 1; j < character_boundaries_.size(); ++j) {
|
||||
std::vector<int> unichar_ids;
|
||||
std::vector<float> certs;
|
||||
std::vector<float> ratings;
|
||||
@ -434,7 +434,7 @@ void RecodeBeamSearch::extractSymbolChoices(const UNICHARSET *unicharset) {
|
||||
}
|
||||
if (!unichar_ids.empty()) {
|
||||
int bestPos = 0;
|
||||
for (int i = 1; i < unichar_ids.size(); ++i) {
|
||||
for (unsigned i = 1; i < unichar_ids.size(); ++i) {
|
||||
if (ratings[i] < ratings[bestPos]) {
|
||||
bestPos = i;
|
||||
}
|
||||
@ -619,7 +619,7 @@ WERD_RES *RecodeBeamSearch::InitializeWord(bool leading_space, const TBOX &line_
|
||||
C_BLOB_LIST blobs;
|
||||
C_BLOB_IT b_it(&blobs);
|
||||
for (int i = word_start; i < word_end; ++i) {
|
||||
if (character_boundaries_.size() > (i + 1)) {
|
||||
if (static_cast<unsigned>(i + 1) < character_boundaries_.size()) {
|
||||
TBOX box(static_cast<int16_t>(std::floor(character_boundaries_[i] * scale_factor)) +
|
||||
line_box.left(),
|
||||
line_box.bottom(),
|
||||
@ -714,7 +714,7 @@ void RecodeBeamSearch::ComputeSecTopN(std::unordered_set<int> *exList, const flo
|
||||
void RecodeBeamSearch::DecodeStep(const float *outputs, int t, double dict_ratio,
|
||||
double cert_offset, double worst_dict_cert,
|
||||
const UNICHARSET *charset, bool debug) {
|
||||
if (t == beam_.size()) {
|
||||
if (t == static_cast<int>(beam_.size())) {
|
||||
beam_.push_back(new RecodeBeam);
|
||||
}
|
||||
RecodeBeam *step = beam_[t];
|
||||
@ -783,7 +783,7 @@ void RecodeBeamSearch::DecodeStep(const float *outputs, int t, double dict_ratio
|
||||
void RecodeBeamSearch::DecodeSecondaryStep(const float *outputs, int t, double dict_ratio,
|
||||
double cert_offset, double worst_dict_cert,
|
||||
const UNICHARSET *charset, bool debug) {
|
||||
if (t == secondary_beam_.size()) {
|
||||
if (t == static_cast<int>(secondary_beam_.size())) {
|
||||
secondary_beam_.push_back(new RecodeBeam);
|
||||
}
|
||||
RecodeBeam *step = secondary_beam_[t];
|
||||
|
@ -137,13 +137,13 @@ public:
|
||||
|
||||
float EstimateYFor(float x, float r) {
|
||||
ASSERT_HOST(finalized_);
|
||||
int start = 0, end = values_.size();
|
||||
unsigned start = 0, end = values_.size();
|
||||
// Because the number of samples (used_) is assumed to be small,
|
||||
// just use linear search to find values within the range.
|
||||
while (start < values_.size() && values_[start].x < x * (1.0 - r)) {
|
||||
while (start < values_.size() && values_[start].x < x * (1 - r)) {
|
||||
start++;
|
||||
}
|
||||
while (end - 1 >= 0 && values_[end - 1].x > x * (1.0 + r)) {
|
||||
while (end > 0 && values_[end - 1].x > x * (1 + r)) {
|
||||
end--;
|
||||
}
|
||||
|
||||
@ -157,7 +157,7 @@ public:
|
||||
// Compute weighted average of the values.
|
||||
float rc = 0;
|
||||
int vote = 0;
|
||||
for (int i = start; i < end; i++) {
|
||||
for (auto i = start; i < end; i++) {
|
||||
rc += values_[i].vote * x * values_[i].y / values_[i].x;
|
||||
vote += values_[i].vote;
|
||||
}
|
||||
@ -457,8 +457,8 @@ private:
|
||||
|
||||
// Cleanup chars that are already merged to others.
|
||||
void DeleteChars() {
|
||||
int index = 0;
|
||||
for (int i = 0; i < characters_.size(); ++i) {
|
||||
unsigned index = 0;
|
||||
for (unsigned i = 0; i < characters_.size(); ++i) {
|
||||
if (!characters_[i].delete_flag()) {
|
||||
if (index != i) {
|
||||
characters_[index] = characters_[i];
|
||||
|
@ -1528,8 +1528,8 @@ BlobRegionType ColPartitionGrid::SmoothInOneDirection(BlobNeighbourDir direction
|
||||
// By iteratively including the next smallest distance across the vectors,
|
||||
// (as in a merge sort) we can use the vector indices as counts of each type
|
||||
// and find the nearest set of objects that give us a definite decision.
|
||||
int counts[NPT_COUNT];
|
||||
memset(counts, 0, sizeof(counts[0]) * NPT_COUNT);
|
||||
unsigned counts[NPT_COUNT];
|
||||
memset(counts, 0, sizeof(counts));
|
||||
// If there is image in the search box, tip the balance in image's favor.
|
||||
int image_bias = image_region ? kSmoothDecisionMargin / 2 : 0;
|
||||
BlobRegionType text_dir = part.blob_type();
|
||||
@ -1551,15 +1551,15 @@ BlobRegionType ColPartitionGrid::SmoothInOneDirection(BlobNeighbourDir direction
|
||||
}
|
||||
*best_distance = min_dist;
|
||||
if (debug) {
|
||||
tprintf("Totals: htext=%d+%d, vtext=%d+%d, image=%d+%d, at dist=%d\n", counts[NPT_HTEXT],
|
||||
tprintf("Totals: htext=%u+%u, vtext=%u+%u, image=%u+%u, at dist=%d\n", counts[NPT_HTEXT],
|
||||
counts[NPT_WEAK_HTEXT], counts[NPT_VTEXT], counts[NPT_WEAK_VTEXT], counts[NPT_IMAGE],
|
||||
image_bias, min_dist);
|
||||
}
|
||||
// See if we have a decision yet.
|
||||
int image_count = counts[NPT_IMAGE];
|
||||
int htext_score =
|
||||
auto image_count = counts[NPT_IMAGE];
|
||||
auto htext_score =
|
||||
counts[NPT_HTEXT] + counts[NPT_WEAK_HTEXT] - (image_count + counts[NPT_WEAK_VTEXT]);
|
||||
int vtext_score =
|
||||
auto vtext_score =
|
||||
counts[NPT_VTEXT] + counts[NPT_WEAK_VTEXT] - (image_count + counts[NPT_WEAK_HTEXT]);
|
||||
if (image_count > 0 && image_bias - htext_score >= kSmoothDecisionMargin &&
|
||||
image_bias - vtext_score >= kSmoothDecisionMargin) {
|
||||
|
@ -187,7 +187,7 @@ void ColPartitionSet::AddToColumnSetsIfUnique(PartSetVector *column_sets, WidthC
|
||||
delete this;
|
||||
return;
|
||||
}
|
||||
for (int i = 0; i < column_sets->size(); ++i) {
|
||||
for (unsigned i = 0; i < column_sets->size(); ++i) {
|
||||
ColPartitionSet *columns = column_sets->at(i);
|
||||
// In ordering the column set candidates, good_coverage_ is king,
|
||||
// followed by good_column_count_ and then bad_coverage_.
|
||||
|
@ -89,13 +89,13 @@ void StructuredTable::set_max_text_height(int height) {
|
||||
bool StructuredTable::is_lined() const {
|
||||
return is_lined_;
|
||||
}
|
||||
int StructuredTable::row_count() const {
|
||||
unsigned StructuredTable::row_count() const {
|
||||
return cell_y_.empty() ? 0 : cell_y_.size() - 1;
|
||||
}
|
||||
int StructuredTable::column_count() const {
|
||||
unsigned StructuredTable::column_count() const {
|
||||
return cell_x_.empty() ? 0 : cell_x_.size() - 1;
|
||||
}
|
||||
int StructuredTable::cell_count() const {
|
||||
unsigned StructuredTable::cell_count() const {
|
||||
return row_count() * column_count();
|
||||
}
|
||||
void StructuredTable::set_bounding_box(const TBOX &box) {
|
||||
@ -110,12 +110,12 @@ int StructuredTable::median_cell_height() {
|
||||
int StructuredTable::median_cell_width() {
|
||||
return median_cell_width_;
|
||||
}
|
||||
int StructuredTable::row_height(int row) const {
|
||||
ASSERT_HOST(0 <= row && row < row_count());
|
||||
int StructuredTable::row_height(unsigned row) const {
|
||||
ASSERT_HOST(row < row_count());
|
||||
return cell_y_[row + 1] - cell_y_[row];
|
||||
}
|
||||
int StructuredTable::column_width(int column) const {
|
||||
ASSERT_HOST(0 <= column && column < column_count());
|
||||
int StructuredTable::column_width(unsigned column) const {
|
||||
ASSERT_HOST(column < column_count());
|
||||
return cell_x_[column + 1] - cell_x_[column];
|
||||
}
|
||||
int StructuredTable::space_above() const {
|
||||
@ -234,16 +234,16 @@ int StructuredTable::CountFilledCellsInRow(int row) {
|
||||
int StructuredTable::CountFilledCellsInColumn(int column) {
|
||||
return CountFilledCells(0, row_count() - 1, column, column);
|
||||
}
|
||||
int StructuredTable::CountFilledCells(int row_start, int row_end, int column_start,
|
||||
int column_end) {
|
||||
ASSERT_HOST(0 <= row_start && row_start <= row_end && row_end < row_count());
|
||||
ASSERT_HOST(0 <= column_start && column_start <= column_end && column_end < column_count());
|
||||
int StructuredTable::CountFilledCells(unsigned row_start, unsigned row_end, unsigned column_start,
|
||||
unsigned column_end) {
|
||||
ASSERT_HOST(row_start <= row_end && row_end < row_count());
|
||||
ASSERT_HOST(column_start <= column_end && column_end < column_count());
|
||||
int cell_count = 0;
|
||||
TBOX cell_box;
|
||||
for (int row = row_start; row <= row_end; ++row) {
|
||||
for (unsigned row = row_start; row <= row_end; ++row) {
|
||||
cell_box.set_bottom(cell_y_[row]);
|
||||
cell_box.set_top(cell_y_[row + 1]);
|
||||
for (int col = column_start; col <= column_end; ++col) {
|
||||
for (unsigned col = column_start; col <= column_end; ++col) {
|
||||
cell_box.set_left(cell_x_[col]);
|
||||
cell_box.set_right(cell_x_[col + 1]);
|
||||
if (CountPartitions(cell_box) > 0) {
|
||||
@ -258,8 +258,8 @@ int StructuredTable::CountFilledCells(int row_start, int row_end, int column_sta
|
||||
// This can filter out large whitespace caused by growing tables too far
|
||||
// and page numbers.
|
||||
bool StructuredTable::VerifyRowFilled(int row) {
|
||||
for (int i = 0; i < column_count(); ++i) {
|
||||
double area_filled = CalculateCellFilledPercentage(row, i);
|
||||
for (unsigned i = 0; i < column_count(); ++i) {
|
||||
auto area_filled = CalculateCellFilledPercentage(row, i);
|
||||
if (area_filled >= kMinFilledArea) {
|
||||
return true;
|
||||
}
|
||||
@ -269,9 +269,9 @@ bool StructuredTable::VerifyRowFilled(int row) {
|
||||
|
||||
// Finds the filled area in a cell.
|
||||
// Assume ColPartitions do not overlap for simplicity (even though they do).
|
||||
double StructuredTable::CalculateCellFilledPercentage(int row, int column) {
|
||||
ASSERT_HOST(0 <= row && row <= row_count());
|
||||
ASSERT_HOST(0 <= column && column <= column_count());
|
||||
double StructuredTable::CalculateCellFilledPercentage(unsigned row, unsigned column) {
|
||||
ASSERT_HOST(row <= row_count());
|
||||
ASSERT_HOST(column <= column_count());
|
||||
const TBOX kCellBox(cell_x_[column], cell_y_[row], cell_x_[column + 1], cell_y_[row + 1]);
|
||||
ASSERT_HOST(!kCellBox.null_box());
|
||||
|
||||
@ -532,10 +532,10 @@ void StructuredTable::CalculateStats() {
|
||||
STATS height_stats(0, kMaxCellHeight + 1);
|
||||
STATS width_stats(0, kMaxCellWidth + 1);
|
||||
|
||||
for (int i = 0; i < row_count(); ++i) {
|
||||
for (unsigned i = 0; i < row_count(); ++i) {
|
||||
height_stats.add(row_height(i), column_count());
|
||||
}
|
||||
for (int i = 0; i < column_count(); ++i) {
|
||||
for (unsigned i = 0; i < column_count(); ++i) {
|
||||
width_stats.add(column_width(i), row_count());
|
||||
}
|
||||
|
||||
@ -617,8 +617,8 @@ void StructuredTable::FindCellSplitLocations(const std::vector<int> &min_list,
|
||||
ASSERT_HOST(min_list.at(min_list.size() - 1) < max_list.at(max_list.size() - 1));
|
||||
|
||||
locations->push_back(min_list.at(0));
|
||||
int min_index = 0;
|
||||
int max_index = 0;
|
||||
unsigned min_index = 0;
|
||||
unsigned max_index = 0;
|
||||
int stacked_partitions = 0;
|
||||
int last_cross_position = INT32_MAX;
|
||||
// max_index will expire after min_index.
|
||||
@ -904,7 +904,7 @@ bool TableRecognizer::RecognizeWhitespacedTable(const TBOX &guess_box, Structure
|
||||
const int kMidGuessY = (guess_box.bottom() + guess_box.top()) / 2;
|
||||
// Keeps track of the most columns in an accepted table. The resulting table
|
||||
// may be less than the max, but we don't want to stray too far.
|
||||
int best_cols = 0;
|
||||
unsigned best_cols = 0;
|
||||
// Make sure we find a good border.
|
||||
bool found_good_border = false;
|
||||
|
||||
|
@ -86,15 +86,15 @@ public:
|
||||
// Basic accessors. Some are treated as attributes despite having indirect
|
||||
// representation.
|
||||
bool is_lined() const;
|
||||
int row_count() const;
|
||||
int column_count() const;
|
||||
int cell_count() const;
|
||||
unsigned row_count() const;
|
||||
unsigned column_count() const;
|
||||
unsigned cell_count() const;
|
||||
void set_bounding_box(const TBOX &box);
|
||||
const TBOX &bounding_box() const;
|
||||
int median_cell_height();
|
||||
int median_cell_width();
|
||||
int row_height(int row) const;
|
||||
int column_width(int column) const;
|
||||
int row_height(unsigned row) const;
|
||||
int column_width(unsigned column) const;
|
||||
int space_above() const;
|
||||
int space_below() const;
|
||||
|
||||
@ -120,7 +120,7 @@ public:
|
||||
int CountFilledCells();
|
||||
int CountFilledCellsInRow(int row);
|
||||
int CountFilledCellsInColumn(int column);
|
||||
int CountFilledCells(int row_start, int row_end, int column_start, int column_end);
|
||||
int CountFilledCells(unsigned row_start, unsigned row_end, unsigned column_start, unsigned column_end);
|
||||
|
||||
// Makes sure that at least one cell in a row has substantial area filled.
|
||||
// This can filter out large whitespace caused by growing tables too far
|
||||
@ -128,7 +128,7 @@ public:
|
||||
// (currently bugged for some reason).
|
||||
bool VerifyRowFilled(int row);
|
||||
// Finds the filled area in a cell.
|
||||
double CalculateCellFilledPercentage(int row, int column);
|
||||
double CalculateCellFilledPercentage(unsigned row, unsigned column);
|
||||
|
||||
// Debug display, draws the table in the given color. If the table is not
|
||||
// valid, the table and "best" grid lines are still drawn in the given color.
|
||||
|
@ -38,7 +38,7 @@ int main(int argc, char **argv) {
|
||||
for (int arg = 1; arg < argc - 1; ++arg) {
|
||||
// Load the input unicharset
|
||||
if (input_unicharset.load_from_file(argv[arg])) {
|
||||
printf("Loaded unicharset of size %d from file %s\n", input_unicharset.size(), argv[arg]);
|
||||
printf("Loaded unicharset of size %zu from file %s\n", input_unicharset.size(), argv[arg]);
|
||||
result_unicharset.AppendOtherUnicharset(input_unicharset);
|
||||
} else {
|
||||
printf("Failed to load unicharset from file %s!!\n", argv[arg]);
|
||||
|
@ -301,7 +301,7 @@ SEAM *Wordrec::chop_overlapping_blob(const std::vector<TBOX> &boxes, bool italic
|
||||
}
|
||||
}
|
||||
|
||||
*blob_number = -1;
|
||||
*blob_number = UINT_MAX;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@ -323,20 +323,21 @@ SEAM *Wordrec::improve_one_blob(const std::vector<BLOB_CHOICE *> &blob_choices,
|
||||
float rating_ceiling = FLT_MAX;
|
||||
SEAM *seam = nullptr;
|
||||
do {
|
||||
*blob_number = select_blob_to_split_from_fixpt(fixpt);
|
||||
auto blob = select_blob_to_split_from_fixpt(fixpt);
|
||||
if (chop_debug) {
|
||||
tprintf("blob_number from fixpt = %u\n", *blob_number);
|
||||
tprintf("blob_number from fixpt = %d\n", blob);
|
||||
}
|
||||
bool split_point_from_dict = (*blob_number != -1);
|
||||
bool split_point_from_dict = (blob != -1);
|
||||
if (split_point_from_dict) {
|
||||
fixpt->clear();
|
||||
} else {
|
||||
*blob_number = select_blob_to_split(blob_choices, rating_ceiling, split_next_to_fragment);
|
||||
blob = select_blob_to_split(blob_choices, rating_ceiling, split_next_to_fragment);
|
||||
}
|
||||
if (chop_debug) {
|
||||
tprintf("blob_number = %u\n", *blob_number);
|
||||
tprintf("blob_number = %d\n", blob);
|
||||
}
|
||||
if (*blob_number == -1) {
|
||||
*blob_number = blob;
|
||||
if (blob == -1) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
@ -859,7 +859,7 @@ LanguageModelDawgInfo *LanguageModel::GenerateDawgInfo(bool word_end, int curr_c
|
||||
// like don't.
|
||||
const auto &normed_ids = dict_->getUnicharset().normed_ids(b.unichar_id());
|
||||
DawgPositionVector tmp_active_dawgs;
|
||||
for (int i = 0; i < normed_ids.size(); ++i) {
|
||||
for (unsigned i = 0; i < normed_ids.size(); ++i) {
|
||||
if (language_model_debug_level > 2) {
|
||||
tprintf("Test Letter OK for unichar %d, normed %d\n", b.unichar_id(), normed_ids[i]);
|
||||
}
|
||||
|
@ -164,8 +164,8 @@ void Wordrec::UpdateSegSearchNodes(float rating_cert_scale, int starting_col,
|
||||
LMPainPoints *pain_points, BestChoiceBundle *best_choice_bundle,
|
||||
BlamerBundle *blamer_bundle) {
|
||||
MATRIX *ratings = word_res->ratings;
|
||||
ASSERT_HOST(ratings->dimension() == pending->size());
|
||||
ASSERT_HOST(ratings->dimension() == best_choice_bundle->beam.size());
|
||||
ASSERT_HOST(static_cast<unsigned>(ratings->dimension()) == pending->size());
|
||||
ASSERT_HOST(static_cast<unsigned>(ratings->dimension()) == best_choice_bundle->beam.size());
|
||||
for (int col = starting_col; col < ratings->dimension(); ++col) {
|
||||
if (!(*pending)[col].WorkToDo()) {
|
||||
continue;
|
||||
|
Loading…
Reference in New Issue
Block a user