From 296a836f4efcdbc4fff7ff3b5a850c3911a9071e Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Thu, 5 Jul 2018 08:35:54 +0200 Subject: [PATCH 1/3] Fix compiler warnings [-Wunused-const-variable] clang warnings: src/classify/trainingsampleset.cpp:39:11: warning: unused variable 'kMinOutlierSamples' [-Wunused-const-variable] src/lstm/lstmrecognizer.cpp:45:11: warning: unused variable 'kMaxChoices' [-Wunused-const-variable] src/training/dawg2wordlist.cpp:28:11: warning: unused variable 'kDictDebugLevel' [-Wunused-const-variable] src/training/stringrenderer.cpp:50:21: warning: unused variable 'kWordJoiner' [-Wunused-const-variable] Signed-off-by: Stefan Weil --- src/classify/trainingsampleset.cpp | 2 -- src/lstm/lstmrecognizer.cpp | 2 -- src/training/dawg2wordlist.cpp | 2 -- src/training/stringrenderer.cpp | 1 - 4 files changed, 7 deletions(-) diff --git a/src/classify/trainingsampleset.cpp b/src/classify/trainingsampleset.cpp index 2a53d722..e2f020f4 100644 --- a/src/classify/trainingsampleset.cpp +++ b/src/classify/trainingsampleset.cpp @@ -35,8 +35,6 @@ const int kSquareLimit = 25; // Prime numbers for subsampling distances. const int kPrime1 = 17; const int kPrime2 = 13; -// Min samples from which to start discarding outliers. -const int kMinOutlierSamples = 5; TrainingSampleSet::FontClassInfo::FontClassInfo() : num_raw_samples(0), canonical_sample(-1), canonical_dist(0.0f) { diff --git a/src/lstm/lstmrecognizer.cpp b/src/lstm/lstmrecognizer.cpp index 060cf261..523305ef 100644 --- a/src/lstm/lstmrecognizer.cpp +++ b/src/lstm/lstmrecognizer.cpp @@ -41,8 +41,6 @@ namespace tesseract { -// Max number of blob choices to return in any given position. -const int kMaxChoices = 4; // Default ratio between dict and non-dict words. const double kDictRatio = 2.25; // Default certainty offset to give the dictionary a chance. 
diff --git a/src/training/dawg2wordlist.cpp b/src/training/dawg2wordlist.cpp index 355c6fba..ca8612c7 100644 --- a/src/training/dawg2wordlist.cpp +++ b/src/training/dawg2wordlist.cpp @@ -25,8 +25,6 @@ #include "trie.h" #include "unicharset.h" -const int kDictDebugLevel = 1; - tesseract::Dawg *LoadSquishedDawg(const UNICHARSET &unicharset, const char *filename) { const int kDictDebugLevel = 1; diff --git a/src/training/stringrenderer.cpp b/src/training/stringrenderer.cpp index 8cc9c907..5719c1c4 100644 --- a/src/training/stringrenderer.cpp +++ b/src/training/stringrenderer.cpp @@ -47,7 +47,6 @@ static const int kDefaultOutputResolution = 300; // recommendation in http://unicode.org/reports/tr14/ to avoid line-breaks at // hyphens and other non-alpha characters. static const char* kWordJoinerUTF8 = "\u2060"; -static const char32 kWordJoiner = 0x2060; static bool IsCombiner(int ch) { const int char_type = u_charType(ch); From a74d467e903039ad1aef8f6ae8f453544a209207 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Thu, 5 Jul 2018 09:27:27 +0200 Subject: [PATCH 2/3] Fix compiler warnings [-Wcomma] clang warnings: src/api/baseapi.cpp:1642:18: warning: possible misuse of comma operator here [-Wcomma] src/api/baseapi.cpp:1642:31: warning: possible misuse of comma operator here [-Wcomma] src/api/baseapi.cpp:1642:45: warning: possible misuse of comma operator here [-Wcomma] src/api/baseapi.cpp:1652:16: warning: possible misuse of comma operator here [-Wcomma] src/api/baseapi.cpp:1652:30: warning: possible misuse of comma operator here [-Wcomma] src/api/baseapi.cpp:1662:17: warning: possible misuse of comma operator here [-Wcomma] Signed-off-by: Stefan Weil --- src/api/baseapi.cpp | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp index 09894dcb..c4abc249 100644 --- a/src/api/baseapi.cpp +++ b/src/api/baseapi.cpp @@ -456,7 +456,7 @@ void TessBaseAPI::GetAvailableLanguagesAsVector( } } 
-//TODO(amit): Adapt to lstm +//TODO(amit): Adapt to lstm #ifndef DISABLED_LEGACY_ENGINE /** * Init only the lang model component of Tesseract. The only functions @@ -833,8 +833,8 @@ int TessBaseAPI::Recognize(ETEXT_DESC* monitor) { page_res_ = tesseract_->ApplyBoxes(*input_file_, true, block_list_); } else if (tesseract_->tessedit_resegment_from_boxes) { page_res_ = tesseract_->ApplyBoxes(*input_file_, false, block_list_); - } else -#endif // ndef DISABLED_LEGACY_ENGINE + } else +#endif // ndef DISABLED_LEGACY_ENGINE { page_res_ = new PAGE_RES(tesseract_->AnyLSTMLang(), block_list_, &tesseract_->prev_word_best_choice_); @@ -1616,8 +1616,11 @@ char* TessBaseAPI::GetTSVText(int page_number) { STRING tsv_str(""); - int page_num = page_id, block_num = 0, par_num = 0, line_num = 0, - word_num = 0; + int page_num = page_id; + int block_num = 0; + int par_num = 0; + int line_num = 0; + int word_num = 0; tsv_str.add_str_int("1\t", page_num); // level 1 - page tsv_str.add_str_int("\t", block_num); @@ -1639,7 +1642,10 @@ char* TessBaseAPI::GetTSVText(int page_number) { // Add rows for any new block/paragraph/textline. 
if (res_it->IsAtBeginningOf(RIL_BLOCK)) { - block_num++, par_num = 0, line_num = 0, word_num = 0; + block_num++; + par_num = 0; + line_num = 0; + word_num = 0; tsv_str.add_str_int("2\t", page_num); // level 2 - block tsv_str.add_str_int("\t", block_num); tsv_str.add_str_int("\t", par_num); @@ -1649,7 +1655,9 @@ char* TessBaseAPI::GetTSVText(int page_number) { tsv_str += "\t-1\t\n"; // end of row for block } if (res_it->IsAtBeginningOf(RIL_PARA)) { - par_num++, line_num = 0, word_num = 0; + par_num++; + line_num = 0; + word_num = 0; tsv_str.add_str_int("3\t", page_num); // level 3 - paragraph tsv_str.add_str_int("\t", block_num); tsv_str.add_str_int("\t", par_num); @@ -1659,7 +1667,8 @@ char* TessBaseAPI::GetTSVText(int page_number) { tsv_str += "\t-1\t\n"; // end of row for para } if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) { - line_num++, word_num = 0; + line_num++; + word_num = 0; tsv_str.add_str_int("4\t", page_num); // level 4 - line tsv_str.add_str_int("\t", block_num); tsv_str.add_str_int("\t", par_num); From f107f116d93dc9b6f5e3759b093b8f9f7884f03b Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Thu, 5 Jul 2018 09:31:40 +0200 Subject: [PATCH 3/3] Fix compiler warnings [-Wconditional-uninitialized] clang warnings: src/ccstruct/coutln.cpp:231:15: warning: variable 'destindex' may be uninitialized when used here [-Wconditional-uninitialized] src/wordrec/language_model.cpp:1170:27: warning: variable 'expected_gap' may be uninitialized when used here [-Wconditional-uninitialized] Signed-off-by: Stefan Weil --- src/ccstruct/coutln.cpp | 8 ++++---- src/wordrec/language_model.cpp | 5 ++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/ccstruct/coutln.cpp b/src/ccstruct/coutln.cpp index fb4ccbbc..a1dea5d0 100644 --- a/src/ccstruct/coutln.cpp +++ b/src/ccstruct/coutln.cpp @@ -141,14 +141,14 @@ int16_t length //length of loop */ C_OUTLINE::C_OUTLINE(C_OUTLINE* srcline, FCOORD rotation) : offsets(nullptr) { - TBOX new_box; //easy bounding - int16_t 
stepindex; //index to step - int16_t dirdiff; //direction change + TBOX new_box; //easy bounding + int16_t stepindex; //index to step + int16_t dirdiff; //direction change ICOORD pos; //current position ICOORD prevpos; //previous dest point ICOORD destpos; //destination point - int16_t destindex; //index to step + int16_t destindex = INT16_MAX; //index to step DIR128 dir; //coded direction uint8_t new_step; diff --git a/src/wordrec/language_model.cpp b/src/wordrec/language_model.cpp index b0ee4c38..2a08e8c6 100644 --- a/src/wordrec/language_model.cpp +++ b/src/wordrec/language_model.cpp @@ -1127,7 +1127,7 @@ void LanguageModel::FillConsistencyInfo( } if (!word_res->blob_widths.empty()) { // if we have widths/gaps info bool expected_gap_found = false; - float expected_gap; + float expected_gap = 0.0f; int temp_gap; if (fontinfo_id >= 0) { // found a common font ASSERT_HOST(fontinfo_id < fontinfo_table_->size()); @@ -1140,7 +1140,6 @@ void LanguageModel::FillConsistencyInfo( consistency_info->inconsistent_font = true; // Get an average of the expected gaps in each font int num_addends = 0; - expected_gap = 0; int temp_fid; for (int i = 0; i < 4; ++i) { if (i == 0) { @@ -1159,9 +1158,9 @@ void LanguageModel::FillConsistencyInfo( num_addends++; } } - expected_gap_found = (num_addends > 0); if (num_addends > 0) { expected_gap /= static_cast<float>(num_addends); + expected_gap_found = true; } } if (expected_gap_found) {