From 296a836f4efcdbc4fff7ff3b5a850c3911a9071e Mon Sep 17 00:00:00 2001
From: Stefan Weil <sw@weilnetz.de>
Date: Thu, 5 Jul 2018 08:35:54 +0200
Subject: [PATCH 1/3] Fix compiler warnings [-Wunused-const-variable]

clang warnings:

src/classify/trainingsampleset.cpp:39:11: warning:
 unused variable 'kMinOutlierSamples' [-Wunused-const-variable]
src/lstm/lstmrecognizer.cpp:45:11: warning:
 unused variable 'kMaxChoices' [-Wunused-const-variable]
src/training/dawg2wordlist.cpp:28:11: warning:
 unused variable 'kDictDebugLevel' [-Wunused-const-variable]
src/training/stringrenderer.cpp:50:21: warning:
 unused variable 'kWordJoiner' [-Wunused-const-variable]

Signed-off-by: Stefan Weil <sw@weilnetz.de>
---
 src/classify/trainingsampleset.cpp | 2 --
 src/lstm/lstmrecognizer.cpp        | 2 --
 src/training/dawg2wordlist.cpp     | 2 --
 src/training/stringrenderer.cpp    | 1 -
 4 files changed, 7 deletions(-)

diff --git a/src/classify/trainingsampleset.cpp b/src/classify/trainingsampleset.cpp
index 2a53d722..e2f020f4 100644
--- a/src/classify/trainingsampleset.cpp
+++ b/src/classify/trainingsampleset.cpp
@@ -35,8 +35,6 @@ const int kSquareLimit = 25;
 // Prime numbers for subsampling distances.
 const int kPrime1 = 17;
 const int kPrime2 = 13;
-// Min samples from which to start discarding outliers.
-const int kMinOutlierSamples = 5;
 
 TrainingSampleSet::FontClassInfo::FontClassInfo()
   : num_raw_samples(0), canonical_sample(-1), canonical_dist(0.0f) {
diff --git a/src/lstm/lstmrecognizer.cpp b/src/lstm/lstmrecognizer.cpp
index 060cf261..523305ef 100644
--- a/src/lstm/lstmrecognizer.cpp
+++ b/src/lstm/lstmrecognizer.cpp
@@ -41,8 +41,6 @@
 
 namespace tesseract {
 
-// Max number of blob choices to return in any given position.
-const int kMaxChoices = 4;
 // Default ratio between dict and non-dict words.
 const double kDictRatio = 2.25;
 // Default certainty offset to give the dictionary a chance.
diff --git a/src/training/dawg2wordlist.cpp b/src/training/dawg2wordlist.cpp
index 355c6fba..ca8612c7 100644
--- a/src/training/dawg2wordlist.cpp
+++ b/src/training/dawg2wordlist.cpp
@@ -25,8 +25,6 @@
 #include "trie.h"
 #include "unicharset.h"
 
-const int kDictDebugLevel = 1;
-
 tesseract::Dawg *LoadSquishedDawg(const UNICHARSET &unicharset,
                                   const char *filename) {
   const int kDictDebugLevel = 1;
diff --git a/src/training/stringrenderer.cpp b/src/training/stringrenderer.cpp
index 8cc9c907..5719c1c4 100644
--- a/src/training/stringrenderer.cpp
+++ b/src/training/stringrenderer.cpp
@@ -47,7 +47,6 @@ static const int kDefaultOutputResolution = 300;
 // recommendation in http://unicode.org/reports/tr14/ to avoid line-breaks at
 // hyphens and other non-alpha characters.
 static const char* kWordJoinerUTF8 = "\u2060";
-static const char32 kWordJoiner = 0x2060;
 
 static bool IsCombiner(int ch) {
   const int char_type = u_charType(ch);

From a74d467e903039ad1aef8f6ae8f453544a209207 Mon Sep 17 00:00:00 2001
From: Stefan Weil <sw@weilnetz.de>
Date: Thu, 5 Jul 2018 09:27:27 +0200
Subject: [PATCH 2/3] Fix compiler warnings [-Wcomma]

clang warnings:

src/api/baseapi.cpp:1642:18: warning:
 possible misuse of comma operator here [-Wcomma]
src/api/baseapi.cpp:1642:31: warning:
 possible misuse of comma operator here [-Wcomma]
src/api/baseapi.cpp:1642:45: warning:
 possible misuse of comma operator here [-Wcomma]
src/api/baseapi.cpp:1652:16: warning:
 possible misuse of comma operator here [-Wcomma]
src/api/baseapi.cpp:1652:30: warning:
 possible misuse of comma operator here [-Wcomma]
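src/api/baseapi.cpp:1662:17: warning:
 possible misuse of comma operator here [-Wcomma]

Split the comma-separated declarations and expressions in
TessBaseAPI::GetTSVText into separate statements.
Also remove trailing whitespace from three lines in the same file.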

Signed-off-by: Stefan Weil <sw@weilnetz.de>
---
 src/api/baseapi.cpp | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp
index 09894dcb..c4abc249 100644
--- a/src/api/baseapi.cpp
+++ b/src/api/baseapi.cpp
@@ -456,7 +456,7 @@ void TessBaseAPI::GetAvailableLanguagesAsVector(
   }
 }
 
-//TODO(amit): Adapt to lstm 
+//TODO(amit): Adapt to lstm
 #ifndef DISABLED_LEGACY_ENGINE
 /**
  * Init only the lang model component of Tesseract. The only functions
@@ -833,8 +833,8 @@ int TessBaseAPI::Recognize(ETEXT_DESC* monitor) {
     page_res_ = tesseract_->ApplyBoxes(*input_file_, true, block_list_);
   } else if (tesseract_->tessedit_resegment_from_boxes) {
     page_res_ = tesseract_->ApplyBoxes(*input_file_, false, block_list_);
-  } else 
-#endif  // ndef DISABLED_LEGACY_ENGINE 
+  } else
+#endif  // ndef DISABLED_LEGACY_ENGINE
   {
     page_res_ = new PAGE_RES(tesseract_->AnyLSTMLang(),
                              block_list_, &tesseract_->prev_word_best_choice_);
@@ -1616,8 +1616,11 @@ char* TessBaseAPI::GetTSVText(int page_number) {
 
   STRING tsv_str("");
 
-  int page_num = page_id, block_num = 0, par_num = 0, line_num = 0,
-      word_num = 0;
+  int page_num = page_id;
+  int block_num = 0;
+  int par_num = 0;
+  int line_num = 0;
+  int word_num = 0;
 
   tsv_str.add_str_int("1\t", page_num);  // level 1 - page
   tsv_str.add_str_int("\t", block_num);
@@ -1639,7 +1642,10 @@ char* TessBaseAPI::GetTSVText(int page_number) {
 
     // Add rows for any new block/paragraph/textline.
     if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
-      block_num++, par_num = 0, line_num = 0, word_num = 0;
+      block_num++;
+      par_num = 0;
+      line_num = 0;
+      word_num = 0;
       tsv_str.add_str_int("2\t", page_num);  // level 2 - block
       tsv_str.add_str_int("\t", block_num);
       tsv_str.add_str_int("\t", par_num);
@@ -1649,7 +1655,9 @@ char* TessBaseAPI::GetTSVText(int page_number) {
       tsv_str += "\t-1\t\n";  // end of row for block
     }
     if (res_it->IsAtBeginningOf(RIL_PARA)) {
-      par_num++, line_num = 0, word_num = 0;
+      par_num++;
+      line_num = 0;
+      word_num = 0;
       tsv_str.add_str_int("3\t", page_num);  // level 3 - paragraph
       tsv_str.add_str_int("\t", block_num);
       tsv_str.add_str_int("\t", par_num);
@@ -1659,7 +1667,8 @@ char* TessBaseAPI::GetTSVText(int page_number) {
       tsv_str += "\t-1\t\n";  // end of row for para
     }
     if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
-      line_num++, word_num = 0;
+      line_num++;
+      word_num = 0;
       tsv_str.add_str_int("4\t", page_num);  // level 4 - line
       tsv_str.add_str_int("\t", block_num);
       tsv_str.add_str_int("\t", par_num);

From f107f116d93dc9b6f5e3759b093b8f9f7884f03b Mon Sep 17 00:00:00 2001
From: Stefan Weil <sw@weilnetz.de>
Date: Thu, 5 Jul 2018 09:31:40 +0200
Subject: [PATCH 3/3] Fix compiler warnings [-Wconditional-uninitialized]

clang warnings:

src/ccstruct/coutln.cpp:231:15: warning:
 variable 'destindex' may be uninitialized when used here [-Wconditional-uninitialized]
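src/wordrec/language_model.cpp:1170:27: warning:
 variable 'expected_gap' may be uninitialized when used here [-Wconditional-uninitialized]

Initialize both variables at their declaration. In
LanguageModel::FillConsistencyInfo, expected_gap_found is now set to
true inside the existing "if (num_addends > 0)" block instead of being
assigned from that comparison.
Also realign some trailing comments in C_OUTLINE::C_OUTLINE.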

Signed-off-by: Stefan Weil <sw@weilnetz.de>
---
 src/ccstruct/coutln.cpp        | 8 ++++----
 src/wordrec/language_model.cpp | 5 ++---
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/src/ccstruct/coutln.cpp b/src/ccstruct/coutln.cpp
index fb4ccbbc..a1dea5d0 100644
--- a/src/ccstruct/coutln.cpp
+++ b/src/ccstruct/coutln.cpp
@@ -141,14 +141,14 @@ int16_t length                     //length of loop
  */
 
 C_OUTLINE::C_OUTLINE(C_OUTLINE* srcline, FCOORD rotation) : offsets(nullptr) {
-  TBOX new_box;                   //easy bounding
-  int16_t stepindex;               //index to step
-  int16_t dirdiff;                 //direction change
+  TBOX new_box;                  //easy bounding
+  int16_t stepindex;             //index to step
+  int16_t dirdiff;               //direction change
   ICOORD pos;                    //current position
   ICOORD prevpos;                //previous dest point
 
   ICOORD destpos;                //destination point
-  int16_t destindex;               //index to step
+  int16_t destindex = INT16_MAX; //index to step
   DIR128 dir;                    //coded direction
   uint8_t new_step;
 
diff --git a/src/wordrec/language_model.cpp b/src/wordrec/language_model.cpp
index b0ee4c38..2a08e8c6 100644
--- a/src/wordrec/language_model.cpp
+++ b/src/wordrec/language_model.cpp
@@ -1127,7 +1127,7 @@ void LanguageModel::FillConsistencyInfo(
     }
     if (!word_res->blob_widths.empty()) {  // if we have widths/gaps info
       bool expected_gap_found = false;
-      float expected_gap;
+      float expected_gap = 0.0f;
       int temp_gap;
       if (fontinfo_id >= 0) {  // found a common font
         ASSERT_HOST(fontinfo_id < fontinfo_table_->size());
@@ -1140,7 +1140,6 @@ void LanguageModel::FillConsistencyInfo(
         consistency_info->inconsistent_font = true;
         // Get an average of the expected gaps in each font
         int num_addends = 0;
-        expected_gap = 0;
         int temp_fid;
         for (int i = 0; i < 4; ++i) {
           if (i == 0) {
@@ -1159,9 +1158,9 @@ void LanguageModel::FillConsistencyInfo(
             num_addends++;
           }
         }
-        expected_gap_found = (num_addends > 0);
         if (num_addends > 0) {
           expected_gap /= static_cast<float>(num_addends);
+          expected_gap_found = true;
         }
       }
       if (expected_gap_found) {