Merge pull request #2650 from stweil/cid

Fix several issues reported by Coverity Scan
2024-12-01 07:59:05 +08:00 · 2019-09-14 21:18:37 +03:00 · 2019-09-14 21:18:37 +03:00 · 6a9584fbc2
commit 6a9584fbc2
parent 8be05c6bed 763f4781e8
23 changed files with 82 additions and 113 deletions
--- a/src/ccmain/equationdetect.h
+++ b/src/ccmain/equationdetect.h
@ -2,7 +2,6 @@
 // File:        equationdetect.h
 // Description: The equation detection class that inherits equationdetectbase.
 // Author:      Zongyi (Joe) Liu (joeliu@google.com)
-// Created:     Fri Aug 31 11:13:01 PST 2011
 //
 // (C) Copyright 2011, Google Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
@ -249,12 +248,12 @@ class EquationDetect : public EquationDetectBase {

  // The ColPartitionGrid that we are processing. This pointer is passed in from
  // the caller, so do NOT destroy it in the class.
-  ColPartitionGrid* part_grid_;
+  ColPartitionGrid* part_grid_ = nullptr;

  // A simple array of pointers to the best assigned column division at
  // each grid y coordinate. This pointer is passed in from the caller, so do
  // NOT destroy it in the class.
-  ColPartitionSet** best_columns_;
+  ColPartitionSet** best_columns_ = nullptr;

  // The super bounding box of all cps in the part_grid_.
  TBOX* cps_super_bbox_;
--- a/src/ccmain/par_control.cpp
+++ b/src/ccmain/par_control.cpp
@ -2,7 +2,6 @@
 // File:        par_control.cpp
 // Description: Control code for parallel implementation.
 // Author:      Ray Smith
-// Created:     Mon Nov 04 13:23:15 PST 2013
 //
 // (C) Copyright 2013, Google Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
@ -25,15 +24,15 @@
 namespace tesseract {

 struct BlobData {
-  BlobData() : blob(nullptr), choices(nullptr) {}
+  BlobData() = default;
  BlobData(int index, Tesseract* tess, const WERD_RES& word)
    : blob(word.chopped_word->blobs[index]),
      tesseract(tess),
      choices(&(*word.ratings)(index, index)) {}

-  TBLOB* blob;
-  Tesseract* tesseract;
-  BLOB_CHOICE_LIST** choices;
+  TBLOB* blob = nullptr;
+  Tesseract* tesseract = nullptr;
+  BLOB_CHOICE_LIST** choices = nullptr;
 };

 void Tesseract::PrerecAllWordsPar(const GenericVector<WordData>& words) {
--- a/src/ccmain/paragraphs.cpp
+++ b/src/ccmain/paragraphs.cpp
@ -2,7 +2,6 @@
 * File:        paragraphs.cpp
 * Description: Paragraph detection for tesseract.
 * Author:      David Eger
- * Created:     25 February 2011
 *
 * (C) Copyright 2011, Google Inc.
 ** Licensed under the Apache License, Version 2.0 (the "License");
@ -868,8 +867,7 @@ struct GeometricClassifierState {
  GeometricClassifierState(int dbg_level,
                           GenericVector<RowScratchRegisters> *r,
                           int r_start, int r_end)
-      : debug_level(dbg_level), rows(r), row_start(r_start), row_end(r_end),
-        margin(0) {
+      : debug_level(dbg_level), rows(r), row_start(r_start), row_end(r_end) {
    tolerance = InterwordSpace(*r, r_start, r_end);
    CalculateTabStops(r, r_start, r_end, tolerance,
                      &left_tabs, &right_tabs);
@ -938,20 +936,20 @@ struct GeometricClassifierState {
  }

  // We print out messages with a debug level at least as great as debug_level.
-  int debug_level;
+  int debug_level = 0;

  // The Geometric Classifier was asked to find a single paragraph model
  // to fit the text rows (*rows)[row_start, row_end)
  GenericVector<RowScratchRegisters> *rows;
-  int row_start;
-  int row_end;
+  int row_start = 0;
+  int row_end = 0;

  // The amount by which we expect the text edge can vary and still be aligned.
-  int tolerance;
+  int tolerance = 0;

  // Is the script in this text block left-to-right?
  // HORRIBLE ROUGH APPROXIMATION.  TODO(eger): Improve
-  bool ltr;
+  bool ltr = false;

  // These left and right tab stops were determined to be the common tab
  // stops for the given text.
@ -959,13 +957,13 @@ struct GeometricClassifierState {
  GenericVector<Cluster> right_tabs;

  // These are parameters we must determine to create a ParagraphModel.
-  tesseract::ParagraphJustification just;
-  int margin;
-  int first_indent;
-  int body_indent;
+  tesseract::ParagraphJustification just = JUSTIFICATION_UNKNOWN;
+  int margin = 0;
+  int first_indent = 0;
+  int body_indent = 0;

  // eop_threshold > 0 if the text is fully justified.  See MarkRowsWithModel()
-  int eop_threshold;
+  int eop_threshold = 0;
 };

 // Given a section of text where strong textual clues did not help identifying
--- a/src/ccmain/paramsd.h
+++ b/src/ccmain/paramsd.h
@ -2,7 +2,6 @@
 // File:        paramsd.h
 // Description: Tesseract parameter editor
 // Author:      Joern Wanke
-// Created:     Wed Jul 18 10:05:01 PDT 2007
 //
 // (C) Copyright 2007, Google Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
@ -80,14 +79,16 @@ class ParamContent : public ELIST_LINK {
  // The unique ID of this VC object.
  int my_id_;
  // Whether the parameter was changed_ and thus needs to be rewritten.
-  bool changed_;
+  bool changed_ = false;
  // The actual ParamType of this VC object.
  ParamType param_type_;

-  tesseract::StringParam* sIt;
-  tesseract::IntParam* iIt;
-  tesseract::BoolParam* bIt;
-  tesseract::DoubleParam* dIt;
+  union {
+    tesseract::StringParam* sIt;
+    tesseract::IntParam* iIt;
+    tesseract::BoolParam* bIt;
+    tesseract::DoubleParam* dIt;
+  };
 };

 ELISTIZEH(ParamContent)
--- a/src/ccstruct/blobbox.h
+++ b/src/ccstruct/blobbox.h
@ -642,7 +642,7 @@ class TO_ROW: public ELIST2_LINK
    }

                                 // true when dead
-    bool merged;
+    bool merged = false;
    bool all_caps;              // had no ascenders
    bool used_dm_model;         // in guessing pitch
    int16_t projection_left;       // start of projection
--- a/src/ccstruct/pdblock.h
+++ b/src/ccstruct/pdblock.h
@ -2,7 +2,6 @@
 * File:        pdblock.h  (Formerly pdblk.h)
 * Description: Page block class definition.
 * Author:      Ray Smith
- * Created:     Thu Mar 14 17:32:01 GMT 1991
 *
 * (C) Copyright 1991, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
@ -134,9 +133,9 @@ class DLLSYM BLOCK_RECT_IT       //rectangle iterator
    }

  private:
-    int16_t ymin;                ///< bottom of rectangle
-    int16_t ymax;                ///< top of rectangle
-    PDBLK *block;                ///< block to iterate
+    int16_t ymin = 0;            ///< bottom of rectangle
+    int16_t ymax = 0;            ///< top of rectangle
+    PDBLK* block = nullptr;      ///< block to iterate
    ICOORDELT_IT left_it;        ///< boundary iterators
    ICOORDELT_IT right_it;
 };
--- a/src/ccstruct/statistc.cpp
+++ b/src/ccstruct/statistc.cpp
@ -48,12 +48,6 @@ STATS::STATS(int32_t min_bucket_value, int32_t max_bucket_value_plus_1) {
  clear();
 }

-STATS::STATS() {
-  rangemax_ = 0;
-  rangemin_ = 0;
-  buckets_ = nullptr;
-}
-
 /**********************************************************************
 * STATS::set_range
 *
--- a/src/ccstruct/statistc.h
+++ b/src/ccstruct/statistc.h
@ -41,7 +41,7 @@ class STATS {
  // TODO(rays) This is ugly. Convert the second argument to
  // max_bucket_value and all the code that uses it.
  STATS(int32_t min_bucket_value, int32_t max_bucket_value_plus_1);
-  STATS();  // empty for arrays
+  STATS() = default; // empty for arrays

  ~STATS();

@ -139,11 +139,11 @@ class STATS {
  #endif  // GRAPHICS_DISABLED

 private:
-  int32_t rangemin_;                // min of range
+  int32_t rangemin_ = 0;            // min of range
  // rangemax_ is not well named as it is really one past the max.
-  int32_t rangemax_;                // max of range
-  int32_t total_count_;             // no of samples
-  int32_t* buckets_;                // array of cells
+  int32_t rangemax_ = 0;            // max of range
+  int32_t total_count_ = 0;         // no of samples
+  int32_t* buckets_ = nullptr;      // array of cells
 };

 // Returns the nth ordered item from the array, as if they were
--- a/src/ccstruct/werd.cpp
+++ b/src/ccstruct/werd.cpp
@ -336,7 +336,6 @@ WERD* WERD::shallow_copy() {

  new_word->blanks = blanks;
  new_word->flags = flags;
-  new_word->dummy = dummy;
  new_word->correct = correct;
  return new_word;
 }
@ -352,7 +351,6 @@ WERD& WERD::operator=(const WERD& source) {
  blanks = source.blanks;
  flags = source.flags;
  script_id_ = source.script_id_;
-  dummy = source.dummy;
  correct = source.correct;
  if (!cblobs.empty()) cblobs.clear();
  cblobs.deep_copy(&source.cblobs, &C_BLOB::deep_copy);
--- a/src/ccstruct/werd.h
+++ b/src/ccstruct/werd.h
@ -172,7 +172,6 @@ class WERD : public ELIST2_LINK {

 private:
  uint8_t blanks;          // no of blanks
-  uint8_t dummy;           // padding
  BITS16 flags;            // flags about word
  BITS16 disp_flags;       // display flags
  int16_t script_id_;      // From unicharset.
--- a/src/classify/adaptive.h
+++ b/src/classify/adaptive.h
@ -2,7 +2,6 @@
 ** Filename:   adaptive.h
 ** Purpose:    Interface to adaptive matcher.
 ** Author:     Dan Johnson
- ** History:    Fri Mar  8 10:00:49 1991, DSJ, Created.
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
@ -27,7 +26,6 @@

 typedef struct {
  uint16_t ProtoId;
-  uint16_t dummy;
  PROTO_STRUCT Proto;
 }

@ -57,7 +55,7 @@ typedef union {
 typedef struct {
  uint8_t NumPermConfigs;
  uint8_t MaxNumTimesSeen;  // maximum number of times any TEMP_CONFIG was seen
-  uint8_t dummy[2];         // (cut at matcher_min_examples_for_prototyping)
+                            // (cut at matcher_min_examples_for_prototyping)
  BIT_VECTOR PermProtos;
  BIT_VECTOR PermConfigs;
  LIST TempProtos;
@ -69,7 +67,6 @@ typedef struct {
  INT_TEMPLATES Templates;
  int NumNonEmptyClasses;
  uint8_t NumPermClasses;
-  uint8_t dummy[3];
  ADAPT_CLASS Class[MAX_NUM_CLASSES];
 } ADAPT_TEMPLATES_STRUCT;
 using ADAPT_TEMPLATES = ADAPT_TEMPLATES_STRUCT*;
--- a/src/classify/classify.cpp
+++ b/src/classify/classify.cpp
@ -205,6 +205,7 @@ Classify::Classify()
  learn_debug_win_ = nullptr;
  learn_fragmented_word_debug_win_ = nullptr;
  learn_fragments_debug_win_ = nullptr;
+  InitFeatureDefs(&feature_defs_);
 }

 Classify::~Classify() {
--- a/src/classify/featdefs.h
+++ b/src/classify/featdefs.h
@ -2,7 +2,6 @@
 ** Filename:    featdefs.h
 ** Purpose:     Definitions of currently defined feature types.
 ** Author:      Dan Johnson
- ** History:     Mon May 21 08:28:01 1990, DSJ, Created.
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
@ -46,7 +45,6 @@ using CHAR_DESC = CHAR_DESC_STRUCT *;
 struct FEATURE_DEFS_STRUCT {
  int32_t NumFeatureTypes;
  const FEATURE_DESC_STRUCT* FeatureDesc[NUM_FEATURE_TYPES];
-  int FeatureEnabled[NUM_FEATURE_TYPES];
 };
 using FEATURE_DEFS = FEATURE_DEFS_STRUCT *;

--- a/src/classify/shapetable.h
+++ b/src/classify/shapetable.h
@ -5,7 +5,6 @@
 // Description: Class to map a classifier shape index to unicharset
 //              indices and font indices.
 // Author:      Ray Smith
-// Created:     Thu Oct 28 17:46:32 PDT 2010
 //
 // (C) Copyright 2010, Google Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
@ -243,10 +242,10 @@ class Shape {

  // Flag indicates that the unichars are sorted, allowing faster set
  // operations with another shape.
-  bool unichars_sorted_;
+  bool unichars_sorted_ = false;
  // If this Shape is part of a ShapeTable the destination_index_ is the index
  // of some other shape in the ShapeTable with which this shape is merged.
-  int destination_index_;
+  int destination_index_ = 0;
  // Array of unichars, each with a set of fonts. Each unichar has at most
  // one entry in the vector.
  GenericVector<UnicharAndFonts> unichars_;
--- a/src/dict/dawg.h
+++ b/src/dict/dawg.h
@ -348,9 +348,7 @@ class Dawg {
 //  DawgPosition(k, w, p, pe true)
 //    We're back in the punctuation dawg.  Continuing there is the only option.
 struct DawgPosition {
-  DawgPosition()
-      : dawg_index(-1), dawg_ref(NO_EDGE), punc_ref(NO_EDGE),
-        back_to_punc(false) {}
+  DawgPosition() = default;
  DawgPosition(int dawg_idx, EDGE_REF dawgref,
               int punc_idx, EDGE_REF puncref,
               bool backtopunc)
@ -366,12 +364,12 @@ struct DawgPosition {
        back_to_punc == other.back_to_punc;
  }

-  EDGE_REF dawg_ref;
-  EDGE_REF punc_ref;
-  int8_t dawg_index;
-  int8_t punc_index;
+  EDGE_REF dawg_ref = NO_EDGE;
+  EDGE_REF punc_ref = NO_EDGE;
+  int8_t dawg_index = -1;
+  int8_t punc_index = -1;
  // Have we returned to the punc dawg at the end of the word?
-  bool back_to_punc;
+  bool back_to_punc = false;
 };

 class DawgPositionVector : public GenericVector<DawgPosition> {
--- a/src/textord/bbgrid.h
+++ b/src/textord/bbgrid.h
@ -3,7 +3,6 @@
 // Description: Class to hold BLOBNBOXs in a grid for fast access
 //              to neighbours.
 // Author:      Ray Smith
-// Created:     Wed Jun 06 17:22:01 PDT 2007
 //
 // (C) Copyright 2007, Google Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
@ -236,8 +235,7 @@ template<typename T> struct PtrHash {
 template<class BBC, class BBC_CLIST, class BBC_C_IT> class GridSearch {
 public:
  GridSearch(BBGrid<BBC, BBC_CLIST, BBC_C_IT>* grid)
-      : grid_(grid), unique_mode_(false),
-        previous_return_(nullptr), next_return_(nullptr) {
+      : grid_(grid) {
  }

  // Get the grid x, y coords of the most recently returned BBC.
@ -347,21 +345,21 @@ template<class BBC, class BBC_CLIST, class BBC_C_IT> class GridSearch {

 private:
  // The grid we are searching.
-  BBGrid<BBC, BBC_CLIST, BBC_C_IT>* grid_;
+  BBGrid<BBC, BBC_CLIST, BBC_C_IT>* grid_ = nullptr;
  // For executing a search. The different search algorithms use these in
  // different ways, but most use x_origin_ and y_origin_ as the start position.
-  int x_origin_;
-  int y_origin_;
-  int max_radius_;
-  int radius_;
-  int rad_index_;
-  int rad_dir_;
+  int x_origin_ = 0;
+  int y_origin_ = 0;
+  int max_radius_ = 0;
+  int radius_ = 0;
+  int rad_index_ = 0;
+  int rad_dir_ = 0;
  TBOX rect_;
-  int x_;  // The current location in grid coords, of the current search.
-  int y_;
-  bool unique_mode_;
-  BBC* previous_return_;  // Previous return from Next*.
-  BBC* next_return_;  // Current value of it_.data() used for repositioning.
+  int x_ = 0; // The current location in grid coords, of the current search.
+  int y_ = 0;
+  bool unique_mode_ = false;
+  BBC* previous_return_ = nullptr; // Previous return from Next*.
+  BBC* next_return_ = nullptr; // Current value of it_.data() used for repositioning.
  // An iterator over the list at (x_, y_) in the grid_.
  BBC_C_IT it_;
  // Set of unique returned elements used when unique_mode_ is true.
--- a/src/training/lstmtester.cpp
+++ b/src/training/lstmtester.cpp
@ -22,7 +22,7 @@
 namespace tesseract {

 LSTMTester::LSTMTester(int64_t max_memory)
-    : test_data_(max_memory), total_pages_(0), async_running_(false) {}
+    : test_data_(max_memory) {}

 // Loads a set of lstmf files that were created using the lstm.train config to
 // tesseract into memory ready for testing. Returns false if nothing was
--- a/src/training/lstmtester.h
+++ b/src/training/lstmtester.h
@ -73,16 +73,16 @@ class LSTMTester {

  // The data to test with.
  DocumentCache test_data_;
-  int total_pages_;
+  int total_pages_ = 0;
  // Flag that indicates an asynchronous test is currently running.
  // Protected by running_mutex_.
-  bool async_running_;
+  bool async_running_ = false;
  std::mutex running_mutex_;
  // Stored copies of the args for use while running asynchronously.
-  int test_iteration_;
-  const double* test_training_errors_;
+  int test_iteration_ = 0;
+  const double* test_training_errors_ = nullptr;
  TessdataManager test_model_mgr_;
-  int test_training_stage_;
+  int test_training_stage_ = 0;
  STRING test_result_;
 };

--- a/src/training/text2image.cpp
+++ b/src/training/text2image.cpp
@ -598,7 +598,7 @@ static int Main() {
        rand_utf8.append(kSeparator);
      }
    }
-    tlog(1, "Rendered ngram string of size %d\n", rand_utf8.length());
+    tlog(1, "Rendered ngram string of size %zu\n", rand_utf8.length());
    src_utf8.swap(rand_utf8);
  }
  if (FLAGS_only_extract_font_properties) {
--- a/src/training/wordlist2dawg.cpp
+++ b/src/training/wordlist2dawg.cpp
@ -54,7 +54,6 @@ int main(int argc, char** argv) {
    tprintf("Set reverse_policy to %s\n",
            tesseract::Trie::get_reverse_policy_name(reverse_policy));
  }
-  if (argc == 7) argv_index += 3;
  const char* wordlist_filename = argv[++argv_index];
  const char* dawg_filename = argv[++argv_index];
  const char* unicharset_file = argv[++argv_index];
--- a/src/viewer/scrollview.h
+++ b/src/viewer/scrollview.h
@ -60,21 +60,17 @@ enum SVEventType {
 struct SVEvent {
  ~SVEvent() { delete [] parameter; }
  SVEvent* copy();
-  SVEventType type;    // What kind of event.
-  ScrollView* window;  // Window event relates to.
-  int x;               // Coords of click or selection.
-  int y;
-  int x_size;          // Size of selection.
-  int y_size;
-  int command_id;      // The ID of the possibly associated event (e.g. MENU)
-  char* parameter;     // Any string that might have been passed as argument.
-  int counter;         // Used to detect which kind of event to process next.
-
-  SVEvent() {
-    window = nullptr;
-    parameter = nullptr;
-  }
+  SVEventType type = SVET_DESTROY; // What kind of event.
+  ScrollView* window = nullptr; // Window event relates to.
+  char* parameter = nullptr; // Any string that might have been passed as argument.
+  int x = 0;           // Coords of click or selection.
+  int y = 0;
+  int x_size = 0;      // Size of selection.
+  int y_size = 0;
+  int command_id = 0;  // The ID of the possibly associated event (e.g. MENU)
+  int counter = 0;     // Used to detect which kind of event to process next.

+  SVEvent() = default;
  SVEvent(const SVEvent&);
  SVEvent& operator=(const SVEvent&);
 };
--- a/src/wordrec/language_model.cpp
+++ b/src/wordrec/language_model.cpp
@ -127,10 +127,7 @@ LanguageModel::LanguageModel(const UnicityTable<FontInfo> *fontinfo_table,
                       dict->getCCUtil()->params()),
      dawg_args_(nullptr, new DawgPositionVector(), NO_PERM),
      fontinfo_table_(fontinfo_table),
-      dict_(dict),
-      fixed_pitch_(false),
-      max_char_wh_ratio_(0.0),
-      acceptable_choice_found_(false) {
+      dict_(dict) {
  ASSERT_HOST(dict_ != nullptr);
 }

--- a/src/wordrec/language_model.h
+++ b/src/wordrec/language_model.h
@ -4,7 +4,6 @@
 //              structure and statistics of the language to help segmentation
 //              search.
 // Author:      Daria Antonova
-// Created:     Mon Nov 11 11:26:43 PST 2009
 //
 // (C) Copyright 2009, Google Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
@ -372,26 +371,26 @@ class LanguageModel {
  // avoid dynamic memory re-allocation (should be cleared before each use).
  DawgArgs dawg_args_;
  // Scaling for recovering blob outline length from rating and certainty.
-  float rating_cert_scale_;
+  float rating_cert_scale_ = 0.0f;

  // The following variables are set at construction time.

  // Pointer to fontinfo table (not owned by LanguageModel).
-  const UnicityTable<FontInfo> *fontinfo_table_;
+  const UnicityTable<FontInfo>* fontinfo_table_ = nullptr;

  // Pointer to Dict class, that is used for querying the dictionaries
  // (the pointer is not owned by LanguageModel).
-  Dict *dict_;
+  Dict* dict_ = nullptr;

  // TODO(daria): the following variables should become LanguageModel params
  // when the old code in bestfirst.cpp and heuristic.cpp is deprecated.
  //
  // Set to true if we are dealing with fixed pitch text
  // (set to assume_fixed_pitch_char_segment).
-  bool fixed_pitch_;
+  bool fixed_pitch_ = false;
  // Max char width-to-height ratio allowed
  // (set to segsearch_max_char_wh_ratio).
-  float max_char_wh_ratio_;
+  float max_char_wh_ratio_ = 0.0f;

  // The following variables are initialized with InitForWord().

@ -399,7 +398,7 @@ class LanguageModel {
  // (since this is only used by the character ngram model component,
  // only the last language_model_ngram_order of the word are stored).
  STRING prev_word_str_;
-  int prev_word_unichar_step_len_;
+  int prev_word_unichar_step_len_ = 0;
  // Active dawg vector.
  DawgPositionVector very_beginning_active_dawgs_;  // includes continuation
  DawgPositionVector beginning_active_dawgs_;
@ -414,9 +413,9 @@ class LanguageModel {
  // choices. This way the stopper will know that the best choice is not
  // ambiguous (i.e. there are best choices in the best choice list that have
  // ratings close to the very best one) and will be less likely to mis-adapt.
-  bool acceptable_choice_found_;
+  bool acceptable_choice_found_ = false;
  // Set to true if a choice representing correct segmentation was explored.
-  bool correct_segmentation_explored_;
+  bool correct_segmentation_explored_ = false;

  // Params models containing weights for computing ViterbiStateEntry costs.
  ParamsModel params_model_;