Merge pull request #1759 from stweil/cov

Fix several issues reported by Coverity Scan (related to untrusted external data)
2024-12-15 17:49:29 +08:00 · 2018-07-06 17:41:39 +03:00 · 2018-07-06 17:41:39 +03:00 · d16e518343
commit d16e518343
parent a078ce02bb 4bb41b8952
10 changed files with 51 additions and 41 deletions
--- a/src/ccstruct/matrix.h
+++ b/src/ccstruct/matrix.h
@ -1,6 +1,6 @@
 /* -*-C-*-
 ******************************************************************************
- * File:         matrix.h  (Formerly matrix.h)
+ * File:         matrix.h
 * Description:  Generic 2-d array/matrix and banded triangular matrix class.
 * Author:       Ray Smith
 * TODO(rays) Separate from ratings matrix, which it also contains:
@ -10,9 +10,6 @@
 * Author:       Mark Seaman, OCR Technology
 * Created:      Wed May 16 13:22:06 1990
 * Modified:     Tue Mar 19 16:00:20 1991 (Mark Seaman) marks@hpgrlt
 * Language:     C
 * Package:      N/A
 * Status:       Experimental (Do Not Distribute)
 *
 * (c) Copyright 1990, Hewlett-Packard Company.
 ** Licensed under the Apache License, Version 2.0 (the "License");
@ -492,6 +489,9 @@ class GENERIC_2D_ARRAY {
      ReverseN(&size1, sizeof(size1));
      ReverseN(&size2, sizeof(size2));
    }
    // Arbitrarily limit the number of elements to protect against bad data.
    if (size1 > UINT16_MAX) return false;
    if (size2 > UINT16_MAX) return false;
    Resize(size1, size2, empty_);
    return true;
  }
@ -499,6 +499,9 @@ class GENERIC_2D_ARRAY {
    int32_t size1, size2;
    if (fp->FReadEndian(&size1, sizeof(size1), 1) != 1) return false;
    if (fp->FReadEndian(&size2, sizeof(size2), 1) != 1) return false;
    // Arbitrarily limit the number of elements to protect against bad data.
    if (size1 > UINT16_MAX) return false;
    if (size2 > UINT16_MAX) return false;
    Resize(size1, size2, empty_);
    return true;
  }
--- a/src/ccutil/genericvector.h
+++ b/src/ccutil/genericvector.h
@ -564,12 +564,14 @@ class PointerVector : public GenericVector<T*> {
  // Also needs T::T(), as new T is used in this function.
  // Returns false in case of error.
  bool DeSerialize(bool swap, FILE* fp) {
-    int32_t reserved;
+    uint32_t reserved;
    if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false;
    if (swap) Reverse32(&reserved);
    // Arbitrarily limit the number of elements to protect against bad data.
    if (reserved > UINT16_MAX) return false;
    GenericVector<T*>::reserve(reserved);
    truncate(0);
-    for (int i = 0; i < reserved; ++i) {
+    for (uint32_t i = 0; i < reserved; ++i) {
      int8_t non_null;
      if (fread(&non_null, sizeof(non_null), 1, fp) != 1) return false;
      T* item = nullptr;
@ -943,9 +945,11 @@ bool GenericVector<T>::Serialize(tesseract::TFile* fp) const {
 // If swap is true, assumes a big/little-endian swap is needed.
 template <typename T>
 bool GenericVector<T>::DeSerialize(bool swap, FILE* fp) {
-  int32_t reserved;
+  uint32_t reserved;
  if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false;
  if (swap) Reverse32(&reserved);
  // Arbitrarily limit the number of elements to protect against bad data.
  if (reserved > UINT16_MAX) return false;
  reserve(reserved);
  size_used_ = reserved;
  if (fread(data_, sizeof(T), size_used_, fp) != unsigned_size()) return false;
@ -957,15 +961,17 @@ bool GenericVector<T>::DeSerialize(bool swap, FILE* fp) {
 }
 template <typename T>
 bool GenericVector<T>::DeSerialize(tesseract::TFile* fp) {
-  int32_t reserved;
+  uint32_t reserved;
  if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false;
  // Arbitrarily limit the number of elements to protect against bad data.
  if (reserved > UINT16_MAX) return false;
  reserve(reserved);
  size_used_ = reserved;
  return fp->FReadEndian(data_, sizeof(T), size_used_) == size_used_;
 }
 template <typename T>
 bool GenericVector<T>::SkipDeSerialize(tesseract::TFile* fp) {
-  int32_t reserved;
+  uint32_t reserved;
  if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false;
  return fp->FRead(nullptr, sizeof(T), reserved) == reserved;
 }
--- a/src/ccutil/indexmapbidi.cpp
+++ b/src/ccutil/indexmapbidi.cpp
@ -50,10 +50,12 @@ bool IndexMap::Serialize(FILE* fp) const {
 // Reads from the given file. Returns false in case of error.
 // If swap is true, assumes a big/little-endian swap is needed.
 bool IndexMap::DeSerialize(bool swap, FILE* fp) {
-  int32_t sparse_size;
+  uint32_t sparse_size;
  if (fread(&sparse_size, sizeof(sparse_size), 1, fp) != 1) return false;
  if (swap)
    ReverseN(&sparse_size, sizeof(sparse_size));
  // Arbitrarily limit the number of elements to protect against bad data.
  if (sparse_size > UINT16_MAX) return false;
  sparse_size_ = sparse_size;
  if (!compact_map_.DeSerialize(swap, fp)) return false;
  return true;
--- a/src/ccutil/strngs.cpp
+++ b/src/ccutil/strngs.cpp
@ -161,13 +161,14 @@ bool STRING::Serialize(TFile* fp) const {
 // Reads from the given file. Returns false in case of error.
 // If swap is true, assumes a big/little-endian swap is needed.
 bool STRING::DeSerialize(bool swap, FILE* fp) {
-  int32_t len;
+  uint32_t len;
  if (fread(&len, sizeof(len), 1, fp) != 1) return false;
  if (swap)
    ReverseN(&len, sizeof(len));
  // Arbitrarily limit the number of characters to protect against bad data.
  if (len > UINT16_MAX) return false;
  truncate_at(len);
-  if (static_cast<int>(fread(GetCStr(), 1, len, fp)) != len) return false;
+  return fread(GetCStr(), 1, len, fp) == len;
  return true;
 }
 // Reads from the given file. Returns false in case of error.
 // If swap is true, assumes a big/little-endian swap is needed.
--- a/src/classify/adaptmatch.cpp
+++ b/src/classify/adaptmatch.cpp
@ -1339,7 +1339,7 @@ int Classify::CharNormTrainingSample(bool pruner_only,
  ADAPT_RESULTS* adapt_results = new ADAPT_RESULTS();
  adapt_results->Initialize();
  // Compute the bounding box of the features.
-  int num_features = sample.num_features();
+  uint32_t num_features = sample.num_features();
  // Only the top and bottom of the blob_box are used by MasterMatcher, so
  // fabricate right and left using top and bottom.
  TBOX blob_box(sample.geo_feature(GeoBottom), sample.geo_feature(GeoBottom),
--- a/src/classify/mastertrainer.cpp
+++ b/src/classify/mastertrainer.cpp
@ -1,5 +1,3 @@
 // Copyright 2010 Google Inc. All Rights Reserved.
 // Author: rays@google.com (Ray Smith)
 ///////////////////////////////////////////////////////////////////////
 // File:        mastertrainer.cpp
 // Description: Trainer to build the MasterClassifier.
@ -552,8 +550,8 @@ CLUSTERER* MasterTrainer::SetupForClustering(
  int sample_id = 0;
  for (int i = sample_ptrs.size() - 1; i >= 0; --i) {
    const TrainingSample* sample = sample_ptrs[i];
-    int num_features = sample->num_micro_features();
+    uint32_t num_features = sample->num_micro_features();
-    for (int f = 0; f < num_features; ++f)
+    for (uint32_t f = 0; f < num_features; ++f)
      MakeSample(clusterer, sample->micro_features()[f], sample_id);
    ++sample_id;
  }
@ -706,7 +704,7 @@ void MasterTrainer::DisplaySamples(const char* unichar_str1, int cloud_font,
  if (class_id2 != INVALID_UNICHAR_ID && canonical_font >= 0) {
    const TrainingSample* sample = samples_.GetCanonicalSample(canonical_font,
                                                               class_id2);
-    for (int f = 0; f < sample->num_features(); ++f) {
+    for (uint32_t f = 0; f < sample->num_features(); ++f) {
      RenderIntFeature(f_window, &sample->features()[f], ScrollView::RED);
    }
  }
--- a/src/classify/picofeat.cpp
+++ b/src/classify/picofeat.cpp
@ -224,10 +224,10 @@ FEATURE_SET Classify::ExtractIntCNFeatures(
      blob, false, &local_fx_info, &bl_features);
  if (sample == nullptr) return nullptr;
-  int num_features = sample->num_features();
+  uint32_t num_features = sample->num_features();
  const INT_FEATURE_STRUCT* features = sample->features();
  FEATURE_SET feature_set = NewFeatureSet(num_features);
-  for (int f = 0; f < num_features; ++f) {
+  for (uint32_t f = 0; f < num_features; ++f) {
    FEATURE feature = NewFeature(&IntFeatDesc);
    feature->Params[IntX] = features[f].X;
--- a/src/classify/shapeclassifier.cpp
+++ b/src/classify/shapeclassifier.cpp
@ -109,8 +109,8 @@ void ShapeClassifier::DebugDisplay(const TrainingSample& sample,
  popup_menu->BuildMenu(debug_win, false);
  // Display the features in green.
  const INT_FEATURE_STRUCT* features = sample.features();
-  int num_features = sample.num_features();
+  uint32_t num_features = sample.num_features();
-  for (int f = 0; f < num_features; ++f) {
+  for (uint32_t f = 0; f < num_features; ++f) {
    RenderIntFeature(debug_win, &features[f], ScrollView::GREEN);
  }
  debug_win->Update();
--- a/src/classify/trainingsample.cpp
+++ b/src/classify/trainingsample.cpp
@ -61,12 +61,10 @@ bool TrainingSample::Serialize(FILE* fp) const {
    return false;
  if (fwrite(&outline_length_, sizeof(outline_length_), 1, fp) != 1)
    return false;
-  if (static_cast<int>(fwrite(features_, sizeof(*features_), num_features_, fp))
+  if (fwrite(features_, sizeof(*features_), num_features_, fp) != num_features_)
      != num_features_)
    return false;
-  if (static_cast<int>(fwrite(micro_features_, sizeof(*micro_features_),
+  if (fwrite(micro_features_, sizeof(*micro_features_), num_micro_features_,
-                              num_micro_features_,
+             fp) != num_micro_features_)
                              fp)) != num_micro_features_)
    return false;
  if (fwrite(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) !=
      kNumCNParams) return false;
@ -102,16 +100,18 @@ bool TrainingSample::DeSerialize(bool swap, FILE* fp) {
    ReverseN(&num_micro_features_, sizeof(num_micro_features_));
    ReverseN(&outline_length_, sizeof(outline_length_));
  }
  // Arbitrarily limit the number of elements to protect against bad data.
  if (num_features_ > UINT16_MAX) return false;
  if (num_micro_features_ > UINT16_MAX) return false;
  delete [] features_;
  features_ = new INT_FEATURE_STRUCT[num_features_];
-  if (static_cast<int>(fread(features_, sizeof(*features_), num_features_, fp))
+  if (fread(features_, sizeof(*features_), num_features_, fp)
      != num_features_)
    return false;
  delete [] micro_features_;
  micro_features_ = new MicroFeature[num_micro_features_];
-  if (static_cast<int>(fread(micro_features_, sizeof(*micro_features_),
+  if (fread(micro_features_, sizeof(*micro_features_), num_micro_features_,
-                             num_micro_features_,
+            fp) != num_micro_features_)
                             fp)) != num_micro_features_)
    return false;
  if (fread(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) !=
            kNumCNParams) return false;
@ -165,7 +165,7 @@ TrainingSample* TrainingSample::RandomizedCopy(int index) const {
    ++index;  // Remove the first combination.
    const int yshift = kYShiftValues[index / kSampleScaleSize];
    double scaling = kScaleValues[index % kSampleScaleSize];
-    for (int i = 0; i < num_features_; ++i) {
+    for (uint32_t i = 0; i < num_features_; ++i) {
      double result = (features_[i].X - kRandomizingCenter) * scaling;
      result += kRandomizingCenter;
      sample->features_[i].X = ClipToRange<int>(result + 0.5, 0, UINT8_MAX);
@ -217,7 +217,7 @@ void TrainingSample::ExtractCharDesc(int int_feature_type,
  } else {
    num_features_ = char_features->NumFeatures;
    features_ = new INT_FEATURE_STRUCT[num_features_];
-    for (int f = 0; f < num_features_; ++f) {
+    for (uint32_t f = 0; f < num_features_; ++f) {
      features_[f].X =
          static_cast<uint8_t>(char_features->Features[f]->Params[IntX]);
      features_[f].Y =
@ -238,7 +238,7 @@ void TrainingSample::ExtractCharDesc(int int_feature_type,
  } else {
    num_micro_features_ = char_features->NumFeatures;
    micro_features_ = new MicroFeature[num_micro_features_];
-    for (int f = 0; f < num_micro_features_; ++f) {
+    for (uint32_t f = 0; f < num_micro_features_; ++f) {
      for (int d = 0; d < MFCount; ++d) {
        micro_features_[f][d] = char_features->Features[f]->Params[d];
      }
@ -294,7 +294,7 @@ void TrainingSample::MapFeatures(const IntFeatureMap& feature_map) {
 // Returns a pix representing the sample. (Int features only.)
 Pix* TrainingSample::RenderToPix(const UNICHARSET* unicharset) const {
  Pix* pix = pixCreate(kIntFeatureExtent, kIntFeatureExtent, 1);
-  for (int f = 0; f < num_features_; ++f) {
+  for (uint32_t f = 0; f < num_features_; ++f) {
    int start_x = features_[f].X;
    int start_y = kIntFeatureExtent - features_[f].Y;
    double dx = cos((features_[f].Theta / 256.0) * 2.0 * M_PI - M_PI);
@ -315,7 +315,7 @@ Pix* TrainingSample::RenderToPix(const UNICHARSET* unicharset) const {
 void TrainingSample::DisplayFeatures(ScrollView::Color color,
                                     ScrollView* window) const {
  #ifndef GRAPHICS_DISABLED
-  for (int f = 0; f < num_features_; ++f) {
+  for (uint32_t f = 0; f < num_features_; ++f) {
    RenderIntFeature(window, &features_[f], color);
  }
  #endif  // GRAPHICS_DISABLED
--- a/src/classify/trainingsample.h
+++ b/src/classify/trainingsample.h
@ -137,13 +137,13 @@ class TrainingSample : public ELIST_LINK {
  void set_bounding_box(const TBOX& box) {
    bounding_box_ = box;
  }
-  int num_features() const {
+  uint32_t num_features() const {
    return num_features_;
  }
  const INT_FEATURE_STRUCT* features() const {
    return features_;
  }
-  int num_micro_features() const {
+  uint32_t num_micro_features() const {
    return num_micro_features_;
  }
  const MicroFeature* micro_features() const {
@ -206,9 +206,9 @@ class TrainingSample : public ELIST_LINK {
  // Bounding box of sample in original image.
  TBOX bounding_box_;
  // Number of INT_FEATURE_STRUCT in features_ array.
-  int num_features_;
+  uint32_t num_features_;
  // Number of MicroFeature in micro_features_ array.
-  int num_micro_features_;
+  uint32_t num_micro_features_;
  // Total length of outline in the baseline normalized coordinate space.
  // See comment in WERD_RES class definition for a discussion of coordinate
  // spaces.