Merge pull request from stweil/cov

Fix several issues reported by Coverity Scan (related to untrusted external data)
This commit is contained in:
Egor Pugin 2018-07-06 17:41:39 +03:00 committed by GitHub
commit d16e518343
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 51 additions and 41 deletions

View File

@@ -1,6 +1,6 @@
/* -*-C-*- /* -*-C-*-
****************************************************************************** ******************************************************************************
* File: matrix.h (Formerly matrix.h) * File: matrix.h
* Description: Generic 2-d array/matrix and banded triangular matrix class. * Description: Generic 2-d array/matrix and banded triangular matrix class.
* Author: Ray Smith * Author: Ray Smith
* TODO(rays) Separate from ratings matrix, which it also contains: * TODO(rays) Separate from ratings matrix, which it also contains:
@@ -10,9 +10,6 @@
* Author: Mark Seaman, OCR Technology * Author: Mark Seaman, OCR Technology
* Created: Wed May 16 13:22:06 1990 * Created: Wed May 16 13:22:06 1990
* Modified: Tue Mar 19 16:00:20 1991 (Mark Seaman) marks@hpgrlt * Modified: Tue Mar 19 16:00:20 1991 (Mark Seaman) marks@hpgrlt
* Language: C
* Package: N/A
* Status: Experimental (Do Not Distribute)
* *
* (c) Copyright 1990, Hewlett-Packard Company. * (c) Copyright 1990, Hewlett-Packard Company.
** Licensed under the Apache License, Version 2.0 (the "License"); ** Licensed under the Apache License, Version 2.0 (the "License");
@@ -492,6 +489,9 @@ class GENERIC_2D_ARRAY {
ReverseN(&size1, sizeof(size1)); ReverseN(&size1, sizeof(size1));
ReverseN(&size2, sizeof(size2)); ReverseN(&size2, sizeof(size2));
} }
// Arbitrarily limit the number of elements to protect against bad data.
if (size1 > UINT16_MAX) return false;
if (size2 > UINT16_MAX) return false;
Resize(size1, size2, empty_); Resize(size1, size2, empty_);
return true; return true;
} }
@@ -499,6 +499,9 @@ class GENERIC_2D_ARRAY {
int32_t size1, size2; int32_t size1, size2;
if (fp->FReadEndian(&size1, sizeof(size1), 1) != 1) return false; if (fp->FReadEndian(&size1, sizeof(size1), 1) != 1) return false;
if (fp->FReadEndian(&size2, sizeof(size2), 1) != 1) return false; if (fp->FReadEndian(&size2, sizeof(size2), 1) != 1) return false;
// Arbitrarily limit the number of elements to protect against bad data.
if (size1 > UINT16_MAX) return false;
if (size2 > UINT16_MAX) return false;
Resize(size1, size2, empty_); Resize(size1, size2, empty_);
return true; return true;
} }

View File

@@ -564,12 +564,14 @@ class PointerVector : public GenericVector<T*> {
// Also needs T::T(), as new T is used in this function. // Also needs T::T(), as new T is used in this function.
// Returns false in case of error. // Returns false in case of error.
bool DeSerialize(bool swap, FILE* fp) { bool DeSerialize(bool swap, FILE* fp) {
int32_t reserved; uint32_t reserved;
if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false; if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false;
if (swap) Reverse32(&reserved); if (swap) Reverse32(&reserved);
// Arbitrarily limit the number of elements to protect against bad data.
if (reserved > UINT16_MAX) return false;
GenericVector<T*>::reserve(reserved); GenericVector<T*>::reserve(reserved);
truncate(0); truncate(0);
for (int i = 0; i < reserved; ++i) { for (uint32_t i = 0; i < reserved; ++i) {
int8_t non_null; int8_t non_null;
if (fread(&non_null, sizeof(non_null), 1, fp) != 1) return false; if (fread(&non_null, sizeof(non_null), 1, fp) != 1) return false;
T* item = nullptr; T* item = nullptr;
@@ -943,9 +945,11 @@ bool GenericVector<T>::Serialize(tesseract::TFile* fp) const {
// If swap is true, assumes a big/little-endian swap is needed. // If swap is true, assumes a big/little-endian swap is needed.
template <typename T> template <typename T>
bool GenericVector<T>::DeSerialize(bool swap, FILE* fp) { bool GenericVector<T>::DeSerialize(bool swap, FILE* fp) {
int32_t reserved; uint32_t reserved;
if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false; if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false;
if (swap) Reverse32(&reserved); if (swap) Reverse32(&reserved);
// Arbitrarily limit the number of elements to protect against bad data.
if (reserved > UINT16_MAX) return false;
reserve(reserved); reserve(reserved);
size_used_ = reserved; size_used_ = reserved;
if (fread(data_, sizeof(T), size_used_, fp) != unsigned_size()) return false; if (fread(data_, sizeof(T), size_used_, fp) != unsigned_size()) return false;
@@ -957,15 +961,17 @@ bool GenericVector<T>::DeSerialize(bool swap, FILE* fp) {
} }
template <typename T> template <typename T>
bool GenericVector<T>::DeSerialize(tesseract::TFile* fp) { bool GenericVector<T>::DeSerialize(tesseract::TFile* fp) {
int32_t reserved; uint32_t reserved;
if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false; if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false;
// Arbitrarily limit the number of elements to protect against bad data.
if (reserved > UINT16_MAX) return false;
reserve(reserved); reserve(reserved);
size_used_ = reserved; size_used_ = reserved;
return fp->FReadEndian(data_, sizeof(T), size_used_) == size_used_; return fp->FReadEndian(data_, sizeof(T), size_used_) == size_used_;
} }
template <typename T> template <typename T>
bool GenericVector<T>::SkipDeSerialize(tesseract::TFile* fp) { bool GenericVector<T>::SkipDeSerialize(tesseract::TFile* fp) {
int32_t reserved; uint32_t reserved;
if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false; if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false;
return fp->FRead(nullptr, sizeof(T), reserved) == reserved; return fp->FRead(nullptr, sizeof(T), reserved) == reserved;
} }

View File

@@ -50,10 +50,12 @@ bool IndexMap::Serialize(FILE* fp) const {
// Reads from the given file. Returns false in case of error. // Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed. // If swap is true, assumes a big/little-endian swap is needed.
bool IndexMap::DeSerialize(bool swap, FILE* fp) { bool IndexMap::DeSerialize(bool swap, FILE* fp) {
int32_t sparse_size; uint32_t sparse_size;
if (fread(&sparse_size, sizeof(sparse_size), 1, fp) != 1) return false; if (fread(&sparse_size, sizeof(sparse_size), 1, fp) != 1) return false;
if (swap) if (swap)
ReverseN(&sparse_size, sizeof(sparse_size)); ReverseN(&sparse_size, sizeof(sparse_size));
// Arbitrarily limit the number of elements to protect against bad data.
if (sparse_size > UINT16_MAX) return false;
sparse_size_ = sparse_size; sparse_size_ = sparse_size;
if (!compact_map_.DeSerialize(swap, fp)) return false; if (!compact_map_.DeSerialize(swap, fp)) return false;
return true; return true;

View File

@@ -161,13 +161,14 @@ bool STRING::Serialize(TFile* fp) const {
// Reads from the given file. Returns false in case of error. // Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed. // If swap is true, assumes a big/little-endian swap is needed.
bool STRING::DeSerialize(bool swap, FILE* fp) { bool STRING::DeSerialize(bool swap, FILE* fp) {
int32_t len; uint32_t len;
if (fread(&len, sizeof(len), 1, fp) != 1) return false; if (fread(&len, sizeof(len), 1, fp) != 1) return false;
if (swap) if (swap)
ReverseN(&len, sizeof(len)); ReverseN(&len, sizeof(len));
// Arbitrarily limit the number of characters to protect against bad data.
if (len > UINT16_MAX) return false;
truncate_at(len); truncate_at(len);
if (static_cast<int>(fread(GetCStr(), 1, len, fp)) != len) return false; return fread(GetCStr(), 1, len, fp) == len;
return true;
} }
// Reads from the given file. Returns false in case of error. // Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed. // If swap is true, assumes a big/little-endian swap is needed.

View File

@@ -1339,7 +1339,7 @@ int Classify::CharNormTrainingSample(bool pruner_only,
ADAPT_RESULTS* adapt_results = new ADAPT_RESULTS(); ADAPT_RESULTS* adapt_results = new ADAPT_RESULTS();
adapt_results->Initialize(); adapt_results->Initialize();
// Compute the bounding box of the features. // Compute the bounding box of the features.
int num_features = sample.num_features(); uint32_t num_features = sample.num_features();
// Only the top and bottom of the blob_box are used by MasterMatcher, so // Only the top and bottom of the blob_box are used by MasterMatcher, so
// fabricate right and left using top and bottom. // fabricate right and left using top and bottom.
TBOX blob_box(sample.geo_feature(GeoBottom), sample.geo_feature(GeoBottom), TBOX blob_box(sample.geo_feature(GeoBottom), sample.geo_feature(GeoBottom),

View File

@@ -1,5 +1,3 @@
// Copyright 2010 Google Inc. All Rights Reserved.
// Author: rays@google.com (Ray Smith)
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
// File: mastertrainer.cpp // File: mastertrainer.cpp
// Description: Trainer to build the MasterClassifier. // Description: Trainer to build the MasterClassifier.
@@ -552,8 +550,8 @@ CLUSTERER* MasterTrainer::SetupForClustering(
int sample_id = 0; int sample_id = 0;
for (int i = sample_ptrs.size() - 1; i >= 0; --i) { for (int i = sample_ptrs.size() - 1; i >= 0; --i) {
const TrainingSample* sample = sample_ptrs[i]; const TrainingSample* sample = sample_ptrs[i];
int num_features = sample->num_micro_features(); uint32_t num_features = sample->num_micro_features();
for (int f = 0; f < num_features; ++f) for (uint32_t f = 0; f < num_features; ++f)
MakeSample(clusterer, sample->micro_features()[f], sample_id); MakeSample(clusterer, sample->micro_features()[f], sample_id);
++sample_id; ++sample_id;
} }
@@ -706,7 +704,7 @@ void MasterTrainer::DisplaySamples(const char* unichar_str1, int cloud_font,
if (class_id2 != INVALID_UNICHAR_ID && canonical_font >= 0) { if (class_id2 != INVALID_UNICHAR_ID && canonical_font >= 0) {
const TrainingSample* sample = samples_.GetCanonicalSample(canonical_font, const TrainingSample* sample = samples_.GetCanonicalSample(canonical_font,
class_id2); class_id2);
for (int f = 0; f < sample->num_features(); ++f) { for (uint32_t f = 0; f < sample->num_features(); ++f) {
RenderIntFeature(f_window, &sample->features()[f], ScrollView::RED); RenderIntFeature(f_window, &sample->features()[f], ScrollView::RED);
} }
} }

View File

@@ -224,10 +224,10 @@ FEATURE_SET Classify::ExtractIntCNFeatures(
blob, false, &local_fx_info, &bl_features); blob, false, &local_fx_info, &bl_features);
if (sample == nullptr) return nullptr; if (sample == nullptr) return nullptr;
int num_features = sample->num_features(); uint32_t num_features = sample->num_features();
const INT_FEATURE_STRUCT* features = sample->features(); const INT_FEATURE_STRUCT* features = sample->features();
FEATURE_SET feature_set = NewFeatureSet(num_features); FEATURE_SET feature_set = NewFeatureSet(num_features);
for (int f = 0; f < num_features; ++f) { for (uint32_t f = 0; f < num_features; ++f) {
FEATURE feature = NewFeature(&IntFeatDesc); FEATURE feature = NewFeature(&IntFeatDesc);
feature->Params[IntX] = features[f].X; feature->Params[IntX] = features[f].X;

View File

@@ -109,8 +109,8 @@ void ShapeClassifier::DebugDisplay(const TrainingSample& sample,
popup_menu->BuildMenu(debug_win, false); popup_menu->BuildMenu(debug_win, false);
// Display the features in green. // Display the features in green.
const INT_FEATURE_STRUCT* features = sample.features(); const INT_FEATURE_STRUCT* features = sample.features();
int num_features = sample.num_features(); uint32_t num_features = sample.num_features();
for (int f = 0; f < num_features; ++f) { for (uint32_t f = 0; f < num_features; ++f) {
RenderIntFeature(debug_win, &features[f], ScrollView::GREEN); RenderIntFeature(debug_win, &features[f], ScrollView::GREEN);
} }
debug_win->Update(); debug_win->Update();

View File

@@ -61,12 +61,10 @@ bool TrainingSample::Serialize(FILE* fp) const {
return false; return false;
if (fwrite(&outline_length_, sizeof(outline_length_), 1, fp) != 1) if (fwrite(&outline_length_, sizeof(outline_length_), 1, fp) != 1)
return false; return false;
if (static_cast<int>(fwrite(features_, sizeof(*features_), num_features_, fp)) if (fwrite(features_, sizeof(*features_), num_features_, fp) != num_features_)
!= num_features_)
return false; return false;
if (static_cast<int>(fwrite(micro_features_, sizeof(*micro_features_), if (fwrite(micro_features_, sizeof(*micro_features_), num_micro_features_,
num_micro_features_, fp) != num_micro_features_)
fp)) != num_micro_features_)
return false; return false;
if (fwrite(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) != if (fwrite(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) !=
kNumCNParams) return false; kNumCNParams) return false;
@@ -102,16 +100,18 @@ bool TrainingSample::DeSerialize(bool swap, FILE* fp) {
ReverseN(&num_micro_features_, sizeof(num_micro_features_)); ReverseN(&num_micro_features_, sizeof(num_micro_features_));
ReverseN(&outline_length_, sizeof(outline_length_)); ReverseN(&outline_length_, sizeof(outline_length_));
} }
// Arbitrarily limit the number of elements to protect against bad data.
if (num_features_ > UINT16_MAX) return false;
if (num_micro_features_ > UINT16_MAX) return false;
delete [] features_; delete [] features_;
features_ = new INT_FEATURE_STRUCT[num_features_]; features_ = new INT_FEATURE_STRUCT[num_features_];
if (static_cast<int>(fread(features_, sizeof(*features_), num_features_, fp)) if (fread(features_, sizeof(*features_), num_features_, fp)
!= num_features_) != num_features_)
return false; return false;
delete [] micro_features_; delete [] micro_features_;
micro_features_ = new MicroFeature[num_micro_features_]; micro_features_ = new MicroFeature[num_micro_features_];
if (static_cast<int>(fread(micro_features_, sizeof(*micro_features_), if (fread(micro_features_, sizeof(*micro_features_), num_micro_features_,
num_micro_features_, fp) != num_micro_features_)
fp)) != num_micro_features_)
return false; return false;
if (fread(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) != if (fread(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) !=
kNumCNParams) return false; kNumCNParams) return false;
@@ -165,7 +165,7 @@ TrainingSample* TrainingSample::RandomizedCopy(int index) const {
++index; // Remove the first combination. ++index; // Remove the first combination.
const int yshift = kYShiftValues[index / kSampleScaleSize]; const int yshift = kYShiftValues[index / kSampleScaleSize];
double scaling = kScaleValues[index % kSampleScaleSize]; double scaling = kScaleValues[index % kSampleScaleSize];
for (int i = 0; i < num_features_; ++i) { for (uint32_t i = 0; i < num_features_; ++i) {
double result = (features_[i].X - kRandomizingCenter) * scaling; double result = (features_[i].X - kRandomizingCenter) * scaling;
result += kRandomizingCenter; result += kRandomizingCenter;
sample->features_[i].X = ClipToRange<int>(result + 0.5, 0, UINT8_MAX); sample->features_[i].X = ClipToRange<int>(result + 0.5, 0, UINT8_MAX);
@@ -217,7 +217,7 @@ void TrainingSample::ExtractCharDesc(int int_feature_type,
} else { } else {
num_features_ = char_features->NumFeatures; num_features_ = char_features->NumFeatures;
features_ = new INT_FEATURE_STRUCT[num_features_]; features_ = new INT_FEATURE_STRUCT[num_features_];
for (int f = 0; f < num_features_; ++f) { for (uint32_t f = 0; f < num_features_; ++f) {
features_[f].X = features_[f].X =
static_cast<uint8_t>(char_features->Features[f]->Params[IntX]); static_cast<uint8_t>(char_features->Features[f]->Params[IntX]);
features_[f].Y = features_[f].Y =
@@ -238,7 +238,7 @@ void TrainingSample::ExtractCharDesc(int int_feature_type,
} else { } else {
num_micro_features_ = char_features->NumFeatures; num_micro_features_ = char_features->NumFeatures;
micro_features_ = new MicroFeature[num_micro_features_]; micro_features_ = new MicroFeature[num_micro_features_];
for (int f = 0; f < num_micro_features_; ++f) { for (uint32_t f = 0; f < num_micro_features_; ++f) {
for (int d = 0; d < MFCount; ++d) { for (int d = 0; d < MFCount; ++d) {
micro_features_[f][d] = char_features->Features[f]->Params[d]; micro_features_[f][d] = char_features->Features[f]->Params[d];
} }
@@ -294,7 +294,7 @@ void TrainingSample::MapFeatures(const IntFeatureMap& feature_map) {
// Returns a pix representing the sample. (Int features only.) // Returns a pix representing the sample. (Int features only.)
Pix* TrainingSample::RenderToPix(const UNICHARSET* unicharset) const { Pix* TrainingSample::RenderToPix(const UNICHARSET* unicharset) const {
Pix* pix = pixCreate(kIntFeatureExtent, kIntFeatureExtent, 1); Pix* pix = pixCreate(kIntFeatureExtent, kIntFeatureExtent, 1);
for (int f = 0; f < num_features_; ++f) { for (uint32_t f = 0; f < num_features_; ++f) {
int start_x = features_[f].X; int start_x = features_[f].X;
int start_y = kIntFeatureExtent - features_[f].Y; int start_y = kIntFeatureExtent - features_[f].Y;
double dx = cos((features_[f].Theta / 256.0) * 2.0 * M_PI - M_PI); double dx = cos((features_[f].Theta / 256.0) * 2.0 * M_PI - M_PI);
@@ -315,7 +315,7 @@ Pix* TrainingSample::RenderToPix(const UNICHARSET* unicharset) const {
void TrainingSample::DisplayFeatures(ScrollView::Color color, void TrainingSample::DisplayFeatures(ScrollView::Color color,
ScrollView* window) const { ScrollView* window) const {
#ifndef GRAPHICS_DISABLED #ifndef GRAPHICS_DISABLED
for (int f = 0; f < num_features_; ++f) { for (uint32_t f = 0; f < num_features_; ++f) {
RenderIntFeature(window, &features_[f], color); RenderIntFeature(window, &features_[f], color);
} }
#endif // GRAPHICS_DISABLED #endif // GRAPHICS_DISABLED

View File

@@ -137,13 +137,13 @@ class TrainingSample : public ELIST_LINK {
void set_bounding_box(const TBOX& box) { void set_bounding_box(const TBOX& box) {
bounding_box_ = box; bounding_box_ = box;
} }
int num_features() const { uint32_t num_features() const {
return num_features_; return num_features_;
} }
const INT_FEATURE_STRUCT* features() const { const INT_FEATURE_STRUCT* features() const {
return features_; return features_;
} }
int num_micro_features() const { uint32_t num_micro_features() const {
return num_micro_features_; return num_micro_features_;
} }
const MicroFeature* micro_features() const { const MicroFeature* micro_features() const {
@@ -206,9 +206,9 @@ class TrainingSample : public ELIST_LINK {
// Bounding box of sample in original image. // Bounding box of sample in original image.
TBOX bounding_box_; TBOX bounding_box_;
// Number of INT_FEATURE_STRUCT in features_ array. // Number of INT_FEATURE_STRUCT in features_ array.
int num_features_; uint32_t num_features_;
// Number of MicroFeature in micro_features_ array. // Number of MicroFeature in micro_features_ array.
int num_micro_features_; uint32_t num_micro_features_;
// Total length of outline in the baseline normalized coordinate space. // Total length of outline in the baseline normalized coordinate space.
// See comment in WERD_RES class definition for a discussion of coordinate // See comment in WERD_RES class definition for a discussion of coordinate
// spaces. // spaces.