mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-12-12 15:39:04 +08:00
Merge pull request #1759 from stweil/cov
Fix several issues reported by Coverity Scan (related to untrusted external data)
This commit is contained in:
commit
d16e518343
@ -1,6 +1,6 @@
|
||||
/* -*-C-*-
|
||||
******************************************************************************
|
||||
* File: matrix.h (Formerly matrix.h)
|
||||
* File: matrix.h
|
||||
* Description: Generic 2-d array/matrix and banded triangular matrix class.
|
||||
* Author: Ray Smith
|
||||
* TODO(rays) Separate from ratings matrix, which it also contains:
|
||||
@ -10,9 +10,6 @@
|
||||
* Author: Mark Seaman, OCR Technology
|
||||
* Created: Wed May 16 13:22:06 1990
|
||||
* Modified: Tue Mar 19 16:00:20 1991 (Mark Seaman) marks@hpgrlt
|
||||
* Language: C
|
||||
* Package: N/A
|
||||
* Status: Experimental (Do Not Distribute)
|
||||
*
|
||||
* (c) Copyright 1990, Hewlett-Packard Company.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ -492,6 +489,9 @@ class GENERIC_2D_ARRAY {
|
||||
ReverseN(&size1, sizeof(size1));
|
||||
ReverseN(&size2, sizeof(size2));
|
||||
}
|
||||
// Arbitrarily limit the number of elements to protect against bad data.
|
||||
if (size1 > UINT16_MAX) return false;
|
||||
if (size2 > UINT16_MAX) return false;
|
||||
Resize(size1, size2, empty_);
|
||||
return true;
|
||||
}
|
||||
@ -499,6 +499,9 @@ class GENERIC_2D_ARRAY {
|
||||
int32_t size1, size2;
|
||||
if (fp->FReadEndian(&size1, sizeof(size1), 1) != 1) return false;
|
||||
if (fp->FReadEndian(&size2, sizeof(size2), 1) != 1) return false;
|
||||
// Arbitrarily limit the number of elements to protect against bad data.
|
||||
if (size1 > UINT16_MAX) return false;
|
||||
if (size2 > UINT16_MAX) return false;
|
||||
Resize(size1, size2, empty_);
|
||||
return true;
|
||||
}
|
||||
|
@ -564,12 +564,14 @@ class PointerVector : public GenericVector<T*> {
|
||||
// Also needs T::T(), as new T is used in this function.
|
||||
// Returns false in case of error.
|
||||
bool DeSerialize(bool swap, FILE* fp) {
|
||||
int32_t reserved;
|
||||
uint32_t reserved;
|
||||
if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false;
|
||||
if (swap) Reverse32(&reserved);
|
||||
// Arbitrarily limit the number of elements to protect against bad data.
|
||||
if (reserved > UINT16_MAX) return false;
|
||||
GenericVector<T*>::reserve(reserved);
|
||||
truncate(0);
|
||||
for (int i = 0; i < reserved; ++i) {
|
||||
for (uint32_t i = 0; i < reserved; ++i) {
|
||||
int8_t non_null;
|
||||
if (fread(&non_null, sizeof(non_null), 1, fp) != 1) return false;
|
||||
T* item = nullptr;
|
||||
@ -943,9 +945,11 @@ bool GenericVector<T>::Serialize(tesseract::TFile* fp) const {
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
template <typename T>
|
||||
bool GenericVector<T>::DeSerialize(bool swap, FILE* fp) {
|
||||
int32_t reserved;
|
||||
uint32_t reserved;
|
||||
if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false;
|
||||
if (swap) Reverse32(&reserved);
|
||||
// Arbitrarily limit the number of elements to protect against bad data.
|
||||
if (reserved > UINT16_MAX) return false;
|
||||
reserve(reserved);
|
||||
size_used_ = reserved;
|
||||
if (fread(data_, sizeof(T), size_used_, fp) != unsigned_size()) return false;
|
||||
@ -957,15 +961,17 @@ bool GenericVector<T>::DeSerialize(bool swap, FILE* fp) {
|
||||
}
|
||||
template <typename T>
|
||||
bool GenericVector<T>::DeSerialize(tesseract::TFile* fp) {
|
||||
int32_t reserved;
|
||||
uint32_t reserved;
|
||||
if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false;
|
||||
// Arbitrarily limit the number of elements to protect against bad data.
|
||||
if (reserved > UINT16_MAX) return false;
|
||||
reserve(reserved);
|
||||
size_used_ = reserved;
|
||||
return fp->FReadEndian(data_, sizeof(T), size_used_) == size_used_;
|
||||
}
|
||||
template <typename T>
|
||||
bool GenericVector<T>::SkipDeSerialize(tesseract::TFile* fp) {
|
||||
int32_t reserved;
|
||||
uint32_t reserved;
|
||||
if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false;
|
||||
return fp->FRead(nullptr, sizeof(T), reserved) == reserved;
|
||||
}
|
||||
|
@ -50,10 +50,12 @@ bool IndexMap::Serialize(FILE* fp) const {
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool IndexMap::DeSerialize(bool swap, FILE* fp) {
|
||||
int32_t sparse_size;
|
||||
uint32_t sparse_size;
|
||||
if (fread(&sparse_size, sizeof(sparse_size), 1, fp) != 1) return false;
|
||||
if (swap)
|
||||
ReverseN(&sparse_size, sizeof(sparse_size));
|
||||
// Arbitrarily limit the number of elements to protect against bad data.
|
||||
if (sparse_size > UINT16_MAX) return false;
|
||||
sparse_size_ = sparse_size;
|
||||
if (!compact_map_.DeSerialize(swap, fp)) return false;
|
||||
return true;
|
||||
|
@ -161,13 +161,14 @@ bool STRING::Serialize(TFile* fp) const {
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool STRING::DeSerialize(bool swap, FILE* fp) {
|
||||
int32_t len;
|
||||
uint32_t len;
|
||||
if (fread(&len, sizeof(len), 1, fp) != 1) return false;
|
||||
if (swap)
|
||||
ReverseN(&len, sizeof(len));
|
||||
// Arbitrarily limit the number of characters to protect against bad data.
|
||||
if (len > UINT16_MAX) return false;
|
||||
truncate_at(len);
|
||||
if (static_cast<int>(fread(GetCStr(), 1, len, fp)) != len) return false;
|
||||
return true;
|
||||
return fread(GetCStr(), 1, len, fp) == len;
|
||||
}
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
|
@ -1339,7 +1339,7 @@ int Classify::CharNormTrainingSample(bool pruner_only,
|
||||
ADAPT_RESULTS* adapt_results = new ADAPT_RESULTS();
|
||||
adapt_results->Initialize();
|
||||
// Compute the bounding box of the features.
|
||||
int num_features = sample.num_features();
|
||||
uint32_t num_features = sample.num_features();
|
||||
// Only the top and bottom of the blob_box are used by MasterMatcher, so
|
||||
// fabricate right and left using top and bottom.
|
||||
TBOX blob_box(sample.geo_feature(GeoBottom), sample.geo_feature(GeoBottom),
|
||||
|
@ -1,5 +1,3 @@
|
||||
// Copyright 2010 Google Inc. All Rights Reserved.
|
||||
// Author: rays@google.com (Ray Smith)
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: mastertrainer.cpp
|
||||
// Description: Trainer to build the MasterClassifier.
|
||||
@ -552,8 +550,8 @@ CLUSTERER* MasterTrainer::SetupForClustering(
|
||||
int sample_id = 0;
|
||||
for (int i = sample_ptrs.size() - 1; i >= 0; --i) {
|
||||
const TrainingSample* sample = sample_ptrs[i];
|
||||
int num_features = sample->num_micro_features();
|
||||
for (int f = 0; f < num_features; ++f)
|
||||
uint32_t num_features = sample->num_micro_features();
|
||||
for (uint32_t f = 0; f < num_features; ++f)
|
||||
MakeSample(clusterer, sample->micro_features()[f], sample_id);
|
||||
++sample_id;
|
||||
}
|
||||
@ -706,7 +704,7 @@ void MasterTrainer::DisplaySamples(const char* unichar_str1, int cloud_font,
|
||||
if (class_id2 != INVALID_UNICHAR_ID && canonical_font >= 0) {
|
||||
const TrainingSample* sample = samples_.GetCanonicalSample(canonical_font,
|
||||
class_id2);
|
||||
for (int f = 0; f < sample->num_features(); ++f) {
|
||||
for (uint32_t f = 0; f < sample->num_features(); ++f) {
|
||||
RenderIntFeature(f_window, &sample->features()[f], ScrollView::RED);
|
||||
}
|
||||
}
|
||||
|
@ -224,10 +224,10 @@ FEATURE_SET Classify::ExtractIntCNFeatures(
|
||||
blob, false, &local_fx_info, &bl_features);
|
||||
if (sample == nullptr) return nullptr;
|
||||
|
||||
int num_features = sample->num_features();
|
||||
uint32_t num_features = sample->num_features();
|
||||
const INT_FEATURE_STRUCT* features = sample->features();
|
||||
FEATURE_SET feature_set = NewFeatureSet(num_features);
|
||||
for (int f = 0; f < num_features; ++f) {
|
||||
for (uint32_t f = 0; f < num_features; ++f) {
|
||||
FEATURE feature = NewFeature(&IntFeatDesc);
|
||||
|
||||
feature->Params[IntX] = features[f].X;
|
||||
|
@ -109,8 +109,8 @@ void ShapeClassifier::DebugDisplay(const TrainingSample& sample,
|
||||
popup_menu->BuildMenu(debug_win, false);
|
||||
// Display the features in green.
|
||||
const INT_FEATURE_STRUCT* features = sample.features();
|
||||
int num_features = sample.num_features();
|
||||
for (int f = 0; f < num_features; ++f) {
|
||||
uint32_t num_features = sample.num_features();
|
||||
for (uint32_t f = 0; f < num_features; ++f) {
|
||||
RenderIntFeature(debug_win, &features[f], ScrollView::GREEN);
|
||||
}
|
||||
debug_win->Update();
|
||||
|
@ -61,12 +61,10 @@ bool TrainingSample::Serialize(FILE* fp) const {
|
||||
return false;
|
||||
if (fwrite(&outline_length_, sizeof(outline_length_), 1, fp) != 1)
|
||||
return false;
|
||||
if (static_cast<int>(fwrite(features_, sizeof(*features_), num_features_, fp))
|
||||
!= num_features_)
|
||||
if (fwrite(features_, sizeof(*features_), num_features_, fp) != num_features_)
|
||||
return false;
|
||||
if (static_cast<int>(fwrite(micro_features_, sizeof(*micro_features_),
|
||||
num_micro_features_,
|
||||
fp)) != num_micro_features_)
|
||||
if (fwrite(micro_features_, sizeof(*micro_features_), num_micro_features_,
|
||||
fp) != num_micro_features_)
|
||||
return false;
|
||||
if (fwrite(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) !=
|
||||
kNumCNParams) return false;
|
||||
@ -102,16 +100,18 @@ bool TrainingSample::DeSerialize(bool swap, FILE* fp) {
|
||||
ReverseN(&num_micro_features_, sizeof(num_micro_features_));
|
||||
ReverseN(&outline_length_, sizeof(outline_length_));
|
||||
}
|
||||
// Arbitrarily limit the number of elements to protect against bad data.
|
||||
if (num_features_ > UINT16_MAX) return false;
|
||||
if (num_micro_features_ > UINT16_MAX) return false;
|
||||
delete [] features_;
|
||||
features_ = new INT_FEATURE_STRUCT[num_features_];
|
||||
if (static_cast<int>(fread(features_, sizeof(*features_), num_features_, fp))
|
||||
if (fread(features_, sizeof(*features_), num_features_, fp)
|
||||
!= num_features_)
|
||||
return false;
|
||||
delete [] micro_features_;
|
||||
micro_features_ = new MicroFeature[num_micro_features_];
|
||||
if (static_cast<int>(fread(micro_features_, sizeof(*micro_features_),
|
||||
num_micro_features_,
|
||||
fp)) != num_micro_features_)
|
||||
if (fread(micro_features_, sizeof(*micro_features_), num_micro_features_,
|
||||
fp) != num_micro_features_)
|
||||
return false;
|
||||
if (fread(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) !=
|
||||
kNumCNParams) return false;
|
||||
@ -165,7 +165,7 @@ TrainingSample* TrainingSample::RandomizedCopy(int index) const {
|
||||
++index; // Remove the first combination.
|
||||
const int yshift = kYShiftValues[index / kSampleScaleSize];
|
||||
double scaling = kScaleValues[index % kSampleScaleSize];
|
||||
for (int i = 0; i < num_features_; ++i) {
|
||||
for (uint32_t i = 0; i < num_features_; ++i) {
|
||||
double result = (features_[i].X - kRandomizingCenter) * scaling;
|
||||
result += kRandomizingCenter;
|
||||
sample->features_[i].X = ClipToRange<int>(result + 0.5, 0, UINT8_MAX);
|
||||
@ -217,7 +217,7 @@ void TrainingSample::ExtractCharDesc(int int_feature_type,
|
||||
} else {
|
||||
num_features_ = char_features->NumFeatures;
|
||||
features_ = new INT_FEATURE_STRUCT[num_features_];
|
||||
for (int f = 0; f < num_features_; ++f) {
|
||||
for (uint32_t f = 0; f < num_features_; ++f) {
|
||||
features_[f].X =
|
||||
static_cast<uint8_t>(char_features->Features[f]->Params[IntX]);
|
||||
features_[f].Y =
|
||||
@ -238,7 +238,7 @@ void TrainingSample::ExtractCharDesc(int int_feature_type,
|
||||
} else {
|
||||
num_micro_features_ = char_features->NumFeatures;
|
||||
micro_features_ = new MicroFeature[num_micro_features_];
|
||||
for (int f = 0; f < num_micro_features_; ++f) {
|
||||
for (uint32_t f = 0; f < num_micro_features_; ++f) {
|
||||
for (int d = 0; d < MFCount; ++d) {
|
||||
micro_features_[f][d] = char_features->Features[f]->Params[d];
|
||||
}
|
||||
@ -294,7 +294,7 @@ void TrainingSample::MapFeatures(const IntFeatureMap& feature_map) {
|
||||
// Returns a pix representing the sample. (Int features only.)
|
||||
Pix* TrainingSample::RenderToPix(const UNICHARSET* unicharset) const {
|
||||
Pix* pix = pixCreate(kIntFeatureExtent, kIntFeatureExtent, 1);
|
||||
for (int f = 0; f < num_features_; ++f) {
|
||||
for (uint32_t f = 0; f < num_features_; ++f) {
|
||||
int start_x = features_[f].X;
|
||||
int start_y = kIntFeatureExtent - features_[f].Y;
|
||||
double dx = cos((features_[f].Theta / 256.0) * 2.0 * M_PI - M_PI);
|
||||
@ -315,7 +315,7 @@ Pix* TrainingSample::RenderToPix(const UNICHARSET* unicharset) const {
|
||||
void TrainingSample::DisplayFeatures(ScrollView::Color color,
|
||||
ScrollView* window) const {
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
for (int f = 0; f < num_features_; ++f) {
|
||||
for (uint32_t f = 0; f < num_features_; ++f) {
|
||||
RenderIntFeature(window, &features_[f], color);
|
||||
}
|
||||
#endif // GRAPHICS_DISABLED
|
||||
|
@ -137,13 +137,13 @@ class TrainingSample : public ELIST_LINK {
|
||||
void set_bounding_box(const TBOX& box) {
|
||||
bounding_box_ = box;
|
||||
}
|
||||
int num_features() const {
|
||||
uint32_t num_features() const {
|
||||
return num_features_;
|
||||
}
|
||||
const INT_FEATURE_STRUCT* features() const {
|
||||
return features_;
|
||||
}
|
||||
int num_micro_features() const {
|
||||
uint32_t num_micro_features() const {
|
||||
return num_micro_features_;
|
||||
}
|
||||
const MicroFeature* micro_features() const {
|
||||
@ -206,9 +206,9 @@ class TrainingSample : public ELIST_LINK {
|
||||
// Bounding box of sample in original image.
|
||||
TBOX bounding_box_;
|
||||
// Number of INT_FEATURE_STRUCT in features_ array.
|
||||
int num_features_;
|
||||
uint32_t num_features_;
|
||||
// Number of MicroFeature in micro_features_ array.
|
||||
int num_micro_features_;
|
||||
uint32_t num_micro_features_;
|
||||
// Total length of outline in the baseline normalized coordinate space.
|
||||
// See comment in WERD_RES class definition for a discussion of coordinate
|
||||
// spaces.
|
||||
|
Loading…
Reference in New Issue
Block a user