From fa93232517d819d015dc13144d03864f496cc58a Mon Sep 17 00:00:00 2001
From: Stefan Weil
Date: Tue, 16 Mar 2021 20:44:45 +0100
Subject: [PATCH] Replace more GenericVector by std::vector for src/classify

Signed-off-by: Stefan Weil
---
 src/ccutil/helpers.h        | 55 +++++++++++++++++++++++++++++++++++++
 src/classify/shapetable.cpp | 10 +++---
 src/classify/shapetable.h   |  2 +-
 3 files changed, 61 insertions(+), 6 deletions(-)

diff --git a/src/ccutil/helpers.h b/src/ccutil/helpers.h
index 7988279f..1fd6c037 100644
--- a/src/ccutil/helpers.h
+++ b/src/ccutil/helpers.h
@@ -209,6 +209,61 @@ inline void Reverse64(void *ptr) {
   ReverseN(ptr, 8);
 }
 
+// Reads a vector of simple types from the given file. Assumes that bitwise
+// read/write will work with ReverseN according to sizeof(T).
+// Returns false in case of error.
+// If swap is true, assumes a big/little-endian swap is needed.
+template <typename T>
+bool DeSerialize(bool swap, FILE *fp, std::vector<T> &data) {
+  uint32_t size;
+  if (fread(&size, sizeof(size), 1, fp) != 1) {
+    return false;
+  }
+  if (swap) {
+    Reverse32(&size);
+  }
+  // Arbitrarily limit the number of elements to protect against bad data.
+  assert(size <= UINT16_MAX);
+  if (size > UINT16_MAX) {
+    return false;
+  }
+  // TODO: optimize.
+  data.resize(size);
+  if (size > 0) {
+    if (fread(&data[0], sizeof(T), size, fp) != size) {
+      return false;
+    }
+    if (swap) {
+      for (int i = 0; i < size; ++i) {
+        ReverseN(&data[i], sizeof(T));
+      }
+    }
+  }
+  return true;
+}
+
+// Writes a vector of simple types to the given file. Assumes that bitwise
+// read/write of T will work. Returns false in case of error.
+template <typename T>
+bool Serialize(FILE *fp, const std::vector<T> &data) {
+  uint32_t size = data.size();
+  if (fwrite(&size, sizeof(size), 1, fp) != 1) {
+    return false;
+  } else if constexpr (std::is_class_v<T>) {
+    // Serialize a tesseract class.
+    for (auto &item : data) {
+      if (!item.Serialize(fp)) {
+        return false;
+      }
+    }
+  } else if (size > 0) {
+    if (fwrite(&data[0], sizeof(T), size, fp) != size) {
+      return false;
+    }
+  }
+  return true;
+}
+
 } // namespace tesseract
 
 #endif // TESSERACT_CCUTIL_HELPERS_H_
diff --git a/src/classify/shapetable.cpp b/src/classify/shapetable.cpp
index d30066a9..8cc6cffe 100644
--- a/src/classify/shapetable.cpp
+++ b/src/classify/shapetable.cpp
@@ -62,12 +62,12 @@ int UnicharRating::FirstResultWithUnichar(const GenericVector<UnicharRating> &re
 
 // Writes to the given file. Returns false in case of error.
 bool UnicharAndFonts::Serialize(FILE *fp) const {
-  return tesseract::Serialize(fp, &unichar_id) && font_ids.Serialize(fp);
+  return tesseract::Serialize(fp, &unichar_id) && tesseract::Serialize(fp, font_ids);
 }
 
 // Reads from the given file. Returns false in case of error.
 bool UnicharAndFonts::DeSerialize(TFile *fp) {
-  return fp->DeSerialize(&unichar_id) && font_ids.DeSerialize(fp);
+  return fp->DeSerialize(&unichar_id) && fp->DeSerialize(font_ids);
 }
 
 // Sort function to sort a pair of UnicharAndFonts by unichar_id.
@@ -98,7 +98,7 @@ void Shape::AddToShape(int unichar_id, int font_id) {
   for (int c = 0; c < unichars_.size(); ++c) {
     if (unichars_[c].unichar_id == unichar_id) {
       // Found the unichar in the shape table.
-      GenericVector<int> &font_list = unichars_[c].font_ids;
+      std::vector<int> &font_list = unichars_[c].font_ids;
       for (int f = 0; f < font_list.size(); ++f) {
         if (font_list[f] == font_id)
           return; // Font is already there.
@@ -195,7 +195,7 @@ bool Shape::operator==(const Shape &other) const {
 bool Shape::IsSubsetOf(const Shape &other) const {
   for (int c = 0; c < unichars_.size(); ++c) {
     int unichar_id = unichars_[c].unichar_id;
-    const GenericVector<int> &font_list = unichars_[c].font_ids;
+    const std::vector<int> &font_list = unichars_[c].font_ids;
     for (int f = 0; f < font_list.size(); ++f) {
       if (!other.ContainsUnicharAndFont(unichar_id, font_list[f]))
         return false;
@@ -629,7 +629,7 @@ bool ShapeTable::CommonFont(int shape_id1, int shape_id2) const {
   const Shape &shape1 = GetShape(shape_id1);
   const Shape &shape2 = GetShape(shape_id2);
   for (int c1 = 0; c1 < shape1.size(); ++c1) {
-    const GenericVector<int> &font_list1 = shape1[c1].font_ids;
+    const std::vector<int> &font_list1 = shape1[c1].font_ids;
     for (int f = 0; f < font_list1.size(); ++f) {
       if (shape2.ContainsFont(font_list1[f]))
         return true;
diff --git a/src/classify/shapetable.h b/src/classify/shapetable.h
index 393cb0af..31b40154 100644
--- a/src/classify/shapetable.h
+++ b/src/classify/shapetable.h
@@ -140,7 +140,7 @@ struct UnicharAndFonts {
   // Sort function to sort a pair of UnicharAndFonts by unichar_id.
   static int SortByUnicharId(const void *v1, const void *v2);
 
-  GenericVector<int32_t> font_ids;
+  std::vector<int32_t> font_ids;
   int32_t unichar_id;
 };
 