Merge pull request #3335 from stweil/genericvector

Replace more GenericVector by std::vector
2025-01-21 17:13:09 +08:00 · 2021-03-17 18:39:59 +03:00 · 2021-03-17 18:39:59 +03:00 · 345e74e28b
commit 345e74e28b
parent 122daf1d64 2fb6f9eb72
25 changed files with 157 additions and 109 deletions
--- a/src/ccstruct/blobs.cpp
+++ b/src/ccstruct/blobs.cpp
@ -542,17 +542,16 @@ void TBLOB::GetPreciseBoundingBox(TBOX *precise_box) const {
 // x-coord starting at box.left().
 // Eg x_coords[0] is a collection of the x-coords of edges at y=bottom.
 // Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1.
-void TBLOB::GetEdgeCoords(const TBOX &box, GenericVector<GenericVector<int>> *x_coords,
-                          GenericVector<GenericVector<int>> *y_coords) const {
-  GenericVector<int> empty;
-  x_coords->init_to_size(box.height(), empty);
-  y_coords->init_to_size(box.width(), empty);
-  CollectEdges(box, nullptr, nullptr, x_coords, y_coords);
+void TBLOB::GetEdgeCoords(const TBOX &box, std::vector<std::vector<int>> &x_coords,
+                          std::vector<std::vector<int>> &y_coords) const {
+  x_coords.assign(box.height(), std::vector<int>()); // One empty row per y; stale data dropped.
+  y_coords.assign(box.width(), std::vector<int>());  // One empty column per x, likewise.
+  CollectEdges(box, nullptr, nullptr, &x_coords, &y_coords);
  // Sort the output vectors.
-  for (int i = 0; i < x_coords->size(); ++i)
-    (*x_coords)[i].sort();
-  for (int i = 0; i < y_coords->size(); ++i)
-    (*y_coords)[i].sort();
+  for (auto &row : x_coords)
+    std::sort(row.begin(), row.end());
+  for (auto &column : y_coords)
+    std::sort(column.begin(), column.end());
 }

 // Accumulates the segment between pt1 and pt2 in the LLSQ, quantizing over
@ -585,8 +584,8 @@ static void SegmentLLSQ(const FCOORD &pt1, const FCOORD &pt2, LLSQ *accumulator)
 // are clipped to ([0,x_limit], [0,y_limit]).
 // See GetEdgeCoords above for a description of x_coords, y_coords.
 static void SegmentCoords(const FCOORD &pt1, const FCOORD &pt2, int x_limit, int y_limit,
-                          GenericVector<GenericVector<int>> *x_coords,
-                          GenericVector<GenericVector<int>> *y_coords) {
+                          std::vector<std::vector<int>> *x_coords,
+                          std::vector<std::vector<int>> *y_coords) {
  FCOORD step(pt2);
  step -= pt1;
  int start = ClipToRange(IntCastRounded(std::min(pt1.x(), pt2.x())), 0, x_limit);
@ -639,8 +638,8 @@ static void SegmentBBox(const FCOORD &pt1, const FCOORD &pt2, TBOX *bbox) {
 // indices into x_coords, y_coords are offset by box.botleft().
 static void CollectEdgesOfRun(const EDGEPT *startpt, const EDGEPT *lastpt, const DENORM &denorm,
                              const TBOX &box, TBOX *bounding_box, LLSQ *accumulator,
-                              GenericVector<GenericVector<int>> *x_coords,
-                              GenericVector<GenericVector<int>> *y_coords) {
+                              std::vector<std::vector<int>> *x_coords,
+                              std::vector<std::vector<int>> *y_coords) {
  const C_OUTLINE *outline = startpt->src_outline;
  int x_limit = box.width() - 1;
  int y_limit = box.height() - 1;
@ -727,8 +726,8 @@ static void CollectEdgesOfRun(const EDGEPT *startpt, const EDGEPT *lastpt, const
 // normalization.
 // For a description of x_coords, y_coords, see GetEdgeCoords above.
 void TBLOB::CollectEdges(const TBOX &box, TBOX *bounding_box, LLSQ *llsq,
-                         GenericVector<GenericVector<int>> *x_coords,
-                         GenericVector<GenericVector<int>> *y_coords) const {
+                         std::vector<std::vector<int>> *x_coords,
+                         std::vector<std::vector<int>> *y_coords) const {
  // Iterate the outlines.
  for (const TESSLINE *ol = outlines; ol != nullptr; ol = ol->next) {
    // Iterate the polygon.
--- a/src/ccstruct/blobs.h
+++ b/src/ccstruct/blobs.h
@ -391,8 +391,8 @@ struct TBLOB {
  // x-coord starting at box.left().
  // Eg x_coords[0] is a collection of the x-coords of edges at y=bottom.
  // Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1.
-  void GetEdgeCoords(const TBOX &box, GenericVector<GenericVector<int>> *x_coords,
-                     GenericVector<GenericVector<int>> *y_coords) const;
+  void GetEdgeCoords(const TBOX &box, std::vector<std::vector<int>> &x_coords,
+                     std::vector<std::vector<int>> &y_coords) const;

  TESSLINE *outlines; // List of outlines in blob.

@ -403,8 +403,8 @@ private: // TODO(rays) Someday the data members will be private too.
  // normalization.
  // For a description of x_coords, y_coords, see GetEdgeCoords above.
  void CollectEdges(const TBOX &box, TBOX *bounding_box, LLSQ *llsq,
-                    GenericVector<GenericVector<int>> *x_coords,
-                    GenericVector<GenericVector<int>> *y_coords) const;
+                    std::vector<std::vector<int>> *x_coords,
+                    std::vector<std::vector<int>> *y_coords) const;

 private:
  // DENORM indicating the transformations that this blob has undergone so far.
--- a/src/ccstruct/normalis.cpp
+++ b/src/ccstruct/normalis.cpp
@ -153,8 +153,8 @@ void DENORM::SetupNormalization(const BLOCK *block, const FCOORD *rotation,
 // pre-initialized to be the same size as box. Each element will contain the
 // minimum of x and y run-length as shown above.
 static void ComputeRunlengthImage(const TBOX &box,
-                                  const GenericVector<GenericVector<int>> &x_coords,
-                                  const GenericVector<GenericVector<int>> &y_coords,
+                                  const std::vector<std::vector<int>> &x_coords,
+                                  const std::vector<std::vector<int>> &y_coords,
                                  GENERIC_2D_ARRAY<int> *minruns) {
  int width = box.width();
  int height = box.height();
@ -264,8 +264,8 @@ static void ComputeEdgeDensityProfiles(const TBOX &box, const GENERIC_2D_ARRAY<i
 // See comments on the helper functions above for more details.
 void DENORM::SetupNonLinear(const DENORM *predecessor, const TBOX &box, float target_width,
                            float target_height, float final_xshift, float final_yshift,
-                            const GenericVector<GenericVector<int>> &x_coords,
-                            const GenericVector<GenericVector<int>> &y_coords) {
+                            const std::vector<std::vector<int>> &x_coords,
+                            const std::vector<std::vector<int>> &y_coords) {
  Clear();
  predecessor_ = predecessor;
  // x_map_ and y_map_ store a mapping from input x and y coordinate to output
--- a/src/ccstruct/normalis.h
+++ b/src/ccstruct/normalis.h
@ -2,7 +2,6 @@
 * File:        normalis.h  (Formerly denorm.h)
 * Description: Code for the DENORM class.
 * Author:      Ray Smith
- * Created:     Thu Apr 23 09:22:43 BST 1992
 *
 * (C) Copyright 1992, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
@ -20,6 +19,7 @@
 #ifndef NORMALIS_H
 #define NORMALIS_H

+#include <vector>
 #include <tesseract/export.h>

 struct Pix;
@ -193,8 +193,8 @@ public:
  // The second-level vectors must all be sorted in ascending order.
  void SetupNonLinear(const DENORM *predecessor, const TBOX &box, float target_width,
                      float target_height, float final_xshift, float final_yshift,
-                      const GenericVector<GenericVector<int>> &x_coords,
-                      const GenericVector<GenericVector<int>> &y_coords);
+                      const std::vector<std::vector<int>> &x_coords,
+                      const std::vector<std::vector<int>> &y_coords);

  // Transforms the given coords one step forward to normalized space, without
  // using any block rotation or predecessor.
--- a/src/ccutil/helpers.h
+++ b/src/ccutil/helpers.h
@ -209,6 +209,61 @@ inline void Reverse64(void *ptr) {
  ReverseN(ptr, 8);
 }

+// Reads a vector of simple types from the given file: a 32 bit element count
+// followed by the elements as raw bytes. Assumes that bitwise read/write of T
+// will work, with ReverseN fixing each element according to sizeof(T) if swap
+// is true (big/little-endian swap needed). Returns false in case of error.
+template <typename T>
+bool DeSerialize(bool swap, FILE *fp, std::vector<T> &data) {
+  uint32_t size;
+  if (fread(&size, sizeof(size), 1, fp) != 1) {
+    return false;
+  }
+  if (swap) {
+    Reverse32(&size);
+  }
+  // Arbitrarily limit the number of elements to protect against bad data.
+  assert(size <= UINT16_MAX); // Debug aid only; the check below also covers NDEBUG builds.
+  if (size > UINT16_MAX) {
+    return false;
+  }
+  // TODO: optimize.
+  data.resize(size);
+  if (size > 0) {
+    if (fread(&data[0], sizeof(T), size, fp) != size) {
+      return false;
+    }
+    if (swap) {
+      for (uint32_t i = 0; i < size; ++i) { // Same type as size: no signed/unsigned mix.
+        ReverseN(&data[i], sizeof(T));
+      }
+    }
+  }
+  return true;
+}
+
+// Serializes a vector to the given file: first the element count as a 32 bit
+// value, then the elements. Returns false in case of error.
+template <typename T>
+bool Serialize(FILE *fp, const std::vector<T> &data) {
+  const uint32_t count = data.size();
+  if (fwrite(&count, sizeof(count), 1, fp) != 1) {
+    return false;
+  }
+  if constexpr (std::is_class_v<T>) {
+    // Class types write themselves through their own Serialize method.
+    for (const auto &element : data) {
+      if (!element.Serialize(fp)) {
+        return false;
+      }
+    }
+  } else if (count > 0) {
+    // Simple types are written bitwise in a single block.
+    return fwrite(data.data(), sizeof(T), count, fp) == count;
+  }
+  return true;
+}
+
 } // namespace tesseract

 #endif // TESSERACT_CCUTIL_HELPERS_H_
--- a/src/ccutil/indexmapbidi.cpp
+++ b/src/ccutil/indexmapbidi.cpp
@ -2,7 +2,6 @@
 // File:        indexmapbidi.cpp
 // Description: Bi-directional mapping between a sparse and compact space.
 // Author:      rays@google.com (Ray Smith)
-// Created:     Tue Apr 06 11:33:59 PDT 2010
 //
 // (C) Copyright 2010, Google Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
@ -239,8 +238,8 @@ bool IndexMapBiDi::DeSerialize(bool swap, FILE *fp) {
 // Assumes the input is sorted. The output indices are sorted and uniqued.
 // Return value is the number of "missed" features, being features that
 // don't map to the compact feature space.
-int IndexMapBiDi::MapFeatures(const GenericVector<int> &sparse, GenericVector<int> *compact) const {
-  compact->truncate(0);
+int IndexMapBiDi::MapFeatures(const std::vector<int> &sparse, std::vector<int> *compact) const {
+  compact->clear();
  int num_features = sparse.size();
  int missed_features = 0;
  int prev_good_feature = -1;
--- a/src/ccutil/indexmapbidi.h
+++ b/src/ccutil/indexmapbidi.h
@ -157,7 +157,7 @@ public:
  // Assumes the input is sorted. The output indices are sorted and uniqued.
  // Return value is the number of "missed" features, being features that
  // don't map to the compact feature space.
-  int MapFeatures(const GenericVector<int> &sparse, GenericVector<int> *compact) const;
+  int MapFeatures(const std::vector<int> &sparse, std::vector<int> *compact) const;

 private:
  // Returns the master compact index for a given compact index.
--- a/src/classify/adaptmatch.cpp
+++ b/src/classify/adaptmatch.cpp
@ -60,7 +60,6 @@
 #include "unicity_table.h"   // for UnicityTable

 #include <tesseract/unichar.h> // for UNICHAR_ID, INVALID_UNICHAR_ID
-#include "genericvector.h"     // for GenericVector
 #include "helpers.h"           // for IntCastRounded, ClipToRange
 #include "serialis.h"          // for TFile

@ -1104,7 +1103,7 @@ void Classify::ExpandShapesAndApplyCorrections(ADAPT_CLASS *classes, bool debug,
      // 2. Multi-unichar shapetable. Variable unichars in the shapes referenced
      // by int_result. In this case, build a vector of UnicharRating to
      // gather together different font-ids for each unichar. Also covers case1.
-      GenericVector<UnicharRating> mapped_results;
+      std::vector<UnicharRating> mapped_results;
      for (int f = 0; f < int_result->fonts.size(); ++f) {
        int shape_id = int_result->fonts[f].fontinfo_id;
        const Shape &shape = shape_table_->GetShape(shape_id);
@ -1260,7 +1259,7 @@ int Classify::CharNormClassifier(TBLOB *blob, const TrainingSample &sample,
 } /* CharNormClassifier */

 // As CharNormClassifier, but operates on a TrainingSample and outputs to
-// a GenericVector of ShapeRating without conversion to classes.
+// a vector of UnicharRating without conversion to classes.
 int Classify::CharNormTrainingSample(bool pruner_only, int keep_this, const TrainingSample &sample,
                                     std::vector<UnicharRating> *results) {
  results->clear();
--- a/src/classify/classify.h
+++ b/src/classify/classify.h
@ -238,7 +238,7 @@ public:
  int CharNormClassifier(TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results);

  // As CharNormClassifier, but operates on a TrainingSample and outputs to
-  // a GenericVector of ShapeRating without conversion to classes.
+  // a vector of UnicharRating without conversion to classes.
  int CharNormTrainingSample(bool pruner_only, int keep_this, const TrainingSample &sample,
                             std::vector<UnicharRating> *results);
  UNICHAR_ID *GetAmbiguities(TBLOB *Blob, CLASS_ID CorrectClass);
@ -312,7 +312,7 @@ public:
  static void ExtractFeatures(const TBLOB &blob, bool nonlinear_norm,
                              std::vector<INT_FEATURE_STRUCT> *bl_features,
                              std::vector<INT_FEATURE_STRUCT> *cn_features,
-                              INT_FX_RESULT_STRUCT *results, GenericVector<int> *outline_cn_counts);
+                              INT_FX_RESULT_STRUCT *results, std::vector<int> *outline_cn_counts);
  /* float2int.cpp ************************************************************/
  void ClearCharNormArray(uint8_t *char_norm_array);
  void ComputeIntCharNormArray(const FEATURE_STRUCT &norm_feature, uint8_t *char_norm_array);
@ -481,7 +481,7 @@ private:

  Dict dict_;

-  GenericVector<uint16_t> shapetable_cutoffs_;
+  std::vector<uint16_t> shapetable_cutoffs_;

  /* variables used to hold performance statistics */
  int NumAdaptationsFailed = 0;
--- a/src/classify/cutoffs.cpp
+++ b/src/classify/cutoffs.cpp
@ -42,7 +42,7 @@ void Classify::ReadNewCutoffs(TFile *fp, uint16_t *Cutoffs) {
  int Cutoff;

  if (shape_table_ != nullptr) {
-    if (!shapetable_cutoffs_.DeSerialize(fp)) {
+    if (!fp->DeSerialize(shapetable_cutoffs_)) {
      tprintf("Error during read of shapetable pffmtable!\n");
    }
  }
--- a/src/classify/intfeaturespace.cpp
+++ b/src/classify/intfeaturespace.cpp
@ -53,8 +53,8 @@ INT_FEATURE_STRUCT IntFeatureSpace::PositionFromIndex(int index) const {
 // Bulk calls to Index. Maps the given array of features to a vector of
 // int32_t indices in the same order as the input.
 void IntFeatureSpace::IndexFeatures(const INT_FEATURE_STRUCT *features, int num_features,
-                                    GenericVector<int> *mapped_features) const {
-  mapped_features->truncate(0);
+                                    std::vector<int> *mapped_features) const {
+  mapped_features->clear();
  for (int f = 0; f < num_features; ++f)
    mapped_features->push_back(Index(features[f]));
 }
@ -62,11 +62,11 @@ void IntFeatureSpace::IndexFeatures(const INT_FEATURE_STRUCT *features, int num_
 // Bulk calls to Index. Maps the given array of features to a vector of
 // sorted int32_t indices.
 void IntFeatureSpace::IndexAndSortFeatures(const INT_FEATURE_STRUCT *features, int num_features,
-                                           GenericVector<int> *sorted_features) const {
-  sorted_features->truncate(0);
+                                           std::vector<int> *sorted_features) const {
+  sorted_features->clear();
  for (int f = 0; f < num_features; ++f)
    sorted_features->push_back(Index(features[f]));
-  sorted_features->sort();
+  std::sort(sorted_features->begin(), sorted_features->end());
 }

 // Returns a feature space index for the given x,y position in a display
--- a/src/classify/intfeaturespace.h
+++ b/src/classify/intfeaturespace.h
@ -3,7 +3,6 @@
 ///////////////////////////////////////////////////////////////////////
 // File:        intfeaturespace.h
 // Description: Indexed feature space based on INT_FEATURE_STRUCT.
-// Created:     Wed Mar 24 10:55:30 PDT 2010
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@ -20,7 +19,6 @@
 #ifndef TESSERACT_CLASSIFY_INTFEATURESPACE_H_
 #define TESSERACT_CLASSIFY_INTFEATURESPACE_H_

-#include "genericvector.h"
 #include "intproto.h"

 // Extent of x,y,theta in the input feature space. [0,255].
@ -63,11 +61,11 @@ public:
  // Bulk calls to Index. Maps the given array of features to a vector of
  // int32_t indices in the same order as the input.
  void IndexFeatures(const INT_FEATURE_STRUCT *features, int num_features,
-                     GenericVector<int> *mapped_features) const;
+                     std::vector<int> *mapped_features) const;
  // Bulk calls to Index. Maps the given array of features to a vector of
  // sorted int32_t indices.
  void IndexAndSortFeatures(const INT_FEATURE_STRUCT *features, int num_features,
-                            GenericVector<int> *sorted_features) const;
+                            std::vector<int> *sorted_features) const;
  // Returns a feature space index for the given x,y position in a display
  // window, or -1 if the feature is a miss.
  int XYToFeatureIndex(int x, int y) const;
--- a/src/classify/intfx.cpp
+++ b/src/classify/intfx.cpp
@ -144,12 +144,12 @@ void Classify::SetupBLCNDenorms(const TBLOB &blob, bool nonlinear_norm, DENORM *
                                128.0f, 128.0f);
  // Setup the denorm for character normalization.
  if (nonlinear_norm) {
-    GenericVector<GenericVector<int>> x_coords;
-    GenericVector<GenericVector<int>> y_coords;
+    std::vector<std::vector<int>> x_coords;
+    std::vector<std::vector<int>> y_coords;
    TBOX box;
    blob.GetPreciseBoundingBox(&box);
    box.pad(1, 1);
-    blob.GetEdgeCoords(box, &x_coords, &y_coords);
+    blob.GetEdgeCoords(box, x_coords, y_coords);
    cn_denorm->SetupNonLinear(&blob.denorm(), box, UINT8_MAX, UINT8_MAX, 0.0f, 0.0f, x_coords,
                              y_coords);
  } else {
@ -431,11 +431,11 @@ void Classify::ExtractFeatures(const TBLOB &blob, bool nonlinear_norm,
                               std::vector<INT_FEATURE_STRUCT> *bl_features,
                               std::vector<INT_FEATURE_STRUCT> *cn_features,
                               INT_FX_RESULT_STRUCT *results,
-                               GenericVector<int> *outline_cn_counts) {
+                               std::vector<int> *outline_cn_counts) {
  DENORM bl_denorm, cn_denorm;
  tesseract::Classify::SetupBLCNDenorms(blob, nonlinear_norm, &bl_denorm, &cn_denorm, results);
  if (outline_cn_counts != nullptr)
-    outline_cn_counts->truncate(0);
+    outline_cn_counts->clear();
  // Iterate the outlines.
  for (TESSLINE *ol = blob.outlines; ol != nullptr; ol = ol->next) {
    // Iterate the polygon.
--- a/src/classify/shapeclassifier.cpp
+++ b/src/classify/shapeclassifier.cpp
@ -31,8 +31,6 @@
 #include "tprintf.h"
 #include "trainingsample.h"

-#include "genericvector.h"
-
 namespace tesseract {

 // Classifies the given [training] sample, writing to results.
@ -45,8 +43,8 @@ int ShapeClassifier::UnicharClassifySample(const TrainingSample &sample, Pix *pa
  std::vector<ShapeRating> shape_results;
  int num_shape_results = ClassifySample(sample, page_pix, debug, keep_this, &shape_results);
  const ShapeTable *shapes = GetShapeTable();
-  GenericVector<int> unichar_map;
-  unichar_map.init_to_size(shapes->unicharset().size(), -1);
+  std::vector<int> unichar_map;
+  unichar_map.resize(shapes->unicharset().size(), -1);
  for (int r = 0; r < num_shape_results; ++r) {
    shapes->AddShapeToResults(shape_results[r], &unichar_map, results);
  }
--- a/src/classify/shapetable.cpp
+++ b/src/classify/shapetable.cpp
@ -35,7 +35,7 @@ namespace tesseract {
 // unichar_id. If the results are sorted by rating, this will also be the
 // best result with the required unichar_id.
 // Returns -1 if the unichar_id is not found
-int ShapeRating::FirstResultWithUnichar(const GenericVector<ShapeRating> &results,
+int ShapeRating::FirstResultWithUnichar(const std::vector<ShapeRating> &results,
                                        const ShapeTable &shape_table, UNICHAR_ID unichar_id) {
  for (int r = 0; r < results.size(); ++r) {
    const int shape_id = results[r].shape_id;
@ -51,7 +51,7 @@ int ShapeRating::FirstResultWithUnichar(const GenericVector<ShapeRating> &result
 // unichar_id. If the results are sorted by rating, this will also be the
 // best result with the required unichar_id.
 // Returns -1 if the unichar_id is not found
-int UnicharRating::FirstResultWithUnichar(const GenericVector<UnicharRating> &results,
+int UnicharRating::FirstResultWithUnichar(const std::vector<UnicharRating> &results,
                                          UNICHAR_ID unichar_id) {
  for (int r = 0; r < results.size(); ++r) {
    if (results[r].unichar_id == unichar_id)
@ -62,12 +62,12 @@ int UnicharRating::FirstResultWithUnichar(const GenericVector<UnicharRating> &re

 // Writes to the given file. Returns false in case of error.
 bool UnicharAndFonts::Serialize(FILE *fp) const {
-  return tesseract::Serialize(fp, &unichar_id) && font_ids.Serialize(fp);
+  return tesseract::Serialize(fp, &unichar_id) && tesseract::Serialize(fp, font_ids);
 }
-// Reads from the given file. Returns false in case of error.

+// Reads from the given file. Returns false in case of error.
 bool UnicharAndFonts::DeSerialize(TFile *fp) {
-  return fp->DeSerialize(&unichar_id) && font_ids.DeSerialize(fp);
+  return fp->DeSerialize(&unichar_id) && fp->DeSerialize(font_ids);
 }

 // Sort function to sort a pair of UnicharAndFonts by unichar_id.
@ -77,10 +77,14 @@ int UnicharAndFonts::SortByUnicharId(const void *v1, const void *v2) {
  return p1->unichar_id - p2->unichar_id;
 }

+bool UnicharAndFonts::StdSortByUnicharId(const UnicharAndFonts &v1, const UnicharAndFonts &v2) {
+  return v1.unichar_id < v2.unichar_id; // Ascending unichar_id; comparator passed to std::sort.
+}
+
 // Writes to the given file. Returns false in case of error.
 bool Shape::Serialize(FILE *fp) const {
  uint8_t sorted = unichars_sorted_;
-  return tesseract::Serialize(fp, &sorted) && unichars_.SerializeClasses(fp);
+  return tesseract::Serialize(fp, &sorted) && tesseract::Serialize(fp, unichars_);
 }
 // Reads from the given file. Returns false in case of error.

@ -89,7 +93,7 @@ bool Shape::DeSerialize(TFile *fp) {
  if (!fp->DeSerialize(&sorted))
    return false;
  unichars_sorted_ = sorted != 0;
-  return unichars_.DeSerializeClasses(fp);
+  return fp->DeSerialize(unichars_);
 }

 // Adds a font_id for the given unichar_id. If the unichar_id is not
@ -98,7 +102,7 @@ void Shape::AddToShape(int unichar_id, int font_id) {
  for (int c = 0; c < unichars_.size(); ++c) {
    if (unichars_[c].unichar_id == unichar_id) {
      // Found the unichar in the shape table.
-      GenericVector<int> &font_list = unichars_[c].font_ids;
+      std::vector<int> &font_list = unichars_[c].font_ids;
      for (int f = 0; f < font_list.size(); ++f) {
        if (font_list[f] == font_id)
          return; // Font is already there.
@ -195,7 +199,7 @@ bool Shape::operator==(const Shape &other) const {
 bool Shape::IsSubsetOf(const Shape &other) const {
  for (int c = 0; c < unichars_.size(); ++c) {
    int unichar_id = unichars_[c].unichar_id;
-    const GenericVector<int> &font_list = unichars_[c].font_ids;
+    const std::vector<int> &font_list = unichars_[c].font_ids;
    for (int f = 0; f < font_list.size(); ++f) {
      if (!other.ContainsUnicharAndFont(unichar_id, font_list[f]))
        return false;
@ -223,7 +227,7 @@ bool Shape::IsEqualUnichars(Shape *other) {

 // Sorts the unichars_ vector by unichar.
 void Shape::SortUnichars() {
-  unichars_.sort(UnicharAndFonts::SortByUnicharId);
+  std::sort(unichars_.begin(), unichars_.end(), UnicharAndFonts::StdSortByUnicharId);
  unichars_sorted_ = true;
 }

@ -262,7 +266,7 @@ int ShapeTable::NumFonts() const {

 // Re-indexes the class_ids in the shapetable according to the given map.
 // Useful in conjunction with set_unicharset.
-void ShapeTable::ReMapClassIds(const GenericVector<int> &unicharset_map) {
+void ShapeTable::ReMapClassIds(const std::vector<int> &unicharset_map) {
  for (int shape_id = 0; shape_id < shape_table_.size(); ++shape_id) {
    Shape *shape = shape_table_[shape_id];
    for (int c = 0; c < shape->size(); ++c) {
@ -629,7 +633,7 @@ bool ShapeTable::CommonFont(int shape_id1, int shape_id2) const {
  const Shape &shape1 = GetShape(shape_id1);
  const Shape &shape2 = GetShape(shape_id2);
  for (int c1 = 0; c1 < shape1.size(); ++c1) {
-    const GenericVector<int> &font_list1 = shape1[c1].font_ids;
+    const std::vector<int> &font_list1 = shape1[c1].font_ids;
    for (int f = 0; f < font_list1.size(); ++f) {
      if (shape2.ContainsFont(font_list1[f]))
        return true;
@ -640,9 +644,9 @@ bool ShapeTable::CommonFont(int shape_id1, int shape_id2) const {

 // Appends the master shapes from other to this.
 // If not nullptr, shape_map is set to map other shape_ids to this's shape_ids.
-void ShapeTable::AppendMasterShapes(const ShapeTable &other, GenericVector<int> *shape_map) {
+void ShapeTable::AppendMasterShapes(const ShapeTable &other, std::vector<int> *shape_map) {
  if (shape_map != nullptr)
-    shape_map->init_to_size(other.NumShapes(), -1);
+    shape_map->resize(other.NumShapes(), -1);
  for (int s = 0; s < other.shape_table_.size(); ++s) {
    if (other.shape_table_[s]->destination_index() < 0) {
      int index = AddShape(*other.shape_table_[s]);
@ -669,7 +673,7 @@ int ShapeTable::NumMasterShapes() const {
 // of decreasing rating.
 // The unichar_map vector indicates the index of the results entry containing
 // each unichar, or -1 if the unichar is not yet included in results.
-void ShapeTable::AddShapeToResults(const ShapeRating &shape_rating, GenericVector<int> *unichar_map,
+void ShapeTable::AddShapeToResults(const ShapeRating &shape_rating, std::vector<int> *unichar_map,
                                   std::vector<UnicharRating> *results) const {
  if (shape_rating.joined) {
    AddUnicharToResults(UNICHAR_JOINED, shape_rating.rating, unichar_map, results);
@ -690,9 +694,9 @@ void ShapeTable::AddShapeToResults(const ShapeRating &shape_rating, GenericVecto

 // Adds the given unichar_id to the results if needed, updating unichar_map
 // and returning the index of unichar in results.
-int ShapeTable::AddUnicharToResults(int unichar_id, float rating, GenericVector<int> *unichar_map,
+int ShapeTable::AddUnicharToResults(int unichar_id, float rating, std::vector<int> *unichar_map,
                                    std::vector<UnicharRating> *results) const {
-  int result_index = unichar_map->get(unichar_id);
+  int result_index = unichar_map->at(unichar_id);
  if (result_index < 0) {
    UnicharRating result(unichar_id, rating);
    result_index = results->size();
--- a/src/classify/shapetable.h
+++ b/src/classify/shapetable.h
@ -27,8 +27,6 @@
 #include "genericheap.h"
 #include "intmatcher.h"

-#include "genericvector.h"
-
 namespace tesseract {

 class UNICHARSET;
@ -53,7 +51,7 @@ struct UnicharRating {
  // unichar_id. If the results are sorted by rating, this will also be the
  // best result with the required unichar_id.
  // Returns -1 if the unichar_id is not found
-  static int FirstResultWithUnichar(const GenericVector<UnicharRating> &results,
+  static int FirstResultWithUnichar(const std::vector<UnicharRating> &results,
                                    UNICHAR_ID unichar_id);

  // Index into some UNICHARSET table indicates the class of the answer.
@ -84,7 +82,7 @@ struct ShapeRating {
  // unichar_id. If the results are sorted by rating, this will also be the
  // best result with the required unichar_id.
  // Returns -1 if the unichar_id is not found
-  static int FirstResultWithUnichar(const GenericVector<ShapeRating> &results,
+  static int FirstResultWithUnichar(const std::vector<ShapeRating> &results,
                                    const ShapeTable &shape_table, UNICHAR_ID unichar_id);

  // Index into some shape table indicates the class of the answer.
@ -139,8 +137,9 @@ struct UnicharAndFonts {

  // Sort function to sort a pair of UnicharAndFonts by unichar_id.
  static int SortByUnicharId(const void *v1, const void *v2);
+  static bool StdSortByUnicharId(const UnicharAndFonts &v1, const UnicharAndFonts &v2);

-  GenericVector<int32_t> font_ids;
+  std::vector<int32_t> font_ids;
  int32_t unichar_id;
 };

@ -216,7 +215,7 @@ private:
  int destination_index_ = 0;
  // Array of unichars, each with a set of fonts. Each unichar has at most
  // one entry in the vector.
-  GenericVector<UnicharAndFonts> unichars_;
+  std::vector<UnicharAndFonts> unichars_;
 };

 // ShapeTable is a class to encapsulate the triple indirection that is
@ -255,7 +254,7 @@ public:
  }
  // Re-indexes the class_ids in the shapetable according to the given map.
  // Useful in conjunction with set_unicharset.
-  void ReMapClassIds(const GenericVector<int> &unicharset_map);
+  void ReMapClassIds(const std::vector<int> &unicharset_map);
  // Returns a string listing the classes/fonts in a shape.
  std::string DebugStr(int shape_id) const;
  // Returns a debug string summarizing the table.
@ -318,7 +317,7 @@ public:
  // copy of a ShapeTable.
  // If not nullptr, shape_map is set to map other shape_ids to this's
  // shape_ids.
-  void AppendMasterShapes(const ShapeTable &other, GenericVector<int> *shape_map);
+  void AppendMasterShapes(const ShapeTable &other, std::vector<int> *shape_map);
  // Returns the number of master shapes remaining after merging.
  int NumMasterShapes() const;
  // Returns the destination of this shape, (if merged), taking into account
@ -345,13 +344,13 @@ public:
  // of decreasing rating.
  // The unichar_map vector indicates the index of the results entry containing
  // each unichar, or -1 if the unichar is not yet included in results.
-  void AddShapeToResults(const ShapeRating &shape_rating, GenericVector<int> *unichar_map,
+  void AddShapeToResults(const ShapeRating &shape_rating, std::vector<int> *unichar_map,
                         std::vector<UnicharRating> *results) const;

 private:
  // Adds the given unichar_id to the results if needed, updating unichar_map
  // and returning the index of unichar in results.
-  int AddUnicharToResults(int unichar_id, float rating, GenericVector<int> *unichar_map,
+  int AddUnicharToResults(int unichar_id, float rating, std::vector<int> *unichar_map,
                          std::vector<UnicharRating> *results) const;

  // Pointer to a provided unicharset used only by the Debugstr member.
--- a/src/classify/trainingsample.cpp
+++ b/src/classify/trainingsample.cpp
@ -272,7 +272,7 @@ void TrainingSample::ExtractCharDesc(int int_feature_type, int micro_type, int c
 // Sets the mapped_features_ from the features_ using the provided
 // feature_space to the indexed versions of the features.
 void TrainingSample::IndexFeatures(const IntFeatureSpace &feature_space) {
-  GenericVector<int> indexed_features;
+  std::vector<int> indexed_features;
  feature_space.IndexAndSortFeatures(features_, num_features_, &mapped_features_);
  features_are_indexed_ = true;
  features_are_mapped_ = false;
--- a/src/classify/trainingsample.h
+++ b/src/classify/trainingsample.h
@ -181,11 +181,11 @@ public:
  bool features_are_mapped() const {
    return features_are_mapped_;
  }
-  const GenericVector<int> &mapped_features() const {
+  const std::vector<int> &mapped_features() const {
    ASSERT_HOST(features_are_mapped_);
    return mapped_features_;
  }
-  const GenericVector<int> &indexed_features() const {
+  const std::vector<int> &indexed_features() const {
    ASSERT_HOST(features_are_indexed_);
    return mapped_features_;
  }
@ -239,7 +239,7 @@ public:
  // hide after refactoring

  // Indexed/mapped features, as indicated by the bools below.
-  GenericVector<int> mapped_features_;
+  std::vector<int> mapped_features_;
  bool features_are_indexed_;
  bool features_are_mapped_;

--- a/src/training/common/intfeaturedist.cpp
+++ b/src/training/common/intfeaturedist.cpp
@ -3,7 +3,6 @@
 ///////////////////////////////////////////////////////////////////////
 // File:        intfeaturedist.cpp
 // Description: Fast set-difference-based feature distance calculator.
-// Created:     Thu Sep 01 13:07:30 PDT 2011
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@ -50,7 +49,7 @@ void IntFeatureDist::Init(const IntFeatureMap *feature_map) {

 // Setup the map for the given indexed_features that have been indexed by
 // feature_map.
-void IntFeatureDist::Set(const GenericVector<int> &indexed_features, int canonical_count,
+void IntFeatureDist::Set(const std::vector<int> &indexed_features, int canonical_count,
                         bool value) {
  total_feature_weight_ = canonical_count;
  for (int i = 0; i < indexed_features.size(); ++i) {
@ -76,7 +75,7 @@ void IntFeatureDist::Set(const GenericVector<int> &indexed_features, int canonic

 // Compute the distance between the given feature vector and the last
 // Set feature vector.
-double IntFeatureDist::FeatureDistance(const GenericVector<int> &features) const {
+double IntFeatureDist::FeatureDistance(const std::vector<int> &features) const {
  const int num_test_features = features.size();
  const double denominator = total_feature_weight_ + num_test_features;
  double misses = denominator;
--- a/src/training/common/intfeaturedist.h
+++ b/src/training/common/intfeaturedist.h
@ -3,7 +3,6 @@
 ///////////////////////////////////////////////////////////////////////
 // File:        intfeaturedist.h
 // Description: Fast set-difference-based feature distance calculator.
-// Created:     Thu Sep 01 12:14:30 PDT 2011
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@ -49,11 +48,11 @@ public:
  // Setup the map for the given indexed_features that have been indexed by
  // feature_map. After use, use Set(..., false) to reset to the initial state
  // as this is faster than calling Init for sparse spaces.
-  void Set(const GenericVector<int> &indexed_features, int canonical_count, bool value);
+  void Set(const std::vector<int> &indexed_features, int canonical_count, bool value);

  // Compute the distance between the given feature vector and the last
  // Set feature vector.
-  double FeatureDistance(const GenericVector<int> &features) const;
+  double FeatureDistance(const std::vector<int> &features) const;
  double DebugFeatureDistance(const GenericVector<int> &features) const;

 private:
--- a/src/training/common/intfeaturemap.h
+++ b/src/training/common/intfeaturemap.h
@ -4,7 +4,6 @@
 // File:        intfeaturemap.h
 // Description: Encapsulation of IntFeatureSpace with IndexMapBiDi
 //              to provide a subspace mapping and fast feature lookup.
-// Created:     Tue Oct 26 08:58:30 PDT 2010
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@ -103,7 +102,7 @@ public:

  // Indexes the given array of features to a vector of sorted indices.
  void IndexAndSortFeatures(const INT_FEATURE_STRUCT *features, int num_features,
-                            GenericVector<int> *sorted_features) const {
+                            std::vector<int> *sorted_features) const {
    feature_space_.IndexAndSortFeatures(features, num_features, sorted_features);
  }
  // Maps the given array of index/sparse features to an array of map/compact
@ -111,8 +110,8 @@ public:
  // Assumes the input is sorted. The output indices are sorted and uniqued.
  // Returns the number of "missed" features, being features that
  // don't map to the compact feature space.
-  int MapIndexedFeatures(const GenericVector<int> &index_features,
-                         GenericVector<int> *map_features) const {
+  int MapIndexedFeatures(const std::vector<int> &index_features,
+                         std::vector<int> *map_features) const {
    return feature_map_.MapFeatures(index_features, map_features);
  }

--- a/src/training/common/sampleiterator.cpp
+++ b/src/training/common/sampleiterator.cpp
@ -204,7 +204,7 @@ int SampleIterator::SparseCharsetSize() const {
 // Sets the mapped_features_ from the features using the provided
 // feature_map.
 static void MapFeatures(TrainingSample &s, const IntFeatureMap &feature_map) {
-  GenericVector<int> indexed_features;
+  std::vector<int> indexed_features;
  feature_map.feature_space().IndexAndSortFeatures(s.features(), s.num_features(),
                                                   &indexed_features);
  feature_map.MapIndexedFeatures(indexed_features, &s.mapped_features_);
--- a/src/training/common/trainingsampleset.cpp
+++ b/src/training/common/trainingsampleset.cpp
@ -234,7 +234,7 @@ const BitVector &TrainingSampleSet::GetCloudFeatures(int font_id, int class_id)
 }
 // Gets the indexed features of the canonical sample of the given
 // font/class combination.
-const GenericVector<int> &TrainingSampleSet::GetCanonicalFeatures(int font_id, int class_id) const {
+const std::vector<int> &TrainingSampleSet::GetCanonicalFeatures(int font_id, int class_id) const {
  int font_index = font_id_map_.SparseToCompact(font_id);
  ASSERT_HOST(font_index >= 0);
  return (*font_class_array_)(font_index, class_id).canonical_features;
@ -420,7 +420,7 @@ int TrainingSampleSet::ReliablySeparable(int font_id1, int class_id1, int font_i
  const TrainingSample *sample2 = GetCanonicalSample(font_id2, class_id2);
  if (sample2 == nullptr)
    return 0; // There are no canonical features.
-  const GenericVector<int> &canonical2 = GetCanonicalFeatures(font_id2, class_id2);
+  const std::vector<int> &canonical2 = GetCanonicalFeatures(font_id2, class_id2);
  const BitVector &cloud1 = GetCloudFeatures(font_id1, class_id1);
  if (cloud1.size() == 0)
    return canonical2.size(); // There are no cloud features.
@ -598,7 +598,7 @@ void TrainingSampleSet::ComputeCanonicalSamples(const IntFeatureMap &map, bool d
      fcinfo.canonical_dist = 0.0f;
      for (int i = 0; i < fcinfo.samples.size(); ++i) {
        int s1 = fcinfo.samples[i];
-        const GenericVector<int> &features1 = samples_[s1]->indexed_features();
+        const std::vector<int> &features1 = samples_[s1]->indexed_features();
        f_table.Set(features1, features1.size(), true);
        double max_dist = 0.0;
        // Run the full squared-order search for similar samples. It is still
@ -609,7 +609,7 @@ void TrainingSampleSet::ComputeCanonicalSamples(const IntFeatureMap &map, bool d
          int s2 = fcinfo.samples[j];
          if (samples_[s2]->class_id() != c || samples_[s2]->font_id() != font_id || s2 == s1)
            continue;
-          GenericVector<int> features2 = samples_[s2]->indexed_features();
+          std::vector<int> features2 = samples_[s2]->indexed_features();
          double dist = f_table.FeatureDistance(features2);
          if (dist > max_dist) {
            max_dist = dist;
@ -719,7 +719,7 @@ void TrainingSampleSet::ComputeCloudFeatures(int feature_space_size) {
      fcinfo.cloud_features.Init(feature_space_size);
      for (int s = 0; s < num_samples; ++s) {
        const TrainingSample *sample = GetSample(font_id, c, s);
-        const GenericVector<int> &sample_features = sample->indexed_features();
+        const std::vector<int> &sample_features = sample->indexed_features();
        for (int i = 0; i < sample_features.size(); ++i)
          fcinfo.cloud_features.SetBit(sample_features[i]);
      }
@ -746,7 +746,7 @@ void TrainingSampleSet::DisplaySamplesWithFeature(int f_index, const Shape &shap
  for (int s = 0; s < num_raw_samples(); ++s) {
    const TrainingSample *sample = GetSample(s);
    if (shape.ContainsUnichar(sample->class_id())) {
-      GenericVector<int> indexed_features;
+      std::vector<int> indexed_features;
      space.IndexAndSortFeatures(sample->features(), sample->num_features(), &indexed_features);
      for (int f = 0; f < indexed_features.size(); ++f) {
        if (indexed_features[f] == f_index) {
--- a/src/training/common/trainingsampleset.h
+++ b/src/training/common/trainingsampleset.h
@ -108,7 +108,7 @@ public:
  const BitVector &GetCloudFeatures(int font_id, int class_id) const;
  // Gets the indexed features of the canonical sample of the given
  // font/class combination.
-  const GenericVector<int> &GetCanonicalFeatures(int font_id, int class_id) const;
+  const std::vector<int> &GetCanonicalFeatures(int font_id, int class_id) const;

  // Returns the distance between the given UniCharAndFonts pair.
  // If matched_fonts, only matching fonts, are considered, unless that yields
@ -241,7 +241,7 @@ private:

    // Non-serialized cache data.
    // Indexed features of the canonical sample.
-    GenericVector<int> canonical_features;
+    std::vector<int> canonical_features;
    // The mapped features of all the samples.
    BitVector cloud_features;

--- a/unittest/intfeaturemap_test.cc
+++ b/unittest/intfeaturemap_test.cc
@ -31,7 +31,7 @@ protected:
 public:
  // Expects that the given vector has contiguous integer values in the
  // range [start, end).
-  void ExpectContiguous(const GenericVector<int> &v, int start, int end) {
+  void ExpectContiguous(const std::vector<int> &v, int start, int end) {
    for (int i = start; i < end; ++i) {
      EXPECT_EQ(i, v[i - start]);
    }
@ -61,11 +61,11 @@ TEST_F(IntFeatureMapTest, Exhaustive) {
      }
    }
  }
-  GenericVector<int> index_features;
+  std::vector<int> index_features;
  map.IndexAndSortFeatures(features.get(), total_size, &index_features);
  EXPECT_EQ(total_size, index_features.size());
  int total_buckets = kXBuckets * kYBuckets * kThetaBuckets;
-  GenericVector<int> map_features;
+  std::vector<int> map_features;
  int misses = map.MapIndexedFeatures(index_features, &map_features);
  EXPECT_EQ(0, misses);
  EXPECT_EQ(total_buckets, map_features.size());