Merge pull request #3335 from stweil/genericvector

Replace more GenericVector by std::vector
This commit is contained in:
Egor Pugin 2021-03-17 18:39:59 +03:00 committed by GitHub
commit 345e74e28b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
25 changed files with 157 additions and 109 deletions

View File

@ -542,17 +542,16 @@ void TBLOB::GetPreciseBoundingBox(TBOX *precise_box) const {
// x-coord starting at box.left().
// Eg x_coords[0] is a collection of the x-coords of edges at y=bottom.
// Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1.
void TBLOB::GetEdgeCoords(const TBOX &box, GenericVector<GenericVector<int>> *x_coords,
GenericVector<GenericVector<int>> *y_coords) const {
GenericVector<int> empty;
x_coords->init_to_size(box.height(), empty);
y_coords->init_to_size(box.width(), empty);
CollectEdges(box, nullptr, nullptr, x_coords, y_coords);
void TBLOB::GetEdgeCoords(const TBOX &box, std::vector<std::vector<int>> &x_coords,
std::vector<std::vector<int>> &y_coords) const {
x_coords.resize(box.height());
y_coords.resize(box.width());
CollectEdges(box, nullptr, nullptr, &x_coords, &y_coords);
// Sort the output vectors.
for (int i = 0; i < x_coords->size(); ++i)
(*x_coords)[i].sort();
for (int i = 0; i < y_coords->size(); ++i)
(*y_coords)[i].sort();
for (int i = 0; i < x_coords.size(); ++i)
std::sort(x_coords[i].begin(), x_coords[i].end());
for (int i = 0; i < y_coords.size(); ++i)
std::sort(y_coords[i].begin(), y_coords[i].end());
}
// Accumulates the segment between pt1 and pt2 in the LLSQ, quantizing over
@ -585,8 +584,8 @@ static void SegmentLLSQ(const FCOORD &pt1, const FCOORD &pt2, LLSQ *accumulator)
// are clipped to ([0,x_limit], [0,y_limit]).
// See GetEdgeCoords above for a description of x_coords, y_coords.
static void SegmentCoords(const FCOORD &pt1, const FCOORD &pt2, int x_limit, int y_limit,
GenericVector<GenericVector<int>> *x_coords,
GenericVector<GenericVector<int>> *y_coords) {
std::vector<std::vector<int>> *x_coords,
std::vector<std::vector<int>> *y_coords) {
FCOORD step(pt2);
step -= pt1;
int start = ClipToRange(IntCastRounded(std::min(pt1.x(), pt2.x())), 0, x_limit);
@ -639,8 +638,8 @@ static void SegmentBBox(const FCOORD &pt1, const FCOORD &pt2, TBOX *bbox) {
// indices into x_coords, y_coords are offset by box.botleft().
static void CollectEdgesOfRun(const EDGEPT *startpt, const EDGEPT *lastpt, const DENORM &denorm,
const TBOX &box, TBOX *bounding_box, LLSQ *accumulator,
GenericVector<GenericVector<int>> *x_coords,
GenericVector<GenericVector<int>> *y_coords) {
std::vector<std::vector<int>> *x_coords,
std::vector<std::vector<int>> *y_coords) {
const C_OUTLINE *outline = startpt->src_outline;
int x_limit = box.width() - 1;
int y_limit = box.height() - 1;
@ -727,8 +726,8 @@ static void CollectEdgesOfRun(const EDGEPT *startpt, const EDGEPT *lastpt, const
// normalization.
// For a description of x_coords, y_coords, see GetEdgeCoords above.
void TBLOB::CollectEdges(const TBOX &box, TBOX *bounding_box, LLSQ *llsq,
GenericVector<GenericVector<int>> *x_coords,
GenericVector<GenericVector<int>> *y_coords) const {
std::vector<std::vector<int>> *x_coords,
std::vector<std::vector<int>> *y_coords) const {
// Iterate the outlines.
for (const TESSLINE *ol = outlines; ol != nullptr; ol = ol->next) {
// Iterate the polygon.

View File

@ -391,8 +391,8 @@ struct TBLOB {
// x-coord starting at box.left().
// Eg x_coords[0] is a collection of the x-coords of edges at y=bottom.
// Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1.
void GetEdgeCoords(const TBOX &box, GenericVector<GenericVector<int>> *x_coords,
GenericVector<GenericVector<int>> *y_coords) const;
void GetEdgeCoords(const TBOX &box, std::vector<std::vector<int>> &x_coords,
std::vector<std::vector<int>> &y_coords) const;
TESSLINE *outlines; // List of outlines in blob.
@ -403,8 +403,8 @@ private: // TODO(rays) Someday the data members will be private too.
// normalization.
// For a description of x_coords, y_coords, see GetEdgeCoords above.
void CollectEdges(const TBOX &box, TBOX *bounding_box, LLSQ *llsq,
GenericVector<GenericVector<int>> *x_coords,
GenericVector<GenericVector<int>> *y_coords) const;
std::vector<std::vector<int>> *x_coords,
std::vector<std::vector<int>> *y_coords) const;
private:
// DENORM indicating the transformations that this blob has undergone so far.

View File

@ -153,8 +153,8 @@ void DENORM::SetupNormalization(const BLOCK *block, const FCOORD *rotation,
// pre-initialized to be the same size as box. Each element will contain the
// minimum of x and y run-length as shown above.
static void ComputeRunlengthImage(const TBOX &box,
const GenericVector<GenericVector<int>> &x_coords,
const GenericVector<GenericVector<int>> &y_coords,
const std::vector<std::vector<int>> &x_coords,
const std::vector<std::vector<int>> &y_coords,
GENERIC_2D_ARRAY<int> *minruns) {
int width = box.width();
int height = box.height();
@ -264,8 +264,8 @@ static void ComputeEdgeDensityProfiles(const TBOX &box, const GENERIC_2D_ARRAY<i
// See comments on the helper functions above for more details.
void DENORM::SetupNonLinear(const DENORM *predecessor, const TBOX &box, float target_width,
float target_height, float final_xshift, float final_yshift,
const GenericVector<GenericVector<int>> &x_coords,
const GenericVector<GenericVector<int>> &y_coords) {
const std::vector<std::vector<int>> &x_coords,
const std::vector<std::vector<int>> &y_coords) {
Clear();
predecessor_ = predecessor;
// x_map_ and y_map_ store a mapping from input x and y coordinate to output

View File

@ -2,7 +2,6 @@
* File: normalis.h (Formerly denorm.h)
* Description: Code for the DENORM class.
* Author: Ray Smith
* Created: Thu Apr 23 09:22:43 BST 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
@ -20,6 +19,7 @@
#ifndef NORMALIS_H
#define NORMALIS_H
#include <vector>
#include <tesseract/export.h>
struct Pix;
@ -193,8 +193,8 @@ public:
// The second-level vectors must all be sorted in ascending order.
void SetupNonLinear(const DENORM *predecessor, const TBOX &box, float target_width,
float target_height, float final_xshift, float final_yshift,
const GenericVector<GenericVector<int>> &x_coords,
const GenericVector<GenericVector<int>> &y_coords);
const std::vector<std::vector<int>> &x_coords,
const std::vector<std::vector<int>> &y_coords);
// Transforms the given coords one step forward to normalized space, without
// using any block rotation or predecessor.

View File

@ -209,6 +209,61 @@ inline void Reverse64(void *ptr) {
ReverseN(ptr, 8);
}
// Reads a vector of simple types from the given file. Assumes that bitwise
// read/write will work with ReverseN according to sizeof(T).
// The expected layout is a 32-bit element count followed by the raw elements.
// If swap is true, assumes a big/little-endian swap is needed: the count and
// every element are byte-reversed after being read.
// Returns false in case of error, i.e. on a short read or when the element
// count exceeds UINT16_MAX. On failure the contents of data are unspecified.
// NOTE(review): Serialize writes class types element by element through their
// own Serialize member, whereas this function always reads bitwise - confirm
// that it is only used for simple types.
template <typename T>
bool DeSerialize(bool swap, FILE *fp, std::vector<T> &data) {
  uint32_t size;
  if (fread(&size, sizeof(size), 1, fp) != 1) {
    return false;
  }
  if (swap) {
    Reverse32(&size);
  }
  // Arbitrarily limit the number of elements to protect against bad data.
  // Bad data is a runtime condition rather than a programming error, so it is
  // reported through the return value in every build type instead of
  // aborting via assert in debug builds.
  if (size > UINT16_MAX) {
    return false;
  }
  // TODO: optimize.
  data.resize(size);
  if (size > 0) {
    if (fread(data.data(), sizeof(T), size, fp) != size) {
      return false;
    }
    if (swap) {
      // Fix the byte order of each element individually.
      for (auto &item : data) {
        ReverseN(&item, sizeof(T));
      }
    }
  }
  return true;
}
// Writes a vector to the given file as a 32-bit element count followed by the
// elements. Class types are written one at a time through their own
// Serialize(FILE *) member; every other type is written bitwise in a single
// block, so bitwise read/write of T must work for it.
// Returns false in case of error.
template <typename T>
bool Serialize(FILE *fp, const std::vector<T> &data) {
  const uint32_t count = static_cast<uint32_t>(data.size());
  if (fwrite(&count, sizeof(count), 1, fp) != 1) {
    return false;
  }
  if constexpr (std::is_class_v<T>) {
    // Serialize a tesseract class.
    for (const auto &element : data) {
      if (!element.Serialize(fp)) {
        return false;
      }
    }
    return true;
  } else {
    // An empty vector has no payload to write after the count.
    return count == 0 || fwrite(data.data(), sizeof(T), count, fp) == count;
  }
}
} // namespace tesseract
#endif // TESSERACT_CCUTIL_HELPERS_H_

View File

@ -2,7 +2,6 @@
// File: indexmapbidi.cpp
// Description: Bi-directional mapping between a sparse and compact space.
// Author: rays@google.com (Ray Smith)
// Created: Tue Apr 06 11:33:59 PDT 2010
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
@ -239,8 +238,8 @@ bool IndexMapBiDi::DeSerialize(bool swap, FILE *fp) {
// Assumes the input is sorted. The output indices are sorted and uniqued.
// Return value is the number of "missed" features, being features that
// don't map to the compact feature space.
int IndexMapBiDi::MapFeatures(const GenericVector<int> &sparse, GenericVector<int> *compact) const {
compact->truncate(0);
int IndexMapBiDi::MapFeatures(const std::vector<int> &sparse, std::vector<int> *compact) const {
compact->clear();
int num_features = sparse.size();
int missed_features = 0;
int prev_good_feature = -1;

View File

@ -157,7 +157,7 @@ public:
// Assumes the input is sorted. The output indices are sorted and uniqued.
// Return value is the number of "missed" features, being features that
// don't map to the compact feature space.
int MapFeatures(const GenericVector<int> &sparse, GenericVector<int> *compact) const;
int MapFeatures(const std::vector<int> &sparse, std::vector<int> *compact) const;
private:
// Returns the master compact index for a given compact index.

View File

@ -60,7 +60,6 @@
#include "unicity_table.h" // for UnicityTable
#include <tesseract/unichar.h> // for UNICHAR_ID, INVALID_UNICHAR_ID
#include "genericvector.h" // for GenericVector
#include "helpers.h" // for IntCastRounded, ClipToRange
#include "serialis.h" // for TFile
@ -1104,7 +1103,7 @@ void Classify::ExpandShapesAndApplyCorrections(ADAPT_CLASS *classes, bool debug,
// 2. Multi-unichar shapetable. Variable unichars in the shapes referenced
// by int_result. In this case, build a vector of UnicharRating to
// gather together different font-ids for each unichar. Also covers case1.
GenericVector<UnicharRating> mapped_results;
std::vector<UnicharRating> mapped_results;
for (int f = 0; f < int_result->fonts.size(); ++f) {
int shape_id = int_result->fonts[f].fontinfo_id;
const Shape &shape = shape_table_->GetShape(shape_id);
@ -1260,7 +1259,7 @@ int Classify::CharNormClassifier(TBLOB *blob, const TrainingSample &sample,
} /* CharNormClassifier */
// As CharNormClassifier, but operates on a TrainingSample and outputs to
// a GenericVector of ShapeRating without conversion to classes.
// a vector of UnicharRating without conversion to classes.
int Classify::CharNormTrainingSample(bool pruner_only, int keep_this, const TrainingSample &sample,
std::vector<UnicharRating> *results) {
results->clear();

View File

@ -238,7 +238,7 @@ public:
int CharNormClassifier(TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results);
// As CharNormClassifier, but operates on a TrainingSample and outputs to
// a GenericVector of ShapeRating without conversion to classes.
// a vector of UnicharRating without conversion to classes.
int CharNormTrainingSample(bool pruner_only, int keep_this, const TrainingSample &sample,
std::vector<UnicharRating> *results);
UNICHAR_ID *GetAmbiguities(TBLOB *Blob, CLASS_ID CorrectClass);
@ -312,7 +312,7 @@ public:
static void ExtractFeatures(const TBLOB &blob, bool nonlinear_norm,
std::vector<INT_FEATURE_STRUCT> *bl_features,
std::vector<INT_FEATURE_STRUCT> *cn_features,
INT_FX_RESULT_STRUCT *results, GenericVector<int> *outline_cn_counts);
INT_FX_RESULT_STRUCT *results, std::vector<int> *outline_cn_counts);
/* float2int.cpp ************************************************************/
void ClearCharNormArray(uint8_t *char_norm_array);
void ComputeIntCharNormArray(const FEATURE_STRUCT &norm_feature, uint8_t *char_norm_array);
@ -481,7 +481,7 @@ private:
Dict dict_;
GenericVector<uint16_t> shapetable_cutoffs_;
std::vector<uint16_t> shapetable_cutoffs_;
/* variables used to hold performance statistics */
int NumAdaptationsFailed = 0;

View File

@ -42,7 +42,7 @@ void Classify::ReadNewCutoffs(TFile *fp, uint16_t *Cutoffs) {
int Cutoff;
if (shape_table_ != nullptr) {
if (!shapetable_cutoffs_.DeSerialize(fp)) {
if (!fp->DeSerialize(shapetable_cutoffs_)) {
tprintf("Error during read of shapetable pffmtable!\n");
}
}

View File

@ -53,8 +53,8 @@ INT_FEATURE_STRUCT IntFeatureSpace::PositionFromIndex(int index) const {
// Bulk calls to Index. Maps the given array of features to a vector of
// int32_t indices in the same order as the input.
void IntFeatureSpace::IndexFeatures(const INT_FEATURE_STRUCT *features, int num_features,
GenericVector<int> *mapped_features) const {
mapped_features->truncate(0);
std::vector<int> *mapped_features) const {
mapped_features->clear();
for (int f = 0; f < num_features; ++f)
mapped_features->push_back(Index(features[f]));
}
@ -62,11 +62,11 @@ void IntFeatureSpace::IndexFeatures(const INT_FEATURE_STRUCT *features, int num_
// Bulk calls to Index. Maps the given array of features to a vector of
// sorted int32_t indices.
void IntFeatureSpace::IndexAndSortFeatures(const INT_FEATURE_STRUCT *features, int num_features,
GenericVector<int> *sorted_features) const {
sorted_features->truncate(0);
std::vector<int> *sorted_features) const {
sorted_features->clear();
for (int f = 0; f < num_features; ++f)
sorted_features->push_back(Index(features[f]));
sorted_features->sort();
std::sort(sorted_features->begin(), sorted_features->end());
}
// Returns a feature space index for the given x,y position in a display

View File

@ -3,7 +3,6 @@
///////////////////////////////////////////////////////////////////////
// File: intfeaturespace.h
// Description: Indexed feature space based on INT_FEATURE_STRUCT.
// Created: Wed Mar 24 10:55:30 PDT 2010
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -20,7 +19,6 @@
#ifndef TESSERACT_CLASSIFY_INTFEATURESPACE_H_
#define TESSERACT_CLASSIFY_INTFEATURESPACE_H_
#include "genericvector.h"
#include "intproto.h"
// Extent of x,y,theta in the input feature space. [0,255].
@ -63,11 +61,11 @@ public:
// Bulk calls to Index. Maps the given array of features to a vector of
// int32_t indices in the same order as the input.
void IndexFeatures(const INT_FEATURE_STRUCT *features, int num_features,
GenericVector<int> *mapped_features) const;
std::vector<int> *mapped_features) const;
// Bulk calls to Index. Maps the given array of features to a vector of
// sorted int32_t indices.
void IndexAndSortFeatures(const INT_FEATURE_STRUCT *features, int num_features,
GenericVector<int> *sorted_features) const;
std::vector<int> *sorted_features) const;
// Returns a feature space index for the given x,y position in a display
// window, or -1 if the feature is a miss.
int XYToFeatureIndex(int x, int y) const;

View File

@ -144,12 +144,12 @@ void Classify::SetupBLCNDenorms(const TBLOB &blob, bool nonlinear_norm, DENORM *
128.0f, 128.0f);
// Setup the denorm for character normalization.
if (nonlinear_norm) {
GenericVector<GenericVector<int>> x_coords;
GenericVector<GenericVector<int>> y_coords;
std::vector<std::vector<int>> x_coords;
std::vector<std::vector<int>> y_coords;
TBOX box;
blob.GetPreciseBoundingBox(&box);
box.pad(1, 1);
blob.GetEdgeCoords(box, &x_coords, &y_coords);
blob.GetEdgeCoords(box, x_coords, y_coords);
cn_denorm->SetupNonLinear(&blob.denorm(), box, UINT8_MAX, UINT8_MAX, 0.0f, 0.0f, x_coords,
y_coords);
} else {
@ -431,11 +431,11 @@ void Classify::ExtractFeatures(const TBLOB &blob, bool nonlinear_norm,
std::vector<INT_FEATURE_STRUCT> *bl_features,
std::vector<INT_FEATURE_STRUCT> *cn_features,
INT_FX_RESULT_STRUCT *results,
GenericVector<int> *outline_cn_counts) {
std::vector<int> *outline_cn_counts) {
DENORM bl_denorm, cn_denorm;
tesseract::Classify::SetupBLCNDenorms(blob, nonlinear_norm, &bl_denorm, &cn_denorm, results);
if (outline_cn_counts != nullptr)
outline_cn_counts->truncate(0);
outline_cn_counts->clear();
// Iterate the outlines.
for (TESSLINE *ol = blob.outlines; ol != nullptr; ol = ol->next) {
// Iterate the polygon.

View File

@ -31,8 +31,6 @@
#include "tprintf.h"
#include "trainingsample.h"
#include "genericvector.h"
namespace tesseract {
// Classifies the given [training] sample, writing to results.
@ -45,8 +43,8 @@ int ShapeClassifier::UnicharClassifySample(const TrainingSample &sample, Pix *pa
std::vector<ShapeRating> shape_results;
int num_shape_results = ClassifySample(sample, page_pix, debug, keep_this, &shape_results);
const ShapeTable *shapes = GetShapeTable();
GenericVector<int> unichar_map;
unichar_map.init_to_size(shapes->unicharset().size(), -1);
std::vector<int> unichar_map;
unichar_map.resize(shapes->unicharset().size(), -1);
for (int r = 0; r < num_shape_results; ++r) {
shapes->AddShapeToResults(shape_results[r], &unichar_map, results);
}

View File

@ -35,7 +35,7 @@ namespace tesseract {
// unichar_id. If the results are sorted by rating, this will also be the
// best result with the required unichar_id.
// Returns -1 if the unichar_id is not found
int ShapeRating::FirstResultWithUnichar(const GenericVector<ShapeRating> &results,
int ShapeRating::FirstResultWithUnichar(const std::vector<ShapeRating> &results,
const ShapeTable &shape_table, UNICHAR_ID unichar_id) {
for (int r = 0; r < results.size(); ++r) {
const int shape_id = results[r].shape_id;
@ -51,7 +51,7 @@ int ShapeRating::FirstResultWithUnichar(const GenericVector<ShapeRating> &result
// unichar_id. If the results are sorted by rating, this will also be the
// best result with the required unichar_id.
// Returns -1 if the unichar_id is not found
int UnicharRating::FirstResultWithUnichar(const GenericVector<UnicharRating> &results,
int UnicharRating::FirstResultWithUnichar(const std::vector<UnicharRating> &results,
UNICHAR_ID unichar_id) {
for (int r = 0; r < results.size(); ++r) {
if (results[r].unichar_id == unichar_id)
@ -62,12 +62,12 @@ int UnicharRating::FirstResultWithUnichar(const GenericVector<UnicharRating> &re
// Writes to the given file. Returns false in case of error.
bool UnicharAndFonts::Serialize(FILE *fp) const {
return tesseract::Serialize(fp, &unichar_id) && font_ids.Serialize(fp);
return tesseract::Serialize(fp, &unichar_id) && tesseract::Serialize(fp, font_ids);
}
// Reads from the given file. Returns false in case of error.
// Reads from the given file. Returns false in case of error.
bool UnicharAndFonts::DeSerialize(TFile *fp) {
return fp->DeSerialize(&unichar_id) && font_ids.DeSerialize(fp);
return fp->DeSerialize(&unichar_id) && fp->DeSerialize(font_ids);
}
// Sort function to sort a pair of UnicharAndFonts by unichar_id.
@ -77,10 +77,14 @@ int UnicharAndFonts::SortByUnicharId(const void *v1, const void *v2) {
return p1->unichar_id - p2->unichar_id;
}
// Less-than predicate for std::sort: returns true if v1 has a smaller
// unichar_id than v2.
bool UnicharAndFonts::StdSortByUnicharId(const UnicharAndFonts &v1, const UnicharAndFonts &v2) {
  const auto first_id = v1.unichar_id;
  const auto second_id = v2.unichar_id;
  return first_id < second_id;
}
// Writes to the given file. Returns false in case of error.
bool Shape::Serialize(FILE *fp) const {
uint8_t sorted = unichars_sorted_;
return tesseract::Serialize(fp, &sorted) && unichars_.SerializeClasses(fp);
return tesseract::Serialize(fp, &sorted) && tesseract::Serialize(fp, unichars_);
}
// Reads from the given file. Returns false in case of error.
@ -89,7 +93,7 @@ bool Shape::DeSerialize(TFile *fp) {
if (!fp->DeSerialize(&sorted))
return false;
unichars_sorted_ = sorted != 0;
return unichars_.DeSerializeClasses(fp);
return fp->DeSerialize(unichars_);
}
// Adds a font_id for the given unichar_id. If the unichar_id is not
@ -98,7 +102,7 @@ void Shape::AddToShape(int unichar_id, int font_id) {
for (int c = 0; c < unichars_.size(); ++c) {
if (unichars_[c].unichar_id == unichar_id) {
// Found the unichar in the shape table.
GenericVector<int> &font_list = unichars_[c].font_ids;
std::vector<int> &font_list = unichars_[c].font_ids;
for (int f = 0; f < font_list.size(); ++f) {
if (font_list[f] == font_id)
return; // Font is already there.
@ -195,7 +199,7 @@ bool Shape::operator==(const Shape &other) const {
bool Shape::IsSubsetOf(const Shape &other) const {
for (int c = 0; c < unichars_.size(); ++c) {
int unichar_id = unichars_[c].unichar_id;
const GenericVector<int> &font_list = unichars_[c].font_ids;
const std::vector<int> &font_list = unichars_[c].font_ids;
for (int f = 0; f < font_list.size(); ++f) {
if (!other.ContainsUnicharAndFont(unichar_id, font_list[f]))
return false;
@ -223,7 +227,7 @@ bool Shape::IsEqualUnichars(Shape *other) {
// Sorts the unichars_ vector by unichar.
void Shape::SortUnichars() {
unichars_.sort(UnicharAndFonts::SortByUnicharId);
std::sort(unichars_.begin(), unichars_.end(), UnicharAndFonts::StdSortByUnicharId);
unichars_sorted_ = true;
}
@ -262,7 +266,7 @@ int ShapeTable::NumFonts() const {
// Re-indexes the class_ids in the shapetable according to the given map.
// Useful in conjunction with set_unicharset.
void ShapeTable::ReMapClassIds(const GenericVector<int> &unicharset_map) {
void ShapeTable::ReMapClassIds(const std::vector<int> &unicharset_map) {
for (int shape_id = 0; shape_id < shape_table_.size(); ++shape_id) {
Shape *shape = shape_table_[shape_id];
for (int c = 0; c < shape->size(); ++c) {
@ -629,7 +633,7 @@ bool ShapeTable::CommonFont(int shape_id1, int shape_id2) const {
const Shape &shape1 = GetShape(shape_id1);
const Shape &shape2 = GetShape(shape_id2);
for (int c1 = 0; c1 < shape1.size(); ++c1) {
const GenericVector<int> &font_list1 = shape1[c1].font_ids;
const std::vector<int> &font_list1 = shape1[c1].font_ids;
for (int f = 0; f < font_list1.size(); ++f) {
if (shape2.ContainsFont(font_list1[f]))
return true;
@ -640,9 +644,9 @@ bool ShapeTable::CommonFont(int shape_id1, int shape_id2) const {
// Appends the master shapes from other to this.
// If not nullptr, shape_map is set to map other shape_ids to this's shape_ids.
void ShapeTable::AppendMasterShapes(const ShapeTable &other, GenericVector<int> *shape_map) {
void ShapeTable::AppendMasterShapes(const ShapeTable &other, std::vector<int> *shape_map) {
if (shape_map != nullptr)
shape_map->init_to_size(other.NumShapes(), -1);
shape_map->resize(other.NumShapes(), -1);
for (int s = 0; s < other.shape_table_.size(); ++s) {
if (other.shape_table_[s]->destination_index() < 0) {
int index = AddShape(*other.shape_table_[s]);
@ -669,7 +673,7 @@ int ShapeTable::NumMasterShapes() const {
// of decreasing rating.
// The unichar_map vector indicates the index of the results entry containing
// each unichar, or -1 if the unichar is not yet included in results.
void ShapeTable::AddShapeToResults(const ShapeRating &shape_rating, GenericVector<int> *unichar_map,
void ShapeTable::AddShapeToResults(const ShapeRating &shape_rating, std::vector<int> *unichar_map,
std::vector<UnicharRating> *results) const {
if (shape_rating.joined) {
AddUnicharToResults(UNICHAR_JOINED, shape_rating.rating, unichar_map, results);
@ -690,9 +694,9 @@ void ShapeTable::AddShapeToResults(const ShapeRating &shape_rating, GenericVecto
// Adds the given unichar_id to the results if needed, updating unichar_map
// and returning the index of unichar in results.
int ShapeTable::AddUnicharToResults(int unichar_id, float rating, GenericVector<int> *unichar_map,
int ShapeTable::AddUnicharToResults(int unichar_id, float rating, std::vector<int> *unichar_map,
std::vector<UnicharRating> *results) const {
int result_index = unichar_map->get(unichar_id);
int result_index = unichar_map->at(unichar_id);
if (result_index < 0) {
UnicharRating result(unichar_id, rating);
result_index = results->size();

View File

@ -27,8 +27,6 @@
#include "genericheap.h"
#include "intmatcher.h"
#include "genericvector.h"
namespace tesseract {
class UNICHARSET;
@ -53,7 +51,7 @@ struct UnicharRating {
// unichar_id. If the results are sorted by rating, this will also be the
// best result with the required unichar_id.
// Returns -1 if the unichar_id is not found
static int FirstResultWithUnichar(const GenericVector<UnicharRating> &results,
static int FirstResultWithUnichar(const std::vector<UnicharRating> &results,
UNICHAR_ID unichar_id);
// Index into some UNICHARSET table indicates the class of the answer.
@ -84,7 +82,7 @@ struct ShapeRating {
// unichar_id. If the results are sorted by rating, this will also be the
// best result with the required unichar_id.
// Returns -1 if the unichar_id is not found
static int FirstResultWithUnichar(const GenericVector<ShapeRating> &results,
static int FirstResultWithUnichar(const std::vector<ShapeRating> &results,
const ShapeTable &shape_table, UNICHAR_ID unichar_id);
// Index into some shape table indicates the class of the answer.
@ -139,8 +137,9 @@ struct UnicharAndFonts {
// Sort function to sort a pair of UnicharAndFonts by unichar_id.
static int SortByUnicharId(const void *v1, const void *v2);
static bool StdSortByUnicharId(const UnicharAndFonts &v1, const UnicharAndFonts &v2);
GenericVector<int32_t> font_ids;
std::vector<int32_t> font_ids;
int32_t unichar_id;
};
@ -216,7 +215,7 @@ private:
int destination_index_ = 0;
// Array of unichars, each with a set of fonts. Each unichar has at most
// one entry in the vector.
GenericVector<UnicharAndFonts> unichars_;
std::vector<UnicharAndFonts> unichars_;
};
// ShapeTable is a class to encapsulate the triple indirection that is
@ -255,7 +254,7 @@ public:
}
// Re-indexes the class_ids in the shapetable according to the given map.
// Useful in conjunction with set_unicharset.
void ReMapClassIds(const GenericVector<int> &unicharset_map);
void ReMapClassIds(const std::vector<int> &unicharset_map);
// Returns a string listing the classes/fonts in a shape.
std::string DebugStr(int shape_id) const;
// Returns a debug string summarizing the table.
@ -318,7 +317,7 @@ public:
// copy of a ShapeTable.
// If not nullptr, shape_map is set to map other shape_ids to this's
// shape_ids.
void AppendMasterShapes(const ShapeTable &other, GenericVector<int> *shape_map);
void AppendMasterShapes(const ShapeTable &other, std::vector<int> *shape_map);
// Returns the number of master shapes remaining after merging.
int NumMasterShapes() const;
// Returns the destination of this shape, (if merged), taking into account
@ -345,13 +344,13 @@ public:
// of decreasing rating.
// The unichar_map vector indicates the index of the results entry containing
// each unichar, or -1 if the unichar is not yet included in results.
void AddShapeToResults(const ShapeRating &shape_rating, GenericVector<int> *unichar_map,
void AddShapeToResults(const ShapeRating &shape_rating, std::vector<int> *unichar_map,
std::vector<UnicharRating> *results) const;
private:
// Adds the given unichar_id to the results if needed, updating unichar_map
// and returning the index of unichar in results.
int AddUnicharToResults(int unichar_id, float rating, GenericVector<int> *unichar_map,
int AddUnicharToResults(int unichar_id, float rating, std::vector<int> *unichar_map,
std::vector<UnicharRating> *results) const;
// Pointer to a provided unicharset used only by the Debugstr member.

View File

@ -272,7 +272,7 @@ void TrainingSample::ExtractCharDesc(int int_feature_type, int micro_type, int c
// Sets the mapped_features_ from the features_ using the provided
// feature_space to the indexed versions of the features.
void TrainingSample::IndexFeatures(const IntFeatureSpace &feature_space) {
GenericVector<int> indexed_features;
std::vector<int> indexed_features;
feature_space.IndexAndSortFeatures(features_, num_features_, &mapped_features_);
features_are_indexed_ = true;
features_are_mapped_ = false;

View File

@ -181,11 +181,11 @@ public:
bool features_are_mapped() const {
return features_are_mapped_;
}
const GenericVector<int> &mapped_features() const {
const std::vector<int> &mapped_features() const {
ASSERT_HOST(features_are_mapped_);
return mapped_features_;
}
const GenericVector<int> &indexed_features() const {
const std::vector<int> &indexed_features() const {
ASSERT_HOST(features_are_indexed_);
return mapped_features_;
}
@ -239,7 +239,7 @@ public:
// hide after refactoring
// Indexed/mapped features, as indicated by the bools below.
GenericVector<int> mapped_features_;
std::vector<int> mapped_features_;
bool features_are_indexed_;
bool features_are_mapped_;

View File

@ -3,7 +3,6 @@
///////////////////////////////////////////////////////////////////////
// File: intfeaturedist.cpp
// Description: Fast set-difference-based feature distance calculator.
// Created: Thu Sep 01 13:07:30 PDT 2011
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -50,7 +49,7 @@ void IntFeatureDist::Init(const IntFeatureMap *feature_map) {
// Setup the map for the given indexed_features that have been indexed by
// feature_map.
void IntFeatureDist::Set(const GenericVector<int> &indexed_features, int canonical_count,
void IntFeatureDist::Set(const std::vector<int> &indexed_features, int canonical_count,
bool value) {
total_feature_weight_ = canonical_count;
for (int i = 0; i < indexed_features.size(); ++i) {
@ -76,7 +75,7 @@ void IntFeatureDist::Set(const GenericVector<int> &indexed_features, int canonic
// Compute the distance between the given feature vector and the last
// Set feature vector.
double IntFeatureDist::FeatureDistance(const GenericVector<int> &features) const {
double IntFeatureDist::FeatureDistance(const std::vector<int> &features) const {
const int num_test_features = features.size();
const double denominator = total_feature_weight_ + num_test_features;
double misses = denominator;

View File

@ -3,7 +3,6 @@
///////////////////////////////////////////////////////////////////////
// File: intfeaturedist.h
// Description: Fast set-difference-based feature distance calculator.
// Created: Thu Sep 01 12:14:30 PDT 2011
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -49,11 +48,11 @@ public:
// Setup the map for the given indexed_features that have been indexed by
// feature_map. After use, use Set(..., false) to reset to the initial state
// as this is faster than calling Init for sparse spaces.
void Set(const GenericVector<int> &indexed_features, int canonical_count, bool value);
void Set(const std::vector<int> &indexed_features, int canonical_count, bool value);
// Compute the distance between the given feature vector and the last
// Set feature vector.
double FeatureDistance(const GenericVector<int> &features) const;
double FeatureDistance(const std::vector<int> &features) const;
double DebugFeatureDistance(const GenericVector<int> &features) const;
private:

View File

@ -4,7 +4,6 @@
// File: intfeaturemap.h
// Description: Encapsulation of IntFeatureSpace with IndexMapBiDi
// to provide a subspace mapping and fast feature lookup.
// Created: Tue Oct 26 08:58:30 PDT 2010
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -103,7 +102,7 @@ public:
// Indexes the given array of features to a vector of sorted indices.
void IndexAndSortFeatures(const INT_FEATURE_STRUCT *features, int num_features,
GenericVector<int> *sorted_features) const {
std::vector<int> *sorted_features) const {
feature_space_.IndexAndSortFeatures(features, num_features, sorted_features);
}
// Maps the given array of index/sparse features to an array of map/compact
@ -111,8 +110,8 @@ public:
// Assumes the input is sorted. The output indices are sorted and uniqued.
// Returns the number of "missed" features, being features that
// don't map to the compact feature space.
int MapIndexedFeatures(const GenericVector<int> &index_features,
GenericVector<int> *map_features) const {
int MapIndexedFeatures(const std::vector<int> &index_features,
std::vector<int> *map_features) const {
return feature_map_.MapFeatures(index_features, map_features);
}

View File

@ -204,7 +204,7 @@ int SampleIterator::SparseCharsetSize() const {
// Sets the mapped_features_ from the features using the provided
// feature_map.
static void MapFeatures(TrainingSample &s, const IntFeatureMap &feature_map) {
GenericVector<int> indexed_features;
std::vector<int> indexed_features;
feature_map.feature_space().IndexAndSortFeatures(s.features(), s.num_features(),
&indexed_features);
feature_map.MapIndexedFeatures(indexed_features, &s.mapped_features_);

View File

@ -234,7 +234,7 @@ const BitVector &TrainingSampleSet::GetCloudFeatures(int font_id, int class_id)
}
// Gets the indexed features of the canonical sample of the given
// font/class combination.
const GenericVector<int> &TrainingSampleSet::GetCanonicalFeatures(int font_id, int class_id) const {
const std::vector<int> &TrainingSampleSet::GetCanonicalFeatures(int font_id, int class_id) const {
int font_index = font_id_map_.SparseToCompact(font_id);
ASSERT_HOST(font_index >= 0);
return (*font_class_array_)(font_index, class_id).canonical_features;
@ -420,7 +420,7 @@ int TrainingSampleSet::ReliablySeparable(int font_id1, int class_id1, int font_i
const TrainingSample *sample2 = GetCanonicalSample(font_id2, class_id2);
if (sample2 == nullptr)
return 0; // There are no canonical features.
const GenericVector<int> &canonical2 = GetCanonicalFeatures(font_id2, class_id2);
const std::vector<int> &canonical2 = GetCanonicalFeatures(font_id2, class_id2);
const BitVector &cloud1 = GetCloudFeatures(font_id1, class_id1);
if (cloud1.size() == 0)
return canonical2.size(); // There are no cloud features.
@ -598,7 +598,7 @@ void TrainingSampleSet::ComputeCanonicalSamples(const IntFeatureMap &map, bool d
fcinfo.canonical_dist = 0.0f;
for (int i = 0; i < fcinfo.samples.size(); ++i) {
int s1 = fcinfo.samples[i];
const GenericVector<int> &features1 = samples_[s1]->indexed_features();
const std::vector<int> &features1 = samples_[s1]->indexed_features();
f_table.Set(features1, features1.size(), true);
double max_dist = 0.0;
// Run the full squared-order search for similar samples. It is still
@ -609,7 +609,7 @@ void TrainingSampleSet::ComputeCanonicalSamples(const IntFeatureMap &map, bool d
int s2 = fcinfo.samples[j];
if (samples_[s2]->class_id() != c || samples_[s2]->font_id() != font_id || s2 == s1)
continue;
GenericVector<int> features2 = samples_[s2]->indexed_features();
std::vector<int> features2 = samples_[s2]->indexed_features();
double dist = f_table.FeatureDistance(features2);
if (dist > max_dist) {
max_dist = dist;
@ -719,7 +719,7 @@ void TrainingSampleSet::ComputeCloudFeatures(int feature_space_size) {
fcinfo.cloud_features.Init(feature_space_size);
for (int s = 0; s < num_samples; ++s) {
const TrainingSample *sample = GetSample(font_id, c, s);
const GenericVector<int> &sample_features = sample->indexed_features();
const std::vector<int> &sample_features = sample->indexed_features();
for (int i = 0; i < sample_features.size(); ++i)
fcinfo.cloud_features.SetBit(sample_features[i]);
}
@ -746,7 +746,7 @@ void TrainingSampleSet::DisplaySamplesWithFeature(int f_index, const Shape &shap
for (int s = 0; s < num_raw_samples(); ++s) {
const TrainingSample *sample = GetSample(s);
if (shape.ContainsUnichar(sample->class_id())) {
GenericVector<int> indexed_features;
std::vector<int> indexed_features;
space.IndexAndSortFeatures(sample->features(), sample->num_features(), &indexed_features);
for (int f = 0; f < indexed_features.size(); ++f) {
if (indexed_features[f] == f_index) {

View File

@ -108,7 +108,7 @@ public:
const BitVector &GetCloudFeatures(int font_id, int class_id) const;
// Gets the indexed features of the canonical sample of the given
// font/class combination.
const GenericVector<int> &GetCanonicalFeatures(int font_id, int class_id) const;
const std::vector<int> &GetCanonicalFeatures(int font_id, int class_id) const;
// Returns the distance between the given UniCharAndFonts pair.
// If matched_fonts, only matching fonts are considered, unless that yields
@ -241,7 +241,7 @@ private:
// Non-serialized cache data.
// Indexed features of the canonical sample.
GenericVector<int> canonical_features;
std::vector<int> canonical_features;
// The mapped features of all the samples.
BitVector cloud_features;

View File

@ -31,7 +31,7 @@ protected:
public:
// Expects that the given vector has contiguous integer values in the
// range [start, end).
void ExpectContiguous(const GenericVector<int> &v, int start, int end) {
void ExpectContiguous(const std::vector<int> &v, int start, int end) {
for (int i = start; i < end; ++i) {
EXPECT_EQ(i, v[i - start]);
}
@ -61,11 +61,11 @@ TEST_F(IntFeatureMapTest, Exhaustive) {
}
}
}
GenericVector<int> index_features;
std::vector<int> index_features;
map.IndexAndSortFeatures(features.get(), total_size, &index_features);
EXPECT_EQ(total_size, index_features.size());
int total_buckets = kXBuckets * kYBuckets * kThetaBuckets;
GenericVector<int> map_features;
std::vector<int> map_features;
int misses = map.MapIndexedFeatures(index_features, &map_features);
EXPECT_EQ(0, misses);
EXPECT_EQ(total_buckets, map_features.size());