From 699f727f3ecd732f5ed4bd6f7231b880b4d22828 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Thu, 18 Mar 2021 07:43:07 +0100 Subject: [PATCH 01/11] Replace more GenericVector by std::vector for src/training Signed-off-by: Stefan Weil --- src/training/common/intfeaturedist.cpp | 2 +- src/training/common/intfeaturedist.h | 4 ++-- src/training/common/intfeaturemap.cpp | 4 ++-- src/training/common/intfeaturemap.h | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/training/common/intfeaturedist.cpp b/src/training/common/intfeaturedist.cpp index b519d59b1..48e35876a 100644 --- a/src/training/common/intfeaturedist.cpp +++ b/src/training/common/intfeaturedist.cpp @@ -97,7 +97,7 @@ double IntFeatureDist::FeatureDistance(const std::vector &features) const { // Compute the distance between the given feature vector and the last // Set feature vector. -double IntFeatureDist::DebugFeatureDistance(const GenericVector &features) const { +double IntFeatureDist::DebugFeatureDistance(const std::vector &features) const { const int num_test_features = features.size(); const double denominator = total_feature_weight_ + num_test_features; double misses = denominator; diff --git a/src/training/common/intfeaturedist.h b/src/training/common/intfeaturedist.h index 29c92722d..6f2d340a0 100644 --- a/src/training/common/intfeaturedist.h +++ b/src/training/common/intfeaturedist.h @@ -19,7 +19,7 @@ #ifndef TESSERACT_CLASSIFY_INTFEATUREDIST_H_ #define TESSERACT_CLASSIFY_INTFEATUREDIST_H_ -#include "genericvector.h" +#include namespace tesseract { @@ -53,7 +53,7 @@ public: // Compute the distance between the given feature vector and the last // Set feature vector. double FeatureDistance(const std::vector &features) const; - double DebugFeatureDistance(const GenericVector &features) const; + double DebugFeatureDistance(const std::vector &features) const; private: // Clear all data. 
diff --git a/src/training/common/intfeaturemap.cpp b/src/training/common/intfeaturemap.cpp index d203a6e0e..ce6c1e2d2 100644 --- a/src/training/common/intfeaturemap.cpp +++ b/src/training/common/intfeaturemap.cpp @@ -133,7 +133,7 @@ int IntFeatureMap::FindNZFeatureMapping(SampleIterator *it) { int total_samples = 0; for (it->Begin(); !it->AtEnd(); it->Next()) { const TrainingSample &sample = it->GetSample(); - GenericVector features; + std::vector features; feature_space_.IndexAndSortFeatures(sample.features(), sample.num_features(), &features); int num_features = features.size(); for (int f = 0; f < num_features; ++f) @@ -164,7 +164,7 @@ int IntFeatureMap::FinalizeMapping(SampleIterator *it) { } // Prints the map features from the set in human-readable form. -void IntFeatureMap::DebugMapFeatures(const GenericVector &map_features) const { +void IntFeatureMap::DebugMapFeatures(const std::vector &map_features) const { for (int i = 0; i < map_features.size(); ++i) { INT_FEATURE_STRUCT f = InverseMapFeature(map_features[i]); f.print(); diff --git a/src/training/common/intfeaturemap.h b/src/training/common/intfeaturemap.h index b66c3575f..330d5c2b4 100644 --- a/src/training/common/intfeaturemap.h +++ b/src/training/common/intfeaturemap.h @@ -116,7 +116,7 @@ public: } // Prints the map features from the set in human-readable form. 
- void DebugMapFeatures(const GenericVector &map_features) const; + void DebugMapFeatures(const std::vector &map_features) const; private: void Clear(); From b0b6bbf019b7c550f85e4f8a532238594fbf840e Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Thu, 18 Mar 2021 07:44:28 +0100 Subject: [PATCH 02/11] Replace more GenericVector by std::vector for src/training Signed-off-by: Stefan Weil --- src/training/cntraining.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/training/cntraining.cpp b/src/training/cntraining.cpp index cca073d97..bd0883db5 100644 --- a/src/training/cntraining.cpp +++ b/src/training/cntraining.cpp @@ -136,7 +136,7 @@ int main(int argc, char *argv[]) { pCharList = CharList; // The norm protos will count the source protos, so we keep them here in // freeable_protos, so they can be freed later. - GenericVector freeable_protos; + std::vector freeable_protos; iterate(pCharList) { // Cluster CharSample = reinterpret_cast first_node(pCharList); From cb207ce6452d027435c5f9583e3ebbf44e804632 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Thu, 18 Mar 2021 07:47:50 +0100 Subject: [PATCH 03/11] Replace more GenericVector by std::vector for src/training Signed-off-by: Stefan Weil --- src/training/degradeimage.cpp | 6 ++---- src/training/degradeimage.h | 5 ++--- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/training/degradeimage.cpp b/src/training/degradeimage.cpp index da10d9641..d66dc477f 100644 --- a/src/training/degradeimage.cpp +++ b/src/training/degradeimage.cpp @@ -3,7 +3,6 @@ * Description: Function to degrade an image (usually of text) as if it * has been printed and then scanned. * Authors: Ray Smith - * Created: Tue Nov 19 2013 * * (C) Copyright 2013, Google Inc. * Licensed under the Apache License, Version 2.0 (the "License"); @@ -22,7 +21,6 @@ #include // from leptonica #include -#include "genericvector.h" #include "helpers.h" // For TRand. 
#include "rect.h" @@ -175,7 +173,7 @@ Pix *DegradeImage(Pix *input, int exposure, TRand *randomizer, float *rotation) // Returns nullptr on error. The returned Pix must be pixDestroyed. Pix *PrepareDistortedPix(const Pix *pix, bool perspective, bool invert, bool white_noise, bool smooth_noise, bool blur, int box_reduction, TRand *randomizer, - GenericVector *boxes) { + std::vector *boxes) { Pix *distorted = pixCopy(nullptr, const_cast(pix)); // Things to do to synthetic training data. if ((white_noise || smooth_noise) && randomizer->SignedRand(1.0) > 0.0) { @@ -214,7 +212,7 @@ Pix *PrepareDistortedPix(const Pix *pix, bool perspective, bool invert, bool whi // perspective distortion. Width and height only need to be set if there // is no pix. If there is a pix, then they will be taken from there. void GeneratePerspectiveDistortion(int width, int height, TRand *randomizer, Pix **pix, - GenericVector *boxes) { + std::vector *boxes) { if (pix != nullptr && *pix != nullptr) { width = pixGetWidth(*pix); height = pixGetHeight(*pix); diff --git a/src/training/degradeimage.h b/src/training/degradeimage.h index 7451e3689..61146eac5 100644 --- a/src/training/degradeimage.h +++ b/src/training/degradeimage.h @@ -3,7 +3,6 @@ * Description: Function to degrade an image (usually of text) as if it * has been printed and then scanned. * Authors: Ray Smith - * Created: Tue Nov 19 2013 * * (C) Copyright 2013, Google Inc. * Licensed under the Apache License, Version 2.0 (the "License"); @@ -41,12 +40,12 @@ struct Pix *DegradeImage(struct Pix *input, int exposure, TRand *randomizer, flo // Returns nullptr on error. The returned Pix must be pixDestroyed. Pix *PrepareDistortedPix(const Pix *pix, bool perspective, bool invert, bool white_noise, bool smooth_noise, bool blur, int box_reduction, TRand *randomizer, - GenericVector *boxes); + std::vector *boxes); // Distorts anything that has a non-null pointer with the same pseudo-random // perspective distortion. 
Width and height only need to be set if there // is no pix. If there is a pix, then they will be taken from there. void GeneratePerspectiveDistortion(int width, int height, TRand *randomizer, Pix **pix, - GenericVector *boxes); + std::vector *boxes); // Computes the coefficients of a randomized projective transformation. // The image transform requires backward transformation coefficient, and the // box transform the forward coefficients. From 16090145256a9d8d26ed8845a960555d7b634939 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Thu, 18 Mar 2021 07:50:00 +0100 Subject: [PATCH 04/11] Replace more GenericVector by std::vector for src/training Signed-off-by: Stefan Weil --- src/training/lstmeval.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/training/lstmeval.cpp b/src/training/lstmeval.cpp index 7d1998dad..edb5a0d5f 100644 --- a/src/training/lstmeval.cpp +++ b/src/training/lstmeval.cpp @@ -16,7 +16,6 @@ /////////////////////////////////////////////////////////////////////// #include "commontraining.h" -#include "genericvector.h" #include "lstmtester.h" #include "tprintf.h" @@ -52,7 +51,7 @@ int main(int argc, char **argv) { tprintf("Failed to load language model from %s!\n", FLAGS_traineddata.c_str()); return 1; } - GenericVector model_data; + std::vector model_data; if (!tesseract::LoadDataFromFile(FLAGS_model.c_str(), &model_data)) { tprintf("Failed to load model from: %s\n", FLAGS_model.c_str()); return 1; From a00e7bc2bb8da731928c2f016c8746df289d9ff8 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Thu, 18 Mar 2021 07:51:07 +0100 Subject: [PATCH 05/11] Replace more GenericVector by std::vector for src/training Signed-off-by: Stefan Weil --- src/training/degradeimage.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/training/degradeimage.h b/src/training/degradeimage.h index 61146eac5..1d499e31b 100644 --- a/src/training/degradeimage.h +++ b/src/training/degradeimage.h @@ -20,7 +20,6 @@ #define 
TESSERACT_TRAINING_DEGRADEIMAGE_H_ #include -#include "genericvector.h" #include "helpers.h" // For TRand. #include "rect.h" From 37c9cf494046f96c57fdadd8c488baa4f15b8114 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Thu, 18 Mar 2021 07:53:41 +0100 Subject: [PATCH 06/11] Replace more GenericVector by std::vector for src/training Signed-off-by: Stefan Weil --- src/training/common/mastertrainer.cpp | 2 +- src/training/common/mastertrainer.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/training/common/mastertrainer.cpp b/src/training/common/mastertrainer.cpp index 46837e635..7a1b8172d 100644 --- a/src/training/common/mastertrainer.cpp +++ b/src/training/common/mastertrainer.cpp @@ -89,7 +89,7 @@ bool MasterTrainer::Serialize(FILE *fp) const { return false; if (!fontinfo_table_.Serialize(fp)) return false; - if (!xheights_.Serialize(fp)) + if (!tesseract::Serialize(fp, xheights_)) return false; return true; } diff --git a/src/training/common/mastertrainer.h b/src/training/common/mastertrainer.h index def748a89..f6b0104a1 100644 --- a/src/training/common/mastertrainer.h +++ b/src/training/common/mastertrainer.h @@ -261,7 +261,7 @@ private: // Font metrics gathered from multiple files. FontInfoTable fontinfo_table_; // Array of xheights indexed by font ids in fontinfo_table_; - GenericVector xheights_; + std::vector xheights_; // Non-serialized data initialized by other means or used temporarily // during loading of training samples. 
From 4d8e9dc659f8a07b2ca2ed62caeb0385fe985784 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Thu, 18 Mar 2021 07:58:07 +0100 Subject: [PATCH 07/11] Replace more GenericVector by std::vector for src/training Signed-off-by: Stefan Weil --- src/training/common/mastertrainer.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/training/common/mastertrainer.cpp b/src/training/common/mastertrainer.cpp index 7a1b8172d..d92189e74 100644 --- a/src/training/common/mastertrainer.cpp +++ b/src/training/common/mastertrainer.cpp @@ -507,7 +507,7 @@ void MasterTrainer::SetupFlatShapeTable(ShapeTable *shape_table) { // must be clustered in order the fonts arrived, and reverse order of the // characters within each font. // Get a list of the fonts in the order they appeared. - GenericVector active_fonts; + std::vector active_fonts; int num_shapes = flat_shapes_.NumShapes(); for (int s = 0; s < num_shapes; ++s) { int font = flat_shapes_.GetShape(s)[0].font_ids[0]; @@ -547,7 +547,7 @@ CLUSTERER *MasterTrainer::SetupForClustering(const ShapeTable &shape_table, shape_map.SetMap(shape_id, true); shape_map.Setup(); // Reverse the order of the samples to match the previous behavior. - GenericVector sample_ptrs; + std::vector sample_ptrs; SampleIterator it; it.Init(&shape_map, &shape_table, false, &samples_); for (it.Begin(); !it.AtEnd(); it.Next()) { @@ -588,10 +588,10 @@ void MasterTrainer::WriteInttempAndPFFMTable(const UNICHARSET &unicharset, // Now write pffmtable. This is complicated by the fact that the adaptive // classifier still wants one indexed by unichar-id, but the static // classifier needs one indexed by its shape class id. - // We put the shapetable_cutoffs in a GenericVector, and compute the + // We put the shapetable_cutoffs in a vector, and compute the // unicharset cutoffs along the way. 
- GenericVector shapetable_cutoffs; - GenericVector unichar_cutoffs; + std::vector shapetable_cutoffs; + std::vector unichar_cutoffs; for (int c = 0; c < unicharset.size(); ++c) unichar_cutoffs.push_back(0); /* then write out each class */ @@ -620,7 +620,7 @@ void MasterTrainer::WriteInttempAndPFFMTable(const UNICHARSET &unicharset, if (fp == nullptr) { tprintf("Error, failed to open file \"%s\"\n", pffmtable_file); } else { - shapetable_cutoffs.Serialize(fp); + tesseract::Serialize(fp, shapetable_cutoffs); for (int c = 0; c < unicharset.size(); ++c) { const char *unichar = unicharset.id_to_unichar(c); if (strcmp(unichar, " ") == 0) { @@ -894,7 +894,8 @@ void MasterTrainer::ClusterShapes(int min_shapes, int max_shape_unichars, float ShapeTable *shapes) { int num_shapes = shapes->NumShapes(); int max_merges = num_shapes - min_shapes; - auto *shape_dists = new GenericVector[num_shapes]; + // TODO: avoid new / delete. + auto *shape_dists = new std::vector[num_shapes]; float min_dist = kInfiniteDist; int min_s1 = 0; int min_s2 = 0; From 7df1cb0babfc4b988c175de857ae0d2f5f2fd89e Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Thu, 18 Mar 2021 08:00:19 +0100 Subject: [PATCH 08/11] Replace remaining GenericVector by std::vector for src/training Signed-off-by: Stefan Weil --- src/training/common/ctc.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/training/common/ctc.cpp b/src/training/common/ctc.cpp index 5a035c81c..6f8f3544a 100644 --- a/src/training/common/ctc.cpp +++ b/src/training/common/ctc.cpp @@ -2,7 +2,6 @@ // File: ctc.cpp // Description: Slightly improved standard CTC to compute the targets. // Author: Ray Smith -// Created: Wed Jul 13 15:50:06 PDT 2016 // // (C) Copyright 2016, Google Inc. 
// Licensed under the Apache License, Version 2.0 (the "License"); @@ -18,7 +17,6 @@ #include "ctc.h" -#include "genericvector.h" #include "matrix.h" #include "network.h" #include "networkio.h" @@ -217,7 +215,7 @@ static int BestLabel(const GENERIC_2D_ARRAY &outputs, int t) { // to the network outputs. float CTC::CalculateBiasFraction() { // Compute output labels via basic decoding. - GenericVector output_labels; + std::vector output_labels; for (int t = 0; t < num_timesteps_; ++t) { int label = BestLabel(outputs_, t); while (t + 1 < num_timesteps_ && BestLabel(outputs_, t + 1) == label) @@ -226,8 +224,8 @@ float CTC::CalculateBiasFraction() { output_labels.push_back(label); } // Simple bag of labels error calculation. - GenericVector truth_counts(num_classes_, 0); - GenericVector output_counts(num_classes_, 0); + std::vector truth_counts(num_classes_, 0); + std::vector output_counts(num_classes_, 0); for (int l = 0; l < num_labels_; ++l) { ++truth_counts[labels_[l]]; } @@ -353,10 +351,10 @@ void CTC::NormalizeSequence(GENERIC_2D_ARRAY *probs) const { void CTC::LabelsToClasses(const GENERIC_2D_ARRAY &probs, NetworkIO *targets) const { // For each timestep compute the max prob for each class over all // instances of the class in the labels_. 
- GenericVector class_probs; + std::vector class_probs; for (int t = 0; t < num_timesteps_; ++t) { float *targets_t = targets->f(t); - class_probs.init_to_size(num_classes_, 0.0); + class_probs.assign(num_classes_, 0.0); for (int u = 0; u < num_labels_; ++u) { double prob = probs(t, u); // Note that although Graves specifies sum over all labels of the same From a847e0f9b5918523a68f5ff30e07ea239ecea48b Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Tue, 16 Mar 2021 07:49:30 +0100 Subject: [PATCH 09/11] Replace remaining GenericVector by std::vector for src/classify Signed-off-by: Stefan Weil --- src/ccstruct/normalis.cpp | 1 + src/ccstruct/normalis.h | 1 + src/ccutil/indexmapbidi.cpp | 32 +++++++++++++---------- src/ccutil/indexmapbidi.h | 5 ++-- src/training/common/intfeaturemap.cpp | 1 - src/training/common/trainingsampleset.cpp | 1 + 6 files changed, 23 insertions(+), 18 deletions(-) diff --git a/src/ccstruct/normalis.cpp b/src/ccstruct/normalis.cpp index dd87c0d8f..b2f0f433f 100644 --- a/src/ccstruct/normalis.cpp +++ b/src/ccstruct/normalis.cpp @@ -356,6 +356,7 @@ void DENORM::LocalDenormTransform(const TPOINT &pt, TPOINT *original) const { original->x = IntCastRounded(float_result.x()); original->y = IntCastRounded(float_result.y()); } + void DENORM::LocalDenormTransform(const FCOORD &pt, FCOORD *original) const { FCOORD rotated(pt.x() - final_xshift_, pt.y() - final_yshift_); if (x_map_ != nullptr && y_map_ != nullptr) { diff --git a/src/ccstruct/normalis.h b/src/ccstruct/normalis.h index f7398ed13..f190f5962 100644 --- a/src/ccstruct/normalis.h +++ b/src/ccstruct/normalis.h @@ -21,6 +21,7 @@ #include #include +#include struct Pix; diff --git a/src/ccutil/indexmapbidi.cpp b/src/ccutil/indexmapbidi.cpp index bdbc7995b..32f189a14 100644 --- a/src/ccutil/indexmapbidi.cpp +++ b/src/ccutil/indexmapbidi.cpp @@ -29,7 +29,11 @@ IndexMap::~IndexMap() = default; // Uses a binary search to find the result. For faster speed use // IndexMapBiDi, but that takes more memory. 
int IndexMap::SparseToCompact(int sparse_index) const { - int result = compact_map_.binary_search(sparse_index); + auto pos = std::upper_bound(compact_map_.begin(), compact_map_.end(), sparse_index); + if (pos > compact_map_.begin()) { + --pos; + } + auto result = pos - compact_map_.begin(); return compact_map_[result] == sparse_index ? result : -1; } @@ -45,7 +49,7 @@ void IndexMap::CopyFrom(const IndexMapBiDi &src) { // Writes to the given file. Returns false in case of error. bool IndexMap::Serialize(FILE *fp) const { - return tesseract::Serialize(fp, &sparse_size_) && compact_map_.Serialize(fp); + return tesseract::Serialize(fp, &sparse_size_) && tesseract::Serialize(fp, compact_map_); } // Reads from the given file. Returns false in case of error. @@ -60,7 +64,7 @@ bool IndexMap::DeSerialize(bool swap, FILE *fp) { if (sparse_size > UINT16_MAX) return false; sparse_size_ = sparse_size; - return compact_map_.DeSerialize(swap, fp); + return tesseract::DeSerialize(swap, fp, compact_map_); } // Destructor. @@ -85,7 +89,7 @@ void IndexMapBiDi::InitAndSetupRange(int sparse_size, int start, int end) { // Call Setup immediately after, or make calls to SetMap first to adjust the // mapping and then call Setup before using the map. void IndexMapBiDi::Init(int size, bool all_mapped) { - sparse_map_.init_to_size(size, -1); + sparse_map_.assign(size, -1); if (all_mapped) { for (int i = 0; i < size; ++i) sparse_map_[i] = i; @@ -107,7 +111,7 @@ void IndexMapBiDi::Setup() { sparse_map_[i] = compact_size++; } } - compact_map_.init_to_size(compact_size, -1); + compact_map_.assign(compact_size, -1); for (int i = 0; i < sparse_map_.size(); ++i) { if (sparse_map_[i] >= 0) { compact_map_[sparse_map_[i]] = i; @@ -168,7 +172,7 @@ void IndexMapBiDi::CompleteMerges() { compact_size = compact_index + 1; } // Re-generate the compact_map leaving holes for unused indices. 
- compact_map_.init_to_size(compact_size, -1); + compact_map_.assign(compact_size, -1); for (int i = 0; i < sparse_map_.size(); ++i) { if (sparse_map_[i] >= 0) { if (compact_map_[sparse_map_[i]] == -1) @@ -177,8 +181,8 @@ void IndexMapBiDi::CompleteMerges() { } // Compact the compact_map, leaving tmp_compact_map saying where each // index went to in the compacted map. - GenericVector tmp_compact_map; - tmp_compact_map.init_to_size(compact_size, -1); + std::vector tmp_compact_map; + tmp_compact_map.resize(compact_size, -1); compact_size = 0; for (int i = 0; i < compact_map_.size(); ++i) { if (compact_map_[i] >= 0) { @@ -186,7 +190,7 @@ void IndexMapBiDi::CompleteMerges() { compact_map_[compact_size++] = compact_map_[i]; } } - compact_map_.truncate(compact_size); + compact_map_.resize(compact_size); // Now modify the entries in the sparse map to point to the new locations. for (int i = 0; i < sparse_map_.size(); ++i) { if (sparse_map_[i] >= 0) { @@ -202,14 +206,14 @@ bool IndexMapBiDi::Serialize(FILE *fp) const { // Make a vector containing the rest of the map. If the map is many-to-one // then each additional sparse entry needs to be stored. // Normally we store only the compact map to save space. 
- GenericVector remaining_pairs; + std::vector remaining_pairs; for (int i = 0; i < sparse_map_.size(); ++i) { if (sparse_map_[i] >= 0 && compact_map_[sparse_map_[i]] != i) { remaining_pairs.push_back(i); remaining_pairs.push_back(sparse_map_[i]); } } - if (!remaining_pairs.Serialize(fp)) + if (!tesseract::Serialize(fp, remaining_pairs)) return false; return true; } @@ -219,10 +223,10 @@ bool IndexMapBiDi::Serialize(FILE *fp) const { bool IndexMapBiDi::DeSerialize(bool swap, FILE *fp) { if (!IndexMap::DeSerialize(swap, fp)) return false; - GenericVector remaining_pairs; - if (!remaining_pairs.DeSerialize(swap, fp)) + std::vector remaining_pairs; + if (!tesseract::DeSerialize(swap, fp, remaining_pairs)) return false; - sparse_map_.init_to_size(sparse_size_, -1); + sparse_map_.assign(sparse_size_, -1); for (int i = 0; i < compact_map_.size(); ++i) { sparse_map_[compact_map_[i]] = i; } diff --git a/src/ccutil/indexmapbidi.h b/src/ccutil/indexmapbidi.h index 48ac42311..0d15b4c52 100644 --- a/src/ccutil/indexmapbidi.h +++ b/src/ccutil/indexmapbidi.h @@ -2,7 +2,6 @@ // File: indexmapbidi.h // Description: Bi-directional mapping between a sparse and compact space. // Author: rays@google.com (Ray Smith) -// Created: Tue Apr 06 11:33:59 PDT 2010 // // (C) Copyright 2010, Google Inc. // Licensed under the Apache License, Version 2.0 (the "License"); @@ -77,7 +76,7 @@ protected: int32_t sparse_size_; // The compact space covers integers in the range [0, compact_map_.size()-1]. // Each element contains the corresponding sparse index. - GenericVector compact_map_; + std::vector compact_map_; }; // Bidirectional many-to-one mapping between a sparse and a compact discrete @@ -170,7 +169,7 @@ private: } // Direct look-up of the compact index for each element in sparse space. - GenericVector sparse_map_; + std::vector sparse_map_; }; } // namespace tesseract. 
diff --git a/src/training/common/intfeaturemap.cpp b/src/training/common/intfeaturemap.cpp index ce6c1e2d2..da5f9625a 100644 --- a/src/training/common/intfeaturemap.cpp +++ b/src/training/common/intfeaturemap.cpp @@ -4,7 +4,6 @@ // File: intfeaturemap.cpp // Description: Encapsulation of IntFeatureSpace with IndexMapBiDi // to provide a subspace mapping and fast feature lookup. -// Created: Tue Oct 26 08:58:30 PDT 2010 // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/src/training/common/trainingsampleset.cpp b/src/training/common/trainingsampleset.cpp index 2822cd221..cfc3c5fe0 100644 --- a/src/training/common/trainingsampleset.cpp +++ b/src/training/common/trainingsampleset.cpp @@ -22,6 +22,7 @@ #include #include "boxread.h" #include "fontinfo.h" +//#include "helpers.h" #include "indexmapbidi.h" #include "intfeaturedist.h" #include "intfeaturemap.h" From 7fdf79aff4fa22682ed3404be2265719953765c3 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Fri, 12 Mar 2021 18:39:55 +0100 Subject: [PATCH 10/11] Move function ExtractFontName to baseapi.cpp It is only used there, so now a local function. This also allows removing blobclass.h. 
Signed-off-by: Stefan Weil --- Makefile.am | 1 - src/api/baseapi.cpp | 31 ++++++++++++++++++++++++++++--- src/classify/blobclass.cpp | 31 ------------------------------- src/classify/blobclass.h | 33 --------------------------------- 4 files changed, 28 insertions(+), 68 deletions(-) delete mode 100644 src/classify/blobclass.h diff --git a/Makefile.am b/Makefile.am index efd0199bd..9f17fe06f 100644 --- a/Makefile.am +++ b/Makefile.am @@ -387,7 +387,6 @@ endif noinst_HEADERS += src/classify/classify.h if !DISABLED_LEGACY_ENGINE noinst_HEADERS += src/classify/adaptive.h -noinst_HEADERS += src/classify/blobclass.h noinst_HEADERS += src/classify/cluster.h noinst_HEADERS += src/classify/clusttool.h noinst_HEADERS += src/classify/featdefs.h diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp index 61b7b390a..d1383693b 100644 --- a/src/api/baseapi.cpp +++ b/src/api/baseapi.cpp @@ -23,9 +23,6 @@ # include "config_auto.h" #endif -#ifndef DISABLED_LEGACY_ENGINE -# include "blobclass.h" // for ExtractFontName -#endif #include "boxword.h" // for BoxWord #include "coutln.h" // for C_OUTLINE_IT, C_OUTLINE_LIST #include "dawg_cache.h" // for DawgCache @@ -125,6 +122,34 @@ static const char *kOldVarsFile = "failed_vars.txt"; /** Max string length of an int. */ const int kMaxIntSize = 22; +#ifndef DISABLED_LEGACY_ENGINE +static const char kUnknownFontName[] = "UnknownFont"; + +static STRING_VAR(classify_font_name, kUnknownFontName, + "Default font name to be used in training"); + +// Finds the name of the training font and returns it in fontname, by cutting +// it out based on the expectation that the filename is of the form: +// /path/to/dir/[lang].[fontname].exp[num] +// The [lang], [fontname] and [num] fields should not have '.' characters. +// If the global parameter classify_font_name is set, its value is used instead. 
+static void ExtractFontName(const char* filename, std::string* fontname) { + *fontname = classify_font_name; + if (*fontname == kUnknownFontName) { + // filename is expected to be of the form [lang].[fontname].exp[num] + // The [lang], [fontname] and [num] fields should not have '.' characters. + const char *basename = strrchr(filename, '/'); + const char *firstdot = strchr(basename ? basename : filename, '.'); + const char *lastdot = strrchr(filename, '.'); + if (firstdot != lastdot && firstdot != nullptr && lastdot != nullptr) { + ++firstdot; + *fontname = firstdot; + fontname->resize(lastdot - firstdot); + } + } +} +#endif + /* Add all available languages recursively. */ static void addAvailableLanguages(const std::string &datadir, const std::string &base, diff --git a/src/classify/blobclass.cpp b/src/classify/blobclass.cpp index bac156577..501d069a3 100644 --- a/src/classify/blobclass.cpp +++ b/src/classify/blobclass.cpp @@ -15,8 +15,6 @@ ** limitations under the License. ******************************************************************************/ -#include "blobclass.h" - #include #include "classify.h" @@ -26,35 +24,6 @@ namespace tesseract { -static const char kUnknownFontName[] = "UnknownFont"; - -static STRING_VAR(classify_font_name, kUnknownFontName, "Default font name to be used in training"); - -/**---------------------------------------------------------------------------- - Public Code -----------------------------------------------------------------------------**/ - -// Finds the name of the training font and returns it in fontname, by cutting -// it out based on the expectation that the filename is of the form: -// /path/to/dir/[lang].[fontname].exp[num] -// The [lang], [fontname] and [num] fields should not have '.' characters. -// If the global parameter classify_font_name is set, its value is used instead. 
-void ExtractFontName(const char *filename, std::string *fontname) { - *fontname = classify_font_name; - if (*fontname == kUnknownFontName) { - // filename is expected to be of the form [lang].[fontname].exp[num] - // The [lang], [fontname] and [num] fields should not have '.' characters. - const char *basename = strrchr(filename, '/'); - const char *firstdot = strchr(basename ? basename : filename, '.'); - const char *lastdot = strrchr(filename, '.'); - if (firstdot != lastdot && firstdot != nullptr && lastdot != nullptr) { - ++firstdot; - *fontname = firstdot; - fontname->resize(lastdot - firstdot); - } - } -} - /*---------------------------------------------------------------------------*/ // Extracts features from the given blob and saves them in the tr_file_data_ diff --git a/src/classify/blobclass.h b/src/classify/blobclass.h deleted file mode 100644 index 3bc9cfd88..000000000 --- a/src/classify/blobclass.h +++ /dev/null @@ -1,33 +0,0 @@ -/****************************************************************************** - ** Filename: blobclass.h - ** Purpose: Interface to high level classification and training. - ** Author: Dan Johnson - ** - ** (c) Copyright Hewlett-Packard Company, 1988. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- ******************************************************************************/ - -#ifndef BLOBCLASS_H -#define BLOBCLASS_H - -#include - -namespace tesseract { -// Finds the name of the training font and returns it in fontname, by cutting -// it out based on the expectation that the filename is of the form: -// /path/to/dir/[lang].[fontname].exp[num] -// The [lang], [fontname] and [num] fields should not have '.' characters. -// If the global parameter classify_font_name is set, its value is used instead. -void ExtractFontName(const char *filename, std::string *fontname); - -} // namespace tesseract. - -#endif From 77dbd3ee02f3a254a6a076bb156114bed7775a26 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Thu, 18 Mar 2021 09:04:39 +0100 Subject: [PATCH 11/11] Remove two type casts Signed-off-by: Stefan Weil --- src/textord/makerow.cpp | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/textord/makerow.cpp b/src/textord/makerow.cpp index 5aae5643e..6ef0c7eda 100644 --- a/src/textord/makerow.cpp +++ b/src/textord/makerow.cpp @@ -126,13 +126,8 @@ static int row_y_order( // sort function * Qsort style function to compare 2 TO_ROWS based on their spacing value. */ static int row_spacing_order( // sort function - const void *item1, // items to compare - const void *item2) { - // converted ptr - const TO_ROW *row1 = *reinterpret_cast(item1); - // converted ptr - const TO_ROW *row2 = *reinterpret_cast(item2); - + const TO_ROW *row1, // items to compare + const TO_ROW *row2) { return row1->spacing < row2->spacing; }