mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-22 09:53:03 +08:00
Merge pull request #3338 from stweil/vector
Replace more GenericVector by std::vector
This commit is contained in:
commit
18017a1334
@ -387,7 +387,6 @@ endif
|
||||
noinst_HEADERS += src/classify/classify.h
|
||||
if !DISABLED_LEGACY_ENGINE
|
||||
noinst_HEADERS += src/classify/adaptive.h
|
||||
noinst_HEADERS += src/classify/blobclass.h
|
||||
noinst_HEADERS += src/classify/cluster.h
|
||||
noinst_HEADERS += src/classify/clusttool.h
|
||||
noinst_HEADERS += src/classify/featdefs.h
|
||||
|
@ -23,9 +23,6 @@
|
||||
# include "config_auto.h"
|
||||
#endif
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
# include "blobclass.h" // for ExtractFontName
|
||||
#endif
|
||||
#include "boxword.h" // for BoxWord
|
||||
#include "coutln.h" // for C_OUTLINE_IT, C_OUTLINE_LIST
|
||||
#include "dawg_cache.h" // for DawgCache
|
||||
@ -125,6 +122,34 @@ static const char *kOldVarsFile = "failed_vars.txt";
|
||||
/** Max string length of an int. */
|
||||
const int kMaxIntSize = 22;
|
||||
|
||||
#ifndef DISABLED_LEGACY_ENGINE
|
||||
static const char kUnknownFontName[] = "UnknownFont";
|
||||
|
||||
static STRING_VAR(classify_font_name, kUnknownFontName,
|
||||
"Default font name to be used in training");
|
||||
|
||||
// Finds the name of the training font and returns it in fontname, by cutting
|
||||
// it out based on the expectation that the filename is of the form:
|
||||
// /path/to/dir/[lang].[fontname].exp[num]
|
||||
// The [lang], [fontname] and [num] fields should not have '.' characters.
|
||||
// If the global parameter classify_font_name is set, its value is used instead.
|
||||
static void ExtractFontName(const char* filename, std::string* fontname) {
|
||||
*fontname = classify_font_name;
|
||||
if (*fontname == kUnknownFontName) {
|
||||
// filename is expected to be of the form [lang].[fontname].exp[num]
|
||||
// The [lang], [fontname] and [num] fields should not have '.' characters.
|
||||
const char *basename = strrchr(filename, '/');
|
||||
const char *firstdot = strchr(basename ? basename : filename, '.');
|
||||
const char *lastdot = strrchr(filename, '.');
|
||||
if (firstdot != lastdot && firstdot != nullptr && lastdot != nullptr) {
|
||||
++firstdot;
|
||||
*fontname = firstdot;
|
||||
fontname->resize(lastdot - firstdot);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Add all available languages recursively.
|
||||
*/
|
||||
static void addAvailableLanguages(const std::string &datadir, const std::string &base,
|
||||
|
@ -356,6 +356,7 @@ void DENORM::LocalDenormTransform(const TPOINT &pt, TPOINT *original) const {
|
||||
original->x = IntCastRounded(float_result.x());
|
||||
original->y = IntCastRounded(float_result.y());
|
||||
}
|
||||
|
||||
void DENORM::LocalDenormTransform(const FCOORD &pt, FCOORD *original) const {
|
||||
FCOORD rotated(pt.x() - final_xshift_, pt.y() - final_yshift_);
|
||||
if (x_map_ != nullptr && y_map_ != nullptr) {
|
||||
|
@ -21,6 +21,7 @@
|
||||
|
||||
#include <vector>
|
||||
#include <tesseract/export.h>
|
||||
#include <vector>
|
||||
|
||||
struct Pix;
|
||||
|
||||
|
@ -29,7 +29,11 @@ IndexMap::~IndexMap() = default;
|
||||
// Uses a binary search to find the result. For faster speed use
|
||||
// IndexMapBiDi, but that takes more memory.
|
||||
int IndexMap::SparseToCompact(int sparse_index) const {
|
||||
int result = compact_map_.binary_search(sparse_index);
|
||||
auto pos = std::upper_bound(compact_map_.begin(), compact_map_.end(), sparse_index);
|
||||
if (pos > compact_map_.begin()) {
|
||||
--pos;
|
||||
}
|
||||
auto result = pos - compact_map_.begin();
|
||||
return compact_map_[result] == sparse_index ? result : -1;
|
||||
}
|
||||
|
||||
@ -45,7 +49,7 @@ void IndexMap::CopyFrom(const IndexMapBiDi &src) {
|
||||
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
bool IndexMap::Serialize(FILE *fp) const {
|
||||
return tesseract::Serialize(fp, &sparse_size_) && compact_map_.Serialize(fp);
|
||||
return tesseract::Serialize(fp, &sparse_size_) && tesseract::Serialize(fp, compact_map_);
|
||||
}
|
||||
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
@ -60,7 +64,7 @@ bool IndexMap::DeSerialize(bool swap, FILE *fp) {
|
||||
if (sparse_size > UINT16_MAX)
|
||||
return false;
|
||||
sparse_size_ = sparse_size;
|
||||
return compact_map_.DeSerialize(swap, fp);
|
||||
return tesseract::DeSerialize(swap, fp, compact_map_);
|
||||
}
|
||||
|
||||
// Destructor.
|
||||
@ -85,7 +89,7 @@ void IndexMapBiDi::InitAndSetupRange(int sparse_size, int start, int end) {
|
||||
// Call Setup immediately after, or make calls to SetMap first to adjust the
|
||||
// mapping and then call Setup before using the map.
|
||||
void IndexMapBiDi::Init(int size, bool all_mapped) {
|
||||
sparse_map_.init_to_size(size, -1);
|
||||
sparse_map_.resize(size, -1);
|
||||
if (all_mapped) {
|
||||
for (int i = 0; i < size; ++i)
|
||||
sparse_map_[i] = i;
|
||||
@ -107,7 +111,7 @@ void IndexMapBiDi::Setup() {
|
||||
sparse_map_[i] = compact_size++;
|
||||
}
|
||||
}
|
||||
compact_map_.init_to_size(compact_size, -1);
|
||||
compact_map_.resize(compact_size, -1);
|
||||
for (int i = 0; i < sparse_map_.size(); ++i) {
|
||||
if (sparse_map_[i] >= 0) {
|
||||
compact_map_[sparse_map_[i]] = i;
|
||||
@ -168,7 +172,7 @@ void IndexMapBiDi::CompleteMerges() {
|
||||
compact_size = compact_index + 1;
|
||||
}
|
||||
// Re-generate the compact_map leaving holes for unused indices.
|
||||
compact_map_.init_to_size(compact_size, -1);
|
||||
compact_map_.resize(compact_size, -1);
|
||||
for (int i = 0; i < sparse_map_.size(); ++i) {
|
||||
if (sparse_map_[i] >= 0) {
|
||||
if (compact_map_[sparse_map_[i]] == -1)
|
||||
@ -177,8 +181,8 @@ void IndexMapBiDi::CompleteMerges() {
|
||||
}
|
||||
// Compact the compact_map, leaving tmp_compact_map saying where each
|
||||
// index went to in the compacted map.
|
||||
GenericVector<int32_t> tmp_compact_map;
|
||||
tmp_compact_map.init_to_size(compact_size, -1);
|
||||
std::vector<int32_t> tmp_compact_map;
|
||||
tmp_compact_map.resize(compact_size, -1);
|
||||
compact_size = 0;
|
||||
for (int i = 0; i < compact_map_.size(); ++i) {
|
||||
if (compact_map_[i] >= 0) {
|
||||
@ -186,7 +190,7 @@ void IndexMapBiDi::CompleteMerges() {
|
||||
compact_map_[compact_size++] = compact_map_[i];
|
||||
}
|
||||
}
|
||||
compact_map_.truncate(compact_size);
|
||||
compact_map_.resize(compact_size);
|
||||
// Now modify the entries in the sparse map to point to the new locations.
|
||||
for (int i = 0; i < sparse_map_.size(); ++i) {
|
||||
if (sparse_map_[i] >= 0) {
|
||||
@ -202,14 +206,14 @@ bool IndexMapBiDi::Serialize(FILE *fp) const {
|
||||
// Make a vector containing the rest of the map. If the map is many-to-one
|
||||
// then each additional sparse entry needs to be stored.
|
||||
// Normally we store only the compact map to save space.
|
||||
GenericVector<int32_t> remaining_pairs;
|
||||
std::vector<int32_t> remaining_pairs;
|
||||
for (int i = 0; i < sparse_map_.size(); ++i) {
|
||||
if (sparse_map_[i] >= 0 && compact_map_[sparse_map_[i]] != i) {
|
||||
remaining_pairs.push_back(i);
|
||||
remaining_pairs.push_back(sparse_map_[i]);
|
||||
}
|
||||
}
|
||||
if (!remaining_pairs.Serialize(fp))
|
||||
if (!tesseract::Serialize(fp, remaining_pairs))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
@ -219,10 +223,10 @@ bool IndexMapBiDi::Serialize(FILE *fp) const {
|
||||
bool IndexMapBiDi::DeSerialize(bool swap, FILE *fp) {
|
||||
if (!IndexMap::DeSerialize(swap, fp))
|
||||
return false;
|
||||
GenericVector<int32_t> remaining_pairs;
|
||||
if (!remaining_pairs.DeSerialize(swap, fp))
|
||||
std::vector<int32_t> remaining_pairs;
|
||||
if (!tesseract::DeSerialize(swap, fp, remaining_pairs))
|
||||
return false;
|
||||
sparse_map_.init_to_size(sparse_size_, -1);
|
||||
sparse_map_.resize(sparse_size_, -1);
|
||||
for (int i = 0; i < compact_map_.size(); ++i) {
|
||||
sparse_map_[compact_map_[i]] = i;
|
||||
}
|
||||
|
@ -2,7 +2,6 @@
|
||||
// File: indexmapbidi.h
|
||||
// Description: Bi-directional mapping between a sparse and compact space.
|
||||
// Author: rays@google.com (Ray Smith)
|
||||
// Created: Tue Apr 06 11:33:59 PDT 2010
|
||||
//
|
||||
// (C) Copyright 2010, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ -77,7 +76,7 @@ protected:
|
||||
int32_t sparse_size_;
|
||||
// The compact space covers integers in the range [0, compact_map_.size()-1].
|
||||
// Each element contains the corresponding sparse index.
|
||||
GenericVector<int32_t> compact_map_;
|
||||
std::vector<int32_t> compact_map_;
|
||||
};
|
||||
|
||||
// Bidirectional many-to-one mapping between a sparse and a compact discrete
|
||||
@ -170,7 +169,7 @@ private:
|
||||
}
|
||||
|
||||
// Direct look-up of the compact index for each element in sparse space.
|
||||
GenericVector<int32_t> sparse_map_;
|
||||
std::vector<int32_t> sparse_map_;
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
@ -15,8 +15,6 @@
|
||||
** limitations under the License.
|
||||
******************************************************************************/
|
||||
|
||||
#include "blobclass.h"
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
#include "classify.h"
|
||||
@ -26,35 +24,6 @@
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
static const char kUnknownFontName[] = "UnknownFont";
|
||||
|
||||
static STRING_VAR(classify_font_name, kUnknownFontName, "Default font name to be used in training");
|
||||
|
||||
/**----------------------------------------------------------------------------
|
||||
Public Code
|
||||
----------------------------------------------------------------------------**/
|
||||
|
||||
// Finds the name of the training font and returns it in fontname, by cutting
|
||||
// it out based on the expectation that the filename is of the form:
|
||||
// /path/to/dir/[lang].[fontname].exp[num]
|
||||
// The [lang], [fontname] and [num] fields should not have '.' characters.
|
||||
// If the global parameter classify_font_name is set, its value is used instead.
|
||||
void ExtractFontName(const char *filename, std::string *fontname) {
|
||||
*fontname = classify_font_name;
|
||||
if (*fontname == kUnknownFontName) {
|
||||
// filename is expected to be of the form [lang].[fontname].exp[num]
|
||||
// The [lang], [fontname] and [num] fields should not have '.' characters.
|
||||
const char *basename = strrchr(filename, '/');
|
||||
const char *firstdot = strchr(basename ? basename : filename, '.');
|
||||
const char *lastdot = strrchr(filename, '.');
|
||||
if (firstdot != lastdot && firstdot != nullptr && lastdot != nullptr) {
|
||||
++firstdot;
|
||||
*fontname = firstdot;
|
||||
fontname->resize(lastdot - firstdot);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
|
||||
// Extracts features from the given blob and saves them in the tr_file_data_
|
||||
|
@ -1,33 +0,0 @@
|
||||
/******************************************************************************
|
||||
** Filename: blobclass.h
|
||||
** Purpose: Interface to high level classification and training.
|
||||
** Author: Dan Johnson
|
||||
**
|
||||
** (c) Copyright Hewlett-Packard Company, 1988.
|
||||
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||
** you may not use this file except in compliance with the License.
|
||||
** You may obtain a copy of the License at
|
||||
** http://www.apache.org/licenses/LICENSE-2.0
|
||||
** Unless required by applicable law or agreed to in writing, software
|
||||
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
** See the License for the specific language governing permissions and
|
||||
** limitations under the License.
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef BLOBCLASS_H
|
||||
#define BLOBCLASS_H
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace tesseract {
|
||||
// Finds the name of the training font and returns it in fontname, by cutting
|
||||
// it out based on the expectation that the filename is of the form:
|
||||
// /path/to/dir/[lang].[fontname].exp[num]
|
||||
// The [lang], [fontname] and [num] fields should not have '.' characters.
|
||||
// If the global parameter classify_font_name is set, its value is used instead.
|
||||
void ExtractFontName(const char *filename, std::string *fontname);
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif
|
@ -126,13 +126,8 @@ static int row_y_order( // sort function
|
||||
* Qsort style function to compare 2 TO_ROWS based on their spacing value.
|
||||
*/
|
||||
static int row_spacing_order( // sort function
|
||||
const void *item1, // items to compare
|
||||
const void *item2) {
|
||||
// converted ptr
|
||||
const TO_ROW *row1 = *reinterpret_cast<const TO_ROW *const *>(item1);
|
||||
// converted ptr
|
||||
const TO_ROW *row2 = *reinterpret_cast<const TO_ROW *const *>(item2);
|
||||
|
||||
const TO_ROW *row1, // items to compare
|
||||
const TO_ROW *row2) {
|
||||
return row1->spacing < row2->spacing;
|
||||
}
|
||||
|
||||
|
@ -136,7 +136,7 @@ int main(int argc, char *argv[]) {
|
||||
pCharList = CharList;
|
||||
// The norm protos will count the source protos, so we keep them here in
|
||||
// freeable_protos, so they can be freed later.
|
||||
GenericVector<LIST> freeable_protos;
|
||||
std::vector<LIST> freeable_protos;
|
||||
iterate(pCharList) {
|
||||
// Cluster
|
||||
CharSample = reinterpret_cast<LABELEDLIST> first_node(pCharList);
|
||||
|
@ -2,7 +2,6 @@
|
||||
// File: ctc.cpp
|
||||
// Description: Slightly improved standard CTC to compute the targets.
|
||||
// Author: Ray Smith
|
||||
// Created: Wed Jul 13 15:50:06 PDT 2016
|
||||
//
|
||||
// (C) Copyright 2016, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ -18,7 +17,6 @@
|
||||
|
||||
#include "ctc.h"
|
||||
|
||||
#include "genericvector.h"
|
||||
#include "matrix.h"
|
||||
#include "network.h"
|
||||
#include "networkio.h"
|
||||
@ -217,7 +215,7 @@ static int BestLabel(const GENERIC_2D_ARRAY<float> &outputs, int t) {
|
||||
// to the network outputs.
|
||||
float CTC::CalculateBiasFraction() {
|
||||
// Compute output labels via basic decoding.
|
||||
GenericVector<int> output_labels;
|
||||
std::vector<int> output_labels;
|
||||
for (int t = 0; t < num_timesteps_; ++t) {
|
||||
int label = BestLabel(outputs_, t);
|
||||
while (t + 1 < num_timesteps_ && BestLabel(outputs_, t + 1) == label)
|
||||
@ -226,8 +224,8 @@ float CTC::CalculateBiasFraction() {
|
||||
output_labels.push_back(label);
|
||||
}
|
||||
// Simple bag of labels error calculation.
|
||||
GenericVector<int> truth_counts(num_classes_, 0);
|
||||
GenericVector<int> output_counts(num_classes_, 0);
|
||||
std::vector<int> truth_counts(num_classes_, 0);
|
||||
std::vector<int> output_counts(num_classes_, 0);
|
||||
for (int l = 0; l < num_labels_; ++l) {
|
||||
++truth_counts[labels_[l]];
|
||||
}
|
||||
@ -353,10 +351,10 @@ void CTC::NormalizeSequence(GENERIC_2D_ARRAY<double> *probs) const {
|
||||
void CTC::LabelsToClasses(const GENERIC_2D_ARRAY<double> &probs, NetworkIO *targets) const {
|
||||
// For each timestep compute the max prob for each class over all
|
||||
// instances of the class in the labels_.
|
||||
GenericVector<double> class_probs;
|
||||
std::vector<double> class_probs;
|
||||
for (int t = 0; t < num_timesteps_; ++t) {
|
||||
float *targets_t = targets->f(t);
|
||||
class_probs.init_to_size(num_classes_, 0.0);
|
||||
class_probs.resize(num_classes_);
|
||||
for (int u = 0; u < num_labels_; ++u) {
|
||||
double prob = probs(t, u);
|
||||
// Note that although Graves specifies sum over all labels of the same
|
||||
|
@ -97,7 +97,7 @@ double IntFeatureDist::FeatureDistance(const std::vector<int> &features) const {
|
||||
|
||||
// Compute the distance between the given feature vector and the last
|
||||
// Set feature vector.
|
||||
double IntFeatureDist::DebugFeatureDistance(const GenericVector<int> &features) const {
|
||||
double IntFeatureDist::DebugFeatureDistance(const std::vector<int> &features) const {
|
||||
const int num_test_features = features.size();
|
||||
const double denominator = total_feature_weight_ + num_test_features;
|
||||
double misses = denominator;
|
||||
|
@ -19,7 +19,7 @@
|
||||
#ifndef TESSERACT_CLASSIFY_INTFEATUREDIST_H_
|
||||
#define TESSERACT_CLASSIFY_INTFEATUREDIST_H_
|
||||
|
||||
#include "genericvector.h"
|
||||
#include <vector>
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
@ -53,7 +53,7 @@ public:
|
||||
// Compute the distance between the given feature vector and the last
|
||||
// Set feature vector.
|
||||
double FeatureDistance(const std::vector<int> &features) const;
|
||||
double DebugFeatureDistance(const GenericVector<int> &features) const;
|
||||
double DebugFeatureDistance(const std::vector<int> &features) const;
|
||||
|
||||
private:
|
||||
// Clear all data.
|
||||
|
@ -4,7 +4,6 @@
|
||||
// File: intfeaturemap.cpp
|
||||
// Description: Encapsulation of IntFeatureSpace with IndexMapBiDi
|
||||
// to provide a subspace mapping and fast feature lookup.
|
||||
// Created: Tue Oct 26 08:58:30 PDT 2010
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -133,7 +132,7 @@ int IntFeatureMap::FindNZFeatureMapping(SampleIterator *it) {
|
||||
int total_samples = 0;
|
||||
for (it->Begin(); !it->AtEnd(); it->Next()) {
|
||||
const TrainingSample &sample = it->GetSample();
|
||||
GenericVector<int> features;
|
||||
std::vector<int> features;
|
||||
feature_space_.IndexAndSortFeatures(sample.features(), sample.num_features(), &features);
|
||||
int num_features = features.size();
|
||||
for (int f = 0; f < num_features; ++f)
|
||||
@ -164,7 +163,7 @@ int IntFeatureMap::FinalizeMapping(SampleIterator *it) {
|
||||
}
|
||||
|
||||
// Prints the map features from the set in human-readable form.
|
||||
void IntFeatureMap::DebugMapFeatures(const GenericVector<int> &map_features) const {
|
||||
void IntFeatureMap::DebugMapFeatures(const std::vector<int> &map_features) const {
|
||||
for (int i = 0; i < map_features.size(); ++i) {
|
||||
INT_FEATURE_STRUCT f = InverseMapFeature(map_features[i]);
|
||||
f.print();
|
||||
|
@ -116,7 +116,7 @@ public:
|
||||
}
|
||||
|
||||
// Prints the map features from the set in human-readable form.
|
||||
void DebugMapFeatures(const GenericVector<int> &map_features) const;
|
||||
void DebugMapFeatures(const std::vector<int> &map_features) const;
|
||||
|
||||
private:
|
||||
void Clear();
|
||||
|
@ -89,7 +89,7 @@ bool MasterTrainer::Serialize(FILE *fp) const {
|
||||
return false;
|
||||
if (!fontinfo_table_.Serialize(fp))
|
||||
return false;
|
||||
if (!xheights_.Serialize(fp))
|
||||
if (!tesseract::Serialize(fp, xheights_))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
@ -507,7 +507,7 @@ void MasterTrainer::SetupFlatShapeTable(ShapeTable *shape_table) {
|
||||
// must be clustered in order the fonts arrived, and reverse order of the
|
||||
// characters within each font.
|
||||
// Get a list of the fonts in the order they appeared.
|
||||
GenericVector<int> active_fonts;
|
||||
std::vector<int> active_fonts;
|
||||
int num_shapes = flat_shapes_.NumShapes();
|
||||
for (int s = 0; s < num_shapes; ++s) {
|
||||
int font = flat_shapes_.GetShape(s)[0].font_ids[0];
|
||||
@ -547,7 +547,7 @@ CLUSTERER *MasterTrainer::SetupForClustering(const ShapeTable &shape_table,
|
||||
shape_map.SetMap(shape_id, true);
|
||||
shape_map.Setup();
|
||||
// Reverse the order of the samples to match the previous behavior.
|
||||
GenericVector<const TrainingSample *> sample_ptrs;
|
||||
std::vector<const TrainingSample *> sample_ptrs;
|
||||
SampleIterator it;
|
||||
it.Init(&shape_map, &shape_table, false, &samples_);
|
||||
for (it.Begin(); !it.AtEnd(); it.Next()) {
|
||||
@ -588,10 +588,10 @@ void MasterTrainer::WriteInttempAndPFFMTable(const UNICHARSET &unicharset,
|
||||
// Now write pffmtable. This is complicated by the fact that the adaptive
|
||||
// classifier still wants one indexed by unichar-id, but the static
|
||||
// classifier needs one indexed by its shape class id.
|
||||
// We put the shapetable_cutoffs in a GenericVector, and compute the
|
||||
// We put the shapetable_cutoffs in a vector, and compute the
|
||||
// unicharset cutoffs along the way.
|
||||
GenericVector<uint16_t> shapetable_cutoffs;
|
||||
GenericVector<uint16_t> unichar_cutoffs;
|
||||
std::vector<uint16_t> shapetable_cutoffs;
|
||||
std::vector<uint16_t> unichar_cutoffs;
|
||||
for (int c = 0; c < unicharset.size(); ++c)
|
||||
unichar_cutoffs.push_back(0);
|
||||
/* then write out each class */
|
||||
@ -620,7 +620,7 @@ void MasterTrainer::WriteInttempAndPFFMTable(const UNICHARSET &unicharset,
|
||||
if (fp == nullptr) {
|
||||
tprintf("Error, failed to open file \"%s\"\n", pffmtable_file);
|
||||
} else {
|
||||
shapetable_cutoffs.Serialize(fp);
|
||||
tesseract::Serialize(fp, shapetable_cutoffs);
|
||||
for (int c = 0; c < unicharset.size(); ++c) {
|
||||
const char *unichar = unicharset.id_to_unichar(c);
|
||||
if (strcmp(unichar, " ") == 0) {
|
||||
@ -894,7 +894,8 @@ void MasterTrainer::ClusterShapes(int min_shapes, int max_shape_unichars, float
|
||||
ShapeTable *shapes) {
|
||||
int num_shapes = shapes->NumShapes();
|
||||
int max_merges = num_shapes - min_shapes;
|
||||
auto *shape_dists = new GenericVector<ShapeDist>[num_shapes];
|
||||
// TODO: avoid new / delete.
|
||||
auto *shape_dists = new std::vector<ShapeDist>[num_shapes];
|
||||
float min_dist = kInfiniteDist;
|
||||
int min_s1 = 0;
|
||||
int min_s2 = 0;
|
||||
|
@ -261,7 +261,7 @@ private:
|
||||
// Font metrics gathered from multiple files.
|
||||
FontInfoTable fontinfo_table_;
|
||||
// Array of xheights indexed by font ids in fontinfo_table_;
|
||||
GenericVector<int32_t> xheights_;
|
||||
std::vector<int32_t> xheights_;
|
||||
|
||||
// Non-serialized data initialized by other means or used temporarily
|
||||
// during loading of training samples.
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include <allheaders.h>
|
||||
#include "boxread.h"
|
||||
#include "fontinfo.h"
|
||||
//#include "helpers.h"
|
||||
#include "indexmapbidi.h"
|
||||
#include "intfeaturedist.h"
|
||||
#include "intfeaturemap.h"
|
||||
|
@ -3,7 +3,6 @@
|
||||
* Description: Function to degrade an image (usually of text) as if it
|
||||
* has been printed and then scanned.
|
||||
* Authors: Ray Smith
|
||||
* Created: Tue Nov 19 2013
|
||||
*
|
||||
* (C) Copyright 2013, Google Inc.
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ -22,7 +21,6 @@
|
||||
|
||||
#include <allheaders.h> // from leptonica
|
||||
#include <cstdlib>
|
||||
#include "genericvector.h"
|
||||
#include "helpers.h" // For TRand.
|
||||
#include "rect.h"
|
||||
|
||||
@ -175,7 +173,7 @@ Pix *DegradeImage(Pix *input, int exposure, TRand *randomizer, float *rotation)
|
||||
// Returns nullptr on error. The returned Pix must be pixDestroyed.
|
||||
Pix *PrepareDistortedPix(const Pix *pix, bool perspective, bool invert, bool white_noise,
|
||||
bool smooth_noise, bool blur, int box_reduction, TRand *randomizer,
|
||||
GenericVector<TBOX> *boxes) {
|
||||
std::vector<TBOX> *boxes) {
|
||||
Pix *distorted = pixCopy(nullptr, const_cast<Pix *>(pix));
|
||||
// Things to do to synthetic training data.
|
||||
if ((white_noise || smooth_noise) && randomizer->SignedRand(1.0) > 0.0) {
|
||||
@ -214,7 +212,7 @@ Pix *PrepareDistortedPix(const Pix *pix, bool perspective, bool invert, bool whi
|
||||
// perspective distortion. Width and height only need to be set if there
|
||||
// is no pix. If there is a pix, then they will be taken from there.
|
||||
void GeneratePerspectiveDistortion(int width, int height, TRand *randomizer, Pix **pix,
|
||||
GenericVector<TBOX> *boxes) {
|
||||
std::vector<TBOX> *boxes) {
|
||||
if (pix != nullptr && *pix != nullptr) {
|
||||
width = pixGetWidth(*pix);
|
||||
height = pixGetHeight(*pix);
|
||||
|
@ -3,7 +3,6 @@
|
||||
* Description: Function to degrade an image (usually of text) as if it
|
||||
* has been printed and then scanned.
|
||||
* Authors: Ray Smith
|
||||
* Created: Tue Nov 19 2013
|
||||
*
|
||||
* (C) Copyright 2013, Google Inc.
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ -21,7 +20,6 @@
|
||||
#define TESSERACT_TRAINING_DEGRADEIMAGE_H_
|
||||
|
||||
#include <allheaders.h>
|
||||
#include "genericvector.h"
|
||||
#include "helpers.h" // For TRand.
|
||||
#include "rect.h"
|
||||
|
||||
@ -41,12 +39,12 @@ struct Pix *DegradeImage(struct Pix *input, int exposure, TRand *randomizer, flo
|
||||
// Returns nullptr on error. The returned Pix must be pixDestroyed.
|
||||
Pix *PrepareDistortedPix(const Pix *pix, bool perspective, bool invert, bool white_noise,
|
||||
bool smooth_noise, bool blur, int box_reduction, TRand *randomizer,
|
||||
GenericVector<TBOX> *boxes);
|
||||
std::vector<TBOX> *boxes);
|
||||
// Distorts anything that has a non-null pointer with the same pseudo-random
|
||||
// perspective distortion. Width and height only need to be set if there
|
||||
// is no pix. If there is a pix, then they will be taken from there.
|
||||
void GeneratePerspectiveDistortion(int width, int height, TRand *randomizer, Pix **pix,
|
||||
GenericVector<TBOX> *boxes);
|
||||
std::vector<TBOX> *boxes);
|
||||
// Computes the coefficients of a randomized projective transformation.
|
||||
// The image transform requires backward transformation coefficient, and the
|
||||
// box transform the forward coefficients.
|
||||
|
@ -16,7 +16,6 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "commontraining.h"
|
||||
#include "genericvector.h"
|
||||
#include "lstmtester.h"
|
||||
#include "tprintf.h"
|
||||
|
||||
@ -52,7 +51,7 @@ int main(int argc, char **argv) {
|
||||
tprintf("Failed to load language model from %s!\n", FLAGS_traineddata.c_str());
|
||||
return 1;
|
||||
}
|
||||
GenericVector<char> model_data;
|
||||
std::vector<char> model_data;
|
||||
if (!tesseract::LoadDataFromFile(FLAGS_model.c_str(), &model_data)) {
|
||||
tprintf("Failed to load model from: %s\n", FLAGS_model.c_str());
|
||||
return 1;
|
||||
|
Loading…
Reference in New Issue
Block a user