Merge pull request #3202 from stweil/fix2

Fix unittests
This commit is contained in:
Egor Pugin 2020-12-30 11:55:35 +03:00 committed by GitHub
commit 17b5f46385
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
40 changed files with 469 additions and 284 deletions

View File

@ -38,7 +38,6 @@
#include <cstdio>
#include <functional> // for std::function
#include <list> // for std::list
#include <vector> // for std::vector
struct Pix;
@ -817,7 +816,7 @@ class TESS_API TessBaseAPI {
EquationDetect* equ_detect_; ///< The equation detector.
FileReader reader_; ///< Reads files from any filesystem.
ImageThresholder* thresholder_; ///< Image thresholding module.
std::list<ParagraphModel*>* paragraph_models_;
std::vector<ParagraphModel*>* paragraph_models_;
BLOCK_LIST* block_list_; ///< The page layout.
PAGE_RES* page_res_; ///< The page-level data.
std::string input_file_; ///< Name used by training code.

View File

@ -937,7 +937,7 @@ int TessBaseAPI::RecognizeForChopTest(ETEXT_DESC* monitor) {
while (page_res_it.word() != nullptr) {
WERD_RES *word_res = page_res_it.word();
GenericVector<TBOX> boxes;
std::vector<TBOX> boxes;
tesseract_->MaximallyChopWord(boxes, page_res_it.block()->block,
page_res_it.row()->row, word_res);
page_res_it.forward();
@ -1844,7 +1844,7 @@ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) {
if (text[t] != '\0' || wordstr[w] != '\0') {
// No match.
delete page_res_;
GenericVector<TBOX> boxes;
std::vector<TBOX> boxes;
page_res_ = tesseract_->SetupApplyBoxes(boxes, block_list_);
tesseract_->ReSegmentByClassification(page_res_);
tesseract_->TidyUp(page_res_);
@ -2291,10 +2291,10 @@ void TessBaseAPI::DetectParagraphs(bool after_text_recognition) {
int debug_level = 0;
GetIntVariable("paragraph_debug_level", &debug_level);
if (paragraph_models_ == nullptr)
paragraph_models_ = new std::list<ParagraphModel*>;
paragraph_models_ = new std::vector<ParagraphModel*>;
MutableIterator *result_it = GetMutableIterator();
do { // Detect paragraphs for this block
std::list<ParagraphModel *> models;
std::vector<ParagraphModel *> models;
::tesseract::DetectParagraphs(debug_level, after_text_recognition,
result_it, &models);
paragraph_models_->insert(paragraph_models_->end(), models.begin(), models.end());

View File

@ -113,8 +113,8 @@ static void clear_any_old_text(BLOCK_LIST *block_list) {
PAGE_RES* Tesseract::ApplyBoxes(const char* filename,
bool find_segmentation,
BLOCK_LIST *block_list) {
GenericVector<TBOX> boxes;
GenericVector<STRING> texts, full_texts;
std::vector<TBOX> boxes;
std::vector<STRING> texts, full_texts;
if (!ReadAllBoxes(applybox_page, true, filename, &boxes, &texts, &full_texts,
nullptr)) {
return nullptr; // Can't do it.
@ -205,7 +205,7 @@ void Tesseract::PreenXHeights(BLOCK_LIST *block_list) {
/// Builds a PAGE_RES from the block_list in the way required for ApplyBoxes:
/// All fuzzy spaces are removed, and all the words are maximally chopped.
PAGE_RES* Tesseract::SetupApplyBoxes(const GenericVector<TBOX>& boxes,
PAGE_RES* Tesseract::SetupApplyBoxes(const std::vector<TBOX>& boxes,
BLOCK_LIST *block_list) {
PreenXHeights(block_list);
// Strip all fuzzy space markers to simplify the PAGE_RES.
@ -241,7 +241,7 @@ PAGE_RES* Tesseract::SetupApplyBoxes(const GenericVector<TBOX>& boxes,
/// Tests the chopper by exhaustively running chop_one_blob.
/// The word_res will contain filled chopped_word, seam_array, denorm,
/// box_word and best_state for the maximally chopped word.
void Tesseract::MaximallyChopWord(const GenericVector<TBOX>& boxes,
void Tesseract::MaximallyChopWord(const std::vector<TBOX>& boxes,
BLOCK* block, ROW* row,
WERD_RES* word_res) {
if (!word_res->SetupForRecognition(unicharset, this, BestPix(),

View File

@ -52,8 +52,8 @@ bool Tesseract::TrainLineRecognizer(const char* input_imagename,
return false;
}
}
GenericVector<TBOX> boxes;
GenericVector<STRING> texts;
std::vector<TBOX> boxes;
std::vector<STRING> texts;
// Get the boxes for this page, if there are any.
if (!ReadAllBoxes(applybox_page, false, input_imagename, &boxes, &texts, nullptr,
nullptr) ||
@ -77,8 +77,8 @@ bool Tesseract::TrainLineRecognizer(const char* input_imagename,
// Generates training data for training a line recognizer, eg LSTM.
// Breaks the boxes into lines, normalizes them, converts to ImageData and
// appends them to the given training_data.
void Tesseract::TrainFromBoxes(const GenericVector<TBOX>& boxes,
const GenericVector<STRING>& texts,
void Tesseract::TrainFromBoxes(const std::vector<TBOX>& boxes,
const std::vector<STRING>& texts,
BLOCK_LIST *block_list,
DocumentData* training_data) {
int box_count = boxes.size();
@ -133,8 +133,8 @@ void Tesseract::TrainFromBoxes(const GenericVector<TBOX>& boxes,
// and ground truth boxes/truth text if available in the input.
// The image is not normalized in any way.
ImageData* Tesseract::GetLineData(const TBOX& line_box,
const GenericVector<TBOX>& boxes,
const GenericVector<STRING>& texts,
const std::vector<TBOX>& boxes,
const std::vector<STRING>& texts,
int start_box, int end_box,
const BLOCK& block) {
TBOX revised_box;
@ -145,8 +145,8 @@ ImageData* Tesseract::GetLineData(const TBOX& line_box,
// Copy the boxes and shift them so they are relative to the image.
FCOORD block_rotation(block.re_rotation().x(), -block.re_rotation().y());
ICOORD shift = -revised_box.botleft();
GenericVector<TBOX> line_boxes;
GenericVector<STRING> line_texts;
std::vector<TBOX> line_boxes;
std::vector<STRING> line_texts;
for (int b = start_box; b < end_box; ++b) {
TBOX box = boxes[b];
box.rotate(block_rotation);
@ -154,8 +154,8 @@ ImageData* Tesseract::GetLineData(const TBOX& line_box,
line_boxes.push_back(box);
line_texts.push_back(texts[b]);
}
GenericVector<int> page_numbers;
page_numbers.init_to_size(line_boxes.size(), applybox_page);
std::vector<int> page_numbers;
page_numbers.resize(line_boxes.size(), applybox_page);
image_data->AddBoxes(line_boxes, line_texts, page_numbers);
return image_data;
}

View File

@ -249,7 +249,7 @@ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks,
// allowed_ids.
static void AddAllScriptsConverted(const UNICHARSET& sid_set,
const UNICHARSET& osd_set,
GenericVector<int>* allowed_ids) {
std::vector<int>* allowed_ids) {
for (int i = 0; i < sid_set.get_script_table_size(); ++i) {
if (i != sid_set.null_sid()) {
const char* script = sid_set.get_script_from_script_id(i);
@ -357,7 +357,7 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
to_block, &osd_blobs);
}
if (PSM_OSD_ENABLED(pageseg_mode) && osd_tess != nullptr && osr != nullptr) {
GenericVector<int> osd_scripts;
std::vector<int> osd_scripts;
if (osd_tess != this) {
// We are running osd as part of layout analysis, so constrain the
// scripts to those allowed by *this.

View File

@ -1236,14 +1236,19 @@ const ParagraphModel* ParagraphTheory::AddModel(const ParagraphModel &model) {
}
void ParagraphTheory::DiscardUnusedModels(const SetOfModels &used_models) {
models_->remove_if([this, used_models](ParagraphModel* m) {
bool remove = !used_models.contains(m) && models_we_added_.contains(m);
if (remove) {
models_we_added_.remove(models_we_added_.get_index(m));
size_t w = 0;
for (size_t r = 0; r < models_->size(); r++) {
ParagraphModel* m = (*models_)[r];
if (!used_models.contains(m) && models_we_added_.contains(m)) {
delete m;
} else {
if (r > w) {
(*models_)[w] = m;
}
w++;
}
return remove;
});
}
models_->resize(w);
}
// Examine rows[start, end) and try to determine if an existing non-centered
@ -2272,7 +2277,7 @@ void DetectParagraphs(int debug_level,
GenericVector<RowInfo> *row_infos,
GenericVector<PARA *> *row_owners,
PARA_LIST *paragraphs,
std::list<ParagraphModel *> *models) {
std::vector<ParagraphModel *> *models) {
GenericVector<RowScratchRegisters> rows;
ParagraphTheory theory(models);
@ -2514,7 +2519,7 @@ static void InitializeRowInfo(bool after_recognition,
void DetectParagraphs(int debug_level,
bool after_text_recognition,
const MutableIterator *block_start,
std::list<ParagraphModel *> *models) {
std::vector<ParagraphModel *> *models) {
// Clear out any preconceived notions.
if (block_start->Empty(RIL_TEXTLINE)) {
return;

View File

@ -91,7 +91,7 @@ void DetectParagraphs(int debug_level,
GenericVector<RowInfo> *row_infos,
GenericVector<PARA *> *row_owners,
PARA_LIST *paragraphs,
std::list<ParagraphModel *> *models);
std::vector<ParagraphModel *> *models);
// Given a MutableIterator to the start of a block, run DetectParagraphs on
// that block and commit the results to the underlying ROW and BLOCK structs,
@ -101,7 +101,7 @@ void DetectParagraphs(int debug_level,
void DetectParagraphs(int debug_level,
bool after_text_recognition,
const MutableIterator *block_start,
std::list<ParagraphModel *> *models);
std::vector<ParagraphModel *> *models);
} // namespace

View File

@ -193,10 +193,10 @@ class ParagraphTheory {
public:
// We presume models will outlive us, and that models will take ownership
// of any ParagraphModel *'s we add.
explicit ParagraphTheory(std::list<ParagraphModel *> *models)
explicit ParagraphTheory(std::vector<ParagraphModel *> *models)
: models_(models) {}
std::list<ParagraphModel *> &models() { return *models_; }
const std::list<ParagraphModel *> &models() const { return *models_; }
std::vector<ParagraphModel *> &models() { return *models_; }
const std::vector<ParagraphModel *> &models() const { return *models_; }
// Return an existing model if one that is Comparable() can be found.
// Else, allocate a new copy of model to save and return a pointer to it.
@ -216,7 +216,7 @@ class ParagraphTheory {
int IndexOf(const ParagraphModel *model) const;
private:
std::list<ParagraphModel *> *models_;
std::vector<ParagraphModel *> *models_;
GenericVectorEqEq<ParagraphModel *> models_we_added_;
};

View File

@ -360,7 +360,7 @@ void ResultIterator::MoveToLogicalStartOfWord() {
BeginWord(0);
return;
}
GenericVector<int> blob_order;
std::vector<int> blob_order;
CalculateBlobOrder(&blob_order);
if (blob_order.size() == 0 || blob_order[0] == 0)
return;
@ -370,7 +370,7 @@ void ResultIterator::MoveToLogicalStartOfWord() {
bool ResultIterator::IsAtFinalSymbolOfWord() const {
if (!it_->word())
return true;
GenericVector<int> blob_order;
std::vector<int> blob_order;
CalculateBlobOrder(&blob_order);
return blob_order.size() == 0 || blob_order.back() == blob_index_;
}
@ -378,7 +378,7 @@ bool ResultIterator::IsAtFinalSymbolOfWord() const {
bool ResultIterator::IsAtFirstSymbolOfWord() const {
if (!it_->word())
return true;
GenericVector<int> blob_order;
std::vector<int> blob_order;
CalculateBlobOrder(&blob_order);
return blob_order.size() == 0 || blob_order[0] == blob_index_;
}
@ -472,7 +472,7 @@ bool ResultIterator::Next(PageIteratorLevel level) {
MoveToLogicalStartOfTextline();
return it_->block() != nullptr;
case RIL_SYMBOL: {
GenericVector<int> blob_order;
std::vector<int> blob_order;
CalculateBlobOrder(&blob_order);
int next_blob = 0;
while (next_blob < blob_order.size() &&
@ -674,7 +674,7 @@ void ResultIterator::AppendUTF8WordText(std::string* text) const {
*text += reading_direction_is_ltr ? kLRM : kRLM;
}
GenericVector<int> blob_order;
std::vector<int> blob_order;
CalculateBlobOrder(&blob_order);
for (int i = 0; i < blob_order.size(); i++) {
*text += it_->word()->BestUTF8(blob_order[i], false);

View File

@ -347,15 +347,15 @@ class Tesseract : public Wordrec {
// Generates training data for training a line recognizer, eg LSTM.
// Breaks the boxes into lines, normalizes them, converts to ImageData and
// appends them to the given training_data.
void TrainFromBoxes(const GenericVector<TBOX>& boxes,
const GenericVector<STRING>& texts,
void TrainFromBoxes(const std::vector<TBOX>& boxes,
const std::vector<STRING>& texts,
BLOCK_LIST* block_list, DocumentData* training_data);
// Returns an Imagedata containing the image of the given textline,
// and ground truth boxes/truth text if available in the input.
// The image is not normalized in any way.
ImageData* GetLineData(const TBOX& line_box, const GenericVector<TBOX>& boxes,
const GenericVector<STRING>& texts, int start_box,
ImageData* GetLineData(const TBOX& line_box, const std::vector<TBOX>& boxes,
const std::vector<STRING>& texts, int start_box,
int end_box, const BLOCK& block);
// Helper gets the image of a rectangle, using the block.re_rotation() if
// needed to get to the image, and rotating the result back to horizontal
@ -708,12 +708,12 @@ class Tesseract : public Wordrec {
// Builds a PAGE_RES from the block_list in the way required for ApplyBoxes:
// All fuzzy spaces are removed, and all the words are maximally chopped.
PAGE_RES* SetupApplyBoxes(const GenericVector<TBOX>& boxes,
PAGE_RES* SetupApplyBoxes(const std::vector<TBOX>& boxes,
BLOCK_LIST* block_list);
// Tests the chopper by exhaustively running chop_one_blob.
// The word_res will contain filled chopped_word, seam_array, denorm,
// box_word and best_state for the maximally chopped word.
void MaximallyChopWord(const GenericVector<TBOX>& boxes, BLOCK* block,
void MaximallyChopWord(const std::vector<TBOX>& boxes, BLOCK* block,
ROW* row, WERD_RES* word_res);
// Gather consecutive blobs that match the given box into the best_state
// and corresponding correct_text.

View File

@ -331,9 +331,9 @@ void ImageData::Display() const {
// Adds the supplied boxes and transcriptions that correspond to the correct
// page number.
void ImageData::AddBoxes(const GenericVector<TBOX>& boxes,
const GenericVector<STRING>& texts,
const GenericVector<int>& box_pages) {
void ImageData::AddBoxes(const std::vector<TBOX>& boxes,
const std::vector<STRING>& texts,
const std::vector<int>& box_pages) {
// Copy the boxes and make the transcription.
for (int i = 0; i < box_pages.size(); ++i) {
if (page_number_ >= 0 && box_pages[i] != page_number_) continue;
@ -378,9 +378,9 @@ Pix* ImageData::GetPixInternal(const GenericVector<char>& image_data) {
// match the page number. Returns false on error.
bool ImageData::AddBoxes(const char* box_text) {
if (box_text != nullptr && box_text[0] != '\0') {
GenericVector<TBOX> boxes;
GenericVector<STRING> texts;
GenericVector<int> box_pages;
std::vector<TBOX> boxes;
std::vector<STRING> texts;
std::vector<int> box_pages;
if (ReadMemBoxes(page_number_, /*skip_blanks*/ false, box_text,
/*continue_on_failure*/ true, &boxes, &texts, nullptr,
&box_pages)) {
@ -587,7 +587,7 @@ DocumentCache::~DocumentCache() {}
// Adds all the documents in the list of filenames, counting memory.
// The reader is used to read the files.
bool DocumentCache::LoadDocuments(const GenericVector<STRING>& filenames,
bool DocumentCache::LoadDocuments(const std::vector<STRING>& filenames,
CachingStrategy cache_strategy,
FileReader reader) {
cache_strategy_ = cache_strategy;

View File

@ -181,9 +181,9 @@ class ImageData {
// Adds the supplied boxes and transcriptions that correspond to the correct
// page number.
void AddBoxes(const GenericVector<TBOX>& boxes,
const GenericVector<STRING>& texts,
const GenericVector<int>& box_pages);
void AddBoxes(const std::vector<TBOX>& boxes,
const std::vector<STRING>& texts,
const std::vector<int>& box_pages);
private:
// Saves the given Pix as a PNG-encoded string and destroys it.
@ -335,7 +335,7 @@ class DocumentCache {
}
// Adds all the documents in the list of filenames, counting memory.
// The reader is used to read the files.
bool LoadDocuments(const GenericVector<STRING>& filenames,
bool LoadDocuments(const std::vector<STRING>& filenames,
CachingStrategy cache_strategy, FileReader reader);
// Adds document to the cache.

View File

@ -152,7 +152,7 @@ class GENERIC_2D_ARRAY {
return tesseract::Serialize(fp, &array_[0], size);
}
bool Serialize(tesseract::TFile* fp) const {
bool Serialize(TFile* fp) const {
if (!SerializeSize(fp)) return false;
if (!fp->Serialize(&empty_)) return false;
int size = num_elements();
@ -175,7 +175,7 @@ class GENERIC_2D_ARRAY {
return true;
}
bool DeSerialize(tesseract::TFile* fp) {
bool DeSerialize(TFile* fp) {
return DeSerializeSize(fp) &&
fp->DeSerialize(&empty_) &&
fp->DeSerialize(&array_[0], num_elements());
@ -473,7 +473,7 @@ class GENERIC_2D_ARRAY {
size = dim2_;
return tesseract::Serialize(fp, &size);
}
bool SerializeSize(tesseract::TFile* fp) const {
bool SerializeSize(TFile* fp) const {
uint32_t size = dim1_;
if (!fp->Serialize(&size)) return false;
size = dim2_;
@ -495,7 +495,7 @@ class GENERIC_2D_ARRAY {
Resize(size1, size2, empty_);
return true;
}
bool DeSerializeSize(tesseract::TFile* fp) {
bool DeSerializeSize(TFile* fp) {
int32_t size1, size2;
if (!fp->DeSerialize(&size1)) return false;
if (!fp->DeSerialize(&size2)) return false;
@ -639,7 +639,7 @@ struct MATRIX_COORD {
};
// The MatrixCoordPair contains a MATRIX_COORD and its priority.
using MatrixCoordPair = tesseract::KDPairInc<float, MATRIX_COORD>;
using MatrixCoordPair = KDPairInc<float, MATRIX_COORD>;
} // namespace tesseract

View File

@ -29,7 +29,6 @@
#include <cstdio>
#include <cstdlib>
#include <functional> // for std::function
#include <vector>
namespace tesseract {
@ -37,48 +36,64 @@ namespace tesseract {
// provides automatic deletion of pointers, [De]Serialize that works, and
// sort that works.
template <typename T>
class GenericVector : public std::vector<T> {
using base = std::vector<T>;
class GenericVector {
public:
using std::vector<T>::vector;
using base::begin;
using base::end;
using base::data;
using base::capacity;
using base::reserve;
using base::resize;
using base::back;
using base::clear;
using base::push_back;
GenericVector() {
init(kDefaultVectorSize);
}
GenericVector(int size, const T& init_val) {
init(size);
init_to_size(size, init_val);
}
// Copy
GenericVector(const GenericVector& other) {
this->init(other.size());
this->operator+=(other);
}
GenericVector<T>& operator+=(const GenericVector& other);
GenericVector<T>& operator=(const GenericVector& other);
~GenericVector();
// Reserve some memory.
void reserve(int size);
// Double the size of the internal array.
void double_the_size();
// Resizes to size and sets all values to t.
void init_to_size(int size, const T& t);
void resize(int size, const T& t);
// Resizes to size without any initialization.
void resize_no_init(int size) {
resize(size);
reserve(size);
size_used_ = size;
}
// Return the size used.
int size() const {
return size_used_;
}
// Workaround to avoid g++ -Wsign-compare warnings.
size_t unsigned_size() const {
return size();
static_assert(sizeof(size_used_) <= sizeof(size_t),
"Wow! sizeof(size_t) < sizeof(int32_t)!!");
assert(0 <= size_used_);
return static_cast<size_t>(size_used_);
}
int size_reserved() const {
return capacity();
return size_reserved_;
}
int size() const {
return base::size();
// Return true if empty.
bool empty() const {
return size_used_ == 0;
}
// Return the object from an index.
T& get(int index);
const T& get(int index) const;
T& get(int index) const;
T& back() const;
T& operator[](int index) const;
// Returns the last object and removes it.
T pop_back();
@ -94,6 +109,7 @@ class GenericVector : public std::vector<T> {
T contains_index(int index) const;
// Push an element in the end of the array
int push_back(T object);
void operator+=(const T& t);
// Push an element in the end of the array if the same
@ -117,7 +133,9 @@ class GenericVector : public std::vector<T> {
// Truncates the array to the given size by removing the end.
// If the current size is less, the array is not expanded.
void truncate(int size) {
resize(size);
if (size < size_used_) {
size_used_ = size;
}
}
// Add a callback to be called to delete the elements when the array took
@ -132,7 +150,13 @@ class GenericVector : public std::vector<T> {
compare_cb_ = cb;
}
// Delete objects pointed to by data()[i]
// Clear the array, calling the clear callback function if any.
// All the owned callbacks are also deleted.
// If you don't want the callbacks to be deleted, before calling clear, set
// the callback to nullptr.
void clear();
// Delete objects pointed to by data_[i]
void delete_data_pointers();
// This method clears the current object, then, does a shallow copy of
@ -192,8 +216,8 @@ class GenericVector : public std::vector<T> {
// Reverses the elements of the vector.
void reverse() {
for (int i = 0; i < size() / 2; ++i) {
Swap(&data()[i], &data()[size() - 1 - i]);
for (int i = 0; i < size_used_ / 2; ++i) {
Swap(&data_[i], &data_[size_used_ - 1 - i]);
}
}
@ -209,7 +233,7 @@ class GenericVector : public std::vector<T> {
// to two Ts and returns negative if the first element is to appear earlier
// in the result and positive if it is to appear later, with 0 for equal.
void sort(int (*comparator)(const void*, const void*)) {
qsort(data(), size(), sizeof(*data()), comparator);
qsort(data_, size_used_, sizeof(*data_), comparator);
}
// Searches the array (assuming sorted in ascending order, using sort()) for
@ -217,23 +241,23 @@ class GenericVector : public std::vector<T> {
// Use binary_search to get the index of target, or its nearest candidate.
bool bool_binary_search(const T& target) const {
int index = binary_search(target);
if (index >= size()) {
if (index >= size_used_) {
return false;
}
return data()[index] == target;
return data_[index] == target;
}
// Searches the array (assuming sorted in ascending order, using sort()) for
// an element equal to target and returns the index of the best candidate.
// The return value is conceptually the largest index i such that
// data()[i] <= target or 0 if target < the whole vector.
// data_[i] <= target or 0 if target < the whole vector.
// NOTE that this function uses operator> so really the return value is
// the largest index i such that data()[i] > target is false.
// the largest index i such that data_[i] > target is false.
int binary_search(const T& target) const {
int bottom = 0;
int top = size();
int top = size_used_;
while (top - bottom > 1) {
int middle = (bottom + top) / 2;
if (data()[middle] > target) {
if (data_[middle] > target) {
top = middle;
} else {
bottom = middle;
@ -245,20 +269,20 @@ class GenericVector : public std::vector<T> {
// Compact the vector by deleting elements using operator!= on basic types.
// The vector must be sorted.
void compact_sorted() {
if (size() == 0) {
if (size_used_ == 0) {
return;
}
// First element is in no matter what, hence the i = 1.
int last_write = 0;
for (int i = 1; i < size(); ++i) {
for (int i = 1; i < size_used_; ++i) {
// Finds next unique item and writes it.
if (data()[last_write] != data()[i]) {
data()[++last_write] = data()[i];
if (data_[last_write] != data_[i]) {
data_[++last_write] = data_[i];
}
}
// last_write is the index of a valid data cell, so add 1.
resize(last_write + 1);
size_used_ = last_write + 1;
}
// Returns the index of what would be the target_index_th item in the array
@ -269,26 +293,26 @@ class GenericVector : public std::vector<T> {
// Make sure target_index is legal.
if (target_index < 0) {
target_index = 0; // ensure legal
} else if (target_index >= size()) {
target_index = size() - 1;
} else if (target_index >= size_used_) {
target_index = size_used_ - 1;
}
unsigned int seed = 1;
return choose_nth_item(target_index, 0, size(), &seed);
return choose_nth_item(target_index, 0, size_used_, &seed);
}
// Swaps the elements with the given indices.
void swap(int index1, int index2) {
if (index1 != index2) {
T tmp = data()[index1];
data()[index1] = data()[index2];
data()[index2] = tmp;
T tmp = data_[index1];
data_[index1] = data_[index2];
data_[index2] = tmp;
}
}
// Returns true if all elements of *this are within the given range.
// Only uses operator<
bool WithinBounds(const T& rangemin, const T& rangemax) const {
for (int i = 0; i < size(); ++i) {
if (data()[i] < rangemin || rangemax < data()[i]) {
for (int i = 0; i < size_used_; ++i) {
if (data_[i] < rangemin || rangemax < data_[i]) {
return false;
}
}
@ -306,18 +330,47 @@ class GenericVector : public std::vector<T> {
// vector are small enough that for efficiency it makes sense
// to start with a larger initial size.
static const int kDefaultVectorSize = 4;
int32_t size_used_{};
int32_t size_reserved_{};
T* data_;
std::function<void(T)> clear_cb_;
std::function<bool(const T&, const T&)> compare_cb_;
};
#if defined(_MSC_VER) || defined(__APPLE__)
// MSVC stl does not have ::data() in vector<bool>,
// so we add custom specialization.
// On Apple there are also errors when using std::vector<bool>,
// so we replace it with vector<int> as a workaround.
template <>
class GenericVector<bool> : public std::vector<int> {};
#endif
// The default FileReader loads the whole file into the vector of char,
// returning false on error.
// A file that cannot be opened, is empty, whose size cannot be determined, or
// that is too large for the int-based GenericVector size API is treated as an
// error. One spare byte of capacity is reserved beyond the file contents so
// that the caller can append a '\0' character without a reallocation.
inline bool LoadDataFromFile(const char* filename, GenericVector<char>* data) {
  bool result = false;
  FILE* fp = fopen(filename, "rb");
  if (fp != nullptr) {
    fseek(fp, 0, SEEK_END);
    auto size = std::ftell(fp);
    fseek(fp, 0, SEEK_SET);
    // Trying to open a directory on Linux sets size to LONG_MAX. Catch it here.
    // reserve() and resize_no_init() take an int, so size + 1 must fit in an
    // int as well; the INT_MAX bound covers both cases (INT_MAX <= LONG_MAX)
    // and prevents a silent narrowing of the size for very large files.
    if (size > 0 && size < INT_MAX) {
      // reserve an extra byte in case caller wants to append a '\0' character
      data->reserve(size + 1);
      data->resize_no_init(size);
      result = static_cast<long>(fread(&(*data)[0], 1, size, fp)) == size;
    }
    fclose(fp);
  }
  return result;
}
// The default FileWriter writes the vector of char to the filename file,
// returning false on error.
// An empty vector produces an empty file and counts as success. A failure
// reported by fclose() counts as an error, because buffered data may only be
// written out (and fail) when the stream is closed.
inline bool SaveDataToFile(const GenericVector<char>& data,
                           const char* filename) {
  FILE* fp = fopen(filename, "wb");
  if (fp == nullptr) {
    return false;
  }
  const int size = data.size();
  bool result = true;
  // Only index the vector when it has elements: operator[] asserts that the
  // index is below the used size, so &data[0] is invalid for an empty vector.
  if (size > 0) {
    result = static_cast<int>(fwrite(&data[0], 1, size, fp)) == size;
  }
  if (fclose(fp) != 0) {
    result = false;
  }
  return result;
}
template <typename T>
bool cmp_eq(T const& t1, T const& t2) {
@ -378,9 +431,9 @@ class PointerVector : public GenericVector<T*> {
this->operator+=(other);
}
PointerVector<T>& operator+=(const PointerVector& other) {
this->reserve(this->size() + other.size());
this->reserve(this->size_used_ + other.size_used_);
for (int i = 0; i < other.size(); ++i) {
this->push_back(new T(*other.data()[i]));
this->push_back(new T(*other.data_[i]));
}
return *this;
}
@ -396,15 +449,15 @@ class PointerVector : public GenericVector<T*> {
// Removes an element at the given index and
// shifts the remaining elements to the left.
void remove(int index) {
delete GenericVector<T*>::data()[index];
delete GenericVector<T*>::data_[index];
GenericVector<T*>::remove(index);
}
// Truncates the array to the given size by removing the end.
// If the current size is less, the array is not expanded.
void truncate(int size) {
for (int i = size; i < GenericVector<T*>::size(); ++i) {
delete GenericVector<T*>::data()[i];
for (int i = size; i < GenericVector<T*>::size_used_; ++i) {
delete GenericVector<T*>::data_[i];
}
GenericVector<T*>::truncate(size);
}
@ -415,20 +468,20 @@ class PointerVector : public GenericVector<T*> {
int new_size = 0;
int old_index = 0;
// Until the callback returns true, the elements stay the same.
while (old_index < GenericVector<T*>::size() &&
!delete_cb(GenericVector<T*>::data()[old_index++])) {
while (old_index < GenericVector<T*>::size_used_ &&
!delete_cb(GenericVector<T*>::data_[old_index++])) {
++new_size;
}
// Now just copy anything else that gets false from delete_cb.
for (; old_index < GenericVector<T*>::size(); ++old_index) {
if (!delete_cb(GenericVector<T*>::data()[old_index])) {
GenericVector<T*>::data()[new_size++] =
GenericVector<T*>::data()[old_index];
for (; old_index < GenericVector<T*>::size_used_; ++old_index) {
if (!delete_cb(GenericVector<T*>::data_[old_index])) {
GenericVector<T*>::data_[new_size++] =
GenericVector<T*>::data_[old_index];
} else {
delete GenericVector<T*>::data()[old_index];
delete GenericVector<T*>::data_[old_index];
}
}
GenericVector<T*>::resize(new_size);
GenericVector<T*>::size_used_ = new_size;
}
// Clear the array, calling the clear callback function if any.
@ -446,32 +499,32 @@ class PointerVector : public GenericVector<T*> {
// normal GenericVector of those.
// Returns false in case of error.
bool Serialize(FILE* fp) const {
int32_t used = GenericVector<T*>::size();
int32_t used = GenericVector<T*>::size_used_;
if (fwrite(&used, sizeof(used), 1, fp) != 1) {
return false;
}
for (int i = 0; i < used; ++i) {
int8_t non_null = GenericVector<T*>::data()[i] != nullptr;
int8_t non_null = GenericVector<T*>::data_[i] != nullptr;
if (fwrite(&non_null, sizeof(non_null), 1, fp) != 1) {
return false;
}
if (non_null && !GenericVector<T*>::data()[i]->Serialize(fp)) {
if (non_null && !GenericVector<T*>::data_[i]->Serialize(fp)) {
return false;
}
}
return true;
}
bool Serialize(TFile* fp) const {
int32_t used = GenericVector<T*>::size();
int32_t used = GenericVector<T*>::size_used_;
if (fp->FWrite(&used, sizeof(used), 1) != 1) {
return false;
}
for (int i = 0; i < used; ++i) {
int8_t non_null = GenericVector<T*>::data()[i] != nullptr;
int8_t non_null = GenericVector<T*>::data_[i] != nullptr;
if (fp->FWrite(&non_null, sizeof(non_null), 1) != 1) {
return false;
}
if (non_null && !GenericVector<T*>::data()[i]->Serialize(fp)) {
if (non_null && !GenericVector<T*>::data_[i]->Serialize(fp)) {
return false;
}
}
@ -599,52 +652,99 @@ class GenericVectorEqEq : public GenericVector<T> {
template <typename T>
void GenericVector<T>::init(int size) {
clear();
resize(size);
size_used_ = 0;
if (size <= 0) {
data_ = nullptr;
size_reserved_ = 0;
} else {
if (size < kDefaultVectorSize) {
size = kDefaultVectorSize;
}
data_ = new T[size];
size_reserved_ = size;
}
clear_cb_ = nullptr;
compare_cb_ = nullptr;
}
// Destructor: all cleanup is delegated to clear().
template <typename T>
GenericVector<T>::~GenericVector() {
  this->clear();
}
// Ensures that the internal array can hold at least `size` elements.
// A request that is non-positive, or that the current reservation already
// satisfies, is ignored, so the reservation never shrinks. Otherwise a new
// array of at least kDefaultVectorSize elements is allocated, the elements
// currently in use are copy-assigned into it, and the old array is freed.
template <typename T>
void GenericVector<T>::reserve(int size) {
  const bool already_satisfied = size <= 0 || size <= size_reserved_;
  if (already_satisfied) {
    return;
  }
  // Never allocate fewer than the default number of slots.
  int new_capacity = size;
  if (new_capacity < kDefaultVectorSize) {
    new_capacity = kDefaultVectorSize;
  }
  T* const grown = new T[new_capacity];
  for (int k = 0; k < size_used_; ++k) {
    grown[k] = data_[k];
  }
  delete[] data_;
  size_reserved_ = new_capacity;
  data_ = grown;
}
template <typename T>
void GenericVector<T>::double_the_size() {
if (capacity() == 0) {
if (size_reserved_ == 0) {
reserve(kDefaultVectorSize);
} else {
reserve(2 * capacity());
reserve(2 * size_reserved_);
}
}
// Resizes to size and sets all values to t.
template <typename T>
void GenericVector<T>::init_to_size(int size, const T& t) {
resize(size, t);
reserve(size);
size_used_ = size;
for (int i = 0; i < size; ++i) {
data_[i] = t;
}
}
// Resizes to size and sets all values to t.
// This is simply another name for init_to_size(); the whole job is forwarded
// to it.
template <typename T>
void GenericVector<T>::resize(int size, const T& t) {
  this->init_to_size(size, t);
}
// Return the object from an index.
template <typename T>
T& GenericVector<T>::get(int index) {
assert(index >= 0 && index < size());
return data()[index];
T& GenericVector<T>::get(int index) const {
assert(index >= 0 && index < size_used_);
return data_[index];
}
// Return the object from an index.
template <typename T>
const T& GenericVector<T>::get(int index) const {
assert(index >= 0 && index < size());
return data()[index];
T& GenericVector<T>::operator[](int index) const {
assert(index >= 0 && index < size_used_);
return data_[index];
}
// Returns a reference to the last element in use.
// The vector must not be empty; this is checked with an assert.
template <typename T>
T& GenericVector<T>::back() const {
  assert(size_used_ > 0);
  T* const one_past_last = data_ + size_used_;
  return *(one_past_last - 1);
}
// Returns the last object and removes it.
template <typename T>
T GenericVector<T>::pop_back() {
auto b = back();
base::pop_back();
return b;
assert(size_used_ > 0);
return data_[--size_used_];
}
// Sets the object at the given index to t.
template <typename T>
void GenericVector<T>::set(const T& t, int index) {
assert(index >= 0 && index < size());
data()[index] = t;
assert(index >= 0 && index < size_used_);
data_[index] = t;
}
// Shifts the rest of the elements to the right to make
@ -652,32 +752,40 @@ void GenericVector<T>::set(const T& t, int index) {
// at the specified index.
template <typename T>
void GenericVector<T>::insert(const T& t, int index) {
base::insert(begin() + index, t);
assert(index >= 0 && index <= size_used_);
if (size_reserved_ == size_used_) {
double_the_size();
}
for (int i = size_used_; i > index; --i) {
data_[i] = data_[i - 1];
}
data_[index] = t;
size_used_++;
}
// Removes an element at the given index and
// shifts the remaining elements to the left.
template <typename T>
void GenericVector<T>::remove(int index) {
assert(index >= 0 && index < size());
for (int i = index; i < size() - 1; ++i) {
data()[i] = data()[i + 1];
assert(index >= 0 && index < size_used_);
for (int i = index; i < size_used_ - 1; ++i) {
data_[i] = data_[i + 1];
}
resize(size() - 1);
size_used_--;
}
// Returns true if `index` is a valid position, i.e. 0 <= index < size_used_.
// NOTE(review): the return type is spelled `T`, so the boolean result is
// converted to T on return — confirm against the class declaration whether
// `bool` was intended.
template <typename T>
T GenericVector<T>::contains_index(int index) const {
  return index >= 0 && index < size_used_;
}
// Return the index of the T object.
template <typename T>
int GenericVector<T>::get_index(const T& object) const {
for (int i = 0; i < size(); ++i) {
for (int i = 0; i < size_used_; ++i) {
assert(compare_cb_ != nullptr);
if (compare_cb_(object, data()[i])) {
if (compare_cb_(object, data_[i])) {
return i;
}
}
@ -690,20 +798,38 @@ bool GenericVector<T>::contains(const T& object) const {
return get_index(object) != -1;
}
// Appends a copy of `object` at the end of the array, doubling the reserved
// storage first when it is full. Returns the index the element was stored at.
template <typename T>
int GenericVector<T>::push_back(T object) {
  if (size_used_ == size_reserved_) {
    double_the_size();
  }
  const int slot = size_used_++;
  data_[slot] = object;
  return slot;
}
// Appends `object` only when get_index() does not already find it.
// Returns the index of the existing match, or of the newly appended element.
template <typename T>
int GenericVector<T>::push_back_new(const T& object) {
  const int existing = get_index(object);
  if (existing >= 0) {
    return existing;
  }
  // push_back() reports the slot it wrote to. Returning size() after the
  // push would be one past that slot.
  return push_back(object);
}
// Inserts a copy of `object` at position 0, shifting every stored element
// one slot to the right. Always returns 0, the index of the new element.
template <typename T>
int GenericVector<T>::push_front(const T& object) {
  // Grow before shifting so that data_[size_used_] is a usable slot.
  if (size_used_ == size_reserved_) {
    double_the_size();
  }
  // Shift from the back so no element is overwritten before it is copied.
  for (int i = size_used_; i > 0; --i) {
    data_[i] = data_[i - 1];
  }
  data_[0] = object;
  ++size_used_;
  return 0;
}
@ -714,39 +840,62 @@ void GenericVector<T>::operator+=(const T& t) {
// Appends a copy of every element of `other`, in order, to this vector and
// returns *this.
template <typename T>
GenericVector<T>& GenericVector<T>::operator+=(const GenericVector& other) {
  // Snapshot the element count up front: if `other` is *this, its size grows
  // with every append and a loop bounded by the live size would never end.
  const int count = other.size_used_;
  this->reserve(size_used_ + count);
  for (int i = 0; i < count; ++i) {
    this->operator+=(other.data_[i]);
  }
  return *this;
}
// Copy assignment: empties this vector with truncate(0) and then appends all
// elements of `other`. Assigning a vector to itself leaves it untouched.
template <typename T>
GenericVector<T>& GenericVector<T>::operator=(const GenericVector& other) {
  if (this == &other) {
    return *this;
  }
  this->truncate(0);
  *this += other;
  return *this;
}
// Empties the vector and frees its buffer. When storage has been reserved
// and a clear callback is installed, the callback is invoked on every stored
// element first. Both callbacks are reset to nullptr afterwards.
template <typename T>
void GenericVector<T>::clear() {
  if (clear_cb_ != nullptr && size_reserved_ > 0) {
    int i = 0;
    while (i < size_used_) {
      clear_cb_(data_[i]);
      ++i;
    }
  }
  delete[] data_;
  data_ = nullptr;
  size_reserved_ = 0;
  size_used_ = 0;
  compare_cb_ = nullptr;
  clear_cb_ = nullptr;
}
template <typename T>
void GenericVector<T>::delete_data_pointers() {
for (int i = 0; i < size(); ++i) {
delete data()[i];
for (int i = 0; i < size_used_; ++i) {
delete data_[i];
}
}
template <typename T>
bool GenericVector<T>::write(FILE* f,
std::function<bool(FILE*, const T&)> cb) const {
int32_t cp = capacity();
if (fwrite(&cp, sizeof(cp), 1, f) != 1) {
if (fwrite(&size_reserved_, sizeof(size_reserved_), 1, f) != 1) {
return false;
}
int32_t sz = size();
if (fwrite(&sz, sizeof(sz), 1, f) != 1) {
if (fwrite(&size_used_, sizeof(size_used_), 1, f) != 1) {
return false;
}
if (cb != nullptr) {
for (int i = 0; i < size(); ++i) {
if (!cb(f, data()[i])) {
for (int i = 0; i < size_used_; ++i) {
if (!cb(f, data_[i])) {
return false;
}
}
} else {
if (fwrite(data(), sizeof(T), size(), f) != unsigned_size()) {
if (fwrite(data_, sizeof(T), size_used_, f) != unsigned_size()) {
return false;
}
}
@ -756,23 +905,22 @@ bool GenericVector<T>::write(FILE* f,
template <typename T>
bool GenericVector<T>::read(TFile* f,
std::function<bool(TFile*, T*)> cb) {
int32_t reserved, size;
int32_t reserved;
if (f->FReadEndian(&reserved, sizeof(reserved), 1) != 1) {
return false;
}
reserve(reserved);
if (f->FReadEndian(&size, sizeof(size), 1) != 1) {
if (f->FReadEndian(&size_used_, sizeof(size_used_), 1) != 1) {
return false;
}
resize(size);
if (cb != nullptr) {
for (int i = 0; i < size; ++i) {
if (!cb(f, data() + i)) {
for (int i = 0; i < size_used_; ++i) {
if (!cb(f, data_ + i)) {
return false;
}
}
} else {
if (f->FReadEndian(data(), sizeof(T), size) != size) {
if (f->FReadEndian(data_, sizeof(T), size_used_) != size_used_) {
return false;
}
}
@ -783,22 +931,20 @@ bool GenericVector<T>::read(TFile* f,
// read/write of T will work. Returns false in case of error.
template <typename T>
bool GenericVector<T>::Serialize(FILE* fp) const {
int32_t sz = size();
if (fwrite(&sz, sizeof(sz), 1, fp) != 1) {
if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) {
return false;
}
if (fwrite(data(), sizeof(T), sz, fp) != unsigned_size()) {
if (fwrite(data_, sizeof(*data_), size_used_, fp) != unsigned_size()) {
return false;
}
return true;
}
template <typename T>
bool GenericVector<T>::Serialize(TFile* fp) const {
int32_t sz = size();
if (fp->FWrite(&sz, sizeof(sz), 1) != 1) {
if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) {
return false;
}
if (fp->FWrite(data(), sizeof(T), sz) != sz) {
if (fp->FWrite(data_, sizeof(*data_), size_used_) != size_used_) {
return false;
}
return true;
@ -822,13 +968,14 @@ bool GenericVector<T>::DeSerialize(bool swap, FILE* fp) {
if (reserved > UINT16_MAX) {
return false;
}
resize(reserved);
if (fread(data(), sizeof(T), size(), fp) != unsigned_size()) {
reserve(reserved);
size_used_ = reserved;
if (fread(data_, sizeof(T), size_used_, fp) != unsigned_size()) {
return false;
}
if (swap) {
for (int i = 0; i < size(); ++i) {
ReverseN(&data()[i], sizeof(data()[i]));
for (int i = 0; i < size_used_; ++i) {
ReverseN(&data_[i], sizeof(data_[i]));
}
}
return true;
@ -845,8 +992,9 @@ bool GenericVector<T>::DeSerialize(TFile* fp) {
if (reserved > limit) {
return false;
}
resize(reserved);
return fp->FReadEndian(data(), sizeof(T), size()) == size();
reserve(reserved);
size_used_ = reserved;
return fp->FReadEndian(data_, sizeof(T), size_used_) == size_used_;
}
template <typename T>
bool GenericVector<T>::SkipDeSerialize(TFile* fp) {
@ -862,12 +1010,11 @@ bool GenericVector<T>::SkipDeSerialize(TFile* fp) {
// Returns false in case of error.
template <typename T>
bool GenericVector<T>::SerializeClasses(FILE* fp) const {
int32_t sz = size();
if (fwrite(&sz, sizeof(sz), 1, fp) != 1) {
if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) {
return false;
}
for (int i = 0; i < sz; ++i) {
if (!data()[i].Serialize(fp)) {
for (int i = 0; i < size_used_; ++i) {
if (!data_[i].Serialize(fp)) {
return false;
}
}
@ -875,12 +1022,11 @@ bool GenericVector<T>::SerializeClasses(FILE* fp) const {
}
template <typename T>
bool GenericVector<T>::SerializeClasses(TFile* fp) const {
int32_t sz = size();
if (fp->FWrite(&sz, sizeof(sz), 1) != 1) {
if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) {
return false;
}
for (int i = 0; i < sz; ++i) {
if (!data()[i].Serialize(fp)) {
for (int i = 0; i < size_used_; ++i) {
if (!data_[i].Serialize(fp)) {
return false;
}
}
@ -904,7 +1050,7 @@ bool GenericVector<T>::DeSerializeClasses(bool swap, FILE* fp) {
T empty;
init_to_size(reserved, empty);
for (int i = 0; i < reserved; ++i) {
if (!data()[i].DeSerialize(swap, fp)) {
if (!data_[i].DeSerialize(swap, fp)) {
return false;
}
}
@ -919,7 +1065,7 @@ bool GenericVector<T>::DeSerializeClasses(TFile* fp) {
T empty;
init_to_size(reserved, empty);
for (int i = 0; i < reserved; ++i) {
if (!data()[i].DeSerialize(fp)) {
if (!data_[i].DeSerialize(fp)) {
return false;
}
}
@ -943,7 +1089,17 @@ bool GenericVector<T>::SkipDeSerializeClasses(TFile* fp) {
// its argument, and finally invalidates its argument.
template <typename T>
void GenericVector<T>::move(GenericVector<T>* from) {
  // Release whatever this vector currently holds. No copy of *from is made
  // beforehand: the buffer is taken over wholesale below, so a copy would
  // only be destroyed again by this clear().
  this->clear();
  // Take over the buffer, the bookkeeping and both callbacks.
  this->data_ = from->data_;
  this->size_reserved_ = from->size_reserved_;
  this->size_used_ = from->size_used_;
  this->compare_cb_ = from->compare_cb_;
  this->clear_cb_ = from->clear_cb_;
  // Leave *from empty so it no longer refers to the transferred buffer.
  from->data_ = nullptr;
  from->clear_cb_ = nullptr;
  from->compare_cb_ = nullptr;
  from->size_used_ = 0;
  from->size_reserved_ = 0;
}
template <typename T>
@ -974,7 +1130,7 @@ int GenericVector<T>::choose_nth_item(int target_index, int start, int end,
return start;
}
if (num_elements == 2) {
if (data()[start] < data()[start + 1]) {
if (data_[start] < data_[start + 1]) {
return target_index > start ? start + 1 : start;
}
return target_index > start ? start : start + 1;
@ -993,9 +1149,9 @@ int GenericVector<T>::choose_nth_item(int target_index, int start, int end,
int next_lesser = start;
int prev_greater = end;
for (int next_sample = start + 1; next_sample < prev_greater;) {
if (data()[next_sample] < data()[next_lesser]) {
if (data_[next_sample] < data_[next_lesser]) {
swap(next_lesser++, next_sample++);
} else if (data()[next_sample] == data()[next_lesser]) {
} else if (data_[next_sample] == data_[next_lesser]) {
++next_sample;
} else {
swap(--prev_greater, next_sample);

View File

@ -133,6 +133,16 @@ TFile::~TFile() {
delete data_;
}
// Reads a length-prefixed char vector: first a uint32_t element count, then
// that many chars into `data`, which is resized to the count.
// Returns false if either read fails.
bool TFile::DeSerialize(std::vector<char>& data) {
  uint32_t size = 0;
  if (!DeSerialize(&size)) {
    return false;
  }
  // TODO: optimize.
  data.resize(size);
  if (size == 0) {
    // Nothing follows the count; returning here also avoids forming
    // &data[0] on an empty vector, which is undefined behavior.
    return true;
  }
  return DeSerialize(&data[0], data.size());
}
// Reads `count` chars into `buffer`; true only if FRead reports exactly
// `count` items.
bool TFile::DeSerialize(char* buffer, size_t count) {
  const auto items_read = FRead(buffer, sizeof(*buffer), count);
  return items_read == count;
}
@ -177,6 +187,14 @@ bool TFile::DeSerialize(uint64_t* buffer, size_t count) {
return FReadEndian(buffer, sizeof(*buffer), count) == count;
}
// Writes `data` as a length-prefixed char vector: a uint32_t element count
// followed by the chars themselves.
// Returns false if a write fails or the count does not fit in 32 bits.
bool TFile::Serialize(const std::vector<char>& data) {
  uint32_t size = static_cast<uint32_t>(data.size());
  if (size != data.size()) {
    // The count would be truncated, producing a prefix that does not match
    // the payload; refuse rather than write an inconsistent record.
    return false;
  }
  if (!Serialize(&size)) {
    return false;
  }
  if (size == 0) {
    // No payload to write; also avoids forming &data[0] on an empty vector,
    // which is undefined behavior.
    return true;
  }
  return Serialize(&data[0], size);
}
// Writes `count` chars from `buffer`; true only if FWrite reports exactly
// `count` items.
bool TFile::Serialize(const char* buffer, size_t count) {
  const auto items_written = FWrite(buffer, sizeof(*buffer), count);
  return items_written == count;
}

View File

@ -91,6 +91,7 @@ class TFile {
}
// Deserialize data.
bool DeSerialize(std::vector<char>& data);
bool DeSerialize(char* data, size_t count = 1);
bool DeSerialize(double* data, size_t count = 1);
bool DeSerialize(float* data, size_t count = 1);
@ -104,6 +105,7 @@ class TFile {
bool DeSerialize(uint64_t* data, size_t count = 1);
// Serialize data.
bool Serialize(const std::vector<char>& data);
bool Serialize(const char* data, size_t count = 1);
bool Serialize(const double* data, size_t count = 1);
bool Serialize(const float* data, size_t count = 1);

View File

@ -96,7 +96,7 @@ bool TessdataManager::LoadArchiveFile(const char *filename) {
#endif
bool TessdataManager::Init(const char *data_file_name) {
GenericVector<char> data;
std::vector<char> data;
if (reader_ == nullptr) {
#if defined(HAVE_LIBARCHIVE)
if (LoadArchiveFile(data_file_name)) return true;
@ -155,7 +155,7 @@ bool TessdataManager::SaveFile(const char* filename,
FileWriter writer) const {
// TODO: This method supports only the proprietary file format.
ASSERT_HOST(is_loaded_);
GenericVector<char> data;
std::vector<char> data;
Serialize(&data);
if (writer == nullptr)
return SaveDataToFile(data, filename);
@ -164,7 +164,7 @@ bool TessdataManager::SaveFile(const char* filename,
}
// Serializes to the given vector.
void TessdataManager::Serialize(GenericVector<char> *data) const {
void TessdataManager::Serialize(std::vector<char> *data) const {
// TODO: This method supports only the proprietary file format.
ASSERT_HOST(is_loaded_);
// Compute the offset_table and total size.
@ -178,7 +178,7 @@ void TessdataManager::Serialize(GenericVector<char> *data) const {
offset += entries_[i].size();
}
}
data->init_to_size(offset, 0);
data->resize(offset, 0);
int32_t num_entries = TESSDATA_NUM_ENTRIES;
TFile fp;
fp.OpenWrite(data);

View File

@ -151,7 +151,7 @@ class TessdataManager {
// Saves to the given filename.
bool SaveFile(const char* filename, FileWriter writer) const;
// Serializes to the given vector.
void Serialize(GenericVector<char> *data) const;
void Serialize(std::vector<char> *data) const;
// Resets to the initial state, keeping the reader.
void Clear();

View File

@ -716,8 +716,8 @@ int ShapeTable::AddUnicharToResults(
int result_index = unichar_map->get(unichar_id);
if (result_index < 0) {
UnicharRating result(unichar_id, rating);
results->push_back(result);
result_index = results->size();
results->push_back(result);
(*unichar_map)[unichar_id] = result_index;
}
return result_index;

View File

@ -377,7 +377,7 @@ class DawgPositionVector : public GenericVector<DawgPosition> {
bool debug,
const char *debug_msg) {
for (int i = 0; i < size(); ++i) {
if (data()[i] == new_pos) return false;
if (data_[i] == new_pos) return false;
}
push_back(new_pos);
if (debug) {

View File

@ -162,7 +162,7 @@ int main(int argc, char **argv) {
return EXIT_FAILURE;
}
recognizer.ConvertToInt();
GenericVector<char> lstm_data;
std::vector<char> lstm_data;
fp.OpenWrite(&lstm_data);
ASSERT_HOST(recognizer.Serialize(&tm, &fp));
tm.OverwriteEntry(tesseract::TESSDATA_LSTM, &lstm_data[0],

View File

@ -34,7 +34,7 @@ namespace tesseract {
// can do its own thing. If lang is empty, returns true but does nothing.
// NOTE that suffix should contain any required . for the filename.
bool WriteFile(const std::string& output_dir, const std::string& lang,
const std::string& suffix, const GenericVector<char>& data,
const std::string& suffix, const std::vector<char>& data,
FileWriter writer) {
if (lang.empty()) return true;
std::string dirname = output_dir + "/" + lang;
@ -56,7 +56,7 @@ bool WriteFile(const std::string& output_dir, const std::string& lang,
// On failure emits a warning message and returns an empty STRING.
STRING ReadFile(const std::string& filename, FileReader reader) {
if (filename.empty()) return STRING();
GenericVector<char> data;
std::vector<char> data;
bool read_result;
if (reader == nullptr)
read_result = LoadDataFromFile(filename.c_str(), &data);
@ -71,7 +71,7 @@ STRING ReadFile(const std::string& filename, FileReader reader) {
bool WriteUnicharset(const UNICHARSET& unicharset, const std::string& output_dir,
const std::string& lang, FileWriter writer,
TessdataManager* traineddata) {
GenericVector<char> unicharset_data;
std::vector<char> unicharset_data;
TFile fp;
fp.OpenWrite(&unicharset_data);
if (!unicharset.save_to_file(&fp)) return false;
@ -107,13 +107,13 @@ bool WriteRecoder(const UNICHARSET& unicharset, bool pass_through,
}
}
TFile fp;
GenericVector<char> recoder_data;
std::vector<char> recoder_data;
fp.OpenWrite(&recoder_data);
if (!recoder.Serialize(&fp)) return false;
traineddata->OverwriteEntry(TESSDATA_LSTM_RECODER, &recoder_data[0],
recoder_data.size());
STRING encoding = recoder.GetEncodingAsString(unicharset);
recoder_data.init_to_size(encoding.length(), 0);
recoder_data.resize(encoding.length(), 0);
memcpy(&recoder_data[0], &encoding[0], encoding.length());
STRING suffix;
suffix.add_str_int(".charset_size=", recoder.code_range());
@ -134,7 +134,7 @@ static bool WriteDawg(const std::vector<STRING>& words,
std::unique_ptr<SquishedDawg> dawg(trie.trie_to_dawg());
if (dawg == nullptr || dawg->NumEdges() == 0) return false;
TFile fp;
GenericVector<char> dawg_data;
std::vector<char> dawg_data;
fp.OpenWrite(&dawg_data);
if (!dawg->write_squished_dawg(&fp)) return false;
traineddata->OverwriteEntry(file_type, &dawg_data[0], dawg_data.size());
@ -228,7 +228,7 @@ int CombineLangModel(const UNICHARSET& unicharset, const std::string& script_dir
}
// Traineddata file.
GenericVector<char> traineddata_data;
std::vector<char> traineddata_data;
traineddata.Serialize(&traineddata_data);
if (!WriteFile(output_dir, lang, ".traineddata", traineddata_data, writer)) {
tprintf("Error writing output traineddata file!!\n");

View File

@ -30,7 +30,7 @@ namespace tesseract {
// can do its own thing. If lang is empty, returns true but does nothing.
// NOTE that suffix should contain any required . for the filename.
bool WriteFile(const std::string& output_dir, const std::string& lang,
const std::string& suffix, const GenericVector<char>& data,
const std::string& suffix, const std::vector<char>& data,
FileWriter writer);
// Helper reads a file with optional reader and returns a STRING.
// On failure emits a warning message and returns an empty STRING.

View File

@ -29,7 +29,7 @@ LSTMTester::LSTMTester(int64_t max_memory)
// tesseract into memory ready for testing. Returns false if nothing was
// loaded. The arg is a filename of a file that lists the filenames.
bool LSTMTester::LoadAllEvalData(const char* filenames_file) {
GenericVector<STRING> filenames;
std::vector<STRING> filenames;
if (!LoadFileLinesToStrings(filenames_file, &filenames)) {
tprintf("Failed to load list of eval filenames from %s\n",
filenames_file);
@ -41,7 +41,7 @@ bool LSTMTester::LoadAllEvalData(const char* filenames_file) {
// Loads a set of lstmf files that were created using the lstm.train config to
// tesseract into memory ready for testing. Returns false if nothing was
// loaded.
bool LSTMTester::LoadAllEvalData(const GenericVector<STRING>& filenames) {
bool LSTMTester::LoadAllEvalData(const std::vector<STRING>& filenames) {
test_data_.Clear();
bool result = test_data_.LoadDocuments(filenames, CS_SEQUENTIAL, nullptr);
total_pages_ = test_data_.TotalPages();

View File

@ -38,7 +38,7 @@ class LSTMTester {
// Loads a set of lstmf files that were created using the lstm.train config to
// tesseract into memory ready for testing. Returns false if nothing was
// loaded.
bool LoadAllEvalData(const GenericVector<STRING>& filenames);
bool LoadAllEvalData(const std::vector<STRING>& filenames);
// Runs an evaluation asynchronously on the stored eval data and returns a
// string describing the results of the previous test. Args match TestCallback

View File

@ -267,7 +267,7 @@ void LSTMTrainer::DebugNetwork() {
// Loads a set of lstmf files that were created using the lstm.train config to
// tesseract into memory ready for training. Returns false if nothing was
// loaded.
bool LSTMTrainer::LoadAllTrainingData(const GenericVector<STRING>& filenames,
bool LSTMTrainer::LoadAllTrainingData(const std::vector<STRING>& filenames,
CachingStrategy cache_strategy,
bool randomly_rotate) {
randomly_rotate_ = randomly_rotate;
@ -302,7 +302,7 @@ bool LSTMTrainer::MaintainCheckpoints(TestCallback tester, STRING* log_msg) {
}
}
bool result = true; // Something interesting happened.
GenericVector<char> rec_model_data;
std::vector<char> rec_model_data;
if (error_rate < best_error_rate_) {
SaveRecognitionDump(&rec_model_data);
log_msg->add_str_double(" New best char error = ", error_rate);
@ -335,7 +335,7 @@ bool LSTMTrainer::MaintainCheckpoints(TestCallback tester, STRING* log_msg) {
// Error rate has ballooned. Go back to the best model.
*log_msg += "\nDivergence! ";
// Copy best_trainer_ before reading it, as it will get overwritten.
GenericVector<char> revert_data(best_trainer_);
std::vector<char> revert_data(best_trainer_);
if (ReadTrainingDump(revert_data, this)) {
LogIterations("Reverted to", log_msg);
ReduceLearningRates(this, log_msg);
@ -354,7 +354,7 @@ bool LSTMTrainer::MaintainCheckpoints(TestCallback tester, STRING* log_msg) {
}
if (checkpoint_name_.length() > 0) {
// Write a current checkpoint.
GenericVector<char> checkpoint;
std::vector<char> checkpoint;
if (!SaveTrainingDump(FULL, this, &checkpoint) ||
!SaveDataToFile(checkpoint, checkpoint_name_.c_str())) {
*log_msg += " failed to write checkpoint.";
@ -420,14 +420,14 @@ bool LSTMTrainer::Serialize(SerializeAmount serialize_amount,
if (!fp->Serialize(&worst_error_rates_[0], countof(worst_error_rates_))) return false;
if (!fp->Serialize(&worst_iteration_)) return false;
if (!fp->Serialize(&stall_iteration_)) return false;
if (!best_model_data_.Serialize(fp)) return false;
if (!worst_model_data_.Serialize(fp)) return false;
if (serialize_amount != NO_BEST_TRAINER && !best_trainer_.Serialize(fp))
if (!fp->Serialize(best_model_data_)) return false;
if (!fp->Serialize(worst_model_data_)) return false;
if (serialize_amount != NO_BEST_TRAINER && !fp->Serialize(best_trainer_))
return false;
GenericVector<char> sub_data;
std::vector<char> sub_data;
if (sub_trainer_ != nullptr && !SaveTrainingDump(LIGHT, sub_trainer_, &sub_data))
return false;
if (!sub_data.Serialize(fp)) return false;
if (!fp->Serialize(sub_data)) return false;
if (!best_error_history_.Serialize(fp)) return false;
if (!best_error_iterations_.Serialize(fp)) return false;
return fp->Serialize(&improvement_steps_);
@ -464,11 +464,11 @@ bool LSTMTrainer::DeSerialize(const TessdataManager* mgr, TFile* fp) {
if (!fp->DeSerialize(&worst_error_rates_[0], countof(worst_error_rates_))) return false;
if (!fp->DeSerialize(&worst_iteration_)) return false;
if (!fp->DeSerialize(&stall_iteration_)) return false;
if (!best_model_data_.DeSerialize(fp)) return false;
if (!worst_model_data_.DeSerialize(fp)) return false;
if (amount != NO_BEST_TRAINER && !best_trainer_.DeSerialize(fp)) return false;
GenericVector<char> sub_data;
if (!sub_data.DeSerialize(fp)) return false;
if (!fp->DeSerialize(best_model_data_)) return false;
if (!fp->DeSerialize(worst_model_data_)) return false;
if (amount != NO_BEST_TRAINER && !fp->DeSerialize(best_trainer_)) return false;
std::vector<char> sub_data;
if (!fp->DeSerialize(sub_data)) return false;
delete sub_trainer_;
if (sub_data.empty()) {
sub_trainer_ = nullptr;
@ -542,7 +542,7 @@ SubTrainerResult LSTMTrainer::UpdateSubtrainer(STRING* log_msg) {
if (sub_error < best_error_rate_ &&
sub_margin >= kSubTrainerMarginFraction) {
// The sub_trainer_ has won the race to a new best. Switch to it.
GenericVector<char> updated_trainer;
std::vector<char> updated_trainer;
SaveTrainingDump(LIGHT, sub_trainer_, &updated_trainer);
ReadTrainingDump(updated_trainer, this);
log_msg->add_str_int(" Sub trainer wins at iteration ",
@ -594,7 +594,7 @@ int LSTMTrainer::ReduceLayerLearningRates(double factor, int num_samples,
ok_sums[i].init_to_size(num_layers, 0.0);
}
double momentum_factor = 1.0 / (1.0 - momentum_);
GenericVector<char> orig_trainer;
std::vector<char> orig_trainer;
samples_trainer->SaveTrainingDump(LIGHT, this, &orig_trainer);
for (int i = 0; i < num_layers; ++i) {
Network* layer = GetLayer(layers[i]);
@ -624,7 +624,7 @@ int LSTMTrainer::ReduceLayerLearningRates(double factor, int num_samples,
copy_trainer.TrainOnLine(samples_trainer, true);
if (trainingdata == nullptr) continue;
// We'll now use this trainer again for each layer.
GenericVector<char> updated_trainer;
std::vector<char> updated_trainer;
samples_trainer->SaveTrainingDump(LIGHT, &copy_trainer, &updated_trainer);
for (int i = 0; i < num_layers; ++i) {
if (num_weights[i] == 0) continue;
@ -871,7 +871,7 @@ Trainability LSTMTrainer::PrepareForBackward(const ImageData* trainingdata,
// actually serialized.
bool LSTMTrainer::SaveTrainingDump(SerializeAmount serialize_amount,
const LSTMTrainer* trainer,
GenericVector<char>* data) const {
std::vector<char>* data) const {
TFile fp;
fp.OpenWrite(data);
return trainer->Serialize(serialize_amount, &mgr_, &fp);
@ -891,7 +891,7 @@ bool LSTMTrainer::ReadLocalTrainingDump(const TessdataManager* mgr,
// Writes the full recognition traineddata to the given filename.
bool LSTMTrainer::SaveTraineddata(const char* filename) {
GenericVector<char> recognizer_data;
std::vector<char> recognizer_data;
SaveRecognitionDump(&recognizer_data);
mgr_.OverwriteEntry(TESSDATA_LSTM, &recognizer_data[0],
recognizer_data.size());
@ -899,7 +899,7 @@ bool LSTMTrainer::SaveTraineddata(const char* filename) {
}
// Writes the recognizer to memory, so that it can be used for testing later.
void LSTMTrainer::SaveRecognitionDump(GenericVector<char>* data) const {
void LSTMTrainer::SaveRecognitionDump(std::vector<char>* data) const {
TFile fp;
fp.OpenWrite(data);
network_->SetEnableTraining(TS_TEMP_DISABLE);
@ -1260,7 +1260,7 @@ void LSTMTrainer::RollErrorBuffers() {
// Tester is an externally supplied callback function that tests on some
// data set with a given model and records the error rates in a graph.
STRING LSTMTrainer::UpdateErrorGraph(int iteration, double error_rate,
const GenericVector<char>& model_data,
const std::vector<char>& model_data,
TestCallback tester) {
if (error_rate > best_error_rate_
&& iteration < best_iteration_ + kErrorGraphInterval) {
@ -1287,7 +1287,7 @@ STRING LSTMTrainer::UpdateErrorGraph(int iteration, double error_rate,
worst_model_data_.size());
result = tester(worst_iteration_, worst_error_rates_, mgr_,
CurrentTrainingStage());
worst_model_data_.truncate(0);
worst_model_data_.clear();
best_model_data_ = model_data;
}
best_error_rate_ = error_rate;
@ -1322,7 +1322,7 @@ STRING LSTMTrainer::UpdateErrorGraph(int iteration, double error_rate,
CurrentTrainingStage());
}
if (result.length() > 0)
best_model_data_.truncate(0);
best_model_data_.clear();
worst_model_data_ = model_data;
}
}

View File

@ -135,7 +135,7 @@ class LSTMTrainer : public LSTMRecognizer {
int learning_iteration() const { return learning_iteration_; }
int32_t improvement_steps() const { return improvement_steps_; }
void set_perfect_delay(int delay) { perfect_delay_ = delay; }
const GenericVector<char>& best_trainer() const { return best_trainer_; }
const std::vector<char>& best_trainer() const { return best_trainer_; }
// Returns the error that was just calculated by PrepareForBackward.
double NewSingleError(ErrorTypes type) const {
return error_buffers_[type][training_iteration() % kRollingBufferSize_];
@ -167,7 +167,7 @@ class LSTMTrainer : public LSTMRecognizer {
// Loads a set of lstmf files that were created using the lstm.train config to
// tesseract into memory ready for training. Returns false if nothing was
// loaded.
bool LoadAllTrainingData(const GenericVector<STRING>& filenames,
bool LoadAllTrainingData(const std::vector<STRING>& filenames,
CachingStrategy cache_strategy,
bool randomly_rotate);
@ -269,7 +269,7 @@ class LSTMTrainer : public LSTMRecognizer {
// actually serialized.
bool SaveTrainingDump(SerializeAmount serialize_amount,
const LSTMTrainer* trainer,
GenericVector<char>* data) const;
std::vector<char>* data) const;
// Reads previously saved trainer from memory. *this must always be the
// master trainer that retains the only copy of the training data and
@ -294,7 +294,7 @@ class LSTMTrainer : public LSTMRecognizer {
bool SaveTraineddata(const char* filename);
// Writes the recognizer to memory, so that it can be used for testing later.
void SaveRecognitionDump(GenericVector<char>* data) const;
void SaveRecognitionDump(std::vector<char>* data) const;
// Returns a suitable filename for a training dump, based on the model_base_,
// the iteration and the error rates.
@ -375,7 +375,7 @@ class LSTMTrainer : public LSTMRecognizer {
// Given that error_rate is either a new min or max, updates the best/worst
// error rates, and record of progress.
STRING UpdateErrorGraph(int iteration, double error_rate,
const GenericVector<char>& model_data,
const std::vector<char>& model_data,
TestCallback tester);
protected:
@ -420,10 +420,10 @@ class LSTMTrainer : public LSTMRecognizer {
// Iteration at which the process will be thought stalled.
int stall_iteration_;
// Saved recognition models for computing test error for graph points.
GenericVector<char> best_model_data_;
GenericVector<char> worst_model_data_;
std::vector<char> best_model_data_;
std::vector<char> worst_model_data_;
// Saved trainer for reverting back to last known best.
GenericVector<char> best_trainer_;
std::vector<char> best_trainer_;
// A subsidiary trainer running with a different learning rate until either
// *this or sub_trainer_ hits a new best.
LSTMTrainer* sub_trainer_;

View File

@ -136,7 +136,7 @@ int main(int argc, char **argv) {
tprintf("Must supply a list of training filenames! --train_listfile\n");
return EXIT_FAILURE;
}
GenericVector<STRING> filenames;
std::vector<STRING> filenames;
if (!tesseract::LoadFileLinesToStrings(FLAGS_train_listfile.c_str(),
&filenames)) {
tprintf("Failed to load list of training filenames from %s\n",

View File

@ -264,7 +264,7 @@ SEAM *Wordrec::chop_numbered_blob(TWERD *word, int32_t blob_number,
}
SEAM *Wordrec::chop_overlapping_blob(const GenericVector<TBOX>& boxes,
SEAM *Wordrec::chop_overlapping_blob(const std::vector<TBOX>& boxes,
bool italic_blob, WERD_RES *word_res,
int *blob_number) {
TWERD *word = word_res->chopped_word;
@ -362,7 +362,7 @@ SEAM* Wordrec::improve_one_blob(const GenericVector<BLOB_CHOICE*>& blob_choices,
* the worst blobs and try to divide it up to improve the ratings.
* Used for testing chopper.
*/
SEAM* Wordrec::chop_one_blob(const GenericVector<TBOX>& boxes,
SEAM* Wordrec::chop_one_blob(const std::vector<TBOX>& boxes,
const GenericVector<BLOB_CHOICE*>& blob_choices,
WERD_RES* word_res,
int* blob_number) {

View File

@ -370,7 +370,7 @@ class Wordrec : public Classify {
bool italic_blob, const GenericVector<SEAM*>& seams);
SEAM *chop_numbered_blob(TWERD *word, int32_t blob_number,
bool italic_blob, const GenericVector<SEAM*>& seams);
SEAM *chop_overlapping_blob(const GenericVector<TBOX>& boxes,
SEAM *chop_overlapping_blob(const std::vector<TBOX>& boxes,
bool italic_blob,
WERD_RES *word_res, int *blob_number);
SEAM *improve_one_blob(const GenericVector<BLOB_CHOICE*> &blob_choices,
@ -379,7 +379,7 @@ class Wordrec : public Classify {
bool italic_blob,
WERD_RES *word,
int *blob_number);
SEAM *chop_one_blob(const GenericVector<TBOX> &boxes,
SEAM *chop_one_blob(const std::vector<TBOX> &boxes,
const GenericVector<BLOB_CHOICE*> &blob_choices,
WERD_RES *word_res,
int *blob_number);

View File

@ -97,7 +97,7 @@ TEST_F(ImagedataTest, CachesMultiDocs) {
// Number of pages in each document.
const std::vector<int> kNumPages = {6, 5, 7};
std::vector<std::vector<std::string>> page_texts;
GenericVector<STRING> filenames;
std::vector<STRING> filenames;
for (size_t d = 0; d < kNumPages.size(); ++d) {
page_texts.emplace_back(std::vector<std::string>());
std::string filename = MakeFakeDoc(kNumPages[d], d, &page_texts.back());

View File

@ -45,7 +45,7 @@ TEST(LangModelTest, AddACharacter) {
LOG(INFO) << "Output dir=" << output_dir << "\n";
std::string lang1 = "eng";
bool pass_through_recoder = false;
GenericVector<STRING> words, puncs, numbers;
std::vector<STRING> words, puncs, numbers;
// If these reads fail, we get a warning message and an empty list of words.
ReadFile(file::JoinPath(eng_dir, "eng.wordlist"), nullptr)
.split('\n', &words);
@ -136,7 +136,7 @@ TEST(LangModelTest, AddACharacterHindi) {
LOG(INFO) << "Output dir=" << output_dir << "\n";
std::string lang1 = "hin";
bool pass_through_recoder = false;
GenericVector<STRING> words, puncs, numbers;
std::vector<STRING> words, puncs, numbers;
// If these reads fail, we get a warning message and an empty list of words.
ReadFile(file::JoinPath(hin_dir, "hin.wordlist"), nullptr)
.split('\n', &words);

View File

@ -114,7 +114,7 @@ TEST_F(LSTMTrainerTest, DeterminismTest) {
double lstm_2d_err_a = TrainIterations(kTrainerIterations);
double act_error_a = trainer_->ActivationError();
double char_error_a = trainer_->CharError();
GenericVector<char> trainer_a_data;
std::vector<char> trainer_a_data;
EXPECT_TRUE(trainer_->SaveTrainingDump(NO_BEST_TRAINER, trainer_.get(),
&trainer_a_data));
SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]",

View File

@ -78,7 +78,7 @@ class LSTMTrainerTest : public testing::Test {
ASSERT_TRUE(unicharset.load_from_file(unicharset_name.c_str(), false));
std::string script_dir = file::JoinPath(
LANGDATA_DIR, "");
GenericVector<STRING> words;
std::vector<STRING> words;
EXPECT_EQ(0, CombineLangModel(unicharset, script_dir, "", FLAGS_test_tmpdir,
kLang, !recode, words, words, words, false,
nullptr, nullptr));
@ -95,7 +95,7 @@ class LSTMTrainerTest : public testing::Test {
if (layer_specific) net_mode |= NF_LAYER_SPECIFIC_LR;
EXPECT_TRUE(trainer_->InitNetwork(network_spec.c_str(), -1, net_mode, 0.1,
learning_rate, 0.9, 0.999));
GenericVector<STRING> filenames;
std::vector<STRING> filenames;
filenames.push_back(STRING(TestDataNameToPath(lstmf_file).c_str()));
EXPECT_TRUE(trainer_->LoadAllTrainingData(filenames, CS_SEQUENTIAL, false));
LOG(INFO) << "Setup network:" << model_name << "\n" ;
@ -151,7 +151,7 @@ class LSTMTrainerTest : public testing::Test {
// within 1% of the error rate. Returns the increase in error from float to
// int.
double TestIntMode(int test_iterations) {
GenericVector<char> trainer_data;
std::vector<char> trainer_data;
EXPECT_TRUE(trainer_->SaveTrainingDump(NO_BEST_TRAINER, trainer_.get(),
&trainer_data));
// Get the error on the next few iterations in float mode.

View File

@ -192,7 +192,7 @@ void TestParagraphDetection(const TextAndModel* correct, int num_rows) {
GenericVector<RowInfo> row_infos;
GenericVector<PARA*> row_owners;
PARA_LIST paragraphs;
std::list<ParagraphModel*> models;
std::vector<ParagraphModel*> models;
MakeAsciiRowInfos(correct, num_rows, &row_infos);
int debug_level(3);
@ -324,7 +324,7 @@ TEST(ParagraphsTest, TestSingleFullPageContinuation) {
GenericVector<tesseract::RowInfo> row_infos;
GenericVector<PARA*> row_owners;
PARA_LIST paragraphs;
std::list<ParagraphModel*> models;
std::vector<ParagraphModel*> models;
models.push_back(new ParagraphModel(kLeft, 0, 20, 0, 10));
MakeAsciiRowInfos(correct, num_rows, &row_infos);
tesseract::DetectParagraphs(3, &row_infos, &row_owners, &paragraphs, &models);

View File

@ -28,6 +28,12 @@ void ToVector(const GenericVectorEqEq<T>& from, std::vector<T>* to) {
for (int i = 0; i < from.size(); i++) to->push_back(from[i]);
}
// Copies the contents of |from| into |*to|, replacing whatever |*to| held.
// This overload lets test code call ToVector uniformly whether the source
// is a legacy GenericVector-style container or already a std::vector.
//   from: the source vector; left unchanged.
//   to:   the destination vector; must not be null. On return it compares
//         equal to |from|.
template <typename T>
void ToVector(const std::vector<T>& from, std::vector<T>* to) {
  // Copy assignment replaces the clear() + indexed push_back loop. It avoids
  // the signed/unsigned comparison of an int index against size(), and it is
  // safe when |to| points at |from| (clear() would have emptied the source
  // before anything was copied).
  *to = from;
}
// The fixture for testing Tesseract.
class ResultIteratorTest : public testing::Test {
protected:
@ -167,12 +173,12 @@ class ResultIteratorTest : public testing::Test {
const StrongScriptDirection* word_dirs,
int num_words, int* expected_reading_order,
int num_reading_order_entries) const {
GenericVector<StrongScriptDirection> gv_word_dirs;
std::vector<StrongScriptDirection> gv_word_dirs;
for (int i = 0; i < num_words; i++) {
gv_word_dirs.push_back(word_dirs[i]);
}
GenericVectorEqEq<int> output;
std::vector<int> output;
ResultIterator::CalculateTextlineOrder(in_ltr_context, gv_word_dirs,
&output);
// STL vector can be used with EXPECT_EQ, so convert...
@ -191,17 +197,17 @@ class ResultIteratorTest : public testing::Test {
void VerifySaneTextlineOrder(bool in_ltr_context,
const StrongScriptDirection* word_dirs,
int num_words) const {
GenericVector<StrongScriptDirection> gv_word_dirs;
std::vector<StrongScriptDirection> gv_word_dirs;
for (int i = 0; i < num_words; i++) {
gv_word_dirs.push_back(word_dirs[i]);
}
GenericVectorEqEq<int> output;
std::vector<int> output;
ResultIterator::CalculateTextlineOrder(in_ltr_context, gv_word_dirs,
&output);
ASSERT_GE(output.size(), num_words);
GenericVector<int> output_copy(output);
output_copy.sort();
std::vector<int> output_copy(output);
std::sort(output_copy.begin(), output_copy.end());
bool sane = true;
int j = 0;
while (j < output_copy.size() && output_copy[j] < 0) j++;

View File

@ -18,7 +18,6 @@
#include "boxchar.h"
#include "boxread.h"
#include "commandlineflags.h"
#include "genericvector.h"
#include "include_gunit.h"
#include "stringrenderer.h"
#include "strngs.h"
@ -227,7 +226,7 @@ TEST_F(StringRendererTest, ArabicBoxcharsInLTROrder) {
std::string boxes_str = renderer_->GetBoxesStr();
// Decode to get the box text strings.
EXPECT_FALSE(boxes_str.empty());
GenericVector<STRING> texts;
std::vector<STRING> texts;
EXPECT_TRUE(ReadMemBoxes(0, false, boxes_str.c_str(), false, nullptr, &texts,
nullptr, nullptr));
std::string ltr_str;

View File

@ -16,7 +16,7 @@
namespace tesseract {
// Tests TFile and GenericVector serialization by serializing and
// Tests TFile and std::vector serialization by serializing and
// writing/reading.
class TfileTest : public ::testing::Test {
@ -115,7 +115,7 @@ TEST_F(TfileTest, Serialize) {
// This test verifies that Tfile can serialize a class.
MathData m1;
m1.Setup();
GenericVector<char> data;
std::vector<char> data;
TFile fpw;
fpw.OpenWrite(&data);
EXPECT_TRUE(m1.Serialize(&fpw));
@ -136,7 +136,7 @@ TEST_F(TfileTest, FGets) {
MathData m1;
std::string line_str = "This is a textline with a newline\n";
m1.Setup();
GenericVector<char> data;
std::vector<char> data;
TFile fpw;
fpw.OpenWrite(&data);
EXPECT_TRUE(m1.Serialize(&fpw));
@ -161,7 +161,7 @@ TEST_F(TfileTest, BigEndian) {
// This test verifies that Tfile can auto-reverse big-endian data.
MathData m1;
m1.Setup();
GenericVector<char> data;
std::vector<char> data;
TFile fpw;
fpw.OpenWrite(&data);
EXPECT_TRUE(m1.SerializeBigEndian(&fpw));

View File

@ -57,7 +57,7 @@ class UnicharcompressTest : public ::testing::Test {
}
// Serializes and de-serializes compressed_ over itself.
void SerializeAndUndo() {
GenericVector<char> data;
std::vector<char> data;
TFile wfp;
wfp.OpenWrite(&data);
EXPECT_TRUE(compressed_.Serialize(&wfp));

View File

@ -133,7 +133,7 @@ TEST(UnicharsetTest, MultibyteBigrams) {
// It is added if we force it to be.
u.unichar_insert("\u0ccd\u0cad", OldUncleanUnichars::kTrue);
EXPECT_EQ(u.size(), 8);
GenericVector<char> data;
std::vector<char> data;
tesseract::TFile fp;
fp.OpenWrite(&data);
u.save_to_file(&fp);