diff --git a/include/tesseract/baseapi.h b/include/tesseract/baseapi.h index 16845683..c90c75fa 100644 --- a/include/tesseract/baseapi.h +++ b/include/tesseract/baseapi.h @@ -38,7 +38,6 @@ #include #include // for std::function -#include // for std::list #include // for std::vector struct Pix; @@ -817,7 +816,7 @@ class TESS_API TessBaseAPI { EquationDetect* equ_detect_; ///< The equation detector. FileReader reader_; ///< Reads files from any filesystem. ImageThresholder* thresholder_; ///< Image thresholding module. - std::list* paragraph_models_; + std::vector* paragraph_models_; BLOCK_LIST* block_list_; ///< The page layout. PAGE_RES* page_res_; ///< The page-level data. std::string input_file_; ///< Name used by training code. diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp index 7e23d275..c4da3bbf 100644 --- a/src/api/baseapi.cpp +++ b/src/api/baseapi.cpp @@ -937,7 +937,7 @@ int TessBaseAPI::RecognizeForChopTest(ETEXT_DESC* monitor) { while (page_res_it.word() != nullptr) { WERD_RES *word_res = page_res_it.word(); - GenericVector boxes; + std::vector boxes; tesseract_->MaximallyChopWord(boxes, page_res_it.block()->block, page_res_it.row()->row, word_res); page_res_it.forward(); @@ -1844,7 +1844,7 @@ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) { if (text[t] != '\0' || wordstr[w] != '\0') { // No match. 
delete page_res_; - GenericVector boxes; + std::vector boxes; page_res_ = tesseract_->SetupApplyBoxes(boxes, block_list_); tesseract_->ReSegmentByClassification(page_res_); tesseract_->TidyUp(page_res_); @@ -2291,10 +2291,10 @@ void TessBaseAPI::DetectParagraphs(bool after_text_recognition) { int debug_level = 0; GetIntVariable("paragraph_debug_level", &debug_level); if (paragraph_models_ == nullptr) - paragraph_models_ = new std::list; + paragraph_models_ = new std::vector; MutableIterator *result_it = GetMutableIterator(); do { // Detect paragraphs for this block - std::list models; + std::vector models; ::tesseract::DetectParagraphs(debug_level, after_text_recognition, result_it, &models); paragraph_models_->insert(paragraph_models_->end(), models.begin(), models.end()); diff --git a/src/ccmain/applybox.cpp b/src/ccmain/applybox.cpp index 8e018969..a8d1bbcd 100644 --- a/src/ccmain/applybox.cpp +++ b/src/ccmain/applybox.cpp @@ -113,8 +113,8 @@ static void clear_any_old_text(BLOCK_LIST *block_list) { PAGE_RES* Tesseract::ApplyBoxes(const char* filename, bool find_segmentation, BLOCK_LIST *block_list) { - GenericVector boxes; - GenericVector texts, full_texts; + std::vector boxes; + std::vector texts, full_texts; if (!ReadAllBoxes(applybox_page, true, filename, &boxes, &texts, &full_texts, nullptr)) { return nullptr; // Can't do it. @@ -205,7 +205,7 @@ void Tesseract::PreenXHeights(BLOCK_LIST *block_list) { /// Builds a PAGE_RES from the block_list in the way required for ApplyBoxes: /// All fuzzy spaces are removed, and all the words are maximally chopped. -PAGE_RES* Tesseract::SetupApplyBoxes(const GenericVector& boxes, +PAGE_RES* Tesseract::SetupApplyBoxes(const std::vector& boxes, BLOCK_LIST *block_list) { PreenXHeights(block_list); // Strip all fuzzy space markers to simplify the PAGE_RES. @@ -241,7 +241,7 @@ PAGE_RES* Tesseract::SetupApplyBoxes(const GenericVector& boxes, /// Tests the chopper by exhaustively running chop_one_blob. 
/// The word_res will contain filled chopped_word, seam_array, denorm, /// box_word and best_state for the maximally chopped word. -void Tesseract::MaximallyChopWord(const GenericVector& boxes, +void Tesseract::MaximallyChopWord(const std::vector& boxes, BLOCK* block, ROW* row, WERD_RES* word_res) { if (!word_res->SetupForRecognition(unicharset, this, BestPix(), diff --git a/src/ccmain/linerec.cpp b/src/ccmain/linerec.cpp index a799ac8c..4c056f9b 100644 --- a/src/ccmain/linerec.cpp +++ b/src/ccmain/linerec.cpp @@ -52,8 +52,8 @@ bool Tesseract::TrainLineRecognizer(const char* input_imagename, return false; } } - GenericVector boxes; - GenericVector texts; + std::vector boxes; + std::vector texts; // Get the boxes for this page, if there are any. if (!ReadAllBoxes(applybox_page, false, input_imagename, &boxes, &texts, nullptr, nullptr) || @@ -77,8 +77,8 @@ bool Tesseract::TrainLineRecognizer(const char* input_imagename, // Generates training data for training a line recognizer, eg LSTM. // Breaks the boxes into lines, normalizes them, converts to ImageData and // appends them to the given training_data. -void Tesseract::TrainFromBoxes(const GenericVector& boxes, - const GenericVector& texts, +void Tesseract::TrainFromBoxes(const std::vector& boxes, + const std::vector& texts, BLOCK_LIST *block_list, DocumentData* training_data) { int box_count = boxes.size(); @@ -133,8 +133,8 @@ void Tesseract::TrainFromBoxes(const GenericVector& boxes, // and ground truth boxes/truth text if available in the input. // The image is not normalized in any way. ImageData* Tesseract::GetLineData(const TBOX& line_box, - const GenericVector& boxes, - const GenericVector& texts, + const std::vector& boxes, + const std::vector& texts, int start_box, int end_box, const BLOCK& block) { TBOX revised_box; @@ -145,8 +145,8 @@ ImageData* Tesseract::GetLineData(const TBOX& line_box, // Copy the boxes and shift them so they are relative to the image. 
FCOORD block_rotation(block.re_rotation().x(), -block.re_rotation().y()); ICOORD shift = -revised_box.botleft(); - GenericVector line_boxes; - GenericVector line_texts; + std::vector line_boxes; + std::vector line_texts; for (int b = start_box; b < end_box; ++b) { TBOX box = boxes[b]; box.rotate(block_rotation); @@ -154,8 +154,8 @@ ImageData* Tesseract::GetLineData(const TBOX& line_box, line_boxes.push_back(box); line_texts.push_back(texts[b]); } - GenericVector page_numbers; - page_numbers.init_to_size(line_boxes.size(), applybox_page); + std::vector page_numbers; + page_numbers.resize(line_boxes.size(), applybox_page); image_data->AddBoxes(line_boxes, line_texts, page_numbers); return image_data; } diff --git a/src/ccmain/pagesegmain.cpp b/src/ccmain/pagesegmain.cpp index 0ccab237..985c6796 100644 --- a/src/ccmain/pagesegmain.cpp +++ b/src/ccmain/pagesegmain.cpp @@ -249,7 +249,7 @@ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks, // allowed_ids. static void AddAllScriptsConverted(const UNICHARSET& sid_set, const UNICHARSET& osd_set, - GenericVector* allowed_ids) { + std::vector* allowed_ids) { for (int i = 0; i < sid_set.get_script_table_size(); ++i) { if (i != sid_set.null_sid()) { const char* script = sid_set.get_script_from_script_id(i); @@ -357,7 +357,7 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation( to_block, &osd_blobs); } if (PSM_OSD_ENABLED(pageseg_mode) && osd_tess != nullptr && osr != nullptr) { - GenericVector osd_scripts; + std::vector osd_scripts; if (osd_tess != this) { // We are running osd as part of layout analysis, so constrain the // scripts to those allowed by *this. 
diff --git a/src/ccmain/paragraphs.cpp b/src/ccmain/paragraphs.cpp index 60bf2a9c..e97b864a 100644 --- a/src/ccmain/paragraphs.cpp +++ b/src/ccmain/paragraphs.cpp @@ -1236,14 +1236,19 @@ const ParagraphModel* ParagraphTheory::AddModel(const ParagraphModel &model) { } void ParagraphTheory::DiscardUnusedModels(const SetOfModels &used_models) { - models_->remove_if([this, used_models](ParagraphModel* m) { - bool remove = !used_models.contains(m) && models_we_added_.contains(m); - if (remove) { - models_we_added_.remove(models_we_added_.get_index(m)); + size_t w = 0; + for (size_t r = 0; r < models_->size(); r++) { + ParagraphModel* m = (*models_)[r]; + if (!used_models.contains(m) && models_we_added_.contains(m)) { delete m; + } else { + if (r > w) { + (*models_)[w] = m; + } + w++; } - return remove; - }); + } + models_->resize(w); } // Examine rows[start, end) and try to determine if an existing non-centered @@ -2272,7 +2277,7 @@ void DetectParagraphs(int debug_level, GenericVector *row_infos, GenericVector *row_owners, PARA_LIST *paragraphs, - std::list *models) { + std::vector *models) { GenericVector rows; ParagraphTheory theory(models); @@ -2514,7 +2519,7 @@ static void InitializeRowInfo(bool after_recognition, void DetectParagraphs(int debug_level, bool after_text_recognition, const MutableIterator *block_start, - std::list *models) { + std::vector *models) { // Clear out any preconceived notions. 
if (block_start->Empty(RIL_TEXTLINE)) { return; diff --git a/src/ccmain/paragraphs.h b/src/ccmain/paragraphs.h index d20dc210..8cc9dab3 100644 --- a/src/ccmain/paragraphs.h +++ b/src/ccmain/paragraphs.h @@ -91,7 +91,7 @@ void DetectParagraphs(int debug_level, GenericVector *row_infos, GenericVector *row_owners, PARA_LIST *paragraphs, - std::list *models); + std::vector *models); // Given a MutableIterator to the start of a block, run DetectParagraphs on // that block and commit the results to the underlying ROW and BLOCK structs, @@ -101,7 +101,7 @@ void DetectParagraphs(int debug_level, void DetectParagraphs(int debug_level, bool after_text_recognition, const MutableIterator *block_start, - std::list *models); + std::vector *models); } // namespace diff --git a/src/ccmain/paragraphs_internal.h b/src/ccmain/paragraphs_internal.h index d8d6e9de..af7e5bff 100644 --- a/src/ccmain/paragraphs_internal.h +++ b/src/ccmain/paragraphs_internal.h @@ -193,10 +193,10 @@ class ParagraphTheory { public: // We presume models will outlive us, and that models will take ownership // of any ParagraphModel *'s we add. - explicit ParagraphTheory(std::list *models) + explicit ParagraphTheory(std::vector *models) : models_(models) {} - std::list &models() { return *models_; } - const std::list &models() const { return *models_; } + std::vector &models() { return *models_; } + const std::vector &models() const { return *models_; } // Return an existing model if one that is Comparable() can be found. // Else, allocate a new copy of model to save and return a pointer to it. 
@@ -216,7 +216,7 @@ class ParagraphTheory { int IndexOf(const ParagraphModel *model) const; private: - std::list *models_; + std::vector *models_; GenericVectorEqEq models_we_added_; }; diff --git a/src/ccmain/resultiterator.cpp b/src/ccmain/resultiterator.cpp index dc9d488e..d3e07077 100644 --- a/src/ccmain/resultiterator.cpp +++ b/src/ccmain/resultiterator.cpp @@ -360,7 +360,7 @@ void ResultIterator::MoveToLogicalStartOfWord() { BeginWord(0); return; } - GenericVector blob_order; + std::vector blob_order; CalculateBlobOrder(&blob_order); if (blob_order.size() == 0 || blob_order[0] == 0) return; @@ -370,7 +370,7 @@ void ResultIterator::MoveToLogicalStartOfWord() { bool ResultIterator::IsAtFinalSymbolOfWord() const { if (!it_->word()) return true; - GenericVector blob_order; + std::vector blob_order; CalculateBlobOrder(&blob_order); return blob_order.size() == 0 || blob_order.back() == blob_index_; } @@ -378,7 +378,7 @@ bool ResultIterator::IsAtFinalSymbolOfWord() const { bool ResultIterator::IsAtFirstSymbolOfWord() const { if (!it_->word()) return true; - GenericVector blob_order; + std::vector blob_order; CalculateBlobOrder(&blob_order); return blob_order.size() == 0 || blob_order[0] == blob_index_; } @@ -472,7 +472,7 @@ bool ResultIterator::Next(PageIteratorLevel level) { MoveToLogicalStartOfTextline(); return it_->block() != nullptr; case RIL_SYMBOL: { - GenericVector blob_order; + std::vector blob_order; CalculateBlobOrder(&blob_order); int next_blob = 0; while (next_blob < blob_order.size() && @@ -674,7 +674,7 @@ void ResultIterator::AppendUTF8WordText(std::string* text) const { *text += reading_direction_is_ltr ? 
kLRM : kRLM; } - GenericVector blob_order; + std::vector blob_order; CalculateBlobOrder(&blob_order); for (int i = 0; i < blob_order.size(); i++) { *text += it_->word()->BestUTF8(blob_order[i], false); diff --git a/src/ccmain/tesseractclass.h b/src/ccmain/tesseractclass.h index 4b98c99f..f5da1f22 100644 --- a/src/ccmain/tesseractclass.h +++ b/src/ccmain/tesseractclass.h @@ -347,15 +347,15 @@ class Tesseract : public Wordrec { // Generates training data for training a line recognizer, eg LSTM. // Breaks the boxes into lines, normalizes them, converts to ImageData and // appends them to the given training_data. - void TrainFromBoxes(const GenericVector& boxes, - const GenericVector& texts, + void TrainFromBoxes(const std::vector& boxes, + const std::vector& texts, BLOCK_LIST* block_list, DocumentData* training_data); // Returns an Imagedata containing the image of the given textline, // and ground truth boxes/truth text if available in the input. // The image is not normalized in any way. - ImageData* GetLineData(const TBOX& line_box, const GenericVector& boxes, - const GenericVector& texts, int start_box, + ImageData* GetLineData(const TBOX& line_box, const std::vector& boxes, + const std::vector& texts, int start_box, int end_box, const BLOCK& block); // Helper gets the image of a rectangle, using the block.re_rotation() if // needed to get to the image, and rotating the result back to horizontal @@ -708,12 +708,12 @@ class Tesseract : public Wordrec { // Builds a PAGE_RES from the block_list in the way required for ApplyBoxes: // All fuzzy spaces are removed, and all the words are maximally chopped. - PAGE_RES* SetupApplyBoxes(const GenericVector& boxes, + PAGE_RES* SetupApplyBoxes(const std::vector& boxes, BLOCK_LIST* block_list); // Tests the chopper by exhaustively running chop_one_blob. // The word_res will contain filled chopped_word, seam_array, denorm, // box_word and best_state for the maximally chopped word. 
- void MaximallyChopWord(const GenericVector& boxes, BLOCK* block, + void MaximallyChopWord(const std::vector& boxes, BLOCK* block, ROW* row, WERD_RES* word_res); // Gather consecutive blobs that match the given box into the best_state // and corresponding correct_text. diff --git a/src/ccstruct/imagedata.cpp b/src/ccstruct/imagedata.cpp index 66ff5a46..6698cdf9 100644 --- a/src/ccstruct/imagedata.cpp +++ b/src/ccstruct/imagedata.cpp @@ -331,9 +331,9 @@ void ImageData::Display() const { // Adds the supplied boxes and transcriptions that correspond to the correct // page number. -void ImageData::AddBoxes(const GenericVector& boxes, - const GenericVector& texts, - const GenericVector& box_pages) { +void ImageData::AddBoxes(const std::vector& boxes, + const std::vector& texts, + const std::vector& box_pages) { // Copy the boxes and make the transcription. for (int i = 0; i < box_pages.size(); ++i) { if (page_number_ >= 0 && box_pages[i] != page_number_) continue; @@ -378,9 +378,9 @@ Pix* ImageData::GetPixInternal(const GenericVector& image_data) { // match the page number. Returns false on error. bool ImageData::AddBoxes(const char* box_text) { if (box_text != nullptr && box_text[0] != '\0') { - GenericVector boxes; - GenericVector texts; - GenericVector box_pages; + std::vector boxes; + std::vector texts; + std::vector box_pages; if (ReadMemBoxes(page_number_, /*skip_blanks*/ false, box_text, /*continue_on_failure*/ true, &boxes, &texts, nullptr, &box_pages)) { @@ -587,7 +587,7 @@ DocumentCache::~DocumentCache() {} // Adds all the documents in the list of filenames, counting memory. // The reader is used to read the files. 
-bool DocumentCache::LoadDocuments(const GenericVector& filenames, +bool DocumentCache::LoadDocuments(const std::vector& filenames, CachingStrategy cache_strategy, FileReader reader) { cache_strategy_ = cache_strategy; diff --git a/src/ccstruct/imagedata.h b/src/ccstruct/imagedata.h index cc793138..4c253cbd 100644 --- a/src/ccstruct/imagedata.h +++ b/src/ccstruct/imagedata.h @@ -181,9 +181,9 @@ class ImageData { // Adds the supplied boxes and transcriptions that correspond to the correct // page number. - void AddBoxes(const GenericVector& boxes, - const GenericVector& texts, - const GenericVector& box_pages); + void AddBoxes(const std::vector& boxes, + const std::vector& texts, + const std::vector& box_pages); private: // Saves the given Pix as a PNG-encoded string and destroys it. @@ -335,7 +335,7 @@ class DocumentCache { } // Adds all the documents in the list of filenames, counting memory. // The reader is used to read the files. - bool LoadDocuments(const GenericVector& filenames, + bool LoadDocuments(const std::vector& filenames, CachingStrategy cache_strategy, FileReader reader); // Adds document to the cache. 
diff --git a/src/ccstruct/matrix.h b/src/ccstruct/matrix.h index 6440918e..a1494a76 100644 --- a/src/ccstruct/matrix.h +++ b/src/ccstruct/matrix.h @@ -152,7 +152,7 @@ class GENERIC_2D_ARRAY { return tesseract::Serialize(fp, &array_[0], size); } - bool Serialize(tesseract::TFile* fp) const { + bool Serialize(TFile* fp) const { if (!SerializeSize(fp)) return false; if (!fp->Serialize(&empty_)) return false; int size = num_elements(); @@ -175,7 +175,7 @@ class GENERIC_2D_ARRAY { return true; } - bool DeSerialize(tesseract::TFile* fp) { + bool DeSerialize(TFile* fp) { return DeSerializeSize(fp) && fp->DeSerialize(&empty_) && fp->DeSerialize(&array_[0], num_elements()); @@ -473,7 +473,7 @@ class GENERIC_2D_ARRAY { size = dim2_; return tesseract::Serialize(fp, &size); } - bool SerializeSize(tesseract::TFile* fp) const { + bool SerializeSize(TFile* fp) const { uint32_t size = dim1_; if (!fp->Serialize(&size)) return false; size = dim2_; @@ -495,7 +495,7 @@ class GENERIC_2D_ARRAY { Resize(size1, size2, empty_); return true; } - bool DeSerializeSize(tesseract::TFile* fp) { + bool DeSerializeSize(TFile* fp) { int32_t size1, size2; if (!fp->DeSerialize(&size1)) return false; if (!fp->DeSerialize(&size2)) return false; @@ -639,7 +639,7 @@ struct MATRIX_COORD { }; // The MatrixCoordPair contains a MATRIX_COORD and its priority. -using MatrixCoordPair = tesseract::KDPairInc; +using MatrixCoordPair = KDPairInc; } // namespace tesseract diff --git a/src/ccutil/genericvector.h b/src/ccutil/genericvector.h index 3483472e..84a194b4 100644 --- a/src/ccutil/genericvector.h +++ b/src/ccutil/genericvector.h @@ -29,7 +29,6 @@ #include #include #include // for std::function -#include namespace tesseract { @@ -37,48 +36,64 @@ namespace tesseract { // provides automatic deletion of pointers, [De]Serialize that works, and // sort that works. 
template -class GenericVector : public std::vector { - using base = std::vector; +class GenericVector { public: - using std::vector::vector; - - using base::begin; - using base::end; - using base::data; - using base::capacity; - using base::reserve; - using base::resize; - using base::back; - using base::clear; - using base::push_back; + GenericVector() { + init(kDefaultVectorSize); + } + GenericVector(int size, const T& init_val) { + init(size); + init_to_size(size, init_val); + } + // Copy + GenericVector(const GenericVector& other) { + this->init(other.size()); + this->operator+=(other); + } GenericVector& operator+=(const GenericVector& other); + GenericVector& operator=(const GenericVector& other); + ~GenericVector(); + + // Reserve some memory. + void reserve(int size); // Double the size of the internal array. void double_the_size(); // Resizes to size and sets all values to t. void init_to_size(int size, const T& t); + void resize(int size, const T& t); // Resizes to size without any initialization. void resize_no_init(int size) { - resize(size); + reserve(size); + size_used_ = size; } + // Return the size used. + int size() const { + return size_used_; + } // Workaround to avoid g++ -Wsign-compare warnings. size_t unsigned_size() const { - return size(); + static_assert(sizeof(size_used_) <= sizeof(size_t), + "Wow! sizeof(size_t) < sizeof(int32_t)!!"); + assert(0 <= size_used_); + return static_cast(size_used_); } int size_reserved() const { - return capacity(); + return size_reserved_; } - int size() const { - return base::size(); + // Return true if empty. + bool empty() const { + return size_used_ == 0; } // Return the object from an index. - T& get(int index); - const T& get(int index) const; + T& get(int index) const; + T& back() const; + T& operator[](int index) const; // Returns the last object and removes it. 
T pop_back(); @@ -94,6 +109,7 @@ class GenericVector : public std::vector { T contains_index(int index) const; // Push an element in the end of the array + int push_back(T object); void operator+=(const T& t); // Push an element in the end of the array if the same @@ -117,7 +133,9 @@ class GenericVector : public std::vector { // Truncates the array to the given size by removing the end. // If the current size is less, the array is not expanded. void truncate(int size) { - resize(size); + if (size < size_used_) { + size_used_ = size; + } } // Add a callback to be called to delete the elements when the array took @@ -132,7 +150,13 @@ class GenericVector : public std::vector { compare_cb_ = cb; } - // Delete objects pointed to by data()[i] + // Clear the array, calling the clear callback function if any. + // All the owned callbacks are also deleted. + // If you don't want the callbacks to be deleted, before calling clear, set + // the callback to nullptr. + void clear(); + + // Delete objects pointed to by data_[i] void delete_data_pointers(); // This method clears the current object, then, does a shallow copy of @@ -192,8 +216,8 @@ class GenericVector : public std::vector { // Reverses the elements of the vector. void reverse() { - for (int i = 0; i < size() / 2; ++i) { - Swap(&data()[i], &data()[size() - 1 - i]); + for (int i = 0; i < size_used_ / 2; ++i) { + Swap(&data_[i], &data_[size_used_ - 1 - i]); } } @@ -209,7 +233,7 @@ class GenericVector : public std::vector { // to two Ts and returns negative if the first element is to appear earlier // in the result and positive if it is to appear later, with 0 for equal. 
void sort(int (*comparator)(const void*, const void*)) { - qsort(data(), size(), sizeof(*data()), comparator); + qsort(data_, size_used_, sizeof(*data_), comparator); } // Searches the array (assuming sorted in ascending order, using sort()) for @@ -217,23 +241,23 @@ class GenericVector : public std::vector { // Use binary_search to get the index of target, or its nearest candidate. bool bool_binary_search(const T& target) const { int index = binary_search(target); - if (index >= size()) { + if (index >= size_used_) { return false; } - return data()[index] == target; + return data_[index] == target; } // Searches the array (assuming sorted in ascending order, using sort()) for // an element equal to target and returns the index of the best candidate. // The return value is conceptually the largest index i such that - // data()[i] <= target or 0 if target < the whole vector. + // data_[i] <= target or 0 if target < the whole vector. // NOTE that this function uses operator> so really the return value is - // the largest index i such that data()[i] > target is false. + // the largest index i such that data_[i] > target is false. int binary_search(const T& target) const { int bottom = 0; - int top = size(); + int top = size_used_; while (top - bottom > 1) { int middle = (bottom + top) / 2; - if (data()[middle] > target) { + if (data_[middle] > target) { top = middle; } else { bottom = middle; @@ -245,20 +269,20 @@ class GenericVector : public std::vector { // Compact the vector by deleting elements using operator!= on basic types. // The vector must be sorted. void compact_sorted() { - if (size() == 0) { + if (size_used_ == 0) { return; } // First element is in no matter what, hence the i = 1. int last_write = 0; - for (int i = 1; i < size(); ++i) { + for (int i = 1; i < size_used_; ++i) { // Finds next unique item and writes it. 
- if (data()[last_write] != data()[i]) { - data()[++last_write] = data()[i]; + if (data_[last_write] != data_[i]) { + data_[++last_write] = data_[i]; } } // last_write is the index of a valid data cell, so add 1. - resize(last_write + 1); + size_used_ = last_write + 1; } // Returns the index of what would be the target_index_th item in the array @@ -269,26 +293,26 @@ class GenericVector : public std::vector { // Make sure target_index is legal. if (target_index < 0) { target_index = 0; // ensure legal - } else if (target_index >= size()) { - target_index = size() - 1; + } else if (target_index >= size_used_) { + target_index = size_used_ - 1; } unsigned int seed = 1; - return choose_nth_item(target_index, 0, size(), &seed); + return choose_nth_item(target_index, 0, size_used_, &seed); } // Swaps the elements with the given indices. void swap(int index1, int index2) { if (index1 != index2) { - T tmp = data()[index1]; - data()[index1] = data()[index2]; - data()[index2] = tmp; + T tmp = data_[index1]; + data_[index1] = data_[index2]; + data_[index2] = tmp; } } // Returns true if all elements of *this are within the given range. // Only uses operator< bool WithinBounds(const T& rangemin, const T& rangemax) const { - for (int i = 0; i < size(); ++i) { - if (data()[i] < rangemin || rangemax < data()[i]) { + for (int i = 0; i < size_used_; ++i) { + if (data_[i] < rangemin || rangemax < data_[i]) { return false; } } @@ -306,18 +330,47 @@ class GenericVector : public std::vector { // vector are small enough that for efficiency it makes sense // to start with a larger initial size. static const int kDefaultVectorSize = 4; + int32_t size_used_{}; + int32_t size_reserved_{}; + T* data_; std::function clear_cb_; std::function compare_cb_; }; -#if defined(_MSC_VER) || defined(__APPLE__) -// MSVC stl does not have ::data() in vector, -// so we add custom specialization. -// On Apple there are also errors when using std::vector, -// so we replace it with vector as a workaround. 
-template <> -class GenericVector : public std::vector {}; -#endif +// The default FileReader loads the whole file into the vector of char, +// returning false on error. +inline bool LoadDataFromFile(const char* filename, GenericVector* data) { + bool result = false; + FILE* fp = fopen(filename, "rb"); + if (fp != nullptr) { + fseek(fp, 0, SEEK_END); + auto size = std::ftell(fp); + fseek(fp, 0, SEEK_SET); + // Trying to open a directory on Linux sets size to LONG_MAX. Catch it here. + if (size > 0 && size < LONG_MAX) { + // reserve an extra byte in case caller wants to append a '\0' character + data->reserve(size + 1); + data->resize_no_init(size); + result = static_cast(fread(&(*data)[0], 1, size, fp)) == size; + } + fclose(fp); + } + return result; +} + +// The default FileWriter writes the vector of char to the filename file, +// returning false on error. +inline bool SaveDataToFile(const GenericVector& data, + const char* filename) { + FILE* fp = fopen(filename, "wb"); + if (fp == nullptr) { + return false; + } + bool result = + static_cast(fwrite(&data[0], 1, data.size(), fp)) == data.size(); + fclose(fp); + return result; +} template bool cmp_eq(T const& t1, T const& t2) { @@ -378,9 +431,9 @@ class PointerVector : public GenericVector { this->operator+=(other); } PointerVector& operator+=(const PointerVector& other) { - this->reserve(this->size() + other.size()); + this->reserve(this->size_used_ + other.size_used_); for (int i = 0; i < other.size(); ++i) { - this->push_back(new T(*other.data()[i])); + this->push_back(new T(*other.data_[i])); } return *this; } @@ -396,15 +449,15 @@ class PointerVector : public GenericVector { // Removes an element at the given index and // shifts the remaining elements to the left. void remove(int index) { - delete GenericVector::data()[index]; + delete GenericVector::data_[index]; GenericVector::remove(index); } // Truncates the array to the given size by removing the end. 
// If the current size is less, the array is not expanded. void truncate(int size) { - for (int i = size; i < GenericVector::size(); ++i) { - delete GenericVector::data()[i]; + for (int i = size; i < GenericVector::size_used_; ++i) { + delete GenericVector::data_[i]; } GenericVector::truncate(size); } @@ -415,20 +468,20 @@ class PointerVector : public GenericVector { int new_size = 0; int old_index = 0; // Until the callback returns true, the elements stay the same. - while (old_index < GenericVector::size() && - !delete_cb(GenericVector::data()[old_index++])) { + while (old_index < GenericVector::size_used_ && + !delete_cb(GenericVector::data_[old_index++])) { ++new_size; } // Now just copy anything else that gets false from delete_cb. - for (; old_index < GenericVector::size(); ++old_index) { - if (!delete_cb(GenericVector::data()[old_index])) { - GenericVector::data()[new_size++] = - GenericVector::data()[old_index]; + for (; old_index < GenericVector::size_used_; ++old_index) { + if (!delete_cb(GenericVector::data_[old_index])) { + GenericVector::data_[new_size++] = + GenericVector::data_[old_index]; } else { - delete GenericVector::data()[old_index]; + delete GenericVector::data_[old_index]; } } - GenericVector::resize(new_size); + GenericVector::size_used_ = new_size; } // Clear the array, calling the clear callback function if any. @@ -446,32 +499,32 @@ class PointerVector : public GenericVector { // normal GenericVector of those. // Returns false in case of error. 
bool Serialize(FILE* fp) const { - int32_t used = GenericVector::size(); + int32_t used = GenericVector::size_used_; if (fwrite(&used, sizeof(used), 1, fp) != 1) { return false; } for (int i = 0; i < used; ++i) { - int8_t non_null = GenericVector::data()[i] != nullptr; + int8_t non_null = GenericVector::data_[i] != nullptr; if (fwrite(&non_null, sizeof(non_null), 1, fp) != 1) { return false; } - if (non_null && !GenericVector::data()[i]->Serialize(fp)) { + if (non_null && !GenericVector::data_[i]->Serialize(fp)) { return false; } } return true; } bool Serialize(TFile* fp) const { - int32_t used = GenericVector::size(); + int32_t used = GenericVector::size_used_; if (fp->FWrite(&used, sizeof(used), 1) != 1) { return false; } for (int i = 0; i < used; ++i) { - int8_t non_null = GenericVector::data()[i] != nullptr; + int8_t non_null = GenericVector::data_[i] != nullptr; if (fp->FWrite(&non_null, sizeof(non_null), 1) != 1) { return false; } - if (non_null && !GenericVector::data()[i]->Serialize(fp)) { + if (non_null && !GenericVector::data_[i]->Serialize(fp)) { return false; } } @@ -599,52 +652,99 @@ class GenericVectorEqEq : public GenericVector { template void GenericVector::init(int size) { - clear(); - resize(size); + size_used_ = 0; + if (size <= 0) { + data_ = nullptr; + size_reserved_ = 0; + } else { + if (size < kDefaultVectorSize) { + size = kDefaultVectorSize; + } + data_ = new T[size]; + size_reserved_ = size; + } + clear_cb_ = nullptr; + compare_cb_ = nullptr; +} + +template +GenericVector::~GenericVector() { + clear(); +} + +// Reserve some memory. If the internal array contains elements, they are +// copied. 
+template +void GenericVector::reserve(int size) { + if (size_reserved_ >= size || size <= 0) { + return; + } + if (size < kDefaultVectorSize) { + size = kDefaultVectorSize; + } + T* new_array = new T[size]; + for (int i = 0; i < size_used_; ++i) { + new_array[i] = data_[i]; + } + delete[] data_; + data_ = new_array; + size_reserved_ = size; } template void GenericVector::double_the_size() { - if (capacity() == 0) { + if (size_reserved_ == 0) { reserve(kDefaultVectorSize); } else { - reserve(2 * capacity()); + reserve(2 * size_reserved_); } } // Resizes to size and sets all values to t. template void GenericVector::init_to_size(int size, const T& t) { - resize(size, t); + reserve(size); + size_used_ = size; + for (int i = 0; i < size; ++i) { + data_[i] = t; + } +} + +template +void GenericVector::resize(int size, const T& t) { + init_to_size(size, t); } // Return the object from an index. template -T& GenericVector::get(int index) { - assert(index >= 0 && index < size()); - return data()[index]; +T& GenericVector::get(int index) const { + assert(index >= 0 && index < size_used_); + return data_[index]; } -// Return the object from an index. template -const T& GenericVector::get(int index) const { - assert(index >= 0 && index < size()); - return data()[index]; +T& GenericVector::operator[](int index) const { + assert(index >= 0 && index < size_used_); + return data_[index]; } +template +T& GenericVector::back() const { + assert(size_used_ > 0); + return data_[size_used_ - 1]; +} // Returns the last object and removes it. template T GenericVector::pop_back() { - auto b = back(); - base::pop_back(); - return b; + assert(size_used_ > 0); + return data_[--size_used_]; } // Return the object from an index. 
template void GenericVector::set(const T& t, int index) { - assert(index >= 0 && index < size()); - data()[index] = t; + assert(index >= 0 && index < size_used_); + data_[index] = t; } // Shifts the rest of the elements to the right to make @@ -652,32 +752,40 @@ void GenericVector::set(const T& t, int index) { // at the specified index. template void GenericVector::insert(const T& t, int index) { - base::insert(begin() + index, t); + assert(index >= 0 && index <= size_used_); + if (size_reserved_ == size_used_) { + double_the_size(); + } + for (int i = size_used_; i > index; --i) { + data_[i] = data_[i - 1]; + } + data_[index] = t; + size_used_++; } // Removes an element at the given index and // shifts the remaining elements to the left. template void GenericVector::remove(int index) { - assert(index >= 0 && index < size()); - for (int i = index; i < size() - 1; ++i) { - data()[i] = data()[i + 1]; + assert(index >= 0 && index < size_used_); + for (int i = index; i < size_used_ - 1; ++i) { + data_[i] = data_[i + 1]; } - resize(size() - 1); + size_used_--; } // Return true if the index is valindex template T GenericVector::contains_index(int index) const { - return index >= 0 && index < size(); + return index >= 0 && index < size_used_; } // Return the index of the T object. 
template int GenericVector::get_index(const T& object) const { - for (int i = 0; i < size(); ++i) { + for (int i = 0; i < size_used_; ++i) { assert(compare_cb_ != nullptr); - if (compare_cb_(object, data()[i])) { + if (compare_cb_(object, data_[i])) { return i; } } @@ -690,20 +798,38 @@ bool GenericVector::contains(const T& object) const { return get_index(object) != -1; } +// Add an element in the array +template +int GenericVector::push_back(T object) { + int index = 0; + if (size_used_ == size_reserved_) { + double_the_size(); + } + index = size_used_++; + data_[index] = object; + return index; +} + template int GenericVector::push_back_new(const T& object) { int index = get_index(object); if (index >= 0) { return index; } - push_back(object); - return size(); + return push_back(object); } // Add an element in the array (front) template int GenericVector::push_front(const T& object) { - insert(begin(), object); + if (size_used_ == size_reserved_) { + double_the_size(); + } + for (int i = size_used_; i > 0; --i) { + data_[i] = data_[i - 1]; + } + data_[0] = object; + ++size_used_; return 0; } @@ -714,39 +840,62 @@ void GenericVector::operator+=(const T& t) { template GenericVector& GenericVector::operator+=(const GenericVector& other) { - this->reserve(size() + other.size()); + this->reserve(size_used_ + other.size_used_); for (int i = 0; i < other.size(); ++i) { - this->operator+=(other.data()[i]); + this->operator+=(other.data_[i]); } return *this; } +template +GenericVector& GenericVector::operator=(const GenericVector& other) { + if (&other != this) { + this->truncate(0); + this->operator+=(other); + } + return *this; +} + +// Clear the array, calling the callback function if any. 
+template +void GenericVector::clear() { + if (size_reserved_ > 0 && clear_cb_ != nullptr) { + for (int i = 0; i < size_used_; ++i) { + clear_cb_(data_[i]); + } + } + delete[] data_; + data_ = nullptr; + size_used_ = 0; + size_reserved_ = 0; + clear_cb_ = nullptr; + compare_cb_ = nullptr; +} + template void GenericVector::delete_data_pointers() { - for (int i = 0; i < size(); ++i) { - delete data()[i]; + for (int i = 0; i < size_used_; ++i) { + delete data_[i]; } } template bool GenericVector::write(FILE* f, std::function cb) const { - int32_t cp = capacity(); - if (fwrite(&cp, sizeof(cp), 1, f) != 1) { + if (fwrite(&size_reserved_, sizeof(size_reserved_), 1, f) != 1) { return false; } - int32_t sz = size(); - if (fwrite(&sz, sizeof(sz), 1, f) != 1) { + if (fwrite(&size_used_, sizeof(size_used_), 1, f) != 1) { return false; } if (cb != nullptr) { - for (int i = 0; i < size(); ++i) { - if (!cb(f, data()[i])) { + for (int i = 0; i < size_used_; ++i) { + if (!cb(f, data_[i])) { return false; } } } else { - if (fwrite(data(), sizeof(T), size(), f) != unsigned_size()) { + if (fwrite(data_, sizeof(T), size_used_, f) != unsigned_size()) { return false; } } @@ -756,23 +905,22 @@ bool GenericVector::write(FILE* f, template bool GenericVector::read(TFile* f, std::function cb) { - int32_t reserved, size; + int32_t reserved; if (f->FReadEndian(&reserved, sizeof(reserved), 1) != 1) { return false; } reserve(reserved); - if (f->FReadEndian(&size, sizeof(size), 1) != 1) { + if (f->FReadEndian(&size_used_, sizeof(size_used_), 1) != 1) { return false; } - resize(size); if (cb != nullptr) { - for (int i = 0; i < size; ++i) { - if (!cb(f, data() + i)) { + for (int i = 0; i < size_used_; ++i) { + if (!cb(f, data_ + i)) { return false; } } } else { - if (f->FReadEndian(data(), sizeof(T), size) != size) { + if (f->FReadEndian(data_, sizeof(T), size_used_) != size_used_) { return false; } } @@ -783,22 +931,20 @@ bool GenericVector::read(TFile* f, // read/write of T will work. 
Returns false in case of error. template bool GenericVector::Serialize(FILE* fp) const { - int32_t sz = size(); - if (fwrite(&sz, sizeof(sz), 1, fp) != 1) { + if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) { return false; } - if (fwrite(data(), sizeof(T), sz, fp) != unsigned_size()) { + if (fwrite(data_, sizeof(*data_), size_used_, fp) != unsigned_size()) { return false; } return true; } template bool GenericVector::Serialize(TFile* fp) const { - int32_t sz = size(); - if (fp->FWrite(&sz, sizeof(sz), 1) != 1) { + if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) { return false; } - if (fp->FWrite(data(), sizeof(T), sz) != sz) { + if (fp->FWrite(data_, sizeof(*data_), size_used_) != size_used_) { return false; } return true; @@ -822,13 +968,14 @@ bool GenericVector::DeSerialize(bool swap, FILE* fp) { if (reserved > UINT16_MAX) { return false; } - resize(reserved); - if (fread(data(), sizeof(T), size(), fp) != unsigned_size()) { + reserve(reserved); + size_used_ = reserved; + if (fread(data_, sizeof(T), size_used_, fp) != unsigned_size()) { return false; } if (swap) { - for (int i = 0; i < size(); ++i) { - ReverseN(&data()[i], sizeof(data()[i])); + for (int i = 0; i < size_used_; ++i) { + ReverseN(&data_[i], sizeof(data_[i])); } } return true; @@ -845,8 +992,9 @@ bool GenericVector::DeSerialize(TFile* fp) { if (reserved > limit) { return false; } - resize(reserved); - return fp->FReadEndian(data(), sizeof(T), size()) == size(); + reserve(reserved); + size_used_ = reserved; + return fp->FReadEndian(data_, sizeof(T), size_used_) == size_used_; } template bool GenericVector::SkipDeSerialize(TFile* fp) { @@ -862,12 +1010,11 @@ bool GenericVector::SkipDeSerialize(TFile* fp) { // Returns false in case of error. 
template bool GenericVector::SerializeClasses(FILE* fp) const { - int32_t sz = size(); - if (fwrite(&sz, sizeof(sz), 1, fp) != 1) { + if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) { return false; } - for (int i = 0; i < sz; ++i) { - if (!data()[i].Serialize(fp)) { + for (int i = 0; i < size_used_; ++i) { + if (!data_[i].Serialize(fp)) { return false; } } @@ -875,12 +1022,11 @@ bool GenericVector::SerializeClasses(FILE* fp) const { } template bool GenericVector::SerializeClasses(TFile* fp) const { - int32_t sz = size(); - if (fp->FWrite(&sz, sizeof(sz), 1) != 1) { + if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) { return false; } - for (int i = 0; i < sz; ++i) { - if (!data()[i].Serialize(fp)) { + for (int i = 0; i < size_used_; ++i) { + if (!data_[i].Serialize(fp)) { return false; } } @@ -904,7 +1050,7 @@ bool GenericVector::DeSerializeClasses(bool swap, FILE* fp) { T empty; init_to_size(reserved, empty); for (int i = 0; i < reserved; ++i) { - if (!data()[i].DeSerialize(swap, fp)) { + if (!data_[i].DeSerialize(swap, fp)) { return false; } } @@ -919,7 +1065,7 @@ bool GenericVector::DeSerializeClasses(TFile* fp) { T empty; init_to_size(reserved, empty); for (int i = 0; i < reserved; ++i) { - if (!data()[i].DeSerialize(fp)) { + if (!data_[i].DeSerialize(fp)) { return false; } } @@ -943,7 +1089,17 @@ bool GenericVector::SkipDeSerializeClasses(TFile* fp) { // its argument, and finally invalidates its argument. 
template void GenericVector::move(GenericVector* from) { - *this = std::move(*from); + this->clear(); + this->data_ = from->data_; + this->size_reserved_ = from->size_reserved_; + this->size_used_ = from->size_used_; + this->compare_cb_ = from->compare_cb_; + this->clear_cb_ = from->clear_cb_; + from->data_ = nullptr; + from->clear_cb_ = nullptr; + from->compare_cb_ = nullptr; + from->size_used_ = 0; + from->size_reserved_ = 0; } template @@ -974,7 +1130,7 @@ int GenericVector::choose_nth_item(int target_index, int start, int end, return start; } if (num_elements == 2) { - if (data()[start] < data()[start + 1]) { + if (data_[start] < data_[start + 1]) { return target_index > start ? start + 1 : start; } return target_index > start ? start : start + 1; @@ -993,9 +1149,9 @@ int GenericVector::choose_nth_item(int target_index, int start, int end, int next_lesser = start; int prev_greater = end; for (int next_sample = start + 1; next_sample < prev_greater;) { - if (data()[next_sample] < data()[next_lesser]) { + if (data_[next_sample] < data_[next_lesser]) { swap(next_lesser++, next_sample++); - } else if (data()[next_sample] == data()[next_lesser]) { + } else if (data_[next_sample] == data_[next_lesser]) { ++next_sample; } else { swap(--prev_greater, next_sample); diff --git a/src/ccutil/serialis.cpp b/src/ccutil/serialis.cpp index 2721e9c3..fecce7c6 100644 --- a/src/ccutil/serialis.cpp +++ b/src/ccutil/serialis.cpp @@ -133,6 +133,16 @@ TFile::~TFile() { delete data_; } +bool TFile::DeSerialize(std::vector& data) { + uint32_t size; + if (!DeSerialize(&size)) { + return false; + } + // TODO: optimize. 
+ data.resize(size); + return DeSerialize(&data[0], data.size()); +} + bool TFile::DeSerialize(char* buffer, size_t count) { return FRead(buffer, sizeof(*buffer), count) == count; } @@ -177,6 +187,14 @@ bool TFile::DeSerialize(uint64_t* buffer, size_t count) { return FReadEndian(buffer, sizeof(*buffer), count) == count; } +bool TFile::Serialize(const std::vector& data) { + uint32_t size = data.size(); + if (!Serialize(&size)) { + return false; + } + return Serialize(&data[0], size); +} + bool TFile::Serialize(const char* buffer, size_t count) { return FWrite(buffer, sizeof(*buffer), count) == count; } diff --git a/src/ccutil/serialis.h b/src/ccutil/serialis.h index 9b3f4c8b..e38777f4 100644 --- a/src/ccutil/serialis.h +++ b/src/ccutil/serialis.h @@ -91,6 +91,7 @@ class TFile { } // Deserialize data. + bool DeSerialize(std::vector& data); bool DeSerialize(char* data, size_t count = 1); bool DeSerialize(double* data, size_t count = 1); bool DeSerialize(float* data, size_t count = 1); @@ -104,6 +105,7 @@ class TFile { bool DeSerialize(uint64_t* data, size_t count = 1); // Serialize data. + bool Serialize(const std::vector& data); bool Serialize(const char* data, size_t count = 1); bool Serialize(const double* data, size_t count = 1); bool Serialize(const float* data, size_t count = 1); diff --git a/src/ccutil/tessdatamanager.cpp b/src/ccutil/tessdatamanager.cpp index 41b1f924..a6ac8ea3 100644 --- a/src/ccutil/tessdatamanager.cpp +++ b/src/ccutil/tessdatamanager.cpp @@ -96,7 +96,7 @@ bool TessdataManager::LoadArchiveFile(const char *filename) { #endif bool TessdataManager::Init(const char *data_file_name) { - GenericVector data; + std::vector data; if (reader_ == nullptr) { #if defined(HAVE_LIBARCHIVE) if (LoadArchiveFile(data_file_name)) return true; @@ -155,7 +155,7 @@ bool TessdataManager::SaveFile(const char* filename, FileWriter writer) const { // TODO: This method supports only the proprietary file format. 
ASSERT_HOST(is_loaded_); - GenericVector data; + std::vector data; Serialize(&data); if (writer == nullptr) return SaveDataToFile(data, filename); @@ -164,7 +164,7 @@ bool TessdataManager::SaveFile(const char* filename, } // Serializes to the given vector. -void TessdataManager::Serialize(GenericVector *data) const { +void TessdataManager::Serialize(std::vector *data) const { // TODO: This method supports only the proprietary file format. ASSERT_HOST(is_loaded_); // Compute the offset_table and total size. @@ -178,7 +178,7 @@ void TessdataManager::Serialize(GenericVector *data) const { offset += entries_[i].size(); } } - data->init_to_size(offset, 0); + data->resize(offset, 0); int32_t num_entries = TESSDATA_NUM_ENTRIES; TFile fp; fp.OpenWrite(data); diff --git a/src/ccutil/tessdatamanager.h b/src/ccutil/tessdatamanager.h index dbb2740b..8eb93526 100644 --- a/src/ccutil/tessdatamanager.h +++ b/src/ccutil/tessdatamanager.h @@ -151,7 +151,7 @@ class TessdataManager { // Saves to the given filename. bool SaveFile(const char* filename, FileWriter writer) const; // Serializes to the given vector. - void Serialize(GenericVector *data) const; + void Serialize(std::vector *data) const; // Resets to the initial state, keeping the reader. 
void Clear(); diff --git a/src/classify/shapetable.cpp b/src/classify/shapetable.cpp index f6371eef..bf230086 100644 --- a/src/classify/shapetable.cpp +++ b/src/classify/shapetable.cpp @@ -716,8 +716,8 @@ int ShapeTable::AddUnicharToResults( int result_index = unichar_map->get(unichar_id); if (result_index < 0) { UnicharRating result(unichar_id, rating); - results->push_back(result); result_index = results->size(); + results->push_back(result); (*unichar_map)[unichar_id] = result_index; } return result_index; diff --git a/src/dict/dawg.h b/src/dict/dawg.h index f7c3088b..119643c0 100644 --- a/src/dict/dawg.h +++ b/src/dict/dawg.h @@ -377,7 +377,7 @@ class DawgPositionVector : public GenericVector { bool debug, const char *debug_msg) { for (int i = 0; i < size(); ++i) { - if (data()[i] == new_pos) return false; + if (data_[i] == new_pos) return false; } push_back(new_pos); if (debug) { diff --git a/src/training/combine_tessdata.cpp b/src/training/combine_tessdata.cpp index 51c4be5d..56b96ba9 100644 --- a/src/training/combine_tessdata.cpp +++ b/src/training/combine_tessdata.cpp @@ -162,7 +162,7 @@ int main(int argc, char **argv) { return EXIT_FAILURE; } recognizer.ConvertToInt(); - GenericVector lstm_data; + std::vector lstm_data; fp.OpenWrite(&lstm_data); ASSERT_HOST(recognizer.Serialize(&tm, &fp)); tm.OverwriteEntry(tesseract::TESSDATA_LSTM, &lstm_data[0], diff --git a/src/training/lang_model_helpers.cpp b/src/training/lang_model_helpers.cpp index dd1059d9..79f85fc1 100644 --- a/src/training/lang_model_helpers.cpp +++ b/src/training/lang_model_helpers.cpp @@ -34,7 +34,7 @@ namespace tesseract { // can do its own thing. If lang is empty, returns true but does nothing. // NOTE that suffix should contain any required . for the filename. 
bool WriteFile(const std::string& output_dir, const std::string& lang, - const std::string& suffix, const GenericVector& data, + const std::string& suffix, const std::vector& data, FileWriter writer) { if (lang.empty()) return true; std::string dirname = output_dir + "/" + lang; @@ -56,7 +56,7 @@ bool WriteFile(const std::string& output_dir, const std::string& lang, // On failure emits a warning message and returns and empty STRING. STRING ReadFile(const std::string& filename, FileReader reader) { if (filename.empty()) return STRING(); - GenericVector data; + std::vector data; bool read_result; if (reader == nullptr) read_result = LoadDataFromFile(filename.c_str(), &data); @@ -71,7 +71,7 @@ STRING ReadFile(const std::string& filename, FileReader reader) { bool WriteUnicharset(const UNICHARSET& unicharset, const std::string& output_dir, const std::string& lang, FileWriter writer, TessdataManager* traineddata) { - GenericVector unicharset_data; + std::vector unicharset_data; TFile fp; fp.OpenWrite(&unicharset_data); if (!unicharset.save_to_file(&fp)) return false; @@ -107,13 +107,13 @@ bool WriteRecoder(const UNICHARSET& unicharset, bool pass_through, } } TFile fp; - GenericVector recoder_data; + std::vector recoder_data; fp.OpenWrite(&recoder_data); if (!recoder.Serialize(&fp)) return false; traineddata->OverwriteEntry(TESSDATA_LSTM_RECODER, &recoder_data[0], recoder_data.size()); STRING encoding = recoder.GetEncodingAsString(unicharset); - recoder_data.init_to_size(encoding.length(), 0); + recoder_data.resize(encoding.length(), 0); memcpy(&recoder_data[0], &encoding[0], encoding.length()); STRING suffix; suffix.add_str_int(".charset_size=", recoder.code_range()); @@ -134,7 +134,7 @@ static bool WriteDawg(const std::vector& words, std::unique_ptr dawg(trie.trie_to_dawg()); if (dawg == nullptr || dawg->NumEdges() == 0) return false; TFile fp; - GenericVector dawg_data; + std::vector dawg_data; fp.OpenWrite(&dawg_data); if (!dawg->write_squished_dawg(&fp)) return 
false; traineddata->OverwriteEntry(file_type, &dawg_data[0], dawg_data.size()); @@ -228,7 +228,7 @@ int CombineLangModel(const UNICHARSET& unicharset, const std::string& script_dir } // Traineddata file. - GenericVector traineddata_data; + std::vector traineddata_data; traineddata.Serialize(&traineddata_data); if (!WriteFile(output_dir, lang, ".traineddata", traineddata_data, writer)) { tprintf("Error writing output traineddata file!!\n"); diff --git a/src/training/lang_model_helpers.h b/src/training/lang_model_helpers.h index b966a11a..545f997c 100644 --- a/src/training/lang_model_helpers.h +++ b/src/training/lang_model_helpers.h @@ -30,7 +30,7 @@ namespace tesseract { // can do its own thing. If lang is empty, returns true but does nothing. // NOTE that suffix should contain any required . for the filename. bool WriteFile(const std::string& output_dir, const std::string& lang, - const std::string& suffix, const GenericVector& data, + const std::string& suffix, const std::vector& data, FileWriter writer); // Helper reads a file with optional reader and returns a STRING. // On failure emits a warning message and returns and empty STRING. diff --git a/src/training/lstmtester.cpp b/src/training/lstmtester.cpp index afe167bf..97673fff 100644 --- a/src/training/lstmtester.cpp +++ b/src/training/lstmtester.cpp @@ -29,7 +29,7 @@ LSTMTester::LSTMTester(int64_t max_memory) // tesseract into memory ready for testing. Returns false if nothing was // loaded. The arg is a filename of a file that lists the filenames. bool LSTMTester::LoadAllEvalData(const char* filenames_file) { - GenericVector filenames; + std::vector filenames; if (!LoadFileLinesToStrings(filenames_file, &filenames)) { tprintf("Failed to load list of eval filenames from %s\n", filenames_file); @@ -41,7 +41,7 @@ bool LSTMTester::LoadAllEvalData(const char* filenames_file) { // Loads a set of lstmf files that were created using the lstm.train config to // tesseract into memory ready for testing. 
Returns false if nothing was // loaded. -bool LSTMTester::LoadAllEvalData(const GenericVector& filenames) { +bool LSTMTester::LoadAllEvalData(const std::vector& filenames) { test_data_.Clear(); bool result = test_data_.LoadDocuments(filenames, CS_SEQUENTIAL, nullptr); total_pages_ = test_data_.TotalPages(); diff --git a/src/training/lstmtester.h b/src/training/lstmtester.h index b73658f0..48f2fd6a 100644 --- a/src/training/lstmtester.h +++ b/src/training/lstmtester.h @@ -38,7 +38,7 @@ class LSTMTester { // Loads a set of lstmf files that were created using the lstm.train config to // tesseract into memory ready for testing. Returns false if nothing was // loaded. - bool LoadAllEvalData(const GenericVector& filenames); + bool LoadAllEvalData(const std::vector& filenames); // Runs an evaluation asynchronously on the stored eval data and returns a // string describing the results of the previous test. Args match TestCallback diff --git a/src/training/lstmtrainer.cpp b/src/training/lstmtrainer.cpp index 0b06bd6c..6f13d5c2 100644 --- a/src/training/lstmtrainer.cpp +++ b/src/training/lstmtrainer.cpp @@ -267,7 +267,7 @@ void LSTMTrainer::DebugNetwork() { // Loads a set of lstmf files that were created using the lstm.train config to // tesseract into memory ready for training. Returns false if nothing was // loaded. -bool LSTMTrainer::LoadAllTrainingData(const GenericVector& filenames, +bool LSTMTrainer::LoadAllTrainingData(const std::vector& filenames, CachingStrategy cache_strategy, bool randomly_rotate) { randomly_rotate_ = randomly_rotate; @@ -302,7 +302,7 @@ bool LSTMTrainer::MaintainCheckpoints(TestCallback tester, STRING* log_msg) { } } bool result = true; // Something interesting happened. 
- GenericVector rec_model_data; + std::vector rec_model_data; if (error_rate < best_error_rate_) { SaveRecognitionDump(&rec_model_data); log_msg->add_str_double(" New best char error = ", error_rate); @@ -335,7 +335,7 @@ bool LSTMTrainer::MaintainCheckpoints(TestCallback tester, STRING* log_msg) { // Error rate has ballooned. Go back to the best model. *log_msg += "\nDivergence! "; // Copy best_trainer_ before reading it, as it will get overwritten. - GenericVector revert_data(best_trainer_); + std::vector revert_data(best_trainer_); if (ReadTrainingDump(revert_data, this)) { LogIterations("Reverted to", log_msg); ReduceLearningRates(this, log_msg); @@ -354,7 +354,7 @@ bool LSTMTrainer::MaintainCheckpoints(TestCallback tester, STRING* log_msg) { } if (checkpoint_name_.length() > 0) { // Write a current checkpoint. - GenericVector checkpoint; + std::vector checkpoint; if (!SaveTrainingDump(FULL, this, &checkpoint) || !SaveDataToFile(checkpoint, checkpoint_name_.c_str())) { *log_msg += " failed to write checkpoint."; @@ -420,14 +420,14 @@ bool LSTMTrainer::Serialize(SerializeAmount serialize_amount, if (!fp->Serialize(&worst_error_rates_[0], countof(worst_error_rates_))) return false; if (!fp->Serialize(&worst_iteration_)) return false; if (!fp->Serialize(&stall_iteration_)) return false; - if (!best_model_data_.Serialize(fp)) return false; - if (!worst_model_data_.Serialize(fp)) return false; - if (serialize_amount != NO_BEST_TRAINER && !best_trainer_.Serialize(fp)) + if (!fp->Serialize(best_model_data_)) return false; + if (!fp->Serialize(worst_model_data_)) return false; + if (serialize_amount != NO_BEST_TRAINER && !fp->Serialize(best_trainer_)) return false; - GenericVector sub_data; + std::vector sub_data; if (sub_trainer_ != nullptr && !SaveTrainingDump(LIGHT, sub_trainer_, &sub_data)) return false; - if (!sub_data.Serialize(fp)) return false; + if (!fp->Serialize(sub_data)) return false; if (!best_error_history_.Serialize(fp)) return false; if 
(!best_error_iterations_.Serialize(fp)) return false; return fp->Serialize(&improvement_steps_); @@ -464,11 +464,11 @@ bool LSTMTrainer::DeSerialize(const TessdataManager* mgr, TFile* fp) { if (!fp->DeSerialize(&worst_error_rates_[0], countof(worst_error_rates_))) return false; if (!fp->DeSerialize(&worst_iteration_)) return false; if (!fp->DeSerialize(&stall_iteration_)) return false; - if (!best_model_data_.DeSerialize(fp)) return false; - if (!worst_model_data_.DeSerialize(fp)) return false; - if (amount != NO_BEST_TRAINER && !best_trainer_.DeSerialize(fp)) return false; - GenericVector sub_data; - if (!sub_data.DeSerialize(fp)) return false; + if (!fp->DeSerialize(best_model_data_)) return false; + if (!fp->DeSerialize(worst_model_data_)) return false; + if (amount != NO_BEST_TRAINER && !fp->DeSerialize(best_trainer_)) return false; + std::vector sub_data; + if (!fp->DeSerialize(sub_data)) return false; delete sub_trainer_; if (sub_data.empty()) { sub_trainer_ = nullptr; @@ -542,7 +542,7 @@ SubTrainerResult LSTMTrainer::UpdateSubtrainer(STRING* log_msg) { if (sub_error < best_error_rate_ && sub_margin >= kSubTrainerMarginFraction) { // The sub_trainer_ has won the race to a new best. Switch to it. 
- GenericVector updated_trainer; + std::vector updated_trainer; SaveTrainingDump(LIGHT, sub_trainer_, &updated_trainer); ReadTrainingDump(updated_trainer, this); log_msg->add_str_int(" Sub trainer wins at iteration ", @@ -594,7 +594,7 @@ int LSTMTrainer::ReduceLayerLearningRates(double factor, int num_samples, ok_sums[i].init_to_size(num_layers, 0.0); } double momentum_factor = 1.0 / (1.0 - momentum_); - GenericVector orig_trainer; + std::vector orig_trainer; samples_trainer->SaveTrainingDump(LIGHT, this, &orig_trainer); for (int i = 0; i < num_layers; ++i) { Network* layer = GetLayer(layers[i]); @@ -624,7 +624,7 @@ int LSTMTrainer::ReduceLayerLearningRates(double factor, int num_samples, copy_trainer.TrainOnLine(samples_trainer, true); if (trainingdata == nullptr) continue; // We'll now use this trainer again for each layer. - GenericVector updated_trainer; + std::vector updated_trainer; samples_trainer->SaveTrainingDump(LIGHT, ©_trainer, &updated_trainer); for (int i = 0; i < num_layers; ++i) { if (num_weights[i] == 0) continue; @@ -871,7 +871,7 @@ Trainability LSTMTrainer::PrepareForBackward(const ImageData* trainingdata, // actually serialized. bool LSTMTrainer::SaveTrainingDump(SerializeAmount serialize_amount, const LSTMTrainer* trainer, - GenericVector* data) const { + std::vector* data) const { TFile fp; fp.OpenWrite(data); return trainer->Serialize(serialize_amount, &mgr_, &fp); @@ -891,7 +891,7 @@ bool LSTMTrainer::ReadLocalTrainingDump(const TessdataManager* mgr, // Writes the full recognition traineddata to the given filename. bool LSTMTrainer::SaveTraineddata(const char* filename) { - GenericVector recognizer_data; + std::vector recognizer_data; SaveRecognitionDump(&recognizer_data); mgr_.OverwriteEntry(TESSDATA_LSTM, &recognizer_data[0], recognizer_data.size()); @@ -899,7 +899,7 @@ bool LSTMTrainer::SaveTraineddata(const char* filename) { } // Writes the recognizer to memory, so that it can be used for testing later. 
-void LSTMTrainer::SaveRecognitionDump(GenericVector* data) const { +void LSTMTrainer::SaveRecognitionDump(std::vector* data) const { TFile fp; fp.OpenWrite(data); network_->SetEnableTraining(TS_TEMP_DISABLE); @@ -1260,7 +1260,7 @@ void LSTMTrainer::RollErrorBuffers() { // Tester is an externally supplied callback function that tests on some // data set with a given model and records the error rates in a graph. STRING LSTMTrainer::UpdateErrorGraph(int iteration, double error_rate, - const GenericVector& model_data, + const std::vector& model_data, TestCallback tester) { if (error_rate > best_error_rate_ && iteration < best_iteration_ + kErrorGraphInterval) { @@ -1287,7 +1287,7 @@ STRING LSTMTrainer::UpdateErrorGraph(int iteration, double error_rate, worst_model_data_.size()); result = tester(worst_iteration_, worst_error_rates_, mgr_, CurrentTrainingStage()); - worst_model_data_.truncate(0); + worst_model_data_.clear(); best_model_data_ = model_data; } best_error_rate_ = error_rate; @@ -1322,7 +1322,7 @@ STRING LSTMTrainer::UpdateErrorGraph(int iteration, double error_rate, CurrentTrainingStage()); } if (result.length() > 0) - best_model_data_.truncate(0); + best_model_data_.clear(); worst_model_data_ = model_data; } } diff --git a/src/training/lstmtrainer.h b/src/training/lstmtrainer.h index 992ec2b1..fad939c9 100644 --- a/src/training/lstmtrainer.h +++ b/src/training/lstmtrainer.h @@ -135,7 +135,7 @@ class LSTMTrainer : public LSTMRecognizer { int learning_iteration() const { return learning_iteration_; } int32_t improvement_steps() const { return improvement_steps_; } void set_perfect_delay(int delay) { perfect_delay_ = delay; } - const GenericVector& best_trainer() const { return best_trainer_; } + const std::vector& best_trainer() const { return best_trainer_; } // Returns the error that was just calculated by PrepareForBackward. 
double NewSingleError(ErrorTypes type) const { return error_buffers_[type][training_iteration() % kRollingBufferSize_]; @@ -167,7 +167,7 @@ class LSTMTrainer : public LSTMRecognizer { // Loads a set of lstmf files that were created using the lstm.train config to // tesseract into memory ready for training. Returns false if nothing was // loaded. - bool LoadAllTrainingData(const GenericVector& filenames, + bool LoadAllTrainingData(const std::vector& filenames, CachingStrategy cache_strategy, bool randomly_rotate); @@ -269,7 +269,7 @@ class LSTMTrainer : public LSTMRecognizer { // actually serialized. bool SaveTrainingDump(SerializeAmount serialize_amount, const LSTMTrainer* trainer, - GenericVector* data) const; + std::vector* data) const; // Reads previously saved trainer from memory. *this must always be the // master trainer that retains the only copy of the training data and @@ -294,7 +294,7 @@ class LSTMTrainer : public LSTMRecognizer { bool SaveTraineddata(const char* filename); // Writes the recognizer to memory, so that it can be used for testing later. - void SaveRecognitionDump(GenericVector* data) const; + void SaveRecognitionDump(std::vector* data) const; // Returns a suitable filename for a training dump, based on the model_base_, // the iteration and the error rates. @@ -375,7 +375,7 @@ class LSTMTrainer : public LSTMRecognizer { // Given that error_rate is either a new min or max, updates the best/worst // error rates, and record of progress. STRING UpdateErrorGraph(int iteration, double error_rate, - const GenericVector& model_data, + const std::vector& model_data, TestCallback tester); protected: @@ -420,10 +420,10 @@ class LSTMTrainer : public LSTMRecognizer { // Iteration at which the process will be thought stalled. int stall_iteration_; // Saved recognition models for computing test error for graph points. 
- GenericVector best_model_data_; - GenericVector worst_model_data_; + std::vector best_model_data_; + std::vector worst_model_data_; // Saved trainer for reverting back to last known best. - GenericVector best_trainer_; + std::vector best_trainer_; // A subsidiary trainer running with a different learning rate until either // *this or sub_trainer_ hits a new best. LSTMTrainer* sub_trainer_; diff --git a/src/training/lstmtraining.cpp b/src/training/lstmtraining.cpp index 8a34ead4..8f17bba6 100644 --- a/src/training/lstmtraining.cpp +++ b/src/training/lstmtraining.cpp @@ -136,7 +136,7 @@ int main(int argc, char **argv) { tprintf("Must supply a list of training filenames! --train_listfile\n"); return EXIT_FAILURE; } - GenericVector filenames; + std::vector filenames; if (!tesseract::LoadFileLinesToStrings(FLAGS_train_listfile.c_str(), &filenames)) { tprintf("Failed to load list of training filenames from %s\n", diff --git a/src/wordrec/chopper.cpp b/src/wordrec/chopper.cpp index efddc565..593fee04 100644 --- a/src/wordrec/chopper.cpp +++ b/src/wordrec/chopper.cpp @@ -264,7 +264,7 @@ SEAM *Wordrec::chop_numbered_blob(TWERD *word, int32_t blob_number, } -SEAM *Wordrec::chop_overlapping_blob(const GenericVector& boxes, +SEAM *Wordrec::chop_overlapping_blob(const std::vector& boxes, bool italic_blob, WERD_RES *word_res, int *blob_number) { TWERD *word = word_res->chopped_word; @@ -362,7 +362,7 @@ SEAM* Wordrec::improve_one_blob(const GenericVector& blob_choices, * the worst blobs and try to divide it up to improve the ratings. * Used for testing chopper. 
*/ -SEAM* Wordrec::chop_one_blob(const GenericVector& boxes, +SEAM* Wordrec::chop_one_blob(const std::vector& boxes, const GenericVector& blob_choices, WERD_RES* word_res, int* blob_number) { diff --git a/src/wordrec/wordrec.h b/src/wordrec/wordrec.h index 470ea611..3bdaffa1 100644 --- a/src/wordrec/wordrec.h +++ b/src/wordrec/wordrec.h @@ -370,7 +370,7 @@ class Wordrec : public Classify { bool italic_blob, const GenericVector& seams); SEAM *chop_numbered_blob(TWERD *word, int32_t blob_number, bool italic_blob, const GenericVector& seams); - SEAM *chop_overlapping_blob(const GenericVector& boxes, + SEAM *chop_overlapping_blob(const std::vector& boxes, bool italic_blob, WERD_RES *word_res, int *blob_number); SEAM *improve_one_blob(const GenericVector &blob_choices, @@ -379,7 +379,7 @@ class Wordrec : public Classify { bool italic_blob, WERD_RES *word, int *blob_number); - SEAM *chop_one_blob(const GenericVector &boxes, + SEAM *chop_one_blob(const std::vector &boxes, const GenericVector &blob_choices, WERD_RES *word_res, int *blob_number); diff --git a/unittest/imagedata_test.cc b/unittest/imagedata_test.cc index 673334d0..8e17cf87 100644 --- a/unittest/imagedata_test.cc +++ b/unittest/imagedata_test.cc @@ -97,7 +97,7 @@ TEST_F(ImagedataTest, CachesMultiDocs) { // Number of pages in each document. 
const std::vector kNumPages = {6, 5, 7}; std::vector> page_texts; - GenericVector filenames; + std::vector filenames; for (size_t d = 0; d < kNumPages.size(); ++d) { page_texts.emplace_back(std::vector()); std::string filename = MakeFakeDoc(kNumPages[d], d, &page_texts.back()); diff --git a/unittest/lang_model_test.cc b/unittest/lang_model_test.cc index 3296ac06..b788f55e 100644 --- a/unittest/lang_model_test.cc +++ b/unittest/lang_model_test.cc @@ -45,7 +45,7 @@ TEST(LangModelTest, AddACharacter) { LOG(INFO) << "Output dir=" << output_dir << "\n"; std::string lang1 = "eng"; bool pass_through_recoder = false; - GenericVector words, puncs, numbers; + std::vector words, puncs, numbers; // If these reads fail, we get a warning message and an empty list of words. ReadFile(file::JoinPath(eng_dir, "eng.wordlist"), nullptr) .split('\n', &words); @@ -136,7 +136,7 @@ TEST(LangModelTest, AddACharacterHindi) { LOG(INFO) << "Output dir=" << output_dir << "\n"; std::string lang1 = "hin"; bool pass_through_recoder = false; - GenericVector words, puncs, numbers; + std::vector words, puncs, numbers; // If these reads fail, we get a warning message and an empty list of words. 
ReadFile(file::JoinPath(hin_dir, "hin.wordlist"), nullptr) .split('\n', &words); diff --git a/unittest/lstm_test.cc b/unittest/lstm_test.cc index ccc639e2..69a07a58 100644 --- a/unittest/lstm_test.cc +++ b/unittest/lstm_test.cc @@ -114,7 +114,7 @@ TEST_F(LSTMTrainerTest, DeterminismTest) { double lstm_2d_err_a = TrainIterations(kTrainerIterations); double act_error_a = trainer_->ActivationError(); double char_error_a = trainer_->CharError(); - GenericVector trainer_a_data; + std::vector trainer_a_data; EXPECT_TRUE(trainer_->SaveTrainingDump(NO_BEST_TRAINER, trainer_.get(), &trainer_a_data)); SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]", diff --git a/unittest/lstm_test.h b/unittest/lstm_test.h index 1bfa2870..f4e05365 100644 --- a/unittest/lstm_test.h +++ b/unittest/lstm_test.h @@ -78,7 +78,7 @@ class LSTMTrainerTest : public testing::Test { ASSERT_TRUE(unicharset.load_from_file(unicharset_name.c_str(), false)); std::string script_dir = file::JoinPath( LANGDATA_DIR, ""); - GenericVector words; + std::vector words; EXPECT_EQ(0, CombineLangModel(unicharset, script_dir, "", FLAGS_test_tmpdir, kLang, !recode, words, words, words, false, nullptr, nullptr)); @@ -95,7 +95,7 @@ class LSTMTrainerTest : public testing::Test { if (layer_specific) net_mode |= NF_LAYER_SPECIFIC_LR; EXPECT_TRUE(trainer_->InitNetwork(network_spec.c_str(), -1, net_mode, 0.1, learning_rate, 0.9, 0.999)); - GenericVector filenames; + std::vector filenames; filenames.push_back(STRING(TestDataNameToPath(lstmf_file).c_str())); EXPECT_TRUE(trainer_->LoadAllTrainingData(filenames, CS_SEQUENTIAL, false)); LOG(INFO) << "Setup network:" << model_name << "\n" ; @@ -151,7 +151,7 @@ class LSTMTrainerTest : public testing::Test { // within 1% of the error rate. Returns the increase in error from float to // int. 
double TestIntMode(int test_iterations) { - GenericVector trainer_data; + std::vector trainer_data; EXPECT_TRUE(trainer_->SaveTrainingDump(NO_BEST_TRAINER, trainer_.get(), &trainer_data)); // Get the error on the next few iterations in float mode. diff --git a/unittest/paragraphs_test.cc b/unittest/paragraphs_test.cc index 55136801..f8e61e21 100644 --- a/unittest/paragraphs_test.cc +++ b/unittest/paragraphs_test.cc @@ -192,7 +192,7 @@ void TestParagraphDetection(const TextAndModel* correct, int num_rows) { GenericVector row_infos; GenericVector row_owners; PARA_LIST paragraphs; - std::list models; + std::vector models; MakeAsciiRowInfos(correct, num_rows, &row_infos); int debug_level(3); @@ -324,7 +324,7 @@ TEST(ParagraphsTest, TestSingleFullPageContinuation) { GenericVector row_infos; GenericVector row_owners; PARA_LIST paragraphs; - std::list models; + std::vector models; models.push_back(new ParagraphModel(kLeft, 0, 20, 0, 10)); MakeAsciiRowInfos(correct, num_rows, &row_infos); tesseract::DetectParagraphs(3, &row_infos, &row_owners, &paragraphs, &models); diff --git a/unittest/resultiterator_test.cc b/unittest/resultiterator_test.cc index 2719ffe9..22667215 100644 --- a/unittest/resultiterator_test.cc +++ b/unittest/resultiterator_test.cc @@ -28,6 +28,12 @@ void ToVector(const GenericVectorEqEq& from, std::vector* to) { for (int i = 0; i < from.size(); i++) to->push_back(from[i]); } +template +void ToVector(const std::vector& from, std::vector* to) { + to->clear(); + for (int i = 0; i < from.size(); i++) to->push_back(from[i]); +} + // The fixture for testing Tesseract.
class ResultIteratorTest : public testing::Test { protected: @@ -167,12 +173,12 @@ class ResultIteratorTest : public testing::Test { const StrongScriptDirection* word_dirs, int num_words, int* expected_reading_order, int num_reading_order_entries) const { - GenericVector gv_word_dirs; + std::vector gv_word_dirs; for (int i = 0; i < num_words; i++) { gv_word_dirs.push_back(word_dirs[i]); } - GenericVectorEqEq output; + std::vector output; ResultIterator::CalculateTextlineOrder(in_ltr_context, gv_word_dirs, &output); // STL vector can be used with EXPECT_EQ, so convert... @@ -191,17 +197,17 @@ class ResultIteratorTest : public testing::Test { void VerifySaneTextlineOrder(bool in_ltr_context, const StrongScriptDirection* word_dirs, int num_words) const { - GenericVector gv_word_dirs; + std::vector gv_word_dirs; for (int i = 0; i < num_words; i++) { gv_word_dirs.push_back(word_dirs[i]); } - GenericVectorEqEq output; + std::vector output; ResultIterator::CalculateTextlineOrder(in_ltr_context, gv_word_dirs, &output); ASSERT_GE(output.size(), num_words); - GenericVector output_copy(output); - output_copy.sort(); + std::vector output_copy(output); + std::sort(output_copy.begin(), output_copy.end()); bool sane = true; int j = 0; while (j < output_copy.size() && output_copy[j] < 0) j++; diff --git a/unittest/stringrenderer_test.cc b/unittest/stringrenderer_test.cc index bf2e6881..95d69097 100644 --- a/unittest/stringrenderer_test.cc +++ b/unittest/stringrenderer_test.cc @@ -18,7 +18,6 @@ #include "boxchar.h" #include "boxread.h" #include "commandlineflags.h" -#include "genericvector.h" #include "include_gunit.h" #include "stringrenderer.h" #include "strngs.h" @@ -227,7 +226,7 @@ TEST_F(StringRendererTest, ArabicBoxcharsInLTROrder) { std::string boxes_str = renderer_->GetBoxesStr(); // Decode to get the box text strings. 
EXPECT_FALSE(boxes_str.empty()); - GenericVector texts; + std::vector texts; EXPECT_TRUE(ReadMemBoxes(0, false, boxes_str.c_str(), false, nullptr, &texts, nullptr, nullptr)); std::string ltr_str; diff --git a/unittest/tfile_test.cc b/unittest/tfile_test.cc index eb147885..166405ff 100644 --- a/unittest/tfile_test.cc +++ b/unittest/tfile_test.cc @@ -16,7 +16,7 @@ namespace tesseract { -// Tests TFile and GenericVector serialization by serializing and +// Tests TFile and std::vector serialization by serializing and // writing/reading. class TfileTest : public ::testing::Test { @@ -115,7 +115,7 @@ TEST_F(TfileTest, Serialize) { // This test verifies that Tfile can serialize a class. MathData m1; m1.Setup(); - GenericVector data; + std::vector data; TFile fpw; fpw.OpenWrite(&data); EXPECT_TRUE(m1.Serialize(&fpw)); @@ -136,7 +136,7 @@ TEST_F(TfileTest, FGets) { MathData m1; std::string line_str = "This is a textline with a newline\n"; m1.Setup(); - GenericVector data; + std::vector data; TFile fpw; fpw.OpenWrite(&data); EXPECT_TRUE(m1.Serialize(&fpw)); @@ -161,7 +161,7 @@ TEST_F(TfileTest, BigEndian) { // This test verifies that Tfile can auto-reverse big-endian data. MathData m1; m1.Setup(); - GenericVector data; + std::vector data; TFile fpw; fpw.OpenWrite(&data); EXPECT_TRUE(m1.SerializeBigEndian(&fpw)); diff --git a/unittest/unicharcompress_test.cc b/unittest/unicharcompress_test.cc index d3bd368f..ce06d641 100644 --- a/unittest/unicharcompress_test.cc +++ b/unittest/unicharcompress_test.cc @@ -57,7 +57,7 @@ class UnicharcompressTest : public ::testing::Test { } // Serializes and de-serializes compressed_ over itself. 
void SerializeAndUndo() { - GenericVector data; + std::vector data; TFile wfp; wfp.OpenWrite(&data); EXPECT_TRUE(compressed_.Serialize(&wfp)); diff --git a/unittest/unicharset_test.cc b/unittest/unicharset_test.cc index 21408427..b6fe8d88 100644 --- a/unittest/unicharset_test.cc +++ b/unittest/unicharset_test.cc @@ -133,7 +133,7 @@ TEST(UnicharsetTest, MultibyteBigrams) { // It is added if we force it to be. u.unichar_insert("\u0ccd\u0cad", OldUncleanUnichars::kTrue); EXPECT_EQ(u.size(), 8); - GenericVector data; + std::vector data; tesseract::TFile fp; fp.OpenWrite(&data); u.save_to_file(&fp);