Merge pull request #3202 from stweil/fix2

Fix unittests
This commit is contained in:
Egor Pugin 2020-12-30 11:55:35 +03:00 committed by GitHub
commit 17b5f46385
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
40 changed files with 469 additions and 284 deletions

View File

@ -38,7 +38,6 @@
#include <cstdio> #include <cstdio>
#include <functional> // for std::function #include <functional> // for std::function
#include <list> // for std::list
#include <vector> // for std::vector #include <vector> // for std::vector
struct Pix; struct Pix;
@ -817,7 +816,7 @@ class TESS_API TessBaseAPI {
EquationDetect* equ_detect_; ///< The equation detector. EquationDetect* equ_detect_; ///< The equation detector.
FileReader reader_; ///< Reads files from any filesystem. FileReader reader_; ///< Reads files from any filesystem.
ImageThresholder* thresholder_; ///< Image thresholding module. ImageThresholder* thresholder_; ///< Image thresholding module.
std::list<ParagraphModel*>* paragraph_models_; std::vector<ParagraphModel*>* paragraph_models_;
BLOCK_LIST* block_list_; ///< The page layout. BLOCK_LIST* block_list_; ///< The page layout.
PAGE_RES* page_res_; ///< The page-level data. PAGE_RES* page_res_; ///< The page-level data.
std::string input_file_; ///< Name used by training code. std::string input_file_; ///< Name used by training code.

View File

@ -937,7 +937,7 @@ int TessBaseAPI::RecognizeForChopTest(ETEXT_DESC* monitor) {
while (page_res_it.word() != nullptr) { while (page_res_it.word() != nullptr) {
WERD_RES *word_res = page_res_it.word(); WERD_RES *word_res = page_res_it.word();
GenericVector<TBOX> boxes; std::vector<TBOX> boxes;
tesseract_->MaximallyChopWord(boxes, page_res_it.block()->block, tesseract_->MaximallyChopWord(boxes, page_res_it.block()->block,
page_res_it.row()->row, word_res); page_res_it.row()->row, word_res);
page_res_it.forward(); page_res_it.forward();
@ -1844,7 +1844,7 @@ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) {
if (text[t] != '\0' || wordstr[w] != '\0') { if (text[t] != '\0' || wordstr[w] != '\0') {
// No match. // No match.
delete page_res_; delete page_res_;
GenericVector<TBOX> boxes; std::vector<TBOX> boxes;
page_res_ = tesseract_->SetupApplyBoxes(boxes, block_list_); page_res_ = tesseract_->SetupApplyBoxes(boxes, block_list_);
tesseract_->ReSegmentByClassification(page_res_); tesseract_->ReSegmentByClassification(page_res_);
tesseract_->TidyUp(page_res_); tesseract_->TidyUp(page_res_);
@ -2291,10 +2291,10 @@ void TessBaseAPI::DetectParagraphs(bool after_text_recognition) {
int debug_level = 0; int debug_level = 0;
GetIntVariable("paragraph_debug_level", &debug_level); GetIntVariable("paragraph_debug_level", &debug_level);
if (paragraph_models_ == nullptr) if (paragraph_models_ == nullptr)
paragraph_models_ = new std::list<ParagraphModel*>; paragraph_models_ = new std::vector<ParagraphModel*>;
MutableIterator *result_it = GetMutableIterator(); MutableIterator *result_it = GetMutableIterator();
do { // Detect paragraphs for this block do { // Detect paragraphs for this block
std::list<ParagraphModel *> models; std::vector<ParagraphModel *> models;
::tesseract::DetectParagraphs(debug_level, after_text_recognition, ::tesseract::DetectParagraphs(debug_level, after_text_recognition,
result_it, &models); result_it, &models);
paragraph_models_->insert(paragraph_models_->end(), models.begin(), models.end()); paragraph_models_->insert(paragraph_models_->end(), models.begin(), models.end());

View File

@ -113,8 +113,8 @@ static void clear_any_old_text(BLOCK_LIST *block_list) {
PAGE_RES* Tesseract::ApplyBoxes(const char* filename, PAGE_RES* Tesseract::ApplyBoxes(const char* filename,
bool find_segmentation, bool find_segmentation,
BLOCK_LIST *block_list) { BLOCK_LIST *block_list) {
GenericVector<TBOX> boxes; std::vector<TBOX> boxes;
GenericVector<STRING> texts, full_texts; std::vector<STRING> texts, full_texts;
if (!ReadAllBoxes(applybox_page, true, filename, &boxes, &texts, &full_texts, if (!ReadAllBoxes(applybox_page, true, filename, &boxes, &texts, &full_texts,
nullptr)) { nullptr)) {
return nullptr; // Can't do it. return nullptr; // Can't do it.
@ -205,7 +205,7 @@ void Tesseract::PreenXHeights(BLOCK_LIST *block_list) {
/// Builds a PAGE_RES from the block_list in the way required for ApplyBoxes: /// Builds a PAGE_RES from the block_list in the way required for ApplyBoxes:
/// All fuzzy spaces are removed, and all the words are maximally chopped. /// All fuzzy spaces are removed, and all the words are maximally chopped.
PAGE_RES* Tesseract::SetupApplyBoxes(const GenericVector<TBOX>& boxes, PAGE_RES* Tesseract::SetupApplyBoxes(const std::vector<TBOX>& boxes,
BLOCK_LIST *block_list) { BLOCK_LIST *block_list) {
PreenXHeights(block_list); PreenXHeights(block_list);
// Strip all fuzzy space markers to simplify the PAGE_RES. // Strip all fuzzy space markers to simplify the PAGE_RES.
@ -241,7 +241,7 @@ PAGE_RES* Tesseract::SetupApplyBoxes(const GenericVector<TBOX>& boxes,
/// Tests the chopper by exhaustively running chop_one_blob. /// Tests the chopper by exhaustively running chop_one_blob.
/// The word_res will contain filled chopped_word, seam_array, denorm, /// The word_res will contain filled chopped_word, seam_array, denorm,
/// box_word and best_state for the maximally chopped word. /// box_word and best_state for the maximally chopped word.
void Tesseract::MaximallyChopWord(const GenericVector<TBOX>& boxes, void Tesseract::MaximallyChopWord(const std::vector<TBOX>& boxes,
BLOCK* block, ROW* row, BLOCK* block, ROW* row,
WERD_RES* word_res) { WERD_RES* word_res) {
if (!word_res->SetupForRecognition(unicharset, this, BestPix(), if (!word_res->SetupForRecognition(unicharset, this, BestPix(),

View File

@ -52,8 +52,8 @@ bool Tesseract::TrainLineRecognizer(const char* input_imagename,
return false; return false;
} }
} }
GenericVector<TBOX> boxes; std::vector<TBOX> boxes;
GenericVector<STRING> texts; std::vector<STRING> texts;
// Get the boxes for this page, if there are any. // Get the boxes for this page, if there are any.
if (!ReadAllBoxes(applybox_page, false, input_imagename, &boxes, &texts, nullptr, if (!ReadAllBoxes(applybox_page, false, input_imagename, &boxes, &texts, nullptr,
nullptr) || nullptr) ||
@ -77,8 +77,8 @@ bool Tesseract::TrainLineRecognizer(const char* input_imagename,
// Generates training data for training a line recognizer, eg LSTM. // Generates training data for training a line recognizer, eg LSTM.
// Breaks the boxes into lines, normalizes them, converts to ImageData and // Breaks the boxes into lines, normalizes them, converts to ImageData and
// appends them to the given training_data. // appends them to the given training_data.
void Tesseract::TrainFromBoxes(const GenericVector<TBOX>& boxes, void Tesseract::TrainFromBoxes(const std::vector<TBOX>& boxes,
const GenericVector<STRING>& texts, const std::vector<STRING>& texts,
BLOCK_LIST *block_list, BLOCK_LIST *block_list,
DocumentData* training_data) { DocumentData* training_data) {
int box_count = boxes.size(); int box_count = boxes.size();
@ -133,8 +133,8 @@ void Tesseract::TrainFromBoxes(const GenericVector<TBOX>& boxes,
// and ground truth boxes/truth text if available in the input. // and ground truth boxes/truth text if available in the input.
// The image is not normalized in any way. // The image is not normalized in any way.
ImageData* Tesseract::GetLineData(const TBOX& line_box, ImageData* Tesseract::GetLineData(const TBOX& line_box,
const GenericVector<TBOX>& boxes, const std::vector<TBOX>& boxes,
const GenericVector<STRING>& texts, const std::vector<STRING>& texts,
int start_box, int end_box, int start_box, int end_box,
const BLOCK& block) { const BLOCK& block) {
TBOX revised_box; TBOX revised_box;
@ -145,8 +145,8 @@ ImageData* Tesseract::GetLineData(const TBOX& line_box,
// Copy the boxes and shift them so they are relative to the image. // Copy the boxes and shift them so they are relative to the image.
FCOORD block_rotation(block.re_rotation().x(), -block.re_rotation().y()); FCOORD block_rotation(block.re_rotation().x(), -block.re_rotation().y());
ICOORD shift = -revised_box.botleft(); ICOORD shift = -revised_box.botleft();
GenericVector<TBOX> line_boxes; std::vector<TBOX> line_boxes;
GenericVector<STRING> line_texts; std::vector<STRING> line_texts;
for (int b = start_box; b < end_box; ++b) { for (int b = start_box; b < end_box; ++b) {
TBOX box = boxes[b]; TBOX box = boxes[b];
box.rotate(block_rotation); box.rotate(block_rotation);
@ -154,8 +154,8 @@ ImageData* Tesseract::GetLineData(const TBOX& line_box,
line_boxes.push_back(box); line_boxes.push_back(box);
line_texts.push_back(texts[b]); line_texts.push_back(texts[b]);
} }
GenericVector<int> page_numbers; std::vector<int> page_numbers;
page_numbers.init_to_size(line_boxes.size(), applybox_page); page_numbers.resize(line_boxes.size(), applybox_page);
image_data->AddBoxes(line_boxes, line_texts, page_numbers); image_data->AddBoxes(line_boxes, line_texts, page_numbers);
return image_data; return image_data;
} }

View File

@ -249,7 +249,7 @@ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks,
// allowed_ids. // allowed_ids.
static void AddAllScriptsConverted(const UNICHARSET& sid_set, static void AddAllScriptsConverted(const UNICHARSET& sid_set,
const UNICHARSET& osd_set, const UNICHARSET& osd_set,
GenericVector<int>* allowed_ids) { std::vector<int>* allowed_ids) {
for (int i = 0; i < sid_set.get_script_table_size(); ++i) { for (int i = 0; i < sid_set.get_script_table_size(); ++i) {
if (i != sid_set.null_sid()) { if (i != sid_set.null_sid()) {
const char* script = sid_set.get_script_from_script_id(i); const char* script = sid_set.get_script_from_script_id(i);
@ -357,7 +357,7 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
to_block, &osd_blobs); to_block, &osd_blobs);
} }
if (PSM_OSD_ENABLED(pageseg_mode) && osd_tess != nullptr && osr != nullptr) { if (PSM_OSD_ENABLED(pageseg_mode) && osd_tess != nullptr && osr != nullptr) {
GenericVector<int> osd_scripts; std::vector<int> osd_scripts;
if (osd_tess != this) { if (osd_tess != this) {
// We are running osd as part of layout analysis, so constrain the // We are running osd as part of layout analysis, so constrain the
// scripts to those allowed by *this. // scripts to those allowed by *this.

View File

@ -1236,14 +1236,19 @@ const ParagraphModel* ParagraphTheory::AddModel(const ParagraphModel &model) {
} }
void ParagraphTheory::DiscardUnusedModels(const SetOfModels &used_models) { void ParagraphTheory::DiscardUnusedModels(const SetOfModels &used_models) {
models_->remove_if([this, used_models](ParagraphModel* m) { size_t w = 0;
bool remove = !used_models.contains(m) && models_we_added_.contains(m); for (size_t r = 0; r < models_->size(); r++) {
if (remove) { ParagraphModel* m = (*models_)[r];
models_we_added_.remove(models_we_added_.get_index(m)); if (!used_models.contains(m) && models_we_added_.contains(m)) {
delete m; delete m;
} else {
if (r > w) {
(*models_)[w] = m;
} }
return remove; w++;
}); }
}
models_->resize(w);
} }
// Examine rows[start, end) and try to determine if an existing non-centered // Examine rows[start, end) and try to determine if an existing non-centered
@ -2272,7 +2277,7 @@ void DetectParagraphs(int debug_level,
GenericVector<RowInfo> *row_infos, GenericVector<RowInfo> *row_infos,
GenericVector<PARA *> *row_owners, GenericVector<PARA *> *row_owners,
PARA_LIST *paragraphs, PARA_LIST *paragraphs,
std::list<ParagraphModel *> *models) { std::vector<ParagraphModel *> *models) {
GenericVector<RowScratchRegisters> rows; GenericVector<RowScratchRegisters> rows;
ParagraphTheory theory(models); ParagraphTheory theory(models);
@ -2514,7 +2519,7 @@ static void InitializeRowInfo(bool after_recognition,
void DetectParagraphs(int debug_level, void DetectParagraphs(int debug_level,
bool after_text_recognition, bool after_text_recognition,
const MutableIterator *block_start, const MutableIterator *block_start,
std::list<ParagraphModel *> *models) { std::vector<ParagraphModel *> *models) {
// Clear out any preconceived notions. // Clear out any preconceived notions.
if (block_start->Empty(RIL_TEXTLINE)) { if (block_start->Empty(RIL_TEXTLINE)) {
return; return;

View File

@ -91,7 +91,7 @@ void DetectParagraphs(int debug_level,
GenericVector<RowInfo> *row_infos, GenericVector<RowInfo> *row_infos,
GenericVector<PARA *> *row_owners, GenericVector<PARA *> *row_owners,
PARA_LIST *paragraphs, PARA_LIST *paragraphs,
std::list<ParagraphModel *> *models); std::vector<ParagraphModel *> *models);
// Given a MutableIterator to the start of a block, run DetectParagraphs on // Given a MutableIterator to the start of a block, run DetectParagraphs on
// that block and commit the results to the underlying ROW and BLOCK structs, // that block and commit the results to the underlying ROW and BLOCK structs,
@ -101,7 +101,7 @@ void DetectParagraphs(int debug_level,
void DetectParagraphs(int debug_level, void DetectParagraphs(int debug_level,
bool after_text_recognition, bool after_text_recognition,
const MutableIterator *block_start, const MutableIterator *block_start,
std::list<ParagraphModel *> *models); std::vector<ParagraphModel *> *models);
} // namespace } // namespace

View File

@ -193,10 +193,10 @@ class ParagraphTheory {
public: public:
// We presume models will outlive us, and that models will take ownership // We presume models will outlive us, and that models will take ownership
// of any ParagraphModel *'s we add. // of any ParagraphModel *'s we add.
explicit ParagraphTheory(std::list<ParagraphModel *> *models) explicit ParagraphTheory(std::vector<ParagraphModel *> *models)
: models_(models) {} : models_(models) {}
std::list<ParagraphModel *> &models() { return *models_; } std::vector<ParagraphModel *> &models() { return *models_; }
const std::list<ParagraphModel *> &models() const { return *models_; } const std::vector<ParagraphModel *> &models() const { return *models_; }
// Return an existing model if one that is Comparable() can be found. // Return an existing model if one that is Comparable() can be found.
// Else, allocate a new copy of model to save and return a pointer to it. // Else, allocate a new copy of model to save and return a pointer to it.
@ -216,7 +216,7 @@ class ParagraphTheory {
int IndexOf(const ParagraphModel *model) const; int IndexOf(const ParagraphModel *model) const;
private: private:
std::list<ParagraphModel *> *models_; std::vector<ParagraphModel *> *models_;
GenericVectorEqEq<ParagraphModel *> models_we_added_; GenericVectorEqEq<ParagraphModel *> models_we_added_;
}; };

View File

@ -360,7 +360,7 @@ void ResultIterator::MoveToLogicalStartOfWord() {
BeginWord(0); BeginWord(0);
return; return;
} }
GenericVector<int> blob_order; std::vector<int> blob_order;
CalculateBlobOrder(&blob_order); CalculateBlobOrder(&blob_order);
if (blob_order.size() == 0 || blob_order[0] == 0) if (blob_order.size() == 0 || blob_order[0] == 0)
return; return;
@ -370,7 +370,7 @@ void ResultIterator::MoveToLogicalStartOfWord() {
bool ResultIterator::IsAtFinalSymbolOfWord() const { bool ResultIterator::IsAtFinalSymbolOfWord() const {
if (!it_->word()) if (!it_->word())
return true; return true;
GenericVector<int> blob_order; std::vector<int> blob_order;
CalculateBlobOrder(&blob_order); CalculateBlobOrder(&blob_order);
return blob_order.size() == 0 || blob_order.back() == blob_index_; return blob_order.size() == 0 || blob_order.back() == blob_index_;
} }
@ -378,7 +378,7 @@ bool ResultIterator::IsAtFinalSymbolOfWord() const {
bool ResultIterator::IsAtFirstSymbolOfWord() const { bool ResultIterator::IsAtFirstSymbolOfWord() const {
if (!it_->word()) if (!it_->word())
return true; return true;
GenericVector<int> blob_order; std::vector<int> blob_order;
CalculateBlobOrder(&blob_order); CalculateBlobOrder(&blob_order);
return blob_order.size() == 0 || blob_order[0] == blob_index_; return blob_order.size() == 0 || blob_order[0] == blob_index_;
} }
@ -472,7 +472,7 @@ bool ResultIterator::Next(PageIteratorLevel level) {
MoveToLogicalStartOfTextline(); MoveToLogicalStartOfTextline();
return it_->block() != nullptr; return it_->block() != nullptr;
case RIL_SYMBOL: { case RIL_SYMBOL: {
GenericVector<int> blob_order; std::vector<int> blob_order;
CalculateBlobOrder(&blob_order); CalculateBlobOrder(&blob_order);
int next_blob = 0; int next_blob = 0;
while (next_blob < blob_order.size() && while (next_blob < blob_order.size() &&
@ -674,7 +674,7 @@ void ResultIterator::AppendUTF8WordText(std::string* text) const {
*text += reading_direction_is_ltr ? kLRM : kRLM; *text += reading_direction_is_ltr ? kLRM : kRLM;
} }
GenericVector<int> blob_order; std::vector<int> blob_order;
CalculateBlobOrder(&blob_order); CalculateBlobOrder(&blob_order);
for (int i = 0; i < blob_order.size(); i++) { for (int i = 0; i < blob_order.size(); i++) {
*text += it_->word()->BestUTF8(blob_order[i], false); *text += it_->word()->BestUTF8(blob_order[i], false);

View File

@ -347,15 +347,15 @@ class Tesseract : public Wordrec {
// Generates training data for training a line recognizer, eg LSTM. // Generates training data for training a line recognizer, eg LSTM.
// Breaks the boxes into lines, normalizes them, converts to ImageData and // Breaks the boxes into lines, normalizes them, converts to ImageData and
// appends them to the given training_data. // appends them to the given training_data.
void TrainFromBoxes(const GenericVector<TBOX>& boxes, void TrainFromBoxes(const std::vector<TBOX>& boxes,
const GenericVector<STRING>& texts, const std::vector<STRING>& texts,
BLOCK_LIST* block_list, DocumentData* training_data); BLOCK_LIST* block_list, DocumentData* training_data);
// Returns an Imagedata containing the image of the given textline, // Returns an Imagedata containing the image of the given textline,
// and ground truth boxes/truth text if available in the input. // and ground truth boxes/truth text if available in the input.
// The image is not normalized in any way. // The image is not normalized in any way.
ImageData* GetLineData(const TBOX& line_box, const GenericVector<TBOX>& boxes, ImageData* GetLineData(const TBOX& line_box, const std::vector<TBOX>& boxes,
const GenericVector<STRING>& texts, int start_box, const std::vector<STRING>& texts, int start_box,
int end_box, const BLOCK& block); int end_box, const BLOCK& block);
// Helper gets the image of a rectangle, using the block.re_rotation() if // Helper gets the image of a rectangle, using the block.re_rotation() if
// needed to get to the image, and rotating the result back to horizontal // needed to get to the image, and rotating the result back to horizontal
@ -708,12 +708,12 @@ class Tesseract : public Wordrec {
// Builds a PAGE_RES from the block_list in the way required for ApplyBoxes: // Builds a PAGE_RES from the block_list in the way required for ApplyBoxes:
// All fuzzy spaces are removed, and all the words are maximally chopped. // All fuzzy spaces are removed, and all the words are maximally chopped.
PAGE_RES* SetupApplyBoxes(const GenericVector<TBOX>& boxes, PAGE_RES* SetupApplyBoxes(const std::vector<TBOX>& boxes,
BLOCK_LIST* block_list); BLOCK_LIST* block_list);
// Tests the chopper by exhaustively running chop_one_blob. // Tests the chopper by exhaustively running chop_one_blob.
// The word_res will contain filled chopped_word, seam_array, denorm, // The word_res will contain filled chopped_word, seam_array, denorm,
// box_word and best_state for the maximally chopped word. // box_word and best_state for the maximally chopped word.
void MaximallyChopWord(const GenericVector<TBOX>& boxes, BLOCK* block, void MaximallyChopWord(const std::vector<TBOX>& boxes, BLOCK* block,
ROW* row, WERD_RES* word_res); ROW* row, WERD_RES* word_res);
// Gather consecutive blobs that match the given box into the best_state // Gather consecutive blobs that match the given box into the best_state
// and corresponding correct_text. // and corresponding correct_text.

View File

@ -331,9 +331,9 @@ void ImageData::Display() const {
// Adds the supplied boxes and transcriptions that correspond to the correct // Adds the supplied boxes and transcriptions that correspond to the correct
// page number. // page number.
void ImageData::AddBoxes(const GenericVector<TBOX>& boxes, void ImageData::AddBoxes(const std::vector<TBOX>& boxes,
const GenericVector<STRING>& texts, const std::vector<STRING>& texts,
const GenericVector<int>& box_pages) { const std::vector<int>& box_pages) {
// Copy the boxes and make the transcription. // Copy the boxes and make the transcription.
for (int i = 0; i < box_pages.size(); ++i) { for (int i = 0; i < box_pages.size(); ++i) {
if (page_number_ >= 0 && box_pages[i] != page_number_) continue; if (page_number_ >= 0 && box_pages[i] != page_number_) continue;
@ -378,9 +378,9 @@ Pix* ImageData::GetPixInternal(const GenericVector<char>& image_data) {
// match the page number. Returns false on error. // match the page number. Returns false on error.
bool ImageData::AddBoxes(const char* box_text) { bool ImageData::AddBoxes(const char* box_text) {
if (box_text != nullptr && box_text[0] != '\0') { if (box_text != nullptr && box_text[0] != '\0') {
GenericVector<TBOX> boxes; std::vector<TBOX> boxes;
GenericVector<STRING> texts; std::vector<STRING> texts;
GenericVector<int> box_pages; std::vector<int> box_pages;
if (ReadMemBoxes(page_number_, /*skip_blanks*/ false, box_text, if (ReadMemBoxes(page_number_, /*skip_blanks*/ false, box_text,
/*continue_on_failure*/ true, &boxes, &texts, nullptr, /*continue_on_failure*/ true, &boxes, &texts, nullptr,
&box_pages)) { &box_pages)) {
@ -587,7 +587,7 @@ DocumentCache::~DocumentCache() {}
// Adds all the documents in the list of filenames, counting memory. // Adds all the documents in the list of filenames, counting memory.
// The reader is used to read the files. // The reader is used to read the files.
bool DocumentCache::LoadDocuments(const GenericVector<STRING>& filenames, bool DocumentCache::LoadDocuments(const std::vector<STRING>& filenames,
CachingStrategy cache_strategy, CachingStrategy cache_strategy,
FileReader reader) { FileReader reader) {
cache_strategy_ = cache_strategy; cache_strategy_ = cache_strategy;

View File

@ -181,9 +181,9 @@ class ImageData {
// Adds the supplied boxes and transcriptions that correspond to the correct // Adds the supplied boxes and transcriptions that correspond to the correct
// page number. // page number.
void AddBoxes(const GenericVector<TBOX>& boxes, void AddBoxes(const std::vector<TBOX>& boxes,
const GenericVector<STRING>& texts, const std::vector<STRING>& texts,
const GenericVector<int>& box_pages); const std::vector<int>& box_pages);
private: private:
// Saves the given Pix as a PNG-encoded string and destroys it. // Saves the given Pix as a PNG-encoded string and destroys it.
@ -335,7 +335,7 @@ class DocumentCache {
} }
// Adds all the documents in the list of filenames, counting memory. // Adds all the documents in the list of filenames, counting memory.
// The reader is used to read the files. // The reader is used to read the files.
bool LoadDocuments(const GenericVector<STRING>& filenames, bool LoadDocuments(const std::vector<STRING>& filenames,
CachingStrategy cache_strategy, FileReader reader); CachingStrategy cache_strategy, FileReader reader);
// Adds document to the cache. // Adds document to the cache.

View File

@ -152,7 +152,7 @@ class GENERIC_2D_ARRAY {
return tesseract::Serialize(fp, &array_[0], size); return tesseract::Serialize(fp, &array_[0], size);
} }
bool Serialize(tesseract::TFile* fp) const { bool Serialize(TFile* fp) const {
if (!SerializeSize(fp)) return false; if (!SerializeSize(fp)) return false;
if (!fp->Serialize(&empty_)) return false; if (!fp->Serialize(&empty_)) return false;
int size = num_elements(); int size = num_elements();
@ -175,7 +175,7 @@ class GENERIC_2D_ARRAY {
return true; return true;
} }
bool DeSerialize(tesseract::TFile* fp) { bool DeSerialize(TFile* fp) {
return DeSerializeSize(fp) && return DeSerializeSize(fp) &&
fp->DeSerialize(&empty_) && fp->DeSerialize(&empty_) &&
fp->DeSerialize(&array_[0], num_elements()); fp->DeSerialize(&array_[0], num_elements());
@ -473,7 +473,7 @@ class GENERIC_2D_ARRAY {
size = dim2_; size = dim2_;
return tesseract::Serialize(fp, &size); return tesseract::Serialize(fp, &size);
} }
bool SerializeSize(tesseract::TFile* fp) const { bool SerializeSize(TFile* fp) const {
uint32_t size = dim1_; uint32_t size = dim1_;
if (!fp->Serialize(&size)) return false; if (!fp->Serialize(&size)) return false;
size = dim2_; size = dim2_;
@ -495,7 +495,7 @@ class GENERIC_2D_ARRAY {
Resize(size1, size2, empty_); Resize(size1, size2, empty_);
return true; return true;
} }
bool DeSerializeSize(tesseract::TFile* fp) { bool DeSerializeSize(TFile* fp) {
int32_t size1, size2; int32_t size1, size2;
if (!fp->DeSerialize(&size1)) return false; if (!fp->DeSerialize(&size1)) return false;
if (!fp->DeSerialize(&size2)) return false; if (!fp->DeSerialize(&size2)) return false;
@ -639,7 +639,7 @@ struct MATRIX_COORD {
}; };
// The MatrixCoordPair contains a MATRIX_COORD and its priority. // The MatrixCoordPair contains a MATRIX_COORD and its priority.
using MatrixCoordPair = tesseract::KDPairInc<float, MATRIX_COORD>; using MatrixCoordPair = KDPairInc<float, MATRIX_COORD>;
} // namespace tesseract } // namespace tesseract

View File

@ -29,7 +29,6 @@
#include <cstdio> #include <cstdio>
#include <cstdlib> #include <cstdlib>
#include <functional> // for std::function #include <functional> // for std::function
#include <vector>
namespace tesseract { namespace tesseract {
@ -37,48 +36,64 @@ namespace tesseract {
// provides automatic deletion of pointers, [De]Serialize that works, and // provides automatic deletion of pointers, [De]Serialize that works, and
// sort that works. // sort that works.
template <typename T> template <typename T>
class GenericVector : public std::vector<T> { class GenericVector {
using base = std::vector<T>;
public: public:
using std::vector<T>::vector; GenericVector() {
init(kDefaultVectorSize);
using base::begin; }
using base::end; GenericVector(int size, const T& init_val) {
using base::data; init(size);
using base::capacity; init_to_size(size, init_val);
using base::reserve; }
using base::resize;
using base::back;
using base::clear;
using base::push_back;
// Copy
GenericVector(const GenericVector& other) {
this->init(other.size());
this->operator+=(other);
}
GenericVector<T>& operator+=(const GenericVector& other); GenericVector<T>& operator+=(const GenericVector& other);
GenericVector<T>& operator=(const GenericVector& other);
~GenericVector();
// Reserve some memory.
void reserve(int size);
// Double the size of the internal array. // Double the size of the internal array.
void double_the_size(); void double_the_size();
// Resizes to size and sets all values to t. // Resizes to size and sets all values to t.
void init_to_size(int size, const T& t); void init_to_size(int size, const T& t);
void resize(int size, const T& t);
// Resizes to size without any initialization. // Resizes to size without any initialization.
void resize_no_init(int size) { void resize_no_init(int size) {
resize(size); reserve(size);
size_used_ = size;
} }
// Return the size used.
int size() const {
return size_used_;
}
// Workaround to avoid g++ -Wsign-compare warnings. // Workaround to avoid g++ -Wsign-compare warnings.
size_t unsigned_size() const { size_t unsigned_size() const {
return size(); static_assert(sizeof(size_used_) <= sizeof(size_t),
"Wow! sizeof(size_t) < sizeof(int32_t)!!");
assert(0 <= size_used_);
return static_cast<size_t>(size_used_);
} }
int size_reserved() const { int size_reserved() const {
return capacity(); return size_reserved_;
} }
int size() const { // Return true if empty.
return base::size(); bool empty() const {
return size_used_ == 0;
} }
// Return the object from an index. // Return the object from an index.
T& get(int index); T& get(int index) const;
const T& get(int index) const; T& back() const;
T& operator[](int index) const;
// Returns the last object and removes it. // Returns the last object and removes it.
T pop_back(); T pop_back();
@ -94,6 +109,7 @@ class GenericVector : public std::vector<T> {
T contains_index(int index) const; T contains_index(int index) const;
// Push an element in the end of the array // Push an element in the end of the array
int push_back(T object);
void operator+=(const T& t); void operator+=(const T& t);
// Push an element in the end of the array if the same // Push an element in the end of the array if the same
@ -117,7 +133,9 @@ class GenericVector : public std::vector<T> {
// Truncates the array to the given size by removing the end. // Truncates the array to the given size by removing the end.
// If the current size is less, the array is not expanded. // If the current size is less, the array is not expanded.
void truncate(int size) { void truncate(int size) {
resize(size); if (size < size_used_) {
size_used_ = size;
}
} }
// Add a callback to be called to delete the elements when the array took // Add a callback to be called to delete the elements when the array took
@ -132,7 +150,13 @@ class GenericVector : public std::vector<T> {
compare_cb_ = cb; compare_cb_ = cb;
} }
// Delete objects pointed to by data()[i] // Clear the array, calling the clear callback function if any.
// All the owned callbacks are also deleted.
// If you don't want the callbacks to be deleted, before calling clear, set
// the callback to nullptr.
void clear();
// Delete objects pointed to by data_[i]
void delete_data_pointers(); void delete_data_pointers();
// This method clears the current object, then, does a shallow copy of // This method clears the current object, then, does a shallow copy of
@ -192,8 +216,8 @@ class GenericVector : public std::vector<T> {
// Reverses the elements of the vector. // Reverses the elements of the vector.
void reverse() { void reverse() {
for (int i = 0; i < size() / 2; ++i) { for (int i = 0; i < size_used_ / 2; ++i) {
Swap(&data()[i], &data()[size() - 1 - i]); Swap(&data_[i], &data_[size_used_ - 1 - i]);
} }
} }
@ -209,7 +233,7 @@ class GenericVector : public std::vector<T> {
// to two Ts and returns negative if the first element is to appear earlier // to two Ts and returns negative if the first element is to appear earlier
// in the result and positive if it is to appear later, with 0 for equal. // in the result and positive if it is to appear later, with 0 for equal.
void sort(int (*comparator)(const void*, const void*)) { void sort(int (*comparator)(const void*, const void*)) {
qsort(data(), size(), sizeof(*data()), comparator); qsort(data_, size_used_, sizeof(*data_), comparator);
} }
// Searches the array (assuming sorted in ascending order, using sort()) for // Searches the array (assuming sorted in ascending order, using sort()) for
@ -217,23 +241,23 @@ class GenericVector : public std::vector<T> {
// Use binary_search to get the index of target, or its nearest candidate. // Use binary_search to get the index of target, or its nearest candidate.
bool bool_binary_search(const T& target) const { bool bool_binary_search(const T& target) const {
int index = binary_search(target); int index = binary_search(target);
if (index >= size()) { if (index >= size_used_) {
return false; return false;
} }
return data()[index] == target; return data_[index] == target;
} }
// Searches the array (assuming sorted in ascending order, using sort()) for // Searches the array (assuming sorted in ascending order, using sort()) for
// an element equal to target and returns the index of the best candidate. // an element equal to target and returns the index of the best candidate.
// The return value is conceptually the largest index i such that // The return value is conceptually the largest index i such that
// data()[i] <= target or 0 if target < the whole vector. // data_[i] <= target or 0 if target < the whole vector.
// NOTE that this function uses operator> so really the return value is // NOTE that this function uses operator> so really the return value is
// the largest index i such that data()[i] > target is false. // the largest index i such that data_[i] > target is false.
int binary_search(const T& target) const { int binary_search(const T& target) const {
int bottom = 0; int bottom = 0;
int top = size(); int top = size_used_;
while (top - bottom > 1) { while (top - bottom > 1) {
int middle = (bottom + top) / 2; int middle = (bottom + top) / 2;
if (data()[middle] > target) { if (data_[middle] > target) {
top = middle; top = middle;
} else { } else {
bottom = middle; bottom = middle;
@ -245,20 +269,20 @@ class GenericVector : public std::vector<T> {
// Compact the vector by deleting elements using operator!= on basic types. // Compact the vector by deleting elements using operator!= on basic types.
// The vector must be sorted. // The vector must be sorted.
// Only adjacent elements are compared, which is why equal values have to be
// next to each other. The first element of every run of equal values is kept.
void compact_sorted() { void compact_sorted() {
// Nothing to compact in an empty vector.
if (size() == 0) { if (size_used_ == 0) {
return; return;
} }
// First element is in no matter what, hence the i = 1. // First element is in no matter what, hence the i = 1.
int last_write = 0; int last_write = 0;
for (int i = 1; i < size(); ++i) { for (int i = 1; i < size_used_; ++i) {
// Finds next unique item and writes it. // Finds next unique item and writes it.
if (data()[last_write] != data()[i]) { if (data_[last_write] != data_[i]) {
data()[++last_write] = data()[i]; data_[++last_write] = data_[i];
} }
} }
// last_write is the index of a valid data cell, so add 1. // last_write is the index of a valid data cell, so add 1.
// The left-hand text shrinks through resize(); the right-hand text only
// lowers the used count and leaves the storage untouched.
resize(last_write + 1); size_used_ = last_write + 1;
} }
// Returns the index of what would be the target_index_th item in the array // Returns the index of what would be the target_index_th item in the array
@ -269,26 +293,26 @@ class GenericVector : public std::vector<T> {
// Make sure target_index is legal. // Make sure target_index is legal.
if (target_index < 0) { if (target_index < 0) {
target_index = 0; // ensure legal target_index = 0; // ensure legal
} else if (target_index >= size()) { } else if (target_index >= size_used_) {
target_index = size() - 1; target_index = size_used_ - 1;
} }
unsigned int seed = 1; unsigned int seed = 1;
return choose_nth_item(target_index, 0, size(), &seed); return choose_nth_item(target_index, 0, size_used_, &seed);
} }
// Swaps the elements with the given indices. // Swaps the elements with the given indices.
// Does nothing when both indices are the same. The exchange goes through a
// temporary copy of the first element, so T must be copyable.
// Neither index is range-checked here.
void swap(int index1, int index2) { void swap(int index1, int index2) {
if (index1 != index2) { if (index1 != index2) {
T tmp = data()[index1]; T tmp = data_[index1];
data()[index1] = data()[index2]; data_[index1] = data_[index2];
data()[index2] = tmp; data_[index2] = tmp;
} }
} }
// Returns true if all elements of *this are within the given range. // Returns true if all elements of *this are within the given range.
// Only uses operator< // Only uses operator<
bool WithinBounds(const T& rangemin, const T& rangemax) const { bool WithinBounds(const T& rangemin, const T& rangemax) const {
for (int i = 0; i < size(); ++i) { for (int i = 0; i < size_used_; ++i) {
if (data()[i] < rangemin || rangemax < data()[i]) { if (data_[i] < rangemin || rangemax < data_[i]) {
return false; return false;
} }
} }
@ -306,18 +330,47 @@ class GenericVector : public std::vector<T> {
// vector are small enough that for efficiency it makes sense // vector are small enough that for efficiency it makes sense
// to start with a larger initial size. // to start with a larger initial size.
static const int kDefaultVectorSize = 4; static const int kDefaultVectorSize = 4;
int32_t size_used_{};
int32_t size_reserved_{};
T* data_;
std::function<void(T)> clear_cb_; std::function<void(T)> clear_cb_;
std::function<bool(const T&, const T&)> compare_cb_; std::function<bool(const T&, const T&)> compare_cb_;
}; };
// The left-hand text of the next eight lines is the GenericVector<bool>
// specialization block; the right-hand text is the start of LoadDataFromFile().
// LoadDataFromFile() opens the file in binary mode, measures it by seeking to
// the end and calling ftell(), rewinds, and then reads it with one fread().
#if defined(_MSC_VER) || defined(__APPLE__) // The default FileReader loads the whole file into the vector of char,
// MSVC stl does not have ::data() in vector<bool>, // returning false on error.
// so we add custom specialization. inline bool LoadDataFromFile(const char* filename, GenericVector<char>* data) {
// On Apple there are also errors when using std::vector<bool>, bool result = false;
// so we replace it with vector<int> as a workaround. FILE* fp = fopen(filename, "rb");
template <> if (fp != nullptr) {
class GenericVector<bool> : public std::vector<int> {}; fseek(fp, 0, SEEK_END);
#endif auto size = std::ftell(fp);
// NOTE(review): the return values of both fseek() calls are not checked.
fseek(fp, 0, SEEK_SET);
// Trying to open a directory on Linux sets size to LONG_MAX. Catch it here.
// The size > 0 test also rejects an empty file and a negative ftell() result,
// so both of those cases return false.
if (size > 0 && size < LONG_MAX) {
// reserve an extra byte in case caller wants to append a '\0' character
data->reserve(size + 1);
data->resize_no_init(size);
// Success means fread() delivered exactly size bytes.
result = static_cast<long>(fread(&(*data)[0], 1, size, fp)) == size;
}
fclose(fp);
}
return result;
}
// The default FileWriter writes the vector of char to the filename file,
// returning false on error.
// An empty vector produces an empty file and counts as success. The buffer is
// only addressed when it holds at least one byte, because indexing an empty
// GenericVector is guarded by an assert and may have no array behind it.
// A failing fclose() is reported as an error as well: buffered bytes may only
// reach the file when the stream is closed.
inline bool SaveDataToFile(const GenericVector<char>& data,
                           const char* filename) {
  FILE* fp = fopen(filename, "wb");
  if (fp == nullptr) {
    return false;
  }
  const auto count = data.size();
  bool result = true;
  if (count > 0) {
    // Compare in size_t so the fwrite() result is never narrowed.
    result = fwrite(&data[0], 1, count, fp) == static_cast<size_t>(count);
  }
  if (fclose(fp) != 0) {
    result = false;
  }
  return result;
}
template <typename T> template <typename T>
bool cmp_eq(T const& t1, T const& t2) { bool cmp_eq(T const& t1, T const& t2) {
@ -378,9 +431,9 @@ class PointerVector : public GenericVector<T*> {
this->operator+=(other); this->operator+=(other);
} }
// Appends a newly allocated copy (new T(...)) of every object that other
// points to, so the two vectors never share a pointee. Room for the combined
// element count is reserved up front.
// NOTE(review): a null entry in other would be dereferenced by the copy.
PointerVector<T>& operator+=(const PointerVector& other) { PointerVector<T>& operator+=(const PointerVector& other) {
this->reserve(this->size() + other.size()); this->reserve(this->size_used_ + other.size_used_);
for (int i = 0; i < other.size(); ++i) { for (int i = 0; i < other.size(); ++i) {
this->push_back(new T(*other.data()[i])); this->push_back(new T(*other.data_[i]));
} }
return *this; return *this;
} }
@ -396,15 +449,15 @@ class PointerVector : public GenericVector<T*> {
// Removes an element at the given index and // Removes an element at the given index and
// shifts the remaining elements to the left. // shifts the remaining elements to the left.
// The pointee is deleted first; closing the gap is left to
// GenericVector<T*>::remove().
void remove(int index) { void remove(int index) {
delete GenericVector<T*>::data()[index]; delete GenericVector<T*>::data_[index];
GenericVector<T*>::remove(index); GenericVector<T*>::remove(index);
} }
// Truncates the array to the given size by removing the end. // Truncates the array to the given size by removing the end.
// If the current size is less, the array is not expanded. // If the current size is less, the array is not expanded.
// The pointees from index size onward are deleted before the base class
// truncate() is asked to drop the slots.
// NOTE(review): a negative size would make the loop index before the array.
void truncate(int size) { void truncate(int size) {
for (int i = size; i < GenericVector<T*>::size(); ++i) { for (int i = size; i < GenericVector<T*>::size_used_; ++i) {
delete GenericVector<T*>::data()[i]; delete GenericVector<T*>::data_[i];
} }
GenericVector<T*>::truncate(size); GenericVector<T*>::truncate(size);
} }
@ -415,20 +468,20 @@ class PointerVector : public GenericVector<T*> {
int new_size = 0; int new_size = 0;
int old_index = 0; int old_index = 0;
// Until the callback returns true, the elements stay the same. // Until the callback returns true, the elements stay the same.
while (old_index < GenericVector<T*>::size() && while (old_index < GenericVector<T*>::size_used_ &&
!delete_cb(GenericVector<T*>::data()[old_index++])) { !delete_cb(GenericVector<T*>::data_[old_index++])) {
++new_size; ++new_size;
} }
// Now just copy anything else that gets false from delete_cb. // Now just copy anything else that gets false from delete_cb.
for (; old_index < GenericVector<T*>::size(); ++old_index) { for (; old_index < GenericVector<T*>::size_used_; ++old_index) {
if (!delete_cb(GenericVector<T*>::data()[old_index])) { if (!delete_cb(GenericVector<T*>::data_[old_index])) {
GenericVector<T*>::data()[new_size++] = GenericVector<T*>::data_[new_size++] =
GenericVector<T*>::data()[old_index]; GenericVector<T*>::data_[old_index];
} else { } else {
delete GenericVector<T*>::data()[old_index]; delete GenericVector<T*>::data_[old_index];
} }
} }
GenericVector<T*>::resize(new_size); GenericVector<T*>::size_used_ = new_size;
} }
// Clear the array, calling the clear callback function if any. // Clear the array, calling the clear callback function if any.
@ -446,32 +499,32 @@ class PointerVector : public GenericVector<T*> {
// normal GenericVector of those. // normal GenericVector of those.
// Returns false in case of error. // Returns false in case of error.
// Writes the element count as an int32_t, then for every entry an int8_t flag
// (1 when the pointer is non-null) followed, for non-null entries only, by
// whatever the pointee's own Serialize(fp) writes.
// Returns false as soon as one write fails, true otherwise.
bool Serialize(FILE* fp) const { bool Serialize(FILE* fp) const {
int32_t used = GenericVector<T*>::size(); int32_t used = GenericVector<T*>::size_used_;
if (fwrite(&used, sizeof(used), 1, fp) != 1) { if (fwrite(&used, sizeof(used), 1, fp) != 1) {
return false; return false;
} }
for (int i = 0; i < used; ++i) { for (int i = 0; i < used; ++i) {
// The flag lets a reader tell a null slot from a stored object.
int8_t non_null = GenericVector<T*>::data()[i] != nullptr; int8_t non_null = GenericVector<T*>::data_[i] != nullptr;
if (fwrite(&non_null, sizeof(non_null), 1, fp) != 1) { if (fwrite(&non_null, sizeof(non_null), 1, fp) != 1) {
return false; return false;
} }
if (non_null && !GenericVector<T*>::data()[i]->Serialize(fp)) { if (non_null && !GenericVector<T*>::data_[i]->Serialize(fp)) {
return false; return false;
} }
} }
return true; return true;
} }
bool Serialize(TFile* fp) const { bool Serialize(TFile* fp) const {
int32_t used = GenericVector<T*>::size(); int32_t used = GenericVector<T*>::size_used_;
if (fp->FWrite(&used, sizeof(used), 1) != 1) { if (fp->FWrite(&used, sizeof(used), 1) != 1) {
return false; return false;
} }
for (int i = 0; i < used; ++i) { for (int i = 0; i < used; ++i) {
int8_t non_null = GenericVector<T*>::data()[i] != nullptr; int8_t non_null = GenericVector<T*>::data_[i] != nullptr;
if (fp->FWrite(&non_null, sizeof(non_null), 1) != 1) { if (fp->FWrite(&non_null, sizeof(non_null), 1) != 1) {
return false; return false;
} }
if (non_null && !GenericVector<T*>::data()[i]->Serialize(fp)) { if (non_null && !GenericVector<T*>::data_[i]->Serialize(fp)) {
return false; return false;
} }
} }
@ -599,52 +652,99 @@ class GenericVectorEqEq : public GenericVector<T> {
// init(), right-hand text: starts with zero used elements. A non-positive
// size allocates nothing; any other size is raised to at least
// kDefaultVectorSize before the array is allocated with new T[size].
// Both callbacks are reset to nullptr.
template <typename T> template <typename T>
void GenericVector<T>::init(int size) { void GenericVector<T>::init(int size) {
size_used_ = 0;
if (size <= 0) {
data_ = nullptr;
size_reserved_ = 0;
} else {
if (size < kDefaultVectorSize) {
size = kDefaultVectorSize;
}
data_ = new T[size];
size_reserved_ = size;
}
clear_cb_ = nullptr;
compare_cb_ = nullptr;
}

// The destructor (right-hand text) releases everything through clear().
// The left-hand text of the last two lines is the body of the earlier init():
// clear() followed by resize(size).
template <typename T>
GenericVector<T>::~GenericVector() {
clear(); clear();
resize(size); }
// Reserve some memory. If the internal array contains elements, they are
// copied.
// Never shrinks: the call returns at once when the reserved size is already
// at least size, or when size is not positive. Requests smaller than
// kDefaultVectorSize are raised to it.
template <typename T>
void GenericVector<T>::reserve(int size) {
if (size_reserved_ >= size || size <= 0) {
return;
}
if (size < kDefaultVectorSize) {
size = kDefaultVectorSize;
}
// The new array is default-constructed and the used elements are then
// copy-assigned into it, so T needs a default constructor and assignment.
T* new_array = new T[size];
for (int i = 0; i < size_used_; ++i) {
new_array[i] = data_[i];
}
delete[] data_;
data_ = new_array;
size_reserved_ = size;
} }
// Grows the storage: to kDefaultVectorSize when nothing is reserved yet,
// otherwise to twice the currently reserved size.
// NOTE(review): the doubling is plain int arithmetic with no overflow guard.
template <typename T> template <typename T>
void GenericVector<T>::double_the_size() { void GenericVector<T>::double_the_size() {
if (capacity() == 0) { if (size_reserved_ == 0) {
reserve(kDefaultVectorSize); reserve(kDefaultVectorSize);
} else { } else {
reserve(2 * capacity()); reserve(2 * size_reserved_);
} }
} }
// Resizes to size and sets all values to t. // Resizes to size and sets all values to t.
// Right-hand text: every one of the first size elements is overwritten with
// t, including elements that were already stored.
// NOTE(review): reserve() returns early for size <= 0, so a negative size
// would still be written to size_used_.
template <typename T> template <typename T>
void GenericVector<T>::init_to_size(int size, const T& t) { void GenericVector<T>::init_to_size(int size, const T& t) {
resize(size, t); reserve(size);
size_used_ = size;
for (int i = 0; i < size; ++i) {
data_[i] = t;
}
}

// resize() forwards straight to init_to_size(), so it also overwrites the
// elements that were already present.
template <typename T>
void GenericVector<T>::resize(int size, const T& t) {
init_to_size(size, t);
} }
// Return the object from an index. // Return the object from an index.
// The index is validated with assert() only. In the right-hand text get() is
// a const member that hands back a non-const reference.
template <typename T> template <typename T>
T& GenericVector<T>::get(int index) { T& GenericVector<T>::get(int index) const {
assert(index >= 0 && index < size()); assert(index >= 0 && index < size_used_);
return data()[index]; return data_[index];
} }

// Return the object from an index.
// Left-hand text: the const overload of get(). Right-hand text: operator[],
// with the same assert-based range check as get().
template <typename T> template <typename T>
const T& GenericVector<T>::get(int index) const { T& GenericVector<T>::operator[](int index) const {
assert(index >= 0 && index < size()); assert(index >= 0 && index < size_used_);
return data()[index]; return data_[index];
} }
// Returns a reference to the last used element.
// The vector must hold at least one element; this is checked by assert().
template <typename T>
T& GenericVector<T>::back() const {
  assert(size_used_ > 0);
  T* const last = data_ + (size_used_ - 1);
  return *last;
}
// Returns the last object and removes it. // Returns the last object and removes it.
// Right-hand text: asserts that the vector is not empty, lowers the used count
// by one and returns a copy of the element that was last. The storage itself
// is not released.
// Left-hand text: copies back(), then calls the base class pop_back().
template <typename T> template <typename T>
T GenericVector<T>::pop_back() { T GenericVector<T>::pop_back() {
auto b = back(); assert(size_used_ > 0);
base::pop_back(); return data_[--size_used_];
return b;
} }
// Stores a copy of t at the given index, replacing the element held there.
// The index must refer to an existing element; this is checked by assert().
template <typename T> template <typename T>
void GenericVector<T>::set(const T& t, int index) { void GenericVector<T>::set(const T& t, int index) {
assert(index >= 0 && index < size()); assert(index >= 0 && index < size_used_);
data()[index] = t; data_[index] = t;
} }
// Shifts the rest of the elements to the right to make // Shifts the rest of the elements to the right to make
@ -652,32 +752,40 @@ void GenericVector<T>::set(const T& t, int index) {
// at the specified index. // at the specified index.
// Right-hand text: inserts a copy of t at index. An index equal to the
// element count is allowed and appends. The storage is grown first when it
// is full.
// Left-hand text: forwards to the base class insert() at begin() + index.
template <typename T> template <typename T>
void GenericVector<T>::insert(const T& t, int index) { void GenericVector<T>::insert(const T& t, int index) {
base::insert(begin() + index, t); assert(index >= 0 && index <= size_used_);
if (size_reserved_ == size_used_) {
double_the_size();
}
// Shift from the end backwards so no element is overwritten before it moves.
for (int i = size_used_; i > index; --i) {
data_[i] = data_[i - 1];
}
data_[index] = t;
size_used_++;
} }
// Removes an element at the given index and // Removes an element at the given index and
// shifts the remaining elements to the left. // shifts the remaining elements to the left.
// Each later element is copy-assigned one slot down, then the element count
// drops by one (resize() in the left-hand text, a plain decrement of
// size_used_ in the right-hand text).
template <typename T> template <typename T>
void GenericVector<T>::remove(int index) { void GenericVector<T>::remove(int index) {
assert(index >= 0 && index < size()); assert(index >= 0 && index < size_used_);
for (int i = index; i < size() - 1; ++i) { for (int i = index; i < size_used_ - 1; ++i) {
data()[i] = data()[i + 1]; data_[i] = data_[i + 1];
} }
resize(size() - 1); size_used_--;
} }
// Return true if the index is valid, i.e. 0 <= index < number of elements.
// NOTE(review): the declared return type is T, not bool, so the result of the
// comparison is converted to T.
template <typename T> template <typename T>
T GenericVector<T>::contains_index(int index) const { T GenericVector<T>::contains_index(int index) const {
return index >= 0 && index < size(); return index >= 0 && index < size_used_;
} }
// Return the index of the T object. // Return the index of the T object.
template <typename T> template <typename T>
int GenericVector<T>::get_index(const T& object) const { int GenericVector<T>::get_index(const T& object) const {
for (int i = 0; i < size(); ++i) { for (int i = 0; i < size_used_; ++i) {
assert(compare_cb_ != nullptr); assert(compare_cb_ != nullptr);
if (compare_cb_(object, data()[i])) { if (compare_cb_(object, data_[i])) {
return i; return i;
} }
} }
@ -690,20 +798,38 @@ bool GenericVector<T>::contains(const T& object) const {
return get_index(object) != -1; return get_index(object) != -1;
} }
// Appends a copy of object after the last used element and returns the index
// it was stored at. The storage is grown first when every reserved slot is
// already in use.
template <typename T>
int GenericVector<T>::push_back(T object) {
  if (size_reserved_ == size_used_) {
    double_the_size();
  }
  const int slot = size_used_++;
  data_[slot] = object;
  return slot;
}
// Adds object only when get_index() does not already find it. If it is found,
// the index of the existing element is returned and nothing is appended.
// Right-hand text: returns the value of push_back().
// Left-hand text: returns size() as measured after the append.
template <typename T> template <typename T>
int GenericVector<T>::push_back_new(const T& object) { int GenericVector<T>::push_back_new(const T& object) {
int index = get_index(object); int index = get_index(object);
if (index >= 0) { if (index >= 0) {
return index; return index;
} }
push_back(object); return push_back(object);
return size();
} }
// Add an element in the array (front) // Add an element in the array (front)
// Right-hand text: grows the storage when it is full, moves every element one
// slot towards the end (starting from the back), stores object at index 0 and
// increases the element count. Always returns 0.
template <typename T> template <typename T>
int GenericVector<T>::push_front(const T& object) { int GenericVector<T>::push_front(const T& object) {
insert(begin(), object); if (size_used_ == size_reserved_) {
double_the_size();
}
for (int i = size_used_; i > 0; --i) {
data_[i] = data_[i - 1];
}
data_[0] = object;
++size_used_;
return 0; return 0;
} }
@ -714,39 +840,62 @@ void GenericVector<T>::operator+=(const T& t) {
// Appends every element of other to this vector. Room for the combined
// element count is reserved first, then the elements are added one at a time
// through the single-element operator+=.
// NOTE(review): when other is this vector itself, the loop bound other.size()
// grows with every append -- confirm that callers never self-append.
template <typename T> template <typename T>
GenericVector<T>& GenericVector<T>::operator+=(const GenericVector& other) { GenericVector<T>& GenericVector<T>::operator+=(const GenericVector& other) {
this->reserve(size() + other.size()); this->reserve(size_used_ + other.size_used_);
for (int i = 0; i < other.size(); ++i) { for (int i = 0; i < other.size(); ++i) {
this->operator+=(other.data()[i]); this->operator+=(other.data_[i]);
} }
return *this; return *this;
} }
// Copy assignment: empties this vector with truncate(0) and then appends
// every element of other. Assigning a vector to itself changes nothing.
template <typename T>
GenericVector<T>& GenericVector<T>::operator=(const GenericVector& other) {
  if (this == &other) {
    return *this;
  }
  this->truncate(0);
  this->operator+=(other);
  return *this;
}
// Clear the array, calling the callback function if any.
// The callback runs once per used element, and only when storage is reserved.
// Afterwards the array is freed with delete[], both sizes are zero and both
// callbacks are reset to nullptr.
template <typename T>
void GenericVector<T>::clear() {
  const bool run_callback = size_reserved_ > 0 && clear_cb_ != nullptr;
  if (run_callback) {
    for (int idx = 0; idx < size_used_; ++idx) {
      clear_cb_(data_[idx]);
    }
  }
  delete[] data_;
  data_ = nullptr;
  size_used_ = 0;
  size_reserved_ = 0;
  clear_cb_ = nullptr;
  compare_cb_ = nullptr;
}
// Calls delete on every stored element, so T has to be a pointer type.
// The element count is not changed and the slots are not reset, which leaves
// the stored pointers dangling until the vector is cleared or overwritten.
template <typename T> template <typename T>
void GenericVector<T>::delete_data_pointers() { void GenericVector<T>::delete_data_pointers() {
for (int i = 0; i < size(); ++i) { for (int i = 0; i < size_used_; ++i) {
delete data()[i]; delete data_[i];
} }
} }
template <typename T> template <typename T>
bool GenericVector<T>::write(FILE* f, bool GenericVector<T>::write(FILE* f,
std::function<bool(FILE*, const T&)> cb) const { std::function<bool(FILE*, const T&)> cb) const {
int32_t cp = capacity(); if (fwrite(&size_reserved_, sizeof(size_reserved_), 1, f) != 1) {
if (fwrite(&cp, sizeof(cp), 1, f) != 1) {
return false; return false;
} }
int32_t sz = size(); if (fwrite(&size_used_, sizeof(size_used_), 1, f) != 1) {
if (fwrite(&sz, sizeof(sz), 1, f) != 1) {
return false; return false;
} }
if (cb != nullptr) { if (cb != nullptr) {
for (int i = 0; i < size(); ++i) { for (int i = 0; i < size_used_; ++i) {
if (!cb(f, data()[i])) { if (!cb(f, data_[i])) {
return false; return false;
} }
} }
} else { } else {
if (fwrite(data(), sizeof(T), size(), f) != unsigned_size()) { if (fwrite(data_, sizeof(T), size_used_, f) != unsigned_size()) {
return false; return false;
} }
} }
@ -756,23 +905,22 @@ bool GenericVector<T>::write(FILE* f,
template <typename T> template <typename T>
bool GenericVector<T>::read(TFile* f, bool GenericVector<T>::read(TFile* f,
std::function<bool(TFile*, T*)> cb) { std::function<bool(TFile*, T*)> cb) {
int32_t reserved, size; int32_t reserved;
if (f->FReadEndian(&reserved, sizeof(reserved), 1) != 1) { if (f->FReadEndian(&reserved, sizeof(reserved), 1) != 1) {
return false; return false;
} }
reserve(reserved); reserve(reserved);
if (f->FReadEndian(&size, sizeof(size), 1) != 1) { if (f->FReadEndian(&size_used_, sizeof(size_used_), 1) != 1) {
return false; return false;
} }
resize(size);
if (cb != nullptr) { if (cb != nullptr) {
for (int i = 0; i < size; ++i) { for (int i = 0; i < size_used_; ++i) {
if (!cb(f, data() + i)) { if (!cb(f, data_ + i)) {
return false; return false;
} }
} }
} else { } else {
if (f->FReadEndian(data(), sizeof(T), size) != size) { if (f->FReadEndian(data_, sizeof(T), size_used_) != size_used_) {
return false; return false;
} }
} }
@ -783,22 +931,20 @@ bool GenericVector<T>::read(TFile* f,
// read/write of T will work. Returns false in case of error. // read/write of T will work. Returns false in case of error.
// Writes the element count as a 32-bit integer, followed by the elements
// themselves as one block of raw bytes. Returns false if either fwrite() does
// not write everything that was asked for.
// The second check compares against unsigned_size(), presumably the element
// count as an unsigned value -- its definition is outside this view.
template <typename T> template <typename T>
bool GenericVector<T>::Serialize(FILE* fp) const { bool GenericVector<T>::Serialize(FILE* fp) const {
int32_t sz = size(); if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) {
if (fwrite(&sz, sizeof(sz), 1, fp) != 1) {
return false; return false;
} }
if (fwrite(data(), sizeof(T), sz, fp) != unsigned_size()) { if (fwrite(data_, sizeof(*data_), size_used_, fp) != unsigned_size()) {
return false; return false;
} }
return true; return true;
} }
template <typename T> template <typename T>
bool GenericVector<T>::Serialize(TFile* fp) const { bool GenericVector<T>::Serialize(TFile* fp) const {
int32_t sz = size(); if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) {
if (fp->FWrite(&sz, sizeof(sz), 1) != 1) {
return false; return false;
} }
if (fp->FWrite(data(), sizeof(T), sz) != sz) { if (fp->FWrite(data_, sizeof(*data_), size_used_) != size_used_) {
return false; return false;
} }
return true; return true;
@ -822,13 +968,14 @@ bool GenericVector<T>::DeSerialize(bool swap, FILE* fp) {
if (reserved > UINT16_MAX) { if (reserved > UINT16_MAX) {
return false; return false;
} }
resize(reserved); reserve(reserved);
if (fread(data(), sizeof(T), size(), fp) != unsigned_size()) { size_used_ = reserved;
if (fread(data_, sizeof(T), size_used_, fp) != unsigned_size()) {
return false; return false;
} }
if (swap) { if (swap) {
for (int i = 0; i < size(); ++i) { for (int i = 0; i < size_used_; ++i) {
ReverseN(&data()[i], sizeof(data()[i])); ReverseN(&data_[i], sizeof(data_[i]));
} }
} }
return true; return true;
@ -845,8 +992,9 @@ bool GenericVector<T>::DeSerialize(TFile* fp) {
if (reserved > limit) { if (reserved > limit) {
return false; return false;
} }
resize(reserved); reserve(reserved);
return fp->FReadEndian(data(), sizeof(T), size()) == size(); size_used_ = reserved;
return fp->FReadEndian(data_, sizeof(T), size_used_) == size_used_;
} }
template <typename T> template <typename T>
bool GenericVector<T>::SkipDeSerialize(TFile* fp) { bool GenericVector<T>::SkipDeSerialize(TFile* fp) {
@ -862,12 +1010,11 @@ bool GenericVector<T>::SkipDeSerialize(TFile* fp) {
// Returns false in case of error. // Returns false in case of error.
template <typename T> template <typename T>
bool GenericVector<T>::SerializeClasses(FILE* fp) const { bool GenericVector<T>::SerializeClasses(FILE* fp) const {
int32_t sz = size(); if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) {
if (fwrite(&sz, sizeof(sz), 1, fp) != 1) {
return false; return false;
} }
for (int i = 0; i < sz; ++i) { for (int i = 0; i < size_used_; ++i) {
if (!data()[i].Serialize(fp)) { if (!data_[i].Serialize(fp)) {
return false; return false;
} }
} }
@ -875,12 +1022,11 @@ bool GenericVector<T>::SerializeClasses(FILE* fp) const {
} }
template <typename T> template <typename T>
bool GenericVector<T>::SerializeClasses(TFile* fp) const { bool GenericVector<T>::SerializeClasses(TFile* fp) const {
int32_t sz = size(); if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) {
if (fp->FWrite(&sz, sizeof(sz), 1) != 1) {
return false; return false;
} }
for (int i = 0; i < sz; ++i) { for (int i = 0; i < size_used_; ++i) {
if (!data()[i].Serialize(fp)) { if (!data_[i].Serialize(fp)) {
return false; return false;
} }
} }
@ -904,7 +1050,7 @@ bool GenericVector<T>::DeSerializeClasses(bool swap, FILE* fp) {
T empty; T empty;
init_to_size(reserved, empty); init_to_size(reserved, empty);
for (int i = 0; i < reserved; ++i) { for (int i = 0; i < reserved; ++i) {
if (!data()[i].DeSerialize(swap, fp)) { if (!data_[i].DeSerialize(swap, fp)) {
return false; return false;
} }
} }
@ -919,7 +1065,7 @@ bool GenericVector<T>::DeSerializeClasses(TFile* fp) {
T empty; T empty;
init_to_size(reserved, empty); init_to_size(reserved, empty);
for (int i = 0; i < reserved; ++i) { for (int i = 0; i < reserved; ++i) {
if (!data()[i].DeSerialize(fp)) { if (!data_[i].DeSerialize(fp)) {
return false; return false;
} }
} }
@ -943,7 +1089,17 @@ bool GenericVector<T>::SkipDeSerializeClasses(TFile* fp) {
// its argument, and finally invalidates its argument. // its argument, and finally invalidates its argument.
// Right-hand text: discards the current contents with clear(), takes over the
// array pointer, both sizes and both callbacks of *from, and finally resets
// *from to an empty state (null array, zero sizes, no callbacks). No element
// is copied.
// Left-hand text: move-assigns *from to *this with std::move.
template <typename T> template <typename T>
void GenericVector<T>::move(GenericVector<T>* from) { void GenericVector<T>::move(GenericVector<T>* from) {
*this = std::move(*from); this->clear();
this->data_ = from->data_;
this->size_reserved_ = from->size_reserved_;
this->size_used_ = from->size_used_;
this->compare_cb_ = from->compare_cb_;
this->clear_cb_ = from->clear_cb_;
// Detach the array from the source so that only this vector refers to it.
from->data_ = nullptr;
from->clear_cb_ = nullptr;
from->compare_cb_ = nullptr;
from->size_used_ = 0;
from->size_reserved_ = 0;
} }
template <typename T> template <typename T>
@ -974,7 +1130,7 @@ int GenericVector<T>::choose_nth_item(int target_index, int start, int end,
return start; return start;
} }
if (num_elements == 2) { if (num_elements == 2) {
if (data()[start] < data()[start + 1]) { if (data_[start] < data_[start + 1]) {
return target_index > start ? start + 1 : start; return target_index > start ? start + 1 : start;
} }
return target_index > start ? start : start + 1; return target_index > start ? start : start + 1;
@ -993,9 +1149,9 @@ int GenericVector<T>::choose_nth_item(int target_index, int start, int end,
int next_lesser = start; int next_lesser = start;
int prev_greater = end; int prev_greater = end;
for (int next_sample = start + 1; next_sample < prev_greater;) { for (int next_sample = start + 1; next_sample < prev_greater;) {
if (data()[next_sample] < data()[next_lesser]) { if (data_[next_sample] < data_[next_lesser]) {
swap(next_lesser++, next_sample++); swap(next_lesser++, next_sample++);
} else if (data()[next_sample] == data()[next_lesser]) { } else if (data_[next_sample] == data_[next_lesser]) {
++next_sample; ++next_sample;
} else { } else {
swap(--prev_greater, next_sample); swap(--prev_greater, next_sample);

View File

@ -133,6 +133,16 @@ TFile::~TFile() {
delete data_; delete data_;
} }
// Reads a 32-bit element count followed by that many chars into data,
// replacing whatever data held before.
// Returns false when the count or the payload cannot be read. A stored count
// of zero leaves data empty and succeeds without touching the buffer, because
// taking &data[0] of an empty vector is undefined behaviour.
bool TFile::DeSerialize(std::vector<char>& data) {
  uint32_t size;
  if (!DeSerialize(&size)) {
    return false;
  }
  if (size == 0) {
    data.clear();
    return true;
  }
  // TODO: optimize.
  data.resize(size);
  return DeSerialize(&data[0], data.size());
}
// Reads count chars into buffer through FRead() and reports success only when
// exactly count elements were delivered.
bool TFile::DeSerialize(char* buffer, size_t count) { bool TFile::DeSerialize(char* buffer, size_t count) {
return FRead(buffer, sizeof(*buffer), count) == count; return FRead(buffer, sizeof(*buffer), count) == count;
} }
@ -177,6 +187,14 @@ bool TFile::DeSerialize(uint64_t* buffer, size_t count) {
return FReadEndian(buffer, sizeof(*buffer), count) == count; return FReadEndian(buffer, sizeof(*buffer), count) == count;
} }
bool TFile::Serialize(const std::vector<char>& data) {
uint32_t size = data.size();
if (!Serialize(&size)) {
return false;
}
return Serialize(&data[0], size);
}
// Writes count chars from buffer through FWrite() and reports success only
// when exactly count elements were accepted.
bool TFile::Serialize(const char* buffer, size_t count) { bool TFile::Serialize(const char* buffer, size_t count) {
return FWrite(buffer, sizeof(*buffer), count) == count; return FWrite(buffer, sizeof(*buffer), count) == count;
} }

View File

@ -91,6 +91,7 @@ class TFile {
} }
// Deserialize data. // Deserialize data.
bool DeSerialize(std::vector<char>& data);
bool DeSerialize(char* data, size_t count = 1); bool DeSerialize(char* data, size_t count = 1);
bool DeSerialize(double* data, size_t count = 1); bool DeSerialize(double* data, size_t count = 1);
bool DeSerialize(float* data, size_t count = 1); bool DeSerialize(float* data, size_t count = 1);
@ -104,6 +105,7 @@ class TFile {
bool DeSerialize(uint64_t* data, size_t count = 1); bool DeSerialize(uint64_t* data, size_t count = 1);
// Serialize data. // Serialize data.
bool Serialize(const std::vector<char>& data);
bool Serialize(const char* data, size_t count = 1); bool Serialize(const char* data, size_t count = 1);
bool Serialize(const double* data, size_t count = 1); bool Serialize(const double* data, size_t count = 1);
bool Serialize(const float* data, size_t count = 1); bool Serialize(const float* data, size_t count = 1);

View File

@ -96,7 +96,7 @@ bool TessdataManager::LoadArchiveFile(const char *filename) {
#endif #endif
bool TessdataManager::Init(const char *data_file_name) { bool TessdataManager::Init(const char *data_file_name) {
GenericVector<char> data; std::vector<char> data;
if (reader_ == nullptr) { if (reader_ == nullptr) {
#if defined(HAVE_LIBARCHIVE) #if defined(HAVE_LIBARCHIVE)
if (LoadArchiveFile(data_file_name)) return true; if (LoadArchiveFile(data_file_name)) return true;
@ -155,7 +155,7 @@ bool TessdataManager::SaveFile(const char* filename,
FileWriter writer) const { FileWriter writer) const {
// TODO: This method supports only the proprietary file format. // TODO: This method supports only the proprietary file format.
ASSERT_HOST(is_loaded_); ASSERT_HOST(is_loaded_);
GenericVector<char> data; std::vector<char> data;
Serialize(&data); Serialize(&data);
if (writer == nullptr) if (writer == nullptr)
return SaveDataToFile(data, filename); return SaveDataToFile(data, filename);
@ -164,7 +164,7 @@ bool TessdataManager::SaveFile(const char* filename,
} }
// Serializes to the given vector. // Serializes to the given vector.
void TessdataManager::Serialize(GenericVector<char> *data) const { void TessdataManager::Serialize(std::vector<char> *data) const {
// TODO: This method supports only the proprietary file format. // TODO: This method supports only the proprietary file format.
ASSERT_HOST(is_loaded_); ASSERT_HOST(is_loaded_);
// Compute the offset_table and total size. // Compute the offset_table and total size.
@ -178,7 +178,7 @@ void TessdataManager::Serialize(GenericVector<char> *data) const {
offset += entries_[i].size(); offset += entries_[i].size();
} }
} }
data->init_to_size(offset, 0); data->resize(offset, 0);
int32_t num_entries = TESSDATA_NUM_ENTRIES; int32_t num_entries = TESSDATA_NUM_ENTRIES;
TFile fp; TFile fp;
fp.OpenWrite(data); fp.OpenWrite(data);

View File

@ -151,7 +151,7 @@ class TessdataManager {
// Saves to the given filename. // Saves to the given filename.
bool SaveFile(const char* filename, FileWriter writer) const; bool SaveFile(const char* filename, FileWriter writer) const;
// Serializes to the given vector. // Serializes to the given vector.
void Serialize(GenericVector<char> *data) const; void Serialize(std::vector<char> *data) const;
// Resets to the initial state, keeping the reader. // Resets to the initial state, keeping the reader.
void Clear(); void Clear();

View File

@ -716,8 +716,8 @@ int ShapeTable::AddUnicharToResults(
int result_index = unichar_map->get(unichar_id); int result_index = unichar_map->get(unichar_id);
if (result_index < 0) { if (result_index < 0) {
UnicharRating result(unichar_id, rating); UnicharRating result(unichar_id, rating);
results->push_back(result);
result_index = results->size(); result_index = results->size();
results->push_back(result);
(*unichar_map)[unichar_id] = result_index; (*unichar_map)[unichar_id] = result_index;
} }
return result_index; return result_index;

View File

@ -377,7 +377,7 @@ class DawgPositionVector : public GenericVector<DawgPosition> {
bool debug, bool debug,
const char *debug_msg) { const char *debug_msg) {
for (int i = 0; i < size(); ++i) { for (int i = 0; i < size(); ++i) {
if (data()[i] == new_pos) return false; if (data_[i] == new_pos) return false;
} }
push_back(new_pos); push_back(new_pos);
if (debug) { if (debug) {

View File

@ -162,7 +162,7 @@ int main(int argc, char **argv) {
return EXIT_FAILURE; return EXIT_FAILURE;
} }
recognizer.ConvertToInt(); recognizer.ConvertToInt();
GenericVector<char> lstm_data; std::vector<char> lstm_data;
fp.OpenWrite(&lstm_data); fp.OpenWrite(&lstm_data);
ASSERT_HOST(recognizer.Serialize(&tm, &fp)); ASSERT_HOST(recognizer.Serialize(&tm, &fp));
tm.OverwriteEntry(tesseract::TESSDATA_LSTM, &lstm_data[0], tm.OverwriteEntry(tesseract::TESSDATA_LSTM, &lstm_data[0],

View File

@ -34,7 +34,7 @@ namespace tesseract {
// can do its own thing. If lang is empty, returns true but does nothing. // can do its own thing. If lang is empty, returns true but does nothing.
// NOTE that suffix should contain any required . for the filename. // NOTE that suffix should contain any required . for the filename.
bool WriteFile(const std::string& output_dir, const std::string& lang, bool WriteFile(const std::string& output_dir, const std::string& lang,
const std::string& suffix, const GenericVector<char>& data, const std::string& suffix, const std::vector<char>& data,
FileWriter writer) { FileWriter writer) {
if (lang.empty()) return true; if (lang.empty()) return true;
std::string dirname = output_dir + "/" + lang; std::string dirname = output_dir + "/" + lang;
@ -56,7 +56,7 @@ bool WriteFile(const std::string& output_dir, const std::string& lang,
// On failure emits a warning message and returns an empty STRING. // On failure emits a warning message and returns an empty STRING.
STRING ReadFile(const std::string& filename, FileReader reader) { STRING ReadFile(const std::string& filename, FileReader reader) {
if (filename.empty()) return STRING(); if (filename.empty()) return STRING();
GenericVector<char> data; std::vector<char> data;
bool read_result; bool read_result;
if (reader == nullptr) if (reader == nullptr)
read_result = LoadDataFromFile(filename.c_str(), &data); read_result = LoadDataFromFile(filename.c_str(), &data);
@ -71,7 +71,7 @@ STRING ReadFile(const std::string& filename, FileReader reader) {
bool WriteUnicharset(const UNICHARSET& unicharset, const std::string& output_dir, bool WriteUnicharset(const UNICHARSET& unicharset, const std::string& output_dir,
const std::string& lang, FileWriter writer, const std::string& lang, FileWriter writer,
TessdataManager* traineddata) { TessdataManager* traineddata) {
GenericVector<char> unicharset_data; std::vector<char> unicharset_data;
TFile fp; TFile fp;
fp.OpenWrite(&unicharset_data); fp.OpenWrite(&unicharset_data);
if (!unicharset.save_to_file(&fp)) return false; if (!unicharset.save_to_file(&fp)) return false;
@ -107,13 +107,13 @@ bool WriteRecoder(const UNICHARSET& unicharset, bool pass_through,
} }
} }
TFile fp; TFile fp;
GenericVector<char> recoder_data; std::vector<char> recoder_data;
fp.OpenWrite(&recoder_data); fp.OpenWrite(&recoder_data);
if (!recoder.Serialize(&fp)) return false; if (!recoder.Serialize(&fp)) return false;
traineddata->OverwriteEntry(TESSDATA_LSTM_RECODER, &recoder_data[0], traineddata->OverwriteEntry(TESSDATA_LSTM_RECODER, &recoder_data[0],
recoder_data.size()); recoder_data.size());
STRING encoding = recoder.GetEncodingAsString(unicharset); STRING encoding = recoder.GetEncodingAsString(unicharset);
recoder_data.init_to_size(encoding.length(), 0); recoder_data.resize(encoding.length(), 0);
memcpy(&recoder_data[0], &encoding[0], encoding.length()); memcpy(&recoder_data[0], &encoding[0], encoding.length());
STRING suffix; STRING suffix;
suffix.add_str_int(".charset_size=", recoder.code_range()); suffix.add_str_int(".charset_size=", recoder.code_range());
@ -134,7 +134,7 @@ static bool WriteDawg(const std::vector<STRING>& words,
std::unique_ptr<SquishedDawg> dawg(trie.trie_to_dawg()); std::unique_ptr<SquishedDawg> dawg(trie.trie_to_dawg());
if (dawg == nullptr || dawg->NumEdges() == 0) return false; if (dawg == nullptr || dawg->NumEdges() == 0) return false;
TFile fp; TFile fp;
GenericVector<char> dawg_data; std::vector<char> dawg_data;
fp.OpenWrite(&dawg_data); fp.OpenWrite(&dawg_data);
if (!dawg->write_squished_dawg(&fp)) return false; if (!dawg->write_squished_dawg(&fp)) return false;
traineddata->OverwriteEntry(file_type, &dawg_data[0], dawg_data.size()); traineddata->OverwriteEntry(file_type, &dawg_data[0], dawg_data.size());
@ -228,7 +228,7 @@ int CombineLangModel(const UNICHARSET& unicharset, const std::string& script_dir
} }
// Traineddata file. // Traineddata file.
GenericVector<char> traineddata_data; std::vector<char> traineddata_data;
traineddata.Serialize(&traineddata_data); traineddata.Serialize(&traineddata_data);
if (!WriteFile(output_dir, lang, ".traineddata", traineddata_data, writer)) { if (!WriteFile(output_dir, lang, ".traineddata", traineddata_data, writer)) {
tprintf("Error writing output traineddata file!!\n"); tprintf("Error writing output traineddata file!!\n");

View File

@ -30,7 +30,7 @@ namespace tesseract {
// can do its own thing. If lang is empty, returns true but does nothing. // can do its own thing. If lang is empty, returns true but does nothing.
// NOTE that suffix should contain any required . for the filename. // NOTE that suffix should contain any required . for the filename.
bool WriteFile(const std::string& output_dir, const std::string& lang, bool WriteFile(const std::string& output_dir, const std::string& lang,
const std::string& suffix, const GenericVector<char>& data, const std::string& suffix, const std::vector<char>& data,
FileWriter writer); FileWriter writer);
// Helper reads a file with optional reader and returns a STRING. // Helper reads a file with optional reader and returns a STRING.
// On failure emits a warning message and returns an empty STRING. // On failure emits a warning message and returns an empty STRING.

View File

@ -29,7 +29,7 @@ LSTMTester::LSTMTester(int64_t max_memory)
// tesseract into memory ready for testing. Returns false if nothing was // tesseract into memory ready for testing. Returns false if nothing was
// loaded. The arg is a filename of a file that lists the filenames. // loaded. The arg is a filename of a file that lists the filenames.
bool LSTMTester::LoadAllEvalData(const char* filenames_file) { bool LSTMTester::LoadAllEvalData(const char* filenames_file) {
GenericVector<STRING> filenames; std::vector<STRING> filenames;
if (!LoadFileLinesToStrings(filenames_file, &filenames)) { if (!LoadFileLinesToStrings(filenames_file, &filenames)) {
tprintf("Failed to load list of eval filenames from %s\n", tprintf("Failed to load list of eval filenames from %s\n",
filenames_file); filenames_file);
@ -41,7 +41,7 @@ bool LSTMTester::LoadAllEvalData(const char* filenames_file) {
// Loads a set of lstmf files that were created using the lstm.train config to // Loads a set of lstmf files that were created using the lstm.train config to
// tesseract into memory ready for testing. Returns false if nothing was // tesseract into memory ready for testing. Returns false if nothing was
// loaded. // loaded.
bool LSTMTester::LoadAllEvalData(const GenericVector<STRING>& filenames) { bool LSTMTester::LoadAllEvalData(const std::vector<STRING>& filenames) {
test_data_.Clear(); test_data_.Clear();
bool result = test_data_.LoadDocuments(filenames, CS_SEQUENTIAL, nullptr); bool result = test_data_.LoadDocuments(filenames, CS_SEQUENTIAL, nullptr);
total_pages_ = test_data_.TotalPages(); total_pages_ = test_data_.TotalPages();

View File

@ -38,7 +38,7 @@ class LSTMTester {
// Loads a set of lstmf files that were created using the lstm.train config to // Loads a set of lstmf files that were created using the lstm.train config to
// tesseract into memory ready for testing. Returns false if nothing was // tesseract into memory ready for testing. Returns false if nothing was
// loaded. // loaded.
bool LoadAllEvalData(const GenericVector<STRING>& filenames); bool LoadAllEvalData(const std::vector<STRING>& filenames);
// Runs an evaluation asynchronously on the stored eval data and returns a // Runs an evaluation asynchronously on the stored eval data and returns a
// string describing the results of the previous test. Args match TestCallback // string describing the results of the previous test. Args match TestCallback

View File

@ -267,7 +267,7 @@ void LSTMTrainer::DebugNetwork() {
// Loads a set of lstmf files that were created using the lstm.train config to // Loads a set of lstmf files that were created using the lstm.train config to
// tesseract into memory ready for training. Returns false if nothing was // tesseract into memory ready for training. Returns false if nothing was
// loaded. // loaded.
bool LSTMTrainer::LoadAllTrainingData(const GenericVector<STRING>& filenames, bool LSTMTrainer::LoadAllTrainingData(const std::vector<STRING>& filenames,
CachingStrategy cache_strategy, CachingStrategy cache_strategy,
bool randomly_rotate) { bool randomly_rotate) {
randomly_rotate_ = randomly_rotate; randomly_rotate_ = randomly_rotate;
@ -302,7 +302,7 @@ bool LSTMTrainer::MaintainCheckpoints(TestCallback tester, STRING* log_msg) {
} }
} }
bool result = true; // Something interesting happened. bool result = true; // Something interesting happened.
GenericVector<char> rec_model_data; std::vector<char> rec_model_data;
if (error_rate < best_error_rate_) { if (error_rate < best_error_rate_) {
SaveRecognitionDump(&rec_model_data); SaveRecognitionDump(&rec_model_data);
log_msg->add_str_double(" New best char error = ", error_rate); log_msg->add_str_double(" New best char error = ", error_rate);
@ -335,7 +335,7 @@ bool LSTMTrainer::MaintainCheckpoints(TestCallback tester, STRING* log_msg) {
// Error rate has ballooned. Go back to the best model. // Error rate has ballooned. Go back to the best model.
*log_msg += "\nDivergence! "; *log_msg += "\nDivergence! ";
// Copy best_trainer_ before reading it, as it will get overwritten. // Copy best_trainer_ before reading it, as it will get overwritten.
GenericVector<char> revert_data(best_trainer_); std::vector<char> revert_data(best_trainer_);
if (ReadTrainingDump(revert_data, this)) { if (ReadTrainingDump(revert_data, this)) {
LogIterations("Reverted to", log_msg); LogIterations("Reverted to", log_msg);
ReduceLearningRates(this, log_msg); ReduceLearningRates(this, log_msg);
@ -354,7 +354,7 @@ bool LSTMTrainer::MaintainCheckpoints(TestCallback tester, STRING* log_msg) {
} }
if (checkpoint_name_.length() > 0) { if (checkpoint_name_.length() > 0) {
// Write a current checkpoint. // Write a current checkpoint.
GenericVector<char> checkpoint; std::vector<char> checkpoint;
if (!SaveTrainingDump(FULL, this, &checkpoint) || if (!SaveTrainingDump(FULL, this, &checkpoint) ||
!SaveDataToFile(checkpoint, checkpoint_name_.c_str())) { !SaveDataToFile(checkpoint, checkpoint_name_.c_str())) {
*log_msg += " failed to write checkpoint."; *log_msg += " failed to write checkpoint.";
@ -420,14 +420,14 @@ bool LSTMTrainer::Serialize(SerializeAmount serialize_amount,
if (!fp->Serialize(&worst_error_rates_[0], countof(worst_error_rates_))) return false; if (!fp->Serialize(&worst_error_rates_[0], countof(worst_error_rates_))) return false;
if (!fp->Serialize(&worst_iteration_)) return false; if (!fp->Serialize(&worst_iteration_)) return false;
if (!fp->Serialize(&stall_iteration_)) return false; if (!fp->Serialize(&stall_iteration_)) return false;
if (!best_model_data_.Serialize(fp)) return false; if (!fp->Serialize(best_model_data_)) return false;
if (!worst_model_data_.Serialize(fp)) return false; if (!fp->Serialize(worst_model_data_)) return false;
if (serialize_amount != NO_BEST_TRAINER && !best_trainer_.Serialize(fp)) if (serialize_amount != NO_BEST_TRAINER && !fp->Serialize(best_trainer_))
return false; return false;
GenericVector<char> sub_data; std::vector<char> sub_data;
if (sub_trainer_ != nullptr && !SaveTrainingDump(LIGHT, sub_trainer_, &sub_data)) if (sub_trainer_ != nullptr && !SaveTrainingDump(LIGHT, sub_trainer_, &sub_data))
return false; return false;
if (!sub_data.Serialize(fp)) return false; if (!fp->Serialize(sub_data)) return false;
if (!best_error_history_.Serialize(fp)) return false; if (!best_error_history_.Serialize(fp)) return false;
if (!best_error_iterations_.Serialize(fp)) return false; if (!best_error_iterations_.Serialize(fp)) return false;
return fp->Serialize(&improvement_steps_); return fp->Serialize(&improvement_steps_);
@ -464,11 +464,11 @@ bool LSTMTrainer::DeSerialize(const TessdataManager* mgr, TFile* fp) {
if (!fp->DeSerialize(&worst_error_rates_[0], countof(worst_error_rates_))) return false; if (!fp->DeSerialize(&worst_error_rates_[0], countof(worst_error_rates_))) return false;
if (!fp->DeSerialize(&worst_iteration_)) return false; if (!fp->DeSerialize(&worst_iteration_)) return false;
if (!fp->DeSerialize(&stall_iteration_)) return false; if (!fp->DeSerialize(&stall_iteration_)) return false;
if (!best_model_data_.DeSerialize(fp)) return false; if (!fp->DeSerialize(best_model_data_)) return false;
if (!worst_model_data_.DeSerialize(fp)) return false; if (!fp->DeSerialize(worst_model_data_)) return false;
if (amount != NO_BEST_TRAINER && !best_trainer_.DeSerialize(fp)) return false; if (amount != NO_BEST_TRAINER && !fp->DeSerialize(best_trainer_)) return false;
GenericVector<char> sub_data; std::vector<char> sub_data;
if (!sub_data.DeSerialize(fp)) return false; if (!fp->DeSerialize(sub_data)) return false;
delete sub_trainer_; delete sub_trainer_;
if (sub_data.empty()) { if (sub_data.empty()) {
sub_trainer_ = nullptr; sub_trainer_ = nullptr;
@ -542,7 +542,7 @@ SubTrainerResult LSTMTrainer::UpdateSubtrainer(STRING* log_msg) {
if (sub_error < best_error_rate_ && if (sub_error < best_error_rate_ &&
sub_margin >= kSubTrainerMarginFraction) { sub_margin >= kSubTrainerMarginFraction) {
// The sub_trainer_ has won the race to a new best. Switch to it. // The sub_trainer_ has won the race to a new best. Switch to it.
GenericVector<char> updated_trainer; std::vector<char> updated_trainer;
SaveTrainingDump(LIGHT, sub_trainer_, &updated_trainer); SaveTrainingDump(LIGHT, sub_trainer_, &updated_trainer);
ReadTrainingDump(updated_trainer, this); ReadTrainingDump(updated_trainer, this);
log_msg->add_str_int(" Sub trainer wins at iteration ", log_msg->add_str_int(" Sub trainer wins at iteration ",
@ -594,7 +594,7 @@ int LSTMTrainer::ReduceLayerLearningRates(double factor, int num_samples,
ok_sums[i].init_to_size(num_layers, 0.0); ok_sums[i].init_to_size(num_layers, 0.0);
} }
double momentum_factor = 1.0 / (1.0 - momentum_); double momentum_factor = 1.0 / (1.0 - momentum_);
GenericVector<char> orig_trainer; std::vector<char> orig_trainer;
samples_trainer->SaveTrainingDump(LIGHT, this, &orig_trainer); samples_trainer->SaveTrainingDump(LIGHT, this, &orig_trainer);
for (int i = 0; i < num_layers; ++i) { for (int i = 0; i < num_layers; ++i) {
Network* layer = GetLayer(layers[i]); Network* layer = GetLayer(layers[i]);
@ -624,7 +624,7 @@ int LSTMTrainer::ReduceLayerLearningRates(double factor, int num_samples,
copy_trainer.TrainOnLine(samples_trainer, true); copy_trainer.TrainOnLine(samples_trainer, true);
if (trainingdata == nullptr) continue; if (trainingdata == nullptr) continue;
// We'll now use this trainer again for each layer. // We'll now use this trainer again for each layer.
GenericVector<char> updated_trainer; std::vector<char> updated_trainer;
samples_trainer->SaveTrainingDump(LIGHT, &copy_trainer, &updated_trainer); samples_trainer->SaveTrainingDump(LIGHT, &copy_trainer, &updated_trainer);
for (int i = 0; i < num_layers; ++i) { for (int i = 0; i < num_layers; ++i) {
if (num_weights[i] == 0) continue; if (num_weights[i] == 0) continue;
@ -871,7 +871,7 @@ Trainability LSTMTrainer::PrepareForBackward(const ImageData* trainingdata,
// actually serialized. // actually serialized.
bool LSTMTrainer::SaveTrainingDump(SerializeAmount serialize_amount, bool LSTMTrainer::SaveTrainingDump(SerializeAmount serialize_amount,
const LSTMTrainer* trainer, const LSTMTrainer* trainer,
GenericVector<char>* data) const { std::vector<char>* data) const {
TFile fp; TFile fp;
fp.OpenWrite(data); fp.OpenWrite(data);
return trainer->Serialize(serialize_amount, &mgr_, &fp); return trainer->Serialize(serialize_amount, &mgr_, &fp);
@ -891,7 +891,7 @@ bool LSTMTrainer::ReadLocalTrainingDump(const TessdataManager* mgr,
// Writes the full recognition traineddata to the given filename. // Writes the full recognition traineddata to the given filename.
bool LSTMTrainer::SaveTraineddata(const char* filename) { bool LSTMTrainer::SaveTraineddata(const char* filename) {
GenericVector<char> recognizer_data; std::vector<char> recognizer_data;
SaveRecognitionDump(&recognizer_data); SaveRecognitionDump(&recognizer_data);
mgr_.OverwriteEntry(TESSDATA_LSTM, &recognizer_data[0], mgr_.OverwriteEntry(TESSDATA_LSTM, &recognizer_data[0],
recognizer_data.size()); recognizer_data.size());
@ -899,7 +899,7 @@ bool LSTMTrainer::SaveTraineddata(const char* filename) {
} }
// Writes the recognizer to memory, so that it can be used for testing later. // Writes the recognizer to memory, so that it can be used for testing later.
void LSTMTrainer::SaveRecognitionDump(GenericVector<char>* data) const { void LSTMTrainer::SaveRecognitionDump(std::vector<char>* data) const {
TFile fp; TFile fp;
fp.OpenWrite(data); fp.OpenWrite(data);
network_->SetEnableTraining(TS_TEMP_DISABLE); network_->SetEnableTraining(TS_TEMP_DISABLE);
@ -1260,7 +1260,7 @@ void LSTMTrainer::RollErrorBuffers() {
// Tester is an externally supplied callback function that tests on some // Tester is an externally supplied callback function that tests on some
// data set with a given model and records the error rates in a graph. // data set with a given model and records the error rates in a graph.
STRING LSTMTrainer::UpdateErrorGraph(int iteration, double error_rate, STRING LSTMTrainer::UpdateErrorGraph(int iteration, double error_rate,
const GenericVector<char>& model_data, const std::vector<char>& model_data,
TestCallback tester) { TestCallback tester) {
if (error_rate > best_error_rate_ if (error_rate > best_error_rate_
&& iteration < best_iteration_ + kErrorGraphInterval) { && iteration < best_iteration_ + kErrorGraphInterval) {
@ -1287,7 +1287,7 @@ STRING LSTMTrainer::UpdateErrorGraph(int iteration, double error_rate,
worst_model_data_.size()); worst_model_data_.size());
result = tester(worst_iteration_, worst_error_rates_, mgr_, result = tester(worst_iteration_, worst_error_rates_, mgr_,
CurrentTrainingStage()); CurrentTrainingStage());
worst_model_data_.truncate(0); worst_model_data_.clear();
best_model_data_ = model_data; best_model_data_ = model_data;
} }
best_error_rate_ = error_rate; best_error_rate_ = error_rate;
@ -1322,7 +1322,7 @@ STRING LSTMTrainer::UpdateErrorGraph(int iteration, double error_rate,
CurrentTrainingStage()); CurrentTrainingStage());
} }
if (result.length() > 0) if (result.length() > 0)
best_model_data_.truncate(0); best_model_data_.clear();
worst_model_data_ = model_data; worst_model_data_ = model_data;
} }
} }

View File

@ -135,7 +135,7 @@ class LSTMTrainer : public LSTMRecognizer {
int learning_iteration() const { return learning_iteration_; } int learning_iteration() const { return learning_iteration_; }
int32_t improvement_steps() const { return improvement_steps_; } int32_t improvement_steps() const { return improvement_steps_; }
void set_perfect_delay(int delay) { perfect_delay_ = delay; } void set_perfect_delay(int delay) { perfect_delay_ = delay; }
const GenericVector<char>& best_trainer() const { return best_trainer_; } const std::vector<char>& best_trainer() const { return best_trainer_; }
// Returns the error that was just calculated by PrepareForBackward. // Returns the error that was just calculated by PrepareForBackward.
double NewSingleError(ErrorTypes type) const { double NewSingleError(ErrorTypes type) const {
return error_buffers_[type][training_iteration() % kRollingBufferSize_]; return error_buffers_[type][training_iteration() % kRollingBufferSize_];
@ -167,7 +167,7 @@ class LSTMTrainer : public LSTMRecognizer {
// Loads a set of lstmf files that were created using the lstm.train config to // Loads a set of lstmf files that were created using the lstm.train config to
// tesseract into memory ready for training. Returns false if nothing was // tesseract into memory ready for training. Returns false if nothing was
// loaded. // loaded.
bool LoadAllTrainingData(const GenericVector<STRING>& filenames, bool LoadAllTrainingData(const std::vector<STRING>& filenames,
CachingStrategy cache_strategy, CachingStrategy cache_strategy,
bool randomly_rotate); bool randomly_rotate);
@ -269,7 +269,7 @@ class LSTMTrainer : public LSTMRecognizer {
// actually serialized. // actually serialized.
bool SaveTrainingDump(SerializeAmount serialize_amount, bool SaveTrainingDump(SerializeAmount serialize_amount,
const LSTMTrainer* trainer, const LSTMTrainer* trainer,
GenericVector<char>* data) const; std::vector<char>* data) const;
// Reads previously saved trainer from memory. *this must always be the // Reads previously saved trainer from memory. *this must always be the
// master trainer that retains the only copy of the training data and // master trainer that retains the only copy of the training data and
@ -294,7 +294,7 @@ class LSTMTrainer : public LSTMRecognizer {
bool SaveTraineddata(const char* filename); bool SaveTraineddata(const char* filename);
// Writes the recognizer to memory, so that it can be used for testing later. // Writes the recognizer to memory, so that it can be used for testing later.
void SaveRecognitionDump(GenericVector<char>* data) const; void SaveRecognitionDump(std::vector<char>* data) const;
// Returns a suitable filename for a training dump, based on the model_base_, // Returns a suitable filename for a training dump, based on the model_base_,
// the iteration and the error rates. // the iteration and the error rates.
@ -375,7 +375,7 @@ class LSTMTrainer : public LSTMRecognizer {
// Given that error_rate is either a new min or max, updates the best/worst // Given that error_rate is either a new min or max, updates the best/worst
// error rates, and record of progress. // error rates, and record of progress.
STRING UpdateErrorGraph(int iteration, double error_rate, STRING UpdateErrorGraph(int iteration, double error_rate,
const GenericVector<char>& model_data, const std::vector<char>& model_data,
TestCallback tester); TestCallback tester);
protected: protected:
@ -420,10 +420,10 @@ class LSTMTrainer : public LSTMRecognizer {
// Iteration at which the process will be thought stalled. // Iteration at which the process will be thought stalled.
int stall_iteration_; int stall_iteration_;
// Saved recognition models for computing test error for graph points. // Saved recognition models for computing test error for graph points.
GenericVector<char> best_model_data_; std::vector<char> best_model_data_;
GenericVector<char> worst_model_data_; std::vector<char> worst_model_data_;
// Saved trainer for reverting back to last known best. // Saved trainer for reverting back to last known best.
GenericVector<char> best_trainer_; std::vector<char> best_trainer_;
// A subsidiary trainer running with a different learning rate until either // A subsidiary trainer running with a different learning rate until either
// *this or sub_trainer_ hits a new best. // *this or sub_trainer_ hits a new best.
LSTMTrainer* sub_trainer_; LSTMTrainer* sub_trainer_;

View File

@ -136,7 +136,7 @@ int main(int argc, char **argv) {
tprintf("Must supply a list of training filenames! --train_listfile\n"); tprintf("Must supply a list of training filenames! --train_listfile\n");
return EXIT_FAILURE; return EXIT_FAILURE;
} }
GenericVector<STRING> filenames; std::vector<STRING> filenames;
if (!tesseract::LoadFileLinesToStrings(FLAGS_train_listfile.c_str(), if (!tesseract::LoadFileLinesToStrings(FLAGS_train_listfile.c_str(),
&filenames)) { &filenames)) {
tprintf("Failed to load list of training filenames from %s\n", tprintf("Failed to load list of training filenames from %s\n",

View File

@ -264,7 +264,7 @@ SEAM *Wordrec::chop_numbered_blob(TWERD *word, int32_t blob_number,
} }
SEAM *Wordrec::chop_overlapping_blob(const GenericVector<TBOX>& boxes, SEAM *Wordrec::chop_overlapping_blob(const std::vector<TBOX>& boxes,
bool italic_blob, WERD_RES *word_res, bool italic_blob, WERD_RES *word_res,
int *blob_number) { int *blob_number) {
TWERD *word = word_res->chopped_word; TWERD *word = word_res->chopped_word;
@ -362,7 +362,7 @@ SEAM* Wordrec::improve_one_blob(const GenericVector<BLOB_CHOICE*>& blob_choices,
* the worst blobs and try to divide it up to improve the ratings. * the worst blobs and try to divide it up to improve the ratings.
* Used for testing chopper. * Used for testing chopper.
*/ */
SEAM* Wordrec::chop_one_blob(const GenericVector<TBOX>& boxes, SEAM* Wordrec::chop_one_blob(const std::vector<TBOX>& boxes,
const GenericVector<BLOB_CHOICE*>& blob_choices, const GenericVector<BLOB_CHOICE*>& blob_choices,
WERD_RES* word_res, WERD_RES* word_res,
int* blob_number) { int* blob_number) {

View File

@ -370,7 +370,7 @@ class Wordrec : public Classify {
bool italic_blob, const GenericVector<SEAM*>& seams); bool italic_blob, const GenericVector<SEAM*>& seams);
SEAM *chop_numbered_blob(TWERD *word, int32_t blob_number, SEAM *chop_numbered_blob(TWERD *word, int32_t blob_number,
bool italic_blob, const GenericVector<SEAM*>& seams); bool italic_blob, const GenericVector<SEAM*>& seams);
SEAM *chop_overlapping_blob(const GenericVector<TBOX>& boxes, SEAM *chop_overlapping_blob(const std::vector<TBOX>& boxes,
bool italic_blob, bool italic_blob,
WERD_RES *word_res, int *blob_number); WERD_RES *word_res, int *blob_number);
SEAM *improve_one_blob(const GenericVector<BLOB_CHOICE*> &blob_choices, SEAM *improve_one_blob(const GenericVector<BLOB_CHOICE*> &blob_choices,
@ -379,7 +379,7 @@ class Wordrec : public Classify {
bool italic_blob, bool italic_blob,
WERD_RES *word, WERD_RES *word,
int *blob_number); int *blob_number);
SEAM *chop_one_blob(const GenericVector<TBOX> &boxes, SEAM *chop_one_blob(const std::vector<TBOX> &boxes,
const GenericVector<BLOB_CHOICE*> &blob_choices, const GenericVector<BLOB_CHOICE*> &blob_choices,
WERD_RES *word_res, WERD_RES *word_res,
int *blob_number); int *blob_number);

View File

@ -97,7 +97,7 @@ TEST_F(ImagedataTest, CachesMultiDocs) {
// Number of pages in each document. // Number of pages in each document.
const std::vector<int> kNumPages = {6, 5, 7}; const std::vector<int> kNumPages = {6, 5, 7};
std::vector<std::vector<std::string>> page_texts; std::vector<std::vector<std::string>> page_texts;
GenericVector<STRING> filenames; std::vector<STRING> filenames;
for (size_t d = 0; d < kNumPages.size(); ++d) { for (size_t d = 0; d < kNumPages.size(); ++d) {
page_texts.emplace_back(std::vector<std::string>()); page_texts.emplace_back(std::vector<std::string>());
std::string filename = MakeFakeDoc(kNumPages[d], d, &page_texts.back()); std::string filename = MakeFakeDoc(kNumPages[d], d, &page_texts.back());

View File

@ -45,7 +45,7 @@ TEST(LangModelTest, AddACharacter) {
LOG(INFO) << "Output dir=" << output_dir << "\n"; LOG(INFO) << "Output dir=" << output_dir << "\n";
std::string lang1 = "eng"; std::string lang1 = "eng";
bool pass_through_recoder = false; bool pass_through_recoder = false;
GenericVector<STRING> words, puncs, numbers; std::vector<STRING> words, puncs, numbers;
// If these reads fail, we get a warning message and an empty list of words. // If these reads fail, we get a warning message and an empty list of words.
ReadFile(file::JoinPath(eng_dir, "eng.wordlist"), nullptr) ReadFile(file::JoinPath(eng_dir, "eng.wordlist"), nullptr)
.split('\n', &words); .split('\n', &words);
@ -136,7 +136,7 @@ TEST(LangModelTest, AddACharacterHindi) {
LOG(INFO) << "Output dir=" << output_dir << "\n"; LOG(INFO) << "Output dir=" << output_dir << "\n";
std::string lang1 = "hin"; std::string lang1 = "hin";
bool pass_through_recoder = false; bool pass_through_recoder = false;
GenericVector<STRING> words, puncs, numbers; std::vector<STRING> words, puncs, numbers;
// If these reads fail, we get a warning message and an empty list of words. // If these reads fail, we get a warning message and an empty list of words.
ReadFile(file::JoinPath(hin_dir, "hin.wordlist"), nullptr) ReadFile(file::JoinPath(hin_dir, "hin.wordlist"), nullptr)
.split('\n', &words); .split('\n', &words);

View File

@ -114,7 +114,7 @@ TEST_F(LSTMTrainerTest, DeterminismTest) {
double lstm_2d_err_a = TrainIterations(kTrainerIterations); double lstm_2d_err_a = TrainIterations(kTrainerIterations);
double act_error_a = trainer_->ActivationError(); double act_error_a = trainer_->ActivationError();
double char_error_a = trainer_->CharError(); double char_error_a = trainer_->CharError();
GenericVector<char> trainer_a_data; std::vector<char> trainer_a_data;
EXPECT_TRUE(trainer_->SaveTrainingDump(NO_BEST_TRAINER, trainer_.get(), EXPECT_TRUE(trainer_->SaveTrainingDump(NO_BEST_TRAINER, trainer_.get(),
&trainer_a_data)); &trainer_a_data));
SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]", SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]",

View File

@ -78,7 +78,7 @@ class LSTMTrainerTest : public testing::Test {
ASSERT_TRUE(unicharset.load_from_file(unicharset_name.c_str(), false)); ASSERT_TRUE(unicharset.load_from_file(unicharset_name.c_str(), false));
std::string script_dir = file::JoinPath( std::string script_dir = file::JoinPath(
LANGDATA_DIR, ""); LANGDATA_DIR, "");
GenericVector<STRING> words; std::vector<STRING> words;
EXPECT_EQ(0, CombineLangModel(unicharset, script_dir, "", FLAGS_test_tmpdir, EXPECT_EQ(0, CombineLangModel(unicharset, script_dir, "", FLAGS_test_tmpdir,
kLang, !recode, words, words, words, false, kLang, !recode, words, words, words, false,
nullptr, nullptr)); nullptr, nullptr));
@ -95,7 +95,7 @@ class LSTMTrainerTest : public testing::Test {
if (layer_specific) net_mode |= NF_LAYER_SPECIFIC_LR; if (layer_specific) net_mode |= NF_LAYER_SPECIFIC_LR;
EXPECT_TRUE(trainer_->InitNetwork(network_spec.c_str(), -1, net_mode, 0.1, EXPECT_TRUE(trainer_->InitNetwork(network_spec.c_str(), -1, net_mode, 0.1,
learning_rate, 0.9, 0.999)); learning_rate, 0.9, 0.999));
GenericVector<STRING> filenames; std::vector<STRING> filenames;
filenames.push_back(STRING(TestDataNameToPath(lstmf_file).c_str())); filenames.push_back(STRING(TestDataNameToPath(lstmf_file).c_str()));
EXPECT_TRUE(trainer_->LoadAllTrainingData(filenames, CS_SEQUENTIAL, false)); EXPECT_TRUE(trainer_->LoadAllTrainingData(filenames, CS_SEQUENTIAL, false));
LOG(INFO) << "Setup network:" << model_name << "\n" ; LOG(INFO) << "Setup network:" << model_name << "\n" ;
@ -151,7 +151,7 @@ class LSTMTrainerTest : public testing::Test {
// within 1% of the error rate. Returns the increase in error from float to // within 1% of the error rate. Returns the increase in error from float to
// int. // int.
double TestIntMode(int test_iterations) { double TestIntMode(int test_iterations) {
GenericVector<char> trainer_data; std::vector<char> trainer_data;
EXPECT_TRUE(trainer_->SaveTrainingDump(NO_BEST_TRAINER, trainer_.get(), EXPECT_TRUE(trainer_->SaveTrainingDump(NO_BEST_TRAINER, trainer_.get(),
&trainer_data)); &trainer_data));
// Get the error on the next few iterations in float mode. // Get the error on the next few iterations in float mode.

View File

@ -192,7 +192,7 @@ void TestParagraphDetection(const TextAndModel* correct, int num_rows) {
GenericVector<RowInfo> row_infos; GenericVector<RowInfo> row_infos;
GenericVector<PARA*> row_owners; GenericVector<PARA*> row_owners;
PARA_LIST paragraphs; PARA_LIST paragraphs;
std::list<ParagraphModel*> models; std::vector<ParagraphModel*> models;
MakeAsciiRowInfos(correct, num_rows, &row_infos); MakeAsciiRowInfos(correct, num_rows, &row_infos);
int debug_level(3); int debug_level(3);
@ -324,7 +324,7 @@ TEST(ParagraphsTest, TestSingleFullPageContinuation) {
GenericVector<tesseract::RowInfo> row_infos; GenericVector<tesseract::RowInfo> row_infos;
GenericVector<PARA*> row_owners; GenericVector<PARA*> row_owners;
PARA_LIST paragraphs; PARA_LIST paragraphs;
std::list<ParagraphModel*> models; std::vector<ParagraphModel*> models;
models.push_back(new ParagraphModel(kLeft, 0, 20, 0, 10)); models.push_back(new ParagraphModel(kLeft, 0, 20, 0, 10));
MakeAsciiRowInfos(correct, num_rows, &row_infos); MakeAsciiRowInfos(correct, num_rows, &row_infos);
tesseract::DetectParagraphs(3, &row_infos, &row_owners, &paragraphs, &models); tesseract::DetectParagraphs(3, &row_infos, &row_owners, &paragraphs, &models);

View File

@ -28,6 +28,12 @@ void ToVector(const GenericVectorEqEq<T>& from, std::vector<T>* to) {
for (int i = 0; i < from.size(); i++) to->push_back(from[i]); for (int i = 0; i < from.size(); i++) to->push_back(from[i]);
} }
// Copies the contents of |from| into |*to|, discarding whatever |*to| held
// before. This is the std::vector counterpart of the GenericVectorEqEq
// overload, so callers can convert either container type with the same call.
//
// Copy-assignment is used rather than clear() followed by element-wise
// push_back: it avoids the signed/unsigned comparison between an int index
// and from.size(), allocates at most once, and leaves the data intact when
// |to| points at |from| itself (clear() would have emptied the source before
// anything was copied).
template <typename T>
void ToVector(const std::vector<T>& from, std::vector<T>* to) {
  *to = from;
}
// The fixture for testing Tesseract. // The fixture for testing Tesseract.
class ResultIteratorTest : public testing::Test { class ResultIteratorTest : public testing::Test {
protected: protected:
@ -167,12 +173,12 @@ class ResultIteratorTest : public testing::Test {
const StrongScriptDirection* word_dirs, const StrongScriptDirection* word_dirs,
int num_words, int* expected_reading_order, int num_words, int* expected_reading_order,
int num_reading_order_entries) const { int num_reading_order_entries) const {
GenericVector<StrongScriptDirection> gv_word_dirs; std::vector<StrongScriptDirection> gv_word_dirs;
for (int i = 0; i < num_words; i++) { for (int i = 0; i < num_words; i++) {
gv_word_dirs.push_back(word_dirs[i]); gv_word_dirs.push_back(word_dirs[i]);
} }
GenericVectorEqEq<int> output; std::vector<int> output;
ResultIterator::CalculateTextlineOrder(in_ltr_context, gv_word_dirs, ResultIterator::CalculateTextlineOrder(in_ltr_context, gv_word_dirs,
&output); &output);
// STL vector can be used with EXPECT_EQ, so convert... // STL vector can be used with EXPECT_EQ, so convert...
@ -191,17 +197,17 @@ class ResultIteratorTest : public testing::Test {
void VerifySaneTextlineOrder(bool in_ltr_context, void VerifySaneTextlineOrder(bool in_ltr_context,
const StrongScriptDirection* word_dirs, const StrongScriptDirection* word_dirs,
int num_words) const { int num_words) const {
GenericVector<StrongScriptDirection> gv_word_dirs; std::vector<StrongScriptDirection> gv_word_dirs;
for (int i = 0; i < num_words; i++) { for (int i = 0; i < num_words; i++) {
gv_word_dirs.push_back(word_dirs[i]); gv_word_dirs.push_back(word_dirs[i]);
} }
GenericVectorEqEq<int> output; std::vector<int> output;
ResultIterator::CalculateTextlineOrder(in_ltr_context, gv_word_dirs, ResultIterator::CalculateTextlineOrder(in_ltr_context, gv_word_dirs,
&output); &output);
ASSERT_GE(output.size(), num_words); ASSERT_GE(output.size(), num_words);
GenericVector<int> output_copy(output); std::vector<int> output_copy(output);
output_copy.sort(); std::sort(output_copy.begin(), output_copy.end());
bool sane = true; bool sane = true;
int j = 0; int j = 0;
while (j < output_copy.size() && output_copy[j] < 0) j++; while (j < output_copy.size() && output_copy[j] < 0) j++;

View File

@ -18,7 +18,6 @@
#include "boxchar.h" #include "boxchar.h"
#include "boxread.h" #include "boxread.h"
#include "commandlineflags.h" #include "commandlineflags.h"
#include "genericvector.h"
#include "include_gunit.h" #include "include_gunit.h"
#include "stringrenderer.h" #include "stringrenderer.h"
#include "strngs.h" #include "strngs.h"
@ -227,7 +226,7 @@ TEST_F(StringRendererTest, ArabicBoxcharsInLTROrder) {
std::string boxes_str = renderer_->GetBoxesStr(); std::string boxes_str = renderer_->GetBoxesStr();
// Decode to get the box text strings. // Decode to get the box text strings.
EXPECT_FALSE(boxes_str.empty()); EXPECT_FALSE(boxes_str.empty());
GenericVector<STRING> texts; std::vector<STRING> texts;
EXPECT_TRUE(ReadMemBoxes(0, false, boxes_str.c_str(), false, nullptr, &texts, EXPECT_TRUE(ReadMemBoxes(0, false, boxes_str.c_str(), false, nullptr, &texts,
nullptr, nullptr)); nullptr, nullptr));
std::string ltr_str; std::string ltr_str;

View File

@ -16,7 +16,7 @@
namespace tesseract { namespace tesseract {
// Tests TFile and GenericVector serialization by serializing and // Tests TFile and std::vector serialization by serializing and
// writing/reading. // writing/reading.
class TfileTest : public ::testing::Test { class TfileTest : public ::testing::Test {
@ -115,7 +115,7 @@ TEST_F(TfileTest, Serialize) {
// This test verifies that Tfile can serialize a class. // This test verifies that Tfile can serialize a class.
MathData m1; MathData m1;
m1.Setup(); m1.Setup();
GenericVector<char> data; std::vector<char> data;
TFile fpw; TFile fpw;
fpw.OpenWrite(&data); fpw.OpenWrite(&data);
EXPECT_TRUE(m1.Serialize(&fpw)); EXPECT_TRUE(m1.Serialize(&fpw));
@ -136,7 +136,7 @@ TEST_F(TfileTest, FGets) {
MathData m1; MathData m1;
std::string line_str = "This is a textline with a newline\n"; std::string line_str = "This is a textline with a newline\n";
m1.Setup(); m1.Setup();
GenericVector<char> data; std::vector<char> data;
TFile fpw; TFile fpw;
fpw.OpenWrite(&data); fpw.OpenWrite(&data);
EXPECT_TRUE(m1.Serialize(&fpw)); EXPECT_TRUE(m1.Serialize(&fpw));
@ -161,7 +161,7 @@ TEST_F(TfileTest, BigEndian) {
// This test verifies that Tfile can auto-reverse big-endian data. // This test verifies that Tfile can auto-reverse big-endian data.
MathData m1; MathData m1;
m1.Setup(); m1.Setup();
GenericVector<char> data; std::vector<char> data;
TFile fpw; TFile fpw;
fpw.OpenWrite(&data); fpw.OpenWrite(&data);
EXPECT_TRUE(m1.SerializeBigEndian(&fpw)); EXPECT_TRUE(m1.SerializeBigEndian(&fpw));

View File

@ -57,7 +57,7 @@ class UnicharcompressTest : public ::testing::Test {
} }
// Serializes and de-serializes compressed_ over itself. // Serializes and de-serializes compressed_ over itself.
void SerializeAndUndo() { void SerializeAndUndo() {
GenericVector<char> data; std::vector<char> data;
TFile wfp; TFile wfp;
wfp.OpenWrite(&data); wfp.OpenWrite(&data);
EXPECT_TRUE(compressed_.Serialize(&wfp)); EXPECT_TRUE(compressed_.Serialize(&wfp));

View File

@ -133,7 +133,7 @@ TEST(UnicharsetTest, MultibyteBigrams) {
// It is added if we force it to be. // It is added if we force it to be.
u.unichar_insert("\u0ccd\u0cad", OldUncleanUnichars::kTrue); u.unichar_insert("\u0ccd\u0cad", OldUncleanUnichars::kTrue);
EXPECT_EQ(u.size(), 8); EXPECT_EQ(u.size(), 8);
GenericVector<char> data; std::vector<char> data;
tesseract::TFile fp; tesseract::TFile fp;
fp.OpenWrite(&data); fp.OpenWrite(&data);
u.save_to_file(&fp); u.save_to_file(&fp);