mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-12-12 15:39:04 +08:00
commit
17b5f46385
@ -38,7 +38,6 @@
|
|||||||
|
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <functional> // for std::function
|
#include <functional> // for std::function
|
||||||
#include <list> // for std::list
|
|
||||||
#include <vector> // for std::vector
|
#include <vector> // for std::vector
|
||||||
|
|
||||||
struct Pix;
|
struct Pix;
|
||||||
@ -817,7 +816,7 @@ class TESS_API TessBaseAPI {
|
|||||||
EquationDetect* equ_detect_; ///< The equation detector.
|
EquationDetect* equ_detect_; ///< The equation detector.
|
||||||
FileReader reader_; ///< Reads files from any filesystem.
|
FileReader reader_; ///< Reads files from any filesystem.
|
||||||
ImageThresholder* thresholder_; ///< Image thresholding module.
|
ImageThresholder* thresholder_; ///< Image thresholding module.
|
||||||
std::list<ParagraphModel*>* paragraph_models_;
|
std::vector<ParagraphModel*>* paragraph_models_;
|
||||||
BLOCK_LIST* block_list_; ///< The page layout.
|
BLOCK_LIST* block_list_; ///< The page layout.
|
||||||
PAGE_RES* page_res_; ///< The page-level data.
|
PAGE_RES* page_res_; ///< The page-level data.
|
||||||
std::string input_file_; ///< Name used by training code.
|
std::string input_file_; ///< Name used by training code.
|
||||||
|
@ -937,7 +937,7 @@ int TessBaseAPI::RecognizeForChopTest(ETEXT_DESC* monitor) {
|
|||||||
|
|
||||||
while (page_res_it.word() != nullptr) {
|
while (page_res_it.word() != nullptr) {
|
||||||
WERD_RES *word_res = page_res_it.word();
|
WERD_RES *word_res = page_res_it.word();
|
||||||
GenericVector<TBOX> boxes;
|
std::vector<TBOX> boxes;
|
||||||
tesseract_->MaximallyChopWord(boxes, page_res_it.block()->block,
|
tesseract_->MaximallyChopWord(boxes, page_res_it.block()->block,
|
||||||
page_res_it.row()->row, word_res);
|
page_res_it.row()->row, word_res);
|
||||||
page_res_it.forward();
|
page_res_it.forward();
|
||||||
@ -1844,7 +1844,7 @@ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) {
|
|||||||
if (text[t] != '\0' || wordstr[w] != '\0') {
|
if (text[t] != '\0' || wordstr[w] != '\0') {
|
||||||
// No match.
|
// No match.
|
||||||
delete page_res_;
|
delete page_res_;
|
||||||
GenericVector<TBOX> boxes;
|
std::vector<TBOX> boxes;
|
||||||
page_res_ = tesseract_->SetupApplyBoxes(boxes, block_list_);
|
page_res_ = tesseract_->SetupApplyBoxes(boxes, block_list_);
|
||||||
tesseract_->ReSegmentByClassification(page_res_);
|
tesseract_->ReSegmentByClassification(page_res_);
|
||||||
tesseract_->TidyUp(page_res_);
|
tesseract_->TidyUp(page_res_);
|
||||||
@ -2291,10 +2291,10 @@ void TessBaseAPI::DetectParagraphs(bool after_text_recognition) {
|
|||||||
int debug_level = 0;
|
int debug_level = 0;
|
||||||
GetIntVariable("paragraph_debug_level", &debug_level);
|
GetIntVariable("paragraph_debug_level", &debug_level);
|
||||||
if (paragraph_models_ == nullptr)
|
if (paragraph_models_ == nullptr)
|
||||||
paragraph_models_ = new std::list<ParagraphModel*>;
|
paragraph_models_ = new std::vector<ParagraphModel*>;
|
||||||
MutableIterator *result_it = GetMutableIterator();
|
MutableIterator *result_it = GetMutableIterator();
|
||||||
do { // Detect paragraphs for this block
|
do { // Detect paragraphs for this block
|
||||||
std::list<ParagraphModel *> models;
|
std::vector<ParagraphModel *> models;
|
||||||
::tesseract::DetectParagraphs(debug_level, after_text_recognition,
|
::tesseract::DetectParagraphs(debug_level, after_text_recognition,
|
||||||
result_it, &models);
|
result_it, &models);
|
||||||
paragraph_models_->insert(paragraph_models_->end(), models.begin(), models.end());
|
paragraph_models_->insert(paragraph_models_->end(), models.begin(), models.end());
|
||||||
|
@ -113,8 +113,8 @@ static void clear_any_old_text(BLOCK_LIST *block_list) {
|
|||||||
PAGE_RES* Tesseract::ApplyBoxes(const char* filename,
|
PAGE_RES* Tesseract::ApplyBoxes(const char* filename,
|
||||||
bool find_segmentation,
|
bool find_segmentation,
|
||||||
BLOCK_LIST *block_list) {
|
BLOCK_LIST *block_list) {
|
||||||
GenericVector<TBOX> boxes;
|
std::vector<TBOX> boxes;
|
||||||
GenericVector<STRING> texts, full_texts;
|
std::vector<STRING> texts, full_texts;
|
||||||
if (!ReadAllBoxes(applybox_page, true, filename, &boxes, &texts, &full_texts,
|
if (!ReadAllBoxes(applybox_page, true, filename, &boxes, &texts, &full_texts,
|
||||||
nullptr)) {
|
nullptr)) {
|
||||||
return nullptr; // Can't do it.
|
return nullptr; // Can't do it.
|
||||||
@ -205,7 +205,7 @@ void Tesseract::PreenXHeights(BLOCK_LIST *block_list) {
|
|||||||
|
|
||||||
/// Builds a PAGE_RES from the block_list in the way required for ApplyBoxes:
|
/// Builds a PAGE_RES from the block_list in the way required for ApplyBoxes:
|
||||||
/// All fuzzy spaces are removed, and all the words are maximally chopped.
|
/// All fuzzy spaces are removed, and all the words are maximally chopped.
|
||||||
PAGE_RES* Tesseract::SetupApplyBoxes(const GenericVector<TBOX>& boxes,
|
PAGE_RES* Tesseract::SetupApplyBoxes(const std::vector<TBOX>& boxes,
|
||||||
BLOCK_LIST *block_list) {
|
BLOCK_LIST *block_list) {
|
||||||
PreenXHeights(block_list);
|
PreenXHeights(block_list);
|
||||||
// Strip all fuzzy space markers to simplify the PAGE_RES.
|
// Strip all fuzzy space markers to simplify the PAGE_RES.
|
||||||
@ -241,7 +241,7 @@ PAGE_RES* Tesseract::SetupApplyBoxes(const GenericVector<TBOX>& boxes,
|
|||||||
/// Tests the chopper by exhaustively running chop_one_blob.
|
/// Tests the chopper by exhaustively running chop_one_blob.
|
||||||
/// The word_res will contain filled chopped_word, seam_array, denorm,
|
/// The word_res will contain filled chopped_word, seam_array, denorm,
|
||||||
/// box_word and best_state for the maximally chopped word.
|
/// box_word and best_state for the maximally chopped word.
|
||||||
void Tesseract::MaximallyChopWord(const GenericVector<TBOX>& boxes,
|
void Tesseract::MaximallyChopWord(const std::vector<TBOX>& boxes,
|
||||||
BLOCK* block, ROW* row,
|
BLOCK* block, ROW* row,
|
||||||
WERD_RES* word_res) {
|
WERD_RES* word_res) {
|
||||||
if (!word_res->SetupForRecognition(unicharset, this, BestPix(),
|
if (!word_res->SetupForRecognition(unicharset, this, BestPix(),
|
||||||
|
@ -52,8 +52,8 @@ bool Tesseract::TrainLineRecognizer(const char* input_imagename,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
GenericVector<TBOX> boxes;
|
std::vector<TBOX> boxes;
|
||||||
GenericVector<STRING> texts;
|
std::vector<STRING> texts;
|
||||||
// Get the boxes for this page, if there are any.
|
// Get the boxes for this page, if there are any.
|
||||||
if (!ReadAllBoxes(applybox_page, false, input_imagename, &boxes, &texts, nullptr,
|
if (!ReadAllBoxes(applybox_page, false, input_imagename, &boxes, &texts, nullptr,
|
||||||
nullptr) ||
|
nullptr) ||
|
||||||
@ -77,8 +77,8 @@ bool Tesseract::TrainLineRecognizer(const char* input_imagename,
|
|||||||
// Generates training data for training a line recognizer, eg LSTM.
|
// Generates training data for training a line recognizer, eg LSTM.
|
||||||
// Breaks the boxes into lines, normalizes them, converts to ImageData and
|
// Breaks the boxes into lines, normalizes them, converts to ImageData and
|
||||||
// appends them to the given training_data.
|
// appends them to the given training_data.
|
||||||
void Tesseract::TrainFromBoxes(const GenericVector<TBOX>& boxes,
|
void Tesseract::TrainFromBoxes(const std::vector<TBOX>& boxes,
|
||||||
const GenericVector<STRING>& texts,
|
const std::vector<STRING>& texts,
|
||||||
BLOCK_LIST *block_list,
|
BLOCK_LIST *block_list,
|
||||||
DocumentData* training_data) {
|
DocumentData* training_data) {
|
||||||
int box_count = boxes.size();
|
int box_count = boxes.size();
|
||||||
@ -133,8 +133,8 @@ void Tesseract::TrainFromBoxes(const GenericVector<TBOX>& boxes,
|
|||||||
// and ground truth boxes/truth text if available in the input.
|
// and ground truth boxes/truth text if available in the input.
|
||||||
// The image is not normalized in any way.
|
// The image is not normalized in any way.
|
||||||
ImageData* Tesseract::GetLineData(const TBOX& line_box,
|
ImageData* Tesseract::GetLineData(const TBOX& line_box,
|
||||||
const GenericVector<TBOX>& boxes,
|
const std::vector<TBOX>& boxes,
|
||||||
const GenericVector<STRING>& texts,
|
const std::vector<STRING>& texts,
|
||||||
int start_box, int end_box,
|
int start_box, int end_box,
|
||||||
const BLOCK& block) {
|
const BLOCK& block) {
|
||||||
TBOX revised_box;
|
TBOX revised_box;
|
||||||
@ -145,8 +145,8 @@ ImageData* Tesseract::GetLineData(const TBOX& line_box,
|
|||||||
// Copy the boxes and shift them so they are relative to the image.
|
// Copy the boxes and shift them so they are relative to the image.
|
||||||
FCOORD block_rotation(block.re_rotation().x(), -block.re_rotation().y());
|
FCOORD block_rotation(block.re_rotation().x(), -block.re_rotation().y());
|
||||||
ICOORD shift = -revised_box.botleft();
|
ICOORD shift = -revised_box.botleft();
|
||||||
GenericVector<TBOX> line_boxes;
|
std::vector<TBOX> line_boxes;
|
||||||
GenericVector<STRING> line_texts;
|
std::vector<STRING> line_texts;
|
||||||
for (int b = start_box; b < end_box; ++b) {
|
for (int b = start_box; b < end_box; ++b) {
|
||||||
TBOX box = boxes[b];
|
TBOX box = boxes[b];
|
||||||
box.rotate(block_rotation);
|
box.rotate(block_rotation);
|
||||||
@ -154,8 +154,8 @@ ImageData* Tesseract::GetLineData(const TBOX& line_box,
|
|||||||
line_boxes.push_back(box);
|
line_boxes.push_back(box);
|
||||||
line_texts.push_back(texts[b]);
|
line_texts.push_back(texts[b]);
|
||||||
}
|
}
|
||||||
GenericVector<int> page_numbers;
|
std::vector<int> page_numbers;
|
||||||
page_numbers.init_to_size(line_boxes.size(), applybox_page);
|
page_numbers.resize(line_boxes.size(), applybox_page);
|
||||||
image_data->AddBoxes(line_boxes, line_texts, page_numbers);
|
image_data->AddBoxes(line_boxes, line_texts, page_numbers);
|
||||||
return image_data;
|
return image_data;
|
||||||
}
|
}
|
||||||
|
@ -249,7 +249,7 @@ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks,
|
|||||||
// allowed_ids.
|
// allowed_ids.
|
||||||
static void AddAllScriptsConverted(const UNICHARSET& sid_set,
|
static void AddAllScriptsConverted(const UNICHARSET& sid_set,
|
||||||
const UNICHARSET& osd_set,
|
const UNICHARSET& osd_set,
|
||||||
GenericVector<int>* allowed_ids) {
|
std::vector<int>* allowed_ids) {
|
||||||
for (int i = 0; i < sid_set.get_script_table_size(); ++i) {
|
for (int i = 0; i < sid_set.get_script_table_size(); ++i) {
|
||||||
if (i != sid_set.null_sid()) {
|
if (i != sid_set.null_sid()) {
|
||||||
const char* script = sid_set.get_script_from_script_id(i);
|
const char* script = sid_set.get_script_from_script_id(i);
|
||||||
@ -357,7 +357,7 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
|
|||||||
to_block, &osd_blobs);
|
to_block, &osd_blobs);
|
||||||
}
|
}
|
||||||
if (PSM_OSD_ENABLED(pageseg_mode) && osd_tess != nullptr && osr != nullptr) {
|
if (PSM_OSD_ENABLED(pageseg_mode) && osd_tess != nullptr && osr != nullptr) {
|
||||||
GenericVector<int> osd_scripts;
|
std::vector<int> osd_scripts;
|
||||||
if (osd_tess != this) {
|
if (osd_tess != this) {
|
||||||
// We are running osd as part of layout analysis, so constrain the
|
// We are running osd as part of layout analysis, so constrain the
|
||||||
// scripts to those allowed by *this.
|
// scripts to those allowed by *this.
|
||||||
|
@ -1236,14 +1236,19 @@ const ParagraphModel* ParagraphTheory::AddModel(const ParagraphModel &model) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void ParagraphTheory::DiscardUnusedModels(const SetOfModels &used_models) {
|
void ParagraphTheory::DiscardUnusedModels(const SetOfModels &used_models) {
|
||||||
models_->remove_if([this, used_models](ParagraphModel* m) {
|
size_t w = 0;
|
||||||
bool remove = !used_models.contains(m) && models_we_added_.contains(m);
|
for (size_t r = 0; r < models_->size(); r++) {
|
||||||
if (remove) {
|
ParagraphModel* m = (*models_)[r];
|
||||||
models_we_added_.remove(models_we_added_.get_index(m));
|
if (!used_models.contains(m) && models_we_added_.contains(m)) {
|
||||||
delete m;
|
delete m;
|
||||||
|
} else {
|
||||||
|
if (r > w) {
|
||||||
|
(*models_)[w] = m;
|
||||||
|
}
|
||||||
|
w++;
|
||||||
}
|
}
|
||||||
return remove;
|
}
|
||||||
});
|
models_->resize(w);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Examine rows[start, end) and try to determine if an existing non-centered
|
// Examine rows[start, end) and try to determine if an existing non-centered
|
||||||
@ -2272,7 +2277,7 @@ void DetectParagraphs(int debug_level,
|
|||||||
GenericVector<RowInfo> *row_infos,
|
GenericVector<RowInfo> *row_infos,
|
||||||
GenericVector<PARA *> *row_owners,
|
GenericVector<PARA *> *row_owners,
|
||||||
PARA_LIST *paragraphs,
|
PARA_LIST *paragraphs,
|
||||||
std::list<ParagraphModel *> *models) {
|
std::vector<ParagraphModel *> *models) {
|
||||||
GenericVector<RowScratchRegisters> rows;
|
GenericVector<RowScratchRegisters> rows;
|
||||||
ParagraphTheory theory(models);
|
ParagraphTheory theory(models);
|
||||||
|
|
||||||
@ -2514,7 +2519,7 @@ static void InitializeRowInfo(bool after_recognition,
|
|||||||
void DetectParagraphs(int debug_level,
|
void DetectParagraphs(int debug_level,
|
||||||
bool after_text_recognition,
|
bool after_text_recognition,
|
||||||
const MutableIterator *block_start,
|
const MutableIterator *block_start,
|
||||||
std::list<ParagraphModel *> *models) {
|
std::vector<ParagraphModel *> *models) {
|
||||||
// Clear out any preconceived notions.
|
// Clear out any preconceived notions.
|
||||||
if (block_start->Empty(RIL_TEXTLINE)) {
|
if (block_start->Empty(RIL_TEXTLINE)) {
|
||||||
return;
|
return;
|
||||||
|
@ -91,7 +91,7 @@ void DetectParagraphs(int debug_level,
|
|||||||
GenericVector<RowInfo> *row_infos,
|
GenericVector<RowInfo> *row_infos,
|
||||||
GenericVector<PARA *> *row_owners,
|
GenericVector<PARA *> *row_owners,
|
||||||
PARA_LIST *paragraphs,
|
PARA_LIST *paragraphs,
|
||||||
std::list<ParagraphModel *> *models);
|
std::vector<ParagraphModel *> *models);
|
||||||
|
|
||||||
// Given a MutableIterator to the start of a block, run DetectParagraphs on
|
// Given a MutableIterator to the start of a block, run DetectParagraphs on
|
||||||
// that block and commit the results to the underlying ROW and BLOCK structs,
|
// that block and commit the results to the underlying ROW and BLOCK structs,
|
||||||
@ -101,7 +101,7 @@ void DetectParagraphs(int debug_level,
|
|||||||
void DetectParagraphs(int debug_level,
|
void DetectParagraphs(int debug_level,
|
||||||
bool after_text_recognition,
|
bool after_text_recognition,
|
||||||
const MutableIterator *block_start,
|
const MutableIterator *block_start,
|
||||||
std::list<ParagraphModel *> *models);
|
std::vector<ParagraphModel *> *models);
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
|
@ -193,10 +193,10 @@ class ParagraphTheory {
|
|||||||
public:
|
public:
|
||||||
// We presume models will outlive us, and that models will take ownership
|
// We presume models will outlive us, and that models will take ownership
|
||||||
// of any ParagraphModel *'s we add.
|
// of any ParagraphModel *'s we add.
|
||||||
explicit ParagraphTheory(std::list<ParagraphModel *> *models)
|
explicit ParagraphTheory(std::vector<ParagraphModel *> *models)
|
||||||
: models_(models) {}
|
: models_(models) {}
|
||||||
std::list<ParagraphModel *> &models() { return *models_; }
|
std::vector<ParagraphModel *> &models() { return *models_; }
|
||||||
const std::list<ParagraphModel *> &models() const { return *models_; }
|
const std::vector<ParagraphModel *> &models() const { return *models_; }
|
||||||
|
|
||||||
// Return an existing model if one that is Comparable() can be found.
|
// Return an existing model if one that is Comparable() can be found.
|
||||||
// Else, allocate a new copy of model to save and return a pointer to it.
|
// Else, allocate a new copy of model to save and return a pointer to it.
|
||||||
@ -216,7 +216,7 @@ class ParagraphTheory {
|
|||||||
int IndexOf(const ParagraphModel *model) const;
|
int IndexOf(const ParagraphModel *model) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::list<ParagraphModel *> *models_;
|
std::vector<ParagraphModel *> *models_;
|
||||||
GenericVectorEqEq<ParagraphModel *> models_we_added_;
|
GenericVectorEqEq<ParagraphModel *> models_we_added_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -360,7 +360,7 @@ void ResultIterator::MoveToLogicalStartOfWord() {
|
|||||||
BeginWord(0);
|
BeginWord(0);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
GenericVector<int> blob_order;
|
std::vector<int> blob_order;
|
||||||
CalculateBlobOrder(&blob_order);
|
CalculateBlobOrder(&blob_order);
|
||||||
if (blob_order.size() == 0 || blob_order[0] == 0)
|
if (blob_order.size() == 0 || blob_order[0] == 0)
|
||||||
return;
|
return;
|
||||||
@ -370,7 +370,7 @@ void ResultIterator::MoveToLogicalStartOfWord() {
|
|||||||
bool ResultIterator::IsAtFinalSymbolOfWord() const {
|
bool ResultIterator::IsAtFinalSymbolOfWord() const {
|
||||||
if (!it_->word())
|
if (!it_->word())
|
||||||
return true;
|
return true;
|
||||||
GenericVector<int> blob_order;
|
std::vector<int> blob_order;
|
||||||
CalculateBlobOrder(&blob_order);
|
CalculateBlobOrder(&blob_order);
|
||||||
return blob_order.size() == 0 || blob_order.back() == blob_index_;
|
return blob_order.size() == 0 || blob_order.back() == blob_index_;
|
||||||
}
|
}
|
||||||
@ -378,7 +378,7 @@ bool ResultIterator::IsAtFinalSymbolOfWord() const {
|
|||||||
bool ResultIterator::IsAtFirstSymbolOfWord() const {
|
bool ResultIterator::IsAtFirstSymbolOfWord() const {
|
||||||
if (!it_->word())
|
if (!it_->word())
|
||||||
return true;
|
return true;
|
||||||
GenericVector<int> blob_order;
|
std::vector<int> blob_order;
|
||||||
CalculateBlobOrder(&blob_order);
|
CalculateBlobOrder(&blob_order);
|
||||||
return blob_order.size() == 0 || blob_order[0] == blob_index_;
|
return blob_order.size() == 0 || blob_order[0] == blob_index_;
|
||||||
}
|
}
|
||||||
@ -472,7 +472,7 @@ bool ResultIterator::Next(PageIteratorLevel level) {
|
|||||||
MoveToLogicalStartOfTextline();
|
MoveToLogicalStartOfTextline();
|
||||||
return it_->block() != nullptr;
|
return it_->block() != nullptr;
|
||||||
case RIL_SYMBOL: {
|
case RIL_SYMBOL: {
|
||||||
GenericVector<int> blob_order;
|
std::vector<int> blob_order;
|
||||||
CalculateBlobOrder(&blob_order);
|
CalculateBlobOrder(&blob_order);
|
||||||
int next_blob = 0;
|
int next_blob = 0;
|
||||||
while (next_blob < blob_order.size() &&
|
while (next_blob < blob_order.size() &&
|
||||||
@ -674,7 +674,7 @@ void ResultIterator::AppendUTF8WordText(std::string* text) const {
|
|||||||
*text += reading_direction_is_ltr ? kLRM : kRLM;
|
*text += reading_direction_is_ltr ? kLRM : kRLM;
|
||||||
}
|
}
|
||||||
|
|
||||||
GenericVector<int> blob_order;
|
std::vector<int> blob_order;
|
||||||
CalculateBlobOrder(&blob_order);
|
CalculateBlobOrder(&blob_order);
|
||||||
for (int i = 0; i < blob_order.size(); i++) {
|
for (int i = 0; i < blob_order.size(); i++) {
|
||||||
*text += it_->word()->BestUTF8(blob_order[i], false);
|
*text += it_->word()->BestUTF8(blob_order[i], false);
|
||||||
|
@ -347,15 +347,15 @@ class Tesseract : public Wordrec {
|
|||||||
// Generates training data for training a line recognizer, eg LSTM.
|
// Generates training data for training a line recognizer, eg LSTM.
|
||||||
// Breaks the boxes into lines, normalizes them, converts to ImageData and
|
// Breaks the boxes into lines, normalizes them, converts to ImageData and
|
||||||
// appends them to the given training_data.
|
// appends them to the given training_data.
|
||||||
void TrainFromBoxes(const GenericVector<TBOX>& boxes,
|
void TrainFromBoxes(const std::vector<TBOX>& boxes,
|
||||||
const GenericVector<STRING>& texts,
|
const std::vector<STRING>& texts,
|
||||||
BLOCK_LIST* block_list, DocumentData* training_data);
|
BLOCK_LIST* block_list, DocumentData* training_data);
|
||||||
|
|
||||||
// Returns an Imagedata containing the image of the given textline,
|
// Returns an Imagedata containing the image of the given textline,
|
||||||
// and ground truth boxes/truth text if available in the input.
|
// and ground truth boxes/truth text if available in the input.
|
||||||
// The image is not normalized in any way.
|
// The image is not normalized in any way.
|
||||||
ImageData* GetLineData(const TBOX& line_box, const GenericVector<TBOX>& boxes,
|
ImageData* GetLineData(const TBOX& line_box, const std::vector<TBOX>& boxes,
|
||||||
const GenericVector<STRING>& texts, int start_box,
|
const std::vector<STRING>& texts, int start_box,
|
||||||
int end_box, const BLOCK& block);
|
int end_box, const BLOCK& block);
|
||||||
// Helper gets the image of a rectangle, using the block.re_rotation() if
|
// Helper gets the image of a rectangle, using the block.re_rotation() if
|
||||||
// needed to get to the image, and rotating the result back to horizontal
|
// needed to get to the image, and rotating the result back to horizontal
|
||||||
@ -708,12 +708,12 @@ class Tesseract : public Wordrec {
|
|||||||
|
|
||||||
// Builds a PAGE_RES from the block_list in the way required for ApplyBoxes:
|
// Builds a PAGE_RES from the block_list in the way required for ApplyBoxes:
|
||||||
// All fuzzy spaces are removed, and all the words are maximally chopped.
|
// All fuzzy spaces are removed, and all the words are maximally chopped.
|
||||||
PAGE_RES* SetupApplyBoxes(const GenericVector<TBOX>& boxes,
|
PAGE_RES* SetupApplyBoxes(const std::vector<TBOX>& boxes,
|
||||||
BLOCK_LIST* block_list);
|
BLOCK_LIST* block_list);
|
||||||
// Tests the chopper by exhaustively running chop_one_blob.
|
// Tests the chopper by exhaustively running chop_one_blob.
|
||||||
// The word_res will contain filled chopped_word, seam_array, denorm,
|
// The word_res will contain filled chopped_word, seam_array, denorm,
|
||||||
// box_word and best_state for the maximally chopped word.
|
// box_word and best_state for the maximally chopped word.
|
||||||
void MaximallyChopWord(const GenericVector<TBOX>& boxes, BLOCK* block,
|
void MaximallyChopWord(const std::vector<TBOX>& boxes, BLOCK* block,
|
||||||
ROW* row, WERD_RES* word_res);
|
ROW* row, WERD_RES* word_res);
|
||||||
// Gather consecutive blobs that match the given box into the best_state
|
// Gather consecutive blobs that match the given box into the best_state
|
||||||
// and corresponding correct_text.
|
// and corresponding correct_text.
|
||||||
|
@ -331,9 +331,9 @@ void ImageData::Display() const {
|
|||||||
|
|
||||||
// Adds the supplied boxes and transcriptions that correspond to the correct
|
// Adds the supplied boxes and transcriptions that correspond to the correct
|
||||||
// page number.
|
// page number.
|
||||||
void ImageData::AddBoxes(const GenericVector<TBOX>& boxes,
|
void ImageData::AddBoxes(const std::vector<TBOX>& boxes,
|
||||||
const GenericVector<STRING>& texts,
|
const std::vector<STRING>& texts,
|
||||||
const GenericVector<int>& box_pages) {
|
const std::vector<int>& box_pages) {
|
||||||
// Copy the boxes and make the transcription.
|
// Copy the boxes and make the transcription.
|
||||||
for (int i = 0; i < box_pages.size(); ++i) {
|
for (int i = 0; i < box_pages.size(); ++i) {
|
||||||
if (page_number_ >= 0 && box_pages[i] != page_number_) continue;
|
if (page_number_ >= 0 && box_pages[i] != page_number_) continue;
|
||||||
@ -378,9 +378,9 @@ Pix* ImageData::GetPixInternal(const GenericVector<char>& image_data) {
|
|||||||
// match the page number. Returns false on error.
|
// match the page number. Returns false on error.
|
||||||
bool ImageData::AddBoxes(const char* box_text) {
|
bool ImageData::AddBoxes(const char* box_text) {
|
||||||
if (box_text != nullptr && box_text[0] != '\0') {
|
if (box_text != nullptr && box_text[0] != '\0') {
|
||||||
GenericVector<TBOX> boxes;
|
std::vector<TBOX> boxes;
|
||||||
GenericVector<STRING> texts;
|
std::vector<STRING> texts;
|
||||||
GenericVector<int> box_pages;
|
std::vector<int> box_pages;
|
||||||
if (ReadMemBoxes(page_number_, /*skip_blanks*/ false, box_text,
|
if (ReadMemBoxes(page_number_, /*skip_blanks*/ false, box_text,
|
||||||
/*continue_on_failure*/ true, &boxes, &texts, nullptr,
|
/*continue_on_failure*/ true, &boxes, &texts, nullptr,
|
||||||
&box_pages)) {
|
&box_pages)) {
|
||||||
@ -587,7 +587,7 @@ DocumentCache::~DocumentCache() {}
|
|||||||
|
|
||||||
// Adds all the documents in the list of filenames, counting memory.
|
// Adds all the documents in the list of filenames, counting memory.
|
||||||
// The reader is used to read the files.
|
// The reader is used to read the files.
|
||||||
bool DocumentCache::LoadDocuments(const GenericVector<STRING>& filenames,
|
bool DocumentCache::LoadDocuments(const std::vector<STRING>& filenames,
|
||||||
CachingStrategy cache_strategy,
|
CachingStrategy cache_strategy,
|
||||||
FileReader reader) {
|
FileReader reader) {
|
||||||
cache_strategy_ = cache_strategy;
|
cache_strategy_ = cache_strategy;
|
||||||
|
@ -181,9 +181,9 @@ class ImageData {
|
|||||||
|
|
||||||
// Adds the supplied boxes and transcriptions that correspond to the correct
|
// Adds the supplied boxes and transcriptions that correspond to the correct
|
||||||
// page number.
|
// page number.
|
||||||
void AddBoxes(const GenericVector<TBOX>& boxes,
|
void AddBoxes(const std::vector<TBOX>& boxes,
|
||||||
const GenericVector<STRING>& texts,
|
const std::vector<STRING>& texts,
|
||||||
const GenericVector<int>& box_pages);
|
const std::vector<int>& box_pages);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Saves the given Pix as a PNG-encoded string and destroys it.
|
// Saves the given Pix as a PNG-encoded string and destroys it.
|
||||||
@ -335,7 +335,7 @@ class DocumentCache {
|
|||||||
}
|
}
|
||||||
// Adds all the documents in the list of filenames, counting memory.
|
// Adds all the documents in the list of filenames, counting memory.
|
||||||
// The reader is used to read the files.
|
// The reader is used to read the files.
|
||||||
bool LoadDocuments(const GenericVector<STRING>& filenames,
|
bool LoadDocuments(const std::vector<STRING>& filenames,
|
||||||
CachingStrategy cache_strategy, FileReader reader);
|
CachingStrategy cache_strategy, FileReader reader);
|
||||||
|
|
||||||
// Adds document to the cache.
|
// Adds document to the cache.
|
||||||
|
@ -152,7 +152,7 @@ class GENERIC_2D_ARRAY {
|
|||||||
return tesseract::Serialize(fp, &array_[0], size);
|
return tesseract::Serialize(fp, &array_[0], size);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Serialize(tesseract::TFile* fp) const {
|
bool Serialize(TFile* fp) const {
|
||||||
if (!SerializeSize(fp)) return false;
|
if (!SerializeSize(fp)) return false;
|
||||||
if (!fp->Serialize(&empty_)) return false;
|
if (!fp->Serialize(&empty_)) return false;
|
||||||
int size = num_elements();
|
int size = num_elements();
|
||||||
@ -175,7 +175,7 @@ class GENERIC_2D_ARRAY {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DeSerialize(tesseract::TFile* fp) {
|
bool DeSerialize(TFile* fp) {
|
||||||
return DeSerializeSize(fp) &&
|
return DeSerializeSize(fp) &&
|
||||||
fp->DeSerialize(&empty_) &&
|
fp->DeSerialize(&empty_) &&
|
||||||
fp->DeSerialize(&array_[0], num_elements());
|
fp->DeSerialize(&array_[0], num_elements());
|
||||||
@ -473,7 +473,7 @@ class GENERIC_2D_ARRAY {
|
|||||||
size = dim2_;
|
size = dim2_;
|
||||||
return tesseract::Serialize(fp, &size);
|
return tesseract::Serialize(fp, &size);
|
||||||
}
|
}
|
||||||
bool SerializeSize(tesseract::TFile* fp) const {
|
bool SerializeSize(TFile* fp) const {
|
||||||
uint32_t size = dim1_;
|
uint32_t size = dim1_;
|
||||||
if (!fp->Serialize(&size)) return false;
|
if (!fp->Serialize(&size)) return false;
|
||||||
size = dim2_;
|
size = dim2_;
|
||||||
@ -495,7 +495,7 @@ class GENERIC_2D_ARRAY {
|
|||||||
Resize(size1, size2, empty_);
|
Resize(size1, size2, empty_);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
bool DeSerializeSize(tesseract::TFile* fp) {
|
bool DeSerializeSize(TFile* fp) {
|
||||||
int32_t size1, size2;
|
int32_t size1, size2;
|
||||||
if (!fp->DeSerialize(&size1)) return false;
|
if (!fp->DeSerialize(&size1)) return false;
|
||||||
if (!fp->DeSerialize(&size2)) return false;
|
if (!fp->DeSerialize(&size2)) return false;
|
||||||
@ -639,7 +639,7 @@ struct MATRIX_COORD {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// The MatrixCoordPair contains a MATRIX_COORD and its priority.
|
// The MatrixCoordPair contains a MATRIX_COORD and its priority.
|
||||||
using MatrixCoordPair = tesseract::KDPairInc<float, MATRIX_COORD>;
|
using MatrixCoordPair = KDPairInc<float, MATRIX_COORD>;
|
||||||
|
|
||||||
} // namespace tesseract
|
} // namespace tesseract
|
||||||
|
|
||||||
|
@ -29,7 +29,6 @@
|
|||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <functional> // for std::function
|
#include <functional> // for std::function
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
namespace tesseract {
|
namespace tesseract {
|
||||||
|
|
||||||
@ -37,48 +36,64 @@ namespace tesseract {
|
|||||||
// provides automatic deletion of pointers, [De]Serialize that works, and
|
// provides automatic deletion of pointers, [De]Serialize that works, and
|
||||||
// sort that works.
|
// sort that works.
|
||||||
template <typename T>
|
template <typename T>
|
||||||
class GenericVector : public std::vector<T> {
|
class GenericVector {
|
||||||
using base = std::vector<T>;
|
|
||||||
public:
|
public:
|
||||||
using std::vector<T>::vector;
|
GenericVector() {
|
||||||
|
init(kDefaultVectorSize);
|
||||||
using base::begin;
|
}
|
||||||
using base::end;
|
GenericVector(int size, const T& init_val) {
|
||||||
using base::data;
|
init(size);
|
||||||
using base::capacity;
|
init_to_size(size, init_val);
|
||||||
using base::reserve;
|
}
|
||||||
using base::resize;
|
|
||||||
using base::back;
|
|
||||||
using base::clear;
|
|
||||||
using base::push_back;
|
|
||||||
|
|
||||||
|
// Copy
|
||||||
|
GenericVector(const GenericVector& other) {
|
||||||
|
this->init(other.size());
|
||||||
|
this->operator+=(other);
|
||||||
|
}
|
||||||
GenericVector<T>& operator+=(const GenericVector& other);
|
GenericVector<T>& operator+=(const GenericVector& other);
|
||||||
|
GenericVector<T>& operator=(const GenericVector& other);
|
||||||
|
|
||||||
|
~GenericVector();
|
||||||
|
|
||||||
|
// Reserve some memory.
|
||||||
|
void reserve(int size);
|
||||||
// Double the size of the internal array.
|
// Double the size of the internal array.
|
||||||
void double_the_size();
|
void double_the_size();
|
||||||
|
|
||||||
// Resizes to size and sets all values to t.
|
// Resizes to size and sets all values to t.
|
||||||
void init_to_size(int size, const T& t);
|
void init_to_size(int size, const T& t);
|
||||||
|
void resize(int size, const T& t);
|
||||||
// Resizes to size without any initialization.
|
// Resizes to size without any initialization.
|
||||||
void resize_no_init(int size) {
|
void resize_no_init(int size) {
|
||||||
resize(size);
|
reserve(size);
|
||||||
|
size_used_ = size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Return the size used.
|
||||||
|
int size() const {
|
||||||
|
return size_used_;
|
||||||
|
}
|
||||||
// Workaround to avoid g++ -Wsign-compare warnings.
|
// Workaround to avoid g++ -Wsign-compare warnings.
|
||||||
size_t unsigned_size() const {
|
size_t unsigned_size() const {
|
||||||
return size();
|
static_assert(sizeof(size_used_) <= sizeof(size_t),
|
||||||
|
"Wow! sizeof(size_t) < sizeof(int32_t)!!");
|
||||||
|
assert(0 <= size_used_);
|
||||||
|
return static_cast<size_t>(size_used_);
|
||||||
}
|
}
|
||||||
int size_reserved() const {
|
int size_reserved() const {
|
||||||
return capacity();
|
return size_reserved_;
|
||||||
}
|
}
|
||||||
|
|
||||||
int size() const {
|
// Return true if empty.
|
||||||
return base::size();
|
bool empty() const {
|
||||||
|
return size_used_ == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return the object from an index.
|
// Return the object from an index.
|
||||||
T& get(int index);
|
T& get(int index) const;
|
||||||
const T& get(int index) const;
|
T& back() const;
|
||||||
|
T& operator[](int index) const;
|
||||||
// Returns the last object and removes it.
|
// Returns the last object and removes it.
|
||||||
T pop_back();
|
T pop_back();
|
||||||
|
|
||||||
@ -94,6 +109,7 @@ class GenericVector : public std::vector<T> {
|
|||||||
T contains_index(int index) const;
|
T contains_index(int index) const;
|
||||||
|
|
||||||
// Push an element in the end of the array
|
// Push an element in the end of the array
|
||||||
|
int push_back(T object);
|
||||||
void operator+=(const T& t);
|
void operator+=(const T& t);
|
||||||
|
|
||||||
// Push an element in the end of the array if the same
|
// Push an element in the end of the array if the same
|
||||||
@ -117,7 +133,9 @@ class GenericVector : public std::vector<T> {
|
|||||||
// Truncates the array to the given size by removing the end.
|
// Truncates the array to the given size by removing the end.
|
||||||
// If the current size is less, the array is not expanded.
|
// If the current size is less, the array is not expanded.
|
||||||
void truncate(int size) {
|
void truncate(int size) {
|
||||||
resize(size);
|
if (size < size_used_) {
|
||||||
|
size_used_ = size;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add a callback to be called to delete the elements when the array took
|
// Add a callback to be called to delete the elements when the array took
|
||||||
@ -132,7 +150,13 @@ class GenericVector : public std::vector<T> {
|
|||||||
compare_cb_ = cb;
|
compare_cb_ = cb;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Delete objects pointed to by data()[i]
|
// Clear the array, calling the clear callback function if any.
|
||||||
|
// All the owned callbacks are also deleted.
|
||||||
|
// If you don't want the callbacks to be deleted, before calling clear, set
|
||||||
|
// the callback to nullptr.
|
||||||
|
void clear();
|
||||||
|
|
||||||
|
// Delete objects pointed to by data_[i]
|
||||||
void delete_data_pointers();
|
void delete_data_pointers();
|
||||||
|
|
||||||
// This method clears the current object, then, does a shallow copy of
|
// This method clears the current object, then, does a shallow copy of
|
||||||
@ -192,8 +216,8 @@ class GenericVector : public std::vector<T> {
|
|||||||
|
|
||||||
// Reverses the elements of the vector.
|
// Reverses the elements of the vector.
|
||||||
void reverse() {
|
void reverse() {
|
||||||
for (int i = 0; i < size() / 2; ++i) {
|
for (int i = 0; i < size_used_ / 2; ++i) {
|
||||||
Swap(&data()[i], &data()[size() - 1 - i]);
|
Swap(&data_[i], &data_[size_used_ - 1 - i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -209,7 +233,7 @@ class GenericVector : public std::vector<T> {
|
|||||||
// to two Ts and returns negative if the first element is to appear earlier
|
// to two Ts and returns negative if the first element is to appear earlier
|
||||||
// in the result and positive if it is to appear later, with 0 for equal.
|
// in the result and positive if it is to appear later, with 0 for equal.
|
||||||
void sort(int (*comparator)(const void*, const void*)) {
|
void sort(int (*comparator)(const void*, const void*)) {
|
||||||
qsort(data(), size(), sizeof(*data()), comparator);
|
qsort(data_, size_used_, sizeof(*data_), comparator);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Searches the array (assuming sorted in ascending order, using sort()) for
|
// Searches the array (assuming sorted in ascending order, using sort()) for
|
||||||
@ -217,23 +241,23 @@ class GenericVector : public std::vector<T> {
|
|||||||
// Use binary_search to get the index of target, or its nearest candidate.
|
// Use binary_search to get the index of target, or its nearest candidate.
|
||||||
bool bool_binary_search(const T& target) const {
|
bool bool_binary_search(const T& target) const {
|
||||||
int index = binary_search(target);
|
int index = binary_search(target);
|
||||||
if (index >= size()) {
|
if (index >= size_used_) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return data()[index] == target;
|
return data_[index] == target;
|
||||||
}
|
}
|
||||||
// Searches the array (assuming sorted in ascending order, using sort()) for
|
// Searches the array (assuming sorted in ascending order, using sort()) for
|
||||||
// an element equal to target and returns the index of the best candidate.
|
// an element equal to target and returns the index of the best candidate.
|
||||||
// The return value is conceptually the largest index i such that
|
// The return value is conceptually the largest index i such that
|
||||||
// data()[i] <= target or 0 if target < the whole vector.
|
// data_[i] <= target or 0 if target < the whole vector.
|
||||||
// NOTE that this function uses operator> so really the return value is
|
// NOTE that this function uses operator> so really the return value is
|
||||||
// the largest index i such that data()[i] > target is false.
|
// the largest index i such that data_[i] > target is false.
|
||||||
int binary_search(const T& target) const {
|
int binary_search(const T& target) const {
|
||||||
int bottom = 0;
|
int bottom = 0;
|
||||||
int top = size();
|
int top = size_used_;
|
||||||
while (top - bottom > 1) {
|
while (top - bottom > 1) {
|
||||||
int middle = (bottom + top) / 2;
|
int middle = (bottom + top) / 2;
|
||||||
if (data()[middle] > target) {
|
if (data_[middle] > target) {
|
||||||
top = middle;
|
top = middle;
|
||||||
} else {
|
} else {
|
||||||
bottom = middle;
|
bottom = middle;
|
||||||
@ -245,20 +269,20 @@ class GenericVector : public std::vector<T> {
|
|||||||
// Compact the vector by deleting elements using operator!= on basic types.
|
// Compact the vector by deleting elements using operator!= on basic types.
|
||||||
// The vector must be sorted.
|
// The vector must be sorted.
|
||||||
void compact_sorted() {
|
void compact_sorted() {
|
||||||
if (size() == 0) {
|
if (size_used_ == 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// First element is in no matter what, hence the i = 1.
|
// First element is in no matter what, hence the i = 1.
|
||||||
int last_write = 0;
|
int last_write = 0;
|
||||||
for (int i = 1; i < size(); ++i) {
|
for (int i = 1; i < size_used_; ++i) {
|
||||||
// Finds next unique item and writes it.
|
// Finds next unique item and writes it.
|
||||||
if (data()[last_write] != data()[i]) {
|
if (data_[last_write] != data_[i]) {
|
||||||
data()[++last_write] = data()[i];
|
data_[++last_write] = data_[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// last_write is the index of a valid data cell, so add 1.
|
// last_write is the index of a valid data cell, so add 1.
|
||||||
resize(last_write + 1);
|
size_used_ = last_write + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns the index of what would be the target_index_th item in the array
|
// Returns the index of what would be the target_index_th item in the array
|
||||||
@ -269,26 +293,26 @@ class GenericVector : public std::vector<T> {
|
|||||||
// Make sure target_index is legal.
|
// Make sure target_index is legal.
|
||||||
if (target_index < 0) {
|
if (target_index < 0) {
|
||||||
target_index = 0; // ensure legal
|
target_index = 0; // ensure legal
|
||||||
} else if (target_index >= size()) {
|
} else if (target_index >= size_used_) {
|
||||||
target_index = size() - 1;
|
target_index = size_used_ - 1;
|
||||||
}
|
}
|
||||||
unsigned int seed = 1;
|
unsigned int seed = 1;
|
||||||
return choose_nth_item(target_index, 0, size(), &seed);
|
return choose_nth_item(target_index, 0, size_used_, &seed);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Swaps the elements with the given indices.
|
// Swaps the elements with the given indices.
|
||||||
void swap(int index1, int index2) {
|
void swap(int index1, int index2) {
|
||||||
if (index1 != index2) {
|
if (index1 != index2) {
|
||||||
T tmp = data()[index1];
|
T tmp = data_[index1];
|
||||||
data()[index1] = data()[index2];
|
data_[index1] = data_[index2];
|
||||||
data()[index2] = tmp;
|
data_[index2] = tmp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Returns true if all elements of *this are within the given range.
|
// Returns true if all elements of *this are within the given range.
|
||||||
// Only uses operator<
|
// Only uses operator<
|
||||||
bool WithinBounds(const T& rangemin, const T& rangemax) const {
|
bool WithinBounds(const T& rangemin, const T& rangemax) const {
|
||||||
for (int i = 0; i < size(); ++i) {
|
for (int i = 0; i < size_used_; ++i) {
|
||||||
if (data()[i] < rangemin || rangemax < data()[i]) {
|
if (data_[i] < rangemin || rangemax < data_[i]) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -306,18 +330,47 @@ class GenericVector : public std::vector<T> {
|
|||||||
// vector are small enough that for efficiency it makes sense
|
// vector are small enough that for efficiency it makes sense
|
||||||
// to start with a larger initial size.
|
// to start with a larger initial size.
|
||||||
static const int kDefaultVectorSize = 4;
|
static const int kDefaultVectorSize = 4;
|
||||||
|
int32_t size_used_{};
|
||||||
|
int32_t size_reserved_{};
|
||||||
|
T* data_;
|
||||||
std::function<void(T)> clear_cb_;
|
std::function<void(T)> clear_cb_;
|
||||||
std::function<bool(const T&, const T&)> compare_cb_;
|
std::function<bool(const T&, const T&)> compare_cb_;
|
||||||
};
|
};
|
||||||
|
|
||||||
#if defined(_MSC_VER) || defined(__APPLE__)
|
// The default FileReader loads the whole file into the vector of char,
|
||||||
// MSVC stl does not have ::data() in vector<bool>,
|
// returning false on error.
|
||||||
// so we add custom specialization.
|
inline bool LoadDataFromFile(const char* filename, GenericVector<char>* data) {
|
||||||
// On Apple there are also errors when using std::vector<bool>,
|
bool result = false;
|
||||||
// so we replace it with vector<int> as a workaround.
|
FILE* fp = fopen(filename, "rb");
|
||||||
template <>
|
if (fp != nullptr) {
|
||||||
class GenericVector<bool> : public std::vector<int> {};
|
fseek(fp, 0, SEEK_END);
|
||||||
#endif
|
auto size = std::ftell(fp);
|
||||||
|
fseek(fp, 0, SEEK_SET);
|
||||||
|
// Trying to open a directory on Linux sets size to LONG_MAX. Catch it here.
|
||||||
|
if (size > 0 && size < LONG_MAX) {
|
||||||
|
// reserve an extra byte in case caller wants to append a '\0' character
|
||||||
|
data->reserve(size + 1);
|
||||||
|
data->resize_no_init(size);
|
||||||
|
result = static_cast<long>(fread(&(*data)[0], 1, size, fp)) == size;
|
||||||
|
}
|
||||||
|
fclose(fp);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The default FileWriter writes the vector of char to the filename file,
|
||||||
|
// returning false on error.
|
||||||
|
inline bool SaveDataToFile(const GenericVector<char>& data,
|
||||||
|
const char* filename) {
|
||||||
|
FILE* fp = fopen(filename, "wb");
|
||||||
|
if (fp == nullptr) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
bool result =
|
||||||
|
static_cast<int>(fwrite(&data[0], 1, data.size(), fp)) == data.size();
|
||||||
|
fclose(fp);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
bool cmp_eq(T const& t1, T const& t2) {
|
bool cmp_eq(T const& t1, T const& t2) {
|
||||||
@ -378,9 +431,9 @@ class PointerVector : public GenericVector<T*> {
|
|||||||
this->operator+=(other);
|
this->operator+=(other);
|
||||||
}
|
}
|
||||||
PointerVector<T>& operator+=(const PointerVector& other) {
|
PointerVector<T>& operator+=(const PointerVector& other) {
|
||||||
this->reserve(this->size() + other.size());
|
this->reserve(this->size_used_ + other.size_used_);
|
||||||
for (int i = 0; i < other.size(); ++i) {
|
for (int i = 0; i < other.size(); ++i) {
|
||||||
this->push_back(new T(*other.data()[i]));
|
this->push_back(new T(*other.data_[i]));
|
||||||
}
|
}
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
@ -396,15 +449,15 @@ class PointerVector : public GenericVector<T*> {
|
|||||||
// Removes an element at the given index and
|
// Removes an element at the given index and
|
||||||
// shifts the remaining elements to the left.
|
// shifts the remaining elements to the left.
|
||||||
void remove(int index) {
|
void remove(int index) {
|
||||||
delete GenericVector<T*>::data()[index];
|
delete GenericVector<T*>::data_[index];
|
||||||
GenericVector<T*>::remove(index);
|
GenericVector<T*>::remove(index);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Truncates the array to the given size by removing the end.
|
// Truncates the array to the given size by removing the end.
|
||||||
// If the current size is less, the array is not expanded.
|
// If the current size is less, the array is not expanded.
|
||||||
void truncate(int size) {
|
void truncate(int size) {
|
||||||
for (int i = size; i < GenericVector<T*>::size(); ++i) {
|
for (int i = size; i < GenericVector<T*>::size_used_; ++i) {
|
||||||
delete GenericVector<T*>::data()[i];
|
delete GenericVector<T*>::data_[i];
|
||||||
}
|
}
|
||||||
GenericVector<T*>::truncate(size);
|
GenericVector<T*>::truncate(size);
|
||||||
}
|
}
|
||||||
@ -415,20 +468,20 @@ class PointerVector : public GenericVector<T*> {
|
|||||||
int new_size = 0;
|
int new_size = 0;
|
||||||
int old_index = 0;
|
int old_index = 0;
|
||||||
// Until the callback returns true, the elements stay the same.
|
// Until the callback returns true, the elements stay the same.
|
||||||
while (old_index < GenericVector<T*>::size() &&
|
while (old_index < GenericVector<T*>::size_used_ &&
|
||||||
!delete_cb(GenericVector<T*>::data()[old_index++])) {
|
!delete_cb(GenericVector<T*>::data_[old_index++])) {
|
||||||
++new_size;
|
++new_size;
|
||||||
}
|
}
|
||||||
// Now just copy anything else that gets false from delete_cb.
|
// Now just copy anything else that gets false from delete_cb.
|
||||||
for (; old_index < GenericVector<T*>::size(); ++old_index) {
|
for (; old_index < GenericVector<T*>::size_used_; ++old_index) {
|
||||||
if (!delete_cb(GenericVector<T*>::data()[old_index])) {
|
if (!delete_cb(GenericVector<T*>::data_[old_index])) {
|
||||||
GenericVector<T*>::data()[new_size++] =
|
GenericVector<T*>::data_[new_size++] =
|
||||||
GenericVector<T*>::data()[old_index];
|
GenericVector<T*>::data_[old_index];
|
||||||
} else {
|
} else {
|
||||||
delete GenericVector<T*>::data()[old_index];
|
delete GenericVector<T*>::data_[old_index];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
GenericVector<T*>::resize(new_size);
|
GenericVector<T*>::size_used_ = new_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Clear the array, calling the clear callback function if any.
|
// Clear the array, calling the clear callback function if any.
|
||||||
@ -446,32 +499,32 @@ class PointerVector : public GenericVector<T*> {
|
|||||||
// normal GenericVector of those.
|
// normal GenericVector of those.
|
||||||
// Returns false in case of error.
|
// Returns false in case of error.
|
||||||
bool Serialize(FILE* fp) const {
|
bool Serialize(FILE* fp) const {
|
||||||
int32_t used = GenericVector<T*>::size();
|
int32_t used = GenericVector<T*>::size_used_;
|
||||||
if (fwrite(&used, sizeof(used), 1, fp) != 1) {
|
if (fwrite(&used, sizeof(used), 1, fp) != 1) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
for (int i = 0; i < used; ++i) {
|
for (int i = 0; i < used; ++i) {
|
||||||
int8_t non_null = GenericVector<T*>::data()[i] != nullptr;
|
int8_t non_null = GenericVector<T*>::data_[i] != nullptr;
|
||||||
if (fwrite(&non_null, sizeof(non_null), 1, fp) != 1) {
|
if (fwrite(&non_null, sizeof(non_null), 1, fp) != 1) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (non_null && !GenericVector<T*>::data()[i]->Serialize(fp)) {
|
if (non_null && !GenericVector<T*>::data_[i]->Serialize(fp)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
bool Serialize(TFile* fp) const {
|
bool Serialize(TFile* fp) const {
|
||||||
int32_t used = GenericVector<T*>::size();
|
int32_t used = GenericVector<T*>::size_used_;
|
||||||
if (fp->FWrite(&used, sizeof(used), 1) != 1) {
|
if (fp->FWrite(&used, sizeof(used), 1) != 1) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
for (int i = 0; i < used; ++i) {
|
for (int i = 0; i < used; ++i) {
|
||||||
int8_t non_null = GenericVector<T*>::data()[i] != nullptr;
|
int8_t non_null = GenericVector<T*>::data_[i] != nullptr;
|
||||||
if (fp->FWrite(&non_null, sizeof(non_null), 1) != 1) {
|
if (fp->FWrite(&non_null, sizeof(non_null), 1) != 1) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (non_null && !GenericVector<T*>::data()[i]->Serialize(fp)) {
|
if (non_null && !GenericVector<T*>::data_[i]->Serialize(fp)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -599,52 +652,99 @@ class GenericVectorEqEq : public GenericVector<T> {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void GenericVector<T>::init(int size) {
|
void GenericVector<T>::init(int size) {
|
||||||
clear();
|
size_used_ = 0;
|
||||||
resize(size);
|
if (size <= 0) {
|
||||||
|
data_ = nullptr;
|
||||||
|
size_reserved_ = 0;
|
||||||
|
} else {
|
||||||
|
if (size < kDefaultVectorSize) {
|
||||||
|
size = kDefaultVectorSize;
|
||||||
|
}
|
||||||
|
data_ = new T[size];
|
||||||
|
size_reserved_ = size;
|
||||||
|
}
|
||||||
|
clear_cb_ = nullptr;
|
||||||
|
compare_cb_ = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
GenericVector<T>::~GenericVector() {
|
||||||
|
clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reserve some memory. If the internal array contains elements, they are
|
||||||
|
// copied.
|
||||||
|
template <typename T>
|
||||||
|
void GenericVector<T>::reserve(int size) {
|
||||||
|
if (size_reserved_ >= size || size <= 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (size < kDefaultVectorSize) {
|
||||||
|
size = kDefaultVectorSize;
|
||||||
|
}
|
||||||
|
T* new_array = new T[size];
|
||||||
|
for (int i = 0; i < size_used_; ++i) {
|
||||||
|
new_array[i] = data_[i];
|
||||||
|
}
|
||||||
|
delete[] data_;
|
||||||
|
data_ = new_array;
|
||||||
|
size_reserved_ = size;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void GenericVector<T>::double_the_size() {
|
void GenericVector<T>::double_the_size() {
|
||||||
if (capacity() == 0) {
|
if (size_reserved_ == 0) {
|
||||||
reserve(kDefaultVectorSize);
|
reserve(kDefaultVectorSize);
|
||||||
} else {
|
} else {
|
||||||
reserve(2 * capacity());
|
reserve(2 * size_reserved_);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Resizes to size and sets all values to t.
|
// Resizes to size and sets all values to t.
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void GenericVector<T>::init_to_size(int size, const T& t) {
|
void GenericVector<T>::init_to_size(int size, const T& t) {
|
||||||
resize(size, t);
|
reserve(size);
|
||||||
|
size_used_ = size;
|
||||||
|
for (int i = 0; i < size; ++i) {
|
||||||
|
data_[i] = t;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void GenericVector<T>::resize(int size, const T& t) {
|
||||||
|
init_to_size(size, t);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return the object from an index.
|
// Return the object from an index.
|
||||||
template <typename T>
|
template <typename T>
|
||||||
T& GenericVector<T>::get(int index) {
|
T& GenericVector<T>::get(int index) const {
|
||||||
assert(index >= 0 && index < size());
|
assert(index >= 0 && index < size_used_);
|
||||||
return data()[index];
|
return data_[index];
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return the object from an index.
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
const T& GenericVector<T>::get(int index) const {
|
T& GenericVector<T>::operator[](int index) const {
|
||||||
assert(index >= 0 && index < size());
|
assert(index >= 0 && index < size_used_);
|
||||||
return data()[index];
|
return data_[index];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
T& GenericVector<T>::back() const {
|
||||||
|
assert(size_used_ > 0);
|
||||||
|
return data_[size_used_ - 1];
|
||||||
|
}
|
||||||
// Returns the last object and removes it.
|
// Returns the last object and removes it.
|
||||||
template <typename T>
|
template <typename T>
|
||||||
T GenericVector<T>::pop_back() {
|
T GenericVector<T>::pop_back() {
|
||||||
auto b = back();
|
assert(size_used_ > 0);
|
||||||
base::pop_back();
|
return data_[--size_used_];
|
||||||
return b;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return the object from an index.
|
// Return the object from an index.
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void GenericVector<T>::set(const T& t, int index) {
|
void GenericVector<T>::set(const T& t, int index) {
|
||||||
assert(index >= 0 && index < size());
|
assert(index >= 0 && index < size_used_);
|
||||||
data()[index] = t;
|
data_[index] = t;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Shifts the rest of the elements to the right to make
|
// Shifts the rest of the elements to the right to make
|
||||||
@ -652,32 +752,40 @@ void GenericVector<T>::set(const T& t, int index) {
|
|||||||
// at the specified index.
|
// at the specified index.
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void GenericVector<T>::insert(const T& t, int index) {
|
void GenericVector<T>::insert(const T& t, int index) {
|
||||||
base::insert(begin() + index, t);
|
assert(index >= 0 && index <= size_used_);
|
||||||
|
if (size_reserved_ == size_used_) {
|
||||||
|
double_the_size();
|
||||||
|
}
|
||||||
|
for (int i = size_used_; i > index; --i) {
|
||||||
|
data_[i] = data_[i - 1];
|
||||||
|
}
|
||||||
|
data_[index] = t;
|
||||||
|
size_used_++;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Removes an element at the given index and
|
// Removes an element at the given index and
|
||||||
// shifts the remaining elements to the left.
|
// shifts the remaining elements to the left.
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void GenericVector<T>::remove(int index) {
|
void GenericVector<T>::remove(int index) {
|
||||||
assert(index >= 0 && index < size());
|
assert(index >= 0 && index < size_used_);
|
||||||
for (int i = index; i < size() - 1; ++i) {
|
for (int i = index; i < size_used_ - 1; ++i) {
|
||||||
data()[i] = data()[i + 1];
|
data_[i] = data_[i + 1];
|
||||||
}
|
}
|
||||||
resize(size() - 1);
|
size_used_--;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return true if the index is valindex
|
// Return true if the index is valindex
|
||||||
template <typename T>
|
template <typename T>
|
||||||
T GenericVector<T>::contains_index(int index) const {
|
T GenericVector<T>::contains_index(int index) const {
|
||||||
return index >= 0 && index < size();
|
return index >= 0 && index < size_used_;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return the index of the T object.
|
// Return the index of the T object.
|
||||||
template <typename T>
|
template <typename T>
|
||||||
int GenericVector<T>::get_index(const T& object) const {
|
int GenericVector<T>::get_index(const T& object) const {
|
||||||
for (int i = 0; i < size(); ++i) {
|
for (int i = 0; i < size_used_; ++i) {
|
||||||
assert(compare_cb_ != nullptr);
|
assert(compare_cb_ != nullptr);
|
||||||
if (compare_cb_(object, data()[i])) {
|
if (compare_cb_(object, data_[i])) {
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -690,20 +798,38 @@ bool GenericVector<T>::contains(const T& object) const {
|
|||||||
return get_index(object) != -1;
|
return get_index(object) != -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Add an element in the array
|
||||||
|
template <typename T>
|
||||||
|
int GenericVector<T>::push_back(T object) {
|
||||||
|
int index = 0;
|
||||||
|
if (size_used_ == size_reserved_) {
|
||||||
|
double_the_size();
|
||||||
|
}
|
||||||
|
index = size_used_++;
|
||||||
|
data_[index] = object;
|
||||||
|
return index;
|
||||||
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
int GenericVector<T>::push_back_new(const T& object) {
|
int GenericVector<T>::push_back_new(const T& object) {
|
||||||
int index = get_index(object);
|
int index = get_index(object);
|
||||||
if (index >= 0) {
|
if (index >= 0) {
|
||||||
return index;
|
return index;
|
||||||
}
|
}
|
||||||
push_back(object);
|
return push_back(object);
|
||||||
return size();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add an element in the array (front)
|
// Add an element in the array (front)
|
||||||
template <typename T>
|
template <typename T>
|
||||||
int GenericVector<T>::push_front(const T& object) {
|
int GenericVector<T>::push_front(const T& object) {
|
||||||
insert(begin(), object);
|
if (size_used_ == size_reserved_) {
|
||||||
|
double_the_size();
|
||||||
|
}
|
||||||
|
for (int i = size_used_; i > 0; --i) {
|
||||||
|
data_[i] = data_[i - 1];
|
||||||
|
}
|
||||||
|
data_[0] = object;
|
||||||
|
++size_used_;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -714,39 +840,62 @@ void GenericVector<T>::operator+=(const T& t) {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
GenericVector<T>& GenericVector<T>::operator+=(const GenericVector& other) {
|
GenericVector<T>& GenericVector<T>::operator+=(const GenericVector& other) {
|
||||||
this->reserve(size() + other.size());
|
this->reserve(size_used_ + other.size_used_);
|
||||||
for (int i = 0; i < other.size(); ++i) {
|
for (int i = 0; i < other.size(); ++i) {
|
||||||
this->operator+=(other.data()[i]);
|
this->operator+=(other.data_[i]);
|
||||||
}
|
}
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
GenericVector<T>& GenericVector<T>::operator=(const GenericVector& other) {
|
||||||
|
if (&other != this) {
|
||||||
|
this->truncate(0);
|
||||||
|
this->operator+=(other);
|
||||||
|
}
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clear the array, calling the callback function if any.
|
||||||
|
template <typename T>
|
||||||
|
void GenericVector<T>::clear() {
|
||||||
|
if (size_reserved_ > 0 && clear_cb_ != nullptr) {
|
||||||
|
for (int i = 0; i < size_used_; ++i) {
|
||||||
|
clear_cb_(data_[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
delete[] data_;
|
||||||
|
data_ = nullptr;
|
||||||
|
size_used_ = 0;
|
||||||
|
size_reserved_ = 0;
|
||||||
|
clear_cb_ = nullptr;
|
||||||
|
compare_cb_ = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void GenericVector<T>::delete_data_pointers() {
|
void GenericVector<T>::delete_data_pointers() {
|
||||||
for (int i = 0; i < size(); ++i) {
|
for (int i = 0; i < size_used_; ++i) {
|
||||||
delete data()[i];
|
delete data_[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
bool GenericVector<T>::write(FILE* f,
|
bool GenericVector<T>::write(FILE* f,
|
||||||
std::function<bool(FILE*, const T&)> cb) const {
|
std::function<bool(FILE*, const T&)> cb) const {
|
||||||
int32_t cp = capacity();
|
if (fwrite(&size_reserved_, sizeof(size_reserved_), 1, f) != 1) {
|
||||||
if (fwrite(&cp, sizeof(cp), 1, f) != 1) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
int32_t sz = size();
|
if (fwrite(&size_used_, sizeof(size_used_), 1, f) != 1) {
|
||||||
if (fwrite(&sz, sizeof(sz), 1, f) != 1) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (cb != nullptr) {
|
if (cb != nullptr) {
|
||||||
for (int i = 0; i < size(); ++i) {
|
for (int i = 0; i < size_used_; ++i) {
|
||||||
if (!cb(f, data()[i])) {
|
if (!cb(f, data_[i])) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (fwrite(data(), sizeof(T), size(), f) != unsigned_size()) {
|
if (fwrite(data_, sizeof(T), size_used_, f) != unsigned_size()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -756,23 +905,22 @@ bool GenericVector<T>::write(FILE* f,
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
bool GenericVector<T>::read(TFile* f,
|
bool GenericVector<T>::read(TFile* f,
|
||||||
std::function<bool(TFile*, T*)> cb) {
|
std::function<bool(TFile*, T*)> cb) {
|
||||||
int32_t reserved, size;
|
int32_t reserved;
|
||||||
if (f->FReadEndian(&reserved, sizeof(reserved), 1) != 1) {
|
if (f->FReadEndian(&reserved, sizeof(reserved), 1) != 1) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
reserve(reserved);
|
reserve(reserved);
|
||||||
if (f->FReadEndian(&size, sizeof(size), 1) != 1) {
|
if (f->FReadEndian(&size_used_, sizeof(size_used_), 1) != 1) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
resize(size);
|
|
||||||
if (cb != nullptr) {
|
if (cb != nullptr) {
|
||||||
for (int i = 0; i < size; ++i) {
|
for (int i = 0; i < size_used_; ++i) {
|
||||||
if (!cb(f, data() + i)) {
|
if (!cb(f, data_ + i)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (f->FReadEndian(data(), sizeof(T), size) != size) {
|
if (f->FReadEndian(data_, sizeof(T), size_used_) != size_used_) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -783,22 +931,20 @@ bool GenericVector<T>::read(TFile* f,
|
|||||||
// read/write of T will work. Returns false in case of error.
|
// read/write of T will work. Returns false in case of error.
|
||||||
template <typename T>
|
template <typename T>
|
||||||
bool GenericVector<T>::Serialize(FILE* fp) const {
|
bool GenericVector<T>::Serialize(FILE* fp) const {
|
||||||
int32_t sz = size();
|
if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) {
|
||||||
if (fwrite(&sz, sizeof(sz), 1, fp) != 1) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (fwrite(data(), sizeof(T), sz, fp) != unsigned_size()) {
|
if (fwrite(data_, sizeof(*data_), size_used_, fp) != unsigned_size()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
template <typename T>
|
template <typename T>
|
||||||
bool GenericVector<T>::Serialize(TFile* fp) const {
|
bool GenericVector<T>::Serialize(TFile* fp) const {
|
||||||
int32_t sz = size();
|
if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) {
|
||||||
if (fp->FWrite(&sz, sizeof(sz), 1) != 1) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (fp->FWrite(data(), sizeof(T), sz) != sz) {
|
if (fp->FWrite(data_, sizeof(*data_), size_used_) != size_used_) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
@ -822,13 +968,14 @@ bool GenericVector<T>::DeSerialize(bool swap, FILE* fp) {
|
|||||||
if (reserved > UINT16_MAX) {
|
if (reserved > UINT16_MAX) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
resize(reserved);
|
reserve(reserved);
|
||||||
if (fread(data(), sizeof(T), size(), fp) != unsigned_size()) {
|
size_used_ = reserved;
|
||||||
|
if (fread(data_, sizeof(T), size_used_, fp) != unsigned_size()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (swap) {
|
if (swap) {
|
||||||
for (int i = 0; i < size(); ++i) {
|
for (int i = 0; i < size_used_; ++i) {
|
||||||
ReverseN(&data()[i], sizeof(data()[i]));
|
ReverseN(&data_[i], sizeof(data_[i]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
@ -845,8 +992,9 @@ bool GenericVector<T>::DeSerialize(TFile* fp) {
|
|||||||
if (reserved > limit) {
|
if (reserved > limit) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
resize(reserved);
|
reserve(reserved);
|
||||||
return fp->FReadEndian(data(), sizeof(T), size()) == size();
|
size_used_ = reserved;
|
||||||
|
return fp->FReadEndian(data_, sizeof(T), size_used_) == size_used_;
|
||||||
}
|
}
|
||||||
template <typename T>
|
template <typename T>
|
||||||
bool GenericVector<T>::SkipDeSerialize(TFile* fp) {
|
bool GenericVector<T>::SkipDeSerialize(TFile* fp) {
|
||||||
@ -862,12 +1010,11 @@ bool GenericVector<T>::SkipDeSerialize(TFile* fp) {
|
|||||||
// Returns false in case of error.
|
// Returns false in case of error.
|
||||||
template <typename T>
|
template <typename T>
|
||||||
bool GenericVector<T>::SerializeClasses(FILE* fp) const {
|
bool GenericVector<T>::SerializeClasses(FILE* fp) const {
|
||||||
int32_t sz = size();
|
if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) {
|
||||||
if (fwrite(&sz, sizeof(sz), 1, fp) != 1) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
for (int i = 0; i < sz; ++i) {
|
for (int i = 0; i < size_used_; ++i) {
|
||||||
if (!data()[i].Serialize(fp)) {
|
if (!data_[i].Serialize(fp)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -875,12 +1022,11 @@ bool GenericVector<T>::SerializeClasses(FILE* fp) const {
|
|||||||
}
|
}
|
||||||
template <typename T>
|
template <typename T>
|
||||||
bool GenericVector<T>::SerializeClasses(TFile* fp) const {
|
bool GenericVector<T>::SerializeClasses(TFile* fp) const {
|
||||||
int32_t sz = size();
|
if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) {
|
||||||
if (fp->FWrite(&sz, sizeof(sz), 1) != 1) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
for (int i = 0; i < sz; ++i) {
|
for (int i = 0; i < size_used_; ++i) {
|
||||||
if (!data()[i].Serialize(fp)) {
|
if (!data_[i].Serialize(fp)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -904,7 +1050,7 @@ bool GenericVector<T>::DeSerializeClasses(bool swap, FILE* fp) {
|
|||||||
T empty;
|
T empty;
|
||||||
init_to_size(reserved, empty);
|
init_to_size(reserved, empty);
|
||||||
for (int i = 0; i < reserved; ++i) {
|
for (int i = 0; i < reserved; ++i) {
|
||||||
if (!data()[i].DeSerialize(swap, fp)) {
|
if (!data_[i].DeSerialize(swap, fp)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -919,7 +1065,7 @@ bool GenericVector<T>::DeSerializeClasses(TFile* fp) {
|
|||||||
T empty;
|
T empty;
|
||||||
init_to_size(reserved, empty);
|
init_to_size(reserved, empty);
|
||||||
for (int i = 0; i < reserved; ++i) {
|
for (int i = 0; i < reserved; ++i) {
|
||||||
if (!data()[i].DeSerialize(fp)) {
|
if (!data_[i].DeSerialize(fp)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -943,7 +1089,17 @@ bool GenericVector<T>::SkipDeSerializeClasses(TFile* fp) {
|
|||||||
// its argument, and finally invalidates its argument.
|
// its argument, and finally invalidates its argument.
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void GenericVector<T>::move(GenericVector<T>* from) {
|
void GenericVector<T>::move(GenericVector<T>* from) {
|
||||||
*this = std::move(*from);
|
this->clear();
|
||||||
|
this->data_ = from->data_;
|
||||||
|
this->size_reserved_ = from->size_reserved_;
|
||||||
|
this->size_used_ = from->size_used_;
|
||||||
|
this->compare_cb_ = from->compare_cb_;
|
||||||
|
this->clear_cb_ = from->clear_cb_;
|
||||||
|
from->data_ = nullptr;
|
||||||
|
from->clear_cb_ = nullptr;
|
||||||
|
from->compare_cb_ = nullptr;
|
||||||
|
from->size_used_ = 0;
|
||||||
|
from->size_reserved_ = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
@ -974,7 +1130,7 @@ int GenericVector<T>::choose_nth_item(int target_index, int start, int end,
|
|||||||
return start;
|
return start;
|
||||||
}
|
}
|
||||||
if (num_elements == 2) {
|
if (num_elements == 2) {
|
||||||
if (data()[start] < data()[start + 1]) {
|
if (data_[start] < data_[start + 1]) {
|
||||||
return target_index > start ? start + 1 : start;
|
return target_index > start ? start + 1 : start;
|
||||||
}
|
}
|
||||||
return target_index > start ? start : start + 1;
|
return target_index > start ? start : start + 1;
|
||||||
@ -993,9 +1149,9 @@ int GenericVector<T>::choose_nth_item(int target_index, int start, int end,
|
|||||||
int next_lesser = start;
|
int next_lesser = start;
|
||||||
int prev_greater = end;
|
int prev_greater = end;
|
||||||
for (int next_sample = start + 1; next_sample < prev_greater;) {
|
for (int next_sample = start + 1; next_sample < prev_greater;) {
|
||||||
if (data()[next_sample] < data()[next_lesser]) {
|
if (data_[next_sample] < data_[next_lesser]) {
|
||||||
swap(next_lesser++, next_sample++);
|
swap(next_lesser++, next_sample++);
|
||||||
} else if (data()[next_sample] == data()[next_lesser]) {
|
} else if (data_[next_sample] == data_[next_lesser]) {
|
||||||
++next_sample;
|
++next_sample;
|
||||||
} else {
|
} else {
|
||||||
swap(--prev_greater, next_sample);
|
swap(--prev_greater, next_sample);
|
||||||
|
@ -133,6 +133,16 @@ TFile::~TFile() {
|
|||||||
delete data_;
|
delete data_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool TFile::DeSerialize(std::vector<char>& data) {
|
||||||
|
uint32_t size;
|
||||||
|
if (!DeSerialize(&size)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// TODO: optimize.
|
||||||
|
data.resize(size);
|
||||||
|
return DeSerialize(&data[0], data.size());
|
||||||
|
}
|
||||||
|
|
||||||
bool TFile::DeSerialize(char* buffer, size_t count) {
|
bool TFile::DeSerialize(char* buffer, size_t count) {
|
||||||
return FRead(buffer, sizeof(*buffer), count) == count;
|
return FRead(buffer, sizeof(*buffer), count) == count;
|
||||||
}
|
}
|
||||||
@ -177,6 +187,14 @@ bool TFile::DeSerialize(uint64_t* buffer, size_t count) {
|
|||||||
return FReadEndian(buffer, sizeof(*buffer), count) == count;
|
return FReadEndian(buffer, sizeof(*buffer), count) == count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool TFile::Serialize(const std::vector<char>& data) {
|
||||||
|
uint32_t size = data.size();
|
||||||
|
if (!Serialize(&size)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return Serialize(&data[0], size);
|
||||||
|
}
|
||||||
|
|
||||||
bool TFile::Serialize(const char* buffer, size_t count) {
|
bool TFile::Serialize(const char* buffer, size_t count) {
|
||||||
return FWrite(buffer, sizeof(*buffer), count) == count;
|
return FWrite(buffer, sizeof(*buffer), count) == count;
|
||||||
}
|
}
|
||||||
|
@ -91,6 +91,7 @@ class TFile {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Deserialize data.
|
// Deserialize data.
|
||||||
|
bool DeSerialize(std::vector<char>& data);
|
||||||
bool DeSerialize(char* data, size_t count = 1);
|
bool DeSerialize(char* data, size_t count = 1);
|
||||||
bool DeSerialize(double* data, size_t count = 1);
|
bool DeSerialize(double* data, size_t count = 1);
|
||||||
bool DeSerialize(float* data, size_t count = 1);
|
bool DeSerialize(float* data, size_t count = 1);
|
||||||
@ -104,6 +105,7 @@ class TFile {
|
|||||||
bool DeSerialize(uint64_t* data, size_t count = 1);
|
bool DeSerialize(uint64_t* data, size_t count = 1);
|
||||||
|
|
||||||
// Serialize data.
|
// Serialize data.
|
||||||
|
bool Serialize(const std::vector<char>& data);
|
||||||
bool Serialize(const char* data, size_t count = 1);
|
bool Serialize(const char* data, size_t count = 1);
|
||||||
bool Serialize(const double* data, size_t count = 1);
|
bool Serialize(const double* data, size_t count = 1);
|
||||||
bool Serialize(const float* data, size_t count = 1);
|
bool Serialize(const float* data, size_t count = 1);
|
||||||
|
@ -96,7 +96,7 @@ bool TessdataManager::LoadArchiveFile(const char *filename) {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
bool TessdataManager::Init(const char *data_file_name) {
|
bool TessdataManager::Init(const char *data_file_name) {
|
||||||
GenericVector<char> data;
|
std::vector<char> data;
|
||||||
if (reader_ == nullptr) {
|
if (reader_ == nullptr) {
|
||||||
#if defined(HAVE_LIBARCHIVE)
|
#if defined(HAVE_LIBARCHIVE)
|
||||||
if (LoadArchiveFile(data_file_name)) return true;
|
if (LoadArchiveFile(data_file_name)) return true;
|
||||||
@ -155,7 +155,7 @@ bool TessdataManager::SaveFile(const char* filename,
|
|||||||
FileWriter writer) const {
|
FileWriter writer) const {
|
||||||
// TODO: This method supports only the proprietary file format.
|
// TODO: This method supports only the proprietary file format.
|
||||||
ASSERT_HOST(is_loaded_);
|
ASSERT_HOST(is_loaded_);
|
||||||
GenericVector<char> data;
|
std::vector<char> data;
|
||||||
Serialize(&data);
|
Serialize(&data);
|
||||||
if (writer == nullptr)
|
if (writer == nullptr)
|
||||||
return SaveDataToFile(data, filename);
|
return SaveDataToFile(data, filename);
|
||||||
@ -164,7 +164,7 @@ bool TessdataManager::SaveFile(const char* filename,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Serializes to the given vector.
|
// Serializes to the given vector.
|
||||||
void TessdataManager::Serialize(GenericVector<char> *data) const {
|
void TessdataManager::Serialize(std::vector<char> *data) const {
|
||||||
// TODO: This method supports only the proprietary file format.
|
// TODO: This method supports only the proprietary file format.
|
||||||
ASSERT_HOST(is_loaded_);
|
ASSERT_HOST(is_loaded_);
|
||||||
// Compute the offset_table and total size.
|
// Compute the offset_table and total size.
|
||||||
@ -178,7 +178,7 @@ void TessdataManager::Serialize(GenericVector<char> *data) const {
|
|||||||
offset += entries_[i].size();
|
offset += entries_[i].size();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
data->init_to_size(offset, 0);
|
data->resize(offset, 0);
|
||||||
int32_t num_entries = TESSDATA_NUM_ENTRIES;
|
int32_t num_entries = TESSDATA_NUM_ENTRIES;
|
||||||
TFile fp;
|
TFile fp;
|
||||||
fp.OpenWrite(data);
|
fp.OpenWrite(data);
|
||||||
|
@ -151,7 +151,7 @@ class TessdataManager {
|
|||||||
// Saves to the given filename.
|
// Saves to the given filename.
|
||||||
bool SaveFile(const char* filename, FileWriter writer) const;
|
bool SaveFile(const char* filename, FileWriter writer) const;
|
||||||
// Serializes to the given vector.
|
// Serializes to the given vector.
|
||||||
void Serialize(GenericVector<char> *data) const;
|
void Serialize(std::vector<char> *data) const;
|
||||||
// Resets to the initial state, keeping the reader.
|
// Resets to the initial state, keeping the reader.
|
||||||
void Clear();
|
void Clear();
|
||||||
|
|
||||||
|
@ -716,8 +716,8 @@ int ShapeTable::AddUnicharToResults(
|
|||||||
int result_index = unichar_map->get(unichar_id);
|
int result_index = unichar_map->get(unichar_id);
|
||||||
if (result_index < 0) {
|
if (result_index < 0) {
|
||||||
UnicharRating result(unichar_id, rating);
|
UnicharRating result(unichar_id, rating);
|
||||||
results->push_back(result);
|
|
||||||
result_index = results->size();
|
result_index = results->size();
|
||||||
|
results->push_back(result);
|
||||||
(*unichar_map)[unichar_id] = result_index;
|
(*unichar_map)[unichar_id] = result_index;
|
||||||
}
|
}
|
||||||
return result_index;
|
return result_index;
|
||||||
|
@ -377,7 +377,7 @@ class DawgPositionVector : public GenericVector<DawgPosition> {
|
|||||||
bool debug,
|
bool debug,
|
||||||
const char *debug_msg) {
|
const char *debug_msg) {
|
||||||
for (int i = 0; i < size(); ++i) {
|
for (int i = 0; i < size(); ++i) {
|
||||||
if (data()[i] == new_pos) return false;
|
if (data_[i] == new_pos) return false;
|
||||||
}
|
}
|
||||||
push_back(new_pos);
|
push_back(new_pos);
|
||||||
if (debug) {
|
if (debug) {
|
||||||
|
@ -162,7 +162,7 @@ int main(int argc, char **argv) {
|
|||||||
return EXIT_FAILURE;
|
return EXIT_FAILURE;
|
||||||
}
|
}
|
||||||
recognizer.ConvertToInt();
|
recognizer.ConvertToInt();
|
||||||
GenericVector<char> lstm_data;
|
std::vector<char> lstm_data;
|
||||||
fp.OpenWrite(&lstm_data);
|
fp.OpenWrite(&lstm_data);
|
||||||
ASSERT_HOST(recognizer.Serialize(&tm, &fp));
|
ASSERT_HOST(recognizer.Serialize(&tm, &fp));
|
||||||
tm.OverwriteEntry(tesseract::TESSDATA_LSTM, &lstm_data[0],
|
tm.OverwriteEntry(tesseract::TESSDATA_LSTM, &lstm_data[0],
|
||||||
|
@ -34,7 +34,7 @@ namespace tesseract {
|
|||||||
// can do its own thing. If lang is empty, returns true but does nothing.
|
// can do its own thing. If lang is empty, returns true but does nothing.
|
||||||
// NOTE that suffix should contain any required . for the filename.
|
// NOTE that suffix should contain any required . for the filename.
|
||||||
bool WriteFile(const std::string& output_dir, const std::string& lang,
|
bool WriteFile(const std::string& output_dir, const std::string& lang,
|
||||||
const std::string& suffix, const GenericVector<char>& data,
|
const std::string& suffix, const std::vector<char>& data,
|
||||||
FileWriter writer) {
|
FileWriter writer) {
|
||||||
if (lang.empty()) return true;
|
if (lang.empty()) return true;
|
||||||
std::string dirname = output_dir + "/" + lang;
|
std::string dirname = output_dir + "/" + lang;
|
||||||
@ -56,7 +56,7 @@ bool WriteFile(const std::string& output_dir, const std::string& lang,
|
|||||||
// On failure emits a warning message and returns an empty STRING.
|
// On failure emits a warning message and returns an empty STRING.
|
||||||
STRING ReadFile(const std::string& filename, FileReader reader) {
|
STRING ReadFile(const std::string& filename, FileReader reader) {
|
||||||
if (filename.empty()) return STRING();
|
if (filename.empty()) return STRING();
|
||||||
GenericVector<char> data;
|
std::vector<char> data;
|
||||||
bool read_result;
|
bool read_result;
|
||||||
if (reader == nullptr)
|
if (reader == nullptr)
|
||||||
read_result = LoadDataFromFile(filename.c_str(), &data);
|
read_result = LoadDataFromFile(filename.c_str(), &data);
|
||||||
@ -71,7 +71,7 @@ STRING ReadFile(const std::string& filename, FileReader reader) {
|
|||||||
bool WriteUnicharset(const UNICHARSET& unicharset, const std::string& output_dir,
|
bool WriteUnicharset(const UNICHARSET& unicharset, const std::string& output_dir,
|
||||||
const std::string& lang, FileWriter writer,
|
const std::string& lang, FileWriter writer,
|
||||||
TessdataManager* traineddata) {
|
TessdataManager* traineddata) {
|
||||||
GenericVector<char> unicharset_data;
|
std::vector<char> unicharset_data;
|
||||||
TFile fp;
|
TFile fp;
|
||||||
fp.OpenWrite(&unicharset_data);
|
fp.OpenWrite(&unicharset_data);
|
||||||
if (!unicharset.save_to_file(&fp)) return false;
|
if (!unicharset.save_to_file(&fp)) return false;
|
||||||
@ -107,13 +107,13 @@ bool WriteRecoder(const UNICHARSET& unicharset, bool pass_through,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
TFile fp;
|
TFile fp;
|
||||||
GenericVector<char> recoder_data;
|
std::vector<char> recoder_data;
|
||||||
fp.OpenWrite(&recoder_data);
|
fp.OpenWrite(&recoder_data);
|
||||||
if (!recoder.Serialize(&fp)) return false;
|
if (!recoder.Serialize(&fp)) return false;
|
||||||
traineddata->OverwriteEntry(TESSDATA_LSTM_RECODER, &recoder_data[0],
|
traineddata->OverwriteEntry(TESSDATA_LSTM_RECODER, &recoder_data[0],
|
||||||
recoder_data.size());
|
recoder_data.size());
|
||||||
STRING encoding = recoder.GetEncodingAsString(unicharset);
|
STRING encoding = recoder.GetEncodingAsString(unicharset);
|
||||||
recoder_data.init_to_size(encoding.length(), 0);
|
recoder_data.resize(encoding.length(), 0);
|
||||||
memcpy(&recoder_data[0], &encoding[0], encoding.length());
|
memcpy(&recoder_data[0], &encoding[0], encoding.length());
|
||||||
STRING suffix;
|
STRING suffix;
|
||||||
suffix.add_str_int(".charset_size=", recoder.code_range());
|
suffix.add_str_int(".charset_size=", recoder.code_range());
|
||||||
@ -134,7 +134,7 @@ static bool WriteDawg(const std::vector<STRING>& words,
|
|||||||
std::unique_ptr<SquishedDawg> dawg(trie.trie_to_dawg());
|
std::unique_ptr<SquishedDawg> dawg(trie.trie_to_dawg());
|
||||||
if (dawg == nullptr || dawg->NumEdges() == 0) return false;
|
if (dawg == nullptr || dawg->NumEdges() == 0) return false;
|
||||||
TFile fp;
|
TFile fp;
|
||||||
GenericVector<char> dawg_data;
|
std::vector<char> dawg_data;
|
||||||
fp.OpenWrite(&dawg_data);
|
fp.OpenWrite(&dawg_data);
|
||||||
if (!dawg->write_squished_dawg(&fp)) return false;
|
if (!dawg->write_squished_dawg(&fp)) return false;
|
||||||
traineddata->OverwriteEntry(file_type, &dawg_data[0], dawg_data.size());
|
traineddata->OverwriteEntry(file_type, &dawg_data[0], dawg_data.size());
|
||||||
@ -228,7 +228,7 @@ int CombineLangModel(const UNICHARSET& unicharset, const std::string& script_dir
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Traineddata file.
|
// Traineddata file.
|
||||||
GenericVector<char> traineddata_data;
|
std::vector<char> traineddata_data;
|
||||||
traineddata.Serialize(&traineddata_data);
|
traineddata.Serialize(&traineddata_data);
|
||||||
if (!WriteFile(output_dir, lang, ".traineddata", traineddata_data, writer)) {
|
if (!WriteFile(output_dir, lang, ".traineddata", traineddata_data, writer)) {
|
||||||
tprintf("Error writing output traineddata file!!\n");
|
tprintf("Error writing output traineddata file!!\n");
|
||||||
|
@ -30,7 +30,7 @@ namespace tesseract {
|
|||||||
// can do its own thing. If lang is empty, returns true but does nothing.
|
// can do its own thing. If lang is empty, returns true but does nothing.
|
||||||
// NOTE that suffix should contain any required . for the filename.
|
// NOTE that suffix should contain any required . for the filename.
|
||||||
bool WriteFile(const std::string& output_dir, const std::string& lang,
|
bool WriteFile(const std::string& output_dir, const std::string& lang,
|
||||||
const std::string& suffix, const GenericVector<char>& data,
|
const std::string& suffix, const std::vector<char>& data,
|
||||||
FileWriter writer);
|
FileWriter writer);
|
||||||
// Helper reads a file with optional reader and returns a STRING.
|
// Helper reads a file with optional reader and returns a STRING.
|
||||||
// On failure emits a warning message and returns an empty STRING.
|
// On failure emits a warning message and returns an empty STRING.
|
||||||
|
@ -29,7 +29,7 @@ LSTMTester::LSTMTester(int64_t max_memory)
|
|||||||
// tesseract into memory ready for testing. Returns false if nothing was
|
// tesseract into memory ready for testing. Returns false if nothing was
|
||||||
// loaded. The arg is a filename of a file that lists the filenames.
|
// loaded. The arg is a filename of a file that lists the filenames.
|
||||||
bool LSTMTester::LoadAllEvalData(const char* filenames_file) {
|
bool LSTMTester::LoadAllEvalData(const char* filenames_file) {
|
||||||
GenericVector<STRING> filenames;
|
std::vector<STRING> filenames;
|
||||||
if (!LoadFileLinesToStrings(filenames_file, &filenames)) {
|
if (!LoadFileLinesToStrings(filenames_file, &filenames)) {
|
||||||
tprintf("Failed to load list of eval filenames from %s\n",
|
tprintf("Failed to load list of eval filenames from %s\n",
|
||||||
filenames_file);
|
filenames_file);
|
||||||
@ -41,7 +41,7 @@ bool LSTMTester::LoadAllEvalData(const char* filenames_file) {
|
|||||||
// Loads a set of lstmf files that were created using the lstm.train config to
|
// Loads a set of lstmf files that were created using the lstm.train config to
|
||||||
// tesseract into memory ready for testing. Returns false if nothing was
|
// tesseract into memory ready for testing. Returns false if nothing was
|
||||||
// loaded.
|
// loaded.
|
||||||
bool LSTMTester::LoadAllEvalData(const GenericVector<STRING>& filenames) {
|
bool LSTMTester::LoadAllEvalData(const std::vector<STRING>& filenames) {
|
||||||
test_data_.Clear();
|
test_data_.Clear();
|
||||||
bool result = test_data_.LoadDocuments(filenames, CS_SEQUENTIAL, nullptr);
|
bool result = test_data_.LoadDocuments(filenames, CS_SEQUENTIAL, nullptr);
|
||||||
total_pages_ = test_data_.TotalPages();
|
total_pages_ = test_data_.TotalPages();
|
||||||
|
@ -38,7 +38,7 @@ class LSTMTester {
|
|||||||
// Loads a set of lstmf files that were created using the lstm.train config to
|
// Loads a set of lstmf files that were created using the lstm.train config to
|
||||||
// tesseract into memory ready for testing. Returns false if nothing was
|
// tesseract into memory ready for testing. Returns false if nothing was
|
||||||
// loaded.
|
// loaded.
|
||||||
bool LoadAllEvalData(const GenericVector<STRING>& filenames);
|
bool LoadAllEvalData(const std::vector<STRING>& filenames);
|
||||||
|
|
||||||
// Runs an evaluation asynchronously on the stored eval data and returns a
|
// Runs an evaluation asynchronously on the stored eval data and returns a
|
||||||
// string describing the results of the previous test. Args match TestCallback
|
// string describing the results of the previous test. Args match TestCallback
|
||||||
|
@ -267,7 +267,7 @@ void LSTMTrainer::DebugNetwork() {
|
|||||||
// Loads a set of lstmf files that were created using the lstm.train config to
|
// Loads a set of lstmf files that were created using the lstm.train config to
|
||||||
// tesseract into memory ready for training. Returns false if nothing was
|
// tesseract into memory ready for training. Returns false if nothing was
|
||||||
// loaded.
|
// loaded.
|
||||||
bool LSTMTrainer::LoadAllTrainingData(const GenericVector<STRING>& filenames,
|
bool LSTMTrainer::LoadAllTrainingData(const std::vector<STRING>& filenames,
|
||||||
CachingStrategy cache_strategy,
|
CachingStrategy cache_strategy,
|
||||||
bool randomly_rotate) {
|
bool randomly_rotate) {
|
||||||
randomly_rotate_ = randomly_rotate;
|
randomly_rotate_ = randomly_rotate;
|
||||||
@ -302,7 +302,7 @@ bool LSTMTrainer::MaintainCheckpoints(TestCallback tester, STRING* log_msg) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
bool result = true; // Something interesting happened.
|
bool result = true; // Something interesting happened.
|
||||||
GenericVector<char> rec_model_data;
|
std::vector<char> rec_model_data;
|
||||||
if (error_rate < best_error_rate_) {
|
if (error_rate < best_error_rate_) {
|
||||||
SaveRecognitionDump(&rec_model_data);
|
SaveRecognitionDump(&rec_model_data);
|
||||||
log_msg->add_str_double(" New best char error = ", error_rate);
|
log_msg->add_str_double(" New best char error = ", error_rate);
|
||||||
@ -335,7 +335,7 @@ bool LSTMTrainer::MaintainCheckpoints(TestCallback tester, STRING* log_msg) {
|
|||||||
// Error rate has ballooned. Go back to the best model.
|
// Error rate has ballooned. Go back to the best model.
|
||||||
*log_msg += "\nDivergence! ";
|
*log_msg += "\nDivergence! ";
|
||||||
// Copy best_trainer_ before reading it, as it will get overwritten.
|
// Copy best_trainer_ before reading it, as it will get overwritten.
|
||||||
GenericVector<char> revert_data(best_trainer_);
|
std::vector<char> revert_data(best_trainer_);
|
||||||
if (ReadTrainingDump(revert_data, this)) {
|
if (ReadTrainingDump(revert_data, this)) {
|
||||||
LogIterations("Reverted to", log_msg);
|
LogIterations("Reverted to", log_msg);
|
||||||
ReduceLearningRates(this, log_msg);
|
ReduceLearningRates(this, log_msg);
|
||||||
@ -354,7 +354,7 @@ bool LSTMTrainer::MaintainCheckpoints(TestCallback tester, STRING* log_msg) {
|
|||||||
}
|
}
|
||||||
if (checkpoint_name_.length() > 0) {
|
if (checkpoint_name_.length() > 0) {
|
||||||
// Write a current checkpoint.
|
// Write a current checkpoint.
|
||||||
GenericVector<char> checkpoint;
|
std::vector<char> checkpoint;
|
||||||
if (!SaveTrainingDump(FULL, this, &checkpoint) ||
|
if (!SaveTrainingDump(FULL, this, &checkpoint) ||
|
||||||
!SaveDataToFile(checkpoint, checkpoint_name_.c_str())) {
|
!SaveDataToFile(checkpoint, checkpoint_name_.c_str())) {
|
||||||
*log_msg += " failed to write checkpoint.";
|
*log_msg += " failed to write checkpoint.";
|
||||||
@ -420,14 +420,14 @@ bool LSTMTrainer::Serialize(SerializeAmount serialize_amount,
|
|||||||
if (!fp->Serialize(&worst_error_rates_[0], countof(worst_error_rates_))) return false;
|
if (!fp->Serialize(&worst_error_rates_[0], countof(worst_error_rates_))) return false;
|
||||||
if (!fp->Serialize(&worst_iteration_)) return false;
|
if (!fp->Serialize(&worst_iteration_)) return false;
|
||||||
if (!fp->Serialize(&stall_iteration_)) return false;
|
if (!fp->Serialize(&stall_iteration_)) return false;
|
||||||
if (!best_model_data_.Serialize(fp)) return false;
|
if (!fp->Serialize(best_model_data_)) return false;
|
||||||
if (!worst_model_data_.Serialize(fp)) return false;
|
if (!fp->Serialize(worst_model_data_)) return false;
|
||||||
if (serialize_amount != NO_BEST_TRAINER && !best_trainer_.Serialize(fp))
|
if (serialize_amount != NO_BEST_TRAINER && !fp->Serialize(best_trainer_))
|
||||||
return false;
|
return false;
|
||||||
GenericVector<char> sub_data;
|
std::vector<char> sub_data;
|
||||||
if (sub_trainer_ != nullptr && !SaveTrainingDump(LIGHT, sub_trainer_, &sub_data))
|
if (sub_trainer_ != nullptr && !SaveTrainingDump(LIGHT, sub_trainer_, &sub_data))
|
||||||
return false;
|
return false;
|
||||||
if (!sub_data.Serialize(fp)) return false;
|
if (!fp->Serialize(sub_data)) return false;
|
||||||
if (!best_error_history_.Serialize(fp)) return false;
|
if (!best_error_history_.Serialize(fp)) return false;
|
||||||
if (!best_error_iterations_.Serialize(fp)) return false;
|
if (!best_error_iterations_.Serialize(fp)) return false;
|
||||||
return fp->Serialize(&improvement_steps_);
|
return fp->Serialize(&improvement_steps_);
|
||||||
@ -464,11 +464,11 @@ bool LSTMTrainer::DeSerialize(const TessdataManager* mgr, TFile* fp) {
|
|||||||
if (!fp->DeSerialize(&worst_error_rates_[0], countof(worst_error_rates_))) return false;
|
if (!fp->DeSerialize(&worst_error_rates_[0], countof(worst_error_rates_))) return false;
|
||||||
if (!fp->DeSerialize(&worst_iteration_)) return false;
|
if (!fp->DeSerialize(&worst_iteration_)) return false;
|
||||||
if (!fp->DeSerialize(&stall_iteration_)) return false;
|
if (!fp->DeSerialize(&stall_iteration_)) return false;
|
||||||
if (!best_model_data_.DeSerialize(fp)) return false;
|
if (!fp->DeSerialize(best_model_data_)) return false;
|
||||||
if (!worst_model_data_.DeSerialize(fp)) return false;
|
if (!fp->DeSerialize(worst_model_data_)) return false;
|
||||||
if (amount != NO_BEST_TRAINER && !best_trainer_.DeSerialize(fp)) return false;
|
if (amount != NO_BEST_TRAINER && !fp->DeSerialize(best_trainer_)) return false;
|
||||||
GenericVector<char> sub_data;
|
std::vector<char> sub_data;
|
||||||
if (!sub_data.DeSerialize(fp)) return false;
|
if (!fp->DeSerialize(sub_data)) return false;
|
||||||
delete sub_trainer_;
|
delete sub_trainer_;
|
||||||
if (sub_data.empty()) {
|
if (sub_data.empty()) {
|
||||||
sub_trainer_ = nullptr;
|
sub_trainer_ = nullptr;
|
||||||
@ -542,7 +542,7 @@ SubTrainerResult LSTMTrainer::UpdateSubtrainer(STRING* log_msg) {
|
|||||||
if (sub_error < best_error_rate_ &&
|
if (sub_error < best_error_rate_ &&
|
||||||
sub_margin >= kSubTrainerMarginFraction) {
|
sub_margin >= kSubTrainerMarginFraction) {
|
||||||
// The sub_trainer_ has won the race to a new best. Switch to it.
|
// The sub_trainer_ has won the race to a new best. Switch to it.
|
||||||
GenericVector<char> updated_trainer;
|
std::vector<char> updated_trainer;
|
||||||
SaveTrainingDump(LIGHT, sub_trainer_, &updated_trainer);
|
SaveTrainingDump(LIGHT, sub_trainer_, &updated_trainer);
|
||||||
ReadTrainingDump(updated_trainer, this);
|
ReadTrainingDump(updated_trainer, this);
|
||||||
log_msg->add_str_int(" Sub trainer wins at iteration ",
|
log_msg->add_str_int(" Sub trainer wins at iteration ",
|
||||||
@ -594,7 +594,7 @@ int LSTMTrainer::ReduceLayerLearningRates(double factor, int num_samples,
|
|||||||
ok_sums[i].init_to_size(num_layers, 0.0);
|
ok_sums[i].init_to_size(num_layers, 0.0);
|
||||||
}
|
}
|
||||||
double momentum_factor = 1.0 / (1.0 - momentum_);
|
double momentum_factor = 1.0 / (1.0 - momentum_);
|
||||||
GenericVector<char> orig_trainer;
|
std::vector<char> orig_trainer;
|
||||||
samples_trainer->SaveTrainingDump(LIGHT, this, &orig_trainer);
|
samples_trainer->SaveTrainingDump(LIGHT, this, &orig_trainer);
|
||||||
for (int i = 0; i < num_layers; ++i) {
|
for (int i = 0; i < num_layers; ++i) {
|
||||||
Network* layer = GetLayer(layers[i]);
|
Network* layer = GetLayer(layers[i]);
|
||||||
@ -624,7 +624,7 @@ int LSTMTrainer::ReduceLayerLearningRates(double factor, int num_samples,
|
|||||||
copy_trainer.TrainOnLine(samples_trainer, true);
|
copy_trainer.TrainOnLine(samples_trainer, true);
|
||||||
if (trainingdata == nullptr) continue;
|
if (trainingdata == nullptr) continue;
|
||||||
// We'll now use this trainer again for each layer.
|
// We'll now use this trainer again for each layer.
|
||||||
GenericVector<char> updated_trainer;
|
std::vector<char> updated_trainer;
|
||||||
samples_trainer->SaveTrainingDump(LIGHT, ©_trainer, &updated_trainer);
|
samples_trainer->SaveTrainingDump(LIGHT, ©_trainer, &updated_trainer);
|
||||||
for (int i = 0; i < num_layers; ++i) {
|
for (int i = 0; i < num_layers; ++i) {
|
||||||
if (num_weights[i] == 0) continue;
|
if (num_weights[i] == 0) continue;
|
||||||
@ -871,7 +871,7 @@ Trainability LSTMTrainer::PrepareForBackward(const ImageData* trainingdata,
|
|||||||
// actually serialized.
|
// actually serialized.
|
||||||
bool LSTMTrainer::SaveTrainingDump(SerializeAmount serialize_amount,
|
bool LSTMTrainer::SaveTrainingDump(SerializeAmount serialize_amount,
|
||||||
const LSTMTrainer* trainer,
|
const LSTMTrainer* trainer,
|
||||||
GenericVector<char>* data) const {
|
std::vector<char>* data) const {
|
||||||
TFile fp;
|
TFile fp;
|
||||||
fp.OpenWrite(data);
|
fp.OpenWrite(data);
|
||||||
return trainer->Serialize(serialize_amount, &mgr_, &fp);
|
return trainer->Serialize(serialize_amount, &mgr_, &fp);
|
||||||
@ -891,7 +891,7 @@ bool LSTMTrainer::ReadLocalTrainingDump(const TessdataManager* mgr,
|
|||||||
|
|
||||||
// Writes the full recognition traineddata to the given filename.
|
// Writes the full recognition traineddata to the given filename.
|
||||||
bool LSTMTrainer::SaveTraineddata(const char* filename) {
|
bool LSTMTrainer::SaveTraineddata(const char* filename) {
|
||||||
GenericVector<char> recognizer_data;
|
std::vector<char> recognizer_data;
|
||||||
SaveRecognitionDump(&recognizer_data);
|
SaveRecognitionDump(&recognizer_data);
|
||||||
mgr_.OverwriteEntry(TESSDATA_LSTM, &recognizer_data[0],
|
mgr_.OverwriteEntry(TESSDATA_LSTM, &recognizer_data[0],
|
||||||
recognizer_data.size());
|
recognizer_data.size());
|
||||||
@ -899,7 +899,7 @@ bool LSTMTrainer::SaveTraineddata(const char* filename) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Writes the recognizer to memory, so that it can be used for testing later.
|
// Writes the recognizer to memory, so that it can be used for testing later.
|
||||||
void LSTMTrainer::SaveRecognitionDump(GenericVector<char>* data) const {
|
void LSTMTrainer::SaveRecognitionDump(std::vector<char>* data) const {
|
||||||
TFile fp;
|
TFile fp;
|
||||||
fp.OpenWrite(data);
|
fp.OpenWrite(data);
|
||||||
network_->SetEnableTraining(TS_TEMP_DISABLE);
|
network_->SetEnableTraining(TS_TEMP_DISABLE);
|
||||||
@ -1260,7 +1260,7 @@ void LSTMTrainer::RollErrorBuffers() {
|
|||||||
// Tester is an externally supplied callback function that tests on some
|
// Tester is an externally supplied callback function that tests on some
|
||||||
// data set with a given model and records the error rates in a graph.
|
// data set with a given model and records the error rates in a graph.
|
||||||
STRING LSTMTrainer::UpdateErrorGraph(int iteration, double error_rate,
|
STRING LSTMTrainer::UpdateErrorGraph(int iteration, double error_rate,
|
||||||
const GenericVector<char>& model_data,
|
const std::vector<char>& model_data,
|
||||||
TestCallback tester) {
|
TestCallback tester) {
|
||||||
if (error_rate > best_error_rate_
|
if (error_rate > best_error_rate_
|
||||||
&& iteration < best_iteration_ + kErrorGraphInterval) {
|
&& iteration < best_iteration_ + kErrorGraphInterval) {
|
||||||
@ -1287,7 +1287,7 @@ STRING LSTMTrainer::UpdateErrorGraph(int iteration, double error_rate,
|
|||||||
worst_model_data_.size());
|
worst_model_data_.size());
|
||||||
result = tester(worst_iteration_, worst_error_rates_, mgr_,
|
result = tester(worst_iteration_, worst_error_rates_, mgr_,
|
||||||
CurrentTrainingStage());
|
CurrentTrainingStage());
|
||||||
worst_model_data_.truncate(0);
|
worst_model_data_.clear();
|
||||||
best_model_data_ = model_data;
|
best_model_data_ = model_data;
|
||||||
}
|
}
|
||||||
best_error_rate_ = error_rate;
|
best_error_rate_ = error_rate;
|
||||||
@ -1322,7 +1322,7 @@ STRING LSTMTrainer::UpdateErrorGraph(int iteration, double error_rate,
|
|||||||
CurrentTrainingStage());
|
CurrentTrainingStage());
|
||||||
}
|
}
|
||||||
if (result.length() > 0)
|
if (result.length() > 0)
|
||||||
best_model_data_.truncate(0);
|
best_model_data_.clear();
|
||||||
worst_model_data_ = model_data;
|
worst_model_data_ = model_data;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -135,7 +135,7 @@ class LSTMTrainer : public LSTMRecognizer {
|
|||||||
int learning_iteration() const { return learning_iteration_; }
|
int learning_iteration() const { return learning_iteration_; }
|
||||||
int32_t improvement_steps() const { return improvement_steps_; }
|
int32_t improvement_steps() const { return improvement_steps_; }
|
||||||
void set_perfect_delay(int delay) { perfect_delay_ = delay; }
|
void set_perfect_delay(int delay) { perfect_delay_ = delay; }
|
||||||
const GenericVector<char>& best_trainer() const { return best_trainer_; }
|
const std::vector<char>& best_trainer() const { return best_trainer_; }
|
||||||
// Returns the error that was just calculated by PrepareForBackward.
|
// Returns the error that was just calculated by PrepareForBackward.
|
||||||
double NewSingleError(ErrorTypes type) const {
|
double NewSingleError(ErrorTypes type) const {
|
||||||
return error_buffers_[type][training_iteration() % kRollingBufferSize_];
|
return error_buffers_[type][training_iteration() % kRollingBufferSize_];
|
||||||
@ -167,7 +167,7 @@ class LSTMTrainer : public LSTMRecognizer {
|
|||||||
// Loads a set of lstmf files that were created using the lstm.train config to
|
// Loads a set of lstmf files that were created using the lstm.train config to
|
||||||
// tesseract into memory ready for training. Returns false if nothing was
|
// tesseract into memory ready for training. Returns false if nothing was
|
||||||
// loaded.
|
// loaded.
|
||||||
bool LoadAllTrainingData(const GenericVector<STRING>& filenames,
|
bool LoadAllTrainingData(const std::vector<STRING>& filenames,
|
||||||
CachingStrategy cache_strategy,
|
CachingStrategy cache_strategy,
|
||||||
bool randomly_rotate);
|
bool randomly_rotate);
|
||||||
|
|
||||||
@ -269,7 +269,7 @@ class LSTMTrainer : public LSTMRecognizer {
|
|||||||
// actually serialized.
|
// actually serialized.
|
||||||
bool SaveTrainingDump(SerializeAmount serialize_amount,
|
bool SaveTrainingDump(SerializeAmount serialize_amount,
|
||||||
const LSTMTrainer* trainer,
|
const LSTMTrainer* trainer,
|
||||||
GenericVector<char>* data) const;
|
std::vector<char>* data) const;
|
||||||
|
|
||||||
// Reads previously saved trainer from memory. *this must always be the
|
// Reads previously saved trainer from memory. *this must always be the
|
||||||
// master trainer that retains the only copy of the training data and
|
// master trainer that retains the only copy of the training data and
|
||||||
@ -294,7 +294,7 @@ class LSTMTrainer : public LSTMRecognizer {
|
|||||||
bool SaveTraineddata(const char* filename);
|
bool SaveTraineddata(const char* filename);
|
||||||
|
|
||||||
// Writes the recognizer to memory, so that it can be used for testing later.
|
// Writes the recognizer to memory, so that it can be used for testing later.
|
||||||
void SaveRecognitionDump(GenericVector<char>* data) const;
|
void SaveRecognitionDump(std::vector<char>* data) const;
|
||||||
|
|
||||||
// Returns a suitable filename for a training dump, based on the model_base_,
|
// Returns a suitable filename for a training dump, based on the model_base_,
|
||||||
// the iteration and the error rates.
|
// the iteration and the error rates.
|
||||||
@ -375,7 +375,7 @@ class LSTMTrainer : public LSTMRecognizer {
|
|||||||
// Given that error_rate is either a new min or max, updates the best/worst
|
// Given that error_rate is either a new min or max, updates the best/worst
|
||||||
// error rates, and record of progress.
|
// error rates, and record of progress.
|
||||||
STRING UpdateErrorGraph(int iteration, double error_rate,
|
STRING UpdateErrorGraph(int iteration, double error_rate,
|
||||||
const GenericVector<char>& model_data,
|
const std::vector<char>& model_data,
|
||||||
TestCallback tester);
|
TestCallback tester);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
@ -420,10 +420,10 @@ class LSTMTrainer : public LSTMRecognizer {
|
|||||||
// Iteration at which the process will be thought stalled.
|
// Iteration at which the process will be thought stalled.
|
||||||
int stall_iteration_;
|
int stall_iteration_;
|
||||||
// Saved recognition models for computing test error for graph points.
|
// Saved recognition models for computing test error for graph points.
|
||||||
GenericVector<char> best_model_data_;
|
std::vector<char> best_model_data_;
|
||||||
GenericVector<char> worst_model_data_;
|
std::vector<char> worst_model_data_;
|
||||||
// Saved trainer for reverting back to last known best.
|
// Saved trainer for reverting back to last known best.
|
||||||
GenericVector<char> best_trainer_;
|
std::vector<char> best_trainer_;
|
||||||
// A subsidiary trainer running with a different learning rate until either
|
// A subsidiary trainer running with a different learning rate until either
|
||||||
// *this or sub_trainer_ hits a new best.
|
// *this or sub_trainer_ hits a new best.
|
||||||
LSTMTrainer* sub_trainer_;
|
LSTMTrainer* sub_trainer_;
|
||||||
|
@ -136,7 +136,7 @@ int main(int argc, char **argv) {
|
|||||||
tprintf("Must supply a list of training filenames! --train_listfile\n");
|
tprintf("Must supply a list of training filenames! --train_listfile\n");
|
||||||
return EXIT_FAILURE;
|
return EXIT_FAILURE;
|
||||||
}
|
}
|
||||||
GenericVector<STRING> filenames;
|
std::vector<STRING> filenames;
|
||||||
if (!tesseract::LoadFileLinesToStrings(FLAGS_train_listfile.c_str(),
|
if (!tesseract::LoadFileLinesToStrings(FLAGS_train_listfile.c_str(),
|
||||||
&filenames)) {
|
&filenames)) {
|
||||||
tprintf("Failed to load list of training filenames from %s\n",
|
tprintf("Failed to load list of training filenames from %s\n",
|
||||||
|
@ -264,7 +264,7 @@ SEAM *Wordrec::chop_numbered_blob(TWERD *word, int32_t blob_number,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
SEAM *Wordrec::chop_overlapping_blob(const GenericVector<TBOX>& boxes,
|
SEAM *Wordrec::chop_overlapping_blob(const std::vector<TBOX>& boxes,
|
||||||
bool italic_blob, WERD_RES *word_res,
|
bool italic_blob, WERD_RES *word_res,
|
||||||
int *blob_number) {
|
int *blob_number) {
|
||||||
TWERD *word = word_res->chopped_word;
|
TWERD *word = word_res->chopped_word;
|
||||||
@ -362,7 +362,7 @@ SEAM* Wordrec::improve_one_blob(const GenericVector<BLOB_CHOICE*>& blob_choices,
|
|||||||
* the worst blobs and try to divide it up to improve the ratings.
|
* the worst blobs and try to divide it up to improve the ratings.
|
||||||
* Used for testing chopper.
|
* Used for testing chopper.
|
||||||
*/
|
*/
|
||||||
SEAM* Wordrec::chop_one_blob(const GenericVector<TBOX>& boxes,
|
SEAM* Wordrec::chop_one_blob(const std::vector<TBOX>& boxes,
|
||||||
const GenericVector<BLOB_CHOICE*>& blob_choices,
|
const GenericVector<BLOB_CHOICE*>& blob_choices,
|
||||||
WERD_RES* word_res,
|
WERD_RES* word_res,
|
||||||
int* blob_number) {
|
int* blob_number) {
|
||||||
|
@ -370,7 +370,7 @@ class Wordrec : public Classify {
|
|||||||
bool italic_blob, const GenericVector<SEAM*>& seams);
|
bool italic_blob, const GenericVector<SEAM*>& seams);
|
||||||
SEAM *chop_numbered_blob(TWERD *word, int32_t blob_number,
|
SEAM *chop_numbered_blob(TWERD *word, int32_t blob_number,
|
||||||
bool italic_blob, const GenericVector<SEAM*>& seams);
|
bool italic_blob, const GenericVector<SEAM*>& seams);
|
||||||
SEAM *chop_overlapping_blob(const GenericVector<TBOX>& boxes,
|
SEAM *chop_overlapping_blob(const std::vector<TBOX>& boxes,
|
||||||
bool italic_blob,
|
bool italic_blob,
|
||||||
WERD_RES *word_res, int *blob_number);
|
WERD_RES *word_res, int *blob_number);
|
||||||
SEAM *improve_one_blob(const GenericVector<BLOB_CHOICE*> &blob_choices,
|
SEAM *improve_one_blob(const GenericVector<BLOB_CHOICE*> &blob_choices,
|
||||||
@ -379,7 +379,7 @@ class Wordrec : public Classify {
|
|||||||
bool italic_blob,
|
bool italic_blob,
|
||||||
WERD_RES *word,
|
WERD_RES *word,
|
||||||
int *blob_number);
|
int *blob_number);
|
||||||
SEAM *chop_one_blob(const GenericVector<TBOX> &boxes,
|
SEAM *chop_one_blob(const std::vector<TBOX> &boxes,
|
||||||
const GenericVector<BLOB_CHOICE*> &blob_choices,
|
const GenericVector<BLOB_CHOICE*> &blob_choices,
|
||||||
WERD_RES *word_res,
|
WERD_RES *word_res,
|
||||||
int *blob_number);
|
int *blob_number);
|
||||||
|
@ -97,7 +97,7 @@ TEST_F(ImagedataTest, CachesMultiDocs) {
|
|||||||
// Number of pages in each document.
|
// Number of pages in each document.
|
||||||
const std::vector<int> kNumPages = {6, 5, 7};
|
const std::vector<int> kNumPages = {6, 5, 7};
|
||||||
std::vector<std::vector<std::string>> page_texts;
|
std::vector<std::vector<std::string>> page_texts;
|
||||||
GenericVector<STRING> filenames;
|
std::vector<STRING> filenames;
|
||||||
for (size_t d = 0; d < kNumPages.size(); ++d) {
|
for (size_t d = 0; d < kNumPages.size(); ++d) {
|
||||||
page_texts.emplace_back(std::vector<std::string>());
|
page_texts.emplace_back(std::vector<std::string>());
|
||||||
std::string filename = MakeFakeDoc(kNumPages[d], d, &page_texts.back());
|
std::string filename = MakeFakeDoc(kNumPages[d], d, &page_texts.back());
|
||||||
|
@ -45,7 +45,7 @@ TEST(LangModelTest, AddACharacter) {
|
|||||||
LOG(INFO) << "Output dir=" << output_dir << "\n";
|
LOG(INFO) << "Output dir=" << output_dir << "\n";
|
||||||
std::string lang1 = "eng";
|
std::string lang1 = "eng";
|
||||||
bool pass_through_recoder = false;
|
bool pass_through_recoder = false;
|
||||||
GenericVector<STRING> words, puncs, numbers;
|
std::vector<STRING> words, puncs, numbers;
|
||||||
// If these reads fail, we get a warning message and an empty list of words.
|
// If these reads fail, we get a warning message and an empty list of words.
|
||||||
ReadFile(file::JoinPath(eng_dir, "eng.wordlist"), nullptr)
|
ReadFile(file::JoinPath(eng_dir, "eng.wordlist"), nullptr)
|
||||||
.split('\n', &words);
|
.split('\n', &words);
|
||||||
@ -136,7 +136,7 @@ TEST(LangModelTest, AddACharacterHindi) {
|
|||||||
LOG(INFO) << "Output dir=" << output_dir << "\n";
|
LOG(INFO) << "Output dir=" << output_dir << "\n";
|
||||||
std::string lang1 = "hin";
|
std::string lang1 = "hin";
|
||||||
bool pass_through_recoder = false;
|
bool pass_through_recoder = false;
|
||||||
GenericVector<STRING> words, puncs, numbers;
|
std::vector<STRING> words, puncs, numbers;
|
||||||
// If these reads fail, we get a warning message and an empty list of words.
|
// If these reads fail, we get a warning message and an empty list of words.
|
||||||
ReadFile(file::JoinPath(hin_dir, "hin.wordlist"), nullptr)
|
ReadFile(file::JoinPath(hin_dir, "hin.wordlist"), nullptr)
|
||||||
.split('\n', &words);
|
.split('\n', &words);
|
||||||
|
@ -114,7 +114,7 @@ TEST_F(LSTMTrainerTest, DeterminismTest) {
|
|||||||
double lstm_2d_err_a = TrainIterations(kTrainerIterations);
|
double lstm_2d_err_a = TrainIterations(kTrainerIterations);
|
||||||
double act_error_a = trainer_->ActivationError();
|
double act_error_a = trainer_->ActivationError();
|
||||||
double char_error_a = trainer_->CharError();
|
double char_error_a = trainer_->CharError();
|
||||||
GenericVector<char> trainer_a_data;
|
std::vector<char> trainer_a_data;
|
||||||
EXPECT_TRUE(trainer_->SaveTrainingDump(NO_BEST_TRAINER, trainer_.get(),
|
EXPECT_TRUE(trainer_->SaveTrainingDump(NO_BEST_TRAINER, trainer_.get(),
|
||||||
&trainer_a_data));
|
&trainer_a_data));
|
||||||
SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]",
|
SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]",
|
||||||
|
@ -78,7 +78,7 @@ class LSTMTrainerTest : public testing::Test {
|
|||||||
ASSERT_TRUE(unicharset.load_from_file(unicharset_name.c_str(), false));
|
ASSERT_TRUE(unicharset.load_from_file(unicharset_name.c_str(), false));
|
||||||
std::string script_dir = file::JoinPath(
|
std::string script_dir = file::JoinPath(
|
||||||
LANGDATA_DIR, "");
|
LANGDATA_DIR, "");
|
||||||
GenericVector<STRING> words;
|
std::vector<STRING> words;
|
||||||
EXPECT_EQ(0, CombineLangModel(unicharset, script_dir, "", FLAGS_test_tmpdir,
|
EXPECT_EQ(0, CombineLangModel(unicharset, script_dir, "", FLAGS_test_tmpdir,
|
||||||
kLang, !recode, words, words, words, false,
|
kLang, !recode, words, words, words, false,
|
||||||
nullptr, nullptr));
|
nullptr, nullptr));
|
||||||
@ -95,7 +95,7 @@ class LSTMTrainerTest : public testing::Test {
|
|||||||
if (layer_specific) net_mode |= NF_LAYER_SPECIFIC_LR;
|
if (layer_specific) net_mode |= NF_LAYER_SPECIFIC_LR;
|
||||||
EXPECT_TRUE(trainer_->InitNetwork(network_spec.c_str(), -1, net_mode, 0.1,
|
EXPECT_TRUE(trainer_->InitNetwork(network_spec.c_str(), -1, net_mode, 0.1,
|
||||||
learning_rate, 0.9, 0.999));
|
learning_rate, 0.9, 0.999));
|
||||||
GenericVector<STRING> filenames;
|
std::vector<STRING> filenames;
|
||||||
filenames.push_back(STRING(TestDataNameToPath(lstmf_file).c_str()));
|
filenames.push_back(STRING(TestDataNameToPath(lstmf_file).c_str()));
|
||||||
EXPECT_TRUE(trainer_->LoadAllTrainingData(filenames, CS_SEQUENTIAL, false));
|
EXPECT_TRUE(trainer_->LoadAllTrainingData(filenames, CS_SEQUENTIAL, false));
|
||||||
LOG(INFO) << "Setup network:" << model_name << "\n" ;
|
LOG(INFO) << "Setup network:" << model_name << "\n" ;
|
||||||
@ -151,7 +151,7 @@ class LSTMTrainerTest : public testing::Test {
|
|||||||
// within 1% of the error rate. Returns the increase in error from float to
|
// within 1% of the error rate. Returns the increase in error from float to
|
||||||
// int.
|
// int.
|
||||||
double TestIntMode(int test_iterations) {
|
double TestIntMode(int test_iterations) {
|
||||||
GenericVector<char> trainer_data;
|
std::vector<char> trainer_data;
|
||||||
EXPECT_TRUE(trainer_->SaveTrainingDump(NO_BEST_TRAINER, trainer_.get(),
|
EXPECT_TRUE(trainer_->SaveTrainingDump(NO_BEST_TRAINER, trainer_.get(),
|
||||||
&trainer_data));
|
&trainer_data));
|
||||||
// Get the error on the next few iterations in float mode.
|
// Get the error on the next few iterations in float mode.
|
||||||
|
@ -192,7 +192,7 @@ void TestParagraphDetection(const TextAndModel* correct, int num_rows) {
|
|||||||
GenericVector<RowInfo> row_infos;
|
GenericVector<RowInfo> row_infos;
|
||||||
GenericVector<PARA*> row_owners;
|
GenericVector<PARA*> row_owners;
|
||||||
PARA_LIST paragraphs;
|
PARA_LIST paragraphs;
|
||||||
std::list<ParagraphModel*> models;
|
std::vector<ParagraphModel*> models;
|
||||||
|
|
||||||
MakeAsciiRowInfos(correct, num_rows, &row_infos);
|
MakeAsciiRowInfos(correct, num_rows, &row_infos);
|
||||||
int debug_level(3);
|
int debug_level(3);
|
||||||
@ -324,7 +324,7 @@ TEST(ParagraphsTest, TestSingleFullPageContinuation) {
|
|||||||
GenericVector<tesseract::RowInfo> row_infos;
|
GenericVector<tesseract::RowInfo> row_infos;
|
||||||
GenericVector<PARA*> row_owners;
|
GenericVector<PARA*> row_owners;
|
||||||
PARA_LIST paragraphs;
|
PARA_LIST paragraphs;
|
||||||
std::list<ParagraphModel*> models;
|
std::vector<ParagraphModel*> models;
|
||||||
models.push_back(new ParagraphModel(kLeft, 0, 20, 0, 10));
|
models.push_back(new ParagraphModel(kLeft, 0, 20, 0, 10));
|
||||||
MakeAsciiRowInfos(correct, num_rows, &row_infos);
|
MakeAsciiRowInfos(correct, num_rows, &row_infos);
|
||||||
tesseract::DetectParagraphs(3, &row_infos, &row_owners, &paragraphs, &models);
|
tesseract::DetectParagraphs(3, &row_infos, &row_owners, &paragraphs, &models);
|
||||||
|
@ -28,6 +28,12 @@ void ToVector(const GenericVectorEqEq<T>& from, std::vector<T>* to) {
|
|||||||
for (int i = 0; i < from.size(); i++) to->push_back(from[i]);
|
for (int i = 0; i < from.size(); i++) to->push_back(from[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void ToVector(const std::vector<T>& from, std::vector<T>* to) {
|
||||||
|
to->clear();
|
||||||
|
for (int i = 0; i < from.size(); i++) to->push_back(from[i]);
|
||||||
|
}
|
||||||
|
|
||||||
// The fixture for testing Tesseract.
|
// The fixture for testing Tesseract.
|
||||||
class ResultIteratorTest : public testing::Test {
|
class ResultIteratorTest : public testing::Test {
|
||||||
protected:
|
protected:
|
||||||
@ -167,12 +173,12 @@ class ResultIteratorTest : public testing::Test {
|
|||||||
const StrongScriptDirection* word_dirs,
|
const StrongScriptDirection* word_dirs,
|
||||||
int num_words, int* expected_reading_order,
|
int num_words, int* expected_reading_order,
|
||||||
int num_reading_order_entries) const {
|
int num_reading_order_entries) const {
|
||||||
GenericVector<StrongScriptDirection> gv_word_dirs;
|
std::vector<StrongScriptDirection> gv_word_dirs;
|
||||||
for (int i = 0; i < num_words; i++) {
|
for (int i = 0; i < num_words; i++) {
|
||||||
gv_word_dirs.push_back(word_dirs[i]);
|
gv_word_dirs.push_back(word_dirs[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
GenericVectorEqEq<int> output;
|
std::vector<int> output;
|
||||||
ResultIterator::CalculateTextlineOrder(in_ltr_context, gv_word_dirs,
|
ResultIterator::CalculateTextlineOrder(in_ltr_context, gv_word_dirs,
|
||||||
&output);
|
&output);
|
||||||
// STL vector can be used with EXPECT_EQ, so convert...
|
// STL vector can be used with EXPECT_EQ, so convert...
|
||||||
@ -191,17 +197,17 @@ class ResultIteratorTest : public testing::Test {
|
|||||||
void VerifySaneTextlineOrder(bool in_ltr_context,
|
void VerifySaneTextlineOrder(bool in_ltr_context,
|
||||||
const StrongScriptDirection* word_dirs,
|
const StrongScriptDirection* word_dirs,
|
||||||
int num_words) const {
|
int num_words) const {
|
||||||
GenericVector<StrongScriptDirection> gv_word_dirs;
|
std::vector<StrongScriptDirection> gv_word_dirs;
|
||||||
for (int i = 0; i < num_words; i++) {
|
for (int i = 0; i < num_words; i++) {
|
||||||
gv_word_dirs.push_back(word_dirs[i]);
|
gv_word_dirs.push_back(word_dirs[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
GenericVectorEqEq<int> output;
|
std::vector<int> output;
|
||||||
ResultIterator::CalculateTextlineOrder(in_ltr_context, gv_word_dirs,
|
ResultIterator::CalculateTextlineOrder(in_ltr_context, gv_word_dirs,
|
||||||
&output);
|
&output);
|
||||||
ASSERT_GE(output.size(), num_words);
|
ASSERT_GE(output.size(), num_words);
|
||||||
GenericVector<int> output_copy(output);
|
std::vector<int> output_copy(output);
|
||||||
output_copy.sort();
|
std::sort(output_copy.begin(), output_copy.end());
|
||||||
bool sane = true;
|
bool sane = true;
|
||||||
int j = 0;
|
int j = 0;
|
||||||
while (j < output_copy.size() && output_copy[j] < 0) j++;
|
while (j < output_copy.size() && output_copy[j] < 0) j++;
|
||||||
|
@ -18,7 +18,6 @@
|
|||||||
#include "boxchar.h"
|
#include "boxchar.h"
|
||||||
#include "boxread.h"
|
#include "boxread.h"
|
||||||
#include "commandlineflags.h"
|
#include "commandlineflags.h"
|
||||||
#include "genericvector.h"
|
|
||||||
#include "include_gunit.h"
|
#include "include_gunit.h"
|
||||||
#include "stringrenderer.h"
|
#include "stringrenderer.h"
|
||||||
#include "strngs.h"
|
#include "strngs.h"
|
||||||
@ -227,7 +226,7 @@ TEST_F(StringRendererTest, ArabicBoxcharsInLTROrder) {
|
|||||||
std::string boxes_str = renderer_->GetBoxesStr();
|
std::string boxes_str = renderer_->GetBoxesStr();
|
||||||
// Decode to get the box text strings.
|
// Decode to get the box text strings.
|
||||||
EXPECT_FALSE(boxes_str.empty());
|
EXPECT_FALSE(boxes_str.empty());
|
||||||
GenericVector<STRING> texts;
|
std::vector<STRING> texts;
|
||||||
EXPECT_TRUE(ReadMemBoxes(0, false, boxes_str.c_str(), false, nullptr, &texts,
|
EXPECT_TRUE(ReadMemBoxes(0, false, boxes_str.c_str(), false, nullptr, &texts,
|
||||||
nullptr, nullptr));
|
nullptr, nullptr));
|
||||||
std::string ltr_str;
|
std::string ltr_str;
|
||||||
|
@ -16,7 +16,7 @@
|
|||||||
|
|
||||||
namespace tesseract {
|
namespace tesseract {
|
||||||
|
|
||||||
// Tests TFile and GenericVector serialization by serializing and
|
// Tests TFile and std::vector serialization by serializing and
|
||||||
// writing/reading.
|
// writing/reading.
|
||||||
|
|
||||||
class TfileTest : public ::testing::Test {
|
class TfileTest : public ::testing::Test {
|
||||||
@ -115,7 +115,7 @@ TEST_F(TfileTest, Serialize) {
|
|||||||
// This test verifies that Tfile can serialize a class.
|
// This test verifies that Tfile can serialize a class.
|
||||||
MathData m1;
|
MathData m1;
|
||||||
m1.Setup();
|
m1.Setup();
|
||||||
GenericVector<char> data;
|
std::vector<char> data;
|
||||||
TFile fpw;
|
TFile fpw;
|
||||||
fpw.OpenWrite(&data);
|
fpw.OpenWrite(&data);
|
||||||
EXPECT_TRUE(m1.Serialize(&fpw));
|
EXPECT_TRUE(m1.Serialize(&fpw));
|
||||||
@ -136,7 +136,7 @@ TEST_F(TfileTest, FGets) {
|
|||||||
MathData m1;
|
MathData m1;
|
||||||
std::string line_str = "This is a textline with a newline\n";
|
std::string line_str = "This is a textline with a newline\n";
|
||||||
m1.Setup();
|
m1.Setup();
|
||||||
GenericVector<char> data;
|
std::vector<char> data;
|
||||||
TFile fpw;
|
TFile fpw;
|
||||||
fpw.OpenWrite(&data);
|
fpw.OpenWrite(&data);
|
||||||
EXPECT_TRUE(m1.Serialize(&fpw));
|
EXPECT_TRUE(m1.Serialize(&fpw));
|
||||||
@ -161,7 +161,7 @@ TEST_F(TfileTest, BigEndian) {
|
|||||||
// This test verifies that Tfile can auto-reverse big-endian data.
|
// This test verifies that Tfile can auto-reverse big-endian data.
|
||||||
MathData m1;
|
MathData m1;
|
||||||
m1.Setup();
|
m1.Setup();
|
||||||
GenericVector<char> data;
|
std::vector<char> data;
|
||||||
TFile fpw;
|
TFile fpw;
|
||||||
fpw.OpenWrite(&data);
|
fpw.OpenWrite(&data);
|
||||||
EXPECT_TRUE(m1.SerializeBigEndian(&fpw));
|
EXPECT_TRUE(m1.SerializeBigEndian(&fpw));
|
||||||
|
@ -57,7 +57,7 @@ class UnicharcompressTest : public ::testing::Test {
|
|||||||
}
|
}
|
||||||
// Serializes and de-serializes compressed_ over itself.
|
// Serializes and de-serializes compressed_ over itself.
|
||||||
void SerializeAndUndo() {
|
void SerializeAndUndo() {
|
||||||
GenericVector<char> data;
|
std::vector<char> data;
|
||||||
TFile wfp;
|
TFile wfp;
|
||||||
wfp.OpenWrite(&data);
|
wfp.OpenWrite(&data);
|
||||||
EXPECT_TRUE(compressed_.Serialize(&wfp));
|
EXPECT_TRUE(compressed_.Serialize(&wfp));
|
||||||
|
@ -133,7 +133,7 @@ TEST(UnicharsetTest, MultibyteBigrams) {
|
|||||||
// It is added if we force it to be.
|
// It is added if we force it to be.
|
||||||
u.unichar_insert("\u0ccd\u0cad", OldUncleanUnichars::kTrue);
|
u.unichar_insert("\u0ccd\u0cad", OldUncleanUnichars::kTrue);
|
||||||
EXPECT_EQ(u.size(), 8);
|
EXPECT_EQ(u.size(), 8);
|
||||||
GenericVector<char> data;
|
std::vector<char> data;
|
||||||
tesseract::TFile fp;
|
tesseract::TFile fp;
|
||||||
fp.OpenWrite(&data);
|
fp.OpenWrite(&data);
|
||||||
u.save_to_file(&fp);
|
u.save_to_file(&fp);
|
||||||
|
Loading…
Reference in New Issue
Block a user