From 8e79297dcefecdb929d753d28554fec51417ec39 Mon Sep 17 00:00:00 2001 From: Ray Smith Date: Wed, 3 May 2017 16:09:44 -0700 Subject: [PATCH] Final part of endian improvement. Adds big-endian support to lstm and fixes issue 518 --- ccmain/linerec.cpp | 2 +- ccmain/tessedit.cpp | 8 +--- ccstruct/fontinfo.cpp | 39 +++++++++---------- ccstruct/fontinfo.h | 10 ++--- ccstruct/imagedata.cpp | 56 ++++++++++++++------------- ccstruct/imagedata.h | 16 +++----- ccstruct/matrix.h | 23 ++++-------- ccutil/genericvector.h | 75 +++++++++++++++---------------------- ccutil/serialis.cpp | 15 ++++++-- ccutil/serialis.h | 8 +++- ccutil/strngs.cpp | 11 ++---- ccutil/strngs.h | 4 +- ccutil/tessdatamanager.cpp | 6 ++- ccutil/unicharcompress.cpp | 5 +-- ccutil/unicharcompress.h | 17 +++------ ccutil/unicity_table.h | 9 ++--- classify/adaptive.cpp | 2 +- classify/adaptmatch.cpp | 6 +-- classify/classify.h | 4 +- classify/cutoffs.cpp | 5 +-- classify/intproto.cpp | 43 +++++++++++---------- classify/shapetable.cpp | 22 +++++------ classify/shapetable.h | 9 ++--- dict/dawg.cpp | 19 +++++----- lstm/convolve.cpp | 11 ++---- lstm/convolve.h | 3 +- lstm/fullyconnected.cpp | 6 +-- lstm/fullyconnected.h | 3 +- lstm/input.cpp | 7 +--- lstm/input.h | 4 +- lstm/lstm.cpp | 16 ++++---- lstm/lstm.h | 3 +- lstm/lstmrecognizer.cpp | 27 ++++++------- lstm/lstmrecognizer.h | 3 +- lstm/lstmtrainer.cpp | 59 ++++++++++++++--------------- lstm/lstmtrainer.h | 3 +- lstm/maxpool.cpp | 5 +-- lstm/maxpool.h | 3 +- lstm/network.cpp | 30 ++++++--------- lstm/network.h | 6 +-- lstm/plumbing.cpp | 9 ++--- lstm/plumbing.h | 3 +- lstm/reconfig.cpp | 11 ++---- lstm/reconfig.h | 3 +- lstm/tfnetwork.cpp | 5 +-- lstm/tfnetwork.h | 3 +- lstm/weightmatrix.cpp | 28 +++++++------- lstm/weightmatrix.h | 5 +-- training/commontraining.cpp | 2 +- training/lstmtester.cpp | 3 +- 50 files changed, 299 insertions(+), 376 deletions(-) diff --git a/ccmain/linerec.cpp b/ccmain/linerec.cpp index 6c2421000..08f5f27fb 100644 --- a/ccmain/linerec.cpp +++ b/ccmain/linerec.cpp @@ -49,7 +49,7 @@ void Tesseract::TrainLineRecognizer(const STRING& input_imagename, DocumentData images(lstmf_name); if (applybox_page > 0) { // Load existing document for the previous pages. - if (!images.LoadDocument(lstmf_name.string(), "eng", 0, 0, NULL)) { + if (!images.LoadDocument(lstmf_name.string(), 0, 0, nullptr)) { tprintf("Failed to read training data from %s!\n", lstmf_name.string()); return; } diff --git a/ccmain/tessedit.cpp b/ccmain/tessedit.cpp index defea65e4..e239c464b 100644 --- a/ccmain/tessedit.cpp +++ b/ccmain/tessedit.cpp @@ -188,13 +188,9 @@ bool Tesseract::init_tesseract_lang_data( #ifndef ANDROID_BUILD if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY || tessedit_ocr_engine_mode == OEM_TESSERACT_LSTM_COMBINED) { - if (mgr->swap()) { - tprintf("Error: LSTM requested on big-endian hardware!!\n"); - tprintf("Big-endian not yet supported! Loading tesseract.\n"); - tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_ONLY); - } else if (mgr->GetComponent(TESSDATA_LSTM, &fp)) { + if (mgr->GetComponent(TESSDATA_LSTM, &fp)) { lstm_recognizer_ = new LSTMRecognizer; - ASSERT_HOST(lstm_recognizer_->DeSerialize(mgr->swap(), &fp)); + ASSERT_HOST(lstm_recognizer_->DeSerialize(&fp)); if (lstm_use_matrix) lstm_recognizer_->LoadDictionary(language, mgr); } else { tprintf("Error: LSTM requested, but not present!! Loading tesseract.\n"); diff --git a/ccstruct/fontinfo.cpp b/ccstruct/fontinfo.cpp index c3cda8259..796c05ee8 100644 --- a/ccstruct/fontinfo.cpp +++ b/ccstruct/fontinfo.cpp @@ -31,9 +31,9 @@ bool FontInfo::Serialize(FILE* fp) const { } // Reads from the given file. Returns false in case of error. // If swap is true, assumes a big/little-endian swap is needed. -bool FontInfo::DeSerialize(bool swap, TFile* fp) { - if (!read_info(fp, this, swap)) return false; - if (!read_spacing_info(fp, this, swap)) return false; +bool FontInfo::DeSerialize(TFile* fp) { + if (!read_info(fp, this)) return false; + if (!read_spacing_info(fp, this)) return false; return true; } @@ -51,9 +51,9 @@ bool FontInfoTable::Serialize(FILE* fp) const { } // Reads from the given file. Returns false in case of error. // If swap is true, assumes a big/little-endian swap is needed. -bool FontInfoTable::DeSerialize(bool swap, TFile* fp) { +bool FontInfoTable::DeSerialize(TFile* fp) { truncate(0); - return this->DeSerializeClasses(swap, fp); + return this->DeSerializeClasses(fp); } // Returns true if the given set of fonts includes one with the same @@ -149,14 +149,14 @@ void FontSetDeleteCallback(FontSet fs) { /*---------------------------------------------------------------------------*/ // Callbacks used by UnicityTable to read/write FontInfo/FontSet structures. -bool read_info(TFile* f, FontInfo* fi, bool swap) { +bool read_info(TFile* f, FontInfo* fi) { inT32 size; - if (f->FReadEndian(&size, sizeof(size), 1, swap) != 1) return false; + if (f->FReadEndian(&size, sizeof(size), 1) != 1) return false; char* font_name = new char[size + 1]; fi->name = font_name; if (f->FRead(font_name, sizeof(*font_name), size) != size) return false; font_name[size] = '\0'; - if (f->FReadEndian(&fi->properties, sizeof(fi->properties), 1, swap) != 1) + if (f->FReadEndian(&fi->properties, sizeof(fi->properties), 1) != 1) return false; return true; } @@ -170,19 +170,17 @@ bool write_info(FILE* f, const FontInfo& fi) { return true; } -bool read_spacing_info(TFile* f, FontInfo* fi, bool swap) { +bool read_spacing_info(TFile* f, FontInfo* fi) { inT32 vec_size, kern_size; - if (f->FReadEndian(&vec_size, sizeof(vec_size), 1, swap) != 1) return false; + if (f->FReadEndian(&vec_size, sizeof(vec_size), 1) != 1) return false; ASSERT_HOST(vec_size >= 0); if (vec_size == 0) return true; fi->init_spacing(vec_size); for (int i = 0; i < vec_size; ++i) { FontSpacingInfo *fs = new FontSpacingInfo(); - if (f->FReadEndian(&fs->x_gap_before, sizeof(fs->x_gap_before), 1, swap) != - 1 || - f->FReadEndian(&fs->x_gap_after, sizeof(fs->x_gap_after), 1, swap) != - 1 || - f->FReadEndian(&kern_size, sizeof(kern_size), 1, swap) != 1) { + if (f->FReadEndian(&fs->x_gap_before, sizeof(fs->x_gap_before), 1) != 1 || + f->FReadEndian(&fs->x_gap_after, sizeof(fs->x_gap_after), 1) != 1 || + f->FReadEndian(&kern_size, sizeof(kern_size), 1) != 1) { delete fs; return false; } @@ -190,8 +188,8 @@ bool read_spacing_info(TFile* f, FontInfo* fi, bool swap) { delete fs; continue; } - if (kern_size > 0 && (!fs->kerned_unichar_ids.DeSerialize(swap, f) || - !fs->kerned_x_gaps.DeSerialize(swap, f))) { + if (kern_size > 0 && (!fs->kerned_unichar_ids.DeSerialize(f) || + !fs->kerned_x_gaps.DeSerialize(f))) { delete fs; return false; } @@ -229,11 +227,10 @@ bool write_spacing_info(FILE* f, const FontInfo& fi) { return true; } -bool read_set(TFile* f, FontSet* fs, bool swap) { - if (f->FReadEndian(&fs->size, sizeof(fs->size), 1, swap) != 1) return false; +bool read_set(TFile* f, FontSet* fs) { + if (f->FReadEndian(&fs->size, sizeof(fs->size), 1) != 1) return false; fs->configs = new int[fs->size]; - if (f->FReadEndian(fs->configs, sizeof(fs->configs[0]), fs->size, swap) != - fs->size) + if (f->FReadEndian(fs->configs, sizeof(fs->configs[0]), fs->size) != fs->size) return false; return true; } diff --git a/ccstruct/fontinfo.h b/ccstruct/fontinfo.h index abeaa096e..9a6426560 100644 --- a/ccstruct/fontinfo.h +++ b/ccstruct/fontinfo.h @@ -67,7 +67,7 @@ struct FontInfo { bool Serialize(FILE* fp) const; // Reads from the given file. Returns false in case of error. // If swap is true, assumes a big/little-endian swap is needed. - bool DeSerialize(bool swap, TFile* fp); + bool DeSerialize(TFile* fp); // Reserves unicharset_size spots in spacing_vec. void init_spacing(int unicharset_size) { @@ -152,7 +152,7 @@ class FontInfoTable : public GenericVector { bool Serialize(FILE* fp) const; // Reads from the given file. Returns false in case of error. // If swap is true, assumes a big/little-endian swap is needed. - bool DeSerialize(bool swap, TFile* fp); + bool DeSerialize(TFile* fp); // Returns true if the given set of fonts includes one with the same // properties as font_id. @@ -177,11 +177,11 @@ void FontInfoDeleteCallback(FontInfo f); void FontSetDeleteCallback(FontSet fs); // Callbacks used by UnicityTable to read/write FontInfo/FontSet structures. -bool read_info(TFile* f, FontInfo* fi, bool swap); +bool read_info(TFile* f, FontInfo* fi); bool write_info(FILE* f, const FontInfo& fi); -bool read_spacing_info(TFile* f, FontInfo* fi, bool swap); +bool read_spacing_info(TFile* f, FontInfo* fi); bool write_spacing_info(FILE* f, const FontInfo& fi); -bool read_set(TFile* f, FontSet* fs, bool swap); +bool read_set(TFile* f, FontSet* fs); bool write_set(FILE* f, const FontSet& fs); } // namespace tesseract. diff --git a/ccstruct/imagedata.cpp b/ccstruct/imagedata.cpp index ce185a98c..a567b3b0c 100644 --- a/ccstruct/imagedata.cpp +++ b/ccstruct/imagedata.cpp @@ -166,6 +166,7 @@ bool ImageData::Serialize(TFile* fp) const { if (!imagefilename_.Serialize(fp)) return false; if (fp->FWrite(&page_number_, sizeof(page_number_), 1) != 1) return false; if (!image_data_.Serialize(fp)) return false; + if (!language_.Serialize(fp)) return false; if (!transcription_.Serialize(fp)) return false; // WARNING: Will not work across different endian machines. if (!boxes_.Serialize(fp)) return false; @@ -177,15 +178,16 @@ bool ImageData::Serialize(TFile* fp) const { // Reads from the given file. Returns false in case of error. // If swap is true, assumes a big/little-endian swap is needed. -bool ImageData::DeSerialize(bool swap, TFile* fp) { - if (!imagefilename_.DeSerialize(swap, fp)) return false; - if (fp->FRead(&page_number_, sizeof(page_number_), 1) != 1) return false; - if (swap) ReverseN(&page_number_, sizeof(page_number_)); - if (!image_data_.DeSerialize(swap, fp)) return false; - if (!transcription_.DeSerialize(swap, fp)) return false; +bool ImageData::DeSerialize(TFile* fp) { + if (!imagefilename_.DeSerialize(fp)) return false; + if (fp->FReadEndian(&page_number_, sizeof(page_number_), 1) != 1) + return false; + if (!image_data_.DeSerialize(fp)) return false; + if (!language_.DeSerialize(fp)) return false; + if (!transcription_.DeSerialize(fp)) return false; // WARNING: Will not work across different endian machines. - if (!boxes_.DeSerialize(swap, fp)) return false; - if (!box_texts_.DeSerializeClasses(swap, fp)) return false; + if (!boxes_.DeSerialize(fp)) return false; + if (!box_texts_.DeSerializeClasses(fp)) return false; inT8 vertical = 0; if (fp->FRead(&vertical, sizeof(vertical), 1) != 1) return false; vertical_text_ = vertical != 0; @@ -193,14 +195,15 @@ bool ImageData::DeSerialize(bool swap, TFile* fp) { } // As DeSerialize, but only seeks past the data - hence a static method. -bool ImageData::SkipDeSerialize(bool swap, TFile* fp) { - if (!STRING::SkipDeSerialize(swap, fp)) return false; +bool ImageData::SkipDeSerialize(TFile* fp) { + if (!STRING::SkipDeSerialize(fp)) return false; inT32 page_number; if (fp->FRead(&page_number, sizeof(page_number), 1) != 1) return false; - if (!GenericVector::SkipDeSerialize(swap, fp)) return false; - if (!STRING::SkipDeSerialize(swap, fp)) return false; - if (!GenericVector::SkipDeSerialize(swap, fp)) return false; - if (!GenericVector::SkipDeSerializeClasses(swap, fp)) return false; + if (!GenericVector::SkipDeSerialize(fp)) return false; + if (!STRING::SkipDeSerialize(fp)) return false; + if (!STRING::SkipDeSerialize(fp)) return false; + if (!GenericVector::SkipDeSerialize(fp)) return false; + if (!GenericVector::SkipDeSerializeClasses(fp)) return false; inT8 vertical = 0; return fp->FRead(&vertical, sizeof(vertical), 1) == 1; } @@ -384,21 +387,19 @@ DocumentData::~DocumentData() { // Reads all the pages in the given lstmf filename to the cache. The reader // is used to read the file. -bool DocumentData::LoadDocument(const char* filename, const char* lang, - int start_page, inT64 max_memory, - FileReader reader) { - SetDocument(filename, lang, max_memory, reader); +bool DocumentData::LoadDocument(const char* filename, int start_page, + inT64 max_memory, FileReader reader) { + SetDocument(filename, max_memory, reader); pages_offset_ = start_page; return ReCachePages(); } // Sets up the document, without actually loading it. -void DocumentData::SetDocument(const char* filename, const char* lang, - inT64 max_memory, FileReader reader) { +void DocumentData::SetDocument(const char* filename, inT64 max_memory, + FileReader reader) { SVAutoLock lock_p(&pages_mutex_); SVAutoLock lock(&general_mutex_); document_name_ = filename; - lang_ = lang; pages_offset_ = -1; max_memory_ = max_memory; reader_ = reader; @@ -522,7 +523,7 @@ bool DocumentData::ReCachePages() { pages_.truncate(0); TFile fp; if (!fp.Open(document_name_, reader_) || - !PointerVector::DeSerializeSize(false, &fp, &loaded_pages) || + !PointerVector::DeSerializeSize(&fp, &loaded_pages) || loaded_pages <= 0) { tprintf("Deserialize header failed: %s\n", document_name_.string()); return false; @@ -534,15 +535,17 @@ bool DocumentData::ReCachePages() { for (page = 0; page < loaded_pages; ++page) { if (page < pages_offset_ || (max_memory_ > 0 && memory_used() > max_memory_)) { - if (!PointerVector::DeSerializeSkip(false, &fp)) break; + if (!PointerVector::DeSerializeSkip(&fp)) { + tprintf("Deserializeskip failed\n"); + break; + } } else { - if (!pages_.DeSerializeElement(false, &fp)) break; + if (!pages_.DeSerializeElement(&fp)) break; ImageData* image_data = pages_.back(); if (image_data->imagefilename().length() == 0) { image_data->set_imagefilename(document_name_); image_data->set_page_number(page); } - image_data->set_language(lang_); set_memory_used(memory_used() + image_data->MemoryUsed()); } } @@ -567,7 +570,6 @@ DocumentCache::~DocumentCache() {} // Adds all the documents in the list of filenames, counting memory. // The reader is used to read the files. bool DocumentCache::LoadDocuments(const GenericVector& filenames, - const char* lang, CachingStrategy cache_strategy, FileReader reader) { cache_strategy_ = cache_strategy; @@ -580,7 +582,7 @@ bool DocumentCache::LoadDocuments(const GenericVector& filenames, for (int arg = 0; arg < filenames.size(); ++arg) { STRING filename = filenames[arg]; DocumentData* document = new DocumentData(filename); - document->SetDocument(filename.string(), lang, fair_share_memory, reader); + document->SetDocument(filename.string(), fair_share_memory, reader); AddToCache(document); } if (!documents_.empty()) { diff --git a/ccstruct/imagedata.h b/ccstruct/imagedata.h index 45cb65a6c..e0eddfa06 100644 --- a/ccstruct/imagedata.h +++ b/ccstruct/imagedata.h @@ -116,10 +116,9 @@ class ImageData { // Writes to the given file. Returns false in case of error. bool Serialize(TFile* fp) const; // Reads from the given file. Returns false in case of error. - // If swap is true, assumes a big/little-endian swap is needed. - bool DeSerialize(bool swap, TFile* fp); + bool DeSerialize(TFile* fp); // As DeSerialize, but only seeks past the data - hence a static method. - static bool SkipDeSerialize(bool swap, tesseract::TFile* fp); + static bool SkipDeSerialize(tesseract::TFile* fp); // Other accessors. const STRING& imagefilename() const { @@ -210,11 +209,10 @@ class DocumentData { // Reads all the pages in the given lstmf filename to the cache. The reader // is used to read the file. - bool LoadDocument(const char* filename, const char* lang, int start_page, - inT64 max_memory, FileReader reader); + bool LoadDocument(const char* filename, int start_page, inT64 max_memory, + FileReader reader); // Sets up the document, without actually loading it. - void SetDocument(const char* filename, const char* lang, inT64 max_memory, - FileReader reader); + void SetDocument(const char* filename, inT64 max_memory, FileReader reader); // Writes all the pages to the given filename. Returns false on error. bool SaveDocument(const char* filename, FileWriter writer); bool SaveToBuffer(GenericVector* buffer); @@ -286,8 +284,6 @@ class DocumentData { private: // A name for this document. STRING document_name_; - // The language of this document. - STRING lang_; // A group of pages that corresponds in some loose way to a document. PointerVector pages_; // Page number of the first index in pages_. @@ -325,7 +321,7 @@ class DocumentCache { } // Adds all the documents in the list of filenames, counting memory. // The reader is used to read the files. - bool LoadDocuments(const GenericVector& filenames, const char* lang, + bool LoadDocuments(const GenericVector& filenames, CachingStrategy cache_strategy, FileReader reader); // Adds document to the cache. diff --git a/ccstruct/matrix.h b/ccstruct/matrix.h index 8e0442ae7..43bdb810f 100644 --- a/ccstruct/matrix.h +++ b/ccstruct/matrix.h @@ -164,16 +164,11 @@ class GENERIC_2D_ARRAY { } return true; } - bool DeSerialize(bool swap, tesseract::TFile* fp) { - if (!DeSerializeSize(swap, fp)) return false; - if (fp->FRead(&empty_, sizeof(empty_), 1) != 1) return false; - if (swap) ReverseN(&empty_, sizeof(empty_)); + bool DeSerialize(tesseract::TFile* fp) { + if (!DeSerializeSize(fp)) return false; + if (fp->FReadEndian(&empty_, sizeof(empty_), 1) != 1) return false; int size = num_elements(); - if (fp->FRead(array_, sizeof(*array_), size) != size) return false; - if (swap) { - for (int i = 0; i < size; ++i) - ReverseN(&array_[i], sizeof(array_[i])); - } + if (fp->FReadEndian(array_, sizeof(*array_), size) != size) return false; return true; } @@ -487,14 +482,10 @@ class GENERIC_2D_ARRAY { Resize(size1, size2, empty_); return true; } - bool DeSerializeSize(bool swap, tesseract::TFile* fp) { + bool DeSerializeSize(tesseract::TFile* fp) { inT32 size1, size2; - if (fp->FRead(&size1, sizeof(size1), 1) != 1) return false; - if (fp->FRead(&size2, sizeof(size2), 1) != 1) return false; - if (swap) { - ReverseN(&size1, sizeof(size1)); - ReverseN(&size2, sizeof(size2)); - } + if (fp->FReadEndian(&size1, sizeof(size1), 1) != 1) return false; + if (fp->FReadEndian(&size2, sizeof(size2), 1) != 1) return false; Resize(size1, size2, empty_); return true; } diff --git a/ccutil/genericvector.h b/ccutil/genericvector.h index 4dee34d17..a0e4699fc 100644 --- a/ccutil/genericvector.h +++ b/ccutil/genericvector.h @@ -163,8 +163,7 @@ class GenericVector { // DEPRECATED. Use [De]Serialize[Classes] instead. bool write(FILE* f, TessResultCallback2* cb) const; bool read(tesseract::TFile* f, - TessResultCallback3* cb, - bool swap); + TessResultCallback2* cb); // Writes a vector of simple types to the given file. Assumes that bitwise // read/write of T will work. Returns false in case of error. // TODO(rays) Change all callers to use TFile and remove deprecated methods. @@ -174,10 +173,11 @@ class GenericVector { // read/write will work with ReverseN according to sizeof(T). // Returns false in case of error. // If swap is true, assumes a big/little-endian swap is needed. + // TFile is assumed to know about swapping. bool DeSerialize(bool swap, FILE* fp); - bool DeSerialize(bool swap, tesseract::TFile* fp); + bool DeSerialize(tesseract::TFile* fp); // Skips the deserialization of the vector. - static bool SkipDeSerialize(bool swap, tesseract::TFile* fp); + static bool SkipDeSerialize(tesseract::TFile* fp); // Writes a vector of classes to the given file. Assumes the existence of // bool T::Serialize(FILE* fp) const that returns false in case of error. // Returns false in case of error. @@ -189,9 +189,9 @@ class GenericVector { // this function. Returns false in case of error. // If swap is true, assumes a big/little-endian swap is needed. bool DeSerializeClasses(bool swap, FILE* fp); - bool DeSerializeClasses(bool swap, tesseract::TFile* fp); + bool DeSerializeClasses(tesseract::TFile* fp); // Calls SkipDeSerialize on the elements of the vector. - static bool SkipDeSerializeClasses(bool swap, tesseract::TFile* fp); + static bool SkipDeSerializeClasses(tesseract::TFile* fp); // Allocates a new array of double the current_size, copies over the // information from data to the new location, deletes data and returns @@ -569,13 +569,13 @@ class PointerVector : public GenericVector { } return true; } - bool DeSerialize(bool swap, TFile* fp) { + bool DeSerialize(TFile* fp) { inT32 reserved; - if (!DeSerializeSize(swap, fp, &reserved)) return false; + if (!DeSerializeSize(fp, &reserved)) return false; GenericVector::reserve(reserved); truncate(0); for (int i = 0; i < reserved; ++i) { - if (!DeSerializeElement(swap, fp)) return false; + if (!DeSerializeElement(fp)) return false; } return true; } @@ -583,19 +583,17 @@ class PointerVector : public GenericVector { // retain the integrity of the stream, the caller must call some combination // of DeSerializeElement and DeSerializeSkip of the exact number returned in // *size, assuming a true return. - static bool DeSerializeSize(bool swap, TFile* fp, inT32* size) { - if (fp->FRead(size, sizeof(*size), 1) != 1) return false; - if (swap) Reverse32(size); - return true; + static bool DeSerializeSize(TFile* fp, inT32* size) { + return fp->FReadEndian(size, sizeof(*size), 1) == 1; } // Reads and appends to the vector the next element of the serialization. - bool DeSerializeElement(bool swap, TFile* fp) { + bool DeSerializeElement(TFile* fp) { inT8 non_null; if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) return false; T* item = NULL; if (non_null) { item = new T; - if (!item->DeSerialize(swap, fp)) { + if (!item->DeSerialize(fp)) { delete item; return false; } @@ -607,11 +605,11 @@ class PointerVector : public GenericVector { return true; } // Skips the next element of the serialization. - static bool DeSerializeSkip(bool swap, TFile* fp) { + static bool DeSerializeSkip(TFile* fp) { inT8 non_null; if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) return false; if (non_null) { - if (!T::SkipDeSerialize(swap, fp)) return false; + if (!T::SkipDeSerialize(fp)) return false; } return true; } @@ -889,23 +887,21 @@ bool GenericVector::write( template bool GenericVector::read( - tesseract::TFile* f, - TessResultCallback3* cb, bool swap) { + tesseract::TFile* f, TessResultCallback2* cb) { inT32 reserved; - if (f->FReadEndian(&reserved, sizeof(reserved), 1, swap) != 1) return false; + if (f->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false; reserve(reserved); - if (f->FReadEndian(&size_used_, sizeof(size_used_), 1, swap) != 1) - return false; + if (f->FReadEndian(&size_used_, sizeof(size_used_), 1) != 1) return false; if (cb != NULL) { for (int i = 0; i < size_used_; ++i) { - if (!cb->Run(f, data_ + i, swap)) { + if (!cb->Run(f, data_ + i)) { delete cb; return false; } } delete cb; } else { - if (f->FReadEndian(data_, sizeof(T), size_used_, swap) != size_used_) + if (f->FReadEndian(data_, sizeof(T), size_used_) != size_used_) return false; } return true; @@ -945,24 +941,17 @@ bool GenericVector::DeSerialize(bool swap, FILE* fp) { return true; } template -bool GenericVector::DeSerialize(bool swap, tesseract::TFile* fp) { +bool GenericVector::DeSerialize(tesseract::TFile* fp) { inT32 reserved; - if (fp->FRead(&reserved, sizeof(reserved), 1) != 1) return false; - if (swap) Reverse32(&reserved); + if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false; reserve(reserved); size_used_ = reserved; - if (fp->FRead(data_, sizeof(T), size_used_) != size_used_) return false; - if (swap) { - for (int i = 0; i < size_used_; ++i) - ReverseN(&data_[i], sizeof(data_[i])); - } - return true; + return fp->FReadEndian(data_, sizeof(T), size_used_) == size_used_; } template -bool GenericVector::SkipDeSerialize(bool swap, tesseract::TFile* fp) { +bool GenericVector::SkipDeSerialize(tesseract::TFile* fp) { inT32 reserved; - if (fp->FRead(&reserved, sizeof(reserved), 1) != 1) return false; - if (swap) Reverse32(&reserved); + if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false; return fp->FRead(NULL, sizeof(T), reserved) == reserved; } @@ -1004,24 +993,22 @@ bool GenericVector::DeSerializeClasses(bool swap, FILE* fp) { return true; } template -bool GenericVector::DeSerializeClasses(bool swap, tesseract::TFile* fp) { +bool GenericVector::DeSerializeClasses(tesseract::TFile* fp) { uinT32 reserved; - if (fp->FRead(&reserved, sizeof(reserved), 1) != 1) return false; - if (swap) Reverse32(&reserved); + if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false; T empty; init_to_size(reserved, empty); for (int i = 0; i < reserved; ++i) { - if (!data_[i].DeSerialize(swap, fp)) return false; + if (!data_[i].DeSerialize(fp)) return false; } return true; } template -bool GenericVector::SkipDeSerializeClasses(bool swap, tesseract::TFile* fp) { +bool GenericVector::SkipDeSerializeClasses(tesseract::TFile* fp) { uinT32 reserved; - if (fp->FRead(&reserved, sizeof(reserved), 1) != 1) return false; - if (swap) Reverse32(&reserved); + if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false; for (int i = 0; i < reserved; ++i) { - if (!T::SkipDeSerialize(swap, fp)) return false; + if (!T::SkipDeSerialize(fp)) return false; } return true; } diff --git a/ccutil/serialis.cpp b/ccutil/serialis.cpp index 80dd9b975..d12d4eac0 100644 --- a/ccutil/serialis.cpp +++ b/ccutil/serialis.cpp @@ -24,8 +24,11 @@ namespace tesseract { TFile::TFile() - : offset_(0), data_(NULL), data_is_owned_(false), is_writing_(false) { -} + : offset_(0), + data_(NULL), + data_is_owned_(false), + is_writing_(false), + swap_(false) {} TFile::~TFile() { if (data_is_owned_) @@ -39,6 +42,7 @@ bool TFile::Open(const STRING& filename, FileReader reader) { } offset_ = 0; is_writing_ = false; + swap_ = false; if (reader == NULL) return LoadDataFromFile(filename, data_); else @@ -52,6 +56,7 @@ bool TFile::Open(const char* data, int size) { data_is_owned_ = true; } is_writing_ = false; + swap_ = false; data_->init_to_size(size, 0); memcpy(&(*data_)[0], data, size); return true; @@ -69,6 +74,7 @@ bool TFile::Open(FILE* fp, inT64 end_offset) { } int size = end_offset - current_pos; is_writing_ = false; + swap_ = false; if (!data_is_owned_) { data_ = new GenericVector; data_is_owned_ = true; @@ -88,9 +94,9 @@ char* TFile::FGets(char* buffer, int buffer_size) { return size > 0 ? buffer : NULL; } -int TFile::FReadEndian(void* buffer, int size, int count, bool swap) { +int TFile::FReadEndian(void* buffer, int size, int count) { int num_read = FRead(buffer, size, count); - if (swap) { + if (swap_) { char* char_buffer = reinterpret_cast(buffer); for (int i = 0; i < num_read; ++i, char_buffer += size) { ReverseN(char_buffer, size); @@ -128,6 +134,7 @@ void TFile::OpenWrite(GenericVector* data) { data_is_owned_ = true; } is_writing_ = true; + swap_ = false; data_->truncate(0); } diff --git a/ccutil/serialis.h b/ccutil/serialis.h index f5f98a2c9..6703e1d58 100644 --- a/ccutil/serialis.h +++ b/ccutil/serialis.h @@ -61,6 +61,8 @@ class TFile { bool Open(const char* data, int size); // From an open file and an end offset. bool Open(FILE* fp, inT64 end_offset); + // Sets the value of the swap flag, so that FReadEndian does the right thing. + void set_swap(bool value) { swap_ = value; } // Reads a line like fgets. Returns NULL on EOF, otherwise buffer. // Reads at most buffer_size bytes, including '\0' terminator, even if @@ -68,9 +70,9 @@ class TFile { // To use fscanf use FGets and sscanf. char* FGets(char* buffer, int buffer_size); // Replicates fread, followed by a swap of the bytes if needed, returning the - // number of items read. If swap is true then the count items will each have + // number of items read. If swap_ is true then the count items will each have // size bytes reversed. - int FReadEndian(void* buffer, int size, int count, bool swap); + int FReadEndian(void* buffer, int size, int count); // Replicates fread, returning the number of items read. int FRead(void* buffer, int size, int count); // Resets the TFile as if it has been Opened, but nothing read. @@ -96,6 +98,8 @@ class TFile { bool data_is_owned_; // True if the TFile is open for writing. bool is_writing_; + // True if bytes need to be swapped in FReadEndian. + bool swap_; }; } // namespace tesseract. diff --git a/ccutil/strngs.cpp b/ccutil/strngs.cpp index 5a9cfd0d4..dcaaf2e2f 100644 --- a/ccutil/strngs.cpp +++ b/ccutil/strngs.cpp @@ -171,21 +171,18 @@ bool STRING::DeSerialize(bool swap, FILE* fp) { } // Reads from the given file. Returns false in case of error. // If swap is true, assumes a big/little-endian swap is needed. -bool STRING::DeSerialize(bool swap, TFile* fp) { +bool STRING::DeSerialize(TFile* fp) { inT32 len; - if (fp->FRead(&len, sizeof(len), 1) != 1) return false; - if (swap) - ReverseN(&len, sizeof(len)); + if (fp->FReadEndian(&len, sizeof(len), 1) != 1) return false; truncate_at(len); if (fp->FRead(GetCStr(), 1, len) != len) return false; return true; } // As DeSerialize, but only seeks past the data - hence a static method. -bool STRING::SkipDeSerialize(bool swap, tesseract::TFile* fp) { +bool STRING::SkipDeSerialize(tesseract::TFile* fp) { inT32 len; - if (fp->FRead(&len, sizeof(len), 1) != 1) return false; - if (swap) ReverseN(&len, sizeof(len)); + if (fp->FReadEndian(&len, sizeof(len), 1) != 1) return false; return fp->FRead(NULL, 1, len) == len; } diff --git a/ccutil/strngs.h b/ccutil/strngs.h index 2e65463ef..5ca9c4fc8 100644 --- a/ccutil/strngs.h +++ b/ccutil/strngs.h @@ -59,9 +59,9 @@ class TESS_API STRING bool Serialize(tesseract::TFile* fp) const; // Reads from the given file. Returns false in case of error. // If swap is true, assumes a big/little-endian swap is needed. - bool DeSerialize(bool swap, tesseract::TFile* fp); + bool DeSerialize(tesseract::TFile* fp); // As DeSerialize, but only seeks past the data - hence a static method. - static bool SkipDeSerialize(bool swap, tesseract::TFile* fp); + static bool SkipDeSerialize(tesseract::TFile* fp); BOOL8 contains(const char c) const; inT32 length() const; diff --git a/ccutil/tessdatamanager.cpp b/ccutil/tessdatamanager.cpp index 9b4599605..698202297 100644 --- a/ccutil/tessdatamanager.cpp +++ b/ccutil/tessdatamanager.cpp @@ -59,11 +59,12 @@ bool TessdataManager::LoadMemBuffer(const char *name, const char *data, inT32 num_entries = TESSDATA_NUM_ENTRIES; if (fp.FRead(&num_entries, sizeof(num_entries), 1) != 1) return false; swap_ = num_entries > kMaxNumTessdataEntries || num_entries < 0; + fp.set_swap(swap_); if (swap_) ReverseN(&num_entries, sizeof(num_entries)); GenericVector offset_table; offset_table.init_to_size(num_entries, -1); - if (fp.FReadEndian(&offset_table[0], sizeof(offset_table[0]), num_entries, - swap_) != num_entries) + if (fp.FReadEndian(&offset_table[0], sizeof(offset_table[0]), num_entries) != + num_entries) return false; for (int i = 0; i < num_entries && i < TESSDATA_NUM_ENTRIES; ++i) { if (offset_table[i] >= 0) { @@ -152,6 +153,7 @@ bool TessdataManager::GetComponent(TessdataType type, TFile *fp) { if (!is_loaded_ && !Init(data_file_name_.string())) return false; if (entries_[type].empty()) return false; fp->Open(&entries_[type][0], entries_[type].size()); + fp->set_swap(swap_); return true; } diff --git a/ccutil/unicharcompress.cpp b/ccutil/unicharcompress.cpp index 084e6c438..3b8595cac 100644 --- a/ccutil/unicharcompress.cpp +++ b/ccutil/unicharcompress.cpp @@ -315,9 +315,8 @@ bool UnicharCompress::Serialize(TFile* fp) const { } // Reads from the given file. Returns false in case of error. -// If swap is true, assumes a big/little-endian swap is needed. -bool UnicharCompress::DeSerialize(bool swap, TFile* fp) { - if (!encoder_.DeSerializeClasses(swap, fp)) return false; +bool UnicharCompress::DeSerialize(TFile* fp) { + if (!encoder_.DeSerializeClasses(fp)) return false; ComputeCodeRange(); SetupDecoder(); return true; diff --git a/ccutil/unicharcompress.h b/ccutil/unicharcompress.h index 12fcd867c..c68bb16de 100644 --- a/ccutil/unicharcompress.h +++ b/ccutil/unicharcompress.h @@ -69,17 +69,12 @@ class RecodedCharID { } // Reads from the given file. Returns false in case of error. // If swap is true, assumes a big/little-endian swap is needed. - bool DeSerialize(bool swap, TFile* fp) { + bool DeSerialize(TFile* fp) { if (fp->FRead(&self_normalized_, sizeof(self_normalized_), 1) != 1) return false; - if (fp->FRead(&length_, sizeof(length_), 1) != 1) return false; - if (swap) ReverseN(&length_, sizeof(length_)); - if (fp->FRead(code_, sizeof(code_[0]), length_) != length_) return false; - if (swap) { - for (int i = 0; i < length_; ++i) { - ReverseN(&code_[i], sizeof(code_[i])); - } - } + if (fp->FReadEndian(&length_, sizeof(length_), 1) != 1) return false; + if (fp->FReadEndian(code_, sizeof(code_[0]), length_) != length_) + return false; return true; } bool operator==(const RecodedCharID& other) const { @@ -205,8 +200,8 @@ class UnicharCompress { // Writes to the given file. Returns false in case of error. bool Serialize(TFile* fp) const; // Reads from the given file. Returns false in case of error. - // If swap is true, assumes a big/little-endian swap is needed. - bool DeSerialize(bool swap, TFile* fp); + + bool DeSerialize(TFile* fp); // Returns a STRING containing a text file that describes the encoding thus: // [,]* diff --git a/ccutil/unicity_table.h b/ccutil/unicity_table.h index f89e1ab36..f36bcfa00 100644 --- a/ccutil/unicity_table.h +++ b/ccutil/unicity_table.h @@ -86,10 +86,8 @@ class UnicityTable { /// once. The given callback will be deleted at the end. /// Returns false on read/write error. bool write(FILE* f, TessResultCallback2* cb) const; - /// swap is used to switch the endianness. bool read(tesseract::TFile* f, - TessResultCallback3* cb, - bool swap); + TessResultCallback2* cb); private: GenericVector table_; @@ -196,9 +194,8 @@ bool UnicityTable::write( template bool UnicityTable::read( - tesseract::TFile* f, - TessResultCallback3* cb, bool swap) { - return table_.read(f, cb, swap); + tesseract::TFile* f, TessResultCallback2* cb) { + return table_.read(f, cb); } // This method clear the current object, then, does a shallow copy of diff --git a/classify/adaptive.cpp b/classify/adaptive.cpp index 94e09adf9..7483a74fe 100644 --- a/classify/adaptive.cpp +++ b/classify/adaptive.cpp @@ -365,7 +365,7 @@ ADAPT_TEMPLATES Classify::ReadAdaptedTemplates(TFile *fp) { fp->FRead(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1); /* then read in the basic integer templates */ - Templates->Templates = ReadIntTemplates(false, fp); + Templates->Templates = ReadIntTemplates(fp); /* then read in the adaptive info for each class */ for (i = 0; i < (Templates->Templates)->NumClasses; i++) { diff --git a/classify/adaptmatch.cpp b/classify/adaptmatch.cpp index 8d69e6d10..c7c451bd5 100644 --- a/classify/adaptmatch.cpp +++ b/classify/adaptmatch.cpp @@ -535,11 +535,11 @@ void Classify::InitAdaptiveClassifier(TessdataManager* mgr) { if (language_data_path_prefix.length() > 0 && mgr != nullptr) { TFile fp; ASSERT_HOST(mgr->GetComponent(TESSDATA_INTTEMP, &fp)); - PreTrainedTemplates = ReadIntTemplates(mgr->swap(), &fp); + PreTrainedTemplates = ReadIntTemplates(&fp); if (mgr->GetComponent(TESSDATA_SHAPE_TABLE, &fp)) { shape_table_ = new ShapeTable(unicharset); - if (!shape_table_->DeSerialize(mgr->swap(), &fp)) { + if (!shape_table_->DeSerialize(&fp)) { tprintf("Error loading shape table!\n"); delete shape_table_; shape_table_ = NULL; @@ -547,7 +547,7 @@ void Classify::InitAdaptiveClassifier(TessdataManager* mgr) { } ASSERT_HOST(mgr->GetComponent(TESSDATA_PFFMTABLE, &fp)); - ReadNewCutoffs(&fp, mgr->swap(), CharNormCutoffs); + ReadNewCutoffs(&fp, CharNormCutoffs); ASSERT_HOST(mgr->GetComponent(TESSDATA_NORMPROTO, &fp)); NormProtos = ReadNormProtos(&fp); diff --git a/classify/classify.h b/classify/classify.h index 62086c400..823d49f96 100644 --- a/classify/classify.h +++ b/classify/classify.h @@ -103,7 +103,7 @@ class Classify : public CCStruct { const uinT8* normalization_factors, const uinT16* expected_num_features, GenericVector* results); - void ReadNewCutoffs(TFile* fp, bool swap, CLASS_CUTOFF_ARRAY Cutoffs); + void ReadNewCutoffs(TFile* fp, CLASS_CUTOFF_ARRAY Cutoffs); void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates); void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates); ADAPT_TEMPLATES ReadAdaptedTemplates(TFile* File); @@ -334,7 +334,7 @@ class Classify : public CCStruct { uinT8* char_norm_array); void ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures); /* intproto.cpp *************************************************************/ - INT_TEMPLATES ReadIntTemplates(bool swap, TFile* fp); + INT_TEMPLATES ReadIntTemplates(TFile* fp); void WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, const UNICHARSET& target_unicharset); CLASS_ID GetClassToDebug(const char *Prompt, bool* adaptive_on, diff --git a/classify/cutoffs.cpp b/classify/cutoffs.cpp index cdcaf361c..15eb2e85b 100644 --- a/classify/cutoffs.cpp +++ b/classify/cutoffs.cpp @@ -49,15 +49,14 @@ namespace tesseract { * @note Exceptions: none * @note History: Wed Feb 20 09:38:26 1991, DSJ, Created. */ -void Classify::ReadNewCutoffs(TFile* fp, bool swap, - CLASS_CUTOFF_ARRAY Cutoffs) { +void Classify::ReadNewCutoffs(TFile* fp, CLASS_CUTOFF_ARRAY Cutoffs) { char Class[UNICHAR_LEN + 1]; CLASS_ID ClassId; int Cutoff; int i; if (shape_table_ != NULL) { - if (!shapetable_cutoffs_.DeSerialize(swap, fp)) { + if (!shapetable_cutoffs_.DeSerialize(fp)) { tprintf("Error during read of shapetable pffmtable!\n"); } } diff --git a/classify/intproto.cpp b/classify/intproto.cpp index 1939bcbc6..145035932 100644 --- a/classify/intproto.cpp +++ b/classify/intproto.cpp @@ -758,7 +758,7 @@ namespace tesseract { * @note Exceptions: none * @note History: Wed Feb 27 11:48:46 1991, DSJ, Created. */ -INT_TEMPLATES Classify::ReadIntTemplates(bool swap, TFile *fp) { +INT_TEMPLATES Classify::ReadIntTemplates(TFile *fp) { int i, j, w, x, y, z; int unicharset_size; int version_id = 0; @@ -784,18 +784,18 @@ INT_TEMPLATES Classify::ReadIntTemplates(bool swap, TFile *fp) { /* first read the high level template struct */ Templates = NewIntTemplates(); // Read Templates in parts for 64 bit compatibility. - if (fp->FReadEndian(&unicharset_size, sizeof(unicharset_size), 1, swap) != 1) + if (fp->FReadEndian(&unicharset_size, sizeof(unicharset_size), 1) != 1) tprintf("Bad read of inttemp!\n"); - if (fp->FReadEndian(&Templates->NumClasses, sizeof(Templates->NumClasses), 1, - swap) != 1 || + if (fp->FReadEndian(&Templates->NumClasses, sizeof(Templates->NumClasses), + 1) != 1 || fp->FReadEndian(&Templates->NumClassPruners, - sizeof(Templates->NumClassPruners), 1, swap) != 1) + sizeof(Templates->NumClassPruners), 1) != 1) tprintf("Bad read of inttemp!\n"); if (Templates->NumClasses < 0) { // This file has a version id! version_id = -Templates->NumClasses; if (fp->FReadEndian(&Templates->NumClasses, sizeof(Templates->NumClasses), - 1, swap) != 1) + 1) != 1) tprintf("Bad read of inttemp!\n"); } @@ -805,12 +805,12 @@ INT_TEMPLATES Classify::ReadIntTemplates(bool swap, TFile *fp) { } if (version_id < 2) { - if (fp->FReadEndian(IndexFor, sizeof(IndexFor[0]), unicharset_size, swap) != + if (fp->FReadEndian(IndexFor, sizeof(IndexFor[0]), unicharset_size) != unicharset_size) { tprintf("Bad read of inttemp!\n"); } if (fp->FReadEndian(ClassIdFor, sizeof(ClassIdFor[0]), - Templates->NumClasses, swap) != Templates->NumClasses) { + Templates->NumClasses) != Templates->NumClasses) { tprintf("Bad read of inttemp!\n"); } } @@ -820,8 +820,8 @@ INT_TEMPLATES Classify::ReadIntTemplates(bool swap, TFile *fp) { NUM_CP_BUCKETS * NUM_CP_BUCKETS * NUM_CP_BUCKETS * WERDS_PER_CP_VECTOR; for (i = 0; i < Templates->NumClassPruners; i++) { Pruner = new CLASS_PRUNER_STRUCT; - if (fp->FReadEndian(Pruner, sizeof(Pruner->p[0][0][0][0]), kNumBuckets, - swap) != kNumBuckets) { + if (fp->FReadEndian(Pruner, sizeof(Pruner->p[0][0][0][0]), kNumBuckets) != + kNumBuckets) { tprintf("Bad read of inttemp!\n"); } if (version_id < 2) { @@ -887,8 +887,7 @@ INT_TEMPLATES Classify::ReadIntTemplates(bool swap, TFile *fp) { for (i = 0; i < Templates->NumClasses; i++) { /* first read in the high level struct for the class */ Class = (INT_CLASS) Emalloc (sizeof (INT_CLASS_STRUCT)); - if (fp->FReadEndian(&Class->NumProtos, sizeof(Class->NumProtos), 1, swap) != - 1 || + if (fp->FReadEndian(&Class->NumProtos, sizeof(Class->NumProtos), 1) != 1 || fp->FRead(&Class->NumProtoSets, sizeof(Class->NumProtoSets), 1) != 1 || fp->FRead(&Class->NumConfigs, sizeof(Class->NumConfigs), 1) != 1) tprintf("Bad read of inttemp!\n"); @@ -902,8 +901,8 @@ INT_TEMPLATES Classify::ReadIntTemplates(bool swap, TFile *fp) { } int num_configs = version_id < 4 ? MaxNumConfigs : Class->NumConfigs; ASSERT_HOST(num_configs <= MaxNumConfigs); - if (fp->FReadEndian(Class->ConfigLengths, sizeof(uinT16), num_configs, - swap) != num_configs) { + if (fp->FReadEndian(Class->ConfigLengths, sizeof(uinT16), num_configs) != + num_configs) { tprintf("Bad read of inttemp!\n"); } if (version_id < 2) { @@ -927,8 +926,8 @@ INT_TEMPLATES Classify::ReadIntTemplates(bool swap, TFile *fp) { ProtoSet = (PROTO_SET)Emalloc(sizeof(PROTO_SET_STRUCT)); int num_buckets = NUM_PP_PARAMS * NUM_PP_BUCKETS * WERDS_PER_PP_VECTOR; if (fp->FReadEndian(&ProtoSet->ProtoPruner, - sizeof(ProtoSet->ProtoPruner[0][0][0]), num_buckets, - swap) != num_buckets) + sizeof(ProtoSet->ProtoPruner[0][0][0]), + num_buckets) != num_buckets) tprintf("Bad read of inttemp!\n"); for (x = 0; x < PROTOS_PER_PROTO_SET; x++) { if (fp->FRead(&ProtoSet->Protos[x].A, sizeof(ProtoSet->Protos[x].A), @@ -942,7 +941,7 @@ INT_TEMPLATES Classify::ReadIntTemplates(bool swap, TFile *fp) { tprintf("Bad read of inttemp!\n"); if (fp->FReadEndian(&ProtoSet->Protos[x].Configs, sizeof(ProtoSet->Protos[x].Configs[0]), - WerdsPerConfigVec, swap) != WerdsPerConfigVec) + WerdsPerConfigVec) != WerdsPerConfigVec) cprintf("Bad read of inttemp!\n"); } Class->ProtoSets[j] = ProtoSet; @@ -950,7 +949,7 @@ INT_TEMPLATES Classify::ReadIntTemplates(bool swap, TFile *fp) { if (version_id < 4) { Class->font_set_id = -1; } else { - fp->FReadEndian(&Class->font_set_id, sizeof(Class->font_set_id), 1, swap); + fp->FReadEndian(&Class->font_set_id, sizeof(Class->font_set_id), 1); } } @@ -977,12 +976,12 @@ INT_TEMPLATES Classify::ReadIntTemplates(bool swap, TFile *fp) { } } if (version_id >= 4) { - this->fontinfo_table_.read(fp, NewPermanentTessCallback(read_info), swap); + this->fontinfo_table_.read(fp, NewPermanentTessCallback(read_info)); if (version_id >= 5) { - this->fontinfo_table_.read( - fp, NewPermanentTessCallback(read_spacing_info), swap); + this->fontinfo_table_.read(fp, + NewPermanentTessCallback(read_spacing_info)); } - this->fontset_table_.read(fp, NewPermanentTessCallback(read_set), swap); + this->fontset_table_.read(fp, NewPermanentTessCallback(read_set)); } // Clean up. diff --git a/classify/shapetable.cpp b/classify/shapetable.cpp index 24e26d8c3..2730dceeb 100644 --- a/classify/shapetable.cpp +++ b/classify/shapetable.cpp @@ -70,11 +70,10 @@ bool UnicharAndFonts::Serialize(FILE* fp) const { return true; } // Reads from the given file. Returns false in case of error. -// If swap is true, assumes a big/little-endian swap is needed. -bool UnicharAndFonts::DeSerialize(bool swap, TFile* fp) { - if (fp->FReadEndian(&unichar_id, sizeof(unichar_id), 1, swap) != 1) - return false; - if (!font_ids.DeSerialize(swap, fp)) return false; + +bool UnicharAndFonts::DeSerialize(TFile* fp) { + if (fp->FReadEndian(&unichar_id, sizeof(unichar_id), 1) != 1) return false; + if (!font_ids.DeSerialize(fp)) return false; return true; } @@ -94,13 +93,12 @@ bool Shape::Serialize(FILE* fp) const { return true; } // Reads from the given file. Returns false in case of error. -// If swap is true, assumes a big/little-endian swap is needed. -bool Shape::DeSerialize(bool swap, TFile* fp) { + +bool Shape::DeSerialize(TFile* fp) { uinT8 sorted; if (fp->FRead(&sorted, sizeof(sorted), 1) != 1) return false; unichars_sorted_ = sorted != 0; - if (!unichars_.DeSerializeClasses(swap, fp)) return false; - return true; + return unichars_.DeSerializeClasses(fp); } // Adds a font_id for the given unichar_id. If the unichar_id is not @@ -250,9 +248,9 @@ bool ShapeTable::Serialize(FILE* fp) const { return true; } // Reads from the given file. Returns false in case of error. -// If swap is true, assumes a big/little-endian swap is needed. -bool ShapeTable::DeSerialize(bool swap, TFile* fp) { - if (!shape_table_.DeSerialize(swap, fp)) return false; + +bool ShapeTable::DeSerialize(TFile* fp) { + if (!shape_table_.DeSerialize(fp)) return false; num_fonts_ = 0; return true; } diff --git a/classify/shapetable.h b/classify/shapetable.h index 2dc3bee62..8ec6004bd 100644 --- a/classify/shapetable.h +++ b/classify/shapetable.h @@ -167,8 +167,7 @@ struct UnicharAndFonts { // Writes to the given file. Returns false in case of error. bool Serialize(FILE* fp) const; // Reads from the given file. Returns false in case of error. - // If swap is true, assumes a big/little-endian swap is needed. - bool DeSerialize(bool swap, TFile* fp); + bool DeSerialize(TFile* fp); // Sort function to sort a pair of UnicharAndFonts by unichar_id. static int SortByUnicharId(const void* v1, const void* v2); @@ -190,8 +189,7 @@ class Shape { // Writes to the given file. Returns false in case of error. bool Serialize(FILE* fp) const; // Reads from the given file. Returns false in case of error. - // If swap is true, assumes a big/little-endian swap is needed. - bool DeSerialize(bool swap, TFile* fp); + bool DeSerialize(TFile* fp); int destination_index() const { return destination_index_; @@ -271,8 +269,7 @@ class ShapeTable { // Writes to the given file. Returns false in case of error. bool Serialize(FILE* fp) const; // Reads from the given file. Returns false in case of error. - // If swap is true, assumes a big/little-endian swap is needed. - bool DeSerialize(bool swap, TFile* fp); + bool DeSerialize(TFile* fp); // Accessors. int NumShapes() const { diff --git a/dict/dawg.cpp b/dict/dawg.cpp index b8099c965..4edf3c86b 100644 --- a/dict/dawg.cpp +++ b/dict/dawg.cpp @@ -311,23 +311,24 @@ void SquishedDawg::print_edge(EDGE_REF edge) const { bool SquishedDawg::read_squished_dawg(TFile *file) { if (debug_level_) tprintf("Reading squished dawg\n"); - // Read the magic number and if it does not match kDawgMagicNumber - // set swap to true to indicate that we need to switch endianness. + // Read the magic number and check that it matches kDawgMagicNumber, as + // auto-endian fixing should make sure it is always correct. inT16 magic; - if (file->FRead(&magic, sizeof(inT16), 1) != 1) return false; - bool swap = (magic != kDawgMagicNumber); + if (file->FReadEndian(&magic, sizeof(magic), 1) != 1) return false; + if (magic != kDawgMagicNumber) { + tprintf("Bad magic number on dawg: %d vs %d\n", magic, kDawgMagicNumber); + return false; + } inT32 unicharset_size; - if (file->FReadEndian(&unicharset_size, sizeof(unicharset_size), 1, swap) != - 1) - return false; - if (file->FReadEndian(&num_edges_, sizeof(num_edges_), 1, swap) != 1) + if (file->FReadEndian(&unicharset_size, sizeof(unicharset_size), 1) != 1) return false; + if (file->FReadEndian(&num_edges_, sizeof(num_edges_), 1) != 1) return false; ASSERT_HOST(num_edges_ > 0); // DAWG should not be empty Dawg::init(unicharset_size); edges_ = new EDGE_RECORD[num_edges_]; - if (file->FReadEndian(&edges_[0], sizeof(edges_[0]), num_edges_, swap) != + if (file->FReadEndian(&edges_[0], sizeof(edges_[0]), num_edges_) != num_edges_) return false; if (debug_level_ > 2) { diff --git a/lstm/convolve.cpp b/lstm/convolve.cpp index f89ca3bae..d34efdf3f 100644 --- a/lstm/convolve.cpp +++ b/lstm/convolve.cpp @@ -42,14 +42,9 @@ bool Convolve::Serialize(TFile* fp) const { } // Reads from the given file. Returns false in case of error. -// If swap is true, assumes a big/little-endian swap is needed. -bool Convolve::DeSerialize(bool swap, TFile* fp) { - if (fp->FRead(&half_x_, sizeof(half_x_), 1) != 1) return false; - if (fp->FRead(&half_y_, sizeof(half_y_), 1) != 1) return false; - if (swap) { - ReverseN(&half_x_, sizeof(half_x_)); - ReverseN(&half_y_, sizeof(half_y_)); - } +bool Convolve::DeSerialize(TFile* fp) { + if (fp->FReadEndian(&half_x_, sizeof(half_x_), 1) != 1) return false; + if (fp->FReadEndian(&half_y_, sizeof(half_y_), 1) != 1) return false; no_ = ni_ * (2*half_x_ + 1) * (2*half_y_ + 1); return true; } diff --git a/lstm/convolve.h b/lstm/convolve.h index a05dc1d85..184390a76 100644 --- a/lstm/convolve.h +++ b/lstm/convolve.h @@ -47,8 +47,7 @@ class Convolve : public Network { // Writes to the given file. Returns false in case of error. virtual bool Serialize(TFile* fp) const; // Reads from the given file. Returns false in case of error. - // If swap is true, assumes a big/little-endian swap is needed. - virtual bool DeSerialize(bool swap, TFile* fp); + virtual bool DeSerialize(TFile* fp); // Runs forward propagation of activations on the input line. // See Network for a detailed discussion of the arguments. diff --git a/lstm/fullyconnected.cpp b/lstm/fullyconnected.cpp index c5b92768e..ecf43db19 100644 --- a/lstm/fullyconnected.cpp +++ b/lstm/fullyconnected.cpp @@ -94,10 +94,8 @@ bool FullyConnected::Serialize(TFile* fp) const { } // Reads from the given file. Returns false in case of error. -// If swap is true, assumes a big/little-endian swap is needed. -bool FullyConnected::DeSerialize(bool swap, TFile* fp) { - if (!weights_.DeSerialize(IsTraining(), swap, fp)) return false; - return true; +bool FullyConnected::DeSerialize(TFile* fp) { + return weights_.DeSerialize(IsTraining(), fp); } // Runs forward propagation of activations on the input line. diff --git a/lstm/fullyconnected.h b/lstm/fullyconnected.h index f5a593906..fb9f9b46b 100644 --- a/lstm/fullyconnected.h +++ b/lstm/fullyconnected.h @@ -78,8 +78,7 @@ class FullyConnected : public Network { // Writes to the given file. Returns false in case of error. virtual bool Serialize(TFile* fp) const; // Reads from the given file. Returns false in case of error. - // If swap is true, assumes a big/little-endian swap is needed. - virtual bool DeSerialize(bool swap, TFile* fp); + virtual bool DeSerialize(TFile* fp); // Runs forward propagation of activations on the input line. // See Network for a detailed discussion of the arguments. diff --git a/lstm/input.cpp b/lstm/input.cpp index 1bcf367e3..daa4687fd 100644 --- a/lstm/input.cpp +++ b/lstm/input.cpp @@ -48,11 +48,8 @@ bool Input::Serialize(TFile* fp) const { } // Reads from the given file. Returns false in case of error. -// If swap is true, assumes a big/little-endian swap is needed. -bool Input::DeSerialize(bool swap, TFile* fp) { - if (fp->FRead(&shape_, sizeof(shape_), 1) != 1) return false; - // TODO(rays) swaps! - return true; +bool Input::DeSerialize(TFile* fp) { + return fp->FReadEndian(&shape_, sizeof(shape_), 1) == 1; } // Returns an integer reduction factor that the network applies to the diff --git a/lstm/input.h b/lstm/input.h index 7a750a562..5bdefcb6a 100644 --- a/lstm/input.h +++ b/lstm/input.h @@ -51,9 +51,7 @@ class Input : public Network { // Should be overridden by subclasses, but called by their Serialize. virtual bool Serialize(TFile* fp) const; // Reads from the given file. Returns false in case of error. - // If swap is true, assumes a big/little-endian swap is needed. - // Should be overridden by subclasses, but NOT called by their DeSerialize. - virtual bool DeSerialize(bool swap, TFile* fp); + virtual bool DeSerialize(TFile* fp); // Returns an integer reduction factor that the network applies to the // time sequence. Assumes that any 2-d is already eliminated. Used for diff --git a/lstm/lstm.cpp b/lstm/lstm.cpp index 9fe16cf8b..3b9ca87c2 100644 --- a/lstm/lstm.cpp +++ b/lstm/lstm.cpp @@ -173,10 +173,9 @@ bool LSTM::Serialize(TFile* fp) const { } // Reads from the given file. Returns false in case of error. -// If swap is true, assumes a big/little-endian swap is needed. -bool LSTM::DeSerialize(bool swap, TFile* fp) { - if (fp->FRead(&na_, sizeof(na_), 1) != 1) return false; - if (swap) ReverseN(&na_, sizeof(na_)); + +bool LSTM::DeSerialize(TFile* fp) { + if (fp->FReadEndian(&na_, sizeof(na_), 1) != 1) return false; if (type_ == NT_LSTM_SOFTMAX) { nf_ = no_; } else if (type_ == NT_LSTM_SOFTMAX_ENCODED) { @@ -187,7 +186,7 @@ bool LSTM::DeSerialize(bool swap, TFile* fp) { is_2d_ = false; for (int w = 0; w < WT_COUNT; ++w) { if (w == GFS && !Is2D()) continue; - if (!gate_weights_[w].DeSerialize(IsTraining(), swap, fp)) return false; + if (!gate_weights_[w].DeSerialize(IsTraining(), fp)) return false; if (w == CI) { ns_ = gate_weights_[CI].NumOutputs(); is_2d_ = na_ - nf_ == ni_ + 2 * ns_; @@ -195,11 +194,10 @@ bool LSTM::DeSerialize(bool swap, TFile* fp) { } delete softmax_; if (type_ == NT_LSTM_SOFTMAX || type_ == NT_LSTM_SOFTMAX_ENCODED) { - softmax_ = - reinterpret_cast(Network::CreateFromFile(swap, fp)); - if (softmax_ == NULL) return false; + softmax_ = reinterpret_cast(Network::CreateFromFile(fp)); + if (softmax_ == nullptr) return false; } else { - softmax_ = NULL; + softmax_ = nullptr; } return true; } diff --git a/lstm/lstm.h b/lstm/lstm.h index f87fa6811..aa6763f72 100644 --- a/lstm/lstm.h +++ b/lstm/lstm.h @@ -86,8 +86,7 @@ class LSTM : public Network { // Writes to the given file. Returns false in case of error. virtual bool Serialize(TFile* fp) const; // Reads from the given file. Returns false in case of error. - // If swap is true, assumes a big/little-endian swap is needed. - virtual bool DeSerialize(bool swap, TFile* fp); + virtual bool DeSerialize(TFile* fp); // Runs forward propagation of activations on the input line. // See Network for a detailed discussion of the arguments. diff --git a/lstm/lstmrecognizer.cpp b/lstm/lstmrecognizer.cpp index e4013aec2..817e49ed9 100644 --- a/lstm/lstmrecognizer.cpp +++ b/lstm/lstmrecognizer.cpp @@ -88,25 +88,27 @@ bool LSTMRecognizer::Serialize(TFile* fp) const { } // Reads from the given file. Returns false in case of error. -// If swap is true, assumes a big/little-endian swap is needed. -bool LSTMRecognizer::DeSerialize(bool swap, TFile* fp) { +bool LSTMRecognizer::DeSerialize(TFile* fp) { delete network_; - network_ = Network::CreateFromFile(swap, fp); + network_ = Network::CreateFromFile(fp); if (network_ == NULL) return false; if (!ccutil_.unicharset.load_from_file(fp, false)) return false; - if (!network_str_.DeSerialize(swap, fp)) return false; - if (fp->FRead(&training_flags_, sizeof(training_flags_), 1) != 1) + if (!network_str_.DeSerialize(fp)) return false; + if (fp->FReadEndian(&training_flags_, sizeof(training_flags_), 1) != 1) return false; - if (fp->FRead(&training_iteration_, sizeof(training_iteration_), 1) != 1) + if (fp->FReadEndian(&training_iteration_, sizeof(training_iteration_), 1) != + 1) return false; - if (fp->FRead(&sample_iteration_, sizeof(sample_iteration_), 1) != 1) + if (fp->FReadEndian(&sample_iteration_, sizeof(sample_iteration_), 1) != 1) return false; - if (fp->FRead(&null_char_, sizeof(null_char_), 1) != 1) return false; - if (fp->FRead(&weight_range_, sizeof(weight_range_), 1) != 1) return false; - if (fp->FRead(&learning_rate_, sizeof(learning_rate_), 1) != 1) return false; - if (fp->FRead(&momentum_, sizeof(momentum_), 1) != 1) return false; + if (fp->FReadEndian(&null_char_, sizeof(null_char_), 1) != 1) return false; + if (fp->FReadEndian(&weight_range_, sizeof(weight_range_), 1) != 1) + return false; + if (fp->FReadEndian(&learning_rate_, sizeof(learning_rate_), 1) != 1) + return false; + if (fp->FReadEndian(&momentum_, sizeof(momentum_), 1) != 1) return false; if (IsRecoding()) { - if (!recoder_.DeSerialize(swap, fp)) return false; + if (!recoder_.DeSerialize(fp)) return false; RecodedCharID code; recoder_.EncodeUnichar(UNICHAR_SPACE, &code); if (code(0) != UNICHAR_SPACE) { @@ -114,7 +116,6 @@ bool LSTMRecognizer::DeSerialize(bool swap, TFile* fp) { return false; } } - // TODO(rays) swaps! network_->SetRandomizer(&randomizer_); network_->CacheXScaleFactor(network_->XScaleFactor()); return true; diff --git a/lstm/lstmrecognizer.h b/lstm/lstmrecognizer.h index 87dc135dd..d235613d2 100644 --- a/lstm/lstmrecognizer.h +++ b/lstm/lstmrecognizer.h @@ -158,8 +158,7 @@ class LSTMRecognizer { // Writes to the given file. Returns false in case of error. bool Serialize(TFile* fp) const; // Reads from the given file. Returns false in case of error. - // If swap is true, assumes a big/little-endian swap is needed. - bool DeSerialize(bool swap, TFile* fp); + bool DeSerialize(TFile* fp); // Loads the dictionary if possible from the traineddata file. // Prints a warning message, and returns false but otherwise fails silently // and continues to work without it if loading fails. diff --git a/lstm/lstmtrainer.cpp b/lstm/lstmtrainer.cpp index 3539a71de..036199694 100644 --- a/lstm/lstmtrainer.cpp +++ b/lstm/lstmtrainer.cpp @@ -304,8 +304,7 @@ void LSTMTrainer::DebugNetwork() { // loaded. bool LSTMTrainer::LoadAllTrainingData(const GenericVector& filenames) { training_data_.Clear(); - return training_data_.LoadDocuments(filenames, "eng", CacheStrategy(), - file_reader_); + return training_data_.LoadDocuments(filenames, CacheStrategy(), file_reader_); } // Keeps track of best and locally worst char error_rate and launches tests @@ -480,54 +479,54 @@ bool LSTMTrainer::Serialize(TFile* fp) const { } // Reads from the given file. Returns false in case of error. -// If swap is true, assumes a big/little-endian swap is needed. -bool LSTMTrainer::DeSerialize(bool swap, TFile* fp) { - if (!LSTMRecognizer::DeSerialize(swap, fp)) return false; +// NOTE: It is assumed that the trainer is never read cross-endian. +bool LSTMTrainer::DeSerialize(TFile* fp) { + if (!LSTMRecognizer::DeSerialize(fp)) return false; if (fp->FRead(&learning_iteration_, sizeof(learning_iteration_), 1) != 1) { // Special case. If we successfully decoded the recognizer, but fail here // then it means we were just given a recognizer, so issue a warning and // allow it. tprintf("Warning: LSTMTrainer deserialized an LSTMRecognizer!\n"); learning_iteration_ = 0; - network_->SetEnableTraining(TS_RE_ENABLE); + network_->SetEnableTraining(TS_ENABLED); return true; } - if (fp->FRead(&prev_sample_iteration_, sizeof(prev_sample_iteration_), 1) != - 1) + if (fp->FReadEndian(&prev_sample_iteration_, sizeof(prev_sample_iteration_), + 1) != 1) return false; - if (fp->FRead(&perfect_delay_, sizeof(perfect_delay_), 1) != 1) return false; - if (fp->FRead(&last_perfect_training_iteration_, - sizeof(last_perfect_training_iteration_), 1) != 1) + if (fp->FReadEndian(&perfect_delay_, sizeof(perfect_delay_), 1) != 1) + return false; + if (fp->FReadEndian(&last_perfect_training_iteration_, + sizeof(last_perfect_training_iteration_), 1) != 1) return false; for (int i = 0; i < ET_COUNT; ++i) { - if (!error_buffers_[i].DeSerialize(swap, fp)) return false; + if (!error_buffers_[i].DeSerialize(fp)) return false; } if (fp->FRead(&error_rates_, sizeof(error_rates_), 1) != 1) return false; - if (fp->FRead(&training_stage_, sizeof(training_stage_), 1) != 1) + if (fp->FReadEndian(&training_stage_, sizeof(training_stage_), 1) != 1) return false; uinT8 amount; if (fp->FRead(&amount, sizeof(amount), 1) != 1) return false; if (amount == LIGHT) return true; // Don't read the rest. - if (fp->FRead(&best_error_rate_, sizeof(best_error_rate_), 1) != 1) + if (fp->FReadEndian(&best_error_rate_, sizeof(best_error_rate_), 1) != 1) return false; - if (fp->FRead(&best_error_rates_, sizeof(best_error_rates_), 1) != 1) + if (fp->FReadEndian(&best_error_rates_, sizeof(best_error_rates_), 1) != 1) return false; - if (fp->FRead(&best_iteration_, sizeof(best_iteration_), 1) != 1) + if (fp->FReadEndian(&best_iteration_, sizeof(best_iteration_), 1) != 1) return false; - if (fp->FRead(&worst_error_rate_, sizeof(worst_error_rate_), 1) != 1) + if (fp->FReadEndian(&worst_error_rate_, sizeof(worst_error_rate_), 1) != 1) return false; - if (fp->FRead(&worst_error_rates_, sizeof(worst_error_rates_), 1) != 1) + if (fp->FReadEndian(&worst_error_rates_, sizeof(worst_error_rates_), 1) != 1) return false; - if (fp->FRead(&worst_iteration_, sizeof(worst_iteration_), 1) != 1) + if (fp->FReadEndian(&worst_iteration_, sizeof(worst_iteration_), 1) != 1) return false; - if (fp->FRead(&stall_iteration_, sizeof(stall_iteration_), 1) != 1) - return false; - if (!best_model_data_.DeSerialize(swap, fp)) return false; - if (!worst_model_data_.DeSerialize(swap, fp)) return false; - if (amount != NO_BEST_TRAINER && !best_trainer_.DeSerialize(swap, fp)) + if (fp->FReadEndian(&stall_iteration_, sizeof(stall_iteration_), 1) != 1) return false; + if (!best_model_data_.DeSerialize(fp)) return false; + if (!worst_model_data_.DeSerialize(fp)) return false; + if (amount != NO_BEST_TRAINER && !best_trainer_.DeSerialize(fp)) return false; GenericVector sub_data; - if (!sub_data.DeSerialize(swap, fp)) return false; + if (!sub_data.DeSerialize(fp)) return false; delete sub_trainer_; if (sub_data.empty()) { sub_trainer_ = NULL; @@ -535,9 +534,9 @@ bool LSTMTrainer::DeSerialize(bool swap, TFile* fp) { sub_trainer_ = new LSTMTrainer(); if (!ReadTrainingDump(sub_data, sub_trainer_)) return false; } - if (!best_error_history_.DeSerialize(swap, fp)) return false; - if (!best_error_iterations_.DeSerialize(swap, fp)) return false; - if (fp->FRead(&improvement_steps_, sizeof(improvement_steps_), 1) != 1) + if (!best_error_history_.DeSerialize(fp)) return false; + if (!best_error_iterations_.DeSerialize(fp)) return false; + if (fp->FReadEndian(&improvement_steps_, sizeof(improvement_steps_), 1) != 1) return false; return true; } @@ -925,7 +924,7 @@ bool LSTMTrainer::ReadTrainingDump(const GenericVector& data, bool LSTMTrainer::ReadSizedTrainingDump(const char* data, int size) { TFile fp; fp.Open(data, size); - return DeSerialize(false, &fp); + return DeSerialize(&fp); } // Writes the recognizer to memory, so that it can be used for testing later. @@ -943,7 +942,7 @@ LSTMRecognizer* LSTMTrainer::ReadRecognitionDump( TFile fp; fp.Open(&data[0], data.size()); LSTMRecognizer* recognizer = new LSTMRecognizer; - ASSERT_HOST(recognizer->DeSerialize(false, &fp)); + ASSERT_HOST(recognizer->DeSerialize(&fp)); return recognizer; } diff --git a/lstm/lstmtrainer.h b/lstm/lstmtrainer.h index 2054284dd..484b75f96 100644 --- a/lstm/lstmtrainer.h +++ b/lstm/lstmtrainer.h @@ -215,8 +215,7 @@ class LSTMTrainer : public LSTMRecognizer { // Writes to the given file. Returns false in case of error. virtual bool Serialize(TFile* fp) const; // Reads from the given file. Returns false in case of error. - // If swap is true, assumes a big/little-endian swap is needed. - virtual bool DeSerialize(bool swap, TFile* fp); + virtual bool DeSerialize(TFile* fp); // De-serializes the saved best_trainer_ into sub_trainer_, and adjusts the // learning rates (by scaling reduction, or layer specific, according to diff --git a/lstm/maxpool.cpp b/lstm/maxpool.cpp index 2164aaf5e..edfb2f3fb 100644 --- a/lstm/maxpool.cpp +++ b/lstm/maxpool.cpp @@ -31,9 +31,8 @@ Maxpool::~Maxpool() { } // Reads from the given file. Returns false in case of error. -// If swap is true, assumes a big/little-endian swap is needed. -bool Maxpool::DeSerialize(bool swap, TFile* fp) { - bool result = Reconfig::DeSerialize(swap, fp); +bool Maxpool::DeSerialize(TFile* fp) { + bool result = Reconfig::DeSerialize(fp); no_ = ni_; return result; } diff --git a/lstm/maxpool.h b/lstm/maxpool.h index 1f742a9d3..99e765acd 100644 --- a/lstm/maxpool.h +++ b/lstm/maxpool.h @@ -40,8 +40,7 @@ class Maxpool : public Reconfig { } // Reads from the given file. Returns false in case of error. - // If swap is true, assumes a big/little-endian swap is needed. - virtual bool DeSerialize(bool swap, TFile* fp); + virtual bool DeSerialize(TFile* fp); // Runs forward propagation of activations on the input line. // See Network for a detailed discussion of the arguments. diff --git a/lstm/network.cpp b/lstm/network.cpp index 791848ad4..ee3289e24 100644 --- a/lstm/network.cpp +++ b/lstm/network.cpp @@ -164,14 +164,13 @@ bool Network::Serialize(TFile* fp) const { } // Reads from the given file. Returns false in case of error. -// If swap is true, assumes a big/little-endian swap is needed. // Should be overridden by subclasses, but NOT called by their DeSerialize. -bool Network::DeSerialize(bool swap, TFile* fp) { +bool Network::DeSerialize(TFile* fp) { inT8 data = 0; if (fp->FRead(&data, sizeof(data), 1) != 1) return false; if (data == NT_NONE) { STRING type_name; - if (!type_name.DeSerialize(swap, fp)) return false; + if (!type_name.DeSerialize(fp)) return false; for (data = 0; data < NT_COUNT && type_name != kTypeNames[data]; ++data) { } if (data == NT_COUNT) { @@ -184,27 +183,22 @@ bool Network::DeSerialize(bool swap, TFile* fp) { training_ = data == TS_ENABLED ? TS_ENABLED : TS_DISABLED; if (fp->FRead(&data, sizeof(data), 1) != 1) return false; needs_to_backprop_ = data != 0; - if (fp->FRead(&network_flags_, sizeof(network_flags_), 1) != 1) return false; - if (fp->FRead(&ni_, sizeof(ni_), 1) != 1) return false; - if (fp->FRead(&no_, sizeof(no_), 1) != 1) return false; - if (fp->FRead(&num_weights_, sizeof(num_weights_), 1) != 1) return false; - if (!name_.DeSerialize(swap, fp)) return false; - if (swap) { - ReverseN(&network_flags_, sizeof(network_flags_)); - ReverseN(&ni_, sizeof(ni_)); - ReverseN(&no_, sizeof(no_)); - ReverseN(&num_weights_, sizeof(num_weights_)); - } + if (fp->FReadEndian(&network_flags_, sizeof(network_flags_), 1) != 1) + return false; + if (fp->FReadEndian(&ni_, sizeof(ni_), 1) != 1) return false; + if (fp->FReadEndian(&no_, sizeof(no_), 1) != 1) return false; + if (fp->FReadEndian(&num_weights_, sizeof(num_weights_), 1) != 1) + return false; + if (!name_.DeSerialize(fp)) return false; return true; } // Reads from the given file. Returns NULL in case of error. -// If swap is true, assumes a big/little-endian swap is needed. // Determines the type of the serialized class and calls its DeSerialize // on a new object of the appropriate type, which is returned. -Network* Network::CreateFromFile(bool swap, TFile* fp) { +Network* Network::CreateFromFile(TFile* fp) { Network stub; - if (!stub.DeSerialize(swap, fp)) return NULL; + if (!stub.DeSerialize(fp)) return NULL; Network* network = NULL; switch (stub.type_) { case NT_CONVOLVE: @@ -269,7 +263,7 @@ Network* Network::CreateFromFile(bool swap, TFile* fp) { network->needs_to_backprop_ = stub.needs_to_backprop_; network->network_flags_ = stub.network_flags_; network->num_weights_ = stub.num_weights_; - if (!network->DeSerialize(swap, fp)) { + if (!network->DeSerialize(fp)) { delete network; return NULL; } diff --git a/lstm/network.h b/lstm/network.h index db38b1821..951af3fb3 100644 --- a/lstm/network.h +++ b/lstm/network.h @@ -208,9 +208,8 @@ class Network { // Should be overridden by subclasses, but called by their Serialize. virtual bool Serialize(TFile* fp) const; // Reads from the given file. Returns false in case of error. - // If swap is true, assumes a big/little-endian swap is needed. // Should be overridden by subclasses, but NOT called by their DeSerialize. - virtual bool DeSerialize(bool swap, TFile* fp); + virtual bool DeSerialize(TFile* fp); // Updates the weights using the given learning rate and momentum. // num_samples is the quotient to be used in the adagrad computation iff @@ -223,10 +222,9 @@ class Network { double* changed) const {} // Reads from the given file. Returns NULL in case of error. - // If swap is true, assumes a big/little-endian swap is needed. // Determines the type of the serialized class and calls its DeSerialize // on a new object of the appropriate type, which is returned. - static Network* CreateFromFile(bool swap, TFile* fp); + static Network* CreateFromFile(TFile* fp); // Runs forward propagation of activations on the input line. // Note that input and output are both 2-d arrays. diff --git a/lstm/plumbing.cpp b/lstm/plumbing.cpp index bfb582541..1f40093e0 100644 --- a/lstm/plumbing.cpp +++ b/lstm/plumbing.cpp @@ -187,19 +187,18 @@ bool Plumbing::Serialize(TFile* fp) const { } // Reads from the given file. Returns false in case of error. -// If swap is true, assumes a big/little-endian swap is needed. -bool Plumbing::DeSerialize(bool swap, TFile* fp) { +bool Plumbing::DeSerialize(TFile* fp) { stack_.truncate(0); no_ = 0; // We will be modifying this as we AddToStack. inT32 size; - if (fp->FRead(&size, sizeof(size), 1) != 1) return false; + if (fp->FReadEndian(&size, sizeof(size), 1) != 1) return false; for (int i = 0; i < size; ++i) { - Network* network = CreateFromFile(swap, fp); + Network* network = CreateFromFile(fp); if (network == NULL) return false; AddToStack(network); } if ((network_flags_ & NF_LAYER_SPECIFIC_LR) && - !learning_rates_.DeSerialize(swap, fp)) { + !learning_rates_.DeSerialize(fp)) { return false; } return true; diff --git a/lstm/plumbing.h b/lstm/plumbing.h index bda855e09..b56648240 100644 --- a/lstm/plumbing.h +++ b/lstm/plumbing.h @@ -116,8 +116,7 @@ class Plumbing : public Network { // Writes to the given file. Returns false in case of error. virtual bool Serialize(TFile* fp) const; // Reads from the given file. Returns false in case of error. - // If swap is true, assumes a big/little-endian swap is needed. - virtual bool DeSerialize(bool swap, TFile* fp); + virtual bool DeSerialize(TFile* fp); // Updates the weights using the given learning rate and momentum. // num_samples is the quotient to be used in the adagrad computation iff diff --git a/lstm/reconfig.cpp b/lstm/reconfig.cpp index aa5e01b92..9c29d4516 100644 --- a/lstm/reconfig.cpp +++ b/lstm/reconfig.cpp @@ -59,14 +59,9 @@ bool Reconfig::Serialize(TFile* fp) const { } // Reads from the given file. Returns false in case of error. -// If swap is true, assumes a big/little-endian swap is needed. -bool Reconfig::DeSerialize(bool swap, TFile* fp) { - if (fp->FRead(&x_scale_, sizeof(x_scale_), 1) != 1) return false; - if (fp->FRead(&y_scale_, sizeof(y_scale_), 1) != 1) return false; - if (swap) { - ReverseN(&x_scale_, sizeof(x_scale_)); - ReverseN(&y_scale_, sizeof(y_scale_)); - } +bool Reconfig::DeSerialize(TFile* fp) { + if (fp->FReadEndian(&x_scale_, sizeof(x_scale_), 1) != 1) return false; + if (fp->FReadEndian(&y_scale_, sizeof(y_scale_), 1) != 1) return false; no_ = ni_ * x_scale_ * y_scale_; return true; } diff --git a/lstm/reconfig.h b/lstm/reconfig.h index 4409cf0a4..351b7066d 100644 --- a/lstm/reconfig.h +++ b/lstm/reconfig.h @@ -57,8 +57,7 @@ class Reconfig : public Network { // Writes to the given file. Returns false in case of error. virtual bool Serialize(TFile* fp) const; // Reads from the given file. Returns false in case of error. - // If swap is true, assumes a big/little-endian swap is needed. - virtual bool DeSerialize(bool swap, TFile* fp); + virtual bool DeSerialize(TFile* fp); // Runs forward propagation of activations on the input line. // See Network for a detailed discussion of the arguments. diff --git a/lstm/tfnetwork.cpp b/lstm/tfnetwork.cpp index 13d487a42..abc8ba4a1 100644 --- a/lstm/tfnetwork.cpp +++ b/lstm/tfnetwork.cpp @@ -53,11 +53,10 @@ bool TFNetwork::Serialize(TFile* fp) const { } // Reads from the given file. Returns false in case of error. -// If swap is true, assumes a big/little-endian swap is needed. // Should be overridden by subclasses, but NOT called by their DeSerialize. -bool TFNetwork::DeSerialize(bool swap, TFile* fp) { +bool TFNetwork::DeSerialize(TFile* fp) { GenericVector data; - if (!data.DeSerialize(swap, fp)) return false; + if (!data.DeSerialize(fp)) return false; if (!model_proto_.ParseFromArray(&data[0], data.size())) { return false; } diff --git a/lstm/tfnetwork.h b/lstm/tfnetwork.h index 749706cd4..e796f00fa 100644 --- a/lstm/tfnetwork.h +++ b/lstm/tfnetwork.h @@ -59,9 +59,8 @@ class TFNetwork : public Network { // Should be overridden by subclasses, but called by their Serialize. virtual bool Serialize(TFile* fp) const; // Reads from the given file. Returns false in case of error. - // If swap is true, assumes a big/little-endian swap is needed. // Should be overridden by subclasses, but NOT called by their DeSerialize. - virtual bool DeSerialize(bool swap, TFile* fp); + virtual bool DeSerialize(TFile* fp); // Runs forward propagation of activations on the input line. // See Network for a detailed discussion of the arguments. diff --git a/lstm/weightmatrix.cpp b/lstm/weightmatrix.cpp index 477de4669..77b8d824f 100644 --- a/lstm/weightmatrix.cpp +++ b/lstm/weightmatrix.cpp @@ -121,22 +121,22 @@ bool WeightMatrix::Serialize(bool training, TFile* fp) const { } // Reads from the given file. Returns false in case of error. -// If swap is true, assumes a big/little-endian swap is needed. -bool WeightMatrix::DeSerialize(bool training, bool swap, TFile* fp) { + +bool WeightMatrix::DeSerialize(bool training, TFile* fp) { uinT8 mode = 0; if (fp->FRead(&mode, sizeof(mode), 1) != 1) return false; int_mode_ = (mode & kInt8Flag) != 0; use_ada_grad_ = (mode & kAdaGradFlag) != 0; - if ((mode & kDoubleFlag) == 0) return DeSerializeOld(training, swap, fp); + if ((mode & kDoubleFlag) == 0) return DeSerializeOld(training, fp); if (int_mode_) { - if (!wi_.DeSerialize(swap, fp)) return false; - if (!scales_.DeSerialize(swap, fp)) return false; + if (!wi_.DeSerialize(fp)) return false; + if (!scales_.DeSerialize(fp)) return false; } else { - if (!wf_.DeSerialize(swap, fp)) return false; + if (!wf_.DeSerialize(fp)) return false; if (training) { InitBackward(use_ada_grad_); - if (!updates_.DeSerialize(swap, fp)) return false; - if (use_ada_grad_ && !dw_sq_sum_.DeSerialize(swap, fp)) return false; + if (!updates_.DeSerialize(fp)) return false; + if (use_ada_grad_ && !dw_sq_sum_.DeSerialize(fp)) return false; } } return true; @@ -144,24 +144,24 @@ bool WeightMatrix::DeSerialize(bool training, bool swap, TFile* fp) { // As DeSerialize, but reads an old (float) format WeightMatrix for // backward compatibility. -bool WeightMatrix::DeSerializeOld(bool training, bool swap, TFile* fp) { +bool WeightMatrix::DeSerializeOld(bool training, TFile* fp) { GENERIC_2D_ARRAY float_array; if (int_mode_) { - if (!wi_.DeSerialize(swap, fp)) return false; + if (!wi_.DeSerialize(fp)) return false; GenericVector old_scales; - if (!old_scales.DeSerialize(swap, fp)) return false; + if (!old_scales.DeSerialize(fp)) return false; scales_.init_to_size(old_scales.size(), 0.0); for (int i = 0; i < old_scales.size(); ++i) scales_[i] = old_scales[i]; } else { - if (!float_array.DeSerialize(swap, fp)) return false; + if (!float_array.DeSerialize(fp)) return false; FloatToDouble(float_array, &wf_); } if (training) { InitBackward(use_ada_grad_); - if (!float_array.DeSerialize(swap, fp)) return false; + if (!float_array.DeSerialize(fp)) return false; FloatToDouble(float_array, &updates_); // Errs was only used in int training, which is now dead. - if (!float_array.DeSerialize(swap, fp)) return false; + if (!float_array.DeSerialize(fp)) return false; } return true; } diff --git a/lstm/weightmatrix.h b/lstm/weightmatrix.h index 635c66188..e1b04c37d 100644 --- a/lstm/weightmatrix.h +++ b/lstm/weightmatrix.h @@ -97,11 +97,10 @@ class WeightMatrix { // Writes to the given file. Returns false in case of error. bool Serialize(bool training, TFile* fp) const; // Reads from the given file. Returns false in case of error. - // If swap is true, assumes a big/little-endian swap is needed. - bool DeSerialize(bool training, bool swap, TFile* fp); + bool DeSerialize(bool training, TFile* fp); // As DeSerialize, but reads an old (float) format WeightMatrix for // backward compatibility. - bool DeSerializeOld(bool training, bool swap, TFile* fp); + bool DeSerializeOld(bool training, TFile* fp); // Computes matrix.vector v = Wu. // u is of size W.dim2() - 1 and the output v is of size W.dim1(). diff --git a/training/commontraining.cpp b/training/commontraining.cpp index cd6dd75cb..0d32daf28 100644 --- a/training/commontraining.cpp +++ b/training/commontraining.cpp @@ -119,7 +119,7 @@ ShapeTable* LoadShapeTable(const STRING& file_prefix) { TFile shape_fp; if (shape_fp.Open(shape_table_file.string(), nullptr)) { shape_table = new ShapeTable; - if (!shape_table->DeSerialize(false, &shape_fp)) { + if (!shape_table->DeSerialize(&shape_fp)) { delete shape_table; shape_table = nullptr; tprintf("Error: Failed to read shape table %s\n", diff --git a/training/lstmtester.cpp b/training/lstmtester.cpp index df37ebd7e..f0eaa697b 100644 --- a/training/lstmtester.cpp +++ b/training/lstmtester.cpp @@ -42,8 +42,7 @@ bool LSTMTester::LoadAllEvalData(const STRING& filenames_file) { // loaded. bool LSTMTester::LoadAllEvalData(const GenericVector& filenames) { test_data_.Clear(); - bool result = - test_data_.LoadDocuments(filenames, "eng", CS_SEQUENTIAL, nullptr); + bool result = test_data_.LoadDocuments(filenames, CS_SEQUENTIAL, nullptr); total_pages_ = test_data_.TotalPages(); return result; }