Final part of the endian improvement. Adds big-endian support to LSTM and fixes issue 518.

This commit is contained in:
Ray Smith 2017-05-03 16:09:44 -07:00
parent 6ac31dcbdd
commit 8e79297dce
50 changed files with 299 additions and 376 deletions

View File

@ -49,7 +49,7 @@ void Tesseract::TrainLineRecognizer(const STRING& input_imagename,
DocumentData images(lstmf_name);
if (applybox_page > 0) {
// Load existing document for the previous pages.
if (!images.LoadDocument(lstmf_name.string(), "eng", 0, 0, NULL)) {
if (!images.LoadDocument(lstmf_name.string(), 0, 0, nullptr)) {
tprintf("Failed to read training data from %s!\n", lstmf_name.string());
return;
}

View File

@ -188,13 +188,9 @@ bool Tesseract::init_tesseract_lang_data(
#ifndef ANDROID_BUILD
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY ||
tessedit_ocr_engine_mode == OEM_TESSERACT_LSTM_COMBINED) {
if (mgr->swap()) {
tprintf("Error: LSTM requested on big-endian hardware!!\n");
tprintf("Big-endian not yet supported! Loading tesseract.\n");
tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_ONLY);
} else if (mgr->GetComponent(TESSDATA_LSTM, &fp)) {
if (mgr->GetComponent(TESSDATA_LSTM, &fp)) {
lstm_recognizer_ = new LSTMRecognizer;
ASSERT_HOST(lstm_recognizer_->DeSerialize(mgr->swap(), &fp));
ASSERT_HOST(lstm_recognizer_->DeSerialize(&fp));
if (lstm_use_matrix) lstm_recognizer_->LoadDictionary(language, mgr);
} else {
tprintf("Error: LSTM requested, but not present!! Loading tesseract.\n");

View File

@ -31,9 +31,9 @@ bool FontInfo::Serialize(FILE* fp) const {
}
// Reads from the given file. Returns false in case of error.
// A big/little-endian swap, if needed, is applied as the data is read from fp.
bool FontInfo::DeSerialize(bool swap, TFile* fp) {
if (!read_info(fp, this, swap)) return false;
if (!read_spacing_info(fp, this, swap)) return false;
bool FontInfo::DeSerialize(TFile* fp) {
if (!read_info(fp, this)) return false;
if (!read_spacing_info(fp, this)) return false;
return true;
}
@ -51,9 +51,9 @@ bool FontInfoTable::Serialize(FILE* fp) const {
}
// Reads from the given file. Returns false in case of error.
// A big/little-endian swap, if needed, is applied as the data is read from fp.
bool FontInfoTable::DeSerialize(bool swap, TFile* fp) {
bool FontInfoTable::DeSerialize(TFile* fp) {
truncate(0);
return this->DeSerializeClasses(swap, fp);
return this->DeSerializeClasses(fp);
}
// Returns true if the given set of fonts includes one with the same
@ -149,14 +149,14 @@ void FontSetDeleteCallback(FontSet fs) {
/*---------------------------------------------------------------------------*/
// Callbacks used by UnicityTable to read/write FontInfo/FontSet structures.
bool read_info(TFile* f, FontInfo* fi, bool swap) {
bool read_info(TFile* f, FontInfo* fi) {
inT32 size;
if (f->FReadEndian(&size, sizeof(size), 1, swap) != 1) return false;
if (f->FReadEndian(&size, sizeof(size), 1) != 1) return false;
char* font_name = new char[size + 1];
fi->name = font_name;
if (f->FRead(font_name, sizeof(*font_name), size) != size) return false;
font_name[size] = '\0';
if (f->FReadEndian(&fi->properties, sizeof(fi->properties), 1, swap) != 1)
if (f->FReadEndian(&fi->properties, sizeof(fi->properties), 1) != 1)
return false;
return true;
}
@ -170,19 +170,17 @@ bool write_info(FILE* f, const FontInfo& fi) {
return true;
}
bool read_spacing_info(TFile* f, FontInfo* fi, bool swap) {
bool read_spacing_info(TFile* f, FontInfo* fi) {
inT32 vec_size, kern_size;
if (f->FReadEndian(&vec_size, sizeof(vec_size), 1, swap) != 1) return false;
if (f->FReadEndian(&vec_size, sizeof(vec_size), 1) != 1) return false;
ASSERT_HOST(vec_size >= 0);
if (vec_size == 0) return true;
fi->init_spacing(vec_size);
for (int i = 0; i < vec_size; ++i) {
FontSpacingInfo *fs = new FontSpacingInfo();
if (f->FReadEndian(&fs->x_gap_before, sizeof(fs->x_gap_before), 1, swap) !=
1 ||
f->FReadEndian(&fs->x_gap_after, sizeof(fs->x_gap_after), 1, swap) !=
1 ||
f->FReadEndian(&kern_size, sizeof(kern_size), 1, swap) != 1) {
if (f->FReadEndian(&fs->x_gap_before, sizeof(fs->x_gap_before), 1) != 1 ||
f->FReadEndian(&fs->x_gap_after, sizeof(fs->x_gap_after), 1) != 1 ||
f->FReadEndian(&kern_size, sizeof(kern_size), 1) != 1) {
delete fs;
return false;
}
@ -190,8 +188,8 @@ bool read_spacing_info(TFile* f, FontInfo* fi, bool swap) {
delete fs;
continue;
}
if (kern_size > 0 && (!fs->kerned_unichar_ids.DeSerialize(swap, f) ||
!fs->kerned_x_gaps.DeSerialize(swap, f))) {
if (kern_size > 0 && (!fs->kerned_unichar_ids.DeSerialize(f) ||
!fs->kerned_x_gaps.DeSerialize(f))) {
delete fs;
return false;
}
@ -229,11 +227,10 @@ bool write_spacing_info(FILE* f, const FontInfo& fi) {
return true;
}
bool read_set(TFile* f, FontSet* fs, bool swap) {
if (f->FReadEndian(&fs->size, sizeof(fs->size), 1, swap) != 1) return false;
bool read_set(TFile* f, FontSet* fs) {
if (f->FReadEndian(&fs->size, sizeof(fs->size), 1) != 1) return false;
fs->configs = new int[fs->size];
if (f->FReadEndian(fs->configs, sizeof(fs->configs[0]), fs->size, swap) !=
fs->size)
if (f->FReadEndian(fs->configs, sizeof(fs->configs[0]), fs->size) != fs->size)
return false;
return true;
}

View File

@ -67,7 +67,7 @@ struct FontInfo {
bool Serialize(FILE* fp) const;
// Reads from the given file. Returns false in case of error.
// A big/little-endian swap, if needed, is applied as the data is read from fp.
bool DeSerialize(bool swap, TFile* fp);
bool DeSerialize(TFile* fp);
// Reserves unicharset_size spots in spacing_vec.
void init_spacing(int unicharset_size) {
@ -152,7 +152,7 @@ class FontInfoTable : public GenericVector<FontInfo> {
bool Serialize(FILE* fp) const;
// Reads from the given file. Returns false in case of error.
// A big/little-endian swap, if needed, is applied as the data is read from fp.
bool DeSerialize(bool swap, TFile* fp);
bool DeSerialize(TFile* fp);
// Returns true if the given set of fonts includes one with the same
// properties as font_id.
@ -177,11 +177,11 @@ void FontInfoDeleteCallback(FontInfo f);
void FontSetDeleteCallback(FontSet fs);
// Callbacks used by UnicityTable to read/write FontInfo/FontSet structures.
bool read_info(TFile* f, FontInfo* fi, bool swap);
bool read_info(TFile* f, FontInfo* fi);
bool write_info(FILE* f, const FontInfo& fi);
bool read_spacing_info(TFile* f, FontInfo* fi, bool swap);
bool read_spacing_info(TFile* f, FontInfo* fi);
bool write_spacing_info(FILE* f, const FontInfo& fi);
bool read_set(TFile* f, FontSet* fs, bool swap);
bool read_set(TFile* f, FontSet* fs);
bool write_set(FILE* f, const FontSet& fs);
} // namespace tesseract.

View File

@ -166,6 +166,7 @@ bool ImageData::Serialize(TFile* fp) const {
if (!imagefilename_.Serialize(fp)) return false;
if (fp->FWrite(&page_number_, sizeof(page_number_), 1) != 1) return false;
if (!image_data_.Serialize(fp)) return false;
if (!language_.Serialize(fp)) return false;
if (!transcription_.Serialize(fp)) return false;
// WARNING: Will not work across different endian machines.
if (!boxes_.Serialize(fp)) return false;
@ -177,15 +178,16 @@ bool ImageData::Serialize(TFile* fp) const {
// Reads from the given file. Returns false in case of error.
// A big/little-endian swap, if needed, is applied as the data is read from fp.
bool ImageData::DeSerialize(bool swap, TFile* fp) {
if (!imagefilename_.DeSerialize(swap, fp)) return false;
if (fp->FRead(&page_number_, sizeof(page_number_), 1) != 1) return false;
if (swap) ReverseN(&page_number_, sizeof(page_number_));
if (!image_data_.DeSerialize(swap, fp)) return false;
if (!transcription_.DeSerialize(swap, fp)) return false;
bool ImageData::DeSerialize(TFile* fp) {
if (!imagefilename_.DeSerialize(fp)) return false;
if (fp->FReadEndian(&page_number_, sizeof(page_number_), 1) != 1)
return false;
if (!image_data_.DeSerialize(fp)) return false;
if (!language_.DeSerialize(fp)) return false;
if (!transcription_.DeSerialize(fp)) return false;
// WARNING: Will not work across different endian machines.
if (!boxes_.DeSerialize(swap, fp)) return false;
if (!box_texts_.DeSerializeClasses(swap, fp)) return false;
if (!boxes_.DeSerialize(fp)) return false;
if (!box_texts_.DeSerializeClasses(fp)) return false;
inT8 vertical = 0;
if (fp->FRead(&vertical, sizeof(vertical), 1) != 1) return false;
vertical_text_ = vertical != 0;
@ -193,14 +195,15 @@ bool ImageData::DeSerialize(bool swap, TFile* fp) {
}
// As DeSerialize, but only seeks past the data - hence a static method.
bool ImageData::SkipDeSerialize(bool swap, TFile* fp) {
if (!STRING::SkipDeSerialize(swap, fp)) return false;
bool ImageData::SkipDeSerialize(TFile* fp) {
if (!STRING::SkipDeSerialize(fp)) return false;
inT32 page_number;
if (fp->FRead(&page_number, sizeof(page_number), 1) != 1) return false;
if (!GenericVector<char>::SkipDeSerialize(swap, fp)) return false;
if (!STRING::SkipDeSerialize(swap, fp)) return false;
if (!GenericVector<TBOX>::SkipDeSerialize(swap, fp)) return false;
if (!GenericVector<STRING>::SkipDeSerializeClasses(swap, fp)) return false;
if (!GenericVector<char>::SkipDeSerialize(fp)) return false;
if (!STRING::SkipDeSerialize(fp)) return false;
if (!STRING::SkipDeSerialize(fp)) return false;
if (!GenericVector<TBOX>::SkipDeSerialize(fp)) return false;
if (!GenericVector<STRING>::SkipDeSerializeClasses(fp)) return false;
inT8 vertical = 0;
return fp->FRead(&vertical, sizeof(vertical), 1) == 1;
}
@ -384,21 +387,19 @@ DocumentData::~DocumentData() {
// Reads all the pages in the given lstmf filename to the cache. The reader
// is used to read the file.
bool DocumentData::LoadDocument(const char* filename, const char* lang,
int start_page, inT64 max_memory,
FileReader reader) {
SetDocument(filename, lang, max_memory, reader);
bool DocumentData::LoadDocument(const char* filename, int start_page,
inT64 max_memory, FileReader reader) {
SetDocument(filename, max_memory, reader);
pages_offset_ = start_page;
return ReCachePages();
}
// Sets up the document, without actually loading it.
void DocumentData::SetDocument(const char* filename, const char* lang,
inT64 max_memory, FileReader reader) {
void DocumentData::SetDocument(const char* filename, inT64 max_memory,
FileReader reader) {
SVAutoLock lock_p(&pages_mutex_);
SVAutoLock lock(&general_mutex_);
document_name_ = filename;
lang_ = lang;
pages_offset_ = -1;
max_memory_ = max_memory;
reader_ = reader;
@ -522,7 +523,7 @@ bool DocumentData::ReCachePages() {
pages_.truncate(0);
TFile fp;
if (!fp.Open(document_name_, reader_) ||
!PointerVector<ImageData>::DeSerializeSize(false, &fp, &loaded_pages) ||
!PointerVector<ImageData>::DeSerializeSize(&fp, &loaded_pages) ||
loaded_pages <= 0) {
tprintf("Deserialize header failed: %s\n", document_name_.string());
return false;
@ -534,15 +535,17 @@ bool DocumentData::ReCachePages() {
for (page = 0; page < loaded_pages; ++page) {
if (page < pages_offset_ ||
(max_memory_ > 0 && memory_used() > max_memory_)) {
if (!PointerVector<ImageData>::DeSerializeSkip(false, &fp)) break;
if (!PointerVector<ImageData>::DeSerializeSkip(&fp)) {
tprintf("Deserializeskip failed\n");
break;
}
} else {
if (!pages_.DeSerializeElement(false, &fp)) break;
if (!pages_.DeSerializeElement(&fp)) break;
ImageData* image_data = pages_.back();
if (image_data->imagefilename().length() == 0) {
image_data->set_imagefilename(document_name_);
image_data->set_page_number(page);
}
image_data->set_language(lang_);
set_memory_used(memory_used() + image_data->MemoryUsed());
}
}
@ -567,7 +570,6 @@ DocumentCache::~DocumentCache() {}
// Adds all the documents in the list of filenames, counting memory.
// The reader is used to read the files.
bool DocumentCache::LoadDocuments(const GenericVector<STRING>& filenames,
const char* lang,
CachingStrategy cache_strategy,
FileReader reader) {
cache_strategy_ = cache_strategy;
@ -580,7 +582,7 @@ bool DocumentCache::LoadDocuments(const GenericVector<STRING>& filenames,
for (int arg = 0; arg < filenames.size(); ++arg) {
STRING filename = filenames[arg];
DocumentData* document = new DocumentData(filename);
document->SetDocument(filename.string(), lang, fair_share_memory, reader);
document->SetDocument(filename.string(), fair_share_memory, reader);
AddToCache(document);
}
if (!documents_.empty()) {

View File

@ -116,10 +116,9 @@ class ImageData {
// Writes to the given file. Returns false in case of error.
bool Serialize(TFile* fp) const;
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool DeSerialize(bool swap, TFile* fp);
bool DeSerialize(TFile* fp);
// As DeSerialize, but only seeks past the data - hence a static method.
static bool SkipDeSerialize(bool swap, tesseract::TFile* fp);
static bool SkipDeSerialize(tesseract::TFile* fp);
// Other accessors.
const STRING& imagefilename() const {
@ -210,11 +209,10 @@ class DocumentData {
// Reads all the pages in the given lstmf filename to the cache. The reader
// is used to read the file.
bool LoadDocument(const char* filename, const char* lang, int start_page,
inT64 max_memory, FileReader reader);
bool LoadDocument(const char* filename, int start_page, inT64 max_memory,
FileReader reader);
// Sets up the document, without actually loading it.
void SetDocument(const char* filename, const char* lang, inT64 max_memory,
FileReader reader);
void SetDocument(const char* filename, inT64 max_memory, FileReader reader);
// Writes all the pages to the given filename. Returns false on error.
bool SaveDocument(const char* filename, FileWriter writer);
bool SaveToBuffer(GenericVector<char>* buffer);
@ -286,8 +284,6 @@ class DocumentData {
private:
// A name for this document.
STRING document_name_;
// The language of this document.
STRING lang_;
// A group of pages that corresponds in some loose way to a document.
PointerVector<ImageData> pages_;
// Page number of the first index in pages_.
@ -325,7 +321,7 @@ class DocumentCache {
}
// Adds all the documents in the list of filenames, counting memory.
// The reader is used to read the files.
bool LoadDocuments(const GenericVector<STRING>& filenames, const char* lang,
bool LoadDocuments(const GenericVector<STRING>& filenames,
CachingStrategy cache_strategy, FileReader reader);
// Adds document to the cache.

View File

@ -164,16 +164,11 @@ class GENERIC_2D_ARRAY {
}
return true;
}
bool DeSerialize(bool swap, tesseract::TFile* fp) {
if (!DeSerializeSize(swap, fp)) return false;
if (fp->FRead(&empty_, sizeof(empty_), 1) != 1) return false;
if (swap) ReverseN(&empty_, sizeof(empty_));
bool DeSerialize(tesseract::TFile* fp) {
if (!DeSerializeSize(fp)) return false;
if (fp->FReadEndian(&empty_, sizeof(empty_), 1) != 1) return false;
int size = num_elements();
if (fp->FRead(array_, sizeof(*array_), size) != size) return false;
if (swap) {
for (int i = 0; i < size; ++i)
ReverseN(&array_[i], sizeof(array_[i]));
}
if (fp->FReadEndian(array_, sizeof(*array_), size) != size) return false;
return true;
}
@ -487,14 +482,10 @@ class GENERIC_2D_ARRAY {
Resize(size1, size2, empty_);
return true;
}
bool DeSerializeSize(bool swap, tesseract::TFile* fp) {
bool DeSerializeSize(tesseract::TFile* fp) {
inT32 size1, size2;
if (fp->FRead(&size1, sizeof(size1), 1) != 1) return false;
if (fp->FRead(&size2, sizeof(size2), 1) != 1) return false;
if (swap) {
ReverseN(&size1, sizeof(size1));
ReverseN(&size2, sizeof(size2));
}
if (fp->FReadEndian(&size1, sizeof(size1), 1) != 1) return false;
if (fp->FReadEndian(&size2, sizeof(size2), 1) != 1) return false;
Resize(size1, size2, empty_);
return true;
}

View File

@ -163,8 +163,7 @@ class GenericVector {
// DEPRECATED. Use [De]Serialize[Classes] instead.
bool write(FILE* f, TessResultCallback2<bool, FILE*, T const &>* cb) const;
bool read(tesseract::TFile* f,
TessResultCallback3<bool, tesseract::TFile*, T*, bool>* cb,
bool swap);
TessResultCallback2<bool, tesseract::TFile*, T*>* cb);
// Writes a vector of simple types to the given file. Assumes that bitwise
// read/write of T will work. Returns false in case of error.
// TODO(rays) Change all callers to use TFile and remove deprecated methods.
@ -174,10 +173,11 @@ class GenericVector {
// read/write will work with ReverseN according to sizeof(T).
// Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
// TFile is assumed to know about swapping.
bool DeSerialize(bool swap, FILE* fp);
bool DeSerialize(bool swap, tesseract::TFile* fp);
bool DeSerialize(tesseract::TFile* fp);
// Skips the deserialization of the vector.
static bool SkipDeSerialize(bool swap, tesseract::TFile* fp);
static bool SkipDeSerialize(tesseract::TFile* fp);
// Writes a vector of classes to the given file. Assumes the existence of
// bool T::Serialize(FILE* fp) const that returns false in case of error.
// Returns false in case of error.
@ -189,9 +189,9 @@ class GenericVector {
// this function. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool DeSerializeClasses(bool swap, FILE* fp);
bool DeSerializeClasses(bool swap, tesseract::TFile* fp);
bool DeSerializeClasses(tesseract::TFile* fp);
// Calls SkipDeSerialize on the elements of the vector.
static bool SkipDeSerializeClasses(bool swap, tesseract::TFile* fp);
static bool SkipDeSerializeClasses(tesseract::TFile* fp);
// Allocates a new array of double the current_size, copies over the
// information from data to the new location, deletes data and returns
@ -569,13 +569,13 @@ class PointerVector : public GenericVector<T*> {
}
return true;
}
bool DeSerialize(bool swap, TFile* fp) {
bool DeSerialize(TFile* fp) {
inT32 reserved;
if (!DeSerializeSize(swap, fp, &reserved)) return false;
if (!DeSerializeSize(fp, &reserved)) return false;
GenericVector<T*>::reserve(reserved);
truncate(0);
for (int i = 0; i < reserved; ++i) {
if (!DeSerializeElement(swap, fp)) return false;
if (!DeSerializeElement(fp)) return false;
}
return true;
}
@ -583,19 +583,17 @@ class PointerVector : public GenericVector<T*> {
// retain the integrity of the stream, the caller must call some combination
// of DeSerializeElement and DeSerializeSkip of the exact number returned in
// *size, assuming a true return.
static bool DeSerializeSize(bool swap, TFile* fp, inT32* size) {
if (fp->FRead(size, sizeof(*size), 1) != 1) return false;
if (swap) Reverse32(size);
return true;
static bool DeSerializeSize(TFile* fp, inT32* size) {
return fp->FReadEndian(size, sizeof(*size), 1) == 1;
}
// Reads and appends to the vector the next element of the serialization.
bool DeSerializeElement(bool swap, TFile* fp) {
bool DeSerializeElement(TFile* fp) {
inT8 non_null;
if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) return false;
T* item = NULL;
if (non_null) {
item = new T;
if (!item->DeSerialize(swap, fp)) {
if (!item->DeSerialize(fp)) {
delete item;
return false;
}
@ -607,11 +605,11 @@ class PointerVector : public GenericVector<T*> {
return true;
}
// Skips the next element of the serialization.
static bool DeSerializeSkip(bool swap, TFile* fp) {
static bool DeSerializeSkip(TFile* fp) {
inT8 non_null;
if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) return false;
if (non_null) {
if (!T::SkipDeSerialize(swap, fp)) return false;
if (!T::SkipDeSerialize(fp)) return false;
}
return true;
}
@ -889,23 +887,21 @@ bool GenericVector<T>::write(
template <typename T>
bool GenericVector<T>::read(
tesseract::TFile* f,
TessResultCallback3<bool, tesseract::TFile*, T*, bool>* cb, bool swap) {
tesseract::TFile* f, TessResultCallback2<bool, tesseract::TFile*, T*>* cb) {
inT32 reserved;
if (f->FReadEndian(&reserved, sizeof(reserved), 1, swap) != 1) return false;
if (f->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false;
reserve(reserved);
if (f->FReadEndian(&size_used_, sizeof(size_used_), 1, swap) != 1)
return false;
if (f->FReadEndian(&size_used_, sizeof(size_used_), 1) != 1) return false;
if (cb != NULL) {
for (int i = 0; i < size_used_; ++i) {
if (!cb->Run(f, data_ + i, swap)) {
if (!cb->Run(f, data_ + i)) {
delete cb;
return false;
}
}
delete cb;
} else {
if (f->FReadEndian(data_, sizeof(T), size_used_, swap) != size_used_)
if (f->FReadEndian(data_, sizeof(T), size_used_) != size_used_)
return false;
}
return true;
@ -945,24 +941,17 @@ bool GenericVector<T>::DeSerialize(bool swap, FILE* fp) {
return true;
}
template <typename T>
bool GenericVector<T>::DeSerialize(bool swap, tesseract::TFile* fp) {
bool GenericVector<T>::DeSerialize(tesseract::TFile* fp) {
inT32 reserved;
if (fp->FRead(&reserved, sizeof(reserved), 1) != 1) return false;
if (swap) Reverse32(&reserved);
if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false;
reserve(reserved);
size_used_ = reserved;
if (fp->FRead(data_, sizeof(T), size_used_) != size_used_) return false;
if (swap) {
for (int i = 0; i < size_used_; ++i)
ReverseN(&data_[i], sizeof(data_[i]));
}
return true;
return fp->FReadEndian(data_, sizeof(T), size_used_) == size_used_;
}
template <typename T>
bool GenericVector<T>::SkipDeSerialize(bool swap, tesseract::TFile* fp) {
bool GenericVector<T>::SkipDeSerialize(tesseract::TFile* fp) {
inT32 reserved;
if (fp->FRead(&reserved, sizeof(reserved), 1) != 1) return false;
if (swap) Reverse32(&reserved);
if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false;
return fp->FRead(NULL, sizeof(T), reserved) == reserved;
}
@ -1004,24 +993,22 @@ bool GenericVector<T>::DeSerializeClasses(bool swap, FILE* fp) {
return true;
}
template <typename T>
bool GenericVector<T>::DeSerializeClasses(bool swap, tesseract::TFile* fp) {
bool GenericVector<T>::DeSerializeClasses(tesseract::TFile* fp) {
uinT32 reserved;
if (fp->FRead(&reserved, sizeof(reserved), 1) != 1) return false;
if (swap) Reverse32(&reserved);
if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false;
T empty;
init_to_size(reserved, empty);
for (int i = 0; i < reserved; ++i) {
if (!data_[i].DeSerialize(swap, fp)) return false;
if (!data_[i].DeSerialize(fp)) return false;
}
return true;
}
template <typename T>
bool GenericVector<T>::SkipDeSerializeClasses(bool swap, tesseract::TFile* fp) {
bool GenericVector<T>::SkipDeSerializeClasses(tesseract::TFile* fp) {
uinT32 reserved;
if (fp->FRead(&reserved, sizeof(reserved), 1) != 1) return false;
if (swap) Reverse32(&reserved);
if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false;
for (int i = 0; i < reserved; ++i) {
if (!T::SkipDeSerialize(swap, fp)) return false;
if (!T::SkipDeSerialize(fp)) return false;
}
return true;
}

View File

@ -24,8 +24,11 @@
namespace tesseract {
TFile::TFile()
: offset_(0), data_(NULL), data_is_owned_(false), is_writing_(false) {
}
: offset_(0),
data_(NULL),
data_is_owned_(false),
is_writing_(false),
swap_(false) {}
TFile::~TFile() {
if (data_is_owned_)
@ -39,6 +42,7 @@ bool TFile::Open(const STRING& filename, FileReader reader) {
}
offset_ = 0;
is_writing_ = false;
swap_ = false;
if (reader == NULL)
return LoadDataFromFile(filename, data_);
else
@ -52,6 +56,7 @@ bool TFile::Open(const char* data, int size) {
data_is_owned_ = true;
}
is_writing_ = false;
swap_ = false;
data_->init_to_size(size, 0);
memcpy(&(*data_)[0], data, size);
return true;
@ -69,6 +74,7 @@ bool TFile::Open(FILE* fp, inT64 end_offset) {
}
int size = end_offset - current_pos;
is_writing_ = false;
swap_ = false;
if (!data_is_owned_) {
data_ = new GenericVector<char>;
data_is_owned_ = true;
@ -88,9 +94,9 @@ char* TFile::FGets(char* buffer, int buffer_size) {
return size > 0 ? buffer : NULL;
}
int TFile::FReadEndian(void* buffer, int size, int count, bool swap) {
int TFile::FReadEndian(void* buffer, int size, int count) {
int num_read = FRead(buffer, size, count);
if (swap) {
if (swap_) {
char* char_buffer = reinterpret_cast<char*>(buffer);
for (int i = 0; i < num_read; ++i, char_buffer += size) {
ReverseN(char_buffer, size);
@ -128,6 +134,7 @@ void TFile::OpenWrite(GenericVector<char>* data) {
data_is_owned_ = true;
}
is_writing_ = true;
swap_ = false;
data_->truncate(0);
}

View File

@ -61,6 +61,8 @@ class TFile {
bool Open(const char* data, int size);
// From an open file and an end offset.
bool Open(FILE* fp, inT64 end_offset);
// Sets the swap flag consulted by FReadEndian: when true, FReadEndian reverses
// the bytes of each item it reads. The Open and OpenWrite overloads reset the
// flag to false, so call this after opening the TFile.
void set_swap(bool value) { swap_ = value; }
// Reads a line like fgets. Returns NULL on EOF, otherwise buffer.
// Reads at most buffer_size bytes, including '\0' terminator, even if
@ -68,9 +70,9 @@ class TFile {
// To use fscanf use FGets and sscanf.
char* FGets(char* buffer, int buffer_size);
// Replicates fread, followed by a swap of the bytes if needed, returning the
// number of items read. If swap is true then the count items will each have
// number of items read. If swap_ is true then the count items will each have
// size bytes reversed.
int FReadEndian(void* buffer, int size, int count, bool swap);
int FReadEndian(void* buffer, int size, int count);
// Replicates fread, returning the number of items read.
int FRead(void* buffer, int size, int count);
// Resets the TFile as if it has been Opened, but nothing read.
@ -96,6 +98,8 @@ class TFile {
bool data_is_owned_;
// True if the TFile is open for writing.
bool is_writing_;
// True if bytes need to be swapped in FReadEndian.
bool swap_;
};
} // namespace tesseract.

View File

@ -171,21 +171,18 @@ bool STRING::DeSerialize(bool swap, FILE* fp) {
}
// Reads from the given file. Returns false in case of error.
// A big/little-endian swap, if needed, is applied as the data is read from fp.
bool STRING::DeSerialize(bool swap, TFile* fp) {
bool STRING::DeSerialize(TFile* fp) {
inT32 len;
if (fp->FRead(&len, sizeof(len), 1) != 1) return false;
if (swap)
ReverseN(&len, sizeof(len));
if (fp->FReadEndian(&len, sizeof(len), 1) != 1) return false;
truncate_at(len);
if (fp->FRead(GetCStr(), 1, len) != len) return false;
return true;
}
// As DeSerialize, but only seeks past the data - hence a static method.
bool STRING::SkipDeSerialize(bool swap, tesseract::TFile* fp) {
bool STRING::SkipDeSerialize(tesseract::TFile* fp) {
inT32 len;
if (fp->FRead(&len, sizeof(len), 1) != 1) return false;
if (swap) ReverseN(&len, sizeof(len));
if (fp->FReadEndian(&len, sizeof(len), 1) != 1) return false;
return fp->FRead(NULL, 1, len) == len;
}

View File

@ -59,9 +59,9 @@ class TESS_API STRING
bool Serialize(tesseract::TFile* fp) const;
// Reads from the given file. Returns false in case of error.
// A big/little-endian swap, if needed, is applied as the data is read from fp.
bool DeSerialize(bool swap, tesseract::TFile* fp);
bool DeSerialize(tesseract::TFile* fp);
// As DeSerialize, but only seeks past the data - hence a static method.
static bool SkipDeSerialize(bool swap, tesseract::TFile* fp);
static bool SkipDeSerialize(tesseract::TFile* fp);
BOOL8 contains(const char c) const;
inT32 length() const;

View File

@ -59,11 +59,12 @@ bool TessdataManager::LoadMemBuffer(const char *name, const char *data,
inT32 num_entries = TESSDATA_NUM_ENTRIES;
if (fp.FRead(&num_entries, sizeof(num_entries), 1) != 1) return false;
swap_ = num_entries > kMaxNumTessdataEntries || num_entries < 0;
fp.set_swap(swap_);
if (swap_) ReverseN(&num_entries, sizeof(num_entries));
GenericVector<inT64> offset_table;
offset_table.init_to_size(num_entries, -1);
if (fp.FReadEndian(&offset_table[0], sizeof(offset_table[0]), num_entries,
swap_) != num_entries)
if (fp.FReadEndian(&offset_table[0], sizeof(offset_table[0]), num_entries) !=
num_entries)
return false;
for (int i = 0; i < num_entries && i < TESSDATA_NUM_ENTRIES; ++i) {
if (offset_table[i] >= 0) {
@ -152,6 +153,7 @@ bool TessdataManager::GetComponent(TessdataType type, TFile *fp) {
if (!is_loaded_ && !Init(data_file_name_.string())) return false;
if (entries_[type].empty()) return false;
fp->Open(&entries_[type][0], entries_[type].size());
fp->set_swap(swap_);
return true;
}

View File

@ -315,9 +315,8 @@ bool UnicharCompress::Serialize(TFile* fp) const {
}
// Reads from the given file. Returns false in case of error.
// A big/little-endian swap, if needed, is applied as the data is read from fp.
bool UnicharCompress::DeSerialize(bool swap, TFile* fp) {
if (!encoder_.DeSerializeClasses(swap, fp)) return false;
bool UnicharCompress::DeSerialize(TFile* fp) {
if (!encoder_.DeSerializeClasses(fp)) return false;
ComputeCodeRange();
SetupDecoder();
return true;

View File

@ -69,17 +69,12 @@ class RecodedCharID {
}
// Reads from the given file. Returns false in case of error.
// A big/little-endian swap, if needed, is applied as the data is read from fp.
bool DeSerialize(bool swap, TFile* fp) {
bool DeSerialize(TFile* fp) {
if (fp->FRead(&self_normalized_, sizeof(self_normalized_), 1) != 1)
return false;
if (fp->FRead(&length_, sizeof(length_), 1) != 1) return false;
if (swap) ReverseN(&length_, sizeof(length_));
if (fp->FRead(code_, sizeof(code_[0]), length_) != length_) return false;
if (swap) {
for (int i = 0; i < length_; ++i) {
ReverseN(&code_[i], sizeof(code_[i]));
}
}
if (fp->FReadEndian(&length_, sizeof(length_), 1) != 1) return false;
if (fp->FReadEndian(code_, sizeof(code_[0]), length_) != length_)
return false;
return true;
}
bool operator==(const RecodedCharID& other) const {
@ -205,8 +200,8 @@ class UnicharCompress {
// Writes to the given file. Returns false in case of error.
bool Serialize(TFile* fp) const;
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool DeSerialize(bool swap, TFile* fp);
bool DeSerialize(TFile* fp);
// Returns a STRING containing a text file that describes the encoding thus:
// <index>[,<index>]*<tab><UTF8-str><newline>

View File

@ -86,10 +86,8 @@ class UnicityTable {
/// once. The given callback will be deleted at the end.
/// Returns false on read/write error.
bool write(FILE* f, TessResultCallback2<bool, FILE*, T const &>* cb) const;
/// swap is used to switch the endianness.
bool read(tesseract::TFile* f,
TessResultCallback3<bool, tesseract::TFile*, T*, bool>* cb,
bool swap);
TessResultCallback2<bool, tesseract::TFile*, T*>* cb);
private:
GenericVector<T> table_;
@ -196,9 +194,8 @@ bool UnicityTable<T>::write(
template <typename T>
bool UnicityTable<T>::read(
tesseract::TFile* f,
TessResultCallback3<bool, tesseract::TFile*, T*, bool>* cb, bool swap) {
return table_.read(f, cb, swap);
tesseract::TFile* f, TessResultCallback2<bool, tesseract::TFile*, T*>* cb) {
return table_.read(f, cb);
}
// This method clear the current object, then, does a shallow copy of

View File

@ -365,7 +365,7 @@ ADAPT_TEMPLATES Classify::ReadAdaptedTemplates(TFile *fp) {
fp->FRead(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1);
/* then read in the basic integer templates */
Templates->Templates = ReadIntTemplates(false, fp);
Templates->Templates = ReadIntTemplates(fp);
/* then read in the adaptive info for each class */
for (i = 0; i < (Templates->Templates)->NumClasses; i++) {

View File

@ -535,11 +535,11 @@ void Classify::InitAdaptiveClassifier(TessdataManager* mgr) {
if (language_data_path_prefix.length() > 0 && mgr != nullptr) {
TFile fp;
ASSERT_HOST(mgr->GetComponent(TESSDATA_INTTEMP, &fp));
PreTrainedTemplates = ReadIntTemplates(mgr->swap(), &fp);
PreTrainedTemplates = ReadIntTemplates(&fp);
if (mgr->GetComponent(TESSDATA_SHAPE_TABLE, &fp)) {
shape_table_ = new ShapeTable(unicharset);
if (!shape_table_->DeSerialize(mgr->swap(), &fp)) {
if (!shape_table_->DeSerialize(&fp)) {
tprintf("Error loading shape table!\n");
delete shape_table_;
shape_table_ = NULL;
@ -547,7 +547,7 @@ void Classify::InitAdaptiveClassifier(TessdataManager* mgr) {
}
ASSERT_HOST(mgr->GetComponent(TESSDATA_PFFMTABLE, &fp));
ReadNewCutoffs(&fp, mgr->swap(), CharNormCutoffs);
ReadNewCutoffs(&fp, CharNormCutoffs);
ASSERT_HOST(mgr->GetComponent(TESSDATA_NORMPROTO, &fp));
NormProtos = ReadNormProtos(&fp);

View File

@ -103,7 +103,7 @@ class Classify : public CCStruct {
const uinT8* normalization_factors,
const uinT16* expected_num_features,
GenericVector<CP_RESULT_STRUCT>* results);
void ReadNewCutoffs(TFile* fp, bool swap, CLASS_CUTOFF_ARRAY Cutoffs);
void ReadNewCutoffs(TFile* fp, CLASS_CUTOFF_ARRAY Cutoffs);
void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates);
void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates);
ADAPT_TEMPLATES ReadAdaptedTemplates(TFile* File);
@ -334,7 +334,7 @@ class Classify : public CCStruct {
uinT8* char_norm_array);
void ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures);
/* intproto.cpp *************************************************************/
INT_TEMPLATES ReadIntTemplates(bool swap, TFile* fp);
INT_TEMPLATES ReadIntTemplates(TFile* fp);
void WriteIntTemplates(FILE *File, INT_TEMPLATES Templates,
const UNICHARSET& target_unicharset);
CLASS_ID GetClassToDebug(const char *Prompt, bool* adaptive_on,

View File

@ -49,15 +49,14 @@ namespace tesseract {
* @note Exceptions: none
* @note History: Wed Feb 20 09:38:26 1991, DSJ, Created.
*/
void Classify::ReadNewCutoffs(TFile* fp, bool swap,
CLASS_CUTOFF_ARRAY Cutoffs) {
void Classify::ReadNewCutoffs(TFile* fp, CLASS_CUTOFF_ARRAY Cutoffs) {
char Class[UNICHAR_LEN + 1];
CLASS_ID ClassId;
int Cutoff;
int i;
if (shape_table_ != NULL) {
if (!shapetable_cutoffs_.DeSerialize(swap, fp)) {
if (!shapetable_cutoffs_.DeSerialize(fp)) {
tprintf("Error during read of shapetable pffmtable!\n");
}
}

View File

@ -758,7 +758,7 @@ namespace tesseract {
* @note Exceptions: none
* @note History: Wed Feb 27 11:48:46 1991, DSJ, Created.
*/
INT_TEMPLATES Classify::ReadIntTemplates(bool swap, TFile *fp) {
INT_TEMPLATES Classify::ReadIntTemplates(TFile *fp) {
int i, j, w, x, y, z;
int unicharset_size;
int version_id = 0;
@ -784,18 +784,18 @@ INT_TEMPLATES Classify::ReadIntTemplates(bool swap, TFile *fp) {
/* first read the high level template struct */
Templates = NewIntTemplates();
// Read Templates in parts for 64 bit compatibility.
if (fp->FReadEndian(&unicharset_size, sizeof(unicharset_size), 1, swap) != 1)
if (fp->FReadEndian(&unicharset_size, sizeof(unicharset_size), 1) != 1)
tprintf("Bad read of inttemp!\n");
if (fp->FReadEndian(&Templates->NumClasses, sizeof(Templates->NumClasses), 1,
swap) != 1 ||
if (fp->FReadEndian(&Templates->NumClasses, sizeof(Templates->NumClasses),
1) != 1 ||
fp->FReadEndian(&Templates->NumClassPruners,
sizeof(Templates->NumClassPruners), 1, swap) != 1)
sizeof(Templates->NumClassPruners), 1) != 1)
tprintf("Bad read of inttemp!\n");
if (Templates->NumClasses < 0) {
// This file has a version id!
version_id = -Templates->NumClasses;
if (fp->FReadEndian(&Templates->NumClasses, sizeof(Templates->NumClasses),
1, swap) != 1)
1) != 1)
tprintf("Bad read of inttemp!\n");
}
@ -805,12 +805,12 @@ INT_TEMPLATES Classify::ReadIntTemplates(bool swap, TFile *fp) {
}
if (version_id < 2) {
if (fp->FReadEndian(IndexFor, sizeof(IndexFor[0]), unicharset_size, swap) !=
if (fp->FReadEndian(IndexFor, sizeof(IndexFor[0]), unicharset_size) !=
unicharset_size) {
tprintf("Bad read of inttemp!\n");
}
if (fp->FReadEndian(ClassIdFor, sizeof(ClassIdFor[0]),
Templates->NumClasses, swap) != Templates->NumClasses) {
Templates->NumClasses) != Templates->NumClasses) {
tprintf("Bad read of inttemp!\n");
}
}
@ -820,8 +820,8 @@ INT_TEMPLATES Classify::ReadIntTemplates(bool swap, TFile *fp) {
NUM_CP_BUCKETS * NUM_CP_BUCKETS * NUM_CP_BUCKETS * WERDS_PER_CP_VECTOR;
for (i = 0; i < Templates->NumClassPruners; i++) {
Pruner = new CLASS_PRUNER_STRUCT;
if (fp->FReadEndian(Pruner, sizeof(Pruner->p[0][0][0][0]), kNumBuckets,
swap) != kNumBuckets) {
if (fp->FReadEndian(Pruner, sizeof(Pruner->p[0][0][0][0]), kNumBuckets) !=
kNumBuckets) {
tprintf("Bad read of inttemp!\n");
}
if (version_id < 2) {
@ -887,8 +887,7 @@ INT_TEMPLATES Classify::ReadIntTemplates(bool swap, TFile *fp) {
for (i = 0; i < Templates->NumClasses; i++) {
/* first read in the high level struct for the class */
Class = (INT_CLASS) Emalloc (sizeof (INT_CLASS_STRUCT));
if (fp->FReadEndian(&Class->NumProtos, sizeof(Class->NumProtos), 1, swap) !=
1 ||
if (fp->FReadEndian(&Class->NumProtos, sizeof(Class->NumProtos), 1) != 1 ||
fp->FRead(&Class->NumProtoSets, sizeof(Class->NumProtoSets), 1) != 1 ||
fp->FRead(&Class->NumConfigs, sizeof(Class->NumConfigs), 1) != 1)
tprintf("Bad read of inttemp!\n");
@ -902,8 +901,8 @@ INT_TEMPLATES Classify::ReadIntTemplates(bool swap, TFile *fp) {
}
int num_configs = version_id < 4 ? MaxNumConfigs : Class->NumConfigs;
ASSERT_HOST(num_configs <= MaxNumConfigs);
if (fp->FReadEndian(Class->ConfigLengths, sizeof(uinT16), num_configs,
swap) != num_configs) {
if (fp->FReadEndian(Class->ConfigLengths, sizeof(uinT16), num_configs) !=
num_configs) {
tprintf("Bad read of inttemp!\n");
}
if (version_id < 2) {
@ -927,8 +926,8 @@ INT_TEMPLATES Classify::ReadIntTemplates(bool swap, TFile *fp) {
ProtoSet = (PROTO_SET)Emalloc(sizeof(PROTO_SET_STRUCT));
int num_buckets = NUM_PP_PARAMS * NUM_PP_BUCKETS * WERDS_PER_PP_VECTOR;
if (fp->FReadEndian(&ProtoSet->ProtoPruner,
sizeof(ProtoSet->ProtoPruner[0][0][0]), num_buckets,
swap) != num_buckets)
sizeof(ProtoSet->ProtoPruner[0][0][0]),
num_buckets) != num_buckets)
tprintf("Bad read of inttemp!\n");
for (x = 0; x < PROTOS_PER_PROTO_SET; x++) {
if (fp->FRead(&ProtoSet->Protos[x].A, sizeof(ProtoSet->Protos[x].A),
@ -942,7 +941,7 @@ INT_TEMPLATES Classify::ReadIntTemplates(bool swap, TFile *fp) {
tprintf("Bad read of inttemp!\n");
if (fp->FReadEndian(&ProtoSet->Protos[x].Configs,
sizeof(ProtoSet->Protos[x].Configs[0]),
WerdsPerConfigVec, swap) != WerdsPerConfigVec)
WerdsPerConfigVec) != WerdsPerConfigVec)
cprintf("Bad read of inttemp!\n");
}
Class->ProtoSets[j] = ProtoSet;
@ -950,7 +949,7 @@ INT_TEMPLATES Classify::ReadIntTemplates(bool swap, TFile *fp) {
if (version_id < 4) {
Class->font_set_id = -1;
} else {
fp->FReadEndian(&Class->font_set_id, sizeof(Class->font_set_id), 1, swap);
fp->FReadEndian(&Class->font_set_id, sizeof(Class->font_set_id), 1);
}
}
@ -977,12 +976,12 @@ INT_TEMPLATES Classify::ReadIntTemplates(bool swap, TFile *fp) {
}
}
if (version_id >= 4) {
this->fontinfo_table_.read(fp, NewPermanentTessCallback(read_info), swap);
this->fontinfo_table_.read(fp, NewPermanentTessCallback(read_info));
if (version_id >= 5) {
this->fontinfo_table_.read(
fp, NewPermanentTessCallback(read_spacing_info), swap);
this->fontinfo_table_.read(fp,
NewPermanentTessCallback(read_spacing_info));
}
this->fontset_table_.read(fp, NewPermanentTessCallback(read_set), swap);
this->fontset_table_.read(fp, NewPermanentTessCallback(read_set));
}
// Clean up.

View File

@ -70,11 +70,10 @@ bool UnicharAndFonts::Serialize(FILE* fp) const {
return true;
}
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool UnicharAndFonts::DeSerialize(bool swap, TFile* fp) {
if (fp->FReadEndian(&unichar_id, sizeof(unichar_id), 1, swap) != 1)
return false;
if (!font_ids.DeSerialize(swap, fp)) return false;
bool UnicharAndFonts::DeSerialize(TFile* fp) {
if (fp->FReadEndian(&unichar_id, sizeof(unichar_id), 1) != 1) return false;
if (!font_ids.DeSerialize(fp)) return false;
return true;
}
@ -94,13 +93,12 @@ bool Shape::Serialize(FILE* fp) const {
return true;
}
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool Shape::DeSerialize(bool swap, TFile* fp) {
bool Shape::DeSerialize(TFile* fp) {
uinT8 sorted;
if (fp->FRead(&sorted, sizeof(sorted), 1) != 1) return false;
unichars_sorted_ = sorted != 0;
if (!unichars_.DeSerializeClasses(swap, fp)) return false;
return true;
return unichars_.DeSerializeClasses(fp);
}
// Adds a font_id for the given unichar_id. If the unichar_id is not
@ -250,9 +248,9 @@ bool ShapeTable::Serialize(FILE* fp) const {
return true;
}
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool ShapeTable::DeSerialize(bool swap, TFile* fp) {
if (!shape_table_.DeSerialize(swap, fp)) return false;
bool ShapeTable::DeSerialize(TFile* fp) {
if (!shape_table_.DeSerialize(fp)) return false;
num_fonts_ = 0;
return true;
}

View File

@ -167,8 +167,7 @@ struct UnicharAndFonts {
// Writes to the given file. Returns false in case of error.
bool Serialize(FILE* fp) const;
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool DeSerialize(bool swap, TFile* fp);
bool DeSerialize(TFile* fp);
// Sort function to sort a pair of UnicharAndFonts by unichar_id.
static int SortByUnicharId(const void* v1, const void* v2);
@ -190,8 +189,7 @@ class Shape {
// Writes to the given file. Returns false in case of error.
bool Serialize(FILE* fp) const;
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool DeSerialize(bool swap, TFile* fp);
bool DeSerialize(TFile* fp);
int destination_index() const {
return destination_index_;
@ -271,8 +269,7 @@ class ShapeTable {
// Writes to the given file. Returns false in case of error.
bool Serialize(FILE* fp) const;
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool DeSerialize(bool swap, TFile* fp);
bool DeSerialize(TFile* fp);
// Accessors.
int NumShapes() const {

View File

@ -311,23 +311,24 @@ void SquishedDawg::print_edge(EDGE_REF edge) const {
bool SquishedDawg::read_squished_dawg(TFile *file) {
if (debug_level_) tprintf("Reading squished dawg\n");
// Read the magic number and if it does not match kDawgMagicNumber
// set swap to true to indicate that we need to switch endianness.
// Read the magic number and check that it matches kDawgMagicNumber, as
// auto-endian fixing should make sure it is always correct.
inT16 magic;
if (file->FRead(&magic, sizeof(inT16), 1) != 1) return false;
bool swap = (magic != kDawgMagicNumber);
if (file->FReadEndian(&magic, sizeof(magic), 1) != 1) return false;
if (magic != kDawgMagicNumber) {
tprintf("Bad magic number on dawg: %d vs %d\n", magic, kDawgMagicNumber);
return false;
}
inT32 unicharset_size;
if (file->FReadEndian(&unicharset_size, sizeof(unicharset_size), 1, swap) !=
1)
return false;
if (file->FReadEndian(&num_edges_, sizeof(num_edges_), 1, swap) != 1)
if (file->FReadEndian(&unicharset_size, sizeof(unicharset_size), 1) != 1)
return false;
if (file->FReadEndian(&num_edges_, sizeof(num_edges_), 1) != 1) return false;
ASSERT_HOST(num_edges_ > 0); // DAWG should not be empty
Dawg::init(unicharset_size);
edges_ = new EDGE_RECORD[num_edges_];
if (file->FReadEndian(&edges_[0], sizeof(edges_[0]), num_edges_, swap) !=
if (file->FReadEndian(&edges_[0], sizeof(edges_[0]), num_edges_) !=
num_edges_)
return false;
if (debug_level_ > 2) {

View File

@ -42,14 +42,9 @@ bool Convolve::Serialize(TFile* fp) const {
}
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool Convolve::DeSerialize(bool swap, TFile* fp) {
if (fp->FRead(&half_x_, sizeof(half_x_), 1) != 1) return false;
if (fp->FRead(&half_y_, sizeof(half_y_), 1) != 1) return false;
if (swap) {
ReverseN(&half_x_, sizeof(half_x_));
ReverseN(&half_y_, sizeof(half_y_));
}
bool Convolve::DeSerialize(TFile* fp) {
if (fp->FReadEndian(&half_x_, sizeof(half_x_), 1) != 1) return false;
if (fp->FReadEndian(&half_y_, sizeof(half_y_), 1) != 1) return false;
no_ = ni_ * (2*half_x_ + 1) * (2*half_y_ + 1);
return true;
}

View File

@ -47,8 +47,7 @@ class Convolve : public Network {
// Writes to the given file. Returns false in case of error.
virtual bool Serialize(TFile* fp) const;
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
virtual bool DeSerialize(bool swap, TFile* fp);
virtual bool DeSerialize(TFile* fp);
// Runs forward propagation of activations on the input line.
// See Network for a detailed discussion of the arguments.

View File

@ -94,10 +94,8 @@ bool FullyConnected::Serialize(TFile* fp) const {
}
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool FullyConnected::DeSerialize(bool swap, TFile* fp) {
if (!weights_.DeSerialize(IsTraining(), swap, fp)) return false;
return true;
bool FullyConnected::DeSerialize(TFile* fp) {
return weights_.DeSerialize(IsTraining(), fp);
}
// Runs forward propagation of activations on the input line.

View File

@ -78,8 +78,7 @@ class FullyConnected : public Network {
// Writes to the given file. Returns false in case of error.
virtual bool Serialize(TFile* fp) const;
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
virtual bool DeSerialize(bool swap, TFile* fp);
virtual bool DeSerialize(TFile* fp);
// Runs forward propagation of activations on the input line.
// See Network for a detailed discussion of the arguments.

View File

@ -48,11 +48,8 @@ bool Input::Serialize(TFile* fp) const {
}
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool Input::DeSerialize(bool swap, TFile* fp) {
if (fp->FRead(&shape_, sizeof(shape_), 1) != 1) return false;
// TODO(rays) swaps!
return true;
bool Input::DeSerialize(TFile* fp) {
return fp->FReadEndian(&shape_, sizeof(shape_), 1) == 1;
}
// Returns an integer reduction factor that the network applies to the

View File

@ -51,9 +51,7 @@ class Input : public Network {
// Should be overridden by subclasses, but called by their Serialize.
virtual bool Serialize(TFile* fp) const;
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
// Should be overridden by subclasses, but NOT called by their DeSerialize.
virtual bool DeSerialize(bool swap, TFile* fp);
virtual bool DeSerialize(TFile* fp);
// Returns an integer reduction factor that the network applies to the
// time sequence. Assumes that any 2-d is already eliminated. Used for

View File

@ -173,10 +173,9 @@ bool LSTM::Serialize(TFile* fp) const {
}
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool LSTM::DeSerialize(bool swap, TFile* fp) {
if (fp->FRead(&na_, sizeof(na_), 1) != 1) return false;
if (swap) ReverseN(&na_, sizeof(na_));
bool LSTM::DeSerialize(TFile* fp) {
if (fp->FReadEndian(&na_, sizeof(na_), 1) != 1) return false;
if (type_ == NT_LSTM_SOFTMAX) {
nf_ = no_;
} else if (type_ == NT_LSTM_SOFTMAX_ENCODED) {
@ -187,7 +186,7 @@ bool LSTM::DeSerialize(bool swap, TFile* fp) {
is_2d_ = false;
for (int w = 0; w < WT_COUNT; ++w) {
if (w == GFS && !Is2D()) continue;
if (!gate_weights_[w].DeSerialize(IsTraining(), swap, fp)) return false;
if (!gate_weights_[w].DeSerialize(IsTraining(), fp)) return false;
if (w == CI) {
ns_ = gate_weights_[CI].NumOutputs();
is_2d_ = na_ - nf_ == ni_ + 2 * ns_;
@ -195,11 +194,10 @@ bool LSTM::DeSerialize(bool swap, TFile* fp) {
}
delete softmax_;
if (type_ == NT_LSTM_SOFTMAX || type_ == NT_LSTM_SOFTMAX_ENCODED) {
softmax_ =
reinterpret_cast<FullyConnected*>(Network::CreateFromFile(swap, fp));
if (softmax_ == NULL) return false;
softmax_ = reinterpret_cast<FullyConnected*>(Network::CreateFromFile(fp));
if (softmax_ == nullptr) return false;
} else {
softmax_ = NULL;
softmax_ = nullptr;
}
return true;
}

View File

@ -86,8 +86,7 @@ class LSTM : public Network {
// Writes to the given file. Returns false in case of error.
virtual bool Serialize(TFile* fp) const;
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
virtual bool DeSerialize(bool swap, TFile* fp);
virtual bool DeSerialize(TFile* fp);
// Runs forward propagation of activations on the input line.
// See Network for a detailed discussion of the arguments.

View File

@ -88,25 +88,27 @@ bool LSTMRecognizer::Serialize(TFile* fp) const {
}
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool LSTMRecognizer::DeSerialize(bool swap, TFile* fp) {
bool LSTMRecognizer::DeSerialize(TFile* fp) {
delete network_;
network_ = Network::CreateFromFile(swap, fp);
network_ = Network::CreateFromFile(fp);
if (network_ == NULL) return false;
if (!ccutil_.unicharset.load_from_file(fp, false)) return false;
if (!network_str_.DeSerialize(swap, fp)) return false;
if (fp->FRead(&training_flags_, sizeof(training_flags_), 1) != 1)
if (!network_str_.DeSerialize(fp)) return false;
if (fp->FReadEndian(&training_flags_, sizeof(training_flags_), 1) != 1)
return false;
if (fp->FRead(&training_iteration_, sizeof(training_iteration_), 1) != 1)
if (fp->FReadEndian(&training_iteration_, sizeof(training_iteration_), 1) !=
1)
return false;
if (fp->FRead(&sample_iteration_, sizeof(sample_iteration_), 1) != 1)
if (fp->FReadEndian(&sample_iteration_, sizeof(sample_iteration_), 1) != 1)
return false;
if (fp->FRead(&null_char_, sizeof(null_char_), 1) != 1) return false;
if (fp->FRead(&weight_range_, sizeof(weight_range_), 1) != 1) return false;
if (fp->FRead(&learning_rate_, sizeof(learning_rate_), 1) != 1) return false;
if (fp->FRead(&momentum_, sizeof(momentum_), 1) != 1) return false;
if (fp->FReadEndian(&null_char_, sizeof(null_char_), 1) != 1) return false;
if (fp->FReadEndian(&weight_range_, sizeof(weight_range_), 1) != 1)
return false;
if (fp->FReadEndian(&learning_rate_, sizeof(learning_rate_), 1) != 1)
return false;
if (fp->FReadEndian(&momentum_, sizeof(momentum_), 1) != 1) return false;
if (IsRecoding()) {
if (!recoder_.DeSerialize(swap, fp)) return false;
if (!recoder_.DeSerialize(fp)) return false;
RecodedCharID code;
recoder_.EncodeUnichar(UNICHAR_SPACE, &code);
if (code(0) != UNICHAR_SPACE) {
@ -114,7 +116,6 @@ bool LSTMRecognizer::DeSerialize(bool swap, TFile* fp) {
return false;
}
}
// TODO(rays) swaps!
network_->SetRandomizer(&randomizer_);
network_->CacheXScaleFactor(network_->XScaleFactor());
return true;

View File

@ -158,8 +158,7 @@ class LSTMRecognizer {
// Writes to the given file. Returns false in case of error.
bool Serialize(TFile* fp) const;
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool DeSerialize(bool swap, TFile* fp);
bool DeSerialize(TFile* fp);
// Loads the dictionary if possible from the traineddata file.
// Prints a warning message, and returns false but otherwise fails silently
// and continues to work without it if loading fails.

View File

@ -304,8 +304,7 @@ void LSTMTrainer::DebugNetwork() {
// loaded.
bool LSTMTrainer::LoadAllTrainingData(const GenericVector<STRING>& filenames) {
training_data_.Clear();
return training_data_.LoadDocuments(filenames, "eng", CacheStrategy(),
file_reader_);
return training_data_.LoadDocuments(filenames, CacheStrategy(), file_reader_);
}
// Keeps track of best and locally worst char error_rate and launches tests
@ -480,54 +479,54 @@ bool LSTMTrainer::Serialize(TFile* fp) const {
}
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool LSTMTrainer::DeSerialize(bool swap, TFile* fp) {
if (!LSTMRecognizer::DeSerialize(swap, fp)) return false;
// NOTE: It is assumed that the trainer is never read cross-endian.
bool LSTMTrainer::DeSerialize(TFile* fp) {
if (!LSTMRecognizer::DeSerialize(fp)) return false;
if (fp->FRead(&learning_iteration_, sizeof(learning_iteration_), 1) != 1) {
// Special case. If we successfully decoded the recognizer, but fail here
// then it means we were just given a recognizer, so issue a warning and
// allow it.
tprintf("Warning: LSTMTrainer deserialized an LSTMRecognizer!\n");
learning_iteration_ = 0;
network_->SetEnableTraining(TS_RE_ENABLE);
network_->SetEnableTraining(TS_ENABLED);
return true;
}
if (fp->FRead(&prev_sample_iteration_, sizeof(prev_sample_iteration_), 1) !=
1)
if (fp->FReadEndian(&prev_sample_iteration_, sizeof(prev_sample_iteration_),
1) != 1)
return false;
if (fp->FRead(&perfect_delay_, sizeof(perfect_delay_), 1) != 1) return false;
if (fp->FRead(&last_perfect_training_iteration_,
sizeof(last_perfect_training_iteration_), 1) != 1)
if (fp->FReadEndian(&perfect_delay_, sizeof(perfect_delay_), 1) != 1)
return false;
if (fp->FReadEndian(&last_perfect_training_iteration_,
sizeof(last_perfect_training_iteration_), 1) != 1)
return false;
for (int i = 0; i < ET_COUNT; ++i) {
if (!error_buffers_[i].DeSerialize(swap, fp)) return false;
if (!error_buffers_[i].DeSerialize(fp)) return false;
}
if (fp->FRead(&error_rates_, sizeof(error_rates_), 1) != 1) return false;
if (fp->FRead(&training_stage_, sizeof(training_stage_), 1) != 1)
if (fp->FReadEndian(&training_stage_, sizeof(training_stage_), 1) != 1)
return false;
uinT8 amount;
if (fp->FRead(&amount, sizeof(amount), 1) != 1) return false;
if (amount == LIGHT) return true; // Don't read the rest.
if (fp->FRead(&best_error_rate_, sizeof(best_error_rate_), 1) != 1)
if (fp->FReadEndian(&best_error_rate_, sizeof(best_error_rate_), 1) != 1)
return false;
if (fp->FRead(&best_error_rates_, sizeof(best_error_rates_), 1) != 1)
if (fp->FReadEndian(&best_error_rates_, sizeof(best_error_rates_), 1) != 1)
return false;
if (fp->FRead(&best_iteration_, sizeof(best_iteration_), 1) != 1)
if (fp->FReadEndian(&best_iteration_, sizeof(best_iteration_), 1) != 1)
return false;
if (fp->FRead(&worst_error_rate_, sizeof(worst_error_rate_), 1) != 1)
if (fp->FReadEndian(&worst_error_rate_, sizeof(worst_error_rate_), 1) != 1)
return false;
if (fp->FRead(&worst_error_rates_, sizeof(worst_error_rates_), 1) != 1)
if (fp->FReadEndian(&worst_error_rates_, sizeof(worst_error_rates_), 1) != 1)
return false;
if (fp->FRead(&worst_iteration_, sizeof(worst_iteration_), 1) != 1)
if (fp->FReadEndian(&worst_iteration_, sizeof(worst_iteration_), 1) != 1)
return false;
if (fp->FRead(&stall_iteration_, sizeof(stall_iteration_), 1) != 1)
return false;
if (!best_model_data_.DeSerialize(swap, fp)) return false;
if (!worst_model_data_.DeSerialize(swap, fp)) return false;
if (amount != NO_BEST_TRAINER && !best_trainer_.DeSerialize(swap, fp))
if (fp->FReadEndian(&stall_iteration_, sizeof(stall_iteration_), 1) != 1)
return false;
if (!best_model_data_.DeSerialize(fp)) return false;
if (!worst_model_data_.DeSerialize(fp)) return false;
if (amount != NO_BEST_TRAINER && !best_trainer_.DeSerialize(fp)) return false;
GenericVector<char> sub_data;
if (!sub_data.DeSerialize(swap, fp)) return false;
if (!sub_data.DeSerialize(fp)) return false;
delete sub_trainer_;
if (sub_data.empty()) {
sub_trainer_ = NULL;
@ -535,9 +534,9 @@ bool LSTMTrainer::DeSerialize(bool swap, TFile* fp) {
sub_trainer_ = new LSTMTrainer();
if (!ReadTrainingDump(sub_data, sub_trainer_)) return false;
}
if (!best_error_history_.DeSerialize(swap, fp)) return false;
if (!best_error_iterations_.DeSerialize(swap, fp)) return false;
if (fp->FRead(&improvement_steps_, sizeof(improvement_steps_), 1) != 1)
if (!best_error_history_.DeSerialize(fp)) return false;
if (!best_error_iterations_.DeSerialize(fp)) return false;
if (fp->FReadEndian(&improvement_steps_, sizeof(improvement_steps_), 1) != 1)
return false;
return true;
}
@ -925,7 +924,7 @@ bool LSTMTrainer::ReadTrainingDump(const GenericVector<char>& data,
bool LSTMTrainer::ReadSizedTrainingDump(const char* data, int size) {
TFile fp;
fp.Open(data, size);
return DeSerialize(false, &fp);
return DeSerialize(&fp);
}
// Writes the recognizer to memory, so that it can be used for testing later.
@ -943,7 +942,7 @@ LSTMRecognizer* LSTMTrainer::ReadRecognitionDump(
TFile fp;
fp.Open(&data[0], data.size());
LSTMRecognizer* recognizer = new LSTMRecognizer;
ASSERT_HOST(recognizer->DeSerialize(false, &fp));
ASSERT_HOST(recognizer->DeSerialize(&fp));
return recognizer;
}

View File

@ -215,8 +215,7 @@ class LSTMTrainer : public LSTMRecognizer {
// Writes to the given file. Returns false in case of error.
virtual bool Serialize(TFile* fp) const;
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
virtual bool DeSerialize(bool swap, TFile* fp);
virtual bool DeSerialize(TFile* fp);
// De-serializes the saved best_trainer_ into sub_trainer_, and adjusts the
// learning rates (by scaling reduction, or layer specific, according to

View File

@ -31,9 +31,8 @@ Maxpool::~Maxpool() {
}
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool Maxpool::DeSerialize(bool swap, TFile* fp) {
bool result = Reconfig::DeSerialize(swap, fp);
bool Maxpool::DeSerialize(TFile* fp) {
bool result = Reconfig::DeSerialize(fp);
no_ = ni_;
return result;
}

View File

@ -40,8 +40,7 @@ class Maxpool : public Reconfig {
}
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
virtual bool DeSerialize(bool swap, TFile* fp);
virtual bool DeSerialize(TFile* fp);
// Runs forward propagation of activations on the input line.
// See Network for a detailed discussion of the arguments.

View File

@ -164,14 +164,13 @@ bool Network::Serialize(TFile* fp) const {
}
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
// Should be overridden by subclasses, but NOT called by their DeSerialize.
bool Network::DeSerialize(bool swap, TFile* fp) {
bool Network::DeSerialize(TFile* fp) {
inT8 data = 0;
if (fp->FRead(&data, sizeof(data), 1) != 1) return false;
if (data == NT_NONE) {
STRING type_name;
if (!type_name.DeSerialize(swap, fp)) return false;
if (!type_name.DeSerialize(fp)) return false;
for (data = 0; data < NT_COUNT && type_name != kTypeNames[data]; ++data) {
}
if (data == NT_COUNT) {
@ -184,27 +183,22 @@ bool Network::DeSerialize(bool swap, TFile* fp) {
training_ = data == TS_ENABLED ? TS_ENABLED : TS_DISABLED;
if (fp->FRead(&data, sizeof(data), 1) != 1) return false;
needs_to_backprop_ = data != 0;
if (fp->FRead(&network_flags_, sizeof(network_flags_), 1) != 1) return false;
if (fp->FRead(&ni_, sizeof(ni_), 1) != 1) return false;
if (fp->FRead(&no_, sizeof(no_), 1) != 1) return false;
if (fp->FRead(&num_weights_, sizeof(num_weights_), 1) != 1) return false;
if (!name_.DeSerialize(swap, fp)) return false;
if (swap) {
ReverseN(&network_flags_, sizeof(network_flags_));
ReverseN(&ni_, sizeof(ni_));
ReverseN(&no_, sizeof(no_));
ReverseN(&num_weights_, sizeof(num_weights_));
}
if (fp->FReadEndian(&network_flags_, sizeof(network_flags_), 1) != 1)
return false;
if (fp->FReadEndian(&ni_, sizeof(ni_), 1) != 1) return false;
if (fp->FReadEndian(&no_, sizeof(no_), 1) != 1) return false;
if (fp->FReadEndian(&num_weights_, sizeof(num_weights_), 1) != 1)
return false;
if (!name_.DeSerialize(fp)) return false;
return true;
}
// Reads from the given file. Returns NULL in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
// Determines the type of the serialized class and calls its DeSerialize
// on a new object of the appropriate type, which is returned.
Network* Network::CreateFromFile(bool swap, TFile* fp) {
Network* Network::CreateFromFile(TFile* fp) {
Network stub;
if (!stub.DeSerialize(swap, fp)) return NULL;
if (!stub.DeSerialize(fp)) return NULL;
Network* network = NULL;
switch (stub.type_) {
case NT_CONVOLVE:
@ -269,7 +263,7 @@ Network* Network::CreateFromFile(bool swap, TFile* fp) {
network->needs_to_backprop_ = stub.needs_to_backprop_;
network->network_flags_ = stub.network_flags_;
network->num_weights_ = stub.num_weights_;
if (!network->DeSerialize(swap, fp)) {
if (!network->DeSerialize(fp)) {
delete network;
return NULL;
}

View File

@ -208,9 +208,8 @@ class Network {
// Should be overridden by subclasses, but called by their Serialize.
virtual bool Serialize(TFile* fp) const;
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
// Should be overridden by subclasses, but NOT called by their DeSerialize.
virtual bool DeSerialize(bool swap, TFile* fp);
virtual bool DeSerialize(TFile* fp);
// Updates the weights using the given learning rate and momentum.
// num_samples is the quotient to be used in the adagrad computation iff
@ -223,10 +222,9 @@ class Network {
double* changed) const {}
// Reads from the given file. Returns NULL in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
// Determines the type of the serialized class and calls its DeSerialize
// on a new object of the appropriate type, which is returned.
static Network* CreateFromFile(bool swap, TFile* fp);
static Network* CreateFromFile(TFile* fp);
// Runs forward propagation of activations on the input line.
// Note that input and output are both 2-d arrays.

View File

@ -187,19 +187,18 @@ bool Plumbing::Serialize(TFile* fp) const {
}
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool Plumbing::DeSerialize(bool swap, TFile* fp) {
bool Plumbing::DeSerialize(TFile* fp) {
stack_.truncate(0);
no_ = 0; // We will be modifying this as we AddToStack.
inT32 size;
if (fp->FRead(&size, sizeof(size), 1) != 1) return false;
if (fp->FReadEndian(&size, sizeof(size), 1) != 1) return false;
for (int i = 0; i < size; ++i) {
Network* network = CreateFromFile(swap, fp);
Network* network = CreateFromFile(fp);
if (network == NULL) return false;
AddToStack(network);
}
if ((network_flags_ & NF_LAYER_SPECIFIC_LR) &&
!learning_rates_.DeSerialize(swap, fp)) {
!learning_rates_.DeSerialize(fp)) {
return false;
}
return true;

View File

@ -116,8 +116,7 @@ class Plumbing : public Network {
// Writes to the given file. Returns false in case of error.
virtual bool Serialize(TFile* fp) const;
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
virtual bool DeSerialize(bool swap, TFile* fp);
virtual bool DeSerialize(TFile* fp);
// Updates the weights using the given learning rate and momentum.
// num_samples is the quotient to be used in the adagrad computation iff

View File

@ -59,14 +59,9 @@ bool Reconfig::Serialize(TFile* fp) const {
}
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool Reconfig::DeSerialize(bool swap, TFile* fp) {
if (fp->FRead(&x_scale_, sizeof(x_scale_), 1) != 1) return false;
if (fp->FRead(&y_scale_, sizeof(y_scale_), 1) != 1) return false;
if (swap) {
ReverseN(&x_scale_, sizeof(x_scale_));
ReverseN(&y_scale_, sizeof(y_scale_));
}
bool Reconfig::DeSerialize(TFile* fp) {
if (fp->FReadEndian(&x_scale_, sizeof(x_scale_), 1) != 1) return false;
if (fp->FReadEndian(&y_scale_, sizeof(y_scale_), 1) != 1) return false;
no_ = ni_ * x_scale_ * y_scale_;
return true;
}

View File

@ -57,8 +57,7 @@ class Reconfig : public Network {
// Writes to the given file. Returns false in case of error.
virtual bool Serialize(TFile* fp) const;
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
virtual bool DeSerialize(bool swap, TFile* fp);
virtual bool DeSerialize(TFile* fp);
// Runs forward propagation of activations on the input line.
// See Network for a detailed discussion of the arguments.

View File

@ -53,11 +53,10 @@ bool TFNetwork::Serialize(TFile* fp) const {
}
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
// Should be overridden by subclasses, but NOT called by their DeSerialize.
bool TFNetwork::DeSerialize(bool swap, TFile* fp) {
bool TFNetwork::DeSerialize(TFile* fp) {
GenericVector<char> data;
if (!data.DeSerialize(swap, fp)) return false;
if (!data.DeSerialize(fp)) return false;
if (!model_proto_.ParseFromArray(&data[0], data.size())) {
return false;
}

View File

@ -59,9 +59,8 @@ class TFNetwork : public Network {
// Should be overridden by subclasses, but called by their Serialize.
virtual bool Serialize(TFile* fp) const;
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
// Should be overridden by subclasses, but NOT called by their DeSerialize.
virtual bool DeSerialize(bool swap, TFile* fp);
virtual bool DeSerialize(TFile* fp);
// Runs forward propagation of activations on the input line.
// See Network for a detailed discussion of the arguments.

View File

@ -121,22 +121,22 @@ bool WeightMatrix::Serialize(bool training, TFile* fp) const {
}
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool WeightMatrix::DeSerialize(bool training, bool swap, TFile* fp) {
bool WeightMatrix::DeSerialize(bool training, TFile* fp) {
uinT8 mode = 0;
if (fp->FRead(&mode, sizeof(mode), 1) != 1) return false;
int_mode_ = (mode & kInt8Flag) != 0;
use_ada_grad_ = (mode & kAdaGradFlag) != 0;
if ((mode & kDoubleFlag) == 0) return DeSerializeOld(training, swap, fp);
if ((mode & kDoubleFlag) == 0) return DeSerializeOld(training, fp);
if (int_mode_) {
if (!wi_.DeSerialize(swap, fp)) return false;
if (!scales_.DeSerialize(swap, fp)) return false;
if (!wi_.DeSerialize(fp)) return false;
if (!scales_.DeSerialize(fp)) return false;
} else {
if (!wf_.DeSerialize(swap, fp)) return false;
if (!wf_.DeSerialize(fp)) return false;
if (training) {
InitBackward(use_ada_grad_);
if (!updates_.DeSerialize(swap, fp)) return false;
if (use_ada_grad_ && !dw_sq_sum_.DeSerialize(swap, fp)) return false;
if (!updates_.DeSerialize(fp)) return false;
if (use_ada_grad_ && !dw_sq_sum_.DeSerialize(fp)) return false;
}
}
return true;
@ -144,24 +144,24 @@ bool WeightMatrix::DeSerialize(bool training, bool swap, TFile* fp) {
// As DeSerialize, but reads an old (float) format WeightMatrix for
// backward compatibility.
bool WeightMatrix::DeSerializeOld(bool training, bool swap, TFile* fp) {
bool WeightMatrix::DeSerializeOld(bool training, TFile* fp) {
GENERIC_2D_ARRAY<float> float_array;
if (int_mode_) {
if (!wi_.DeSerialize(swap, fp)) return false;
if (!wi_.DeSerialize(fp)) return false;
GenericVector<float> old_scales;
if (!old_scales.DeSerialize(swap, fp)) return false;
if (!old_scales.DeSerialize(fp)) return false;
scales_.init_to_size(old_scales.size(), 0.0);
for (int i = 0; i < old_scales.size(); ++i) scales_[i] = old_scales[i];
} else {
if (!float_array.DeSerialize(swap, fp)) return false;
if (!float_array.DeSerialize(fp)) return false;
FloatToDouble(float_array, &wf_);
}
if (training) {
InitBackward(use_ada_grad_);
if (!float_array.DeSerialize(swap, fp)) return false;
if (!float_array.DeSerialize(fp)) return false;
FloatToDouble(float_array, &updates_);
// Errs was only used in int training, which is now dead.
if (!float_array.DeSerialize(swap, fp)) return false;
if (!float_array.DeSerialize(fp)) return false;
}
return true;
}

View File

@ -97,11 +97,10 @@ class WeightMatrix {
// Writes to the given file. Returns false in case of error.
bool Serialize(bool training, TFile* fp) const;
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool DeSerialize(bool training, bool swap, TFile* fp);
bool DeSerialize(bool training, TFile* fp);
// As DeSerialize, but reads an old (float) format WeightMatrix for
// backward compatibility.
bool DeSerializeOld(bool training, bool swap, TFile* fp);
bool DeSerializeOld(bool training, TFile* fp);
// Computes matrix.vector v = Wu.
// u is of size W.dim2() - 1 and the output v is of size W.dim1().

View File

@ -119,7 +119,7 @@ ShapeTable* LoadShapeTable(const STRING& file_prefix) {
TFile shape_fp;
if (shape_fp.Open(shape_table_file.string(), nullptr)) {
shape_table = new ShapeTable;
if (!shape_table->DeSerialize(false, &shape_fp)) {
if (!shape_table->DeSerialize(&shape_fp)) {
delete shape_table;
shape_table = nullptr;
tprintf("Error: Failed to read shape table %s\n",

View File

@ -42,8 +42,7 @@ bool LSTMTester::LoadAllEvalData(const STRING& filenames_file) {
// loaded.
bool LSTMTester::LoadAllEvalData(const GenericVector<STRING>& filenames) {
test_data_.Clear();
bool result =
test_data_.LoadDocuments(filenames, "eng", CS_SEQUENTIAL, nullptr);
bool result = test_data_.LoadDocuments(filenames, CS_SEQUENTIAL, nullptr);
total_pages_ = test_data_.TotalPages();
return result;
}