mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-18 06:30:14 +08:00
Final part of endian improvement. Adds big-endian support to lstm and fixes issue 518
This commit is contained in:
parent
6ac31dcbdd
commit
8e79297dce
@ -49,7 +49,7 @@ void Tesseract::TrainLineRecognizer(const STRING& input_imagename,
|
||||
DocumentData images(lstmf_name);
|
||||
if (applybox_page > 0) {
|
||||
// Load existing document for the previous pages.
|
||||
if (!images.LoadDocument(lstmf_name.string(), "eng", 0, 0, NULL)) {
|
||||
if (!images.LoadDocument(lstmf_name.string(), 0, 0, nullptr)) {
|
||||
tprintf("Failed to read training data from %s!\n", lstmf_name.string());
|
||||
return;
|
||||
}
|
||||
|
@ -188,13 +188,9 @@ bool Tesseract::init_tesseract_lang_data(
|
||||
#ifndef ANDROID_BUILD
|
||||
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY ||
|
||||
tessedit_ocr_engine_mode == OEM_TESSERACT_LSTM_COMBINED) {
|
||||
if (mgr->swap()) {
|
||||
tprintf("Error: LSTM requested on big-endian hardware!!\n");
|
||||
tprintf("Big-endian not yet supported! Loading tesseract.\n");
|
||||
tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_ONLY);
|
||||
} else if (mgr->GetComponent(TESSDATA_LSTM, &fp)) {
|
||||
if (mgr->GetComponent(TESSDATA_LSTM, &fp)) {
|
||||
lstm_recognizer_ = new LSTMRecognizer;
|
||||
ASSERT_HOST(lstm_recognizer_->DeSerialize(mgr->swap(), &fp));
|
||||
ASSERT_HOST(lstm_recognizer_->DeSerialize(&fp));
|
||||
if (lstm_use_matrix) lstm_recognizer_->LoadDictionary(language, mgr);
|
||||
} else {
|
||||
tprintf("Error: LSTM requested, but not present!! Loading tesseract.\n");
|
||||
|
@ -31,9 +31,9 @@ bool FontInfo::Serialize(FILE* fp) const {
|
||||
}
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool FontInfo::DeSerialize(bool swap, TFile* fp) {
|
||||
if (!read_info(fp, this, swap)) return false;
|
||||
if (!read_spacing_info(fp, this, swap)) return false;
|
||||
bool FontInfo::DeSerialize(TFile* fp) {
|
||||
if (!read_info(fp, this)) return false;
|
||||
if (!read_spacing_info(fp, this)) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -51,9 +51,9 @@ bool FontInfoTable::Serialize(FILE* fp) const {
|
||||
}
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool FontInfoTable::DeSerialize(bool swap, TFile* fp) {
|
||||
bool FontInfoTable::DeSerialize(TFile* fp) {
|
||||
truncate(0);
|
||||
return this->DeSerializeClasses(swap, fp);
|
||||
return this->DeSerializeClasses(fp);
|
||||
}
|
||||
|
||||
// Returns true if the given set of fonts includes one with the same
|
||||
@ -149,14 +149,14 @@ void FontSetDeleteCallback(FontSet fs) {
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
// Callbacks used by UnicityTable to read/write FontInfo/FontSet structures.
|
||||
bool read_info(TFile* f, FontInfo* fi, bool swap) {
|
||||
bool read_info(TFile* f, FontInfo* fi) {
|
||||
inT32 size;
|
||||
if (f->FReadEndian(&size, sizeof(size), 1, swap) != 1) return false;
|
||||
if (f->FReadEndian(&size, sizeof(size), 1) != 1) return false;
|
||||
char* font_name = new char[size + 1];
|
||||
fi->name = font_name;
|
||||
if (f->FRead(font_name, sizeof(*font_name), size) != size) return false;
|
||||
font_name[size] = '\0';
|
||||
if (f->FReadEndian(&fi->properties, sizeof(fi->properties), 1, swap) != 1)
|
||||
if (f->FReadEndian(&fi->properties, sizeof(fi->properties), 1) != 1)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
@ -170,19 +170,17 @@ bool write_info(FILE* f, const FontInfo& fi) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool read_spacing_info(TFile* f, FontInfo* fi, bool swap) {
|
||||
bool read_spacing_info(TFile* f, FontInfo* fi) {
|
||||
inT32 vec_size, kern_size;
|
||||
if (f->FReadEndian(&vec_size, sizeof(vec_size), 1, swap) != 1) return false;
|
||||
if (f->FReadEndian(&vec_size, sizeof(vec_size), 1) != 1) return false;
|
||||
ASSERT_HOST(vec_size >= 0);
|
||||
if (vec_size == 0) return true;
|
||||
fi->init_spacing(vec_size);
|
||||
for (int i = 0; i < vec_size; ++i) {
|
||||
FontSpacingInfo *fs = new FontSpacingInfo();
|
||||
if (f->FReadEndian(&fs->x_gap_before, sizeof(fs->x_gap_before), 1, swap) !=
|
||||
1 ||
|
||||
f->FReadEndian(&fs->x_gap_after, sizeof(fs->x_gap_after), 1, swap) !=
|
||||
1 ||
|
||||
f->FReadEndian(&kern_size, sizeof(kern_size), 1, swap) != 1) {
|
||||
if (f->FReadEndian(&fs->x_gap_before, sizeof(fs->x_gap_before), 1) != 1 ||
|
||||
f->FReadEndian(&fs->x_gap_after, sizeof(fs->x_gap_after), 1) != 1 ||
|
||||
f->FReadEndian(&kern_size, sizeof(kern_size), 1) != 1) {
|
||||
delete fs;
|
||||
return false;
|
||||
}
|
||||
@ -190,8 +188,8 @@ bool read_spacing_info(TFile* f, FontInfo* fi, bool swap) {
|
||||
delete fs;
|
||||
continue;
|
||||
}
|
||||
if (kern_size > 0 && (!fs->kerned_unichar_ids.DeSerialize(swap, f) ||
|
||||
!fs->kerned_x_gaps.DeSerialize(swap, f))) {
|
||||
if (kern_size > 0 && (!fs->kerned_unichar_ids.DeSerialize(f) ||
|
||||
!fs->kerned_x_gaps.DeSerialize(f))) {
|
||||
delete fs;
|
||||
return false;
|
||||
}
|
||||
@ -229,11 +227,10 @@ bool write_spacing_info(FILE* f, const FontInfo& fi) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool read_set(TFile* f, FontSet* fs, bool swap) {
|
||||
if (f->FReadEndian(&fs->size, sizeof(fs->size), 1, swap) != 1) return false;
|
||||
bool read_set(TFile* f, FontSet* fs) {
|
||||
if (f->FReadEndian(&fs->size, sizeof(fs->size), 1) != 1) return false;
|
||||
fs->configs = new int[fs->size];
|
||||
if (f->FReadEndian(fs->configs, sizeof(fs->configs[0]), fs->size, swap) !=
|
||||
fs->size)
|
||||
if (f->FReadEndian(fs->configs, sizeof(fs->configs[0]), fs->size) != fs->size)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
@ -67,7 +67,7 @@ struct FontInfo {
|
||||
bool Serialize(FILE* fp) const;
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool DeSerialize(bool swap, TFile* fp);
|
||||
bool DeSerialize(TFile* fp);
|
||||
|
||||
// Reserves unicharset_size spots in spacing_vec.
|
||||
void init_spacing(int unicharset_size) {
|
||||
@ -152,7 +152,7 @@ class FontInfoTable : public GenericVector<FontInfo> {
|
||||
bool Serialize(FILE* fp) const;
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool DeSerialize(bool swap, TFile* fp);
|
||||
bool DeSerialize(TFile* fp);
|
||||
|
||||
// Returns true if the given set of fonts includes one with the same
|
||||
// properties as font_id.
|
||||
@ -177,11 +177,11 @@ void FontInfoDeleteCallback(FontInfo f);
|
||||
void FontSetDeleteCallback(FontSet fs);
|
||||
|
||||
// Callbacks used by UnicityTable to read/write FontInfo/FontSet structures.
|
||||
bool read_info(TFile* f, FontInfo* fi, bool swap);
|
||||
bool read_info(TFile* f, FontInfo* fi);
|
||||
bool write_info(FILE* f, const FontInfo& fi);
|
||||
bool read_spacing_info(TFile* f, FontInfo* fi, bool swap);
|
||||
bool read_spacing_info(TFile* f, FontInfo* fi);
|
||||
bool write_spacing_info(FILE* f, const FontInfo& fi);
|
||||
bool read_set(TFile* f, FontSet* fs, bool swap);
|
||||
bool read_set(TFile* f, FontSet* fs);
|
||||
bool write_set(FILE* f, const FontSet& fs);
|
||||
|
||||
} // namespace tesseract.
|
||||
|
@ -166,6 +166,7 @@ bool ImageData::Serialize(TFile* fp) const {
|
||||
if (!imagefilename_.Serialize(fp)) return false;
|
||||
if (fp->FWrite(&page_number_, sizeof(page_number_), 1) != 1) return false;
|
||||
if (!image_data_.Serialize(fp)) return false;
|
||||
if (!language_.Serialize(fp)) return false;
|
||||
if (!transcription_.Serialize(fp)) return false;
|
||||
// WARNING: Will not work across different endian machines.
|
||||
if (!boxes_.Serialize(fp)) return false;
|
||||
@ -177,15 +178,16 @@ bool ImageData::Serialize(TFile* fp) const {
|
||||
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool ImageData::DeSerialize(bool swap, TFile* fp) {
|
||||
if (!imagefilename_.DeSerialize(swap, fp)) return false;
|
||||
if (fp->FRead(&page_number_, sizeof(page_number_), 1) != 1) return false;
|
||||
if (swap) ReverseN(&page_number_, sizeof(page_number_));
|
||||
if (!image_data_.DeSerialize(swap, fp)) return false;
|
||||
if (!transcription_.DeSerialize(swap, fp)) return false;
|
||||
bool ImageData::DeSerialize(TFile* fp) {
|
||||
if (!imagefilename_.DeSerialize(fp)) return false;
|
||||
if (fp->FReadEndian(&page_number_, sizeof(page_number_), 1) != 1)
|
||||
return false;
|
||||
if (!image_data_.DeSerialize(fp)) return false;
|
||||
if (!language_.DeSerialize(fp)) return false;
|
||||
if (!transcription_.DeSerialize(fp)) return false;
|
||||
// WARNING: Will not work across different endian machines.
|
||||
if (!boxes_.DeSerialize(swap, fp)) return false;
|
||||
if (!box_texts_.DeSerializeClasses(swap, fp)) return false;
|
||||
if (!boxes_.DeSerialize(fp)) return false;
|
||||
if (!box_texts_.DeSerializeClasses(fp)) return false;
|
||||
inT8 vertical = 0;
|
||||
if (fp->FRead(&vertical, sizeof(vertical), 1) != 1) return false;
|
||||
vertical_text_ = vertical != 0;
|
||||
@ -193,14 +195,15 @@ bool ImageData::DeSerialize(bool swap, TFile* fp) {
|
||||
}
|
||||
|
||||
// As DeSerialize, but only seeks past the data - hence a static method.
|
||||
bool ImageData::SkipDeSerialize(bool swap, TFile* fp) {
|
||||
if (!STRING::SkipDeSerialize(swap, fp)) return false;
|
||||
bool ImageData::SkipDeSerialize(TFile* fp) {
|
||||
if (!STRING::SkipDeSerialize(fp)) return false;
|
||||
inT32 page_number;
|
||||
if (fp->FRead(&page_number, sizeof(page_number), 1) != 1) return false;
|
||||
if (!GenericVector<char>::SkipDeSerialize(swap, fp)) return false;
|
||||
if (!STRING::SkipDeSerialize(swap, fp)) return false;
|
||||
if (!GenericVector<TBOX>::SkipDeSerialize(swap, fp)) return false;
|
||||
if (!GenericVector<STRING>::SkipDeSerializeClasses(swap, fp)) return false;
|
||||
if (!GenericVector<char>::SkipDeSerialize(fp)) return false;
|
||||
if (!STRING::SkipDeSerialize(fp)) return false;
|
||||
if (!STRING::SkipDeSerialize(fp)) return false;
|
||||
if (!GenericVector<TBOX>::SkipDeSerialize(fp)) return false;
|
||||
if (!GenericVector<STRING>::SkipDeSerializeClasses(fp)) return false;
|
||||
inT8 vertical = 0;
|
||||
return fp->FRead(&vertical, sizeof(vertical), 1) == 1;
|
||||
}
|
||||
@ -384,21 +387,19 @@ DocumentData::~DocumentData() {
|
||||
|
||||
// Reads all the pages in the given lstmf filename to the cache. The reader
|
||||
// is used to read the file.
|
||||
bool DocumentData::LoadDocument(const char* filename, const char* lang,
|
||||
int start_page, inT64 max_memory,
|
||||
FileReader reader) {
|
||||
SetDocument(filename, lang, max_memory, reader);
|
||||
bool DocumentData::LoadDocument(const char* filename, int start_page,
|
||||
inT64 max_memory, FileReader reader) {
|
||||
SetDocument(filename, max_memory, reader);
|
||||
pages_offset_ = start_page;
|
||||
return ReCachePages();
|
||||
}
|
||||
|
||||
// Sets up the document, without actually loading it.
|
||||
void DocumentData::SetDocument(const char* filename, const char* lang,
|
||||
inT64 max_memory, FileReader reader) {
|
||||
void DocumentData::SetDocument(const char* filename, inT64 max_memory,
|
||||
FileReader reader) {
|
||||
SVAutoLock lock_p(&pages_mutex_);
|
||||
SVAutoLock lock(&general_mutex_);
|
||||
document_name_ = filename;
|
||||
lang_ = lang;
|
||||
pages_offset_ = -1;
|
||||
max_memory_ = max_memory;
|
||||
reader_ = reader;
|
||||
@ -522,7 +523,7 @@ bool DocumentData::ReCachePages() {
|
||||
pages_.truncate(0);
|
||||
TFile fp;
|
||||
if (!fp.Open(document_name_, reader_) ||
|
||||
!PointerVector<ImageData>::DeSerializeSize(false, &fp, &loaded_pages) ||
|
||||
!PointerVector<ImageData>::DeSerializeSize(&fp, &loaded_pages) ||
|
||||
loaded_pages <= 0) {
|
||||
tprintf("Deserialize header failed: %s\n", document_name_.string());
|
||||
return false;
|
||||
@ -534,15 +535,17 @@ bool DocumentData::ReCachePages() {
|
||||
for (page = 0; page < loaded_pages; ++page) {
|
||||
if (page < pages_offset_ ||
|
||||
(max_memory_ > 0 && memory_used() > max_memory_)) {
|
||||
if (!PointerVector<ImageData>::DeSerializeSkip(false, &fp)) break;
|
||||
if (!PointerVector<ImageData>::DeSerializeSkip(&fp)) {
|
||||
tprintf("Deserializeskip failed\n");
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if (!pages_.DeSerializeElement(false, &fp)) break;
|
||||
if (!pages_.DeSerializeElement(&fp)) break;
|
||||
ImageData* image_data = pages_.back();
|
||||
if (image_data->imagefilename().length() == 0) {
|
||||
image_data->set_imagefilename(document_name_);
|
||||
image_data->set_page_number(page);
|
||||
}
|
||||
image_data->set_language(lang_);
|
||||
set_memory_used(memory_used() + image_data->MemoryUsed());
|
||||
}
|
||||
}
|
||||
@ -567,7 +570,6 @@ DocumentCache::~DocumentCache() {}
|
||||
// Adds all the documents in the list of filenames, counting memory.
|
||||
// The reader is used to read the files.
|
||||
bool DocumentCache::LoadDocuments(const GenericVector<STRING>& filenames,
|
||||
const char* lang,
|
||||
CachingStrategy cache_strategy,
|
||||
FileReader reader) {
|
||||
cache_strategy_ = cache_strategy;
|
||||
@ -580,7 +582,7 @@ bool DocumentCache::LoadDocuments(const GenericVector<STRING>& filenames,
|
||||
for (int arg = 0; arg < filenames.size(); ++arg) {
|
||||
STRING filename = filenames[arg];
|
||||
DocumentData* document = new DocumentData(filename);
|
||||
document->SetDocument(filename.string(), lang, fair_share_memory, reader);
|
||||
document->SetDocument(filename.string(), fair_share_memory, reader);
|
||||
AddToCache(document);
|
||||
}
|
||||
if (!documents_.empty()) {
|
||||
|
@ -116,10 +116,9 @@ class ImageData {
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
bool Serialize(TFile* fp) const;
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool DeSerialize(bool swap, TFile* fp);
|
||||
bool DeSerialize(TFile* fp);
|
||||
// As DeSerialize, but only seeks past the data - hence a static method.
|
||||
static bool SkipDeSerialize(bool swap, tesseract::TFile* fp);
|
||||
static bool SkipDeSerialize(tesseract::TFile* fp);
|
||||
|
||||
// Other accessors.
|
||||
const STRING& imagefilename() const {
|
||||
@ -210,11 +209,10 @@ class DocumentData {
|
||||
|
||||
// Reads all the pages in the given lstmf filename to the cache. The reader
|
||||
// is used to read the file.
|
||||
bool LoadDocument(const char* filename, const char* lang, int start_page,
|
||||
inT64 max_memory, FileReader reader);
|
||||
bool LoadDocument(const char* filename, int start_page, inT64 max_memory,
|
||||
FileReader reader);
|
||||
// Sets up the document, without actually loading it.
|
||||
void SetDocument(const char* filename, const char* lang, inT64 max_memory,
|
||||
FileReader reader);
|
||||
void SetDocument(const char* filename, inT64 max_memory, FileReader reader);
|
||||
// Writes all the pages to the given filename. Returns false on error.
|
||||
bool SaveDocument(const char* filename, FileWriter writer);
|
||||
bool SaveToBuffer(GenericVector<char>* buffer);
|
||||
@ -286,8 +284,6 @@ class DocumentData {
|
||||
private:
|
||||
// A name for this document.
|
||||
STRING document_name_;
|
||||
// The language of this document.
|
||||
STRING lang_;
|
||||
// A group of pages that corresponds in some loose way to a document.
|
||||
PointerVector<ImageData> pages_;
|
||||
// Page number of the first index in pages_.
|
||||
@ -325,7 +321,7 @@ class DocumentCache {
|
||||
}
|
||||
// Adds all the documents in the list of filenames, counting memory.
|
||||
// The reader is used to read the files.
|
||||
bool LoadDocuments(const GenericVector<STRING>& filenames, const char* lang,
|
||||
bool LoadDocuments(const GenericVector<STRING>& filenames,
|
||||
CachingStrategy cache_strategy, FileReader reader);
|
||||
|
||||
// Adds document to the cache.
|
||||
|
@ -164,16 +164,11 @@ class GENERIC_2D_ARRAY {
|
||||
}
|
||||
return true;
|
||||
}
|
||||
bool DeSerialize(bool swap, tesseract::TFile* fp) {
|
||||
if (!DeSerializeSize(swap, fp)) return false;
|
||||
if (fp->FRead(&empty_, sizeof(empty_), 1) != 1) return false;
|
||||
if (swap) ReverseN(&empty_, sizeof(empty_));
|
||||
bool DeSerialize(tesseract::TFile* fp) {
|
||||
if (!DeSerializeSize(fp)) return false;
|
||||
if (fp->FReadEndian(&empty_, sizeof(empty_), 1) != 1) return false;
|
||||
int size = num_elements();
|
||||
if (fp->FRead(array_, sizeof(*array_), size) != size) return false;
|
||||
if (swap) {
|
||||
for (int i = 0; i < size; ++i)
|
||||
ReverseN(&array_[i], sizeof(array_[i]));
|
||||
}
|
||||
if (fp->FReadEndian(array_, sizeof(*array_), size) != size) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -487,14 +482,10 @@ class GENERIC_2D_ARRAY {
|
||||
Resize(size1, size2, empty_);
|
||||
return true;
|
||||
}
|
||||
bool DeSerializeSize(bool swap, tesseract::TFile* fp) {
|
||||
bool DeSerializeSize(tesseract::TFile* fp) {
|
||||
inT32 size1, size2;
|
||||
if (fp->FRead(&size1, sizeof(size1), 1) != 1) return false;
|
||||
if (fp->FRead(&size2, sizeof(size2), 1) != 1) return false;
|
||||
if (swap) {
|
||||
ReverseN(&size1, sizeof(size1));
|
||||
ReverseN(&size2, sizeof(size2));
|
||||
}
|
||||
if (fp->FReadEndian(&size1, sizeof(size1), 1) != 1) return false;
|
||||
if (fp->FReadEndian(&size2, sizeof(size2), 1) != 1) return false;
|
||||
Resize(size1, size2, empty_);
|
||||
return true;
|
||||
}
|
||||
|
@ -163,8 +163,7 @@ class GenericVector {
|
||||
// DEPRECATED. Use [De]Serialize[Classes] instead.
|
||||
bool write(FILE* f, TessResultCallback2<bool, FILE*, T const &>* cb) const;
|
||||
bool read(tesseract::TFile* f,
|
||||
TessResultCallback3<bool, tesseract::TFile*, T*, bool>* cb,
|
||||
bool swap);
|
||||
TessResultCallback2<bool, tesseract::TFile*, T*>* cb);
|
||||
// Writes a vector of simple types to the given file. Assumes that bitwise
|
||||
// read/write of T will work. Returns false in case of error.
|
||||
// TODO(rays) Change all callers to use TFile and remove deprecated methods.
|
||||
@ -174,10 +173,11 @@ class GenericVector {
|
||||
// read/write will work with ReverseN according to sizeof(T).
|
||||
// Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
// TFile is assumed to know about swapping.
|
||||
bool DeSerialize(bool swap, FILE* fp);
|
||||
bool DeSerialize(bool swap, tesseract::TFile* fp);
|
||||
bool DeSerialize(tesseract::TFile* fp);
|
||||
// Skips the deserialization of the vector.
|
||||
static bool SkipDeSerialize(bool swap, tesseract::TFile* fp);
|
||||
static bool SkipDeSerialize(tesseract::TFile* fp);
|
||||
// Writes a vector of classes to the given file. Assumes the existence of
|
||||
// bool T::Serialize(FILE* fp) const that returns false in case of error.
|
||||
// Returns false in case of error.
|
||||
@ -189,9 +189,9 @@ class GenericVector {
|
||||
// this function. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool DeSerializeClasses(bool swap, FILE* fp);
|
||||
bool DeSerializeClasses(bool swap, tesseract::TFile* fp);
|
||||
bool DeSerializeClasses(tesseract::TFile* fp);
|
||||
// Calls SkipDeSerialize on the elements of the vector.
|
||||
static bool SkipDeSerializeClasses(bool swap, tesseract::TFile* fp);
|
||||
static bool SkipDeSerializeClasses(tesseract::TFile* fp);
|
||||
|
||||
// Allocates a new array of double the current_size, copies over the
|
||||
// information from data to the new location, deletes data and returns
|
||||
@ -569,13 +569,13 @@ class PointerVector : public GenericVector<T*> {
|
||||
}
|
||||
return true;
|
||||
}
|
||||
bool DeSerialize(bool swap, TFile* fp) {
|
||||
bool DeSerialize(TFile* fp) {
|
||||
inT32 reserved;
|
||||
if (!DeSerializeSize(swap, fp, &reserved)) return false;
|
||||
if (!DeSerializeSize(fp, &reserved)) return false;
|
||||
GenericVector<T*>::reserve(reserved);
|
||||
truncate(0);
|
||||
for (int i = 0; i < reserved; ++i) {
|
||||
if (!DeSerializeElement(swap, fp)) return false;
|
||||
if (!DeSerializeElement(fp)) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -583,19 +583,17 @@ class PointerVector : public GenericVector<T*> {
|
||||
// retain the integrity of the stream, the caller must call some combination
|
||||
// of DeSerializeElement and DeSerializeSkip of the exact number returned in
|
||||
// *size, assuming a true return.
|
||||
static bool DeSerializeSize(bool swap, TFile* fp, inT32* size) {
|
||||
if (fp->FRead(size, sizeof(*size), 1) != 1) return false;
|
||||
if (swap) Reverse32(size);
|
||||
return true;
|
||||
static bool DeSerializeSize(TFile* fp, inT32* size) {
|
||||
return fp->FReadEndian(size, sizeof(*size), 1) == 1;
|
||||
}
|
||||
// Reads and appends to the vector the next element of the serialization.
|
||||
bool DeSerializeElement(bool swap, TFile* fp) {
|
||||
bool DeSerializeElement(TFile* fp) {
|
||||
inT8 non_null;
|
||||
if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) return false;
|
||||
T* item = NULL;
|
||||
if (non_null) {
|
||||
item = new T;
|
||||
if (!item->DeSerialize(swap, fp)) {
|
||||
if (!item->DeSerialize(fp)) {
|
||||
delete item;
|
||||
return false;
|
||||
}
|
||||
@ -607,11 +605,11 @@ class PointerVector : public GenericVector<T*> {
|
||||
return true;
|
||||
}
|
||||
// Skips the next element of the serialization.
|
||||
static bool DeSerializeSkip(bool swap, TFile* fp) {
|
||||
static bool DeSerializeSkip(TFile* fp) {
|
||||
inT8 non_null;
|
||||
if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) return false;
|
||||
if (non_null) {
|
||||
if (!T::SkipDeSerialize(swap, fp)) return false;
|
||||
if (!T::SkipDeSerialize(fp)) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -889,23 +887,21 @@ bool GenericVector<T>::write(
|
||||
|
||||
template <typename T>
|
||||
bool GenericVector<T>::read(
|
||||
tesseract::TFile* f,
|
||||
TessResultCallback3<bool, tesseract::TFile*, T*, bool>* cb, bool swap) {
|
||||
tesseract::TFile* f, TessResultCallback2<bool, tesseract::TFile*, T*>* cb) {
|
||||
inT32 reserved;
|
||||
if (f->FReadEndian(&reserved, sizeof(reserved), 1, swap) != 1) return false;
|
||||
if (f->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false;
|
||||
reserve(reserved);
|
||||
if (f->FReadEndian(&size_used_, sizeof(size_used_), 1, swap) != 1)
|
||||
return false;
|
||||
if (f->FReadEndian(&size_used_, sizeof(size_used_), 1) != 1) return false;
|
||||
if (cb != NULL) {
|
||||
for (int i = 0; i < size_used_; ++i) {
|
||||
if (!cb->Run(f, data_ + i, swap)) {
|
||||
if (!cb->Run(f, data_ + i)) {
|
||||
delete cb;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
delete cb;
|
||||
} else {
|
||||
if (f->FReadEndian(data_, sizeof(T), size_used_, swap) != size_used_)
|
||||
if (f->FReadEndian(data_, sizeof(T), size_used_) != size_used_)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
@ -945,24 +941,17 @@ bool GenericVector<T>::DeSerialize(bool swap, FILE* fp) {
|
||||
return true;
|
||||
}
|
||||
template <typename T>
|
||||
bool GenericVector<T>::DeSerialize(bool swap, tesseract::TFile* fp) {
|
||||
bool GenericVector<T>::DeSerialize(tesseract::TFile* fp) {
|
||||
inT32 reserved;
|
||||
if (fp->FRead(&reserved, sizeof(reserved), 1) != 1) return false;
|
||||
if (swap) Reverse32(&reserved);
|
||||
if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false;
|
||||
reserve(reserved);
|
||||
size_used_ = reserved;
|
||||
if (fp->FRead(data_, sizeof(T), size_used_) != size_used_) return false;
|
||||
if (swap) {
|
||||
for (int i = 0; i < size_used_; ++i)
|
||||
ReverseN(&data_[i], sizeof(data_[i]));
|
||||
}
|
||||
return true;
|
||||
return fp->FReadEndian(data_, sizeof(T), size_used_) == size_used_;
|
||||
}
|
||||
template <typename T>
|
||||
bool GenericVector<T>::SkipDeSerialize(bool swap, tesseract::TFile* fp) {
|
||||
bool GenericVector<T>::SkipDeSerialize(tesseract::TFile* fp) {
|
||||
inT32 reserved;
|
||||
if (fp->FRead(&reserved, sizeof(reserved), 1) != 1) return false;
|
||||
if (swap) Reverse32(&reserved);
|
||||
if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false;
|
||||
return fp->FRead(NULL, sizeof(T), reserved) == reserved;
|
||||
}
|
||||
|
||||
@ -1004,24 +993,22 @@ bool GenericVector<T>::DeSerializeClasses(bool swap, FILE* fp) {
|
||||
return true;
|
||||
}
|
||||
template <typename T>
|
||||
bool GenericVector<T>::DeSerializeClasses(bool swap, tesseract::TFile* fp) {
|
||||
bool GenericVector<T>::DeSerializeClasses(tesseract::TFile* fp) {
|
||||
uinT32 reserved;
|
||||
if (fp->FRead(&reserved, sizeof(reserved), 1) != 1) return false;
|
||||
if (swap) Reverse32(&reserved);
|
||||
if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false;
|
||||
T empty;
|
||||
init_to_size(reserved, empty);
|
||||
for (int i = 0; i < reserved; ++i) {
|
||||
if (!data_[i].DeSerialize(swap, fp)) return false;
|
||||
if (!data_[i].DeSerialize(fp)) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
template <typename T>
|
||||
bool GenericVector<T>::SkipDeSerializeClasses(bool swap, tesseract::TFile* fp) {
|
||||
bool GenericVector<T>::SkipDeSerializeClasses(tesseract::TFile* fp) {
|
||||
uinT32 reserved;
|
||||
if (fp->FRead(&reserved, sizeof(reserved), 1) != 1) return false;
|
||||
if (swap) Reverse32(&reserved);
|
||||
if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false;
|
||||
for (int i = 0; i < reserved; ++i) {
|
||||
if (!T::SkipDeSerialize(swap, fp)) return false;
|
||||
if (!T::SkipDeSerialize(fp)) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -24,8 +24,11 @@
|
||||
namespace tesseract {
|
||||
|
||||
TFile::TFile()
|
||||
: offset_(0), data_(NULL), data_is_owned_(false), is_writing_(false) {
|
||||
}
|
||||
: offset_(0),
|
||||
data_(NULL),
|
||||
data_is_owned_(false),
|
||||
is_writing_(false),
|
||||
swap_(false) {}
|
||||
|
||||
TFile::~TFile() {
|
||||
if (data_is_owned_)
|
||||
@ -39,6 +42,7 @@ bool TFile::Open(const STRING& filename, FileReader reader) {
|
||||
}
|
||||
offset_ = 0;
|
||||
is_writing_ = false;
|
||||
swap_ = false;
|
||||
if (reader == NULL)
|
||||
return LoadDataFromFile(filename, data_);
|
||||
else
|
||||
@ -52,6 +56,7 @@ bool TFile::Open(const char* data, int size) {
|
||||
data_is_owned_ = true;
|
||||
}
|
||||
is_writing_ = false;
|
||||
swap_ = false;
|
||||
data_->init_to_size(size, 0);
|
||||
memcpy(&(*data_)[0], data, size);
|
||||
return true;
|
||||
@ -69,6 +74,7 @@ bool TFile::Open(FILE* fp, inT64 end_offset) {
|
||||
}
|
||||
int size = end_offset - current_pos;
|
||||
is_writing_ = false;
|
||||
swap_ = false;
|
||||
if (!data_is_owned_) {
|
||||
data_ = new GenericVector<char>;
|
||||
data_is_owned_ = true;
|
||||
@ -88,9 +94,9 @@ char* TFile::FGets(char* buffer, int buffer_size) {
|
||||
return size > 0 ? buffer : NULL;
|
||||
}
|
||||
|
||||
int TFile::FReadEndian(void* buffer, int size, int count, bool swap) {
|
||||
int TFile::FReadEndian(void* buffer, int size, int count) {
|
||||
int num_read = FRead(buffer, size, count);
|
||||
if (swap) {
|
||||
if (swap_) {
|
||||
char* char_buffer = reinterpret_cast<char*>(buffer);
|
||||
for (int i = 0; i < num_read; ++i, char_buffer += size) {
|
||||
ReverseN(char_buffer, size);
|
||||
@ -128,6 +134,7 @@ void TFile::OpenWrite(GenericVector<char>* data) {
|
||||
data_is_owned_ = true;
|
||||
}
|
||||
is_writing_ = true;
|
||||
swap_ = false;
|
||||
data_->truncate(0);
|
||||
}
|
||||
|
||||
|
@ -61,6 +61,8 @@ class TFile {
|
||||
bool Open(const char* data, int size);
|
||||
// From an open file and an end offset.
|
||||
bool Open(FILE* fp, inT64 end_offset);
|
||||
// Sets the value of the swap flag, so that FReadEndian does the right thing.
|
||||
void set_swap(bool value) { swap_ = value; }
|
||||
|
||||
// Reads a line like fgets. Returns NULL on EOF, otherwise buffer.
|
||||
// Reads at most buffer_size bytes, including '\0' terminator, even if
|
||||
@ -68,9 +70,9 @@ class TFile {
|
||||
// To use fscanf use FGets and sscanf.
|
||||
char* FGets(char* buffer, int buffer_size);
|
||||
// Replicates fread, followed by a swap of the bytes if needed, returning the
|
||||
// number of items read. If swap is true then the count items will each have
|
||||
// number of items read. If swap_ is true then the count items will each have
|
||||
// size bytes reversed.
|
||||
int FReadEndian(void* buffer, int size, int count, bool swap);
|
||||
int FReadEndian(void* buffer, int size, int count);
|
||||
// Replicates fread, returning the number of items read.
|
||||
int FRead(void* buffer, int size, int count);
|
||||
// Resets the TFile as if it has been Opened, but nothing read.
|
||||
@ -96,6 +98,8 @@ class TFile {
|
||||
bool data_is_owned_;
|
||||
// True if the TFile is open for writing.
|
||||
bool is_writing_;
|
||||
// True if bytes need to be swapped in FReadEndian.
|
||||
bool swap_;
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
@ -171,21 +171,18 @@ bool STRING::DeSerialize(bool swap, FILE* fp) {
|
||||
}
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool STRING::DeSerialize(bool swap, TFile* fp) {
|
||||
bool STRING::DeSerialize(TFile* fp) {
|
||||
inT32 len;
|
||||
if (fp->FRead(&len, sizeof(len), 1) != 1) return false;
|
||||
if (swap)
|
||||
ReverseN(&len, sizeof(len));
|
||||
if (fp->FReadEndian(&len, sizeof(len), 1) != 1) return false;
|
||||
truncate_at(len);
|
||||
if (fp->FRead(GetCStr(), 1, len) != len) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
// As DeSerialize, but only seeks past the data - hence a static method.
|
||||
bool STRING::SkipDeSerialize(bool swap, tesseract::TFile* fp) {
|
||||
bool STRING::SkipDeSerialize(tesseract::TFile* fp) {
|
||||
inT32 len;
|
||||
if (fp->FRead(&len, sizeof(len), 1) != 1) return false;
|
||||
if (swap) ReverseN(&len, sizeof(len));
|
||||
if (fp->FReadEndian(&len, sizeof(len), 1) != 1) return false;
|
||||
return fp->FRead(NULL, 1, len) == len;
|
||||
}
|
||||
|
||||
|
@ -59,9 +59,9 @@ class TESS_API STRING
|
||||
bool Serialize(tesseract::TFile* fp) const;
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool DeSerialize(bool swap, tesseract::TFile* fp);
|
||||
bool DeSerialize(tesseract::TFile* fp);
|
||||
// As DeSerialize, but only seeks past the data - hence a static method.
|
||||
static bool SkipDeSerialize(bool swap, tesseract::TFile* fp);
|
||||
static bool SkipDeSerialize(tesseract::TFile* fp);
|
||||
|
||||
BOOL8 contains(const char c) const;
|
||||
inT32 length() const;
|
||||
|
@ -59,11 +59,12 @@ bool TessdataManager::LoadMemBuffer(const char *name, const char *data,
|
||||
inT32 num_entries = TESSDATA_NUM_ENTRIES;
|
||||
if (fp.FRead(&num_entries, sizeof(num_entries), 1) != 1) return false;
|
||||
swap_ = num_entries > kMaxNumTessdataEntries || num_entries < 0;
|
||||
fp.set_swap(swap_);
|
||||
if (swap_) ReverseN(&num_entries, sizeof(num_entries));
|
||||
GenericVector<inT64> offset_table;
|
||||
offset_table.init_to_size(num_entries, -1);
|
||||
if (fp.FReadEndian(&offset_table[0], sizeof(offset_table[0]), num_entries,
|
||||
swap_) != num_entries)
|
||||
if (fp.FReadEndian(&offset_table[0], sizeof(offset_table[0]), num_entries) !=
|
||||
num_entries)
|
||||
return false;
|
||||
for (int i = 0; i < num_entries && i < TESSDATA_NUM_ENTRIES; ++i) {
|
||||
if (offset_table[i] >= 0) {
|
||||
@ -152,6 +153,7 @@ bool TessdataManager::GetComponent(TessdataType type, TFile *fp) {
|
||||
if (!is_loaded_ && !Init(data_file_name_.string())) return false;
|
||||
if (entries_[type].empty()) return false;
|
||||
fp->Open(&entries_[type][0], entries_[type].size());
|
||||
fp->set_swap(swap_);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -315,9 +315,8 @@ bool UnicharCompress::Serialize(TFile* fp) const {
|
||||
}
|
||||
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool UnicharCompress::DeSerialize(bool swap, TFile* fp) {
|
||||
if (!encoder_.DeSerializeClasses(swap, fp)) return false;
|
||||
bool UnicharCompress::DeSerialize(TFile* fp) {
|
||||
if (!encoder_.DeSerializeClasses(fp)) return false;
|
||||
ComputeCodeRange();
|
||||
SetupDecoder();
|
||||
return true;
|
||||
|
@ -69,17 +69,12 @@ class RecodedCharID {
|
||||
}
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool DeSerialize(bool swap, TFile* fp) {
|
||||
bool DeSerialize(TFile* fp) {
|
||||
if (fp->FRead(&self_normalized_, sizeof(self_normalized_), 1) != 1)
|
||||
return false;
|
||||
if (fp->FRead(&length_, sizeof(length_), 1) != 1) return false;
|
||||
if (swap) ReverseN(&length_, sizeof(length_));
|
||||
if (fp->FRead(code_, sizeof(code_[0]), length_) != length_) return false;
|
||||
if (swap) {
|
||||
for (int i = 0; i < length_; ++i) {
|
||||
ReverseN(&code_[i], sizeof(code_[i]));
|
||||
}
|
||||
}
|
||||
if (fp->FReadEndian(&length_, sizeof(length_), 1) != 1) return false;
|
||||
if (fp->FReadEndian(code_, sizeof(code_[0]), length_) != length_)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
bool operator==(const RecodedCharID& other) const {
|
||||
@ -205,8 +200,8 @@ class UnicharCompress {
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
bool Serialize(TFile* fp) const;
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool DeSerialize(bool swap, TFile* fp);
|
||||
|
||||
bool DeSerialize(TFile* fp);
|
||||
|
||||
// Returns a STRING containing a text file that describes the encoding thus:
|
||||
// <index>[,<index>]*<tab><UTF8-str><newline>
|
||||
|
@ -86,10 +86,8 @@ class UnicityTable {
|
||||
/// once. The given callback will be deleted at the end.
|
||||
/// Returns false on read/write error.
|
||||
bool write(FILE* f, TessResultCallback2<bool, FILE*, T const &>* cb) const;
|
||||
/// swap is used to switch the endianness.
|
||||
bool read(tesseract::TFile* f,
|
||||
TessResultCallback3<bool, tesseract::TFile*, T*, bool>* cb,
|
||||
bool swap);
|
||||
TessResultCallback2<bool, tesseract::TFile*, T*>* cb);
|
||||
|
||||
private:
|
||||
GenericVector<T> table_;
|
||||
@ -196,9 +194,8 @@ bool UnicityTable<T>::write(
|
||||
|
||||
template <typename T>
|
||||
bool UnicityTable<T>::read(
|
||||
tesseract::TFile* f,
|
||||
TessResultCallback3<bool, tesseract::TFile*, T*, bool>* cb, bool swap) {
|
||||
return table_.read(f, cb, swap);
|
||||
tesseract::TFile* f, TessResultCallback2<bool, tesseract::TFile*, T*>* cb) {
|
||||
return table_.read(f, cb);
|
||||
}
|
||||
|
||||
// This method clears the current object, then does a shallow copy of
|
||||
|
@ -365,7 +365,7 @@ ADAPT_TEMPLATES Classify::ReadAdaptedTemplates(TFile *fp) {
|
||||
fp->FRead(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1);
|
||||
|
||||
/* then read in the basic integer templates */
|
||||
Templates->Templates = ReadIntTemplates(false, fp);
|
||||
Templates->Templates = ReadIntTemplates(fp);
|
||||
|
||||
/* then read in the adaptive info for each class */
|
||||
for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
|
||||
|
@ -535,11 +535,11 @@ void Classify::InitAdaptiveClassifier(TessdataManager* mgr) {
|
||||
if (language_data_path_prefix.length() > 0 && mgr != nullptr) {
|
||||
TFile fp;
|
||||
ASSERT_HOST(mgr->GetComponent(TESSDATA_INTTEMP, &fp));
|
||||
PreTrainedTemplates = ReadIntTemplates(mgr->swap(), &fp);
|
||||
PreTrainedTemplates = ReadIntTemplates(&fp);
|
||||
|
||||
if (mgr->GetComponent(TESSDATA_SHAPE_TABLE, &fp)) {
|
||||
shape_table_ = new ShapeTable(unicharset);
|
||||
if (!shape_table_->DeSerialize(mgr->swap(), &fp)) {
|
||||
if (!shape_table_->DeSerialize(&fp)) {
|
||||
tprintf("Error loading shape table!\n");
|
||||
delete shape_table_;
|
||||
shape_table_ = NULL;
|
||||
@ -547,7 +547,7 @@ void Classify::InitAdaptiveClassifier(TessdataManager* mgr) {
|
||||
}
|
||||
|
||||
ASSERT_HOST(mgr->GetComponent(TESSDATA_PFFMTABLE, &fp));
|
||||
ReadNewCutoffs(&fp, mgr->swap(), CharNormCutoffs);
|
||||
ReadNewCutoffs(&fp, CharNormCutoffs);
|
||||
|
||||
ASSERT_HOST(mgr->GetComponent(TESSDATA_NORMPROTO, &fp));
|
||||
NormProtos = ReadNormProtos(&fp);
|
||||
|
@ -103,7 +103,7 @@ class Classify : public CCStruct {
|
||||
const uinT8* normalization_factors,
|
||||
const uinT16* expected_num_features,
|
||||
GenericVector<CP_RESULT_STRUCT>* results);
|
||||
void ReadNewCutoffs(TFile* fp, bool swap, CLASS_CUTOFF_ARRAY Cutoffs);
|
||||
void ReadNewCutoffs(TFile* fp, CLASS_CUTOFF_ARRAY Cutoffs);
|
||||
void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates);
|
||||
void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates);
|
||||
ADAPT_TEMPLATES ReadAdaptedTemplates(TFile* File);
|
||||
@ -334,7 +334,7 @@ class Classify : public CCStruct {
|
||||
uinT8* char_norm_array);
|
||||
void ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures);
|
||||
/* intproto.cpp *************************************************************/
|
||||
INT_TEMPLATES ReadIntTemplates(bool swap, TFile* fp);
|
||||
INT_TEMPLATES ReadIntTemplates(TFile* fp);
|
||||
void WriteIntTemplates(FILE *File, INT_TEMPLATES Templates,
|
||||
const UNICHARSET& target_unicharset);
|
||||
CLASS_ID GetClassToDebug(const char *Prompt, bool* adaptive_on,
|
||||
|
@ -49,15 +49,14 @@ namespace tesseract {
|
||||
* @note Exceptions: none
|
||||
* @note History: Wed Feb 20 09:38:26 1991, DSJ, Created.
|
||||
*/
|
||||
void Classify::ReadNewCutoffs(TFile* fp, bool swap,
|
||||
CLASS_CUTOFF_ARRAY Cutoffs) {
|
||||
void Classify::ReadNewCutoffs(TFile* fp, CLASS_CUTOFF_ARRAY Cutoffs) {
|
||||
char Class[UNICHAR_LEN + 1];
|
||||
CLASS_ID ClassId;
|
||||
int Cutoff;
|
||||
int i;
|
||||
|
||||
if (shape_table_ != NULL) {
|
||||
if (!shapetable_cutoffs_.DeSerialize(swap, fp)) {
|
||||
if (!shapetable_cutoffs_.DeSerialize(fp)) {
|
||||
tprintf("Error during read of shapetable pffmtable!\n");
|
||||
}
|
||||
}
|
||||
|
@ -758,7 +758,7 @@ namespace tesseract {
|
||||
* @note Exceptions: none
|
||||
* @note History: Wed Feb 27 11:48:46 1991, DSJ, Created.
|
||||
*/
|
||||
INT_TEMPLATES Classify::ReadIntTemplates(bool swap, TFile *fp) {
|
||||
INT_TEMPLATES Classify::ReadIntTemplates(TFile *fp) {
|
||||
int i, j, w, x, y, z;
|
||||
int unicharset_size;
|
||||
int version_id = 0;
|
||||
@ -784,18 +784,18 @@ INT_TEMPLATES Classify::ReadIntTemplates(bool swap, TFile *fp) {
|
||||
/* first read the high level template struct */
|
||||
Templates = NewIntTemplates();
|
||||
// Read Templates in parts for 64 bit compatibility.
|
||||
if (fp->FReadEndian(&unicharset_size, sizeof(unicharset_size), 1, swap) != 1)
|
||||
if (fp->FReadEndian(&unicharset_size, sizeof(unicharset_size), 1) != 1)
|
||||
tprintf("Bad read of inttemp!\n");
|
||||
if (fp->FReadEndian(&Templates->NumClasses, sizeof(Templates->NumClasses), 1,
|
||||
swap) != 1 ||
|
||||
if (fp->FReadEndian(&Templates->NumClasses, sizeof(Templates->NumClasses),
|
||||
1) != 1 ||
|
||||
fp->FReadEndian(&Templates->NumClassPruners,
|
||||
sizeof(Templates->NumClassPruners), 1, swap) != 1)
|
||||
sizeof(Templates->NumClassPruners), 1) != 1)
|
||||
tprintf("Bad read of inttemp!\n");
|
||||
if (Templates->NumClasses < 0) {
|
||||
// This file has a version id!
|
||||
version_id = -Templates->NumClasses;
|
||||
if (fp->FReadEndian(&Templates->NumClasses, sizeof(Templates->NumClasses),
|
||||
1, swap) != 1)
|
||||
1) != 1)
|
||||
tprintf("Bad read of inttemp!\n");
|
||||
}
|
||||
|
||||
@ -805,12 +805,12 @@ INT_TEMPLATES Classify::ReadIntTemplates(bool swap, TFile *fp) {
|
||||
}
|
||||
|
||||
if (version_id < 2) {
|
||||
if (fp->FReadEndian(IndexFor, sizeof(IndexFor[0]), unicharset_size, swap) !=
|
||||
if (fp->FReadEndian(IndexFor, sizeof(IndexFor[0]), unicharset_size) !=
|
||||
unicharset_size) {
|
||||
tprintf("Bad read of inttemp!\n");
|
||||
}
|
||||
if (fp->FReadEndian(ClassIdFor, sizeof(ClassIdFor[0]),
|
||||
Templates->NumClasses, swap) != Templates->NumClasses) {
|
||||
Templates->NumClasses) != Templates->NumClasses) {
|
||||
tprintf("Bad read of inttemp!\n");
|
||||
}
|
||||
}
|
||||
@ -820,8 +820,8 @@ INT_TEMPLATES Classify::ReadIntTemplates(bool swap, TFile *fp) {
|
||||
NUM_CP_BUCKETS * NUM_CP_BUCKETS * NUM_CP_BUCKETS * WERDS_PER_CP_VECTOR;
|
||||
for (i = 0; i < Templates->NumClassPruners; i++) {
|
||||
Pruner = new CLASS_PRUNER_STRUCT;
|
||||
if (fp->FReadEndian(Pruner, sizeof(Pruner->p[0][0][0][0]), kNumBuckets,
|
||||
swap) != kNumBuckets) {
|
||||
if (fp->FReadEndian(Pruner, sizeof(Pruner->p[0][0][0][0]), kNumBuckets) !=
|
||||
kNumBuckets) {
|
||||
tprintf("Bad read of inttemp!\n");
|
||||
}
|
||||
if (version_id < 2) {
|
||||
@ -887,8 +887,7 @@ INT_TEMPLATES Classify::ReadIntTemplates(bool swap, TFile *fp) {
|
||||
for (i = 0; i < Templates->NumClasses; i++) {
|
||||
/* first read in the high level struct for the class */
|
||||
Class = (INT_CLASS) Emalloc (sizeof (INT_CLASS_STRUCT));
|
||||
if (fp->FReadEndian(&Class->NumProtos, sizeof(Class->NumProtos), 1, swap) !=
|
||||
1 ||
|
||||
if (fp->FReadEndian(&Class->NumProtos, sizeof(Class->NumProtos), 1) != 1 ||
|
||||
fp->FRead(&Class->NumProtoSets, sizeof(Class->NumProtoSets), 1) != 1 ||
|
||||
fp->FRead(&Class->NumConfigs, sizeof(Class->NumConfigs), 1) != 1)
|
||||
tprintf("Bad read of inttemp!\n");
|
||||
@ -902,8 +901,8 @@ INT_TEMPLATES Classify::ReadIntTemplates(bool swap, TFile *fp) {
|
||||
}
|
||||
int num_configs = version_id < 4 ? MaxNumConfigs : Class->NumConfigs;
|
||||
ASSERT_HOST(num_configs <= MaxNumConfigs);
|
||||
if (fp->FReadEndian(Class->ConfigLengths, sizeof(uinT16), num_configs,
|
||||
swap) != num_configs) {
|
||||
if (fp->FReadEndian(Class->ConfigLengths, sizeof(uinT16), num_configs) !=
|
||||
num_configs) {
|
||||
tprintf("Bad read of inttemp!\n");
|
||||
}
|
||||
if (version_id < 2) {
|
||||
@ -927,8 +926,8 @@ INT_TEMPLATES Classify::ReadIntTemplates(bool swap, TFile *fp) {
|
||||
ProtoSet = (PROTO_SET)Emalloc(sizeof(PROTO_SET_STRUCT));
|
||||
int num_buckets = NUM_PP_PARAMS * NUM_PP_BUCKETS * WERDS_PER_PP_VECTOR;
|
||||
if (fp->FReadEndian(&ProtoSet->ProtoPruner,
|
||||
sizeof(ProtoSet->ProtoPruner[0][0][0]), num_buckets,
|
||||
swap) != num_buckets)
|
||||
sizeof(ProtoSet->ProtoPruner[0][0][0]),
|
||||
num_buckets) != num_buckets)
|
||||
tprintf("Bad read of inttemp!\n");
|
||||
for (x = 0; x < PROTOS_PER_PROTO_SET; x++) {
|
||||
if (fp->FRead(&ProtoSet->Protos[x].A, sizeof(ProtoSet->Protos[x].A),
|
||||
@ -942,7 +941,7 @@ INT_TEMPLATES Classify::ReadIntTemplates(bool swap, TFile *fp) {
|
||||
tprintf("Bad read of inttemp!\n");
|
||||
if (fp->FReadEndian(&ProtoSet->Protos[x].Configs,
|
||||
sizeof(ProtoSet->Protos[x].Configs[0]),
|
||||
WerdsPerConfigVec, swap) != WerdsPerConfigVec)
|
||||
WerdsPerConfigVec) != WerdsPerConfigVec)
|
||||
cprintf("Bad read of inttemp!\n");
|
||||
}
|
||||
Class->ProtoSets[j] = ProtoSet;
|
||||
@ -950,7 +949,7 @@ INT_TEMPLATES Classify::ReadIntTemplates(bool swap, TFile *fp) {
|
||||
if (version_id < 4) {
|
||||
Class->font_set_id = -1;
|
||||
} else {
|
||||
fp->FReadEndian(&Class->font_set_id, sizeof(Class->font_set_id), 1, swap);
|
||||
fp->FReadEndian(&Class->font_set_id, sizeof(Class->font_set_id), 1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -977,12 +976,12 @@ INT_TEMPLATES Classify::ReadIntTemplates(bool swap, TFile *fp) {
|
||||
}
|
||||
}
|
||||
if (version_id >= 4) {
|
||||
this->fontinfo_table_.read(fp, NewPermanentTessCallback(read_info), swap);
|
||||
this->fontinfo_table_.read(fp, NewPermanentTessCallback(read_info));
|
||||
if (version_id >= 5) {
|
||||
this->fontinfo_table_.read(
|
||||
fp, NewPermanentTessCallback(read_spacing_info), swap);
|
||||
this->fontinfo_table_.read(fp,
|
||||
NewPermanentTessCallback(read_spacing_info));
|
||||
}
|
||||
this->fontset_table_.read(fp, NewPermanentTessCallback(read_set), swap);
|
||||
this->fontset_table_.read(fp, NewPermanentTessCallback(read_set));
|
||||
}
|
||||
|
||||
// Clean up.
|
||||
|
@ -70,11 +70,10 @@ bool UnicharAndFonts::Serialize(FILE* fp) const {
|
||||
return true;
|
||||
}
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool UnicharAndFonts::DeSerialize(bool swap, TFile* fp) {
|
||||
if (fp->FReadEndian(&unichar_id, sizeof(unichar_id), 1, swap) != 1)
|
||||
return false;
|
||||
if (!font_ids.DeSerialize(swap, fp)) return false;
|
||||
|
||||
bool UnicharAndFonts::DeSerialize(TFile* fp) {
|
||||
if (fp->FReadEndian(&unichar_id, sizeof(unichar_id), 1) != 1) return false;
|
||||
if (!font_ids.DeSerialize(fp)) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -94,13 +93,12 @@ bool Shape::Serialize(FILE* fp) const {
|
||||
return true;
|
||||
}
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool Shape::DeSerialize(bool swap, TFile* fp) {
|
||||
|
||||
bool Shape::DeSerialize(TFile* fp) {
|
||||
uinT8 sorted;
|
||||
if (fp->FRead(&sorted, sizeof(sorted), 1) != 1) return false;
|
||||
unichars_sorted_ = sorted != 0;
|
||||
if (!unichars_.DeSerializeClasses(swap, fp)) return false;
|
||||
return true;
|
||||
return unichars_.DeSerializeClasses(fp);
|
||||
}
|
||||
|
||||
// Adds a font_id for the given unichar_id. If the unichar_id is not
|
||||
@ -250,9 +248,9 @@ bool ShapeTable::Serialize(FILE* fp) const {
|
||||
return true;
|
||||
}
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool ShapeTable::DeSerialize(bool swap, TFile* fp) {
|
||||
if (!shape_table_.DeSerialize(swap, fp)) return false;
|
||||
|
||||
bool ShapeTable::DeSerialize(TFile* fp) {
|
||||
if (!shape_table_.DeSerialize(fp)) return false;
|
||||
num_fonts_ = 0;
|
||||
return true;
|
||||
}
|
||||
|
@ -167,8 +167,7 @@ struct UnicharAndFonts {
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
bool Serialize(FILE* fp) const;
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool DeSerialize(bool swap, TFile* fp);
|
||||
bool DeSerialize(TFile* fp);
|
||||
|
||||
// Sort function to sort a pair of UnicharAndFonts by unichar_id.
|
||||
static int SortByUnicharId(const void* v1, const void* v2);
|
||||
@ -190,8 +189,7 @@ class Shape {
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
bool Serialize(FILE* fp) const;
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool DeSerialize(bool swap, TFile* fp);
|
||||
bool DeSerialize(TFile* fp);
|
||||
|
||||
int destination_index() const {
|
||||
return destination_index_;
|
||||
@ -271,8 +269,7 @@ class ShapeTable {
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
bool Serialize(FILE* fp) const;
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool DeSerialize(bool swap, TFile* fp);
|
||||
bool DeSerialize(TFile* fp);
|
||||
|
||||
// Accessors.
|
||||
int NumShapes() const {
|
||||
|
@ -311,23 +311,24 @@ void SquishedDawg::print_edge(EDGE_REF edge) const {
|
||||
bool SquishedDawg::read_squished_dawg(TFile *file) {
|
||||
if (debug_level_) tprintf("Reading squished dawg\n");
|
||||
|
||||
// Read the magic number and if it does not match kDawgMagicNumber
|
||||
// set swap to true to indicate that we need to switch endianness.
|
||||
// Read the magic number and check that it matches kDawgMagicNumber, as
|
||||
// auto-endian fixing should make sure it is always correct.
|
||||
inT16 magic;
|
||||
if (file->FRead(&magic, sizeof(inT16), 1) != 1) return false;
|
||||
bool swap = (magic != kDawgMagicNumber);
|
||||
if (file->FReadEndian(&magic, sizeof(magic), 1) != 1) return false;
|
||||
if (magic != kDawgMagicNumber) {
|
||||
tprintf("Bad magic number on dawg: %d vs %d\n", magic, kDawgMagicNumber);
|
||||
return false;
|
||||
}
|
||||
|
||||
inT32 unicharset_size;
|
||||
if (file->FReadEndian(&unicharset_size, sizeof(unicharset_size), 1, swap) !=
|
||||
1)
|
||||
return false;
|
||||
if (file->FReadEndian(&num_edges_, sizeof(num_edges_), 1, swap) != 1)
|
||||
if (file->FReadEndian(&unicharset_size, sizeof(unicharset_size), 1) != 1)
|
||||
return false;
|
||||
if (file->FReadEndian(&num_edges_, sizeof(num_edges_), 1) != 1) return false;
|
||||
ASSERT_HOST(num_edges_ > 0); // DAWG should not be empty
|
||||
Dawg::init(unicharset_size);
|
||||
|
||||
edges_ = new EDGE_RECORD[num_edges_];
|
||||
if (file->FReadEndian(&edges_[0], sizeof(edges_[0]), num_edges_, swap) !=
|
||||
if (file->FReadEndian(&edges_[0], sizeof(edges_[0]), num_edges_) !=
|
||||
num_edges_)
|
||||
return false;
|
||||
if (debug_level_ > 2) {
|
||||
|
@ -42,14 +42,9 @@ bool Convolve::Serialize(TFile* fp) const {
|
||||
}
|
||||
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool Convolve::DeSerialize(bool swap, TFile* fp) {
|
||||
if (fp->FRead(&half_x_, sizeof(half_x_), 1) != 1) return false;
|
||||
if (fp->FRead(&half_y_, sizeof(half_y_), 1) != 1) return false;
|
||||
if (swap) {
|
||||
ReverseN(&half_x_, sizeof(half_x_));
|
||||
ReverseN(&half_y_, sizeof(half_y_));
|
||||
}
|
||||
bool Convolve::DeSerialize(TFile* fp) {
|
||||
if (fp->FReadEndian(&half_x_, sizeof(half_x_), 1) != 1) return false;
|
||||
if (fp->FReadEndian(&half_y_, sizeof(half_y_), 1) != 1) return false;
|
||||
no_ = ni_ * (2*half_x_ + 1) * (2*half_y_ + 1);
|
||||
return true;
|
||||
}
|
||||
|
@ -47,8 +47,7 @@ class Convolve : public Network {
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
virtual bool Serialize(TFile* fp) const;
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
virtual bool DeSerialize(bool swap, TFile* fp);
|
||||
virtual bool DeSerialize(TFile* fp);
|
||||
|
||||
// Runs forward propagation of activations on the input line.
|
||||
// See Network for a detailed discussion of the arguments.
|
||||
|
@ -94,10 +94,8 @@ bool FullyConnected::Serialize(TFile* fp) const {
|
||||
}
|
||||
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool FullyConnected::DeSerialize(bool swap, TFile* fp) {
|
||||
if (!weights_.DeSerialize(IsTraining(), swap, fp)) return false;
|
||||
return true;
|
||||
bool FullyConnected::DeSerialize(TFile* fp) {
|
||||
return weights_.DeSerialize(IsTraining(), fp);
|
||||
}
|
||||
|
||||
// Runs forward propagation of activations on the input line.
|
||||
|
@ -78,8 +78,7 @@ class FullyConnected : public Network {
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
virtual bool Serialize(TFile* fp) const;
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
virtual bool DeSerialize(bool swap, TFile* fp);
|
||||
virtual bool DeSerialize(TFile* fp);
|
||||
|
||||
// Runs forward propagation of activations on the input line.
|
||||
// See Network for a detailed discussion of the arguments.
|
||||
|
@ -48,11 +48,8 @@ bool Input::Serialize(TFile* fp) const {
|
||||
}
|
||||
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool Input::DeSerialize(bool swap, TFile* fp) {
|
||||
if (fp->FRead(&shape_, sizeof(shape_), 1) != 1) return false;
|
||||
// TODO(rays) swaps!
|
||||
return true;
|
||||
bool Input::DeSerialize(TFile* fp) {
|
||||
return fp->FReadEndian(&shape_, sizeof(shape_), 1) == 1;
|
||||
}
|
||||
|
||||
// Returns an integer reduction factor that the network applies to the
|
||||
|
@ -51,9 +51,7 @@ class Input : public Network {
|
||||
// Should be overridden by subclasses, but called by their Serialize.
|
||||
virtual bool Serialize(TFile* fp) const;
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
// Should be overridden by subclasses, but NOT called by their DeSerialize.
|
||||
virtual bool DeSerialize(bool swap, TFile* fp);
|
||||
virtual bool DeSerialize(TFile* fp);
|
||||
|
||||
// Returns an integer reduction factor that the network applies to the
|
||||
// time sequence. Assumes that any 2-d is already eliminated. Used for
|
||||
|
@ -173,10 +173,9 @@ bool LSTM::Serialize(TFile* fp) const {
|
||||
}
|
||||
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool LSTM::DeSerialize(bool swap, TFile* fp) {
|
||||
if (fp->FRead(&na_, sizeof(na_), 1) != 1) return false;
|
||||
if (swap) ReverseN(&na_, sizeof(na_));
|
||||
|
||||
bool LSTM::DeSerialize(TFile* fp) {
|
||||
if (fp->FReadEndian(&na_, sizeof(na_), 1) != 1) return false;
|
||||
if (type_ == NT_LSTM_SOFTMAX) {
|
||||
nf_ = no_;
|
||||
} else if (type_ == NT_LSTM_SOFTMAX_ENCODED) {
|
||||
@ -187,7 +186,7 @@ bool LSTM::DeSerialize(bool swap, TFile* fp) {
|
||||
is_2d_ = false;
|
||||
for (int w = 0; w < WT_COUNT; ++w) {
|
||||
if (w == GFS && !Is2D()) continue;
|
||||
if (!gate_weights_[w].DeSerialize(IsTraining(), swap, fp)) return false;
|
||||
if (!gate_weights_[w].DeSerialize(IsTraining(), fp)) return false;
|
||||
if (w == CI) {
|
||||
ns_ = gate_weights_[CI].NumOutputs();
|
||||
is_2d_ = na_ - nf_ == ni_ + 2 * ns_;
|
||||
@ -195,11 +194,10 @@ bool LSTM::DeSerialize(bool swap, TFile* fp) {
|
||||
}
|
||||
delete softmax_;
|
||||
if (type_ == NT_LSTM_SOFTMAX || type_ == NT_LSTM_SOFTMAX_ENCODED) {
|
||||
softmax_ =
|
||||
reinterpret_cast<FullyConnected*>(Network::CreateFromFile(swap, fp));
|
||||
if (softmax_ == NULL) return false;
|
||||
softmax_ = reinterpret_cast<FullyConnected*>(Network::CreateFromFile(fp));
|
||||
if (softmax_ == nullptr) return false;
|
||||
} else {
|
||||
softmax_ = NULL;
|
||||
softmax_ = nullptr;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -86,8 +86,7 @@ class LSTM : public Network {
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
virtual bool Serialize(TFile* fp) const;
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
virtual bool DeSerialize(bool swap, TFile* fp);
|
||||
virtual bool DeSerialize(TFile* fp);
|
||||
|
||||
// Runs forward propagation of activations on the input line.
|
||||
// See Network for a detailed discussion of the arguments.
|
||||
|
@ -88,25 +88,27 @@ bool LSTMRecognizer::Serialize(TFile* fp) const {
|
||||
}
|
||||
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool LSTMRecognizer::DeSerialize(bool swap, TFile* fp) {
|
||||
bool LSTMRecognizer::DeSerialize(TFile* fp) {
|
||||
delete network_;
|
||||
network_ = Network::CreateFromFile(swap, fp);
|
||||
network_ = Network::CreateFromFile(fp);
|
||||
if (network_ == NULL) return false;
|
||||
if (!ccutil_.unicharset.load_from_file(fp, false)) return false;
|
||||
if (!network_str_.DeSerialize(swap, fp)) return false;
|
||||
if (fp->FRead(&training_flags_, sizeof(training_flags_), 1) != 1)
|
||||
if (!network_str_.DeSerialize(fp)) return false;
|
||||
if (fp->FReadEndian(&training_flags_, sizeof(training_flags_), 1) != 1)
|
||||
return false;
|
||||
if (fp->FRead(&training_iteration_, sizeof(training_iteration_), 1) != 1)
|
||||
if (fp->FReadEndian(&training_iteration_, sizeof(training_iteration_), 1) !=
|
||||
1)
|
||||
return false;
|
||||
if (fp->FRead(&sample_iteration_, sizeof(sample_iteration_), 1) != 1)
|
||||
if (fp->FReadEndian(&sample_iteration_, sizeof(sample_iteration_), 1) != 1)
|
||||
return false;
|
||||
if (fp->FRead(&null_char_, sizeof(null_char_), 1) != 1) return false;
|
||||
if (fp->FRead(&weight_range_, sizeof(weight_range_), 1) != 1) return false;
|
||||
if (fp->FRead(&learning_rate_, sizeof(learning_rate_), 1) != 1) return false;
|
||||
if (fp->FRead(&momentum_, sizeof(momentum_), 1) != 1) return false;
|
||||
if (fp->FReadEndian(&null_char_, sizeof(null_char_), 1) != 1) return false;
|
||||
if (fp->FReadEndian(&weight_range_, sizeof(weight_range_), 1) != 1)
|
||||
return false;
|
||||
if (fp->FReadEndian(&learning_rate_, sizeof(learning_rate_), 1) != 1)
|
||||
return false;
|
||||
if (fp->FReadEndian(&momentum_, sizeof(momentum_), 1) != 1) return false;
|
||||
if (IsRecoding()) {
|
||||
if (!recoder_.DeSerialize(swap, fp)) return false;
|
||||
if (!recoder_.DeSerialize(fp)) return false;
|
||||
RecodedCharID code;
|
||||
recoder_.EncodeUnichar(UNICHAR_SPACE, &code);
|
||||
if (code(0) != UNICHAR_SPACE) {
|
||||
@ -114,7 +116,6 @@ bool LSTMRecognizer::DeSerialize(bool swap, TFile* fp) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// TODO(rays) swaps!
|
||||
network_->SetRandomizer(&randomizer_);
|
||||
network_->CacheXScaleFactor(network_->XScaleFactor());
|
||||
return true;
|
||||
|
@ -158,8 +158,7 @@ class LSTMRecognizer {
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
bool Serialize(TFile* fp) const;
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool DeSerialize(bool swap, TFile* fp);
|
||||
bool DeSerialize(TFile* fp);
|
||||
// Loads the dictionary if possible from the traineddata file.
|
||||
// Prints a warning message, and returns false but otherwise fails silently
|
||||
// and continues to work without it if loading fails.
|
||||
|
@ -304,8 +304,7 @@ void LSTMTrainer::DebugNetwork() {
|
||||
// loaded.
|
||||
bool LSTMTrainer::LoadAllTrainingData(const GenericVector<STRING>& filenames) {
|
||||
training_data_.Clear();
|
||||
return training_data_.LoadDocuments(filenames, "eng", CacheStrategy(),
|
||||
file_reader_);
|
||||
return training_data_.LoadDocuments(filenames, CacheStrategy(), file_reader_);
|
||||
}
|
||||
|
||||
// Keeps track of best and locally worst char error_rate and launches tests
|
||||
@ -480,54 +479,54 @@ bool LSTMTrainer::Serialize(TFile* fp) const {
|
||||
}
|
||||
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool LSTMTrainer::DeSerialize(bool swap, TFile* fp) {
|
||||
if (!LSTMRecognizer::DeSerialize(swap, fp)) return false;
|
||||
// NOTE: It is assumed that the trainer is never read cross-endian.
|
||||
bool LSTMTrainer::DeSerialize(TFile* fp) {
|
||||
if (!LSTMRecognizer::DeSerialize(fp)) return false;
|
||||
if (fp->FRead(&learning_iteration_, sizeof(learning_iteration_), 1) != 1) {
|
||||
// Special case. If we successfully decoded the recognizer, but fail here
|
||||
// then it means we were just given a recognizer, so issue a warning and
|
||||
// allow it.
|
||||
tprintf("Warning: LSTMTrainer deserialized an LSTMRecognizer!\n");
|
||||
learning_iteration_ = 0;
|
||||
network_->SetEnableTraining(TS_RE_ENABLE);
|
||||
network_->SetEnableTraining(TS_ENABLED);
|
||||
return true;
|
||||
}
|
||||
if (fp->FRead(&prev_sample_iteration_, sizeof(prev_sample_iteration_), 1) !=
|
||||
1)
|
||||
if (fp->FReadEndian(&prev_sample_iteration_, sizeof(prev_sample_iteration_),
|
||||
1) != 1)
|
||||
return false;
|
||||
if (fp->FRead(&perfect_delay_, sizeof(perfect_delay_), 1) != 1) return false;
|
||||
if (fp->FRead(&last_perfect_training_iteration_,
|
||||
sizeof(last_perfect_training_iteration_), 1) != 1)
|
||||
if (fp->FReadEndian(&perfect_delay_, sizeof(perfect_delay_), 1) != 1)
|
||||
return false;
|
||||
if (fp->FReadEndian(&last_perfect_training_iteration_,
|
||||
sizeof(last_perfect_training_iteration_), 1) != 1)
|
||||
return false;
|
||||
for (int i = 0; i < ET_COUNT; ++i) {
|
||||
if (!error_buffers_[i].DeSerialize(swap, fp)) return false;
|
||||
if (!error_buffers_[i].DeSerialize(fp)) return false;
|
||||
}
|
||||
if (fp->FRead(&error_rates_, sizeof(error_rates_), 1) != 1) return false;
|
||||
if (fp->FRead(&training_stage_, sizeof(training_stage_), 1) != 1)
|
||||
if (fp->FReadEndian(&training_stage_, sizeof(training_stage_), 1) != 1)
|
||||
return false;
|
||||
uinT8 amount;
|
||||
if (fp->FRead(&amount, sizeof(amount), 1) != 1) return false;
|
||||
if (amount == LIGHT) return true; // Don't read the rest.
|
||||
if (fp->FRead(&best_error_rate_, sizeof(best_error_rate_), 1) != 1)
|
||||
if (fp->FReadEndian(&best_error_rate_, sizeof(best_error_rate_), 1) != 1)
|
||||
return false;
|
||||
if (fp->FRead(&best_error_rates_, sizeof(best_error_rates_), 1) != 1)
|
||||
if (fp->FReadEndian(&best_error_rates_, sizeof(best_error_rates_), 1) != 1)
|
||||
return false;
|
||||
if (fp->FRead(&best_iteration_, sizeof(best_iteration_), 1) != 1)
|
||||
if (fp->FReadEndian(&best_iteration_, sizeof(best_iteration_), 1) != 1)
|
||||
return false;
|
||||
if (fp->FRead(&worst_error_rate_, sizeof(worst_error_rate_), 1) != 1)
|
||||
if (fp->FReadEndian(&worst_error_rate_, sizeof(worst_error_rate_), 1) != 1)
|
||||
return false;
|
||||
if (fp->FRead(&worst_error_rates_, sizeof(worst_error_rates_), 1) != 1)
|
||||
if (fp->FReadEndian(&worst_error_rates_, sizeof(worst_error_rates_), 1) != 1)
|
||||
return false;
|
||||
if (fp->FRead(&worst_iteration_, sizeof(worst_iteration_), 1) != 1)
|
||||
if (fp->FReadEndian(&worst_iteration_, sizeof(worst_iteration_), 1) != 1)
|
||||
return false;
|
||||
if (fp->FRead(&stall_iteration_, sizeof(stall_iteration_), 1) != 1)
|
||||
return false;
|
||||
if (!best_model_data_.DeSerialize(swap, fp)) return false;
|
||||
if (!worst_model_data_.DeSerialize(swap, fp)) return false;
|
||||
if (amount != NO_BEST_TRAINER && !best_trainer_.DeSerialize(swap, fp))
|
||||
if (fp->FReadEndian(&stall_iteration_, sizeof(stall_iteration_), 1) != 1)
|
||||
return false;
|
||||
if (!best_model_data_.DeSerialize(fp)) return false;
|
||||
if (!worst_model_data_.DeSerialize(fp)) return false;
|
||||
if (amount != NO_BEST_TRAINER && !best_trainer_.DeSerialize(fp)) return false;
|
||||
GenericVector<char> sub_data;
|
||||
if (!sub_data.DeSerialize(swap, fp)) return false;
|
||||
if (!sub_data.DeSerialize(fp)) return false;
|
||||
delete sub_trainer_;
|
||||
if (sub_data.empty()) {
|
||||
sub_trainer_ = NULL;
|
||||
@ -535,9 +534,9 @@ bool LSTMTrainer::DeSerialize(bool swap, TFile* fp) {
|
||||
sub_trainer_ = new LSTMTrainer();
|
||||
if (!ReadTrainingDump(sub_data, sub_trainer_)) return false;
|
||||
}
|
||||
if (!best_error_history_.DeSerialize(swap, fp)) return false;
|
||||
if (!best_error_iterations_.DeSerialize(swap, fp)) return false;
|
||||
if (fp->FRead(&improvement_steps_, sizeof(improvement_steps_), 1) != 1)
|
||||
if (!best_error_history_.DeSerialize(fp)) return false;
|
||||
if (!best_error_iterations_.DeSerialize(fp)) return false;
|
||||
if (fp->FReadEndian(&improvement_steps_, sizeof(improvement_steps_), 1) != 1)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
@ -925,7 +924,7 @@ bool LSTMTrainer::ReadTrainingDump(const GenericVector<char>& data,
|
||||
bool LSTMTrainer::ReadSizedTrainingDump(const char* data, int size) {
|
||||
TFile fp;
|
||||
fp.Open(data, size);
|
||||
return DeSerialize(false, &fp);
|
||||
return DeSerialize(&fp);
|
||||
}
|
||||
|
||||
// Writes the recognizer to memory, so that it can be used for testing later.
|
||||
@ -943,7 +942,7 @@ LSTMRecognizer* LSTMTrainer::ReadRecognitionDump(
|
||||
TFile fp;
|
||||
fp.Open(&data[0], data.size());
|
||||
LSTMRecognizer* recognizer = new LSTMRecognizer;
|
||||
ASSERT_HOST(recognizer->DeSerialize(false, &fp));
|
||||
ASSERT_HOST(recognizer->DeSerialize(&fp));
|
||||
return recognizer;
|
||||
}
|
||||
|
||||
|
@ -215,8 +215,7 @@ class LSTMTrainer : public LSTMRecognizer {
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
virtual bool Serialize(TFile* fp) const;
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
virtual bool DeSerialize(bool swap, TFile* fp);
|
||||
virtual bool DeSerialize(TFile* fp);
|
||||
|
||||
// De-serializes the saved best_trainer_ into sub_trainer_, and adjusts the
|
||||
// learning rates (by scaling reduction, or layer specific, according to
|
||||
|
@ -31,9 +31,8 @@ Maxpool::~Maxpool() {
|
||||
}
|
||||
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool Maxpool::DeSerialize(bool swap, TFile* fp) {
|
||||
bool result = Reconfig::DeSerialize(swap, fp);
|
||||
bool Maxpool::DeSerialize(TFile* fp) {
|
||||
bool result = Reconfig::DeSerialize(fp);
|
||||
no_ = ni_;
|
||||
return result;
|
||||
}
|
||||
|
@ -40,8 +40,7 @@ class Maxpool : public Reconfig {
|
||||
}
|
||||
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
virtual bool DeSerialize(bool swap, TFile* fp);
|
||||
virtual bool DeSerialize(TFile* fp);
|
||||
|
||||
// Runs forward propagation of activations on the input line.
|
||||
// See Network for a detailed discussion of the arguments.
|
||||
|
@ -164,14 +164,13 @@ bool Network::Serialize(TFile* fp) const {
|
||||
}
|
||||
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
// Should be overridden by subclasses, but NOT called by their DeSerialize.
|
||||
bool Network::DeSerialize(bool swap, TFile* fp) {
|
||||
bool Network::DeSerialize(TFile* fp) {
|
||||
inT8 data = 0;
|
||||
if (fp->FRead(&data, sizeof(data), 1) != 1) return false;
|
||||
if (data == NT_NONE) {
|
||||
STRING type_name;
|
||||
if (!type_name.DeSerialize(swap, fp)) return false;
|
||||
if (!type_name.DeSerialize(fp)) return false;
|
||||
for (data = 0; data < NT_COUNT && type_name != kTypeNames[data]; ++data) {
|
||||
}
|
||||
if (data == NT_COUNT) {
|
||||
@ -184,27 +183,22 @@ bool Network::DeSerialize(bool swap, TFile* fp) {
|
||||
training_ = data == TS_ENABLED ? TS_ENABLED : TS_DISABLED;
|
||||
if (fp->FRead(&data, sizeof(data), 1) != 1) return false;
|
||||
needs_to_backprop_ = data != 0;
|
||||
if (fp->FRead(&network_flags_, sizeof(network_flags_), 1) != 1) return false;
|
||||
if (fp->FRead(&ni_, sizeof(ni_), 1) != 1) return false;
|
||||
if (fp->FRead(&no_, sizeof(no_), 1) != 1) return false;
|
||||
if (fp->FRead(&num_weights_, sizeof(num_weights_), 1) != 1) return false;
|
||||
if (!name_.DeSerialize(swap, fp)) return false;
|
||||
if (swap) {
|
||||
ReverseN(&network_flags_, sizeof(network_flags_));
|
||||
ReverseN(&ni_, sizeof(ni_));
|
||||
ReverseN(&no_, sizeof(no_));
|
||||
ReverseN(&num_weights_, sizeof(num_weights_));
|
||||
}
|
||||
if (fp->FReadEndian(&network_flags_, sizeof(network_flags_), 1) != 1)
|
||||
return false;
|
||||
if (fp->FReadEndian(&ni_, sizeof(ni_), 1) != 1) return false;
|
||||
if (fp->FReadEndian(&no_, sizeof(no_), 1) != 1) return false;
|
||||
if (fp->FReadEndian(&num_weights_, sizeof(num_weights_), 1) != 1)
|
||||
return false;
|
||||
if (!name_.DeSerialize(fp)) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Reads from the given file. Returns NULL in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
// Determines the type of the serialized class and calls its DeSerialize
|
||||
// on a new object of the appropriate type, which is returned.
|
||||
Network* Network::CreateFromFile(bool swap, TFile* fp) {
|
||||
Network* Network::CreateFromFile(TFile* fp) {
|
||||
Network stub;
|
||||
if (!stub.DeSerialize(swap, fp)) return NULL;
|
||||
if (!stub.DeSerialize(fp)) return NULL;
|
||||
Network* network = NULL;
|
||||
switch (stub.type_) {
|
||||
case NT_CONVOLVE:
|
||||
@ -269,7 +263,7 @@ Network* Network::CreateFromFile(bool swap, TFile* fp) {
|
||||
network->needs_to_backprop_ = stub.needs_to_backprop_;
|
||||
network->network_flags_ = stub.network_flags_;
|
||||
network->num_weights_ = stub.num_weights_;
|
||||
if (!network->DeSerialize(swap, fp)) {
|
||||
if (!network->DeSerialize(fp)) {
|
||||
delete network;
|
||||
return NULL;
|
||||
}
|
||||
|
@ -208,9 +208,8 @@ class Network {
|
||||
// Should be overridden by subclasses, but called by their Serialize.
|
||||
virtual bool Serialize(TFile* fp) const;
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
// Should be overridden by subclasses, but NOT called by their DeSerialize.
|
||||
virtual bool DeSerialize(bool swap, TFile* fp);
|
||||
virtual bool DeSerialize(TFile* fp);
|
||||
|
||||
// Updates the weights using the given learning rate and momentum.
|
||||
// num_samples is the quotient to be used in the adagrad computation iff
|
||||
@ -223,10 +222,9 @@ class Network {
|
||||
double* changed) const {}
|
||||
|
||||
// Reads from the given file. Returns NULL in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
// Determines the type of the serialized class and calls its DeSerialize
|
||||
// on a new object of the appropriate type, which is returned.
|
||||
static Network* CreateFromFile(bool swap, TFile* fp);
|
||||
static Network* CreateFromFile(TFile* fp);
|
||||
|
||||
// Runs forward propagation of activations on the input line.
|
||||
// Note that input and output are both 2-d arrays.
|
||||
|
@ -187,19 +187,18 @@ bool Plumbing::Serialize(TFile* fp) const {
|
||||
}
|
||||
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool Plumbing::DeSerialize(bool swap, TFile* fp) {
|
||||
bool Plumbing::DeSerialize(TFile* fp) {
|
||||
stack_.truncate(0);
|
||||
no_ = 0; // We will be modifying this as we AddToStack.
|
||||
inT32 size;
|
||||
if (fp->FRead(&size, sizeof(size), 1) != 1) return false;
|
||||
if (fp->FReadEndian(&size, sizeof(size), 1) != 1) return false;
|
||||
for (int i = 0; i < size; ++i) {
|
||||
Network* network = CreateFromFile(swap, fp);
|
||||
Network* network = CreateFromFile(fp);
|
||||
if (network == NULL) return false;
|
||||
AddToStack(network);
|
||||
}
|
||||
if ((network_flags_ & NF_LAYER_SPECIFIC_LR) &&
|
||||
!learning_rates_.DeSerialize(swap, fp)) {
|
||||
!learning_rates_.DeSerialize(fp)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
@ -116,8 +116,7 @@ class Plumbing : public Network {
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
virtual bool Serialize(TFile* fp) const;
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
virtual bool DeSerialize(bool swap, TFile* fp);
|
||||
virtual bool DeSerialize(TFile* fp);
|
||||
|
||||
// Updates the weights using the given learning rate and momentum.
|
||||
// num_samples is the quotient to be used in the adagrad computation iff
|
||||
|
@ -59,14 +59,9 @@ bool Reconfig::Serialize(TFile* fp) const {
|
||||
}
|
||||
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool Reconfig::DeSerialize(bool swap, TFile* fp) {
|
||||
if (fp->FRead(&x_scale_, sizeof(x_scale_), 1) != 1) return false;
|
||||
if (fp->FRead(&y_scale_, sizeof(y_scale_), 1) != 1) return false;
|
||||
if (swap) {
|
||||
ReverseN(&x_scale_, sizeof(x_scale_));
|
||||
ReverseN(&y_scale_, sizeof(y_scale_));
|
||||
}
|
||||
bool Reconfig::DeSerialize(TFile* fp) {
|
||||
if (fp->FReadEndian(&x_scale_, sizeof(x_scale_), 1) != 1) return false;
|
||||
if (fp->FReadEndian(&y_scale_, sizeof(y_scale_), 1) != 1) return false;
|
||||
no_ = ni_ * x_scale_ * y_scale_;
|
||||
return true;
|
||||
}
|
||||
|
@ -57,8 +57,7 @@ class Reconfig : public Network {
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
virtual bool Serialize(TFile* fp) const;
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
virtual bool DeSerialize(bool swap, TFile* fp);
|
||||
virtual bool DeSerialize(TFile* fp);
|
||||
|
||||
// Runs forward propagation of activations on the input line.
|
||||
// See Network for a detailed discussion of the arguments.
|
||||
|
@ -53,11 +53,10 @@ bool TFNetwork::Serialize(TFile* fp) const {
|
||||
}
|
||||
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
// Should be overridden by subclasses, but NOT called by their DeSerialize.
|
||||
bool TFNetwork::DeSerialize(bool swap, TFile* fp) {
|
||||
bool TFNetwork::DeSerialize(TFile* fp) {
|
||||
GenericVector<char> data;
|
||||
if (!data.DeSerialize(swap, fp)) return false;
|
||||
if (!data.DeSerialize(fp)) return false;
|
||||
if (!model_proto_.ParseFromArray(&data[0], data.size())) {
|
||||
return false;
|
||||
}
|
||||
|
@ -59,9 +59,8 @@ class TFNetwork : public Network {
|
||||
// Should be overridden by subclasses, but called by their Serialize.
|
||||
virtual bool Serialize(TFile* fp) const;
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
// Should be overridden by subclasses, but NOT called by their DeSerialize.
|
||||
virtual bool DeSerialize(bool swap, TFile* fp);
|
||||
virtual bool DeSerialize(TFile* fp);
|
||||
|
||||
// Runs forward propagation of activations on the input line.
|
||||
// See Network for a detailed discussion of the arguments.
|
||||
|
@ -121,22 +121,22 @@ bool WeightMatrix::Serialize(bool training, TFile* fp) const {
|
||||
}
|
||||
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool WeightMatrix::DeSerialize(bool training, bool swap, TFile* fp) {
|
||||
|
||||
bool WeightMatrix::DeSerialize(bool training, TFile* fp) {
|
||||
uinT8 mode = 0;
|
||||
if (fp->FRead(&mode, sizeof(mode), 1) != 1) return false;
|
||||
int_mode_ = (mode & kInt8Flag) != 0;
|
||||
use_ada_grad_ = (mode & kAdaGradFlag) != 0;
|
||||
if ((mode & kDoubleFlag) == 0) return DeSerializeOld(training, swap, fp);
|
||||
if ((mode & kDoubleFlag) == 0) return DeSerializeOld(training, fp);
|
||||
if (int_mode_) {
|
||||
if (!wi_.DeSerialize(swap, fp)) return false;
|
||||
if (!scales_.DeSerialize(swap, fp)) return false;
|
||||
if (!wi_.DeSerialize(fp)) return false;
|
||||
if (!scales_.DeSerialize(fp)) return false;
|
||||
} else {
|
||||
if (!wf_.DeSerialize(swap, fp)) return false;
|
||||
if (!wf_.DeSerialize(fp)) return false;
|
||||
if (training) {
|
||||
InitBackward(use_ada_grad_);
|
||||
if (!updates_.DeSerialize(swap, fp)) return false;
|
||||
if (use_ada_grad_ && !dw_sq_sum_.DeSerialize(swap, fp)) return false;
|
||||
if (!updates_.DeSerialize(fp)) return false;
|
||||
if (use_ada_grad_ && !dw_sq_sum_.DeSerialize(fp)) return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
@ -144,24 +144,24 @@ bool WeightMatrix::DeSerialize(bool training, bool swap, TFile* fp) {
|
||||
|
||||
// As DeSerialize, but reads an old (float) format WeightMatrix for
|
||||
// backward compatibility.
|
||||
bool WeightMatrix::DeSerializeOld(bool training, bool swap, TFile* fp) {
|
||||
bool WeightMatrix::DeSerializeOld(bool training, TFile* fp) {
|
||||
GENERIC_2D_ARRAY<float> float_array;
|
||||
if (int_mode_) {
|
||||
if (!wi_.DeSerialize(swap, fp)) return false;
|
||||
if (!wi_.DeSerialize(fp)) return false;
|
||||
GenericVector<float> old_scales;
|
||||
if (!old_scales.DeSerialize(swap, fp)) return false;
|
||||
if (!old_scales.DeSerialize(fp)) return false;
|
||||
scales_.init_to_size(old_scales.size(), 0.0);
|
||||
for (int i = 0; i < old_scales.size(); ++i) scales_[i] = old_scales[i];
|
||||
} else {
|
||||
if (!float_array.DeSerialize(swap, fp)) return false;
|
||||
if (!float_array.DeSerialize(fp)) return false;
|
||||
FloatToDouble(float_array, &wf_);
|
||||
}
|
||||
if (training) {
|
||||
InitBackward(use_ada_grad_);
|
||||
if (!float_array.DeSerialize(swap, fp)) return false;
|
||||
if (!float_array.DeSerialize(fp)) return false;
|
||||
FloatToDouble(float_array, &updates_);
|
||||
// Errs was only used in int training, which is now dead.
|
||||
if (!float_array.DeSerialize(swap, fp)) return false;
|
||||
if (!float_array.DeSerialize(fp)) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -97,11 +97,10 @@ class WeightMatrix {
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
bool Serialize(bool training, TFile* fp) const;
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool DeSerialize(bool training, bool swap, TFile* fp);
|
||||
bool DeSerialize(bool training, TFile* fp);
|
||||
// As DeSerialize, but reads an old (float) format WeightMatrix for
|
||||
// backward compatibility.
|
||||
bool DeSerializeOld(bool training, bool swap, TFile* fp);
|
||||
bool DeSerializeOld(bool training, TFile* fp);
|
||||
|
||||
// Computes matrix.vector v = Wu.
|
||||
// u is of size W.dim2() - 1 and the output v is of size W.dim1().
|
||||
|
@ -119,7 +119,7 @@ ShapeTable* LoadShapeTable(const STRING& file_prefix) {
|
||||
TFile shape_fp;
|
||||
if (shape_fp.Open(shape_table_file.string(), nullptr)) {
|
||||
shape_table = new ShapeTable;
|
||||
if (!shape_table->DeSerialize(false, &shape_fp)) {
|
||||
if (!shape_table->DeSerialize(&shape_fp)) {
|
||||
delete shape_table;
|
||||
shape_table = nullptr;
|
||||
tprintf("Error: Failed to read shape table %s\n",
|
||||
|
@ -42,8 +42,7 @@ bool LSTMTester::LoadAllEvalData(const STRING& filenames_file) {
|
||||
// loaded.
|
||||
bool LSTMTester::LoadAllEvalData(const GenericVector<STRING>& filenames) {
|
||||
test_data_.Clear();
|
||||
bool result =
|
||||
test_data_.LoadDocuments(filenames, "eng", CS_SEQUENTIAL, nullptr);
|
||||
bool result = test_data_.LoadDocuments(filenames, CS_SEQUENTIAL, nullptr);
|
||||
total_pages_ = test_data_.TotalPages();
|
||||
return result;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user