mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-18 22:43:45 +08:00
Use POSIX data types for external interfaces (#1358)
Replace the Tesseract specific data types in header files which are part of Debian package libtesseract-dev by POSIX data types. Update also matching cpp files. Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
parent
c6afad03b2
commit
47a326b02d
@ -346,8 +346,8 @@ class GenericVector {
|
||||
// vector are small enough that for efficiency it makes sense
|
||||
// to start with a larger initial size.
|
||||
static const int kDefaultVectorSize = 4;
|
||||
inT32 size_used_;
|
||||
inT32 size_reserved_;
|
||||
int32_t size_used_;
|
||||
int32_t size_reserved_;
|
||||
T* data_;
|
||||
TessCallback1<T>* clear_cb_;
|
||||
// Mutable because Run method is not const
|
||||
@ -536,20 +536,20 @@ class PointerVector : public GenericVector<T*> {
|
||||
// normal GenericVector of those.
|
||||
// Returns false in case of error.
|
||||
bool Serialize(FILE* fp) const {
|
||||
inT32 used = GenericVector<T*>::size_used_;
|
||||
int32_t used = GenericVector<T*>::size_used_;
|
||||
if (fwrite(&used, sizeof(used), 1, fp) != 1) return false;
|
||||
for (int i = 0; i < used; ++i) {
|
||||
inT8 non_null = GenericVector<T*>::data_[i] != NULL;
|
||||
int8_t non_null = GenericVector<T*>::data_[i] != NULL;
|
||||
if (fwrite(&non_null, sizeof(non_null), 1, fp) != 1) return false;
|
||||
if (non_null && !GenericVector<T*>::data_[i]->Serialize(fp)) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
bool Serialize(TFile* fp) const {
|
||||
inT32 used = GenericVector<T*>::size_used_;
|
||||
int32_t used = GenericVector<T*>::size_used_;
|
||||
if (fp->FWrite(&used, sizeof(used), 1) != 1) return false;
|
||||
for (int i = 0; i < used; ++i) {
|
||||
inT8 non_null = GenericVector<T*>::data_[i] != NULL;
|
||||
int8_t non_null = GenericVector<T*>::data_[i] != NULL;
|
||||
if (fp->FWrite(&non_null, sizeof(non_null), 1) != 1) return false;
|
||||
if (non_null && !GenericVector<T*>::data_[i]->Serialize(fp)) return false;
|
||||
}
|
||||
@ -563,13 +563,13 @@ class PointerVector : public GenericVector<T*> {
|
||||
// Also needs T::T(), as new T is used in this function.
|
||||
// Returns false in case of error.
|
||||
bool DeSerialize(bool swap, FILE* fp) {
|
||||
inT32 reserved;
|
||||
int32_t reserved;
|
||||
if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false;
|
||||
if (swap) Reverse32(&reserved);
|
||||
GenericVector<T*>::reserve(reserved);
|
||||
truncate(0);
|
||||
for (int i = 0; i < reserved; ++i) {
|
||||
inT8 non_null;
|
||||
int8_t non_null;
|
||||
if (fread(&non_null, sizeof(non_null), 1, fp) != 1) return false;
|
||||
T* item = NULL;
|
||||
if (non_null) {
|
||||
@ -587,7 +587,7 @@ class PointerVector : public GenericVector<T*> {
|
||||
return true;
|
||||
}
|
||||
bool DeSerialize(TFile* fp) {
|
||||
inT32 reserved;
|
||||
int32_t reserved;
|
||||
if (!DeSerializeSize(fp, &reserved)) return false;
|
||||
GenericVector<T*>::reserve(reserved);
|
||||
truncate(0);
|
||||
@ -600,12 +600,12 @@ class PointerVector : public GenericVector<T*> {
|
||||
// retain the integrity of the stream, the caller must call some combination
|
||||
// of DeSerializeElement and DeSerializeSkip of the exact number returned in
|
||||
// *size, assuming a true return.
|
||||
static bool DeSerializeSize(TFile* fp, inT32* size) {
|
||||
static bool DeSerializeSize(TFile* fp, int32_t* size) {
|
||||
return fp->FReadEndian(size, sizeof(*size), 1) == 1;
|
||||
}
|
||||
// Reads and appends to the vector the next element of the serialization.
|
||||
bool DeSerializeElement(TFile* fp) {
|
||||
inT8 non_null;
|
||||
int8_t non_null;
|
||||
if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) return false;
|
||||
T* item = NULL;
|
||||
if (non_null) {
|
||||
@ -623,7 +623,7 @@ class PointerVector : public GenericVector<T*> {
|
||||
}
|
||||
// Skips the next element of the serialization.
|
||||
static bool DeSerializeSkip(TFile* fp) {
|
||||
inT8 non_null;
|
||||
int8_t non_null;
|
||||
if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) return false;
|
||||
if (non_null) {
|
||||
if (!T::SkipDeSerialize(fp)) return false;
|
||||
@ -906,7 +906,7 @@ bool GenericVector<T>::write(
|
||||
template <typename T>
|
||||
bool GenericVector<T>::read(
|
||||
tesseract::TFile* f, TessResultCallback2<bool, tesseract::TFile*, T*>* cb) {
|
||||
inT32 reserved;
|
||||
int32_t reserved;
|
||||
if (f->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false;
|
||||
reserve(reserved);
|
||||
if (f->FReadEndian(&size_used_, sizeof(size_used_), 1) != 1) return false;
|
||||
@ -947,7 +947,7 @@ bool GenericVector<T>::Serialize(tesseract::TFile* fp) const {
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
template <typename T>
|
||||
bool GenericVector<T>::DeSerialize(bool swap, FILE* fp) {
|
||||
inT32 reserved;
|
||||
int32_t reserved;
|
||||
if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false;
|
||||
if (swap) Reverse32(&reserved);
|
||||
reserve(reserved);
|
||||
@ -961,7 +961,7 @@ bool GenericVector<T>::DeSerialize(bool swap, FILE* fp) {
|
||||
}
|
||||
template <typename T>
|
||||
bool GenericVector<T>::DeSerialize(tesseract::TFile* fp) {
|
||||
inT32 reserved;
|
||||
int32_t reserved;
|
||||
if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false;
|
||||
reserve(reserved);
|
||||
size_used_ = reserved;
|
||||
@ -969,7 +969,7 @@ bool GenericVector<T>::DeSerialize(tesseract::TFile* fp) {
|
||||
}
|
||||
template <typename T>
|
||||
bool GenericVector<T>::SkipDeSerialize(tesseract::TFile* fp) {
|
||||
inT32 reserved;
|
||||
int32_t reserved;
|
||||
if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false;
|
||||
return fp->FRead(NULL, sizeof(T), reserved) == reserved;
|
||||
}
|
||||
@ -1001,7 +1001,7 @@ bool GenericVector<T>::SerializeClasses(tesseract::TFile* fp) const {
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
template <typename T>
|
||||
bool GenericVector<T>::DeSerializeClasses(bool swap, FILE* fp) {
|
||||
inT32 reserved;
|
||||
int32_t reserved;
|
||||
if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false;
|
||||
if (swap) Reverse32(&reserved);
|
||||
T empty;
|
||||
@ -1013,7 +1013,7 @@ bool GenericVector<T>::DeSerializeClasses(bool swap, FILE* fp) {
|
||||
}
|
||||
template <typename T>
|
||||
bool GenericVector<T>::DeSerializeClasses(tesseract::TFile* fp) {
|
||||
inT32 reserved;
|
||||
int32_t reserved;
|
||||
if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false;
|
||||
T empty;
|
||||
init_to_size(reserved, empty);
|
||||
@ -1024,7 +1024,7 @@ bool GenericVector<T>::DeSerializeClasses(tesseract::TFile* fp) {
|
||||
}
|
||||
template <typename T>
|
||||
bool GenericVector<T>::SkipDeSerializeClasses(tesseract::TFile* fp) {
|
||||
inT32 reserved;
|
||||
int32_t reserved;
|
||||
if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false;
|
||||
for (int i = 0; i < reserved; ++i) {
|
||||
if (!T::SkipDeSerialize(fp)) return false;
|
||||
|
@ -42,17 +42,17 @@ class TRand {
|
||||
public:
|
||||
TRand() : seed_(1) {}
|
||||
// Sets the seed to the given value.
|
||||
void set_seed(uinT64 seed) {
|
||||
void set_seed(uint64_t seed) {
|
||||
seed_ = seed;
|
||||
}
|
||||
// Sets the seed using a hash of a string.
|
||||
void set_seed(const std::string& str) {
|
||||
std::hash<std::string> hasher;
|
||||
set_seed(static_cast<uinT64>(hasher(str)));
|
||||
set_seed(static_cast<uint64_t>(hasher(str)));
|
||||
}
|
||||
|
||||
// Returns an integer in the range 0 to MAX_INT32.
|
||||
inT32 IntRand() {
|
||||
int32_t IntRand() {
|
||||
Iterate();
|
||||
return seed_ >> 33;
|
||||
}
|
||||
@ -73,7 +73,7 @@ class TRand {
|
||||
}
|
||||
|
||||
// The current value of the seed.
|
||||
uinT64 seed_;
|
||||
uint64_t seed_;
|
||||
};
|
||||
|
||||
} // namespace tesseract
|
||||
|
@ -65,9 +65,9 @@ typedef unsigned char BOOL8;
|
||||
#define MAX_UINT32 0xffffffff
|
||||
#define MAX_FLOAT32 std::numeric_limits<float>::max()
|
||||
|
||||
#define MIN_INT8 static_cast<inT8>(0x80)
|
||||
#define MIN_INT16 static_cast<inT16>(0x8000)
|
||||
#define MIN_INT32 static_cast<inT32>(0x80000000)
|
||||
#define MIN_INT8 static_cast<int8_t>(0x80)
|
||||
#define MIN_INT16 static_cast<int16_t>(0x8000)
|
||||
#define MIN_INT32 static_cast<int32_t>(0x80000000)
|
||||
#define MIN_UINT8 0x00
|
||||
#define MIN_UINT16 0x0000
|
||||
#define MIN_UINT32 0x00000000
|
||||
|
@ -27,7 +27,7 @@
|
||||
// TODO(rays) further cleanup by redirecting calls to new and creating proper
|
||||
// constructors.
|
||||
|
||||
char *alloc_string(inT32 count) {
|
||||
char *alloc_string(int32_t count) {
|
||||
// Round up the amount allocated to a multiple of 4
|
||||
return static_cast<char*>(malloc((count + 3) & ~3));
|
||||
}
|
||||
@ -36,7 +36,7 @@ void free_string(char *string) {
|
||||
free(string);
|
||||
}
|
||||
|
||||
void *alloc_mem(inT32 count) {
|
||||
void *alloc_mem(int32_t count) {
|
||||
return malloc(static_cast<size_t>(count));
|
||||
}
|
||||
|
||||
|
@ -24,11 +24,11 @@
|
||||
#include "host.h"
|
||||
|
||||
// allocate string
|
||||
extern char *alloc_string(inT32 count);
|
||||
extern char *alloc_string(int32_t count);
|
||||
// free a string.
|
||||
extern void free_string(char *string);
|
||||
// get some memory
|
||||
extern void *alloc_mem(inT32 count);
|
||||
extern void *alloc_mem(int32_t count);
|
||||
// free mem from alloc_mem
|
||||
extern void free_mem(void *oldchunk);
|
||||
|
||||
|
@ -80,16 +80,16 @@ typedef struct { /*single character */
|
||||
// characters sets will need to handle extended characters appropriately, but
|
||||
// *all* code needs to be prepared to receive UTF8 coded characters for
|
||||
// characters such as bullet and fancy quotes.
|
||||
uinT16 char_code; /*character itself */
|
||||
inT16 left; /*of char (-1) */
|
||||
inT16 right; /*of char (-1) */
|
||||
inT16 top; /*of char (-1) */
|
||||
inT16 bottom; /*of char (-1) */
|
||||
inT16 font_index; /*what font (0) */
|
||||
uinT8 confidence; /*0=perfect, 100=reject (0/100) */
|
||||
uinT8 point_size; /*of char, 72=i inch, (10) */
|
||||
inT8 blanks; /*no of spaces before this char (1) */
|
||||
uinT8 formatting; /*char formatting (0) */
|
||||
uint16_t char_code; /*character itself */
|
||||
int16_t left; /*of char (-1) */
|
||||
int16_t right; /*of char (-1) */
|
||||
int16_t top; /*of char (-1) */
|
||||
int16_t bottom; /*of char (-1) */
|
||||
int16_t font_index; /*what font (0) */
|
||||
uint8_t confidence; /*0=perfect, 100=reject (0/100) */
|
||||
uint8_t point_size; /*of char, 72=i inch, (10) */
|
||||
int8_t blanks; /*no of spaces before this char (1) */
|
||||
uint8_t formatting; /*char formatting (0) */
|
||||
} EANYCODE_CHAR; /*single character */
|
||||
|
||||
/**********************************************************************
|
||||
@ -114,14 +114,14 @@ typedef bool (*PROGRESS_FUNC)(int progress, int left, int right, int top,
|
||||
|
||||
class ETEXT_DESC { // output header
|
||||
public:
|
||||
inT16 count; /// chars in this buffer(0)
|
||||
inT16 progress; /// percent complete increasing (0-100)
|
||||
int16_t count; /// chars in this buffer(0)
|
||||
int16_t progress; /// percent complete increasing (0-100)
|
||||
/** Progress monitor covers word recognition and it does not cover layout
|
||||
* analysis.
|
||||
* See Ray comment in https://github.com/tesseract-ocr/tesseract/pull/27 */
|
||||
inT8 more_to_come; /// true if not last
|
||||
volatile inT8 ocr_alive; /// ocr sets to 1, HP 0
|
||||
inT8 err_code; /// for errcode use
|
||||
int8_t more_to_come; /// true if not last
|
||||
volatile int8_t ocr_alive; /// ocr sets to 1, HP 0
|
||||
int8_t err_code; /// for errcode use
|
||||
CANCEL_FUNC cancel; /// returns true to cancel
|
||||
PROGRESS_FUNC progress_callback; /// called whenever progress increases
|
||||
void* cancel_this; /// this or other data for cancel
|
||||
@ -143,9 +143,9 @@ class ETEXT_DESC { // output header
|
||||
}
|
||||
|
||||
// Sets the end time to be deadline_msecs milliseconds from now.
|
||||
void set_deadline_msecs(inT32 deadline_msecs) {
|
||||
void set_deadline_msecs(int32_t deadline_msecs) {
|
||||
gettimeofday(&end_time, NULL);
|
||||
inT32 deadline_secs = deadline_msecs / 1000;
|
||||
int32_t deadline_secs = deadline_msecs / 1000;
|
||||
end_time.tv_sec += deadline_secs;
|
||||
end_time.tv_usec += (deadline_msecs - deadline_secs * 1000) * 1000;
|
||||
if (end_time.tv_usec > 1000000) {
|
||||
|
@ -147,7 +147,7 @@ bool ParamUtils::GetParamAsString(const char *name,
|
||||
member_params->int_params);
|
||||
if (ip) {
|
||||
char buf[128];
|
||||
snprintf(buf, sizeof(buf), "%d", inT32(*ip));
|
||||
snprintf(buf, sizeof(buf), "%d", int32_t(*ip));
|
||||
*value = buf;
|
||||
return true;
|
||||
}
|
||||
@ -177,7 +177,7 @@ void ParamUtils::PrintParams(FILE *fp, const ParamsVectors *member_params) {
|
||||
const ParamsVectors *vec = (v == 0) ? GlobalParams() : member_params;
|
||||
for (i = 0; i < vec->int_params.size(); ++i) {
|
||||
fprintf(fp, "%s\t%d\t%s\n", vec->int_params[i]->name_str(),
|
||||
(inT32)(*vec->int_params[i]), vec->int_params[i]->info_str());
|
||||
(int32_t)(*vec->int_params[i]), vec->int_params[i]->info_str());
|
||||
}
|
||||
for (i = 0; i < vec->bool_params.size(); ++i) {
|
||||
fprintf(fp, "%s\t%d\t%s\n", vec->bool_params[i]->name_str(),
|
||||
|
@ -141,7 +141,7 @@ class Param {
|
||||
|
||||
class IntParam : public Param {
|
||||
public:
|
||||
IntParam(inT32 value, const char *name, const char *comment, bool init,
|
||||
IntParam(int32_t value, const char *name, const char *comment, bool init,
|
||||
ParamsVectors *vec) : Param(name, comment, init) {
|
||||
value_ = value;
|
||||
default_ = value;
|
||||
@ -149,16 +149,16 @@ class IntParam : public Param {
|
||||
vec->int_params.push_back(this);
|
||||
}
|
||||
~IntParam() { ParamUtils::RemoveParam<IntParam>(this, params_vec_); }
|
||||
operator inT32() const { return value_; }
|
||||
void operator=(inT32 value) { value_ = value; }
|
||||
void set_value(inT32 value) { value_ = value; }
|
||||
operator int32_t() const { return value_; }
|
||||
void operator=(int32_t value) { value_ = value; }
|
||||
void set_value(int32_t value) { value_ = value; }
|
||||
void ResetToDefault() {
|
||||
value_ = default_;
|
||||
}
|
||||
|
||||
private:
|
||||
inT32 value_;
|
||||
inT32 default_;
|
||||
int32_t value_;
|
||||
int32_t default_;
|
||||
// Pointer to the vector that contains this param (not owened by this class).
|
||||
GenericVector<IntParam *> *params_vec_;
|
||||
};
|
||||
|
@ -62,9 +62,9 @@ bool TFile::Open(const char* data, int size) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TFile::Open(FILE* fp, inT64 end_offset) {
|
||||
bool TFile::Open(FILE* fp, int64_t end_offset) {
|
||||
offset_ = 0;
|
||||
inT64 current_pos = ftell(fp);
|
||||
int64_t current_pos = ftell(fp);
|
||||
if (end_offset < 0) {
|
||||
if (fseek(fp, 0, SEEK_END))
|
||||
return false;
|
||||
|
@ -60,7 +60,7 @@ class TFile {
|
||||
// From an existing memory buffer.
|
||||
bool Open(const char* data, int size);
|
||||
// From an open file and an end offset.
|
||||
bool Open(FILE* fp, inT64 end_offset);
|
||||
bool Open(FILE* fp, int64_t end_offset);
|
||||
// Sets the value of the swap flag, so that FReadEndian does the right thing.
|
||||
void set_swap(bool value) { swap_ = value; }
|
||||
|
||||
|
@ -67,7 +67,7 @@ void STRING::DiscardData() {
|
||||
|
||||
// This is a private method; ensure FixHeader is called (or used_ is well defined)
|
||||
// beforehand
|
||||
char* STRING::ensure_cstr(inT32 min_capacity) {
|
||||
char* STRING::ensure_cstr(int32_t min_capacity) {
|
||||
STRING_HEADER* orig_header = GetHeader();
|
||||
if (min_capacity <= orig_header->capacity_)
|
||||
return ((char *)this->data_) + sizeof(STRING_HEADER);
|
||||
@ -146,14 +146,14 @@ STRING::~STRING() {
|
||||
// TODO(rays) Change all callers to use TFile and remove the old functions.
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
bool STRING::Serialize(FILE* fp) const {
|
||||
inT32 len = length();
|
||||
int32_t len = length();
|
||||
if (fwrite(&len, sizeof(len), 1, fp) != 1) return false;
|
||||
if (static_cast<int>(fwrite(GetCStr(), 1, len, fp)) != len) return false;
|
||||
return true;
|
||||
}
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
bool STRING::Serialize(TFile* fp) const {
|
||||
inT32 len = length();
|
||||
int32_t len = length();
|
||||
if (fp->FWrite(&len, sizeof(len), 1) != 1) return false;
|
||||
if (fp->FWrite(GetCStr(), 1, len) != len) return false;
|
||||
return true;
|
||||
@ -161,7 +161,7 @@ bool STRING::Serialize(TFile* fp) const {
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool STRING::DeSerialize(bool swap, FILE* fp) {
|
||||
inT32 len;
|
||||
int32_t len;
|
||||
if (fread(&len, sizeof(len), 1, fp) != 1) return false;
|
||||
if (swap)
|
||||
ReverseN(&len, sizeof(len));
|
||||
@ -172,7 +172,7 @@ bool STRING::DeSerialize(bool swap, FILE* fp) {
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// If swap is true, assumes a big/little-endian swap is needed.
|
||||
bool STRING::DeSerialize(TFile* fp) {
|
||||
inT32 len;
|
||||
int32_t len;
|
||||
if (fp->FReadEndian(&len, sizeof(len), 1) != 1) return false;
|
||||
truncate_at(len);
|
||||
if (fp->FRead(GetCStr(), 1, len) != len) return false;
|
||||
@ -181,7 +181,7 @@ bool STRING::DeSerialize(TFile* fp) {
|
||||
|
||||
// As DeSerialize, but only seeks past the data - hence a static method.
|
||||
bool STRING::SkipDeSerialize(tesseract::TFile* fp) {
|
||||
inT32 len;
|
||||
int32_t len;
|
||||
if (fp->FReadEndian(&len, sizeof(len), 1) != 1) return false;
|
||||
return fp->FRead(NULL, 1, len) == len;
|
||||
}
|
||||
@ -190,7 +190,7 @@ BOOL8 STRING::contains(const char c) const {
|
||||
return (c != '\0') && (strchr (GetCStr(), c) != NULL);
|
||||
}
|
||||
|
||||
inT32 STRING::length() const {
|
||||
int32_t STRING::length() const {
|
||||
FixHeader();
|
||||
return GetHeader()->used_ - 1;
|
||||
}
|
||||
@ -218,11 +218,11 @@ const char* STRING::c_str() const {
|
||||
* Also makes the [] operator return a const so it is immutable
|
||||
*/
|
||||
#if STRING_IS_PROTECTED
|
||||
const char& STRING::operator[](inT32 index) const {
|
||||
const char& STRING::operator[](int32_t index) const {
|
||||
return GetCStr()[index];
|
||||
}
|
||||
|
||||
void STRING::insert_range(inT32 index, const char* str, int len) {
|
||||
void STRING::insert_range(int32_t index, const char* str, int len) {
|
||||
// if index is outside current range, then also grow size of string
|
||||
// to accmodate the requested range.
|
||||
STRING_HEADER* this_header = GetHeader();
|
||||
@ -255,7 +255,7 @@ void STRING::insert_range(inT32 index, const char* str, int len) {
|
||||
assert(InvariantOk());
|
||||
}
|
||||
|
||||
void STRING::erase_range(inT32 index, int len) {
|
||||
void STRING::erase_range(int32_t index, int len) {
|
||||
char* this_cstr = GetCStr();
|
||||
STRING_HEADER* this_header = GetHeader();
|
||||
|
||||
@ -266,7 +266,7 @@ void STRING::erase_range(inT32 index, int len) {
|
||||
}
|
||||
|
||||
#else
|
||||
void STRING::truncate_at(inT32 index) {
|
||||
void STRING::truncate_at(int32_t index) {
|
||||
ASSERT_HOST(index >= 0);
|
||||
FixHeader();
|
||||
char* this_cstr = ensure_cstr(index + 1);
|
||||
@ -275,7 +275,7 @@ void STRING::truncate_at(inT32 index) {
|
||||
assert(InvariantOk());
|
||||
}
|
||||
|
||||
char& STRING::operator[](inT32 index) const {
|
||||
char& STRING::operator[](int32_t index) const {
|
||||
// Code is casting away this const and mutating the string,
|
||||
// so mark used_ as -1 to flag it unreliable.
|
||||
GetHeader()->used_ = -1;
|
||||
@ -333,7 +333,7 @@ BOOL8 STRING::operator!=(const char* cstr) const {
|
||||
if (cstr == NULL)
|
||||
return this_header->used_ > 1; // either '\0' or NULL
|
||||
else {
|
||||
inT32 length = strlen(cstr) + 1;
|
||||
int32_t length = strlen(cstr) + 1;
|
||||
return (this_header->used_ != length)
|
||||
|| (memcmp(GetCStr(), cstr, length) != 0);
|
||||
}
|
||||
|
@ -65,32 +65,32 @@ class TESS_API STRING
|
||||
static bool SkipDeSerialize(tesseract::TFile* fp);
|
||||
|
||||
BOOL8 contains(const char c) const;
|
||||
inT32 length() const;
|
||||
inT32 size() const { return length(); }
|
||||
int32_t length() const;
|
||||
int32_t size() const { return length(); }
|
||||
// Workaround to avoid g++ -Wsign-compare warnings.
|
||||
uinT32 unsigned_size() const {
|
||||
const inT32 len = length();
|
||||
uint32_t unsigned_size() const {
|
||||
const int32_t len = length();
|
||||
assert(0 <= len);
|
||||
return static_cast<uinT32>(len);
|
||||
return static_cast<uint32_t>(len);
|
||||
}
|
||||
const char *string() const;
|
||||
const char *c_str() const;
|
||||
|
||||
inline char* strdup() const {
|
||||
inT32 len = length() + 1;
|
||||
int32_t len = length() + 1;
|
||||
return strncpy(new char[len], GetCStr(), len);
|
||||
}
|
||||
|
||||
#if STRING_IS_PROTECTED
|
||||
const char &operator[] (inT32 index) const;
|
||||
const char &operator[] (int32_t index) const;
|
||||
// len is number of chars in s to insert starting at index in this string
|
||||
void insert_range(inT32 index, const char*s, int len);
|
||||
void erase_range(inT32 index, int len);
|
||||
void insert_range(int32_t index, const char*s, int len);
|
||||
void erase_range(int32_t index, int len);
|
||||
#else
|
||||
char &operator[] (inT32 index) const;
|
||||
char &operator[] (int32_t index) const;
|
||||
#endif
|
||||
void split(const char c, GenericVector<STRING> *splited);
|
||||
void truncate_at(inT32 index);
|
||||
void truncate_at(int32_t index);
|
||||
|
||||
BOOL8 operator== (const STRING & string) const;
|
||||
BOOL8 operator!= (const STRING & string) const;
|
||||
@ -118,7 +118,7 @@ class TESS_API STRING
|
||||
void add_str_double(const char* str, double number);
|
||||
|
||||
// ensure capacity but keep pointer encapsulated
|
||||
inline void ensure(inT32 min_capacity) { ensure_cstr(min_capacity); }
|
||||
inline void ensure(int32_t min_capacity) { ensure_cstr(min_capacity); }
|
||||
|
||||
private:
|
||||
typedef struct STRING_HEADER {
|
||||
@ -171,7 +171,7 @@ class TESS_API STRING
|
||||
// Ensure string has requested capacity as optimization
|
||||
// to avoid unnecessary reallocations.
|
||||
// The return value is a cstr buffer with at least requested capacity
|
||||
char* ensure_cstr(inT32 min_capacity);
|
||||
char* ensure_cstr(int32_t min_capacity);
|
||||
|
||||
void FixHeader() const; // make used_ non-negative, even if const
|
||||
|
||||
|
@ -95,12 +95,12 @@ class RecodedCharID {
|
||||
|
||||
private:
|
||||
// True if this code is self-normalizing, ie is the master entry for indices
|
||||
// that map to the same code. Has boolean value, but inT8 for serialization.
|
||||
inT8 self_normalized_;
|
||||
// that map to the same code. Has boolean value, but int8_t for serialization.
|
||||
int8_t self_normalized_;
|
||||
// The number of elements in use in code_;
|
||||
inT32 length_;
|
||||
int32_t length_;
|
||||
// The re-encoded form of the unichar-id to which this RecodedCharID relates.
|
||||
inT32 code_[kMaxCodeLen];
|
||||
int32_t code_[kMaxCodeLen];
|
||||
};
|
||||
|
||||
// Class holds a "compression" of a unicharset to simplify the learning problem
|
||||
|
@ -136,8 +136,8 @@ class CHAR_FRAGMENT {
|
||||
// ie did not need chopping to be isolated, but may have been separated
|
||||
// out from a multi-outline blob.
|
||||
bool natural;
|
||||
inT16 pos; // fragment position in the character
|
||||
inT16 total; // total number of fragments in the character
|
||||
int16_t pos; // fragment position in the character
|
||||
int16_t total; // total number of fragments in the character
|
||||
};
|
||||
|
||||
// The UNICHARSET class is an utility class for Tesseract that holds the
|
||||
@ -582,13 +582,13 @@ class UNICHARSET {
|
||||
int min_bottom, int max_bottom,
|
||||
int min_top, int max_top) {
|
||||
unichars[unichar_id].properties.min_bottom =
|
||||
static_cast<uinT8>(ClipToRange(min_bottom, 0, MAX_UINT8));
|
||||
static_cast<uint8_t>(ClipToRange(min_bottom, 0, MAX_UINT8));
|
||||
unichars[unichar_id].properties.max_bottom =
|
||||
static_cast<uinT8>(ClipToRange(max_bottom, 0, MAX_UINT8));
|
||||
static_cast<uint8_t>(ClipToRange(max_bottom, 0, MAX_UINT8));
|
||||
unichars[unichar_id].properties.min_top =
|
||||
static_cast<uinT8>(ClipToRange(min_top, 0, MAX_UINT8));
|
||||
static_cast<uint8_t>(ClipToRange(min_top, 0, MAX_UINT8));
|
||||
unichars[unichar_id].properties.max_top =
|
||||
static_cast<uinT8>(ClipToRange(max_top, 0, MAX_UINT8));
|
||||
static_cast<uint8_t>(ClipToRange(max_top, 0, MAX_UINT8));
|
||||
}
|
||||
// Returns the width stats (as mean, sd) of the given unichar relative to the
|
||||
// median advance of all characters in the character set.
|
||||
@ -933,10 +933,10 @@ class UNICHARSET {
|
||||
// baseline-normalized coordinates, ie, where the baseline is
|
||||
// kBlnBaselineOffset and the meanline is kBlnBaselineOffset + kBlnXHeight
|
||||
// (See normalis.h for the definitions).
|
||||
uinT8 min_bottom;
|
||||
uinT8 max_bottom;
|
||||
uinT8 min_top;
|
||||
uinT8 max_top;
|
||||
uint8_t min_bottom;
|
||||
uint8_t max_bottom;
|
||||
uint8_t min_top;
|
||||
uint8_t max_top;
|
||||
// Statstics of the widths of bounding box, relative to the median advance.
|
||||
float width;
|
||||
float width_sd;
|
||||
|
@ -63,8 +63,8 @@ class Convolve : public Network {
|
||||
|
||||
protected:
|
||||
// Serialized data.
|
||||
inT32 half_x_;
|
||||
inT32 half_y_;
|
||||
int32_t half_x_;
|
||||
int32_t half_y_;
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
@ -148,7 +148,7 @@ void FullyConnected::Forward(bool debug, const NetworkIO& input,
|
||||
#endif
|
||||
double* temp_line = temp_lines[thread_id];
|
||||
const double* d_input = NULL;
|
||||
const inT8* i_input = NULL;
|
||||
const int8_t* i_input = NULL;
|
||||
if (input.int_mode()) {
|
||||
i_input = input.i(t);
|
||||
} else {
|
||||
@ -188,7 +188,7 @@ void FullyConnected::SetupForward(const NetworkIO& input,
|
||||
}
|
||||
}
|
||||
|
||||
void FullyConnected::ForwardTimeStep(const double* d_input, const inT8* i_input,
|
||||
void FullyConnected::ForwardTimeStep(const double* d_input, const int8_t* i_input,
|
||||
int t, double* output_line) {
|
||||
// input is copied to source_ line-by-line for cache coherency.
|
||||
if (IsTraining() && external_source_ == NULL && d_input != NULL)
|
||||
|
@ -91,7 +91,7 @@ class FullyConnected : public Network {
|
||||
// Components of Forward so FullyConnected can be reused inside LSTM.
|
||||
void SetupForward(const NetworkIO& input,
|
||||
const TransposedArray* input_transpose);
|
||||
void ForwardTimeStep(const double* d_input, const inT8* i_input, int t,
|
||||
void ForwardTimeStep(const double* d_input, const int8_t* i_input, int t,
|
||||
double* output_line);
|
||||
|
||||
// Runs backward propagation of errors on the deltas line.
|
||||
|
@ -368,7 +368,7 @@ void LSTM::Forward(bool debug, const NetworkIO& input,
|
||||
MultiplyVectorsInPlace(ns_, temp_lines[GF1], curr_state);
|
||||
if (Is2D()) {
|
||||
// Max-pool the forget gates (in 2-d) instead of blindly adding.
|
||||
inT8* which_fg_col = which_fg_[t];
|
||||
int8_t* which_fg_col = which_fg_[t];
|
||||
memset(which_fg_col, 1, ns_ * sizeof(which_fg_col[0]));
|
||||
if (valid_2d) {
|
||||
const double* stepped_state = states[mod_t];
|
||||
|
@ -129,14 +129,14 @@ class LSTM : public Network {
|
||||
// Size of padded input to weight matrices = ni_ + no_ for 1-D operation
|
||||
// and ni_ + 2 * no_ for 2-D operation. Note that there is a phantom 1 input
|
||||
// for the bias that makes the weight matrices of size [na + 1][no].
|
||||
inT32 na_;
|
||||
int32_t na_;
|
||||
// Number of internal states. Equal to no_ except for a softmax LSTM.
|
||||
// ns_ is NOT serialized, but is calculated from gate_weights_.
|
||||
inT32 ns_;
|
||||
int32_t ns_;
|
||||
// Number of additional feedback states. The softmax types feed back
|
||||
// additional output information on top of the ns_ internal states.
|
||||
// In the case of a binary-coded (EMBEDDED) softmax, nf_ < no_.
|
||||
inT32 nf_;
|
||||
int32_t nf_;
|
||||
// Flag indicating 2-D operation.
|
||||
bool is_2d_;
|
||||
|
||||
@ -149,7 +149,7 @@ class LSTM : public Network {
|
||||
// Internal state used during forward operation, of size [width, ns].
|
||||
NetworkIO state_;
|
||||
// State of the 2-d maxpool, generated during forward, used during backward.
|
||||
GENERIC_2D_ARRAY<inT8> which_fg_;
|
||||
GENERIC_2D_ARRAY<int8_t> which_fg_;
|
||||
// Internal state saved from forward, but used only during backward.
|
||||
NetworkIO node_values_[WT_COUNT];
|
||||
// Preserved input stride_map used for Backward when NT_LSTM_SQUASHED.
|
||||
|
@ -223,7 +223,7 @@ class LSTMRecognizer {
|
||||
protected:
|
||||
// Sets the random seed from the sample_iteration_;
|
||||
void SetRandomSeed() {
|
||||
inT64 seed = static_cast<inT64>(sample_iteration_) * 0x10000001;
|
||||
int64_t seed = static_cast<int64_t>(sample_iteration_) * 0x10000001;
|
||||
randomizer_.set_seed(seed);
|
||||
randomizer_.IntRand();
|
||||
}
|
||||
@ -280,14 +280,14 @@ class LSTMRecognizer {
|
||||
STRING network_str_;
|
||||
// Flags used to determine the training method of the network.
|
||||
// See enum TrainingFlags above.
|
||||
inT32 training_flags_;
|
||||
int32_t training_flags_;
|
||||
// Number of actual backward training steps used.
|
||||
inT32 training_iteration_;
|
||||
int32_t training_iteration_;
|
||||
// Index into training sample set. sample_iteration >= training_iteration_.
|
||||
inT32 sample_iteration_;
|
||||
int32_t sample_iteration_;
|
||||
// Index in softmax of null character. May take the value UNICHAR_BROKEN or
|
||||
// ccutil_.unicharset.size().
|
||||
inT32 null_char_;
|
||||
int32_t null_char_;
|
||||
// Learning rate and momentum multipliers of deltas in backprop.
|
||||
float learning_rate_;
|
||||
float momentum_;
|
||||
|
@ -88,7 +88,7 @@ LSTMTrainer::LSTMTrainer(FileReader file_reader, FileWriter file_writer,
|
||||
CheckPointReader checkpoint_reader,
|
||||
CheckPointWriter checkpoint_writer,
|
||||
const char* model_base, const char* checkpoint_name,
|
||||
int debug_interval, inT64 max_memory)
|
||||
int debug_interval, int64_t max_memory)
|
||||
: randomly_rotate_(false),
|
||||
training_data_(max_memory),
|
||||
file_reader_(file_reader),
|
||||
@ -446,7 +446,7 @@ bool LSTMTrainer::Serialize(SerializeAmount serialize_amount,
|
||||
if (fp->FWrite(&error_rates_, sizeof(error_rates_), 1) != 1) return false;
|
||||
if (fp->FWrite(&training_stage_, sizeof(training_stage_), 1) != 1)
|
||||
return false;
|
||||
uinT8 amount = serialize_amount;
|
||||
uint8_t amount = serialize_amount;
|
||||
if (fp->FWrite(&amount, sizeof(amount), 1) != 1) return false;
|
||||
if (serialize_amount == LIGHT) return true; // We are done.
|
||||
if (fp->FWrite(&best_error_rate_, sizeof(best_error_rate_), 1) != 1)
|
||||
@ -505,7 +505,7 @@ bool LSTMTrainer::DeSerialize(const TessdataManager* mgr, TFile* fp) {
|
||||
if (fp->FRead(&error_rates_, sizeof(error_rates_), 1) != 1) return false;
|
||||
if (fp->FReadEndian(&training_stage_, sizeof(training_stage_), 1) != 1)
|
||||
return false;
|
||||
uinT8 amount;
|
||||
uint8_t amount;
|
||||
if (fp->FRead(&amount, sizeof(amount), 1) != 1) return false;
|
||||
if (amount == LIGHT) return true; // Don't read the rest.
|
||||
if (fp->FReadEndian(&best_error_rate_, sizeof(best_error_rate_), 1) != 1)
|
||||
|
@ -94,7 +94,7 @@ class LSTMTrainer : public LSTMRecognizer {
|
||||
CheckPointReader checkpoint_reader,
|
||||
CheckPointWriter checkpoint_writer,
|
||||
const char* model_base, const char* checkpoint_name,
|
||||
int debug_interval, inT64 max_memory);
|
||||
int debug_interval, int64_t max_memory);
|
||||
virtual ~LSTMTrainer();
|
||||
|
||||
// Tries to deserialize a trainer from the given file and silently returns
|
||||
|
@ -123,7 +123,7 @@ void Network::SetEnableTraining(TrainingState state) {
|
||||
|
||||
// Sets flags that control the action of the network. See NetworkFlags enum
|
||||
// for bit values.
|
||||
void Network::SetNetworkFlags(uinT32 flags) {
|
||||
void Network::SetNetworkFlags(uint32_t flags) {
|
||||
network_flags_ = flags;
|
||||
}
|
||||
|
||||
@ -151,7 +151,7 @@ bool Network::SetupNeedsBackprop(bool needs_backprop) {
|
||||
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
bool Network::Serialize(TFile* fp) const {
|
||||
inT8 data = NT_NONE;
|
||||
int8_t data = NT_NONE;
|
||||
if (fp->FWrite(&data, sizeof(data), 1) != 1) return false;
|
||||
STRING type_name = kTypeNames[type_];
|
||||
if (!type_name.Serialize(fp)) return false;
|
||||
@ -170,7 +170,7 @@ bool Network::Serialize(TFile* fp) const {
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
// Should be overridden by subclasses, but NOT called by their DeSerialize.
|
||||
bool Network::DeSerialize(TFile* fp) {
|
||||
inT8 data = 0;
|
||||
int8_t data = 0;
|
||||
if (fp->FRead(&data, sizeof(data), 1) != 1) return false;
|
||||
if (data == NT_NONE) {
|
||||
STRING type_name;
|
||||
|
@ -164,7 +164,7 @@ class Network {
|
||||
|
||||
// Sets flags that control the action of the network. See NetworkFlags enum
|
||||
// for bit values.
|
||||
virtual void SetNetworkFlags(uinT32 flags);
|
||||
virtual void SetNetworkFlags(uint32_t flags);
|
||||
|
||||
// Sets up the network for training. Initializes weights using weights of
|
||||
// scale `range` picked according to the random number generator `randomizer`.
|
||||
@ -299,10 +299,10 @@ class Network {
|
||||
NetworkType type_; // Type of the derived network class.
|
||||
TrainingState training_; // Are we currently training?
|
||||
bool needs_to_backprop_; // This network needs to output back_deltas.
|
||||
inT32 network_flags_; // Behavior control flags in NetworkFlags.
|
||||
inT32 ni_; // Number of input values.
|
||||
inT32 no_; // Number of output values.
|
||||
inT32 num_weights_; // Number of weights in this and sub-network.
|
||||
int32_t network_flags_; // Behavior control flags in NetworkFlags.
|
||||
int32_t ni_; // Number of input values.
|
||||
int32_t no_; // Number of output values.
|
||||
int32_t num_weights_; // Number of weights in this and sub-network.
|
||||
STRING name_; // A unique name for this layer.
|
||||
|
||||
// NOT-serialized debug data.
|
||||
|
@ -223,7 +223,7 @@ void NetworkIO::Copy2DImage(int batch, Pix* pix, float black, float contrast,
|
||||
int num_features = NumFeatures();
|
||||
bool color = num_features == 3;
|
||||
if (width > target_width) width = target_width;
|
||||
uinT32* line = pixGetData(pix);
|
||||
uint32_t* line = pixGetData(pix);
|
||||
for (int y = 0; y < target_height; ++y, line += wpl) {
|
||||
int x = 0;
|
||||
if (y < height) {
|
||||
@ -262,7 +262,7 @@ void NetworkIO::Copy1DGreyImage(int batch, Pix* pix, float black,
|
||||
int x;
|
||||
for (x = 0; x < width; ++x, ++t) {
|
||||
for (int y = 0; y < height; ++y) {
|
||||
uinT32* line = pixGetData(pix) + wpl * y;
|
||||
uint32_t* line = pixGetData(pix) + wpl * y;
|
||||
int pixel = GET_DATA_BYTE(line, x);
|
||||
SetPixel(t, y, pixel, black, contrast);
|
||||
}
|
||||
@ -307,7 +307,7 @@ Pix* NetworkIO::ToPix() const {
|
||||
int im_y = top_im_y;
|
||||
int t = index.t();
|
||||
if (int_mode_) {
|
||||
const inT8* features = i_[t];
|
||||
const int8_t* features = i_[t];
|
||||
for (int y = 0; y < num_features; ++y, im_y += im_height) {
|
||||
int pixel = features[y * feature_factor];
|
||||
// 1 or 2 features use greyscale.
|
||||
@ -421,7 +421,7 @@ void NetworkIO::ZeroTimeStepGeneral(int t, int offset, int num_features) {
|
||||
void NetworkIO::Randomize(int t, int offset, int num_features,
|
||||
TRand* randomizer) {
|
||||
if (int_mode_) {
|
||||
inT8* line = i_[t] + offset;
|
||||
int8_t* line = i_[t] + offset;
|
||||
for (int i = 0; i < num_features; ++i)
|
||||
line[i] = IntCastRounded(randomizer->SignedRand(MAX_INT8));
|
||||
} else {
|
||||
@ -602,7 +602,7 @@ bool NetworkIO::AnySuspiciousTruth(float confidence_thr) const {
|
||||
// Reads a single timestep to floats in the range [-1, 1].
|
||||
void NetworkIO::ReadTimeStep(int t, double* output) const {
|
||||
if (int_mode_) {
|
||||
const inT8* line = i_[t];
|
||||
const int8_t* line = i_[t];
|
||||
for (int i = 0; i < i_.dim2(); ++i) {
|
||||
output[i] = static_cast<double>(line[i]) / MAX_INT8;
|
||||
}
|
||||
@ -618,7 +618,7 @@ void NetworkIO::ReadTimeStep(int t, double* output) const {
|
||||
void NetworkIO::AddTimeStep(int t, double* inout) const {
|
||||
int num_features = NumFeatures();
|
||||
if (int_mode_) {
|
||||
const inT8* line = i_[t];
|
||||
const int8_t* line = i_[t];
|
||||
for (int i = 0; i < num_features; ++i) {
|
||||
inout[i] += static_cast<double>(line[i]) / MAX_INT8;
|
||||
}
|
||||
@ -634,7 +634,7 @@ void NetworkIO::AddTimeStep(int t, double* inout) const {
|
||||
void NetworkIO::AddTimeStepPart(int t, int offset, int num_features,
|
||||
float* inout) const {
|
||||
if (int_mode_) {
|
||||
const inT8* line = i_[t] + offset;
|
||||
const int8_t* line = i_[t] + offset;
|
||||
for (int i = 0; i < num_features; ++i) {
|
||||
inout[i] += static_cast<float>(line[i]) / MAX_INT8;
|
||||
}
|
||||
@ -656,7 +656,7 @@ void NetworkIO::WriteTimeStep(int t, const double* input) {
|
||||
void NetworkIO::WriteTimeStepPart(int t, int offset, int num_features,
|
||||
const double* input) {
|
||||
if (int_mode_) {
|
||||
inT8* line = i_[t] + offset;
|
||||
int8_t* line = i_[t] + offset;
|
||||
for (int i = 0; i < num_features; ++i) {
|
||||
line[i] = ClipToRange(IntCastRounded(input[i] * MAX_INT8),
|
||||
-MAX_INT8, MAX_INT8);
|
||||
@ -675,8 +675,8 @@ void NetworkIO::MaxpoolTimeStep(int dest_t, const NetworkIO& src, int src_t,
|
||||
ASSERT_HOST(int_mode_ == src.int_mode_);
|
||||
if (int_mode_) {
|
||||
int dim = i_.dim2();
|
||||
inT8* dest_line = i_[dest_t];
|
||||
const inT8* src_line = src.i_[src_t];
|
||||
int8_t* dest_line = i_[dest_t];
|
||||
const int8_t* src_line = src.i_[src_t];
|
||||
for (int i = 0; i < dim; ++i) {
|
||||
if (dest_line[i] < src_line[i]) {
|
||||
dest_line[i] = src_line[i];
|
||||
@ -721,7 +721,7 @@ float NetworkIO::MinOfMaxes() const {
|
||||
for (int t = 0; t < width; ++t) {
|
||||
float max_value = -MAX_FLOAT32;
|
||||
if (int_mode_) {
|
||||
const inT8* column = i_[t];
|
||||
const int8_t* column = i_[t];
|
||||
for (int i = 0; i < num_features; ++i) {
|
||||
if (column[i] > max_value) max_value = column[i];
|
||||
}
|
||||
@ -747,9 +747,9 @@ void NetworkIO::CombineOutputs(const NetworkIO& base_output,
|
||||
if (int_mode_) {
|
||||
// Number of outputs from base and final result.
|
||||
for (int t = 0; t < width; ++t) {
|
||||
inT8* out_line = i_[t];
|
||||
const inT8* base_line = base_output.i_[t];
|
||||
const inT8* comb_line = combiner_output.i_[t];
|
||||
int8_t* out_line = i_[t];
|
||||
const int8_t* base_line = base_output.i_[t];
|
||||
const int8_t* comb_line = combiner_output.i_[t];
|
||||
float base_weight = static_cast<float>(comb_line[no]) / MAX_INT8;
|
||||
float boost_weight = 1.0f - base_weight;
|
||||
for (int i = 0; i < no; ++i) {
|
||||
|
@ -34,7 +34,7 @@ struct Pix;
|
||||
namespace tesseract {
|
||||
|
||||
// Class to contain all the input/output of a network, allowing for fixed or
|
||||
// variable-strided 2d to 1d mapping, and float or inT8 values. Provides
|
||||
// variable-strided 2d to 1d mapping, and float or int8_t values. Provides
|
||||
// enough calculating functions to hide the detail of the implementation.
|
||||
class NetworkIO {
|
||||
public:
|
||||
@ -120,7 +120,7 @@ class NetworkIO {
|
||||
ASSERT_HOST(!int_mode_);
|
||||
return f_[t];
|
||||
}
|
||||
const inT8* i(int t) const {
|
||||
const int8_t* i(int t) const {
|
||||
ASSERT_HOST(int_mode_);
|
||||
return i_[t];
|
||||
}
|
||||
@ -262,8 +262,8 @@ class NetworkIO {
|
||||
ASSERT_HOST(!v_io.int_mode_);
|
||||
int dim = f_.dim2();
|
||||
if (int_mode_) {
|
||||
const inT8* u = i_[t];
|
||||
const inT8* v = v_io.i_[t];
|
||||
const int8_t* u = i_[t];
|
||||
const int8_t* v = v_io.i_[t];
|
||||
for (int i = 0; i < dim; ++i) {
|
||||
product[i] = f(u[i] / static_cast<double>(MAX_INT8)) * v[i] /
|
||||
static_cast<double>(MAX_INT8);
|
||||
@ -333,7 +333,7 @@ class NetworkIO {
|
||||
|
||||
// Choice of float vs 8 bit int for data.
|
||||
GENERIC_2D_ARRAY<float> f_;
|
||||
GENERIC_2D_ARRAY<inT8> i_;
|
||||
GENERIC_2D_ARRAY<int8_t> i_;
|
||||
// Which of f_ and i_ are we actually using.
|
||||
bool int_mode_;
|
||||
// Stride for 2d input data.
|
||||
|
@ -242,7 +242,7 @@ class NetworkScratch {
|
||||
}; // class Stack.
|
||||
|
||||
private:
|
||||
// If true, the network weights are inT8, if false, float.
|
||||
// If true, the network weights are int8_t, if false, float.
|
||||
bool int_mode_;
|
||||
// Stacks of NetworkIO and GenericVector<float>. Once allocated, they are not
|
||||
// deleted until the NetworkScratch is deleted.
|
||||
|
@ -39,7 +39,7 @@ void Plumbing::SetEnableTraining(TrainingState state) {
|
||||
|
||||
// Sets flags that control the action of the network. See NetworkFlags enum
|
||||
// for bit values.
|
||||
void Plumbing::SetNetworkFlags(uinT32 flags) {
|
||||
void Plumbing::SetNetworkFlags(uint32_t flags) {
|
||||
Network::SetNetworkFlags(flags);
|
||||
for (int i = 0; i < stack_.size(); ++i)
|
||||
stack_[i]->SetNetworkFlags(flags);
|
||||
@ -184,7 +184,7 @@ float* Plumbing::LayerLearningRatePtr(const char* id) const {
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
bool Plumbing::Serialize(TFile* fp) const {
|
||||
if (!Network::Serialize(fp)) return false;
|
||||
inT32 size = stack_.size();
|
||||
int32_t size = stack_.size();
|
||||
// Can't use PointerVector::Serialize here as we need a special DeSerialize.
|
||||
if (fp->FWrite(&size, sizeof(size), 1) != 1) return false;
|
||||
for (int i = 0; i < size; ++i)
|
||||
@ -200,7 +200,7 @@ bool Plumbing::Serialize(TFile* fp) const {
|
||||
bool Plumbing::DeSerialize(TFile* fp) {
|
||||
stack_.truncate(0);
|
||||
no_ = 0; // We will be modifying this as we AddToStack.
|
||||
inT32 size;
|
||||
int32_t size;
|
||||
if (fp->FReadEndian(&size, sizeof(size), 1) != 1) return false;
|
||||
for (int i = 0; i < size; ++i) {
|
||||
Network* network = CreateFromFile(fp);
|
||||
|
@ -49,7 +49,7 @@ class Plumbing : public Network {
|
||||
|
||||
// Sets flags that control the action of the network. See NetworkFlags enum
|
||||
// for bit values.
|
||||
virtual void SetNetworkFlags(uinT32 flags);
|
||||
virtual void SetNetworkFlags(uint32_t flags);
|
||||
|
||||
// Sets up the network for training. Initializes weights using weights of
|
||||
// scale `range` picked according to the random number generator `randomizer`.
|
||||
|
@ -736,7 +736,7 @@ void RecodeBeamSearch::PushHeapIfBetter(int max_size, int code, int unichar_id,
|
||||
float score = cert;
|
||||
if (prev != NULL) score += prev->score;
|
||||
if (heap->size() < max_size || score > heap->PeekTop().data.score) {
|
||||
uinT64 hash = ComputeCodeHash(code, dup, prev);
|
||||
uint64_t hash = ComputeCodeHash(code, dup, prev);
|
||||
RecodeNode node(code, unichar_id, permuter, dawg_start, word_start, end,
|
||||
dup, cert, score, prev, d, hash);
|
||||
if (UpdateHeapIfMatched(&node, heap)) return;
|
||||
@ -791,12 +791,12 @@ bool RecodeBeamSearch::UpdateHeapIfMatched(RecodeNode* new_node,
|
||||
}
|
||||
|
||||
// Computes and returns the code-hash for the given code and prev.
|
||||
uinT64 RecodeBeamSearch::ComputeCodeHash(int code, bool dup,
|
||||
const RecodeNode* prev) const {
|
||||
uinT64 hash = prev == nullptr ? 0 : prev->code_hash;
|
||||
uint64_t RecodeBeamSearch::ComputeCodeHash(int code, bool dup,
|
||||
const RecodeNode* prev) const {
|
||||
uint64_t hash = prev == nullptr ? 0 : prev->code_hash;
|
||||
if (!dup && code != null_char_) {
|
||||
int num_classes = recoder_.code_range();
|
||||
uinT64 carry = (((hash >> 32) * num_classes) >> 32);
|
||||
uint64_t carry = (((hash >> 32) * num_classes) >> 32);
|
||||
hash *= num_classes;
|
||||
hash += carry;
|
||||
hash += code;
|
||||
|
@ -102,7 +102,7 @@ struct RecodeNode {
|
||||
code_hash(0) {}
|
||||
RecodeNode(int c, int uni_id, PermuterType perm, bool dawg_start,
|
||||
bool word_start, bool end, bool dup, float cert, float s,
|
||||
const RecodeNode* p, DawgPositionVector* d, uinT64 hash)
|
||||
const RecodeNode* p, DawgPositionVector* d, uint64_t hash)
|
||||
: code(c),
|
||||
unichar_id(uni_id),
|
||||
permuter(perm),
|
||||
@ -166,7 +166,7 @@ struct RecodeNode {
|
||||
DawgPositionVector* dawgs;
|
||||
// A hash of all codes in the prefix and this->code as well. Used for
|
||||
// duplicate path removal.
|
||||
uinT64 code_hash;
|
||||
uint64_t code_hash;
|
||||
};
|
||||
|
||||
typedef KDPairInc<double, RecodeNode> RecodePair;
|
||||
@ -337,7 +337,7 @@ class RecodeBeamSearch {
|
||||
// with reshuffle if needed. Returns true if there was a match.
|
||||
bool UpdateHeapIfMatched(RecodeNode* new_node, RecodeHeap* heap);
|
||||
// Computes and returns the code-hash for the given code and prev.
|
||||
uinT64 ComputeCodeHash(int code, bool dup, const RecodeNode* prev) const;
|
||||
uint64_t ComputeCodeHash(int code, bool dup, const RecodeNode* prev) const;
|
||||
// Backtracks to extract the best path through the lattice that was built
|
||||
// during Decode. On return the best_nodes vector essentially contains the set
|
||||
// of code, score pairs that make the optimal path with the constraint that
|
||||
|
@ -75,8 +75,8 @@ class Reconfig : public Network {
|
||||
// Non-serialized data used to store parameters between forward and back.
|
||||
StrideMap back_map_;
|
||||
// Serialized data.
|
||||
inT32 x_scale_;
|
||||
inT32 y_scale_;
|
||||
int32_t x_scale_;
|
||||
int32_t y_scale_;
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
@ -103,7 +103,7 @@ void WeightMatrix::ConvertToInt() {
|
||||
int dim2 = wi_.dim2();
|
||||
for (int t = 0; t < wi_.dim1(); ++t) {
|
||||
double* f_line = wf_[t];
|
||||
inT8* i_line = wi_[t];
|
||||
int8_t* i_line = wi_[t];
|
||||
double max_abs = 0.0;
|
||||
for (int f = 0; f < dim2; ++f) {
|
||||
double abs_val = fabs(f_line[f]);
|
||||
@ -133,7 +133,7 @@ void WeightMatrix::InitBackward() {
|
||||
if (use_adam_) dw_sq_sum_.Resize(no, ni, 0.0);
|
||||
}
|
||||
|
||||
// Flag on mode to indicate that this weightmatrix uses inT8.
|
||||
// Flag on mode to indicate that this weightmatrix uses int8_t.
|
||||
const int kInt8Flag = 1;
|
||||
// Flag on mode to indicate that this weightmatrix uses adam.
|
||||
const int kAdamFlag = 4;
|
||||
@ -146,7 +146,7 @@ const int kDoubleFlag = 128;
|
||||
bool WeightMatrix::Serialize(bool training, TFile* fp) const {
|
||||
// For backward compatibility, add kDoubleFlag to mode to indicate the doubles
|
||||
// format, without errs, so we can detect and read old format weight matrices.
|
||||
uinT8 mode =
|
||||
uint8_t mode =
|
||||
(int_mode_ ? kInt8Flag : 0) | (use_adam_ ? kAdamFlag : 0) | kDoubleFlag;
|
||||
if (fp->FWrite(&mode, sizeof(mode), 1) != 1) return false;
|
||||
if (int_mode_) {
|
||||
@ -163,7 +163,7 @@ bool WeightMatrix::Serialize(bool training, TFile* fp) const {
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
|
||||
bool WeightMatrix::DeSerialize(bool training, TFile* fp) {
|
||||
uinT8 mode = 0;
|
||||
uint8_t mode = 0;
|
||||
if (fp->FRead(&mode, sizeof(mode), 1) != 1) return false;
|
||||
int_mode_ = (mode & kInt8Flag) != 0;
|
||||
use_adam_ = (mode & kAdamFlag) != 0;
|
||||
@ -218,7 +218,7 @@ void WeightMatrix::MatrixDotVector(const double* u, double* v) const {
|
||||
MatrixDotVectorInternal(wf_, true, false, u, v);
|
||||
}
|
||||
|
||||
void WeightMatrix::MatrixDotVector(const inT8* u, double* v) const {
|
||||
void WeightMatrix::MatrixDotVector(const int8_t* u, double* v) const {
|
||||
ASSERT_HOST(int_mode_);
|
||||
ASSERT_HOST(multiplier_ != nullptr);
|
||||
multiplier_->MatrixDotVector(wi_, scales_, u, v);
|
||||
|
@ -60,7 +60,7 @@ class TransposedArray : public GENERIC_2D_ARRAY<double> {
|
||||
}; // class TransposedArray
|
||||
|
||||
// Generic weight matrix for network layers. Can store the matrix as either
|
||||
// an array of floats or inT8. Provides functions to compute the forward and
|
||||
// an array of floats or int8_t. Provides functions to compute the forward and
|
||||
// backward steps with the matrix and updates to the weights.
|
||||
class WeightMatrix {
|
||||
public:
|
||||
@ -122,7 +122,7 @@ class WeightMatrix {
|
||||
// implement the bias, but it doesn't actually have it.
|
||||
// Asserts that the call matches what we have.
|
||||
void MatrixDotVector(const double* u, double* v) const;
|
||||
void MatrixDotVector(const inT8* u, double* v) const;
|
||||
void MatrixDotVector(const int8_t* u, double* v) const;
|
||||
// MatrixDotVector for peep weights, MultiplyAccumulate adds the
|
||||
// component-wise products of *this[0] and v to inout.
|
||||
void MultiplyAccumulate(const double* v, double* inout);
|
||||
@ -172,7 +172,7 @@ class WeightMatrix {
|
||||
private:
|
||||
// Choice between float and 8 bit int implementations.
|
||||
GENERIC_2D_ARRAY<double> wf_;
|
||||
GENERIC_2D_ARRAY<inT8> wi_;
|
||||
GENERIC_2D_ARRAY<int8_t> wi_;
|
||||
// Transposed copy of wf_, used only for Backward, and set with each Update.
|
||||
TransposedArray wf_t_;
|
||||
// Which of wf_ and wi_ are we actually using.
|
||||
|
Loading…
Reference in New Issue
Block a user