Prepare for using float instead of double for LSTM calculations

The new header file ccutil/tesstypes.h also prepares support
for larger images by introducing a new data type for image
sizes and coordinates (still unused).

FloatToDouble is now a local function.

Signed-off-by: Stefan Weil <sw@weilnetz.de>
Stefan Weil 2021-07-22 20:02:48 +02:00
parent c3fb050daa
commit 66b77e6639
27 changed files with 265 additions and 221 deletions


@ -150,10 +150,12 @@ endif
if MARCH_NATIVE_OPT
libtesseract_native_la_CXXFLAGS += -march=native -mtune=native
endif
libtesseract_native_la_CXXFLAGS += -I$(top_srcdir)/src/ccutil
libtesseract_native_la_SOURCES = src/arch/dotproduct.cpp
if HAVE_AVX
libtesseract_avx_la_CXXFLAGS = -mavx
libtesseract_avx_la_CXXFLAGS += -I$(top_srcdir)/src/ccutil
libtesseract_avx_la_SOURCES = src/arch/dotproductavx.cpp
libtesseract_la_LIBADD += libtesseract_avx.la
noinst_LTLIBRARIES += libtesseract_avx.la
@ -161,6 +163,7 @@ endif
if HAVE_AVX2
libtesseract_avx2_la_CXXFLAGS = -mavx2
libtesseract_avx2_la_CXXFLAGS += -I$(top_srcdir)/src/ccutil
libtesseract_avx2_la_SOURCES = src/arch/intsimdmatrixavx2.cpp
libtesseract_la_LIBADD += libtesseract_avx2.la
noinst_LTLIBRARIES += libtesseract_avx2.la
@ -168,6 +171,7 @@ endif
if HAVE_FMA
libtesseract_fma_la_CXXFLAGS = -mfma
libtesseract_fma_la_CXXFLAGS += -I$(top_srcdir)/src/ccutil
libtesseract_fma_la_SOURCES = src/arch/dotproductfma.cpp
libtesseract_la_LIBADD += libtesseract_fma.la
noinst_LTLIBRARIES += libtesseract_fma.la
@ -175,6 +179,7 @@ endif
if HAVE_SSE4_1
libtesseract_sse_la_CXXFLAGS = -msse4.1
libtesseract_sse_la_CXXFLAGS += -I$(top_srcdir)/src/ccutil
libtesseract_sse_la_SOURCES = src/arch/dotproductsse.cpp src/arch/intsimdmatrixsse.cpp
libtesseract_la_LIBADD += libtesseract_sse.la
noinst_LTLIBRARIES += libtesseract_sse.la
@ -182,6 +187,7 @@ endif
if HAVE_NEON
libtesseract_neon_la_CXXFLAGS = $(NEON_CXXFLAGS)
libtesseract_neon_la_CXXFLAGS += -I$(top_srcdir)/src/ccutil
libtesseract_neon_la_SOURCES = src/arch/intsimdmatrixneon.cpp
libtesseract_la_LIBADD += libtesseract_neon.la
noinst_LTLIBRARIES += libtesseract_neon.la


@ -19,12 +19,12 @@
namespace tesseract {
// Computes and returns the dot product of the two n-vectors u and v.
double DotProductNative(const double *u, const double *v, int n) {
double total = 0.0;
TFloat DotProductNative(const TFloat *u, const TFloat *v, int n) {
TFloat total = 0;
#if defined(OPENMP_SIMD) || defined(_OPENMP)
#pragma omp simd reduction(+:total)
#endif
for (int k = 0; k < n; ++k) {
for (int k = 0; k < n; k++) {
total += u[k] * v[k];
}
return total;


@ -17,19 +17,21 @@
#ifndef TESSERACT_ARCH_DOTPRODUCT_H_
#define TESSERACT_ARCH_DOTPRODUCT_H_
#include "tesstypes.h"
namespace tesseract {
// Computes and returns the dot product of the n-vectors u and v.
double DotProductNative(const double *u, const double *v, int n);
TFloat DotProductNative(const TFloat *u, const TFloat *v, int n);
// Uses Intel AVX intrinsics to access the SIMD instruction set.
double DotProductAVX(const double *u, const double *v, int n);
TFloat DotProductAVX(const TFloat *u, const TFloat *v, int n);
// Use Intel FMA.
double DotProductFMA(const double *u, const double *v, int n);
TFloat DotProductFMA(const TFloat *u, const TFloat *v, int n);
// Uses Intel SSE intrinsics to access the SIMD instruction set.
double DotProductSSE(const double *u, const double *v, int n);
TFloat DotProductSSE(const TFloat *u, const TFloat *v, int n);
} // namespace tesseract.


@ -76,7 +76,7 @@ void IntSimdMatrix::Init(const GENERIC_2D_ARRAY<int8_t> &w, std::vector<int8_t>
// u is imagined to have an extra element at the end with value 1, to
// implement the bias, but it doesn't actually have it.
void IntSimdMatrix::MatrixDotVector(const GENERIC_2D_ARRAY<int8_t> &w,
const std::vector<double> &scales, const int8_t *u, double *v) {
const std::vector<TFloat> &scales, const int8_t *u, TFloat *v) {
int num_out = w.dim1();
int num_in = w.dim2() - 1;
// Base implementation.
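The hunk stops where the scalar fallback begins. For orientation, a hedged sketch of what that base path computes, consistent with the bias handling and per-row scaling visible in the SSE variant further down (illustrative shape only, not the committed code):

#include <cstdint>
#include <vector>

// Standalone sketch; TFloat is the alias introduced in ccutil/tesstypes.h.
using TFloat = double;

// Plausible shape of the scalar fallback: v = W.u for int8 weights, where u
// conceptually has a trailing 1 to implement the bias (stored as wi[num_in]),
// and each output row is scaled back to floating point by scales[i].
static void MatrixDotVectorRef(int num_out, int num_in,
                               const int8_t *w, // num_out x (num_in + 1), row-major
                               const std::vector<TFloat> &scales,
                               const int8_t *u, TFloat *v) {
  for (int i = 0; i < num_out; ++i) {
    const int8_t *wi = w + i * (num_in + 1);
    int32_t total = 0;
    for (int j = 0; j < num_in; ++j) {
      total += wi[j] * u[j]; // int8 * int8 accumulated in int32
    }
    // Add the bias and correct for integer values, as in the SSE variant below.
    v[i] = (total + wi[num_in] * INT8_MAX) * scales[i];
  }
}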


@ -23,6 +23,8 @@
#include <cstdint>
#include <vector>
#include "tesstypes.h"
namespace tesseract {
template <class T>
@ -78,8 +80,8 @@ struct TESS_API IntSimdMatrix {
// u is imagined to have an extra element at the end with value 1, to
// implement the bias, but it doesn't actually have it.
// Computes the base C++ implementation.
static void MatrixDotVector(const GENERIC_2D_ARRAY<int8_t> &w, const std::vector<double> &scales,
const int8_t *u, double *v);
static void MatrixDotVector(const GENERIC_2D_ARRAY<int8_t> &w, const std::vector<TFloat> &scales,
const int8_t *u, TFloat *v);
// Rounds the input up to a multiple of the given factor.
static int Roundup(int input, int factor) {
@ -95,8 +97,8 @@ struct TESS_API IntSimdMatrix {
// RoundInputs above.
// The input will be over-read to the extent of the padding. There are no
// alignment requirements.
using MatrixDotVectorFunction = void (*)(int, int, const int8_t *, const double *, const int8_t *,
double *);
using MatrixDotVectorFunction = void (*)(int, int, const int8_t *, const TFloat *, const int8_t *,
TFloat *);
MatrixDotVectorFunction matrixDotVectorFunction;
// Number of 32 bit outputs held in each register.


@ -19,6 +19,7 @@
#if defined(__ARM_NEON)
# include "intsimdmatrix.h"
# include "tesstypes.h"
# include <algorithm>
# include <cstdint>
@ -52,9 +53,9 @@ constexpr int kNumInputsPerGroup = 8;
// u must be padded out with zeros to
// kNumInputsPerGroup*ceil(num_in/kNumInputsPerGroup) elements.
static inline void PartialMatrixDotVector8(const int8_t *__restrict wi,
const double *__restrict scales,
const TFloat *__restrict scales,
const int8_t *__restrict u, int num_in,
double *__restrict v, int num_out) {
TFloat *__restrict v, int num_out) {
// Initialize all the results to 0.
int32x4_t result0123 = {0, 0, 0, 0};
int32x4_t result4567 = {0, 0, 0, 0};
@ -163,8 +164,8 @@ static inline void PartialMatrixDotVector8(const int8_t *__restrict wi,
}
}
static void matrixDotVector(int dim1, int dim2, const int8_t *wi, const double *scales,
const int8_t *u, double *v) {
static void matrixDotVector(int dim1, int dim2, const int8_t *wi, const TFloat *scales,
const int8_t *u, TFloat *v) {
const int num_out = dim1;
const int num_in = dim2 - 1;
// Each call to a partial_func_ produces group_size outputs, except the
@ -196,7 +197,8 @@ const IntSimdMatrix IntSimdMatrix::intSimdMatrixNEON = {
// Number of 8 bit inputs in the inputs register.
kNumInputsPerRegister,
// Number of inputs in each weight group.
kNumInputsPerGroup};
kNumInputsPerGroup
};
} // namespace tesseract.


@ -69,15 +69,15 @@ static int32_t IntDotProductSSE(const int8_t *u, const int8_t *v, int n) {
}
// Computes part of matrix.vector v = Wu. Computes 1 result.
static void PartialMatrixDotVector1(const int8_t *wi, const double *scales, const int8_t *u,
int num_in, double *v) {
double total = IntDotProductSSE(u, wi, num_in);
static void PartialMatrixDotVector1(const int8_t *wi, const TFloat *scales, const int8_t *u,
int num_in, TFloat *v) {
TFloat total = IntDotProductSSE(u, wi, num_in);
// Add in the bias and correct for integer values.
*v = (total + wi[num_in] * INT8_MAX) * *scales;
}
static void matrixDotVector(int dim1, int dim2, const int8_t *wi, const double *scales,
const int8_t *u, double *v) {
static void matrixDotVector(int dim1, int dim2, const int8_t *wi, const TFloat *scales,
const int8_t *u, TFloat *v) {
const int num_out = dim1;
const int num_in = dim2 - 1;
int output = 0;
@ -99,7 +99,8 @@ const IntSimdMatrix IntSimdMatrix::intSimdMatrixSSE = {
// Number of 8 bit inputs in the inputs register.
1,
// Number of inputs in each weight group.
1};
1
};
} // namespace tesseract.


@ -93,8 +93,8 @@ bool SIMDDetect::sse_available_;
#endif
#if defined(HAVE_FRAMEWORK_ACCELERATE)
static double DotProductAccelerate(const double* u, const double* v, int n) {
double total = 0.0;
static TFloat DotProductAccelerate(const TFloat* u, const TFloat* v, int n) {
TFloat total = 0;
const int stride = 1;
vDSP_dotprD(u, stride, v, stride, &total, n);
return total;
@ -102,8 +102,8 @@ static double DotProductAccelerate(const double* u, const double* v, int n) {
#endif
// Computes and returns the dot product of the two n-vectors u and v.
static double DotProductGeneric(const double *u, const double *v, int n) {
double total = 0.0;
static TFloat DotProductGeneric(const TFloat *u, const TFloat *v, int n) {
TFloat total = 0;
for (int k = 0; k < n; ++k) {
total += u[k] * v[k];
}
@ -111,8 +111,8 @@ static double DotProductGeneric(const double *u, const double *v, int n) {
}
// Compute dot product using std::inner_product.
static double DotProductStdInnerProduct(const double *u, const double *v, int n) {
return std::inner_product(u, u + n, v, 0.0);
static TFloat DotProductStdInnerProduct(const TFloat *u, const TFloat *v, int n) {
return std::inner_product(u, u + n, v, static_cast<TFloat>(0));
}
static void SetDotProduct(DotProductFunction f, const IntSimdMatrix *m = nullptr) {


@ -18,11 +18,12 @@
#define TESSERACT_ARCH_SIMDDETECT_H_
#include <tesseract/export.h>
#include "tesstypes.h"
namespace tesseract {
// Function pointer for best calculation of dot product.
using DotProductFunction = double (*)(const double *, const double *, int);
using DotProductFunction = TFloat (*)(const TFloat *, const TFloat *, int);
extern DotProductFunction DotProduct;
// Architecture detector. Add code here to detect any other architectures for
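DotProduct is a function pointer that SIMD detection points at the best available implementation (native, SSE, AVX, FMA, Accelerate, or std::inner_product, as seen in simddetect.cpp above). A minimal standalone sketch of that dispatch pattern, using hypothetical local names:

using TFloat = double; // stand-in for tesseract::TFloat

// Function-pointer dispatch in the spirit of SIMDDetect: callers always go
// through the DotProduct pointer, and detection code installs the best
// implementation once at startup via SetDotProduct.
using DotProductFunction = TFloat (*)(const TFloat *, const TFloat *, int);

static TFloat DotProductGenericSketch(const TFloat *u, const TFloat *v, int n) {
  TFloat total = 0;
  for (int k = 0; k < n; ++k) {
    total += u[k] * v[k];
  }
  return total;
}

// In the real code this is the extern DotProduct declared above; here it is a
// local stand-in initialized to the portable fallback.
static DotProductFunction DotProduct = DotProductGenericSketch;

static void SetDotProduct(DotProductFunction f) {
  DotProduct = f; // the real SetDotProduct also records an IntSimdMatrix*
}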

src/ccutil/tesstypes.h (new file, 32 lines)

@ -0,0 +1,32 @@
///////////////////////////////////////////////////////////////////////
// File: tesstypes.h
// Description: Simple data types used by Tesseract code.
// Author: Stefan Weil
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_TESSTYPES_H
#define TESSERACT_TESSTYPES_H
#include <cstdint> // for int16_t
namespace tesseract {
// Image dimensions (width and height, coordinates).
using TDimension = int16_t;
// Floating point data type used for LSTM calculations.
using TFloat = double;
}
#endif // TESSERACT_TESSTYPES_H
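Downstream code now spells its math type as TFloat, so switching the whole LSTM engine to single precision later becomes a one-line change in this header. A hedged sketch of how that switch and the still-unused TDimension might be consumed; the FAST_FLOAT macro and the TBox struct are assumptions for illustration, not part of this commit:

#include <cstdint>

namespace tesseract {

// Hypothetical future form of the switch; in this commit TFloat is simply
// 'using TFloat = double;'. The FAST_FLOAT macro name is an assumption.
#if defined(FAST_FLOAT)
using TFloat = float;
#else
using TFloat = double;
#endif

// TDimension (int16_t here) is still unused; widening it to int32_t later is
// how "support for larger images" could be enabled without touching callers.
using TDimension = int16_t;

// Purely illustrative consumer of the aliases.
struct TBox {
  TDimension left, top, right, bottom;
};

inline TFloat Area(const TBox &b) {
  return static_cast<TFloat>(b.right - b.left) * (b.bottom - b.top);
}

} // namespace tesseract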


@ -156,7 +156,7 @@ void FullyConnected::Forward(bool debug, const NetworkIO &input,
// Thread-local pointer to temporary storage.
int thread_id = 0;
#endif
double *temp_line = temp_lines[thread_id];
TFloat *temp_line = temp_lines[thread_id];
if (input.int_mode()) {
ForwardTimeStep(input.i(t), t, temp_line);
} else {
@ -200,7 +200,7 @@ void FullyConnected::SetupForward(const NetworkIO &input, const TransposedArray
}
}
void FullyConnected::ForwardTimeStep(int t, double *output_line) {
void FullyConnected::ForwardTimeStep(int t, TFloat *output_line) {
if (type_ == NT_TANH) {
FuncInplace<GFunc>(no_, output_line);
} else if (type_ == NT_LOGISTIC) {
@ -218,7 +218,7 @@ void FullyConnected::ForwardTimeStep(int t, double *output_line) {
}
}
void FullyConnected::ForwardTimeStep(const double *d_input, int t, double *output_line) {
void FullyConnected::ForwardTimeStep(const TFloat *d_input, int t, TFloat *output_line) {
// input is copied to source_ line-by-line for cache coherency.
if (IsTraining() && external_source_ == nullptr) {
source_t_.WriteStrided(t, d_input);
@ -227,7 +227,7 @@ void FullyConnected::ForwardTimeStep(const double *d_input, int t, double *outpu
ForwardTimeStep(t, output_line);
}
void FullyConnected::ForwardTimeStep(const int8_t *i_input, int t, double *output_line) {
void FullyConnected::ForwardTimeStep(const int8_t *i_input, int t, TFloat *output_line) {
// input is copied to source_ line-by-line for cache coherency.
weights_.MatrixDotVector(i_input, output_line);
ForwardTimeStep(t, output_line);
@ -265,11 +265,11 @@ bool FullyConnected::Backward(bool debug, const NetworkIO &fwd_deltas, NetworkSc
for (int t = 0; t < width; ++t) {
int thread_id = 0;
#endif
double *backprop = nullptr;
TFloat *backprop = nullptr;
if (needs_to_backprop_) {
backprop = temp_backprops[thread_id];
}
double *curr_errors = errors[thread_id];
TFloat *curr_errors = errors[thread_id];
BackwardTimeStep(fwd_deltas, t, curr_errors, errors_t.get(), backprop);
if (backprop != nullptr) {
back_deltas->WriteTimeStep(t, backprop);
@ -287,8 +287,8 @@ bool FullyConnected::Backward(bool debug, const NetworkIO &fwd_deltas, NetworkSc
return false; // No point going further back.
}
void FullyConnected::BackwardTimeStep(const NetworkIO &fwd_deltas, int t, double *curr_errors,
TransposedArray *errors_t, double *backprop) {
void FullyConnected::BackwardTimeStep(const NetworkIO &fwd_deltas, int t, TFloat *curr_errors,
TransposedArray *errors_t, TFloat *backprop) {
if (type_ == NT_TANH) {
acts_.FuncMultiply<GPrime>(fwd_deltas, t, curr_errors);
} else if (type_ == NT_LOGISTIC) {
@ -328,7 +328,7 @@ void FullyConnected::Update(float learning_rate, float momentum, float adam_beta
// Sums the products of weight updates in *this and other, splitting into
// positive (same direction) in *same and negative (different direction) in
// *changed.
void FullyConnected::CountAlternators(const Network &other, double *same, double *changed) const {
void FullyConnected::CountAlternators(const Network &other, TFloat *same, TFloat *changed) const {
ASSERT_HOST(other.type() == type_);
const auto *fc = static_cast<const FullyConnected *>(&other);
weights_.CountAlternators(fc->weights_, same, changed);


@ -20,6 +20,7 @@
#include "network.h"
#include "networkscratch.h"
#include "tesstypes.h"
namespace tesseract {
@ -90,17 +91,17 @@ public:
NetworkScratch *scratch, NetworkIO *output) override;
// Components of Forward so FullyConnected can be reused inside LSTM.
void SetupForward(const NetworkIO &input, const TransposedArray *input_transpose);
void ForwardTimeStep(int t, double *output_line);
void ForwardTimeStep(const double *d_input, int t, double *output_line);
void ForwardTimeStep(const int8_t *i_input, int t, double *output_line);
void ForwardTimeStep(int t, TFloat *output_line);
void ForwardTimeStep(const TFloat *d_input, int t, TFloat *output_line);
void ForwardTimeStep(const int8_t *i_input, int t, TFloat *output_line);
// Runs backward propagation of errors on the deltas line.
// See Network for a detailed discussion of the arguments.
bool Backward(bool debug, const NetworkIO &fwd_deltas, NetworkScratch *scratch,
NetworkIO *back_deltas) override;
// Components of Backward so FullyConnected can be reused inside LSTM.
void BackwardTimeStep(const NetworkIO &fwd_deltas, int t, double *curr_errors,
TransposedArray *errors_t, double *backprop);
void BackwardTimeStep(const NetworkIO &fwd_deltas, int t, TFloat *curr_errors,
TransposedArray *errors_t, TFloat *backprop);
void FinishBackward(const TransposedArray &errors_t);
// Updates the weights using the given learning rate, momentum and adam_beta.
@ -109,7 +110,7 @@ public:
// Sums the products of weight updates in *this and other, splitting into
// positive (same direction) in *same and negative (different direction) in
// *changed.
void CountAlternators(const Network &other, double *same, double *changed) const override;
void CountAlternators(const Network &other, TFloat *same, TFloat *changed) const override;
protected:
// Weight arrays of size [no, ni + 1].


@ -1,7 +1,7 @@
// Generated code with lookup tables
#include "functions.h"
namespace tesseract {
const double TanhTable[] = {
const TFloat TanhTable[] = {
0.0,
0.00390623013190634,
0.007812341058161014,
@ -4099,7 +4099,7 @@ const double TanhTable[] = {
0.9999999999999742,
0.9999999999999745,
};
const double LogisticTable[] = {
const TFloat LogisticTable[] = {
0.5,
0.5009765612582384,
0.5019531150659532,


@ -19,6 +19,7 @@
#define TESSERACT_LSTM_FUNCTIONS_H_
#include "helpers.h"
#include "tesstypes.h"
// Setting this to 1 or more causes massive dumps of debug data: weights,
// updates, internal calculations etc, and reduces the number of test iterations
@ -33,14 +34,14 @@ namespace tesseract {
// Size of static tables.
constexpr int kTableSize = 4096;
// Scale factor for float arg to int index.
constexpr double kScaleFactor = 256.0;
constexpr TFloat kScaleFactor = 256.0;
// Generated lookup tables.
extern const double TanhTable[];
extern const double LogisticTable[];
extern const TFloat TanhTable[];
extern const TFloat LogisticTable[];
// Non-linearity (sigmoid) functions with cache tables and clipping.
inline double Tanh(double x) {
inline TFloat Tanh(TFloat x) {
if (x < 0.0) {
return -Tanh(-x);
}
@ -49,13 +50,13 @@ inline double Tanh(double x) {
if (index >= (kTableSize - 1)) {
return 1.0;
}
double tanh_i0 = TanhTable[index];
double tanh_i1 = TanhTable[index + 1];
TFloat tanh_i0 = TanhTable[index];
TFloat tanh_i1 = TanhTable[index + 1];
// Linear interpolation.
return tanh_i0 + (tanh_i1 - tanh_i0) * (x - index);
}
inline double Logistic(double x) {
inline TFloat Logistic(TFloat x) {
if (x < 0.0) {
return 1.0 - Logistic(-x);
}
@ -64,25 +65,25 @@ inline double Logistic(double x) {
if (index >= (kTableSize - 1)) {
return 1.0;
}
double l0 = LogisticTable[index];
double l1 = LogisticTable[index + 1];
TFloat l0 = LogisticTable[index];
TFloat l1 = LogisticTable[index + 1];
// Linear interpolation.
return l0 + (l1 - l0) * (x - index);
}
// Non-linearity (sigmoid) functions and their derivatives.
struct FFunc {
inline double operator()(double x) const {
inline TFloat operator()(TFloat x) const {
return Logistic(x);
}
};
struct FPrime {
inline double operator()(double y) const {
inline TFloat operator()(TFloat y) const {
return y * (1.0 - y);
}
};
struct ClipFFunc {
inline double operator()(double x) const {
inline TFloat operator()(TFloat x) const {
if (x <= 0.0) {
return 0.0;
}
@ -93,12 +94,12 @@ struct ClipFFunc {
}
};
struct ClipFPrime {
inline double operator()(double y) const {
inline TFloat operator()(TFloat y) const {
return 0.0 < y && y < 1.0 ? 1.0 : 0.0;
}
};
struct Relu {
inline double operator()(double x) const {
inline TFloat operator()(TFloat x) const {
if (x <= 0.0) {
return 0.0;
}
@ -106,22 +107,22 @@ struct Relu {
}
};
struct ReluPrime {
inline double operator()(double y) const {
inline TFloat operator()(TFloat y) const {
return 0.0 < y ? 1.0 : 0.0;
}
};
struct GFunc {
inline double operator()(double x) const {
inline TFloat operator()(TFloat x) const {
return Tanh(x);
}
};
struct GPrime {
inline double operator()(double y) const {
inline TFloat operator()(TFloat y) const {
return 1.0 - y * y;
}
};
struct ClipGFunc {
inline double operator()(double x) const {
inline TFloat operator()(TFloat x) const {
if (x <= -1.0) {
return -1.0;
}
@ -132,35 +133,35 @@ struct ClipGFunc {
}
};
struct ClipGPrime {
inline double operator()(double y) const {
inline TFloat operator()(TFloat y) const {
return -1.0 < y && y < 1.0 ? 1.0 : 0.0;
}
};
struct HFunc {
inline double operator()(double x) const {
inline TFloat operator()(TFloat x) const {
return Tanh(x);
}
};
struct HPrime {
inline double operator()(double y) const {
double u = Tanh(y);
return 1.0 - u * u;
inline TFloat operator()(TFloat y) const {
TFloat u = Tanh(y);
return 1 - u * u;
}
};
struct UnityFunc {
inline double operator()(double /*x*/) const {
inline TFloat operator()(TFloat /*x*/) const {
return 1.0;
}
};
struct IdentityFunc {
inline double operator()(double x) const {
inline TFloat operator()(TFloat x) const {
return x;
}
};
// Applies Func in-place to inout, of size n.
template <class Func>
inline void FuncInplace(int n, double *inout) {
inline void FuncInplace(int n, TFloat *inout) {
Func f;
for (int i = 0; i < n; ++i) {
inout[i] = f(inout[i]);
@ -169,7 +170,7 @@ inline void FuncInplace(int n, double *inout) {
// Applies Func to u and multiplies the result by v component-wise,
// putting the product in out, all of size n.
template <class Func>
inline void FuncMultiply(const double *u, const double *v, int n, double *out) {
inline void FuncMultiply(const TFloat *u, const TFloat *v, int n, TFloat *out) {
Func f;
for (int i = 0; i < n; ++i) {
out[i] = f(u[i]) * v[i];
@ -206,34 +207,34 @@ inline void SoftmaxInPlace(int n, T *inout) {
}
// Copies n values of the given src vector to dest.
inline void CopyVector(int n, const double *src, double *dest) {
inline void CopyVector(int n, const TFloat *src, TFloat *dest) {
memcpy(dest, src, n * sizeof(dest[0]));
}
// Adds n values of the given src vector to dest.
inline void AccumulateVector(int n, const double *src, double *dest) {
inline void AccumulateVector(int n, const TFloat *src, TFloat *dest) {
for (int i = 0; i < n; ++i) {
dest[i] += src[i];
}
}
// Multiplies n values of inout in-place element-wise by the given src vector.
inline void MultiplyVectorsInPlace(int n, const double *src, double *inout) {
inline void MultiplyVectorsInPlace(int n, const TFloat *src, TFloat *inout) {
for (int i = 0; i < n; ++i) {
inout[i] *= src[i];
}
}
// Multiplies n values of u by v, element-wise, accumulating to out.
inline void MultiplyAccumulate(int n, const double *u, const double *v, double *out) {
inline void MultiplyAccumulate(int n, const TFloat *u, const TFloat *v, TFloat *out) {
for (int i = 0; i < n; i++) {
out[i] += u[i] * v[i];
}
}
// Sums the given 5 n-vectors putting the result into sum.
inline void SumVectors(int n, const double *v1, const double *v2, const double *v3,
const double *v4, const double *v5, double *sum) {
inline void SumVectors(int n, const TFloat *v1, const TFloat *v2, const TFloat *v3,
const TFloat *v4, const TFloat *v5, TFloat *sum) {
for (int i = 0; i < n; ++i) {
sum[i] = v1[i] + v2[i] + v3[i] + v4[i] + v5[i];
}
@ -255,12 +256,12 @@ inline void ClipVector(int n, T lower, T upper, T *vec) {
// Converts the given n-vector to a binary encoding of the maximum value,
// encoded as vector of nf binary values.
inline void CodeInBinary(int n, int nf, double *vec) {
inline void CodeInBinary(int n, int nf, TFloat *vec) {
if (nf <= 0 || n < nf) {
return;
}
int index = 0;
double best_score = vec[0];
TFloat best_score = vec[0];
for (int i = 1; i < n; ++i) {
if (vec[i] > best_score) {
best_score = vec[i];
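The Tanh and Logistic helpers above avoid std::tanh/exp by indexing a 4096-entry table (kTableSize) and interpolating linearly; kScaleFactor = 256 maps the argument to a fractional index, so the tables cover roughly [0, 16). The scaling step itself lies outside the shown hunk, so the following is a self-contained sketch of the same scheme with the table built at startup rather than the generated one in functions.cpp:

#include <cmath>
#include <vector>

using TFloat = double; // stand-in for tesseract::TFloat

constexpr int kTableSize = 4096;
constexpr TFloat kScaleFactor = 256.0;

// Table-based tanh with linear interpolation, mirroring the lookup scheme in
// functions.h; the real TanhTable is generated source in functions.cpp.
inline TFloat TableTanh(TFloat x) {
  static const std::vector<TFloat> table = [] {
    std::vector<TFloat> t(kTableSize);
    for (int i = 0; i < kTableSize; ++i) {
      t[i] = std::tanh(i / kScaleFactor);
    }
    return t;
  }();
  if (x < 0.0) {
    return -TableTanh(-x); // odd symmetry, as in the original
  }
  x *= kScaleFactor; // map the argument to a fractional table index
  const int index = static_cast<int>(x);
  if (index >= kTableSize - 1) {
    return 1.0; // saturate beyond the table range
  }
  const TFloat t0 = table[index];
  const TFloat t1 = table[index + 1];
  return t0 + (t1 - t0) * (x - index); // linear interpolation
}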


@ -68,9 +68,9 @@ namespace tesseract {
// Max absolute value of state_. It is reasonably high to enable the state
// to count things.
const double kStateClip = 100.0;
const TFloat kStateClip = 100.0;
// Max absolute value of gate_errors (the gradients).
const double kErrClip = 1.0f;
const TFloat kErrClip = 1.0f;
// Calculate ceil(log2(n)).
static inline uint32_t ceil_log2(uint32_t n) {
@ -312,9 +312,9 @@ void LSTM::Forward(bool debug, const NetworkIO &input, const TransposedArray *in
// Single timestep buffers for the current/recurrent output and state.
NetworkScratch::FloatVec curr_state, curr_output;
curr_state.Init(ns_, scratch);
ZeroVector<double>(ns_, curr_state);
ZeroVector<TFloat>(ns_, curr_state);
curr_output.Init(ns_, scratch);
ZeroVector<double>(ns_, curr_output);
ZeroVector<TFloat>(ns_, curr_output);
// Rotating buffers of width buf_width allow storage of the state and output
// for the other dimension, used only when working in true 2D mode. The width
// is enough to hold an entire strip of the major direction.
@ -325,9 +325,9 @@ void LSTM::Forward(bool debug, const NetworkIO &input, const TransposedArray *in
outputs.resize(buf_width);
for (int i = 0; i < buf_width; ++i) {
states[i].Init(ns_, scratch);
ZeroVector<double>(ns_, states[i]);
ZeroVector<TFloat>(ns_, states[i]);
outputs[i].Init(ns_, scratch);
ZeroVector<double>(ns_, outputs[i]);
ZeroVector<TFloat>(ns_, outputs[i]);
}
}
// Used only if a softmax LSTM.
@ -335,7 +335,7 @@ void LSTM::Forward(bool debug, const NetworkIO &input, const TransposedArray *in
NetworkScratch::IO int_output;
if (softmax_ != nullptr) {
softmax_output.Init(no_, scratch);
ZeroVector<double>(no_, softmax_output);
ZeroVector<TFloat>(no_, softmax_output);
int rounded_softmax_inputs = gate_weights_[CI].RoundInputs(ns_);
if (input.int_mode()) {
int_output.Resize2d(true, 1, rounded_softmax_inputs, scratch);
@ -429,7 +429,7 @@ void LSTM::Forward(bool debug, const NetworkIO &input, const TransposedArray *in
int8_t *which_fg_col = which_fg_[t];
memset(which_fg_col, 1, ns_ * sizeof(which_fg_col[0]));
if (valid_2d) {
const double *stepped_state = states[mod_t];
const TFloat *stepped_state = states[mod_t];
for (int i = 0; i < ns_; ++i) {
if (temp_lines[GF1][i] < temp_lines[GFS][i]) {
curr_state[i] = temp_lines[GFS][i] * stepped_state[i];
@ -440,7 +440,7 @@ void LSTM::Forward(bool debug, const NetworkIO &input, const TransposedArray *in
}
MultiplyAccumulate(ns_, temp_lines[CI], temp_lines[GI], curr_state);
// Clip curr_state to a sane range.
ClipVector<double>(ns_, -kStateClip, kStateClip, curr_state);
ClipVector<TFloat>(ns_, -kStateClip, kStateClip, curr_state);
if (IsTraining()) {
// Save the gate node values.
node_values_[CI].WriteTimeStep(t, temp_lines[CI]);
@ -483,8 +483,8 @@ void LSTM::Forward(bool debug, const NetworkIO &input, const TransposedArray *in
// Always zero the states at the end of every row, but only for the major
// direction. The 2-D state remains intact.
if (src_index.IsLast(FD_WIDTH)) {
ZeroVector<double>(ns_, curr_state);
ZeroVector<double>(ns_, curr_output);
ZeroVector<TFloat>(ns_, curr_state);
ZeroVector<TFloat>(ns_, curr_output);
}
} while (src_index.Increment());
#if DEBUG_DETAIL > 0
@ -520,8 +520,8 @@ bool LSTM::Backward(bool debug, const NetworkIO &fwd_deltas, NetworkScratch *scr
NetworkScratch::FloatVec curr_stateerr, curr_sourceerr;
curr_stateerr.Init(ns_, scratch);
curr_sourceerr.Init(na_, scratch);
ZeroVector<double>(ns_, curr_stateerr);
ZeroVector<double>(na_, curr_sourceerr);
ZeroVector<TFloat>(ns_, curr_stateerr);
ZeroVector<TFloat>(na_, curr_sourceerr);
// Errors in the gates.
NetworkScratch::FloatVec gate_errors[WT_COUNT];
for (auto &gate_error : gate_errors) {
@ -537,8 +537,8 @@ bool LSTM::Backward(bool debug, const NetworkIO &fwd_deltas, NetworkScratch *scr
for (int t = 0; t < buf_width; ++t) {
stateerr[t].Init(ns_, scratch);
sourceerr[t].Init(na_, scratch);
ZeroVector<double>(ns_, stateerr[t]);
ZeroVector<double>(na_, sourceerr[t]);
ZeroVector<TFloat>(ns_, stateerr[t]);
ZeroVector<TFloat>(na_, sourceerr[t]);
}
}
// Parallel-generated sourceerr from each of the gates.
@ -559,7 +559,7 @@ bool LSTM::Backward(bool debug, const NetworkIO &fwd_deltas, NetworkScratch *scr
softmax_errors.Init(no_, scratch);
softmax_errors_t.Init(no_, width, scratch);
}
double state_clip = Is2D() ? 9.0 : 4.0;
TFloat state_clip = Is2D() ? 9.0 : 4.0;
#if DEBUG_DETAIL > 1
tprintf("fwd_deltas:%s\n", name_.c_str());
fwd_deltas.Print(10);
@ -594,8 +594,8 @@ bool LSTM::Backward(bool debug, const NetworkIO &fwd_deltas, NetworkScratch *scr
int mod_t = Modulo(t, buf_width); // Current timestep.
// Zero the state in the major direction only at the end of every row.
if (at_last_x) {
ZeroVector<double>(na_, curr_sourceerr);
ZeroVector<double>(ns_, curr_stateerr);
ZeroVector<TFloat>(na_, curr_sourceerr);
ZeroVector<TFloat>(ns_, curr_stateerr);
}
// Setup the outputerr.
if (type_ == NT_LSTM_SUMMARY) {
@ -603,7 +603,7 @@ bool LSTM::Backward(bool debug, const NetworkIO &fwd_deltas, NetworkScratch *scr
fwd_deltas.ReadTimeStep(src_index.t(), outputerr);
src_index.Decrement();
} else {
ZeroVector<double>(ns_, outputerr);
ZeroVector<TFloat>(ns_, outputerr);
}
} else if (softmax_ == nullptr) {
fwd_deltas.ReadTimeStep(t, outputerr);
@ -631,7 +631,7 @@ bool LSTM::Backward(bool debug, const NetworkIO &fwd_deltas, NetworkScratch *scr
}
if (down_pos >= 0) {
const float *right_node_gfs = node_values_[GFS].f(down_pos);
const double *right_stateerr = stateerr[mod_t];
const TFloat *right_stateerr = stateerr[mod_t];
for (int i = 0; i < ns_; ++i) {
if (which_fg_[down_pos][i] == 2) {
curr_stateerr[i] += right_stateerr[i] * right_node_gfs[i];
@ -641,7 +641,7 @@ bool LSTM::Backward(bool debug, const NetworkIO &fwd_deltas, NetworkScratch *scr
}
state_.FuncMultiply3Add<HPrime>(node_values_[GO], t, outputerr, curr_stateerr);
// Clip stateerr_ to a sane range.
ClipVector<double>(ns_, -state_clip, state_clip, curr_stateerr);
ClipVector<TFloat>(ns_, -state_clip, state_clip, curr_stateerr);
#if DEBUG_DETAIL > 1
if (t + 10 > width) {
tprintf("t=%d, stateerr=", t);
@ -758,7 +758,7 @@ void LSTM::Update(float learning_rate, float momentum, float adam_beta, int num_
// Sums the products of weight updates in *this and other, splitting into
// positive (same direction) in *same and negative (different direction) in
// *changed.
void LSTM::CountAlternators(const Network &other, double *same, double *changed) const {
void LSTM::CountAlternators(const Network &other, TFloat *same, TFloat *changed) const {
ASSERT_HOST(other.type() == type_);
const LSTM *lstm = static_cast<const LSTM *>(&other);
for (int w = 0; w < WT_COUNT; ++w) {


@ -109,7 +109,7 @@ public:
// Sums the products of weight updates in *this and other, splitting into
// positive (same direction) in *same and negative (different direction) in
// *changed.
void CountAlternators(const Network &other, double *same, double *changed) const override;
void CountAlternators(const Network &other, TFloat *same, TFloat *changed) const override;
// Prints the weights for debug purposes.
void PrintW();
// Prints the weight deltas for debug purposes.


@ -321,7 +321,7 @@ Network *Network::CreateFromFile(TFile *fp) {
}
// Returns a random number in [-range, range].
double Network::Random(double range) {
TFloat Network::Random(TFloat range) {
ASSERT_HOST(randomizer_ != nullptr);
return randomizer_->SignedRand(range);
}


@ -235,7 +235,7 @@ public:
// Sums the products of weight updates in *this and other, splitting into
// positive (same direction) in *same and negative (different direction) in
// *changed.
virtual void CountAlternators(const Network &other, double *same, double *changed) const {}
virtual void CountAlternators(const Network &other, TFloat *same, TFloat *changed) const {}
// Reads from the given file. Returns nullptr in case of error.
// Determines the type of the serialized class and calls its DeSerialize
@ -287,7 +287,7 @@ public:
protected:
// Returns a random number in [-range, range].
double Random(double range);
TFloat Random(TFloat range);
protected:
NetworkType type_; // Type of the derived network class.


@ -529,9 +529,9 @@ int NetworkIO::PositionOfBestMatch(const std::vector<int> &labels, int start, in
int length = labels.size();
int last_start = end - length;
int best_start = -1;
double best_score = 0.0;
TFloat best_score = 0;
for (int s = start; s <= last_start; ++s) {
double score = ScoreOfLabels(labels, s);
TFloat score = ScoreOfLabels(labels, s);
if (score > best_score || best_start < 0) {
best_score = score;
best_start = s;
@ -542,9 +542,9 @@ int NetworkIO::PositionOfBestMatch(const std::vector<int> &labels, int start, in
// Returns the cumulative score of the given labels starting at start, and
// using one label per time-step.
double NetworkIO::ScoreOfLabels(const std::vector<int> &labels, int start) const {
TFloat NetworkIO::ScoreOfLabels(const std::vector<int> &labels, int start) const {
int length = labels.size();
double score = 0.0;
TFloat score = 0;
for (int i = 0; i < length; ++i) {
score += f_(start + i, labels[i]);
}
@ -615,27 +615,27 @@ bool NetworkIO::AnySuspiciousTruth(float confidence_thr) const {
}
// Reads a single timestep to floats in the range [-1, 1].
void NetworkIO::ReadTimeStep(int t, double *output) const {
void NetworkIO::ReadTimeStep(int t, TFloat *output) const {
if (int_mode_) {
const int8_t *line = i_[t];
for (int i = 0; i < i_.dim2(); ++i) {
output[i] = static_cast<double>(line[i]) / INT8_MAX;
output[i] = static_cast<TFloat>(line[i]) / INT8_MAX;
}
} else {
const float *line = f_[t];
for (int i = 0; i < f_.dim2(); ++i) {
output[i] = static_cast<double>(line[i]);
output[i] = static_cast<TFloat>(line[i]);
}
}
}
// Adds a single timestep to floats.
void NetworkIO::AddTimeStep(int t, double *inout) const {
void NetworkIO::AddTimeStep(int t, TFloat *inout) const {
int num_features = NumFeatures();
if (int_mode_) {
const int8_t *line = i_[t];
for (int i = 0; i < num_features; ++i) {
inout[i] += static_cast<double>(line[i]) / INT8_MAX;
inout[i] += static_cast<TFloat>(line[i]) / INT8_MAX;
}
} else {
const float *line = f_[t];
@ -661,13 +661,13 @@ void NetworkIO::AddTimeStepPart(int t, int offset, int num_features, float *inou
}
// Writes a single timestep from floats in the range [-1, 1].
void NetworkIO::WriteTimeStep(int t, const double *input) {
void NetworkIO::WriteTimeStep(int t, const TFloat *input) {
WriteTimeStepPart(t, 0, NumFeatures(), input);
}
// Writes a single timestep from floats in the range [-1, 1] writing only
// num_features elements of input to (*this)[t], starting at offset.
void NetworkIO::WriteTimeStepPart(int t, int offset, int num_features, const double *input) {
void NetworkIO::WriteTimeStepPart(int t, int offset, int num_features, const TFloat *input) {
if (int_mode_) {
int8_t *line = i_[t] + offset;
for (int i = 0; i < num_features; ++i) {


@ -172,7 +172,7 @@ public:
int PositionOfBestMatch(const std::vector<int> &labels, int start, int end) const;
// Returns the cumulative score of the given labels starting at start, and
// using one label per time-step.
double ScoreOfLabels(const std::vector<int> &labels, int start) const;
TFloat ScoreOfLabels(const std::vector<int> &labels, int start) const;
// Helper function sets all the outputs for a single timestep, such that
// label has value ok_score, and the other labels share 1 - ok_score.
// Assumes float mode.
@ -193,16 +193,16 @@ public:
bool AnySuspiciousTruth(float confidence_thr) const;
// Reads a single timestep to floats in the range [-1, 1].
void ReadTimeStep(int t, double *output) const;
void ReadTimeStep(int t, TFloat *output) const;
// Adds a single timestep to floats.
void AddTimeStep(int t, double *inout) const;
void AddTimeStep(int t, TFloat *inout) const;
// Adds part of a single timestep to floats.
void AddTimeStepPart(int t, int offset, int num_features, float *inout) const;
// Writes a single timestep from floats in the range [-1, 1].
void WriteTimeStep(int t, const double *input);
void WriteTimeStep(int t, const TFloat *input);
// Writes a single timestep from floats in the range [-1, 1] writing only
// num_features elements of input to (*this)[t], starting at offset.
void WriteTimeStepPart(int t, int offset, int num_features, const double *input);
void WriteTimeStepPart(int t, int offset, int num_features, const TFloat *input);
// Maxpools a single time step from src.
void MaxpoolTimeStep(int dest_t, const NetworkIO &src, int src_t, int *max_line);
// Runs maxpool backward, using maxes to index timesteps in *this.
@ -253,9 +253,9 @@ public:
// Applies Func to timestep t of *this (u) and multiplies the result by v
// component-wise, putting the product in *product.
// *this and v may be int or float, but must match. The outputs are double.
// *this and v may be int or float, but must match. The outputs are TFloat.
template <class Func>
void FuncMultiply(const NetworkIO &v_io, int t, double *product) {
void FuncMultiply(const NetworkIO &v_io, int t, TFloat *product) {
Func f;
ASSERT_HOST(!int_mode_);
ASSERT_HOST(!v_io.int_mode_);
@ -264,7 +264,7 @@ public:
const int8_t *u = i_[t];
const int8_t *v = v_io.i_[t];
for (int i = 0; i < dim; ++i) {
product[i] = f(u[i] / static_cast<double>(INT8_MAX)) * v[i] / static_cast<double>(INT8_MAX);
product[i] = f(u[i] / static_cast<TFloat>(INT8_MAX)) * v[i] / INT8_MAX;
}
} else {
const float *u = f_[t];
@ -278,8 +278,8 @@ public:
// component-wise, putting the product in *product.
// All NetworkIOs are assumed to be float.
template <class Func>
void FuncMultiply3(int u_t, const NetworkIO &v_io, int v_t, const double *w,
double *product) const {
void FuncMultiply3(int u_t, const NetworkIO &v_io, int v_t, const TFloat *w,
TFloat *product) const {
ASSERT_HOST(!int_mode_);
ASSERT_HOST(!v_io.int_mode_);
Func f;
@ -294,7 +294,7 @@ public:
// component-wise, adding the product to *product.
// All NetworkIOs are assumed to be float.
template <class Func>
void FuncMultiply3Add(const NetworkIO &v_io, int t, const double *w, double *product) const {
void FuncMultiply3Add(const NetworkIO &v_io, int t, const TFloat *w, TFloat *product) const {
ASSERT_HOST(!int_mode_);
ASSERT_HOST(!v_io.int_mode_);
Func f;
@ -309,7 +309,7 @@ public:
// component-wise, putting the product in product, all at timestep t, except
// w, which is a simple array. All NetworkIOs are assumed to be float.
template <class Func1, class Func2>
void Func2Multiply3(const NetworkIO &v_io, int t, const double *w, double *product) const {
void Func2Multiply3(const NetworkIO &v_io, int t, const TFloat *w, TFloat *product) const {
ASSERT_HOST(!int_mode_);
ASSERT_HOST(!v_io.int_mode_);
Func1 f;


@ -156,25 +156,25 @@ public:
}
// Use the cast operator instead of operator[] so the FloatVec can be used
// as a double* argument to a function call.
operator double *() const {
// as a TFloat* argument to a function call.
operator TFloat *() const {
return data_;
}
double *get() {
TFloat *get() {
return data_;
}
private:
// Vector borrowed from the scratch space. Use Return to free it.
std::vector<double> *vec_;
std::vector<TFloat> *vec_;
// Short-cut pointer to the underlying array.
double *data_;
TFloat *data_;
// The source scratch_space_. Borrowed pointer, used to free the
// vector. Don't delete!
NetworkScratch *scratch_space_;
}; // class FloatVec
// Class that acts like a 2-D array of double, yet actually uses space
// Class that acts like a 2-D array of TFloat, yet actually uses space
// from the source NetworkScratch, and knows how to unstack the borrowed
// array on destruction.
class GradientStore {
@ -270,7 +270,7 @@ private:
// deleted until the NetworkScratch is deleted.
Stack<NetworkIO> int_stack_;
Stack<NetworkIO> float_stack_;
Stack<std::vector<double>> vec_stack_;
Stack<std::vector<TFloat>> vec_stack_;
Stack<TransposedArray> array_stack_;
};


@ -255,7 +255,7 @@ void Plumbing::Update(float learning_rate, float momentum, float adam_beta, int
// Sums the products of weight updates in *this and other, splitting into
// positive (same direction) in *same and negative (different direction) in
// *changed.
void Plumbing::CountAlternators(const Network &other, double *same, double *changed) const {
void Plumbing::CountAlternators(const Network &other, TFloat *same, TFloat *changed) const {
ASSERT_HOST(other.type() == type_);
const auto *plumbing = static_cast<const Plumbing *>(&other);
ASSERT_HOST(plumbing->stack_.size() == stack_.size());


@ -143,7 +143,7 @@ public:
// Sums the products of weight updates in *this and other, splitting into
// positive (same direction) in *same and negative (different direction) in
// *changed.
void CountAlternators(const Network &other, double *same, double *changed) const override;
void CountAlternators(const Network &other, TFloat *same, TFloat *changed) const override;
protected:
// The networks.


@ -26,7 +26,7 @@
namespace tesseract {
#if defined(ANDROID)
static inline double log2(double n) {
static inline TFloat log2(TFloat n) {
return log(n) / log(2.0);
}
#endif // ANDROID
@ -34,7 +34,22 @@ static inline double log2(double n) {
// Number of iterations after which the correction effectively becomes unity.
const int kAdamCorrectionIterations = 200000;
// Epsilon in Adam to prevent division by zero.
const double kAdamEpsilon = 1e-8;
const TFloat kAdamEpsilon = 1e-8;
// Utility function converts an array of float to the corresponding array
// of double.
static void FloatToDouble(const GENERIC_2D_ARRAY<float> &src, GENERIC_2D_ARRAY<double> &dst) {
const auto dim1 = src.dim1();
const auto dim2 = src.dim2();
dst.ResizeNoInit(dim1, dim2);
for (int i = 0; i < dim1; ++i) {
const auto *src_i = src[i];
auto *dst_i = dst[i];
for (int j = 0; j < dim2; ++j) {
dst_i[j] = static_cast<double>(src_i[j]);
}
}
}
// Computes matrix.vector v = Wu.
// u is of size W.dim2() - add_bias_fwd and the output v is of size
@ -44,13 +59,13 @@ const double kAdamEpsilon = 1e-8;
// If skip_bias_back, we are actually performing the backwards product on a
// transposed matrix, so we need to drop the v output corresponding to the last
// element in dim1.
static inline void MatrixDotVectorInternal(const GENERIC_2D_ARRAY<double> &w, bool add_bias_fwd,
bool skip_bias_back, const double *u, double *v) {
static inline void MatrixDotVectorInternal(const GENERIC_2D_ARRAY<TFloat> &w, bool add_bias_fwd,
bool skip_bias_back, const TFloat *u, TFloat *v) {
int num_results = w.dim1() - skip_bias_back;
int extent = w.dim2() - add_bias_fwd;
for (int i = 0; i < num_results; ++i) {
const double *wi = w[i];
double total = DotProduct(wi, u, extent);
const TFloat *wi = w[i];
TFloat total = DotProduct(wi, u, extent);
if (add_bias_fwd) {
total += wi[extent]; // The bias value.
}
@ -58,8 +73,8 @@ static inline void MatrixDotVectorInternal(const GENERIC_2D_ARRAY<double> &w, bo
}
}
// Copies the whole input transposed, converted to double, into *this.
void TransposedArray::Transpose(const GENERIC_2D_ARRAY<double> &input) {
// Copies the whole input transposed, converted to TFloat, into *this.
void TransposedArray::Transpose(const GENERIC_2D_ARRAY<TFloat> &input) {
int width = input.dim1();
int num_features = input.dim2();
ResizeNoInit(num_features, width);
@ -97,25 +112,25 @@ int WeightMatrix::InitWeightsFloat(int no, int ni, bool use_adam, float weight_r
// for all outputs with negative code_map entries. Returns the new number of
// weights.
int WeightMatrix::RemapOutputs(const std::vector<int> &code_map) {
GENERIC_2D_ARRAY<double> old_wf(wf_);
GENERIC_2D_ARRAY<TFloat> old_wf(wf_);
int old_no = wf_.dim1();
int new_no = code_map.size();
int ni = wf_.dim2();
std::vector<double> means(ni, 0.0);
std::vector<TFloat> means(ni, 0.0);
for (int c = 0; c < old_no; ++c) {
const double *weights = wf_[c];
const TFloat *weights = wf_[c];
for (int i = 0; i < ni; ++i) {
means[i] += weights[i];
}
}
for (double &mean : means) {
for (auto &mean : means) {
mean /= old_no;
}
wf_.Resize(new_no, ni, 0.0);
InitBackward();
for (int dest = 0; dest < new_no; ++dest) {
int src = code_map[dest];
const double *src_data = src >= 0 ? old_wf[src] : means.data();
const TFloat *src_data = src >= 0 ? old_wf[src] : means.data();
memcpy(wf_[dest], src_data, ni * sizeof(*src_data));
}
return ni * new_no;
@ -126,23 +141,23 @@ int WeightMatrix::RemapOutputs(const std::vector<int> &code_map) {
// Compute the max absolute value of the weight set.
// Scale so the max absolute value becomes INT8_MAX.
// Round to integer.
// Store a multiplicative scale factor (as a double) that will reproduce
// Store a multiplicative scale factor (as a TFloat) that will reproduce
// the original value, subject to rounding errors.
void WeightMatrix::ConvertToInt() {
wi_.ResizeNoInit(wf_.dim1(), wf_.dim2());
scales_.reserve(wi_.dim1());
int dim2 = wi_.dim2();
for (int t = 0; t < wi_.dim1(); ++t) {
double *f_line = wf_[t];
TFloat *f_line = wf_[t];
int8_t *i_line = wi_[t];
double max_abs = 0.0;
TFloat max_abs = 0;
for (int f = 0; f < dim2; ++f) {
double abs_val = fabs(f_line[f]);
TFloat abs_val = fabs(f_line[f]);
if (abs_val > max_abs) {
max_abs = abs_val;
}
}
double scale = max_abs / INT8_MAX;
TFloat scale = max_abs / INT8_MAX;
scales_.push_back(scale / INT8_MAX);
if (scale == 0.0) {
scale = 1.0;
@ -291,14 +306,14 @@ bool WeightMatrix::DeSerializeOld(bool training, TFile *fp) {
if (!float_array.DeSerialize(fp)) {
return false;
}
FloatToDouble(float_array, &wf_);
FloatToDouble(float_array, wf_);
}
if (training) {
InitBackward();
if (!float_array.DeSerialize(fp)) {
return false;
}
FloatToDouble(float_array, &updates_);
FloatToDouble(float_array, updates_);
// Errs was only used in int training, which is now dead.
if (!float_array.DeSerialize(fp)) {
return false;
@ -312,12 +327,12 @@ bool WeightMatrix::DeSerializeOld(bool training, TFile *fp) {
// u is imagined to have an extra element at the end with value 1, to
// implement the bias, but it doesn't actually have it.
// Asserts that the call matches what we have.
void WeightMatrix::MatrixDotVector(const double *u, double *v) const {
void WeightMatrix::MatrixDotVector(const TFloat *u, TFloat *v) const {
assert(!int_mode_);
MatrixDotVectorInternal(wf_, true, false, u, v);
}
void WeightMatrix::MatrixDotVector(const int8_t *u, double *v) const {
void WeightMatrix::MatrixDotVector(const int8_t *u, TFloat *v) const {
assert(int_mode_);
if (IntSimdMatrix::intSimdMatrix) {
IntSimdMatrix::intSimdMatrix->matrixDotVectorFunction(wi_.dim1(), wi_.dim2(), &shaped_w_[0],
@ -329,11 +344,11 @@ void WeightMatrix::MatrixDotVector(const int8_t *u, double *v) const {
// MatrixDotVector for peep weights, MultiplyAccumulate adds the
// component-wise products of *this[0] and v to inout.
void WeightMatrix::MultiplyAccumulate(const double *v, double *inout) {
void WeightMatrix::MultiplyAccumulate(const TFloat *v, TFloat *inout) {
assert(!int_mode_);
assert(wf_.dim1() == 1);
int n = wf_.dim2();
const double *u = wf_[0];
const TFloat *u = wf_[0];
for (int i = 0; i < n; ++i) {
inout[i] += u[i] * v[i];
}
@ -343,7 +358,7 @@ void WeightMatrix::MultiplyAccumulate(const double *v, double *inout) {
// u is of size W.dim1() and the output v is of size W.dim2() - 1.
// The last result is discarded, as v is assumed to have an imaginary
// last value of 1, as with MatrixDotVector.
void WeightMatrix::VectorDotMatrix(const double *u, double *v) const {
void WeightMatrix::VectorDotMatrix(const TFloat *u, TFloat *v) const {
assert(!int_mode_);
MatrixDotVectorInternal(wf_t_, false, true, u, v);
}
@ -367,13 +382,13 @@ void WeightMatrix::SumOuterTransposed(const TransposedArray &u, const Transposed
# pragma omp parallel for num_threads(4) if (in_parallel)
#endif
for (int i = 0; i < num_outputs; ++i) {
double *dwi = dw_[i];
const double *ui = u[i];
TFloat *dwi = dw_[i];
const TFloat *ui = u[i];
for (int j = 0; j < num_inputs; ++j) {
dwi[j] = DotProduct(ui, v[j], num_samples);
}
// The last element of v is missing, presumed 1.0f.
double total = 0.0;
TFloat total = 0;
for (int k = 0; k < num_samples; ++k) {
total += ui[k];
}
@ -419,17 +434,17 @@ void WeightMatrix::AddDeltas(const WeightMatrix &other) {
// Sums the products of weight updates in *this and other, splitting into
// positive (same direction) in *same and negative (different direction) in
// *changed.
void WeightMatrix::CountAlternators(const WeightMatrix &other, double *same,
double *changed) const {
void WeightMatrix::CountAlternators(const WeightMatrix &other, TFloat *same,
TFloat *changed) const {
int num_outputs = updates_.dim1();
int num_inputs = updates_.dim2();
assert(num_outputs == other.updates_.dim1());
assert(num_inputs == other.updates_.dim2());
for (int i = 0; i < num_outputs; ++i) {
const double *this_i = updates_[i];
const double *other_i = other.updates_[i];
const TFloat *this_i = updates_[i];
const TFloat *other_i = other.updates_[i];
for (int j = 0; j < num_inputs; ++j) {
double product = this_i[j] * other_i[j];
TFloat product = this_i[j] * other_i[j];
if (product < 0.0) {
*changed -= product;
} else {
@ -442,10 +457,10 @@ void WeightMatrix::CountAlternators(const WeightMatrix &other, double *same,
// Helper computes an integer histogram bucket for a weight and adds it
// to the histogram.
const int kHistogramBuckets = 16;
static void HistogramWeight(double weight, STATS *histogram) {
static void HistogramWeight(TFloat weight, STATS *histogram) {
int bucket = kHistogramBuckets - 1;
if (weight != 0.0) {
double logval = -log2(fabs(weight));
TFloat logval = -log2(fabs(weight));
bucket = ClipToRange(IntCastRounded(logval), 0, kHistogramBuckets - 1);
}
histogram->add(bucket, 1);
@ -470,20 +485,4 @@ void WeightMatrix::Debug2D(const char *msg) {
histogram.print();
}
// Utility function converts an array of float to the corresponding array
// of double.
/* static */
void WeightMatrix::FloatToDouble(const GENERIC_2D_ARRAY<float> &wf, GENERIC_2D_ARRAY<double> *wd) {
int dim1 = wf.dim1();
int dim2 = wf.dim2();
wd->ResizeNoInit(dim1, dim2);
for (int i = 0; i < dim1; ++i) {
const float *wfi = wf[i];
double *wdi = (*wd)[i];
for (int j = 0; j < dim2; ++j) {
wdi[j] = static_cast<double>(wfi[j]);
}
}
}
} // namespace tesseract.
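ConvertToInt above stores scales_[t] = max_abs / INT8_MAX / INT8_MAX per row: one factor undoes the weight quantization and the other undoes the input scaling, since NetworkIO treats an int8 value i as the float i / INT8_MAX. A rough round-trip sketch of that arithmetic, using illustrative helper names rather than the WeightMatrix API:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

using TFloat = double; // stand-in for tesseract::TFloat

// One quantized weight row in the spirit of ConvertToInt: map max |w| to
// INT8_MAX and remember a per-row factor that undoes both the weight and the
// input quantization in a single multiply.
struct QuantizedRow {
  std::vector<int8_t> wi;
  TFloat scale; // corresponds to one entry of scales_
};

static QuantizedRow Quantize(const std::vector<TFloat> &wf) {
  TFloat max_abs = 0;
  for (TFloat w : wf) {
    max_abs = std::max(max_abs, std::fabs(w));
  }
  TFloat scale = max_abs / INT8_MAX;
  if (scale == 0) {
    scale = 1;
  }
  QuantizedRow q;
  q.scale = scale / INT8_MAX; // extra /INT8_MAX undoes the input scaling
  for (TFloat w : wf) {
    q.wi.push_back(static_cast<int8_t>(std::round(w / scale)));
  }
  return q;
}

// Integer dot product followed by one rescale approximates the float product
// of wf with inputs uf in [-1, 1], stored as ui = round(uf * INT8_MAX).
static TFloat DotQuantized(const QuantizedRow &q, const std::vector<int8_t> &ui) {
  int32_t total = 0;
  const int n = static_cast<int>(ui.size());
  for (int k = 0; k < n; ++k) {
    total += q.wi[k] * ui[k];
  }
  return total * q.scale; // ~= sum_k wf[k] * uf[k]
}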


@ -22,17 +22,18 @@
#include <vector>
#include "intsimdmatrix.h"
#include "matrix.h"
#include "tesstypes.h"
#include "tprintf.h"
namespace tesseract {
// Convenience instantiation of GENERIC_2D_ARRAY<double> with additional
// Convenience instantiation of GENERIC_2D_ARRAY<TFloat> with additional
// operations to write a strided vector, so the transposed form of the input
// is memory-contiguous.
class TransposedArray : public GENERIC_2D_ARRAY<double> {
class TransposedArray : public GENERIC_2D_ARRAY<TFloat> {
public:
// Copies the whole input transposed, converted to double, into *this.
void Transpose(const GENERIC_2D_ARRAY<double> &input);
// Copies the whole input transposed, converted to TFloat, into *this.
void Transpose(const GENERIC_2D_ARRAY<TFloat> &input);
// Writes a vector of data representing a timestep (gradients or sources).
// The data is assumed to be of size1 in size (the strided dimension).
~TransposedArray() override;
@ -107,11 +108,11 @@ public:
return int_mode_ ? wi_.dim1() : wf_.dim1();
}
// Provides one set of weights. Only used by peep weight maxpool.
const double *GetWeights(int index) const {
const TFloat *GetWeights(int index) const {
return wf_[index];
}
// Provides access to the deltas (dw_).
double GetDW(int i, int j) const {
TFloat GetDW(int i, int j) const {
return dw_(i, j);
}
@ -132,16 +133,16 @@ public:
// u is imagined to have an extra element at the end with value 1, to
// implement the bias, but it doesn't actually have it.
// Asserts that the call matches what we have.
void MatrixDotVector(const double *u, double *v) const;
void MatrixDotVector(const int8_t *u, double *v) const;
void MatrixDotVector(const TFloat *u, TFloat *v) const;
void MatrixDotVector(const int8_t *u, TFloat *v) const;
// MatrixDotVector for peep weights, MultiplyAccumulate adds the
// component-wise products of *this[0] and v to inout.
void MultiplyAccumulate(const double *v, double *inout);
void MultiplyAccumulate(const TFloat *v, TFloat *inout);
// Computes vector.matrix v = uW.
// u is of size W.dim1() and the output v is of size W.dim2() - 1.
// The last result is discarded, as v is assumed to have an imaginary
// last value of 1, as with MatrixDotVector.
void VectorDotMatrix(const double *u, double *v) const;
void VectorDotMatrix(const TFloat *u, TFloat *v) const;
// Fills dw_[i][j] with the dot product u[i][] . v[j][], using elements
// from u and v, starting with u[i][offset] and v[j][offset].
// Note that (matching MatrixDotVector) v[last][] is missing, presumed 1.0.
@ -155,17 +156,13 @@ public:
// Sums the products of weight updates in *this and other, splitting into
// positive (same direction) in *same and negative (different direction) in
// *changed.
void CountAlternators(const WeightMatrix &other, double *same, double *changed) const;
void CountAlternators(const WeightMatrix &other, TFloat *same, TFloat *changed) const;
void Debug2D(const char *msg);
// Utility function converts an array of float to the corresponding array
// of double.
static void FloatToDouble(const GENERIC_2D_ARRAY<float> &wf, GENERIC_2D_ARRAY<double> *wd);
private:
// Choice between float and 8 bit int implementations.
GENERIC_2D_ARRAY<double> wf_;
GENERIC_2D_ARRAY<TFloat> wf_;
GENERIC_2D_ARRAY<int8_t> wi_;
// Transposed copy of wf_, used only for Backward, and set with each Update.
TransposedArray wf_t_;
@ -175,14 +172,14 @@ private:
bool use_adam_;
// If we are using wi_, then scales_ is a factor to restore the row product
// with a vector to the correct range.
std::vector<double> scales_;
std::vector<TFloat> scales_;
// Weight deltas. dw_ is the new delta, and updates_ the momentum-decaying
// amount to be added to wf_/wi_.
GENERIC_2D_ARRAY<double> dw_;
GENERIC_2D_ARRAY<double> updates_;
GENERIC_2D_ARRAY<TFloat> dw_;
GENERIC_2D_ARRAY<TFloat> updates_;
// Iff use_adam_, the sum of squares of dw_. The number of samples is
// given to Update(). Serialized iff use_adam_.
GENERIC_2D_ARRAY<double> dw_sq_sum_;
GENERIC_2D_ARRAY<TFloat> dw_sq_sum_;
// The weights matrix reorganized in whatever way suits this instance.
std::vector<int8_t> shaped_w_;
};


@ -661,7 +661,7 @@ void LSTMTrainer::ReduceLearningRates(LSTMTrainer *samples_trainer, std::string
// Even if it looks like all weights should remain the same, an adjustment
// will be made to guarantee a different result when reverting to an old best.
// Returns the number of layer learning rates that were reduced.
int LSTMTrainer::ReduceLayerLearningRates(double factor, int num_samples,
int LSTMTrainer::ReduceLayerLearningRates(TFloat factor, int num_samples,
LSTMTrainer *samples_trainer) {
enum WhichWay {
LR_DOWN, // Learning rate will go down by factor.
@ -671,13 +671,13 @@ int LSTMTrainer::ReduceLayerLearningRates(double factor, int num_samples,
std::vector<std::string> layers = EnumerateLayers();
int num_layers = layers.size();
std::vector<int> num_weights(num_layers);
std::vector<double> bad_sums[LR_COUNT];
std::vector<double> ok_sums[LR_COUNT];
std::vector<TFloat> bad_sums[LR_COUNT];
std::vector<TFloat> ok_sums[LR_COUNT];
for (int i = 0; i < LR_COUNT; ++i) {
bad_sums[i].resize(num_layers, 0.0);
ok_sums[i].resize(num_layers, 0.0);
}
double momentum_factor = 1.0 / (1.0 - momentum_);
auto momentum_factor = 1 / (1 - momentum_);
std::vector<char> orig_trainer;
samples_trainer->SaveTrainingDump(LIGHT, *this, &orig_trainer);
for (int i = 0; i < num_layers; ++i) {
@ -689,7 +689,7 @@ int LSTMTrainer::ReduceLayerLearningRates(double factor, int num_samples,
// Which way will we modify the learning rate?
for (int ww = 0; ww < LR_COUNT; ++ww) {
// Transfer momentum to learning rate and adjust by the ww factor.
float ww_factor = momentum_factor;
auto ww_factor = momentum_factor;
if (ww == LR_DOWN) {
ww_factor *= factor;
}
@ -748,10 +748,10 @@ int LSTMTrainer::ReduceLayerLearningRates(double factor, int num_samples,
}
Network *layer = GetLayer(layers[i]);
float lr = GetLayerLearningRate(layers[i]);
double total_down = bad_sums[LR_DOWN][i] + ok_sums[LR_DOWN][i];
double total_same = bad_sums[LR_SAME][i] + ok_sums[LR_SAME][i];
double frac_down = bad_sums[LR_DOWN][i] / total_down;
double frac_same = bad_sums[LR_SAME][i] / total_same;
TFloat total_down = bad_sums[LR_DOWN][i] + ok_sums[LR_DOWN][i];
TFloat total_same = bad_sums[LR_SAME][i] + ok_sums[LR_SAME][i];
TFloat frac_down = bad_sums[LR_DOWN][i] / total_down;
TFloat frac_same = bad_sums[LR_SAME][i] / total_same;
tprintf("Layer %d=%s: lr %g->%g%%, lr %g->%g%%", i, layer->name().c_str(), lr * factor,
100.0 * frac_down, lr, 100.0 * frac_same);
if (frac_down < frac_same * kImprovementFraction) {
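The comparison at the end of the hunk decides whether a layer's learning rate is reduced, based on how often weight updates flipped sign under the two trial rates. A toy calculation of that test; the 0.85 used for kImprovementFraction is an assumed value for illustration, not taken from this diff:

#include <cstdio>

using TFloat = double; // stand-in for tesseract::TFloat

int main() {
  // Per-layer sums produced by CountAlternators under the two trials:
  // "bad" = weight updates that changed direction, "ok" = kept direction.
  TFloat bad_down = 30, ok_down = 70; // trial with the reduced learning rate
  TFloat bad_same = 45, ok_same = 55; // trial with the unchanged learning rate

  TFloat frac_down = bad_down / (bad_down + ok_down); // 0.30
  TFloat frac_same = bad_same / (bad_same + ok_same); // 0.45

  const TFloat kImprovementFraction = 0.85; // assumed value for illustration
  if (frac_down < frac_same * kImprovementFraction) { // 0.30 < 0.3825
    std::printf("reduce this layer's learning rate\n");
  }
  return 0;
}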


@ -237,7 +237,7 @@ public:
// Even if it looks like all weights should remain the same, an adjustment
// will be made to guarantee a different result when reverting to an old best.
// Returns the number of layer learning rates that were reduced.
int ReduceLayerLearningRates(double factor, int num_samples, LSTMTrainer *samples_trainer);
int ReduceLayerLearningRates(TFloat factor, int num_samples, LSTMTrainer *samples_trainer);
// Converts the string to integer class labels, with appropriate null_char_s
// in between if not in SimpleTextOutput mode. Returns false on failure.