mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-20 15:59:11 +08:00
Prepare using float instead of double for LSTM calculations
The new header file ccutils/tesstypes.h also prepares support for larger images by introducing a new data type for image size and coordinates (still unused). FloatToDouble is now a local function. Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
parent
c3fb050daa
commit
66b77e6639
@ -150,10 +150,12 @@ endif
|
||||
if MARCH_NATIVE_OPT
|
||||
libtesseract_native_la_CXXFLAGS += -march=native -mtune=native
|
||||
endif
|
||||
libtesseract_native_la_CXXFLAGS += -I$(top_srcdir)/src/ccutil
|
||||
libtesseract_native_la_SOURCES = src/arch/dotproduct.cpp
|
||||
|
||||
if HAVE_AVX
|
||||
libtesseract_avx_la_CXXFLAGS = -mavx
|
||||
libtesseract_avx_la_CXXFLAGS += -I$(top_srcdir)/src/ccutil
|
||||
libtesseract_avx_la_SOURCES = src/arch/dotproductavx.cpp
|
||||
libtesseract_la_LIBADD += libtesseract_avx.la
|
||||
noinst_LTLIBRARIES += libtesseract_avx.la
|
||||
@ -161,6 +163,7 @@ endif
|
||||
|
||||
if HAVE_AVX2
|
||||
libtesseract_avx2_la_CXXFLAGS = -mavx2
|
||||
libtesseract_avx2_la_CXXFLAGS += -I$(top_srcdir)/src/ccutil
|
||||
libtesseract_avx2_la_SOURCES = src/arch/intsimdmatrixavx2.cpp
|
||||
libtesseract_la_LIBADD += libtesseract_avx2.la
|
||||
noinst_LTLIBRARIES += libtesseract_avx2.la
|
||||
@ -168,6 +171,7 @@ endif
|
||||
|
||||
if HAVE_FMA
|
||||
libtesseract_fma_la_CXXFLAGS = -mfma
|
||||
libtesseract_fma_la_CXXFLAGS += -I$(top_srcdir)/src/ccutil
|
||||
libtesseract_fma_la_SOURCES = src/arch/dotproductfma.cpp
|
||||
libtesseract_la_LIBADD += libtesseract_fma.la
|
||||
noinst_LTLIBRARIES += libtesseract_fma.la
|
||||
@ -175,6 +179,7 @@ endif
|
||||
|
||||
if HAVE_SSE4_1
|
||||
libtesseract_sse_la_CXXFLAGS = -msse4.1
|
||||
libtesseract_sse_la_CXXFLAGS += -I$(top_srcdir)/src/ccutil
|
||||
libtesseract_sse_la_SOURCES = src/arch/dotproductsse.cpp src/arch/intsimdmatrixsse.cpp
|
||||
libtesseract_la_LIBADD += libtesseract_sse.la
|
||||
noinst_LTLIBRARIES += libtesseract_sse.la
|
||||
@ -182,6 +187,7 @@ endif
|
||||
|
||||
if HAVE_NEON
|
||||
libtesseract_neon_la_CXXFLAGS = $(NEON_CXXFLAGS)
|
||||
libtesseract_neon_la_CXXFLAGS += -I$(top_srcdir)/src/ccutil
|
||||
libtesseract_neon_la_SOURCES = src/arch/intsimdmatrixneon.cpp
|
||||
libtesseract_la_LIBADD += libtesseract_neon.la
|
||||
noinst_LTLIBRARIES += libtesseract_neon.la
|
||||
|
@ -19,12 +19,12 @@
|
||||
namespace tesseract {
|
||||
|
||||
// Computes and returns the dot product of the two n-vectors u and v.
|
||||
double DotProductNative(const double *u, const double *v, int n) {
|
||||
double total = 0.0;
|
||||
TFloat DotProductNative(const TFloat *u, const TFloat *v, int n) {
|
||||
TFloat total = 0;
|
||||
#if defined(OPENMP_SIMD) || defined(_OPENMP)
|
||||
#pragma omp simd reduction(+:total)
|
||||
#endif
|
||||
for (int k = 0; k < n; ++k) {
|
||||
for (int k = 0; k < n; k++) {
|
||||
total += u[k] * v[k];
|
||||
}
|
||||
return total;
|
||||
|
@ -17,19 +17,21 @@
|
||||
#ifndef TESSERACT_ARCH_DOTPRODUCT_H_
|
||||
#define TESSERACT_ARCH_DOTPRODUCT_H_
|
||||
|
||||
#include "tesstypes.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Computes and returns the dot product of the n-vectors u and v.
|
||||
double DotProductNative(const double *u, const double *v, int n);
|
||||
TFloat DotProductNative(const TFloat *u, const TFloat *v, int n);
|
||||
|
||||
// Uses Intel AVX intrinsics to access the SIMD instruction set.
|
||||
double DotProductAVX(const double *u, const double *v, int n);
|
||||
TFloat DotProductAVX(const TFloat *u, const TFloat *v, int n);
|
||||
|
||||
// Uses Intel FMA (fused multiply-add) instructions.
|
||||
double DotProductFMA(const double *u, const double *v, int n);
|
||||
TFloat DotProductFMA(const TFloat *u, const TFloat *v, int n);
|
||||
|
||||
// Uses Intel SSE intrinsics to access the SIMD instruction set.
|
||||
double DotProductSSE(const double *u, const double *v, int n);
|
||||
TFloat DotProductSSE(const TFloat *u, const TFloat *v, int n);
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
|
@ -76,7 +76,7 @@ void IntSimdMatrix::Init(const GENERIC_2D_ARRAY<int8_t> &w, std::vector<int8_t>
|
||||
// u is imagined to have an extra element at the end with value 1, to
|
||||
// implement the bias, but it doesn't actually have it.
|
||||
void IntSimdMatrix::MatrixDotVector(const GENERIC_2D_ARRAY<int8_t> &w,
|
||||
const std::vector<double> &scales, const int8_t *u, double *v) {
|
||||
const std::vector<TFloat> &scales, const int8_t *u, TFloat *v) {
|
||||
int num_out = w.dim1();
|
||||
int num_in = w.dim2() - 1;
|
||||
// Base implementation.
|
||||
|
@ -23,6 +23,8 @@
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
#include "tesstypes.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
template <class T>
|
||||
@ -78,8 +80,8 @@ struct TESS_API IntSimdMatrix {
|
||||
// u is imagined to have an extra element at the end with value 1, to
|
||||
// implement the bias, but it doesn't actually have it.
|
||||
// Computes the base C++ implementation.
|
||||
static void MatrixDotVector(const GENERIC_2D_ARRAY<int8_t> &w, const std::vector<double> &scales,
|
||||
const int8_t *u, double *v);
|
||||
static void MatrixDotVector(const GENERIC_2D_ARRAY<int8_t> &w, const std::vector<TFloat> &scales,
|
||||
const int8_t *u, TFloat *v);
|
||||
|
||||
// Rounds the input up to a multiple of the given factor.
|
||||
static int Roundup(int input, int factor) {
|
||||
@ -95,8 +97,8 @@ struct TESS_API IntSimdMatrix {
|
||||
// RoundInputs above.
|
||||
// The input will be over-read to the extent of the padding. There are no
|
||||
// alignment requirements.
|
||||
using MatrixDotVectorFunction = void (*)(int, int, const int8_t *, const double *, const int8_t *,
|
||||
double *);
|
||||
using MatrixDotVectorFunction = void (*)(int, int, const int8_t *, const TFloat *, const int8_t *,
|
||||
TFloat *);
|
||||
MatrixDotVectorFunction matrixDotVectorFunction;
|
||||
|
||||
// Number of 32 bit outputs held in each register.
|
||||
|
@ -19,6 +19,7 @@
|
||||
#if defined(__ARM_NEON)
|
||||
|
||||
# include "intsimdmatrix.h"
|
||||
# include "tesstypes.h"
|
||||
|
||||
# include <algorithm>
|
||||
# include <cstdint>
|
||||
@ -52,9 +53,9 @@ constexpr int kNumInputsPerGroup = 8;
|
||||
// u must be padded out with zeros to
|
||||
// kNumInputsPerGroup*ceil(num_in/kNumInputsPerGroup) elements.
|
||||
static inline void PartialMatrixDotVector8(const int8_t *__restrict wi,
|
||||
const double *__restrict scales,
|
||||
const TFloat *__restrict scales,
|
||||
const int8_t *__restrict u, int num_in,
|
||||
double *__restrict v, int num_out) {
|
||||
TFloat *__restrict v, int num_out) {
|
||||
// Initialize all the results to 0.
|
||||
int32x4_t result0123 = {0, 0, 0, 0};
|
||||
int32x4_t result4567 = {0, 0, 0, 0};
|
||||
@ -163,8 +164,8 @@ static inline void PartialMatrixDotVector8(const int8_t *__restrict wi,
|
||||
}
|
||||
}
|
||||
|
||||
static void matrixDotVector(int dim1, int dim2, const int8_t *wi, const double *scales,
|
||||
const int8_t *u, double *v) {
|
||||
static void matrixDotVector(int dim1, int dim2, const int8_t *wi, const TFloat *scales,
|
||||
const int8_t *u, TFloat *v) {
|
||||
const int num_out = dim1;
|
||||
const int num_in = dim2 - 1;
|
||||
// Each call to a partial_func_ produces group_size outputs, except the
|
||||
@ -196,7 +197,8 @@ const IntSimdMatrix IntSimdMatrix::intSimdMatrixNEON = {
|
||||
// Number of 8 bit inputs in the inputs register.
|
||||
kNumInputsPerRegister,
|
||||
// Number of inputs in each weight group.
|
||||
kNumInputsPerGroup};
|
||||
kNumInputsPerGroup
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
|
@ -69,15 +69,15 @@ static int32_t IntDotProductSSE(const int8_t *u, const int8_t *v, int n) {
|
||||
}
|
||||
|
||||
// Computes part of matrix.vector v = Wu. Computes 1 result.
// wi supplies num_in int8 weights followed by one bias weight at wi[num_in]
// (presumably one row of W - confirm against the caller), *scales is the
// factor applied to that result, and the single output is stored in *v.
|
||||
static void PartialMatrixDotVector1(const int8_t *wi, const double *scales, const int8_t *u,
|
||||
int num_in, double *v) {
|
||||
double total = IntDotProductSSE(u, wi, num_in);
|
||||
static void PartialMatrixDotVector1(const int8_t *wi, const TFloat *scales, const int8_t *u,
|
||||
int num_in, TFloat *v) {
|
||||
// Integer dot product over the num_in real inputs, widened to TFloat.
TFloat total = IntDotProductSSE(u, wi, num_in);
|
||||
// Add in the bias and correct for integer values.
// The bias weight is multiplied by INT8_MAX, presumably standing in for the
// implicit trailing input of u, before the scale factor is applied.
|
||||
*v = (total + wi[num_in] * INT8_MAX) * *scales;
|
||||
}
|
||||
|
||||
static void matrixDotVector(int dim1, int dim2, const int8_t *wi, const double *scales,
|
||||
const int8_t *u, double *v) {
|
||||
static void matrixDotVector(int dim1, int dim2, const int8_t *wi, const TFloat *scales,
|
||||
const int8_t *u, TFloat *v) {
|
||||
const int num_out = dim1;
|
||||
const int num_in = dim2 - 1;
|
||||
int output = 0;
|
||||
@ -99,7 +99,8 @@ const IntSimdMatrix IntSimdMatrix::intSimdMatrixSSE = {
|
||||
// Number of 8 bit inputs in the inputs register.
|
||||
1,
|
||||
// Number of inputs in each weight group.
|
||||
1};
|
||||
1
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
|
@ -93,8 +93,8 @@ bool SIMDDetect::sse_available_;
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_FRAMEWORK_ACCELERATE)
|
||||
// Computes the dot product of the n-vectors u and v by calling vDSP_dotprD
// with unit stride on both inputs.
static double DotProductAccelerate(const double* u, const double* v, int n) {
|
||||
double total = 0.0;
|
||||
static TFloat DotProductAccelerate(const TFloat* u, const TFloat* v, int n) {
|
||||
TFloat total = 0;
|
||||
const int stride = 1;
|
||||
// NOTE(review): vDSP_dotprD is the double-precision vDSP routine, so this
// call only matches while TFloat is an alias for double. If TFloat becomes
// float, the single-precision vDSP_dotpr is needed instead.
vDSP_dotprD(u, stride, v, stride, &total, n);
|
||||
return total;
|
||||
@ -102,8 +102,8 @@ static double DotProductAccelerate(const double* u, const double* v, int n) {
|
||||
#endif
|
||||
|
||||
// Computes and returns the dot product of the two n-vectors u and v.
|
||||
static double DotProductGeneric(const double *u, const double *v, int n) {
|
||||
double total = 0.0;
|
||||
static TFloat DotProductGeneric(const TFloat *u, const TFloat *v, int n) {
|
||||
TFloat total = 0;
|
||||
for (int k = 0; k < n; ++k) {
|
||||
total += u[k] * v[k];
|
||||
}
|
||||
@ -111,8 +111,8 @@ static double DotProductGeneric(const double *u, const double *v, int n) {
|
||||
}
|
||||
|
||||
// Compute dot product using std::inner_product.
// u and v are read for n elements each; the sum of u[k] * v[k] is returned.
|
||||
static double DotProductStdInnerProduct(const double *u, const double *v, int n) {
|
||||
return std::inner_product(u, u + n, v, 0.0);
|
||||
static TFloat DotProductStdInnerProduct(const TFloat *u, const TFloat *v, int n) {
|
||||
// std::inner_product accumulates in the type of its init argument, so the
// zero is cast to TFloat to keep the accumulator in step with the alias.
return std::inner_product(u, u + n, v, static_cast<TFloat>(0));
|
||||
}
|
||||
|
||||
static void SetDotProduct(DotProductFunction f, const IntSimdMatrix *m = nullptr) {
|
||||
|
@ -18,11 +18,12 @@
|
||||
#define TESSERACT_ARCH_SIMDDETECT_H_
|
||||
|
||||
#include <tesseract/export.h>
|
||||
#include "tesstypes.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Function pointer for best calculation of dot product.
|
||||
using DotProductFunction = double (*)(const double *, const double *, int);
|
||||
using DotProductFunction = TFloat (*)(const TFloat *, const TFloat *, int);
|
||||
extern DotProductFunction DotProduct;
|
||||
|
||||
// Architecture detector. Add code here to detect any other architectures for
|
||||
|
32
src/ccutil/tesstypes.h
Normal file
32
src/ccutil/tesstypes.h
Normal file
@ -0,0 +1,32 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: tesstypes.h
|
||||
// Description: Simple data types used by Tesseract code.
|
||||
// Author: Stefan Weil
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_TESSTYPES_H
|
||||
#define TESSERACT_TESSTYPES_H
|
||||
|
||||
#include <cstdint> // for int16_t
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Image dimensions (width and height, coordinates).
// As int16_t the largest representable value is 32767; widening this alias
// is the intended hook for larger images. The commit description marks the
// type as still unused.
|
||||
using TDimension = int16_t;
|
||||
|
||||
// Floating point data type used for LSTM calculations.
// Currently double; code written against TFloat follows whatever type this
// alias names, so precision is selected here.
|
||||
using TFloat = double;
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // TESSERACT_TESSTYPES_H
|
@ -156,7 +156,7 @@ void FullyConnected::Forward(bool debug, const NetworkIO &input,
|
||||
// Thread-local pointer to temporary storage.
|
||||
int thread_id = 0;
|
||||
#endif
|
||||
double *temp_line = temp_lines[thread_id];
|
||||
TFloat *temp_line = temp_lines[thread_id];
|
||||
if (input.int_mode()) {
|
||||
ForwardTimeStep(input.i(t), t, temp_line);
|
||||
} else {
|
||||
@ -200,7 +200,7 @@ void FullyConnected::SetupForward(const NetworkIO &input, const TransposedArray
|
||||
}
|
||||
}
|
||||
|
||||
void FullyConnected::ForwardTimeStep(int t, double *output_line) {
|
||||
void FullyConnected::ForwardTimeStep(int t, TFloat *output_line) {
|
||||
if (type_ == NT_TANH) {
|
||||
FuncInplace<GFunc>(no_, output_line);
|
||||
} else if (type_ == NT_LOGISTIC) {
|
||||
@ -218,7 +218,7 @@ void FullyConnected::ForwardTimeStep(int t, double *output_line) {
|
||||
}
|
||||
}
|
||||
|
||||
void FullyConnected::ForwardTimeStep(const double *d_input, int t, double *output_line) {
|
||||
void FullyConnected::ForwardTimeStep(const TFloat *d_input, int t, TFloat *output_line) {
|
||||
// input is copied to source_ line-by-line for cache coherency.
|
||||
if (IsTraining() && external_source_ == nullptr) {
|
||||
source_t_.WriteStrided(t, d_input);
|
||||
@ -227,7 +227,7 @@ void FullyConnected::ForwardTimeStep(const double *d_input, int t, double *outpu
|
||||
ForwardTimeStep(t, output_line);
|
||||
}
|
||||
|
||||
void FullyConnected::ForwardTimeStep(const int8_t *i_input, int t, double *output_line) {
|
||||
void FullyConnected::ForwardTimeStep(const int8_t *i_input, int t, TFloat *output_line) {
|
||||
// Integer input is passed straight to the matrix-vector product; nothing is copied to source_ here.
|
||||
weights_.MatrixDotVector(i_input, output_line);
|
||||
ForwardTimeStep(t, output_line);
|
||||
@ -265,11 +265,11 @@ bool FullyConnected::Backward(bool debug, const NetworkIO &fwd_deltas, NetworkSc
|
||||
for (int t = 0; t < width; ++t) {
|
||||
int thread_id = 0;
|
||||
#endif
|
||||
double *backprop = nullptr;
|
||||
TFloat *backprop = nullptr;
|
||||
if (needs_to_backprop_) {
|
||||
backprop = temp_backprops[thread_id];
|
||||
}
|
||||
double *curr_errors = errors[thread_id];
|
||||
TFloat *curr_errors = errors[thread_id];
|
||||
BackwardTimeStep(fwd_deltas, t, curr_errors, errors_t.get(), backprop);
|
||||
if (backprop != nullptr) {
|
||||
back_deltas->WriteTimeStep(t, backprop);
|
||||
@ -287,8 +287,8 @@ bool FullyConnected::Backward(bool debug, const NetworkIO &fwd_deltas, NetworkSc
|
||||
return false; // No point going further back.
|
||||
}
|
||||
|
||||
void FullyConnected::BackwardTimeStep(const NetworkIO &fwd_deltas, int t, double *curr_errors,
|
||||
TransposedArray *errors_t, double *backprop) {
|
||||
void FullyConnected::BackwardTimeStep(const NetworkIO &fwd_deltas, int t, TFloat *curr_errors,
|
||||
TransposedArray *errors_t, TFloat *backprop) {
|
||||
if (type_ == NT_TANH) {
|
||||
acts_.FuncMultiply<GPrime>(fwd_deltas, t, curr_errors);
|
||||
} else if (type_ == NT_LOGISTIC) {
|
||||
@ -328,7 +328,7 @@ void FullyConnected::Update(float learning_rate, float momentum, float adam_beta
|
||||
// Sums the products of weight updates in *this and other, splitting into
|
||||
// positive (same direction) in *same and negative (different direction) in
|
||||
// *changed.
|
||||
void FullyConnected::CountAlternators(const Network &other, double *same, double *changed) const {
|
||||
void FullyConnected::CountAlternators(const Network &other, TFloat *same, TFloat *changed) const {
|
||||
ASSERT_HOST(other.type() == type_);
|
||||
const auto *fc = static_cast<const FullyConnected *>(&other);
|
||||
weights_.CountAlternators(fc->weights_, same, changed);
|
||||
|
@ -20,6 +20,7 @@
|
||||
|
||||
#include "network.h"
|
||||
#include "networkscratch.h"
|
||||
#include "tesstypes.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
@ -90,17 +91,17 @@ public:
|
||||
NetworkScratch *scratch, NetworkIO *output) override;
|
||||
// Components of Forward so FullyConnected can be reused inside LSTM.
|
||||
void SetupForward(const NetworkIO &input, const TransposedArray *input_transpose);
|
||||
void ForwardTimeStep(int t, double *output_line);
|
||||
void ForwardTimeStep(const double *d_input, int t, double *output_line);
|
||||
void ForwardTimeStep(const int8_t *i_input, int t, double *output_line);
|
||||
void ForwardTimeStep(int t, TFloat *output_line);
|
||||
void ForwardTimeStep(const TFloat *d_input, int t, TFloat *output_line);
|
||||
void ForwardTimeStep(const int8_t *i_input, int t, TFloat *output_line);
|
||||
|
||||
// Runs backward propagation of errors on the deltas line.
|
||||
// See Network for a detailed discussion of the arguments.
|
||||
bool Backward(bool debug, const NetworkIO &fwd_deltas, NetworkScratch *scratch,
|
||||
NetworkIO *back_deltas) override;
|
||||
// Components of Backward so FullyConnected can be reused inside LSTM.
|
||||
void BackwardTimeStep(const NetworkIO &fwd_deltas, int t, double *curr_errors,
|
||||
TransposedArray *errors_t, double *backprop);
|
||||
void BackwardTimeStep(const NetworkIO &fwd_deltas, int t, TFloat *curr_errors,
|
||||
TransposedArray *errors_t, TFloat *backprop);
|
||||
void FinishBackward(const TransposedArray &errors_t);
|
||||
|
||||
// Updates the weights using the given learning rate, momentum and adam_beta.
|
||||
@ -109,7 +110,7 @@ public:
|
||||
// Sums the products of weight updates in *this and other, splitting into
|
||||
// positive (same direction) in *same and negative (different direction) in
|
||||
// *changed.
|
||||
void CountAlternators(const Network &other, double *same, double *changed) const override;
|
||||
void CountAlternators(const Network &other, TFloat *same, TFloat *changed) const override;
|
||||
|
||||
protected:
|
||||
// Weight arrays of size [no, ni + 1].
|
||||
|
@ -1,7 +1,7 @@
|
||||
// Generated code with lookup tables
|
||||
#include "functions.h"
|
||||
namespace tesseract {
|
||||
const double TanhTable[] = {
|
||||
const TFloat TanhTable[] = {
|
||||
0.0,
|
||||
0.00390623013190634,
|
||||
0.007812341058161014,
|
||||
@ -4099,7 +4099,7 @@ const double TanhTable[] = {
|
||||
0.9999999999999742,
|
||||
0.9999999999999745,
|
||||
};
|
||||
const double LogisticTable[] = {
|
||||
const TFloat LogisticTable[] = {
|
||||
0.5,
|
||||
0.5009765612582384,
|
||||
0.5019531150659532,
|
||||
|
@ -19,6 +19,7 @@
|
||||
#define TESSERACT_LSTM_FUNCTIONS_H_
|
||||
|
||||
#include "helpers.h"
|
||||
#include "tesstypes.h"
|
||||
|
||||
// Setting this to 1 or more causes massive dumps of debug data: weights,
|
||||
// updates, internal calculations etc, and reduces the number of test iterations
|
||||
@ -33,14 +34,14 @@ namespace tesseract {
|
||||
// Size of static tables.
|
||||
constexpr int kTableSize = 4096;
|
||||
// Scale factor for float arg to int index.
|
||||
constexpr double kScaleFactor = 256.0;
|
||||
constexpr TFloat kScaleFactor = 256.0;
|
||||
|
||||
// Generated lookup tables.
|
||||
extern const double TanhTable[];
|
||||
extern const double LogisticTable[];
|
||||
extern const TFloat TanhTable[];
|
||||
extern const TFloat LogisticTable[];
|
||||
|
||||
// Non-linearity (sigmoid) functions with cache tables and clipping.
|
||||
inline double Tanh(double x) {
|
||||
inline TFloat Tanh(TFloat x) {
|
||||
if (x < 0.0) {
|
||||
return -Tanh(-x);
|
||||
}
|
||||
@ -49,13 +50,13 @@ inline double Tanh(double x) {
|
||||
if (index >= (kTableSize - 1)) {
|
||||
return 1.0;
|
||||
}
|
||||
double tanh_i0 = TanhTable[index];
|
||||
double tanh_i1 = TanhTable[index + 1];
|
||||
TFloat tanh_i0 = TanhTable[index];
|
||||
TFloat tanh_i1 = TanhTable[index + 1];
|
||||
// Linear interpolation.
|
||||
return tanh_i0 + (tanh_i1 - tanh_i0) * (x - index);
|
||||
}
|
||||
|
||||
inline double Logistic(double x) {
|
||||
inline TFloat Logistic(TFloat x) {
|
||||
if (x < 0.0) {
|
||||
return 1.0 - Logistic(-x);
|
||||
}
|
||||
@ -64,25 +65,25 @@ inline double Logistic(double x) {
|
||||
if (index >= (kTableSize - 1)) {
|
||||
return 1.0;
|
||||
}
|
||||
double l0 = LogisticTable[index];
|
||||
double l1 = LogisticTable[index + 1];
|
||||
TFloat l0 = LogisticTable[index];
|
||||
TFloat l1 = LogisticTable[index + 1];
|
||||
// Linear interpolation.
|
||||
return l0 + (l1 - l0) * (x - index);
|
||||
}
|
||||
|
||||
// Non-linearity (sigmoid) functions and their derivatives.
|
||||
// Functor wrapper around Logistic: operator() returns Logistic(x).
struct FFunc {
|
||||
inline double operator()(double x) const {
|
||||
inline TFloat operator()(TFloat x) const {
|
||||
return Logistic(x);
|
||||
}
|
||||
};
|
||||
// Returns y * (1 - y). Presumably y is an already-computed Logistic output,
// for which this product is the logistic derivative - confirm at call sites.
struct FPrime {
|
||||
inline double operator()(double y) const {
|
||||
inline TFloat operator()(TFloat y) const {
|
||||
// NOTE(review): the 1.0 literal is a double, so this expression is
// evaluated in double even if TFloat is later narrowed to float.
return y * (1.0 - y);
|
||||
}
|
||||
};
|
||||
struct ClipFFunc {
|
||||
inline double operator()(double x) const {
|
||||
inline TFloat operator()(TFloat x) const {
|
||||
if (x <= 0.0) {
|
||||
return 0.0;
|
||||
}
|
||||
@ -93,12 +94,12 @@ struct ClipFFunc {
|
||||
}
|
||||
};
|
||||
// Returns 1 when y lies strictly between 0 and 1, and 0 otherwise
// (both boundary values map to 0).
struct ClipFPrime {
|
||||
inline double operator()(double y) const {
|
||||
inline TFloat operator()(TFloat y) const {
|
||||
return 0.0 < y && y < 1.0 ? 1.0 : 0.0;
|
||||
}
|
||||
};
|
||||
struct Relu {
|
||||
inline double operator()(double x) const {
|
||||
inline TFloat operator()(TFloat x) const {
|
||||
if (x <= 0.0) {
|
||||
return 0.0;
|
||||
}
|
||||
@ -106,22 +107,22 @@ struct Relu {
|
||||
}
|
||||
};
|
||||
// Returns 1 for y > 0 and 0 otherwise (y == 0 maps to 0).
struct ReluPrime {
|
||||
inline double operator()(double y) const {
|
||||
inline TFloat operator()(TFloat y) const {
|
||||
return 0.0 < y ? 1.0 : 0.0;
|
||||
}
|
||||
};
|
||||
// Functor wrapper around Tanh: operator() returns Tanh(x).
struct GFunc {
|
||||
inline double operator()(double x) const {
|
||||
inline TFloat operator()(TFloat x) const {
|
||||
return Tanh(x);
|
||||
}
|
||||
};
|
||||
// Returns 1 - y * y. Presumably y is an already-computed Tanh output, for
// which this is the tanh derivative - confirm at call sites.
struct GPrime {
|
||||
inline double operator()(double y) const {
|
||||
inline TFloat operator()(TFloat y) const {
|
||||
// NOTE(review): the 1.0 literal is a double, so the subtraction is done in
// double even if TFloat is later narrowed to float.
return 1.0 - y * y;
|
||||
}
|
||||
};
|
||||
struct ClipGFunc {
|
||||
inline double operator()(double x) const {
|
||||
inline TFloat operator()(TFloat x) const {
|
||||
if (x <= -1.0) {
|
||||
return -1.0;
|
||||
}
|
||||
@ -132,35 +133,35 @@ struct ClipGFunc {
|
||||
}
|
||||
};
|
||||
// Returns 1 when y lies strictly between -1 and 1, and 0 otherwise
// (both boundary values map to 0).
struct ClipGPrime {
|
||||
inline double operator()(double y) const {
|
||||
inline TFloat operator()(TFloat y) const {
|
||||
return -1.0 < y && y < 1.0 ? 1.0 : 0.0;
|
||||
}
|
||||
};
|
||||
// Functor wrapper around Tanh: operator() returns Tanh(x).
// The body is the same as GFunc's; only the type name differs.
struct HFunc {
|
||||
inline double operator()(double x) const {
|
||||
inline TFloat operator()(TFloat x) const {
|
||||
return Tanh(x);
|
||||
}
|
||||
};
|
||||
// Returns 1 - Tanh(y)^2. Unlike GPrime, which squares its argument directly,
// this functor first passes y through Tanh and squares that result.
struct HPrime {
|
||||
inline double operator()(double y) const {
|
||||
double u = Tanh(y);
|
||||
return 1.0 - u * u;
|
||||
inline TFloat operator()(TFloat y) const {
|
||||
TFloat u = Tanh(y);
|
||||
// The integer literal 1 converts to TFloat, so the expression stays in
// TFloat precision rather than being promoted to double by a 1.0 literal.
return 1 - u * u;
|
||||
}
|
||||
};
|
||||
// Ignores its argument and always returns 1.
struct UnityFunc {
|
||||
inline double operator()(double /*x*/) const {
|
||||
inline TFloat operator()(TFloat /*x*/) const {
|
||||
return 1.0;
|
||||
}
|
||||
};
|
||||
// Returns its argument unchanged.
struct IdentityFunc {
|
||||
inline double operator()(double x) const {
|
||||
inline TFloat operator()(TFloat x) const {
|
||||
return x;
|
||||
}
|
||||
};
|
||||
|
||||
// Applies Func in-place to inout, of size n.
|
||||
template <class Func>
|
||||
inline void FuncInplace(int n, double *inout) {
|
||||
inline void FuncInplace(int n, TFloat *inout) {
|
||||
Func f;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
inout[i] = f(inout[i]);
|
||||
@ -169,7 +170,7 @@ inline void FuncInplace(int n, double *inout) {
|
||||
// Applies Func to u and multiplies the result by v component-wise,
|
||||
// putting the product in out, all of size n.
|
||||
template <class Func>
|
||||
inline void FuncMultiply(const double *u, const double *v, int n, double *out) {
|
||||
inline void FuncMultiply(const TFloat *u, const TFloat *v, int n, TFloat *out) {
|
||||
Func f;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
out[i] = f(u[i]) * v[i];
|
||||
@ -206,34 +207,34 @@ inline void SoftmaxInPlace(int n, T *inout) {
|
||||
}
|
||||
|
||||
// Copies n values of the given src vector to dest.
// Implemented with memcpy, so src and dest must not overlap.
|
||||
inline void CopyVector(int n, const double *src, double *dest) {
|
||||
inline void CopyVector(int n, const TFloat *src, TFloat *dest) {
|
||||
// The byte count is derived from dest's element type, so it follows the
// TFloat alias without further changes.
memcpy(dest, src, n * sizeof(dest[0]));
|
||||
}
|
||||
|
||||
// Adds n values of the given src vector to dest.
// Element-wise: dest[i] += src[i] for every i in [0, n).
|
||||
inline void AccumulateVector(int n, const double *src, double *dest) {
|
||||
inline void AccumulateVector(int n, const TFloat *src, TFloat *dest) {
|
||||
for (int i = 0; i < n; ++i) {
|
||||
dest[i] += src[i];
|
||||
}
|
||||
}
|
||||
|
||||
// Multiplies n values of inout in-place element-wise by the given src vector.
// Element-wise: inout[i] *= src[i] for every i in [0, n).
|
||||
inline void MultiplyVectorsInPlace(int n, const double *src, double *inout) {
|
||||
inline void MultiplyVectorsInPlace(int n, const TFloat *src, TFloat *inout) {
|
||||
for (int i = 0; i < n; ++i) {
|
||||
inout[i] *= src[i];
|
||||
}
|
||||
}
|
||||
|
||||
// Multiplies n values of u by v, element-wise, accumulating to out.
// Element-wise: out[i] += u[i] * v[i] for every i in [0, n); out is not
// cleared first, so its existing contents are part of the result.
|
||||
inline void MultiplyAccumulate(int n, const double *u, const double *v, double *out) {
|
||||
inline void MultiplyAccumulate(int n, const TFloat *u, const TFloat *v, TFloat *out) {
|
||||
for (int i = 0; i < n; i++) {
|
||||
out[i] += u[i] * v[i];
|
||||
}
|
||||
}
|
||||
|
||||
// Sums the given 5 n-vectors putting the result into sum.
// Element-wise: sum[i] = v1[i] + v2[i] + v3[i] + v4[i] + v5[i]; any previous
// contents of sum are overwritten.
|
||||
inline void SumVectors(int n, const double *v1, const double *v2, const double *v3,
|
||||
const double *v4, const double *v5, double *sum) {
|
||||
inline void SumVectors(int n, const TFloat *v1, const TFloat *v2, const TFloat *v3,
|
||||
const TFloat *v4, const TFloat *v5, TFloat *sum) {
|
||||
for (int i = 0; i < n; ++i) {
|
||||
sum[i] = v1[i] + v2[i] + v3[i] + v4[i] + v5[i];
|
||||
}
|
||||
}
|
||||
@ -255,12 +256,12 @@ inline void ClipVector(int n, T lower, T upper, T *vec) {
|
||||
|
||||
// Converts the given n-vector to a binary encoding of the maximum value,
|
||||
// encoded as vector of nf binary values.
|
||||
inline void CodeInBinary(int n, int nf, double *vec) {
|
||||
inline void CodeInBinary(int n, int nf, TFloat *vec) {
|
||||
if (nf <= 0 || n < nf) {
|
||||
return;
|
||||
}
|
||||
int index = 0;
|
||||
double best_score = vec[0];
|
||||
TFloat best_score = vec[0];
|
||||
for (int i = 1; i < n; ++i) {
|
||||
if (vec[i] > best_score) {
|
||||
best_score = vec[i];
|
||||
|
@ -68,9 +68,9 @@ namespace tesseract {
|
||||
|
||||
// Max absolute value of state_. It is reasonably high to enable the state
|
||||
// to count things.
|
||||
const double kStateClip = 100.0;
|
||||
const TFloat kStateClip = 100.0;
|
||||
// Max absolute value of gate_errors (the gradients).
|
||||
const double kErrClip = 1.0f;
|
||||
const TFloat kErrClip = 1.0f;
|
||||
|
||||
// Calculate ceil(log2(n)).
|
||||
static inline uint32_t ceil_log2(uint32_t n) {
|
||||
@ -312,9 +312,9 @@ void LSTM::Forward(bool debug, const NetworkIO &input, const TransposedArray *in
|
||||
// Single timestep buffers for the current/recurrent output and state.
|
||||
NetworkScratch::FloatVec curr_state, curr_output;
|
||||
curr_state.Init(ns_, scratch);
|
||||
ZeroVector<double>(ns_, curr_state);
|
||||
ZeroVector<TFloat>(ns_, curr_state);
|
||||
curr_output.Init(ns_, scratch);
|
||||
ZeroVector<double>(ns_, curr_output);
|
||||
ZeroVector<TFloat>(ns_, curr_output);
|
||||
// Rotating buffers of width buf_width allow storage of the state and output
|
||||
// for the other dimension, used only when working in true 2D mode. The width
|
||||
// is enough to hold an entire strip of the major direction.
|
||||
@ -325,9 +325,9 @@ void LSTM::Forward(bool debug, const NetworkIO &input, const TransposedArray *in
|
||||
outputs.resize(buf_width);
|
||||
for (int i = 0; i < buf_width; ++i) {
|
||||
states[i].Init(ns_, scratch);
|
||||
ZeroVector<double>(ns_, states[i]);
|
||||
ZeroVector<TFloat>(ns_, states[i]);
|
||||
outputs[i].Init(ns_, scratch);
|
||||
ZeroVector<double>(ns_, outputs[i]);
|
||||
ZeroVector<TFloat>(ns_, outputs[i]);
|
||||
}
|
||||
}
|
||||
// Used only if a softmax LSTM.
|
||||
@ -335,7 +335,7 @@ void LSTM::Forward(bool debug, const NetworkIO &input, const TransposedArray *in
|
||||
NetworkScratch::IO int_output;
|
||||
if (softmax_ != nullptr) {
|
||||
softmax_output.Init(no_, scratch);
|
||||
ZeroVector<double>(no_, softmax_output);
|
||||
ZeroVector<TFloat>(no_, softmax_output);
|
||||
int rounded_softmax_inputs = gate_weights_[CI].RoundInputs(ns_);
|
||||
if (input.int_mode()) {
|
||||
int_output.Resize2d(true, 1, rounded_softmax_inputs, scratch);
|
||||
@ -429,7 +429,7 @@ void LSTM::Forward(bool debug, const NetworkIO &input, const TransposedArray *in
|
||||
int8_t *which_fg_col = which_fg_[t];
|
||||
memset(which_fg_col, 1, ns_ * sizeof(which_fg_col[0]));
|
||||
if (valid_2d) {
|
||||
const double *stepped_state = states[mod_t];
|
||||
const TFloat *stepped_state = states[mod_t];
|
||||
for (int i = 0; i < ns_; ++i) {
|
||||
if (temp_lines[GF1][i] < temp_lines[GFS][i]) {
|
||||
curr_state[i] = temp_lines[GFS][i] * stepped_state[i];
|
||||
@ -440,7 +440,7 @@ void LSTM::Forward(bool debug, const NetworkIO &input, const TransposedArray *in
|
||||
}
|
||||
MultiplyAccumulate(ns_, temp_lines[CI], temp_lines[GI], curr_state);
|
||||
// Clip curr_state to a sane range.
|
||||
ClipVector<double>(ns_, -kStateClip, kStateClip, curr_state);
|
||||
ClipVector<TFloat>(ns_, -kStateClip, kStateClip, curr_state);
|
||||
if (IsTraining()) {
|
||||
// Save the gate node values.
|
||||
node_values_[CI].WriteTimeStep(t, temp_lines[CI]);
|
||||
@ -483,8 +483,8 @@ void LSTM::Forward(bool debug, const NetworkIO &input, const TransposedArray *in
|
||||
// Always zero the states at the end of every row, but only for the major
|
||||
// direction. The 2-D state remains intact.
|
||||
if (src_index.IsLast(FD_WIDTH)) {
|
||||
ZeroVector<double>(ns_, curr_state);
|
||||
ZeroVector<double>(ns_, curr_output);
|
||||
ZeroVector<TFloat>(ns_, curr_state);
|
||||
ZeroVector<TFloat>(ns_, curr_output);
|
||||
}
|
||||
} while (src_index.Increment());
|
||||
#if DEBUG_DETAIL > 0
|
||||
@ -520,8 +520,8 @@ bool LSTM::Backward(bool debug, const NetworkIO &fwd_deltas, NetworkScratch *scr
|
||||
NetworkScratch::FloatVec curr_stateerr, curr_sourceerr;
|
||||
curr_stateerr.Init(ns_, scratch);
|
||||
curr_sourceerr.Init(na_, scratch);
|
||||
ZeroVector<double>(ns_, curr_stateerr);
|
||||
ZeroVector<double>(na_, curr_sourceerr);
|
||||
ZeroVector<TFloat>(ns_, curr_stateerr);
|
||||
ZeroVector<TFloat>(na_, curr_sourceerr);
|
||||
// Errors in the gates.
|
||||
NetworkScratch::FloatVec gate_errors[WT_COUNT];
|
||||
for (auto &gate_error : gate_errors) {
|
||||
@ -537,8 +537,8 @@ bool LSTM::Backward(bool debug, const NetworkIO &fwd_deltas, NetworkScratch *scr
|
||||
for (int t = 0; t < buf_width; ++t) {
|
||||
stateerr[t].Init(ns_, scratch);
|
||||
sourceerr[t].Init(na_, scratch);
|
||||
ZeroVector<double>(ns_, stateerr[t]);
|
||||
ZeroVector<double>(na_, sourceerr[t]);
|
||||
ZeroVector<TFloat>(ns_, stateerr[t]);
|
||||
ZeroVector<TFloat>(na_, sourceerr[t]);
|
||||
}
|
||||
}
|
||||
// Parallel-generated sourceerr from each of the gates.
|
||||
@ -559,7 +559,7 @@ bool LSTM::Backward(bool debug, const NetworkIO &fwd_deltas, NetworkScratch *scr
|
||||
softmax_errors.Init(no_, scratch);
|
||||
softmax_errors_t.Init(no_, width, scratch);
|
||||
}
|
||||
double state_clip = Is2D() ? 9.0 : 4.0;
|
||||
TFloat state_clip = Is2D() ? 9.0 : 4.0;
|
||||
#if DEBUG_DETAIL > 1
|
||||
tprintf("fwd_deltas:%s\n", name_.c_str());
|
||||
fwd_deltas.Print(10);
|
||||
@ -594,8 +594,8 @@ bool LSTM::Backward(bool debug, const NetworkIO &fwd_deltas, NetworkScratch *scr
|
||||
int mod_t = Modulo(t, buf_width); // Current timestep.
|
||||
// Zero the state in the major direction only at the end of every row.
|
||||
if (at_last_x) {
|
||||
ZeroVector<double>(na_, curr_sourceerr);
|
||||
ZeroVector<double>(ns_, curr_stateerr);
|
||||
ZeroVector<TFloat>(na_, curr_sourceerr);
|
||||
ZeroVector<TFloat>(ns_, curr_stateerr);
|
||||
}
|
||||
// Setup the outputerr.
|
||||
if (type_ == NT_LSTM_SUMMARY) {
|
||||
@ -603,7 +603,7 @@ bool LSTM::Backward(bool debug, const NetworkIO &fwd_deltas, NetworkScratch *scr
|
||||
fwd_deltas.ReadTimeStep(src_index.t(), outputerr);
|
||||
src_index.Decrement();
|
||||
} else {
|
||||
ZeroVector<double>(ns_, outputerr);
|
||||
ZeroVector<TFloat>(ns_, outputerr);
|
||||
}
|
||||
} else if (softmax_ == nullptr) {
|
||||
fwd_deltas.ReadTimeStep(t, outputerr);
|
||||
@ -631,7 +631,7 @@ bool LSTM::Backward(bool debug, const NetworkIO &fwd_deltas, NetworkScratch *scr
|
||||
}
|
||||
if (down_pos >= 0) {
|
||||
const float *right_node_gfs = node_values_[GFS].f(down_pos);
|
||||
const double *right_stateerr = stateerr[mod_t];
|
||||
const TFloat *right_stateerr = stateerr[mod_t];
|
||||
for (int i = 0; i < ns_; ++i) {
|
||||
if (which_fg_[down_pos][i] == 2) {
|
||||
curr_stateerr[i] += right_stateerr[i] * right_node_gfs[i];
|
||||
@ -641,7 +641,7 @@ bool LSTM::Backward(bool debug, const NetworkIO &fwd_deltas, NetworkScratch *scr
|
||||
}
|
||||
state_.FuncMultiply3Add<HPrime>(node_values_[GO], t, outputerr, curr_stateerr);
|
||||
// Clip stateerr_ to a sane range.
|
||||
ClipVector<double>(ns_, -state_clip, state_clip, curr_stateerr);
|
||||
ClipVector<TFloat>(ns_, -state_clip, state_clip, curr_stateerr);
|
||||
#if DEBUG_DETAIL > 1
|
||||
if (t + 10 > width) {
|
||||
tprintf("t=%d, stateerr=", t);
|
||||
@ -758,7 +758,7 @@ void LSTM::Update(float learning_rate, float momentum, float adam_beta, int num_
|
||||
// Sums the products of weight updates in *this and other, splitting into
|
||||
// positive (same direction) in *same and negative (different direction) in
|
||||
// *changed.
|
||||
void LSTM::CountAlternators(const Network &other, double *same, double *changed) const {
|
||||
void LSTM::CountAlternators(const Network &other, TFloat *same, TFloat *changed) const {
|
||||
ASSERT_HOST(other.type() == type_);
|
||||
const LSTM *lstm = static_cast<const LSTM *>(&other);
|
||||
for (int w = 0; w < WT_COUNT; ++w) {
|
||||
|
@ -109,7 +109,7 @@ public:
|
||||
// Sums the products of weight updates in *this and other, splitting into
|
||||
// positive (same direction) in *same and negative (different direction) in
|
||||
// *changed.
|
||||
void CountAlternators(const Network &other, double *same, double *changed) const override;
|
||||
void CountAlternators(const Network &other, TFloat *same, TFloat *changed) const override;
|
||||
// Prints the weights for debug purposes.
|
||||
void PrintW();
|
||||
// Prints the weight deltas for debug purposes.
|
||||
|
@ -321,7 +321,7 @@ Network *Network::CreateFromFile(TFile *fp) {
|
||||
}
|
||||
|
||||
// Returns a random number in [-range, range].
|
||||
double Network::Random(double range) {
|
||||
TFloat Network::Random(TFloat range) {
|
||||
ASSERT_HOST(randomizer_ != nullptr);
|
||||
return randomizer_->SignedRand(range);
|
||||
}
|
||||
|
@ -235,7 +235,7 @@ public:
|
||||
// Sums the products of weight updates in *this and other, splitting into
|
||||
// positive (same direction) in *same and negative (different direction) in
|
||||
// *changed.
|
||||
virtual void CountAlternators(const Network &other, double *same, double *changed) const {}
|
||||
virtual void CountAlternators(const Network &other, TFloat *same, TFloat *changed) const {}
|
||||
|
||||
// Reads from the given file. Returns nullptr in case of error.
|
||||
// Determines the type of the serialized class and calls its DeSerialize
|
||||
@ -287,7 +287,7 @@ public:
|
||||
|
||||
protected:
|
||||
// Returns a random number in [-range, range].
|
||||
double Random(double range);
|
||||
TFloat Random(TFloat range);
|
||||
|
||||
protected:
|
||||
NetworkType type_; // Type of the derived network class.
|
||||
|
@ -529,9 +529,9 @@ int NetworkIO::PositionOfBestMatch(const std::vector<int> &labels, int start, in
|
||||
int length = labels.size();
|
||||
int last_start = end - length;
|
||||
int best_start = -1;
|
||||
double best_score = 0.0;
|
||||
TFloat best_score = 0;
|
||||
for (int s = start; s <= last_start; ++s) {
|
||||
double score = ScoreOfLabels(labels, s);
|
||||
TFloat score = ScoreOfLabels(labels, s);
|
||||
if (score > best_score || best_start < 0) {
|
||||
best_score = score;
|
||||
best_start = s;
|
||||
@ -542,9 +542,9 @@ int NetworkIO::PositionOfBestMatch(const std::vector<int> &labels, int start, in
|
||||
|
||||
// Returns the cumulative score of the given labels starting at start, and
|
||||
// using one label per time-step.
|
||||
double NetworkIO::ScoreOfLabels(const std::vector<int> &labels, int start) const {
|
||||
TFloat NetworkIO::ScoreOfLabels(const std::vector<int> &labels, int start) const {
|
||||
int length = labels.size();
|
||||
double score = 0.0;
|
||||
TFloat score = 0;
|
||||
for (int i = 0; i < length; ++i) {
|
||||
score += f_(start + i, labels[i]);
|
||||
}
|
||||
@ -615,27 +615,27 @@ bool NetworkIO::AnySuspiciousTruth(float confidence_thr) const {
|
||||
}
|
||||
|
||||
// Reads a single timestep to floats in the range [-1, 1].
|
||||
void NetworkIO::ReadTimeStep(int t, double *output) const {
|
||||
void NetworkIO::ReadTimeStep(int t, TFloat *output) const {
|
||||
if (int_mode_) {
|
||||
const int8_t *line = i_[t];
|
||||
for (int i = 0; i < i_.dim2(); ++i) {
|
||||
output[i] = static_cast<double>(line[i]) / INT8_MAX;
|
||||
output[i] = static_cast<TFloat>(line[i]) / INT8_MAX;
|
||||
}
|
||||
} else {
|
||||
const float *line = f_[t];
|
||||
for (int i = 0; i < f_.dim2(); ++i) {
|
||||
output[i] = static_cast<double>(line[i]);
|
||||
output[i] = static_cast<TFloat>(line[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Adds a single timestep to floats.
|
||||
void NetworkIO::AddTimeStep(int t, double *inout) const {
|
||||
void NetworkIO::AddTimeStep(int t, TFloat *inout) const {
|
||||
int num_features = NumFeatures();
|
||||
if (int_mode_) {
|
||||
const int8_t *line = i_[t];
|
||||
for (int i = 0; i < num_features; ++i) {
|
||||
inout[i] += static_cast<double>(line[i]) / INT8_MAX;
|
||||
inout[i] += static_cast<TFloat>(line[i]) / INT8_MAX;
|
||||
}
|
||||
} else {
|
||||
const float *line = f_[t];
|
||||
@ -661,13 +661,13 @@ void NetworkIO::AddTimeStepPart(int t, int offset, int num_features, float *inou
|
||||
}
|
||||
|
||||
// Writes a single timestep from floats in the range [-1, 1].
|
||||
void NetworkIO::WriteTimeStep(int t, const double *input) {
|
||||
void NetworkIO::WriteTimeStep(int t, const TFloat *input) {
|
||||
WriteTimeStepPart(t, 0, NumFeatures(), input);
|
||||
}
|
||||
|
||||
// Writes a single timestep from floats in the range [-1, 1] writing only
|
||||
// num_features elements of input to (*this)[t], starting at offset.
|
||||
void NetworkIO::WriteTimeStepPart(int t, int offset, int num_features, const double *input) {
|
||||
void NetworkIO::WriteTimeStepPart(int t, int offset, int num_features, const TFloat *input) {
|
||||
if (int_mode_) {
|
||||
int8_t *line = i_[t] + offset;
|
||||
for (int i = 0; i < num_features; ++i) {
|
||||
|
@ -172,7 +172,7 @@ public:
|
||||
int PositionOfBestMatch(const std::vector<int> &labels, int start, int end) const;
|
||||
// Returns the cumulative score of the given labels starting at start, and
|
||||
// using one label per time-step.
|
||||
double ScoreOfLabels(const std::vector<int> &labels, int start) const;
|
||||
TFloat ScoreOfLabels(const std::vector<int> &labels, int start) const;
|
||||
// Helper function sets all the outputs for a single timestep, such that
|
||||
// label has value ok_score, and the other labels share 1 - ok_score.
|
||||
// Assumes float mode.
|
||||
@ -193,16 +193,16 @@ public:
|
||||
bool AnySuspiciousTruth(float confidence_thr) const;
|
||||
|
||||
// Reads a single timestep to floats in the range [-1, 1].
|
||||
void ReadTimeStep(int t, double *output) const;
|
||||
void ReadTimeStep(int t, TFloat *output) const;
|
||||
// Adds a single timestep to floats.
|
||||
void AddTimeStep(int t, double *inout) const;
|
||||
void AddTimeStep(int t, TFloat *inout) const;
|
||||
// Adds part of a single timestep to floats.
|
||||
void AddTimeStepPart(int t, int offset, int num_features, float *inout) const;
|
||||
// Writes a single timestep from floats in the range [-1, 1].
|
||||
void WriteTimeStep(int t, const double *input);
|
||||
void WriteTimeStep(int t, const TFloat *input);
|
||||
// Writes a single timestep from floats in the range [-1, 1] writing only
|
||||
// num_features elements of input to (*this)[t], starting at offset.
|
||||
void WriteTimeStepPart(int t, int offset, int num_features, const double *input);
|
||||
void WriteTimeStepPart(int t, int offset, int num_features, const TFloat *input);
|
||||
// Maxpools a single time step from src.
|
||||
void MaxpoolTimeStep(int dest_t, const NetworkIO &src, int src_t, int *max_line);
|
||||
// Runs maxpool backward, using maxes to index timesteps in *this.
|
||||
@ -253,9 +253,9 @@ public:
|
||||
|
||||
// Applies Func to timestep t of *this (u) and multiplies the result by v
|
||||
// component-wise, putting the product in *product.
|
||||
// *this and v may be int or float, but must match. The outputs are double.
|
||||
// *this and v may be int or float, but must match. The outputs are TFloat.
|
||||
template <class Func>
|
||||
void FuncMultiply(const NetworkIO &v_io, int t, double *product) {
|
||||
void FuncMultiply(const NetworkIO &v_io, int t, TFloat *product) {
|
||||
Func f;
|
||||
ASSERT_HOST(!int_mode_);
|
||||
ASSERT_HOST(!v_io.int_mode_);
|
||||
@ -264,7 +264,7 @@ public:
|
||||
const int8_t *u = i_[t];
|
||||
const int8_t *v = v_io.i_[t];
|
||||
for (int i = 0; i < dim; ++i) {
|
||||
product[i] = f(u[i] / static_cast<double>(INT8_MAX)) * v[i] / static_cast<double>(INT8_MAX);
|
||||
product[i] = f(u[i] / static_cast<TFloat>(INT8_MAX)) * v[i] / INT8_MAX;
|
||||
}
|
||||
} else {
|
||||
const float *u = f_[t];
|
||||
@ -278,8 +278,8 @@ public:
|
||||
// component-wise, putting the product in *product.
|
||||
// All NetworkIOs are assumed to be float.
|
||||
template <class Func>
|
||||
void FuncMultiply3(int u_t, const NetworkIO &v_io, int v_t, const double *w,
|
||||
double *product) const {
|
||||
void FuncMultiply3(int u_t, const NetworkIO &v_io, int v_t, const TFloat *w,
|
||||
TFloat *product) const {
|
||||
ASSERT_HOST(!int_mode_);
|
||||
ASSERT_HOST(!v_io.int_mode_);
|
||||
Func f;
|
||||
@ -294,7 +294,7 @@ public:
|
||||
// component-wise, adding the product to *product.
|
||||
// All NetworkIOs are assumed to be float.
|
||||
template <class Func>
|
||||
void FuncMultiply3Add(const NetworkIO &v_io, int t, const double *w, double *product) const {
|
||||
void FuncMultiply3Add(const NetworkIO &v_io, int t, const TFloat *w, TFloat *product) const {
|
||||
ASSERT_HOST(!int_mode_);
|
||||
ASSERT_HOST(!v_io.int_mode_);
|
||||
Func f;
|
||||
@ -309,7 +309,7 @@ public:
|
||||
// component-wise, putting the product in product, all at timestep t, except
|
||||
// w, which is a simple array. All NetworkIOs are assumed to be float.
|
||||
template <class Func1, class Func2>
|
||||
void Func2Multiply3(const NetworkIO &v_io, int t, const double *w, double *product) const {
|
||||
void Func2Multiply3(const NetworkIO &v_io, int t, const TFloat *w, TFloat *product) const {
|
||||
ASSERT_HOST(!int_mode_);
|
||||
ASSERT_HOST(!v_io.int_mode_);
|
||||
Func1 f;
|
||||
|
@ -156,25 +156,25 @@ public:
|
||||
}
|
||||
|
||||
// Use the cast operator instead of operator[] so the FloatVec can be used
|
||||
// as a double* argument to a function call.
|
||||
operator double *() const {
|
||||
// as a TFloat* argument to a function call.
|
||||
operator TFloat *() const {
|
||||
return data_;
|
||||
}
|
||||
double *get() {
|
||||
TFloat *get() {
|
||||
return data_;
|
||||
}
|
||||
|
||||
private:
|
||||
// Vector borrowed from the scratch space. Use Return to free it.
|
||||
std::vector<double> *vec_;
|
||||
std::vector<TFloat> *vec_;
|
||||
// Short-cut pointer to the underlying array.
|
||||
double *data_;
|
||||
TFloat *data_;
|
||||
// The source scratch_space_. Borrowed pointer, used to free the
|
||||
// vector. Don't delete!
|
||||
NetworkScratch *scratch_space_;
|
||||
}; // class FloatVec
|
||||
|
||||
// Class that acts like a 2-D array of double, yet actually uses space
|
||||
// Class that acts like a 2-D array of TFloat, yet actually uses space
|
||||
// from the source NetworkScratch, and knows how to unstack the borrowed
|
||||
// array on destruction.
|
||||
class GradientStore {
|
||||
@ -270,7 +270,7 @@ private:
|
||||
// deleted until the NetworkScratch is deleted.
|
||||
Stack<NetworkIO> int_stack_;
|
||||
Stack<NetworkIO> float_stack_;
|
||||
Stack<std::vector<double>> vec_stack_;
|
||||
Stack<std::vector<TFloat>> vec_stack_;
|
||||
Stack<TransposedArray> array_stack_;
|
||||
};
|
||||
|
||||
|
@ -255,7 +255,7 @@ void Plumbing::Update(float learning_rate, float momentum, float adam_beta, int
|
||||
// Sums the products of weight updates in *this and other, splitting into
|
||||
// positive (same direction) in *same and negative (different direction) in
|
||||
// *changed.
|
||||
void Plumbing::CountAlternators(const Network &other, double *same, double *changed) const {
|
||||
void Plumbing::CountAlternators(const Network &other, TFloat *same, TFloat *changed) const {
|
||||
ASSERT_HOST(other.type() == type_);
|
||||
const auto *plumbing = static_cast<const Plumbing *>(&other);
|
||||
ASSERT_HOST(plumbing->stack_.size() == stack_.size());
|
||||
|
@ -143,7 +143,7 @@ public:
|
||||
// Sums the products of weight updates in *this and other, splitting into
|
||||
// positive (same direction) in *same and negative (different direction) in
|
||||
// *changed.
|
||||
void CountAlternators(const Network &other, double *same, double *changed) const override;
|
||||
void CountAlternators(const Network &other, TFloat *same, TFloat *changed) const override;
|
||||
|
||||
protected:
|
||||
// The networks.
|
||||
|
@ -26,7 +26,7 @@
|
||||
namespace tesseract {
|
||||
|
||||
#if defined(ANDROID)
|
||||
static inline double log2(double n) {
|
||||
static inline TFloat log2(TFloat n) {
|
||||
return log(n) / log(2.0);
|
||||
}
|
||||
#endif // ANDROID
|
||||
@ -34,7 +34,22 @@ static inline double log2(double n) {
|
||||
// Number of iterations after which the correction effectively becomes unity.
|
||||
const int kAdamCorrectionIterations = 200000;
|
||||
// Epsilon in Adam to prevent division by zero.
|
||||
const double kAdamEpsilon = 1e-8;
|
||||
const TFloat kAdamEpsilon = 1e-8;
|
||||
|
||||
// Utility function converts an array of float to the corresponding array
|
||||
// of double.
|
||||
static void FloatToDouble(const GENERIC_2D_ARRAY<float> &src, GENERIC_2D_ARRAY<double> &dst) {
|
||||
const auto dim1 = src.dim1();
|
||||
const auto dim2 = src.dim2();
|
||||
dst.ResizeNoInit(dim1, dim2);
|
||||
for (int i = 0; i < dim1; ++i) {
|
||||
const auto *src_i = src[i];
|
||||
auto *dst_i = dst[i];
|
||||
for (int j = 0; j < dim2; ++j) {
|
||||
dst_i[j] = static_cast<double>(src_i[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Computes matrix.vector v = Wu.
|
||||
// u is of size W.dim2() - add_bias_fwd and the output v is of size
|
||||
@ -44,13 +59,13 @@ const double kAdamEpsilon = 1e-8;
|
||||
// If skip_bias_back, we are actually performing the backwards product on a
|
||||
// transposed matrix, so we need to drop the v output corresponding to the last
|
||||
// element in dim1.
|
||||
static inline void MatrixDotVectorInternal(const GENERIC_2D_ARRAY<double> &w, bool add_bias_fwd,
|
||||
bool skip_bias_back, const double *u, double *v) {
|
||||
static inline void MatrixDotVectorInternal(const GENERIC_2D_ARRAY<TFloat> &w, bool add_bias_fwd,
|
||||
bool skip_bias_back, const TFloat *u, TFloat *v) {
|
||||
int num_results = w.dim1() - skip_bias_back;
|
||||
int extent = w.dim2() - add_bias_fwd;
|
||||
for (int i = 0; i < num_results; ++i) {
|
||||
const double *wi = w[i];
|
||||
double total = DotProduct(wi, u, extent);
|
||||
const TFloat *wi = w[i];
|
||||
TFloat total = DotProduct(wi, u, extent);
|
||||
if (add_bias_fwd) {
|
||||
total += wi[extent]; // The bias value.
|
||||
}
|
||||
@ -58,8 +73,8 @@ static inline void MatrixDotVectorInternal(const GENERIC_2D_ARRAY<double> &w, bo
|
||||
}
|
||||
}
|
||||
|
||||
// Copies the whole input transposed, converted to double, into *this.
|
||||
void TransposedArray::Transpose(const GENERIC_2D_ARRAY<double> &input) {
|
||||
// Copies the whole input transposed, converted to TFloat, into *this.
|
||||
void TransposedArray::Transpose(const GENERIC_2D_ARRAY<TFloat> &input) {
|
||||
int width = input.dim1();
|
||||
int num_features = input.dim2();
|
||||
ResizeNoInit(num_features, width);
|
||||
@ -97,25 +112,25 @@ int WeightMatrix::InitWeightsFloat(int no, int ni, bool use_adam, float weight_r
|
||||
// for all outputs with negative code_map entries. Returns the new number of
|
||||
// weights.
|
||||
int WeightMatrix::RemapOutputs(const std::vector<int> &code_map) {
|
||||
GENERIC_2D_ARRAY<double> old_wf(wf_);
|
||||
GENERIC_2D_ARRAY<TFloat> old_wf(wf_);
|
||||
int old_no = wf_.dim1();
|
||||
int new_no = code_map.size();
|
||||
int ni = wf_.dim2();
|
||||
std::vector<double> means(ni, 0.0);
|
||||
std::vector<TFloat> means(ni, 0.0);
|
||||
for (int c = 0; c < old_no; ++c) {
|
||||
const double *weights = wf_[c];
|
||||
const TFloat *weights = wf_[c];
|
||||
for (int i = 0; i < ni; ++i) {
|
||||
means[i] += weights[i];
|
||||
}
|
||||
}
|
||||
for (double &mean : means) {
|
||||
for (auto &mean : means) {
|
||||
mean /= old_no;
|
||||
}
|
||||
wf_.Resize(new_no, ni, 0.0);
|
||||
InitBackward();
|
||||
for (int dest = 0; dest < new_no; ++dest) {
|
||||
int src = code_map[dest];
|
||||
const double *src_data = src >= 0 ? old_wf[src] : means.data();
|
||||
const TFloat *src_data = src >= 0 ? old_wf[src] : means.data();
|
||||
memcpy(wf_[dest], src_data, ni * sizeof(*src_data));
|
||||
}
|
||||
return ni * new_no;
|
||||
@ -126,23 +141,23 @@ int WeightMatrix::RemapOutputs(const std::vector<int> &code_map) {
|
||||
// Compute the max absolute value of the weight set.
|
||||
// Scale so the max absolute value becomes INT8_MAX.
|
||||
// Round to integer.
|
||||
// Store a multiplicative scale factor (as a double) that will reproduce
|
||||
// Store a multiplicative scale factor (as a TFloat) that will reproduce
|
||||
// the original value, subject to rounding errors.
|
||||
void WeightMatrix::ConvertToInt() {
|
||||
wi_.ResizeNoInit(wf_.dim1(), wf_.dim2());
|
||||
scales_.reserve(wi_.dim1());
|
||||
int dim2 = wi_.dim2();
|
||||
for (int t = 0; t < wi_.dim1(); ++t) {
|
||||
double *f_line = wf_[t];
|
||||
TFloat *f_line = wf_[t];
|
||||
int8_t *i_line = wi_[t];
|
||||
double max_abs = 0.0;
|
||||
TFloat max_abs = 0;
|
||||
for (int f = 0; f < dim2; ++f) {
|
||||
double abs_val = fabs(f_line[f]);
|
||||
TFloat abs_val = fabs(f_line[f]);
|
||||
if (abs_val > max_abs) {
|
||||
max_abs = abs_val;
|
||||
}
|
||||
}
|
||||
double scale = max_abs / INT8_MAX;
|
||||
TFloat scale = max_abs / INT8_MAX;
|
||||
scales_.push_back(scale / INT8_MAX);
|
||||
if (scale == 0.0) {
|
||||
scale = 1.0;
|
||||
@ -291,14 +306,14 @@ bool WeightMatrix::DeSerializeOld(bool training, TFile *fp) {
|
||||
if (!float_array.DeSerialize(fp)) {
|
||||
return false;
|
||||
}
|
||||
FloatToDouble(float_array, &wf_);
|
||||
FloatToDouble(float_array, wf_);
|
||||
}
|
||||
if (training) {
|
||||
InitBackward();
|
||||
if (!float_array.DeSerialize(fp)) {
|
||||
return false;
|
||||
}
|
||||
FloatToDouble(float_array, &updates_);
|
||||
FloatToDouble(float_array, updates_);
|
||||
// Errs was only used in int training, which is now dead.
|
||||
if (!float_array.DeSerialize(fp)) {
|
||||
return false;
|
||||
@ -312,12 +327,12 @@ bool WeightMatrix::DeSerializeOld(bool training, TFile *fp) {
|
||||
// u is imagined to have an extra element at the end with value 1, to
|
||||
// implement the bias, but it doesn't actually have it.
|
||||
// Asserts that the call matches what we have.
|
||||
void WeightMatrix::MatrixDotVector(const double *u, double *v) const {
|
||||
void WeightMatrix::MatrixDotVector(const TFloat *u, TFloat *v) const {
|
||||
assert(!int_mode_);
|
||||
MatrixDotVectorInternal(wf_, true, false, u, v);
|
||||
}
|
||||
|
||||
void WeightMatrix::MatrixDotVector(const int8_t *u, double *v) const {
|
||||
void WeightMatrix::MatrixDotVector(const int8_t *u, TFloat *v) const {
|
||||
assert(int_mode_);
|
||||
if (IntSimdMatrix::intSimdMatrix) {
|
||||
IntSimdMatrix::intSimdMatrix->matrixDotVectorFunction(wi_.dim1(), wi_.dim2(), &shaped_w_[0],
|
||||
@ -329,11 +344,11 @@ void WeightMatrix::MatrixDotVector(const int8_t *u, double *v) const {
|
||||
|
||||
// MatrixDotVector for peep weights, MultiplyAccumulate adds the
|
||||
// component-wise products of *this[0] and v to inout.
|
||||
void WeightMatrix::MultiplyAccumulate(const double *v, double *inout) {
|
||||
void WeightMatrix::MultiplyAccumulate(const TFloat *v, TFloat *inout) {
|
||||
assert(!int_mode_);
|
||||
assert(wf_.dim1() == 1);
|
||||
int n = wf_.dim2();
|
||||
const double *u = wf_[0];
|
||||
const TFloat *u = wf_[0];
|
||||
for (int i = 0; i < n; ++i) {
|
||||
inout[i] += u[i] * v[i];
|
||||
}
|
||||
@ -343,7 +358,7 @@ void WeightMatrix::MultiplyAccumulate(const double *v, double *inout) {
|
||||
// u is of size W.dim1() and the output v is of size W.dim2() - 1.
|
||||
// The last result is discarded, as v is assumed to have an imaginary
|
||||
// last value of 1, as with MatrixDotVector.
|
||||
void WeightMatrix::VectorDotMatrix(const double *u, double *v) const {
|
||||
void WeightMatrix::VectorDotMatrix(const TFloat *u, TFloat *v) const {
|
||||
assert(!int_mode_);
|
||||
MatrixDotVectorInternal(wf_t_, false, true, u, v);
|
||||
}
|
||||
@ -367,13 +382,13 @@ void WeightMatrix::SumOuterTransposed(const TransposedArray &u, const Transposed
|
||||
# pragma omp parallel for num_threads(4) if (in_parallel)
|
||||
#endif
|
||||
for (int i = 0; i < num_outputs; ++i) {
|
||||
double *dwi = dw_[i];
|
||||
const double *ui = u[i];
|
||||
TFloat *dwi = dw_[i];
|
||||
const TFloat *ui = u[i];
|
||||
for (int j = 0; j < num_inputs; ++j) {
|
||||
dwi[j] = DotProduct(ui, v[j], num_samples);
|
||||
}
|
||||
// The last element of v is missing, presumed 1.0f.
|
||||
double total = 0.0;
|
||||
TFloat total = 0;
|
||||
for (int k = 0; k < num_samples; ++k) {
|
||||
total += ui[k];
|
||||
}
|
||||
@ -419,17 +434,17 @@ void WeightMatrix::AddDeltas(const WeightMatrix &other) {
|
||||
// Sums the products of weight updates in *this and other, splitting into
|
||||
// positive (same direction) in *same and negative (different direction) in
|
||||
// *changed.
|
||||
void WeightMatrix::CountAlternators(const WeightMatrix &other, double *same,
|
||||
double *changed) const {
|
||||
void WeightMatrix::CountAlternators(const WeightMatrix &other, TFloat *same,
|
||||
TFloat *changed) const {
|
||||
int num_outputs = updates_.dim1();
|
||||
int num_inputs = updates_.dim2();
|
||||
assert(num_outputs == other.updates_.dim1());
|
||||
assert(num_inputs == other.updates_.dim2());
|
||||
for (int i = 0; i < num_outputs; ++i) {
|
||||
const double *this_i = updates_[i];
|
||||
const double *other_i = other.updates_[i];
|
||||
const TFloat *this_i = updates_[i];
|
||||
const TFloat *other_i = other.updates_[i];
|
||||
for (int j = 0; j < num_inputs; ++j) {
|
||||
double product = this_i[j] * other_i[j];
|
||||
TFloat product = this_i[j] * other_i[j];
|
||||
if (product < 0.0) {
|
||||
*changed -= product;
|
||||
} else {
|
||||
@ -442,10 +457,10 @@ void WeightMatrix::CountAlternators(const WeightMatrix &other, double *same,
|
||||
// Helper computes an integer histogram bucket for a weight and adds it
|
||||
// to the histogram.
|
||||
const int kHistogramBuckets = 16;
|
||||
static void HistogramWeight(double weight, STATS *histogram) {
|
||||
static void HistogramWeight(TFloat weight, STATS *histogram) {
|
||||
int bucket = kHistogramBuckets - 1;
|
||||
if (weight != 0.0) {
|
||||
double logval = -log2(fabs(weight));
|
||||
TFloat logval = -log2(fabs(weight));
|
||||
bucket = ClipToRange(IntCastRounded(logval), 0, kHistogramBuckets - 1);
|
||||
}
|
||||
histogram->add(bucket, 1);
|
||||
@ -470,20 +485,4 @@ void WeightMatrix::Debug2D(const char *msg) {
|
||||
histogram.print();
|
||||
}
|
||||
|
||||
// Utility function converts an array of float to the corresponding array
|
||||
// of double.
|
||||
/* static */
|
||||
void WeightMatrix::FloatToDouble(const GENERIC_2D_ARRAY<float> &wf, GENERIC_2D_ARRAY<double> *wd) {
|
||||
int dim1 = wf.dim1();
|
||||
int dim2 = wf.dim2();
|
||||
wd->ResizeNoInit(dim1, dim2);
|
||||
for (int i = 0; i < dim1; ++i) {
|
||||
const float *wfi = wf[i];
|
||||
double *wdi = (*wd)[i];
|
||||
for (int j = 0; j < dim2; ++j) {
|
||||
wdi[j] = static_cast<double>(wfi[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace tesseract.
|
||||
|
@ -22,17 +22,18 @@
|
||||
#include <vector>
|
||||
#include "intsimdmatrix.h"
|
||||
#include "matrix.h"
|
||||
#include "tesstypes.h"
|
||||
#include "tprintf.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Convenience instantiation of GENERIC_2D_ARRAY<double> with additional
|
||||
// Convenience instantiation of GENERIC_2D_ARRAY<TFloat> with additional
|
||||
// operations to write a strided vector, so the transposed form of the input
|
||||
// is memory-contiguous.
|
||||
class TransposedArray : public GENERIC_2D_ARRAY<double> {
|
||||
class TransposedArray : public GENERIC_2D_ARRAY<TFloat> {
|
||||
public:
|
||||
// Copies the whole input transposed, converted to double, into *this.
|
||||
void Transpose(const GENERIC_2D_ARRAY<double> &input);
|
||||
// Copies the whole input transposed, converted to TFloat, into *this.
|
||||
void Transpose(const GENERIC_2D_ARRAY<TFloat> &input);
|
||||
// Writes a vector of data representing a timestep (gradients or sources).
|
||||
// The data is assumed to be of size1 in size (the strided dimension).
|
||||
~TransposedArray() override;
|
||||
@ -107,11 +108,11 @@ public:
|
||||
return int_mode_ ? wi_.dim1() : wf_.dim1();
|
||||
}
|
||||
// Provides one set of weights. Only used by peep weight maxpool.
|
||||
const double *GetWeights(int index) const {
|
||||
const TFloat *GetWeights(int index) const {
|
||||
return wf_[index];
|
||||
}
|
||||
// Provides access to the deltas (dw_).
|
||||
double GetDW(int i, int j) const {
|
||||
TFloat GetDW(int i, int j) const {
|
||||
return dw_(i, j);
|
||||
}
|
||||
|
||||
@ -132,16 +133,16 @@ public:
|
||||
// u is imagined to have an extra element at the end with value 1, to
|
||||
// implement the bias, but it doesn't actually have it.
|
||||
// Asserts that the call matches what we have.
|
||||
void MatrixDotVector(const double *u, double *v) const;
|
||||
void MatrixDotVector(const int8_t *u, double *v) const;
|
||||
void MatrixDotVector(const TFloat *u, TFloat *v) const;
|
||||
void MatrixDotVector(const int8_t *u, TFloat *v) const;
|
||||
// MatrixDotVector for peep weights, MultiplyAccumulate adds the
|
||||
// component-wise products of *this[0] and v to inout.
|
||||
void MultiplyAccumulate(const double *v, double *inout);
|
||||
void MultiplyAccumulate(const TFloat *v, TFloat *inout);
|
||||
// Computes vector.matrix v = uW.
|
||||
// u is of size W.dim1() and the output v is of size W.dim2() - 1.
|
||||
// The last result is discarded, as v is assumed to have an imaginary
|
||||
// last value of 1, as with MatrixDotVector.
|
||||
void VectorDotMatrix(const double *u, double *v) const;
|
||||
void VectorDotMatrix(const TFloat *u, TFloat *v) const;
|
||||
// Fills dw_[i][j] with the dot product u[i][] . v[j][], using elements
|
||||
// from u and v, starting with u[i][offset] and v[j][offset].
|
||||
// Note that (matching MatrixDotVector) v[last][] is missing, presumed 1.0.
|
||||
@ -155,17 +156,13 @@ public:
|
||||
// Sums the products of weight updates in *this and other, splitting into
|
||||
// positive (same direction) in *same and negative (different direction) in
|
||||
// *changed.
|
||||
void CountAlternators(const WeightMatrix &other, double *same, double *changed) const;
|
||||
void CountAlternators(const WeightMatrix &other, TFloat *same, TFloat *changed) const;
|
||||
|
||||
void Debug2D(const char *msg);
|
||||
|
||||
// Utility function converts an array of float to the corresponding array
|
||||
// of double.
|
||||
static void FloatToDouble(const GENERIC_2D_ARRAY<float> &wf, GENERIC_2D_ARRAY<double> *wd);
|
||||
|
||||
private:
|
||||
// Choice between float and 8 bit int implementations.
|
||||
GENERIC_2D_ARRAY<double> wf_;
|
||||
GENERIC_2D_ARRAY<TFloat> wf_;
|
||||
GENERIC_2D_ARRAY<int8_t> wi_;
|
||||
// Transposed copy of wf_, used only for Backward, and set with each Update.
|
||||
TransposedArray wf_t_;
|
||||
@ -175,14 +172,14 @@ private:
|
||||
bool use_adam_;
|
||||
// If we are using wi_, then scales_ is a factor to restore the row product
|
||||
// with a vector to the correct range.
|
||||
std::vector<double> scales_;
|
||||
std::vector<TFloat> scales_;
|
||||
// Weight deltas. dw_ is the new delta, and updates_ the momentum-decaying
|
||||
// amount to be added to wf_/wi_.
|
||||
GENERIC_2D_ARRAY<double> dw_;
|
||||
GENERIC_2D_ARRAY<double> updates_;
|
||||
GENERIC_2D_ARRAY<TFloat> dw_;
|
||||
GENERIC_2D_ARRAY<TFloat> updates_;
|
||||
// Iff use_adam_, the sum of squares of dw_. The number of samples is
|
||||
// given to Update(). Serialized iff use_adam_.
|
||||
GENERIC_2D_ARRAY<double> dw_sq_sum_;
|
||||
GENERIC_2D_ARRAY<TFloat> dw_sq_sum_;
|
||||
// The weights matrix reorganized in whatever way suits this instance.
|
||||
std::vector<int8_t> shaped_w_;
|
||||
};
|
||||
|
@ -661,7 +661,7 @@ void LSTMTrainer::ReduceLearningRates(LSTMTrainer *samples_trainer, std::string
|
||||
// Even if it looks like all weights should remain the same, an adjustment
|
||||
// will be made to guarantee a different result when reverting to an old best.
|
||||
// Returns the number of layer learning rates that were reduced.
|
||||
int LSTMTrainer::ReduceLayerLearningRates(double factor, int num_samples,
|
||||
int LSTMTrainer::ReduceLayerLearningRates(TFloat factor, int num_samples,
|
||||
LSTMTrainer *samples_trainer) {
|
||||
enum WhichWay {
|
||||
LR_DOWN, // Learning rate will go down by factor.
|
||||
@ -671,13 +671,13 @@ int LSTMTrainer::ReduceLayerLearningRates(double factor, int num_samples,
|
||||
std::vector<std::string> layers = EnumerateLayers();
|
||||
int num_layers = layers.size();
|
||||
std::vector<int> num_weights(num_layers);
|
||||
std::vector<double> bad_sums[LR_COUNT];
|
||||
std::vector<double> ok_sums[LR_COUNT];
|
||||
std::vector<TFloat> bad_sums[LR_COUNT];
|
||||
std::vector<TFloat> ok_sums[LR_COUNT];
|
||||
for (int i = 0; i < LR_COUNT; ++i) {
|
||||
bad_sums[i].resize(num_layers, 0.0);
|
||||
ok_sums[i].resize(num_layers, 0.0);
|
||||
}
|
||||
double momentum_factor = 1.0 / (1.0 - momentum_);
|
||||
auto momentum_factor = 1 / (1 - momentum_);
|
||||
std::vector<char> orig_trainer;
|
||||
samples_trainer->SaveTrainingDump(LIGHT, *this, &orig_trainer);
|
||||
for (int i = 0; i < num_layers; ++i) {
|
||||
@ -689,7 +689,7 @@ int LSTMTrainer::ReduceLayerLearningRates(double factor, int num_samples,
|
||||
// Which way will we modify the learning rate?
|
||||
for (int ww = 0; ww < LR_COUNT; ++ww) {
|
||||
// Transfer momentum to learning rate and adjust by the ww factor.
|
||||
float ww_factor = momentum_factor;
|
||||
auto ww_factor = momentum_factor;
|
||||
if (ww == LR_DOWN) {
|
||||
ww_factor *= factor;
|
||||
}
|
||||
@ -748,10 +748,10 @@ int LSTMTrainer::ReduceLayerLearningRates(double factor, int num_samples,
|
||||
}
|
||||
Network *layer = GetLayer(layers[i]);
|
||||
float lr = GetLayerLearningRate(layers[i]);
|
||||
double total_down = bad_sums[LR_DOWN][i] + ok_sums[LR_DOWN][i];
|
||||
double total_same = bad_sums[LR_SAME][i] + ok_sums[LR_SAME][i];
|
||||
double frac_down = bad_sums[LR_DOWN][i] / total_down;
|
||||
double frac_same = bad_sums[LR_SAME][i] / total_same;
|
||||
TFloat total_down = bad_sums[LR_DOWN][i] + ok_sums[LR_DOWN][i];
|
||||
TFloat total_same = bad_sums[LR_SAME][i] + ok_sums[LR_SAME][i];
|
||||
TFloat frac_down = bad_sums[LR_DOWN][i] / total_down;
|
||||
TFloat frac_same = bad_sums[LR_SAME][i] / total_same;
|
||||
tprintf("Layer %d=%s: lr %g->%g%%, lr %g->%g%%", i, layer->name().c_str(), lr * factor,
|
||||
100.0 * frac_down, lr, 100.0 * frac_same);
|
||||
if (frac_down < frac_same * kImprovementFraction) {
|
||||
|
@ -237,7 +237,7 @@ public:
|
||||
// Even if it looks like all weights should remain the same, an adjustment
|
||||
// will be made to guarantee a different result when reverting to an old best.
|
||||
// Returns the number of layer learning rates that were reduced.
|
||||
int ReduceLayerLearningRates(double factor, int num_samples, LSTMTrainer *samples_trainer);
|
||||
int ReduceLayerLearningRates(TFloat factor, int num_samples, LSTMTrainer *samples_trainer);
|
||||
|
||||
// Converts the string to integer class labels, with appropriate null_char_s
|
||||
// in between if not in SimpleTextOutput mode. Returns false on failure.
|
||||
|
Loading…
Reference in New Issue
Block a user