tesseract/lstm/weightmatrix.cpp

///////////////////////////////////////////////////////////////////////
// File:        weightmatrix.h
// Description: Hides distinction between float/int implementations.
// Author:      Ray Smith
// Created:     Tue Jun 17 11:46:20 PST 2014
//
// (C) Copyright 2014, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////

#include "weightmatrix.h"

#undef NONX86_BUILD
#if defined(ANDROID_BUILD) or defined(__PPC__) or defined(_ARCH_PPC64)
#define NONX86_BUILD 1
#endif

#if defined(__linux__) && !defined(NONX86_BUILD)
#include <cpuid.h>
#endif
#include "dotproductavx.h"
#include "dotproductsse.h"
#include "statistc.h"
#include "svutil.h"
#include "tprintf.h"

namespace tesseract {

// Architecture detector. Add code here to detect any other architectures for
// SIMD-based faster dot product functions. Intended to be a single static
// object, but it does no real harm to have more than one.
class SIMDDetect {
 public:
  SIMDDetect()
      : arch_tested_(false), avx_available_(false), sse_available_(false) {}

  // Returns true if AVX is available on this system.
  bool IsAVXAvailable() {
    if (!arch_tested_) TestArchitecture();
    return avx_available_;
  }
  // Returns true if SSE4.1 is available on this system.
  bool IsSSEAvailable() {
    if (!arch_tested_) TestArchitecture();
    return sse_available_;
  }

 private:
  // Tests the architecture in a system-dependent way to detect AVX, SSE and
  // any other available SIMD equipment.
  void TestArchitecture() {
    SVAutoLock lock(&arch_mutex_);
    if (arch_tested_) return;
#if defined(__linux__) && !defined(NONX86_BUILD)
    if (__get_cpuid_max(0, NULL) >= 1) {
      unsigned int eax, ebx, ecx, edx;
      __get_cpuid(1, &eax, &ebx, &ecx, &edx);
      sse_available_ = (ecx & 0x00080000) != 0;
      avx_available_ = (ecx & 0x10000000) != 0;
    }
#endif
    if (avx_available_) tprintf("Found AVX\n");
    if (sse_available_) tprintf("Found SSE\n");
    arch_tested_ = true;
  }

 private:
  // Detect architecture in only a single thread.
  SVMutex arch_mutex_;
  // Flag set to true after TestArchitecture has been called.
  bool arch_tested_;
  // If true, then AVX has been detected.
  bool avx_available_;
  // If true, then SSe4.1 has been detected.
  bool sse_available_;
};

static SIMDDetect detector;

// Copies the whole input transposed, converted to double, into *this.
void TransposedArray::Transpose(const GENERIC_2D_ARRAY<double>& input) {
  int width = input.dim1();
  int num_features = input.dim2();
  ResizeNoInit(num_features, width);
  for (int t = 0; t < width; ++t) WriteStrided(t, input[t]);
}

// Sets up the network for training. Initializes weights using weights of
// scale `range` picked according to the random number generator `randomizer`.
int WeightMatrix::InitWeightsFloat(int no, int ni, bool ada_grad,
                                   float weight_range, TRand* randomizer) {
  int_mode_ = false;
  wf_.Resize(no, ni, 0.0);
  if (randomizer != NULL) {
    for (int i = 0; i < no; ++i) {
      for (int j = 0; j < ni; ++j) {
        wf_[i][j] = randomizer->SignedRand(weight_range);
      }
    }
  }
  InitBackward(ada_grad);
  return ni * no;
}

// Converts a float network to an int network. Each set of input weights that
// corresponds to a single output weight is converted independently:
// Compute the max absolute value of the weight set.
// Scale so the max absolute value becomes MAX_INT8.
// Round to integer.
// Store a multiplicative scale factor (as a double) that will reproduce
//   the original value, subject to rounding errors.
void WeightMatrix::ConvertToInt() {
  wi_.ResizeNoInit(wf_.dim1(), wf_.dim2());
  scales_.init_to_size(wi_.dim1(), 0.0);
  int dim2 = wi_.dim2();
  for (int t = 0; t < wi_.dim1(); ++t) {
    double* f_line = wf_[t];
    inT8* i_line = wi_[t];
    double max_abs = 0.0;
    for (int f = 0; f < dim2; ++f) {
      double abs_val = fabs(f_line[f]);
      if (abs_val > max_abs) max_abs = abs_val;
    }
    double scale = max_abs / MAX_INT8;
    scales_[t] = scale;
    if (scale == 0.0) scale = 1.0;
    for (int f = 0; f < dim2; ++f) {
      i_line[f] = IntCastRounded(f_line[f] / scale);
    }
  }
  wf_.Resize(1, 1, 0.0);
  int_mode_ = true;
}

// Allocates any needed memory for running Backward, and zeroes the deltas,
// thus eliminating any existing momentum.
void WeightMatrix::InitBackward(bool ada_grad) {
  int no = int_mode_ ? wi_.dim1() : wf_.dim1();
  int ni = int_mode_ ? wi_.dim2() : wf_.dim2();
  use_ada_grad_ = ada_grad;
  dw_.Resize(no, ni, 0.0);
  updates_.Resize(no, ni, 0.0);
  wf_t_.Transpose(wf_);
  if (use_ada_grad_) dw_sq_sum_.Resize(no, ni, 0.0);
}

// Flag on mode to indicate that this weightmatrix uses inT8.
const int kInt8Flag = 1;
// Flag on mode to indicate that this weightmatrix uses ada grad.
const int kAdaGradFlag = 4;
// Flag on mode to indicate that this weightmatrix uses double. Set
// independently of kInt8Flag as even in int mode the scales can
// be float or double.
const int kDoubleFlag = 128;

// Writes to the given file. Returns false in case of error.
bool WeightMatrix::Serialize(bool training, TFile* fp) const {
  // For backward compatibility, add kDoubleFlag to mode to indicate the doubles
  // format, without errs, so we can detect and read old format weight matrices.
  uinT8 mode = (int_mode_ ? kInt8Flag : 0) |
               (use_ada_grad_ ? kAdaGradFlag : 0) | kDoubleFlag;
  if (fp->FWrite(&mode, sizeof(mode), 1) != 1) return false;
  if (int_mode_) {
    if (!wi_.Serialize(fp)) return false;
    if (!scales_.Serialize(fp)) return false;
  } else {
    if (!wf_.Serialize(fp)) return false;
    if (training && !updates_.Serialize(fp)) return false;
    if (training && use_ada_grad_ && !dw_sq_sum_.Serialize(fp)) return false;
  }
  return true;
}

// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool WeightMatrix::DeSerialize(bool training, bool swap, TFile* fp) {
  uinT8 mode = 0;
  if (fp->FRead(&mode, sizeof(mode), 1) != 1) return false;
  int_mode_ = (mode & kInt8Flag) != 0;
  use_ada_grad_ = (mode & kAdaGradFlag) != 0;
  if ((mode & kDoubleFlag) == 0) return DeSerializeOld(training, swap, fp);
  if (int_mode_) {
    if (!wi_.DeSerialize(swap, fp)) return false;
    if (!scales_.DeSerialize(swap, fp)) return false;
  } else {
    if (!wf_.DeSerialize(swap, fp)) return false;
    if (training) {
      InitBackward(use_ada_grad_);
      if (!updates_.DeSerialize(swap, fp)) return false;
      if (use_ada_grad_ && !dw_sq_sum_.DeSerialize(swap, fp)) return false;
    }
  }
  return true;
}

// As DeSerialize, but reads an old (float) format WeightMatrix for
// backward compatibility.
bool WeightMatrix::DeSerializeOld(bool training, bool swap, TFile* fp) {
  GENERIC_2D_ARRAY<float> float_array;
  if (int_mode_) {
    if (!wi_.DeSerialize(swap, fp)) return false;
    GenericVector<float> old_scales;
    if (!old_scales.DeSerialize(swap, fp)) return false;
    scales_.init_to_size(old_scales.size(), 0.0);
    for (int i = 0; i < old_scales.size(); ++i) scales_[i] = old_scales[i];
  } else {
    if (!float_array.DeSerialize(swap, fp)) return false;
    FloatToDouble(float_array, &wf_);
  }
  if (training) {
    InitBackward(use_ada_grad_);
    if (!float_array.DeSerialize(swap, fp)) return false;
    FloatToDouble(float_array, &updates_);
    // Errs was only used in int training, which is now dead.
    if (!float_array.DeSerialize(swap, fp)) return false;
  }
  return true;
}

// Computes matrix.vector v = Wu.
// u is of size W.dim2() - 1 and the output v is of size W.dim1().
// u is imagined to have an extra element at the end with value 1, to
// implement the bias, but it doesn't actually have it.
// Asserts that the call matches what we have.
void WeightMatrix::MatrixDotVector(const double* u, double* v) const {
  ASSERT_HOST(!int_mode_);
  MatrixDotVectorInternal(wf_, true, false, u, v);
}

void WeightMatrix::MatrixDotVector(const inT8* u, double* v) const {
  ASSERT_HOST(int_mode_);
  int num_out = wi_.dim1();
  int num_in = wi_.dim2() - 1;
  for (int i = 0; i < num_out; ++i) {
    const inT8* Wi = wi_[i];
    int total = 0;
    if (detector.IsSSEAvailable()) {
      total = IntDotProductSSE(u, Wi, num_in);
    } else {
      for (int j = 0; j < num_in; ++j) total += Wi[j] * u[j];
    }
    // Add in the bias and correct for integer values.
    v[i] = (static_cast<double>(total) / MAX_INT8 + Wi[num_in]) * scales_[i];
  }
}

// MatrixDotVector for peep weights, MultiplyAccumulate adds the
// component-wise products of *this[0] and v to inout.
void WeightMatrix::MultiplyAccumulate(const double* v, double* inout) {
  ASSERT_HOST(!int_mode_);
  ASSERT_HOST(wf_.dim1() == 1);
  int n = wf_.dim2();
  const double* u = wf_[0];
  for (int i = 0; i < n; ++i) {
    inout[i] += u[i] * v[i];
  }
}

// Computes vector.matrix v = uW.
// u is of size W.dim1() and the output v is of size W.dim2() - 1.
// The last result is discarded, as v is assumed to have an imaginary
// last value of 1, as with MatrixDotVector.
void WeightMatrix::VectorDotMatrix(const double* u, double* v) const {
  ASSERT_HOST(!int_mode_);
  MatrixDotVectorInternal(wf_t_, false, true, u, v);
}

// Fills dw_[i][j] with the dot product u[i][] . v[j][], using elements from
// u and v. In terms of the neural network, u is the gradients and v is the
// inputs.
// Note that (matching MatrixDotVector) v[last][] is missing, presumed 1.0.
// Runs parallel if requested. Note that u and v must be transposed.
void WeightMatrix::SumOuterTransposed(const TransposedArray& u,
                                      const TransposedArray& v,
                                      bool in_parallel) {
  ASSERT_HOST(!int_mode_);
  int num_outputs = dw_.dim1();
  ASSERT_HOST(u.dim1() == num_outputs);
  ASSERT_HOST(u.dim2() == v.dim2());
  int num_inputs = dw_.dim2() - 1;
  int num_samples = u.dim2();
  // v is missing the last element in dim1.
  ASSERT_HOST(v.dim1() == num_inputs);
#ifdef _OPENMP
#pragma omp parallel for num_threads(4) if (in_parallel)
#endif
  for (int i = 0; i < num_outputs; ++i) {
    double* dwi = dw_[i];
    const double* ui = u[i];
    for (int j = 0; j < num_inputs; ++j) {
      dwi[j] = DotProduct(ui, v[j], num_samples);
    }
    // The last element of v is missing, presumed 1.0f.
    double total = 0.0;
    for (int k = 0; k < num_samples; ++k) total += ui[k];
    dwi[num_inputs] = total;
  }
}

// Updates the weights using the given learning rate and momentum.
// num_samples is the quotient to be used in the adagrad computation iff
// use_ada_grad_ is true.
void WeightMatrix::Update(double learning_rate, double momentum,
                          int num_samples) {
  ASSERT_HOST(!int_mode_);
  if (use_ada_grad_ && num_samples > 0) {
    dw_sq_sum_.SumSquares(dw_);
    dw_.AdaGradScaling(dw_sq_sum_, num_samples);
  }
  dw_ *= learning_rate;
  updates_ += dw_;
  if (momentum > 0.0) wf_ += updates_;
  if (momentum >= 0.0) updates_ *= momentum;
  wf_t_.Transpose(wf_);
}

// Adds the dw_ in other to the dw_ is *this.
void WeightMatrix::AddDeltas(const WeightMatrix& other) {
  ASSERT_HOST(dw_.dim1() == other.dw_.dim1());
  ASSERT_HOST(dw_.dim2() == other.dw_.dim2());
  dw_ += other.dw_;
}

// Sums the products of weight updates in *this and other, splitting into
// positive (same direction) in *same and negative (different direction) in
// *changed.
void WeightMatrix::CountAlternators(const WeightMatrix& other, double* same,
                                    double* changed) const {
  int num_outputs = updates_.dim1();
  int num_inputs = updates_.dim2();
  ASSERT_HOST(num_outputs == other.updates_.dim1());
  ASSERT_HOST(num_inputs == other.updates_.dim2());
  for (int i = 0; i < num_outputs; ++i) {
    const double* this_i = updates_[i];
    const double* other_i = other.updates_[i];
    for (int j = 0; j < num_inputs; ++j) {
      double product = this_i[j] * other_i[j];
      if (product < 0.0)
        *changed -= product;
      else
        *same += product;
    }
  }
}

// Helper computes an integer histogram bucket for a weight and adds it
// to the histogram.
const int kHistogramBuckets = 16;
static void HistogramWeight(double weight, STATS* histogram) {
  int bucket = kHistogramBuckets - 1;
  if (weight != 0.0) {
    double logval = -log2(fabs(weight));
    bucket = ClipToRange(IntCastRounded(logval), 0, kHistogramBuckets - 1);
  }
  histogram->add(bucket, 1);
}

void WeightMatrix::Debug2D(const char* msg) {
  STATS histogram(0, kHistogramBuckets);
  if (int_mode_) {
    for (int i = 0; i < wi_.dim1(); ++i) {
      for (int j = 0; j < wi_.dim2(); ++j) {
        HistogramWeight(wi_[i][j] * scales_[i], &histogram);
      }
    }
  } else {
    for (int i = 0; i < wf_.dim1(); ++i) {
      for (int j = 0; j < wf_.dim2(); ++j) {
        HistogramWeight(wf_[i][j], &histogram);
      }
    }
  }
  tprintf("%s\n", msg);
  histogram.print();
}

// Computes and returns the dot product of the two n-vectors u and v.
/* static */
double WeightMatrix::DotProduct(const double* u, const double* v, int n) {
  // Note: because the order of addition is different among the 3 DotProduct
  // functions, the results can (and do) vary slightly (although they agree
  // to within about 4e-15). This produces different results when running
  // training, despite all random inputs being precisely equal.
  // To get consistent results, use just one of these DotProduct functions.
  // On a test multi-layer network, serial is 57% slower than sse, and avx
  // is about 8% faster than sse. This suggests that the time is memory
  // bandwidth constrained and could benefit from holding the reused vector
  // in AVX registers.
  if (detector.IsAVXAvailable()) return DotProductAVX(u, v, n);
  if (detector.IsSSEAvailable()) return DotProductSSE(u, v, n);
  double total = 0.0;
  for (int k = 0; k < n; ++k) total += u[k] * v[k];
  return total;
}

// Utility function converts an array of float to the corresponding array
// of double.
/* static */
void WeightMatrix::FloatToDouble(const GENERIC_2D_ARRAY<float>& wf,
                                 GENERIC_2D_ARRAY<double>* wd) {
  int dim1 = wf.dim1();
  int dim2 = wf.dim2();
  wd->ResizeNoInit(dim1, dim2);
  for (int i = 0; i < dim1; ++i) {
    const float* wfi = wf[i];
    double* wdi = (*wd)[i];
    for (int j = 0; j < dim2; ++j) wdi[j] = static_cast<double>(wfi[j]);
  }
}

// Computes matrix.vector v = Wu.
// u is of size W.dim2() - add_bias_fwd and the output v is of size
// W.dim1() - skip_bias_back.
// If add_bias_fwd, u is imagined to have an extra element at the end with value
// 1, to implement the bias, weight.
// If skip_bias_back, we are actullay performing the backwards product on a
// transposed matrix, so we need to drop the v output corresponding to the last
// element in dim1.
void WeightMatrix::MatrixDotVectorInternal(const GENERIC_2D_ARRAY<double>& w,
                                           bool add_bias_fwd,
                                           bool skip_bias_back, const double* u,
                                           double* v) {
  int num_results = w.dim1() - skip_bias_back;
  int extent = w.dim2() - add_bias_fwd;
  for (int i = 0; i < num_results; ++i) {
    const double* wi = w[i];
    double total = DotProduct(wi, u, extent);
    if (add_bias_fwd) total += wi[extent];  // The bias value.
    v[i] = total;
  }
}

}  // namespace tesseract.

#undef NONX86_BUILD
Added new LSTM-based neural network line recognizer 2016-11-08 07:38:07 +08:00			`///////////////////////////////////////////////////////////////////////`
			`// File: weightmatrix.h`
			`// Description: Hides distinction between float/int implementations.`
			`// Author: Ray Smith`
			`// Created: Tue Jun 17 11:46:20 PST 2014`
			`//`
			`// (C) Copyright 2014, Google Inc.`
			`// Licensed under the Apache License, Version 2.0 (the "License");`
			`// you may not use this file except in compliance with the License.`
			`// You may obtain a copy of the License at`
			`// http://www.apache.org/licenses/LICENSE-2.0`
			`// Unless required by applicable law or agreed to in writing, software`
			`// distributed under the License is distributed on an "AS IS" BASIS,`
			`// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`// See the License for the specific language governing permissions and`
			`// limitations under the License.`
			`///////////////////////////////////////////////////////////////////////`

			`#include "weightmatrix.h"`

			`#undef NONX86_BUILD`
			`#if defined(ANDROID_BUILD) or defined(__PPC__) or defined(_ARCH_PPC64)`
			`#define NONX86_BUILD 1`
			`#endif`

Fix windows build. 2016-11-24 22:32:23 +08:00			`#if defined(__linux__) && !defined(NONX86_BUILD)`
Added new LSTM-based neural network line recognizer 2016-11-08 07:38:07 +08:00			`#include <cpuid.h>`
			`#endif`
			`#include "dotproductavx.h"`
			`#include "dotproductsse.h"`
			`#include "statistc.h"`
			`#include "svutil.h"`
			`#include "tprintf.h"`

			`namespace tesseract {`

			`// Architecture detector. Add code here to detect any other architectures for`
			`// SIMD-based faster dot product functions. Intended to be a single static`
			`// object, but it does no real harm to have more than one.`
			`class SIMDDetect {`
			`public:`
			`SIMDDetect()`
			`: arch_tested_(false), avx_available_(false), sse_available_(false) {}`

			`// Returns true if AVX is available on this system.`
			`bool IsAVXAvailable() {`
			`if (!arch_tested_) TestArchitecture();`
			`return avx_available_;`
			`}`
			`// Returns true if SSE4.1 is available on this system.`
			`bool IsSSEAvailable() {`
			`if (!arch_tested_) TestArchitecture();`
			`return sse_available_;`
			`}`

			`private:`
			`// Tests the architecture in a system-dependent way to detect AVX, SSE and`
			`// any other available SIMD equipment.`
			`void TestArchitecture() {`
			`SVAutoLock lock(&arch_mutex_);`
			`if (arch_tested_) return;`
			`#if defined(__linux__) && !defined(NONX86_BUILD)`
			`if (__get_cpuid_max(0, NULL) >= 1) {`
			`unsigned int eax, ebx, ecx, edx;`
			`__get_cpuid(1, &eax, &ebx, &ecx, &edx);`
			`sse_available_ = (ecx & 0x00080000) != 0;`
			`avx_available_ = (ecx & 0x10000000) != 0;`
			`}`
			`#endif`
			`if (avx_available_) tprintf("Found AVX\n");`
			`if (sse_available_) tprintf("Found SSE\n");`
			`arch_tested_ = true;`
			`}`

			`private:`
			`// Detect architecture in only a single thread.`
			`SVMutex arch_mutex_;`
			`// Flag set to true after TestArchitecture has been called.`
			`bool arch_tested_;`
			`// If true, then AVX has been detected.`
			`bool avx_available_;`
			`// If true, then SSe4.1 has been detected.`
			`bool sse_available_;`
			`};`

			`static SIMDDetect detector;`

			`// Copies the whole input transposed, converted to double, into *this.`
			`void TransposedArray::Transpose(const GENERIC_2D_ARRAY<double>& input) {`
			`int width = input.dim1();`
			`int num_features = input.dim2();`
			`ResizeNoInit(num_features, width);`
			`for (int t = 0; t < width; ++t) WriteStrided(t, input[t]);`
			`}`

			`// Sets up the network for training. Initializes weights using weights of`
			// scale `range` picked according to the random number generator `randomizer`.
			`int WeightMatrix::InitWeightsFloat(int no, int ni, bool ada_grad,`
			`float weight_range, TRand* randomizer) {`
			`int_mode_ = false;`
			`wf_.Resize(no, ni, 0.0);`
			`if (randomizer != NULL) {`
			`for (int i = 0; i < no; ++i) {`
			`for (int j = 0; j < ni; ++j) {`
			`wf_[i][j] = randomizer->SignedRand(weight_range);`
			`}`
			`}`
			`}`
Fixes to training process to allow incremental training from a recognition model 2016-12-01 07:51:17 +08:00			`InitBackward(ada_grad);`
Added new LSTM-based neural network line recognizer 2016-11-08 07:38:07 +08:00			`return ni * no;`
			`}`

			`// Converts a float network to an int network. Each set of input weights that`
			`// corresponds to a single output weight is converted independently:`
			`// Compute the max absolute value of the weight set.`
			`// Scale so the max absolute value becomes MAX_INT8.`
			`// Round to integer.`
			`// Store a multiplicative scale factor (as a double) that will reproduce`
			`// the original value, subject to rounding errors.`
			`void WeightMatrix::ConvertToInt() {`
			`wi_.ResizeNoInit(wf_.dim1(), wf_.dim2());`
			`scales_.init_to_size(wi_.dim1(), 0.0);`
			`int dim2 = wi_.dim2();`
			`for (int t = 0; t < wi_.dim1(); ++t) {`
			`double* f_line = wf_[t];`
			`inT8* i_line = wi_[t];`
			`double max_abs = 0.0;`
			`for (int f = 0; f < dim2; ++f) {`
			`double abs_val = fabs(f_line[f]);`
			`if (abs_val > max_abs) max_abs = abs_val;`
			`}`
			`double scale = max_abs / MAX_INT8;`
			`scales_[t] = scale;`
			`if (scale == 0.0) scale = 1.0;`
			`for (int f = 0; f < dim2; ++f) {`
			`i_line[f] = IntCastRounded(f_line[f] / scale);`
			`}`
			`}`
			`wf_.Resize(1, 1, 0.0);`
			`int_mode_ = true;`
			`}`

			`// Allocates any needed memory for running Backward, and zeroes the deltas,`
			`// thus eliminating any existing momentum.`
Fixes to training process to allow incremental training from a recognition model 2016-12-01 07:51:17 +08:00			`void WeightMatrix::InitBackward(bool ada_grad) {`
Added new LSTM-based neural network line recognizer 2016-11-08 07:38:07 +08:00			`int no = int_mode_ ? wi_.dim1() : wf_.dim1();`
			`int ni = int_mode_ ? wi_.dim2() : wf_.dim2();`
Fixes to training process to allow incremental training from a recognition model 2016-12-01 07:51:17 +08:00			`use_ada_grad_ = ada_grad;`
Added new LSTM-based neural network line recognizer 2016-11-08 07:38:07 +08:00			`dw_.Resize(no, ni, 0.0);`
			`updates_.Resize(no, ni, 0.0);`
			`wf_t_.Transpose(wf_);`
Fixes to training process to allow incremental training from a recognition model 2016-12-01 07:51:17 +08:00			`if (use_ada_grad_) dw_sq_sum_.Resize(no, ni, 0.0);`
Added new LSTM-based neural network line recognizer 2016-11-08 07:38:07 +08:00			`}`

			`// Flag on mode to indicate that this weightmatrix uses inT8.`
			`const int kInt8Flag = 1;`
			`// Flag on mode to indicate that this weightmatrix uses ada grad.`
			`const int kAdaGradFlag = 4;`
			`// Flag on mode to indicate that this weightmatrix uses double. Set`
			`// independently of kInt8Flag as even in int mode the scales can`
			`// be float or double.`
			`const int kDoubleFlag = 128;`

			`// Writes to the given file. Returns false in case of error.`
			`bool WeightMatrix::Serialize(bool training, TFile* fp) const {`
Fix typos in new LSTM code All of them were found and fixed by codespell. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2016-11-22 15:20:05 +08:00			`// For backward compatibility, add kDoubleFlag to mode to indicate the doubles`
Added new LSTM-based neural network line recognizer 2016-11-08 07:38:07 +08:00			`// format, without errs, so we can detect and read old format weight matrices.`
			`uinT8 mode = (int_mode_ ? kInt8Flag : 0) \|`
			`(use_ada_grad_ ? kAdaGradFlag : 0) \| kDoubleFlag;`
			`if (fp->FWrite(&mode, sizeof(mode), 1) != 1) return false;`
			`if (int_mode_) {`
			`if (!wi_.Serialize(fp)) return false;`
			`if (!scales_.Serialize(fp)) return false;`
			`} else {`
			`if (!wf_.Serialize(fp)) return false;`
			`if (training && !updates_.Serialize(fp)) return false;`
			`if (training && use_ada_grad_ && !dw_sq_sum_.Serialize(fp)) return false;`
			`}`
			`return true;`
			`}`

			`// Reads from the given file. Returns false in case of error.`
			`// If swap is true, assumes a big/little-endian swap is needed.`
			`bool WeightMatrix::DeSerialize(bool training, bool swap, TFile* fp) {`
			`uinT8 mode = 0;`
			`if (fp->FRead(&mode, sizeof(mode), 1) != 1) return false;`
			`int_mode_ = (mode & kInt8Flag) != 0;`
			`use_ada_grad_ = (mode & kAdaGradFlag) != 0;`
			`if ((mode & kDoubleFlag) == 0) return DeSerializeOld(training, swap, fp);`
			`if (int_mode_) {`
			`if (!wi_.DeSerialize(swap, fp)) return false;`
			`if (!scales_.DeSerialize(swap, fp)) return false;`
			`} else {`
			`if (!wf_.DeSerialize(swap, fp)) return false;`
			`if (training) {`
Fixes to training process to allow incremental training from a recognition model 2016-12-01 07:51:17 +08:00			`InitBackward(use_ada_grad_);`
Added new LSTM-based neural network line recognizer 2016-11-08 07:38:07 +08:00			`if (!updates_.DeSerialize(swap, fp)) return false;`
			`if (use_ada_grad_ && !dw_sq_sum_.DeSerialize(swap, fp)) return false;`
			`}`
			`}`
			`return true;`
			`}`

			`// As DeSerialize, but reads an old (float) format WeightMatrix for`
Fix typos in new LSTM code All of them were found and fixed by codespell. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2016-11-22 15:20:05 +08:00			`// backward compatibility.`
Added new LSTM-based neural network line recognizer 2016-11-08 07:38:07 +08:00			`bool WeightMatrix::DeSerializeOld(bool training, bool swap, TFile* fp) {`
			`GENERIC_2D_ARRAY<float> float_array;`
			`if (int_mode_) {`
			`if (!wi_.DeSerialize(swap, fp)) return false;`
			`GenericVector<float> old_scales;`
			`if (!old_scales.DeSerialize(swap, fp)) return false;`
			`scales_.init_to_size(old_scales.size(), 0.0);`
			`for (int i = 0; i < old_scales.size(); ++i) scales_[i] = old_scales[i];`
			`} else {`
			`if (!float_array.DeSerialize(swap, fp)) return false;`
			`FloatToDouble(float_array, &wf_);`
			`}`
			`if (training) {`
Fixes to training process to allow incremental training from a recognition model 2016-12-01 07:51:17 +08:00			`InitBackward(use_ada_grad_);`
Added new LSTM-based neural network line recognizer 2016-11-08 07:38:07 +08:00			`if (!float_array.DeSerialize(swap, fp)) return false;`
			`FloatToDouble(float_array, &updates_);`
			`// Errs was only used in int training, which is now dead.`
			`if (!float_array.DeSerialize(swap, fp)) return false;`
			`}`
			`return true;`
			`}`

			`// Computes matrix.vector v = Wu.`
			`// u is of size W.dim2() - 1 and the output v is of size W.dim1().`
			`// u is imagined to have an extra element at the end with value 1, to`
			`// implement the bias, but it doesn't actually have it.`
			`// Asserts that the call matches what we have.`
			`void WeightMatrix::MatrixDotVector(const double* u, double* v) const {`
			`ASSERT_HOST(!int_mode_);`
			`MatrixDotVectorInternal(wf_, true, false, u, v);`
			`}`

			`void WeightMatrix::MatrixDotVector(const inT8* u, double* v) const {`
			`ASSERT_HOST(int_mode_);`
			`int num_out = wi_.dim1();`
			`int num_in = wi_.dim2() - 1;`
			`for (int i = 0; i < num_out; ++i) {`
			`const inT8* Wi = wi_[i];`
			`int total = 0;`
			`if (detector.IsSSEAvailable()) {`
			`total = IntDotProductSSE(u, Wi, num_in);`
			`} else {`
			`for (int j = 0; j < num_in; ++j) total += Wi[j] * u[j];`
			`}`
			`// Add in the bias and correct for integer values.`
			`v[i] = (static_cast<double>(total) / MAX_INT8 + Wi[num_in]) * scales_[i];`
			`}`
			`}`

			`// MatrixDotVector for peep weights, MultiplyAccumulate adds the`
			`// component-wise products of *this[0] and v to inout.`
			`void WeightMatrix::MultiplyAccumulate(const double* v, double* inout) {`
			`ASSERT_HOST(!int_mode_);`
			`ASSERT_HOST(wf_.dim1() == 1);`
			`int n = wf_.dim2();`
			`const double* u = wf_[0];`
			`for (int i = 0; i < n; ++i) {`
			`inout[i] += u[i] * v[i];`
			`}`
			`}`

			`// Computes vector.matrix v = uW.`
			`// u is of size W.dim1() and the output v is of size W.dim2() - 1.`
			`// The last result is discarded, as v is assumed to have an imaginary`
			`// last value of 1, as with MatrixDotVector.`
			`void WeightMatrix::VectorDotMatrix(const double* u, double* v) const {`
			`ASSERT_HOST(!int_mode_);`
			`MatrixDotVectorInternal(wf_t_, false, true, u, v);`
			`}`

			`// Fills dw_[i][j] with the dot product u[i][] . v[j][], using elements from`
			`// u and v. In terms of the neural network, u is the gradients and v is the`
			`// inputs.`
			`// Note that (matching MatrixDotVector) v[last][] is missing, presumed 1.0.`
			`// Runs parallel if requested. Note that u and v must be transposed.`
			`void WeightMatrix::SumOuterTransposed(const TransposedArray& u,`
			`const TransposedArray& v,`
			`bool in_parallel) {`
			`ASSERT_HOST(!int_mode_);`
			`int num_outputs = dw_.dim1();`
			`ASSERT_HOST(u.dim1() == num_outputs);`
			`ASSERT_HOST(u.dim2() == v.dim2());`
			`int num_inputs = dw_.dim2() - 1;`
			`int num_samples = u.dim2();`
			`// v is missing the last element in dim1.`
			`ASSERT_HOST(v.dim1() == num_inputs);`
			`#ifdef _OPENMP`
			`#pragma omp parallel for num_threads(4) if (in_parallel)`
			`#endif`
			`for (int i = 0; i < num_outputs; ++i) {`
			`double* dwi = dw_[i];`
			`const double* ui = u[i];`
			`for (int j = 0; j < num_inputs; ++j) {`
			`dwi[j] = DotProduct(ui, v[j], num_samples);`
			`}`
			`// The last element of v is missing, presumed 1.0f.`
			`double total = 0.0;`
			`for (int k = 0; k < num_samples; ++k) total += ui[k];`
			`dwi[num_inputs] = total;`
			`}`
			`}`

			`// Updates the weights using the given learning rate and momentum.`
			`// num_samples is the quotient to be used in the adagrad computation iff`
			`// use_ada_grad_ is true.`
			`void WeightMatrix::Update(double learning_rate, double momentum,`
			`int num_samples) {`
			`ASSERT_HOST(!int_mode_);`
			`if (use_ada_grad_ && num_samples > 0) {`
			`dw_sq_sum_.SumSquares(dw_);`
			`dw_.AdaGradScaling(dw_sq_sum_, num_samples);`
			`}`
			`dw_ *= learning_rate;`
			`updates_ += dw_;`
			`if (momentum > 0.0) wf_ += updates_;`
			`if (momentum >= 0.0) updates_ *= momentum;`
			`wf_t_.Transpose(wf_);`
			`}`

			`// Adds the dw_ in other to the dw_ is *this.`
			`void WeightMatrix::AddDeltas(const WeightMatrix& other) {`
			`ASSERT_HOST(dw_.dim1() == other.dw_.dim1());`
			`ASSERT_HOST(dw_.dim2() == other.dw_.dim2());`
			`dw_ += other.dw_;`
			`}`

			`// Sums the products of weight updates in *this and other, splitting into`
			`// positive (same direction) in *same and negative (different direction) in`
			`// *changed.`
			`void WeightMatrix::CountAlternators(const WeightMatrix& other, double* same,`
			`double* changed) const {`
			`int num_outputs = updates_.dim1();`
			`int num_inputs = updates_.dim2();`
			`ASSERT_HOST(num_outputs == other.updates_.dim1());`
			`ASSERT_HOST(num_inputs == other.updates_.dim2());`
			`for (int i = 0; i < num_outputs; ++i) {`
			`const double* this_i = updates_[i];`
			`const double* other_i = other.updates_[i];`
			`for (int j = 0; j < num_inputs; ++j) {`
			`double product = this_i[j] * other_i[j];`
			`if (product < 0.0)`
			`*changed -= product;`
			`else`
			`*same += product;`
			`}`
			`}`
			`}`

			`// Helper computes an integer histogram bucket for a weight and adds it`
			`// to the histogram.`
			`const int kHistogramBuckets = 16;`
			`static void HistogramWeight(double weight, STATS* histogram) {`
			`int bucket = kHistogramBuckets - 1;`
			`if (weight != 0.0) {`
			`double logval = -log2(fabs(weight));`
			`bucket = ClipToRange(IntCastRounded(logval), 0, kHistogramBuckets - 1);`
			`}`
			`histogram->add(bucket, 1);`
			`}`

			`void WeightMatrix::Debug2D(const char* msg) {`
			`STATS histogram(0, kHistogramBuckets);`
			`if (int_mode_) {`
			`for (int i = 0; i < wi_.dim1(); ++i) {`
			`for (int j = 0; j < wi_.dim2(); ++j) {`
			`HistogramWeight(wi_[i][j] * scales_[i], &histogram);`
			`}`
			`}`
			`} else {`
			`for (int i = 0; i < wf_.dim1(); ++i) {`
			`for (int j = 0; j < wf_.dim2(); ++j) {`
			`HistogramWeight(wf_[i][j], &histogram);`
			`}`
			`}`
			`}`
			`tprintf("%s\n", msg);`
			`histogram.print();`
			`}`

			`// Computes and returns the dot product of the two n-vectors u and v.`
			`/* static */`
			`double WeightMatrix::DotProduct(const double* u, const double* v, int n) {`
			`// Note: because the order of addition is different among the 3 DotProduct`
			`// functions, the results can (and do) vary slightly (although they agree`
			`// to within about 4e-15). This produces different results when running`
			`// training, despite all random inputs being precisely equal.`
			`// To get consistent results, use just one of these DotProduct functions.`
			`// On a test multi-layer network, serial is 57% slower than sse, and avx`
			`// is about 8% faster than sse. This suggests that the time is memory`
			`// bandwidth constrained and could benefit from holding the reused vector`
			`// in AVX registers.`
			`if (detector.IsAVXAvailable()) return DotProductAVX(u, v, n);`
			`if (detector.IsSSEAvailable()) return DotProductSSE(u, v, n);`
			`double total = 0.0;`
			`for (int k = 0; k < n; ++k) total += u[k] * v[k];`
			`return total;`
			`}`

			`// Utility function converts an array of float to the corresponding array`
			`// of double.`
			`/* static */`
			`void WeightMatrix::FloatToDouble(const GENERIC_2D_ARRAY<float>& wf,`
			`GENERIC_2D_ARRAY<double>* wd) {`
			`int dim1 = wf.dim1();`
			`int dim2 = wf.dim2();`
			`wd->ResizeNoInit(dim1, dim2);`
			`for (int i = 0; i < dim1; ++i) {`
			`const float* wfi = wf[i];`
			`double* wdi = (*wd)[i];`
			`for (int j = 0; j < dim2; ++j) wdi[j] = static_cast<double>(wfi[j]);`
			`}`
			`}`

			`// Computes matrix.vector v = Wu.`
			`// u is of size W.dim2() - add_bias_fwd and the output v is of size`
			`// W.dim1() - skip_bias_back.`
			`// If add_bias_fwd, u is imagined to have an extra element at the end with value`
			`// 1, to implement the bias, weight.`
			`// If skip_bias_back, we are actullay performing the backwards product on a`
			`// transposed matrix, so we need to drop the v output corresponding to the last`
			`// element in dim1.`
			`void WeightMatrix::MatrixDotVectorInternal(const GENERIC_2D_ARRAY<double>& w,`
			`bool add_bias_fwd,`
			`bool skip_bias_back, const double* u,`
			`double* v) {`
			`int num_results = w.dim1() - skip_bias_back;`
			`int extent = w.dim2() - add_bias_fwd;`
			`for (int i = 0; i < num_results; ++i) {`
			`const double* wi = w[i];`
			`double total = DotProduct(wi, u, extent);`
			`if (add_bias_fwd) total += wi[extent]; // The bias value.`
			`v[i] = total;`
			`}`
			`}`

			`} // namespace tesseract.`

			`#undef NONX86_BUILD`