Tweak scales array for intSimdMatrix case.

Currently, the size of the scales array is not rounded up in the same way as the weights are. This blocks us from pushing the scale calculations into the SIMD code, because when we "overread" past the end of the scales array, we potentially get errors. Here, we adjust IntSimdMatrix::Init (and its callers) so that the scales array is resized to the rounded-up number of outputs, reserving enough entries to allow such overreads to work. This doesn't make any difference for now, but opens the way for future optimisations.
2024-11-27 20:59:36 +08:00 · 2020-05-26 12:37:04 +01:00 · 2020-05-26 12:37:04 +01:00 · 9dfdac51c6
commit 9dfdac51c6
parent 5a377707e0
4 changed files with 8 additions and 5 deletions
--- a/src/arch/intsimdmatrix.cpp
+++ b/src/arch/intsimdmatrix.cpp
@ -27,7 +27,8 @@ const IntSimdMatrix* IntSimdMatrix::intSimdMatrix = nullptr;

 // Computes a reshaped copy of the weight matrix w.
 void IntSimdMatrix::Init(const GENERIC_2D_ARRAY<int8_t>& w,
-                         std::vector<int8_t>& shaped_w) const {
+                         std::vector<int8_t>& shaped_w,
+                         GenericVector<double>& scales) const {
  const int num_out = w.dim1();
  const int num_in = w.dim2() - 1;
  // The rounded-up sizes of the reshaped weight matrix, excluding biases.
@ -35,6 +36,7 @@ void IntSimdMatrix::Init(const GENERIC_2D_ARRAY<int8_t>& w,
  int rounded_num_out = RoundOutputs(num_out);
  // Add the bias and compute the required size.
  shaped_w.resize((rounded_num_in + 1) * rounded_num_out, 0);
+  scales.resize_no_init(rounded_num_out);
  int shaped_index = 0;
  int output = 0;
  // Each number of registers needs a different format! Iterates over the
--- a/src/arch/intsimdmatrix.h
+++ b/src/arch/intsimdmatrix.h
@ -62,7 +62,8 @@ namespace tesseract {
 struct IntSimdMatrix {
  // Computes a reshaped copy of the weight matrix w.
  void Init(const GENERIC_2D_ARRAY<int8_t>& w,
-            std::vector<int8_t>& shaped_w) const;
+            std::vector<int8_t>& shaped_w,
+            GenericVector<double>& scales) const;

  // Rounds the size up to a multiple of the input register size (in int8_t).
  int RoundInputs(int size) const {
--- a/src/lstm/weightmatrix.cpp
+++ b/src/lstm/weightmatrix.cpp
@ -144,7 +144,7 @@ void WeightMatrix::ConvertToInt() {
  wf_.Resize(1, 1, 0.0);
  int_mode_ = true;
  if (IntSimdMatrix::intSimdMatrix) {
-    IntSimdMatrix::intSimdMatrix->Init(wi_, shaped_w_);
+    IntSimdMatrix::intSimdMatrix->Init(wi_, shaped_w_, scales_);
  }
 }

@ -198,7 +198,7 @@ bool WeightMatrix::DeSerialize(bool training, TFile* fp) {
    if (!wi_.DeSerialize(fp)) return false;
    if (!scales_.DeSerialize(fp)) return false;
    if (IntSimdMatrix::intSimdMatrix) {
-      IntSimdMatrix::intSimdMatrix->Init(wi_, shaped_w_);
+      IntSimdMatrix::intSimdMatrix->Init(wi_, shaped_w_, scales_);
    }
  } else {
    if (!wf_.DeSerialize(fp)) return false;
--- a/unittest/intsimdmatrix_test.cc
+++ b/unittest/intsimdmatrix_test.cc
@ -72,7 +72,7 @@ class IntSimdMatrixTest : public ::testing::Test {
        IntSimdMatrix::MatrixDotVector(w, scales, u.data(), base_result.data());
        std::vector<double> test_result(num_out);
        std::vector<int8_t> shaped_wi;
-        matrix.Init(w, shaped_wi);
+        matrix.Init(w, shaped_wi, scales);
        if (matrix.matrixDotVectorFunction) {
          matrix.matrixDotVectorFunction(w.dim1(), w.dim2(), &shaped_wi[0],
                                         &scales[0], &u[0], &test_result[0]);