tesseract/unittest/lstm_test.cc

// (C) Copyright 2017, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Generating the training data:
// If the format of the lstmf (ImageData) file changes, the training data will
// have to be regenerated as follows:
//
// Use --xsize 800 for text2image to be similar to original training data.
//
// tesstrain.py --fonts_dir /usr/share/fonts --lang eng \
// --linedata_only   --noextract_font_properties --langdata_dir ../langdata_lstm \
// --tessdata_dir ../tessdata --output_dir ~/tesseract/test/testdata \
// --fontlist "Arial" --maxpages 10
//

#include "lstm_test.h"

namespace tesseract {

// Trains two simple networks on the Arial training data, checks that each one
// finishes below its error threshold, and checks that the LSTM ends up with a
// lower error than the convolution-only network.
TEST_F(LSTMTrainerTest, BasicTest) {
  // Stage 1: a sliding-window classifier built from convolutions, with no LSTM.
  const char *const kConvolverSpec =
      "[1,32,0,1 Ct5,5,16 Mp4,4 Ct1,1,16 Ct3,3,128 Mp4,1 Ct1,1,64 S2,1 "
      "Ct1,1,64O1c1]";
  SetupTrainer(kConvolverSpec, "no-lstm", "eng/eng.unicharset", "eng.Arial.exp0.lstmf", false,
               false, 2e-4, false, "eng");
  const double convolver_err = TrainIterations(kTrainerIterations * 4);
  EXPECT_LT(convolver_err, 98);
  LOG(INFO) << "********** Expected  < 98 ************\n";

  // Stage 2: a plain one-layer, one-direction LSTM, trained for half as many
  // iterations as the convolver above.
  const char *const kUniLstmSpec = "[1,1,0,32 Lfx100 O1c1]";
  SetupTrainerEng(kUniLstmSpec, "1D-lstm", false, false);
  const double uni_lstm_err = TrainIterations(kTrainerIterations * 2);
  EXPECT_LT(uni_lstm_err, 86);
  LOG(INFO) << "********** Expected  < 86 ************\n";
  // The LSTM has to come out ahead of the convolver. (It carries a lot more
  // weights, but it still iterates faster.)
  EXPECT_LT(uni_lstm_err, convolver_err);
}

// Color input should learn almost as quickly as normalized grey/2D input.
TEST_F(LSTMTrainerTest, ColorTest) {
  // Same layer stack as the spec used by Test2D; only the last field of the
  // input shape differs (3 here instead of 1 - presumably the number of color
  // channels).
  const char *const kColorSpec = "[1,32,0,3 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]";
  SetupTrainerEng(kColorSpec, "2D-color-lstm", true, true);
  const double color_err = TrainIterations(kTrainerIterations);
  // Only the upper bound is enforced; the lower bound below is disabled.
  EXPECT_LT(color_err, 85);
  //  EXPECT_GT(color_err, 66);
  LOG(INFO) << "********** Expected  < 85 ************\n";
}

// Trains a one-layer bidirectional 1-D LSTM, checks its error threshold and
// then runs the int-mode check on the trained network.
TEST_F(LSTMTrainerTest, BidiTest) {
  const char *const kBidiSpec = "[1,1,0,32 Lbx100 O1c1]";
  SetupTrainerEng(kBidiSpec, "bidi-lstm", false, false);
  const double bidi_err = TrainIterations(kTrainerIterations);
  EXPECT_LT(bidi_err, 75);
  LOG(INFO) << "********** Expected   < 75 ************\n";
  // Training directly in int mode is no longer supported, so the trained
  // network is converted to int instead and its error rate is checked to be
  // close to the float one.
  TestIntMode(kTrainerIterations);
}

// Checks that a two-layer network with a 2-D bottom layer learns correctly.
// Getting there needs a lot of iterations.
TEST_F(LSTMTrainerTest, Test2D) {
  // Two LSTM layers; the lower one is a 2-D LSTM acting as feature extractor.
  const char *const kSpec2D = "[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]";
  SetupTrainerEng(kSpec2D, "2-D-2-layer-lstm", false, false);
  // One and a half times the standard iteration count.
  const double err_2d = TrainIterations(kTrainerIterations * 3 / 2);
  // Only the upper bound is enforced; the lower bound below is disabled.
  EXPECT_LT(err_2d, 98);
  //  EXPECT_GT(err_2d, 90);
  LOG(INFO) << "********** Expected  < 98 ************\n";
  // Training directly in int mode is no longer supported, so the trained
  // network is converted to int instead and its error rate is checked to be
  // close to the float one.
  TestIntMode(kTrainerIterations);
}

// Checks that the 2-D two-layer network does *a lot* better with Adam than
// without it: the threshold here is 70, against 98 in Test2D.
TEST_F(LSTMTrainerTest, TestAdam) {
  // Two LSTM layers; the lower one is a 2-D LSTM acting as feature extractor.
  // The final `true` argument is what distinguishes this setup from Test2D.
  const char *const kSpec2D = "[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]";
  SetupTrainerEng(kSpec2D, "2-D-2-layer-lstm", false, true);
  const double adam_err = TrainIterations(kTrainerIterations);
  EXPECT_LT(adam_err, 70);
  LOG(INFO) << "********** Expected   < 70 ************\n";
  TestIntMode(kTrainerIterations);
}

// Trivial training-speed exercise on a fairly complex network. The test body
// asserts nothing and measures no time itself: the result of TrainIterations
// is discarded.
TEST_F(LSTMTrainerTest, SpeedTest) {
  const char *const kComplexSpec =
      "[1,30,0,1 Ct5,5,16 Mp2,2 L2xy24 Ct1,1,48 Mp5,1 Ct1,1,32 S3,1 Lbx64 "
      "O1c1]";
  SetupTrainerEng(kComplexSpec, "2-D-2-layer-lstm", false, true);
  TrainIterations(kTrainerIterations);
  LOG(INFO) << "********** *** ************\n";
}

// Tests that two identical networks trained the same get the same results.
// Also tests that the same happens with a serialize/deserialize in the middle.
//
// Sequence:
//   1. Train trainer A, record its errors and dump it to memory.
//   2. Train a fresh trainer B the same way; its errors must match A's.
//   3. Train B for some more iterations.
//   4. Create a fresh trainer, restore A's dump into it, train it for the same
//      number of extra iterations; its errors must match B's.
TEST_F(LSTMTrainerTest, DeterminismTest) {
  // All three trainers must be built from exactly the same spec and name for
  // the comparison to mean anything, so both are defined once.
  const char *const kNetworkSpec = "[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]";
  const char *const kModelName = "2-D-2-layer-lstm";

  // Step 1: trainer A.
  SetupTrainerEng(kNetworkSpec, kModelName, false, false);
  double lstm_2d_err_a = TrainIterations(kTrainerIterations);
  double act_error_a = trainer_->ActivationError();
  double char_error_a = trainer_->CharError();
  std::vector<char> trainer_a_data;
  EXPECT_TRUE(trainer_->SaveTrainingDump(NO_BEST_TRAINER, *trainer_, &trainer_a_data));

  // Step 2: trainer B, trained identically.
  SetupTrainerEng(kNetworkSpec, kModelName, false, false);
  double lstm_2d_err_b = TrainIterations(kTrainerIterations);
  double act_error_b = trainer_->ActivationError();
  double char_error_b = trainer_->CharError();
  // The values are doubles but are compared with float tolerance.
  EXPECT_FLOAT_EQ(lstm_2d_err_a, lstm_2d_err_b);
  EXPECT_FLOAT_EQ(act_error_a, act_error_b);
  EXPECT_FLOAT_EQ(char_error_a, char_error_b);

  // Step 3: now train B some more iterations.
  lstm_2d_err_b = TrainIterations(kTrainerIterations / 3);
  act_error_b = trainer_->ActivationError();
  char_error_b = trainer_->CharError();

  // Step 4: unpack A's dump into a new trainer and train that some more too.
  SetupTrainerEng(kNetworkSpec, kModelName, false, false);
  // Fatal on failure: without a successful restore the trainer is just a
  // freshly set-up one, and training it further would only produce misleading
  // mismatches in the comparisons below.
  ASSERT_TRUE(trainer_->ReadTrainingDump(trainer_a_data, *trainer_));
  lstm_2d_err_a = TrainIterations(kTrainerIterations / 3);
  act_error_a = trainer_->ActivationError();
  char_error_a = trainer_->CharError();
  EXPECT_FLOAT_EQ(lstm_2d_err_a, lstm_2d_err_b);
  EXPECT_FLOAT_EQ(act_error_a, act_error_b);
  EXPECT_FLOAT_EQ(char_error_a, char_error_b);
  LOG(INFO) << "********** *** ************\n";
}

// Baseline network that the built-in softmax tests are measured against.
TEST_F(LSTMTrainerTest, SoftmaxBaselineTest) {
  // A plain one-layer, one-direction LSTM.
  const char *const kBaselineSpec = "[1,1,0,32 Lfx96 O1c1]";
  SetupTrainerEng(kBaselineSpec, "1D-lstm", false, true);
  const double baseline_err = TrainIterations(kTrainerIterations * 2);
  // Only the upper bound is enforced; the lower bound below is disabled.
  EXPECT_LT(baseline_err, 60);
  //  EXPECT_GT(baseline_err, 48);
  LOG(INFO) << "********** Expected  < 60 ************\n";
  // First pass: make sure the network also works in int mode.
  TestIntMode(kTrainerIterations);
  // Second pass: running TestIntMode again checks that an int-mode network
  // survives serialization and deserialization.
  const double int_mode_delta = TestIntMode(kTrainerIterations);
  // Both runs were in int mode this time, so they should be almost identical.
  LOG(INFO) << "Delta in Int mode error rates = " << int_mode_delta << "\n";
  // NOTE(review): this bound is one-sided, so a negative delta of any size
  // passes - confirm whether a check on the magnitude was intended.
  EXPECT_LT(int_mode_delta, 0.01);
}

// Checks that an LSTM with the built-in softmax does better than the external
// softmax exercised by SoftmaxBaselineTest. That baseline is described as
// having an error rate slightly below 55% (SoftmaxBaselineTest itself only
// asserts < 60); this test requires < 49.
TEST_F(LSTMTrainerTest, SoftmaxTest) {
  const char *const kSoftmaxSpec = "[1,1,0,32 LS96]";
  SetupTrainerEng(kSoftmaxSpec, "Lstm-+-softmax", false, true);
  const double softmax_err = TrainIterations(kTrainerIterations * 2);
  EXPECT_LT(softmax_err, 49.0);
  LOG(INFO) << "********** Expected  < 49 ************\n";
  // Make sure the network also works in int mode.
  TestIntMode(kTrainerIterations);
}

// Checks an LSTM with the built-in encoded softmax, which is meant to beat the
// external softmax. Getting there needs a lot of iterations.
TEST_F(LSTMTrainerTest, EncodedSoftmaxTest) {
  const char *const kEncodedSpec = "[1,1,0,32 LE96]";
  SetupTrainerEng(kEncodedSpec, "Lstm-+-softmax", false, true);
  const double encoded_err = TrainIterations(kTrainerIterations * 2);
  EXPECT_LT(encoded_err, 62.0);
  LOG(INFO) << "********** Expected   < 62 ************\n";
  // Make sure the network also works in int mode.
  TestIntMode(kTrainerIterations);
}

// Tests that layer access methods work correctly: EnumerateLayers() must
// return the expected layer ids in order, and GetLayer() must resolve each id
// to a layer with the expected name and number of weights.
TEST_F(LSTMTrainerTest, TestLayerAccess) {
  // A 2-layer LSTM with a Squashed feature-extracting LSTM on the bottom.
  SetupTrainerEng("[1,32,0,1 Ct5,5,16 Mp2,2 Lfys32 Lbx128 O1c1]", "SQU-lstm", false, false);
  // Number of layers.
  const size_t kNumLayers = 8;
  // Expected layer ids, in enumeration order.
  const char *kLayerIds[kNumLayers] = {":0", ":1:0", ":1:1", ":2", ":3:0", ":4:0", ":4:1:0", ":5"};
  // Expected layer names, parallel to kLayerIds.
  const char *kLayerNames[kNumLayers] = {"Input",  "Convolve",  "ConvNL", "Maxpool",
                                         "Lfys32", "Lbx128LTR", "Lbx128", "Output"};
  // Expected number of weights, parallel to kLayerIds.
  // NOTE(review): the factors presumably read as outputs * (inputs + 1 bias),
  // with the extra factor of 4 on the LSTM rows being the gates - confirm
  // against the network implementation.
  const int kNumWeights[kNumLayers] = {0,
                                       0,
                                       16 * (25 + 1),
                                       0,
                                       32 * (4 * (32 + 16 + 1)),
                                       128 * (4 * (128 + 32 + 1)),
                                       128 * (4 * (128 + 32 + 1)),
                                       112 * (2 * 128 + 1)};

  auto layers = trainer_->EnumerateLayers();
  EXPECT_EQ(kNumLayers, layers.size());
  // Bounded by both sizes so a wrong layer count (already reported above)
  // cannot cause an out-of-range access on either side.
  for (size_t i = 0; i < kNumLayers && i < layers.size(); ++i) {
    EXPECT_STREQ(kLayerIds[i], layers[i].c_str());
    // Look the layer up once and make sure it exists before dereferencing it,
    // so an unresolved id is reported as a test failure rather than a crash.
    auto *layer = trainer_->GetLayer(layers[i]);
    if (layer == nullptr) {
      ADD_FAILURE() << "GetLayer returned null for layer id " << layers[i].c_str();
      continue;
    }
    EXPECT_STREQ(kLayerNames[i], layer->name().c_str());
    EXPECT_EQ(kNumWeights[i], layer->num_weights());
  }
}

} // namespace tesseract.
Fix and enable lstm related unittests (#2180) * Fix and build lstm related unittests * Use ./tmp instead of ./ for files created by unittests 2019-01-24 15:01:19 +08:00			`// (C) Copyright 2017, Google Inc.`
			`// Licensed under the Apache License, Version 2.0 (the "License");`
			`// you may not use this file except in compliance with the License.`
			`// You may obtain a copy of the License at`
			`// http://www.apache.org/licenses/LICENSE-2.0`
			`// Unless required by applicable law or agreed to in writing, software`
			`// distributed under the License is distributed on an "AS IS" BASIS,`
			`// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`// See the License for the specific language governing permissions and`
			`// limitations under the License.`
Add more unittests from Google They were provided by Jeff Breidenbach <jbreiden@google.com>. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2018-08-24 21:07:48 +08:00
			`// Generating the training data:`
			`// If the format of the lstmf (ImageData) file changes, the training data will`
Remove whitespace at line endings Signed-off-by: Stefan Weil <sw@weilnetz.de> 2019-02-19 20:53:31 +08:00			`// have to be regenerated as follows:`
Fix and enable lstm related unittests (#2180) * Fix and build lstm related unittests * Use ./tmp instead of ./ for files created by unittests 2019-01-24 15:01:19 +08:00			`//`
			`// Use --xsize 800 for text2image to be similar to original training data.`
			`//`
Remove scripts for training They were replaced by Python3 scripts (part of the tesstrain repository). Signed-off-by: Stefan Weil <sw@weilnetz.de> 2021-05-18 16:47:44 +08:00			`// tesstrain.py --fonts_dir /usr/share/fonts --lang eng \`
Add LF to INFO msgs in lstm_test 2019-01-24 19:40:53 +08:00			`// --linedata_only --noextract_font_properties --langdata_dir ../langdata_lstm \`
			`// --tessdata_dir ../tessdata --output_dir ~/tesseract/test/testdata \`
			`// --fontlist "Arial" --maxpages 10`
Fix and enable lstm related unittests (#2180) * Fix and build lstm related unittests * Use ./tmp instead of ./ for files created by unittests 2019-01-24 15:01:19 +08:00			`//`
Add more unittests from Google They were provided by Jeff Breidenbach <jbreiden@google.com>. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2018-08-24 21:07:48 +08:00
Fix and enable lstm related unittests (#2180) * Fix and build lstm related unittests * Use ./tmp instead of ./ for files created by unittests 2019-01-24 15:01:19 +08:00			`#include "lstm_test.h"`
Add more unittests from Google They were provided by Jeff Breidenbach <jbreiden@google.com>. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2018-08-24 21:07:48 +08:00
			`namespace tesseract {`

			`// Tests that some simple networks can learn Arial and meet accuracy targets.`
			`TEST_F(LSTMTrainerTest, BasicTest) {`
			`// A Convolver sliding window classifier without LSTM.`
			`SetupTrainer(`
			`"[1,32,0,1 Ct5,5,16 Mp4,4 Ct1,1,16 Ct3,3,128 Mp4,1 Ct1,1,64 S2,1 "`
			`"Ct1,1,64O1c1]",`
[clang-format] Format unit tests. 2021-03-13 05:06:34 +08:00			`"no-lstm", "eng/eng.unicharset", "eng.Arial.exp0.lstmf", false, false, 2e-4, false, "eng");`
more updates to lstm related unittests 2019-01-24 23:39:37 +08:00			`double non_lstm_err = TrainIterations(kTrainerIterations * 4);`
Add more unittests from Google They were provided by Jeff Breidenbach <jbreiden@google.com>. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2018-08-24 21:07:48 +08:00			`EXPECT_LT(non_lstm_err, 98);`
[clang-format] Format unit tests. 2021-03-13 05:06:34 +08:00			`LOG(INFO) << "******** Expected < 98 **********\n";`
Add more unittests from Google They were provided by Jeff Breidenbach <jbreiden@google.com>. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2018-08-24 21:07:48 +08:00
			`// A basic single-layer, single direction LSTM.`
			`SetupTrainerEng("[1,1,0,32 Lfx100 O1c1]", "1D-lstm", false, false);`
			`double lstm_uni_err = TrainIterations(kTrainerIterations * 2);`
			`EXPECT_LT(lstm_uni_err, 86);`
[clang-format] Format unit tests. 2021-03-13 05:06:34 +08:00			`LOG(INFO) << "******** Expected < 86 **********\n";`
Add more unittests from Google They were provided by Jeff Breidenbach <jbreiden@google.com>. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2018-08-24 21:07:48 +08:00			`// Beats the convolver. (Although it does have a lot more weights, it still`
			`// iterates faster.)`
			`EXPECT_LT(lstm_uni_err, non_lstm_err);`
			`}`

			`// Color learns almost as fast as normalized grey/2D.`
			`TEST_F(LSTMTrainerTest, ColorTest) {`
			`// A basic single-layer, single direction LSTM.`
[clang-format] Format unit tests. 2021-03-13 05:06:34 +08:00			`SetupTrainerEng("[1,32,0,3 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]", "2D-color-lstm", true, true);`
Add more unittests from Google They were provided by Jeff Breidenbach <jbreiden@google.com>. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2018-08-24 21:07:48 +08:00			`double lstm_uni_err = TrainIterations(kTrainerIterations);`
			`EXPECT_LT(lstm_uni_err, 85);`
[clang-format] Format unit tests. 2021-03-13 05:06:34 +08:00			`// EXPECT_GT(lstm_uni_err, 66);`
			`LOG(INFO) << "******** Expected < 85 **********\n";`
Add more unittests from Google They were provided by Jeff Breidenbach <jbreiden@google.com>. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2018-08-24 21:07:48 +08:00			`}`

			`TEST_F(LSTMTrainerTest, BidiTest) {`
			`// A basic single-layer, bi-di 1d LSTM.`
			`SetupTrainerEng("[1,1,0,32 Lbx100 O1c1]", "bidi-lstm", false, false);`
			`double lstm_bi_err = TrainIterations(kTrainerIterations);`
			`EXPECT_LT(lstm_bi_err, 75);`
[clang-format] Format unit tests. 2021-03-13 05:06:34 +08:00			`LOG(INFO) << "******** Expected < 75 **********\n";`
Add more unittests from Google They were provided by Jeff Breidenbach <jbreiden@google.com>. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2018-08-24 21:07:48 +08:00			`// Int mode training is dead, so convert the trained network to int and check`
			`// that its error rate is close to the float version.`
			`TestIntMode(kTrainerIterations);`
			`}`

			`// Tests that a 2d-2-layer network learns correctly.`
			`// It takes a lot of iterations to get there.`
			`TEST_F(LSTMTrainerTest, Test2D) {`
			`// A 2-layer LSTM with a 2-D feature-extracting LSTM on the bottom.`
[clang-format] Format unit tests. 2021-03-13 05:06:34 +08:00			`SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]", "2-D-2-layer-lstm", false,`
			`false);`
			`double lstm_2d_err = TrainIterations(kTrainerIterations * 3 / 2);`
Add more unittests from Google They were provided by Jeff Breidenbach <jbreiden@google.com>. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2018-08-24 21:07:48 +08:00			`EXPECT_LT(lstm_2d_err, 98);`
[clang-format] Format unit tests. 2021-03-13 05:06:34 +08:00			`// EXPECT_GT(lstm_2d_err, 90);`
			`LOG(INFO) << "******** Expected < 98 **********\n";`
Add more unittests from Google They were provided by Jeff Breidenbach <jbreiden@google.com>. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2018-08-24 21:07:48 +08:00			`// Int mode training is dead, so convert the trained network to int and check`
			`// that its error rate is close to the float version.`
			`TestIntMode(kTrainerIterations);`
			`}`

			`// Tests that a 2d-2-layer network with Adam does a lot better than`
			`// without it.`
			`TEST_F(LSTMTrainerTest, TestAdam) {`
			`// A 2-layer LSTM with a 2-D feature-extracting LSTM on the bottom.`
[clang-format] Format unit tests. 2021-03-13 05:06:34 +08:00			`SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]", "2-D-2-layer-lstm", false,`
			`true);`
Add more unittests from Google They were provided by Jeff Breidenbach <jbreiden@google.com>. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2018-08-24 21:07:48 +08:00			`double lstm_2d_err = TrainIterations(kTrainerIterations);`
			`EXPECT_LT(lstm_2d_err, 70);`
[clang-format] Format unit tests. 2021-03-13 05:06:34 +08:00			`LOG(INFO) << "******** Expected < 70 **********\n";`
Add more unittests from Google They were provided by Jeff Breidenbach <jbreiden@google.com>. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2018-08-24 21:07:48 +08:00			`TestIntMode(kTrainerIterations);`
			`}`

			`// Trivial test of training speed on a fairly complex network.`
			`TEST_F(LSTMTrainerTest, SpeedTest) {`
			`SetupTrainerEng(`
			`"[1,30,0,1 Ct5,5,16 Mp2,2 L2xy24 Ct1,1,48 Mp5,1 Ct1,1,32 S3,1 Lbx64 "`
			`"O1c1]",`
			`"2-D-2-layer-lstm", false, true);`
			`TrainIterations(kTrainerIterations);`
[clang-format] Format unit tests. 2021-03-13 05:06:34 +08:00			`LOG(INFO) << "******** * ************\n";`
Add more unittests from Google They were provided by Jeff Breidenbach <jbreiden@google.com>. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2018-08-24 21:07:48 +08:00			`}`

			`// Tests that two identical networks trained the same get the same results.`
			`// Also tests that the same happens with a serialize/deserialize in the middle.`
			`TEST_F(LSTMTrainerTest, DeterminismTest) {`
[clang-format] Format unit tests. 2021-03-13 05:06:34 +08:00			`SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]", "2-D-2-layer-lstm", false,`
			`false);`
more updates to lstm related unittests 2019-01-24 23:39:37 +08:00			`double lstm_2d_err_a = TrainIterations(kTrainerIterations);`
Add more unittests from Google They were provided by Jeff Breidenbach <jbreiden@google.com>. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2018-08-24 21:07:48 +08:00			`double act_error_a = trainer_->ActivationError();`
			`double char_error_a = trainer_->CharError();`
Use old genericvector.h Signed-off-by: Stefan Weil <sw@weilnetz.de> 2020-12-29 20:02:36 +08:00			`std::vector<char> trainer_a_data;`
[training] Modernize. 2021-03-15 04:45:57 +08:00			`EXPECT_TRUE(trainer_->SaveTrainingDump(NO_BEST_TRAINER, *trainer_, &trainer_a_data));`
[clang-format] Format unit tests. 2021-03-13 05:06:34 +08:00			`SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]", "2-D-2-layer-lstm", false,`
			`false);`
more updates to lstm related unittests 2019-01-24 23:39:37 +08:00			`double lstm_2d_err_b = TrainIterations(kTrainerIterations);`
Add more unittests from Google They were provided by Jeff Breidenbach <jbreiden@google.com>. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2018-08-24 21:07:48 +08:00			`double act_error_b = trainer_->ActivationError();`
			`double char_error_b = trainer_->CharError();`
			`EXPECT_FLOAT_EQ(lstm_2d_err_a, lstm_2d_err_b);`
			`EXPECT_FLOAT_EQ(act_error_a, act_error_b);`
			`EXPECT_FLOAT_EQ(char_error_a, char_error_b);`
			`// Now train some more iterations.`
			`lstm_2d_err_b = TrainIterations(kTrainerIterations / 3);`
			`act_error_b = trainer_->ActivationError();`
			`char_error_b = trainer_->CharError();`
			`// Unpack into a new trainer and train that some more too.`
[clang-format] Format unit tests. 2021-03-13 05:06:34 +08:00			`SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]", "2-D-2-layer-lstm", false,`
			`false);`
[training] Modernize. 2021-03-15 04:45:57 +08:00			`EXPECT_TRUE(trainer_->ReadTrainingDump(trainer_a_data, *trainer_));`
Add more unittests from Google They were provided by Jeff Breidenbach <jbreiden@google.com>. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2018-08-24 21:07:48 +08:00			`lstm_2d_err_a = TrainIterations(kTrainerIterations / 3);`
			`act_error_a = trainer_->ActivationError();`
			`char_error_a = trainer_->CharError();`
			`EXPECT_FLOAT_EQ(lstm_2d_err_a, lstm_2d_err_b);`
			`EXPECT_FLOAT_EQ(act_error_a, act_error_b);`
			`EXPECT_FLOAT_EQ(char_error_a, char_error_b);`
[clang-format] Format unit tests. 2021-03-13 05:06:34 +08:00			`LOG(INFO) << "******** * ************\n";`
Add more unittests from Google They were provided by Jeff Breidenbach <jbreiden@google.com>. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2018-08-24 21:07:48 +08:00			`}`

			`// The baseline network against which to test the built-in softmax.`
			`TEST_F(LSTMTrainerTest, SoftmaxBaselineTest) {`
			`// A basic single-layer, single direction LSTM.`
			`SetupTrainerEng("[1,1,0,32 Lfx96 O1c1]", "1D-lstm", false, true);`
			`double lstm_uni_err = TrainIterations(kTrainerIterations * 2);`
			`EXPECT_LT(lstm_uni_err, 60);`
[clang-format] Format unit tests. 2021-03-13 05:06:34 +08:00			`// EXPECT_GT(lstm_uni_err, 48);`
			`LOG(INFO) << "******** Expected < 60 **********\n";`
Add more unittests from Google They were provided by Jeff Breidenbach <jbreiden@google.com>. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2018-08-24 21:07:48 +08:00			`// Check that it works in int mode too.`
			`TestIntMode(kTrainerIterations);`
			`// If we run TestIntMode again, it tests that int_mode networks can`
			`// serialize and deserialize correctly.`
			`double delta = TestIntMode(kTrainerIterations);`
			`// The two tests (both of int mode this time) should be almost identical.`
Add LF to INFO msgs in lstm_test 2019-01-24 19:40:53 +08:00			`LOG(INFO) << "Delta in Int mode error rates = " << delta << "\n";`
Add more unittests from Google They were provided by Jeff Breidenbach <jbreiden@google.com>. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2018-08-24 21:07:48 +08:00			`EXPECT_LT(delta, 0.01);`
			`}`

			`// Tests that the built-in softmax does better than the external one,`
			`// which has an error rate slightly less than 55%, as tested by`
			`// SoftmaxBaselineTest.`
			`TEST_F(LSTMTrainerTest, SoftmaxTest) {`
			`// LSTM with a built-in softmax can beat the external softmax.`
			`SetupTrainerEng("[1,1,0,32 LS96]", "Lstm-+-softmax", false, true);`
			`double lstm_sm_err = TrainIterations(kTrainerIterations * 2);`
			`EXPECT_LT(lstm_sm_err, 49.0);`
[clang-format] Format unit tests. 2021-03-13 05:06:34 +08:00			`LOG(INFO) << "******** Expected < 49 **********\n";`
Add more unittests from Google They were provided by Jeff Breidenbach <jbreiden@google.com>. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2018-08-24 21:07:48 +08:00			`// Check that it works in int mode too.`
			`TestIntMode(kTrainerIterations);`
			`}`

			`// Tests that the built-in encoded softmax does better than the external one.`
			`// It takes a lot of iterations to get there.`
			`TEST_F(LSTMTrainerTest, EncodedSoftmaxTest) {`
			`// LSTM with a built-in encoded softmax can beat the external softmax.`
			`SetupTrainerEng("[1,1,0,32 LE96]", "Lstm-+-softmax", false, true);`
			`double lstm_sm_err = TrainIterations(kTrainerIterations * 2);`
			`EXPECT_LT(lstm_sm_err, 62.0);`
[clang-format] Format unit tests. 2021-03-13 05:06:34 +08:00			`LOG(INFO) << "******** Expected < 62 **********\n";`
Add more unittests from Google They were provided by Jeff Breidenbach <jbreiden@google.com>. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2018-08-24 21:07:48 +08:00			`// Check that it works in int mode too.`
			`TestIntMode(kTrainerIterations);`
			`}`

			`// Tests that layer access methods work correctly.`
			`TEST_F(LSTMTrainerTest, TestLayerAccess) {`
			`// A 2-layer LSTM with a Squashed feature-extracting LSTM on the bottom.`
[clang-format] Format unit tests. 2021-03-13 05:06:34 +08:00			`SetupTrainerEng("[1,32,0,1 Ct5,5,16 Mp2,2 Lfys32 Lbx128 O1c1]", "SQU-lstm", false, false);`
Add more unittests from Google They were provided by Jeff Breidenbach <jbreiden@google.com>. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2018-08-24 21:07:48 +08:00			`// Number of layers.`
Fix some compiler warnings Signed-off-by: Stefan Weil <sw@weilnetz.de> 2021-03-22 17:26:40 +08:00			`const size_t kNumLayers = 8;`
Add more unittests from Google They were provided by Jeff Breidenbach <jbreiden@google.com>. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2018-08-24 21:07:48 +08:00			`// Expected layer names.`
[clang-format] Format unit tests. 2021-03-13 05:06:34 +08:00			`const char *kLayerIds[kNumLayers] = {":0", ":1:0", ":1:1", ":2", ":3:0", ":4:0", ":4:1:0", ":5"};`
			`const char *kLayerNames[kNumLayers] = {"Input", "Convolve", "ConvNL", "Maxpool",`
			`"Lfys32", "Lbx128LTR", "Lbx128", "Output"};`
Add more unittests from Google They were provided by Jeff Breidenbach <jbreiden@google.com>. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2018-08-24 21:07:48 +08:00			`// Expected number of weights.`
			`const int kNumWeights[kNumLayers] = {0,`
			`0,`
			`16 * (25 + 1),`
			`0,`
			`32 * (4 * (32 + 16 + 1)),`
			`128 * (4 * (128 + 32 + 1)),`
			`128 * (4 * (128 + 32 + 1)),`
			`112 * (2 * 128 + 1)};`

More std::vector. 2021-01-07 18:57:49 +08:00			`auto layers = trainer_->EnumerateLayers();`
Add more unittests from Google They were provided by Jeff Breidenbach <jbreiden@google.com>. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2018-08-24 21:07:48 +08:00			`EXPECT_EQ(kNumLayers, layers.size());`
Fix some compiler warnings Signed-off-by: Stefan Weil <sw@weilnetz.de> 2021-03-22 17:26:40 +08:00			`for (unsigned i = 0; i < kNumLayers && i < layers.size(); ++i) {`
Remove member functions STRING::string and StringParam::string (continued) Commit 994ec697d8c347589c4afd9274f65d2f114b2d6e did not update unittest. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2019-09-25 16:07:51 +08:00			`EXPECT_STREQ(kLayerIds[i], layers[i].c_str());`
[clang-format] Format unit tests. 2021-03-13 05:06:34 +08:00			`EXPECT_STREQ(kLayerNames[i], trainer_->GetLayer(layers[i])->name().c_str());`
Add more unittests from Google They were provided by Jeff Breidenbach <jbreiden@google.com>. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2018-08-24 21:07:48 +08:00			`EXPECT_EQ(kNumWeights[i], trainer_->GetLayer(layers[i])->num_weights());`
			`}`
			`}`

[clang-format] Format unit tests. 2021-03-13 05:06:34 +08:00			`} // namespace tesseract.`