// tesseract/unittest/lstm_recode_test.cc

// (C) Copyright 2017, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lstm_test.h"

namespace tesseract {

// Tests that training with unicharset recoding learns faster than training
// without it, for Korean. The comparison is split into two separate tests so
// that they can be run sharded.

TEST_F(LSTMTrainerTest, RecodeTestKorBase) {
  // Baseline half of the comparison: a basic single-layer, bidirectional
  // 1-D LSTM trained on Korean under the model name "kor-full".
  // NOTE(review): the fifth SetupTrainer argument (false here, true in
  // RecodeTestKor) presumably switches unicharset recoding off -- confirm
  // against lstm_test.h.
  SetupTrainer("[1,1,0,32 Lbx96 O1c1]", "kor-full", "kor/kor.unicharset",
               "kor.Arial_Unicode_MS.exp0.lstmf", false, true, 5e-4, false, "kor");
  // This variant trains for twice kTrainerIterations.
  const auto iteration_budget = kTrainerIterations * 2;
  const double baseline_err = TrainIterations(iteration_budget);
  // Only the upper bound is enforced; the lower-bound check (error > 85)
  // remains disabled.
  EXPECT_LT(baseline_err, 88);
  LOG(INFO) << "********** Expected  < 88 ************\n";
}

TEST_F(LSTMTrainerTest, RecodeTestKor) {
  // Recoding half of the comparison: the same basic single-layer,
  // bidirectional 1-D LSTM on Korean, set up under the model name
  // "kor-recode".
  // NOTE(review): the fifth SetupTrainer argument (true here, false in
  // RecodeTestKorBase) presumably switches unicharset recoding on -- confirm
  // against lstm_test.h.
  SetupTrainer("[1,1,0,32 Lbx96 O1c1]", "kor-recode", "kor/kor.unicharset",
               "kor.Arial_Unicode_MS.exp0.lstmf", true, true, 5e-4, false, "kor");
  // Trains for kTrainerIterations, i.e. half of what RecodeTestKorBase uses.
  const auto iteration_budget = kTrainerIterations;
  const double recoded_err = TrainIterations(iteration_budget);
  // The value returned by TrainIterations must stay below 60.
  EXPECT_LT(recoded_err, 60);
  LOG(INFO) << "********** Expected  < 60 ************\n";
}

// Tests that the given string encodes and then decodes back to the same
// string, with recoding both on and off, for Korean.

TEST_F(LSTMTrainerTest, EncodeDecodeBothTestKor) {
  // Language tag and Korean sample sentence passed to the fixture's
  // TestEncodeDecodeBoth helper.
  const char *const lang = "kor";
  const char *const sample_text = "한국어 위키백과에 오신 것을 환영합니다!";
  TestEncodeDecodeBoth(lang, sample_text);
}

} // namespace tesseract.
Fix and enable lstm related unittests (#2180) * Fix and build lstm related unittests * Use ./tmp instead of ./ for files created by unittests 2019-01-24 15:01:19 +08:00			`// (C) Copyright 2017, Google Inc.`
			`// Licensed under the Apache License, Version 2.0 (the "License");`
			`// you may not use this file except in compliance with the License.`
			`// You may obtain a copy of the License at`
			`// http://www.apache.org/licenses/LICENSE-2.0`
			`// Unless required by applicable law or agreed to in writing, software`
			`// distributed under the License is distributed on an "AS IS" BASIS,`
			`// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`// See the License for the specific language governing permissions and`
			`// limitations under the License.`
Add more unittests from Google They were provided by Jeff Breidenbach <jbreiden@google.com>. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2018-08-24 21:07:48 +08:00
Fix and enable lstm related unittests (#2180) * Fix and build lstm related unittests * Use ./tmp instead of ./ for files created by unittests 2019-01-24 15:01:19 +08:00			`#include "lstm_test.h"`
Add more unittests from Google They were provided by Jeff Breidenbach <jbreiden@google.com>. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2018-08-24 21:07:48 +08:00
			`namespace tesseract {`

			`// Tests that training with unicharset recoding learns faster than without,`
			`// for Korean. This test is split in two, so it can be run sharded.`
Fix and enable lstm related unittests (#2180) * Fix and build lstm related unittests * Use ./tmp instead of ./ for files created by unittests 2019-01-24 15:01:19 +08:00
Add more unittests from Google They were provided by Jeff Breidenbach <jbreiden@google.com>. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2018-08-24 21:07:48 +08:00			`TEST_F(LSTMTrainerTest, RecodeTestKorBase) {`
			`// A basic single-layer, bi-di 1d LSTM on Korean.`
Fix and enable lstm related unittests (#2180) * Fix and build lstm related unittests * Use ./tmp instead of ./ for files created by unittests 2019-01-24 15:01:19 +08:00			`SetupTrainer("[1,1,0,32 Lbx96 O1c1]", "kor-full", "kor/kor.unicharset",`
Partially fix and enable more unittests Add more subtests to langmodel_test Add more subtests to langmodel_test fix and enable lstmtrainer_test fix and enable some subtests from recodebeam_test partial fix for resultiterator_test fix typo removing the terminating linefeed. fix typo changes 2019-01-25 22:05:57 +08:00			`"kor.Arial_Unicode_MS.exp0.lstmf", false, true, 5e-4, false, "kor");`
Fix and enable lstm related unittests (#2180) * Fix and build lstm related unittests * Use ./tmp instead of ./ for files created by unittests 2019-01-24 15:01:19 +08:00			`double kor_full_err = TrainIterations(kTrainerIterations * 2);`
Add more unittests from Google They were provided by Jeff Breidenbach <jbreiden@google.com>. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2018-08-24 21:07:48 +08:00			`EXPECT_LT(kor_full_err, 88);`
[clang-format] Format unit tests. 2021-03-13 05:06:34 +08:00			`// EXPECT_GT(kor_full_err, 85);`
			`LOG(INFO) << "******** Expected < 88 **********\n";`
Add more unittests from Google They were provided by Jeff Breidenbach <jbreiden@google.com>. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2018-08-24 21:07:48 +08:00			`}`

			`TEST_F(LSTMTrainerTest, RecodeTestKor) {`
			`// A basic single-layer, bi-di 1d LSTM on Korean.`
Fix and enable lstm related unittests (#2180) * Fix and build lstm related unittests * Use ./tmp instead of ./ for files created by unittests 2019-01-24 15:01:19 +08:00			`SetupTrainer("[1,1,0,32 Lbx96 O1c1]", "kor-recode", "kor/kor.unicharset",`
Partially fix and enable more unittests Add more subtests to langmodel_test Add more subtests to langmodel_test fix and enable lstmtrainer_test fix and enable some subtests from recodebeam_test partial fix for resultiterator_test fix typo removing the terminating linefeed. fix typo changes 2019-01-25 22:05:57 +08:00			`"kor.Arial_Unicode_MS.exp0.lstmf", true, true, 5e-4, false, "kor");`
Add more unittests from Google They were provided by Jeff Breidenbach <jbreiden@google.com>. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2018-08-24 21:07:48 +08:00			`double kor_recode_err = TrainIterations(kTrainerIterations);`
			`EXPECT_LT(kor_recode_err, 60);`
[clang-format] Format unit tests. 2021-03-13 05:06:34 +08:00			`LOG(INFO) << "******** Expected < 60 **********\n";`
Add more unittests from Google They were provided by Jeff Breidenbach <jbreiden@google.com>. Signed-off-by: Stefan Weil <sw@weilnetz.de> 2018-08-24 21:07:48 +08:00			`}`

Fix and enable lstm related unittests (#2180) * Fix and build lstm related unittests * Use ./tmp instead of ./ for files created by unittests 2019-01-24 15:01:19 +08:00			`// Tests that the given string encodes and decodes back to the same`
			`// with both recode on and off for Korean.`

			`TEST_F(LSTMTrainerTest, EncodeDecodeBothTestKor) {`
			`TestEncodeDecodeBoth("kor", "한국어 위키백과에 오신 것을 환영합니다!");`
			`}`

[clang-format] Format unit tests. 2021-03-13 05:06:34 +08:00			`} // namespace tesseract.`