mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-27 20:59:36 +08:00
commit
3a68a80eed
2
.github/workflows/unittest-macos.yml
vendored
2
.github/workflows/unittest-macos.yml
vendored
@ -25,7 +25,7 @@ jobs:
|
||||
run: |
|
||||
brew install autoconf automake libarchive
|
||||
brew install leptonica cairo pango
|
||||
brew install cabextract abseil
|
||||
brew install cabextract
|
||||
|
||||
- name: Setup
|
||||
run: |
|
||||
|
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -1,6 +1,3 @@
|
||||
[submodule "abseil"]
|
||||
path = abseil
|
||||
url = https://github.com/abseil/abseil-cpp.git
|
||||
[submodule "googletest"]
|
||||
path = googletest
|
||||
url = https://github.com/google/googletest.git
|
||||
|
@ -69,7 +69,7 @@ your question has been asked (and has been answered) many times before...
|
||||
|
||||
You should always make sure your changes build and run successfully.
|
||||
|
||||
For that, your clone needs to have all submodules (`abseil`, `googletest`, `test`) included. To do so, either specify `--recurse-submodules` during the initial clone, or run `git submodule update --init --recursive NAME` for each `NAME` later. If `configure` already created those directories (blocking the clone), remove them first (or `make distclean`), then clone and reconfigure.
|
||||
For that, your clone needs to have all submodules (`googletest`, `test`) included. To do so, either specify `--recurse-submodules` during the initial clone, or run `git submodule update --init --recursive NAME` for each `NAME` later. If `configure` already created those directories (blocking the clone), remove them first (or `make distclean`), then clone and reconfigure.
|
||||
|
||||
Have a look at [the README](./README.md) and [testing README](./test/testing/README.md) and the [documentation](https://tesseract-ocr.github.io/tessdoc/Compiling-%E2%80%93-GitInstallation.html#unit-test-builds) on installation.
|
||||
|
||||
|
82
Makefile.am
82
Makefile.am
@ -1156,7 +1156,6 @@ unittest_CPPFLAGS += $(pangocairo_CFLAGS)
|
||||
endif # ENABLE_TRAINING
|
||||
unittest_CPPFLAGS += -I$(top_srcdir)/src/viewer
|
||||
unittest_CPPFLAGS += -I$(top_srcdir)/src/wordrec
|
||||
unittest_CPPFLAGS += -I$(top_srcdir)/abseil
|
||||
if TENSORFLOW
|
||||
unittest_CPPFLAGS += -DINCLUDE_TENSORFLOW
|
||||
unittest_CPPFLAGS += -I$(top_srcdir)/unittest
|
||||
@ -1170,37 +1169,6 @@ libgtest_la_CPPFLAGS = -I$(top_srcdir)/googletest/googletest/include -I$(top_src
|
||||
libgtest_main_la_SOURCES = googletest/googletest/src/gtest_main.cc
|
||||
libgtest_main_la_CPPFLAGS = $(libgtest_la_CPPFLAGS)
|
||||
|
||||
# Build Abseil (needed for some unit tests).
|
||||
check_LTLIBRARIES += libabseil.la
|
||||
libabseil_la_SOURCES =
|
||||
libabseil_la_SOURCES += abseil/absl/base/internal/cycleclock.cc
|
||||
libabseil_la_SOURCES += abseil/absl/base/internal/raw_logging.cc
|
||||
libabseil_la_SOURCES += abseil/absl/base/internal/spinlock.cc
|
||||
libabseil_la_SOURCES += abseil/absl/base/internal/spinlock_wait.cc
|
||||
libabseil_la_SOURCES += abseil/absl/base/internal/sysinfo.cc
|
||||
libabseil_la_SOURCES += abseil/absl/base/internal/throw_delegate.cc
|
||||
libabseil_la_SOURCES += abseil/absl/base/internal/unscaledcycleclock.cc
|
||||
libabseil_la_SOURCES += abseil/absl/numeric/int128.cc
|
||||
libabseil_la_SOURCES += abseil/absl/strings/ascii.cc
|
||||
libabseil_la_SOURCES += abseil/absl/strings/charconv.cc
|
||||
libabseil_la_SOURCES += abseil/absl/strings/internal/charconv_bigint.cc
|
||||
libabseil_la_SOURCES += abseil/absl/strings/internal/charconv_parse.cc
|
||||
libabseil_la_SOURCES += abseil/absl/strings/internal/memutil.cc
|
||||
libabseil_la_SOURCES += abseil/absl/strings/internal/str_format/arg.cc
|
||||
libabseil_la_SOURCES += abseil/absl/strings/internal/str_format/bind.cc
|
||||
libabseil_la_SOURCES += abseil/absl/strings/internal/str_format/extension.cc
|
||||
libabseil_la_SOURCES += abseil/absl/strings/internal/str_format/float_conversion.cc
|
||||
libabseil_la_SOURCES += abseil/absl/strings/internal/str_format/output.cc
|
||||
libabseil_la_SOURCES += abseil/absl/strings/internal/str_format/parser.cc
|
||||
libabseil_la_SOURCES += abseil/absl/strings/match.cc
|
||||
libabseil_la_SOURCES += abseil/absl/strings/numbers.cc
|
||||
libabseil_la_SOURCES += abseil/absl/strings/str_cat.cc
|
||||
libabseil_la_SOURCES += abseil/absl/strings/str_split.cc
|
||||
libabseil_la_SOURCES += abseil/absl/strings/string_view.cc
|
||||
libabseil_la_SOURCES += abseil/absl/time/clock.cc
|
||||
libabseil_la_SOURCES += abseil/absl/time/duration.cc
|
||||
libabseil_la_CPPFLAGS = -I$(top_srcdir)/abseil
|
||||
|
||||
GMOCK_INCLUDES = -I$(top_srcdir)/googletest/googlemock/include \
|
||||
-I$(top_srcdir)/googletest/googlemock \
|
||||
-I$(top_srcdir)/googletest/googletest/include \
|
||||
@ -1214,7 +1182,6 @@ libgmock_main_la_CPPFLAGS = $(GMOCK_INCLUDES) \
|
||||
-pthread
|
||||
|
||||
# Build unittests
|
||||
ABSEIL_LIBS = libabseil.la
|
||||
GTEST_LIBS = libgtest.la libgtest_main.la -lpthread
|
||||
GMOCK_LIBS = libgmock.la libgmock_main.la
|
||||
TESS_LIBS = $(GTEST_LIBS)
|
||||
@ -1336,12 +1303,11 @@ endif # !DISABLED_LEGACY_ENGINE
|
||||
|
||||
baseapi_test_SOURCES = unittest/baseapi_test.cc
|
||||
baseapi_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
baseapi_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) $(LEPTONICA_LIBS)
|
||||
baseapi_test_LDADD = $(TRAINING_LIBS) $(LEPTONICA_LIBS)
|
||||
|
||||
baseapi_thread_test_SOURCES = unittest/baseapi_thread_test.cc
|
||||
baseapi_thread_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
baseapi_thread_test_LDADD = $(ABSEIL_LIBS)
|
||||
baseapi_thread_test_LDADD += $(TESS_LIBS) $(LEPTONICA_LIBS)
|
||||
baseapi_thread_test_LDADD = $(TESS_LIBS) $(LEPTONICA_LIBS)
|
||||
|
||||
if !DISABLED_LEGACY_ENGINE
|
||||
bitvector_test_SOURCES = unittest/bitvector_test.cc
|
||||
@ -1377,7 +1343,7 @@ endif # !DISABLED_LEGACY_ENGINE
|
||||
|
||||
fileio_test_SOURCES = unittest/fileio_test.cc
|
||||
fileio_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
fileio_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS)
|
||||
fileio_test_LDADD = $(TRAINING_LIBS)
|
||||
|
||||
heap_test_SOURCES = unittest/heap_test.cc
|
||||
heap_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
@ -1385,7 +1351,7 @@ heap_test_LDADD = $(TESS_LIBS)
|
||||
|
||||
imagedata_test_SOURCES = unittest/imagedata_test.cc
|
||||
imagedata_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
imagedata_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS)
|
||||
imagedata_test_LDADD = $(TRAINING_LIBS)
|
||||
|
||||
if !DISABLED_LEGACY_ENGINE
|
||||
indexmapbidi_test_SOURCES = unittest/indexmapbidi_test.cc
|
||||
@ -1411,7 +1377,7 @@ intsimdmatrix_test_LDADD = $(TESS_LIBS)
|
||||
|
||||
lang_model_test_SOURCES = unittest/lang_model_test.cc
|
||||
lang_model_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
lang_model_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
|
||||
lang_model_test_LDADD = $(TRAINING_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
|
||||
|
||||
layout_test_SOURCES = unittest/layout_test.cc
|
||||
layout_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
@ -1438,24 +1404,24 @@ loadlang_test_LDADD = $(TESS_LIBS) $(LEPTONICA_LIBS)
|
||||
|
||||
lstm_recode_test_SOURCES = unittest/lstm_recode_test.cc
|
||||
lstm_recode_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
lstm_recode_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS)
|
||||
lstm_recode_test_LDADD = $(TRAINING_LIBS)
|
||||
|
||||
lstm_squashed_test_SOURCES = unittest/lstm_squashed_test.cc
|
||||
lstm_squashed_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
lstm_squashed_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS)
|
||||
lstm_squashed_test_LDADD = $(TRAINING_LIBS)
|
||||
|
||||
lstm_test_SOURCES = unittest/lstm_test.cc
|
||||
lstm_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
lstm_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS)
|
||||
lstm_test_LDADD = $(TRAINING_LIBS)
|
||||
|
||||
lstmtrainer_test_SOURCES = unittest/lstmtrainer_test.cc
|
||||
lstmtrainer_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
lstmtrainer_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) $(LEPTONICA_LIBS)
|
||||
lstmtrainer_test_LDADD = $(TRAINING_LIBS) $(LEPTONICA_LIBS)
|
||||
|
||||
if !DISABLED_LEGACY_ENGINE
|
||||
mastertrainer_test_SOURCES = unittest/mastertrainer_test.cc
|
||||
mastertrainer_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
mastertrainer_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) $(LEPTONICA_LIBS)
|
||||
mastertrainer_test_LDADD = $(TRAINING_LIBS) $(LEPTONICA_LIBS)
|
||||
endif # !DISABLED_LEGACY_ENGINE
|
||||
|
||||
matrix_test_SOURCES = unittest/matrix_test.cc
|
||||
@ -1472,7 +1438,7 @@ normstrngs_test_SOURCES += unittest/third_party/utf/rune.c
|
||||
normstrngs_test_SOURCES += unittest/util/utf8/unilib.cc
|
||||
endif # TENSORFLOW
|
||||
normstrngs_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
normstrngs_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
|
||||
normstrngs_test_LDADD = $(TRAINING_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
|
||||
|
||||
nthitem_test_SOURCES = unittest/nthitem_test.cc
|
||||
nthitem_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
@ -1495,14 +1461,14 @@ pango_font_info_test_SOURCES += unittest/util/utf8/unicodetext.cc
|
||||
pango_font_info_test_SOURCES += unittest/util/utf8/unilib.cc
|
||||
endif # TENSORFLOW
|
||||
pango_font_info_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
pango_font_info_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) $(LEPTONICA_LIBS)
|
||||
pango_font_info_test_LDADD = $(TRAINING_LIBS) $(LEPTONICA_LIBS)
|
||||
pango_font_info_test_LDADD += $(ICU_I18N_LIBS)
|
||||
pango_font_info_test_LDADD += $(pangocairo_LIBS)
|
||||
pango_font_info_test_LDADD += $(pangoft2_LIBS)
|
||||
|
||||
paragraphs_test_SOURCES = unittest/paragraphs_test.cc
|
||||
paragraphs_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
paragraphs_test_LDADD = $(ABSEIL_LIBS) $(TESS_LIBS)
|
||||
paragraphs_test_LDADD = $(TESS_LIBS)
|
||||
|
||||
if !DISABLED_LEGACY_ENGINE
|
||||
params_model_test_SOURCES = unittest/params_model_test.cc
|
||||
@ -1517,11 +1483,11 @@ progress_test_LDADD = $(GTEST_LIBS) $(GMOCK_LIBS) $(TESS_LIBS) $(LEPTONICA_LIBS)
|
||||
|
||||
qrsequence_test_SOURCES = unittest/qrsequence_test.cc
|
||||
qrsequence_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
qrsequence_test_LDADD = $(ABSEIL_LIBS) $(TESS_LIBS)
|
||||
qrsequence_test_LDADD = $(TESS_LIBS)
|
||||
|
||||
recodebeam_test_SOURCES = unittest/recodebeam_test.cc
|
||||
recodebeam_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
recodebeam_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
|
||||
recodebeam_test_LDADD = $(TRAINING_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
|
||||
|
||||
rect_test_SOURCES = unittest/rect_test.cc
|
||||
rect_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
@ -1529,7 +1495,7 @@ rect_test_LDADD = $(TESS_LIBS)
|
||||
|
||||
resultiterator_test_SOURCES = unittest/resultiterator_test.cc
|
||||
resultiterator_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
resultiterator_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS)
|
||||
resultiterator_test_LDADD = $(TRAINING_LIBS)
|
||||
resultiterator_test_LDADD += $(LEPTONICA_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
|
||||
|
||||
scanutils_test_SOURCES = unittest/scanutils_test.cc
|
||||
@ -1539,7 +1505,7 @@ scanutils_test_LDADD = $(TRAINING_LIBS)
|
||||
if !DISABLED_LEGACY_ENGINE
|
||||
shapetable_test_SOURCES = unittest/shapetable_test.cc
|
||||
shapetable_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
shapetable_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS)
|
||||
shapetable_test_LDADD = $(TRAINING_LIBS)
|
||||
endif # !DISABLED_LEGACY_ENGINE
|
||||
|
||||
stats_test_SOURCES = unittest/stats_test.cc
|
||||
@ -1552,7 +1518,7 @@ stridemap_test_LDADD = $(TESS_LIBS)
|
||||
|
||||
stringrenderer_test_SOURCES = unittest/stringrenderer_test.cc
|
||||
stringrenderer_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
stringrenderer_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) $(LEPTONICA_LIBS)
|
||||
stringrenderer_test_LDADD = $(TRAINING_LIBS) $(LEPTONICA_LIBS)
|
||||
stringrenderer_test_LDADD += $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
|
||||
stringrenderer_test_LDADD += $(pangocairo_LIBS) $(pangoft2_LIBS)
|
||||
stringrenderer_test_LDADD += $(cairo_LIBS) $(pango_LIBS)
|
||||
@ -1580,7 +1546,7 @@ tatweel_test_LDADD = $(TRAINING_LIBS)
|
||||
|
||||
textlineprojection_test_SOURCES = unittest/textlineprojection_test.cc
|
||||
textlineprojection_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
textlineprojection_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) $(LEPTONICA_LIBS)
|
||||
textlineprojection_test_LDADD = $(TRAINING_LIBS) $(LEPTONICA_LIBS)
|
||||
|
||||
tfile_test_SOURCES = unittest/tfile_test.cc
|
||||
tfile_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
@ -1592,7 +1558,7 @@ unichar_test_LDADD = $(TRAINING_LIBS) $(ICU_UC_LIBS)
|
||||
|
||||
unicharcompress_test_SOURCES = unittest/unicharcompress_test.cc
|
||||
unicharcompress_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
unicharcompress_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) $(ICU_UC_LIBS)
|
||||
unicharcompress_test_LDADD = $(TRAINING_LIBS) $(ICU_UC_LIBS)
|
||||
|
||||
unicharset_test_SOURCES = unittest/unicharset_test.cc
|
||||
unicharset_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
@ -1600,19 +1566,19 @@ unicharset_test_LDADD = $(TRAINING_LIBS) $(ICU_UC_LIBS)
|
||||
|
||||
validate_grapheme_test_SOURCES = unittest/validate_grapheme_test.cc
|
||||
validate_grapheme_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
validate_grapheme_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
|
||||
validate_grapheme_test_LDADD = $(TRAINING_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
|
||||
|
||||
validate_indic_test_SOURCES = unittest/validate_indic_test.cc
|
||||
validate_indic_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
validate_indic_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
|
||||
validate_indic_test_LDADD = $(TRAINING_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
|
||||
|
||||
validate_khmer_test_SOURCES = unittest/validate_khmer_test.cc
|
||||
validate_khmer_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
validate_khmer_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
|
||||
validate_khmer_test_LDADD = $(TRAINING_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
|
||||
|
||||
validate_myanmar_test_SOURCES = unittest/validate_myanmar_test.cc
|
||||
validate_myanmar_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
validate_myanmar_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
|
||||
validate_myanmar_test_LDADD = $(TRAINING_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
|
||||
|
||||
validator_test_SOURCES = unittest/validator_test.cc
|
||||
validator_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
|
1
abseil
1
abseil
@ -1 +0,0 @@
|
||||
Subproject commit e1d388e7e74803050423d035e4374131b9b57919
|
1
sw.cpp
1
sw.cpp
@ -224,7 +224,6 @@ void build(Solution &s)
|
||||
t += pango_training;
|
||||
t += "org.sw.demo.google.googletest.gmock.main"_dep;
|
||||
t += "org.sw.demo.google.googletest.gtest.main"_dep;
|
||||
t += "org.sw.demo.google.abseil"_dep;
|
||||
|
||||
if (t.getCompilerType() == CompilerType::MSVC)
|
||||
t.CompileOptions.push_back("-utf-8");
|
||||
|
@ -60,7 +60,7 @@
|
||||
│ └── script
|
||||
│ └── Latin.traineddata
|
||||
└── tesseract
|
||||
├── abseil
|
||||
├── googletest
|
||||
...
|
||||
├── test
|
||||
├── unittest
|
||||
|
@ -19,8 +19,6 @@
|
||||
#include <tesseract/baseapi.h>
|
||||
|
||||
#include <allheaders.h>
|
||||
#include "absl/strings/ascii.h"
|
||||
#include "absl/strings/str_cat.h"
|
||||
#include "gmock/gmock-matchers.h"
|
||||
|
||||
#include <memory>
|
||||
@ -49,7 +47,7 @@ std::string GetCleanedTextResult(tesseract::TessBaseAPI *tess, Image pix) {
|
||||
char *result = tess->GetUTF8Text();
|
||||
std::string ocr_result = result;
|
||||
delete[] result;
|
||||
absl::StripAsciiWhitespace(&ocr_result);
|
||||
trim(ocr_result);
|
||||
return ocr_result;
|
||||
}
|
||||
|
||||
@ -81,7 +79,7 @@ TEST_F(TesseractTest, BasicTesseractTest) {
|
||||
ocr_text = GetCleanedTextResult(&api, src_pix);
|
||||
CHECK_OK(
|
||||
file::GetContents(TestDataNameToPath("phototest.gold.txt"), &truth_text, file::Defaults()));
|
||||
absl::StripAsciiWhitespace(&truth_text);
|
||||
trim(truth_text);
|
||||
EXPECT_STREQ(truth_text.c_str(), ocr_text.c_str());
|
||||
src_pix.destroy();
|
||||
} else {
|
||||
@ -202,7 +200,7 @@ TEST_F(TesseractTest, AdaptToWordStrTest) {
|
||||
Image src_pix = pixRead(TestDataNameToPath(kTestPages[i]).c_str());
|
||||
CHECK(src_pix);
|
||||
ocr_text = GetCleanedTextResult(&api, src_pix);
|
||||
absl::StripAsciiWhitespace(&truth_text);
|
||||
trim(truth_text);
|
||||
EXPECT_STREQ(kTestText[i], ocr_text.c_str());
|
||||
src_pix.destroy();
|
||||
}
|
||||
@ -224,7 +222,7 @@ TEST_F(TesseractTest, BasicLSTMTest) {
|
||||
ocr_text = GetCleanedTextResult(&api, src_pix);
|
||||
CHECK_OK(
|
||||
file::GetContents(TestDataNameToPath("phototest.gold.txt"), &truth_text, file::Defaults()));
|
||||
absl::StripAsciiWhitespace(&truth_text);
|
||||
trim(truth_text);
|
||||
EXPECT_STREQ(truth_text.c_str(), ocr_text.c_str());
|
||||
src_pix.destroy();
|
||||
}
|
||||
@ -323,7 +321,9 @@ TEST(TesseractInstanceTest, TestMultipleTessInstances) {
|
||||
// Preload images and verify that OCR is correct on them individually.
|
||||
std::vector<Image > pix(num_langs);
|
||||
for (int i = 0; i < num_langs; ++i) {
|
||||
SCOPED_TRACE(absl::StrCat("Single instance test with lang = ", langs[i]));
|
||||
std::string tracestring = "Single instance test with lang = ";
|
||||
tracestring += langs[i];
|
||||
SCOPED_TRACE(tracestring);
|
||||
std::string path = file::JoinPath(TESTING_DIR, image_files[i]);
|
||||
pix[i] = pixRead(path.c_str());
|
||||
QCHECK(pix[i] != nullptr) << "Could not read " << path;
|
||||
|
@ -28,7 +28,6 @@
|
||||
#endif
|
||||
#include <allheaders.h>
|
||||
#include <tesseract/baseapi.h>
|
||||
#include "absl/strings/ascii.h" // for absl::StripAsciiWhitespace
|
||||
#include "commandlineflags.h"
|
||||
#include "include_gunit.h"
|
||||
#include "log.h"
|
||||
@ -148,12 +147,12 @@ static void InitTessInstance(TessBaseAPI *tess, const std::string &lang) {
|
||||
EXPECT_EQ(0, tess->Init(TESSDATA_DIR, lang.c_str()));
|
||||
}
|
||||
|
||||
static void GetCleanedText(TessBaseAPI *tess, Image pix, std::string *ocr_text) {
|
||||
static void GetCleanedText(TessBaseAPI *tess, Image pix, std::string &ocr_text) {
|
||||
tess->SetImage(pix);
|
||||
char *result = tess->GetUTF8Text();
|
||||
*ocr_text = result;
|
||||
ocr_text = result;
|
||||
delete[] result;
|
||||
absl::StripAsciiWhitespace(ocr_text);
|
||||
trim(ocr_text);
|
||||
}
|
||||
|
||||
static void VerifyTextResult(TessBaseAPI *tess, Image pix, const std::string &lang,
|
||||
@ -166,7 +165,7 @@ static void VerifyTextResult(TessBaseAPI *tess, Image pix, const std::string &la
|
||||
InitTessInstance(tess_local, lang);
|
||||
}
|
||||
std::string ocr_text;
|
||||
GetCleanedText(tess_local, pix, &ocr_text);
|
||||
GetCleanedText(tess_local, pix, ocr_text);
|
||||
EXPECT_STREQ(expected_text.c_str(), ocr_text.c_str());
|
||||
if (tess_local != tess) {
|
||||
delete tess_local;
|
||||
@ -180,7 +179,7 @@ TEST_F(BaseapiThreadTest, TestBasicSanity) {
|
||||
TessBaseAPI tess;
|
||||
InitTessInstance(&tess, langs_[i]);
|
||||
std::string ocr_text;
|
||||
GetCleanedText(&tess, pix_[i], &ocr_text);
|
||||
GetCleanedText(&tess, pix_[i], ocr_text);
|
||||
CHECK(strcmp(gt_text_[i].c_str(), ocr_text.c_str()) == 0) << "Failed with lang = " << langs_[i];
|
||||
}
|
||||
}
|
||||
|
@ -13,10 +13,16 @@
|
||||
#ifndef TESSERACT_UNITTEST_CYCLETIMER_H
|
||||
#define TESSERACT_UNITTEST_CYCLETIMER_H
|
||||
|
||||
#include "absl/time/clock.h" // for GetCurrentTimeNanos
|
||||
#include <chrono> // for std::chrono
|
||||
|
||||
// See https://github.com/google/or-tools/blob/master/ortools/base/timer.h
|
||||
class CycleTimer {
|
||||
private:
|
||||
// Returns the current std::chrono::steady_clock reading, expressed as whole
// milliseconds since that clock's epoch.
static int64_t now() {
  using std::chrono::milliseconds;
  const auto since_epoch = std::chrono::steady_clock::now().time_since_epoch();
  return std::chrono::duration_cast<milliseconds>(since_epoch).count();
}
|
||||
|
||||
public:
|
||||
CycleTimer() {
|
||||
Reset();
|
||||
@ -31,7 +37,7 @@ public:
|
||||
// When Start() is called multiple times, only the most recent is used.
|
||||
// Marks the timer as running and records the current clock reading as the
// beginning of the interval being measured.
void Start() {
  running_ = true;
  // Take the timestamp from now() only: Stop() and the elapsed-time getter
  // subtract start_ from now(), so all of them must share the same clock and
  // the same unit (milliseconds).
  start_ = now();
}
|
||||
|
||||
void Restart() {
|
||||
@ -41,17 +47,12 @@ public:
|
||||
|
||||
void Stop() {
|
||||
if (running_) {
|
||||
sum_ += absl::GetCurrentTimeNanos() - start_;
|
||||
sum_ += now() - start_;
|
||||
running_ = false;
|
||||
}
|
||||
}
|
||||
int64_t GetInMs() const {
|
||||
return GetNanos() / 1000000;
|
||||
}
|
||||
|
||||
protected:
|
||||
int64_t GetNanos() const {
|
||||
return running_ ? absl::GetCurrentTimeNanos() - start_ + sum_ : sum_;
|
||||
return running_ ? now() - start_ + sum_ : sum_;
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -12,8 +12,6 @@
|
||||
#include <stdio.h>
|
||||
#include <memory>
|
||||
|
||||
#include "absl/strings/str_split.h"
|
||||
|
||||
#include "fileio.h"
|
||||
#include "include_gunit.h"
|
||||
|
||||
@ -58,7 +56,7 @@ TEST(InputBufferTest, Read) {
|
||||
std::string str;
|
||||
auto input = std::make_unique<InputBuffer>(fp);
|
||||
EXPECT_TRUE(input->Read(&str));
|
||||
std::vector<std::string> lines = absl::StrSplit(str, '\n', absl::SkipEmpty());
|
||||
std::vector<std::string> lines = split(str, '\n');
|
||||
EXPECT_EQ(2, lines.size());
|
||||
EXPECT_EQ("Hello", lines[0]);
|
||||
EXPECT_EQ(" world!", lines[1]);
|
||||
|
@ -12,9 +12,6 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/strings/str_cat.h"
|
||||
#include "absl/strings/str_format.h"
|
||||
|
||||
#include "imagedata.h"
|
||||
#include "include_gunit.h"
|
||||
#include "log.h"
|
||||
@ -42,7 +39,9 @@ protected:
|
||||
DocumentData write_doc("My document");
|
||||
for (int p = 0; p < num_pages; ++p) {
|
||||
// Make some fake text that is different for each page and save it.
|
||||
page_texts->push_back(absl::StrFormat("Page %d of %d in doc %u", p, num_pages, doc_id));
|
||||
char text[80];
|
||||
snprintf(text, sizeof(text), "Page %d of %d in doc %u", p, num_pages, doc_id);
|
||||
page_texts->push_back(text);
|
||||
// Make an imagedata and put it in the document.
|
||||
ImageData *imagedata = ImageData::Build("noname", p, "eng", fake_image.data(),
|
||||
fake_image.size(), (*page_texts)[p].c_str(), nullptr);
|
||||
@ -51,7 +50,8 @@ protected:
|
||||
}
|
||||
// Write it to a file.
|
||||
std::string filename =
|
||||
file::JoinPath(FLAGS_test_tmpdir, absl::StrCat("documentdata", doc_id, ".lstmf"));
|
||||
file::JoinPath(FLAGS_test_tmpdir, "documentdata");
|
||||
filename += std::to_string(doc_id) + ".lstmf";
|
||||
EXPECT_TRUE(write_doc.SaveDocument(filename.c_str(), nullptr));
|
||||
return filename;
|
||||
}
|
||||
|
@ -20,6 +20,19 @@
|
||||
|
||||
const char *FLAGS_test_tmpdir = "./tmp";
|
||||
|
||||
namespace tesseract {

// Strips leading and trailing whitespace from `s` in place.
//
// A character counts as whitespace when std::isspace says so. Each character
// is handed to std::isspace as unsigned char, because passing a negative
// plain-char value to it is undefined behaviour.
//
// NOTE(review): declared inline because other test sources call trim()
// without defining it, so this definition is presumably compiled into more
// than one translation unit via a shared header - confirm.
inline void trim(std::string &s) {
  const auto not_space = [](unsigned char ch) { return !std::isspace(ch); };
  // Cut the tail first so the leading erase has fewer characters to shift.
  s.erase(std::find_if(s.rbegin(), s.rend(), not_space).base(), s.end());
  s.erase(s.begin(), std::find_if(s.begin(), s.end(), not_space));
}

} // namespace tesseract
|
||||
|
||||
class file : public tesseract::File {
|
||||
public:
|
||||
static void MakeTmpdir() {
|
||||
|
@ -11,8 +11,6 @@
|
||||
|
||||
#include <string> // for std::string
|
||||
|
||||
#include "absl/strings/str_cat.h"
|
||||
|
||||
#include "gmock/gmock.h" // for testing::ElementsAreArray
|
||||
|
||||
#include "include_gunit.h"
|
||||
@ -59,7 +57,7 @@ TEST(LangModelTest, AddACharacter) {
|
||||
pass_through_recoder, words, puncs, numbers, lang_is_rtl, nullptr,
|
||||
nullptr));
|
||||
// Init a trainer with it, and encode kTestString.
|
||||
std::string traineddata1 = file::JoinPath(output_dir, lang1, absl::StrCat(lang1, ".traineddata"));
|
||||
std::string traineddata1 = file::JoinPath(output_dir, lang1, lang1) + ".traineddata";
|
||||
LSTMTrainer trainer1;
|
||||
trainer1.InitCharSet(traineddata1);
|
||||
std::vector<int> labels1;
|
||||
@ -79,7 +77,7 @@ TEST(LangModelTest, AddACharacter) {
|
||||
pass_through_recoder, words, puncs, numbers, lang_is_rtl,
|
||||
nullptr, nullptr));
|
||||
// Init a trainer with it, and encode kTestString.
|
||||
std::string traineddata2 = file::JoinPath(output_dir, lang2, absl::StrCat(lang2, ".traineddata"));
|
||||
std::string traineddata2 = file::JoinPath(output_dir, lang2, lang2) + ".traineddata";
|
||||
LSTMTrainer trainer2;
|
||||
trainer2.InitCharSet(traineddata2);
|
||||
std::vector<int> labels2;
|
||||
@ -144,7 +142,7 @@ TEST(LangModelTest, AddACharacterHindi) {
|
||||
pass_through_recoder, words, puncs, numbers, lang_is_rtl, nullptr,
|
||||
nullptr));
|
||||
// Init a trainer with it, and encode kTestString.
|
||||
std::string traineddata1 = file::JoinPath(output_dir, lang1, absl::StrCat(lang1, ".traineddata"));
|
||||
std::string traineddata1 = file::JoinPath(output_dir, lang1, lang1) + ".traineddata";
|
||||
LSTMTrainer trainer1;
|
||||
trainer1.InitCharSet(traineddata1);
|
||||
std::vector<int> labels1;
|
||||
@ -164,7 +162,7 @@ TEST(LangModelTest, AddACharacterHindi) {
|
||||
pass_through_recoder, words, puncs, numbers, lang_is_rtl,
|
||||
nullptr, nullptr));
|
||||
// Init a trainer with it, and encode kTestString.
|
||||
std::string traineddata2 = file::JoinPath(output_dir, lang2, absl::StrCat(lang2, ".traineddata"));
|
||||
std::string traineddata2 = file::JoinPath(output_dir, lang2, lang2) + ".traineddata";
|
||||
LSTMTrainer trainer2;
|
||||
trainer2.InitCharSet(traineddata2);
|
||||
std::vector<int> labels2;
|
||||
|
@ -18,7 +18,6 @@
|
||||
|
||||
#include "include_gunit.h"
|
||||
|
||||
#include "absl/strings/str_cat.h"
|
||||
#include "helpers.h"
|
||||
#include "tprintf.h"
|
||||
|
||||
@ -81,7 +80,7 @@ protected:
|
||||
std::string checkpoint_path = model_path + "_checkpoint";
|
||||
trainer_ = std::make_unique<LSTMTrainer>(model_path.c_str(), checkpoint_path.c_str(), 0, 0);
|
||||
trainer_->InitCharSet(
|
||||
file::JoinPath(FLAGS_test_tmpdir, kLang, absl::StrCat(kLang, ".traineddata")));
|
||||
file::JoinPath(FLAGS_test_tmpdir, kLang, kLang) + ".traineddata");
|
||||
int net_mode = adam ? NF_ADAM : 0;
|
||||
// Adam needs a higher learning rate, due to not multiplying the effective
|
||||
// rate by 1/(1-momentum).
|
||||
|
@ -32,9 +32,6 @@
|
||||
#include "trainingsample.h"
|
||||
#include "unicharset.h"
|
||||
|
||||
#include "absl/strings/numbers.h" // for safe_strto32
|
||||
#include "absl/strings/str_split.h" // for absl::StrSplit
|
||||
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
@ -268,7 +265,7 @@ TEST_F(MasterTrainerTest, ErrorCounterTest) {
|
||||
shape_classifier.get(), &accuracy_report);
|
||||
LOG(INFO) << accuracy_report.c_str();
|
||||
std::string result_string = accuracy_report.c_str();
|
||||
std::vector<std::string> results = absl::StrSplit(result_string, '\t', absl::SkipEmpty());
|
||||
std::vector<std::string> results = split(result_string, '\t');
|
||||
EXPECT_EQ(tesseract::CT_SIZE + 1, results.size());
|
||||
int result_values[tesseract::CT_SIZE];
|
||||
for (int i = 0; i < tesseract::CT_SIZE; ++i) {
|
||||
|
@ -11,7 +11,6 @@
|
||||
|
||||
#include "normstrngs.h"
|
||||
#include <tesseract/unichar.h>
|
||||
#include "absl/strings/str_format.h" // for absl::StrFormat
|
||||
#include "include_gunit.h"
|
||||
#include "normstrngs_test.h"
|
||||
#ifdef INCLUDE_TENSORFLOW
|
||||
@ -315,7 +314,9 @@ TEST(NormstrngsTest, IsWhitespace) {
|
||||
EXPECT_TRUE(IsWhitespace('\n'));
|
||||
// U+2000 through U+200A
|
||||
for (char32 ch = 0x2000; ch <= 0x200A; ++ch) {
|
||||
SCOPED_TRACE(absl::StrFormat("Failed at U+%x", ch));
|
||||
char text[80];
|
||||
snprintf(text, sizeof(text), "Failed at U+%x", ch);
|
||||
SCOPED_TRACE(text);
|
||||
EXPECT_TRUE(IsWhitespace(ch));
|
||||
}
|
||||
// U+3000 is whitespace
|
||||
@ -354,7 +355,9 @@ TEST(NormstrngsTest, IsInterchangeValid) {
|
||||
const int32_t kMinUnicodeValue = 33;
|
||||
const int32_t kMaxUnicodeValue = 0x10FFFF;
|
||||
for (int32_t ch = kMinUnicodeValue; ch <= kMaxUnicodeValue; ++ch) {
|
||||
SCOPED_TRACE(absl::StrFormat("Failed at U+%x", ch));
|
||||
char text[80];
|
||||
snprintf(text, sizeof(text), "Failed at U+%x", ch);
|
||||
SCOPED_TRACE(text);
|
||||
EXPECT_EQ(UniLib::IsInterchangeValid(ch), IsInterchangeValid(ch));
|
||||
}
|
||||
#else
|
||||
@ -369,7 +372,9 @@ TEST(NormstrngsTest, IsInterchangeValid7BitAscii) {
|
||||
const int32_t kMinUnicodeValue = 33;
|
||||
const int32_t kMaxUnicodeValue = 0x10FFFF;
|
||||
for (int32_t ch = kMinUnicodeValue; ch <= kMaxUnicodeValue; ++ch) {
|
||||
SCOPED_TRACE(absl::StrFormat("Failed at U+%x", ch));
|
||||
char text[80];
|
||||
snprintf(text, sizeof(text), "Failed at U+%x", ch);
|
||||
SCOPED_TRACE(text);
|
||||
std::string str = EncodeAsUTF8(ch);
|
||||
EXPECT_EQ(UniLib::IsInterchangeValid7BitAscii(str), IsInterchangeValid7BitAscii(ch));
|
||||
}
|
||||
@ -396,7 +401,9 @@ TEST(NormstrngsTest, FullwidthToHalfwidth) {
|
||||
for (int32_t ch = kMinUnicodeValue; ch <= kMaxUnicodeValue; ++ch) {
|
||||
if (!IsValidCodepoint(ch))
|
||||
continue;
|
||||
SCOPED_TRACE(absl::StrFormat("Failed at U+%x", ch));
|
||||
char text[80];
|
||||
snprintf(text, sizeof(text), "Failed at U+%x", ch);
|
||||
SCOPED_TRACE(text);
|
||||
std::string str = EncodeAsUTF8(ch);
|
||||
const std::string expected_half_str =
|
||||
UniLib::FullwidthToHalfwidth(str.c_str(), str.length(), true);
|
||||
|
@ -16,8 +16,6 @@
|
||||
#include <sstream> // for std::stringstream
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "absl/strings/str_cat.h"
|
||||
#include "absl/strings/str_join.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
@ -33,7 +31,9 @@ inline std::string CodepointList(const std::vector<char32> &str32) {
|
||||
|
||||
inline std::string PrintString32WithUnicodes(const std::string &str) {
|
||||
std::vector<char32> str32 = UNICHAR::UTF8ToUTF32(str.c_str());
|
||||
return absl::StrCat("\"", str, "\" ", CodepointList(str32));
|
||||
std::string s = "\"";
|
||||
s += "\" " + CodepointList(str32);
|
||||
return s;
|
||||
}
|
||||
|
||||
inline std::string PrintStringVectorWithUnicodes(const std::vector<std::string> &glyphs) {
|
||||
@ -49,18 +49,30 @@ inline void ExpectGraphemeModeResults(const std::string &str, UnicodeNormMode u_
|
||||
int unicode_count, int glyph_count, int grapheme_count,
|
||||
const std::string &target_str) {
|
||||
std::vector<std::string> glyphs;
|
||||
std::string s;
|
||||
EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(
|
||||
u_mode, OCRNorm::kNone, GraphemeNormMode::kIndividualUnicodes, true, str.c_str(), &glyphs));
|
||||
EXPECT_EQ(glyphs.size(), unicode_count) << PrintStringVectorWithUnicodes(glyphs);
|
||||
EXPECT_EQ(target_str, absl::StrJoin(glyphs.begin(), glyphs.end(), ""));
|
||||
for (auto &glyph : glyphs) {
|
||||
s += glyph;
|
||||
}
|
||||
EXPECT_EQ(target_str, s);
|
||||
EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(u_mode, OCRNorm::kNone, GraphemeNormMode::kGlyphSplit,
|
||||
true, str.c_str(), &glyphs));
|
||||
EXPECT_EQ(glyphs.size(), glyph_count) << PrintStringVectorWithUnicodes(glyphs);
|
||||
EXPECT_EQ(target_str, absl::StrJoin(glyphs.begin(), glyphs.end(), ""));
|
||||
s.clear();
|
||||
for (auto &glyph : glyphs) {
|
||||
s += glyph;
|
||||
}
|
||||
EXPECT_EQ(target_str, s);
|
||||
EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(u_mode, OCRNorm::kNone, GraphemeNormMode::kCombined,
|
||||
true, str.c_str(), &glyphs));
|
||||
EXPECT_EQ(glyphs.size(), grapheme_count) << PrintStringVectorWithUnicodes(glyphs);
|
||||
EXPECT_EQ(target_str, absl::StrJoin(glyphs.begin(), glyphs.end(), ""));
|
||||
s.clear();
|
||||
for (auto &glyph : glyphs) {
|
||||
s += glyph;
|
||||
}
|
||||
EXPECT_EQ(target_str, s);
|
||||
EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(u_mode, OCRNorm::kNone, GraphemeNormMode::kSingleString,
|
||||
true, str.c_str(), &glyphs));
|
||||
EXPECT_EQ(glyphs.size(), 1) << PrintStringVectorWithUnicodes(glyphs);
|
||||
|
@ -13,7 +13,6 @@
|
||||
#include <pango/pango.h>
|
||||
#include <cstdio>
|
||||
#include <string>
|
||||
#include "absl/strings/str_cat.h" // for absl::StrCat
|
||||
#include "commandlineflags.h"
|
||||
#include "fileio.h"
|
||||
#include "gmock/gmock-matchers.h" // for EXPECT_THAT
|
||||
@ -316,7 +315,8 @@ TEST_F(FontUtilsTest, GetAllRenderableCharacters) {
|
||||
// Check that none of the included fonts cover the Mongolian or Ogham space
|
||||
// characters.
|
||||
for (size_t f = 0; f < countof(kExpectedFontNames); ++f) {
|
||||
SCOPED_TRACE(absl::StrCat("Testing ", kExpectedFontNames[f]));
|
||||
std::string tracestring = "Testing " + kExpectedFontNames[f];
|
||||
SCOPED_TRACE(tracestring);
|
||||
FontUtils::GetAllRenderableCharacters(kExpectedFontNames[f], &unicode_mask);
|
||||
# if 0 // TODO: check fails because DejaVu Sans Ultra-Light supports ogham
|
||||
EXPECT_FALSE(unicode_mask[kOghamChar]);
|
||||
|
@ -11,10 +11,6 @@
|
||||
|
||||
#include <string> // for std::string
|
||||
|
||||
#include "absl/strings/str_cat.h" // for absl::StrCat
|
||||
#include "absl/strings/str_join.h" // for absl::StrJoin
|
||||
#include "absl/strings/str_split.h" // for absl::StrSplit
|
||||
|
||||
#include "include_gunit.h" // for TEST
|
||||
#include "log.h" // for LOG
|
||||
|
||||
@ -62,7 +58,7 @@ void AsciiToRowInfo(const char *text, int row_number, RowInfo *info) {
|
||||
info->lword_text = info->rword_text = "";
|
||||
info->ltr = true;
|
||||
|
||||
std::vector<std::string> words = absl::StrSplit(text, ' ', absl::SkipEmpty());
|
||||
std::vector<std::string> words = split(text, ' ');
|
||||
info->num_words = words.size();
|
||||
if (info->num_words < 1) {
|
||||
return;
|
||||
@ -156,10 +152,11 @@ void EvaluateParagraphDetection(const TextAndModel *correct, int n,
|
||||
dbg_lines.emplace_back("# ==========================");
|
||||
for (int i = 0; i < n; i++) {
|
||||
if (correct[i].model_type != PCONT) {
|
||||
dbg_lines.push_back(absl::StrCat(correct[i].ascii, " # ",
|
||||
correct[i].model.ToString().c_str(),
|
||||
correct[i].is_very_first_or_continuation ? " crown" : "",
|
||||
correct[i].is_list_item ? " li" : ""));
|
||||
std::string s = std::string(correct[i].ascii) + " # " +
|
||||
correct[i].model.ToString() +
|
||||
(correct[i].is_very_first_or_continuation ? " crown" : "") +
|
||||
(correct[i].is_list_item ? " li" : "");
|
||||
dbg_lines.push_back(s);
|
||||
} else {
|
||||
dbg_lines.emplace_back(correct[i].ascii);
|
||||
}
|
||||
@ -173,16 +170,21 @@ void EvaluateParagraphDetection(const TextAndModel *correct, int n,
|
||||
if (i == 0 || (detector_output[i - 1] != detector_output[i])) {
|
||||
if (detector_output[i] && detector_output[i]->model) {
|
||||
annotation +=
|
||||
absl::StrCat(" # ", detector_output[i]->model->ToString().c_str(),
|
||||
detector_output[i]->is_very_first_or_continuation ? " crown" : "",
|
||||
detector_output[i]->is_list_item ? " li" : "");
|
||||
" # " + detector_output[i]->model->ToString() +
|
||||
(detector_output[i]->is_very_first_or_continuation ? " crown" : "") +
|
||||
(detector_output[i]->is_list_item ? " li" : "");
|
||||
} else {
|
||||
annotation = " # Unmodeled paragraph.";
|
||||
}
|
||||
}
|
||||
dbg_lines.push_back(absl::StrCat(correct[i].ascii, annotation));
|
||||
std::string s = correct[i].ascii + annotation;
|
||||
dbg_lines.push_back(s);
|
||||
}
|
||||
LOG(INFO) << "Discrepancy!\n" << absl::StrJoin(dbg_lines, "\n");
|
||||
std::string s;
|
||||
for (auto &dbg_line : dbg_lines) {
|
||||
s += dbg_line + "\n";
|
||||
}
|
||||
LOG(INFO) << "Discrepancy!\n" << s;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -22,8 +22,6 @@
|
||||
|
||||
#include "helpers.h"
|
||||
|
||||
#include "absl/strings/str_format.h" // for absl::StrFormat
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Number of characters to test beam search with.
|
||||
@ -160,9 +158,9 @@ protected:
|
||||
if (u_decoded.size() < truth_utf8.size()) {
|
||||
const char *str = ccutil_.unicharset.id_to_unichar(unichar_ids[u]);
|
||||
total_rating += ratings[u];
|
||||
LOG(INFO) << absl::StrFormat("%d:u_id=%d=%s, c=%g, r=%g, r_sum=%g @%d", u, unichar_ids[u],
|
||||
str, certainties[u], ratings[u], total_rating, xcoords[u])
|
||||
<< "\n";
|
||||
LOG(INFO) << u << ":u_id=" << unichar_ids[u] << "=" << str << ", c="
|
||||
<< certainties[u] << ", r=" << ratings[u] << "r_sum="
|
||||
<< total_rating << " @" << xcoords[u] << "\n";
|
||||
if (str[0] == ' ') {
|
||||
total_rating = 0.0f;
|
||||
}
|
||||
@ -184,11 +182,9 @@ protected:
|
||||
}
|
||||
w_decoded += word->best_choice->unichar_string().c_str();
|
||||
}
|
||||
LOG(INFO) << absl::StrFormat("Word:%d = %s, c=%g, r=%g, perm=%d", w,
|
||||
word->best_choice->unichar_string().c_str(),
|
||||
word->best_choice->certainty(), word->best_choice->rating(),
|
||||
word->best_choice->permuter())
|
||||
<< "\n";
|
||||
LOG(INFO) << "Word:" << w << " = " << word->best_choice->unichar_string()
|
||||
<< ", c=" << word->best_choice->certainty() << ", r=" << word->best_choice->rating()
|
||||
<< ", perm=" << word->best_choice->permuter() << "\n";
|
||||
}
|
||||
std::string w_trunc(w_decoded.data(), truth_utf8.size());
|
||||
if (truth_utf8 != w_trunc) {
|
||||
|
@ -5,7 +5,6 @@
|
||||
#include <string>
|
||||
#include "scrollview.h"
|
||||
|
||||
#include "absl/strings/str_format.h" // for absl::StrFormat
|
||||
#include "include_gunit.h"
|
||||
#include "log.h" // for LOG
|
||||
|
||||
@ -99,7 +98,7 @@ protected:
|
||||
pixWrite(outfile.c_str(), pix, IFF_PNG);
|
||||
}
|
||||
pix.destroy();
|
||||
LOG(INFO) << absl::StrFormat("At level %d: pix diff = %d\n", level, pixcount);
|
||||
LOG(INFO) << "At level " << level << ": pix diff = " << pixcount << "\n";
|
||||
EXPECT_LE(pixcount, max_diff);
|
||||
// if (base::GetFlag(FLAGS_v) > 1) CHECK_LE(pixcount, max_diff);
|
||||
}
|
||||
@ -293,7 +292,8 @@ TEST_F(ResultIteratorTest, EasyTest) {
|
||||
// Test baseline of the first line.
|
||||
int x1, y1, x2, y2;
|
||||
r_it->Baseline(tesseract::RIL_TEXTLINE, &x1, &y1, &x2, &y2);
|
||||
LOG(INFO) << absl::StrFormat("Baseline (%d,%d)->(%d,%d)", x1, y1, x2, y2) << "\n";
|
||||
LOG(INFO) << "Baseline ("
|
||||
<< x1 << ',' << y1 << ")->(" << x2 << ',' << y2 << ")\n";
|
||||
// Make sure we have a decent vector.
|
||||
EXPECT_GE(x2, x1 + 400);
|
||||
// The point 200,116 should be very close to the baseline.
|
||||
@ -315,9 +315,9 @@ TEST_F(ResultIteratorTest, EasyTest) {
|
||||
float confidence = r_it->Confidence(tesseract::RIL_WORD);
|
||||
EXPECT_GE(confidence, 80.0f);
|
||||
char *word_str = r_it->GetUTF8Text(tesseract::RIL_WORD);
|
||||
LOG(INFO) << absl::StrFormat("Word %s in font %s, id %d, size %d, conf %g", word_str, font,
|
||||
font_id, pointsize, confidence)
|
||||
<< "\n";
|
||||
LOG(INFO) << "Word " << word_str << " in font " << font
|
||||
<< ", id " << font_id << ", size " << pointsize
|
||||
<< ", conf " << confidence << "\n";
|
||||
delete[] word_str;
|
||||
EXPECT_FALSE(bold);
|
||||
EXPECT_FALSE(italic);
|
||||
@ -372,8 +372,8 @@ TEST_F(ResultIteratorTest, SmallCapDropCapTest) {
|
||||
&pointsize, &font_id);
|
||||
char *word_str = r_it->GetUTF8Text(tesseract::RIL_WORD);
|
||||
if (word_str != nullptr) {
|
||||
LOG(INFO) << absl::StrFormat("Word %s is %s", word_str, smallcaps ? "SMALLCAPS" : "Normal")
|
||||
<< "\n";
|
||||
LOG(INFO) << "Word " << word_str
|
||||
<< " is " << (smallcaps ? "SMALLCAPS" : "Normal") << "\n";
|
||||
if (r_it->SymbolIsDropcap()) {
|
||||
++found_dropcaps;
|
||||
}
|
||||
@ -392,7 +392,7 @@ TEST_F(ResultIteratorTest, SmallCapDropCapTest) {
|
||||
while (s_it.Next(tesseract::RIL_SYMBOL) && !s_it.IsAtBeginningOf(tesseract::RIL_WORD)) {
|
||||
if (s_it.SymbolIsDropcap()) {
|
||||
char *sym_str = s_it.GetUTF8Text(tesseract::RIL_SYMBOL);
|
||||
LOG(ERROR) << absl::StrFormat("Symbol %s of word %s is dropcap", sym_str, word_str);
|
||||
LOG(ERROR) << "Symbol " << sym_str << " of word " << word_str << " is dropcap";
|
||||
delete[] sym_str;
|
||||
}
|
||||
EXPECT_FALSE(s_it.SymbolIsDropcap());
|
||||
@ -433,8 +433,7 @@ TEST_F(ResultIteratorTest, SubSuperTest) {
|
||||
result = r_it->GetUTF8Text(tesseract::RIL_SYMBOL);
|
||||
if (strchr(kAllowedSupers, result[0]) == nullptr) {
|
||||
char* word = r_it->GetUTF8Text(tesseract::RIL_WORD);
|
||||
LOG(ERROR) << absl::StrFormat("Char %s in word %s is unexpected super!",
|
||||
result, word);
|
||||
LOG(ERROR) << "Char " << result << " in word " << word << " is unexpected super!";
|
||||
delete [] word;
|
||||
EXPECT_TRUE(strchr(kAllowedSupers, result[0]) != nullptr);
|
||||
}
|
||||
@ -445,8 +444,8 @@ TEST_F(ResultIteratorTest, SubSuperTest) {
|
||||
}
|
||||
} while (r_it->Next(tesseract::RIL_SYMBOL));
|
||||
delete r_it;
|
||||
LOG(INFO) << absl::StrFormat("Subs = %d, supers= %d, normal = %d",
|
||||
found_subs, found_supers, found_normal) << "\n";
|
||||
LOG(INFO) << "Subs = " << found_subs << ", supers= " << found_supers
|
||||
<< ", normal = " << found_normal << "\n";
|
||||
EXPECT_GE(found_subs, 25);
|
||||
EXPECT_GE(found_supers, 25);
|
||||
EXPECT_GE(found_normal, 1350);
|
||||
@ -528,7 +527,7 @@ TEST_F(ResultIteratorTest, DISABLED_NonNullChoicesTest) {
|
||||
do {
|
||||
char *word_str = r_it->GetUTF8Text(tesseract::RIL_WORD);
|
||||
if (word_str != nullptr) {
|
||||
LOG(INFO) << absl::StrFormat("Word %s:", word_str) << "\n";
|
||||
LOG(INFO) << "Word " << word_str << ":\n";
|
||||
ResultIterator s_it = *r_it;
|
||||
do {
|
||||
tesseract::ChoiceIterator c_it(s_it);
|
||||
@ -571,7 +570,7 @@ TEST_F(ResultIteratorTest, NonNullConfidencesTest) {
|
||||
const char *char_str = s_it.GetUTF8Text(tesseract::RIL_SYMBOL);
|
||||
CHECK(char_str != nullptr);
|
||||
float confidence = s_it.Confidence(tesseract::RIL_SYMBOL);
|
||||
LOG(INFO) << absl::StrFormat("Char %s has confidence %g\n", char_str, confidence);
|
||||
LOG(INFO) << "Char " << char_str << " has confidence " << confidence << "\n";
|
||||
delete[] char_str;
|
||||
} while (!s_it.IsAtFinalElement(tesseract::RIL_WORD, tesseract::RIL_SYMBOL) &&
|
||||
s_it.Next(tesseract::RIL_SYMBOL));
|
||||
|
@ -12,8 +12,6 @@
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "absl/strings/str_format.h" // for absl::StrFormat
|
||||
|
||||
#include "include_gunit.h"
|
||||
|
||||
#include "serialis.h"
|
||||
@ -139,8 +137,9 @@ TEST_F(ShapeTableTest, FullTest) {
|
||||
UNICHARSET unicharset;
|
||||
unicharset.unichar_insert(" ");
|
||||
for (int i = 1; i <= 10; ++i) {
|
||||
std::string class_str = absl::StrFormat("class%d", i);
|
||||
unicharset.unichar_insert(class_str.c_str());
|
||||
char class_str[20];
|
||||
snprintf(class_str, sizeof(class_str), "class%d", i);
|
||||
unicharset.unichar_insert(class_str);
|
||||
}
|
||||
ShapeTable st(unicharset);
|
||||
EXPECT_EQ(0, st.AddShape(3, 101));
|
||||
|
@ -17,7 +17,6 @@
|
||||
#include "stringrenderer.h"
|
||||
|
||||
#include <allheaders.h>
|
||||
#include "absl/strings/str_split.h" // for absl::StrSplit
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
@ -348,7 +347,7 @@ TEST_F(StringRendererTest, DoesRenderWordBoxes) {
|
||||
EXPECT_EQ(strlen(kEngText), renderer_->RenderToImage(kEngText, strlen(kEngText), &pix));
|
||||
pix.destroy();
|
||||
// Verify #boxchars = #words + #spaces
|
||||
std::vector<std::string> words = absl::StrSplit(kEngText, ' ', absl::SkipEmpty());
|
||||
std::vector<std::string> words = split(kEngText, ' ');
|
||||
const int kNumSpaces = words.size() - 1;
|
||||
const int kExpectedNumBoxes = words.size() + kNumSpaces;
|
||||
const std::vector<BoxChar *> &boxchars = renderer_->GetBoxes();
|
||||
@ -371,8 +370,12 @@ TEST_F(StringRendererTest, DoesRenderWordBoxesFromMultiLineText) {
|
||||
EXPECT_EQ(strlen(kMultlineText), renderer_->RenderToImage(kMultlineText, strlen(kEngText), &pix));
|
||||
pix.destroy();
|
||||
// Verify #boxchars = #words + #spaces + #newlines
|
||||
std::vector<std::string> words =
|
||||
absl::StrSplit(kMultlineText, absl::ByAnyChar(" \n"), absl::SkipEmpty());
|
||||
std::vector<std::string> words;
|
||||
for (auto &line : split(kMultlineText, '\n')) {
|
||||
for (auto &word : split(line, ' ')) {
|
||||
words.push_back(word);
|
||||
}
|
||||
}
|
||||
const int kNumSeparators = words.size() - 1;
|
||||
const int kExpectedNumBoxes = words.size() + kNumSeparators;
|
||||
const std::vector<BoxChar *> &boxchars = renderer_->GetBoxes();
|
||||
|
@ -12,7 +12,6 @@
|
||||
#include <allheaders.h>
|
||||
#include <string> // for std::string
|
||||
|
||||
#include "absl/strings/str_format.h" // for absl::StrFormat
|
||||
#include "include_gunit.h"
|
||||
|
||||
#include <tesseract/baseapi.h>
|
||||
@ -114,13 +113,12 @@ protected:
|
||||
const char *message) {
|
||||
int value = projection_->EvaluateBox(box, denorm_, false);
|
||||
if (greater_or_equal != (value > target_value)) {
|
||||
LOG(INFO) << absl::StrFormat(
|
||||
"EvaluateBox too %s:%d vs %d for %s word '%s' at:", greater_or_equal ? "low" : "high",
|
||||
value, target_value, message, text);
|
||||
LOG(INFO) << "EvaluateBox too " << (greater_or_equal ? "low" : "high")
|
||||
<< ":" << value << " vs " << target_value << " for " << message << " word '" << text << "' at:";
|
||||
box.print();
|
||||
value = projection_->EvaluateBox(box, denorm_, true);
|
||||
} else {
|
||||
LOG(INFO) << absl::StrFormat("EvaluateBox OK(%d) for %s word '%s'", value, message, text);
|
||||
LOG(INFO) << "EvaluateBox OK(" << value << ") for " << message << " word '" << text << "'";
|
||||
}
|
||||
if (greater_or_equal) {
|
||||
EXPECT_GE(value, target_value);
|
||||
@ -136,14 +134,14 @@ protected:
|
||||
int true_dist = projection_->DistanceOfBoxFromBox(box, true_box, true, denorm_, false);
|
||||
int false_dist = projection_->DistanceOfBoxFromBox(box, false_box, true, denorm_, false);
|
||||
if (false_dist <= true_dist) {
|
||||
LOG(INFO) << absl::StrFormat("Distance wrong:%d vs %d for %s word '%s' at:", false_dist,
|
||||
true_dist, message, text);
|
||||
LOG(INFO) << "Distance wrong:" << false_dist << " vs " << true_dist
|
||||
<< " for " << message << " word '" << text << "' at:";
|
||||
true_box.print();
|
||||
projection_->DistanceOfBoxFromBox(box, true_box, true, denorm_, true);
|
||||
projection_->DistanceOfBoxFromBox(box, false_box, true, denorm_, true);
|
||||
} else {
|
||||
LOG(INFO) << absl::StrFormat("Distance OK(%d vs %d) for %s word '%s'", false_dist, true_dist,
|
||||
message, text);
|
||||
LOG(INFO) << "Distance OK(" << false_dist << " vs " << true_dist
|
||||
<< ") for " << message << " word '" << text << "'";
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -12,9 +12,6 @@
|
||||
#include <string>
|
||||
|
||||
#include <allheaders.h>
|
||||
#include "absl/strings/ascii.h"
|
||||
#include "absl/strings/str_cat.h"
|
||||
#include "absl/strings/str_split.h"
|
||||
|
||||
#include "include_gunit.h"
|
||||
#include "log.h" // for LOG
|
||||
@ -46,7 +43,7 @@ protected:
|
||||
compressed_.EncodeUnichar(null_char_, &code);
|
||||
encoded_null_char_ = code(0);
|
||||
std::string output_name =
|
||||
file::JoinPath(FLAGS_test_tmpdir, absl::StrCat(unicharset_name, ".encoding.txt"));
|
||||
file::JoinPath(FLAGS_test_tmpdir, unicharset_name) + ".encoding.txt";
|
||||
std::string encoding = compressed_.GetEncodingAsString(unicharset_);
|
||||
std::string encoding_str(&encoding[0], encoding.size());
|
||||
CHECK_OK(file::SetContents(output_name, encoding_str, file::Defaults()));
|
||||
@ -233,7 +230,7 @@ TEST_F(UnicharcompressTest, GetEncodingAsString) {
|
||||
ExpectCorrect("trivial");
|
||||
std::string encoding = compressed_.GetEncodingAsString(unicharset_);
|
||||
std::string encoding_str(&encoding[0], encoding.length());
|
||||
std::vector<std::string> lines = absl::StrSplit(encoding_str, "\n", absl::SkipEmpty());
|
||||
std::vector<std::string> lines = split(encoding_str, '\n');
|
||||
EXPECT_EQ(5, lines.size());
|
||||
// The first line is always space.
|
||||
EXPECT_EQ("0\t ", lines[0]);
|
||||
|
Loading…
Reference in New Issue
Block a user