unittest: Remove dependency on absl::StripAsciiWhitespace()

This removes the last dependency on Abseil, so the abseil submodule
is now removed completely as well.

Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
Stefan Weil 2021-08-06 20:30:42 +02:00
parent 87707bb8b0
commit 49f410ced3
10 changed files with 33 additions and 55 deletions

View File

@ -25,7 +25,7 @@ jobs:
run: |
brew install autoconf automake libarchive
brew install leptonica cairo pango
brew install cabextract abseil
brew install cabextract
- name: Setup
run: |

3
.gitmodules vendored
View File

@ -1,6 +1,3 @@
[submodule "abseil"]
path = abseil
url = https://github.com/abseil/abseil-cpp.git
[submodule "googletest"]
path = googletest
url = https://github.com/google/googletest.git

View File

@ -69,7 +69,7 @@ your question has been asked (and has been answered) many times before...
You should always make sure your changes build and run successfully.
For that, your clone needs to have all submodules (`abseil`, `googletest`, `test`) included. To do so, either specify `--recurse-submodules` during the initial clone, or run `git submodule update --init --recursive NAME` for each `NAME` later. If `configure` already created those directories (blocking the clone), remove them first (or `make distclean`), then clone and reconfigure.
For that, your clone needs to have all submodules (`googletest`, `test`) included. To do so, either specify `--recurse-submodules` during the initial clone, or run `git submodule update --init --recursive NAME` for each `NAME` later. If `configure` already created those directories (blocking the clone), remove them first (or `make distclean`), then clone and reconfigure.
Have a look at [the README](./README.md) and [testing README](./test/testing/README.md) and the [documentation](https://tesseract-ocr.github.io/tessdoc/Compiling-%E2%80%93-GitInstallation.html#unit-test-builds) on installation.

View File

@ -1156,7 +1156,6 @@ unittest_CPPFLAGS += $(pangocairo_CFLAGS)
endif # ENABLE_TRAINING
unittest_CPPFLAGS += -I$(top_srcdir)/src/viewer
unittest_CPPFLAGS += -I$(top_srcdir)/src/wordrec
unittest_CPPFLAGS += -I$(top_srcdir)/abseil
if TENSORFLOW
unittest_CPPFLAGS += -DINCLUDE_TENSORFLOW
unittest_CPPFLAGS += -I$(top_srcdir)/unittest
@ -1170,31 +1169,6 @@ libgtest_la_CPPFLAGS = -I$(top_srcdir)/googletest/googletest/include -I$(top_src
libgtest_main_la_SOURCES = googletest/googletest/src/gtest_main.cc
libgtest_main_la_CPPFLAGS = $(libgtest_la_CPPFLAGS)
# Build Abseil (needed for some unit tests).
check_LTLIBRARIES += libabseil.la
libabseil_la_SOURCES =
libabseil_la_SOURCES += abseil/absl/base/internal/raw_logging.cc
libabseil_la_SOURCES += abseil/absl/base/internal/spinlock.cc
libabseil_la_SOURCES += abseil/absl/base/internal/spinlock_wait.cc
libabseil_la_SOURCES += abseil/absl/base/internal/sysinfo.cc
libabseil_la_SOURCES += abseil/absl/base/internal/throw_delegate.cc
libabseil_la_SOURCES += abseil/absl/numeric/int128.cc
libabseil_la_SOURCES += abseil/absl/strings/ascii.cc
libabseil_la_SOURCES += abseil/absl/strings/charconv.cc
libabseil_la_SOURCES += abseil/absl/strings/internal/charconv_bigint.cc
libabseil_la_SOURCES += abseil/absl/strings/internal/charconv_parse.cc
libabseil_la_SOURCES += abseil/absl/strings/internal/memutil.cc
libabseil_la_SOURCES += abseil/absl/strings/internal/str_format/arg.cc
libabseil_la_SOURCES += abseil/absl/strings/internal/str_format/bind.cc
libabseil_la_SOURCES += abseil/absl/strings/internal/str_format/extension.cc
libabseil_la_SOURCES += abseil/absl/strings/internal/str_format/float_conversion.cc
libabseil_la_SOURCES += abseil/absl/strings/internal/str_format/output.cc
libabseil_la_SOURCES += abseil/absl/strings/internal/str_format/parser.cc
libabseil_la_SOURCES += abseil/absl/strings/match.cc
libabseil_la_SOURCES += abseil/absl/strings/numbers.cc
libabseil_la_SOURCES += abseil/absl/strings/string_view.cc
libabseil_la_CPPFLAGS = -I$(top_srcdir)/abseil
GMOCK_INCLUDES = -I$(top_srcdir)/googletest/googlemock/include \
-I$(top_srcdir)/googletest/googlemock \
-I$(top_srcdir)/googletest/googletest/include \
@ -1208,7 +1182,6 @@ libgmock_main_la_CPPFLAGS = $(GMOCK_INCLUDES) \
-pthread
# Build unittests
ABSEIL_LIBS = libabseil.la
GTEST_LIBS = libgtest.la libgtest_main.la -lpthread
GMOCK_LIBS = libgmock.la libgmock_main.la
TESS_LIBS = $(GTEST_LIBS)
@ -1330,12 +1303,11 @@ endif # !DISABLED_LEGACY_ENGINE
baseapi_test_SOURCES = unittest/baseapi_test.cc
baseapi_test_CPPFLAGS = $(unittest_CPPFLAGS)
baseapi_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) $(LEPTONICA_LIBS)
baseapi_test_LDADD = $(TRAINING_LIBS) $(LEPTONICA_LIBS)
baseapi_thread_test_SOURCES = unittest/baseapi_thread_test.cc
baseapi_thread_test_CPPFLAGS = $(unittest_CPPFLAGS)
baseapi_thread_test_LDADD = $(ABSEIL_LIBS)
baseapi_thread_test_LDADD += $(TESS_LIBS) $(LEPTONICA_LIBS)
baseapi_thread_test_LDADD = $(TESS_LIBS) $(LEPTONICA_LIBS)
if !DISABLED_LEGACY_ENGINE
bitvector_test_SOURCES = unittest/bitvector_test.cc
@ -1432,19 +1404,19 @@ loadlang_test_LDADD = $(TESS_LIBS) $(LEPTONICA_LIBS)
lstm_recode_test_SOURCES = unittest/lstm_recode_test.cc
lstm_recode_test_CPPFLAGS = $(unittest_CPPFLAGS)
lstm_recode_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS)
lstm_recode_test_LDADD = $(TRAINING_LIBS)
lstm_squashed_test_SOURCES = unittest/lstm_squashed_test.cc
lstm_squashed_test_CPPFLAGS = $(unittest_CPPFLAGS)
lstm_squashed_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS)
lstm_squashed_test_LDADD = $(TRAINING_LIBS)
lstm_test_SOURCES = unittest/lstm_test.cc
lstm_test_CPPFLAGS = $(unittest_CPPFLAGS)
lstm_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS)
lstm_test_LDADD = $(TRAINING_LIBS)
lstmtrainer_test_SOURCES = unittest/lstmtrainer_test.cc
lstmtrainer_test_CPPFLAGS = $(unittest_CPPFLAGS)
lstmtrainer_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) $(LEPTONICA_LIBS)
lstmtrainer_test_LDADD = $(TRAINING_LIBS) $(LEPTONICA_LIBS)
if !DISABLED_LEGACY_ENGINE
mastertrainer_test_SOURCES = unittest/mastertrainer_test.cc
@ -1489,7 +1461,7 @@ pango_font_info_test_SOURCES += unittest/util/utf8/unicodetext.cc
pango_font_info_test_SOURCES += unittest/util/utf8/unilib.cc
endif # TENSORFLOW
pango_font_info_test_CPPFLAGS = $(unittest_CPPFLAGS)
pango_font_info_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) $(LEPTONICA_LIBS)
pango_font_info_test_LDADD = $(TRAINING_LIBS) $(LEPTONICA_LIBS)
pango_font_info_test_LDADD += $(ICU_I18N_LIBS)
pango_font_info_test_LDADD += $(pangocairo_LIBS)
pango_font_info_test_LDADD += $(pangoft2_LIBS)
@ -1511,7 +1483,7 @@ progress_test_LDADD = $(GTEST_LIBS) $(GMOCK_LIBS) $(TESS_LIBS) $(LEPTONICA_LIBS)
qrsequence_test_SOURCES = unittest/qrsequence_test.cc
qrsequence_test_CPPFLAGS = $(unittest_CPPFLAGS)
qrsequence_test_LDADD = $(ABSEIL_LIBS) $(TESS_LIBS)
qrsequence_test_LDADD = $(TESS_LIBS)
recodebeam_test_SOURCES = unittest/recodebeam_test.cc
recodebeam_test_CPPFLAGS = $(unittest_CPPFLAGS)

1
abseil

@ -1 +0,0 @@
Subproject commit e1d388e7e74803050423d035e4374131b9b57919

1
sw.cpp
View File

@ -222,7 +222,6 @@ void build(Solution &s)
t += pango_training;
t += "org.sw.demo.google.googletest.gmock.main"_dep;
t += "org.sw.demo.google.googletest.gtest.main"_dep;
t += "org.sw.demo.google.abseil"_dep;
if (t.getCompilerType() == CompilerType::MSVC)
t.CompileOptions.push_back("-utf-8");

View File

@ -60,7 +60,7 @@
│   └── script
│   └── Latin.traineddata
└── tesseract
├── abseil
├── googletest
...
├── test
├── unittest

View File

@ -19,7 +19,6 @@
#include <tesseract/baseapi.h>
#include <allheaders.h>
#include "absl/strings/ascii.h"
#include "gmock/gmock-matchers.h"
#include <memory>
@ -48,7 +47,7 @@ std::string GetCleanedTextResult(tesseract::TessBaseAPI *tess, Image pix) {
char *result = tess->GetUTF8Text();
std::string ocr_result = result;
delete[] result;
absl::StripAsciiWhitespace(&ocr_result);
trim(ocr_result);
return ocr_result;
}
@ -80,7 +79,7 @@ TEST_F(TesseractTest, BasicTesseractTest) {
ocr_text = GetCleanedTextResult(&api, src_pix);
CHECK_OK(
file::GetContents(TestDataNameToPath("phototest.gold.txt"), &truth_text, file::Defaults()));
absl::StripAsciiWhitespace(&truth_text);
trim(truth_text);
EXPECT_STREQ(truth_text.c_str(), ocr_text.c_str());
src_pix.destroy();
} else {
@ -201,7 +200,7 @@ TEST_F(TesseractTest, AdaptToWordStrTest) {
Image src_pix = pixRead(TestDataNameToPath(kTestPages[i]).c_str());
CHECK(src_pix);
ocr_text = GetCleanedTextResult(&api, src_pix);
absl::StripAsciiWhitespace(&truth_text);
trim(truth_text);
EXPECT_STREQ(kTestText[i], ocr_text.c_str());
src_pix.destroy();
}
@ -223,7 +222,7 @@ TEST_F(TesseractTest, BasicLSTMTest) {
ocr_text = GetCleanedTextResult(&api, src_pix);
CHECK_OK(
file::GetContents(TestDataNameToPath("phototest.gold.txt"), &truth_text, file::Defaults()));
absl::StripAsciiWhitespace(&truth_text);
trim(truth_text);
EXPECT_STREQ(truth_text.c_str(), ocr_text.c_str());
src_pix.destroy();
}

View File

@ -28,7 +28,6 @@
#endif
#include <allheaders.h>
#include <tesseract/baseapi.h>
#include "absl/strings/ascii.h" // for absl::StripAsciiWhitespace
#include "commandlineflags.h"
#include "include_gunit.h"
#include "log.h"
@ -148,12 +147,12 @@ static void InitTessInstance(TessBaseAPI *tess, const std::string &lang) {
EXPECT_EQ(0, tess->Init(TESSDATA_DIR, lang.c_str()));
}
static void GetCleanedText(TessBaseAPI *tess, Image pix, std::string *ocr_text) {
static void GetCleanedText(TessBaseAPI *tess, Image pix, std::string &ocr_text) {
tess->SetImage(pix);
char *result = tess->GetUTF8Text();
*ocr_text = result;
ocr_text = result;
delete[] result;
absl::StripAsciiWhitespace(ocr_text);
trim(ocr_text);
}
static void VerifyTextResult(TessBaseAPI *tess, Image pix, const std::string &lang,
@ -166,7 +165,7 @@ static void VerifyTextResult(TessBaseAPI *tess, Image pix, const std::string &la
InitTessInstance(tess_local, lang);
}
std::string ocr_text;
GetCleanedText(tess_local, pix, &ocr_text);
GetCleanedText(tess_local, pix, ocr_text);
EXPECT_STREQ(expected_text.c_str(), ocr_text.c_str());
if (tess_local != tess) {
delete tess_local;
@ -180,7 +179,7 @@ TEST_F(BaseapiThreadTest, TestBasicSanity) {
TessBaseAPI tess;
InitTessInstance(&tess, langs_[i]);
std::string ocr_text;
GetCleanedText(&tess, pix_[i], &ocr_text);
GetCleanedText(&tess, pix_[i], ocr_text);
CHECK(strcmp(gt_text_[i].c_str(), ocr_text.c_str()) == 0) << "Failed with lang = " << langs_[i];
}
}

View File

@ -20,6 +20,19 @@
const char *FLAGS_test_tmpdir = "./tmp";
namespace tesseract {
// Strips leading and trailing whitespace from `s` in place.
// Whitespace is whatever std::isspace reports for the current C locale.
// A string consisting only of whitespace becomes empty.
void trim(std::string &s) {
  // The predicate takes unsigned char so that std::isspace never receives
  // a negative value.
  const auto is_not_space = [](unsigned char c) { return std::isspace(c) == 0; };

  // Drop everything in front of the first non-whitespace character.
  const auto first_kept = std::find_if(s.begin(), s.end(), is_not_space);
  s.erase(s.begin(), first_kept);

  // Search backwards for the last non-whitespace character; base() yields
  // the forward iterator one past it, i.e. the start of the trailing run.
  const auto trailing_begin = std::find_if(s.rbegin(), s.rend(), is_not_space).base();
  s.erase(trailing_begin, s.end());
}
} // namespace tesseract
class file : public tesseract::File {
public:
static void MakeTmpdir() {