Merge pull request #3516 from stweil/abseil

Remove submodule abseil
This commit is contained in:
Egor Pugin 2021-08-07 15:05:29 +03:00 committed by GitHub
commit 3a68a80eed
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
26 changed files with 161 additions and 182 deletions

View File

@ -25,7 +25,7 @@ jobs:
run: |
brew install autoconf automake libarchive
brew install leptonica cairo pango
brew install cabextract abseil
brew install cabextract
- name: Setup
run: |

3
.gitmodules vendored
View File

@ -1,6 +1,3 @@
[submodule "abseil"]
path = abseil
url = https://github.com/abseil/abseil-cpp.git
[submodule "googletest"]
path = googletest
url = https://github.com/google/googletest.git

View File

@ -69,7 +69,7 @@ your question has been asked (and has been answered) many times before...
You should always make sure your changes build and run successfully.
For that, your clone needs to have all submodules (`abseil`, `googletest`, `test`) included. To do so, either specify `--recurse-submodules` during the initial clone, or run `git submodule update --init --recursive NAME` for each `NAME` later. If `configure` already created those directories (blocking the clone), remove them first (or `make distclean`), then clone and reconfigure.
For that, your clone needs to have all submodules (`googletest`, `test`) included. To do so, either specify `--recurse-submodules` during the initial clone, or run `git submodule update --init --recursive NAME` for each `NAME` later. If `configure` already created those directories (blocking the clone), remove them first (or `make distclean`), then clone and reconfigure.
Have a look at [the README](./README.md) and [testing README](./test/testing/README.md) and the [documentation](https://tesseract-ocr.github.io/tessdoc/Compiling-%E2%80%93-GitInstallation.html#unit-test-builds) on installation.

View File

@ -1156,7 +1156,6 @@ unittest_CPPFLAGS += $(pangocairo_CFLAGS)
endif # ENABLE_TRAINING
unittest_CPPFLAGS += -I$(top_srcdir)/src/viewer
unittest_CPPFLAGS += -I$(top_srcdir)/src/wordrec
unittest_CPPFLAGS += -I$(top_srcdir)/abseil
if TENSORFLOW
unittest_CPPFLAGS += -DINCLUDE_TENSORFLOW
unittest_CPPFLAGS += -I$(top_srcdir)/unittest
@ -1170,37 +1169,6 @@ libgtest_la_CPPFLAGS = -I$(top_srcdir)/googletest/googletest/include -I$(top_src
libgtest_main_la_SOURCES = googletest/googletest/src/gtest_main.cc
libgtest_main_la_CPPFLAGS = $(libgtest_la_CPPFLAGS)
# Build Abseil (needed for some unit tests).
check_LTLIBRARIES += libabseil.la
libabseil_la_SOURCES =
libabseil_la_SOURCES += abseil/absl/base/internal/cycleclock.cc
libabseil_la_SOURCES += abseil/absl/base/internal/raw_logging.cc
libabseil_la_SOURCES += abseil/absl/base/internal/spinlock.cc
libabseil_la_SOURCES += abseil/absl/base/internal/spinlock_wait.cc
libabseil_la_SOURCES += abseil/absl/base/internal/sysinfo.cc
libabseil_la_SOURCES += abseil/absl/base/internal/throw_delegate.cc
libabseil_la_SOURCES += abseil/absl/base/internal/unscaledcycleclock.cc
libabseil_la_SOURCES += abseil/absl/numeric/int128.cc
libabseil_la_SOURCES += abseil/absl/strings/ascii.cc
libabseil_la_SOURCES += abseil/absl/strings/charconv.cc
libabseil_la_SOURCES += abseil/absl/strings/internal/charconv_bigint.cc
libabseil_la_SOURCES += abseil/absl/strings/internal/charconv_parse.cc
libabseil_la_SOURCES += abseil/absl/strings/internal/memutil.cc
libabseil_la_SOURCES += abseil/absl/strings/internal/str_format/arg.cc
libabseil_la_SOURCES += abseil/absl/strings/internal/str_format/bind.cc
libabseil_la_SOURCES += abseil/absl/strings/internal/str_format/extension.cc
libabseil_la_SOURCES += abseil/absl/strings/internal/str_format/float_conversion.cc
libabseil_la_SOURCES += abseil/absl/strings/internal/str_format/output.cc
libabseil_la_SOURCES += abseil/absl/strings/internal/str_format/parser.cc
libabseil_la_SOURCES += abseil/absl/strings/match.cc
libabseil_la_SOURCES += abseil/absl/strings/numbers.cc
libabseil_la_SOURCES += abseil/absl/strings/str_cat.cc
libabseil_la_SOURCES += abseil/absl/strings/str_split.cc
libabseil_la_SOURCES += abseil/absl/strings/string_view.cc
libabseil_la_SOURCES += abseil/absl/time/clock.cc
libabseil_la_SOURCES += abseil/absl/time/duration.cc
libabseil_la_CPPFLAGS = -I$(top_srcdir)/abseil
GMOCK_INCLUDES = -I$(top_srcdir)/googletest/googlemock/include \
-I$(top_srcdir)/googletest/googlemock \
-I$(top_srcdir)/googletest/googletest/include \
@ -1214,7 +1182,6 @@ libgmock_main_la_CPPFLAGS = $(GMOCK_INCLUDES) \
-pthread
# Build unittests
ABSEIL_LIBS = libabseil.la
GTEST_LIBS = libgtest.la libgtest_main.la -lpthread
GMOCK_LIBS = libgmock.la libgmock_main.la
TESS_LIBS = $(GTEST_LIBS)
@ -1336,12 +1303,11 @@ endif # !DISABLED_LEGACY_ENGINE
baseapi_test_SOURCES = unittest/baseapi_test.cc
baseapi_test_CPPFLAGS = $(unittest_CPPFLAGS)
baseapi_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) $(LEPTONICA_LIBS)
baseapi_test_LDADD = $(TRAINING_LIBS) $(LEPTONICA_LIBS)
baseapi_thread_test_SOURCES = unittest/baseapi_thread_test.cc
baseapi_thread_test_CPPFLAGS = $(unittest_CPPFLAGS)
baseapi_thread_test_LDADD = $(ABSEIL_LIBS)
baseapi_thread_test_LDADD += $(TESS_LIBS) $(LEPTONICA_LIBS)
baseapi_thread_test_LDADD = $(TESS_LIBS) $(LEPTONICA_LIBS)
if !DISABLED_LEGACY_ENGINE
bitvector_test_SOURCES = unittest/bitvector_test.cc
@ -1377,7 +1343,7 @@ endif # !DISABLED_LEGACY_ENGINE
fileio_test_SOURCES = unittest/fileio_test.cc
fileio_test_CPPFLAGS = $(unittest_CPPFLAGS)
fileio_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS)
fileio_test_LDADD = $(TRAINING_LIBS)
heap_test_SOURCES = unittest/heap_test.cc
heap_test_CPPFLAGS = $(unittest_CPPFLAGS)
@ -1385,7 +1351,7 @@ heap_test_LDADD = $(TESS_LIBS)
imagedata_test_SOURCES = unittest/imagedata_test.cc
imagedata_test_CPPFLAGS = $(unittest_CPPFLAGS)
imagedata_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS)
imagedata_test_LDADD = $(TRAINING_LIBS)
if !DISABLED_LEGACY_ENGINE
indexmapbidi_test_SOURCES = unittest/indexmapbidi_test.cc
@ -1411,7 +1377,7 @@ intsimdmatrix_test_LDADD = $(TESS_LIBS)
lang_model_test_SOURCES = unittest/lang_model_test.cc
lang_model_test_CPPFLAGS = $(unittest_CPPFLAGS)
lang_model_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
lang_model_test_LDADD = $(TRAINING_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
layout_test_SOURCES = unittest/layout_test.cc
layout_test_CPPFLAGS = $(unittest_CPPFLAGS)
@ -1438,24 +1404,24 @@ loadlang_test_LDADD = $(TESS_LIBS) $(LEPTONICA_LIBS)
lstm_recode_test_SOURCES = unittest/lstm_recode_test.cc
lstm_recode_test_CPPFLAGS = $(unittest_CPPFLAGS)
lstm_recode_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS)
lstm_recode_test_LDADD = $(TRAINING_LIBS)
lstm_squashed_test_SOURCES = unittest/lstm_squashed_test.cc
lstm_squashed_test_CPPFLAGS = $(unittest_CPPFLAGS)
lstm_squashed_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS)
lstm_squashed_test_LDADD = $(TRAINING_LIBS)
lstm_test_SOURCES = unittest/lstm_test.cc
lstm_test_CPPFLAGS = $(unittest_CPPFLAGS)
lstm_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS)
lstm_test_LDADD = $(TRAINING_LIBS)
lstmtrainer_test_SOURCES = unittest/lstmtrainer_test.cc
lstmtrainer_test_CPPFLAGS = $(unittest_CPPFLAGS)
lstmtrainer_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) $(LEPTONICA_LIBS)
lstmtrainer_test_LDADD = $(TRAINING_LIBS) $(LEPTONICA_LIBS)
if !DISABLED_LEGACY_ENGINE
mastertrainer_test_SOURCES = unittest/mastertrainer_test.cc
mastertrainer_test_CPPFLAGS = $(unittest_CPPFLAGS)
mastertrainer_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) $(LEPTONICA_LIBS)
mastertrainer_test_LDADD = $(TRAINING_LIBS) $(LEPTONICA_LIBS)
endif # !DISABLED_LEGACY_ENGINE
matrix_test_SOURCES = unittest/matrix_test.cc
@ -1472,7 +1438,7 @@ normstrngs_test_SOURCES += unittest/third_party/utf/rune.c
normstrngs_test_SOURCES += unittest/util/utf8/unilib.cc
endif # TENSORFLOW
normstrngs_test_CPPFLAGS = $(unittest_CPPFLAGS)
normstrngs_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
normstrngs_test_LDADD = $(TRAINING_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
nthitem_test_SOURCES = unittest/nthitem_test.cc
nthitem_test_CPPFLAGS = $(unittest_CPPFLAGS)
@ -1495,14 +1461,14 @@ pango_font_info_test_SOURCES += unittest/util/utf8/unicodetext.cc
pango_font_info_test_SOURCES += unittest/util/utf8/unilib.cc
endif # TENSORFLOW
pango_font_info_test_CPPFLAGS = $(unittest_CPPFLAGS)
pango_font_info_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) $(LEPTONICA_LIBS)
pango_font_info_test_LDADD = $(TRAINING_LIBS) $(LEPTONICA_LIBS)
pango_font_info_test_LDADD += $(ICU_I18N_LIBS)
pango_font_info_test_LDADD += $(pangocairo_LIBS)
pango_font_info_test_LDADD += $(pangoft2_LIBS)
paragraphs_test_SOURCES = unittest/paragraphs_test.cc
paragraphs_test_CPPFLAGS = $(unittest_CPPFLAGS)
paragraphs_test_LDADD = $(ABSEIL_LIBS) $(TESS_LIBS)
paragraphs_test_LDADD = $(TESS_LIBS)
if !DISABLED_LEGACY_ENGINE
params_model_test_SOURCES = unittest/params_model_test.cc
@ -1517,11 +1483,11 @@ progress_test_LDADD = $(GTEST_LIBS) $(GMOCK_LIBS) $(TESS_LIBS) $(LEPTONICA_LIBS)
qrsequence_test_SOURCES = unittest/qrsequence_test.cc
qrsequence_test_CPPFLAGS = $(unittest_CPPFLAGS)
qrsequence_test_LDADD = $(ABSEIL_LIBS) $(TESS_LIBS)
qrsequence_test_LDADD = $(TESS_LIBS)
recodebeam_test_SOURCES = unittest/recodebeam_test.cc
recodebeam_test_CPPFLAGS = $(unittest_CPPFLAGS)
recodebeam_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
recodebeam_test_LDADD = $(TRAINING_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
rect_test_SOURCES = unittest/rect_test.cc
rect_test_CPPFLAGS = $(unittest_CPPFLAGS)
@ -1529,7 +1495,7 @@ rect_test_LDADD = $(TESS_LIBS)
resultiterator_test_SOURCES = unittest/resultiterator_test.cc
resultiterator_test_CPPFLAGS = $(unittest_CPPFLAGS)
resultiterator_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS)
resultiterator_test_LDADD = $(TRAINING_LIBS)
resultiterator_test_LDADD += $(LEPTONICA_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
scanutils_test_SOURCES = unittest/scanutils_test.cc
@ -1539,7 +1505,7 @@ scanutils_test_LDADD = $(TRAINING_LIBS)
if !DISABLED_LEGACY_ENGINE
shapetable_test_SOURCES = unittest/shapetable_test.cc
shapetable_test_CPPFLAGS = $(unittest_CPPFLAGS)
shapetable_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS)
shapetable_test_LDADD = $(TRAINING_LIBS)
endif # !DISABLED_LEGACY_ENGINE
stats_test_SOURCES = unittest/stats_test.cc
@ -1552,7 +1518,7 @@ stridemap_test_LDADD = $(TESS_LIBS)
stringrenderer_test_SOURCES = unittest/stringrenderer_test.cc
stringrenderer_test_CPPFLAGS = $(unittest_CPPFLAGS)
stringrenderer_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) $(LEPTONICA_LIBS)
stringrenderer_test_LDADD = $(TRAINING_LIBS) $(LEPTONICA_LIBS)
stringrenderer_test_LDADD += $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
stringrenderer_test_LDADD += $(pangocairo_LIBS) $(pangoft2_LIBS)
stringrenderer_test_LDADD += $(cairo_LIBS) $(pango_LIBS)
@ -1580,7 +1546,7 @@ tatweel_test_LDADD = $(TRAINING_LIBS)
textlineprojection_test_SOURCES = unittest/textlineprojection_test.cc
textlineprojection_test_CPPFLAGS = $(unittest_CPPFLAGS)
textlineprojection_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) $(LEPTONICA_LIBS)
textlineprojection_test_LDADD = $(TRAINING_LIBS) $(LEPTONICA_LIBS)
tfile_test_SOURCES = unittest/tfile_test.cc
tfile_test_CPPFLAGS = $(unittest_CPPFLAGS)
@ -1592,7 +1558,7 @@ unichar_test_LDADD = $(TRAINING_LIBS) $(ICU_UC_LIBS)
unicharcompress_test_SOURCES = unittest/unicharcompress_test.cc
unicharcompress_test_CPPFLAGS = $(unittest_CPPFLAGS)
unicharcompress_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) $(ICU_UC_LIBS)
unicharcompress_test_LDADD = $(TRAINING_LIBS) $(ICU_UC_LIBS)
unicharset_test_SOURCES = unittest/unicharset_test.cc
unicharset_test_CPPFLAGS = $(unittest_CPPFLAGS)
@ -1600,19 +1566,19 @@ unicharset_test_LDADD = $(TRAINING_LIBS) $(ICU_UC_LIBS)
validate_grapheme_test_SOURCES = unittest/validate_grapheme_test.cc
validate_grapheme_test_CPPFLAGS = $(unittest_CPPFLAGS)
validate_grapheme_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
validate_grapheme_test_LDADD = $(TRAINING_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
validate_indic_test_SOURCES = unittest/validate_indic_test.cc
validate_indic_test_CPPFLAGS = $(unittest_CPPFLAGS)
validate_indic_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
validate_indic_test_LDADD = $(TRAINING_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
validate_khmer_test_SOURCES = unittest/validate_khmer_test.cc
validate_khmer_test_CPPFLAGS = $(unittest_CPPFLAGS)
validate_khmer_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
validate_khmer_test_LDADD = $(TRAINING_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
validate_myanmar_test_SOURCES = unittest/validate_myanmar_test.cc
validate_myanmar_test_CPPFLAGS = $(unittest_CPPFLAGS)
validate_myanmar_test_LDADD = $(ABSEIL_LIBS) $(TRAINING_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
validate_myanmar_test_LDADD = $(TRAINING_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS)
validator_test_SOURCES = unittest/validator_test.cc
validator_test_CPPFLAGS = $(unittest_CPPFLAGS)

1
abseil

@ -1 +0,0 @@
Subproject commit e1d388e7e74803050423d035e4374131b9b57919

1
sw.cpp
View File

@ -224,7 +224,6 @@ void build(Solution &s)
t += pango_training;
t += "org.sw.demo.google.googletest.gmock.main"_dep;
t += "org.sw.demo.google.googletest.gtest.main"_dep;
t += "org.sw.demo.google.abseil"_dep;
if (t.getCompilerType() == CompilerType::MSVC)
t.CompileOptions.push_back("-utf-8");

View File

@ -60,7 +60,7 @@
│   └── script
│   └── Latin.traineddata
└── tesseract
├── abseil
├── googletest
...
├── test
├── unittest

View File

@ -19,8 +19,6 @@
#include <tesseract/baseapi.h>
#include <allheaders.h>
#include "absl/strings/ascii.h"
#include "absl/strings/str_cat.h"
#include "gmock/gmock-matchers.h"
#include <memory>
@ -49,7 +47,7 @@ std::string GetCleanedTextResult(tesseract::TessBaseAPI *tess, Image pix) {
char *result = tess->GetUTF8Text();
std::string ocr_result = result;
delete[] result;
absl::StripAsciiWhitespace(&ocr_result);
trim(ocr_result);
return ocr_result;
}
@ -81,7 +79,7 @@ TEST_F(TesseractTest, BasicTesseractTest) {
ocr_text = GetCleanedTextResult(&api, src_pix);
CHECK_OK(
file::GetContents(TestDataNameToPath("phototest.gold.txt"), &truth_text, file::Defaults()));
absl::StripAsciiWhitespace(&truth_text);
trim(truth_text);
EXPECT_STREQ(truth_text.c_str(), ocr_text.c_str());
src_pix.destroy();
} else {
@ -202,7 +200,7 @@ TEST_F(TesseractTest, AdaptToWordStrTest) {
Image src_pix = pixRead(TestDataNameToPath(kTestPages[i]).c_str());
CHECK(src_pix);
ocr_text = GetCleanedTextResult(&api, src_pix);
absl::StripAsciiWhitespace(&truth_text);
trim(truth_text);
EXPECT_STREQ(kTestText[i], ocr_text.c_str());
src_pix.destroy();
}
@ -224,7 +222,7 @@ TEST_F(TesseractTest, BasicLSTMTest) {
ocr_text = GetCleanedTextResult(&api, src_pix);
CHECK_OK(
file::GetContents(TestDataNameToPath("phototest.gold.txt"), &truth_text, file::Defaults()));
absl::StripAsciiWhitespace(&truth_text);
trim(truth_text);
EXPECT_STREQ(truth_text.c_str(), ocr_text.c_str());
src_pix.destroy();
}
@ -323,7 +321,9 @@ TEST(TesseractInstanceTest, TestMultipleTessInstances) {
// Preload images and verify that OCR is correct on them individually.
std::vector<Image > pix(num_langs);
for (int i = 0; i < num_langs; ++i) {
SCOPED_TRACE(absl::StrCat("Single instance test with lang = ", langs[i]));
std::string tracestring = "Single instance test with lang = ";
tracestring += langs[i];
SCOPED_TRACE(tracestring);
std::string path = file::JoinPath(TESTING_DIR, image_files[i]);
pix[i] = pixRead(path.c_str());
QCHECK(pix[i] != nullptr) << "Could not read " << path;

View File

@ -28,7 +28,6 @@
#endif
#include <allheaders.h>
#include <tesseract/baseapi.h>
#include "absl/strings/ascii.h" // for absl::StripAsciiWhitespace
#include "commandlineflags.h"
#include "include_gunit.h"
#include "log.h"
@ -148,12 +147,12 @@ static void InitTessInstance(TessBaseAPI *tess, const std::string &lang) {
EXPECT_EQ(0, tess->Init(TESSDATA_DIR, lang.c_str()));
}
static void GetCleanedText(TessBaseAPI *tess, Image pix, std::string *ocr_text) {
static void GetCleanedText(TessBaseAPI *tess, Image pix, std::string &ocr_text) {
tess->SetImage(pix);
char *result = tess->GetUTF8Text();
*ocr_text = result;
ocr_text = result;
delete[] result;
absl::StripAsciiWhitespace(ocr_text);
trim(ocr_text);
}
static void VerifyTextResult(TessBaseAPI *tess, Image pix, const std::string &lang,
@ -166,7 +165,7 @@ static void VerifyTextResult(TessBaseAPI *tess, Image pix, const std::string &la
InitTessInstance(tess_local, lang);
}
std::string ocr_text;
GetCleanedText(tess_local, pix, &ocr_text);
GetCleanedText(tess_local, pix, ocr_text);
EXPECT_STREQ(expected_text.c_str(), ocr_text.c_str());
if (tess_local != tess) {
delete tess_local;
@ -180,7 +179,7 @@ TEST_F(BaseapiThreadTest, TestBasicSanity) {
TessBaseAPI tess;
InitTessInstance(&tess, langs_[i]);
std::string ocr_text;
GetCleanedText(&tess, pix_[i], &ocr_text);
GetCleanedText(&tess, pix_[i], ocr_text);
CHECK(strcmp(gt_text_[i].c_str(), ocr_text.c_str()) == 0) << "Failed with lang = " << langs_[i];
}
}

View File

@ -13,10 +13,16 @@
#ifndef TESSERACT_UNITTEST_CYCLETIMER_H
#define TESSERACT_UNITTEST_CYCLETIMER_H
#include "absl/time/clock.h" // for GetCurrentTimeNanos
#include <chrono> // for std::chrono
// See https://github.com/google/or-tools/blob/master/ortools/base/timer.h
class CycleTimer {
private:
// Returns the current std::chrono::steady_clock reading, converted to whole
// milliseconds since that clock's epoch.
static int64_t now() {
  using std::chrono::milliseconds;
  const auto since_epoch = std::chrono::steady_clock::now().time_since_epoch();
  return std::chrono::duration_cast<milliseconds>(since_epoch).count();
}
public:
CycleTimer() {
Reset();
@ -31,7 +37,7 @@ public:
// When Start() is called multiple times, only the most recent is used.
void Start() {
running_ = true;
start_ = absl::GetCurrentTimeNanos();
start_ = now();
}
void Restart() {
@ -41,17 +47,12 @@ public:
void Stop() {
if (running_) {
sum_ += absl::GetCurrentTimeNanos() - start_;
sum_ += now() - start_;
running_ = false;
}
}
int64_t GetInMs() const {
return GetNanos() / 1000000;
}
protected:
int64_t GetNanos() const {
return running_ ? absl::GetCurrentTimeNanos() - start_ + sum_ : sum_;
return running_ ? now() - start_ + sum_ : sum_;
}
private:

View File

@ -12,8 +12,6 @@
#include <stdio.h>
#include <memory>
#include "absl/strings/str_split.h"
#include "fileio.h"
#include "include_gunit.h"
@ -58,7 +56,7 @@ TEST(InputBufferTest, Read) {
std::string str;
auto input = std::make_unique<InputBuffer>(fp);
EXPECT_TRUE(input->Read(&str));
std::vector<std::string> lines = absl::StrSplit(str, '\n', absl::SkipEmpty());
std::vector<std::string> lines = split(str, '\n');
EXPECT_EQ(2, lines.size());
EXPECT_EQ("Hello", lines[0]);
EXPECT_EQ(" world!", lines[1]);

View File

@ -12,9 +12,6 @@
#include <string>
#include <vector>
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "imagedata.h"
#include "include_gunit.h"
#include "log.h"
@ -42,7 +39,9 @@ protected:
DocumentData write_doc("My document");
for (int p = 0; p < num_pages; ++p) {
// Make some fake text that is different for each page and save it.
page_texts->push_back(absl::StrFormat("Page %d of %d in doc %u", p, num_pages, doc_id));
char text[80];
snprintf(text, sizeof(text), "Page %d of %d in doc %u", p, num_pages, doc_id);
page_texts->push_back(text);
// Make an imagedata and put it in the document.
ImageData *imagedata = ImageData::Build("noname", p, "eng", fake_image.data(),
fake_image.size(), (*page_texts)[p].c_str(), nullptr);
@ -51,7 +50,8 @@ protected:
}
// Write it to a file.
std::string filename =
file::JoinPath(FLAGS_test_tmpdir, absl::StrCat("documentdata", doc_id, ".lstmf"));
file::JoinPath(FLAGS_test_tmpdir, "documentdata");
filename += std::to_string(doc_id) + ".lstmf";
EXPECT_TRUE(write_doc.SaveDocument(filename.c_str(), nullptr));
return filename;
}

View File

@ -20,6 +20,19 @@
const char *FLAGS_test_tmpdir = "./tmp";
namespace tesseract {
// Removes leading and trailing whitespace (as classified by std::isspace)
// from s in place. A string consisting only of whitespace becomes empty.
void trim(std::string &s) {
  // Characters are taken as unsigned char before being handed to std::isspace.
  const auto not_space = [](unsigned char ch) { return !std::isspace(ch); };

  // Drop everything in front of the first non-whitespace character.
  const auto first_kept = std::find_if(s.begin(), s.end(), not_space);
  s.erase(s.begin(), first_kept);

  // Drop everything after the last non-whitespace character. base() turns the
  // reverse iterator into the forward iterator one past that character.
  const auto past_last_kept = std::find_if(s.rbegin(), s.rend(), not_space).base();
  s.erase(past_last_kept, s.end());
}
} // namespace tesseract
class file : public tesseract::File {
public:
static void MakeTmpdir() {

View File

@ -11,8 +11,6 @@
#include <string> // for std::string
#include "absl/strings/str_cat.h"
#include "gmock/gmock.h" // for testing::ElementsAreArray
#include "include_gunit.h"
@ -59,7 +57,7 @@ TEST(LangModelTest, AddACharacter) {
pass_through_recoder, words, puncs, numbers, lang_is_rtl, nullptr,
nullptr));
// Init a trainer with it, and encode kTestString.
std::string traineddata1 = file::JoinPath(output_dir, lang1, absl::StrCat(lang1, ".traineddata"));
std::string traineddata1 = file::JoinPath(output_dir, lang1, lang1) + ".traineddata";
LSTMTrainer trainer1;
trainer1.InitCharSet(traineddata1);
std::vector<int> labels1;
@ -79,7 +77,7 @@ TEST(LangModelTest, AddACharacter) {
pass_through_recoder, words, puncs, numbers, lang_is_rtl,
nullptr, nullptr));
// Init a trainer with it, and encode kTestString.
std::string traineddata2 = file::JoinPath(output_dir, lang2, absl::StrCat(lang2, ".traineddata"));
std::string traineddata2 = file::JoinPath(output_dir, lang2, lang2) + ".traineddata";
LSTMTrainer trainer2;
trainer2.InitCharSet(traineddata2);
std::vector<int> labels2;
@ -144,7 +142,7 @@ TEST(LangModelTest, AddACharacterHindi) {
pass_through_recoder, words, puncs, numbers, lang_is_rtl, nullptr,
nullptr));
// Init a trainer with it, and encode kTestString.
std::string traineddata1 = file::JoinPath(output_dir, lang1, absl::StrCat(lang1, ".traineddata"));
std::string traineddata1 = file::JoinPath(output_dir, lang1, lang1) + ".traineddata";
LSTMTrainer trainer1;
trainer1.InitCharSet(traineddata1);
std::vector<int> labels1;
@ -164,7 +162,7 @@ TEST(LangModelTest, AddACharacterHindi) {
pass_through_recoder, words, puncs, numbers, lang_is_rtl,
nullptr, nullptr));
// Init a trainer with it, and encode kTestString.
std::string traineddata2 = file::JoinPath(output_dir, lang2, absl::StrCat(lang2, ".traineddata"));
std::string traineddata2 = file::JoinPath(output_dir, lang2, lang2) + ".traineddata";
LSTMTrainer trainer2;
trainer2.InitCharSet(traineddata2);
std::vector<int> labels2;

View File

@ -18,7 +18,6 @@
#include "include_gunit.h"
#include "absl/strings/str_cat.h"
#include "helpers.h"
#include "tprintf.h"
@ -81,7 +80,7 @@ protected:
std::string checkpoint_path = model_path + "_checkpoint";
trainer_ = std::make_unique<LSTMTrainer>(model_path.c_str(), checkpoint_path.c_str(), 0, 0);
trainer_->InitCharSet(
file::JoinPath(FLAGS_test_tmpdir, kLang, absl::StrCat(kLang, ".traineddata")));
file::JoinPath(FLAGS_test_tmpdir, kLang, kLang) + ".traineddata");
int net_mode = adam ? NF_ADAM : 0;
// Adam needs a higher learning rate, due to not multiplying the effective
// rate by 1/(1-momentum).

View File

@ -32,9 +32,6 @@
#include "trainingsample.h"
#include "unicharset.h"
#include "absl/strings/numbers.h" // for safe_strto32
#include "absl/strings/str_split.h" // for absl::StrSplit
#include <string>
#include <utility>
#include <vector>
@ -268,7 +265,7 @@ TEST_F(MasterTrainerTest, ErrorCounterTest) {
shape_classifier.get(), &accuracy_report);
LOG(INFO) << accuracy_report.c_str();
std::string result_string = accuracy_report.c_str();
std::vector<std::string> results = absl::StrSplit(result_string, '\t', absl::SkipEmpty());
std::vector<std::string> results = split(result_string, '\t');
EXPECT_EQ(tesseract::CT_SIZE + 1, results.size());
int result_values[tesseract::CT_SIZE];
for (int i = 0; i < tesseract::CT_SIZE; ++i) {

View File

@ -11,7 +11,6 @@
#include "normstrngs.h"
#include <tesseract/unichar.h>
#include "absl/strings/str_format.h" // for absl::StrFormat
#include "include_gunit.h"
#include "normstrngs_test.h"
#ifdef INCLUDE_TENSORFLOW
@ -315,7 +314,9 @@ TEST(NormstrngsTest, IsWhitespace) {
EXPECT_TRUE(IsWhitespace('\n'));
// U+2000 through U+200A
for (char32 ch = 0x2000; ch <= 0x200A; ++ch) {
SCOPED_TRACE(absl::StrFormat("Failed at U+%x", ch));
char text[80];
snprintf(text, sizeof(text), "Failed at U+%x", ch);
SCOPED_TRACE(text);
EXPECT_TRUE(IsWhitespace(ch));
}
// U+3000 is whitespace
@ -354,7 +355,9 @@ TEST(NormstrngsTest, IsInterchangeValid) {
const int32_t kMinUnicodeValue = 33;
const int32_t kMaxUnicodeValue = 0x10FFFF;
for (int32_t ch = kMinUnicodeValue; ch <= kMaxUnicodeValue; ++ch) {
SCOPED_TRACE(absl::StrFormat("Failed at U+%x", ch));
char text[80];
snprintf(text, sizeof(text), "Failed at U+%x", ch);
SCOPED_TRACE(text);
EXPECT_EQ(UniLib::IsInterchangeValid(ch), IsInterchangeValid(ch));
}
#else
@ -369,7 +372,9 @@ TEST(NormstrngsTest, IsInterchangeValid7BitAscii) {
const int32_t kMinUnicodeValue = 33;
const int32_t kMaxUnicodeValue = 0x10FFFF;
for (int32_t ch = kMinUnicodeValue; ch <= kMaxUnicodeValue; ++ch) {
SCOPED_TRACE(absl::StrFormat("Failed at U+%x", ch));
char text[80];
snprintf(text, sizeof(text), "Failed at U+%x", ch);
SCOPED_TRACE(text);
std::string str = EncodeAsUTF8(ch);
EXPECT_EQ(UniLib::IsInterchangeValid7BitAscii(str), IsInterchangeValid7BitAscii(ch));
}
@ -396,7 +401,9 @@ TEST(NormstrngsTest, FullwidthToHalfwidth) {
for (int32_t ch = kMinUnicodeValue; ch <= kMaxUnicodeValue; ++ch) {
if (!IsValidCodepoint(ch))
continue;
SCOPED_TRACE(absl::StrFormat("Failed at U+%x", ch));
char text[80];
snprintf(text, sizeof(text), "Failed at U+%x", ch);
SCOPED_TRACE(text);
std::string str = EncodeAsUTF8(ch);
const std::string expected_half_str =
UniLib::FullwidthToHalfwidth(str.c_str(), str.length(), true);

View File

@ -16,8 +16,6 @@
#include <sstream> // for std::stringstream
#include <string>
#include <vector>
#include "absl/strings/str_cat.h"
#include "absl/strings/str_join.h"
namespace tesseract {
@ -33,7 +31,9 @@ inline std::string CodepointList(const std::vector<char32> &str32) {
// Formats a UTF-8 string for test diagnostics: the string itself enclosed in
// double quotes, then a space, then the output of CodepointList() for the
// string's UTF-32 decoding.
inline std::string PrintString32WithUnicodes(const std::string &str) {
  std::vector<char32> str32 = UNICHAR::UTF8ToUTF32(str.c_str());
  // Assemble the message with std::string only. str must sit between the two
  // quote characters so the message shows the text the codepoints belong to.
  std::string s = "\"";
  s += str;
  s += "\" ";
  s += CodepointList(str32);
  return s;
}
inline std::string PrintStringVectorWithUnicodes(const std::vector<std::string> &glyphs) {
@ -49,18 +49,30 @@ inline void ExpectGraphemeModeResults(const std::string &str, UnicodeNormMode u_
int unicode_count, int glyph_count, int grapheme_count,
const std::string &target_str) {
std::vector<std::string> glyphs;
std::string s;
EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(
u_mode, OCRNorm::kNone, GraphemeNormMode::kIndividualUnicodes, true, str.c_str(), &glyphs));
EXPECT_EQ(glyphs.size(), unicode_count) << PrintStringVectorWithUnicodes(glyphs);
EXPECT_EQ(target_str, absl::StrJoin(glyphs.begin(), glyphs.end(), ""));
for (auto &glyph : glyphs) {
s += glyph;
}
EXPECT_EQ(target_str, s);
EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(u_mode, OCRNorm::kNone, GraphemeNormMode::kGlyphSplit,
true, str.c_str(), &glyphs));
EXPECT_EQ(glyphs.size(), glyph_count) << PrintStringVectorWithUnicodes(glyphs);
EXPECT_EQ(target_str, absl::StrJoin(glyphs.begin(), glyphs.end(), ""));
s.clear();
for (auto &glyph : glyphs) {
s += glyph;
}
EXPECT_EQ(target_str, s);
EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(u_mode, OCRNorm::kNone, GraphemeNormMode::kCombined,
true, str.c_str(), &glyphs));
EXPECT_EQ(glyphs.size(), grapheme_count) << PrintStringVectorWithUnicodes(glyphs);
EXPECT_EQ(target_str, absl::StrJoin(glyphs.begin(), glyphs.end(), ""));
s.clear();
for (auto &glyph : glyphs) {
s += glyph;
}
EXPECT_EQ(target_str, s);
EXPECT_TRUE(NormalizeCleanAndSegmentUTF8(u_mode, OCRNorm::kNone, GraphemeNormMode::kSingleString,
true, str.c_str(), &glyphs));
EXPECT_EQ(glyphs.size(), 1) << PrintStringVectorWithUnicodes(glyphs);

View File

@ -13,7 +13,6 @@
#include <pango/pango.h>
#include <cstdio>
#include <string>
#include "absl/strings/str_cat.h" // for absl::StrCat
#include "commandlineflags.h"
#include "fileio.h"
#include "gmock/gmock-matchers.h" // for EXPECT_THAT
@ -316,7 +315,8 @@ TEST_F(FontUtilsTest, GetAllRenderableCharacters) {
// Check that none of the included fonts cover the Mongolian or Ogham space
// characters.
for (size_t f = 0; f < countof(kExpectedFontNames); ++f) {
SCOPED_TRACE(absl::StrCat("Testing ", kExpectedFontNames[f]));
std::string tracestring = "Testing " + kExpectedFontNames[f];
SCOPED_TRACE(tracestring);
FontUtils::GetAllRenderableCharacters(kExpectedFontNames[f], &unicode_mask);
# if 0 // TODO: check fails because DejaVu Sans Ultra-Light supports ogham
EXPECT_FALSE(unicode_mask[kOghamChar]);

View File

@ -11,10 +11,6 @@
#include <string> // for std::string
#include "absl/strings/str_cat.h" // for absl::StrCat
#include "absl/strings/str_join.h" // for absl::StrJoin
#include "absl/strings/str_split.h" // for absl::StrSplit
#include "include_gunit.h" // for TEST
#include "log.h" // for LOG
@ -62,7 +58,7 @@ void AsciiToRowInfo(const char *text, int row_number, RowInfo *info) {
info->lword_text = info->rword_text = "";
info->ltr = true;
std::vector<std::string> words = absl::StrSplit(text, ' ', absl::SkipEmpty());
std::vector<std::string> words = split(text, ' ');
info->num_words = words.size();
if (info->num_words < 1) {
return;
@ -156,10 +152,11 @@ void EvaluateParagraphDetection(const TextAndModel *correct, int n,
dbg_lines.emplace_back("# ==========================");
for (int i = 0; i < n; i++) {
if (correct[i].model_type != PCONT) {
dbg_lines.push_back(absl::StrCat(correct[i].ascii, " # ",
correct[i].model.ToString().c_str(),
correct[i].is_very_first_or_continuation ? " crown" : "",
correct[i].is_list_item ? " li" : ""));
std::string s = std::string(correct[i].ascii) + " # " +
correct[i].model.ToString() +
(correct[i].is_very_first_or_continuation ? " crown" : "") +
(correct[i].is_list_item ? " li" : "");
dbg_lines.push_back(s);
} else {
dbg_lines.emplace_back(correct[i].ascii);
}
@ -173,16 +170,21 @@ void EvaluateParagraphDetection(const TextAndModel *correct, int n,
if (i == 0 || (detector_output[i - 1] != detector_output[i])) {
if (detector_output[i] && detector_output[i]->model) {
annotation +=
absl::StrCat(" # ", detector_output[i]->model->ToString().c_str(),
detector_output[i]->is_very_first_or_continuation ? " crown" : "",
detector_output[i]->is_list_item ? " li" : "");
" # " + detector_output[i]->model->ToString() +
(detector_output[i]->is_very_first_or_continuation ? " crown" : "") +
(detector_output[i]->is_list_item ? " li" : "");
} else {
annotation = " # Unmodeled paragraph.";
}
}
dbg_lines.push_back(absl::StrCat(correct[i].ascii, annotation));
std::string s = correct[i].ascii + annotation;
dbg_lines.push_back(s);
}
LOG(INFO) << "Discrepancy!\n" << absl::StrJoin(dbg_lines, "\n");
std::string s;
for (auto &dbg_line : dbg_lines) {
s += dbg_line + "\n";
}
LOG(INFO) << "Discrepancy!\n" << s;
}
}

View File

@ -22,8 +22,6 @@
#include "helpers.h"
#include "absl/strings/str_format.h" // for absl::StrFormat
namespace tesseract {
// Number of characters to test beam search with.
@ -160,9 +158,9 @@ protected:
if (u_decoded.size() < truth_utf8.size()) {
const char *str = ccutil_.unicharset.id_to_unichar(unichar_ids[u]);
total_rating += ratings[u];
LOG(INFO) << absl::StrFormat("%d:u_id=%d=%s, c=%g, r=%g, r_sum=%g @%d", u, unichar_ids[u],
str, certainties[u], ratings[u], total_rating, xcoords[u])
<< "\n";
LOG(INFO) << u << ":u_id=" << unichar_ids[u] << "=" << str << ", c="
<< certainties[u] << ", r=" << ratings[u] << "r_sum="
<< total_rating << " @" << xcoords[u] << "\n";
if (str[0] == ' ') {
total_rating = 0.0f;
}
@ -184,11 +182,9 @@ protected:
}
w_decoded += word->best_choice->unichar_string().c_str();
}
LOG(INFO) << absl::StrFormat("Word:%d = %s, c=%g, r=%g, perm=%d", w,
word->best_choice->unichar_string().c_str(),
word->best_choice->certainty(), word->best_choice->rating(),
word->best_choice->permuter())
<< "\n";
LOG(INFO) << "Word:" << w << " = " << word->best_choice->unichar_string()
<< ", c=" << word->best_choice->certainty() << ", r=" << word->best_choice->rating()
<< ", perm=" << word->best_choice->permuter() << "\n";
}
std::string w_trunc(w_decoded.data(), truth_utf8.size());
if (truth_utf8 != w_trunc) {

View File

@ -5,7 +5,6 @@
#include <string>
#include "scrollview.h"
#include "absl/strings/str_format.h" // for absl::StrFormat
#include "include_gunit.h"
#include "log.h" // for LOG
@ -99,7 +98,7 @@ protected:
pixWrite(outfile.c_str(), pix, IFF_PNG);
}
pix.destroy();
LOG(INFO) << absl::StrFormat("At level %d: pix diff = %d\n", level, pixcount);
LOG(INFO) << "At level " << level << ": pix diff = " << pixcount << "\n";
EXPECT_LE(pixcount, max_diff);
// if (base::GetFlag(FLAGS_v) > 1) CHECK_LE(pixcount, max_diff);
}
@ -293,7 +292,8 @@ TEST_F(ResultIteratorTest, EasyTest) {
// Test baseline of the first line.
int x1, y1, x2, y2;
r_it->Baseline(tesseract::RIL_TEXTLINE, &x1, &y1, &x2, &y2);
LOG(INFO) << absl::StrFormat("Baseline (%d,%d)->(%d,%d)", x1, y1, x2, y2) << "\n";
LOG(INFO) << "Baseline ("
<< x1 << ',' << y1 << ")->(" << x2 << ',' << y2 << ")\n";
// Make sure we have a decent vector.
EXPECT_GE(x2, x1 + 400);
// The point 200,116 should be very close to the baseline.
@ -315,9 +315,9 @@ TEST_F(ResultIteratorTest, EasyTest) {
float confidence = r_it->Confidence(tesseract::RIL_WORD);
EXPECT_GE(confidence, 80.0f);
char *word_str = r_it->GetUTF8Text(tesseract::RIL_WORD);
LOG(INFO) << absl::StrFormat("Word %s in font %s, id %d, size %d, conf %g", word_str, font,
font_id, pointsize, confidence)
<< "\n";
LOG(INFO) << "Word " << word_str << " in font " << font
<< ", id " << font_id << ", size " << pointsize
<< ", conf " << confidence << "\n";
delete[] word_str;
EXPECT_FALSE(bold);
EXPECT_FALSE(italic);
@ -372,8 +372,8 @@ TEST_F(ResultIteratorTest, SmallCapDropCapTest) {
&pointsize, &font_id);
char *word_str = r_it->GetUTF8Text(tesseract::RIL_WORD);
if (word_str != nullptr) {
LOG(INFO) << absl::StrFormat("Word %s is %s", word_str, smallcaps ? "SMALLCAPS" : "Normal")
<< "\n";
LOG(INFO) << "Word " << word_str
<< " is " << (smallcaps ? "SMALLCAPS" : "Normal") << "\n";
if (r_it->SymbolIsDropcap()) {
++found_dropcaps;
}
@ -392,7 +392,7 @@ TEST_F(ResultIteratorTest, SmallCapDropCapTest) {
while (s_it.Next(tesseract::RIL_SYMBOL) && !s_it.IsAtBeginningOf(tesseract::RIL_WORD)) {
if (s_it.SymbolIsDropcap()) {
char *sym_str = s_it.GetUTF8Text(tesseract::RIL_SYMBOL);
LOG(ERROR) << absl::StrFormat("Symbol %s of word %s is dropcap", sym_str, word_str);
LOG(ERROR) << "Symbol " << sym_str << " of word " << word_str << " is dropcap";
delete[] sym_str;
}
EXPECT_FALSE(s_it.SymbolIsDropcap());
@ -433,8 +433,7 @@ TEST_F(ResultIteratorTest, SubSuperTest) {
result = r_it->GetUTF8Text(tesseract::RIL_SYMBOL);
if (strchr(kAllowedSupers, result[0]) == nullptr) {
char* word = r_it->GetUTF8Text(tesseract::RIL_WORD);
LOG(ERROR) << absl::StrFormat("Char %s in word %s is unexpected super!",
result, word);
LOG(ERROR) << "Char " << result << " in word " << word << " is unexpected super!";
delete [] word;
EXPECT_TRUE(strchr(kAllowedSupers, result[0]) != nullptr);
}
@ -445,8 +444,8 @@ TEST_F(ResultIteratorTest, SubSuperTest) {
}
} while (r_it->Next(tesseract::RIL_SYMBOL));
delete r_it;
LOG(INFO) << absl::StrFormat("Subs = %d, supers= %d, normal = %d",
found_subs, found_supers, found_normal) << "\n";
LOG(INFO) << "Subs = " << found_subs << ", supers= " << found_supers
<< ", normal = " << found_normal << "\n";
EXPECT_GE(found_subs, 25);
EXPECT_GE(found_supers, 25);
EXPECT_GE(found_normal, 1350);
@ -528,7 +527,7 @@ TEST_F(ResultIteratorTest, DISABLED_NonNullChoicesTest) {
do {
char *word_str = r_it->GetUTF8Text(tesseract::RIL_WORD);
if (word_str != nullptr) {
LOG(INFO) << absl::StrFormat("Word %s:", word_str) << "\n";
LOG(INFO) << "Word " << word_str << ":\n";
ResultIterator s_it = *r_it;
do {
tesseract::ChoiceIterator c_it(s_it);
@ -571,7 +570,7 @@ TEST_F(ResultIteratorTest, NonNullConfidencesTest) {
const char *char_str = s_it.GetUTF8Text(tesseract::RIL_SYMBOL);
CHECK(char_str != nullptr);
float confidence = s_it.Confidence(tesseract::RIL_SYMBOL);
LOG(INFO) << absl::StrFormat("Char %s has confidence %g\n", char_str, confidence);
LOG(INFO) << "Char " << char_str << " has confidence " << confidence << "\n";
delete[] char_str;
} while (!s_it.IsAtFinalElement(tesseract::RIL_WORD, tesseract::RIL_SYMBOL) &&
s_it.Next(tesseract::RIL_SYMBOL));

View File

@ -12,8 +12,6 @@
#include <string>
#include <utility>
#include "absl/strings/str_format.h" // for absl::StrFormat
#include "include_gunit.h"
#include "serialis.h"
@ -139,8 +137,9 @@ TEST_F(ShapeTableTest, FullTest) {
UNICHARSET unicharset;
unicharset.unichar_insert(" ");
for (int i = 1; i <= 10; ++i) {
std::string class_str = absl::StrFormat("class%d", i);
unicharset.unichar_insert(class_str.c_str());
char class_str[20];
snprintf(class_str, sizeof(class_str), "class%d", i);
unicharset.unichar_insert(class_str);
}
ShapeTable st(unicharset);
EXPECT_EQ(0, st.AddShape(3, 101));

View File

@ -17,7 +17,6 @@
#include "stringrenderer.h"
#include <allheaders.h>
#include "absl/strings/str_split.h" // for absl::StrSplit
#include <memory>
#include <string>
@ -348,7 +347,7 @@ TEST_F(StringRendererTest, DoesRenderWordBoxes) {
EXPECT_EQ(strlen(kEngText), renderer_->RenderToImage(kEngText, strlen(kEngText), &pix));
pix.destroy();
// Verify #boxchars = #words + #spaces
std::vector<std::string> words = absl::StrSplit(kEngText, ' ', absl::SkipEmpty());
std::vector<std::string> words = split(kEngText, ' ');
const int kNumSpaces = words.size() - 1;
const int kExpectedNumBoxes = words.size() + kNumSpaces;
const std::vector<BoxChar *> &boxchars = renderer_->GetBoxes();
@ -371,8 +370,12 @@ TEST_F(StringRendererTest, DoesRenderWordBoxesFromMultiLineText) {
// Pass the length of the text actually being rendered; the expectation is
// that all of kMultlineText is consumed.
EXPECT_EQ(strlen(kMultlineText), renderer_->RenderToImage(kMultlineText, strlen(kMultlineText), &pix));
pix.destroy();
// Verify #boxchars = #words + #spaces + #newlines
std::vector<std::string> words =
absl::StrSplit(kMultlineText, absl::ByAnyChar(" \n"), absl::SkipEmpty());
std::vector<std::string> words;
for (auto &line : split(kMultlineText, '\n')) {
for (auto &word : split(line, ' ')) {
words.push_back(word);
}
}
const int kNumSeparators = words.size() - 1;
const int kExpectedNumBoxes = words.size() + kNumSeparators;
const std::vector<BoxChar *> &boxchars = renderer_->GetBoxes();

View File

@ -12,7 +12,6 @@
#include <allheaders.h>
#include <string> // for std::string
#include "absl/strings/str_format.h" // for absl::StrFormat
#include "include_gunit.h"
#include <tesseract/baseapi.h>
@ -114,13 +113,12 @@ protected:
const char *message) {
int value = projection_->EvaluateBox(box, denorm_, false);
if (greater_or_equal != (value > target_value)) {
LOG(INFO) << absl::StrFormat(
"EvaluateBox too %s:%d vs %d for %s word '%s' at:", greater_or_equal ? "low" : "high",
value, target_value, message, text);
LOG(INFO) << "EvaluateBox too " << (greater_or_equal ? "low" : "high")
<< ":" << value << " vs " << target_value << " for " << message << " word '" << text << "' at:";
box.print();
value = projection_->EvaluateBox(box, denorm_, true);
} else {
LOG(INFO) << absl::StrFormat("EvaluateBox OK(%d) for %s word '%s'", value, message, text);
LOG(INFO) << "EvaluateBox OK(" << value << ") for " << message << " word '" << text << "'";
}
if (greater_or_equal) {
EXPECT_GE(value, target_value);
@ -136,14 +134,14 @@ protected:
int true_dist = projection_->DistanceOfBoxFromBox(box, true_box, true, denorm_, false);
int false_dist = projection_->DistanceOfBoxFromBox(box, false_box, true, denorm_, false);
if (false_dist <= true_dist) {
LOG(INFO) << absl::StrFormat("Distance wrong:%d vs %d for %s word '%s' at:", false_dist,
true_dist, message, text);
LOG(INFO) << "Distance wrong:" << false_dist << " vs " << true_dist
<< " for " << message << " word '" << text << "' at:";
true_box.print();
projection_->DistanceOfBoxFromBox(box, true_box, true, denorm_, true);
projection_->DistanceOfBoxFromBox(box, false_box, true, denorm_, true);
} else {
LOG(INFO) << absl::StrFormat("Distance OK(%d vs %d) for %s word '%s'", false_dist, true_dist,
message, text);
LOG(INFO) << "Distance OK(" << false_dist << " vs " << true_dist
<< ") for " << message << " word '" << text << "'";
}
}

View File

@ -12,9 +12,6 @@
#include <string>
#include <allheaders.h>
#include "absl/strings/ascii.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_split.h"
#include "include_gunit.h"
#include "log.h" // for LOG
@ -46,7 +43,7 @@ protected:
compressed_.EncodeUnichar(null_char_, &code);
encoded_null_char_ = code(0);
std::string output_name =
file::JoinPath(FLAGS_test_tmpdir, absl::StrCat(unicharset_name, ".encoding.txt"));
file::JoinPath(FLAGS_test_tmpdir, unicharset_name) + ".encoding.txt";
std::string encoding = compressed_.GetEncodingAsString(unicharset_);
std::string encoding_str(&encoding[0], encoding.size());
CHECK_OK(file::SetContents(output_name, encoding_str, file::Defaults()));
@ -233,7 +230,7 @@ TEST_F(UnicharcompressTest, GetEncodingAsString) {
ExpectCorrect("trivial");
std::string encoding = compressed_.GetEncodingAsString(unicharset_);
std::string encoding_str(&encoding[0], encoding.length());
std::vector<std::string> lines = absl::StrSplit(encoding_str, "\n", absl::SkipEmpty());
std::vector<std::string> lines = split(encoding_str, '\n');
EXPECT_EQ(5, lines.size());
// The first line is always space.
EXPECT_EQ("0\t ", lines[0]);