mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-18 06:30:14 +08:00
unittest: Format code
It was formatted with clang-format-7 -i unittest/*.{c*,h}. Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
parent
0f3206d5fe
commit
9e66fb918f
@ -2,7 +2,8 @@
|
||||
// File: apiexample_test.cc
|
||||
// Description: Api Test for Tesseract using text fixtures and parameters.
|
||||
// Tests for Devanagari, Latin and Arabic scripts are disabled by default.
|
||||
// Disabled tests can be run when required by using the --gtest_also_run_disabled_tests argument.
|
||||
// Disabled tests can be run when required by using the
|
||||
// --gtest_also_run_disabled_tests argument.
|
||||
// ./unittest/apiexample_test --gtest_also_run_disabled_tests
|
||||
//
|
||||
// Author: ShreeDevi Kumar
|
||||
@ -21,91 +22,89 @@
|
||||
// expects clone of tessdata_fast repo in ../../tessdata_fast
|
||||
|
||||
//#include "log.h"
|
||||
#include "include_gunit.h"
|
||||
#include "baseapi.h"
|
||||
#include "leptonica/allheaders.h"
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <fstream>
|
||||
#include <locale>
|
||||
#include <limits.h>
|
||||
#include <time.h>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <locale>
|
||||
#include <string>
|
||||
#include "baseapi.h"
|
||||
#include "include_gunit.h"
|
||||
#include "leptonica/allheaders.h"
|
||||
|
||||
namespace {
|
||||
|
||||
class QuickTest : public testing::Test {
|
||||
protected:
|
||||
virtual void SetUp() {
|
||||
start_time_ = time(nullptr);
|
||||
}
|
||||
virtual void SetUp() { start_time_ = time(nullptr); }
|
||||
virtual void TearDown() {
|
||||
const time_t end_time = time(nullptr);
|
||||
EXPECT_TRUE(end_time - start_time_ <=55) << "The test took too long - " << ::testing::PrintToString(end_time - start_time_);
|
||||
EXPECT_TRUE(end_time - start_time_ <= 55)
|
||||
<< "The test took too long - "
|
||||
<< ::testing::PrintToString(end_time - start_time_);
|
||||
}
|
||||
time_t start_time_;
|
||||
};
|
||||
};
|
||||
|
||||
void OCRTester(const char* imgname, const char* groundtruth, const char* tessdatadir, const char* lang) {
|
||||
//log.info() << tessdatadir << " for language: " << lang << std::endl;
|
||||
char *outText;
|
||||
std::locale loc("C"); // You can also use "" for the default system locale
|
||||
std::ifstream file(groundtruth);
|
||||
file.imbue(loc); // Use it for file input
|
||||
std::string gtText((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
|
||||
tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
|
||||
ASSERT_FALSE(api->Init(tessdatadir, lang)) << "Could not initialize tesseract.";
|
||||
Pix *image = pixRead(imgname);
|
||||
ASSERT_TRUE(image != nullptr) << "Failed to read test image.";
|
||||
api->SetImage(image);
|
||||
outText = api->GetUTF8Text();
|
||||
EXPECT_EQ(gtText,outText) << "Phototest.tif OCR does not match ground truth for " << ::testing::PrintToString(lang);
|
||||
api->End();
|
||||
delete [] outText;
|
||||
pixDestroy(&image);
|
||||
}
|
||||
void OCRTester(const char* imgname, const char* groundtruth,
|
||||
const char* tessdatadir, const char* lang) {
|
||||
// log.info() << tessdatadir << " for language: " << lang << std::endl;
|
||||
char* outText;
|
||||
std::locale loc("C"); // You can also use "" for the default system locale
|
||||
std::ifstream file(groundtruth);
|
||||
file.imbue(loc); // Use it for file input
|
||||
std::string gtText((std::istreambuf_iterator<char>(file)),
|
||||
std::istreambuf_iterator<char>());
|
||||
tesseract::TessBaseAPI* api = new tesseract::TessBaseAPI();
|
||||
ASSERT_FALSE(api->Init(tessdatadir, lang))
|
||||
<< "Could not initialize tesseract.";
|
||||
Pix* image = pixRead(imgname);
|
||||
ASSERT_TRUE(image != nullptr) << "Failed to read test image.";
|
||||
api->SetImage(image);
|
||||
outText = api->GetUTF8Text();
|
||||
EXPECT_EQ(gtText, outText)
|
||||
<< "Phototest.tif OCR does not match ground truth for "
|
||||
<< ::testing::PrintToString(lang);
|
||||
api->End();
|
||||
delete[] outText;
|
||||
pixDestroy(&image);
|
||||
}
|
||||
|
||||
class MatchGroundTruth : public QuickTest ,
|
||||
public ::testing::WithParamInterface<const char*> {
|
||||
};
|
||||
class MatchGroundTruth : public QuickTest,
|
||||
public ::testing::WithParamInterface<const char*> {};
|
||||
|
||||
TEST_P(MatchGroundTruth, FastPhototestOCR) {
|
||||
OCRTester(TESTING_DIR "/phototest.tif",
|
||||
TESTING_DIR "/phototest.txt",
|
||||
TESSDATA_DIR "_fast", GetParam());
|
||||
}
|
||||
TEST_P(MatchGroundTruth, FastPhototestOCR) {
|
||||
OCRTester(TESTING_DIR "/phototest.tif", TESTING_DIR "/phototest.txt",
|
||||
TESSDATA_DIR "_fast", GetParam());
|
||||
}
|
||||
|
||||
TEST_P(MatchGroundTruth, BestPhototestOCR) {
|
||||
OCRTester(TESTING_DIR "/phototest.tif",
|
||||
TESTING_DIR "/phototest.txt",
|
||||
TESSDATA_DIR "_best", GetParam());
|
||||
}
|
||||
TEST_P(MatchGroundTruth, BestPhototestOCR) {
|
||||
OCRTester(TESTING_DIR "/phototest.tif", TESTING_DIR "/phototest.txt",
|
||||
TESSDATA_DIR "_best", GetParam());
|
||||
}
|
||||
|
||||
TEST_P(MatchGroundTruth, TessPhototestOCR) {
|
||||
OCRTester(TESTING_DIR "/phototest.tif",
|
||||
TESTING_DIR "/phototest.txt",
|
||||
TESSDATA_DIR , GetParam());
|
||||
}
|
||||
TEST_P(MatchGroundTruth, TessPhototestOCR) {
|
||||
OCRTester(TESTING_DIR "/phototest.tif", TESTING_DIR "/phototest.txt",
|
||||
TESSDATA_DIR, GetParam());
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P( Eng, MatchGroundTruth,
|
||||
::testing::Values("eng") );
|
||||
INSTANTIATE_TEST_CASE_P( DISABLED_Latin, MatchGroundTruth,
|
||||
::testing::Values("script/Latin") );
|
||||
INSTANTIATE_TEST_CASE_P( DISABLED_Deva, MatchGroundTruth,
|
||||
::testing::Values("script/Devanagari") );
|
||||
INSTANTIATE_TEST_CASE_P( DISABLED_Arabic, MatchGroundTruth,
|
||||
::testing::Values("script/Arabic") );
|
||||
INSTANTIATE_TEST_CASE_P(Eng, MatchGroundTruth, ::testing::Values("eng"));
|
||||
INSTANTIATE_TEST_CASE_P(DISABLED_Latin, MatchGroundTruth,
|
||||
::testing::Values("script/Latin"));
|
||||
INSTANTIATE_TEST_CASE_P(DISABLED_Deva, MatchGroundTruth,
|
||||
::testing::Values("script/Devanagari"));
|
||||
INSTANTIATE_TEST_CASE_P(DISABLED_Arabic, MatchGroundTruth,
|
||||
::testing::Values("script/Arabic"));
|
||||
|
||||
class EuroText : public QuickTest {
|
||||
};
|
||||
class EuroText : public QuickTest {};
|
||||
|
||||
TEST_F(EuroText, FastLatinOCR) {
|
||||
OCRTester(TESTING_DIR "/eurotext.tif",
|
||||
TESTING_DIR "/eurotext.txt",
|
||||
TESSDATA_DIR "_fast", "script/Latin");
|
||||
}
|
||||
TEST_F(EuroText, FastLatinOCR) {
|
||||
OCRTester(TESTING_DIR "/eurotext.tif", TESTING_DIR "/eurotext.txt",
|
||||
TESSDATA_DIR "_fast", "script/Latin");
|
||||
}
|
||||
|
||||
// script/Latin for eurotext.tif does not match groundtruth
|
||||
// for tessdata & tessdata_best.
|
||||
// so do not test these here.
|
||||
// script/Latin for eurotext.tif does not match groundtruth
|
||||
// for tessdata & tessdata_best.
|
||||
// so do not test these here.
|
||||
|
||||
} // namespace
|
||||
|
@ -31,19 +31,13 @@ class ApplyBoxTest : public testing::Test {
|
||||
std::string TestDataNameToPath(const std::string& name) {
|
||||
return file::JoinPath(TESTING_DIR, name);
|
||||
}
|
||||
std::string TessdataPath() {
|
||||
return TESSDATA_DIR;
|
||||
}
|
||||
std::string TessdataPath() { return TESSDATA_DIR; }
|
||||
std::string OutputNameToPath(const std::string& name) {
|
||||
return file::JoinPath(FLAGS_test_tmpdir, name);
|
||||
}
|
||||
|
||||
ApplyBoxTest() {
|
||||
src_pix_ = NULL;
|
||||
}
|
||||
~ApplyBoxTest() {
|
||||
pixDestroy(&src_pix_);
|
||||
}
|
||||
ApplyBoxTest() { src_pix_ = NULL; }
|
||||
~ApplyBoxTest() { pixDestroy(&src_pix_); }
|
||||
|
||||
void SetImage(const char* filename) {
|
||||
pixDestroy(&src_pix_);
|
||||
@ -70,7 +64,7 @@ class ApplyBoxTest : public testing::Test {
|
||||
api_.Recognize(NULL);
|
||||
char* ocr_text = api_.GetUTF8Text();
|
||||
EXPECT_STREQ(truth_str, ocr_text);
|
||||
delete [] ocr_text;
|
||||
delete[] ocr_text;
|
||||
// Test the boxes by reading the target box file in parallel with the
|
||||
// bounding boxes in the ocr output.
|
||||
std::string box_filename = TestDataNameToPath(target_box_file);
|
||||
@ -80,22 +74,21 @@ class ApplyBoxTest : public testing::Test {
|
||||
ResultIterator* it = api_.GetIterator();
|
||||
do {
|
||||
int left, top, right, bottom;
|
||||
EXPECT_TRUE(it->BoundingBox(tesseract::RIL_SYMBOL,
|
||||
&left, &top, &right, &bottom));
|
||||
TBOX ocr_box(ICOORD(left, height - bottom),
|
||||
ICOORD(right, height - top));
|
||||
EXPECT_TRUE(
|
||||
it->BoundingBox(tesseract::RIL_SYMBOL, &left, &top, &right, &bottom));
|
||||
TBOX ocr_box(ICOORD(left, height - bottom), ICOORD(right, height - top));
|
||||
int line_number;
|
||||
TBOX truth_box;
|
||||
STRING box_text;
|
||||
EXPECT_TRUE(ReadNextBox(0, &line_number, box_file, &box_text,
|
||||
&truth_box));
|
||||
EXPECT_TRUE(
|
||||
ReadNextBox(0, &line_number, box_file, &box_text, &truth_box));
|
||||
// Testing for major overlap is a bit weak, but if they all
|
||||
// major overlap successfully, then it has to be fairly close.
|
||||
EXPECT_TRUE(ocr_box.major_overlap(truth_box));
|
||||
// Also check that the symbol text matches the box text.
|
||||
char* symbol_text = it->GetUTF8Text(tesseract::RIL_SYMBOL);
|
||||
EXPECT_STREQ(box_text.string(), symbol_text);
|
||||
delete [] symbol_text;
|
||||
delete[] symbol_text;
|
||||
} while (it->Next(tesseract::RIL_SYMBOL));
|
||||
delete it;
|
||||
}
|
||||
@ -107,14 +100,14 @@ class ApplyBoxTest : public testing::Test {
|
||||
|
||||
// Tests character-level applyboxes on normal Times New Roman.
|
||||
TEST_F(ApplyBoxTest, TimesCharLevel) {
|
||||
VerifyBoxesAndText("trainingtimes.tif", kTruthTextWords,
|
||||
"trainingtimes.box", false);
|
||||
VerifyBoxesAndText("trainingtimes.tif", kTruthTextWords, "trainingtimes.box",
|
||||
false);
|
||||
}
|
||||
|
||||
// Tests character-level applyboxes on italic Times New Roman.
|
||||
TEST_F(ApplyBoxTest, ItalicCharLevel) {
|
||||
VerifyBoxesAndText("trainingital.tif", kTruthTextWords,
|
||||
"trainingital.box", false);
|
||||
VerifyBoxesAndText("trainingital.tif", kTruthTextWords, "trainingital.box",
|
||||
false);
|
||||
}
|
||||
|
||||
// Tests line-level applyboxes on normal Times New Roman.
|
||||
@ -125,8 +118,8 @@ TEST_F(ApplyBoxTest, TimesLineLevel) {
|
||||
|
||||
// Tests line-level applyboxes on italic Times New Roman.
|
||||
TEST_F(ApplyBoxTest, ItalLineLevel) {
|
||||
VerifyBoxesAndText("trainingitalline.tif", kTruthTextLine,
|
||||
"trainingital.box", true);
|
||||
VerifyBoxesAndText("trainingitalline.tif", kTruthTextLine, "trainingital.box",
|
||||
true);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
@ -8,8 +8,8 @@
|
||||
|
||||
namespace {
|
||||
|
||||
using ::testing::HasSubstr;
|
||||
using ::testing::ContainsRegex;
|
||||
using ::testing::HasSubstr;
|
||||
|
||||
const char* langs[] = {"eng", "vie", "hin", "ara", NULL};
|
||||
const char* image_files[] = {"HelloGoogle.tif", "viet.tif", "raaj.tif",
|
||||
@ -25,7 +25,7 @@ class FriendlyTessBaseAPI : public tesseract::TessBaseAPI {
|
||||
|
||||
string GetCleanedTextResult(tesseract::TessBaseAPI* tess, Pix* pix) {
|
||||
tess->SetImage(pix);
|
||||
char *result = tess->GetUTF8Text();
|
||||
char* result = tess->GetUTF8Text();
|
||||
string ocr_result = result;
|
||||
delete[] result;
|
||||
absl::StripAsciiWhitespace(&ocr_result);
|
||||
@ -36,19 +36,18 @@ string GetCleanedTextResult(tesseract::TessBaseAPI* tess, Pix* pix) {
|
||||
class TesseractTest : public testing::Test {
|
||||
protected:
|
||||
string TestDataNameToPath(const string& name) {
|
||||
return file::JoinPath(FLAGS_test_srcdir,
|
||||
"testdata/" + name);
|
||||
return file::JoinPath(FLAGS_test_srcdir, "testdata/" + name);
|
||||
}
|
||||
string TessdataPath() {
|
||||
return file::JoinPath(FLAGS_test_srcdir,
|
||||
"tessdata");
|
||||
return file::JoinPath(FLAGS_test_srcdir, "tessdata");
|
||||
}
|
||||
};
|
||||
|
||||
// Tests that array sizes match their intended size.
|
||||
TEST_F(TesseractTest, ArraySizeTest) {
|
||||
int size = 0;
|
||||
for (size = 0; kPolyBlockNames[size][0] != '\0'; ++size);
|
||||
for (size = 0; kPolyBlockNames[size][0] != '\0'; ++size)
|
||||
;
|
||||
EXPECT_EQ(size, PT_COUNT);
|
||||
}
|
||||
|
||||
@ -58,7 +57,7 @@ TEST_F(TesseractTest, BasicTesseractTest) {
|
||||
string truth_text;
|
||||
string ocr_text;
|
||||
api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY);
|
||||
Pix *src_pix = pixRead(TestDataNameToPath("phototest.tif").c_str());
|
||||
Pix* src_pix = pixRead(TestDataNameToPath("phototest.tif").c_str());
|
||||
CHECK(src_pix);
|
||||
ocr_text = GetCleanedTextResult(&api, src_pix);
|
||||
CHECK_OK(file::GetContents(TestDataNameToPath("phototest.gold.txt"),
|
||||
@ -75,14 +74,14 @@ TEST_F(TesseractTest, IteratesParagraphsEvenIfNotDetected) {
|
||||
api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY);
|
||||
api.SetPageSegMode(tesseract::PSM_SINGLE_BLOCK);
|
||||
api.SetVariable("paragraph_debug_level", "3");
|
||||
Pix *src_pix = pixRead(TestDataNameToPath("b622.png").c_str());
|
||||
Pix* src_pix = pixRead(TestDataNameToPath("b622.png").c_str());
|
||||
CHECK(src_pix);
|
||||
api.SetImage(src_pix);
|
||||
Boxa* para_boxes = api.GetComponentImages(tesseract::RIL_PARA,
|
||||
true, NULL, NULL);
|
||||
Boxa* para_boxes =
|
||||
api.GetComponentImages(tesseract::RIL_PARA, true, NULL, NULL);
|
||||
EXPECT_TRUE(para_boxes != NULL);
|
||||
Boxa* block_boxes = api.GetComponentImages(tesseract::RIL_BLOCK,
|
||||
true, NULL, NULL);
|
||||
Boxa* block_boxes =
|
||||
api.GetComponentImages(tesseract::RIL_BLOCK, true, NULL, NULL);
|
||||
EXPECT_TRUE(block_boxes != NULL);
|
||||
// TODO(eger): Get paragraphs out of this page pre-text.
|
||||
EXPECT_GE(boxaGetCount(para_boxes), boxaGetCount(block_boxes));
|
||||
@ -96,14 +95,14 @@ TEST_F(TesseractTest, IteratesParagraphsEvenIfNotDetected) {
|
||||
TEST_F(TesseractTest, HOCRWorksWithoutSetInputName) {
|
||||
tesseract::TessBaseAPI api;
|
||||
api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY);
|
||||
Pix *src_pix = pixRead(TestDataNameToPath("HelloGoogle.tif").c_str());
|
||||
Pix* src_pix = pixRead(TestDataNameToPath("HelloGoogle.tif").c_str());
|
||||
CHECK(src_pix);
|
||||
api.SetImage(src_pix);
|
||||
char *result = api.GetHOCRText(0);
|
||||
char* result = api.GetHOCRText(0);
|
||||
EXPECT_TRUE(result != NULL);
|
||||
EXPECT_THAT(result, HasSubstr("Hello"));
|
||||
EXPECT_THAT(result, HasSubstr("<div class='ocr_page'"));
|
||||
delete [] result;
|
||||
delete[] result;
|
||||
pixDestroy(&src_pix);
|
||||
}
|
||||
|
||||
@ -111,16 +110,16 @@ TEST_F(TesseractTest, HOCRWorksWithoutSetInputName) {
|
||||
TEST_F(TesseractTest, HOCRContainsBaseline) {
|
||||
tesseract::TessBaseAPI api;
|
||||
api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY);
|
||||
Pix *src_pix = pixRead(TestDataNameToPath("HelloGoogle.tif").c_str());
|
||||
Pix* src_pix = pixRead(TestDataNameToPath("HelloGoogle.tif").c_str());
|
||||
CHECK(src_pix);
|
||||
api.SetInputName("HelloGoogle.tif");
|
||||
api.SetImage(src_pix);
|
||||
char *result = api.GetHOCRText(0);
|
||||
char* result = api.GetHOCRText(0);
|
||||
EXPECT_TRUE(result != NULL);
|
||||
EXPECT_THAT(result, HasSubstr("Hello"));
|
||||
EXPECT_THAT(result, ContainsRegex("<span class='ocr_line'[^>]* "
|
||||
"baseline [-.0-9]+ [-.0-9]+"));
|
||||
delete [] result;
|
||||
delete[] result;
|
||||
pixDestroy(&src_pix);
|
||||
}
|
||||
|
||||
@ -131,13 +130,13 @@ TEST_F(TesseractTest, HOCRContainsBaseline) {
|
||||
TEST_F(TesseractTest, RickSnyderNotFuckSnyder) {
|
||||
tesseract::TessBaseAPI api;
|
||||
api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY);
|
||||
Pix *src_pix = pixRead(TestDataNameToPath("rick_snyder.jpeg").c_str());
|
||||
Pix* src_pix = pixRead(TestDataNameToPath("rick_snyder.jpeg").c_str());
|
||||
CHECK(src_pix);
|
||||
api.SetImage(src_pix);
|
||||
char *result = api.GetHOCRText(0);
|
||||
char* result = api.GetHOCRText(0);
|
||||
EXPECT_TRUE(result != NULL);
|
||||
EXPECT_THAT(result, Not(HasSubstr("FUCK")));
|
||||
delete [] result;
|
||||
delete[] result;
|
||||
pixDestroy(&src_pix);
|
||||
}
|
||||
|
||||
@ -146,19 +145,12 @@ TEST_F(TesseractTest, AdaptToWordStrTest) {
|
||||
static const char* kTrainingPages[] = {
|
||||
"136.tif", "256.tif", "410.tif", "432.tif", "540.tif",
|
||||
"692.tif", "779.tif", "793.tif", "808.tif", "815.tif",
|
||||
"12.tif", "12.tif", NULL
|
||||
};
|
||||
"12.tif", "12.tif", NULL};
|
||||
static const char* kTrainingText[] = {
|
||||
"1 3 6", "2 5 6", "4 1 0", "4 3 2", "5 4 0",
|
||||
"6 9 2", "7 7 9", "7 9 3", "8 0 8", "8 1 5",
|
||||
"1 2", "1 2", NULL
|
||||
};
|
||||
static const char* kTestPages[] = {
|
||||
"324.tif", "433.tif", "12.tif", NULL
|
||||
};
|
||||
static const char* kTestText[] = {
|
||||
"324", "433", "12", NULL
|
||||
};
|
||||
"1 3 6", "2 5 6", "4 1 0", "4 3 2", "5 4 0", "6 9 2", "7 7 9",
|
||||
"7 9 3", "8 0 8", "8 1 5", "1 2", "1 2", NULL};
|
||||
static const char* kTestPages[] = {"324.tif", "433.tif", "12.tif", NULL};
|
||||
static const char* kTestText[] = {"324", "433", "12", NULL};
|
||||
tesseract::TessBaseAPI api;
|
||||
string truth_text;
|
||||
string ocr_text;
|
||||
@ -168,20 +160,20 @@ TEST_F(TesseractTest, AdaptToWordStrTest) {
|
||||
// Train on the training text.
|
||||
for (int i = 0; kTrainingPages[i] != NULL; ++i) {
|
||||
string image_file = TestDataNameToPath(kTrainingPages[i]);
|
||||
Pix *src_pix = pixRead(image_file.c_str());
|
||||
Pix* src_pix = pixRead(image_file.c_str());
|
||||
CHECK(src_pix);
|
||||
api.SetImage(src_pix);
|
||||
EXPECT_TRUE(api.AdaptToWordStr(tesseract::PSM_SINGLE_WORD,
|
||||
kTrainingText[i]))
|
||||
<< "Failed to adapt to text \"" << kTrainingText[i]
|
||||
<< "\" on image " << image_file;
|
||||
EXPECT_TRUE(
|
||||
api.AdaptToWordStr(tesseract::PSM_SINGLE_WORD, kTrainingText[i]))
|
||||
<< "Failed to adapt to text \"" << kTrainingText[i] << "\" on image "
|
||||
<< image_file;
|
||||
pixDestroy(&src_pix);
|
||||
}
|
||||
// Test the test text.
|
||||
api.SetVariable("tess_bn_matching", "1");
|
||||
api.SetPageSegMode(tesseract::PSM_SINGLE_WORD);
|
||||
for (int i = 0; kTestPages[i] != NULL; ++i) {
|
||||
Pix *src_pix = pixRead(TestDataNameToPath(kTestPages[i]).c_str());
|
||||
Pix* src_pix = pixRead(TestDataNameToPath(kTestPages[i]).c_str());
|
||||
CHECK(src_pix);
|
||||
ocr_text = GetCleanedTextResult(&api, src_pix);
|
||||
absl::StripAsciiWhitespace(&truth_text);
|
||||
@ -196,7 +188,7 @@ TEST_F(TesseractTest, BasicLSTMTest) {
|
||||
string truth_text;
|
||||
string ocr_text;
|
||||
api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_LSTM_ONLY);
|
||||
Pix *src_pix = pixRead(TestDataNameToPath("phototest_2.tif").c_str());
|
||||
Pix* src_pix = pixRead(TestDataNameToPath("phototest_2.tif").c_str());
|
||||
CHECK(src_pix);
|
||||
ocr_text = GetCleanedTextResult(&api, src_pix);
|
||||
CHECK_OK(file::GetContents(TestDataNameToPath("phototest.gold.txt"),
|
||||
@ -213,22 +205,22 @@ TEST_F(TesseractTest, BasicLSTMTest) {
|
||||
// errors due to float/int conversions (e.g., see OUTLINE::move() in
|
||||
// ccstruct/poutline.h) Instead, we do a loose check.
|
||||
TEST_F(TesseractTest, LSTMGeometryTest) {
|
||||
Pix *src_pix = pixRead(TestDataNameToPath("deslant.tif").c_str());
|
||||
Pix* src_pix = pixRead(TestDataNameToPath("deslant.tif").c_str());
|
||||
FriendlyTessBaseAPI api;
|
||||
api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_LSTM_ONLY);
|
||||
api.SetImage(src_pix);
|
||||
ASSERT_EQ(api.Recognize(NULL), 0);
|
||||
|
||||
const PAGE_RES *page_res = api.GetPageRes();
|
||||
PAGE_RES_IT page_res_it(const_cast<PAGE_RES *>(page_res));
|
||||
const PAGE_RES* page_res = api.GetPageRes();
|
||||
PAGE_RES_IT page_res_it(const_cast<PAGE_RES*>(page_res));
|
||||
page_res_it.restart_page();
|
||||
BLOCK* block = page_res_it.block()->block;
|
||||
CHECK(block);
|
||||
|
||||
// extract word and character boxes for each word
|
||||
for (page_res_it.restart_page(); page_res_it.word () != NULL;
|
||||
for (page_res_it.restart_page(); page_res_it.word() != NULL;
|
||||
page_res_it.forward()) {
|
||||
WERD_RES *word = page_res_it.word();
|
||||
WERD_RES* word = page_res_it.word();
|
||||
CHECK(word);
|
||||
CHECK(word->best_choice);
|
||||
CHECK_GT(word->best_choice->length(), 0);
|
||||
@ -255,13 +247,13 @@ TEST_F(TesseractTest, LSTMGeometryTest) {
|
||||
|
||||
TEST_F(TesseractTest, InitConfigOnlyTest) {
|
||||
// Languages for testing initialization.
|
||||
const char* langs[] = { "eng", "chi_tra", "jpn", "vie", "hin"};
|
||||
const char* langs[] = {"eng", "chi_tra", "jpn", "vie", "hin"};
|
||||
std::unique_ptr<tesseract::TessBaseAPI> api;
|
||||
CycleTimer timer;
|
||||
for (int i = 0; i < ARRAYSIZE(langs); ++i) {
|
||||
api.reset(new tesseract::TessBaseAPI);
|
||||
timer.Restart();
|
||||
EXPECT_EQ(0, api->Init(TessdataPath().c_str(), langs[i] ,
|
||||
EXPECT_EQ(0, api->Init(TessdataPath().c_str(), langs[i],
|
||||
tesseract::OEM_TESSERACT_ONLY));
|
||||
timer.Stop();
|
||||
LOG(INFO) << "Lang " << langs[i] << " took " << timer.GetInMs()
|
||||
@ -275,9 +267,9 @@ TEST_F(TesseractTest, InitConfigOnlyTest) {
|
||||
for (int i = 0; i < ARRAYSIZE(langs); ++i) {
|
||||
api.reset(new tesseract::TessBaseAPI);
|
||||
timer.Restart();
|
||||
EXPECT_EQ(0, api->Init(TessdataPath().c_str(), langs[i] ,
|
||||
tesseract::OEM_TESSERACT_ONLY, NULL, 0,
|
||||
&vars_vec, &vars_values, false));
|
||||
EXPECT_EQ(0, api->Init(TessdataPath().c_str(), langs[i],
|
||||
tesseract::OEM_TESSERACT_ONLY, NULL, 0, &vars_vec,
|
||||
&vars_values, false));
|
||||
timer.Stop();
|
||||
LOG(INFO) << "Lang " << langs[i] << " took " << timer.GetInMs()
|
||||
<< "ms in config-only init";
|
||||
@ -294,15 +286,13 @@ TEST(TesseractInstanceTest, TestMultipleTessInstances) {
|
||||
int num_langs = 0;
|
||||
while (langs[num_langs] != NULL) ++num_langs;
|
||||
|
||||
const string kTessdataPath = file::JoinPath(
|
||||
FLAGS_test_srcdir,"tessdata");
|
||||
const string kTessdataPath = file::JoinPath(FLAGS_test_srcdir, "tessdata");
|
||||
|
||||
// Preload images and verify that OCR is correct on them individually.
|
||||
std::vector<Pix *> pix(num_langs);
|
||||
std::vector<Pix*> pix(num_langs);
|
||||
for (int i = 0; i < num_langs; ++i) {
|
||||
SCOPED_TRACE(absl::StrCat("Single instance test with lang = ", langs[i]));
|
||||
string path = FLAGS_test_srcdir
|
||||
+ "/testdata/" + image_files[i];
|
||||
string path = FLAGS_test_srcdir + "/testdata/" + image_files[i];
|
||||
pix[i] = pixRead(path.c_str());
|
||||
QCHECK(pix[i] != NULL) << "Could not read " << path;
|
||||
|
||||
@ -329,32 +319,30 @@ TEST(TesseractInstanceTest, TestMultipleTessInstances) {
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < num_langs; ++i)
|
||||
pixDestroy(&pix[i]);
|
||||
for (int i = 0; i < num_langs; ++i) pixDestroy(&pix[i]);
|
||||
}
|
||||
|
||||
// Tests whether Tesseract parameters are correctly set for the two instances.
|
||||
TEST(TesseractInstanceTest, TestMultipleTessInstanceVariables) {
|
||||
string illegal_name = "an_illegal_name";
|
||||
string langs[2] = { "eng", "hin" };
|
||||
string langs[2] = {"eng", "hin"};
|
||||
string int_param_name = "tessedit_pageseg_mode";
|
||||
int int_param[2] = { 1, 2 };
|
||||
string int_param_str[2] = { "1", "2" };
|
||||
int int_param[2] = {1, 2};
|
||||
string int_param_str[2] = {"1", "2"};
|
||||
string bool_param_name = "tessedit_ambigs_training";
|
||||
bool bool_param[2] = { false, true };
|
||||
string bool_param_str[2] = { "F", "T" };
|
||||
bool bool_param[2] = {false, true};
|
||||
string bool_param_str[2] = {"F", "T"};
|
||||
string str_param_name = "tessedit_char_blacklist";
|
||||
string str_param[2] = { "abc", "def" };
|
||||
string str_param[2] = {"abc", "def"};
|
||||
string double_param_name = "segment_penalty_dict_frequent_word";
|
||||
string double_param_str[2] = { "0.01", "2" };
|
||||
double double_param[2] = { 0.01, 2 };
|
||||
string double_param_str[2] = {"0.01", "2"};
|
||||
double double_param[2] = {0.01, 2};
|
||||
|
||||
const string kTessdataPath = file::JoinPath(
|
||||
FLAGS_test_srcdir,"tessdata");
|
||||
const string kTessdataPath = file::JoinPath(FLAGS_test_srcdir, "tessdata");
|
||||
|
||||
tesseract::TessBaseAPI tess1, tess2;
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
tesseract::TessBaseAPI *api = (i == 0) ? &tess1 : &tess2;
|
||||
tesseract::TessBaseAPI* api = (i == 0) ? &tess1 : &tess2;
|
||||
api->Init(kTessdataPath.c_str(), langs[i].c_str());
|
||||
api->SetVariable(illegal_name.c_str(), "none");
|
||||
api->SetVariable(int_param_name.c_str(), int_param_str[i].c_str());
|
||||
@ -363,7 +351,7 @@ TEST(TesseractInstanceTest, TestMultipleTessInstanceVariables) {
|
||||
api->SetVariable(double_param_name.c_str(), double_param_str[i].c_str());
|
||||
}
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
tesseract::TessBaseAPI *api = (i == 0) ? &tess1 : &tess2;
|
||||
tesseract::TessBaseAPI* api = (i == 0) ? &tess1 : &tess2;
|
||||
EXPECT_FALSE(api->GetStringVariable(illegal_name.c_str()));
|
||||
int intvar;
|
||||
EXPECT_TRUE(api->GetIntVariable(int_param_name.c_str(), &intvar));
|
||||
|
@ -37,18 +37,16 @@ using tesseract::TessBaseAPI;
|
||||
|
||||
namespace {
|
||||
|
||||
const char* kTessLangs[] = { "eng", "vie", NULL };
|
||||
const char* kTessImages[] = { "HelloGoogle.tif", "viet.tif", NULL };
|
||||
const char* kTessTruthText[] = { "Hello Google", "\x74\x69\xe1\xba\xbf\x6e\x67",
|
||||
NULL };
|
||||
const char* kTessLangs[] = {"eng", "vie", NULL};
|
||||
const char* kTessImages[] = {"HelloGoogle.tif", "viet.tif", NULL};
|
||||
const char* kTessTruthText[] = {"Hello Google", "\x74\x69\xe1\xba\xbf\x6e\x67",
|
||||
NULL};
|
||||
|
||||
const char* kCubeLangs[] = { "hin", "ara", NULL };
|
||||
const char* kCubeImages[] = { "raaj.tif", "arabic.tif", NULL};
|
||||
const char* kCubeLangs[] = {"hin", "ara", NULL};
|
||||
const char* kCubeImages[] = {"raaj.tif", "arabic.tif", NULL};
|
||||
const char* kCubeTruthText[] = {
|
||||
"\xe0\xa4\xb0\xe0\xa4\xbe\xe0\xa4\x9c",
|
||||
"\xd8\xa7\xd9\x84\xd8\xb9\xd8\xb1\xd8\xa8\xd9\x8a",
|
||||
NULL};
|
||||
|
||||
"\xe0\xa4\xb0\xe0\xa4\xbe\xe0\xa4\x9c",
|
||||
"\xd8\xa7\xd9\x84\xd8\xb9\xd8\xb1\xd8\xa8\xd9\x8a", NULL};
|
||||
|
||||
class BaseapiThreadTest : public ::testing::Test {
|
||||
protected:
|
||||
@ -85,16 +83,16 @@ class BaseapiThreadTest : public ::testing::Test {
|
||||
// and so entirely disallow concurrent access of a Pix instance.
|
||||
const int n = num_langs_ * FLAGS_reps;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
string path = FLAGS_test_srcdir +
|
||||
"/testdata/" +
|
||||
image_files[i % num_langs_];
|
||||
string path =
|
||||
FLAGS_test_srcdir + "/testdata/" + image_files[i % num_langs_];
|
||||
Pix* new_pix = pixRead(path.c_str());
|
||||
QCHECK(new_pix != NULL) << "Could not read " << path;
|
||||
pix_.push_back(new_pix);
|
||||
}
|
||||
|
||||
pool_size_ = (FLAGS_max_concurrent_instances < 1) ?
|
||||
num_langs_ * FLAGS_reps : FLAGS_max_concurrent_instances;
|
||||
pool_size_ = (FLAGS_max_concurrent_instances < 1)
|
||||
? num_langs_ * FLAGS_reps
|
||||
: FLAGS_max_concurrent_instances;
|
||||
}
|
||||
|
||||
static void TearDownTestCase() {
|
||||
@ -108,9 +106,7 @@ class BaseapiThreadTest : public ::testing::Test {
|
||||
pool_->StartWorkers();
|
||||
}
|
||||
|
||||
void WaitForPoolWorkers() {
|
||||
pool_.reset(NULL);
|
||||
}
|
||||
void WaitForPoolWorkers() { pool_.reset(NULL); }
|
||||
|
||||
std::unique_ptr<ThreadPool> pool_;
|
||||
static int pool_size_;
|
||||
@ -127,25 +123,23 @@ std::vector<string> BaseapiThreadTest::langs_;
|
||||
std::vector<string> BaseapiThreadTest::gt_text_;
|
||||
int BaseapiThreadTest::num_langs_;
|
||||
|
||||
|
||||
void InitTessInstance(TessBaseAPI* tess, const string& lang) {
|
||||
CHECK(tess != nullptr);
|
||||
const string kTessdataPath = file::JoinPath(
|
||||
FLAGS_test_srcdir, "tessdata");
|
||||
const string kTessdataPath = file::JoinPath(FLAGS_test_srcdir, "tessdata");
|
||||
EXPECT_EQ(0, tess->Init(kTessdataPath.c_str(), lang.c_str()));
|
||||
}
|
||||
|
||||
void GetCleanedText(TessBaseAPI* tess, Pix* pix, string* ocr_text) {
|
||||
tess->SetImage(pix);
|
||||
char *result = tess->GetUTF8Text();
|
||||
char* result = tess->GetUTF8Text();
|
||||
*ocr_text = result;
|
||||
delete[] result;
|
||||
absl::StripAsciiWhitespace(ocr_text);
|
||||
}
|
||||
|
||||
void VerifyTextResult(TessBaseAPI* tess, Pix* pix, const string& lang,
|
||||
const string& expected_text) {
|
||||
TessBaseAPI *tess_local = NULL;
|
||||
const string& expected_text) {
|
||||
TessBaseAPI* tess_local = NULL;
|
||||
if (tess) {
|
||||
tess_local = tess;
|
||||
} else {
|
||||
@ -155,11 +149,9 @@ void VerifyTextResult(TessBaseAPI* tess, Pix* pix, const string& lang,
|
||||
string ocr_text;
|
||||
GetCleanedText(tess_local, pix, &ocr_text);
|
||||
EXPECT_STREQ(expected_text.c_str(), ocr_text.c_str());
|
||||
if (tess_local != tess)
|
||||
delete tess_local;
|
||||
if (tess_local != tess) delete tess_local;
|
||||
}
|
||||
|
||||
|
||||
// Check that Tesseract/Cube produce the correct results in single-threaded
|
||||
// operation. If not, it is pointless to run the real multi-threaded tests.
|
||||
TEST_F(BaseapiThreadTest, TestBasicSanity) {
|
||||
|
@ -34,13 +34,11 @@ class BitVectorTest : public testing::Test {
|
||||
TestAll(*map, false);
|
||||
map->SetBit(2);
|
||||
// Set all the odds to true.
|
||||
for (int i = 3; i <= kPrimeLimit; i += 2)
|
||||
map->SetValue(i, true);
|
||||
for (int i = 3; i <= kPrimeLimit; i += 2) map->SetValue(i, true);
|
||||
int factor_limit = static_cast<int>(sqrt(1.0 + kPrimeLimit));
|
||||
for (int f = 3; f <= factor_limit; f += 2) {
|
||||
if (map->At(f)) {
|
||||
for (int m = 2; m * f <= kPrimeLimit; ++m)
|
||||
map->ResetBit(f * m);
|
||||
for (int m = 2; m * f <= kPrimeLimit; ++m) map->ResetBit(f * m);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -16,25 +16,14 @@
|
||||
// If this test fails to compile, clean up the includes in baseapi.h!
|
||||
// They are not supposed to drag in definitions of any of the tesseract
|
||||
// types included in this enum!
|
||||
enum NameTester {
|
||||
ABORT,
|
||||
OKAY,
|
||||
LOG,
|
||||
BLOB,
|
||||
ELIST,
|
||||
TBOX,
|
||||
TPOINT,
|
||||
WORD
|
||||
};
|
||||
enum NameTester { ABORT, OKAY, LOG, BLOB, ELIST, TBOX, TPOINT, WORD };
|
||||
|
||||
#define ERRCODE_H // avoid redefinition of ABORT in errcode.h
|
||||
#define ERRCODE_H // avoid redefinition of ABORT in errcode.h
|
||||
#include "include_gunit.h"
|
||||
|
||||
namespace {
|
||||
|
||||
// Verifies that the global namespace is clean.
|
||||
TEST(CleanNamespaceTess, DummyTest) {
|
||||
tesseract::TessBaseAPI api;
|
||||
}
|
||||
TEST(CleanNamespaceTess, DummyTest) { tesseract::TessBaseAPI api; }
|
||||
|
||||
} // namespace.
|
||||
|
@ -27,11 +27,9 @@ class TestableColPartition : public ColPartition {
|
||||
|
||||
class ColPartitionTest : public testing::Test {
|
||||
protected:
|
||||
void SetUp() {
|
||||
}
|
||||
void SetUp() {}
|
||||
|
||||
void TearDown() {
|
||||
}
|
||||
void TearDown() {}
|
||||
};
|
||||
|
||||
TEST_F(ColPartitionTest, IsInSameColumnAsReflexive) {
|
||||
@ -75,4 +73,4 @@ TEST_F(ColPartitionTest, IsInSameColumnAsPartialOverlap) {
|
||||
EXPECT_TRUE(b.IsInSameColumnAs(a));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace
|
||||
|
@ -41,10 +41,9 @@ class CommandlineflagsTest : public ::testing::Test {
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
TEST_F(CommandlineflagsTest, RemoveFlags) {
|
||||
const char* const_argv[] = { "Progname", "--foo_int", "3",
|
||||
"file1.h", "file2.h" };
|
||||
const char* const_argv[] = {"Progname", "--foo_int", "3", "file1.h",
|
||||
"file2.h"};
|
||||
int argc = ARRAYSIZE(const_argv);
|
||||
char** argv = const_cast<char**>(const_argv);
|
||||
tesseract::ParseCommandLineFlags(argv[0], &argc, &argv, true);
|
||||
@ -56,7 +55,7 @@ TEST_F(CommandlineflagsTest, RemoveFlags) {
|
||||
EXPECT_STREQ("file2.h", argv[2]);
|
||||
}
|
||||
|
||||
#if 0 // TODO: this test needs an update (it currently fails).
|
||||
#if 0 // TODO: this test needs an update (it currently fails).
|
||||
TEST_F(CommandlineflagsTest, PrintUsageAndExit) {
|
||||
const char* argv[] = { "Progname", "--help" };
|
||||
EXPECT_EXIT(TestParser("Progname [flags]", ARRAYSIZE(argv), argv),
|
||||
@ -66,66 +65,65 @@ TEST_F(CommandlineflagsTest, PrintUsageAndExit) {
|
||||
#endif
|
||||
|
||||
TEST_F(CommandlineflagsTest, ExitsWithErrorOnInvalidFlag) {
|
||||
const char* argv[] = { "", "--test_nonexistent_flag" };
|
||||
EXPECT_EXIT(TestParser(ARRAYSIZE(argv), argv),
|
||||
::testing::ExitedWithCode(1),
|
||||
const char* argv[] = {"", "--test_nonexistent_flag"};
|
||||
EXPECT_EXIT(TestParser(ARRAYSIZE(argv), argv), ::testing::ExitedWithCode(1),
|
||||
"ERROR: Non-existent flag");
|
||||
}
|
||||
|
||||
TEST_F(CommandlineflagsTest, ParseIntegerFlags) {
|
||||
const char* argv[] = { "", "--foo_int=3", "--bar_int", "-4" };
|
||||
const char* argv[] = {"", "--foo_int=3", "--bar_int", "-4"};
|
||||
TestParser(ARRAYSIZE(argv), argv);
|
||||
EXPECT_EQ(3, FLAGS_foo_int);
|
||||
EXPECT_EQ(-4, FLAGS_bar_int);
|
||||
|
||||
const char* arg_no_value[] = { "", "--bar_int" };
|
||||
const char* arg_no_value[] = {"", "--bar_int"};
|
||||
EXPECT_EXIT(TestParser(ARRAYSIZE(arg_no_value), arg_no_value),
|
||||
::testing::ExitedWithCode(1), "ERROR");
|
||||
|
||||
const char* arg_invalid_value[] = { "", "--bar_int", "--foo_int=3" };
|
||||
const char* arg_invalid_value[] = {"", "--bar_int", "--foo_int=3"};
|
||||
EXPECT_EXIT(TestParser(ARRAYSIZE(arg_invalid_value), arg_invalid_value),
|
||||
::testing::ExitedWithCode(1), "ERROR");
|
||||
|
||||
const char* arg_bad_format[] = { "", "--bar_int=" };
|
||||
const char* arg_bad_format[] = {"", "--bar_int="};
|
||||
EXPECT_EXIT(TestParser(ARRAYSIZE(arg_bad_format), arg_bad_format),
|
||||
::testing::ExitedWithCode(1), "ERROR");
|
||||
}
|
||||
|
||||
TEST_F(CommandlineflagsTest, ParseDoubleFlags) {
|
||||
const char* argv[] = { "", "--foo_double=3.14", "--bar_double", "1.2" };
|
||||
const char* argv[] = {"", "--foo_double=3.14", "--bar_double", "1.2"};
|
||||
TestParser(ARRAYSIZE(argv), argv);
|
||||
|
||||
EXPECT_EQ(3.14, FLAGS_foo_double);
|
||||
EXPECT_EQ(1.2, FLAGS_bar_double);
|
||||
|
||||
const char* arg_no_value[] = { "", "--bar_double" };
|
||||
EXPECT_EXIT(TestParser(2, arg_no_value),
|
||||
::testing::ExitedWithCode(1), "ERROR");
|
||||
const char* arg_no_value[] = {"", "--bar_double"};
|
||||
EXPECT_EXIT(TestParser(2, arg_no_value), ::testing::ExitedWithCode(1),
|
||||
"ERROR");
|
||||
|
||||
const char* arg_bad_format[] = { "", "--bar_double=" };
|
||||
EXPECT_EXIT(TestParser(2, arg_bad_format),
|
||||
::testing::ExitedWithCode(1), "ERROR");
|
||||
const char* arg_bad_format[] = {"", "--bar_double="};
|
||||
EXPECT_EXIT(TestParser(2, arg_bad_format), ::testing::ExitedWithCode(1),
|
||||
"ERROR");
|
||||
}
|
||||
|
||||
TEST_F(CommandlineflagsTest, ParseStringFlags) {
|
||||
const char* argv[] = { "", "--foo_string=abc", "--bar_string", "def" };
|
||||
const char* argv[] = {"", "--foo_string=abc", "--bar_string", "def"};
|
||||
TestParser(ARRAYSIZE(argv), argv);
|
||||
|
||||
EXPECT_STREQ("abc", FLAGS_foo_string.c_str());
|
||||
EXPECT_STREQ("def", FLAGS_bar_string.c_str());
|
||||
|
||||
const char* arg_no_value[] = { "", "--bar_string" };
|
||||
EXPECT_EXIT(TestParser(2, arg_no_value),
|
||||
::testing::ExitedWithCode(1), "ERROR");
|
||||
const char* arg_no_value[] = {"", "--bar_string"};
|
||||
EXPECT_EXIT(TestParser(2, arg_no_value), ::testing::ExitedWithCode(1),
|
||||
"ERROR");
|
||||
|
||||
FLAGS_bar_string.set_value("bar");
|
||||
const char* arg_empty_string[] = { "", "--bar_string=" };
|
||||
const char* arg_empty_string[] = {"", "--bar_string="};
|
||||
TestParser(2, arg_empty_string);
|
||||
EXPECT_STREQ("", FLAGS_bar_string.c_str());
|
||||
}
|
||||
|
||||
TEST_F(CommandlineflagsTest, ParseBoolFlags) {
|
||||
const char* argv[] = { "", "--foo_bool=true", "--bar_bool=1" };
|
||||
const char* argv[] = {"", "--foo_bool=true", "--bar_bool=1"};
|
||||
FLAGS_foo_bool.set_value(false);
|
||||
FLAGS_bar_bool.set_value(false);
|
||||
TestParser(ARRAYSIZE(argv), argv);
|
||||
@ -133,7 +131,7 @@ TEST_F(CommandlineflagsTest, ParseBoolFlags) {
|
||||
EXPECT_TRUE(FLAGS_foo_bool);
|
||||
EXPECT_TRUE(FLAGS_bar_bool);
|
||||
|
||||
const char* inv_argv[] = { "", "--foo_bool=false", "--bar_bool=0" };
|
||||
const char* inv_argv[] = {"", "--foo_bool=false", "--bar_bool=0"};
|
||||
FLAGS_foo_bool.set_value(true);
|
||||
FLAGS_bar_bool.set_value(true);
|
||||
TestParser(3, inv_argv);
|
||||
@ -141,19 +139,19 @@ TEST_F(CommandlineflagsTest, ParseBoolFlags) {
|
||||
EXPECT_FALSE(FLAGS_foo_bool);
|
||||
EXPECT_FALSE(FLAGS_bar_bool);
|
||||
|
||||
const char* arg_implied_true[] = { "", "--bar_bool" };
|
||||
const char* arg_implied_true[] = {"", "--bar_bool"};
|
||||
FLAGS_bar_bool.set_value(false);
|
||||
TestParser(2, arg_implied_true);
|
||||
EXPECT_TRUE(FLAGS_bar_bool);
|
||||
|
||||
const char* arg_missing_val[] = { "", "--bar_bool=" };
|
||||
EXPECT_EXIT(TestParser(2, arg_missing_val),
|
||||
::testing::ExitedWithCode(1), "ERROR");
|
||||
const char* arg_missing_val[] = {"", "--bar_bool="};
|
||||
EXPECT_EXIT(TestParser(2, arg_missing_val), ::testing::ExitedWithCode(1),
|
||||
"ERROR");
|
||||
}
|
||||
|
||||
TEST_F(CommandlineflagsTest, ParseOldFlags) {
|
||||
EXPECT_STREQ("", FLAGS_q.c_str());
|
||||
const char* argv[] = { "", "-q", "text" };
|
||||
const char* argv[] = {"", "-q", "text"};
|
||||
TestParser(ARRAYSIZE(argv), argv);
|
||||
EXPECT_STREQ("text", FLAGS_q.c_str());
|
||||
}
|
||||
|
@ -5,20 +5,20 @@
|
||||
|
||||
#include "util/process/subprocess.h"
|
||||
|
||||
#include "tesseract/dict/trie.h"
|
||||
#include "tesseract/ccutil/unicharset.h"
|
||||
#include "tesseract/ccstruct/ratngs.h"
|
||||
#include "tesseract/ccutil/unicharset.h"
|
||||
#include "tesseract/dict/trie.h"
|
||||
|
||||
namespace {
|
||||
|
||||
void RemoveTrailingLineTerminators(char *line) {
|
||||
char *end = line + strlen(line) - 1;
|
||||
void RemoveTrailingLineTerminators(char* line) {
|
||||
char* end = line + strlen(line) - 1;
|
||||
while (end >= line && ('\n' == *end || '\r' == *end)) {
|
||||
*end-- = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void AddLineToSet(std::set<string> *words, char *line) {
|
||||
void AddLineToSet(std::set<string>* words, char* line) {
|
||||
RemoveTrailingLineTerminators(line);
|
||||
words->insert(line);
|
||||
}
|
||||
@ -27,7 +27,7 @@ void AddLineToSet(std::set<string> *words, char *line) {
|
||||
// aka Directed Acyclic Word Graphs).
|
||||
class DawgTest : public testing::Test {
|
||||
protected:
|
||||
void LoadWordlist(const string &filename, std::set<string> *words) const {
|
||||
void LoadWordlist(const string& filename, std::set<string>* words) const {
|
||||
FileLineReader::Options options;
|
||||
options.set_comment_char(0);
|
||||
FileLineReader flr(filename.c_str(), options);
|
||||
@ -35,8 +35,7 @@ class DawgTest : public testing::Test {
|
||||
flr.Reload();
|
||||
}
|
||||
string TestDataNameToPath(const string& name) const {
|
||||
return file::JoinPath(FLAGS_test_srcdir,
|
||||
"testdata/" + name);
|
||||
return file::JoinPath(FLAGS_test_srcdir, "testdata/" + name);
|
||||
}
|
||||
string TessBinaryPath(const string& binary_name) const {
|
||||
return file::JoinPath(FLAGS_test_srcdir,
|
||||
@ -44,10 +43,8 @@ class DawgTest : public testing::Test {
|
||||
string OutputNameToPath(const string& name) const {
|
||||
return file::JoinPath(FLAGS_test_tmpdir, name);
|
||||
}
|
||||
int RunCommand(const string &program,
|
||||
const string &arg1,
|
||||
const string &arg2,
|
||||
const string &arg3) const {
|
||||
int RunCommand(const string& program, const string& arg1, const string& arg2,
|
||||
const string& arg3) const {
|
||||
SubProcess p;
|
||||
std::vector<string> argv;
|
||||
argv.push_back(program);
|
||||
@ -62,8 +59,8 @@ class DawgTest : public testing::Test {
|
||||
// Test that we are able to convert a wordlist file (one "word" per line) to
|
||||
// a dawg (a compressed format) and then extract the original wordlist back
|
||||
// out using the tools "wordlist2dawg" and "dawg2wordlist."
|
||||
void TestDawgRoundTrip(const string &unicharset_filename,
|
||||
const string &wordlist_filename) const {
|
||||
void TestDawgRoundTrip(const string& unicharset_filename,
|
||||
const string& wordlist_filename) const {
|
||||
std::set<string> orig_words, roundtrip_words;
|
||||
string unicharset = TestDataNameToPath(unicharset_filename);
|
||||
string orig_wordlist = TestDataNameToPath(wordlist_filename);
|
||||
@ -71,8 +68,7 @@ class DawgTest : public testing::Test {
|
||||
string output_wordlist = OutputNameToPath(wordlist_filename);
|
||||
LoadWordlist(orig_wordlist, &orig_words);
|
||||
EXPECT_EQ(
|
||||
RunCommand("wordlist2dawg", orig_wordlist, output_dawg, unicharset),
|
||||
0);
|
||||
RunCommand("wordlist2dawg", orig_wordlist, output_dawg, unicharset), 0);
|
||||
EXPECT_EQ(
|
||||
RunCommand("dawg2wordlist", unicharset, output_dawg, output_wordlist),
|
||||
0);
|
||||
|
@ -18,11 +18,9 @@ namespace {
|
||||
|
||||
class DENORMTest : public testing::Test {
|
||||
public:
|
||||
void SetUp() {
|
||||
}
|
||||
void SetUp() {}
|
||||
|
||||
void TearDown() {
|
||||
}
|
||||
void TearDown() {}
|
||||
|
||||
void ExpectCorrectTransform(const DENORM& denorm, const TPOINT& src,
|
||||
const TPOINT& result, bool local) {
|
||||
@ -47,8 +45,7 @@ class DENORMTest : public testing::Test {
|
||||
// Tests a simple baseline-style normalization.
|
||||
TEST_F(DENORMTest, NoRotations) {
|
||||
DENORM denorm;
|
||||
denorm.SetupNormalization(NULL, NULL, NULL,
|
||||
1000.0f, 2000.0f, 2.0f, 3.0f,
|
||||
denorm.SetupNormalization(NULL, NULL, NULL, 1000.0f, 2000.0f, 2.0f, 3.0f,
|
||||
0.0f, static_cast<float>(kBlnBaselineOffset));
|
||||
TPOINT pt1(1100, 2000);
|
||||
TPOINT result1(200, kBlnBaselineOffset);
|
||||
@ -64,9 +61,8 @@ TEST_F(DENORMTest, NoRotations) {
|
||||
TEST_F(DENORMTest, WithRotations) {
|
||||
DENORM denorm;
|
||||
FCOORD rotation90(0.0f, 1.0f);
|
||||
denorm.SetupNormalization(NULL, &rotation90, NULL,
|
||||
1000.0f, 2000.0f, 2.0f, 3.0f,
|
||||
0.0f, static_cast<float>(kBlnBaselineOffset));
|
||||
denorm.SetupNormalization(NULL, &rotation90, NULL, 1000.0f, 2000.0f, 2.0f,
|
||||
3.0f, 0.0f, static_cast<float>(kBlnBaselineOffset));
|
||||
|
||||
TPOINT pt1(1100, 2000);
|
||||
TPOINT result1(0, 200 + kBlnBaselineOffset);
|
||||
@ -81,14 +77,13 @@ TEST_F(DENORMTest, WithRotations) {
|
||||
// Tests a simple baseline-style normalization with a second rotation & scale.
|
||||
TEST_F(DENORMTest, Multiple) {
|
||||
DENORM denorm;
|
||||
denorm.SetupNormalization(NULL, NULL, NULL,
|
||||
1000.0f, 2000.0f, 2.0f, 3.0f,
|
||||
denorm.SetupNormalization(NULL, NULL, NULL, 1000.0f, 2000.0f, 2.0f, 3.0f,
|
||||
0.0f, static_cast<float>(kBlnBaselineOffset));
|
||||
|
||||
DENORM denorm2;
|
||||
FCOORD rotation90(0.0f, 1.0f);
|
||||
denorm2.SetupNormalization(NULL, &rotation90, &denorm,
|
||||
128.0f, 128.0f, 0.5f, 0.25f, 0.0f, 0.0f);
|
||||
denorm2.SetupNormalization(NULL, &rotation90, &denorm, 128.0f, 128.0f, 0.5f,
|
||||
0.25f, 0.0f, 0.0f);
|
||||
TPOINT pt1(1050, 2000);
|
||||
TPOINT result1(100, kBlnBaselineOffset);
|
||||
ExpectCorrectTransform(denorm, pt1, result1, true);
|
||||
|
@ -13,9 +13,8 @@ namespace tesseract {
|
||||
|
||||
class TestableEquationDetect : public EquationDetect {
|
||||
public:
|
||||
TestableEquationDetect(const char* tessdata,
|
||||
Tesseract* lang_tesseract) :
|
||||
EquationDetect(tessdata, "equ") {
|
||||
TestableEquationDetect(const char* tessdata, Tesseract* lang_tesseract)
|
||||
: EquationDetect(tessdata, "equ") {
|
||||
SetLangTesseract(lang_tesseract);
|
||||
}
|
||||
|
||||
@ -26,46 +25,44 @@ class TestableEquationDetect : public EquationDetect {
|
||||
CHECK_LE(math_blobs + digit_blobs, total_blobs);
|
||||
int count = 0;
|
||||
for (int i = 0; i < math_blobs; i++, count++) {
|
||||
BLOBNBOX *blob = new BLOBNBOX();
|
||||
BLOBNBOX* blob = new BLOBNBOX();
|
||||
blob->set_special_text_type(BSTT_MATH);
|
||||
part->AddBox(blob);
|
||||
}
|
||||
for (int i = 0; i < digit_blobs; i++, count++) {
|
||||
BLOBNBOX *blob = new BLOBNBOX();
|
||||
BLOBNBOX* blob = new BLOBNBOX();
|
||||
blob->set_special_text_type(BSTT_DIGIT);
|
||||
part->AddBox(blob);
|
||||
}
|
||||
for (int i = count; i < total_blobs; i++) {
|
||||
BLOBNBOX *blob = new BLOBNBOX();
|
||||
BLOBNBOX* blob = new BLOBNBOX();
|
||||
blob->set_special_text_type(BSTT_NONE);
|
||||
part->AddBox(blob);
|
||||
}
|
||||
}
|
||||
|
||||
// Set up pix_binary for lang_tesseract_.
|
||||
void SetPixBinary(Pix *pix) {
|
||||
void SetPixBinary(Pix* pix) {
|
||||
CHECK_EQ(1, pixGetDepth(pix));
|
||||
*(lang_tesseract_->mutable_pix_binary()) = pix;
|
||||
}
|
||||
|
||||
void RunIdentifySpecialText(BLOBNBOX*blob, const int height_th) {
|
||||
void RunIdentifySpecialText(BLOBNBOX* blob, const int height_th) {
|
||||
IdentifySpecialText(blob, height_th);
|
||||
}
|
||||
|
||||
BlobSpecialTextType RunEstimateTypeForUnichar(
|
||||
const char*val) {
|
||||
BlobSpecialTextType RunEstimateTypeForUnichar(const char* val) {
|
||||
const UNICHARSET& unicharset = lang_tesseract_->unicharset;
|
||||
return EstimateTypeForUnichar(unicharset, unicharset.unichar_to_id(val));
|
||||
}
|
||||
|
||||
EquationDetect::IndentType RunIsIndented(
|
||||
ColPartitionGrid* part_grid, ColPartition* part) {
|
||||
EquationDetect::IndentType RunIsIndented(ColPartitionGrid* part_grid,
|
||||
ColPartition* part) {
|
||||
this->part_grid_ = part_grid;
|
||||
return IsIndented(part);
|
||||
}
|
||||
|
||||
bool RunIsNearSmallNeighbor(const TBOX& seed_box,
|
||||
const TBOX& part_box) {
|
||||
bool RunIsNearSmallNeighbor(const TBOX& seed_box, const TBOX& part_box) {
|
||||
return IsNearSmallNeighbor(seed_box, part_box);
|
||||
}
|
||||
|
||||
@ -108,17 +105,15 @@ class EquationFinderTest : public testing::Test {
|
||||
string testdata_dir_;
|
||||
|
||||
void SetUp() {
|
||||
string tessdata_dir = file::JoinPath(
|
||||
FLAGS_test_srcdir, "tessdata");
|
||||
string tessdata_dir = file::JoinPath(FLAGS_test_srcdir, "tessdata");
|
||||
tesseract_.reset(new Tesseract());
|
||||
tesseract_->init_tesseract(tessdata_dir.c_str(), "eng", OEM_TESSERACT_ONLY);
|
||||
tesseract_->set_source_resolution(300);
|
||||
equation_det_.reset(new TestableEquationDetect(
|
||||
tessdata_dir.c_str(), tesseract_.get()));
|
||||
equation_det_.reset(
|
||||
new TestableEquationDetect(tessdata_dir.c_str(), tesseract_.get()));
|
||||
equation_det_->SetResolution(300);
|
||||
|
||||
testdata_dir_ = file::JoinPath(
|
||||
FLAGS_test_srcdir, "testdata");
|
||||
testdata_dir_ = file::JoinPath(FLAGS_test_srcdir, "testdata");
|
||||
}
|
||||
|
||||
void TearDown() {
|
||||
@ -127,19 +122,17 @@ class EquationFinderTest : public testing::Test {
|
||||
}
|
||||
|
||||
// Add a BLOCK covering the whole page.
|
||||
void AddPageBlock(Pix* pix,
|
||||
BLOCK_LIST* blocks) {
|
||||
void AddPageBlock(Pix* pix, BLOCK_LIST* blocks) {
|
||||
CHECK(pix != nullptr);
|
||||
CHECK(blocks != nullptr);
|
||||
BLOCK_IT block_it(blocks);
|
||||
BLOCK* block = new BLOCK("", TRUE, 0, 0, 0, 0,
|
||||
pixGetWidth(pix), pixGetHeight(pix));
|
||||
BLOCK* block =
|
||||
new BLOCK("", TRUE, 0, 0, 0, 0, pixGetWidth(pix), pixGetHeight(pix));
|
||||
block_it.add_to_end(block);
|
||||
}
|
||||
|
||||
// Create col partitions, add into part_grid, and put them into all_parts.
|
||||
void CreateColParts(const int rows,
|
||||
const int cols,
|
||||
void CreateColParts(const int rows, const int cols,
|
||||
ColPartitionGrid* part_grid,
|
||||
std::vector<ColPartition*>* all_parts) {
|
||||
const int kWidth = 10, kHeight = 10;
|
||||
@ -148,8 +141,8 @@ class EquationFinderTest : public testing::Test {
|
||||
for (int x = 0; x < cols; ++x) {
|
||||
int left = x * kWidth * 2, bottom = y * kHeight * 2;
|
||||
TBOX box(left, bottom, left + kWidth, bottom + kHeight);
|
||||
ColPartition* part = ColPartition::FakePartition(
|
||||
box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
|
||||
ColPartition* part = ColPartition::FakePartition(box, PT_FLOWING_TEXT,
|
||||
BRT_TEXT, BTFT_NONE);
|
||||
part_grid->InsertBBox(true, true, part);
|
||||
all_parts->push_back(part);
|
||||
}
|
||||
@ -159,15 +152,14 @@ class EquationFinderTest : public testing::Test {
|
||||
void ClearParts(std::vector<ColPartition*>* all_parts) {
|
||||
for (int i = 0; i < all_parts->size(); ++i) {
|
||||
(*all_parts)[i]->DeleteBoxes();
|
||||
delete((*all_parts)[i]);
|
||||
delete ((*all_parts)[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Create a BLOBNBOX object with bounding box tbox, and add it into part.
|
||||
void AddBlobIntoPart(const TBOX& tbox,
|
||||
ColPartition* part) {
|
||||
void AddBlobIntoPart(const TBOX& tbox, ColPartition* part) {
|
||||
CHECK(part != nullptr);
|
||||
BLOBNBOX *blob = new BLOBNBOX();
|
||||
BLOBNBOX* blob = new BLOBNBOX();
|
||||
blob->set_bounding_box(tbox);
|
||||
part->AddBox(blob);
|
||||
}
|
||||
@ -176,7 +168,7 @@ class EquationFinderTest : public testing::Test {
|
||||
TEST_F(EquationFinderTest, IdentifySpecialText) {
|
||||
// Load Image.
|
||||
string imagefile = file::JoinPath(testdata_dir_, "equ_gt1.tif");
|
||||
Pix *pix_binary = pixRead(imagefile.c_str());
|
||||
Pix* pix_binary = pixRead(imagefile.c_str());
|
||||
CHECK(pix_binary != NULL && pixGetDepth(pix_binary) == 1);
|
||||
|
||||
// Get components.
|
||||
@ -251,24 +243,24 @@ TEST_F(EquationFinderTest, IsIndented) {
|
||||
//
|
||||
// part 5: ********
|
||||
TBOX box1(0, 950, 999, 999);
|
||||
ColPartition* part1 = ColPartition::FakePartition(
|
||||
box1, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
|
||||
ColPartition* part1 =
|
||||
ColPartition::FakePartition(box1, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
|
||||
part_grid.InsertBBox(true, true, part1);
|
||||
TBOX box2(300, 920, 900, 940);
|
||||
ColPartition* part2 = ColPartition::FakePartition(
|
||||
box2, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
|
||||
ColPartition* part2 =
|
||||
ColPartition::FakePartition(box2, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
|
||||
part_grid.InsertBBox(true, true, part2);
|
||||
TBOX box3(0, 900, 600, 910);
|
||||
ColPartition* part3 = ColPartition::FakePartition(
|
||||
box3, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
|
||||
ColPartition* part3 =
|
||||
ColPartition::FakePartition(box3, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
|
||||
part_grid.InsertBBox(true, true, part3);
|
||||
TBOX box4(300, 890, 600, 899);
|
||||
ColPartition* part4 = ColPartition::FakePartition(
|
||||
box4, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
|
||||
ColPartition* part4 =
|
||||
ColPartition::FakePartition(box4, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
|
||||
part_grid.InsertBBox(true, true, part4);
|
||||
TBOX box5(300, 500, 900, 510);
|
||||
ColPartition* part5 = ColPartition::FakePartition(
|
||||
box5, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
|
||||
ColPartition* part5 =
|
||||
ColPartition::FakePartition(box5, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
|
||||
part_grid.InsertBBox(true, true, part5);
|
||||
|
||||
// Test
|
||||
@ -290,15 +282,15 @@ TEST_F(EquationFinderTest, IsIndented) {
|
||||
|
||||
// Release memory.
|
||||
part1->DeleteBoxes();
|
||||
delete(part1);
|
||||
delete (part1);
|
||||
part2->DeleteBoxes();
|
||||
delete(part2);
|
||||
delete (part2);
|
||||
part3->DeleteBoxes();
|
||||
delete(part3);
|
||||
delete (part3);
|
||||
part4->DeleteBoxes();
|
||||
delete(part4);
|
||||
delete (part4);
|
||||
part5->DeleteBoxes();
|
||||
delete(part5);
|
||||
delete (part5);
|
||||
}
|
||||
|
||||
TEST_F(EquationFinderTest, IsNearSmallNeighbor) {
|
||||
@ -332,14 +324,14 @@ TEST_F(EquationFinderTest, IsNearSmallNeighbor) {
|
||||
|
||||
TEST_F(EquationFinderTest, CheckSeedBlobsCount) {
|
||||
TBOX box(0, 950, 999, 999);
|
||||
ColPartition* part1 = ColPartition::FakePartition(
|
||||
box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
|
||||
ColPartition* part2= ColPartition::FakePartition(
|
||||
box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
|
||||
ColPartition* part3 = ColPartition::FakePartition(
|
||||
box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
|
||||
ColPartition* part4 = ColPartition::FakePartition(
|
||||
box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
|
||||
ColPartition* part1 =
|
||||
ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
|
||||
ColPartition* part2 =
|
||||
ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
|
||||
ColPartition* part3 =
|
||||
ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
|
||||
ColPartition* part4 =
|
||||
ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
|
||||
|
||||
// Part 1: 8 math, 0 digit, 20 total.
|
||||
equation_det_->AddMathDigitBlobs(8, 0, 20, part1);
|
||||
@ -359,19 +351,19 @@ TEST_F(EquationFinderTest, CheckSeedBlobsCount) {
|
||||
|
||||
// Release memory.
|
||||
part1->DeleteBoxes();
|
||||
delete(part1);
|
||||
delete (part1);
|
||||
part2->DeleteBoxes();
|
||||
delete(part2);
|
||||
delete (part2);
|
||||
part3->DeleteBoxes();
|
||||
delete(part3);
|
||||
delete (part3);
|
||||
part4->DeleteBoxes();
|
||||
delete(part4);
|
||||
delete (part4);
|
||||
}
|
||||
|
||||
TEST_F(EquationFinderTest, ComputeForegroundDensity) {
|
||||
// Create the pix with top half foreground, bottom half background.
|
||||
int width = 1024, height = 768;
|
||||
Pix *pix = pixCreate(width, height, 1);
|
||||
Pix* pix = pixCreate(width, height, 1);
|
||||
pixRasterop(pix, 0, 0, width, height / 2, PIX_SET, NULL, 0, 0);
|
||||
TBOX box1(100, 0, 140, 140), box2(100, height / 2 - 20, 140, height / 2 + 20),
|
||||
box3(100, height - 40, 140, height);
|
||||
@ -414,20 +406,20 @@ TEST_F(EquationFinderTest, ComputeCPsSuperBBox) {
|
||||
ColPartitionGrid part_grid(10, ICOORD(0, 0), ICOORD(1000, 1000));
|
||||
|
||||
TBOX box1(0, 0, 999, 99);
|
||||
ColPartition* part1 = ColPartition::FakePartition(
|
||||
box1, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
|
||||
ColPartition* part1 =
|
||||
ColPartition::FakePartition(box1, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
|
||||
TBOX box2(0, 100, 499, 199);
|
||||
ColPartition* part2 = ColPartition::FakePartition(
|
||||
box2, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
|
||||
ColPartition* part2 =
|
||||
ColPartition::FakePartition(box2, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
|
||||
TBOX box3(500, 100, 999, 199);
|
||||
ColPartition* part3 = ColPartition::FakePartition(
|
||||
box3, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
|
||||
ColPartition* part3 =
|
||||
ColPartition::FakePartition(box3, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
|
||||
TBOX box4(0, 200, 999, 299);
|
||||
ColPartition* part4 = ColPartition::FakePartition(
|
||||
box4, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
|
||||
ColPartition* part4 =
|
||||
ColPartition::FakePartition(box4, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
|
||||
TBOX box5(0, 900, 999, 999);
|
||||
ColPartition* part5 = ColPartition::FakePartition(
|
||||
box5, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
|
||||
ColPartition* part5 =
|
||||
ColPartition::FakePartition(box5, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
|
||||
|
||||
// Add part1->part3 into part_grid and test.
|
||||
part_grid.InsertBBox(true, true, part1);
|
||||
@ -448,21 +440,21 @@ TEST_F(EquationFinderTest, ComputeCPsSuperBBox) {
|
||||
|
||||
// Release memory.
|
||||
part1->DeleteBoxes();
|
||||
delete(part1);
|
||||
delete (part1);
|
||||
part2->DeleteBoxes();
|
||||
delete(part2);
|
||||
delete (part2);
|
||||
part3->DeleteBoxes();
|
||||
delete(part3);
|
||||
delete (part3);
|
||||
part4->DeleteBoxes();
|
||||
delete(part4);
|
||||
delete (part4);
|
||||
part5->DeleteBoxes();
|
||||
delete(part5);
|
||||
delete (part5);
|
||||
}
|
||||
|
||||
TEST_F(EquationFinderTest, SplitCPHorLite) {
|
||||
TBOX box(0, 0, 999, 99);
|
||||
ColPartition* part = ColPartition::FakePartition(
|
||||
box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
|
||||
ColPartition* part =
|
||||
ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
|
||||
part->DeleteBoxes();
|
||||
part->set_median_width(10);
|
||||
GenericVector<TBOX> splitted_boxes;
|
||||
@ -491,13 +483,13 @@ TEST_F(EquationFinderTest, SplitCPHorLite) {
|
||||
EXPECT_TRUE(TBOX(500, 0, 540, 35) == splitted_boxes[2]);
|
||||
|
||||
part->DeleteBoxes();
|
||||
delete(part);
|
||||
delete (part);
|
||||
}
|
||||
|
||||
TEST_F(EquationFinderTest, SplitCPHor) {
|
||||
TBOX box(0, 0, 999, 99);
|
||||
ColPartition* part = ColPartition::FakePartition(
|
||||
box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
|
||||
ColPartition* part =
|
||||
ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
|
||||
part->DeleteBoxes();
|
||||
part->set_median_width(10);
|
||||
GenericVector<ColPartition*> parts_splitted;
|
||||
@ -528,7 +520,7 @@ TEST_F(EquationFinderTest, SplitCPHor) {
|
||||
|
||||
parts_splitted.delete_data_pointers();
|
||||
part->DeleteBoxes();
|
||||
delete(part);
|
||||
delete (part);
|
||||
}
|
||||
|
||||
} // namespace tesseract
|
||||
|
@ -18,8 +18,7 @@ TEST(FileTest, JoinPath) {
|
||||
TEST(OutputBufferTest, WriteString) {
|
||||
const int kMaxBufSize = 128;
|
||||
char buffer[kMaxBufSize];
|
||||
for (int i = 0; i < kMaxBufSize; ++i)
|
||||
buffer[i] = '\0';
|
||||
for (int i = 0; i < kMaxBufSize; ++i) buffer[i] = '\0';
|
||||
FILE* fp = fmemopen(buffer, kMaxBufSize, "w");
|
||||
CHECK(fp != nullptr);
|
||||
|
||||
|
@ -21,12 +21,12 @@
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
int test_data[] = { 8, 1, 2, -4, 7, 9, 65536, 4, 9, 0};
|
||||
int test_data[] = {8, 1, 2, -4, 7, 9, 65536, 4, 9, 0};
|
||||
|
||||
// The fixture for testing GenericHeap and DoublePtr.
|
||||
class HeapTest : public testing::Test {
|
||||
public:
|
||||
virtual ~HeapTest();
|
||||
virtual ~HeapTest();
|
||||
// Pushes the test data onto both the heap and the KDVector.
|
||||
void PushTestData(GenericHeap<IntKDPair>* heap, KDVector* v) {
|
||||
for (int i = 0; i < ARRAYSIZE(test_data); ++i) {
|
||||
|
@ -12,8 +12,7 @@ namespace {
|
||||
|
||||
class ImagedataTest : public ::testing::Test {
|
||||
protected:
|
||||
ImagedataTest() {
|
||||
}
|
||||
ImagedataTest() {}
|
||||
|
||||
// Creates a fake DocumentData, writes it to a file, and returns the filename.
|
||||
string MakeFakeDoc(int num_pages, int doc_id,
|
||||
@ -51,7 +50,7 @@ TEST_F(ImagedataTest, CachesProperly) {
|
||||
// Allowances to read the document. Big enough for 1, 3, 0, all pages.
|
||||
const int kMemoryAllowances[] = {2000000, 4000000, 1000000, 100000000, 0};
|
||||
// Order in which to read the pages, with some sequential and some seeks.
|
||||
const int kPageReadOrder[] = { 0, 1, 2, 3, 8, 4, 5, 6, 7, 11, 10, 9, -1 };
|
||||
const int kPageReadOrder[] = {0, 1, 2, 3, 8, 4, 5, 6, 7, 11, 10, 9, -1};
|
||||
|
||||
std::vector<string> page_texts;
|
||||
string filename = MakeFakeDoc(kNumPages, 0, &page_texts);
|
||||
|
@ -13,14 +13,13 @@
|
||||
#ifndef TESSERACT_UNITTEST_INCLUDE_GUNIT_H_
|
||||
#define TESSERACT_UNITTEST_INCLUDE_GUNIT_H_
|
||||
|
||||
#include "errcode.h" // for ASSERT_HOST
|
||||
#include "fileio.h" // for tesseract::File
|
||||
#include "gtest/gtest.h"
|
||||
#include "errcode.h" // for ASSERT_HOST
|
||||
#include "fileio.h" // for tesseract::File
|
||||
|
||||
const char* FLAGS_test_tmpdir = ".";
|
||||
|
||||
class file: public tesseract::File {
|
||||
};
|
||||
class file : public tesseract::File {};
|
||||
|
||||
#define ABSL_ARRAYSIZE(arr) (sizeof(arr) / sizeof(arr[0]))
|
||||
#define ARRAYSIZE(arr) (sizeof(arr) / sizeof(arr[0]))
|
||||
|
@ -34,13 +34,11 @@ class IndexMapBiDiTest : public testing::Test {
|
||||
map->Init(kPrimeLimit + 1, false);
|
||||
map->SetMap(2, true);
|
||||
// Set all the odds to true.
|
||||
for (int i = 3; i <= kPrimeLimit; i += 2)
|
||||
map->SetMap(i, true);
|
||||
for (int i = 3; i <= kPrimeLimit; i += 2) map->SetMap(i, true);
|
||||
int factor_limit = static_cast<int>(sqrt(1.0 + kPrimeLimit));
|
||||
for (int f = 3; f <= factor_limit; f += 2) {
|
||||
if (map->SparseToCompact(f) >= 0) {
|
||||
for (int m = 2; m * f <= kPrimeLimit; ++m)
|
||||
map->SetMap(f * m, false);
|
||||
for (int m = 2; m * f <= kPrimeLimit; ++m) map->SetMap(f * m, false);
|
||||
}
|
||||
}
|
||||
map->Setup();
|
||||
|
@ -74,8 +74,8 @@ TEST_F(IntFeatureMapTest, Exhaustive) {
|
||||
int dtheta = kIntFeatureExtent / kThetaBuckets + 1;
|
||||
int bad_offsets = 0;
|
||||
for (int index = 0; index < total_buckets; ++index) {
|
||||
for (int dir = -tesseract::kNumOffsetMaps;
|
||||
dir <= tesseract::kNumOffsetMaps; ++dir) {
|
||||
for (int dir = -tesseract::kNumOffsetMaps; dir <= tesseract::kNumOffsetMaps;
|
||||
++dir) {
|
||||
int offset_index = map.OffsetFeature(index, dir);
|
||||
if (dir == 0) {
|
||||
EXPECT_EQ(index, offset_index);
|
||||
|
@ -7,8 +7,7 @@ namespace tesseract {
|
||||
namespace {
|
||||
|
||||
string TestDataNameToPath(const string& name) {
|
||||
return file::JoinPath(FLAGS_test_srcdir,
|
||||
"testdata", name);
|
||||
return file::JoinPath(FLAGS_test_srcdir, "testdata", name);
|
||||
}
|
||||
|
||||
// This is an integration test that verifies that CombineLangModel works to
|
||||
|
@ -13,35 +13,27 @@
|
||||
namespace {
|
||||
|
||||
using tesseract::MutableIterator;
|
||||
using tesseract::ResultIterator;
|
||||
using tesseract::PageIteratorLevel;
|
||||
using tesseract::ResultIterator;
|
||||
|
||||
const char* kStrings8087_054[] = {
|
||||
"dat", "Dalmatian", "", "DAMAGED DURING", "margarine,", NULL
|
||||
};
|
||||
const PolyBlockType kBlocks8087_054[] = {
|
||||
PT_HEADING_TEXT, PT_FLOWING_TEXT, PT_PULLOUT_IMAGE,
|
||||
PT_CAPTION_TEXT, PT_FLOWING_TEXT
|
||||
};
|
||||
"dat", "Dalmatian", "", "DAMAGED DURING", "margarine,", NULL};
|
||||
const PolyBlockType kBlocks8087_054[] = {PT_HEADING_TEXT, PT_FLOWING_TEXT,
|
||||
PT_PULLOUT_IMAGE, PT_CAPTION_TEXT,
|
||||
PT_FLOWING_TEXT};
|
||||
|
||||
// The fixture for testing Tesseract.
|
||||
class LayoutTest : public testing::Test {
|
||||
protected:
|
||||
string TestDataNameToPath(const string& name) {
|
||||
return file::JoinPath(FLAGS_test_srcdir,
|
||||
"testdata/" + name);
|
||||
return file::JoinPath(FLAGS_test_srcdir, "testdata/" + name);
|
||||
}
|
||||
string TessdataPath() {
|
||||
return file::JoinPath(FLAGS_test_srcdir,
|
||||
"tessdata");
|
||||
return file::JoinPath(FLAGS_test_srcdir, "tessdata");
|
||||
}
|
||||
|
||||
LayoutTest() {
|
||||
src_pix_ = NULL;
|
||||
}
|
||||
~LayoutTest() {
|
||||
pixDestroy(&src_pix_);
|
||||
}
|
||||
LayoutTest() { src_pix_ = NULL; }
|
||||
~LayoutTest() { pixDestroy(&src_pix_); }
|
||||
|
||||
void SetImage(const char* filename, const char* lang) {
|
||||
pixDestroy(&src_pix_);
|
||||
@ -56,16 +48,14 @@ class LayoutTest : public testing::Test {
|
||||
// allowing for other blocks in between.
|
||||
// An empty string should match an image block, and a NULL string
|
||||
// indicates the end of the array.
|
||||
void VerifyBlockTextOrder(const char* strings[],
|
||||
const PolyBlockType* blocks,
|
||||
void VerifyBlockTextOrder(const char* strings[], const PolyBlockType* blocks,
|
||||
ResultIterator* it) {
|
||||
it->Begin();
|
||||
int string_index = 0;
|
||||
int block_index = 0;
|
||||
do {
|
||||
char* block_text = it->GetUTF8Text(tesseract::RIL_BLOCK);
|
||||
if (block_text != NULL &&
|
||||
it->BlockType() == blocks[string_index] &&
|
||||
if (block_text != NULL && it->BlockType() == blocks[string_index] &&
|
||||
strstr(block_text, strings[string_index]) != NULL) {
|
||||
VLOG(1) << StringPrintf("Found string %s in block %d of type %s",
|
||||
strings[string_index], block_index,
|
||||
@ -74,19 +64,18 @@ class LayoutTest : public testing::Test {
|
||||
++string_index;
|
||||
} else if (it->BlockType() == blocks[string_index] &&
|
||||
block_text == NULL && strings[string_index][0] == '\0') {
|
||||
VLOG(1) << StringPrintf("Found block of type %s at block %d",
|
||||
kPolyBlockNames[blocks[string_index]],
|
||||
block_index);
|
||||
// Found this one.
|
||||
++string_index;
|
||||
VLOG(1) << StringPrintf("Found block of type %s at block %d",
|
||||
kPolyBlockNames[blocks[string_index]],
|
||||
block_index);
|
||||
// Found this one.
|
||||
++string_index;
|
||||
} else {
|
||||
VLOG(1) << StringPrintf("No match found in block with text:\n%s",
|
||||
block_text);
|
||||
}
|
||||
delete [] block_text;
|
||||
delete[] block_text;
|
||||
++block_index;
|
||||
if (strings[string_index] == NULL)
|
||||
break;
|
||||
if (strings[string_index] == NULL) break;
|
||||
} while (it->Next(tesseract::RIL_BLOCK));
|
||||
EXPECT_TRUE(strings[string_index] == NULL);
|
||||
}
|
||||
@ -105,8 +94,8 @@ class LayoutTest : public testing::Test {
|
||||
do {
|
||||
int left, top, right, bottom;
|
||||
if (it->BoundingBox(tesseract::RIL_BLOCK, &left, &top, &right, &bottom) &&
|
||||
PTIsTextType(it->BlockType()) &&
|
||||
right - left > 800 && bottom - top > 200) {
|
||||
PTIsTextType(it->BlockType()) && right - left > 800 &&
|
||||
bottom - top > 200) {
|
||||
if (prev_right > prev_left) {
|
||||
if (min(right, prev_right) > max(left, prev_left)) {
|
||||
EXPECT_GE(top, prev_bottom) << "Overlapping block should be below";
|
||||
@ -134,8 +123,8 @@ class LayoutTest : public testing::Test {
|
||||
do {
|
||||
int left, top, right, bottom;
|
||||
if (it->BoundingBox(tesseract::RIL_BLOCK, &left, &top, &right, &bottom) &&
|
||||
PTIsTextType(it->BlockType()) &&
|
||||
right - left > 800 && bottom - top > 200 ) {
|
||||
PTIsTextType(it->BlockType()) && right - left > 800 &&
|
||||
bottom - top > 200) {
|
||||
const PAGE_RES_IT* pr_it = it->PageResIt();
|
||||
POLY_BLOCK* pb = pr_it->block()->block->poly_block();
|
||||
CHECK(pb != NULL);
|
||||
@ -148,11 +137,11 @@ class LayoutTest : public testing::Test {
|
||||
const PAGE_RES_IT* w_it = word_it.PageResIt();
|
||||
// Iterate the blobs in the word.
|
||||
C_BLOB_IT b_it(w_it->word()->word->cblob_list());
|
||||
for (b_it.mark_cycle_pt();!b_it.cycled_list(); b_it.forward()) {
|
||||
for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
|
||||
C_BLOB* blob = b_it.data();
|
||||
// Iterate the outlines in the blob.
|
||||
C_OUTLINE_IT ol_it(blob->out_list());
|
||||
for (ol_it.mark_cycle_pt();!ol_it.cycled_list(); ol_it.forward()) {
|
||||
for (ol_it.mark_cycle_pt(); !ol_it.cycled_list(); ol_it.forward()) {
|
||||
C_OUTLINE* ol = ol_it.data();
|
||||
TBOX box = ol->bounding_box();
|
||||
ICOORD middle((box.left() + box.right()) / 2,
|
||||
|
@ -25,13 +25,10 @@ const char kRenderableEngLigatureText[] = "fidelity effigy ſteep";
|
||||
class LigatureTableTest : public ::testing::Test {
|
||||
protected:
|
||||
static void SetUpTestCase() {
|
||||
FLAGS_fonts_dir = File::JoinPath(
|
||||
FLAGS_test_srcdir, "testdata");
|
||||
FLAGS_fonts_dir = File::JoinPath(FLAGS_test_srcdir, "testdata");
|
||||
FLAGS_fontconfig_tmpdir = FLAGS_test_tmpdir;
|
||||
}
|
||||
void SetUp() {
|
||||
lig_table_ = LigatureTable::Get();
|
||||
}
|
||||
void SetUp() { lig_table_ = LigatureTable::Get(); }
|
||||
LigatureTable* lig_table_;
|
||||
};
|
||||
|
||||
@ -66,11 +63,8 @@ TEST_F(LigatureTableTest, DoesRemoveLigatures) {
|
||||
|
||||
TEST_F(LigatureTableTest, TestCustomLigatures) {
|
||||
const char* kTestCases[] = {
|
||||
"act", "a\uE003",
|
||||
"publiſh", "publi\uE006",
|
||||
"ſince", "\uE007nce",
|
||||
"aſleep", "a\uE008eep",
|
||||
"neceſſary", "nece\uE009ary",
|
||||
"act", "a\uE003", "publiſh", "publi\uE006", "ſince",
|
||||
"\uE007nce", "aſleep", "a\uE008eep", "neceſſary", "nece\uE009ary",
|
||||
};
|
||||
for (int i = 0; i < ARRAYSIZE(kTestCases); i += 2) {
|
||||
EXPECT_STREQ(kTestCases[i + 1],
|
||||
@ -84,7 +78,9 @@ TEST_F(LigatureTableTest, TestCustomLigatures) {
|
||||
|
||||
TEST_F(LigatureTableTest, TestRemovesCustomLigatures) {
|
||||
const char* kTestCases[] = {
|
||||
"fiction", "fi\uE003ion", "fiction",
|
||||
"fiction",
|
||||
"fi\uE003ion",
|
||||
"fiction",
|
||||
};
|
||||
for (int i = 0; i < ARRAYSIZE(kTestCases); i += 3) {
|
||||
EXPECT_STREQ(kTestCases[i + 1],
|
||||
@ -93,4 +89,4 @@ TEST_F(LigatureTableTest, TestRemovesCustomLigatures) {
|
||||
lig_table_->RemoveCustomLigatures(kTestCases[i + 1]).c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
@ -17,11 +17,9 @@ namespace {
|
||||
|
||||
class LLSQTest : public testing::Test {
|
||||
public:
|
||||
// Intentionally empty: nothing is prepared before each test.
void SetUp() {
}
|
||||
|
||||
// Intentionally empty: nothing is cleaned up after each test.
void TearDown() {
}
|
||||
|
||||
void ExpectCorrectLine(const LLSQ& llsq, double m, double c, double rms,
|
||||
double pearson, double tolerance) {
|
||||
@ -30,14 +28,14 @@ class LLSQTest : public testing::Test {
|
||||
EXPECT_NEAR(rms, llsq.rms(llsq.m(), llsq.c(llsq.m())), tolerance);
|
||||
EXPECT_NEAR(pearson, llsq.pearson(), tolerance);
|
||||
}
|
||||
// Returns the mean of the given points, i.e. their sum divided by their
// count. An empty vector yields FCOORD(0, 0).
FCOORD PtsMean(const std::vector<FCOORD>& pts) {
  FCOORD total(0, 0);
  // Nothing to average; also avoids dividing by zero below.
  if (pts.empty()) return total;
  // size_t index: the original compared a signed int against the unsigned
  // value returned by pts.size().
  const size_t num_pts = pts.size();
  for (size_t i = 0; i < num_pts; ++i) {
    total += pts[i];
  }
  return total / num_pts;
}
|
||||
void VerifyRmsOrth(const std::vector<FCOORD> &pts, const FCOORD &orth) {
|
||||
void VerifyRmsOrth(const std::vector<FCOORD>& pts, const FCOORD& orth) {
|
||||
LLSQ llsq;
|
||||
FCOORD xavg = PtsMean(pts);
|
||||
FCOORD nvec = !orth;
|
||||
@ -79,13 +77,11 @@ TEST_F(LLSQTest, BasicLines) {
|
||||
// The point at 1,2 pulls the result away from what would otherwise be a
|
||||
// perfect fit to a horizontal line by 0.25 unit, with rms error of 0.433.
|
||||
ExpectCorrectLine(llsq, 0.0, 1.25, 0.433, 0.0, 1e-2);
|
||||
ExpectCorrectVector(llsq, FCOORD(1.0f, 1.25f),
|
||||
FCOORD(1.0f, 0.0f), 1e-3);
|
||||
ExpectCorrectVector(llsq, FCOORD(1.0f, 1.25f), FCOORD(1.0f, 0.0f), 1e-3);
|
||||
llsq.add(1.0, 2.0, 10.0);
|
||||
// With a heavy weight, the point at 1,2 pulls the line nearer.
|
||||
ExpectCorrectLine(llsq, 0.0, 1.786, 0.41, 0.0, 1e-2);
|
||||
ExpectCorrectVector(llsq, FCOORD(1.0f, 1.786f),
|
||||
FCOORD(1.0f, 0.0f), 1e-3);
|
||||
ExpectCorrectVector(llsq, FCOORD(1.0f, 1.786f), FCOORD(1.0f, 0.0f), 1e-3);
|
||||
}
|
||||
|
||||
// Tests a simple baseline-style normalization with a rotation.
|
||||
@ -93,14 +89,12 @@ TEST_F(LLSQTest, Vectors) {
|
||||
LLSQ llsq;
|
||||
llsq.add(1.0, 1.0);
|
||||
llsq.add(1.0, -1.0);
|
||||
ExpectCorrectVector(llsq, FCOORD(1.0f, 0.0f),
|
||||
FCOORD(0.0f, 1.0f), 1e-6);
|
||||
ExpectCorrectVector(llsq, FCOORD(1.0f, 0.0f), FCOORD(0.0f, 1.0f), 1e-6);
|
||||
llsq.add(0.9, -2.0);
|
||||
llsq.add(1.1, -3.0);
|
||||
llsq.add(0.9, 2.0);
|
||||
llsq.add(1.10001, 3.0);
|
||||
ExpectCorrectVector(llsq, FCOORD(1.0f, 0.0f),
|
||||
FCOORD(0.0f, 1.0f), 1e-3);
|
||||
ExpectCorrectVector(llsq, FCOORD(1.0f, 0.0f), FCOORD(0.0f, 1.0f), 1e-3);
|
||||
}
|
||||
|
||||
// Verify that rms_orth() actually calculates:
|
||||
@ -112,10 +106,10 @@ TEST_F(LLSQTest, RmsOrthWorksAsIntended) {
|
||||
pts.push_back(FCOORD(0.13, 0.77));
|
||||
pts.push_back(FCOORD(0.16, 0.83));
|
||||
pts.push_back(FCOORD(0.45, 0.79));
|
||||
VerifyRmsOrth(pts, FCOORD(1,0));
|
||||
VerifyRmsOrth(pts, FCOORD(1,1));
|
||||
VerifyRmsOrth(pts, FCOORD(1,2));
|
||||
VerifyRmsOrth(pts, FCOORD(2,1));
|
||||
VerifyRmsOrth(pts, FCOORD(1, 0));
|
||||
VerifyRmsOrth(pts, FCOORD(1, 1));
|
||||
VerifyRmsOrth(pts, FCOORD(1, 2));
|
||||
VerifyRmsOrth(pts, FCOORD(2, 1));
|
||||
}
|
||||
|
||||
} // namespace.
|
||||
|
@ -2,8 +2,8 @@
|
||||
// File: loadlang_test.cc
|
||||
// Description: Test loading of All languages and Scripts for Tesseract.
|
||||
// Tests for All languages and scripts are Disabled by default.
|
||||
// Force the disabled test to run if required by using the
// --gtest_also_run_disabled_tests argument.
// Author: Shree Devi Kumar
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -16,230 +16,235 @@
|
||||
// limitations under the License.
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "include_gunit.h"
|
||||
#include "baseapi.h"
|
||||
#include <time.h>
|
||||
#include "baseapi.h"
|
||||
#include "include_gunit.h"
|
||||
|
||||
namespace {
|
||||
|
||||
class QuickTest : public testing::Test {
|
||||
protected:
|
||||
virtual void SetUp() {
|
||||
start_time_ = time(nullptr);
|
||||
}
|
||||
virtual void SetUp() { start_time_ = time(nullptr); }
|
||||
virtual void TearDown() {
|
||||
const time_t end_time = time(nullptr);
|
||||
EXPECT_TRUE(end_time - start_time_ <=25) << "The test took too long - " << ::testing::PrintToString(end_time - start_time_);
|
||||
EXPECT_TRUE(end_time - start_time_ <= 25)
|
||||
<< "The test took too long - "
|
||||
<< ::testing::PrintToString(end_time - start_time_);
|
||||
}
|
||||
time_t start_time_;
|
||||
};
|
||||
};
|
||||
|
||||
// Checks that Tesseract can be initialized for one language or script.
//
// lang:        name handed to TessBaseAPI::Init(), e.g. "eng" or
//              "script/Latin".
// tessdatadir: data directory handed to TessBaseAPI::Init().
//
// A non-zero return from Init() is reported as a fatal test failure that
// names the language which could not be loaded.
void LangLoader(const char* lang, const char* tessdatadir) {
  // Automatic storage instead of `new`: the original never deleted the API
  // object, and ASSERT_FALSE returns from this function on failure, so the
  // object has to be released on every path.
  tesseract::TessBaseAPI api;
  // The original message contained the literal text "$lang"; stream the
  // actual name so the failure identifies the language.
  ASSERT_FALSE(api.Init(tessdatadir, lang))
      << "Could not initialize tesseract for " << lang << ".";
  api.End();
}
|
||||
|
||||
// For all languages
|
||||
|
||||
class LoadLanguage : public QuickTest ,
|
||||
public ::testing::WithParamInterface<const char*> {
|
||||
};
|
||||
class LoadLanguage : public QuickTest,
|
||||
public ::testing::WithParamInterface<const char*> {};
|
||||
|
||||
TEST_P(LoadLanguage, afr) {LangLoader("afr" , GetParam());}
|
||||
TEST_P(LoadLanguage, amh) {LangLoader("amh" , GetParam());}
|
||||
TEST_P(LoadLanguage, ara) {LangLoader("ara" , GetParam());}
|
||||
TEST_P(LoadLanguage, asm) {LangLoader("asm" , GetParam());}
|
||||
TEST_P(LoadLanguage, aze) {LangLoader("aze" , GetParam());}
|
||||
TEST_P(LoadLanguage, aze_cyrl) {LangLoader("aze_cyrl" , GetParam());}
|
||||
TEST_P(LoadLanguage, bel) {LangLoader("bel" , GetParam());}
|
||||
TEST_P(LoadLanguage, ben) {LangLoader("ben" , GetParam());}
|
||||
TEST_P(LoadLanguage, bod) {LangLoader("bod" , GetParam());}
|
||||
TEST_P(LoadLanguage, bos) {LangLoader("bos" , GetParam());}
|
||||
TEST_P(LoadLanguage, bre) {LangLoader("bre" , GetParam());}
|
||||
TEST_P(LoadLanguage, bul) {LangLoader("bul" , GetParam());}
|
||||
TEST_P(LoadLanguage, cat) {LangLoader("cat" , GetParam());}
|
||||
TEST_P(LoadLanguage, ceb) {LangLoader("ceb" , GetParam());}
|
||||
TEST_P(LoadLanguage, ces) {LangLoader("ces" , GetParam());}
|
||||
TEST_P(LoadLanguage, chi_sim) {LangLoader("chi_sim" , GetParam());}
|
||||
TEST_P(LoadLanguage, chi_sim_vert) {LangLoader("chi_sim_vert" , GetParam());}
|
||||
TEST_P(LoadLanguage, chi_tra) {LangLoader("chi_tra" , GetParam());}
|
||||
TEST_P(LoadLanguage, chi_tra_vert) {LangLoader("chi_tra_vert" , GetParam());}
|
||||
TEST_P(LoadLanguage, chr) {LangLoader("chr" , GetParam());}
|
||||
TEST_P(LoadLanguage, cos) {LangLoader("cos" , GetParam());}
|
||||
TEST_P(LoadLanguage, cym) {LangLoader("cym" , GetParam());}
|
||||
TEST_P(LoadLanguage, dan) {LangLoader("dan" , GetParam());}
|
||||
TEST_P(LoadLanguage, deu) {LangLoader("deu" , GetParam());}
|
||||
TEST_P(LoadLanguage, div) {LangLoader("div" , GetParam());}
|
||||
TEST_P(LoadLanguage, dzo) {LangLoader("dzo" , GetParam());}
|
||||
TEST_P(LoadLanguage, ell) {LangLoader("ell" , GetParam());}
|
||||
TEST_P(LoadLanguage, eng) {LangLoader("eng" , GetParam());}
|
||||
TEST_P(LoadLanguage, enm) {LangLoader("enm" , GetParam());}
|
||||
TEST_P(LoadLanguage, epo) {LangLoader("epo" , GetParam());}
|
||||
TEST_P(LoadLanguage, est) {LangLoader("est" , GetParam());}
|
||||
TEST_P(LoadLanguage, eus) {LangLoader("eus" , GetParam());}
|
||||
TEST_P(LoadLanguage, fao) {LangLoader("fao" , GetParam());}
|
||||
TEST_P(LoadLanguage, fas) {LangLoader("fas" , GetParam());}
|
||||
TEST_P(LoadLanguage, fil) {LangLoader("fil" , GetParam());}
|
||||
TEST_P(LoadLanguage, fin) {LangLoader("fin" , GetParam());}
|
||||
TEST_P(LoadLanguage, fra) {LangLoader("fra" , GetParam());}
|
||||
TEST_P(LoadLanguage, frk) {LangLoader("frk" , GetParam());}
|
||||
TEST_P(LoadLanguage, frm) {LangLoader("frm" , GetParam());}
|
||||
TEST_P(LoadLanguage, fry) {LangLoader("fry" , GetParam());}
|
||||
TEST_P(LoadLanguage, gla) {LangLoader("gla" , GetParam());}
|
||||
TEST_P(LoadLanguage, gle) {LangLoader("gle" , GetParam());}
|
||||
TEST_P(LoadLanguage, glg) {LangLoader("glg" , GetParam());}
|
||||
TEST_P(LoadLanguage, grc) {LangLoader("grc" , GetParam());}
|
||||
TEST_P(LoadLanguage, guj) {LangLoader("guj" , GetParam());}
|
||||
TEST_P(LoadLanguage, hat) {LangLoader("hat" , GetParam());}
|
||||
TEST_P(LoadLanguage, heb) {LangLoader("heb" , GetParam());}
|
||||
TEST_P(LoadLanguage, hin) {LangLoader("hin" , GetParam());}
|
||||
TEST_P(LoadLanguage, hrv) {LangLoader("hrv" , GetParam());}
|
||||
TEST_P(LoadLanguage, hun) {LangLoader("hun" , GetParam());}
|
||||
TEST_P(LoadLanguage, hye) {LangLoader("hye" , GetParam());}
|
||||
TEST_P(LoadLanguage, iku) {LangLoader("iku" , GetParam());}
|
||||
TEST_P(LoadLanguage, ind) {LangLoader("ind" , GetParam());}
|
||||
TEST_P(LoadLanguage, isl) {LangLoader("isl" , GetParam());}
|
||||
TEST_P(LoadLanguage, ita) {LangLoader("ita" , GetParam());}
|
||||
TEST_P(LoadLanguage, ita_old) {LangLoader("ita_old" , GetParam());}
|
||||
TEST_P(LoadLanguage, jav) {LangLoader("jav" , GetParam());}
|
||||
TEST_P(LoadLanguage, jpn) {LangLoader("jpn" , GetParam());}
|
||||
TEST_P(LoadLanguage, jpn_vert) {LangLoader("jpn_vert" , GetParam());}
|
||||
TEST_P(LoadLanguage, kan) {LangLoader("kan" , GetParam());}
|
||||
TEST_P(LoadLanguage, kat) {LangLoader("kat" , GetParam());}
|
||||
TEST_P(LoadLanguage, kat_old) {LangLoader("kat_old" , GetParam());}
|
||||
TEST_P(LoadLanguage, kaz) {LangLoader("kaz" , GetParam());}
|
||||
TEST_P(LoadLanguage, khm) {LangLoader("khm" , GetParam());}
|
||||
TEST_P(LoadLanguage, kir) {LangLoader("kir" , GetParam());}
|
||||
TEST_P(LoadLanguage, afr) { LangLoader("afr", GetParam()); }
|
||||
TEST_P(LoadLanguage, amh) { LangLoader("amh", GetParam()); }
|
||||
TEST_P(LoadLanguage, ara) { LangLoader("ara", GetParam()); }
|
||||
TEST_P(LoadLanguage, asm) { LangLoader("asm", GetParam()); }
|
||||
TEST_P(LoadLanguage, aze) { LangLoader("aze", GetParam()); }
|
||||
TEST_P(LoadLanguage, aze_cyrl) { LangLoader("aze_cyrl", GetParam()); }
|
||||
TEST_P(LoadLanguage, bel) { LangLoader("bel", GetParam()); }
|
||||
TEST_P(LoadLanguage, ben) { LangLoader("ben", GetParam()); }
|
||||
TEST_P(LoadLanguage, bod) { LangLoader("bod", GetParam()); }
|
||||
TEST_P(LoadLanguage, bos) { LangLoader("bos", GetParam()); }
|
||||
TEST_P(LoadLanguage, bre) { LangLoader("bre", GetParam()); }
|
||||
TEST_P(LoadLanguage, bul) { LangLoader("bul", GetParam()); }
|
||||
TEST_P(LoadLanguage, cat) { LangLoader("cat", GetParam()); }
|
||||
TEST_P(LoadLanguage, ceb) { LangLoader("ceb", GetParam()); }
|
||||
TEST_P(LoadLanguage, ces) { LangLoader("ces", GetParam()); }
|
||||
TEST_P(LoadLanguage, chi_sim) { LangLoader("chi_sim", GetParam()); }
|
||||
TEST_P(LoadLanguage, chi_sim_vert) { LangLoader("chi_sim_vert", GetParam()); }
|
||||
TEST_P(LoadLanguage, chi_tra) { LangLoader("chi_tra", GetParam()); }
|
||||
TEST_P(LoadLanguage, chi_tra_vert) { LangLoader("chi_tra_vert", GetParam()); }
|
||||
TEST_P(LoadLanguage, chr) { LangLoader("chr", GetParam()); }
|
||||
TEST_P(LoadLanguage, cos) { LangLoader("cos", GetParam()); }
|
||||
TEST_P(LoadLanguage, cym) { LangLoader("cym", GetParam()); }
|
||||
TEST_P(LoadLanguage, dan) { LangLoader("dan", GetParam()); }
|
||||
TEST_P(LoadLanguage, deu) { LangLoader("deu", GetParam()); }
|
||||
TEST_P(LoadLanguage, div) { LangLoader("div", GetParam()); }
|
||||
TEST_P(LoadLanguage, dzo) { LangLoader("dzo", GetParam()); }
|
||||
TEST_P(LoadLanguage, ell) { LangLoader("ell", GetParam()); }
|
||||
TEST_P(LoadLanguage, eng) { LangLoader("eng", GetParam()); }
|
||||
TEST_P(LoadLanguage, enm) { LangLoader("enm", GetParam()); }
|
||||
TEST_P(LoadLanguage, epo) { LangLoader("epo", GetParam()); }
|
||||
TEST_P(LoadLanguage, est) { LangLoader("est", GetParam()); }
|
||||
TEST_P(LoadLanguage, eus) { LangLoader("eus", GetParam()); }
|
||||
TEST_P(LoadLanguage, fao) { LangLoader("fao", GetParam()); }
|
||||
TEST_P(LoadLanguage, fas) { LangLoader("fas", GetParam()); }
|
||||
TEST_P(LoadLanguage, fil) { LangLoader("fil", GetParam()); }
|
||||
TEST_P(LoadLanguage, fin) { LangLoader("fin", GetParam()); }
|
||||
TEST_P(LoadLanguage, fra) { LangLoader("fra", GetParam()); }
|
||||
TEST_P(LoadLanguage, frk) { LangLoader("frk", GetParam()); }
|
||||
TEST_P(LoadLanguage, frm) { LangLoader("frm", GetParam()); }
|
||||
TEST_P(LoadLanguage, fry) { LangLoader("fry", GetParam()); }
|
||||
TEST_P(LoadLanguage, gla) { LangLoader("gla", GetParam()); }
|
||||
TEST_P(LoadLanguage, gle) { LangLoader("gle", GetParam()); }
|
||||
TEST_P(LoadLanguage, glg) { LangLoader("glg", GetParam()); }
|
||||
TEST_P(LoadLanguage, grc) { LangLoader("grc", GetParam()); }
|
||||
TEST_P(LoadLanguage, guj) { LangLoader("guj", GetParam()); }
|
||||
TEST_P(LoadLanguage, hat) { LangLoader("hat", GetParam()); }
|
||||
TEST_P(LoadLanguage, heb) { LangLoader("heb", GetParam()); }
|
||||
TEST_P(LoadLanguage, hin) { LangLoader("hin", GetParam()); }
|
||||
TEST_P(LoadLanguage, hrv) { LangLoader("hrv", GetParam()); }
|
||||
TEST_P(LoadLanguage, hun) { LangLoader("hun", GetParam()); }
|
||||
TEST_P(LoadLanguage, hye) { LangLoader("hye", GetParam()); }
|
||||
TEST_P(LoadLanguage, iku) { LangLoader("iku", GetParam()); }
|
||||
TEST_P(LoadLanguage, ind) { LangLoader("ind", GetParam()); }
|
||||
TEST_P(LoadLanguage, isl) { LangLoader("isl", GetParam()); }
|
||||
TEST_P(LoadLanguage, ita) { LangLoader("ita", GetParam()); }
|
||||
TEST_P(LoadLanguage, ita_old) { LangLoader("ita_old", GetParam()); }
|
||||
TEST_P(LoadLanguage, jav) { LangLoader("jav", GetParam()); }
|
||||
TEST_P(LoadLanguage, jpn) { LangLoader("jpn", GetParam()); }
|
||||
TEST_P(LoadLanguage, jpn_vert) { LangLoader("jpn_vert", GetParam()); }
|
||||
TEST_P(LoadLanguage, kan) { LangLoader("kan", GetParam()); }
|
||||
TEST_P(LoadLanguage, kat) { LangLoader("kat", GetParam()); }
|
||||
TEST_P(LoadLanguage, kat_old) { LangLoader("kat_old", GetParam()); }
|
||||
TEST_P(LoadLanguage, kaz) { LangLoader("kaz", GetParam()); }
|
||||
TEST_P(LoadLanguage, khm) { LangLoader("khm", GetParam()); }
|
||||
TEST_P(LoadLanguage, kir) { LangLoader("kir", GetParam()); }
|
||||
// TEST_P(LoadLanguage, kmr) {LangLoader("kmr" , GetParam());}
|
||||
TEST_P(LoadLanguage, kor) {LangLoader("kor" , GetParam());}
|
||||
TEST_P(LoadLanguage, kor_vert) {LangLoader("kor_vert" , GetParam());}
|
||||
TEST_P(LoadLanguage, lao) {LangLoader("lao" , GetParam());}
|
||||
TEST_P(LoadLanguage, lat) {LangLoader("lat" , GetParam());}
|
||||
TEST_P(LoadLanguage, lav) {LangLoader("lav" , GetParam());}
|
||||
TEST_P(LoadLanguage, lit) {LangLoader("lit" , GetParam());}
|
||||
TEST_P(LoadLanguage, ltz) {LangLoader("ltz" , GetParam());}
|
||||
TEST_P(LoadLanguage, mal) {LangLoader("mal" , GetParam());}
|
||||
TEST_P(LoadLanguage, mar) {LangLoader("mar" , GetParam());}
|
||||
TEST_P(LoadLanguage, mkd) {LangLoader("mkd" , GetParam());}
|
||||
TEST_P(LoadLanguage, mlt) {LangLoader("mlt" , GetParam());}
|
||||
TEST_P(LoadLanguage, mon) {LangLoader("mon" , GetParam());}
|
||||
TEST_P(LoadLanguage, mri) {LangLoader("mri" , GetParam());}
|
||||
TEST_P(LoadLanguage, msa) {LangLoader("msa" , GetParam());}
|
||||
TEST_P(LoadLanguage, mya) {LangLoader("mya" , GetParam());}
|
||||
TEST_P(LoadLanguage, nep) {LangLoader("nep" , GetParam());}
|
||||
TEST_P(LoadLanguage, nld) {LangLoader("nld" , GetParam());}
|
||||
TEST_P(LoadLanguage, nor) {LangLoader("nor" , GetParam());}
|
||||
TEST_P(LoadLanguage, oci) {LangLoader("oci" , GetParam());}
|
||||
TEST_P(LoadLanguage, ori) {LangLoader("ori" , GetParam());}
|
||||
TEST_P(LoadLanguage, osd) {LangLoader("osd" , GetParam());}
|
||||
TEST_P(LoadLanguage, pan) {LangLoader("pan" , GetParam());}
|
||||
TEST_P(LoadLanguage, pol) {LangLoader("pol" , GetParam());}
|
||||
TEST_P(LoadLanguage, por) {LangLoader("por" , GetParam());}
|
||||
TEST_P(LoadLanguage, pus) {LangLoader("pus" , GetParam());}
|
||||
TEST_P(LoadLanguage, que) {LangLoader("que" , GetParam());}
|
||||
TEST_P(LoadLanguage, ron) {LangLoader("ron" , GetParam());}
|
||||
TEST_P(LoadLanguage, rus) {LangLoader("rus" , GetParam());}
|
||||
TEST_P(LoadLanguage, san) {LangLoader("san" , GetParam());}
|
||||
TEST_P(LoadLanguage, sin) {LangLoader("sin" , GetParam());}
|
||||
TEST_P(LoadLanguage, slk) {LangLoader("slk" , GetParam());}
|
||||
TEST_P(LoadLanguage, slv) {LangLoader("slv" , GetParam());}
|
||||
TEST_P(LoadLanguage, snd) {LangLoader("snd" , GetParam());}
|
||||
TEST_P(LoadLanguage, spa) {LangLoader("spa" , GetParam());}
|
||||
TEST_P(LoadLanguage, spa_old) {LangLoader("spa_old" , GetParam());}
|
||||
TEST_P(LoadLanguage, sqi) {LangLoader("sqi" , GetParam());}
|
||||
TEST_P(LoadLanguage, srp) {LangLoader("srp" , GetParam());}
|
||||
TEST_P(LoadLanguage, srp_latn) {LangLoader("srp_latn" , GetParam());}
|
||||
TEST_P(LoadLanguage, sun) {LangLoader("sun" , GetParam());}
|
||||
TEST_P(LoadLanguage, swa) {LangLoader("swa" , GetParam());}
|
||||
TEST_P(LoadLanguage, swe) {LangLoader("swe" , GetParam());}
|
||||
TEST_P(LoadLanguage, syr) {LangLoader("syr" , GetParam());}
|
||||
TEST_P(LoadLanguage, tam) {LangLoader("tam" , GetParam());}
|
||||
TEST_P(LoadLanguage, tat) {LangLoader("tat" , GetParam());}
|
||||
TEST_P(LoadLanguage, tel) {LangLoader("tel" , GetParam());}
|
||||
TEST_P(LoadLanguage, tgk) {LangLoader("tgk" , GetParam());}
|
||||
TEST_P(LoadLanguage, tha) {LangLoader("tha" , GetParam());}
|
||||
TEST_P(LoadLanguage, tir) {LangLoader("tir" , GetParam());}
|
||||
TEST_P(LoadLanguage, ton) {LangLoader("ton" , GetParam());}
|
||||
TEST_P(LoadLanguage, tur) {LangLoader("tur" , GetParam());}
|
||||
TEST_P(LoadLanguage, uig) {LangLoader("uig" , GetParam());}
|
||||
TEST_P(LoadLanguage, ukr) {LangLoader("ukr" , GetParam());}
|
||||
TEST_P(LoadLanguage, urd) {LangLoader("urd" , GetParam());}
|
||||
TEST_P(LoadLanguage, uzb) {LangLoader("uzb" , GetParam());}
|
||||
TEST_P(LoadLanguage, uzb_cyrl) {LangLoader("uzb_cyrl" , GetParam());}
|
||||
TEST_P(LoadLanguage, vie) {LangLoader("vie" , GetParam());}
|
||||
TEST_P(LoadLanguage, yid) {LangLoader("yid" , GetParam());}
|
||||
TEST_P(LoadLanguage, yor) {LangLoader("yor" , GetParam());}
|
||||
TEST_P(LoadLanguage, kor) { LangLoader("kor", GetParam()); }
|
||||
TEST_P(LoadLanguage, kor_vert) { LangLoader("kor_vert", GetParam()); }
|
||||
TEST_P(LoadLanguage, lao) { LangLoader("lao", GetParam()); }
|
||||
TEST_P(LoadLanguage, lat) { LangLoader("lat", GetParam()); }
|
||||
TEST_P(LoadLanguage, lav) { LangLoader("lav", GetParam()); }
|
||||
TEST_P(LoadLanguage, lit) { LangLoader("lit", GetParam()); }
|
||||
TEST_P(LoadLanguage, ltz) { LangLoader("ltz", GetParam()); }
|
||||
TEST_P(LoadLanguage, mal) { LangLoader("mal", GetParam()); }
|
||||
TEST_P(LoadLanguage, mar) { LangLoader("mar", GetParam()); }
|
||||
TEST_P(LoadLanguage, mkd) { LangLoader("mkd", GetParam()); }
|
||||
TEST_P(LoadLanguage, mlt) { LangLoader("mlt", GetParam()); }
|
||||
TEST_P(LoadLanguage, mon) { LangLoader("mon", GetParam()); }
|
||||
TEST_P(LoadLanguage, mri) { LangLoader("mri", GetParam()); }
|
||||
TEST_P(LoadLanguage, msa) { LangLoader("msa", GetParam()); }
|
||||
TEST_P(LoadLanguage, mya) { LangLoader("mya", GetParam()); }
|
||||
TEST_P(LoadLanguage, nep) { LangLoader("nep", GetParam()); }
|
||||
TEST_P(LoadLanguage, nld) { LangLoader("nld", GetParam()); }
|
||||
TEST_P(LoadLanguage, nor) { LangLoader("nor", GetParam()); }
|
||||
TEST_P(LoadLanguage, oci) { LangLoader("oci", GetParam()); }
|
||||
TEST_P(LoadLanguage, ori) { LangLoader("ori", GetParam()); }
|
||||
TEST_P(LoadLanguage, osd) { LangLoader("osd", GetParam()); }
|
||||
TEST_P(LoadLanguage, pan) { LangLoader("pan", GetParam()); }
|
||||
TEST_P(LoadLanguage, pol) { LangLoader("pol", GetParam()); }
|
||||
TEST_P(LoadLanguage, por) { LangLoader("por", GetParam()); }
|
||||
TEST_P(LoadLanguage, pus) { LangLoader("pus", GetParam()); }
|
||||
TEST_P(LoadLanguage, que) { LangLoader("que", GetParam()); }
|
||||
TEST_P(LoadLanguage, ron) { LangLoader("ron", GetParam()); }
|
||||
TEST_P(LoadLanguage, rus) { LangLoader("rus", GetParam()); }
|
||||
TEST_P(LoadLanguage, san) { LangLoader("san", GetParam()); }
|
||||
TEST_P(LoadLanguage, sin) { LangLoader("sin", GetParam()); }
|
||||
TEST_P(LoadLanguage, slk) { LangLoader("slk", GetParam()); }
|
||||
TEST_P(LoadLanguage, slv) { LangLoader("slv", GetParam()); }
|
||||
TEST_P(LoadLanguage, snd) { LangLoader("snd", GetParam()); }
|
||||
TEST_P(LoadLanguage, spa) { LangLoader("spa", GetParam()); }
|
||||
TEST_P(LoadLanguage, spa_old) { LangLoader("spa_old", GetParam()); }
|
||||
TEST_P(LoadLanguage, sqi) { LangLoader("sqi", GetParam()); }
|
||||
TEST_P(LoadLanguage, srp) { LangLoader("srp", GetParam()); }
|
||||
TEST_P(LoadLanguage, srp_latn) { LangLoader("srp_latn", GetParam()); }
|
||||
TEST_P(LoadLanguage, sun) { LangLoader("sun", GetParam()); }
|
||||
TEST_P(LoadLanguage, swa) { LangLoader("swa", GetParam()); }
|
||||
TEST_P(LoadLanguage, swe) { LangLoader("swe", GetParam()); }
|
||||
TEST_P(LoadLanguage, syr) { LangLoader("syr", GetParam()); }
|
||||
TEST_P(LoadLanguage, tam) { LangLoader("tam", GetParam()); }
|
||||
TEST_P(LoadLanguage, tat) { LangLoader("tat", GetParam()); }
|
||||
TEST_P(LoadLanguage, tel) { LangLoader("tel", GetParam()); }
|
||||
TEST_P(LoadLanguage, tgk) { LangLoader("tgk", GetParam()); }
|
||||
TEST_P(LoadLanguage, tha) { LangLoader("tha", GetParam()); }
|
||||
TEST_P(LoadLanguage, tir) { LangLoader("tir", GetParam()); }
|
||||
TEST_P(LoadLanguage, ton) { LangLoader("ton", GetParam()); }
|
||||
TEST_P(LoadLanguage, tur) { LangLoader("tur", GetParam()); }
|
||||
TEST_P(LoadLanguage, uig) { LangLoader("uig", GetParam()); }
|
||||
TEST_P(LoadLanguage, ukr) { LangLoader("ukr", GetParam()); }
|
||||
TEST_P(LoadLanguage, urd) { LangLoader("urd", GetParam()); }
|
||||
TEST_P(LoadLanguage, uzb) { LangLoader("uzb", GetParam()); }
|
||||
TEST_P(LoadLanguage, uzb_cyrl) { LangLoader("uzb_cyrl", GetParam()); }
|
||||
TEST_P(LoadLanguage, vie) { LangLoader("vie", GetParam()); }
|
||||
TEST_P(LoadLanguage, yid) { LangLoader("yid", GetParam()); }
|
||||
TEST_P(LoadLanguage, yor) { LangLoader("yor", GetParam()); }
|
||||
|
||||
INSTANTIATE_TEST_CASE_P( DISABLED_Tessdata_fast, LoadLanguage,
|
||||
::testing::Values(TESSDATA_DIR "_fast") );
|
||||
INSTANTIATE_TEST_CASE_P( DISABLED_Tessdata_best, LoadLanguage,
|
||||
::testing::Values(TESSDATA_DIR "_best") );
|
||||
INSTANTIATE_TEST_CASE_P( DISABLED_Tessdata, LoadLanguage,
|
||||
::testing::Values(TESSDATA_DIR) );
|
||||
INSTANTIATE_TEST_CASE_P(DISABLED_Tessdata_fast, LoadLanguage,
|
||||
::testing::Values(TESSDATA_DIR "_fast"));
|
||||
INSTANTIATE_TEST_CASE_P(DISABLED_Tessdata_best, LoadLanguage,
|
||||
::testing::Values(TESSDATA_DIR "_best"));
|
||||
INSTANTIATE_TEST_CASE_P(DISABLED_Tessdata, LoadLanguage,
|
||||
::testing::Values(TESSDATA_DIR));
|
||||
|
||||
// For all scripts
|
||||
|
||||
class LoadScript : public QuickTest ,
|
||||
public ::testing::WithParamInterface<const char*> {
|
||||
};
|
||||
class LoadScript : public QuickTest,
|
||||
public ::testing::WithParamInterface<const char*> {};
|
||||
|
||||
TEST_P(LoadScript, Arabic) {LangLoader("script/Arabic" , GetParam());}
|
||||
TEST_P(LoadScript, Armenian) {LangLoader("script/Armenian" , GetParam());}
|
||||
TEST_P(LoadScript, Bengali) {LangLoader("script/Bengali" , GetParam());}
|
||||
TEST_P(LoadScript, Canadian_Aboriginal) {LangLoader("script/Canadian_Aboriginal" , GetParam());}
|
||||
TEST_P(LoadScript, Cherokee) {LangLoader("script/Cherokee" , GetParam());}
|
||||
TEST_P(LoadScript, Cyrillic) {LangLoader("script/Cyrillic" , GetParam());}
|
||||
TEST_P(LoadScript, Devanagari) {LangLoader("script/Devanagari" , GetParam());}
|
||||
TEST_P(LoadScript, Ethiopic) {LangLoader("script/Ethiopic" , GetParam());}
|
||||
TEST_P(LoadScript, Fraktur) {LangLoader("script/Fraktur" , GetParam());}
|
||||
TEST_P(LoadScript, Georgian) {LangLoader("script/Georgian" , GetParam());}
|
||||
TEST_P(LoadScript, Greek) {LangLoader("script/Greek" , GetParam());}
|
||||
TEST_P(LoadScript, Gujarati) {LangLoader("script/Gujarati" , GetParam());}
|
||||
TEST_P(LoadScript, Gurmukhi) {LangLoader("script/Gurmukhi" , GetParam());}
|
||||
TEST_P(LoadScript, HanS) {LangLoader("script/HanS" , GetParam());}
|
||||
TEST_P(LoadScript, HanS_vert) {LangLoader("script/HanS_vert" , GetParam());}
|
||||
TEST_P(LoadScript, HanT) {LangLoader("script/HanT" , GetParam());}
|
||||
TEST_P(LoadScript, HanT_vert) {LangLoader("script/HanT_vert" , GetParam());}
|
||||
TEST_P(LoadScript, Hangul) {LangLoader("script/Hangul" , GetParam());}
|
||||
TEST_P(LoadScript, Hangul_vert) {LangLoader("script/Hangul_vert" , GetParam());}
|
||||
TEST_P(LoadScript, Hebrew) {LangLoader("script/Hebrew" , GetParam());}
|
||||
TEST_P(LoadScript, Japanese) {LangLoader("script/Japanese" , GetParam());}
|
||||
TEST_P(LoadScript, Japanese_vert) {LangLoader("script/Japanese_vert" , GetParam());}
|
||||
TEST_P(LoadScript, Kannada) {LangLoader("script/Kannada" , GetParam());}
|
||||
TEST_P(LoadScript, Khmer) {LangLoader("script/Khmer" , GetParam());}
|
||||
TEST_P(LoadScript, Lao) {LangLoader("script/Lao" , GetParam());}
|
||||
TEST_P(LoadScript, Latin) {LangLoader("script/Latin" , GetParam());}
|
||||
TEST_P(LoadScript, Malayalam) {LangLoader("script/Malayalam" , GetParam());}
|
||||
TEST_P(LoadScript, Myanmar) {LangLoader("script/Myanmar" , GetParam());}
|
||||
TEST_P(LoadScript, Oriya) {LangLoader("script/Oriya" , GetParam());}
|
||||
TEST_P(LoadScript, Sinhala) {LangLoader("script/Sinhala" , GetParam());}
|
||||
TEST_P(LoadScript, Syriac) {LangLoader("script/Syriac" , GetParam());}
|
||||
TEST_P(LoadScript, Tamil) {LangLoader("script/Tamil" , GetParam());}
|
||||
TEST_P(LoadScript, Telugu) {LangLoader("script/Telugu" , GetParam());}
|
||||
TEST_P(LoadScript, Thaana) {LangLoader("script/Thaana" , GetParam());}
|
||||
TEST_P(LoadScript, Thai) {LangLoader("script/Thai" , GetParam());}
|
||||
TEST_P(LoadScript, Tibetan) {LangLoader("script/Tibetan" , GetParam());}
|
||||
TEST_P(LoadScript, Vietnamese) {LangLoader("script/Vietnamese" , GetParam());}
|
||||
TEST_P(LoadScript, Arabic) { LangLoader("script/Arabic", GetParam()); }
|
||||
TEST_P(LoadScript, Armenian) { LangLoader("script/Armenian", GetParam()); }
|
||||
TEST_P(LoadScript, Bengali) { LangLoader("script/Bengali", GetParam()); }
|
||||
TEST_P(LoadScript, Canadian_Aboriginal) {
|
||||
LangLoader("script/Canadian_Aboriginal", GetParam());
|
||||
}
|
||||
TEST_P(LoadScript, Cherokee) { LangLoader("script/Cherokee", GetParam()); }
|
||||
TEST_P(LoadScript, Cyrillic) { LangLoader("script/Cyrillic", GetParam()); }
|
||||
TEST_P(LoadScript, Devanagari) { LangLoader("script/Devanagari", GetParam()); }
|
||||
TEST_P(LoadScript, Ethiopic) { LangLoader("script/Ethiopic", GetParam()); }
|
||||
TEST_P(LoadScript, Fraktur) { LangLoader("script/Fraktur", GetParam()); }
|
||||
TEST_P(LoadScript, Georgian) { LangLoader("script/Georgian", GetParam()); }
|
||||
TEST_P(LoadScript, Greek) { LangLoader("script/Greek", GetParam()); }
|
||||
TEST_P(LoadScript, Gujarati) { LangLoader("script/Gujarati", GetParam()); }
|
||||
TEST_P(LoadScript, Gurmukhi) { LangLoader("script/Gurmukhi", GetParam()); }
|
||||
TEST_P(LoadScript, HanS) { LangLoader("script/HanS", GetParam()); }
|
||||
TEST_P(LoadScript, HanS_vert) { LangLoader("script/HanS_vert", GetParam()); }
|
||||
TEST_P(LoadScript, HanT) { LangLoader("script/HanT", GetParam()); }
|
||||
TEST_P(LoadScript, HanT_vert) { LangLoader("script/HanT_vert", GetParam()); }
|
||||
TEST_P(LoadScript, Hangul) { LangLoader("script/Hangul", GetParam()); }
|
||||
TEST_P(LoadScript, Hangul_vert) {
|
||||
LangLoader("script/Hangul_vert", GetParam());
|
||||
}
|
||||
TEST_P(LoadScript, Hebrew) { LangLoader("script/Hebrew", GetParam()); }
|
||||
TEST_P(LoadScript, Japanese) { LangLoader("script/Japanese", GetParam()); }
|
||||
TEST_P(LoadScript, Japanese_vert) {
|
||||
LangLoader("script/Japanese_vert", GetParam());
|
||||
}
|
||||
TEST_P(LoadScript, Kannada) { LangLoader("script/Kannada", GetParam()); }
|
||||
TEST_P(LoadScript, Khmer) { LangLoader("script/Khmer", GetParam()); }
|
||||
TEST_P(LoadScript, Lao) { LangLoader("script/Lao", GetParam()); }
|
||||
TEST_P(LoadScript, Latin) { LangLoader("script/Latin", GetParam()); }
|
||||
TEST_P(LoadScript, Malayalam) { LangLoader("script/Malayalam", GetParam()); }
|
||||
TEST_P(LoadScript, Myanmar) { LangLoader("script/Myanmar", GetParam()); }
|
||||
TEST_P(LoadScript, Oriya) { LangLoader("script/Oriya", GetParam()); }
|
||||
TEST_P(LoadScript, Sinhala) { LangLoader("script/Sinhala", GetParam()); }
|
||||
TEST_P(LoadScript, Syriac) { LangLoader("script/Syriac", GetParam()); }
|
||||
TEST_P(LoadScript, Tamil) { LangLoader("script/Tamil", GetParam()); }
|
||||
TEST_P(LoadScript, Telugu) { LangLoader("script/Telugu", GetParam()); }
|
||||
TEST_P(LoadScript, Thaana) { LangLoader("script/Thaana", GetParam()); }
|
||||
TEST_P(LoadScript, Thai) { LangLoader("script/Thai", GetParam()); }
|
||||
TEST_P(LoadScript, Tibetan) { LangLoader("script/Tibetan", GetParam()); }
|
||||
TEST_P(LoadScript, Vietnamese) { LangLoader("script/Vietnamese", GetParam()); }
|
||||
|
||||
INSTANTIATE_TEST_CASE_P( DISABLED_Tessdata_fast, LoadScript,
|
||||
::testing::Values(TESSDATA_DIR "_fast") );
|
||||
INSTANTIATE_TEST_CASE_P( DISABLED_Tessdata_best, LoadScript,
|
||||
::testing::Values(TESSDATA_DIR "_best") );
|
||||
INSTANTIATE_TEST_CASE_P( DISABLED_Tessdata, LoadScript,
|
||||
::testing::Values(TESSDATA_DIR) );
|
||||
INSTANTIATE_TEST_CASE_P(DISABLED_Tessdata_fast, LoadScript,
|
||||
::testing::Values(TESSDATA_DIR "_fast"));
|
||||
INSTANTIATE_TEST_CASE_P(DISABLED_Tessdata_best, LoadScript,
|
||||
::testing::Values(TESSDATA_DIR "_best"));
|
||||
INSTANTIATE_TEST_CASE_P(DISABLED_Tessdata, LoadScript,
|
||||
::testing::Values(TESSDATA_DIR));
|
||||
|
||||
class LoadLang : public QuickTest {
|
||||
};
|
||||
class LoadLang : public QuickTest {};
|
||||
|
||||
// Test Load of English here, as the parameterized tests are disabled by
|
||||
// default.
|
||||
TEST_F(LoadLang, engFast) { LangLoader("eng", TESSDATA_DIR "_fast"); }
|
||||
TEST_F(LoadLang, engBest) { LangLoader("eng", TESSDATA_DIR "_best"); }
|
||||
TEST_F(LoadLang, engBestInt) { LangLoader("eng", TESSDATA_DIR); }
|
||||
|
||||
// Test Load of English here, as the parameterized tests are disabled by default.
|
||||
TEST_F(LoadLang, engFast) {LangLoader("eng" , TESSDATA_DIR "_fast");}
|
||||
TEST_F(LoadLang, engBest) {LangLoader("eng" , TESSDATA_DIR "_best");}
|
||||
TEST_F(LoadLang, engBestInt) {LangLoader("eng" , TESSDATA_DIR);}
|
||||
|
||||
// Use class LoadLang for languages which are NOT there in all three repos
|
||||
TEST_F(LoadLang, kmrFast) {LangLoader("kmr" , TESSDATA_DIR "_fast");}
|
||||
TEST_F(LoadLang, kmrBest) {LangLoader("kmr" , TESSDATA_DIR "_best");}
|
||||
TEST_F(LoadLang, kmrFast) { LangLoader("kmr", TESSDATA_DIR "_fast"); }
|
||||
TEST_F(LoadLang, kmrBest) { LangLoader("kmr", TESSDATA_DIR "_best"); }
|
||||
// TEST_F(LoadLang, kmrBestInt) {LangLoader("kmr" , TESSDATA_DIR);}
|
||||
|
||||
} // namespace
|
||||
|
@ -1,7 +1,8 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: log.h
|
||||
// Description: Include for custom log message for unittest for tesseract.
|
||||
// based on //https://stackoverflow.com/questions/16491675/how-to-send-custom-message-in-google-c-testing-framework
|
||||
// based on
|
||||
// //https://stackoverflow.com/questions/16491675/how-to-send-custom-message-in-google-c-testing-framework
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@ -18,11 +19,13 @@
|
||||
|
||||
#include <iostream>
|
||||
|
||||
static class LOG { public: LOG() {}
|
||||
std::ostream& info() {
|
||||
std::cout << "[ LOG MSG ] ";
|
||||
return std::cout;
|
||||
}
|
||||
static class LOG {
|
||||
public:
|
||||
LOG() {}
|
||||
std::ostream& info() {
|
||||
std::cout << "[ LOG MSG ] ";
|
||||
return std::cout;
|
||||
}
|
||||
} log;
|
||||
|
||||
#endif // TESSERACT_UNITTEST_LOG_H_
|
||||
|
@ -61,9 +61,8 @@ TEST_F(LSTMTrainerTest, ConvertModel) {
|
||||
deu_trainer.InitCharSet(TestDataNameToPath("deu.traineddata"));
|
||||
// Load the fra traineddata, strip out the model, and save to a tmp file.
|
||||
TessdataManager mgr;
|
||||
string fra_data = file::JoinPath(
|
||||
FLAGS_test_srcdir, "tessdata_best",
|
||||
"fra.traineddata");
|
||||
string fra_data =
|
||||
file::JoinPath(FLAGS_test_srcdir, "tessdata_best", "fra.traineddata");
|
||||
CHECK(mgr.Init(fra_data.c_str())) << "Failed to load " << fra_data;
|
||||
string model_path = file::JoinPath(FLAGS_test_tmpdir, "fra.lstm");
|
||||
CHECK(mgr.ExtractToFile(model_path.c_str()));
|
||||
@ -76,7 +75,7 @@ TEST_F(LSTMTrainerTest, ConvertModel) {
|
||||
// baseapi_test.cc).
|
||||
TessBaseAPI api;
|
||||
api.Init(FLAGS_test_tmpdir.c_str(), "deu", tesseract::OEM_LSTM_ONLY);
|
||||
Pix *src_pix = pixRead(TestDataNameToPath("phototest.tif").c_str());
|
||||
Pix* src_pix = pixRead(TestDataNameToPath("phototest.tif").c_str());
|
||||
CHECK(src_pix);
|
||||
api.SetImage(src_pix);
|
||||
std::unique_ptr<char[]> result(api.GetUTF8Text());
|
||||
|
@ -41,8 +41,9 @@ const int kNumNonReject = 1000;
|
||||
const int kNumCorrect = kNumNonReject - kNumTop1Errs;
|
||||
// The total number of answers is given by the number of non-rejects plus
|
||||
// all the multiple answers.
|
||||
const int kNumAnswers = kNumNonReject + 2*(kNumTop2Errs - kNumTopNErrs) +
|
||||
(kNumTop1Errs - kNumTop2Errs) + (kNumTopTopErrs - kNumTop1Errs);
|
||||
const int kNumAnswers = kNumNonReject + 2 * (kNumTop2Errs - kNumTopNErrs) +
|
||||
(kNumTop1Errs - kNumTop2Errs) +
|
||||
(kNumTopTopErrs - kNumTop1Errs);
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
@ -51,7 +52,7 @@ namespace tesseract {
|
||||
class MockClassifier : public ShapeClassifier {
|
||||
public:
|
||||
explicit MockClassifier(ShapeTable* shape_table)
|
||||
: shape_table_(shape_table), num_done_(0), done_bad_font_(false) {
|
||||
: shape_table_(shape_table), num_done_(0), done_bad_font_(false) {
|
||||
// Add a false font answer to the shape table. We pick a random unichar_id,
|
||||
// add a new shape for it with a false font. Font must actually exist in
|
||||
// the font table, but not match anything in the first 1000 samples.
|
||||
@ -108,9 +109,7 @@ class MockClassifier : public ShapeClassifier {
|
||||
return results->size();
|
||||
}
|
||||
// Provides access to the ShapeTable that this classifier works with.
|
||||
virtual const ShapeTable* GetShapeTable() const {
|
||||
return shape_table_;
|
||||
}
|
||||
virtual const ShapeTable* GetShapeTable() const { return shape_table_; }
|
||||
|
||||
private:
|
||||
// Borrowed pointer to the ShapeTable.
|
||||
@ -140,12 +139,10 @@ const double kMin1lDistance = 0.25;
|
||||
class MasterTrainerTest : public testing::Test {
|
||||
protected:
|
||||
string TestDataNameToPath(const string& name) {
|
||||
return file::JoinPath(FLAGS_test_srcdir,
|
||||
"testdata/" + name);
|
||||
return file::JoinPath(FLAGS_test_srcdir, "testdata/" + name);
|
||||
}
|
||||
string TessdataPath() {
|
||||
return file::JoinPath(FLAGS_test_srcdir,
|
||||
"tessdata");
|
||||
return file::JoinPath(FLAGS_test_srcdir, "tessdata");
|
||||
}
|
||||
string TmpNameToPath(const string& name) {
|
||||
return file::JoinPath(FLAGS_test_tmpdir, name);
|
||||
@ -169,15 +166,15 @@ class MasterTrainerTest : public testing::Test {
|
||||
FLAGS_X = TestDataNameToPath("eng.xheights");
|
||||
FLAGS_U = TestDataNameToPath("eng.unicharset");
|
||||
string tr_file_name(TestDataNameToPath("eng.Arial.exp0.tr"));
|
||||
const char* argv[] = {tr_file_name.c_str() };
|
||||
const char* argv[] = {tr_file_name.c_str()};
|
||||
int argc = 1;
|
||||
STRING file_prefix;
|
||||
delete master_trainer_;
|
||||
delete shape_table_;
|
||||
shape_table_ = NULL;
|
||||
tessoptind = 0;
|
||||
master_trainer_ = LoadTrainingData(argc, argv, false,
|
||||
&shape_table_, &file_prefix);
|
||||
master_trainer_ =
|
||||
LoadTrainingData(argc, argv, false, &shape_table_, &file_prefix);
|
||||
EXPECT_TRUE(master_trainer_ != NULL);
|
||||
EXPECT_TRUE(shape_table_ != NULL);
|
||||
}
|
||||
@ -203,29 +200,29 @@ class MasterTrainerTest : public testing::Test {
|
||||
int shape_1 = shape_table_->FindShape(unichar_1, font_id);
|
||||
EXPECT_GE(shape_1, 0);
|
||||
|
||||
float dist_I_l = master_trainer_->ShapeDistance(*shape_table_,
|
||||
shape_I, shape_l);
|
||||
float dist_I_l =
|
||||
master_trainer_->ShapeDistance(*shape_table_, shape_I, shape_l);
|
||||
// No tolerance here. We expect that I and l should match exactly.
|
||||
EXPECT_EQ(0.0f, dist_I_l);
|
||||
float dist_l_I = master_trainer_->ShapeDistance(*shape_table_,
|
||||
shape_l, shape_I);
|
||||
float dist_l_I =
|
||||
master_trainer_->ShapeDistance(*shape_table_, shape_l, shape_I);
|
||||
// BOTH ways.
|
||||
EXPECT_EQ(0.0f, dist_l_I);
|
||||
|
||||
// l/1 on the other hand should be distinct.
|
||||
float dist_l_1 = master_trainer_->ShapeDistance(*shape_table_,
|
||||
shape_l, shape_1);
|
||||
float dist_l_1 =
|
||||
master_trainer_->ShapeDistance(*shape_table_, shape_l, shape_1);
|
||||
EXPECT_GT(dist_l_1, kMin1lDistance);
|
||||
float dist_1_l = master_trainer_->ShapeDistance(*shape_table_,
|
||||
shape_1, shape_l);
|
||||
float dist_1_l =
|
||||
master_trainer_->ShapeDistance(*shape_table_, shape_1, shape_l);
|
||||
EXPECT_GT(dist_1_l, kMin1lDistance);
|
||||
|
||||
// So should I/1.
|
||||
float dist_I_1 = master_trainer_->ShapeDistance(*shape_table_,
|
||||
shape_I, shape_1);
|
||||
float dist_I_1 =
|
||||
master_trainer_->ShapeDistance(*shape_table_, shape_I, shape_1);
|
||||
EXPECT_GT(dist_I_1, kMin1lDistance);
|
||||
float dist_1_I = master_trainer_->ShapeDistance(*shape_table_,
|
||||
shape_1, shape_I);
|
||||
float dist_1_I =
|
||||
master_trainer_->ShapeDistance(*shape_table_, shape_1, shape_I);
|
||||
EXPECT_GT(dist_1_I, kMin1lDistance);
|
||||
}
|
||||
|
||||
@ -249,15 +246,14 @@ TEST_F(MasterTrainerTest, ErrorCounterTest) {
|
||||
LoadMasterTrainer();
|
||||
// Add the space character to the shape_table_ if not already present to
|
||||
// count junk.
|
||||
if (shape_table_->FindShape(0, -1) < 0)
|
||||
shape_table_->AddShape(0, 0);
|
||||
if (shape_table_->FindShape(0, -1) < 0) shape_table_->AddShape(0, 0);
|
||||
// Make a mock classifier.
|
||||
tesseract::ShapeClassifier* shape_classifier =
|
||||
new tesseract::MockClassifier(shape_table_);
|
||||
// Get the accuracy report.
|
||||
STRING accuracy_report;
|
||||
master_trainer_->TestClassifierOnSamples(tesseract::CT_UNICHAR_TOP1_ERR,
|
||||
0, false, shape_classifier,
|
||||
master_trainer_->TestClassifierOnSamples(tesseract::CT_UNICHAR_TOP1_ERR, 0,
|
||||
false, shape_classifier,
|
||||
&accuracy_report);
|
||||
LOG(INFO) << accuracy_report.string();
|
||||
string result_string = accuracy_report.string();
|
||||
@ -287,6 +283,3 @@ TEST_F(MasterTrainerTest, ErrorCounterTest) {
|
||||
}
|
||||
|
||||
} // namespace.
|
||||
|
||||
|
||||
|
||||
|
@ -15,8 +15,8 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "matrix.h"
|
||||
#include "include_gunit.h"
|
||||
#include "genericvector.h"
|
||||
#include "include_gunit.h"
|
||||
#include "tprintf.h"
|
||||
|
||||
namespace {
|
||||
|
@ -1,10 +1,10 @@
|
||||
#include "tesseract/lstm/networkio.h"
|
||||
#include "tesseract/lstm/stridemap.h"
|
||||
|
||||
using tesseract::FlexDimensions;
|
||||
using tesseract::FD_BATCH;
|
||||
using tesseract::FD_HEIGHT;
|
||||
using tesseract::FD_WIDTH;
|
||||
using tesseract::FlexDimensions;
|
||||
using tesseract::NetworkIO;
|
||||
using tesseract::StrideMap;
|
||||
|
||||
@ -93,9 +93,9 @@ TEST_F(NetworkioTest, CopyWithYReversal) {
|
||||
StrideMap::Index index(copy.stride_map());
|
||||
int next_t = 0;
|
||||
int pos = 0;
|
||||
std::vector<int> expected_values = {8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2,
|
||||
3, 27, 28, 29, 30, 31, 22, 23, 24, 25, 26,
|
||||
17, 18, 19, 20, 21, 12, 13, 14, 15, 16};
|
||||
std::vector<int> expected_values = {
|
||||
8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3, 27, 28, 29, 30,
|
||||
31, 22, 23, 24, 25, 26, 17, 18, 19, 20, 21, 12, 13, 14, 15, 16};
|
||||
do {
|
||||
int t = index.t();
|
||||
// The indexed values match the expected values.
|
||||
@ -125,9 +125,9 @@ TEST_F(NetworkioTest, CopyWithXReversal) {
|
||||
StrideMap::Index index(copy.stride_map());
|
||||
int next_t = 0;
|
||||
int pos = 0;
|
||||
std::vector<int> expected_values = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9,
|
||||
8, 16, 15, 14, 13, 12, 21, 20, 19, 18, 17,
|
||||
26, 25, 24, 23, 22, 31, 30, 29, 28, 27};
|
||||
std::vector<int> expected_values = {
|
||||
3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 16, 15, 14, 13,
|
||||
12, 21, 20, 19, 18, 17, 26, 25, 24, 23, 22, 31, 30, 29, 28, 27};
|
||||
do {
|
||||
int t = index.t();
|
||||
// The indexed values match the expected values.
|
||||
@ -157,9 +157,9 @@ TEST_F(NetworkioTest, CopyWithXYTranspose) {
|
||||
StrideMap::Index index(copy.stride_map());
|
||||
int next_t = 0;
|
||||
int pos = 0;
|
||||
std::vector<int> expected_values = {0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7,
|
||||
11, 12, 17, 22, 27, 13, 18, 23, 28, 14, 19,
|
||||
24, 29, 15, 20, 25, 30, 16, 21, 26, 31};
|
||||
std::vector<int> expected_values = {
|
||||
0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11, 12, 17, 22, 27,
|
||||
13, 18, 23, 28, 14, 19, 24, 29, 15, 20, 25, 30, 16, 21, 26, 31};
|
||||
do {
|
||||
int t = index.t();
|
||||
// The indexed values match the expected values.
|
||||
|
@ -23,7 +23,7 @@ TEST(NormstrngsTest, BasicText) {
|
||||
}
|
||||
|
||||
TEST(NormstrngsTest, LigatureText) {
|
||||
const char* kTwoByteLigText = "ij"; // U+0133 (ij) -> ij
|
||||
const char* kTwoByteLigText = "ij"; // U+0133 (ij) -> ij
|
||||
string result;
|
||||
EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFKC, OCRNorm::kNormalize,
|
||||
GraphemeNorm::kNormalize, kTwoByteLigText,
|
||||
@ -51,7 +51,7 @@ TEST(NormstrngsTest, OcrSpecificNormalization) {
|
||||
&result));
|
||||
EXPECT_STREQ("\"Hi", result.c_str());
|
||||
|
||||
const char* kEmDash = "Hi—"; // U+2014 (—) -> U+002D (-)
|
||||
const char* kEmDash = "Hi—"; // U+2014 (—) -> U+002D (-)
|
||||
EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFKC, OCRNorm::kNormalize,
|
||||
GraphemeNorm::kNormalize, kEmDash, &result));
|
||||
EXPECT_STREQ("Hi-", result.c_str());
|
||||
|
@ -16,7 +16,7 @@
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
int test_data[] = { 8, 1, 2, -4, 7, 9, 65536, 4, 9, 0, -32767, 6, 7};
|
||||
int test_data[] = {8, 1, 2, -4, 7, 9, 65536, 4, 9, 0, -32767, 6, 7};
|
||||
|
||||
// The fixture for testing GenericHeap and DoublePtr.
|
||||
class NthItemTest : public testing::Test {
|
||||
@ -64,7 +64,7 @@ TEST_F(NthItemTest, GeneralTest) {
|
||||
TEST_F(NthItemTest, BoringTest) {
|
||||
KDVector v;
|
||||
// Push the test data onto the KDVector.
|
||||
int test_data[] = { 8, 8, 8, 8, 8, 7, 7, 7, 7};
|
||||
int test_data[] = {8, 8, 8, 8, 8, 7, 7, 7, 7};
|
||||
for (int i = 0; i < ARRAYSIZE(test_data); ++i) {
|
||||
IntKDPair pair(test_data[i], i);
|
||||
v.push_back(pair);
|
||||
|
@ -14,104 +14,112 @@
|
||||
// limitations under the License.
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
//based on https://gist.github.com/amitdo/7c7a522004dd79b398340c9595b377e1
|
||||
// based on https://gist.github.com/amitdo/7c7a522004dd79b398340c9595b377e1
|
||||
|
||||
// expects clones of tessdata, tessdata_fast and tessdata_best repos
|
||||
|
||||
//#include "log.h"
|
||||
#include "include_gunit.h"
|
||||
#include "baseapi.h"
|
||||
#include "leptonica/allheaders.h"
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include "baseapi.h"
|
||||
#include "include_gunit.h"
|
||||
#include "leptonica/allheaders.h"
|
||||
|
||||
namespace {
|
||||
|
||||
class TestClass : public testing::Test {
|
||||
protected:
|
||||
};
|
||||
};
|
||||
|
||||
void OSDTester( int expected_deg, const char* imgname, const char* tessdatadir) {
|
||||
//log.info() << tessdatadir << " for image: " << imgname << std::endl;
|
||||
tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
|
||||
ASSERT_FALSE(api->Init(tessdatadir, "osd")) << "Could not initialize tesseract.";
|
||||
Pix *image = pixRead(imgname);
|
||||
ASSERT_TRUE(image != nullptr) << "Failed to read test image.";
|
||||
api->SetImage(image);
|
||||
int orient_deg;
|
||||
float orient_conf;
|
||||
const char* script_name;
|
||||
float script_conf;
|
||||
bool detected = api->DetectOrientationScript(&orient_deg, &orient_conf, &script_name, &script_conf);
|
||||
ASSERT_FALSE(!detected) << "Failed to detect OSD.";
|
||||
printf("************ Orientation in degrees: %d, Orientation confidence: %.2f\n"
|
||||
" Script: %s, Script confidence: %.2f\n",
|
||||
orient_deg, orient_conf,
|
||||
script_name, script_conf);
|
||||
EXPECT_EQ(expected_deg, orient_deg);
|
||||
api->End();
|
||||
pixDestroy(&image);
|
||||
}
|
||||
void OSDTester(int expected_deg, const char* imgname, const char* tessdatadir) {
|
||||
// log.info() << tessdatadir << " for image: " << imgname << std::endl;
|
||||
tesseract::TessBaseAPI* api = new tesseract::TessBaseAPI();
|
||||
ASSERT_FALSE(api->Init(tessdatadir, "osd"))
|
||||
<< "Could not initialize tesseract.";
|
||||
Pix* image = pixRead(imgname);
|
||||
ASSERT_TRUE(image != nullptr) << "Failed to read test image.";
|
||||
api->SetImage(image);
|
||||
int orient_deg;
|
||||
float orient_conf;
|
||||
const char* script_name;
|
||||
float script_conf;
|
||||
bool detected = api->DetectOrientationScript(&orient_deg, &orient_conf,
|
||||
&script_name, &script_conf);
|
||||
ASSERT_FALSE(!detected) << "Failed to detect OSD.";
|
||||
printf(
|
||||
"************ Orientation in degrees: %d, Orientation confidence: %.2f\n"
|
||||
" Script: %s, Script confidence: %.2f\n",
|
||||
orient_deg, orient_conf, script_name, script_conf);
|
||||
EXPECT_EQ(expected_deg, orient_deg);
|
||||
api->End();
|
||||
pixDestroy(&image);
|
||||
}
|
||||
|
||||
class OSDTest : public TestClass ,
|
||||
public ::testing::WithParamInterface<std::tuple<int, const char*, const char*>> {};
|
||||
class OSDTest : public TestClass,
|
||||
public ::testing::WithParamInterface<
|
||||
std::tuple<int, const char*, const char*>> {};
|
||||
|
||||
TEST_P(OSDTest, MatchOrientationDegrees) {
|
||||
OSDTester(std::get<0>(GetParam()), std::get<1>(GetParam()), std::get<2>(GetParam()));
|
||||
}
|
||||
TEST_P(OSDTest, MatchOrientationDegrees) {
|
||||
OSDTester(std::get<0>(GetParam()), std::get<1>(GetParam()),
|
||||
std::get<2>(GetParam()));
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P( TessdataEngEuroHebrew, OSDTest,
|
||||
::testing::Combine(
|
||||
::testing::Values(0),
|
||||
::testing::Values(TESTING_DIR "/phototest.tif",
|
||||
TESTING_DIR "/eurotext.tif",
|
||||
TESTING_DIR "/hebrew.png"),
|
||||
::testing::Values(TESSDATA_DIR)));
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
TessdataEngEuroHebrew, OSDTest,
|
||||
::testing::Combine(::testing::Values(0),
|
||||
::testing::Values(TESTING_DIR "/phototest.tif",
|
||||
TESTING_DIR "/eurotext.tif",
|
||||
TESTING_DIR "/hebrew.png"),
|
||||
::testing::Values(TESSDATA_DIR)));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P( TessdataBestEngEuroHebrew, OSDTest,
|
||||
::testing::Combine(
|
||||
::testing::Values(0),
|
||||
::testing::Values(TESTING_DIR "/phototest.tif",
|
||||
TESTING_DIR "/eurotext.tif",
|
||||
TESTING_DIR "/hebrew.png"),
|
||||
::testing::Values(TESSDATA_DIR "_best")));
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
TessdataBestEngEuroHebrew, OSDTest,
|
||||
::testing::Combine(::testing::Values(0),
|
||||
::testing::Values(TESTING_DIR "/phototest.tif",
|
||||
TESTING_DIR "/eurotext.tif",
|
||||
TESTING_DIR "/hebrew.png"),
|
||||
::testing::Values(TESSDATA_DIR "_best")));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P( TessdataFastEngEuroHebrew, OSDTest,
|
||||
::testing::Combine(
|
||||
::testing::Values(0),
|
||||
::testing::Values(TESTING_DIR "/phototest.tif",
|
||||
TESTING_DIR "/eurotext.tif",
|
||||
TESTING_DIR "/hebrew.png"),
|
||||
::testing::Values(TESSDATA_DIR "_fast")));
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
TessdataFastEngEuroHebrew, OSDTest,
|
||||
::testing::Combine(::testing::Values(0),
|
||||
::testing::Values(TESTING_DIR "/phototest.tif",
|
||||
TESTING_DIR "/eurotext.tif",
|
||||
TESTING_DIR "/hebrew.png"),
|
||||
::testing::Values(TESSDATA_DIR "_fast")));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P( TessdataFastRotated90, OSDTest,
|
||||
::testing::Combine(
|
||||
::testing::Values(90),
|
||||
::testing::Values(TESTING_DIR "/phototest-rotated-R.png"),
|
||||
::testing::Values(TESSDATA_DIR "_fast")));
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
TessdataFastRotated90, OSDTest,
|
||||
::testing::Combine(::testing::Values(90),
|
||||
::testing::Values(TESTING_DIR
|
||||
"/phototest-rotated-R.png"),
|
||||
::testing::Values(TESSDATA_DIR "_fast")));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P( TessdataFastRotated180, OSDTest,
|
||||
::testing::Combine(
|
||||
::testing::Values(180),
|
||||
::testing::Values(TESTING_DIR "/phototest-rotated-180.png"),
|
||||
::testing::Values(TESSDATA_DIR "_fast")));
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
TessdataFastRotated180, OSDTest,
|
||||
::testing::Combine(::testing::Values(180),
|
||||
::testing::Values(TESTING_DIR
|
||||
"/phototest-rotated-180.png"),
|
||||
::testing::Values(TESSDATA_DIR "_fast")));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P( TessdataFastRotated270, OSDTest,
|
||||
::testing::Combine(
|
||||
::testing::Values(270),
|
||||
::testing::Values(TESTING_DIR "/phototest-rotated-L.png"),
|
||||
::testing::Values(TESSDATA_DIR "_fast")));
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
TessdataFastRotated270, OSDTest,
|
||||
::testing::Combine(::testing::Values(270),
|
||||
::testing::Values(TESTING_DIR
|
||||
"/phototest-rotated-L.png"),
|
||||
::testing::Values(TESSDATA_DIR "_fast")));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P( TessdataFastDevaRotated270, OSDTest,
|
||||
::testing::Combine(
|
||||
::testing::Values(270),
|
||||
::testing::Values(TESTING_DIR "/devatest-rotated-270.png"),
|
||||
::testing::Values(TESSDATA_DIR "_fast")));
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
TessdataFastDevaRotated270, OSDTest,
|
||||
::testing::Combine(::testing::Values(270),
|
||||
::testing::Values(TESTING_DIR
|
||||
"/devatest-rotated-270.png"),
|
||||
::testing::Values(TESSDATA_DIR "_fast")));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P( TessdataFastDeva, OSDTest,
|
||||
::testing::Combine(
|
||||
::testing::Values(0),
|
||||
::testing::Values(TESTING_DIR "/devatest.png"),
|
||||
::testing::Values(TESSDATA_DIR "_fast")));
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
TessdataFastDeva, OSDTest,
|
||||
::testing::Combine(::testing::Values(0),
|
||||
::testing::Values(TESTING_DIR "/devatest.png"),
|
||||
::testing::Values(TESSDATA_DIR "_fast")));
|
||||
|
||||
} // namespace
|
||||
|
@ -10,20 +10,14 @@ namespace {
|
||||
class PageSegModeTest : public testing::Test {
|
||||
protected:
|
||||
string TestDataNameToPath(const string& name) {
|
||||
return file::JoinPath(FLAGS_test_srcdir,
|
||||
"testdata/" + name);
|
||||
return file::JoinPath(FLAGS_test_srcdir, "testdata/" + name);
|
||||
}
|
||||
string TessdataPath() {
|
||||
return file::JoinPath(FLAGS_test_srcdir,
|
||||
"tessdata");
|
||||
return file::JoinPath(FLAGS_test_srcdir, "tessdata");
|
||||
}
|
||||
|
||||
PageSegModeTest() {
|
||||
src_pix_ = NULL;
|
||||
}
|
||||
~PageSegModeTest() {
|
||||
pixDestroy(&src_pix_);
|
||||
}
|
||||
PageSegModeTest() { src_pix_ = NULL; }
|
||||
~PageSegModeTest() { pixDestroy(&src_pix_); }
|
||||
|
||||
void SetImage(const char* filename) {
|
||||
pixDestroy(&src_pix_);
|
||||
@ -34,26 +28,26 @@ class PageSegModeTest : public testing::Test {
|
||||
|
||||
// Tests that the given rectangle produces exactly the given text in the
|
||||
// given segmentation mode (after chopping off the last 2 newlines.)
|
||||
void VerifyRectText(tesseract::PageSegMode mode, const char* str,
|
||||
int left, int top, int width, int height) {
|
||||
void VerifyRectText(tesseract::PageSegMode mode, const char* str, int left,
|
||||
int top, int width, int height) {
|
||||
api_.SetPageSegMode(mode);
|
||||
api_.SetRectangle(left, top, width, height);
|
||||
char* result = api_.GetUTF8Text();
|
||||
chomp_string(result);
|
||||
chomp_string(result);
|
||||
EXPECT_STREQ(str, result);
|
||||
delete [] result;
|
||||
delete[] result;
|
||||
}
|
||||
|
||||
// Tests that the given rectangle does NOT produce the given text in the
|
||||
// given segmentation mode.
|
||||
void NotRectText(tesseract::PageSegMode mode, const char* str,
|
||||
int left, int top, int width, int height) {
|
||||
void NotRectText(tesseract::PageSegMode mode, const char* str, int left,
|
||||
int top, int width, int height) {
|
||||
api_.SetPageSegMode(mode);
|
||||
api_.SetRectangle(left, top, width, height);
|
||||
char* result = api_.GetUTF8Text();
|
||||
EXPECT_STRNE(str, result);
|
||||
delete [] result;
|
||||
delete[] result;
|
||||
}
|
||||
|
||||
Pix* src_pix_;
|
||||
@ -66,26 +60,21 @@ class PageSegModeTest : public testing::Test {
|
||||
TEST_F(PageSegModeTest, WordTest) {
|
||||
SetImage("segmodeimg.tif");
|
||||
// Test various rectangles around the inverse page number.
|
||||
VerifyRectText(tesseract::PSM_SINGLE_WORD, "183",
|
||||
1482, 146, 72, 44);
|
||||
VerifyRectText(tesseract::PSM_SINGLE_WORD, "183",
|
||||
1474, 134, 82, 72);
|
||||
VerifyRectText(tesseract::PSM_SINGLE_WORD, "183",
|
||||
1459, 116, 118, 112);
|
||||
VerifyRectText(tesseract::PSM_SINGLE_WORD, "183", 1482, 146, 72, 44);
|
||||
VerifyRectText(tesseract::PSM_SINGLE_WORD, "183", 1474, 134, 82, 72);
|
||||
VerifyRectText(tesseract::PSM_SINGLE_WORD, "183", 1459, 116, 118, 112);
|
||||
// Test a random pair of words as a line
|
||||
VerifyRectText(tesseract::PSM_SINGLE_LINE, "What should",
|
||||
1119, 621, 245, 54);
|
||||
VerifyRectText(tesseract::PSM_SINGLE_LINE, "What should", 1119, 621, 245, 54);
|
||||
// Test a random pair of words as a word
|
||||
VerifyRectText(tesseract::PSM_SINGLE_WORD, "Whatshould",
|
||||
1119, 621, 245, 54);
|
||||
VerifyRectText(tesseract::PSM_SINGLE_WORD, "Whatshould", 1119, 621, 245, 54);
|
||||
// Test single block mode.
|
||||
VerifyRectText(tesseract::PSM_SINGLE_BLOCK, "both the\nfrom the",
|
||||
181, 676, 179, 104);
|
||||
VerifyRectText(tesseract::PSM_SINGLE_BLOCK, "both the\nfrom the", 181, 676,
|
||||
179, 104);
|
||||
// But doesn't work in line or word mode.
|
||||
NotRectText(tesseract::PSM_SINGLE_LINE, "both the\nfrom the",
|
||||
181, 676, 179, 104);
|
||||
NotRectText(tesseract::PSM_SINGLE_WORD, "both the\nfrom the",
|
||||
181, 676, 179, 104);
|
||||
NotRectText(tesseract::PSM_SINGLE_LINE, "both the\nfrom the", 181, 676, 179,
|
||||
104);
|
||||
NotRectText(tesseract::PSM_SINGLE_WORD, "both the\nfrom the", 181, 676, 179,
|
||||
104);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
@ -14,19 +14,24 @@ DECLARE_BOOL_PARAM_FLAG(use_only_legacy_fonts);
|
||||
|
||||
namespace {
|
||||
|
||||
using tesseract::FontUtils;
|
||||
using tesseract::File;
|
||||
using tesseract::FontUtils;
|
||||
using tesseract::PangoFontInfo;
|
||||
|
||||
// Fonts in testdata directory
|
||||
const char* kExpectedFontNames[] = {
|
||||
"Arab", "Arial Bold Italic", "DejaVu Sans Ultra-Light", "Lohit Hindi",
|
||||
const char* kExpectedFontNames[] = {"Arab",
|
||||
"Arial Bold Italic",
|
||||
"DejaVu Sans Ultra-Light",
|
||||
"Lohit Hindi",
|
||||
#if PANGO_VERSION <= 12005
|
||||
"Times New Roman",
|
||||
"Times New Roman",
|
||||
#else
|
||||
"Times New Roman,", // Pango v1.36.2 requires a trailing ','
|
||||
"Times New Roman,", // Pango v1.36.2
|
||||
// requires a trailing
|
||||
// ','
|
||||
#endif
|
||||
"UnBatang", "Verdana"};
|
||||
"UnBatang",
|
||||
"Verdana"};
|
||||
|
||||
// Sample text used in tests.
|
||||
const char kArabicText[] = "والفكر والصراع 1234,\nوالفكر والصراع";
|
||||
@ -36,18 +41,17 @@ const char kKorText[] = "이는 것으로";
|
||||
// Hindi words containing illegal vowel sequences.
|
||||
const char* kBadlyFormedHinWords[] = {
|
||||
#if PANGO_VERSION <= 12005
|
||||
"उपयोक्ताो", "नहीें", "कहीअे", "पत्रिाका", "छह्णाीस",
|
||||
"उपयोक्ताो", "नहीें", "कहीअे", "पत्रिाका", "छह्णाीस",
|
||||
#endif
|
||||
// Pango v1.36.2 will render the above words even though they are invalid.
|
||||
"प्रंात", NULL };
|
||||
// Pango v1.36.2 will render the above words even though they are invalid.
|
||||
"प्रंात", NULL};
|
||||
|
||||
class PangoFontInfoTest : public ::testing::Test {
|
||||
protected:
|
||||
// Creates a fake fonts.conf file that points to the testdata fonts for
|
||||
// fontconfig to initialize with.
|
||||
static void SetUpTestCase() {
|
||||
FLAGS_fonts_dir = File::JoinPath(
|
||||
FLAGS_test_srcdir, "testdata");
|
||||
FLAGS_fonts_dir = File::JoinPath(FLAGS_test_srcdir, "testdata");
|
||||
FLAGS_fontconfig_tmpdir = FLAGS_test_tmpdir;
|
||||
FLAGS_use_only_legacy_fonts = false;
|
||||
}
|
||||
@ -111,12 +115,11 @@ TEST_F(PangoFontInfoTest, CanRenderString) {
|
||||
TEST_F(PangoFontInfoTest, CanRenderLigature) {
|
||||
font_info_.ParseFontDescriptionName("Arab 12");
|
||||
const char kArabicLigature[] = "لا";
|
||||
EXPECT_TRUE(font_info_.CanRenderString(kArabicLigature,
|
||||
strlen(kArabicLigature)));
|
||||
EXPECT_TRUE(
|
||||
font_info_.CanRenderString(kArabicLigature, strlen(kArabicLigature)));
|
||||
|
||||
printf("Next word\n");
|
||||
EXPECT_TRUE(font_info_.CanRenderString(kArabicText,
|
||||
strlen(kArabicText)));
|
||||
EXPECT_TRUE(font_info_.CanRenderString(kArabicText, strlen(kArabicText)));
|
||||
}
|
||||
|
||||
TEST_F(PangoFontInfoTest, CannotRenderUncoveredString) {
|
||||
@ -142,9 +145,9 @@ TEST_F(PangoFontInfoTest, CanDropUncoveredChars) {
|
||||
|
||||
// Don't drop non-letter characters like word joiners.
|
||||
const char* kJoiners[] = {
|
||||
"\u2060", // U+2060 (WJ)
|
||||
"\u200C", // U+200C (ZWNJ)
|
||||
"\u200D" // U+200D (ZWJ)
|
||||
"\u2060", // U+2060 (WJ)
|
||||
"\u200C", // U+200C (ZWNJ)
|
||||
"\u200D" // U+200D (ZWJ)
|
||||
};
|
||||
for (int i = 0; i < ARRAYSIZE(kJoiners); ++i) {
|
||||
word = kJoiners[i];
|
||||
@ -153,7 +156,6 @@ TEST_F(PangoFontInfoTest, CanDropUncoveredChars) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// ------------------------ FontUtils ------------------------------------
|
||||
|
||||
class FontUtilsTest : public ::testing::Test {
|
||||
@ -161,8 +163,7 @@ class FontUtilsTest : public ::testing::Test {
|
||||
// Creates a fake fonts.conf file that points to the testdata fonts for
|
||||
// fontconfig to initialize with.
|
||||
static void SetUpTestCase() {
|
||||
FLAGS_fonts_dir = File::JoinPath(
|
||||
FLAGS_test_srcdir, "testdata");
|
||||
FLAGS_fonts_dir = File::JoinPath(FLAGS_test_srcdir, "testdata");
|
||||
FLAGS_fontconfig_tmpdir = FLAGS_test_tmpdir;
|
||||
}
|
||||
|
||||
@ -229,8 +230,8 @@ TEST_F(FontUtilsTest, DoesFindBestFonts) {
|
||||
}
|
||||
|
||||
TEST_F(FontUtilsTest, DoesSelectFont) {
|
||||
const char* kLangText[] = { kArabicText, kEngText, kHinText, kKorText, NULL };
|
||||
const char* kLangNames[] = { "Arabic", "English", "Hindi", "Korean", NULL };
|
||||
const char* kLangText[] = {kArabicText, kEngText, kHinText, kKorText, NULL};
|
||||
const char* kLangNames[] = {"Arabic", "English", "Hindi", "Korean", NULL};
|
||||
for (int i = 0; kLangText[i] != NULL; ++i) {
|
||||
SCOPED_TRACE(kLangNames[i]);
|
||||
std::vector<string> graphemes;
|
||||
@ -246,8 +247,7 @@ TEST_F(FontUtilsTest, DoesFailToSelectFont) {
|
||||
const char kMixedScriptText[] = "पिताने विवाह की | والفكر والصراع";
|
||||
std::vector<string> graphemes;
|
||||
string selected_font;
|
||||
EXPECT_FALSE(FontUtils::SelectFont(kMixedScriptText,
|
||||
strlen(kMixedScriptText),
|
||||
EXPECT_FALSE(FontUtils::SelectFont(kMixedScriptText, strlen(kMixedScriptText),
|
||||
&selected_font, &graphemes));
|
||||
}
|
||||
|
||||
@ -271,17 +271,16 @@ TEST_F(FontUtilsTest, GetAllRenderableCharacters) {
|
||||
FontUtils::GetAllRenderableCharacters(selected_fonts, &unicode_mask);
|
||||
EXPECT_TRUE(unicode_mask['1']);
|
||||
EXPECT_TRUE(unicode_mask[kHindiChar]);
|
||||
EXPECT_FALSE(unicode_mask['A']); // Lohit doesn't render English,
|
||||
EXPECT_FALSE(unicode_mask[kArabicChar]); // or Arabic,
|
||||
EXPECT_FALSE(unicode_mask[kMongolianChar]); // or Mongolian,
|
||||
EXPECT_FALSE(unicode_mask[kOghamChar]); // or Ogham.
|
||||
EXPECT_FALSE(unicode_mask['A']); // Lohit doesn't render English,
|
||||
EXPECT_FALSE(unicode_mask[kArabicChar]); // or Arabic,
|
||||
EXPECT_FALSE(unicode_mask[kMongolianChar]); // or Mongolian,
|
||||
EXPECT_FALSE(unicode_mask[kOghamChar]); // or Ogham.
|
||||
|
||||
// Check that none of the included fonts cover the Mongolian or Ogham space
|
||||
// characters.
|
||||
for (int f = 0; f < ARRAYSIZE(kExpectedFontNames); ++f) {
|
||||
SCOPED_TRACE(absl::StrCat("Testing ", kExpectedFontNames[f]));
|
||||
FontUtils::GetAllRenderableCharacters(kExpectedFontNames[f],
|
||||
&unicode_mask);
|
||||
FontUtils::GetAllRenderableCharacters(kExpectedFontNames[f], &unicode_mask);
|
||||
EXPECT_FALSE(unicode_mask[kOghamChar]);
|
||||
EXPECT_FALSE(unicode_mask[kMongolianChar]);
|
||||
}
|
||||
|
@ -9,12 +9,11 @@ namespace { // anonymous namespace
|
||||
|
||||
// Functions for making monospace ASCII trial text for the paragraph detector.
|
||||
const tesseract::ParagraphJustification kLeft = tesseract::JUSTIFICATION_LEFT;
|
||||
const tesseract::ParagraphJustification kCenter
|
||||
= tesseract::JUSTIFICATION_CENTER;
|
||||
const tesseract::ParagraphJustification kRight
|
||||
= tesseract::JUSTIFICATION_RIGHT;
|
||||
const tesseract::ParagraphJustification kUnknown
|
||||
= tesseract::JUSTIFICATION_UNKNOWN;
|
||||
const tesseract::ParagraphJustification kCenter =
|
||||
tesseract::JUSTIFICATION_CENTER;
|
||||
const tesseract::ParagraphJustification kRight = tesseract::JUSTIFICATION_RIGHT;
|
||||
const tesseract::ParagraphJustification kUnknown =
|
||||
tesseract::JUSTIFICATION_UNKNOWN;
|
||||
|
||||
enum TextModelInputType {
|
||||
PCONT = 0, // Continuation line of a paragraph (default).
|
||||
@ -23,7 +22,7 @@ enum TextModelInputType {
|
||||
};
|
||||
|
||||
struct TextAndModel {
|
||||
const char *ascii;
|
||||
const char* ascii;
|
||||
TextModelInputType model_type;
|
||||
|
||||
// fields corresponding to PARA (see ccstruct/ocrpara.h)
|
||||
@ -34,13 +33,13 @@ struct TextAndModel {
|
||||
|
||||
// Imagine that the given text is typewriter ASCII with each character ten
|
||||
// pixels wide and twenty pixels high and return an appropriate row_info.
|
||||
void AsciiToRowInfo(const char *text, int row_number,
|
||||
tesseract::RowInfo *info) {
|
||||
void AsciiToRowInfo(const char* text, int row_number,
|
||||
tesseract::RowInfo* info) {
|
||||
const int kCharWidth = 10;
|
||||
const int kLineSpace = 30;
|
||||
info->text = text;
|
||||
info->has_leaders = strstr(text, "...") != NULL ||
|
||||
strstr(text, ". . .") != NULL;
|
||||
info->has_leaders =
|
||||
strstr(text, "...") != NULL || strstr(text, ". . .") != NULL;
|
||||
info->has_drop_cap = false;
|
||||
info->pix_ldistance = info->pix_rdistance = 0;
|
||||
info->average_interword_space = kCharWidth;
|
||||
@ -50,20 +49,21 @@ void AsciiToRowInfo(const char *text, int row_number,
|
||||
|
||||
std::vector<string> words = absl::StrSplit(text, ' ', absl::SkipEmpty());
|
||||
info->num_words = words.size();
|
||||
if (info->num_words < 1)
|
||||
return;
|
||||
if (info->num_words < 1) return;
|
||||
|
||||
info->lword_text = words[0].c_str();
|
||||
info->rword_text = words[words.size() - 1].c_str();
|
||||
int lspace = 0;
|
||||
while (lspace < info->text.size() && text[lspace] == ' ') { lspace++; }
|
||||
while (lspace < info->text.size() && text[lspace] == ' ') {
|
||||
lspace++;
|
||||
}
|
||||
int rspace = 0;
|
||||
while (rspace < info->text.size() &&
|
||||
text[info->text.size() - rspace - 1] == ' ') {
|
||||
rspace++;
|
||||
}
|
||||
|
||||
int top = - kLineSpace * row_number;
|
||||
int top = -kLineSpace * row_number;
|
||||
int bottom = top - kLineSpace;
|
||||
int row_right = kCharWidth * info->text.size();
|
||||
int lword_width = kCharWidth * info->lword_text.size();
|
||||
@ -71,25 +71,19 @@ void AsciiToRowInfo(const char *text, int row_number,
|
||||
info->pix_ldistance = lspace * kCharWidth;
|
||||
info->pix_rdistance = rspace * kCharWidth;
|
||||
info->lword_box =
|
||||
TBOX(info->pix_ldistance, bottom,
|
||||
info->pix_ldistance + lword_width, top);
|
||||
info->rword_box =
|
||||
TBOX(row_right - info->pix_rdistance - rword_width, bottom,
|
||||
row_right - info->pix_rdistance, top);
|
||||
TBOX(info->pix_ldistance, bottom, info->pix_ldistance + lword_width, top);
|
||||
info->rword_box = TBOX(row_right - info->pix_rdistance - rword_width, bottom,
|
||||
row_right - info->pix_rdistance, top);
|
||||
tesseract::LeftWordAttributes(
|
||||
NULL, NULL, info->lword_text,
|
||||
&info->lword_indicates_list_item,
|
||||
&info->lword_likely_starts_idea,
|
||||
&info->lword_likely_ends_idea);
|
||||
NULL, NULL, info->lword_text, &info->lword_indicates_list_item,
|
||||
&info->lword_likely_starts_idea, &info->lword_likely_ends_idea);
|
||||
tesseract::RightWordAttributes(
|
||||
NULL, NULL, info->rword_text,
|
||||
&info->rword_indicates_list_item,
|
||||
&info->rword_likely_starts_idea,
|
||||
&info->rword_likely_ends_idea);
|
||||
NULL, NULL, info->rword_text, &info->rword_indicates_list_item,
|
||||
&info->rword_likely_starts_idea, &info->rword_likely_ends_idea);
|
||||
}
|
||||
|
||||
void MakeAsciiRowInfos(const TextAndModel *row_infos, int n,
|
||||
GenericVector<tesseract::RowInfo> *output) {
|
||||
void MakeAsciiRowInfos(const TextAndModel* row_infos, int n,
|
||||
GenericVector<tesseract::RowInfo>* output) {
|
||||
output->clear();
|
||||
tesseract::RowInfo info;
|
||||
for (int i = 0; i < n; i++) {
|
||||
@ -100,8 +94,8 @@ void MakeAsciiRowInfos(const TextAndModel *row_infos, int n,
|
||||
|
||||
// Given n rows of reference ground truth, evaluate whether the n rows
|
||||
// of PARA * pointers yield the same paragraph breakpoints.
|
||||
void EvaluateParagraphDetection(const TextAndModel *correct, int n,
|
||||
const GenericVector<PARA *> &detector_output) {
|
||||
void EvaluateParagraphDetection(const TextAndModel* correct, int n,
|
||||
const GenericVector<PARA*>& detector_output) {
|
||||
int incorrect_breaks = 0;
|
||||
int missed_breaks = 0;
|
||||
int poorly_matched_models = 0;
|
||||
@ -111,10 +105,8 @@ void EvaluateParagraphDetection(const TextAndModel *correct, int n,
|
||||
for (int i = 1; i < n; i++) {
|
||||
bool has_break = correct[i].model_type != PCONT;
|
||||
bool detected_break = (detector_output[i - 1] != detector_output[i]);
|
||||
if (has_break && !detected_break)
|
||||
missed_breaks++;
|
||||
if (detected_break && !has_break)
|
||||
incorrect_breaks++;
|
||||
if (has_break && !detected_break) missed_breaks++;
|
||||
if (detected_break && !has_break) incorrect_breaks++;
|
||||
if (has_break) {
|
||||
if (correct[i].model_type == PNONE) {
|
||||
if (detector_output[i]->model != NULL) {
|
||||
@ -131,8 +123,7 @@ void EvaluateParagraphDetection(const TextAndModel *correct, int n,
|
||||
detector_output[i]->is_very_first_or_continuation) {
|
||||
bad_crowns++;
|
||||
}
|
||||
if (correct[i].is_list_item ^
|
||||
detector_output[i]->is_list_item) {
|
||||
if (correct[i].is_list_item ^ detector_output[i]->is_list_item) {
|
||||
bad_list_items++;
|
||||
}
|
||||
}
|
||||
@ -180,16 +171,16 @@ void EvaluateParagraphDetection(const TextAndModel *correct, int n,
|
||||
}
|
||||
}
|
||||
|
||||
void TestParagraphDetection(const TextAndModel *correct, int num_rows) {
|
||||
void TestParagraphDetection(const TextAndModel* correct, int num_rows) {
|
||||
GenericVector<tesseract::RowInfo> row_infos;
|
||||
GenericVector<PARA *> row_owners;
|
||||
GenericVector<PARA*> row_owners;
|
||||
PARA_LIST paragraphs;
|
||||
GenericVector<ParagraphModel *> models;
|
||||
GenericVector<ParagraphModel*> models;
|
||||
|
||||
MakeAsciiRowInfos(correct, num_rows, &row_infos);
|
||||
int debug_level(3);
|
||||
tesseract::DetectParagraphs(debug_level, &row_infos, &row_owners,
|
||||
&paragraphs, &models);
|
||||
tesseract::DetectParagraphs(debug_level, &row_infos, &row_owners, &paragraphs,
|
||||
&models);
|
||||
EvaluateParagraphDetection(correct, num_rows, row_owners);
|
||||
models.delete_data_pointers();
|
||||
}
|
||||
@ -220,15 +211,15 @@ TEST(ParagraphsTest, ListItemsIdentified) {
|
||||
typedef ParagraphModel PModel;
|
||||
|
||||
const TextAndModel kTwoSimpleParagraphs[] = {
|
||||
{" Look here, I have a paragraph.", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"This paragraph starts at the top"},
|
||||
{"of the page and takes 3 lines. "},
|
||||
{" Here I have a second paragraph", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"which indicates that the first "},
|
||||
{"paragraph is not a continuation "},
|
||||
{"from a previous page, as it is "},
|
||||
{"indented just like this second "},
|
||||
{"paragraph. "},
|
||||
{" Look here, I have a paragraph.", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"This paragraph starts at the top"},
|
||||
{"of the page and takes 3 lines. "},
|
||||
{" Here I have a second paragraph", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"which indicates that the first "},
|
||||
{"paragraph is not a continuation "},
|
||||
{"from a previous page, as it is "},
|
||||
{"indented just like this second "},
|
||||
{"paragraph. "},
|
||||
};
|
||||
|
||||
TEST(ParagraphsTest, TestSimpleParagraphDetection) {
|
||||
@ -237,33 +228,34 @@ TEST(ParagraphsTest, TestSimpleParagraphDetection) {
|
||||
}
|
||||
|
||||
const TextAndModel kFewCluesWithCrown[] = {
|
||||
{"This paragraph starts at the top", PSTART, PModel(kLeft, 0, 20, 0, 0), true},
|
||||
{"of the page and takes two lines."},
|
||||
{" Here I have a second paragraph", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"which indicates that the first "},
|
||||
{"paragraph is a continuation from"},
|
||||
{"a previous page, as it is "},
|
||||
{"indented just like this second "},
|
||||
{"paragraph. "},
|
||||
{"This paragraph starts at the top", PSTART, PModel(kLeft, 0, 20, 0, 0),
|
||||
true},
|
||||
{"of the page and takes two lines."},
|
||||
{" Here I have a second paragraph", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"which indicates that the first "},
|
||||
{"paragraph is a continuation from"},
|
||||
{"a previous page, as it is "},
|
||||
{"indented just like this second "},
|
||||
{"paragraph. "},
|
||||
};
|
||||
|
||||
|
||||
TEST(ParagraphsTest, TestFewCluesWithCrown) {
|
||||
TestParagraphDetection(kFewCluesWithCrown,
|
||||
ABSL_ARRAYSIZE(kFewCluesWithCrown));
|
||||
}
|
||||
|
||||
const TextAndModel kCrownedParagraph[] = {
|
||||
{"The first paragraph on a page is", PSTART, PModel(kLeft, 0, 20, 0, 0), true},
|
||||
{"often not indented as the rest "},
|
||||
{"of the paragraphs are. Nonethe-"},
|
||||
{"less it should be counted as the"},
|
||||
{"same type of paragraph. "},
|
||||
{" The second and third para- ", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"graphs are both indented two "},
|
||||
{"spaces. "},
|
||||
{" The first paragraph has what ", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"fmt refers to as a 'crown.' "},
|
||||
{"The first paragraph on a page is", PSTART, PModel(kLeft, 0, 20, 0, 0),
|
||||
true},
|
||||
{"often not indented as the rest "},
|
||||
{"of the paragraphs are. Nonethe-"},
|
||||
{"less it should be counted as the"},
|
||||
{"same type of paragraph. "},
|
||||
{" The second and third para- ", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"graphs are both indented two "},
|
||||
{"spaces. "},
|
||||
{" The first paragraph has what ", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"fmt refers to as a 'crown.' "},
|
||||
};
|
||||
|
||||
TEST(ParagraphsTest, TestCrownParagraphDetection) {
|
||||
@ -271,18 +263,18 @@ TEST(ParagraphsTest, TestCrownParagraphDetection) {
|
||||
}
|
||||
|
||||
const TextAndModel kFlushLeftParagraphs[] = {
|
||||
{"It is sometimes the case that", PSTART, PModel(kLeft, 0, 0, 0, 0)},
|
||||
{"flush left paragraphs (those"},
|
||||
{"with no body indent) are not"},
|
||||
{"actually crowns. "},
|
||||
{"Instead, further paragraphs are", PSTART, PModel(kLeft, 0, 0, 0, 0)},
|
||||
{"also flush left aligned. Usual-"},
|
||||
{"ly, these paragraphs are set"},
|
||||
{"apart vertically by some white-"},
|
||||
{"space, but you can also detect"},
|
||||
{"them by observing the big empty"},
|
||||
{"space at the ends of the para-"},
|
||||
{"graphs. "},
|
||||
{"It is sometimes the case that", PSTART, PModel(kLeft, 0, 0, 0, 0)},
|
||||
{"flush left paragraphs (those"},
|
||||
{"with no body indent) are not"},
|
||||
{"actually crowns. "},
|
||||
{"Instead, further paragraphs are", PSTART, PModel(kLeft, 0, 0, 0, 0)},
|
||||
{"also flush left aligned. Usual-"},
|
||||
{"ly, these paragraphs are set"},
|
||||
{"apart vertically by some white-"},
|
||||
{"space, but you can also detect"},
|
||||
{"them by observing the big empty"},
|
||||
{"space at the ends of the para-"},
|
||||
{"graphs. "},
|
||||
};
|
||||
|
||||
TEST(ParagraphsText, TestRealFlushLeftParagraphs) {
|
||||
@ -291,46 +283,45 @@ TEST(ParagraphsText, TestRealFlushLeftParagraphs) {
|
||||
};
|
||||
|
||||
const TextAndModel kSingleFullPageContinuation[] = {
|
||||
{"sometimes a page is one giant", PSTART, PModel(kLeft, 0, 20, 0, 0), true},
|
||||
{"continuation. It flows from"},
|
||||
{"line to line, using the full"},
|
||||
{"column width with no clear"},
|
||||
{"paragraph break, because it"},
|
||||
{"actually doesn't have one. It"},
|
||||
{"is the middle of one monster"},
|
||||
{"paragraph continued from the"},
|
||||
{"previous page and continuing"},
|
||||
{"onto the next page. There-"},
|
||||
{"fore, it ends up getting"},
|
||||
{"marked as a crown and then"},
|
||||
{"getting re-marked as any ex-"},
|
||||
{"isting model. Not great, but"},
|
||||
{"sometimes a page is one giant", PSTART, PModel(kLeft, 0, 20, 0, 0), true},
|
||||
{"continuation. It flows from"},
|
||||
{"line to line, using the full"},
|
||||
{"column width with no clear"},
|
||||
{"paragraph break, because it"},
|
||||
{"actually doesn't have one. It"},
|
||||
{"is the middle of one monster"},
|
||||
{"paragraph continued from the"},
|
||||
{"previous page and continuing"},
|
||||
{"onto the next page. There-"},
|
||||
{"fore, it ends up getting"},
|
||||
{"marked as a crown and then"},
|
||||
{"getting re-marked as any ex-"},
|
||||
{"isting model. Not great, but"},
|
||||
};
|
||||
|
||||
TEST(ParagraphsTest, TestSingleFullPageContinuation) {
|
||||
const TextAndModel *correct = kSingleFullPageContinuation;
|
||||
const TextAndModel* correct = kSingleFullPageContinuation;
|
||||
int num_rows = ABSL_ARRAYSIZE(kSingleFullPageContinuation);
|
||||
GenericVector<tesseract::RowInfo> row_infos;
|
||||
GenericVector<PARA *> row_owners;
|
||||
GenericVector<PARA*> row_owners;
|
||||
PARA_LIST paragraphs;
|
||||
GenericVector<ParagraphModel *> models;
|
||||
GenericVector<ParagraphModel*> models;
|
||||
models.push_back(new ParagraphModel(kLeft, 0, 20, 0, 10));
|
||||
MakeAsciiRowInfos(correct, num_rows, &row_infos);
|
||||
tesseract::DetectParagraphs(3, &row_infos, &row_owners, &paragraphs,
|
||||
&models);
|
||||
tesseract::DetectParagraphs(3, &row_infos, &row_owners, &paragraphs, &models);
|
||||
EvaluateParagraphDetection(correct, num_rows, row_owners);
|
||||
models.delete_data_pointers();
|
||||
}
|
||||
|
||||
const TextAndModel kRightAligned[] = {
|
||||
{"Right-aligned paragraphs are", PSTART, PModel(kRight, 0, 0, 0, 0)},
|
||||
{" uncommon in Left-to-Right"},
|
||||
{" languages, but they do"},
|
||||
{" exist."},
|
||||
{" Mostly, however, they're", PSTART, PModel(kRight, 0, 0, 0, 0)},
|
||||
{" horribly tiny paragraphs in"},
|
||||
{" tables on which we have no"},
|
||||
{" chance anyways."},
|
||||
{"Right-aligned paragraphs are", PSTART, PModel(kRight, 0, 0, 0, 0)},
|
||||
{" uncommon in Left-to-Right"},
|
||||
{" languages, but they do"},
|
||||
{" exist."},
|
||||
{" Mostly, however, they're", PSTART, PModel(kRight, 0, 0, 0, 0)},
|
||||
{" horribly tiny paragraphs in"},
|
||||
{" tables on which we have no"},
|
||||
{" chance anyways."},
|
||||
};
|
||||
|
||||
TEST(ParagraphsTest, TestRightAlignedParagraph) {
|
||||
@ -338,66 +329,71 @@ TEST(ParagraphsTest, TestRightAlignedParagraph) {
|
||||
}
|
||||
|
||||
const TextAndModel kTinyParagraphs[] = {
|
||||
{" Occasionally, interspersed with", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"obvious paragraph text, you might"},
|
||||
{"find short exchanges of dialogue "},
|
||||
{"between characters. "},
|
||||
{" 'Oh?' ", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{" 'Don't be confused!' ", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{" 'Not me!' ", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{" One naive approach would be to ", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"mark a new paragraph whenever one"},
|
||||
{"of the statistics (left, right or"},
|
||||
{"center) changes from one text-"},
|
||||
{"line to the next. Such an"},
|
||||
{"approach would misclassify the"},
|
||||
{"tiny paragraphs above as a single"},
|
||||
{"paragraph. "},
|
||||
{" Occasionally, interspersed with", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"obvious paragraph text, you might"},
|
||||
{"find short exchanges of dialogue "},
|
||||
{"between characters. "},
|
||||
{" 'Oh?' ", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{" 'Don't be confused!' ", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{" 'Not me!' ", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{" One naive approach would be to ", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"mark a new paragraph whenever one"},
|
||||
{"of the statistics (left, right or"},
|
||||
{"center) changes from one text-"},
|
||||
{"line to the next. Such an"},
|
||||
{"approach would misclassify the"},
|
||||
{"tiny paragraphs above as a single"},
|
||||
{"paragraph. "},
|
||||
};
|
||||
|
||||
TEST(ParagraphsTest, TestTinyParagraphs) {
|
||||
TestParagraphDetection(kTinyParagraphs, ABSL_ARRAYSIZE(kTinyParagraphs));
|
||||
}
|
||||
|
||||
|
||||
const TextAndModel kComplexPage1[] = {
|
||||
{" Awesome ", PSTART, PModel(kCenter, 0, 0, 0, 0)},
|
||||
{" Centered Title "},
|
||||
{" Paragraph Detection "},
|
||||
{" OCR TEAM "},
|
||||
{" 10 November 2010 "},
|
||||
{" ", PNONE},
|
||||
{" Look here, I have a paragraph.", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"This paragraph starts at the top"},
|
||||
{"of the page and takes 3 lines. "},
|
||||
{" Here I have a second paragraph", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"which indicates that the first "},
|
||||
{"paragraph is not a continuation "},
|
||||
{"from a previous page, as it is "},
|
||||
{"indented just like this second "},
|
||||
{"paragraph. "},
|
||||
{" Here is a block quote. It ", PSTART, PModel(kLeft, 30, 0, 0, 0), true},
|
||||
{" looks like the prior text "},
|
||||
{" but it is indented more "},
|
||||
{" and is fully justified. "},
|
||||
{" So how does one deal with ", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"centered text, block quotes, "},
|
||||
{"normal paragraphs, and lists "},
|
||||
{"like what follows? "},
|
||||
{"1. Make a plan. ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, true},
|
||||
{"2. Use a heuristic, for example,", PSTART, PModel(kLeft, 0, 0, 30, 0), false, true},
|
||||
{" looking for lines where the "},
|
||||
{" first word of the next line "},
|
||||
{" would fit on the previous "},
|
||||
{" line. "},
|
||||
{"8. Try to implement the plan in ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, true},
|
||||
{" Python and try it out. "},
|
||||
{"4. Determine how to fix the ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, true},
|
||||
{" mistakes. "},
|
||||
{"5. Repeat. ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, true},
|
||||
{" For extra painful penalty work", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"you can try to identify source "},
|
||||
{"code. Ouch! "},
|
||||
{" Awesome ", PSTART, PModel(kCenter, 0, 0, 0, 0)},
|
||||
{" Centered Title "},
|
||||
{" Paragraph Detection "},
|
||||
{" OCR TEAM "},
|
||||
{" 10 November 2010 "},
|
||||
{" ", PNONE},
|
||||
{" Look here, I have a paragraph.", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"This paragraph starts at the top"},
|
||||
{"of the page and takes 3 lines. "},
|
||||
{" Here I have a second paragraph", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"which indicates that the first "},
|
||||
{"paragraph is not a continuation "},
|
||||
{"from a previous page, as it is "},
|
||||
{"indented just like this second "},
|
||||
{"paragraph. "},
|
||||
{" Here is a block quote. It ", PSTART, PModel(kLeft, 30, 0, 0, 0),
|
||||
true},
|
||||
{" looks like the prior text "},
|
||||
{" but it is indented more "},
|
||||
{" and is fully justified. "},
|
||||
{" So how does one deal with ", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"centered text, block quotes, "},
|
||||
{"normal paragraphs, and lists "},
|
||||
{"like what follows? "},
|
||||
{"1. Make a plan. ", PSTART, PModel(kLeft, 0, 0, 30, 0),
|
||||
false, true},
|
||||
{"2. Use a heuristic, for example,", PSTART, PModel(kLeft, 0, 0, 30, 0),
|
||||
false, true},
|
||||
{" looking for lines where the "},
|
||||
{" first word of the next line "},
|
||||
{" would fit on the previous "},
|
||||
{" line. "},
|
||||
{"8. Try to implement the plan in ", PSTART, PModel(kLeft, 0, 0, 30, 0),
|
||||
false, true},
|
||||
{" Python and try it out. "},
|
||||
{"4. Determine how to fix the ", PSTART, PModel(kLeft, 0, 0, 30, 0),
|
||||
false, true},
|
||||
{" mistakes. "},
|
||||
{"5. Repeat. ", PSTART, PModel(kLeft, 0, 0, 30, 0),
|
||||
false, true},
|
||||
{" For extra painful penalty work", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"you can try to identify source "},
|
||||
{"code. Ouch! "},
|
||||
};
|
||||
|
||||
TEST(ParagraphsTest, TestComplexPage1) {
|
||||
@ -406,41 +402,47 @@ TEST(ParagraphsTest, TestComplexPage1) {
|
||||
|
||||
// The same as above, but wider.
|
||||
const TextAndModel kComplexPage2[] = {
|
||||
{" Awesome ", PSTART, PModel(kCenter, 0, 0, 0, 0)},
|
||||
{" Centered Title "},
|
||||
{" Paragraph Detection "},
|
||||
{" OCR TEAM "},
|
||||
{" 10 November 2010 "},
|
||||
{" ", PNONE},
|
||||
{" Look here, I have a paragraph. ", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"This paragraph starts at the top of"},
|
||||
{"the page and takes 3 lines. "},
|
||||
{" Here I have a second paragraph ", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"which indicates that the first "},
|
||||
{"paragraph is not a continuation "},
|
||||
{"from a previous page, as it is in- "},
|
||||
{"dented just like this second para- "},
|
||||
{"graph. "},
|
||||
{" Here is a block quote. It ", PSTART, PModel(kLeft, 30, 0, 0, 0), true},
|
||||
{" looks like the prior text "},
|
||||
{" but it is indented more "},
|
||||
{" and is fully justified. "},
|
||||
{" So how does one deal with center-", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"ed text, block quotes, normal para-"},
|
||||
{"graphs, and lists like what follow?"},
|
||||
{"1. Make a plan. "}, // BUG!!
|
||||
{"2. Use a heuristic, for example, ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, true},
|
||||
{" looking for lines where the "},
|
||||
{" first word of the next line "},
|
||||
{" would fit on the previous line. "},
|
||||
{"8. Try to implement the plan in ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, true},
|
||||
{" Python and try it out. "},
|
||||
{"4. Determine how to fix the ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, true},
|
||||
{" mistakes. "},
|
||||
{"5. Repeat. ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, true},
|
||||
{" For extra painful penalty work ", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"you can try to identify source "},
|
||||
{"code. Ouch! "},
|
||||
{" Awesome ", PSTART,
|
||||
PModel(kCenter, 0, 0, 0, 0)},
|
||||
{" Centered Title "},
|
||||
{" Paragraph Detection "},
|
||||
{" OCR TEAM "},
|
||||
{" 10 November 2010 "},
|
||||
{" ", PNONE},
|
||||
{" Look here, I have a paragraph. ", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"This paragraph starts at the top of"},
|
||||
{"the page and takes 3 lines. "},
|
||||
{" Here I have a second paragraph ", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"which indicates that the first "},
|
||||
{"paragraph is not a continuation "},
|
||||
{"from a previous page, as it is in- "},
|
||||
{"dented just like this second para- "},
|
||||
{"graph. "},
|
||||
{" Here is a block quote. It ", PSTART, PModel(kLeft, 30, 0, 0, 0),
|
||||
true},
|
||||
{" looks like the prior text "},
|
||||
{" but it is indented more "},
|
||||
{" and is fully justified. "},
|
||||
{" So how does one deal with center-", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"ed text, block quotes, normal para-"},
|
||||
{"graphs, and lists like what follow?"},
|
||||
{"1. Make a plan. "}, // BUG!!
|
||||
{"2. Use a heuristic, for example, ", PSTART, PModel(kLeft, 0, 0, 30, 0),
|
||||
false, true},
|
||||
{" looking for lines where the "},
|
||||
{" first word of the next line "},
|
||||
{" would fit on the previous line. "},
|
||||
{"8. Try to implement the plan in ", PSTART, PModel(kLeft, 0, 0, 30, 0),
|
||||
false, true},
|
||||
{" Python and try it out. "},
|
||||
{"4. Determine how to fix the ", PSTART, PModel(kLeft, 0, 0, 30, 0),
|
||||
false, true},
|
||||
{" mistakes. "},
|
||||
{"5. Repeat. ", PSTART, PModel(kLeft, 0, 0, 30, 0),
|
||||
false, true},
|
||||
{" For extra painful penalty work ", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"you can try to identify source "},
|
||||
{"code. Ouch! "},
|
||||
};
|
||||
|
||||
TEST(ParagraphsTest, TestComplexPage2) {
|
||||
@ -448,14 +450,15 @@ TEST(ParagraphsTest, TestComplexPage2) {
|
||||
}
|
||||
|
||||
const TextAndModel kSubtleCrown[] = {
|
||||
{"The first paragraph on a page is", PSTART, PModel(kLeft, 0, 20, 0, 0), true},
|
||||
{"often not indented as the rest "},
|
||||
{"of the paragraphs are. Nonethe-"},
|
||||
{"less it should be counted as the"},
|
||||
{"same type of paragraph. "},
|
||||
{" Even a short second paragraph ", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"should suffice. "},
|
||||
{" 1235 ", PNONE},
|
||||
{"The first paragraph on a page is", PSTART, PModel(kLeft, 0, 20, 0, 0),
|
||||
true},
|
||||
{"often not indented as the rest "},
|
||||
{"of the paragraphs are. Nonethe-"},
|
||||
{"less it should be counted as the"},
|
||||
{"same type of paragraph. "},
|
||||
{" Even a short second paragraph ", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"should suffice. "},
|
||||
{" 1235 ", PNONE},
|
||||
};
|
||||
|
||||
TEST(ParagraphsTest, TestSubtleCrown) {
|
||||
@ -467,39 +470,43 @@ TEST(ParagraphsTest, TestStrayLineInBlock) {
|
||||
}
|
||||
|
||||
const TextAndModel kUnlvRep3AO[] = {
|
||||
{" Defined contribution plans cover employees in Australia, New", PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{"Zealand, Spain, the United Kingdom and some U.S. subsidiaries. "},
|
||||
{"In addition, employees in the U.S. are eligible to participate in "},
|
||||
{"defined contribution plans (Employee Savings Plans) by contribut-"},
|
||||
{"ing a portion of their compensation. The Company matches com- "},
|
||||
{"pensation, depending on Company profit levels. Contributions "},
|
||||
{"charged to income for defined contribution plans were $92 in "},
|
||||
{"1993, $98 in 1992 and $89 in 1991. "},
|
||||
{" In addition to providing pension benefits, the Company pro- ", PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{"vides certain health care and life insurance benefits to retired "},
|
||||
{"employees. As discussed in Note A, the Company adopted FASB "},
|
||||
{"Statement No. 106 effective January 1, 1992. Previously, the "},
|
||||
{"Company recognized the cost of providing these benefits as the "},
|
||||
{"benefits were paid. These pretax costs amounted to $53 in 1991. "},
|
||||
{"The Company continues to fund most of the cost of these medical "},
|
||||
{"and life insurance benefits in the year incurred. "},
|
||||
{" The U.S. plan covering the parent company is the largest plan.", PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{"It provides medical and life insurance benefits including hospital, "},
|
||||
{"physicians’ services and major medical expense benefits and life "},
|
||||
{"insurance benefits. The plan provides benefits supplemental to "},
|
||||
{"Medicare after retirees are eligible for these benefits. The cost of "},
|
||||
{"these benefits are shared by the Company and the retiree, with the "},
|
||||
{"Company portion increasing as the retiree has increased years of "},
|
||||
{"credited service. The Company has the ability to change these "},
|
||||
{"benefits at any time. "},
|
||||
{" Effective October 1993, the Company amended its health ", PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{"benefits plan in the U.S. to cap the cost absorbed by the Company "},
|
||||
{"at approximately twice the 1993 cost per person for employees who"},
|
||||
{"retire after December 31, 1993. The effect of this amendment was "},
|
||||
{"to reduce the December 31, 1993 accumulated postretirement "},
|
||||
{"benefit obligation by $327. It also reduced the net periodic postre- "},
|
||||
{"tirement cost by $21 for 1993 and is estimated to reduce this cost "},
|
||||
{"for 1994 by approximately $83. "},
|
||||
{" Defined contribution plans cover employees in Australia, New", PSTART,
|
||||
PModel(kLeft, 0, 50, 0, 0)},
|
||||
{"Zealand, Spain, the United Kingdom and some U.S. subsidiaries. "},
|
||||
{"In addition, employees in the U.S. are eligible to participate in "},
|
||||
{"defined contribution plans (Employee Savings Plans) by contribut-"},
|
||||
{"ing a portion of their compensation. The Company matches com- "},
|
||||
{"pensation, depending on Company profit levels. Contributions "},
|
||||
{"charged to income for defined contribution plans were $92 in "},
|
||||
{"1993, $98 in 1992 and $89 in 1991. "},
|
||||
{" In addition to providing pension benefits, the Company pro- ", PSTART,
|
||||
PModel(kLeft, 0, 50, 0, 0)},
|
||||
{"vides certain health care and life insurance benefits to retired "},
|
||||
{"employees. As discussed in Note A, the Company adopted FASB "},
|
||||
{"Statement No. 106 effective January 1, 1992. Previously, the "},
|
||||
{"Company recognized the cost of providing these benefits as the "},
|
||||
{"benefits were paid. These pretax costs amounted to $53 in 1991. "},
|
||||
{"The Company continues to fund most of the cost of these medical "},
|
||||
{"and life insurance benefits in the year incurred. "},
|
||||
{" The U.S. plan covering the parent company is the largest plan.",
|
||||
PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{"It provides medical and life insurance benefits including hospital, "},
|
||||
{"physicians’ services and major medical expense benefits and life "},
|
||||
{"insurance benefits. The plan provides benefits supplemental to "},
|
||||
{"Medicare after retirees are eligible for these benefits. The cost of "},
|
||||
{"these benefits are shared by the Company and the retiree, with the "},
|
||||
{"Company portion increasing as the retiree has increased years of "},
|
||||
{"credited service. The Company has the ability to change these "},
|
||||
{"benefits at any time. "},
|
||||
{" Effective October 1993, the Company amended its health ", PSTART,
|
||||
PModel(kLeft, 0, 50, 0, 0)},
|
||||
{"benefits plan in the U.S. to cap the cost absorbed by the Company "},
|
||||
{"at approximately twice the 1993 cost per person for employees who"},
|
||||
{"retire after December 31, 1993. The effect of this amendment was "},
|
||||
{"to reduce the December 31, 1993 accumulated postretirement "},
|
||||
{"benefit obligation by $327. It also reduced the net periodic postre- "},
|
||||
{"tirement cost by $21 for 1993 and is estimated to reduce this cost "},
|
||||
{"for 1994 by approximately $83. "},
|
||||
};
|
||||
|
||||
TEST(ParagraphsTest, TestUnlvInsurance) {
|
||||
@ -512,19 +519,19 @@ TEST(ParagraphsTest, TestUnlvInsurance) {
|
||||
// paragraph or two.
|
||||
// This example comes from Volume 9886293, Page 5
|
||||
const TextAndModel kTableOfContents[] = {
|
||||
{"1 Hmong People ........... 1", PSTART, PModel(kUnknown, 0, 0, 0, 0)},
|
||||
{" Hmong Origins . . . . . 1", PSTART, PModel(kUnknown, 0, 0, 0, 0)},
|
||||
{" Language . . . . . . . 1", PSTART, PModel(kUnknown, 0, 0, 0, 0)},
|
||||
{" Proverbs . . . . . . 2", PSTART, PModel(kUnknown, 0, 0, 0, 0)},
|
||||
{" Discussion . . . . 2", PSTART, PModel(kUnknown, 0, 0, 0, 0)},
|
||||
{" Riddles . . . . . . . 2", PSTART, PModel(kUnknown, 0, 0, 0, 0)},
|
||||
{" Discussion . . . . 3", PSTART, PModel(kUnknown, 0, 0, 0, 0)},
|
||||
{" Appearance . . . . . 3", PSTART, PModel(kUnknown, 0, 0, 0, 0)},
|
||||
{" Hmong History . . . . . 4", PSTART, PModel(kUnknown, 0, 0, 0, 0)},
|
||||
{" Hmong in SE Asia . . . 4", PSTART, PModel(kUnknown, 0, 0, 0, 0)},
|
||||
{" Hmong in the West . . .5", PSTART, PModel(kUnknown, 0, 0, 0, 0)},
|
||||
{" Hmong in the USA . . . 5", PSTART, PModel(kUnknown, 0, 0, 0, 0)},
|
||||
{" Discussion . . . . 6", PSTART, PModel(kUnknown, 0, 0, 0, 0)},
|
||||
{"1 Hmong People ........... 1", PSTART, PModel(kUnknown, 0, 0, 0, 0)},
|
||||
{" Hmong Origins . . . . . 1", PSTART, PModel(kUnknown, 0, 0, 0, 0)},
|
||||
{" Language . . . . . . . 1", PSTART, PModel(kUnknown, 0, 0, 0, 0)},
|
||||
{" Proverbs . . . . . . 2", PSTART, PModel(kUnknown, 0, 0, 0, 0)},
|
||||
{" Discussion . . . . 2", PSTART, PModel(kUnknown, 0, 0, 0, 0)},
|
||||
{" Riddles . . . . . . . 2", PSTART, PModel(kUnknown, 0, 0, 0, 0)},
|
||||
{" Discussion . . . . 3", PSTART, PModel(kUnknown, 0, 0, 0, 0)},
|
||||
{" Appearance . . . . . 3", PSTART, PModel(kUnknown, 0, 0, 0, 0)},
|
||||
{" Hmong History . . . . . 4", PSTART, PModel(kUnknown, 0, 0, 0, 0)},
|
||||
{" Hmong in SE Asia . . . 4", PSTART, PModel(kUnknown, 0, 0, 0, 0)},
|
||||
{" Hmong in the West . . .5", PSTART, PModel(kUnknown, 0, 0, 0, 0)},
|
||||
{" Hmong in the USA . . . 5", PSTART, PModel(kUnknown, 0, 0, 0, 0)},
|
||||
{" Discussion . . . . 6", PSTART, PModel(kUnknown, 0, 0, 0, 0)},
|
||||
};
|
||||
|
||||
TEST(ParagraphsTest, TestSplitsOutLeaderLines) {
|
||||
@ -532,31 +539,34 @@ TEST(ParagraphsTest, TestSplitsOutLeaderLines) {
|
||||
}
|
||||
|
||||
const TextAndModel kTextWithSourceCode[] = {
|
||||
{" A typical page of a programming book may contain", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"examples of source code to exemplify an algorithm "},
|
||||
{"being described in prose. Such examples should be"},
|
||||
{"rendered as lineated text, meaning text with "},
|
||||
{"explicit line breaks but without extra inter-line "},
|
||||
{"spacing. Accidentally finding stray paragraphs in"},
|
||||
{"source code would lead to a bad reading experience"},
|
||||
{"when the text is re-flowed. "},
|
||||
{" Let's show this by describing the function fact-", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"orial. Factorial is a simple recursive function "},
|
||||
{"which grows very quickly. So quickly, in fact, "},
|
||||
{"that the typical C implementation will only work "},
|
||||
{"for values less than about 12: "},
|
||||
{" ", PNONE},
|
||||
{" # Naive implementation in C "},
|
||||
{" int factorial(int n) { "},
|
||||
{" if (n < 2) "},
|
||||
{" return 1; "},
|
||||
{" return n * factorial(n - 1); "},
|
||||
{" } "},
|
||||
{" "},
|
||||
{" The C programming language does not have built- ", PSTART, PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"in support for detecting integer overflow, so this"},
|
||||
{"naive implementation simply returns random values "},
|
||||
{"if even a moderate sized n is provided. "},
|
||||
{" A typical page of a programming book may contain", PSTART,
|
||||
PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"examples of source code to exemplify an algorithm "},
|
||||
{"being described in prose. Such examples should be"},
|
||||
{"rendered as lineated text, meaning text with "},
|
||||
{"explicit line breaks but without extra inter-line "},
|
||||
{"spacing. Accidentally finding stray paragraphs in"},
|
||||
{"source code would lead to a bad reading experience"},
|
||||
{"when the text is re-flowed. "},
|
||||
{" Let's show this by describing the function fact-", PSTART,
|
||||
PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"orial. Factorial is a simple recursive function "},
|
||||
{"which grows very quickly. So quickly, in fact, "},
|
||||
{"that the typical C implementation will only work "},
|
||||
{"for values less than about 12: "},
|
||||
{" ", PNONE},
|
||||
{" # Naive implementation in C "},
|
||||
{" int factorial(int n) { "},
|
||||
{" if (n < 2) "},
|
||||
{" return 1; "},
|
||||
{" return n * factorial(n - 1); "},
|
||||
{" } "},
|
||||
{" "},
|
||||
{" The C programming language does not have built- ", PSTART,
|
||||
PModel(kLeft, 0, 20, 0, 0)},
|
||||
{"in support for detecting integer overflow, so this"},
|
||||
{"naive implementation simply returns random values "},
|
||||
{"if even a moderate sized n is provided. "},
|
||||
};
|
||||
|
||||
TEST(ParagraphsTest, NotDistractedBySourceCode) {
|
||||
@ -565,81 +575,103 @@ TEST(ParagraphsTest, NotDistractedBySourceCode) {
|
||||
}
|
||||
|
||||
const TextAndModel kOldManAndSea[] = {
|
||||
{"royal palm which are called guano and in it there was a bed, a", PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{"table, one chair, and a place on the dirt floor to cook with charcoal."},
|
||||
{"On the brown walls of the flattened, overlapping leaves of the"},
|
||||
{"sturdy fibered guano there was a picture in color of the Sacred"},
|
||||
{"Heart of Jesus and another of the Virgin of Cobre. These were"},
|
||||
{"relics of his wife. Once there had been a tinted photograph of his"},
|
||||
{"wife on the wall but he had taken it down because it made him too"},
|
||||
{"lonely to see it and it was on the shelf in the corner under his clean"},
|
||||
{"shirt. "},
|
||||
{" \"What do you have to eat?\" the boy asked. ", PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{" \"A pot of yellow rice with fish. Do you want some?\" ", PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{" \"No. I will eat at home. Do you want me to make the fire?\" ", PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{" \"No. I will make it later on. Or I may eat the rice cold.\" ", PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{" \"May I take the cast net?\" ", PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{" \"Of course.\" ", PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{" There was no cast net and the boy remembered when they had", PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{"sold it. But they went through this fiction every day. There was no"},
|
||||
{"pot of yellow rice and fish and the boy knew this too. "},
|
||||
{" \"Eighty-five is a lucky number,\" the old man said. \"How", PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{"would you like to see me bring one in that dressed out over a thou-"},
|
||||
{"sand pounds? "},
|
||||
{" \"I'll get the cast net and go for sardines. Will you sit in the sun", PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{"in the doorway?\" "},
|
||||
{" \"Yes. I have yesterday's paper and I will read the baseball.\" ", PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{" The boy did not know whether yesterday's paper was a fiction", PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{"too. But the old man brought it out from under the bed. "},
|
||||
{" \"Pedrico gave it to me at the bodega,\" he explained. ", PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{" \"I'll be back when I have the sardines. I'll keep yours and mine", PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{"together on ice and we can share them in the morning. When I"},
|
||||
{"come back you can tell me about the baseball.\" "},
|
||||
{" \"The Yankees cannot lose.\" ", PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{" \"But I fear the Indians of Cleveland.\" ", PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{" \"Have faith in the Yankees my son. Think of the great Di-", PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{"Maggio.\" "},
|
||||
{" \"I fear both the Tigers of Detroit and the Indians of Cleve-", PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{"land.\" "}
|
||||
};
|
||||
{"royal palm which are called guano and in it there was a bed, a",
|
||||
PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{"table, one chair, and a place on the dirt floor to cook with charcoal."},
|
||||
{"On the brown walls of the flattened, overlapping leaves of the"},
|
||||
{"sturdy fibered guano there was a picture in color of the Sacred"},
|
||||
{"Heart of Jesus and another of the Virgin of Cobre. These were"},
|
||||
{"relics of his wife. Once there had been a tinted photograph of his"},
|
||||
{"wife on the wall but he had taken it down because it made him too"},
|
||||
{"lonely to see it and it was on the shelf in the corner under his clean"},
|
||||
{"shirt. "},
|
||||
{" \"What do you have to eat?\" the boy asked. ",
|
||||
PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{" \"A pot of yellow rice with fish. Do you want some?\" ",
|
||||
PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{" \"No. I will eat at home. Do you want me to make the fire?\" ",
|
||||
PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{" \"No. I will make it later on. Or I may eat the rice cold.\" ",
|
||||
PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{" \"May I take the cast net?\" ",
|
||||
PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{" \"Of course.\" ",
|
||||
PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{" There was no cast net and the boy remembered when they had",
|
||||
PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{"sold it. But they went through this fiction every day. There was no"},
|
||||
{"pot of yellow rice and fish and the boy knew this too. "
|
||||
" "},
|
||||
{" \"Eighty-five is a lucky number,\" the old man said. \"How",
|
||||
PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{"would you like to see me bring one in that dressed out over a "
|
||||
"thou-"},
|
||||
{"sand pounds? "
|
||||
" "},
|
||||
{" \"I'll get the cast net and go for sardines. Will you sit in the "
|
||||
"sun",
|
||||
PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{"in the doorway?\" "
|
||||
" "},
|
||||
{" \"Yes. I have yesterday's paper and I will read the baseball.\" ",
|
||||
PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{" The boy did not know whether yesterday's paper was a fiction",
|
||||
PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{"too. But the old man brought it out from under the bed. "},
|
||||
{" \"Pedrico gave it to me at the bodega,\" he explained. "
|
||||
" ",
|
||||
PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{" \"I'll be back when I have the sardines. I'll keep yours and mine",
|
||||
PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{"together on ice and we can share them in the morning. When I"},
|
||||
{"come back you can tell me about the baseball.\" "},
|
||||
{" \"The Yankees cannot lose.\" ",
|
||||
PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{" \"But I fear the Indians of Cleveland.\" ",
|
||||
PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{" \"Have faith in the Yankees my son. Think of the great Di-",
|
||||
PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{"Maggio.\" "},
|
||||
{" \"I fear both the Tigers of Detroit and the Indians of Cleve-",
|
||||
PSTART, PModel(kLeft, 0, 50, 0, 0)},
|
||||
{"land.\" "}};
|
||||
|
||||
TEST(ParagraphsTest, NotOverlyAggressiveWithBlockQuotes) {
|
||||
TestParagraphDetection(kOldManAndSea, ABSL_ARRAYSIZE(kOldManAndSea));
|
||||
}
|
||||
|
||||
const TextAndModel kNewZealandIndex[] = {
|
||||
{"Oats, 51 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"O'Brien, Gregory, 175 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Occupational composition, 110,", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{" 138 "},
|
||||
{"OECD rankings, 155, 172 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Okiato (original capital), 47 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Oil shock: 1974, xxx, 143; 1979,", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{" 145 "},
|
||||
{"Old Age Pensions, xxii, 89-90 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Old World evils, 77 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Oliver, W. H., 39, 77, 89 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Olssen, Erik, 45, 64, 84 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Olympic Games, 1924, 111, 144 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Once on Chunuk Bair, 149 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Once Were Warriors, xxxiii, 170", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"On—shore whaling, xvi ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Opotiki, xix ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Orakau battle of, xviii, 57 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"O’Regan, Tipene, 170, 198-99 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Organic agriculture, 177 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Orwell, George, 151 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Otago, xvii, 45, 49-50, 70 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Otago block, xvii ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Otago Daily Times, 67 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Otago Girls’ High School, xix, 61,", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{" 85 "},
|
||||
{"Otago gold rushes, 61-63 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Otago Peninsula, xx ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Otago Provincial Council, 68 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Otaki, 33 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Owls Do Cry, 139 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}
|
||||
};
|
||||
{"Oats, 51 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"O'Brien, Gregory, 175 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Occupational composition, 110,", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{" 138 "},
|
||||
{"OECD rankings, 155, 172 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Okiato (original capital), 47 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Oil shock: 1974, xxx, 143; 1979,", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{" 145 "},
|
||||
{"Old Age Pensions, xxii, 89-90 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Old World evils, 77 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Oliver, W. H., 39, 77, 89 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Olssen, Erik, 45, 64, 84 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Olympic Games, 1924, 111, 144 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Once on Chunuk Bair, 149 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Once Were Warriors, xxxiii, 170", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"On—shore whaling, xvi ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Opotiki, xix ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Orakau battle of, xviii, 57 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"O’Regan, Tipene, 170, 198-99 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Organic agriculture, 177 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Orwell, George, 151 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Otago, xvii, 45, 49-50, 70 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Otago block, xvii ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Otago Daily Times, 67 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Otago Girls’ High School, xix, 61,", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{" 85 "},
|
||||
{"Otago gold rushes, 61-63 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Otago Peninsula, xx ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Otago Provincial Council, 68 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Otaki, 33 ", PSTART, PModel(kLeft, 0, 0, 30, 0)},
|
||||
{"Owls Do Cry, 139 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}};
|
||||
|
||||
TEST(ParagraphsTest, IndexPageTest) {
|
||||
TestParagraphDetection(kNewZealandIndex, ABSL_ARRAYSIZE(kNewZealandIndex));
|
||||
|
@ -10,15 +10,14 @@ namespace {
|
||||
class ParamsModelTest : public testing::Test {
|
||||
protected:
|
||||
string TestDataNameToPath(const string& name) const {
|
||||
return file::JoinPath(FLAGS_test_srcdir,
|
||||
"testdata/" + name);
|
||||
return file::JoinPath(FLAGS_test_srcdir, "testdata/" + name);
|
||||
}
|
||||
string OutputNameToPath(const string& name) const {
|
||||
return file::JoinPath(FLAGS_test_tmpdir, name);
|
||||
}
|
||||
// Test that we are able to load a params model, save it, reload it,
|
||||
// and verify that the re-serialized version is the same as the original.
|
||||
void TestParamsModelRoundTrip(const string &params_model_filename) const {
|
||||
void TestParamsModelRoundTrip(const string& params_model_filename) const {
|
||||
tesseract::ParamsModel orig_model;
|
||||
tesseract::ParamsModel duplicate_model;
|
||||
string orig_file = TestDataNameToPath(params_model_filename);
|
||||
@ -26,7 +25,7 @@ class ParamsModelTest : public testing::Test {
|
||||
|
||||
EXPECT_TRUE(orig_model.LoadFromFile("eng", orig_file.c_str()));
|
||||
EXPECT_TRUE(orig_model.SaveToFile(out_file.c_str()));
|
||||
|
||||
|
||||
EXPECT_TRUE(duplicate_model.LoadFromFile("eng", out_file.c_str()));
|
||||
EXPECT_TRUE(orig_model.Equivalent(duplicate_model));
|
||||
}
|
||||
|
@ -16,146 +16,146 @@
|
||||
|
||||
// expects clone of tessdata_fast repo in ../../tessdata_fast
|
||||
|
||||
#include "include_gunit.h"
|
||||
#include "gmock/gmock.h"
|
||||
#include "baseapi.h"
|
||||
#include "ocrclass.h"
|
||||
#include "leptonica/allheaders.h"
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <fstream>
|
||||
#include <locale>
|
||||
#include <limits.h>
|
||||
#include <time.h>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <locale>
|
||||
#include <string>
|
||||
#include "baseapi.h"
|
||||
#include "gmock/gmock.h"
|
||||
#include "include_gunit.h"
|
||||
#include "leptonica/allheaders.h"
|
||||
#include "ocrclass.h"
|
||||
|
||||
namespace {
|
||||
|
||||
class QuickTest : public testing::Test {
|
||||
protected:
|
||||
virtual void SetUp() {
|
||||
start_time_ = time(nullptr);
|
||||
}
|
||||
virtual void SetUp() { start_time_ = time(nullptr); }
|
||||
virtual void TearDown() {
|
||||
const time_t end_time = time(nullptr);
|
||||
EXPECT_TRUE(end_time - start_time_ <=25) << "The test took too long - " << ::testing::PrintToString(end_time - start_time_);
|
||||
EXPECT_TRUE(end_time - start_time_ <= 25)
|
||||
<< "The test took too long - "
|
||||
<< ::testing::PrintToString(end_time - start_time_);
|
||||
}
|
||||
time_t start_time_;
|
||||
};
|
||||
};
|
||||
|
||||
class ClassicMockProgressSink {
|
||||
public:
|
||||
MOCK_METHOD1(classicProgress, bool( int ) );
|
||||
MOCK_METHOD1(cancel, bool( int ));
|
||||
class ClassicMockProgressSink {
|
||||
public:
|
||||
MOCK_METHOD1(classicProgress, bool(int));
|
||||
MOCK_METHOD1(cancel, bool(int));
|
||||
|
||||
ETEXT_DESC monitor;
|
||||
ETEXT_DESC monitor;
|
||||
|
||||
ClassicMockProgressSink()
|
||||
{
|
||||
monitor.progress_callback = []( int progress, int, int, int, int ) ->bool {
|
||||
return instance->classicProgress( progress );
|
||||
};
|
||||
monitor.cancel = []( void* ths, int words ) -> bool {
|
||||
return ((ClassicMockProgressSink*)ths)->cancel(words);
|
||||
};
|
||||
monitor.cancel_this = this;
|
||||
instance = this;
|
||||
}
|
||||
|
||||
static ClassicMockProgressSink* instance;
|
||||
};
|
||||
|
||||
ClassicMockProgressSink* ClassicMockProgressSink::instance = nullptr;
|
||||
|
||||
class NewMockProgressSink : public ClassicMockProgressSink {
|
||||
public:
|
||||
MOCK_METHOD1(progress, bool( int ) );
|
||||
|
||||
NewMockProgressSink()
|
||||
{
|
||||
monitor.progress_callback2 = [](ETEXT_DESC* ths, int, int, int, int ) -> bool {
|
||||
return ((NewMockProgressSink*)ths->cancel_this)->progress( ths->progress );
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
void ClassicProgressTester(const char* imgname, const char* tessdatadir, const char* lang) {
|
||||
using ::testing::_;
|
||||
using ::testing::AllOf;
|
||||
using ::testing::AtLeast;
|
||||
using ::testing::DoAll;
|
||||
using ::testing::Gt;
|
||||
using ::testing::Le;
|
||||
using ::testing::Return;
|
||||
using ::testing::SaveArg;
|
||||
|
||||
tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
|
||||
ASSERT_FALSE(api->Init(tessdatadir, lang)) << "Could not initialize tesseract.";
|
||||
Pix *image = pixRead(imgname);
|
||||
ASSERT_TRUE(image != nullptr) << "Failed to read test image.";
|
||||
api->SetImage(image);
|
||||
|
||||
ClassicMockProgressSink progressSink;
|
||||
|
||||
int currentProgress = -1;
|
||||
EXPECT_CALL( progressSink, classicProgress(AllOf(Gt<int&>(currentProgress),Le(100))) )
|
||||
.Times(AtLeast(5))
|
||||
.WillRepeatedly( DoAll(SaveArg<0>(&currentProgress),
|
||||
Return(false) ));
|
||||
EXPECT_CALL( progressSink, cancel(_) )
|
||||
.Times(AtLeast(5))
|
||||
.WillRepeatedly(Return(false));
|
||||
|
||||
EXPECT_EQ( api->Recognize( &progressSink.monitor ), false );
|
||||
EXPECT_GE( currentProgress, 50 ) << "The reported progress did not reach 50%";
|
||||
|
||||
api->End();
|
||||
pixDestroy(&image);
|
||||
ClassicMockProgressSink() {
|
||||
monitor.progress_callback = [](int progress, int, int, int, int) -> bool {
|
||||
return instance->classicProgress(progress);
|
||||
};
|
||||
monitor.cancel = [](void* ths, int words) -> bool {
|
||||
return ((ClassicMockProgressSink*)ths)->cancel(words);
|
||||
};
|
||||
monitor.cancel_this = this;
|
||||
instance = this;
|
||||
}
|
||||
|
||||
void NewProgressTester(const char* imgname, const char* tessdatadir, const char* lang) {
|
||||
using ::testing::_;
|
||||
using ::testing::AllOf;
|
||||
using ::testing::AtLeast;
|
||||
using ::testing::DoAll;
|
||||
using ::testing::Gt;
|
||||
using ::testing::Le;
|
||||
using ::testing::Return;
|
||||
using ::testing::SaveArg;
|
||||
static ClassicMockProgressSink* instance;
|
||||
};
|
||||
|
||||
tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
|
||||
ASSERT_FALSE(api->Init(tessdatadir, lang)) << "Could not initialize tesseract.";
|
||||
Pix *image = pixRead(imgname);
|
||||
ASSERT_TRUE(image != nullptr) << "Failed to read test image.";
|
||||
api->SetImage(image);
|
||||
ClassicMockProgressSink* ClassicMockProgressSink::instance = nullptr;
|
||||
|
||||
NewMockProgressSink progressSink;
|
||||
class NewMockProgressSink : public ClassicMockProgressSink {
|
||||
public:
|
||||
MOCK_METHOD1(progress, bool(int));
|
||||
|
||||
int currentProgress = -1;
|
||||
EXPECT_CALL( progressSink, classicProgress(_) )
|
||||
.Times(0);
|
||||
EXPECT_CALL( progressSink, progress(AllOf(Gt<int&>(currentProgress),Le(100))) )
|
||||
.Times(AtLeast(5))
|
||||
.WillRepeatedly( DoAll(SaveArg<0>(&currentProgress),
|
||||
Return(false) ));
|
||||
EXPECT_CALL( progressSink, cancel(_) )
|
||||
.Times(AtLeast(5))
|
||||
.WillRepeatedly(Return(false));
|
||||
|
||||
EXPECT_EQ( api->Recognize( &progressSink.monitor ), false );
|
||||
EXPECT_GE( currentProgress, 50 ) << "The reported progress did not reach 50%";
|
||||
|
||||
api->End();
|
||||
pixDestroy(&image);
|
||||
NewMockProgressSink() {
|
||||
monitor.progress_callback2 = [](ETEXT_DESC* ths, int, int, int,
|
||||
int) -> bool {
|
||||
return ((NewMockProgressSink*)ths->cancel_this)->progress(ths->progress);
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
TEST(QuickTest, ClassicProgressReporitng) {
|
||||
ClassicProgressTester(TESTING_DIR "/phototest.tif",
|
||||
TESSDATA_DIR "_fast", "eng");
|
||||
}
|
||||
void ClassicProgressTester(const char* imgname, const char* tessdatadir,
|
||||
const char* lang) {
|
||||
using ::testing::_;
|
||||
using ::testing::AllOf;
|
||||
using ::testing::AtLeast;
|
||||
using ::testing::DoAll;
|
||||
using ::testing::Gt;
|
||||
using ::testing::Le;
|
||||
using ::testing::Return;
|
||||
using ::testing::SaveArg;
|
||||
|
||||
TEST(QuickTest, NewProgressReporitng) {
|
||||
NewProgressTester(TESTING_DIR "/phototest.tif",
|
||||
TESSDATA_DIR "_fast", "eng");
|
||||
}
|
||||
tesseract::TessBaseAPI* api = new tesseract::TessBaseAPI();
|
||||
ASSERT_FALSE(api->Init(tessdatadir, lang))
|
||||
<< "Could not initialize tesseract.";
|
||||
Pix* image = pixRead(imgname);
|
||||
ASSERT_TRUE(image != nullptr) << "Failed to read test image.";
|
||||
api->SetImage(image);
|
||||
|
||||
ClassicMockProgressSink progressSink;
|
||||
|
||||
int currentProgress = -1;
|
||||
EXPECT_CALL(progressSink,
|
||||
classicProgress(AllOf(Gt<int&>(currentProgress), Le(100))))
|
||||
.Times(AtLeast(5))
|
||||
.WillRepeatedly(DoAll(SaveArg<0>(&currentProgress), Return(false)));
|
||||
EXPECT_CALL(progressSink, cancel(_))
|
||||
.Times(AtLeast(5))
|
||||
.WillRepeatedly(Return(false));
|
||||
|
||||
EXPECT_EQ(api->Recognize(&progressSink.monitor), false);
|
||||
EXPECT_GE(currentProgress, 50) << "The reported progress did not reach 50%";
|
||||
|
||||
api->End();
|
||||
pixDestroy(&image);
|
||||
}
|
||||
|
||||
void NewProgressTester(const char* imgname, const char* tessdatadir,
|
||||
const char* lang) {
|
||||
using ::testing::_;
|
||||
using ::testing::AllOf;
|
||||
using ::testing::AtLeast;
|
||||
using ::testing::DoAll;
|
||||
using ::testing::Gt;
|
||||
using ::testing::Le;
|
||||
using ::testing::Return;
|
||||
using ::testing::SaveArg;
|
||||
|
||||
tesseract::TessBaseAPI* api = new tesseract::TessBaseAPI();
|
||||
ASSERT_FALSE(api->Init(tessdatadir, lang))
|
||||
<< "Could not initialize tesseract.";
|
||||
Pix* image = pixRead(imgname);
|
||||
ASSERT_TRUE(image != nullptr) << "Failed to read test image.";
|
||||
api->SetImage(image);
|
||||
|
||||
NewMockProgressSink progressSink;
|
||||
|
||||
int currentProgress = -1;
|
||||
EXPECT_CALL(progressSink, classicProgress(_)).Times(0);
|
||||
EXPECT_CALL(progressSink, progress(AllOf(Gt<int&>(currentProgress), Le(100))))
|
||||
.Times(AtLeast(5))
|
||||
.WillRepeatedly(DoAll(SaveArg<0>(&currentProgress), Return(false)));
|
||||
EXPECT_CALL(progressSink, cancel(_))
|
||||
.Times(AtLeast(5))
|
||||
.WillRepeatedly(Return(false));
|
||||
|
||||
EXPECT_EQ(api->Recognize(&progressSink.monitor), false);
|
||||
EXPECT_GE(currentProgress, 50) << "The reported progress did not reach 50%";
|
||||
|
||||
api->End();
|
||||
pixDestroy(&image);
|
||||
}
|
||||
|
||||
TEST(QuickTest, ClassicProgressReporitng) {
|
||||
ClassicProgressTester(TESTING_DIR "/phototest.tif", TESSDATA_DIR "_fast",
|
||||
"eng");
|
||||
}
|
||||
|
||||
TEST(QuickTest, NewProgressReporitng) {
|
||||
NewProgressTester(TESTING_DIR "/phototest.tif", TESSDATA_DIR "_fast", "eng");
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
@ -18,14 +18,13 @@ class TestableQRSequenceGenerator : public QRSequenceGenerator {
|
||||
TEST(QRSequenceGenerator, GetBinaryReversedInteger) {
|
||||
const int kRangeSize = 8;
|
||||
TestableQRSequenceGenerator generator(kRangeSize);
|
||||
int reversed_vals[kRangeSize] = { 0, 4, 2, 6, 1, 5, 3, 7};
|
||||
int reversed_vals[kRangeSize] = {0, 4, 2, 6, 1, 5, 3, 7};
|
||||
for (int i = 0; i < kRangeSize; ++i)
|
||||
EXPECT_EQ(reversed_vals[i], generator.GetBinaryReversedInteger(i));
|
||||
}
|
||||
|
||||
// Trivial test fixture for a parameterized test.
|
||||
class QRSequenceGeneratorTest : public ::testing::TestWithParam<int> {
|
||||
};
|
||||
class QRSequenceGeneratorTest : public ::testing::TestWithParam<int> {};
|
||||
|
||||
TEST_P(QRSequenceGeneratorTest, GeneratesValidSequence) {
|
||||
const int kRangeSize = GetParam();
|
||||
@ -33,8 +32,7 @@ TEST_P(QRSequenceGeneratorTest, GeneratesValidSequence) {
|
||||
std::vector<int> vals(kRangeSize);
|
||||
CycleTimer timer;
|
||||
timer.Restart();
|
||||
for (int i = 0; i < kRangeSize; ++i)
|
||||
vals[i] = generator.GetVal();
|
||||
for (int i = 0; i < kRangeSize; ++i) vals[i] = generator.GetVal();
|
||||
LOG(INFO) << kRangeSize << "-length sequence took " << timer.Get() * 1e3
|
||||
<< "ms";
|
||||
// Sort the numbers to verify that we've covered the range without repetition.
|
||||
|
@ -10,10 +10,10 @@
|
||||
|
||||
using tesseract::CCUtil;
|
||||
using tesseract::Dict;
|
||||
using tesseract::RecodedCharID;
|
||||
using tesseract::RecodeBeamSearch;
|
||||
using tesseract::RecodeNode;
|
||||
using tesseract::PointerVector;
|
||||
using tesseract::RecodeBeamSearch;
|
||||
using tesseract::RecodedCharID;
|
||||
using tesseract::RecodeNode;
|
||||
using tesseract::TRand;
|
||||
using tesseract::UnicharCompress;
|
||||
|
||||
@ -59,13 +59,11 @@ class RecodeBeamTest : public ::testing::Test {
|
||||
|
||||
// Loads and compresses the given unicharset.
|
||||
void LoadUnicharset(const string& unicharset_name) {
|
||||
string radical_stroke_file =
|
||||
file::JoinPath(FLAGS_test_srcdir,
|
||||
"tesseract/training"
|
||||
"/langdata/radical-stroke.txt");
|
||||
string unicharset_file = file::JoinPath(
|
||||
FLAGS_test_srcdir, "testdata",
|
||||
unicharset_name);
|
||||
string radical_stroke_file = file::JoinPath(FLAGS_test_srcdir,
|
||||
"tesseract/training"
|
||||
"/langdata/radical-stroke.txt");
|
||||
string unicharset_file =
|
||||
file::JoinPath(FLAGS_test_srcdir, "testdata", unicharset_name);
|
||||
string uni_data;
|
||||
CHECK_OK(file::GetContents(unicharset_file, &uni_data, file::Defaults()));
|
||||
string radical_data;
|
||||
@ -94,9 +92,8 @@ class RecodeBeamTest : public ::testing::Test {
|
||||
// Loads the dictionary.
|
||||
void LoadDict(const string& lang) {
|
||||
string traineddata_name = lang + ".traineddata";
|
||||
string traineddata_file = file::JoinPath(
|
||||
FLAGS_test_srcdir, "testdata",
|
||||
traineddata_name);
|
||||
string traineddata_file =
|
||||
file::JoinPath(FLAGS_test_srcdir, "testdata", traineddata_name);
|
||||
lstm_dict_.SetupForLoad(NULL);
|
||||
tesseract::TessdataManager mgr;
|
||||
mgr.Init(traineddata_file.c_str());
|
||||
@ -140,8 +137,8 @@ class RecodeBeamTest : public ::testing::Test {
|
||||
code.length() < RecodedCharID::kMaxCodeLen &&
|
||||
(uni_id == INVALID_UNICHAR_ID ||
|
||||
!recoder_.IsValidFirstCode(labels[index])));
|
||||
EXPECT_NE(INVALID_UNICHAR_ID, uni_id) << "index=" << index << "/"
|
||||
<< labels.size();
|
||||
EXPECT_NE(INVALID_UNICHAR_ID, uni_id)
|
||||
<< "index=" << index << "/" << labels.size();
|
||||
// To the extent of truth_utf8, we expect decoded to match, but if
|
||||
// transcription is shorter, that is OK too, as we may just be testing
|
||||
// that we get a valid sequence when padded with random data.
|
||||
|
@ -16,12 +16,10 @@
|
||||
namespace {
|
||||
|
||||
class TBOXTest : public testing::Test {
|
||||
public:
|
||||
void SetUp() {
|
||||
}
|
||||
public:
|
||||
void SetUp() {}
|
||||
|
||||
void TearDown() {
|
||||
}
|
||||
void TearDown() {}
|
||||
};
|
||||
|
||||
TEST_F(TBOXTest, OverlapInside) {
|
||||
@ -56,10 +54,8 @@ TEST_F(TBOXTest, OverlapFractionCorners) {
|
||||
mid.overlap_fraction(bottom_left));
|
||||
EXPECT_DOUBLE_EQ((5.0 * 5.0) / (10.0 * 10.0),
|
||||
bottom_left.overlap_fraction(mid));
|
||||
EXPECT_DOUBLE_EQ((5.0 * 5.0) / (20.0 * 20.0),
|
||||
mid.overlap_fraction(top_left));
|
||||
EXPECT_DOUBLE_EQ((5.0 * 5.0) / (10.0 * 10.0),
|
||||
top_left.overlap_fraction(mid));
|
||||
EXPECT_DOUBLE_EQ((5.0 * 5.0) / (20.0 * 20.0), mid.overlap_fraction(top_left));
|
||||
EXPECT_DOUBLE_EQ((5.0 * 5.0) / (10.0 * 10.0), top_left.overlap_fraction(mid));
|
||||
}
|
||||
|
||||
TEST_F(TBOXTest, OverlapBoolSides) {
|
||||
@ -175,4 +171,4 @@ TEST_F(TBOXTest, OverlapYFractionZeroSize) {
|
||||
EXPECT_DOUBLE_EQ(0.0, small.y_overlap_fraction(zero));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace
|
||||
|
@ -12,18 +12,18 @@ DEFINE_string(tess_config, "", "config file for tesseract");
|
||||
DEFINE_bool(visual_test, false, "Runs a visual test using scrollview");
|
||||
|
||||
using tesseract::PageIterator;
|
||||
using tesseract::ResultIterator;
|
||||
using tesseract::PageIteratorLevel;
|
||||
using tesseract::ResultIterator;
|
||||
|
||||
// Helper functions for converting to STL vectors
|
||||
template<typename T>
|
||||
void ToVector(const GenericVector<T> &from, std::vector<T> *to) {
|
||||
template <typename T>
|
||||
void ToVector(const GenericVector<T>& from, std::vector<T>* to) {
|
||||
to->clear();
|
||||
for (int i = 0; i < from.size(); i++) to->push_back(from[i]);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void ToVector(const GenericVectorEqEq<T> &from, std::vector<T> *to) {
|
||||
template <typename T>
|
||||
void ToVector(const GenericVectorEqEq<T>& from, std::vector<T>* to) {
|
||||
to->clear();
|
||||
for (int i = 0; i < from.size(); i++) to->push_back(from[i]);
|
||||
}
|
||||
@ -32,22 +32,17 @@ void ToVector(const GenericVectorEqEq<T> &from, std::vector<T> *to) {
|
||||
class ResultIteratorTest : public testing::Test {
|
||||
protected:
|
||||
string TestDataNameToPath(const string& name) {
|
||||
return file::JoinPath(FLAGS_test_srcdir,
|
||||
"testdata/" + name);
|
||||
return file::JoinPath(FLAGS_test_srcdir, "testdata/" + name);
|
||||
}
|
||||
string TessdataPath() {
|
||||
return file::JoinPath(FLAGS_test_srcdir,
|
||||
"tessdata");
|
||||
return file::JoinPath(FLAGS_test_srcdir, "tessdata");
|
||||
}
|
||||
string OutputNameToPath(const string& name) {
|
||||
return file::JoinPath(FLAGS_test_tmpdir, name);
|
||||
}
|
||||
|
||||
ResultIteratorTest() {
|
||||
src_pix_ = NULL;
|
||||
}
|
||||
~ResultIteratorTest() {
|
||||
}
|
||||
ResultIteratorTest() { src_pix_ = NULL; }
|
||||
~ResultIteratorTest() {}
|
||||
|
||||
void SetImage(const char* filename) {
|
||||
src_pix_ = pixRead(TestDataNameToPath(filename).c_str());
|
||||
@ -63,16 +58,14 @@ class ResultIteratorTest : public testing::Test {
|
||||
// Rebuilds the image using the binary images at the given level, and
|
||||
// EXPECTs that the number of pixels in the xor of the rebuilt image with
|
||||
// the original is at most max_diff.
|
||||
void VerifyRebuild(int max_diff,
|
||||
PageIteratorLevel level, PageIterator* it) {
|
||||
void VerifyRebuild(int max_diff, PageIteratorLevel level, PageIterator* it) {
|
||||
it->Begin();
|
||||
int width = pixGetWidth(src_pix_);
|
||||
int height = pixGetHeight(src_pix_);
|
||||
int depth = pixGetDepth(src_pix_);
|
||||
Pix* pix = pixCreate(width, height, depth);
|
||||
EXPECT_TRUE(depth == 1 || depth == 8);
|
||||
if (depth == 8)
|
||||
pixSetAll(pix);
|
||||
if (depth == 8) pixSetAll(pix);
|
||||
do {
|
||||
int left, top, right, bottom;
|
||||
PageIteratorLevel im_level = level;
|
||||
@ -81,8 +74,8 @@ class ResultIteratorTest : public testing::Test {
|
||||
im_level = tesseract::RIL_BLOCK;
|
||||
EXPECT_TRUE(it->BoundingBox(im_level, &left, &top, &right, &bottom));
|
||||
}
|
||||
VLOG(1) << "BBox: [L:" << left << ", T:" << top
|
||||
<< ", R:" << right << ", B:" << bottom << "]";
|
||||
VLOG(1) << "BBox: [L:" << left << ", T:" << top << ", R:" << right
|
||||
<< ", B:" << bottom << "]";
|
||||
Pix* block_pix;
|
||||
if (depth == 1) {
|
||||
block_pix = it->GetBinaryImage(im_level);
|
||||
@ -90,9 +83,9 @@ class ResultIteratorTest : public testing::Test {
|
||||
PIX_SRC ^ PIX_DST, block_pix, 0, 0);
|
||||
} else {
|
||||
block_pix = it->GetImage(im_level, 2, src_pix_, &left, &top);
|
||||
pixRasterop(pix, left, top,
|
||||
pixGetWidth(block_pix), pixGetHeight(block_pix),
|
||||
PIX_SRC & PIX_DST, block_pix, 0, 0);
|
||||
pixRasterop(pix, left, top, pixGetWidth(block_pix),
|
||||
pixGetHeight(block_pix), PIX_SRC & PIX_DST, block_pix, 0,
|
||||
0);
|
||||
}
|
||||
CHECK(block_pix != nullptr);
|
||||
pixDestroy(&block_pix);
|
||||
@ -123,8 +116,7 @@ class ResultIteratorTest : public testing::Test {
|
||||
|
||||
// Rebuilds the text from the iterator strings at the given level, and
|
||||
// EXPECTs that the rebuild string exactly matches the truth string.
|
||||
void VerifyIteratorText(const string& truth,
|
||||
PageIteratorLevel level,
|
||||
void VerifyIteratorText(const string& truth, PageIteratorLevel level,
|
||||
ResultIterator* it) {
|
||||
VLOG(1) << "Text Test Level " << level;
|
||||
it->Begin();
|
||||
@ -132,7 +124,7 @@ class ResultIteratorTest : public testing::Test {
|
||||
do {
|
||||
char* text = it->GetUTF8Text(level);
|
||||
result += text;
|
||||
delete [] text;
|
||||
delete[] text;
|
||||
if ((level == tesseract::RIL_WORD || level == tesseract::RIL_SYMBOL) &&
|
||||
it->IsAtFinalElement(tesseract::RIL_WORD, level)) {
|
||||
if (it->IsAtFinalElement(tesseract::RIL_TEXTLINE, level)) {
|
||||
@ -140,8 +132,7 @@ class ResultIteratorTest : public testing::Test {
|
||||
} else {
|
||||
result += ' ';
|
||||
}
|
||||
if (it->IsAtFinalElement(tesseract::RIL_PARA, level))
|
||||
result += '\n';
|
||||
if (it->IsAtFinalElement(tesseract::RIL_PARA, level)) result += '\n';
|
||||
}
|
||||
} while (it->Next(level));
|
||||
EXPECT_STREQ(truth.c_str(), result.c_str())
|
||||
@ -170,9 +161,10 @@ class ResultIteratorTest : public testing::Test {
|
||||
// expected output reading order
|
||||
// (expected_reading_order[num_reading_order_entries]) and a given reading
|
||||
// context (ltr or rtl).
|
||||
void ExpectTextlineReadingOrder(
|
||||
bool in_ltr_context, StrongScriptDirection *word_dirs, int num_words,
|
||||
int *expected_reading_order, int num_reading_order_entries) const {
|
||||
void ExpectTextlineReadingOrder(bool in_ltr_context,
|
||||
StrongScriptDirection* word_dirs,
|
||||
int num_words, int* expected_reading_order,
|
||||
int num_reading_order_entries) const {
|
||||
GenericVector<StrongScriptDirection> gv_word_dirs;
|
||||
for (int i = 0; i < num_words; i++) {
|
||||
gv_word_dirs.push_back(word_dirs[i]);
|
||||
@ -195,7 +187,7 @@ class ResultIteratorTest : public testing::Test {
|
||||
// Sane means that the output contains some permutation of the indices
|
||||
// 0..[num_words - 1] interspersed optionally with negative (marker) values.
|
||||
void VerifySaneTextlineOrder(bool in_ltr_context,
|
||||
StrongScriptDirection *word_dirs,
|
||||
StrongScriptDirection* word_dirs,
|
||||
int num_words) const {
|
||||
GenericVector<StrongScriptDirection> gv_word_dirs;
|
||||
for (int i = 0; i < num_words; i++) {
|
||||
@ -235,7 +227,6 @@ class ResultIteratorTest : public testing::Test {
|
||||
tesseract::TessBaseAPI api_;
|
||||
};
|
||||
|
||||
|
||||
// Tests layout analysis output (and scrollview) on the UNLV page numbered
|
||||
// 8087_054.3G.tif. (Dubrovnik), but only if --visual_test is true.
|
||||
TEST_F(ResultIteratorTest, VisualTest) {
|
||||
@ -249,8 +240,8 @@ TEST_F(ResultIteratorTest, VisualTest) {
|
||||
// Make a scrollview window for the display.
|
||||
int width = pixGetWidth(src_pix_);
|
||||
int height = pixGetHeight(src_pix_);
|
||||
ScrollView* win = new ScrollView(kIms[i], 100, 100,
|
||||
width / 2, height / 2, width, height);
|
||||
ScrollView* win =
|
||||
new ScrollView(kIms[i], 100, 100, width / 2, height / 2, width, height);
|
||||
win->Image(src_pix_, 0, 0);
|
||||
it->Begin();
|
||||
ScrollView::Color color = ScrollView::RED;
|
||||
@ -296,7 +287,7 @@ TEST_F(ResultIteratorTest, EasyTest) {
|
||||
|
||||
char* result = api_.GetUTF8Text();
|
||||
ocr_text_ = result;
|
||||
delete [] result;
|
||||
delete[] result;
|
||||
ResultIterator* r_it = api_.GetIterator();
|
||||
// The images should rebuild almost perfectly.
|
||||
LOG(INFO) << "Verifying image rebuilds 2a (resultiterator)";
|
||||
@ -330,15 +321,15 @@ TEST_F(ResultIteratorTest, EasyTest) {
|
||||
do {
|
||||
bool bold, italic, underlined, monospace, serif, smallcaps;
|
||||
int pointsize, font_id;
|
||||
const char* font = r_it->WordFontAttributes(&bold, &italic, &underlined,
|
||||
&monospace, &serif, &smallcaps,
|
||||
&pointsize, &font_id);
|
||||
const char* font =
|
||||
r_it->WordFontAttributes(&bold, &italic, &underlined, &monospace,
|
||||
&serif, &smallcaps, &pointsize, &font_id);
|
||||
float confidence = r_it->Confidence(tesseract::RIL_WORD);
|
||||
EXPECT_GE(confidence, 80.0f);
|
||||
char* word_str = r_it->GetUTF8Text(tesseract::RIL_WORD);
|
||||
VLOG(1) << StringPrintf("Word %s in font %s, id %d, size %d, conf %g",
|
||||
word_str, font, font_id, pointsize, confidence);
|
||||
delete [] word_str;
|
||||
delete[] word_str;
|
||||
EXPECT_FALSE(bold);
|
||||
EXPECT_FALSE(italic);
|
||||
EXPECT_FALSE(underlined);
|
||||
@ -379,7 +370,7 @@ TEST_F(ResultIteratorTest, GreyTest) {
|
||||
TEST_F(ResultIteratorTest, SmallCapDropCapTest) {
|
||||
SetImage("8071_093.3B.tif");
|
||||
char* result = api_.GetUTF8Text();
|
||||
delete [] result;
|
||||
delete[] result;
|
||||
ResultIterator* r_it = api_.GetIterator();
|
||||
// Iterate over the words.
|
||||
int found_dropcaps = 0;
|
||||
@ -388,26 +379,23 @@ TEST_F(ResultIteratorTest, SmallCapDropCapTest) {
|
||||
do {
|
||||
bool bold, italic, underlined, monospace, serif, smallcaps;
|
||||
int pointsize, font_id;
|
||||
r_it->WordFontAttributes(&bold, &italic, &underlined,
|
||||
&monospace, &serif, &smallcaps,
|
||||
&pointsize, &font_id);
|
||||
r_it->WordFontAttributes(&bold, &italic, &underlined, &monospace, &serif,
|
||||
&smallcaps, &pointsize, &font_id);
|
||||
char* word_str = r_it->GetUTF8Text(tesseract::RIL_WORD);
|
||||
if (word_str != NULL) {
|
||||
VLOG(1) << StringPrintf("Word %s is %s",
|
||||
word_str, smallcaps ? "Smallcaps" : "Normal");
|
||||
VLOG(1) << StringPrintf("Word %s is %s", word_str,
|
||||
smallcaps ? "Smallcaps" : "Normal");
|
||||
if (r_it->SymbolIsDropcap()) {
|
||||
++found_dropcaps;
|
||||
}
|
||||
if (strcmp(word_str, "SHE") == 0 ||
|
||||
strcmp(word_str, "MOPED") == 0 ||
|
||||
if (strcmp(word_str, "SHE") == 0 || strcmp(word_str, "MOPED") == 0 ||
|
||||
strcmp(word_str, "RALPH") == 0 ||
|
||||
strcmp(word_str, "KINNEY") == 0 || // Not working yet.
|
||||
strcmp(word_str, "BENNETT") == 0) {
|
||||
EXPECT_TRUE(smallcaps) << word_str;
|
||||
++found_smallcaps;
|
||||
} else {
|
||||
if (smallcaps)
|
||||
++false_positives;
|
||||
if (smallcaps) ++false_positives;
|
||||
}
|
||||
// No symbol other than the first of any word should be dropcap.
|
||||
ResultIterator s_it(*r_it);
|
||||
@ -415,13 +403,13 @@ TEST_F(ResultIteratorTest, SmallCapDropCapTest) {
|
||||
!s_it.IsAtBeginningOf(tesseract::RIL_WORD)) {
|
||||
if (s_it.SymbolIsDropcap()) {
|
||||
char* sym_str = s_it.GetUTF8Text(tesseract::RIL_SYMBOL);
|
||||
LOG(ERROR) << StringPrintf("Symbol %s of word %s is dropcap",
|
||||
sym_str, word_str);
|
||||
delete [] sym_str;
|
||||
LOG(ERROR) << StringPrintf("Symbol %s of word %s is dropcap", sym_str,
|
||||
word_str);
|
||||
delete[] sym_str;
|
||||
}
|
||||
EXPECT_FALSE(s_it.SymbolIsDropcap());
|
||||
}
|
||||
delete [] word_str;
|
||||
delete[] word_str;
|
||||
}
|
||||
} while (r_it->Next(tesseract::RIL_WORD));
|
||||
delete r_it;
|
||||
@ -486,12 +474,13 @@ static const StrongScriptDirection dZ = DIR_MIX;
|
||||
// interpreted appropriately in different contexts.
|
||||
TEST_F(ResultIteratorTest, DualStartTextlineOrderTest) {
|
||||
StrongScriptDirection word_dirs[] = {dL, dL, dN, dL, dN, dR, dR, dR};
|
||||
int reading_order_rtl_context[] = {
|
||||
7, 6, 5, 4, ResultIterator::kMinorRunStart, 0, 1, 2, 3,
|
||||
ResultIterator::kMinorRunEnd};
|
||||
int reading_order_ltr_context[] = {
|
||||
0, 1, 2, 3, 4, ResultIterator::kMinorRunStart, 7, 6, 5,
|
||||
ResultIterator::kMinorRunEnd};
|
||||
int reading_order_rtl_context[] = {7, 6, 5, 4, ResultIterator::kMinorRunStart,
|
||||
0, 1, 2, 3, ResultIterator::kMinorRunEnd};
|
||||
int reading_order_ltr_context[] = {0, 1,
|
||||
2, 3,
|
||||
4, ResultIterator::kMinorRunStart,
|
||||
7, 6,
|
||||
5, ResultIterator::kMinorRunEnd};
|
||||
|
||||
ExpectTextlineReadingOrder(true, word_dirs, ABSL_ARRAYSIZE(word_dirs),
|
||||
reading_order_ltr_context,
|
||||
@ -510,8 +499,8 @@ TEST_F(ResultIteratorTest, LeftwardTextlineOrderTest) {
|
||||
// In the strange event that this shows up in an RTL paragraph, nonetheless
|
||||
// just presume the whole thing is an LTR line.
|
||||
int reading_order_rtl_context[] = {
|
||||
ResultIterator::kMinorRunStart, 0, 1, 2, 3, 4, 5, 6, 7,
|
||||
ResultIterator::kMinorRunEnd};
|
||||
ResultIterator::kMinorRunStart, 0, 1, 2, 3, 4, 5, 6, 7,
|
||||
ResultIterator::kMinorRunEnd};
|
||||
|
||||
ExpectTextlineReadingOrder(true, word_dirs, ABSL_ARRAYSIZE(word_dirs),
|
||||
reading_order_ltr_context,
|
||||
@ -553,7 +542,7 @@ TEST_F(ResultIteratorTest, TextlineOrderSanityCheck) {
|
||||
TEST_F(ResultIteratorTest, NonNullChoicesTest) {
|
||||
SetImage("5318c4b679264.jpg");
|
||||
char* result = api_.GetUTF8Text();
|
||||
delete [] result;
|
||||
delete[] result;
|
||||
ResultIterator* r_it = api_.GetIterator();
|
||||
// Iterate over the words.
|
||||
do {
|
||||
@ -571,10 +560,10 @@ TEST_F(ResultIteratorTest, NonNullChoicesTest) {
|
||||
VLOG(1) << "Char choice " << char_str;
|
||||
CHECK(char_str != nullptr);
|
||||
} while (c_it.Next());
|
||||
} while (!s_it.IsAtFinalElement(tesseract::RIL_WORD,
|
||||
tesseract::RIL_SYMBOL) &&
|
||||
s_it.Next(tesseract::RIL_SYMBOL));
|
||||
delete [] word_str;
|
||||
} while (
|
||||
!s_it.IsAtFinalElement(tesseract::RIL_WORD, tesseract::RIL_SYMBOL) &&
|
||||
s_it.Next(tesseract::RIL_SYMBOL));
|
||||
delete[] word_str;
|
||||
}
|
||||
} while (r_it->Next(tesseract::RIL_WORD));
|
||||
delete r_it;
|
||||
@ -586,7 +575,7 @@ TEST_F(ResultIteratorTest, NonNullConfidencesTest) {
|
||||
// Force recognition so we can used the result iterator.
|
||||
// We don't care about the return from GetUTF8Text.
|
||||
char* result = api_.GetUTF8Text();
|
||||
delete [] result;
|
||||
delete[] result;
|
||||
ResultIterator* r_it = api_.GetIterator();
|
||||
// Iterate over the words.
|
||||
do {
|
||||
@ -599,13 +588,13 @@ TEST_F(ResultIteratorTest, NonNullConfidencesTest) {
|
||||
const char* char_str = s_it.GetUTF8Text(tesseract::RIL_SYMBOL);
|
||||
CHECK(char_str != nullptr);
|
||||
float confidence = s_it.Confidence(tesseract::RIL_SYMBOL);
|
||||
VLOG(1) << StringPrintf("Char %s has confidence %g\n",
|
||||
char_str, confidence);
|
||||
delete [] char_str;
|
||||
} while (!s_it.IsAtFinalElement(tesseract::RIL_WORD,
|
||||
tesseract::RIL_SYMBOL) &&
|
||||
s_it.Next(tesseract::RIL_SYMBOL));
|
||||
delete [] word_str;
|
||||
VLOG(1) << StringPrintf("Char %s has confidence %g\n", char_str,
|
||||
confidence);
|
||||
delete[] char_str;
|
||||
} while (
|
||||
!s_it.IsAtFinalElement(tesseract::RIL_WORD, tesseract::RIL_SYMBOL) &&
|
||||
s_it.Next(tesseract::RIL_SYMBOL));
|
||||
delete[] word_str;
|
||||
} else {
|
||||
VLOG(1) << "Empty word found";
|
||||
}
|
||||
|
@ -7,8 +7,7 @@ namespace {
|
||||
class ScanutilsTest : public ::testing::Test {
|
||||
protected:
|
||||
string TestDataNameToPath(const string& name) {
|
||||
return file::JoinPath(FLAGS_test_srcdir,
|
||||
"testdata/" + name);
|
||||
return file::JoinPath(FLAGS_test_srcdir, "testdata/" + name);
|
||||
}
|
||||
};
|
||||
|
||||
@ -32,15 +31,13 @@ TEST_F(ScanutilsTest, DoesScanf) {
|
||||
int r1 = fscanf(fp1, "%f %f %f %f", &f1[0], &f1[1], &f1[2], &f1[3]);
|
||||
int r2 = tfscanf(fp2, "%f %f %f %f", &f2[0], &f2[1], &f2[2], &f2[3]);
|
||||
EXPECT_EQ(r1, r2);
|
||||
for (int i = 0; i < kNumFloats; ++i)
|
||||
EXPECT_FLOAT_EQ(f1[i], f2[i]);
|
||||
for (int i = 0; i < kNumFloats; ++i) EXPECT_FLOAT_EQ(f1[i], f2[i]);
|
||||
const int kNumInts = 5;
|
||||
int i1[kNumInts], i2[kNumInts];
|
||||
r1 = fscanf(fp1, "%d %d %d %d %i", &i1[0], &i1[1], &i1[2], &i1[3], &i1[4]);
|
||||
r2 = tfscanf(fp2, "%d %d %d %d %i", &i2[0], &i2[1], &i2[2], &i2[3], &i2[4]);
|
||||
EXPECT_EQ(r1, r2);
|
||||
for (int i = 0; i < kNumInts; ++i)
|
||||
EXPECT_EQ(i1[i], i2[i]);
|
||||
for (int i = 0; i < kNumInts; ++i) EXPECT_EQ(i1[i], i2[i]);
|
||||
const int kStrLen = 1024;
|
||||
char s1[kStrLen];
|
||||
char s2[kStrLen];
|
||||
@ -68,11 +65,10 @@ TEST_F(ScanutilsTest, DoesScanf) {
|
||||
r1 = fscanf(fp1, "%f %f %f %f", &f1[0], &f1[1], &f1[2], &f1[3]);
|
||||
r2 = tfscanf(fp2, "%f %f %f %f", &f2[0], &f2[1], &f2[2], &f2[3]);
|
||||
EXPECT_EQ(r1, r2);
|
||||
for (int i = 0; i < kNumFloats; ++i)
|
||||
EXPECT_FLOAT_EQ(f1[i], f2[i]);
|
||||
for (int i = 0; i < kNumFloats; ++i) EXPECT_FLOAT_EQ(f1[i], f2[i]);
|
||||
// Test the * for field suppression.
|
||||
r1 = fscanf(fp1, "%d %*s %*d %*f %*f", &i1[0]);
|
||||
r2 = tfscanf(fp2,"%d %*s %*d %*f %*f", &i2[0]);
|
||||
r2 = tfscanf(fp2, "%d %*s %*d %*f %*f", &i2[0]);
|
||||
EXPECT_EQ(r1, r2);
|
||||
EXPECT_EQ(i1[0], i2[0]);
|
||||
// We should still see the next value and no phantoms.
|
||||
@ -84,4 +80,3 @@ TEST_F(ScanutilsTest, DoesScanf) {
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
|
@ -39,8 +39,7 @@ static void Expect352(int font_id, const Shape& shape) {
|
||||
}
|
||||
|
||||
// The fixture for testing Shape.
|
||||
class ShapeTest : public testing::Test {
|
||||
};
|
||||
class ShapeTest : public testing::Test {};
|
||||
|
||||
// Tests that a Shape works as expected for all the basic functions.
|
||||
TEST_F(ShapeTest, BasicTest) {
|
||||
@ -97,8 +96,7 @@ TEST_F(ShapeTest, AddShapeTest) {
|
||||
}
|
||||
|
||||
// The fixture for testing Shape.
|
||||
class ShapeTableTest : public testing::Test {
|
||||
};
|
||||
class ShapeTableTest : public testing::Test {};
|
||||
|
||||
// Tests that a Shape works as expected for all the basic functions.
|
||||
TEST_F(ShapeTableTest, FullTest) {
|
||||
@ -148,5 +146,3 @@ TEST_F(ShapeTableTest, FullTest) {
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
|
||||
|
@ -9,15 +9,15 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "statistc.h"
|
||||
#include "genericvector.h"
|
||||
#include "kdpair.h"
|
||||
#include "statistc.h"
|
||||
|
||||
#include "include_gunit.h"
|
||||
|
||||
namespace {
|
||||
|
||||
const int kTestData[] = { 2, 0, 12, 1, 1, 2, 10, 1, 0, 0, 0, 2, 0, 4, 1, 1 };
|
||||
const int kTestData[] = {2, 0, 12, 1, 1, 2, 10, 1, 0, 0, 0, 2, 0, 4, 1, 1};
|
||||
|
||||
class STATSTest : public testing::Test {
|
||||
public:
|
||||
@ -27,8 +27,7 @@ class STATSTest : public testing::Test {
|
||||
stats_.add(i, kTestData[i]);
|
||||
}
|
||||
|
||||
void TearDown() {
|
||||
}
|
||||
void TearDown() {}
|
||||
|
||||
STATS stats_;
|
||||
};
|
||||
|
@ -1,9 +1,9 @@
|
||||
#include "tesseract/lstm/stridemap.h"
|
||||
|
||||
using tesseract::FlexDimensions;
|
||||
using tesseract::FD_BATCH;
|
||||
using tesseract::FD_HEIGHT;
|
||||
using tesseract::FD_WIDTH;
|
||||
using tesseract::FlexDimensions;
|
||||
using tesseract::StrideMap;
|
||||
|
||||
namespace {
|
||||
@ -104,8 +104,8 @@ TEST_F(StridemapTest, Scaling) {
|
||||
|
||||
// Scale x by 2, keeping y the same.
|
||||
std::vector<int> values_x2 = {0, 1, 4, 5, 8, 9, 12, 13, 17, 18,
|
||||
22, 23, 27, 28, 32, 33, 36, 37, 40, 41,
|
||||
44, 45, 48, 49, 53, 54, 58, 59};
|
||||
22, 23, 27, 28, 32, 33, 36, 37, 40, 41,
|
||||
44, 45, 48, 49, 53, 54, 58, 59};
|
||||
StrideMap test_map(stride_map);
|
||||
test_map.ScaleXY(2, 1);
|
||||
StrideMap::Index index(test_map);
|
||||
@ -121,8 +121,8 @@ TEST_F(StridemapTest, Scaling) {
|
||||
test_map = stride_map;
|
||||
// Scale y by 2, keeping x the same.
|
||||
std::vector<int> values_y2 = {0, 1, 2, 3, 12, 13, 14, 15, 16,
|
||||
17, 18, 19, 20, 21, 32, 33, 34, 35,
|
||||
36, 37, 38, 39, 48, 49, 50, 51, 52};
|
||||
17, 18, 19, 20, 21, 32, 33, 34, 35,
|
||||
36, 37, 38, 39, 48, 49, 50, 51, 52};
|
||||
test_map.ScaleXY(1, 2);
|
||||
index.InitToFirst();
|
||||
pos = 0;
|
||||
@ -151,7 +151,7 @@ TEST_F(StridemapTest, Scaling) {
|
||||
test_map = stride_map;
|
||||
// Reduce Width to 1.
|
||||
std::vector<int> values_x_to_1 = {0, 4, 8, 12, 17, 22, 27,
|
||||
32, 36, 40, 44, 48, 53, 58};
|
||||
32, 36, 40, 44, 48, 53, 58};
|
||||
test_map.ReduceWidthTo1();
|
||||
index.InitToFirst();
|
||||
pos = 0;
|
||||
|
@ -23,9 +23,9 @@ namespace {
|
||||
const char kEngText[] = "the quick brown fox jumps over the lazy dog";
|
||||
const char kHinText[] = "पिताने विवाह की | हो गई उद्विग्न वह सोचा";
|
||||
|
||||
const char kKorText[] =
|
||||
"이는 것으로 다시 넣을 1234 수는 있지만 선택의 의미는";
|
||||
const char kArabicText[] = "والفكر والصراع ، بالتأمل والفهم والتحليل ، "
|
||||
const char kKorText[] = "이는 것으로 다시 넣을 1234 수는 있지만 선택의 의미는";
|
||||
const char kArabicText[] =
|
||||
"والفكر والصراع ، بالتأمل والفهم والتحليل ، "
|
||||
"بالعلم والفن ، وأخيرا بالضحك أوبالبكاء ، ";
|
||||
const char kMixedText[] = "والفكر 123 والصراع abc";
|
||||
|
||||
@ -40,8 +40,7 @@ class StringRendererTest : public ::testing::Test {
|
||||
protected:
|
||||
static void SetUpTestCase() {
|
||||
l_chooseDisplayProg(L_DISPLAY_WITH_XZGV);
|
||||
FLAGS_fonts_dir = file::JoinPath(
|
||||
FLAGS_test_srcdir, "testdata");
|
||||
FLAGS_fonts_dir = file::JoinPath(FLAGS_test_srcdir, "testdata");
|
||||
FLAGS_fontconfig_tmpdir = FLAGS_test_tmpdir;
|
||||
FLAGS_use_only_legacy_fonts = false;
|
||||
// Needed for reliable heapchecking of pango layout structures.
|
||||
@ -66,7 +65,7 @@ class StringRendererTest : public ::testing::Test {
|
||||
|
||||
TEST_F(StringRendererTest, DoesRenderToImage) {
|
||||
renderer_.reset(new StringRenderer("Verdana 10", 600, 600));
|
||||
Pix *pix = NULL;
|
||||
Pix* pix = NULL;
|
||||
EXPECT_EQ(strlen(kEngText),
|
||||
renderer_->RenderToImage(kEngText, strlen(kEngText), &pix));
|
||||
EXPECT_TRUE(pix != NULL);
|
||||
@ -112,7 +111,7 @@ TEST_F(StringRendererTest, DoesRenderToImageWithUnderline) {
|
||||
// Underline all words but NOT intervening spaces.
|
||||
renderer_->set_underline_start_prob(1.0);
|
||||
renderer_->set_underline_continuation_prob(0);
|
||||
Pix *pix = NULL;
|
||||
Pix* pix = NULL;
|
||||
EXPECT_EQ(strlen(kEngText),
|
||||
renderer_->RenderToImage(kEngText, strlen(kEngText), &pix));
|
||||
EXPECT_TRUE(pix != NULL);
|
||||
@ -147,7 +146,7 @@ TEST_F(StringRendererTest, DoesHandleNewlineCharacters) {
|
||||
const char kRawText[] = "\n\n\n A \nB \nC \n\n\n";
|
||||
const char kStrippedText[] = " A B C "; // text with newline chars removed
|
||||
renderer_.reset(new StringRenderer("Verdana 10", 600, 600));
|
||||
Pix *pix = NULL;
|
||||
Pix* pix = NULL;
|
||||
EXPECT_EQ(strlen(kRawText),
|
||||
renderer_->RenderToImage(kRawText, strlen(kRawText), &pix));
|
||||
EXPECT_TRUE(pix != NULL);
|
||||
@ -167,9 +166,9 @@ TEST_F(StringRendererTest, DoesRenderLigatures) {
|
||||
const char kArabicLigature[] = "لا";
|
||||
|
||||
Pix* pix = NULL;
|
||||
EXPECT_EQ(strlen(kArabicLigature),
|
||||
renderer_->RenderToImage(kArabicLigature, strlen(kArabicLigature),
|
||||
&pix));
|
||||
EXPECT_EQ(
|
||||
strlen(kArabicLigature),
|
||||
renderer_->RenderToImage(kArabicLigature, strlen(kArabicLigature), &pix));
|
||||
EXPECT_TRUE(pix != NULL);
|
||||
EXPECT_GT(renderer_->GetBoxes().size(), 0);
|
||||
const std::vector<BoxChar*>& boxes = renderer_->GetBoxes();
|
||||
@ -186,12 +185,10 @@ TEST_F(StringRendererTest, DoesRenderLigatures) {
|
||||
pixDestroy(&pix);
|
||||
}
|
||||
|
||||
|
||||
static int FindBoxCharXCoord(const std::vector<BoxChar*>& boxchars,
|
||||
const string& ch) {
|
||||
for (int i = 0; i < boxchars.size(); ++i) {
|
||||
if (boxchars[i]->ch() == ch)
|
||||
return boxchars[i]->box()->x;
|
||||
if (boxchars[i]->ch() == ch) return boxchars[i]->box()->x;
|
||||
}
|
||||
return kint32max;
|
||||
}
|
||||
@ -223,14 +220,14 @@ TEST_F(StringRendererTest, ArabicBoxcharsInLTROrder) {
|
||||
|
||||
TEST_F(StringRendererTest, DoesOutputBoxcharsInReadingOrder) {
|
||||
renderer_.reset(new StringRenderer("Arab 10", 600, 600));
|
||||
Pix *pix = NULL;
|
||||
Pix* pix = NULL;
|
||||
// Arabic letters should be in decreasing x-coordinates
|
||||
const char kArabicWord[] = "والفكر";
|
||||
renderer_->RenderToImage(kArabicWord, strlen(kArabicWord), &pix);
|
||||
EXPECT_GT(renderer_->GetBoxes().size(), 0);
|
||||
const std::vector<BoxChar*>& boxchars = renderer_->GetBoxes();
|
||||
for (int i = 0; i < boxchars.size() - 1; ++i) {
|
||||
EXPECT_GT(boxchars[i]->box()->x, boxchars[i+1]->box()->x)
|
||||
EXPECT_GT(boxchars[i]->box()->x, boxchars[i + 1]->box()->x)
|
||||
<< boxchars[i]->ch();
|
||||
}
|
||||
pixDestroy(&pix);
|
||||
@ -241,7 +238,7 @@ TEST_F(StringRendererTest, DoesOutputBoxcharsInReadingOrder) {
|
||||
renderer_->RenderToImage(kEnglishWord, strlen(kEnglishWord), &pix);
|
||||
EXPECT_EQ(boxchars.size(), strlen(kEnglishWord));
|
||||
for (int i = 0; i < boxchars.size() - 1; ++i) {
|
||||
EXPECT_LT(boxchars[i]->box()->x, boxchars[i+1]->box()->x)
|
||||
EXPECT_LT(boxchars[i]->box()->x, boxchars[i + 1]->box()->x)
|
||||
<< boxchars[i]->ch();
|
||||
}
|
||||
pixDestroy(&pix);
|
||||
@ -255,7 +252,6 @@ TEST_F(StringRendererTest, DoesOutputBoxcharsInReadingOrder) {
|
||||
pixDestroy(&pix);
|
||||
}
|
||||
|
||||
|
||||
TEST_F(StringRendererTest, DoesRenderVerticalText) {
|
||||
Pix* pix = NULL;
|
||||
renderer_.reset(new StringRenderer("UnBatang 10", 600, 600));
|
||||
@ -271,7 +267,7 @@ TEST_F(StringRendererTest, DoesRenderVerticalText) {
|
||||
// appropriate page numbers.
|
||||
TEST_F(StringRendererTest, DoesKeepAllImageBoxes) {
|
||||
renderer_.reset(new StringRenderer("Verdana 10", 600, 600));
|
||||
Pix *pix = NULL;
|
||||
Pix* pix = NULL;
|
||||
int num_boxes_per_page = 0;
|
||||
const int kNumTrials = 2;
|
||||
for (int i = 0; i < kNumTrials; ++i) {
|
||||
@ -283,9 +279,10 @@ TEST_F(StringRendererTest, DoesKeepAllImageBoxes) {
|
||||
if (!num_boxes_per_page) {
|
||||
num_boxes_per_page = renderer_->GetBoxes().size();
|
||||
} else {
|
||||
EXPECT_EQ((i+1) * num_boxes_per_page, renderer_->GetBoxes().size());
|
||||
EXPECT_EQ((i + 1) * num_boxes_per_page, renderer_->GetBoxes().size());
|
||||
}
|
||||
for (int j = i * num_boxes_per_page; j < (i+1) * num_boxes_per_page; ++j) {
|
||||
for (int j = i * num_boxes_per_page; j < (i + 1) * num_boxes_per_page;
|
||||
++j) {
|
||||
EXPECT_EQ(i, renderer_->GetBoxes()[j]->page());
|
||||
}
|
||||
}
|
||||
@ -293,7 +290,7 @@ TEST_F(StringRendererTest, DoesKeepAllImageBoxes) {
|
||||
|
||||
TEST_F(StringRendererTest, DoesClearBoxes) {
|
||||
renderer_.reset(new StringRenderer("Verdana 10", 600, 600));
|
||||
Pix *pix = NULL;
|
||||
Pix* pix = NULL;
|
||||
EXPECT_EQ(strlen(kEngText),
|
||||
renderer_->RenderToImage(kEngText, strlen(kEngText), &pix));
|
||||
pixDestroy(&pix);
|
||||
@ -310,7 +307,7 @@ TEST_F(StringRendererTest, DoesClearBoxes) {
|
||||
TEST_F(StringRendererTest, DoesLigatureTextForRendering) {
|
||||
renderer_.reset(new StringRenderer("Verdana 10", 600, 600));
|
||||
renderer_->set_add_ligatures(true);
|
||||
Pix *pix = NULL;
|
||||
Pix* pix = NULL;
|
||||
EXPECT_EQ(strlen(kEngNonLigatureText),
|
||||
renderer_->RenderToImage(kEngNonLigatureText,
|
||||
strlen(kEngNonLigatureText), &pix));
|
||||
@ -323,7 +320,7 @@ TEST_F(StringRendererTest, DoesLigatureTextForRendering) {
|
||||
|
||||
TEST_F(StringRendererTest, DoesRetainInputLigatureForRendering) {
|
||||
renderer_.reset(new StringRenderer("Verdana 10", 600, 600));
|
||||
Pix *pix = NULL;
|
||||
Pix* pix = NULL;
|
||||
EXPECT_EQ(strlen(kEngLigatureText),
|
||||
renderer_->RenderToImage(kEngLigatureText, strlen(kEngLigatureText),
|
||||
&pix));
|
||||
@ -346,7 +343,7 @@ TEST_F(StringRendererTest, DoesStripUnrenderableWords) {
|
||||
TEST_F(StringRendererTest, DoesRenderWordBoxes) {
|
||||
renderer_.reset(new StringRenderer("Verdana 10", 600, 600));
|
||||
renderer_->set_output_word_boxes(true);
|
||||
Pix *pix = NULL;
|
||||
Pix* pix = NULL;
|
||||
EXPECT_EQ(strlen(kEngText),
|
||||
renderer_->RenderToImage(kEngText, strlen(kEngText), &pix));
|
||||
pixDestroy(&pix);
|
||||
@ -369,7 +366,7 @@ TEST_F(StringRendererTest, DoesRenderWordBoxes) {
|
||||
TEST_F(StringRendererTest, DoesRenderWordBoxesFromMultiLineText) {
|
||||
renderer_.reset(new StringRenderer("Verdana 10", 600, 600));
|
||||
renderer_->set_output_word_boxes(true);
|
||||
Pix *pix = NULL;
|
||||
Pix* pix = NULL;
|
||||
const char kMultlineText[] = "the quick brown fox\njumps over the lazy dog";
|
||||
EXPECT_EQ(strlen(kMultlineText),
|
||||
renderer_->RenderToImage(kMultlineText, strlen(kEngText), &pix));
|
||||
@ -398,9 +395,8 @@ TEST_F(StringRendererTest, DoesRenderAllFontsToImage) {
|
||||
do {
|
||||
Pix* pix = NULL;
|
||||
font_used.clear();
|
||||
offset += renderer_->RenderAllFontsToImage(1.0, kEngText + offset,
|
||||
strlen(kEngText + offset),
|
||||
&font_used, &pix);
|
||||
offset += renderer_->RenderAllFontsToImage(
|
||||
1.0, kEngText + offset, strlen(kEngText + offset), &font_used, &pix);
|
||||
if (offset < strlen(kEngText)) {
|
||||
EXPECT_TRUE(pix != NULL);
|
||||
EXPECT_STRNE("", font_used.c_str());
|
||||
@ -432,7 +428,8 @@ TEST_F(StringRendererTest, DoesDropUncoveredChars) {
|
||||
const string kWord = "office";
|
||||
const string kCleanWord = "oice";
|
||||
Pix* pix = NULL;
|
||||
EXPECT_FALSE(renderer_->font().CanRenderString(kWord.c_str(), kWord.length()));
|
||||
EXPECT_FALSE(
|
||||
renderer_->font().CanRenderString(kWord.c_str(), kWord.length()));
|
||||
EXPECT_FALSE(renderer_->font().CoversUTF8Text(kWord.c_str(), kWord.length()));
|
||||
int offset = renderer_->RenderToImage(kWord.c_str(), kWord.length(), &pix);
|
||||
pixDestroy(&pix);
|
||||
|
@ -25,14 +25,14 @@ namespace {
|
||||
|
||||
class TestableTableFinder : public tesseract::TableFinder {
|
||||
public:
|
||||
using TableFinder::set_global_median_xheight;
|
||||
using TableFinder::set_global_median_blob_width;
|
||||
using TableFinder::set_global_median_ledding;
|
||||
using TableFinder::GapInXProjection;
|
||||
using TableFinder::HasLeaderAdjacent;
|
||||
using TableFinder::InsertLeaderPartition;
|
||||
using TableFinder::InsertTextPartition;
|
||||
using TableFinder::set_global_median_blob_width;
|
||||
using TableFinder::set_global_median_ledding;
|
||||
using TableFinder::set_global_median_xheight;
|
||||
using TableFinder::SplitAndInsertFragmentedTextPartition;
|
||||
using TableFinder::HasLeaderAdjacent;
|
||||
|
||||
void ExpectPartition(const TBOX& box) {
|
||||
tesseract::ColPartitionGridSearch gsearch(&fragmented_text_grid_);
|
||||
@ -75,8 +75,7 @@ class TableFinderTest : public testing::Test {
|
||||
}
|
||||
|
||||
void TearDown() {
|
||||
if (partition_.get() != NULL)
|
||||
partition_->DeleteBoxes();
|
||||
if (partition_.get() != NULL) partition_->DeleteBoxes();
|
||||
DeletePartitionListBoxes();
|
||||
finder_.reset(NULL);
|
||||
}
|
||||
@ -87,12 +86,11 @@ class TableFinderTest : public testing::Test {
|
||||
|
||||
void MakePartition(int x_min, int y_min, int x_max, int y_max,
|
||||
int first_column, int last_column) {
|
||||
if (partition_.get() != NULL)
|
||||
partition_->DeleteBoxes();
|
||||
if (partition_.get() != NULL) partition_->DeleteBoxes();
|
||||
TBOX box;
|
||||
box.set_to_given_coords(x_min, y_min, x_max, y_max);
|
||||
partition_.reset(ColPartition::FakePartition(box, PT_UNKNOWN,
|
||||
BRT_UNKNOWN, BTFT_NONE));
|
||||
partition_.reset(
|
||||
ColPartition::FakePartition(box, PT_UNKNOWN, BRT_UNKNOWN, BTFT_NONE));
|
||||
partition_->set_first_column(first_column);
|
||||
partition_->set_last_column(last_column);
|
||||
}
|
||||
@ -119,8 +117,7 @@ class TableFinderTest : public testing::Test {
|
||||
}
|
||||
|
||||
void DeletePartitionListBoxes() {
|
||||
for (free_boxes_it_.mark_cycle_pt();
|
||||
!free_boxes_it_.cycled_list();
|
||||
for (free_boxes_it_.mark_cycle_pt(); !free_boxes_it_.cycled_list();
|
||||
free_boxes_it_.forward()) {
|
||||
ColPartition* part = free_boxes_it_.data();
|
||||
part->DeleteBoxes();
|
||||
@ -137,30 +134,23 @@ class TableFinderTest : public testing::Test {
|
||||
|
||||
TEST_F(TableFinderTest, GapInXProjectionNoGap) {
|
||||
int data[100];
|
||||
for (int i = 0; i < 100; ++i)
|
||||
data[i] = 10;
|
||||
for (int i = 0; i < 100; ++i) data[i] = 10;
|
||||
EXPECT_FALSE(finder_->GapInXProjection(data, 100));
|
||||
}
|
||||
|
||||
TEST_F(TableFinderTest, GapInXProjectionEdgeGap) {
|
||||
int data[100];
|
||||
for (int i = 0; i < 10; ++i)
|
||||
data[i] = 2;
|
||||
for (int i = 10; i < 90; ++i)
|
||||
data[i] = 10;
|
||||
for (int i = 90; i < 100; ++i)
|
||||
data[i] = 2;
|
||||
for (int i = 0; i < 10; ++i) data[i] = 2;
|
||||
for (int i = 10; i < 90; ++i) data[i] = 10;
|
||||
for (int i = 90; i < 100; ++i) data[i] = 2;
|
||||
EXPECT_FALSE(finder_->GapInXProjection(data, 100));
|
||||
}
|
||||
|
||||
TEST_F(TableFinderTest, GapInXProjectionExists) {
|
||||
int data[100];
|
||||
for (int i = 0; i < 10; ++i)
|
||||
data[i] = 10;
|
||||
for (int i = 10; i < 90; ++i)
|
||||
data[i] = 2;
|
||||
for (int i = 90; i < 100; ++i)
|
||||
data[i] = 10;
|
||||
for (int i = 0; i < 10; ++i) data[i] = 10;
|
||||
for (int i = 10; i < 90; ++i) data[i] = 2;
|
||||
for (int i = 90; i < 100; ++i) data[i] = 10;
|
||||
EXPECT_TRUE(finder_->GapInXProjection(data, 100));
|
||||
}
|
||||
|
||||
@ -216,18 +206,18 @@ TEST_F(TableFinderTest, SplitAndInsertFragmentedPartitionsBasicPass) {
|
||||
all->set_right_margin(100);
|
||||
TBOX blob_box = part_box;
|
||||
for (int i = 10; i <= 20; i += 5) {
|
||||
blob_box.set_left(i+1);
|
||||
blob_box.set_right(i+4);
|
||||
blob_box.set_left(i + 1);
|
||||
blob_box.set_right(i + 4);
|
||||
all->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(blob_box)));
|
||||
}
|
||||
for (int i = 35; i <= 55; i += 5) {
|
||||
blob_box.set_left(i+1);
|
||||
blob_box.set_right(i+4);
|
||||
blob_box.set_left(i + 1);
|
||||
blob_box.set_right(i + 4);
|
||||
all->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(blob_box)));
|
||||
}
|
||||
for (int i = 80; i <= 95; i += 5) {
|
||||
blob_box.set_left(i+1);
|
||||
blob_box.set_right(i+4);
|
||||
blob_box.set_left(i + 1);
|
||||
blob_box.set_right(i + 4);
|
||||
all->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(blob_box)));
|
||||
}
|
||||
// TODO(nbeato): Ray's newer code...
|
||||
@ -256,8 +246,8 @@ TEST_F(TableFinderTest, SplitAndInsertFragmentedPartitionsBasicFail) {
|
||||
all->set_right_margin(100);
|
||||
TBOX blob_box = part_box;
|
||||
for (int i = 10; i <= 95; i += 5) {
|
||||
blob_box.set_left(i+1);
|
||||
blob_box.set_right(i+4);
|
||||
blob_box.set_left(i + 1);
|
||||
blob_box.set_right(i + 4);
|
||||
all->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(blob_box)));
|
||||
}
|
||||
// TODO(nbeato): Ray's newer code...
|
||||
|
@ -34,8 +34,8 @@ class TestableTableRecognizer : public tesseract::TableRecognizer {
|
||||
|
||||
class TestableStructuredTable : public tesseract::StructuredTable {
|
||||
public:
|
||||
using StructuredTable::CountVerticalIntersections;
|
||||
using StructuredTable::CountHorizontalIntersections;
|
||||
using StructuredTable::CountVerticalIntersections;
|
||||
using StructuredTable::FindLinedStructure;
|
||||
using StructuredTable::FindWhitespacedColumns;
|
||||
using StructuredTable::FindWhitespacedStructure;
|
||||
@ -51,11 +51,11 @@ class TestableStructuredTable : public tesseract::StructuredTable {
|
||||
}
|
||||
|
||||
void ExpectCellX(int x_min, int second, int add, int almost_done, int x_max) {
|
||||
ASSERT_EQ(0, (almost_done - second) % add);
|
||||
ASSERT_EQ(0, (almost_done - second) % add);
|
||||
EXPECT_EQ(3 + (almost_done - second) / add, cell_x_.length());
|
||||
EXPECT_EQ(x_min, cell_x_.get(0));
|
||||
EXPECT_EQ(x_max, cell_x_.get(cell_x_.length() - 1));
|
||||
for (int i = 1; i < cell_x_.length() - 1; ++i) {
|
||||
for (int i = 1; i < cell_x_.length() - 1; ++i) {
|
||||
EXPECT_EQ(second + add * (i - 1), cell_x_.get(i));
|
||||
}
|
||||
}
|
||||
@ -63,7 +63,7 @@ class TestableStructuredTable : public tesseract::StructuredTable {
|
||||
void ExpectSortedX() {
|
||||
EXPECT_GT(cell_x_.length(), 0);
|
||||
for (int i = 1; i < cell_x_.length(); ++i) {
|
||||
EXPECT_LT(cell_x_.get(i-1), cell_x_.get(i));
|
||||
EXPECT_LT(cell_x_.get(i - 1), cell_x_.get(i));
|
||||
}
|
||||
}
|
||||
};
|
||||
@ -92,8 +92,8 @@ class SharedTest : public testing::Test {
|
||||
|
||||
void InsertPartition(int left, int bottom, int right, int top) {
|
||||
TBOX box(left, bottom, right, top);
|
||||
ColPartition* part = ColPartition::FakePartition(box, PT_FLOWING_TEXT,
|
||||
BRT_TEXT, BTFT_NONE);
|
||||
ColPartition* part =
|
||||
ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE);
|
||||
part->set_median_width(3);
|
||||
part->set_median_height(3);
|
||||
text_grid_->InsertBBox(true, true, part);
|
||||
@ -103,34 +103,30 @@ class SharedTest : public testing::Test {
|
||||
}
|
||||
|
||||
void InsertLines() {
|
||||
line_box_.set_to_given_coords(100 - line_grid_->gridsize(),
|
||||
10 - line_grid_->gridsize(),
|
||||
450 + line_grid_->gridsize(),
|
||||
50 + line_grid_->gridsize());
|
||||
for (int i = 10; i <= 50; i += 10)
|
||||
InsertHorizontalLine(100, 450, i);
|
||||
for (int i = 100; i <= 450; i += 50)
|
||||
InsertVerticalLine(i, 10, 50);
|
||||
line_box_.set_to_given_coords(
|
||||
100 - line_grid_->gridsize(), 10 - line_grid_->gridsize(),
|
||||
450 + line_grid_->gridsize(), 50 + line_grid_->gridsize());
|
||||
for (int i = 10; i <= 50; i += 10) InsertHorizontalLine(100, 450, i);
|
||||
for (int i = 100; i <= 450; i += 50) InsertVerticalLine(i, 10, 50);
|
||||
|
||||
for (int i = 100; i <= 200; i += 20)
|
||||
InsertHorizontalLine(0, 100, i);
|
||||
for (int i = 100; i <= 200; i += 20) InsertHorizontalLine(0, 100, i);
|
||||
}
|
||||
|
||||
void InsertHorizontalLine(int left, int right, int y) {
|
||||
TBOX box(left, y - line_grid_->gridsize(),
|
||||
right, y + line_grid_->gridsize());
|
||||
ColPartition* part = ColPartition::FakePartition(box, PT_HORZ_LINE,
|
||||
BRT_HLINE, BTFT_NONE);
|
||||
TBOX box(left, y - line_grid_->gridsize(), right,
|
||||
y + line_grid_->gridsize());
|
||||
ColPartition* part =
|
||||
ColPartition::FakePartition(box, PT_HORZ_LINE, BRT_HLINE, BTFT_NONE);
|
||||
line_grid_->InsertBBox(true, true, part);
|
||||
|
||||
tesseract::ColPartition_IT add_it(&allocated_parts_);
|
||||
add_it.add_after_stay_put(part);
|
||||
}
|
||||
void InsertVerticalLine(int x, int bottom, int top) {
|
||||
TBOX box(x - line_grid_->gridsize(), bottom,
|
||||
x + line_grid_->gridsize(), top);
|
||||
ColPartition* part = ColPartition::FakePartition(box, PT_VERT_LINE,
|
||||
BRT_VLINE, BTFT_NONE);
|
||||
TBOX box(x - line_grid_->gridsize(), bottom, x + line_grid_->gridsize(),
|
||||
top);
|
||||
ColPartition* part =
|
||||
ColPartition::FakePartition(box, PT_VERT_LINE, BRT_VLINE, BTFT_NONE);
|
||||
line_grid_->InsertBBox(true, true, part);
|
||||
|
||||
tesseract::ColPartition_IT add_it(&allocated_parts_);
|
||||
@ -273,10 +269,8 @@ TEST_F(StructuredTableTest, CountHorizontalIntersectionsAll) {
|
||||
}
|
||||
|
||||
TEST_F(StructuredTableTest, VerifyLinedTableBasicPass) {
|
||||
for (int y = 10; y <= 50; y += 10)
|
||||
table_->InjectCellY(y);
|
||||
for (int x = 100; x <= 450; x += 50)
|
||||
table_->InjectCellX(x);
|
||||
for (int y = 10; y <= 50; y += 10) table_->InjectCellY(y);
|
||||
for (int x = 100; x <= 450; x += 50) table_->InjectCellX(x);
|
||||
InsertLines();
|
||||
InsertCellsInLines();
|
||||
table_->set_bounding_box(line_box_);
|
||||
@ -284,10 +278,8 @@ TEST_F(StructuredTableTest, VerifyLinedTableBasicPass) {
|
||||
}
|
||||
|
||||
TEST_F(StructuredTableTest, VerifyLinedTableHorizontalFail) {
|
||||
for (int y = 10; y <= 50; y += 10)
|
||||
table_->InjectCellY(y);
|
||||
for (int x = 100; x <= 450; x += 50)
|
||||
table_->InjectCellX(x);
|
||||
for (int y = 10; y <= 50; y += 10) table_->InjectCellY(y);
|
||||
for (int x = 100; x <= 450; x += 50) table_->InjectCellX(x);
|
||||
InsertLines();
|
||||
InsertCellsInLines();
|
||||
InsertPartition(101, 11, 299, 19);
|
||||
@ -296,10 +288,8 @@ TEST_F(StructuredTableTest, VerifyLinedTableHorizontalFail) {
|
||||
}
|
||||
|
||||
TEST_F(StructuredTableTest, VerifyLinedTableVerticalFail) {
|
||||
for (int y = 10; y <= 50; y += 10)
|
||||
table_->InjectCellY(y);
|
||||
for (int x = 100; x <= 450; x += 50)
|
||||
table_->InjectCellX(x);
|
||||
for (int y = 10; y <= 50; y += 10) table_->InjectCellY(y);
|
||||
for (int x = 100; x <= 450; x += 50) table_->InjectCellX(x);
|
||||
InsertLines();
|
||||
InsertCellsInLines();
|
||||
InsertPartition(151, 21, 199, 39);
|
||||
|
@ -21,12 +21,9 @@ namespace {
|
||||
|
||||
class TabVectorTest : public testing::Test {
|
||||
protected:
|
||||
void SetUp() {
|
||||
vector_.reset();
|
||||
}
|
||||
void SetUp() { vector_.reset(); }
|
||||
|
||||
void TearDown() {
|
||||
}
|
||||
void TearDown() {}
|
||||
|
||||
void MakeSimpleTabVector(int x1, int y1, int x2, int y2) {
|
||||
vector_.reset(new TabVector());
|
||||
@ -60,7 +57,7 @@ TEST_F(TabVectorTest, XAtY45DegreeSlopeInRangeExact) {
|
||||
}
|
||||
|
||||
TEST_F(TabVectorTest, XAtYVerticalInRangeExact) {
|
||||
const int x = 120; // Arbitrary choice
|
||||
const int x = 120; // Arbitrary choice
|
||||
MakeSimpleTabVector(x, 0, x, 100);
|
||||
for (int y = 0; y <= 100; ++y) {
|
||||
int result_x = vector_->XAtY(y);
|
||||
@ -69,7 +66,7 @@ TEST_F(TabVectorTest, XAtYVerticalInRangeExact) {
|
||||
}
|
||||
|
||||
TEST_F(TabVectorTest, XAtYHorizontal) {
|
||||
const int y = 76; // arbitrary
|
||||
const int y = 76; // arbitrary
|
||||
MakeSimpleTabVector(0, y, 100, y);
|
||||
EXPECT_EQ(0, vector_->XAtY(y));
|
||||
// TODO(nbeato): What's the failure condition?
|
||||
@ -93,13 +90,13 @@ TEST_F(TabVectorTest, XAtYLargeNumbers) {
|
||||
// Assume a document is 800 DPI,
|
||||
// the width of a page is 10 inches across (8000 pixels), and
|
||||
// the height of the page is 15 inches (12000 pixels).
|
||||
MakeSimpleTabVector(7804, 504, 7968, 11768); // Arbitrary for vertical line
|
||||
int x = vector_->XAtY(6136); // test mid point
|
||||
MakeSimpleTabVector(7804, 504, 7968, 11768); // Arbitrary for vertical line
|
||||
int x = vector_->XAtY(6136); // test mid point
|
||||
EXPECT_EQ(7886, x);
|
||||
}
|
||||
|
||||
TEST_F(TabVectorTest, XAtYHorizontalInRangeExact) {
|
||||
const int y = 120; // Arbitrary choice
|
||||
const int y = 120; // Arbitrary choice
|
||||
MakeSimpleTabVector(50, y, 150, y);
|
||||
|
||||
int x = vector_->XAtY(y);
|
||||
@ -129,4 +126,4 @@ TEST_F(TabVectorTest, XYFlip) {
|
||||
EXPECT_EQ(3, vector_->endpt().y());
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace
|
||||
|
@ -25,8 +25,7 @@ class TatweelTest : public ::testing::Test {
|
||||
}
|
||||
|
||||
string TestDataNameToPath(const string& name) {
|
||||
return file::JoinPath(FLAGS_test_srcdir,
|
||||
"testdata/" + name);
|
||||
return file::JoinPath(FLAGS_test_srcdir, "testdata/" + name);
|
||||
}
|
||||
UNICHARSET unicharset_;
|
||||
};
|
||||
|
@ -20,17 +20,14 @@ using tesseract::TextlineProjection;
|
||||
// NOTE: Keep in sync with textlineprojection.cc.
|
||||
const int kMinStrongTextValue = 6;
|
||||
|
||||
|
||||
// The fixture for testing Tesseract.
|
||||
class TextlineProjectionTest : public testing::Test {
|
||||
protected:
|
||||
string TestDataNameToPath(const string& name) {
|
||||
return file::JoinPath(FLAGS_test_srcdir,
|
||||
"testdata/" + name);
|
||||
return file::JoinPath(FLAGS_test_srcdir, "testdata/" + name);
|
||||
}
|
||||
string TessdataPath() {
|
||||
return file::JoinPath(FLAGS_test_srcdir,
|
||||
"tessdata");
|
||||
return file::JoinPath(FLAGS_test_srcdir, "tessdata");
|
||||
}
|
||||
string OutputNameToPath(const string& name) {
|
||||
return file::JoinPath(FLAGS_test_tmpdir, name);
|
||||
@ -118,16 +115,15 @@ class TextlineProjectionTest : public testing::Test {
|
||||
const char* text, const char* message) {
|
||||
int value = projection_->EvaluateBox(box, denorm_, false);
|
||||
if (greater_or_equal != (value > target_value)) {
|
||||
LOG(INFO)
|
||||
<< StringPrintf("EvaluateBox too %s:%d vs %d for %s word '%s' at:",
|
||||
greater_or_equal ? "low" : "high", value,
|
||||
target_value,
|
||||
message, text);
|
||||
LOG(INFO) << StringPrintf(
|
||||
"EvaluateBox too %s:%d vs %d for %s word '%s' at:",
|
||||
greater_or_equal ? "low" : "high", value, target_value, message,
|
||||
text);
|
||||
box.print();
|
||||
value = projection_->EvaluateBox(box, denorm_, true);
|
||||
} else {
|
||||
VLOG(1) << StringPrintf("EvaluateBox OK(%d) for %s word '%s'",
|
||||
value, message, text);
|
||||
VLOG(1) << StringPrintf("EvaluateBox OK(%d) for %s word '%s'", value,
|
||||
message, text);
|
||||
}
|
||||
if (greater_or_equal) {
|
||||
EXPECT_GE(value, target_value);
|
||||
@ -139,12 +135,12 @@ class TextlineProjectionTest : public testing::Test {
|
||||
// Helper evaluates the DistanceOfBoxFromBox function by expecting that
|
||||
// box should be nearer to true_box than false_box.
|
||||
void EvaluateDistance(const TBOX& box, const TBOX& true_box,
|
||||
const TBOX& false_box,
|
||||
const char* text, const char* message) {
|
||||
int true_dist = projection_->DistanceOfBoxFromBox(box, true_box, true,
|
||||
denorm_, false);
|
||||
int false_dist = projection_->DistanceOfBoxFromBox(box, false_box, true,
|
||||
denorm_, false);
|
||||
const TBOX& false_box, const char* text,
|
||||
const char* message) {
|
||||
int true_dist =
|
||||
projection_->DistanceOfBoxFromBox(box, true_box, true, denorm_, false);
|
||||
int false_dist =
|
||||
projection_->DistanceOfBoxFromBox(box, false_box, true, denorm_, false);
|
||||
if (false_dist <= true_dist) {
|
||||
LOG(INFO) << StringPrintf("Distance wrong:%d vs %d for %s word '%s' at:",
|
||||
false_dist, true_dist, message, text);
|
||||
@ -194,8 +190,7 @@ class TextlineProjectionTest : public testing::Test {
|
||||
TBOX lower_box = word_box;
|
||||
lower_box.set_top(word_box.bottom());
|
||||
lower_box.set_bottom(word_box.bottom() - padding);
|
||||
if (tall_word)
|
||||
lower_box.move(ICOORD(0, padding / 2));
|
||||
if (tall_word) lower_box.move(ICOORD(0, padding / 2));
|
||||
EvaluateBox(lower_box, false, kMinStrongTextValue, text, "Lower Word");
|
||||
EvaluateBox(lower_box, true, -1, text, "Lower Word not vertical");
|
||||
|
||||
@ -224,20 +219,19 @@ class TextlineProjectionTest : public testing::Test {
|
||||
TBOX upper_challenger(upper_box);
|
||||
upper_challenger.set_bottom(upper_box.top());
|
||||
upper_challenger.set_top(upper_box.top() + word_box.height());
|
||||
EvaluateDistance(upper_box, target_box, upper_challenger,
|
||||
text, "Upper Word");
|
||||
if (tall_word)
|
||||
lower_box.move(ICOORD(0, padding / 2));
|
||||
EvaluateDistance(upper_box, target_box, upper_challenger, text,
|
||||
"Upper Word");
|
||||
if (tall_word) lower_box.move(ICOORD(0, padding / 2));
|
||||
lower_box.set_bottom(lower_box.top() - padding);
|
||||
target_box = word_box;
|
||||
target_box.set_bottom(lower_box.top());
|
||||
TBOX lower_challenger(lower_box);
|
||||
lower_challenger.set_top(lower_box.bottom());
|
||||
lower_challenger.set_bottom(lower_box.bottom() - word_box.height());
|
||||
EvaluateDistance(lower_box, target_box, lower_challenger,
|
||||
text, "Lower Word");
|
||||
EvaluateDistance(lower_box, target_box, lower_challenger, text,
|
||||
"Lower Word");
|
||||
|
||||
delete [] text;
|
||||
delete[] text;
|
||||
} while (it->Next(tesseract::RIL_WORD));
|
||||
delete it;
|
||||
}
|
||||
@ -254,13 +248,9 @@ class TextlineProjectionTest : public testing::Test {
|
||||
};
|
||||
|
||||
// Tests all word boxes on an unrotated image.
|
||||
TEST_F(TextlineProjectionTest, Unrotated) {
|
||||
VerifyBoxes("phototest.tif", 31);
|
||||
}
|
||||
TEST_F(TextlineProjectionTest, Unrotated) { VerifyBoxes("phototest.tif", 31); }
|
||||
|
||||
// Tests character-level applyboxes on italic Times New Roman.
|
||||
TEST_F(TextlineProjectionTest, Rotated) {
|
||||
VerifyBoxes("phototestrot.tif", 31);
|
||||
}
|
||||
TEST_F(TextlineProjectionTest, Rotated) { VerifyBoxes("phototestrot.tif", 31); }
|
||||
|
||||
} // namespace
|
||||
|
@ -23,8 +23,7 @@ namespace {
|
||||
|
||||
class TfileTest : public ::testing::Test {
|
||||
protected:
|
||||
TfileTest() {
|
||||
}
|
||||
TfileTest() {}
|
||||
|
||||
// Some data to serialize.
|
||||
class MathData {
|
||||
@ -32,11 +31,9 @@ class TfileTest : public ::testing::Test {
|
||||
MathData() : num_squares_(0), num_triangles_(0) {}
|
||||
void Setup() {
|
||||
// Setup some data.
|
||||
for (int s = 0; s < 42; ++s)
|
||||
squares_.push_back(s * s);
|
||||
for (int s = 0; s < 42; ++s) squares_.push_back(s * s);
|
||||
num_squares_ = squares_.size();
|
||||
for (int t = 0; t < 52; ++t)
|
||||
triangles_.push_back(t * (t + 1) / 2);
|
||||
for (int t = 0; t < 52; ++t) triangles_.push_back(t * (t + 1) / 2);
|
||||
num_triangles_ = triangles_.size();
|
||||
}
|
||||
void ExpectEq(const MathData& other) {
|
||||
@ -52,7 +49,7 @@ class TfileTest : public ::testing::Test {
|
||||
if (fp->FWrite(&num_squares_, sizeof(num_squares_), 1) != 1) return false;
|
||||
if (!squares_.Serialize(fp)) return false;
|
||||
if (fp->FWrite(&num_triangles_, sizeof(num_triangles_), 1) != 1)
|
||||
return false;
|
||||
return false;
|
||||
if (!triangles_.Serialize(fp)) return false;
|
||||
return true;
|
||||
}
|
||||
|
@ -10,8 +10,8 @@
|
||||
// limitations under the License.
|
||||
#include "unicharcompress.h"
|
||||
#include "gunit.h"
|
||||
#include "serialis.h"
|
||||
#include "printf.h"
|
||||
#include "serialis.h"
|
||||
|
||||
namespace tesseract {
|
||||
namespace {
|
||||
@ -21,11 +21,9 @@ class UnicharcompressTest : public ::testing::Test {
|
||||
// Loads and compresses the given unicharset.
|
||||
void LoadUnicharset(const string& unicharset_name) {
|
||||
string radical_stroke_file =
|
||||
file::JoinPath(FLAGS_test_srcdir,
|
||||
"langdata/radical-stroke.txt");
|
||||
string unicharset_file = file::JoinPath(
|
||||
FLAGS_test_srcdir, "testdata",
|
||||
unicharset_name);
|
||||
file::JoinPath(FLAGS_test_srcdir, "langdata/radical-stroke.txt");
|
||||
string unicharset_file =
|
||||
file::JoinPath(FLAGS_test_srcdir, "testdata", unicharset_name);
|
||||
string uni_data;
|
||||
CHECK_OK(file::GetContents(unicharset_file, &uni_data, file::Defaults()));
|
||||
string radical_data;
|
||||
|
@ -128,9 +128,8 @@ TEST(UnicharsetTest, MultibyteBigrams) {
|
||||
TEST(UnicharsetTest, OldStyle) {
|
||||
// This test verifies an old unicharset that contains fi/fl ligatures loads
|
||||
// and keeps all the entries.
|
||||
string filename = file::JoinPath(FLAGS_test_srcdir,
|
||||
"testdata",
|
||||
"eng.unicharset");
|
||||
string filename =
|
||||
file::JoinPath(FLAGS_test_srcdir, "testdata", "eng.unicharset");
|
||||
UNICHARSET u;
|
||||
LOG(INFO) << "Filename=" << filename;
|
||||
EXPECT_TRUE(u.load_from_file(filename.c_str()));
|
||||
|
@ -11,8 +11,8 @@
|
||||
|
||||
#include "validator.h"
|
||||
|
||||
#include "gmock/gmock.h" // for testing::ElementsAreArray
|
||||
#include "include_gunit.h"
|
||||
#include "gmock/gmock.h" // for testing::ElementsAreArray
|
||||
|
||||
namespace tesseract {
|
||||
namespace {
|
||||
|
Loading…
Reference in New Issue
Block a user