mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-06-11 20:53:24 +08:00
unittest: Catch missing eng.traineddata in baseapi_test
Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
parent
bbd3626d77
commit
ceabab8373
@ -76,7 +76,7 @@ TEST_F(TesseractTest, BasicTesseractTest) {
|
|||||||
tesseract::TessBaseAPI api;
|
tesseract::TessBaseAPI api;
|
||||||
std::string truth_text;
|
std::string truth_text;
|
||||||
std::string ocr_text;
|
std::string ocr_text;
|
||||||
api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY);
|
if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) != -1) {
|
||||||
Pix* src_pix = pixRead(TestDataNameToPath("phototest.tif").c_str());
|
Pix* src_pix = pixRead(TestDataNameToPath("phototest.tif").c_str());
|
||||||
CHECK(src_pix);
|
CHECK(src_pix);
|
||||||
ocr_text = GetCleanedTextResult(&api, src_pix);
|
ocr_text = GetCleanedTextResult(&api, src_pix);
|
||||||
@ -85,13 +85,17 @@ TEST_F(TesseractTest, BasicTesseractTest) {
|
|||||||
absl::StripAsciiWhitespace(&truth_text);
|
absl::StripAsciiWhitespace(&truth_text);
|
||||||
EXPECT_STREQ(truth_text.c_str(), ocr_text.c_str());
|
EXPECT_STREQ(truth_text.c_str(), ocr_text.c_str());
|
||||||
pixDestroy(&src_pix);
|
pixDestroy(&src_pix);
|
||||||
|
} else {
|
||||||
|
// eng.traineddata not found.
|
||||||
|
GTEST_SKIP();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test that api.GetComponentImages() will return a set of images for
|
// Test that api.GetComponentImages() will return a set of images for
|
||||||
// paragraphs even if text recognition was not run.
|
// paragraphs even if text recognition was not run.
|
||||||
TEST_F(TesseractTest, IteratesParagraphsEvenIfNotDetected) {
|
TEST_F(TesseractTest, IteratesParagraphsEvenIfNotDetected) {
|
||||||
tesseract::TessBaseAPI api;
|
tesseract::TessBaseAPI api;
|
||||||
api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY);
|
if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) != -1) {
|
||||||
api.SetPageSegMode(tesseract::PSM_SINGLE_BLOCK);
|
api.SetPageSegMode(tesseract::PSM_SINGLE_BLOCK);
|
||||||
api.SetVariable("paragraph_debug_level", "3");
|
api.SetVariable("paragraph_debug_level", "3");
|
||||||
#if 0 // TODO: b622.png is missing
|
#if 0 // TODO: b622.png is missing
|
||||||
@ -110,13 +114,21 @@ TEST_F(TesseractTest, IteratesParagraphsEvenIfNotDetected) {
|
|||||||
boxaDestroy(&para_boxes);
|
boxaDestroy(&para_boxes);
|
||||||
pixDestroy(&src_pix);
|
pixDestroy(&src_pix);
|
||||||
#endif
|
#endif
|
||||||
|
} else {
|
||||||
|
// eng.traineddata not found.
|
||||||
|
GTEST_SKIP();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// We should get hOCR output and not seg fault, even if the api caller doesn't
|
// We should get hOCR output and not seg fault, even if the api caller doesn't
|
||||||
// call SetInputName().
|
// call SetInputName().
|
||||||
TEST_F(TesseractTest, HOCRWorksWithoutSetInputName) {
|
TEST_F(TesseractTest, HOCRWorksWithoutSetInputName) {
|
||||||
tesseract::TessBaseAPI api;
|
tesseract::TessBaseAPI api;
|
||||||
api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY);
|
if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) == -1) {
|
||||||
|
// eng.traineddata not found.
|
||||||
|
GTEST_SKIP();
|
||||||
|
return;
|
||||||
|
}
|
||||||
Pix* src_pix = pixRead(TestDataNameToPath("HelloGoogle.tif").c_str());
|
Pix* src_pix = pixRead(TestDataNameToPath("HelloGoogle.tif").c_str());
|
||||||
CHECK(src_pix);
|
CHECK(src_pix);
|
||||||
api.SetImage(src_pix);
|
api.SetImage(src_pix);
|
||||||
@ -131,7 +143,11 @@ TEST_F(TesseractTest, HOCRWorksWithoutSetInputName) {
|
|||||||
// hOCR output should contain baseline info for upright textlines.
|
// hOCR output should contain baseline info for upright textlines.
|
||||||
TEST_F(TesseractTest, HOCRContainsBaseline) {
|
TEST_F(TesseractTest, HOCRContainsBaseline) {
|
||||||
tesseract::TessBaseAPI api;
|
tesseract::TessBaseAPI api;
|
||||||
api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY);
|
if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) == -1) {
|
||||||
|
// eng.traineddata not found.
|
||||||
|
GTEST_SKIP();
|
||||||
|
return;
|
||||||
|
}
|
||||||
Pix* src_pix = pixRead(TestDataNameToPath("HelloGoogle.tif").c_str());
|
Pix* src_pix = pixRead(TestDataNameToPath("HelloGoogle.tif").c_str());
|
||||||
CHECK(src_pix);
|
CHECK(src_pix);
|
||||||
api.SetInputName("HelloGoogle.tif");
|
api.SetInputName("HelloGoogle.tif");
|
||||||
@ -151,6 +167,11 @@ TEST_F(TesseractTest, HOCRContainsBaseline) {
|
|||||||
// better algorithms to deal with baseline and xheight consistency.
|
// better algorithms to deal with baseline and xheight consistency.
|
||||||
TEST_F(TesseractTest, RickSnyderNotFuckSnyder) {
|
TEST_F(TesseractTest, RickSnyderNotFuckSnyder) {
|
||||||
tesseract::TessBaseAPI api;
|
tesseract::TessBaseAPI api;
|
||||||
|
if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) == -1) {
|
||||||
|
// eng.traineddata not found.
|
||||||
|
GTEST_SKIP();
|
||||||
|
return;
|
||||||
|
}
|
||||||
api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY);
|
api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY);
|
||||||
#if 0 // TODO: rick_snyder.jpeg is missing
|
#if 0 // TODO: rick_snyder.jpeg is missing
|
||||||
Pix* src_pix = pixRead(TestDataNameToPath("rick_snyder.jpeg").c_str());
|
Pix* src_pix = pixRead(TestDataNameToPath("rick_snyder.jpeg").c_str());
|
||||||
@ -161,6 +182,8 @@ TEST_F(TesseractTest, RickSnyderNotFuckSnyder) {
|
|||||||
EXPECT_THAT(result, Not(HasSubstr("FUCK")));
|
EXPECT_THAT(result, Not(HasSubstr("FUCK")));
|
||||||
delete[] result;
|
delete[] result;
|
||||||
pixDestroy(&src_pix);
|
pixDestroy(&src_pix);
|
||||||
|
#else
|
||||||
|
GTEST_SKIP();
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -182,7 +205,11 @@ TEST_F(TesseractTest, AdaptToWordStrTest) {
|
|||||||
tesseract::TessBaseAPI api;
|
tesseract::TessBaseAPI api;
|
||||||
std::string truth_text;
|
std::string truth_text;
|
||||||
std::string ocr_text;
|
std::string ocr_text;
|
||||||
api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY);
|
if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY) == -1) {
|
||||||
|
// eng.traineddata not found.
|
||||||
|
GTEST_SKIP();
|
||||||
|
return;
|
||||||
|
}
|
||||||
api.SetVariable("matcher_sufficient_examples_for_prototyping", "1");
|
api.SetVariable("matcher_sufficient_examples_for_prototyping", "1");
|
||||||
api.SetVariable("classify_class_pruner_threshold", "220");
|
api.SetVariable("classify_class_pruner_threshold", "220");
|
||||||
// Train on the training text.
|
// Train on the training text.
|
||||||
@ -216,7 +243,11 @@ TEST_F(TesseractTest, BasicLSTMTest) {
|
|||||||
tesseract::TessBaseAPI api;
|
tesseract::TessBaseAPI api;
|
||||||
std::string truth_text;
|
std::string truth_text;
|
||||||
std::string ocr_text;
|
std::string ocr_text;
|
||||||
api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_LSTM_ONLY);
|
if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_LSTM_ONLY) == -1) {
|
||||||
|
// eng.traineddata not found.
|
||||||
|
GTEST_SKIP();
|
||||||
|
return;
|
||||||
|
}
|
||||||
Pix* src_pix = pixRead(TestDataNameToPath("phototest_2.tif").c_str());
|
Pix* src_pix = pixRead(TestDataNameToPath("phototest_2.tif").c_str());
|
||||||
CHECK(src_pix);
|
CHECK(src_pix);
|
||||||
ocr_text = GetCleanedTextResult(&api, src_pix);
|
ocr_text = GetCleanedTextResult(&api, src_pix);
|
||||||
@ -240,7 +271,11 @@ TEST_F(TesseractTest, LSTMGeometryTest) {
|
|||||||
#else
|
#else
|
||||||
Pix* src_pix = pixRead(TestDataNameToPath("deslant.tif").c_str());
|
Pix* src_pix = pixRead(TestDataNameToPath("deslant.tif").c_str());
|
||||||
FriendlyTessBaseAPI api;
|
FriendlyTessBaseAPI api;
|
||||||
api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_LSTM_ONLY);
|
if (api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_LSTM_ONLY) == -1) {
|
||||||
|
// eng.traineddata not found.
|
||||||
|
GTEST_SKIP();
|
||||||
|
return;
|
||||||
|
}
|
||||||
api.SetImage(src_pix);
|
api.SetImage(src_pix);
|
||||||
ASSERT_EQ(api.Recognize(nullptr), 0);
|
ASSERT_EQ(api.Recognize(nullptr), 0);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user