// tesseract/unittest/textlineprojection_test.cc

#include "tesseract/textord/textlineprojection.h"
#include <string>
#include "leptonica/include/allheaders.h"
#include "tesseract/api/baseapi.h"
#include "tesseract/ccmain/mutableiterator.h"
#include "tesseract/ccmain/osdetect.h"
#include "tesseract/ccmain/tesseractclass.h"
#include "tesseract/ccstruct/pageres.h"
#include "tesseract/textord/colfind.h"
namespace {
using tesseract::ColumnFinder;
using tesseract::MutableIterator;
using tesseract::Tesseract;
using tesseract::TextlineProjection;
// Lowest projection score at which a textline counts as a STRONG_CHAIN.
// NOTE: This duplicates a value from textlineprojection.cc; keep the two in
// sync.
constexpr int kMinStrongTextValue = 6;
// The fixture for testing Tesseract.
class TextlineProjectionTest : public testing::Test {
protected:
string TestDataNameToPath(const string& name) {
return file::JoinPath(FLAGS_test_srcdir,
"testdata/" + name);
}
string TessdataPath() {
return file::JoinPath(FLAGS_test_srcdir,
"tessdata");
}
string OutputNameToPath(const string& name) {
return file::JoinPath(FLAGS_test_tmpdir, name);
}
TextlineProjectionTest() {
src_pix_ = NULL;
bin_pix_ = NULL;
tesseract_ = NULL;
finder_ = NULL;
denorm_ = NULL;
projection_ = NULL;
}
virtual ~TextlineProjectionTest() {
pixDestroy(&src_pix_);
pixDestroy(&bin_pix_);
delete finder_;
delete tesseract_;
}
void SetImage(const char* filename) {
pixDestroy(&src_pix_);
src_pix_ = pixRead(TestDataNameToPath(filename).c_str());
api_.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY);
api_.SetPageSegMode(tesseract::PSM_AUTO_OSD);
api_.SetImage(src_pix_);
}
// Ugly hacked-together function sets up projection_ and denorm_ by setting
// up for auto pagelayout, setting up a ColumnFinder, running it, and
// using accessors to get at the internal denorm and projection.
// If the coordinates have been rotated, the denorm should match
// correctly and transform coordinates back to the projection.
// We throw away all the blocks, blobs etc, and test the projection with
// the resultiterator from a separate BaseAPI run.
void SetupProjection() {
tesseract::TessdataManager mgr;
Tesseract* osd_tess = new Tesseract;
OSResults osr;
EXPECT_EQ(osd_tess->init_tesseract(TessdataPath().c_str(), NULL, "osd",
tesseract::OEM_TESSERACT_ONLY, NULL, 0,
NULL, NULL, false, &mgr),
0);
tesseract_ = new Tesseract;
EXPECT_EQ(tesseract_->init_tesseract(TessdataPath().c_str(), NULL, "eng",
tesseract::OEM_TESSERACT_ONLY, NULL, 0,
NULL, NULL, false, &mgr),
0);
bin_pix_ = api_.GetThresholdedImage();
*tesseract_->mutable_pix_binary() = pixClone(bin_pix_);
osd_tess->set_source_resolution(api_.tesseract()->source_resolution());
tesseract_->set_source_resolution(api_.tesseract()->source_resolution());
int width = pixGetWidth(bin_pix_);
int height = pixGetHeight(bin_pix_);
// First make a single block covering the whole image.
BLOCK* block = new BLOCK("", TRUE, 0, 0, 0, 0, width, height);
block->set_right_to_left(false);
BLOCK_LIST src_blocks;
BLOCK_IT block_it(&src_blocks);
block_it.add_to_end(block);
Pix* photomask_pix = NULL;
// The blocks made by the ColumnFinder. Moved to blocks before return.
BLOCK_LIST found_blocks;
TO_BLOCK_LIST temp_blocks;
finder_ = tesseract_->SetupPageSegAndDetectOrientation(
tesseract::PSM_AUTO_OSD, &src_blocks, osd_tess, &osr, &temp_blocks,
&photomask_pix, NULL);
TO_BLOCK_IT to_block_it(&temp_blocks);
TO_BLOCK* to_block = to_block_it.data();
denorm_ = finder_->denorm();
TO_BLOCK_LIST to_blocks;
BLOBNBOX_LIST diacritic_blobs;
EXPECT_GE(finder_->FindBlocks(tesseract::PSM_AUTO, nullptr, 1, to_block,
photomask_pix, nullptr, nullptr, nullptr,
&found_blocks, &diacritic_blobs, &to_blocks),
0);
projection_ = finder_->projection();
pixDestroy(&photomask_pix);
delete osd_tess;
}
// Helper evaluates the given box, expects the result to be greater_than
// or !greater_than the target_value and provides diagnostics if not.
void EvaluateBox(const TBOX& box, bool greater_or_equal, int target_value,
const char* text, const char* message) {
int value = projection_->EvaluateBox(box, denorm_, false);
if (greater_or_equal != (value > target_value)) {
LOG(INFO)
<< StringPrintf("EvaluateBox too %s:%d vs %d for %s word '%s' at:",
greater_or_equal ? "low" : "high", value,
target_value,
message, text);
box.print();
value = projection_->EvaluateBox(box, denorm_, true);
} else {
VLOG(1) << StringPrintf("EvaluateBox OK(%d) for %s word '%s'",
value, message, text);
}
if (greater_or_equal) {
EXPECT_GE(value, target_value);
} else {
EXPECT_LT(value, target_value);
}
}
// Helper evaluates the DistanceOfBoxFromBox function by expecting that
// box should be nearer to true_box than false_box.
void EvaluateDistance(const TBOX& box, const TBOX& true_box,
const TBOX& false_box,
const char* text, const char* message) {
int true_dist = projection_->DistanceOfBoxFromBox(box, true_box, true,
denorm_, false);
int false_dist = projection_->DistanceOfBoxFromBox(box, false_box, true,
denorm_, false);
if (false_dist <= true_dist) {
LOG(INFO) << StringPrintf("Distance wrong:%d vs %d for %s word '%s' at:",
false_dist, true_dist, message, text);
true_box.print();
projection_->DistanceOfBoxFromBox(box, true_box, true, denorm_, true);
projection_->DistanceOfBoxFromBox(box, false_box, true, denorm_, true);
} else {
VLOG(1) << StringPrintf("Distance OK(%d vs %d) for %s word '%s'",
false_dist, true_dist, message, text);
}
}
// Tests the projection on the word boxes of the given image.
// line_height is the cap + descender size of the text.
void VerifyBoxes(const char* imagefile, int line_height) {
SetImage(imagefile);
api_.Recognize(NULL);
SetupProjection();
MutableIterator* it = api_.GetMutableIterator();
do {
char* text = it->GetUTF8Text(tesseract::RIL_WORD);
const PAGE_RES_IT* pr_it = it->PageResIt();
WERD_RES* word = pr_it->word();
// The word_box refers to the internal, possibly rotated, coords.
TBOX word_box = word->word->bounding_box();
bool small_word = word_box.height() * 1.5 < line_height;
bool tall_word = word_box.height() * 1.125 > line_height;
// We pad small and tall words differently because ascenders and
// descenders affect the position and size of the upper/lower boxes.
int padding;
if (small_word) {
padding = word_box.height();
} else if (tall_word) {
padding = word_box.height() / 3;
} else {
padding = word_box.height() / 2;
}
// Test that the word box gets a good score.
EvaluateBox(word_box, true, kMinStrongTextValue, text, "Real Word");
// Now test a displaced box, both above and below the word.
TBOX upper_box(word_box);
upper_box.set_bottom(word_box.top());
upper_box.set_top(word_box.top() + padding);
EvaluateBox(upper_box, false, kMinStrongTextValue, text, "Upper Word");
EvaluateBox(upper_box, true, -1, text, "Upper Word not vertical");
TBOX lower_box = word_box;
lower_box.set_top(word_box.bottom());
lower_box.set_bottom(word_box.bottom() - padding);
if (tall_word)
lower_box.move(ICOORD(0, padding / 2));
EvaluateBox(lower_box, false, kMinStrongTextValue, text, "Lower Word");
EvaluateBox(lower_box, true, -1, text, "Lower Word not vertical");
// Since some words have no text below and some words have no text above
// check that at least one of the boxes satisfies BoxOutOfTextline.
bool upper_or_lower_out_of_textline =
projection_->BoxOutOfHTextline(upper_box, denorm_, false) ||
projection_->BoxOutOfHTextline(lower_box, denorm_, false);
if (!upper_or_lower_out_of_textline) {
projection_->BoxOutOfHTextline(upper_box, denorm_, true);
projection_->BoxOutOfHTextline(lower_box, denorm_, true);
}
EXPECT_TRUE(upper_or_lower_out_of_textline);
// Now test DistanceOfBoxFromBox by faking a challenger word, and asking
// that each pad box be nearer to its true textline than the
// challenger. Due to the tight spacing of latin text, getting
// the right position and size of these test boxes is quite fiddly.
padding = line_height / 4;
upper_box.set_top(upper_box.bottom() + padding);
TBOX target_box(word_box);
if (!small_word) {
upper_box.move(ICOORD(0, -padding * 3 / 2));
}
target_box.set_top(upper_box.bottom());
TBOX upper_challenger(upper_box);
upper_challenger.set_bottom(upper_box.top());
upper_challenger.set_top(upper_box.top() + word_box.height());
EvaluateDistance(upper_box, target_box, upper_challenger,
text, "Upper Word");
if (tall_word)
lower_box.move(ICOORD(0, padding / 2));
lower_box.set_bottom(lower_box.top() - padding);
target_box = word_box;
target_box.set_bottom(lower_box.top());
TBOX lower_challenger(lower_box);
lower_challenger.set_top(lower_box.bottom());
lower_challenger.set_bottom(lower_box.bottom() - word_box.height());
EvaluateDistance(lower_box, target_box, lower_challenger,
text, "Lower Word");
delete [] text;
} while (it->Next(tesseract::RIL_WORD));
delete it;
}
Pix* src_pix_;
Pix* bin_pix_;
BLOCK_LIST blocks_;
string ocr_text_;
tesseract::TessBaseAPI api_;
Tesseract* tesseract_;
ColumnFinder* finder_;
const DENORM* denorm_;
const TextlineProjection* projection_;
};
// Runs the word-box checks of VerifyBoxes on the unrotated test image.
TEST_F(TextlineProjectionTest, Unrotated) {
  // Cap + descender size of the text, as expected by VerifyBoxes.
  const int kLineHeight = 31;
  const char* const kImageFile = "phototest.tif";
  VerifyBoxes(kImageFile, kLineHeight);
}
// Runs the same word-box checks of VerifyBoxes on phototestrot.tif
// (presumably a rotated version of phototest.tif -- confirm against the
// test data).
TEST_F(TextlineProjectionTest, Rotated) {
  // Cap + descender size of the text, as expected by VerifyBoxes.
  const int kLineHeight = 31;
  const char* const kImageFile = "phototestrot.tif";
  VerifyBoxes(kImageFile, kLineHeight);
}
} // namespace