tesseract/unittest/textlineprojection_test.cc

255 lines
10 KiB
C++

// (C) Copyright 2017, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <allheaders.h>
#include <string> // for std::string
#include "absl/strings/str_format.h" // for absl::StrFormat
#include "include_gunit.h"
#include <tesseract/baseapi.h>
#include <tesseract/osdetect.h>
#include "colfind.h"
#include "log.h" // for LOG
#include "mutableiterator.h"
#include "pageres.h"
#include "tesseractclass.h"
#include "textlineprojection.h"
namespace tesseract {
// Minimum score for a STRONG_CHAIN textline.
// NOTE: Keep in sync with textlineprojection.cc.
const int kMinStrongTextValue = 6;
// The fixture for testing Tesseract.
class TextlineProjectionTest : public testing::Test {
protected:
std::string OutputNameToPath(const std::string &name) {
file::MakeTmpdir();
return file::JoinPath(FLAGS_test_tmpdir, name);
}
TextlineProjectionTest() {
src_pix_ = nullptr;
bin_pix_ = nullptr;
finder_ = nullptr;
denorm_ = nullptr;
projection_ = nullptr;
}
virtual ~TextlineProjectionTest() {
pixDestroy(&src_pix_);
pixDestroy(&bin_pix_);
delete finder_;
}
void SetImage(const char *filename) {
pixDestroy(&src_pix_);
src_pix_ = pixRead(file::JoinPath(TESTING_DIR, filename).c_str());
api_.Init(TESSDATA_DIR, "eng", tesseract::OEM_TESSERACT_ONLY);
api_.SetPageSegMode(tesseract::PSM_AUTO_OSD);
api_.SetImage(src_pix_);
}
// Ugly hacked-together function sets up projection_ and denorm_ by setting
// up for auto pagelayout, setting up a ColumnFinder, running it, and
// using accessors to get at the internal denorm and projection.
// If the coordinates have been rotated, the denorm should match
// correctly and transform coordinates back to the projection.
// We throw away all the blocks, blobs etc, and test the projection with
// the resultiterator from a separate BaseAPI run.
void SetupProjection() {
tesseract::TessdataManager mgr;
auto osd_tess = std::make_unique<Tesseract>();
OSResults osr;
EXPECT_EQ(osd_tess->init_tesseract(TESSDATA_DIR, "", "osd", tesseract::OEM_TESSERACT_ONLY,
nullptr, 0, nullptr, nullptr, false, &mgr),
0);
tesseract_ = std::make_unique<Tesseract>();
EXPECT_EQ(tesseract_->init_tesseract(TESSDATA_DIR, "", "eng", tesseract::OEM_TESSERACT_ONLY,
nullptr, 0, nullptr, nullptr, false, &mgr),
0);
bin_pix_ = api_.GetThresholdedImage();
*tesseract_->mutable_pix_binary() = pixClone(bin_pix_);
osd_tess->set_source_resolution(api_.tesseract()->source_resolution());
tesseract_->set_source_resolution(api_.tesseract()->source_resolution());
int width = pixGetWidth(bin_pix_);
int height = pixGetHeight(bin_pix_);
// First make a single block covering the whole image.
BLOCK *block = new BLOCK("", true, 0, 0, 0, 0, width, height);
block->set_right_to_left(false);
BLOCK_LIST src_blocks;
BLOCK_IT block_it(&src_blocks);
block_it.add_to_end(block);
Pix *photomask_pix = nullptr;
// The blocks made by the ColumnFinder. Moved to blocks before return.
BLOCK_LIST found_blocks;
TO_BLOCK_LIST temp_blocks;
finder_ =
tesseract_->SetupPageSegAndDetectOrientation(tesseract::PSM_AUTO_OSD, &src_blocks, osd_tess.get(),
&osr, &temp_blocks, &photomask_pix, nullptr);
TO_BLOCK_IT to_block_it(&temp_blocks);
TO_BLOCK *to_block = to_block_it.data();
denorm_ = finder_->denorm();
TO_BLOCK_LIST to_blocks;
BLOBNBOX_LIST diacritic_blobs;
EXPECT_GE(finder_->FindBlocks(tesseract::PSM_AUTO, nullptr, 1, to_block, photomask_pix, nullptr,
nullptr, nullptr, &found_blocks, &diacritic_blobs, &to_blocks),
0);
projection_ = finder_->projection();
pixDestroy(&photomask_pix);
}
// Helper evaluates the given box, expects the result to be greater_than
// or !greater_than the target_value and provides diagnostics if not.
void EvaluateBox(const TBOX &box, bool greater_or_equal, int target_value, const char *text,
const char *message) {
int value = projection_->EvaluateBox(box, denorm_, false);
if (greater_or_equal != (value > target_value)) {
LOG(INFO) << absl::StrFormat(
"EvaluateBox too %s:%d vs %d for %s word '%s' at:", greater_or_equal ? "low" : "high",
value, target_value, message, text);
box.print();
value = projection_->EvaluateBox(box, denorm_, true);
} else {
LOG(INFO) << absl::StrFormat("EvaluateBox OK(%d) for %s word '%s'", value, message, text);
}
if (greater_or_equal) {
EXPECT_GE(value, target_value);
} else {
EXPECT_LT(value, target_value);
}
}
// Helper evaluates the DistanceOfBoxFromBox function by expecting that
// box should be nearer to true_box than false_box.
void EvaluateDistance(const TBOX &box, const TBOX &true_box, const TBOX &false_box,
const char *text, const char *message) {
int true_dist = projection_->DistanceOfBoxFromBox(box, true_box, true, denorm_, false);
int false_dist = projection_->DistanceOfBoxFromBox(box, false_box, true, denorm_, false);
if (false_dist <= true_dist) {
LOG(INFO) << absl::StrFormat("Distance wrong:%d vs %d for %s word '%s' at:", false_dist,
true_dist, message, text);
true_box.print();
projection_->DistanceOfBoxFromBox(box, true_box, true, denorm_, true);
projection_->DistanceOfBoxFromBox(box, false_box, true, denorm_, true);
} else {
LOG(INFO) << absl::StrFormat("Distance OK(%d vs %d) for %s word '%s'", false_dist, true_dist,
message, text);
}
}
// Tests the projection on the word boxes of the given image.
// line_height is the cap + descender size of the text.
void VerifyBoxes(const char *imagefile, int line_height) {
SetImage(imagefile);
api_.Recognize(nullptr);
SetupProjection();
MutableIterator *it = api_.GetMutableIterator();
do {
char *text = it->GetUTF8Text(tesseract::RIL_WORD);
const PAGE_RES_IT *pr_it = it->PageResIt();
WERD_RES *word = pr_it->word();
// The word_box refers to the internal, possibly rotated, coords.
TBOX word_box = word->word->bounding_box();
bool small_word = word_box.height() * 1.5 < line_height;
bool tall_word = word_box.height() * 1.125 > line_height;
// We pad small and tall words differently because ascenders and
// descenders affect the position and size of the upper/lower boxes.
int padding;
if (small_word) {
padding = word_box.height();
} else if (tall_word) {
padding = word_box.height() / 3;
} else {
padding = word_box.height() / 2;
}
// Test that the word box gets a good score.
EvaluateBox(word_box, true, kMinStrongTextValue, text, "Real Word");
// Now test a displaced box, both above and below the word.
TBOX upper_box(word_box);
upper_box.set_bottom(word_box.top());
upper_box.set_top(word_box.top() + padding);
EvaluateBox(upper_box, false, kMinStrongTextValue, text, "Upper Word");
EvaluateBox(upper_box, true, -1, text, "Upper Word not vertical");
TBOX lower_box = word_box;
lower_box.set_top(word_box.bottom());
lower_box.set_bottom(word_box.bottom() - padding);
if (tall_word)
lower_box.move(ICOORD(0, padding / 2));
EvaluateBox(lower_box, false, kMinStrongTextValue, text, "Lower Word");
EvaluateBox(lower_box, true, -1, text, "Lower Word not vertical");
// Since some words have no text below and some words have no text above
// check that at least one of the boxes satisfies BoxOutOfTextline.
bool upper_or_lower_out_of_textline =
projection_->BoxOutOfHTextline(upper_box, denorm_, false) ||
projection_->BoxOutOfHTextline(lower_box, denorm_, false);
if (!upper_or_lower_out_of_textline) {
projection_->BoxOutOfHTextline(upper_box, denorm_, true);
projection_->BoxOutOfHTextline(lower_box, denorm_, true);
}
EXPECT_TRUE(upper_or_lower_out_of_textline);
// Now test DistanceOfBoxFromBox by faking a challenger word, and asking
// that each pad box be nearer to its true textline than the
// challenger. Due to the tight spacing of latin text, getting
// the right position and size of these test boxes is quite fiddly.
padding = line_height / 4;
upper_box.set_top(upper_box.bottom() + padding);
TBOX target_box(word_box);
if (!small_word) {
upper_box.move(ICOORD(0, -padding * 3 / 2));
}
target_box.set_top(upper_box.bottom());
TBOX upper_challenger(upper_box);
upper_challenger.set_bottom(upper_box.top());
upper_challenger.set_top(upper_box.top() + word_box.height());
EvaluateDistance(upper_box, target_box, upper_challenger, text, "Upper Word");
if (tall_word)
lower_box.move(ICOORD(0, padding / 2));
lower_box.set_bottom(lower_box.top() - padding);
target_box = word_box;
target_box.set_bottom(lower_box.top());
TBOX lower_challenger(lower_box);
lower_challenger.set_top(lower_box.bottom());
lower_challenger.set_bottom(lower_box.bottom() - word_box.height());
EvaluateDistance(lower_box, target_box, lower_challenger, text, "Lower Word");
delete[] text;
} while (it->Next(tesseract::RIL_WORD));
delete it;
}
Pix *src_pix_;
Pix *bin_pix_;
BLOCK_LIST blocks_;
std::string ocr_text_;
tesseract::TessBaseAPI api_;
std::unique_ptr<Tesseract> tesseract_;
ColumnFinder *finder_;
const DENORM *denorm_;
const TextlineProjection *projection_;
};
// Tests all word boxes on an unrotated image.
TEST_F(TextlineProjectionTest, Unrotated) {
VerifyBoxes("phototest.tif", 31);
}
// Tests character-level applyboxes on italic Times New Roman.
TEST_F(TextlineProjectionTest, Rotated) {
VerifyBoxes("phototestrot.tif", 31);
}
} // namespace tesseract