mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-06-07 09:52:40 +08:00
Merge 51a3398a3c
into de095fc074
This commit is contained in:
commit
7e1bc48521
@ -269,6 +269,7 @@ endif
|
||||
# Rules for src/ccstruct.
|
||||
|
||||
noinst_HEADERS += src/ccstruct/blamer.h
|
||||
noinst_HEADERS += src/ccstruct/blob_bounds_calculator.h
|
||||
noinst_HEADERS += src/ccstruct/blobbox.h
|
||||
noinst_HEADERS += src/ccstruct/blobs.h
|
||||
noinst_HEADERS += src/ccstruct/blread.h
|
||||
@ -312,6 +313,7 @@ noinst_HEADERS += src/ccstruct/params_training_featdef.h
|
||||
endif
|
||||
|
||||
libtesseract_la_SOURCES += src/ccstruct/blamer.cpp
|
||||
libtesseract_la_SOURCES += src/ccstruct/blob_bounds_calculator.cpp
|
||||
libtesseract_la_SOURCES += src/ccstruct/blobbox.cpp
|
||||
libtesseract_la_SOURCES += src/ccstruct/blobs.cpp
|
||||
libtesseract_la_SOURCES += src/ccstruct/blread.cpp
|
||||
@ -1176,6 +1178,7 @@ if !DISABLED_LEGACY_ENGINE
|
||||
check_PROGRAMS += bitvector_test
|
||||
endif # !DISABLED_LEGACY_ENGINE
|
||||
endif # ENABLE_TRAINING
|
||||
check_PROGRAMS += blob_bounds_calculator_test
|
||||
check_PROGRAMS += cleanapi_test
|
||||
check_PROGRAMS += colpartition_test
|
||||
if ENABLE_TRAINING
|
||||
@ -1288,6 +1291,10 @@ bitvector_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
bitvector_test_LDADD = $(TRAINING_LIBS)
|
||||
endif # !DISABLED_LEGACY_ENGINE
|
||||
|
||||
blob_bounds_calculator_test_SOURCES = unittest/blob_bounds_calculator_test.cc
|
||||
blob_bounds_calculator_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
blob_bounds_calculator_test_LDADD = $(TESS_LIBS)
|
||||
|
||||
cleanapi_test_SOURCES = unittest/cleanapi_test.cc
|
||||
cleanapi_test_CPPFLAGS = $(unittest_CPPFLAGS)
|
||||
cleanapi_test_LDADD = $(TESS_LIBS)
|
||||
|
491
src/ccstruct/blob_bounds_calculator.cpp
Normal file
491
src/ccstruct/blob_bounds_calculator.cpp
Normal file
@ -0,0 +1,491 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File:        blob_bounds_calculator.cpp
|
||||
// Description: Module for calculation of blob bounds from LSTM data
|
||||
// Author: Povilas Kanapickas
|
||||
//
|
||||
// (C) Copyright 2022, Povilas Kanapickas <povilas@radix.lt>
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "blob_bounds_calculator.h"
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Writes a debug representation of a box-index boundary to `out`: the box
// index followed by split_index and split_count. Returns `out` for chaining.
std::ostream& operator<<(std::ostream& out, const CharBoundaryByBoxIndex& d) {
  out << "CharBoundaryByBoxIndex{ " << d.index << ", ";
  out << d.split_index << " " << d.split_count << " }";
  return out;
}
|
||||
|
||||
// Writes a debug representation of a placement decision to `out`, listing
// every field by name. Returns `out` for chaining.
std::ostream& operator<<(std::ostream& out, const CharacterPlaceDecision& d) {
  out << "CharacterPlaceDecision{";
  out << " prev_index: " << d.prev_index;
  out << " has_boxes: " << d.has_boxes;
  out << " begin: " << d.begin;
  out << " end: " << d.end;
  out << " prev_pos_diff: " << d.prev_pos_diff;
  out << " cost: " << d.cost;
  out << " }";
  return out;
}
|
||||
|
||||
void CharacterPlaceDecisions::add_place(unsigned prev_index, bool has_boxes,
|
||||
CharBoundaryByBoxIndex begin,
|
||||
CharBoundaryByBoxIndex end,
|
||||
double prev_pos_diff,
|
||||
double cost, double max_cost_diff) {
|
||||
if (cost > min_cost + max_cost_diff) {
|
||||
return;
|
||||
}
|
||||
|
||||
int replace_existing_decision_index = -1;
|
||||
for (std::size_t i = 0; i < decisions.size(); ++i) {
|
||||
if (decisions[i].end == end) {
|
||||
if (cost < decisions[i].cost) {
|
||||
replace_existing_decision_index = i;
|
||||
break;
|
||||
} else {
|
||||
// existing decision is better
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
CharacterPlaceDecision new_decision{prev_index, has_boxes, begin, end,
|
||||
prev_pos_diff, cost};
|
||||
if (replace_existing_decision_index >= 0) {
|
||||
decisions[replace_existing_decision_index] = new_decision;
|
||||
} else {
|
||||
decisions.push_back(new_decision);
|
||||
}
|
||||
|
||||
if (cost < min_cost) {
|
||||
min_cost = cost;
|
||||
|
||||
// Remove all decisions that no longer satisfy maximum cost difference
|
||||
// requirement.
|
||||
auto last_it = std::remove_if(decisions.begin(), decisions.end(),
|
||||
[=](const auto& d) {
|
||||
return d.cost > min_cost + max_cost_diff;
|
||||
});
|
||||
decisions.erase(last_it, decisions.end());
|
||||
}
|
||||
}
|
||||
|
||||
// Two results are equal when both X coordinates and both box indexes match.
bool CharacterBoundaries::operator==(const CharacterBoundaries& other) const {
  if (begin_x != other.begin_x || begin_box_index != other.begin_box_index) {
    return false;
  }
  return end_x == other.end_x && end_box_index == other.end_box_index;
}
|
||||
|
||||
// Writes a debug representation of resulting character boundaries to `out`
// in the order begin_x, begin_box_index, end_x, end_box_index.
std::ostream& operator<<(std::ostream& out, const CharacterBoundaries& bounds) {
  out << "CharacterBoundaries{" << bounds.begin_x << ", ";
  out << bounds.begin_box_index << ", ";
  out << bounds.end_x << ", ";
  out << bounds.end_box_index << "}";
  return out;
}
|
||||
|
||||
BoxBoundariesCalculator::BoxBoundariesCalculator(
|
||||
const std::vector<BoxBoundaries>& bounds,
|
||||
const BoxBoundariesCalculatorConfig& config) :
|
||||
bounds_{bounds},
|
||||
config_{config}
|
||||
{
|
||||
if (!bounds_.empty()) {
|
||||
double width_sum = 0;
|
||||
for (const auto& b : bounds) {
|
||||
width_sum += b.end - b.begin;
|
||||
}
|
||||
average_box_width_ = width_sum / static_cast<double>(bounds.size());
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<CharacterBoundaries>
|
||||
BoxBoundariesCalculator::calculate_bounds(const std::vector<BoxBoundaries>& symbols)
|
||||
{
|
||||
std::vector<CharacterPlaceDecisions> decisions;
|
||||
decisions.resize(symbols.size());
|
||||
|
||||
// The initial state
|
||||
CharacterPlaceDecisions init_decisions;
|
||||
init_decisions.add_place(0, true, {0, 0, 0}, {0, 0, 0}, 0, 0,
|
||||
config_.max_character_cost_diff);
|
||||
|
||||
for (std::size_t is = 0; is != symbols.size(); ++is) {
|
||||
const auto& symbol = symbols[is];
|
||||
const auto& prev_decisions = is == 0 ? init_decisions : decisions[is - 1];
|
||||
auto& next_decisions = decisions[is];
|
||||
|
||||
auto [symbol_min_box, symbol_max_box] = possible_boxes_for_symbol(symbol);
|
||||
|
||||
unsigned prev_farthest_index = farthest_decision_index(prev_decisions);
|
||||
const auto& prev_farthest_decision =
|
||||
prev_decisions.decisions[prev_farthest_index];
|
||||
|
||||
if (symbol_min_box == symbol_max_box) {
|
||||
// There are no boxes for the current symbol. Select the previous
|
||||
// decision which went farthest and was at box boundary.
|
||||
//
|
||||
// We ignore everything that affects the cost for this symbol because the
|
||||
// cost will be the same for all decision paths, thus will not affect
|
||||
// which decision path is ultimately selected.
|
||||
auto new_cost = prev_farthest_decision.cost +
|
||||
config_.symbol_with_no_box_cost;
|
||||
|
||||
// We reset prev_pos_diff as we are effectively starting over.
|
||||
next_decisions.add_place(prev_farthest_index, false, {{}, 0, 0},
|
||||
prev_farthest_decision.end,
|
||||
0, new_cost,
|
||||
config_.max_character_cost_diff);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (prev_farthest_decision.end.index < symbol_min_box) {
|
||||
// There are boxes that can't be attributed to any of the symbols because
|
||||
// they are too far away. In this case we pick the previous decision path
|
||||
// that went farthest and force the first box to be attributed to the
|
||||
// symbol.
|
||||
//
|
||||
// We ignore everything that affects the cost for this symbol because the
|
||||
// cost will be the same for all decision paths, thus will not affect
|
||||
// which decision path is ultimately selected.
|
||||
|
||||
auto boxes_with_no_symbols =
|
||||
symbol_min_box - prev_farthest_decision.end.index;
|
||||
|
||||
auto new_cost = prev_farthest_decision.cost +
|
||||
config_.box_with_no_symbol_cost * boxes_with_no_symbols;
|
||||
|
||||
// We reset prev_pos_diff as we are effectively starting over.
|
||||
try_decisions_from_prev_decision(next_decisions, prev_farthest_index,
|
||||
{symbol_min_box, 0, 0},
|
||||
0, new_cost,
|
||||
symbol, symbol_max_box);
|
||||
continue;
|
||||
}
|
||||
|
||||
for (std::size_t i_d = 0; i_d < prev_decisions.decisions.size(); ++i_d) {
|
||||
const auto& prev_decision = prev_decisions.decisions[i_d];
|
||||
try_decisions_from_prev_decision(next_decisions, i_d,
|
||||
prev_decision.end,
|
||||
prev_decision.prev_pos_diff,
|
||||
prev_decision.cost,
|
||||
symbol, symbol_max_box);
|
||||
}
|
||||
}
|
||||
|
||||
add_costs_for_remaining_boxes(decisions.back());
|
||||
auto best_decision_path = pick_best_decision_path(decisions);
|
||||
fix_decisions_split_count(best_decision_path);
|
||||
return decisions_to_results(symbols, best_decision_path);
|
||||
}
|
||||
|
||||
// Enumerates the end boundaries that a symbol starting at `start_bound` may
// take, up to `symbol_max_box`, and offers each of them to `next_decisions`
// through try_decision_from_prev_decision().
void BoxBoundariesCalculator::try_decisions_from_prev_decision(
    CharacterPlaceDecisions& next_decisions,
    unsigned prev_decision_index,
    CharBoundaryByBoxIndex start_bound,
    double prev_decision_pos_diff,
    double prev_decision_cost,
    const BoxBoundaries& symbol, unsigned symbol_max_box)
{
  // Everything except the end boundary is shared by all attempts.
  const auto attempt = [&](CharBoundaryByBoxIndex end_bound) {
    try_decision_from_prev_decision(next_decisions, prev_decision_index,
                                    start_bound, end_bound,
                                    prev_decision_pos_diff, prev_decision_cost,
                                    symbol);
  };

  if (start_bound.split_index > 0) {
    // attempt to split the start box once again
    attempt({start_bound.index,
             start_bound.split_index + 1,
             start_bound.split_count + 1});
    // attempt to take the remaining split of the start box
    attempt({start_bound.index, 0, 0});
  }

  for (unsigned end_box = start_bound.index + 1;
       end_box <= symbol_max_box; ++end_box) {
    // try one or more full boxes
    attempt({end_box, 0, 0});
    // try zero or more full boxes and a split box
    attempt({end_box, 1, 2});
  }
}
|
||||
|
||||
// Computes the cost of placing `symbol` between `start_bound` and
// `end_bound` on top of a previous decision and offers the resulting
// decision to `next_decisions`.
//
// Cost rules:
//
//  - The center of the merged boxes assigned to the symbol is simply the
//    middle between the start and end boundaries. No weighted average is
//    used: the boxes presumably represent one symbol that was broken apart by
//    bad input or a segmenter error, so the whole area is treated as a single
//    box.
//
//  - For a split box the boundary position follows the currently known split
//    factor. Later decisions may split the box further; previous costs are
//    not adjusted for that.
//
//  - Position difference is charged only for the part by which it grows
//    beyond the previous symbol's difference in the same direction.
void BoxBoundariesCalculator::try_decision_from_prev_decision(
    CharacterPlaceDecisions& next_decisions,
    unsigned prev_decision_index,
    CharBoundaryByBoxIndex start_bound, CharBoundaryByBoxIndex end_bound,
    double prev_decision_pos_diff,
    double prev_decision_cost,
    const BoxBoundaries& symbol)
{
  double cost = prev_decision_cost;

  // Ending inside a box means that box gets split.
  if (end_bound.split_index != 0) {
    cost += config_.split_cost;
  }

  // A start on a box boundary covers one box fewer than the index distance
  // would suggest when the start lies inside the preceding box.
  unsigned merge_count = end_bound.index - start_bound.index;
  if (start_bound.split_index == 0) {
    merge_count--;
  }
  cost += config_.merge_cost * merge_count;

  const double area_begin = get_box_pos_begin(start_bound);
  const double area_end = get_box_pos_end(end_bound);
  const double merged_box_center = (area_begin + area_end) / 2;

  const double pos_diff = symbol.middle() - merged_box_center;
  double pos_diff_for_cost = 0;

  if (pos_diff < 0 && pos_diff < prev_decision_pos_diff) {
    pos_diff_for_cost = prev_decision_pos_diff < 0
                            ? prev_decision_pos_diff - pos_diff
                            : -pos_diff;
  }
  if (pos_diff > 0 && pos_diff > prev_decision_pos_diff) {
    pos_diff_for_cost = prev_decision_pos_diff > 0
                            ? pos_diff - prev_decision_pos_diff
                            : pos_diff;
  }

  cost += config_.pos_diff_cost * pos_diff_for_cost / average_box_width_;

  next_decisions.add_place(prev_decision_index, true, start_bound, end_bound,
                           pos_diff, cost, config_.max_character_cost_diff);
}
|
||||
|
||||
|
||||
// Returns the X position of `bound` when it is used as the start of a
// character: the begin of box `index` for an unsplit boundary, otherwise the
// split position inside the preceding box.
double BoxBoundariesCalculator::get_box_pos_begin(CharBoundaryByBoxIndex bound)
{
  if (bound.split_index != 0) {
    // A split always refers to the box before `index`.
    assert(bound.index > 0);
    return get_box_split_pos(bounds_[bound.index - 1],
                             bound.split_index, bound.split_count);
  }
  return bounds_[bound.index].begin;
}
|
||||
|
||||
// Returns the X position of `bound` when it is used as the end of a
// character: the end of the preceding box for an unsplit boundary, otherwise
// the split position inside that box.
double BoxBoundariesCalculator::get_box_pos_end(CharBoundaryByBoxIndex bound)
{
  assert(bound.index > 0);

  const BoxBoundaries& preceding_box = bounds_[bound.index - 1];
  if (bound.split_index != 0) {
    return get_box_split_pos(preceding_box,
                             bound.split_index, bound.split_count);
  }
  return preceding_box.end;
}
|
||||
|
||||
|
||||
int BoxBoundariesCalculator::farthest_decision_index(
|
||||
const CharacterPlaceDecisions& decisions)
|
||||
{
|
||||
unsigned best_decision = 0;
|
||||
unsigned max_box_index = 0;
|
||||
double best_decision_cost = std::numeric_limits<double>::infinity();
|
||||
|
||||
for (std::size_t i = 0; i < decisions.decisions.size(); ++i) {
|
||||
const auto& decision = decisions.decisions[i];
|
||||
|
||||
if (decision.end.split_index == 0) {
|
||||
if ((decision.end.index == max_box_index &&
|
||||
decision.cost < best_decision_cost) ||
|
||||
decision.end.index < max_box_index) {
|
||||
max_box_index = decision.end.index;
|
||||
best_decision_cost = decision.cost;
|
||||
best_decision = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
return best_decision;
|
||||
}
|
||||
|
||||
std::pair<unsigned, unsigned>
|
||||
BoxBoundariesCalculator::possible_boxes_for_symbol(const BoxBoundaries& symbol)
|
||||
{
|
||||
auto min = symbol.begin - config_.max_pos_diff * average_box_width_;
|
||||
auto max = symbol.end + config_.max_pos_diff * average_box_width_;
|
||||
|
||||
auto range_begin = std::partition_point(bounds_.begin(), bounds_.end(),
|
||||
[min](const auto& b){
|
||||
return b.middle() < min;
|
||||
});
|
||||
|
||||
auto range_end = std::partition_point(range_begin, bounds_.end(),
|
||||
[max](const auto& b){
|
||||
return b.middle() < max;
|
||||
});
|
||||
|
||||
if (range_begin == bounds_.end()) {
|
||||
return { 0, 0 };
|
||||
}
|
||||
return { std::distance(bounds_.begin(), range_begin),
|
||||
std::distance(bounds_.begin(), range_end) };
|
||||
}
|
||||
|
||||
// Charges box_with_no_symbol_cost for each box after a decision's end to
// every decision that ends on a box boundary. Decisions ending inside a box
// are left untouched.
void BoxBoundariesCalculator::add_costs_for_remaining_boxes(
    CharacterPlaceDecisions& decisions) {
  for (CharacterPlaceDecision& decision : decisions.decisions) {
    const bool ends_on_box_boundary = decision.end.split_index == 0;
    if (!ends_on_box_boundary) {
      continue;
    }

    // NOTE(review): this assert also fires for a decision whose end index is
    // still 0, which looks reachable when no symbol received a box - confirm.
    assert(decision.end.index > 0);

    const auto unused_boxes = bounds_.size() - decision.end.index;
    decision.cost += unused_boxes * config_.box_with_no_symbol_cost;
  }
}
|
||||
|
||||
std::vector<CharacterPlaceDecision>
|
||||
BoxBoundariesCalculator::pick_best_decision_path(
|
||||
std::vector<CharacterPlaceDecisions>& decisions) {
|
||||
|
||||
std::vector<CharacterPlaceDecision> result;
|
||||
result.resize(decisions.size());
|
||||
|
||||
unsigned next_best_decision = get_best_end_decision(decisions.back());
|
||||
for (int i = decisions.size(); i > 0; --i) {
|
||||
int curr_index = i - 1;
|
||||
const auto& curr_decisions = decisions[curr_index];
|
||||
const auto& curr_best_decision = curr_decisions.decisions[next_best_decision];
|
||||
next_best_decision = curr_best_decision.prev_index;
|
||||
|
||||
result[curr_index] = curr_best_decision;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void BoxBoundariesCalculator::fix_decisions_split_count(
|
||||
std::vector<CharacterPlaceDecision>& decisions) {
|
||||
unsigned last_box_index = std::numeric_limits<unsigned>::max();
|
||||
unsigned last_box_split_count = 0;
|
||||
|
||||
auto adjust_index = [&](CharBoundaryByBoxIndex& index) {
|
||||
// The box indexes are always increasing and the last index with nonzero
|
||||
// split_count contains the largest split_count that we must apply to the
|
||||
// rest of indexes with nonzero split_count and the same box index.
|
||||
// Note that we iterate backwards in the loop below, so the order reverses
|
||||
// here.
|
||||
if (index.index == last_box_index) {
|
||||
if (index.split_count != 0) {
|
||||
last_box_split_count = index.split_count;
|
||||
}
|
||||
index.split_count = last_box_split_count;
|
||||
} else {
|
||||
last_box_index = index.index;
|
||||
last_box_split_count = index.split_count;
|
||||
}
|
||||
};
|
||||
|
||||
for (auto it = decisions.rbegin(); it != decisions.rend(); it++) {
|
||||
if (it->has_boxes) {
|
||||
adjust_index(it->end);
|
||||
adjust_index(it->begin);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<CharacterBoundaries> BoxBoundariesCalculator::decisions_to_results(
|
||||
const std::vector<BoxBoundaries>& symbols,
|
||||
const std::vector<CharacterPlaceDecision>& decisions)
|
||||
{
|
||||
std::vector<CharacterBoundaries> results;
|
||||
results.resize(symbols.size());
|
||||
|
||||
for (int i = decisions.size(); i > 0; --i) {
|
||||
int curr_index = i - 1;
|
||||
const auto& decision = decisions[curr_index];
|
||||
const auto& symbol = symbols[curr_index];
|
||||
|
||||
if (!decision.has_boxes) {
|
||||
results[curr_index] = CharacterBoundaries{symbol.begin, 0, symbol.end, 0};
|
||||
continue;
|
||||
}
|
||||
|
||||
// The result is in terms of boxes that are at least partially assigned to
|
||||
// characters. Decisions store bounds which need adjustment in case of
|
||||
// split boxes.
|
||||
auto begin_index = decision.begin.index;
|
||||
if (decision.begin.split_count > 0) {
|
||||
begin_index--;
|
||||
}
|
||||
|
||||
results[curr_index] = CharacterBoundaries{
|
||||
static_cast<int>(get_box_pos_begin(decision.begin)),
|
||||
begin_index,
|
||||
static_cast<int>(get_box_pos_end(decision.end)),
|
||||
decision.end.index};
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
int BoxBoundariesCalculator::get_best_end_decision(
|
||||
const CharacterPlaceDecisions& decisions) {
|
||||
assert(!decisions.decisions.empty());
|
||||
|
||||
unsigned best_decision = 0;
|
||||
double min_cost = std::numeric_limits<double>::infinity();
|
||||
|
||||
for (unsigned i = 0; i < decisions.decisions.size(); ++i) {
|
||||
const auto& decision = decisions.decisions[i];
|
||||
if (decision.end.split_index != 0)
|
||||
continue;
|
||||
if (decision.cost < min_cost) {
|
||||
best_decision = i;
|
||||
min_cost = decision.cost;
|
||||
}
|
||||
}
|
||||
|
||||
return best_decision;
|
||||
}
|
||||
|
||||
} // namespace tesseract
|
278
src/ccstruct/blob_bounds_calculator.h
Normal file
278
src/ccstruct/blob_bounds_calculator.h
Normal file
@ -0,0 +1,278 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: blob_bounds_calculator.h
|
||||
// Description: Module for calculation of blob bounds from LSTM data
|
||||
// Author: Povilas Kanapickas
|
||||
//
|
||||
// (C) Copyright 2022, Povilas Kanapickas <povilas@radix.lt>
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef TESSERACT_CCSTRUCT_BLOB_BOUNDS_CALCULATOR_H
|
||||
#define TESSERACT_CCSTRUCT_BLOB_BOUNDS_CALCULATOR_H
|
||||
|
||||
#include <iosfwd>
|
||||
#include <limits>
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
/* This file contains an implementation of an algorithm for improving character
|
||||
positions when using LSTM models. LSTM model output produces only approximate
|
||||
character positions without boundary data. Matching it to the blobs that
|
||||
comprise the characters is a non-trivial task, because the character positions
|
||||
in the LSTM output have drift that is large enough for simple algorithms such as
|
||||
"pick nearest blobs" to produce large amounts of errors.
|
||||
|
||||
It can be noticed that while LSTM model output produces only approximate
|
||||
character positions, the regular segmenter is pretty good. Most of the blob
|
||||
boundaries correspond to boundaries of characters and most significant errors
|
||||
are occasional blobs that correspond to multiple characters or multiple blobs
|
||||
that correspond to a single character.
|
||||
|
||||
Thus the basic idea of the algorithm is to treat the output of the regular
|
||||
segmenter as a template to which LSTM model output is matched. The selection
|
||||
of best match is done by assigning each unwanted property a cost and
|
||||
then minimizing the total cost of the solution. The algorithm uses the
|
||||
following costs:
|
||||
|
||||
- cost for merging multiple blobs to represent a character
|
||||
- cost for splitting a blob to represent multiple characters
|
||||
- cost for difference between the positions of the blobs and characters
|
||||
that they are matched to.
|
||||
|
||||
The cost of difference between positions is computed not by simply
|
||||
accumulating the sum of all position differences, but by only taking into
|
||||
account additional difference of each character compared to previous
|
||||
character. This way the algorithm does not attempt to "optimize" out of
|
||||
place characters by adding unneeded blob merges and splits.
|
||||
|
||||
The optimization problem is solved by dynamic programming techniques by
|
||||
noticing that assigning specific blobs to a character leaves us with a
|
||||
slightly smaller problem.
|
||||
|
||||
The approach is to place the first character in all potential positions
|
||||
and record the outcomes. Then for each of these outcomes attempts are made
|
||||
to place the second character at all potential positions and so on.
|
||||
Whenever there are multiple decision paths to arrive at a situation where the
|
||||
end of a specific character is at the same position, the path with the
|
||||
lowest cost is picked and others are ignored.
|
||||
*/
|
||||
|
||||
// A character boundary expressed relative to a list of boxes: the index of a
// box plus, optionally, a split position inside a box.
struct CharBoundaryByBoxIndex {
  // Index of the box that follows the boundary.
  unsigned index = 0;

  // Where the boundary lies relative to the boxes. With split_count == 0 the
  // boundary is directly before box `index`. Otherwise it is located at the
  // fraction (split_index / split_count) of the preceding box.
  unsigned split_index = 0;
  unsigned split_count = 0;

  // Boundaries are equal when all three fields match.
  bool operator==(const CharBoundaryByBoxIndex& other) const {
    if (index != other.index) {
      return false;
    }
    if (split_index != other.split_index) {
      return false;
    }
    return split_count == other.split_count;
  }

  bool operator!=(const CharBoundaryByBoxIndex& other) const {
    return !operator==(other);
  }
};
|
||||
|
||||
std::ostream& operator<<(std::ostream& out, const CharBoundaryByBoxIndex& d);
|
||||
|
||||
|
||||
// Represents a placement of a specific character at specific location.
|
||||
struct CharacterPlaceDecision {
|
||||
// Index of the placement decision of the previous character.
|
||||
unsigned prev_index;
|
||||
// Whether the character had any boxes assigned to it. If not, then the
|
||||
// data stored in `begin` in not defined.
|
||||
bool has_boxes = false;
|
||||
// Placement of the start of a character in the input box list.
|
||||
CharBoundaryByBoxIndex begin;
|
||||
// Placement of the end of a character in the input box list.
|
||||
CharBoundaryByBoxIndex end;
|
||||
// The difference of positions between the center of the previous character
|
||||
// and the center of the assigned boxes
|
||||
double prev_pos_diff = 0;
|
||||
// The cost incurred so far
|
||||
double cost = 0;
|
||||
};
|
||||
|
||||
std::ostream& operator<<(std::ostream& out, const CharacterPlaceDecision& d);
|
||||
|
||||
|
||||
// Represents a set of placement decisions for a specific character
|
||||
struct CharacterPlaceDecisions {
|
||||
std::vector<CharacterPlaceDecision> decisions;
|
||||
// minimum cost across all decisions
|
||||
double min_cost = std::numeric_limits<double>::infinity();
|
||||
|
||||
// Adds a character placement decision.
|
||||
void add_place(unsigned prev_index, bool has_boxes,
|
||||
CharBoundaryByBoxIndex begin, CharBoundaryByBoxIndex end,
|
||||
double prev_pos_diff, double cost, double max_cost_diff);
|
||||
};
|
||||
|
||||
// Horizontal extent of a box: its begin and end X coordinates.
struct BoxBoundaries {
  int begin = 0;
  int end = 0;

  // Center of the box; the sum is formed in double before halving.
  double middle() const {
    const double sum = double(begin) + end;
    return sum / 2;
  }
};
|
||||
|
||||
|
||||
// Resulting boundaries of one character. Besides the exact X positions the
// range of input boxes that the character corresponds to is reported, which
// allows computing correct boundaries in the Y axis.
struct CharacterBoundaries {
  // X coordinate where the character begins.
  int begin_x = 0;

  // Index of the first box of the character (inclusive).
  unsigned begin_box_index = 0;

  // X coordinate where the character ends.
  int end_x = 0;

  // Index one past the last box of the character (exclusive). If box data is
  // invalid, begin_box_index == end_box_index.
  unsigned end_box_index = 0;

  bool operator==(const CharacterBoundaries& other) const;
};
|
||||
|
||||
std::ostream& operator<<(std::ostream& out, const CharacterBoundaries& bounds);
|
||||
|
||||
|
||||
// Tunable costs and limits used by BoxBoundariesCalculator.
struct BoxBoundariesCalculatorConfig {
  // Cost charged for each merge of two input boxes.
  double merge_cost = 2;

  // Cost charged for each split of an input box.
  double split_cost = 2;

  // Cost charged for each box that is not attributed to any symbol.
  double box_with_no_symbol_cost = 2.2;

  // Cost charged for each symbol that has no boxes.
  double symbol_with_no_box_cost = 2.2;

  // Multiplier for the difference between the center of the symbol and the
  // center of the input box. It is only charged when a subsequent character
  // "moves" in the wrong direction. The charged amount is this multiplier
  // times the position difference relative to the average width of the input
  // boxes.
  double pos_diff_cost = 1;

  // Limits which boxes are considered for a symbol, expressed relative to
  // the average width of the input boxes.
  double max_pos_diff = 2;

  // Maximum allowed difference between the minimum and maximum cost among
  // all placements of a character.
  double max_character_cost_diff = 5;
};
|
||||
|
||||
// See the description of the algorithm at the top of the file.
|
||||
class BoxBoundariesCalculator {
|
||||
public:
|
||||
// Constructs the calculator for blob boundaries computed by regular
|
||||
// segmenter.
|
||||
BoxBoundariesCalculator(const std::vector<BoxBoundaries>& bounds,
|
||||
const BoxBoundariesCalculatorConfig& config);
|
||||
|
||||
// Computes improved character positions given LSTM model output. For the
|
||||
// purposes of character positioning only the center coordinate is used.
|
||||
// The start and end coordinates are used only as a fallback when the data
|
||||
// does not match any input blobs.
|
||||
std::vector<CharacterBoundaries>
|
||||
calculate_bounds(const std::vector<BoxBoundaries>& symbols);
|
||||
|
||||
private:
|
||||
|
||||
// This function takes all possible combinations of box boundaries between
|
||||
// start_bound and symbol_max_box, computes the costs of each option and adds
|
||||
// them to next_decisions array. The number of possibilities is approximately
|
||||
// (symbol_max_box - start_bound.index) * 2. The number is twice the number
|
||||
// of available boxes in range because we may want to split each box with
|
||||
// subsequent symbol.
|
||||
void try_decisions_from_prev_decision(CharacterPlaceDecisions& next_decisions,
|
||||
unsigned prev_decision_index,
|
||||
CharBoundaryByBoxIndex start_bound,
|
||||
double prev_decision_pos_diff,
|
||||
double prev_decision_cost,
|
||||
const BoxBoundaries& symbol,
|
||||
unsigned symbol_max_box);
|
||||
|
||||
void try_decision_from_prev_decision(CharacterPlaceDecisions& next_decisions,
|
||||
unsigned prev_decision_index,
|
||||
CharBoundaryByBoxIndex start_bound,
|
||||
CharBoundaryByBoxIndex end_bound,
|
||||
double prev_decision_pos_diff,
|
||||
double prev_decision_cost,
|
||||
const BoxBoundaries& symbol);
|
||||
|
||||
double get_box_pos_begin(CharBoundaryByBoxIndex bound);
|
||||
double get_box_pos_end(CharBoundaryByBoxIndex bound);
|
||||
|
||||
double get_box_split_pos(const BoxBoundaries& b, unsigned split_index,
|
||||
unsigned split_count)
|
||||
{
|
||||
return b.begin + (b.end - b.begin) * double(split_index) / split_count;
|
||||
}
|
||||
|
||||
static int farthest_decision_index(const CharacterPlaceDecisions& decisions);
|
||||
|
||||
std::pair<unsigned, unsigned>
|
||||
possible_boxes_for_symbol(const BoxBoundaries& symbol);
|
||||
|
||||
|
||||
// Goes through the decisions and adds costs for all boxes that have not
|
||||
// been added to a symbol.
|
||||
void add_costs_for_remaining_boxes(CharacterPlaceDecisions& decisions);
|
||||
|
||||
// Goes through the final decisions and picks full path of the best placement
|
||||
// decision.
|
||||
std::vector<CharacterPlaceDecision> pick_best_decision_path(
|
||||
std::vector<CharacterPlaceDecisions>& decisions);
|
||||
|
||||
// When constructing decisions we didn't care to update split sizes of
|
||||
// blobs when splitting more than once. As a result, splitting a blob into 4
|
||||
// parts splits at 0.5, 0.66 and 0.75 of the blob whereas the correct
|
||||
// splits are at 0.25, 0.5, 0.75. We assume this does not matter when
|
||||
// computing the costs, but for positions of the characters we need to
|
||||
// produce exact results.
|
||||
void fix_decisions_split_count(std::vector<CharacterPlaceDecision>& decisions);
|
||||
|
||||
std::vector<CharacterBoundaries>
|
||||
decisions_to_results(const std::vector<BoxBoundaries>& symbols,
|
||||
const std::vector<CharacterPlaceDecision>& decisions);
|
||||
|
||||
// Finds the best decision from the final decisions. The best decision is
|
||||
// such that it has minimum cost among decisions that end at a proper box
|
||||
// boundary.
|
||||
static int get_best_end_decision(const CharacterPlaceDecisions& decisions);
|
||||
|
||||
private:
|
||||
std::vector<BoxBoundaries> bounds_;
|
||||
BoxBoundariesCalculatorConfig config_;
|
||||
double average_box_width_ = 0;
|
||||
};
|
||||
|
||||
} // namespace tesseract
|
||||
|
||||
#endif // TESSERACT_CCSTRUCT_BLOB_BOUNDS_CALCULATOR_H
|
@ -24,6 +24,7 @@
|
||||
#include "pageres.h"
|
||||
|
||||
#include "blamer.h" // for BlamerBundle
|
||||
#include "blob_bounds_calculator.h" // for BoxBoundariesCalculator
|
||||
#include "blobs.h" // for TWERD, TBLOB
|
||||
#include "boxword.h" // for BoxWord
|
||||
#include "errcode.h" // for ASSERT_HOST
|
||||
@ -1273,36 +1274,6 @@ WERD_RES *PAGE_RES_IT::InsertSimpleCloneWord(const WERD_RES &clone_res,
|
||||
return new_res;
|
||||
}
|
||||
|
||||
// Helper computes the boundaries between blobs in the word. The blob bounds
|
||||
// are likely very poor, if they come from LSTM, where it only outputs the
|
||||
// character at one pixel within it, so we find the midpoints between them.
|
||||
static void ComputeBlobEnds(const WERD_RES &word, const TBOX &clip_box,
|
||||
C_BLOB_LIST *next_word_blobs,
|
||||
std::vector<int> *blob_ends) {
|
||||
C_BLOB_IT blob_it(word.word->cblob_list());
|
||||
for (int length : word.best_state) {
|
||||
// Get the bounding box of the fake blobs
|
||||
TBOX blob_box = blob_it.data()->bounding_box();
|
||||
blob_it.forward();
|
||||
for (int b = 1; b < length; ++b) {
|
||||
blob_box += blob_it.data()->bounding_box();
|
||||
blob_it.forward();
|
||||
}
|
||||
// This blob_box is crap, so for now we are only looking for the
|
||||
// boundaries between them.
|
||||
int blob_end = INT32_MAX;
|
||||
if (!blob_it.at_first() || next_word_blobs != nullptr) {
|
||||
if (blob_it.at_first()) {
|
||||
blob_it.set_to_list(next_word_blobs);
|
||||
}
|
||||
blob_end = (blob_box.right() + blob_it.data()->bounding_box().left()) / 2;
|
||||
}
|
||||
blob_end = ClipToRange<int>(blob_end, clip_box.left(), clip_box.right());
|
||||
blob_ends->push_back(blob_end);
|
||||
}
|
||||
blob_ends->back() = clip_box.right();
|
||||
}
|
||||
|
||||
// Helper computes the bounds of a word by restricting it to existing words
|
||||
// that significantly overlap.
|
||||
static TBOX ComputeWordBounds(const tesseract::PointerVector<WERD_RES> &words,
|
||||
@ -1349,11 +1320,45 @@ static TBOX ComputeWordBounds(const tesseract::PointerVector<WERD_RES> &words,
|
||||
return clipped_box;
|
||||
}
|
||||
|
||||
// Helper moves the blob from src to dest. If it isn't contained by clip_box,
|
||||
// the blob is replaced by a fake that is contained.
|
||||
static TBOX MoveAndClipBlob(C_BLOB_IT *src_it, C_BLOB_IT *dest_it,
|
||||
const TBOX &clip_box) {
|
||||
C_BLOB *src_blob = src_it->extract();
|
||||
// Helper to compute input for BoxBoundariesCalculator
|
||||
static std::vector<BoxBoundaries> ComputeFakeWordBlobXBounds(
|
||||
const PointerVector<WERD_RES> &words) {
|
||||
|
||||
std::vector<BoxBoundaries> result;
|
||||
|
||||
for (size_t w = 0; w < words.size(); ++w) {
|
||||
WERD_RES *word_w = words[w];
|
||||
|
||||
C_BLOB_IT blob_it(word_w->word->cblob_list());
|
||||
for (int length : word_w->best_state) {
|
||||
TBOX blob_box = blob_it.data()->bounding_box();
|
||||
blob_it.forward();
|
||||
for (int b = 1; b < length; ++b) {
|
||||
blob_box += blob_it.data()->bounding_box();
|
||||
blob_it.forward();
|
||||
}
|
||||
result.push_back({blob_box.left(), blob_box.right()});
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Helper to compute input for BoxBoundariesCalculator
|
||||
static std::vector<BoxBoundaries> ComputeBlobXBoundsFromTBOX(
|
||||
const std::vector<TBOX> &boxes) {
|
||||
std::vector<BoxBoundaries> result;
|
||||
result.reserve(boxes.size());
|
||||
for (const auto& box : boxes) {
|
||||
result.push_back({box.left(), box.right()});
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Helper moves the src_blob to dest. If it isn't contained by clip_box,
|
||||
// the blob is replaced by a fake that is contained. The helper takes ownership
|
||||
// of the blob.
|
||||
static TBOX ClipAndAddBlob(C_BLOB *src_blob, C_BLOB_IT *dest_it,
|
||||
const TBOX &clip_box) {
|
||||
TBOX box = src_blob->bounding_box();
|
||||
if (!clip_box.contains(box)) {
|
||||
int left =
|
||||
@ -1372,6 +1377,13 @@ static TBOX MoveAndClipBlob(C_BLOB_IT *src_it, C_BLOB_IT *dest_it,
|
||||
return box;
|
||||
}
|
||||
|
||||
// Helper to clip a box only in X direction
|
||||
static TBOX ClipBoxX(const TBOX &box, int left, int right) {
|
||||
int clip_left = ClipToRange<int>(box.left(), left, right - 1);
|
||||
int clip_right = ClipToRange<int>(box.right(), left + 1, right);
|
||||
return TBOX(clip_left, box.bottom(), clip_right, box.top());
|
||||
}
|
||||
|
||||
// Replaces the current WERD/WERD_RES with the given words. The given words
|
||||
// contain fake blobs that indicate the position of the characters. These are
|
||||
// replaced with real blobs from the current word as much as possible.
|
||||
@ -1416,21 +1428,31 @@ void PAGE_RES_IT::ReplaceCurrentWord(
|
||||
}
|
||||
}
|
||||
ASSERT_HOST(!wr_it.cycled_list());
|
||||
// Since we only have an estimate of the bounds between blobs, use the blob
|
||||
// x-middle as the determiner of where to put the blobs
|
||||
|
||||
std::vector<TBOX> blob_boxes;
|
||||
|
||||
C_BLOB_IT src_b_it(input_word->word->cblob_list());
|
||||
src_b_it.sort(&C_BLOB::SortByXMiddle);
|
||||
for (src_b_it.mark_cycle_pt(); !src_b_it.cycled_list(); src_b_it.forward()) {
|
||||
blob_boxes.push_back(src_b_it.data()->bounding_box());
|
||||
}
|
||||
src_b_it.move_to_first();
|
||||
|
||||
C_BLOB_IT rej_b_it(input_word->word->rej_cblob_list());
|
||||
rej_b_it.sort(&C_BLOB::SortByXMiddle);
|
||||
|
||||
auto fake_blob_bounds = ComputeFakeWordBlobXBounds(*words);
|
||||
BoxBoundariesCalculator calculator{ComputeBlobXBoundsFromTBOX(blob_boxes), {}};
|
||||
auto char_bounds = calculator.calculate_bounds(fake_blob_bounds);
|
||||
size_t char_bounds_i = 0;
|
||||
size_t box_bounds_i = 0;
|
||||
TBOX last_blob_box;
|
||||
|
||||
TBOX clip_box;
|
||||
for (size_t w = 0; w < words->size(); ++w) {
|
||||
WERD_RES *word_w = (*words)[w];
|
||||
clip_box = ComputeWordBounds(*words, w, clip_box, wr_it_of_current_word);
|
||||
// Compute blob boundaries.
|
||||
std::vector<int> blob_ends;
|
||||
C_BLOB_LIST *next_word_blobs =
|
||||
w + 1 < words->size() ? (*words)[w + 1]->word->cblob_list() : nullptr;
|
||||
ComputeBlobEnds(*word_w, clip_box, next_word_blobs, &blob_ends);
|
||||
|
||||
// Remove the fake blobs on the current word, but keep safe for back-up if
|
||||
// no blob can be found.
|
||||
C_BLOB_LIST fake_blobs;
|
||||
@ -1441,26 +1463,64 @@ void PAGE_RES_IT::ReplaceCurrentWord(
|
||||
C_BLOB_IT dest_it(word_w->word->cblob_list());
|
||||
// Build the box word as we move the blobs.
|
||||
auto *box_word = new tesseract::BoxWord;
|
||||
for (size_t i = 0; i < blob_ends.size(); ++i, fake_b_it.forward()) {
|
||||
int end_x = blob_ends[i];
|
||||
|
||||
for (size_t i = 0; i < word_w->best_state.size(); ++i) {
|
||||
const auto& char_bound = char_bounds[char_bounds_i++];
|
||||
|
||||
TBOX blob_box;
|
||||
// Add the blobs up to end_x.
|
||||
while (!src_b_it.empty() &&
|
||||
src_b_it.data()->bounding_box().x_middle() < end_x) {
|
||||
blob_box += MoveAndClipBlob(&src_b_it, &dest_it, clip_box);
|
||||
src_b_it.forward();
|
||||
if (char_bound.begin_box_index != char_bound.end_box_index) {
|
||||
// The box indices in char_bound will always be increasing, thus
|
||||
// we can iterate src_b_it in the same order.
|
||||
while (box_bounds_i < char_bound.begin_box_index) {
|
||||
box_bounds_i++;
|
||||
src_b_it.forward();
|
||||
}
|
||||
|
||||
if (box_bounds_i > char_bound.begin_box_index) {
|
||||
// The blob was split across multiple characters and has already
|
||||
// been extracted for a previous character. We have the bounds
|
||||
// of the blob and can create a fake blob out of it.
|
||||
TBOX fake_box = ClipBoxX(last_blob_box,
|
||||
char_bound.begin_x, char_bound.end_x);
|
||||
blob_box += ClipAndAddBlob(C_BLOB::FakeBlob(fake_box),
|
||||
&dest_it, clip_box);
|
||||
}
|
||||
|
||||
// Add all blobs that have not yet been assigned to any of the
|
||||
// characters.
|
||||
while (box_bounds_i < char_bound.end_box_index) {
|
||||
auto* src_blob = src_b_it.extract();
|
||||
last_blob_box = src_blob->bounding_box();
|
||||
TBOX inserted_box = ClipAndAddBlob(src_blob, &dest_it, clip_box);
|
||||
|
||||
box_bounds_i++;
|
||||
src_b_it.forward();
|
||||
|
||||
// Note that the blob may be split across multiple characters in
|
||||
// which case we want to clip the box to the part that was "assigned"
|
||||
// to the character.
|
||||
blob_box += ClipBoxX(inserted_box,
|
||||
char_bound.begin_x, char_bound.end_x);
|
||||
}
|
||||
}
|
||||
|
||||
// It's not clear where rejected blobs should be added because by
|
||||
// definition we don't have enough information about them. So we just
|
||||
// add them to whatever character follows.
|
||||
while (!rej_b_it.empty() &&
|
||||
rej_b_it.data()->bounding_box().x_middle() < end_x) {
|
||||
blob_box += MoveAndClipBlob(&rej_b_it, &dest_it, clip_box);
|
||||
rej_b_it.data()->bounding_box().x_middle() < char_bound.end_x) {
|
||||
blob_box += ClipAndAddBlob(rej_b_it.extract(), &dest_it, clip_box);
|
||||
rej_b_it.forward();
|
||||
}
|
||||
|
||||
if (blob_box.null_box()) {
|
||||
// Use the original box as a back-up.
|
||||
blob_box = MoveAndClipBlob(&fake_b_it, &dest_it, clip_box);
|
||||
blob_box = ClipAndAddBlob(fake_b_it.extract(), &dest_it, clip_box);
|
||||
}
|
||||
box_word->InsertBox(i, blob_box);
|
||||
fake_b_it.forward();
|
||||
}
|
||||
|
||||
delete word_w->box_word;
|
||||
word_w->box_word = box_word;
|
||||
if (!input_word->combination) {
|
||||
|
197
unittest/blob_bounds_calculator_test.cc
Normal file
197
unittest/blob_bounds_calculator_test.cc
Normal file
@ -0,0 +1,197 @@
|
||||
// (C) Copyright 2022, Povilas Kanapickas <povilas@radix.lt>.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#include "blob_bounds_calculator.h"
|
||||
|
||||
#include "include_gunit.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
namespace {
|
||||
|
||||
// Builds the calculator configuration used by the tests in this file:
// merge, split and position-difference costs of 1, a maximum position
// difference of 2, and a cost of 2 for an unmatched box or symbol.
BoxBoundariesCalculatorConfig get_default_config() {
  BoxBoundariesCalculatorConfig test_config;

  // Costs for a box or a symbol that ends up without a counterpart.
  test_config.box_with_no_symbol_cost = 2;
  test_config.symbol_with_no_box_cost = 2;

  // Position-difference settings.
  test_config.max_pos_diff = 2;
  test_config.pos_diff_cost = 1;

  // Costs for merging and splitting.
  test_config.split_cost = 1;
  test_config.merge_cost = 1;

  return test_config;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Four boxes and four symbols covering the same ranges (symbol edges touch,
// box edges are one unit apart). Each symbol is expected to receive exactly
// one box, with x coordinates taken from the box edges.
// Expected tuples presumably read {begin_x, begin_box_index, end_x,
// end_box_index} — confirm against CharacterBoundaries.
TEST(BoxBoundariesCalculatorTest, MatchesExactly) {
  BoxBoundariesCalculator calculator{
      {{10, 20}, {21, 30}, {31, 40}, {41, 50}}, get_default_config()};

  const std::vector<BoxBoundaries> symbols = {
      {10, 20}, {20, 30}, {30, 40}, {40, 50}};

  const std::vector<CharacterBoundaries> expected = {
      {10, 0, 20, 1},
      {21, 1, 30, 2},
      {31, 2, 40, 3},
      {41, 3, 50, 4}};

  ASSERT_EQ(expected, calculator.calculate_bounds(symbols));
}
|
||||
|
||||
// Three boxes for four symbols: the middle box (21..40) spans the second and
// third symbol. It is expected to be shared by both, with the boundary
// between them at x = 30 and both results referring to box index 1.
TEST(BoxBoundariesCalculatorTest, OneMergedInMiddle) {
  BoxBoundariesCalculator calculator{
      {{10, 20}, {21, 40}, {41, 50}}, get_default_config()};

  const std::vector<BoxBoundaries> symbols = {
      {10, 20}, {20, 30}, {30, 40}, {40, 50}};

  const std::vector<CharacterBoundaries> expected = {
      {10, 0, 20, 1},
      {21, 1, 30, 2},
      {30, 1, 40, 2},
      {41, 2, 50, 3}};

  ASSERT_EQ(expected, calculator.calculate_bounds(symbols));
}
|
||||
|
||||
// Five boxes for four symbols: boxes 21..25 and 26..30 both lie under the
// second symbol (20..30). That symbol is expected to cover both boxes
// (box indices 1 up to 3, x from 21 to 30).
TEST(BoxBoundariesCalculatorTest, OneSplit) {
  BoxBoundariesCalculator calculator{
      {{10, 20}, {21, 25}, {26, 30}, {31, 40}, {41, 50}},
      get_default_config()};

  const std::vector<BoxBoundaries> symbols = {
      {10, 20}, {20, 30}, {30, 40}, {40, 50}};

  const std::vector<CharacterBoundaries> expected = {
      {10, 0, 20, 1},
      {21, 1, 30, 3},
      {31, 3, 40, 4},
      {41, 4, 50, 5}};

  ASSERT_EQ(expected, calculator.calculate_bounds(symbols));
}
|
||||
|
||||
// Six boxes for four symbols: the boxes beyond the last symbol's range
// (51..60 and 61..70) have no symbol of their own. The last symbol is
// expected to absorb all three trailing boxes (indices 3 up to 6, x from 41
// to 70).
TEST(BoxBoundariesCalculatorTest, ManySplitAtEnd) {
  BoxBoundariesCalculator calculator{
      {{10, 20}, {21, 30}, {31, 40}, {41, 50}, {51, 60}, {61, 70}},
      get_default_config()};

  const std::vector<BoxBoundaries> symbols = {
      {10, 20}, {20, 30}, {30, 40}, {40, 50}};

  const std::vector<CharacterBoundaries> expected = {
      {10, 0, 20, 1},
      {21, 1, 30, 2},
      {31, 2, 40, 3},
      {41, 3, 70, 6}};

  ASSERT_EQ(expected, calculator.calculate_bounds(symbols));
}
|
||||
|
||||
// All symbols are shifted 5 units to the right of the boxes. The one-to-one
// assignment of symbols to boxes is still expected, with x coordinates taken
// from the boxes.
TEST(BoxBoundariesCalculatorTest, ShiftedSymbolPositionsForward) {
  BoxBoundariesCalculator calculator{
      {{10, 20}, {21, 30}, {31, 40}, {41, 50}}, get_default_config()};

  const std::vector<BoxBoundaries> symbols = {
      {15, 25}, {25, 35}, {35, 45}, {45, 55}};

  const std::vector<CharacterBoundaries> expected = {
      {10, 0, 20, 1},
      {21, 1, 30, 2},
      {31, 2, 40, 3},
      {41, 3, 50, 4}};

  ASSERT_EQ(expected, calculator.calculate_bounds(symbols));
}
|
||||
|
||||
// All symbols are shifted 15 units to the right of the boxes (more than one
// box width). The one-to-one assignment of symbols to boxes is still
// expected.
TEST(BoxBoundariesCalculatorTest, VeryShiftedSymbolPositionsForward) {
  BoxBoundariesCalculator calculator{
      {{10, 20}, {21, 30}, {31, 40}, {41, 50}}, get_default_config()};

  const std::vector<BoxBoundaries> symbols = {
      {25, 35}, {35, 45}, {45, 55}, {55, 65}};

  const std::vector<CharacterBoundaries> expected = {
      {10, 0, 20, 1},
      {21, 1, 30, 2},
      {31, 2, 40, 3},
      {41, 3, 50, 4}};

  ASSERT_EQ(expected, calculator.calculate_bounds(symbols));
}
|
||||
|
||||
// All symbols are shifted 5 units to the left of the boxes. The one-to-one
// assignment of symbols to boxes is still expected, with x coordinates taken
// from the boxes.
TEST(BoxBoundariesCalculatorTest, ShiftedSymbolPositionsBackward) {
  BoxBoundariesCalculator calculator{
      {{110, 120}, {121, 130}, {131, 140}, {141, 150}}, get_default_config()};

  const std::vector<BoxBoundaries> symbols = {
      {105, 115}, {115, 125}, {125, 135}, {135, 145}};

  const std::vector<CharacterBoundaries> expected = {
      {110, 0, 120, 1},
      {121, 1, 130, 2},
      {131, 2, 140, 3},
      {141, 3, 150, 4}};

  ASSERT_EQ(expected, calculator.calculate_bounds(symbols));
}
|
||||
|
||||
// All symbols are shifted 15 units to the left of the boxes (more than one
// box width). The one-to-one assignment of symbols to boxes is still
// expected.
TEST(BoxBoundariesCalculatorTest, VeryShiftedSymbolPositionsBackward) {
  BoxBoundariesCalculator calculator{
      {{110, 120}, {121, 130}, {131, 140}, {141, 150}}, get_default_config()};

  const std::vector<BoxBoundaries> symbols = {
      {95, 105}, {105, 115}, {115, 125}, {125, 135}};

  const std::vector<CharacterBoundaries> expected = {
      {110, 0, 120, 1},
      {121, 1, 130, 2},
      {131, 2, 140, 3},
      {141, 3, 150, 4}};

  ASSERT_EQ(expected, calculator.calculate_bounds(symbols));
}
|
||||
|
||||
// The symbol positions have a 10-unit gap between the second and third
// symbol (125..135) while the boxes are contiguous. The one-to-one
// assignment of symbols to boxes is still expected.
TEST(BoxBoundariesCalculatorTest, HoleInMiddle) {
  BoxBoundariesCalculator calculator{
      {{110, 120}, {121, 130}, {131, 140}, {141, 150}}, get_default_config()};

  const std::vector<BoxBoundaries> symbols = {
      {105, 115}, {115, 125}, {135, 145}, {145, 155}};

  const std::vector<CharacterBoundaries> expected = {
      {110, 0, 120, 1},
      {121, 1, 130, 2},
      {131, 2, 140, 3},
      {141, 3, 150, 4}};

  ASSERT_EQ(expected, calculator.calculate_bounds(symbols));
}
|
||||
|
||||
// The symbol positions have a 30-unit gap between the second and third
// symbol (115..145) while the boxes are contiguous. The one-to-one
// assignment of symbols to boxes is still expected.
TEST(BoxBoundariesCalculatorTest, LargeHoleInMiddle) {
  BoxBoundariesCalculator calculator{
      {{110, 120}, {121, 130}, {131, 140}, {141, 150}}, get_default_config()};

  const std::vector<BoxBoundaries> symbols = {
      {95, 105}, {105, 115}, {145, 155}, {155, 165}};

  const std::vector<CharacterBoundaries> expected = {
      {110, 0, 120, 1},
      {121, 1, 130, 2},
      {131, 2, 140, 3},
      {141, 3, 150, 4}};

  ASSERT_EQ(expected, calculator.calculate_bounds(symbols));
}
|
||||
|
||||
} // namespace tesseract
|
Loading…
Reference in New Issue
Block a user