mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-06-13 06:08:52 +08:00

When using LSTM models, the accuracy of character bounding boxes is low, with many blobs assigned to the wrong characters. This is caused by the fact that the LSTM model output provides only approximate character positions without boundary data. As a result, the input blobs cannot be accurately mapped to characters, which compromises the accuracy of character bounding boxes. Currently this problem is solved as follows. The character boundaries are computed according to the character positions from the LSTM output by placing the boundaries midway between two character positions. The blobs are then assigned according to which character the center of the blob falls into. In other words, the blobs are assigned to the nearest characters. This unfortunately produces a lot of errors because the character positions in the LSTM output have a tendency to drift, thus the nearest character is often not the right one. Fortunately, while the LSTM model produces approximate positions, the blob boundaries produced by the regular segmenter are pretty good. Most of the time a single blob corresponds to a single character and vice versa. The above is used to create an optimization algorithm that treats the output of the regular segmenter as a template to which the LSTM model output is matched. The best match is selected by assigning each unwanted property of the outcome a cost and then minimizing the total cost of the solution. This reliably solves the most frequent error present in the current solution, where blobs are simply assigned to the wrong character. As a result, the new algorithm produces up to 20 times fewer errors. Fixes https://github.com/tesseract-ocr/tesseract/issues/1712.
198 lines
5.2 KiB
C++
198 lines
5.2 KiB
C++
// (C) Copyright 2022, Povilas Kanapickas <povilas@radix.lt>.
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
|
|
#include "blob_bounds_calculator.h"
|
|
|
|
#include "include_gunit.h"
|
|
|
|
namespace tesseract {
|
|
|
|
namespace {
|
|
|
|
// Returns the calculator configuration shared by every test in this file.
// The merge, split and position-difference cost fields are set to 1, the two
// "unmatched" cost fields to 2, and max_pos_diff to 2.
BoxBoundariesCalculatorConfig get_default_config() {
  BoxBoundariesCalculatorConfig defaults;

  // Cost fields for a box or a symbol that has no counterpart.
  defaults.symbol_with_no_box_cost = 2;
  defaults.box_with_no_symbol_cost = 2;

  // Position-difference settings.
  defaults.max_pos_diff = 2;
  defaults.pos_diff_cost = 1;

  // Cost fields for splitting and merging.
  defaults.split_cost = 1;
  defaults.merge_cost = 1;

  return defaults;
}
|
|
|
|
} // namespace
|
|
|
|
// Baseline case: four boxes and four symbol ranges that line up with each
// other (the symbol ranges start at most one unit before the matching box).
// Every expected entry refers to exactly one box.
// NOTE(review): the CharacterBoundaries initializers look like
// {begin position, first box index, end position, one-past-last box index};
// confirm the field order against blob_bounds_calculator.h.
TEST(BoxBoundariesCalculatorTest, MatchesExactly) {
  const BoxBoundariesCalculatorConfig config = get_default_config();
  BoxBoundariesCalculator calc{{{10, 20}, {21, 30}, {31, 40}, {41, 50}},
                               config};

  const std::vector<CharacterBoundaries> want = {
      {10, 0, 20, 1}, {21, 1, 30, 2}, {31, 2, 40, 3}, {41, 3, 50, 4}};

  const auto got =
      calc.calculate_bounds({{10, 20}, {20, 30}, {30, 40}, {40, 50}});

  ASSERT_EQ(want, got);
}
|
|
|
|
// Three boxes for four symbols: the middle box {21, 40} spans the range of
// the second and third symbols. The expected result still has four entries;
// the two middle ones carry the same box indices (1, 2) and meet at
// position 30.
TEST(BoxBoundariesCalculatorTest, OneMergedInMiddle) {
  const BoxBoundariesCalculatorConfig config = get_default_config();
  BoxBoundariesCalculator calc{{{10, 20}, {21, 40}, {41, 50}}, config};

  const std::vector<CharacterBoundaries> want = {
      {10, 0, 20, 1}, {21, 1, 30, 2}, {30, 1, 40, 2}, {41, 2, 50, 3}};

  const auto got =
      calc.calculate_bounds({{10, 20}, {20, 30}, {30, 40}, {40, 50}});

  ASSERT_EQ(want, got);
}
|
|
|
|
// Five boxes for four symbols: boxes {21, 25} and {26, 30} both lie inside
// the range of the second symbol. The expected second entry runs from 21 to
// 30 with box indices 1 and 3, i.e. it takes both of those boxes.
TEST(BoxBoundariesCalculatorTest, OneSplit) {
  const BoxBoundariesCalculatorConfig config = get_default_config();
  BoxBoundariesCalculator calc{
      {{10, 20}, {21, 25}, {26, 30}, {31, 40}, {41, 50}}, config};

  const std::vector<CharacterBoundaries> want = {
      {10, 0, 20, 1}, {21, 1, 30, 3}, {31, 3, 40, 4}, {41, 4, 50, 5}};

  const auto got =
      calc.calculate_bounds({{10, 20}, {20, 30}, {30, 40}, {40, 50}});

  ASSERT_EQ(want, got);
}
|
|
|
|
// Six boxes for four symbols, with the two surplus boxes ({51, 60} and
// {61, 70}) located after the last symbol range. The expected last entry
// stretches from 41 to 70 with box indices 3 and 6.
TEST(BoxBoundariesCalculatorTest, ManySplitAtEnd) {
  const BoxBoundariesCalculatorConfig config = get_default_config();
  BoxBoundariesCalculator calc{
      {{10, 20}, {21, 30}, {31, 40}, {41, 50}, {51, 60}, {61, 70}}, config};

  const std::vector<CharacterBoundaries> want = {
      {10, 0, 20, 1}, {21, 1, 30, 2}, {31, 2, 40, 3}, {41, 3, 70, 6}};

  const auto got =
      calc.calculate_bounds({{10, 20}, {20, 30}, {30, 40}, {40, 50}});

  ASSERT_EQ(want, got);
}
|
|
|
|
// Symbol ranges are offset by about half a box width towards larger
// positions (15-25, 25-35, ...) relative to the boxes (10-20, 21-30, ...).
// The expected result is still one entry per box, using the box positions.
TEST(BoxBoundariesCalculatorTest, ShiftedSymbolPositionsForward) {
  const BoxBoundariesCalculatorConfig config = get_default_config();
  BoxBoundariesCalculator calc{{{10, 20}, {21, 30}, {31, 40}, {41, 50}},
                               config};

  const std::vector<CharacterBoundaries> want = {
      {10, 0, 20, 1}, {21, 1, 30, 2}, {31, 2, 40, 3}, {41, 3, 50, 4}};

  const auto got =
      calc.calculate_bounds({{15, 25}, {25, 35}, {35, 45}, {45, 55}});

  ASSERT_EQ(want, got);
}
|
|
|
|
// Symbol ranges are offset by roughly one and a half box widths towards
// larger positions (25-35 ... 55-65) relative to the boxes (10-20 ... 41-50).
// The expected result is still one entry per box, using the box positions.
TEST(BoxBoundariesCalculatorTest, VeryShiftedSymbolPositionsForward) {
  const BoxBoundariesCalculatorConfig config = get_default_config();
  BoxBoundariesCalculator calc{{{10, 20}, {21, 30}, {31, 40}, {41, 50}},
                               config};

  const std::vector<CharacterBoundaries> want = {
      {10, 0, 20, 1}, {21, 1, 30, 2}, {31, 2, 40, 3}, {41, 3, 50, 4}};

  const auto got =
      calc.calculate_bounds({{25, 35}, {35, 45}, {45, 55}, {55, 65}});

  ASSERT_EQ(want, got);
}
|
|
|
|
// Symbol ranges are offset by about half a box width towards smaller
// positions (105-115, 115-125, ...) relative to the boxes (110-120,
// 121-130, ...). The expected result is still one entry per box, using the
// box positions.
TEST(BoxBoundariesCalculatorTest, ShiftedSymbolPositionsBackward) {
  const BoxBoundariesCalculatorConfig config = get_default_config();
  BoxBoundariesCalculator calc{
      {{110, 120}, {121, 130}, {131, 140}, {141, 150}}, config};

  const std::vector<CharacterBoundaries> want = {
      {110, 0, 120, 1}, {121, 1, 130, 2}, {131, 2, 140, 3}, {141, 3, 150, 4}};

  const auto got =
      calc.calculate_bounds({{105, 115}, {115, 125}, {125, 135}, {135, 145}});

  ASSERT_EQ(want, got);
}
|
|
|
|
// Symbol ranges are offset by roughly one and a half box widths towards
// smaller positions (95-105 ... 125-135) relative to the boxes (110-120 ...
// 141-150). The expected result is still one entry per box, using the box
// positions.
TEST(BoxBoundariesCalculatorTest, VeryShiftedSymbolPositionsBackward) {
  const BoxBoundariesCalculatorConfig config = get_default_config();
  BoxBoundariesCalculator calc{
      {{110, 120}, {121, 130}, {131, 140}, {141, 150}}, config};

  const std::vector<CharacterBoundaries> want = {
      {110, 0, 120, 1}, {121, 1, 130, 2}, {131, 2, 140, 3}, {141, 3, 150, 4}};

  const auto got =
      calc.calculate_bounds({{95, 105}, {105, 115}, {115, 125}, {125, 135}});

  ASSERT_EQ(want, got);
}
|
|
|
|
// The first two symbol ranges sit slightly before their boxes and the last
// two slightly after, which leaves an uncovered gap (125-135) in the middle
// of the symbol positions. The expected result is still one entry per box,
// using the box positions.
TEST(BoxBoundariesCalculatorTest, HoleInMiddle) {
  const BoxBoundariesCalculatorConfig config = get_default_config();
  BoxBoundariesCalculator calc{
      {{110, 120}, {121, 130}, {131, 140}, {141, 150}}, config};

  const std::vector<CharacterBoundaries> want = {
      {110, 0, 120, 1}, {121, 1, 130, 2}, {131, 2, 140, 3}, {141, 3, 150, 4}};

  const auto got =
      calc.calculate_bounds({{105, 115}, {115, 125}, {135, 145}, {145, 155}});

  ASSERT_EQ(want, got);
}
|
|
|
|
// Same layout as the small-hole case but with a wider uncovered gap
// (115-145) between the second and third symbol ranges: the first two are
// well before their boxes and the last two well after. The expected result
// is still one entry per box, using the box positions.
TEST(BoxBoundariesCalculatorTest, LargeHoleInMiddle) {
  const BoxBoundariesCalculatorConfig config = get_default_config();
  BoxBoundariesCalculator calc{
      {{110, 120}, {121, 130}, {131, 140}, {141, 150}}, config};

  const std::vector<CharacterBoundaries> want = {
      {110, 0, 120, 1}, {121, 1, 130, 2}, {131, 2, 140, 3}, {141, 3, 150, 4}};

  const auto got =
      calc.calculate_bounds({{95, 105}, {105, 115}, {145, 155}, {155, 165}});

  ASSERT_EQ(want, got);
}
|
|
|
|
} // namespace tesseract
|