mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-30 15:39:05 +08:00
Optimize performance with clang-tidy
The code was partially formatted with clang-format and optimized with clang-tidy --checks="-*,perfor*" --fix src/*/*.cpp Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
parent
e5011c545a
commit
d8d63fd71b
@ -37,7 +37,8 @@ static tesseract::Orientation GetBlockTextOrientation(const PageIterator *it) {
|
||||
tesseract::WritingDirection writing_direction;
|
||||
tesseract::TextlineOrder textline_order;
|
||||
float deskew_angle;
|
||||
it->Orientation(&orientation, &writing_direction, &textline_order, &deskew_angle);
|
||||
it->Orientation(&orientation, &writing_direction, &textline_order,
|
||||
&deskew_angle);
|
||||
return orientation;
|
||||
}
|
||||
|
||||
@ -49,7 +50,8 @@ static tesseract::Orientation GetBlockTextOrientation(const PageIterator *it) {
|
||||
* method currently only inserts a 'textangle' property to indicate the rotation
|
||||
* direction and does not add any baseline information to the hocr string.
|
||||
*/
|
||||
static void AddBaselineCoordsTohOCR(const PageIterator *it, PageIteratorLevel level,
|
||||
static void AddBaselineCoordsTohOCR(const PageIterator *it,
|
||||
PageIteratorLevel level,
|
||||
std::stringstream &hocr_str) {
|
||||
tesseract::Orientation orientation = GetBlockTextOrientation(it);
|
||||
if (orientation != ORIENTATION_PAGE_UP) {
|
||||
@ -82,7 +84,8 @@ static void AddBaselineCoordsTohOCR(const PageIterator *it, PageIteratorLevel le
|
||||
double p1 = (y2 - y1) / static_cast<double>(x2 - x1);
|
||||
double p0 = y1 - p1 * x1;
|
||||
|
||||
hocr_str << "; baseline " << round(p1 * 1000.0) / 1000.0 << " " << round(p0 * 1000.0) / 1000.0;
|
||||
hocr_str << "; baseline " << round(p1 * 1000.0) / 1000.0 << " "
|
||||
<< round(p0 * 1000.0) / 1000.0;
|
||||
}
|
||||
|
||||
static void AddBoxTohOCR(const ResultIterator *it, PageIteratorLevel level,
|
||||
@ -91,7 +94,8 @@ static void AddBoxTohOCR(const ResultIterator *it, PageIteratorLevel level,
|
||||
it->BoundingBox(level, &left, &top, &right, &bottom);
|
||||
// This is the only place we use double quotes instead of single quotes,
|
||||
// but it may be too late to change for consistency
|
||||
hocr_str << " title=\"bbox " << left << " " << top << " " << right << " " << bottom;
|
||||
hocr_str << " title=\"bbox " << left << " " << top << " " << right << " "
|
||||
<< bottom;
|
||||
// Add baseline coordinates & heights for textlines only.
|
||||
if (level == RIL_TEXTLINE) {
|
||||
AddBaselineCoordsTohOCR(it, level, hocr_str);
|
||||
@ -99,8 +103,8 @@ static void AddBoxTohOCR(const ResultIterator *it, PageIteratorLevel level,
|
||||
float row_height, descenders, ascenders; // row attributes
|
||||
it->RowAttributes(&row_height, &descenders, &ascenders);
|
||||
// TODO(rays): Do we want to limit these to a single decimal place?
|
||||
hocr_str << "; x_size " << row_height << "; x_descenders " << -descenders << "; x_ascenders "
|
||||
<< ascenders;
|
||||
hocr_str << "; x_size " << row_height << "; x_descenders " << -descenders
|
||||
<< "; x_ascenders " << ascenders;
|
||||
}
|
||||
hocr_str << "\">";
|
||||
}
|
||||
@ -128,7 +132,8 @@ char *TessBaseAPI::GetHOCRText(int page_number) {
|
||||
* Returned string must be freed with the delete [] operator.
|
||||
*/
|
||||
char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
|
||||
if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0)) {
|
||||
if (tesseract_ == nullptr ||
|
||||
(page_res_ == nullptr && Recognize(monitor) < 0)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@ -147,13 +152,16 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
|
||||
|
||||
#ifdef _WIN32
|
||||
// convert input name from ANSI encoding to utf-8
|
||||
int str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, nullptr, 0);
|
||||
int str16_len =
|
||||
MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, nullptr, 0);
|
||||
wchar_t *uni16_str = new WCHAR[str16_len];
|
||||
str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, uni16_str, str16_len);
|
||||
int utf8_len =
|
||||
WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr, 0, nullptr, nullptr);
|
||||
str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, uni16_str,
|
||||
str16_len);
|
||||
int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr,
|
||||
0, nullptr, nullptr);
|
||||
char *utf8_str = new char[utf8_len];
|
||||
WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, utf8_len, nullptr, nullptr);
|
||||
WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, utf8_len,
|
||||
nullptr, nullptr);
|
||||
input_file_ = utf8_str;
|
||||
delete[] uni16_str;
|
||||
delete[] utf8_str;
|
||||
@ -174,8 +182,8 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
|
||||
hocr_str << "unknown";
|
||||
}
|
||||
|
||||
hocr_str << "\"; bbox " << rect_left_ << " " << rect_top_ << " " << rect_width_ << " "
|
||||
<< rect_height_ << "; ppageno " << page_number
|
||||
hocr_str << "\"; bbox " << rect_left_ << " " << rect_top_ << " "
|
||||
<< rect_width_ << " " << rect_height_ << "; ppageno " << page_number
|
||||
<< "; scan_res " << GetSourceYResolution() << " "
|
||||
<< GetSourceYResolution() << "'>\n";
|
||||
|
||||
@ -230,7 +238,8 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
|
||||
|
||||
// Now, process the word...
|
||||
int32_t lstm_choice_mode = tesseract_->lstm_choice_mode;
|
||||
std::vector<std::vector<std::vector<std::pair<const char *, float>>>> *rawTimestepMap = nullptr;
|
||||
std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
|
||||
*rawTimestepMap = nullptr;
|
||||
std::vector<std::vector<std::pair<const char *, float>>> *CTCMap = nullptr;
|
||||
if (lstm_choice_mode) {
|
||||
CTCMap = res_it->GetBestLSTMSymbolChoices();
|
||||
@ -244,10 +253,12 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
|
||||
int pointsize, font_id;
|
||||
const char *font_name;
|
||||
res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
|
||||
font_name = res_it->WordFontAttributes(&bold, &italic, &underlined, &monospace, &serif,
|
||||
&smallcaps, &pointsize, &font_id);
|
||||
hocr_str << " title='bbox " << left << " " << top << " " << right << " " << bottom
|
||||
<< "; x_wconf " << static_cast<int>(res_it->Confidence(RIL_WORD));
|
||||
font_name =
|
||||
res_it->WordFontAttributes(&bold, &italic, &underlined, &monospace,
|
||||
&serif, &smallcaps, &pointsize, &font_id);
|
||||
hocr_str << " title='bbox " << left << " " << top << " " << right << " "
|
||||
<< bottom << "; x_wconf "
|
||||
<< static_cast<int>(res_it->Confidence(RIL_WORD));
|
||||
if (font_info) {
|
||||
if (font_name) {
|
||||
hocr_str << "; x_font " << HOcrEscape(font_name).c_str();
|
||||
@ -287,31 +298,36 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
|
||||
hocr_str << "<em>";
|
||||
}
|
||||
do {
|
||||
const std::unique_ptr<const char[]> grapheme(res_it->GetUTF8Text(RIL_SYMBOL));
|
||||
const std::unique_ptr<const char[]> grapheme(
|
||||
res_it->GetUTF8Text(RIL_SYMBOL));
|
||||
if (grapheme && grapheme[0] != 0) {
|
||||
if (hocr_boxes) {
|
||||
res_it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom);
|
||||
hocr_str << "\n <span class='ocrx_cinfo' title='x_bboxes " << left << " " << top
|
||||
<< " " << right << " " << bottom << "; x_conf " << res_it->Confidence(RIL_SYMBOL)
|
||||
<< "'>";
|
||||
hocr_str << "\n <span class='ocrx_cinfo' title='x_bboxes "
|
||||
<< left << " " << top << " " << right << " " << bottom
|
||||
<< "; x_conf " << res_it->Confidence(RIL_SYMBOL) << "'>";
|
||||
}
|
||||
hocr_str << HOcrEscape(grapheme.get()).c_str();
|
||||
if (hocr_boxes) {
|
||||
hocr_str << "</span>";
|
||||
tesseract::ChoiceIterator ci(*res_it);
|
||||
if (lstm_choice_mode == 1 && ci.Timesteps() != nullptr) {
|
||||
std::vector<std::vector<std::pair<const char *, float>>> *symbol = ci.Timesteps();
|
||||
std::vector<std::vector<std::pair<const char *, float>>> *symbol =
|
||||
ci.Timesteps();
|
||||
hocr_str << "\n <span class='ocr_symbol'"
|
||||
<< " id='"
|
||||
<< "symbol_" << page_id << "_" << wcnt << "_" << scnt << "'>";
|
||||
for (auto timestep : *symbol) {
|
||||
<< "symbol_" << page_id << "_" << wcnt << "_" << scnt
|
||||
<< "'>";
|
||||
for (const auto &timestep : *symbol) {
|
||||
hocr_str << "\n <span class='ocrx_cinfo'"
|
||||
<< " id='"
|
||||
<< "timestep" << page_id << "_" << wcnt << "_" << tcnt << "'>";
|
||||
<< "timestep" << page_id << "_" << wcnt << "_" << tcnt
|
||||
<< "'>";
|
||||
for (auto conf : timestep) {
|
||||
hocr_str << "\n <span class='ocrx_cinfo'"
|
||||
<< " id='"
|
||||
<< "choice_" << page_id << "_" << wcnt << "_" << ccnt << "'"
|
||||
<< "choice_" << page_id << "_" << wcnt << "_" << ccnt
|
||||
<< "'"
|
||||
<< " title='x_confs " << int(conf.second * 100) << "'>"
|
||||
<< HOcrEscape(conf.first).c_str() << "</span>";
|
||||
++ccnt;
|
||||
@ -324,16 +340,18 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
|
||||
} else if (lstm_choice_mode == 2) {
|
||||
hocr_str << "\n <span class='ocrx_cinfo'"
|
||||
<< " id='"
|
||||
<< "lstm_choices_" << page_id << "_" << wcnt << "_" << tcnt << "'>";
|
||||
<< "lstm_choices_" << page_id << "_" << wcnt << "_" << tcnt
|
||||
<< "'>";
|
||||
do {
|
||||
const char *choice = ci.GetUTF8Text();
|
||||
float choiceconf = ci.Confidence();
|
||||
if (choice != nullptr) {
|
||||
hocr_str << "\n <span class='ocrx_cinfo'"
|
||||
<< " id='"
|
||||
<< "choice_" << page_id << "_" << wcnt << "_" << ccnt << "'"
|
||||
<< " title='x_confs " << choiceconf << "'>" << HOcrEscape(choice).c_str()
|
||||
<< "</span>";
|
||||
<< "choice_" << page_id << "_" << wcnt << "_" << ccnt
|
||||
<< "'"
|
||||
<< " title='x_confs " << choiceconf << "'>"
|
||||
<< HOcrEscape(choice).c_str() << "</span>";
|
||||
ccnt++;
|
||||
}
|
||||
} while (ci.Next());
|
||||
@ -352,18 +370,20 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
|
||||
}
|
||||
// If the lstm choice mode is required it is added here
|
||||
if (lstm_choice_mode == 1 && !hocr_boxes && rawTimestepMap != nullptr) {
|
||||
for (auto symbol : *rawTimestepMap) {
|
||||
for (const auto &symbol : *rawTimestepMap) {
|
||||
hocr_str << "\n <span class='ocr_symbol'"
|
||||
<< " id='"
|
||||
<< "symbol_" << page_id << "_" << wcnt << "_" << scnt << "'>";
|
||||
for (auto timestep : symbol) {
|
||||
for (const auto &timestep : symbol) {
|
||||
hocr_str << "\n <span class='ocrx_cinfo'"
|
||||
<< " id='"
|
||||
<< "timestep" << page_id << "_" << wcnt << "_" << tcnt << "'>";
|
||||
<< "timestep" << page_id << "_" << wcnt << "_" << tcnt
|
||||
<< "'>";
|
||||
for (auto conf : timestep) {
|
||||
hocr_str << "\n <span class='ocrx_cinfo'"
|
||||
<< " id='"
|
||||
<< "choice_" << page_id << "_" << wcnt << "_" << ccnt << "'"
|
||||
<< "choice_" << page_id << "_" << wcnt << "_" << ccnt
|
||||
<< "'"
|
||||
<< " title='x_confs " << int(conf.second * 100) << "'>"
|
||||
<< HOcrEscape(conf.first).c_str() << "</span>";
|
||||
++ccnt;
|
||||
@ -375,11 +395,12 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
|
||||
++scnt;
|
||||
}
|
||||
} else if (lstm_choice_mode == 2 && !hocr_boxes && CTCMap != nullptr) {
|
||||
for (auto timestep : *CTCMap) {
|
||||
for (const auto &timestep : *CTCMap) {
|
||||
if (timestep.size() > 0) {
|
||||
hocr_str << "\n <span class='ocrx_cinfo'"
|
||||
<< " id='"
|
||||
<< "lstm_choices_" << page_id << "_" << wcnt << "_" << tcnt << "'>";
|
||||
<< "lstm_choices_" << page_id << "_" << wcnt << "_" << tcnt
|
||||
<< "'>";
|
||||
for (auto &j : timestep) {
|
||||
float conf = 100 - tesseract_->lstm_rating_coefficient * j.second;
|
||||
if (conf < 0.0f) {
|
||||
@ -390,9 +411,10 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
|
||||
}
|
||||
hocr_str << "\n <span class='ocrx_cinfo'"
|
||||
<< " id='"
|
||||
<< "choice_" << page_id << "_" << wcnt << "_" << ccnt << "'"
|
||||
<< " title='x_confs " << conf << "'>" << HOcrEscape(j.first).c_str()
|
||||
<< "</span>";
|
||||
<< "choice_" << page_id << "_" << wcnt << "_" << ccnt
|
||||
<< "'"
|
||||
<< " title='x_confs " << conf << "'>"
|
||||
<< HOcrEscape(j.first).c_str() << "</span>";
|
||||
ccnt++;
|
||||
}
|
||||
hocr_str << "</span>";
|
||||
|
@ -35,6 +35,7 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <cfloat>
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
|
||||
@ -189,11 +190,11 @@ void EquationDetect::IdentifySpecialText(BLOBNBOX *blobnbox, const int height_th
|
||||
const float kConfScoreTh = -5.0f, kConfDiffTh = 1.8;
|
||||
// The scores here are negative, so the max/min == fabs(min/max).
|
||||
// float ratio = fmax(lang_score, equ_score) / fmin(lang_score, equ_score);
|
||||
const float diff = fabs(lang_score - equ_score);
|
||||
const float diff = std::fabs(lang_score - equ_score);
|
||||
BlobSpecialTextType type = BSTT_NONE;
|
||||
|
||||
// Classification.
|
||||
if (fmax(lang_score, equ_score) < kConfScoreTh) {
|
||||
if (std::fmax(lang_score, equ_score) < kConfScoreTh) {
|
||||
// If both scores are very small, then mark it as unclear.
|
||||
type = BSTT_UNCLEAR;
|
||||
} else if (diff > kConfDiffTh && equ_score > lang_score) {
|
||||
@ -727,7 +728,7 @@ int EquationDetect::CountAlignment(const std::vector<int> &sorted_vec, const int
|
||||
if (sorted_vec.empty()) {
|
||||
return 0;
|
||||
}
|
||||
const int kDistTh = static_cast<int>(round(0.03f * resolution_));
|
||||
const int kDistTh = static_cast<int>(std::round(0.03f * resolution_));
|
||||
auto pos = std::upper_bound(sorted_vec.begin(), sorted_vec.end(), val);
|
||||
if (pos > sorted_vec.begin()) {
|
||||
--pos;
|
||||
@ -772,7 +773,7 @@ void EquationDetect::IdentifyInlinePartsHorizontal() {
|
||||
ASSERT_HOST(cps_super_bbox_);
|
||||
std::vector<ColPartition *> new_seeds;
|
||||
const int kMarginDiffTh = IntCastRounded(0.5 * lang_tesseract_->source_resolution());
|
||||
const int kGapTh = static_cast<int>(round(1.0f * lang_tesseract_->source_resolution()));
|
||||
const int kGapTh = static_cast<int>(std::round(1.0f * lang_tesseract_->source_resolution()));
|
||||
ColPartitionGridSearch search(part_grid_);
|
||||
search.SetUniqueMode(true);
|
||||
// The center x coordinate of the cp_super_bbox_.
|
||||
@ -923,8 +924,8 @@ bool EquationDetect::IsInline(const bool search_bottom, const int textparts_line
|
||||
// Check if neighbor and part is inline similar.
|
||||
const float kHeightRatioTh = 0.5;
|
||||
const int kYGapTh = textparts_linespacing > 0
|
||||
? textparts_linespacing + static_cast<int>(round(0.02f * resolution_))
|
||||
: static_cast<int>(round(0.05f * resolution_)); // Default value.
|
||||
? textparts_linespacing + static_cast<int>(std::round(0.02f * resolution_))
|
||||
: static_cast<int>(std::round(0.05f * resolution_)); // Default value.
|
||||
if (part_box.x_overlap(neighbor_box) && // Location feature.
|
||||
part_box.y_gap(neighbor_box) <= kYGapTh && // Line spacing.
|
||||
// Geo feature.
|
||||
@ -978,9 +979,9 @@ EquationDetect::IndentType EquationDetect::IsIndented(ColPartition *part) {
|
||||
ColPartitionGridSearch search(part_grid_);
|
||||
ColPartition *neighbor = nullptr;
|
||||
const TBOX &part_box(part->bounding_box());
|
||||
const int kXGapTh = static_cast<int>(round(0.5f * resolution_));
|
||||
const int kRadiusTh = static_cast<int>(round(3.0f * resolution_));
|
||||
const int kYGapTh = static_cast<int>(round(0.5f * resolution_));
|
||||
const int kXGapTh = static_cast<int>(std::round(0.5f * resolution_));
|
||||
const int kRadiusTh = static_cast<int>(std::round(3.0f * resolution_));
|
||||
const int kYGapTh = static_cast<int>(std::round(0.5f * resolution_));
|
||||
|
||||
// Here we use a simple approximation algorithm: from the center of part, We
|
||||
// perform the radius search, and check if we can find a neighboring partition
|
||||
@ -1080,7 +1081,7 @@ void EquationDetect::ExpandSeedHorizontal(const bool search_left, ColPartition *
|
||||
std::vector<ColPartition *> *parts_to_merge) {
|
||||
ASSERT_HOST(seed != nullptr && parts_to_merge != nullptr);
|
||||
const float kYOverlapTh = 0.6;
|
||||
const int kXGapTh = static_cast<int>(round(0.2f * resolution_));
|
||||
const int kXGapTh = static_cast<int>(std::round(0.2f * resolution_));
|
||||
|
||||
ColPartitionGridSearch search(part_grid_);
|
||||
const TBOX &seed_box(seed->bounding_box());
|
||||
@ -1132,7 +1133,7 @@ void EquationDetect::ExpandSeedVertical(const bool search_bottom, ColPartition *
|
||||
std::vector<ColPartition *> *parts_to_merge) {
|
||||
ASSERT_HOST(seed != nullptr && parts_to_merge != nullptr && cps_super_bbox_ != nullptr);
|
||||
const float kXOverlapTh = 0.4;
|
||||
const int kYGapTh = static_cast<int>(round(0.2f * resolution_));
|
||||
const int kYGapTh = static_cast<int>(std::round(0.2f * resolution_));
|
||||
|
||||
ColPartitionGridSearch search(part_grid_);
|
||||
const TBOX &seed_box(seed->bounding_box());
|
||||
@ -1210,8 +1211,8 @@ void EquationDetect::ExpandSeedVertical(const bool search_bottom, ColPartition *
|
||||
}
|
||||
|
||||
bool EquationDetect::IsNearSmallNeighbor(const TBOX &seed_box, const TBOX &part_box) const {
|
||||
const int kXGapTh = static_cast<int>(round(0.25f * resolution_));
|
||||
const int kYGapTh = static_cast<int>(round(0.05f * resolution_));
|
||||
const int kXGapTh = static_cast<int>(std::round(0.25f * resolution_));
|
||||
const int kYGapTh = static_cast<int>(std::round(0.05f * resolution_));
|
||||
|
||||
// Check geometric feature.
|
||||
if (part_box.height() > seed_box.height() || part_box.width() > seed_box.width()) {
|
||||
@ -1266,7 +1267,7 @@ void EquationDetect::ProcessMathBlockSatelliteParts() {
|
||||
int med_height = text_box.height();
|
||||
if (text_parts.size() % 2 == 0 && text_parts.size() > 1) {
|
||||
const TBOX &text_box = text_parts[text_parts.size() / 2 - 1]->bounding_box();
|
||||
med_height = static_cast<int>(round(0.5f * (text_box.height() + med_height)));
|
||||
med_height = static_cast<int>(std::round(0.5f * (text_box.height() + med_height)));
|
||||
}
|
||||
|
||||
// Iterate every text_parts and check if it is a math block satellite.
|
||||
@ -1348,7 +1349,7 @@ bool EquationDetect::IsMathBlockSatellite(ColPartition *part,
|
||||
ColPartition *EquationDetect::SearchNNVertical(const bool search_bottom, const ColPartition *part) {
|
||||
ASSERT_HOST(part);
|
||||
ColPartition *nearest_neighbor = nullptr, *neighbor = nullptr;
|
||||
const int kYGapTh = static_cast<int>(round(resolution_ * 0.5f));
|
||||
const int kYGapTh = static_cast<int>(std::round(resolution_ * 0.5f));
|
||||
|
||||
ColPartitionGridSearch search(part_grid_);
|
||||
search.SetUniqueMode(true);
|
||||
@ -1383,7 +1384,7 @@ bool EquationDetect::IsNearMathNeighbor(const int y_gap, const ColPartition *nei
|
||||
if (!neighbor) {
|
||||
return false;
|
||||
}
|
||||
const int kYGapTh = static_cast<int>(round(resolution_ * 0.1f));
|
||||
const int kYGapTh = static_cast<int>(std::round(resolution_ * 0.1f));
|
||||
return neighbor->type() == PT_EQUATION && y_gap <= kYGapTh;
|
||||
}
|
||||
|
||||
|
@ -23,6 +23,7 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <cctype>
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
|
||||
namespace tesseract {
|
||||
@ -205,7 +206,7 @@ float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res, float *baseline_sh
|
||||
new_xht / word_res->denorm.y_scale());
|
||||
}
|
||||
// The xheight must change by at least x_ht_min_change to be used.
|
||||
if (fabs(new_xht - kBlnXHeight) >= x_ht_min_change) {
|
||||
if (std::fabs(new_xht - kBlnXHeight) >= x_ht_min_change) {
|
||||
return new_xht / word_res->denorm.y_scale();
|
||||
} else {
|
||||
return bottom_shift != 0 ? word_res->x_height : 0.0f;
|
||||
|
@ -428,7 +428,7 @@ bool OrientationDetector::detect_blob(BLOB_CHOICE_LIST *scores) {
|
||||
// Normalize the orientation scores for the blob and use them to
|
||||
// update the aggregated orientation score.
|
||||
for (int i = 0; total_blob_o_score != 0 && i < 4; ++i) {
|
||||
osr_->orientations[i] += log(blob_o_score[i] / total_blob_o_score);
|
||||
osr_->orientations[i] += std::log(blob_o_score[i] / total_blob_o_score);
|
||||
}
|
||||
|
||||
// TODO(ranjith) Add an early exit test, based on min_orientation_margin,
|
||||
|
@ -113,6 +113,7 @@ static void PrintTable(const std::vector<std::vector<std::string>> &rows, const
|
||||
}
|
||||
|
||||
std::vector<std::string> col_width_patterns;
|
||||
col_width_patterns.reserve(max_col_widths.size());
|
||||
for (int max_col_width : max_col_widths) {
|
||||
col_width_patterns.push_back(std::string("%-") + std::to_string(max_col_width) + "s");
|
||||
}
|
||||
|
@ -33,6 +33,7 @@
|
||||
#include <allheaders.h> // for pixGetHeight, pixGetPixel
|
||||
|
||||
#include <algorithm> // for max, min
|
||||
#include <cmath>
|
||||
#include <cstdint> // for INT32_MAX, INT16_MAX
|
||||
|
||||
#define PROJECTION_MARGIN 10 // arbitrary
|
||||
@ -133,7 +134,7 @@ void BLOBNBOX::chop( // chop blobs
|
||||
BLOBNBOX_IT blob_it; // blob iterator
|
||||
|
||||
// get no of chops
|
||||
blobcount = static_cast<int16_t>(floor(box.width() / xheight));
|
||||
blobcount = static_cast<int16_t>(std::floor(box.width() / xheight));
|
||||
if (blobcount > 1 && cblob_ptr != nullptr) {
|
||||
// width of each
|
||||
blobwidth = static_cast<float>(box.width() + 1) / blobcount;
|
||||
@ -150,12 +151,12 @@ void BLOBNBOX::chop( // chop blobs
|
||||
UpdateRange(test_ymin, test_ymax, &ymin, &ymax);
|
||||
} while (blob != end_it->data());
|
||||
if (ymin < ymax) {
|
||||
leftx = static_cast<int16_t>(floor(rightx - blobwidth));
|
||||
leftx = static_cast<int16_t>(std::floor(rightx - blobwidth));
|
||||
if (leftx < box.left()) {
|
||||
leftx = box.left(); // clip to real box
|
||||
}
|
||||
bl = ICOORD(leftx, static_cast<int16_t>(floor(ymin)));
|
||||
tr = ICOORD(static_cast<int16_t>(ceil(rightx)), static_cast<int16_t>(ceil(ymax)));
|
||||
bl = ICOORD(leftx, static_cast<int16_t>(std::floor(ymin)));
|
||||
tr = ICOORD(static_cast<int16_t>(std::ceil(rightx)), static_cast<int16_t>(std::ceil(ymax)));
|
||||
if (blobindex == 0) {
|
||||
box = TBOX(bl, tr); // change box
|
||||
} else {
|
||||
|
@ -63,7 +63,8 @@ BoxWord *BoxWord::CopyFromNormalized(TWERD *tessword) {
|
||||
for (unsigned b = 0; b < boxword->length_; ++b) {
|
||||
TBLOB *tblob = tessword->blobs[b];
|
||||
TBOX blob_box;
|
||||
for (TESSLINE *outline = tblob->outlines; outline != nullptr; outline = outline->next) {
|
||||
for (TESSLINE *outline = tblob->outlines; outline != nullptr;
|
||||
outline = outline->next) {
|
||||
EDGEPT *edgept = outline->loop;
|
||||
// Iterate over the edges.
|
||||
do {
|
||||
@ -92,7 +93,8 @@ void BoxWord::ClipToOriginalWord(const BLOCK *block, WERD *original_word) {
|
||||
for (unsigned i = 0; i < length_; ++i) {
|
||||
TBOX box = boxes_[i];
|
||||
// Expand by a single pixel, as the poly approximation error is 1 pixel.
|
||||
box = TBOX(box.left() - 1, box.bottom() - 1, box.right() + 1, box.top() + 1);
|
||||
box =
|
||||
TBOX(box.left() - 1, box.bottom() - 1, box.right() + 1, box.top() + 1);
|
||||
// Now find the original box that matches.
|
||||
TBOX original_box;
|
||||
C_BLOB_IT b_it(original_word->cblob_list());
|
||||
@ -106,16 +108,19 @@ void BoxWord::ClipToOriginalWord(const BLOCK *block, WERD *original_word) {
|
||||
}
|
||||
}
|
||||
if (!original_box.null_box()) {
|
||||
if (NearlyEqual<int>(original_box.left(), box.left(), kBoxClipTolerance)) {
|
||||
if (NearlyEqual<int>(original_box.left(), box.left(),
|
||||
kBoxClipTolerance)) {
|
||||
box.set_left(original_box.left());
|
||||
}
|
||||
if (NearlyEqual<int>(original_box.right(), box.right(), kBoxClipTolerance)) {
|
||||
if (NearlyEqual<int>(original_box.right(), box.right(),
|
||||
kBoxClipTolerance)) {
|
||||
box.set_right(original_box.right());
|
||||
}
|
||||
if (NearlyEqual<int>(original_box.top(), box.top(), kBoxClipTolerance)) {
|
||||
box.set_top(original_box.top());
|
||||
}
|
||||
if (NearlyEqual<int>(original_box.bottom(), box.bottom(), kBoxClipTolerance)) {
|
||||
if (NearlyEqual<int>(original_box.bottom(), box.bottom(),
|
||||
kBoxClipTolerance)) {
|
||||
box.set_bottom(original_box.bottom());
|
||||
}
|
||||
}
|
||||
@ -193,7 +198,8 @@ void BoxWord::ComputeBoundingBox() {
|
||||
// This and other putatively are the same, so call the (permanent) callback
|
||||
// for each blob index where the bounding boxes match.
|
||||
// The callback is deleted on completion.
|
||||
void BoxWord::ProcessMatchedBlobs(const TWERD &other, std::function<void(int)> cb) const {
|
||||
void BoxWord::ProcessMatchedBlobs(const TWERD &other,
|
||||
const std::function<void(int)> &cb) const {
|
||||
for (unsigned i = 0; i < length_ && i < other.NumBlobs(); ++i) {
|
||||
TBOX blob_box = other.blobs[i]->bounding_box();
|
||||
if (blob_box == boxes_[i]) {
|
||||
|
@ -72,7 +72,8 @@ public:
|
||||
// This and other putatively are the same, so call the (permanent) callback
|
||||
// for each blob index where the bounding boxes match.
|
||||
// The callback is deleted on completion.
|
||||
void ProcessMatchedBlobs(const TWERD &other, std::function<void(int)> cb) const;
|
||||
void ProcessMatchedBlobs(const TWERD &other,
|
||||
const std::function<void(int)> &cb) const;
|
||||
|
||||
const TBOX &bounding_box() const {
|
||||
return bbox_;
|
||||
|
@ -43,7 +43,8 @@ const int kMaxReadAhead = 8;
|
||||
|
||||
ImageData::ImageData() : page_number_(-1), vertical_text_(false) {}
|
||||
// Takes ownership of the pix and destroys it.
|
||||
ImageData::ImageData(bool vertical, Image pix) : page_number_(0), vertical_text_(vertical) {
|
||||
ImageData::ImageData(bool vertical, Image pix)
|
||||
: page_number_(0), vertical_text_(vertical) {
|
||||
SetPix(pix);
|
||||
}
|
||||
ImageData::~ImageData() {
|
||||
@ -55,8 +56,8 @@ ImageData::~ImageData() {
|
||||
// Builds and returns an ImageData from the basic data. Note that imagedata,
|
||||
// truth_text, and box_text are all the actual file data, NOT filenames.
|
||||
ImageData *ImageData::Build(const char *name, int page_number, const char *lang,
|
||||
const char *imagedata, int imagedatasize, const char *truth_text,
|
||||
const char *box_text) {
|
||||
const char *imagedata, int imagedatasize,
|
||||
const char *truth_text, const char *box_text) {
|
||||
auto *image_data = new ImageData();
|
||||
image_data->imagefilename_ = name;
|
||||
image_data->page_number_ = page_number;
|
||||
@ -67,7 +68,8 @@ ImageData *ImageData::Build(const char *name, int page_number, const char *lang,
|
||||
memcpy(&image_data->image_data_[0], imagedata, imagedatasize);
|
||||
if (!image_data->AddBoxes(box_text)) {
|
||||
if (truth_text == nullptr || truth_text[0] == '\0') {
|
||||
tprintf("Error: No text corresponding to page %d from image %s!\n", page_number, name);
|
||||
tprintf("Error: No text corresponding to page %d from image %s!\n",
|
||||
page_number, name);
|
||||
delete image_data;
|
||||
return nullptr;
|
||||
}
|
||||
@ -210,8 +212,9 @@ Image ImageData::GetPix() const {
|
||||
// The return value is the scaled Pix, which must be pixDestroyed after use,
|
||||
// and scale_factor (if not nullptr) is set to the scale factor that was applied
|
||||
// to the image to achieve the target_height.
|
||||
Image ImageData::PreScale(int target_height, int max_height, float *scale_factor, int *scaled_width,
|
||||
int *scaled_height, std::vector<TBOX> *boxes) const {
|
||||
Image ImageData::PreScale(int target_height, int max_height,
|
||||
float *scale_factor, int *scaled_width,
|
||||
int *scaled_height, std::vector<TBOX> *boxes) const {
|
||||
int input_width = 0;
|
||||
int input_height = 0;
|
||||
Image src_pix = GetPix();
|
||||
@ -231,8 +234,8 @@ Image ImageData::PreScale(int target_height, int max_height, float *scale_factor
|
||||
// Get the scaled image.
|
||||
Image pix = pixScale(src_pix, im_factor, im_factor);
|
||||
if (pix == nullptr) {
|
||||
tprintf("Scaling pix of size %d, %d by factor %g made null pix!!\n", input_width, input_height,
|
||||
im_factor);
|
||||
tprintf("Scaling pix of size %d, %d by factor %g made null pix!!\n",
|
||||
input_width, input_height, im_factor);
|
||||
src_pix.destroy();
|
||||
return nullptr;
|
||||
}
|
||||
@ -278,9 +281,9 @@ void ImageData::Display() const {
|
||||
}
|
||||
int width = pixGetWidth(pix);
|
||||
int height = pixGetHeight(pix);
|
||||
auto *win =
|
||||
new ScrollView("Imagedata", 100, 100, 2 * (width + 2 * kTextSize),
|
||||
2 * (height + 4 * kTextSize), width + 10, height + 3 * kTextSize, true);
|
||||
auto *win = new ScrollView("Imagedata", 100, 100, 2 * (width + 2 * kTextSize),
|
||||
2 * (height + 4 * kTextSize), width + 10,
|
||||
height + 3 * kTextSize, true);
|
||||
win->Draw(pix, 0, height - 1);
|
||||
pix.destroy();
|
||||
// Draw the boxes.
|
||||
@ -309,7 +312,8 @@ void ImageData::Display() const {
|
||||
|
||||
// Adds the supplied boxes and transcriptions that correspond to the correct
|
||||
// page number.
|
||||
void ImageData::AddBoxes(const std::vector<TBOX> &boxes, const std::vector<std::string> &texts,
|
||||
void ImageData::AddBoxes(const std::vector<TBOX> &boxes,
|
||||
const std::vector<std::string> &texts,
|
||||
const std::vector<int> &box_pages) {
|
||||
// Copy the boxes and make the transcription.
|
||||
for (unsigned i = 0; i < box_pages.size(); ++i) {
|
||||
@ -346,7 +350,8 @@ Image ImageData::GetPixInternal(const std::vector<char> &image_data) {
|
||||
Image pix = nullptr;
|
||||
if (!image_data.empty()) {
|
||||
// Convert the array to an image.
|
||||
const auto *u_data = reinterpret_cast<const unsigned char *>(&image_data[0]);
|
||||
const auto *u_data =
|
||||
reinterpret_cast<const unsigned char *>(&image_data[0]);
|
||||
pix = pixReadMem(u_data, image_data.size());
|
||||
}
|
||||
return pix;
|
||||
@ -361,23 +366,25 @@ bool ImageData::AddBoxes(const char *box_text) {
|
||||
std::vector<std::string> texts;
|
||||
std::vector<int> box_pages;
|
||||
if (ReadMemBoxes(page_number_, /*skip_blanks*/ false, box_text,
|
||||
/*continue_on_failure*/ true, &boxes, &texts, nullptr, &box_pages)) {
|
||||
/*continue_on_failure*/ true, &boxes, &texts, nullptr,
|
||||
&box_pages)) {
|
||||
AddBoxes(boxes, texts, box_pages);
|
||||
return true;
|
||||
} else {
|
||||
tprintf("Error: No boxes for page %d from image %s!\n", page_number_, imagefilename_.c_str());
|
||||
tprintf("Error: No boxes for page %d from image %s!\n", page_number_,
|
||||
imagefilename_.c_str());
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
DocumentData::DocumentData(const std::string &name)
|
||||
: document_name_(name)
|
||||
, pages_offset_(-1)
|
||||
, total_pages_(-1)
|
||||
, memory_used_(0)
|
||||
, max_memory_(0)
|
||||
, reader_(nullptr) {}
|
||||
: document_name_(name),
|
||||
pages_offset_(-1),
|
||||
total_pages_(-1),
|
||||
memory_used_(0),
|
||||
max_memory_(0),
|
||||
reader_(nullptr) {}
|
||||
|
||||
DocumentData::~DocumentData() {
|
||||
if (thread.joinable()) {
|
||||
@ -392,15 +399,16 @@ DocumentData::~DocumentData() {
|
||||
|
||||
// Reads all the pages in the given lstmf filename to the cache. The reader
|
||||
// is used to read the file.
|
||||
bool DocumentData::LoadDocument(const char *filename, int start_page, int64_t max_memory,
|
||||
FileReader reader) {
|
||||
bool DocumentData::LoadDocument(const char *filename, int start_page,
|
||||
int64_t max_memory, FileReader reader) {
|
||||
SetDocument(filename, max_memory, reader);
|
||||
pages_offset_ = start_page;
|
||||
return ReCachePages();
|
||||
}
|
||||
|
||||
// Sets up the document, without actually loading it.
|
||||
void DocumentData::SetDocument(const char *filename, int64_t max_memory, FileReader reader) {
|
||||
void DocumentData::SetDocument(const char *filename, int64_t max_memory,
|
||||
FileReader reader) {
|
||||
std::lock_guard<std::mutex> lock_p(pages_mutex_);
|
||||
std::lock_guard<std::mutex> lock(general_mutex_);
|
||||
document_name_ = filename;
|
||||
@ -485,7 +493,8 @@ bool DocumentData::IsPageAvailable(int index, ImageData **page) {
|
||||
}
|
||||
if (num_pages > 0) {
|
||||
index = Modulo(index, num_pages);
|
||||
if (pages_offset_ <= index && static_cast<unsigned>(index) < pages_offset_ + pages_.size()) {
|
||||
if (pages_offset_ <= index &&
|
||||
static_cast<unsigned>(index) < pages_offset_ + pages_.size()) {
|
||||
*page = pages_[index - pages_offset_]; // Page is available already.
|
||||
return true;
|
||||
}
|
||||
@ -505,8 +514,8 @@ int64_t DocumentData::UnCache() {
|
||||
pages_offset_ = -1;
|
||||
set_total_pages(-1);
|
||||
set_memory_used(0);
|
||||
tprintf("Unloaded document %s, saving %" PRId64 " memory\n", document_name_.c_str(),
|
||||
memory_saved);
|
||||
tprintf("Unloaded document %s, saving %" PRId64 " memory\n",
|
||||
document_name_.c_str(), memory_saved);
|
||||
return memory_saved;
|
||||
}
|
||||
|
||||
@ -538,8 +547,8 @@ bool DocumentData::ReCachePages() {
|
||||
}
|
||||
pages_.clear();
|
||||
TFile fp;
|
||||
if (!fp.Open(document_name_.c_str(), reader_) || !fp.DeSerializeSize(&loaded_pages) ||
|
||||
loaded_pages <= 0) {
|
||||
if (!fp.Open(document_name_.c_str(), reader_) ||
|
||||
!fp.DeSerializeSize(&loaded_pages) || loaded_pages <= 0) {
|
||||
tprintf("Deserialize header failed: %s\n", document_name_.c_str());
|
||||
return false;
|
||||
}
|
||||
@ -552,7 +561,8 @@ bool DocumentData::ReCachePages() {
|
||||
if (!fp.DeSerialize(&non_null)) {
|
||||
break;
|
||||
}
|
||||
if (page < pages_offset_ || (max_memory_ > 0 && memory_used() > max_memory_)) {
|
||||
if (page < pages_offset_ ||
|
||||
(max_memory_ > 0 && memory_used() > max_memory_)) {
|
||||
if (non_null && !ImageData::SkipDeSerialize(&fp)) {
|
||||
break;
|
||||
}
|
||||
@ -574,16 +584,17 @@ bool DocumentData::ReCachePages() {
|
||||
}
|
||||
}
|
||||
if (page < loaded_pages) {
|
||||
tprintf("Deserialize failed: %s read %d/%d lines\n", document_name_.c_str(), page,
|
||||
loaded_pages);
|
||||
tprintf("Deserialize failed: %s read %d/%d lines\n", document_name_.c_str(),
|
||||
page, loaded_pages);
|
||||
for (auto page : pages_) {
|
||||
delete page;
|
||||
}
|
||||
pages_.clear();
|
||||
} else if (loaded_pages > 1) {
|
||||
// Avoid lots of messages for training with single line images.
|
||||
tprintf("Loaded %zu/%d lines (%d-%zu) of document %s\n", pages_.size(), loaded_pages,
|
||||
pages_offset_ + 1, pages_offset_ + pages_.size(), document_name_.c_str());
|
||||
tprintf("Loaded %zu/%d lines (%d-%zu) of document %s\n", pages_.size(),
|
||||
loaded_pages, pages_offset_ + 1, pages_offset_ + pages_.size(),
|
||||
document_name_.c_str());
|
||||
}
|
||||
set_total_pages(loaded_pages);
|
||||
return !pages_.empty();
|
||||
@ -601,7 +612,8 @@ DocumentCache::~DocumentCache() {
|
||||
// Adds all the documents in the list of filenames, counting memory.
|
||||
// The reader is used to read the files.
|
||||
bool DocumentCache::LoadDocuments(const std::vector<std::string> &filenames,
|
||||
CachingStrategy cache_strategy, FileReader reader) {
|
||||
CachingStrategy cache_strategy,
|
||||
FileReader reader) {
|
||||
cache_strategy_ = cache_strategy;
|
||||
int64_t fair_share_memory = 0;
|
||||
// In the round-robin case, each DocumentData handles restricting its content
|
||||
@ -610,7 +622,7 @@ bool DocumentCache::LoadDocuments(const std::vector<std::string> &filenames,
|
||||
if (cache_strategy_ == CS_ROUND_ROBIN) {
|
||||
fair_share_memory = max_memory_ / filenames.size();
|
||||
}
|
||||
for (auto filename : filenames) {
|
||||
for (const auto &filename : filenames) {
|
||||
auto *document = new DocumentData(filename);
|
||||
document->SetDocument(filename.c_str(), fair_share_memory, reader);
|
||||
AddToCache(document);
|
||||
@ -632,7 +644,8 @@ bool DocumentCache::AddToCache(DocumentData *data) {
|
||||
}
|
||||
|
||||
// Finds and returns a document by name.
|
||||
DocumentData *DocumentCache::FindDocument(const std::string &document_name) const {
|
||||
DocumentData *DocumentCache::FindDocument(
|
||||
const std::string &document_name) const {
|
||||
for (auto *document : documents_) {
|
||||
if (document->document_name() == document_name) {
|
||||
return document;
|
||||
@ -696,7 +709,8 @@ const ImageData *DocumentCache::GetPageSequential(int serial) {
|
||||
}
|
||||
}
|
||||
int doc_index = serial / num_pages_per_doc_ % num_docs;
|
||||
const ImageData *doc = documents_[doc_index]->GetPage(serial % num_pages_per_doc_);
|
||||
const ImageData *doc =
|
||||
documents_[doc_index]->GetPage(serial % num_pages_per_doc_);
|
||||
// Count up total memory. Background loading makes it more complicated to
|
||||
// keep a running count.
|
||||
int64_t total_memory = 0;
|
||||
@ -710,7 +724,8 @@ const ImageData *DocumentCache::GetPageSequential(int serial) {
|
||||
// we create a hole between them and then un-caching the backmost occupied
|
||||
// will work for both.
|
||||
int num_in_front = CountNeighbourDocs(doc_index, 1);
|
||||
for (int offset = num_in_front - 2; offset > 1 && total_memory >= max_memory_; --offset) {
|
||||
for (int offset = num_in_front - 2;
|
||||
offset > 1 && total_memory >= max_memory_; --offset) {
|
||||
int next_index = (doc_index + offset) % num_docs;
|
||||
total_memory -= documents_[next_index]->UnCache();
|
||||
}
|
||||
@ -718,7 +733,8 @@ const ImageData *DocumentCache::GetPageSequential(int serial) {
|
||||
// we take away the document that a 2nd reader is using, it will put it
|
||||
// back and make a hole between.
|
||||
int num_behind = CountNeighbourDocs(doc_index, -1);
|
||||
for (int offset = num_behind; offset < 0 && total_memory >= max_memory_; ++offset) {
|
||||
for (int offset = num_behind; offset < 0 && total_memory >= max_memory_;
|
||||
++offset) {
|
||||
int next_index = (doc_index + offset + num_docs) % num_docs;
|
||||
total_memory -= documents_[next_index]->UnCache();
|
||||
}
|
||||
|
@ -65,7 +65,8 @@ const double kMaxWordGapRatio = 2.0;
|
||||
// which words to keep, based on the adjustment factors of the two words.
|
||||
// TODO(rays) This is horrible. Replace with an enhance params training model.
|
||||
static double StopperAmbigThreshold(double f1, double f2) {
|
||||
return (f2 - f1) * kStopperAmbiguityThresholdGain - kStopperAmbiguityThresholdOffset;
|
||||
return (f2 - f1) * kStopperAmbiguityThresholdGain -
|
||||
kStopperAmbiguityThresholdOffset;
|
||||
}
|
||||
|
||||
/*************************************************************************
|
||||
@ -79,7 +80,8 @@ PAGE_RES::PAGE_RES(bool merge_similar_words, BLOCK_LIST *the_block_list,
|
||||
BLOCK_IT block_it(the_block_list);
|
||||
BLOCK_RES_IT block_res_it(&block_res_list);
|
||||
for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
|
||||
block_res_it.add_to_end(new BLOCK_RES(merge_similar_words, block_it.data()));
|
||||
block_res_it.add_to_end(
|
||||
new BLOCK_RES(merge_similar_words, block_it.data()));
|
||||
}
|
||||
prev_word_best_choice = prev_word_best_choice_ptr;
|
||||
}
|
||||
@ -127,7 +129,8 @@ ROW_RES::ROW_RES(bool merge_similar_words, ROW *the_row) {
|
||||
row = the_row;
|
||||
bool add_next_word = false;
|
||||
TBOX union_box;
|
||||
float line_height = the_row->x_height() + the_row->ascenders() - the_row->descenders();
|
||||
float line_height =
|
||||
the_row->x_height() + the_row->ascenders() - the_row->descenders();
|
||||
for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
|
||||
auto *word_res = new WERD_RES(word_it.data());
|
||||
word_res->x_height = the_row->x_height();
|
||||
@ -298,14 +301,17 @@ void WERD_RES::InitForRetryRecognition(const WERD_RES &source) {
|
||||
// norm_box is used to override the word bounding box to determine the
|
||||
// normalization scale and offset.
|
||||
// Returns false if the word is empty and sets up fake results.
|
||||
bool WERD_RES::SetupForRecognition(const UNICHARSET &unicharset_in, tesseract::Tesseract *tess,
|
||||
Image pix, int norm_mode, const TBOX *norm_box, bool numeric_mode,
|
||||
bool use_body_size, bool allow_detailed_fx, ROW *row,
|
||||
bool WERD_RES::SetupForRecognition(const UNICHARSET &unicharset_in,
|
||||
tesseract::Tesseract *tess, Image pix,
|
||||
int norm_mode, const TBOX *norm_box,
|
||||
bool numeric_mode, bool use_body_size,
|
||||
bool allow_detailed_fx, ROW *row,
|
||||
const BLOCK *block) {
|
||||
auto norm_mode_hint = static_cast<tesseract::OcrEngineMode>(norm_mode);
|
||||
tesseract = tess;
|
||||
POLY_BLOCK *pb = block != nullptr ? block->pdblk.poly_block() : nullptr;
|
||||
if ((norm_mode_hint != tesseract::OEM_LSTM_ONLY && word->cblob_list()->empty()) ||
|
||||
if ((norm_mode_hint != tesseract::OEM_LSTM_ONLY &&
|
||||
word->cblob_list()->empty()) ||
|
||||
(pb != nullptr && !pb->IsText())) {
|
||||
// Empty words occur when all the blobs have been moved to the rej_blobs
|
||||
// list, which seems to occur frequently in junk.
|
||||
@ -317,9 +323,12 @@ bool WERD_RES::SetupForRecognition(const UNICHARSET &unicharset_in, tesseract::T
|
||||
SetupWordScript(unicharset_in);
|
||||
chopped_word = TWERD::PolygonalCopy(allow_detailed_fx, word);
|
||||
float word_xheight =
|
||||
use_body_size && row != nullptr && row->body_size() > 0.0f ? row->body_size() : x_height;
|
||||
chopped_word->BLNormalize(block, row, pix, word->flag(W_INVERSE), word_xheight, baseline_shift,
|
||||
numeric_mode, norm_mode_hint, norm_box, &denorm);
|
||||
use_body_size && row != nullptr && row->body_size() > 0.0f
|
||||
? row->body_size()
|
||||
: x_height;
|
||||
chopped_word->BLNormalize(block, row, pix, word->flag(W_INVERSE),
|
||||
word_xheight, baseline_shift, numeric_mode,
|
||||
norm_mode_hint, norm_box, &denorm);
|
||||
blob_row = row;
|
||||
SetupBasicsFromChoppedWord(unicharset_in);
|
||||
SetupBlamerBundle();
|
||||
@ -398,7 +407,8 @@ void WERD_RES::SetupBlobWidthsAndGaps() {
|
||||
TBOX box = blob->bounding_box();
|
||||
blob_widths.push_back(box.width());
|
||||
if (b + 1 < num_blobs) {
|
||||
blob_gaps.push_back(chopped_word->blobs[b + 1]->bounding_box().left() - box.right());
|
||||
blob_gaps.push_back(chopped_word->blobs[b + 1]->bounding_box().left() -
|
||||
box.right());
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -460,8 +470,8 @@ bool WERD_RES::StatesAllValid() {
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++index) {
|
||||
WERD_CHOICE *choice = it.data();
|
||||
if (choice->TotalOfStates() != ratings_dim) {
|
||||
tprintf("Cooked #%u has total of states = %u vs ratings dim of %u\n", index,
|
||||
choice->TotalOfStates(), ratings_dim);
|
||||
tprintf("Cooked #%u has total of states = %u vs ratings dim of %u\n",
|
||||
index, choice->TotalOfStates(), ratings_dim);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -471,7 +481,8 @@ bool WERD_RES::StatesAllValid() {
|
||||
// Prints a list of words found if debug is true or the word result matches
|
||||
// the word_to_debug.
|
||||
void WERD_RES::DebugWordChoices(bool debug, const char *word_to_debug) {
|
||||
if (debug || (word_to_debug != nullptr && *word_to_debug != '\0' && best_choice != nullptr &&
|
||||
if (debug || (word_to_debug != nullptr && *word_to_debug != '\0' &&
|
||||
best_choice != nullptr &&
|
||||
best_choice->unichar_string() == std::string(word_to_debug))) {
|
||||
if (raw_choice != nullptr) {
|
||||
raw_choice->print("\nBest Raw Choice");
|
||||
@ -490,8 +501,8 @@ void WERD_RES::DebugWordChoices(bool debug, const char *word_to_debug) {
|
||||
|
||||
// Prints the top choice along with the accepted/done flags.
|
||||
void WERD_RES::DebugTopChoice(const char *msg) const {
|
||||
tprintf("Best choice: accepted=%d, adaptable=%d, done=%d : ", tess_accepted, tess_would_adapt,
|
||||
done);
|
||||
tprintf("Best choice: accepted=%d, adaptable=%d, done=%d : ", tess_accepted,
|
||||
tess_would_adapt, done);
|
||||
if (best_choice == nullptr) {
|
||||
tprintf("<Null choice>\n");
|
||||
} else {
|
||||
@ -516,7 +527,8 @@ void WERD_RES::FilterWordChoices(int debug_level) {
|
||||
int index = 0;
|
||||
for (it.forward(); !it.at_first(); it.forward(), ++index) {
|
||||
WERD_CHOICE *choice = it.data();
|
||||
float threshold = StopperAmbigThreshold(best_choice->adjust_factor(), choice->adjust_factor());
|
||||
float threshold = StopperAmbigThreshold(best_choice->adjust_factor(),
|
||||
choice->adjust_factor());
|
||||
// i, j index the blob choice in choice, best_choice.
|
||||
// chunk is an index into the chopped_word blobs (AKA chunks).
|
||||
// Since the two words may use different segmentations of the chunks, we
|
||||
@ -555,8 +567,10 @@ void WERD_RES::FilterWordChoices(int debug_level) {
|
||||
}
|
||||
}
|
||||
|
||||
void WERD_RES::ComputeAdaptionThresholds(float certainty_scale, float min_rating, float max_rating,
|
||||
float rating_margin, float *thresholds) {
|
||||
void WERD_RES::ComputeAdaptionThresholds(float certainty_scale,
|
||||
float min_rating, float max_rating,
|
||||
float rating_margin,
|
||||
float *thresholds) {
|
||||
int chunk = 0;
|
||||
int end_chunk = best_choice->state(0);
|
||||
int end_raw_chunk = raw_choice->state(0);
|
||||
@ -612,26 +626,29 @@ bool WERD_RES::LogNewRawChoice(WERD_CHOICE *word_choice) {
|
||||
// The best_choices list is kept in sorted order by rating. Duplicates are
|
||||
// removed, and the list is kept no longer than max_num_choices in length.
|
||||
// Returns true if the word_choice is still a valid pointer.
|
||||
bool WERD_RES::LogNewCookedChoice(int max_num_choices, bool debug, WERD_CHOICE *word_choice) {
|
||||
bool WERD_RES::LogNewCookedChoice(int max_num_choices, bool debug,
|
||||
WERD_CHOICE *word_choice) {
|
||||
if (best_choice != nullptr) {
|
||||
// Throw out obviously bad choices to save some work.
|
||||
// TODO(rays) Get rid of this! This piece of code produces different
|
||||
// results according to the order in which words are found, which is an
|
||||
// undesirable behavior. It would be better to keep all the choices and
|
||||
// prune them later when more information is available.
|
||||
float max_certainty_delta =
|
||||
StopperAmbigThreshold(best_choice->adjust_factor(), word_choice->adjust_factor());
|
||||
float max_certainty_delta = StopperAmbigThreshold(
|
||||
best_choice->adjust_factor(), word_choice->adjust_factor());
|
||||
if (max_certainty_delta > -kStopperAmbiguityThresholdOffset) {
|
||||
max_certainty_delta = -kStopperAmbiguityThresholdOffset;
|
||||
}
|
||||
if (word_choice->certainty() - best_choice->certainty() < max_certainty_delta) {
|
||||
if (word_choice->certainty() - best_choice->certainty() <
|
||||
max_certainty_delta) {
|
||||
if (debug) {
|
||||
std::string bad_string;
|
||||
word_choice->string_and_lengths(&bad_string, nullptr);
|
||||
tprintf(
|
||||
"Discarding choice \"%s\" with an overly low certainty"
|
||||
" %.3f vs best choice certainty %.3f (Threshold: %.3f)\n",
|
||||
bad_string.c_str(), word_choice->certainty(), best_choice->certainty(),
|
||||
bad_string.c_str(), word_choice->certainty(),
|
||||
best_choice->certainty(),
|
||||
max_certainty_delta + best_choice->certainty());
|
||||
}
|
||||
delete word_choice;
|
||||
@ -664,8 +681,8 @@ bool WERD_RES::LogNewCookedChoice(int max_num_choices, bool debug, WERD_CHOICE *
|
||||
} else {
|
||||
// Old is better.
|
||||
if (debug) {
|
||||
tprintf("Discarding duplicate choice \"%s\", rating %g vs %g\n", new_str.c_str(),
|
||||
word_choice->rating(), choice->rating());
|
||||
tprintf("Discarding duplicate choice \"%s\", rating %g vs %g\n",
|
||||
new_str.c_str(), word_choice->rating(), choice->rating());
|
||||
}
|
||||
delete word_choice;
|
||||
return false;
|
||||
@ -720,8 +737,8 @@ void WERD_RES::PrintBestChoices() const {
|
||||
}
|
||||
alternates_str += it.data()->unichar_string();
|
||||
}
|
||||
tprintf("Alternates for \"%s\": {\"%s\"}\n", best_choice->unichar_string().c_str(),
|
||||
alternates_str.c_str());
|
||||
tprintf("Alternates for \"%s\": {\"%s\"}\n",
|
||||
best_choice->unichar_string().c_str(), alternates_str.c_str());
|
||||
}
|
||||
|
||||
// Returns the sum of the widths of the blob between start_blob and last_blob
|
||||
@ -830,12 +847,14 @@ void WERD_RES::RebuildBestState() {
|
||||
int length = best_choice->state(i);
|
||||
best_state.push_back(length);
|
||||
if (length > 1) {
|
||||
SEAM::JoinPieces(seam_array, chopped_word->blobs, start, start + length - 1);
|
||||
SEAM::JoinPieces(seam_array, chopped_word->blobs, start,
|
||||
start + length - 1);
|
||||
}
|
||||
TBLOB *blob = chopped_word->blobs[start];
|
||||
rebuild_word->blobs.push_back(new TBLOB(*blob));
|
||||
if (length > 1) {
|
||||
SEAM::BreakPieces(seam_array, chopped_word->blobs, start, start + length - 1);
|
||||
SEAM::BreakPieces(seam_array, chopped_word->blobs, start,
|
||||
start + length - 1);
|
||||
}
|
||||
start += length;
|
||||
}
|
||||
@ -925,7 +944,8 @@ void WERD_RES::FakeWordFromRatings(PermuterType permuter) {
|
||||
rating = choice->rating();
|
||||
certainty = choice->certainty();
|
||||
}
|
||||
word_choice->append_unichar_id_space_allocated(unichar_id, 1, rating, certainty);
|
||||
word_choice->append_unichar_id_space_allocated(unichar_id, 1, rating,
|
||||
certainty);
|
||||
}
|
||||
LogNewRawChoice(word_choice);
|
||||
// Ownership of word_choice taken by word here.
|
||||
@ -948,14 +968,17 @@ void WERD_RES::BestChoiceToCorrectText() {
|
||||
// callback box_cb is nullptr or returns true, setting the merged blob
|
||||
// result to the class returned from class_cb.
|
||||
// Returns true if anything was merged.
|
||||
bool WERD_RES::ConditionalBlobMerge(std::function<UNICHAR_ID(UNICHAR_ID, UNICHAR_ID)> class_cb,
|
||||
std::function<bool(const TBOX &, const TBOX &)> box_cb) {
|
||||
bool WERD_RES::ConditionalBlobMerge(
|
||||
const std::function<UNICHAR_ID(UNICHAR_ID, UNICHAR_ID)> &class_cb,
|
||||
const std::function<bool(const TBOX &, const TBOX &)> &box_cb) {
|
||||
ASSERT_HOST(best_choice->empty() || ratings != nullptr);
|
||||
bool modified = false;
|
||||
for (unsigned i = 0; i + 1 < best_choice->length(); ++i) {
|
||||
UNICHAR_ID new_id = class_cb(best_choice->unichar_id(i), best_choice->unichar_id(i + 1));
|
||||
UNICHAR_ID new_id =
|
||||
class_cb(best_choice->unichar_id(i), best_choice->unichar_id(i + 1));
|
||||
if (new_id != INVALID_UNICHAR_ID &&
|
||||
(box_cb == nullptr || box_cb(box_word->BlobBox(i), box_word->BlobBox(i + 1)))) {
|
||||
(box_cb == nullptr ||
|
||||
box_cb(box_word->BlobBox(i), box_word->BlobBox(i + 1)))) {
|
||||
// Raw choice should not be fixed.
|
||||
best_choice->set_unichar_id(new_id, i);
|
||||
modified = true;
|
||||
@ -1003,8 +1026,9 @@ static int is_simple_quote(const char *signed_str, int length) {
|
||||
// Standard 1 byte quotes.
|
||||
return (length == 1 && (*str == '\'' || *str == '`')) ||
|
||||
// UTF-8 3 bytes curved quotes.
|
||||
(length == 3 && ((*str == 0xe2 && *(str + 1) == 0x80 && *(str + 2) == 0x98) ||
|
||||
(*str == 0xe2 && *(str + 1) == 0x80 && *(str + 2) == 0x99)));
|
||||
(length == 3 &&
|
||||
((*str == 0xe2 && *(str + 1) == 0x80 && *(str + 2) == 0x98) ||
|
||||
(*str == 0xe2 && *(str + 1) == 0x80 && *(str + 2) == 0x99)));
|
||||
}
|
||||
|
||||
// Callback helper for fix_quotes returns a double quote if both
|
||||
@ -1012,7 +1036,8 @@ static int is_simple_quote(const char *signed_str, int length) {
|
||||
UNICHAR_ID WERD_RES::BothQuotes(UNICHAR_ID id1, UNICHAR_ID id2) {
|
||||
const char *ch = uch_set->id_to_unichar(id1);
|
||||
const char *next_ch = uch_set->id_to_unichar(id2);
|
||||
if (is_simple_quote(ch, strlen(ch)) && is_simple_quote(next_ch, strlen(next_ch))) {
|
||||
if (is_simple_quote(ch, strlen(ch)) &&
|
||||
is_simple_quote(next_ch, strlen(next_ch))) {
|
||||
return uch_set->unichar_to_id("\"");
|
||||
}
|
||||
return INVALID_UNICHAR_ID;
|
||||
@ -1020,7 +1045,8 @@ UNICHAR_ID WERD_RES::BothQuotes(UNICHAR_ID id1, UNICHAR_ID id2) {
|
||||
|
||||
// Change pairs of quotes to double quotes.
|
||||
void WERD_RES::fix_quotes() {
|
||||
if (!uch_set->contains_unichar("\"") || !uch_set->get_enabled(uch_set->unichar_to_id("\""))) {
|
||||
if (!uch_set->contains_unichar("\"") ||
|
||||
!uch_set->get_enabled(uch_set->unichar_to_id("\""))) {
|
||||
return; // Don't create it if it is disallowed.
|
||||
}
|
||||
|
||||
@ -1049,7 +1075,8 @@ bool WERD_RES::HyphenBoxesOverlap(const TBOX &box1, const TBOX &box2) {
|
||||
// Change pairs of hyphens to a single hyphen if the bounding boxes touch
|
||||
// Typically a long dash which has been segmented.
|
||||
void WERD_RES::fix_hyphens() {
|
||||
if (!uch_set->contains_unichar("-") || !uch_set->get_enabled(uch_set->unichar_to_id("-"))) {
|
||||
if (!uch_set->contains_unichar("-") ||
|
||||
!uch_set->get_enabled(uch_set->unichar_to_id("-"))) {
|
||||
return; // Don't create it if it is disallowed.
|
||||
}
|
||||
|
||||
@ -1071,7 +1098,8 @@ UNICHAR_ID WERD_RES::BothSpaces(UNICHAR_ID id1, UNICHAR_ID id2) {
|
||||
// Change pairs of tess failures to a single one
|
||||
void WERD_RES::merge_tess_fails() {
|
||||
using namespace std::placeholders; // for _1, _2
|
||||
if (ConditionalBlobMerge(std::bind(&WERD_RES::BothSpaces, this, _1, _2), nullptr)) {
|
||||
if (ConditionalBlobMerge(std::bind(&WERD_RES::BothSpaces, this, _1, _2),
|
||||
nullptr)) {
|
||||
unsigned len = best_choice->length();
|
||||
ASSERT_HOST(reject_map.length() == len);
|
||||
ASSERT_HOST(box_word->length() == len);
|
||||
@ -1178,7 +1206,8 @@ int PAGE_RES_IT::cmp(const PAGE_RES_IT &other) const {
|
||||
}
|
||||
|
||||
WERD_RES_IT word_res_it(&row_res->word_res_list);
|
||||
for (word_res_it.mark_cycle_pt(); !word_res_it.cycled_list(); word_res_it.forward()) {
|
||||
for (word_res_it.mark_cycle_pt(); !word_res_it.cycled_list();
|
||||
word_res_it.forward()) {
|
||||
if (word_res_it.data() == word_res) {
|
||||
return -1;
|
||||
} else if (word_res_it.data() == other.word_res) {
|
||||
@ -1190,7 +1219,8 @@ int PAGE_RES_IT::cmp(const PAGE_RES_IT &other) const {
|
||||
|
||||
// we both point to the same block, but different rows.
|
||||
ROW_RES_IT row_res_it(&block_res->row_res_list);
|
||||
for (row_res_it.mark_cycle_pt(); !row_res_it.cycled_list(); row_res_it.forward()) {
|
||||
for (row_res_it.mark_cycle_pt(); !row_res_it.cycled_list();
|
||||
row_res_it.forward()) {
|
||||
if (row_res_it.data() == row_res) {
|
||||
return -1;
|
||||
} else if (row_res_it.data() == other.row_res) {
|
||||
@ -1202,7 +1232,8 @@ int PAGE_RES_IT::cmp(const PAGE_RES_IT &other) const {
|
||||
|
||||
// We point to different blocks.
|
||||
BLOCK_RES_IT block_res_it(&page_res->block_res_list);
|
||||
for (block_res_it.mark_cycle_pt(); !block_res_it.cycled_list(); block_res_it.forward()) {
|
||||
for (block_res_it.mark_cycle_pt(); !block_res_it.cycled_list();
|
||||
block_res_it.forward()) {
|
||||
if (block_res_it.data() == block_res) {
|
||||
return -1;
|
||||
} else if (block_res_it.data() == other.block_res) {
|
||||
@ -1218,7 +1249,8 @@ int PAGE_RES_IT::cmp(const PAGE_RES_IT &other) const {
|
||||
// before the current position. The simple fields of the WERD_RES are copied
|
||||
// from clone_res and the resulting WERD_RES is returned for further setup
|
||||
// with best_choice etc.
|
||||
WERD_RES *PAGE_RES_IT::InsertSimpleCloneWord(const WERD_RES &clone_res, WERD *new_word) {
|
||||
WERD_RES *PAGE_RES_IT::InsertSimpleCloneWord(const WERD_RES &clone_res,
|
||||
WERD *new_word) {
|
||||
// Make a WERD_RES for the new_word.
|
||||
auto *new_res = new WERD_RES(new_word);
|
||||
new_res->CopySimpleFields(clone_res);
|
||||
@ -1245,7 +1277,8 @@ WERD_RES *PAGE_RES_IT::InsertSimpleCloneWord(const WERD_RES &clone_res, WERD *ne
|
||||
// are likely very poor, if they come from LSTM, where it only outputs the
|
||||
// character at one pixel within it, so we find the midpoints between them.
|
||||
static void ComputeBlobEnds(const WERD_RES &word, const TBOX &clip_box,
|
||||
C_BLOB_LIST *next_word_blobs, std::vector<int> *blob_ends) {
|
||||
C_BLOB_LIST *next_word_blobs,
|
||||
std::vector<int> *blob_ends) {
|
||||
C_BLOB_IT blob_it(word.word->cblob_list());
|
||||
for (int length : word.best_state) {
|
||||
// Get the bounding box of the fake blobs
|
||||
@ -1272,17 +1305,18 @@ static void ComputeBlobEnds(const WERD_RES &word, const TBOX &clip_box,
|
||||
|
||||
// Helper computes the bounds of a word by restricting it to existing words
|
||||
// that significantly overlap.
|
||||
static TBOX ComputeWordBounds(const tesseract::PointerVector<WERD_RES> &words, int w_index,
|
||||
TBOX prev_box, WERD_RES_IT w_it) {
|
||||
static TBOX ComputeWordBounds(const tesseract::PointerVector<WERD_RES> &words,
|
||||
int w_index, TBOX prev_box, WERD_RES_IT w_it) {
|
||||
constexpr int kSignificantOverlapFraction = 4;
|
||||
TBOX clipped_box;
|
||||
TBOX current_box = words[w_index]->word->bounding_box();
|
||||
TBOX next_box;
|
||||
if (static_cast<size_t>(w_index + 1) < words.size() && words[w_index + 1] != nullptr &&
|
||||
words[w_index + 1]->word != nullptr) {
|
||||
if (static_cast<size_t>(w_index + 1) < words.size() &&
|
||||
words[w_index + 1] != nullptr && words[w_index + 1]->word != nullptr) {
|
||||
next_box = words[w_index + 1]->word->bounding_box();
|
||||
}
|
||||
for (w_it.forward(); !w_it.at_first() && w_it.data()->part_of_combo; w_it.forward()) {
|
||||
for (w_it.forward(); !w_it.at_first() && w_it.data()->part_of_combo;
|
||||
w_it.forward()) {
|
||||
if (w_it.data() == nullptr || w_it.data()->word == nullptr) {
|
||||
continue;
|
||||
}
|
||||
@ -1317,14 +1351,19 @@ static TBOX ComputeWordBounds(const tesseract::PointerVector<WERD_RES> &words, i
|
||||
|
||||
// Helper moves the blob from src to dest. If it isn't contained by clip_box,
|
||||
// the blob is replaced by a fake that is contained.
|
||||
static TBOX MoveAndClipBlob(C_BLOB_IT *src_it, C_BLOB_IT *dest_it, const TBOX &clip_box) {
|
||||
static TBOX MoveAndClipBlob(C_BLOB_IT *src_it, C_BLOB_IT *dest_it,
|
||||
const TBOX &clip_box) {
|
||||
C_BLOB *src_blob = src_it->extract();
|
||||
TBOX box = src_blob->bounding_box();
|
||||
if (!clip_box.contains(box)) {
|
||||
int left = ClipToRange<int>(box.left(), clip_box.left(), clip_box.right() - 1);
|
||||
int right = ClipToRange<int>(box.right(), clip_box.left() + 1, clip_box.right());
|
||||
int top = ClipToRange<int>(box.top(), clip_box.bottom() + 1, clip_box.top());
|
||||
int bottom = ClipToRange<int>(box.bottom(), clip_box.bottom(), clip_box.top() - 1);
|
||||
int left =
|
||||
ClipToRange<int>(box.left(), clip_box.left(), clip_box.right() - 1);
|
||||
int right =
|
||||
ClipToRange<int>(box.right(), clip_box.left() + 1, clip_box.right());
|
||||
int top =
|
||||
ClipToRange<int>(box.top(), clip_box.bottom() + 1, clip_box.top());
|
||||
int bottom =
|
||||
ClipToRange<int>(box.bottom(), clip_box.bottom(), clip_box.top() - 1);
|
||||
box = TBOX(left, bottom, right, top);
|
||||
delete src_blob;
|
||||
src_blob = C_BLOB::FakeBlob(box);
|
||||
@ -1336,7 +1375,8 @@ static TBOX MoveAndClipBlob(C_BLOB_IT *src_it, C_BLOB_IT *dest_it, const TBOX &c
|
||||
// Replaces the current WERD/WERD_RES with the given words. The given words
|
||||
// contain fake blobs that indicate the position of the characters. These are
|
||||
// replaced with real blobs from the current word as much as possible.
|
||||
void PAGE_RES_IT::ReplaceCurrentWord(tesseract::PointerVector<WERD_RES> *words) {
|
||||
void PAGE_RES_IT::ReplaceCurrentWord(
|
||||
tesseract::PointerVector<WERD_RES> *words) {
|
||||
if (words->empty()) {
|
||||
DeleteCurrentWord();
|
||||
return;
|
||||
@ -1405,11 +1445,13 @@ void PAGE_RES_IT::ReplaceCurrentWord(tesseract::PointerVector<WERD_RES> *words)
|
||||
int end_x = blob_ends[i];
|
||||
TBOX blob_box;
|
||||
// Add the blobs up to end_x.
|
||||
while (!src_b_it.empty() && src_b_it.data()->bounding_box().x_middle() < end_x) {
|
||||
while (!src_b_it.empty() &&
|
||||
src_b_it.data()->bounding_box().x_middle() < end_x) {
|
||||
blob_box += MoveAndClipBlob(&src_b_it, &dest_it, clip_box);
|
||||
src_b_it.forward();
|
||||
}
|
||||
while (!rej_b_it.empty() && rej_b_it.data()->bounding_box().x_middle() < end_x) {
|
||||
while (!rej_b_it.empty() &&
|
||||
rej_b_it.data()->bounding_box().x_middle() < end_x) {
|
||||
blob_box += MoveAndClipBlob(&rej_b_it, &dest_it, clip_box);
|
||||
rej_b_it.forward();
|
||||
}
|
||||
@ -1484,13 +1526,14 @@ void PAGE_RES_IT::MakeCurrentWordFuzzy() {
|
||||
// The next word should be the corresponding part of combo, but we have
|
||||
// already stepped past it, so find it by search.
|
||||
WERD_RES_IT wr_it(&row()->word_res_list);
|
||||
for (wr_it.mark_cycle_pt(); !wr_it.cycled_list() && wr_it.data() != word_res;
|
||||
wr_it.forward()) {
|
||||
for (wr_it.mark_cycle_pt();
|
||||
!wr_it.cycled_list() && wr_it.data() != word_res; wr_it.forward()) {
|
||||
}
|
||||
wr_it.forward();
|
||||
ASSERT_HOST(wr_it.data()->part_of_combo);
|
||||
real_word = wr_it.data()->word;
|
||||
ASSERT_HOST(!real_word->flag(W_FUZZY_SP) && !real_word->flag(W_FUZZY_NON));
|
||||
ASSERT_HOST(!real_word->flag(W_FUZZY_SP) &&
|
||||
!real_word->flag(W_FUZZY_NON));
|
||||
real_word->set_flag(W_FUZZY_SP, true);
|
||||
}
|
||||
}
|
||||
@ -1531,7 +1574,8 @@ void PAGE_RES_IT::ResetWordIterator() {
|
||||
// cycled_list state correctly.
|
||||
word_res_it.move_to_first();
|
||||
for (word_res_it.mark_cycle_pt();
|
||||
!word_res_it.cycled_list() && word_res_it.data() != next_word_res; word_res_it.forward()) {
|
||||
!word_res_it.cycled_list() && word_res_it.data() != next_word_res;
|
||||
word_res_it.forward()) {
|
||||
if (!word_res_it.data()->part_of_combo) {
|
||||
if (prev_row_res == row_res) {
|
||||
prev_word_res = word_res;
|
||||
@ -1624,8 +1668,9 @@ WERD_RES *PAGE_RES_IT::internal_forward(bool new_block, bool empty_ok) {
|
||||
foundword:
|
||||
// Update prev_word_best_choice pointer.
|
||||
if (page_res != nullptr && page_res->prev_word_best_choice != nullptr) {
|
||||
*page_res->prev_word_best_choice =
|
||||
(new_block || prev_word_res == nullptr) ? nullptr : prev_word_res->best_choice;
|
||||
*page_res->prev_word_best_choice = (new_block || prev_word_res == nullptr)
|
||||
? nullptr
|
||||
: prev_word_res->best_choice;
|
||||
}
|
||||
return word_res;
|
||||
}
|
||||
@ -1653,8 +1698,9 @@ WERD_RES *PAGE_RES_IT::restart_row() {
|
||||
*************************************************************************/
|
||||
|
||||
WERD_RES *PAGE_RES_IT::forward_paragraph() {
|
||||
while (block_res == next_block_res && (next_row_res != nullptr && next_row_res->row != nullptr &&
|
||||
row_res->row->para() == next_row_res->row->para())) {
|
||||
while (block_res == next_block_res &&
|
||||
(next_row_res != nullptr && next_row_res->row != nullptr &&
|
||||
row_res->row->para() == next_row_res->row->para())) {
|
||||
internal_forward(false, true);
|
||||
}
|
||||
return internal_forward(false, true);
|
||||
|
@ -19,24 +19,24 @@
|
||||
#ifndef PAGERES_H
|
||||
#define PAGERES_H
|
||||
|
||||
#include "blamer.h" // for BlamerBundle (ptr only), IRR_NUM_REASONS
|
||||
#include "clst.h" // for CLIST_ITERATOR, CLISTIZEH
|
||||
#include "blamer.h" // for BlamerBundle (ptr only), IRR_NUM_REASONS
|
||||
#include "clst.h" // for CLIST_ITERATOR, CLISTIZEH
|
||||
#include "elst.h" // for ELIST_ITERATOR, ELIST_LINK, ELISTIZEH
|
||||
#include "genericvector.h" // for PointerVector
|
||||
#include "elst.h" // for ELIST_ITERATOR, ELIST_LINK, ELISTIZEH
|
||||
#include "matrix.h" // for MATRIX
|
||||
#include "normalis.h" // for DENORM
|
||||
#include "ratngs.h" // for WERD_CHOICE, BLOB_CHOICE (ptr only)
|
||||
#include "rect.h" // for TBOX
|
||||
#include "rejctmap.h" // for REJMAP
|
||||
#include "unicharset.h" // for UNICHARSET, UNICHARSET::Direction, UNI...
|
||||
#include "werd.h" // for WERD, W_BOL, W_EOL
|
||||
#include "matrix.h" // for MATRIX
|
||||
#include "normalis.h" // for DENORM
|
||||
#include "ratngs.h" // for WERD_CHOICE, BLOB_CHOICE (ptr only)
|
||||
#include "rect.h" // for TBOX
|
||||
#include "rejctmap.h" // for REJMAP
|
||||
#include "unicharset.h" // for UNICHARSET, UNICHARSET::Direction, UNI...
|
||||
#include "werd.h" // for WERD, W_BOL, W_EOL
|
||||
|
||||
#include <tesseract/unichar.h> // for UNICHAR_ID, INVALID_UNICHAR_ID
|
||||
|
||||
#include <cstdint> // for int32_t, int16_t
|
||||
#include <functional> // for std::function
|
||||
#include <set> // for std::pair
|
||||
#include <vector> // for std::vector
|
||||
#include <cstdint> // for int32_t, int16_t
|
||||
#include <functional> // for std::function
|
||||
#include <set> // for std::pair
|
||||
#include <vector> // for std::vector
|
||||
|
||||
#include <sys/types.h> // for int8_t
|
||||
|
||||
@ -218,7 +218,8 @@ public:
|
||||
// Stores the lstm choices of every timestep
|
||||
std::vector<std::vector<std::pair<const char *, float>>> timesteps;
|
||||
// Stores the lstm choices of every timestep segmented by character
|
||||
std::vector<std::vector<std::vector<std::pair<const char *, float>>>> segmented_timesteps;
|
||||
std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
|
||||
segmented_timesteps;
|
||||
// Symbol choices acquired during CTC
|
||||
std::vector<std::vector<std::pair<const char *, float>>> CTC_symbol_choices;
|
||||
// Stores if the timestep vector starts with a space
|
||||
@ -391,7 +392,8 @@ public:
|
||||
}
|
||||
|
||||
bool AnyRtlCharsInWord() const {
|
||||
if (uch_set == nullptr || best_choice == nullptr || best_choice->length() < 1) {
|
||||
if (uch_set == nullptr || best_choice == nullptr ||
|
||||
best_choice->length() < 1) {
|
||||
return false;
|
||||
}
|
||||
for (unsigned id = 0; id < best_choice->length(); id++) {
|
||||
@ -400,7 +402,8 @@ public:
|
||||
continue; // Ignore illegal chars.
|
||||
}
|
||||
UNICHARSET::Direction dir = uch_set->get_direction(unichar_id);
|
||||
if (dir == UNICHARSET::U_RIGHT_TO_LEFT || dir == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC) {
|
||||
if (dir == UNICHARSET::U_RIGHT_TO_LEFT ||
|
||||
dir == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@ -408,7 +411,8 @@ public:
|
||||
}
|
||||
|
||||
bool AnyLtrCharsInWord() const {
|
||||
if (uch_set == nullptr || best_choice == nullptr || best_choice->length() < 1) {
|
||||
if (uch_set == nullptr || best_choice == nullptr ||
|
||||
best_choice->length() < 1) {
|
||||
return false;
|
||||
}
|
||||
for (unsigned id = 0; id < best_choice->length(); id++) {
|
||||
@ -417,7 +421,8 @@ public:
|
||||
continue; // Ignore illegal chars.
|
||||
}
|
||||
UNICHARSET::Direction dir = uch_set->get_direction(unichar_id);
|
||||
if (dir == UNICHARSET::U_LEFT_TO_RIGHT || dir == UNICHARSET::U_ARABIC_NUMBER) {
|
||||
if (dir == UNICHARSET::U_LEFT_TO_RIGHT ||
|
||||
dir == UNICHARSET::U_ARABIC_NUMBER) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@ -463,9 +468,11 @@ public:
|
||||
// of any of the above flags. It should really be a tesseract::OcrEngineMode
|
||||
// but is declared as int for ease of use with tessedit_ocr_engine_mode.
|
||||
// Returns false if the word is empty and sets up fake results.
|
||||
bool SetupForRecognition(const UNICHARSET &unicharset_in, tesseract::Tesseract *tesseract,
|
||||
Image pix, int norm_mode, const TBOX *norm_box, bool numeric_mode,
|
||||
bool use_body_size, bool allow_detailed_fx, ROW *row,
|
||||
bool SetupForRecognition(const UNICHARSET &unicharset_in,
|
||||
tesseract::Tesseract *tesseract, Image pix,
|
||||
int norm_mode, const TBOX *norm_box,
|
||||
bool numeric_mode, bool use_body_size,
|
||||
bool allow_detailed_fx, ROW *row,
|
||||
const BLOCK *block);
|
||||
|
||||
// Set up the seam array, bln_boxes, best_choice, and raw_choice to empty
|
||||
@ -529,8 +536,9 @@ public:
|
||||
// min_rating limits how tight to make a template.
|
||||
// max_rating limits how loose to make a template.
|
||||
// rating_margin denotes the amount of margin to put in template.
|
||||
void ComputeAdaptionThresholds(float certainty_scale, float min_rating, float max_rating,
|
||||
float rating_margin, float *thresholds);
|
||||
void ComputeAdaptionThresholds(float certainty_scale, float min_rating,
|
||||
float max_rating, float rating_margin,
|
||||
float *thresholds);
|
||||
|
||||
// Saves a copy of the word_choice if it has the best unadjusted rating.
|
||||
// Returns true if the word_choice was the new best.
|
||||
@ -541,7 +549,8 @@ public:
|
||||
// The best_choices list is kept in sorted order by rating. Duplicates are
|
||||
// removed, and the list is kept no longer than max_num_choices in length.
|
||||
// Returns true if the word_choice is still a valid pointer.
|
||||
bool LogNewCookedChoice(int max_num_choices, bool debug, WERD_CHOICE *word_choice);
|
||||
bool LogNewCookedChoice(int max_num_choices, bool debug,
|
||||
WERD_CHOICE *word_choice);
|
||||
|
||||
// Prints a brief list of all the best choices.
|
||||
void PrintBestChoices() const;
|
||||
@ -616,8 +625,9 @@ public:
|
||||
// callback box_cb is nullptr or returns true, setting the merged blob
|
||||
// result to the class returned from class_cb.
|
||||
// Returns true if anything was merged.
|
||||
bool ConditionalBlobMerge(std::function<UNICHAR_ID(UNICHAR_ID, UNICHAR_ID)> class_cb,
|
||||
std::function<bool(const TBOX &, const TBOX &)> box_cb);
|
||||
bool ConditionalBlobMerge(
|
||||
const std::function<UNICHAR_ID(UNICHAR_ID, UNICHAR_ID)> &class_cb,
|
||||
const std::function<bool(const TBOX &, const TBOX &)> &box_cb);
|
||||
|
||||
// Merges 2 adjacent blobs in the result (index and index+1) and corrects
|
||||
// all the data to account for the change.
|
||||
@ -683,7 +693,8 @@ public:
|
||||
// Do two PAGE_RES_ITs point at the same word?
|
||||
// This is much cheaper than cmp().
|
||||
bool operator==(const PAGE_RES_IT &other) const {
|
||||
return word_res == other.word_res && row_res == other.row_res && block_res == other.block_res;
|
||||
return word_res == other.word_res && row_res == other.row_res &&
|
||||
block_res == other.block_res;
|
||||
}
|
||||
|
||||
bool operator!=(const PAGE_RES_IT &other) const {
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include "unicharset.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
@ -149,7 +150,7 @@ BLOB_CHOICE &BLOB_CHOICE::operator=(const BLOB_CHOICE &other) {
|
||||
// Returns true if *this and other agree on the baseline and x-height
|
||||
// to within some tolerance based on a given estimate of the x-height.
|
||||
bool BLOB_CHOICE::PosAndSizeAgree(const BLOB_CHOICE &other, float x_height, bool debug) const {
|
||||
double baseline_diff = fabs(yshift() - other.yshift());
|
||||
double baseline_diff = std::fabs(yshift() - other.yshift());
|
||||
if (baseline_diff > kMaxBaselineDrift * x_height) {
|
||||
if (debug) {
|
||||
tprintf("Baseline diff %g for %d v %d\n", baseline_diff, unichar_id_, other.unichar_id_);
|
||||
|
@ -58,24 +58,26 @@ const double kMinXHeightFraction = 0.25;
|
||||
const double kMinCapHeightFraction = 0.05;
|
||||
|
||||
/*static */
|
||||
const char *UNICHARSET::kCustomLigatures[][2] = {{"ct", "\uE003"}, // c + t -> U+E003
|
||||
{"ſh", "\uE006"}, // long-s + h -> U+E006
|
||||
{"ſi", "\uE007"}, // long-s + i -> U+E007
|
||||
{"ſl", "\uE008"}, // long-s + l -> U+E008
|
||||
{"ſſ", "\uE009"}, // long-s + long-s -> U+E009
|
||||
{nullptr, nullptr}};
|
||||
const char *UNICHARSET::kCustomLigatures[][2] = {
|
||||
{"ct", "\uE003"}, // c + t -> U+E003
|
||||
{"ſh", "\uE006"}, // long-s + h -> U+E006
|
||||
{"ſi", "\uE007"}, // long-s + i -> U+E007
|
||||
{"ſl", "\uE008"}, // long-s + l -> U+E008
|
||||
{"ſſ", "\uE009"}, // long-s + long-s -> U+E009
|
||||
{nullptr, nullptr}};
|
||||
|
||||
// List of mappings to make when ingesting strings from the outside.
|
||||
// The substitutions clean up text that should exist for rendering of
|
||||
// synthetic data, but not in the recognition set.
|
||||
const char *UNICHARSET::kCleanupMaps[][2] = {{"\u0640", ""}, // TATWEEL is deleted.
|
||||
{"\ufb01", "fi"}, // fi ligature->fi pair.
|
||||
{"\ufb02", "fl"}, // fl ligature->fl pair.
|
||||
{nullptr, nullptr}};
|
||||
const char *UNICHARSET::kCleanupMaps[][2] = {
|
||||
{"\u0640", ""}, // TATWEEL is deleted.
|
||||
{"\ufb01", "fi"}, // fi ligature->fi pair.
|
||||
{"\ufb02", "fl"}, // fl ligature->fl pair.
|
||||
{nullptr, nullptr}};
|
||||
|
||||
// List of strings for the SpecialUnicharCodes. Keep in sync with the enum.
|
||||
const char *UNICHARSET::kSpecialUnicharCodes[SPECIAL_UNICHAR_CODES_COUNT] = {" ", "Joined",
|
||||
"|Broken|0|1"};
|
||||
const char *UNICHARSET::kSpecialUnicharCodes[SPECIAL_UNICHAR_CODES_COUNT] = {
|
||||
" ", "Joined", "|Broken|0|1"};
|
||||
|
||||
const char *UNICHARSET::null_script = "NULL";
|
||||
|
||||
@ -137,7 +139,8 @@ bool UNICHARSET::UNICHAR_PROPERTIES::AnyRangeEmpty() const {
|
||||
}
|
||||
|
||||
// Expands the ranges with the ranges from the src properties.
|
||||
void UNICHARSET::UNICHAR_PROPERTIES::ExpandRangesFrom(const UNICHAR_PROPERTIES &src) {
|
||||
void UNICHARSET::UNICHAR_PROPERTIES::ExpandRangesFrom(
|
||||
const UNICHAR_PROPERTIES &src) {
|
||||
UpdateRange(src.min_bottom, &min_bottom, &max_bottom);
|
||||
UpdateRange(src.max_bottom, &min_bottom, &max_bottom);
|
||||
UpdateRange(src.min_top, &min_top, &max_top);
|
||||
@ -164,7 +167,8 @@ void UNICHARSET::UNICHAR_PROPERTIES::CopyFrom(const UNICHAR_PROPERTIES &src) {
|
||||
fragment = saved_fragment;
|
||||
}
|
||||
|
||||
UNICHARSET::UNICHARSET() : ids(), script_table(nullptr), script_table_size_used(0) {
|
||||
UNICHARSET::UNICHARSET()
|
||||
: ids(), script_table(nullptr), script_table_size_used(0) {
|
||||
clear();
|
||||
for (int i = 0; i < SPECIAL_UNICHAR_CODES_COUNT; ++i) {
|
||||
unichar_insert(kSpecialUnicharCodes[i]);
|
||||
@ -180,13 +184,15 @@ UNICHARSET::~UNICHARSET() {
|
||||
|
||||
UNICHAR_ID
|
||||
UNICHARSET::unichar_to_id(const char *const unichar_repr) const {
|
||||
std::string cleaned = old_style_included_ ? unichar_repr : CleanupString(unichar_repr);
|
||||
std::string cleaned =
|
||||
old_style_included_ ? unichar_repr : CleanupString(unichar_repr);
|
||||
return ids.contains(cleaned.data(), cleaned.size())
|
||||
? ids.unichar_to_id(cleaned.data(), cleaned.size())
|
||||
: INVALID_UNICHAR_ID;
|
||||
}
|
||||
|
||||
UNICHAR_ID UNICHARSET::unichar_to_id(const char *const unichar_repr, int length) const {
|
||||
UNICHAR_ID UNICHARSET::unichar_to_id(const char *const unichar_repr,
|
||||
int length) const {
|
||||
assert(length > 0 && length <= UNICHAR_LEN);
|
||||
std::string cleaned(unichar_repr, length);
|
||||
if (!old_style_included_) {
|
||||
@ -215,7 +221,8 @@ int UNICHARSET::step(const char *str) const {
|
||||
// Return whether the given UTF-8 string is encodable with this UNICHARSET.
|
||||
// If not encodable, write the first byte offset which cannot be converted
|
||||
// into the second (return) argument.
|
||||
bool UNICHARSET::encodable_string(const char *str, unsigned *first_bad_position) const {
|
||||
bool UNICHARSET::encodable_string(const char *str,
|
||||
unsigned *first_bad_position) const {
|
||||
std::vector<UNICHAR_ID> encoding;
|
||||
return encode_string(str, true, &encoding, nullptr, first_bad_position);
|
||||
}
|
||||
@ -230,7 +237,8 @@ bool UNICHARSET::encodable_string(const char *str, unsigned *first_bad_position)
|
||||
// that do not belong in the unicharset, or encoding may fail.
|
||||
// Use CleanupString to perform the cleaning.
|
||||
bool UNICHARSET::encode_string(const char *str, bool give_up_on_failure,
|
||||
std::vector<UNICHAR_ID> *encoding, std::vector<char> *lengths,
|
||||
std::vector<UNICHAR_ID> *encoding,
|
||||
std::vector<char> *lengths,
|
||||
unsigned *encoded_length) const {
|
||||
std::vector<UNICHAR_ID> working_encoding;
|
||||
std::vector<char> working_lengths;
|
||||
@ -240,8 +248,8 @@ bool UNICHARSET::encode_string(const char *str, bool give_up_on_failure,
|
||||
unsigned str_pos = 0;
|
||||
bool perfect = true;
|
||||
while (str_pos < str_length) {
|
||||
encode_string(str, str_pos, str_length, &working_encoding, &working_lengths, &str_pos, encoding,
|
||||
&best_lengths);
|
||||
encode_string(str, str_pos, str_length, &working_encoding, &working_lengths,
|
||||
&str_pos, encoding, &best_lengths);
|
||||
if (str_pos < str_length) {
|
||||
// This is a non-match. Skip one utf-8 character.
|
||||
perfect = false;
|
||||
@ -357,8 +365,9 @@ void UNICHARSET::set_normed_ids(UNICHAR_ID unichar_id) {
|
||||
unichars[unichar_id].properties.normed_ids.clear();
|
||||
if (unichar_id == UNICHAR_SPACE && id_to_unichar(unichar_id)[0] == ' ') {
|
||||
unichars[unichar_id].properties.normed_ids.push_back(UNICHAR_SPACE);
|
||||
} else if (!encode_string(unichars[unichar_id].properties.normed.c_str(), true,
|
||||
&unichars[unichar_id].properties.normed_ids, nullptr, nullptr)) {
|
||||
} else if (!encode_string(unichars[unichar_id].properties.normed.c_str(),
|
||||
true, &unichars[unichar_id].properties.normed_ids,
|
||||
nullptr, nullptr)) {
|
||||
unichars[unichar_id].properties.normed_ids.clear();
|
||||
unichars[unichar_id].properties.normed_ids.push_back(unichar_id);
|
||||
}
|
||||
@ -383,7 +392,8 @@ void UNICHARSET::set_ranges_empty() {
|
||||
// Sets all the properties for this unicharset given a src unicharset with
|
||||
// everything set. The unicharsets don't have to be the same, and graphemes
|
||||
// are correctly accounted for.
|
||||
void UNICHARSET::PartialSetPropertiesFromOther(int start_index, const UNICHARSET &src) {
|
||||
void UNICHARSET::PartialSetPropertiesFromOther(int start_index,
|
||||
const UNICHARSET &src) {
|
||||
for (unsigned ch = start_index; ch < unichars.size(); ++ch) {
|
||||
const char *utf8 = id_to_unichar(ch);
|
||||
UNICHAR_PROPERTIES properties;
|
||||
@ -464,8 +474,10 @@ void UNICHARSET::AppendOtherUnicharset(const UNICHARSET &src) {
|
||||
// Returns true if the acceptable ranges of the tops of the characters do
|
||||
// not overlap, making their x-height calculations distinct.
|
||||
bool UNICHARSET::SizesDistinct(UNICHAR_ID id1, UNICHAR_ID id2) const {
|
||||
int overlap = std::min(unichars[id1].properties.max_top, unichars[id2].properties.max_top) -
|
||||
std::max(unichars[id1].properties.min_top, unichars[id2].properties.min_top);
|
||||
int overlap = std::min(unichars[id1].properties.max_top,
|
||||
unichars[id2].properties.max_top) -
|
||||
std::max(unichars[id1].properties.min_top,
|
||||
unichars[id2].properties.min_top);
|
||||
return overlap <= 0;
|
||||
}
|
||||
|
||||
@ -478,8 +490,10 @@ bool UNICHARSET::SizesDistinct(UNICHAR_ID id1, UNICHAR_ID id2) const {
|
||||
// the overall process of encoding a partially failed string more efficient.
|
||||
// See unicharset.h for definition of the args.
|
||||
void UNICHARSET::encode_string(const char *str, int str_index, int str_length,
|
||||
std::vector<UNICHAR_ID> *encoding, std::vector<char> *lengths,
|
||||
unsigned *best_total_length, std::vector<UNICHAR_ID> *best_encoding,
|
||||
std::vector<UNICHAR_ID> *encoding,
|
||||
std::vector<char> *lengths,
|
||||
unsigned *best_total_length,
|
||||
std::vector<UNICHAR_ID> *best_encoding,
|
||||
std::vector<char> *best_lengths) const {
|
||||
if (str_index > static_cast<int>(*best_total_length)) {
|
||||
// This is the best result so far.
|
||||
@ -504,8 +518,8 @@ void UNICHARSET::encode_string(const char *str, int str_index, int str_length,
|
||||
UNICHAR_ID id = ids.unichar_to_id(str + str_index, length);
|
||||
encoding->push_back(id);
|
||||
lengths->push_back(length);
|
||||
encode_string(str, str_index + length, str_length, encoding, lengths, best_total_length,
|
||||
best_encoding, best_lengths);
|
||||
encode_string(str, str_index + length, str_length, encoding, lengths,
|
||||
best_total_length, best_encoding, best_lengths);
|
||||
if (static_cast<int>(*best_total_length) == str_length) {
|
||||
return; // Tail recursion success!
|
||||
}
|
||||
@ -526,7 +540,8 @@ void UNICHARSET::encode_string(const char *str, int str_index, int str_length,
|
||||
// Returns false if no valid match was found in the unicharset.
|
||||
// NOTE that script_id, mirror, and other_case refer to this unicharset on
|
||||
// return and will need translation if the target unicharset is different.
|
||||
bool UNICHARSET::GetStrProperties(const char *utf8_str, UNICHAR_PROPERTIES *props) const {
|
||||
bool UNICHARSET::GetStrProperties(const char *utf8_str,
|
||||
UNICHAR_PROPERTIES *props) const {
|
||||
props->Init();
|
||||
props->SetRangesEmpty();
|
||||
int total_unicodes = 0;
|
||||
@ -636,22 +651,26 @@ char UNICHARSET::get_chartype(UNICHAR_ID id) const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
void UNICHARSET::unichar_insert(const char *const unichar_repr, OldUncleanUnichars old_style) {
|
||||
void UNICHARSET::unichar_insert(const char *const unichar_repr,
|
||||
OldUncleanUnichars old_style) {
|
||||
if (old_style == OldUncleanUnichars::kTrue) {
|
||||
old_style_included_ = true;
|
||||
}
|
||||
std::string cleaned = old_style_included_ ? unichar_repr : CleanupString(unichar_repr);
|
||||
std::string cleaned =
|
||||
old_style_included_ ? unichar_repr : CleanupString(unichar_repr);
|
||||
if (!cleaned.empty() && !ids.contains(cleaned.data(), cleaned.size())) {
|
||||
const char *str = cleaned.c_str();
|
||||
std::vector<int> encoding;
|
||||
if (!old_style_included_ && encode_string(str, true, &encoding, nullptr, nullptr)) {
|
||||
if (!old_style_included_ &&
|
||||
encode_string(str, true, &encoding, nullptr, nullptr)) {
|
||||
return;
|
||||
}
|
||||
auto &u = unichars.emplace_back();
|
||||
int index = 0;
|
||||
do {
|
||||
if (index >= UNICHAR_LEN) {
|
||||
fprintf(stderr, "Utf8 buffer too big, size>%d for %s\n", UNICHAR_LEN, unichar_repr);
|
||||
fprintf(stderr, "Utf8 buffer too big, size>%d for %s\n", UNICHAR_LEN,
|
||||
unichar_repr);
|
||||
return;
|
||||
}
|
||||
u.representation[index++] = *str++;
|
||||
@ -673,11 +692,13 @@ void UNICHARSET::unichar_insert(const char *const unichar_repr, OldUncleanUnicha
|
||||
}
|
||||
|
||||
bool UNICHARSET::contains_unichar(const char *const unichar_repr) const {
|
||||
std::string cleaned = old_style_included_ ? unichar_repr : CleanupString(unichar_repr);
|
||||
std::string cleaned =
|
||||
old_style_included_ ? unichar_repr : CleanupString(unichar_repr);
|
||||
return ids.contains(cleaned.data(), cleaned.size());
|
||||
}
|
||||
|
||||
bool UNICHARSET::contains_unichar(const char *const unichar_repr, int length) const {
|
||||
bool UNICHARSET::contains_unichar(const char *const unichar_repr,
|
||||
int length) const {
|
||||
if (length == 0) {
|
||||
return false;
|
||||
}
|
||||
@ -688,7 +709,8 @@ bool UNICHARSET::contains_unichar(const char *const unichar_repr, int length) co
|
||||
return ids.contains(cleaned.data(), cleaned.size());
|
||||
}
|
||||
|
||||
bool UNICHARSET::eq(UNICHAR_ID unichar_id, const char *const unichar_repr) const {
|
||||
bool UNICHARSET::eq(UNICHAR_ID unichar_id,
|
||||
const char *const unichar_repr) const {
|
||||
return strcmp(this->id_to_unichar(unichar_id), unichar_repr) == 0;
|
||||
}
|
||||
|
||||
@ -709,17 +731,20 @@ bool UNICHARSET::save_to_string(std::string &str) const {
|
||||
unsigned int properties = this->get_properties(id);
|
||||
if (strcmp(this->id_to_unichar(id), " ") == 0) {
|
||||
snprintf(buffer, kFileBufSize, "%s %x %s %d\n", "NULL", properties,
|
||||
this->get_script_from_script_id(this->get_script(id)), this->get_other_case(id));
|
||||
this->get_script_from_script_id(this->get_script(id)),
|
||||
this->get_other_case(id));
|
||||
str += buffer;
|
||||
} else {
|
||||
std::ostringstream stream;
|
||||
stream.imbue(std::locale::classic());
|
||||
stream << this->id_to_unichar(id) << ' ' << properties << ' ' << min_bottom << ','
|
||||
<< max_bottom << ',' << min_top << ',' << max_top << ',' << width << ',' << width_sd
|
||||
<< ',' << bearing << ',' << bearing_sd << ',' << advance << ',' << advance_sd << ' '
|
||||
stream << this->id_to_unichar(id) << ' ' << properties << ' '
|
||||
<< min_bottom << ',' << max_bottom << ',' << min_top << ','
|
||||
<< max_top << ',' << width << ',' << width_sd << ',' << bearing
|
||||
<< ',' << bearing_sd << ',' << advance << ',' << advance_sd << ' '
|
||||
<< this->get_script_from_script_id(this->get_script(id)) << ' '
|
||||
<< this->get_other_case(id) << ' ' << this->get_direction(id) << ' '
|
||||
<< this->get_mirror(id) << ' ' << this->get_normed_unichar(id) << "\t# "
|
||||
<< this->get_other_case(id) << ' ' << this->get_direction(id)
|
||||
<< ' ' << this->get_mirror(id) << ' '
|
||||
<< this->get_normed_unichar(id) << "\t# "
|
||||
<< this->debug_str(id).c_str() << '\n';
|
||||
str += stream.str().c_str();
|
||||
}
|
||||
@ -741,24 +766,28 @@ private:
|
||||
bool UNICHARSET::load_from_file(FILE *file, bool skip_fragments) {
|
||||
LocalFilePointer lfp(file);
|
||||
using namespace std::placeholders; // for _1, _2
|
||||
std::function<char *(char *, int)> fgets_cb = std::bind(&LocalFilePointer::fgets, &lfp, _1, _2);
|
||||
std::function<char *(char *, int)> fgets_cb =
|
||||
std::bind(&LocalFilePointer::fgets, &lfp, _1, _2);
|
||||
bool success = load_via_fgets(fgets_cb, skip_fragments);
|
||||
return success;
|
||||
}
|
||||
|
||||
bool UNICHARSET::load_from_file(tesseract::TFile *file, bool skip_fragments) {
|
||||
using namespace std::placeholders; // for _1, _2
|
||||
std::function<char *(char *, int)> fgets_cb = std::bind(&tesseract::TFile::FGets, file, _1, _2);
|
||||
std::function<char *(char *, int)> fgets_cb =
|
||||
std::bind(&tesseract::TFile::FGets, file, _1, _2);
|
||||
bool success = load_via_fgets(fgets_cb, skip_fragments);
|
||||
return success;
|
||||
}
|
||||
|
||||
bool UNICHARSET::load_via_fgets(std::function<char *(char *, int)> fgets_cb, bool skip_fragments) {
|
||||
bool UNICHARSET::load_via_fgets(
|
||||
const std::function<char *(char *, int)> &fgets_cb, bool skip_fragments) {
|
||||
int unicharset_size;
|
||||
char buffer[256];
|
||||
|
||||
this->clear();
|
||||
if (fgets_cb(buffer, sizeof(buffer)) == nullptr || sscanf(buffer, "%d", &unicharset_size) != 1) {
|
||||
if (fgets_cb(buffer, sizeof(buffer)) == nullptr ||
|
||||
sscanf(buffer, "%d", &unicharset_size) != 1) {
|
||||
return false;
|
||||
}
|
||||
for (UNICHAR_ID id = 0; id < unicharset_size; ++id) {
|
||||
@ -800,27 +829,30 @@ bool UNICHARSET::load_via_fgets(std::function<char *(char *, int)> fgets_cb, boo
|
||||
auto position = stream.tellg();
|
||||
stream.seekg(position);
|
||||
char c1, c2, c3, c4, c5, c6, c7, c8, c9;
|
||||
stream >> min_bottom >> c1 >> max_bottom >> c2 >> min_top >> c3 >> max_top >> c4 >> width >>
|
||||
c5 >> width_sd >> c6 >> bearing >> c7 >> bearing_sd >> c8 >> advance >> c9 >> advance_sd >>
|
||||
std::setw(63) >> script >> other_case >> direction >> mirror >> std::setw(63) >> normed;
|
||||
if (stream.fail() || c1 != ',' || c2 != ',' || c3 != ',' || c4 != ',' || c5 != ',' ||
|
||||
c6 != ',' || c7 != ',' || c8 != ',' || c9 != ',') {
|
||||
stream >> min_bottom >> c1 >> max_bottom >> c2 >> min_top >> c3 >>
|
||||
max_top >> c4 >> width >> c5 >> width_sd >> c6 >> bearing >> c7 >>
|
||||
bearing_sd >> c8 >> advance >> c9 >> advance_sd >> std::setw(63) >>
|
||||
script >> other_case >> direction >> mirror >> std::setw(63) >> normed;
|
||||
if (stream.fail() || c1 != ',' || c2 != ',' || c3 != ',' || c4 != ',' ||
|
||||
c5 != ',' || c6 != ',' || c7 != ',' || c8 != ',' || c9 != ',') {
|
||||
stream.clear();
|
||||
stream.seekg(position);
|
||||
stream >> min_bottom >> c1 >> max_bottom >> c2 >> min_top >> c3 >> max_top >> c4 >> width >>
|
||||
c5 >> width_sd >> c6 >> bearing >> c7 >> bearing_sd >> c8 >> advance >> c9 >>
|
||||
advance_sd >> std::setw(63) >> script >> other_case >> direction >> mirror;
|
||||
if (stream.fail() || c1 != ',' || c2 != ',' || c3 != ',' || c4 != ',' || c5 != ',' ||
|
||||
c6 != ',' || c7 != ',' || c8 != ',' || c9 != ',') {
|
||||
stream >> min_bottom >> c1 >> max_bottom >> c2 >> min_top >> c3 >>
|
||||
max_top >> c4 >> width >> c5 >> width_sd >> c6 >> bearing >> c7 >>
|
||||
bearing_sd >> c8 >> advance >> c9 >> advance_sd >> std::setw(63) >>
|
||||
script >> other_case >> direction >> mirror;
|
||||
if (stream.fail() || c1 != ',' || c2 != ',' || c3 != ',' || c4 != ',' ||
|
||||
c5 != ',' || c6 != ',' || c7 != ',' || c8 != ',' || c9 != ',') {
|
||||
stream.clear();
|
||||
stream.seekg(position);
|
||||
stream >> min_bottom >> c1 >> max_bottom >> c2 >> min_top >> c3 >> max_top >>
|
||||
std::setw(63) >> script >> other_case >> direction >> mirror;
|
||||
stream >> min_bottom >> c1 >> max_bottom >> c2 >> min_top >> c3 >>
|
||||
max_top >> std::setw(63) >> script >> other_case >> direction >>
|
||||
mirror;
|
||||
if (stream.fail() || c1 != ',' || c2 != ',' || c3 != ',') {
|
||||
stream.clear();
|
||||
stream.seekg(position);
|
||||
stream >> min_bottom >> c1 >> max_bottom >> c2 >> min_top >> c3 >> max_top >>
|
||||
std::setw(63) >> script >> other_case;
|
||||
stream >> min_bottom >> c1 >> max_bottom >> c2 >> min_top >> c3 >>
|
||||
max_top >> std::setw(63) >> script >> other_case;
|
||||
if (stream.fail() || c1 != ',' || c2 != ',' || c3 != ',') {
|
||||
stream.clear();
|
||||
stream.seekg(position);
|
||||
@ -909,8 +941,9 @@ void UNICHARSET::post_load_setup() {
|
||||
|
||||
script_has_upper_lower_ = net_case_alphas > 0;
|
||||
script_has_xheight_ =
|
||||
script_has_upper_lower_ || (x_height_alphas > cap_height_alphas * kMinXHeightFraction &&
|
||||
cap_height_alphas > x_height_alphas * kMinCapHeightFraction);
|
||||
script_has_upper_lower_ ||
|
||||
(x_height_alphas > cap_height_alphas * kMinXHeightFraction &&
|
||||
cap_height_alphas > x_height_alphas * kMinCapHeightFraction);
|
||||
|
||||
null_sid_ = get_script_id_from_name(null_script);
|
||||
ASSERT_HOST(null_sid_ == 0);
|
||||
@ -954,7 +987,8 @@ bool UNICHARSET::major_right_to_left() const {
|
||||
if (dir == UNICHARSET::U_LEFT_TO_RIGHT) {
|
||||
ltr_count++;
|
||||
}
|
||||
if (dir == UNICHARSET::U_RIGHT_TO_LEFT || dir == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC ||
|
||||
if (dir == UNICHARSET::U_RIGHT_TO_LEFT ||
|
||||
dir == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC ||
|
||||
dir == UNICHARSET::U_ARABIC_NUMBER) {
|
||||
rtl_count++;
|
||||
}
|
||||
@ -966,7 +1000,8 @@ bool UNICHARSET::major_right_to_left() const {
|
||||
// An empty or nullptr whitelist enables everything (minus any blacklist).
|
||||
// An empty or nullptr blacklist disables nothing.
|
||||
// An empty or nullptr unblacklist has no effect.
|
||||
void UNICHARSET::set_black_and_whitelist(const char *blacklist, const char *whitelist,
|
||||
void UNICHARSET::set_black_and_whitelist(const char *blacklist,
|
||||
const char *whitelist,
|
||||
const char *unblacklist) {
|
||||
bool def_enabled = whitelist == nullptr || whitelist[0] == '\0';
|
||||
// Set everything to default
|
||||
@ -1037,7 +1072,8 @@ int UNICHARSET::add_script(const char *script) {
|
||||
assert(script_table_size_used == script_table_size_reserved);
|
||||
script_table_size_reserved += script_table_size_reserved;
|
||||
char **new_script_table = new char *[script_table_size_reserved];
|
||||
memcpy(new_script_table, script_table, script_table_size_used * sizeof(char *));
|
||||
memcpy(new_script_table, script_table,
|
||||
script_table_size_used * sizeof(char *));
|
||||
delete[] script_table;
|
||||
script_table = new_script_table;
|
||||
}
|
||||
@ -1048,7 +1084,8 @@ int UNICHARSET::add_script(const char *script) {
|
||||
|
||||
// Returns the string that represents a fragment
|
||||
// with the given unichar, pos and total.
|
||||
std::string CHAR_FRAGMENT::to_string(const char *unichar, int pos, int total, bool natural) {
|
||||
std::string CHAR_FRAGMENT::to_string(const char *unichar, int pos, int total,
|
||||
bool natural) {
|
||||
if (total == 1) {
|
||||
return std::string(unichar);
|
||||
}
|
||||
@ -1056,8 +1093,8 @@ std::string CHAR_FRAGMENT::to_string(const char *unichar, int pos, int total, bo
|
||||
result += kSeparator;
|
||||
result += unichar;
|
||||
char buffer[kMaxLen];
|
||||
snprintf(buffer, kMaxLen, "%c%d%c%d", kSeparator, pos, natural ? kNaturalFlag : kSeparator,
|
||||
total);
|
||||
snprintf(buffer, kMaxLen, "%c%d%c%d", kSeparator, pos,
|
||||
natural ? kNaturalFlag : kSeparator, total);
|
||||
result += buffer;
|
||||
return result;
|
||||
}
|
||||
|
@ -85,7 +85,8 @@ public:
|
||||
|
||||
// Returns the string that represents a fragment
|
||||
// with the given unichar, pos and total.
|
||||
static std::string to_string(const char *unichar, int pos, int total, bool natural);
|
||||
static std::string to_string(const char *unichar, int pos, int total,
|
||||
bool natural);
|
||||
// Returns the string that represents this fragment.
|
||||
std::string to_string() const {
|
||||
return to_string(unichar, pos, total, natural);
|
||||
@ -93,19 +94,22 @@ public:
|
||||
|
||||
// Checks whether a fragment has the same unichar,
|
||||
// position and total as the given inputs.
|
||||
inline bool equals(const char *other_unichar, int other_pos, int other_total) const {
|
||||
return (strcmp(this->unichar, other_unichar) == 0 && this->pos == other_pos &&
|
||||
this->total == other_total);
|
||||
inline bool equals(const char *other_unichar, int other_pos,
|
||||
int other_total) const {
|
||||
return (strcmp(this->unichar, other_unichar) == 0 &&
|
||||
this->pos == other_pos && this->total == other_total);
|
||||
}
|
||||
inline bool equals(const CHAR_FRAGMENT *other) const {
|
||||
return this->equals(other->get_unichar(), other->get_pos(), other->get_total());
|
||||
return this->equals(other->get_unichar(), other->get_pos(),
|
||||
other->get_total());
|
||||
}
|
||||
|
||||
// Checks whether a given fragment is a continuation of this fragment.
|
||||
// Assumes that the given fragment pointer is not nullptr.
|
||||
inline bool is_continuation_of(const CHAR_FRAGMENT *fragment) const {
|
||||
return (strcmp(this->unichar, fragment->get_unichar()) == 0 &&
|
||||
this->total == fragment->get_total() && this->pos == fragment->get_pos() + 1);
|
||||
this->total == fragment->get_total() &&
|
||||
this->pos == fragment->get_pos() + 1);
|
||||
}
|
||||
|
||||
// Returns true if this fragment is a beginning fragment.
|
||||
@ -237,8 +241,10 @@ public:
|
||||
// WARNING: Caller must guarantee that str has already been cleaned of codes
|
||||
// that do not belong in the unicharset, or encoding may fail.
|
||||
// Use CleanupString to perform the cleaning.
|
||||
bool encode_string(const char *str, bool give_up_on_failure, std::vector<UNICHAR_ID> *encoding,
|
||||
std::vector<char> *lengths, unsigned *encoded_length) const;
|
||||
bool encode_string(const char *str, bool give_up_on_failure,
|
||||
std::vector<UNICHAR_ID> *encoding,
|
||||
std::vector<char> *lengths,
|
||||
unsigned *encoded_length) const;
|
||||
|
||||
// Return the unichar representation corresponding to the given UNICHAR_ID
|
||||
// within the UNICHARSET.
|
||||
@ -272,7 +278,8 @@ public:
|
||||
// TATWEEL characters are kept and n-grams are allowed. Otherwise TATWEEL
|
||||
// characters are ignored/skipped as if they don't exist and n-grams that
|
||||
// can already be encoded are not added.
|
||||
void unichar_insert(const char *const unichar_repr, OldUncleanUnichars old_style);
|
||||
void unichar_insert(const char *const unichar_repr,
|
||||
OldUncleanUnichars old_style);
|
||||
void unichar_insert(const char *const unichar_repr) {
|
||||
unichar_insert(unichar_repr, OldUncleanUnichars::kFalse);
|
||||
}
|
||||
@ -365,7 +372,8 @@ public:
|
||||
// Returns true if the operation is successful.
|
||||
bool save_to_file(FILE *file) const {
|
||||
std::string str;
|
||||
return save_to_string(str) && tesseract::Serialize(file, &str[0], str.length());
|
||||
return save_to_string(str) &&
|
||||
tesseract::Serialize(file, &str[0], str.length());
|
||||
}
|
||||
|
||||
bool save_to_file(tesseract::TFile *file) const {
|
||||
@ -575,8 +583,8 @@ public:
|
||||
// baseline-normalized coordinates, ie, where the baseline is
|
||||
// kBlnBaselineOffset and the meanline is kBlnBaselineOffset + kBlnXHeight
|
||||
// (See normalis.h for the definitions).
|
||||
void get_top_bottom(UNICHAR_ID unichar_id, int *min_bottom, int *max_bottom, int *min_top,
|
||||
int *max_top) const {
|
||||
void get_top_bottom(UNICHAR_ID unichar_id, int *min_bottom, int *max_bottom,
|
||||
int *min_top, int *max_top) const {
|
||||
if (INVALID_UNICHAR_ID == unichar_id) {
|
||||
*min_bottom = *min_top = 0;
|
||||
*max_bottom = *max_top = 256; // kBlnCellHeight
|
||||
@ -588,16 +596,21 @@ public:
|
||||
*min_top = unichars[unichar_id].properties.min_top;
|
||||
*max_top = unichars[unichar_id].properties.max_top;
|
||||
}
|
||||
void set_top_bottom(UNICHAR_ID unichar_id, int min_bottom, int max_bottom, int min_top,
|
||||
int max_top) {
|
||||
unichars[unichar_id].properties.min_bottom = ClipToRange<int>(min_bottom, 0, UINT8_MAX);
|
||||
unichars[unichar_id].properties.max_bottom = ClipToRange<int>(max_bottom, 0, UINT8_MAX);
|
||||
unichars[unichar_id].properties.min_top = ClipToRange<int>(min_top, 0, UINT8_MAX);
|
||||
unichars[unichar_id].properties.max_top = ClipToRange<int>(max_top, 0, UINT8_MAX);
|
||||
void set_top_bottom(UNICHAR_ID unichar_id, int min_bottom, int max_bottom,
|
||||
int min_top, int max_top) {
|
||||
unichars[unichar_id].properties.min_bottom =
|
||||
ClipToRange<int>(min_bottom, 0, UINT8_MAX);
|
||||
unichars[unichar_id].properties.max_bottom =
|
||||
ClipToRange<int>(max_bottom, 0, UINT8_MAX);
|
||||
unichars[unichar_id].properties.min_top =
|
||||
ClipToRange<int>(min_top, 0, UINT8_MAX);
|
||||
unichars[unichar_id].properties.max_top =
|
||||
ClipToRange<int>(max_top, 0, UINT8_MAX);
|
||||
}
|
||||
// Returns the width stats (as mean, sd) of the given unichar relative to the
|
||||
// median advance of all characters in the character set.
|
||||
void get_width_stats(UNICHAR_ID unichar_id, float *width, float *width_sd) const {
|
||||
void get_width_stats(UNICHAR_ID unichar_id, float *width,
|
||||
float *width_sd) const {
|
||||
if (INVALID_UNICHAR_ID == unichar_id) {
|
||||
*width = 0.0f;
|
||||
*width_sd = 0.0f;
|
||||
@ -614,7 +627,8 @@ public:
|
||||
}
|
||||
// Returns the stats of the x-bearing (as mean, sd) of the given unichar
|
||||
// relative to the median advance of all characters in the character set.
|
||||
void get_bearing_stats(UNICHAR_ID unichar_id, float *bearing, float *bearing_sd) const {
|
||||
void get_bearing_stats(UNICHAR_ID unichar_id, float *bearing,
|
||||
float *bearing_sd) const {
|
||||
if (INVALID_UNICHAR_ID == unichar_id) {
|
||||
*bearing = *bearing_sd = 0.0f;
|
||||
return;
|
||||
@ -623,13 +637,15 @@ public:
|
||||
*bearing = unichars[unichar_id].properties.bearing;
|
||||
*bearing_sd = unichars[unichar_id].properties.bearing_sd;
|
||||
}
|
||||
void set_bearing_stats(UNICHAR_ID unichar_id, float bearing, float bearing_sd) {
|
||||
void set_bearing_stats(UNICHAR_ID unichar_id, float bearing,
|
||||
float bearing_sd) {
|
||||
unichars[unichar_id].properties.bearing = bearing;
|
||||
unichars[unichar_id].properties.bearing_sd = bearing_sd;
|
||||
}
|
||||
// Returns the stats of the x-advance of the given unichar (as mean, sd)
|
||||
// relative to the median advance of all characters in the character set.
|
||||
void get_advance_stats(UNICHAR_ID unichar_id, float *advance, float *advance_sd) const {
|
||||
void get_advance_stats(UNICHAR_ID unichar_id, float *advance,
|
||||
float *advance_sd) const {
|
||||
if (INVALID_UNICHAR_ID == unichar_id) {
|
||||
*advance = *advance_sd = 0;
|
||||
return;
|
||||
@ -638,7 +654,8 @@ public:
|
||||
*advance = unichars[unichar_id].properties.advance;
|
||||
*advance_sd = unichars[unichar_id].properties.advance_sd;
|
||||
}
|
||||
void set_advance_stats(UNICHAR_ID unichar_id, float advance, float advance_sd) {
|
||||
void set_advance_stats(UNICHAR_ID unichar_id, float advance,
|
||||
float advance_sd) {
|
||||
unichars[unichar_id].properties.advance = advance;
|
||||
unichars[unichar_id].properties.advance_sd = advance_sd;
|
||||
}
|
||||
@ -654,8 +671,9 @@ public:
|
||||
return true;
|
||||
}
|
||||
int script_id = get_script(unichar_id);
|
||||
return script_id != han_sid_ && script_id != thai_sid_ && script_id != hangul_sid_ &&
|
||||
script_id != hiragana_sid_ && script_id != katakana_sid_;
|
||||
return script_id != han_sid_ && script_id != thai_sid_ &&
|
||||
script_id != hangul_sid_ && script_id != hiragana_sid_ &&
|
||||
script_id != katakana_sid_;
|
||||
}
|
||||
|
||||
// Return the script name of the given unichar.
|
||||
@ -738,7 +756,8 @@ public:
|
||||
// at these codes and they should not be used.
|
||||
bool has_special_codes() const {
|
||||
return get_fragment(UNICHAR_BROKEN) != nullptr &&
|
||||
strcmp(id_to_unichar(UNICHAR_BROKEN), kSpecialUnicharCodes[UNICHAR_BROKEN]) == 0;
|
||||
strcmp(id_to_unichar(UNICHAR_BROKEN),
|
||||
kSpecialUnicharCodes[UNICHAR_BROKEN]) == 0;
|
||||
}
|
||||
|
||||
// Returns true if there are any repeated unicodes in the normalized
|
||||
@ -800,7 +819,8 @@ public:
|
||||
// Return a pointer to the CHAR_FRAGMENT class struct if the given
|
||||
// unichar representation represents a character fragment.
|
||||
const CHAR_FRAGMENT *get_fragment(const char *const unichar_repr) const {
|
||||
if (unichar_repr == nullptr || unichar_repr[0] == '\0' || !ids.contains(unichar_repr, false)) {
|
||||
if (unichar_repr == nullptr || unichar_repr[0] == '\0' ||
|
||||
!ids.contains(unichar_repr, false)) {
|
||||
return nullptr;
|
||||
}
|
||||
return get_fragment(unichar_to_id(unichar_repr));
|
||||
@ -1020,8 +1040,9 @@ private:
|
||||
// best_encoding contains the encoding that used the longest part of str.
|
||||
// best_lengths (may be null) contains the lengths of best_encoding.
|
||||
void encode_string(const char *str, int str_index, int str_length,
|
||||
std::vector<UNICHAR_ID> *encoding, std::vector<char> *lengths,
|
||||
unsigned *best_total_length, std::vector<UNICHAR_ID> *best_encoding,
|
||||
std::vector<UNICHAR_ID> *encoding,
|
||||
std::vector<char> *lengths, unsigned *best_total_length,
|
||||
std::vector<UNICHAR_ID> *best_encoding,
|
||||
std::vector<char> *best_lengths) const;
|
||||
|
||||
// Gets the properties for a grapheme string, combining properties for
|
||||
@ -1034,7 +1055,8 @@ private:
|
||||
// Load ourselves from a "file" where our only interface to the file is
|
||||
// an implementation of fgets(). This is the parsing primitive accessed by
|
||||
// the public routines load_from_file().
|
||||
bool load_via_fgets(std::function<char *(char *, int)> fgets_cb, bool skip_fragments);
|
||||
bool load_via_fgets(const std::function<char *(char *, int)> &fgets_cb,
|
||||
bool skip_fragments);
|
||||
|
||||
// List of mappings to make when ingesting strings from the outside.
|
||||
// The substitutions clean up text that should exists for rendering of
|
||||
|
@ -1782,13 +1782,13 @@ PROTO_ID Classify::MakeNewTempProtos(FEATURE_SET Features, int NumBadFeat, FEATU
|
||||
Y2 = F2->Params[PicoFeatY];
|
||||
A2 = F2->Params[PicoFeatDir];
|
||||
|
||||
AngleDelta = fabs(A1 - A2);
|
||||
AngleDelta = std::fabs(A1 - A2);
|
||||
if (AngleDelta > 0.5) {
|
||||
AngleDelta = 1.0 - AngleDelta;
|
||||
}
|
||||
|
||||
if (AngleDelta > matcher_clustering_max_angle_delta || fabs(X1 - X2) > SegmentLength ||
|
||||
fabs(Y1 - Y2) > SegmentLength) {
|
||||
if (AngleDelta > matcher_clustering_max_angle_delta || std::fabs(X1 - X2) > SegmentLength ||
|
||||
std::fabs(Y1 - Y2) > SegmentLength) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -1674,13 +1674,13 @@ float Mean(PROTOTYPE *Proto, uint16_t Dimension) {
|
||||
float StandardDeviation(PROTOTYPE *Proto, uint16_t Dimension) {
|
||||
switch (Proto->Style) {
|
||||
case spherical:
|
||||
return sqrt(Proto->Variance.Spherical);
|
||||
return std::sqrt(Proto->Variance.Spherical);
|
||||
case elliptical:
|
||||
return sqrt(Proto->Variance.Elliptical[Dimension]);
|
||||
return std::sqrt(Proto->Variance.Elliptical[Dimension]);
|
||||
case mixed:
|
||||
switch (Proto->Distrib[Dimension]) {
|
||||
case normal:
|
||||
return sqrt(Proto->Variance.Elliptical[Dimension]);
|
||||
return std::sqrt(Proto->Variance.Elliptical[Dimension]);
|
||||
case uniform:
|
||||
case D_random:
|
||||
return Proto->Variance.Elliptical[Dimension];
|
||||
@ -2268,7 +2268,7 @@ static PROTOTYPE *MakeMixedProto(CLUSTERER *Clusterer, CLUSTER *Cluster, STATIST
|
||||
}
|
||||
|
||||
FillBuckets(NormalBuckets, Cluster, i, &(Clusterer->ParamDesc[i]), Proto->Mean[i],
|
||||
sqrt(Proto->Variance.Elliptical[i]));
|
||||
std::sqrt(Proto->Variance.Elliptical[i]));
|
||||
if (DistributionOK(NormalBuckets)) {
|
||||
continue;
|
||||
}
|
||||
@ -2576,7 +2576,7 @@ static bool Independent(PARAM_DESC *ParamDesc, int16_t N, float *CoVariance, flo
|
||||
if ((*VARii == 0.0) || (*VARjj == 0.0)) {
|
||||
CorrelationCoeff = 0.0;
|
||||
} else {
|
||||
CorrelationCoeff = sqrt(sqrt(*CoVariance * *CoVariance / (*VARii * *VARjj)));
|
||||
CorrelationCoeff = sqrt(std::sqrt(*CoVariance * *CoVariance / (*VARii * *VARjj)));
|
||||
}
|
||||
if (CorrelationCoeff > Independence) {
|
||||
return false;
|
||||
|
@ -207,7 +207,7 @@ PROTOTYPE *ReadPrototype(TFile *fp, uint16_t N) {
|
||||
case spherical:
|
||||
ReadNFloats(fp, 1, &(Proto->Variance.Spherical));
|
||||
Proto->Magnitude.Spherical = 1.0 / sqrt(2.0 * M_PI * Proto->Variance.Spherical);
|
||||
Proto->TotalMagnitude = pow(Proto->Magnitude.Spherical, static_cast<float>(N));
|
||||
Proto->TotalMagnitude = std::pow(Proto->Magnitude.Spherical, static_cast<float>(N));
|
||||
Proto->LogMagnitude = log(static_cast<double>(Proto->TotalMagnitude));
|
||||
Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical;
|
||||
Proto->Distrib.clear();
|
||||
|
@ -676,7 +676,7 @@ IntegerMatcher::IntegerMatcher(tesseract::IntParam *classify_debug_level)
|
||||
|
||||
if (kSEExponentialMultiplier > 0.0) {
|
||||
double scale =
|
||||
1.0 - exp(-kSEExponentialMultiplier) *
|
||||
1.0 - std::exp(-kSEExponentialMultiplier) *
|
||||
exp(kSEExponentialMultiplier * (static_cast<double>(i) / SE_TABLE_SIZE));
|
||||
evidence *= ClipToRange(scale, 0.0, 1.0);
|
||||
}
|
||||
|
@ -365,14 +365,14 @@ void AddProtoToProtoPruner(PROTO_STRUCT *Proto, int ProtoId, INT_CLASS_STRUCT *C
|
||||
Length = Proto->Length;
|
||||
|
||||
X = Proto->X + X_SHIFT;
|
||||
Pad = std::max(fabs(cos(Angle)) * (Length / 2.0 + classify_pp_end_pad * GetPicoFeatureLength()),
|
||||
fabs(sin(Angle)) * (classify_pp_side_pad * GetPicoFeatureLength()));
|
||||
Pad = std::max(fabs(std::cos(Angle)) * (Length / 2.0 + classify_pp_end_pad * GetPicoFeatureLength()),
|
||||
fabs(std::sin(Angle)) * (classify_pp_side_pad * GetPicoFeatureLength()));
|
||||
|
||||
FillPPLinearBits(ProtoSet->ProtoPruner[PRUNER_X], Index, X, Pad, debug);
|
||||
|
||||
Y = Proto->Y + Y_SHIFT;
|
||||
Pad = std::max(fabs(sin(Angle)) * (Length / 2.0 + classify_pp_end_pad * GetPicoFeatureLength()),
|
||||
fabs(cos(Angle)) * (classify_pp_side_pad * GetPicoFeatureLength()));
|
||||
Pad = std::max(fabs(std::sin(Angle)) * (Length / 2.0 + classify_pp_end_pad * GetPicoFeatureLength()),
|
||||
fabs(std::cos(Angle)) * (classify_pp_side_pad * GetPicoFeatureLength()));
|
||||
|
||||
FillPPLinearBits(ProtoSet->ProtoPruner[PRUNER_Y], Index, Y, Pad, debug);
|
||||
} /* AddProtoToProtoPruner */
|
||||
@ -1388,8 +1388,8 @@ void InitTableFiller(float EndPad, float SidePad, float AnglePad, PROTO_STRUCT *
|
||||
if ((Angle > 0.0 && Angle < 0.25) || (Angle > 0.5 && Angle < 0.75)) {
|
||||
/* rising diagonal proto */
|
||||
Angle *= 2.0 * M_PI;
|
||||
Cos = fabs(cos(Angle));
|
||||
Sin = fabs(sin(Angle));
|
||||
Cos = fabs(std::cos(Angle));
|
||||
Sin = fabs(std::sin(Angle));
|
||||
|
||||
/* compute the positions of the corners of the acceptance region */
|
||||
Start.x = X - (HalfLength + EndPad) * Cos - SidePad * Sin;
|
||||
@ -1438,8 +1438,8 @@ void InitTableFiller(float EndPad, float SidePad, float AnglePad, PROTO_STRUCT *
|
||||
} else {
|
||||
/* falling diagonal proto */
|
||||
Angle *= 2.0 * M_PI;
|
||||
Cos = fabs(cos(Angle));
|
||||
Sin = fabs(sin(Angle));
|
||||
Cos = fabs(std::cos(Angle));
|
||||
Sin = fabs(std::sin(Angle));
|
||||
|
||||
/* compute the positions of the corners of the acceptance region */
|
||||
Start.x = X - (HalfLength + EndPad) * Cos - SidePad * Sin;
|
||||
|
@ -33,7 +33,7 @@ namespace tesseract {
|
||||
/*-----------------------------------------------------------------------------
|
||||
Global Data Definitions and Declarations
|
||||
-----------------------------------------------------------------------------*/
|
||||
#define MINSEARCH -FLT_MAX
|
||||
#define MINSEARCH (-FLT_MAX)
|
||||
#define MAXSEARCH FLT_MAX
|
||||
|
||||
// Helper function to find the next essential dimension in a cycle.
|
||||
@ -398,7 +398,7 @@ float DistanceSquared(int k, PARAM_DESC *dim, float p1[], float p2[]) {
|
||||
}
|
||||
|
||||
float ComputeDistance(int k, PARAM_DESC *dim, float p1[], float p2[]) {
|
||||
return sqrt(DistanceSquared(k, dim, p1, p2));
|
||||
return std::sqrt(DistanceSquared(k, dim, p1, p2));
|
||||
}
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
|
@ -38,7 +38,8 @@ namespace tesseract {
|
||||
// instead of weak vtables in every compilation unit.
|
||||
Dawg::~Dawg() = default;
|
||||
|
||||
bool Dawg::prefix_in_dawg(const WERD_CHOICE &word, bool requires_complete) const {
|
||||
bool Dawg::prefix_in_dawg(const WERD_CHOICE &word,
|
||||
bool requires_complete) const {
|
||||
if (word.empty()) {
|
||||
return !requires_complete;
|
||||
}
|
||||
@ -56,7 +57,8 @@ bool Dawg::prefix_in_dawg(const WERD_CHOICE &word, bool requires_complete) const
|
||||
}
|
||||
}
|
||||
// Now check the last character.
|
||||
return edge_char_of(node, word.unichar_id(end_index), requires_complete) != NO_EDGE;
|
||||
return edge_char_of(node, word.unichar_id(end_index), requires_complete) !=
|
||||
NO_EDGE;
|
||||
}
|
||||
|
||||
bool Dawg::word_in_dawg(const WERD_CHOICE &word) const {
|
||||
@ -84,7 +86,8 @@ int Dawg::check_for_words(const char *filename, const UNICHARSET &unicharset,
|
||||
chomp_string(string); // remove newline
|
||||
WERD_CHOICE word(string, unicharset);
|
||||
if (word.length() > 0 && !word.contains_unichar_id(INVALID_UNICHAR_ID)) {
|
||||
if (!match_words(&word, 0, 0, enable_wildcard ? wildcard : INVALID_UNICHAR_ID)) {
|
||||
if (!match_words(&word, 0, 0,
|
||||
enable_wildcard ? wildcard : INVALID_UNICHAR_ID)) {
|
||||
tprintf("Missing word: %s\n", string);
|
||||
++misses;
|
||||
}
|
||||
@ -106,21 +109,25 @@ void Dawg::iterate_words(const UNICHARSET &unicharset,
|
||||
iterate_words_rec(word, 0, cb);
|
||||
}
|
||||
|
||||
static void CallWithUTF8(std::function<void(const char *)> cb, const WERD_CHOICE *wc) {
|
||||
static void CallWithUTF8(const std::function<void(const char *)> &cb,
|
||||
const WERD_CHOICE *wc) {
|
||||
std::string s;
|
||||
wc->string_and_lengths(&s, nullptr);
|
||||
cb(s.c_str());
|
||||
}
|
||||
|
||||
void Dawg::iterate_words(const UNICHARSET &unicharset, std::function<void(const char *)> cb) const {
|
||||
void Dawg::iterate_words(const UNICHARSET &unicharset,
|
||||
const std::function<void(const char *)> &cb) const {
|
||||
using namespace std::placeholders; // for _1
|
||||
std::function<void(const WERD_CHOICE *)> shim(std::bind(CallWithUTF8, cb, _1));
|
||||
std::function<void(const WERD_CHOICE *)> shim(
|
||||
std::bind(CallWithUTF8, cb, _1));
|
||||
WERD_CHOICE word(&unicharset);
|
||||
iterate_words_rec(word, 0, shim);
|
||||
}
|
||||
|
||||
void Dawg::iterate_words_rec(const WERD_CHOICE &word_so_far, NODE_REF to_explore,
|
||||
std::function<void(const WERD_CHOICE *)> cb) const {
|
||||
void Dawg::iterate_words_rec(
|
||||
const WERD_CHOICE &word_so_far, NODE_REF to_explore,
|
||||
const std::function<void(const WERD_CHOICE *)> &cb) const {
|
||||
NodeChildVector children;
|
||||
this->unichar_ids_of(to_explore, &children, false);
|
||||
for (auto &i : children) {
|
||||
@ -136,7 +143,8 @@ void Dawg::iterate_words_rec(const WERD_CHOICE &word_so_far, NODE_REF to_explore
|
||||
}
|
||||
}
|
||||
|
||||
bool Dawg::match_words(WERD_CHOICE *word, uint32_t index, NODE_REF node, UNICHAR_ID wildcard) const {
|
||||
bool Dawg::match_words(WERD_CHOICE *word, uint32_t index, NODE_REF node,
|
||||
UNICHAR_ID wildcard) const {
|
||||
if (wildcard != INVALID_UNICHAR_ID && word->unichar_id(index) == wildcard) {
|
||||
bool any_matched = false;
|
||||
NodeChildVector vec;
|
||||
@ -187,7 +195,8 @@ SquishedDawg::~SquishedDawg() {
|
||||
delete[] edges_;
|
||||
}
|
||||
|
||||
EDGE_REF SquishedDawg::edge_char_of(NODE_REF node, UNICHAR_ID unichar_id, bool word_end) const {
|
||||
EDGE_REF SquishedDawg::edge_char_of(NODE_REF node, UNICHAR_ID unichar_id,
|
||||
bool word_end) const {
|
||||
EDGE_REF edge = node;
|
||||
if (node == 0) { // binary search
|
||||
EDGE_REF start = 0;
|
||||
@ -195,7 +204,8 @@ EDGE_REF SquishedDawg::edge_char_of(NODE_REF node, UNICHAR_ID unichar_id, bool w
|
||||
int compare;
|
||||
while (start <= end) {
|
||||
edge = (start + end) >> 1; // (start + end) / 2
|
||||
compare = given_greater_than_edge_rec(NO_EDGE, word_end, unichar_id, edges_[edge]);
|
||||
compare = given_greater_than_edge_rec(NO_EDGE, word_end, unichar_id,
|
||||
edges_[edge]);
|
||||
if (compare == 0) { // given == vec[k]
|
||||
return edge;
|
||||
} else if (compare == 1) { // given > vec[k]
|
||||
@ -258,8 +268,8 @@ void SquishedDawg::print_node(NODE_REF node, int max_num_edges) const {
|
||||
eow = end_of_word(edge) ? eow_string : not_eow_string;
|
||||
|
||||
unichar_id = edge_letter(edge);
|
||||
tprintf(REFFORMAT " : next = " REFFORMAT ", unichar_id = %d, %s %s %s\n", edge,
|
||||
next_node(edge), unichar_id, direction, is_last, eow);
|
||||
tprintf(REFFORMAT " : next = " REFFORMAT ", unichar_id = %d, %s %s %s\n",
|
||||
edge, next_node(edge), unichar_id, direction, is_last, eow);
|
||||
|
||||
if (edge - node > max_num_edges) {
|
||||
return;
|
||||
@ -273,8 +283,9 @@ void SquishedDawg::print_node(NODE_REF node, int max_num_edges) const {
|
||||
eow = end_of_word(edge) ? eow_string : not_eow_string;
|
||||
|
||||
unichar_id = edge_letter(edge);
|
||||
tprintf(REFFORMAT " : next = " REFFORMAT ", unichar_id = %d, %s %s %s\n", edge,
|
||||
next_node(edge), unichar_id, direction, is_last, eow);
|
||||
tprintf(REFFORMAT " : next = " REFFORMAT
|
||||
", unichar_id = %d, %s %s %s\n",
|
||||
edge, next_node(edge), unichar_id, direction, is_last, eow);
|
||||
|
||||
if (edge - node > MAX_NODE_EDGES_DISPLAY) {
|
||||
return;
|
||||
@ -291,9 +302,11 @@ void SquishedDawg::print_edge(EDGE_REF edge) const {
|
||||
if (edge == NO_EDGE) {
|
||||
tprintf("NO_EDGE\n");
|
||||
} else {
|
||||
tprintf(REFFORMAT " : next = " REFFORMAT ", unichar_id = '%d', %s %s %s\n", edge,
|
||||
next_node(edge), edge_letter(edge), (forward_edge(edge) ? "FORWARD" : " "),
|
||||
(last_edge(edge) ? "LAST" : " "), (end_of_word(edge) ? "EOW" : ""));
|
||||
tprintf(REFFORMAT " : next = " REFFORMAT ", unichar_id = '%d', %s %s %s\n",
|
||||
edge, next_node(edge), edge_letter(edge),
|
||||
(forward_edge(edge) ? "FORWARD" : " "),
|
||||
(last_edge(edge) ? "LAST" : " "),
|
||||
(end_of_word(edge) ? "EOW" : ""));
|
||||
}
|
||||
}
|
||||
|
||||
@ -328,8 +341,8 @@ bool SquishedDawg::read_squished_dawg(TFile *file) {
|
||||
return false;
|
||||
}
|
||||
if (debug_level_ > 2) {
|
||||
tprintf("type: %d lang: %s perm: %d unicharset_size: %d num_edges: %d\n", type_, lang_.c_str(),
|
||||
perm_, unicharset_size_, num_edges_);
|
||||
tprintf("type: %d lang: %s perm: %d unicharset_size: %d num_edges: %d\n",
|
||||
type_, lang_.c_str(), perm_, unicharset_size_, num_edges_);
|
||||
for (EDGE_REF edge = 0; edge < num_edges_; ++edge) {
|
||||
print_edge(edge);
|
||||
}
|
||||
@ -337,7 +350,8 @@ bool SquishedDawg::read_squished_dawg(TFile *file) {
|
||||
return true;
|
||||
}
|
||||
|
||||
std::unique_ptr<EDGE_REF[]> SquishedDawg::build_node_map(int32_t *num_nodes) const {
|
||||
std::unique_ptr<EDGE_REF[]> SquishedDawg::build_node_map(
|
||||
int32_t *num_nodes) const {
|
||||
EDGE_REF edge;
|
||||
std::unique_ptr<EDGE_REF[]> node_map(new EDGE_REF[num_edges_]);
|
||||
int32_t node_counter;
|
||||
|
@ -147,16 +147,19 @@ public:
|
||||
|
||||
// For each word in the Dawg, call the given (permanent) callback with the
|
||||
// text (UTF-8) version of the word.
|
||||
void iterate_words(const UNICHARSET &unicharset, std::function<void(const char *)> cb) const;
|
||||
void iterate_words(const UNICHARSET &unicharset,
|
||||
const std::function<void(const char *)> &cb) const;
|
||||
|
||||
// Pure virtual function that should be implemented by the derived classes.
|
||||
|
||||
/// Returns the edge that corresponds to the letter out of this node.
|
||||
virtual EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id, bool word_end) const = 0;
|
||||
virtual EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id,
|
||||
bool word_end) const = 0;
|
||||
|
||||
/// Fills the given NodeChildVector with all the unichar ids (and the
|
||||
/// corresponding EDGE_REFs) for which there is an edge out of this node.
|
||||
virtual void unichar_ids_of(NODE_REF node, NodeChildVector *vec, bool word_end) const = 0;
|
||||
virtual void unichar_ids_of(NODE_REF node, NodeChildVector *vec,
|
||||
bool word_end) const = 0;
|
||||
|
||||
/// Returns the next node visited by following the edge
|
||||
/// indicated by the given EDGE_REF.
|
||||
@ -175,7 +178,8 @@ public:
|
||||
|
||||
/// Fills vec with unichar ids that represent the character classes
|
||||
/// of the given unichar_id.
|
||||
virtual void unichar_id_to_patterns(UNICHAR_ID unichar_id, const UNICHARSET &unicharset,
|
||||
virtual void unichar_id_to_patterns(UNICHAR_ID unichar_id,
|
||||
const UNICHARSET &unicharset,
|
||||
std::vector<UNICHAR_ID> *vec) const {
|
||||
(void)unichar_id;
|
||||
(void)unicharset;
|
||||
@ -194,8 +198,13 @@ public:
|
||||
}
|
||||
|
||||
protected:
|
||||
Dawg(DawgType type, const std::string &lang, PermuterType perm, int debug_level)
|
||||
: lang_(lang), type_(type), perm_(perm), unicharset_size_(0), debug_level_(debug_level) {}
|
||||
Dawg(DawgType type, const std::string &lang, PermuterType perm,
|
||||
int debug_level)
|
||||
: lang_(lang),
|
||||
type_(type),
|
||||
perm_(perm),
|
||||
unicharset_size_(0),
|
||||
debug_level_(debug_level) {}
|
||||
|
||||
/// Returns the next node visited by following this edge.
|
||||
inline NODE_REF next_node_from_edge_rec(const EDGE_RECORD &edge_rec) const {
|
||||
@ -207,14 +216,16 @@ protected:
|
||||
}
|
||||
/// Returns the direction flag of this edge.
|
||||
inline int direction_from_edge_rec(const EDGE_RECORD &edge_rec) const {
|
||||
return ((edge_rec & (DIRECTION_FLAG << flag_start_bit_))) ? BACKWARD_EDGE : FORWARD_EDGE;
|
||||
return ((edge_rec & (DIRECTION_FLAG << flag_start_bit_))) ? BACKWARD_EDGE
|
||||
: FORWARD_EDGE;
|
||||
}
|
||||
/// Returns true if this edge marks the end of a word.
|
||||
inline bool end_of_word_from_edge_rec(const EDGE_RECORD &edge_rec) const {
|
||||
return (edge_rec & (WERD_END_FLAG << flag_start_bit_)) != 0;
|
||||
}
|
||||
/// Returns UNICHAR_ID recorded in this edge.
|
||||
inline UNICHAR_ID unichar_id_from_edge_rec(const EDGE_RECORD &edge_rec) const {
|
||||
inline UNICHAR_ID unichar_id_from_edge_rec(
|
||||
const EDGE_RECORD &edge_rec) const {
|
||||
return ((edge_rec & letter_mask_) >> LETTER_START_BIT);
|
||||
}
|
||||
/// Sets the next node link for this edge in the Dawg.
|
||||
@ -233,13 +244,14 @@ protected:
|
||||
/// checked are the same)
|
||||
/// 0 if edge_rec_match() returns true
|
||||
/// -1 otherwise
|
||||
inline int given_greater_than_edge_rec(NODE_REF next_node, bool word_end, UNICHAR_ID unichar_id,
|
||||
inline int given_greater_than_edge_rec(NODE_REF next_node, bool word_end,
|
||||
UNICHAR_ID unichar_id,
|
||||
const EDGE_RECORD &edge_rec) const {
|
||||
UNICHAR_ID curr_unichar_id = unichar_id_from_edge_rec(edge_rec);
|
||||
NODE_REF curr_next_node = next_node_from_edge_rec(edge_rec);
|
||||
bool curr_word_end = end_of_word_from_edge_rec(edge_rec);
|
||||
if (edge_rec_match(next_node, word_end, unichar_id, curr_next_node, curr_word_end,
|
||||
curr_unichar_id)) {
|
||||
if (edge_rec_match(next_node, word_end, unichar_id, curr_next_node,
|
||||
curr_word_end, curr_unichar_id)) {
|
||||
return 0;
|
||||
}
|
||||
if (unichar_id > curr_unichar_id) {
|
||||
@ -260,8 +272,9 @@ protected:
|
||||
/// Returns true if all the values are equal (any value matches
|
||||
/// next_node if next_node == NO_EDGE, any value matches word_end
|
||||
/// if word_end is false).
|
||||
inline bool edge_rec_match(NODE_REF next_node, bool word_end, UNICHAR_ID unichar_id,
|
||||
NODE_REF other_next_node, bool other_word_end,
|
||||
inline bool edge_rec_match(NODE_REF next_node, bool word_end,
|
||||
UNICHAR_ID unichar_id, NODE_REF other_next_node,
|
||||
bool other_word_end,
|
||||
UNICHAR_ID other_unichar_id) const {
|
||||
return ((unichar_id == other_unichar_id) &&
|
||||
(next_node == NO_EDGE || next_node == other_next_node) &&
|
||||
@ -277,11 +290,13 @@ protected:
|
||||
/// the *'s in this string are interpreted as wildcards.
|
||||
/// WERD_CHOICE param is not passed by const so that wildcard searches
|
||||
/// can modify it and work without having to copy WERD_CHOICEs.
|
||||
bool match_words(WERD_CHOICE *word, uint32_t index, NODE_REF node, UNICHAR_ID wildcard) const;
|
||||
bool match_words(WERD_CHOICE *word, uint32_t index, NODE_REF node,
|
||||
UNICHAR_ID wildcard) const;
|
||||
|
||||
// Recursively iterate over all words in a dawg (see public iterate_words).
|
||||
void iterate_words_rec(const WERD_CHOICE &word_so_far, NODE_REF to_explore,
|
||||
std::function<void(const WERD_CHOICE *)> cb) const;
|
||||
void iterate_words_rec(
|
||||
const WERD_CHOICE &word_so_far, NODE_REF to_explore,
|
||||
const std::function<void(const WERD_CHOICE *)> &cb) const;
|
||||
|
||||
// Member Variables.
|
||||
std::string lang_;
|
||||
@ -339,12 +354,13 @@ protected:
|
||||
// We're back in the punctuation dawg. Continuing there is the only option.
|
||||
struct DawgPosition {
|
||||
DawgPosition() = default;
|
||||
DawgPosition(int dawg_idx, EDGE_REF dawgref, int punc_idx, EDGE_REF puncref, bool backtopunc)
|
||||
: dawg_ref(dawgref)
|
||||
, punc_ref(puncref)
|
||||
, dawg_index(dawg_idx)
|
||||
, punc_index(punc_idx)
|
||||
, back_to_punc(backtopunc) {}
|
||||
DawgPosition(int dawg_idx, EDGE_REF dawgref, int punc_idx, EDGE_REF puncref,
|
||||
bool backtopunc)
|
||||
: dawg_ref(dawgref),
|
||||
punc_ref(puncref),
|
||||
dawg_index(dawg_idx),
|
||||
punc_index(punc_idx),
|
||||
back_to_punc(backtopunc) {}
|
||||
bool operator==(const DawgPosition &other) {
|
||||
return dawg_index == other.dawg_index && dawg_ref == other.dawg_ref &&
|
||||
punc_index == other.punc_index && punc_ref == other.punc_ref &&
|
||||
@ -364,7 +380,8 @@ public:
|
||||
/// Adds an entry for the given dawg_index with the given node to the vec.
|
||||
/// Returns false if the same entry already exists in the vector,
|
||||
/// true otherwise.
|
||||
inline bool add_unique(const DawgPosition &new_pos, bool debug, const char *debug_msg) {
|
||||
inline bool add_unique(const DawgPosition &new_pos, bool debug,
|
||||
const char *debug_msg) {
|
||||
for (auto position : *this) {
|
||||
if (position == new_pos) {
|
||||
return false;
|
||||
@ -372,8 +389,9 @@ public:
|
||||
}
|
||||
push_back(new_pos);
|
||||
if (debug) {
|
||||
tprintf("%s[%d, " REFFORMAT "] [punc: " REFFORMAT "%s]\n", debug_msg, new_pos.dawg_index,
|
||||
new_pos.dawg_ref, new_pos.punc_ref, new_pos.back_to_punc ? " returned" : "");
|
||||
tprintf("%s[%d, " REFFORMAT "] [punc: " REFFORMAT "%s]\n", debug_msg,
|
||||
new_pos.dawg_index, new_pos.dawg_ref, new_pos.punc_ref,
|
||||
new_pos.back_to_punc ? " returned" : "");
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -389,19 +407,23 @@ public:
|
||||
//
|
||||
class TESS_API SquishedDawg : public Dawg {
|
||||
public:
|
||||
SquishedDawg(DawgType type, const std::string &lang, PermuterType perm, int debug_level)
|
||||
: Dawg(type, lang, perm, debug_level) {}
|
||||
SquishedDawg(const char *filename, DawgType type, const std::string &lang, PermuterType perm,
|
||||
SquishedDawg(DawgType type, const std::string &lang, PermuterType perm,
|
||||
int debug_level)
|
||||
: Dawg(type, lang, perm, debug_level) {}
|
||||
SquishedDawg(const char *filename, DawgType type, const std::string &lang,
|
||||
PermuterType perm, int debug_level)
|
||||
: Dawg(type, lang, perm, debug_level) {
|
||||
TFile file;
|
||||
ASSERT_HOST(file.Open(filename, nullptr));
|
||||
ASSERT_HOST(read_squished_dawg(&file));
|
||||
num_forward_edges_in_node0 = num_forward_edges(0);
|
||||
}
|
||||
SquishedDawg(EDGE_ARRAY edges, int num_edges, DawgType type, const std::string &lang,
|
||||
PermuterType perm, int unicharset_size, int debug_level)
|
||||
: Dawg(type, lang, perm, debug_level), edges_(edges), num_edges_(num_edges) {
|
||||
SquishedDawg(EDGE_ARRAY edges, int num_edges, DawgType type,
|
||||
const std::string &lang, PermuterType perm, int unicharset_size,
|
||||
int debug_level)
|
||||
: Dawg(type, lang, perm, debug_level),
|
||||
edges_(edges),
|
||||
num_edges_(num_edges) {
|
||||
init(unicharset_size);
|
||||
num_forward_edges_in_node0 = num_forward_edges(0);
|
||||
if (debug_level > 3) {
|
||||
@ -424,11 +446,13 @@ public:
|
||||
}
|
||||
|
||||
/// Returns the edge that corresponds to the letter out of this node.
|
||||
EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id, bool word_end) const override;
|
||||
EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id,
|
||||
bool word_end) const override;
|
||||
|
||||
/// Fills the given NodeChildVector with all the unichar ids (and the
|
||||
/// corresponding EDGE_REFs) for which there is an edge out of this node.
|
||||
void unichar_ids_of(NODE_REF node, NodeChildVector *vec, bool word_end) const override {
|
||||
void unichar_ids_of(NODE_REF node, NodeChildVector *vec,
|
||||
bool word_end) const override {
|
||||
EDGE_REF edge = node;
|
||||
if (!edge_occupied(edge) || edge == NO_EDGE) {
|
||||
return;
|
||||
@ -502,7 +526,8 @@ private:
|
||||
}
|
||||
/// Returns true if this edge is in the forward direction.
|
||||
inline bool forward_edge(EDGE_REF edge_ref) const {
|
||||
return (edge_occupied(edge_ref) && (FORWARD_EDGE == direction_from_edge_rec(edges_[edge_ref])));
|
||||
return (edge_occupied(edge_ref) &&
|
||||
(FORWARD_EDGE == direction_from_edge_rec(edges_[edge_ref])));
|
||||
}
|
||||
/// Returns true if this edge is in the backward direction.
|
||||
inline bool backward_edge(EDGE_REF edge_ref) const {
|
||||
|
@ -17,6 +17,7 @@
|
||||
|
||||
#include "networkio.h"
|
||||
#include <cfloat> // for FLT_MAX
|
||||
#include <cmath>
|
||||
|
||||
#include <allheaders.h>
|
||||
#include "functions.h"
|
||||
@ -28,7 +29,7 @@ namespace tesseract {
|
||||
// Minimum value to output for certainty.
|
||||
const float kMinCertainty = -20.0f;
|
||||
// Probability corresponding to kMinCertainty.
|
||||
const float kMinProb = exp(kMinCertainty);
|
||||
const float kMinProb = std::exp(kMinCertainty);
|
||||
|
||||
// Resizes to a specific size as a 2-d temp buffer. No batches, no y-dim.
|
||||
void NetworkIO::Resize2d(bool int_mode, int width, int num_features) {
|
||||
@ -356,7 +357,7 @@ Image NetworkIO::ToPix() const {
|
||||
} else if (num_features > 3) {
|
||||
// More than 3 features use false yellow/blue color, assuming a signed
|
||||
// input in the range [-1,1].
|
||||
red = ClipToRange<int>(IntCastRounded(fabs(pixel) * 255), 0, 255);
|
||||
red = ClipToRange<int>(IntCastRounded(std::fabs(pixel) * 255), 0, 255);
|
||||
if (pixel >= 0) {
|
||||
green = red;
|
||||
blue = 0;
|
||||
@ -586,7 +587,7 @@ void NetworkIO::EnsureBestLabel(int t, int label) {
|
||||
// Helper function converts prob to certainty taking the minimum into account.
|
||||
/* static */
|
||||
float NetworkIO::ProbToCertainty(float prob) {
|
||||
return prob > kMinProb ? log(prob) : kMinCertainty;
|
||||
return prob > kMinProb ? std::log(prob) : kMinCertainty;
|
||||
}
|
||||
|
||||
// Returns true if there is any bad value that is suspiciously like a GT
|
||||
@ -807,7 +808,7 @@ void NetworkIO::ComputeCombinerDeltas(const NetworkIO &fwd_deltas, const Network
|
||||
// Reconstruct the target from the delta.
|
||||
float comb_target = delta_line[i] + output;
|
||||
comb_line[i] = comb_target - comb_line[i];
|
||||
float base_delta = fabs(comb_target - base_line[i]);
|
||||
float base_delta = std::fabs(comb_target - base_line[i]);
|
||||
if (base_delta > max_base_delta) {
|
||||
max_base_delta = base_delta;
|
||||
}
|
||||
|
@ -41,11 +41,13 @@ const int RecodeBeamSearch::kBeamWidths[RecodedCharID::kMaxCodeLen + 1] = {
|
||||
static const char *kNodeContNames[] = {"Anything", "OnlyDup", "NoDup"};
|
||||
|
||||
// Prints debug details of the node.
|
||||
void RecodeNode::Print(int null_char, const UNICHARSET &unicharset, int depth) const {
|
||||
void RecodeNode::Print(int null_char, const UNICHARSET &unicharset,
|
||||
int depth) const {
|
||||
if (code == null_char) {
|
||||
tprintf("null_char");
|
||||
} else {
|
||||
tprintf("label=%d, uid=%d=%s", code, unichar_id, unicharset.debug_str(unichar_id).c_str());
|
||||
tprintf("label=%d, uid=%d=%s", code, unichar_id,
|
||||
unicharset.debug_str(unichar_id).c_str());
|
||||
}
|
||||
tprintf(" score=%g, c=%g,%s%s%s perm=%d, hash=%" PRIx64, score, certainty,
|
||||
start_of_dawg ? " DawgStart" : "", start_of_word ? " Start" : "",
|
||||
@ -59,16 +61,16 @@ void RecodeNode::Print(int null_char, const UNICHARSET &unicharset, int depth) c
|
||||
}
|
||||
|
||||
// Borrows the pointer, which is expected to survive until *this is deleted.
|
||||
RecodeBeamSearch::RecodeBeamSearch(const UnicharCompress &recoder, int null_char, bool simple_text,
|
||||
Dict *dict)
|
||||
: recoder_(recoder)
|
||||
, beam_size_(0)
|
||||
, top_code_(-1)
|
||||
, second_code_(-1)
|
||||
, dict_(dict)
|
||||
, space_delimited_(true)
|
||||
, is_simple_text_(simple_text)
|
||||
, null_char_(null_char) {
|
||||
RecodeBeamSearch::RecodeBeamSearch(const UnicharCompress &recoder,
|
||||
int null_char, bool simple_text, Dict *dict)
|
||||
: recoder_(recoder),
|
||||
beam_size_(0),
|
||||
top_code_(-1),
|
||||
second_code_(-1),
|
||||
dict_(dict),
|
||||
space_delimited_(true),
|
||||
is_simple_text_(simple_text),
|
||||
null_char_(null_char) {
|
||||
if (dict_ != nullptr && !dict_->IsSpaceDelimitedLang()) {
|
||||
space_delimited_ = false;
|
||||
}
|
||||
@ -84,9 +86,9 @@ RecodeBeamSearch::~RecodeBeamSearch() {
|
||||
}
|
||||
|
||||
// Decodes the set of network outputs, storing the lattice internally.
|
||||
void RecodeBeamSearch::Decode(const NetworkIO &output, double dict_ratio, double cert_offset,
|
||||
double worst_dict_cert, const UNICHARSET *charset,
|
||||
int lstm_choice_mode) {
|
||||
void RecodeBeamSearch::Decode(const NetworkIO &output, double dict_ratio,
|
||||
double cert_offset, double worst_dict_cert,
|
||||
const UNICHARSET *charset, int lstm_choice_mode) {
|
||||
beam_size_ = 0;
|
||||
int width = output.Width();
|
||||
if (lstm_choice_mode) {
|
||||
@ -94,14 +96,16 @@ void RecodeBeamSearch::Decode(const NetworkIO &output, double dict_ratio, double
|
||||
}
|
||||
for (int t = 0; t < width; ++t) {
|
||||
ComputeTopN(output.f(t), output.NumFeatures(), kBeamWidths[0]);
|
||||
DecodeStep(output.f(t), t, dict_ratio, cert_offset, worst_dict_cert, charset);
|
||||
DecodeStep(output.f(t), t, dict_ratio, cert_offset, worst_dict_cert,
|
||||
charset);
|
||||
if (lstm_choice_mode) {
|
||||
SaveMostCertainChoices(output.f(t), output.NumFeatures(), charset, t);
|
||||
}
|
||||
}
|
||||
}
|
||||
void RecodeBeamSearch::Decode(const GENERIC_2D_ARRAY<float> &output, double dict_ratio,
|
||||
double cert_offset, double worst_dict_cert,
|
||||
void RecodeBeamSearch::Decode(const GENERIC_2D_ARRAY<float> &output,
|
||||
double dict_ratio, double cert_offset,
|
||||
double worst_dict_cert,
|
||||
const UNICHARSET *charset) {
|
||||
beam_size_ = 0;
|
||||
int width = output.dim1();
|
||||
@ -111,9 +115,9 @@ void RecodeBeamSearch::Decode(const GENERIC_2D_ARRAY<float> &output, double dict
|
||||
}
|
||||
}
|
||||
|
||||
void RecodeBeamSearch::DecodeSecondaryBeams(const NetworkIO &output, double dict_ratio,
|
||||
double cert_offset, double worst_dict_cert,
|
||||
const UNICHARSET *charset, int lstm_choice_mode) {
|
||||
void RecodeBeamSearch::DecodeSecondaryBeams(
|
||||
const NetworkIO &output, double dict_ratio, double cert_offset,
|
||||
double worst_dict_cert, const UNICHARSET *charset, int lstm_choice_mode) {
|
||||
for (auto data : secondary_beam_) {
|
||||
delete data;
|
||||
}
|
||||
@ -128,14 +132,17 @@ void RecodeBeamSearch::DecodeSecondaryBeams(const NetworkIO &output, double dict
|
||||
t >= character_boundaries_[bucketNumber + 1]) {
|
||||
++bucketNumber;
|
||||
}
|
||||
ComputeSecTopN(&(excludedUnichars)[bucketNumber], output.f(t), output.NumFeatures(),
|
||||
kBeamWidths[0]);
|
||||
DecodeSecondaryStep(output.f(t), t, dict_ratio, cert_offset, worst_dict_cert, charset);
|
||||
ComputeSecTopN(&(excludedUnichars)[bucketNumber], output.f(t),
|
||||
output.NumFeatures(), kBeamWidths[0]);
|
||||
DecodeSecondaryStep(output.f(t), t, dict_ratio, cert_offset,
|
||||
worst_dict_cert, charset);
|
||||
}
|
||||
}
|
||||
|
||||
void RecodeBeamSearch::SaveMostCertainChoices(const float *outputs, int num_outputs,
|
||||
const UNICHARSET *charset, int xCoord) {
|
||||
void RecodeBeamSearch::SaveMostCertainChoices(const float *outputs,
|
||||
int num_outputs,
|
||||
const UNICHARSET *charset,
|
||||
int xCoord) {
|
||||
std::vector<std::pair<const char *, float>> choices;
|
||||
for (int i = 0; i < num_outputs; ++i) {
|
||||
if (outputs[i] >= 0.01f) {
|
||||
@ -153,7 +160,8 @@ void RecodeBeamSearch::SaveMostCertainChoices(const float *outputs, int num_outp
|
||||
while (choices.size() > pos && choices[pos].second > outputs[i]) {
|
||||
pos++;
|
||||
}
|
||||
choices.insert(choices.begin() + pos, std::pair<const char *, float>(character, outputs[i]));
|
||||
choices.insert(choices.begin() + pos,
|
||||
std::pair<const char *, float>(character, outputs[i]));
|
||||
}
|
||||
}
|
||||
timesteps.push_back(choices);
|
||||
@ -162,7 +170,8 @@ void RecodeBeamSearch::SaveMostCertainChoices(const float *outputs, int num_outp
|
||||
void RecodeBeamSearch::segmentTimestepsByCharacters() {
|
||||
for (unsigned i = 1; i < character_boundaries_.size(); ++i) {
|
||||
std::vector<std::vector<std::pair<const char *, float>>> segment;
|
||||
for (int j = character_boundaries_[i - 1]; j < character_boundaries_[i]; ++j) {
|
||||
for (int j = character_boundaries_[i - 1]; j < character_boundaries_[i];
|
||||
++j) {
|
||||
segment.push_back(timesteps[j]);
|
||||
}
|
||||
segmentedTimesteps.push_back(segment);
|
||||
@ -170,7 +179,8 @@ void RecodeBeamSearch::segmentTimestepsByCharacters() {
|
||||
}
|
||||
std::vector<std::vector<std::pair<const char *, float>>>
|
||||
RecodeBeamSearch::combineSegmentedTimesteps(
|
||||
std::vector<std::vector<std::vector<std::pair<const char *, float>>>> *segmentedTimesteps) {
|
||||
std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
|
||||
*segmentedTimesteps) {
|
||||
std::vector<std::vector<std::pair<const char *, float>>> combined_timesteps;
|
||||
for (auto &segmentedTimestep : *segmentedTimesteps) {
|
||||
for (auto &j : segmentedTimestep) {
|
||||
@ -180,8 +190,10 @@ RecodeBeamSearch::combineSegmentedTimesteps(
|
||||
return combined_timesteps;
|
||||
}
|
||||
|
||||
void RecodeBeamSearch::calculateCharBoundaries(std::vector<int> *starts, std::vector<int> *ends,
|
||||
std::vector<int> *char_bounds_, int maxWidth) {
|
||||
void RecodeBeamSearch::calculateCharBoundaries(std::vector<int> *starts,
|
||||
std::vector<int> *ends,
|
||||
std::vector<int> *char_bounds_,
|
||||
int maxWidth) {
|
||||
char_bounds_->push_back(0);
|
||||
for (unsigned i = 0; i < ends->size(); ++i) {
|
||||
int middle = ((*starts)[i + 1] - (*ends)[i]) / 2;
|
||||
@ -192,8 +204,8 @@ void RecodeBeamSearch::calculateCharBoundaries(std::vector<int> *starts, std::ve
|
||||
}
|
||||
|
||||
// Returns the best path as labels/scores/xcoords similar to simple CTC.
|
||||
void RecodeBeamSearch::ExtractBestPathAsLabels(std::vector<int> *labels,
|
||||
std::vector<int> *xcoords) const {
|
||||
void RecodeBeamSearch::ExtractBestPathAsLabels(
|
||||
std::vector<int> *labels, std::vector<int> *xcoords) const {
|
||||
labels->clear();
|
||||
xcoords->clear();
|
||||
std::vector<const RecodeNode *> best_nodes;
|
||||
@ -215,22 +227,23 @@ void RecodeBeamSearch::ExtractBestPathAsLabels(std::vector<int> *labels,
|
||||
|
||||
// Returns the best path as unichar-ids/certs/ratings/xcoords skipping
|
||||
// duplicates, nulls and intermediate parts.
|
||||
void RecodeBeamSearch::ExtractBestPathAsUnicharIds(bool debug, const UNICHARSET *unicharset,
|
||||
std::vector<int> *unichar_ids,
|
||||
std::vector<float> *certs,
|
||||
std::vector<float> *ratings,
|
||||
std::vector<int> *xcoords) const {
|
||||
void RecodeBeamSearch::ExtractBestPathAsUnicharIds(
|
||||
bool debug, const UNICHARSET *unicharset, std::vector<int> *unichar_ids,
|
||||
std::vector<float> *certs, std::vector<float> *ratings,
|
||||
std::vector<int> *xcoords) const {
|
||||
std::vector<const RecodeNode *> best_nodes;
|
||||
ExtractBestPaths(&best_nodes, nullptr);
|
||||
ExtractPathAsUnicharIds(best_nodes, unichar_ids, certs, ratings, xcoords);
|
||||
if (debug) {
|
||||
DebugPath(unicharset, best_nodes);
|
||||
DebugUnicharPath(unicharset, best_nodes, *unichar_ids, *certs, *ratings, *xcoords);
|
||||
DebugUnicharPath(unicharset, best_nodes, *unichar_ids, *certs, *ratings,
|
||||
*xcoords);
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the best path as a set of WERD_RES.
|
||||
void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX &line_box, float scale_factor, bool debug,
|
||||
void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX &line_box,
|
||||
float scale_factor, bool debug,
|
||||
const UNICHARSET *unicharset,
|
||||
PointerVector<WERD_RES> *words,
|
||||
int lstm_choice_mode) {
|
||||
@ -245,9 +258,11 @@ void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX &line_box, float scale_
|
||||
ExtractBestPaths(&best_nodes, &second_nodes);
|
||||
if (debug) {
|
||||
DebugPath(unicharset, best_nodes);
|
||||
ExtractPathAsUnicharIds(second_nodes, &unichar_ids, &certs, &ratings, &xcoords);
|
||||
ExtractPathAsUnicharIds(second_nodes, &unichar_ids, &certs, &ratings,
|
||||
&xcoords);
|
||||
tprintf("\nSecond choice path:\n");
|
||||
DebugUnicharPath(unicharset, second_nodes, unichar_ids, certs, ratings, xcoords);
|
||||
DebugUnicharPath(unicharset, second_nodes, unichar_ids, certs, ratings,
|
||||
xcoords);
|
||||
}
|
||||
// If lstm choice mode is required in granularity level 2, it stores the x
|
||||
// Coordinates of every chosen character, to match the alternative choices to
|
||||
@ -256,7 +271,8 @@ void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX &line_box, float scale_
|
||||
&character_boundaries_);
|
||||
int num_ids = unichar_ids.size();
|
||||
if (debug) {
|
||||
DebugUnicharPath(unicharset, best_nodes, unichar_ids, certs, ratings, xcoords);
|
||||
DebugUnicharPath(unicharset, best_nodes, unichar_ids, certs, ratings,
|
||||
xcoords);
|
||||
}
|
||||
// Convert labels to unichar-ids.
|
||||
int word_end = 0;
|
||||
@ -283,16 +299,19 @@ void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX &line_box, float scale_
|
||||
if (word_end < num_ids && unichar_ids[word_end] == UNICHAR_SPACE) {
|
||||
space_cert = certs[word_end];
|
||||
}
|
||||
bool leading_space = word_start > 0 && unichar_ids[word_start - 1] == UNICHAR_SPACE;
|
||||
bool leading_space =
|
||||
word_start > 0 && unichar_ids[word_start - 1] == UNICHAR_SPACE;
|
||||
// Create a WERD_RES for the output word.
|
||||
WERD_RES *word_res =
|
||||
InitializeWord(leading_space, line_box, word_start, word_end,
|
||||
std::min(space_cert, prev_space_cert), unicharset, xcoords, scale_factor);
|
||||
std::min(space_cert, prev_space_cert), unicharset,
|
||||
xcoords, scale_factor);
|
||||
for (int i = word_start; i < word_end; ++i) {
|
||||
auto *choices = new BLOB_CHOICE_LIST;
|
||||
BLOB_CHOICE_IT bc_it(choices);
|
||||
auto *choice = new BLOB_CHOICE(unichar_ids[i], ratings[i], certs[i], -1, 1.0f,
|
||||
static_cast<float>(INT16_MAX), 0.0f, BCC_STATIC_CLASSIFIER);
|
||||
auto *choice = new BLOB_CHOICE(unichar_ids[i], ratings[i], certs[i], -1,
|
||||
1.0f, static_cast<float>(INT16_MAX), 0.0f,
|
||||
BCC_STATIC_CLASSIFIER);
|
||||
int col = i - word_start;
|
||||
choice->set_matrix_cell(col, col);
|
||||
bc_it.add_after_then_move(choice);
|
||||
@ -314,7 +333,8 @@ struct greater_than {
|
||||
}
|
||||
};
|
||||
|
||||
void RecodeBeamSearch::PrintBeam2(bool uids, int num_outputs, const UNICHARSET *charset,
|
||||
void RecodeBeamSearch::PrintBeam2(bool uids, int num_outputs,
|
||||
const UNICHARSET *charset,
|
||||
bool secondary) const {
|
||||
std::vector<std::vector<const RecodeNode *>> topology;
|
||||
std::unordered_set<const RecodeNode *> visited;
|
||||
@ -340,7 +360,7 @@ void RecodeBeamSearch::PrintBeam2(bool uids, int num_outputs, const UNICHARSET *
|
||||
}
|
||||
int ct = 0;
|
||||
unsigned cb = 1;
|
||||
for (std::vector<const RecodeNode *> layer : topology) {
|
||||
for (const std::vector<const RecodeNode *> &layer : topology) {
|
||||
if (cb >= character_boundaries_.size()) {
|
||||
break;
|
||||
}
|
||||
@ -380,7 +400,8 @@ void RecodeBeamSearch::PrintBeam2(bool uids, int num_outputs, const UNICHARSET *
|
||||
prevCode = " ";
|
||||
}
|
||||
if (uids) {
|
||||
tprintf("%x(|)%f(>)%x(|)%f\n", intPrevCode, prevScore, intCode, node->score);
|
||||
tprintf("%x(|)%f(>)%x(|)%f\n", intPrevCode, prevScore, intCode,
|
||||
node->score);
|
||||
} else {
|
||||
tprintf("%s(|)%f(>)%s(|)%f\n", prevCode, prevScore, code, node->score);
|
||||
}
|
||||
@ -397,7 +418,8 @@ void RecodeBeamSearch::extractSymbolChoices(const UNICHARSET *unicharset) {
|
||||
}
|
||||
// For the first iteration the original beam is analyzed. After that a
|
||||
// new beam is calculated based on the results from the original beam.
|
||||
std::vector<RecodeBeam *> &currentBeam = secondary_beam_.empty() ? beam_ : secondary_beam_;
|
||||
std::vector<RecodeBeam *> &currentBeam =
|
||||
secondary_beam_.empty() ? beam_ : secondary_beam_;
|
||||
character_boundaries_[0] = 0;
|
||||
for (unsigned j = 1; j < character_boundaries_.size(); ++j) {
|
||||
std::vector<int> unichar_ids;
|
||||
@ -406,7 +428,7 @@ void RecodeBeamSearch::extractSymbolChoices(const UNICHARSET *unicharset) {
|
||||
std::vector<int> xcoords;
|
||||
int backpath = character_boundaries_[j] - character_boundaries_[j - 1];
|
||||
std::vector<tesseract::RecodePair> &heaps =
|
||||
currentBeam.at(character_boundaries_[j] - 1)->beams_->heap();
|
||||
currentBeam.at(character_boundaries_[j] - 1)->beams_->heap();
|
||||
std::vector<const RecodeNode *> best_nodes;
|
||||
std::vector<const RecodeNode *> best;
|
||||
// Scan the segmented node chain for valid unichar ids.
|
||||
@ -415,7 +437,8 @@ void RecodeBeamSearch::extractSymbolChoices(const UNICHARSET *unicharset) {
|
||||
int backcounter = 0;
|
||||
const RecodeNode *node = &entry.data();
|
||||
while (node != nullptr && backcounter < backpath) {
|
||||
if (node->code != null_char_ && node->unichar_id != INVALID_UNICHAR_ID) {
|
||||
if (node->code != null_char_ &&
|
||||
node->unichar_id != INVALID_UNICHAR_ID) {
|
||||
validChar = true;
|
||||
break;
|
||||
}
|
||||
@ -430,7 +453,8 @@ void RecodeBeamSearch::extractSymbolChoices(const UNICHARSET *unicharset) {
|
||||
if (!best.empty()) {
|
||||
std::sort(best.begin(), best.end(), greater_than());
|
||||
ExtractPath(best[0], &best_nodes, backpath);
|
||||
ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings, &xcoords);
|
||||
ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings,
|
||||
&xcoords);
|
||||
}
|
||||
if (!unichar_ids.empty()) {
|
||||
int bestPos = 0;
|
||||
@ -466,7 +490,8 @@ void RecodeBeamSearch::extractSymbolChoices(const UNICHARSET *unicharset) {
|
||||
int id = unichar_ids[bestPos];
|
||||
const char *result = unicharset->id_to_unichar_ext(id);
|
||||
float rating = ratings[bestPos];
|
||||
ctc_choices[j - 1].push_back(std::pair<const char *, float>(result, rating));
|
||||
ctc_choices[j - 1].push_back(
|
||||
std::pair<const char *, float>(result, rating));
|
||||
} else {
|
||||
std::vector<std::pair<const char *, float>> choice;
|
||||
int id = unichar_ids[bestPos];
|
||||
@ -504,7 +529,8 @@ void RecodeBeamSearch::DebugBeams(const UNICHARSET &unicharset) const {
|
||||
continue;
|
||||
}
|
||||
// Print all the best scoring nodes for each unichar found.
|
||||
tprintf("Position %d: %s+%s beam\n", p, d ? "Dict" : "Non-Dict", kNodeContNames[c]);
|
||||
tprintf("Position %d: %s+%s beam\n", p, d ? "Dict" : "Non-Dict",
|
||||
kNodeContNames[c]);
|
||||
DebugBeamPos(unicharset, beam_[p]->beams_[index]);
|
||||
}
|
||||
}
|
||||
@ -512,7 +538,8 @@ void RecodeBeamSearch::DebugBeams(const UNICHARSET &unicharset) const {
|
||||
}
|
||||
|
||||
// Generates debug output of the content of a single beam position.
|
||||
void RecodeBeamSearch::DebugBeamPos(const UNICHARSET &unicharset, const RecodeHeap &heap) const {
|
||||
void RecodeBeamSearch::DebugBeamPos(const UNICHARSET &unicharset,
|
||||
const RecodeHeap &heap) const {
|
||||
std::vector<const RecodeNode *> unichar_bests(unicharset.size());
|
||||
const RecodeNode *null_best = nullptr;
|
||||
int heap_size = heap.size();
|
||||
@ -543,12 +570,11 @@ void RecodeBeamSearch::DebugBeamPos(const UNICHARSET &unicharset, const RecodeHe
|
||||
// Returns the given best_nodes as unichar-ids/certs/ratings/xcoords skipping
|
||||
// duplicates, nulls and intermediate parts.
|
||||
/* static */
|
||||
void RecodeBeamSearch::ExtractPathAsUnicharIds(const std::vector<const RecodeNode *> &best_nodes,
|
||||
std::vector<int> *unichar_ids,
|
||||
std::vector<float> *certs,
|
||||
std::vector<float> *ratings,
|
||||
std::vector<int> *xcoords,
|
||||
std::vector<int> *character_boundaries) {
|
||||
void RecodeBeamSearch::ExtractPathAsUnicharIds(
|
||||
const std::vector<const RecodeNode *> &best_nodes,
|
||||
std::vector<int> *unichar_ids, std::vector<float> *certs,
|
||||
std::vector<float> *ratings, std::vector<int> *xcoords,
|
||||
std::vector<int> *character_boundaries) {
|
||||
unichar_ids->clear();
|
||||
certs->clear();
|
||||
ratings->clear();
|
||||
@ -571,7 +597,8 @@ void RecodeBeamSearch::ExtractPathAsUnicharIds(const std::vector<const RecodeNod
|
||||
starts.push_back(t);
|
||||
if (t < width) {
|
||||
int unichar_id = best_nodes[t]->unichar_id;
|
||||
if (unichar_id == UNICHAR_SPACE && !certs->empty() && best_nodes[t]->permuter != NO_PERM) {
|
||||
if (unichar_id == UNICHAR_SPACE && !certs->empty() &&
|
||||
best_nodes[t]->permuter != NO_PERM) {
|
||||
// All the rating and certainty go on the previous character except
|
||||
// for the space itself.
|
||||
if (certainty < certs->back()) {
|
||||
@ -587,8 +614,8 @@ void RecodeBeamSearch::ExtractPathAsUnicharIds(const std::vector<const RecodeNod
|
||||
double cert = best_nodes[t++]->certainty;
|
||||
// Special-case NO-PERM space to forget the certainty of the previous
|
||||
// nulls. See long comment in ContinueContext.
|
||||
if (cert < certainty ||
|
||||
(unichar_id == UNICHAR_SPACE && best_nodes[t - 1]->permuter == NO_PERM)) {
|
||||
if (cert < certainty || (unichar_id == UNICHAR_SPACE &&
|
||||
best_nodes[t - 1]->permuter == NO_PERM)) {
|
||||
certainty = cert;
|
||||
}
|
||||
rating -= cert;
|
||||
@ -612,19 +639,23 @@ void RecodeBeamSearch::ExtractPathAsUnicharIds(const std::vector<const RecodeNod
|
||||
|
||||
// Sets up a word with the ratings matrix and fake blobs with boxes in the
|
||||
// right places.
|
||||
WERD_RES *RecodeBeamSearch::InitializeWord(bool leading_space, const TBOX &line_box, int word_start,
|
||||
WERD_RES *RecodeBeamSearch::InitializeWord(bool leading_space,
|
||||
const TBOX &line_box, int word_start,
|
||||
int word_end, float space_certainty,
|
||||
const UNICHARSET *unicharset,
|
||||
const std::vector<int> &xcoords, float scale_factor) {
|
||||
const std::vector<int> &xcoords,
|
||||
float scale_factor) {
|
||||
// Make a fake blob for each non-zero label.
|
||||
C_BLOB_LIST blobs;
|
||||
C_BLOB_IT b_it(&blobs);
|
||||
for (int i = word_start; i < word_end; ++i) {
|
||||
if (static_cast<unsigned>(i + 1) < character_boundaries_.size()) {
|
||||
TBOX box(static_cast<int16_t>(std::floor(character_boundaries_[i] * scale_factor)) +
|
||||
TBOX box(static_cast<int16_t>(
|
||||
std::floor(character_boundaries_[i] * scale_factor)) +
|
||||
line_box.left(),
|
||||
line_box.bottom(),
|
||||
static_cast<int16_t>(std::ceil(character_boundaries_[i + 1] * scale_factor)) +
|
||||
static_cast<int16_t>(
|
||||
std::ceil(character_boundaries_[i + 1] * scale_factor)) +
|
||||
line_box.left(),
|
||||
line_box.top());
|
||||
b_it.add_after_then_move(C_BLOB::FakeBlob(box));
|
||||
@ -644,7 +675,8 @@ WERD_RES *RecodeBeamSearch::InitializeWord(bool leading_space, const TBOX &line_
|
||||
|
||||
// Fills top_n_flags_ with bools that are true iff the corresponding output
|
||||
// is one of the top_n.
|
||||
void RecodeBeamSearch::ComputeTopN(const float *outputs, int num_outputs, int top_n) {
|
||||
void RecodeBeamSearch::ComputeTopN(const float *outputs, int num_outputs,
|
||||
int top_n) {
|
||||
top_n_flags_.clear();
|
||||
top_n_flags_.resize(num_outputs, TN_ALSO_RAN);
|
||||
top_code_ = -1;
|
||||
@ -676,15 +708,17 @@ void RecodeBeamSearch::ComputeTopN(const float *outputs, int num_outputs, int to
|
||||
top_n_flags_[null_char_] = TN_TOP2;
|
||||
}
|
||||
|
||||
void RecodeBeamSearch::ComputeSecTopN(std::unordered_set<int> *exList, const float *outputs,
|
||||
int num_outputs, int top_n) {
|
||||
void RecodeBeamSearch::ComputeSecTopN(std::unordered_set<int> *exList,
|
||||
const float *outputs, int num_outputs,
|
||||
int top_n) {
|
||||
top_n_flags_.clear();
|
||||
top_n_flags_.resize(num_outputs, TN_ALSO_RAN);
|
||||
top_code_ = -1;
|
||||
second_code_ = -1;
|
||||
top_heap_.clear();
|
||||
for (int i = 0; i < num_outputs; ++i) {
|
||||
if ((top_heap_.size() < top_n || outputs[i] > top_heap_.PeekTop().key()) && !exList->count(i)) {
|
||||
if ((top_heap_.size() < top_n || outputs[i] > top_heap_.PeekTop().key()) &&
|
||||
!exList->count(i)) {
|
||||
TopPair entry(outputs[i], i);
|
||||
top_heap_.Push(&entry);
|
||||
if (top_heap_.size() > top_n) {
|
||||
@ -712,8 +746,9 @@ void RecodeBeamSearch::ComputeSecTopN(std::unordered_set<int> *exList, const flo
|
||||
// Adds the computation for the current time-step to the beam. Call at each
|
||||
// time-step in sequence from left to right. outputs is the activation vector
|
||||
// for the current timestep.
|
||||
void RecodeBeamSearch::DecodeStep(const float *outputs, int t, double dict_ratio,
|
||||
double cert_offset, double worst_dict_cert,
|
||||
void RecodeBeamSearch::DecodeStep(const float *outputs, int t,
|
||||
double dict_ratio, double cert_offset,
|
||||
double worst_dict_cert,
|
||||
const UNICHARSET *charset, bool debug) {
|
||||
if (t == static_cast<int>(beam_.size())) {
|
||||
beam_.push_back(new RecodeBeam);
|
||||
@ -723,11 +758,12 @@ void RecodeBeamSearch::DecodeStep(const float *outputs, int t, double dict_ratio
|
||||
step->Clear();
|
||||
if (t == 0) {
|
||||
// The first step can only use singles and initials.
|
||||
ContinueContext(nullptr, BeamIndex(false, NC_ANYTHING, 0), outputs, TN_TOP2, charset,
|
||||
dict_ratio, cert_offset, worst_dict_cert, step);
|
||||
ContinueContext(nullptr, BeamIndex(false, NC_ANYTHING, 0), outputs, TN_TOP2,
|
||||
charset, dict_ratio, cert_offset, worst_dict_cert, step);
|
||||
if (dict_ != nullptr) {
|
||||
ContinueContext(nullptr, BeamIndex(true, NC_ANYTHING, 0), outputs, TN_TOP2, charset,
|
||||
dict_ratio, cert_offset, worst_dict_cert, step);
|
||||
ContinueContext(nullptr, BeamIndex(true, NC_ANYTHING, 0), outputs,
|
||||
TN_TOP2, charset, dict_ratio, cert_offset,
|
||||
worst_dict_cert, step);
|
||||
}
|
||||
} else {
|
||||
RecodeBeam *prev = beam_[t - 1];
|
||||
@ -759,8 +795,9 @@ void RecodeBeamSearch::DecodeStep(const float *outputs, int t, double dict_ratio
|
||||
// best first, but it comes before a lot of the worst, so it is slightly
|
||||
// more efficient than going forwards.
|
||||
for (int i = prev->beams_[index].size() - 1; i >= 0; --i) {
|
||||
ContinueContext(&prev->beams_[index].get(i).data(), index, outputs, top_n, charset,
|
||||
dict_ratio, cert_offset, worst_dict_cert, step);
|
||||
ContinueContext(&prev->beams_[index].get(i).data(), index, outputs,
|
||||
top_n, charset, dict_ratio, cert_offset,
|
||||
worst_dict_cert, step);
|
||||
}
|
||||
}
|
||||
for (int index = 0; index < kNumBeams; ++index) {
|
||||
@ -775,15 +812,16 @@ void RecodeBeamSearch::DecodeStep(const float *outputs, int t, double dict_ratio
|
||||
if (step->best_initial_dawgs_[c].code >= 0) {
|
||||
int index = BeamIndex(true, static_cast<NodeContinuation>(c), 0);
|
||||
RecodeHeap *dawg_heap = &step->beams_[index];
|
||||
PushHeapIfBetter(kBeamWidths[0], &step->best_initial_dawgs_[c], dawg_heap);
|
||||
PushHeapIfBetter(kBeamWidths[0], &step->best_initial_dawgs_[c],
|
||||
dawg_heap);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RecodeBeamSearch::DecodeSecondaryStep(const float *outputs, int t, double dict_ratio,
|
||||
double cert_offset, double worst_dict_cert,
|
||||
const UNICHARSET *charset, bool debug) {
|
||||
void RecodeBeamSearch::DecodeSecondaryStep(
|
||||
const float *outputs, int t, double dict_ratio, double cert_offset,
|
||||
double worst_dict_cert, const UNICHARSET *charset, bool debug) {
|
||||
if (t == static_cast<int>(secondary_beam_.size())) {
|
||||
secondary_beam_.push_back(new RecodeBeam);
|
||||
}
|
||||
@ -791,11 +829,12 @@ void RecodeBeamSearch::DecodeSecondaryStep(const float *outputs, int t, double d
|
||||
step->Clear();
|
||||
if (t == 0) {
|
||||
// The first step can only use singles and initials.
|
||||
ContinueContext(nullptr, BeamIndex(false, NC_ANYTHING, 0), outputs, TN_TOP2, charset,
|
||||
dict_ratio, cert_offset, worst_dict_cert, step);
|
||||
ContinueContext(nullptr, BeamIndex(false, NC_ANYTHING, 0), outputs, TN_TOP2,
|
||||
charset, dict_ratio, cert_offset, worst_dict_cert, step);
|
||||
if (dict_ != nullptr) {
|
||||
ContinueContext(nullptr, BeamIndex(true, NC_ANYTHING, 0), outputs, TN_TOP2, charset,
|
||||
dict_ratio, cert_offset, worst_dict_cert, step);
|
||||
ContinueContext(nullptr, BeamIndex(true, NC_ANYTHING, 0), outputs,
|
||||
TN_TOP2, charset, dict_ratio, cert_offset,
|
||||
worst_dict_cert, step);
|
||||
}
|
||||
} else {
|
||||
RecodeBeam *prev = secondary_beam_[t - 1];
|
||||
@ -827,8 +866,9 @@ void RecodeBeamSearch::DecodeSecondaryStep(const float *outputs, int t, double d
|
||||
// best first, but it comes before a lot of the worst, so it is slightly
|
||||
// more efficient than going forwards.
|
||||
for (int i = prev->beams_[index].size() - 1; i >= 0; --i) {
|
||||
ContinueContext(&prev->beams_[index].get(i).data(), index, outputs, top_n, charset,
|
||||
dict_ratio, cert_offset, worst_dict_cert, step);
|
||||
ContinueContext(&prev->beams_[index].get(i).data(), index, outputs,
|
||||
top_n, charset, dict_ratio, cert_offset,
|
||||
worst_dict_cert, step);
|
||||
}
|
||||
}
|
||||
for (int index = 0; index < kNumBeams; ++index) {
|
||||
@ -843,7 +883,8 @@ void RecodeBeamSearch::DecodeSecondaryStep(const float *outputs, int t, double d
|
||||
if (step->best_initial_dawgs_[c].code >= 0) {
|
||||
int index = BeamIndex(true, static_cast<NodeContinuation>(c), 0);
|
||||
RecodeHeap *dawg_heap = &step->beams_[index];
|
||||
PushHeapIfBetter(kBeamWidths[0], &step->best_initial_dawgs_[c], dawg_heap);
|
||||
PushHeapIfBetter(kBeamWidths[0], &step->best_initial_dawgs_[c],
|
||||
dawg_heap);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -853,10 +894,10 @@ void RecodeBeamSearch::DecodeSecondaryStep(const float *outputs, int t, double d
|
||||
// continuations of context prev, which is of the given length, using the
|
||||
// given network outputs to provide scores to the choices. Uses only those
|
||||
// choices for which top_n_flags[index] == top_n_flag.
|
||||
void RecodeBeamSearch::ContinueContext(const RecodeNode *prev, int index, const float *outputs,
|
||||
TopNState top_n_flag, const UNICHARSET *charset,
|
||||
double dict_ratio, double cert_offset,
|
||||
double worst_dict_cert, RecodeBeam *step) {
|
||||
void RecodeBeamSearch::ContinueContext(
|
||||
const RecodeNode *prev, int index, const float *outputs,
|
||||
TopNState top_n_flag, const UNICHARSET *charset, double dict_ratio,
|
||||
double cert_offset, double worst_dict_cert, RecodeBeam *step) {
|
||||
RecodedCharID prefix;
|
||||
RecodedCharID full_code;
|
||||
const RecodeNode *previous = prev;
|
||||
@ -864,7 +905,8 @@ void RecodeBeamSearch::ContinueContext(const RecodeNode *prev, int index, const
|
||||
bool use_dawgs = IsDawgFromBeamsIndex(index);
|
||||
NodeContinuation prev_cont = ContinuationFromBeamsIndex(index);
|
||||
for (int p = length - 1; p >= 0; --p, previous = previous->prev) {
|
||||
while (previous != nullptr && (previous->duplicate || previous->code == null_char_)) {
|
||||
while (previous != nullptr &&
|
||||
(previous->duplicate || previous->code == null_char_)) {
|
||||
previous = previous->prev;
|
||||
}
|
||||
if (previous != nullptr) {
|
||||
@ -875,26 +917,34 @@ void RecodeBeamSearch::ContinueContext(const RecodeNode *prev, int index, const
|
||||
if (prev != nullptr && !is_simple_text_) {
|
||||
if (top_n_flags_[prev->code] == top_n_flag) {
|
||||
if (prev_cont != NC_NO_DUP) {
|
||||
float cert = NetworkIO::ProbToCertainty(outputs[prev->code]) + cert_offset;
|
||||
PushDupOrNoDawgIfBetter(length, true, prev->code, prev->unichar_id, cert, worst_dict_cert,
|
||||
dict_ratio, use_dawgs, NC_ANYTHING, prev, step);
|
||||
}
|
||||
if (prev_cont == NC_ANYTHING && top_n_flag == TN_TOP2 && prev->code != null_char_) {
|
||||
float cert =
|
||||
NetworkIO::ProbToCertainty(outputs[prev->code] + outputs[null_char_]) + cert_offset;
|
||||
PushDupOrNoDawgIfBetter(length, true, prev->code, prev->unichar_id, cert, worst_dict_cert,
|
||||
dict_ratio, use_dawgs, NC_NO_DUP, prev, step);
|
||||
NetworkIO::ProbToCertainty(outputs[prev->code]) + cert_offset;
|
||||
PushDupOrNoDawgIfBetter(length, true, prev->code, prev->unichar_id,
|
||||
cert, worst_dict_cert, dict_ratio, use_dawgs,
|
||||
NC_ANYTHING, prev, step);
|
||||
}
|
||||
if (prev_cont == NC_ANYTHING && top_n_flag == TN_TOP2 &&
|
||||
prev->code != null_char_) {
|
||||
float cert = NetworkIO::ProbToCertainty(outputs[prev->code] +
|
||||
outputs[null_char_]) +
|
||||
cert_offset;
|
||||
PushDupOrNoDawgIfBetter(length, true, prev->code, prev->unichar_id,
|
||||
cert, worst_dict_cert, dict_ratio, use_dawgs,
|
||||
NC_NO_DUP, prev, step);
|
||||
}
|
||||
}
|
||||
if (prev_cont == NC_ONLY_DUP) {
|
||||
return;
|
||||
}
|
||||
if (prev->code != null_char_ && length > 0 && top_n_flags_[null_char_] == top_n_flag) {
|
||||
if (prev->code != null_char_ && length > 0 &&
|
||||
top_n_flags_[null_char_] == top_n_flag) {
|
||||
// Allow nulls within multi code sequences, as the nulls within are not
|
||||
// explicitly included in the code sequence.
|
||||
float cert = NetworkIO::ProbToCertainty(outputs[null_char_]) + cert_offset;
|
||||
PushDupOrNoDawgIfBetter(length, false, null_char_, INVALID_UNICHAR_ID, cert, worst_dict_cert,
|
||||
dict_ratio, use_dawgs, NC_ANYTHING, prev, step);
|
||||
float cert =
|
||||
NetworkIO::ProbToCertainty(outputs[null_char_]) + cert_offset;
|
||||
PushDupOrNoDawgIfBetter(length, false, null_char_, INVALID_UNICHAR_ID,
|
||||
cert, worst_dict_cert, dict_ratio, use_dawgs,
|
||||
NC_ANYTHING, prev, step);
|
||||
}
|
||||
}
|
||||
const std::vector<int> *final_codes = recoder_.GetFinalCodes(prefix);
|
||||
@ -920,18 +970,19 @@ void RecodeBeamSearch::ContinueContext(const RecodeNode *prev, int index, const
|
||||
!charset->get_enabled(unichar_id)) {
|
||||
continue; // disabled by whitelist/blacklist
|
||||
}
|
||||
ContinueUnichar(code, unichar_id, cert, worst_dict_cert, dict_ratio, use_dawgs, NC_ANYTHING,
|
||||
prev, step);
|
||||
ContinueUnichar(code, unichar_id, cert, worst_dict_cert, dict_ratio,
|
||||
use_dawgs, NC_ANYTHING, prev, step);
|
||||
if (top_n_flag == TN_TOP2 && code != null_char_) {
|
||||
float prob = outputs[code] + outputs[null_char_];
|
||||
if (prev != nullptr && prev_cont == NC_ANYTHING && prev->code != null_char_ &&
|
||||
if (prev != nullptr && prev_cont == NC_ANYTHING &&
|
||||
prev->code != null_char_ &&
|
||||
((prev->code == top_code_ && code == second_code_) ||
|
||||
(code == top_code_ && prev->code == second_code_))) {
|
||||
prob += outputs[prev->code];
|
||||
}
|
||||
cert = NetworkIO::ProbToCertainty(prob) + cert_offset;
|
||||
ContinueUnichar(code, unichar_id, cert, worst_dict_cert, dict_ratio, use_dawgs, NC_ONLY_DUP,
|
||||
prev, step);
|
||||
ContinueUnichar(code, unichar_id, cert, worst_dict_cert, dict_ratio,
|
||||
use_dawgs, NC_ONLY_DUP, prev, step);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -945,37 +996,44 @@ void RecodeBeamSearch::ContinueContext(const RecodeNode *prev, int index, const
|
||||
continue;
|
||||
}
|
||||
float cert = NetworkIO::ProbToCertainty(outputs[code]) + cert_offset;
|
||||
PushDupOrNoDawgIfBetter(length + 1, false, code, INVALID_UNICHAR_ID, cert, worst_dict_cert,
|
||||
dict_ratio, use_dawgs, NC_ANYTHING, prev, step);
|
||||
PushDupOrNoDawgIfBetter(length + 1, false, code, INVALID_UNICHAR_ID, cert,
|
||||
worst_dict_cert, dict_ratio, use_dawgs,
|
||||
NC_ANYTHING, prev, step);
|
||||
if (top_n_flag == TN_TOP2 && code != null_char_) {
|
||||
float prob = outputs[code] + outputs[null_char_];
|
||||
if (prev != nullptr && prev_cont == NC_ANYTHING && prev->code != null_char_ &&
|
||||
if (prev != nullptr && prev_cont == NC_ANYTHING &&
|
||||
prev->code != null_char_ &&
|
||||
((prev->code == top_code_ && code == second_code_) ||
|
||||
(code == top_code_ && prev->code == second_code_))) {
|
||||
prob += outputs[prev->code];
|
||||
}
|
||||
cert = NetworkIO::ProbToCertainty(prob) + cert_offset;
|
||||
PushDupOrNoDawgIfBetter(length + 1, false, code, INVALID_UNICHAR_ID, cert, worst_dict_cert,
|
||||
dict_ratio, use_dawgs, NC_ONLY_DUP, prev, step);
|
||||
PushDupOrNoDawgIfBetter(length + 1, false, code, INVALID_UNICHAR_ID,
|
||||
cert, worst_dict_cert, dict_ratio, use_dawgs,
|
||||
NC_ONLY_DUP, prev, step);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Continues for a new unichar, using dawg or non-dawg as per flag.
|
||||
void RecodeBeamSearch::ContinueUnichar(int code, int unichar_id, float cert, float worst_dict_cert,
|
||||
float dict_ratio, bool use_dawgs, NodeContinuation cont,
|
||||
const RecodeNode *prev, RecodeBeam *step) {
|
||||
void RecodeBeamSearch::ContinueUnichar(int code, int unichar_id, float cert,
|
||||
float worst_dict_cert, float dict_ratio,
|
||||
bool use_dawgs, NodeContinuation cont,
|
||||
const RecodeNode *prev,
|
||||
RecodeBeam *step) {
|
||||
if (use_dawgs) {
|
||||
if (cert > worst_dict_cert) {
|
||||
ContinueDawg(code, unichar_id, cert, cont, prev, step);
|
||||
}
|
||||
} else {
|
||||
RecodeHeap *nodawg_heap = &step->beams_[BeamIndex(false, cont, 0)];
|
||||
PushHeapIfBetter(kBeamWidths[0], code, unichar_id, TOP_CHOICE_PERM, false, false, false, false,
|
||||
cert * dict_ratio, prev, nullptr, nodawg_heap);
|
||||
if (dict_ != nullptr && ((unichar_id == UNICHAR_SPACE && cert > worst_dict_cert) ||
|
||||
!dict_->getUnicharset().IsSpaceDelimited(unichar_id))) {
|
||||
PushHeapIfBetter(kBeamWidths[0], code, unichar_id, TOP_CHOICE_PERM, false,
|
||||
false, false, false, cert * dict_ratio, prev, nullptr,
|
||||
nodawg_heap);
|
||||
if (dict_ != nullptr &&
|
||||
((unichar_id == UNICHAR_SPACE && cert > worst_dict_cert) ||
|
||||
!dict_->getUnicharset().IsSpaceDelimited(unichar_id))) {
|
||||
// Any top choice position that can start a new word, ie a space or
|
||||
// any non-space-delimited character, should also be considered
|
||||
// by the dawg search, so push initial dawg to the dawg heap.
|
||||
@ -995,8 +1053,8 @@ void RecodeBeamSearch::ContinueUnichar(int code, int unichar_id, float cert, flo
|
||||
} else {
|
||||
dawg_cert *= dict_ratio;
|
||||
}
|
||||
PushInitialDawgIfBetter(code, unichar_id, permuter, false, false, dawg_cert, cont, prev,
|
||||
step);
|
||||
PushInitialDawgIfBetter(code, unichar_id, permuter, false, false,
|
||||
dawg_cert, cont, prev, step);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1004,13 +1062,14 @@ void RecodeBeamSearch::ContinueUnichar(int code, int unichar_id, float cert, flo
|
||||
// Adds a RecodeNode composed of the tuple (code, unichar_id, cert, prev,
|
||||
// appropriate-dawg-args, cert) to the given heap (dawg_beam_) if unichar_id
|
||||
// is a valid continuation of whatever is in prev.
|
||||
void RecodeBeamSearch::ContinueDawg(int code, int unichar_id, float cert, NodeContinuation cont,
|
||||
void RecodeBeamSearch::ContinueDawg(int code, int unichar_id, float cert,
|
||||
NodeContinuation cont,
|
||||
const RecodeNode *prev, RecodeBeam *step) {
|
||||
RecodeHeap *dawg_heap = &step->beams_[BeamIndex(true, cont, 0)];
|
||||
RecodeHeap *nodawg_heap = &step->beams_[BeamIndex(false, cont, 0)];
|
||||
if (unichar_id == INVALID_UNICHAR_ID) {
|
||||
PushHeapIfBetter(kBeamWidths[0], code, unichar_id, NO_PERM, false, false, false, false, cert,
|
||||
prev, nullptr, dawg_heap);
|
||||
PushHeapIfBetter(kBeamWidths[0], code, unichar_id, NO_PERM, false, false,
|
||||
false, false, cert, prev, nullptr, dawg_heap);
|
||||
return;
|
||||
}
|
||||
// Avoid the dictionary probe if the score is a total loss.
|
||||
@ -1018,8 +1077,10 @@ void RecodeBeamSearch::ContinueDawg(int code, int unichar_id, float cert, NodeCo
|
||||
if (prev != nullptr) {
|
||||
score += prev->score;
|
||||
}
|
||||
if (dawg_heap->size() >= kBeamWidths[0] && score <= dawg_heap->PeekTop().data().score &&
|
||||
nodawg_heap->size() >= kBeamWidths[0] && score <= nodawg_heap->PeekTop().data().score) {
|
||||
if (dawg_heap->size() >= kBeamWidths[0] &&
|
||||
score <= dawg_heap->PeekTop().data().score &&
|
||||
nodawg_heap->size() >= kBeamWidths[0] &&
|
||||
score <= nodawg_heap->PeekTop().data().score) {
|
||||
return;
|
||||
}
|
||||
const RecodeNode *uni_prev = prev;
|
||||
@ -1033,10 +1094,11 @@ void RecodeBeamSearch::ContinueDawg(int code, int unichar_id, float cert, NodeCo
|
||||
if (uni_prev != nullptr && uni_prev->end_of_word) {
|
||||
// Space is good. Push initial state, to the dawg beam and a regular
|
||||
// space to the top choice beam.
|
||||
PushInitialDawgIfBetter(code, unichar_id, uni_prev->permuter, false, false, cert, cont, prev,
|
||||
step);
|
||||
PushHeapIfBetter(kBeamWidths[0], code, unichar_id, uni_prev->permuter, false, false, false,
|
||||
false, cert, prev, nullptr, nodawg_heap);
|
||||
PushInitialDawgIfBetter(code, unichar_id, uni_prev->permuter, false,
|
||||
false, cert, cont, prev, step);
|
||||
PushHeapIfBetter(kBeamWidths[0], code, unichar_id, uni_prev->permuter,
|
||||
false, false, false, false, cert, prev, nullptr,
|
||||
nodawg_heap);
|
||||
}
|
||||
return;
|
||||
} else if (uni_prev != nullptr && uni_prev->start_of_dawg &&
|
||||
@ -1060,18 +1122,21 @@ void RecodeBeamSearch::ContinueDawg(int code, int unichar_id, float cert, NodeCo
|
||||
} else {
|
||||
return; // Can't continue if not a dict word.
|
||||
}
|
||||
auto permuter = static_cast<PermuterType>(
|
||||
dict_->def_letter_is_okay(&dawg_args, dict_->getUnicharset(), unichar_id, false));
|
||||
auto permuter = static_cast<PermuterType>(dict_->def_letter_is_okay(
|
||||
&dawg_args, dict_->getUnicharset(), unichar_id, false));
|
||||
if (permuter != NO_PERM) {
|
||||
PushHeapIfBetter(kBeamWidths[0], code, unichar_id, permuter, false, word_start,
|
||||
dawg_args.valid_end, false, cert, prev, dawg_args.updated_dawgs, dawg_heap);
|
||||
PushHeapIfBetter(kBeamWidths[0], code, unichar_id, permuter, false,
|
||||
word_start, dawg_args.valid_end, false, cert, prev,
|
||||
dawg_args.updated_dawgs, dawg_heap);
|
||||
if (dawg_args.valid_end && !space_delimited_) {
|
||||
// We can start another word right away, so push initial state as well,
|
||||
// to the dawg beam, and the regular character to the top choice beam,
|
||||
// since non-dict words can start here too.
|
||||
PushInitialDawgIfBetter(code, unichar_id, permuter, word_start, true, cert, cont, prev, step);
|
||||
PushHeapIfBetter(kBeamWidths[0], code, unichar_id, permuter, false, word_start, true, false,
|
||||
cert, prev, nullptr, nodawg_heap);
|
||||
PushInitialDawgIfBetter(code, unichar_id, permuter, word_start, true,
|
||||
cert, cont, prev, step);
|
||||
PushHeapIfBetter(kBeamWidths[0], code, unichar_id, permuter, false,
|
||||
word_start, true, false, cert, prev, nullptr,
|
||||
nodawg_heap);
|
||||
}
|
||||
} else {
|
||||
delete updated_dawgs;
|
||||
@ -1081,9 +1146,11 @@ void RecodeBeamSearch::ContinueDawg(int code, int unichar_id, float cert, NodeCo
|
||||
// Adds a RecodeNode composed of the tuple (code, unichar_id,
|
||||
// initial-dawg-state, prev, cert) to the given heap if there is room or if
|
||||
// better than the current worst element if already full.
|
||||
void RecodeBeamSearch::PushInitialDawgIfBetter(int code, int unichar_id, PermuterType permuter,
|
||||
void RecodeBeamSearch::PushInitialDawgIfBetter(int code, int unichar_id,
|
||||
PermuterType permuter,
|
||||
bool start, bool end, float cert,
|
||||
NodeContinuation cont, const RecodeNode *prev,
|
||||
NodeContinuation cont,
|
||||
const RecodeNode *prev,
|
||||
RecodeBeam *step) {
|
||||
RecodeNode *best_initial_dawg = &step->best_initial_dawgs_[cont];
|
||||
float score = cert;
|
||||
@ -1093,8 +1160,9 @@ void RecodeBeamSearch::PushInitialDawgIfBetter(int code, int unichar_id, Permute
|
||||
if (best_initial_dawg->code < 0 || score > best_initial_dawg->score) {
|
||||
auto *initial_dawgs = new DawgPositionVector;
|
||||
dict_->default_dawgs(initial_dawgs, false);
|
||||
RecodeNode node(code, unichar_id, permuter, true, start, end, false, cert, score, prev,
|
||||
initial_dawgs, ComputeCodeHash(code, false, prev));
|
||||
RecodeNode node(code, unichar_id, permuter, true, start, end, false, cert,
|
||||
score, prev, initial_dawgs,
|
||||
ComputeCodeHash(code, false, prev));
|
||||
*best_initial_dawg = node;
|
||||
}
|
||||
}
|
||||
@ -1103,22 +1171,23 @@ void RecodeBeamSearch::PushInitialDawgIfBetter(int code, int unichar_id, Permute
|
||||
// false, false, false, false, cert, prev, nullptr) to heap if there is room
|
||||
// or if better than the current worst element if already full.
|
||||
/* static */
|
||||
void RecodeBeamSearch::PushDupOrNoDawgIfBetter(int length, bool dup, int code, int unichar_id,
|
||||
float cert, float worst_dict_cert, float dict_ratio,
|
||||
bool use_dawgs, NodeContinuation cont,
|
||||
const RecodeNode *prev, RecodeBeam *step) {
|
||||
void RecodeBeamSearch::PushDupOrNoDawgIfBetter(
|
||||
int length, bool dup, int code, int unichar_id, float cert,
|
||||
float worst_dict_cert, float dict_ratio, bool use_dawgs,
|
||||
NodeContinuation cont, const RecodeNode *prev, RecodeBeam *step) {
|
||||
int index = BeamIndex(use_dawgs, cont, length);
|
||||
if (use_dawgs) {
|
||||
if (cert > worst_dict_cert) {
|
||||
PushHeapIfBetter(kBeamWidths[length], code, unichar_id, prev ? prev->permuter : NO_PERM,
|
||||
false, false, false, dup, cert, prev, nullptr, &step->beams_[index]);
|
||||
PushHeapIfBetter(kBeamWidths[length], code, unichar_id,
|
||||
prev ? prev->permuter : NO_PERM, false, false, false,
|
||||
dup, cert, prev, nullptr, &step->beams_[index]);
|
||||
}
|
||||
} else {
|
||||
cert *= dict_ratio;
|
||||
if (cert >= kMinCertainty || code == null_char_) {
|
||||
PushHeapIfBetter(kBeamWidths[length], code, unichar_id,
|
||||
prev ? prev->permuter : TOP_CHOICE_PERM, false, false, false, dup, cert,
|
||||
prev, nullptr, &step->beams_[index]);
|
||||
prev ? prev->permuter : TOP_CHOICE_PERM, false, false,
|
||||
false, dup, cert, prev, nullptr, &step->beams_[index]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1127,17 +1196,19 @@ void RecodeBeamSearch::PushDupOrNoDawgIfBetter(int length, bool dup, int code, i
|
||||
// dawg_start, word_start, end, dup, cert, prev, d) to heap if there is room
|
||||
// or if better than the current worst element if already full.
|
||||
void RecodeBeamSearch::PushHeapIfBetter(int max_size, int code, int unichar_id,
|
||||
PermuterType permuter, bool dawg_start, bool word_start,
|
||||
bool end, bool dup, float cert, const RecodeNode *prev,
|
||||
DawgPositionVector *d, RecodeHeap *heap) {
|
||||
PermuterType permuter, bool dawg_start,
|
||||
bool word_start, bool end, bool dup,
|
||||
float cert, const RecodeNode *prev,
|
||||
DawgPositionVector *d,
|
||||
RecodeHeap *heap) {
|
||||
float score = cert;
|
||||
if (prev != nullptr) {
|
||||
score += prev->score;
|
||||
}
|
||||
if (heap->size() < max_size || score > heap->PeekTop().data().score) {
|
||||
uint64_t hash = ComputeCodeHash(code, dup, prev);
|
||||
RecodeNode node(code, unichar_id, permuter, dawg_start, word_start, end, dup, cert, score, prev,
|
||||
d, hash);
|
||||
RecodeNode node(code, unichar_id, permuter, dawg_start, word_start, end,
|
||||
dup, cert, score, prev, d, hash);
|
||||
if (UpdateHeapIfMatched(&node, heap)) {
|
||||
return;
|
||||
}
|
||||
@ -1154,7 +1225,8 @@ void RecodeBeamSearch::PushHeapIfBetter(int max_size, int code, int unichar_id,
|
||||
|
||||
// Adds a RecodeNode to heap if there is room
|
||||
// or if better than the current worst element if already full.
|
||||
void RecodeBeamSearch::PushHeapIfBetter(int max_size, RecodeNode *node, RecodeHeap *heap) {
|
||||
void RecodeBeamSearch::PushHeapIfBetter(int max_size, RecodeNode *node,
|
||||
RecodeHeap *heap) {
|
||||
if (heap->size() < max_size || node->score > heap->PeekTop().data().score) {
|
||||
if (UpdateHeapIfMatched(node, heap)) {
|
||||
return;
|
||||
@ -1170,7 +1242,8 @@ void RecodeBeamSearch::PushHeapIfBetter(int max_size, RecodeNode *node, RecodeHe
|
||||
|
||||
// Searches the heap for a matching entry, and updates the score with
|
||||
// reshuffle if needed. Returns true if there was a match.
|
||||
bool RecodeBeamSearch::UpdateHeapIfMatched(RecodeNode *new_node, RecodeHeap *heap) {
|
||||
bool RecodeBeamSearch::UpdateHeapIfMatched(RecodeNode *new_node,
|
||||
RecodeHeap *heap) {
|
||||
// TODO(rays) consider hash map instead of linear search.
|
||||
// It might not be faster because the hash map would have to be updated
|
||||
// every time a heap reshuffle happens, and that would be a lot of overhead.
|
||||
@ -1178,7 +1251,8 @@ bool RecodeBeamSearch::UpdateHeapIfMatched(RecodeNode *new_node, RecodeHeap *hea
|
||||
for (auto &i : nodes) {
|
||||
RecodeNode &node = i.data();
|
||||
if (node.code == new_node->code && node.code_hash == new_node->code_hash &&
|
||||
node.permuter == new_node->permuter && node.start_of_dawg == new_node->start_of_dawg) {
|
||||
node.permuter == new_node->permuter &&
|
||||
node.start_of_dawg == new_node->start_of_dawg) {
|
||||
if (new_node->score > node.score) {
|
||||
// The new one is better. Update the entire node in the heap and
|
||||
// reshuffle.
|
||||
@ -1193,7 +1267,8 @@ bool RecodeBeamSearch::UpdateHeapIfMatched(RecodeNode *new_node, RecodeHeap *hea
|
||||
}
|
||||
|
||||
// Computes and returns the code-hash for the given code and prev.
|
||||
uint64_t RecodeBeamSearch::ComputeCodeHash(int code, bool dup, const RecodeNode *prev) const {
|
||||
uint64_t RecodeBeamSearch::ComputeCodeHash(int code, bool dup,
|
||||
const RecodeNode *prev) const {
|
||||
uint64_t hash = prev == nullptr ? 0 : prev->code_hash;
|
||||
if (!dup && code != null_char_) {
|
||||
int num_classes = recoder_.code_range();
|
||||
@ -1209,8 +1284,9 @@ uint64_t RecodeBeamSearch::ComputeCodeHash(int code, bool dup, const RecodeNode
|
||||
// during Decode. On return the best_nodes vector essentially contains the set
|
||||
// of code, score pairs that make the optimal path with the constraint that
|
||||
// the recoder can decode the code sequence back to a sequence of unichar-ids.
|
||||
void RecodeBeamSearch::ExtractBestPaths(std::vector<const RecodeNode *> *best_nodes,
|
||||
std::vector<const RecodeNode *> *second_nodes) const {
|
||||
void RecodeBeamSearch::ExtractBestPaths(
|
||||
std::vector<const RecodeNode *> *best_nodes,
|
||||
std::vector<const RecodeNode *> *second_nodes) const {
|
||||
// Scan both beams to extract the best and second best paths.
|
||||
const RecodeNode *best_node = nullptr;
|
||||
const RecodeNode *second_best_node = nullptr;
|
||||
@ -1230,11 +1306,13 @@ void RecodeBeamSearch::ExtractBestPaths(std::vector<const RecodeNode *> *best_no
|
||||
// last valid unichar_id.
|
||||
const RecodeNode *dawg_node = node;
|
||||
while (dawg_node != nullptr &&
|
||||
(dawg_node->unichar_id == INVALID_UNICHAR_ID || dawg_node->duplicate)) {
|
||||
(dawg_node->unichar_id == INVALID_UNICHAR_ID ||
|
||||
dawg_node->duplicate)) {
|
||||
dawg_node = dawg_node->prev;
|
||||
}
|
||||
if (dawg_node == nullptr ||
|
||||
(!dawg_node->end_of_word && dawg_node->unichar_id != UNICHAR_SPACE)) {
|
||||
(!dawg_node->end_of_word &&
|
||||
dawg_node->unichar_id != UNICHAR_SPACE)) {
|
||||
// Dawg node is not valid.
|
||||
continue;
|
||||
}
|
||||
@ -1242,7 +1320,8 @@ void RecodeBeamSearch::ExtractBestPaths(std::vector<const RecodeNode *> *best_no
|
||||
if (best_node == nullptr || node->score > best_node->score) {
|
||||
second_best_node = best_node;
|
||||
best_node = node;
|
||||
} else if (second_best_node == nullptr || node->score > second_best_node->score) {
|
||||
} else if (second_best_node == nullptr ||
|
||||
node->score > second_best_node->score) {
|
||||
second_best_node = node;
|
||||
}
|
||||
}
|
||||
@ -1256,8 +1335,8 @@ void RecodeBeamSearch::ExtractBestPaths(std::vector<const RecodeNode *> *best_no
|
||||
|
||||
// Helper backtracks through the lattice from the given node, storing the
|
||||
// path and reversing it.
|
||||
void RecodeBeamSearch::ExtractPath(const RecodeNode *node,
|
||||
std::vector<const RecodeNode *> *path) const {
|
||||
void RecodeBeamSearch::ExtractPath(
|
||||
const RecodeNode *node, std::vector<const RecodeNode *> *path) const {
|
||||
path->clear();
|
||||
while (node != nullptr) {
|
||||
path->push_back(node);
|
||||
@ -1266,7 +1345,8 @@ void RecodeBeamSearch::ExtractPath(const RecodeNode *node,
|
||||
std::reverse(path->begin(), path->end());
|
||||
}
|
||||
|
||||
void RecodeBeamSearch::ExtractPath(const RecodeNode *node, std::vector<const RecodeNode *> *path,
|
||||
void RecodeBeamSearch::ExtractPath(const RecodeNode *node,
|
||||
std::vector<const RecodeNode *> *path,
|
||||
int limiter) const {
|
||||
int pathcounter = 0;
|
||||
path->clear();
|
||||
@ -1279,8 +1359,9 @@ void RecodeBeamSearch::ExtractPath(const RecodeNode *node, std::vector<const Rec
|
||||
}
|
||||
|
||||
// Helper prints debug information on the given lattice path.
|
||||
void RecodeBeamSearch::DebugPath(const UNICHARSET *unicharset,
|
||||
const std::vector<const RecodeNode *> &path) const {
|
||||
void RecodeBeamSearch::DebugPath(
|
||||
const UNICHARSET *unicharset,
|
||||
const std::vector<const RecodeNode *> &path) const {
|
||||
for (unsigned c = 0; c < path.size(); ++c) {
|
||||
const RecodeNode &node = *path[c];
|
||||
tprintf("%u ", c);
|
||||
@ -1289,19 +1370,18 @@ void RecodeBeamSearch::DebugPath(const UNICHARSET *unicharset,
|
||||
}
|
||||
|
||||
// Helper prints debug information on the given unichar path.
|
||||
void RecodeBeamSearch::DebugUnicharPath(const UNICHARSET *unicharset,
|
||||
const std::vector<const RecodeNode *> &path,
|
||||
const std::vector<int> &unichar_ids,
|
||||
const std::vector<float> &certs,
|
||||
const std::vector<float> &ratings,
|
||||
const std::vector<int> &xcoords) const {
|
||||
void RecodeBeamSearch::DebugUnicharPath(
|
||||
const UNICHARSET *unicharset, const std::vector<const RecodeNode *> &path,
|
||||
const std::vector<int> &unichar_ids, const std::vector<float> &certs,
|
||||
const std::vector<float> &ratings, const std::vector<int> &xcoords) const {
|
||||
auto num_ids = unichar_ids.size();
|
||||
double total_rating = 0.0;
|
||||
for (unsigned c = 0; c < num_ids; ++c) {
|
||||
int coord = xcoords[c];
|
||||
tprintf("%d %d=%s r=%g, c=%g, s=%d, e=%d, perm=%d\n", coord, unichar_ids[c],
|
||||
unicharset->debug_str(unichar_ids[c]).c_str(), ratings[c], certs[c],
|
||||
path[coord]->start_of_word, path[coord]->end_of_word, path[coord]->permuter);
|
||||
path[coord]->start_of_word, path[coord]->end_of_word,
|
||||
path[coord]->permuter);
|
||||
total_rating += ratings[c];
|
||||
}
|
||||
tprintf("Path total rating = %g\n", total_rating);
|
||||
|
@ -63,11 +63,11 @@ const double kMinFittingLinespacings = 0.25;
|
||||
namespace tesseract {
|
||||
|
||||
BaselineRow::BaselineRow(double line_spacing, TO_ROW *to_row)
|
||||
: blobs_(to_row->blob_list())
|
||||
, baseline_pt1_(0.0f, 0.0f)
|
||||
, baseline_pt2_(0.0f, 0.0f)
|
||||
, baseline_error_(0.0)
|
||||
, good_baseline_(false) {
|
||||
: blobs_(to_row->blob_list()),
|
||||
baseline_pt1_(0.0f, 0.0f),
|
||||
baseline_pt2_(0.0f, 0.0f),
|
||||
baseline_error_(0.0),
|
||||
good_baseline_(false) {
|
||||
ComputeBoundingBox();
|
||||
// Compute a scale factor for rounding to ints.
|
||||
disp_quant_factor_ = kOffsetQuantizationFactor * line_spacing;
|
||||
@ -87,11 +87,11 @@ void BaselineRow::SetupOldLineParameters(TO_ROW *row) const {
|
||||
|
||||
// Outputs diagnostic information.
|
||||
void BaselineRow::Print() const {
|
||||
tprintf("Baseline (%g,%g)->(%g,%g), angle=%g, intercept=%g\n", baseline_pt1_.x(),
|
||||
baseline_pt1_.y(), baseline_pt2_.x(), baseline_pt2_.y(), BaselineAngle(),
|
||||
StraightYAtX(0.0));
|
||||
tprintf("Quant factor=%g, error=%g, good=%d, box:", disp_quant_factor_, baseline_error_,
|
||||
good_baseline_);
|
||||
tprintf("Baseline (%g,%g)->(%g,%g), angle=%g, intercept=%g\n",
|
||||
baseline_pt1_.x(), baseline_pt1_.y(), baseline_pt2_.x(),
|
||||
baseline_pt2_.y(), BaselineAngle(), StraightYAtX(0.0));
|
||||
tprintf("Quant factor=%g, error=%g, good=%d, box:", disp_quant_factor_,
|
||||
baseline_error_, good_baseline_);
|
||||
bounding_box_.print();
|
||||
}
|
||||
|
||||
@ -133,8 +133,9 @@ double BaselineRow::StraightYAtX(double x) const {
|
||||
if (denominator == 0.0) {
|
||||
return (baseline_pt1_.y() + baseline_pt2_.y()) / 2.0;
|
||||
}
|
||||
return baseline_pt1_.y() +
|
||||
(x - baseline_pt1_.x()) * (baseline_pt2_.y() - baseline_pt1_.y()) / denominator;
|
||||
return baseline_pt1_.y() + (x - baseline_pt1_.x()) *
|
||||
(baseline_pt2_.y() - baseline_pt1_.y()) /
|
||||
denominator;
|
||||
}
|
||||
|
||||
// Fits a straight baseline to the points. Returns true if it had enough
|
||||
@ -170,7 +171,8 @@ bool BaselineRow::FitBaseline(bool use_box_bottoms) {
|
||||
baseline_error_ = fitter_.Fit(&pt1, &pt2);
|
||||
baseline_pt1_ = pt1;
|
||||
baseline_pt2_ = pt2;
|
||||
if (baseline_error_ > max_baseline_error_ && fitter_.SufficientPointsForIndependentFit()) {
|
||||
if (baseline_error_ > max_baseline_error_ &&
|
||||
fitter_.SufficientPointsForIndependentFit()) {
|
||||
// The fit was bad but there were plenty of points, so try skipping
|
||||
// the first and last few, and use the new line if it dramatically improves
|
||||
// the error of fit.
|
||||
@ -184,7 +186,10 @@ bool BaselineRow::FitBaseline(bool use_box_bottoms) {
|
||||
int debug = 0;
|
||||
#ifdef kDebugYCoord
|
||||
Print();
|
||||
debug = bounding_box_.bottom() < kDebugYCoord && bounding_box_.top() > kDebugYCoord ? 3 : 2;
|
||||
debug = bounding_box_.bottom() < kDebugYCoord &&
|
||||
bounding_box_.top() > kDebugYCoord
|
||||
? 3
|
||||
: 2;
|
||||
#endif
|
||||
// Now we obtained a direction from that fit, see if we can improve the
|
||||
// fit using the same direction and some other start point.
|
||||
@ -218,7 +223,8 @@ void BaselineRow::AdjustBaselineToParallel(int debug, const FCOORD &direction) {
|
||||
return;
|
||||
}
|
||||
#ifdef kDebugYCoord
|
||||
if (bounding_box_.bottom() < kDebugYCoord && bounding_box_.top() > kDebugYCoord && debug < 3)
|
||||
if (bounding_box_.bottom() < kDebugYCoord &&
|
||||
bounding_box_.top() > kDebugYCoord && debug < 3)
|
||||
debug = 3;
|
||||
#endif
|
||||
FitConstrainedIfBetter(debug, direction, 0.0, displacement_modes_[0]);
|
||||
@ -226,7 +232,8 @@ void BaselineRow::AdjustBaselineToParallel(int debug, const FCOORD &direction) {
|
||||
|
||||
// Modifies the baseline to snap to the textline grid if the existing
|
||||
// result is not good enough.
|
||||
double BaselineRow::AdjustBaselineToGrid(int debug, const FCOORD &direction, double line_spacing,
|
||||
double BaselineRow::AdjustBaselineToGrid(int debug, const FCOORD &direction,
|
||||
double line_spacing,
|
||||
double line_offset) {
|
||||
if (blobs_->empty()) {
|
||||
if (debug > 1) {
|
||||
@ -240,7 +247,8 @@ double BaselineRow::AdjustBaselineToGrid(int debug, const FCOORD &direction, dou
|
||||
int best_index = -1;
|
||||
for (unsigned i = 0; i < displacement_modes_.size(); ++i) {
|
||||
double blob_y = displacement_modes_[i];
|
||||
double error = BaselineBlock::SpacingModelError(blob_y, line_spacing, line_offset);
|
||||
double error =
|
||||
BaselineBlock::SpacingModelError(blob_y, line_spacing, line_offset);
|
||||
if (debug > 1) {
|
||||
tprintf("Mode at %g has error %g from model \n", blob_y, error);
|
||||
}
|
||||
@ -263,9 +271,11 @@ double BaselineRow::AdjustBaselineToGrid(int debug, const FCOORD &direction, dou
|
||||
displacement_modes_[best_index]);
|
||||
bounding_box_.print();
|
||||
}
|
||||
FitConstrainedIfBetter(debug, direction, model_margin, displacement_modes_[best_index]);
|
||||
FitConstrainedIfBetter(debug, direction, model_margin,
|
||||
displacement_modes_[best_index]);
|
||||
} else if (debug > 1) {
|
||||
tprintf("Linespacing model only moves current line by %g for row at:", shift);
|
||||
tprintf("Linespacing model only moves current line by %g for row at:",
|
||||
shift);
|
||||
bounding_box_.print();
|
||||
}
|
||||
} else if (debug > 1) {
|
||||
@ -296,7 +306,8 @@ void BaselineRow::SetupBlobDisplacements(const FCOORD &direction) {
|
||||
if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord)
|
||||
debug = true;
|
||||
#endif
|
||||
FCOORD blob_pos((box.left() + box.right()) / 2.0f, blob->baseline_position());
|
||||
FCOORD blob_pos((box.left() + box.right()) / 2.0f,
|
||||
blob->baseline_position());
|
||||
double offset = direction * blob_pos;
|
||||
perp_blob_dists.push_back(offset);
|
||||
#ifdef kDebugYCoord
|
||||
@ -338,24 +349,28 @@ void BaselineRow::SetupBlobDisplacements(const FCOORD &direction) {
|
||||
// Otherwise the new fit will only replace the old if it is really better,
|
||||
// or the old fit is marked bad and the new fit has sufficient points, as
|
||||
// well as being within the max_baseline_error_.
|
||||
void BaselineRow::FitConstrainedIfBetter(int debug, const FCOORD &direction, double cheat_allowance,
|
||||
void BaselineRow::FitConstrainedIfBetter(int debug, const FCOORD &direction,
|
||||
double cheat_allowance,
|
||||
double target_offset) {
|
||||
double halfrange = fit_halfrange_ * direction.length();
|
||||
double min_dist = target_offset - halfrange;
|
||||
double max_dist = target_offset + halfrange;
|
||||
ICOORD line_pt;
|
||||
double new_error = fitter_.ConstrainedFit(direction, min_dist, max_dist, debug > 2, &line_pt);
|
||||
double new_error = fitter_.ConstrainedFit(direction, min_dist, max_dist,
|
||||
debug > 2, &line_pt);
|
||||
// Allow cheat_allowance off the new error
|
||||
new_error -= cheat_allowance;
|
||||
double old_angle = BaselineAngle();
|
||||
double new_angle = direction.angle();
|
||||
if (debug > 1) {
|
||||
tprintf("Constrained error = %g, original = %g", new_error, baseline_error_);
|
||||
tprintf(" angles = %g, %g, delta=%g vs threshold %g\n", old_angle, new_angle,
|
||||
new_angle - old_angle, kMaxSkewDeviation);
|
||||
tprintf("Constrained error = %g, original = %g", new_error,
|
||||
baseline_error_);
|
||||
tprintf(" angles = %g, %g, delta=%g vs threshold %g\n", old_angle,
|
||||
new_angle, new_angle - old_angle, kMaxSkewDeviation);
|
||||
}
|
||||
bool new_good_baseline = new_error <= max_baseline_error_ &&
|
||||
(cheat_allowance > 0.0 || fitter_.SufficientPointsForIndependentFit());
|
||||
bool new_good_baseline =
|
||||
new_error <= max_baseline_error_ &&
|
||||
(cheat_allowance > 0.0 || fitter_.SufficientPointsForIndependentFit());
|
||||
// The new will replace the old if any are true:
|
||||
// 1. the new error is better
|
||||
// 2. the old is NOT good, but the new is
|
||||
@ -368,7 +383,8 @@ void BaselineRow::FitConstrainedIfBetter(int debug, const FCOORD &direction, dou
|
||||
baseline_pt2_ = baseline_pt1_ + direction;
|
||||
good_baseline_ = new_good_baseline;
|
||||
if (debug > 1) {
|
||||
tprintf("Replacing with constrained baseline, good = %d\n", good_baseline_);
|
||||
tprintf("Replacing with constrained baseline, good = %d\n",
|
||||
good_baseline_);
|
||||
}
|
||||
} else if (debug > 1) {
|
||||
tprintf("Keeping old baseline\n");
|
||||
@ -400,14 +416,14 @@ void BaselineRow::ComputeBoundingBox() {
|
||||
}
|
||||
|
||||
BaselineBlock::BaselineBlock(int debug_level, bool non_text, TO_BLOCK *block)
|
||||
: block_(block)
|
||||
, debug_level_(debug_level)
|
||||
, non_text_block_(non_text)
|
||||
, good_skew_angle_(false)
|
||||
, skew_angle_(0.0)
|
||||
, line_spacing_(block->line_spacing)
|
||||
, line_offset_(0.0)
|
||||
, model_error_(0.0) {
|
||||
: block_(block),
|
||||
debug_level_(debug_level),
|
||||
non_text_block_(non_text),
|
||||
good_skew_angle_(false),
|
||||
skew_angle_(0.0),
|
||||
line_spacing_(block->line_spacing),
|
||||
line_offset_(0.0),
|
||||
model_error_(0.0) {
|
||||
TO_ROW_IT row_it(block_->get_rows());
|
||||
for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
|
||||
// Sort the blobs on the rows.
|
||||
@ -418,7 +434,8 @@ BaselineBlock::BaselineBlock(int debug_level, bool non_text, TO_BLOCK *block)
|
||||
|
||||
// Computes and returns the absolute error of the given perp_disp from the
|
||||
// given linespacing model.
|
||||
double BaselineBlock::SpacingModelError(double perp_disp, double line_spacing, double line_offset) {
|
||||
double BaselineBlock::SpacingModelError(double perp_disp, double line_spacing,
|
||||
double line_offset) {
|
||||
// Round to the nearest multiple of line_spacing + line offset.
|
||||
int multiple = IntCastRounded((perp_disp - line_offset) / line_spacing);
|
||||
double model_y = line_spacing * multiple + line_offset;
|
||||
@ -452,7 +469,8 @@ bool BaselineBlock::FitBaselinesAndFindSkew(bool use_box_bottoms) {
|
||||
good_skew_angle_ = false;
|
||||
}
|
||||
if (debug_level_ > 0) {
|
||||
tprintf("Initial block skew angle = %g, good = %d\n", skew_angle_, good_skew_angle_);
|
||||
tprintf("Initial block skew angle = %g, good = %d\n", skew_angle_,
|
||||
good_skew_angle_);
|
||||
}
|
||||
return good_skew_angle_;
|
||||
}
|
||||
@ -483,9 +501,11 @@ void BaselineBlock::ParallelizeBaselines(double default_block_skew) {
|
||||
// baseline.
|
||||
// Start by finding the row that is best fitted to the model.
|
||||
unsigned best_row = 0;
|
||||
double best_error = SpacingModelError(rows_[0]->PerpDisp(direction), line_spacing_, line_offset_);
|
||||
double best_error = SpacingModelError(rows_[0]->PerpDisp(direction),
|
||||
line_spacing_, line_offset_);
|
||||
for (unsigned r = 1; r < rows_.size(); ++r) {
|
||||
double error = SpacingModelError(rows_[r]->PerpDisp(direction), line_spacing_, line_offset_);
|
||||
double error = SpacingModelError(rows_[r]->PerpDisp(direction),
|
||||
line_spacing_, line_offset_);
|
||||
if (error < best_error) {
|
||||
best_error = error;
|
||||
best_row = r;
|
||||
@ -494,11 +514,13 @@ void BaselineBlock::ParallelizeBaselines(double default_block_skew) {
|
||||
// Starting at the best fitting row, work outwards, syncing the offset.
|
||||
double offset = line_offset_;
|
||||
for (auto r = best_row + 1; r < rows_.size(); ++r) {
|
||||
offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction, line_spacing_, offset);
|
||||
offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction,
|
||||
line_spacing_, offset);
|
||||
}
|
||||
offset = line_offset_;
|
||||
for (int r = best_row - 1; r >= 0; --r) {
|
||||
offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction, line_spacing_, offset);
|
||||
offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction,
|
||||
line_spacing_, offset);
|
||||
}
|
||||
}
|
||||
|
||||
@ -506,7 +528,8 @@ void BaselineBlock::ParallelizeBaselines(double default_block_skew) {
|
||||
void BaselineBlock::SetupBlockParameters() const {
|
||||
if (line_spacing_ > 0.0) {
|
||||
// Where was block_line_spacing set before?
|
||||
float min_spacing = std::min(block_->line_spacing, static_cast<float>(line_spacing_));
|
||||
float min_spacing =
|
||||
std::min(block_->line_spacing, static_cast<float>(line_spacing_));
|
||||
if (min_spacing < block_->line_size) {
|
||||
block_->line_size = min_spacing;
|
||||
}
|
||||
@ -549,8 +572,8 @@ void BaselineBlock::PrepareForSplineFitting(ICOORD page_tr, bool remove_noise) {
|
||||
// As a side-effect, computes the xheights of the rows and the block.
|
||||
// Although x-height estimation is conceptually separate, it is part of
|
||||
// detecting perspective distortion and therefore baseline fitting.
|
||||
void BaselineBlock::FitBaselineSplines(bool enable_splines, bool show_final_rows,
|
||||
Textord *textord) {
|
||||
void BaselineBlock::FitBaselineSplines(bool enable_splines,
|
||||
bool show_final_rows, Textord *textord) {
|
||||
double gradient = tan(skew_angle_);
|
||||
FCOORD rotation(1.0f, 0.0f);
|
||||
|
||||
@ -565,8 +588,8 @@ void BaselineBlock::FitBaselineSplines(bool enable_splines, bool show_final_rows
|
||||
int32_t xstarts[2] = {block_box.left(), block_box.right()};
|
||||
double coeffs[3] = {0.0, row->line_m(), row->line_c()};
|
||||
row->baseline = QSPLINE(1, xstarts, coeffs);
|
||||
textord->compute_row_xheight(row, block_->block->classify_rotation(), row->line_m(),
|
||||
block_->line_size);
|
||||
textord->compute_row_xheight(row, block_->block->classify_rotation(),
|
||||
row->line_m(), block_->line_size);
|
||||
}
|
||||
}
|
||||
textord->compute_block_xheight(block_, gradient);
|
||||
@ -599,7 +622,8 @@ void BaselineBlock::DrawFinalRows(const ICOORD &page_tr) {
|
||||
}
|
||||
plot_blob_list(win, &block_->blobs, ScrollView::MAGENTA, ScrollView::WHITE);
|
||||
// Show discarded blobs.
|
||||
plot_blob_list(win, &block_->underlines, ScrollView::YELLOW, ScrollView::CORAL);
|
||||
plot_blob_list(win, &block_->underlines, ScrollView::YELLOW,
|
||||
ScrollView::CORAL);
|
||||
if (block_->blobs.length() > 0) {
|
||||
tprintf("%d blobs discarded as noise\n", block_->blobs.length());
|
||||
}
|
||||
@ -647,8 +671,9 @@ bool BaselineBlock::ComputeLineSpacing() {
|
||||
}
|
||||
}
|
||||
if (debug_level_ > 0) {
|
||||
tprintf("Spacing %g, in %zu rows, %d gaps fitted out of %d non-trivial\n", line_spacing_,
|
||||
row_positions.size(), fitting_gaps, non_trivial_gaps);
|
||||
tprintf("Spacing %g, in %zu rows, %d gaps fitted out of %d non-trivial\n",
|
||||
line_spacing_, row_positions.size(), fitting_gaps,
|
||||
non_trivial_gaps);
|
||||
}
|
||||
return fitting_gaps > non_trivial_gaps * kMinFittingLinespacings;
|
||||
}
|
||||
@ -686,7 +711,8 @@ void BaselineBlock::EstimateLineSpacing() {
|
||||
// Find the first row after row that overlaps it significantly.
|
||||
const TBOX &row_box = row->bounding_box();
|
||||
unsigned r2;
|
||||
for (r2 = r + 1; r2 < rows_.size() && !row_box.major_x_overlap(rows_[r2]->bounding_box());
|
||||
for (r2 = r + 1; r2 < rows_.size() &&
|
||||
!row_box.major_x_overlap(rows_[r2]->bounding_box());
|
||||
++r2) {
|
||||
;
|
||||
}
|
||||
@ -703,7 +729,8 @@ void BaselineBlock::EstimateLineSpacing() {
|
||||
// If we have at least one value, use it, otherwise leave the previous
|
||||
// value unchanged.
|
||||
if (!spacings.empty()) {
|
||||
std::nth_element(spacings.begin(), spacings.begin() + spacings.size() / 2, spacings.end());
|
||||
std::nth_element(spacings.begin(), spacings.begin() + spacings.size() / 2,
|
||||
spacings.end());
|
||||
line_spacing_ = spacings[spacings.size() / 2];
|
||||
if (debug_level_ > 1) {
|
||||
tprintf("Estimate of linespacing = %g\n", line_spacing_);
|
||||
@ -718,14 +745,16 @@ void BaselineBlock::EstimateLineSpacing() {
|
||||
void BaselineBlock::RefineLineSpacing(const std::vector<double> &positions) {
|
||||
double spacings[3], offsets[3], errors[3];
|
||||
int index_range;
|
||||
errors[0] =
|
||||
FitLineSpacingModel(positions, line_spacing_, &spacings[0], &offsets[0], &index_range);
|
||||
errors[0] = FitLineSpacingModel(positions, line_spacing_, &spacings[0],
|
||||
&offsets[0], &index_range);
|
||||
if (index_range > 1) {
|
||||
double spacing_plus = line_spacing_ / (1.0 + 1.0 / index_range);
|
||||
// Try the hypotheses that there might be index_range +/- 1 line spaces.
|
||||
errors[1] = FitLineSpacingModel(positions, spacing_plus, &spacings[1], &offsets[1], nullptr);
|
||||
errors[1] = FitLineSpacingModel(positions, spacing_plus, &spacings[1],
|
||||
&offsets[1], nullptr);
|
||||
double spacing_minus = line_spacing_ / (1.0 - 1.0 / index_range);
|
||||
errors[2] = FitLineSpacingModel(positions, spacing_minus, &spacings[2], &offsets[2], nullptr);
|
||||
errors[2] = FitLineSpacingModel(positions, spacing_minus, &spacings[2],
|
||||
&offsets[2], nullptr);
|
||||
for (int i = 1; i <= 2; ++i) {
|
||||
if (errors[i] < errors[0]) {
|
||||
spacings[0] = spacings[i];
|
||||
@ -739,8 +768,8 @@ void BaselineBlock::RefineLineSpacing(const std::vector<double> &positions) {
|
||||
line_offset_ = offsets[0];
|
||||
model_error_ = errors[0];
|
||||
if (debug_level_ > 0) {
|
||||
tprintf("Final linespacing model = %g + offset %g, error %g\n", line_spacing_, line_offset_,
|
||||
model_error_);
|
||||
tprintf("Final linespacing model = %g + offset %g, error %g\n",
|
||||
line_spacing_, line_offset_, model_error_);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -750,8 +779,9 @@ void BaselineBlock::RefineLineSpacing(const std::vector<double> &positions) {
|
||||
// and the corresponding intercept in c_out, and the number of spacings seen
|
||||
// in index_delta. Returns the error of fit to the line spacing model.
|
||||
// Uses a simple linear regression, but optimized the offset using the median.
|
||||
double BaselineBlock::FitLineSpacingModel(const std::vector<double> &positions, double m_in,
|
||||
double *m_out, double *c_out, int *index_delta) {
|
||||
double BaselineBlock::FitLineSpacingModel(const std::vector<double> &positions,
|
||||
double m_in, double *m_out,
|
||||
double *c_out, int *index_delta) {
|
||||
if (m_in == 0.0f || positions.size() < 2) {
|
||||
*m_out = m_in;
|
||||
*c_out = 0.0;
|
||||
@ -762,6 +792,7 @@ double BaselineBlock::FitLineSpacingModel(const std::vector<double> &positions,
|
||||
}
|
||||
std::vector<double> offsets;
|
||||
// Get the offset (remainder) linespacing for each line and choose the median.
|
||||
offsets.reserve(positions.size());
|
||||
for (double position : positions) {
|
||||
offsets.push_back(fmod(position, m_in));
|
||||
}
|
||||
@ -795,7 +826,8 @@ double BaselineBlock::FitLineSpacingModel(const std::vector<double> &positions,
|
||||
*c_out = 0.0;
|
||||
}
|
||||
if (debug_level_ > 1) {
|
||||
tprintf("Median offset = %g, compared to mean of %g.\n", *c_out, llsq.c(*m_out));
|
||||
tprintf("Median offset = %g, compared to mean of %g.\n", *c_out,
|
||||
llsq.c(*m_out));
|
||||
}
|
||||
// Index_delta is the number of hypothesized line gaps present.
|
||||
if (index_delta != nullptr) {
|
||||
@ -805,13 +837,14 @@ double BaselineBlock::FitLineSpacingModel(const std::vector<double> &positions,
|
||||
// a full line-spacing in disagreement with the median.
|
||||
double rms_error = llsq.rms(*m_out, llsq.c(*m_out));
|
||||
if (debug_level_ > 1) {
|
||||
tprintf("Linespacing of y=%g x + %g improved to %g x + %g, rms=%g\n", m_in, median_offset,
|
||||
*m_out, *c_out, rms_error);
|
||||
tprintf("Linespacing of y=%g x + %g improved to %g x + %g, rms=%g\n", m_in,
|
||||
median_offset, *m_out, *c_out, rms_error);
|
||||
}
|
||||
return rms_error;
|
||||
}
|
||||
|
||||
BaselineDetect::BaselineDetect(int debug_level, const FCOORD &page_skew, TO_BLOCK_LIST *blocks)
|
||||
BaselineDetect::BaselineDetect(int debug_level, const FCOORD &page_skew,
|
||||
TO_BLOCK_LIST *blocks)
|
||||
: page_skew_(page_skew), debug_level_(debug_level) {
|
||||
TO_BLOCK_IT it(blocks);
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||
@ -863,8 +896,10 @@ void BaselineDetect::ComputeStraightBaselines(bool use_box_bottoms) {
|
||||
// x-heights and displaying debug information.
|
||||
// NOTE that ComputeStraightBaselines must have been called first as this
|
||||
// sets up data in the TO_ROWs upon which this function depends.
|
||||
void BaselineDetect::ComputeBaselineSplinesAndXheights(const ICOORD &page_tr, bool enable_splines,
|
||||
bool remove_noise, bool show_final_rows,
|
||||
void BaselineDetect::ComputeBaselineSplinesAndXheights(const ICOORD &page_tr,
|
||||
bool enable_splines,
|
||||
bool remove_noise,
|
||||
bool show_final_rows,
|
||||
Textord *textord) {
|
||||
for (auto bl_block : blocks_) {
|
||||
if (enable_splines) {
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include "tovars.h"
|
||||
|
||||
#include <algorithm> // for std::sort
|
||||
#include <cmath>
|
||||
#include <vector> // for std::vector
|
||||
|
||||
namespace tesseract {
|
||||
@ -437,7 +438,7 @@ private:
|
||||
}
|
||||
|
||||
const float real_pitch = box_pitch(box1, box2);
|
||||
if (fabs(real_pitch - pitch) < pitch * kFPTolerance) {
|
||||
if (std::fabs(real_pitch - pitch) < pitch * kFPTolerance) {
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -645,7 +646,7 @@ void FPRow::EstimatePitch(bool pass1) {
|
||||
// So we collect only pitch values between two good
|
||||
// characters. and within tolerance in pass2.
|
||||
if (pass1 ||
|
||||
(prev_was_good && fabs(estimated_pitch_ - pitch) < kFPTolerance * estimated_pitch_)) {
|
||||
(prev_was_good && std::fabs(estimated_pitch_ - pitch) < kFPTolerance * estimated_pitch_)) {
|
||||
good_pitches_.Add(pitch);
|
||||
if (!is_box_modified(i - 1) && !is_box_modified(i)) {
|
||||
good_gaps_.Add(gap);
|
||||
|
@ -25,9 +25,9 @@
|
||||
#include "colpartitiongrid.h"
|
||||
#include "colpartitionset.h"
|
||||
#include "detlinefit.h"
|
||||
#include "helpers.h" // for UpdateRange
|
||||
#include "dppoint.h"
|
||||
#include "host.h" // for NearlyEqual
|
||||
#include "helpers.h" // for UpdateRange
|
||||
#include "host.h" // for NearlyEqual
|
||||
#include "imagefind.h"
|
||||
#include "workingpartset.h"
|
||||
|
||||
@ -89,14 +89,14 @@ const int kMaxColorDistance = 900;
|
||||
// blob_type is the blob_region_type_ of the blobs in this partition.
|
||||
// Vertical is the direction of logical vertical on the possibly skewed image.
|
||||
ColPartition::ColPartition(BlobRegionType blob_type, const ICOORD &vertical)
|
||||
: left_margin_(-INT32_MAX)
|
||||
, right_margin_(INT32_MAX)
|
||||
, median_bottom_(INT32_MAX)
|
||||
, median_top_(-INT32_MAX)
|
||||
, median_left_(INT32_MAX)
|
||||
, median_right_(-INT32_MAX)
|
||||
, blob_type_(blob_type)
|
||||
, vertical_(vertical) {
|
||||
: left_margin_(-INT32_MAX),
|
||||
right_margin_(INT32_MAX),
|
||||
median_bottom_(INT32_MAX),
|
||||
median_top_(-INT32_MAX),
|
||||
median_left_(INT32_MAX),
|
||||
median_right_(-INT32_MAX),
|
||||
blob_type_(blob_type),
|
||||
vertical_(vertical) {
|
||||
memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
|
||||
}
|
||||
|
||||
@ -105,8 +105,10 @@ ColPartition::ColPartition(BlobRegionType blob_type, const ICOORD &vertical)
|
||||
// WARNING: Despite being on C_LISTs, the BLOBNBOX owns the C_BLOB and
|
||||
// the ColPartition owns the BLOBNBOX!!!
|
||||
// Call DeleteBoxes before deleting the ColPartition.
|
||||
ColPartition *ColPartition::FakePartition(const TBOX &box, PolyBlockType block_type,
|
||||
BlobRegionType blob_type, BlobTextFlowType flow) {
|
||||
ColPartition *ColPartition::FakePartition(const TBOX &box,
|
||||
PolyBlockType block_type,
|
||||
BlobRegionType blob_type,
|
||||
BlobTextFlowType flow) {
|
||||
auto *part = new ColPartition(blob_type, ICOORD(0, 1));
|
||||
part->set_type(block_type);
|
||||
part->set_flow(flow);
|
||||
@ -124,7 +126,8 @@ ColPartition *ColPartition::FakePartition(const TBOX &box, PolyBlockType block_t
|
||||
// than the surrounding text that may be a dropcap, two or more vertically
|
||||
// touching characters, or some graphic element.
|
||||
// If the given list is not nullptr, the partition is also added to the list.
|
||||
ColPartition *ColPartition::MakeBigPartition(BLOBNBOX *box, ColPartition_LIST *big_part_list) {
|
||||
ColPartition *ColPartition::MakeBigPartition(BLOBNBOX *box,
|
||||
ColPartition_LIST *big_part_list) {
|
||||
box->set_owner(nullptr);
|
||||
auto *single = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1));
|
||||
single->set_flow(BTFT_NONE);
|
||||
@ -155,8 +158,9 @@ ColPartition::~ColPartition() {
|
||||
|
||||
// Constructs a fake ColPartition with no BLOBNBOXes to represent a
|
||||
// horizontal or vertical line, given a type and a bounding box.
|
||||
ColPartition *ColPartition::MakeLinePartition(BlobRegionType blob_type, const ICOORD &vertical,
|
||||
int left, int bottom, int right, int top) {
|
||||
ColPartition *ColPartition::MakeLinePartition(BlobRegionType blob_type,
|
||||
const ICOORD &vertical, int left,
|
||||
int bottom, int right, int top) {
|
||||
auto *part = new ColPartition(blob_type, vertical);
|
||||
part->bounding_box_ = TBOX(left, bottom, right, top);
|
||||
part->median_bottom_ = bottom;
|
||||
@ -202,8 +206,9 @@ void ColPartition::AddBox(BLOBNBOX *bbox) {
|
||||
right_key_ = BoxRightKey();
|
||||
}
|
||||
if (TabFind::WithinTestRegion(2, box.left(), box.bottom())) {
|
||||
tprintf("Added box (%d,%d)->(%d,%d) left_blob_x_=%d, right_blob_x_ = %d\n", box.left(),
|
||||
box.bottom(), box.right(), box.top(), bounding_box_.left(), bounding_box_.right());
|
||||
tprintf("Added box (%d,%d)->(%d,%d) left_blob_x_=%d, right_blob_x_ = %d\n",
|
||||
box.left(), box.bottom(), box.right(), box.top(),
|
||||
bounding_box_.left(), bounding_box_.right());
|
||||
}
|
||||
}
|
||||
|
||||
@ -227,11 +232,13 @@ BLOBNBOX *ColPartition::BiggestBox() {
|
||||
for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
|
||||
BLOBNBOX *bbox = bb_it.data();
|
||||
if (IsVerticalType()) {
|
||||
if (biggest == nullptr || bbox->bounding_box().width() > biggest->bounding_box().width()) {
|
||||
if (biggest == nullptr ||
|
||||
bbox->bounding_box().width() > biggest->bounding_box().width()) {
|
||||
biggest = bbox;
|
||||
}
|
||||
} else {
|
||||
if (biggest == nullptr || bbox->bounding_box().height() > biggest->bounding_box().height()) {
|
||||
if (biggest == nullptr ||
|
||||
bbox->bounding_box().height() > biggest->bounding_box().height()) {
|
||||
biggest = bbox;
|
||||
}
|
||||
}
|
||||
@ -362,7 +369,8 @@ bool ColPartition::IsLegal() {
|
||||
}
|
||||
return false; // Bounding box invalid.
|
||||
}
|
||||
if (left_margin_ > bounding_box_.left() || right_margin_ < bounding_box_.right()) {
|
||||
if (left_margin_ > bounding_box_.left() ||
|
||||
right_margin_ < bounding_box_.right()) {
|
||||
if (textord_debug_bugs) {
|
||||
tprintf("Margins invalid\n");
|
||||
Print();
|
||||
@ -371,8 +379,8 @@ bool ColPartition::IsLegal() {
|
||||
}
|
||||
if (left_key_ > BoxLeftKey() || right_key_ < BoxRightKey()) {
|
||||
if (textord_debug_bugs) {
|
||||
tprintf("Key inside box: %d v %d or %d v %d\n", left_key_, BoxLeftKey(), right_key_,
|
||||
BoxRightKey());
|
||||
tprintf("Key inside box: %d v %d or %d v %d\n", left_key_, BoxLeftKey(),
|
||||
right_key_, BoxRightKey());
|
||||
Print();
|
||||
}
|
||||
return false; // Keys inside the box.
|
||||
@ -383,10 +391,12 @@ bool ColPartition::IsLegal() {
|
||||
// Returns true if the left and right edges are approximately equal.
|
||||
bool ColPartition::MatchingColumns(const ColPartition &other) const {
|
||||
int y = (MidY() + other.MidY()) / 2;
|
||||
if (!NearlyEqual(other.LeftAtY(y) / kColumnWidthFactor, LeftAtY(y) / kColumnWidthFactor, 1)) {
|
||||
if (!NearlyEqual(other.LeftAtY(y) / kColumnWidthFactor,
|
||||
LeftAtY(y) / kColumnWidthFactor, 1)) {
|
||||
return false;
|
||||
}
|
||||
if (!NearlyEqual(other.RightAtY(y) / kColumnWidthFactor, RightAtY(y) / kColumnWidthFactor, 1)) {
|
||||
if (!NearlyEqual(other.RightAtY(y) / kColumnWidthFactor,
|
||||
RightAtY(y) / kColumnWidthFactor, 1)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
@ -400,10 +410,14 @@ bool ColPartition::MatchingTextColor(const ColPartition &other) const {
|
||||
}
|
||||
|
||||
// Colors must match for other to count.
|
||||
double d_this1_o = ImageFind::ColorDistanceFromLine(other.color1_, other.color2_, color1_);
|
||||
double d_this2_o = ImageFind::ColorDistanceFromLine(other.color1_, other.color2_, color2_);
|
||||
double d_o1_this = ImageFind::ColorDistanceFromLine(color1_, color2_, other.color1_);
|
||||
double d_o2_this = ImageFind::ColorDistanceFromLine(color1_, color2_, other.color2_);
|
||||
double d_this1_o =
|
||||
ImageFind::ColorDistanceFromLine(other.color1_, other.color2_, color1_);
|
||||
double d_this2_o =
|
||||
ImageFind::ColorDistanceFromLine(other.color1_, other.color2_, color2_);
|
||||
double d_o1_this =
|
||||
ImageFind::ColorDistanceFromLine(color1_, color2_, other.color1_);
|
||||
double d_o2_this =
|
||||
ImageFind::ColorDistanceFromLine(color1_, color2_, other.color2_);
|
||||
// All 4 distances must be small enough.
|
||||
return d_this1_o < kMaxColorDistance && d_this2_o < kMaxColorDistance &&
|
||||
d_o1_this < kMaxColorDistance && d_o2_this < kMaxColorDistance;
|
||||
@ -441,7 +455,8 @@ bool ColPartition::ConfirmNoTabViolation(const ColPartition &other) const {
|
||||
}
|
||||
|
||||
// Returns true if other has a similar stroke width to this.
|
||||
bool ColPartition::MatchingStrokeWidth(const ColPartition &other, double fractional_tolerance,
|
||||
bool ColPartition::MatchingStrokeWidth(const ColPartition &other,
|
||||
double fractional_tolerance,
|
||||
double constant_tolerance) const {
|
||||
int match_count = 0;
|
||||
int nonmatch_count = 0;
|
||||
@ -450,8 +465,8 @@ bool ColPartition::MatchingStrokeWidth(const ColPartition &other, double fractio
|
||||
box_it.mark_cycle_pt();
|
||||
other_it.mark_cycle_pt();
|
||||
while (!box_it.cycled_list() && !other_it.cycled_list()) {
|
||||
if (box_it.data()->MatchingStrokeWidth(*other_it.data(), fractional_tolerance,
|
||||
constant_tolerance)) {
|
||||
if (box_it.data()->MatchingStrokeWidth(
|
||||
*other_it.data(), fractional_tolerance, constant_tolerance)) {
|
||||
++match_count;
|
||||
} else {
|
||||
++nonmatch_count;
|
||||
@ -468,7 +483,8 @@ bool ColPartition::MatchingStrokeWidth(const ColPartition &other, double fractio
|
||||
// (1) this is a ColPartition containing only diacritics, and
|
||||
// (2) the base characters indicated on the diacritics all believably lie
|
||||
// within the text line of the candidate ColPartition.
|
||||
bool ColPartition::OKDiacriticMerge(const ColPartition &candidate, bool debug) const {
|
||||
bool ColPartition::OKDiacriticMerge(const ColPartition &candidate,
|
||||
bool debug) const {
|
||||
BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST *>(&boxes_));
|
||||
int min_top = INT32_MAX;
|
||||
int max_bottom = -INT32_MAX;
|
||||
@ -490,13 +506,14 @@ bool ColPartition::OKDiacriticMerge(const ColPartition &candidate, bool debug) c
|
||||
}
|
||||
// If the intersection of all vertical ranges of all base characters
|
||||
// overlaps the median range of this, then it is OK.
|
||||
bool result = min_top > candidate.median_bottom_ && max_bottom < candidate.median_top_;
|
||||
bool result =
|
||||
min_top > candidate.median_bottom_ && max_bottom < candidate.median_top_;
|
||||
if (debug) {
|
||||
if (result) {
|
||||
tprintf("OKDiacritic!\n");
|
||||
} else {
|
||||
tprintf("y ranges don\'t overlap: %d-%d / %d-%d\n", max_bottom, min_top, median_bottom_,
|
||||
median_top_);
|
||||
tprintf("y ranges don\'t overlap: %d-%d / %d-%d\n", max_bottom, min_top,
|
||||
median_bottom_, median_top_);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
@ -591,7 +608,8 @@ int ColPartition::SpecialBlobsCount(const BlobSpecialTextType type) {
|
||||
return count;
|
||||
}
|
||||
|
||||
void ColPartition::SetSpecialBlobsDensity(const BlobSpecialTextType type, const float density) {
|
||||
void ColPartition::SetSpecialBlobsDensity(const BlobSpecialTextType type,
|
||||
const float density) {
|
||||
ASSERT_HOST(type < BSTT_COUNT);
|
||||
special_blobs_densities_[type] = density;
|
||||
}
|
||||
@ -619,10 +637,12 @@ void ColPartition::ComputeSpecialBlobsDensity() {
|
||||
// Partnerships are added symmetrically to partner and this.
|
||||
void ColPartition::AddPartner(bool upper, ColPartition *partner) {
|
||||
if (upper) {
|
||||
partner->lower_partners_.add_sorted(SortByBoxLeft<ColPartition>, true, this);
|
||||
partner->lower_partners_.add_sorted(SortByBoxLeft<ColPartition>, true,
|
||||
this);
|
||||
upper_partners_.add_sorted(SortByBoxLeft<ColPartition>, true, partner);
|
||||
} else {
|
||||
partner->upper_partners_.add_sorted(SortByBoxLeft<ColPartition>, true, this);
|
||||
partner->upper_partners_.add_sorted(SortByBoxLeft<ColPartition>, true,
|
||||
this);
|
||||
lower_partners_.add_sorted(SortByBoxLeft<ColPartition>, true, partner);
|
||||
}
|
||||
}
|
||||
@ -651,14 +671,16 @@ ColPartition *ColPartition::SingletonPartner(bool upper) {
|
||||
}
|
||||
|
||||
// Merge with the other partition and delete it.
|
||||
void ColPartition::Absorb(ColPartition *other, WidthCallback cb) {
|
||||
void ColPartition::Absorb(ColPartition *other, const WidthCallback &cb) {
|
||||
// The result has to either own all of the blobs or none of them.
|
||||
// Verify the flag is consistent.
|
||||
ASSERT_HOST(owns_blobs() == other->owns_blobs());
|
||||
// TODO(nbeato): check owns_blobs better. Right now owns_blobs
|
||||
// should always be true when this is called. So there is no issues.
|
||||
if (TabFind::WithinTestRegion(2, bounding_box_.left(), bounding_box_.bottom()) ||
|
||||
TabFind::WithinTestRegion(2, other->bounding_box_.left(), other->bounding_box_.bottom())) {
|
||||
if (TabFind::WithinTestRegion(2, bounding_box_.left(),
|
||||
bounding_box_.bottom()) ||
|
||||
TabFind::WithinTestRegion(2, other->bounding_box_.left(),
|
||||
other->bounding_box_.bottom())) {
|
||||
tprintf("Merging:");
|
||||
Print();
|
||||
other->Print();
|
||||
@ -669,8 +691,8 @@ void ColPartition::Absorb(ColPartition *other, WidthCallback cb) {
|
||||
for (int type = 0; type < BSTT_COUNT; ++type) {
|
||||
unsigned w1 = boxes_.length();
|
||||
unsigned w2 = other->boxes_.length();
|
||||
float new_val =
|
||||
special_blobs_densities_[type] * w1 + other->special_blobs_densities_[type] * w2;
|
||||
float new_val = special_blobs_densities_[type] * w1 +
|
||||
other->special_blobs_densities_[type] * w2;
|
||||
if (!w1 || !w2) {
|
||||
ASSERT_HOST((w1 + w2) > 0);
|
||||
special_blobs_densities_[type] = new_val / (w1 + w2);
|
||||
@ -723,7 +745,8 @@ void ColPartition::Absorb(ColPartition *other, WidthCallback cb) {
|
||||
for (int upper = 0; upper < 2; ++upper) {
|
||||
ColPartition_CLIST partners;
|
||||
ColPartition_C_IT part_it(&partners);
|
||||
part_it.add_list_after(upper ? &other->upper_partners_ : &other->lower_partners_);
|
||||
part_it.add_list_after(upper ? &other->upper_partners_
|
||||
: &other->lower_partners_);
|
||||
for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
|
||||
ColPartition *partner = part_it.extract();
|
||||
partner->RemovePartner(!upper, other);
|
||||
@ -747,7 +770,8 @@ void ColPartition::Absorb(ColPartition *other, WidthCallback cb) {
|
||||
// the text involved, and is usually a fraction of the median size of merge1
|
||||
// and/or merge2, or this.
|
||||
// TODO(rays) Determine whether vertical text needs to be considered.
|
||||
bool ColPartition::OKMergeOverlap(const ColPartition &merge1, const ColPartition &merge2,
|
||||
bool ColPartition::OKMergeOverlap(const ColPartition &merge1,
|
||||
const ColPartition &merge2,
|
||||
int ok_box_overlap, bool debug) {
|
||||
// Vertical partitions are not allowed to be involved.
|
||||
if (IsVerticalType() || merge1.IsVerticalType() || merge2.IsVerticalType()) {
|
||||
@ -916,7 +940,8 @@ void ColPartition::ComputeLimits() {
|
||||
if (it.empty()) {
|
||||
return;
|
||||
}
|
||||
if (IsImageType() || blob_type() == BRT_RECTIMAGE || blob_type() == BRT_POLYIMAGE) {
|
||||
if (IsImageType() || blob_type() == BRT_RECTIMAGE ||
|
||||
blob_type() == BRT_POLYIMAGE) {
|
||||
median_top_ = bounding_box_.top();
|
||||
median_bottom_ = bounding_box_.bottom();
|
||||
median_height_ = bounding_box_.height();
|
||||
@ -957,7 +982,8 @@ void ColPartition::ComputeLimits() {
|
||||
Print();
|
||||
}
|
||||
if (left_margin_ > bounding_box_.left() && textord_debug_bugs) {
|
||||
tprintf("Made partition with bad left coords, %d > %d\n", left_margin_, bounding_box_.left());
|
||||
tprintf("Made partition with bad left coords, %d > %d\n", left_margin_,
|
||||
bounding_box_.left());
|
||||
Print();
|
||||
}
|
||||
// Fix partner lists. The bounding box has changed and partners are stored
|
||||
@ -973,7 +999,8 @@ void ColPartition::ComputeLimits() {
|
||||
partner->AddPartner(!upper, this);
|
||||
}
|
||||
}
|
||||
if (TabFind::WithinTestRegion(2, bounding_box_.left(), bounding_box_.bottom())) {
|
||||
if (TabFind::WithinTestRegion(2, bounding_box_.left(),
|
||||
bounding_box_.bottom())) {
|
||||
tprintf("Recomputed box for partition %p\n", this);
|
||||
Print();
|
||||
}
|
||||
@ -998,10 +1025,12 @@ void ColPartition::SetPartitionType(int resolution, ColPartitionSet *columns) {
|
||||
int first_spanned_col = -1;
|
||||
ColumnSpanningType span_type = columns->SpanningType(
|
||||
resolution, bounding_box_.left(), bounding_box_.right(),
|
||||
std::min(bounding_box_.height(), bounding_box_.width()), MidY(), left_margin_, right_margin_,
|
||||
&first_column_, &last_column_, &first_spanned_col);
|
||||
std::min(bounding_box_.height(), bounding_box_.width()), MidY(),
|
||||
left_margin_, right_margin_, &first_column_, &last_column_,
|
||||
&first_spanned_col);
|
||||
column_set_ = columns;
|
||||
if (first_column_ < last_column_ && span_type == CST_PULLOUT && !IsLineType()) {
|
||||
if (first_column_ < last_column_ && span_type == CST_PULLOUT &&
|
||||
!IsLineType()) {
|
||||
// Unequal columns may indicate that the pullout spans one of the columns
|
||||
// it lies in, so force it to be allocated to just that column.
|
||||
if (first_spanned_col >= 0) {
|
||||
@ -1026,8 +1055,8 @@ void ColPartition::SetPartitionType(int resolution, ColPartitionSet *columns) {
|
||||
// in the columns.
|
||||
PolyBlockType ColPartition::PartitionType(ColumnSpanningType flow) const {
|
||||
if (flow == CST_NOISE) {
|
||||
if (blob_type_ != BRT_HLINE && blob_type_ != BRT_VLINE && blob_type_ != BRT_RECTIMAGE &&
|
||||
blob_type_ != BRT_VERT_TEXT) {
|
||||
if (blob_type_ != BRT_HLINE && blob_type_ != BRT_VLINE &&
|
||||
blob_type_ != BRT_RECTIMAGE && blob_type_ != BRT_VERT_TEXT) {
|
||||
return PT_NOISE;
|
||||
}
|
||||
flow = CST_FLOWING;
|
||||
@ -1075,18 +1104,18 @@ PolyBlockType ColPartition::PartitionType(ColumnSpanningType flow) const {
|
||||
|
||||
// Returns the first and last column touched by this partition.
|
||||
// resolution refers to the ppi resolution of the image.
|
||||
void ColPartition::ColumnRange(int resolution, ColPartitionSet *columns, int *first_col,
|
||||
int *last_col) {
|
||||
void ColPartition::ColumnRange(int resolution, ColPartitionSet *columns,
|
||||
int *first_col, int *last_col) {
|
||||
int first_spanned_col = -1;
|
||||
ColumnSpanningType span_type =
|
||||
columns->SpanningType(resolution, bounding_box_.left(), bounding_box_.right(),
|
||||
std::min(bounding_box_.height(), bounding_box_.width()), MidY(),
|
||||
left_margin_, right_margin_, first_col, last_col, &first_spanned_col);
|
||||
ColumnSpanningType span_type = columns->SpanningType(
|
||||
resolution, bounding_box_.left(), bounding_box_.right(),
|
||||
std::min(bounding_box_.height(), bounding_box_.width()), MidY(),
|
||||
left_margin_, right_margin_, first_col, last_col, &first_spanned_col);
|
||||
type_ = PartitionType(span_type);
|
||||
}
|
||||
|
||||
// Sets the internal flags good_width_ and good_column_.
|
||||
void ColPartition::SetColumnGoodness(WidthCallback cb) {
|
||||
void ColPartition::SetColumnGoodness(const WidthCallback &cb) {
|
||||
int y = MidY();
|
||||
int width = RightAtY(y) - LeftAtY(y);
|
||||
good_width_ = cb(width);
|
||||
@ -1127,10 +1156,12 @@ bool ColPartition::MarkAsLeaderIfMonospaced() {
|
||||
double gap_iqr = gap_stats.ile(0.75f) - gap_stats.ile(0.25f);
|
||||
if (textord_debug_tabfind >= 4) {
|
||||
tprintf("gap iqr = %g, blob_count=%d, limits=%g,%g\n", gap_iqr, blob_count,
|
||||
max_width * kMaxLeaderGapFractionOfMax, min_width * kMaxLeaderGapFractionOfMin);
|
||||
max_width * kMaxLeaderGapFractionOfMax,
|
||||
min_width * kMaxLeaderGapFractionOfMin);
|
||||
}
|
||||
if (gap_iqr < max_width * kMaxLeaderGapFractionOfMax &&
|
||||
gap_iqr < min_width * kMaxLeaderGapFractionOfMin && blob_count >= kMinLeaderCount) {
|
||||
gap_iqr < min_width * kMaxLeaderGapFractionOfMin &&
|
||||
blob_count >= kMinLeaderCount) {
|
||||
// This is stable enough to be called a leader, so check the widths.
|
||||
// Since leader dashes can join, run a dp cutting algorithm and go
|
||||
// on the cost.
|
||||
@ -1151,8 +1182,9 @@ bool ColPartition::MarkAsLeaderIfMonospaced() {
|
||||
projection[left - part_left].AddLocalCost(height);
|
||||
}
|
||||
}
|
||||
DPPoint *best_end = DPPoint::Solve(min_step, max_step, false, &DPPoint::CostWithVariance,
|
||||
part_width, projection);
|
||||
DPPoint *best_end =
|
||||
DPPoint::Solve(min_step, max_step, false, &DPPoint::CostWithVariance,
|
||||
part_width, projection);
|
||||
if (best_end != nullptr && best_end->total_cost() < blob_count) {
|
||||
// Good enough. Call it a leader.
|
||||
result = true;
|
||||
@ -1161,7 +1193,8 @@ bool ColPartition::MarkAsLeaderIfMonospaced() {
|
||||
BLOBNBOX *blob = it.data();
|
||||
// If the first or last blob is spaced too much, don't mark it.
|
||||
if (it.at_first()) {
|
||||
int gap = it.data_relative(1)->bounding_box().left() - blob->bounding_box().right();
|
||||
int gap = it.data_relative(1)->bounding_box().left() -
|
||||
blob->bounding_box().right();
|
||||
if (blob->bounding_box().width() + gap > max_step) {
|
||||
it.extract();
|
||||
modified_blob_list = true;
|
||||
@ -1169,7 +1202,8 @@ bool ColPartition::MarkAsLeaderIfMonospaced() {
|
||||
}
|
||||
}
|
||||
if (it.at_last()) {
|
||||
int gap = blob->bounding_box().left() - it.data_relative(-1)->bounding_box().right();
|
||||
int gap = blob->bounding_box().left() -
|
||||
it.data_relative(-1)->bounding_box().right();
|
||||
if (blob->bounding_box().width() + gap > max_step) {
|
||||
it.extract();
|
||||
modified_blob_list = true;
|
||||
@ -1188,7 +1222,8 @@ bool ColPartition::MarkAsLeaderIfMonospaced() {
|
||||
if (best_end == nullptr) {
|
||||
tprintf("No path\n");
|
||||
} else {
|
||||
tprintf("Total cost = %d vs allowed %d\n", best_end->total_cost(), blob_count);
|
||||
tprintf("Total cost = %d vs allowed %d\n", best_end->total_cost(),
|
||||
blob_count);
|
||||
}
|
||||
}
|
||||
delete[] projection;
|
||||
@ -1275,10 +1310,12 @@ void ColPartition::SetRegionAndFlowTypesFromProjectionValue(int value) {
|
||||
blob_type_ = BRT_NOISE;
|
||||
}
|
||||
}
|
||||
if (TabFind::WithinTestRegion(2, bounding_box_.left(), bounding_box_.bottom())) {
|
||||
tprintf("RegionFlowTypesFromProjectionValue count=%d, noisy=%d, score=%d,", blob_count,
|
||||
noisy_count, good_blob_score_);
|
||||
tprintf(" Projection value=%d, flow=%d, blob_type=%d\n", value, flow_, blob_type_);
|
||||
if (TabFind::WithinTestRegion(2, bounding_box_.left(),
|
||||
bounding_box_.bottom())) {
|
||||
tprintf("RegionFlowTypesFromProjectionValue count=%d, noisy=%d, score=%d,",
|
||||
blob_count, noisy_count, good_blob_score_);
|
||||
tprintf(" Projection value=%d, flow=%d, blob_type=%d\n", value, flow_,
|
||||
blob_type_);
|
||||
Print();
|
||||
}
|
||||
SetBlobTypes();
|
||||
@ -1371,7 +1408,8 @@ bool ColPartition::HasGoodBaseline() {
|
||||
|
||||
// Adds this ColPartition to a matching WorkingPartSet if one can be found,
|
||||
// otherwise starts a new one in the appropriate column, ending the previous.
|
||||
void ColPartition::AddToWorkingSet(const ICOORD &bleft, const ICOORD &tright, int resolution,
|
||||
void ColPartition::AddToWorkingSet(const ICOORD &bleft, const ICOORD &tright,
|
||||
int resolution,
|
||||
ColPartition_LIST *used_parts,
|
||||
WorkingPartSet_LIST *working_sets) {
|
||||
if (block_owned_) {
|
||||
@ -1414,10 +1452,11 @@ void ColPartition::AddToWorkingSet(const ICOORD &bleft, const ICOORD &tright, in
|
||||
// Find the column that the right edge falls in.
|
||||
BLOCK_LIST completed_blocks;
|
||||
TO_BLOCK_LIST to_blocks;
|
||||
for (; !it.cycled_list() && col_index <= last_column_; it.forward(), ++col_index) {
|
||||
for (; !it.cycled_list() && col_index <= last_column_;
|
||||
it.forward(), ++col_index) {
|
||||
WorkingPartSet *end_set = it.data();
|
||||
end_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts, &completed_blocks,
|
||||
&to_blocks);
|
||||
end_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts,
|
||||
&completed_blocks, &to_blocks);
|
||||
}
|
||||
work_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
|
||||
}
|
||||
@ -1431,9 +1470,12 @@ void ColPartition::AddToWorkingSet(const ICOORD &bleft, const ICOORD &tright, in
|
||||
// The used partitions are put onto used_parts, as they may still be referred
|
||||
// to in the partition grid. bleft, tright and resolution are the bounds
|
||||
// and resolution of the original image.
|
||||
void ColPartition::LineSpacingBlocks(const ICOORD &bleft, const ICOORD &tright, int resolution,
|
||||
ColPartition_LIST *block_parts, ColPartition_LIST *used_parts,
|
||||
BLOCK_LIST *completed_blocks, TO_BLOCK_LIST *to_blocks) {
|
||||
void ColPartition::LineSpacingBlocks(const ICOORD &bleft, const ICOORD &tright,
|
||||
int resolution,
|
||||
ColPartition_LIST *block_parts,
|
||||
ColPartition_LIST *used_parts,
|
||||
BLOCK_LIST *completed_blocks,
|
||||
TO_BLOCK_LIST *to_blocks) {
|
||||
int page_height = tright.y() - bleft.y();
|
||||
// Compute the initial spacing stats.
|
||||
ColPartition_IT it(block_parts);
|
||||
@ -1466,7 +1508,8 @@ void ColPartition::LineSpacingBlocks(const ICOORD &bleft, const ICOORD &tright,
|
||||
part->set_side_step(static_cast<int>(side_steps.median() + 0.5));
|
||||
if (!it.at_last()) {
|
||||
ColPartition *next_part = it.data_relative(1);
|
||||
part->set_bottom_spacing(part->median_bottom() - next_part->median_bottom());
|
||||
part->set_bottom_spacing(part->median_bottom() -
|
||||
next_part->median_bottom());
|
||||
part->set_top_spacing(part->median_top() - next_part->median_top());
|
||||
} else {
|
||||
part->set_bottom_spacing(page_height);
|
||||
@ -1474,8 +1517,8 @@ void ColPartition::LineSpacingBlocks(const ICOORD &bleft, const ICOORD &tright,
|
||||
}
|
||||
if (textord_debug_tabfind) {
|
||||
part->Print();
|
||||
tprintf("side step = %.2f, top spacing = %d, bottom spacing=%d\n", side_steps.median(),
|
||||
part->top_spacing(), part->bottom_spacing());
|
||||
tprintf("side step = %.2f, top spacing = %d, bottom spacing=%d\n",
|
||||
side_steps.median(), part->top_spacing(), part->bottom_spacing());
|
||||
}
|
||||
++part_count;
|
||||
}
|
||||
@ -1508,21 +1551,25 @@ void ColPartition::LineSpacingBlocks(const ICOORD &bleft, const ICOORD &tright,
|
||||
tprintf(
|
||||
"Spacings unequal: upper:%d/%d, lower:%d/%d,"
|
||||
" sizes %d %d %d\n",
|
||||
part->top_spacing(), part->bottom_spacing(), next_part->top_spacing(),
|
||||
next_part->bottom_spacing(), part->median_height(), next_part->median_height(),
|
||||
part->top_spacing(), part->bottom_spacing(),
|
||||
next_part->top_spacing(), next_part->bottom_spacing(),
|
||||
part->median_height(), next_part->median_height(),
|
||||
third_part != nullptr ? third_part->median_height() : 0);
|
||||
}
|
||||
// We can only consider adding the next line to the block if the sizes
|
||||
// match and the lines are close enough for their size.
|
||||
if (part->SizesSimilar(*next_part) &&
|
||||
next_part->median_height() * kMaxSameBlockLineSpacing > part->bottom_spacing() &&
|
||||
part->median_height() * kMaxSameBlockLineSpacing > part->top_spacing()) {
|
||||
next_part->median_height() * kMaxSameBlockLineSpacing >
|
||||
part->bottom_spacing() &&
|
||||
part->median_height() * kMaxSameBlockLineSpacing >
|
||||
part->top_spacing()) {
|
||||
// Even now, we can only add it as long as the third line doesn't
|
||||
// match in the same way and have a smaller bottom spacing.
|
||||
if (third_part == nullptr || !next_part->SizesSimilar(*third_part) ||
|
||||
third_part->median_height() * kMaxSameBlockLineSpacing <=
|
||||
next_part->bottom_spacing() ||
|
||||
next_part->median_height() * kMaxSameBlockLineSpacing <= next_part->top_spacing() ||
|
||||
next_part->median_height() * kMaxSameBlockLineSpacing <=
|
||||
next_part->top_spacing() ||
|
||||
next_part->bottom_spacing() > part->bottom_spacing()) {
|
||||
// Add to the current block.
|
||||
sp_block_it.add_to_end(it.extract());
|
||||
@ -1542,8 +1589,9 @@ void ColPartition::LineSpacingBlocks(const ICOORD &bleft, const ICOORD &tright,
|
||||
} else {
|
||||
if (textord_debug_tabfind && !it.empty()) {
|
||||
ColPartition *next_part = it.data();
|
||||
tprintf("Spacings equal: upper:%d/%d, lower:%d/%d, median:%d/%d\n", part->top_spacing(),
|
||||
part->bottom_spacing(), next_part->top_spacing(), next_part->bottom_spacing(),
|
||||
tprintf("Spacings equal: upper:%d/%d, lower:%d/%d, median:%d/%d\n",
|
||||
part->top_spacing(), part->bottom_spacing(),
|
||||
next_part->top_spacing(), next_part->bottom_spacing(),
|
||||
part->median_height(), next_part->median_height());
|
||||
}
|
||||
}
|
||||
@ -1570,8 +1618,9 @@ static void ClipCoord(const ICOORD &bleft, const ICOORD &tright, ICOORD *pos) {
|
||||
// itself. Sets up the block for (old) textline formation correctly for
|
||||
// vertical and horizontal text. The partitions are moved to used_parts
|
||||
// afterwards, as they cannot be deleted yet.
|
||||
static TO_BLOCK *MoveBlobsToBlock(bool vertical_text, int line_spacing, BLOCK *block,
|
||||
ColPartition_LIST *block_parts, ColPartition_LIST *used_parts) {
|
||||
static TO_BLOCK *MoveBlobsToBlock(bool vertical_text, int line_spacing,
|
||||
BLOCK *block, ColPartition_LIST *block_parts,
|
||||
ColPartition_LIST *used_parts) {
|
||||
// Make a matching TO_BLOCK and put all the BLOBNBOXes from the parts in it.
|
||||
// Move all the parts to a done list as they are no longer needed, except
|
||||
// that they have to continue to exist until the part grid is deleted.
|
||||
@ -1646,7 +1695,8 @@ static TO_BLOCK *MoveBlobsToBlock(bool vertical_text, int line_spacing, BLOCK *b
|
||||
// Constructs a block from the given list of partitions.
|
||||
// Arguments are as LineSpacingBlocks above.
|
||||
TO_BLOCK *ColPartition::MakeBlock(const ICOORD &bleft, const ICOORD &tright,
|
||||
ColPartition_LIST *block_parts, ColPartition_LIST *used_parts) {
|
||||
ColPartition_LIST *block_parts,
|
||||
ColPartition_LIST *used_parts) {
|
||||
if (block_parts->empty()) {
|
||||
return nullptr; // Nothing to do.
|
||||
}
|
||||
@ -1704,7 +1754,8 @@ TO_BLOCK *ColPartition::MakeBlock(const ICOORD &bleft, const ICOORD &tright,
|
||||
|
||||
// Constructs a block from the given list of vertical text partitions.
|
||||
// Currently only creates rectangular blocks.
|
||||
TO_BLOCK *ColPartition::MakeVerticalTextBlock(const ICOORD &bleft, const ICOORD &tright,
|
||||
TO_BLOCK *ColPartition::MakeVerticalTextBlock(const ICOORD &bleft,
|
||||
const ICOORD &tright,
|
||||
ColPartition_LIST *block_parts,
|
||||
ColPartition_LIST *used_parts) {
|
||||
if (block_parts->empty()) {
|
||||
@ -1722,8 +1773,8 @@ TO_BLOCK *ColPartition::MakeVerticalTextBlock(const ICOORD &bleft, const ICOORD
|
||||
tprintf("Making block at:");
|
||||
block_box.print();
|
||||
}
|
||||
auto *block = new BLOCK("", true, 0, 0, block_box.left(), block_box.bottom(), block_box.right(),
|
||||
block_box.top());
|
||||
auto *block = new BLOCK("", true, 0, 0, block_box.left(), block_box.bottom(),
|
||||
block_box.right(), block_box.top());
|
||||
block->pdblk.set_poly_block(new POLY_BLOCK(block_box, type));
|
||||
return MoveBlobsToBlock(true, line_spacing, block, block_parts, used_parts);
|
||||
}
|
||||
@ -1741,8 +1792,9 @@ TO_ROW *ColPartition::MakeToRow() {
|
||||
int top = blob->bounding_box().top();
|
||||
int bottom = blob->bounding_box().bottom();
|
||||
if (row == nullptr) {
|
||||
row = new TO_ROW(blob, static_cast<float>(top), static_cast<float>(bottom),
|
||||
static_cast<float>(line_size));
|
||||
row =
|
||||
new TO_ROW(blob, static_cast<float>(top), static_cast<float>(bottom),
|
||||
static_cast<float>(line_size));
|
||||
} else {
|
||||
row->add_blob(blob, static_cast<float>(top), static_cast<float>(bottom),
|
||||
static_cast<float>(line_size));
|
||||
@ -1785,7 +1837,8 @@ ColPartition *ColPartition::CopyButDontOwnBlobs() {
|
||||
copy->set_owns_blobs(false);
|
||||
BLOBNBOX_C_IT inserter(copy->boxes());
|
||||
BLOBNBOX_C_IT traverser(boxes());
|
||||
for (traverser.mark_cycle_pt(); !traverser.cycled_list(); traverser.forward()) {
|
||||
for (traverser.mark_cycle_pt(); !traverser.cycled_list();
|
||||
traverser.forward()) {
|
||||
inserter.add_after_then_move(traverser.data());
|
||||
}
|
||||
return copy;
|
||||
@ -1812,19 +1865,21 @@ void ColPartition::Print() const {
|
||||
"ColPart:%c(M%d-%c%d-B%d/%d,%d/%d)->(%dB-%d%c-%dM/%d,%d/%d)"
|
||||
" w-ok=%d, v-ok=%d, type=%d%c%d, fc=%d, lc=%d, boxes=%d"
|
||||
" ts=%d bs=%d ls=%d rs=%d\n",
|
||||
boxes_.empty() ? 'E' : ' ', left_margin_, left_key_tab_ ? 'T' : 'B', LeftAtY(y),
|
||||
bounding_box_.left(), median_left_, bounding_box_.bottom(), median_bottom_,
|
||||
bounding_box_.right(), RightAtY(y), right_key_tab_ ? 'T' : 'B', right_margin_, median_right_,
|
||||
bounding_box_.top(), median_top_, good_width_, good_column_, type_, kBlobTypes[blob_type_],
|
||||
flow_, first_column_, last_column_, boxes_.length(), space_above_, space_below_,
|
||||
space_to_left_, space_to_right_);
|
||||
boxes_.empty() ? 'E' : ' ', left_margin_, left_key_tab_ ? 'T' : 'B',
|
||||
LeftAtY(y), bounding_box_.left(), median_left_, bounding_box_.bottom(),
|
||||
median_bottom_, bounding_box_.right(), RightAtY(y),
|
||||
right_key_tab_ ? 'T' : 'B', right_margin_, median_right_,
|
||||
bounding_box_.top(), median_top_, good_width_, good_column_, type_,
|
||||
kBlobTypes[blob_type_], flow_, first_column_, last_column_,
|
||||
boxes_.length(), space_above_, space_below_, space_to_left_,
|
||||
space_to_right_);
|
||||
}
|
||||
|
||||
// Prints debug information on the colors.
|
||||
void ColPartition::PrintColors() {
|
||||
tprintf("Colors:(%d, %d, %d)%d -> (%d, %d, %d)\n", color1_[COLOR_RED], color1_[COLOR_GREEN],
|
||||
color1_[COLOR_BLUE], color1_[L_ALPHA_CHANNEL], color2_[COLOR_RED], color2_[COLOR_GREEN],
|
||||
color2_[COLOR_BLUE]);
|
||||
tprintf("Colors:(%d, %d, %d)%d -> (%d, %d, %d)\n", color1_[COLOR_RED],
|
||||
color1_[COLOR_GREEN], color1_[COLOR_BLUE], color1_[L_ALPHA_CHANNEL],
|
||||
color2_[COLOR_RED], color2_[COLOR_GREEN], color2_[COLOR_BLUE]);
|
||||
}
|
||||
|
||||
// Sets the types of all partitions in the run to be the max of the types.
|
||||
@ -1898,7 +1953,8 @@ void ColPartition::SmoothPartnerRun(int working_set_count) {
|
||||
// one partner. This makes block creation simpler.
|
||||
// If get_desperate is true, goes to more desperate merge methods
|
||||
// to merge flowing text before breaking partnerships.
|
||||
void ColPartition::RefinePartners(PolyBlockType type, bool get_desperate, ColPartitionGrid *grid) {
|
||||
void ColPartition::RefinePartners(PolyBlockType type, bool get_desperate,
|
||||
ColPartitionGrid *grid) {
|
||||
if (TypesSimilar(type_, type)) {
|
||||
RefinePartnersInternal(true, get_desperate, grid);
|
||||
RefinePartnersInternal(false, get_desperate, grid);
|
||||
@ -1924,7 +1980,8 @@ void ColPartition::RefinePartners(PolyBlockType type, bool get_desperate, ColPar
|
||||
// Cleans up the partners above if upper is true, else below.
|
||||
// If get_desperate is true, goes to more desperate merge methods
|
||||
// to merge flowing text before breaking partnerships.
|
||||
void ColPartition::RefinePartnersInternal(bool upper, bool get_desperate, ColPartitionGrid *grid) {
|
||||
void ColPartition::RefinePartnersInternal(bool upper, bool get_desperate,
|
||||
ColPartitionGrid *grid) {
|
||||
ColPartition_CLIST *partners = upper ? &upper_partners_ : &lower_partners_;
|
||||
if (!partners->empty() && !partners->singleton()) {
|
||||
RefinePartnersByType(upper, partners);
|
||||
@ -1952,8 +2009,10 @@ void ColPartition::RefinePartnersInternal(bool upper, bool get_desperate, ColPar
|
||||
// Cleans up the partners above if upper is true, else below.
|
||||
// Restricts the partners to only desirable types. For text and BRT_HLINE this
|
||||
// means the same type_ , and for image types it means any image type.
|
||||
void ColPartition::RefinePartnersByType(bool upper, ColPartition_CLIST *partners) {
|
||||
bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(), bounding_box_.bottom());
|
||||
void ColPartition::RefinePartnersByType(bool upper,
|
||||
ColPartition_CLIST *partners) {
|
||||
bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(),
|
||||
bounding_box_.bottom());
|
||||
if (debug) {
|
||||
tprintf("Refining %d %s partners by type for:\n", partners->length(),
|
||||
upper ? "Upper" : "Lower");
|
||||
@ -1983,7 +2042,8 @@ void ColPartition::RefinePartnersByType(bool upper, ColPartition_CLIST *partners
|
||||
// Only polyimages are allowed to have partners of any kind!
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||
ColPartition *partner = it.data();
|
||||
if (partner->blob_type() != BRT_POLYIMAGE || blob_type() != BRT_POLYIMAGE) {
|
||||
if (partner->blob_type() != BRT_POLYIMAGE ||
|
||||
blob_type() != BRT_POLYIMAGE) {
|
||||
if (debug) {
|
||||
tprintf("Removing partner:");
|
||||
partner->Print();
|
||||
@ -2003,7 +2063,8 @@ void ColPartition::RefinePartnersByType(bool upper, ColPartition_CLIST *partners
|
||||
// Gets rid of this<->b, leaving a clean chain.
|
||||
// Also if we have this<->a and a<->this, then gets rid of this<->a, as
|
||||
// this has multiple partners.
|
||||
void ColPartition::RefinePartnerShortcuts(bool upper, ColPartition_CLIST *partners) {
|
||||
void ColPartition::RefinePartnerShortcuts(bool upper,
|
||||
ColPartition_CLIST *partners) {
|
||||
bool done_any = false;
|
||||
do {
|
||||
done_any = false;
|
||||
@ -2054,8 +2115,10 @@ void ColPartition::RefinePartnerShortcuts(bool upper, ColPartition_CLIST *partne
|
||||
// by aggressive line fitting/splitting, as there are probably vertically
|
||||
// joined blobs that cross textlines.
|
||||
void ColPartition::RefineTextPartnersByMerge(bool upper, bool desperate,
|
||||
ColPartition_CLIST *partners, ColPartitionGrid *grid) {
|
||||
bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(), bounding_box_.bottom());
|
||||
ColPartition_CLIST *partners,
|
||||
ColPartitionGrid *grid) {
|
||||
bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(),
|
||||
bounding_box_.bottom());
|
||||
if (debug) {
|
||||
tprintf("Refining %d %s partners by merge for:\n", partners->length(),
|
||||
upper ? "Upper" : "Lower");
|
||||
@ -2078,12 +2141,13 @@ void ColPartition::RefineTextPartnersByMerge(bool upper, bool desperate,
|
||||
}
|
||||
}
|
||||
int overlap_increase;
|
||||
ColPartition *candidate =
|
||||
grid->BestMergeCandidate(part, &candidates, debug, nullptr, &overlap_increase);
|
||||
ColPartition *candidate = grid->BestMergeCandidate(
|
||||
part, &candidates, debug, nullptr, &overlap_increase);
|
||||
if (candidate != nullptr && (overlap_increase <= 0 || desperate)) {
|
||||
if (debug) {
|
||||
tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n", part->HCoreOverlap(*candidate),
|
||||
part->VCoreOverlap(*candidate), overlap_increase);
|
||||
tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n",
|
||||
part->HCoreOverlap(*candidate), part->VCoreOverlap(*candidate),
|
||||
overlap_increase);
|
||||
}
|
||||
// Remove before merge and re-insert to keep the integrity of the grid.
|
||||
grid->RemoveBBox(candidate);
|
||||
@ -2102,8 +2166,10 @@ void ColPartition::RefineTextPartnersByMerge(bool upper, bool desperate,
|
||||
|
||||
// Cleans up the partners above if upper is true, else below.
|
||||
// Keep the partner with the biggest overlap.
|
||||
void ColPartition::RefinePartnersByOverlap(bool upper, ColPartition_CLIST *partners) {
|
||||
bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(), bounding_box_.bottom());
|
||||
void ColPartition::RefinePartnersByOverlap(bool upper,
|
||||
ColPartition_CLIST *partners) {
|
||||
bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(),
|
||||
bounding_box_.bottom());
|
||||
if (debug) {
|
||||
tprintf("Refining %d %s partners by overlap for:\n", partners->length(),
|
||||
upper ? "Upper" : "Lower");
|
||||
@ -2115,8 +2181,9 @@ void ColPartition::RefinePartnersByOverlap(bool upper, ColPartition_CLIST *partn
|
||||
int best_overlap = 0;
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||
ColPartition *partner = it.data();
|
||||
int overlap = std::min(bounding_box_.right(), partner->bounding_box_.right()) -
|
||||
std::max(bounding_box_.left(), partner->bounding_box_.left());
|
||||
int overlap =
|
||||
std::min(bounding_box_.right(), partner->bounding_box_.right()) -
|
||||
std::max(bounding_box_.left(), partner->bounding_box_.left());
|
||||
if (overlap > best_overlap) {
|
||||
best_overlap = overlap;
|
||||
best_partner = partner;
|
||||
@ -2137,7 +2204,8 @@ void ColPartition::RefinePartnersByOverlap(bool upper, ColPartition_CLIST *partn
|
||||
}
|
||||
|
||||
// Return true if bbox belongs better in this than other.
|
||||
bool ColPartition::ThisPartitionBetter(BLOBNBOX *bbox, const ColPartition &other) {
|
||||
bool ColPartition::ThisPartitionBetter(BLOBNBOX *bbox,
|
||||
const ColPartition &other) {
|
||||
const TBOX &box = bbox->bounding_box();
|
||||
// Margins take priority.
|
||||
int left = box.left();
|
||||
@ -2150,14 +2218,17 @@ bool ColPartition::ThisPartitionBetter(BLOBNBOX *bbox, const ColPartition &other
|
||||
}
|
||||
int top = box.top();
|
||||
int bottom = box.bottom();
|
||||
int this_overlap = std::min(top, median_top_) - std::max(bottom, median_bottom_);
|
||||
int other_overlap = std::min(top, other.median_top_) - std::max(bottom, other.median_bottom_);
|
||||
int this_overlap =
|
||||
std::min(top, median_top_) - std::max(bottom, median_bottom_);
|
||||
int other_overlap =
|
||||
std::min(top, other.median_top_) - std::max(bottom, other.median_bottom_);
|
||||
int this_miss = median_top_ - median_bottom_ - this_overlap;
|
||||
int other_miss = other.median_top_ - other.median_bottom_ - other_overlap;
|
||||
if (TabFind::WithinTestRegion(3, box.left(), box.bottom())) {
|
||||
tprintf("Unique on (%d,%d)->(%d,%d) overlap %d/%d, miss %d/%d, mt=%d/%d\n", box.left(),
|
||||
box.bottom(), box.right(), box.top(), this_overlap, other_overlap, this_miss,
|
||||
other_miss, median_top_, other.median_top_);
|
||||
tprintf("Unique on (%d,%d)->(%d,%d) overlap %d/%d, miss %d/%d, mt=%d/%d\n",
|
||||
box.left(), box.bottom(), box.right(), box.top(), this_overlap,
|
||||
other_overlap, this_miss, other_miss, median_top_,
|
||||
other.median_top_);
|
||||
}
|
||||
if (this_miss < other_miss) {
|
||||
return true;
|
||||
@ -2200,13 +2271,15 @@ bool ColPartition::IsInSameColumnAs(const ColPartition &part) const {
|
||||
// Overlap does not occur when last < part.first or first > part.last.
|
||||
// In other words, one is completely to the side of the other.
|
||||
// This is just DeMorgan's law applied to that so the function returns true.
|
||||
return (last_column_ >= part.first_column_) && (first_column_ <= part.last_column_);
|
||||
return (last_column_ >= part.first_column_) &&
|
||||
(first_column_ <= part.last_column_);
|
||||
}
|
||||
|
||||
// Smoothes the spacings in the list into groups of equal linespacing.
|
||||
// resolution is the resolution of the original image, used as a basis
|
||||
// for thresholds in change of spacing. page_height is in pixels.
|
||||
void ColPartition::SmoothSpacings(int resolution, int page_height, ColPartition_LIST *parts) {
|
||||
void ColPartition::SmoothSpacings(int resolution, int page_height,
|
||||
ColPartition_LIST *parts) {
|
||||
// The task would be trivial if we didn't have to allow for blips -
|
||||
// occasional offsets in spacing caused by anomalous text, such as all
|
||||
// caps, groups of descenders, joined words, Arabic etc.
|
||||
@ -2258,13 +2331,17 @@ void ColPartition::SmoothSpacings(int resolution, int page_height, ColPartition_
|
||||
// The last time, everything is shifted up 1, so we present OKSpacingBlip
|
||||
// with neighbourhood-1 and check that PN_LOWER matches the median.
|
||||
if (neighbourhood[PN_LOWER] == nullptr ||
|
||||
(!neighbourhood[PN_UPPER]->SpacingsEqual(*neighbourhood[PN_LOWER], resolution) &&
|
||||
(neighbourhood[PN_UPPER] == nullptr || neighbourhood[PN_LOWER] == nullptr ||
|
||||
(!neighbourhood[PN_UPPER]->SpacingsEqual(*neighbourhood[PN_LOWER],
|
||||
resolution) &&
|
||||
(neighbourhood[PN_UPPER] == nullptr ||
|
||||
neighbourhood[PN_LOWER] == nullptr ||
|
||||
!OKSpacingBlip(resolution, median_space, neighbourhood, 0)) &&
|
||||
(neighbourhood[PN_UPPER - 1] == nullptr || neighbourhood[PN_LOWER - 1] == nullptr ||
|
||||
(neighbourhood[PN_UPPER - 1] == nullptr ||
|
||||
neighbourhood[PN_LOWER - 1] == nullptr ||
|
||||
!OKSpacingBlip(resolution, median_space, neighbourhood, -1) ||
|
||||
!neighbourhood[PN_LOWER]->SpacingEqual(median_space, resolution)) &&
|
||||
(neighbourhood[PN_UPPER + 1] == nullptr || neighbourhood[PN_LOWER + 1] == nullptr ||
|
||||
(neighbourhood[PN_UPPER + 1] == nullptr ||
|
||||
neighbourhood[PN_LOWER + 1] == nullptr ||
|
||||
!OKSpacingBlip(resolution, median_space, neighbourhood, 1) ||
|
||||
!neighbourhood[PN_UPPER]->SpacingEqual(median_space, resolution)))) {
|
||||
// The group has ended. PN_UPPER is the last member.
|
||||
@ -2297,7 +2374,8 @@ void ColPartition::SmoothSpacings(int resolution, int page_height, ColPartition_
|
||||
if (neighbourhood[i] == nullptr) {
|
||||
tprintf("NULL");
|
||||
if (i > 0 && neighbourhood[i - 1] != nullptr) {
|
||||
if (neighbourhood[i - 1]->SingletonPartner(false) != nullptr) {
|
||||
if (neighbourhood[i - 1]->SingletonPartner(false) !=
|
||||
nullptr) {
|
||||
tprintf(" Lower partner:");
|
||||
neighbourhood[i - 1]->SingletonPartner(false)->Print();
|
||||
} else {
|
||||
@ -2307,7 +2385,8 @@ void ColPartition::SmoothSpacings(int resolution, int page_height, ColPartition_
|
||||
tprintf("\n");
|
||||
}
|
||||
} else {
|
||||
tprintf("Top = %d, bottom = %d\n", neighbourhood[i]->top_spacing(),
|
||||
tprintf("Top = %d, bottom = %d\n",
|
||||
neighbourhood[i]->top_spacing(),
|
||||
neighbourhood[i]->bottom_spacing());
|
||||
}
|
||||
}
|
||||
@ -2350,12 +2429,13 @@ void ColPartition::SmoothSpacings(int resolution, int page_height, ColPartition_
|
||||
// Returns true if the parts array of pointers to partitions matches the
|
||||
// condition for a spacing blip. See SmoothSpacings for what this means
|
||||
// and how it is used.
|
||||
bool ColPartition::OKSpacingBlip(int resolution, int median_spacing, ColPartition **parts,
|
||||
int offset) {
|
||||
bool ColPartition::OKSpacingBlip(int resolution, int median_spacing,
|
||||
ColPartition **parts, int offset) {
|
||||
// The blip is OK if upper and lower sum to an OK value and at least
|
||||
// one of above1 and below1 is equal to the median.
|
||||
parts += offset;
|
||||
return parts[PN_UPPER]->SummedSpacingOK(*parts[PN_LOWER], median_spacing, resolution) &&
|
||||
return parts[PN_UPPER]->SummedSpacingOK(*parts[PN_LOWER], median_spacing,
|
||||
resolution) &&
|
||||
((parts[PN_ABOVE1] != nullptr &&
|
||||
parts[PN_ABOVE1]->SpacingEqual(median_spacing, resolution)) ||
|
||||
(parts[PN_BELOW1] != nullptr &&
|
||||
@ -2373,22 +2453,27 @@ bool ColPartition::SpacingEqual(int spacing, int resolution) const {
|
||||
|
||||
// Returns true if both the top and bottom spacings of this and other
|
||||
// match to within suitable margins dictated by the image resolution.
|
||||
bool ColPartition::SpacingsEqual(const ColPartition &other, int resolution) const {
|
||||
int bottom_error =
|
||||
std::max(BottomSpacingMargin(resolution), other.BottomSpacingMargin(resolution));
|
||||
int top_error = std::max(TopSpacingMargin(resolution), other.TopSpacingMargin(resolution));
|
||||
bool ColPartition::SpacingsEqual(const ColPartition &other,
|
||||
int resolution) const {
|
||||
int bottom_error = std::max(BottomSpacingMargin(resolution),
|
||||
other.BottomSpacingMargin(resolution));
|
||||
int top_error = std::max(TopSpacingMargin(resolution),
|
||||
other.TopSpacingMargin(resolution));
|
||||
return NearlyEqual(bottom_spacing_, other.bottom_spacing_, bottom_error) &&
|
||||
(NearlyEqual(top_spacing_, other.top_spacing_, top_error) ||
|
||||
NearlyEqual(top_spacing_ + other.top_spacing_, bottom_spacing_ * 2, bottom_error));
|
||||
NearlyEqual(top_spacing_ + other.top_spacing_, bottom_spacing_ * 2,
|
||||
bottom_error));
|
||||
}
|
||||
|
||||
// Returns true if the sum spacing of this and other match the given
|
||||
// spacing (or twice the given spacing) to within a suitable margin dictated
|
||||
// by the image resolution.
|
||||
bool ColPartition::SummedSpacingOK(const ColPartition &other, int spacing, int resolution) const {
|
||||
int bottom_error =
|
||||
std::max(BottomSpacingMargin(resolution), other.BottomSpacingMargin(resolution));
|
||||
int top_error = std::max(TopSpacingMargin(resolution), other.TopSpacingMargin(resolution));
|
||||
bool ColPartition::SummedSpacingOK(const ColPartition &other, int spacing,
|
||||
int resolution) const {
|
||||
int bottom_error = std::max(BottomSpacingMargin(resolution),
|
||||
other.BottomSpacingMargin(resolution));
|
||||
int top_error = std::max(TopSpacingMargin(resolution),
|
||||
other.TopSpacingMargin(resolution));
|
||||
int bottom_total = bottom_spacing_ + other.bottom_spacing_;
|
||||
int top_total = top_spacing_ + other.top_spacing_;
|
||||
return (NearlyEqual(spacing, bottom_total, bottom_error) &&
|
||||
@ -2420,7 +2505,8 @@ bool ColPartition::SizesSimilar(const ColPartition &other) const {
|
||||
// Helper updates margin_left and margin_right, being the bounds of the left
|
||||
// margin of part of a block. Returns false and does not update the bounds if
|
||||
// this partition has a disjoint margin with the established margin.
|
||||
static bool UpdateLeftMargin(const ColPartition &part, int *margin_left, int *margin_right) {
|
||||
static bool UpdateLeftMargin(const ColPartition &part, int *margin_left,
|
||||
int *margin_right) {
|
||||
const TBOX &part_box = part.bounding_box();
|
||||
int top = part_box.top();
|
||||
int bottom = part_box.bottom();
|
||||
@ -2444,7 +2530,8 @@ static bool UpdateLeftMargin(const ColPartition &part, int *margin_left, int *ma
|
||||
// condition that the intersection of the left margins is non-empty, ie the
|
||||
// rightmost left margin is to the left of the leftmost left bounding box edge.
|
||||
// On return the iterator is set to the start of the next run.
|
||||
void ColPartition::LeftEdgeRun(ColPartition_IT *part_it, ICOORD *start, ICOORD *end) {
|
||||
void ColPartition::LeftEdgeRun(ColPartition_IT *part_it, ICOORD *start,
|
||||
ICOORD *end) {
|
||||
ColPartition *part = part_it->data();
|
||||
ColPartition *start_part = part;
|
||||
int start_y = part->bounding_box_.top();
|
||||
@ -2463,7 +2550,8 @@ void ColPartition::LeftEdgeRun(ColPartition_IT *part_it, ICOORD *start, ICOORD *
|
||||
do {
|
||||
part_it->forward();
|
||||
part = part_it->data();
|
||||
} while (!part_it->at_first() && UpdateLeftMargin(*part, &margin_left, &margin_right));
|
||||
} while (!part_it->at_first() &&
|
||||
UpdateLeftMargin(*part, &margin_left, &margin_right));
|
||||
// The run ended. If we were pushed inwards, compute the next run and
|
||||
// extend it backwards into the run we just calculated to find the end of
|
||||
// this run that provides a tight box.
|
||||
@ -2475,13 +2563,15 @@ void ColPartition::LeftEdgeRun(ColPartition_IT *part_it, ICOORD *start, ICOORD *
|
||||
do {
|
||||
next_it.forward();
|
||||
part = next_it.data();
|
||||
} while (!next_it.at_first() && UpdateLeftMargin(*part, &next_margin_left, &next_margin_right));
|
||||
} while (!next_it.at_first() &&
|
||||
UpdateLeftMargin(*part, &next_margin_left, &next_margin_right));
|
||||
// Now extend the next run backwards into the original run to get the
|
||||
// tightest fit.
|
||||
do {
|
||||
part_it->backward();
|
||||
part = part_it->data();
|
||||
} while (part != start_part && UpdateLeftMargin(*part, &next_margin_left, &next_margin_right));
|
||||
} while (part != start_part &&
|
||||
UpdateLeftMargin(*part, &next_margin_left, &next_margin_right));
|
||||
part_it->forward();
|
||||
}
|
||||
// Now calculate the end_y.
|
||||
@ -2495,16 +2585,17 @@ void ColPartition::LeftEdgeRun(ColPartition_IT *part_it, ICOORD *start, ICOORD *
|
||||
end->set_y(end_y);
|
||||
end->set_x(part->XAtY(margin_right, end_y));
|
||||
if (textord_debug_tabfind && !part_it->at_first()) {
|
||||
tprintf("Left run from y=%d to %d terminated with sum %d-%d, new %d-%d\n", start_y, end_y,
|
||||
part->XAtY(margin_left, end_y), end->x(), part->left_margin_,
|
||||
part->bounding_box_.left());
|
||||
tprintf("Left run from y=%d to %d terminated with sum %d-%d, new %d-%d\n",
|
||||
start_y, end_y, part->XAtY(margin_left, end_y), end->x(),
|
||||
part->left_margin_, part->bounding_box_.left());
|
||||
}
|
||||
}
|
||||
|
||||
// Helper updates margin_left and margin_right, being the bounds of the right
|
||||
// margin of part of a block. Returns false and does not update the bounds if
|
||||
// this partition has a disjoint margin with the established margin.
|
||||
static bool UpdateRightMargin(const ColPartition &part, int *margin_left, int *margin_right) {
|
||||
static bool UpdateRightMargin(const ColPartition &part, int *margin_left,
|
||||
int *margin_right) {
|
||||
const TBOX &part_box = part.bounding_box();
|
||||
int top = part_box.top();
|
||||
int bottom = part_box.bottom();
|
||||
@ -2529,7 +2620,8 @@ static bool UpdateRightMargin(const ColPartition &part, int *margin_left, int *m
|
||||
// leftmost right margin is to the right of the rightmost right bounding box
|
||||
// edge.
|
||||
// On return the iterator is set to the start of the next run.
|
||||
void ColPartition::RightEdgeRun(ColPartition_IT *part_it, ICOORD *start, ICOORD *end) {
|
||||
void ColPartition::RightEdgeRun(ColPartition_IT *part_it, ICOORD *start,
|
||||
ICOORD *end) {
|
||||
ColPartition *part = part_it->data();
|
||||
ColPartition *start_part = part;
|
||||
int start_y = part->bounding_box_.bottom();
|
||||
@ -2548,7 +2640,8 @@ void ColPartition::RightEdgeRun(ColPartition_IT *part_it, ICOORD *start, ICOORD
|
||||
do {
|
||||
part_it->backward();
|
||||
part = part_it->data();
|
||||
} while (!part_it->at_last() && UpdateRightMargin(*part, &margin_left, &margin_right));
|
||||
} while (!part_it->at_last() &&
|
||||
UpdateRightMargin(*part, &margin_left, &margin_right));
|
||||
// The run ended. If we were pushed inwards, compute the next run and
|
||||
// extend it backwards to find the end of this run for a tight box.
|
||||
int next_margin_right = INT32_MAX;
|
||||
@ -2559,13 +2652,15 @@ void ColPartition::RightEdgeRun(ColPartition_IT *part_it, ICOORD *start, ICOORD
|
||||
do {
|
||||
next_it.backward();
|
||||
part = next_it.data();
|
||||
} while (!next_it.at_last() && UpdateRightMargin(*part, &next_margin_left, &next_margin_right));
|
||||
} while (!next_it.at_last() &&
|
||||
UpdateRightMargin(*part, &next_margin_left, &next_margin_right));
|
||||
// Now extend the next run forwards into the original run to get the
|
||||
// tightest fit.
|
||||
do {
|
||||
part_it->forward();
|
||||
part = part_it->data();
|
||||
} while (part != start_part && UpdateRightMargin(*part, &next_margin_left, &next_margin_right));
|
||||
} while (part != start_part &&
|
||||
UpdateRightMargin(*part, &next_margin_left, &next_margin_right));
|
||||
part_it->backward();
|
||||
}
|
||||
// Now calculate the end_y.
|
||||
@ -2579,9 +2674,9 @@ void ColPartition::RightEdgeRun(ColPartition_IT *part_it, ICOORD *start, ICOORD
|
||||
end->set_y(end_y);
|
||||
end->set_x(part->XAtY(margin_left, end_y));
|
||||
if (textord_debug_tabfind && !part_it->at_last()) {
|
||||
tprintf("Right run from y=%d to %d terminated with sum %d-%d, new %d-%d\n", start_y, end_y,
|
||||
end->x(), part->XAtY(margin_right, end_y), part->bounding_box_.right(),
|
||||
part->right_margin_);
|
||||
tprintf("Right run from y=%d to %d terminated with sum %d-%d, new %d-%d\n",
|
||||
start_y, end_y, end->x(), part->XAtY(margin_right, end_y),
|
||||
part->bounding_box_.right(), part->right_margin_);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -81,7 +81,8 @@ public:
|
||||
* Constructs a fake ColPartition with no BLOBNBOXes to represent a
|
||||
* horizontal or vertical line, given a type and a bounding box.
|
||||
*/
|
||||
static ColPartition *MakeLinePartition(BlobRegionType blob_type, const ICOORD &vertical, int left,
|
||||
static ColPartition *MakeLinePartition(BlobRegionType blob_type,
|
||||
const ICOORD &vertical, int left,
|
||||
int bottom, int right, int top);
|
||||
|
||||
// Constructs and returns a fake ColPartition with a single fake BLOBNBOX,
|
||||
@ -90,14 +91,16 @@ public:
|
||||
// the ColPartition owns the BLOBNBOX!!!
|
||||
// Call DeleteBoxes before deleting the ColPartition.
|
||||
static ColPartition *FakePartition(const TBOX &box, PolyBlockType block_type,
|
||||
BlobRegionType blob_type, BlobTextFlowType flow);
|
||||
BlobRegionType blob_type,
|
||||
BlobTextFlowType flow);
|
||||
|
||||
// Constructs and returns a ColPartition with the given real BLOBNBOX,
|
||||
// and sets it up to be a "big" partition (single-blob partition bigger
|
||||
// than the surrounding text that may be a dropcap, two or more vertically
|
||||
// touching characters, or some graphic element.
|
||||
// If the given list is not nullptr, the partition is also added to the list.
|
||||
static ColPartition *MakeBigPartition(BLOBNBOX *box, ColPartition_LIST *big_part_list);
|
||||
static ColPartition *MakeBigPartition(BLOBNBOX *box,
|
||||
ColPartition_LIST *big_part_list);
|
||||
|
||||
~ColPartition();
|
||||
|
||||
@ -389,7 +392,8 @@ public:
|
||||
return false;
|
||||
}
|
||||
int overlap = VCoreOverlap(other);
|
||||
int height = std::min(median_top_ - median_bottom_, other.median_top_ - other.median_bottom_);
|
||||
int height = std::min(median_top_ - median_bottom_,
|
||||
other.median_top_ - other.median_bottom_);
|
||||
return overlap * 3 > height;
|
||||
}
|
||||
// Returns true if this and other can be combined without putting a
|
||||
@ -412,7 +416,8 @@ public:
|
||||
|
||||
// Returns true if the types are similar to each other.
|
||||
static bool TypesSimilar(PolyBlockType type1, PolyBlockType type2) {
|
||||
return (type1 == type2 || (type1 == PT_FLOWING_TEXT && type2 == PT_INLINE_EQUATION) ||
|
||||
return (type1 == type2 ||
|
||||
(type1 == PT_FLOWING_TEXT && type2 == PT_INLINE_EQUATION) ||
|
||||
(type2 == PT_FLOWING_TEXT && type1 == PT_INLINE_EQUATION));
|
||||
}
|
||||
|
||||
@ -519,7 +524,8 @@ public:
|
||||
bool ConfirmNoTabViolation(const ColPartition &other) const;
|
||||
|
||||
// Returns true if other has a similar stroke width to this.
|
||||
bool MatchingStrokeWidth(const ColPartition &other, double fractional_tolerance,
|
||||
bool MatchingStrokeWidth(const ColPartition &other,
|
||||
double fractional_tolerance,
|
||||
double constant_tolerance) const;
|
||||
// Returns true if candidate is an acceptable diacritic base char merge
|
||||
// with this as the diacritic.
|
||||
@ -548,7 +554,8 @@ public:
|
||||
// Set the density value for a particular BlobSpecialTextType, should ONLY be
|
||||
// used for debugging or testing. In production code, use
|
||||
// ComputeSpecialBlobsDensity instead.
|
||||
void SetSpecialBlobsDensity(const BlobSpecialTextType type, const float density);
|
||||
void SetSpecialBlobsDensity(const BlobSpecialTextType type,
|
||||
const float density);
|
||||
// Compute the SpecialTextType density of blobs, where we assume
|
||||
// that the SpecialTextType in the boxes_ has been set.
|
||||
void ComputeSpecialBlobsDensity();
|
||||
@ -565,14 +572,14 @@ public:
|
||||
ColPartition *SingletonPartner(bool upper);
|
||||
|
||||
// Merge with the other partition and delete it.
|
||||
void Absorb(ColPartition *other, WidthCallback cb);
|
||||
void Absorb(ColPartition *other, const WidthCallback &cb);
|
||||
|
||||
// Returns true if the overlap between this and the merged pair of
|
||||
// merge candidates is sufficiently trivial to be allowed.
|
||||
// The merged box can graze the edge of this by the ok_box_overlap
|
||||
// if that exceeds the margin to the median top and bottom.
|
||||
bool OKMergeOverlap(const ColPartition &merge1, const ColPartition &merge2, int ok_box_overlap,
|
||||
bool debug);
|
||||
bool OKMergeOverlap(const ColPartition &merge1, const ColPartition &merge2,
|
||||
int ok_box_overlap, bool debug);
|
||||
|
||||
// Find the blob at which to split this to minimize the overlap with the
|
||||
// given box. Returns the first blob to go in the second partition.
|
||||
@ -606,10 +613,11 @@ public:
|
||||
|
||||
// Returns the first and last column touched by this partition.
|
||||
// resolution refers to the ppi resolution of the image.
|
||||
void ColumnRange(int resolution, ColPartitionSet *columns, int *first_col, int *last_col);
|
||||
void ColumnRange(int resolution, ColPartitionSet *columns, int *first_col,
|
||||
int *last_col);
|
||||
|
||||
// Sets the internal flags good_width_ and good_column_.
|
||||
void SetColumnGoodness(WidthCallback cb);
|
||||
void SetColumnGoodness(const WidthCallback &cb);
|
||||
|
||||
// Determines whether the blobs in this partition mostly represent
|
||||
// a leader (fixed pitch sequence) and sets the member blobs accordingly.
|
||||
@ -634,8 +642,9 @@ public:
|
||||
|
||||
// Adds this ColPartition to a matching WorkingPartSet if one can be found,
|
||||
// otherwise starts a new one in the appropriate column, ending the previous.
|
||||
void AddToWorkingSet(const ICOORD &bleft, const ICOORD &tright, int resolution,
|
||||
ColPartition_LIST *used_parts, WorkingPartSet_LIST *working_set);
|
||||
void AddToWorkingSet(const ICOORD &bleft, const ICOORD &tright,
|
||||
int resolution, ColPartition_LIST *used_parts,
|
||||
WorkingPartSet_LIST *working_set);
|
||||
|
||||
// From the given block_parts list, builds one or more BLOCKs and
|
||||
// corresponding TO_BLOCKs, such that the line spacing is uniform in each.
|
||||
@ -643,17 +652,21 @@ public:
|
||||
// The used partitions are put onto used_parts, as they may still be referred
|
||||
// to in the partition grid. bleft, tright and resolution are the bounds
|
||||
// and resolution of the original image.
|
||||
static void LineSpacingBlocks(const ICOORD &bleft, const ICOORD &tright, int resolution,
|
||||
ColPartition_LIST *block_parts, ColPartition_LIST *used_parts,
|
||||
BLOCK_LIST *completed_blocks, TO_BLOCK_LIST *to_blocks);
|
||||
static void LineSpacingBlocks(const ICOORD &bleft, const ICOORD &tright,
|
||||
int resolution, ColPartition_LIST *block_parts,
|
||||
ColPartition_LIST *used_parts,
|
||||
BLOCK_LIST *completed_blocks,
|
||||
TO_BLOCK_LIST *to_blocks);
|
||||
// Constructs a block from the given list of partitions.
|
||||
// Arguments are as LineSpacingBlocks above.
|
||||
static TO_BLOCK *MakeBlock(const ICOORD &bleft, const ICOORD &tright,
|
||||
ColPartition_LIST *block_parts, ColPartition_LIST *used_parts);
|
||||
ColPartition_LIST *block_parts,
|
||||
ColPartition_LIST *used_parts);
|
||||
|
||||
// Constructs a block from the given list of vertical text partitions.
|
||||
// Currently only creates rectangular blocks.
|
||||
static TO_BLOCK *MakeVerticalTextBlock(const ICOORD &bleft, const ICOORD &tright,
|
||||
static TO_BLOCK *MakeVerticalTextBlock(const ICOORD &bleft,
|
||||
const ICOORD &tright,
|
||||
ColPartition_LIST *block_parts,
|
||||
ColPartition_LIST *used_parts);
|
||||
|
||||
@ -686,7 +699,8 @@ public:
|
||||
// one partner. This makes block creation simpler.
|
||||
// If get_desperate is true, goes to more desperate merge methods
|
||||
// to merge flowing text before breaking partnerships.
|
||||
void RefinePartners(PolyBlockType type, bool get_desperate, ColPartitionGrid *grid);
|
||||
void RefinePartners(PolyBlockType type, bool get_desperate,
|
||||
ColPartitionGrid *grid);
|
||||
|
||||
// Returns true if this column partition is in the same column as
|
||||
// part. This function will only work after the SetPartitionType function
|
||||
@ -700,8 +714,10 @@ public:
|
||||
const ColPartition *part2 = *static_cast<const ColPartition *const *>(p2);
|
||||
int mid_y1 = part1->bounding_box_.y_middle();
|
||||
int mid_y2 = part2->bounding_box_.y_middle();
|
||||
if ((part2->bounding_box_.bottom() <= mid_y1 && mid_y1 <= part2->bounding_box_.top()) ||
|
||||
(part1->bounding_box_.bottom() <= mid_y2 && mid_y2 <= part1->bounding_box_.top())) {
|
||||
if ((part2->bounding_box_.bottom() <= mid_y1 &&
|
||||
mid_y1 <= part2->bounding_box_.top()) ||
|
||||
(part1->bounding_box_.bottom() <= mid_y2 &&
|
||||
mid_y2 <= part1->bounding_box_.top())) {
|
||||
// Sort by increasing x.
|
||||
return part1->bounding_box_.x_middle() - part2->bounding_box_.x_middle();
|
||||
}
|
||||
@ -721,7 +737,8 @@ private:
|
||||
// Cleans up the partners above if upper is true, else below.
|
||||
// If get_desperate is true, goes to more desperate merge methods
|
||||
// to merge flowing text before breaking partnerships.
|
||||
void RefinePartnersInternal(bool upper, bool get_desperate, ColPartitionGrid *grid);
|
||||
void RefinePartnersInternal(bool upper, bool get_desperate,
|
||||
ColPartitionGrid *grid);
|
||||
// Restricts the partners to only desirable types. For text and BRT_HLINE this
|
||||
// means the same type_ , and for image types it means any image type.
|
||||
void RefinePartnersByType(bool upper, ColPartition_CLIST *partners);
|
||||
@ -736,7 +753,8 @@ private:
|
||||
// is set, indicating that the textlines probably need to be regenerated
|
||||
// by aggressive line fitting/splitting, as there are probably vertically
|
||||
// joined blobs that cross textlines.
|
||||
void RefineTextPartnersByMerge(bool upper, bool desperate, ColPartition_CLIST *partners,
|
||||
void RefineTextPartnersByMerge(bool upper, bool desperate,
|
||||
ColPartition_CLIST *partners,
|
||||
ColPartitionGrid *grid);
|
||||
// Keep the partner with the biggest overlap.
|
||||
void RefinePartnersByOverlap(bool upper, ColPartition_CLIST *partners);
|
||||
@ -747,12 +765,14 @@ private:
|
||||
// Smoothes the spacings in the list into groups of equal linespacing.
|
||||
// resolution is the resolution of the original image, used as a basis
|
||||
// for thresholds in change of spacing. page_height is in pixels.
|
||||
static void SmoothSpacings(int resolution, int page_height, ColPartition_LIST *parts);
|
||||
static void SmoothSpacings(int resolution, int page_height,
|
||||
ColPartition_LIST *parts);
|
||||
|
||||
// Returns true if the parts array of pointers to partitions matches the
|
||||
// condition for a spacing blip. See SmoothSpacings for what this means
|
||||
// and how it is used.
|
||||
static bool OKSpacingBlip(int resolution, int median_spacing, ColPartition **parts, int offset);
|
||||
static bool OKSpacingBlip(int resolution, int median_spacing,
|
||||
ColPartition **parts, int offset);
|
||||
|
||||
// Returns true if both the top and bottom spacings of this match the given
|
||||
// spacing to within suitable margins dictated by the image resolution.
|
||||
@ -765,7 +785,8 @@ private:
|
||||
// Returns true if the sum spacing of this and other match the given
|
||||
// spacing (or twice the given spacing) to within a suitable margin dictated
|
||||
// by the image resolution.
|
||||
bool SummedSpacingOK(const ColPartition &other, int spacing, int resolution) const;
|
||||
bool SummedSpacingOK(const ColPartition &other, int spacing,
|
||||
int resolution) const;
|
||||
|
||||
// Returns a suitable spacing margin that can be applied to bottoms of
|
||||
// text lines, based on the resolution and the stored side_step_.
|
||||
@ -792,7 +813,8 @@ private:
|
||||
// rightmost right bounding box edge.
|
||||
// TODO(rays) Not good enough. Needs improving to tightly wrap text in both
|
||||
// directions, and to loosely wrap images.
|
||||
static void RightEdgeRun(ColPartition_IT *part_it, ICOORD *start, ICOORD *end);
|
||||
static void RightEdgeRun(ColPartition_IT *part_it, ICOORD *start,
|
||||
ICOORD *end);
|
||||
|
||||
// The margins are determined by the position of the nearest vertically
|
||||
// overlapping neighbour to the side. They indicate the maximum extent
|
||||
@ -893,7 +915,8 @@ private:
|
||||
};
|
||||
|
||||
// Typedef it now in case it becomes a class later.
|
||||
using ColPartitionGridSearch = GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>;
|
||||
using ColPartitionGridSearch =
|
||||
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>;
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include "imagefind.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <utility>
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
@ -63,12 +64,15 @@ const double kMaxPartitionSpacing = 1.75;
|
||||
// decision in GridSmoothNeighbour.
|
||||
const int kSmoothDecisionMargin = 4;
|
||||
|
||||
ColPartitionGrid::ColPartitionGrid(int gridsize, const ICOORD &bleft, const ICOORD &tright)
|
||||
: BBGrid<ColPartition, ColPartition_CLIST, ColPartition_C_IT>(gridsize, bleft, tright) {}
|
||||
ColPartitionGrid::ColPartitionGrid(int gridsize, const ICOORD &bleft,
|
||||
const ICOORD &tright)
|
||||
: BBGrid<ColPartition, ColPartition_CLIST, ColPartition_C_IT>(
|
||||
gridsize, bleft, tright) {}
|
||||
|
||||
// Handles a click event in a display window.
|
||||
void ColPartitionGrid::HandleClick(int x, int y) {
|
||||
BBGrid<ColPartition, ColPartition_CLIST, ColPartition_C_IT>::HandleClick(x, y);
|
||||
BBGrid<ColPartition, ColPartition_CLIST, ColPartition_C_IT>::HandleClick(x,
|
||||
y);
|
||||
// Run a radial search for partitions that overlap.
|
||||
ColPartitionGridSearch radsearch(this);
|
||||
radsearch.SetUniqueMode(true);
|
||||
@ -93,8 +97,9 @@ void ColPartitionGrid::HandleClick(int x, int y) {
|
||||
// true, then the partitions are merged.
|
||||
// Both callbacks are deleted before returning.
|
||||
void ColPartitionGrid::Merges(
|
||||
std::function<bool(ColPartition *, TBOX *)> box_cb,
|
||||
std::function<bool(const ColPartition *, const ColPartition *)> confirm_cb) {
|
||||
const std::function<bool(ColPartition *, TBOX *)> &box_cb,
|
||||
const std::function<bool(const ColPartition *, const ColPartition *)>
|
||||
&confirm_cb) {
|
||||
// Iterate the ColPartitions in the grid.
|
||||
ColPartitionGridSearch gsearch(this);
|
||||
gsearch.StartFullSearch();
|
||||
@ -112,8 +117,9 @@ void ColPartitionGrid::Merges(
|
||||
// true, then the partitions are merged.
|
||||
// Returns true if the partition is consumed by one or more merges.
|
||||
bool ColPartitionGrid::MergePart(
|
||||
std::function<bool(ColPartition *, TBOX *)> box_cb,
|
||||
std::function<bool(const ColPartition *, const ColPartition *)> confirm_cb,
|
||||
const std::function<bool(ColPartition *, TBOX *)> &box_cb,
|
||||
const std::function<bool(const ColPartition *, const ColPartition *)>
|
||||
&confirm_cb,
|
||||
ColPartition *part) {
|
||||
if (part->IsUnMergeableType()) {
|
||||
return false;
|
||||
@ -138,12 +144,13 @@ bool ColPartitionGrid::MergePart(
|
||||
FindMergeCandidates(part, box, debug, &merge_candidates);
|
||||
// Find the best merge candidate based on minimal overlap increase.
|
||||
int overlap_increase;
|
||||
ColPartition *neighbour =
|
||||
BestMergeCandidate(part, &merge_candidates, debug, confirm_cb, &overlap_increase);
|
||||
ColPartition *neighbour = BestMergeCandidate(part, &merge_candidates, debug,
|
||||
confirm_cb, &overlap_increase);
|
||||
if (neighbour != nullptr && overlap_increase <= 0) {
|
||||
if (debug) {
|
||||
tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n", part->HCoreOverlap(*neighbour),
|
||||
part->VCoreOverlap(*neighbour), overlap_increase);
|
||||
tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n",
|
||||
part->HCoreOverlap(*neighbour), part->VCoreOverlap(*neighbour),
|
||||
overlap_increase);
|
||||
}
|
||||
// Looks like a good candidate so merge it.
|
||||
RemoveBBox(neighbour);
|
||||
@ -171,7 +178,8 @@ bool ColPartitionGrid::MergePart(
|
||||
// In general we only want to merge partitions that look like they
|
||||
// are on the same text line, ie their median limits overlap, but we have
|
||||
// to make exceptions for diacritics and stray punctuation.
|
||||
static bool OKMergeCandidate(const ColPartition *part, const ColPartition *candidate, bool debug) {
|
||||
static bool OKMergeCandidate(const ColPartition *part,
|
||||
const ColPartition *candidate, bool debug) {
|
||||
const TBOX &part_box = part->bounding_box();
|
||||
if (candidate == part) {
|
||||
return false; // Ignore itself.
|
||||
@ -205,7 +213,8 @@ static bool OKMergeCandidate(const ColPartition *part, const ColPartition *candi
|
||||
}
|
||||
// Candidates must either overlap in median y,
|
||||
// or part or candidate must be an acceptable diacritic.
|
||||
if (!part->VSignificantCoreOverlap(*candidate) && !part->OKDiacriticMerge(*candidate, debug) &&
|
||||
if (!part->VSignificantCoreOverlap(*candidate) &&
|
||||
!part->OKDiacriticMerge(*candidate, debug) &&
|
||||
!candidate->OKDiacriticMerge(*part, debug)) {
|
||||
if (debug) {
|
||||
tprintf("Candidate fails overlap and diacritic tests!\n");
|
||||
@ -221,7 +230,8 @@ static bool OKMergeCandidate(const ColPartition *part, const ColPartition *candi
|
||||
// the overlap with them uncombined.
|
||||
// An overlap is not counted if passes the OKMergeOverlap test with ok_overlap
|
||||
// as the pixel overlap limit. merge1 and merge2 must both be non-nullptr.
|
||||
static int IncreaseInOverlap(const ColPartition *merge1, const ColPartition *merge2, int ok_overlap,
|
||||
static int IncreaseInOverlap(const ColPartition *merge1,
|
||||
const ColPartition *merge2, int ok_overlap,
|
||||
ColPartition_CLIST *parts) {
|
||||
ASSERT_HOST(merge1 != nullptr && merge2 != nullptr);
|
||||
int total_area = 0;
|
||||
@ -236,7 +246,8 @@ static int IncreaseInOverlap(const ColPartition *merge1, const ColPartition *mer
|
||||
TBOX part_box = part->bounding_box();
|
||||
// Compute the overlap of the merged box with part.
|
||||
int overlap_area = part_box.intersection(merged_box).area();
|
||||
if (overlap_area > 0 && !part->OKMergeOverlap(*merge1, *merge2, ok_overlap, false)) {
|
||||
if (overlap_area > 0 &&
|
||||
!part->OKMergeOverlap(*merge1, *merge2, ok_overlap, false)) {
|
||||
total_area += overlap_area;
|
||||
// Subtract the overlap of merge1 and merge2 individually.
|
||||
overlap_area = part_box.intersection(merge1->bounding_box()).area();
|
||||
@ -289,7 +300,8 @@ static bool TestCompatibleCandidates(const ColPartition &part, bool debug,
|
||||
ColPartition_C_IT it2(it);
|
||||
for (it2.mark_cycle_pt(); !it2.cycled_list(); it2.forward()) {
|
||||
ColPartition *candidate2 = it2.data();
|
||||
if (candidate2 != candidate && !OKMergeCandidate(candidate, candidate2, false)) {
|
||||
if (candidate2 != candidate &&
|
||||
!OKMergeCandidate(candidate, candidate2, false)) {
|
||||
if (debug) {
|
||||
tprintf("NC overlap failed:Candidate:");
|
||||
candidate2->bounding_box().print();
|
||||
@ -341,7 +353,8 @@ int ColPartitionGrid::ComputeTotalOverlap(ColPartitionGrid **overlap_grid) {
|
||||
// Finds all the ColPartitions in the grid that overlap with the given
|
||||
// box and returns them SortByBoxLeft(ed) and uniqued in the given list.
|
||||
// Any partition equal to not_this (may be nullptr) is excluded.
|
||||
void ColPartitionGrid::FindOverlappingPartitions(const TBOX &box, const ColPartition *not_this,
|
||||
void ColPartitionGrid::FindOverlappingPartitions(const TBOX &box,
|
||||
const ColPartition *not_this,
|
||||
ColPartition_CLIST *parts) {
|
||||
ColPartitionGridSearch rsearch(this);
|
||||
rsearch.StartRectSearch(box);
|
||||
@ -396,7 +409,8 @@ void ColPartitionGrid::FindOverlappingPartitions(const TBOX &box, const ColParti
|
||||
// in overlap, or tightly spaced text would end up in bits.
|
||||
ColPartition *ColPartitionGrid::BestMergeCandidate(
|
||||
const ColPartition *part, ColPartition_CLIST *candidates, bool debug,
|
||||
std::function<bool(const ColPartition *, const ColPartition *)> confirm_cb,
|
||||
const std::function<bool(const ColPartition *, const ColPartition *)>
|
||||
&confirm_cb,
|
||||
int *overlap_increase) {
|
||||
if (overlap_increase != nullptr) {
|
||||
*overlap_increase = 0;
|
||||
@ -404,7 +418,8 @@ ColPartition *ColPartitionGrid::BestMergeCandidate(
|
||||
if (candidates->empty()) {
|
||||
return nullptr;
|
||||
}
|
||||
int ok_overlap = static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
|
||||
int ok_overlap =
|
||||
static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
|
||||
// The best neighbour to merge with is the one that causes least
|
||||
// total pairwise overlap among all the neighbours.
|
||||
// If more than one offers the same total overlap, choose the one
|
||||
@ -424,8 +439,8 @@ ColPartition *ColPartitionGrid::BestMergeCandidate(
|
||||
// we need anything that might be overlapped by the merged box.
|
||||
FindOverlappingPartitions(full_box, part, &neighbours);
|
||||
if (debug) {
|
||||
tprintf("Finding best merge candidate from %d, %d neighbours for box:", candidates->length(),
|
||||
neighbours.length());
|
||||
tprintf("Finding best merge candidate from %d, %d neighbours for box:",
|
||||
candidates->length(), neighbours.length());
|
||||
part_box.print();
|
||||
}
|
||||
// If the best increase in overlap is positive, then we also check the
|
||||
@ -434,7 +449,8 @@ ColPartition *ColPartitionGrid::BestMergeCandidate(
|
||||
// non-candidate overlap is better than the best overlap, then return
|
||||
// the worst non-candidate overlap instead.
|
||||
ColPartition_CLIST non_candidate_neighbours;
|
||||
non_candidate_neighbours.set_subtract(SortByBoxLeft<ColPartition>, true, &neighbours, candidates);
|
||||
non_candidate_neighbours.set_subtract(SortByBoxLeft<ColPartition>, true,
|
||||
&neighbours, candidates);
|
||||
int worst_nc_increase = 0;
|
||||
int best_increase = INT32_MAX;
|
||||
int best_area = 0;
|
||||
@ -454,8 +470,8 @@ ColPartition *ColPartitionGrid::BestMergeCandidate(
|
||||
best_increase = increase;
|
||||
best_area = cand_box.bounding_union(part_box).area() - cand_box.area();
|
||||
if (debug) {
|
||||
tprintf("New best merge candidate has increase %d, area %d, over box:", increase,
|
||||
best_area);
|
||||
tprintf("New best merge candidate has increase %d, area %d, over box:",
|
||||
increase, best_area);
|
||||
full_box.print();
|
||||
candidate->Print();
|
||||
}
|
||||
@ -466,7 +482,8 @@ ColPartition *ColPartitionGrid::BestMergeCandidate(
|
||||
best_candidate = candidate;
|
||||
}
|
||||
}
|
||||
increase = IncreaseInOverlap(part, candidate, ok_overlap, &non_candidate_neighbours);
|
||||
increase = IncreaseInOverlap(part, candidate, ok_overlap,
|
||||
&non_candidate_neighbours);
|
||||
if (increase > worst_nc_increase) {
|
||||
worst_nc_increase = increase;
|
||||
}
|
||||
@ -478,7 +495,8 @@ ColPartition *ColPartitionGrid::BestMergeCandidate(
|
||||
// but only if each candidate is either a good diacritic merge with part,
|
||||
// or an ok merge candidate with all the others.
|
||||
// See TestCompatibleCandidates for more explanation and a picture.
|
||||
if (worst_nc_increase < best_increase && TestCompatibleCandidates(*part, debug, candidates)) {
|
||||
if (worst_nc_increase < best_increase &&
|
||||
TestCompatibleCandidates(*part, debug, candidates)) {
|
||||
best_increase = worst_nc_increase;
|
||||
}
|
||||
}
|
||||
@ -490,7 +508,8 @@ ColPartition *ColPartitionGrid::BestMergeCandidate(
|
||||
|
||||
// Helper to remove the given box from the given partition, put it in its
|
||||
// own partition, and add to the partition list.
|
||||
static void RemoveBadBox(BLOBNBOX *box, ColPartition *part, ColPartition_LIST *part_list) {
|
||||
static void RemoveBadBox(BLOBNBOX *box, ColPartition *part,
|
||||
ColPartition_LIST *part_list) {
|
||||
part->RemoveBox(box);
|
||||
ColPartition::MakeBigPartition(box, part_list);
|
||||
}
|
||||
@ -501,8 +520,10 @@ static void RemoveBadBox(BLOBNBOX *box, ColPartition *part, ColPartition_LIST *p
|
||||
// Blobs that cause overlaps get removed, put in individual partitions
|
||||
// and added to the big_parts list. They are most likely characters on
|
||||
// 2 textlines that touch, or something big like a dropcap.
|
||||
void ColPartitionGrid::SplitOverlappingPartitions(ColPartition_LIST *big_parts) {
|
||||
int ok_overlap = static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
|
||||
void ColPartitionGrid::SplitOverlappingPartitions(
|
||||
ColPartition_LIST *big_parts) {
|
||||
int ok_overlap =
|
||||
static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
|
||||
// Iterate the ColPartitions in the grid.
|
||||
ColPartitionGridSearch gsearch(this);
|
||||
gsearch.StartFullSearch();
|
||||
@ -534,7 +555,8 @@ void ColPartitionGrid::SplitOverlappingPartitions(ColPartition_LIST *big_parts)
|
||||
BLOBNBOX *excluded = part->BiggestBox();
|
||||
TBOX shrunken = part->BoundsWithoutBox(excluded);
|
||||
if (!shrunken.overlap(neighbour_box) &&
|
||||
excluded->bounding_box().height() > kBigPartSizeRatio * shrunken.height()) {
|
||||
excluded->bounding_box().height() >
|
||||
kBigPartSizeRatio * shrunken.height()) {
|
||||
// Removing the biggest box fixes the overlap, so do it!
|
||||
gsearch.RemoveBBox();
|
||||
RemoveBadBox(excluded, part, big_parts);
|
||||
@ -550,7 +572,8 @@ void ColPartitionGrid::SplitOverlappingPartitions(ColPartition_LIST *big_parts)
|
||||
BLOBNBOX *excluded = neighbour->BiggestBox();
|
||||
TBOX shrunken = neighbour->BoundsWithoutBox(excluded);
|
||||
if (!shrunken.overlap(box) &&
|
||||
excluded->bounding_box().height() > kBigPartSizeRatio * shrunken.height()) {
|
||||
excluded->bounding_box().height() >
|
||||
kBigPartSizeRatio * shrunken.height()) {
|
||||
// Removing the biggest box fixes the overlap, so do it!
|
||||
rsearch.RemoveBBox();
|
||||
RemoveBadBox(excluded, neighbour, big_parts);
|
||||
@ -562,7 +585,8 @@ void ColPartitionGrid::SplitOverlappingPartitions(ColPartition_LIST *big_parts)
|
||||
int part_overlap_count = part->CountOverlappingBoxes(neighbour_box);
|
||||
int neighbour_overlap_count = neighbour->CountOverlappingBoxes(box);
|
||||
ColPartition *right_part = nullptr;
|
||||
if (neighbour_overlap_count <= part_overlap_count || part->IsSingleton()) {
|
||||
if (neighbour_overlap_count <= part_overlap_count ||
|
||||
part->IsSingleton()) {
|
||||
// Try to split the neighbour to reduce overlap.
|
||||
BLOBNBOX *split_blob = neighbour->OverlapSplitBlob(box);
|
||||
if (split_blob != nullptr) {
|
||||
@ -608,15 +632,18 @@ void ColPartitionGrid::SplitOverlappingPartitions(ColPartition_LIST *big_parts)
|
||||
// nontext_map, which is used to prevent the spread of text neighbourhoods
|
||||
// into images.
|
||||
// Returns true if anything was changed.
|
||||
bool ColPartitionGrid::GridSmoothNeighbours(BlobTextFlowType source_type, Image nontext_map,
|
||||
const TBOX &im_box, const FCOORD &rotation) {
|
||||
bool ColPartitionGrid::GridSmoothNeighbours(BlobTextFlowType source_type,
|
||||
Image nontext_map,
|
||||
const TBOX &im_box,
|
||||
const FCOORD &rotation) {
|
||||
// Iterate the ColPartitions in the grid.
|
||||
ColPartitionGridSearch gsearch(this);
|
||||
gsearch.StartFullSearch();
|
||||
ColPartition *part;
|
||||
bool any_changed = false;
|
||||
while ((part = gsearch.NextFullSearch()) != nullptr) {
|
||||
if (part->flow() != source_type || BLOBNBOX::IsLineType(part->blob_type())) {
|
||||
if (part->flow() != source_type ||
|
||||
BLOBNBOX::IsLineType(part->blob_type())) {
|
||||
continue;
|
||||
}
|
||||
const TBOX &box = part->bounding_box();
|
||||
@ -658,7 +685,8 @@ void ColPartitionGrid::ReflectInYAxis() {
|
||||
// it into proper blocks or columns.
|
||||
// TODO(rays) some kind of sort function would be useful and probably better
|
||||
// than the default here, which is to sort by order of the grid search.
|
||||
void ColPartitionGrid::ExtractPartitionsAsBlocks(BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks) {
|
||||
void ColPartitionGrid::ExtractPartitionsAsBlocks(BLOCK_LIST *blocks,
|
||||
TO_BLOCK_LIST *to_blocks) {
|
||||
TO_BLOCK_IT to_block_it(to_blocks);
|
||||
BLOCK_IT block_it(blocks);
|
||||
// All partitions will be put on this list and deleted on return.
|
||||
@ -672,8 +700,10 @@ void ColPartitionGrid::ExtractPartitionsAsBlocks(BLOCK_LIST *blocks, TO_BLOCK_LI
|
||||
part_it.add_after_then_move(part);
|
||||
// The partition has to be at least vaguely like text.
|
||||
BlobRegionType blob_type = part->blob_type();
|
||||
if (BLOBNBOX::IsTextType(blob_type) || (blob_type == BRT_UNKNOWN && part->boxes_count() > 1)) {
|
||||
PolyBlockType type = blob_type == BRT_VERT_TEXT ? PT_VERTICAL_TEXT : PT_FLOWING_TEXT;
|
||||
if (BLOBNBOX::IsTextType(blob_type) ||
|
||||
(blob_type == BRT_UNKNOWN && part->boxes_count() > 1)) {
|
||||
PolyBlockType type =
|
||||
blob_type == BRT_VERT_TEXT ? PT_VERTICAL_TEXT : PT_FLOWING_TEXT;
|
||||
// Get metrics from the row that will be used for the block.
|
||||
TBOX box = part->bounding_box();
|
||||
int median_width = part->median_width();
|
||||
@ -685,7 +715,8 @@ void ColPartitionGrid::ExtractPartitionsAsBlocks(BLOCK_LIST *blocks, TO_BLOCK_LI
|
||||
part->DeleteBoxes();
|
||||
continue;
|
||||
}
|
||||
auto *block = new BLOCK("", true, 0, 0, box.left(), box.bottom(), box.right(), box.top());
|
||||
auto *block = new BLOCK("", true, 0, 0, box.left(), box.bottom(),
|
||||
box.right(), box.top());
|
||||
block->pdblk.set_poly_block(new POLY_BLOCK(box, type));
|
||||
auto *to_block = new TO_BLOCK(block);
|
||||
TO_ROW_IT row_it(to_block->get_rows());
|
||||
@ -780,7 +811,8 @@ bool ColPartitionGrid::MakeColPartSets(PartSetVector *part_sets) {
|
||||
bool any_parts_found = false;
|
||||
while ((part = gsearch.NextFullSearch()) != nullptr) {
|
||||
BlobRegionType blob_type = part->blob_type();
|
||||
if (blob_type != BRT_NOISE && (blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) {
|
||||
if (blob_type != BRT_NOISE &&
|
||||
(blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) {
|
||||
int grid_x, grid_y;
|
||||
const TBOX &part_box = part->bounding_box();
|
||||
GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y);
|
||||
@ -815,11 +847,13 @@ ColPartitionSet *ColPartitionGrid::MakeSingleColumnSet(WidthCallback cb) {
|
||||
ColPartition *part;
|
||||
while ((part = gsearch.NextFullSearch()) != nullptr) {
|
||||
BlobRegionType blob_type = part->blob_type();
|
||||
if (blob_type != BRT_NOISE && (blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) {
|
||||
if (blob_type != BRT_NOISE &&
|
||||
(blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) {
|
||||
// Consider for single column.
|
||||
BlobTextFlowType flow = part->flow();
|
||||
if ((blob_type == BRT_TEXT && (flow == BTFT_STRONG_CHAIN || flow == BTFT_CHAIN ||
|
||||
flow == BTFT_LEADER || flow == BTFT_TEXT_ON_IMAGE)) ||
|
||||
if ((blob_type == BRT_TEXT &&
|
||||
(flow == BTFT_STRONG_CHAIN || flow == BTFT_CHAIN ||
|
||||
flow == BTFT_LEADER || flow == BTFT_TEXT_ON_IMAGE)) ||
|
||||
blob_type == BRT_RECTIMAGE || blob_type == BRT_POLYIMAGE) {
|
||||
if (single_column_part == nullptr) {
|
||||
single_column_part = part->ShallowCopy();
|
||||
@ -841,7 +875,7 @@ ColPartitionSet *ColPartitionGrid::MakeSingleColumnSet(WidthCallback cb) {
|
||||
if (single_column_part != nullptr) {
|
||||
// Make a ColPartitionSet out of the single_column_part as a candidate
|
||||
// for the single column case.
|
||||
single_column_part->SetColumnGoodness(cb);
|
||||
single_column_part->SetColumnGoodness(std::move(cb));
|
||||
return new ColPartitionSet(single_column_part);
|
||||
}
|
||||
return nullptr;
|
||||
@ -923,7 +957,8 @@ void ColPartitionGrid::ReTypeBlobs(BLOBNBOX_LIST *im_blobs) {
|
||||
|
||||
// The boxes within the partitions have changed (by deskew) so recompute
|
||||
// the bounds of all the partitions and reinsert them into the grid.
|
||||
void ColPartitionGrid::RecomputeBounds(int gridsize, const ICOORD &bleft, const ICOORD &tright,
|
||||
void ColPartitionGrid::RecomputeBounds(int gridsize, const ICOORD &bleft,
|
||||
const ICOORD &tright,
|
||||
const ICOORD &vertical) {
|
||||
ColPartition_LIST saved_parts;
|
||||
ColPartition_IT part_it(&saved_parts);
|
||||
@ -957,7 +992,8 @@ void ColPartitionGrid::GridFindMargins(ColPartitionSet **best_columns) {
|
||||
ColPartition *part;
|
||||
while ((part = gsearch.NextFullSearch()) != nullptr) {
|
||||
// Set up a rectangle search x-bounded by the column and y by the part.
|
||||
ColPartitionSet *columns = best_columns != nullptr ? best_columns[gsearch.GridY()] : nullptr;
|
||||
ColPartitionSet *columns =
|
||||
best_columns != nullptr ? best_columns[gsearch.GridY()] : nullptr;
|
||||
FindPartitionMargins(columns, part);
|
||||
const TBOX &box = part->bounding_box();
|
||||
if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom())) {
|
||||
@ -972,7 +1008,8 @@ void ColPartitionGrid::GridFindMargins(ColPartitionSet **best_columns) {
|
||||
// best_columns, which may be nullptr, is an array of pointers indicating the
|
||||
// column set at each y-coordinate in the grid.
|
||||
// best_columns is usually the best_columns_ member of ColumnFinder.
|
||||
void ColPartitionGrid::ListFindMargins(ColPartitionSet **best_columns, ColPartition_LIST *parts) {
|
||||
void ColPartitionGrid::ListFindMargins(ColPartitionSet **best_columns,
|
||||
ColPartition_LIST *parts) {
|
||||
ColPartition_IT part_it(parts);
|
||||
for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
|
||||
ColPartition *part = part_it.data();
|
||||
@ -1050,15 +1087,18 @@ void ColPartitionGrid::FindFigureCaptions() {
|
||||
while ((part = gsearch.NextFullSearch()) != nullptr) {
|
||||
if (part->IsImageType()) {
|
||||
const TBOX &part_box = part->bounding_box();
|
||||
bool debug = AlignedBlob::WithinTestRegion(2, part_box.left(), part_box.bottom());
|
||||
bool debug =
|
||||
AlignedBlob::WithinTestRegion(2, part_box.left(), part_box.bottom());
|
||||
ColPartition *best_caption = nullptr;
|
||||
int best_dist = 0; // Distance to best_caption.
|
||||
int best_upper = 0; // Direction of best_caption.
|
||||
// Handle both lower and upper directions.
|
||||
for (int upper = 0; upper < 2; ++upper) {
|
||||
ColPartition_C_IT partner_it(upper ? part->upper_partners() : part->lower_partners());
|
||||
ColPartition_C_IT partner_it(upper ? part->upper_partners()
|
||||
: part->lower_partners());
|
||||
// If there are no image partners, then this direction is ok.
|
||||
for (partner_it.mark_cycle_pt(); !partner_it.cycled_list(); partner_it.forward()) {
|
||||
for (partner_it.mark_cycle_pt(); !partner_it.cycled_list();
|
||||
partner_it.forward()) {
|
||||
ColPartition *partner = partner_it.data();
|
||||
if (partner->IsImageType()) {
|
||||
break;
|
||||
@ -1068,7 +1108,8 @@ void ColPartitionGrid::FindFigureCaptions() {
|
||||
continue;
|
||||
}
|
||||
// Find the nearest totally overlapping text partner.
|
||||
for (partner_it.mark_cycle_pt(); !partner_it.cycled_list(); partner_it.forward()) {
|
||||
for (partner_it.mark_cycle_pt(); !partner_it.cycled_list();
|
||||
partner_it.forward()) {
|
||||
ColPartition *partner = partner_it.data();
|
||||
if (!partner->IsTextType() || partner->type() == PT_TABLE) {
|
||||
continue;
|
||||
@ -1080,7 +1121,8 @@ void ColPartitionGrid::FindFigureCaptions() {
|
||||
tprintf("Considering partner:");
|
||||
partner_box.print();
|
||||
}
|
||||
if (partner_box.left() >= part_box.left() && partner_box.right() <= part_box.right()) {
|
||||
if (partner_box.left() >= part_box.left() &&
|
||||
partner_box.right() <= part_box.right()) {
|
||||
int dist = partner_box.y_gap(part_box);
|
||||
if (best_caption == nullptr || dist < best_dist) {
|
||||
best_dist = dist;
|
||||
@ -1106,7 +1148,8 @@ void ColPartitionGrid::FindFigureCaptions() {
|
||||
ColPartition *end_partner = nullptr;
|
||||
ColPartition *next_partner = nullptr;
|
||||
for (ColPartition *partner = best_caption;
|
||||
partner != nullptr && line_count <= kMaxCaptionLines; partner = next_partner) {
|
||||
partner != nullptr && line_count <= kMaxCaptionLines;
|
||||
partner = next_partner) {
|
||||
if (!partner->IsTextType()) {
|
||||
end_partner = partner;
|
||||
break;
|
||||
@ -1115,7 +1158,8 @@ void ColPartitionGrid::FindFigureCaptions() {
|
||||
total_height += partner->bounding_box().height();
|
||||
next_partner = partner->SingletonPartner(best_upper);
|
||||
if (next_partner != nullptr) {
|
||||
int gap = partner->bounding_box().y_gap(next_partner->bounding_box());
|
||||
int gap =
|
||||
partner->bounding_box().y_gap(next_partner->bounding_box());
|
||||
if (gap > biggest_gap) {
|
||||
biggest_gap = gap;
|
||||
end_partner = next_partner;
|
||||
@ -1132,8 +1176,8 @@ void ColPartitionGrid::FindFigureCaptions() {
|
||||
}
|
||||
}
|
||||
if (debug) {
|
||||
tprintf("Line count=%d, biggest gap %d, smallest%d, mean height %d\n", line_count,
|
||||
biggest_gap, smallest_gap, mean_height);
|
||||
tprintf("Line count=%d, biggest gap %d, smallest%d, mean height %d\n",
|
||||
line_count, biggest_gap, smallest_gap, mean_height);
|
||||
if (end_partner != nullptr) {
|
||||
tprintf("End partner:");
|
||||
end_partner->bounding_box().print();
|
||||
@ -1144,7 +1188,8 @@ void ColPartitionGrid::FindFigureCaptions() {
|
||||
}
|
||||
if (line_count <= kMaxCaptionLines) {
|
||||
// This is a qualified caption. Mark the text as caption.
|
||||
for (ColPartition *partner = best_caption; partner != nullptr && partner != end_partner;
|
||||
for (ColPartition *partner = best_caption;
|
||||
partner != nullptr && partner != end_partner;
|
||||
partner = next_partner) {
|
||||
partner->set_type(PT_CAPTION_TEXT);
|
||||
partner->SetBlobTypes();
|
||||
@ -1232,7 +1277,8 @@ void ColPartitionGrid::FindPartitionPartners(bool upper, ColPartition *part) {
|
||||
|
||||
// Finds the best partner in the given direction for the given partition.
|
||||
// Stores the result with AddPartner.
|
||||
void ColPartitionGrid::FindVPartitionPartners(bool to_the_left, ColPartition *part) {
|
||||
void ColPartitionGrid::FindVPartitionPartners(bool to_the_left,
|
||||
ColPartition *part) {
|
||||
if (part->type() == PT_NOISE) {
|
||||
return; // Noise is not allowed to partner anything.
|
||||
}
|
||||
@ -1292,7 +1338,8 @@ void ColPartitionGrid::RefinePartitionPartners(bool get_desperate) {
|
||||
gsearch.StartFullSearch();
|
||||
ColPartition *part;
|
||||
while ((part = gsearch.NextFullSearch()) != nullptr) {
|
||||
part->RefinePartners(static_cast<PolyBlockType>(type), get_desperate, this);
|
||||
part->RefinePartners(static_cast<PolyBlockType>(type), get_desperate,
|
||||
this);
|
||||
// Iterator may have been messed up by a merge.
|
||||
gsearch.RepositionIterator();
|
||||
}
|
||||
@ -1304,9 +1351,11 @@ void ColPartitionGrid::RefinePartitionPartners(bool get_desperate) {
|
||||
// Finds and returns a list of candidate ColPartitions to merge with part.
|
||||
// The candidates must overlap search_box, and when merged must not
|
||||
// overlap any other partitions that are not overlapped by each individually.
|
||||
void ColPartitionGrid::FindMergeCandidates(const ColPartition *part, const TBOX &search_box,
|
||||
bool debug, ColPartition_CLIST *candidates) {
|
||||
int ok_overlap = static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
|
||||
void ColPartitionGrid::FindMergeCandidates(const ColPartition *part,
|
||||
const TBOX &search_box, bool debug,
|
||||
ColPartition_CLIST *candidates) {
|
||||
int ok_overlap =
|
||||
static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
|
||||
const TBOX &part_box = part->bounding_box();
|
||||
// Now run the rect search.
|
||||
ColPartitionGridSearch rsearch(this);
|
||||
@ -1393,7 +1442,8 @@ void ColPartitionGrid::FindMergeCandidates(const ColPartition *part, const TBOX
|
||||
// into images.
|
||||
// Returns true if the partition was changed.
|
||||
bool ColPartitionGrid::SmoothRegionType(Image nontext_map, const TBOX &im_box,
|
||||
const FCOORD &rerotation, bool debug, ColPartition *part) {
|
||||
const FCOORD &rerotation, bool debug,
|
||||
ColPartition *part) {
|
||||
const TBOX &part_box = part->bounding_box();
|
||||
if (debug) {
|
||||
tprintf("Smooothing part at:");
|
||||
@ -1409,8 +1459,8 @@ bool ColPartitionGrid::SmoothRegionType(Image nontext_map, const TBOX &im_box,
|
||||
for (int d = 0; d < BND_COUNT; ++d) {
|
||||
int dist;
|
||||
auto dir = static_cast<BlobNeighbourDir>(d);
|
||||
BlobRegionType type =
|
||||
SmoothInOneDirection(dir, nontext_map, im_box, rerotation, debug, *part, &dist);
|
||||
BlobRegionType type = SmoothInOneDirection(dir, nontext_map, im_box,
|
||||
rerotation, debug, *part, &dist);
|
||||
if (debug) {
|
||||
tprintf("Result in dir %d = %d at dist %d\n", dir, type, dist);
|
||||
}
|
||||
@ -1459,8 +1509,9 @@ bool ColPartitionGrid::SmoothRegionType(Image nontext_map, const TBOX &im_box,
|
||||
// Sets up a search box based on the part_box, padded in all directions
|
||||
// except direction. Also setup dist_scaling to weight x,y distances according
|
||||
// to the given direction.
|
||||
static void ComputeSearchBoxAndScaling(BlobNeighbourDir direction, const TBOX &part_box,
|
||||
int min_padding, TBOX *search_box, ICOORD *dist_scaling) {
|
||||
static void ComputeSearchBoxAndScaling(BlobNeighbourDir direction,
|
||||
const TBOX &part_box, int min_padding,
|
||||
TBOX *search_box, ICOORD *dist_scaling) {
|
||||
*search_box = part_box;
|
||||
// Generate a pad value based on the min dimension of part_box, but at least
|
||||
// min_padding and then scaled by kMaxPadFactor.
|
||||
@ -1511,20 +1562,21 @@ enum NeighbourPartitionType {
|
||||
// partitions that makes a decisive result (if any) and returns the type
|
||||
// and the distance of the collection. If there are any pixels in the
|
||||
// nontext_map, then the decision is biased towards image.
|
||||
BlobRegionType ColPartitionGrid::SmoothInOneDirection(BlobNeighbourDir direction, Image nontext_map,
|
||||
const TBOX &im_box, const FCOORD &rerotation,
|
||||
bool debug, const ColPartition &part,
|
||||
int *best_distance) {
|
||||
BlobRegionType ColPartitionGrid::SmoothInOneDirection(
|
||||
BlobNeighbourDir direction, Image nontext_map, const TBOX &im_box,
|
||||
const FCOORD &rerotation, bool debug, const ColPartition &part,
|
||||
int *best_distance) {
|
||||
// Set up a rectangle search bounded by the part.
|
||||
const TBOX &part_box = part.bounding_box();
|
||||
TBOX search_box;
|
||||
ICOORD dist_scaling;
|
||||
ComputeSearchBoxAndScaling(direction, part_box, gridsize(), &search_box, &dist_scaling);
|
||||
bool image_region =
|
||||
ImageFind::CountPixelsInRotatedBox(search_box, im_box, rerotation, nontext_map) > 0;
|
||||
ComputeSearchBoxAndScaling(direction, part_box, gridsize(), &search_box,
|
||||
&dist_scaling);
|
||||
bool image_region = ImageFind::CountPixelsInRotatedBox(
|
||||
search_box, im_box, rerotation, nontext_map) > 0;
|
||||
std::vector<int> dists[NPT_COUNT];
|
||||
AccumulatePartDistances(part, dist_scaling, search_box, nontext_map, im_box, rerotation, debug,
|
||||
dists);
|
||||
AccumulatePartDistances(part, dist_scaling, search_box, nontext_map, im_box,
|
||||
rerotation, debug, dists);
|
||||
// By iteratively including the next smallest distance across the vectors,
|
||||
// (as in a merge sort) we can use the vector indices as counts of each type
|
||||
// and find the nearest set of objects that give us a definite decision.
|
||||
@ -1551,33 +1603,35 @@ BlobRegionType ColPartitionGrid::SmoothInOneDirection(BlobNeighbourDir direction
|
||||
}
|
||||
*best_distance = min_dist;
|
||||
if (debug) {
|
||||
tprintf("Totals: htext=%u+%u, vtext=%u+%u, image=%u+%u, at dist=%d\n", counts[NPT_HTEXT],
|
||||
counts[NPT_WEAK_HTEXT], counts[NPT_VTEXT], counts[NPT_WEAK_VTEXT], counts[NPT_IMAGE],
|
||||
image_bias, min_dist);
|
||||
tprintf("Totals: htext=%u+%u, vtext=%u+%u, image=%u+%u, at dist=%d\n",
|
||||
counts[NPT_HTEXT], counts[NPT_WEAK_HTEXT], counts[NPT_VTEXT],
|
||||
counts[NPT_WEAK_VTEXT], counts[NPT_IMAGE], image_bias, min_dist);
|
||||
}
|
||||
// See if we have a decision yet.
|
||||
auto image_count = counts[NPT_IMAGE];
|
||||
auto htext_score =
|
||||
counts[NPT_HTEXT] + counts[NPT_WEAK_HTEXT] - (image_count + counts[NPT_WEAK_VTEXT]);
|
||||
auto vtext_score =
|
||||
counts[NPT_VTEXT] + counts[NPT_WEAK_VTEXT] - (image_count + counts[NPT_WEAK_HTEXT]);
|
||||
auto htext_score = counts[NPT_HTEXT] + counts[NPT_WEAK_HTEXT] -
|
||||
(image_count + counts[NPT_WEAK_VTEXT]);
|
||||
auto vtext_score = counts[NPT_VTEXT] + counts[NPT_WEAK_VTEXT] -
|
||||
(image_count + counts[NPT_WEAK_HTEXT]);
|
||||
if (image_count > 0 && image_bias - htext_score >= kSmoothDecisionMargin &&
|
||||
image_bias - vtext_score >= kSmoothDecisionMargin) {
|
||||
*best_distance = dists[NPT_IMAGE][0];
|
||||
if (!dists[NPT_WEAK_VTEXT].empty() && *best_distance > dists[NPT_WEAK_VTEXT][0]) {
|
||||
if (!dists[NPT_WEAK_VTEXT].empty() &&
|
||||
*best_distance > dists[NPT_WEAK_VTEXT][0]) {
|
||||
*best_distance = dists[NPT_WEAK_VTEXT][0];
|
||||
}
|
||||
if (!dists[NPT_WEAK_HTEXT].empty() && *best_distance > dists[NPT_WEAK_HTEXT][0]) {
|
||||
if (!dists[NPT_WEAK_HTEXT].empty() &&
|
||||
*best_distance > dists[NPT_WEAK_HTEXT][0]) {
|
||||
*best_distance = dists[NPT_WEAK_HTEXT][0];
|
||||
}
|
||||
return BRT_POLYIMAGE;
|
||||
}
|
||||
if ((text_dir != BRT_VERT_TEXT || flow_type != BTFT_CHAIN) && counts[NPT_HTEXT] > 0 &&
|
||||
htext_score >= kSmoothDecisionMargin) {
|
||||
if ((text_dir != BRT_VERT_TEXT || flow_type != BTFT_CHAIN) &&
|
||||
counts[NPT_HTEXT] > 0 && htext_score >= kSmoothDecisionMargin) {
|
||||
*best_distance = dists[NPT_HTEXT][0];
|
||||
return BRT_TEXT;
|
||||
} else if ((text_dir != BRT_TEXT || flow_type != BTFT_CHAIN) && counts[NPT_VTEXT] > 0 &&
|
||||
vtext_score >= kSmoothDecisionMargin) {
|
||||
} else if ((text_dir != BRT_TEXT || flow_type != BTFT_CHAIN) &&
|
||||
counts[NPT_VTEXT] > 0 && vtext_score >= kSmoothDecisionMargin) {
|
||||
*best_distance = dists[NPT_VTEXT][0];
|
||||
return BRT_VERT_TEXT;
|
||||
}
|
||||
@ -1592,11 +1646,10 @@ BlobRegionType ColPartitionGrid::SmoothInOneDirection(BlobNeighbourDir direction
|
||||
// The nontext_map (+im_box, rerotation) is used to make text invisible if
|
||||
// there is non-text in between.
|
||||
// dists must be an array of vectors of size NPT_COUNT.
|
||||
void ColPartitionGrid::AccumulatePartDistances(const ColPartition &base_part,
|
||||
const ICOORD &dist_scaling, const TBOX &search_box,
|
||||
Image nontext_map, const TBOX &im_box,
|
||||
const FCOORD &rerotation, bool debug,
|
||||
std::vector<int> *dists) {
|
||||
void ColPartitionGrid::AccumulatePartDistances(
|
||||
const ColPartition &base_part, const ICOORD &dist_scaling,
|
||||
const TBOX &search_box, Image nontext_map, const TBOX &im_box,
|
||||
const FCOORD &rerotation, bool debug, std::vector<int> *dists) {
|
||||
const TBOX &part_box = base_part.bounding_box();
|
||||
ColPartitionGridSearch rsearch(this);
|
||||
rsearch.SetUniqueMode(true);
|
||||
@ -1605,14 +1658,16 @@ void ColPartitionGrid::AccumulatePartDistances(const ColPartition &base_part,
|
||||
// Search for compatible neighbours with a similar strokewidth, but not
|
||||
// on the other side of a tab vector.
|
||||
while ((neighbour = rsearch.NextRectSearch()) != nullptr) {
|
||||
if (neighbour->IsUnMergeableType() || !base_part.ConfirmNoTabViolation(*neighbour) ||
|
||||
if (neighbour->IsUnMergeableType() ||
|
||||
!base_part.ConfirmNoTabViolation(*neighbour) ||
|
||||
neighbour == &base_part) {
|
||||
continue;
|
||||
}
|
||||
TBOX nbox = neighbour->bounding_box();
|
||||
BlobRegionType n_type = neighbour->blob_type();
|
||||
if ((n_type == BRT_TEXT || n_type == BRT_VERT_TEXT) &&
|
||||
!ImageFind::BlankImageInBetween(part_box, nbox, im_box, rerotation, nontext_map)) {
|
||||
!ImageFind::BlankImageInBetween(part_box, nbox, im_box, rerotation,
|
||||
nontext_map)) {
|
||||
continue; // Text not visible the other side of image.
|
||||
}
|
||||
if (BLOBNBOX::IsLineType(n_type)) {
|
||||
@ -1673,7 +1728,8 @@ void ColPartitionGrid::AccumulatePartDistances(const ColPartition &base_part,
|
||||
// neighbours that vertically overlap significantly.
|
||||
// columns may be nullptr, and indicates the assigned column structure this
|
||||
// is applicable to part.
|
||||
void ColPartitionGrid::FindPartitionMargins(ColPartitionSet *columns, ColPartition *part) {
|
||||
void ColPartitionGrid::FindPartitionMargins(ColPartitionSet *columns,
|
||||
ColPartition *part) {
|
||||
// Set up a rectangle search x-bounded by the column and y by the part.
|
||||
TBOX box = part->bounding_box();
|
||||
int y = part->MidY();
|
||||
@ -1693,19 +1749,20 @@ void ColPartitionGrid::FindPartitionMargins(ColPartitionSet *columns, ColPartiti
|
||||
left_margin -= kColumnWidthFactor;
|
||||
right_margin += kColumnWidthFactor;
|
||||
// Search for ColPartitions that reduce the margin.
|
||||
left_margin =
|
||||
FindMargin(box.left() + box.height(), true, left_margin, box.bottom(), box.top(), part);
|
||||
left_margin = FindMargin(box.left() + box.height(), true, left_margin,
|
||||
box.bottom(), box.top(), part);
|
||||
part->set_left_margin(left_margin);
|
||||
// Search for ColPartitions that reduce the margin.
|
||||
right_margin =
|
||||
FindMargin(box.right() - box.height(), false, right_margin, box.bottom(), box.top(), part);
|
||||
right_margin = FindMargin(box.right() - box.height(), false, right_margin,
|
||||
box.bottom(), box.top(), part);
|
||||
part->set_right_margin(right_margin);
|
||||
}
|
||||
|
||||
// Starting at x, and going in the specified direction, up to x_limit, finds
|
||||
// the margin for the given y range by searching sideways,
|
||||
// and ignoring not_this.
|
||||
int ColPartitionGrid::FindMargin(int x, bool right_to_left, int x_limit, int y_bottom, int y_top,
|
||||
int ColPartitionGrid::FindMargin(int x, bool right_to_left, int x_limit,
|
||||
int y_bottom, int y_top,
|
||||
const ColPartition *not_this) {
|
||||
int height = y_top - y_bottom;
|
||||
// Iterate the ColPartitions in the grid.
|
||||
|
@ -47,16 +47,18 @@ public:
|
||||
// calls the confirm_cb to check any more rules. If the confirm_cb returns
|
||||
// true, then the partitions are merged.
|
||||
// Both callbacks are deleted before returning.
|
||||
void Merges(std::function<bool(ColPartition *, TBOX *)> box_cb,
|
||||
std::function<bool(const ColPartition *, const ColPartition *)> confirm_cb);
|
||||
void Merges(const std::function<bool(ColPartition *, TBOX *)> &box_cb,
|
||||
const std::function<bool(const ColPartition *,
|
||||
const ColPartition *)> &confirm_cb);
|
||||
|
||||
// For the given partition, calls the box_cb permanent callback
|
||||
// to compute the search box, searches the box, and if a candidate is found,
|
||||
// calls the confirm_cb to check any more rules. If the confirm_cb returns
|
||||
// true, then the partitions are merged.
|
||||
// Returns true if the partition is consumed by one or more merges.
|
||||
bool MergePart(std::function<bool(ColPartition *, TBOX *)> box_cb,
|
||||
std::function<bool(const ColPartition *, const ColPartition *)> confirm_cb,
|
||||
bool MergePart(const std::function<bool(ColPartition *, TBOX *)> &box_cb,
|
||||
const std::function<bool(const ColPartition *,
|
||||
const ColPartition *)> &confirm_cb,
|
||||
ColPartition *part);
|
||||
|
||||
// Computes and returns the total overlap of all partitions in the grid.
|
||||
@ -78,7 +80,8 @@ public:
|
||||
// See colpartitiongrid.cpp for a diagram.
|
||||
ColPartition *BestMergeCandidate(
|
||||
const ColPartition *part, ColPartition_CLIST *candidates, bool debug,
|
||||
std::function<bool(const ColPartition *, const ColPartition *)> confirm_cb,
|
||||
const std::function<bool(const ColPartition *, const ColPartition *)>
|
||||
&confirm_cb,
|
||||
int *overlap_increase);
|
||||
|
||||
// Split partitions where it reduces overlap between their bounding boxes.
|
||||
@ -98,8 +101,8 @@ public:
|
||||
// nontext_map, which is used to prevent the spread of text neighbourhoods
|
||||
// into images.
|
||||
// Returns true if anything was changed.
|
||||
bool GridSmoothNeighbours(BlobTextFlowType source_type, Image nontext_map, const TBOX &im_box,
|
||||
const FCOORD &rerotation);
|
||||
bool GridSmoothNeighbours(BlobTextFlowType source_type, Image nontext_map,
|
||||
const TBOX &im_box, const FCOORD &rerotation);
|
||||
|
||||
// Reflects the grid and its colpartitions in the y-axis, assuming that
|
||||
// all blob boxes have already been done.
|
||||
@ -150,7 +153,8 @@ public:
|
||||
|
||||
// Improves the margins of the ColPartitions in the list by calling
|
||||
// FindPartitionMargins on each.
|
||||
void ListFindMargins(ColPartitionSet **best_columns, ColPartition_LIST *parts);
|
||||
void ListFindMargins(ColPartitionSet **best_columns,
|
||||
ColPartition_LIST *parts);
|
||||
|
||||
// Deletes all the partitions in the grid after disowning all the blobs.
|
||||
void DeleteParts();
|
||||
@ -185,8 +189,8 @@ private:
|
||||
// Finds and returns a list of candidate ColPartitions to merge with part.
|
||||
// The candidates must overlap search_box, and when merged must not
|
||||
// overlap any other partitions that are not overlapped by each individually.
|
||||
void FindMergeCandidates(const ColPartition *part, const TBOX &search_box, bool debug,
|
||||
ColPartition_CLIST *candidates);
|
||||
void FindMergeCandidates(const ColPartition *part, const TBOX &search_box,
|
||||
bool debug, ColPartition_CLIST *candidates);
|
||||
|
||||
// Smoothes the region type/flow type of the given part by looking at local
|
||||
// neighbours and the given image mask. Searches a padded rectangle with the
|
||||
@ -199,7 +203,8 @@ private:
|
||||
// nontext_map, which is used to prevent the spread of text neighbourhoods
|
||||
// into images.
|
||||
// Returns true if the partition was changed.
|
||||
bool SmoothRegionType(Image nontext_map, const TBOX &im_box, const FCOORD &rerotation, bool debug,
|
||||
bool SmoothRegionType(Image nontext_map, const TBOX &im_box,
|
||||
const FCOORD &rerotation, bool debug,
|
||||
ColPartition *part);
|
||||
// Executes the search for SmoothRegionType in a single direction.
|
||||
// Creates a bounding box that is padded in all directions except direction,
|
||||
@ -207,17 +212,21 @@ private:
|
||||
// partitions that makes a decisive result (if any) and returns the type
|
||||
// and the distance of the collection. If there are any pixels in the
|
||||
// nontext_map, then the decision is biased towards image.
|
||||
BlobRegionType SmoothInOneDirection(BlobNeighbourDir direction, Image nontext_map,
|
||||
const TBOX &im_box, const FCOORD &rerotation, bool debug,
|
||||
const ColPartition &part, int *best_distance);
|
||||
BlobRegionType SmoothInOneDirection(BlobNeighbourDir direction,
|
||||
Image nontext_map, const TBOX &im_box,
|
||||
const FCOORD &rerotation, bool debug,
|
||||
const ColPartition &part,
|
||||
int *best_distance);
|
||||
// Counts the partitions in the given search_box by appending the gap
|
||||
// distance (scaled by dist_scaling) of the part from the base_part to the
|
||||
// vector of the appropriate type for the partition. Prior to return, the
|
||||
// vectors in the dists array are sorted in increasing order.
|
||||
// dists must be an array of vectors of size NPT_COUNT.
|
||||
void AccumulatePartDistances(const ColPartition &base_part, const ICOORD &dist_scaling,
|
||||
const TBOX &search_box, Image nontext_map, const TBOX &im_box,
|
||||
const FCOORD &rerotation, bool debug, std::vector<int> *dists);
|
||||
void AccumulatePartDistances(const ColPartition &base_part,
|
||||
const ICOORD &dist_scaling,
|
||||
const TBOX &search_box, Image nontext_map,
|
||||
const TBOX &im_box, const FCOORD &rerotation,
|
||||
bool debug, std::vector<int> *dists);
|
||||
|
||||
// Improves the margins of the ColPartition by searching for
|
||||
// neighbours that vertically overlap significantly.
|
||||
@ -226,8 +235,8 @@ private:
|
||||
// Starting at x, and going in the specified direction, up to x_limit, finds
|
||||
// the margin for the given y range by searching sideways,
|
||||
// and ignoring not_this.
|
||||
int FindMargin(int x, bool right_to_left, int x_limit, int y_bottom, int y_top,
|
||||
const ColPartition *not_this);
|
||||
int FindMargin(int x, bool right_to_left, int x_limit, int y_bottom,
|
||||
int y_top, const ColPartition *not_this);
|
||||
};
|
||||
|
||||
} // namespace tesseract.
|
||||
|
@ -90,7 +90,8 @@ void ColPartitionSet::RelinquishParts() {
|
||||
}
|
||||
|
||||
// Attempt to improve this by adding partitions or expanding partitions.
|
||||
void ColPartitionSet::ImproveColumnCandidate(WidthCallback cb, PartSetVector *src_sets) {
|
||||
void ColPartitionSet::ImproveColumnCandidate(const WidthCallback &cb,
|
||||
PartSetVector *src_sets) {
|
||||
int set_size = src_sets->size();
|
||||
// Iterate over the provided column sets, as each one may have something
|
||||
// to improve this.
|
||||
@ -140,7 +141,8 @@ void ColPartitionSet::ImproveColumnCandidate(WidthCallback cb, PartSetVector *sr
|
||||
// it was before, so use the tab.
|
||||
part->CopyLeftTab(*col_part, false);
|
||||
part->SetColumnGoodness(cb);
|
||||
} else if (col_box_left < part_left && (box_width_ok || !part_width_ok)) {
|
||||
} else if (col_box_left < part_left &&
|
||||
(box_width_ok || !part_width_ok)) {
|
||||
// The box is leaving the good column metric at least as good as
|
||||
// it was before, so use the box.
|
||||
part->CopyLeftTab(*col_part, true);
|
||||
@ -149,7 +151,8 @@ void ColPartitionSet::ImproveColumnCandidate(WidthCallback cb, PartSetVector *sr
|
||||
part_left = part->left_key();
|
||||
}
|
||||
if (col_right > part_right &&
|
||||
(part_it.at_last() || part_it.data_relative(1)->left_key() > col_right)) {
|
||||
(part_it.at_last() ||
|
||||
part_it.data_relative(1)->left_key() > col_right)) {
|
||||
// The right edge is better, so we can possibly expand it.
|
||||
int col_box_right = col_part->BoxRightKey();
|
||||
bool tab_width_ok = cb(part->KeyWidth(part_left, col_right));
|
||||
@ -159,7 +162,8 @@ void ColPartitionSet::ImproveColumnCandidate(WidthCallback cb, PartSetVector *sr
|
||||
// it was before, so use the tab.
|
||||
part->CopyRightTab(*col_part, false);
|
||||
part->SetColumnGoodness(cb);
|
||||
} else if (col_box_right > part_right && (box_width_ok || !part_width_ok)) {
|
||||
} else if (col_box_right > part_right &&
|
||||
(box_width_ok || !part_width_ok)) {
|
||||
// The box is leaving the good column metric at least as good as
|
||||
// it was before, so use the box.
|
||||
part->CopyRightTab(*col_part, true);
|
||||
@ -173,8 +177,10 @@ void ColPartitionSet::ImproveColumnCandidate(WidthCallback cb, PartSetVector *sr
|
||||
|
||||
// If this set is good enough to represent a new partitioning into columns,
|
||||
// add it to the vector of sets, otherwise delete it.
|
||||
void ColPartitionSet::AddToColumnSetsIfUnique(PartSetVector *column_sets, WidthCallback cb) {
|
||||
bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(), bounding_box_.bottom());
|
||||
void ColPartitionSet::AddToColumnSetsIfUnique(PartSetVector *column_sets,
|
||||
const WidthCallback &cb) {
|
||||
bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(),
|
||||
bounding_box_.bottom());
|
||||
if (debug) {
|
||||
tprintf("Considering new column candidate:\n");
|
||||
Print();
|
||||
@ -222,7 +228,8 @@ void ColPartitionSet::AddToColumnSetsIfUnique(PartSetVector *column_sets, WidthC
|
||||
|
||||
// Return true if the partitions in other are all compatible with the columns
|
||||
// in this.
|
||||
bool ColPartitionSet::CompatibleColumns(bool debug, ColPartitionSet *other, WidthCallback cb) {
|
||||
bool ColPartitionSet::CompatibleColumns(bool debug, ColPartitionSet *other,
|
||||
const WidthCallback &cb) {
|
||||
if (debug) {
|
||||
tprintf("CompatibleColumns testing compatibility\n");
|
||||
Print();
|
||||
@ -288,7 +295,8 @@ bool ColPartitionSet::CompatibleColumns(bool debug, ColPartitionSet *other, Widt
|
||||
if (debug) {
|
||||
int next_right = next_part->bounding_box().right();
|
||||
tprintf("CompatibleColumns false due to 2 parts of good width\n");
|
||||
tprintf("part1 %d-%d, part2 %d-%d\n", left, right, next_left, next_right);
|
||||
tprintf("part1 %d-%d, part2 %d-%d\n", left, right, next_left,
|
||||
next_right);
|
||||
right_col->Print();
|
||||
}
|
||||
return false;
|
||||
@ -375,7 +383,8 @@ ColPartitionSet *ColPartitionSet::Copy(bool good_only) {
|
||||
}
|
||||
|
||||
// Return the bounding boxes of columns at the given y-range
|
||||
void ColPartitionSet::GetColumnBoxes(int y_bottom, int y_top, ColSegment_LIST *segments) {
|
||||
void ColPartitionSet::GetColumnBoxes(int y_bottom, int y_top,
|
||||
ColSegment_LIST *segments) {
|
||||
ColPartition_IT it(&parts_);
|
||||
ColSegment_IT col_it(segments);
|
||||
col_it.move_to_last();
|
||||
@ -392,7 +401,8 @@ void ColPartitionSet::GetColumnBoxes(int y_bottom, int y_top, ColSegment_LIST *s
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
|
||||
// Display the edges of the columns at the given y coords.
|
||||
void ColPartitionSet::DisplayColumnEdges(int y_bottom, int y_top, ScrollView *win) {
|
||||
void ColPartitionSet::DisplayColumnEdges(int y_bottom, int y_top,
|
||||
ScrollView *win) {
|
||||
ColPartition_IT it(&parts_);
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||
ColPartition *part = it.data();
|
||||
@ -410,10 +420,9 @@ void ColPartitionSet::DisplayColumnEdges(int y_bottom, int y_top, ScrollView *wi
|
||||
// Column indices are 2n + 1 for real columns (0 based) and even values
|
||||
// represent the gaps in between columns, with 0 being left of the leftmost.
|
||||
// resolution refers to the ppi resolution of the image.
|
||||
ColumnSpanningType ColPartitionSet::SpanningType(int resolution, int left, int right, int height,
|
||||
int y, int left_margin, int right_margin,
|
||||
int *first_col, int *last_col,
|
||||
int *first_spanned_col) {
|
||||
ColumnSpanningType ColPartitionSet::SpanningType(
|
||||
int resolution, int left, int right, int height, int y, int left_margin,
|
||||
int right_margin, int *first_col, int *last_col, int *first_spanned_col) {
|
||||
*first_col = -1;
|
||||
*last_col = -1;
|
||||
*first_spanned_col = -1;
|
||||
@ -505,7 +514,8 @@ ColumnSpanningType ColPartitionSet::SpanningType(int resolution, int left, int r
|
||||
// columns that do not match and start new ones for the new columns in this.
|
||||
// As ColPartitions are turned into BLOCKs, the used ones are put in
|
||||
// used_parts, as they still need to be referenced in the grid.
|
||||
void ColPartitionSet::ChangeWorkColumns(const ICOORD &bleft, const ICOORD &tright, int resolution,
|
||||
void ColPartitionSet::ChangeWorkColumns(const ICOORD &bleft,
|
||||
const ICOORD &tright, int resolution,
|
||||
ColPartition_LIST *used_parts,
|
||||
WorkingPartSet_LIST *working_set_list) {
|
||||
// Move the input list to a temporary location so we can delete its elements
|
||||
@ -525,11 +535,12 @@ void ColPartitionSet::ChangeWorkColumns(const ICOORD &bleft, const ICOORD &trigh
|
||||
for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) {
|
||||
ColPartition *column = col_it.data();
|
||||
// Any existing column to the left of column is completed.
|
||||
while (!src_it.empty() && ((working_set = src_it.data())->column() == nullptr ||
|
||||
working_set->column()->right_key() <= column->left_key())) {
|
||||
while (!src_it.empty() &&
|
||||
((working_set = src_it.data())->column() == nullptr ||
|
||||
working_set->column()->right_key() <= column->left_key())) {
|
||||
src_it.extract();
|
||||
working_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts, &completed_blocks,
|
||||
&to_blocks);
|
||||
working_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts,
|
||||
&completed_blocks, &to_blocks);
|
||||
delete working_set;
|
||||
src_it.forward();
|
||||
}
|
||||
@ -542,7 +553,8 @@ void ColPartitionSet::ChangeWorkColumns(const ICOORD &bleft, const ICOORD &trigh
|
||||
// A matching column gets to stay, and first_new_set gets all the
|
||||
// completed_sets.
|
||||
working_set = src_it.empty() ? nullptr : src_it.data();
|
||||
if (working_set != nullptr && working_set->column()->MatchingColumns(*column)) {
|
||||
if (working_set != nullptr &&
|
||||
working_set->column()->MatchingColumns(*column)) {
|
||||
working_set->set_column(column);
|
||||
dest_it.add_after_then_move(src_it.extract());
|
||||
src_it.forward();
|
||||
@ -557,8 +569,8 @@ void ColPartitionSet::ChangeWorkColumns(const ICOORD &bleft, const ICOORD &trigh
|
||||
// Complete any remaining src working sets.
|
||||
while (!src_it.empty()) {
|
||||
working_set = src_it.extract();
|
||||
working_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts, &completed_blocks,
|
||||
&to_blocks);
|
||||
working_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts,
|
||||
&completed_blocks, &to_blocks);
|
||||
delete working_set;
|
||||
src_it.forward();
|
||||
}
|
||||
@ -573,8 +585,10 @@ void ColPartitionSet::ChangeWorkColumns(const ICOORD &bleft, const ICOORD &trigh
|
||||
}
|
||||
|
||||
// Accumulate the widths and gaps into the given variables.
|
||||
void ColPartitionSet::AccumulateColumnWidthsAndGaps(int *total_width, int *width_samples,
|
||||
int *total_gap, int *gap_samples) {
|
||||
void ColPartitionSet::AccumulateColumnWidthsAndGaps(int *total_width,
|
||||
int *width_samples,
|
||||
int *total_gap,
|
||||
int *gap_samples) {
|
||||
ColPartition_IT it(&parts_);
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||
ColPartition *part = it.data();
|
||||
@ -597,8 +611,9 @@ void ColPartitionSet::Print() {
|
||||
tprintf(
|
||||
"Partition set of %d parts, %d good, coverage=%d+%d"
|
||||
" (%d,%d)->(%d,%d)\n",
|
||||
it.length(), good_column_count_, good_coverage_, bad_coverage_, bounding_box_.left(),
|
||||
bounding_box_.bottom(), bounding_box_.right(), bounding_box_.top());
|
||||
it.length(), good_column_count_, good_coverage_, bad_coverage_,
|
||||
bounding_box_.left(), bounding_box_.bottom(), bounding_box_.right(),
|
||||
bounding_box_.top());
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||
ColPartition *part = it.data();
|
||||
part->Print();
|
||||
@ -608,7 +623,8 @@ void ColPartitionSet::Print() {
|
||||
// PRIVATE CODE.
|
||||
|
||||
// Add the given partition to the list in the appropriate place.
|
||||
void ColPartitionSet::AddPartition(ColPartition *new_part, ColPartition_IT *it) {
|
||||
void ColPartitionSet::AddPartition(ColPartition *new_part,
|
||||
ColPartition_IT *it) {
|
||||
AddPartitionCoverageAndBox(*new_part);
|
||||
int new_right = new_part->right_key();
|
||||
if (it->data()->left_key() >= new_right) {
|
||||
|
@ -20,9 +20,9 @@
|
||||
#ifndef TESSERACT_TEXTORD_COLPARTITIONSET_H_
|
||||
#define TESSERACT_TEXTORD_COLPARTITIONSET_H_
|
||||
|
||||
#include "colpartition.h" // For ColPartition_LIST.
|
||||
#include "rect.h" // For TBOX.
|
||||
#include "tabvector.h" // For BLOBNBOX_CLIST.
|
||||
#include "colpartition.h" // For ColPartition_LIST.
|
||||
#include "rect.h" // For TBOX.
|
||||
#include "tabvector.h" // For BLOBNBOX_CLIST.
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
@ -71,15 +71,17 @@ public:
|
||||
void RelinquishParts();
|
||||
|
||||
// Attempt to improve this by adding partitions or expanding partitions.
|
||||
void ImproveColumnCandidate(WidthCallback cb, PartSetVector *src_sets);
|
||||
void ImproveColumnCandidate(const WidthCallback &cb, PartSetVector *src_sets);
|
||||
|
||||
// If this set is good enough to represent a new partitioning into columns,
|
||||
// add it to the vector of sets, otherwise delete it.
|
||||
void AddToColumnSetsIfUnique(PartSetVector *column_sets, WidthCallback cb);
|
||||
void AddToColumnSetsIfUnique(PartSetVector *column_sets,
|
||||
const WidthCallback &cb);
|
||||
|
||||
// Return true if the partitions in other are all compatible with the columns
|
||||
// in this.
|
||||
bool CompatibleColumns(bool debug, ColPartitionSet *other, WidthCallback cb);
|
||||
bool CompatibleColumns(bool debug, ColPartitionSet *other,
|
||||
const WidthCallback &cb);
|
||||
|
||||
// Returns the total width of all blobs in the part_set that do not lie
|
||||
// within an approved column. Used as a cost measure for using this
|
||||
@ -104,20 +106,22 @@ public:
|
||||
// represent the gaps in between columns, with 0 being left of the leftmost.
|
||||
// resolution refers to the ppi resolution of the image. It may be 0 if only
|
||||
// the first_col and last_col are required.
|
||||
ColumnSpanningType SpanningType(int resolution, int left, int right, int height, int y,
|
||||
int left_margin, int right_margin, int *first_col, int *last_col,
|
||||
int *first_spanned_col);
|
||||
ColumnSpanningType SpanningType(int resolution, int left, int right,
|
||||
int height, int y, int left_margin,
|
||||
int right_margin, int *first_col,
|
||||
int *last_col, int *first_spanned_col);
|
||||
|
||||
// The column_set has changed. Close down all in-progress WorkingPartSets in
|
||||
// columns that do not match and start new ones for the new columns in this.
|
||||
// As ColPartitions are turned into BLOCKs, the used ones are put in
|
||||
// used_parts, as they still need to be referenced in the grid.
|
||||
void ChangeWorkColumns(const ICOORD &bleft, const ICOORD &tright, int resolution,
|
||||
ColPartition_LIST *used_parts, WorkingPartSet_LIST *working_set);
|
||||
void ChangeWorkColumns(const ICOORD &bleft, const ICOORD &tright,
|
||||
int resolution, ColPartition_LIST *used_parts,
|
||||
WorkingPartSet_LIST *working_set);
|
||||
|
||||
// Accumulate the widths and gaps into the given variables.
|
||||
void AccumulateColumnWidthsAndGaps(int *total_width, int *width_samples, int *total_gap,
|
||||
int *gap_samples);
|
||||
void AccumulateColumnWidthsAndGaps(int *total_width, int *width_samples,
|
||||
int *total_gap, int *gap_samples);
|
||||
|
||||
// Provide debug output for this ColPartitionSet and all the ColPartitions.
|
||||
void Print();
|
||||
|
@ -38,6 +38,7 @@
|
||||
#include "underlin.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <vector> // for std::vector
|
||||
|
||||
namespace tesseract {
|
||||
@ -357,7 +358,7 @@ void compute_page_skew( // get average gradient
|
||||
for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
|
||||
row = row_it.data();
|
||||
blob_count = row->blob_list()->length();
|
||||
row_err = static_cast<int32_t>(ceil(row->line_error()));
|
||||
row_err = static_cast<int32_t>(std::ceil(row->line_error()));
|
||||
if (row_err <= 0) {
|
||||
row_err = 1;
|
||||
}
|
||||
@ -636,7 +637,7 @@ void delete_non_dropout_rows( // find lines
|
||||
min_y = block_box.bottom() - 1;
|
||||
max_y = block_box.top() + 1;
|
||||
for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
|
||||
line_index = static_cast<int32_t>(floor(row_it.data()->intercept()));
|
||||
line_index = static_cast<int32_t>(std::floor(row_it.data()->intercept()));
|
||||
if (line_index <= min_y) {
|
||||
min_y = line_index - 1;
|
||||
}
|
||||
@ -668,7 +669,7 @@ void delete_non_dropout_rows( // find lines
|
||||
compute_dropout_distances(&occupation[0], &deltas[0], line_count);
|
||||
for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
|
||||
row = row_it.data();
|
||||
line_index = static_cast<int32_t>(floor(row->intercept()));
|
||||
line_index = static_cast<int32_t>(std::floor(row->intercept()));
|
||||
distance = deltas[line_index - min_y];
|
||||
if (find_best_dropout_row(row, distance, block->line_spacing / 2, line_index, &row_it,
|
||||
testing_on)) {
|
||||
@ -726,7 +727,7 @@ bool find_best_dropout_row( // find neighbours
|
||||
row_offset = row_inc;
|
||||
do {
|
||||
next_row = row_it->data_relative(row_offset);
|
||||
next_index = static_cast<int32_t>(floor(next_row->intercept()));
|
||||
next_index = static_cast<int32_t>(std::floor(next_row->intercept()));
|
||||
if ((distance < 0 && next_index < line_index &&
|
||||
next_index > line_index + distance + distance) ||
|
||||
(distance >= 0 && next_index > line_index &&
|
||||
@ -774,7 +775,7 @@ TBOX deskew_block_coords( // block box
|
||||
BLOBNBOX *blob; // current blob
|
||||
BLOBNBOX_IT blob_it; // iterator
|
||||
|
||||
length = sqrt(gradient * gradient + 1);
|
||||
length = std::sqrt(gradient * gradient + 1);
|
||||
rotation = FCOORD(1 / length, -gradient / length);
|
||||
for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
|
||||
row = row_it.data();
|
||||
@ -815,7 +816,7 @@ void compute_line_occupation( // project blobs
|
||||
FCOORD rotation; // inverse of skew
|
||||
|
||||
line_count = max_y - min_y + 1;
|
||||
length = sqrt(gradient * gradient + 1);
|
||||
length = std::sqrt(gradient * gradient + 1);
|
||||
rotation = FCOORD(1 / length, -gradient / length);
|
||||
for (line_index = 0; line_index < line_count; line_index++) {
|
||||
deltas[line_index] = 0;
|
||||
@ -1193,7 +1194,7 @@ void compute_row_stats( // find lines
|
||||
row_it.backward();
|
||||
} while (!row_it.at_last());
|
||||
block->key_row = prev_row;
|
||||
block->baseline_offset = fmod(prev_row->parallel_c(), block->line_spacing);
|
||||
block->baseline_offset = std::fmod(prev_row->parallel_c(), block->line_spacing);
|
||||
if (testing_on) {
|
||||
tprintf("Blob based spacing=(%g,%g), offset=%g", block->line_size, block->line_spacing,
|
||||
block->baseline_offset);
|
||||
@ -1237,7 +1238,7 @@ void compute_row_stats( // find lines
|
||||
block->line_spacing = rows[row_index]->spacing;
|
||||
block->max_blob_size = block->line_spacing * textord_excess_blobsize;
|
||||
}
|
||||
block->baseline_offset = fmod(rows[row_index]->intercept(), block->line_spacing);
|
||||
block->baseline_offset = std::fmod(rows[row_index]->intercept(), block->line_spacing);
|
||||
}
|
||||
if (testing_on) {
|
||||
tprintf("\nEstimate line size=%g, spacing=%g, offset=%g\n", block->line_size,
|
||||
@ -1796,7 +1797,7 @@ void separate_underlines(TO_BLOCK *block, // block to do
|
||||
int min_blob_height = static_cast<int>(textord_min_blob_height_fraction * block->line_size + 0.5);
|
||||
|
||||
// length of vector
|
||||
length = sqrt(1 + gradient * gradient);
|
||||
length = std::sqrt(1 + gradient * gradient);
|
||||
g_vec = FCOORD(1 / length, -gradient / length);
|
||||
blob_rotation = FCOORD(rotation.x(), -rotation.y());
|
||||
blob_rotation.rotate(g_vec); // undoing everything
|
||||
@ -2295,7 +2296,7 @@ void assign_blobs_to_rows( // find lines
|
||||
(block->block->pdblk.bounding_box().bottom() + block->block->pdblk.bounding_box().top()) /
|
||||
2.0f;
|
||||
if (gradient != nullptr) {
|
||||
g_length = sqrt(1 + *gradient * *gradient);
|
||||
g_length = std::sqrt(1 + *gradient * *gradient);
|
||||
}
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
if (drawing_skew) {
|
||||
|
@ -32,6 +32,7 @@
|
||||
#include "textord.h"
|
||||
#include "tprintf.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <vector> // for std::vector
|
||||
|
||||
#include <algorithm>
|
||||
@ -1450,7 +1451,7 @@ void make_first_xheight( // find xheight
|
||||
for (blobindex = 0; blobindex < blobcount; blobindex++) {
|
||||
int xcenter = (blobcoords[blobindex].left() + blobcoords[blobindex].right()) / 2;
|
||||
float base = baseline->y(xcenter);
|
||||
float bottomdiff = fabs(base - blobcoords[blobindex].bottom());
|
||||
float bottomdiff = std::fabs(base - blobcoords[blobindex].bottom());
|
||||
int strength = textord_ocropus_mode && bottomdiff <= kBaselineTouch ? kGoodStrength : 1;
|
||||
int height = static_cast<int>(blobcoords[blobindex].top() - base + 0.5);
|
||||
if (blobcoords[blobindex].height() > init_lineheight * kMinHeight) {
|
||||
|
@ -22,6 +22,7 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <utility>
|
||||
#include "tablefind.h"
|
||||
|
||||
#include <allheaders.h>
|
||||
@ -157,11 +158,11 @@ void DeleteObject(T *object) {
|
||||
}
|
||||
|
||||
TableFinder::TableFinder()
|
||||
: resolution_(0)
|
||||
, global_median_xheight_(0)
|
||||
, global_median_blob_width_(0)
|
||||
, global_median_ledding_(0)
|
||||
, left_to_right_language_(true) {}
|
||||
: resolution_(0),
|
||||
global_median_xheight_(0),
|
||||
global_median_blob_width_(0),
|
||||
global_median_ledding_(0),
|
||||
left_to_right_language_(true) {}
|
||||
|
||||
TableFinder::~TableFinder() {
|
||||
// ColPartitions and ColSegments created by this class for storage in grids
|
||||
@ -177,7 +178,8 @@ void TableFinder::set_left_to_right_language(bool order) {
|
||||
left_to_right_language_ = order;
|
||||
}
|
||||
|
||||
void TableFinder::Init(int grid_size, const ICOORD &bottom_left, const ICOORD &top_right) {
|
||||
void TableFinder::Init(int grid_size, const ICOORD &bottom_left,
|
||||
const ICOORD &top_right) {
|
||||
// Initialize clean partitions list and grid
|
||||
clean_part_grid_.Init(grid_size, bottom_left, top_right);
|
||||
leader_and_ruling_grid_.Init(grid_size, bottom_left, top_right);
|
||||
@ -188,7 +190,8 @@ void TableFinder::Init(int grid_size, const ICOORD &bottom_left, const ICOORD &t
|
||||
|
||||
// Copy cleaned partitions from part_grid_ to clean_part_grid_ and
|
||||
// insert leaders and rulers into the leader_and_ruling_grid_
|
||||
void TableFinder::InsertCleanPartitions(ColPartitionGrid *grid, TO_BLOCK *block) {
|
||||
void TableFinder::InsertCleanPartitions(ColPartitionGrid *grid,
|
||||
TO_BLOCK *block) {
|
||||
// Calculate stats. This lets us filter partitions in AllowTextPartition()
|
||||
// and filter blobs in AllowBlob().
|
||||
SetGlobalSpacings(grid);
|
||||
@ -255,7 +258,8 @@ void TableFinder::InsertCleanPartitions(ColPartitionGrid *grid, TO_BLOCK *block)
|
||||
}
|
||||
|
||||
// High level function to perform table detection
|
||||
void TableFinder::LocateTables(ColPartitionGrid *grid, ColPartitionSet **all_columns,
|
||||
void TableFinder::LocateTables(ColPartitionGrid *grid,
|
||||
ColPartitionSet **all_columns,
|
||||
WidthCallback width_cb, const FCOORD &reskew) {
|
||||
// initialize spacing, neighbors, and columns
|
||||
InitializePartitions(all_columns);
|
||||
@ -264,8 +268,10 @@ void TableFinder::LocateTables(ColPartitionGrid *grid, ColPartitionSet **all_col
|
||||
if (textord_show_tables) {
|
||||
ScrollView *table_win = MakeWindow(0, 300, "Column Partitions & Neighbors");
|
||||
DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE);
|
||||
DisplayColPartitions(table_win, &leader_and_ruling_grid_, ScrollView::AQUAMARINE);
|
||||
DisplayColPartitionConnections(table_win, &clean_part_grid_, ScrollView::ORANGE);
|
||||
DisplayColPartitions(table_win, &leader_and_ruling_grid_,
|
||||
ScrollView::AQUAMARINE);
|
||||
DisplayColPartitionConnections(table_win, &clean_part_grid_,
|
||||
ScrollView::ORANGE);
|
||||
|
||||
table_win = MakeWindow(100, 300, "Fragmented Text");
|
||||
DisplayColPartitions(table_win, &fragmented_text_grid_, ScrollView::BLUE);
|
||||
@ -339,7 +345,8 @@ void TableFinder::LocateTables(ColPartitionGrid *grid, ColPartitionSet **all_col
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
if (textord_show_tables) {
|
||||
ScrollView *table_win = MakeWindow(1400, 600, "Recognized Tables");
|
||||
DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE, ScrollView::BLUE);
|
||||
DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE,
|
||||
ScrollView::BLUE);
|
||||
table_grid_.DisplayBoxes(table_win);
|
||||
}
|
||||
#endif // !GRAPHICS_DISABLED
|
||||
@ -353,7 +360,8 @@ void TableFinder::LocateTables(ColPartitionGrid *grid, ColPartitionSet **all_col
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
if (textord_show_tables) {
|
||||
ScrollView *table_win = MakeWindow(1500, 300, "Detected Tables");
|
||||
DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE, ScrollView::BLUE);
|
||||
DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE,
|
||||
ScrollView::BLUE);
|
||||
table_grid_.DisplayBoxes(table_win);
|
||||
}
|
||||
#endif // !GRAPHICS_DISABLED
|
||||
@ -362,7 +370,7 @@ void TableFinder::LocateTables(ColPartitionGrid *grid, ColPartitionSet **all_col
|
||||
// Merge all colpartitions in table regions to make them a single
|
||||
// colpartition and revert types of isolated table cells not
|
||||
// assigned to any table to their original types.
|
||||
MakeTableBlocks(grid, all_columns, width_cb);
|
||||
MakeTableBlocks(grid, all_columns, std::move(width_cb));
|
||||
}
|
||||
// All grids have the same dimensions. The clean_part_grid_ sizes are set from
|
||||
// the part_grid_ that is passed to InsertCleanPartitions, which was the same as
|
||||
@ -452,7 +460,8 @@ void TableFinder::SplitAndInsertFragmentedTextPartition(ColPartition *part) {
|
||||
// Look for the next split in the partition.
|
||||
for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) {
|
||||
const TBOX &box = box_it.data()->bounding_box();
|
||||
if (previous_right != INT32_MIN && box.left() - previous_right > kThreshold) {
|
||||
if (previous_right != INT32_MIN &&
|
||||
box.left() - previous_right > kThreshold) {
|
||||
// We have a split position. Split the partition in two pieces.
|
||||
// Insert the left piece in the grid and keep processing the right.
|
||||
int mid_x = (box.left() + previous_right) / 2;
|
||||
@ -484,7 +493,8 @@ bool TableFinder::AllowTextPartition(const ColPartition &part) const {
|
||||
const int median_area = global_median_xheight_ * global_median_blob_width_;
|
||||
const double kAreaPerBlobRequired = median_area * kAllowTextArea;
|
||||
// Keep comparisons strictly greater to disallow 0!
|
||||
return part.median_height() > kHeightRequired && part.median_width() > kWidthRequired &&
|
||||
return part.median_height() > kHeightRequired &&
|
||||
part.median_width() > kWidthRequired &&
|
||||
part.bounding_box().area() > kAreaPerBlobRequired * part.boxes_count();
|
||||
}
|
||||
|
||||
@ -512,13 +522,15 @@ ScrollView *TableFinder::MakeWindow(int x, int y, const char *window_name) {
|
||||
#endif
|
||||
|
||||
// Make single-column blocks from good_columns_ partitions.
|
||||
void TableFinder::GetColumnBlocks(ColPartitionSet **all_columns, ColSegment_LIST *column_blocks) {
|
||||
void TableFinder::GetColumnBlocks(ColPartitionSet **all_columns,
|
||||
ColSegment_LIST *column_blocks) {
|
||||
for (int i = 0; i < gridheight(); ++i) {
|
||||
ColPartitionSet *columns = all_columns[i];
|
||||
if (columns != nullptr) {
|
||||
ColSegment_LIST new_blocks;
|
||||
// Get boxes from the current vertical position on the grid
|
||||
columns->GetColumnBoxes(i * gridsize(), (i + 1) * gridsize(), &new_blocks);
|
||||
columns->GetColumnBoxes(i * gridsize(), (i + 1) * gridsize(),
|
||||
&new_blocks);
|
||||
// Merge the new_blocks boxes into column_blocks if they are well-aligned
|
||||
GroupColumnBlocks(&new_blocks, column_blocks);
|
||||
}
|
||||
@ -526,7 +538,8 @@ void TableFinder::GetColumnBlocks(ColPartitionSet **all_columns, ColSegment_LIST
|
||||
}
|
||||
|
||||
// Merge column segments into the current list if they are well aligned.
|
||||
void TableFinder::GroupColumnBlocks(ColSegment_LIST *new_blocks, ColSegment_LIST *column_blocks) {
|
||||
void TableFinder::GroupColumnBlocks(ColSegment_LIST *new_blocks,
|
||||
ColSegment_LIST *column_blocks) {
|
||||
ColSegment_IT src_it(new_blocks);
|
||||
ColSegment_IT dest_it(column_blocks);
|
||||
// iterate through the source list
|
||||
@ -558,8 +571,10 @@ void TableFinder::GroupColumnBlocks(ColSegment_LIST *new_blocks, ColSegment_LIST
|
||||
bool TableFinder::ConsecutiveBoxes(const TBOX &b1, const TBOX &b2) {
|
||||
int x_margin = 20;
|
||||
int y_margin = 5;
|
||||
return (abs(b1.left() - b2.left()) < x_margin) && (abs(b1.right() - b2.right()) < x_margin) &&
|
||||
(abs(b1.top() - b2.bottom()) < y_margin || abs(b2.top() - b1.bottom()) < y_margin);
|
||||
return (abs(b1.left() - b2.left()) < x_margin) &&
|
||||
(abs(b1.right() - b2.right()) < x_margin) &&
|
||||
(abs(b1.top() - b2.bottom()) < y_margin ||
|
||||
abs(b2.top() - b1.bottom()) < y_margin);
|
||||
}
|
||||
|
||||
// Set up info for clean_part_grid_ partitions to be valid during detection
|
||||
@ -571,7 +586,8 @@ void TableFinder::InitializePartitions(ColPartitionSet **all_columns) {
|
||||
}
|
||||
|
||||
// Set left, right and top, bottom spacings of each colpartition.
|
||||
void TableFinder::SetPartitionSpacings(ColPartitionGrid *grid, ColPartitionSet **all_columns) {
|
||||
void TableFinder::SetPartitionSpacings(ColPartitionGrid *grid,
|
||||
ColPartitionSet **all_columns) {
|
||||
// Iterate the ColPartitions in the grid.
|
||||
ColPartitionGridSearch gsearch(grid);
|
||||
gsearch.StartFullSearch();
|
||||
@ -599,7 +615,8 @@ void TableFinder::SetPartitionSpacings(ColPartitionGrid *grid, ColPartitionSet *
|
||||
hsearch.StartSideSearch(box.left(), box.bottom(), box.top());
|
||||
ColPartition *neighbor = nullptr;
|
||||
while ((neighbor = hsearch.NextSideSearch(true)) != nullptr) {
|
||||
if (neighbor->type() == PT_PULLOUT_IMAGE || neighbor->type() == PT_FLOWING_IMAGE ||
|
||||
if (neighbor->type() == PT_PULLOUT_IMAGE ||
|
||||
neighbor->type() == PT_FLOWING_IMAGE ||
|
||||
neighbor->type() == PT_HEADING_IMAGE) {
|
||||
int right = neighbor->bounding_box().right();
|
||||
if (right < box.left()) {
|
||||
@ -611,7 +628,8 @@ void TableFinder::SetPartitionSpacings(ColPartitionGrid *grid, ColPartitionSet *
|
||||
hsearch.StartSideSearch(box.left(), box.bottom(), box.top());
|
||||
neighbor = nullptr;
|
||||
while ((neighbor = hsearch.NextSideSearch(false)) != nullptr) {
|
||||
if (neighbor->type() == PT_PULLOUT_IMAGE || neighbor->type() == PT_FLOWING_IMAGE ||
|
||||
if (neighbor->type() == PT_PULLOUT_IMAGE ||
|
||||
neighbor->type() == PT_FLOWING_IMAGE ||
|
||||
neighbor->type() == PT_HEADING_IMAGE) {
|
||||
int left = neighbor->bounding_box().left();
|
||||
if (left > box.right()) {
|
||||
@ -623,8 +641,9 @@ void TableFinder::SetPartitionSpacings(ColPartitionGrid *grid, ColPartitionSet *
|
||||
|
||||
ColPartition *upper_part = part->SingletonPartner(true);
|
||||
if (upper_part) {
|
||||
int space = std::max(
|
||||
0, static_cast<int>(upper_part->bounding_box().bottom() - part->bounding_box().bottom()));
|
||||
int space =
|
||||
std::max(0, static_cast<int>(upper_part->bounding_box().bottom() -
|
||||
part->bounding_box().bottom()));
|
||||
part->set_space_above(space);
|
||||
} else {
|
||||
// TODO(nbeato): What constitutes a good value?
|
||||
@ -635,8 +654,9 @@ void TableFinder::SetPartitionSpacings(ColPartitionGrid *grid, ColPartitionSet *
|
||||
|
||||
ColPartition *lower_part = part->SingletonPartner(false);
|
||||
if (lower_part) {
|
||||
int space = std::max(
|
||||
0, static_cast<int>(part->bounding_box().bottom() - lower_part->bounding_box().bottom()));
|
||||
int space =
|
||||
std::max(0, static_cast<int>(part->bounding_box().bottom() -
|
||||
lower_part->bounding_box().bottom()));
|
||||
part->set_space_below(space);
|
||||
} else {
|
||||
// TODO(nbeato): What constitutes a good value?
|
||||
@ -650,14 +670,17 @@ void TableFinder::SetPartitionSpacings(ColPartitionGrid *grid, ColPartitionSet *
|
||||
// Set spacing and closest neighbors above and below a given colpartition.
|
||||
void TableFinder::SetVerticalSpacing(ColPartition *part) {
|
||||
TBOX box = part->bounding_box();
|
||||
int top_range = std::min(box.top() + kMaxVerticalSpacing, static_cast<int>(tright().y()));
|
||||
int bottom_range = std::max(box.bottom() - kMaxVerticalSpacing, static_cast<int>(bleft().y()));
|
||||
int top_range =
|
||||
std::min(box.top() + kMaxVerticalSpacing, static_cast<int>(tright().y()));
|
||||
int bottom_range = std::max(box.bottom() - kMaxVerticalSpacing,
|
||||
static_cast<int>(bleft().y()));
|
||||
box.set_top(top_range);
|
||||
box.set_bottom(bottom_range);
|
||||
|
||||
TBOX part_box = part->bounding_box();
|
||||
// Start a rect search
|
||||
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> rectsearch(&clean_part_grid_);
|
||||
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> rectsearch(
|
||||
&clean_part_grid_);
|
||||
rectsearch.StartRectSearch(box);
|
||||
ColPartition *neighbor;
|
||||
int min_space_above = kMaxVerticalSpacing;
|
||||
@ -676,7 +699,8 @@ void TableFinder::SetVerticalSpacing(ColPartition *part) {
|
||||
min_space_below = gap;
|
||||
below_neighbor = neighbor;
|
||||
} // If neighbor is above current partition
|
||||
else if (part_box.top() < neighbor_box.bottom() && gap < min_space_above) {
|
||||
else if (part_box.top() < neighbor_box.bottom() &&
|
||||
gap < min_space_above) {
|
||||
min_space_above = gap;
|
||||
above_neighbor = neighbor;
|
||||
}
|
||||
@ -777,7 +801,8 @@ void TableFinder::MarkTablePartitions() {
|
||||
if (textord_tablefind_show_mark) {
|
||||
ScrollView *table_win = MakeWindow(300, 300, "Initial Table Partitions");
|
||||
DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE);
|
||||
DisplayColPartitions(table_win, &leader_and_ruling_grid_, ScrollView::AQUAMARINE);
|
||||
DisplayColPartitions(table_win, &leader_and_ruling_grid_,
|
||||
ScrollView::AQUAMARINE);
|
||||
}
|
||||
#endif
|
||||
FilterFalseAlarms();
|
||||
@ -785,7 +810,8 @@ void TableFinder::MarkTablePartitions() {
|
||||
if (textord_tablefind_show_mark) {
|
||||
ScrollView *table_win = MakeWindow(600, 300, "Filtered Table Partitions");
|
||||
DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE);
|
||||
DisplayColPartitions(table_win, &leader_and_ruling_grid_, ScrollView::AQUAMARINE);
|
||||
DisplayColPartitions(table_win, &leader_and_ruling_grid_,
|
||||
ScrollView::AQUAMARINE);
|
||||
}
|
||||
#endif
|
||||
SmoothTablePartitionRuns();
|
||||
@ -793,7 +819,8 @@ void TableFinder::MarkTablePartitions() {
|
||||
if (textord_tablefind_show_mark) {
|
||||
ScrollView *table_win = MakeWindow(900, 300, "Smoothed Table Partitions");
|
||||
DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE);
|
||||
DisplayColPartitions(table_win, &leader_and_ruling_grid_, ScrollView::AQUAMARINE);
|
||||
DisplayColPartitions(table_win, &leader_and_ruling_grid_,
|
||||
ScrollView::AQUAMARINE);
|
||||
}
|
||||
#endif
|
||||
FilterFalseAlarms();
|
||||
@ -801,7 +828,8 @@ void TableFinder::MarkTablePartitions() {
|
||||
if (textord_tablefind_show_mark || textord_show_tables) {
|
||||
ScrollView *table_win = MakeWindow(900, 300, "Final Table Partitions");
|
||||
DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE);
|
||||
DisplayColPartitions(table_win, &leader_and_ruling_grid_, ScrollView::AQUAMARINE);
|
||||
DisplayColPartitions(table_win, &leader_and_ruling_grid_,
|
||||
ScrollView::AQUAMARINE);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
@ -815,7 +843,8 @@ void TableFinder::MarkTablePartitions() {
|
||||
// 4- Partitions with leaders before/after them.
|
||||
void TableFinder::MarkPartitionsUsingLocalInformation() {
|
||||
// Iterate the ColPartitions in the grid.
|
||||
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> gsearch(&clean_part_grid_);
|
||||
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> gsearch(
|
||||
&clean_part_grid_);
|
||||
gsearch.StartFullSearch();
|
||||
ColPartition *part = nullptr;
|
||||
while ((part = gsearch.NextFullSearch()) != nullptr) {
|
||||
@ -850,7 +879,8 @@ bool TableFinder::HasWideOrNoInterWordGap(ColPartition *part) const {
|
||||
BLOBNBOX_CLIST *part_boxes = part->boxes();
|
||||
BLOBNBOX_C_IT it(part_boxes);
|
||||
// Check if this is a relatively small partition (such as a single word)
|
||||
if (part->bounding_box().width() < kMinBoxesInTextPartition * part->median_height() &&
|
||||
if (part->bounding_box().width() <
|
||||
kMinBoxesInTextPartition * part->median_height() &&
|
||||
part_boxes->length() < kMinBoxesInTextPartition) {
|
||||
return true;
|
||||
}
|
||||
@ -907,7 +937,8 @@ bool TableFinder::HasWideOrNoInterWordGap(ColPartition *part) const {
|
||||
}
|
||||
// Since no large gap was found, return false if the partition is too
|
||||
// long to be a data cell
|
||||
if (part->bounding_box().width() > kMaxBoxesInDataPartition * part->median_height() ||
|
||||
if (part->bounding_box().width() >
|
||||
kMaxBoxesInDataPartition * part->median_height() ||
|
||||
part_boxes->length() > kMaxBoxesInDataPartition) {
|
||||
return false;
|
||||
}
|
||||
@ -1016,19 +1047,23 @@ void TableFinder::FilterParagraphEndings() {
|
||||
// To account for that, check if the partition center is to
|
||||
// the left of the one above it.
|
||||
int mid = (part->bounding_box().left() + part->bounding_box().right()) / 2;
|
||||
int upper_mid = (upper_part->bounding_box().left() + upper_part->bounding_box().right()) / 2;
|
||||
int upper_mid = (upper_part->bounding_box().left() +
|
||||
upper_part->bounding_box().right()) /
|
||||
2;
|
||||
int current_spacing = 0; // spacing of the current line to margin
|
||||
int upper_spacing = 0; // spacing of the previous line to the margin
|
||||
if (left_to_right_language_) {
|
||||
// Left to right languages, use mid - left to figure out the distance
|
||||
// the middle is from the left margin.
|
||||
int left = std::min(part->bounding_box().left(), upper_part->bounding_box().left());
|
||||
int left = std::min(part->bounding_box().left(),
|
||||
upper_part->bounding_box().left());
|
||||
current_spacing = mid - left;
|
||||
upper_spacing = upper_mid - left;
|
||||
} else {
|
||||
// Right to left languages, use right - mid to figure out the distance
|
||||
// the middle is from the right margin.
|
||||
int right = std::max(part->bounding_box().right(), upper_part->bounding_box().right());
|
||||
int right = std::max(part->bounding_box().right(),
|
||||
upper_part->bounding_box().right());
|
||||
current_spacing = right - mid;
|
||||
upper_spacing = right - upper_mid;
|
||||
}
|
||||
@ -1046,7 +1081,8 @@ void TableFinder::FilterParagraphEndings() {
|
||||
// The last line of a paragraph should be left aligned.
|
||||
// TODO(nbeato): This would be untrue if the text was right aligned.
|
||||
// How often is that?
|
||||
if (part->space_to_left() > kMaxParagraphEndingLeftSpaceMultiple * part->median_height()) {
|
||||
if (part->space_to_left() >
|
||||
kMaxParagraphEndingLeftSpaceMultiple * part->median_height()) {
|
||||
continue;
|
||||
}
|
||||
// The line above it should be right aligned (assuming justified format).
|
||||
@ -1055,7 +1091,8 @@ void TableFinder::FilterParagraphEndings() {
|
||||
// line could have fit on the previous line). So compare
|
||||
// whitespace to text.
|
||||
if (upper_part->bounding_box().width() <
|
||||
kMinParagraphEndingTextToWhitespaceRatio * upper_part->space_to_right()) {
|
||||
kMinParagraphEndingTextToWhitespaceRatio *
|
||||
upper_part->space_to_right()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -1153,7 +1190,8 @@ void TableFinder::SetColumnsType(ColSegment_LIST *column_blocks) {
|
||||
TBOX box = seg->bounding_box();
|
||||
int num_table_cells = 0;
|
||||
int num_text_cells = 0;
|
||||
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> rsearch(&clean_part_grid_);
|
||||
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> rsearch(
|
||||
&clean_part_grid_);
|
||||
rsearch.SetUniqueMode(true);
|
||||
rsearch.StartRectSearch(box);
|
||||
ColPartition *part = nullptr;
|
||||
@ -1178,7 +1216,8 @@ void TableFinder::SetColumnsType(ColSegment_LIST *column_blocks) {
|
||||
}
|
||||
|
||||
// Move column blocks to grid
|
||||
void TableFinder::MoveColSegmentsToGrid(ColSegment_LIST *segments, ColSegmentGrid *col_seg_grid) {
|
||||
void TableFinder::MoveColSegmentsToGrid(ColSegment_LIST *segments,
|
||||
ColSegmentGrid *col_seg_grid) {
|
||||
ColSegment_IT it(segments);
|
||||
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
|
||||
ColSegment *seg = it.extract();
|
||||
@ -1200,7 +1239,8 @@ void TableFinder::GridMergeColumnBlocks() {
|
||||
int margin = gridsize();
|
||||
|
||||
// Iterate the Column Blocks in the grid.
|
||||
GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> gsearch(&col_seg_grid_);
|
||||
GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> gsearch(
|
||||
&col_seg_grid_);
|
||||
gsearch.StartFullSearch();
|
||||
ColSegment *seg;
|
||||
while ((seg = gsearch.NextFullSearch()) != nullptr) {
|
||||
@ -1214,12 +1254,15 @@ void TableFinder::GridMergeColumnBlocks() {
|
||||
do {
|
||||
TBOX box = seg->bounding_box();
|
||||
// slightly expand the search region vertically
|
||||
int top_range = std::min(box.top() + margin, static_cast<int>(tright().y()));
|
||||
int bottom_range = std::max(box.bottom() - margin, static_cast<int>(bleft().y()));
|
||||
int top_range =
|
||||
std::min(box.top() + margin, static_cast<int>(tright().y()));
|
||||
int bottom_range =
|
||||
std::max(box.bottom() - margin, static_cast<int>(bleft().y()));
|
||||
box.set_top(top_range);
|
||||
box.set_bottom(bottom_range);
|
||||
neighbor_found = false;
|
||||
GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> rectsearch(&col_seg_grid_);
|
||||
GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> rectsearch(
|
||||
&col_seg_grid_);
|
||||
rectsearch.StartRectSearch(box);
|
||||
ColSegment *neighbor = nullptr;
|
||||
while ((neighbor = rectsearch.NextRectSearch()) != nullptr) {
|
||||
@ -1277,7 +1320,8 @@ void TableFinder::GridMergeColumnBlocks() {
|
||||
void TableFinder::GetTableColumns(ColSegment_LIST *table_columns) {
|
||||
ColSegment_IT it(table_columns);
|
||||
// Iterate the ColPartitions in the grid.
|
||||
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> gsearch(&clean_part_grid_);
|
||||
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> gsearch(
|
||||
&clean_part_grid_);
|
||||
gsearch.StartFullSearch();
|
||||
ColPartition *part;
|
||||
while ((part = gsearch.NextFullSearch()) != nullptr) {
|
||||
@ -1291,7 +1335,8 @@ void TableFinder::GetTableColumns(ColSegment_LIST *table_columns) {
|
||||
// Start a search below the current cell to find bottom neighbours
|
||||
// Note: a full search will always process things above it first, so
|
||||
// this should be starting at the highest cell and working its way down.
|
||||
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> vsearch(&clean_part_grid_);
|
||||
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> vsearch(
|
||||
&clean_part_grid_);
|
||||
vsearch.StartVerticalSearch(box.left(), box.right(), box.bottom());
|
||||
ColPartition *neighbor = nullptr;
|
||||
bool found_neighbours = false;
|
||||
@ -1326,11 +1371,13 @@ void TableFinder::GetTableColumns(ColSegment_LIST *table_columns) {
|
||||
|
||||
// Mark regions in a column that are x-bounded by the column boundaries and
|
||||
// y-bounded by the table columns' projection on the y-axis as table regions
|
||||
void TableFinder::GetTableRegions(ColSegment_LIST *table_columns, ColSegment_LIST *table_regions) {
|
||||
void TableFinder::GetTableRegions(ColSegment_LIST *table_columns,
|
||||
ColSegment_LIST *table_regions) {
|
||||
ColSegment_IT cit(table_columns);
|
||||
ColSegment_IT rit(table_regions);
|
||||
// Iterate through column blocks
|
||||
GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> gsearch(&col_seg_grid_);
|
||||
GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> gsearch(
|
||||
&col_seg_grid_);
|
||||
gsearch.StartFullSearch();
|
||||
ColSegment *part;
|
||||
int page_height = tright().y() - bleft().y();
|
||||
@ -1389,7 +1436,8 @@ void TableFinder::GetTableRegions(ColSegment_LIST *table_columns, ColSegment_LIS
|
||||
// single line and hence the tables get merged together
|
||||
void TableFinder::GridMergeTableRegions() {
|
||||
// Iterate the table regions in the grid.
|
||||
GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> gsearch(&table_grid_);
|
||||
GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> gsearch(
|
||||
&table_grid_);
|
||||
gsearch.StartFullSearch();
|
||||
ColSegment *seg = nullptr;
|
||||
while ((seg = gsearch.NextFullSearch()) != nullptr) {
|
||||
@ -1402,7 +1450,8 @@ void TableFinder::GridMergeTableRegions() {
|
||||
search_region.set_left(bleft().x());
|
||||
search_region.set_right(tright().x());
|
||||
neighbor_found = false;
|
||||
GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> rectsearch(&table_grid_);
|
||||
GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> rectsearch(
|
||||
&table_grid_);
|
||||
rectsearch.StartRectSearch(search_region);
|
||||
ColSegment *neighbor = nullptr;
|
||||
while ((neighbor = rectsearch.NextRectSearch()) != nullptr) {
|
||||
@ -1454,13 +1503,15 @@ bool TableFinder::BelongToOneTable(const TBOX &box1, const TBOX &box2) {
|
||||
// Check for ColPartitions spanning both table regions
|
||||
TBOX bbox = box1.bounding_union(box2);
|
||||
// Start a rect search on bbox
|
||||
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> rectsearch(&clean_part_grid_);
|
||||
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> rectsearch(
|
||||
&clean_part_grid_);
|
||||
rectsearch.StartRectSearch(bbox);
|
||||
ColPartition *part = nullptr;
|
||||
while ((part = rectsearch.NextRectSearch()) != nullptr) {
|
||||
const TBOX &part_box = part->bounding_box();
|
||||
// return true if a colpartition spanning both table regions is found
|
||||
if (part_box.overlap(box1) && part_box.overlap(box2) && !part->IsImageType()) {
|
||||
if (part_box.overlap(box1) && part_box.overlap(box2) &&
|
||||
!part->IsImageType()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@ -1542,12 +1593,14 @@ void TableFinder::GrowTableBox(const TBOX &table_box, TBOX *result_box) {
|
||||
|
||||
// Grow a table by increasing the size of the box to include
|
||||
// partitions with significant overlap with the table.
|
||||
void TableFinder::GrowTableToIncludePartials(const TBOX &table_box, const TBOX &search_range,
|
||||
void TableFinder::GrowTableToIncludePartials(const TBOX &table_box,
|
||||
const TBOX &search_range,
|
||||
TBOX *result_box) {
|
||||
// Rulings are in a different grid, so search 2 grids for rulings, text,
|
||||
// and table partitions that are not entirely within the new box.
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
ColPartitionGrid *grid = (i == 0) ? &fragmented_text_grid_ : &leader_and_ruling_grid_;
|
||||
ColPartitionGrid *grid =
|
||||
(i == 0) ? &fragmented_text_grid_ : &leader_and_ruling_grid_;
|
||||
ColPartitionGridSearch rectsearch(grid);
|
||||
rectsearch.StartRectSearch(search_range);
|
||||
ColPartition *part = nullptr;
|
||||
@ -1569,7 +1622,8 @@ void TableFinder::GrowTableToIncludePartials(const TBOX &table_box, const TBOX &
|
||||
|
||||
// Grow a table by expanding to the extents of significantly
|
||||
// overlapping lines.
|
||||
void TableFinder::GrowTableToIncludeLines(const TBOX &table_box, const TBOX &search_range,
|
||||
void TableFinder::GrowTableToIncludeLines(const TBOX &table_box,
|
||||
const TBOX &search_range,
|
||||
TBOX *result_box) {
|
||||
ColPartitionGridSearch rsearch(&leader_and_ruling_grid_);
|
||||
rsearch.SetUniqueMode(true);
|
||||
@ -1601,7 +1655,8 @@ void TableFinder::GrowTableToIncludeLines(const TBOX &table_box, const TBOX &sea
|
||||
// Checks whether the horizontal line belongs to the table by looking at the
|
||||
// side spacing of extra ColPartitions that will be included in the table
|
||||
// due to expansion
|
||||
bool TableFinder::HLineBelongsToTable(const ColPartition &part, const TBOX &table_box) {
|
||||
bool TableFinder::HLineBelongsToTable(const ColPartition &part,
|
||||
const TBOX &table_box) {
|
||||
if (!part.IsHorizontalLine()) {
|
||||
return false;
|
||||
}
|
||||
@ -1627,7 +1682,8 @@ bool TableFinder::HLineBelongsToTable(const ColPartition &part, const TBOX &tabl
|
||||
// Rulings are in a different grid, so search 2 grids for rulings, text,
|
||||
// and table partitions that are introduced by the new box.
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
ColPartitionGrid *grid = (i == 0) ? &clean_part_grid_ : &leader_and_ruling_grid_;
|
||||
ColPartitionGrid *grid =
|
||||
(i == 0) ? &clean_part_grid_ : &leader_and_ruling_grid_;
|
||||
// Start a rect search on bbox
|
||||
ColPartitionGridSearch rectsearch(grid);
|
||||
rectsearch.SetUniqueMode(true);
|
||||
@ -1672,12 +1728,14 @@ bool TableFinder::HLineBelongsToTable(const ColPartition &part, const TBOX &tabl
|
||||
void TableFinder::IncludeLeftOutColumnHeaders(TBOX *table_box) {
|
||||
// Start a search above the current table to look for column headers
|
||||
ColPartitionGridSearch vsearch(&clean_part_grid_);
|
||||
vsearch.StartVerticalSearch(table_box->left(), table_box->right(), table_box->top());
|
||||
vsearch.StartVerticalSearch(table_box->left(), table_box->right(),
|
||||
table_box->top());
|
||||
ColPartition *neighbor = nullptr;
|
||||
ColPartition *previous_neighbor = nullptr;
|
||||
while ((neighbor = vsearch.NextVerticalSearch(false)) != nullptr) {
|
||||
// Max distance to find a table heading.
|
||||
const int max_distance = kMaxColumnHeaderDistance * neighbor->median_height();
|
||||
const int max_distance =
|
||||
kMaxColumnHeaderDistance * neighbor->median_height();
|
||||
int table_top = table_box->top();
|
||||
const TBOX &box = neighbor->bounding_box();
|
||||
// Do not continue if the next box is way above
|
||||
@ -1714,7 +1772,8 @@ void TableFinder::DeleteSingleColumnTables() {
|
||||
// create an integer array to hold projection on x-axis
|
||||
int *table_xprojection = new int[page_width];
|
||||
// Iterate through all tables in the table grid
|
||||
GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> table_search(&table_grid_);
|
||||
GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> table_search(
|
||||
&table_grid_);
|
||||
table_search.StartFullSearch();
|
||||
ColSegment *table;
|
||||
while ((table = table_search.NextFullSearch()) != nullptr) {
|
||||
@ -1724,7 +1783,8 @@ void TableFinder::DeleteSingleColumnTables() {
|
||||
table_xprojection[i] = 0;
|
||||
}
|
||||
// Start a rect search on table_box
|
||||
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> rectsearch(&clean_part_grid_);
|
||||
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> rectsearch(
|
||||
&clean_part_grid_);
|
||||
rectsearch.SetUniqueMode(true);
|
||||
rectsearch.StartRectSearch(table_box);
|
||||
ColPartition *part;
|
||||
@ -1938,7 +1998,8 @@ void TableFinder::DisplayColPartitions(ScrollView *win, ColPartitionGrid *grid,
|
||||
DisplayColPartitions(win, grid, default_color, ScrollView::YELLOW);
|
||||
}
|
||||
|
||||
void TableFinder::DisplayColPartitionConnections(ScrollView *win, ColPartitionGrid *grid,
|
||||
void TableFinder::DisplayColPartitionConnections(ScrollView *win,
|
||||
ColPartitionGrid *grid,
|
||||
ScrollView::Color color) {
|
||||
// Iterate the ColPartitions in the grid.
|
||||
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> gsearch(grid);
|
||||
@ -1982,8 +2043,9 @@ void TableFinder::DisplayColPartitionConnections(ScrollView *win, ColPartitionGr
|
||||
// Merge all colpartitions in table regions to make them a single
|
||||
// colpartition and revert types of isolated table cells not
|
||||
// assigned to any table to their original types.
|
||||
void TableFinder::MakeTableBlocks(ColPartitionGrid *grid, ColPartitionSet **all_columns,
|
||||
WidthCallback width_cb) {
|
||||
void TableFinder::MakeTableBlocks(ColPartitionGrid *grid,
|
||||
ColPartitionSet **all_columns,
|
||||
const WidthCallback &width_cb) {
|
||||
// Since we have table blocks already, remove table tags from all
|
||||
// colpartitions
|
||||
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> gsearch(grid);
|
||||
@ -1997,13 +2059,15 @@ void TableFinder::MakeTableBlocks(ColPartitionGrid *grid, ColPartitionSet **all_
|
||||
}
|
||||
// Now make a single colpartition out of each table block and remove
|
||||
// all colpartitions contained within a table
|
||||
GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> table_search(&table_grid_);
|
||||
GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> table_search(
|
||||
&table_grid_);
|
||||
table_search.StartFullSearch();
|
||||
ColSegment *table;
|
||||
while ((table = table_search.NextFullSearch()) != nullptr) {
|
||||
const TBOX &table_box = table->bounding_box();
|
||||
// Start a rect search on table_box
|
||||
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> rectsearch(grid);
|
||||
GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> rectsearch(
|
||||
grid);
|
||||
rectsearch.StartRectSearch(table_box);
|
||||
ColPartition *part;
|
||||
ColPartition *table_partition = nullptr;
|
||||
@ -2045,7 +2109,10 @@ void TableFinder::MakeTableBlocks(ColPartitionGrid *grid, ColPartitionSet **all_
|
||||
//////// ColSegment code
|
||||
////////
|
||||
ColSegment::ColSegment()
|
||||
: ELIST_LINK(), num_table_cells_(0), num_text_cells_(0), type_(COL_UNKNOWN) {}
|
||||
: ELIST_LINK(),
|
||||
num_table_cells_(0),
|
||||
num_text_cells_(0),
|
||||
type_(COL_UNKNOWN) {}
|
||||
|
||||
// Provides a color for BBGrid to draw the rectangle.
|
||||
ScrollView::Color ColSegment::BoxColor() const {
|
||||
|
@ -107,7 +107,8 @@ private:
|
||||
|
||||
// Typedef BBGrid of ColSegments
|
||||
using ColSegmentGrid = BBGrid<ColSegment, ColSegment_CLIST, ColSegment_C_IT>;
|
||||
using ColSegmentGridSearch = GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT>;
|
||||
using ColSegmentGridSearch =
|
||||
GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT>;
|
||||
|
||||
// TableFinder is a utility class to find a set of tables given a set of
|
||||
// ColPartitions and Columns. The TableFinder will mark candidate ColPartitions
|
||||
@ -143,8 +144,8 @@ public:
|
||||
// tables. The columns and width callbacks are used to merge tables.
|
||||
// The reskew argument is only used to write the tables to the out.png
|
||||
// if that feature is enabled.
|
||||
void LocateTables(ColPartitionGrid *grid, ColPartitionSet **columns, WidthCallback width_cb,
|
||||
const FCOORD &reskew);
|
||||
void LocateTables(ColPartitionGrid *grid, ColPartitionSet **columns,
|
||||
WidthCallback width_cb, const FCOORD &reskew);
|
||||
|
||||
protected:
|
||||
// Access for the grid dimensions.
|
||||
@ -179,7 +180,8 @@ protected:
|
||||
// Utility function to move segments to col_seg_grid
|
||||
// Note: Move includes ownership,
|
||||
// so segments will be owned by col_seg_grid
|
||||
void MoveColSegmentsToGrid(ColSegment_LIST *segments, ColSegmentGrid *col_seg_grid);
|
||||
void MoveColSegmentsToGrid(ColSegment_LIST *segments,
|
||||
ColSegmentGrid *col_seg_grid);
|
||||
|
||||
//////// Set up code to run during table detection to correctly
|
||||
//////// initialize variables on column partitions that are used later.
|
||||
@ -191,7 +193,8 @@ protected:
|
||||
// Set left, right and top, bottom spacings of each colpartition.
|
||||
// Left/right spacings are w.r.t the column boundaries
|
||||
// Top/bottom spacings are w.r.t. previous and next colpartitions
|
||||
static void SetPartitionSpacings(ColPartitionGrid *grid, ColPartitionSet **all_columns);
|
||||
static void SetPartitionSpacings(ColPartitionGrid *grid,
|
||||
ColPartitionSet **all_columns);
|
||||
|
||||
// Set spacing and closest neighbors above and below a given colpartition.
|
||||
void SetVerticalSpacing(ColPartition *part);
|
||||
@ -263,10 +266,12 @@ protected:
|
||||
////////
|
||||
|
||||
// Get Column segments from best_columns_
|
||||
void GetColumnBlocks(ColPartitionSet **columns, ColSegment_LIST *col_segments);
|
||||
void GetColumnBlocks(ColPartitionSet **columns,
|
||||
ColSegment_LIST *col_segments);
|
||||
|
||||
// Group Column segments into consecutive single column regions.
|
||||
void GroupColumnBlocks(ColSegment_LIST *current_segments, ColSegment_LIST *col_segments);
|
||||
void GroupColumnBlocks(ColSegment_LIST *current_segments,
|
||||
ColSegment_LIST *col_segments);
|
||||
|
||||
// Check if two boxes are consecutive within the same column
|
||||
bool ConsecutiveBoxes(const TBOX &b1, const TBOX &b2);
|
||||
@ -295,7 +300,8 @@ protected:
|
||||
// earlier functions) in the x direction and the min/max extent of
|
||||
// overlapping table columns in the y direction.
|
||||
// Section 4.2 of paper.
|
||||
void GetTableRegions(ColSegment_LIST *table_columns, ColSegment_LIST *table_regions);
|
||||
void GetTableRegions(ColSegment_LIST *table_columns,
|
||||
ColSegment_LIST *table_regions);
|
||||
|
||||
//////// Functions to "patch up" found tables
|
||||
////////
|
||||
@ -316,11 +322,12 @@ protected:
|
||||
void GrowTableBox(const TBOX &table_box, TBOX *result_box);
|
||||
// Grow a table by increasing the size of the box to include
|
||||
// partitions with significant overlap with the table.
|
||||
void GrowTableToIncludePartials(const TBOX &table_box, const TBOX &search_range,
|
||||
TBOX *result_box);
|
||||
void GrowTableToIncludePartials(const TBOX &table_box,
|
||||
const TBOX &search_range, TBOX *result_box);
|
||||
// Grow a table by expanding to the extents of significantly
|
||||
// overlapping lines.
|
||||
void GrowTableToIncludeLines(const TBOX &table_box, const TBOX &search_range, TBOX *result_box);
|
||||
void GrowTableToIncludeLines(const TBOX &table_box, const TBOX &search_range,
|
||||
TBOX *result_box);
|
||||
// Checks whether the horizontal line belongs to the table by looking at the
|
||||
// side spacing of extra ColPartitions that will be included in the table
|
||||
// due to expansion
|
||||
@ -351,12 +358,14 @@ protected:
|
||||
|
||||
// Displays Colpartitions marked as table row. Overlays them on top of
|
||||
// part_grid_.
|
||||
void DisplayColSegments(ScrollView *win, ColSegment_LIST *cols, ScrollView::Color color);
|
||||
void DisplayColSegments(ScrollView *win, ColSegment_LIST *cols,
|
||||
ScrollView::Color color);
|
||||
|
||||
// Displays the colpartitions using a new coloring on an existing window.
|
||||
// Note: This method is only for debug purpose during development and
|
||||
// would not be part of checked in code
|
||||
void DisplayColPartitions(ScrollView *win, ColPartitionGrid *grid, ScrollView::Color text_color,
|
||||
void DisplayColPartitions(ScrollView *win, ColPartitionGrid *grid,
|
||||
ScrollView::Color text_color,
|
||||
ScrollView::Color table_color);
|
||||
void DisplayColPartitions(ScrollView *win, ColPartitionGrid *grid,
|
||||
ScrollView::Color default_color);
|
||||
@ -366,7 +375,8 @@ protected:
|
||||
// Merge all colpartitions in table regions to make them a single
|
||||
// colpartition and revert types of isolated table cells not
|
||||
// assigned to any table to their original types.
|
||||
void MakeTableBlocks(ColPartitionGrid *grid, ColPartitionSet **columns, WidthCallback width_cb);
|
||||
void MakeTableBlocks(ColPartitionGrid *grid, ColPartitionSet **columns,
|
||||
const WidthCallback &width_cb);
|
||||
|
||||
/////////////////////////////////////////////////
|
||||
// Useful objects used during table find process.
|
||||
|
@ -720,15 +720,6 @@ int StructuredTable::CountPartitions(const TBOX &box) {
|
||||
//////// TableRecognizer Class
|
||||
////////
|
||||
|
||||
TableRecognizer::TableRecognizer()
|
||||
: text_grid_(nullptr)
|
||||
, line_grid_(nullptr)
|
||||
, min_height_(0)
|
||||
, min_width_(0)
|
||||
, max_text_height_(INT32_MAX) {}
|
||||
|
||||
TableRecognizer::~TableRecognizer() = default;
|
||||
|
||||
void TableRecognizer::Init() {}
|
||||
|
||||
void TableRecognizer::set_text_grid(ColPartitionGrid *text_grid) {
|
||||
|
@ -250,8 +250,8 @@ protected:
|
||||
|
||||
class TESS_API TableRecognizer {
|
||||
public:
|
||||
TableRecognizer();
|
||||
~TableRecognizer();
|
||||
TableRecognizer() = default;
|
||||
~TableRecognizer() = default;
|
||||
|
||||
// Initialization code. Must be called after the constructor.
|
||||
void Init();
|
||||
@ -358,13 +358,13 @@ protected:
|
||||
static bool IsWeakTableRow(StructuredTable *table, int row);
|
||||
|
||||
// Input data, used as read only data to make decisions.
|
||||
ColPartitionGrid *text_grid_; // Text ColPartitions
|
||||
ColPartitionGrid *line_grid_; // Line ColPartitions
|
||||
ColPartitionGrid *text_grid_ = nullptr; // Text ColPartitions
|
||||
ColPartitionGrid *line_grid_ = nullptr; // Line ColPartitions
|
||||
// Table constraints, a "good" table must satisfy these.
|
||||
int min_height_;
|
||||
int min_width_;
|
||||
int min_height_ = 0;
|
||||
int min_width_ = 0;
|
||||
// Filters, used to prevent awkward partitions from destroying structure.
|
||||
int max_text_height_; // Horizontal lines may intersect taller text.
|
||||
int max_text_height_ = INT32_MAX; // Horizontal lines may intersect taller text.
|
||||
};
|
||||
|
||||
} // namespace tesseract
|
||||
|
@ -325,7 +325,7 @@ float Textord::filter_noise_blobs(BLOBNBOX_LIST *src_list, // original list
|
||||
(tesseract::CCStruct::kDescenderFraction + tesseract::CCStruct::kXHeightFraction +
|
||||
2 * tesseract::CCStruct::kAscenderFraction) /
|
||||
tesseract::CCStruct::kXHeightFraction);
|
||||
min_y = floor(initial_x / 2);
|
||||
min_y = std::floor(initial_x / 2);
|
||||
max_x = ceil(initial_x * textord_width_limit);
|
||||
small_it.move_to_first();
|
||||
for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) {
|
||||
@ -729,7 +729,7 @@ void Textord::TransferDiacriticsToBlockGroups(BLOBNBOX_LIST *diacritic_blobs, BL
|
||||
int best_g = 0;
|
||||
float best_angle_diff = FLT_MAX;
|
||||
for (const auto &group : groups) {
|
||||
double angle_diff = fabs(block_angle - group->angle);
|
||||
double angle_diff = std::fabs(block_angle - group->angle);
|
||||
if (angle_diff > M_PI) {
|
||||
angle_diff = fabs(angle_diff - 2.0 * M_PI);
|
||||
}
|
||||
|
@ -36,6 +36,7 @@
|
||||
#endif
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <memory>
|
||||
|
||||
#define MAXSPACING 128 /*max expected spacing in pix */
|
||||
@ -295,7 +296,7 @@ void Textord::row_spacing_stats(TO_ROW *row, GAPMAP *gapmap, int16_t block_idx,
|
||||
/* Collect first pass stats for row */
|
||||
|
||||
if (!good_block_space_estimate) {
|
||||
block_space_gap_width = int16_t(floor(row->xheight / 2));
|
||||
block_space_gap_width = int16_t(std::floor(row->xheight / 2));
|
||||
}
|
||||
if (!row->blob_list()->empty()) {
|
||||
if (tosp_threshold_bias1 > 0) {
|
||||
@ -435,7 +436,7 @@ are ignoring big gaps*/
|
||||
if (suspected_table) {
|
||||
sane_space =
|
||||
std::max(tosp_table_kn_sp_ratio * row->kern_size, tosp_table_xht_sp_ratio * row->xheight);
|
||||
sane_threshold = int32_t(floor((sane_space + row->kern_size) / 2));
|
||||
sane_threshold = int32_t(std::floor((sane_space + row->kern_size) / 2));
|
||||
|
||||
if ((row->space_size < sane_space) || (row->space_threshold < sane_threshold)) {
|
||||
if (tosp_debug_level > 5) {
|
||||
@ -606,7 +607,7 @@ It comes to the same thing.
|
||||
(Though there is a difference in that old textor has integer space_size
|
||||
and kern_size.)
|
||||
*/
|
||||
row->space_threshold = int32_t(floor((row->space_size + row->kern_size) / 2));
|
||||
row->space_threshold = int32_t(std::floor((row->space_size + row->kern_size) / 2));
|
||||
}
|
||||
|
||||
// Apply the same logic and ratios as in row_spacing_stats to
|
||||
@ -648,7 +649,7 @@ bool Textord::isolated_row_stats(TO_ROW *row, GAPMAP *gapmap, STATS *all_gap_sta
|
||||
crude_threshold_estimate =
|
||||
std::max(tosp_init_guess_kn_mult * kern_estimate, tosp_init_guess_xht_mult * row->xheight);
|
||||
small_gaps_count =
|
||||
stats_count_under(all_gap_stats, static_cast<int16_t>(ceil(crude_threshold_estimate)));
|
||||
stats_count_under(all_gap_stats, static_cast<int16_t>(std::ceil(crude_threshold_estimate)));
|
||||
total = all_gap_stats->get_total();
|
||||
|
||||
if ((total <= tosp_redo_kern_limit) ||
|
||||
@ -718,7 +719,7 @@ bool Textord::isolated_row_stats(TO_ROW *row, GAPMAP *gapmap, STATS *all_gap_sta
|
||||
} else {
|
||||
row->kern_size = all_gap_stats->median();
|
||||
}
|
||||
row->space_threshold = int32_t(floor((row->space_size + row->kern_size) / 2));
|
||||
row->space_threshold = int32_t(std::floor((row->space_size + row->kern_size) / 2));
|
||||
/* Sanity check */
|
||||
if ((row->kern_size >= row->space_threshold) || (row->space_threshold >= row->space_size) ||
|
||||
(row->space_threshold <= 0)) {
|
||||
@ -793,7 +794,7 @@ threshold is not within it, move the threshold so that is is just inside it.
|
||||
reqd_zero_width = 3;
|
||||
}
|
||||
|
||||
for (index = int16_t(ceil(kn)); index < int16_t(floor(sp)); index++) {
|
||||
for (index = int16_t(std::ceil(kn)); index < int16_t(std::floor(sp)); index++) {
|
||||
if (all_gap_stats->pile_count(index) == 0) {
|
||||
if (zero_width == 0) {
|
||||
zero_start = index;
|
||||
@ -909,7 +910,7 @@ the gap between the word being built and the next one. */
|
||||
current_gap = box_it.data()->bounding_box().left() - next_rep_char_word_right;
|
||||
current_within_xht_gap = current_gap;
|
||||
if (current_gap > tosp_rep_space * repetition_spacing) {
|
||||
prev_blanks = static_cast<uint8_t>(floor(current_gap / row->space_size));
|
||||
prev_blanks = static_cast<uint8_t>(std::floor(current_gap / row->space_size));
|
||||
if (prev_blanks < 1) {
|
||||
prev_blanks = 1;
|
||||
}
|
||||
@ -1002,7 +1003,7 @@ the gap between the word being built and the next one. */
|
||||
current_gap = word->bounding_box().left() - prev_x;
|
||||
current_within_xht_gap = current_gap;
|
||||
if (current_gap > tosp_rep_space * repetition_spacing) {
|
||||
blanks = static_cast<uint8_t>(floor(current_gap / row->space_size));
|
||||
blanks = static_cast<uint8_t>(std::floor(current_gap / row->space_size));
|
||||
if (blanks < 1) {
|
||||
blanks = 1;
|
||||
}
|
||||
@ -1066,7 +1067,7 @@ the gap between the word being built and the next one. */
|
||||
repetition_spacing = find_mean_blob_spacing(word);
|
||||
current_gap = word->bounding_box().left() - prev_x;
|
||||
if (current_gap > tosp_rep_space * repetition_spacing) {
|
||||
blanks = static_cast<uint8_t>(floor(current_gap / row->space_size));
|
||||
blanks = static_cast<uint8_t>(std::floor(current_gap / row->space_size));
|
||||
if (blanks < 1) {
|
||||
blanks = 1;
|
||||
}
|
||||
@ -1738,7 +1739,7 @@ caps ht chars which should NOT have their box reduced: T, Y, V, W etc
|
||||
if (left_limit > junk) {
|
||||
*left_above_xht = INT16_MAX; // No area above xht
|
||||
} else {
|
||||
*left_above_xht = static_cast<int16_t>(floor(left_limit));
|
||||
*left_above_xht = static_cast<int16_t>(std::floor(left_limit));
|
||||
}
|
||||
/*
|
||||
Find reduced LH limit of blob - the left extent of the region ABOVE the
|
||||
@ -1762,7 +1763,7 @@ Find reduced RH limit of blob - the right extent of the region BELOW the xht.
|
||||
return TBOX(); // no area within xht so return empty box
|
||||
}
|
||||
|
||||
return TBOX(ICOORD(static_cast<int16_t>(floor(left_limit)), blob_box.bottom()),
|
||||
ICOORD(static_cast<int16_t>(ceil(right_limit)), blob_box.top()));
|
||||
return TBOX(ICOORD(static_cast<int16_t>(std::floor(left_limit)), blob_box.bottom()),
|
||||
ICOORD(static_cast<int16_t>(std::ceil(right_limit)), blob_box.top()));
|
||||
}
|
||||
} // namespace tesseract
|
||||
|
@ -23,6 +23,8 @@
|
||||
|
||||
#include "wordseg.h"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
#include "blobbox.h"
|
||||
#include "cjkpitch.h"
|
||||
#include "drawtord.h"
|
||||
@ -222,7 +224,7 @@ int32_t row_words( // compute space size
|
||||
lower = row->xheight * textord_words_initial_lower;
|
||||
upper = row->xheight * textord_words_initial_upper;
|
||||
cluster_count = gap_stats.cluster(lower, upper, textord_spacesize_ratioprop, 3, cluster_stats);
|
||||
while (cluster_count < 2 && ceil(lower) < floor(upper)) {
|
||||
while (cluster_count < 2 && std::ceil(lower) < std::floor(upper)) {
|
||||
// shrink gap
|
||||
upper = (upper * 3 + lower) / 4;
|
||||
lower = (lower * 3 + upper) / 4;
|
||||
|
@ -26,8 +26,7 @@
|
||||
|
||||
using namespace tesseract;
|
||||
|
||||
static int list_components(TessdataManager &tm, const char *filename)
|
||||
{
|
||||
static int list_components(TessdataManager &tm, const char *filename) {
|
||||
// Initialize TessdataManager with the data in the given traineddata file.
|
||||
if (filename != nullptr && !tm.Init(filename)) {
|
||||
tprintf("Failed to read %s\n", filename);
|
||||
@ -37,8 +36,7 @@ static int list_components(TessdataManager &tm, const char *filename)
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
static int list_network(TessdataManager &tm, const char *filename)
|
||||
{
|
||||
static int list_network(TessdataManager &tm, const char *filename) {
|
||||
if (filename != nullptr && !tm.Init(filename)) {
|
||||
tprintf("Failed to read %s\n", filename);
|
||||
return EXIT_FAILURE;
|
||||
@ -62,11 +60,11 @@ static int list_network(TessdataManager &tm, const char *filename)
|
||||
|
||||
std::cout << "Layer Learning Rates: ";
|
||||
auto layers = recognizer.EnumerateLayers();
|
||||
for (auto id : layers) {
|
||||
for (const auto &id : layers) {
|
||||
auto layer = recognizer.GetLayer(id);
|
||||
std::cout << id << "(" << layer->name() << ")"
|
||||
<< "=" << recognizer.GetLayerLearningRate(id)
|
||||
<< (layers[layers.size()-1] != id ? ", " : "");
|
||||
<< (layers[layers.size() - 1] != id ? ", " : "");
|
||||
}
|
||||
std::cout << "\n";
|
||||
}
|
||||
@ -138,7 +136,8 @@ int main(int argc, char **argv) {
|
||||
} else {
|
||||
printf("Output %s created successfully.\n", output_file.c_str());
|
||||
}
|
||||
} else if (argc >= 4 && (strcmp(argv[1], "-e") == 0 || strcmp(argv[1], "-u") == 0)) {
|
||||
} else if (argc >= 4 &&
|
||||
(strcmp(argv[1], "-e") == 0 || strcmp(argv[1], "-u") == 0)) {
|
||||
// Initialize TessdataManager with the data in the given traineddata file.
|
||||
if (!tm.Init(argv[2])) {
|
||||
tprintf("Failed to read %s\n", argv[2]);
|
||||
@ -173,7 +172,8 @@ int main(int argc, char **argv) {
|
||||
if (tm.ExtractToFile(filename.c_str())) {
|
||||
printf("Wrote %s\n", filename.c_str());
|
||||
} else if (errno != 0) {
|
||||
printf("Error, could not extract %s: %s\n", filename.c_str(), strerror(errno));
|
||||
printf("Error, could not extract %s: %s\n", filename.c_str(),
|
||||
strerror(errno));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
||||
@ -184,7 +184,8 @@ int main(int argc, char **argv) {
|
||||
std::string traineddata_filename = new_traineddata_filename;
|
||||
traineddata_filename += ".__tmp__";
|
||||
if (rename(new_traineddata_filename, traineddata_filename.c_str()) != 0) {
|
||||
tprintf("Failed to create a temporary file %s\n", traineddata_filename.c_str());
|
||||
tprintf("Failed to create a temporary file %s\n",
|
||||
traineddata_filename.c_str());
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
@ -212,7 +213,8 @@ int main(int argc, char **argv) {
|
||||
std::vector<char> lstm_data;
|
||||
fp.OpenWrite(&lstm_data);
|
||||
ASSERT_HOST(recognizer.Serialize(&tm, &fp));
|
||||
tm.OverwriteEntry(tesseract::TESSDATA_LSTM, &lstm_data[0], lstm_data.size());
|
||||
tm.OverwriteEntry(tesseract::TESSDATA_LSTM, &lstm_data[0],
|
||||
lstm_data.size());
|
||||
if (!tm.SaveFile(argv[2], nullptr)) {
|
||||
tprintf("Failed to write modified traineddata:%s!\n", argv[2]);
|
||||
return EXIT_FAILURE;
|
||||
|
@ -24,6 +24,7 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <cfloat> // for FLT_MAX
|
||||
#include <cmath>
|
||||
#include <memory>
|
||||
|
||||
namespace tesseract {
|
||||
@ -266,7 +267,7 @@ float CTC::CalculateBiasFraction() {
|
||||
if (total_labels == 0) {
|
||||
return 0.0f;
|
||||
}
|
||||
return exp(std::max(true_pos - false_pos, 1) * log(kMinProb_) / total_labels);
|
||||
return exp(std::max(true_pos - false_pos, 1) * std::log(kMinProb_) / total_labels);
|
||||
}
|
||||
|
||||
// Given ln(x) and ln(y), returns ln(x + y), using:
|
||||
@ -319,7 +320,7 @@ void CTC::Backward(GENERIC_2D_ARRAY<double> *log_probs) const {
|
||||
const float *outputs_tp1 = outputs_[t + 1];
|
||||
for (int u = min_labels_[t]; u <= max_labels_[t]; ++u) {
|
||||
// Continuing the same label.
|
||||
double log_sum = log_probs->get(t + 1, u) + log(outputs_tp1[labels_[u]]);
|
||||
double log_sum = log_probs->get(t + 1, u) + std::log(outputs_tp1[labels_[u]]);
|
||||
// Change from previous label.
|
||||
if (u + 1 < num_labels_) {
|
||||
double prev_prob = outputs_tp1[labels_[u + 1]];
|
||||
|
@ -33,7 +33,7 @@
|
||||
#include "shapeclassifier.h"
|
||||
#include "shapetable.h"
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
#include "svmnode.h"
|
||||
# include "svmnode.h"
|
||||
#endif
|
||||
|
||||
#include "scanutils.h"
|
||||
@ -51,16 +51,16 @@ const float kFontMergeDistance = 0.025;
|
||||
|
||||
MasterTrainer::MasterTrainer(NormalizationMode norm_mode, bool shape_analysis,
|
||||
bool replicate_samples, int debug_level)
|
||||
: norm_mode_(norm_mode)
|
||||
, samples_(fontinfo_table_)
|
||||
, junk_samples_(fontinfo_table_)
|
||||
, verify_samples_(fontinfo_table_)
|
||||
, charsetsize_(0)
|
||||
, enable_shape_analysis_(shape_analysis)
|
||||
, enable_replication_(replicate_samples)
|
||||
, fragments_(nullptr)
|
||||
, prev_unichar_id_(-1)
|
||||
, debug_level_(debug_level) {}
|
||||
: norm_mode_(norm_mode),
|
||||
samples_(fontinfo_table_),
|
||||
junk_samples_(fontinfo_table_),
|
||||
verify_samples_(fontinfo_table_),
|
||||
charsetsize_(0),
|
||||
enable_shape_analysis_(shape_analysis),
|
||||
enable_replication_(replicate_samples),
|
||||
fragments_(nullptr),
|
||||
prev_unichar_id_(-1),
|
||||
debug_level_(debug_level) {}
|
||||
|
||||
MasterTrainer::~MasterTrainer() {
|
||||
delete[] fragments_;
|
||||
@ -137,10 +137,14 @@ void MasterTrainer::ReadTrainingSamples(const char *page_name,
|
||||
const FEATURE_DEFS_STRUCT &feature_defs,
|
||||
bool verification) {
|
||||
char buffer[2048];
|
||||
const int int_feature_type = ShortNameToFeatureType(feature_defs, kIntFeatureType);
|
||||
const int micro_feature_type = ShortNameToFeatureType(feature_defs, kMicroFeatureType);
|
||||
const int cn_feature_type = ShortNameToFeatureType(feature_defs, kCNFeatureType);
|
||||
const int geo_feature_type = ShortNameToFeatureType(feature_defs, kGeoFeatureType);
|
||||
const int int_feature_type =
|
||||
ShortNameToFeatureType(feature_defs, kIntFeatureType);
|
||||
const int micro_feature_type =
|
||||
ShortNameToFeatureType(feature_defs, kMicroFeatureType);
|
||||
const int cn_feature_type =
|
||||
ShortNameToFeatureType(feature_defs, kCNFeatureType);
|
||||
const int geo_feature_type =
|
||||
ShortNameToFeatureType(feature_defs, kGeoFeatureType);
|
||||
|
||||
FILE *fp = fopen(page_name, "rb");
|
||||
if (fp == nullptr) {
|
||||
@ -175,8 +179,8 @@ void MasterTrainer::ReadTrainingSamples(const char *page_name,
|
||||
sample->set_font_id(font_id);
|
||||
sample->set_page_num(page_number + page_images_.size());
|
||||
sample->set_bounding_box(bounding_box);
|
||||
sample->ExtractCharDesc(int_feature_type, micro_feature_type, cn_feature_type, geo_feature_type,
|
||||
char_desc);
|
||||
sample->ExtractCharDesc(int_feature_type, micro_feature_type,
|
||||
cn_feature_type, geo_feature_type, char_desc);
|
||||
AddSample(verification, unichar.c_str(), sample);
|
||||
delete char_desc;
|
||||
}
|
||||
@ -186,7 +190,8 @@ void MasterTrainer::ReadTrainingSamples(const char *page_name,
|
||||
|
||||
// Adds the given single sample to the trainer, setting the classid
|
||||
// appropriately from the given unichar_str.
|
||||
void MasterTrainer::AddSample(bool verification, const char *unichar, TrainingSample *sample) {
|
||||
void MasterTrainer::AddSample(bool verification, const char *unichar,
|
||||
TrainingSample *sample) {
|
||||
if (verification) {
|
||||
verify_samples_.AddSample(unichar, sample);
|
||||
prev_unichar_id_ = -1;
|
||||
@ -314,7 +319,8 @@ void MasterTrainer::SetupMasterShapes() {
|
||||
ClusterShapes(kMinClusteredShapes, kMaxUnicharsPerCluster, kFontMergeDistance,
|
||||
&char_shapes_end_fragment);
|
||||
char_shapes.AppendMasterShapes(char_shapes_end_fragment, nullptr);
|
||||
ClusterShapes(kMinClusteredShapes, kMaxUnicharsPerCluster, kFontMergeDistance, &char_shapes);
|
||||
ClusterShapes(kMinClusteredShapes, kMaxUnicharsPerCluster, kFontMergeDistance,
|
||||
&char_shapes);
|
||||
master_shapes_.AppendMasterShapes(char_shapes, nullptr);
|
||||
tprintf("Master shape_table:%s\n", master_shapes_.SummaryStr().c_str());
|
||||
}
|
||||
@ -383,13 +389,13 @@ bool MasterTrainer::LoadFontInfo(const char *filename) {
|
||||
fontinfo.name = font_name;
|
||||
fontinfo.properties = 0;
|
||||
fontinfo.universal_id = 0;
|
||||
if (tfscanf(fp, "%1024s %i %i %i %i %i\n", font_name, &italic, &bold, &fixed, &serif,
|
||||
&fraktur) != 6) {
|
||||
if (tfscanf(fp, "%1024s %i %i %i %i %i\n", font_name, &italic, &bold,
|
||||
&fixed, &serif, &fraktur) != 6) {
|
||||
delete[] font_name;
|
||||
continue;
|
||||
}
|
||||
fontinfo.properties =
|
||||
(italic << 0) + (bold << 1) + (fixed << 2) + (serif << 3) + (fraktur << 4);
|
||||
fontinfo.properties = (italic << 0) + (bold << 1) + (fixed << 2) +
|
||||
(serif << 3) + (fraktur << 4);
|
||||
if (!fontinfo_table_.contains(fontinfo)) {
|
||||
fontinfo_table_.push_back(fontinfo);
|
||||
} else {
|
||||
@ -477,7 +483,8 @@ bool MasterTrainer::AddSpacingInfo(const char *filename) {
|
||||
fi->init_spacing(unicharset_.size());
|
||||
FontSpacingInfo *spacing = nullptr;
|
||||
for (int l = 0; l < num_unichars; ++l) {
|
||||
if (tfscanf(fontinfo_file, "%s %d %d %d", uch, &x_gap_before, &x_gap_after, &num_kerned) != 4) {
|
||||
if (tfscanf(fontinfo_file, "%s %d %d %d", uch, &x_gap_before, &x_gap_after,
|
||||
&num_kerned) != 4) {
|
||||
tprintf("Bad format of font spacing file %s\n", filename);
|
||||
fclose(fontinfo_file);
|
||||
return false;
|
||||
@ -498,7 +505,8 @@ bool MasterTrainer::AddSpacingInfo(const char *filename) {
|
||||
if (!valid || !unicharset_.contains_unichar(kerned_uch)) {
|
||||
continue;
|
||||
}
|
||||
spacing->kerned_unichar_ids.push_back(unicharset_.unichar_to_id(kerned_uch));
|
||||
spacing->kerned_unichar_ids.push_back(
|
||||
unicharset_.unichar_to_id(kerned_uch));
|
||||
spacing->kerned_x_gaps.push_back(static_cast<int16_t>(x_gap * scale));
|
||||
}
|
||||
if (valid) {
|
||||
@ -572,13 +580,14 @@ void MasterTrainer::SetupFlatShapeTable(ShapeTable *shape_table) {
|
||||
|
||||
// Sets up a Clusterer for mftraining on a single shape_id.
|
||||
// Call FreeClusterer on the return value after use.
|
||||
CLUSTERER *MasterTrainer::SetupForClustering(const ShapeTable &shape_table,
|
||||
const FEATURE_DEFS_STRUCT &feature_defs, int shape_id,
|
||||
int *num_samples) {
|
||||
CLUSTERER *MasterTrainer::SetupForClustering(
|
||||
const ShapeTable &shape_table, const FEATURE_DEFS_STRUCT &feature_defs,
|
||||
int shape_id, int *num_samples) {
|
||||
int desc_index = ShortNameToFeatureType(feature_defs, kMicroFeatureType);
|
||||
int num_params = feature_defs.FeatureDesc[desc_index]->NumParams;
|
||||
ASSERT_HOST(num_params == (int)MicroFeatureParameter::MFCount);
|
||||
CLUSTERER *clusterer = MakeClusterer(num_params, feature_defs.FeatureDesc[desc_index]->ParamDesc);
|
||||
CLUSTERER *clusterer = MakeClusterer(
|
||||
num_params, feature_defs.FeatureDesc[desc_index]->ParamDesc);
|
||||
|
||||
// We want to iterate over the samples of just the one shape.
|
||||
IndexMapBiDi shape_map;
|
||||
@ -612,12 +621,14 @@ CLUSTERER *MasterTrainer::SetupForClustering(const ShapeTable &shape_table,
|
||||
void MasterTrainer::WriteInttempAndPFFMTable(const UNICHARSET &unicharset,
|
||||
const UNICHARSET &shape_set,
|
||||
const ShapeTable &shape_table,
|
||||
CLASS_STRUCT *float_classes, const char *inttemp_file,
|
||||
CLASS_STRUCT *float_classes,
|
||||
const char *inttemp_file,
|
||||
const char *pffmtable_file) {
|
||||
auto *classify = new tesseract::Classify();
|
||||
// Move the fontinfo table to classify.
|
||||
fontinfo_table_.MoveTo(&classify->get_fontinfo_table());
|
||||
INT_TEMPLATES_STRUCT *int_templates = classify->CreateIntTemplates(float_classes, shape_set);
|
||||
INT_TEMPLATES_STRUCT *int_templates =
|
||||
classify->CreateIntTemplates(float_classes, shape_set);
|
||||
FILE *fp = fopen(inttemp_file, "wb");
|
||||
if (fp == nullptr) {
|
||||
tprintf("Error, failed to open file \"%s\"\n", inttemp_file);
|
||||
@ -631,10 +642,7 @@ void MasterTrainer::WriteInttempAndPFFMTable(const UNICHARSET &unicharset,
|
||||
// We put the shapetable_cutoffs in a vector, and compute the
|
||||
// unicharset cutoffs along the way.
|
||||
std::vector<uint16_t> shapetable_cutoffs;
|
||||
std::vector<uint16_t> unichar_cutoffs;
|
||||
for (int c = 0; c < unicharset.size(); ++c) {
|
||||
unichar_cutoffs.push_back(0);
|
||||
}
|
||||
std::vector<uint16_t> unichar_cutoffs(unicharset.size());
|
||||
/* then write out each class */
|
||||
for (int i = 0; i < int_templates->NumClasses; ++i) {
|
||||
INT_CLASS_STRUCT *Class = ClassForClassId(int_templates, i);
|
||||
@ -679,7 +687,8 @@ void MasterTrainer::WriteInttempAndPFFMTable(const UNICHARSET &unicharset,
|
||||
|
||||
// Generate debug output relating to the canonical distance between the
|
||||
// two given UTF8 grapheme strings.
|
||||
void MasterTrainer::DebugCanonical(const char *unichar_str1, const char *unichar_str2) {
|
||||
void MasterTrainer::DebugCanonical(const char *unichar_str1,
|
||||
const char *unichar_str2) {
|
||||
int class_id1 = unicharset_.unichar_to_id(unichar_str1);
|
||||
int class_id2 = unicharset_.unichar_to_id(unichar_str2);
|
||||
if (class_id2 == INVALID_UNICHAR_ID) {
|
||||
@ -689,8 +698,8 @@ void MasterTrainer::DebugCanonical(const char *unichar_str1, const char *unichar
|
||||
tprintf("No unicharset entry found for %s\n", unichar_str1);
|
||||
return;
|
||||
} else {
|
||||
tprintf("Font ambiguities for unichar %d = %s and %d = %s\n", class_id1, unichar_str1,
|
||||
class_id2, unichar_str2);
|
||||
tprintf("Font ambiguities for unichar %d = %s and %d = %s\n", class_id1,
|
||||
unichar_str1, class_id2, unichar_str2);
|
||||
}
|
||||
int num_fonts = samples_.NumFonts();
|
||||
const IntFeatureMap &feature_map = feature_map_;
|
||||
@ -714,7 +723,8 @@ void MasterTrainer::DebugCanonical(const char *unichar_str1, const char *unichar
|
||||
if (samples_.NumClassSamples(f2, class_id2, false) == 0) {
|
||||
continue;
|
||||
}
|
||||
float dist = samples_.ClusterDistance(f1, class_id1, f2, class_id2, feature_map);
|
||||
float dist =
|
||||
samples_.ClusterDistance(f1, class_id1, f2, class_id2, feature_map);
|
||||
tprintf(" %5.3f", dist);
|
||||
}
|
||||
tprintf("\n");
|
||||
@ -725,7 +735,8 @@ void MasterTrainer::DebugCanonical(const char *unichar_str1, const char *unichar
|
||||
if (samples_.NumClassSamples(f, class_id1, true) > 0) {
|
||||
shapes.AddShape(class_id1, f);
|
||||
}
|
||||
if (class_id1 != class_id2 && samples_.NumClassSamples(f, class_id2, true) > 0) {
|
||||
if (class_id1 != class_id2 &&
|
||||
samples_.NumClassSamples(f, class_id2, true) > 0) {
|
||||
shapes.AddShape(class_id2, f);
|
||||
}
|
||||
}
|
||||
@ -743,14 +754,17 @@ void MasterTrainer::DebugCanonical(const char *unichar_str1, const char *unichar
|
||||
// Until the features window is destroyed, each click in the features window
|
||||
// will display the samples that have that feature in a separate window.
|
||||
void MasterTrainer::DisplaySamples(const char *unichar_str1, int cloud_font,
|
||||
const char *unichar_str2, int canonical_font) {
|
||||
const char *unichar_str2,
|
||||
int canonical_font) {
|
||||
const IntFeatureMap &feature_map = feature_map_;
|
||||
const IntFeatureSpace &feature_space = feature_map.feature_space();
|
||||
ScrollView *f_window = CreateFeatureSpaceWindow("Features", 100, 500);
|
||||
ClearFeatureSpaceWindow(norm_mode_ == NM_BASELINE ? baseline : character, f_window);
|
||||
ClearFeatureSpaceWindow(norm_mode_ == NM_BASELINE ? baseline : character,
|
||||
f_window);
|
||||
int class_id2 = samples_.unicharset().unichar_to_id(unichar_str2);
|
||||
if (class_id2 != INVALID_UNICHAR_ID && canonical_font >= 0) {
|
||||
const TrainingSample *sample = samples_.GetCanonicalSample(canonical_font, class_id2);
|
||||
const TrainingSample *sample =
|
||||
samples_.GetCanonicalSample(canonical_font, class_id2);
|
||||
for (uint32_t f = 0; f < sample->num_features(); ++f) {
|
||||
RenderIntFeature(f_window, &sample->features()[f], ScrollView::RED);
|
||||
}
|
||||
@ -780,8 +794,8 @@ void MasterTrainer::DisplaySamples(const char *unichar_str1, int cloud_font,
|
||||
Shape shape;
|
||||
shape.AddToShape(class_id1, cloud_font);
|
||||
s_window->Clear();
|
||||
samples_.DisplaySamplesWithFeature(feature_index, shape, feature_space, ScrollView::GREEN,
|
||||
s_window);
|
||||
samples_.DisplaySamplesWithFeature(feature_index, shape, feature_space,
|
||||
ScrollView::GREEN, s_window);
|
||||
s_window->Update();
|
||||
}
|
||||
}
|
||||
@ -790,22 +804,25 @@ void MasterTrainer::DisplaySamples(const char *unichar_str1, int cloud_font,
|
||||
}
|
||||
#endif // !GRAPHICS_DISABLED
|
||||
|
||||
void MasterTrainer::TestClassifierVOld(bool replicate_samples, ShapeClassifier *test_classifier,
|
||||
void MasterTrainer::TestClassifierVOld(bool replicate_samples,
|
||||
ShapeClassifier *test_classifier,
|
||||
ShapeClassifier *old_classifier) {
|
||||
SampleIterator sample_it;
|
||||
sample_it.Init(nullptr, nullptr, replicate_samples, &samples_);
|
||||
ErrorCounter::DebugNewErrors(test_classifier, old_classifier, CT_UNICHAR_TOPN_ERR,
|
||||
fontinfo_table_, page_images_, &sample_it);
|
||||
ErrorCounter::DebugNewErrors(test_classifier, old_classifier,
|
||||
CT_UNICHAR_TOPN_ERR, fontinfo_table_,
|
||||
page_images_, &sample_it);
|
||||
}
|
||||
|
||||
// Tests the given test_classifier on the internal samples.
|
||||
// See TestClassifier for details.
|
||||
void MasterTrainer::TestClassifierOnSamples(CountTypes error_mode, int report_level,
|
||||
void MasterTrainer::TestClassifierOnSamples(CountTypes error_mode,
|
||||
int report_level,
|
||||
bool replicate_samples,
|
||||
ShapeClassifier *test_classifier,
|
||||
std::string *report_string) {
|
||||
TestClassifier(error_mode, report_level, replicate_samples, &samples_, test_classifier,
|
||||
report_string);
|
||||
TestClassifier(error_mode, report_level, replicate_samples, &samples_,
|
||||
test_classifier, report_string);
|
||||
}
|
||||
|
||||
// Tests the given test_classifier on the given samples.
|
||||
@ -822,8 +839,10 @@ void MasterTrainer::TestClassifierOnSamples(CountTypes error_mode, int report_le
|
||||
// If report_string is non-nullptr, a summary of the results for each font
|
||||
// is appended to the report_string.
|
||||
double MasterTrainer::TestClassifier(CountTypes error_mode, int report_level,
|
||||
bool replicate_samples, TrainingSampleSet *samples,
|
||||
ShapeClassifier *test_classifier, std::string *report_string) {
|
||||
bool replicate_samples,
|
||||
TrainingSampleSet *samples,
|
||||
ShapeClassifier *test_classifier,
|
||||
std::string *report_string) {
|
||||
SampleIterator sample_it;
|
||||
sample_it.Init(nullptr, nullptr, replicate_samples, samples);
|
||||
if (report_level > 0) {
|
||||
@ -837,8 +856,9 @@ double MasterTrainer::TestClassifier(CountTypes error_mode, int report_level,
|
||||
tprintf("Testing %sREPLICATED:\n", replicate_samples ? "" : "NON-");
|
||||
}
|
||||
double unichar_error = 0.0;
|
||||
ErrorCounter::ComputeErrorRate(test_classifier, report_level, error_mode, fontinfo_table_,
|
||||
page_images_, &sample_it, &unichar_error, nullptr, report_string);
|
||||
ErrorCounter::ComputeErrorRate(test_classifier, report_level, error_mode,
|
||||
fontinfo_table_, page_images_, &sample_it,
|
||||
&unichar_error, nullptr, report_string);
|
||||
return unichar_error;
|
||||
}
|
||||
|
||||
@ -857,14 +877,16 @@ float MasterTrainer::ShapeDistance(const ShapeTable &shapes, int s1, int s2) {
|
||||
// distances between characters of matching font where possible.
|
||||
for (int c1 = 0; c1 < num_chars1; ++c1) {
|
||||
for (int c2 = 0; c2 < num_chars2; ++c2) {
|
||||
dist_sum += samples_.UnicharDistance(shape1[c1], shape2[c2], true, feature_map);
|
||||
dist_sum +=
|
||||
samples_.UnicharDistance(shape1[c1], shape2[c2], true, feature_map);
|
||||
++dist_count;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// In the single unichar case, there is little alternative, but to compute
|
||||
// the squared-order distance between pairs of fonts.
|
||||
dist_sum = samples_.UnicharDistance(shape1[0], shape2[0], false, feature_map);
|
||||
dist_sum =
|
||||
samples_.UnicharDistance(shape1[0], shape2[0], false, feature_map);
|
||||
++dist_count;
|
||||
}
|
||||
return dist_sum / dist_count;
|
||||
@ -942,8 +964,8 @@ void MasterTrainer::ReplaceFragmentedSamples() {
|
||||
// * No shape shall have more than max_shape_unichars in it,
|
||||
// * Don't merge shapes where the distance between them exceeds max_dist.
|
||||
const float kInfiniteDist = 999.0f;
|
||||
void MasterTrainer::ClusterShapes(int min_shapes, int max_shape_unichars, float max_dist,
|
||||
ShapeTable *shapes) {
|
||||
void MasterTrainer::ClusterShapes(int min_shapes, int max_shape_unichars,
|
||||
float max_dist, ShapeTable *shapes) {
|
||||
int num_shapes = shapes->NumShapes();
|
||||
int max_merges = num_shapes - min_shapes;
|
||||
// TODO: avoid new / delete.
|
||||
@ -971,8 +993,8 @@ void MasterTrainer::ClusterShapes(int min_shapes, int max_shape_unichars, float
|
||||
int num_unichars = shapes->MergedUnicharCount(min_s1, min_s2);
|
||||
shape_dists[min_s1][min_s2 - min_s1 - 1].distance = kInfiniteDist;
|
||||
if (num_unichars > max_shape_unichars) {
|
||||
tprintf("Merge of %d and %d with %d would exceed max of %d unichars\n", min_s1, min_s2,
|
||||
num_unichars, max_shape_unichars);
|
||||
tprintf("Merge of %d and %d with %d would exceed max of %d unichars\n",
|
||||
min_s1, min_s2, num_unichars, max_shape_unichars);
|
||||
} else {
|
||||
shapes->MergeShapes(min_s1, min_s2);
|
||||
shape_dists[min_s2].clear();
|
||||
@ -980,13 +1002,15 @@ void MasterTrainer::ClusterShapes(int min_shapes, int max_shape_unichars, float
|
||||
|
||||
for (int s = 0; s < min_s1; ++s) {
|
||||
if (!shape_dists[s].empty()) {
|
||||
shape_dists[s][min_s1 - s - 1].distance = ShapeDistance(*shapes, s, min_s1);
|
||||
shape_dists[s][min_s1 - s - 1].distance =
|
||||
ShapeDistance(*shapes, s, min_s1);
|
||||
shape_dists[s][min_s2 - s - 1].distance = kInfiniteDist;
|
||||
}
|
||||
}
|
||||
for (int s2 = min_s1 + 1; s2 < num_shapes; ++s2) {
|
||||
if (shape_dists[min_s1][s2 - min_s1 - 1].distance < kInfiniteDist) {
|
||||
shape_dists[min_s1][s2 - min_s1 - 1].distance = ShapeDistance(*shapes, min_s1, s2);
|
||||
shape_dists[min_s1][s2 - min_s1 - 1].distance =
|
||||
ShapeDistance(*shapes, min_s1, s2);
|
||||
}
|
||||
}
|
||||
for (int s = min_s1 + 1; s < min_s2; ++s) {
|
||||
|
@ -69,7 +69,7 @@ float CompareProtos(PROTO_STRUCT *p1, PROTO_STRUCT *p2) {
|
||||
float Angle, Length;
|
||||
|
||||
/* if p1 and p2 are not close in length, don't let them match */
|
||||
Length = fabs(p1->Length - p2->Length);
|
||||
Length = std::fabs(p1->Length - p2->Length);
|
||||
if (Length > MAX_LENGTH_MISMATCH) {
|
||||
return (0.0);
|
||||
}
|
||||
@ -88,8 +88,8 @@ float CompareProtos(PROTO_STRUCT *p1, PROTO_STRUCT *p2) {
|
||||
}
|
||||
|
||||
/* set the dummy pico-feature at one end of p1 and match it to p2 */
|
||||
Feature->Params[PicoFeatX] = p1->X + cos(Angle) * Length;
|
||||
Feature->Params[PicoFeatY] = p1->Y + sin(Angle) * Length;
|
||||
Feature->Params[PicoFeatX] = p1->X + std::cos(Angle) * Length;
|
||||
Feature->Params[PicoFeatY] = p1->Y + std::sin(Angle) * Length;
|
||||
if (DummyFastMatch(Feature, p2)) {
|
||||
Evidence = SubfeatureEvidence(Feature, p2);
|
||||
if (Evidence < WorstEvidence) {
|
||||
@ -101,8 +101,8 @@ float CompareProtos(PROTO_STRUCT *p1, PROTO_STRUCT *p2) {
|
||||
}
|
||||
|
||||
/* set the dummy pico-feature at the other end of p1 and match it to p2 */
|
||||
Feature->Params[PicoFeatX] = p1->X - cos(Angle) * Length;
|
||||
Feature->Params[PicoFeatY] = p1->Y - sin(Angle) * Length;
|
||||
Feature->Params[PicoFeatX] = p1->X - std::cos(Angle) * Length;
|
||||
Feature->Params[PicoFeatY] = p1->Y - std::sin(Angle) * Length;
|
||||
if (DummyFastMatch(Feature, p2)) {
|
||||
Evidence = SubfeatureEvidence(Feature, p2);
|
||||
if (Evidence < WorstEvidence) {
|
||||
@ -266,7 +266,7 @@ bool DummyFastMatch(FEATURE Feature, PROTO_STRUCT *Proto) {
|
||||
float AngleError;
|
||||
|
||||
MaxAngleError = training_angle_pad / 360.0;
|
||||
AngleError = fabs(Proto->Angle - Feature->Params[PicoFeatDir]);
|
||||
AngleError = std::fabs(Proto->Angle - Feature->Params[PicoFeatDir]);
|
||||
if (AngleError > 0.5) {
|
||||
AngleError = 1.0 - AngleError;
|
||||
}
|
||||
@ -296,8 +296,8 @@ void ComputePaddedBoundingBox(PROTO_STRUCT *Proto, float TangentPad, float Ortho
|
||||
FRECT *BoundingBox) {
|
||||
float Length = Proto->Length / 2.0 + TangentPad;
|
||||
float Angle = Proto->Angle * 2.0 * M_PI;
|
||||
float CosOfAngle = fabs(cos(Angle));
|
||||
float SinOfAngle = fabs(sin(Angle));
|
||||
float CosOfAngle = fabs(std::cos(Angle));
|
||||
float SinOfAngle = fabs(std::sin(Angle));
|
||||
|
||||
float Pad = std::max(CosOfAngle * Length, SinOfAngle * OrthogonalPad);
|
||||
BoundingBox->MinX = Proto->X - Pad;
|
||||
|
@ -22,6 +22,7 @@
|
||||
# include "config_auto.h"
|
||||
#endif
|
||||
|
||||
#include <cmath>
|
||||
#include <string>
|
||||
#include "lstmtrainer.h"
|
||||
|
||||
@ -71,14 +72,17 @@ const int kTargetXScale = 5;
|
||||
const int kTargetYScale = 100;
|
||||
#endif // !GRAPHICS_DISABLED
|
||||
|
||||
LSTMTrainer::LSTMTrainer() : randomly_rotate_(false), training_data_(0), sub_trainer_(nullptr) {
|
||||
LSTMTrainer::LSTMTrainer()
|
||||
: randomly_rotate_(false), training_data_(0), sub_trainer_(nullptr) {
|
||||
EmptyConstructor();
|
||||
debug_interval_ = 0;
|
||||
}
|
||||
|
||||
LSTMTrainer::LSTMTrainer(const char *model_base, const char *checkpoint_name, int debug_interval,
|
||||
int64_t max_memory)
|
||||
: randomly_rotate_(false), training_data_(max_memory), sub_trainer_(nullptr) {
|
||||
LSTMTrainer::LSTMTrainer(const char *model_base, const char *checkpoint_name,
|
||||
int debug_interval, int64_t max_memory)
|
||||
: randomly_rotate_(false),
|
||||
training_data_(max_memory),
|
||||
sub_trainer_(nullptr) {
|
||||
EmptyConstructor();
|
||||
debug_interval_ = debug_interval;
|
||||
model_base_ = model_base;
|
||||
@ -96,7 +100,8 @@ LSTMTrainer::~LSTMTrainer() {
|
||||
|
||||
// Tries to deserialize a trainer from the given file and silently returns
|
||||
// false in case of failure.
|
||||
bool LSTMTrainer::TryLoadingCheckpoint(const char *filename, const char *old_traineddata) {
|
||||
bool LSTMTrainer::TryLoadingCheckpoint(const char *filename,
|
||||
const char *old_traineddata) {
|
||||
std::vector<char> data;
|
||||
if (!LoadDataFromFile(filename, &data)) {
|
||||
return false;
|
||||
@ -106,7 +111,8 @@ bool LSTMTrainer::TryLoadingCheckpoint(const char *filename, const char *old_tra
|
||||
return false;
|
||||
}
|
||||
if (IsIntMode()) {
|
||||
tprintf("Error, %s is an integer (fast) model, cannot continue training\n", filename);
|
||||
tprintf("Error, %s is an integer (fast) model, cannot continue training\n",
|
||||
filename);
|
||||
return false;
|
||||
}
|
||||
if (((old_traineddata == nullptr || *old_traineddata == '\0') &&
|
||||
@ -114,7 +120,8 @@ bool LSTMTrainer::TryLoadingCheckpoint(const char *filename, const char *old_tra
|
||||
filename == old_traineddata) {
|
||||
return true; // Normal checkpoint load complete.
|
||||
}
|
||||
tprintf("Code range changed from %d to %d!\n", network_->NumOutputs(), recoder_.code_range());
|
||||
tprintf("Code range changed from %d to %d!\n", network_->NumOutputs(),
|
||||
recoder_.code_range());
|
||||
if (old_traineddata == nullptr || *old_traineddata == '\0') {
|
||||
tprintf("Must supply the old traineddata for code conversion!\n");
|
||||
return false;
|
||||
@ -152,21 +159,23 @@ bool LSTMTrainer::TryLoadingCheckpoint(const char *filename, const char *old_tra
|
||||
// are implemented.
|
||||
// For other args see NetworkBuilder::InitNetwork.
|
||||
// Note: Be sure to call InitCharSet before InitNetwork!
|
||||
bool LSTMTrainer::InitNetwork(const char *network_spec, int append_index, int net_flags,
|
||||
float weight_range, float learning_rate, float momentum,
|
||||
bool LSTMTrainer::InitNetwork(const char *network_spec, int append_index,
|
||||
int net_flags, float weight_range,
|
||||
float learning_rate, float momentum,
|
||||
float adam_beta) {
|
||||
mgr_.SetVersionString(mgr_.VersionString() + ":" + network_spec);
|
||||
adam_beta_ = adam_beta;
|
||||
learning_rate_ = learning_rate;
|
||||
momentum_ = momentum;
|
||||
SetNullChar();
|
||||
if (!NetworkBuilder::InitNetwork(recoder_.code_range(), network_spec, append_index, net_flags,
|
||||
weight_range, &randomizer_, &network_)) {
|
||||
if (!NetworkBuilder::InitNetwork(recoder_.code_range(), network_spec,
|
||||
append_index, net_flags, weight_range,
|
||||
&randomizer_, &network_)) {
|
||||
return false;
|
||||
}
|
||||
network_str_ += network_spec;
|
||||
tprintf("Built network:%s from request %s\n",
|
||||
network_->spec().c_str(), network_spec);
|
||||
tprintf("Built network:%s from request %s\n", network_->spec().c_str(),
|
||||
network_spec);
|
||||
tprintf(
|
||||
"Training parameters:\n Debug interval = %d,"
|
||||
" weights = %g, learning rate = %g, momentum=%g\n",
|
||||
@ -222,14 +231,14 @@ void LSTMTrainer::InitIterations() {
|
||||
// If the training sample is usable, grid searches for the optimal
|
||||
// dict_ratio/cert_offset, and returns the results in a string of space-
|
||||
// separated triplets of ratio,offset=worderr.
|
||||
Trainability LSTMTrainer::GridSearchDictParams(const ImageData *trainingdata, int iteration,
|
||||
double min_dict_ratio, double dict_ratio_step,
|
||||
double max_dict_ratio, double min_cert_offset,
|
||||
double cert_offset_step, double max_cert_offset,
|
||||
std::string &results) {
|
||||
Trainability LSTMTrainer::GridSearchDictParams(
|
||||
const ImageData *trainingdata, int iteration, double min_dict_ratio,
|
||||
double dict_ratio_step, double max_dict_ratio, double min_cert_offset,
|
||||
double cert_offset_step, double max_cert_offset, std::string &results) {
|
||||
sample_iteration_ = iteration;
|
||||
NetworkIO fwd_outputs, targets;
|
||||
Trainability result = PrepareForBackward(trainingdata, &fwd_outputs, &targets);
|
||||
Trainability result =
|
||||
PrepareForBackward(trainingdata, &fwd_outputs, &targets);
|
||||
if (result == UNENCODABLE || result == HI_PRECISION_ERR || dict_ == nullptr) {
|
||||
return result;
|
||||
}
|
||||
@ -238,8 +247,10 @@ Trainability LSTMTrainer::GridSearchDictParams(const ImageData *trainingdata, in
|
||||
std::vector<int> truth_labels, ocr_labels, xcoords;
|
||||
ASSERT_HOST(EncodeString(trainingdata->transcription(), &truth_labels));
|
||||
// NO-dict error.
|
||||
RecodeBeamSearch base_search(recoder_, null_char_, SimpleTextOutput(), nullptr);
|
||||
base_search.Decode(fwd_outputs, 1.0, 0.0, RecodeBeamSearch::kMinCertainty, nullptr);
|
||||
RecodeBeamSearch base_search(recoder_, null_char_, SimpleTextOutput(),
|
||||
nullptr);
|
||||
base_search.Decode(fwd_outputs, 1.0, 0.0, RecodeBeamSearch::kMinCertainty,
|
||||
nullptr);
|
||||
base_search.ExtractBestPathAsLabels(&ocr_labels, &xcoords);
|
||||
std::string truth_text = DecodeLabels(truth_labels);
|
||||
std::string ocr_text = DecodeLabels(ocr_labels);
|
||||
@ -248,18 +259,21 @@ Trainability LSTMTrainer::GridSearchDictParams(const ImageData *trainingdata, in
|
||||
|
||||
RecodeBeamSearch search(recoder_, null_char_, SimpleTextOutput(), dict_);
|
||||
for (double r = min_dict_ratio; r < max_dict_ratio; r += dict_ratio_step) {
|
||||
for (double c = min_cert_offset; c < max_cert_offset; c += cert_offset_step) {
|
||||
search.Decode(fwd_outputs, r, c, RecodeBeamSearch::kMinCertainty, nullptr);
|
||||
for (double c = min_cert_offset; c < max_cert_offset;
|
||||
c += cert_offset_step) {
|
||||
search.Decode(fwd_outputs, r, c, RecodeBeamSearch::kMinCertainty,
|
||||
nullptr);
|
||||
search.ExtractBestPathAsLabels(&ocr_labels, &xcoords);
|
||||
truth_text = DecodeLabels(truth_labels);
|
||||
ocr_text = DecodeLabels(ocr_labels);
|
||||
// This is destructive on both strings.
|
||||
double word_error = ComputeWordError(&truth_text, &ocr_text);
|
||||
if ((r == min_dict_ratio && c == min_cert_offset) || !std::isfinite(word_error)) {
|
||||
if ((r == min_dict_ratio && c == min_cert_offset) ||
|
||||
!std::isfinite(word_error)) {
|
||||
std::string t = DecodeLabels(truth_labels);
|
||||
std::string o = DecodeLabels(ocr_labels);
|
||||
tprintf("r=%g, c=%g, truth=%s, ocr=%s, wderr=%g, truth[0]=%d\n", r, c, t.c_str(), o.c_str(),
|
||||
word_error, truth_labels[0]);
|
||||
std::string o = DecodeLabels(ocr_labels);
|
||||
tprintf("r=%g, c=%g, truth=%s, ocr=%s, wderr=%g, truth[0]=%d\n", r, c,
|
||||
t.c_str(), o.c_str(), word_error, truth_labels[0]);
|
||||
}
|
||||
results += " " + std::to_string(r);
|
||||
results += "," + std::to_string(c);
|
||||
@ -278,17 +292,20 @@ void LSTMTrainer::DebugNetwork() {
|
||||
// tesseract into memory ready for training. Returns false if nothing was
|
||||
// loaded.
|
||||
bool LSTMTrainer::LoadAllTrainingData(const std::vector<std::string> &filenames,
|
||||
CachingStrategy cache_strategy, bool randomly_rotate) {
|
||||
CachingStrategy cache_strategy,
|
||||
bool randomly_rotate) {
|
||||
randomly_rotate_ = randomly_rotate;
|
||||
training_data_.Clear();
|
||||
return training_data_.LoadDocuments(filenames, cache_strategy, LoadDataFromFile);
|
||||
return training_data_.LoadDocuments(filenames, cache_strategy,
|
||||
LoadDataFromFile);
|
||||
}
|
||||
|
||||
// Keeps track of best and locally worst char error_rate and launches tests
|
||||
// using tester, when a new min or max is reached.
|
||||
// Writes checkpoints at appropriate times and builds and returns a log message
|
||||
// to indicate progress. Returns false if nothing interesting happened.
|
||||
bool LSTMTrainer::MaintainCheckpoints(TestCallback tester, std::string &log_msg) {
|
||||
bool LSTMTrainer::MaintainCheckpoints(const TestCallback &tester,
|
||||
std::string &log_msg) {
|
||||
PrepareLogMsg(log_msg);
|
||||
double error_rate = CharError();
|
||||
int iteration = learning_iteration();
|
||||
@ -320,7 +337,8 @@ bool LSTMTrainer::MaintainCheckpoints(TestCallback tester, std::string &log_msg)
|
||||
sub_trainer_.reset();
|
||||
stall_iteration_ = learning_iteration() + kMinStallIterations;
|
||||
if (TransitionTrainingStage(kStageTransitionThreshold)) {
|
||||
log_msg += " Transitioned to stage " + std::to_string(CurrentTrainingStage());
|
||||
log_msg +=
|
||||
" Transitioned to stage " + std::to_string(CurrentTrainingStage());
|
||||
}
|
||||
SaveTrainingDump(NO_BEST_TRAINER, *this, &best_trainer_);
|
||||
if (error_rate < error_rate_of_last_saved_best_ * kBestCheckpointFraction) {
|
||||
@ -386,7 +404,8 @@ void LSTMTrainer::PrepareLogMsg(std::string &log_msg) const {
|
||||
|
||||
// Appends <intro_str> iteration learning_iteration()/training_iteration()/
|
||||
// sample_iteration() to the log_msg.
|
||||
void LSTMTrainer::LogIterations(const char *intro_str, std::string &log_msg) const {
|
||||
void LSTMTrainer::LogIterations(const char *intro_str,
|
||||
std::string &log_msg) const {
|
||||
log_msg += intro_str;
|
||||
log_msg += " iteration " + std::to_string(learning_iteration());
|
||||
log_msg += "/" + std::to_string(training_iteration());
|
||||
@ -396,7 +415,8 @@ void LSTMTrainer::LogIterations(const char *intro_str, std::string &log_msg) con
|
||||
// Returns true and increments the training_stage_ if the error rate has just
|
||||
// passed through the given threshold for the first time.
|
||||
bool LSTMTrainer::TransitionTrainingStage(float error_threshold) {
|
||||
if (best_error_rate_ < error_threshold && training_stage_ + 1 < num_training_stages_) {
|
||||
if (best_error_rate_ < error_threshold &&
|
||||
training_stage_ + 1 < num_training_stages_) {
|
||||
++training_stage_;
|
||||
return true;
|
||||
}
|
||||
@ -404,8 +424,8 @@ bool LSTMTrainer::TransitionTrainingStage(float error_threshold) {
|
||||
}
|
||||
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
bool LSTMTrainer::Serialize(SerializeAmount serialize_amount, const TessdataManager *mgr,
|
||||
TFile *fp) const {
|
||||
bool LSTMTrainer::Serialize(SerializeAmount serialize_amount,
|
||||
const TessdataManager *mgr, TFile *fp) const {
|
||||
if (!LSTMRecognizer::Serialize(mgr, fp)) {
|
||||
return false;
|
||||
}
|
||||
@ -470,7 +490,8 @@ bool LSTMTrainer::Serialize(SerializeAmount serialize_amount, const TessdataMana
|
||||
return false;
|
||||
}
|
||||
std::vector<char> sub_data;
|
||||
if (sub_trainer_ != nullptr && !SaveTrainingDump(LIGHT, *sub_trainer_, &sub_data)) {
|
||||
if (sub_trainer_ != nullptr &&
|
||||
!SaveTrainingDump(LIGHT, *sub_trainer_, &sub_data)) {
|
||||
return false;
|
||||
}
|
||||
if (!fp->Serialize(sub_data)) {
|
||||
@ -587,11 +608,13 @@ void LSTMTrainer::StartSubtrainer(std::string &log_msg) {
|
||||
log_msg += " Failed to revert to previous best for trial!";
|
||||
sub_trainer_.reset();
|
||||
} else {
|
||||
log_msg += " Trial sub_trainer_ from iteration " + std::to_string(sub_trainer_->training_iteration());
|
||||
log_msg += " Trial sub_trainer_ from iteration " +
|
||||
std::to_string(sub_trainer_->training_iteration());
|
||||
// Reduce learning rate so it doesn't diverge this time.
|
||||
sub_trainer_->ReduceLearningRates(this, log_msg);
|
||||
// If it fails again, we will wait twice as long before reverting again.
|
||||
int stall_offset = learning_iteration() - sub_trainer_->learning_iteration();
|
||||
int stall_offset =
|
||||
learning_iteration() - sub_trainer_->learning_iteration();
|
||||
stall_iteration_ = learning_iteration() + 2 * stall_offset;
|
||||
sub_trainer_->stall_iteration_ = stall_iteration_;
|
||||
// Re-save the best trainer with the new learning rates and stall iteration.
|
||||
@ -619,7 +642,8 @@ SubTrainerResult LSTMTrainer::UpdateSubtrainer(std::string &log_msg) {
|
||||
int end_iteration = training_iteration();
|
||||
while (sub_trainer_->training_iteration() < end_iteration &&
|
||||
sub_margin >= kSubTrainerMarginFraction) {
|
||||
int target_iteration = sub_trainer_->training_iteration() + kNumPagesPerBatch;
|
||||
int target_iteration =
|
||||
sub_trainer_->training_iteration() + kNumPagesPerBatch;
|
||||
while (sub_trainer_->training_iteration() < target_iteration) {
|
||||
sub_trainer_->TrainOnLine(this, false);
|
||||
}
|
||||
@ -631,12 +655,14 @@ SubTrainerResult LSTMTrainer::UpdateSubtrainer(std::string &log_msg) {
|
||||
sub_error = sub_trainer_->CharError();
|
||||
sub_margin = (training_error - sub_error) / sub_error;
|
||||
}
|
||||
if (sub_error < best_error_rate_ && sub_margin >= kSubTrainerMarginFraction) {
|
||||
if (sub_error < best_error_rate_ &&
|
||||
sub_margin >= kSubTrainerMarginFraction) {
|
||||
// The sub_trainer_ has won the race to a new best. Switch to it.
|
||||
std::vector<char> updated_trainer;
|
||||
SaveTrainingDump(LIGHT, *sub_trainer_, &updated_trainer);
|
||||
ReadTrainingDump(updated_trainer, *this);
|
||||
log_msg += " Sub trainer wins at iteration " + std::to_string(training_iteration());
|
||||
log_msg += " Sub trainer wins at iteration " +
|
||||
std::to_string(training_iteration());
|
||||
log_msg += "\n";
|
||||
return STR_REPLACED;
|
||||
}
|
||||
@ -647,11 +673,13 @@ SubTrainerResult LSTMTrainer::UpdateSubtrainer(std::string &log_msg) {
|
||||
|
||||
// Reduces network learning rates, either for everything, or for layers
|
||||
// independently, according to NF_LAYER_SPECIFIC_LR.
|
||||
void LSTMTrainer::ReduceLearningRates(LSTMTrainer *samples_trainer, std::string &log_msg) {
|
||||
void LSTMTrainer::ReduceLearningRates(LSTMTrainer *samples_trainer,
|
||||
std::string &log_msg) {
|
||||
if (network_->TestFlag(NF_LAYER_SPECIFIC_LR)) {
|
||||
int num_reduced =
|
||||
ReduceLayerLearningRates(kLearningRateDecay, kNumAdjustmentIterations, samples_trainer);
|
||||
log_msg += "\nReduced learning rate on layers: " + std::to_string(num_reduced);
|
||||
int num_reduced = ReduceLayerLearningRates(
|
||||
kLearningRateDecay, kNumAdjustmentIterations, samples_trainer);
|
||||
log_msg +=
|
||||
"\nReduced learning rate on layers: " + std::to_string(num_reduced);
|
||||
} else {
|
||||
ScaleLearningRate(kLearningRateDecay);
|
||||
log_msg += "\nReduced learning rate to :" + std::to_string(learning_rate_);
|
||||
@ -712,7 +740,8 @@ int LSTMTrainer::ReduceLayerLearningRates(TFloat factor, int num_samples,
|
||||
copy_trainer.SetIteration(iteration);
|
||||
// Train on the sample, but keep the update in updates_ instead of
|
||||
// applying to the weights.
|
||||
const ImageData *trainingdata = copy_trainer.TrainOnLine(samples_trainer, true);
|
||||
const ImageData *trainingdata =
|
||||
copy_trainer.TrainOnLine(samples_trainer, true);
|
||||
if (trainingdata == nullptr) {
|
||||
continue;
|
||||
}
|
||||
@ -727,7 +756,8 @@ int LSTMTrainer::ReduceLayerLearningRates(TFloat factor, int num_samples,
|
||||
samples_trainer->ReadTrainingDump(updated_trainer, layer_trainer);
|
||||
Network *layer = layer_trainer.GetLayer(layers[i]);
|
||||
// Update the weights in just the layer, using Adam if enabled.
|
||||
layer->Update(0.0, momentum_, adam_beta_, layer_trainer.training_iteration_ + 1);
|
||||
layer->Update(0.0, momentum_, adam_beta_,
|
||||
layer_trainer.training_iteration_ + 1);
|
||||
// Zero the updates matrix again.
|
||||
layer->Update(0.0, 0.0, 0.0, 0);
|
||||
// Train again on the same sample, again holding back the updates.
|
||||
@ -735,9 +765,10 @@ int LSTMTrainer::ReduceLayerLearningRates(TFloat factor, int num_samples,
|
||||
// Count the sign changes in the updates in layer vs in copy_trainer.
|
||||
float before_bad = bad_sums[ww][i];
|
||||
float before_ok = ok_sums[ww][i];
|
||||
layer->CountAlternators(*copy_trainer.GetLayer(layers[i]), &ok_sums[ww][i],
|
||||
&bad_sums[ww][i]);
|
||||
float bad_frac = bad_sums[ww][i] + ok_sums[ww][i] - before_bad - before_ok;
|
||||
layer->CountAlternators(*copy_trainer.GetLayer(layers[i]),
|
||||
&ok_sums[ww][i], &bad_sums[ww][i]);
|
||||
float bad_frac =
|
||||
bad_sums[ww][i] + ok_sums[ww][i] - before_bad - before_ok;
|
||||
if (bad_frac > 0.0f) {
|
||||
bad_frac = (bad_sums[ww][i] - before_bad) / bad_frac;
|
||||
}
|
||||
@ -756,8 +787,8 @@ int LSTMTrainer::ReduceLayerLearningRates(TFloat factor, int num_samples,
|
||||
TFloat total_same = bad_sums[LR_SAME][i] + ok_sums[LR_SAME][i];
|
||||
TFloat frac_down = bad_sums[LR_DOWN][i] / total_down;
|
||||
TFloat frac_same = bad_sums[LR_SAME][i] / total_same;
|
||||
tprintf("Layer %d=%s: lr %g->%g%%, lr %g->%g%%", i, layer->name().c_str(), lr * factor,
|
||||
100.0 * frac_down, lr, 100.0 * frac_same);
|
||||
tprintf("Layer %d=%s: lr %g->%g%%, lr %g->%g%%", i, layer->name().c_str(),
|
||||
lr * factor, 100.0 * frac_down, lr, 100.0 * frac_same);
|
||||
if (frac_down < frac_same * kImprovementFraction) {
|
||||
tprintf(" REDUCED\n");
|
||||
ScaleLayerLearningRate(layers[i], factor);
|
||||
@ -781,9 +812,10 @@ int LSTMTrainer::ReduceLayerLearningRates(TFloat factor, int num_samples,
|
||||
// Converts the string to integer class labels, with appropriate null_char_s
|
||||
// in between if not in SimpleTextOutput mode. Returns false on failure.
|
||||
/* static */
|
||||
bool LSTMTrainer::EncodeString(const std::string &str, const UNICHARSET &unicharset,
|
||||
const UnicharCompress *recoder, bool simple_text, int null_char,
|
||||
std::vector<int> *labels) {
|
||||
bool LSTMTrainer::EncodeString(const std::string &str,
|
||||
const UNICHARSET &unicharset,
|
||||
const UnicharCompress *recoder, bool simple_text,
|
||||
int null_char, std::vector<int> *labels) {
|
||||
if (str.c_str() == nullptr || str.length() <= 0) {
|
||||
tprintf("Empty truth string!\n");
|
||||
return false;
|
||||
@ -795,7 +827,8 @@ bool LSTMTrainer::EncodeString(const std::string &str, const UNICHARSET &unichar
|
||||
labels->push_back(null_char);
|
||||
}
|
||||
std::string cleaned = unicharset.CleanupString(str.c_str());
|
||||
if (unicharset.encode_string(cleaned.c_str(), true, &internal_labels, nullptr, &err_index)) {
|
||||
if (unicharset.encode_string(cleaned.c_str(), true, &internal_labels, nullptr,
|
||||
&err_index)) {
|
||||
bool success = true;
|
||||
for (auto internal_label : internal_labels) {
|
||||
if (recoder != nullptr) {
|
||||
@ -835,19 +868,23 @@ bool LSTMTrainer::EncodeString(const std::string &str, const UNICHARSET &unichar
|
||||
|
||||
// Performs forward-backward on the given trainingdata.
|
||||
// Returns a Trainability enum to indicate the suitability of the sample.
|
||||
Trainability LSTMTrainer::TrainOnLine(const ImageData *trainingdata, bool batch) {
|
||||
Trainability LSTMTrainer::TrainOnLine(const ImageData *trainingdata,
|
||||
bool batch) {
|
||||
NetworkIO fwd_outputs, targets;
|
||||
Trainability trainable = PrepareForBackward(trainingdata, &fwd_outputs, &targets);
|
||||
Trainability trainable =
|
||||
PrepareForBackward(trainingdata, &fwd_outputs, &targets);
|
||||
++sample_iteration_;
|
||||
if (trainable == UNENCODABLE || trainable == NOT_BOXED) {
|
||||
return trainable; // Sample was unusable.
|
||||
}
|
||||
bool debug = debug_interval_ > 0 && training_iteration() % debug_interval_ == 0;
|
||||
bool debug =
|
||||
debug_interval_ > 0 && training_iteration() % debug_interval_ == 0;
|
||||
// Run backprop on the output.
|
||||
NetworkIO bp_deltas;
|
||||
if (network_->IsTraining() &&
|
||||
(trainable != PERFECT ||
|
||||
training_iteration() > last_perfect_training_iteration_ + perfect_delay_)) {
|
||||
training_iteration() >
|
||||
last_perfect_training_iteration_ + perfect_delay_)) {
|
||||
network_->Backward(debug, targets, &scratch_space_, &bp_deltas);
|
||||
network_->Update(learning_rate_, batch ? -1.0f : momentum_, adam_beta_,
|
||||
training_iteration_ + 1);
|
||||
@ -864,18 +901,21 @@ Trainability LSTMTrainer::TrainOnLine(const ImageData *trainingdata, bool batch)
|
||||
|
||||
// Prepares the ground truth, runs forward, and prepares the targets.
|
||||
// Returns a Trainability enum to indicate the suitability of the sample.
|
||||
Trainability LSTMTrainer::PrepareForBackward(const ImageData *trainingdata, NetworkIO *fwd_outputs,
|
||||
Trainability LSTMTrainer::PrepareForBackward(const ImageData *trainingdata,
|
||||
NetworkIO *fwd_outputs,
|
||||
NetworkIO *targets) {
|
||||
if (trainingdata == nullptr) {
|
||||
tprintf("Null trainingdata.\n");
|
||||
return UNENCODABLE;
|
||||
}
|
||||
// Ensure repeatability of random elements even across checkpoints.
|
||||
bool debug = debug_interval_ > 0 && training_iteration() % debug_interval_ == 0;
|
||||
bool debug =
|
||||
debug_interval_ > 0 && training_iteration() % debug_interval_ == 0;
|
||||
std::vector<int> truth_labels;
|
||||
if (!EncodeString(trainingdata->transcription(), &truth_labels)) {
|
||||
tprintf("Can't encode transcription: '%s' in language '%s'\n",
|
||||
trainingdata->transcription().c_str(), trainingdata->language().c_str());
|
||||
trainingdata->transcription().c_str(),
|
||||
trainingdata->language().c_str());
|
||||
return UNENCODABLE;
|
||||
}
|
||||
bool upside_down = false;
|
||||
@ -908,8 +948,8 @@ Trainability LSTMTrainer::PrepareForBackward(const ImageData *trainingdata, Netw
|
||||
float image_scale;
|
||||
NetworkIO inputs;
|
||||
bool invert = trainingdata->boxes().empty();
|
||||
if (!RecognizeLine(*trainingdata, invert, debug, invert, upside_down, &image_scale, &inputs,
|
||||
fwd_outputs)) {
|
||||
if (!RecognizeLine(*trainingdata, invert, debug, invert, upside_down,
|
||||
&image_scale, &inputs, fwd_outputs)) {
|
||||
tprintf("Image %s not trainable\n", trainingdata->imagefilename().c_str());
|
||||
return UNENCODABLE;
|
||||
}
|
||||
@ -917,12 +957,14 @@ Trainability LSTMTrainer::PrepareForBackward(const ImageData *trainingdata, Netw
|
||||
LossType loss_type = OutputLossType();
|
||||
if (loss_type == LT_SOFTMAX) {
|
||||
if (!ComputeTextTargets(*fwd_outputs, truth_labels, targets)) {
|
||||
tprintf("Compute simple targets failed for %s!\n", trainingdata->imagefilename().c_str());
|
||||
tprintf("Compute simple targets failed for %s!\n",
|
||||
trainingdata->imagefilename().c_str());
|
||||
return UNENCODABLE;
|
||||
}
|
||||
} else if (loss_type == LT_CTC) {
|
||||
if (!ComputeCTCTargets(truth_labels, fwd_outputs, targets)) {
|
||||
tprintf("Compute CTC targets failed for %s!\n", trainingdata->imagefilename().c_str());
|
||||
tprintf("Compute CTC targets failed for %s!\n",
|
||||
trainingdata->imagefilename().c_str());
|
||||
return UNENCODABLE;
|
||||
}
|
||||
} else {
|
||||
@ -936,7 +978,8 @@ Trainability LSTMTrainer::PrepareForBackward(const ImageData *trainingdata, Netw
|
||||
if (loss_type != LT_CTC) {
|
||||
LabelsFromOutputs(*targets, &truth_labels, &xcoords);
|
||||
}
|
||||
if (!DebugLSTMTraining(inputs, *trainingdata, *fwd_outputs, truth_labels, *targets)) {
|
||||
if (!DebugLSTMTraining(inputs, *trainingdata, *fwd_outputs, truth_labels,
|
||||
*targets)) {
|
||||
tprintf("Input width was %d\n", inputs.Width());
|
||||
return UNENCODABLE;
|
||||
}
|
||||
@ -945,7 +988,8 @@ Trainability LSTMTrainer::PrepareForBackward(const ImageData *trainingdata, Netw
|
||||
targets->SubtractAllFromFloat(*fwd_outputs);
|
||||
if (debug_interval_ != 0) {
|
||||
if (truth_text != ocr_text) {
|
||||
tprintf("Iteration %d: BEST OCR TEXT : %s\n", training_iteration(), ocr_text.c_str());
|
||||
tprintf("Iteration %d: BEST OCR TEXT : %s\n", training_iteration(),
|
||||
ocr_text.c_str());
|
||||
}
|
||||
}
|
||||
double char_error = ComputeCharError(truth_labels, ocr_labels);
|
||||
@ -968,7 +1012,8 @@ Trainability LSTMTrainer::PrepareForBackward(const ImageData *trainingdata, Netw
|
||||
// restored. *this must always be the master trainer that retains the only
|
||||
// copy of the training data and language model. trainer is the model that is
|
||||
// actually serialized.
|
||||
bool LSTMTrainer::SaveTrainingDump(SerializeAmount serialize_amount, const LSTMTrainer &trainer,
|
||||
bool LSTMTrainer::SaveTrainingDump(SerializeAmount serialize_amount,
|
||||
const LSTMTrainer &trainer,
|
||||
std::vector<char> *data) const {
|
||||
TFile fp;
|
||||
fp.OpenWrite(data);
|
||||
@ -976,7 +1021,8 @@ bool LSTMTrainer::SaveTrainingDump(SerializeAmount serialize_amount, const LSTMT
|
||||
}
|
||||
|
||||
// Restores the model to *this.
|
||||
bool LSTMTrainer::ReadLocalTrainingDump(const TessdataManager *mgr, const char *data, int size) {
|
||||
bool LSTMTrainer::ReadLocalTrainingDump(const TessdataManager *mgr,
|
||||
const char *data, int size) {
|
||||
if (size == 0) {
|
||||
tprintf("Warning: data size is 0 in LSTMTrainer::ReadLocalTrainingDump\n");
|
||||
return false;
|
||||
@ -990,7 +1036,8 @@ bool LSTMTrainer::ReadLocalTrainingDump(const TessdataManager *mgr, const char *
|
||||
bool LSTMTrainer::SaveTraineddata(const char *filename) {
|
||||
std::vector<char> recognizer_data;
|
||||
SaveRecognitionDump(&recognizer_data);
|
||||
mgr_.OverwriteEntry(TESSDATA_LSTM, &recognizer_data[0], recognizer_data.size());
|
||||
mgr_.OverwriteEntry(TESSDATA_LSTM, &recognizer_data[0],
|
||||
recognizer_data.size());
|
||||
return mgr_.SaveFile(filename, SaveDataToFile);
|
||||
}
|
||||
|
||||
@ -1025,8 +1072,8 @@ void LSTMTrainer::FillErrorBuffer(double new_error, ErrorTypes type) {
|
||||
|
||||
// Helper generates a map from each current recoder_ code (ie softmax index)
|
||||
// to the corresponding old_recoder code, or -1 if there isn't one.
|
||||
std::vector<int> LSTMTrainer::MapRecoder(const UNICHARSET &old_chset,
|
||||
const UnicharCompress &old_recoder) const {
|
||||
std::vector<int> LSTMTrainer::MapRecoder(
|
||||
const UNICHARSET &old_chset, const UnicharCompress &old_recoder) const {
|
||||
int num_new_codes = recoder_.code_range();
|
||||
int num_new_unichars = GetUnicharset().size();
|
||||
std::vector<int> code_map(num_new_codes, -1);
|
||||
@ -1045,9 +1092,10 @@ std::vector<int> LSTMTrainer::MapRecoder(const UNICHARSET &old_chset,
|
||||
continue;
|
||||
}
|
||||
// The old unicharset must have the same unichar.
|
||||
int old_uid = uid < num_new_unichars
|
||||
? old_chset.unichar_to_id(GetUnicharset().id_to_unichar(uid))
|
||||
: old_chset.size() - 1;
|
||||
int old_uid =
|
||||
uid < num_new_unichars
|
||||
? old_chset.unichar_to_id(GetUnicharset().id_to_unichar(uid))
|
||||
: old_chset.size() - 1;
|
||||
if (old_uid == INVALID_UNICHAR_ID) {
|
||||
continue;
|
||||
}
|
||||
@ -1079,7 +1127,8 @@ void LSTMTrainer::InitCharSet() {
|
||||
|
||||
// Helper computes and sets the null_char_.
|
||||
void LSTMTrainer::SetNullChar() {
|
||||
null_char_ = GetUnicharset().has_special_codes() ? UNICHAR_BROKEN : GetUnicharset().size();
|
||||
null_char_ = GetUnicharset().has_special_codes() ? UNICHAR_BROKEN
|
||||
: GetUnicharset().size();
|
||||
RecodedCharID code;
|
||||
recoder_.EncodeUnichar(null_char_, &code);
|
||||
null_char_ = code(0);
|
||||
@ -1103,7 +1152,8 @@ void LSTMTrainer::EmptyConstructor() {
|
||||
// as an image in the given window, and the corresponding labels at the
|
||||
// corresponding x_starts.
|
||||
// Returns false if the truth string is empty.
|
||||
bool LSTMTrainer::DebugLSTMTraining(const NetworkIO &inputs, const ImageData &trainingdata,
|
||||
bool LSTMTrainer::DebugLSTMTraining(const NetworkIO &inputs,
|
||||
const ImageData &trainingdata,
|
||||
const NetworkIO &fwd_outputs,
|
||||
const std::vector<int> &truth_labels,
|
||||
const NetworkIO &outputs) {
|
||||
@ -1118,12 +1168,15 @@ bool LSTMTrainer::DebugLSTMTraining(const NetworkIO &inputs, const ImageData &tr
|
||||
std::vector<int> xcoords;
|
||||
LabelsFromOutputs(outputs, &labels, &xcoords);
|
||||
std::string text = DecodeLabels(labels);
|
||||
tprintf("Iteration %d: GROUND TRUTH : %s\n", training_iteration(), truth_text.c_str());
|
||||
tprintf("Iteration %d: GROUND TRUTH : %s\n", training_iteration(),
|
||||
truth_text.c_str());
|
||||
if (truth_text != text) {
|
||||
tprintf("Iteration %d: ALIGNED TRUTH : %s\n", training_iteration(), text.c_str());
|
||||
tprintf("Iteration %d: ALIGNED TRUTH : %s\n", training_iteration(),
|
||||
text.c_str());
|
||||
}
|
||||
if (debug_interval_ > 0 && training_iteration() % debug_interval_ == 0) {
|
||||
tprintf("TRAINING activation path for truth string %s\n", truth_text.c_str());
|
||||
tprintf("TRAINING activation path for truth string %s\n",
|
||||
truth_text.c_str());
|
||||
DebugActivationPath(outputs, labels, xcoords);
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
DisplayForward(inputs, labels, xcoords, "LSTMTraining", &align_win_);
|
||||
@ -1140,11 +1193,12 @@ bool LSTMTrainer::DebugLSTMTraining(const NetworkIO &inputs, const ImageData &tr
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
|
||||
// Displays the network targets as line a line graph.
|
||||
void LSTMTrainer::DisplayTargets(const NetworkIO &targets, const char *window_name,
|
||||
ScrollView **window) {
|
||||
void LSTMTrainer::DisplayTargets(const NetworkIO &targets,
|
||||
const char *window_name, ScrollView **window) {
|
||||
int width = targets.Width();
|
||||
int num_features = targets.NumFeatures();
|
||||
Network::ClearWindow(true, window_name, width * kTargetXScale, kTargetYScale, window);
|
||||
Network::ClearWindow(true, window_name, width * kTargetXScale, kTargetYScale,
|
||||
window);
|
||||
for (int c = 0; c < num_features; ++c) {
|
||||
int color = c % (ScrollView::GREEN_YELLOW - 1) + 2;
|
||||
(*window)->Pen(static_cast<ScrollView::Color>(color));
|
||||
@ -1176,7 +1230,8 @@ void LSTMTrainer::DisplayTargets(const NetworkIO &targets, const char *window_na
|
||||
|
||||
// Builds a no-compromises target where the first positions should be the
|
||||
// truth labels and the rest is padded with the null_char_.
|
||||
bool LSTMTrainer::ComputeTextTargets(const NetworkIO &outputs, const std::vector<int> &truth_labels,
|
||||
bool LSTMTrainer::ComputeTextTargets(const NetworkIO &outputs,
|
||||
const std::vector<int> &truth_labels,
|
||||
NetworkIO *targets) {
|
||||
if (truth_labels.size() > targets->Width()) {
|
||||
tprintf("Error: transcription %s too long to fit into target of width %d\n",
|
||||
@ -1197,18 +1252,19 @@ bool LSTMTrainer::ComputeTextTargets(const NetworkIO &outputs, const std::vector
|
||||
// Builds a target using standard CTC. truth_labels should be pre-padded with
|
||||
// nulls wherever desired. They don't have to be between all labels.
|
||||
// outputs is input-output, as it gets clipped to minimum probability.
|
||||
bool LSTMTrainer::ComputeCTCTargets(const std::vector<int> &truth_labels, NetworkIO *outputs,
|
||||
NetworkIO *targets) {
|
||||
bool LSTMTrainer::ComputeCTCTargets(const std::vector<int> &truth_labels,
|
||||
NetworkIO *outputs, NetworkIO *targets) {
|
||||
// Bottom-clip outputs to a minimum probability.
|
||||
CTC::NormalizeProbs(outputs);
|
||||
return CTC::ComputeCTCTargets(truth_labels, null_char_, outputs->float_array(), targets);
|
||||
return CTC::ComputeCTCTargets(truth_labels, null_char_,
|
||||
outputs->float_array(), targets);
|
||||
}
|
||||
|
||||
// Computes network errors, and stores the results in the rolling buffers,
|
||||
// along with the supplied text_error.
|
||||
// Returns the delta error of the current sample (not running average.)
|
||||
double LSTMTrainer::ComputeErrorRates(const NetworkIO &deltas, double char_error,
|
||||
double word_error) {
|
||||
double LSTMTrainer::ComputeErrorRates(const NetworkIO &deltas,
|
||||
double char_error, double word_error) {
|
||||
UpdateErrorBuffer(ComputeRMSError(deltas), ET_RMS);
|
||||
// Delta error is the fraction of timesteps with >0.5 error in the top choice
|
||||
// score. If zero, then the top choice characters are guaranteed correct,
|
||||
@ -1253,7 +1309,7 @@ double LSTMTrainer::ComputeWinnerError(const NetworkIO &deltas) {
|
||||
for (int t = 0; t < width; ++t) {
|
||||
const float *class_errs = deltas.f(t);
|
||||
for (int c = 0; c < num_classes; ++c) {
|
||||
float abs_delta = fabs(class_errs[c]);
|
||||
float abs_delta = std::fabs(class_errs[c]);
|
||||
// TODO(rays) Filtering cases where the delta is very large to cut out
|
||||
// GT errors doesn't work. Find a better way or get better truth.
|
||||
if (0.5 <= abs_delta) {
|
||||
@ -1292,7 +1348,8 @@ double LSTMTrainer::ComputeCharError(const std::vector<int> &truth_str,
|
||||
|
||||
// Computes word recall error rate using a very simple bag of words algorithm.
|
||||
// NOTE that this is destructive on both input strings.
|
||||
double LSTMTrainer::ComputeWordError(std::string *truth_str, std::string *ocr_str) {
|
||||
double LSTMTrainer::ComputeWordError(std::string *truth_str,
|
||||
std::string *ocr_str) {
|
||||
using StrMap = std::unordered_map<std::string, int, std::hash<std::string>>;
|
||||
std::vector<std::string> truth_words = split(*truth_str, ' ');
|
||||
if (truth_words.empty()) {
|
||||
@ -1300,7 +1357,7 @@ double LSTMTrainer::ComputeWordError(std::string *truth_str, std::string *ocr_st
|
||||
}
|
||||
std::vector<std::string> ocr_words = split(*ocr_str, ' ');
|
||||
StrMap word_counts;
|
||||
for (auto truth_word : truth_words) {
|
||||
for (const auto &truth_word : truth_words) {
|
||||
std::string truth_word_string(truth_word.c_str());
|
||||
auto it = word_counts.find(truth_word_string);
|
||||
if (it == word_counts.end()) {
|
||||
@ -1309,7 +1366,7 @@ double LSTMTrainer::ComputeWordError(std::string *truth_str, std::string *ocr_st
|
||||
++it->second;
|
||||
}
|
||||
}
|
||||
for (auto ocr_word : ocr_words) {
|
||||
for (const auto &ocr_word : ocr_words) {
|
||||
std::string ocr_word_string(ocr_word.c_str());
|
||||
auto it = word_counts.find(ocr_word_string);
|
||||
if (it == word_counts.end()) {
|
||||
@ -1333,7 +1390,8 @@ void LSTMTrainer::UpdateErrorBuffer(double new_error, ErrorTypes type) {
|
||||
int index = training_iteration_ % kRollingBufferSize_;
|
||||
error_buffers_[type][index] = new_error;
|
||||
// Compute the mean error.
|
||||
int mean_count = std::min<int>(training_iteration_ + 1, error_buffers_[type].size());
|
||||
int mean_count =
|
||||
std::min<int>(training_iteration_ + 1, error_buffers_[type].size());
|
||||
double buffer_sum = 0.0;
|
||||
for (int i = 0; i < mean_count; ++i) {
|
||||
buffer_sum += error_buffers_[type][i];
|
||||
@ -1353,8 +1411,9 @@ void LSTMTrainer::RollErrorBuffers() {
|
||||
}
|
||||
++training_iteration_;
|
||||
if (debug_interval_ != 0) {
|
||||
tprintf("Mean rms=%g%%, delta=%g%%, train=%g%%(%g%%), skip ratio=%g%%\n", error_rates_[ET_RMS],
|
||||
error_rates_[ET_DELTA], error_rates_[ET_CHAR_ERROR], error_rates_[ET_WORD_RECERR],
|
||||
tprintf("Mean rms=%g%%, delta=%g%%, train=%g%%(%g%%), skip ratio=%g%%\n",
|
||||
error_rates_[ET_RMS], error_rates_[ET_DELTA],
|
||||
error_rates_[ET_CHAR_ERROR], error_rates_[ET_WORD_RECERR],
|
||||
error_rates_[ET_SKIP_RATIO]);
|
||||
}
|
||||
}
|
||||
@ -1364,11 +1423,14 @@ void LSTMTrainer::RollErrorBuffers() {
|
||||
// Tester is an externally supplied callback function that tests on some
|
||||
// data set with a given model and records the error rates in a graph.
|
||||
std::string LSTMTrainer::UpdateErrorGraph(int iteration, double error_rate,
|
||||
const std::vector<char> &model_data, TestCallback tester) {
|
||||
if (error_rate > best_error_rate_ && iteration < best_iteration_ + kErrorGraphInterval) {
|
||||
const std::vector<char> &model_data,
|
||||
const TestCallback &tester) {
|
||||
if (error_rate > best_error_rate_ &&
|
||||
iteration < best_iteration_ + kErrorGraphInterval) {
|
||||
// Too soon to record a new point.
|
||||
if (tester != nullptr && !worst_model_data_.empty()) {
|
||||
mgr_.OverwriteEntry(TESSDATA_LSTM, &worst_model_data_[0], worst_model_data_.size());
|
||||
mgr_.OverwriteEntry(TESSDATA_LSTM, &worst_model_data_[0],
|
||||
worst_model_data_.size());
|
||||
return tester(worst_iteration_, nullptr, mgr_, CurrentTrainingStage());
|
||||
} else {
|
||||
return "";
|
||||
@ -1384,8 +1446,10 @@ std::string LSTMTrainer::UpdateErrorGraph(int iteration, double error_rate,
|
||||
if (error_rate < best_error_rate_) {
|
||||
// This is a new (global) minimum.
|
||||
if (tester != nullptr && !worst_model_data_.empty()) {
|
||||
mgr_.OverwriteEntry(TESSDATA_LSTM, &worst_model_data_[0], worst_model_data_.size());
|
||||
result = tester(worst_iteration_, worst_error_rates_, mgr_, CurrentTrainingStage());
|
||||
mgr_.OverwriteEntry(TESSDATA_LSTM, &worst_model_data_[0],
|
||||
worst_model_data_.size());
|
||||
result = tester(worst_iteration_, worst_error_rates_, mgr_,
|
||||
CurrentTrainingStage());
|
||||
worst_model_data_.clear();
|
||||
best_model_data_ = model_data;
|
||||
}
|
||||
@ -1397,23 +1461,28 @@ std::string LSTMTrainer::UpdateErrorGraph(int iteration, double error_rate,
|
||||
// Compute 2% decay time.
|
||||
double two_percent_more = error_rate + 2.0;
|
||||
int i;
|
||||
for (i = best_error_history_.size() - 1; i >= 0 && best_error_history_[i] < two_percent_more;
|
||||
--i) {
|
||||
for (i = best_error_history_.size() - 1;
|
||||
i >= 0 && best_error_history_[i] < two_percent_more; --i) {
|
||||
}
|
||||
int old_iteration = i >= 0 ? best_error_iterations_[i] : 0;
|
||||
improvement_steps_ = iteration - old_iteration;
|
||||
tprintf("2 Percent improvement time=%d, best error was %g @ %d\n", improvement_steps_,
|
||||
i >= 0 ? best_error_history_[i] : 100.0, old_iteration);
|
||||
tprintf("2 Percent improvement time=%d, best error was %g @ %d\n",
|
||||
improvement_steps_, i >= 0 ? best_error_history_[i] : 100.0,
|
||||
old_iteration);
|
||||
} else if (error_rate > best_error_rate_) {
|
||||
// This is a new (local) maximum.
|
||||
if (tester != nullptr) {
|
||||
if (!best_model_data_.empty()) {
|
||||
mgr_.OverwriteEntry(TESSDATA_LSTM, &best_model_data_[0], best_model_data_.size());
|
||||
result = tester(best_iteration_, best_error_rates_, mgr_, CurrentTrainingStage());
|
||||
mgr_.OverwriteEntry(TESSDATA_LSTM, &best_model_data_[0],
|
||||
best_model_data_.size());
|
||||
result = tester(best_iteration_, best_error_rates_, mgr_,
|
||||
CurrentTrainingStage());
|
||||
} else if (!worst_model_data_.empty()) {
|
||||
// Allow for multiple data points with "worst" error rate.
|
||||
mgr_.OverwriteEntry(TESSDATA_LSTM, &worst_model_data_[0], worst_model_data_.size());
|
||||
result = tester(worst_iteration_, worst_error_rates_, mgr_, CurrentTrainingStage());
|
||||
mgr_.OverwriteEntry(TESSDATA_LSTM, &worst_model_data_[0],
|
||||
worst_model_data_.size());
|
||||
result = tester(worst_iteration_, worst_error_rates_, mgr_,
|
||||
CurrentTrainingStage());
|
||||
}
|
||||
if (result.length() > 0) {
|
||||
best_model_data_.clear();
|
||||
|
@ -73,7 +73,8 @@ class LSTMTrainer;
|
||||
// Function to compute and record error rates on some external test set(s).
|
||||
// Args are: iteration, mean errors, model, training stage.
|
||||
// Returns a string containing logging information about the tests.
|
||||
using TestCallback = std::function<std::string(int, const double *, const TessdataManager &, int)>;
|
||||
using TestCallback = std::function<std::string(int, const double *,
|
||||
const TessdataManager &, int)>;
|
||||
|
||||
// Trainer class for LSTM networks. Most of the effort is in creating the
|
||||
// ideal target outputs from the transcription. A box file is used if it is
|
||||
@ -82,8 +83,8 @@ using TestCallback = std::function<std::string(int, const double *, const Tessda
|
||||
class TESS_UNICHARSET_TRAINING_API LSTMTrainer : public LSTMRecognizer {
|
||||
public:
|
||||
LSTMTrainer();
|
||||
LSTMTrainer(const char *model_base, const char *checkpoint_name, int debug_interval,
|
||||
int64_t max_memory);
|
||||
LSTMTrainer(const char *model_base, const char *checkpoint_name,
|
||||
int debug_interval, int64_t max_memory);
|
||||
virtual ~LSTMTrainer();
|
||||
|
||||
// Tries to deserialize a trainer from the given file and silently returns
|
||||
@ -113,8 +114,9 @@ public:
|
||||
// are implemented.
|
||||
// For other args see NetworkBuilder::InitNetwork.
|
||||
// Note: Be sure to call InitCharSet before InitNetwork!
|
||||
bool InitNetwork(const char *network_spec, int append_index, int net_flags, float weight_range,
|
||||
float learning_rate, float momentum, float adam_beta);
|
||||
bool InitNetwork(const char *network_spec, int append_index, int net_flags,
|
||||
float weight_range, float learning_rate, float momentum,
|
||||
float adam_beta);
|
||||
// Initializes a trainer from a serialized TFNetworkModel proto.
|
||||
// Returns the global step of TensorFlow graph or 0 if failed.
|
||||
// Building a compatible TF graph: See tfnetwork.proto.
|
||||
@ -160,7 +162,8 @@ public:
|
||||
// NewSingleError.
|
||||
double LastSingleError(ErrorTypes type) const {
|
||||
return error_buffers_[type]
|
||||
[(training_iteration() + kRollingBufferSize_ - 1) % kRollingBufferSize_];
|
||||
[(training_iteration() + kRollingBufferSize_ - 1) %
|
||||
kRollingBufferSize_];
|
||||
}
|
||||
const DocumentCache &training_data() const {
|
||||
return training_data_;
|
||||
@ -172,11 +175,10 @@ public:
|
||||
// If the training sample is usable, grid searches for the optimal
|
||||
// dict_ratio/cert_offset, and returns the results in a string of space-
|
||||
// separated triplets of ratio,offset=worderr.
|
||||
Trainability GridSearchDictParams(const ImageData *trainingdata, int iteration,
|
||||
double min_dict_ratio, double dict_ratio_step,
|
||||
double max_dict_ratio, double min_cert_offset,
|
||||
double cert_offset_step, double max_cert_offset,
|
||||
std::string &results);
|
||||
Trainability GridSearchDictParams(
|
||||
const ImageData *trainingdata, int iteration, double min_dict_ratio,
|
||||
double dict_ratio_step, double max_dict_ratio, double min_cert_offset,
|
||||
double cert_offset_step, double max_cert_offset, std::string &results);
|
||||
|
||||
// Provides output on the distribution of weight values.
|
||||
void DebugNetwork();
|
||||
@ -184,20 +186,22 @@ public:
|
||||
// Loads a set of lstmf files that were created using the lstm.train config to
|
||||
// tesseract into memory ready for training. Returns false if nothing was
|
||||
// loaded.
|
||||
bool LoadAllTrainingData(const std::vector<std::string> &filenames, CachingStrategy cache_strategy,
|
||||
bool LoadAllTrainingData(const std::vector<std::string> &filenames,
|
||||
CachingStrategy cache_strategy,
|
||||
bool randomly_rotate);
|
||||
|
||||
// Keeps track of best and locally worst error rate, using internally computed
|
||||
// values. See MaintainCheckpointsSpecific for more detail.
|
||||
bool MaintainCheckpoints(TestCallback tester, std::string &log_msg);
|
||||
bool MaintainCheckpoints(const TestCallback &tester, std::string &log_msg);
|
||||
// Keeps track of best and locally worst error_rate (whatever it is) and
|
||||
// launches tests using rec_model, when a new min or max is reached.
|
||||
// Writes checkpoints using train_model at appropriate times and builds and
|
||||
// returns a log message to indicate progress. Returns false if nothing
|
||||
// interesting happened.
|
||||
bool MaintainCheckpointsSpecific(int iteration, const std::vector<char> *train_model,
|
||||
const std::vector<char> *rec_model, TestCallback tester,
|
||||
std::string &log_msg);
|
||||
bool MaintainCheckpointsSpecific(int iteration,
|
||||
const std::vector<char> *train_model,
|
||||
const std::vector<char> *rec_model,
|
||||
TestCallback tester, std::string &log_msg);
|
||||
// Builds a string containing a progress message with current error rates.
|
||||
void PrepareLogMsg(std::string &log_msg) const;
|
||||
// Appends <intro_str> iteration learning_iteration()/training_iteration()/
|
||||
@ -214,7 +218,8 @@ public:
|
||||
}
|
||||
|
||||
// Writes to the given file. Returns false in case of error.
|
||||
bool Serialize(SerializeAmount serialize_amount, const TessdataManager *mgr, TFile *fp) const;
|
||||
bool Serialize(SerializeAmount serialize_amount, const TessdataManager *mgr,
|
||||
TFile *fp) const;
|
||||
// Reads from the given file. Returns false in case of error.
|
||||
bool DeSerialize(const TessdataManager *mgr, TFile *fp);
|
||||
|
||||
@ -240,18 +245,20 @@ public:
|
||||
// Even if it looks like all weights should remain the same, an adjustment
|
||||
// will be made to guarantee a different result when reverting to an old best.
|
||||
// Returns the number of layer learning rates that were reduced.
|
||||
int ReduceLayerLearningRates(TFloat factor, int num_samples, LSTMTrainer *samples_trainer);
|
||||
int ReduceLayerLearningRates(TFloat factor, int num_samples,
|
||||
LSTMTrainer *samples_trainer);
|
||||
|
||||
// Converts the string to integer class labels, with appropriate null_char_s
|
||||
// in between if not in SimpleTextOutput mode. Returns false on failure.
|
||||
bool EncodeString(const std::string &str, std::vector<int> *labels) const {
|
||||
return EncodeString(str, GetUnicharset(), IsRecoding() ? &recoder_ : nullptr,
|
||||
SimpleTextOutput(), null_char_, labels);
|
||||
return EncodeString(str, GetUnicharset(),
|
||||
IsRecoding() ? &recoder_ : nullptr, SimpleTextOutput(),
|
||||
null_char_, labels);
|
||||
}
|
||||
// Static version operates on supplied unicharset, encoder, simple_text.
|
||||
static bool EncodeString(const std::string &str, const UNICHARSET &unicharset,
|
||||
const UnicharCompress *recoder, bool simple_text, int null_char,
|
||||
std::vector<int> *labels);
|
||||
const UnicharCompress *recoder, bool simple_text,
|
||||
int null_char, std::vector<int> *labels);
|
||||
|
||||
// Performs forward-backward on the given trainingdata.
|
||||
// Returns the sample that was used or nullptr if the next sample was deemed
|
||||
@ -259,7 +266,8 @@ public:
|
||||
// holds the training samples.
|
||||
const ImageData *TrainOnLine(LSTMTrainer *samples_trainer, bool batch) {
|
||||
int sample_index = sample_iteration();
|
||||
const ImageData *image = samples_trainer->training_data_.GetPageBySerial(sample_index);
|
||||
const ImageData *image =
|
||||
samples_trainer->training_data_.GetPageBySerial(sample_index);
|
||||
if (image != nullptr) {
|
||||
Trainability trainable = TrainOnLine(image, batch);
|
||||
if (trainable == UNENCODABLE || trainable == NOT_BOXED) {
|
||||
@ -274,30 +282,34 @@ public:
|
||||
|
||||
// Prepares the ground truth, runs forward, and prepares the targets.
|
||||
// Returns a Trainability enum to indicate the suitability of the sample.
|
||||
Trainability PrepareForBackward(const ImageData *trainingdata, NetworkIO *fwd_outputs,
|
||||
NetworkIO *targets);
|
||||
Trainability PrepareForBackward(const ImageData *trainingdata,
|
||||
NetworkIO *fwd_outputs, NetworkIO *targets);
|
||||
|
||||
// Writes the trainer to memory, so that the current training state can be
|
||||
// restored. *this must always be the master trainer that retains the only
|
||||
// copy of the training data and language model. trainer is the model that is
|
||||
// actually serialized.
|
||||
bool SaveTrainingDump(SerializeAmount serialize_amount, const LSTMTrainer &trainer,
|
||||
bool SaveTrainingDump(SerializeAmount serialize_amount,
|
||||
const LSTMTrainer &trainer,
|
||||
std::vector<char> *data) const;
|
||||
|
||||
// Reads previously saved trainer from memory. *this must always be the
|
||||
// master trainer that retains the only copy of the training data and
|
||||
// language model. trainer is the model that is restored.
|
||||
bool ReadTrainingDump(const std::vector<char> &data, LSTMTrainer &trainer) const {
|
||||
bool ReadTrainingDump(const std::vector<char> &data,
|
||||
LSTMTrainer &trainer) const {
|
||||
if (data.empty()) {
|
||||
return false;
|
||||
}
|
||||
return ReadSizedTrainingDump(&data[0], data.size(), trainer);
|
||||
}
|
||||
bool ReadSizedTrainingDump(const char *data, int size, LSTMTrainer &trainer) const {
|
||||
bool ReadSizedTrainingDump(const char *data, int size,
|
||||
LSTMTrainer &trainer) const {
|
||||
return trainer.ReadLocalTrainingDump(&mgr_, data, size);
|
||||
}
|
||||
// Restores the model to *this.
|
||||
bool ReadLocalTrainingDump(const TessdataManager *mgr, const char *data, int size);
|
||||
bool ReadLocalTrainingDump(const TessdataManager *mgr, const char *data,
|
||||
int size);
|
||||
|
||||
// Sets up the data for MaintainCheckpoints from a light ReadTrainingDump.
|
||||
void SetupCheckpointInfo();
|
||||
@ -334,26 +346,30 @@ protected:
|
||||
// corresponding x_starts.
|
||||
// Returns false if the truth string is empty.
|
||||
bool DebugLSTMTraining(const NetworkIO &inputs, const ImageData &trainingdata,
|
||||
const NetworkIO &fwd_outputs, const std::vector<int> &truth_labels,
|
||||
const NetworkIO &fwd_outputs,
|
||||
const std::vector<int> &truth_labels,
|
||||
const NetworkIO &outputs);
|
||||
// Displays the network targets as a line graph.
|
||||
void DisplayTargets(const NetworkIO &targets, const char *window_name, ScrollView **window);
|
||||
void DisplayTargets(const NetworkIO &targets, const char *window_name,
|
||||
ScrollView **window);
|
||||
|
||||
// Builds a no-compromises target where the first positions should be the
|
||||
// truth labels and the rest is padded with the null_char_.
|
||||
bool ComputeTextTargets(const NetworkIO &outputs, const std::vector<int> &truth_labels,
|
||||
bool ComputeTextTargets(const NetworkIO &outputs,
|
||||
const std::vector<int> &truth_labels,
|
||||
NetworkIO *targets);
|
||||
|
||||
// Builds a target using standard CTC. truth_labels should be pre-padded with
|
||||
// nulls wherever desired. They don't have to be between all labels.
|
||||
// outputs is input-output, as it gets clipped to minimum probability.
|
||||
bool ComputeCTCTargets(const std::vector<int> &truth_labels, NetworkIO *outputs,
|
||||
NetworkIO *targets);
|
||||
bool ComputeCTCTargets(const std::vector<int> &truth_labels,
|
||||
NetworkIO *outputs, NetworkIO *targets);
|
||||
|
||||
// Computes network errors, and stores the results in the rolling buffers,
|
||||
// along with the supplied text_error.
|
||||
// Returns the delta error of the current sample (not running average.)
|
||||
double ComputeErrorRates(const NetworkIO &deltas, double char_error, double word_error);
|
||||
double ComputeErrorRates(const NetworkIO &deltas, double char_error,
|
||||
double word_error);
|
||||
|
||||
// Computes the network activation RMS error rate.
|
||||
double ComputeRMSError(const NetworkIO &deltas);
|
||||
@ -366,7 +382,8 @@ protected:
|
||||
double ComputeWinnerError(const NetworkIO &deltas);
|
||||
|
||||
// Computes a very simple bag of chars char error rate.
|
||||
double ComputeCharError(const std::vector<int> &truth_str, const std::vector<int> &ocr_str);
|
||||
double ComputeCharError(const std::vector<int> &truth_str,
|
||||
const std::vector<int> &ocr_str);
|
||||
// Computes a very simple bag of words word recall error rate.
|
||||
// NOTE that this is destructive on both input strings.
|
||||
double ComputeWordError(std::string *truth_str, std::string *ocr_str);
|
||||
@ -380,8 +397,9 @@ protected:
|
||||
|
||||
// Given that error_rate is either a new min or max, updates the best/worst
|
||||
// error rates, and record of progress.
|
||||
std::string UpdateErrorGraph(int iteration, double error_rate, const std::vector<char> &model_data,
|
||||
TestCallback tester);
|
||||
std::string UpdateErrorGraph(int iteration, double error_rate,
|
||||
const std::vector<char> &model_data,
|
||||
const TestCallback &tester);
|
||||
|
||||
protected:
|
||||
#ifndef GRAPHICS_DISABLED
|
||||
|
@ -74,9 +74,9 @@ void SVSync::StartProcess(const char *executable, const char *args) {
|
||||
STARTUPINFO start_info;
|
||||
PROCESS_INFORMATION proc_info;
|
||||
GetStartupInfo(&start_info);
|
||||
if (!CreateProcess(nullptr, const_cast<char *>(proc.c_str()), nullptr, nullptr, FALSE,
|
||||
CREATE_NO_WINDOW | DETACHED_PROCESS, nullptr, nullptr, &start_info,
|
||||
&proc_info))
|
||||
if (!CreateProcess(nullptr, const_cast<char *>(proc.c_str()), nullptr,
|
||||
nullptr, FALSE, CREATE_NO_WINDOW | DETACHED_PROCESS,
|
||||
nullptr, nullptr, &start_info, &proc_info))
|
||||
return;
|
||||
# else
|
||||
int pid = fork();
|
||||
@ -131,13 +131,13 @@ SVSemaphore::SVSemaphore() {
|
||||
}
|
||||
|
||||
SVSemaphore::~SVSemaphore() {
|
||||
#ifdef _WIN32
|
||||
# ifdef _WIN32
|
||||
CloseHandle(semaphore_);
|
||||
#elif defined(__APPLE__)
|
||||
# elif defined(__APPLE__)
|
||||
sem_close(semaphore_);
|
||||
#else
|
||||
# else
|
||||
sem_close(&semaphore_);
|
||||
#endif
|
||||
# endif
|
||||
}
|
||||
|
||||
void SVSemaphore::Signal() {
|
||||
@ -243,14 +243,15 @@ static const char *ScrollViewProg() {
|
||||
}
|
||||
|
||||
// The arguments to the program to invoke to start ScrollView
|
||||
static std::string ScrollViewCommand(std::string scrollview_path) {
|
||||
static std::string ScrollViewCommand(const std::string &scrollview_path) {
|
||||
// The following ugly ifdef is to enable the output of the java runtime
|
||||
// to be sent down a black hole on non-windows to ignore all the
|
||||
// exceptions in piccolo. Ideally piccolo would be debugged to make
|
||||
// this unnecessary.
|
||||
// Also the path has to be separated by ; on windows and : otherwise.
|
||||
# ifdef _WIN32
|
||||
const char cmd_template[] = "-Djava.library.path=\"%s\" -jar \"%s/ScrollView.jar\"";
|
||||
const char cmd_template[] =
|
||||
"-Djava.library.path=\"%s\" -jar \"%s/ScrollView.jar\"";
|
||||
|
||||
# else
|
||||
const char cmd_template[] =
|
||||
@ -289,14 +290,15 @@ SVNetwork::SVNetwork(const char *hostname, int port) {
|
||||
# endif // _WIN32
|
||||
|
||||
if (getaddrinfo(hostname, port_string.c_str(), nullptr, &addr_info) != 0) {
|
||||
std::cerr << "Error resolving name for ScrollView host " << std::string(hostname) << ":" << port
|
||||
<< std::endl;
|
||||
std::cerr << "Error resolving name for ScrollView host "
|
||||
<< std::string(hostname) << ":" << port << std::endl;
|
||||
# ifdef _WIN32
|
||||
WSACleanup();
|
||||
# endif // _WIN32
|
||||
}
|
||||
|
||||
stream_ = socket(addr_info->ai_family, addr_info->ai_socktype, addr_info->ai_protocol);
|
||||
stream_ = socket(addr_info->ai_family, addr_info->ai_socktype,
|
||||
addr_info->ai_protocol);
|
||||
|
||||
if (stream_ < 0) {
|
||||
std::cerr << "Failed to open socket" << std::endl;
|
||||
@ -324,7 +326,8 @@ SVNetwork::SVNetwork(const char *hostname, int port) {
|
||||
|
||||
Close();
|
||||
for (;;) {
|
||||
stream_ = socket(addr_info->ai_family, addr_info->ai_socktype, addr_info->ai_protocol);
|
||||
stream_ = socket(addr_info->ai_family, addr_info->ai_socktype,
|
||||
addr_info->ai_protocol);
|
||||
if (stream_ >= 0) {
|
||||
if (connect(stream_, addr_info->ai_addr, addr_info->ai_addrlen) == 0) {
|
||||
break;
|
||||
|
@ -111,7 +111,7 @@ int Wordrec::angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3) {
|
||||
if (static_cast<int>(length) == 0) {
|
||||
return (0);
|
||||
}
|
||||
angle = static_cast<int>(floor(asin(vector1.cross(vector2) / length) / M_PI * 180.0 + 0.5));
|
||||
angle = static_cast<int>(floor(std::asin(vector1.cross(vector2) / length) / M_PI * 180.0 + 0.5));
|
||||
|
||||
/* Use dot product */
|
||||
if (vector1.dot(vector2) < 0) {
|
||||
|
@ -46,7 +46,7 @@
|
||||
/* How many to keep */
|
||||
#define MAX_NUM_SEAMS 150
|
||||
/* How many to keep */
|
||||
#define NO_FULL_PRIORITY -1 /* Special marker for pri. */
|
||||
#define NO_FULL_PRIORITY (-1) // Special marker for pri.
|
||||
/* Evaluate right away */
|
||||
#define BAD_PRIORITY 9999.0
|
||||
|
||||
|
@ -51,7 +51,7 @@ PRIORITY Wordrec::grade_split_length(SPLIT *split) {
|
||||
if (split_length <= 0) {
|
||||
grade = 0;
|
||||
} else {
|
||||
grade = sqrt(split_length) * chop_split_dist_knob;
|
||||
grade = std::sqrt(split_length) * chop_split_dist_knob;
|
||||
}
|
||||
|
||||
return (std::max(0.0f, grade));
|
||||
|
@ -984,8 +984,8 @@ float LanguageModel::ComputeNgramCost(const char *unichar, float certainty, floa
|
||||
*found_small_prob = true;
|
||||
prob = language_model_ngram_small_prob;
|
||||
}
|
||||
*ngram_cost = -1.0 * log2(prob);
|
||||
float ngram_and_classifier_cost = -1.0 * log2(CertaintyScore(certainty) / denom) +
|
||||
*ngram_cost = -1 * std::log2(prob);
|
||||
float ngram_and_classifier_cost = -1 * std::log2(CertaintyScore(certainty) / denom) +
|
||||
*ngram_cost * language_model_ngram_scale_factor;
|
||||
if (language_model_debug_level > 1) {
|
||||
tprintf("-log [ p(%s) * p(%s | %s) ] = -log2(%g*%g) = %g\n", unichar, unichar, context_ptr,
|
||||
@ -1341,24 +1341,24 @@ void LanguageModel::ExtractFeaturesFromPath(const ViterbiStateEntry &vse, float
|
||||
int permuter = vse.dawg_info->permuter;
|
||||
if (permuter == NUMBER_PERM || permuter == USER_PATTERN_PERM) {
|
||||
if (vse.consistency_info.num_digits == vse.length) {
|
||||
features[PTRAIN_DIGITS_SHORT + len] = 1.0;
|
||||
features[PTRAIN_DIGITS_SHORT + len] = 1.0f;
|
||||
} else {
|
||||
features[PTRAIN_NUM_SHORT + len] = 1.0;
|
||||
features[PTRAIN_NUM_SHORT + len] = 1.0f;
|
||||
}
|
||||
} else if (permuter == DOC_DAWG_PERM) {
|
||||
features[PTRAIN_DOC_SHORT + len] = 1.0;
|
||||
features[PTRAIN_DOC_SHORT + len] = 1.0f;
|
||||
} else if (permuter == SYSTEM_DAWG_PERM || permuter == USER_DAWG_PERM ||
|
||||
permuter == COMPOUND_PERM) {
|
||||
features[PTRAIN_DICT_SHORT + len] = 1.0;
|
||||
features[PTRAIN_DICT_SHORT + len] = 1.0f;
|
||||
} else if (permuter == FREQ_DAWG_PERM) {
|
||||
features[PTRAIN_FREQ_SHORT + len] = 1.0;
|
||||
features[PTRAIN_FREQ_SHORT + len] = 1.0f;
|
||||
}
|
||||
}
|
||||
// Record shape cost feature (normalized by path length).
|
||||
features[PTRAIN_SHAPE_COST_PER_CHAR] =
|
||||
vse.associate_stats.shape_cost / static_cast<float>(vse.length);
|
||||
// Record ngram cost. (normalized by the path length).
|
||||
features[PTRAIN_NGRAM_COST_PER_CHAR] = 0.0;
|
||||
features[PTRAIN_NGRAM_COST_PER_CHAR] = 0.0f;
|
||||
if (vse.ngram_info != nullptr) {
|
||||
features[PTRAIN_NGRAM_COST_PER_CHAR] =
|
||||
vse.ngram_info->ngram_cost / static_cast<float>(vse.length);
|
||||
@ -1369,7 +1369,7 @@ void LanguageModel::ExtractFeaturesFromPath(const ViterbiStateEntry &vse, float
|
||||
features[PTRAIN_NUM_BAD_CASE] = vse.consistency_info.NumInconsistentCase();
|
||||
features[PTRAIN_XHEIGHT_CONSISTENCY] = vse.consistency_info.xht_decision;
|
||||
features[PTRAIN_NUM_BAD_CHAR_TYPE] =
|
||||
vse.dawg_info == nullptr ? vse.consistency_info.NumInconsistentChartype() : 0.0;
|
||||
vse.dawg_info == nullptr ? vse.consistency_info.NumInconsistentChartype() : 0.0f;
|
||||
features[PTRAIN_NUM_BAD_SPACING] = vse.consistency_info.NumInconsistentSpaces();
|
||||
// Disabled this feature for now due to its poor performance.
|
||||
// features[PTRAIN_NUM_BAD_FONT] = vse.consistency_info.inconsistent_font;
|
||||
|
@ -94,7 +94,7 @@ bool ParamsModel::Equivalent(const ParamsModel &that) const {
|
||||
}
|
||||
for (unsigned i = 0; i < weights_vec_[p].size(); i++) {
|
||||
if (weights_vec_[p][i] != that.weights_vec_[p][i] &&
|
||||
fabs(weights_vec_[p][i] - that.weights_vec_[p][i]) > epsilon) {
|
||||
std::fabs(weights_vec_[p][i] - that.weights_vec_[p][i]) > epsilon) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user